Files
libnovel/scraper/internal/server/server.go
Admin 7879a51fe3 feat: add Kokoro TTS, ranking page, direct HTTP strategy, and chapter-number fix
- Add Kokoro-FastAPI TTS integration to the chapter reader UI:
  - Browser-side MSE streaming with paragraph-level click-to-start
  - Voice selector, speed slider, auto-next with prefetch of the next chapter
  - New GET /ui/chapter-text endpoint that strips Markdown and serves plain text

- Add ranking page (novelfire /ranking scraper, WriteRanking/ReadRankingItems
  in writer, GET /ranking + POST /ranking/refresh + GET /ranking/view routes)
  with local-library annotation and one-click scrape buttons

- Add StrategyDirect (plain HTTP client) as a new browser strategy; the
  default strategy is now 'direct' for chapter fetching and 'content'
  for chapter-list URL retrieval (split via BROWSERLESS_URL_STRATEGY)

- Fix chapter numbering bug: numbers are now derived from the URL path
  (/chapter-N) rather than list position, correcting newest-first ordering

- Add 'refresh <slug>' CLI sub-command to re-scrape a book from its saved
  source_url without knowing the original URL

- Extend NovelScraper interface with RankingProvider (ScrapeRanking)

- Tune scraper timeouts: wait-for-selector reduced to 5 s, GotoOptions
  timeout set to 60 s, content/scrape client defaults raised to 90 s

- Add cover extraction fix (figure.cover > img rather than bare img.cover)

- Add AGENTS.md and .aiignore for AI tooling context

- Add integration tests for browser client and novelfire scraper (build
  tag: integration) and unit tests for chapterNumberFromURL and pagination
2026-03-01 12:25:16 +05:00

171 lines
4.9 KiB
Go

// Package server exposes the scraper as an HTTP service.
//
// API endpoints:
//
//	POST /scrape      — enqueue a full catalogue scrape
//	POST /scrape/book — enqueue a single-book scrape (JSON body: {"url":"..."})
//	GET  /health      — liveness probe
//
// It also serves the browser UI (home, book, chapter, and ranking pages),
// scrape controls under /ui/..., and GET /ui/chapter-text/{slug}/{n}, which
// returns a chapter as plain text for browser-side TTS.
package server
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"net/http"
"strconv"
"sync"
"time"
"github.com/libnovel/scraper/internal/orchestrator"
"github.com/libnovel/scraper/internal/scraper"
"github.com/libnovel/scraper/internal/writer"
)
// Server wraps an HTTP mux with the scraping endpoints.
//
// A Server serializes scrape jobs: at most one runs at a time (mu guards
// running; see runAsync). The writer is rooted at oCfg.StaticRoot (see New).
type Server struct {
	addr  string               // listen address for the HTTP server
	oCfg  orchestrator.Config  // template config; copied per scrape request
	novel scraper.NovelScraper // site-specific scraping implementation
	log   *slog.Logger
	writer *writer.Writer // reads/writes scraped content under oCfg.StaticRoot

	mu      sync.Mutex // guards running
	running bool       // true while a scrape job is in flight

	kokoroURL   string // Kokoro-FastAPI base URL, e.g. http://kokoro:8880
	kokoroVoice string // default voice, e.g. af_bella
}
// New constructs a Server listening on addr. The content writer is rooted
// at the orchestrator config's StaticRoot; kokoroURL and kokoroVoice
// configure the browser-side TTS integration.
func New(addr string, oCfg orchestrator.Config, novel scraper.NovelScraper, log *slog.Logger, kokoroURL, kokoroVoice string) *Server {
	s := &Server{
		addr:        addr,
		oCfg:        oCfg,
		novel:       novel,
		log:         log,
		kokoroURL:   kokoroURL,
		kokoroVoice: kokoroVoice,
	}
	s.writer = writer.New(oCfg.StaticRoot)
	return s
}
// ListenAndServe starts the HTTP server and blocks until the provided context
// is cancelled, at which point the server is drained with a 10 s grace period,
// or until the listener fails.
func (s *Server) ListenAndServe(ctx context.Context) error {
	mux := http.NewServeMux()

	// Route table: API endpoints, then browser UI routes.
	for _, rt := range []struct {
		pattern string
		handler http.HandlerFunc
	}{
		{"GET /health", s.handleHealth},
		{"POST /scrape", s.handleScrapeCatalogue},
		{"POST /scrape/book", s.handleScrapeBook},
		// UI routes
		{"GET /", s.handleHome},
		{"GET /ranking", s.handleRanking},
		{"POST /ranking/refresh", s.handleRankingRefresh},
		{"GET /ranking/view", s.handleRankingView},
		{"GET /books/{slug}", s.handleBook},
		{"GET /books/{slug}/chapters/{n}", s.handleChapter},
		{"POST /ui/scrape/book", s.handleUIScrapeBook},
		{"GET /ui/scrape/status", s.handleUIScrapeStatus},
		// Plain-text chapter content for browser-side TTS
		{"GET /ui/chapter-text/{slug}/{n}", s.handleChapterText},
	} {
		mux.HandleFunc(rt.pattern, rt.handler)
	}

	srv := &http.Server{
		Addr:         s.addr,
		Handler:      mux,
		ReadTimeout:  15 * time.Second,
		WriteTimeout: 60 * time.Second,
		IdleTimeout:  60 * time.Second,
	}

	// Run the listener in the background so we can race it against ctx.
	serveErr := make(chan error, 1)
	go func() { serveErr <- srv.ListenAndServe() }()
	s.log.Info("HTTP server listening", "addr", s.addr)

	select {
	case <-ctx.Done():
		shutCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		defer cancel()
		return srv.Shutdown(shutCtx)
	case err := <-serveErr:
		return err
	}
}
// handleHealth implements the liveness probe: always responds 200 with
// the JSON body {"status":"ok"}.
func (s *Server) handleHealth(w http.ResponseWriter, _ *http.Request) {
	w.Header().Set("Content-Type", "application/json")
	payload := map[string]string{"status": "ok"}
	// Encoding a static one-key map cannot realistically fail.
	_ = json.NewEncoder(w).Encode(payload)
}
// handleChapterText returns the plain text of a chapter (markdown stripped)
// for browser-side TTS. The browser POSTs this directly to Kokoro-FastAPI.
// Responds 404 when the chapter number is malformed or the chapter is missing.
func (s *Server) handleChapterText(w http.ResponseWriter, r *http.Request) {
	chapter, err := strconv.Atoi(r.PathValue("n"))
	if err != nil || chapter < 1 {
		http.NotFound(w, r)
		return
	}

	slug := r.PathValue("slug")
	raw, readErr := s.writer.ReadChapter(slug, chapter)
	if readErr != nil {
		http.NotFound(w, r)
		return
	}

	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	// Chapter text can change after a re-scrape; keep caches out of the way.
	w.Header().Set("Cache-Control", "no-store")
	fmt.Fprint(w, stripMarkdown(raw))
}
// handleScrapeCatalogue enqueues a full-catalogue scrape via runAsync.
func (s *Server) handleScrapeCatalogue(w http.ResponseWriter, _ *http.Request) {
	catalogueCfg := s.oCfg
	catalogueCfg.SingleBookURL = "" // an empty URL selects the full catalogue
	s.runAsync(w, catalogueCfg)
}
// handleScrapeBook enqueues a scrape of a single book.
//
// Request body: JSON {"url":"..."}. Responds 202 on acceptance, 400 when the
// body is malformed or the url field is empty, and 409 (via runAsync) when
// another scrape job is already running.
func (s *Server) handleScrapeBook(w http.ResponseWriter, r *http.Request) {
	var body struct {
		URL string `json:"url"`
	}
	if err := json.NewDecoder(r.Body).Decode(&body); err != nil || body.URL == "" {
		// The error payload is JSON, so label it as such; http.Error would
		// have sent it with Content-Type: text/plain.
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusBadRequest)
		_ = json.NewEncoder(w).Encode(map[string]string{
			"error": `request body must be JSON with "url" field`,
		})
		return
	}
	cfg := s.oCfg
	cfg.SingleBookURL = body.URL
	s.runAsync(w, cfg)
}
// runAsync launches an orchestrator in the background and returns 202 Accepted.
// Only one scrape job runs at a time; concurrent requests receive 409 Conflict.
// The background job is bounded by a 24 h timeout so a wedged scrape cannot
// hold the single job slot forever.
func (s *Server) runAsync(w http.ResponseWriter, cfg orchestrator.Config) {
	s.mu.Lock()
	if s.running {
		s.mu.Unlock()
		// The error payload is JSON, so label it as such; http.Error would
		// have sent it with Content-Type: text/plain.
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusConflict)
		_ = json.NewEncoder(w).Encode(map[string]string{
			"error": "a scrape job is already running",
		})
		return
	}
	s.running = true
	s.mu.Unlock()

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusAccepted)
	_ = json.NewEncoder(w).Encode(map[string]string{"status": "accepted"})

	go func() {
		// Release the job slot whatever happens to the orchestrator.
		defer func() {
			s.mu.Lock()
			s.running = false
			s.mu.Unlock()
		}()
		ctx, cancel := context.WithTimeout(context.Background(), 24*time.Hour)
		defer cancel()
		o := orchestrator.New(cfg, s.novel, s.log)
		if err := o.Run(ctx); err != nil {
			// slog renders error values directly; no Sprintf round-trip needed.
			s.log.Error("scrape job failed", "err", err)
		}
	}()
}