- Add Kokoro-FastAPI TTS integration to the chapter reader UI: - Browser-side MSE streaming with paragraph-level click-to-start - Voice selector, speed slider, auto-next with prefetch of the next chapter - New GET /ui/chapter-text endpoint that strips Markdown and serves plain text - Add ranking page (novelfire /ranking scraper, WriteRanking/ReadRankingItems in writer, GET /ranking + POST /ranking/refresh + GET /ranking/view routes) with local-library annotation and one-click scrape buttons - Add StrategyDirect (plain HTTP client) as a new browser strategy; the default strategy is now 'direct' for chapter fetching and 'content' for chapter-list URL retrieval (split via BROWSERLESS_URL_STRATEGY) - Fix chapter numbering bug: numbers are now derived from the URL path (/chapter-N) rather than list position, correcting newest-first ordering - Add 'refresh <slug>' CLI sub-command to re-scrape a book from its saved source_url without knowing the original URL - Extend NovelScraper interface with RankingProvider (ScrapeRanking) - Tune scraper timeouts: wait-for-selector reduced to 5 s, GotoOptions timeout set to 60 s, content/scrape client defaults raised to 90 s - Add cover extraction fix (figure.cover > img rather than bare img.cover) - Add AGENTS.md and .aiignore for AI tooling context - Add integration tests for browser client and novelfire scraper (build tag: integration) and unit tests for chapterNumberFromURL and pagination
69 lines
1.9 KiB
Go
69 lines
1.9 KiB
Go
package browser
|
|
|
|
import (
	"context"
	"errors"
	"fmt"
	"io"
	"net/http"
	"time"
)
|
|
|
|
type httpClient struct {
|
|
cfg Config
|
|
http *http.Client
|
|
sem chan struct{}
|
|
}
|
|
|
|
func NewDirectHTTPClient(cfg Config) BrowserClient {
|
|
if cfg.Timeout == 0 {
|
|
cfg.Timeout = 30 * time.Second
|
|
}
|
|
return &httpClient{
|
|
cfg: cfg,
|
|
http: &http.Client{Timeout: cfg.Timeout},
|
|
sem: makeSem(cfg.MaxConcurrent),
|
|
}
|
|
}
|
|
|
|
func (c *httpClient) Strategy() Strategy { return StrategyDirect }
|
|
|
|
func (c *httpClient) GetContent(ctx context.Context, req ContentRequest) (string, error) {
|
|
if err := acquire(ctx, c.sem); err != nil {
|
|
return "", fmt.Errorf("http: semaphore: %w", err)
|
|
}
|
|
defer release(c.sem)
|
|
|
|
httpReq, err := http.NewRequestWithContext(ctx, http.MethodGet, req.URL, nil)
|
|
if err != nil {
|
|
return "", fmt.Errorf("http: build request: %w", err)
|
|
}
|
|
httpReq.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
|
|
httpReq.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
|
|
httpReq.Header.Set("Accept-Language", "en-US,en;q=0.5")
|
|
|
|
resp, err := c.http.Do(httpReq)
|
|
if err != nil {
|
|
return "", fmt.Errorf("http: do request: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
b, _ := io.ReadAll(resp.Body)
|
|
return "", fmt.Errorf("http: unexpected status %d: %s", resp.StatusCode, b)
|
|
}
|
|
|
|
raw, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("http: read body: %w", err)
|
|
}
|
|
return string(raw), nil
|
|
}
|
|
|
|
func (c *httpClient) ScrapePage(_ context.Context, _ ScrapeRequest) (ScrapeResponse, error) {
|
|
return ScrapeResponse{}, fmt.Errorf("http client does not support ScrapePage; use browserless")
|
|
}
|
|
|
|
func (c *httpClient) CDPSession(_ context.Context, _ string, _ CDPSessionFunc) error {
|
|
return fmt.Errorf("http client does not support CDP; use browserless")
|
|
}
|