- Add Kokoro-FastAPI TTS integration to the chapter reader UI:
  - Browser-side MSE streaming with paragraph-level click-to-start
  - Voice selector, speed slider, auto-next with prefetch of the next chapter
  - New GET /ui/chapter-text endpoint that strips Markdown and serves plain text
- Add ranking page (novelfire /ranking scraper, WriteRanking/ReadRankingItems in writer, GET /ranking + POST /ranking/refresh + GET /ranking/view routes) with local-library annotation and one-click scrape buttons
- Add StrategyDirect (plain HTTP client) as a new browser strategy; the default strategy is now 'direct' for chapter fetching and 'content' for chapter-list URL retrieval (split via BROWSERLESS_URL_STRATEGY)
- Fix chapter numbering bug: numbers are now derived from the URL path (/chapter-N) rather than list position, correcting newest-first ordering
- Add 'refresh <slug>' CLI sub-command to re-scrape a book from its saved source_url without knowing the original URL
- Extend NovelScraper interface with RankingProvider (ScrapeRanking)
- Tune scraper timeouts: wait-for-selector reduced to 5 s, GotoOptions timeout set to 60 s, content/scrape client defaults raised to 90 s
- Fix cover extraction (figure.cover > img rather than bare img.cover)
- Add AGENTS.md and .aiignore for AI tooling context
- Add integration tests for browser client and novelfire scraper (build tag: integration) and unit tests for chapterNumberFromURL and pagination
84 lines
3.3 KiB
YAML
84 lines
3.3 KiB
YAML
# Docker Compose stack with three services:
#   browserless – headless Chromium endpoint the scraper talks to
#   kokoro      – Kokoro-FastAPI text-to-speech server
#   scraper     – the application container built from ./scraper
#
# Values written as ${VAR:-default} are interpolated by Compose from the host
# environment (or an .env file) and fall back to the default when unset.
version: "3.9"

services:
  # ─── Browserless ────────────────────────────────────────────────────────────
  browserless:
    image: ghcr.io/browserless/chromium:latest
    container_name: libnovel-browserless
    restart: unless-stopped
    environment:
      # Set a token to lock down the endpoint; the scraper reads it via
      # BROWSERLESS_TOKEN below.
      TOKEN: "${BROWSERLESS_TOKEN:-}"
      # Allow up to 10 concurrent browser sessions.
      CONCURRENT: "${BROWSERLESS_CONCURRENT:-10}"
      # Queue up to 100 requests before returning 429.
      QUEUED: "${BROWSERLESS_QUEUED:-100}"
      # Per-session timeout in ms.
      TIMEOUT: "${BROWSERLESS_TIMEOUT:-60000}"
      # Optional webhook URL for Browserless error alerts.
      ERROR_ALERT_URL: "${ERROR_ALERT_URL:-}"
    ports:
      - "3000:3000"
    # Shared memory is required for Chrome.
    shm_size: "2gb"
    healthcheck:
      test: ["CMD", "wget", "-qO-", "http://localhost:3000/json/version"]
      interval: 10s
      timeout: 5s
      retries: 5

  # ─── Kokoro-FastAPI (TTS) ────────────────────────────────────────────────────
  # CPU image; swap for ghcr.io/remsky/kokoro-fastapi-gpu:latest on NVIDIA hosts.
  # Models are baked in — no volume mount required for the default voice set.
  kokoro:
    image: ghcr.io/remsky/kokoro-fastapi-cpu:latest
    container_name: libnovel-kokoro
    restart: unless-stopped
    ports:
      - "8880:8880"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8880/health"]
      interval: 15s
      timeout: 5s
      retries: 5

  # ─── Scraper ─────────────────────────────────────────────────────────────────
  scraper:
    build:
      context: ./scraper
      dockerfile: Dockerfile
    container_name: libnovel-scraper
    restart: unless-stopped
    # Hold the scraper back until the Kokoro healthcheck passes.
    depends_on:
      kokoro:
        condition: service_healthy
    environment:
      BROWSERLESS_URL: "http://browserless:3000"
      BROWSERLESS_TOKEN: "${BROWSERLESS_TOKEN:-}"
      # content | scrape | cdp | direct — swap to test different strategies.
      BROWSERLESS_STRATEGY: "${BROWSERLESS_STRATEGY:-direct}"
      # Strategy for URL retrieval (chapter list). Default: content (browserless)
      BROWSERLESS_URL_STRATEGY: "${BROWSERLESS_URL_STRATEGY:-content}"
      # 0 → defaults to NumCPU inside the container.
      SCRAPER_WORKERS: "${SCRAPER_WORKERS:-0}"
      SCRAPER_STATIC_ROOT: "/app/static/books"
      SCRAPER_HTTP_ADDR: ":8080"
      # Overridable from the host environment like the other tunables; the
      # default stays at debug.
      LOG_LEVEL: "${LOG_LEVEL:-debug}"
      # Kokoro-FastAPI TTS endpoint.
      # NOTE(review): the localhost default only reaches Kokoro from the host
      # (via the published 8880 port), e.g. from a browser. If the scraper
      # process itself calls this URL, localhost is the scraper container and
      # this should be http://kokoro:8880 — confirm which side consumes it.
      KOKORO_URL: "${KOKORO_URL:-http://localhost:8880}"
      KOKORO_VOICE: "${KOKORO_VOICE:-af_bella}"
    ports:
      - "8080:8080"
    volumes:
      # Mount the host static directory so scraped content is available outside
      # the container. Create ./static/books on the host first if needed.
      - "${STATIC_ROOT:-./static/books}:/app/static/books"
    healthcheck:
      test: ["CMD", "wget", "-qO-", "http://localhost:8080/health"]
      interval: 15s
      timeout: 5s
      retries: 3

# No named volumes are declared; the scraper uses a bind mount instead.
volumes: {}