Files
libnovel/docker-compose.yml
Admin 7879a51fe3 feat: add Kokoro TTS, ranking page, direct HTTP strategy, and chapter-number fix
- Add Kokoro-FastAPI TTS integration to the chapter reader UI:
  - Browser-side MSE streaming with paragraph-level click-to-start
  - Voice selector, speed slider, auto-next with prefetch of the next chapter
  - New GET /ui/chapter-text endpoint that strips Markdown and serves plain text

- Add ranking page (novelfire /ranking scraper, WriteRanking/ReadRankingItems
  in writer, GET /ranking + POST /ranking/refresh + GET /ranking/view routes)
  with local-library annotation and one-click scrape buttons

- Add StrategyDirect (plain HTTP client) as a new browser strategy; the
  default strategy is now 'direct' for chapter fetching and 'content'
  for chapter-list URL retrieval (split via BROWSERLESS_URL_STRATEGY)

- Fix chapter numbering bug: numbers are now derived from the URL path
  (/chapter-N) rather than list position, correcting newest-first ordering

- Add 'refresh <slug>' CLI sub-command to re-scrape a book from its saved
  source_url without knowing the original URL

- Extend NovelScraper interface with RankingProvider (ScrapeRanking)

- Tune scraper timeouts: wait-for-selector reduced to 5 s, GotoOptions
  timeout set to 60 s, content/scrape client defaults raised to 90 s

- Fix cover extraction (select figure.cover > img rather than bare img.cover)

- Add AGENTS.md and .aiignore for AI tooling context

- Add integration tests for browser client and novelfire scraper (build
  tag: integration) and unit tests for chapterNumberFromURL and pagination
2026-03-01 12:25:16 +05:00

84 lines
3.3 KiB
YAML

# Compose stack for libnovel: browserless, kokoro (TTS) and the scraper
# service, which is configured with the URLs of the other two.
# NOTE(review): Compose v2 treats the top-level "version" key as obsolete and
# ignores it — confirm no legacy docker-compose v1 users before removing.
version: "3.9"
services:
# ─── Browserless ────────────────────────────────────────────────────────────
  # Chromium-based Browserless instance. The scraper service is configured to
  # reach it at http://browserless:3000.
  browserless:
    image: ghcr.io/browserless/chromium:latest
    container_name: libnovel-browserless
    restart: unless-stopped
    environment:
      # Access token that locks down the endpoint (empty unless
      # BROWSERLESS_TOKEN is set). The scraper service receives the same
      # BROWSERLESS_TOKEN value.
      TOKEN: '${BROWSERLESS_TOKEN:-}'
      # Maximum number of concurrent browser sessions (default 10).
      CONCURRENT: '${BROWSERLESS_CONCURRENT:-10}'
      # Requests that may wait in the queue (default 100); beyond that the
      # service answers 429.
      QUEUED: '${BROWSERLESS_QUEUED:-100}'
      # Timeout for a single session, in milliseconds (default 60000).
      TIMEOUT: '${BROWSERLESS_TIMEOUT:-60000}'
      # Webhook URL that receives Browserless error alerts; optional.
      ERROR_ALERT_URL: '${ERROR_ALERT_URL:-}'
    ports:
      - '3000:3000'
    # Chrome needs a generous shared-memory segment.
    shm_size: '2gb'
    healthcheck:
      # Healthy once the /json/version endpoint answers from inside the
      # container.
      test:
        - 'CMD'
        - 'wget'
        - '-qO-'
        - 'http://localhost:3000/json/version'
      interval: 10s
      timeout: 5s
      retries: 5
# ─── Kokoro-FastAPI (TTS) ────────────────────────────────────────────────────
  # Text-to-speech backend. This is the CPU build; on NVIDIA hosts use
  # ghcr.io/remsky/kokoro-fastapi-gpu:latest instead.
  # The default voice set is baked into the image, so no volume mount is
  # required.
  kokoro:
    image: ghcr.io/remsky/kokoro-fastapi-cpu:latest
    container_name: libnovel-kokoro
    restart: unless-stopped
    ports:
      - '8880:8880'
    healthcheck:
      # Healthy once /health answers successfully from inside the container.
      test:
        - 'CMD'
        - 'curl'
        - '-f'
        - 'http://localhost:8880/health'
      interval: 15s
      timeout: 5s
      retries: 5
# ─── Scraper ─────────────────────────────────────────────────────────────────
  # Scraper service, built from ./scraper. Listens on :8080 inside the
  # container (SCRAPER_HTTP_ADDR), published on host port 8080.
  scraper:
    build:
      context: ./scraper
      dockerfile: Dockerfile
    container_name: libnovel-scraper
    restart: unless-stopped
    depends_on:
      # BROWSERLESS_URL below targets this service and the default URL
      # strategy ("content") goes through it, so it must be started together
      # with the scraper. Only start order is enforced (service_started); the
      # scraper does not wait for the browserless healthcheck.
      browserless:
        condition: service_started
      kokoro:
        condition: service_healthy
    environment:
      BROWSERLESS_URL: "http://browserless:3000"
      BROWSERLESS_TOKEN: "${BROWSERLESS_TOKEN:-}"
      # content | scrape | cdp | direct — swap to test different strategies.
      BROWSERLESS_STRATEGY: "${BROWSERLESS_STRATEGY:-direct}"
      # Strategy for URL retrieval (chapter list). Default: content
      # (browserless).
      BROWSERLESS_URL_STRATEGY: "${BROWSERLESS_URL_STRATEGY:-content}"
      # 0 → defaults to NumCPU inside the container.
      SCRAPER_WORKERS: "${SCRAPER_WORKERS:-0}"
      # Must match the container side of the bind mount under "volumes".
      SCRAPER_STATIC_ROOT: "/app/static/books"
      SCRAPER_HTTP_ADDR: ":8080"
      # Overridable like the other tunables; defaults to the previous
      # hard-coded value.
      LOG_LEVEL: "${LOG_LEVEL:-debug}"
      # Kokoro-FastAPI TTS endpoint.
      # NOTE(review): the default uses localhost, which from inside this
      # container is the scraper itself, not the kokoro service. Presumably
      # the URL is consumed by the user's browser via the published 8880
      # port — confirm; if the scraper process calls Kokoro directly this
      # should be http://kokoro:8880.
      KOKORO_URL: "${KOKORO_URL:-http://localhost:8880}"
      KOKORO_VOICE: "${KOKORO_VOICE:-af_bella}"
    ports:
      - "8080:8080"
    volumes:
      # Mount the host static directory so scraped content is available
      # outside the container. Create ./static/books on the host first if
      # needed.
      - "${STATIC_ROOT:-./static/books}:/app/static/books"
    healthcheck:
      # Healthy once /health answers from inside the container.
      test: ["CMD", "wget", "-qO-", "http://localhost:8080/health"]
      interval: 15s
      timeout: 5s
      retries: 3
# No named volumes are declared in this file.
volumes: {}