Scraped content is now stored in the 'static_books' Docker named volume instead of a host bind mount, removing the dependency on STATIC_ROOT and the need to pre-create ./static/books on the host.
83 lines
3.2 KiB
YAML
83 lines
3.2 KiB
YAML
# Compose file format version; quoted so YAML keeps it a string, not a float.
version: '3.9'

# Every container of the stack is declared under this mapping.
services:
# ─── Browserless ────────────────────────────────────────────────────────────
  # Headless Chromium service; the scraper addresses it as
  # http://browserless:3000.
  browserless:
    container_name: libnovel-browserless
    image: ghcr.io/browserless/chromium:latest
    restart: unless-stopped
    # Shared memory is required for Chrome.
    shm_size: '2gb'
    ports:
      # Host port 3030 is published to container port 3000.
      - '3030:3000'
    environment:
      # Token that locks down the endpoint; the scraper service is handed the
      # same BROWSERLESS_TOKEN value. Empty when the variable is unset.
      TOKEN: '${BROWSERLESS_TOKEN:-}'
      # Concurrent browser sessions allowed (10 unless overridden).
      CONCURRENT: '${BROWSERLESS_CONCURRENT:-10}'
      # Requests queued before a 429 is returned (100 unless overridden).
      QUEUED: '${BROWSERLESS_QUEUED:-100}'
      # Per-session timeout in milliseconds (60000 unless overridden).
      TIMEOUT: '${BROWSERLESS_TIMEOUT:-60000}'
      # Optional webhook URL for Browserless error alerts.
      ERROR_ALERT_URL: '${ERROR_ALERT_URL:-}'
    healthcheck:
      # Probes the /json/version endpoint from inside the container.
      test:
        - 'CMD'
        - 'wget'
        - '-qO-'
        - 'http://localhost:3000/json/version'
      interval: 10s
      timeout: 5s
      retries: 5
# ─── Kokoro-FastAPI (TTS) ────────────────────────────────────────────────────
  # Text-to-speech service. This is the CPU image; on NVIDIA hosts swap it for
  # ghcr.io/remsky/kokoro-fastapi-gpu:latest.
  # The default voice set ships inside the image, so no volume is mounted.
  kokoro:
    container_name: libnovel-kokoro
    image: ghcr.io/remsky/kokoro-fastapi-cpu:latest
    restart: unless-stopped
    ports:
      # Same port number on the host and in the container.
      - '8880:8880'
    healthcheck:
      # The scraper waits for this check to pass (depends_on: service_healthy).
      test:
        - 'CMD'
        - 'curl'
        - '-f'
        - 'http://localhost:8880/health'
      interval: 15s
      timeout: 5s
      retries: 5
# ─── Scraper ─────────────────────────────────────────────────────────────────
  # Locally built scraper. It talks to the browserless and kokoro services
  # over the Compose network and writes scraped content to the static_books
  # named volume.
  scraper:
    build:
      context: ./scraper
      dockerfile: Dockerfile
    container_name: libnovel-scraper
    restart: unless-stopped
    depends_on:
      # BROWSERLESS_URL below is hard-wired to this service, so make sure it
      # is started first. service_started (rather than service_healthy) keeps
      # scraper start-up from being gated on the browserless healthcheck.
      browserless:
        condition: service_started
      kokoro:
        condition: service_healthy
    environment:
      BROWSERLESS_URL: 'http://browserless:3000'
      # Must match the TOKEN configured on the browserless service.
      BROWSERLESS_TOKEN: '${BROWSERLESS_TOKEN:-}'
      # content | scrape | cdp | direct — swap to test different strategies.
      BROWSERLESS_STRATEGY: '${BROWSERLESS_STRATEGY:-direct}'
      # Strategy for URL retrieval (chapter list). Default: content
      # (browserless).
      BROWSERLESS_URL_STRATEGY: '${BROWSERLESS_URL_STRATEGY:-content}'
      # 0 → defaults to NumCPU inside the container.
      SCRAPER_WORKERS: '${SCRAPER_WORKERS:-0}'
      # Same path as the static_books volume mount target below.
      SCRAPER_STATIC_ROOT: '/app/static/books'
      # Listen address; matches the container side of the published port.
      SCRAPER_HTTP_ADDR: ':8080'
      LOG_LEVEL: 'debug'
      # Kokoro-FastAPI TTS endpoint. The default uses the Compose service
      # name: inside this container `localhost` is the scraper itself, so
      # http://localhost:8880 could never reach the kokoro container.
      KOKORO_URL: '${KOKORO_URL:-http://kokoro:8880}'
      KOKORO_VOICE: '${KOKORO_VOICE:-af_bella}'
    ports:
      - '8080:8080'
    volumes:
      # Named volume declared in the top-level `volumes:` section.
      - static_books:/app/static/books
    healthcheck:
      # Probes the scraper's own /health endpoint from inside the container.
      test: ['CMD', 'wget', '-qO-', 'http://localhost:8080/health']
      interval: 15s
      timeout: 5s
      retries: 3
volumes:
  # Named volume mounted by the scraper service at /app/static/books.
  # The empty mapping means no volume options are set (Compose defaults).
  static_books: {}