# libnovel/docker-compose.yml

# Compose file format version declaration.
# NOTE(review): recent Docker Compose releases reportedly treat this key as
# informational only — confirm against the deployed Compose version before
# relying on or removing it.
version: "3.9"

services:
  # ─── Browserless ────────────────────────────────────────────────────────────
  browserless:
    # Headless Chromium service; the scraper service is configured to reach it
    # at http://browserless:3000.
    container_name: libnovel-browserless
    image: ghcr.io/browserless/chromium:latest
    restart: unless-stopped
    # Chrome requires a large shared-memory segment.
    shm_size: "2gb"
    ports:
      - "3000:3000"
    environment:
      # Endpoint access token (empty unless BROWSERLESS_TOKEN is set on the
      # host). The scraper service receives the same BROWSERLESS_TOKEN value.
      - "TOKEN=${BROWSERLESS_TOKEN:-}"
      # Maximum number of concurrent browser sessions (default 10).
      - "CONCURRENT=${BROWSERLESS_CONCURRENT:-10}"
      # Requests allowed to wait in the queue before a 429 is returned
      # (default 100).
      - "QUEUED=${BROWSERLESS_QUEUED:-100}"
      # Per-session timeout in milliseconds (default 60000).
      - "TIMEOUT=${BROWSERLESS_TIMEOUT:-60000}"
      # Optional webhook URL that receives Browserless error alerts.
      - "ERROR_ALERT_URL=${ERROR_ALERT_URL:-}"
    healthcheck:
      # Probe the version endpoint from inside the container.
      test:
        - "CMD"
        - "wget"
        - "-qO-"
        - "http://localhost:3000/json/version"
      interval: 10s
      timeout: 5s
      retries: 5

  # ─── Kokoro-FastAPI (TTS) ────────────────────────────────────────────────────
  # CPU image; swap for ghcr.io/remsky/kokoro-fastapi-gpu:latest on NVIDIA hosts.
  # Models are baked in — no volume mount required for the default voice set.
  kokoro:
    # Text-to-speech service, published on host port 8880.
    container_name: libnovel-kokoro
    image: ghcr.io/remsky/kokoro-fastapi-cpu:latest
    restart: unless-stopped
    ports:
      - "8880:8880"
    healthcheck:
      # The scraper service waits for this check to pass before it starts.
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:8880/health"
      interval: 15s
      timeout: 5s
      retries: 5

  # ─── Scraper ─────────────────────────────────────────────────────────────────
  scraper:
    # Scraper service built from ./scraper. It talks to the browserless and
    # kokoro services over the Compose network and serves HTTP on port 8080.
    build:
      context: ./scraper
      dockerfile: Dockerfile
    container_name: libnovel-scraper
    restart: unless-stopped
    depends_on:
      # BROWSERLESS_URL below always targets the browserless service, so make
      # sure it is started together with (and before) the scraper.
      browserless:
        condition: service_started
      # Wait until the TTS service reports healthy.
      kokoro:
        condition: service_healthy
    environment:
      BROWSERLESS_URL: "http://browserless:3000"
      BROWSERLESS_TOKEN: "${BROWSERLESS_TOKEN:-}"
      # content | scrape | cdp | direct — swap to test different strategies.
      BROWSERLESS_STRATEGY: "${BROWSERLESS_STRATEGY:-direct}"
      # Strategy for URL retrieval (chapter list). Default: content (browserless)
      BROWSERLESS_URL_STRATEGY: "${BROWSERLESS_URL_STRATEGY:-content}"
      # 0 → defaults to NumCPU inside the container.
      SCRAPER_WORKERS: "${SCRAPER_WORKERS:-0}"
      SCRAPER_STATIC_ROOT: "/app/static/books"
      SCRAPER_HTTP_ADDR: ":8080"
      LOG_LEVEL: "debug"
      # Kokoro-FastAPI TTS endpoint. The default uses the Compose service name:
      # inside this container "localhost" is the scraper itself, not kokoro.
      KOKORO_URL: "${KOKORO_URL:-http://kokoro:8880}"
      KOKORO_VOICE: "${KOKORO_VOICE:-af_bella}"
    ports:
      - "8080:8080"
    volumes:
      # Mount the host static directory so scraped content is available outside
      # the container. Create ./static/books on the host first if needed.
      - "${STATIC_ROOT:-./static/books}:/app/static/books"
    healthcheck:
      test: ["CMD", "wget", "-qO-", "http://localhost:8080/health"]
      interval: 15s
      timeout: 5s
      retries: 3

# No named volumes are declared; by default the scraper service mounts a host
# path (./static/books) instead.
volumes: {}