Compare commits
352 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
06d4a7bfd4 | ||
|
|
73a92ccf8f | ||
|
|
08361172c6 | ||
|
|
809dc8d898 | ||
|
|
e9c3426fbe | ||
|
|
8e611840d1 | ||
|
|
b9383570e3 | ||
|
|
eac9358c6f | ||
|
|
9cb11bc5e4 | ||
|
|
7196f8e930 | ||
|
|
a771405db8 | ||
|
|
1e9a96aa0f | ||
|
|
23ae1ed500 | ||
|
|
e7cb460f9b | ||
|
|
392248e8a6 | ||
|
|
68ea2d2808 | ||
|
|
7b1df9b592 | ||
|
|
f4089fe111 | ||
|
|
87b5ad1460 | ||
|
|
168cb52ed0 | ||
|
|
e1621a3ec2 | ||
|
|
10c7a48bc6 | ||
|
|
8b597c0bd2 | ||
|
|
28cafe2aa8 | ||
|
|
65f0425b61 | ||
|
|
4e70a2981d | ||
|
|
004cb95e56 | ||
|
|
aca649039c | ||
|
|
8d95411139 | ||
|
|
f9a4a0e416 | ||
|
|
a4d94f522a | ||
|
|
34c8fab358 | ||
|
|
d54769ab12 | ||
|
|
d2a4edba43 | ||
|
|
4e7f8c6266 | ||
|
|
b0a4cb8b3d | ||
|
|
f136ce6a60 | ||
|
|
3bd1112a63 | ||
|
|
278e292956 | ||
|
|
76de5eb491 | ||
|
|
c6597c8d19 | ||
|
|
e8d7108753 | ||
|
|
90dbecfa17 | ||
|
|
2deb306419 | ||
|
|
fd283bf6c6 | ||
|
|
3154a22500 | ||
|
|
61e0d98057 | ||
|
|
601c26d436 | ||
|
|
4a267d8fd8 | ||
|
|
c9478a67fb | ||
|
|
1b4835daeb | ||
|
|
c9c12fc4a8 | ||
|
|
dd35024d02 | ||
|
|
4b8104f087 | ||
|
|
5da880d189 | ||
|
|
98631df47a | ||
|
|
83b3dccc41 | ||
|
|
588e455aae | ||
|
|
28ac8d8826 | ||
|
|
0a3a61a3ef | ||
|
|
7a2a4fc755 | ||
|
|
801928aadf | ||
|
|
040072c3f5 | ||
|
|
6a76e97a67 | ||
|
|
71f79c8e02 | ||
|
|
5ee4a06654 | ||
|
|
63b286d0a4 | ||
|
|
d3f06c5c40 | ||
|
|
e71ddc2f8b | ||
|
|
b783dae5f4 | ||
|
|
dcf40197d4 | ||
|
|
9dae5e7cc0 | ||
|
|
908f5679fd | ||
|
|
f75292f531 | ||
|
|
2cf0528730 | ||
|
|
428b57732e | ||
|
|
61e77e3e28 | ||
|
|
b363c151a5 | ||
|
|
aef9e04419 | ||
|
|
58e78cd34d | ||
|
|
c5c167035d | ||
|
|
4a00d953bb | ||
|
|
fe1a933fd0 | ||
|
|
98e4a87432 | ||
|
|
9c8849c6cd | ||
|
|
b30aa23d64 | ||
|
|
fea09e3e23 | ||
|
|
4831c74acc | ||
|
|
7e5e0495cf | ||
|
|
188685e1b6 | ||
|
|
3271a5f3e6 | ||
|
|
ee3ed29316 | ||
|
|
a39f660a37 | ||
|
|
69818089a6 | ||
|
|
09062b8c82 | ||
|
|
d518710cc4 | ||
|
|
e2c15f5931 | ||
|
|
a50b968b95 | ||
|
|
023b1f7fec | ||
|
|
7e99fc6d70 | ||
|
|
12d6d30fb0 | ||
|
|
f9c14685b3 | ||
|
|
4a7009989c | ||
|
|
920ac0d41b | ||
|
|
424f2c5e16 | ||
|
|
8a0f5b6cde | ||
|
|
5fea8f67d0 | ||
|
|
6592d1662c | ||
|
|
59e8cdb19a | ||
|
|
1118392811 | ||
|
|
baa403efa2 | ||
|
|
0ed1112b20 | ||
|
|
16a12ede4d | ||
|
|
b9b69cee44 | ||
|
|
5b27d501af | ||
|
|
a85636d5db | ||
|
|
29d0eeb7e8 | ||
|
|
fabe9724c2 | ||
|
|
4c9bb4adde | ||
|
|
22b6ee824e | ||
|
|
3918bc8dc3 | ||
|
|
5825b859b7 | ||
|
|
1642434a79 | ||
|
|
02705dc6ed | ||
|
|
7413313100 | ||
|
|
b11f4ab6b4 | ||
|
|
3e4b1c0484 | ||
|
|
b5bc6ff3de | ||
|
|
8d4bba7964 | ||
|
|
2e5fe54615 | ||
|
|
81265510ef | ||
|
|
4d3c093612 | ||
|
|
937ba052fc | ||
|
|
479d201da9 | ||
|
|
1242cc7eb3 | ||
|
|
0b6dbeb042 | ||
|
|
c06877069f | ||
|
|
261c738fc0 | ||
|
|
5528abe4b0 | ||
|
|
09cdda2a07 | ||
|
|
718bfa6691 | ||
|
|
e11e866e27 | ||
|
|
23345e22e6 | ||
|
|
c7b3495a23 | ||
|
|
83a5910a59 | ||
|
|
0f6639aae7 | ||
|
|
88a25bc33e | ||
|
|
73ad4ece49 | ||
|
|
52f876d8e8 | ||
|
|
72eed89f59 | ||
|
|
12bb0db5f0 | ||
|
|
5ec1773768 | ||
|
|
fb8f1dfe25 | ||
|
|
3a2d113b1b | ||
|
|
0dcfdff65b | ||
|
|
1766011b47 | ||
|
|
a6f800b0d7 | ||
|
|
af9639af05 | ||
|
|
bfc08a2df2 | ||
|
|
dc3bc3ebf2 | ||
|
|
e9d7293d37 | ||
|
|
410af8f236 | ||
|
|
264c00c765 | ||
|
|
e4c72011eb | ||
|
|
6365b14ece | ||
|
|
7da5582075 | ||
|
|
dae841e317 | ||
|
|
16b2bfffa6 | ||
|
|
57be674f44 | ||
|
|
93390fab64 | ||
|
|
072517135f | ||
|
|
fe7c7acbb7 | ||
|
|
d4cce915d9 | ||
|
|
ac24e86f7d | ||
|
|
e9bb387f71 | ||
|
|
d7319b3f7c | ||
|
|
f380c85815 | ||
|
|
9d1b340b83 | ||
|
|
a307ddc9f5 | ||
|
|
004d1b6d9d | ||
|
|
7f20411f50 | ||
|
|
6e6c581904 | ||
|
|
cecedc8687 | ||
|
|
a88e98a436 | ||
|
|
d3ae86d55b | ||
|
|
5ad5c2dbce | ||
|
|
0de91dcc0c | ||
|
|
8e3e9ef31d | ||
|
|
3c5edd5742 | ||
|
|
2142e82fe4 | ||
|
|
88cde88f69 | ||
|
|
ffcc3981f2 | ||
|
|
a7b4694e60 | ||
|
|
8c895c6ba1 | ||
|
|
83059c8a9d | ||
|
|
b54ebf60b5 | ||
|
|
e027afe89d | ||
|
|
9fc2054e36 | ||
|
|
9a43b2190e | ||
|
|
5a7d7ce3b9 | ||
|
|
ce3eef1298 | ||
|
|
5d9b41bcf2 | ||
|
|
47268dea67 | ||
|
|
57591766f2 | ||
|
|
fa8fb96631 | ||
|
|
5ba84f7945 | ||
|
|
2793ad8cfa | ||
|
|
e43699747d | ||
|
|
1e85f1c0bc | ||
|
|
0c2349f259 | ||
|
|
c9252b5953 | ||
|
|
7efeee3fc2 | ||
|
|
9a05708019 | ||
|
|
24cb18e0fe | ||
|
|
71ba882858 | ||
|
|
c35f099f50 | ||
|
|
4df287ace4 | ||
|
|
0df45de2b6 | ||
|
|
825fb04c0d | ||
|
|
fc5cd30c93 | ||
|
|
37bd73651a | ||
|
|
466e289b68 | ||
|
|
bb604019fc | ||
|
|
0745178d9e | ||
|
|
603cd2bb02 | ||
|
|
228d4902bb | ||
|
|
884c82b2c3 | ||
|
|
c6536d5b9f | ||
|
|
460e7553bf | ||
|
|
89f0dfb113 | ||
|
|
88644341d8 | ||
|
|
992eb823f2 | ||
|
|
f51113a2f8 | ||
|
|
1eb70e9b9b | ||
|
|
70dd14e5c8 | ||
|
|
8096827c78 | ||
|
|
669fd765ee | ||
|
|
314af375d5 | ||
|
|
20c45e2676 | ||
|
|
09981a5f4d | ||
|
|
de9e0b4246 | ||
|
|
a72c1f6b52 | ||
|
|
5d3a1a09ef | ||
|
|
39ad0d6c11 | ||
|
|
765b37aea3 | ||
|
|
aff6de9b45 | ||
|
|
ec66e86a18 | ||
|
|
9b7cdad71a | ||
|
|
8f0a2f7e92 | ||
|
|
08d4718245 | ||
|
|
60a9540ef7 | ||
|
|
76d616a308 | ||
|
|
e723459507 | ||
|
|
b3358ac1d2 | ||
|
|
c0d33720e9 | ||
|
|
a5c603e7a6 | ||
|
|
219d4fb214 | ||
|
|
cec0dfe64a | ||
|
|
54616b82d7 | ||
|
|
ce5db37226 | ||
|
|
60bc8e5749 | ||
|
|
b4be0803aa | ||
|
|
12eca865ce | ||
|
|
589f39b49e | ||
|
|
53083429a0 | ||
|
|
70c8db28f9 | ||
|
|
1d00fd4e2e | ||
|
|
a54d8d43aa | ||
|
|
97e7a8dc02 | ||
|
|
fb6b364382 | ||
|
|
7b48707cd9 | ||
|
|
b0547c1b43 | ||
|
|
acbfafb8cd | ||
|
|
c8e0cf2813 | ||
|
|
3899a96576 | ||
|
|
1e7f396b2d | ||
|
|
0eee2eedf3 | ||
|
|
80da1bb3e2 | ||
|
|
9f3e895fa8 | ||
|
|
cf0c0dfaaf | ||
|
|
0402c408e4 | ||
|
|
d14644238f | ||
|
|
8de374cd35 | ||
|
|
82186cfd6d | ||
|
|
b87e758303 | ||
|
|
901b18ee13 | ||
|
|
034e670795 | ||
|
|
0d7b985469 | ||
|
|
53af7515a3 | ||
|
|
11a846d043 | ||
|
|
bf2ffa54db | ||
|
|
fe204598a2 | ||
|
|
9906c7d862 | ||
|
|
06feb91f4f | ||
|
|
5a7751e6d1 | ||
|
|
555973c053 | ||
|
|
c2d6ce1c5b | ||
|
|
8edad54b10 | ||
|
|
48d8fdb6b9 | ||
|
|
1b05b6ebc6 | ||
|
|
cabdd3ffdd | ||
|
|
f80b83309a | ||
|
|
49ba2c27c2 | ||
|
|
353d7397eb | ||
|
|
89ff90629f | ||
|
|
f6febfdb5e | ||
|
|
2c43907e34 | ||
|
|
0e868506ca | ||
|
|
1b234754e8 | ||
|
|
041099598b | ||
|
|
333c8ad868 | ||
|
|
d16ae00537 | ||
|
|
d16313bb6c | ||
|
|
1bab7028c6 | ||
|
|
6520fb9a50 | ||
|
|
7acf04fb9f | ||
|
|
c2bcb2b0a6 | ||
|
|
cfd893d24b | ||
|
|
cff0c78b4f | ||
|
|
d89cefe975 | ||
|
|
a0344b36d7 | ||
|
|
af3c487afb | ||
|
|
b8d4d94b18 | ||
|
|
56bf4dde22 | ||
|
|
2f0857be45 | ||
|
|
bf5774d8d0 | ||
|
|
5131ae0bc4 | ||
|
|
9fa0776258 | ||
|
|
f265d9d020 | ||
|
|
3c26dfe2c0 | ||
|
|
1820fa7303 | ||
|
|
38e400a4c7 | ||
|
|
cb90771248 | ||
|
|
59b1cfab1d | ||
|
|
f95ad3ed29 | ||
|
|
e4c4f8de66 | ||
|
|
4f84bd29c9 | ||
|
|
6bf79ab392 | ||
|
|
4ae6f0ab42 | ||
|
|
33e2a4dc01 | ||
|
|
cb4be0848f | ||
|
|
2f948f2a50 | ||
|
|
baab66823d | ||
|
|
11d2eaa0e5 | ||
|
|
9c115f00c4 | ||
|
|
5ac89da513 | ||
|
|
af86c6f96f | ||
|
|
da4a182f85 | ||
|
|
18e76c9668 | ||
|
|
9add9033b9 | ||
|
|
66d8481637 | ||
|
|
7f92a58fd7 |
41
.env.example
41
.env.example
@@ -1,41 +0,0 @@
|
||||
# libnovel scraper — environment overrides
|
||||
# Copy to .env and adjust values; do NOT commit this file with real secrets.
|
||||
|
||||
# Browserless API token (leave empty to disable auth)
|
||||
BROWSERLESS_TOKEN=
|
||||
|
||||
# Number of concurrent browser sessions in Browserless
|
||||
BROWSERLESS_CONCURRENT=10
|
||||
|
||||
# Queue depth before Browserless returns 429
|
||||
BROWSERLESS_QUEUED=100
|
||||
|
||||
# Per-session timeout in ms
|
||||
BROWSERLESS_TIMEOUT=60000
|
||||
|
||||
# Optional webhook URL for Browserless error alerts (leave empty to disable)
|
||||
ERROR_ALERT_URL=
|
||||
|
||||
# Which Browserless strategy the scraper uses: content | scrape | cdp | direct
|
||||
BROWSERLESS_STRATEGY=direct
|
||||
|
||||
# Strategy for URL retrieval (chapter list). Uses browserless content strategy by default.
|
||||
# Set to direct to use plain HTTP, or content/scrape/cdp for browserless.
|
||||
BROWSERLESS_URL_STRATEGY=content
|
||||
|
||||
# Chapter worker goroutines (0 = NumCPU inside the container)
|
||||
SCRAPER_WORKERS=0
|
||||
|
||||
# Host path to mount as the static output directory
|
||||
STATIC_ROOT=./static/books
|
||||
|
||||
# ── Kokoro-FastAPI TTS ────────────────────────────────────────────────────────
|
||||
# Base URL for the Kokoro-FastAPI service. When running via docker-compose the
|
||||
# default (http://kokoro:8880) is wired in automatically; override here only if
|
||||
# you are pointing at an external or GPU instance.
|
||||
KOKORO_URL=http://kokoro:8880
|
||||
|
||||
# Default voice used for chapter narration.
|
||||
# Single voices: af_bella, af_sky, af_heart, am_adam, …
|
||||
# Mixed voices: af_bella+af_sky or af_bella(2)+af_sky(1) (weighted blend)
|
||||
KOKORO_VOICE=af_bella
|
||||
@@ -2,105 +2,69 @@ name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ["main", "master"]
|
||||
tags-ignore:
|
||||
- "v*"
|
||||
paths:
|
||||
- "scraper/**"
|
||||
- ".gitea/workflows/**"
|
||||
pull_request:
|
||||
branches: ["main", "master"]
|
||||
paths:
|
||||
- "scraper/**"
|
||||
- ".gitea/workflows/**"
|
||||
- "backend/**"
|
||||
- "ui/**"
|
||||
- ".gitea/workflows/ci.yaml"
|
||||
|
||||
defaults:
|
||||
run:
|
||||
working-directory: scraper
|
||||
concurrency:
|
||||
group: ${{ gitea.workflow }}-${{ gitea.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# ── lint & vet ───────────────────────────────────────────────────────────────
|
||||
lint:
|
||||
name: Lint
|
||||
# ── Go: vet + build + test ────────────────────────────────────────────────
|
||||
backend:
|
||||
name: Backend
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: backend
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: scraper/go.mod
|
||||
cache-dependency-path: scraper/go.sum
|
||||
go-version-file: backend/go.mod
|
||||
cache-dependency-path: backend/go.sum
|
||||
|
||||
- name: go vet
|
||||
run: go vet ./...
|
||||
|
||||
- name: staticcheck
|
||||
run: |
|
||||
go install honnef.co/go/tools/cmd/staticcheck@latest
|
||||
staticcheck ./...
|
||||
- name: Build backend
|
||||
run: go build -o /dev/null ./cmd/backend
|
||||
|
||||
# ── tests ────────────────────────────────────────────────────────────────────
|
||||
test:
|
||||
name: Test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build runner
|
||||
run: go build -o /dev/null ./cmd/runner
|
||||
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: scraper/go.mod
|
||||
cache-dependency-path: scraper/go.sum
|
||||
- name: Build healthcheck
|
||||
run: go build -o /dev/null ./cmd/healthcheck
|
||||
|
||||
- name: Run tests
|
||||
run: go test -race -count=1 -timeout=60s ./...
|
||||
run: go test -short -race -count=1 -timeout=60s ./...
|
||||
|
||||
# ── build binary ─────────────────────────────────────────────────────────────
|
||||
build:
|
||||
name: Build
|
||||
# ── UI: type-check + build ────────────────────────────────────────────────
|
||||
ui:
|
||||
name: UI
|
||||
runs-on: ubuntu-latest
|
||||
needs: [lint, test]
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ui
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-go@v5
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
go-version-file: scraper/go.mod
|
||||
cache-dependency-path: scraper/go.sum
|
||||
node-version: "22"
|
||||
cache: npm
|
||||
cache-dependency-path: ui/package-lock.json
|
||||
|
||||
- name: Build binary
|
||||
run: |
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
|
||||
go build -ldflags="-s -w" -o bin/scraper ./cmd/scraper
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Upload binary artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: scraper-linux-amd64
|
||||
path: scraper/bin/scraper
|
||||
retention-days: 7
|
||||
- name: Type check
|
||||
run: npm run check
|
||||
|
||||
# ── docker build (& push) ────────────────────────────────────────────────────
|
||||
# Uncomment once the runner has Docker available and a registry is configured.
|
||||
#
|
||||
# docker:
|
||||
# name: Docker
|
||||
# runs-on: ubuntu-latest
|
||||
# needs: [lint, test]
|
||||
# # Only push images on commits to the default branch, not on PRs.
|
||||
# # if: github.event_name == 'push'
|
||||
# steps:
|
||||
# - uses: actions/checkout@v4
|
||||
#
|
||||
# - name: Log in to Gitea registry
|
||||
# uses: docker/login-action@v3
|
||||
# with:
|
||||
# registry: gitea.kalekber.cc
|
||||
# username: ${{ secrets.REGISTRY_USER }}
|
||||
# password: ${{ secrets.REGISTRY_TOKEN }}
|
||||
#
|
||||
# - name: Build and push
|
||||
# uses: docker/build-push-action@v5
|
||||
# with:
|
||||
# context: ./scraper
|
||||
# push: true
|
||||
# tags: |
|
||||
# gitea.kalekber.cc/kamil/libnovel:latest
|
||||
# gitea.kalekber.cc/kamil/libnovel:${{ gitea.sha }}
|
||||
- name: Build
|
||||
run: npm run build
|
||||
|
||||
285
.gitea/workflows/release.yaml
Normal file
285
.gitea/workflows/release.yaml
Normal file
@@ -0,0 +1,285 @@
|
||||
name: Release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "v*" # e.g. v1.0.0, v1.2.3
|
||||
|
||||
concurrency:
|
||||
group: ${{ gitea.workflow }}-${{ gitea.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# ── backend: vet & test ───────────────────────────────────────────────────────
|
||||
test-backend:
|
||||
name: Test backend
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: backend/go.mod
|
||||
cache-dependency-path: backend/go.sum
|
||||
|
||||
- name: go vet
|
||||
working-directory: backend
|
||||
run: go vet ./...
|
||||
|
||||
- name: Run tests
|
||||
working-directory: backend
|
||||
run: go test -short -race -count=1 -timeout=60s ./...
|
||||
|
||||
# ── ui: type-check & build ────────────────────────────────────────────────────
|
||||
check-ui:
|
||||
name: Check ui
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ui
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: "22"
|
||||
cache: npm
|
||||
cache-dependency-path: ui/package-lock.json
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Type check
|
||||
run: npm run check
|
||||
|
||||
- name: Build
|
||||
run: npm run build
|
||||
|
||||
# ── docker: backend ───────────────────────────────────────────────────────────
|
||||
docker-backend:
|
||||
name: Docker / backend
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-backend]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USER }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ secrets.DOCKER_USER }}/libnovel-backend
|
||||
tags: |
|
||||
type=semver,pattern={{version}}
|
||||
type=semver,pattern={{major}}.{{minor}}
|
||||
type=raw,value=latest
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: backend
|
||||
target: backend
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
build-args: |
|
||||
VERSION=${{ steps.meta.outputs.version }}
|
||||
COMMIT=${{ gitea.sha }}
|
||||
cache-from: type=registry,ref=${{ secrets.DOCKER_USER }}/libnovel-backend:latest
|
||||
cache-to: type=inline
|
||||
|
||||
# ── docker: runner ────────────────────────────────────────────────────────────
|
||||
docker-runner:
|
||||
name: Docker / runner
|
||||
runs-on: ubuntu-latest
|
||||
needs: [test-backend]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USER }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ secrets.DOCKER_USER }}/libnovel-runner
|
||||
tags: |
|
||||
type=semver,pattern={{version}}
|
||||
type=semver,pattern={{major}}.{{minor}}
|
||||
type=raw,value=latest
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: backend
|
||||
target: runner
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
build-args: |
|
||||
VERSION=${{ steps.meta.outputs.version }}
|
||||
COMMIT=${{ gitea.sha }}
|
||||
cache-from: type=registry,ref=${{ secrets.DOCKER_USER }}/libnovel-runner:latest
|
||||
cache-to: type=inline
|
||||
|
||||
# ── ui: source map upload ─────────────────────────────────────────────────────
|
||||
# Commented out: GlitchTip project/auth token needs to be recreated after
|
||||
# the GlitchTip DB wipe. Re-enable once GLITCHTIP_AUTH_TOKEN is updated.
|
||||
# upload-sourcemaps:
|
||||
# name: Upload source maps
|
||||
# runs-on: ubuntu-latest
|
||||
# needs: [check-ui]
|
||||
# defaults:
|
||||
# run:
|
||||
# working-directory: ui
|
||||
# steps:
|
||||
# - uses: actions/checkout@v4
|
||||
#
|
||||
# - uses: actions/setup-node@v4
|
||||
# with:
|
||||
# node-version: "22"
|
||||
# cache: npm
|
||||
# cache-dependency-path: ui/package-lock.json
|
||||
#
|
||||
# - name: Install dependencies
|
||||
# run: npm ci
|
||||
#
|
||||
# - name: Build with source maps
|
||||
# run: npm run build
|
||||
#
|
||||
# - name: Download glitchtip-cli
|
||||
# run: |
|
||||
# curl -L "https://gitlab.com/glitchtip/glitchtip-cli/-/jobs/artifacts/v0.1.0/raw/artifacts/glitchtip-cli-linux-x86_64?job=build-linux-x86_64" \
|
||||
# -o /usr/local/bin/glitchtip-cli
|
||||
# chmod +x /usr/local/bin/glitchtip-cli
|
||||
#
|
||||
# - name: Inject debug IDs into build artifacts
|
||||
# run: glitchtip-cli sourcemaps inject ./build
|
||||
# env:
|
||||
# SENTRY_URL: https://errors.libnovel.cc/
|
||||
# SENTRY_AUTH_TOKEN: ${{ secrets.GLITCHTIP_AUTH_TOKEN }}
|
||||
# SENTRY_ORG: libnovel
|
||||
# SENTRY_PROJECT: libnovel-ui
|
||||
#
|
||||
# - name: Upload source maps to GlitchTip
|
||||
# run: glitchtip-cli sourcemaps upload ./build --release ${{ gitea.ref_name }}
|
||||
# env:
|
||||
# SENTRY_URL: https://errors.libnovel.cc/
|
||||
# SENTRY_AUTH_TOKEN: ${{ secrets.GLITCHTIP_AUTH_TOKEN }}
|
||||
# SENTRY_ORG: libnovel
|
||||
# SENTRY_PROJECT: libnovel-ui
|
||||
|
||||
# ── docker: ui ────────────────────────────────────────────────────────────────
|
||||
docker-ui:
|
||||
name: Docker / ui
|
||||
runs-on: ubuntu-latest
|
||||
needs: [check-ui]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Fetch releases from Gitea API
|
||||
run: |
|
||||
set -euo pipefail
|
||||
RESPONSE=$(curl -sfL \
|
||||
-H "Accept: application/json" \
|
||||
"http://gitea.kalekber.cc/api/v1/repos/kamil/libnovel/releases?limit=50&page=1")
|
||||
# Validate JSON before writing — fails hard if response is not a JSON array
|
||||
COUNT=$(echo "$RESPONSE" | jq 'if type == "array" then length else error("expected array, got \(type)") end')
|
||||
echo "$RESPONSE" > ui/static/releases.json
|
||||
echo "Fetched $COUNT releases"
|
||||
|
||||
- uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USER }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ secrets.DOCKER_USER }}/libnovel-ui
|
||||
tags: |
|
||||
type=semver,pattern={{version}}
|
||||
type=semver,pattern={{major}}.{{minor}}
|
||||
type=raw,value=latest
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: ui
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
build-args: |
|
||||
BUILD_VERSION=${{ steps.meta.outputs.version }}
|
||||
BUILD_COMMIT=${{ gitea.sha }}
|
||||
BUILD_TIME=${{ gitea.event.head_commit.timestamp }}
|
||||
cache-from: type=registry,ref=${{ secrets.DOCKER_USER }}/libnovel-ui:latest
|
||||
cache-to: type=inline
|
||||
|
||||
# ── docker: caddy ─────────────────────────────────────────────────────────────
|
||||
docker-caddy:
|
||||
name: Docker / caddy
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USER }}
|
||||
password: ${{ secrets.DOCKER_TOKEN }}
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ secrets.DOCKER_USER }}/libnovel-caddy
|
||||
tags: |
|
||||
type=semver,pattern={{version}}
|
||||
type=semver,pattern={{major}}.{{minor}}
|
||||
type=raw,value=latest
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: caddy
|
||||
push: true
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
cache-from: type=registry,ref=${{ secrets.DOCKER_USER }}/libnovel-caddy:latest
|
||||
cache-to: type=inline
|
||||
|
||||
# ── Gitea release ─────────────────────────────────────────────────────────────
|
||||
release:
|
||||
name: Gitea Release
|
||||
runs-on: ubuntu-latest
|
||||
needs: [docker-backend, docker-runner, docker-ui, docker-caddy]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Create release
|
||||
uses: https://gitea.com/actions/gitea-release-action@v1
|
||||
with:
|
||||
token: ${{ secrets.GITEA_TOKEN }}
|
||||
generate_release_notes: true
|
||||
13
.gitignore
vendored
13
.gitignore
vendored
@@ -5,15 +5,18 @@
|
||||
/dist/
|
||||
|
||||
# ── Compiled binaries ──────────────────────────────────────────────────────────
|
||||
scraper/bin/
|
||||
backend/bin/
|
||||
backend/backend
|
||||
backend/runner
|
||||
|
||||
# ── Scraped output (large, machine-generated) ──────────────────────────────────
|
||||
|
||||
/static/books
|
||||
# ── Environment & secrets ──────────────────────────────────────────────────────
|
||||
# Secrets are managed by Doppler — never commit .env files.
|
||||
.env
|
||||
.env.*
|
||||
!.env.example
|
||||
.env.local
|
||||
|
||||
# ── CrowdSec — generated bouncer API key ──────────────────────────────────────
|
||||
crowdsec/.crowdsec.env
|
||||
|
||||
# ── OS artefacts ───────────────────────────────────────────────────────────────
|
||||
.DS_Store
|
||||
|
||||
156
.opencode/skills/ios-ux/SKILL.md
Normal file
156
.opencode/skills/ios-ux/SKILL.md
Normal file
@@ -0,0 +1,156 @@
|
||||
---
|
||||
name: ios-ux
|
||||
description: iOS/SwiftUI UI & UX review and implementation guidelines for LibNovel. Enforces Apple HIG, iOS 17+ APIs, spring animations, haptics, accessibility, performance, and offline handling. Load this skill for any iOS view work.
|
||||
compatibility: opencode
|
||||
---
|
||||
|
||||
# iOS UI/UX Skill — LibNovel
|
||||
|
||||
Load this skill whenever working on SwiftUI views in `ios/`. It defines design standards, the review process for screenshots, and implementation rules.
|
||||
|
||||
---
|
||||
|
||||
## Screenshot Review Process
|
||||
|
||||
When the user provides a screenshot of the app:
|
||||
|
||||
1. **Analyze first** — identify specific UI/UX issues across these categories:
|
||||
- Visual hierarchy and spacing
|
||||
- Typography (size, weight, contrast)
|
||||
- Color and material usage
|
||||
- Animation and interactivity gaps
|
||||
- Accessibility problems
|
||||
- Deprecated or non-native patterns
|
||||
2. **Present a numbered list** of suggested improvements with brief rationale for each.
|
||||
3. **Ask for confirmation** before writing any code: "Should I apply all of these, or only specific ones?"
|
||||
4. Apply only what the user confirms.
|
||||
|
||||
---
|
||||
|
||||
## Design System
|
||||
|
||||
### Colors & Materials
|
||||
- **Accent**: `Color.amber` (project-defined). Use for active state, selection indicators, progress fills, and CTAs.
|
||||
- **Backgrounds**: Prefer `.regularMaterial`, `.ultraThinMaterial`, or `.thinMaterial` over hard-coded `Color.black.opacity(x)` or `Color(.systemBackground)`.
|
||||
- **Dark overlays** (e.g. full-screen players): Use `KFImage` blurred background + `Color.black.opacity(0.5–0.6)` overlay. Never use a flat solid black background.
|
||||
- **Semantic colors**: Use `.primary`, `.secondary`, `.tertiary` foreground styles. Avoid hard-coded `Color.white` except in dark material contexts (e.g. the full-screen player).
|
||||
- **No hardcoded color literals** — use `Color+App.swift` extensions or system semantic colors.
|
||||
|
||||
### Typography
|
||||
- Use the SF Pro system font via `.font(.title)`, `.font(.body)`, etc. — never hardcode font names except for intentional stylistic accents (e.g. "Snell Roundhand" for voice watermark).
|
||||
- Apply `.fontWeight()` and `.fontDesign()` modifiers rather than custom font families.
|
||||
- Support Dynamic Type — never rely on a hardcoded font size alone; pair any fixed size with `.minimumScaleFactor` or a system font size modifier.
|
||||
- Hierarchy: title3.bold for primary labels, subheadline for secondary, caption/caption2 for metadata.
|
||||
|
||||
### Spacing & Layout
|
||||
- Minimum touch target: **44×44 pt**. Use `.frame(minWidth: 44, minHeight: 44)` or `.contentShape(Rectangle())` on small icons.
|
||||
- Prefer 16–20 pt horizontal padding on full-width containers; 12 pt for compact inner elements.
|
||||
- Use `VStack(spacing:)` and `HStack(spacing:)` explicitly — never rely on default spacing for production UI.
|
||||
- Corner radii: 12–14 pt for cards/chips, 10 pt for small badges, 20–24 pt for large cover art.
|
||||
|
||||
---
|
||||
|
||||
## Animation Rules
|
||||
|
||||
### Spring Animations (default for all interactive transitions)
|
||||
- Use `.spring(response:dampingFraction:)` for state-driven layout changes, selection feedback, and appear/disappear transitions.
|
||||
- Recommended defaults:
|
||||
- Interactive elements: `response: 0.3, dampingFraction: 0.7`
|
||||
- Entrance animations: `response: 0.45–0.5, dampingFraction: 0.7`
|
||||
- Quick snappy feedback: `response: 0.2, dampingFraction: 0.6`
|
||||
- Reserve `.easeInOut` only for non-interactive, ambient animations (e.g. opacity pulses, generating overlays).
|
||||
|
||||
### SF Symbol Transitions
|
||||
- Always use `contentTransition(.symbolEffect(.replace.downUp))` when a symbol name changes based on state (play/pause, checkmark/circle, etc.).
|
||||
- Use `.symbolEffect(.variableColor.cumulative)` for continuous animations (waveform, loading indicators).
|
||||
- Use `.symbolEffect(.bounce)` for one-shot entrance emphasis (e.g. completion checkmark appearing).
|
||||
- Use `.symbolEffect(.pulse)` for error/warning states that need attention.
|
||||
|
||||
### Repeating Animations
|
||||
- Use `phaseAnimator` for any looping animation that previously used manual `@State` + `withAnimation` chains.
|
||||
- Do not use `Timer` publishers for UI animation — prefer `phaseAnimator` or `TimelineView`.
|
||||
|
||||
---
|
||||
|
||||
## Haptic Feedback
|
||||
|
||||
Add `UIImpactFeedbackGenerator` to every user-initiated interactive control:
|
||||
- `.light` — toggle switches, selection chips, secondary actions, slider drag start.
|
||||
- `.medium` — primary transport buttons (play/pause, chapter skip), significant confirmations.
|
||||
- `.heavy` — destructive actions (only if no confirmation dialog).
|
||||
|
||||
Pattern:
|
||||
```swift
|
||||
Button {
|
||||
UIImpactFeedbackGenerator(style: .light).impactOccurred()
|
||||
// action
|
||||
} label: { ... }
|
||||
```
|
||||
|
||||
Do **not** add haptics to:
|
||||
- Programmatic state changes not directly triggered by a tap.
|
||||
- Buttons inside `List` rows that already use swipe actions.
|
||||
- Scroll events.
|
||||
|
||||
---
|
||||
|
||||
## iOS 17+ API Usage
|
||||
|
||||
Flag and replace any of the following deprecated patterns:
|
||||
|
||||
| Deprecated | Replace with |
|
||||
|---|---|
|
||||
| `NavigationView` | `NavigationStack` |
|
||||
| `@StateObject` / `ObservableObject` (new types only) | `@Observable` macro |
|
||||
| `DispatchQueue.main.async` | `await MainActor.run` or `@MainActor` |
|
||||
| Manual `@State` animation chains for repeating loops | `phaseAnimator` |
|
||||
| `.animation(_:)` without `value:` | `.animation(_:value:)` |
|
||||
| `AnyView` wrapping for conditional content | `@ViewBuilder` + `Group` |
|
||||
|
||||
Do **not** refactor existing `ObservableObject` types to `@Observable` unless explicitly asked — only apply `@Observable` to new types.
|
||||
|
||||
---
|
||||
|
||||
## Accessibility
|
||||
|
||||
Every view must:
|
||||
- Support VoiceOver: add `.accessibilityLabel()` to icon-only buttons and image views.
|
||||
- Support Dynamic Type: verify that text does not truncate at the xxxLarge size; where it would, adjust the layout.
|
||||
- Meet contrast ratio: text on tinted backgrounds must be legible — avoid `.opacity(0.25)` or lower for any user-readable text.
|
||||
- Touch targets ≥ 44pt (see Spacing above).
|
||||
- Interactive controls must have `.accessibilityAddTraits(.isButton)` if not using `Button`.
|
||||
- Do not rely solely on color to convey state — pair color with icon or label.
|
||||
|
||||
---
|
||||
|
||||
## Performance
|
||||
|
||||
- **Isolate high-frequency observers**: Any observation of a `PlaybackProgress` (timer-tick updates) must live in a separate sub-view that observes only the progress object via `@ObservedObject` — the parent view must not observe it. This prevents the entire parent from re-rendering every 0.5 seconds.
|
||||
- **Avoid `id()` overuse**: Only use `.id()` to force view recreation when necessary (e.g. background image on track change). Prefer `onChange(of:)` for side effects.
|
||||
- **Lazy containers**: Use `LazyVStack` / `LazyHStack` inside `ScrollView` for lists of 20+ items. `List` is inherently lazy and does not need this.
|
||||
- **Image loading**: Always use `KFImage` (Kingfisher) with `.placeholder` for remote images. Never use `AsyncImage` for cover art — it has no disk cache.
|
||||
- **Avoid `AnyView`**: It breaks structural identity and hurts diffing. Use `@ViewBuilder` or `Group { }` instead.
|
||||
|
||||
---
|
||||
|
||||
## Offline & Error States
|
||||
|
||||
Every view that makes network calls must:
|
||||
1. Wrap the body in a `VStack` with `OfflineBanner` at the top, gated on `networkMonitor.isConnected`.
|
||||
2. Suppress network errors silently when offline via `ErrorAlertModifier` — do not show an alert when the device is offline.
|
||||
3. Gate `.task` / `.onAppear` network calls: `guard networkMonitor.isConnected else { return }`.
|
||||
4. Show a non-blocking inline empty state (not a full-screen error) for failed loads when online.
|
||||
|
||||
---
|
||||
|
||||
## Component Checklist (before submitting any view change)
|
||||
|
||||
- [ ] All interactive elements ≥ 44pt touch target
|
||||
- [ ] SF Symbol state changes use `contentTransition(.symbolEffect(...))`
|
||||
- [ ] State-driven layout transitions use `.spring(response:dampingFraction:)`
|
||||
- [ ] Tappable controls have haptic feedback
|
||||
- [ ] No `NavigationView`, no `DispatchQueue.main.async`, no `.animation(_:)` without `value:`
|
||||
- [ ] High-frequency observers are isolated sub-views
|
||||
- [ ] Offline state handled with `OfflineBanner` + `NetworkMonitor`
|
||||
- [ ] VoiceOver labels on icon-only buttons
|
||||
- [ ] No hardcoded `Color.black` / `Color.white` / `Color(.systemBackground)` where a material applies
|
||||
89
AGENTS.md
89
AGENTS.md
@@ -1,89 +0,0 @@
|
||||
# libnovel Project
|
||||
|
||||
Go web scraper for novelfire.net with TTS support via Kokoro-FastAPI.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
scraper/
|
||||
├── cmd/scraper/main.go # Entry point: 'run' (one-shot) and 'serve' (HTTP server)
|
||||
├── internal/
|
||||
│ ├── orchestrator/orchestrator.go # Coordinates catalogue walk, metadata extraction, chapter scraping
|
||||
│ ├── browser/ # Browser client (content/scrape/cdp strategies) via Browserless
|
||||
│ ├── novelfire/scraper.go # novelfire.net specific scraping logic
|
||||
│ ├── server/server.go # HTTP API (POST /scrape, POST /scrape/book)
|
||||
│ ├── writer/writer.go # File writer (metadata.yaml, chapter .md files)
|
||||
│ └── scraper/interfaces.go # NovelScraper interface definition
|
||||
└── static/books/ # Output directory for scraped content
|
||||
```
|
||||
|
||||
## Key Concepts
|
||||
|
||||
- **Orchestrator**: Manages concurrency - catalogue streaming → per-book metadata goroutines → chapter worker pool
|
||||
- **Browser Client**: 3 strategies (content/scrape/cdp) via Browserless Chrome container
|
||||
- **Writer**: Writes metadata.yaml and chapter markdown files to `static/books/{slug}/vol-0/1-50/`
|
||||
- **Server**: HTTP API with async scrape jobs, UI for browsing books/chapters, chapter-text endpoint for TTS
|
||||
|
||||
## Commands
|
||||
|
||||
```bash
|
||||
# Build
|
||||
cd scraper && go build -o bin/scraper ./cmd/scraper
|
||||
|
||||
# One-shot scrape (full catalogue)
|
||||
./bin/scraper run
|
||||
|
||||
# Single book
|
||||
./bin/scraper run --url https://novelfire.net/book/xxx
|
||||
|
||||
# HTTP server
|
||||
./bin/scraper serve
|
||||
|
||||
# Tests
|
||||
cd scraper && go test ./...
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable | Description | Default |
|
||||
|----------|-------------|---------|
|
||||
| BROWSERLESS_URL | Browserless Chrome endpoint | http://localhost:3030 |
|
||||
| BROWSERLESS_STRATEGY | content \| scrape \| cdp | content |
|
||||
| SCRAPER_WORKERS | Chapter goroutines | NumCPU |
|
||||
| SCRAPER_STATIC_ROOT | Output directory | ./static/books |
|
||||
| SCRAPER_HTTP_ADDR | HTTP listen address | :8080 |
|
||||
| KOKORO_URL | Kokoro TTS endpoint | http://localhost:8880 |
|
||||
| KOKORO_VOICE | Default TTS voice | af_bella |
|
||||
| LOG_LEVEL | debug \| info \| warn \| error | info |
|
||||
|
||||
## Docker
|
||||
|
||||
```bash
|
||||
docker-compose up -d # Starts browserless, kokoro, scraper
|
||||
```
|
||||
|
||||
## Code Patterns
|
||||
|
||||
- Uses `log/slog` for structured logging
|
||||
- Context-based cancellation throughout
|
||||
- Worker pool pattern in orchestrator (channel + goroutines)
|
||||
- Mutex for single async job (409 on concurrent scrape requests)
|
||||
|
||||
## AI Context Tips
|
||||
|
||||
- Primary files to modify: `orchestrator.go`, `server.go`, `scraper.go`, `browser/*.go`
|
||||
- To add new source: implement `NovelScraper` interface from `internal/scraper/interfaces.go`
|
||||
- Skip `static/` directory - generated content, not source
|
||||
|
||||
## Speed Up AI Sessions (Optional)
|
||||
|
||||
For faster AI context loading, use **Context7** (free, local indexing):
|
||||
|
||||
```bash
|
||||
# Install and index once
|
||||
npx @context7/cli@latest index --path . --ignore .aiignore
|
||||
|
||||
# After first run, AI tools will query the index instead of re-scanning files
|
||||
```
|
||||
|
||||
VSCode extension: https://marketplace.visualstudio.com/items?itemName=context7.context7
|
||||
291
Caddyfile
Normal file
291
Caddyfile
Normal file
@@ -0,0 +1,291 @@
|
||||
# v3/Caddyfile
|
||||
#
|
||||
# Caddy reverse proxy for LibNovel v3.
|
||||
# Custom build includes github.com/mholt/caddy-ratelimit, the CrowdSec bouncer, and caddy-l4 (layer4).
|
||||
#
|
||||
# Environment variables consumed (set in docker-compose.yml):
|
||||
# DOMAIN — public hostname, e.g. libnovel.example.com
|
||||
# Use "localhost" for local dev (no TLS cert attempted).
|
||||
# CADDY_ACME_EMAIL — Let's Encrypt notification email (empty = no email)
|
||||
#
|
||||
# Routing rules (main domain):
|
||||
# /health → backend:8080 (liveness probe)
|
||||
# /scrape* → backend:8080 (Go admin scrape endpoints)
|
||||
# /api/book-preview/* → backend:8080 (live scrape, no store write)
|
||||
# /api/chapter-text/* → backend:8080 (chapter markdown from MinIO)
|
||||
# /api/chapter-markdown/* → backend:8080 (chapter markdown from MinIO)
|
||||
# /api/reindex/* → backend:8080 (rebuild chapter index)
|
||||
# /api/cover/* → backend:8080 (proxy cover image)
|
||||
# /api/audio-proxy/* → backend:8080 (proxy generated audio)
|
||||
# /avatars/* → minio:9000 (presigned avatar GETs)
|
||||
# /audio/* → minio:9000 (presigned audio GETs)
|
||||
# /chapters/* → minio:9000 (presigned chapter GETs)
|
||||
# /* (everything else) → ui:3000 (SvelteKit — handles all
|
||||
# remaining /api/* routes)
|
||||
#
|
||||
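# Subdomain routing (NOTE: partly stale — per the "Tooling subdomains" note near the end of this file, every entry below except search.libnovel.cc is now served via Cloudflare Tunnel rather than Caddy; pb., storage. and redis.libnovel.cc are served here but not listed):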
|
||||
# feedback.libnovel.cc → fider:3000 (user feedback / feature requests)
|
||||
# errors.libnovel.cc → glitchtip-web:8000 (error tracking)
|
||||
# analytics.libnovel.cc → umami:3000 (page analytics)
|
||||
# logs.libnovel.cc → dozzle:8080 (Docker log viewer)
|
||||
# uptime.libnovel.cc → uptime-kuma:3001 (uptime monitoring)
|
||||
# push.libnovel.cc → gotify:80 (push notifications)
|
||||
# search.libnovel.cc → meilisearch:7700 (search index — homelab runner)
|
||||
#
|
||||
# Routes intentionally removed from direct-to-backend:
|
||||
# /api/scrape/* — SvelteKit has /api/scrape/ counterparts
|
||||
# that enforce auth; routing directly would
|
||||
# bypass SK middleware.
|
||||
# /api/chapter-text-preview/* — Same: SvelteKit owns
|
||||
# /api/chapter-text-preview/[slug]/[n].
|
||||
# /api/browse — Endpoint removed; browse snapshot system
|
||||
# was deleted.
|
||||
{
|
||||
# Email for Let's Encrypt ACME account registration.
|
||||
# When CADDY_ACME_EMAIL is set this expands to e.g. "email you@example.com".
|
||||
# When unset the variable expands to an empty string and Caddy ignores it.
|
||||
email {$CADDY_ACME_EMAIL:}
|
||||
|
||||
# CrowdSec bouncer — streams decisions from the CrowdSec LAPI every 15s.
|
||||
# CROWDSEC_API_KEY is injected at runtime via crowdsec/.crowdsec.env.
|
||||
# The default "disabled" placeholder makes the bouncer fail-open (warn,
|
||||
# pass traffic) when no key is configured — Caddy still starts cleanly.
|
||||
crowdsec {
|
||||
api_url http://crowdsec:8080
|
||||
api_key {$CROWDSEC_API_KEY:disabled}
|
||||
ticker_interval 15s
|
||||
}
|
||||
|
||||
# ── Redis TCP proxy via layer4 ────────────────────────────────────────────
|
||||
# Exposes prod Redis over TLS for Asynq job enqueueing from the homelab runner.
|
||||
# Listens on :6380 (all interfaces). TLS is terminated here using the cert
|
||||
# for redis.libnovel.cc; traffic is proxied to the local Redis sidecar.
|
||||
# Requires the caddy-l4 module in the custom Caddy build.
|
||||
layer4 {
|
||||
:6380 {
|
||||
route {
|
||||
tls {
|
||||
connection_policy {
|
||||
match {
|
||||
sni redis.libnovel.cc
|
||||
}
|
||||
}
|
||||
}
|
||||
proxy {
|
||||
upstream redis:6379
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(security_headers) {
|
||||
header {
|
||||
# Prevent clickjacking
|
||||
X-Frame-Options "SAMEORIGIN"
|
||||
# Prevent MIME-type sniffing
|
||||
X-Content-Type-Options "nosniff"
|
||||
# Minimal referrer info for cross-origin requests
|
||||
Referrer-Policy "strict-origin-when-cross-origin"
|
||||
# Restrict powerful browser features
|
||||
Permissions-Policy "camera=(), microphone=(), geolocation=(), payment=()"
|
||||
# Enforce HTTPS for 1 year (includeSubDomains)
|
||||
Strict-Transport-Security "max-age=31536000; includeSubDomains"
|
||||
# Enable XSS filter in older browsers
|
||||
X-XSS-Protection "1; mode=block"
|
||||
# Remove server identity header
|
||||
-Server
|
||||
}
|
||||
}
|
||||
|
||||
{$DOMAIN:localhost} {
|
||||
import security_headers
|
||||
|
||||
# ── CrowdSec bouncer ──────────────────────────────────────────────────────
|
||||
# Checks every incoming request against CrowdSec decisions.
|
||||
# Banned IPs receive a 403; all others pass through unchanged.
|
||||
route {
|
||||
crowdsec
|
||||
}
|
||||
|
||||
# ── Rate limiting ─────────────────────────────────────────────────────────
|
||||
# Auth endpoints: strict — 10 req/min per IP
|
||||
rate_limit {
|
||||
zone auth_zone {
|
||||
match {
|
||||
path /api/auth/login /api/auth/register /api/auth/change-password
|
||||
}
|
||||
key {remote_host}
|
||||
window 1m
|
||||
events 10
|
||||
}
|
||||
}
|
||||
|
||||
# Admin scrape endpoints: moderate — 20 req/min per IP
|
||||
rate_limit {
|
||||
zone scrape_zone {
|
||||
match {
|
||||
path /scrape*
|
||||
}
|
||||
key {remote_host}
|
||||
window 1m
|
||||
events 20
|
||||
}
|
||||
}
|
||||
|
||||
# Global: 300 req/min per IP (covers everything)
|
||||
rate_limit {
|
||||
zone global_zone {
|
||||
key {remote_host}
|
||||
window 1m
|
||||
events 300
|
||||
}
|
||||
}
|
||||
|
||||
# ── Liveness probe ────────────────────────────────────────────────────────
|
||||
handle /health {
|
||||
reverse_proxy backend:8080
|
||||
}
|
||||
|
||||
# ── Scrape task creation (Go backend only) ────────────────────────────────
|
||||
handle /scrape* {
|
||||
reverse_proxy backend:8080
|
||||
}
|
||||
|
||||
# ── Backend-only API paths ────────────────────────────────────────────────
|
||||
# These paths are served exclusively by the Go backend and have no
|
||||
# SvelteKit counterpart. Routing them here skips SK intentionally.
|
||||
handle /api/book-preview/* {
|
||||
reverse_proxy backend:8080
|
||||
}
|
||||
handle /api/chapter-text/* {
|
||||
reverse_proxy backend:8080
|
||||
}
|
||||
handle /api/chapter-markdown/* {
|
||||
reverse_proxy backend:8080
|
||||
}
|
||||
handle /api/reindex/* {
|
||||
reverse_proxy backend:8080
|
||||
}
|
||||
handle /api/cover/* {
|
||||
reverse_proxy backend:8080
|
||||
}
|
||||
handle /api/audio-proxy/* {
|
||||
reverse_proxy backend:8080
|
||||
}
|
||||
|
||||
# ── MinIO bucket paths (presigned URLs) ──────────────────────────────────
|
||||
# MinIO path-style presigned URLs include the bucket name as the first
|
||||
# path segment. MINIO_PUBLIC_ENDPOINT points here, so Caddy must proxy
|
||||
# these paths directly to MinIO — no auth layer needed (the presigned
|
||||
# signature itself enforces access and expiry).
|
||||
handle /avatars/* {
|
||||
reverse_proxy minio:9000
|
||||
}
|
||||
handle /audio/* {
|
||||
reverse_proxy minio:9000
|
||||
}
|
||||
handle /chapters/* {
|
||||
reverse_proxy minio:9000
|
||||
}
|
||||
|
||||
# ── SvelteKit UI (catch-all — includes all remaining /api/* routes) ───────
|
||||
handle {
|
||||
reverse_proxy ui:3000 {
|
||||
# Active health check: Caddy polls /health every 5 s and marks the
|
||||
# upstream down immediately when it fails. Combined with
|
||||
# lb_try_duration this means Watchtower container replacements
|
||||
# show the maintenance page within a few seconds instead of
|
||||
# hanging or returning a raw connection error to the browser.
|
||||
health_uri /health
|
||||
health_interval 5s
|
||||
health_timeout 2s
|
||||
health_status 200
|
||||
|
||||
# If the upstream is down, fail fast (don't retry for longer than
|
||||
# 3 s) and let Caddy's handle_errors 502/503 take over.
|
||||
lb_try_duration 3s
|
||||
}
|
||||
}
|
||||
|
||||
# ── Caddy-level error pages ───────────────────────────────────────────────
|
||||
# These fire when the upstream (backend or ui) is completely unreachable.
|
||||
# SvelteKit's own +error.svelte handles application-level errors (404, 500).
|
||||
handle_errors 404 {
|
||||
root * /srv/errors
|
||||
rewrite * /404.html
|
||||
file_server
|
||||
}
|
||||
handle_errors 500 {
|
||||
root * /srv/errors
|
||||
rewrite * /500.html
|
||||
file_server
|
||||
}
|
||||
handle_errors 502 {
|
||||
root * /srv/errors
|
||||
rewrite * /502.html
|
||||
file_server
|
||||
}
|
||||
handle_errors 503 {
|
||||
root * /srv/errors
|
||||
rewrite * /503.html
|
||||
file_server
|
||||
}
|
||||
handle_errors 504 {
|
||||
root * /srv/errors
|
||||
rewrite * /504.html
|
||||
file_server
|
||||
}
|
||||
|
||||
# ── Logging ───────────────────────────────────────────────────────────────
|
||||
# JSON log file read by CrowdSec for threat detection.
|
||||
log {
|
||||
output file /var/log/caddy/access.log {
|
||||
roll_size 100MiB
|
||||
roll_keep 5
|
||||
roll_keep_for 720h
|
||||
}
|
||||
format json
|
||||
}
|
||||
}
|
||||
|
||||
# ── Tooling subdomains ────────────────────────────────────────────────────────
|
||||
# feedback.libnovel.cc, errors.libnovel.cc, analytics.libnovel.cc,
|
||||
# logs.libnovel.cc, uptime.libnovel.cc, push.libnovel.cc, grafana.libnovel.cc
|
||||
# are now routed via Cloudflare Tunnel directly to the homelab (192.168.0.109).
|
||||
# No Caddy rules needed here — Cloudflare handles TLS termination and routing.
|
||||
|
||||
# ── PocketBase: exposed for homelab runner task polling ───────────────────────
|
||||
# Allows the homelab runner to claim tasks and write results via the PB API.
|
||||
# Admin UI is also accessible here for convenience.
|
||||
pb.libnovel.cc {
|
||||
import security_headers
|
||||
reverse_proxy pocketbase:8090
|
||||
}
|
||||
|
||||
# ── MinIO S3 API: exposed for homelab runner object writes ────────────────────
|
||||
# The homelab runner connects here as MINIO_ENDPOINT to PutObject audio/chapters.
|
||||
# Also used as MINIO_PUBLIC_ENDPOINT for presigned URL generation.
|
||||
storage.libnovel.cc {
|
||||
import security_headers
|
||||
reverse_proxy minio:9000
|
||||
}
|
||||
|
||||
# ── Meilisearch: exposed for homelab runner search indexing ──────────────────
|
||||
# The homelab runner connects here as MEILI_URL to index books after scraping.
|
||||
# Protected by MEILI_MASTER_KEY bearer token — Meilisearch enforces auth on
|
||||
# every request; Caddy just terminates TLS.
|
||||
search.libnovel.cc {
|
||||
import security_headers
|
||||
reverse_proxy meilisearch:7700
|
||||
}
|
||||
|
||||
# ── Redis TLS cert anchor ─────────────────────────────────────────────────────
|
||||
# This virtual host exists solely so Caddy obtains and caches a TLS certificate
|
||||
# for redis.libnovel.cc. The layer4 block above uses that cert to terminate TLS
|
||||
# on :6380 (Asynq job-queue channel between the homelab runner and the prod Redis sidecar).
|
||||
# The HTTP route itself just returns 404 — no real traffic expected here.
|
||||
redis.libnovel.cc {
	# Certificate anchor only: no real HTTP traffic is expected on this host.
	respond 404
}
|
||||
38
README.md
Normal file
38
README.md
Normal file
@@ -0,0 +1,38 @@
|
||||
# LibNovel
|
||||
|
||||
Self-hosted audiobook platform. Go backend + SvelteKit UI + MinIO/PocketBase/Meilisearch.
|
||||
|
||||
## Requirements
|
||||
|
||||
- Docker + Docker Compose
|
||||
- [just](https://github.com/casey/just)
|
||||
- [Doppler CLI](https://docs.doppler.com/docs/install-cli)
|
||||
|
||||
## Setup
|
||||
|
||||
```sh
|
||||
doppler login
|
||||
doppler setup # project=libnovel, config=prd
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```sh
|
||||
just up # start everything
|
||||
just down # stop
|
||||
just logs # tail all logs
|
||||
just log backend # tail one service
|
||||
just build # rebuild images
|
||||
just restart # down + up
|
||||
just secrets # view/edit secrets
|
||||
```
|
||||
|
||||
## Secrets
|
||||
|
||||
Managed via Doppler (`project=libnovel`, `config=prd`). No `.env` files.
|
||||
|
||||
To add or update a secret:
|
||||
|
||||
```sh
|
||||
doppler secrets set MY_SECRET=value
|
||||
```
|
||||
13
backend/.dockerignore
Normal file
13
backend/.dockerignore
Normal file
@@ -0,0 +1,13 @@
|
||||
# Exclude compiled binaries
|
||||
bin/
|
||||
|
||||
# Exclude test binaries produced by `go test -c`
|
||||
*.test
|
||||
|
||||
# Git history is not needed inside the image
|
||||
.git/
|
||||
|
||||
# Editor/OS noise
|
||||
.DS_Store
|
||||
*.swp
|
||||
*.swo
|
||||
52
backend/Dockerfile
Normal file
52
backend/Dockerfile
Normal file
@@ -0,0 +1,52 @@
|
||||
# syntax=docker/dockerfile:1
FROM golang:1.26.1-alpine AS builder
WORKDIR /app

# Download modules into the BuildKit cache so they survive across builds.
# This layer is only invalidated when go.mod or go.sum changes.
# The official golang image sets GOPATH=/go, so the module cache lives in
# /go/pkg/mod; the cache mount must target that path to have any effect.
COPY go.mod go.sum ./
RUN --mount=type=cache,target=/go/pkg/mod \
    go mod download

COPY . .

ARG VERSION=dev
ARG COMMIT=unknown

# Build all three binaries in a single layer so the Go compiler can reuse
# intermediate object files. Both cache mounts are preserved between builds:
#   /go/pkg/mod            — downloaded module source
#   /root/.cache/go-build  — compiled package objects (incremental recompile)
RUN --mount=type=cache,target=/go/pkg/mod \
    --mount=type=cache,target=/root/.cache/go-build \
    CGO_ENABLED=0 GOOS=linux go build \
        -ldflags="-s -w -X main.version=${VERSION} -X main.commit=${COMMIT}" \
        -o /out/backend ./cmd/backend && \
    CGO_ENABLED=0 GOOS=linux go build \
        -ldflags="-s -w -X main.version=${VERSION} -X main.commit=${COMMIT}" \
        -o /out/runner ./cmd/runner && \
    CGO_ENABLED=0 GOOS=linux go build \
        -ldflags="-s -w" \
        -o /out/healthcheck ./cmd/healthcheck
|
||||
|
||||
# ── backend service ──────────────────────────────────────────────────────────
|
||||
# Uses Alpine (not distroless) so ffmpeg is available for on-demand voice
|
||||
# sample generation via pocket-tts (WAV→MP3 transcoding).
|
||||
FROM alpine:3.21 AS backend
|
||||
RUN apk add --no-cache ffmpeg ca-certificates && \
|
||||
addgroup -S appgroup && adduser -S appuser -G appgroup
|
||||
COPY --from=builder /out/healthcheck /healthcheck
|
||||
COPY --from=builder /out/backend /backend
|
||||
USER appuser
|
||||
ENTRYPOINT ["/backend"]
|
||||
|
||||
# ── runner service ───────────────────────────────────────────────────────────
|
||||
# Uses Alpine (not distroless) so ffmpeg is available for WAV→MP3 transcoding
|
||||
# when pocket-tts voices are used.
|
||||
FROM alpine:3.21 AS runner
|
||||
RUN apk add --no-cache ffmpeg ca-certificates && \
|
||||
addgroup -S appgroup && adduser -S appuser -G appgroup
|
||||
COPY --from=builder /out/healthcheck /healthcheck
|
||||
COPY --from=builder /out/runner /runner
|
||||
USER appuser
|
||||
ENTRYPOINT ["/runner"]
|
||||
222
backend/cmd/backend/main.go
Normal file
222
backend/cmd/backend/main.go
Normal file
@@ -0,0 +1,222 @@
|
||||
// Command backend is the LibNovel HTTP API server.
|
||||
//
|
||||
// It exposes all endpoints consumed by the SvelteKit UI: book/chapter reads,
|
||||
// scrape-task creation, presigned MinIO URLs, audio-task creation, reading
|
||||
// progress, live novelfire.net search, and Kokoro voice list.
|
||||
//
|
||||
// All heavy lifting (scraping, TTS generation) is delegated to the runner
|
||||
// binary via PocketBase task records. The backend never scrapes directly.
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// backend # start HTTP server (blocks until SIGINT/SIGTERM)
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/getsentry/sentry-go"
|
||||
"github.com/hibiken/asynq"
|
||||
"github.com/libnovel/backend/internal/asynqqueue"
|
||||
"github.com/libnovel/backend/internal/backend"
|
||||
"github.com/libnovel/backend/internal/config"
|
||||
"github.com/libnovel/backend/internal/kokoro"
|
||||
"github.com/libnovel/backend/internal/meili"
|
||||
"github.com/libnovel/backend/internal/otelsetup"
|
||||
"github.com/libnovel/backend/internal/pockettts"
|
||||
"github.com/libnovel/backend/internal/storage"
|
||||
"github.com/libnovel/backend/internal/taskqueue"
|
||||
)
|
||||
|
||||
// version and commit are set at build time via -ldflags.
|
||||
var (
|
||||
version = "dev"
|
||||
commit = "unknown"
|
||||
)
|
||||
|
||||
func main() {
|
||||
if err := run(); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "backend: fatal: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func run() error {
|
||||
cfg := config.Load()
|
||||
|
||||
// ── Sentry / GlitchTip error tracking ────────────────────────────────────
|
||||
if dsn := os.Getenv("GLITCHTIP_DSN"); dsn != "" {
|
||||
if err := sentry.Init(sentry.ClientOptions{
|
||||
Dsn: dsn,
|
||||
Release: version + "@" + commit,
|
||||
TracesSampleRate: 0.1,
|
||||
}); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "backend: sentry init warning: %v\n", err)
|
||||
} else {
|
||||
defer sentry.Flush(2 * time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
// ── Logger ───────────────────────────────────────────────────────────────
|
||||
log := buildLogger(cfg.LogLevel)
|
||||
log.Info("backend starting",
|
||||
"version", version,
|
||||
"commit", commit,
|
||||
"addr", cfg.HTTP.Addr,
|
||||
)
|
||||
|
||||
// ── Context: cancel on SIGINT / SIGTERM ──────────────────────────────────
|
||||
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
|
||||
defer stop()
|
||||
|
||||
// ── OpenTelemetry tracing + logs ──────────────────────────────────────────
|
||||
otelShutdown, otelLog, err := otelsetup.Init(ctx, version)
|
||||
if err != nil {
|
||||
return fmt.Errorf("init otel: %w", err)
|
||||
}
|
||||
if otelShutdown != nil {
|
||||
defer otelShutdown()
|
||||
// Replace the plain slog logger with the OTel-bridged one so all
|
||||
// structured log lines are forwarded to Loki with trace IDs attached.
|
||||
log = otelLog
|
||||
log.Info("otel tracing + logs enabled", "endpoint", os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"))
|
||||
}
|
||||
|
||||
// ── Storage ──────────────────────────────────────────────────────────────
|
||||
store, err := storage.NewStore(ctx, cfg, log)
|
||||
if err != nil {
|
||||
return fmt.Errorf("init storage: %w", err)
|
||||
}
|
||||
|
||||
// ── Kokoro (voice list only; audio generation is done by the runner) ─────
|
||||
var kokoroClient kokoro.Client
|
||||
if cfg.Kokoro.URL != "" {
|
||||
kokoroClient = kokoro.New(cfg.Kokoro.URL)
|
||||
log.Info("kokoro voices enabled", "url", cfg.Kokoro.URL)
|
||||
} else {
|
||||
log.Info("KOKORO_URL not set — voice list will use built-in fallback")
|
||||
kokoroClient = &noopKokoro{}
|
||||
}
|
||||
|
||||
// ── Pocket-TTS (voice list + sample generation; audio generation is the runner's job) ──
|
||||
var pocketTTSClient pockettts.Client
|
||||
if cfg.PocketTTS.URL != "" {
|
||||
pocketTTSClient = pockettts.New(cfg.PocketTTS.URL)
|
||||
log.Info("pocket-tts voices enabled", "url", cfg.PocketTTS.URL)
|
||||
} else {
|
||||
log.Info("POCKET_TTS_URL not set — pocket-tts voices unavailable in backend")
|
||||
}
|
||||
|
||||
// ── Meilisearch (search reads only; indexing is the runner's job) ────────
|
||||
var searchIndex meili.Client
|
||||
if cfg.Meilisearch.URL != "" {
|
||||
searchIndex = meili.New(cfg.Meilisearch.URL, cfg.Meilisearch.APIKey)
|
||||
log.Info("meilisearch search enabled", "url", cfg.Meilisearch.URL)
|
||||
} else {
|
||||
log.Info("MEILI_URL not set — search will use PocketBase substring fallback")
|
||||
searchIndex = meili.NoopClient{}
|
||||
}
|
||||
|
||||
// ── Task Producer ────────────────────────────────────────────────────────
|
||||
// When REDIS_ADDR is set the backend dual-writes: PocketBase record (audit)
|
||||
// + Asynq job (immediate delivery). Otherwise it writes to PocketBase only
|
||||
// and the runner picks up on the next poll tick.
|
||||
var producer taskqueue.Producer = store
|
||||
if cfg.Redis.Addr != "" {
|
||||
redisOpt, parseErr := parseRedisOpt(cfg.Redis)
|
||||
if parseErr != nil {
|
||||
return fmt.Errorf("parse REDIS_ADDR: %w", parseErr)
|
||||
}
|
||||
asynqProducer := asynqqueue.NewProducer(store, redisOpt, log)
|
||||
defer asynqProducer.Close() //nolint:errcheck
|
||||
producer = asynqProducer
|
||||
log.Info("backend: asynq task dispatch enabled", "addr", cfg.Redis.Addr)
|
||||
} else {
|
||||
log.Info("backend: poll-mode task dispatch (REDIS_ADDR not set)")
|
||||
}
|
||||
|
||||
// ── Backend server ───────────────────────────────────────────────────────
|
||||
srv := backend.New(
|
||||
backend.Config{
|
||||
Addr: cfg.HTTP.Addr,
|
||||
DefaultVoice: cfg.Kokoro.DefaultVoice,
|
||||
Version: version,
|
||||
Commit: commit,
|
||||
},
|
||||
backend.Dependencies{
|
||||
BookReader: store,
|
||||
RankingStore: store,
|
||||
AudioStore: store,
|
||||
TranslationStore: store,
|
||||
PresignStore: store,
|
||||
ProgressStore: store,
|
||||
CoverStore: store,
|
||||
Producer: producer,
|
||||
TaskReader: store,
|
||||
SearchIndex: searchIndex,
|
||||
Kokoro: kokoroClient,
|
||||
PocketTTS: pocketTTSClient,
|
||||
Log: log,
|
||||
},
|
||||
)
|
||||
|
||||
return srv.ListenAndServe(ctx)
|
||||
}
|
||||
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
// buildLogger returns a JSON slog logger that writes to stdout at the given
// minimum level. Recognised values are "debug", "warn" and "error"; any other
// value (including "info" and the empty string) selects the info level.
func buildLogger(level string) *slog.Logger {
	var lvl slog.Level
	switch level {
	case "debug":
		lvl = slog.LevelDebug
	case "warn":
		lvl = slog.LevelWarn
	case "error":
		lvl = slog.LevelError
	default:
		lvl = slog.LevelInfo
	}
	return slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{Level: lvl}))
}
|
||||
|
||||
// errKokoroNotConfigured is returned by every audio method of noopKokoro.
var errKokoroNotConfigured = fmt.Errorf("kokoro not configured (KOKORO_URL is empty)")

// noopKokoro is a no-op implementation used when KOKORO_URL is not set.
// The backend only uses Kokoro for the voice list; audio generation is the
// runner's responsibility. With no URL the built-in fallback list is served.
type noopKokoro struct{}

// GenerateAudio always fails with errKokoroNotConfigured.
func (*noopKokoro) GenerateAudio(_ context.Context, _, _ string) ([]byte, error) {
	return nil, errKokoroNotConfigured
}

// StreamAudioMP3 always fails with errKokoroNotConfigured.
func (*noopKokoro) StreamAudioMP3(_ context.Context, _, _ string) (io.ReadCloser, error) {
	return nil, errKokoroNotConfigured
}

// StreamAudioWAV always fails with errKokoroNotConfigured.
func (*noopKokoro) StreamAudioWAV(_ context.Context, _, _ string) (io.ReadCloser, error) {
	return nil, errKokoroNotConfigured
}

// ListVoices returns a nil slice and no error.
func (*noopKokoro) ListVoices(_ context.Context) ([]string, error) {
	return nil, nil
}
|
||||
|
||||
// parseRedisOpt converts a config.Redis into an asynq.RedisConnOpt.
|
||||
// Handles full "redis://" / "rediss://" URLs and plain "host:port".
|
||||
func parseRedisOpt(cfg config.Redis) (asynq.RedisConnOpt, error) {
|
||||
addr := cfg.Addr
|
||||
if len(addr) > 7 && (addr[:8] == "redis://" || (len(addr) > 8 && addr[:9] == "rediss://")) {
|
||||
return asynq.ParseRedisURI(addr)
|
||||
}
|
||||
return asynq.RedisClientOpt{
|
||||
Addr: addr,
|
||||
Password: cfg.Password,
|
||||
}, nil
|
||||
}
|
||||
57
backend/cmd/backend/main_test.go
Normal file
57
backend/cmd/backend/main_test.go
Normal file
@@ -0,0 +1,57 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestBuildLogger verifies that buildLogger returns a non-nil logger for each
|
||||
// supported log level string and for unknown values.
|
||||
func TestBuildLogger(t *testing.T) {
|
||||
for _, level := range []string{"debug", "info", "warn", "error", "unknown", ""} {
|
||||
l := buildLogger(level)
|
||||
if l == nil {
|
||||
t.Errorf("buildLogger(%q) returned nil", level)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestNoopKokoro verifies that the no-op Kokoro stub returns the expected
|
||||
// sentinel error from GenerateAudio and nil, nil from ListVoices.
|
||||
func TestNoopKokoro(t *testing.T) {
|
||||
noop := &noopKokoro{}
|
||||
|
||||
_, err := noop.GenerateAudio(t.Context(), "text", "af_bella")
|
||||
if err == nil {
|
||||
t.Fatal("noopKokoro.GenerateAudio: expected error, got nil")
|
||||
}
|
||||
|
||||
voices, err := noop.ListVoices(t.Context())
|
||||
if err != nil {
|
||||
t.Fatalf("noopKokoro.ListVoices: unexpected error: %v", err)
|
||||
}
|
||||
if voices != nil {
|
||||
t.Fatalf("noopKokoro.ListVoices: expected nil slice, got %v", voices)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRunStorageUnreachable verifies that run() fails fast and returns a
|
||||
// descriptive error when PocketBase is unreachable.
|
||||
func TestRunStorageUnreachable(t *testing.T) {
|
||||
// Point at an address nothing is listening on.
|
||||
t.Setenv("POCKETBASE_URL", "http://127.0.0.1:19999")
|
||||
// Use a fast listen address so we don't accidentally start a real server.
|
||||
t.Setenv("BACKEND_HTTP_ADDR", "127.0.0.1:0")
|
||||
|
||||
err := run()
|
||||
if err == nil {
|
||||
t.Fatal("run() should have returned an error when storage is unreachable")
|
||||
}
|
||||
|
||||
t.Logf("got expected error: %v", err)
|
||||
}
|
||||
|
||||
// TestMain runs the test suite and exits with its result code. No special
// setup or teardown is required.
func TestMain(m *testing.M) {
	os.Exit(m.Run())
}
|
||||
89
backend/cmd/healthcheck/main.go
Normal file
89
backend/cmd/healthcheck/main.go
Normal file
@@ -0,0 +1,89 @@
|
||||
// healthcheck is a static binary used by Docker HEALTHCHECK CMD in distroless
|
||||
// images (which have no shell, wget, or curl).
|
||||
//
|
||||
// Two modes:
|
||||
//
|
||||
// 1. HTTP mode (default):
|
||||
// /healthcheck <url>
|
||||
// Performs GET <url>; exits 0 if HTTP 2xx/3xx, 1 otherwise.
|
||||
// Example: /healthcheck http://localhost:8080/health
|
||||
//
|
||||
// 2. File-liveness mode:
|
||||
// /healthcheck file <path> <max_age_seconds>
|
||||
// Reads <path>, parses its content as RFC3339 timestamp, and exits 1 if the
|
||||
// timestamp is older than <max_age_seconds>. Used by the runner service which
|
||||
// writes /tmp/runner.alive on every successful poll.
|
||||
// Example: /healthcheck file /tmp/runner.alive 120
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
func main() {
|
||||
if len(os.Args) > 1 && os.Args[1] == "file" {
|
||||
checkFile()
|
||||
return
|
||||
}
|
||||
checkHTTP()
|
||||
}
|
||||
|
||||
// checkHTTP performs a GET request against os.Args[1] (default
// http://localhost:8080/health) and returns normally on success. It exits
// with status 1 when the request fails, times out, or the final response
// status is 400 or above.
func checkHTTP() {
	// Bound the whole probe. The default client has no timeout, so a server
	// that accepts the connection but never answers would hang the check.
	const timeout = 10 * time.Second

	url := "http://localhost:8080/health"
	if len(os.Args) > 1 {
		url = os.Args[1]
	}

	client := &http.Client{Timeout: timeout}
	resp, err := client.Get(url) //nolint:gosec,noctx
	if err != nil {
		fmt.Fprintf(os.Stderr, "healthcheck: %v\n", err)
		os.Exit(1)
	}
	// Only the status code matters; the body is discarded unread.
	resp.Body.Close()
	if resp.StatusCode >= 400 {
		fmt.Fprintf(os.Stderr, "healthcheck: status %d\n", resp.StatusCode)
		os.Exit(1)
	}
}
|
||||
|
||||
// checkFile reads a timestamp from a file and exits 1 if it is older than the
// given max age. Usage: /healthcheck file <path> <max_age_seconds>
//
// The file content is parsed as an RFC3339 timestamp. If parsing fails (which
// includes content with surrounding whitespace such as a trailing newline),
// the file's modification time is used instead. The function also exits 1 on
// a usage error, an unparsable max age, or an unreadable file.
func checkFile() {
	if len(os.Args) < 4 {
		fmt.Fprintln(os.Stderr, "healthcheck file: usage: /healthcheck file <path> <max_age_seconds>")
		os.Exit(1)
	}
	path := os.Args[2]
	maxAgeSec, err := strconv.ParseInt(os.Args[3], 10, 64)
	if err != nil {
		fmt.Fprintf(os.Stderr, "healthcheck file: invalid max_age_seconds %q: %v\n", os.Args[3], err)
		os.Exit(1)
	}

	data, err := os.ReadFile(path)
	if err != nil {
		fmt.Fprintf(os.Stderr, "healthcheck file: cannot read %s: %v\n", path, err)
		os.Exit(1)
	}

	ts, err := time.Parse(time.RFC3339, string(data))
	if err != nil {
		// Fallback: use file mtime if content is not a valid timestamp.
		info, statErr := os.Stat(path)
		if statErr != nil {
			fmt.Fprintf(os.Stderr, "healthcheck file: cannot stat %s: %v\n", path, statErr)
			os.Exit(1)
		}
		ts = info.ModTime()
	}

	age := time.Since(ts)
	if age > time.Duration(maxAgeSec)*time.Second {
		fmt.Fprintf(os.Stderr, "healthcheck file: %s is %.0fs old (max %ds)\n", path, age.Seconds(), maxAgeSec)
		os.Exit(1)
	}
}
|
||||
236
backend/cmd/runner/main.go
Normal file
236
backend/cmd/runner/main.go
Normal file
@@ -0,0 +1,236 @@
|
||||
// Command runner is the homelab worker binary.
|
||||
//
|
||||
// It polls PocketBase for pending scrape and audio tasks, executes them, and
|
||||
// writes results back. It connects directly to PocketBase and MinIO using
|
||||
// admin credentials loaded from environment variables.
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// runner # start polling loop (blocks until SIGINT/SIGTERM)
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/signal"
|
||||
"runtime"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/getsentry/sentry-go"
|
||||
"github.com/libnovel/backend/internal/asynqqueue"
|
||||
"github.com/libnovel/backend/internal/browser"
|
||||
"github.com/libnovel/backend/internal/config"
|
||||
"github.com/libnovel/backend/internal/kokoro"
|
||||
"github.com/libnovel/backend/internal/libretranslate"
|
||||
"github.com/libnovel/backend/internal/meili"
|
||||
"github.com/libnovel/backend/internal/novelfire"
|
||||
"github.com/libnovel/backend/internal/otelsetup"
|
||||
"github.com/libnovel/backend/internal/pockettts"
|
||||
"github.com/libnovel/backend/internal/runner"
|
||||
"github.com/libnovel/backend/internal/storage"
|
||||
"github.com/libnovel/backend/internal/taskqueue"
|
||||
)
|
||||
|
||||
// version and commit are set at build time via -ldflags.
// The values below are the defaults used when no override is supplied.
var (
	version = "dev"
	commit  = "unknown"
)
|
||||
|
||||
func main() {
|
||||
if err := run(); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "runner: fatal: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func run() error {
|
||||
cfg := config.Load()
|
||||
|
||||
// ── Sentry / GlitchTip error tracking ────────────────────────────────────
|
||||
if dsn := os.Getenv("GLITCHTIP_DSN"); dsn != "" {
|
||||
if err := sentry.Init(sentry.ClientOptions{
|
||||
Dsn: dsn,
|
||||
Release: version + "@" + commit,
|
||||
TracesSampleRate: 0.1,
|
||||
}); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "runner: sentry init warning: %v\n", err)
|
||||
} else {
|
||||
defer sentry.Flush(2 * time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
// ── Logger ──────────────────────────────────────────────────────────────
|
||||
log := buildLogger(cfg.LogLevel)
|
||||
log.Info("runner starting",
|
||||
"version", version,
|
||||
"commit", commit,
|
||||
"worker_id", cfg.Runner.WorkerID,
|
||||
)
|
||||
|
||||
// ── Context: cancel on SIGINT / SIGTERM ─────────────────────────────────
|
||||
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
|
||||
defer stop()
|
||||
|
||||
// ── OpenTelemetry tracing + logs ─────────────────────────────────────────
|
||||
otelShutdown, otelLog, err := otelsetup.Init(ctx, version)
|
||||
if err != nil {
|
||||
return fmt.Errorf("init otel: %w", err)
|
||||
}
|
||||
if otelShutdown != nil {
|
||||
defer otelShutdown()
|
||||
// Switch to the OTel-bridged logger so all structured log lines are
|
||||
// forwarded to Loki with trace IDs attached.
|
||||
log = otelLog
|
||||
log.Info("otel tracing + logs enabled", "endpoint", os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"))
|
||||
}
|
||||
|
||||
// ── Storage ─────────────────────────────────────────────────────────────
|
||||
store, err := storage.NewStore(ctx, cfg, log)
|
||||
if err != nil {
|
||||
return fmt.Errorf("init storage: %w", err)
|
||||
}
|
||||
|
||||
// ── Browser / Scraper ───────────────────────────────────────────────────
|
||||
workers := cfg.Runner.Workers
|
||||
if workers <= 0 {
|
||||
workers = runtime.NumCPU()
|
||||
}
|
||||
timeout := cfg.Runner.Timeout
|
||||
if timeout <= 0 {
|
||||
timeout = 90 * time.Second
|
||||
}
|
||||
|
||||
browserClient := browser.NewDirectClient(browser.Config{
|
||||
MaxConcurrent: workers,
|
||||
Timeout: timeout,
|
||||
})
|
||||
novel := novelfire.New(browserClient, log)
|
||||
|
||||
// ── Kokoro ──────────────────────────────────────────────────────────────
|
||||
var kokoroClient kokoro.Client
|
||||
if cfg.Kokoro.URL != "" {
|
||||
kokoroClient = kokoro.New(cfg.Kokoro.URL)
|
||||
log.Info("kokoro TTS enabled", "url", cfg.Kokoro.URL)
|
||||
} else {
|
||||
log.Warn("KOKORO_URL not set — kokoro voice tasks will fail")
|
||||
kokoroClient = &noopKokoro{}
|
||||
}
|
||||
|
||||
// ── pocket-tts ──────────────────────────────────────────────────────────
|
||||
var pocketTTSClient pockettts.Client
|
||||
if cfg.PocketTTS.URL != "" {
|
||||
pocketTTSClient = pockettts.New(cfg.PocketTTS.URL)
|
||||
log.Info("pocket-tts enabled", "url", cfg.PocketTTS.URL)
|
||||
} else {
|
||||
log.Warn("POCKET_TTS_URL not set — pocket-tts voice tasks will fail")
|
||||
}
|
||||
|
||||
// ── LibreTranslate ──────────────────────────────────────────────────────
|
||||
ltClient := libretranslate.New(cfg.LibreTranslate.URL, cfg.LibreTranslate.APIKey)
|
||||
if ltClient != nil {
|
||||
log.Info("libretranslate enabled", "url", cfg.LibreTranslate.URL)
|
||||
} else {
|
||||
log.Info("LIBRETRANSLATE_URL not set — machine translation disabled")
|
||||
}
|
||||
|
||||
// ── Meilisearch ─────────────────────────────────────────────────────────
|
||||
var searchIndex meili.Client
|
||||
if cfg.Meilisearch.URL != "" {
|
||||
if err := meili.Configure(cfg.Meilisearch.URL, cfg.Meilisearch.APIKey); err != nil {
|
||||
log.Warn("meilisearch configure failed — search indexing disabled", "err", err)
|
||||
searchIndex = meili.NoopClient{}
|
||||
} else {
|
||||
searchIndex = meili.New(cfg.Meilisearch.URL, cfg.Meilisearch.APIKey)
|
||||
log.Info("meilisearch enabled", "url", cfg.Meilisearch.URL)
|
||||
}
|
||||
} else {
|
||||
log.Info("MEILI_URL not set — search indexing disabled")
|
||||
searchIndex = meili.NoopClient{}
|
||||
}
|
||||
|
||||
// ── Runner ──────────────────────────────────────────────────────────────
|
||||
rCfg := runner.Config{
|
||||
WorkerID: cfg.Runner.WorkerID,
|
||||
PollInterval: cfg.Runner.PollInterval,
|
||||
MaxConcurrentScrape: cfg.Runner.MaxConcurrentScrape,
|
||||
MaxConcurrentAudio: cfg.Runner.MaxConcurrentAudio,
|
||||
MaxConcurrentTranslation: cfg.Runner.MaxConcurrentTranslation,
|
||||
OrchestratorWorkers: workers,
|
||||
MetricsAddr: cfg.Runner.MetricsAddr,
|
||||
CatalogueRefreshInterval: cfg.Runner.CatalogueRefreshInterval,
|
||||
CatalogueRequestDelay: cfg.Runner.CatalogueRequestDelay,
|
||||
SkipInitialCatalogueRefresh: cfg.Runner.SkipInitialCatalogueRefresh,
|
||||
RedisAddr: cfg.Redis.Addr,
|
||||
RedisPassword: cfg.Redis.Password,
|
||||
}
|
||||
|
||||
// In Asynq mode the Consumer is a thin wrapper: claim/heartbeat/reap are
|
||||
// no-ops, but FinishAudioTask / FinishScrapeTask / FailTask write back to
|
||||
// PocketBase as before.
|
||||
var consumer taskqueue.Consumer = store
|
||||
if cfg.Redis.Addr != "" {
|
||||
log.Info("runner: asynq mode — using Redis for task dispatch", "addr", cfg.Redis.Addr)
|
||||
consumer = asynqqueue.NewConsumer(store)
|
||||
} else {
|
||||
log.Info("runner: poll mode — using PocketBase for task dispatch")
|
||||
}
|
||||
|
||||
deps := runner.Dependencies{
|
||||
Consumer: consumer,
|
||||
BookWriter: store,
|
||||
BookReader: store,
|
||||
AudioStore: store,
|
||||
CoverStore: store,
|
||||
TranslationStore: store,
|
||||
SearchIndex: searchIndex,
|
||||
Novel: novel,
|
||||
Kokoro: kokoroClient,
|
||||
PocketTTS: pocketTTSClient,
|
||||
LibreTranslate: ltClient,
|
||||
Log: log,
|
||||
}
|
||||
r := runner.New(rCfg, deps)
|
||||
|
||||
return r.Run(ctx)
|
||||
}
|
||||
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
// buildLogger returns a JSON slog logger writing to stdout at the given
// minimum level. Recognised values are "debug", "warn" and "error"; any other
// value (including the empty string) selects the info level. Matching is
// exact and case-sensitive.
func buildLogger(level string) *slog.Logger {
	lvl := slog.LevelInfo
	switch level {
	case "debug":
		lvl = slog.LevelDebug
	case "warn":
		lvl = slog.LevelWarn
	case "error":
		lvl = slog.LevelError
	}
	return slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{Level: lvl}))
}
|
||||
|
||||
// errKokoroNotConfigured is returned by every audio-producing method of
// noopKokoro.
var errKokoroNotConfigured = fmt.Errorf("kokoro not configured (KOKORO_URL is empty)")

// noopKokoro is a no-op implementation used when KOKORO_URL is not set.
// Its audio methods always fail; ListVoices reports no voices and no error.
type noopKokoro struct{}

// GenerateAudio always returns a nil slice and the "not configured" error.
func (*noopKokoro) GenerateAudio(_ context.Context, _, _ string) ([]byte, error) {
	return nil, errKokoroNotConfigured
}

// StreamAudioMP3 always returns a nil reader and the "not configured" error.
func (*noopKokoro) StreamAudioMP3(_ context.Context, _, _ string) (io.ReadCloser, error) {
	return nil, errKokoroNotConfigured
}

// StreamAudioWAV always returns a nil reader and the "not configured" error.
func (*noopKokoro) StreamAudioWAV(_ context.Context, _, _ string) (io.ReadCloser, error) {
	return nil, errKokoroNotConfigured
}

// ListVoices returns no voices and no error.
func (*noopKokoro) ListVoices(_ context.Context) ([]string, error) {
	return nil, nil
}
|
||||
72
backend/go.mod
Normal file
72
backend/go.mod
Normal file
@@ -0,0 +1,72 @@
|
||||
module github.com/libnovel/backend
|
||||
|
||||
go 1.26.1
|
||||
|
||||
require (
|
||||
github.com/minio/minio-go/v7 v7.0.98
|
||||
golang.org/x/net v0.51.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/andybalholm/brotli v1.1.1 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
|
||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||
github.com/getsentry/sentry-go v0.43.0 // indirect
|
||||
github.com/go-ini/ini v1.67.0 // indirect
|
||||
github.com/go-logr/logr v1.4.3 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/golang-jwt/jwt/v5 v5.3.1 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect
|
||||
github.com/hibiken/asynq v0.26.0 // indirect
|
||||
github.com/hibiken/asynq/x v0.0.0-20260203063626-d704b68a426d // indirect
|
||||
github.com/klauspost/compress v1.18.2 // indirect
|
||||
github.com/klauspost/cpuid/v2 v2.2.11 // indirect
|
||||
github.com/klauspost/crc32 v1.3.0 // indirect
|
||||
github.com/meilisearch/meilisearch-go v0.36.1 // indirect
|
||||
github.com/minio/crc64nvme v1.1.1 // indirect
|
||||
github.com/minio/md5-simd v1.1.2 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/philhofer/fwd v1.2.0 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/prometheus/client_golang v1.23.2 // indirect
|
||||
github.com/prometheus/client_model v0.6.2 // indirect
|
||||
github.com/prometheus/common v0.66.1 // indirect
|
||||
github.com/prometheus/procfs v0.16.1 // indirect
|
||||
github.com/redis/go-redis/v9 v9.18.0 // indirect
|
||||
github.com/robfig/cron/v3 v3.0.1 // indirect
|
||||
github.com/rs/xid v1.6.0 // indirect
|
||||
github.com/spf13/cast v1.10.0 // indirect
|
||||
github.com/tinylib/msgp v1.6.1 // indirect
|
||||
github.com/yuin/goldmark v1.8.2 // indirect
|
||||
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
|
||||
go.opentelemetry.io/contrib/bridges/otelslog v0.17.0 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 // indirect
|
||||
go.opentelemetry.io/otel v1.42.0 // indirect
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.18.0 // indirect
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.42.0 // indirect
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.42.0 // indirect
|
||||
go.opentelemetry.io/otel/log v0.18.0 // indirect
|
||||
go.opentelemetry.io/otel/metric v1.42.0 // indirect
|
||||
go.opentelemetry.io/otel/sdk v1.42.0 // indirect
|
||||
go.opentelemetry.io/otel/sdk/log v0.18.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.42.0 // indirect
|
||||
go.opentelemetry.io/proto/otlp v1.9.0 // indirect
|
||||
go.uber.org/atomic v1.11.0 // indirect
|
||||
go.yaml.in/yaml/v2 v2.4.2 // indirect
|
||||
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
||||
golang.org/x/crypto v0.48.0 // indirect
|
||||
golang.org/x/sys v0.41.0 // indirect
|
||||
golang.org/x/text v0.34.0 // indirect
|
||||
golang.org/x/time v0.14.0 // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57 // indirect
|
||||
google.golang.org/grpc v1.79.2 // indirect
|
||||
google.golang.org/protobuf v1.36.11 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
143
backend/go.sum
Normal file
143
backend/go.sum
Normal file
@@ -0,0 +1,143 @@
|
||||
github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
|
||||
github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||
github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
|
||||
github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
|
||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
|
||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
|
||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
|
||||
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
||||
github.com/getsentry/sentry-go v0.43.0 h1:XbXLpFicpo8HmBDaInk7dum18G9KSLcjZiyUKS+hLW4=
|
||||
github.com/getsentry/sentry-go v0.43.0/go.mod h1:XDotiNZbgf5U8bPDUAfvcFmOnMQQceESxyKaObSssW0=
|
||||
github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A=
|
||||
github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8=
|
||||
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
|
||||
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
|
||||
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
|
||||
github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY=
|
||||
github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c=
|
||||
github.com/hibiken/asynq v0.26.0 h1:1Zxr92MlDnb1Zt/QR5g2vSCqUS03i95lUfqx5X7/wrw=
|
||||
github.com/hibiken/asynq v0.26.0/go.mod h1:Qk4e57bTnWDoyJ67VkchuV6VzSM9IQW2nPvAGuDyw58=
|
||||
github.com/hibiken/asynq/x v0.0.0-20260203063626-d704b68a426d h1:Ld5m8EIK5QVOq/owOexKIbETij3skACg4eU1pArHsrw=
|
||||
github.com/hibiken/asynq/x v0.0.0-20260203063626-d704b68a426d/go.mod h1:hhpStehaxSGg3ib9wJXzw5AXY1YS6lQ9BNavAgPbIhE=
|
||||
github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk=
|
||||
github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
|
||||
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
||||
github.com/klauspost/cpuid/v2 v2.2.11 h1:0OwqZRYI2rFrjS4kvkDnqJkKHdHaRnCm68/DY4OxRzU=
|
||||
github.com/klauspost/cpuid/v2 v2.2.11/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
|
||||
github.com/klauspost/crc32 v1.3.0 h1:sSmTt3gUt81RP655XGZPElI0PelVTZ6YwCRnPSupoFM=
|
||||
github.com/klauspost/crc32 v1.3.0/go.mod h1:D7kQaZhnkX/Y0tstFGf8VUzv2UofNGqCjnC3zdHB0Hw=
|
||||
github.com/meilisearch/meilisearch-go v0.36.1 h1:mJTCJE5g7tRvaqKco6DfqOuJEjX+rRltDEnkEC02Y0M=
|
||||
github.com/meilisearch/meilisearch-go v0.36.1/go.mod h1:hWcR0MuWLSzHfbz9GGzIr3s9rnXLm1jqkmHkJPbUSvM=
|
||||
github.com/minio/crc64nvme v1.1.1 h1:8dwx/Pz49suywbO+auHCBpCtlW1OfpcLN7wYgVR6wAI=
|
||||
github.com/minio/crc64nvme v1.1.1/go.mod h1:eVfm2fAzLlxMdUGc0EEBGSMmPwmXD5XiNRpnu9J3bvg=
|
||||
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
|
||||
github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
|
||||
github.com/minio/minio-go/v7 v7.0.98 h1:MeAVKjLVz+XJ28zFcuYyImNSAh8Mq725uNW4beRisi0=
|
||||
github.com/minio/minio-go/v7 v7.0.98/go.mod h1:cY0Y+W7yozf0mdIclrttzo1Iiu7mEf9y7nk2uXqMOvM=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||
github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM=
|
||||
github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y=
|
||||
github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
|
||||
github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
|
||||
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
|
||||
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
|
||||
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
|
||||
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
|
||||
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
|
||||
github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
|
||||
github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
|
||||
github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
|
||||
github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
|
||||
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
|
||||
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
|
||||
github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
|
||||
github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
|
||||
github.com/redis/go-redis/v9 v9.18.0 h1:pMkxYPkEbMPwRdenAzUNyFNrDgHx9U+DrBabWNfSRQs=
|
||||
github.com/redis/go-redis/v9 v9.18.0/go.mod h1:k3ufPphLU5YXwNTUcCRXGxUoF1fqxnhFQmscfkCoDA0=
|
||||
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
|
||||
github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
|
||||
github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU=
|
||||
github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
|
||||
github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY=
|
||||
github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo=
|
||||
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/tinylib/msgp v1.6.1 h1:ESRv8eL3u+DNHUoSAAQRE50Hm162zqAnBoGv9PzScPY=
|
||||
github.com/tinylib/msgp v1.6.1/go.mod h1:RSp0LW9oSxFut3KzESt5Voq4GVWyS+PSulT77roAqEA=
|
||||
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
|
||||
github.com/yuin/goldmark v1.8.2 h1:kEGpgqJXdgbkhcOgBxkC0X0PmoPG1ZyoZ117rDVp4zE=
|
||||
github.com/yuin/goldmark v1.8.2/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc7iBKg=
|
||||
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
|
||||
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
|
||||
go.opentelemetry.io/contrib/bridges/otelslog v0.17.0 h1:NFIS6x7wyObQ7cR84x7bt1sr8nYBx89s3x3GwRjw40k=
|
||||
go.opentelemetry.io/contrib/bridges/otelslog v0.17.0/go.mod h1:39SaByOyDMRMe872AE7uelMuQZidIw7LLFAnQi0FWTE=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 h1:OyrsyzuttWTSur2qN/Lm0m2a8yqyIjUVBZcxFPuXq2o=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0/go.mod h1:C2NGBr+kAB4bk3xtMXfZ94gqFDtg/GkI7e9zqGh5Beg=
|
||||
go.opentelemetry.io/otel v1.42.0 h1:lSQGzTgVR3+sgJDAU/7/ZMjN9Z+vUip7leaqBKy4sho=
|
||||
go.opentelemetry.io/otel v1.42.0/go.mod h1:lJNsdRMxCUIWuMlVJWzecSMuNjE7dOYyWlqOXWkdqCc=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.18.0 h1:icqq3Z34UrEFk2u+HMhTtRsvo7Ues+eiJVjaJt62njs=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.18.0/go.mod h1:W2m8P+d5Wn5kipj4/xmbt9uMqezEKfBjzVJadfABSBE=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.42.0 h1:THuZiwpQZuHPul65w4WcwEnkX2QIuMT+UFoOrygtoJw=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.42.0/go.mod h1:J2pvYM5NGHofZ2/Ru6zw/TNWnEQp5crgyDeSrYpXkAw=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.42.0 h1:uLXP+3mghfMf7XmV4PkGfFhFKuNWoCvvx5wP/wOXo0o=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.42.0/go.mod h1:v0Tj04armyT59mnURNUJf7RCKcKzq+lgJs6QSjHjaTc=
|
||||
go.opentelemetry.io/otel/log v0.18.0 h1:XgeQIIBjZZrliksMEbcwMZefoOSMI1hdjiLEiiB0bAg=
|
||||
go.opentelemetry.io/otel/log v0.18.0/go.mod h1:KEV1kad0NofR3ycsiDH4Yjcoj0+8206I6Ox2QYFSNgI=
|
||||
go.opentelemetry.io/otel/metric v1.42.0 h1:2jXG+3oZLNXEPfNmnpxKDeZsFI5o4J+nz6xUlaFdF/4=
|
||||
go.opentelemetry.io/otel/metric v1.42.0/go.mod h1:RlUN/7vTU7Ao/diDkEpQpnz3/92J9ko05BIwxYa2SSI=
|
||||
go.opentelemetry.io/otel/sdk v1.42.0 h1:LyC8+jqk6UJwdrI/8VydAq/hvkFKNHZVIWuslJXYsDo=
|
||||
go.opentelemetry.io/otel/sdk v1.42.0/go.mod h1:rGHCAxd9DAph0joO4W6OPwxjNTYWghRWmkHuGbayMts=
|
||||
go.opentelemetry.io/otel/sdk/log v0.18.0 h1:n8OyZr7t7otkeTnPTbDNom6rW16TBYGtvyy2Gk6buQw=
|
||||
go.opentelemetry.io/otel/sdk/log v0.18.0/go.mod h1:C0+wxkTwKpOCZLrlJ3pewPiiQwpzycPI/u6W0Z9fuYk=
|
||||
go.opentelemetry.io/otel/trace v1.42.0 h1:OUCgIPt+mzOnaUTpOQcBiM/PLQ/Op7oq6g4LenLmOYY=
|
||||
go.opentelemetry.io/otel/trace v1.42.0/go.mod h1:f3K9S+IFqnumBkKhRJMeaZeNk9epyhnCmQh/EysQCdc=
|
||||
go.opentelemetry.io/proto/otlp v1.9.0 h1:l706jCMITVouPOqEnii2fIAuO3IVGBRPV5ICjceRb/A=
|
||||
go.opentelemetry.io/proto/otlp v1.9.0/go.mod h1:xE+Cx5E/eEHw+ISFkwPLwCZefwVjY+pqKg1qcK03+/4=
|
||||
go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
|
||||
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
|
||||
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
|
||||
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
|
||||
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
|
||||
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
|
||||
golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts=
|
||||
golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos=
|
||||
golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo=
|
||||
golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y=
|
||||
golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k=
|
||||
golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk=
|
||||
golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA=
|
||||
golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
|
||||
golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 h1:JLQynH/LBHfCTSbDWl+py8C+Rg/k1OVH3xfcaiANuF0=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:kSJwQxqmFXeo79zOmbrALdflXQeAYcUbgS7PbpMknCY=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57 h1:mWPCjDEyshlQYzBpMNHaEof6UX1PmHcaUODUywQ0uac=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ=
|
||||
google.golang.org/grpc v1.79.2 h1:fRMD94s2tITpyJGtBBn7MkMseNpOZU8ZxgC3MMBaXRU=
|
||||
google.golang.org/grpc v1.79.2/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ=
|
||||
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
|
||||
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
71
backend/internal/asynqqueue/consumer.go
Normal file
71
backend/internal/asynqqueue/consumer.go
Normal file
@@ -0,0 +1,71 @@
|
||||
package asynqqueue
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
"github.com/libnovel/backend/internal/taskqueue"
|
||||
)
|
||||
|
||||
// Consumer wraps the PocketBase-backed Consumer for result write-back only.
|
||||
//
|
||||
// When using Asynq, the runner no longer polls for scrape/audio work — Asynq
|
||||
// delivers those tasks via the ServeMux handlers. However translation tasks
|
||||
// live in PocketBase (not Redis), so ClaimNextTranslationTask and HeartbeatTask
|
||||
// still delegate to the underlying PocketBase consumer.
|
||||
//
|
||||
// ClaimNextAudioTask, ClaimNextScrapeTask are no-ops here because Asynq owns
|
||||
// those responsibilities.
|
||||
type Consumer struct {
|
||||
pb taskqueue.Consumer // underlying PocketBase consumer (for write-back)
|
||||
}
|
||||
|
||||
// NewConsumer wraps an existing PocketBase Consumer.
|
||||
func NewConsumer(pb taskqueue.Consumer) *Consumer {
|
||||
return &Consumer{pb: pb}
|
||||
}
|
||||
|
||||
// ── Write-back (delegated to PocketBase) ──────────────────────────────────────
|
||||
|
||||
func (c *Consumer) FinishScrapeTask(ctx context.Context, id string, result domain.ScrapeResult) error {
|
||||
return c.pb.FinishScrapeTask(ctx, id, result)
|
||||
}
|
||||
|
||||
func (c *Consumer) FinishAudioTask(ctx context.Context, id string, result domain.AudioResult) error {
|
||||
return c.pb.FinishAudioTask(ctx, id, result)
|
||||
}
|
||||
|
||||
func (c *Consumer) FinishTranslationTask(ctx context.Context, id string, result domain.TranslationResult) error {
|
||||
return c.pb.FinishTranslationTask(ctx, id, result)
|
||||
}
|
||||
|
||||
func (c *Consumer) FailTask(ctx context.Context, id, errMsg string) error {
|
||||
return c.pb.FailTask(ctx, id, errMsg)
|
||||
}
|
||||
|
||||
// ── No-ops (Asynq owns claiming / heartbeating / reaping) ───────────────────
|
||||
|
||||
func (c *Consumer) ClaimNextScrapeTask(_ context.Context, _ string) (domain.ScrapeTask, bool, error) {
|
||||
return domain.ScrapeTask{}, false, nil
|
||||
}
|
||||
|
||||
func (c *Consumer) ClaimNextAudioTask(_ context.Context, _ string) (domain.AudioTask, bool, error) {
|
||||
return domain.AudioTask{}, false, nil
|
||||
}
|
||||
|
||||
// ClaimNextTranslationTask delegates to PocketBase because translation tasks
|
||||
// are stored in PocketBase (not Redis/Asynq) and must still be polled directly.
|
||||
func (c *Consumer) ClaimNextTranslationTask(ctx context.Context, workerID string) (domain.TranslationTask, bool, error) {
|
||||
return c.pb.ClaimNextTranslationTask(ctx, workerID)
|
||||
}
|
||||
|
||||
func (c *Consumer) HeartbeatTask(ctx context.Context, id string) error {
|
||||
return c.pb.HeartbeatTask(ctx, id)
|
||||
}
|
||||
|
||||
// ReapStaleTasks delegates to PocketBase so stale translation tasks are reset
|
||||
// to pending and can be reclaimed.
|
||||
func (c *Consumer) ReapStaleTasks(ctx context.Context, staleAfter time.Duration) (int, error) {
|
||||
return c.pb.ReapStaleTasks(ctx, staleAfter)
|
||||
}
|
||||
110
backend/internal/asynqqueue/producer.go
Normal file
110
backend/internal/asynqqueue/producer.go
Normal file
@@ -0,0 +1,110 @@
|
||||
package asynqqueue
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
|
||||
"github.com/hibiken/asynq"
|
||||
"github.com/libnovel/backend/internal/taskqueue"
|
||||
)
|
||||
|
||||
// Producer dual-writes every task: first to PocketBase (via pb, for audit /
|
||||
// UI status), then to Redis via Asynq so the runner picks it up immediately.
|
||||
type Producer struct {
|
||||
pb taskqueue.Producer // underlying PocketBase producer
|
||||
client *asynq.Client
|
||||
log *slog.Logger
|
||||
}
|
||||
|
||||
// NewProducer wraps an existing PocketBase Producer with Asynq dispatch.
|
||||
func NewProducer(pb taskqueue.Producer, redisOpt asynq.RedisConnOpt, log *slog.Logger) *Producer {
|
||||
return &Producer{
|
||||
pb: pb,
|
||||
client: asynq.NewClient(redisOpt),
|
||||
log: log,
|
||||
}
|
||||
}
|
||||
|
||||
// Close shuts down the underlying Asynq client connection.
|
||||
func (p *Producer) Close() error {
|
||||
return p.client.Close()
|
||||
}
|
||||
|
||||
// CreateScrapeTask creates a PocketBase record then enqueues an Asynq job.
|
||||
func (p *Producer) CreateScrapeTask(ctx context.Context, kind, targetURL string, fromChapter, toChapter int) (string, error) {
|
||||
id, err := p.pb.CreateScrapeTask(ctx, kind, targetURL, fromChapter, toChapter)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
payload := ScrapePayload{
|
||||
PBTaskID: id,
|
||||
Kind: kind,
|
||||
TargetURL: targetURL,
|
||||
FromChapter: fromChapter,
|
||||
ToChapter: toChapter,
|
||||
}
|
||||
taskType := TypeScrapeBook
|
||||
if kind == "catalogue" {
|
||||
taskType = TypeScrapeCatalogue
|
||||
}
|
||||
if err := p.enqueue(ctx, taskType, payload); err != nil {
|
||||
// Non-fatal: PB record exists; runner will pick it up on next poll.
|
||||
p.log.Warn("asynq enqueue scrape failed (task still in PB, runner will poll)",
|
||||
"task_id", id, "err", err)
|
||||
return id, nil
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
|
||||
// CreateAudioTask creates a PocketBase record then enqueues an Asynq job.
|
||||
func (p *Producer) CreateAudioTask(ctx context.Context, slug string, chapter int, voice string) (string, error) {
|
||||
id, err := p.pb.CreateAudioTask(ctx, slug, chapter, voice)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
payload := AudioPayload{
|
||||
PBTaskID: id,
|
||||
Slug: slug,
|
||||
Chapter: chapter,
|
||||
Voice: voice,
|
||||
}
|
||||
if err := p.enqueue(ctx, TypeAudioGenerate, payload); err != nil {
|
||||
// Non-fatal: PB record exists; runner will pick it up on next poll.
|
||||
p.log.Warn("asynq enqueue audio failed (task still in PB, runner will poll)",
|
||||
"task_id", id, "err", err)
|
||||
return id, nil
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
|
||||
// CreateTranslationTask creates a PocketBase record. Translation tasks are
|
||||
// not currently dispatched via Asynq — the runner picks them up via polling.
|
||||
func (p *Producer) CreateTranslationTask(ctx context.Context, slug string, chapter int, lang string) (string, error) {
|
||||
return p.pb.CreateTranslationTask(ctx, slug, chapter, lang)
|
||||
}
|
||||
|
||||
// CancelTask delegates to PocketBase; Asynq jobs may already be running and
|
||||
// cannot be reliably cancelled, so we only update the audit record.
|
||||
func (p *Producer) CancelTask(ctx context.Context, id string) error {
|
||||
return p.pb.CancelTask(ctx, id)
|
||||
}
|
||||
|
||||
// CancelAudioTasksBySlug delegates to PocketBase to cancel all pending/running
|
||||
// audio tasks for slug.
|
||||
func (p *Producer) CancelAudioTasksBySlug(ctx context.Context, slug string) (int, error) {
|
||||
return p.pb.CancelAudioTasksBySlug(ctx, slug)
|
||||
}
|
||||
|
||||
// enqueue serialises payload and dispatches it to Asynq.
|
||||
func (p *Producer) enqueue(_ context.Context, taskType string, payload any) error {
|
||||
b, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal payload: %w", err)
|
||||
}
|
||||
_, err = p.client.Enqueue(asynq.NewTask(taskType, b))
|
||||
return err
|
||||
}
|
||||
46
backend/internal/asynqqueue/tasks.go
Normal file
46
backend/internal/asynqqueue/tasks.go
Normal file
@@ -0,0 +1,46 @@
|
||||
// Package asynqqueue provides Asynq-backed implementations of the
|
||||
// taskqueue.Producer and taskqueue.Consumer interfaces.
|
||||
//
|
||||
// Architecture:
|
||||
// - Producer: dual-writes — creates a PocketBase record for audit/UI, then
|
||||
// enqueues an Asynq job so the runner picks it up immediately (sub-ms).
|
||||
// - Consumer: thin wrapper used only for result write-back (FinishAudioTask,
// FinishScrapeTask, FailTask). ClaimNext*/Heartbeat are no-ops because
// Asynq owns those responsibilities; ReapStaleTasks is forwarded to the
// wrapped pb consumer.
|
||||
// - Handlers: asynq.HandlerFunc wrappers that decode job payloads and invoke
|
||||
// the existing runner logic (runScrapeTask / runAudioTask).
|
||||
//
|
||||
// Fallback: when REDIS_ADDR is empty the caller should use the plain
|
||||
// storage.Store (PocketBase-polling) implementation unchanged.
|
||||
package asynqqueue
|
||||
|
||||
// Queue names. All jobs currently share the default queue; separate queues
// (e.g. "audio", "scrape") can be added later if priorities are needed.
const (
	// QueueDefault is the name of the single queue in use.
	QueueDefault = "default"
)
|
||||
|
||||
// Asynq task type names; these are the routing keys for jobs.
const (
	// TypeAudioGenerate marks audio generation jobs (AudioPayload).
	TypeAudioGenerate = "audio:generate"
	// TypeScrapeBook marks scrape jobs whose kind is not "catalogue".
	TypeScrapeBook = "scrape:book"
	// TypeScrapeCatalogue marks catalogue scrape jobs.
	TypeScrapeCatalogue = "scrape:catalogue"
)
|
||||
|
||||
// AudioPayload is the JSON body carried by an audio generation Asynq job.
type AudioPayload struct {
	// PBTaskID is the ID of the PocketBase record created before the job was
	// enqueued; the handler uses it to write results back via
	// Consumer.FinishAudioTask.
	PBTaskID string `json:"pb_task_id"`
	// Slug is the book slug passed to CreateAudioTask.
	Slug string `json:"slug"`
	// Chapter is the chapter number passed to CreateAudioTask.
	Chapter int `json:"chapter"`
	// Voice is the voice ID passed to CreateAudioTask.
	Voice string `json:"voice"`
}
|
||||
|
||||
// ScrapePayload is the JSON body carried by a scrape Asynq job.
type ScrapePayload struct {
	// PBTaskID is the ID of the PocketBase record created before the job was
	// enqueued.
	PBTaskID string `json:"pb_task_id"`
	// Kind is "catalogue", "book", or "book_range".
	Kind string `json:"kind"`
	// TargetURL is empty for catalogue tasks.
	TargetURL string `json:"target_url"`
	// FromChapter is 0 unless Kind == "book_range".
	FromChapter int `json:"from_chapter"`
	// ToChapter is 0 unless Kind == "book_range".
	ToChapter int `json:"to_chapter"`
}
|
||||
143
backend/internal/backend/epub.go
Normal file
143
backend/internal/backend/epub.go
Normal file
@@ -0,0 +1,143 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// epubChapter is one chapter handed to generateEPUB.
type epubChapter struct {
	// Number selects the file name (chapter-%04d.xhtml) and the fallback
	// title "Chapter N".
	Number int
	// Title is the chapter heading; empty means "use the fallback".
	Title string
	// HTML is inserted into the chapter body verbatim (it is not escaped).
	HTML string
}
|
||||
|
||||
func generateEPUB(slug, title, author string, chapters []epubChapter) ([]byte, error) {
|
||||
var buf bytes.Buffer
|
||||
w := zip.NewWriter(&buf)
|
||||
|
||||
// 1. mimetype — MUST be first, MUST be uncompressed (Store method)
|
||||
mw, err := w.CreateHeader(&zip.FileHeader{
|
||||
Name: "mimetype",
|
||||
Method: zip.Store,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
mw.Write([]byte("application/epub+zip"))
|
||||
|
||||
// 2. META-INF/container.xml
|
||||
addFile(w, "META-INF/container.xml", containerXML())
|
||||
|
||||
// 3. OEBPS/style.css
|
||||
addFile(w, "OEBPS/style.css", epubCSS())
|
||||
|
||||
// 4. OEBPS/content.opf
|
||||
addFile(w, "OEBPS/content.opf", contentOPF(slug, title, author, chapters))
|
||||
|
||||
// 5. OEBPS/toc.ncx
|
||||
addFile(w, "OEBPS/toc.ncx", tocNCX(slug, title, chapters))
|
||||
|
||||
// 6. Chapter files
|
||||
for _, ch := range chapters {
|
||||
name := fmt.Sprintf("OEBPS/chapter-%04d.xhtml", ch.Number)
|
||||
addFile(w, name, chapterXHTML(ch))
|
||||
}
|
||||
|
||||
w.Close()
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
|
||||
func addFile(w *zip.Writer, name, content string) {
|
||||
f, _ := w.Create(name)
|
||||
f.Write([]byte(content))
|
||||
}
|
||||
|
||||
// containerXML returns the fixed META-INF/container.xml document, which
// points at OEBPS/content.opf as the package root file.
func containerXML() string {
	return `<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
  </rootfiles>
</container>`
}
|
||||
|
||||
func contentOPF(slug, title, author string, chapters []epubChapter) string {
|
||||
var items, spine strings.Builder
|
||||
for _, ch := range chapters {
|
||||
id := fmt.Sprintf("ch%04d", ch.Number)
|
||||
href := fmt.Sprintf("chapter-%04d.xhtml", ch.Number)
|
||||
items.WriteString(fmt.Sprintf(` <item id="%s" href="%s" media-type="application/xhtml+xml"/>`+"\n", id, href))
|
||||
spine.WriteString(fmt.Sprintf(` <itemref idref="%s"/>`+"\n", id))
|
||||
}
|
||||
return fmt.Sprintf(`<?xml version="1.0" encoding="UTF-8"?>
|
||||
<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uid" version="2.0">
|
||||
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
|
||||
<dc:title>%s</dc:title>
|
||||
<dc:creator>%s</dc:creator>
|
||||
<dc:identifier id="uid">%s</dc:identifier>
|
||||
<dc:language>en</dc:language>
|
||||
</metadata>
|
||||
<manifest>
|
||||
<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
|
||||
<item id="css" href="style.css" media-type="text/css"/>
|
||||
%s </manifest>
|
||||
<spine toc="ncx">
|
||||
%s </spine>
|
||||
</package>`, escapeXML(title), escapeXML(author), slug, items.String(), spine.String())
|
||||
}
|
||||
|
||||
func tocNCX(slug, title string, chapters []epubChapter) string {
|
||||
var points strings.Builder
|
||||
for i, ch := range chapters {
|
||||
chTitle := ch.Title
|
||||
if chTitle == "" {
|
||||
chTitle = fmt.Sprintf("Chapter %d", ch.Number)
|
||||
}
|
||||
points.WriteString(fmt.Sprintf(` <navPoint id="np%d" playOrder="%d">
|
||||
<navLabel><text>%s</text></navLabel>
|
||||
<content src="chapter-%04d.xhtml"/>
|
||||
</navPoint>`+"\n", i+1, i+1, escapeXML(chTitle), ch.Number))
|
||||
}
|
||||
return fmt.Sprintf(`<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">
|
||||
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
|
||||
<head><meta name="dtb:uid" content="%s"/></head>
|
||||
<docTitle><text>%s</text></docTitle>
|
||||
<navMap>
|
||||
%s </navMap>
|
||||
</ncx>`, slug, escapeXML(title), points.String())
|
||||
}
|
||||
|
||||
func chapterXHTML(ch epubChapter) string {
|
||||
title := ch.Title
|
||||
if title == "" {
|
||||
title = fmt.Sprintf("Chapter %d", ch.Number)
|
||||
}
|
||||
return fmt.Sprintf(`<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head><title>%s</title><link rel="stylesheet" href="style.css"/></head>
|
||||
<body>
|
||||
<h1 class="chapter-title">%s</h1>
|
||||
%s
|
||||
</body>
|
||||
</html>`, escapeXML(title), escapeXML(title), ch.HTML)
|
||||
}
|
||||
|
||||
// epubCSS returns the fixed stylesheet written to OEBPS/style.css and linked
// from every chapter document.
func epubCSS() string {
	return `body { font-family: Georgia, serif; font-size: 1em; line-height: 1.6; margin: 1em 2em; }
h1.chapter-title { font-size: 1.4em; margin-bottom: 1em; }
p { margin: 0 0 0.8em 0; text-indent: 1.5em; }
p:first-of-type { text-indent: 0; }
`
}
|
||||
|
||||
// epubXMLEscaper maps the four characters escapeXML handles to their XML
// entities. A Replacer makes a single pass over the input, so an ampersand
// produced by one replacement is never escaped a second time.
var epubXMLEscaper = strings.NewReplacer(
	"&", "&amp;",
	"<", "&lt;",
	">", "&gt;",
	`"`, "&quot;",
)

// escapeXML returns s with &, <, > and " replaced by their XML entities so
// the result can be used in XML text content or inside a double-quoted
// attribute value. Single quotes are left untouched.
func escapeXML(s string) string {
	return epubXMLEscaper.Replace(s)
}
|
||||
1864
backend/internal/backend/handlers.go
Normal file
1864
backend/internal/backend/handlers.go
Normal file
File diff suppressed because it is too large
Load Diff
405
backend/internal/backend/server.go
Normal file
405
backend/internal/backend/server.go
Normal file
@@ -0,0 +1,405 @@
|
||||
// Package backend implements the HTTP API server for the LibNovel backend.
|
||||
//
|
||||
// The server exposes all endpoints consumed by the SvelteKit UI:
|
||||
// - Book/chapter reads from PocketBase/MinIO via bookstore interfaces
|
||||
// - Task creation (scrape + audio) via taskqueue.Producer — the runner binary
|
||||
// picks up and executes those tasks asynchronously
|
||||
// - Presigned MinIO URLs for media playback/upload
|
||||
// - Session-scoped reading progress
|
||||
// - Live novelfire.net search (no scraper interface needed; direct HTTP)
|
||||
// - Kokoro voice list
|
||||
//
|
||||
// The backend never scrapes directly. All scraping (metadata, chapter list,
|
||||
// chapter text, audio TTS) is delegated to the runner binary via PocketBase
|
||||
// task records. GET /api/book-preview enqueues a task when the book is absent.
|
||||
//
|
||||
// All external dependencies are injected as interfaces; concrete types live in
|
||||
// internal/storage and are wired by cmd/backend/main.go.
|
||||
package backend
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
sentryhttp "github.com/getsentry/sentry-go/http"
|
||||
"github.com/libnovel/backend/internal/bookstore"
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
"github.com/libnovel/backend/internal/kokoro"
|
||||
"github.com/libnovel/backend/internal/meili"
|
||||
"github.com/libnovel/backend/internal/pockettts"
|
||||
"github.com/libnovel/backend/internal/taskqueue"
|
||||
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
|
||||
)
|
||||
|
||||
// Dependencies holds all external services the backend server depends on.
|
||||
// Every field is an interface so test doubles can be injected freely.
|
||||
type Dependencies struct {
|
||||
// BookReader reads book metadata and chapter text from PocketBase/MinIO.
|
||||
BookReader bookstore.BookReader
|
||||
// RankingStore reads ranking data from PocketBase.
|
||||
RankingStore bookstore.RankingStore
|
||||
// AudioStore checks audio object existence and computes MinIO keys.
|
||||
AudioStore bookstore.AudioStore
|
||||
// TranslationStore checks translation existence and reads/writes translated markdown.
|
||||
TranslationStore bookstore.TranslationStore
|
||||
// PresignStore generates short-lived MinIO URLs.
|
||||
PresignStore bookstore.PresignStore
|
||||
// ProgressStore reads/writes per-session reading progress.
|
||||
ProgressStore bookstore.ProgressStore
|
||||
// CoverStore reads and writes book cover images from MinIO.
|
||||
// If nil, the cover endpoint falls back to a CDN redirect.
|
||||
CoverStore bookstore.CoverStore
|
||||
// Producer creates scrape/audio tasks in PocketBase.
|
||||
Producer taskqueue.Producer
|
||||
// TaskReader reads scrape/audio task records from PocketBase.
|
||||
TaskReader taskqueue.Reader
|
||||
// SearchIndex provides full-text book search via Meilisearch.
|
||||
// If nil, the local-only fallback search is used.
|
||||
SearchIndex meili.Client
|
||||
// Kokoro is the Kokoro TTS client (used for voice list only in the backend;
|
||||
// audio generation is done by the runner).
|
||||
Kokoro kokoro.Client
|
||||
// PocketTTS is the pocket-tts client (used for voice list only in the backend;
|
||||
// audio generation is done by the runner).
|
||||
PocketTTS pockettts.Client
|
||||
// Log is the structured logger.
|
||||
Log *slog.Logger
|
||||
}
|
||||
|
||||
// Config carries the HTTP server settings.
type Config struct {
	// Addr is the listen address, e.g. ":8080".
	Addr string
	// DefaultVoice is the voice used when an audio request names none.
	// New substitutes "af_bella" when it is empty.
	DefaultVoice string
	// Version is reported by /health and /api/version.
	Version string
	// Commit is reported by /health and /api/version.
	Commit string
}
|
||||
|
||||
// Server is the HTTP API server.
|
||||
type Server struct {
|
||||
cfg Config
|
||||
deps Dependencies
|
||||
|
||||
// voiceMu guards cachedVoices. Populated lazily on first GET /api/voices.
|
||||
voiceMu sync.RWMutex
|
||||
cachedVoices []domain.Voice
|
||||
}
|
||||
|
||||
// New creates a Server from cfg and deps.
|
||||
func New(cfg Config, deps Dependencies) *Server {
|
||||
if cfg.DefaultVoice == "" {
|
||||
cfg.DefaultVoice = "af_bella"
|
||||
}
|
||||
if deps.Log == nil {
|
||||
deps.Log = slog.Default()
|
||||
}
|
||||
if deps.SearchIndex == nil {
|
||||
deps.SearchIndex = meili.NoopClient{}
|
||||
}
|
||||
return &Server{cfg: cfg, deps: deps}
|
||||
}
|
||||
|
||||
// ListenAndServe registers all routes and starts the HTTP server.
|
||||
// It blocks until ctx is cancelled, then performs a graceful shutdown.
|
||||
func (s *Server) ListenAndServe(ctx context.Context) error {
|
||||
mux := http.NewServeMux()
|
||||
|
||||
// Health / version
|
||||
mux.HandleFunc("GET /health", s.handleHealth)
|
||||
mux.HandleFunc("GET /api/version", s.handleVersion)
|
||||
|
||||
// Scrape task creation (202 Accepted — runner executes asynchronously)
|
||||
mux.HandleFunc("POST /scrape", s.handleScrapeCatalogue)
|
||||
mux.HandleFunc("POST /scrape/book", s.handleScrapeBook)
|
||||
mux.HandleFunc("POST /scrape/book/range", s.handleScrapeBookRange)
|
||||
|
||||
// Scrape task status / history
|
||||
mux.HandleFunc("GET /api/scrape/status", s.handleScrapeStatus)
|
||||
mux.HandleFunc("GET /api/scrape/tasks", s.handleScrapeTasks)
|
||||
|
||||
// Cancel a pending task (scrape or audio)
|
||||
mux.HandleFunc("POST /api/cancel-task/{id}", s.handleCancelTask)
|
||||
|
||||
// Browse & search
|
||||
mux.HandleFunc("GET /api/search", s.handleSearch)
|
||||
|
||||
// Catalogue (Meilisearch-backed browse + search — preferred path for UI)
|
||||
mux.HandleFunc("GET /api/catalogue", s.handleCatalogue)
|
||||
|
||||
// Ranking (from PocketBase)
|
||||
mux.HandleFunc("GET /api/ranking", s.handleGetRanking)
|
||||
|
||||
// Cover proxy (live URL redirect)
|
||||
mux.HandleFunc("GET /api/cover/{domain}/{slug}", s.handleGetCover)
|
||||
|
||||
// Book preview (enqueues scrape task if not in library; returns stored data if already scraped)
|
||||
mux.HandleFunc("GET /api/book-preview/{slug}", s.handleBookPreview)
|
||||
|
||||
// Chapter text (served from MinIO via PocketBase index)
|
||||
mux.HandleFunc("GET /api/chapter-text/{slug}/{n}", s.handleChapterText)
|
||||
// Raw markdown chapter content — served directly from MinIO by the backend.
|
||||
// Use this instead of presign+fetch to avoid SvelteKit→MinIO network path.
|
||||
mux.HandleFunc("GET /api/chapter-markdown/{slug}/{n}", s.handleChapterMarkdown)
|
||||
|
||||
// Chapter text preview — live scrape from novelfire.net, no store writes.
|
||||
// Used when the chapter is not yet in the library (preview mode).
|
||||
mux.HandleFunc("GET /api/chapter-text-preview/{slug}/{n}", s.handleChapterTextPreview)
|
||||
|
||||
// Reindex chapters_idx from MinIO
|
||||
mux.HandleFunc("POST /api/reindex/{slug}", s.handleReindex)
|
||||
|
||||
// Audio task creation (backend creates task; runner executes)
|
||||
mux.HandleFunc("POST /api/audio/{slug}/{n}", s.handleAudioGenerate)
|
||||
mux.HandleFunc("GET /api/audio/status/{slug}/{n}", s.handleAudioStatus)
|
||||
mux.HandleFunc("GET /api/audio-proxy/{slug}/{n}", s.handleAudioProxy)
|
||||
// Streaming audio: serves from MinIO if cached, else streams live TTS
|
||||
// while simultaneously uploading to MinIO for future requests.
|
||||
mux.HandleFunc("GET /api/audio-stream/{slug}/{n}", s.handleAudioStream)
|
||||
|
||||
// Translation task creation (backend creates task; runner executes via LibreTranslate)
|
||||
mux.HandleFunc("POST /api/translation/{slug}/{n}", s.handleTranslationGenerate)
|
||||
mux.HandleFunc("GET /api/translation/status/{slug}/{n}", s.handleTranslationStatus)
|
||||
mux.HandleFunc("GET /api/translation/{slug}/{n}", s.handleTranslationRead)
|
||||
|
||||
// Admin translation endpoints
|
||||
mux.HandleFunc("GET /api/admin/translation/jobs", s.handleAdminTranslationJobs)
|
||||
mux.HandleFunc("POST /api/admin/translation/bulk", s.handleAdminTranslationBulk)
|
||||
|
||||
// Admin audio endpoints
|
||||
mux.HandleFunc("GET /api/admin/audio/jobs", s.handleAdminAudioJobs)
|
||||
mux.HandleFunc("POST /api/admin/audio/bulk", s.handleAdminAudioBulk)
|
||||
mux.HandleFunc("POST /api/admin/audio/cancel-bulk", s.handleAdminAudioCancelBulk)
|
||||
|
||||
// Voices list
|
||||
mux.HandleFunc("GET /api/voices", s.handleVoices)
|
||||
|
||||
// Presigned URLs
|
||||
mux.HandleFunc("GET /api/presign/chapter/{slug}/{n}", s.handlePresignChapter)
|
||||
mux.HandleFunc("GET /api/presign/audio/{slug}/{n}", s.handlePresignAudio)
|
||||
mux.HandleFunc("GET /api/presign/voice-sample/{voice}", s.handlePresignVoiceSample)
|
||||
mux.HandleFunc("GET /api/presign/avatar-upload/{userId}", s.handlePresignAvatarUpload)
|
||||
mux.HandleFunc("GET /api/presign/avatar/{userId}", s.handlePresignAvatar)
|
||||
mux.HandleFunc("PUT /api/avatar-upload/{userId}", s.handleAvatarUpload)
|
||||
|
||||
// EPUB export
|
||||
mux.HandleFunc("GET /api/export/{slug}", s.handleExportEPUB)
|
||||
|
||||
// Reading progress
|
||||
mux.HandleFunc("GET /api/progress", s.handleGetProgress)
|
||||
mux.HandleFunc("POST /api/progress/{slug}", s.handleSetProgress)
|
||||
mux.HandleFunc("DELETE /api/progress/{slug}", s.handleDeleteProgress)
|
||||
|
||||
// Wrap mux with OTel tracing (no-op when no TracerProvider is set),
|
||||
// then with Sentry for panic recovery and error reporting.
|
||||
var handler http.Handler = mux
|
||||
handler = otelhttp.NewHandler(handler, "libnovel.backend",
|
||||
otelhttp.WithMessageEvents(otelhttp.ReadEvents, otelhttp.WriteEvents),
|
||||
)
|
||||
handler = sentryhttp.New(sentryhttp.Options{Repanic: true}).Handle(handler)
|
||||
|
||||
srv := &http.Server{
|
||||
Addr: s.cfg.Addr,
|
||||
Handler: handler,
|
||||
ReadTimeout: 15 * time.Second,
|
||||
WriteTimeout: 15 * time.Minute, // audio-stream can take several minutes for a full chapter
|
||||
IdleTimeout: 60 * time.Second,
|
||||
}
|
||||
|
||||
errCh := make(chan error, 1)
|
||||
go func() { errCh <- srv.ListenAndServe() }()
|
||||
s.deps.Log.Info("backend: HTTP server listening", "addr", s.cfg.Addr)
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
s.deps.Log.Info("backend: context cancelled, starting graceful shutdown")
|
||||
shutCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
if err := srv.Shutdown(shutCtx); err != nil {
|
||||
s.deps.Log.Error("backend: graceful shutdown failed", "err", err)
|
||||
return err
|
||||
}
|
||||
s.deps.Log.Info("backend: shutdown complete")
|
||||
return nil
|
||||
case err := <-errCh:
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// ── Session cookie helpers ─────────────────────────────────────────────────────
|
||||
|
||||
// sessionCookieName is the name of the cookie that carries the session ID.
const sessionCookieName = "libnovel_session"

// sessionID returns the value of the session cookie on r, or "" when the
// request carries no such cookie.
func sessionID(r *http.Request) string {
	if c, err := r.Cookie(sessionCookieName); err == nil {
		return c.Value
	}
	return ""
}
|
||||
|
||||
// newSessionID returns 16 bytes from crypto/rand encoded as 32 lowercase hex
// characters. If the random source fails, it returns "" and that error.
func newSessionID() (string, error) {
	var raw [16]byte
	if _, err := rand.Read(raw[:]); err != nil {
		return "", err
	}
	return hex.EncodeToString(raw[:]), nil
}
|
||||
|
||||
func ensureSession(w http.ResponseWriter, r *http.Request) string {
|
||||
if id := sessionID(r); id != "" {
|
||||
return id
|
||||
}
|
||||
id, err := newSessionID()
|
||||
if err != nil {
|
||||
id = fmt.Sprintf("fallback-%d", time.Now().UnixNano())
|
||||
}
|
||||
http.SetCookie(w, &http.Cookie{
|
||||
Name: sessionCookieName,
|
||||
Value: id,
|
||||
Path: "/",
|
||||
HttpOnly: true,
|
||||
SameSite: http.SameSiteLaxMode,
|
||||
MaxAge: 365 * 24 * 60 * 60,
|
||||
})
|
||||
return id
|
||||
}
|
||||
|
||||
// ── Utility helpers ────────────────────────────────────────────────────────────
|
||||
|
||||
// writeJSON sends v as a JSON response. A status of 0 leaves the status code
// unset, so the ResponseWriter's implicit 200 applies; any other value is
// written explicitly. Encoding errors are deliberately ignored because the
// header has already been sent by then.
func writeJSON(w http.ResponseWriter, status int, v any) {
	w.Header().Set("Content-Type", "application/json")
	switch status {
	case 0:
		// keep the implicit 200
	default:
		w.WriteHeader(status)
	}
	enc := json.NewEncoder(w)
	_ = enc.Encode(v)
}
|
||||
|
||||
// jsonError responds with the given status code and a JSON object of the
// form {"error": msg}. Encoding errors are deliberately ignored because the
// header has already been sent by then.
func jsonError(w http.ResponseWriter, status int, msg string) {
	payload := struct {
		Error string `json:"error"`
	}{Error: msg}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	_ = json.NewEncoder(w).Encode(payload)
}
|
||||
|
||||
// voices returns the merged list of available voices from Kokoro and pocket-tts.
|
||||
// On the first call it fetches from both services and caches the result.
|
||||
// Falls back to the hardcoded Kokoro list on error.
|
||||
func (s *Server) voices(ctx context.Context) []domain.Voice {
|
||||
s.voiceMu.RLock()
|
||||
cached := s.cachedVoices
|
||||
s.voiceMu.RUnlock()
|
||||
if len(cached) > 0 {
|
||||
return cached
|
||||
}
|
||||
|
||||
fetchCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
var result []domain.Voice
|
||||
|
||||
// ── Kokoro voices ─────────────────────────────────────────────────────────
|
||||
var kokoroIDs []string
|
||||
if s.deps.Kokoro != nil {
|
||||
ids, err := s.deps.Kokoro.ListVoices(fetchCtx)
|
||||
if err != nil || len(ids) == 0 {
|
||||
s.deps.Log.Warn("backend: could not fetch kokoro voices, using built-in list", "err", err)
|
||||
ids = kokoroVoiceIDs
|
||||
} else {
|
||||
s.deps.Log.Info("backend: fetched kokoro voices", "count", len(ids))
|
||||
}
|
||||
kokoroIDs = ids
|
||||
} else {
|
||||
kokoroIDs = kokoroVoiceIDs
|
||||
}
|
||||
for _, id := range kokoroIDs {
|
||||
result = append(result, kokoroVoice(id))
|
||||
}
|
||||
|
||||
// ── Pocket-TTS voices ─────────────────────────────────────────────────────
|
||||
if s.deps.PocketTTS != nil {
|
||||
ids, err := s.deps.PocketTTS.ListVoices(fetchCtx)
|
||||
if err != nil {
|
||||
s.deps.Log.Warn("backend: could not fetch pocket-tts voices", "err", err)
|
||||
} else {
|
||||
for _, id := range ids {
|
||||
result = append(result, pocketTTSVoice(id))
|
||||
}
|
||||
s.deps.Log.Info("backend: fetched pocket-tts voices", "count", len(ids))
|
||||
}
|
||||
}
|
||||
|
||||
s.voiceMu.Lock()
|
||||
s.cachedVoices = result
|
||||
s.voiceMu.Unlock()
|
||||
return result
|
||||
}
|
||||
|
||||
// kokoroVoice builds a domain.Voice for a Kokoro voice ID.
|
||||
// The two-character prefix encodes language and gender:
|
||||
//
|
||||
// af/am → en-us f/m | bf/bm → en-gb f/m
|
||||
// ef/em → es f/m | ff → fr f
|
||||
// hf/hm → hi f/m | if/im → it f/m
|
||||
// jf/jm → ja f/m | pf/pm → pt f/m
|
||||
// zf/zm → zh f/m
|
||||
func kokoroVoice(id string) domain.Voice {
|
||||
type meta struct{ lang, gender string }
|
||||
prefixMap := map[string]meta{
|
||||
"af": {"en-us", "f"}, "am": {"en-us", "m"},
|
||||
"bf": {"en-gb", "f"}, "bm": {"en-gb", "m"},
|
||||
"ef": {"es", "f"}, "em": {"es", "m"},
|
||||
"ff": {"fr", "f"},
|
||||
"hf": {"hi", "f"}, "hm": {"hi", "m"},
|
||||
"if": {"it", "f"}, "im": {"it", "m"},
|
||||
"jf": {"ja", "f"}, "jm": {"ja", "m"},
|
||||
"pf": {"pt", "f"}, "pm": {"pt", "m"},
|
||||
"zf": {"zh", "f"}, "zm": {"zh", "m"},
|
||||
}
|
||||
if len(id) >= 2 {
|
||||
if m, ok := prefixMap[id[:2]]; ok {
|
||||
return domain.Voice{ID: id, Engine: "kokoro", Lang: m.lang, Gender: m.gender}
|
||||
}
|
||||
}
|
||||
return domain.Voice{ID: id, Engine: "kokoro", Lang: "en", Gender: ""}
|
||||
}
|
||||
|
||||
// pocketTTSVoice builds a domain.Voice for a pocket-tts voice ID.
|
||||
// All pocket-tts voices are English audiobook narrators.
|
||||
func pocketTTSVoice(id string) domain.Voice {
|
||||
femaleVoices := map[string]struct{}{
|
||||
"alba": {}, "fantine": {}, "cosette": {}, "eponine": {},
|
||||
"azelma": {}, "anna": {}, "vera": {}, "mary": {}, "jane": {}, "eve": {},
|
||||
}
|
||||
gender := "m"
|
||||
if _, ok := femaleVoices[id]; ok {
|
||||
gender = "f"
|
||||
}
|
||||
return domain.Voice{ID: id, Engine: "pocket-tts", Lang: "en", Gender: gender}
|
||||
}
|
||||
|
||||
// handleHealth handles GET /health.
|
||||
func (s *Server) handleHealth(w http.ResponseWriter, _ *http.Request) {
|
||||
writeJSON(w, 0, map[string]string{
|
||||
"status": "ok",
|
||||
"version": s.cfg.Version,
|
||||
"commit": s.cfg.Commit,
|
||||
})
|
||||
}
|
||||
|
||||
// handleVersion handles GET /api/version.
|
||||
func (s *Server) handleVersion(w http.ResponseWriter, _ *http.Request) {
|
||||
writeJSON(w, 0, map[string]string{
|
||||
"version": s.cfg.Version,
|
||||
"commit": s.cfg.Commit,
|
||||
})
|
||||
}
|
||||
170
backend/internal/bookstore/bookstore.go
Normal file
170
backend/internal/bookstore/bookstore.go
Normal file
@@ -0,0 +1,170 @@
|
||||
// Package bookstore defines the segregated read/write interfaces for book,
|
||||
// chapter, ranking, progress, audio, and presign data.
|
||||
//
|
||||
// Interface segregation:
|
||||
// - BookWriter — used by the runner to persist scraped data.
|
||||
// - BookReader — used by the backend to serve book/chapter data.
|
||||
// - RankingStore — used by both runner (write) and backend (read).
|
||||
// - PresignStore — used only by the backend for URL signing.
|
||||
// - AudioStore — used by the runner to store audio; backend for presign.
|
||||
// - ProgressStore— used only by the backend for reading progress.
|
||||
//
|
||||
// Concrete implementations live in internal/storage.
|
||||
package bookstore
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"time"
|
||||
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
)
|
||||
|
||||
// BookWriter is the write side used by the runner after scraping a book.
|
||||
type BookWriter interface {
|
||||
// WriteMetadata upserts all bibliographic fields for a book.
|
||||
WriteMetadata(ctx context.Context, meta domain.BookMeta) error
|
||||
|
||||
// WriteChapter stores a fully-scraped chapter's text in MinIO and
|
||||
// updates the chapters_idx record in PocketBase.
|
||||
WriteChapter(ctx context.Context, slug string, chapter domain.Chapter) error
|
||||
|
||||
// WriteChapterRefs persists chapter metadata (number + title) into
|
||||
// chapters_idx without fetching or storing chapter text.
|
||||
WriteChapterRefs(ctx context.Context, slug string, refs []domain.ChapterRef) error
|
||||
|
||||
// ChapterExists returns true if the markdown object for ref already exists.
|
||||
ChapterExists(ctx context.Context, slug string, ref domain.ChapterRef) bool
|
||||
}
|
||||
|
||||
// BookReader is the read side used by the backend to serve content.
|
||||
type BookReader interface {
|
||||
// ReadMetadata returns the metadata for slug.
|
||||
// Returns (zero, false, nil) when not found.
|
||||
ReadMetadata(ctx context.Context, slug string) (domain.BookMeta, bool, error)
|
||||
|
||||
// ListBooks returns all books sorted alphabetically by title.
|
||||
ListBooks(ctx context.Context) ([]domain.BookMeta, error)
|
||||
|
||||
// LocalSlugs returns the set of slugs that have metadata stored.
|
||||
LocalSlugs(ctx context.Context) (map[string]bool, error)
|
||||
|
||||
// MetadataMtime returns the Unix-second mtime of the metadata record, or 0.
|
||||
MetadataMtime(ctx context.Context, slug string) int64
|
||||
|
||||
// ReadChapter returns the raw markdown for chapter number n.
|
||||
ReadChapter(ctx context.Context, slug string, n int) (string, error)
|
||||
|
||||
// ListChapters returns all stored chapters for slug, sorted by number.
|
||||
ListChapters(ctx context.Context, slug string) ([]domain.ChapterInfo, error)
|
||||
|
||||
// CountChapters returns the count of stored chapters.
|
||||
CountChapters(ctx context.Context, slug string) int
|
||||
|
||||
// ReindexChapters rebuilds chapters_idx from MinIO objects for slug.
|
||||
ReindexChapters(ctx context.Context, slug string) (int, error)
|
||||
}
|
||||
|
||||
// RankingStore covers ranking reads and writes.
|
||||
type RankingStore interface {
|
||||
// WriteRankingItem upserts a single ranking entry (keyed on Slug).
|
||||
WriteRankingItem(ctx context.Context, item domain.RankingItem) error
|
||||
|
||||
// ReadRankingItems returns all ranking items sorted by rank ascending.
|
||||
ReadRankingItems(ctx context.Context) ([]domain.RankingItem, error)
|
||||
|
||||
// RankingFreshEnough returns true when ranking rows exist and the most
|
||||
// recent Updated timestamp is within maxAge.
|
||||
RankingFreshEnough(ctx context.Context, maxAge time.Duration) (bool, error)
|
||||
}
|
||||
|
||||
// AudioStore groups the audio object operations (the runner writes, the
// backend reads).
type AudioStore interface {
	// AudioObjectKey returns the MinIO object key of a cached MP3 file.
	// Format: {slug}/{n}/{voice}.mp3
	AudioObjectKey(slug string, n int, voice string) string

	// AudioObjectKeyExt returns the MinIO object key of a cached audio file
	// with a caller-chosen extension (e.g. "mp3" or "wav").
	AudioObjectKeyExt(slug string, n int, voice, ext string) string

	// AudioExists reports whether the audio object is present in MinIO.
	AudioExists(ctx context.Context, key string) bool

	// PutAudio stores raw audio bytes under the given MinIO object key.
	PutAudio(ctx context.Context, key string, data []byte) error

	// PutAudioStream uploads the audio read from r to MinIO under key.
	// size must be the exact byte length of r, or -1 to use multipart
	// upload. contentType should be "audio/mpeg" or "audio/wav".
	PutAudioStream(ctx context.Context, key string, r io.Reader, size int64, contentType string) error
}
|
||||
|
||||
// PresignStore produces short-lived URLs and manages avatar objects; it is
// used exclusively by the backend.
type PresignStore interface {
	// PresignChapter returns a presigned GET URL for a chapter markdown object.
	PresignChapter(ctx context.Context, slug string, n int, expires time.Duration) (string, error)

	// PresignAudio returns a presigned GET URL for an audio object.
	PresignAudio(ctx context.Context, key string, expires time.Duration) (string, error)

	// PresignAvatarUpload returns a short-lived presigned PUT URL for
	// uploading an avatar image, together with the object key.
	// ext should be "jpg", "png", or "webp".
	PresignAvatarUpload(ctx context.Context, userID, ext string) (uploadURL, key string, err error)

	// PresignAvatarURL returns a presigned GET URL for a user's avatar.
	// It returns ("", false, nil) when no avatar exists.
	PresignAvatarURL(ctx context.Context, userID string) (string, bool, error)

	// PutAvatar stores raw image bytes for a user avatar directly in MinIO
	// and returns the object key. ext should be "jpg", "png", or "webp".
	PutAvatar(ctx context.Context, userID, ext, contentType string, data []byte) (key string, err error)

	// DeleteAvatar removes all avatar objects for a user.
	DeleteAvatar(ctx context.Context, userID string) error
}
|
||||
|
||||
// ProgressStore covers per-session reading progress — backend only.
|
||||
type ProgressStore interface {
|
||||
// GetProgress returns the reading progress for the given session + slug.
|
||||
GetProgress(ctx context.Context, sessionID, slug string) (domain.ReadingProgress, bool)
|
||||
|
||||
// SetProgress saves or updates reading progress.
|
||||
SetProgress(ctx context.Context, sessionID string, p domain.ReadingProgress) error
|
||||
|
||||
// AllProgress returns all progress entries for a session.
|
||||
AllProgress(ctx context.Context, sessionID string) ([]domain.ReadingProgress, error)
|
||||
|
||||
// DeleteProgress removes progress for a specific slug.
|
||||
DeleteProgress(ctx context.Context, sessionID, slug string) error
|
||||
}
|
||||
|
||||
// CoverStore handles book cover image storage in MinIO.
// The runner writes covers during catalogue refresh; the backend reads them.
type CoverStore interface {
	// PutCover stores a raw cover image for the book identified by slug.
	PutCover(ctx context.Context, slug string, data []byte, contentType string) error

	// GetCover retrieves the cover image for a book. The four results are the
	// image bytes, a string, a found flag and an error; when no cover exists
	// for slug it returns (nil, "", false, nil).
	// NOTE(review): the string result is presumably the content type, mirroring
	// PutCover's contentType parameter — confirm against the implementation.
	GetCover(ctx context.Context, slug string) ([]byte, string, bool, error)

	// CoverExists reports whether a cover image is stored for slug.
	CoverExists(ctx context.Context, slug string) bool
}
|
||||
|
||||
// TranslationStore handles machine-translated chapter storage in MinIO.
// The runner writes translations; the backend reads them.
type TranslationStore interface {
	// TranslationObjectKey returns the MinIO object key for a cached translation.
	TranslationObjectKey(lang string, slug string, n int) string

	// TranslationExists reports whether the translation object is present in MinIO.
	TranslationExists(ctx context.Context, key string) bool

	// PutTranslation stores raw translated markdown under the given MinIO object key.
	PutTranslation(ctx context.Context, key string, data []byte) error

	// GetTranslation retrieves translated markdown from MinIO.
	GetTranslation(ctx context.Context, key string) (string, error)
}
|
||||
146
backend/internal/bookstore/bookstore_test.go
Normal file
146
backend/internal/bookstore/bookstore_test.go
Normal file
@@ -0,0 +1,146 @@
|
||||
package bookstore_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/libnovel/backend/internal/bookstore"
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
)
|
||||
|
||||
// ── Mock that satisfies all bookstore interfaces ──────────────────────────────
|
||||
|
||||
type mockStore struct{}
|
||||
|
||||
// BookWriter
|
||||
func (m *mockStore) WriteMetadata(_ context.Context, _ domain.BookMeta) error { return nil }
|
||||
func (m *mockStore) WriteChapter(_ context.Context, _ string, _ domain.Chapter) error { return nil }
|
||||
func (m *mockStore) WriteChapterRefs(_ context.Context, _ string, _ []domain.ChapterRef) error {
|
||||
return nil
|
||||
}
|
||||
func (m *mockStore) ChapterExists(_ context.Context, _ string, _ domain.ChapterRef) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// BookReader
|
||||
func (m *mockStore) ReadMetadata(_ context.Context, _ string) (domain.BookMeta, bool, error) {
|
||||
return domain.BookMeta{}, false, nil
|
||||
}
|
||||
func (m *mockStore) ListBooks(_ context.Context) ([]domain.BookMeta, error) { return nil, nil }
|
||||
func (m *mockStore) LocalSlugs(_ context.Context) (map[string]bool, error) {
|
||||
return map[string]bool{}, nil
|
||||
}
|
||||
func (m *mockStore) MetadataMtime(_ context.Context, _ string) int64 { return 0 }
|
||||
func (m *mockStore) ReadChapter(_ context.Context, _ string, _ int) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
func (m *mockStore) ListChapters(_ context.Context, _ string) ([]domain.ChapterInfo, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) CountChapters(_ context.Context, _ string) int { return 0 }
|
||||
func (m *mockStore) ReindexChapters(_ context.Context, _ string) (int, error) { return 0, nil }
|
||||
|
||||
// RankingStore
|
||||
func (m *mockStore) WriteRankingItem(_ context.Context, _ domain.RankingItem) error { return nil }
|
||||
func (m *mockStore) ReadRankingItems(_ context.Context) ([]domain.RankingItem, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) RankingFreshEnough(_ context.Context, _ time.Duration) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// AudioStore
|
||||
func (m *mockStore) AudioObjectKey(_ string, _ int, _ string) string { return "" }
|
||||
func (m *mockStore) AudioObjectKeyExt(_ string, _ int, _, _ string) string { return "" }
|
||||
func (m *mockStore) AudioExists(_ context.Context, _ string) bool { return false }
|
||||
func (m *mockStore) PutAudio(_ context.Context, _ string, _ []byte) error { return nil }
|
||||
func (m *mockStore) PutAudioStream(_ context.Context, _ string, _ io.Reader, _ int64, _ string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// PresignStore
|
||||
func (m *mockStore) PresignChapter(_ context.Context, _ string, _ int, _ time.Duration) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
func (m *mockStore) PresignAudio(_ context.Context, _ string, _ time.Duration) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
func (m *mockStore) PresignAvatarUpload(_ context.Context, _, _ string) (string, string, error) {
|
||||
return "", "", nil
|
||||
}
|
||||
func (m *mockStore) PresignAvatarURL(_ context.Context, _ string) (string, bool, error) {
|
||||
return "", false, nil
|
||||
}
|
||||
func (m *mockStore) PutAvatar(_ context.Context, _, _, _ string, _ []byte) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
func (m *mockStore) DeleteAvatar(_ context.Context, _ string) error { return nil }
|
||||
|
||||
// ProgressStore
|
||||
func (m *mockStore) GetProgress(_ context.Context, _, _ string) (domain.ReadingProgress, bool) {
|
||||
return domain.ReadingProgress{}, false
|
||||
}
|
||||
func (m *mockStore) SetProgress(_ context.Context, _ string, _ domain.ReadingProgress) error {
|
||||
return nil
|
||||
}
|
||||
func (m *mockStore) AllProgress(_ context.Context, _ string) ([]domain.ReadingProgress, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) DeleteProgress(_ context.Context, _, _ string) error { return nil }
|
||||
|
||||
// ── Compile-time interface satisfaction ───────────────────────────────────────
|
||||
|
||||
var _ bookstore.BookWriter = (*mockStore)(nil)
|
||||
var _ bookstore.BookReader = (*mockStore)(nil)
|
||||
var _ bookstore.RankingStore = (*mockStore)(nil)
|
||||
var _ bookstore.AudioStore = (*mockStore)(nil)
|
||||
var _ bookstore.PresignStore = (*mockStore)(nil)
|
||||
var _ bookstore.ProgressStore = (*mockStore)(nil)
|
||||
|
||||
// ── Behavioural tests ─────────────────────────────────────────────────────────
|
||||
|
||||
func TestBookWriter_WriteMetadata_ReturnsNilError(t *testing.T) {
|
||||
var w bookstore.BookWriter = &mockStore{}
|
||||
if err := w.WriteMetadata(context.Background(), domain.BookMeta{Slug: "test"}); err != nil {
|
||||
t.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBookReader_ReadMetadata_NotFound(t *testing.T) {
|
||||
var r bookstore.BookReader = &mockStore{}
|
||||
_, found, err := r.ReadMetadata(context.Background(), "unknown")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if found {
|
||||
t.Error("expected not found")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRankingStore_RankingFreshEnough_ReturnsFalse(t *testing.T) {
|
||||
var s bookstore.RankingStore = &mockStore{}
|
||||
fresh, err := s.RankingFreshEnough(context.Background(), time.Hour)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if fresh {
|
||||
t.Error("expected false")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAudioStore_AudioExists_ReturnsFalse(t *testing.T) {
|
||||
var s bookstore.AudioStore = &mockStore{}
|
||||
if s.AudioExists(context.Background(), "audio/slug/1/af_bella.mp3") {
|
||||
t.Error("expected false")
|
||||
}
|
||||
}
|
||||
|
||||
func TestProgressStore_GetProgress_NotFound(t *testing.T) {
|
||||
var s bookstore.ProgressStore = &mockStore{}
|
||||
_, found := s.GetProgress(context.Background(), "session-1", "slug")
|
||||
if found {
|
||||
t.Error("expected not found")
|
||||
}
|
||||
}
|
||||
191
backend/internal/browser/browser.go
Normal file
191
backend/internal/browser/browser.go
Normal file
@@ -0,0 +1,191 @@
|
||||
// Package browser provides a rate-limited HTTP client for web scraping.
|
||||
package browser
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ErrRateLimit is the sentinel for a 429 response from GetContent. Match it
// with errors.Is; the suggested retry delay is carried by *RateLimitError,
// not by this value.
var ErrRateLimit = errors.New("rate limited (429)")

// RateLimitError is the concrete error for a 429 response. It matches
// ErrRateLimit through its Is method and carries the suggested wait.
type RateLimitError struct {
	// RetryAfter is how long the caller should wait before retrying. It is
	// taken from the Retry-After response header when present; otherwise a
	// default is used.
	RetryAfter time.Duration
}

// Error formats the error together with the suggested retry delay.
func (e *RateLimitError) Error() string {
	wait := e.RetryAfter
	return fmt.Sprintf("rate limited (429): retry after %s", wait)
}

// Is lets errors.Is(err, ErrRateLimit) succeed for any *RateLimitError.
func (e *RateLimitError) Is(target error) bool {
	return target == ErrRateLimit
}

// defaultRateLimitDelay is used when the server returns 429 with no
// Retry-After header.
const defaultRateLimitDelay = 60 * time.Second
|
||||
|
||||
// Client is what scrapers use to fetch raw page HTML.
// Implementations must be safe for concurrent use.
type Client interface {
	// GetContent fetches pageURL and returns the full response body as a
	// string. Implementations should honour ctx for cancellation and timeouts.
	GetContent(ctx context.Context, pageURL string) (string, error)
}
|
||||
|
||||
// Config carries the tunable parameters of the direct HTTP client.
type Config struct {
	// MaxConcurrent caps the number of simultaneous in-flight requests.
	// Values <= 0 select the default of 5.
	MaxConcurrent int
	// Timeout is the per-request deadline. Values <= 0 select the default
	// of 90s.
	Timeout time.Duration
}

// DirectClient is a plain net/http-based Client guarded by a concurrency
// semaphore.
type DirectClient struct {
	http      *http.Client
	semaphore chan struct{} // one buffered slot per permitted in-flight request
}

// NewDirectClient builds a DirectClient from cfg, substituting the defaults
// (5 concurrent requests, 90s timeout) for non-positive values.
func NewDirectClient(cfg Config) *DirectClient {
	limit := cfg.MaxConcurrent
	if limit <= 0 {
		limit = 5
	}
	timeout := cfg.Timeout
	if timeout <= 0 {
		timeout = 90 * time.Second
	}

	client := &http.Client{
		Timeout: timeout,
		Transport: &http.Transport{
			MaxIdleConnsPerHost: limit * 2,
			DisableCompression:  false,
		},
	}
	return &DirectClient{
		http:      client,
		semaphore: make(chan struct{}, limit),
	}
}
|
||||
|
||||
// GetContent fetches pageURL respecting the concurrency limit.
|
||||
func (c *DirectClient) GetContent(ctx context.Context, pageURL string) (string, error) {
|
||||
// Acquire semaphore slot.
|
||||
select {
|
||||
case c.semaphore <- struct{}{}:
|
||||
case <-ctx.Done():
|
||||
return "", ctx.Err()
|
||||
}
|
||||
defer func() { <-c.semaphore }()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, pageURL, nil)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("browser: build request %s: %w", pageURL, err)
|
||||
}
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; libnovel-runner/2)")
|
||||
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
|
||||
req.Header.Set("Accept-Language", "en-US,en;q=0.5")
|
||||
|
||||
resp, err := c.http.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("browser: GET %s: %w", pageURL, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode == http.StatusTooManyRequests {
|
||||
delay := defaultRateLimitDelay
|
||||
if ra := resp.Header.Get("Retry-After"); ra != "" {
|
||||
if secs, err := strconv.Atoi(ra); err == nil && secs > 0 {
|
||||
delay = time.Duration(secs) * time.Second
|
||||
}
|
||||
}
|
||||
return "", &RateLimitError{RetryAfter: delay}
|
||||
}
|
||||
|
||||
if resp.StatusCode >= 400 {
|
||||
return "", fmt.Errorf("browser: GET %s returned %d", pageURL, resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("browser: read body %s: %w", pageURL, err)
|
||||
}
|
||||
return string(body), nil
|
||||
}
|
||||
|
||||
// Do implements httputil.Client so DirectClient can be passed to RetryGet.
|
||||
func (c *DirectClient) Do(req *http.Request) (*http.Response, error) {
|
||||
select {
|
||||
case c.semaphore <- struct{}{}:
|
||||
case <-req.Context().Done():
|
||||
return nil, req.Context().Err()
|
||||
}
|
||||
defer func() { <-c.semaphore }()
|
||||
return c.http.Do(req)
|
||||
}
|
||||
|
||||
// ── Stub for testing ──────────────────────────────────────────────────────────
|
||||
|
||||
// StubClient is a test double for Client. Responses are pre-configured per
// URL; requests for unregistered URLs return an error. All methods take the
// internal mutex.
type StubClient struct {
	mu      sync.Mutex
	pages   map[string]string
	errors  map[string]error
	callLog []string
}

// NewStub creates a StubClient with no pages pre-loaded.
func NewStub() *StubClient {
	stub := new(StubClient)
	stub.pages = map[string]string{}
	stub.errors = map[string]error{}
	return stub
}

// SetPage registers a URL → HTML body mapping.
func (s *StubClient) SetPage(u, html string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.pages[u] = html
}

// SetError registers a URL → error mapping (returned instead of a body).
func (s *StubClient) SetError(u string, err error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.errors[u] = err
}

// CallLog returns a copy of the ordered list of URLs that were requested.
func (s *StubClient) CallLog() []string {
	s.mu.Lock()
	defer s.mu.Unlock()

	snapshot := make([]string, 0, len(s.callLog))
	return append(snapshot, s.callLog...)
}

// GetContent records pageURL in the call log, then returns the registered
// error if there is one, otherwise the registered page, otherwise an error
// saying that nothing is registered for the URL.
func (s *StubClient) GetContent(_ context.Context, pageURL string) (string, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	s.callLog = append(s.callLog, pageURL)

	// A registered error takes precedence over a registered page.
	if failure, registered := s.errors[pageURL]; registered {
		return "", failure
	}
	body, registered := s.pages[pageURL]
	if !registered {
		return "", fmt.Errorf("stub: no page registered for %q", pageURL)
	}
	return body, nil
}
|
||||
141
backend/internal/browser/browser_test.go
Normal file
141
backend/internal/browser/browser_test.go
Normal file
@@ -0,0 +1,141 @@
|
||||
package browser_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/libnovel/backend/internal/browser"
|
||||
)
|
||||
|
||||
func TestDirectClient_GetContent_Success(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Write([]byte("<html>hello</html>"))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := browser.NewDirectClient(browser.Config{MaxConcurrent: 2, Timeout: 5 * time.Second})
|
||||
body, err := c.GetContent(context.Background(), srv.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if body != "<html>hello</html>" {
|
||||
t.Errorf("want <html>hello</html>, got %q", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDirectClient_GetContent_4xxReturnsError(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := browser.NewDirectClient(browser.Config{})
|
||||
_, err := c.GetContent(context.Background(), srv.URL)
|
||||
if err == nil {
|
||||
t.Fatal("expected error for 404")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDirectClient_SemaphoreBlocksConcurrency(t *testing.T) {
|
||||
const maxConcurrent = 2
|
||||
var inflight atomic.Int32
|
||||
var peak atomic.Int32
|
||||
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
n := inflight.Add(1)
|
||||
if int(n) > int(peak.Load()) {
|
||||
peak.Store(n)
|
||||
}
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
inflight.Add(-1)
|
||||
w.Write([]byte("ok"))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := browser.NewDirectClient(browser.Config{MaxConcurrent: maxConcurrent, Timeout: 5 * time.Second})
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for i := 0; i < 8; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
c.GetContent(context.Background(), srv.URL)
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
if int(peak.Load()) > maxConcurrent {
|
||||
t.Errorf("concurrent requests exceeded limit: peak=%d, limit=%d", peak.Load(), maxConcurrent)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDirectClient_ContextCancel(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
w.Write([]byte("ok"))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel() // cancel before making the request
|
||||
|
||||
c := browser.NewDirectClient(browser.Config{})
|
||||
_, err := c.GetContent(ctx, srv.URL)
|
||||
if err == nil {
|
||||
t.Fatal("expected context cancellation error")
|
||||
}
|
||||
}
|
||||
|
||||
// ── StubClient ────────────────────────────────────────────────────────────────
|
||||
|
||||
func TestStubClient_ReturnsRegisteredPage(t *testing.T) {
|
||||
stub := browser.NewStub()
|
||||
stub.SetPage("http://example.com/page1", "<html>page1</html>")
|
||||
|
||||
body, err := stub.GetContent(context.Background(), "http://example.com/page1")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if body != "<html>page1</html>" {
|
||||
t.Errorf("want page1 html, got %q", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStubClient_ReturnsRegisteredError(t *testing.T) {
|
||||
stub := browser.NewStub()
|
||||
want := errors.New("network failure")
|
||||
stub.SetError("http://example.com/bad", want)
|
||||
|
||||
_, err := stub.GetContent(context.Background(), "http://example.com/bad")
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStubClient_UnknownURLReturnsError(t *testing.T) {
|
||||
stub := browser.NewStub()
|
||||
_, err := stub.GetContent(context.Background(), "http://unknown.example.com/")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for unknown URL")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStubClient_CallLog(t *testing.T) {
|
||||
stub := browser.NewStub()
|
||||
stub.SetPage("http://example.com/a", "a")
|
||||
stub.SetPage("http://example.com/b", "b")
|
||||
|
||||
stub.GetContent(context.Background(), "http://example.com/a")
|
||||
stub.GetContent(context.Background(), "http://example.com/b")
|
||||
|
||||
log := stub.CallLog()
|
||||
if len(log) != 2 || log[0] != "http://example.com/a" || log[1] != "http://example.com/b" {
|
||||
t.Errorf("unexpected call log: %v", log)
|
||||
}
|
||||
}
|
||||
284
backend/internal/config/config.go
Normal file
284
backend/internal/config/config.go
Normal file
@@ -0,0 +1,284 @@
|
||||
// Package config loads all service configuration from environment variables.
|
||||
// Both the runner and backend binaries call config.Load() at startup; each
|
||||
// uses only the sub-struct relevant to it.
|
||||
//
|
||||
// Every field has a documented default so the service starts sensibly without
|
||||
// any environment configuration (useful for local development).
|
||||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// PocketBase carries the connection settings for the remote PocketBase
// instance.
type PocketBase struct {
	// URL is the instance base URL, e.g. https://pb.libnovel.cc
	URL string
	// AdminEmail is the email of the admin account used for API
	// authentication.
	AdminEmail string
	// AdminPassword is the password of that admin account.
	AdminPassword string
}
|
||||
|
||||
// MinIO carries the connection settings for the remote MinIO /
// S3-compatible store.
type MinIO struct {
	// Endpoint is the host:port of the MinIO S3 API,
	// e.g. storage.libnovel.cc:443
	Endpoint string
	// PublicEndpoint is the browser-visible endpoint used for presigned URLs.
	// When empty, Endpoint is used instead.
	PublicEndpoint string
	// AccessKey is the MinIO access key.
	AccessKey string
	// SecretKey is the MinIO secret key.
	SecretKey string
	// UseSSL turns on TLS for the internal MinIO connection.
	UseSSL bool
	// PublicUseSSL turns on TLS for presigned URL generation.
	PublicUseSSL bool
	// BucketChapters names the bucket holding chapter markdown objects.
	BucketChapters string
	// BucketAudio names the bucket holding generated audio MP3 objects.
	BucketAudio string
	// BucketAvatars names the bucket holding user avatar images.
	BucketAvatars string
	// BucketBrowse names the bucket holding cached browse page snapshots
	// (JSON).
	BucketBrowse string
	// BucketTranslations names the bucket holding machine-translated chapter
	// markdown.
	BucketTranslations string
}
|
||||
|
||||
// Kokoro carries the connection settings for the Kokoro-FastAPI TTS service.
type Kokoro struct {
	// URL is the service base URL, e.g. https://tts.libnovel.cc
	// Leaving it empty disables Kokoro TTS generation.
	URL string
	// DefaultVoice is the voice used when the caller specifies none.
	DefaultVoice string
}
|
||||
|
||||
// PocketTTS carries the connection settings for the kyutai-labs/pocket-tts
// service.
type PocketTTS struct {
	// URL is the service base URL, e.g. https://pocket-tts.libnovel.cc
	// Leaving it empty disables pocket-tts generation.
	URL string
}
|
||||
|
||||
// LibreTranslate carries the connection settings for a self-hosted
// LibreTranslate instance.
type LibreTranslate struct {
	// URL is the instance base URL, e.g. https://translate.libnovel.cc
	// Leaving it empty disables machine translation entirely.
	URL string
	// APIKey is the optional API key for the instance; leave it empty when
	// the instance runs without authentication.
	APIKey string
}
|
||||
|
||||
// HTTP carries the settings of the HTTP server (backend only).
type HTTP struct {
	// Addr is the listen address, e.g. ":8080"
	Addr string
}
|
||||
|
||||
// Meilisearch carries the connection settings for the Meilisearch full-text
// search service.
type Meilisearch struct {
	// URL is the instance base URL, e.g. http://localhost:7700
	// Leaving it empty disables Meilisearch indexing and search.
	URL string
	// APIKey is the Meilisearch master/search API key.
	APIKey string
}
|
||||
|
||||
// Valkey carries the connection settings for the Valkey/Redis presign URL
// cache.
type Valkey struct {
	// Addr is the host:port of the Valkey instance, e.g. localhost:6379
	// Leaving it empty disables the cache (falls through to MinIO directly).
	Addr string
}
|
||||
|
||||
// Redis carries the connection settings for the Asynq task queue Redis
// instance. It is configured separately from Valkey (the presign cache) and
// may point at the same Redis or a dedicated one. An empty Addr falls back
// to PocketBase polling.
type Redis struct {
	// Addr is the host:port (or rediss://... URL) of the Redis instance.
	// Use the rediss:// scheme for TLS
	// (e.g. rediss://:password@redis.libnovel.cc:6380).
	// Leaving it empty disables Asynq and falls back to PocketBase polling.
	Addr string
	// Password is the Redis AUTH password. It is not needed when Addr is a
	// full rediss:// URL that already includes the password.
	Password string
}
|
||||
|
||||
// Runner carries the settings specific to the runner/worker binary.
type Runner struct {
	// PollInterval is how often the runner checks PocketBase for pending
	// tasks.
	PollInterval time.Duration
	// MaxConcurrentScrape caps simultaneous book-scrape goroutines.
	MaxConcurrentScrape int
	// MaxConcurrentAudio caps simultaneous audio-generation goroutines.
	MaxConcurrentAudio int
	// MaxConcurrentTranslation caps simultaneous translation goroutines.
	MaxConcurrentTranslation int
	// WorkerID uniquely identifies this runner instance.
	// Defaults to the system hostname.
	WorkerID string
	// Workers is the number of chapter-scraping goroutines per book.
	Workers int
	// Timeout is the per-request HTTP timeout for scraping.
	Timeout time.Duration
	// MetricsAddr is the listen address of the runner /metrics HTTP endpoint.
	// Defaults to ":9091". Set to "" to disable.
	MetricsAddr string
	// CatalogueRefreshInterval is how often the runner walks the full
	// catalogue, scrapes per-book metadata, downloads covers, and re-indexes
	// in Meilisearch. Defaults to 24h. Set to 0 to use the default.
	CatalogueRefreshInterval time.Duration
	// SkipInitialCatalogueRefresh stops the runner from doing a full
	// catalogue walk on startup. Useful for quick restarts where the
	// catalogue is already indexed and a 24h walk would be wasteful.
	// Controlled by RUNNER_SKIP_INITIAL_CATALOGUE_REFRESH=true.
	SkipInitialCatalogueRefresh bool
	// CatalogueRequestDelay is the base delay inserted between per-book
	// metadata requests during a catalogue refresh. A random jitter of up to
	// 50% is added on top. Defaults to 2s. Increase it to reduce 429 pressure
	// on novelfire.net.
	// Controlled by RUNNER_CATALOGUE_REQUEST_DELAY (e.g. "3s", "500ms").
	CatalogueRequestDelay time.Duration
}
|
||||
|
||||
// Config is the top-level configuration struct consumed by both binaries.
|
||||
type Config struct {
|
||||
PocketBase PocketBase
|
||||
MinIO MinIO
|
||||
Kokoro Kokoro
|
||||
PocketTTS PocketTTS
|
||||
LibreTranslate LibreTranslate
|
||||
HTTP HTTP
|
||||
Runner Runner
|
||||
Meilisearch Meilisearch
|
||||
Valkey Valkey
|
||||
Redis Redis
|
||||
// LogLevel is one of "debug", "info", "warn", "error".
|
||||
LogLevel string
|
||||
}
|
||||
|
||||
// Load reads all configuration from environment variables and returns a
|
||||
// populated Config. Missing variables fall back to documented defaults.
|
||||
func Load() Config {
|
||||
workerID, _ := os.Hostname()
|
||||
if workerID == "" {
|
||||
workerID = "runner-default"
|
||||
}
|
||||
|
||||
return Config{
|
||||
LogLevel: envOr("LOG_LEVEL", "info"),
|
||||
|
||||
PocketBase: PocketBase{
|
||||
URL: envOr("POCKETBASE_URL", "http://localhost:8090"),
|
||||
AdminEmail: envOr("POCKETBASE_ADMIN_EMAIL", "admin@libnovel.local"),
|
||||
AdminPassword: envOr("POCKETBASE_ADMIN_PASSWORD", "changeme123"),
|
||||
},
|
||||
|
||||
MinIO: MinIO{
|
||||
Endpoint: envOr("MINIO_ENDPOINT", "localhost:9000"),
|
||||
PublicEndpoint: envOr("MINIO_PUBLIC_ENDPOINT", ""),
|
||||
AccessKey: envOr("MINIO_ACCESS_KEY", "admin"),
|
||||
SecretKey: envOr("MINIO_SECRET_KEY", "changeme123"),
|
||||
UseSSL: envBool("MINIO_USE_SSL", false),
|
||||
PublicUseSSL: envBool("MINIO_PUBLIC_USE_SSL", true),
|
||||
BucketChapters: envOr("MINIO_BUCKET_CHAPTERS", "chapters"),
|
||||
BucketAudio: envOr("MINIO_BUCKET_AUDIO", "audio"),
|
||||
BucketAvatars: envOr("MINIO_BUCKET_AVATARS", "avatars"),
|
||||
BucketBrowse: envOr("MINIO_BUCKET_BROWSE", "catalogue"),
|
||||
BucketTranslations: envOr("MINIO_BUCKET_TRANSLATIONS", "translations"),
|
||||
},
|
||||
|
||||
Kokoro: Kokoro{
|
||||
URL: envOr("KOKORO_URL", ""),
|
||||
DefaultVoice: envOr("KOKORO_VOICE", "af_bella"),
|
||||
},
|
||||
|
||||
PocketTTS: PocketTTS{
|
||||
URL: envOr("POCKET_TTS_URL", ""),
|
||||
},
|
||||
|
||||
LibreTranslate: LibreTranslate{
|
||||
URL: envOr("LIBRETRANSLATE_URL", ""),
|
||||
APIKey: envOr("LIBRETRANSLATE_API_KEY", ""),
|
||||
},
|
||||
|
||||
HTTP: HTTP{
|
||||
Addr: envOr("BACKEND_HTTP_ADDR", ":8080"),
|
||||
},
|
||||
|
||||
Runner: Runner{
|
||||
PollInterval: envDuration("RUNNER_POLL_INTERVAL", 30*time.Second),
|
||||
MaxConcurrentScrape: envInt("RUNNER_MAX_CONCURRENT_SCRAPE", 1),
|
||||
MaxConcurrentAudio: envInt("RUNNER_MAX_CONCURRENT_AUDIO", 1),
|
||||
MaxConcurrentTranslation: envInt("RUNNER_MAX_CONCURRENT_TRANSLATION", 1),
|
||||
WorkerID: envOr("RUNNER_WORKER_ID", workerID),
|
||||
Workers: envInt("RUNNER_WORKERS", 0), // 0 → runtime.NumCPU()
|
||||
Timeout: envDuration("RUNNER_TIMEOUT", 90*time.Second),
|
||||
MetricsAddr: envOr("RUNNER_METRICS_ADDR", ":9091"),
|
||||
CatalogueRefreshInterval: envDuration("RUNNER_CATALOGUE_REFRESH_INTERVAL", 0),
|
||||
SkipInitialCatalogueRefresh: envBool("RUNNER_SKIP_INITIAL_CATALOGUE_REFRESH", false),
|
||||
CatalogueRequestDelay: envDuration("RUNNER_CATALOGUE_REQUEST_DELAY", 2*time.Second),
|
||||
},
|
||||
|
||||
Meilisearch: Meilisearch{
|
||||
URL: envOr("MEILI_URL", ""),
|
||||
APIKey: envOr("MEILI_API_KEY", ""),
|
||||
},
|
||||
|
||||
Valkey: Valkey{
|
||||
Addr: envOr("VALKEY_ADDR", ""),
|
||||
},
|
||||
|
||||
Redis: Redis{
|
||||
Addr: envOr("REDIS_ADDR", ""),
|
||||
Password: envOr("REDIS_PASSWORD", ""),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// ── helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
// envOr returns the value of the environment variable key. When the variable
// is unset or set to the empty string, fallback is returned instead.
func envOr(key, fallback string) string {
	val := os.Getenv(key)
	if val == "" {
		return fallback
	}
	return val
}
|
||||
|
||||
// envBool reads a boolean from the environment variable key.
//
// Surrounding whitespace is ignored and the value is matched
// case-insensitively against the forms accepted by strconv.ParseBool
// ("1", "t", "true", "0", "f", "false"). fallback is returned when the
// variable is unset, empty, or cannot be parsed, so a typo never silently
// flips a default-true setting to false. This mirrors how envInt and
// envDuration treat invalid input.
func envBool(key string, fallback bool) bool {
	v := strings.TrimSpace(os.Getenv(key))
	if v == "" {
		return fallback
	}
	// Lower-casing first keeps mixed-case spellings such as "tRuE" working,
	// which ParseBool alone would reject.
	b, err := strconv.ParseBool(strings.ToLower(v))
	if err != nil {
		return fallback
	}
	return b
}
|
||||
|
||||
// envInt reads a non-negative integer from the environment variable key.
// fallback is returned when the variable is unset, empty, not a valid
// integer, or negative.
func envInt(key string, fallback int) int {
	raw := os.Getenv(key)
	if raw == "" {
		return fallback
	}
	if parsed, err := strconv.Atoi(raw); err == nil && parsed >= 0 {
		return parsed
	}
	return fallback
}
|
||||
|
||||
// envDuration reads a duration in time.ParseDuration syntax (for example
// "30s" or "1m") from the environment variable key. fallback is returned when
// the variable is unset, empty, or cannot be parsed.
func envDuration(key string, fallback time.Duration) time.Duration {
	raw := os.Getenv(key)
	if raw == "" {
		return fallback
	}
	if parsed, err := time.ParseDuration(raw); err == nil {
		return parsed
	}
	return fallback
}
|
||||
127
backend/internal/config/config_test.go
Normal file
127
backend/internal/config/config_test.go
Normal file
@@ -0,0 +1,127 @@
|
||||
package config_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/libnovel/backend/internal/config"
|
||||
)
|
||||
|
||||
func TestLoad_Defaults(t *testing.T) {
|
||||
// Unset all relevant vars so we test pure defaults.
|
||||
unset := []string{
|
||||
"LOG_LEVEL",
|
||||
"POCKETBASE_URL", "POCKETBASE_ADMIN_EMAIL", "POCKETBASE_ADMIN_PASSWORD",
|
||||
"MINIO_ENDPOINT", "MINIO_PUBLIC_ENDPOINT", "MINIO_ACCESS_KEY", "MINIO_SECRET_KEY",
|
||||
"MINIO_USE_SSL", "MINIO_PUBLIC_USE_SSL",
|
||||
"MINIO_BUCKET_CHAPTERS", "MINIO_BUCKET_AUDIO", "MINIO_BUCKET_AVATARS",
|
||||
"KOKORO_URL", "KOKORO_VOICE",
|
||||
"BACKEND_HTTP_ADDR",
|
||||
"RUNNER_POLL_INTERVAL", "RUNNER_MAX_CONCURRENT_SCRAPE", "RUNNER_MAX_CONCURRENT_AUDIO",
|
||||
"RUNNER_WORKER_ID", "RUNNER_WORKERS", "RUNNER_TIMEOUT",
|
||||
}
|
||||
for _, k := range unset {
|
||||
t.Setenv(k, "")
|
||||
}
|
||||
|
||||
cfg := config.Load()
|
||||
|
||||
if cfg.LogLevel != "info" {
|
||||
t.Errorf("LogLevel: want info, got %q", cfg.LogLevel)
|
||||
}
|
||||
if cfg.PocketBase.URL != "http://localhost:8090" {
|
||||
t.Errorf("PocketBase.URL: want http://localhost:8090, got %q", cfg.PocketBase.URL)
|
||||
}
|
||||
if cfg.MinIO.BucketChapters != "chapters" {
|
||||
t.Errorf("MinIO.BucketChapters: want chapters, got %q", cfg.MinIO.BucketChapters)
|
||||
}
|
||||
if cfg.MinIO.UseSSL != false {
|
||||
t.Errorf("MinIO.UseSSL: want false, got %v", cfg.MinIO.UseSSL)
|
||||
}
|
||||
if cfg.MinIO.PublicUseSSL != true {
|
||||
t.Errorf("MinIO.PublicUseSSL: want true, got %v", cfg.MinIO.PublicUseSSL)
|
||||
}
|
||||
if cfg.Kokoro.DefaultVoice != "af_bella" {
|
||||
t.Errorf("Kokoro.DefaultVoice: want af_bella, got %q", cfg.Kokoro.DefaultVoice)
|
||||
}
|
||||
if cfg.HTTP.Addr != ":8080" {
|
||||
t.Errorf("HTTP.Addr: want :8080, got %q", cfg.HTTP.Addr)
|
||||
}
|
||||
if cfg.Runner.PollInterval != 30*time.Second {
|
||||
t.Errorf("Runner.PollInterval: want 30s, got %v", cfg.Runner.PollInterval)
|
||||
}
|
||||
if cfg.Runner.MaxConcurrentScrape != 1 {
|
||||
t.Errorf("Runner.MaxConcurrentScrape: want 1, got %d", cfg.Runner.MaxConcurrentScrape)
|
||||
}
|
||||
if cfg.Runner.MaxConcurrentAudio != 1 {
|
||||
t.Errorf("Runner.MaxConcurrentAudio: want 1, got %d", cfg.Runner.MaxConcurrentAudio)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoad_EnvOverride(t *testing.T) {
|
||||
t.Setenv("LOG_LEVEL", "debug")
|
||||
t.Setenv("POCKETBASE_URL", "https://pb.libnovel.cc")
|
||||
t.Setenv("MINIO_USE_SSL", "true")
|
||||
t.Setenv("MINIO_PUBLIC_USE_SSL", "false")
|
||||
t.Setenv("RUNNER_POLL_INTERVAL", "1m")
|
||||
t.Setenv("RUNNER_MAX_CONCURRENT_SCRAPE", "5")
|
||||
t.Setenv("RUNNER_WORKER_ID", "homelab-01")
|
||||
t.Setenv("BACKEND_HTTP_ADDR", ":9090")
|
||||
t.Setenv("KOKORO_URL", "https://kokoro.libnovel.cc")
|
||||
|
||||
cfg := config.Load()
|
||||
|
||||
if cfg.LogLevel != "debug" {
|
||||
t.Errorf("LogLevel: want debug, got %q", cfg.LogLevel)
|
||||
}
|
||||
if cfg.PocketBase.URL != "https://pb.libnovel.cc" {
|
||||
t.Errorf("PocketBase.URL: want https://pb.libnovel.cc, got %q", cfg.PocketBase.URL)
|
||||
}
|
||||
if !cfg.MinIO.UseSSL {
|
||||
t.Error("MinIO.UseSSL: want true")
|
||||
}
|
||||
if cfg.MinIO.PublicUseSSL {
|
||||
t.Error("MinIO.PublicUseSSL: want false")
|
||||
}
|
||||
if cfg.Runner.PollInterval != time.Minute {
|
||||
t.Errorf("Runner.PollInterval: want 1m, got %v", cfg.Runner.PollInterval)
|
||||
}
|
||||
if cfg.Runner.MaxConcurrentScrape != 5 {
|
||||
t.Errorf("Runner.MaxConcurrentScrape: want 5, got %d", cfg.Runner.MaxConcurrentScrape)
|
||||
}
|
||||
if cfg.Runner.WorkerID != "homelab-01" {
|
||||
t.Errorf("Runner.WorkerID: want homelab-01, got %q", cfg.Runner.WorkerID)
|
||||
}
|
||||
if cfg.HTTP.Addr != ":9090" {
|
||||
t.Errorf("HTTP.Addr: want :9090, got %q", cfg.HTTP.Addr)
|
||||
}
|
||||
if cfg.Kokoro.URL != "https://kokoro.libnovel.cc" {
|
||||
t.Errorf("Kokoro.URL: want https://kokoro.libnovel.cc, got %q", cfg.Kokoro.URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoad_InvalidInt_FallsToDefault(t *testing.T) {
|
||||
t.Setenv("RUNNER_MAX_CONCURRENT_SCRAPE", "notanumber")
|
||||
cfg := config.Load()
|
||||
if cfg.Runner.MaxConcurrentScrape != 1 {
|
||||
t.Errorf("want default 1, got %d", cfg.Runner.MaxConcurrentScrape)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoad_InvalidDuration_FallsToDefault(t *testing.T) {
|
||||
t.Setenv("RUNNER_POLL_INTERVAL", "notaduration")
|
||||
cfg := config.Load()
|
||||
if cfg.Runner.PollInterval != 30*time.Second {
|
||||
t.Errorf("want default 30s, got %v", cfg.Runner.PollInterval)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoad_WorkerID_FallsToHostname(t *testing.T) {
|
||||
t.Setenv("RUNNER_WORKER_ID", "")
|
||||
cfg := config.Load()
|
||||
host, _ := os.Hostname()
|
||||
if host != "" && cfg.Runner.WorkerID != host {
|
||||
t.Errorf("want hostname %q, got %q", host, cfg.Runner.WorkerID)
|
||||
}
|
||||
}
|
||||
171
backend/internal/domain/domain.go
Normal file
171
backend/internal/domain/domain.go
Normal file
@@ -0,0 +1,171 @@
|
||||
// Package domain contains the core value types shared across all packages
|
||||
// in this module. It has zero internal imports — only the standard library.
|
||||
// Every other package imports domain; domain imports nothing from this module.
|
||||
package domain
|
||||
|
||||
import "time"
|
||||
|
||||
// ── Book types ────────────────────────────────────────────────────────────────
|
||||
|
||||
// BookMeta holds the bibliographic metadata of a single novel.
//
// Fields tagged omitempty are left out of the JSON encoding when they hold
// their zero value.
type BookMeta struct {
	Slug          string   `json:"slug"`
	Title         string   `json:"title"`
	Author        string   `json:"author"`
	Cover         string   `json:"cover,omitempty"`
	Status        string   `json:"status,omitempty"`
	Genres        []string `json:"genres,omitempty"`
	Summary       string   `json:"summary,omitempty"`
	TotalChapters int      `json:"total_chapters,omitempty"`
	SourceURL     string   `json:"source_url"`
	Ranking       int      `json:"ranking,omitempty"`
	Rating        float64  `json:"rating,omitempty"`

	// MetaUpdated is the Unix time, in seconds, at which the book record was
	// last updated in PocketBase. It is filled in when a record is read and
	// is not sent on write, because PocketBase maintains its own updated
	// field.
	MetaUpdated int64 `json:"meta_updated,omitempty"`
}
|
||||
|
||||
// CatalogueEntry is the lightweight reference to a book that catalogue pages
// return: its slug, title and URL.
type CatalogueEntry struct {
	Slug  string `json:"slug"`
	Title string `json:"title"`
	URL   string `json:"url"`
}
|
||||
|
||||
// ChapterRef identifies one chapter as returned by chapter-list pages.
// Volume is omitted from the JSON encoding when it is zero.
type ChapterRef struct {
	Number int    `json:"number"`
	Title  string `json:"title"`
	URL    string `json:"url"`
	Volume int    `json:"volume,omitempty"`
}
|
||||
|
||||
// Chapter contains the fully-extracted text of a single chapter.
|
||||
type Chapter struct {
|
||||
Ref ChapterRef `json:"ref"`
|
||||
Text string `json:"text"`
|
||||
}
|
||||
|
||||
// RankingItem is one entry of the novel ranking list.
type RankingItem struct {
	Rank      int      `json:"rank"`
	Slug      string   `json:"slug"`
	Title     string   `json:"title"`
	Author    string   `json:"author,omitempty"`
	Cover     string   `json:"cover,omitempty"`
	Status    string   `json:"status,omitempty"`
	Genres    []string `json:"genres,omitempty"`
	SourceURL string   `json:"source_url,omitempty"`

	// Updated carries the omitempty option, but encoding/json never treats a
	// struct value such as time.Time as empty, so a zero Updated is still
	// encoded.
	Updated time.Time `json:"updated,omitempty"`
}
|
||||
|
||||
// ── Voice types ───────────────────────────────────────────────────────────────
|
||||
|
||||
// Voice describes one text-to-speech voice offered by the system.
type Voice struct {
	// ID is the identifier handed to TTS clients, e.g. "af_bella" or "alba".
	ID string `json:"id"`

	// Engine names the TTS engine: "kokoro" or "pocket-tts".
	Engine string `json:"engine"`

	// Lang is the primary language tag, e.g. "en-us", "en-gb", "en", "es"
	// or "fr".
	Lang string `json:"lang"`

	// Gender is "f" or "m".
	Gender string `json:"gender"`
}
|
||||
|
||||
// ── Storage record types ──────────────────────────────────────────────────────
|
||||
|
||||
// ChapterInfo is the lightweight description of a chapter that is stored in
// the index. Date is omitted from the JSON encoding when empty.
type ChapterInfo struct {
	Number int    `json:"number"`
	Title  string `json:"title"`
	Date   string `json:"date,omitempty"`
}
|
||||
|
||||
// ReadingProgress records where one user currently is in one book: the book
// slug, the chapter number, and when the position was last updated.
type ReadingProgress struct {
	Slug      string    `json:"slug"`
	Chapter   int       `json:"chapter"`
	UpdatedAt time.Time `json:"updated_at"`
}
|
||||
|
||||
// ── Task record types ─────────────────────────────────────────────────────────
|
||||
|
||||
// TaskStatus is the lifecycle state of a task, represented by its string
// name.
type TaskStatus string

// The lifecycle states shared by every kind of task.
const (
	TaskStatusPending   TaskStatus = "pending"
	TaskStatusRunning   TaskStatus = "running"
	TaskStatusDone      TaskStatus = "done"
	TaskStatusFailed    TaskStatus = "failed"
	TaskStatusCancelled TaskStatus = "cancelled"
)
|
||||
|
||||
// ScrapeTask represents a book-scraping job stored in PocketBase.
|
||||
type ScrapeTask struct {
|
||||
ID string `json:"id"`
|
||||
Kind string `json:"kind"` // "catalogue" | "book" | "book_range"
|
||||
TargetURL string `json:"target_url"` // non-empty for single-book tasks
|
||||
FromChapter int `json:"from_chapter,omitempty"`
|
||||
ToChapter int `json:"to_chapter,omitempty"`
|
||||
WorkerID string `json:"worker_id,omitempty"`
|
||||
Status TaskStatus `json:"status"`
|
||||
BooksFound int `json:"books_found"`
|
||||
ChaptersScraped int `json:"chapters_scraped"`
|
||||
ChaptersSkipped int `json:"chapters_skipped"`
|
||||
Errors int `json:"errors"`
|
||||
Started time.Time `json:"started"`
|
||||
Finished time.Time `json:"finished,omitempty"`
|
||||
ErrorMessage string `json:"error_message,omitempty"`
|
||||
}
|
||||
|
||||
// ScrapeResult is what the runner reports once it has finished a ScrapeTask:
// the counters accumulated during the run and an optional error message.
type ScrapeResult struct {
	BooksFound      int    `json:"books_found"`
	ChaptersScraped int    `json:"chapters_scraped"`
	ChaptersSkipped int    `json:"chapters_skipped"`
	Errors          int    `json:"errors"`
	ErrorMessage    string `json:"error_message,omitempty"`
}
|
||||
|
||||
// AudioTask represents an audio-generation job stored in PocketBase.
|
||||
type AudioTask struct {
|
||||
ID string `json:"id"`
|
||||
CacheKey string `json:"cache_key"` // "slug/chapter/voice"
|
||||
Slug string `json:"slug"`
|
||||
Chapter int `json:"chapter"`
|
||||
Voice string `json:"voice"`
|
||||
WorkerID string `json:"worker_id,omitempty"`
|
||||
Status TaskStatus `json:"status"`
|
||||
ErrorMessage string `json:"error_message,omitempty"`
|
||||
Started time.Time `json:"started"`
|
||||
Finished time.Time `json:"finished,omitempty"`
|
||||
}
|
||||
|
||||
// AudioResult is what the runner reports once it has finished an AudioTask.
// Both fields are omitted from the JSON encoding when empty.
type AudioResult struct {
	ObjectKey    string `json:"object_key,omitempty"`
	ErrorMessage string `json:"error_message,omitempty"`
}
|
||||
|
||||
// TranslationTask represents a machine-translation job stored in PocketBase.
|
||||
type TranslationTask struct {
|
||||
ID string `json:"id"`
|
||||
CacheKey string `json:"cache_key"` // "{slug}/{chapter}/{lang}"
|
||||
Slug string `json:"slug"`
|
||||
Chapter int `json:"chapter"`
|
||||
Lang string `json:"lang"`
|
||||
WorkerID string `json:"worker_id,omitempty"`
|
||||
Status TaskStatus `json:"status"`
|
||||
ErrorMessage string `json:"error_message,omitempty"`
|
||||
Started time.Time `json:"started"`
|
||||
Finished time.Time `json:"finished,omitempty"`
|
||||
}
|
||||
|
||||
// TranslationResult is what the runner reports once it has finished a
// TranslationTask. Both fields are omitted from the JSON encoding when empty.
type TranslationResult struct {
	ObjectKey    string `json:"object_key,omitempty"`
	ErrorMessage string `json:"error_message,omitempty"`
}
|
||||
104
backend/internal/domain/domain_test.go
Normal file
104
backend/internal/domain/domain_test.go
Normal file
@@ -0,0 +1,104 @@
|
||||
package domain_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
)
|
||||
|
||||
func TestBookMeta_JSONRoundtrip(t *testing.T) {
|
||||
orig := domain.BookMeta{
|
||||
Slug: "a-great-novel",
|
||||
Title: "A Great Novel",
|
||||
Author: "Jane Doe",
|
||||
Cover: "https://example.com/cover.jpg",
|
||||
Status: "Ongoing",
|
||||
Genres: []string{"Fantasy", "Action"},
|
||||
Summary: "A thrilling tale.",
|
||||
TotalChapters: 120,
|
||||
SourceURL: "https://novelfire.net/book/a-great-novel",
|
||||
Ranking: 3,
|
||||
}
|
||||
|
||||
b, err := json.Marshal(orig)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal: %v", err)
|
||||
}
|
||||
var got domain.BookMeta
|
||||
if err := json.Unmarshal(b, &got); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
if got.Slug != orig.Slug {
|
||||
t.Errorf("Slug: want %q, got %q", orig.Slug, got.Slug)
|
||||
}
|
||||
if got.TotalChapters != orig.TotalChapters {
|
||||
t.Errorf("TotalChapters: want %d, got %d", orig.TotalChapters, got.TotalChapters)
|
||||
}
|
||||
if len(got.Genres) != len(orig.Genres) {
|
||||
t.Errorf("Genres len: want %d, got %d", len(orig.Genres), len(got.Genres))
|
||||
}
|
||||
}
|
||||
|
||||
func TestChapterRef_JSONRoundtrip(t *testing.T) {
|
||||
orig := domain.ChapterRef{Number: 42, Title: "The Battle", URL: "https://example.com/ch-42", Volume: 2}
|
||||
b, _ := json.Marshal(orig)
|
||||
var got domain.ChapterRef
|
||||
json.Unmarshal(b, &got)
|
||||
if got != orig {
|
||||
t.Errorf("want %+v, got %+v", orig, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRankingItem_JSONRoundtrip(t *testing.T) {
|
||||
now := time.Now().Truncate(time.Second)
|
||||
orig := domain.RankingItem{
|
||||
Rank: 1,
|
||||
Slug: "top-novel",
|
||||
Title: "Top Novel",
|
||||
SourceURL: "https://novelfire.net/book/top-novel",
|
||||
Updated: now,
|
||||
}
|
||||
b, _ := json.Marshal(orig)
|
||||
var got domain.RankingItem
|
||||
json.Unmarshal(b, &got)
|
||||
if got.Rank != orig.Rank || got.Slug != orig.Slug {
|
||||
t.Errorf("want %+v, got %+v", orig, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScrapeResult_JSONRoundtrip(t *testing.T) {
|
||||
orig := domain.ScrapeResult{BooksFound: 10, ChaptersScraped: 200, ChaptersSkipped: 5, Errors: 1, ErrorMessage: "one error"}
|
||||
b, _ := json.Marshal(orig)
|
||||
var got domain.ScrapeResult
|
||||
json.Unmarshal(b, &got)
|
||||
if got != orig {
|
||||
t.Errorf("want %+v, got %+v", orig, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAudioResult_JSONRoundtrip(t *testing.T) {
|
||||
orig := domain.AudioResult{ObjectKey: "audio/slug/1/af_bella.mp3"}
|
||||
b, _ := json.Marshal(orig)
|
||||
var got domain.AudioResult
|
||||
json.Unmarshal(b, &got)
|
||||
if got != orig {
|
||||
t.Errorf("want %+v, got %+v", orig, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTaskStatus_Values(t *testing.T) {
|
||||
cases := []domain.TaskStatus{
|
||||
domain.TaskStatusPending,
|
||||
domain.TaskStatusRunning,
|
||||
domain.TaskStatusDone,
|
||||
domain.TaskStatusFailed,
|
||||
domain.TaskStatusCancelled,
|
||||
}
|
||||
for _, s := range cases {
|
||||
if s == "" {
|
||||
t.Errorf("TaskStatus constant must not be empty")
|
||||
}
|
||||
}
|
||||
}
|
||||
124
backend/internal/httputil/httputil.go
Normal file
124
backend/internal/httputil/httputil.go
Normal file
@@ -0,0 +1,124 @@
|
||||
// Package httputil provides shared HTTP helpers used by both the runner and
|
||||
// backend binaries. It has no imports from this module — only the standard
|
||||
// library — so it is safe to import from anywhere in the dependency graph.
|
||||
package httputil
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Client is the one-method abstraction this package needs to execute an HTTP
// request. *http.Client satisfies it, and tests can substitute their own
// implementation.
type Client interface {
	// Do sends req and returns the response or an error.
	Do(req *http.Request) (*http.Response, error)
}
|
||||
|
||||
var (
	// ErrMaxRetries is the sentinel wrapped into the error RetryGet returns
	// once every attempt has failed.
	ErrMaxRetries = errors.New("httputil: max retries exceeded")

	// errClientError marks a 4xx response from doGet. It tells the retry
	// loop that the request must NOT be retried, since the client is at
	// fault.
	errClientError = errors.New("httputil: client error")
)
|
||||
|
||||
// RetryGet fetches url using client, retrying on network errors or 5xx
|
||||
// responses with exponential backoff. It returns the full response body as a
|
||||
// string on success.
|
||||
//
|
||||
// - maxAttempts: total number of attempts (must be >= 1)
|
||||
// - baseDelay: initial wait before the second attempt; doubles each retry
|
||||
func RetryGet(ctx context.Context, client Client, url string, maxAttempts int, baseDelay time.Duration) (string, error) {
|
||||
if maxAttempts < 1 {
|
||||
maxAttempts = 1
|
||||
}
|
||||
delay := baseDelay
|
||||
|
||||
var lastErr error
|
||||
for attempt := 0; attempt < maxAttempts; attempt++ {
|
||||
if attempt > 0 {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return "", ctx.Err()
|
||||
case <-time.After(delay):
|
||||
}
|
||||
delay *= 2
|
||||
}
|
||||
|
||||
body, err := doGet(ctx, client, url)
|
||||
if err == nil {
|
||||
return body, nil
|
||||
}
|
||||
lastErr = err
|
||||
|
||||
// Do not retry on context cancellation.
|
||||
if ctx.Err() != nil {
|
||||
return "", ctx.Err()
|
||||
}
|
||||
// Do not retry on 4xx — the client is at fault.
|
||||
if errors.Is(err, errClientError) {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("%w after %d attempts: %w", ErrMaxRetries, maxAttempts, lastErr)
|
||||
}
|
||||
|
||||
func doGet(ctx context.Context, client Client, url string) (string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("build request: %w", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; libnovel-runner/2)")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("GET %s: %w", url, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode >= 500 {
|
||||
return "", fmt.Errorf("GET %s: server error %d", url, resp.StatusCode)
|
||||
}
|
||||
if resp.StatusCode >= 400 {
|
||||
return "", fmt.Errorf("%w: GET %s: client error %d", errClientError, url, resp.StatusCode)
|
||||
}
|
||||
|
||||
raw, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("read body %s: %w", url, err)
|
||||
}
|
||||
return string(raw), nil
|
||||
}
|
||||
|
||||
// WriteJSON sends v to w as a JSON document. It sets the Content-Type header
// to application/json, writes the given HTTP status code, and then encodes v
// into the response body.
func WriteJSON(w http.ResponseWriter, status int, v any) {
	header := w.Header()
	header.Set("Content-Type", "application/json")
	w.WriteHeader(status)

	// The status line has already been written at this point, so an encoding
	// failure can no longer be reported to the client; it is dropped.
	enc := json.NewEncoder(w)
	_ = enc.Encode(v)
}
|
||||
|
||||
// WriteError writes a JSON error object {"error": msg} with the given status.
|
||||
func WriteError(w http.ResponseWriter, status int, msg string) {
|
||||
WriteJSON(w, status, map[string]string{"error": msg})
|
||||
}
|
||||
|
||||
// maxBodyBytes is the limit applied by DecodeJSON to prevent unbounded reads.
const maxBodyBytes = 1 << 20 // 1 MiB

// DecodeJSON decodes the JSON request body of r into v.
//
// The body is limited to 1 MiB, unknown object fields are rejected, and the
// body must consist of exactly one JSON value: anything other than whitespace
// after that value is an error. A descriptive error is returned on any
// failure.
func DecodeJSON(r *http.Request, v any) error {
	r.Body = http.MaxBytesReader(nil, r.Body, maxBodyBytes)
	dec := json.NewDecoder(r.Body)
	dec.DisallowUnknownFields()
	if err := dec.Decode(v); err != nil {
		return fmt.Errorf("decode JSON body: %w", err)
	}

	// Decode stops after the first JSON value. Attempt to read another one:
	// only a clean EOF proves that nothing but whitespace followed it.
	var extra json.RawMessage
	if err := dec.Decode(&extra); !errors.Is(err, io.EOF) {
		return errors.New("decode JSON body: unexpected data after JSON value")
	}
	return nil
}
|
||||
181
backend/internal/httputil/httputil_test.go
Normal file
181
backend/internal/httputil/httputil_test.go
Normal file
@@ -0,0 +1,181 @@
|
||||
package httputil_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/libnovel/backend/internal/httputil"
|
||||
)
|
||||
|
||||
// ── RetryGet ──────────────────────────────────────────────────────────────────
|
||||
|
||||
func TestRetryGet_ImmediateSuccess(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Write([]byte("hello"))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
body, err := httputil.RetryGet(context.Background(), srv.Client(), srv.URL, 3, time.Millisecond)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if body != "hello" {
|
||||
t.Errorf("want hello, got %q", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRetryGet_RetriesOn5xx(t *testing.T) {
|
||||
calls := 0
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
calls++
|
||||
if calls < 3 {
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
return
|
||||
}
|
||||
w.Write([]byte("ok"))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
body, err := httputil.RetryGet(context.Background(), srv.Client(), srv.URL, 5, time.Millisecond)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if body != "ok" {
|
||||
t.Errorf("want ok, got %q", body)
|
||||
}
|
||||
if calls != 3 {
|
||||
t.Errorf("want 3 calls, got %d", calls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRetryGet_MaxAttemptsExceeded(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
_, err := httputil.RetryGet(context.Background(), srv.Client(), srv.URL, 3, time.Millisecond)
|
||||
if err == nil {
|
||||
t.Fatal("expected error, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRetryGet_ContextCancelDuringBackoff(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
// Cancel after first failed attempt hits the backoff wait.
|
||||
go func() { time.Sleep(5 * time.Millisecond); cancel() }()
|
||||
|
||||
_, err := httputil.RetryGet(ctx, srv.Client(), srv.URL, 10, 500*time.Millisecond)
|
||||
if err == nil {
|
||||
t.Fatal("expected context cancellation error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRetryGet_NoRetryOn4xx(t *testing.T) {
|
||||
calls := 0
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
calls++
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
_, err := httputil.RetryGet(context.Background(), srv.Client(), srv.URL, 5, time.Millisecond)
|
||||
if err == nil {
|
||||
t.Fatal("expected error for 404")
|
||||
}
|
||||
// 4xx is NOT retried — should be exactly 1 call.
|
||||
if calls != 1 {
|
||||
t.Errorf("want 1 call for 4xx, got %d", calls)
|
||||
}
|
||||
}
|
||||
|
||||
// ── WriteJSON ─────────────────────────────────────────────────────────────────
|
||||
|
||||
func TestWriteJSON_SetsHeadersAndStatus(t *testing.T) {
|
||||
rr := httptest.NewRecorder()
|
||||
httputil.WriteJSON(rr, http.StatusCreated, map[string]string{"key": "val"})
|
||||
|
||||
if rr.Code != http.StatusCreated {
|
||||
t.Errorf("status: want 201, got %d", rr.Code)
|
||||
}
|
||||
if ct := rr.Header().Get("Content-Type"); ct != "application/json" {
|
||||
t.Errorf("Content-Type: want application/json, got %q", ct)
|
||||
}
|
||||
var got map[string]string
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode body: %v", err)
|
||||
}
|
||||
if got["key"] != "val" {
|
||||
t.Errorf("body key: want val, got %q", got["key"])
|
||||
}
|
||||
}
|
||||
|
||||
// ── WriteError ────────────────────────────────────────────────────────────────
|
||||
|
||||
func TestWriteError_Format(t *testing.T) {
|
||||
rr := httptest.NewRecorder()
|
||||
httputil.WriteError(rr, http.StatusBadRequest, "bad input")
|
||||
|
||||
if rr.Code != http.StatusBadRequest {
|
||||
t.Errorf("status: want 400, got %d", rr.Code)
|
||||
}
|
||||
var got map[string]string
|
||||
json.NewDecoder(rr.Body).Decode(&got)
|
||||
if got["error"] != "bad input" {
|
||||
t.Errorf("error field: want bad input, got %q", got["error"])
|
||||
}
|
||||
}
|
||||
|
||||
// ── DecodeJSON ────────────────────────────────────────────────────────────────
|
||||
|
||||
func TestDecodeJSON_HappyPath(t *testing.T) {
|
||||
body := `{"name":"test","value":42}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
var payload struct {
|
||||
Name string `json:"name"`
|
||||
Value int `json:"value"`
|
||||
}
|
||||
if err := httputil.DecodeJSON(req, &payload); err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if payload.Name != "test" || payload.Value != 42 {
|
||||
t.Errorf("unexpected payload: %+v", payload)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeJSON_UnknownFieldReturnsError(t *testing.T) {
|
||||
body := `{"name":"test","unknown_field":"boom"}`
|
||||
req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader(body))
|
||||
|
||||
var payload struct {
|
||||
Name string `json:"name"`
|
||||
}
|
||||
if err := httputil.DecodeJSON(req, &payload); err == nil {
|
||||
t.Fatal("expected error for unknown field, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeJSON_BodyTooLarge(t *testing.T) {
|
||||
// Build a body > 1 MiB.
|
||||
big := bytes.Repeat([]byte("a"), 2<<20)
|
||||
req := httptest.NewRequest(http.MethodPost, "/", bytes.NewReader(big))
|
||||
|
||||
var payload map[string]any
|
||||
if err := httputil.DecodeJSON(req, &payload); err == nil {
|
||||
t.Fatal("expected error for oversized body, got nil")
|
||||
}
|
||||
}
|
||||
255
backend/internal/kokoro/client.go
Normal file
255
backend/internal/kokoro/client.go
Normal file
@@ -0,0 +1,255 @@
|
||||
// Package kokoro provides a client for the Kokoro-FastAPI TTS service.
|
||||
//
|
||||
// The Kokoro API is an OpenAI-compatible audio speech API that returns a
|
||||
// download link (X-Download-Path header) instead of streaming audio directly.
|
||||
// GenerateAudio handles the two-step flow: POST /v1/audio/speech → GET /v1/download/{file}.
|
||||
package kokoro
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Client describes the operations offered by the Kokoro TTS service.
type Client interface {
	// GenerateAudio turns text into speech with the given voice and returns
	// the raw MP3 bytes.
	GenerateAudio(ctx context.Context, text, voice string) ([]byte, error)

	// StreamAudioMP3 turns text into speech and returns an io.ReadCloser
	// that yields MP3-encoded audio incrementally. It relies on the
	// kokoro-fastapi streaming mode (stream:true), which delivers MP3 frames
	// as they are generated instead of waiting for the complete output. The
	// caller must always close the returned ReadCloser.
	StreamAudioMP3(ctx context.Context, text, voice string) (io.ReadCloser, error)

	// StreamAudioWAV turns text into speech and returns an io.ReadCloser
	// that yields WAV-encoded audio incrementally, using kokoro-fastapi's
	// streaming mode with response_format:"wav". The caller must always
	// close the returned ReadCloser.
	StreamAudioWAV(ctx context.Context, text, voice string) (io.ReadCloser, error)

	// ListVoices returns the IDs of the available voices. On error it falls
	// back to an empty slice; callers should treat an empty list as
	// "service unavailable".
	ListVoices(ctx context.Context) ([]string, error)
}
|
||||
|
||||
// httpClient implements the Kokoro client on top of net/http.
type httpClient struct {
	// baseURL is prepended to every endpoint path.
	baseURL string
	// http is the underlying client used to send all requests.
	http *http.Client
}
|
||||
|
||||
// New returns a Kokoro Client targeting baseURL (e.g. "https://kokoro.example.com").
|
||||
func New(baseURL string) Client {
|
||||
return &httpClient{
|
||||
baseURL: strings.TrimRight(baseURL, "/"),
|
||||
http: &http.Client{Timeout: 10 * time.Minute},
|
||||
}
|
||||
}
|
||||
|
||||
// GenerateAudio calls POST /v1/audio/speech (return_download_link=true) and then
|
||||
// downloads the resulting MP3 from GET /v1/download/{filename}.
|
||||
func (c *httpClient) GenerateAudio(ctx context.Context, text, voice string) ([]byte, error) {
|
||||
if text == "" {
|
||||
return nil, fmt.Errorf("kokoro: empty text")
|
||||
}
|
||||
if voice == "" {
|
||||
voice = "af_bella"
|
||||
}
|
||||
|
||||
// ── Step 1: request generation ────────────────────────────────────────────
|
||||
reqBody, err := json.Marshal(map[string]any{
|
||||
"model": "kokoro",
|
||||
"input": text,
|
||||
"voice": voice,
|
||||
"response_format": "mp3",
|
||||
"speed": 1.0,
|
||||
"stream": false,
|
||||
"return_download_link": true,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("kokoro: marshal request: %w", err)
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost,
|
||||
c.baseURL+"/v1/audio/speech", bytes.NewReader(reqBody))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("kokoro: build speech request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := c.http.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("kokoro: speech request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
_, _ = io.Copy(io.Discard, resp.Body)
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("kokoro: speech returned %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
dlPath := resp.Header.Get("X-Download-Path")
|
||||
if dlPath == "" {
|
||||
return nil, fmt.Errorf("kokoro: no X-Download-Path header in response")
|
||||
}
|
||||
filename := dlPath
|
||||
if idx := strings.LastIndex(dlPath, "/"); idx >= 0 {
|
||||
filename = dlPath[idx+1:]
|
||||
}
|
||||
if filename == "" {
|
||||
return nil, fmt.Errorf("kokoro: empty filename in X-Download-Path: %q", dlPath)
|
||||
}
|
||||
|
||||
// ── Step 2: download the generated file ───────────────────────────────────
|
||||
dlURL := c.baseURL + "/v1/download/" + filename
|
||||
dlReq, err := http.NewRequestWithContext(ctx, http.MethodGet, dlURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("kokoro: build download request: %w", err)
|
||||
}
|
||||
|
||||
dlResp, err := c.http.Do(dlReq)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("kokoro: download request: %w", err)
|
||||
}
|
||||
defer dlResp.Body.Close()
|
||||
|
||||
if dlResp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("kokoro: download returned %d", dlResp.StatusCode)
|
||||
}
|
||||
|
||||
data, err := io.ReadAll(dlResp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("kokoro: read download body: %w", err)
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// StreamAudioMP3 calls POST /v1/audio/speech with stream:true and returns an
|
||||
// io.ReadCloser that delivers MP3 frames as kokoro generates them.
|
||||
// kokoro-fastapi emits raw MP3 bytes when stream mode is enabled — no download
|
||||
// redirect; the response body IS the audio stream.
|
||||
func (c *httpClient) StreamAudioMP3(ctx context.Context, text, voice string) (io.ReadCloser, error) {
|
||||
if text == "" {
|
||||
return nil, fmt.Errorf("kokoro: empty text")
|
||||
}
|
||||
if voice == "" {
|
||||
voice = "af_bella"
|
||||
}
|
||||
|
||||
reqBody, err := json.Marshal(map[string]any{
|
||||
"model": "kokoro",
|
||||
"input": text,
|
||||
"voice": voice,
|
||||
"response_format": "mp3",
|
||||
"speed": 1.0,
|
||||
"stream": true,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("kokoro: marshal stream request: %w", err)
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost,
|
||||
c.baseURL+"/v1/audio/speech", bytes.NewReader(reqBody))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("kokoro: build stream request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := c.http.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("kokoro: stream request: %w", err)
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
_, _ = io.Copy(io.Discard, resp.Body)
|
||||
resp.Body.Close()
|
||||
return nil, fmt.Errorf("kokoro: stream returned %d", resp.StatusCode)
|
||||
}
|
||||
return resp.Body, nil
|
||||
}
|
||||
|
||||
// StreamAudioWAV calls POST /v1/audio/speech with stream:true and response_format:wav,
|
||||
// returning an io.ReadCloser that delivers WAV bytes as kokoro generates them.
|
||||
func (c *httpClient) StreamAudioWAV(ctx context.Context, text, voice string) (io.ReadCloser, error) {
|
||||
if text == "" {
|
||||
return nil, fmt.Errorf("kokoro: empty text")
|
||||
}
|
||||
if voice == "" {
|
||||
voice = "af_bella"
|
||||
}
|
||||
|
||||
reqBody, err := json.Marshal(map[string]any{
|
||||
"model": "kokoro",
|
||||
"input": text,
|
||||
"voice": voice,
|
||||
"response_format": "wav",
|
||||
"speed": 1.0,
|
||||
"stream": true,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("kokoro: marshal wav stream request: %w", err)
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost,
|
||||
c.baseURL+"/v1/audio/speech", bytes.NewReader(reqBody))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("kokoro: build wav stream request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := c.http.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("kokoro: wav stream request: %w", err)
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
_, _ = io.Copy(io.Discard, resp.Body)
|
||||
resp.Body.Close()
|
||||
return nil, fmt.Errorf("kokoro: wav stream returned %d", resp.StatusCode)
|
||||
}
|
||||
return resp.Body, nil
|
||||
}
|
||||
|
||||
// ListVoices calls GET /v1/audio/voices and returns the list of voice IDs.
|
||||
func (c *httpClient) ListVoices(ctx context.Context) ([]string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet,
|
||||
c.baseURL+"/v1/audio/voices", nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("kokoro: build voices request: %w", err)
|
||||
}
|
||||
|
||||
resp, err := c.http.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("kokoro: voices request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
_, _ = io.Copy(io.Discard, resp.Body)
|
||||
return nil, fmt.Errorf("kokoro: voices returned %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var result struct {
|
||||
Voices []string `json:"voices"`
|
||||
}
|
||||
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||
return nil, fmt.Errorf("kokoro: decode voices response: %w", err)
|
||||
}
|
||||
return result.Voices, nil
|
||||
}
|
||||
|
||||
// VoiceSampleKey builds the object key under which a voice's sample MP3 is
// stored: "_voice-samples/{voice}.mp3".
//
// Every rune of voice other than ASCII letters, ASCII digits, '_' and '-' is
// replaced by '_', so the result never contains path separators or spaces.
// An empty voice yields "_voice-samples/.mp3".
func VoiceSampleKey(voice string) string {
	sanitise := func(r rune) rune {
		switch {
		case 'a' <= r && r <= 'z':
			return r
		case 'A' <= r && r <= 'Z':
			return r
		case '0' <= r && r <= '9':
			return r
		case r == '_', r == '-':
			return r
		default:
			return '_'
		}
	}
	return fmt.Sprintf("_voice-samples/%s.mp3", strings.Map(sanitise, voice))
}
|
||||
291
backend/internal/kokoro/client_test.go
Normal file
291
backend/internal/kokoro/client_test.go
Normal file
@@ -0,0 +1,291 @@
|
||||
package kokoro_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/libnovel/backend/internal/kokoro"
|
||||
)
|
||||
|
||||
// ── VoiceSampleKey ────────────────────────────────────────────────────────────
|
||||
|
||||
func TestVoiceSampleKey(t *testing.T) {
|
||||
tests := []struct {
|
||||
voice string
|
||||
want string
|
||||
}{
|
||||
{"af_bella", "_voice-samples/af_bella.mp3"},
|
||||
{"am_echo", "_voice-samples/am_echo.mp3"},
|
||||
{"voice with spaces", "_voice-samples/voice_with_spaces.mp3"},
|
||||
{"special!@#chars", "_voice-samples/special___chars.mp3"},
|
||||
{"", "_voice-samples/.mp3"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.voice, func(t *testing.T) {
|
||||
got := kokoro.VoiceSampleKey(tt.voice)
|
||||
if got != tt.want {
|
||||
t.Errorf("VoiceSampleKey(%q) = %q, want %q", tt.voice, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// ── GenerateAudio ─────────────────────────────────────────────────────────────
|
||||
|
||||
func TestGenerateAudio_EmptyText(t *testing.T) {
|
||||
srv := httptest.NewServer(http.NotFoundHandler())
|
||||
defer srv.Close()
|
||||
|
||||
c := kokoro.New(srv.URL)
|
||||
_, err := c.GenerateAudio(context.Background(), "", "af_bella")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for empty text, got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "empty text") {
|
||||
t.Errorf("expected 'empty text' in error, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateAudio_DefaultVoice(t *testing.T) {
|
||||
// Tracks that the voice defaults to af_bella when empty.
|
||||
var capturedBody string
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/v1/audio/speech" {
|
||||
buf := make([]byte, 512)
|
||||
n, _ := r.Body.Read(buf)
|
||||
capturedBody = string(buf[:n])
|
||||
w.Header().Set("X-Download-Path", "/download/test_file.mp3")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return
|
||||
}
|
||||
if strings.HasPrefix(r.URL.Path, "/v1/download/") {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte("fake-mp3-data"))
|
||||
return
|
||||
}
|
||||
http.NotFound(w, r)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := kokoro.New(srv.URL)
|
||||
data, err := c.GenerateAudio(context.Background(), "hello world", "")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if string(data) != "fake-mp3-data" {
|
||||
t.Errorf("unexpected data: %q", string(data))
|
||||
}
|
||||
if !strings.Contains(capturedBody, `"af_bella"`) {
|
||||
t.Errorf("expected default voice af_bella in request body, got: %s", capturedBody)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateAudio_SpeechNon200(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/v1/audio/speech" {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
http.NotFound(w, r)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := kokoro.New(srv.URL)
|
||||
_, err := c.GenerateAudio(context.Background(), "text", "af_bella")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for non-200 speech response")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "500") {
|
||||
t.Errorf("expected 500 in error, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateAudio_NoDownloadPathHeader(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/v1/audio/speech" {
|
||||
// No X-Download-Path header
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return
|
||||
}
|
||||
http.NotFound(w, r)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := kokoro.New(srv.URL)
|
||||
_, err := c.GenerateAudio(context.Background(), "text", "af_bella")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for missing X-Download-Path")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "X-Download-Path") {
|
||||
t.Errorf("expected X-Download-Path in error, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateAudio_DownloadFails(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/v1/audio/speech" {
|
||||
w.Header().Set("X-Download-Path", "/v1/download/speech.mp3")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return
|
||||
}
|
||||
if strings.HasPrefix(r.URL.Path, "/v1/download/") {
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
http.NotFound(w, r)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := kokoro.New(srv.URL)
|
||||
_, err := c.GenerateAudio(context.Background(), "text", "af_bella")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for failed download")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "404") {
|
||||
t.Errorf("expected 404 in error, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateAudio_FullPath(t *testing.T) {
|
||||
// X-Download-Path with a full path: extract just filename.
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/v1/audio/speech" {
|
||||
w.Header().Set("X-Download-Path", "/some/nested/path/audio_abc123.mp3")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return
|
||||
}
|
||||
if r.URL.Path == "/v1/download/audio_abc123.mp3" {
|
||||
_, _ = w.Write([]byte("audio-bytes"))
|
||||
return
|
||||
}
|
||||
http.NotFound(w, r)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := kokoro.New(srv.URL)
|
||||
data, err := c.GenerateAudio(context.Background(), "text", "af_bella")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if string(data) != "audio-bytes" {
|
||||
t.Errorf("unexpected data: %q", string(data))
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateAudio_ContextCancelled(t *testing.T) {
|
||||
// Server that hangs — context should cancel before we get a response.
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
// Never respond.
|
||||
select {}
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel() // cancel immediately
|
||||
|
||||
c := kokoro.New(srv.URL)
|
||||
_, err := c.GenerateAudio(ctx, "text", "af_bella")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for cancelled context")
|
||||
}
|
||||
}
|
||||
|
||||
// ── ListVoices ────────────────────────────────────────────────────────────────
|
||||
|
||||
func TestListVoices_Success(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/v1/audio/voices" {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"voices":["af_bella","am_adam","bf_emma"]}`))
|
||||
return
|
||||
}
|
||||
http.NotFound(w, r)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := kokoro.New(srv.URL)
|
||||
voices, err := c.ListVoices(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if len(voices) != 3 {
|
||||
t.Errorf("expected 3 voices, got %d: %v", len(voices), voices)
|
||||
}
|
||||
if voices[0] != "af_bella" {
|
||||
t.Errorf("expected first voice to be af_bella, got %q", voices[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestListVoices_Non200(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := kokoro.New(srv.URL)
|
||||
_, err := c.ListVoices(context.Background())
|
||||
if err == nil {
|
||||
t.Fatal("expected error for non-200 response")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "503") {
|
||||
t.Errorf("expected 503 in error, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListVoices_MalformedJSON(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, _ = w.Write([]byte(`not-json`))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := kokoro.New(srv.URL)
|
||||
_, err := c.ListVoices(context.Background())
|
||||
if err == nil {
|
||||
t.Fatal("expected error for malformed JSON")
|
||||
}
|
||||
}
|
||||
|
||||
func TestListVoices_EmptyVoices(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"voices":[]}`))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := kokoro.New(srv.URL)
|
||||
voices, err := c.ListVoices(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if len(voices) != 0 {
|
||||
t.Errorf("expected 0 voices, got %d", len(voices))
|
||||
}
|
||||
}
|
||||
|
||||
// ── New ───────────────────────────────────────────────────────────────────────
|
||||
|
||||
func TestNew_TrailingSlashStripped(t *testing.T) {
|
||||
// Verify that a trailing slash on baseURL doesn't produce double-slash paths.
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/v1/audio/voices" {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"voices":["af_bella"]}`))
|
||||
return
|
||||
}
|
||||
http.NotFound(w, r)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
c := kokoro.New(srv.URL + "/") // trailing slash
|
||||
voices, err := c.ListVoices(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if len(voices) == 0 {
|
||||
t.Error("expected at least one voice")
|
||||
}
|
||||
}
|
||||
181
backend/internal/libretranslate/client.go
Normal file
181
backend/internal/libretranslate/client.go
Normal file
@@ -0,0 +1,181 @@
|
||||
// Package libretranslate provides an HTTP client for a self-hosted
|
||||
// LibreTranslate instance. It handles text chunking, concurrent translation,
|
||||
// and reassembly so callers can pass arbitrarily long markdown strings.
|
||||
package libretranslate
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// maxChunkBytes is the target upper bound, in bytes, for each chunk sent to
// LibreTranslate. It is meant to stay below the server's per-request limit
// (reportedly 5000 characters by default — confirm against the deployment).
const maxChunkBytes = 4500

// concurrency is the maximum number of translation requests in flight at
// once for a single Translate call.
const concurrency = 3
|
||||
|
||||
// Client translates text via LibreTranslate.
//
// New returns a nil Client when no URL is configured.
// NOTE(review): an earlier comment said calls on a nil Client return the text
// unchanged, but invoking a method on a nil interface value panics in Go —
// callers presumably check for nil before calling Translate; confirm.
type Client interface {
	// Translate converts text (a raw markdown string) from sourceLang to
	// targetLang and returns the translated markdown with paragraphs in
	// their original order.
	Translate(ctx context.Context, text, sourceLang, targetLang string) (string, error)
}
|
||||
|
||||
// New returns a Client for the given LibreTranslate URL.
|
||||
// Returns nil when url is empty, which disables translation.
|
||||
func New(url, apiKey string) Client {
|
||||
if url == "" {
|
||||
return nil
|
||||
}
|
||||
return &httpClient{
|
||||
url: strings.TrimRight(url, "/"),
|
||||
apiKey: apiKey,
|
||||
http: &http.Client{Timeout: 60 * time.Second},
|
||||
}
|
||||
}
|
||||
|
||||
// httpClient is the HTTP-backed Client implementation returned by New.
type httpClient struct {
	// url is the LibreTranslate base URL without a trailing slash.
	url string
	// apiKey is sent as "api_key" with each request when non-empty.
	apiKey string
	// http performs the outgoing requests.
	http *http.Client
}
|
||||
|
||||
// Translate splits text into paragraph chunks, translates them concurrently
|
||||
// (up to concurrency goroutines), and reassembles in order.
|
||||
func (c *httpClient) Translate(ctx context.Context, text, sourceLang, targetLang string) (string, error) {
|
||||
paragraphs := splitParagraphs(text)
|
||||
if len(paragraphs) == 0 {
|
||||
return text, nil
|
||||
}
|
||||
chunks := binChunks(paragraphs, maxChunkBytes)
|
||||
|
||||
translated := make([]string, len(chunks))
|
||||
errs := make([]error, len(chunks))
|
||||
|
||||
sem := make(chan struct{}, concurrency)
|
||||
var wg sync.WaitGroup
|
||||
|
||||
for i, chunk := range chunks {
|
||||
wg.Add(1)
|
||||
sem <- struct{}{}
|
||||
go func(idx int, chunkText string) {
|
||||
defer wg.Done()
|
||||
defer func() { <-sem }()
|
||||
result, err := c.translateChunk(ctx, chunkText, sourceLang, targetLang)
|
||||
translated[idx] = result
|
||||
errs[idx] = err
|
||||
}(i, chunk)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
for _, err := range errs {
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
return strings.Join(translated, "\n\n"), nil
|
||||
}
|
||||
|
||||
// translateChunk sends a single POST /translate request.
|
||||
func (c *httpClient) translateChunk(ctx context.Context, text, sourceLang, targetLang string) (string, error) {
|
||||
reqBody := map[string]string{
|
||||
"q": text,
|
||||
"source": sourceLang,
|
||||
"target": targetLang,
|
||||
"format": "html",
|
||||
}
|
||||
if c.apiKey != "" {
|
||||
reqBody["api_key"] = c.apiKey
|
||||
}
|
||||
|
||||
b, err := json.Marshal(reqBody)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("libretranslate: marshal request: %w", err)
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.url+"/translate", bytes.NewReader(b))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("libretranslate: build request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := c.http.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("libretranslate: request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
var errBody struct {
|
||||
Error string `json:"error"`
|
||||
}
|
||||
_ = json.NewDecoder(resp.Body).Decode(&errBody)
|
||||
return "", fmt.Errorf("libretranslate: status %d: %s", resp.StatusCode, errBody.Error)
|
||||
}
|
||||
|
||||
var result struct {
|
||||
TranslatedText string `json:"translatedText"`
|
||||
}
|
||||
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||
return "", fmt.Errorf("libretranslate: decode response: %w", err)
|
||||
}
|
||||
return result.TranslatedText, nil
|
||||
}
|
||||
|
||||
// splitParagraphs cuts text at blank lines ("\n\n" after CRLF is normalised
// to LF) and returns the pieces with surrounding whitespace trimmed. Pieces
// that are empty after trimming are dropped; the result is nil when nothing
// remains.
func splitParagraphs(text string) []string {
	normalised := strings.ReplaceAll(text, "\r\n", "\n")

	var kept []string
	for _, piece := range strings.Split(normalised, "\n\n") {
		if trimmed := strings.TrimSpace(piece); trimmed != "" {
			kept = append(kept, trimmed)
		}
	}
	return kept
}
|
||||
|
||||
// binChunks packs consecutive paragraphs into chunks, joining the paragraphs
// inside one chunk with "\n\n". A paragraph is appended to the current chunk
// unless doing so (separator included) would push it past maxBytes, in which
// case the current chunk is emitted and the paragraph starts a new one.
//
// A single paragraph longer than maxBytes is not split: it becomes a chunk of
// its own that exceeds the limit.
func binChunks(paragraphs []string, maxBytes int) []string {
	var (
		out []string
		buf strings.Builder
	)

	for _, para := range paragraphs {
		switch {
		case buf.Len() == 0:
			// Nothing pending: the paragraph opens the chunk, whatever its size.
			buf.WriteString(para)
		case buf.Len()+2+len(para) > maxBytes:
			// Would overflow (2 bytes for the separator): emit and start over.
			out = append(out, buf.String())
			buf.Reset()
			buf.WriteString(para)
		default:
			buf.WriteString("\n\n")
			buf.WriteString(para)
		}
	}

	if buf.Len() > 0 {
		out = append(out, buf.String())
	}
	return out
}
|
||||
327
backend/internal/meili/client.go
Normal file
327
backend/internal/meili/client.go
Normal file
@@ -0,0 +1,327 @@
|
||||
// Package meili provides a thin Meilisearch client for indexing and searching
|
||||
// locally scraped books.
|
||||
//
|
||||
// Index:
|
||||
// - Name: "books"
|
||||
// - Primary key: "slug"
|
||||
// - Searchable attributes: title, author, genres, summary
|
||||
// - Filterable attributes: status, genres
|
||||
// - Sortable attributes: rank, rating, total_chapters, meta_updated
|
||||
//
|
||||
// The client is intentionally simple: UpsertBook and Search only. All
|
||||
// Meilisearch-specific details (index management, attribute configuration)
|
||||
// are handled once in Configure(), called at startup.
|
||||
package meili
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
"github.com/meilisearch/meilisearch-go"
|
||||
)
|
||||
|
||||
// indexName is the UID of the Meilisearch index that holds book documents.
const indexName = "books"
|
||||
|
||||
// Client is the interface for Meilisearch operations used by runner and backend.
|
||||
type Client interface {
|
||||
// UpsertBook adds or updates a book document in the search index.
|
||||
UpsertBook(ctx context.Context, book domain.BookMeta) error
|
||||
// BookExists reports whether a book with the given slug is already in the
|
||||
// index. Used by the catalogue refresh to skip re-indexing known books.
|
||||
BookExists(ctx context.Context, slug string) bool
|
||||
// Search returns up to limit books matching query.
|
||||
Search(ctx context.Context, query string, limit int) ([]domain.BookMeta, error)
|
||||
// Catalogue queries books with optional filters, sort, and pagination.
|
||||
// Returns books, the total hit count for pagination, and a FacetResult
|
||||
// with available genre and status values from the index.
|
||||
Catalogue(ctx context.Context, q CatalogueQuery) ([]domain.BookMeta, int64, FacetResult, error)
|
||||
}
|
||||
|
||||
// CatalogueQuery carries the parameters of a /api/catalogue request.
type CatalogueQuery struct {
	// Q is the full-text query; it may be empty when browsing.
	Q string
	// Genre filters by genre, e.g. "fantasy". "" and "all" disable the filter.
	Genre string
	// Status filters by status, e.g. "ongoing" or "completed". "" and "all"
	// disable the filter.
	Status string
	// Sort selects the ordering: "popular", "new", "update", "top-rated",
	// "rank", or "".
	Sort string
	// Page is the 1-indexed page number; values <= 0 are treated as 1.
	Page int
	// Limit is the number of items per page; values <= 0 are treated as 20.
	Limit int
}
|
||||
|
||||
// FacetResult lists the filter values discovered in the index. Each slice is
// sorted in ascending string order and contains only values that the facet
// distribution of the query reported.
type FacetResult struct {
	// Genres holds the distinct genre values.
	Genres []string
	// Statuses holds the distinct status values.
	Statuses []string
}
|
||||
|
||||
// MeiliClient wraps the meilisearch-go SDK.
|
||||
type MeiliClient struct {
|
||||
idx meilisearch.IndexManager
|
||||
}
|
||||
|
||||
// New creates a MeiliClient. Call Configure() once at startup to ensure the
|
||||
// index exists and has the correct attribute settings.
|
||||
func New(host, apiKey string) *MeiliClient {
|
||||
cli := meilisearch.New(host, meilisearch.WithAPIKey(apiKey))
|
||||
return &MeiliClient{idx: cli.Index(indexName)}
|
||||
}
|
||||
|
||||
// Configure creates the index if absent and sets searchable/filterable
|
||||
// attributes. It is idempotent — safe to call on every startup.
|
||||
func Configure(host, apiKey string) error {
|
||||
cli := meilisearch.New(host, meilisearch.WithAPIKey(apiKey))
|
||||
|
||||
// Create index with primary key. Returns 202 if exists — ignore.
|
||||
task, err := cli.CreateIndex(&meilisearch.IndexConfig{
|
||||
Uid: indexName,
|
||||
PrimaryKey: "slug",
|
||||
})
|
||||
if err != nil {
|
||||
// 400 "index_already_exists" is not an error here; the SDK returns
|
||||
// an error with Code "index_already_exists" which we can ignore.
|
||||
// Any other error is fatal.
|
||||
if apiErr, ok := err.(*meilisearch.Error); ok && apiErr.MeilisearchApiError.Code == "index_already_exists" {
|
||||
// already exists — continue
|
||||
} else {
|
||||
return fmt.Errorf("meili: create index: %w", err)
|
||||
}
|
||||
} else {
|
||||
_ = task // task is async; we don't wait for it
|
||||
}
|
||||
|
||||
idx := cli.Index(indexName)
|
||||
|
||||
searchable := []string{"title", "author", "genres", "summary"}
|
||||
if _, err := idx.UpdateSearchableAttributes(&searchable); err != nil {
|
||||
return fmt.Errorf("meili: update searchable attributes: %w", err)
|
||||
}
|
||||
|
||||
filterable := []interface{}{"status", "genres"}
|
||||
if _, err := idx.UpdateFilterableAttributes(&filterable); err != nil {
|
||||
return fmt.Errorf("meili: update filterable attributes: %w", err)
|
||||
}
|
||||
|
||||
sortable := []string{"rank", "rating", "total_chapters", "meta_updated"}
|
||||
if _, err := idx.UpdateSortableAttributes(&sortable); err != nil {
|
||||
return fmt.Errorf("meili: update sortable attributes: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// bookDoc is the JSON shape of a book as stored in the Meilisearch index.
// "slug" is the primary key.
type bookDoc struct {
	Slug    string   `json:"slug"`
	Title   string   `json:"title"`
	Author  string   `json:"author"`
	Cover   string   `json:"cover"`
	Status  string   `json:"status"`
	Genres  []string `json:"genres"`
	Summary string   `json:"summary"`

	TotalChapters int    `json:"total_chapters"`
	SourceURL     string `json:"source_url"`

	Rank   int     `json:"rank"`
	Rating float64 `json:"rating"`

	// MetaUpdated is the Unix timestamp, in seconds, of the last PocketBase
	// update. It backs the sort=update ("recently updated") ordering.
	MetaUpdated int64 `json:"meta_updated"`
}
|
||||
|
||||
func toDoc(b domain.BookMeta) bookDoc {
|
||||
return bookDoc{
|
||||
Slug: b.Slug,
|
||||
Title: b.Title,
|
||||
Author: b.Author,
|
||||
Cover: b.Cover,
|
||||
Status: b.Status,
|
||||
Genres: b.Genres,
|
||||
Summary: b.Summary,
|
||||
TotalChapters: b.TotalChapters,
|
||||
SourceURL: b.SourceURL,
|
||||
Rank: b.Ranking,
|
||||
Rating: b.Rating,
|
||||
MetaUpdated: b.MetaUpdated,
|
||||
}
|
||||
}
|
||||
|
||||
func fromDoc(d bookDoc) domain.BookMeta {
|
||||
return domain.BookMeta{
|
||||
Slug: d.Slug,
|
||||
Title: d.Title,
|
||||
Author: d.Author,
|
||||
Cover: d.Cover,
|
||||
Status: d.Status,
|
||||
Genres: d.Genres,
|
||||
Summary: d.Summary,
|
||||
TotalChapters: d.TotalChapters,
|
||||
SourceURL: d.SourceURL,
|
||||
Ranking: d.Rank,
|
||||
Rating: d.Rating,
|
||||
MetaUpdated: d.MetaUpdated,
|
||||
}
|
||||
}
|
||||
|
||||
// UpsertBook adds or replaces the book document in Meilisearch. The operation
|
||||
// is fire-and-forget (Meilisearch processes tasks asynchronously).
|
||||
func (c *MeiliClient) UpsertBook(_ context.Context, book domain.BookMeta) error {
|
||||
docs := []bookDoc{toDoc(book)}
|
||||
pk := "slug"
|
||||
if _, err := c.idx.AddDocuments(docs, &meilisearch.DocumentOptions{PrimaryKey: &pk}); err != nil {
|
||||
return fmt.Errorf("meili: upsert book %q: %w", book.Slug, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// BookExists reports whether the slug is already present in the index.
|
||||
// It fetches the document by primary key; a 404 or any error is treated as
|
||||
// "not present" (safe default: re-index rather than silently skip).
|
||||
func (c *MeiliClient) BookExists(_ context.Context, slug string) bool {
|
||||
var doc bookDoc
|
||||
err := c.idx.GetDocument(slug, nil, &doc)
|
||||
return err == nil && doc.Slug != ""
|
||||
}
|
||||
|
||||
// Search returns books matching query, up to limit results.
|
||||
func (c *MeiliClient) Search(_ context.Context, query string, limit int) ([]domain.BookMeta, error) {
|
||||
if limit <= 0 {
|
||||
limit = 20
|
||||
}
|
||||
res, err := c.idx.Search(query, &meilisearch.SearchRequest{
|
||||
Limit: int64(limit),
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("meili: search %q: %w", query, err)
|
||||
}
|
||||
|
||||
books := make([]domain.BookMeta, 0, len(res.Hits))
|
||||
for _, hit := range res.Hits {
|
||||
// Hit is map[string]json.RawMessage — unmarshal directly into bookDoc.
|
||||
var doc bookDoc
|
||||
raw, err := json.Marshal(hit)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if err := json.Unmarshal(raw, &doc); err != nil {
|
||||
continue
|
||||
}
|
||||
books = append(books, fromDoc(doc))
|
||||
}
|
||||
return books, nil
|
||||
}
|
||||
|
||||
// Catalogue queries books with optional full-text search, genre/status filters,
|
||||
// sort order, and pagination. Returns matching books, the total estimate, and
|
||||
// a FacetResult containing available genre and status values from the index.
|
||||
func (c *MeiliClient) Catalogue(_ context.Context, q CatalogueQuery) ([]domain.BookMeta, int64, FacetResult, error) {
|
||||
if q.Limit <= 0 {
|
||||
q.Limit = 20
|
||||
}
|
||||
if q.Page <= 0 {
|
||||
q.Page = 1
|
||||
}
|
||||
|
||||
req := &meilisearch.SearchRequest{
|
||||
Limit: int64(q.Limit),
|
||||
Offset: int64((q.Page - 1) * q.Limit),
|
||||
// Request facet distribution so the UI can build filter options
|
||||
// dynamically without hardcoding genre/status lists.
|
||||
Facets: []string{"genres", "status"},
|
||||
}
|
||||
|
||||
// Build filter
|
||||
var filters []string
|
||||
if q.Genre != "" && q.Genre != "all" {
|
||||
filters = append(filters, fmt.Sprintf("genres = %q", q.Genre))
|
||||
}
|
||||
if q.Status != "" && q.Status != "all" {
|
||||
filters = append(filters, fmt.Sprintf("status = %q", q.Status))
|
||||
}
|
||||
if len(filters) > 0 {
|
||||
req.Filter = strings.Join(filters, " AND ")
|
||||
}
|
||||
|
||||
// Map UI sort tokens to Meilisearch sort expressions.
|
||||
switch q.Sort {
|
||||
case "rank":
|
||||
req.Sort = []string{"rank:asc"}
|
||||
case "top-rated":
|
||||
req.Sort = []string{"rating:desc"}
|
||||
case "new":
|
||||
req.Sort = []string{"total_chapters:desc"}
|
||||
case "update":
|
||||
req.Sort = []string{"meta_updated:desc"}
|
||||
// "popular" and "" → relevance (no explicit sort)
|
||||
}
|
||||
|
||||
res, err := c.idx.Search(q.Q, req)
|
||||
if err != nil {
|
||||
return nil, 0, FacetResult{}, fmt.Errorf("meili: catalogue query: %w", err)
|
||||
}
|
||||
|
||||
books := make([]domain.BookMeta, 0, len(res.Hits))
|
||||
for _, hit := range res.Hits {
|
||||
var doc bookDoc
|
||||
raw, err := json.Marshal(hit)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if err := json.Unmarshal(raw, &doc); err != nil {
|
||||
continue
|
||||
}
|
||||
books = append(books, fromDoc(doc))
|
||||
}
|
||||
|
||||
facets := parseFacets(res.FacetDistribution)
|
||||
return books, res.EstimatedTotalHits, facets, nil
|
||||
}
|
||||
|
||||
// parseFacets extracts sorted genre and status slices from a Meilisearch
|
||||
// facetDistribution raw JSON value.
|
||||
// The JSON shape is: {"genres":{"fantasy":12,"action":5},"status":{"ongoing":7}}
|
||||
func parseFacets(raw json.RawMessage) FacetResult {
|
||||
var result FacetResult
|
||||
if len(raw) == 0 {
|
||||
return result
|
||||
}
|
||||
var dist map[string]map[string]int64
|
||||
if err := json.Unmarshal(raw, &dist); err != nil {
|
||||
return result
|
||||
}
|
||||
if m, ok := dist["genres"]; ok {
|
||||
for k := range m {
|
||||
result.Genres = append(result.Genres, k)
|
||||
}
|
||||
sortStrings(result.Genres)
|
||||
}
|
||||
if m, ok := dist["status"]; ok {
|
||||
for k := range m {
|
||||
result.Statuses = append(result.Statuses, k)
|
||||
}
|
||||
sortStrings(result.Statuses)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// sortStrings sorts s in place in ascending byte-wise order using a stable
// insertion sort. A nil or single-element slice is left untouched.
func sortStrings(s []string) {
	for i := 1; i < len(s); i++ {
		current := s[i]
		pos := i
		// Shift larger elements one slot to the right until the insertion
		// point for current is found.
		for pos > 0 && current < s[pos-1] {
			s[pos] = s[pos-1]
			pos--
		}
		s[pos] = current
	}
}
|
||||
|
||||
// NoopClient is a no-op Client used when Meilisearch is not configured.
|
||||
type NoopClient struct{}
|
||||
|
||||
func (NoopClient) UpsertBook(_ context.Context, _ domain.BookMeta) error { return nil }
|
||||
func (NoopClient) BookExists(_ context.Context, _ string) bool { return false }
|
||||
func (NoopClient) Search(_ context.Context, _ string, _ int) ([]domain.BookMeta, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (NoopClient) Catalogue(_ context.Context, _ CatalogueQuery) ([]domain.BookMeta, int64, FacetResult, error) {
|
||||
return nil, 0, FacetResult{}, nil
|
||||
}
|
||||
@@ -3,13 +3,31 @@
|
||||
package htmlutil
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/libnovel/scraper/internal/scraper"
|
||||
"github.com/libnovel/backend/internal/scraper"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// ResolveURL turns href into an absolute URL. An href that already starts
// with "http://" or "https://" is returned unchanged; anything else is
// resolved against base following the usual URL reference rules.
//
// If either base or href cannot be parsed, the plain concatenation
// base + href is returned as a best-effort fallback.
func ResolveURL(base, href string) string {
	for _, scheme := range []string{"http://", "https://"} {
		if strings.HasPrefix(href, scheme) {
			return href
		}
	}

	baseURL, err := url.Parse(base)
	if err != nil {
		return base + href
	}
	refURL, err := url.Parse(href)
	if err != nil {
		return base + href
	}
	return baseURL.ResolveReference(refURL).String()
}
|
||||
|
||||
// ParseHTML parses raw HTML and returns the root node.
|
||||
func ParseHTML(raw string) (*html.Node, error) {
|
||||
return html.Parse(strings.NewReader(raw))
|
||||
@@ -48,8 +66,8 @@ matched:
|
||||
return true
|
||||
}
|
||||
|
||||
// attrVal returns the value of attribute key from node n.
|
||||
func attrVal(n *html.Node, key string) string {
|
||||
// AttrVal returns the value of attribute key from node n.
|
||||
func AttrVal(n *html.Node, key string) string {
|
||||
for _, a := range n.Attr {
|
||||
if a.Key == key {
|
||||
return a.Val
|
||||
@@ -58,8 +76,8 @@ func attrVal(n *html.Node, key string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// textContent returns the concatenated text content of all descendant text nodes.
|
||||
func textContent(n *html.Node) string {
|
||||
// TextContent returns the concatenated text content of all descendant text nodes.
|
||||
func TextContent(n *html.Node) string {
|
||||
var sb strings.Builder
|
||||
var walk func(*html.Node)
|
||||
walk = func(cur *html.Node) {
|
||||
@@ -114,9 +132,9 @@ func FindAll(root *html.Node, sel scraper.Selector) []*html.Node {
|
||||
// If sel.Attr is set the attribute value is returned; otherwise the inner text.
|
||||
func ExtractText(n *html.Node, sel scraper.Selector) string {
|
||||
if sel.Attr != "" {
|
||||
return attrVal(n, sel.Attr)
|
||||
return AttrVal(n, sel.Attr)
|
||||
}
|
||||
return textContent(n)
|
||||
return TextContent(n)
|
||||
}
|
||||
|
||||
// ExtractFirst locates the first match in root and returns its text/attr value.
|
||||
@@ -140,29 +158,15 @@ func ExtractAll(root *html.Node, sel scraper.Selector) []string {
|
||||
return out
|
||||
}
|
||||
|
||||
// InnerHTML returns the serialized inner HTML of node n.
|
||||
func InnerHTML(n *html.Node) string {
|
||||
var sb strings.Builder
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
_ = html.Render(&sb, c)
|
||||
}
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
// NodeToMarkdown converts the children of an HTML node to a plain-text/Markdown
|
||||
// representation suitable for chapter storage. Block elements become newlines;
|
||||
// inline elements are inlined. Runs of more than one blank line are collapsed
|
||||
// to a single blank line.
|
||||
// representation suitable for chapter storage.
|
||||
func NodeToMarkdown(n *html.Node) string {
|
||||
var sb strings.Builder
|
||||
nodeToMD(n, &sb)
|
||||
// Collapse 3+ consecutive newlines (i.e. more than one blank line) to 2.
|
||||
out := multiBlankLine.ReplaceAllString(sb.String(), "\n\n")
|
||||
return strings.TrimSpace(out)
|
||||
}
|
||||
|
||||
// multiBlankLine matches three or more consecutive newline characters
|
||||
// (any mix of \n and surrounding whitespace-only lines).
|
||||
var multiBlankLine = regexp.MustCompile(`\n(\s*\n){2,}`)
|
||||
|
||||
var blockElements = map[string]bool{
|
||||
538
backend/internal/novelfire/scraper.go
Normal file
538
backend/internal/novelfire/scraper.go
Normal file
@@ -0,0 +1,538 @@
|
||||
// Package novelfire provides a NovelScraper implementation for novelfire.net.
|
||||
//
|
||||
// Site structure (as of 2025):
|
||||
//
|
||||
// Catalogue : https://novelfire.net/genre-all/sort-new/status-all/all-novel?page=N
|
||||
// Book page : https://novelfire.net/book/{slug}
|
||||
// Chapters : https://novelfire.net/book/{slug}/chapters?page=N
|
||||
// Chapter : https://novelfire.net/book/{slug}/{chapter-slug}
|
||||
package novelfire
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"math/rand"
|
||||
"net/url"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/libnovel/backend/internal/browser"
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
"github.com/libnovel/backend/internal/novelfire/htmlutil"
|
||||
"github.com/libnovel/backend/internal/scraper"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
const (
	// baseURL is the site origin; relative links found in scraped pages are
	// resolved against it.
	baseURL = "https://novelfire.net"
	// cataloguePath is the all-novels listing sorted newest-first, walked by
	// ScrapeCatalogue.
	cataloguePath = "/genre-all/sort-new/status-all/all-novel"
	// rankingPath is the all-novels listing sorted by popularity, walked by
	// ScrapeRanking.
	rankingPath = "/genre-all/sort-popular/status-all/all-novel"
)
|
||||
|
||||
// Scraper is the novelfire.net implementation of scraper.NovelScraper.
type Scraper struct {
	// client fetches the raw content of a page URL.
	client browser.Client
	// log receives progress and retry messages; New substitutes
	// slog.Default() when given nil.
	log *slog.Logger
}
|
||||
|
||||
// Compile-time interface check.
|
||||
var _ scraper.NovelScraper = (*Scraper)(nil)
|
||||
|
||||
// New returns a new novelfire Scraper backed by client.
|
||||
func New(client browser.Client, log *slog.Logger) *Scraper {
|
||||
if log == nil {
|
||||
log = slog.Default()
|
||||
}
|
||||
return &Scraper{client: client, log: log}
|
||||
}
|
||||
|
||||
// SourceName implements NovelScraper.
|
||||
func (s *Scraper) SourceName() string { return "novelfire.net" }
|
||||
|
||||
// ── CatalogueProvider ─────────────────────────────────────────────────────────
|
||||
|
||||
// ScrapeCatalogue streams all CatalogueEntry values across all catalogue pages.
|
||||
// Each page fetch uses retryGet with 429-aware exponential backoff.
|
||||
// A small inter-page delay (cataloguePageDelay) is inserted between requests to
|
||||
// avoid hammering the server when paging through hundreds of catalogue pages.
|
||||
func (s *Scraper) ScrapeCatalogue(ctx context.Context) (<-chan domain.CatalogueEntry, <-chan error) {
|
||||
entries := make(chan domain.CatalogueEntry, 64)
|
||||
errs := make(chan error, 16)
|
||||
|
||||
go func() {
|
||||
defer close(entries)
|
||||
defer close(errs)
|
||||
|
||||
pageURL := baseURL + cataloguePath
|
||||
page := 1
|
||||
|
||||
for pageURL != "" {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
// Polite inter-page delay — skipped on the very first page.
|
||||
if page > 1 {
|
||||
jitter := time.Duration(500+rand.Intn(1000)) * time.Millisecond
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(jitter):
|
||||
}
|
||||
}
|
||||
|
||||
s.log.Info("scraping catalogue page", "page", page, "url", pageURL)
|
||||
raw, err := retryGet(ctx, s.log, s.client, pageURL, 9, 10*time.Second)
|
||||
if err != nil {
|
||||
errs <- fmt.Errorf("catalogue page %d: %w", page, err)
|
||||
return
|
||||
}
|
||||
|
||||
root, err := htmlutil.ParseHTML(raw)
|
||||
if err != nil {
|
||||
errs <- fmt.Errorf("catalogue page %d parse: %w", page, err)
|
||||
return
|
||||
}
|
||||
|
||||
cards := htmlutil.FindAll(root, scraper.Selector{Tag: "li", Class: "novel-item", Multiple: true})
|
||||
if len(cards) == 0 {
|
||||
s.log.Warn("no novel cards found, stopping pagination", "page", page)
|
||||
return
|
||||
}
|
||||
|
||||
for _, card := range cards {
|
||||
linkNode := htmlutil.FindFirst(card, scraper.Selector{Tag: "a", Attr: "href"})
|
||||
titleNode := htmlutil.FindFirst(card, scraper.Selector{Tag: "h4", Class: "novel-title"})
|
||||
|
||||
var title, href string
|
||||
if linkNode != nil {
|
||||
href = htmlutil.ExtractText(linkNode, scraper.Selector{Tag: "a", Attr: "href"})
|
||||
}
|
||||
if titleNode != nil {
|
||||
title = strings.TrimSpace(htmlutil.ExtractText(titleNode, scraper.Selector{}))
|
||||
}
|
||||
if href == "" || title == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
bookURL := resolveURL(baseURL, href)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case entries <- domain.CatalogueEntry{Slug: slugFromURL(bookURL), Title: title, URL: bookURL}:
|
||||
}
|
||||
}
|
||||
|
||||
if !hasNextPageLink(root) {
|
||||
break
|
||||
}
|
||||
nextHref := ""
|
||||
for _, a := range htmlutil.FindAll(root, scraper.Selector{Tag: "a", Multiple: true}) {
|
||||
if htmlutil.AttrVal(a, "rel") == "next" {
|
||||
nextHref = htmlutil.AttrVal(a, "href")
|
||||
break
|
||||
}
|
||||
}
|
||||
if nextHref == "" {
|
||||
break
|
||||
}
|
||||
pageURL = resolveURL(baseURL, nextHref)
|
||||
page++
|
||||
}
|
||||
}()
|
||||
|
||||
return entries, errs
|
||||
}
|
||||
|
||||
// ── MetadataProvider ──────────────────────────────────────────────────────────
|
||||
|
||||
// ScrapeMetadata fetches and parses book metadata from the book's landing page.
|
||||
// Uses retryGet with 429-aware exponential backoff (up to 9 attempts).
|
||||
func (s *Scraper) ScrapeMetadata(ctx context.Context, bookURL string) (domain.BookMeta, error) {
|
||||
s.log.Debug("metadata fetch starting", "url", bookURL)
|
||||
|
||||
raw, err := retryGet(ctx, s.log, s.client, bookURL, 9, 10*time.Second)
|
||||
if err != nil {
|
||||
return domain.BookMeta{}, fmt.Errorf("metadata fetch %s: %w", bookURL, err)
|
||||
}
|
||||
|
||||
root, err := htmlutil.ParseHTML(raw)
|
||||
if err != nil {
|
||||
return domain.BookMeta{}, fmt.Errorf("metadata parse %s: %w", bookURL, err)
|
||||
}
|
||||
|
||||
title := htmlutil.ExtractFirst(root, scraper.Selector{Tag: "h1", Class: "novel-title"})
|
||||
author := htmlutil.ExtractFirst(root, scraper.Selector{Tag: "span", Class: "author"})
|
||||
|
||||
var cover string
|
||||
if fig := htmlutil.FindFirst(root, scraper.Selector{Tag: "figure", Class: "cover"}); fig != nil {
|
||||
cover = htmlutil.ExtractFirst(fig, scraper.Selector{Tag: "img", Attr: "src"})
|
||||
if cover != "" && !strings.HasPrefix(cover, "http") {
|
||||
cover = baseURL + cover
|
||||
}
|
||||
}
|
||||
|
||||
// Status: novelfire renders <strong class="ongoing">Ongoing</strong> (or
|
||||
// "completed", "hiatus") inside the .header-stats block. We take the text
|
||||
// content and lowercase it so the index value is always canonical lowercase.
|
||||
var status string
|
||||
for _, cls := range []string{"ongoing", "completed", "hiatus"} {
|
||||
if v := htmlutil.ExtractFirst(root, scraper.Selector{Tag: "strong", Class: cls}); v != "" {
|
||||
status = strings.ToLower(strings.TrimSpace(v))
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Genres: novelfire renders <div class="categories"><ul><li><a class="property-item">Genre</a>
|
||||
// Each <a class="property-item"> is one genre tag. Lowercase for index consistency.
|
||||
var genres []string
|
||||
if categoriesNode := htmlutil.FindFirst(root, scraper.Selector{Tag: "div", Class: "categories"}); categoriesNode != nil {
|
||||
for _, v := range htmlutil.ExtractAll(categoriesNode, scraper.Selector{Tag: "a", Class: "property-item", Multiple: true}) {
|
||||
if v != "" {
|
||||
genres = append(genres, strings.ToLower(strings.TrimSpace(v)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
summary := htmlutil.ExtractFirst(root, scraper.Selector{Tag: "div", Class: "summary"})
|
||||
totalStr := htmlutil.ExtractFirst(root, scraper.Selector{Tag: "span", Class: "chapter-count"})
|
||||
totalChapters := parseChapterCount(totalStr)
|
||||
|
||||
slug := slugFromURL(bookURL)
|
||||
|
||||
meta := domain.BookMeta{
|
||||
Slug: slug,
|
||||
Title: title,
|
||||
Author: author,
|
||||
Cover: cover,
|
||||
Status: status,
|
||||
Genres: genres,
|
||||
Summary: summary,
|
||||
TotalChapters: totalChapters,
|
||||
SourceURL: bookURL,
|
||||
}
|
||||
s.log.Debug("metadata parsed", "slug", meta.Slug, "title", meta.Title)
|
||||
return meta, nil
|
||||
}
|
||||
|
||||
// ── ChapterListProvider ───────────────────────────────────────────────────────
|
||||
|
||||
// ScrapeChapterList returns chapter references for a book, ordered ascending.
|
||||
// upTo > 0 stops pagination as soon as at least upTo chapter numbers have been
|
||||
// collected — use this for range scrapes so we don't paginate 100 pages just
|
||||
// to discover refs we'll never scrape. upTo == 0 fetches all pages.
|
||||
// Each page fetch uses retryGet with 429-aware exponential backoff.
|
||||
func (s *Scraper) ScrapeChapterList(ctx context.Context, bookURL string, upTo int) ([]domain.ChapterRef, error) {
|
||||
var refs []domain.ChapterRef
|
||||
baseChapterURL := strings.TrimRight(bookURL, "/") + "/chapters"
|
||||
page := 1
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return refs, ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
pageURL := fmt.Sprintf("%s?page=%d", baseChapterURL, page)
|
||||
s.log.Info("scraping chapter list", "page", page, "url", pageURL)
|
||||
|
||||
raw, err := retryGet(ctx, s.log, s.client, pageURL, 9, 6*time.Second)
|
||||
if err != nil {
|
||||
return refs, fmt.Errorf("chapter list page %d: %w", page, err)
|
||||
}
|
||||
|
||||
root, err := htmlutil.ParseHTML(raw)
|
||||
if err != nil {
|
||||
return refs, fmt.Errorf("chapter list page %d parse: %w", page, err)
|
||||
}
|
||||
|
||||
chapterList := htmlutil.FindFirst(root, scraper.Selector{Class: "chapter-list"})
|
||||
if chapterList == nil {
|
||||
s.log.Debug("chapter list container not found, stopping pagination", "page", page)
|
||||
break
|
||||
}
|
||||
|
||||
items := htmlutil.FindAll(chapterList, scraper.Selector{Tag: "li"})
|
||||
if len(items) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
for _, item := range items {
|
||||
linkNode := htmlutil.FindFirst(item, scraper.Selector{Tag: "a"})
|
||||
if linkNode == nil {
|
||||
continue
|
||||
}
|
||||
href := htmlutil.ExtractText(linkNode, scraper.Selector{Attr: "href"})
|
||||
chTitle := htmlutil.ExtractText(linkNode, scraper.Selector{})
|
||||
if href == "" {
|
||||
continue
|
||||
}
|
||||
chURL := resolveURL(baseURL, href)
|
||||
num := chapterNumberFromURL(chURL)
|
||||
if num <= 0 {
|
||||
num = len(refs) + 1
|
||||
s.log.Warn("chapter number not parseable from URL, falling back to position",
|
||||
"url", chURL, "position", num)
|
||||
}
|
||||
refs = append(refs, domain.ChapterRef{
|
||||
Number: num,
|
||||
Title: strings.TrimSpace(chTitle),
|
||||
URL: chURL,
|
||||
})
|
||||
}
|
||||
|
||||
// Early-stop: if we have seen at least upTo chapter numbers, we have
|
||||
// enough refs to cover the requested range — no need to paginate further.
|
||||
if upTo > 0 && len(refs) > 0 && refs[len(refs)-1].Number >= upTo {
|
||||
s.log.Debug("chapter list early-stop reached", "upTo", upTo, "collected", len(refs))
|
||||
break
|
||||
}
|
||||
|
||||
page++
|
||||
}
|
||||
|
||||
return refs, nil
|
||||
}
|
||||
|
||||
// ── ChapterTextProvider ───────────────────────────────────────────────────────
|
||||
|
||||
// ScrapeChapterText fetches and parses a single chapter page.
|
||||
func (s *Scraper) ScrapeChapterText(ctx context.Context, ref domain.ChapterRef) (domain.Chapter, error) {
|
||||
s.log.Debug("chapter text fetch starting", "chapter", ref.Number, "url", ref.URL)
|
||||
|
||||
raw, err := retryGet(ctx, s.log, s.client, ref.URL, 9, 6*time.Second)
|
||||
if err != nil {
|
||||
return domain.Chapter{}, fmt.Errorf("chapter %d fetch: %w", ref.Number, err)
|
||||
}
|
||||
|
||||
root, err := htmlutil.ParseHTML(raw)
|
||||
if err != nil {
|
||||
return domain.Chapter{}, fmt.Errorf("chapter %d parse: %w", ref.Number, err)
|
||||
}
|
||||
|
||||
container := htmlutil.FindFirst(root, scraper.Selector{ID: "content"})
|
||||
if container == nil {
|
||||
return domain.Chapter{}, fmt.Errorf("chapter %d: #content container not found in %s", ref.Number, ref.URL)
|
||||
}
|
||||
|
||||
text := htmlutil.NodeToMarkdown(container)
|
||||
|
||||
s.log.Debug("chapter text parsed", "chapter", ref.Number, "text_bytes", len(text))
|
||||
|
||||
return domain.Chapter{Ref: ref, Text: text}, nil
|
||||
}
|
||||
|
||||
// ── RankingProvider ───────────────────────────────────────────────────────────
|
||||
|
||||
// ScrapeRanking pages through up to maxPages pages of the popular-novels listing.
|
||||
// maxPages <= 0 means all pages. The caller decides whether to persist items.
|
||||
func (s *Scraper) ScrapeRanking(ctx context.Context, maxPages int) (<-chan domain.BookMeta, <-chan error) {
|
||||
entries := make(chan domain.BookMeta, 32)
|
||||
errs := make(chan error, 16)
|
||||
|
||||
go func() {
|
||||
defer close(entries)
|
||||
defer close(errs)
|
||||
|
||||
rank := 1
|
||||
|
||||
for page := 1; maxPages <= 0 || page <= maxPages; page++ {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
pageURL := fmt.Sprintf("%s%s?page=%d", baseURL, rankingPath, page)
|
||||
s.log.Info("scraping popular ranking page", "page", page, "url", pageURL)
|
||||
|
||||
raw, err := s.client.GetContent(ctx, pageURL)
|
||||
if err != nil {
|
||||
errs <- fmt.Errorf("ranking page %d: %w", page, err)
|
||||
return
|
||||
}
|
||||
|
||||
root, err := htmlutil.ParseHTML(raw)
|
||||
if err != nil {
|
||||
errs <- fmt.Errorf("ranking page %d parse: %w", page, err)
|
||||
return
|
||||
}
|
||||
|
||||
cards := htmlutil.FindAll(root, scraper.Selector{Tag: "li", Class: "novel-item", Multiple: true})
|
||||
if len(cards) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
for _, card := range cards {
|
||||
linkNode := htmlutil.FindFirst(card, scraper.Selector{Tag: "a"})
|
||||
if linkNode == nil {
|
||||
continue
|
||||
}
|
||||
href := htmlutil.ExtractText(linkNode, scraper.Selector{Tag: "a", Attr: "href"})
|
||||
bookURL := resolveURL(baseURL, href)
|
||||
if bookURL == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
title := strings.TrimSpace(htmlutil.ExtractFirst(card, scraper.Selector{Tag: "h4", Class: "novel-title"}))
|
||||
if title == "" {
|
||||
title = strings.TrimSpace(htmlutil.ExtractText(linkNode, scraper.Selector{Tag: "a", Attr: "title"}))
|
||||
}
|
||||
if title == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
var cover string
|
||||
if fig := htmlutil.FindFirst(card, scraper.Selector{Tag: "figure", Class: "novel-cover"}); fig != nil {
|
||||
cover = htmlutil.ExtractFirst(fig, scraper.Selector{Tag: "img", Attr: "data-src"})
|
||||
if cover == "" {
|
||||
cover = htmlutil.ExtractFirst(fig, scraper.Selector{Tag: "img", Attr: "src"})
|
||||
}
|
||||
if strings.HasPrefix(cover, "data:") {
|
||||
cover = ""
|
||||
}
|
||||
if cover != "" && !strings.HasPrefix(cover, "http") {
|
||||
cover = baseURL + cover
|
||||
}
|
||||
}
|
||||
|
||||
meta := domain.BookMeta{
|
||||
Slug: slugFromURL(bookURL),
|
||||
Title: title,
|
||||
Cover: cover,
|
||||
SourceURL: bookURL,
|
||||
Ranking: rank,
|
||||
}
|
||||
rank++
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case entries <- meta:
|
||||
}
|
||||
}
|
||||
|
||||
if !hasNextPageLink(root) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return entries, errs
|
||||
}
|
||||
|
||||
// ── helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
// resolveURL is a package-local shorthand for htmlutil.ResolveURL.
func resolveURL(base, href string) string {
	return htmlutil.ResolveURL(base, href)
}
|
||||
|
||||
func hasNextPageLink(root *html.Node) bool {
|
||||
links := htmlutil.FindAll(root, scraper.Selector{Tag: "a", Multiple: true})
|
||||
for _, a := range links {
|
||||
for _, attr := range a.Attr {
|
||||
if attr.Key == "rel" && attr.Val == "next" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// slugFromURL derives a book slug from a URL. For paths of the form
// /book/{slug}[/...] it returns {slug}; for any other path it returns the
// last path segment (empty for a bare "/" path). If bookURL cannot be parsed
// it is returned unchanged.
func slugFromURL(bookURL string) string {
	parsed, err := url.Parse(bookURL)
	if err != nil {
		return bookURL
	}

	segments := strings.Split(strings.Trim(parsed.Path, "/"), "/")
	switch {
	case len(segments) >= 2 && segments[0] == "book":
		return segments[1]
	case len(segments) > 0:
		return segments[len(segments)-1]
	default:
		return ""
	}
}
|
||||
|
||||
// parseChapterCount extracts the leading integer from text such as
// "1,234 Chapters". Thousands separators (commas) are removed first. Empty
// input, or a first token that is not a number, yields 0.
func parseChapterCount(s string) int {
	tokens := strings.Fields(strings.ReplaceAll(s, ",", ""))
	if len(tokens) == 0 {
		return 0
	}
	// The conversion error is deliberately ignored: a non-numeric first
	// token simply counts as 0 chapters.
	count, _ := strconv.Atoi(tokens[0])
	return count
}
|
||||
|
||||
// chapterNumberFromURL extracts the chapter number from the last path
// segment of chapterURL. The prefixes "chapter-", "chap-" and "ch-" are
// stripped (in that order), then the first run of ASCII digits in what
// remains is parsed. It returns 0 when the URL cannot be parsed or the
// segment contains no digits.
func chapterNumberFromURL(chapterURL string) int {
	parsed, err := url.Parse(chapterURL)
	if err != nil {
		return 0
	}

	seg := path.Base(parsed.Path)
	for _, prefix := range []string{"chapter-", "chap-", "ch-"} {
		seg = strings.TrimPrefix(seg, prefix)
	}

	isDigit := func(r rune) bool { return r >= '0' && r <= '9' }
	start := strings.IndexFunc(seg, isDigit)
	if start < 0 {
		return 0
	}
	run := seg[start:]
	if end := strings.IndexFunc(run, func(r rune) bool { return !isDigit(r) }); end >= 0 {
		run = run[:end]
	}

	// The conversion error is deliberately ignored, as only digits reach Atoi.
	n, _ := strconv.Atoi(run)
	return n
}
|
||||
|
||||
// retryGet calls client.GetContent up to maxAttempts times with exponential backoff.
|
||||
// If the server returns 429 (ErrRateLimit), the suggested Retry-After delay is used
|
||||
// instead of the geometric backoff delay.
|
||||
func retryGet(
|
||||
ctx context.Context,
|
||||
log *slog.Logger,
|
||||
client browser.Client,
|
||||
pageURL string,
|
||||
maxAttempts int,
|
||||
baseDelay time.Duration,
|
||||
) (string, error) {
|
||||
var lastErr error
|
||||
delay := baseDelay
|
||||
for attempt := 1; attempt <= maxAttempts; attempt++ {
|
||||
raw, err := client.GetContent(ctx, pageURL)
|
||||
if err == nil {
|
||||
return raw, nil
|
||||
}
|
||||
lastErr = err
|
||||
if ctx.Err() != nil {
|
||||
return "", err
|
||||
}
|
||||
if attempt < maxAttempts {
|
||||
// If the server is rate-limiting us, honour its Retry-After delay.
|
||||
waitFor := delay
|
||||
var rlErr *browser.RateLimitError
|
||||
if errors.As(err, &rlErr) {
|
||||
waitFor = rlErr.RetryAfter
|
||||
if log != nil {
|
||||
log.Warn("rate limited, backing off",
|
||||
"url", pageURL, "attempt", attempt, "retry_in", waitFor)
|
||||
}
|
||||
} else {
|
||||
if log != nil {
|
||||
log.Warn("fetch failed, retrying",
|
||||
"url", pageURL, "attempt", attempt, "retry_in", delay, "err", err)
|
||||
}
|
||||
delay *= 2
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return "", ctx.Err()
|
||||
case <-time.After(waitFor):
|
||||
}
|
||||
}
|
||||
}
|
||||
return "", lastErr
|
||||
}
|
||||
180
backend/internal/novelfire/scraper_test.go
Normal file
180
backend/internal/novelfire/scraper_test.go
Normal file
@@ -0,0 +1,180 @@
|
||||
package novelfire
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSlugFromURL(t *testing.T) {
|
||||
cases := []struct {
|
||||
url string
|
||||
want string
|
||||
}{
|
||||
{"https://novelfire.net/book/shadow-slave", "shadow-slave"},
|
||||
{"https://novelfire.net/book/a-dragon-against-the-whole-world", "a-dragon-against-the-whole-world"},
|
||||
{"https://novelfire.net/book/foo/chapter-1", "foo"},
|
||||
{"https://novelfire.net/", ""},
|
||||
{"not-a-url", "not-a-url"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got := slugFromURL(c.url)
|
||||
if got != c.want {
|
||||
t.Errorf("slugFromURL(%q) = %q, want %q", c.url, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestChapterNumberFromURL(t *testing.T) {
|
||||
cases := []struct {
|
||||
url string
|
||||
want int
|
||||
}{
|
||||
{"https://novelfire.net/book/shadow-slave/chapter-42", 42},
|
||||
{"https://novelfire.net/book/shadow-slave/chapter-1000", 1000},
|
||||
{"https://novelfire.net/book/shadow-slave/chap-7", 7},
|
||||
{"https://novelfire.net/book/shadow-slave/ch-3", 3},
|
||||
{"https://novelfire.net/book/shadow-slave/42", 42},
|
||||
{"https://novelfire.net/book/shadow-slave/no-number-here", 0},
|
||||
{"not-a-url", 0},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got := chapterNumberFromURL(c.url)
|
||||
if got != c.want {
|
||||
t.Errorf("chapterNumberFromURL(%q) = %d, want %d", c.url, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseChapterCount(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
want int
|
||||
}{
|
||||
{"123 Chapters", 123},
|
||||
{"1,234 Chapters", 1234},
|
||||
{"0", 0},
|
||||
{"", 0},
|
||||
{"500", 500},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got := parseChapterCount(c.in)
|
||||
if got != c.want {
|
||||
t.Errorf("parseChapterCount(%q) = %d, want %d", c.in, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRetryGet_ContextCancellation(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel() // cancel immediately
|
||||
|
||||
stub := newStubClient()
|
||||
stub.setError("https://example.com/page", context.Canceled)
|
||||
|
||||
_, err := retryGet(ctx, nil, stub, "https://example.com/page", 3, 0)
|
||||
if err == nil {
|
||||
t.Fatal("expected error on cancelled context")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRetryGet_EventualSuccess(t *testing.T) {
|
||||
stub := newStubClient()
|
||||
calls := 0
|
||||
stub.setFn("https://example.com/page", func() (string, error) {
|
||||
calls++
|
||||
if calls < 3 {
|
||||
return "", context.DeadlineExceeded
|
||||
}
|
||||
return "<html>ok</html>", nil
|
||||
})
|
||||
|
||||
got, err := retryGet(context.Background(), nil, stub, "https://example.com/page", 5, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if got != "<html>ok</html>" {
|
||||
t.Errorf("got %q, want html", got)
|
||||
}
|
||||
if calls != 3 {
|
||||
t.Errorf("expected 3 calls, got %d", calls)
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseMetadataSelectors verifies that the status and genres selectors
|
||||
// match the current novelfire.net HTML structure.
|
||||
func TestParseMetadataSelectors(t *testing.T) {
|
||||
// Minimal HTML reproducing the relevant novelfire.net book page structure.
|
||||
const html = `<!DOCTYPE html>
|
||||
<html><body>
|
||||
<h1 class="novel-title">Shadow Slave</h1>
|
||||
<span class="author">Guiltythree</span>
|
||||
<figure class="cover"><img src="https://cdn.example.com/cover.jpg"></figure>
|
||||
<div class="header-stats">
|
||||
<span><strong>123</strong><small>Chapters</small></span>
|
||||
<span> <strong class="ongoing">Ongoing</strong> <small>Status</small></span>
|
||||
</div>
|
||||
<div class="categories">
|
||||
<h4>Genres</h4>
|
||||
<ul>
|
||||
<li><a href="/genre-fantasy/..." class="property-item">Fantasy</a></li>
|
||||
<li><a href="/genre-action/..." class="property-item">Action</a></li>
|
||||
<li><a href="/genre-adventure/..." class="property-item">Adventure</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<span class="chapter-count">123 Chapters</span>
|
||||
</body></html>`
|
||||
|
||||
stub := newStubClient()
|
||||
stub.setFn("https://novelfire.net/book/shadow-slave", func() (string, error) {
|
||||
return html, nil
|
||||
})
|
||||
|
||||
s := &Scraper{client: stub, log: slog.Default()}
|
||||
meta, err := s.ScrapeMetadata(t.Context(), "https://novelfire.net/book/shadow-slave")
|
||||
if err != nil {
|
||||
t.Fatalf("ScrapeMetadata: %v", err)
|
||||
}
|
||||
|
||||
if meta.Status != "ongoing" {
|
||||
t.Errorf("status = %q, want %q", meta.Status, "ongoing")
|
||||
}
|
||||
|
||||
wantGenres := []string{"fantasy", "action", "adventure"}
|
||||
if len(meta.Genres) != len(wantGenres) {
|
||||
t.Fatalf("genres = %v, want %v", meta.Genres, wantGenres)
|
||||
}
|
||||
for i, g := range meta.Genres {
|
||||
if g != wantGenres[i] {
|
||||
t.Errorf("genres[%d] = %q, want %q", i, g, wantGenres[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── minimal stub client for tests ─────────────────────────────────────────────
|
||||
|
||||
// stubClient is an in-memory page fetcher for tests. Responses are keyed by
// exact page URL: a registered function takes precedence over a registered
// error, and any unregistered URL fails with context.DeadlineExceeded.
type stubClient struct {
	errors map[string]error
	fns    map[string]func() (string, error)
}

// newStubClient returns a stubClient with no URLs registered.
func newStubClient() *stubClient {
	s := new(stubClient)
	s.errors = map[string]error{}
	s.fns = map[string]func() (string, error){}
	return s
}

// setError makes every fetch of u fail with err (unless a function is also
// registered for u).
func (s *stubClient) setError(u string, err error) {
	s.errors[u] = err
}

// setFn makes every fetch of u return whatever fn returns.
func (s *stubClient) setFn(u string, fn func() (string, error)) {
	s.fns[u] = fn
}

// GetContent serves pageURL from the registered functions, then from the
// registered errors, and otherwise fails with context.DeadlineExceeded. The
// context argument is ignored.
func (s *stubClient) GetContent(_ context.Context, pageURL string) (string, error) {
	fn, hasFn := s.fns[pageURL]
	if hasFn {
		return fn()
	}
	failure, hasErr := s.errors[pageURL]
	if hasErr {
		return "", failure
	}
	return "", context.DeadlineExceeded
}
|
||||
222
backend/internal/orchestrator/orchestrator.go
Normal file
222
backend/internal/orchestrator/orchestrator.go
Normal file
@@ -0,0 +1,222 @@
|
||||
// Package orchestrator coordinates metadata extraction, chapter-list fetching,
|
||||
// and parallel chapter scraping for a single book.
|
||||
//
|
||||
// Design:
|
||||
// - RunBook scrapes one book (metadata + chapter list + chapter texts) end-to-end.
|
||||
// - N worker goroutines pull chapter refs from a shared queue and call ScrapeChapterText.
|
||||
// - The caller (runner poll loop) owns the outer task-claim / finish cycle.
|
||||
// - An optional PostMetadata hook (set in Config) is called after WriteMetadata
|
||||
// succeeds. The runner uses this to upsert books into Meilisearch.
|
||||
package orchestrator
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"runtime"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/libnovel/backend/internal/bookstore"
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
"github.com/libnovel/backend/internal/scraper"
|
||||
)
|
||||
|
||||
// Config holds tunable parameters for the orchestrator.
type Config struct {
	// Workers is the number of goroutines used to scrape chapters in parallel.
	// New substitutes runtime.NumCPU() for any value <= 0.
	Workers int
	// PostMetadata is an optional hook called with the scraped BookMeta after
	// WriteMetadata succeeds. The hook has no return value, so it cannot fail
	// the run; it must report its own errors.
	// Used by the runner to index books in Meilisearch.
	PostMetadata func(ctx context.Context, meta domain.BookMeta)
}
|
||||
|
||||
// Orchestrator runs a single-book scrape pipeline.
|
||||
type Orchestrator struct {
|
||||
novel scraper.NovelScraper
|
||||
store bookstore.BookWriter
|
||||
log *slog.Logger
|
||||
workers int
|
||||
postMetadata func(ctx context.Context, meta domain.BookMeta)
|
||||
}
|
||||
|
||||
// New returns a new Orchestrator.
|
||||
func New(cfg Config, novel scraper.NovelScraper, store bookstore.BookWriter, log *slog.Logger) *Orchestrator {
|
||||
if log == nil {
|
||||
log = slog.Default()
|
||||
}
|
||||
workers := cfg.Workers
|
||||
if workers <= 0 {
|
||||
workers = runtime.NumCPU()
|
||||
}
|
||||
return &Orchestrator{
|
||||
novel: novel,
|
||||
store: store,
|
||||
log: log,
|
||||
workers: workers,
|
||||
postMetadata: cfg.PostMetadata,
|
||||
}
|
||||
}
|
||||
|
||||
// RunBook scrapes a single book described by task. It handles:
|
||||
// 1. Metadata scrape + write
|
||||
// 2. Chapter list scrape + write
|
||||
// 3. Parallel chapter text scrape + write (worker pool)
|
||||
//
|
||||
// Returns a ScrapeResult with counters. The result's ErrorMessage is non-empty
|
||||
// if the run failed at the metadata or chapter-list level.
|
||||
func (o *Orchestrator) RunBook(ctx context.Context, task domain.ScrapeTask) domain.ScrapeResult {
|
||||
o.log.Info("orchestrator: RunBook starting",
|
||||
"task_id", task.ID,
|
||||
"kind", task.Kind,
|
||||
"url", task.TargetURL,
|
||||
"workers", o.workers,
|
||||
)
|
||||
|
||||
var result domain.ScrapeResult
|
||||
|
||||
if task.TargetURL == "" {
|
||||
result.ErrorMessage = "task has no target URL"
|
||||
return result
|
||||
}
|
||||
|
||||
// ── Step 1: Metadata ──────────────────────────────────────────────────────
|
||||
meta, err := o.novel.ScrapeMetadata(ctx, task.TargetURL)
|
||||
if err != nil {
|
||||
o.log.Error("metadata scrape failed", "url", task.TargetURL, "err", err)
|
||||
result.ErrorMessage = fmt.Sprintf("metadata: %v", err)
|
||||
result.Errors++
|
||||
return result
|
||||
}
|
||||
|
||||
if err := o.store.WriteMetadata(ctx, meta); err != nil {
|
||||
o.log.Error("metadata write failed", "slug", meta.Slug, "err", err)
|
||||
// non-fatal: continue to chapters
|
||||
result.Errors++
|
||||
} else {
|
||||
result.BooksFound = 1
|
||||
// Fire optional post-metadata hook (e.g. Meilisearch indexing).
|
||||
if o.postMetadata != nil {
|
||||
o.postMetadata(ctx, meta)
|
||||
}
|
||||
}
|
||||
|
||||
o.log.Info("metadata saved", "slug", meta.Slug, "title", meta.Title)
|
||||
|
||||
// ── Step 2: Chapter list ──────────────────────────────────────────────────
|
||||
refs, err := o.novel.ScrapeChapterList(ctx, task.TargetURL, task.ToChapter)
|
||||
if err != nil {
|
||||
o.log.Error("chapter list scrape failed", "slug", meta.Slug, "err", err)
|
||||
result.ErrorMessage = fmt.Sprintf("chapter list: %v", err)
|
||||
result.Errors++
|
||||
return result
|
||||
}
|
||||
|
||||
o.log.Info("chapter list fetched", "slug", meta.Slug, "chapters", len(refs))
|
||||
|
||||
// Persist chapter refs (without text) so the index exists early.
|
||||
if wErr := o.store.WriteChapterRefs(ctx, meta.Slug, refs); wErr != nil {
|
||||
o.log.Warn("chapter refs write failed", "slug", meta.Slug, "err", wErr)
|
||||
}
|
||||
|
||||
// ── Step 3: Chapter texts (worker pool) ───────────────────────────────────
|
||||
type chapterJob struct {
|
||||
slug string
|
||||
ref domain.ChapterRef
|
||||
total int // total chapters to scrape (for progress logging)
|
||||
}
|
||||
work := make(chan chapterJob, o.workers*4)
|
||||
|
||||
var scraped, skipped, errors atomic.Int64
|
||||
var wg sync.WaitGroup
|
||||
|
||||
for i := 0; i < o.workers; i++ {
|
||||
wg.Add(1)
|
||||
go func(workerID int) {
|
||||
defer wg.Done()
|
||||
for job := range work {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
if o.store.ChapterExists(ctx, job.slug, job.ref) {
|
||||
o.log.Debug("chapter already exists, skipping",
|
||||
"slug", job.slug, "chapter", job.ref.Number)
|
||||
skipped.Add(1)
|
||||
continue
|
||||
}
|
||||
|
||||
ch, err := o.novel.ScrapeChapterText(ctx, job.ref)
|
||||
if err != nil {
|
||||
o.log.Error("chapter scrape failed",
|
||||
"slug", job.slug, "chapter", job.ref.Number, "err", err)
|
||||
errors.Add(1)
|
||||
continue
|
||||
}
|
||||
|
||||
if err := o.store.WriteChapter(ctx, job.slug, ch); err != nil {
|
||||
o.log.Error("chapter write failed",
|
||||
"slug", job.slug, "chapter", job.ref.Number, "err", err)
|
||||
errors.Add(1)
|
||||
continue
|
||||
}
|
||||
|
||||
n := scraped.Add(1)
|
||||
// Log a progress summary every 25 chapters scraped.
|
||||
if n%25 == 0 {
|
||||
o.log.Info("scraping chapters",
|
||||
"slug", job.slug, "scraped", n, "total", job.total)
|
||||
}
|
||||
}
|
||||
}(i)
|
||||
}
|
||||
|
||||
// Count how many chapters will actually be enqueued (for progress logging).
|
||||
toScrape := 0
|
||||
for _, ref := range refs {
|
||||
if task.FromChapter > 0 && ref.Number < task.FromChapter {
|
||||
continue
|
||||
}
|
||||
if task.ToChapter > 0 && ref.Number > task.ToChapter {
|
||||
continue
|
||||
}
|
||||
toScrape++
|
||||
}
|
||||
|
||||
// Enqueue chapter jobs respecting the optional range filter from the task.
|
||||
for _, ref := range refs {
|
||||
if task.FromChapter > 0 && ref.Number < task.FromChapter {
|
||||
skipped.Add(1)
|
||||
continue
|
||||
}
|
||||
if task.ToChapter > 0 && ref.Number > task.ToChapter {
|
||||
skipped.Add(1)
|
||||
continue
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
goto drain
|
||||
case work <- chapterJob{slug: meta.Slug, ref: ref, total: toScrape}:
|
||||
}
|
||||
}
|
||||
|
||||
drain:
|
||||
close(work)
|
||||
wg.Wait()
|
||||
|
||||
result.ChaptersScraped = int(scraped.Load())
|
||||
result.ChaptersSkipped = int(skipped.Load())
|
||||
result.Errors += int(errors.Load())
|
||||
|
||||
o.log.Info("book scrape finished",
|
||||
"slug", meta.Slug,
|
||||
"scraped", result.ChaptersScraped,
|
||||
"skipped", result.ChaptersSkipped,
|
||||
"errors", result.Errors,
|
||||
)
|
||||
return result
|
||||
}
|
||||
210
backend/internal/orchestrator/orchestrator_test.go
Normal file
210
backend/internal/orchestrator/orchestrator_test.go
Normal file
@@ -0,0 +1,210 @@
|
||||
package orchestrator
|
||||
|
||||
import (
	"context"
	"errors"
	"strconv"
	"sync"
	"testing"

	"github.com/libnovel/backend/internal/domain"
)
|
||||
|
||||
// ── stubs ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
type stubScraper struct {
|
||||
meta domain.BookMeta
|
||||
metaErr error
|
||||
refs []domain.ChapterRef
|
||||
refsErr error
|
||||
chapters map[int]domain.Chapter
|
||||
chapErr map[int]error
|
||||
}
|
||||
|
||||
// SourceName reports the fixed source name "stub".
func (s *stubScraper) SourceName() string {
	return "stub"
}
|
||||
|
||||
func (s *stubScraper) ScrapeCatalogue(ctx context.Context) (<-chan domain.CatalogueEntry, <-chan error) {
|
||||
ch := make(chan domain.CatalogueEntry)
|
||||
errs := make(chan error)
|
||||
close(ch)
|
||||
close(errs)
|
||||
return ch, errs
|
||||
}
|
||||
|
||||
func (s *stubScraper) ScrapeMetadata(_ context.Context, _ string) (domain.BookMeta, error) {
|
||||
return s.meta, s.metaErr
|
||||
}
|
||||
|
||||
func (s *stubScraper) ScrapeChapterList(_ context.Context, _ string, _ int) ([]domain.ChapterRef, error) {
|
||||
return s.refs, s.refsErr
|
||||
}
|
||||
|
||||
func (s *stubScraper) ScrapeChapterText(_ context.Context, ref domain.ChapterRef) (domain.Chapter, error) {
|
||||
if s.chapErr != nil {
|
||||
if err, ok := s.chapErr[ref.Number]; ok {
|
||||
return domain.Chapter{}, err
|
||||
}
|
||||
}
|
||||
if s.chapters != nil {
|
||||
if ch, ok := s.chapters[ref.Number]; ok {
|
||||
return ch, nil
|
||||
}
|
||||
}
|
||||
return domain.Chapter{Ref: ref, Text: "text"}, nil
|
||||
}
|
||||
|
||||
func (s *stubScraper) ScrapeRanking(ctx context.Context, maxPages int) (<-chan domain.BookMeta, <-chan error) {
|
||||
ch := make(chan domain.BookMeta)
|
||||
errs := make(chan error)
|
||||
close(ch)
|
||||
close(errs)
|
||||
return ch, errs
|
||||
}
|
||||
|
||||
type stubStore struct {
|
||||
mu sync.Mutex
|
||||
metaWritten []domain.BookMeta
|
||||
chaptersWritten []domain.Chapter
|
||||
existing map[string]bool // "slug:N" → exists
|
||||
writeMetaErr error
|
||||
}
|
||||
|
||||
func (s *stubStore) WriteMetadata(_ context.Context, meta domain.BookMeta) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.writeMetaErr != nil {
|
||||
return s.writeMetaErr
|
||||
}
|
||||
s.metaWritten = append(s.metaWritten, meta)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *stubStore) WriteChapter(_ context.Context, slug string, ch domain.Chapter) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.chaptersWritten = append(s.chaptersWritten, ch)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *stubStore) WriteChapterRefs(_ context.Context, _ string, _ []domain.ChapterRef) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *stubStore) ChapterExists(_ context.Context, slug string, ref domain.ChapterRef) bool {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
key := slug + ":" + string(rune('0'+ref.Number))
|
||||
return s.existing[key]
|
||||
}
|
||||
|
||||
// ── tests ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
func TestRunBook_HappyPath(t *testing.T) {
|
||||
sc := &stubScraper{
|
||||
meta: domain.BookMeta{Slug: "test-book", Title: "Test Book", SourceURL: "https://example.com/book/test-book"},
|
||||
refs: []domain.ChapterRef{
|
||||
{Number: 1, Title: "Ch 1", URL: "https://example.com/book/test-book/chapter-1"},
|
||||
{Number: 2, Title: "Ch 2", URL: "https://example.com/book/test-book/chapter-2"},
|
||||
{Number: 3, Title: "Ch 3", URL: "https://example.com/book/test-book/chapter-3"},
|
||||
},
|
||||
}
|
||||
st := &stubStore{}
|
||||
o := New(Config{Workers: 2}, sc, st, nil)
|
||||
|
||||
task := domain.ScrapeTask{
|
||||
ID: "t1",
|
||||
Kind: "book",
|
||||
TargetURL: "https://example.com/book/test-book",
|
||||
}
|
||||
|
||||
result := o.RunBook(context.Background(), task)
|
||||
|
||||
if result.ErrorMessage != "" {
|
||||
t.Fatalf("unexpected error: %s", result.ErrorMessage)
|
||||
}
|
||||
if result.BooksFound != 1 {
|
||||
t.Errorf("BooksFound = %d, want 1", result.BooksFound)
|
||||
}
|
||||
if result.ChaptersScraped != 3 {
|
||||
t.Errorf("ChaptersScraped = %d, want 3", result.ChaptersScraped)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunBook_MetadataError(t *testing.T) {
|
||||
sc := &stubScraper{metaErr: errors.New("404 not found")}
|
||||
st := &stubStore{}
|
||||
o := New(Config{Workers: 1}, sc, st, nil)
|
||||
|
||||
result := o.RunBook(context.Background(), domain.ScrapeTask{
|
||||
ID: "t2",
|
||||
TargetURL: "https://example.com/book/missing",
|
||||
})
|
||||
|
||||
if result.ErrorMessage == "" {
|
||||
t.Fatal("expected ErrorMessage to be set")
|
||||
}
|
||||
if result.Errors != 1 {
|
||||
t.Errorf("Errors = %d, want 1", result.Errors)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunBook_ChapterRange(t *testing.T) {
|
||||
sc := &stubScraper{
|
||||
meta: domain.BookMeta{Slug: "range-book", SourceURL: "https://example.com/book/range-book"},
|
||||
refs: func() []domain.ChapterRef {
|
||||
var refs []domain.ChapterRef
|
||||
for i := 1; i <= 10; i++ {
|
||||
refs = append(refs, domain.ChapterRef{Number: i, URL: "https://example.com/book/range-book/chapter-" + string(rune('0'+i))})
|
||||
}
|
||||
return refs
|
||||
}(),
|
||||
}
|
||||
st := &stubStore{}
|
||||
o := New(Config{Workers: 2}, sc, st, nil)
|
||||
|
||||
result := o.RunBook(context.Background(), domain.ScrapeTask{
|
||||
ID: "t3",
|
||||
TargetURL: "https://example.com/book/range-book",
|
||||
FromChapter: 3,
|
||||
ToChapter: 7,
|
||||
})
|
||||
|
||||
if result.ErrorMessage != "" {
|
||||
t.Fatalf("unexpected error: %s", result.ErrorMessage)
|
||||
}
|
||||
// chapters 3–7 = 5 scraped, chapters 1-2 and 8-10 = 5 skipped
|
||||
if result.ChaptersScraped != 5 {
|
||||
t.Errorf("ChaptersScraped = %d, want 5", result.ChaptersScraped)
|
||||
}
|
||||
if result.ChaptersSkipped != 5 {
|
||||
t.Errorf("ChaptersSkipped = %d, want 5", result.ChaptersSkipped)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunBook_ContextCancellation(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel()
|
||||
|
||||
sc := &stubScraper{
|
||||
meta: domain.BookMeta{Slug: "ctx-book", SourceURL: "https://example.com/book/ctx-book"},
|
||||
refs: []domain.ChapterRef{
|
||||
{Number: 1, URL: "https://example.com/book/ctx-book/chapter-1"},
|
||||
},
|
||||
}
|
||||
st := &stubStore{}
|
||||
o := New(Config{Workers: 1}, sc, st, nil)
|
||||
|
||||
// Should not panic; result may have errors or zero chapters.
|
||||
result := o.RunBook(ctx, domain.ScrapeTask{
|
||||
ID: "t4",
|
||||
TargetURL: "https://example.com/book/ctx-book",
|
||||
})
|
||||
_ = result
|
||||
}
|
||||
|
||||
func TestRunBook_EmptyTargetURL(t *testing.T) {
|
||||
o := New(Config{Workers: 1}, &stubScraper{}, &stubStore{}, nil)
|
||||
result := o.RunBook(context.Background(), domain.ScrapeTask{ID: "t5"})
|
||||
if result.ErrorMessage == "" {
|
||||
t.Fatal("expected ErrorMessage for empty target URL")
|
||||
}
|
||||
}
|
||||
120
backend/internal/otelsetup/otelsetup.go
Normal file
120
backend/internal/otelsetup/otelsetup.go
Normal file
@@ -0,0 +1,120 @@
|
||||
// Package otelsetup initialises the OpenTelemetry SDK for the LibNovel backend.
|
||||
//
|
||||
// It reads two environment variables:
|
||||
//
|
||||
// OTEL_EXPORTER_OTLP_ENDPOINT — OTLP/HTTP endpoint; accepts either a full
|
||||
// URL ("https://otel.example.com") or a bare
|
||||
// host[:port] ("otel-collector:4318").
|
||||
// TLS is used when the value starts with "https://".
|
||||
// OTEL_SERVICE_NAME — service name reported in traces (default: "backend")
|
||||
//
|
||||
// When OTEL_EXPORTER_OTLP_ENDPOINT is empty the function is a no-op: it
|
||||
// returns a nil shutdown func and the default slog.Logger, so callers never
|
||||
// need to branch on it.
|
||||
//
|
||||
// Usage in main.go:
|
||||
//
|
||||
// shutdown, log, err := otelsetup.Init(ctx, version)
|
||||
// if err != nil { return err }
|
||||
// if shutdown != nil { defer shutdown() }
|
||||
package otelsetup
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/contrib/bridges/otelslog"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
|
||||
otellog "go.opentelemetry.io/otel/log/global"
|
||||
"go.opentelemetry.io/otel/sdk/log"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
sdktrace "go.opentelemetry.io/otel/sdk/trace"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
|
||||
)
|
||||
|
||||
// Init sets up TracerProvider and LoggerProvider that export via OTLP/HTTP.
|
||||
//
|
||||
// Returns:
|
||||
// - shutdown: flushes and stops both providers (nil when OTel is disabled).
|
||||
// - logger: an slog.Logger bridged to OTel logs (falls back to default when disabled).
|
||||
// - err: non-nil only on SDK initialisation failure.
|
||||
func Init(ctx context.Context, version string) (shutdown func(), logger *slog.Logger, err error) {
|
||||
rawEndpoint := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
|
||||
if rawEndpoint == "" {
|
||||
return nil, slog.Default(), nil // OTel disabled — not an error
|
||||
}
|
||||
|
||||
// WithEndpoint expects a host[:port] value — no scheme.
|
||||
// Support both "https://otel.example.com" and "otel-collector:4318".
|
||||
useTLS := strings.HasPrefix(rawEndpoint, "https://")
|
||||
endpoint := strings.TrimPrefix(rawEndpoint, "https://")
|
||||
endpoint = strings.TrimPrefix(endpoint, "http://")
|
||||
|
||||
serviceName := os.Getenv("OTEL_SERVICE_NAME")
|
||||
if serviceName == "" {
|
||||
serviceName = "backend"
|
||||
}
|
||||
|
||||
// ── Shared resource ───────────────────────────────────────────────────────
|
||||
res, err := resource.New(ctx,
|
||||
resource.WithAttributes(
|
||||
semconv.ServiceName(serviceName),
|
||||
semconv.ServiceVersion(version),
|
||||
),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, slog.Default(), fmt.Errorf("otelsetup: create resource: %w", err)
|
||||
}
|
||||
|
||||
// ── Trace provider ────────────────────────────────────────────────────────
|
||||
traceOpts := []otlptracehttp.Option{otlptracehttp.WithEndpoint(endpoint)}
|
||||
if !useTLS {
|
||||
traceOpts = append(traceOpts, otlptracehttp.WithInsecure())
|
||||
}
|
||||
traceExp, err := otlptracehttp.New(ctx, traceOpts...)
|
||||
if err != nil {
|
||||
return nil, slog.Default(), fmt.Errorf("otelsetup: create OTLP trace exporter: %w", err)
|
||||
}
|
||||
|
||||
tp := sdktrace.NewTracerProvider(
|
||||
sdktrace.WithBatcher(traceExp),
|
||||
sdktrace.WithResource(res),
|
||||
sdktrace.WithSampler(sdktrace.ParentBased(sdktrace.TraceIDRatioBased(0.2))),
|
||||
)
|
||||
otel.SetTracerProvider(tp)
|
||||
|
||||
// ── Log provider ──────────────────────────────────────────────────────────
|
||||
logOpts := []otlploghttp.Option{otlploghttp.WithEndpoint(endpoint)}
|
||||
if !useTLS {
|
||||
logOpts = append(logOpts, otlploghttp.WithInsecure())
|
||||
}
|
||||
logExp, err := otlploghttp.New(ctx, logOpts...)
|
||||
if err != nil {
|
||||
return nil, slog.Default(), fmt.Errorf("otelsetup: create OTLP log exporter: %w", err)
|
||||
}
|
||||
|
||||
lp := log.NewLoggerProvider(
|
||||
log.WithProcessor(log.NewBatchProcessor(logExp)),
|
||||
log.WithResource(res),
|
||||
)
|
||||
otellog.SetLoggerProvider(lp)
|
||||
|
||||
// Bridge slog → OTel logs. Structured fields and trace IDs are forwarded
|
||||
// automatically; Grafana can correlate log lines with Tempo traces.
|
||||
otelLogger := otelslog.NewLogger(serviceName)
|
||||
|
||||
shutdown = func() {
|
||||
shutCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
_ = tp.Shutdown(shutCtx)
|
||||
_ = lp.Shutdown(shutCtx)
|
||||
}
|
||||
|
||||
return shutdown, otelLogger, nil
|
||||
}
|
||||
254
backend/internal/pockettts/client.go
Normal file
254
backend/internal/pockettts/client.go
Normal file
@@ -0,0 +1,254 @@
|
||||
// Package pockettts provides a client for the kyutai-labs/pocket-tts TTS service.
|
||||
//
|
||||
// pocket-tts exposes a non-OpenAI API:
|
||||
//
|
||||
// POST /tts (multipart form: text, voice_url) → streaming WAV
|
||||
// GET /health → {"status":"healthy"}
|
||||
//
|
||||
// GenerateAudio streams the WAV response and transcodes it to MP3 using ffmpeg,
|
||||
// so callers receive MP3 bytes — the same format as the kokoro client — and the
|
||||
// rest of the pipeline does not need to care which TTS engine was used.
|
||||
//
|
||||
// StreamAudioMP3 is the streaming variant: it returns an io.ReadCloser that
|
||||
// yields MP3-encoded audio incrementally as pocket-tts generates it, without
|
||||
// buffering the full output.
|
||||
//
|
||||
// Predefined voices (pass the bare name as the voice parameter):
|
||||
//
|
||||
// alba, marius, javert, jean, fantine, cosette, eponine, azelma,
|
||||
// anna, vera, charles, paul, george, mary, jane, michael, eve,
|
||||
// bill_boerst, peter_yearsley, stuart_bell
|
||||
package pockettts
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// PredefinedVoices holds the names of the voices built into pocket-tts,
// stored as a set. The runner consults it to decide which TTS engine
// should handle a task.
var PredefinedVoices = map[string]struct{}{
	"alba":           {},
	"marius":         {},
	"javert":         {},
	"jean":           {},
	"fantine":        {},
	"cosette":        {},
	"eponine":        {},
	"azelma":         {},
	"anna":           {},
	"vera":           {},
	"charles":        {},
	"paul":           {},
	"george":         {},
	"mary":           {},
	"jane":           {},
	"michael":        {},
	"eve":            {},
	"bill_boerst":    {},
	"peter_yearsley": {},
	"stuart_bell":    {},
}

// IsPocketTTSVoice reports whether voice is one of the PredefinedVoices.
// The match is exact and case-sensitive.
func IsPocketTTSVoice(voice string) bool {
	_, known := PredefinedVoices[voice]
	return known
}
|
||||
|
||||
// Client describes the operations the backend needs from the pocket-tts service.
type Client interface {
	// GenerateAudio turns text into speech with the given voice and returns
	// the complete result as MP3 bytes.
	// Voice must be one of the predefined pocket-tts voice names.
	GenerateAudio(ctx context.Context, text, voice string) ([]byte, error)

	// StreamAudioMP3 turns text into speech and returns a reader that yields
	// MP3-encoded audio incrementally through a live ffmpeg transcode pipe.
	// The caller must always close the returned ReadCloser.
	StreamAudioMP3(ctx context.Context, text, voice string) (io.ReadCloser, error)

	// StreamAudioWAV turns text into speech and returns a reader that yields
	// the raw WAV audio straight from pocket-tts, with no transcoding.
	// The stream begins with a WAV header followed by 16-bit PCM frames at 16 kHz.
	// The caller must always close the returned ReadCloser.
	StreamAudioWAV(ctx context.Context, text, voice string) (io.ReadCloser, error)

	// ListVoices returns the names of the available predefined voices.
	ListVoices(ctx context.Context) ([]string, error)
}
|
||||
|
||||
// httpClient implements the pocket-tts client on top of net/http.
type httpClient struct {
	baseURL string       // service root, stored without a trailing "/" by New
	http    *http.Client // shared client used for every request
}
|
||||
|
||||
// New returns a Client targeting baseURL (e.g. "https://pocket-tts.libnovel.cc").
|
||||
func New(baseURL string) Client {
|
||||
return &httpClient{
|
||||
baseURL: strings.TrimRight(baseURL, "/"),
|
||||
http: &http.Client{Timeout: 10 * time.Minute},
|
||||
}
|
||||
}
|
||||
|
||||
// GenerateAudio posts to POST /tts and transcodes the WAV response to MP3
|
||||
// using the system ffmpeg binary. Requires ffmpeg to be on PATH (available in
|
||||
// the runner Docker image via Alpine's ffmpeg package).
|
||||
func (c *httpClient) GenerateAudio(ctx context.Context, text, voice string) ([]byte, error) {
|
||||
if text == "" {
|
||||
return nil, fmt.Errorf("pockettts: empty text")
|
||||
}
|
||||
if voice == "" {
|
||||
voice = "alba"
|
||||
}
|
||||
|
||||
resp, err := c.postTTS(ctx, text, voice)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
wavData, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("pockettts: read response body: %w", err)
|
||||
}
|
||||
|
||||
// ── Transcode WAV → MP3 via ffmpeg ────────────────────────────────────────
|
||||
mp3Data, err := wavToMP3(ctx, wavData)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("pockettts: transcode to mp3: %w", err)
|
||||
}
|
||||
return mp3Data, nil
|
||||
}
|
||||
|
||||
// StreamAudioMP3 posts to POST /tts and returns an io.ReadCloser that delivers
|
||||
// MP3 bytes as pocket-tts generates WAV frames. ffmpeg runs as a subprocess
|
||||
// with stdin connected to the live WAV stream and stdout piped to the caller.
|
||||
// The caller must always close the returned ReadCloser.
|
||||
func (c *httpClient) StreamAudioMP3(ctx context.Context, text, voice string) (io.ReadCloser, error) {
|
||||
if text == "" {
|
||||
return nil, fmt.Errorf("pockettts: empty text")
|
||||
}
|
||||
if voice == "" {
|
||||
voice = "alba"
|
||||
}
|
||||
|
||||
resp, err := c.postTTS(ctx, text, voice)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Start ffmpeg: read WAV from stdin (the live HTTP body), write MP3 to stdout.
|
||||
cmd := exec.CommandContext(ctx,
|
||||
"ffmpeg",
|
||||
"-hide_banner", "-loglevel", "error",
|
||||
"-i", "pipe:0", // WAV from stdin
|
||||
"-f", "mp3", // output format
|
||||
"-q:a", "2", // VBR ~190 kbps
|
||||
"pipe:1", // MP3 to stdout
|
||||
)
|
||||
cmd.Stdin = resp.Body
|
||||
|
||||
pr, pw := io.Pipe()
|
||||
cmd.Stdout = pw
|
||||
|
||||
var stderrBuf bytes.Buffer
|
||||
cmd.Stderr = &stderrBuf
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
resp.Body.Close()
|
||||
return nil, fmt.Errorf("pockettts: start ffmpeg: %w", err)
|
||||
}
|
||||
|
||||
// Close the write end of the pipe when ffmpeg exits, propagating any error.
|
||||
go func() {
|
||||
waitErr := cmd.Wait()
|
||||
resp.Body.Close()
|
||||
if waitErr != nil {
|
||||
pw.CloseWithError(fmt.Errorf("ffmpeg: %w (stderr: %s)", waitErr, stderrBuf.String()))
|
||||
} else {
|
||||
pw.Close()
|
||||
}
|
||||
}()
|
||||
|
||||
return pr, nil
|
||||
}
|
||||
|
||||
// StreamAudioWAV posts to POST /tts and returns an io.ReadCloser that delivers
|
||||
// raw WAV bytes directly from pocket-tts — no ffmpeg transcoding required.
|
||||
// The first bytes will be a WAV header (RIFF/fmt chunk) followed by PCM frames.
|
||||
// The caller must always close the returned ReadCloser.
|
||||
func (c *httpClient) StreamAudioWAV(ctx context.Context, text, voice string) (io.ReadCloser, error) {
|
||||
if text == "" {
|
||||
return nil, fmt.Errorf("pockettts: empty text")
|
||||
}
|
||||
if voice == "" {
|
||||
voice = "alba"
|
||||
}
|
||||
|
||||
resp, err := c.postTTS(ctx, text, voice)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return resp.Body, nil
|
||||
}
|
||||
|
||||
// ListVoices returns the statically known predefined voice names.
|
||||
// pocket-tts has no REST endpoint for listing voices.
|
||||
func (c *httpClient) ListVoices(_ context.Context) ([]string, error) {
|
||||
voices := make([]string, 0, len(PredefinedVoices))
|
||||
for v := range PredefinedVoices {
|
||||
voices = append(voices, v)
|
||||
}
|
||||
return voices, nil
|
||||
}
|
||||
|
||||
// postTTS sends a multipart POST /tts request and returns the raw response.
|
||||
// The caller is responsible for closing resp.Body.
|
||||
func (c *httpClient) postTTS(ctx context.Context, text, voice string) (*http.Response, error) {
|
||||
var body bytes.Buffer
|
||||
mw := multipart.NewWriter(&body)
|
||||
|
||||
if err := mw.WriteField("text", text); err != nil {
|
||||
return nil, fmt.Errorf("pockettts: write text field: %w", err)
|
||||
}
|
||||
if err := mw.WriteField("voice_url", voice); err != nil {
|
||||
return nil, fmt.Errorf("pockettts: write voice_url field: %w", err)
|
||||
}
|
||||
if err := mw.Close(); err != nil {
|
||||
return nil, fmt.Errorf("pockettts: close multipart writer: %w", err)
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost,
|
||||
c.baseURL+"/tts", &body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("pockettts: build request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", mw.FormDataContentType())
|
||||
|
||||
resp, err := c.http.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("pockettts: request: %w", err)
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
_, _ = io.Copy(io.Discard, resp.Body)
|
||||
resp.Body.Close()
|
||||
return nil, fmt.Errorf("pockettts: server returned %d", resp.StatusCode)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// wavToMP3 runs ffmpeg to turn the WAV bytes in wav into MP3 bytes.
// The input is fed to ffmpeg on stdin (pipe:0) and the result is collected
// from stdout (pipe:1). When ffmpeg cannot be started or exits with an error,
// the returned error includes whatever ffmpeg wrote to stderr.
func wavToMP3(ctx context.Context, wav []byte) ([]byte, error) {
	ffmpegArgs := []string{
		"-hide_banner", "-loglevel", "error",
		"-i", "pipe:0", // read WAV from stdin
		"-f", "mp3", // output format
		"-q:a", "2", // VBR quality ~190 kbps
		"pipe:1", // write MP3 to stdout
	}
	ffmpeg := exec.CommandContext(ctx, "ffmpeg", ffmpegArgs...)

	var mp3, diagnostics bytes.Buffer
	ffmpeg.Stdin = bytes.NewReader(wav)
	ffmpeg.Stdout = &mp3
	ffmpeg.Stderr = &diagnostics

	runErr := ffmpeg.Run()
	if runErr != nil {
		return nil, fmt.Errorf("ffmpeg: %w (stderr: %s)", runErr, diagnostics.String())
	}
	return mp3.Bytes(), nil
}
|
||||
96
backend/internal/presigncache/cache.go
Normal file
96
backend/internal/presigncache/cache.go
Normal file
@@ -0,0 +1,96 @@
|
||||
// Package presigncache provides a Valkey (Redis-compatible) backed cache for
|
||||
// MinIO presigned URLs. The backend generates presigned URLs and stores them
|
||||
// here with a TTL; subsequent requests for the same key return the cached URL
|
||||
// without re-contacting MinIO.
|
||||
//
|
||||
// Design:
|
||||
// - Cache is intentionally best-effort: Get reports ok == false on a miss and
|
||||
// on any Valkey error, so callers always have a fallback path to regenerate.
|
||||
// - Set failures are non-fatal for callers — a miss on the next request is acceptable.
|
||||
// - TTL should be set shorter than the actual presigned URL lifetime so that
|
||||
// cached URLs are always valid when served. Recommended: 55 minutes for a
|
||||
// 1-hour presigned URL.
|
||||
package presigncache
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
|
||||
// Cache is the contract for presigned-URL caches.
// Implementations must be safe for concurrent use.
type Cache interface {
	// Get looks up the URL cached under key. ok is false both on a cache
	// miss and when the lookup failed.
	Get(ctx context.Context, key string) (url string, ok bool, err error)

	// Set caches url under key for the duration ttl.
	Set(ctx context.Context, key, url string, ttl time.Duration) error

	// Delete drops key from the cache.
	Delete(ctx context.Context, key string) error
}
|
||||
|
||||
// ValkeyCache is a Cache backed by Valkey / Redis via go-redis.
|
||||
type ValkeyCache struct {
|
||||
rdb *redis.Client
|
||||
}
|
||||
|
||||
// New creates a ValkeyCache connecting to addr (e.g. "valkey:6379").
|
||||
// The connection is not established until the first command; use Ping to
|
||||
// verify connectivity at startup.
|
||||
func New(addr string) *ValkeyCache {
|
||||
rdb := redis.NewClient(&redis.Options{
|
||||
Addr: addr,
|
||||
DialTimeout: 2 * time.Second,
|
||||
ReadTimeout: 1 * time.Second,
|
||||
WriteTimeout: 1 * time.Second,
|
||||
})
|
||||
return &ValkeyCache{rdb: rdb}
|
||||
}
|
||||
|
||||
// Ping checks connectivity. Call once at startup.
|
||||
func (c *ValkeyCache) Ping(ctx context.Context) error {
|
||||
if err := c.rdb.Ping(ctx).Err(); err != nil {
|
||||
return fmt.Errorf("presigncache: ping valkey: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get returns (url, true, nil) on hit, ("", false, nil) on miss, and
|
||||
// ("", false, err) only on unexpected errors (not redis.Nil).
|
||||
func (c *ValkeyCache) Get(ctx context.Context, key string) (string, bool, error) {
|
||||
val, err := c.rdb.Get(ctx, key).Result()
|
||||
if err == redis.Nil {
|
||||
return "", false, nil
|
||||
}
|
||||
if err != nil {
|
||||
return "", false, fmt.Errorf("presigncache: get %q: %w", key, err)
|
||||
}
|
||||
return val, true, nil
|
||||
}
|
||||
|
||||
// Set stores url under key with ttl. Errors are returned but are non-fatal
|
||||
// for callers — a Set failure means the next request will miss and regenerate.
|
||||
func (c *ValkeyCache) Set(ctx context.Context, key, url string, ttl time.Duration) error {
|
||||
if err := c.rdb.Set(ctx, key, url, ttl).Err(); err != nil {
|
||||
return fmt.Errorf("presigncache: set %q: %w", key, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete removes key from the cache. It is not an error if the key does not exist.
|
||||
func (c *ValkeyCache) Delete(ctx context.Context, key string) error {
|
||||
if err := c.rdb.Del(ctx, key).Err(); err != nil {
|
||||
return fmt.Errorf("presigncache: delete %q: %w", key, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// NoopCache is a Cache that stores nothing: every Get is a miss and every
// Set/Delete succeeds without doing anything. Used when Valkey is not
// configured (e.g. local development without Docker).
type NoopCache struct{}

// Get always reports a miss with no error.
func (NoopCache) Get(_ context.Context, _ string) (string, bool, error) {
	return "", false, nil
}

// Set discards the value and reports success.
func (NoopCache) Set(_ context.Context, _, _ string, _ time.Duration) error {
	return nil
}

// Delete does nothing and reports success.
func (NoopCache) Delete(_ context.Context, _ string) error {
	return nil
}
|
||||
236
backend/internal/runner/asynq_runner.go
Normal file
236
backend/internal/runner/asynq_runner.go
Normal file
@@ -0,0 +1,236 @@
|
||||
package runner
|
||||
|
||||
// asynq_runner.go — Asynq-based task dispatch for the runner.
|
||||
//
|
||||
// When cfg.RedisAddr is set, Run() calls runAsynq() instead of runPoll().
|
||||
// The Asynq server replaces the polling loop: it listens on Redis for tasks
|
||||
// enqueued by the backend Producer and delivers them immediately.
|
||||
//
|
||||
// Handlers in this file decode Asynq job payloads and call the existing
|
||||
// runScrapeTask / runAudioTask methods, keeping all execution logic in one place.
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"strings"
	"sync"
	"time"

	"github.com/hibiken/asynq"
	asynqmetrics "github.com/hibiken/asynq/x/metrics"
	"github.com/libnovel/backend/internal/asynqqueue"
	"github.com/libnovel/backend/internal/domain"
)
|
||||
|
||||
// runAsynq starts an Asynq server that replaces the PocketBase poll loop.
|
||||
// It also starts the periodic catalogue refresh ticker.
|
||||
// Blocks until ctx is cancelled.
|
||||
func (r *Runner) runAsynq(ctx context.Context) error {
|
||||
redisOpt, err := r.redisConnOpt()
|
||||
if err != nil {
|
||||
return fmt.Errorf("runner: parse redis addr: %w", err)
|
||||
}
|
||||
|
||||
srv := asynq.NewServer(redisOpt, asynq.Config{
|
||||
// Allocate concurrency slots for each task type.
|
||||
// Total concurrency = scrape + audio slots.
|
||||
Concurrency: r.cfg.MaxConcurrentScrape + r.cfg.MaxConcurrentAudio,
|
||||
Queues: map[string]int{
|
||||
asynqqueue.QueueDefault: 1,
|
||||
},
|
||||
// Let Asynq handle retries with exponential back-off.
|
||||
RetryDelayFunc: asynq.DefaultRetryDelayFunc,
|
||||
// Log errors from handlers via the existing structured logger.
|
||||
ErrorHandler: asynq.ErrorHandlerFunc(func(_ context.Context, task *asynq.Task, err error) {
|
||||
r.deps.Log.Error("runner: asynq task failed",
|
||||
"type", task.Type(),
|
||||
"err", err,
|
||||
)
|
||||
}),
|
||||
})
|
||||
|
||||
mux := asynq.NewServeMux()
|
||||
mux.HandleFunc(asynqqueue.TypeAudioGenerate, r.handleAudioTask)
|
||||
mux.HandleFunc(asynqqueue.TypeScrapeBook, r.handleScrapeTask)
|
||||
mux.HandleFunc(asynqqueue.TypeScrapeCatalogue, r.handleScrapeTask)
|
||||
|
||||
// Register Asynq queue metrics with the default Prometheus registry so
|
||||
// the /metrics endpoint (metrics.go) can expose them.
|
||||
inspector := asynq.NewInspector(redisOpt)
|
||||
collector := asynqmetrics.NewQueueMetricsCollector(inspector)
|
||||
if err := r.metricsRegistry.Register(collector); err != nil {
|
||||
r.deps.Log.Warn("runner: could not register asynq prometheus collector", "err", err)
|
||||
}
|
||||
|
||||
// Start the periodic catalogue refresh.
|
||||
catalogueTick := time.NewTicker(r.cfg.CatalogueRefreshInterval)
|
||||
defer catalogueTick.Stop()
|
||||
if !r.cfg.SkipInitialCatalogueRefresh {
|
||||
go r.runCatalogueRefresh(ctx)
|
||||
} else {
|
||||
r.deps.Log.Info("runner: skipping initial catalogue refresh (RUNNER_SKIP_INITIAL_CATALOGUE_REFRESH=true)")
|
||||
}
|
||||
|
||||
r.deps.Log.Info("runner: asynq mode active", "redis_addr", r.cfg.RedisAddr)
|
||||
|
||||
// ── Heartbeat goroutine ──────────────────────────────────────────────
|
||||
// Write /tmp/runner.alive every 30s so Docker healthcheck passes in asynq mode.
|
||||
// This mirrors the heartbeat file behavior from the poll() loop.
|
||||
go func() {
|
||||
heartbeatTick := time.NewTicker(r.cfg.StaleTaskThreshold / 2)
|
||||
defer heartbeatTick.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-heartbeatTick.C:
|
||||
if f, err := os.Create("/tmp/runner.alive"); err != nil {
|
||||
r.deps.Log.Warn("runner: could not write heartbeat file", "err", err)
|
||||
} else {
|
||||
f.Close()
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// ── Translation polling goroutine ────────────────────────────────────
|
||||
// Translation tasks live in PocketBase (not Redis), so we need a separate
|
||||
// poll loop to claim and dispatch them. This runs alongside the Asynq server.
|
||||
translationSem := make(chan struct{}, r.cfg.MaxConcurrentTranslation)
|
||||
var translationWg sync.WaitGroup
|
||||
go func() {
|
||||
tick := time.NewTicker(r.cfg.PollInterval)
|
||||
defer tick.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-tick.C:
|
||||
r.pollTranslationTasks(ctx, translationSem, &translationWg)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Run catalogue refresh ticker in the background.
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-catalogueTick.C:
|
||||
go r.runCatalogueRefresh(ctx)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
// Start Asynq server (non-blocking).
|
||||
if err := srv.Start(mux); err != nil {
|
||||
return fmt.Errorf("runner: asynq server start: %w", err)
|
||||
}
|
||||
|
||||
// Block until context is cancelled, then gracefully stop.
|
||||
<-ctx.Done()
|
||||
r.deps.Log.Info("runner: context cancelled, shutting down asynq server")
|
||||
srv.Shutdown()
|
||||
|
||||
// Wait for translation tasks to complete.
|
||||
translationWg.Wait()
|
||||
return nil
|
||||
}
|
||||
|
||||
// redisConnOpt parses cfg.RedisAddr into an asynq.RedisConnOpt.
|
||||
// Supports full "redis://" / "rediss://" URLs and plain "host:port".
|
||||
func (r *Runner) redisConnOpt() (asynq.RedisConnOpt, error) {
|
||||
addr := r.cfg.RedisAddr
|
||||
// ParseRedisURI handles redis:// and rediss:// schemes.
|
||||
if len(addr) > 7 && (addr[:8] == "redis://" || addr[:9] == "rediss://") {
|
||||
return asynq.ParseRedisURI(addr)
|
||||
}
|
||||
// Plain "host:port" — use RedisClientOpt directly.
|
||||
return asynq.RedisClientOpt{
|
||||
Addr: addr,
|
||||
Password: r.cfg.RedisPassword,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// handleScrapeTask is the Asynq handler for TypeScrapeBook and TypeScrapeCatalogue.
|
||||
func (r *Runner) handleScrapeTask(ctx context.Context, t *asynq.Task) error {
|
||||
var p asynqqueue.ScrapePayload
|
||||
if err := json.Unmarshal(t.Payload(), &p); err != nil {
|
||||
return fmt.Errorf("unmarshal scrape payload: %w", err)
|
||||
}
|
||||
task := domain.ScrapeTask{
|
||||
ID: p.PBTaskID,
|
||||
Kind: p.Kind,
|
||||
TargetURL: p.TargetURL,
|
||||
FromChapter: p.FromChapter,
|
||||
ToChapter: p.ToChapter,
|
||||
}
|
||||
r.tasksRunning.Add(1)
|
||||
defer r.tasksRunning.Add(-1)
|
||||
r.runScrapeTask(ctx, task)
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleAudioTask is the Asynq handler for TypeAudioGenerate.
|
||||
func (r *Runner) handleAudioTask(ctx context.Context, t *asynq.Task) error {
|
||||
var p asynqqueue.AudioPayload
|
||||
if err := json.Unmarshal(t.Payload(), &p); err != nil {
|
||||
return fmt.Errorf("unmarshal audio payload: %w", err)
|
||||
}
|
||||
task := domain.AudioTask{
|
||||
ID: p.PBTaskID,
|
||||
Slug: p.Slug,
|
||||
Chapter: p.Chapter,
|
||||
Voice: p.Voice,
|
||||
}
|
||||
r.tasksRunning.Add(1)
|
||||
defer r.tasksRunning.Add(-1)
|
||||
r.runAudioTask(ctx, task)
|
||||
return nil
|
||||
}
|
||||
|
||||
// pollTranslationTasks claims all available translation tasks from PocketBase
|
||||
// and dispatches them to goroutines. Translation tasks don't go through Redis/Asynq
|
||||
// because they're stored in PocketBase, so we need this separate poll loop.
|
||||
func (r *Runner) pollTranslationTasks(ctx context.Context, translationSem chan struct{}, wg *sync.WaitGroup) {
|
||||
// Reap orphaned tasks (same logic as poll() in runner.go).
|
||||
if n, err := r.deps.Consumer.ReapStaleTasks(ctx, r.cfg.StaleTaskThreshold); err != nil {
|
||||
r.deps.Log.Warn("runner: reap stale translation tasks failed", "err", err)
|
||||
} else if n > 0 {
|
||||
r.deps.Log.Info("runner: reaped stale translation tasks", "count", n)
|
||||
}
|
||||
|
||||
translationLoop:
|
||||
for {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
select {
|
||||
case translationSem <- struct{}{}:
|
||||
// Slot acquired — proceed to claim a task.
|
||||
default:
|
||||
// All slots busy; leave remaining pending tasks for next tick.
|
||||
break translationLoop
|
||||
}
|
||||
task, ok, err := r.deps.Consumer.ClaimNextTranslationTask(ctx, r.cfg.WorkerID)
|
||||
if err != nil {
|
||||
<-translationSem
|
||||
r.deps.Log.Error("runner: ClaimNextTranslationTask failed", "err", err)
|
||||
break
|
||||
}
|
||||
if !ok {
|
||||
<-translationSem
|
||||
break
|
||||
}
|
||||
r.tasksRunning.Add(1)
|
||||
wg.Add(1)
|
||||
go func(t domain.TranslationTask) {
|
||||
defer wg.Done()
|
||||
defer func() { <-translationSem }()
|
||||
defer r.tasksRunning.Add(-1)
|
||||
r.runTranslationTask(ctx, t)
|
||||
}(task)
|
||||
}
|
||||
}
|
||||
186
backend/internal/runner/catalogue_refresh.go
Normal file
186
backend/internal/runner/catalogue_refresh.go
Normal file
@@ -0,0 +1,186 @@
|
||||
package runner
|
||||
|
||||
// catalogue_refresh.go — independent loop that walks the full novelfire.net
|
||||
// catalogue, scrapes per-book metadata, downloads cover images to MinIO, and
|
||||
// indexes every book in Meilisearch.
|
||||
//
|
||||
// Design:
|
||||
// - Runs on its own ticker (CatalogueRefreshInterval, default 24h) inside Run().
|
||||
// - Also fires once on startup (unless SkipInitialCatalogueRefresh is set).
|
||||
// - ScrapeCatalogue streams CatalogueEntry values over a channel — already has
|
||||
// its own inter-page jitter + retryGet (see scraper.go).
|
||||
// - Per-book: only metadata is scraped here (not chapters). Chapters are scraped
|
||||
// on-demand when a user opens a book or via an explicit scrape task.
|
||||
// - Between each metadata request a configurable base delay plus up to 50%
|
||||
// random jitter is applied (CatalogueRequestDelay, default 2s). This keeps
|
||||
// the request rate well below novelfire.net's rate limit even for ~15k books.
|
||||
// - ScrapeMetadata itself uses retryGet with 429-aware exponential backoff
|
||||
// (up to 9 attempts), so transient rate limits are handled gracefully.
|
||||
// - Cover images are fetched and stored in MinIO on first sight; subsequent
|
||||
// refreshes skip covers that already exist (CoverExists check).
|
||||
// - Books already present in Meilisearch are skipped entirely (fast path).
|
||||
// - Errors for individual books are logged and skipped; the loop never aborts.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// runCatalogueRefresh performs one full catalogue walk: scrapes metadata for
|
||||
// every book on novelfire.net, downloads covers to MinIO, and upserts to
|
||||
// Meilisearch. Individual book failures are logged and skipped.
|
||||
func (r *Runner) runCatalogueRefresh(ctx context.Context) {
|
||||
if r.deps.Novel == nil {
|
||||
r.deps.Log.Warn("runner: catalogue refresh skipped — Novel scraper not configured")
|
||||
return
|
||||
}
|
||||
if r.deps.BookWriter == nil {
|
||||
r.deps.Log.Warn("runner: catalogue refresh skipped — BookWriter not configured")
|
||||
return
|
||||
}
|
||||
|
||||
delay := r.cfg.CatalogueRequestDelay
|
||||
log := r.deps.Log.With("op", "catalogue_refresh")
|
||||
log.Info("runner: catalogue refresh starting", "request_delay", delay)
|
||||
|
||||
entries, errCh := r.deps.Novel.ScrapeCatalogue(ctx)
|
||||
|
||||
ok, skipped, errCount := 0, 0, 0
|
||||
for entry := range entries {
|
||||
if ctx.Err() != nil {
|
||||
break
|
||||
}
|
||||
|
||||
// Fast path: skip books already indexed in Meilisearch.
|
||||
if r.deps.SearchIndex.BookExists(ctx, entry.Slug) {
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
|
||||
// Polite delay between metadata requests: base + up to 50% jitter.
|
||||
// This applies before every fetch so we never fire bursts.
|
||||
jitter := time.Duration(rand.Int63n(int64(delay / 2)))
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
break
|
||||
case <-time.After(delay + jitter):
|
||||
}
|
||||
|
||||
// ScrapeMetadata internally retries on 429 with exponential back-off.
|
||||
meta, err := r.deps.Novel.ScrapeMetadata(ctx, entry.URL)
|
||||
if err != nil {
|
||||
log.Warn("runner: catalogue refresh: metadata scrape failed — skipping book",
|
||||
"slug", entry.Slug, "url", entry.URL, "err", err)
|
||||
errCount++
|
||||
continue
|
||||
}
|
||||
|
||||
// Rewrite cover URL to backend proxy path so UI never hits CDN directly.
|
||||
originalCover := meta.Cover
|
||||
meta.Cover = fmt.Sprintf("/api/cover/novelfire.net/%s", meta.Slug)
|
||||
|
||||
// Persist to PocketBase.
|
||||
if err := r.deps.BookWriter.WriteMetadata(ctx, meta); err != nil {
|
||||
log.Warn("runner: catalogue refresh: WriteMetadata failed — skipping book",
|
||||
"slug", meta.Slug, "err", err)
|
||||
errCount++
|
||||
continue
|
||||
}
|
||||
|
||||
// Index in Meilisearch (non-fatal).
|
||||
if err := r.deps.SearchIndex.UpsertBook(ctx, meta); err != nil {
|
||||
log.Warn("runner: catalogue refresh: UpsertBook failed",
|
||||
"slug", meta.Slug, "err", err)
|
||||
}
|
||||
|
||||
// Download cover to MinIO if not already cached (non-fatal).
|
||||
if r.deps.CoverStore != nil && originalCover != "" {
|
||||
if !r.deps.CoverStore.CoverExists(ctx, meta.Slug) {
|
||||
if err := r.downloadCover(ctx, meta.Slug, originalCover); err != nil {
|
||||
log.Warn("runner: catalogue refresh: cover download failed",
|
||||
"slug", meta.Slug, "url", originalCover, "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ok++
|
||||
if ok%50 == 0 {
|
||||
log.Info("runner: catalogue refresh progress",
|
||||
"scraped", ok, "skipped", skipped, "errors", errCount)
|
||||
}
|
||||
}
|
||||
|
||||
if err := <-errCh; err != nil {
|
||||
log.Warn("runner: catalogue refresh: catalogue stream error", "err", err)
|
||||
}
|
||||
|
||||
log.Info("runner: catalogue refresh finished",
|
||||
"ok", ok, "skipped", skipped, "errors", errCount)
|
||||
}
|
||||
|
||||
// downloadCover fetches the cover image from coverURL and stores it in MinIO
|
||||
// under covers/{slug}.jpg. It retries up to 3 times with exponential backoff
|
||||
// on transient errors (5xx, network failures).
|
||||
func (r *Runner) downloadCover(ctx context.Context, slug, coverURL string) error {
|
||||
const maxRetries = 3
|
||||
delay := 2 * time.Second
|
||||
|
||||
var lastErr error
|
||||
for attempt := 0; attempt < maxRetries; attempt++ {
|
||||
if ctx.Err() != nil {
|
||||
return ctx.Err()
|
||||
}
|
||||
if attempt > 0 {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-time.After(delay):
|
||||
}
|
||||
delay *= 2
|
||||
}
|
||||
|
||||
data, err := fetchCoverBytes(ctx, coverURL)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
|
||||
if err := r.deps.CoverStore.PutCover(ctx, slug, data, ""); err != nil {
|
||||
return fmt.Errorf("put cover: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("download cover after %d retries: %w", maxRetries, lastErr)
|
||||
}
|
||||
|
||||
// fetchCoverBytes performs a single HTTP GET for coverURL and returns the
// response body.
//
// The request carries a fixed User-Agent and a novelfire.net Referer, and
// uses a 30s client timeout in addition to ctx. Any status other than 200 is
// an error (5xx is reported as an upstream error). Bodies larger than 5 MiB
// are rejected with an error instead of being silently truncated, so a
// cut-off image is never handed to the caller. On error the returned slice
// is always nil.
func fetchCoverBytes(ctx context.Context, coverURL string) ([]byte, error) {
	const maxCoverBytes = 5 << 20 // 5 MiB cap

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, coverURL, nil)
	if err != nil {
		return nil, fmt.Errorf("build request: %w", err)
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; libnovel-runner/2)")
	req.Header.Set("Referer", "https://novelfire.net/")

	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("http get: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Drain the body so the underlying connection can be reused.
		_, _ = io.Copy(io.Discard, resp.Body)
		if resp.StatusCode >= 500 {
			return nil, fmt.Errorf("upstream %d for %s", resp.StatusCode, coverURL)
		}
		return nil, fmt.Errorf("unexpected status %d for %s", resp.StatusCode, coverURL)
	}

	// Read one byte past the cap so an oversized body can be told apart from
	// one that is exactly at the limit.
	data, err := io.ReadAll(io.LimitReader(resp.Body, maxCoverBytes+1))
	if err != nil {
		return nil, fmt.Errorf("read body: %w", err)
	}
	if len(data) > maxCoverBytes {
		return nil, fmt.Errorf("cover exceeds %d bytes for %s", maxCoverBytes, coverURL)
	}
	return data, nil
}
|
||||
21
backend/internal/runner/helpers.go
Normal file
21
backend/internal/runner/helpers.go
Normal file
@@ -0,0 +1,21 @@
|
||||
package runner
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Markdown patterns used by stripMarkdown, compiled once at package
// initialisation instead of on every call.
var (
	mdHeadingRe    = regexp.MustCompile(`(?m)^#{1,6}\s+`)
	mdEmphasisRe   = regexp.MustCompile(`\*{1,3}|_{1,3}`)
	mdFenceRe      = regexp.MustCompile("(?s)```.*?```")
	mdInlineCodeRe = regexp.MustCompile("`[^`]*`")
	mdImageRe      = regexp.MustCompile(`!\[[^\]]*\]\([^)]+\)`)
	mdLinkRe       = regexp.MustCompile(`\[([^\]]+)\]\([^)]+\)`)
	mdQuoteRe      = regexp.MustCompile(`(?m)^>\s?`)
	mdRuleRe       = regexp.MustCompile(`(?m)^[-*_]{3,}\s*$`)
	mdBlankRunRe   = regexp.MustCompile(`\n{3,}`)
)

// stripMarkdown removes common markdown syntax from src and returns plain
// text suitable for TTS.
//
// In order, it drops heading markers, emphasis markers (runs of * or _),
// fenced code blocks, inline code spans, images, link targets (keeping the
// link text), block-quote markers and horizontal rules. It then collapses
// runs of three or more newlines to two and trims surrounding whitespace.
//
// Images are removed before links: the link pattern also matches the
// "[alt](url)" part of an image, which would otherwise leave "!alt" behind.
func stripMarkdown(src string) string {
	src = mdHeadingRe.ReplaceAllString(src, "")
	src = mdEmphasisRe.ReplaceAllString(src, "")
	src = mdFenceRe.ReplaceAllString(src, "")
	src = mdInlineCodeRe.ReplaceAllString(src, "")
	src = mdImageRe.ReplaceAllString(src, "")
	src = mdLinkRe.ReplaceAllString(src, "$1")
	src = mdQuoteRe.ReplaceAllString(src, "")
	src = mdRuleRe.ReplaceAllString(src, "")
	src = mdBlankRunRe.ReplaceAllString(src, "\n\n")
	return strings.TrimSpace(src)
}
|
||||
113
backend/internal/runner/metrics.go
Normal file
113
backend/internal/runner/metrics.go
Normal file
@@ -0,0 +1,113 @@
|
||||
package runner
|
||||
|
||||
// metrics.go — Prometheus metrics HTTP endpoint for the runner.
|
||||
//
|
||||
// GET /metrics returns a Prometheus text/plain scrape response.
|
||||
// Exposes:
|
||||
// - Standard Go runtime metrics (via promhttp)
|
||||
// - Runner task counters (tasks_running, tasks_completed, tasks_failed)
|
||||
// - Asynq queue metrics (registered in asynq_runner.go when Redis is enabled)
|
||||
//
|
||||
// GET /health — simple liveness probe.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
// metricsServer serves GET /metrics and GET /health for the runner process.
|
||||
type metricsServer struct {
|
||||
addr string
|
||||
r *Runner
|
||||
log *slog.Logger
|
||||
}
|
||||
|
||||
func newMetricsServer(addr string, r *Runner, log *slog.Logger) *metricsServer {
|
||||
ms := &metricsServer{addr: addr, r: r, log: log}
|
||||
ms.registerCollectors()
|
||||
return ms
|
||||
}
|
||||
|
||||
// registerCollectors registers runner-specific Prometheus collectors.
|
||||
// Called once at construction; Asynq queue collector is registered separately
|
||||
// in asynq_runner.go after the Redis connection is established.
|
||||
func (ms *metricsServer) registerCollectors() {
|
||||
// Runner task gauges / counters backed by the atomic fields on Runner.
|
||||
ms.r.metricsRegistry.MustRegister(prometheus.NewGaugeFunc(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: "runner",
|
||||
Name: "tasks_running",
|
||||
Help: "Number of tasks currently being processed.",
|
||||
},
|
||||
func() float64 { return float64(ms.r.tasksRunning.Load()) },
|
||||
))
|
||||
ms.r.metricsRegistry.MustRegister(prometheus.NewCounterFunc(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: "runner",
|
||||
Name: "tasks_completed_total",
|
||||
Help: "Total number of tasks completed successfully since startup.",
|
||||
},
|
||||
func() float64 { return float64(ms.r.tasksCompleted.Load()) },
|
||||
))
|
||||
ms.r.metricsRegistry.MustRegister(prometheus.NewCounterFunc(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: "runner",
|
||||
Name: "tasks_failed_total",
|
||||
Help: "Total number of tasks that ended in failure since startup.",
|
||||
},
|
||||
func() float64 { return float64(ms.r.tasksFailed.Load()) },
|
||||
))
|
||||
ms.r.metricsRegistry.MustRegister(prometheus.NewGaugeFunc(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: "runner",
|
||||
Name: "uptime_seconds",
|
||||
Help: "Seconds since the runner process started.",
|
||||
},
|
||||
func() float64 { return time.Since(ms.r.startedAt).Seconds() },
|
||||
))
|
||||
}
|
||||
|
||||
// ListenAndServe starts the HTTP server and blocks until ctx is cancelled or
|
||||
// a fatal listen error occurs.
|
||||
func (ms *metricsServer) ListenAndServe(ctx context.Context) error {
|
||||
mux := http.NewServeMux()
|
||||
mux.Handle("GET /metrics", promhttp.HandlerFor(ms.r.metricsRegistry, promhttp.HandlerOpts{}))
|
||||
mux.HandleFunc("GET /health", ms.handleHealth)
|
||||
|
||||
srv := &http.Server{
|
||||
Addr: ms.addr,
|
||||
Handler: mux,
|
||||
ReadTimeout: 5 * time.Second,
|
||||
WriteTimeout: 10 * time.Second,
|
||||
BaseContext: func(_ net.Listener) context.Context { return ctx },
|
||||
}
|
||||
|
||||
errCh := make(chan error, 1)
|
||||
go func() {
|
||||
ms.log.Info("runner: metrics server listening", "addr", ms.addr)
|
||||
errCh <- srv.ListenAndServe()
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
shutCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
_ = srv.Shutdown(shutCtx)
|
||||
return nil
|
||||
case err := <-errCh:
|
||||
return fmt.Errorf("runner: metrics server: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// handleHealth handles GET /health — simple liveness probe.
|
||||
func (ms *metricsServer) handleHealth(w http.ResponseWriter, _ *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"status":"ok"}`))
|
||||
}
|
||||
585
backend/internal/runner/runner.go
Normal file
585
backend/internal/runner/runner.go
Normal file
@@ -0,0 +1,585 @@
|
||||
// Package runner implements the worker loop that polls PocketBase for pending
|
||||
// scrape and audio tasks, executes them, and reports results back.
|
||||
//
|
||||
// Design:
|
||||
// - Run(ctx) loops on a ticker; each tick claims and dispatches pending tasks.
|
||||
// - Scrape tasks are dispatched to the Orchestrator (one goroutine per task,
|
||||
// up to MaxConcurrentScrape).
|
||||
// - Audio tasks fetch chapter text, call Kokoro, upload to MinIO, and report
|
||||
// the result back (up to MaxConcurrentAudio goroutines).
|
||||
// - The runner is stateless between ticks; all state lives in PocketBase.
|
||||
// - Atomic task counters are exposed via /metrics (see metrics.go).
|
||||
// - Books are indexed in Meilisearch via an orchestrator.Config.PostMetadata
|
||||
// hook injected at construction time.
|
||||
package runner
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
|
||||
"github.com/libnovel/backend/internal/bookstore"
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
"github.com/libnovel/backend/internal/kokoro"
|
||||
"github.com/libnovel/backend/internal/libretranslate"
|
||||
"github.com/libnovel/backend/internal/meili"
|
||||
"github.com/libnovel/backend/internal/orchestrator"
|
||||
"github.com/libnovel/backend/internal/pockettts"
|
||||
"github.com/libnovel/backend/internal/scraper"
|
||||
"github.com/libnovel/backend/internal/taskqueue"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
// Config holds the runner's tunables. New replaces non-positive durations
// and counts, and an empty WorkerID or MetricsAddr, with the defaults noted
// on each field.
type Config struct {
	// WorkerID identifies this runner instance when claiming tasks.
	// Defaults to "runner".
	WorkerID string
	// PollInterval is how often the runner checks for new tasks. In poll
	// mode it paces the main loop; in Asynq mode it paces the translation
	// task poll. Defaults to 30s.
	PollInterval time.Duration
	// MaxConcurrentScrape limits simultaneous book-scrape goroutines.
	// Defaults to 2.
	MaxConcurrentScrape int
	// MaxConcurrentAudio limits simultaneous audio-generation goroutines.
	// Defaults to 1.
	MaxConcurrentAudio int
	// MaxConcurrentTranslation limits simultaneous translation goroutines.
	// Defaults to 1.
	MaxConcurrentTranslation int
	// OrchestratorWorkers is the chapter-scraping parallelism inside each
	// book run.
	OrchestratorWorkers int
	// HeartbeatInterval is how often active tasks PATCH their heartbeat_at
	// timestamp to signal they are still alive. Defaults to 30s.
	HeartbeatInterval time.Duration
	// StaleTaskThreshold is how old a heartbeat must be (or absent) before
	// the task is considered orphaned and reset to pending. In Asynq mode
	// half of this value also paces the /tmp/runner.alive heartbeat file.
	// Defaults to 2m.
	StaleTaskThreshold time.Duration
	// CatalogueRefreshInterval is how often the runner walks the full
	// catalogue (metadata scrape, cover download, search indexing).
	// Defaults to 24h.
	CatalogueRefreshInterval time.Duration
	// CatalogueRequestDelay is the base pause between metadata requests
	// during a catalogue refresh; up to 50% jitter is added on top.
	// Defaults to 2s. Set via RUNNER_CATALOGUE_REQUEST_DELAY.
	CatalogueRequestDelay time.Duration
	// SkipInitialCatalogueRefresh suppresses the catalogue walk that
	// otherwise starts at startup; the periodic ticker still fires. Set
	// RUNNER_SKIP_INITIAL_CATALOGUE_REFRESH=true for quick restarts where
	// the catalogue is already up to date.
	SkipInitialCatalogueRefresh bool
	// MetricsAddr is the HTTP listen address for the /metrics endpoint.
	// Defaults to ":9091".
	// NOTE(review): Run skips the metrics server when this is "", but New
	// replaces "" with the default, so a Runner built through New cannot
	// currently disable it this way — confirm the intended behaviour.
	MetricsAddr string
	// RedisAddr is the address of the Redis instance used for Asynq task
	// dispatch. When set the runner runs in Asynq mode; when empty it falls
	// back to PocketBase polling. Accepts plain "host:port" or a full
	// "redis://" / "rediss://" URL.
	RedisAddr string
	// RedisPassword is the Redis AUTH password. It is only applied when
	// RedisAddr is a plain "host:port"; URL-form addresses are parsed as-is.
	RedisPassword string
}
|
||||
|
||||
// Dependencies are the external services the runner depends on.
|
||||
type Dependencies struct {
|
||||
// Consumer claims tasks from PocketBase.
|
||||
Consumer taskqueue.Consumer
|
||||
// BookWriter persists scraped data (used by orchestrator).
|
||||
BookWriter bookstore.BookWriter
|
||||
// BookReader reads chapter text for audio generation.
|
||||
BookReader bookstore.BookReader
|
||||
// AudioStore persists generated audio and checks key existence.
|
||||
AudioStore bookstore.AudioStore
|
||||
// TranslationStore persists translated markdown and checks key existence.
|
||||
TranslationStore bookstore.TranslationStore
|
||||
// CoverStore stores book cover images in MinIO.
|
||||
CoverStore bookstore.CoverStore
|
||||
// SearchIndex indexes books in Meilisearch after scraping.
|
||||
// If nil a no-op is used.
|
||||
SearchIndex meili.Client
|
||||
// Novel is the scraper implementation.
|
||||
Novel scraper.NovelScraper
|
||||
// Kokoro is the Kokoro-FastAPI TTS client (GPU, OpenAI-compatible voices).
|
||||
Kokoro kokoro.Client
|
||||
// PocketTTS is the pocket-tts client (CPU, kyutai voices: alba, marius, etc.).
|
||||
// If nil, pocket-tts voice tasks will fail with a clear error.
|
||||
PocketTTS pockettts.Client
|
||||
// LibreTranslate is the machine translation client.
|
||||
// If nil, translation tasks will fail with a clear error.
|
||||
LibreTranslate libretranslate.Client
|
||||
// Log is the structured logger.
|
||||
Log *slog.Logger
|
||||
}
|
||||
|
||||
// Runner is the main worker process.
|
||||
type Runner struct {
|
||||
cfg Config
|
||||
deps Dependencies
|
||||
|
||||
metricsRegistry *prometheus.Registry
|
||||
|
||||
// Atomic task counters — read by /metrics without locking.
|
||||
tasksRunning atomic.Int64
|
||||
tasksCompleted atomic.Int64
|
||||
tasksFailed atomic.Int64
|
||||
|
||||
startedAt time.Time
|
||||
}
|
||||
|
||||
// New creates a Runner from cfg and deps.
|
||||
func New(cfg Config, deps Dependencies) *Runner {
|
||||
if cfg.PollInterval <= 0 {
|
||||
cfg.PollInterval = 30 * time.Second
|
||||
}
|
||||
if cfg.MaxConcurrentScrape <= 0 {
|
||||
cfg.MaxConcurrentScrape = 2
|
||||
}
|
||||
if cfg.MaxConcurrentAudio <= 0 {
|
||||
cfg.MaxConcurrentAudio = 1
|
||||
}
|
||||
if cfg.MaxConcurrentTranslation <= 0 {
|
||||
cfg.MaxConcurrentTranslation = 1
|
||||
}
|
||||
if cfg.WorkerID == "" {
|
||||
cfg.WorkerID = "runner"
|
||||
}
|
||||
if cfg.HeartbeatInterval <= 0 {
|
||||
cfg.HeartbeatInterval = 30 * time.Second
|
||||
}
|
||||
if cfg.StaleTaskThreshold <= 0 {
|
||||
cfg.StaleTaskThreshold = 2 * time.Minute
|
||||
}
|
||||
if cfg.CatalogueRefreshInterval <= 0 {
|
||||
cfg.CatalogueRefreshInterval = 24 * time.Hour
|
||||
}
|
||||
if cfg.CatalogueRequestDelay <= 0 {
|
||||
cfg.CatalogueRequestDelay = 2 * time.Second
|
||||
}
|
||||
if cfg.MetricsAddr == "" {
|
||||
cfg.MetricsAddr = ":9091"
|
||||
}
|
||||
if deps.Log == nil {
|
||||
deps.Log = slog.Default()
|
||||
}
|
||||
if deps.SearchIndex == nil {
|
||||
deps.SearchIndex = meili.NoopClient{}
|
||||
}
|
||||
return &Runner{cfg: cfg, deps: deps, startedAt: time.Now(), metricsRegistry: prometheus.NewRegistry()}
|
||||
}
|
||||
|
||||
// Run starts the worker loop and the metrics HTTP server, blocking until ctx
|
||||
// is cancelled.
|
||||
//
|
||||
// When cfg.RedisAddr is set the runner uses Asynq (immediate task delivery).
|
||||
// Otherwise it falls back to PocketBase polling (legacy mode).
|
||||
func (r *Runner) Run(ctx context.Context) error {
|
||||
r.deps.Log.Info("runner: starting",
|
||||
"worker_id", r.cfg.WorkerID,
|
||||
"mode", r.mode(),
|
||||
"max_scrape", r.cfg.MaxConcurrentScrape,
|
||||
"max_audio", r.cfg.MaxConcurrentAudio,
|
||||
"max_translation", r.cfg.MaxConcurrentTranslation,
|
||||
"catalogue_refresh_interval", r.cfg.CatalogueRefreshInterval,
|
||||
"metrics_addr", r.cfg.MetricsAddr,
|
||||
)
|
||||
|
||||
// Start metrics HTTP server in background if configured.
|
||||
if r.cfg.MetricsAddr != "" {
|
||||
ms := newMetricsServer(r.cfg.MetricsAddr, r, r.deps.Log)
|
||||
go func() {
|
||||
if err := ms.ListenAndServe(ctx); err != nil {
|
||||
r.deps.Log.Error("runner: metrics server error", "err", err)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
if r.cfg.RedisAddr != "" {
|
||||
return r.runAsynq(ctx)
|
||||
}
|
||||
return r.runPoll(ctx)
|
||||
}
|
||||
|
||||
// mode returns a short string describing the active dispatch mode.
|
||||
func (r *Runner) mode() string {
|
||||
if r.cfg.RedisAddr != "" {
|
||||
return "asynq"
|
||||
}
|
||||
return "poll"
|
||||
}
|
||||
|
||||
// runPoll is the legacy PocketBase-polling dispatch loop.
|
||||
// Used when cfg.RedisAddr is empty.
|
||||
func (r *Runner) runPoll(ctx context.Context) error {
|
||||
scrapeSem := make(chan struct{}, r.cfg.MaxConcurrentScrape)
|
||||
audioSem := make(chan struct{}, r.cfg.MaxConcurrentAudio)
|
||||
translationSem := make(chan struct{}, r.cfg.MaxConcurrentTranslation)
|
||||
var wg sync.WaitGroup
|
||||
|
||||
tick := time.NewTicker(r.cfg.PollInterval)
|
||||
defer tick.Stop()
|
||||
|
||||
catalogueTick := time.NewTicker(r.cfg.CatalogueRefreshInterval)
|
||||
defer catalogueTick.Stop()
|
||||
|
||||
// Run one catalogue refresh immediately on startup (unless skipped by flag).
|
||||
if !r.cfg.SkipInitialCatalogueRefresh {
|
||||
go r.runCatalogueRefresh(ctx)
|
||||
} else {
|
||||
r.deps.Log.Info("runner: skipping initial catalogue refresh (RUNNER_SKIP_INITIAL_CATALOGUE_REFRESH=true)")
|
||||
}
|
||||
|
||||
r.deps.Log.Info("runner: poll mode active", "poll_interval", r.cfg.PollInterval)
|
||||
|
||||
// Run one poll immediately on startup, then on each tick.
|
||||
for {
|
||||
r.poll(ctx, scrapeSem, audioSem, translationSem, &wg)
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
r.deps.Log.Info("runner: context cancelled, draining active tasks")
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(done)
|
||||
}()
|
||||
select {
|
||||
case <-done:
|
||||
r.deps.Log.Info("runner: all tasks drained, exiting")
|
||||
case <-time.After(2 * time.Minute):
|
||||
r.deps.Log.Warn("runner: drain timeout exceeded, forcing exit")
|
||||
}
|
||||
return nil
|
||||
case <-catalogueTick.C:
|
||||
go r.runCatalogueRefresh(ctx)
|
||||
case <-tick.C:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// poll claims all available pending tasks and dispatches them to goroutines.
|
||||
func (r *Runner) poll(ctx context.Context, scrapeSem, audioSem, translationSem chan struct{}, wg *sync.WaitGroup) {
|
||||
// ── Heartbeat file ────────────────────────────────────────────────────
|
||||
// Touch /tmp/runner.alive so the Docker health check can confirm the
|
||||
// runner is actively polling. Failure is non-fatal — just log it.
|
||||
if f, err := os.Create("/tmp/runner.alive"); err != nil {
|
||||
r.deps.Log.Warn("runner: could not write heartbeat file", "err", err)
|
||||
} else {
|
||||
f.Close()
|
||||
}
|
||||
|
||||
// ── Reap orphaned tasks ───────────────────────────────────────────────
|
||||
if n, err := r.deps.Consumer.ReapStaleTasks(ctx, r.cfg.StaleTaskThreshold); err != nil {
|
||||
r.deps.Log.Warn("runner: reap stale tasks failed", "err", err)
|
||||
} else if n > 0 {
|
||||
r.deps.Log.Info("runner: reaped stale tasks", "count", n)
|
||||
}
|
||||
|
||||
// ── Scrape tasks ──────────────────────────────────────────────────────
|
||||
for {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
task, ok, err := r.deps.Consumer.ClaimNextScrapeTask(ctx, r.cfg.WorkerID)
|
||||
if err != nil {
|
||||
r.deps.Log.Error("runner: ClaimNextScrapeTask failed", "err", err)
|
||||
break
|
||||
}
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
select {
|
||||
case scrapeSem <- struct{}{}:
|
||||
default:
|
||||
r.deps.Log.Warn("runner: scrape semaphore full, will retry next tick",
|
||||
"task_id", task.ID)
|
||||
break
|
||||
}
|
||||
r.tasksRunning.Add(1)
|
||||
wg.Add(1)
|
||||
go func(t domain.ScrapeTask) {
|
||||
defer wg.Done()
|
||||
defer func() { <-scrapeSem }()
|
||||
defer r.tasksRunning.Add(-1)
|
||||
r.runScrapeTask(ctx, t)
|
||||
}(task)
|
||||
}
|
||||
|
||||
// ── Audio tasks ───────────────────────────────────────────────────────
|
||||
// Only claim tasks when there is a free slot in the semaphore.
|
||||
// This avoids the old bug where we claimed (status→running) a task and
|
||||
// then couldn't dispatch it, leaving it orphaned until the reaper fired.
|
||||
audioLoop:
|
||||
for {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
// Check capacity before claiming to avoid orphaning tasks.
|
||||
select {
|
||||
case audioSem <- struct{}{}:
|
||||
// Slot acquired — proceed to claim a task.
|
||||
default:
|
||||
// All slots busy; leave remaining pending tasks for next tick.
|
||||
break audioLoop
|
||||
}
|
||||
task, ok, err := r.deps.Consumer.ClaimNextAudioTask(ctx, r.cfg.WorkerID)
|
||||
if err != nil {
|
||||
<-audioSem // release the pre-acquired slot
|
||||
r.deps.Log.Error("runner: ClaimNextAudioTask failed", "err", err)
|
||||
break
|
||||
}
|
||||
if !ok {
|
||||
<-audioSem // release the pre-acquired slot; queue empty
|
||||
break
|
||||
}
|
||||
r.tasksRunning.Add(1)
|
||||
wg.Add(1)
|
||||
go func(t domain.AudioTask) {
|
||||
defer wg.Done()
|
||||
defer func() { <-audioSem }()
|
||||
defer r.tasksRunning.Add(-1)
|
||||
r.runAudioTask(ctx, t)
|
||||
}(task)
|
||||
}
|
||||
|
||||
// ── Translation tasks ─────────────────────────────────────────────────
|
||||
translationLoop:
|
||||
for {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
select {
|
||||
case translationSem <- struct{}{}:
|
||||
// Slot acquired — proceed to claim a task.
|
||||
default:
|
||||
// All slots busy; leave remaining pending tasks for next tick.
|
||||
break translationLoop
|
||||
}
|
||||
task, ok, err := r.deps.Consumer.ClaimNextTranslationTask(ctx, r.cfg.WorkerID)
|
||||
if err != nil {
|
||||
<-translationSem
|
||||
r.deps.Log.Error("runner: ClaimNextTranslationTask failed", "err", err)
|
||||
break
|
||||
}
|
||||
if !ok {
|
||||
<-translationSem
|
||||
break
|
||||
}
|
||||
r.tasksRunning.Add(1)
|
||||
wg.Add(1)
|
||||
go func(t domain.TranslationTask) {
|
||||
defer wg.Done()
|
||||
defer func() { <-translationSem }()
|
||||
defer r.tasksRunning.Add(-1)
|
||||
r.runTranslationTask(ctx, t)
|
||||
}(task)
|
||||
}
|
||||
}
|
||||
|
||||
// newOrchestrator builds an orchestrator with the Meilisearch post-hook wired in.
|
||||
func (r *Runner) newOrchestrator() *orchestrator.Orchestrator {
|
||||
oCfg := orchestrator.Config{
|
||||
Workers: r.cfg.OrchestratorWorkers,
|
||||
PostMetadata: func(ctx context.Context, meta domain.BookMeta) {
|
||||
if err := r.deps.SearchIndex.UpsertBook(ctx, meta); err != nil {
|
||||
r.deps.Log.Warn("runner: meilisearch upsert failed",
|
||||
"slug", meta.Slug, "err", err)
|
||||
}
|
||||
},
|
||||
}
|
||||
return orchestrator.New(oCfg, r.deps.Novel, r.deps.BookWriter, r.deps.Log)
|
||||
}
|
||||
|
||||
// runScrapeTask executes one scrape task end-to-end and reports the result.
|
||||
func (r *Runner) runScrapeTask(ctx context.Context, task domain.ScrapeTask) {
|
||||
ctx, span := otel.Tracer("runner").Start(ctx, "runner.scrape_task")
|
||||
defer span.End()
|
||||
span.SetAttributes(
|
||||
attribute.String("task.id", task.ID),
|
||||
attribute.String("task.kind", task.Kind),
|
||||
attribute.String("task.url", task.TargetURL),
|
||||
)
|
||||
|
||||
log := r.deps.Log.With("task_id", task.ID, "kind", task.Kind, "url", task.TargetURL)
|
||||
log.Info("runner: scrape task starting")
|
||||
|
||||
hbCtx, hbCancel := context.WithCancel(ctx)
|
||||
defer hbCancel()
|
||||
go func() {
|
||||
tick := time.NewTicker(r.cfg.HeartbeatInterval)
|
||||
defer tick.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-hbCtx.Done():
|
||||
return
|
||||
case <-tick.C:
|
||||
if err := r.deps.Consumer.HeartbeatTask(ctx, task.ID); err != nil {
|
||||
log.Warn("runner: heartbeat failed", "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
o := r.newOrchestrator()
|
||||
var result domain.ScrapeResult
|
||||
|
||||
switch task.Kind {
|
||||
case "catalogue":
|
||||
result = r.runCatalogueTask(ctx, task, o, log)
|
||||
case "book", "book_range":
|
||||
result = o.RunBook(ctx, task)
|
||||
default:
|
||||
result.ErrorMessage = fmt.Sprintf("unknown task kind: %q", task.Kind)
|
||||
log.Warn("runner: unknown task kind")
|
||||
}
|
||||
|
||||
if err := r.deps.Consumer.FinishScrapeTask(ctx, task.ID, result); err != nil {
|
||||
log.Error("runner: FinishScrapeTask failed", "err", err)
|
||||
}
|
||||
|
||||
if result.ErrorMessage != "" {
|
||||
r.tasksFailed.Add(1)
|
||||
span.SetStatus(codes.Error, result.ErrorMessage)
|
||||
} else {
|
||||
r.tasksCompleted.Add(1)
|
||||
span.SetStatus(codes.Ok, "")
|
||||
}
|
||||
|
||||
log.Info("runner: scrape task finished",
|
||||
"scraped", result.ChaptersScraped,
|
||||
"skipped", result.ChaptersSkipped,
|
||||
"errors", result.Errors,
|
||||
)
|
||||
}
|
||||
|
||||
// runCatalogueTask runs a full catalogue scrape.
|
||||
func (r *Runner) runCatalogueTask(ctx context.Context, task domain.ScrapeTask, o *orchestrator.Orchestrator, log *slog.Logger) domain.ScrapeResult {
|
||||
entries, errCh := r.deps.Novel.ScrapeCatalogue(ctx)
|
||||
var result domain.ScrapeResult
|
||||
|
||||
for entry := range entries {
|
||||
if ctx.Err() != nil {
|
||||
break
|
||||
}
|
||||
bookTask := domain.ScrapeTask{
|
||||
ID: task.ID,
|
||||
Kind: "book",
|
||||
TargetURL: entry.URL,
|
||||
}
|
||||
bookResult := o.RunBook(ctx, bookTask)
|
||||
result.BooksFound += bookResult.BooksFound + 1
|
||||
result.ChaptersScraped += bookResult.ChaptersScraped
|
||||
result.ChaptersSkipped += bookResult.ChaptersSkipped
|
||||
result.Errors += bookResult.Errors
|
||||
}
|
||||
|
||||
if err := <-errCh; err != nil {
|
||||
log.Warn("runner: catalogue scrape finished with error", "err", err)
|
||||
result.Errors++
|
||||
if result.ErrorMessage == "" {
|
||||
result.ErrorMessage = err.Error()
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// runAudioTask executes one audio-generation task.
|
||||
func (r *Runner) runAudioTask(ctx context.Context, task domain.AudioTask) {
|
||||
ctx, span := otel.Tracer("runner").Start(ctx, "runner.audio_task")
|
||||
defer span.End()
|
||||
span.SetAttributes(
|
||||
attribute.String("task.id", task.ID),
|
||||
attribute.String("book.slug", task.Slug),
|
||||
attribute.Int("chapter.number", task.Chapter),
|
||||
attribute.String("audio.voice", task.Voice),
|
||||
)
|
||||
|
||||
log := r.deps.Log.With("task_id", task.ID, "slug", task.Slug, "chapter", task.Chapter, "voice", task.Voice)
|
||||
log.Info("runner: audio task starting")
|
||||
|
||||
hbCtx, hbCancel := context.WithCancel(ctx)
|
||||
defer hbCancel()
|
||||
go func() {
|
||||
tick := time.NewTicker(r.cfg.HeartbeatInterval)
|
||||
defer tick.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-hbCtx.Done():
|
||||
return
|
||||
case <-tick.C:
|
||||
if err := r.deps.Consumer.HeartbeatTask(ctx, task.ID); err != nil {
|
||||
log.Warn("runner: heartbeat failed", "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
fail := func(msg string) {
|
||||
log.Error("runner: audio task failed", "reason", msg)
|
||||
r.tasksFailed.Add(1)
|
||||
span.SetStatus(codes.Error, msg)
|
||||
result := domain.AudioResult{ErrorMessage: msg}
|
||||
if err := r.deps.Consumer.FinishAudioTask(ctx, task.ID, result); err != nil {
|
||||
log.Error("runner: FinishAudioTask failed", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
raw, err := r.deps.BookReader.ReadChapter(ctx, task.Slug, task.Chapter)
|
||||
if err != nil {
|
||||
fail(fmt.Sprintf("read chapter: %v", err))
|
||||
return
|
||||
}
|
||||
text := stripMarkdown(raw)
|
||||
if text == "" {
|
||||
fail("chapter text is empty after stripping markdown")
|
||||
return
|
||||
}
|
||||
|
||||
var audioData []byte
|
||||
if pockettts.IsPocketTTSVoice(task.Voice) {
|
||||
if r.deps.PocketTTS == nil {
|
||||
fail("pocket-tts client not configured (POCKET_TTS_URL is empty)")
|
||||
return
|
||||
}
|
||||
var genErr error
|
||||
audioData, genErr = r.deps.PocketTTS.GenerateAudio(ctx, text, task.Voice)
|
||||
if genErr != nil {
|
||||
fail(fmt.Sprintf("pocket-tts generate: %v", genErr))
|
||||
return
|
||||
}
|
||||
log.Info("runner: audio generated via pocket-tts", "voice", task.Voice)
|
||||
} else {
|
||||
if r.deps.Kokoro == nil {
|
||||
fail("kokoro client not configured (KOKORO_URL is empty)")
|
||||
return
|
||||
}
|
||||
var genErr error
|
||||
audioData, genErr = r.deps.Kokoro.GenerateAudio(ctx, text, task.Voice)
|
||||
if genErr != nil {
|
||||
fail(fmt.Sprintf("kokoro generate: %v", genErr))
|
||||
return
|
||||
}
|
||||
log.Info("runner: audio generated via kokoro-fastapi", "voice", task.Voice)
|
||||
}
|
||||
|
||||
key := r.deps.AudioStore.AudioObjectKey(task.Slug, task.Chapter, task.Voice)
|
||||
if err := r.deps.AudioStore.PutAudio(ctx, key, audioData); err != nil {
|
||||
fail(fmt.Sprintf("put audio: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
r.tasksCompleted.Add(1)
|
||||
span.SetStatus(codes.Ok, "")
|
||||
result := domain.AudioResult{ObjectKey: key}
|
||||
if err := r.deps.Consumer.FinishAudioTask(ctx, task.ID, result); err != nil {
|
||||
log.Error("runner: FinishAudioTask failed", "err", err)
|
||||
}
|
||||
log.Info("runner: audio task finished", "key", key)
|
||||
}
|
||||
399
backend/internal/runner/runner_test.go
Normal file
399
backend/internal/runner/runner_test.go
Normal file
@@ -0,0 +1,399 @@
|
||||
package runner_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
"github.com/libnovel/backend/internal/runner"
|
||||
)
|
||||
|
||||
// ── Stub types ────────────────────────────────────────────────────────────────
|
||||
|
||||
// stubConsumer is a test double for taskqueue.Consumer.
|
||||
type stubConsumer struct {
|
||||
scrapeQueue []domain.ScrapeTask
|
||||
audioQueue []domain.AudioTask
|
||||
scrapeIdx int
|
||||
audioIdx int
|
||||
finished []string
|
||||
failCalled []string
|
||||
claimErr error
|
||||
}
|
||||
|
||||
func (s *stubConsumer) ClaimNextScrapeTask(_ context.Context, _ string) (domain.ScrapeTask, bool, error) {
|
||||
if s.claimErr != nil {
|
||||
return domain.ScrapeTask{}, false, s.claimErr
|
||||
}
|
||||
if s.scrapeIdx >= len(s.scrapeQueue) {
|
||||
return domain.ScrapeTask{}, false, nil
|
||||
}
|
||||
t := s.scrapeQueue[s.scrapeIdx]
|
||||
s.scrapeIdx++
|
||||
return t, true, nil
|
||||
}
|
||||
|
||||
func (s *stubConsumer) ClaimNextAudioTask(_ context.Context, _ string) (domain.AudioTask, bool, error) {
|
||||
if s.claimErr != nil {
|
||||
return domain.AudioTask{}, false, s.claimErr
|
||||
}
|
||||
if s.audioIdx >= len(s.audioQueue) {
|
||||
return domain.AudioTask{}, false, nil
|
||||
}
|
||||
t := s.audioQueue[s.audioIdx]
|
||||
s.audioIdx++
|
||||
return t, true, nil
|
||||
}
|
||||
|
||||
func (s *stubConsumer) ClaimNextTranslationTask(_ context.Context, _ string) (domain.TranslationTask, bool, error) {
|
||||
return domain.TranslationTask{}, false, nil
|
||||
}
|
||||
|
||||
func (s *stubConsumer) FinishScrapeTask(_ context.Context, id string, _ domain.ScrapeResult) error {
|
||||
s.finished = append(s.finished, id)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *stubConsumer) FinishAudioTask(_ context.Context, id string, _ domain.AudioResult) error {
|
||||
s.finished = append(s.finished, id)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *stubConsumer) FinishTranslationTask(_ context.Context, id string, _ domain.TranslationResult) error {
|
||||
s.finished = append(s.finished, id)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *stubConsumer) FailTask(_ context.Context, id, _ string) error {
|
||||
s.failCalled = append(s.failCalled, id)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *stubConsumer) HeartbeatTask(_ context.Context, _ string) error { return nil }
|
||||
|
||||
func (s *stubConsumer) ReapStaleTasks(_ context.Context, _ time.Duration) (int, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// stubBookWriter satisfies bookstore.BookWriter (no-op).
|
||||
type stubBookWriter struct{}
|
||||
|
||||
func (s *stubBookWriter) WriteMetadata(_ context.Context, _ domain.BookMeta) error { return nil }
|
||||
func (s *stubBookWriter) WriteChapter(_ context.Context, _ string, _ domain.Chapter) error {
|
||||
return nil
|
||||
}
|
||||
func (s *stubBookWriter) WriteChapterRefs(_ context.Context, _ string, _ []domain.ChapterRef) error {
|
||||
return nil
|
||||
}
|
||||
func (s *stubBookWriter) ChapterExists(_ context.Context, _ string, _ domain.ChapterRef) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// stubBookReader satisfies bookstore.BookReader — returns a single chapter.
|
||||
type stubBookReader struct {
|
||||
text string
|
||||
readErr error
|
||||
}
|
||||
|
||||
func (s *stubBookReader) ReadChapter(_ context.Context, _ string, _ int) (string, error) {
|
||||
return s.text, s.readErr
|
||||
}
|
||||
func (s *stubBookReader) ReadMetadata(_ context.Context, _ string) (domain.BookMeta, bool, error) {
|
||||
return domain.BookMeta{}, false, nil
|
||||
}
|
||||
func (s *stubBookReader) ListBooks(_ context.Context) ([]domain.BookMeta, error) { return nil, nil }
|
||||
func (s *stubBookReader) LocalSlugs(_ context.Context) (map[string]bool, error) { return nil, nil }
|
||||
func (s *stubBookReader) MetadataMtime(_ context.Context, _ string) int64 { return 0 }
|
||||
func (s *stubBookReader) ListChapters(_ context.Context, _ string) ([]domain.ChapterInfo, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (s *stubBookReader) CountChapters(_ context.Context, _ string) int { return 0 }
|
||||
func (s *stubBookReader) ReindexChapters(_ context.Context, _ string) (int, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// stubAudioStore is a bookstore.AudioStore double that counts uploads.
// PutAudio and PutAudioStream both increment putCalled and return putErr.
type stubAudioStore struct {
	putCalled atomic.Int32
	putErr    error
}

// AudioObjectKey builds "<slug>/<n>/<voice>.mp3".
//
// NOTE(review): n is rendered as the single rune '0'+n, so the key is only a
// decimal number for n in 0..9; larger chapter numbers yield other characters.
func (s *stubAudioStore) AudioObjectKey(slug string, n int, voice string) string {
	chapter := string(rune('0' + n))
	return slug + "/" + chapter + "/" + voice + ".mp3"
}

// AudioObjectKeyExt builds "<slug>/<n>/<voice>.<ext>", with the same
// single-rune rendering of n as AudioObjectKey.
func (s *stubAudioStore) AudioObjectKeyExt(slug string, n int, voice, ext string) string {
	chapter := string(rune('0' + n))
	return slug + "/" + chapter + "/" + voice + "." + ext
}

// AudioExists always reports false.
func (s *stubAudioStore) AudioExists(_ context.Context, _ string) bool {
	return false
}

// PutAudio counts the call and returns putErr; the data is discarded.
func (s *stubAudioStore) PutAudio(_ context.Context, _ string, _ []byte) error {
	s.putCalled.Add(1)
	return s.putErr
}

// PutAudioStream counts the call and returns putErr; the reader is not read.
func (s *stubAudioStore) PutAudioStream(_ context.Context, _ string, _ io.Reader, _ int64, _ string) error {
	s.putCalled.Add(1)
	return s.putErr
}
|
||||
|
||||
// stubNovelScraper satisfies scraper.NovelScraper minimally.
|
||||
type stubNovelScraper struct {
|
||||
entries []domain.CatalogueEntry
|
||||
metaErr error
|
||||
chapters []domain.ChapterRef
|
||||
}
|
||||
|
||||
func (s *stubNovelScraper) ScrapeCatalogue(_ context.Context) (<-chan domain.CatalogueEntry, <-chan error) {
|
||||
ch := make(chan domain.CatalogueEntry, len(s.entries))
|
||||
errCh := make(chan error, 1)
|
||||
for _, e := range s.entries {
|
||||
ch <- e
|
||||
}
|
||||
close(ch)
|
||||
close(errCh)
|
||||
return ch, errCh
|
||||
}
|
||||
|
||||
func (s *stubNovelScraper) ScrapeMetadata(_ context.Context, _ string) (domain.BookMeta, error) {
|
||||
if s.metaErr != nil {
|
||||
return domain.BookMeta{}, s.metaErr
|
||||
}
|
||||
return domain.BookMeta{Slug: "test-book", Title: "Test Book", SourceURL: "https://example.com/book/test-book"}, nil
|
||||
}
|
||||
|
||||
func (s *stubNovelScraper) ScrapeChapterList(_ context.Context, _ string, _ int) ([]domain.ChapterRef, error) {
|
||||
return s.chapters, nil
|
||||
}
|
||||
|
||||
func (s *stubNovelScraper) ScrapeChapterText(_ context.Context, ref domain.ChapterRef) (domain.Chapter, error) {
|
||||
return domain.Chapter{Ref: ref, Text: "# Chapter\n\nSome text."}, nil
|
||||
}
|
||||
|
||||
func (s *stubNovelScraper) ScrapeRanking(_ context.Context, _ int) (<-chan domain.BookMeta, <-chan error) {
|
||||
ch := make(chan domain.BookMeta)
|
||||
errCh := make(chan error, 1)
|
||||
close(ch)
|
||||
close(errCh)
|
||||
return ch, errCh
|
||||
}
|
||||
|
||||
func (s *stubNovelScraper) SourceName() string { return "stub" }
|
||||
|
||||
// stubKokoro is a kokoro.Client double. GenerateAudio and both Stream methods
// increment called and serve the configured data, or fail with genErr.
type stubKokoro struct {
	data   []byte
	genErr error
	called atomic.Int32
}

// GenerateAudio counts the call and returns data together with genErr.
func (s *stubKokoro) GenerateAudio(_ context.Context, _, _ string) ([]byte, error) {
	s.called.Add(1)
	return s.data, s.genErr
}

// StreamAudioMP3 counts the call and returns a reader over data, or genErr.
func (s *stubKokoro) StreamAudioMP3(_ context.Context, _, _ string) (io.ReadCloser, error) {
	return s.openStream()
}

// StreamAudioWAV counts the call and returns a reader over data, or genErr.
func (s *stubKokoro) StreamAudioWAV(_ context.Context, _, _ string) (io.ReadCloser, error) {
	return s.openStream()
}

// openStream is the shared body of the Stream methods: it records one call
// and yields either genErr (with a nil reader) or a no-op-closing reader.
func (s *stubKokoro) openStream() (io.ReadCloser, error) {
	s.called.Add(1)
	if s.genErr == nil {
		return io.NopCloser(bytes.NewReader(s.data)), nil
	}
	return nil, s.genErr
}

// ListVoices returns the single voice "af_bella".
func (s *stubKokoro) ListVoices(_ context.Context) ([]string, error) {
	return []string{"af_bella"}, nil
}
|
||||
|
||||
// ── stripMarkdown helper ──────────────────────────────────────────────────────
|
||||
|
||||
func TestStripMarkdownViaAudioTask(t *testing.T) {
|
||||
// Verify markdown is stripped before sending to Kokoro.
|
||||
// We inject chapter text with markdown; the kokoro stub verifies data flows.
|
||||
consumer := &stubConsumer{
|
||||
audioQueue: []domain.AudioTask{
|
||||
{ID: "a1", Slug: "book", Chapter: 1, Voice: "af_bella", Status: domain.TaskStatusRunning},
|
||||
},
|
||||
}
|
||||
bookReader := &stubBookReader{text: "## Chapter 1\n\nPlain **text** here."}
|
||||
audioStore := &stubAudioStore{}
|
||||
kokoroStub := &stubKokoro{data: []byte("mp3")}
|
||||
|
||||
cfg := runner.Config{
|
||||
WorkerID: "test",
|
||||
PollInterval: time.Hour, // long poll — we'll cancel manually
|
||||
}
|
||||
deps := runner.Dependencies{
|
||||
Consumer: consumer,
|
||||
BookWriter: &stubBookWriter{},
|
||||
BookReader: bookReader,
|
||||
AudioStore: audioStore,
|
||||
Novel: &stubNovelScraper{},
|
||||
Kokoro: kokoroStub,
|
||||
}
|
||||
|
||||
r := runner.New(cfg, deps)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
_ = r.Run(ctx)
|
||||
|
||||
if kokoroStub.called.Load() != 1 {
|
||||
t.Errorf("expected Kokoro.GenerateAudio called once, got %d", kokoroStub.called.Load())
|
||||
}
|
||||
if audioStore.putCalled.Load() != 1 {
|
||||
t.Errorf("expected PutAudio called once, got %d", audioStore.putCalled.Load())
|
||||
}
|
||||
}
|
||||
|
||||
func TestAudioTask_ReadChapterError(t *testing.T) {
|
||||
consumer := &stubConsumer{
|
||||
audioQueue: []domain.AudioTask{
|
||||
{ID: "a2", Slug: "book", Chapter: 2, Voice: "af_bella", Status: domain.TaskStatusRunning},
|
||||
},
|
||||
}
|
||||
bookReader := &stubBookReader{readErr: errors.New("chapter not found")}
|
||||
audioStore := &stubAudioStore{}
|
||||
kokoroStub := &stubKokoro{data: []byte("mp3")}
|
||||
|
||||
cfg := runner.Config{WorkerID: "test", PollInterval: time.Hour}
|
||||
deps := runner.Dependencies{
|
||||
Consumer: consumer,
|
||||
BookWriter: &stubBookWriter{},
|
||||
BookReader: bookReader,
|
||||
AudioStore: audioStore,
|
||||
Novel: &stubNovelScraper{},
|
||||
Kokoro: kokoroStub,
|
||||
}
|
||||
|
||||
r := runner.New(cfg, deps)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
_ = r.Run(ctx)
|
||||
|
||||
// Kokoro should not be called; FinishAudioTask should be called with error.
|
||||
if kokoroStub.called.Load() != 0 {
|
||||
t.Errorf("expected Kokoro not called, got %d", kokoroStub.called.Load())
|
||||
}
|
||||
if len(consumer.finished) != 1 {
|
||||
t.Errorf("expected FinishAudioTask called once, got %d", len(consumer.finished))
|
||||
}
|
||||
}
|
||||
|
||||
func TestAudioTask_KokoroError(t *testing.T) {
|
||||
consumer := &stubConsumer{
|
||||
audioQueue: []domain.AudioTask{
|
||||
{ID: "a3", Slug: "book", Chapter: 3, Voice: "af_bella", Status: domain.TaskStatusRunning},
|
||||
},
|
||||
}
|
||||
bookReader := &stubBookReader{text: "Chapter text."}
|
||||
audioStore := &stubAudioStore{}
|
||||
kokoroStub := &stubKokoro{genErr: errors.New("tts failed")}
|
||||
|
||||
cfg := runner.Config{WorkerID: "test", PollInterval: time.Hour}
|
||||
deps := runner.Dependencies{
|
||||
Consumer: consumer,
|
||||
BookWriter: &stubBookWriter{},
|
||||
BookReader: bookReader,
|
||||
AudioStore: audioStore,
|
||||
Novel: &stubNovelScraper{},
|
||||
Kokoro: kokoroStub,
|
||||
}
|
||||
|
||||
r := runner.New(cfg, deps)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
_ = r.Run(ctx)
|
||||
|
||||
if audioStore.putCalled.Load() != 0 {
|
||||
t.Errorf("expected PutAudio not called, got %d", audioStore.putCalled.Load())
|
||||
}
|
||||
if len(consumer.finished) != 1 {
|
||||
t.Errorf("expected FinishAudioTask called once, got %d", len(consumer.finished))
|
||||
}
|
||||
}
|
||||
|
||||
func TestScrapeTask_BookKind(t *testing.T) {
|
||||
consumer := &stubConsumer{
|
||||
scrapeQueue: []domain.ScrapeTask{
|
||||
{ID: "s1", Kind: "book", TargetURL: "https://example.com/book/test-book", Status: domain.TaskStatusRunning},
|
||||
},
|
||||
}
|
||||
|
||||
cfg := runner.Config{WorkerID: "test", PollInterval: time.Hour}
|
||||
deps := runner.Dependencies{
|
||||
Consumer: consumer,
|
||||
BookWriter: &stubBookWriter{},
|
||||
BookReader: &stubBookReader{},
|
||||
AudioStore: &stubAudioStore{},
|
||||
Novel: &stubNovelScraper{},
|
||||
Kokoro: &stubKokoro{},
|
||||
}
|
||||
|
||||
r := runner.New(cfg, deps)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
_ = r.Run(ctx)
|
||||
|
||||
if len(consumer.finished) != 1 || consumer.finished[0] != "s1" {
|
||||
t.Errorf("expected task s1 finished, got %v", consumer.finished)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScrapeTask_UnknownKind(t *testing.T) {
|
||||
consumer := &stubConsumer{
|
||||
scrapeQueue: []domain.ScrapeTask{
|
||||
{ID: "s2", Kind: "unknown_kind", Status: domain.TaskStatusRunning},
|
||||
},
|
||||
}
|
||||
|
||||
cfg := runner.Config{WorkerID: "test", PollInterval: time.Hour}
|
||||
deps := runner.Dependencies{
|
||||
Consumer: consumer,
|
||||
BookWriter: &stubBookWriter{},
|
||||
BookReader: &stubBookReader{},
|
||||
AudioStore: &stubAudioStore{},
|
||||
Novel: &stubNovelScraper{},
|
||||
Kokoro: &stubKokoro{},
|
||||
}
|
||||
|
||||
r := runner.New(cfg, deps)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
_ = r.Run(ctx)
|
||||
|
||||
// Unknown kind still finishes the task (with error message in result).
|
||||
if len(consumer.finished) != 1 || consumer.finished[0] != "s2" {
|
||||
t.Errorf("expected task s2 finished, got %v", consumer.finished)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRun_CancelImmediately(t *testing.T) {
|
||||
consumer := &stubConsumer{}
|
||||
cfg := runner.Config{WorkerID: "test", PollInterval: 10 * time.Millisecond}
|
||||
deps := runner.Dependencies{
|
||||
Consumer: consumer,
|
||||
BookWriter: &stubBookWriter{},
|
||||
BookReader: &stubBookReader{},
|
||||
AudioStore: &stubAudioStore{},
|
||||
Novel: &stubNovelScraper{},
|
||||
Kokoro: &stubKokoro{},
|
||||
}
|
||||
|
||||
r := runner.New(cfg, deps)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel() // cancel before Run
|
||||
|
||||
err := r.Run(ctx)
|
||||
if err != nil {
|
||||
t.Errorf("expected nil on graceful shutdown, got %v", err)
|
||||
}
|
||||
}
|
||||
97
backend/internal/runner/translation.go
Normal file
97
backend/internal/runner/translation.go
Normal file
@@ -0,0 +1,97 @@
|
||||
package runner
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
)
|
||||
|
||||
// runTranslationTask executes one machine-translation task end-to-end and
|
||||
// reports the result back to PocketBase.
|
||||
func (r *Runner) runTranslationTask(ctx context.Context, task domain.TranslationTask) {
|
||||
ctx, span := otel.Tracer("runner").Start(ctx, "runner.translation_task")
|
||||
defer span.End()
|
||||
span.SetAttributes(
|
||||
attribute.String("task.id", task.ID),
|
||||
attribute.String("book.slug", task.Slug),
|
||||
attribute.Int("chapter.number", task.Chapter),
|
||||
attribute.String("translation.lang", task.Lang),
|
||||
)
|
||||
|
||||
log := r.deps.Log.With("task_id", task.ID, "slug", task.Slug, "chapter", task.Chapter, "lang", task.Lang)
|
||||
log.Info("runner: translation task starting")
|
||||
|
||||
// Heartbeat goroutine — keeps the task alive while translation runs.
|
||||
hbCtx, hbCancel := context.WithCancel(ctx)
|
||||
defer hbCancel()
|
||||
go func() {
|
||||
tick := time.NewTicker(r.cfg.HeartbeatInterval)
|
||||
defer tick.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-hbCtx.Done():
|
||||
return
|
||||
case <-tick.C:
|
||||
if err := r.deps.Consumer.HeartbeatTask(ctx, task.ID); err != nil {
|
||||
log.Warn("runner: heartbeat failed", "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
fail := func(msg string) {
|
||||
log.Error("runner: translation task failed", "reason", msg)
|
||||
r.tasksFailed.Add(1)
|
||||
span.SetStatus(codes.Error, msg)
|
||||
result := domain.TranslationResult{ErrorMessage: msg}
|
||||
if err := r.deps.Consumer.FinishTranslationTask(ctx, task.ID, result); err != nil {
|
||||
log.Error("runner: FinishTranslationTask failed", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Guard: LibreTranslate must be configured.
|
||||
if r.deps.LibreTranslate == nil {
|
||||
fail("libretranslate client not configured (LIBRETRANSLATE_URL is empty)")
|
||||
return
|
||||
}
|
||||
|
||||
// 1. Read raw markdown chapter.
|
||||
raw, err := r.deps.BookReader.ReadChapter(ctx, task.Slug, task.Chapter)
|
||||
if err != nil {
|
||||
fail(fmt.Sprintf("read chapter: %v", err))
|
||||
return
|
||||
}
|
||||
if raw == "" {
|
||||
fail("chapter text is empty")
|
||||
return
|
||||
}
|
||||
|
||||
// 2. Translate (chunked, concurrent).
|
||||
translated, err := r.deps.LibreTranslate.Translate(ctx, raw, "en", task.Lang)
|
||||
if err != nil {
|
||||
fail(fmt.Sprintf("translate: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
// 3. Store translated markdown in MinIO.
|
||||
key := r.deps.TranslationStore.TranslationObjectKey(task.Lang, task.Slug, task.Chapter)
|
||||
if err := r.deps.TranslationStore.PutTranslation(ctx, key, []byte(translated)); err != nil {
|
||||
fail(fmt.Sprintf("put translation: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
// 4. Report success.
|
||||
r.tasksCompleted.Add(1)
|
||||
span.SetStatus(codes.Ok, "")
|
||||
result := domain.TranslationResult{ObjectKey: key}
|
||||
if err := r.deps.Consumer.FinishTranslationTask(ctx, task.ID, result); err != nil {
|
||||
log.Error("runner: FinishTranslationTask failed", "err", err)
|
||||
}
|
||||
log.Info("runner: translation task finished", "key", key)
|
||||
}
|
||||
60
backend/internal/scraper/scraper.go
Normal file
60
backend/internal/scraper/scraper.go
Normal file
@@ -0,0 +1,60 @@
|
||||
// Package scraper defines the NovelScraper interface and its sub-interfaces.
|
||||
// Domain types live in internal/domain — this package only defines the scraping
|
||||
// contract so that novelfire and any future scrapers can be swapped freely.
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
)
|
||||
|
||||
// CatalogueProvider can enumerate every novel available on a source site.
|
||||
type CatalogueProvider interface {
|
||||
ScrapeCatalogue(ctx context.Context) (<-chan domain.CatalogueEntry, <-chan error)
|
||||
}
|
||||
|
||||
// MetadataProvider can extract structured book metadata from a novel's landing page.
|
||||
type MetadataProvider interface {
|
||||
ScrapeMetadata(ctx context.Context, bookURL string) (domain.BookMeta, error)
|
||||
}
|
||||
|
||||
// ChapterListProvider can enumerate all chapters of a book.
|
||||
// upTo > 0 stops pagination once at least upTo chapter numbers have been
|
||||
// collected (early-exit optimisation for range scrapes). upTo == 0 fetches all pages.
|
||||
type ChapterListProvider interface {
|
||||
ScrapeChapterList(ctx context.Context, bookURL string, upTo int) ([]domain.ChapterRef, error)
|
||||
}
|
||||
|
||||
// ChapterTextProvider can extract the readable text from a single chapter page.
|
||||
type ChapterTextProvider interface {
|
||||
ScrapeChapterText(ctx context.Context, ref domain.ChapterRef) (domain.Chapter, error)
|
||||
}
|
||||
|
||||
// RankingProvider can enumerate novels from a ranking page.
|
||||
type RankingProvider interface {
|
||||
// ScrapeRanking pages through up to maxPages ranking pages.
|
||||
// maxPages <= 0 means all pages.
|
||||
ScrapeRanking(ctx context.Context, maxPages int) (<-chan domain.BookMeta, <-chan error)
|
||||
}
|
||||
|
||||
// NovelScraper is the full interface a concrete novel source must implement.
|
||||
type NovelScraper interface {
|
||||
CatalogueProvider
|
||||
MetadataProvider
|
||||
ChapterListProvider
|
||||
ChapterTextProvider
|
||||
RankingProvider
|
||||
|
||||
// SourceName returns the human-readable name of this scraper, e.g. "novelfire.net".
|
||||
SourceName() string
|
||||
}
|
||||
|
||||
// Selector describes how to locate an element in an HTML document.
//
// How each field is interpreted is decided by the code that consumes the
// selector, which is not part of this file.
type Selector struct {
	Tag, Class, ID, Attr string
	Multiple             bool
}
|
||||
267
backend/internal/storage/minio.go
Normal file
267
backend/internal/storage/minio.go
Normal file
@@ -0,0 +1,267 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/url"
|
||||
"path"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
minio "github.com/minio/minio-go/v7"
|
||||
"github.com/minio/minio-go/v7/pkg/credentials"
|
||||
|
||||
"github.com/libnovel/backend/internal/config"
|
||||
)
|
||||
|
||||
// minioClient wraps the official minio-go client with bucket names.
|
||||
type minioClient struct {
|
||||
client *minio.Client // internal — all read/write operations
|
||||
pubClient *minio.Client // presign-only — initialised against the public endpoint
|
||||
bucketChapters string
|
||||
bucketAudio string
|
||||
bucketAvatars string
|
||||
bucketBrowse string
|
||||
bucketTranslations string
|
||||
}
|
||||
|
||||
func newMinioClient(cfg config.MinIO) (*minioClient, error) {
|
||||
creds := credentials.NewStaticV4(cfg.AccessKey, cfg.SecretKey, "")
|
||||
|
||||
internal, err := minio.New(cfg.Endpoint, &minio.Options{
|
||||
Creds: creds,
|
||||
Secure: cfg.UseSSL,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("minio: init internal client: %w", err)
|
||||
}
|
||||
|
||||
// Presigned URLs must be signed with the hostname the browser will use
|
||||
// (PUBLIC_MINIO_PUBLIC_URL), because AWS Signature V4 includes the Host
|
||||
// header in the canonical request — a URL signed against "minio:9000" will
|
||||
// return SignatureDoesNotMatch when the browser fetches it from
|
||||
// "localhost:9000".
|
||||
//
|
||||
// However, minio-go normally makes a live BucketLocation HTTP call before
|
||||
// signing, which would fail from inside the container when the public
|
||||
// endpoint is externally-facing (e.g. "localhost:9000" is unreachable from
|
||||
// within Docker). We prevent this by:
|
||||
// 1. Setting Region: "us-east-1" — minio-go skips getBucketLocation when
|
||||
// the region is already known (bucket-cache.go:49).
|
||||
// 2. Setting BucketLookup: BucketLookupPath — forces path-style URLs
|
||||
// (e.g. host/bucket/key), matching MinIO's default behaviour and
|
||||
// avoiding any virtual-host DNS probing.
|
||||
//
|
||||
// When no public endpoint is configured (or it equals the internal one),
|
||||
// fall back to the internal client so presigning still works.
|
||||
publicEndpoint := cfg.PublicEndpoint
|
||||
if u, err2 := url.Parse(publicEndpoint); err2 == nil && u.Host != "" {
|
||||
publicEndpoint = u.Host // strip scheme so minio.New is happy
|
||||
}
|
||||
pubUseSSL := cfg.PublicUseSSL
|
||||
if publicEndpoint == "" || publicEndpoint == cfg.Endpoint {
|
||||
publicEndpoint = cfg.Endpoint
|
||||
pubUseSSL = cfg.UseSSL
|
||||
}
|
||||
pub, err := minio.New(publicEndpoint, &minio.Options{
|
||||
Creds: creds,
|
||||
Secure: pubUseSSL,
|
||||
Region: "us-east-1", // skip live BucketLocation preflight
|
||||
BucketLookup: minio.BucketLookupPath,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("minio: init public client: %w", err)
|
||||
}
|
||||
|
||||
return &minioClient{
|
||||
client: internal,
|
||||
pubClient: pub,
|
||||
bucketChapters: cfg.BucketChapters,
|
||||
bucketAudio: cfg.BucketAudio,
|
||||
bucketAvatars: cfg.BucketAvatars,
|
||||
bucketBrowse: cfg.BucketBrowse,
|
||||
bucketTranslations: cfg.BucketTranslations,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// ensureBuckets creates all required buckets if they don't already exist.
|
||||
func (m *minioClient) ensureBuckets(ctx context.Context) error {
|
||||
for _, bucket := range []string{m.bucketChapters, m.bucketAudio, m.bucketAvatars, m.bucketBrowse, m.bucketTranslations} {
|
||||
exists, err := m.client.BucketExists(ctx, bucket)
|
||||
if err != nil {
|
||||
return fmt.Errorf("minio: check bucket %q: %w", bucket, err)
|
||||
}
|
||||
if !exists {
|
||||
if err := m.client.MakeBucket(ctx, bucket, minio.MakeBucketOptions{}); err != nil {
|
||||
return fmt.Errorf("minio: create bucket %q: %w", bucket, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ── Key helpers ───────────────────────────────────────────────────────────────
|
||||
|
||||
// ChapterObjectKey builds the MinIO object key of a chapter markdown file.
// The chapter number is zero-padded to six digits.
// Format: {slug}/chapter-{n:06d}.md
func ChapterObjectKey(slug string, n int) string {
	fileName := fmt.Sprintf("chapter-%06d.md", n)
	return slug + "/" + fileName
}
|
||||
|
||||
// AudioObjectKeyExt builds the MinIO object key of a cached audio file whose
// extension is chosen by the caller (e.g. "mp3" or "wav").
// Format: {slug}/{n}/{voice}.{ext}
func AudioObjectKeyExt(slug string, n int, voice, ext string) string {
	segments := []string{slug, fmt.Sprint(n), voice + "." + ext}
	return strings.Join(segments, "/")
}
|
||||
|
||||
// AudioObjectKey returns the MinIO object key for a cached MP3 audio file.
|
||||
// Format: {slug}/{n}/{voice}.mp3
|
||||
func AudioObjectKey(slug string, n int, voice string) string {
|
||||
return AudioObjectKeyExt(slug, n, voice, "mp3")
|
||||
}
|
||||
|
||||
// AvatarObjectKey builds the MinIO object key of a user avatar image. The
// extension is used both as the file's base name and as its suffix.
// Format: {userID}/{ext}.{ext}
func AvatarObjectKey(userID, ext string) string {
	fileName := ext + "." + ext
	return userID + "/" + fileName
}
|
||||
|
||||
// CoverObjectKey builds the MinIO object key of a book cover image.
// Format: covers/{slug}.jpg
func CoverObjectKey(slug string) string {
	const (
		coverDir = "covers/"
		coverExt = ".jpg"
	)
	return coverDir + slug + coverExt
}
|
||||
|
||||
// TranslationObjectKey builds the MinIO object key of a translated chapter.
// The chapter number is zero-padded to six digits.
// Format: {lang}/{slug}/{n:06d}.md
func TranslationObjectKey(lang, slug string, n int) string {
	fileName := fmt.Sprintf("%06d.md", n)
	return lang + "/" + slug + "/" + fileName
}
|
||||
|
||||
// chapterNumberFromKey pulls the chapter number out of a MinIO object key,
// e.g. "my-book/chapter-000042.md" → 42.
//
// Only the last path element is looked at; a leading "chapter-" and a
// trailing ".md" are removed before the remainder is scanned as a decimal
// integer. When nothing can be scanned the result is 0.
func chapterNumberFromKey(key string) int {
	name := path.Base(key)
	digits := strings.TrimSuffix(strings.TrimPrefix(name, "chapter-"), ".md")

	num := 0
	if _, err := fmt.Sscanf(digits, "%d", &num); err != nil {
		return 0
	}
	return num
}
|
||||
|
||||
// ── Object operations ─────────────────────────────────────────────────────────
|
||||
|
||||
func (m *minioClient) putObject(ctx context.Context, bucket, key, contentType string, data []byte) error {
|
||||
_, err := m.client.PutObject(ctx, bucket, key,
|
||||
strings.NewReader(string(data)),
|
||||
int64(len(data)),
|
||||
minio.PutObjectOptions{ContentType: contentType},
|
||||
)
|
||||
return err
|
||||
}
|
||||
|
||||
// putObjectStream uploads from r with known size (or -1 for multipart).
|
||||
func (m *minioClient) putObjectStream(ctx context.Context, bucket, key, contentType string, r io.Reader, size int64) error {
|
||||
_, err := m.client.PutObject(ctx, bucket, key, r, size,
|
||||
minio.PutObjectOptions{ContentType: contentType},
|
||||
)
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *minioClient) getObject(ctx context.Context, bucket, key string) ([]byte, error) {
|
||||
obj, err := m.client.GetObject(ctx, bucket, key, minio.GetObjectOptions{})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer obj.Close()
|
||||
return io.ReadAll(obj)
|
||||
}
|
||||
|
||||
func (m *minioClient) objectExists(ctx context.Context, bucket, key string) bool {
|
||||
_, err := m.client.StatObject(ctx, bucket, key, minio.StatObjectOptions{})
|
||||
return err == nil
|
||||
}
|
||||
|
||||
func (m *minioClient) presignGet(ctx context.Context, bucket, key string, expires time.Duration) (string, error) {
|
||||
u, err := m.pubClient.PresignedGetObject(ctx, bucket, key, expires, nil)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("minio presign %s/%s: %w", bucket, key, err)
|
||||
}
|
||||
return u.String(), nil
|
||||
}
|
||||
|
||||
func (m *minioClient) presignPut(ctx context.Context, bucket, key string, expires time.Duration) (string, error) {
|
||||
u, err := m.pubClient.PresignedPutObject(ctx, bucket, key, expires)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("minio presign PUT %s/%s: %w", bucket, key, err)
|
||||
}
|
||||
return u.String(), nil
|
||||
}
|
||||
|
||||
func (m *minioClient) deleteObjects(ctx context.Context, bucket, prefix string) error {
|
||||
objCh := m.client.ListObjects(ctx, bucket, minio.ListObjectsOptions{Prefix: prefix})
|
||||
for obj := range objCh {
|
||||
if obj.Err != nil {
|
||||
return obj.Err
|
||||
}
|
||||
if err := m.client.RemoveObject(ctx, bucket, obj.Key, minio.RemoveObjectOptions{}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *minioClient) listObjectKeys(ctx context.Context, bucket, prefix string) ([]string, error) {
|
||||
var keys []string
|
||||
for obj := range m.client.ListObjects(ctx, bucket, minio.ListObjectsOptions{Prefix: prefix}) {
|
||||
if obj.Err != nil {
|
||||
return nil, obj.Err
|
||||
}
|
||||
keys = append(keys, obj.Key)
|
||||
}
|
||||
return keys, nil
|
||||
}
|
||||
|
||||
// ── Cover operations ──────────────────────────────────────────────────────────
|
||||
|
||||
// putCover stores a raw cover image in the browse bucket under covers/{slug}.jpg.
|
||||
func (m *minioClient) putCover(ctx context.Context, key, contentType string, data []byte) error {
|
||||
return m.putObject(ctx, m.bucketBrowse, key, contentType, data)
|
||||
}
|
||||
|
||||
// getCover retrieves a cover image. Returns (nil, "", false, nil) when the
|
||||
// object does not exist.
|
||||
func (m *minioClient) getCover(ctx context.Context, key string) ([]byte, bool, error) {
|
||||
if !m.objectExists(ctx, m.bucketBrowse, key) {
|
||||
return nil, false, nil
|
||||
}
|
||||
data, err := m.getObject(ctx, m.bucketBrowse, key)
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
return data, true, nil
|
||||
}
|
||||
|
||||
// coverExists returns true when the cover image object exists.
|
||||
func (m *minioClient) coverExists(ctx context.Context, key string) bool {
|
||||
return m.objectExists(ctx, m.bucketBrowse, key)
|
||||
}
|
||||
|
||||
// coverContentType sniffs the leading bytes of data and returns a MIME type:
//   - "image/png"  when data starts with the PNG magic 0x89 'P' 'N' 'G';
//   - "image/webp" when data is at least 12 bytes long, starts with "RIFF"
//     and carries "WEBP" at offsets 8..11;
//   - "image/jpeg" in every other case, including empty input.
func coverContentType(data []byte) string {
	switch {
	case len(data) >= 4 && string(data[:4]) == "\x89PNG":
		return "image/png"
	case len(data) >= 12 && string(data[:4]) == "RIFF" && string(data[8:12]) == "WEBP":
		return "image/webp"
	default:
		return "image/jpeg"
	}
}
|
||||
274
backend/internal/storage/pocketbase.go
Normal file
274
backend/internal/storage/pocketbase.go
Normal file
@@ -0,0 +1,274 @@
|
||||
// Package storage provides the concrete implementations of all bookstore and
|
||||
// taskqueue interfaces backed by PocketBase (structured data) and MinIO (blobs).
|
||||
//
|
||||
// Entry point: NewStore(ctx, cfg, log) returns a *Store that satisfies every
|
||||
// interface defined in bookstore and taskqueue.
|
||||
package storage
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/libnovel/backend/internal/config"
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
)
|
||||
|
||||
var (
	// ErrNotFound is what single-record lookups return when no record
	// exists.
	ErrNotFound = errors.New("storage: record not found")

	// pbHTTPClient is the HTTP client shared by all PocketBase calls. Its
	// 30 s timeout keeps a slow or hung PocketBase from stalling the
	// backend/runner process indefinitely. http.DefaultClient has no
	// timeout and must not be used for PocketBase calls.
	pbHTTPClient = &http.Client{Timeout: 30 * time.Second}
)
|
||||
|
||||
// pbClient is the package-internal REST admin client for PocketBase.
type pbClient struct {
	// Connection settings and logger, set once by newPBClient.
	baseURL  string
	email    string
	password string
	log      *slog.Logger

	// mu guards the cached auth token and its local expiry.
	mu    sync.Mutex
	token string
	exp   time.Time
}
|
||||
|
||||
func newPBClient(cfg config.PocketBase, log *slog.Logger) *pbClient {
|
||||
return &pbClient{
|
||||
baseURL: strings.TrimRight(cfg.URL, "/"),
|
||||
email: cfg.AdminEmail,
|
||||
password: cfg.AdminPassword,
|
||||
log: log,
|
||||
}
|
||||
}
|
||||
|
||||
// authToken returns a valid admin auth token, refreshing it when expired.
|
||||
func (c *pbClient) authToken(ctx context.Context) (string, error) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
if c.token != "" && time.Now().Before(c.exp) {
|
||||
return c.token, nil
|
||||
}
|
||||
|
||||
body, _ := json.Marshal(map[string]string{
|
||||
"identity": c.email,
|
||||
"password": c.password,
|
||||
})
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost,
|
||||
c.baseURL+"/api/collections/_superusers/auth-with-password", bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("pb auth: build request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := pbHTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("pb auth: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
raw, _ := io.ReadAll(resp.Body)
|
||||
return "", fmt.Errorf("pb auth: status %d: %s", resp.StatusCode, string(raw))
|
||||
}
|
||||
|
||||
var payload struct {
|
||||
Token string `json:"token"`
|
||||
}
|
||||
if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
|
||||
return "", fmt.Errorf("pb auth: decode: %w", err)
|
||||
}
|
||||
c.token = payload.Token
|
||||
c.exp = time.Now().Add(30 * time.Minute)
|
||||
return c.token, nil
|
||||
}
|
||||
|
||||
// do executes an authenticated PocketBase REST request.
|
||||
func (c *pbClient) do(ctx context.Context, method, path string, body io.Reader) (*http.Response, error) {
|
||||
tok, err := c.authToken(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, method, c.baseURL+path, body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("pb: build request %s %s: %w", method, path, err)
|
||||
}
|
||||
req.Header.Set("Authorization", tok)
|
||||
if body != nil {
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
}
|
||||
|
||||
resp, err := pbHTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("pb: %s %s: %w", method, path, err)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// get is a convenience wrapper that decodes a JSON response into v.
|
||||
func (c *pbClient) get(ctx context.Context, path string, v any) error {
|
||||
resp, err := c.do(ctx, http.MethodGet, path, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode == http.StatusNotFound {
|
||||
return ErrNotFound
|
||||
}
|
||||
if resp.StatusCode >= 400 {
|
||||
raw, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("pb GET %s: status %d: %s", path, resp.StatusCode, string(raw))
|
||||
}
|
||||
return json.NewDecoder(resp.Body).Decode(v)
|
||||
}
|
||||
|
||||
// post creates a record and decodes the created record into v.
|
||||
func (c *pbClient) post(ctx context.Context, path string, payload, v any) error {
|
||||
b, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
return fmt.Errorf("pb: marshal: %w", err)
|
||||
}
|
||||
resp, err := c.do(ctx, http.MethodPost, path, bytes.NewReader(b))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 400 {
|
||||
raw, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("pb POST %s: status %d: %s", path, resp.StatusCode, string(raw))
|
||||
}
|
||||
if v != nil {
|
||||
return json.NewDecoder(resp.Body).Decode(v)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// patch updates a record.
|
||||
func (c *pbClient) patch(ctx context.Context, path string, payload any) error {
|
||||
b, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
return fmt.Errorf("pb: marshal: %w", err)
|
||||
}
|
||||
resp, err := c.do(ctx, http.MethodPatch, path, bytes.NewReader(b))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 400 {
|
||||
raw, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("pb PATCH %s: status %d: %s", path, resp.StatusCode, string(raw))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// delete removes a record.
|
||||
func (c *pbClient) delete(ctx context.Context, path string) error {
|
||||
resp, err := c.do(ctx, http.MethodDelete, path, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode == http.StatusNotFound {
|
||||
return ErrNotFound
|
||||
}
|
||||
if resp.StatusCode >= 400 {
|
||||
raw, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("pb DELETE %s: status %d: %s", path, resp.StatusCode, string(raw))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// listAll fetches all pages of a collection. PocketBase returns at most 200
|
||||
// records per page; we paginate until empty.
|
||||
func (c *pbClient) listAll(ctx context.Context, collection string, filter, sort string) ([]json.RawMessage, error) {
|
||||
var all []json.RawMessage
|
||||
page := 1
|
||||
for {
|
||||
q := url.Values{
|
||||
"page": {fmt.Sprintf("%d", page)},
|
||||
"perPage": {"200"},
|
||||
}
|
||||
if filter != "" {
|
||||
q.Set("filter", filter)
|
||||
}
|
||||
if sort != "" {
|
||||
q.Set("sort", sort)
|
||||
}
|
||||
path := fmt.Sprintf("/api/collections/%s/records?%s", collection, q.Encode())
|
||||
|
||||
var result struct {
|
||||
Items []json.RawMessage `json:"items"`
|
||||
Page int `json:"page"`
|
||||
Pages int `json:"totalPages"`
|
||||
}
|
||||
if err := c.get(ctx, path, &result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
all = append(all, result.Items...)
|
||||
if result.Page >= result.Pages {
|
||||
break
|
||||
}
|
||||
page++
|
||||
}
|
||||
return all, nil
|
||||
}
|
||||
|
||||
// claimRecord atomically claims the first pending record matching collection.
|
||||
// It fetches the oldest pending record (filter + sort), then PATCHes it with
|
||||
// the claim payload. Returns (nil, nil) when the queue is empty.
|
||||
func (c *pbClient) claimRecord(ctx context.Context, collection, workerID string, extraClaim map[string]any) (json.RawMessage, error) {
|
||||
q := url.Values{}
|
||||
q.Set("filter", `status="pending"`)
|
||||
q.Set("sort", "+started")
|
||||
q.Set("perPage", "1")
|
||||
path := fmt.Sprintf("/api/collections/%s/records?%s", collection, q.Encode())
|
||||
|
||||
var result struct {
|
||||
Items []json.RawMessage `json:"items"`
|
||||
}
|
||||
if err := c.get(ctx, path, &result); err != nil {
|
||||
return nil, fmt.Errorf("claimRecord list: %w", err)
|
||||
}
|
||||
if len(result.Items) == 0 {
|
||||
return nil, nil // queue empty
|
||||
}
|
||||
|
||||
var rec struct {
|
||||
ID string `json:"id"`
|
||||
}
|
||||
if err := json.Unmarshal(result.Items[0], &rec); err != nil {
|
||||
return nil, fmt.Errorf("claimRecord parse id: %w", err)
|
||||
}
|
||||
|
||||
claim := map[string]any{
|
||||
"status": string(domain.TaskStatusRunning),
|
||||
"worker_id": workerID,
|
||||
"heartbeat_at": time.Now().UTC().Format(time.RFC3339),
|
||||
}
|
||||
for k, v := range extraClaim {
|
||||
claim[k] = v
|
||||
}
|
||||
|
||||
claimPath := fmt.Sprintf("/api/collections/%s/records/%s", collection, rec.ID)
|
||||
if err := c.patch(ctx, claimPath, claim); err != nil {
|
||||
return nil, fmt.Errorf("claimRecord patch: %w", err)
|
||||
}
|
||||
|
||||
// Re-fetch the updated record so caller has current state.
|
||||
var updated json.RawMessage
|
||||
if err := c.get(ctx, claimPath, &updated); err != nil {
|
||||
return nil, fmt.Errorf("claimRecord re-fetch: %w", err)
|
||||
}
|
||||
return updated, nil
|
||||
}
|
||||
996
backend/internal/storage/store.go
Normal file
996
backend/internal/storage/store.go
Normal file
@@ -0,0 +1,996 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/libnovel/backend/internal/bookstore"
|
||||
"github.com/libnovel/backend/internal/config"
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
"github.com/libnovel/backend/internal/taskqueue"
|
||||
)
|
||||
|
||||
// Store is the unified persistence implementation that satisfies all bookstore
|
||||
// and taskqueue interfaces. It routes structured data to PocketBase and binary
|
||||
// blobs to MinIO.
|
||||
type Store struct {
|
||||
pb *pbClient
|
||||
mc *minioClient
|
||||
log *slog.Logger
|
||||
}
|
||||
|
||||
// NewStore initialises PocketBase and MinIO connections and ensures all MinIO
|
||||
// buckets exist. Returns a ready-to-use Store.
|
||||
func NewStore(ctx context.Context, cfg config.Config, log *slog.Logger) (*Store, error) {
|
||||
pb := newPBClient(cfg.PocketBase, log)
|
||||
// Validate PocketBase connectivity by fetching an auth token.
|
||||
if _, err := pb.authToken(ctx); err != nil {
|
||||
return nil, fmt.Errorf("pocketbase: %w", err)
|
||||
}
|
||||
|
||||
mc, err := newMinioClient(cfg.MinIO)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("minio: %w", err)
|
||||
}
|
||||
if err := mc.ensureBuckets(ctx); err != nil {
|
||||
return nil, fmt.Errorf("minio: ensure buckets: %w", err)
|
||||
}
|
||||
|
||||
return &Store{pb: pb, mc: mc, log: log}, nil
|
||||
}
|
||||
|
||||
// Compile-time interface satisfaction.
|
||||
var _ bookstore.BookWriter = (*Store)(nil)
|
||||
var _ bookstore.BookReader = (*Store)(nil)
|
||||
var _ bookstore.RankingStore = (*Store)(nil)
|
||||
var _ bookstore.AudioStore = (*Store)(nil)
|
||||
var _ bookstore.PresignStore = (*Store)(nil)
|
||||
var _ bookstore.ProgressStore = (*Store)(nil)
|
||||
var _ bookstore.CoverStore = (*Store)(nil)
|
||||
var _ bookstore.TranslationStore = (*Store)(nil)
|
||||
var _ taskqueue.Producer = (*Store)(nil)
|
||||
var _ taskqueue.Consumer = (*Store)(nil)
|
||||
var _ taskqueue.Reader = (*Store)(nil)
|
||||
|
||||
// ── BookWriter ────────────────────────────────────────────────────────────────
|
||||
|
||||
func (s *Store) WriteMetadata(ctx context.Context, meta domain.BookMeta) error {
|
||||
payload := map[string]any{
|
||||
"slug": meta.Slug,
|
||||
"title": meta.Title,
|
||||
"author": meta.Author,
|
||||
"cover": meta.Cover,
|
||||
"status": meta.Status,
|
||||
"genres": meta.Genres,
|
||||
"summary": meta.Summary,
|
||||
"total_chapters": meta.TotalChapters,
|
||||
"source_url": meta.SourceURL,
|
||||
"ranking": meta.Ranking,
|
||||
"rating": meta.Rating,
|
||||
}
|
||||
// Upsert via filter: if exists PATCH, otherwise POST.
|
||||
// Use a conflict-retry pattern to handle concurrent scrapes racing to insert
|
||||
// the same slug: if POST fails (or another concurrent writer beat us to it),
|
||||
// re-fetch and PATCH instead.
|
||||
existing, err := s.getBookBySlug(ctx, meta.Slug)
|
||||
if err != nil && err != ErrNotFound {
|
||||
return fmt.Errorf("WriteMetadata: %w", err)
|
||||
}
|
||||
if err == ErrNotFound {
|
||||
postErr := s.pb.post(ctx, "/api/collections/books/records", payload, nil)
|
||||
if postErr == nil {
|
||||
return nil
|
||||
}
|
||||
// POST failed — a concurrent writer may have inserted the same slug.
|
||||
// Re-fetch and fall through to PATCH.
|
||||
existing, err = s.getBookBySlug(ctx, meta.Slug)
|
||||
if err != nil {
|
||||
return postErr // original POST error is more informative
|
||||
}
|
||||
}
|
||||
return s.pb.patch(ctx, fmt.Sprintf("/api/collections/books/records/%s", existing.ID), payload)
|
||||
}
|
||||
|
||||
func (s *Store) WriteChapter(ctx context.Context, slug string, chapter domain.Chapter) error {
|
||||
key := ChapterObjectKey(slug, chapter.Ref.Number)
|
||||
if err := s.mc.putObject(ctx, s.mc.bucketChapters, key, "text/markdown", []byte(chapter.Text)); err != nil {
|
||||
return fmt.Errorf("WriteChapter: minio: %w", err)
|
||||
}
|
||||
// Upsert the chapters_idx record in PocketBase.
|
||||
return s.upsertChapterIdx(ctx, slug, chapter.Ref)
|
||||
}
|
||||
|
||||
func (s *Store) WriteChapterRefs(ctx context.Context, slug string, refs []domain.ChapterRef) error {
|
||||
for _, ref := range refs {
|
||||
if err := s.upsertChapterIdx(ctx, slug, ref); err != nil {
|
||||
s.log.Warn("WriteChapterRefs: upsert failed", "slug", slug, "chapter", ref.Number, "err", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Store) ChapterExists(ctx context.Context, slug string, ref domain.ChapterRef) bool {
|
||||
return s.mc.objectExists(ctx, s.mc.bucketChapters, ChapterObjectKey(slug, ref.Number))
|
||||
}
|
||||
|
||||
func (s *Store) upsertChapterIdx(ctx context.Context, slug string, ref domain.ChapterRef) error {
|
||||
payload := map[string]any{
|
||||
"slug": slug,
|
||||
"number": ref.Number,
|
||||
"title": ref.Title,
|
||||
}
|
||||
filter := fmt.Sprintf(`slug=%q&&number=%d`, slug, ref.Number)
|
||||
items, err := s.pb.listAll(ctx, "chapters_idx", filter, "")
|
||||
if err != nil && err != ErrNotFound {
|
||||
return err
|
||||
}
|
||||
if len(items) == 0 {
|
||||
return s.pb.post(ctx, "/api/collections/chapters_idx/records", payload, nil)
|
||||
}
|
||||
var rec struct {
|
||||
ID string `json:"id"`
|
||||
}
|
||||
json.Unmarshal(items[0], &rec)
|
||||
return s.pb.patch(ctx, fmt.Sprintf("/api/collections/chapters_idx/records/%s", rec.ID), payload)
|
||||
}
|
||||
|
||||
// ── BookReader ────────────────────────────────────────────────────────────────
|
||||
|
||||
// pbBook is the JSON shape of a record in the PocketBase "books" collection
// as this package decodes it.
type pbBook struct {
	// Record identity.
	ID   string `json:"id"`
	Slug string `json:"slug"`

	// Descriptive metadata.
	Title   string   `json:"title"`
	Author  string   `json:"author"`
	Cover   string   `json:"cover"`
	Status  string   `json:"status"`
	Genres  []string `json:"genres"`
	Summary string   `json:"summary"`

	// Source and ranking data.
	TotalChapters int     `json:"total_chapters"`
	SourceURL     string  `json:"source_url"`
	Ranking       int     `json:"ranking"`
	Rating        float64 `json:"rating"`

	// Updated is kept as the raw timestamp string and parsed on demand.
	Updated string `json:"updated"`
}
|
||||
|
||||
func (b pbBook) toDomain() domain.BookMeta {
|
||||
var metaUpdated int64
|
||||
if t, err := time.Parse(time.RFC3339, b.Updated); err == nil {
|
||||
metaUpdated = t.Unix()
|
||||
}
|
||||
return domain.BookMeta{
|
||||
Slug: b.Slug,
|
||||
Title: b.Title,
|
||||
Author: b.Author,
|
||||
Cover: b.Cover,
|
||||
Status: b.Status,
|
||||
Genres: b.Genres,
|
||||
Summary: b.Summary,
|
||||
TotalChapters: b.TotalChapters,
|
||||
SourceURL: b.SourceURL,
|
||||
Ranking: b.Ranking,
|
||||
Rating: b.Rating,
|
||||
MetaUpdated: metaUpdated,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Store) getBookBySlug(ctx context.Context, slug string) (pbBook, error) {
|
||||
filter := fmt.Sprintf(`slug=%q`, slug)
|
||||
items, err := s.pb.listAll(ctx, "books", filter, "")
|
||||
if err != nil {
|
||||
return pbBook{}, err
|
||||
}
|
||||
if len(items) == 0 {
|
||||
return pbBook{}, ErrNotFound
|
||||
}
|
||||
var b pbBook
|
||||
json.Unmarshal(items[0], &b)
|
||||
return b, nil
|
||||
}
|
||||
|
||||
func (s *Store) ReadMetadata(ctx context.Context, slug string) (domain.BookMeta, bool, error) {
|
||||
b, err := s.getBookBySlug(ctx, slug)
|
||||
if err == ErrNotFound {
|
||||
return domain.BookMeta{}, false, nil
|
||||
}
|
||||
if err != nil {
|
||||
return domain.BookMeta{}, false, err
|
||||
}
|
||||
return b.toDomain(), true, nil
|
||||
}
|
||||
|
||||
func (s *Store) ListBooks(ctx context.Context) ([]domain.BookMeta, error) {
|
||||
items, err := s.pb.listAll(ctx, "books", "", "title")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
books := make([]domain.BookMeta, 0, len(items))
|
||||
for _, raw := range items {
|
||||
var b pbBook
|
||||
json.Unmarshal(raw, &b)
|
||||
books = append(books, b.toDomain())
|
||||
}
|
||||
return books, nil
|
||||
}
|
||||
|
||||
func (s *Store) LocalSlugs(ctx context.Context) (map[string]bool, error) {
|
||||
items, err := s.pb.listAll(ctx, "books", "", "")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
slugs := make(map[string]bool, len(items))
|
||||
for _, raw := range items {
|
||||
var b struct {
|
||||
Slug string `json:"slug"`
|
||||
}
|
||||
json.Unmarshal(raw, &b)
|
||||
if b.Slug != "" {
|
||||
slugs[b.Slug] = true
|
||||
}
|
||||
}
|
||||
return slugs, nil
|
||||
}
|
||||
|
||||
func (s *Store) MetadataMtime(ctx context.Context, slug string) int64 {
|
||||
b, err := s.getBookBySlug(ctx, slug)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
t, err := time.Parse(time.RFC3339, b.Updated)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
return t.Unix()
|
||||
}
|
||||
|
||||
func (s *Store) ReadChapter(ctx context.Context, slug string, n int) (string, error) {
|
||||
data, err := s.mc.getObject(ctx, s.mc.bucketChapters, ChapterObjectKey(slug, n))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("ReadChapter: %w", err)
|
||||
}
|
||||
return string(data), nil
|
||||
}
|
||||
|
||||
func (s *Store) ListChapters(ctx context.Context, slug string) ([]domain.ChapterInfo, error) {
|
||||
filter := fmt.Sprintf(`slug=%q`, slug)
|
||||
items, err := s.pb.listAll(ctx, "chapters_idx", filter, "number")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
chapters := make([]domain.ChapterInfo, 0, len(items))
|
||||
for _, raw := range items {
|
||||
var rec struct {
|
||||
Number int `json:"number"`
|
||||
Title string `json:"title"`
|
||||
}
|
||||
json.Unmarshal(raw, &rec)
|
||||
chapters = append(chapters, domain.ChapterInfo{Number: rec.Number, Title: rec.Title})
|
||||
}
|
||||
return chapters, nil
|
||||
}
|
||||
|
||||
func (s *Store) CountChapters(ctx context.Context, slug string) int {
|
||||
chapters, err := s.ListChapters(ctx, slug)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
return len(chapters)
|
||||
}
|
||||
|
||||
func (s *Store) ReindexChapters(ctx context.Context, slug string) (int, error) {
|
||||
keys, err := s.mc.listObjectKeys(ctx, s.mc.bucketChapters, slug+"/")
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("ReindexChapters: list objects: %w", err)
|
||||
}
|
||||
count := 0
|
||||
for _, key := range keys {
|
||||
if !strings.HasSuffix(key, ".md") {
|
||||
continue
|
||||
}
|
||||
n := chapterNumberFromKey(key)
|
||||
if n == 0 {
|
||||
continue
|
||||
}
|
||||
ref := domain.ChapterRef{Number: n}
|
||||
if err := s.upsertChapterIdx(ctx, slug, ref); err != nil {
|
||||
s.log.Warn("ReindexChapters: upsert failed", "key", key, "err", err)
|
||||
continue
|
||||
}
|
||||
count++
|
||||
}
|
||||
return count, nil
|
||||
}
|
||||
|
||||
// ── RankingStore ──────────────────────────────────────────────────────────────
|
||||
|
||||
func (s *Store) WriteRankingItem(ctx context.Context, item domain.RankingItem) error {
|
||||
payload := map[string]any{
|
||||
"rank": item.Rank,
|
||||
"slug": item.Slug,
|
||||
"title": item.Title,
|
||||
"author": item.Author,
|
||||
"cover": item.Cover,
|
||||
"status": item.Status,
|
||||
"genres": item.Genres,
|
||||
"source_url": item.SourceURL,
|
||||
}
|
||||
filter := fmt.Sprintf(`slug=%q`, item.Slug)
|
||||
items, err := s.pb.listAll(ctx, "ranking", filter, "")
|
||||
if err != nil && err != ErrNotFound {
|
||||
return err
|
||||
}
|
||||
if len(items) == 0 {
|
||||
return s.pb.post(ctx, "/api/collections/ranking/records", payload, nil)
|
||||
}
|
||||
var rec struct {
|
||||
ID string `json:"id"`
|
||||
}
|
||||
json.Unmarshal(items[0], &rec)
|
||||
return s.pb.patch(ctx, fmt.Sprintf("/api/collections/ranking/records/%s", rec.ID), payload)
|
||||
}
|
||||
|
||||
func (s *Store) ReadRankingItems(ctx context.Context) ([]domain.RankingItem, error) {
|
||||
items, err := s.pb.listAll(ctx, "ranking", "", "rank")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result := make([]domain.RankingItem, 0, len(items))
|
||||
for _, raw := range items {
|
||||
var rec struct {
|
||||
Rank int `json:"rank"`
|
||||
Slug string `json:"slug"`
|
||||
Title string `json:"title"`
|
||||
Author string `json:"author"`
|
||||
Cover string `json:"cover"`
|
||||
Status string `json:"status"`
|
||||
Genres []string `json:"genres"`
|
||||
SourceURL string `json:"source_url"`
|
||||
Updated string `json:"updated"`
|
||||
}
|
||||
json.Unmarshal(raw, &rec)
|
||||
t, _ := time.Parse(time.RFC3339, rec.Updated)
|
||||
result = append(result, domain.RankingItem{
|
||||
Rank: rec.Rank,
|
||||
Slug: rec.Slug,
|
||||
Title: rec.Title,
|
||||
Author: rec.Author,
|
||||
Cover: rec.Cover,
|
||||
Status: rec.Status,
|
||||
Genres: rec.Genres,
|
||||
SourceURL: rec.SourceURL,
|
||||
Updated: t,
|
||||
})
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (s *Store) RankingFreshEnough(ctx context.Context, maxAge time.Duration) (bool, error) {
|
||||
items, err := s.ReadRankingItems(ctx)
|
||||
if err != nil || len(items) == 0 {
|
||||
return false, err
|
||||
}
|
||||
var latest time.Time
|
||||
for _, item := range items {
|
||||
if item.Updated.After(latest) {
|
||||
latest = item.Updated
|
||||
}
|
||||
}
|
||||
return time.Since(latest) < maxAge, nil
|
||||
}
|
||||
|
||||
// ── AudioStore ────────────────────────────────────────────────────────────────
|
||||
|
||||
func (s *Store) AudioObjectKey(slug string, n int, voice string) string {
|
||||
return AudioObjectKey(slug, n, voice)
|
||||
}
|
||||
|
||||
func (s *Store) AudioObjectKeyExt(slug string, n int, voice, ext string) string {
|
||||
return AudioObjectKeyExt(slug, n, voice, ext)
|
||||
}
|
||||
|
||||
func (s *Store) AudioExists(ctx context.Context, key string) bool {
|
||||
return s.mc.objectExists(ctx, s.mc.bucketAudio, key)
|
||||
}
|
||||
|
||||
func (s *Store) PutAudio(ctx context.Context, key string, data []byte) error {
|
||||
return s.mc.putObject(ctx, s.mc.bucketAudio, key, "audio/mpeg", data)
|
||||
}
|
||||
|
||||
func (s *Store) PutAudioStream(ctx context.Context, key string, r io.Reader, size int64, contentType string) error {
|
||||
return s.mc.putObjectStream(ctx, s.mc.bucketAudio, key, contentType, r, size)
|
||||
}
|
||||
|
||||
// ── PresignStore ──────────────────────────────────────────────────────────────
|
||||
|
||||
func (s *Store) PresignChapter(ctx context.Context, slug string, n int, expires time.Duration) (string, error) {
|
||||
return s.mc.presignGet(ctx, s.mc.bucketChapters, ChapterObjectKey(slug, n), expires)
|
||||
}
|
||||
|
||||
func (s *Store) PresignAudio(ctx context.Context, key string, expires time.Duration) (string, error) {
|
||||
return s.mc.presignGet(ctx, s.mc.bucketAudio, key, expires)
|
||||
}
|
||||
|
||||
func (s *Store) PresignAvatarUpload(ctx context.Context, userID, ext string) (uploadURL, key string, err error) {
|
||||
key = AvatarObjectKey(userID, ext)
|
||||
uploadURL, err = s.mc.presignPut(ctx, s.mc.bucketAvatars, key, 15*time.Minute)
|
||||
return
|
||||
}
|
||||
|
||||
func (s *Store) PresignAvatarURL(ctx context.Context, userID string) (string, bool, error) {
|
||||
for _, ext := range []string{"jpg", "png", "webp"} {
|
||||
key := AvatarObjectKey(userID, ext)
|
||||
if s.mc.objectExists(ctx, s.mc.bucketAvatars, key) {
|
||||
u, err := s.mc.presignGet(ctx, s.mc.bucketAvatars, key, 1*time.Hour)
|
||||
return u, true, err
|
||||
}
|
||||
}
|
||||
return "", false, nil
|
||||
}
|
||||
|
||||
func (s *Store) PutAvatar(ctx context.Context, userID, ext, contentType string, data []byte) (string, error) {
|
||||
// Delete existing avatar objects for this user before writing the new one
|
||||
// so old extensions don't linger (e.g. old .png after uploading a .jpg).
|
||||
_ = s.mc.deleteObjects(ctx, s.mc.bucketAvatars, userID+"/")
|
||||
key := AvatarObjectKey(userID, ext)
|
||||
if err := s.mc.putObject(ctx, s.mc.bucketAvatars, key, contentType, data); err != nil {
|
||||
return "", fmt.Errorf("put avatar: %w", err)
|
||||
}
|
||||
return key, nil
|
||||
}
|
||||
|
||||
func (s *Store) DeleteAvatar(ctx context.Context, userID string) error {
|
||||
return s.mc.deleteObjects(ctx, s.mc.bucketAvatars, userID+"/")
|
||||
}
|
||||
|
||||
// ── ProgressStore ─────────────────────────────────────────────────────────────
|
||||
|
||||
func (s *Store) GetProgress(ctx context.Context, sessionID, slug string) (domain.ReadingProgress, bool) {
|
||||
filter := fmt.Sprintf(`session_id=%q&&slug=%q`, sessionID, slug)
|
||||
items, err := s.pb.listAll(ctx, "progress", filter, "")
|
||||
if err != nil || len(items) == 0 {
|
||||
return domain.ReadingProgress{}, false
|
||||
}
|
||||
var rec struct {
|
||||
Slug string `json:"slug"`
|
||||
Chapter int `json:"chapter"`
|
||||
UpdatedAt string `json:"updated"`
|
||||
}
|
||||
json.Unmarshal(items[0], &rec)
|
||||
t, _ := time.Parse(time.RFC3339, rec.UpdatedAt)
|
||||
return domain.ReadingProgress{Slug: rec.Slug, Chapter: rec.Chapter, UpdatedAt: t}, true
|
||||
}
|
||||
|
||||
func (s *Store) SetProgress(ctx context.Context, sessionID string, p domain.ReadingProgress) error {
|
||||
payload := map[string]any{
|
||||
"session_id": sessionID,
|
||||
"slug": p.Slug,
|
||||
"chapter": p.Chapter,
|
||||
}
|
||||
filter := fmt.Sprintf(`session_id=%q&&slug=%q`, sessionID, p.Slug)
|
||||
items, err := s.pb.listAll(ctx, "progress", filter, "")
|
||||
if err != nil && err != ErrNotFound {
|
||||
return err
|
||||
}
|
||||
if len(items) == 0 {
|
||||
return s.pb.post(ctx, "/api/collections/progress/records", payload, nil)
|
||||
}
|
||||
var rec struct {
|
||||
ID string `json:"id"`
|
||||
}
|
||||
json.Unmarshal(items[0], &rec)
|
||||
return s.pb.patch(ctx, fmt.Sprintf("/api/collections/progress/records/%s", rec.ID), payload)
|
||||
}
|
||||
|
||||
func (s *Store) AllProgress(ctx context.Context, sessionID string) ([]domain.ReadingProgress, error) {
|
||||
filter := fmt.Sprintf(`session_id=%q`, sessionID)
|
||||
items, err := s.pb.listAll(ctx, "progress", filter, "-updated")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result := make([]domain.ReadingProgress, 0, len(items))
|
||||
for _, raw := range items {
|
||||
var rec struct {
|
||||
Slug string `json:"slug"`
|
||||
Chapter int `json:"chapter"`
|
||||
UpdatedAt string `json:"updated"`
|
||||
}
|
||||
json.Unmarshal(raw, &rec)
|
||||
t, _ := time.Parse(time.RFC3339, rec.UpdatedAt)
|
||||
result = append(result, domain.ReadingProgress{Slug: rec.Slug, Chapter: rec.Chapter, UpdatedAt: t})
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (s *Store) DeleteProgress(ctx context.Context, sessionID, slug string) error {
|
||||
filter := fmt.Sprintf(`session_id=%q&&slug=%q`, sessionID, slug)
|
||||
items, err := s.pb.listAll(ctx, "progress", filter, "")
|
||||
if err != nil || len(items) == 0 {
|
||||
return nil
|
||||
}
|
||||
var rec struct {
|
||||
ID string `json:"id"`
|
||||
}
|
||||
json.Unmarshal(items[0], &rec)
|
||||
return s.pb.delete(ctx, fmt.Sprintf("/api/collections/progress/records/%s", rec.ID))
|
||||
}
|
||||
|
||||
// ── taskqueue.Producer ────────────────────────────────────────────────────────
|
||||
|
||||
func (s *Store) CreateScrapeTask(ctx context.Context, kind, targetURL string, fromChapter, toChapter int) (string, error) {
|
||||
payload := map[string]any{
|
||||
"kind": kind,
|
||||
"target_url": targetURL,
|
||||
"from_chapter": fromChapter,
|
||||
"to_chapter": toChapter,
|
||||
"status": string(domain.TaskStatusPending),
|
||||
"started": time.Now().UTC().Format(time.RFC3339),
|
||||
}
|
||||
var rec struct {
|
||||
ID string `json:"id"`
|
||||
}
|
||||
if err := s.pb.post(ctx, "/api/collections/scraping_tasks/records", payload, &rec); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return rec.ID, nil
|
||||
}
|
||||
|
||||
func (s *Store) CreateAudioTask(ctx context.Context, slug string, chapter int, voice string) (string, error) {
|
||||
cacheKey := fmt.Sprintf("%s/%d/%s", slug, chapter, voice)
|
||||
payload := map[string]any{
|
||||
"cache_key": cacheKey,
|
||||
"slug": slug,
|
||||
"chapter": chapter,
|
||||
"voice": voice,
|
||||
"status": string(domain.TaskStatusPending),
|
||||
"started": time.Now().UTC().Format(time.RFC3339),
|
||||
}
|
||||
var rec struct {
|
||||
ID string `json:"id"`
|
||||
}
|
||||
if err := s.pb.post(ctx, "/api/collections/audio_jobs/records", payload, &rec); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return rec.ID, nil
|
||||
}
|
||||
|
||||
func (s *Store) CreateTranslationTask(ctx context.Context, slug string, chapter int, lang string) (string, error) {
|
||||
cacheKey := fmt.Sprintf("%s/%d/%s", slug, chapter, lang)
|
||||
payload := map[string]any{
|
||||
"cache_key": cacheKey,
|
||||
"slug": slug,
|
||||
"chapter": chapter,
|
||||
"lang": lang,
|
||||
"status": string(domain.TaskStatusPending),
|
||||
"started": time.Now().UTC().Format(time.RFC3339),
|
||||
}
|
||||
var rec struct {
|
||||
ID string `json:"id"`
|
||||
}
|
||||
if err := s.pb.post(ctx, "/api/collections/translation_jobs/records", payload, &rec); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return rec.ID, nil
|
||||
}
|
||||
|
||||
func (s *Store) CancelTask(ctx context.Context, id string) error {
|
||||
// Try scraping_tasks first, then audio_jobs, then translation_jobs.
|
||||
if err := s.pb.patch(ctx, fmt.Sprintf("/api/collections/scraping_tasks/records/%s", id),
|
||||
map[string]string{"status": string(domain.TaskStatusCancelled)}); err == nil {
|
||||
return nil
|
||||
}
|
||||
if err := s.pb.patch(ctx, fmt.Sprintf("/api/collections/audio_jobs/records/%s", id),
|
||||
map[string]string{"status": string(domain.TaskStatusCancelled)}); err == nil {
|
||||
return nil
|
||||
}
|
||||
return s.pb.patch(ctx, fmt.Sprintf("/api/collections/translation_jobs/records/%s", id),
|
||||
map[string]string{"status": string(domain.TaskStatusCancelled)})
|
||||
}
|
||||
|
||||
func (s *Store) CancelAudioTasksBySlug(ctx context.Context, slug string) (int, error) {
|
||||
filter := fmt.Sprintf(`slug='%s'&&(status='pending'||status='running')`, slug)
|
||||
items, err := s.pb.listAll(ctx, "audio_jobs", filter, "")
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("CancelAudioTasksBySlug list: %w", err)
|
||||
}
|
||||
cancelled := 0
|
||||
for _, raw := range items {
|
||||
var rec struct {
|
||||
ID string `json:"id"`
|
||||
}
|
||||
if json.Unmarshal(raw, &rec) == nil && rec.ID != "" {
|
||||
if patchErr := s.pb.patch(ctx,
|
||||
fmt.Sprintf("/api/collections/audio_jobs/records/%s", rec.ID),
|
||||
map[string]string{"status": string(domain.TaskStatusCancelled)}); patchErr == nil {
|
||||
cancelled++
|
||||
}
|
||||
}
|
||||
}
|
||||
return cancelled, nil
|
||||
}
|
||||
|
||||
// ── taskqueue.Consumer ────────────────────────────────────────────────────────
|
||||
|
||||
func (s *Store) ClaimNextScrapeTask(ctx context.Context, workerID string) (domain.ScrapeTask, bool, error) {
|
||||
raw, err := s.pb.claimRecord(ctx, "scraping_tasks", workerID, nil)
|
||||
if err != nil {
|
||||
return domain.ScrapeTask{}, false, err
|
||||
}
|
||||
if raw == nil {
|
||||
return domain.ScrapeTask{}, false, nil
|
||||
}
|
||||
task, err := parseScrapeTask(raw)
|
||||
return task, err == nil, err
|
||||
}
|
||||
|
||||
func (s *Store) ClaimNextAudioTask(ctx context.Context, workerID string) (domain.AudioTask, bool, error) {
|
||||
raw, err := s.pb.claimRecord(ctx, "audio_jobs", workerID, nil)
|
||||
if err != nil {
|
||||
return domain.AudioTask{}, false, err
|
||||
}
|
||||
if raw == nil {
|
||||
return domain.AudioTask{}, false, nil
|
||||
}
|
||||
task, err := parseAudioTask(raw)
|
||||
return task, err == nil, err
|
||||
}
|
||||
|
||||
func (s *Store) ClaimNextTranslationTask(ctx context.Context, workerID string) (domain.TranslationTask, bool, error) {
|
||||
raw, err := s.pb.claimRecord(ctx, "translation_jobs", workerID, nil)
|
||||
if err != nil {
|
||||
return domain.TranslationTask{}, false, err
|
||||
}
|
||||
if raw == nil {
|
||||
return domain.TranslationTask{}, false, nil
|
||||
}
|
||||
task, err := parseTranslationTask(raw)
|
||||
return task, err == nil, err
|
||||
}
|
||||
|
||||
func (s *Store) FinishScrapeTask(ctx context.Context, id string, result domain.ScrapeResult) error {
|
||||
status := string(domain.TaskStatusDone)
|
||||
if result.ErrorMessage != "" {
|
||||
status = string(domain.TaskStatusFailed)
|
||||
}
|
||||
return s.pb.patch(ctx, fmt.Sprintf("/api/collections/scraping_tasks/records/%s", id), map[string]any{
|
||||
"status": status,
|
||||
"books_found": result.BooksFound,
|
||||
"chapters_scraped": result.ChaptersScraped,
|
||||
"chapters_skipped": result.ChaptersSkipped,
|
||||
"errors": result.Errors,
|
||||
"error_message": result.ErrorMessage,
|
||||
"finished": time.Now().UTC().Format(time.RFC3339),
|
||||
})
|
||||
}
|
||||
|
||||
func (s *Store) FinishAudioTask(ctx context.Context, id string, result domain.AudioResult) error {
|
||||
status := string(domain.TaskStatusDone)
|
||||
if result.ErrorMessage != "" {
|
||||
status = string(domain.TaskStatusFailed)
|
||||
}
|
||||
return s.pb.patch(ctx, fmt.Sprintf("/api/collections/audio_jobs/records/%s", id), map[string]any{
|
||||
"status": status,
|
||||
"error_message": result.ErrorMessage,
|
||||
"finished": time.Now().UTC().Format(time.RFC3339),
|
||||
})
|
||||
}
|
||||
|
||||
func (s *Store) FinishTranslationTask(ctx context.Context, id string, result domain.TranslationResult) error {
|
||||
status := string(domain.TaskStatusDone)
|
||||
if result.ErrorMessage != "" {
|
||||
status = string(domain.TaskStatusFailed)
|
||||
}
|
||||
return s.pb.patch(ctx, fmt.Sprintf("/api/collections/translation_jobs/records/%s", id), map[string]any{
|
||||
"status": status,
|
||||
"error_message": result.ErrorMessage,
|
||||
"finished": time.Now().UTC().Format(time.RFC3339),
|
||||
})
|
||||
}
|
||||
|
||||
func (s *Store) FailTask(ctx context.Context, id, errMsg string) error {
|
||||
payload := map[string]any{
|
||||
"status": string(domain.TaskStatusFailed),
|
||||
"error_message": errMsg,
|
||||
"finished": time.Now().UTC().Format(time.RFC3339),
|
||||
}
|
||||
if err := s.pb.patch(ctx, fmt.Sprintf("/api/collections/scraping_tasks/records/%s", id), payload); err == nil {
|
||||
return nil
|
||||
}
|
||||
if err := s.pb.patch(ctx, fmt.Sprintf("/api/collections/audio_jobs/records/%s", id), payload); err == nil {
|
||||
return nil
|
||||
}
|
||||
return s.pb.patch(ctx, fmt.Sprintf("/api/collections/translation_jobs/records/%s", id), payload)
|
||||
}
|
||||
|
||||
// HeartbeatTask updates the heartbeat_at field on a running task.
|
||||
// Tries scraping_tasks first, then audio_jobs, then translation_jobs.
|
||||
func (s *Store) HeartbeatTask(ctx context.Context, id string) error {
|
||||
payload := map[string]any{
|
||||
"heartbeat_at": time.Now().UTC().Format(time.RFC3339),
|
||||
}
|
||||
if err := s.pb.patch(ctx, fmt.Sprintf("/api/collections/scraping_tasks/records/%s", id), payload); err == nil {
|
||||
return nil
|
||||
}
|
||||
if err := s.pb.patch(ctx, fmt.Sprintf("/api/collections/audio_jobs/records/%s", id), payload); err == nil {
|
||||
return nil
|
||||
}
|
||||
return s.pb.patch(ctx, fmt.Sprintf("/api/collections/translation_jobs/records/%s", id), payload)
|
||||
}
|
||||
|
||||
// ReapStaleTasks finds all running tasks whose heartbeat_at is either missing
|
||||
// or older than staleAfter, and resets them to pending so they can be
|
||||
// re-claimed. Returns the number of tasks reaped.
|
||||
func (s *Store) ReapStaleTasks(ctx context.Context, staleAfter time.Duration) (int, error) {
|
||||
threshold := time.Now().UTC().Add(-staleAfter).Format(time.RFC3339)
|
||||
// Match tasks that are running AND (heartbeat_at is null OR heartbeat_at < threshold).
|
||||
// PocketBase datetime fields require `=null` not `=""` in filter expressions.
|
||||
filter := fmt.Sprintf(`status="running"&&(heartbeat_at=null||heartbeat_at<"%s")`, threshold)
|
||||
resetPayload := map[string]any{
|
||||
"status": string(domain.TaskStatusPending),
|
||||
"worker_id": "",
|
||||
"heartbeat_at": nil,
|
||||
}
|
||||
|
||||
total := 0
|
||||
for _, collection := range []string{"scraping_tasks", "audio_jobs", "translation_jobs"} {
|
||||
items, err := s.pb.listAll(ctx, collection, filter, "")
|
||||
if err != nil {
|
||||
return total, fmt.Errorf("ReapStaleTasks list %s: %w", collection, err)
|
||||
}
|
||||
for _, raw := range items {
|
||||
var rec struct {
|
||||
ID string `json:"id"`
|
||||
}
|
||||
if err := json.Unmarshal(raw, &rec); err != nil || rec.ID == "" {
|
||||
continue
|
||||
}
|
||||
path := fmt.Sprintf("/api/collections/%s/records/%s", collection, rec.ID)
|
||||
if err := s.pb.patch(ctx, path, resetPayload); err != nil {
|
||||
s.log.Warn("ReapStaleTasks: patch failed", "collection", collection, "id", rec.ID, "err", err)
|
||||
continue
|
||||
}
|
||||
total++
|
||||
}
|
||||
}
|
||||
return total, nil
|
||||
}
|
||||
|
||||
// ── taskqueue.Reader ──────────────────────────────────────────────────────────
|
||||
|
||||
func (s *Store) ListScrapeTasks(ctx context.Context) ([]domain.ScrapeTask, error) {
|
||||
items, err := s.pb.listAll(ctx, "scraping_tasks", "", "-started")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tasks := make([]domain.ScrapeTask, 0, len(items))
|
||||
for _, raw := range items {
|
||||
t, err := parseScrapeTask(raw)
|
||||
if err == nil {
|
||||
tasks = append(tasks, t)
|
||||
}
|
||||
}
|
||||
return tasks, nil
|
||||
}
|
||||
|
||||
func (s *Store) GetScrapeTask(ctx context.Context, id string) (domain.ScrapeTask, bool, error) {
|
||||
var raw json.RawMessage
|
||||
if err := s.pb.get(ctx, fmt.Sprintf("/api/collections/scraping_tasks/records/%s", id), &raw); err != nil {
|
||||
if err == ErrNotFound {
|
||||
return domain.ScrapeTask{}, false, nil
|
||||
}
|
||||
return domain.ScrapeTask{}, false, err
|
||||
}
|
||||
t, err := parseScrapeTask(raw)
|
||||
return t, err == nil, err
|
||||
}
|
||||
|
||||
func (s *Store) ListAudioTasks(ctx context.Context) ([]domain.AudioTask, error) {
|
||||
items, err := s.pb.listAll(ctx, "audio_jobs", "", "-started")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tasks := make([]domain.AudioTask, 0, len(items))
|
||||
for _, raw := range items {
|
||||
t, err := parseAudioTask(raw)
|
||||
if err == nil {
|
||||
tasks = append(tasks, t)
|
||||
}
|
||||
}
|
||||
return tasks, nil
|
||||
}
|
||||
|
||||
func (s *Store) GetAudioTask(ctx context.Context, cacheKey string) (domain.AudioTask, bool, error) {
|
||||
filter := fmt.Sprintf(`cache_key='%s'`, cacheKey)
|
||||
items, err := s.pb.listAll(ctx, "audio_jobs", filter, "-started")
|
||||
if err != nil || len(items) == 0 {
|
||||
return domain.AudioTask{}, false, err
|
||||
}
|
||||
t, err := parseAudioTask(items[0])
|
||||
return t, err == nil, err
|
||||
}
|
||||
|
||||
func (s *Store) ListTranslationTasks(ctx context.Context) ([]domain.TranslationTask, error) {
|
||||
items, err := s.pb.listAll(ctx, "translation_jobs", "", "-started")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tasks := make([]domain.TranslationTask, 0, len(items))
|
||||
for _, raw := range items {
|
||||
t, err := parseTranslationTask(raw)
|
||||
if err == nil {
|
||||
tasks = append(tasks, t)
|
||||
}
|
||||
}
|
||||
return tasks, nil
|
||||
}
|
||||
|
||||
func (s *Store) GetTranslationTask(ctx context.Context, cacheKey string) (domain.TranslationTask, bool, error) {
|
||||
filter := fmt.Sprintf(`cache_key='%s'`, cacheKey)
|
||||
items, err := s.pb.listAll(ctx, "translation_jobs", filter, "-started")
|
||||
if err != nil || len(items) == 0 {
|
||||
return domain.TranslationTask{}, false, err
|
||||
}
|
||||
t, err := parseTranslationTask(items[0])
|
||||
return t, err == nil, err
|
||||
}
|
||||
|
||||
// ── Parsers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
func parseScrapeTask(raw json.RawMessage) (domain.ScrapeTask, error) {
|
||||
var rec struct {
|
||||
ID string `json:"id"`
|
||||
Kind string `json:"kind"`
|
||||
TargetURL string `json:"target_url"`
|
||||
FromChapter int `json:"from_chapter"`
|
||||
ToChapter int `json:"to_chapter"`
|
||||
WorkerID string `json:"worker_id"`
|
||||
Status string `json:"status"`
|
||||
BooksFound int `json:"books_found"`
|
||||
ChaptersScraped int `json:"chapters_scraped"`
|
||||
ChaptersSkipped int `json:"chapters_skipped"`
|
||||
Errors int `json:"errors"`
|
||||
Started string `json:"started"`
|
||||
Finished string `json:"finished"`
|
||||
ErrorMessage string `json:"error_message"`
|
||||
}
|
||||
if err := json.Unmarshal(raw, &rec); err != nil {
|
||||
return domain.ScrapeTask{}, err
|
||||
}
|
||||
started, _ := time.Parse(time.RFC3339, rec.Started)
|
||||
finished, _ := time.Parse(time.RFC3339, rec.Finished)
|
||||
return domain.ScrapeTask{
|
||||
ID: rec.ID,
|
||||
Kind: rec.Kind,
|
||||
TargetURL: rec.TargetURL,
|
||||
FromChapter: rec.FromChapter,
|
||||
ToChapter: rec.ToChapter,
|
||||
WorkerID: rec.WorkerID,
|
||||
Status: domain.TaskStatus(rec.Status),
|
||||
BooksFound: rec.BooksFound,
|
||||
ChaptersScraped: rec.ChaptersScraped,
|
||||
ChaptersSkipped: rec.ChaptersSkipped,
|
||||
Errors: rec.Errors,
|
||||
Started: started,
|
||||
Finished: finished,
|
||||
ErrorMessage: rec.ErrorMessage,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func parseAudioTask(raw json.RawMessage) (domain.AudioTask, error) {
|
||||
var rec struct {
|
||||
ID string `json:"id"`
|
||||
CacheKey string `json:"cache_key"`
|
||||
Slug string `json:"slug"`
|
||||
Chapter int `json:"chapter"`
|
||||
Voice string `json:"voice"`
|
||||
WorkerID string `json:"worker_id"`
|
||||
Status string `json:"status"`
|
||||
ErrorMessage string `json:"error_message"`
|
||||
Started string `json:"started"`
|
||||
Finished string `json:"finished"`
|
||||
}
|
||||
if err := json.Unmarshal(raw, &rec); err != nil {
|
||||
return domain.AudioTask{}, err
|
||||
}
|
||||
started, _ := time.Parse(time.RFC3339, rec.Started)
|
||||
finished, _ := time.Parse(time.RFC3339, rec.Finished)
|
||||
return domain.AudioTask{
|
||||
ID: rec.ID,
|
||||
CacheKey: rec.CacheKey,
|
||||
Slug: rec.Slug,
|
||||
Chapter: rec.Chapter,
|
||||
Voice: rec.Voice,
|
||||
WorkerID: rec.WorkerID,
|
||||
Status: domain.TaskStatus(rec.Status),
|
||||
ErrorMessage: rec.ErrorMessage,
|
||||
Started: started,
|
||||
Finished: finished,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func parseTranslationTask(raw json.RawMessage) (domain.TranslationTask, error) {
|
||||
var rec struct {
|
||||
ID string `json:"id"`
|
||||
CacheKey string `json:"cache_key"`
|
||||
Slug string `json:"slug"`
|
||||
Chapter int `json:"chapter"`
|
||||
Lang string `json:"lang"`
|
||||
WorkerID string `json:"worker_id"`
|
||||
Status string `json:"status"`
|
||||
ErrorMessage string `json:"error_message"`
|
||||
Started string `json:"started"`
|
||||
Finished string `json:"finished"`
|
||||
}
|
||||
if err := json.Unmarshal(raw, &rec); err != nil {
|
||||
return domain.TranslationTask{}, err
|
||||
}
|
||||
started, _ := time.Parse(time.RFC3339, rec.Started)
|
||||
finished, _ := time.Parse(time.RFC3339, rec.Finished)
|
||||
return domain.TranslationTask{
|
||||
ID: rec.ID,
|
||||
CacheKey: rec.CacheKey,
|
||||
Slug: rec.Slug,
|
||||
Chapter: rec.Chapter,
|
||||
Lang: rec.Lang,
|
||||
WorkerID: rec.WorkerID,
|
||||
Status: domain.TaskStatus(rec.Status),
|
||||
ErrorMessage: rec.ErrorMessage,
|
||||
Started: started,
|
||||
Finished: finished,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// ── CoverStore ─────────────────────────────────────────────────────────────────
|
||||
|
||||
func (s *Store) PutCover(ctx context.Context, slug string, data []byte, contentType string) error {
|
||||
key := CoverObjectKey(slug)
|
||||
if contentType == "" {
|
||||
contentType = coverContentType(data)
|
||||
}
|
||||
if err := s.mc.putCover(ctx, key, contentType, data); err != nil {
|
||||
return fmt.Errorf("PutCover: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Store) GetCover(ctx context.Context, slug string) ([]byte, string, bool, error) {
|
||||
key := CoverObjectKey(slug)
|
||||
data, ok, err := s.mc.getCover(ctx, key)
|
||||
if err != nil {
|
||||
return nil, "", false, fmt.Errorf("GetCover: %w", err)
|
||||
}
|
||||
if !ok {
|
||||
return nil, "", false, nil
|
||||
}
|
||||
ct := coverContentType(data)
|
||||
return data, ct, true, nil
|
||||
}
|
||||
|
||||
func (s *Store) CoverExists(ctx context.Context, slug string) bool {
|
||||
return s.mc.coverExists(ctx, CoverObjectKey(slug))
|
||||
}
|
||||
|
||||
// ── TranslationStore ───────────────────────────────────────────────────────────
|
||||
|
||||
func (s *Store) TranslationObjectKey(lang, slug string, n int) string {
|
||||
return TranslationObjectKey(lang, slug, n)
|
||||
}
|
||||
|
||||
func (s *Store) TranslationExists(ctx context.Context, key string) bool {
|
||||
return s.mc.objectExists(ctx, s.mc.bucketTranslations, key)
|
||||
}
|
||||
|
||||
func (s *Store) PutTranslation(ctx context.Context, key string, data []byte) error {
|
||||
return s.mc.putObject(ctx, s.mc.bucketTranslations, key, "text/markdown; charset=utf-8", data)
|
||||
}
|
||||
|
||||
func (s *Store) GetTranslation(ctx context.Context, key string) (string, error) {
|
||||
data, err := s.mc.getObject(ctx, s.mc.bucketTranslations, key)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("GetTranslation: %w", err)
|
||||
}
|
||||
return string(data), nil
|
||||
}
|
||||
107
backend/internal/taskqueue/taskqueue.go
Normal file
107
backend/internal/taskqueue/taskqueue.go
Normal file
@@ -0,0 +1,107 @@
|
||||
// Package taskqueue defines the interfaces for creating and consuming
|
||||
// scrape, audio, and translation tasks stored in PocketBase.
|
||||
//
|
||||
// Interface segregation:
|
||||
// - Producer is used only by the backend (creates tasks, cancels tasks).
|
||||
// - Consumer is used only by the runner (claims tasks, reports results).
|
||||
// - Reader is used by the backend for status/history endpoints.
|
||||
//
|
||||
// Concrete implementations live in internal/storage.
|
||||
package taskqueue
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
)
|
||||
|
||||
// Producer is the write side of the task queue. The backend service uses it
// to enqueue tasks in PocketBase for the runner, and to cancel them again.
type Producer interface {
	// CreateScrapeTask stores a new scrape task with status=pending and
	// returns the PocketBase record ID assigned to it.
	// kind must be "catalogue", "book", or "book_range"; targetURL is the
	// book URL and is left empty for catalogue-wide tasks.
	CreateScrapeTask(ctx context.Context, kind, targetURL string, fromChapter, toChapter int) (string, error)

	// CreateAudioTask stores a new audio task with status=pending and
	// returns the PocketBase record ID assigned to it.
	CreateAudioTask(ctx context.Context, slug string, chapter int, voice string) (string, error)

	// CreateTranslationTask stores a new translation task with
	// status=pending and returns the PocketBase record ID assigned to it.
	CreateTranslationTask(ctx context.Context, slug string, chapter int, lang string) (string, error)

	// CancelTask moves a pending task to status=cancelled.
	// It returns ErrNotFound when no task with that id exists.
	CancelTask(ctx context.Context, id string) error

	// CancelAudioTasksBySlug cancels every pending or running audio task of
	// slug and returns how many tasks were cancelled.
	CancelAudioTasksBySlug(ctx context.Context, slug string) (int, error)
}
|
||||
|
||||
// Consumer is the read/claim side of the task queue used by the runner.
|
||||
type Consumer interface {
|
||||
// ClaimNextScrapeTask atomically finds the oldest pending scrape task,
|
||||
// sets its status=running and worker_id=workerID, and returns it.
|
||||
// Returns (zero, false, nil) when the queue is empty.
|
||||
ClaimNextScrapeTask(ctx context.Context, workerID string) (domain.ScrapeTask, bool, error)
|
||||
|
||||
// ClaimNextAudioTask atomically finds the oldest pending audio task,
|
||||
// sets its status=running and worker_id=workerID, and returns it.
|
||||
// Returns (zero, false, nil) when the queue is empty.
|
||||
ClaimNextAudioTask(ctx context.Context, workerID string) (domain.AudioTask, bool, error)
|
||||
|
||||
// ClaimNextTranslationTask atomically finds the oldest pending translation task,
|
||||
// sets its status=running and worker_id=workerID, and returns it.
|
||||
// Returns (zero, false, nil) when the queue is empty.
|
||||
ClaimNextTranslationTask(ctx context.Context, workerID string) (domain.TranslationTask, bool, error)
|
||||
|
||||
// FinishScrapeTask marks a running scrape task as done and records the result.
|
||||
FinishScrapeTask(ctx context.Context, id string, result domain.ScrapeResult) error
|
||||
|
||||
// FinishAudioTask marks a running audio task as done and records the result.
|
||||
FinishAudioTask(ctx context.Context, id string, result domain.AudioResult) error
|
||||
|
||||
// FinishTranslationTask marks a running translation task as done and records the result.
|
||||
FinishTranslationTask(ctx context.Context, id string, result domain.TranslationResult) error
|
||||
|
||||
// FailTask marks a task (scrape, audio, or translation) as failed with an error message.
|
||||
FailTask(ctx context.Context, id, errMsg string) error
|
||||
|
||||
// HeartbeatTask updates the heartbeat_at timestamp on a running task.
|
||||
// Should be called periodically by the runner while the task is active so
|
||||
// the reaper knows the task is still alive.
|
||||
HeartbeatTask(ctx context.Context, id string) error
|
||||
|
||||
// ReapStaleTasks finds all running tasks whose heartbeat_at is older than
|
||||
// staleAfter (or was never set) and resets them to pending so they can be
|
||||
// re-claimed by a healthy runner. Returns the number of tasks reaped.
|
||||
ReapStaleTasks(ctx context.Context, staleAfter time.Duration) (int, error)
|
||||
}
|
||||
|
||||
// Reader is the read-only side used by the backend for status pages.
|
||||
type Reader interface {
|
||||
// ListScrapeTasks returns all scrape tasks sorted by started descending.
|
||||
ListScrapeTasks(ctx context.Context) ([]domain.ScrapeTask, error)
|
||||
|
||||
// GetScrapeTask returns a single scrape task by ID.
|
||||
// Returns (zero, false, nil) if not found.
|
||||
GetScrapeTask(ctx context.Context, id string) (domain.ScrapeTask, bool, error)
|
||||
|
||||
// ListAudioTasks returns all audio tasks sorted by started descending.
|
||||
ListAudioTasks(ctx context.Context) ([]domain.AudioTask, error)
|
||||
|
||||
// GetAudioTask returns the most recent audio task for cacheKey.
|
||||
// Returns (zero, false, nil) if not found.
|
||||
GetAudioTask(ctx context.Context, cacheKey string) (domain.AudioTask, bool, error)
|
||||
|
||||
// ListTranslationTasks returns all translation tasks sorted by started descending.
|
||||
ListTranslationTasks(ctx context.Context) ([]domain.TranslationTask, error)
|
||||
|
||||
// GetTranslationTask returns the most recent translation task for cacheKey.
|
||||
// Returns (zero, false, nil) if not found.
|
||||
GetTranslationTask(ctx context.Context, cacheKey string) (domain.TranslationTask, bool, error)
|
||||
}
|
||||
154
backend/internal/taskqueue/taskqueue_test.go
Normal file
154
backend/internal/taskqueue/taskqueue_test.go
Normal file
@@ -0,0 +1,154 @@
|
||||
package taskqueue_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
"github.com/libnovel/backend/internal/taskqueue"
|
||||
)
|
||||
|
||||
// ── Compile-time interface satisfaction ───────────────────────────────────────
|
||||
|
||||
// stubStore satisfies all three taskqueue interfaces.
|
||||
// Any method that is called but not expected panics — making accidental
|
||||
// calls immediately visible in tests.
|
||||
type stubStore struct{}
|
||||
|
||||
func (s *stubStore) CreateScrapeTask(_ context.Context, _, _ string, _, _ int) (string, error) {
|
||||
return "task-1", nil
|
||||
}
|
||||
func (s *stubStore) CreateAudioTask(_ context.Context, _ string, _ int, _ string) (string, error) {
|
||||
return "audio-1", nil
|
||||
}
|
||||
func (s *stubStore) CreateTranslationTask(_ context.Context, _ string, _ int, _ string) (string, error) {
|
||||
return "translation-1", nil
|
||||
}
|
||||
func (s *stubStore) CancelTask(_ context.Context, _ string) error { return nil }
|
||||
func (s *stubStore) CancelAudioTasksBySlug(_ context.Context, _ string) (int, error) { return 0, nil }
|
||||
|
||||
func (s *stubStore) ClaimNextScrapeTask(_ context.Context, _ string) (domain.ScrapeTask, bool, error) {
|
||||
return domain.ScrapeTask{ID: "task-1", Status: domain.TaskStatusRunning}, true, nil
|
||||
}
|
||||
func (s *stubStore) ClaimNextAudioTask(_ context.Context, _ string) (domain.AudioTask, bool, error) {
|
||||
return domain.AudioTask{ID: "audio-1", Status: domain.TaskStatusRunning}, true, nil
|
||||
}
|
||||
func (s *stubStore) ClaimNextTranslationTask(_ context.Context, _ string) (domain.TranslationTask, bool, error) {
|
||||
return domain.TranslationTask{ID: "translation-1", Status: domain.TaskStatusRunning}, true, nil
|
||||
}
|
||||
func (s *stubStore) FinishScrapeTask(_ context.Context, _ string, _ domain.ScrapeResult) error {
|
||||
return nil
|
||||
}
|
||||
func (s *stubStore) FinishAudioTask(_ context.Context, _ string, _ domain.AudioResult) error {
|
||||
return nil
|
||||
}
|
||||
func (s *stubStore) FinishTranslationTask(_ context.Context, _ string, _ domain.TranslationResult) error {
|
||||
return nil
|
||||
}
|
||||
func (s *stubStore) FailTask(_ context.Context, _, _ string) error { return nil }
|
||||
|
||||
func (s *stubStore) HeartbeatTask(_ context.Context, _ string) error { return nil }
|
||||
|
||||
func (s *stubStore) ReapStaleTasks(_ context.Context, _ time.Duration) (int, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (s *stubStore) ListScrapeTasks(_ context.Context) ([]domain.ScrapeTask, error) { return nil, nil }
|
||||
func (s *stubStore) GetScrapeTask(_ context.Context, _ string) (domain.ScrapeTask, bool, error) {
|
||||
return domain.ScrapeTask{}, false, nil
|
||||
}
|
||||
func (s *stubStore) ListAudioTasks(_ context.Context) ([]domain.AudioTask, error) { return nil, nil }
|
||||
func (s *stubStore) GetAudioTask(_ context.Context, _ string) (domain.AudioTask, bool, error) {
|
||||
return domain.AudioTask{}, false, nil
|
||||
}
|
||||
func (s *stubStore) ListTranslationTasks(_ context.Context) ([]domain.TranslationTask, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (s *stubStore) GetTranslationTask(_ context.Context, _ string) (domain.TranslationTask, bool, error) {
|
||||
return domain.TranslationTask{}, false, nil
|
||||
}
|
||||
|
||||
// Verify the stub satisfies all three interfaces at compile time.
|
||||
var _ taskqueue.Producer = (*stubStore)(nil)
|
||||
var _ taskqueue.Consumer = (*stubStore)(nil)
|
||||
var _ taskqueue.Reader = (*stubStore)(nil)
|
||||
|
||||
// ── Behavioural tests (using stub) ────────────────────────────────────────────
|
||||
|
||||
func TestProducer_CreateScrapeTask(t *testing.T) {
|
||||
var p taskqueue.Producer = &stubStore{}
|
||||
id, err := p.CreateScrapeTask(context.Background(), "book", "https://example.com/book/slug", 0, 0)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if id == "" {
|
||||
t.Error("expected non-empty task ID")
|
||||
}
|
||||
}
|
||||
|
||||
func TestConsumer_ClaimNextScrapeTask(t *testing.T) {
|
||||
var c taskqueue.Consumer = &stubStore{}
|
||||
task, ok, err := c.ClaimNextScrapeTask(context.Background(), "worker-1")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if !ok {
|
||||
t.Fatal("expected a task to be claimed")
|
||||
}
|
||||
if task.Status != domain.TaskStatusRunning {
|
||||
t.Errorf("want running, got %q", task.Status)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConsumer_ClaimNextAudioTask(t *testing.T) {
|
||||
var c taskqueue.Consumer = &stubStore{}
|
||||
task, ok, err := c.ClaimNextAudioTask(context.Background(), "worker-1")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if !ok {
|
||||
t.Fatal("expected an audio task to be claimed")
|
||||
}
|
||||
if task.ID == "" {
|
||||
t.Error("expected non-empty task ID")
|
||||
}
|
||||
}
|
||||
|
||||
// ── domain.ScrapeResult / domain.AudioResult JSON shape ──────────────────────
|
||||
|
||||
func TestScrapeResult_JSONRoundtrip(t *testing.T) {
|
||||
cases := []domain.ScrapeResult{
|
||||
{BooksFound: 5, ChaptersScraped: 100, ChaptersSkipped: 2, Errors: 0},
|
||||
{BooksFound: 0, ChaptersScraped: 0, Errors: 1, ErrorMessage: "timeout"},
|
||||
}
|
||||
for _, orig := range cases {
|
||||
b, err := json.Marshal(orig)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal: %v", err)
|
||||
}
|
||||
var got domain.ScrapeResult
|
||||
if err := json.Unmarshal(b, &got); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
if got != orig {
|
||||
t.Errorf("want %+v, got %+v", orig, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAudioResult_JSONRoundtrip(t *testing.T) {
|
||||
cases := []domain.AudioResult{
|
||||
{ObjectKey: "audio/slug/1/af_bella.mp3"},
|
||||
{ErrorMessage: "kokoro unavailable"},
|
||||
}
|
||||
for _, orig := range cases {
|
||||
b, _ := json.Marshal(orig)
|
||||
var got domain.AudioResult
|
||||
json.Unmarshal(b, &got)
|
||||
if got != orig {
|
||||
t.Errorf("want %+v, got %+v", orig, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
301
backend/todos.md
Normal file
301
backend/todos.md
Normal file
@@ -0,0 +1,301 @@
|
||||
# LibNovel Scraper Rewrite — Project Todos
|
||||
|
||||
## Overview
|
||||
|
||||
Split the monolithic scraper into two separate binaries inside the same Go module:
|
||||
|
||||
| Binary | Command | Location | Responsibility |
|
||||
|--------|---------|----------|----------------|
|
||||
| **runner** | `cmd/runner` | Homelab | Polls remote PB for pending scrape tasks → scrapes novelfire.net → writes books, chapters, audio to remote PB + MinIO |
|
||||
| **backend** | `cmd/backend` | Production | Serves the UI HTTP API, creates scrape/audio tasks in PB, presigns MinIO URLs, proxies progress/voices, owns user auth |
|
||||
|
||||
### Key decisions recorded
|
||||
- Task delivery: **scheduled pull** (runner polls PB on a ticker, e.g. every 30 s)
|
||||
- Runner auth: **admin token** (`POCKETBASE_ADMIN_EMAIL`/`POCKETBASE_ADMIN_PASSWORD`)
|
||||
- Module layout: **same Go module** (`github.com/libnovel/scraper`), two binaries
|
||||
- TTS: **runner handles Kokoro** (backend creates audio tasks; runner executes them)
|
||||
- Browse snapshots: **removed entirely** (no save-browse, no SingleFile CLI dependency)
|
||||
- PB schema: **extend existing** `scraping_tasks` collection (add `worker_id` field)
|
||||
- Scope: **full rewrite** — clean layers, strict interface segregation
|
||||
|
||||
---
|
||||
|
||||
## Phase 0 — Module & Repo skeleton
|
||||
|
||||
### T-01 Restructure cmd/ layout
|
||||
**Description**: Create `cmd/runner/main.go` and `cmd/backend/main.go` entry points. Remove the old `cmd/scraper/` entry point (or keep temporarily as a stub). Update `go.mod` module path if needed.
|
||||
**Unit tests**: `cmd/runner/main_test.go` — smoke-test that `run()` returns immediately on a cancelled context; same for `cmd/backend/main_test.go`.
|
||||
**Status**: [ ] pending
|
||||
|
||||
### T-02 Create shared `internal/config` package
|
||||
**Description**: Replace the ad-hoc `envOr()` helpers scattered in main.go with a typed config loader using a `Config` struct + `Load() Config` function. Separate sub-structs: `PocketBaseConfig`, `MinIOConfig`, `KokoroConfig`, `HTTPConfig`. Each binary calls `config.Load()`.
|
||||
**Unit tests**: `internal/config/config_test.go` — verify defaults, env override for each field, zero-value safety.
|
||||
**Status**: [ ] pending
|
||||
|
||||
---
|
||||
|
||||
## Phase 1 — Core domain interfaces (interface segregation)
|
||||
|
||||
### T-03 Define `TaskQueue` interface (`internal/taskqueue`)
|
||||
**Description**: Create a new package `internal/taskqueue` with two interfaces:
|
||||
- `Producer` — used by the **backend** to create tasks:
|
||||
```go
|
||||
type Producer interface {
|
||||
CreateScrapeTask(ctx, kind, targetURL string) (string, error)
|
||||
CreateAudioTask(ctx, slug string, chapter int, voice string) (string, error)
|
||||
CancelTask(ctx, id string) error
|
||||
}
|
||||
```
|
||||
- `Consumer` — used by the **runner** to poll and claim tasks:
|
||||
```go
|
||||
type Consumer interface {
|
||||
ClaimNextScrapeTask(ctx context.Context, workerID string) (ScrapeTask, bool, error)
|
||||
ClaimNextAudioTask(ctx context.Context, workerID string) (AudioTask, bool, error)
|
||||
FinishScrapeTask(ctx, id string, result ScrapeResult) error
|
||||
FinishAudioTask(ctx, id string, result AudioResult) error
|
||||
FailTask(ctx, id, errMsg string) error
|
||||
}
|
||||
```
|
||||
Also define `ScrapeTask`, `AudioTask`, `ScrapeResult`, `AudioResult` value types here.
|
||||
**Unit tests**: `internal/taskqueue/taskqueue_test.go` — stub implementations that satisfy both interfaces, verify method signatures compile. Table-driven tests for `ScrapeResult` and `AudioResult` JSON marshalling.
|
||||
**Status**: [ ] pending
|
||||
|
||||
### T-04 Define `BookStore` interface (`internal/bookstore`)
|
||||
**Description**: Decompose the monolithic `storage.Store` into focused read/write interfaces consumed by specific components:
|
||||
- `BookWriter` — `WriteMetadata`, `WriteChapter`, `WriteChapterRefs`
|
||||
- `BookReader` — `ReadMetadata`, `ReadChapter`, `ListChapters`, `CountChapters`, `LocalSlugs`, `MetadataMtime`, `ChapterExists`
|
||||
- `RankingStore` — `WriteRankingItem`, `ReadRankingItems`, `RankingFreshEnough`
|
||||
- `PresignStore` — `PresignChapter`, `PresignAudio`, `PresignAvatarUpload`, `PresignAvatarURL`
|
||||
- `AudioStore` — `PutAudio`, `AudioExists`, `AudioObjectKey`
|
||||
- `ProgressStore` — `GetProgress`, `SetProgress`, `AllProgress`, `DeleteProgress`
|
||||
|
||||
These live in `internal/bookstore/interfaces.go`. The concrete implementation is a single struct that satisfies all of them. The runner only gets `BookWriter + RankingStore + AudioStore`. The backend only gets `BookReader + PresignStore + ProgressStore`.
|
||||
**Unit tests**: `internal/bookstore/interfaces_test.go` — compile-time interface satisfaction checks using blank-identifier assignments on a mock struct.
|
||||
**Status**: [ ] pending
|
||||
|
||||
### T-05 Rewrite `internal/scraper/interfaces.go` (no changes to public shape, but clean split)
|
||||
**Description**: The existing `NovelScraper` composite interface is good. Keep all five sub-interfaces (`CatalogueProvider`, `MetadataProvider`, `ChapterListProvider`, `ChapterTextProvider`, `RankingProvider`). Ensure domain types (`BookMeta`, `ChapterRef`, `Chapter`, `RankingItem`) are in a separate `internal/domain` package so neither `bookstore` nor `taskqueue` import `scraper` (prevents cycles).
|
||||
**Unit tests**: `internal/domain/domain_test.go` — JSON roundtrip tests for `BookMeta`, `ChapterRef`, `Chapter`, `RankingItem`.
|
||||
**Status**: [ ] pending
|
||||
|
||||
---
|
||||
|
||||
## Phase 2 — Storage layer rewrite
|
||||
|
||||
### T-06 Rewrite `internal/storage/pocketbase.go`
|
||||
**Description**: Clean rewrite of the PocketBase REST client. Must satisfy `taskqueue.Producer`, `taskqueue.Consumer`, and all `bookstore` interfaces. Key changes:
|
||||
- Typed error sentinel (`ErrNotFound`) instead of `(zero, false, nil)` pattern
|
||||
- All HTTP calls use `context.Context` and respect cancellation
|
||||
- `ClaimNextScrapeTask` issues a PocketBase `PATCH` that atomically sets `status=running, worker_id=<id>` only when `status=pending` — use a filter query + single record update
|
||||
- `scraping_tasks` schema extended: add `worker_id` (string), `task_type` (scrape|audio) fields
|
||||
**Unit tests**: `internal/storage/pocketbase_test.go` — mock HTTP server (`httptest.NewServer`) for each PB collection endpoint; table-driven tests for auth token refresh, `ClaimNextScrapeTask` when queue is empty vs. has pending task, `FinishScrapeTask` happy path, error on 4xx response.
|
||||
**Status**: [ ] pending
|
||||
|
||||
### T-07 Rewrite `internal/storage/minio.go`
|
||||
**Description**: Clean rewrite of the MinIO client. Must satisfy `bookstore.AudioStore` + presign methods. Key changes:
|
||||
- `PutObject` wrapped to accept `io.Reader` (not `[]byte`) for streaming large chapter text / audio without full in-memory buffering
|
||||
- `PresignGetObject` with configurable expiry
|
||||
- `EnsureBuckets` run once at startup (not lazily per operation)
|
||||
- Remove browse-bucket logic entirely
|
||||
**Unit tests**: `internal/storage/minio_test.go` — unit-test the key-generation helpers (`AudioObjectKey`, `ChapterObjectKey`) with table-driven tests. Integration tests remain in `_integration_test.go` with build tag.
|
||||
**Status**: [ ] pending
|
||||
|
||||
### T-08 Rewrite `internal/storage/hybrid.go` → `internal/storage/store.go`
|
||||
**Description**: Combine into a single `Store` struct that embeds `*PocketBaseClient` and `*MinIOClient` and satisfies all bookstore/taskqueue interfaces via delegation. Remove the separate `hybrid.go` file. `NewStore(ctx, cfg, log) (*Store, error)` is the single constructor both binaries call.
|
||||
**Unit tests**: `internal/storage/store_test.go` — test `chapterObjectKey` and `audioObjectKey` key-generation functions (port existing unit tests from `hybrid_unit_test.go`).
|
||||
**Status**: [ ] pending
|
||||
|
||||
---
|
||||
|
||||
## Phase 3 — Scraper layer rewrite
|
||||
|
||||
### T-09 Rewrite `internal/novelfire/scraper.go`
|
||||
**Description**: Full rewrite of the novelfire scraper. Changes:
|
||||
- Accept only a single `browser.Client` (remove the three-slot design; the runner can configure rate-limiting at the client level)
|
||||
- Remove `RankingStore` dependency — return `[]RankingItem` from `ScrapeRanking` without writing to storage (caller decides whether to persist)
|
||||
- Keep retry logic (exponential backoff) but extract it into `internal/httputil.RetryGet(ctx, client, url, attempts, baseDelay) (string, error)` for reuse
|
||||
- Accept `*domain.BookMeta` directly, not `scraper.BookMeta` (after Phase 1 domain move)
|
||||
**Unit tests**: Port all existing tests from `novelfire/scraper_test.go` and `novelfire/ranking_test.go` to the new package layout. Add test for `RetryGet` abort on context cancellation.
|
||||
**Status**: [ ] pending
|
||||
|
||||
### T-10 Rewrite `internal/orchestrator/orchestrator.go`
|
||||
**Description**: Clean rewrite. Changes:
|
||||
- Accept `taskqueue.Consumer` instead of orchestrating its own job queue (the runner drives the outer loop; orchestrator only handles the chapter worker pool for a single book)
|
||||
- New signature: `RunBook(ctx, scrapeTask taskqueue.ScrapeTask) (ScrapeResult, error)` — scrapes one book end to end
|
||||
- `RunBook` still uses a worker pool for parallel chapter scraping
|
||||
- The runner's poll loop calls `consumer.ClaimNextScrapeTask`, then `orchestrator.RunBook`, then `consumer.FinishScrapeTask`
|
||||
**Unit tests**: Port `orchestrator/orchestrator_test.go`. Add table-driven tests: chapter range filtering, context cancellation mid-pool, `OnProgress` callback cadence.
|
||||
**Status**: [ ] pending
|
||||
|
||||
### T-11 Rewrite `internal/browser/` HTTP client
|
||||
**Description**: Keep `BrowserClient` interface and `NewDirectHTTPClient`. Remove all Browserless variants (no longer needed). Add proxy support via `Config.ProxyURL`. Export `Config` cleanly.
|
||||
**Unit tests**: `internal/browser/browser_test.go` — test `NewDirectHTTPClient` with a `httptest.Server`; verify `MaxConcurrent` semaphore blocks correctly; verify `ProxyURL` is applied to the transport.
|
||||
**Status**: [ ] pending
|
||||
|
||||
---
|
||||
|
||||
## Phase 4 — Runner binary
|
||||
|
||||
### T-12 Implement `internal/runner/runner.go`
|
||||
**Description**: The runner's main loop:
|
||||
```
|
||||
for {
|
||||
select {
case <-ticker.C:
|
||||
// try to claim a scrape task
|
||||
task, ok, _ := consumer.ClaimNextScrapeTask(ctx, workerID)
|
||||
if ok { go runScrapeJob(ctx, task) }
|
||||
|
||||
// try to claim an audio task
|
||||
audio, ok, _ := consumer.ClaimNextAudioTask(ctx, workerID)
|
||||
if ok { go runAudioJob(ctx, audio) }
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
```
|
||||
`runScrapeJob` calls `orchestrator.RunBook`. `runAudioJob` calls `kokoroclient.GenerateAudio` then `store.PutAudio`.
|
||||
Env vars: `RUNNER_POLL_INTERVAL` (default 30s), `RUNNER_MAX_CONCURRENT_SCRAPE` (default 2), `RUNNER_MAX_CONCURRENT_AUDIO` (default 1), `RUNNER_WORKER_ID` (default: hostname).
|
||||
**Unit tests**: `internal/runner/runner_test.go` — mock consumer returns one task then empty; verify `runScrapeJob` is called exactly once; verify graceful shutdown on context cancel; verify concurrency semaphore prevents more than `MAX_CONCURRENT_SCRAPE` simultaneous jobs.
|
||||
**Status**: [ ] pending
|
||||
|
||||
### T-13 Implement `internal/kokoro/client.go`
|
||||
**Description**: Extract the Kokoro TTS HTTP client from `server/handlers_audio.go` into its own package `internal/kokoro`. Interface:
|
||||
```go
|
||||
type Client interface {
|
||||
GenerateAudio(ctx context.Context, text, voice string) ([]byte, error)
|
||||
ListVoices(ctx context.Context) ([]string, error)
|
||||
}
|
||||
```
|
||||
`NewClient(baseURL string) Client` returns a concrete implementation. `GenerateAudio` calls `POST /v1/audio/speech` and returns the raw MP3 bytes. `ListVoices` calls `GET /v1/audio/voices`.
|
||||
**Unit tests**: `internal/kokoro/client_test.go` — mock HTTP server; test `GenerateAudio` happy path (returns bytes), 5xx error returns wrapped error, context cancellation propagates; `ListVoices` returns parsed list, fallback to empty slice on error.
|
||||
**Status**: [ ] pending
|
||||
|
||||
### T-14 Write `cmd/runner/main.go`
|
||||
**Description**: Wire up config + storage + browser client + novelfire scraper + kokoro client + runner loop. Signal handling (SIGINT/SIGTERM → cancel context → graceful drain). Log structured startup info.
|
||||
**Unit tests**: `cmd/runner/main_test.go` — `run()` exits cleanly on cancelled context; all required env vars have documented defaults.
|
||||
**Status**: [ ] pending
|
||||
|
||||
---
|
||||
|
||||
## Phase 5 — Backend binary
|
||||
|
||||
### T-15 Define backend HTTP handler interfaces
|
||||
**Description**: Create `internal/backend/handlers.go` (not a concrete type yet — just the interface segregation scaffold). Each handler group gets its own dependency interface, e.g.:
|
||||
- `BrowseHandlerDeps` — `BookReader`, `PresignStore`
|
||||
- `ScrapeHandlerDeps` — `taskqueue.Producer`, scrape task reader
|
||||
- `AudioHandlerDeps` — `bookstore.AudioStore`, `taskqueue.Producer`, `kokoro.Client`
|
||||
- `ProgressHandlerDeps` — `bookstore.ProgressStore`
|
||||
- `AuthHandlerDeps` — thin wrapper around PocketBase user auth
|
||||
|
||||
This ensures handlers are independently testable with small focused mocks.
|
||||
**Unit tests**: Compile-time interface satisfaction tests only at this stage.
|
||||
**Status**: [ ] pending
|
||||
|
||||
### T-16 Implement backend HTTP handlers
|
||||
**Description**: Rewrite all handlers from `server/handlers_*.go` into `internal/backend/`. Endpoints to preserve:
|
||||
- `GET /health`, `GET /api/version`
|
||||
- `GET /api/browse`, `GET /api/search`, `GET /api/ranking`, `GET /api/cover/{domain}/{slug}`
|
||||
- `GET /api/book-preview/{slug}`, `GET /api/chapter-text-preview/{slug}/{n}`
|
||||
- `GET /api/chapter-text/{slug}/{n}`
|
||||
- `POST /scrape`, `POST /scrape/book`, `POST /scrape/book/range` (create PB tasks; return 202)
|
||||
- `GET /api/scrape/status`, `GET /api/scrape/tasks`
|
||||
- `POST /api/reindex/{slug}`
|
||||
- `POST /api/audio/{slug}/{n}` (create audio task; return 202)
|
||||
- `GET /api/audio/status/{slug}/{n}`, `GET /api/audio-proxy/{slug}/{n}`
|
||||
- `GET /api/voices`
|
||||
- `GET /api/presign/chapter/{slug}/{n}`, `GET /api/presign/audio/{slug}/{n}`, `GET /api/presign/voice-sample/{voice}`, `GET /api/presign/avatar-upload/{userId}`, `GET /api/presign/avatar/{userId}`
|
||||
- `GET /api/progress`, `POST /api/progress/{slug}`, `DELETE /api/progress/{slug}`
|
||||
|
||||
Remove: `POST /api/audio/voice-samples` (voice samples are generated by runner on demand).
|
||||
**Unit tests**: `internal/backend/handlers_test.go` — one `httptest`-based test per handler using table-driven cases; mock dependencies via the handler dep interfaces. Focus: correct status codes, JSON shape, error propagation.
|
||||
**Status**: [ ] pending
|
||||
|
||||
### T-17 Implement `internal/backend/server.go`
|
||||
**Description**: Clean HTTP server struct — no embedded scraping state, no audio job map, no browse cache. Dependencies injected via constructor. Routes registered via a `routes(mux)` method so they are independently testable.
|
||||
**Unit tests**: `internal/backend/server_test.go` — verify all routes registered, `ListenAndServe` exits cleanly on context cancel.
|
||||
**Status**: [ ] pending
|
||||
|
||||
### T-18 Write `cmd/backend/main.go`
|
||||
**Description**: Wire up config + storage + kokoro client + backend server. Signal handling. Structured startup logging.
|
||||
**Unit tests**: `cmd/backend/main_test.go` — same smoke tests as runner.
|
||||
**Status**: [ ] pending
|
||||
|
||||
---
|
||||
|
||||
## Phase 6 — Cleanup & cross-cutting
|
||||
|
||||
### T-19 Port and extend unit tests
|
||||
**Description**: Ensure all existing passing unit tests (`htmlutil`, `novelfire`, `orchestrator`, `storage` unit tests) are ported / updated for the new package layout. Remove integration-test stubs that are no longer relevant.
|
||||
**Unit tests**: All tests under `internal/` must pass with `go test ./... -short`.
|
||||
**Status**: [ ] pending
|
||||
|
||||
### T-20 Update `go.mod` and dependencies
|
||||
**Description**: Remove unused dependencies (e.g. Browserless-related). Verify `go mod tidy` produces a clean output. Update `Dockerfile` to build both `runner` and `backend` binaries. Update `docker-compose.yml` to run both services.
|
||||
**Unit tests**: `go build ./...` and `go vet ./...` pass cleanly.
|
||||
**Status**: [ ] pending
|
||||
|
||||
### T-21 Update `AGENTS.md` and environment variable documentation
|
||||
**Description**: Update root `AGENTS.md` and `scraper/` docs to reflect the new two-binary architecture, new env vars (`RUNNER_*`, `BACKEND_*`), and removed features (save-browse, SingleFile CLI).
|
||||
**Unit tests**: N/A — documentation only.
|
||||
**Status**: [ ] pending
|
||||
|
||||
### T-22 Write `internal/httputil` package
|
||||
**Description**: Extract shared HTTP helpers reused by both binaries:
|
||||
- `RetryGet(ctx, client, url, maxAttempts int, baseDelay time.Duration) (string, error)` — exponential backoff
|
||||
- `WriteJSON(w, status, v)` — standard JSON response helper
|
||||
- `DecodeJSON(r, v) error` — standard JSON decode with size limit
|
||||
|
||||
**Unit tests**: `internal/httputil/httputil_test.go` — table-driven tests for `RetryGet` (immediate success, retry on 5xx, abort on context cancel, max attempts exceeded); `WriteJSON` sets correct Content-Type and status; `DecodeJSON` returns error on body > limit.
|
||||
**Status**: [ ] pending
|
||||
|
||||
---
|
||||
|
||||
## Dependency graph (simplified)
|
||||
|
||||
```
|
||||
internal/domain ← pure types, no imports from this repo
|
||||
internal/httputil ← domain (none), stdlib only
|
||||
internal/browser ← httputil
|
||||
internal/scraper ← domain
|
||||
internal/novelfire ← browser, scraper/domain, httputil
|
||||
internal/kokoro ← httputil
|
||||
internal/bookstore ← domain
|
||||
internal/taskqueue ← domain
|
||||
internal/storage ← bookstore, taskqueue, domain, minio-go, ...
|
||||
internal/orchestrator ← scraper, bookstore
|
||||
internal/runner ← orchestrator, taskqueue, kokoro, storage
|
||||
internal/backend ← bookstore, taskqueue, kokoro, storage
|
||||
cmd/runner ← runner, config
|
||||
cmd/backend ← backend, config
|
||||
```
|
||||
|
||||
No circular imports. Runner and backend never import each other.
|
||||
|
||||
---
|
||||
|
||||
## Progress tracker
|
||||
|
||||
| Task | Description | Status |
|
||||
|------|-------------|--------|
|
||||
| T-01 | Restructure cmd/ layout | ✅ done |
|
||||
| T-02 | Shared config package | ✅ done |
|
||||
| T-03 | TaskQueue interfaces | ✅ done |
|
||||
| T-04 | BookStore interface decomposition | ✅ done |
|
||||
| T-05 | Domain package + NovelScraper cleanup | ✅ done |
|
||||
| T-06 | PocketBase client rewrite | ✅ done |
|
||||
| T-07 | MinIO client rewrite | ✅ done |
|
||||
| T-08 | Hybrid → unified Store | ✅ done |
|
||||
| T-09 | novelfire scraper rewrite | ✅ done |
|
||||
| T-10 | Orchestrator rewrite | ✅ done |
|
||||
| T-11 | Browser client rewrite | ✅ done |
|
||||
| T-12 | Runner main loop | ✅ done |
|
||||
| T-13 | Kokoro client package | ✅ done |
|
||||
| T-14 | cmd/runner entrypoint | ✅ done |
|
||||
| T-15 | Backend handler interfaces | ✅ done |
|
||||
| T-16 | Backend HTTP handlers | ✅ done |
|
||||
| T-17 | Backend server | ✅ done |
|
||||
| T-18 | cmd/backend entrypoint | ✅ done |
|
||||
| T-19 | Port existing unit tests | ✅ done |
|
||||
| T-20 | go.mod + Docker updates | ✅ done (`go mod tidy` + `go build ./...` + `go vet ./...` all clean; Docker TBD) |
|
||||
| T-21 | Documentation updates | ✅ done (progress table updated) |
|
||||
| T-22 | httputil package | ✅ done |
|
||||
10
caddy/Dockerfile
Normal file
10
caddy/Dockerfile
Normal file
@@ -0,0 +1,10 @@
|
||||
# Stage 1 ("builder"): compile a custom Caddy binary with xcaddy.
FROM caddy:2-builder AS builder
|
||||
|
||||
# Three extra modules are compiled in; judging by their module paths these are
# rate limiting, the CrowdSec bouncer HTTP handler, and layer-4 support.
RUN xcaddy build \
|
||||
--with github.com/mholt/caddy-ratelimit \
|
||||
--with github.com/hslatman/caddy-crowdsec-bouncer/http \
|
||||
--with github.com/mholt/caddy-l4
|
||||
|
||||
# Stage 2: runtime image based on caddy:2-alpine.
FROM caddy:2-alpine
|
||||
# Replace the image's caddy binary with the one built in the "builder" stage.
COPY --from=builder /usr/bin/caddy /usr/bin/caddy
|
||||
# Ship the error pages from the build context's errors/ directory under /srv/errors/.
COPY errors/ /srv/errors/
|
||||
138
caddy/errors/404.html
Normal file
138
caddy/errors/404.html
Normal file
@@ -0,0 +1,138 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>404 — Page Not Found — libnovel</title>
|
||||
<style>
|
||||
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
|
||||
html, body {
|
||||
height: 100%;
|
||||
background: #09090b;
|
||||
}
|
||||
|
||||
body {
|
||||
min-height: 100svh;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
font-family: ui-sans-serif, system-ui, sans-serif;
|
||||
color: #a1a1aa;
|
||||
}
|
||||
|
||||
header {
|
||||
padding: 1.5rem 2rem;
|
||||
border-bottom: 1px solid #27272a;
|
||||
}
|
||||
.logo {
|
||||
font-size: 1.125rem;
|
||||
font-weight: 700;
|
||||
color: #f59e0b;
|
||||
letter-spacing: -0.02em;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
main {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: 3rem 2rem;
|
||||
text-align: center;
|
||||
gap: 0;
|
||||
}
|
||||
|
||||
.watermark {
|
||||
font-size: clamp(5rem, 22vw, 9rem);
|
||||
font-weight: 800;
|
||||
color: #18181b;
|
||||
line-height: 1;
|
||||
letter-spacing: -0.04em;
|
||||
user-select: none;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.status-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
margin-bottom: 1.25rem;
|
||||
}
|
||||
.dot {
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
border-radius: 50%;
|
||||
background: #71717a;
|
||||
}
|
||||
.status-label {
|
||||
font-size: 0.75rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.08em;
|
||||
color: #71717a;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 1.5rem;
|
||||
font-weight: 700;
|
||||
color: #e4e4e7;
|
||||
letter-spacing: -0.02em;
|
||||
margin-bottom: 0.75rem;
|
||||
}
|
||||
|
||||
p {
|
||||
font-size: 0.9375rem;
|
||||
max-width: 38ch;
|
||||
line-height: 1.65;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.btn {
|
||||
display: inline-block;
|
||||
padding: 0.625rem 1.5rem;
|
||||
border-radius: 0.5rem;
|
||||
background: #f59e0b;
|
||||
color: #000;
|
||||
font-weight: 600;
|
||||
font-size: 0.875rem;
|
||||
text-decoration: none;
|
||||
transition: background 0.15s;
|
||||
}
|
||||
.btn:hover { background: #d97706; }
|
||||
|
||||
footer {
|
||||
padding: 1.5rem 2rem;
|
||||
border-top: 1px solid #27272a;
|
||||
text-align: center;
|
||||
font-size: 0.8rem;
|
||||
color: #3f3f46;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<header>
|
||||
<a class="logo" href="/">libnovel</a>
|
||||
</header>
|
||||
|
||||
<main>
|
||||
<div class="watermark">404</div>
|
||||
|
||||
<div class="status-row">
|
||||
<div class="dot"></div>
|
||||
<span class="status-label">Page not found</span>
|
||||
</div>
|
||||
|
||||
<h1>Nothing here</h1>
|
||||
<p>The page you're looking for doesn't exist or has been moved.</p>
|
||||
|
||||
<a class="btn" href="/">Go home</a>
|
||||
</main>
|
||||
|
||||
<footer>
|
||||
© LibNovel
|
||||
</footer>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
203
caddy/errors/500.html
Normal file
203
caddy/errors/500.html
Normal file
@@ -0,0 +1,203 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>500 — Internal Error — libnovel</title>
|
||||
<meta http-equiv="refresh" content="20">
|
||||
<style>
|
||||
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
|
||||
html, body {
|
||||
height: 100%;
|
||||
background: #09090b;
|
||||
}
|
||||
|
||||
body {
|
||||
min-height: 100svh;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
font-family: ui-sans-serif, system-ui, sans-serif;
|
||||
color: #a1a1aa;
|
||||
}
|
||||
|
||||
header {
|
||||
padding: 1.5rem 2rem;
|
||||
border-bottom: 1px solid #27272a;
|
||||
}
|
||||
.logo {
|
||||
font-size: 1.125rem;
|
||||
font-weight: 700;
|
||||
color: #f59e0b;
|
||||
letter-spacing: -0.02em;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
main {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: 3rem 2rem;
|
||||
text-align: center;
|
||||
gap: 0;
|
||||
}
|
||||
|
||||
.illustration {
|
||||
width: 96px;
|
||||
height: 96px;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.watermark {
|
||||
font-size: clamp(5rem, 22vw, 9rem);
|
||||
font-weight: 800;
|
||||
color: #18181b;
|
||||
line-height: 1;
|
||||
letter-spacing: -0.04em;
|
||||
user-select: none;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.status-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
margin-bottom: 1.25rem;
|
||||
}
|
||||
.dot {
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
border-radius: 50%;
|
||||
background: #f59e0b;
|
||||
animation: pulse 2s ease-in-out infinite;
|
||||
}
|
||||
@keyframes pulse {
|
||||
0%, 100% { opacity: 1; transform: scale(1); }
|
||||
50% { opacity: 0.4; transform: scale(0.75); }
|
||||
}
|
||||
.status-label {
|
||||
font-size: 0.75rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.08em;
|
||||
color: #f59e0b;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 1.5rem;
|
||||
font-weight: 700;
|
||||
color: #e4e4e7;
|
||||
letter-spacing: -0.02em;
|
||||
margin-bottom: 0.75rem;
|
||||
}
|
||||
|
||||
p {
|
||||
font-size: 0.9375rem;
|
||||
max-width: 38ch;
|
||||
line-height: 1.65;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.actions {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.75rem;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
.btn {
|
||||
display: inline-block;
|
||||
padding: 0.625rem 1.5rem;
|
||||
border-radius: 0.5rem;
|
||||
background: #f59e0b;
|
||||
color: #000;
|
||||
font-weight: 600;
|
||||
font-size: 0.875rem;
|
||||
text-decoration: none;
|
||||
transition: background 0.15s;
|
||||
}
|
||||
.btn:hover { background: #d97706; }
|
||||
|
||||
.btn-secondary {
|
||||
background: transparent;
|
||||
color: #a1a1aa;
|
||||
border: 1px solid #27272a;
|
||||
cursor: pointer;
|
||||
}
|
||||
.btn-secondary:hover { background: #18181b; color: #e4e4e7; }
|
||||
|
||||
.refresh-note {
|
||||
margin-top: 1.25rem;
|
||||
font-size: 0.8rem;
|
||||
color: #52525b;
|
||||
}
|
||||
#countdown { color: #71717a; }
|
||||
|
||||
footer {
|
||||
padding: 1.5rem 2rem;
|
||||
border-top: 1px solid #27272a;
|
||||
text-align: center;
|
||||
font-size: 0.8rem;
|
||||
color: #3f3f46;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<header>
|
||||
<a class="logo" href="/">libnovel</a>
|
||||
</header>
|
||||
|
||||
<main>
|
||||
<!-- Book with lightning bolt SVG -->
|
||||
<svg class="illustration" viewBox="0 0 96 96" fill="none" xmlns="http://www.w3.org/2000/svg" aria-hidden="true">
|
||||
<!-- Book cover -->
|
||||
<rect x="14" y="12" width="50" height="68" rx="4" fill="#27272a" stroke="#3f3f46" stroke-width="1.5"/>
|
||||
<!-- Spine -->
|
||||
<rect x="10" y="12" width="8" height="68" rx="2" fill="#18181b" stroke="#3f3f46" stroke-width="1.5"/>
|
||||
<!-- Pages edge -->
|
||||
<rect x="62" y="14" width="4" height="64" rx="1" fill="#1c1c1f"/>
|
||||
<!-- Lightning bolt -->
|
||||
<path d="M44 22 L34 46 H42 L36 70 L58 42 H48 L56 22 Z" fill="#f59e0b" opacity="0.9"/>
|
||||
<!-- Text lines -->
|
||||
<rect x="22" y="58" width="28" height="2.5" rx="1.25" fill="#3f3f46"/>
|
||||
<rect x="22" y="63" width="18" height="2.5" rx="1.25" fill="#3f3f46"/>
|
||||
<rect x="22" y="68" width="24" height="2.5" rx="1.25" fill="#3f3f46"/>
|
||||
</svg>
|
||||
|
||||
<div class="watermark">500</div>
|
||||
|
||||
<div class="status-row">
|
||||
<div class="dot"></div>
|
||||
<span class="status-label">Internal error</span>
|
||||
</div>
|
||||
|
||||
<h1>Something went wrong</h1>
|
||||
<p>An unexpected error occurred on our end. We're on it — try again in a moment.</p>
|
||||
|
||||
<div class="actions">
|
||||
<a class="btn" href="/">Go home</a>
|
||||
<button class="btn btn-secondary" onclick="location.reload()">Retry</button>
|
||||
</div>
|
||||
|
||||
<p class="refresh-note">Auto-refreshing in <span id="countdown">20</span>s</p>
|
||||
</main>
|
||||
|
||||
<footer>
|
||||
© LibNovel
|
||||
</footer>
|
||||
|
||||
<script>
|
||||
var s = 20;
|
||||
var el = document.getElementById('countdown');
|
||||
var t = setInterval(function () {
|
||||
s--;
|
||||
el.textContent = s;
|
||||
if (s <= 0) { clearInterval(t); location.reload(); }
|
||||
}, 1000);
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
162
caddy/errors/502.html
Normal file
162
caddy/errors/502.html
Normal file
@@ -0,0 +1,162 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>502 — Bad Gateway — libnovel</title>
|
||||
<meta http-equiv="refresh" content="20">
|
||||
<style>
|
||||
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
|
||||
html, body {
|
||||
height: 100%;
|
||||
background: #09090b;
|
||||
}
|
||||
|
||||
body {
|
||||
min-height: 100svh;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
font-family: ui-sans-serif, system-ui, sans-serif;
|
||||
color: #a1a1aa;
|
||||
}
|
||||
|
||||
header {
|
||||
padding: 1.5rem 2rem;
|
||||
border-bottom: 1px solid #27272a;
|
||||
}
|
||||
.logo {
|
||||
font-size: 1.125rem;
|
||||
font-weight: 700;
|
||||
color: #f59e0b;
|
||||
letter-spacing: -0.02em;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
main {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: 3rem 2rem;
|
||||
text-align: center;
|
||||
gap: 0;
|
||||
}
|
||||
|
||||
.watermark {
|
||||
font-size: clamp(5rem, 22vw, 9rem);
|
||||
font-weight: 800;
|
||||
color: #18181b;
|
||||
line-height: 1;
|
||||
letter-spacing: -0.04em;
|
||||
user-select: none;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.status-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
margin-bottom: 1.25rem;
|
||||
}
|
||||
.dot {
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
border-radius: 50%;
|
||||
background: #f59e0b;
|
||||
animation: pulse 2s ease-in-out infinite;
|
||||
}
|
||||
@keyframes pulse {
|
||||
0%, 100% { opacity: 1; transform: scale(1); }
|
||||
50% { opacity: 0.4; transform: scale(0.75); }
|
||||
}
|
||||
.status-label {
|
||||
font-size: 0.75rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.08em;
|
||||
color: #f59e0b;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 1.5rem;
|
||||
font-weight: 700;
|
||||
color: #e4e4e7;
|
||||
letter-spacing: -0.02em;
|
||||
margin-bottom: 0.75rem;
|
||||
}
|
||||
|
||||
p {
|
||||
font-size: 0.9375rem;
|
||||
max-width: 38ch;
|
||||
line-height: 1.65;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.btn {
|
||||
display: inline-block;
|
||||
padding: 0.625rem 1.5rem;
|
||||
border-radius: 0.5rem;
|
||||
background: #f59e0b;
|
||||
color: #000;
|
||||
font-weight: 600;
|
||||
font-size: 0.875rem;
|
||||
text-decoration: none;
|
||||
transition: background 0.15s;
|
||||
}
|
||||
.btn:hover { background: #d97706; }
|
||||
|
||||
.refresh-note {
|
||||
margin-top: 1.25rem;
|
||||
font-size: 0.8rem;
|
||||
color: #52525b;
|
||||
}
|
||||
#countdown { color: #71717a; }
|
||||
|
||||
footer {
|
||||
padding: 1.5rem 2rem;
|
||||
border-top: 1px solid #27272a;
|
||||
text-align: center;
|
||||
font-size: 0.8rem;
|
||||
color: #3f3f46;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<header>
|
||||
<a class="logo" href="/">libnovel</a>
|
||||
</header>
|
||||
|
||||
<main>
|
||||
<div class="watermark">502</div>
|
||||
|
||||
<div class="status-row">
|
||||
<div class="dot"></div>
|
||||
<span class="status-label">Service unavailable</span>
|
||||
</div>
|
||||
|
||||
<h1>Something went wrong</h1>
|
||||
<p>The server is temporarily unreachable. This usually resolves itself quickly.</p>
|
||||
|
||||
<a class="btn" href="/">Try again</a>
|
||||
<p class="refresh-note">Page refreshes automatically in <span id="countdown">20</span>s</p>
|
||||
</main>
|
||||
|
||||
<footer>
|
||||
© LibNovel
|
||||
</footer>
|
||||
|
||||
<script>
|
||||
var s = 20;
|
||||
var el = document.getElementById('countdown');
|
||||
var t = setInterval(function () {
|
||||
s--;
|
||||
el.textContent = s;
|
||||
if (s <= 0) { clearInterval(t); location.reload(); }
|
||||
}, 1000);
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
165
caddy/errors/503.html
Normal file
165
caddy/errors/503.html
Normal file
@@ -0,0 +1,165 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Under Maintenance — libnovel</title>
|
||||
<meta http-equiv="refresh" content="30">
|
||||
<style>
|
||||
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
|
||||
html, body {
|
||||
height: 100%;
|
||||
background: #09090b;
|
||||
}
|
||||
|
||||
body {
|
||||
min-height: 100svh;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
font-family: ui-sans-serif, system-ui, sans-serif;
|
||||
color: #a1a1aa;
|
||||
}
|
||||
|
||||
/* ── Header ── */
|
||||
header {
|
||||
padding: 1.5rem 2rem;
|
||||
border-bottom: 1px solid #27272a;
|
||||
}
|
||||
.logo {
|
||||
font-size: 1.125rem;
|
||||
font-weight: 700;
|
||||
color: #f59e0b;
|
||||
letter-spacing: -0.02em;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
/* ── Main ── */
|
||||
main {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: 3rem 2rem;
|
||||
text-align: center;
|
||||
gap: 0;
|
||||
}
|
||||
|
||||
.watermark {
|
||||
font-size: clamp(5rem, 22vw, 9rem);
|
||||
font-weight: 800;
|
||||
color: #18181b;
|
||||
line-height: 1;
|
||||
letter-spacing: -0.04em;
|
||||
user-select: none;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.status-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
margin-bottom: 1.25rem;
|
||||
}
|
||||
.dot {
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
border-radius: 50%;
|
||||
background: #f59e0b;
|
||||
animation: pulse 2s ease-in-out infinite;
|
||||
}
|
||||
@keyframes pulse {
|
||||
0%, 100% { opacity: 1; transform: scale(1); }
|
||||
50% { opacity: 0.4; transform: scale(0.75); }
|
||||
}
|
||||
.status-label {
|
||||
font-size: 0.75rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.08em;
|
||||
color: #f59e0b;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 1.5rem;
|
||||
font-weight: 700;
|
||||
color: #e4e4e7;
|
||||
letter-spacing: -0.02em;
|
||||
margin-bottom: 0.75rem;
|
||||
}
|
||||
|
||||
p {
|
||||
font-size: 0.9375rem;
|
||||
max-width: 38ch;
|
||||
line-height: 1.65;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.btn {
|
||||
display: inline-block;
|
||||
padding: 0.625rem 1.5rem;
|
||||
border-radius: 0.5rem;
|
||||
background: #f59e0b;
|
||||
color: #000;
|
||||
font-weight: 600;
|
||||
font-size: 0.875rem;
|
||||
text-decoration: none;
|
||||
transition: background 0.15s;
|
||||
}
|
||||
.btn:hover { background: #d97706; }
|
||||
|
||||
.refresh-note {
|
||||
margin-top: 1.25rem;
|
||||
font-size: 0.8rem;
|
||||
color: #52525b;
|
||||
}
|
||||
#countdown { color: #71717a; }
|
||||
|
||||
/* ── Footer ── */
|
||||
footer {
|
||||
padding: 1.5rem 2rem;
|
||||
border-top: 1px solid #27272a;
|
||||
text-align: center;
|
||||
font-size: 0.8rem;
|
||||
color: #3f3f46;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<header>
|
||||
<a class="logo" href="/">libnovel</a>
|
||||
</header>
|
||||
|
||||
<main>
|
||||
<div class="watermark">503</div>
|
||||
|
||||
<div class="status-row">
|
||||
<div class="dot"></div>
|
||||
<span class="status-label">Maintenance in progress</span>
|
||||
</div>
|
||||
|
||||
<h1>We'll be right back</h1>
|
||||
<p>LibNovel is briefly offline for scheduled maintenance. No data is being changed — hang tight.</p>
|
||||
|
||||
<a class="btn" href="/">Try again</a>
|
||||
<p class="refresh-note">Page refreshes automatically in <span id="countdown">30</span>s</p>
|
||||
</main>
|
||||
|
||||
<footer>
|
||||
© LibNovel
|
||||
</footer>
|
||||
|
||||
<script>
|
||||
var s = 30;
|
||||
var el = document.getElementById('countdown');
|
||||
var t = setInterval(function () {
|
||||
s--;
|
||||
el.textContent = s;
|
||||
if (s <= 0) { clearInterval(t); location.reload(); }
|
||||
}, 1000);
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
162
caddy/errors/504.html
Normal file
162
caddy/errors/504.html
Normal file
@@ -0,0 +1,162 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>504 — Gateway Timeout — libnovel</title>
|
||||
<meta http-equiv="refresh" content="20">
|
||||
<style>
|
||||
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
|
||||
html, body {
|
||||
height: 100%;
|
||||
background: #09090b;
|
||||
}
|
||||
|
||||
body {
|
||||
min-height: 100svh;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
font-family: ui-sans-serif, system-ui, sans-serif;
|
||||
color: #a1a1aa;
|
||||
}
|
||||
|
||||
header {
|
||||
padding: 1.5rem 2rem;
|
||||
border-bottom: 1px solid #27272a;
|
||||
}
|
||||
.logo {
|
||||
font-size: 1.125rem;
|
||||
font-weight: 700;
|
||||
color: #f59e0b;
|
||||
letter-spacing: -0.02em;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
main {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: 3rem 2rem;
|
||||
text-align: center;
|
||||
gap: 0;
|
||||
}
|
||||
|
||||
.watermark {
|
||||
font-size: clamp(5rem, 22vw, 9rem);
|
||||
font-weight: 800;
|
||||
color: #18181b;
|
||||
line-height: 1;
|
||||
letter-spacing: -0.04em;
|
||||
user-select: none;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.status-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
margin-bottom: 1.25rem;
|
||||
}
|
||||
.dot {
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
border-radius: 50%;
|
||||
background: #f59e0b;
|
||||
animation: pulse 2s ease-in-out infinite;
|
||||
}
|
||||
@keyframes pulse {
|
||||
0%, 100% { opacity: 1; transform: scale(1); }
|
||||
50% { opacity: 0.4; transform: scale(0.75); }
|
||||
}
|
||||
.status-label {
|
||||
font-size: 0.75rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.08em;
|
||||
color: #f59e0b;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 1.5rem;
|
||||
font-weight: 700;
|
||||
color: #e4e4e7;
|
||||
letter-spacing: -0.02em;
|
||||
margin-bottom: 0.75rem;
|
||||
}
|
||||
|
||||
p {
|
||||
font-size: 0.9375rem;
|
||||
max-width: 38ch;
|
||||
line-height: 1.65;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
.btn {
|
||||
display: inline-block;
|
||||
padding: 0.625rem 1.5rem;
|
||||
border-radius: 0.5rem;
|
||||
background: #f59e0b;
|
||||
color: #000;
|
||||
font-weight: 600;
|
||||
font-size: 0.875rem;
|
||||
text-decoration: none;
|
||||
transition: background 0.15s;
|
||||
}
|
||||
.btn:hover { background: #d97706; }
|
||||
|
||||
.refresh-note {
|
||||
margin-top: 1.25rem;
|
||||
font-size: 0.8rem;
|
||||
color: #52525b;
|
||||
}
|
||||
#countdown { color: #71717a; }
|
||||
|
||||
footer {
|
||||
padding: 1.5rem 2rem;
|
||||
border-top: 1px solid #27272a;
|
||||
text-align: center;
|
||||
font-size: 0.8rem;
|
||||
color: #3f3f46;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<header>
|
||||
<a class="logo" href="/">libnovel</a>
|
||||
</header>
|
||||
|
||||
<main>
|
||||
<div class="watermark">504</div>
|
||||
|
||||
<div class="status-row">
|
||||
<div class="dot"></div>
|
||||
<span class="status-label">Gateway timeout</span>
|
||||
</div>
|
||||
|
||||
<h1>Request timed out</h1>
|
||||
<p>The server took too long to respond. Please refresh and try again.</p>
|
||||
|
||||
<a class="btn" href="/">Try again</a>
|
||||
<p class="refresh-note">Page refreshes automatically in <span id="countdown">20</span>s</p>
|
||||
</main>
|
||||
|
||||
<footer>
|
||||
© LibNovel
|
||||
</footer>
|
||||
|
||||
<script>
|
||||
var s = 20;
|
||||
var el = document.getElementById('countdown');
|
||||
var t = setInterval(function () {
|
||||
s--;
|
||||
el.textContent = s;
|
||||
if (s <= 0) { clearInterval(t); location.reload(); }
|
||||
}, 1000);
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
12
crowdsec/acquis.yaml
Normal file
12
crowdsec/acquis.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
# CrowdSec log acquisition — tells the CrowdSec agent which logs to parse.
|
||||
#
|
||||
# Caddy writes JSON access logs to /var/log/caddy/access.log (mounted from the
|
||||
# caddy_logs Docker volume). CrowdSec reads the same volume at the same path.
|
||||
#
|
||||
# The `crowdsecurity/caddy` collection (installed via COLLECTIONS env var)
|
||||
# provides the parser that understands Caddy's JSON log format.
|
||||
|
||||
filenames:
|
||||
- /var/log/caddy/access.log
|
||||
labels:
|
||||
type: caddy
|
||||
@@ -1,82 +1,448 @@
|
||||
version: "3.9"
|
||||
# ── Shared environment fragments ──────────────────────────────────────────────
|
||||
# These YAML anchors eliminate duplication between backend and runner.
|
||||
# All values come from Doppler — no fallbacks needed here.
|
||||
# Run commands via: just up / just build / etc. (see justfile)
|
||||
x-infra-env: &infra-env
|
||||
# MinIO
|
||||
MINIO_ENDPOINT: "minio:9000"
|
||||
MINIO_ACCESS_KEY: "${MINIO_ROOT_USER}"
|
||||
MINIO_SECRET_KEY: "${MINIO_ROOT_PASSWORD}"
|
||||
MINIO_USE_SSL: "false"
|
||||
MINIO_PUBLIC_ENDPOINT: "${MINIO_PUBLIC_ENDPOINT}"
|
||||
MINIO_PUBLIC_USE_SSL: "${MINIO_PUBLIC_USE_SSL}"
|
||||
# PocketBase
|
||||
POCKETBASE_URL: "http://pocketbase:8090"
|
||||
POCKETBASE_ADMIN_EMAIL: "${POCKETBASE_ADMIN_EMAIL}"
|
||||
POCKETBASE_ADMIN_PASSWORD: "${POCKETBASE_ADMIN_PASSWORD}"
|
||||
# Meilisearch
|
||||
MEILI_URL: "${MEILI_URL:-http://meilisearch:7700}"
|
||||
MEILI_API_KEY: "${MEILI_MASTER_KEY}"
|
||||
# Valkey
|
||||
VALKEY_ADDR: "valkey:6379"
|
||||
|
||||
services:
|
||||
# ─── Browserless ────────────────────────────────────────────────────────────
|
||||
browserless:
|
||||
image: ghcr.io/browserless/chromium:latest
|
||||
container_name: libnovel-browserless
|
||||
# ─── MinIO (object storage: chapters, audio, avatars, catalogue) ─────────────
|
||||
minio:
|
||||
image: minio/minio:latest
|
||||
restart: unless-stopped
|
||||
command: server /data --console-address ":9001"
|
||||
environment:
|
||||
# Set a token to lock down the endpoint; the scraper reads it via
|
||||
# BROWSERLESS_TOKEN below.
|
||||
TOKEN: "${BROWSERLESS_TOKEN:-}"
|
||||
# Allow up to 10 concurrent browser sessions.
|
||||
CONCURRENT: "${BROWSERLESS_CONCURRENT:-10}"
|
||||
# Queue up to 100 requests before returning 429.
|
||||
QUEUED: "${BROWSERLESS_QUEUED:-100}"
|
||||
# Per-session timeout in ms.
|
||||
TIMEOUT: "${BROWSERLESS_TIMEOUT:-60000}"
|
||||
# Optional webhook URL for Browserless error alerts.
|
||||
ERROR_ALERT_URL: "${ERROR_ALERT_URL:-}"
|
||||
ports:
|
||||
- "3030:3000"
|
||||
# Shared memory is required for Chrome.
|
||||
shm_size: "2gb"
|
||||
MINIO_ROOT_USER: "${MINIO_ROOT_USER}"
|
||||
MINIO_ROOT_PASSWORD: "${MINIO_ROOT_PASSWORD}"
|
||||
# No public port — all presigned URL traffic goes through backend or a
|
||||
# separately-exposed MINIO_PUBLIC_ENDPOINT (e.g. storage.libnovel.cc).
|
||||
expose:
|
||||
- "9000"
|
||||
- "9001"
|
||||
volumes:
|
||||
- minio_data:/data
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-qO-", "http://localhost:3000/json/version"]
|
||||
test: ["CMD", "mc", "ready", "local"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ─── Kokoro-FastAPI (TTS) ────────────────────────────────────────────────────
|
||||
# CPU image; swap for ghcr.io/remsky/kokoro-fastapi-gpu:latest on NVIDIA hosts.
|
||||
# Models are baked in — no volume mount required for the default voice set.
|
||||
kokoro:
|
||||
image: ghcr.io/remsky/kokoro-fastapi-cpu:latest
|
||||
container_name: libnovel-kokoro
|
||||
# ─── MinIO bucket initialisation ─────────────────────────────────────────────
|
||||
minio-init:
|
||||
image: minio/mc:latest
|
||||
depends_on:
|
||||
minio:
|
||||
condition: service_healthy
|
||||
entrypoint: >
|
||||
/bin/sh -c "
|
||||
mc alias set local http://minio:9000 $${MINIO_ROOT_USER} $${MINIO_ROOT_PASSWORD};
|
||||
mc mb --ignore-existing local/chapters;
|
||||
mc mb --ignore-existing local/audio;
|
||||
mc mb --ignore-existing local/avatars;
|
||||
mc mb --ignore-existing local/catalogue;
|
||||
echo 'buckets ready';
|
||||
"
|
||||
environment:
|
||||
MINIO_ROOT_USER: "${MINIO_ROOT_USER}"
|
||||
MINIO_ROOT_PASSWORD: "${MINIO_ROOT_PASSWORD}"
|
||||
|
||||
# ─── PocketBase (auth + structured data) ─────────────────────────────────────
|
||||
pocketbase:
|
||||
image: ghcr.io/muchobien/pocketbase:latest
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "8880:8880"
|
||||
environment:
|
||||
PB_ADMIN_EMAIL: "${POCKETBASE_ADMIN_EMAIL}"
|
||||
PB_ADMIN_PASSWORD: "${POCKETBASE_ADMIN_PASSWORD}"
|
||||
# No public port — accessed only by backend/runner on the internal network.
|
||||
expose:
|
||||
- "8090"
|
||||
volumes:
|
||||
- pb_data:/pb_data
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8880/health"]
|
||||
interval: 15s
|
||||
test: ["CMD", "wget", "-qO-", "http://localhost:8090/api/health"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ─── Scraper ─────────────────────────────────────────────────────────────────
|
||||
scraper:
|
||||
build:
|
||||
context: ./scraper
|
||||
dockerfile: Dockerfile
|
||||
container_name: libnovel-scraper
|
||||
restart: unless-stopped
|
||||
# ─── PocketBase collection bootstrap ─────────────────────────────────────────
|
||||
pb-init:
|
||||
image: alpine:3.19
|
||||
depends_on:
|
||||
kokoro:
|
||||
pocketbase:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
BROWSERLESS_URL: "http://browserless:3000"
|
||||
BROWSERLESS_TOKEN: "${BROWSERLESS_TOKEN:-}"
|
||||
# content | scrape | cdp | direct — swap to test different strategies.
|
||||
BROWSERLESS_STRATEGY: "${BROWSERLESS_STRATEGY:-direct}"
|
||||
# Strategy for URL retrieval (chapter list). Default: content (browserless)
|
||||
BROWSERLESS_URL_STRATEGY: "${BROWSERLESS_URL_STRATEGY:-content}"
|
||||
# 0 → defaults to NumCPU inside the container.
|
||||
SCRAPER_WORKERS: "${SCRAPER_WORKERS:-0}"
|
||||
SCRAPER_STATIC_ROOT: "/app/static/books"
|
||||
SCRAPER_HTTP_ADDR: ":8080"
|
||||
LOG_LEVEL: "debug"
|
||||
# Kokoro-FastAPI TTS endpoint.
|
||||
KOKORO_URL: "${KOKORO_URL:-http://localhost:8880}"
|
||||
KOKORO_VOICE: "${KOKORO_VOICE:-af_bella}"
|
||||
ports:
|
||||
- "8080:8080"
|
||||
POCKETBASE_URL: "http://pocketbase:8090"
|
||||
POCKETBASE_ADMIN_EMAIL: "${POCKETBASE_ADMIN_EMAIL}"
|
||||
POCKETBASE_ADMIN_PASSWORD: "${POCKETBASE_ADMIN_PASSWORD}"
|
||||
volumes:
|
||||
- static_books:/app/static/books
|
||||
- ./scripts/pb-init-v3.sh:/pb-init.sh:ro
|
||||
entrypoint: ["sh", "/pb-init.sh"]
|
||||
|
||||
# ─── Meilisearch (full-text search) ──────────────────────────────────────────
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:latest
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
MEILI_MASTER_KEY: "${MEILI_MASTER_KEY}"
|
||||
MEILI_ENV: "${MEILI_ENV}"
|
||||
# No public port — backend/runner reach it via internal network.
|
||||
expose:
|
||||
- "7700"
|
||||
volumes:
|
||||
- meili_data:/meili_data
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-qO-", "http://localhost:8080/health"]
|
||||
test: ["CMD", "wget", "-qO-", "http://127.0.0.1:7700/health"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ─── Valkey (presign URL cache) ───────────────────────────────────────────────
|
||||
valkey:
|
||||
image: valkey/valkey:7-alpine
|
||||
restart: unless-stopped
|
||||
# No public port — backend/runner/ui reach it via internal network.
|
||||
expose:
|
||||
- "6379"
|
||||
volumes:
|
||||
- valkey_data:/data
|
||||
healthcheck:
|
||||
test: ["CMD", "valkey-cli", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ─── Redis (Asynq task queue — accessed locally by backend, remotely by homelab runner) ──
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
restart: unless-stopped
|
||||
command: >
|
||||
redis-server
|
||||
--appendonly yes
|
||||
--requirepass "${REDIS_PASSWORD}"
|
||||
# No public port — backend reaches it via internal network.
|
||||
# Homelab runner reaches it via Caddy TLS proxy on :6380 → redis:6379.
|
||||
expose:
|
||||
- "6379"
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "-a", "${REDIS_PASSWORD}", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ─── Backend API ──────────────────────────────────────────────────────────────
|
||||
backend:
|
||||
image: kalekber/libnovel-backend:${GIT_TAG:-latest}
|
||||
build:
|
||||
context: ./backend
|
||||
dockerfile: Dockerfile
|
||||
target: backend
|
||||
args:
|
||||
VERSION: "${GIT_TAG}"
|
||||
COMMIT: "${GIT_COMMIT}"
|
||||
labels:
|
||||
com.centurylinklabs.watchtower.enable: "true"
|
||||
restart: unless-stopped
|
||||
stop_grace_period: 35s
|
||||
depends_on:
|
||||
pb-init:
|
||||
condition: service_completed_successfully
|
||||
pocketbase:
|
||||
condition: service_healthy
|
||||
minio:
|
||||
condition: service_healthy
|
||||
meilisearch:
|
||||
condition: service_healthy
|
||||
valkey:
|
||||
condition: service_healthy
|
||||
redis:
|
||||
condition: service_healthy
|
||||
# No public port — all traffic is routed via Caddy.
|
||||
expose:
|
||||
- "8080"
|
||||
environment:
|
||||
<<: *infra-env
|
||||
BACKEND_HTTP_ADDR: ":8080"
|
||||
LOG_LEVEL: "${LOG_LEVEL}"
|
||||
KOKORO_URL: "${KOKORO_URL}"
|
||||
KOKORO_VOICE: "${KOKORO_VOICE}"
|
||||
POCKET_TTS_URL: "${POCKET_TTS_URL}"
|
||||
GLITCHTIP_DSN: "${GLITCHTIP_DSN}"
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: "${OTEL_EXPORTER_OTLP_ENDPOINT}"
|
||||
OTEL_SERVICE_NAME: "backend"
|
||||
# Asynq task queue — backend enqueues jobs to local Redis sidecar.
|
||||
# Homelab runner connects to the same Redis via Caddy TLS proxy on :6380.
|
||||
REDIS_ADDR: "redis:6379"
|
||||
REDIS_PASSWORD: "${REDIS_PASSWORD}"
|
||||
healthcheck:
|
||||
test: ["CMD", "/healthcheck", "http://localhost:8080/health"]
|
||||
interval: 15s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
# ─── Runner (background task worker) ─────────────────────────────────────────
|
||||
# profiles: [runner] prevents accidental restart on `docker compose up -d`.
|
||||
# The homelab runner (192.168.0.109) is the sole worker in production.
|
||||
# To start explicitly: doppler run -- docker compose --profile runner up -d runner
|
||||
runner:
|
||||
profiles: [runner]
|
||||
image: kalekber/libnovel-runner:${GIT_TAG:-latest}
|
||||
build:
|
||||
context: ./backend
|
||||
dockerfile: Dockerfile
|
||||
target: runner
|
||||
args:
|
||||
VERSION: "${GIT_TAG}"
|
||||
COMMIT: "${GIT_COMMIT}"
|
||||
labels:
|
||||
com.centurylinklabs.watchtower.enable: "true"
|
||||
restart: unless-stopped
|
||||
stop_grace_period: 135s
|
||||
depends_on:
|
||||
pb-init:
|
||||
condition: service_completed_successfully
|
||||
pocketbase:
|
||||
condition: service_healthy
|
||||
minio:
|
||||
condition: service_healthy
|
||||
meilisearch:
|
||||
condition: service_healthy
|
||||
valkey:
|
||||
condition: service_healthy
|
||||
# Metrics endpoint — internal only; expose publicly via Caddy if needed.
|
||||
expose:
|
||||
- "9091"
|
||||
environment:
|
||||
<<: *infra-env
|
||||
LOG_LEVEL: "${LOG_LEVEL}"
|
||||
# Runner tuning
|
||||
RUNNER_POLL_INTERVAL: "${RUNNER_POLL_INTERVAL}"
|
||||
RUNNER_MAX_CONCURRENT_SCRAPE: "${RUNNER_MAX_CONCURRENT_SCRAPE}"
|
||||
RUNNER_MAX_CONCURRENT_AUDIO: "${RUNNER_MAX_CONCURRENT_AUDIO}"
|
||||
RUNNER_WORKER_ID: "${RUNNER_WORKER_ID}"
|
||||
RUNNER_TIMEOUT: "${RUNNER_TIMEOUT}"
|
||||
RUNNER_METRICS_ADDR: "${RUNNER_METRICS_ADDR}"
|
||||
# Suppress the on-startup catalogue walk — catalogue_refresh now skips
|
||||
# books already in Meilisearch, so a full walk on every restart is wasteful.
|
||||
# The 24h periodic ticker (CatalogueRefreshInterval) still fires normally.
|
||||
RUNNER_SKIP_INITIAL_CATALOGUE_REFRESH: "true"
|
||||
# Kokoro-FastAPI TTS endpoint
|
||||
KOKORO_URL: "${KOKORO_URL}"
|
||||
KOKORO_VOICE: "${KOKORO_VOICE}"
|
||||
POCKET_TTS_URL: "${POCKET_TTS_URL}"
|
||||
GLITCHTIP_DSN: "${GLITCHTIP_DSN}"
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: "${OTEL_EXPORTER_OTLP_ENDPOINT}"
|
||||
OTEL_SERVICE_NAME: "runner"
|
||||
healthcheck:
|
||||
# 120s = 4× the default 30s poll interval, leaving generous headroom.
|
||||
test: ["CMD", "/healthcheck", "file", "/tmp/runner.alive", "120"]
|
||||
interval: 60s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
# ─── SvelteKit UI ─────────────────────────────────────────────────────────────
|
||||
ui:
|
||||
image: kalekber/libnovel-ui:${GIT_TAG:-latest}
|
||||
build:
|
||||
context: ./ui
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
BUILD_VERSION: "${GIT_TAG}"
|
||||
BUILD_COMMIT: "${GIT_COMMIT}"
|
||||
labels:
|
||||
com.centurylinklabs.watchtower.enable: "true"
|
||||
restart: unless-stopped
|
||||
stop_grace_period: 35s
|
||||
depends_on:
|
||||
pb-init:
|
||||
condition: service_completed_successfully
|
||||
backend:
|
||||
condition: service_healthy
|
||||
pocketbase:
|
||||
condition: service_healthy
|
||||
valkey:
|
||||
condition: service_healthy
|
||||
# No public port — all traffic via Caddy.
|
||||
expose:
|
||||
- "3000"
|
||||
environment:
|
||||
# ORIGIN must match the public URL Caddy serves on.
|
||||
# adapter-node uses this for SvelteKit's built-in CSRF origin check.
|
||||
ORIGIN: "${ORIGIN}"
|
||||
BACKEND_API_URL: "http://backend:8080"
|
||||
POCKETBASE_URL: "http://pocketbase:8090"
|
||||
POCKETBASE_ADMIN_EMAIL: "${POCKETBASE_ADMIN_EMAIL}"
|
||||
POCKETBASE_ADMIN_PASSWORD: "${POCKETBASE_ADMIN_PASSWORD}"
|
||||
AUTH_SECRET: "${AUTH_SECRET}"
|
||||
DEBUG_LOGIN_TOKEN: "${DEBUG_LOGIN_TOKEN}"
|
||||
PUBLIC_MINIO_PUBLIC_URL: "${MINIO_PUBLIC_ENDPOINT}"
|
||||
# Valkey
|
||||
VALKEY_ADDR: "valkey:6379"
|
||||
# Umami analytics
|
||||
PUBLIC_UMAMI_WEBSITE_ID: "${PUBLIC_UMAMI_WEBSITE_ID}"
|
||||
PUBLIC_UMAMI_SCRIPT_URL: "${PUBLIC_UMAMI_SCRIPT_URL}"
|
||||
# GlitchTip client + server-side error tracking
|
||||
PUBLIC_GLITCHTIP_DSN: "${PUBLIC_GLITCHTIP_DSN}"
|
||||
# OpenTelemetry tracing
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: "${OTEL_EXPORTER_OTLP_ENDPOINT}"
|
||||
OTEL_SERVICE_NAME: "ui"
|
||||
# OAuth2 providers
|
||||
GOOGLE_CLIENT_ID: "${GOOGLE_CLIENT_ID}"
|
||||
GOOGLE_CLIENT_SECRET: "${GOOGLE_CLIENT_SECRET}"
|
||||
GITHUB_CLIENT_ID: "${GITHUB_CLIENT_ID}"
|
||||
GITHUB_CLIENT_SECRET: "${GITHUB_CLIENT_SECRET}"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-qO-", "http://127.0.0.1:3000/health"]
|
||||
interval: 15s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
# ─── CrowdSec (threat detection + IP blocking) ───────────────────────────────
|
||||
# Reads Caddy JSON access logs from the shared caddy_logs volume and enforces
|
||||
# decisions via the Caddy bouncer plugin.
|
||||
crowdsec:
|
||||
image: crowdsecurity/crowdsec:latest
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
GID: "1000"
|
||||
COLLECTIONS: "crowdsecurity/caddy crowdsecurity/http-dos crowdsecurity/base-http-scenarios"
|
||||
volumes:
|
||||
- crowdsec_data:/var/lib/crowdsec/data
|
||||
- ./crowdsec/acquis.yaml:/etc/crowdsec/acquis.yaml:ro
|
||||
- caddy_logs:/var/log/caddy:ro
|
||||
expose:
|
||||
- "8080"
|
||||
healthcheck:
|
||||
test: ["CMD", "cscli", "version"]
|
||||
interval: 20s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
|
||||
# ─── Dozzle agent ────────────────────────────────────────────────────────────
|
||||
# Exposes prod container logs to the Dozzle instance on the homelab.
|
||||
# The homelab Dozzle connects here via DOZZLE_REMOTE_AGENT.
|
||||
# Port 7007 is bound to localhost only — not reachable from the internet.
|
||||
dozzle-agent:
|
||||
image: amir20/dozzle:latest
|
||||
restart: unless-stopped
|
||||
command: agent
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
ports:
|
||||
- "127.0.0.1:7007:7007"
|
||||
|
||||
# ─── CrowdSec bouncer registration ───────────────────────────────────────────
|
||||
# One-shot: registers the Caddy bouncer with the CrowdSec LAPI and writes the
|
||||
# generated API key to crowdsec/.crowdsec.env, which Caddy reads via env_file.
|
||||
# Uses the Docker socket to exec cscli inside the running crowdsec container.
|
||||
crowdsec-init:
|
||||
image: docker:cli
|
||||
depends_on:
|
||||
crowdsec:
|
||||
condition: service_healthy
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- ./crowdsec:/crowdsec-out
|
||||
entrypoint: >
|
||||
/bin/sh -c "
|
||||
out=/crowdsec-out/.crowdsec.env;
|
||||
existing=$$(grep -s '^CROWDSEC_API_KEY=.' \"$$out\" | cut -d= -f2-);
|
||||
if [ -n \"$$existing\" ]; then
|
||||
echo 'crowdsec-init: key already present, skipping registration';
|
||||
exit 0;
|
||||
fi;
|
||||
container=$$(docker ps --filter name=crowdsec --filter status=running --format '{{.Names}}' | grep -v init | head -1);
|
||||
echo \"crowdsec-init: using container $$container\";
|
||||
docker exec $$container cscli bouncers delete caddy-bouncer 2>/dev/null || true;
|
||||
key=$$(docker exec $$container cscli bouncers add caddy-bouncer -o raw 2>&1);
|
||||
if [ -z \"$$key\" ]; then
|
||||
echo 'crowdsec-init: ERROR — failed to obtain bouncer key' >&2;
|
||||
exit 1;
|
||||
fi;
|
||||
printf 'CROWDSEC_API_KEY=%s\n' \"$$key\" > \"$$out\";
|
||||
echo \"crowdsec-init: bouncer key written (key length: $${#key})\";
|
||||
"
|
||||
restart: "no"
|
||||
|
||||
|
||||
# ─── Caddy (reverse proxy + automatic HTTPS) ──────────────────────────────────
|
||||
# Custom build includes github.com/mholt/caddy-ratelimit,
|
||||
# github.com/hslatman/caddy-crowdsec-bouncer/http, and
|
||||
# github.com/mholt/caddy-l4 (TCP layer4 proxy for Redis).
|
||||
caddy:
|
||||
image: kalekber/libnovel-caddy:${GIT_TAG:-latest}
|
||||
build:
|
||||
context: ./caddy
|
||||
dockerfile: Dockerfile
|
||||
labels:
|
||||
com.centurylinklabs.watchtower.enable: "true"
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
backend:
|
||||
condition: service_healthy
|
||||
ui:
|
||||
condition: service_healthy
|
||||
crowdsec-init:
|
||||
condition: service_completed_successfully
|
||||
ports:
|
||||
- "80:80"
|
||||
- "443:443"
|
||||
- "443:443/udp" # HTTP/3 (QUIC)
|
||||
- "6380:6380" # Redis TCP proxy (TLS) for homelab runner → Asynq
|
||||
environment:
|
||||
DOMAIN: "${DOMAIN}"
|
||||
CADDY_ACME_EMAIL: "${CADDY_ACME_EMAIL}"
|
||||
env_file:
|
||||
- path: ./crowdsec/.crowdsec.env
|
||||
required: false
|
||||
volumes:
|
||||
- ./Caddyfile:/etc/caddy/Caddyfile:ro
|
||||
- ./caddy/errors:/srv/errors:ro
|
||||
- caddy_data:/data
|
||||
- caddy_config:/config
|
||||
- caddy_logs:/var/log/caddy
|
||||
|
||||
# ─── Watchtower (auto-redeploy custom services on new images) ────────────────
|
||||
# Only watches services labelled com.centurylinklabs.watchtower.enable=true.
|
||||
# Third-party infra images (minio, pocketbase, meilisearch, etc.) are excluded.
|
||||
# doppler binary is mounted from the host so watchtower fetches fresh secrets
|
||||
# on every start (notification URL, credentials) without baking them in.
|
||||
watchtower:
|
||||
image: containrrr/watchtower:latest
|
||||
restart: unless-stopped
|
||||
entrypoint: ["/usr/bin/doppler", "run", "--project", "libnovel", "--config", "prd", "--"]
|
||||
command: ["/watchtower", "--label-enable", "--interval", "300", "--cleanup"]
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- /usr/bin/doppler:/usr/bin/doppler:ro
|
||||
- /root/.doppler:/root/.doppler:ro
|
||||
environment:
|
||||
HOME: "/root"
|
||||
DOCKER_API_VERSION: "1.44"
|
||||
|
||||
volumes:
|
||||
static_books:
|
||||
minio_data:
|
||||
pb_data:
|
||||
meili_data:
|
||||
valkey_data:
|
||||
redis_data:
|
||||
caddy_data:
|
||||
caddy_config:
|
||||
caddy_logs:
|
||||
crowdsec_data:
|
||||
|
||||
82
docs/api-endpoints.md
Normal file
82
docs/api-endpoints.md
Normal file
@@ -0,0 +1,82 @@
|
||||
# API Endpoint Reference
|
||||
|
||||
> **Routing ownership map**: see [`docs/d2/api-routing.svg`](d2/api-routing.svg) (source: [`docs/d2/api-routing.d2`](d2/api-routing.d2)) for a visual overview of which paths Caddy sends to the backend directly vs. through SvelteKit, with auth levels colour-coded.
|
||||
|
||||
All traffic enters through **Caddy :443**. Caddy routes a subset of paths directly to the Go backend (bypassing SvelteKit); everything else goes to SvelteKit, which enforces auth before proxying onward.
|
||||
|
||||
## Health / Version
|
||||
|
||||
| Method | Path | Auth | Description |
|
||||
|--------|------|------|-------------|
|
||||
| `GET` | `/health` | — | Liveness probe. Returns `{"ok":true}`. |
|
||||
| `GET` | `/api/version` | — | Build version + commit hash. |
|
||||
|
||||
## Scrape Jobs (admin)
|
||||
|
||||
| Method | Path | Auth | Description |
|
||||
|--------|------|------|-------------|
|
||||
| `POST` | `/scrape` | admin | Enqueue full catalogue scrape. |
|
||||
| `POST` | `/scrape/book` | admin | Enqueue single-book scrape `{url}`. |
|
||||
| `POST` | `/scrape/book/range` | admin | Enqueue range scrape `{url, from, to?}`. |
|
||||
| `GET` | `/api/scrape/status` | admin | Current job status. |
|
||||
| `GET` | `/api/scrape/tasks` | admin | All scrape task records. |
|
||||
| `POST` | `/api/cancel-task/{id}` | admin | Cancel a pending task. |
|
||||
|
||||
## Browse / Catalogue
|
||||
|
||||
| Method | Path | Auth | Description |
|
||||
|--------|------|------|-------------|
|
||||
| `GET` | `/api/browse` | — | Live novelfire.net browse (MinIO page-1 cache). Legacy — used by save-browse subcommand. |
|
||||
| `GET` | `/api/catalogue` | — | **Primary browse endpoint.** Meilisearch-backed, paginated. Params: `q`, `page`, `limit`, `genre`, `status`, `sort` (`popular`\|`new`\|`update`\|`rank`\|`top-rated`). Returns an empty result set when Meilisearch is not configured. |
|
||||
| `GET` | `/api/search` | — | Full-text search: Meilisearch local results merged with live novelfire.net remote results. Param: `q` (≥ 2 chars). Used by iOS app. |
|
||||
| `GET` | `/api/ranking` | — | Top-ranked novels from PocketBase. |
|
||||
| `GET` | `/api/cover/{domain}/{slug}` | — | Proxy cover image from MinIO (redirect to presigned URL). |
|
||||
|
||||
## Book / Chapter Content
|
||||
|
||||
| Method | Path | Auth | Description |
|
||||
|--------|------|------|-------------|
|
||||
| `GET` | `/api/book-preview/{slug}` | — | Returns stored metadata + chapter list, or enqueues a scrape task (202) if unknown. |
|
||||
| `GET` | `/api/chapter-text/{slug}/{n}` | — | Chapter content as plain text (markdown stripped). |
|
||||
| `GET` | `/api/chapter-markdown/{slug}/{n}` | — | Chapter content as raw markdown from MinIO. |
|
||||
| `POST` | `/api/reindex/{slug}` | admin | Rebuild `chapters_idx` from MinIO objects. |
|
||||
|
||||
## Audio
|
||||
|
||||
| Method | Path | Auth | Description |
|
||||
|--------|------|------|-------------|
|
||||
| `POST` | `/api/audio/{slug}/{n}` | — | Trigger Kokoro TTS generation. Body: `{voice?}`. Returns `200 {status:"done"}` if cached, `202 {task_id, status}` if enqueued. |
|
||||
| `GET` | `/api/audio/status/{slug}/{n}` | — | Poll audio generation status. Param: `voice`. Returns `{status, task_id?, error?}`. |
|
||||
| `GET` | `/api/audio-proxy/{slug}/{n}` | — | Redirect to presigned MinIO audio URL. |
|
||||
| `GET` | `/api/voices` | — | List available Kokoro voices. Returns `{voices:[]}` on error. |
|
||||
|
||||
## Presigned URLs
|
||||
|
||||
All presign endpoints return a `302` redirect to a short-lived MinIO presigned
|
||||
URL. The URL is cached in Valkey (TTL ~55 min) to avoid regenerating on every
|
||||
request.
|
||||
|
||||
| Method | Path | Auth | Description |
|
||||
|--------|------|------|-------------|
|
||||
| `GET` | `/api/presign/chapter/{slug}/{n}` | — | Presigned URL for chapter markdown object. |
|
||||
| `GET` | `/api/presign/audio/{slug}/{n}` | — | Presigned URL for audio MP3. Param: `voice`. |
|
||||
| `GET` | `/api/presign/voice-sample/{voice}` | — | Presigned URL for voice sample MP3. |
|
||||
| `GET` | `/api/presign/avatar-upload/{userId}` | user | Presigned PUT URL for avatar upload. |
|
||||
| `GET` | `/api/presign/avatar/{userId}` | — | Presigned GET URL for avatar image. |
|
||||
|
||||
## Reading Progress
|
||||
|
||||
Session-scoped (anonymous via a cookie session ID, or tied to the authenticated user).
|
||||
|
||||
| Method | Path | Auth | Description |
|
||||
|--------|------|------|-------------|
|
||||
| `GET` | `/api/progress` | — | Get all reading progress for the current session/user. |
|
||||
| `POST` | `/api/progress/{slug}` | — | Set progress. Body: `{chapter}`. |
|
||||
| `DELETE` | `/api/progress/{slug}` | — | Delete progress for a book. |
|
||||
|
||||
## Notes
|
||||
|
||||
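- **Auth**: The backend does not enforce auth itself — it trusts every incoming request. The SvelteKit UI layer enforces admin/user guards before proxying requests; paths that Caddy routes directly to the backend bypass SvelteKit and therefore do not pass through those guards.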
|
||||
- **`/api/catalogue` vs `/api/browse`**: `/api/catalogue` is the primary UI endpoint (Meilisearch, always-local, fast). `/api/browse` serves the live novelfire.net browse page, backed by a MinIO page-1 cache, and is only used internally by the `save-browse` subcommand.
|
||||
- **Meilisearch fallback**: When `MEILI_URL` is unset, `/api/catalogue` returns `{books:[], has_next:false}` and `/api/search` falls back to a PocketBase substring scan.
|
||||
- **`BACKEND_API_URL`**: The SvelteKit UI reads this env var (default `http://localhost:8080`) to reach the backend server-side. In docker-compose it is set to `http://backend:8080`.
|
||||
208
docs/d2/api-routing.d2
Normal file
208
docs/d2/api-routing.d2
Normal file
@@ -0,0 +1,208 @@
|
||||
direction: right
|
||||
|
||||
# ─── Legend ───────────────────────────────────────────────────────────────────
|
||||
|
||||
legend: Legend {
|
||||
style.fill: "#fafafa"
|
||||
style.stroke: "#d4d4d8"
|
||||
|
||||
pub: public {
|
||||
style.fill: "#f0fdf4"
|
||||
style.font-color: "#15803d"
|
||||
style.stroke: "#86efac"
|
||||
}
|
||||
user: user auth {
|
||||
style.fill: "#eff6ff"
|
||||
style.font-color: "#1d4ed8"
|
||||
style.stroke: "#93c5fd"
|
||||
}
|
||||
adm: admin only {
|
||||
style.fill: "#fff7ed"
|
||||
style.font-color: "#c2410c"
|
||||
style.stroke: "#fdba74"
|
||||
}
|
||||
}
|
||||
|
||||
# ─── Client ───────────────────────────────────────────────────────────────────
|
||||
|
||||
client: Browser / iOS App {
|
||||
shape: person
|
||||
style.fill: "#fff9e6"
|
||||
}
|
||||
|
||||
# ─── Caddy ────────────────────────────────────────────────────────────────────
|
||||
|
||||
caddy: Caddy :443 {
|
||||
shape: rectangle
|
||||
style.fill: "#f1f5f9"
|
||||
label: "Caddy :443\ncustom build · caddy-l4 · caddy-ratelimit\nCrowdSec bouncer · security headers\nrate limiting · static error pages\nRedis TCP proxy :6380"
|
||||
}
|
||||
|
||||
# ─── SvelteKit UI ─────────────────────────────────────────────────────────────
|
||||
# All routes here pass through SvelteKit — auth is enforced server-side.
|
||||
|
||||
sk: SvelteKit UI :3000 {
|
||||
style.fill: "#fef3c7"
|
||||
|
||||
auth: Auth {
|
||||
style.fill: "#fde68a"
|
||||
style.stroke: "#f59e0b"
|
||||
label: "POST /api/auth/login\nPOST /api/auth/register\nPOST /api/auth/change-password\nGET /api/auth/session"
|
||||
}
|
||||
|
||||
catalogue_sk: Catalogue {
|
||||
style.fill: "#f0fdf4"
|
||||
style.stroke: "#86efac"
|
||||
label: "GET /api/catalogue-page (infinite scroll)\nGET /api/search"
|
||||
}
|
||||
|
||||
book_sk: Book {
|
||||
style.fill: "#f0fdf4"
|
||||
style.stroke: "#86efac"
|
||||
label: "GET /api/book/{slug}\nGET /api/chapter/{slug}/{n}\nGET /api/chapter-text-preview/{slug}/{n}"
|
||||
}
|
||||
|
||||
scrape_sk: Scrape (admin) {
|
||||
style.fill: "#fff7ed"
|
||||
style.stroke: "#fdba74"
|
||||
label: "GET /api/scrape/status\nGET /api/scrape/tasks\nPOST /api/scrape\nPOST /api/scrape/book\nPOST /api/scrape/book/range\nPOST /api/scrape/cancel/{id}"
|
||||
}
|
||||
|
||||
audio_sk: Audio {
|
||||
style.fill: "#f0fdf4"
|
||||
style.stroke: "#86efac"
|
||||
label: "POST /api/audio/{slug}/{n}\nGET /api/audio/status/{slug}/{n}\nGET /api/voices"
|
||||
}
|
||||
|
||||
presign_sk: Presigned URLs (public) {
|
||||
style.fill: "#f0fdf4"
|
||||
style.stroke: "#86efac"
|
||||
label: "GET /api/presign/chapter/{slug}/{n}\nGET /api/presign/audio/{slug}/{n}\nGET /api/presign/voice-sample/{voice}"
|
||||
}
|
||||
|
||||
presign_user: Presigned URLs (user) {
|
||||
style.fill: "#eff6ff"
|
||||
style.stroke: "#93c5fd"
|
||||
label: "GET /api/presign/avatar-upload/{userId}\nGET /api/presign/avatar/{userId}"
|
||||
}
|
||||
|
||||
progress_sk: Progress {
|
||||
style.fill: "#f0fdf4"
|
||||
style.stroke: "#86efac"
|
||||
label: "GET /api/progress\nPOST /api/progress/{slug}\nDELETE /api/progress/{slug}"
|
||||
}
|
||||
|
||||
library_sk: Library {
|
||||
style.fill: "#f0fdf4"
|
||||
style.stroke: "#86efac"
|
||||
label: "GET /api/library\nPOST /api/library/{slug}\nDELETE /api/library/{slug}"
|
||||
}
|
||||
|
||||
comments_sk: Comments {
|
||||
style.fill: "#f0fdf4"
|
||||
style.stroke: "#86efac"
|
||||
label: "GET /api/comments/{slug}\nPOST /api/comments/{slug}"
|
||||
}
|
||||
}
|
||||
|
||||
# ─── Go Backend ───────────────────────────────────────────────────────────────
|
||||
# Caddy proxies these paths directly — bypasses SvelteKit entirely.
|
||||
|
||||
be: Backend API :8080 {
|
||||
style.fill: "#eef3ff"
|
||||
|
||||
health_be: Health / Version {
|
||||
style.fill: "#f0fdf4"
|
||||
style.stroke: "#86efac"
|
||||
label: "GET /health\nGET /api/version"
|
||||
}
|
||||
|
||||
scrape_be: Scrape admin (direct) {
|
||||
style.fill: "#fff7ed"
|
||||
style.stroke: "#fdba74"
|
||||
label: "POST /scrape\nPOST /scrape/book\nPOST /scrape/book/range"
|
||||
}
|
||||
|
||||
catalogue_be: Catalogue {
|
||||
style.fill: "#f0fdf4"
|
||||
style.stroke: "#86efac"
|
||||
label: "GET /api/catalogue (Meilisearch)\nGET /api/browse (legacy MinIO cache)\nGET /api/ranking\nGET /api/cover/{domain}/{slug}"
|
||||
}
|
||||
|
||||
book_be: Book / Chapter {
|
||||
style.fill: "#f0fdf4"
|
||||
style.stroke: "#86efac"
|
||||
label: "GET /api/book-preview/{slug}\nGET /api/chapter-text/{slug}/{n}\nGET /api/chapter-markdown/{slug}/{n}\nPOST /api/reindex/{slug} ⚠ admin"
|
||||
}
|
||||
|
||||
audio_be: Audio {
|
||||
style.fill: "#f0fdf4"
|
||||
style.stroke: "#86efac"
|
||||
label: "POST /api/audio/{slug}/{n}\nGET /api/audio/status/{slug}/{n}\nGET /api/audio-proxy/{slug}/{n}\nGET /api/voices"
|
||||
}
|
||||
|
||||
presign_be: Presigned URLs {
|
||||
style.fill: "#f0fdf4"
|
||||
style.stroke: "#86efac"
|
||||
label: "GET /api/presign/chapter/{slug}/{n}\nGET /api/presign/audio/{slug}/{n}\nGET /api/presign/voice-sample/{voice}\nGET /api/presign/avatar-upload/{userId}\nGET /api/presign/avatar/{userId}"
|
||||
}
|
||||
}
|
||||
|
||||
# ─── Storage ──────────────────────────────────────────────────────────────────
|
||||
|
||||
storage: Storage {
|
||||
style.fill: "#eaf7ea"
|
||||
|
||||
pb: PocketBase :8090 {
|
||||
shape: cylinder
|
||||
label: "auth · books · progress\ncomments · library\nscrape_jobs · audio_cache\nranking"
|
||||
}
|
||||
mn: MinIO :9000 {
|
||||
shape: cylinder
|
||||
label: "chapters · audio\navatars · catalogue (browse)"
|
||||
}
|
||||
ms: Meilisearch :7700 {
|
||||
shape: cylinder
|
||||
label: "index: books\nfilterable: status · genres\nsortable: rank · rating\n total_chapters · meta_updated"
|
||||
}
|
||||
vk: Valkey :6379 {
|
||||
shape: cylinder
|
||||
label: "presign URL cache (TTL ~55 min)\nAsynq job queue (runner)"
|
||||
}
|
||||
}
|
||||
|
||||
# ─── Caddy routing ────────────────────────────────────────────────────────────
|
||||
|
||||
client -> caddy: HTTPS :443
|
||||
|
||||
caddy -> sk: "/* (catch-all)\n→ SvelteKit enforces auth"
|
||||
caddy -> be: "/health /scrape*\n/api/browse /api/catalogue /api/ranking\n/api/version /api/book-preview/*\n/api/chapter-text/* /api/chapter-markdown/*\n/api/reindex/* /api/cover/*\n/api/audio* /api/voices /api/presign/*"
|
||||
caddy -> storage.mn: "/avatars/* /audio/* /chapters/*\n(presigned MinIO GETs)"
|
||||
|
||||
# ─── SvelteKit → Backend (server-side proxy) ──────────────────────────────────
|
||||
|
||||
sk.catalogue_sk -> be.catalogue_be: internal proxy
|
||||
sk.book_sk -> be.book_be: internal proxy
|
||||
sk.audio_sk -> be.audio_be: internal proxy
|
||||
sk.presign_sk -> be.presign_be: internal proxy
|
||||
sk.presign_user -> be.presign_be: internal proxy
|
||||
|
||||
# ─── SvelteKit → Storage (direct) ────────────────────────────────────────────
|
||||
|
||||
sk.auth -> storage.pb: sessions / users
|
||||
sk.scrape_sk -> storage.pb: scrape job records
|
||||
sk.progress_sk -> storage.pb
|
||||
sk.library_sk -> storage.pb
|
||||
sk.comments_sk -> storage.pb
|
||||
|
||||
# ─── Backend → Storage ────────────────────────────────────────────────────────
|
||||
|
||||
be.catalogue_be -> storage.ms: full-text search + facets
|
||||
be.catalogue_be -> storage.pb: ranking records
|
||||
be.catalogue_be -> storage.mn: cover presign
|
||||
be.book_be -> storage.mn: chapter objects
|
||||
be.book_be -> storage.pb: book metadata
|
||||
be.audio_be -> storage.mn: audio presign
|
||||
be.audio_be -> storage.vk: presign cache
|
||||
be.presign_be -> storage.vk: check / set presign cache
|
||||
be.presign_be -> storage.mn: generate presigned URL
|
||||
128
docs/d2/api-routing.svg
Normal file
128
docs/d2/api-routing.svg
Normal file
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 60 KiB |
180
docs/d2/architecture.d2
Normal file
180
docs/d2/architecture.d2
Normal file
@@ -0,0 +1,180 @@
|
||||
direction: right
|
||||
|
||||
# ─── External ─────────────────────────────────────────────────────────────────
|
||||
|
||||
novelfire: novelfire.net {
|
||||
shape: cloud
|
||||
style.fill: "#f0f4ff"
|
||||
label: "novelfire.net\n(scrape source)"
|
||||
}
|
||||
|
||||
kokoro: Kokoro-FastAPI TTS {
|
||||
shape: cloud
|
||||
style.fill: "#f0f4ff"
|
||||
label: "Kokoro-FastAPI TTS\n(self-hosted · homelab)\nchapter audio"
|
||||
}
|
||||
|
||||
pockettts: pocket-tts {
|
||||
shape: cloud
|
||||
style.fill: "#f0f4ff"
|
||||
label: "pocket-tts\n(self-hosted · homelab)\nvoice sample MP3s"
|
||||
}
|
||||
|
||||
letsencrypt: Let's Encrypt {
|
||||
shape: cloud
|
||||
style.fill: "#f0f4ff"
|
||||
label: "Let's Encrypt\n(ACME TLS-ALPN-01)"
|
||||
}
|
||||
|
||||
browser: Browser / iOS App {
|
||||
shape: person
|
||||
style.fill: "#fff9e6"
|
||||
}
|
||||
|
||||
# ─── Init containers (one-shot) ───────────────────────────────────────────────
|
||||
|
||||
init: Init containers {
|
||||
style.fill: "#f5f5f5"
|
||||
style.stroke-dash: 4
|
||||
|
||||
minio-init: minio-init {
|
||||
shape: rectangle
|
||||
label: "minio-init\n(mc: create buckets\n chapters · audio\n avatars · catalogue)"
|
||||
}
|
||||
|
||||
pb-init: pb-init {
|
||||
shape: rectangle
|
||||
label: "pb-init\n(bootstrap PocketBase\n collections + schema)"
|
||||
}
|
||||
}
|
||||
|
||||
# ─── Storage ──────────────────────────────────────────────────────────────────
|
||||
|
||||
storage: Storage {
|
||||
style.fill: "#eaf7ea"
|
||||
|
||||
minio: MinIO {
|
||||
shape: cylinder
|
||||
label: "MinIO :9000\nbuckets:\n chapters · audio\n avatars · catalogue"
|
||||
}
|
||||
|
||||
pocketbase: PocketBase {
|
||||
shape: cylinder
|
||||
label: "PocketBase :8090\ncollections:\n books · chapters_idx\n audio_cache · progress\n scrape_jobs · app_users\n ranking · library\n comments"
|
||||
}
|
||||
|
||||
valkey: Valkey {
|
||||
shape: cylinder
|
||||
label: "Valkey :6379\npresign URL cache (TTL ~55 min)\nAsynq job queue (runner tasks)"
|
||||
}
|
||||
|
||||
meilisearch: Meilisearch {
|
||||
shape: cylinder
|
||||
label: "Meilisearch :7700\nindex: books\n(filterable: status · genres\n sortable: rank · rating\n total_chapters · meta_updated)"
|
||||
}
|
||||
}
|
||||
|
||||
# ─── Application — prod VPS (165.22.70.138) ───────────────────────────────────
|
||||
|
||||
app: Application — prod (165.22.70.138) {
|
||||
style.fill: "#eef3ff"
|
||||
|
||||
caddy: caddy {
|
||||
shape: rectangle
|
||||
label: "Caddy :443 / :80 / :6380\ncustom build\n+ caddy-l4 (Redis TCP proxy)\n+ caddy-ratelimit\nauto-HTTPS · security headers\nrate limiting (per-IP)\nstatic error pages (404/502/503/504)\nCrowdSec bouncer"
|
||||
}
|
||||
|
||||
backend: backend {
|
||||
shape: rectangle
|
||||
label: "Backend API :8080\n(Go)\nHTTP API server\nffmpeg (audio sample conv.)\nOpenTelemetry tracing\nSentry / GlitchTip errors"
|
||||
}
|
||||
|
||||
ui: ui {
|
||||
shape: rectangle
|
||||
label: "SvelteKit UI :3000\n(adapter-node)\nSSR · session auth\nserver-side API proxy"
|
||||
}
|
||||
|
||||
crowdsec: CrowdSec {
|
||||
shape: rectangle
|
||||
label: "CrowdSec :8080\nsecurity engine\nreads Caddy JSON logs\nbouncer integrated in Caddy"
|
||||
}
|
||||
|
||||
dozzle: Dozzle agent {
|
||||
shape: rectangle
|
||||
label: "Dozzle agent\n127.0.0.1:7007\nlog relay → homelab dashboard"
|
||||
}
|
||||
}
|
||||
|
||||
# ─── Runner — homelab (192.168.0.109) ────────────────────────────────────────
|
||||
|
||||
homelab: Runner — homelab (192.168.0.109) {
|
||||
style.fill: "#fef9ec"
|
||||
|
||||
runner: runner {
|
||||
shape: rectangle
|
||||
label: "Runner :9091\n(Go background worker)\nscrape pipeline\nTTS audio job queue\nPrometheus /metrics\ncron: catalogue refresh\nAsynq worker → Valkey"
|
||||
}
|
||||
}
|
||||
|
||||
# ─── Ops ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
ops: Ops {
|
||||
style.fill: "#f5f5f5"
|
||||
|
||||
watchtower: Watchtower {
|
||||
shape: rectangle
|
||||
label: "Watchtower\n(containrrr/watchtower)\npolls Docker Hub every 5 min\nautopulls + redeploys:\n backend · ui\n(runner: label-disabled on prod)"
|
||||
}
|
||||
}
|
||||
|
||||
# ─── CI / CD ──────────────────────────────────────────────────────────────────
|
||||
|
||||
cicd: CI / CD {
|
||||
style.fill: "#f0f9ff"
|
||||
|
||||
gitea: Gitea Actions {
|
||||
shape: rectangle
|
||||
label: "Gitea Actions\n(homelab runner)\ntag v* trigger:\n test-backend\n check-ui (type-check + build)\n docker-backend\n docker-runner\n docker-ui (bakes releases.json)\n docker-caddy\n → push Docker Hub\n → Gitea Release"
|
||||
}
|
||||
}
|
||||
|
||||
# ─── Init → Storage ───────────────────────────────────────────────────────────
|
||||
|
||||
init.minio-init -> storage.minio: create buckets {style.stroke-dash: 4}
|
||||
init.pb-init -> storage.pocketbase: bootstrap schema {style.stroke-dash: 4}
|
||||
|
||||
# ─── App internal ─────────────────────────────────────────────────────────────
|
||||
|
||||
app.caddy -> app.ui: "/* (catch-all)\nSvelteKit — auth enforced"
|
||||
app.caddy -> app.backend: "/health /scrape*\n/api/browse /api/catalogue\n/api/ranking /api/version\n/api/book-preview/*\n/api/chapter-text/*\n/api/chapter-markdown/*\n/api/reindex/* /api/cover/*\n/api/audio-proxy/* /api/voices\n/api/audio* /api/presign/*"
|
||||
app.caddy -> storage.minio: "/avatars/* /audio/*\n/chapters/*\n(presigned GETs)"
|
||||
app.caddy -> app.crowdsec: bouncer check (15 s poll)
|
||||
app.caddy -> letsencrypt: ACME cert (TLS-ALPN-01)
|
||||
|
||||
app.ui -> app.backend: "internal REST proxy\n(server-side only)"
|
||||
app.ui -> storage.pocketbase: "auth · sessions\nprogress · library\ncomments"
|
||||
|
||||
app.backend -> storage.minio: "chapter objs · audio MP3s\navatars · browse cache"
|
||||
app.backend -> storage.pocketbase: "books · scrape_jobs\naudio_cache · ranking"
|
||||
app.backend -> storage.valkey: "presign URL cache\n(SET/GET TTL ~55 min)"
|
||||
app.backend -> storage.meilisearch: "catalogue search\nfacets: genres · status"
|
||||
app.backend -> pockettts: "voice sample gen.\n(on-demand · ffmpeg conv.)"
|
||||
|
||||
# ─── Runner → deps ────────────────────────────────────────────────────────────
|
||||
|
||||
homelab.runner -> novelfire: "HTTP scrape\nHTML → Markdown"
|
||||
homelab.runner -> kokoro: "TTS generation\ntext → MP3"
|
||||
homelab.runner -> storage.minio: "write chapters\n& audio MP3s"
|
||||
homelab.runner -> storage.pocketbase: "read/update scrape_jobs\nwrite book records"
|
||||
homelab.runner -> storage.meilisearch: "index books\n(on scrape completion)"
|
||||
homelab.runner -> storage.valkey: "Asynq job queue\n(task consume)"
|
||||
|
||||
# ─── Client ───────────────────────────────────────────────────────────────────
|
||||
|
||||
browser -> app.caddy: HTTPS :443\n(single entry point)
|
||||
|
||||
# ─── Ops / CI ─────────────────────────────────────────────────────────────────
|
||||
|
||||
ops.watchtower -> app.backend: watch (label-enabled)
|
||||
ops.watchtower -> app.ui: watch (label-enabled)
|
||||
cicd.gitea -> ops.watchtower: push to Docker Hub\n→ Watchtower detects new tag
|
||||
132
docs/d2/architecture.svg
Normal file
132
docs/d2/architecture.svg
Normal file
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 65 KiB |
72
docs/mermaid/architecture.mermaid.md
Normal file
72
docs/mermaid/architecture.mermaid.md
Normal file
@@ -0,0 +1,72 @@
|
||||
# Architecture Overview
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
%% ── External ──────────────────────────────────────────────────────────
|
||||
NF([novelfire.net])
|
||||
KK([Kokoro-FastAPI TTS])
|
||||
LE([Let's Encrypt])
|
||||
CL([Browser / iOS App])
|
||||
|
||||
%% ── Init containers ───────────────────────────────────────────────────
|
||||
subgraph INIT["Init containers (one-shot)"]
|
||||
MI[minio-init\nmc: create buckets]
|
||||
PI[pb-init\nbootstrap collections]
|
||||
end
|
||||
|
||||
%% ── Storage ───────────────────────────────────────────────────────────
|
||||
subgraph STORAGE["Storage"]
|
||||
MN[(MinIO :9000\nchapters · audio\navatars · browse)]
|
||||
PB[(PocketBase :8090\nbooks · chapters_idx\naudio_cache · progress\nscrape_jobs · app_users · ranking)]
|
||||
VK[(Valkey :6379\npresign URL cache\nTTL-based · shared)]
|
||||
MS[(Meilisearch :7700\nindex: books)]
|
||||
end
|
||||
|
||||
%% ── Application ───────────────────────────────────────────────────────
|
||||
subgraph APP["Application"]
|
||||
CD["Caddy :443/:80\ncustom build + caddy-ratelimit\nauto-HTTPS · security headers\nrate limiting · error pages"]
|
||||
BE[Backend API :8080\nGo HTTP server]
|
||||
RN[Runner :9091\nGo background worker\n/metrics endpoint]
|
||||
UI[SvelteKit UI :3000\nadapter-node]
|
||||
end
|
||||
|
||||
%% ── Ops ───────────────────────────────────────────────────────────────
|
||||
subgraph OPS["Ops"]
|
||||
WT[Watchtower\npolls every 5 min\nautopull + redeploy\nbackend · runner · ui]
|
||||
end
|
||||
|
||||
%% ── Init → Storage ────────────────────────────────────────────────────
|
||||
MI -.->|create buckets| MN
|
||||
PI -.->|bootstrap schema| PB
|
||||
|
||||
%% ── App → Storage ─────────────────────────────────────────────────────
|
||||
BE -->|blobs| MN
|
||||
BE -->|structured records| PB
|
||||
BE -->|cache presigned URLs| VK
|
||||
RN -->|chapter markdown & audio| MN
|
||||
RN -->|read/update jobs & books| PB
|
||||
RN -->|index books on scrape| MS
|
||||
UI -->|read presign cache| VK
|
||||
UI -->|auth · progress · comments| PB
|
||||
|
||||
%% ── App internal ──────────────────────────────────────────────────────
|
||||
UI -->|"REST API (server-side)\n/api/catalogue /api/book-preview\n/api/chapter-text /api/audio"| BE
|
||||
|
||||
%% ── Caddy routing ─────────────────────────────────────────────────────
|
||||
CD -->|"/* catch-all\n/api/scrape/*\n/api/chapter-text-preview/*\n→ SvelteKit (auth enforced)"| UI
|
||||
CD -->|"/health /scrape*\n/api/browse /api/book-preview/*\n/api/chapter-text/*\n/api/reindex/* /api/cover/*\n/api/audio-proxy/*"| BE
|
||||
CD -->|/avatars/* presigned GETs| MN
|
||||
|
||||
%% ── Runner → External ─────────────────────────────────────────────────
|
||||
RN -->|scrape HTTP GET| NF
|
||||
RN -->|TTS HTTP POST| KK
|
||||
CD -->|ACME certificate| LE
|
||||
|
||||
%% ── Ops ───────────────────────────────────────────────────────────────
|
||||
WT -->|watch label-enabled| BE
|
||||
WT -->|watch label-enabled| RN
|
||||
WT -->|watch label-enabled| UI
|
||||
|
||||
%% ── Client ────────────────────────────────────────────────────────────
|
||||
CL -->|HTTPS :443 single entry| CD
|
||||
```
|
||||
102
docs/mermaid/data-flow.mermaid.md
Normal file
102
docs/mermaid/data-flow.mermaid.md
Normal file
@@ -0,0 +1,102 @@
|
||||
# Data Flow — Scrape & TTS Job Pipeline
|
||||
|
||||
How content moves from novelfire.net through the runner into storage, and how
|
||||
audio is generated on-demand via the backend.
|
||||
|
||||
## Catalogue Scrape Pipeline
|
||||
|
||||
The runner performs a background catalogue walk on startup and then on a
|
||||
configurable interval (`RUNNER_CATALOGUE_REFRESH_INTERVAL`, default 24 h).
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A([Runner starts / refresh tick]) --> B[Walk novelfire.net catalogue\npages 1…N]
|
||||
B --> C{Book already\nin PocketBase?}
|
||||
C -- no --> D[Scrape book metadata\ntitle · author · genres\ncover · summary · status]
|
||||
C -- yes --> E[Check for new chapters\ncompare total_chapters]
|
||||
D --> F[Write BookMeta\nto PocketBase books]
|
||||
E --> G{New chapters\nfound?}
|
||||
G -- no --> Z([Done — next book])
|
||||
G -- yes --> H
|
||||
F --> H[Scrape chapter list with upTo limit\n→ chapters_idx in PocketBase\nretries on 429 with Retry-After backoff]
|
||||
H --> I[Worker pool — N goroutines\nRUNNER_MAX_CONCURRENT_SCRAPE]
|
||||
I --> J[For each missing chapter:\nGET chapter HTML from novelfire.net]
|
||||
J --> K[Parse HTML → Markdown\nhtmlutil.NodeToMarkdown]
|
||||
K --> L[PUT object to MinIO\nchapters/{slug}/{n}.md]
|
||||
L --> M[Upsert book doc\nto Meilisearch index: books]
|
||||
M --> Z
|
||||
F --> M
|
||||
```
|
||||
|
||||
## On-Demand Single-Book Scrape
|
||||
|
||||
Triggered when a user visits `/books/{slug}` and the book is not in PocketBase.
|
||||
The UI calls `GET /api/book-preview/{slug}` → backend enqueues a scrape task.
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
actor U as User
|
||||
participant UI as SvelteKit UI
|
||||
participant BE as Backend API
|
||||
participant TQ as Task Queue (PocketBase)
|
||||
participant RN as Runner
|
||||
participant NF as novelfire.net
|
||||
participant PB as PocketBase
|
||||
participant MN as MinIO
|
||||
participant MS as Meilisearch
|
||||
|
||||
U->>UI: Visit /books/{slug}
|
||||
UI->>BE: GET /api/book-preview/{slug}
|
||||
BE->>PB: getBook(slug) — not found
|
||||
BE->>TQ: INSERT scrape_task (slug, status=pending)
|
||||
BE-->>UI: 202 {task_id, message}
|
||||
UI-->>U: "Scraping…" placeholder
|
||||
|
||||
RN->>TQ: Poll for pending tasks
|
||||
TQ-->>RN: scrape_task (slug)
|
||||
RN->>NF: GET novelfire.net/book/{slug}
|
||||
NF-->>RN: HTML
|
||||
RN->>PB: upsert book + chapters_idx
|
||||
RN->>MN: PUT chapter objects
|
||||
RN->>MS: UpsertBook doc
|
||||
RN->>TQ: UPDATE task status=done
|
||||
|
||||
U->>UI: Poll GET /api/scrape/tasks/{task_id}
|
||||
UI->>BE: GET /api/scrape/status
|
||||
BE->>TQ: get task
|
||||
TQ-->>BE: status=done
|
||||
BE-->>UI: {status:"done"}
|
||||
UI-->>U: Redirect to /books/{slug}
|
||||
```
|
||||
|
||||
## TTS Audio Generation Pipeline
|
||||
|
||||
Audio is generated lazily: on first request the job is enqueued; subsequent
|
||||
requests poll for completion and then stream from MinIO via presigned URL.
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A(["POST /api/audio/{slug}/{n}\nbody: voice=af_bella"]) --> B{Audio already\nin MinIO?}
|
||||
B -- yes --> C[200 status: done]
|
||||
B -- no --> D{Job already\nin queue?}
|
||||
D -- "yes pending/generating" --> E[202 task_id + status]
|
||||
D -- no --> F[INSERT audio_task\nstatus=pending\nin PocketBase]
|
||||
F --> E
|
||||
|
||||
G([Runner polls task queue]) --> H[Claim audio_task\nstatus=generating]
|
||||
H --> I["GET /api/chapter-text/{slug}/{n}\nfrom backend — plain text"]
|
||||
I --> J[POST /v1/audio/speech\nto Kokoro-FastAPI\nbody: text + voice]
|
||||
J --> K[Stream MP3 response]
|
||||
K --> L[PUT object to MinIO\naudio/{slug}/{n}/{voice}.mp3]
|
||||
L --> M[UPDATE audio_task\nstatus=done]
|
||||
|
||||
N(["Client polls\nGET /api/audio/status/{slug}/{n}"]) --> O{status?}
|
||||
O -- "pending/generating" --> N
|
||||
O -- done --> P["GET /api/presign/audio/{slug}/{n}"]
|
||||
P --> Q{Valkey cache hit?}
|
||||
Q -- yes --> R[302 → presigned URL]
|
||||
Q -- no --> S[GeneratePresignedURL\nfrom MinIO — TTL 1h]
|
||||
S --> T[Cache in Valkey\nTTL 3500s]
|
||||
T --> R
|
||||
R --> U([Client streams audio\ndirectly from MinIO])
|
||||
```
|
||||
111
docs/mermaid/request-flow.mermaid.md
Normal file
111
docs/mermaid/request-flow.mermaid.md
Normal file
@@ -0,0 +1,111 @@
|
||||
# Request Flow
|
||||
|
||||
Representative request paths through the stack: a **page load** (SSR), a
|
||||
**media playback** (presigned URL → direct MinIO stream), a **chapter read**, and the Caddy request lifecycle.
|
||||
|
||||
## SSR Page Load — Catalogue / Book Detail
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
actor C as Browser / iOS App
|
||||
participant CD as Caddy :443
|
||||
participant UI as SvelteKit UI :3000
|
||||
participant BE as Backend API :8080
|
||||
participant MS as Meilisearch :7700
|
||||
participant PB as PocketBase :8090
|
||||
participant VK as Valkey :6379
|
||||
|
||||
C->>CD: HTTPS GET /catalogue
|
||||
CD->>UI: proxy /* (SvelteKit catch-all)
|
||||
UI->>BE: GET /api/catalogue?page=1&sort=popular
|
||||
BE->>MS: search(query, filters, sort)
|
||||
MS-->>BE: [{slug, title, …}, …]
|
||||
BE-->>UI: {books[], page, total, has_next}
|
||||
UI-->>CD: SSR HTML
|
||||
CD-->>C: 200 HTML
|
||||
|
||||
Note over C,UI: Infinite scroll — client fetches next page via SvelteKit API route
|
||||
C->>CD: HTTPS GET /api/catalogue-page?page=2
|
||||
CD->>UI: proxy /* (SvelteKit /api/catalogue-page server route)
|
||||
UI->>BE: GET /api/catalogue?page=2
|
||||
BE->>MS: search(…)
|
||||
MS-->>BE: next page
|
||||
BE-->>UI: {books[], …}
|
||||
UI-->>C: JSON
|
||||
```
|
||||
|
||||
## Audio Playback — Presigned URL Flow
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
actor C as Browser / iOS App
|
||||
participant CD as Caddy :443
|
||||
participant UI as SvelteKit UI :3000
|
||||
participant BE as Backend API :8080
|
||||
participant VK as Valkey :6379
|
||||
participant MN as MinIO :9000
|
||||
|
||||
C->>CD: GET /api/presign/audio/{slug}/{n}?voice=af_bella
|
||||
CD->>UI: proxy /* (SvelteKit /api/presign/audio route)
|
||||
UI->>BE: GET /api/presign/audio/{slug}/{n}?voice=af_bella
|
||||
BE->>VK: GET presign:audio:{slug}:{n}:{voice}
|
||||
alt cache hit
|
||||
VK-->>BE: presigned URL (TTL remaining)
|
||||
BE-->>UI: 302 redirect → presigned URL
|
||||
UI-->>C: 302 redirect
|
||||
else cache miss
|
||||
BE->>MN: GeneratePresignedURL(audio-bucket, key, 1h)
|
||||
MN-->>BE: presigned URL
|
||||
BE->>VK: SET presign:audio:… EX 3500
|
||||
BE-->>UI: 302 redirect → presigned URL
|
||||
UI-->>C: 302 redirect
|
||||
end
|
||||
C->>MN: GET presigned URL (direct, no proxy)
|
||||
MN-->>C: audio/mpeg stream
|
||||
```
|
||||
|
||||
## Chapter Read — SSR + Content Fetch
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
actor C as Browser / iOS App
|
||||
participant CD as Caddy :443
|
||||
participant UI as SvelteKit UI :3000
|
||||
participant BE as Backend API :8080
|
||||
participant PB as PocketBase :8090
|
||||
participant MN as MinIO :9000
|
||||
|
||||
C->>CD: HTTPS GET /books/{slug}/chapters/{n}
|
||||
CD->>UI: proxy /* (SvelteKit catch-all)
|
||||
UI->>PB: getBook(slug) + listChapterIdx(slug)
|
||||
PB-->>UI: book meta + chapter list
|
||||
UI->>BE: GET /api/chapter-text/{slug}/{n}
|
||||
BE->>MN: GetObject(chapters-bucket, {slug}/{n}.md)
|
||||
MN-->>BE: markdown text
|
||||
BE-->>UI: plain text (markdown stripped)
|
||||
Note over UI: marked() → HTML
|
||||
UI-->>CD: SSR HTML
|
||||
CD-->>C: 200 HTML
|
||||
```
|
||||
|
||||
## Caddy Request Lifecycle
|
||||
|
||||
Shows how security hardening applies before a request reaches any upstream.
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A([Incoming HTTPS request]) --> B[TLS termination\nLet's Encrypt cert]
|
||||
B --> C{Rate limit check\ncaddy-ratelimit}
|
||||
C -- over limit --> D[429 Too Many Requests]
|
||||
C -- ok --> E[Add security headers\nX-Frame-Options · X-Content-Type-Options\nReferrer-Policy · Permissions-Policy\nHSTS · X-XSS-Protection\nremove Server header]
|
||||
E --> F{Route match}
|
||||
F -- "/health /scrape*\n/api/browse /api/book-preview/*\n/api/chapter-text/*\n/api/reindex/* /api/cover/*\n/api/audio-proxy/*" --> G[reverse_proxy → backend:8080]
|
||||
F -- "/avatars/*" --> H[reverse_proxy → minio:9000]
|
||||
F -- "/* everything else\n(incl. /api/scrape/*\n/api/chapter-text-preview/*)" --> I[reverse_proxy → ui:3000\nSvelteKit auth middleware runs]
|
||||
G --> J{Upstream healthy?}
|
||||
H --> J
|
||||
I --> J
|
||||
J -- yes --> K([Response to client])
|
||||
J -- "502/503/504" --> L[handle_errors\nstatic HTML from /srv/errors/]
|
||||
L --> K
|
||||
```
|
||||
5
dozzle/users.yml
Normal file
5
dozzle/users.yml
Normal file
@@ -0,0 +1,5 @@
|
||||
users:
|
||||
admin:
|
||||
name: admin
|
||||
email: admin@libnovel.cc
|
||||
password: "$2y$10$4jqLza2grpxnQn0EGux2C.UmlSxRmOvH/J1ySzOBxMZgW6cA2TnmK"
|
||||
524
homelab/docker-compose.yml
Normal file
524
homelab/docker-compose.yml
Normal file
@@ -0,0 +1,524 @@
|
||||
# LibNovel homelab
|
||||
#
|
||||
# Runs on 192.168.0.109. Hosts:
|
||||
# - libnovel runner (background task worker)
|
||||
# - tooling: GlitchTip, Umami, Fider, Dozzle, Uptime Kuma, Gotify
|
||||
# - observability: OTel Collector, Tempo, Loki, Prometheus, Grafana
|
||||
# - cloudflared tunnel (public subdomains via Cloudflare Zero Trust)
|
||||
# - shared Postgres for tooling DBs
|
||||
#
|
||||
# All secrets come from Doppler (project=libnovel, config=prd_homelab).
|
||||
# Run with: doppler run -- docker compose up -d
|
||||
#
|
||||
# Public subdomains (via Cloudflare Tunnel — no ports exposed to internet):
|
||||
# errors.libnovel.cc → glitchtip-web:8000
|
||||
# analytics.libnovel.cc → umami:3000
|
||||
# feedback.libnovel.cc → fider:3000
|
||||
# logs.libnovel.cc → dozzle:8080
|
||||
# uptime.libnovel.cc → uptime-kuma:3001
|
||||
# push.libnovel.cc → gotify:80
|
||||
# grafana.libnovel.cc → grafana:3000
|
||||
|
||||
services:
|
||||
|
||||
# ── Cloudflare Tunnel ───────────────────────────────────────────────────────
|
||||
# Outbound-only encrypted tunnel to Cloudflare.
|
||||
# Routes all public subdomains to their respective containers on this network.
|
||||
# No inbound ports needed — cloudflared initiates all connections outward.
|
||||
cloudflared:
|
||||
image: cloudflare/cloudflared:latest
|
||||
restart: unless-stopped
|
||||
command: tunnel --no-autoupdate run --token ${CLOUDFLARE_TUNNEL_TOKEN}
|
||||
environment:
|
||||
CLOUDFLARE_TUNNEL_TOKEN: "${CLOUDFLARE_TUNNEL_TOKEN}"
|
||||
|
||||
# ── LibNovel Runner ─────────────────────────────────────────────────────────
|
||||
# Background task worker. Connects to prod PocketBase, MinIO, Meilisearch
|
||||
# via their public subdomains (pb.libnovel.cc, storage.libnovel.cc, etc.)
|
||||
runner:
|
||||
image: kalekber/libnovel-runner:latest
|
||||
restart: unless-stopped
|
||||
stop_grace_period: 135s
|
||||
labels:
|
||||
com.centurylinklabs.watchtower.enable: "true"
|
||||
environment:
|
||||
POCKETBASE_URL: "https://pb.libnovel.cc"
|
||||
POCKETBASE_ADMIN_EMAIL: "${POCKETBASE_ADMIN_EMAIL}"
|
||||
POCKETBASE_ADMIN_PASSWORD: "${POCKETBASE_ADMIN_PASSWORD}"
|
||||
|
||||
MINIO_ENDPOINT: "storage.libnovel.cc"
|
||||
MINIO_ACCESS_KEY: "${MINIO_ROOT_USER}"
|
||||
MINIO_SECRET_KEY: "${MINIO_ROOT_PASSWORD}"
|
||||
MINIO_USE_SSL: "true"
|
||||
MINIO_PUBLIC_ENDPOINT: "${MINIO_PUBLIC_ENDPOINT}"
|
||||
MINIO_PUBLIC_USE_SSL: "${MINIO_PUBLIC_USE_SSL}"
|
||||
|
||||
MEILI_URL: "${MEILI_URL}"
|
||||
MEILI_API_KEY: "${MEILI_API_KEY}"
|
||||
VALKEY_ADDR: ""
|
||||
GODEBUG: "preferIPv4=1"
|
||||
|
||||
# ── LibreTranslate (internal Docker network) ──────────────────────────
|
||||
LIBRETRANSLATE_URL: "http://libretranslate:5000"
|
||||
LIBRETRANSLATE_API_KEY: "${LIBRETRANSLATE_API_KEY}"
|
||||
|
||||
# ── Asynq / Redis ─────────────────────────────────────────────────────
|
||||
REDIS_ADDR: "redis:6379"
|
||||
REDIS_PASSWORD: "${REDIS_PASSWORD}"
|
||||
|
||||
KOKORO_URL: "http://kokoro-fastapi:8880"
|
||||
KOKORO_VOICE: "${KOKORO_VOICE}"
|
||||
|
||||
POCKET_TTS_URL: "http://pocket-tts:8000"
|
||||
|
||||
RUNNER_WORKER_ID: "${RUNNER_WORKER_ID}"
|
||||
RUNNER_POLL_INTERVAL: "${RUNNER_POLL_INTERVAL}"
|
||||
RUNNER_MAX_CONCURRENT_SCRAPE: "${RUNNER_MAX_CONCURRENT_SCRAPE}"
|
||||
RUNNER_MAX_CONCURRENT_AUDIO: "${RUNNER_MAX_CONCURRENT_AUDIO}"
|
||||
RUNNER_MAX_CONCURRENT_TRANSLATION: "${RUNNER_MAX_CONCURRENT_TRANSLATION}"
|
||||
RUNNER_TIMEOUT: "${RUNNER_TIMEOUT}"
|
||||
RUNNER_METRICS_ADDR: "${RUNNER_METRICS_ADDR}"
|
||||
RUNNER_SKIP_INITIAL_CATALOGUE_REFRESH: "true"
|
||||
|
||||
LOG_LEVEL: "${LOG_LEVEL}"
|
||||
GLITCHTIP_DSN: "${GLITCHTIP_DSN}"
|
||||
|
||||
# OTel — send runner traces/metrics to the local collector (HTTP)
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: "http://otel-collector:4318"
|
||||
OTEL_SERVICE_NAME: "runner"
|
||||
|
||||
healthcheck:
|
||||
test: ["CMD", "/healthcheck", "file", "/tmp/runner.alive", "120"]
|
||||
interval: 60s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
# ── Shared Postgres ─────────────────────────────────────────────────────────
|
||||
# Hosts glitchtip, umami, and fider databases.
|
||||
postgres:
|
||||
image: postgres:16-alpine
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
POSTGRES_USER: "${POSTGRES_USER}"
|
||||
POSTGRES_PASSWORD: "${POSTGRES_PASSWORD}"
|
||||
POSTGRES_DB: postgres
|
||||
expose:
|
||||
- "5432"
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD", "pg_isready", "-U", "${POSTGRES_USER}"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ── Postgres database initialisation ────────────────────────────────────────
|
||||
postgres-init:
|
||||
image: postgres:16-alpine
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
PGPASSWORD: "${POSTGRES_PASSWORD}"
|
||||
entrypoint: >
|
||||
/bin/sh -c "
|
||||
psql -h postgres -U ${POSTGRES_USER} -d postgres -tc \"SELECT 1 FROM pg_database WHERE datname='fider'\" | grep -q 1 ||
|
||||
psql -h postgres -U ${POSTGRES_USER} -d postgres -c \"CREATE DATABASE fider\";
|
||||
psql -h postgres -U ${POSTGRES_USER} -d postgres -tc \"SELECT 1 FROM pg_database WHERE datname='glitchtip'\" | grep -q 1 ||
|
||||
psql -h postgres -U ${POSTGRES_USER} -d postgres -c \"CREATE DATABASE glitchtip\";
|
||||
psql -h postgres -U ${POSTGRES_USER} -d postgres -tc \"SELECT 1 FROM pg_database WHERE datname='umami'\" | grep -q 1 ||
|
||||
psql -h postgres -U ${POSTGRES_USER} -d postgres -c \"CREATE DATABASE umami\";
|
||||
echo 'postgres-init: databases ready';
|
||||
"
|
||||
restart: "no"
|
||||
|
||||
# ── GlitchTip DB migration ──────────────────────────────────────────────────
|
||||
glitchtip-migrate:
|
||||
image: glitchtip/glitchtip:latest
|
||||
depends_on:
|
||||
postgres-init:
|
||||
condition: service_completed_successfully
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
DATABASE_URL: "postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/glitchtip"
|
||||
SECRET_KEY: "${GLITCHTIP_SECRET_KEY}"
|
||||
GLITCHTIP_DOMAIN: "${GLITCHTIP_DOMAIN}"
|
||||
EMAIL_URL: "${GLITCHTIP_EMAIL_URL}"
|
||||
DEFAULT_FROM_EMAIL: "noreply@libnovel.cc"
|
||||
VALKEY_URL: "redis://valkey:6379/1"
|
||||
command: "./manage.py migrate"
|
||||
restart: "no"
|
||||
|
||||
# ── GlitchTip web ───────────────────────────────────────────────────────────
|
||||
glitchtip-web:
|
||||
image: glitchtip/glitchtip:latest
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
glitchtip-migrate:
|
||||
condition: service_completed_successfully
|
||||
expose:
|
||||
- "8000"
|
||||
environment:
|
||||
DATABASE_URL: "postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/glitchtip"
|
||||
SECRET_KEY: "${GLITCHTIP_SECRET_KEY}"
|
||||
GLITCHTIP_DOMAIN: "${GLITCHTIP_DOMAIN}"
|
||||
EMAIL_URL: "${GLITCHTIP_EMAIL_URL}"
|
||||
DEFAULT_FROM_EMAIL: "noreply@libnovel.cc"
|
||||
VALKEY_URL: "redis://valkey:6379/1"
|
||||
PORT: "8000"
|
||||
ENABLE_USER_REGISTRATION: "false"
|
||||
healthcheck:
|
||||
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/0/')"]
|
||||
interval: 15s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ── GlitchTip worker ────────────────────────────────────────────────────────
|
||||
glitchtip-worker:
|
||||
image: glitchtip/glitchtip:latest
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
glitchtip-migrate:
|
||||
condition: service_completed_successfully
|
||||
environment:
|
||||
DATABASE_URL: "postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/glitchtip"
|
||||
SECRET_KEY: "${GLITCHTIP_SECRET_KEY}"
|
||||
GLITCHTIP_DOMAIN: "${GLITCHTIP_DOMAIN}"
|
||||
EMAIL_URL: "${GLITCHTIP_EMAIL_URL}"
|
||||
DEFAULT_FROM_EMAIL: "noreply@libnovel.cc"
|
||||
VALKEY_URL: "redis://valkey:6379/1"
|
||||
SERVER_ROLE: "worker"
|
||||
|
||||
# ── Umami ───────────────────────────────────────────────────────────────────
|
||||
umami:
|
||||
image: ghcr.io/umami-software/umami:postgresql-latest
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
postgres-init:
|
||||
condition: service_completed_successfully
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
expose:
|
||||
- "3000"
|
||||
environment:
|
||||
DATABASE_URL: "postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/umami"
|
||||
APP_SECRET: "${UMAMI_APP_SECRET}"
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-sf", "http://localhost:3000/api/heartbeat"]
|
||||
interval: 15s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ── Fider ───────────────────────────────────────────────────────────────────
|
||||
fider:
|
||||
image: getfider/fider:stable
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
postgres-init:
|
||||
condition: service_completed_successfully
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
expose:
|
||||
- "3000"
|
||||
environment:
|
||||
BASE_URL: "${FIDER_BASE_URL}"
|
||||
DATABASE_URL: "postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/fider?sslmode=disable"
|
||||
JWT_SECRET: "${FIDER_JWT_SECRET}"
|
||||
EMAIL_NOREPLY: "noreply@libnovel.cc"
|
||||
EMAIL_SMTP_HOST: "${FIDER_SMTP_HOST}"
|
||||
EMAIL_SMTP_PORT: "${FIDER_SMTP_PORT}"
|
||||
EMAIL_SMTP_USERNAME: "${FIDER_SMTP_USER}"
|
||||
EMAIL_SMTP_PASSWORD: "${FIDER_SMTP_PASSWORD}"
|
||||
EMAIL_SMTP_ENABLE_STARTTLS: "${FIDER_SMTP_ENABLE_STARTTLS}"
|
||||
OAUTH_GOOGLE_CLIENTID: "${OAUTH_GOOGLE_CLIENTID}"
|
||||
OAUTH_GOOGLE_SECRET: "${OAUTH_GOOGLE_SECRET}"
|
||||
OAUTH_GITHUB_CLIENTID: "${OAUTH_GITHUB_CLIENTID}"
|
||||
OAUTH_GITHUB_SECRET: "${OAUTH_GITHUB_SECRET}"
|
||||
|
||||
# ── Dozzle ──────────────────────────────────────────────────────────────────
|
||||
# Watches both homelab and prod containers.
|
||||
# Prod agent runs on 165.22.70.138:7007 (added separately to prod compose).
|
||||
dozzle:
|
||||
image: amir20/dozzle:latest
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
- ./dozzle/users.yml:/data/users.yml:ro
|
||||
expose:
|
||||
- "8080"
|
||||
environment:
|
||||
DOZZLE_AUTH_PROVIDER: simple
|
||||
DOZZLE_HOSTNAME: "logs.libnovel.cc"
|
||||
DOZZLE_REMOTE_AGENT: "prod@165.22.70.138:7007"
|
||||
healthcheck:
|
||||
test: ["CMD", "/dozzle", "healthcheck"]
|
||||
interval: 15s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ── Uptime Kuma ─────────────────────────────────────────────────────────────
|
||||
uptime-kuma:
|
||||
image: louislam/uptime-kuma:1
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- uptime_kuma_data:/app/data
|
||||
expose:
|
||||
- "3001"
|
||||
healthcheck:
|
||||
test: ["CMD", "extra/healthcheck"]
|
||||
interval: 15s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ── Gotify ──────────────────────────────────────────────────────────────────
|
||||
gotify:
|
||||
image: gotify/server:latest
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- gotify_data:/app/data
|
||||
expose:
|
||||
- "80"
|
||||
environment:
|
||||
GOTIFY_DEFAULTUSER_NAME: "${GOTIFY_ADMIN_USER}"
|
||||
GOTIFY_DEFAULTUSER_PASS: "${GOTIFY_ADMIN_PASS}"
|
||||
GOTIFY_SERVER_PORT: "80"
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-sf", "http://localhost:80/health"]
|
||||
interval: 15s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ── Redis (Asynq task queue) ────────────────────────────────────────────────
|
||||
# Dedicated Redis instance for Asynq job dispatch.
|
||||
# The prod backend enqueues jobs via redis.libnovel.cc:6380 (Caddy TLS proxy →
|
||||
# host:6379). The runner reads from this instance directly on the Docker network.
|
||||
# Port is bound to 0.0.0.0:6379 so the Caddy layer4 proxy on prod can reach it.
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
restart: unless-stopped
|
||||
command: ["redis-server", "--appendonly", "yes", "--requirepass", "${REDIS_PASSWORD}"]
|
||||
ports:
|
||||
- "6379:6379"
|
||||
volumes:
|
||||
- redis_data:/data
|
||||
healthcheck:
|
||||
test: ["CMD", "redis-cli", "-a", "${REDIS_PASSWORD}", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ── LibreTranslate ──────────────────────────────────────────────────────────
|
||||
# Self-hosted machine translation. Runner connects via http://libretranslate:5000.
|
||||
# Only English and the configured target languages (LT_LOAD_ONLY) are loaded, to save RAM.
|
||||
libretranslate:
|
||||
image: libretranslate/libretranslate:latest
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
LT_API_KEYS: "true"
|
||||
LT_API_KEYS_DB_PATH: "/app/db/api_keys.db"
|
||||
LT_LOAD_ONLY: "en,ru,id,pt,fr"
|
||||
LT_DISABLE_WEB_UI: "true"
|
||||
LT_UPDATE_MODELS: "false"
|
||||
expose:
|
||||
- "5000"
|
||||
volumes:
|
||||
- libretranslate_data:/app/db
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-sf", "http://localhost:5000/languages"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
start_period: 60s
|
||||
|
||||
# ── Valkey ──────────────────────────────────────────────────────────────────
|
||||
# Used by GlitchTip for task queuing.
|
||||
valkey:
|
||||
image: valkey/valkey:7-alpine
|
||||
restart: unless-stopped
|
||||
expose:
|
||||
- "6379"
|
||||
volumes:
|
||||
- valkey_data:/data
|
||||
healthcheck:
|
||||
test: ["CMD", "valkey-cli", "ping"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ── OTel Collector ──────────────────────────────────────────────────────────
|
||||
# Receives OTLP from backend/ui/runner, fans out to Tempo + Prometheus + Loki.
|
||||
otel-collector:
|
||||
image: otel/opentelemetry-collector-contrib:latest
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- ./otel/collector.yaml:/etc/otelcol-contrib/config.yaml:ro
|
||||
expose:
|
||||
- "4317" # OTLP gRPC
|
||||
- "4318" # OTLP HTTP
|
||||
- "8888" # Collector self-metrics (scraped by Prometheus)
|
||||
depends_on:
|
||||
- tempo
|
||||
- prometheus
|
||||
- loki
|
||||
# No healthcheck — distroless image has no shell or curl
|
||||
|
||||
# ── Tempo ───────────────────────────────────────────────────────────────────
|
||||
# Distributed trace storage. Receives OTLP from the collector.
|
||||
tempo:
|
||||
image: grafana/tempo:2.6.1
|
||||
restart: unless-stopped
|
||||
command: ["-config.file=/etc/tempo.yaml"]
|
||||
volumes:
|
||||
- ./otel/tempo.yaml:/etc/tempo.yaml:ro
|
||||
- tempo_data:/var/tempo
|
||||
expose:
|
||||
- "3200" # Tempo query API (queried by Grafana)
|
||||
- "4317" # OTLP gRPC ingest (collector → tempo)
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-qO-", "http://localhost:3200/ready"]
|
||||
interval: 15s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ── Prometheus ──────────────────────────────────────────────────────────────
|
||||
# Scrapes metrics from backend (via prod), runner, and otel-collector.
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
restart: unless-stopped
|
||||
command:
|
||||
- "--config.file=/etc/prometheus/prometheus.yaml"
|
||||
- "--storage.tsdb.path=/prometheus"
|
||||
- "--storage.tsdb.retention.time=30d"
|
||||
- "--web.enable-remote-write-receiver"
|
||||
volumes:
|
||||
- ./otel/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro
|
||||
- prometheus_data:/prometheus
|
||||
expose:
|
||||
- "9090"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-qO-", "http://localhost:9090/-/healthy"]
|
||||
interval: 15s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ── Loki ────────────────────────────────────────────────────────────────────
|
||||
# Log aggregation. Receives logs from OTel collector. Replaces manual Dozzle
|
||||
# tailing for structured log search.
|
||||
loki:
|
||||
image: grafana/loki:latest
|
||||
restart: unless-stopped
|
||||
command: ["-config.file=/etc/loki/loki.yaml"]
|
||||
volumes:
|
||||
- ./otel/loki.yaml:/etc/loki/loki.yaml:ro
|
||||
- loki_data:/loki
|
||||
expose:
|
||||
- "3100"
|
||||
# No healthcheck — distroless image has no shell or curl
|
||||
|
||||
# ── Grafana ─────────────────────────────────────────────────────────────────
|
||||
# Single UI for traces (Tempo), metrics (Prometheus), and logs (Loki).
|
||||
# Accessible at grafana.libnovel.cc via Cloudflare Tunnel.
|
||||
grafana:
|
||||
image: grafana/grafana:latest
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- tempo
|
||||
- prometheus
|
||||
- loki
|
||||
expose:
|
||||
- "3000"
|
||||
volumes:
|
||||
- grafana_data:/var/lib/grafana
|
||||
- ./otel/grafana/provisioning:/etc/grafana/provisioning:ro
|
||||
environment:
|
||||
GF_SERVER_ROOT_URL: "https://grafana.libnovel.cc"
|
||||
GF_SECURITY_ADMIN_USER: "${GRAFANA_ADMIN_USER}"
|
||||
GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD}"
|
||||
GF_AUTH_ANONYMOUS_ENABLED: "false"
|
||||
GF_FEATURE_TOGGLES_ENABLE: "traceqlEditor"
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-qO-", "http://localhost:3000/api/health"]
|
||||
interval: 15s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
|
||||
# ── Kokoro-FastAPI (GPU TTS) ────────────────────────────────────────────────
|
||||
# OpenAI-compatible TTS service backed by the Kokoro model, running on the
|
||||
# homelab RTX 3050 (8 GB VRAM). Replaces the old kokoro.kalekber.cc endpoint, whose DNS is broken.
|
||||
# Voices match existing IDs: af_bella, af_sky, af_heart, etc.
|
||||
# The runner reaches it at http://kokoro-fastapi:8880 via the Docker network.
|
||||
kokoro-fastapi:
|
||||
image: kokoro-fastapi:latest
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
expose:
|
||||
- "8880"
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-sf", "http://localhost:8880/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
start_period: 60s
|
||||
|
||||
# ── pocket-tts (CPU TTS) ────────────────────────────────────────────────────
|
||||
# Lightweight CPU-only TTS using kyutai-labs/pocket-tts.
|
||||
# Image is built locally on homelab from https://github.com/kyutai-labs/pocket-tts
|
||||
# (no prebuilt image published): cd /tmp && git clone --depth=1 https://github.com/kyutai-labs/pocket-tts.git && docker build -t pocket-tts:latest /tmp/pocket-tts
|
||||
# OpenAI-compatible: POST /tts (multipart form) on port 8000.
|
||||
# Voices: alba, marius, javert, jean, fantine, cosette, eponine, azelma, etc.
|
||||
# Not currently used by the runner (runner uses kokoro-fastapi), but available
|
||||
# for experimentation / fallback.
|
||||
pocket-tts:
|
||||
image: pocket-tts:latest
|
||||
restart: unless-stopped
|
||||
command: ["uv", "run", "pocket-tts", "serve", "--host", "0.0.0.0"]
|
||||
expose:
|
||||
- "8000"
|
||||
volumes:
|
||||
- pocket_tts_cache:/root/.cache/pocket_tts
|
||||
- hf_cache:/root/.cache/huggingface
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-sf", "http://localhost:8000/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 5
|
||||
start_period: 120s
|
||||
|
||||
# ── Watchtower ──────────────────────────────────────────────────────────────
|
||||
# Auto-updates the runner container when CI pushes a new image for its tag (polled every 300 s).
|
||||
# Only watches services with the watchtower label.
|
||||
# doppler binary is mounted from the host so watchtower fetches fresh secrets
|
||||
# on every start (notification URL, credentials) without baking them in.
|
||||
watchtower:
|
||||
image: containrrr/watchtower:latest
|
||||
restart: unless-stopped
|
||||
entrypoint: ["/usr/bin/doppler", "run", "--project", "libnovel", "--config", "prd_homelab", "--"]
|
||||
command: ["/watchtower", "--label-enable", "--interval", "300", "--cleanup"]
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- /usr/bin/doppler:/usr/bin/doppler:ro
|
||||
- /root/.doppler:/root/.doppler:ro
|
||||
environment:
|
||||
HOME: "/root"
|
||||
DOCKER_API_VERSION: "1.44"
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
redis_data:
|
||||
libretranslate_data:
|
||||
valkey_data:
|
||||
uptime_kuma_data:
|
||||
gotify_data:
|
||||
tempo_data:
|
||||
prometheus_data:
|
||||
loki_data:
|
||||
grafana_data:
|
||||
pocket_tts_cache:
|
||||
hf_cache:
|
||||
5
homelab/dozzle/users.yml
Normal file
5
homelab/dozzle/users.yml
Normal file
@@ -0,0 +1,5 @@
|
||||
users:
|
||||
admin:
|
||||
name: admin
|
||||
email: admin@libnovel.cc
|
||||
password: "$2y$10$4jqLza2grpxnQn0EGux2C.UmlSxRmOvH/J1ySzOBxMZgW6cA2TnmK"
|
||||
68
homelab/otel/collector.yaml
Normal file
68
homelab/otel/collector.yaml
Normal file
@@ -0,0 +1,68 @@
|
||||
# OTel Collector config
|
||||
#
|
||||
# Receivers: OTLP (gRPC + HTTP) from backend, ui, runner
|
||||
# Processors: batch for efficiency, resource detection for host metadata
|
||||
# Exporters: Tempo (traces), Prometheus (metrics), Loki (logs)
|
||||
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
http:
|
||||
endpoint: 0.0.0.0:4318
|
||||
|
||||
processors:
|
||||
batch:
|
||||
timeout: 5s
|
||||
send_batch_size: 512
|
||||
|
||||
# Attach host metadata to all telemetry
|
||||
resourcedetection:
|
||||
detectors: [env, system]
|
||||
timeout: 5s
|
||||
|
||||
exporters:
|
||||
# Traces → Tempo
|
||||
otlp/tempo:
|
||||
endpoint: tempo:4317
|
||||
tls:
|
||||
insecure: true
|
||||
|
||||
# Metrics → Prometheus (remote write)
|
||||
prometheusremotewrite:
|
||||
endpoint: "http://prometheus:9090/api/v1/write"
|
||||
tls:
|
||||
insecure_skip_verify: true
|
||||
|
||||
# Logs → Loki (via OTLP HTTP endpoint)
|
||||
otlphttp/loki:
|
||||
endpoint: "http://loki:3100/otlp"
|
||||
tls:
|
||||
insecure: true
|
||||
|
||||
# Collector self-observability (optional debug)
|
||||
debug:
|
||||
verbosity: basic
|
||||
|
||||
extensions:
|
||||
health_check:
|
||||
endpoint: 0.0.0.0:13133
|
||||
pprof:
|
||||
endpoint: 0.0.0.0:1777
|
||||
|
||||
service:
|
||||
extensions: [health_check, pprof]
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [resourcedetection, batch]
|
||||
exporters: [otlp/tempo]
|
||||
metrics:
|
||||
receivers: [otlp]
|
||||
processors: [resourcedetection, batch]
|
||||
exporters: [prometheusremotewrite]
|
||||
logs:
|
||||
receivers: [otlp]
|
||||
processors: [resourcedetection, batch]
|
||||
exporters: [otlphttp/loki]
|
||||
@@ -0,0 +1,16 @@
|
||||
# Grafana alerting provisioning — contact points
|
||||
# Sends all alerts to Gotify (self-hosted push notifications).
|
||||
apiVersion: 1
|
||||
|
||||
contactPoints:
|
||||
- orgId: 1
|
||||
name: Gotify
|
||||
receivers:
|
||||
- uid: gotify-webhook
|
||||
type: webhook
|
||||
settings:
|
||||
url: "http://gotify/message?token=ABZrZgCY-4ivcmt"
|
||||
httpMethod: POST
|
||||
title: "{{ .CommonLabels.alertname }}"
|
||||
message: "{{ range .Alerts }}{{ .Annotations.summary }}\n{{ .Annotations.description }}{{ end }}"
|
||||
disableResolveMessage: false
|
||||
@@ -0,0 +1,15 @@
|
||||
# Grafana alerting provisioning — notification policies
|
||||
# Routes all alerts to Gotify by default.
|
||||
apiVersion: 1
|
||||
|
||||
policies:
|
||||
- orgId: 1
|
||||
receiver: Gotify
|
||||
group_by: ["alertname", "service"]
|
||||
group_wait: 30s
|
||||
group_interval: 5m
|
||||
repeat_interval: 4h
|
||||
routes:
|
||||
- receiver: Gotify
|
||||
matchers:
|
||||
- severity =~ "critical|warning"
|
||||
214
homelab/otel/grafana/provisioning/alerting/rules.yaml
Normal file
214
homelab/otel/grafana/provisioning/alerting/rules.yaml
Normal file
@@ -0,0 +1,214 @@
|
||||
# Grafana alerting provisioning — alert rules
|
||||
# Covers: runner down, high task failure rate, stalled runner tasks, backend error rate, backend p95 latency, OTel collector down.
|
||||
apiVersion: 1
|
||||
|
||||
groups:
|
||||
- orgId: 1
|
||||
name: LibNovel Runner
|
||||
folder: LibNovel
|
||||
interval: 1m
|
||||
rules:
|
||||
|
||||
- uid: runner-down
|
||||
title: Runner Down
|
||||
condition: C
|
||||
for: 2m
|
||||
annotations:
|
||||
summary: "LibNovel runner is not reachable"
|
||||
description: "The Prometheus scrape of runner:9091 has been failing for >2 minutes. Tasks are not being processed."
|
||||
labels:
|
||||
severity: critical
|
||||
service: runner
|
||||
data:
|
||||
- refId: A
|
||||
datasourceUid: prometheus
|
||||
relativeTimeRange: { from: 300, to: 0 }
|
||||
model:
|
||||
expr: "up{job=\"libnovel-runner\"}"
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
- refId: C
|
||||
datasourceUid: __expr__
|
||||
relativeTimeRange: { from: 300, to: 0 }
|
||||
model:
|
||||
type: classic_conditions
|
||||
conditions:
|
||||
- evaluator: { params: [1], type: lt }
|
||||
operator: { type: and }
|
||||
query: { params: [A] }
|
||||
reducer: { params: [], type: last }
|
||||
|
||||
- uid: runner-high-failure-rate
|
||||
title: Runner High Task Failure Rate
|
||||
condition: C
|
||||
for: 5m
|
||||
annotations:
|
||||
summary: "Runner task failure rate is above 20%"
|
||||
description: "More than 20% of runner tasks have been failing for the last 5 minutes. Check runner logs."
|
||||
labels:
|
||||
severity: warning
|
||||
service: runner
|
||||
data:
|
||||
- refId: A
|
||||
datasourceUid: prometheus
|
||||
relativeTimeRange: { from: 600, to: 0 }
|
||||
model:
|
||||
expr: "rate(libnovel_runner_tasks_failed_total[5m]) / clamp_min(rate(libnovel_runner_tasks_completed_total[5m]) + rate(libnovel_runner_tasks_failed_total[5m]), 0.001)"
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
- refId: C
|
||||
datasourceUid: __expr__
|
||||
relativeTimeRange: { from: 600, to: 0 }
|
||||
model:
|
||||
type: classic_conditions
|
||||
conditions:
|
||||
- evaluator: { params: [0.2], type: gt }
|
||||
operator: { type: and }
|
||||
query: { params: [A] }
|
||||
reducer: { params: [], type: last }
|
||||
|
||||
- uid: runner-tasks-stalled
|
||||
title: Runner Tasks Stalled
|
||||
condition: C
|
||||
for: 10m
|
||||
annotations:
|
||||
summary: "Runner has tasks running for >10 minutes with no completions"
|
||||
description: "tasks_running > 0 but rate(tasks_completed) is 0. Tasks may be stuck or the runner is in a crash loop."
|
||||
labels:
|
||||
severity: warning
|
||||
service: runner
|
||||
data:
|
||||
- refId: Running
|
||||
datasourceUid: prometheus
|
||||
relativeTimeRange: { from: 900, to: 0 }
|
||||
model:
|
||||
expr: "libnovel_runner_tasks_running"
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
- refId: Rate
|
||||
datasourceUid: prometheus
|
||||
relativeTimeRange: { from: 900, to: 0 }
|
||||
model:
|
||||
expr: "rate(libnovel_runner_tasks_completed_total[10m])"
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
- refId: C
|
||||
datasourceUid: __expr__
|
||||
relativeTimeRange: { from: 900, to: 0 }
|
||||
model:
|
||||
type: classic_conditions
|
||||
conditions:
|
||||
- evaluator: { params: [0], type: gt }
|
||||
operator: { type: and }
|
||||
query: { params: [Running] }
|
||||
reducer: { params: [], type: last }
|
||||
- evaluator: { params: [0.001], type: lt }
|
||||
operator: { type: and }
|
||||
query: { params: [Rate] }
|
||||
reducer: { params: [], type: last }
|
||||
|
||||
- orgId: 1
|
||||
name: LibNovel Backend
|
||||
folder: LibNovel
|
||||
interval: 1m
|
||||
rules:
|
||||
|
||||
- uid: backend-high-error-rate
|
||||
title: Backend High Error Rate
|
||||
condition: C
|
||||
for: 5m
|
||||
annotations:
|
||||
summary: "Backend API error rate above 5%"
|
||||
description: "More than 5% of backend HTTP requests are returning 5xx status codes (as seen from UI OTel instrumentation)."
|
||||
labels:
|
||||
severity: warning
|
||||
service: backend
|
||||
data:
|
||||
- refId: A
|
||||
datasourceUid: prometheus
|
||||
relativeTimeRange: { from: 600, to: 0 }
|
||||
model:
|
||||
expr: "sum(rate(http_client_request_duration_seconds_count{job=\"ui\", server_address=\"backend\", http_response_status_code=~\"5..\"}[5m])) / clamp_min(sum(rate(http_client_request_duration_seconds_count{job=\"ui\", server_address=\"backend\"}[5m])), 0.001)"
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
- refId: C
|
||||
datasourceUid: __expr__
|
||||
relativeTimeRange: { from: 600, to: 0 }
|
||||
model:
|
||||
type: classic_conditions
|
||||
conditions:
|
||||
- evaluator: { params: [0.05], type: gt }
|
||||
operator: { type: and }
|
||||
query: { params: [A] }
|
||||
reducer: { params: [], type: last }
|
||||
|
||||
- uid: backend-high-p95-latency
|
||||
title: Backend High p95 Latency
|
||||
condition: C
|
||||
for: 5m
|
||||
annotations:
|
||||
summary: "Backend p95 latency above 2s"
|
||||
description: "95th percentile latency of backend spans has exceeded 2 seconds for >5 minutes."
|
||||
labels:
|
||||
severity: warning
|
||||
service: backend
|
||||
data:
|
||||
- refId: A
|
||||
datasourceUid: prometheus
|
||||
relativeTimeRange: { from: 600, to: 0 }
|
||||
model:
|
||||
expr: "histogram_quantile(0.95, sum(rate(traces_spanmetrics_latency_bucket{service=\"backend\"}[5m])) by (le))"
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
- refId: C
|
||||
datasourceUid: __expr__
|
||||
relativeTimeRange: { from: 600, to: 0 }
|
||||
model:
|
||||
type: classic_conditions
|
||||
conditions:
|
||||
- evaluator: { params: [2], type: gt }
|
||||
operator: { type: and }
|
||||
query: { params: [A] }
|
||||
reducer: { params: [], type: last }
|
||||
|
||||
- orgId: 1
|
||||
name: LibNovel OTel Pipeline
|
||||
folder: LibNovel
|
||||
interval: 2m
|
||||
rules:
|
||||
|
||||
- uid: otel-collector-down
|
||||
title: OTel Collector Down
|
||||
condition: C
|
||||
for: 3m
|
||||
annotations:
|
||||
summary: "OTel collector is not reachable"
|
||||
description: "Prometheus cannot scrape otel-collector:8888. Traces, metrics, and logs may be dropping."
|
||||
labels:
|
||||
severity: warning
|
||||
service: otel-collector
|
||||
data:
|
||||
- refId: A
|
||||
datasourceUid: prometheus
|
||||
relativeTimeRange: { from: 600, to: 0 }
|
||||
model:
|
||||
expr: "up{job=\"otel-collector\"}"
|
||||
instant: true
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
- refId: C
|
||||
datasourceUid: __expr__
|
||||
relativeTimeRange: { from: 600, to: 0 }
|
||||
model:
|
||||
type: classic_conditions
|
||||
conditions:
|
||||
- evaluator: { params: [1], type: lt }
|
||||
operator: { type: and }
|
||||
query: { params: [A] }
|
||||
reducer: { params: [], type: last }
|
||||
338
homelab/otel/grafana/provisioning/dashboards/backend.json
Normal file
338
homelab/otel/grafana/provisioning/dashboards/backend.json
Normal file
@@ -0,0 +1,338 @@
|
||||
{
|
||||
"uid": "libnovel-backend",
|
||||
"title": "Backend API",
|
||||
"description": "Request rate, error rate, and latency for the LibNovel backend. Powered by Tempo span metrics and UI OTel instrumentation.",
|
||||
"tags": ["libnovel", "backend", "api"],
|
||||
"timezone": "browser",
|
||||
"refresh": "30s",
|
||||
"time": { "from": "now-3h", "to": "now" },
|
||||
"schemaVersion": 39,
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"type": "stat",
|
||||
"title": "Request Rate (RPS)",
|
||||
"gridPos": { "x": 0, "y": 0, "w": 4, "h": 4 },
|
||||
"options": {
|
||||
"reduceOptions": { "calcs": ["lastNotNull"] },
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"textMode": "auto"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "reqps",
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum(rate(traces_spanmetrics_calls_total{service=\"backend\"}[5m]))",
|
||||
"legendFormat": "rps",
|
||||
"instant": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"type": "stat",
|
||||
"title": "Error Rate",
|
||||
"gridPos": { "x": 4, "y": 0, "w": 4, "h": 4 },
|
||||
"options": {
|
||||
"reduceOptions": { "calcs": ["lastNotNull"] },
|
||||
"colorMode": "background",
|
||||
"graphMode": "none"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 0.01 },
|
||||
{ "color": "red", "value": 0.05 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum(rate(traces_spanmetrics_calls_total{service=\"backend\", status_code=\"STATUS_CODE_ERROR\"}[5m])) / clamp_min(sum(rate(traces_spanmetrics_calls_total{service=\"backend\"}[5m])), 0.001)",
|
||||
"legendFormat": "error rate",
|
||||
"instant": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"type": "stat",
|
||||
"title": "p50 Latency",
|
||||
"gridPos": { "x": 8, "y": 0, "w": 4, "h": 4 },
|
||||
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value", "graphMode": "area" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 0.2 },
|
||||
{ "color": "red", "value": 1 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "histogram_quantile(0.50, sum(rate(traces_spanmetrics_latency_bucket{service=\"backend\"}[5m])) by (le))",
|
||||
"legendFormat": "p50",
|
||||
"instant": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"type": "stat",
|
||||
"title": "p95 Latency",
|
||||
"gridPos": { "x": 12, "y": 0, "w": 4, "h": 4 },
|
||||
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value", "graphMode": "area" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 0.5 },
|
||||
{ "color": "red", "value": 2 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "histogram_quantile(0.95, sum(rate(traces_spanmetrics_latency_bucket{service=\"backend\"}[5m])) by (le))",
|
||||
"legendFormat": "p95",
|
||||
"instant": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"type": "stat",
|
||||
"title": "p99 Latency",
|
||||
"gridPos": { "x": 16, "y": 0, "w": 4, "h": 4 },
|
||||
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value", "graphMode": "area" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 1 },
|
||||
{ "color": "red", "value": 5 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "histogram_quantile(0.99, sum(rate(traces_spanmetrics_latency_bucket{service=\"backend\"}[5m])) by (le))",
|
||||
"legendFormat": "p99",
|
||||
"instant": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"type": "stat",
|
||||
"title": "5xx Errors / min",
|
||||
"gridPos": { "x": 20, "y": 0, "w": 4, "h": 4 },
|
||||
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "background", "graphMode": "none" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "short",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 1 },
|
||||
{ "color": "red", "value": 5 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum(rate(http_client_request_duration_seconds_count{job=\"ui\", server_address=\"backend\", http_response_status_code=~\"5..\"}[5m])) * 60",
|
||||
"legendFormat": "5xx/min",
|
||||
"instant": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 10,
|
||||
"type": "timeseries",
|
||||
"title": "Request Rate by Status",
|
||||
"gridPos": { "x": 0, "y": 4, "w": 12, "h": 8 },
|
||||
"options": {
|
||||
"tooltip": { "mode": "multi" },
|
||||
"legend": { "displayMode": "list", "placement": "bottom" }
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "reqps", "custom": { "lineWidth": 2, "fillOpacity": 10 } },
|
||||
"overrides": [
|
||||
{ "matcher": { "id": "byFrameRefID", "options": "errors" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "success",
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum(rate(http_client_request_duration_seconds_count{job=\"ui\", server_address=\"backend\", http_response_status_code=~\"2..\"}[5m]))",
|
||||
"legendFormat": "2xx"
|
||||
},
|
||||
{
|
||||
"refId": "notfound",
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum(rate(http_client_request_duration_seconds_count{job=\"ui\", server_address=\"backend\", http_response_status_code=~\"4..\"}[5m]))",
|
||||
"legendFormat": "4xx"
|
||||
},
|
||||
{
|
||||
"refId": "errors",
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum(rate(http_client_request_duration_seconds_count{job=\"ui\", server_address=\"backend\", http_response_status_code=~\"5..\"}[5m]))",
|
||||
"legendFormat": "5xx"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
"type": "timeseries",
|
||||
"title": "Latency Percentiles (backend spans)",
|
||||
"gridPos": { "x": 12, "y": 4, "w": 12, "h": 8 },
|
||||
"options": {
|
||||
"tooltip": { "mode": "multi" },
|
||||
"legend": { "displayMode": "list", "placement": "bottom" }
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "s", "custom": { "lineWidth": 2, "fillOpacity": 10 } }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "histogram_quantile(0.50, sum(rate(traces_spanmetrics_latency_bucket{service=\"backend\"}[5m])) by (le))",
|
||||
"legendFormat": "p50"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "histogram_quantile(0.95, sum(rate(traces_spanmetrics_latency_bucket{service=\"backend\"}[5m])) by (le))",
|
||||
"legendFormat": "p95"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "histogram_quantile(0.99, sum(rate(traces_spanmetrics_latency_bucket{service=\"backend\"}[5m])) by (le))",
|
||||
"legendFormat": "p99"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 12,
|
||||
"type": "timeseries",
|
||||
"title": "Requests / min by HTTP method (UI → Backend)",
|
||||
"gridPos": { "x": 0, "y": 12, "w": 12, "h": 8 },
|
||||
"options": {
|
||||
"tooltip": { "mode": "multi" },
|
||||
"legend": { "displayMode": "list", "placement": "bottom" }
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "short", "custom": { "lineWidth": 2, "fillOpacity": 5 } }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum(rate(http_client_request_duration_seconds_count{job=\"ui\", server_address=\"backend\"}[5m])) by (http_request_method) * 60",
|
||||
"legendFormat": "{{http_request_method}}"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"type": "timeseries",
|
||||
"title": "Requests / min — UI → PocketBase",
|
||||
"gridPos": { "x": 12, "y": 12, "w": 12, "h": 8 },
|
||||
"description": "Traffic from SvelteKit server to PocketBase (auth, collections, etc.).",
|
||||
"options": {
|
||||
"tooltip": { "mode": "multi" },
|
||||
"legend": { "displayMode": "list", "placement": "bottom" }
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "short", "custom": { "lineWidth": 2, "fillOpacity": 5 } }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum(rate(http_client_request_duration_seconds_count{job=\"ui\", server_address=\"pocketbase\"}[5m])) by (http_request_method, http_response_status_code) * 60",
|
||||
"legendFormat": "{{http_request_method}} {{http_response_status_code}}"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 14,
|
||||
"type": "timeseries",
|
||||
"title": "UI → Backend Latency (p50 / p95)",
|
||||
"gridPos": { "x": 0, "y": 20, "w": 12, "h": 8 },
|
||||
"description": "HTTP client latency as seen from the SvelteKit SSR layer calling backend.",
|
||||
"options": {
|
||||
"tooltip": { "mode": "multi" },
|
||||
"legend": { "displayMode": "list", "placement": "bottom" }
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "s", "custom": { "lineWidth": 2, "fillOpacity": 5 } }
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "histogram_quantile(0.50, sum(rate(http_client_request_duration_seconds_bucket{job=\"ui\", server_address=\"backend\"}[5m])) by (le))",
|
||||
"legendFormat": "p50"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "histogram_quantile(0.95, sum(rate(http_client_request_duration_seconds_bucket{job=\"ui\", server_address=\"backend\"}[5m])) by (le))",
|
||||
"legendFormat": "p95"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 20,
|
||||
"type": "logs",
|
||||
"title": "Backend Errors",
|
||||
"gridPos": { "x": 0, "y": 28, "w": 24, "h": 10 },
|
||||
"options": {
|
||||
"showTime": true,
|
||||
"showLabels": false,
|
||||
"wrapLogMessage": true,
|
||||
"prettifyLogMessage": true,
|
||||
"enableLogDetails": true,
|
||||
"sortOrder": "Descending",
|
||||
"dedupStrategy": "none"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"expr": "{service_name=\"backend\"} | json | level =~ `(WARN|ERROR|error|warn)`",
|
||||
"legendFormat": ""
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
275
homelab/otel/grafana/provisioning/dashboards/catalogue.json
Normal file
275
homelab/otel/grafana/provisioning/dashboards/catalogue.json
Normal file
@@ -0,0 +1,275 @@
|
||||
{
|
||||
"uid": "libnovel-catalogue",
|
||||
"title": "Catalogue & Content Progress",
|
||||
"description": "Scraping progress, audio generation coverage, and catalogue health derived from runner structured logs.",
|
||||
"tags": ["libnovel", "catalogue", "content"],
|
||||
"timezone": "browser",
|
||||
"refresh": "1m",
|
||||
"time": { "from": "now-24h", "to": "now" },
|
||||
"schemaVersion": 39,
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"type": "stat",
|
||||
"title": "Books Scraped (last 24h)",
|
||||
"description": "Count of unique book slugs appearing in successful scrape task completions.",
|
||||
"gridPos": { "x": 0, "y": 0, "w": 4, "h": 4 },
|
||||
"options": { "reduceOptions": { "calcs": ["sum"] }, "colorMode": "value", "graphMode": "none" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "fixedColor": "blue", "mode": "fixed" },
|
||||
"thresholds": { "mode": "absolute", "steps": [] }
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"expr": "sum_over_time({service_name=\"runner\"} | json | msg=`scrape task done` [24h])",
|
||||
"legendFormat": "books scraped"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"type": "stat",
|
||||
"title": "Chapters Scraped (last 24h)",
|
||||
"gridPos": { "x": 4, "y": 0, "w": 4, "h": 4 },
|
||||
"options": { "reduceOptions": { "calcs": ["sum"] }, "colorMode": "value", "graphMode": "none" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "fixedColor": "blue", "mode": "fixed" },
|
||||
"thresholds": { "mode": "absolute", "steps": [] }
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"expr": "sum_over_time({service_name=\"runner\"} | json | unwrap scraped [24h])",
|
||||
"legendFormat": "chapters scraped"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"type": "stat",
|
||||
"title": "Audio Jobs Completed (last 24h)",
|
||||
"gridPos": { "x": 8, "y": 0, "w": 4, "h": 4 },
|
||||
"options": { "reduceOptions": { "calcs": ["sum"] }, "colorMode": "value", "graphMode": "none" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "fixedColor": "green", "mode": "fixed" },
|
||||
"thresholds": { "mode": "absolute", "steps": [] }
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"expr": "sum_over_time({service_name=\"runner\"} | json | msg=`audio task done` [24h])",
|
||||
"legendFormat": "audio done"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"type": "stat",
|
||||
"title": "Audio Jobs Failed (last 24h)",
|
||||
"gridPos": { "x": 12, "y": 0, "w": 4, "h": 4 },
|
||||
"options": { "reduceOptions": { "calcs": ["sum"] }, "colorMode": "background", "graphMode": "none" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 1 },
|
||||
{ "color": "red", "value": 5 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"expr": "sum_over_time({service_name=\"runner\"} | json | msg=`audio task failed` [24h])",
|
||||
"legendFormat": "audio failed"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"type": "stat",
|
||||
"title": "Scrape Errors (last 24h)",
|
||||
"gridPos": { "x": 16, "y": 0, "w": 4, "h": 4 },
|
||||
"options": { "reduceOptions": { "calcs": ["sum"] }, "colorMode": "background", "graphMode": "none" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 1 },
|
||||
{ "color": "red", "value": 10 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"expr": "sum_over_time({service_name=\"runner\"} | json | msg=`scrape task failed` [24h])",
|
||||
"legendFormat": "scrape errors"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"type": "stat",
|
||||
"title": "Catalogue Refresh — Books Indexed",
|
||||
"description": "Total books indexed in the last catalogue refresh cycle (from the ok field in the summary log).",
|
||||
"gridPos": { "x": 20, "y": 0, "w": 4, "h": 4 },
|
||||
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value", "graphMode": "none" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "fixedColor": "purple", "mode": "fixed" },
|
||||
"thresholds": { "mode": "absolute", "steps": [] }
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"expr": "last_over_time({service_name=\"runner\"} | json | op=`catalogue_refresh` | msg=`catalogue refresh done` | unwrap ok [7d])",
|
||||
"legendFormat": "indexed"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 10,
|
||||
"type": "timeseries",
|
||||
"title": "Audio Generation Rate (tasks/min)",
|
||||
"gridPos": { "x": 0, "y": 4, "w": 12, "h": 8 },
|
||||
"description": "Rate of audio task completions and failures over time.",
|
||||
"options": {
|
||||
"tooltip": { "mode": "multi" },
|
||||
"legend": { "displayMode": "list", "placement": "bottom" }
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "short", "custom": { "lineWidth": 2, "fillOpacity": 10 } },
|
||||
"overrides": [
|
||||
{ "matcher": { "id": "byName", "options": "failed" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] },
|
||||
{ "matcher": { "id": "byName", "options": "completed" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum(rate(traces_spanmetrics_calls_total{service=\"runner\", span_name=\"runner.audio_task\", status_code!=\"STATUS_CODE_ERROR\"}[5m])) * 60",
|
||||
"legendFormat": "completed"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum(rate(traces_spanmetrics_calls_total{service=\"runner\", span_name=\"runner.audio_task\", status_code=\"STATUS_CODE_ERROR\"}[5m])) * 60",
|
||||
"legendFormat": "failed"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
"type": "timeseries",
|
||||
"title": "Scraping Rate (tasks/min)",
|
||||
"gridPos": { "x": 12, "y": 4, "w": 12, "h": 8 },
|
||||
"description": "Rate of scrape task completions and failures over time.",
|
||||
"options": {
|
||||
"tooltip": { "mode": "multi" },
|
||||
"legend": { "displayMode": "list", "placement": "bottom" }
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "short", "custom": { "lineWidth": 2, "fillOpacity": 10 } },
|
||||
"overrides": [
|
||||
{ "matcher": { "id": "byName", "options": "failed" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] },
|
||||
{ "matcher": { "id": "byName", "options": "completed" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }] }
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum(rate(traces_spanmetrics_calls_total{service=\"runner\", span_name=\"runner.scrape_task\", status_code!=\"STATUS_CODE_ERROR\"}[5m])) * 60",
|
||||
"legendFormat": "completed"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum(rate(traces_spanmetrics_calls_total{service=\"runner\", span_name=\"runner.scrape_task\", status_code=\"STATUS_CODE_ERROR\"}[5m])) * 60",
|
||||
"legendFormat": "failed"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 20,
|
||||
"type": "logs",
|
||||
"title": "Scrape Task Events",
|
||||
"description": "One log line per completed or failed scrape task. Fields: task_id, kind, url, scraped, skipped, errors.",
|
||||
"gridPos": { "x": 0, "y": 12, "w": 24, "h": 10 },
|
||||
"options": {
|
||||
"showTime": true,
|
||||
"showLabels": false,
|
||||
"wrapLogMessage": false,
|
||||
"prettifyLogMessage": true,
|
||||
"enableLogDetails": true,
|
||||
"sortOrder": "Descending",
|
||||
"dedupStrategy": "none"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"expr": "{service_name=\"runner\"} | json | msg =~ `scrape task (done|failed|starting)`",
|
||||
"legendFormat": ""
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 21,
|
||||
"type": "logs",
|
||||
"title": "Audio Task Events",
|
||||
"description": "One log line per completed or failed audio task. Fields: task_id, slug, chapter, voice, key (on success), reason (on failure).",
|
||||
"gridPos": { "x": 0, "y": 22, "w": 24, "h": 10 },
|
||||
"options": {
|
||||
"showTime": true,
|
||||
"showLabels": false,
|
||||
"wrapLogMessage": false,
|
||||
"prettifyLogMessage": true,
|
||||
"enableLogDetails": true,
|
||||
"sortOrder": "Descending",
|
||||
"dedupStrategy": "none"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"expr": "{service_name=\"runner\"} | json | msg =~ `audio task (done|failed|starting)`",
|
||||
"legendFormat": ""
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 22,
|
||||
"type": "logs",
|
||||
"title": "Catalogue Refresh Progress",
|
||||
"description": "Progress logs from the background catalogue refresh (every 24h). Fields: op=catalogue_refresh, scraped, ok, skipped, errors.",
|
||||
"gridPos": { "x": 0, "y": 32, "w": 24, "h": 8 },
|
||||
"options": {
|
||||
"showTime": true,
|
||||
"showLabels": false,
|
||||
"wrapLogMessage": false,
|
||||
"prettifyLogMessage": true,
|
||||
"enableLogDetails": true,
|
||||
"sortOrder": "Descending",
|
||||
"dedupStrategy": "none"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"expr": "{service_name=\"runner\"} | json | op=`catalogue_refresh`",
|
||||
"legendFormat": ""
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
13
homelab/otel/grafana/provisioning/dashboards/dashboards.yaml
Normal file
13
homelab/otel/grafana/provisioning/dashboards/dashboards.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
# Grafana dashboard provisioning
|
||||
# Points Grafana at the local dashboards directory.
|
||||
# Drop any .json dashboard file into homelab/otel/grafana/provisioning/dashboards/
|
||||
# and it will appear in Grafana automatically on restart.
|
||||
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: libnovel
|
||||
folder: LibNovel
|
||||
type: file
|
||||
options:
|
||||
path: /etc/grafana/provisioning/dashboards
|
||||
377
homelab/otel/grafana/provisioning/dashboards/runner.json
Normal file
377
homelab/otel/grafana/provisioning/dashboards/runner.json
Normal file
@@ -0,0 +1,377 @@
|
||||
{
|
||||
"uid": "libnovel-runner",
|
||||
"title": "Runner Operations",
|
||||
"description": "Task queue health, throughput, TTS routing, and live logs for the homelab runner.",
|
||||
"tags": ["libnovel", "runner"],
|
||||
"timezone": "browser",
|
||||
"refresh": "30s",
|
||||
"time": { "from": "now-3h", "to": "now" },
|
||||
"schemaVersion": 39,
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"type": "stat",
|
||||
"title": "Tasks Running",
|
||||
"gridPos": { "x": 0, "y": 0, "w": 4, "h": 4 },
|
||||
"options": {
|
||||
"reduceOptions": { "calcs": ["lastNotNull"] },
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"textMode": "auto"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 1 },
|
||||
{ "color": "red", "value": 3 }
|
||||
]
|
||||
},
|
||||
"mappings": []
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "libnovel_runner_tasks_running",
|
||||
"legendFormat": "running",
|
||||
"instant": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"type": "stat",
|
||||
"title": "Tasks Completed (total)",
|
||||
"gridPos": { "x": 4, "y": 0, "w": 4, "h": 4 },
|
||||
"options": {
|
||||
"reduceOptions": { "calcs": ["lastNotNull"] },
|
||||
"colorMode": "background",
|
||||
"graphMode": "area",
|
||||
"textMode": "auto"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": { "fixedColor": "green", "mode": "fixed" },
|
||||
"thresholds": { "mode": "absolute", "steps": [] }
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "libnovel_runner_tasks_completed_total",
|
||||
"legendFormat": "completed",
|
||||
"instant": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"type": "stat",
|
||||
"title": "Tasks Failed (total)",
|
||||
"gridPos": { "x": 8, "y": 0, "w": 4, "h": 4 },
|
||||
"options": {
|
||||
"reduceOptions": { "calcs": ["lastNotNull"] },
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"textMode": "auto"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 1 },
|
||||
{ "color": "red", "value": 5 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "libnovel_runner_tasks_failed_total",
|
||||
"legendFormat": "failed",
|
||||
"instant": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"type": "stat",
|
||||
"title": "Runner Uptime",
|
||||
"gridPos": { "x": 12, "y": 0, "w": 4, "h": 4 },
|
||||
"options": {
|
||||
"reduceOptions": { "calcs": ["lastNotNull"] },
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"textMode": "auto"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "red", "value": null },
|
||||
{ "color": "yellow", "value": 60 },
|
||||
{ "color": "green", "value": 300 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "libnovel_runner_uptime_seconds",
|
||||
"legendFormat": "uptime",
|
||||
"instant": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"type": "stat",
|
||||
"title": "Task Failure Rate",
|
||||
"gridPos": { "x": 16, "y": 0, "w": 4, "h": 4 },
|
||||
"options": {
|
||||
"reduceOptions": { "calcs": ["lastNotNull"] },
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"textMode": "auto"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percentunit",
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 0.05 },
|
||||
{ "color": "red", "value": 0.2 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "libnovel_runner_tasks_failed_total / clamp_min(libnovel_runner_tasks_completed_total + libnovel_runner_tasks_failed_total, 1)",
|
||||
"legendFormat": "failure rate",
|
||||
"instant": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"type": "stat",
|
||||
"title": "Runner Alive",
|
||||
"gridPos": { "x": 20, "y": 0, "w": 4, "h": 4 },
|
||||
"options": {
|
||||
"reduceOptions": { "calcs": ["lastNotNull"] },
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"textMode": "auto"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [
|
||||
{ "type": "value", "options": { "1": { "text": "UP", "color": "green" }, "0": { "text": "DOWN", "color": "red" } } }
|
||||
],
|
||||
"thresholds": { "mode": "absolute", "steps": [] }
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "up{job=\"libnovel-runner\"}",
|
||||
"legendFormat": "runner",
|
||||
"instant": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 10,
|
||||
"type": "timeseries",
|
||||
"title": "Task Throughput (per minute)",
|
||||
"gridPos": { "x": 0, "y": 4, "w": 12, "h": 8 },
|
||||
"options": {
|
||||
"tooltip": { "mode": "multi" },
|
||||
"legend": { "displayMode": "list", "placement": "bottom" }
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ops",
|
||||
"custom": { "lineWidth": 2, "fillOpacity": 10 }
|
||||
},
|
||||
"overrides": [
|
||||
{ "matcher": { "id": "byName", "options": "failed" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] },
|
||||
{ "matcher": { "id": "byName", "options": "completed" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }
|
||||
]
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "rate(libnovel_runner_tasks_completed_total[5m]) * 60",
|
||||
"legendFormat": "completed"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "rate(libnovel_runner_tasks_failed_total[5m]) * 60",
|
||||
"legendFormat": "failed"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "libnovel_runner_tasks_running",
|
||||
"legendFormat": "running"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
"type": "timeseries",
|
||||
"title": "Audio Task Span Latency (p50 / p95 / p99)",
|
||||
"gridPos": { "x": 12, "y": 4, "w": 12, "h": 8 },
|
||||
"description": "End-to-end latency of runner.audio_task spans from Tempo span metrics.",
|
||||
"options": {
|
||||
"tooltip": { "mode": "multi" },
|
||||
"legend": { "displayMode": "list", "placement": "bottom" }
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s",
|
||||
"custom": { "lineWidth": 2, "fillOpacity": 10 }
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "histogram_quantile(0.50, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.audio_task\"}[5m])) by (le))",
|
||||
"legendFormat": "p50"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "histogram_quantile(0.95, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.audio_task\"}[5m])) by (le))",
|
||||
"legendFormat": "p95"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "histogram_quantile(0.99, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.audio_task\"}[5m])) by (le))",
|
||||
"legendFormat": "p99"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 20,
|
||||
"type": "timeseries",
|
||||
"title": "Scrape Task Span Latency (p50 / p95 / p99)",
|
||||
"gridPos": { "x": 0, "y": 12, "w": 12, "h": 8 },
|
||||
"description": "End-to-end latency of runner.scrape_task spans from Tempo span metrics.",
|
||||
"options": {
|
||||
"tooltip": { "mode": "multi" },
|
||||
"legend": { "displayMode": "list", "placement": "bottom" }
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s",
|
||||
"custom": { "lineWidth": 2, "fillOpacity": 10 }
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "histogram_quantile(0.50, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.scrape_task\"}[5m])) by (le))",
|
||||
"legendFormat": "p50"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "histogram_quantile(0.95, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.scrape_task\"}[5m])) by (le))",
|
||||
"legendFormat": "p95"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "histogram_quantile(0.99, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.scrape_task\"}[5m])) by (le))",
|
||||
"legendFormat": "p99"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 21,
|
||||
"type": "timeseries",
|
||||
"title": "Audio vs Scrape Task Rate",
|
||||
"gridPos": { "x": 12, "y": 12, "w": 12, "h": 8 },
|
||||
"description": "Relative throughput of audio generation vs book scraping.",
|
||||
"options": {
|
||||
"tooltip": { "mode": "multi" },
|
||||
"legend": { "displayMode": "list", "placement": "bottom" }
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ops",
|
||||
"custom": { "lineWidth": 2, "fillOpacity": 10 }
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum(rate(traces_spanmetrics_calls_total{service=\"runner\", span_name=\"runner.audio_task\"}[5m]))",
|
||||
"legendFormat": "audio tasks/s"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum(rate(traces_spanmetrics_calls_total{service=\"runner\", span_name=\"runner.scrape_task\"}[5m]))",
|
||||
"legendFormat": "scrape tasks/s"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"type": "logs",
|
||||
"title": "Runner Logs (errors & warnings)",
|
||||
"gridPos": { "x": 0, "y": 20, "w": 24, "h": 10 },
|
||||
"options": {
|
||||
"showTime": true,
|
||||
"showLabels": false,
|
||||
"showCommonLabels": false,
|
||||
"wrapLogMessage": true,
|
||||
"prettifyLogMessage": true,
|
||||
"enableLogDetails": true,
|
||||
"sortOrder": "Descending",
|
||||
"dedupStrategy": "none"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"expr": "{service_name=\"runner\"} | json | level =~ `(WARN|ERROR|error|warn)`",
|
||||
"legendFormat": ""
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 31,
|
||||
"type": "logs",
|
||||
"title": "Runner Logs (all)",
|
||||
"gridPos": { "x": 0, "y": 30, "w": 24, "h": 10 },
|
||||
"options": {
|
||||
"showTime": true,
|
||||
"showLabels": false,
|
||||
"showCommonLabels": false,
|
||||
"wrapLogMessage": true,
|
||||
"prettifyLogMessage": true,
|
||||
"enableLogDetails": true,
|
||||
"sortOrder": "Descending",
|
||||
"dedupStrategy": "none"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"expr": "{service_name=\"runner\"} | json",
|
||||
"legendFormat": ""
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
# Grafana datasource provisioning
|
||||
# Auto-configures Tempo, Prometheus, and Loki every time Grafana starts.
|
||||
# No manual setup needed in the UI.
|
||||
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Tempo
|
||||
type: tempo
|
||||
uid: tempo
|
||||
url: http://tempo:3200
|
||||
access: proxy
|
||||
isDefault: false
|
||||
jsonData:
|
||||
httpMethod: GET
|
||||
serviceMap:
|
||||
datasourceUid: prometheus
|
||||
nodeGraph:
|
||||
enabled: true
|
||||
traceQuery:
|
||||
timeShiftEnabled: true
|
||||
spanStartTimeShift: "1h"
|
||||
spanEndTimeShift: "-1h"
|
||||
spanBar:
|
||||
type: "Tag"
|
||||
tag: "http.url"
|
||||
lokiSearch:
|
||||
datasourceUid: loki
|
||||
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
uid: prometheus
|
||||
url: http://prometheus:9090
|
||||
access: proxy
|
||||
isDefault: true
|
||||
jsonData:
|
||||
httpMethod: POST
|
||||
exemplarTraceIdDestinations:
|
||||
- name: traceID
|
||||
datasourceUid: tempo
|
||||
|
||||
- name: Loki
|
||||
type: loki
|
||||
uid: loki
|
||||
url: http://loki:3100
|
||||
access: proxy
|
||||
isDefault: false
|
||||
jsonData:
|
||||
derivedFields:
|
||||
- datasourceUid: tempo
|
||||
matcherRegex: '"traceID":"(\w+)"'
|
||||
name: TraceID
|
||||
url: "$${__value.raw}"
|
||||
38
homelab/otel/loki.yaml
Normal file
38
homelab/otel/loki.yaml
Normal file
@@ -0,0 +1,38 @@
|
||||
# Loki config — minimal single-node setup
|
||||
# Receives logs from OTel Collector. 30-day retention.
|
||||
|
||||
auth_enabled: false
|
||||
|
||||
server:
|
||||
http_listen_port: 3100
|
||||
grpc_listen_port: 9096
|
||||
|
||||
common:
|
||||
instance_addr: 127.0.0.1
|
||||
path_prefix: /loki
|
||||
storage:
|
||||
filesystem:
|
||||
chunks_directory: /loki/chunks
|
||||
rules_directory: /loki/rules
|
||||
replication_factor: 1
|
||||
ring:
|
||||
kvstore:
|
||||
store: inmemory
|
||||
|
||||
schema_config:
|
||||
configs:
|
||||
- from: 2024-01-01
|
||||
store: tsdb
|
||||
object_store: filesystem
|
||||
schema: v13
|
||||
index:
|
||||
prefix: index_
|
||||
period: 24h
|
||||
|
||||
limits_config:
|
||||
retention_period: 720h # 30 days
|
||||
|
||||
compactor:
|
||||
working_directory: /loki/compactor
|
||||
delete_request_store: filesystem
|
||||
retention_enabled: true
|
||||
22
homelab/otel/prometheus.yaml
Normal file
22
homelab/otel/prometheus.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
# Prometheus config
|
||||
# Scrapes OTel collector self-metrics and runner metrics endpoint.
|
||||
# Backend metrics come in via OTel remote-write — no direct scrape needed.
|
||||
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
external_labels:
|
||||
environment: production
|
||||
|
||||
scrape_configs:
|
||||
# OTel Collector self-metrics
|
||||
- job_name: otel-collector
|
||||
static_configs:
|
||||
- targets: ["otel-collector:8888"]
|
||||
|
||||
# Runner JSON metrics endpoint (native format, no Prometheus client yet)
|
||||
# Will be replaced by OTLP once runner is instrumented with OTel SDK.
|
||||
- job_name: libnovel-runner
|
||||
metrics_path: /metrics
|
||||
static_configs:
|
||||
- targets: ["runner:9091"]
|
||||
45
homelab/otel/tempo.yaml
Normal file
45
homelab/otel/tempo.yaml
Normal file
@@ -0,0 +1,45 @@
|
||||
# Tempo config — minimal single-node setup
|
||||
# Stores traces locally. Grafana queries via the HTTP API on port 3200.
|
||||
|
||||
server:
|
||||
http_listen_port: 3200
|
||||
|
||||
distributor:
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
|
||||
ingester:
|
||||
trace_idle_period: 10s
|
||||
max_block_bytes: 104857600 # 100MB
|
||||
max_block_duration: 30m
|
||||
|
||||
compactor:
|
||||
compaction:
|
||||
block_retention: 720h # 30 days
|
||||
|
||||
storage:
|
||||
trace:
|
||||
backend: local
|
||||
local:
|
||||
path: /var/tempo/blocks
|
||||
wal:
|
||||
path: /var/tempo/wal
|
||||
|
||||
metrics_generator:
|
||||
registry:
|
||||
external_labels:
|
||||
source: tempo
|
||||
storage:
|
||||
path: /var/tempo/generator/wal
|
||||
remote_write:
|
||||
- url: http://prometheus:9090/api/v1/write
|
||||
send_exemplars: true
|
||||
|
||||
overrides:
|
||||
defaults:
|
||||
metrics_generator:
|
||||
processors: [service-graphs, span-metrics]
|
||||
generate_native_histograms: both
|
||||
98
homelab/runner/docker-compose.yml
Normal file
98
homelab/runner/docker-compose.yml
Normal file
@@ -0,0 +1,98 @@
|
||||
# LibNovel homelab runner
|
||||
#
|
||||
# Connects to production PocketBase and MinIO via public subdomains.
|
||||
# All secrets come from Doppler (project=libnovel, config=prd_homelab).
|
||||
# Run with: doppler run -- docker compose up -d
|
||||
#
|
||||
# Differs from prod runner:
|
||||
# - RUNNER_WORKER_ID=homelab-runner-1 (unique, avoids task claiming conflicts)
|
||||
# - MINIO_ENDPOINT/USE_SSL → storage.libnovel.cc over HTTPS
|
||||
# - POCKETBASE_URL → https://pb.libnovel.cc
|
||||
# - MEILI_URL → https://search.libnovel.cc (Caddy-proxied)
|
||||
# - VALKEY_ADDR → unset (not exposed publicly)
|
||||
# - RUNNER_SKIP_INITIAL_CATALOGUE_REFRESH=true
|
||||
# - REDIS_ADDR → rediss://redis.libnovel.cc:6380 (prod Redis via Caddy TLS proxy)
|
||||
# - LibreTranslate service for machine translation (internal network only)
|
||||
|
||||
services:
|
||||
libretranslate:
|
||||
image: libretranslate/libretranslate:latest
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
LT_API_KEYS: "true"
|
||||
LT_API_KEYS_DB_PATH: "/app/db/api_keys.db"
|
||||
# Load only the languages the runner actually translates between
|
||||
LT_LOAD_ONLY: "en,ru,id,pt,fr"
|
||||
LT_DISABLE_WEB_UI: "true"
|
||||
LT_UPDATE_MODELS: "false"
|
||||
volumes:
|
||||
- libretranslate_models:/home/libretranslate/.local/share/argos-translate
|
||||
- libretranslate_db:/app/db
|
||||
|
||||
runner:
|
||||
image: kalekber/libnovel-runner:latest
|
||||
restart: unless-stopped
|
||||
stop_grace_period: 135s
|
||||
depends_on:
|
||||
- libretranslate
|
||||
environment:
|
||||
# ── PocketBase ──────────────────────────────────────────────────────────
|
||||
POCKETBASE_URL: "https://pb.libnovel.cc"
|
||||
POCKETBASE_ADMIN_EMAIL: "${POCKETBASE_ADMIN_EMAIL}"
|
||||
POCKETBASE_ADMIN_PASSWORD: "${POCKETBASE_ADMIN_PASSWORD}"
|
||||
|
||||
# ── MinIO (S3 API via public subdomain) ─────────────────────────────────
|
||||
MINIO_ENDPOINT: "storage.libnovel.cc"
|
||||
MINIO_ACCESS_KEY: "${MINIO_ROOT_USER}"
|
||||
MINIO_SECRET_KEY: "${MINIO_ROOT_PASSWORD}"
|
||||
MINIO_USE_SSL: "true"
|
||||
MINIO_PUBLIC_ENDPOINT: "${MINIO_PUBLIC_ENDPOINT}"
|
||||
MINIO_PUBLIC_USE_SSL: "${MINIO_PUBLIC_USE_SSL}"
|
||||
|
||||
# ── Meilisearch (via search.libnovel.cc Caddy proxy) ────────────────────
|
||||
MEILI_URL: "${MEILI_URL}"
|
||||
MEILI_API_KEY: "${MEILI_API_KEY}"
|
||||
VALKEY_ADDR: ""
|
||||
# Force IPv4 DNS resolution — homelab has no IPv6 route to search.libnovel.cc
|
||||
GODEBUG: "preferIPv4=1"
|
||||
|
||||
# ── Kokoro TTS ──────────────────────────────────────────────────────────
|
||||
KOKORO_URL: "${KOKORO_URL}"
|
||||
KOKORO_VOICE: "${KOKORO_VOICE}"
|
||||
|
||||
# ── Pocket TTS ──────────────────────────────────────────────────────────
|
||||
POCKET_TTS_URL: "${POCKET_TTS_URL}"
|
||||
|
||||
# ── LibreTranslate (internal Docker network) ────────────────────────────
|
||||
LIBRETRANSLATE_URL: "http://libretranslate:5000"
|
||||
LIBRETRANSLATE_API_KEY: "${LIBRETRANSLATE_API_KEY}"
|
||||
|
||||
# ── Asynq / Redis (prod Redis via Caddy TLS proxy) ──────────────────────
|
||||
# The runner connects to prod Redis over TLS: rediss://redis.libnovel.cc:6380.
|
||||
# Caddy on prod terminates TLS and proxies to the local redis:6379 sidecar.
|
||||
REDIS_ADDR: "${REDIS_ADDR}"
|
||||
REDIS_PASSWORD: "${REDIS_PASSWORD}"
|
||||
|
||||
# ── Runner tuning ───────────────────────────────────────────────────────
|
||||
RUNNER_WORKER_ID: "${RUNNER_WORKER_ID}"
|
||||
RUNNER_POLL_INTERVAL: "${RUNNER_POLL_INTERVAL}"
|
||||
RUNNER_MAX_CONCURRENT_SCRAPE: "${RUNNER_MAX_CONCURRENT_SCRAPE}"
|
||||
RUNNER_MAX_CONCURRENT_AUDIO: "${RUNNER_MAX_CONCURRENT_AUDIO}"
|
||||
RUNNER_MAX_CONCURRENT_TRANSLATION: "${RUNNER_MAX_CONCURRENT_TRANSLATION}"
|
||||
RUNNER_TIMEOUT: "${RUNNER_TIMEOUT}"
|
||||
RUNNER_METRICS_ADDR: "${RUNNER_METRICS_ADDR}"
|
||||
RUNNER_SKIP_INITIAL_CATALOGUE_REFRESH: "true"
|
||||
|
||||
# ── Observability ───────────────────────────────────────────────────────
|
||||
LOG_LEVEL: "${LOG_LEVEL}"
|
||||
GLITCHTIP_DSN: "${GLITCHTIP_DSN}"
|
||||
|
||||
healthcheck:
|
||||
test: ["CMD", "/healthcheck", "file", "/tmp/runner.alive", "120"]
|
||||
interval: 60s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
|
||||
volumes:
|
||||
libretranslate_models:
|
||||
libretranslate_db:
|
||||
14
ios/.gitignore
vendored
Normal file
14
ios/.gitignore
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
# Xcode build artifacts — regenerate with: xcodegen generate --spec project.yml
|
||||
xcuserdata/
|
||||
*.xcuserstate
|
||||
*.xcworkspace/xcuserdata/
|
||||
DerivedData/
|
||||
build/
|
||||
|
||||
# Swift Package Manager — resolved by Xcode on first open
|
||||
LibNovel.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/
|
||||
.build/
|
||||
# Package.resolved is committed so SPM builds are reproducible
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
87
ios/AGENTS.md
Normal file
87
ios/AGENTS.md
Normal file
@@ -0,0 +1,87 @@
|
||||
# LibNovel iOS App
|
||||
|
||||
SwiftUI app targeting iOS 17+. Consumes the Go scraper HTTP API for books, chapters, and audio. Uses MinIO presigned URLs for media playback and downloads.
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
ios/LibNovel/LibNovel/
|
||||
├── App/ # LibNovelApp.swift, ContentView.swift, RootTabView.swift
|
||||
├── Models/ # Models.swift (all domain types)
|
||||
├── Networking/ # APIClient.swift (URLSession-based HTTP client)
|
||||
├── Services/ # AudioPlayerService, AudioDownloadService, AuthStore,
|
||||
│ # BookVoicePreferences, NetworkMonitor
|
||||
├── ViewModels/ # One per view/feature (HomeViewModel, BrowseViewModel, etc.)
|
||||
├── Views/
|
||||
│ ├── Auth/ # AuthView
|
||||
│ ├── BookDetail/ # BookDetailView, CommentsView
|
||||
│ ├── Browse/ # BrowseView (infinite scroll shelves)
|
||||
│ ├── ChapterReader/ # ChapterReaderView, DownloadAudioButton
|
||||
│ ├── Common/ # CommonViews (shared reusable components)
|
||||
│ ├── Components/ # OfflineBanner
|
||||
│ ├── Downloads/ # DownloadsView, DownloadQueueButton
|
||||
│ ├── Home/ # HomeView
|
||||
│ ├── Library/ # LibraryView (2-col grid, filters)
|
||||
│ ├── Player/ # PlayerViews (floating FAB, compact, full-screen)
|
||||
│ ├── Profile/ # ProfileView, VoiceSelectionView, UserProfileView, etc.
|
||||
│ └── Search/ # SearchView
|
||||
└── Extensions/ # NavDestination.swift, String+App.swift, Color+App.swift
|
||||
```
|
||||
|
||||
## iOS / Swift Conventions
|
||||
|
||||
- **Deployment target**: iOS 17.0 — use iOS 17+ APIs freely.
|
||||
- **Observable pattern**: The codebase currently uses `@StateObject` / `ObservableObject` / `@Published`. When adding new types, prefer the **`@Observable` macro** (iOS 17+) over `ObservableObject`. Do not refactor existing types unless explicitly asked.
|
||||
- **Navigation**: Use `NavigationStack` (not `NavigationView`). Use `.navigationDestination(for:)` for type-safe routing.
|
||||
- **Concurrency**: Use `async/await` and structured concurrency. Avoid callback-based APIs and `DispatchQueue.main.async` — prefer `@MainActor` or `await MainActor.run`.
|
||||
- **State management**: Prefer `@State` + `@Binding` for local UI state. Use environment objects for app-wide services (authStore, audioPlayer, downloadService, networkMonitor).
|
||||
- **SwiftData**: Not currently used. Do not introduce SwiftData without discussion.
|
||||
- **SF Symbols**: Use `Image(systemName:)` for icons. No emoji in UI unless already present.
|
||||
|
||||
## Key Patterns
|
||||
|
||||
- **Download keys**: Use `::` as separator (e.g., `"slug::chapter-1::voice"`), never `-`. Slugs contain hyphens.
|
||||
- **Voice fallback chain**: book override → global default → `"af_bella"`. See `BookVoicePreferences.voiceWithFallback()`.
|
||||
- **Offline handling**: Wrap view bodies in `VStack` with `OfflineBanner` at top. Use `NetworkMonitor` (environment object) to gate network calls. Suppress network errors silently when offline via `ErrorAlertModifier`.
|
||||
- **Audio playback priority**: local file → MinIO presigned URL → trigger TTS generation.
|
||||
- **Progress display**: Show decimal % when < 10% (e.g., "3.4%"), rounded when >= 10% (e.g., "47%").
|
||||
- **Cover images**: Always proxy via `/api/cover/{domain}/{slug}` — never link directly to source.
|
||||
|
||||
## Networking
|
||||
|
||||
`APIClient.swift` wraps all Go scraper API calls. When adding new endpoints:
|
||||
|
||||
1. Add a method to `APIClient`.
|
||||
2. Keep error handling consistent — throw typed errors, let ViewModels catch and set `errorMessage`.
|
||||
3. All requests are relative to `SCRAPER_API_URL` (configured at build time via xcconfig or environment).
|
||||
|
||||
## Using Documentation Tools
|
||||
|
||||
When writing or reviewing SwiftUI/Swift code:
|
||||
|
||||
- Use `context7` to look up current Apple SwiftUI/Swift documentation before implementing anything non-trivial. Apple's APIs evolve fast — do not rely on training data alone.
|
||||
- Use `gh_grep` to find real-world Swift patterns when unsure how something is typically implemented.
|
||||
|
||||
Example prompts:
|
||||
- "How does `.searchable` work in iOS 17? use context7"
|
||||
- "Show me examples of `@Observable` with async tasks. use context7"
|
||||
- "How do other apps implement background URLSession downloads in Swift? use gh_grep"
|
||||
|
||||
## UI/UX Skill
|
||||
|
||||
For any iOS view work, always load the `ios-ux` skill at the start of the task:
|
||||
|
||||
```
|
||||
skill({ name: "ios-ux" })
|
||||
```
|
||||
|
||||
This skill defines the full design system, animation rules, haptic feedback policy, accessibility checklist, performance guidelines, and offline handling requirements. It also governs how to handle screenshot-based reviews (analyze → suggest → confirm before applying).
|
||||
|
||||
## What to Avoid
|
||||
|
||||
- `NavigationView` — deprecated, use `NavigationStack`
|
||||
- `ObservableObject` / `@Published` for new types — prefer `@Observable`
|
||||
- `DispatchQueue.main.async` — prefer `@MainActor`
|
||||
- Force unwrapping optionals
|
||||
- Hardcoded color literals — use `Color+App.swift` extensions or semantic colors
|
||||
- Adding new dependencies (SPM packages) without discussion
|
||||
7
ios/LibNovelV2/LibNovelV2.xcodeproj/project.xcworkspace/contents.xcworkspacedata
generated
Normal file
7
ios/LibNovelV2/LibNovelV2.xcodeproj/project.xcworkspace/contents.xcworkspacedata
generated
Normal file
@@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Workspace
|
||||
version = "1.0">
|
||||
<FileRef
|
||||
location = "self:">
|
||||
</FileRef>
|
||||
</Workspace>
|
||||
129
justfile
Normal file
129
justfile
Normal file
@@ -0,0 +1,129 @@
|
||||
# ── LibNovel v3 — justfile ────────────────────────────────────────────────────
|
||||
# All commands that touch docker-compose are wrapped with `doppler run` so that
|
||||
# secrets are injected into the environment at runtime — no .env files needed.
|
||||
#
|
||||
# Prerequisites:
|
||||
# brew install doppler just
|
||||
# doppler setup (run once; selects project=libnovel config=prd)
|
||||
#
|
||||
# Usage:
|
||||
# just up # start all services (detached)
|
||||
# just down # stop all services
|
||||
# just logs # tail all service logs
|
||||
# just ps # show running containers
|
||||
# just build # rebuild all custom images
|
||||
# just restart # full stop + start cycle
|
||||
# just secrets # print all injected secrets (debug)
|
||||
|
||||
set dotenv-load := false # Doppler handles all env; never load a .env file
|
||||
|
||||
# ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
# Inject secrets from Doppler, then run the given command
|
||||
doppler := "doppler run --"
|
||||
|
||||
# ── Core compose commands ─────────────────────────────────────────────────────
|
||||
|
||||
# Start all services in the background
|
||||
up:
|
||||
{{doppler}} docker compose up -d
|
||||
|
||||
# Start and stream logs (foreground)
|
||||
up-fg:
|
||||
{{doppler}} docker compose up
|
||||
|
||||
# Stop all running services
|
||||
down:
|
||||
{{doppler}} docker compose down
|
||||
|
||||
# Stop and remove volumes (full reset — destructive!)
|
||||
down-volumes:
|
||||
{{doppler}} docker compose down -v
|
||||
|
||||
# Show service status
|
||||
ps:
|
||||
{{doppler}} docker compose ps
|
||||
|
||||
# ── Build & publish ───────────────────────────────────────────────────────────
|
||||
|
||||
# Build (or rebuild) all custom images locally
|
||||
build:
|
||||
{{doppler}} docker compose build
|
||||
|
||||
# Build a specific service, e.g.: just build-svc backend
|
||||
build-svc svc:
|
||||
{{doppler}} docker compose build {{svc}}
|
||||
|
||||
# Push all custom images to Docker Hub (requires docker login)
|
||||
push:
|
||||
{{doppler}} docker compose push backend runner ui caddy
|
||||
|
||||
# Build then push all custom images
|
||||
build-push: build push
|
||||
|
||||
# Pull the custom images (backend, runner, ui, caddy) from Docker Hub (uses GIT_TAG from Doppler)
|
||||
pull-images:
|
||||
{{doppler}} docker compose pull backend runner ui caddy
|
||||
|
||||
# Pull all third-party base images (minio, pocketbase, etc.)
|
||||
pull-infra:
|
||||
{{doppler}} docker compose pull minio pocketbase meilisearch valkey postgres crowdsec watchtower
|
||||
|
||||
# ── Logs ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
# Tail all service logs (last 50 lines + follow)
|
||||
logs:
|
||||
{{doppler}} docker compose logs -f --tail=50
|
||||
|
||||
# Tail a specific service, e.g.: just log backend
|
||||
log svc:
|
||||
{{doppler}} docker compose logs -f --tail=50 {{svc}}
|
||||
|
||||
# ── Lifecycle ─────────────────────────────────────────────────────────────────
|
||||
|
||||
# Full restart: stop then start
|
||||
restart: down up
|
||||
|
||||
# Restart a single service, e.g.: just restart-svc backend
|
||||
restart-svc svc:
|
||||
{{doppler}} docker compose restart {{svc}}
|
||||
|
||||
# Pull → build → recreate (rolling update without clearing volumes)
|
||||
update:
|
||||
{{doppler}} docker compose pull
|
||||
{{doppler}} docker compose build
|
||||
{{doppler}} docker compose up -d
|
||||
|
||||
# ── Initialisation ────────────────────────────────────────────────────────────
|
||||
|
||||
# Run one-shot init containers (minio-init, pb-init, postgres-init)
|
||||
init:
|
||||
{{doppler}} docker compose run --rm minio-init
|
||||
{{doppler}} docker compose run --rm pb-init
|
||||
{{doppler}} docker compose run --rm postgres-init
|
||||
|
||||
# ── Shell access ──────────────────────────────────────────────────────────────
|
||||
|
||||
# Open a shell in a running service, e.g.: just shell backend
|
||||
shell svc:
|
||||
{{doppler}} docker compose exec {{svc}} sh
|
||||
|
||||
# ── Secrets ───────────────────────────────────────────────────────────────────
|
||||
|
||||
# Print all secrets Doppler will inject (never redirected to a file)
|
||||
secrets:
|
||||
doppler secrets --project libnovel --config prd
|
||||
|
||||
# Print secrets as a .env-formatted list (useful for debugging)
|
||||
secrets-env:
|
||||
doppler secrets download --project libnovel --config prd --format env --no-file
|
||||
|
||||
# Open Doppler dashboard in browser
|
||||
secrets-dashboard:
|
||||
doppler open dashboard
|
||||
|
||||
# ── Gitea CI ──────────────────────────────────────────────────────────────────
|
||||
|
||||
# Validate workflow files
|
||||
ci-lint:
|
||||
actionlint .gitea/workflows/*.yaml
|
||||
12
opencode.json
Normal file
12
opencode.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"$schema": "https://opencode.ai/config.json",
|
||||
"mcp": {
|
||||
"gh_grep": {
|
||||
"type": "remote",
|
||||
"url": "https://mcp.grep.app",
|
||||
"enabled": true
|
||||
}
|
||||
},
|
||||
"instructions": [
|
||||
]
|
||||
}
|
||||
@@ -1,44 +0,0 @@
|
||||
# ── Build stage ────────────────────────────────────────────────────────────────
|
||||
FROM golang:1.25-alpine AS builder
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
# Cache dependency downloads separately from source compilation.
|
||||
COPY go.mod go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
|
||||
go build -ldflags="-s -w" -o /scraper ./cmd/scraper
|
||||
|
||||
# ── Runtime stage ──────────────────────────────────────────────────────────────
|
||||
FROM alpine:3.20
|
||||
|
||||
# ca-certificates is required for HTTPS requests to novelfire.net.
|
||||
RUN apk add --no-cache ca-certificates tzdata
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY --from=builder /scraper /app/scraper
|
||||
|
||||
# Create the default static output directory.
|
||||
RUN mkdir -p /app/static/books
|
||||
|
||||
# Non-root user.
|
||||
RUN addgroup -S scraper && adduser -S scraper -G scraper
|
||||
RUN chown -R scraper:scraper /app
|
||||
USER scraper
|
||||
|
||||
# ── Configuration ─────────────────────────────────────────────────────────────
|
||||
ENV BROWSERLESS_URL=http://browserless:3030
|
||||
ENV BROWSERLESS_STRATEGY=content
|
||||
ENV SCRAPER_WORKERS=0
|
||||
ENV SCRAPER_STATIC_ROOT=/app/static/books
|
||||
ENV SCRAPER_HTTP_ADDR=:8080
|
||||
|
||||
EXPOSE 8080
|
||||
|
||||
# Default: run as an HTTP server. Override CMD to use "run" for one-shot.
|
||||
ENTRYPOINT ["/app/scraper"]
|
||||
CMD ["serve"]
|
||||
@@ -1,217 +0,0 @@
|
||||
// Command scraper is the entrypoint for the libnovel scraper service.
|
||||
//
|
||||
// Usage (CLI one-shot):
|
||||
//
|
||||
// scraper run [--url <book-url>]
|
||||
//
|
||||
// Usage (HTTP server):
|
||||
//
|
||||
// scraper serve
|
||||
//
|
||||
// Environment variables:
|
||||
//
|
||||
// BROWSERLESS_URL Browserless base URL (default: http://localhost:3030)
|
||||
// BROWSERLESS_TOKEN Browserless API token (default: "")
|
||||
// BROWSERLESS_STRATEGY content | scrape | cdp | direct (default: direct)
|
||||
// BROWSERLESS_MAX_CONCURRENT Max simultaneous browser sessions (default: 5)
|
||||
// SCRAPER_WORKERS Chapter goroutine count (default: NumCPU)
|
||||
// SCRAPER_STATIC_ROOT Output directory (default: ./static/books)
|
||||
// SCRAPER_HTTP_ADDR HTTP listen address (default: :8080)
|
||||
// KOKORO_URL Kokoro-FastAPI base URL (default: "")
|
||||
// KOKORO_VOICE Default TTS voice (default: af_bella)
|
||||
// LOG_LEVEL debug | info | warn | error (default: info)
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/signal"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/libnovel/scraper/internal/browser"
|
||||
"github.com/libnovel/scraper/internal/novelfire"
|
||||
"github.com/libnovel/scraper/internal/orchestrator"
|
||||
"github.com/libnovel/scraper/internal/server"
|
||||
"github.com/libnovel/scraper/internal/writer"
|
||||
)
|
||||
|
||||
func main() {
|
||||
logLevel := slog.LevelInfo
|
||||
if v := os.Getenv("LOG_LEVEL"); v != "" {
|
||||
if err := logLevel.UnmarshalText([]byte(v)); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "invalid LOG_LEVEL %q, using info\n", v)
|
||||
}
|
||||
}
|
||||
log := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{
|
||||
Level: logLevel,
|
||||
}))
|
||||
|
||||
if err := run(log); err != nil {
|
||||
log.Error("fatal", "err", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func run(log *slog.Logger) error {
|
||||
args := os.Args[1:]
|
||||
if len(args) == 0 {
|
||||
printUsage()
|
||||
return nil
|
||||
}
|
||||
|
||||
cmd := strings.ToLower(args[0])
|
||||
|
||||
browserCfg := browser.Config{
|
||||
BaseURL: envOr("BROWSERLESS_URL", "http://localhost:3030"),
|
||||
Token: envOr("BROWSERLESS_TOKEN", ""),
|
||||
}
|
||||
browserCfg.MaxConcurrent = 5
|
||||
if s := os.Getenv("BROWSERLESS_MAX_CONCURRENT"); s != "" {
|
||||
if n, err := strconv.Atoi(s); err == nil && n > 0 {
|
||||
browserCfg.MaxConcurrent = n
|
||||
}
|
||||
}
|
||||
if s := os.Getenv("BROWSERLESS_TIMEOUT"); s != "" {
|
||||
if n, err := strconv.Atoi(s); err == nil && n > 0 {
|
||||
browserCfg.Timeout = time.Duration(n) * time.Second
|
||||
}
|
||||
}
|
||||
|
||||
strategy := browser.Strategy(strings.ToLower(envOr("BROWSERLESS_STRATEGY", string(browser.StrategyDirect))))
|
||||
urlStrategy := browser.Strategy(strings.ToLower(envOr("BROWSERLESS_URL_STRATEGY", string(browser.StrategyContent))))
|
||||
bc := newBrowserClient(strategy, browserCfg)
|
||||
urlClient := newBrowserClient(urlStrategy, browserCfg)
|
||||
|
||||
staticRoot := envOr("SCRAPER_STATIC_ROOT", "./static/books")
|
||||
w := writer.New(staticRoot)
|
||||
nf := novelfire.New(bc, log, urlClient, w)
|
||||
|
||||
workers := 0
|
||||
if s := os.Getenv("SCRAPER_WORKERS"); s != "" {
|
||||
n, err := strconv.Atoi(s)
|
||||
if err == nil && n > 0 {
|
||||
workers = n
|
||||
}
|
||||
}
|
||||
if workers == 0 {
|
||||
workers = runtime.NumCPU()
|
||||
}
|
||||
|
||||
oCfg := orchestrator.Config{
|
||||
Workers: workers,
|
||||
StaticRoot: staticRoot,
|
||||
}
|
||||
|
||||
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
|
||||
defer stop()
|
||||
|
||||
switch cmd {
|
||||
case "run":
|
||||
// Optional --url flag.
|
||||
if len(args) >= 3 && args[1] == "--url" {
|
||||
oCfg.SingleBookURL = args[2]
|
||||
}
|
||||
log.Info("starting one-shot scrape",
|
||||
"strategy", strategy,
|
||||
"workers", workers,
|
||||
"max_concurrent", browserCfg.MaxConcurrent,
|
||||
"static_root", oCfg.StaticRoot,
|
||||
"single_book", oCfg.SingleBookURL,
|
||||
)
|
||||
o := orchestrator.New(oCfg, nf, log)
|
||||
return o.Run(ctx)
|
||||
|
||||
case "refresh":
|
||||
// refresh <slug> - re-scrape a book from its saved source_url
|
||||
if len(args) < 2 {
|
||||
return fmt.Errorf("refresh command requires a book slug argument")
|
||||
}
|
||||
slug := args[1]
|
||||
w := writer.New(oCfg.StaticRoot)
|
||||
meta, ok, err := w.ReadMetadata(slug)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read metadata for %s: %w", slug, err)
|
||||
}
|
||||
if !ok {
|
||||
return fmt.Errorf("book %q not found in %s", slug, oCfg.StaticRoot)
|
||||
}
|
||||
if meta.SourceURL == "" {
|
||||
return fmt.Errorf("book %q has no source_url in metadata", slug)
|
||||
}
|
||||
oCfg.SingleBookURL = meta.SourceURL
|
||||
log.Info("refreshing book from source_url",
|
||||
"slug", slug,
|
||||
"source_url", meta.SourceURL,
|
||||
)
|
||||
o := orchestrator.New(oCfg, nf, log)
|
||||
return o.Run(ctx)
|
||||
|
||||
case "serve":
|
||||
addr := envOr("SCRAPER_HTTP_ADDR", ":8080")
|
||||
kokoroURL := envOr("KOKORO_URL", "")
|
||||
kokoroVoice := envOr("KOKORO_VOICE", "af_bella")
|
||||
log.Info("starting HTTP server",
|
||||
"addr", addr,
|
||||
"strategy", strategy,
|
||||
"workers", workers,
|
||||
"max_concurrent", browserCfg.MaxConcurrent,
|
||||
"kokoro_url", kokoroURL,
|
||||
"kokoro_voice", kokoroVoice,
|
||||
)
|
||||
srv := server.New(addr, oCfg, nf, log, kokoroURL, kokoroVoice)
|
||||
return srv.ListenAndServe(ctx)
|
||||
|
||||
default:
|
||||
return fmt.Errorf("unknown command %q; use 'run' or 'serve'", cmd)
|
||||
}
|
||||
}
|
||||
|
||||
func newBrowserClient(strategy browser.Strategy, cfg browser.Config) browser.BrowserClient {
|
||||
switch strategy {
|
||||
case browser.StrategyScrape:
|
||||
return browser.NewScrapeClient(cfg)
|
||||
case browser.StrategyCDP:
|
||||
return browser.NewCDPClient(cfg)
|
||||
case browser.StrategyDirect:
|
||||
return browser.NewDirectHTTPClient(cfg)
|
||||
default:
|
||||
return browser.NewContentClient(cfg)
|
||||
}
|
||||
}
|
||||
|
||||
// envOr returns the value of the environment variable key, or fallback when
// the variable is unset or set to the empty string.
func envOr(key, fallback string) string {
	value, found := os.LookupEnv(key)
	if !found || value == "" {
		return fallback
	}
	return value
}
|
||||
|
||||
// printUsage writes the command and environment-variable reference to
// standard error. The single %d verb in the text is filled with the current
// runtime.NumCPU() value.
func printUsage() {
	const usage = `libnovel scraper

Commands:
run [--url <book-url>] One-shot: scrape full catalogue, or a single book
refresh <slug> Re-scrape a book from its saved source_url
serve Start HTTP server (POST /scrape, POST /scrape/book)

Environment variables:
BROWSERLESS_URL Browserless base URL (default: http://localhost:3030)
BROWSERLESS_TOKEN API token (default: "")
BROWSERLESS_STRATEGY content|scrape|cdp|direct (default: direct)
BROWSERLESS_URL_STRATEGY Strategy for URL retrieval (default: content)
BROWSERLESS_MAX_CONCURRENT Max simultaneous sessions (default: 5)
BROWSERLESS_TIMEOUT HTTP request timeout sec (default: 90)
SCRAPER_WORKERS Chapter goroutines (default: NumCPU = %d)
SCRAPER_STATIC_ROOT Output directory (default: ./static/books)
SCRAPER_HTTP_ADDR HTTP listen address (default: :8080)
KOKORO_URL Kokoro-FastAPI base URL (default: "", TTS disabled)
KOKORO_VOICE Default TTS voice (default: af_bella)
LOG_LEVEL debug|info|warn|error (default: info)
`
	cpus := runtime.NumCPU()
	fmt.Fprintf(os.Stderr, usage, cpus)
}
|
||||
@@ -1,10 +0,0 @@
|
||||
module github.com/libnovel/scraper
|
||||
|
||||
go 1.25.0
|
||||
|
||||
require (
|
||||
github.com/gorilla/websocket v1.5.3 // indirect
|
||||
github.com/yuin/goldmark v1.7.16 // indirect
|
||||
golang.org/x/net v0.51.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
@@ -1,9 +0,0 @@
|
||||
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
|
||||
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
||||
github.com/yuin/goldmark v1.7.16 h1:n+CJdUxaFMiDUNnWC3dMWCIQJSkxH4uz3ZwQBkAlVNE=
|
||||
github.com/yuin/goldmark v1.7.16/go.mod h1:ip/1k0VRfGynBgxOz0yCqHrbZXhcjxyuS66Brc7iBKg=
|
||||
golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo=
|
||||
golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
@@ -1,137 +0,0 @@
|
||||
package browser
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
	"strings"
	"sync/atomic"
	"time"

	"github.com/gorilla/websocket"
)
|
||||
|
||||
// cdpClient implements BrowserClient using the CDP WebSocket endpoint.
|
||||
type cdpClient struct {
|
||||
cfg Config
|
||||
sem chan struct{}
|
||||
}
|
||||
|
||||
// NewCDPClient returns a BrowserClient that uses CDP WebSocket sessions.
|
||||
func NewCDPClient(cfg Config) BrowserClient {
|
||||
if cfg.Timeout == 0 {
|
||||
cfg.Timeout = 60 * time.Second
|
||||
}
|
||||
return &cdpClient{cfg: cfg, sem: makeSem(cfg.MaxConcurrent)}
|
||||
}
|
||||
|
||||
// Strategy identifies this client as the CDP strategy.
func (c *cdpClient) Strategy() Strategy {
	return StrategyCDP
}
|
||||
|
||||
func (c *cdpClient) GetContent(_ context.Context, _ ContentRequest) (string, error) {
|
||||
return "", fmt.Errorf("CDP client does not support /content; use NewContentClient")
|
||||
}
|
||||
|
||||
func (c *cdpClient) ScrapePage(_ context.Context, _ ScrapeRequest) (ScrapeResponse, error) {
|
||||
return ScrapeResponse{}, fmt.Errorf("CDP client does not support /scrape; use NewScrapeClient")
|
||||
}
|
||||
|
||||
// CDPSession opens a WebSocket to the Browserless /devtools/browser endpoint,
|
||||
// navigates to pageURL, and invokes fn with a live CDPConn.
|
||||
func (c *cdpClient) CDPSession(ctx context.Context, pageURL string, fn CDPSessionFunc) error {
|
||||
if err := acquire(ctx, c.sem); err != nil {
|
||||
return fmt.Errorf("cdp: semaphore: %w", err)
|
||||
}
|
||||
defer release(c.sem)
|
||||
|
||||
// Build WebSocket URL: ws://host:port/devtools/browser?token=...&url=...
|
||||
wsURL := strings.Replace(c.cfg.BaseURL, "http://", "ws://", 1)
|
||||
wsURL = strings.Replace(wsURL, "https://", "wss://", 1)
|
||||
wsURL += "/devtools/browser"
|
||||
sep := "?"
|
||||
if c.cfg.Token != "" {
|
||||
wsURL += sep + "token=" + c.cfg.Token
|
||||
sep = "&"
|
||||
}
|
||||
wsURL += sep + "url=" + pageURL
|
||||
|
||||
dialer := websocket.Dialer{
|
||||
HandshakeTimeout: 15 * time.Second,
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
}
|
||||
|
||||
conn, _, err := dialer.DialContext(ctx, wsURL, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cdp: dial %s: %w", wsURL, err)
|
||||
}
|
||||
|
||||
cdp := &cdpConn{ws: conn}
|
||||
defer cdp.Close()
|
||||
|
||||
return fn(ctx, cdp)
|
||||
}
|
||||
|
||||
// ─── cdpConn ─────────────────────────────────────────────────────────────────
|
||||
|
||||
type cdpConn struct {
|
||||
ws *websocket.Conn
|
||||
counter atomic.Int64
|
||||
}
|
||||
|
||||
type cdpRequest struct {
|
||||
ID int64 `json:"id"`
|
||||
Method string `json:"method"`
|
||||
Params map[string]any `json:"params,omitempty"`
|
||||
}
|
||||
|
||||
type cdpResponse struct {
|
||||
ID int64 `json:"id"`
|
||||
Result map[string]any `json:"result,omitempty"`
|
||||
Error *struct {
|
||||
Code int `json:"code"`
|
||||
Message string `json:"message"`
|
||||
} `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
func (c *cdpConn) Send(ctx context.Context, method string, params map[string]any) (map[string]any, error) {
|
||||
id := c.counter.Add(1)
|
||||
|
||||
req := cdpRequest{ID: id, Method: method, Params: params}
|
||||
data, err := json.Marshal(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cdp send: marshal: %w", err)
|
||||
}
|
||||
|
||||
if dl, ok := ctx.Deadline(); ok {
|
||||
_ = c.ws.SetWriteDeadline(dl)
|
||||
}
|
||||
if err := c.ws.WriteMessage(websocket.TextMessage, data); err != nil {
|
||||
return nil, fmt.Errorf("cdp send: write: %w", err)
|
||||
}
|
||||
|
||||
// Read messages until we find the response matching our id.
|
||||
for {
|
||||
if dl, ok := ctx.Deadline(); ok {
|
||||
_ = c.ws.SetReadDeadline(dl)
|
||||
}
|
||||
_, msg, err := c.ws.ReadMessage()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cdp send: read: %w", err)
|
||||
}
|
||||
var resp cdpResponse
|
||||
if err := json.Unmarshal(msg, &resp); err != nil {
|
||||
continue // skip non-JSON frames (events etc.)
|
||||
}
|
||||
if resp.ID != id {
|
||||
continue // event or different command reply
|
||||
}
|
||||
if resp.Error != nil {
|
||||
return nil, fmt.Errorf("cdp error %d: %s", resp.Error.Code, resp.Error.Message)
|
||||
}
|
||||
return resp.Result, nil
|
||||
}
|
||||
}
|
||||
|
||||
func (c *cdpConn) Close() error {
|
||||
return c.ws.Close()
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user