Compare commits

...

5 Commits

Author SHA1 Message Date
Admin
0e5eb84097 feat: add SvelteKit proxy route for admin dedup-chapters endpoint
All checks were successful
Release / Test backend (push) Successful in 48s
Release / Check ui (push) Successful in 46s
Release / Docker / caddy (push) Successful in 43s
Release / Docker / backend (push) Successful in 6m12s
Release / Docker / runner (push) Successful in 3m8s
Release / Docker / ui (push) Successful in 2m15s
Release / Gitea Release (push) Successful in 47s
2026-04-04 22:34:26 +05:00
Admin
6ef82a1d12 fix: add DeduplicateChapters stub to test mocks to satisfy BookWriter interface
All checks were successful
Release / Test backend (push) Successful in 44s
Release / Check ui (push) Successful in 44s
Release / Docker / caddy (push) Successful in 43s
Release / Docker / backend (push) Successful in 2m46s
Release / Docker / runner (push) Successful in 3m19s
Release / Docker / ui (push) Successful in 3m12s
Release / Gitea Release (push) Successful in 1m22s
2026-04-04 21:17:55 +05:00
Admin
7a418ee62b fix: await marked() to prevent Promise being passed as chapter HTML
Some checks failed
Release / Test backend (push) Failing after 15s
Release / Docker / backend (push) Has been skipped
Release / Docker / runner (push) Has been skipped
Release / Check ui (push) Successful in 44s
Release / Docker / caddy (push) Successful in 39s
Release / Docker / ui (push) Successful in 2m41s
Release / Gitea Release (push) Has been skipped
marked() returns string | Promise<string>; the previous cast 'as string'
silently passed a Promise object, which Svelte rendered as nothing.
Free users saw blank content even though SSR HTML was correct.
2026-04-04 21:15:06 +05:00
Admin
d4f35a4899 fix: prevent duplicate chapters_idx records + add dedup endpoint
Some checks failed
Release / Test backend (push) Failing after 18s
Release / Docker / backend (push) Has been skipped
Release / Docker / runner (push) Has been skipped
Release / Check ui (push) Successful in 45s
Release / Docker / caddy (push) Successful in 38s
Release / Docker / ui (push) Successful in 2m45s
Release / Gitea Release (push) Has been skipped
- Fix upsertChapterIdx race: use conflict-retry pattern (mirrors WriteMetadata)
  so concurrent goroutines don't double-POST the same chapter number
- Add DeduplicateChapters to BookWriter interface and Store implementation;
  keeps the latest record per (slug, number) and deletes extras
- Wire POST /api/admin/dedup-chapters/{slug} handler in server.go
2026-04-04 21:00:10 +05:00
Admin
6559a8c015 fix: split long text into chunks before sending to Cloudflare AI TTS
Some checks failed
Release / Test backend (push) Successful in 41s
Release / Check ui (push) Successful in 1m5s
Release / Docker / caddy (push) Successful in 37s
Release / Docker / backend (push) Has been cancelled
Release / Docker / runner (push) Has been cancelled
Release / Docker / ui (push) Has been cancelled
Release / Gitea Release (push) Has been cancelled
The aura-2-en model enforces a hard 2 000-character limit per request.
Chapters routinely exceed this, producing 413 errors.

GenerateAudio now splits the stripped text into ≤1 800-char chunks at
paragraph → sentence → space → hard-cut boundaries, calls the API once
per chunk, and concatenates the MP3 frames. Callers (runner, streaming
handler) are unchanged. StreamAudioMP3/WAV inherit the fix automatically
since they delegate to GenerateAudio.
2026-04-04 20:45:22 +05:00
10 changed files with 243 additions and 8 deletions

View File

@@ -569,6 +569,30 @@ func (s *Server) handleReindex(w http.ResponseWriter, r *http.Request) {
writeJSON(w, 0, map[string]any{"slug": slug, "indexed": count})
}
// handleDedupChapters serves POST /api/admin/dedup-chapters/{slug}.
//
// It asks the BookWriter to remove duplicate chapters_idx records for the book
// identified by the {slug} path value and replies with JSON:
//   - 400 with an error message when the slug path value is empty;
//   - 500 with {"error", "deleted"} when deduplication fails, where "deleted"
//     is whatever count the BookWriter returned alongside the error;
//   - otherwise {"slug", "deleted"} written with status argument 0.
func (s *Server) handleDedupChapters(w http.ResponseWriter, r *http.Request) {
	slug := r.PathValue("slug")
	if slug == "" {
		jsonError(w, http.StatusBadRequest, "missing slug")
		return
	}

	removed, dedupErr := s.deps.BookWriter.DeduplicateChapters(r.Context(), slug)
	if dedupErr == nil {
		s.deps.Log.Info("dedup-chapters complete", "slug", slug, "deleted", removed)
		writeJSON(w, 0, map[string]any{"slug": slug, "deleted": removed})
		return
	}

	// Failure path: log, then report the error together with the count so far.
	s.deps.Log.Error("dedup-chapters failed", "slug", slug, "err", dedupErr)
	failure := map[string]any{
		"error":   dedupErr.Error(),
		"deleted": removed,
	}
	writeJSON(w, http.StatusInternalServerError, failure)
}
// ── Audio ──────────────────────────────────────────────────────────────────────
// handleAudioGenerate handles POST /api/audio/{slug}/{n}.

View File

@@ -204,6 +204,9 @@ func (s *Server) ListenAndServe(ctx context.Context) error {
mux.HandleFunc("POST /api/admin/text-gen/description", s.handleAdminTextGenDescription)
mux.HandleFunc("POST /api/admin/text-gen/description/apply", s.handleAdminTextGenApplyDescription)
// Admin data repair endpoints
mux.HandleFunc("POST /api/admin/dedup-chapters/{slug}", s.handleDedupChapters)
// Voices list
mux.HandleFunc("GET /api/voices", s.handleVoices)

View File

@@ -35,6 +35,11 @@ type BookWriter interface {
// ChapterExists returns true if the markdown object for ref already exists.
ChapterExists(ctx context.Context, slug string, ref domain.ChapterRef) bool
// DeduplicateChapters removes duplicate chapters_idx records for slug,
// keeping only one record per chapter number (the one with the latest
// updated timestamp). Returns the number of duplicate records deleted.
DeduplicateChapters(ctx context.Context, slug string) (int, error)
}
// BookReader is the read side used by the backend to serve content.

View File

@@ -39,8 +39,9 @@ func (m *mockStore) ReadChapter(_ context.Context, _ string, _ int) (string, err
func (m *mockStore) ListChapters(_ context.Context, _ string) ([]domain.ChapterInfo, error) {
return nil, nil
}
func (m *mockStore) CountChapters(_ context.Context, _ string) int { return 0 }
func (m *mockStore) ReindexChapters(_ context.Context, _ string) (int, error) { return 0, nil }
func (m *mockStore) CountChapters(_ context.Context, _ string) int { return 0 }
func (m *mockStore) ReindexChapters(_ context.Context, _ string) (int, error) { return 0, nil }
func (m *mockStore) DeduplicateChapters(_ context.Context, _ string) (int, error) { return 0, nil }
// RankingStore
func (m *mockStore) WriteRankingItem(_ context.Context, _ domain.RankingItem) error { return nil }
@@ -52,10 +53,10 @@ func (m *mockStore) RankingFreshEnough(_ context.Context, _ time.Duration) (bool
}
// AudioStore
func (m *mockStore) AudioObjectKey(_ string, _ int, _ string) string { return "" }
func (m *mockStore) AudioObjectKeyExt(_ string, _ int, _, _ string) string { return "" }
func (m *mockStore) AudioExists(_ context.Context, _ string) bool { return false }
func (m *mockStore) PutAudio(_ context.Context, _ string, _ []byte) error { return nil }
func (m *mockStore) AudioObjectKey(_ string, _ int, _ string) string { return "" }
func (m *mockStore) AudioObjectKeyExt(_ string, _ int, _, _ string) string { return "" }
func (m *mockStore) AudioExists(_ context.Context, _ string) bool { return false }
func (m *mockStore) PutAudio(_ context.Context, _ string, _ []byte) error { return nil }
func (m *mockStore) PutAudioStream(_ context.Context, _ string, _ io.Reader, _ int64, _ string) error {
return nil
}

View File

@@ -17,6 +17,10 @@
// response. There is no 100-second Cloudflare proxy timeout because we are
// calling the Cloudflare API directly, not routing through a Cloudflare-proxied
// homelab tunnel.
//
// The aura-2-en model enforces a hard 2 000-character limit per request.
// GenerateAudio transparently splits longer texts into sentence-boundary chunks
// and concatenates the resulting MP3 frames.
package cfai
import (
@@ -145,6 +149,8 @@ func New(accountID, apiToken, model string) Client {
}
// GenerateAudio calls the Cloudflare Workers AI TTS endpoint and returns MP3 bytes.
// The aura-2-en model rejects inputs longer than 2 000 characters, so this method
// splits the text into sentence-bounded chunks and concatenates the MP3 responses.
func (c *httpClient) GenerateAudio(ctx context.Context, text, voice string) ([]byte, error) {
if text == "" {
return nil, fmt.Errorf("cfai: empty text")
@@ -154,6 +160,20 @@ func (c *httpClient) GenerateAudio(ctx context.Context, text, voice string) ([]b
speaker = "luna"
}
chunks := splitText(text, 1800) // stay comfortably under the 2 000-char limit
var combined []byte
for _, chunk := range chunks {
part, err := c.generateChunk(ctx, chunk, speaker)
if err != nil {
return nil, err
}
combined = append(combined, part...)
}
return combined, nil
}
// generateChunk sends a single ≤2 000-character request and returns MP3 bytes.
func (c *httpClient) generateChunk(ctx context.Context, text, speaker string) ([]byte, error) {
body, err := json.Marshal(map[string]any{
"text": text,
"speaker": speaker,
@@ -189,6 +209,87 @@ func (c *httpClient) GenerateAudio(ctx context.Context, text, voice string) ([]b
return mp3, nil
}
// splitText splits src into chunks of at most maxChars characters (runes) each.
//
// Within each window of maxChars runes the cut point is chosen in this order:
//  1. the last paragraph break ("\n\n"), otherwise the last line break ("\n");
//  2. the latest sentence end (". ", "! ", "? " or the same punctuation
//     followed by "\n");
//  3. the last space;
//  4. a hard cut at the end of the window.
//
// Every chunk produced by a cut is whitespace-trimmed, and chunks that are
// empty after trimming are dropped, so trailing whitespace after the final
// sentence never yields an empty chunk. The result always has at least one
// element. When src already fits (len(src) <= maxChars) or maxChars is not
// positive, src is returned unchanged as the only element.
func splitText(src string, maxChars int) []string {
	// A non-positive limit cannot make progress (the window would be empty and
	// the loop below would never advance), so treat it as "do not split".
	if maxChars <= 0 || len(src) <= maxChars {
		return []string{src}
	}

	var chunks []string
	remaining := src
	for len(remaining) > 0 {
		// The byte length is an upper bound on the rune count, so this check is
		// conservative: anything that passes it is within the rune limit too.
		if len(remaining) <= maxChars {
			// The tail may be nothing but whitespace left over from the
			// previous cut; do not emit it as an empty chunk.
			if tail := strings.TrimSpace(remaining); tail != "" {
				chunks = append(chunks, tail)
			}
			break
		}

		// Search window: the first maxChars runes of remaining. Cutting on a
		// rune boundary guarantees a multi-byte character is never split, and
		// because window is a prefix of remaining, byte offsets found in it
		// are valid offsets into remaining.
		window := runeSlice(remaining, maxChars)

		cut := -1

		// 1. Prefer paragraph break (\n\n or \n).
		if i := strings.LastIndex(window, "\n\n"); i > 0 {
			cut = i + 2
		} else if i := strings.LastIndex(window, "\n"); i > 0 {
			cut = i + 1
		}

		// 2. Fall back to the latest sentence-ending punctuation that is
		// followed by a space or newline.
		if cut < 0 {
			for _, punct := range []string{". ", "! ", "? ", ".\n", "!\n", "?\n"} {
				if i := strings.LastIndex(window, punct); i > 0 {
					if candidate := i + len(punct); candidate > cut {
						cut = candidate
					}
				}
			}
		}

		// 3. Last resort: nearest space.
		if cut < 0 {
			if i := strings.LastIndex(window, " "); i > 0 {
				cut = i + 1
			}
		}

		// 4. Hard cut at maxChars runes if no boundary found.
		if cut < 0 {
			cut = len(window)
		}

		if chunk := strings.TrimSpace(remaining[:cut]); chunk != "" {
			chunks = append(chunks, chunk)
		}
		remaining = remaining[cut:]
	}

	// Input consisting solely of whitespace trims away entirely; keep the
	// "at least one element" contract instead of returning an empty slice.
	if len(chunks) == 0 {
		return []string{strings.TrimSpace(src)}
	}
	return chunks
}

// runeSlice returns the first n runes of s as a string.
// It returns "" when n is not positive and s itself when s has n runes or fewer.
func runeSlice(s string, n int) string {
	if n <= 0 {
		return ""
	}
	count := 0
	// Ranging over a string yields the byte offset of each rune start, so
	// slicing at i always lands on a rune boundary.
	for i := range s {
		if count == n {
			return s[:i]
		}
		count++
	}
	return s
}
// StreamAudioMP3 generates audio and wraps the MP3 bytes as an io.ReadCloser.
func (c *httpClient) StreamAudioMP3(ctx context.Context, text, voice string) (io.ReadCloser, error) {
mp3, err := c.GenerateAudio(ctx, text, voice)

View File

@@ -89,6 +89,8 @@ func (s *stubStore) WriteChapterRefs(_ context.Context, _ string, _ []domain.Cha
return nil
}
// DeduplicateChapters is a no-op stub: it ignores its arguments and reports
// zero deleted records with no error.
func (s *stubStore) DeduplicateChapters(_ context.Context, _ string) (int, error) {
	return 0, nil
}
func (s *stubStore) ChapterExists(_ context.Context, slug string, ref domain.ChapterRef) bool {
s.mu.Lock()
defer s.mu.Unlock()

View File

@@ -94,6 +94,10 @@ func (s *stubBookWriter) ChapterExists(_ context.Context, _ string, _ domain.Cha
return false
}
// DeduplicateChapters is a no-op stub: it ignores its arguments and reports
// zero deleted records with no error.
func (s *stubBookWriter) DeduplicateChapters(_ context.Context, _ string) (int, error) { return 0, nil }
// stubBookReader satisfies bookstore.BookReader — returns a single chapter.
type stubBookReader struct {
text string

View File

@@ -130,7 +130,16 @@ func (s *Store) upsertChapterIdx(ctx context.Context, slug string, ref domain.Ch
return err
}
if len(items) == 0 {
return s.pb.post(ctx, "/api/collections/chapters_idx/records", payload, nil)
postErr := s.pb.post(ctx, "/api/collections/chapters_idx/records", payload, nil)
if postErr == nil {
return nil
}
// POST failed — a concurrent writer may have inserted the same slug+number.
// Re-fetch and fall through to PATCH (mirrors WriteMetadata retry pattern).
items, err = s.pb.listAll(ctx, "chapters_idx", filter, "")
if err != nil || len(items) == 0 {
return postErr // original POST error is more informative
}
}
var rec struct {
ID string `json:"id"`
@@ -139,6 +148,59 @@ func (s *Store) upsertChapterIdx(ctx context.Context, slug string, ref domain.Ch
return s.pb.patch(ctx, fmt.Sprintf("/api/collections/chapters_idx/records/%s", rec.ID), payload)
}
// DeduplicateChapters removes duplicate chapters_idx records for slug.
//
// For each chapter number that has more than one record, it keeps the record
// with the greatest "updated" value and deletes the rest. Records that cannot
// be decoded or that carry no ID are ignored.
//
// Deletion is best-effort: a failed delete is logged and the remaining
// duplicates are still attempted. If any delete failed, the returned error
// wraps the first failure and states how many deletes failed; the returned
// count is still the number of records that were actually deleted, so callers
// can report partial progress.
func (s *Store) DeduplicateChapters(ctx context.Context, slug string) (int, error) {
	filter := fmt.Sprintf(`slug=%q`, slug)
	items, err := s.pb.listAll(ctx, "chapters_idx", filter, "number")
	if err != nil {
		return 0, fmt.Errorf("DeduplicateChapters: list: %w", err)
	}

	type record struct {
		ID      string `json:"id"`
		Number  int    `json:"number"`
		Updated string `json:"updated"`
	}

	// Group records by chapter number.
	byNumber := make(map[int][]record)
	for _, raw := range items {
		var rec record
		if err := json.Unmarshal(raw, &rec); err != nil || rec.ID == "" {
			// Without an ID the record cannot be addressed for deletion.
			continue
		}
		byNumber[rec.Number] = append(byNumber[rec.Number], rec)
	}

	deleted, failed := 0, 0
	var firstErr error
	for _, recs := range byNumber {
		if len(recs) <= 1 {
			continue
		}

		// Keep the record with the latest Updated timestamp; delete the rest.
		// Timestamps are compared as strings.
		// NOTE(review): assumes "updated" is in a lexicographically sortable
		// format — confirm against the collection schema.
		keep := 0
		for i := 1; i < len(recs); i++ {
			if recs[i].Updated > recs[keep].Updated {
				keep = i
			}
		}

		for i, rec := range recs {
			if i == keep {
				continue
			}
			path := fmt.Sprintf("/api/collections/chapters_idx/records/%s", rec.ID)
			if delErr := s.pb.delete(ctx, path); delErr != nil {
				s.log.Warn("DeduplicateChapters: delete failed", "slug", slug, "number", rec.Number, "id", rec.ID, "err", delErr)
				failed++
				if firstErr == nil {
					firstErr = delErr
				}
				continue
			}
			deleted++
		}
	}

	// Surface failures instead of reporting success while duplicates remain.
	if firstErr != nil {
		return deleted, fmt.Errorf("DeduplicateChapters: %d delete(s) failed: %w", failed, firstErr)
	}
	return deleted, nil
}
// ── BookReader ────────────────────────────────────────────────────────────────
type pbBook struct {

View File

@@ -0,0 +1,33 @@
/**
* POST /api/admin/dedup-chapters/[slug]
*
* Admin-only proxy to the Go backend's dedup endpoint.
* Removes duplicate chapters_idx records for a book, keeping the latest
* record per chapter number. Returns { slug, deleted }.
*/
import { json, error } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { log } from '$lib/server/logger';
import { backendFetch } from '$lib/server/scraper';
/**
 * Forwards an admin's dedup request for `params.slug` to the backend and
 * relays the backend's JSON body and HTTP status unchanged.
 *
 * @throws 403 when the caller is not an authenticated admin.
 * @throws 502 when the backend request itself fails.
 */
export const POST: RequestHandler = async ({ params, locals }) => {
	// Anonymous callers and non-admin users are rejected alike.
	if (locals.user?.role !== 'admin') {
		throw error(403, 'Forbidden');
	}

	const slug = params.slug;

	let upstream: Response;
	try {
		const path = `/api/admin/dedup-chapters/${encodeURIComponent(slug)}`;
		upstream = await backendFetch(path, { method: 'POST' });
	} catch (e) {
		log.error('admin/dedup-chapters', 'backend proxy error', { slug, err: String(e) });
		throw error(502, 'Could not reach backend');
	}

	// A body that cannot be parsed as JSON is relayed as an empty object.
	let payload: unknown;
	try {
		payload = await upstream.json();
	} catch {
		payload = {};
	}

	return json(payload, { status: upstream.status });
};

View File

@@ -154,7 +154,7 @@ export const load: PageServerLoad = async ({ params, url, locals }) => {
error(res.status === 404 ? 404 : 502, res.status === 404 ? `Chapter ${n} not found` : 'Could not fetch chapter content');
}
const markdown = await res.text();
html = marked(markdown) as string;
html = await marked(markdown);
} catch (e) {
if (e instanceof Error && 'status' in e) throw e;
// Don't hard-fail — show empty content with error message