Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0e5eb84097 | ||
|
|
6ef82a1d12 | ||
|
|
7a418ee62b | ||
|
|
d4f35a4899 | ||
|
|
6559a8c015 |
@@ -569,6 +569,30 @@ func (s *Server) handleReindex(w http.ResponseWriter, r *http.Request) {
|
||||
writeJSON(w, 0, map[string]any{"slug": slug, "indexed": count})
|
||||
}
|
||||
|
||||
// handleDedupChapters handles POST /api/admin/dedup-chapters/{slug}.
|
||||
// Removes duplicate chapters_idx records for a book, keeping the latest record
|
||||
// per chapter number. Returns the number of duplicate records deleted.
|
||||
func (s *Server) handleDedupChapters(w http.ResponseWriter, r *http.Request) {
|
||||
slug := r.PathValue("slug")
|
||||
if slug == "" {
|
||||
jsonError(w, http.StatusBadRequest, "missing slug")
|
||||
return
|
||||
}
|
||||
|
||||
deleted, err := s.deps.BookWriter.DeduplicateChapters(r.Context(), slug)
|
||||
if err != nil {
|
||||
s.deps.Log.Error("dedup-chapters failed", "slug", slug, "err", err)
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]any{
|
||||
"error": err.Error(),
|
||||
"deleted": deleted,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
s.deps.Log.Info("dedup-chapters complete", "slug", slug, "deleted", deleted)
|
||||
writeJSON(w, 0, map[string]any{"slug": slug, "deleted": deleted})
|
||||
}
|
||||
|
||||
// ── Audio ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
// handleAudioGenerate handles POST /api/audio/{slug}/{n}.
|
||||
|
||||
@@ -204,6 +204,9 @@ func (s *Server) ListenAndServe(ctx context.Context) error {
|
||||
mux.HandleFunc("POST /api/admin/text-gen/description", s.handleAdminTextGenDescription)
|
||||
mux.HandleFunc("POST /api/admin/text-gen/description/apply", s.handleAdminTextGenApplyDescription)
|
||||
|
||||
// Admin data repair endpoints
|
||||
mux.HandleFunc("POST /api/admin/dedup-chapters/{slug}", s.handleDedupChapters)
|
||||
|
||||
// Voices list
|
||||
mux.HandleFunc("GET /api/voices", s.handleVoices)
|
||||
|
||||
|
||||
@@ -35,6 +35,11 @@ type BookWriter interface {
|
||||
|
||||
// ChapterExists returns true if the markdown object for ref already exists.
|
||||
ChapterExists(ctx context.Context, slug string, ref domain.ChapterRef) bool
|
||||
|
||||
// DeduplicateChapters removes duplicate chapters_idx records for slug,
|
||||
// keeping only one record per chapter number (the one with the latest
|
||||
// updated timestamp). Returns the number of duplicate records deleted.
|
||||
DeduplicateChapters(ctx context.Context, slug string) (int, error)
|
||||
}
|
||||
|
||||
// BookReader is the read side used by the backend to serve content.
|
||||
|
||||
@@ -39,8 +39,9 @@ func (m *mockStore) ReadChapter(_ context.Context, _ string, _ int) (string, err
|
||||
func (m *mockStore) ListChapters(_ context.Context, _ string) ([]domain.ChapterInfo, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) CountChapters(_ context.Context, _ string) int { return 0 }
|
||||
func (m *mockStore) ReindexChapters(_ context.Context, _ string) (int, error) { return 0, nil }
|
||||
func (m *mockStore) CountChapters(_ context.Context, _ string) int { return 0 }
|
||||
func (m *mockStore) ReindexChapters(_ context.Context, _ string) (int, error) { return 0, nil }
|
||||
func (m *mockStore) DeduplicateChapters(_ context.Context, _ string) (int, error) { return 0, nil }
|
||||
|
||||
// RankingStore
|
||||
func (m *mockStore) WriteRankingItem(_ context.Context, _ domain.RankingItem) error { return nil }
|
||||
@@ -52,10 +53,10 @@ func (m *mockStore) RankingFreshEnough(_ context.Context, _ time.Duration) (bool
|
||||
}
|
||||
|
||||
// AudioStore
|
||||
func (m *mockStore) AudioObjectKey(_ string, _ int, _ string) string { return "" }
|
||||
func (m *mockStore) AudioObjectKeyExt(_ string, _ int, _, _ string) string { return "" }
|
||||
func (m *mockStore) AudioExists(_ context.Context, _ string) bool { return false }
|
||||
func (m *mockStore) PutAudio(_ context.Context, _ string, _ []byte) error { return nil }
|
||||
func (m *mockStore) AudioObjectKey(_ string, _ int, _ string) string { return "" }
|
||||
func (m *mockStore) AudioObjectKeyExt(_ string, _ int, _, _ string) string { return "" }
|
||||
func (m *mockStore) AudioExists(_ context.Context, _ string) bool { return false }
|
||||
func (m *mockStore) PutAudio(_ context.Context, _ string, _ []byte) error { return nil }
|
||||
func (m *mockStore) PutAudioStream(_ context.Context, _ string, _ io.Reader, _ int64, _ string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -17,6 +17,10 @@
|
||||
// response. There is no 100-second Cloudflare proxy timeout because we are
|
||||
// calling the Cloudflare API directly, not routing through a Cloudflare-proxied
|
||||
// homelab tunnel.
|
||||
//
|
||||
// The aura-2-en model enforces a hard 2 000-character limit per request.
|
||||
// GenerateAudio transparently splits longer texts into sentence-boundary chunks
|
||||
// and concatenates the resulting MP3 frames.
|
||||
package cfai
|
||||
|
||||
import (
|
||||
@@ -145,6 +149,8 @@ func New(accountID, apiToken, model string) Client {
|
||||
}
|
||||
|
||||
// GenerateAudio calls the Cloudflare Workers AI TTS endpoint and returns MP3 bytes.
|
||||
// The aura-2-en model rejects inputs longer than 2 000 characters, so this method
|
||||
// splits the text into sentence-bounded chunks and concatenates the MP3 responses.
|
||||
func (c *httpClient) GenerateAudio(ctx context.Context, text, voice string) ([]byte, error) {
|
||||
if text == "" {
|
||||
return nil, fmt.Errorf("cfai: empty text")
|
||||
@@ -154,6 +160,20 @@ func (c *httpClient) GenerateAudio(ctx context.Context, text, voice string) ([]b
|
||||
speaker = "luna"
|
||||
}
|
||||
|
||||
chunks := splitText(text, 1800) // stay comfortably under the 2 000-char limit
|
||||
var combined []byte
|
||||
for _, chunk := range chunks {
|
||||
part, err := c.generateChunk(ctx, chunk, speaker)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
combined = append(combined, part...)
|
||||
}
|
||||
return combined, nil
|
||||
}
|
||||
|
||||
// generateChunk sends a single ≤2 000-character request and returns MP3 bytes.
|
||||
func (c *httpClient) generateChunk(ctx context.Context, text, speaker string) ([]byte, error) {
|
||||
body, err := json.Marshal(map[string]any{
|
||||
"text": text,
|
||||
"speaker": speaker,
|
||||
@@ -189,6 +209,87 @@ func (c *httpClient) GenerateAudio(ctx context.Context, text, voice string) ([]b
|
||||
return mp3, nil
|
||||
}
|
||||
|
||||
// splitText splits src into chunks that each hold at most maxChars characters
// (runes).
//
// Break points are chosen inside the first maxChars runes of the remaining
// text, in this order of preference:
//
//  1. the last paragraph break ("\n\n"), otherwise the last line break ("\n");
//  2. the last sentence end (". ", "! " or "? ");
//  3. the last space;
//  4. a hard cut after maxChars runes.
//
// Every chunk produced by splitting is whitespace-trimmed and chunks that are
// empty after trimming are dropped, so callers never receive an empty chunk
// from a split. When src already fits (len(src) <= maxChars, measured in
// bytes, which is conservative for multi-byte text) it is returned unchanged
// as the only element. A non-positive maxChars is treated as "no limit" and
// also returns src unchanged. The result may be empty when src is longer than
// maxChars but consists only of whitespace.
func splitText(src string, maxChars int) []string {
	// A zero limit would yield a zero-width search window that never consumes
	// any input, so non-positive limits are short-circuited here.
	if maxChars <= 0 || len(src) <= maxChars {
		return []string{src}
	}

	var chunks []string
	remaining := src

	for len(remaining) > maxChars {
		// window is a prefix of remaining, so byte offsets found in window
		// are valid offsets into remaining and never split a multi-byte rune.
		window := runeSlice(remaining, maxChars)

		cut := -1

		// 1. Prefer a paragraph break, then any line break. A match at
		// offset 0 is ignored so that every cut consumes real content.
		if i := strings.LastIndex(window, "\n\n"); i > 0 {
			cut = i + 2
		} else if i := strings.LastIndex(window, "\n"); i > 0 {
			cut = i + 1
		}

		// 2. Fall back to the latest sentence end. Punctuation followed by a
		// newline needs no separate pattern: any newline past offset 0 has
		// already been handled by step 1.
		if cut < 0 {
			for _, punct := range []string{". ", "! ", "? "} {
				if i := strings.LastIndex(window, punct); i > 0 {
					if candidate := i + len(punct); candidate > cut {
						cut = candidate
					}
				}
			}
		}

		// 3. Last resort before a hard cut: the nearest space.
		if cut < 0 {
			if i := strings.LastIndex(window, " "); i > 0 {
				cut = i + 1
			}
		}

		// 4. No boundary at all: cut after the full window.
		if cut < 0 {
			cut = len(window)
		}

		if chunk := strings.TrimSpace(remaining[:cut]); chunk != "" {
			chunks = append(chunks, chunk)
		}
		remaining = remaining[cut:]
	}

	// The tail fits in one chunk; skip it when only whitespace is left so no
	// empty chunk is handed to the caller.
	if tail := strings.TrimSpace(remaining); tail != "" {
		chunks = append(chunks, tail)
	}

	return chunks
}

// runeSlice returns the first n runes of s as a string. If s holds n runes or
// fewer (or n is negative) s is returned unchanged.
func runeSlice(s string, n int) string {
	count := 0
	for i := range s {
		// i is the byte offset at which the (count+1)-th rune starts.
		if count == n {
			return s[:i]
		}
		count++
	}
	return s
}
|
||||
|
||||
// StreamAudioMP3 generates audio and wraps the MP3 bytes as an io.ReadCloser.
|
||||
func (c *httpClient) StreamAudioMP3(ctx context.Context, text, voice string) (io.ReadCloser, error) {
|
||||
mp3, err := c.GenerateAudio(ctx, text, voice)
|
||||
|
||||
@@ -89,6 +89,8 @@ func (s *stubStore) WriteChapterRefs(_ context.Context, _ string, _ []domain.Cha
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeduplicateChapters is a no-op stub: it reports zero deleted records and
// never fails.
func (s *stubStore) DeduplicateChapters(_ context.Context, _ string) (int, error) {
	return 0, nil
}
|
||||
|
||||
func (s *stubStore) ChapterExists(_ context.Context, slug string, ref domain.ChapterRef) bool {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
@@ -94,6 +94,10 @@ func (s *stubBookWriter) ChapterExists(_ context.Context, _ string, _ domain.Cha
|
||||
return false
|
||||
}
|
||||
|
||||
func (s *stubBookWriter) DeduplicateChapters(_ context.Context, _ string) (int, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// stubBookReader satisfies bookstore.BookReader — returns a single chapter.
|
||||
type stubBookReader struct {
|
||||
text string
|
||||
|
||||
@@ -130,7 +130,16 @@ func (s *Store) upsertChapterIdx(ctx context.Context, slug string, ref domain.Ch
|
||||
return err
|
||||
}
|
||||
if len(items) == 0 {
|
||||
return s.pb.post(ctx, "/api/collections/chapters_idx/records", payload, nil)
|
||||
postErr := s.pb.post(ctx, "/api/collections/chapters_idx/records", payload, nil)
|
||||
if postErr == nil {
|
||||
return nil
|
||||
}
|
||||
// POST failed — a concurrent writer may have inserted the same slug+number.
|
||||
// Re-fetch and fall through to PATCH (mirrors WriteMetadata retry pattern).
|
||||
items, err = s.pb.listAll(ctx, "chapters_idx", filter, "")
|
||||
if err != nil || len(items) == 0 {
|
||||
return postErr // original POST error is more informative
|
||||
}
|
||||
}
|
||||
var rec struct {
|
||||
ID string `json:"id"`
|
||||
@@ -139,6 +148,59 @@ func (s *Store) upsertChapterIdx(ctx context.Context, slug string, ref domain.Ch
|
||||
return s.pb.patch(ctx, fmt.Sprintf("/api/collections/chapters_idx/records/%s", rec.ID), payload)
|
||||
}
|
||||
|
||||
// DeduplicateChapters removes duplicate chapters_idx records for slug.
|
||||
// For each chapter number that has more than one record, it keeps the record
|
||||
// with the latest "updated" timestamp and deletes the rest.
|
||||
// Returns the number of records deleted.
|
||||
func (s *Store) DeduplicateChapters(ctx context.Context, slug string) (int, error) {
|
||||
filter := fmt.Sprintf(`slug=%q`, slug)
|
||||
items, err := s.pb.listAll(ctx, "chapters_idx", filter, "number")
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("DeduplicateChapters: list: %w", err)
|
||||
}
|
||||
|
||||
type record struct {
|
||||
ID string `json:"id"`
|
||||
Number int `json:"number"`
|
||||
Updated string `json:"updated"`
|
||||
}
|
||||
|
||||
// Group records by chapter number.
|
||||
byNumber := make(map[int][]record)
|
||||
for _, raw := range items {
|
||||
var rec record
|
||||
if err := json.Unmarshal(raw, &rec); err != nil || rec.ID == "" {
|
||||
continue
|
||||
}
|
||||
byNumber[rec.Number] = append(byNumber[rec.Number], rec)
|
||||
}
|
||||
|
||||
deleted := 0
|
||||
for _, recs := range byNumber {
|
||||
if len(recs) <= 1 {
|
||||
continue
|
||||
}
|
||||
// Keep the record with the latest Updated timestamp; delete the rest.
|
||||
keep := 0
|
||||
for i := 1; i < len(recs); i++ {
|
||||
if recs[i].Updated > recs[keep].Updated {
|
||||
keep = i
|
||||
}
|
||||
}
|
||||
for i, rec := range recs {
|
||||
if i == keep {
|
||||
continue
|
||||
}
|
||||
if delErr := s.pb.delete(ctx, fmt.Sprintf("/api/collections/chapters_idx/records/%s", rec.ID)); delErr != nil {
|
||||
s.log.Warn("DeduplicateChapters: delete failed", "slug", slug, "number", rec.Number, "id", rec.ID, "err", delErr)
|
||||
continue
|
||||
}
|
||||
deleted++
|
||||
}
|
||||
}
|
||||
return deleted, nil
|
||||
}
|
||||
|
||||
// ── BookReader ────────────────────────────────────────────────────────────────
|
||||
|
||||
type pbBook struct {
|
||||
|
||||
33
ui/src/routes/api/admin/dedup-chapters/[slug]/+server.ts
Normal file
33
ui/src/routes/api/admin/dedup-chapters/[slug]/+server.ts
Normal file
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* POST /api/admin/dedup-chapters/[slug]
|
||||
*
|
||||
* Admin-only proxy to the Go backend's dedup endpoint.
|
||||
* Removes duplicate chapters_idx records for a book, keeping the latest
|
||||
* record per chapter number. Returns { slug, deleted }.
|
||||
*/
|
||||
|
||||
import { json, error } from '@sveltejs/kit';
|
||||
import type { RequestHandler } from './$types';
|
||||
import { log } from '$lib/server/logger';
|
||||
import { backendFetch } from '$lib/server/scraper';
|
||||
|
||||
export const POST: RequestHandler = async ({ params, locals }) => {
	// Only a signed-in user whose role is 'admin' may trigger the repair.
	const isAdmin = locals.user?.role === 'admin';
	if (!isAdmin) {
		throw error(403, 'Forbidden');
	}

	const slug = params.slug;

	// Forward the request to the backend; any failure to reach it becomes a 502.
	let upstream: Response;
	try {
		const target = `/api/admin/dedup-chapters/${encodeURIComponent(slug)}`;
		upstream = await backendFetch(target, { method: 'POST' });
	} catch (cause) {
		log.error('admin/dedup-chapters', 'backend proxy error', { slug, err: String(cause) });
		throw error(502, 'Could not reach backend');
	}

	// Relay the backend's JSON body and status; an unparsable body becomes {}.
	const payload = await upstream.json().catch(() => ({}));
	return json(payload, { status: upstream.status });
};
|
||||
@@ -154,7 +154,7 @@ export const load: PageServerLoad = async ({ params, url, locals }) => {
|
||||
error(res.status === 404 ? 404 : 502, res.status === 404 ? `Chapter ${n} not found` : 'Could not fetch chapter content');
|
||||
}
|
||||
const markdown = await res.text();
|
||||
html = marked(markdown) as string;
|
||||
html = await marked(markdown);
|
||||
} catch (e) {
|
||||
if (e instanceof Error && 'status' in e) throw e;
|
||||
// Don't hard-fail — show empty content with error message
|
||||
|
||||
Reference in New Issue
Block a user