Compare commits

...

2 Commits

Author SHA1 Message Date
root
85492fae73 fix: replace speechSynthesis announce with real audio clip via /api/tts-announce
All checks were successful
Release / Test backend (push) Successful in 41s
Release / Check ui (push) Successful in 1m48s
Release / Docker (push) Successful in 13m20s
Release / Gitea Release (push) Successful in 40s
speechSynthesis is silently muted on iOS Safari and Chrome Android after
the audio session ends (onended), so chapter announcements never played.

Fix:
- Add GET /api/tts-announce backend endpoint: streams a short TTS clip
  for arbitrary text without MinIO caching (backend/internal/backend/)
- Add GET /api/announce SvelteKit proxy route (no paywall)
- Add announceNavigatePending/announcePendingSlug/announcePendingChapter
  to AudioStore
- Rewrite onended announce branch: sets audioStore.audioUrl to the
  announcement clip URL so the persistent <audio> element plays it;
  the next onended detects announceNavigatePending and navigates
- 10s safety timeout in case the clip fails to load/end
2026-04-08 11:57:04 +05:00
root
559b6234e7 fix: update otel-collector telemetry.metrics config for v0.103+ (address → readers) 2026-04-07 18:15:21 +05:00
6 changed files with 228 additions and 26 deletions

View File

@@ -9,6 +9,7 @@ package backend
// handleGetRanking, handleGetCover
// handleBookPreview, handleChapterText, handleChapterTextPreview, handleChapterMarkdown, handleReindex
// handleAudioGenerate, handleAudioStatus, handleAudioProxy, handleAudioStream
// handleTTSAnnounce
// handleVoices
// handlePresignChapter, handlePresignAudio, handlePresignVoiceSample
// handlePresignAvatarUpload, handlePresignAvatar
@@ -904,7 +905,119 @@ func (s *Server) handleAudioStream(w http.ResponseWriter, r *http.Request) {
// on its next poll as soon as the MinIO object is present.
}
// handleTTSAnnounce handles GET /api/tts-announce.
//
// It streams a short TTS clip for arbitrary text. The UI uses it to announce
// the upcoming chapter number/title through the real <audio> element instead
// of the Web Speech API (which is silently muted on mobile after the audio
// session ends).
//
// Query params:
//   - text   — the text to synthesize (required; anything past the first
//     300 characters is silently dropped)
//   - voice  — voice ID (defaults to s.cfg.DefaultVoice)
//   - format — "wav" selects WAV; any other value, or none, selects MP3
//
// The TTS engine is chosen from the voice ID: voices accepted by
// cfai.IsCFAIVoice go to Cloudflare AI, voices accepted by
// pockettts.IsPocketTTSVoice go to pocket-tts, and everything else goes to
// Kokoro. The handler answers 400 when text is missing, 503 when the chosen
// engine is not configured, and 500 when the engine fails to open a stream.
//
// No MinIO caching — announcement clips are tiny and ephemeral.
func (s *Server) handleTTSAnnounce(w http.ResponseWriter, r *http.Request) {
	// Limit is counted in characters (runes), not bytes, so a multi-byte
	// UTF-8 sequence is never cut in half before reaching the TTS engine.
	const maxAnnounceRunes = 300

	query := r.URL.Query()

	text := query.Get("text")
	if text == "" {
		jsonError(w, http.StatusBadRequest, "text is required")
		return
	}
	// Ranging over a string yields the byte offset of each rune start, so
	// the offset of rune number maxAnnounceRunes is a safe cut point.
	seen := 0
	for i := range text {
		if seen == maxAnnounceRunes {
			text = text[:i]
			break
		}
		seen++
	}

	voice := query.Get("voice")
	if voice == "" {
		voice = s.cfg.DefaultVoice
	}

	// Only an explicit "wav" opts out of MP3.
	wantWAV := query.Get("format") == "wav"
	contentType := "audio/mpeg"
	if wantWAV {
		contentType = "audio/wav"
	}

	var (
		audioStream io.ReadCloser
		err         error
	)
	ctx := r.Context()
	switch {
	case cfai.IsCFAIVoice(voice):
		if s.deps.CFAI == nil {
			jsonError(w, http.StatusServiceUnavailable, "cloudflare AI TTS not configured")
			return
		}
		if wantWAV {
			audioStream, err = s.deps.CFAI.StreamAudioWAV(ctx, text, voice)
		} else {
			audioStream, err = s.deps.CFAI.StreamAudioMP3(ctx, text, voice)
		}
	case pockettts.IsPocketTTSVoice(voice):
		if s.deps.PocketTTS == nil {
			jsonError(w, http.StatusServiceUnavailable, "pocket-tts not configured")
			return
		}
		if wantWAV {
			audioStream, err = s.deps.PocketTTS.StreamAudioWAV(ctx, text, voice)
		} else {
			audioStream, err = s.deps.PocketTTS.StreamAudioMP3(ctx, text, voice)
		}
	default:
		if s.deps.Kokoro == nil {
			jsonError(w, http.StatusServiceUnavailable, "kokoro not configured")
			return
		}
		if wantWAV {
			audioStream, err = s.deps.Kokoro.StreamAudioWAV(ctx, text, voice)
		} else {
			audioStream, err = s.deps.Kokoro.StreamAudioMP3(ctx, text, voice)
		}
	}
	if err != nil {
		s.deps.Log.Error("handleTTSAnnounce: TTS stream failed", "voice", voice, "err", err)
		jsonError(w, http.StatusInternalServerError, "tts stream failed")
		return
	}
	defer audioStream.Close()

	w.Header().Set("Content-Type", contentType)
	w.Header().Set("Cache-Control", "no-store")
	w.Header().Set("X-Accel-Buffering", "no")
	w.WriteHeader(http.StatusOK)

	// Flush after every chunk so the client receives audio as soon as the
	// engine produces it instead of waiting for the response buffer to fill.
	flusher, canFlush := w.(http.Flusher)
	buf := make([]byte, 32*1024)
	for {
		nr, readErr := audioStream.Read(buf)
		if nr > 0 {
			if _, writeErr := w.Write(buf[:nr]); writeErr != nil {
				// The client went away; nothing useful is left to do.
				return
			}
			if canFlush {
				flusher.Flush()
			}
		}
		if readErr != nil {
			// The status line is already sent, so a mid-stream failure can
			// only be logged. A cancelled request context means the client
			// disconnected, which is not worth an error entry.
			if readErr != io.EOF && ctx.Err() == nil {
				s.deps.Log.Error("handleTTSAnnounce: TTS stream interrupted", "voice", voice, "err", readErr)
			}
			return
		}
	}
}

// handleAudioPreview handles GET /api/audio-preview/{slug}/{n}.
//
// CF AI voices are batch-only and can take 1-2+ minutes to generate a full
// chapter. This endpoint generates only the FIRST chunk of text (~1 800 chars,

View File

@@ -180,6 +180,8 @@ func (s *Server) ListenAndServe(ctx context.Context) error {
// Streaming audio: serves from MinIO if cached, else streams live TTS
// while simultaneously uploading to MinIO for future requests.
mux.HandleFunc("GET /api/audio-stream/{slug}/{n}", s.handleAudioStream)
// TTS for arbitrary short text (chapter announcements) — no MinIO caching.
mux.HandleFunc("GET /api/tts-announce", s.handleTTSAnnounce)
// CF AI preview: generates only the first ~1 800-char chunk so the client
// can start playing immediately while the full audio is generated by the runner.
mux.HandleFunc("GET /api/audio-preview/{slug}/{n}", s.handleAudioPreview)

View File

@@ -55,7 +55,13 @@ service:
extensions: [health_check, pprof]
telemetry:
metrics:
address: 0.0.0.0:8888
# otel-collector v0.103+ replaced `address` with `readers`
readers:
- pull:
exporter:
prometheus:
host: 0.0.0.0
port: 8888
pipelines:
traces:
receivers: [otlp]

View File

@@ -160,6 +160,19 @@ class AudioStore {
return this.slug === slug && this.chapter === chapter;
}
// ── Announce-chapter navigation state ────────────────────────────────────
/**
* When true, the <audio> element is playing a short announcement clip
* (not chapter audio). The next `onended` should navigate to
* announcePendingSlug / announcePendingChapter instead of the normal
* auto-next flow.
*/
announceNavigatePending = $state(false);
/** Target book slug for the pending announce-then-navigate transition. */
announcePendingSlug = $state('');
/** Target chapter number for the pending announce-then-navigate transition. */
announcePendingChapter = $state(0);
/** Reset all next-chapter pre-fetch state. */
resetNextPrefetch() {
this.nextStatus = 'none';

View File

@@ -260,6 +260,11 @@
navigator.mediaSession.playbackState = audioStore.isPlaying ? 'playing' : 'paused';
});
// ── Announce-chapter safety timeout ──────────────────────────────────────
// Module-level so the onended handler can clear it if the clip completes
// before the timeout fires.
let announceTimeout = 0;
// ── Save audio time on pause/end (debounced 2s) ─────────────────────────
let audioTimeSaveTimer = 0;
function saveAudioTime() {
@@ -366,6 +371,22 @@
}}
onended={() => {
audioStore.isPlaying = false;
// ── If we just finished playing an announcement clip, navigate now ──
if (audioStore.announceNavigatePending) {
audioStore.announceNavigatePending = false;
clearTimeout(announceTimeout);
announceTimeout = 0;
const slug = audioStore.announcePendingSlug;
const chapter = audioStore.announcePendingChapter;
audioStore.announcePendingSlug = '';
audioStore.announcePendingChapter = 0;
goto(`/books/${slug}/chapters/${chapter}`).catch(() => {
audioStore.autoStartChapter = null;
});
return;
}
// Cancel any pending debounced save and reset the position to 0 for
// the chapter that just finished. Without this, the 2s debounce fires
// after navigation and saves currentTime≈duration, causing resume to
@@ -390,45 +411,53 @@
// Capture values synchronously before any async work — the AudioPlayer
// component will unmount during navigation, but we've already read what
// we need.
const targetSlug = audioStore.slug;
const targetSlug = audioStore.slug;
const targetChapter = audioStore.nextChapter;
// Store the target chapter number so only the newly-mounted AudioPlayer
// for that chapter reacts — not the outgoing chapter's component.
audioStore.autoStartChapter = targetChapter;
// Announce the upcoming chapter via Web Speech API if enabled.
const doNavigate = () => {
goto(`/books/${targetSlug}/chapters/${targetChapter}`).catch(() => {
audioStore.autoStartChapter = null;
});
};
if (audioStore.announceChapter && typeof window !== 'undefined' && 'speechSynthesis' in window) {
const nextInfo = audioStore.chapters.find((c) => c.number === targetChapter);
// Announce via a real audio clip so the audio session stays alive on
// iOS Safari / Chrome Android (speechSynthesis is silently muted after
// onended because the audio session has been released).
if (audioStore.announceChapter) {
const nextInfo = audioStore.chapters.find((c) => c.number === targetChapter);
const titlePart = nextInfo?.title ? ` ${nextInfo.title}` : '';
const text = `Chapter ${targetChapter}${titlePart}`;
window.speechSynthesis.cancel();
const utterance = new SpeechSynthesisUtterance(text);
const text = `Chapter ${targetChapter}${titlePart}`;
// Guard: ensure doNavigate can only fire once even if both
// onend and the timeout fire, or onerror fires after onend.
let navigated = false;
const safeNavigate = () => {
if (navigated) return;
navigated = true;
clearTimeout(announceTimeout);
doNavigate();
};
// Always request MP3 — universally supported and the backend
// auto-selects the right TTS engine from the voice ID.
const qs = new URLSearchParams({ text, voice: audioStore.voice, format: 'mp3' });
const announceUrl = `/api/announce?${qs}`;
// Hard fallback: if speechSynthesis silently drops the utterance
// (common on Chrome Android due to gesture policy, or when the
// browser is busy fetching the next chapter's audio), navigate
// anyway after a generous 8-second window.
const announceTimeout = setTimeout(safeNavigate, 8000);
// Store pending navigation target so the next onended (from the
// announcement clip) knows where to go.
audioStore.announcePendingSlug = targetSlug;
audioStore.announcePendingChapter = targetChapter;
audioStore.announceNavigatePending = true;
utterance.onend = safeNavigate;
utterance.onerror = safeNavigate;
window.speechSynthesis.speak(utterance);
// Safety timeout: if the clip never loads/ends (network issue,
// browser policy, unsupported codec), navigate anyway after 10s.
clearTimeout(announceTimeout);
announceTimeout = setTimeout(() => {
if (audioStore.announceNavigatePending) {
audioStore.announceNavigatePending = false;
audioStore.announcePendingSlug = '';
audioStore.announcePendingChapter = 0;
doNavigate();
}
}, 10_000) as unknown as number;
// Point the persistent <audio> element at the announcement clip.
// The $effect in the layout that watches audioStore.audioUrl will
// pick this up, set audioEl.src, and call play().
audioStore.audioUrl = announceUrl;
} else {
doNavigate();
}

View File

@@ -0,0 +1,39 @@
import { error } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { backendFetch } from '$lib/server/scraper';
/**
 * GET /api/announce?text=...&voice=...&format=...
 *
 * Relays the request to the backend's GET /api/tts-announce and hands the
 * audio body straight back to the caller.
 *
 * - `text` is mandatory; a missing or empty value is rejected with a 400.
 * - `voice` is forwarded only when it is non-empty.
 * - `format` is forwarded as given and falls back to "mp3" when absent.
 *
 * This route performs no paywall check — the clip is a short announcement
 * (a few words), not chapter audio. A non-OK backend response is surfaced as
 * an error carrying the backend's status code. The response is marked
 * `no-store`, so nothing is cached on the way through.
 */
export const GET: RequestHandler = async ({ url }) => {
	const incoming = url.searchParams;

	const text = incoming.get('text') ?? '';
	if (text === '') error(400, 'text is required');

	const voice = incoming.get('voice');
	const format = incoming.get('format') ?? 'mp3';

	// Build the upstream query in the order text → voice → format.
	const upstreamQuery = new URLSearchParams({ text });
	if (voice) upstreamQuery.set('voice', voice);
	upstreamQuery.set('format', format);

	const backendRes = await backendFetch(`/api/tts-announce?${upstreamQuery}`);
	if (!backendRes.ok) {
		error(backendRes.status as Parameters<typeof error>[0], 'TTS announce failed');
	}

	// Mirror the backend's content type; the remaining headers keep proxies
	// from caching or buffering the streamed clip.
	const responseHeaders = {
		'Content-Type': backendRes.headers.get('Content-Type') ?? 'audio/mpeg',
		'Cache-Control': 'no-store',
		'X-Accel-Buffering': 'no'
	};
	return new Response(backendRes.body, { status: 200, headers: responseHeaders });
};