fix: replace speechSynthesis announce with real audio clip via /api/tts-announce

speechSynthesis is silently muted on iOS Safari and Chrome Android once the audio session ends (i.e. after onended fires), so chapter announcements never played.

Fix:
- Add a GET /api/tts-announce backend endpoint that streams a short TTS clip for arbitrary text, without MinIO caching (backend/internal/backend/).
- Add a GET /api/announce SvelteKit proxy route (no paywall).
- Add announceNavigatePending, announcePendingSlug and announcePendingChapter to AudioStore.
- Rewrite the onended announce branch: it sets audioStore.audioUrl to the announcement clip URL so the persistent <audio> element plays it; the next onended detects announceNavigatePending and navigates.
- Add a 10s safety timeout that navigates anyway in case the clip fails to load or never ends.
fix: update otel-collector telemetry.metrics config for v0.103+ (address → readers)
2026-04-08 11:57:04 +05:00 · 2026-04-07 18:15:21 +05:00
6 changed files with 228 additions and 26 deletions
--- a/backend/internal/backend/handlers.go
+++ b/backend/internal/backend/handlers.go
@@ -9,6 +9,7 @@ package backend
 //   handleGetRanking, handleGetCover
 //   handleBookPreview, handleChapterText, handleChapterTextPreview, handleChapterMarkdown, handleReindex
 //   handleAudioGenerate, handleAudioStatus, handleAudioProxy, handleAudioStream
+//   handleTTSAnnounce
 //   handleVoices
 //   handlePresignChapter, handlePresignAudio, handlePresignVoiceSample
 //   handlePresignAvatarUpload, handlePresignAvatar
@@ -904,7 +905,119 @@ func (s *Server) handleAudioStream(w http.ResponseWriter, r *http.Request) {
 	// on its next poll as soon as the MinIO object is present.
 }

-// handleAudioPreview handles GET /api/audio-preview/{slug}/{n}.
+// handleTTSAnnounce handles GET /api/tts-announce.
+//
+// Streams a short TTS clip for arbitrary text — used by the UI to announce
+// the upcoming chapter number/title through the real <audio> element instead
+// of the Web Speech API (which is silently muted on mobile after the audio
+// session ends).
+//
+// Query params:
+//   - text  — the text to synthesize (required; truncated to 300 characters)
+//   - voice — voice ID (defaults to server default)
+//   - format — "mp3" or "wav" (default "mp3"; any other value means "mp3")
+//
+// No MinIO caching — announcement clips are tiny and ephemeral.
+func (s *Server) handleTTSAnnounce(w http.ResponseWriter, r *http.Request) {
+	text := r.URL.Query().Get("text")
+	if text == "" {
+		jsonError(w, http.StatusBadRequest, "text is required")
+		return
+	}
+	// Truncate by runes, not bytes, so a multi-byte UTF-8 character (e.g. the
+	// "—" the UI puts before the chapter title) is never cut in half.
+	if runes := []rune(text); len(runes) > 300 {
+		text = string(runes[:300])
+	}
+
+	voice := r.URL.Query().Get("voice")
+	if voice == "" {
+		voice = s.cfg.DefaultVoice
+	}
+
+	// Anything other than an explicit "wav" falls back to MP3.
+	format, contentType := "mp3", "audio/mpeg"
+	if r.URL.Query().Get("format") == "wav" {
+		format, contentType = "wav", "audio/wav"
+	}
+
+	var (
+		audioStream io.ReadCloser
+		err         error
+	)
+
+	if format == "wav" {
+		if cfai.IsCFAIVoice(voice) {
+			if s.deps.CFAI == nil {
+				jsonError(w, http.StatusServiceUnavailable, "cloudflare AI TTS not configured")
+				return
+			}
+			audioStream, err = s.deps.CFAI.StreamAudioWAV(r.Context(), text, voice)
+		} else if pockettts.IsPocketTTSVoice(voice) {
+			if s.deps.PocketTTS == nil {
+				jsonError(w, http.StatusServiceUnavailable, "pocket-tts not configured")
+				return
+			}
+			audioStream, err = s.deps.PocketTTS.StreamAudioWAV(r.Context(), text, voice)
+		} else {
+			if s.deps.Kokoro == nil {
+				jsonError(w, http.StatusServiceUnavailable, "kokoro not configured")
+				return
+			}
+			audioStream, err = s.deps.Kokoro.StreamAudioWAV(r.Context(), text, voice)
+		}
+	} else {
+		if cfai.IsCFAIVoice(voice) {
+			if s.deps.CFAI == nil {
+				jsonError(w, http.StatusServiceUnavailable, "cloudflare AI TTS not configured")
+				return
+			}
+			audioStream, err = s.deps.CFAI.StreamAudioMP3(r.Context(), text, voice)
+		} else if pockettts.IsPocketTTSVoice(voice) {
+			if s.deps.PocketTTS == nil {
+				jsonError(w, http.StatusServiceUnavailable, "pocket-tts not configured")
+				return
+			}
+			audioStream, err = s.deps.PocketTTS.StreamAudioMP3(r.Context(), text, voice)
+		} else {
+			if s.deps.Kokoro == nil {
+				jsonError(w, http.StatusServiceUnavailable, "kokoro not configured")
+				return
+			}
+			audioStream, err = s.deps.Kokoro.StreamAudioMP3(r.Context(), text, voice)
+		}
+	}
+	if err != nil {
+		s.deps.Log.Error("handleTTSAnnounce: TTS stream failed", "voice", voice, "err", err)
+		jsonError(w, http.StatusInternalServerError, "tts stream failed")
+		return
+	}
+	defer audioStream.Close()
+
+	w.Header().Set("Content-Type", contentType)
+	w.Header().Set("Cache-Control", "no-store")
+	w.Header().Set("X-Accel-Buffering", "no")
+	w.WriteHeader(http.StatusOK)
+
+	flusher, canFlush := w.(http.Flusher)
+	buf := make([]byte, 32*1024)
+	for {
+		nr, readErr := audioStream.Read(buf)
+		if nr > 0 {
+			if _, writeErr := w.Write(buf[:nr]); writeErr != nil {
+				return
+			}
+			if canFlush {
+				flusher.Flush()
+			}
+		}
+		if readErr != nil {
+			break
+		}
+	}
+}
+
+// handleAudioPreview handles GET /api/audio-preview/{slug}/{n}.
 //
 // CF AI voices are batch-only and can take 1-2+ minutes to generate a full
 // chapter. This endpoint generates only the FIRST chunk of text (~1 800 chars,
--- a/backend/internal/backend/server.go
+++ b/backend/internal/backend/server.go
@@ -180,6 +180,8 @@ func (s *Server) ListenAndServe(ctx context.Context) error {
 	// Streaming audio: serves from MinIO if cached, else streams live TTS
 	// while simultaneously uploading to MinIO for future requests.
 	mux.HandleFunc("GET /api/audio-stream/{slug}/{n}", s.handleAudioStream)
+	// TTS for arbitrary short text (chapter announcements) — no MinIO caching.
+	mux.HandleFunc("GET /api/tts-announce", s.handleTTSAnnounce)
 	// CF AI preview: generates only the first ~1 800-char chunk so the client
 	// can start playing immediately while the full audio is generated by the runner.
 	mux.HandleFunc("GET /api/audio-preview/{slug}/{n}", s.handleAudioPreview)
--- a/homelab/otel/collector.yaml
+++ b/homelab/otel/collector.yaml
@@ -55,7 +55,13 @@ service:
  extensions: [health_check, pprof]
  telemetry:
    metrics:
-      address: 0.0.0.0:8888
+      # otel-collector v0.103+ replaced `address` with `readers`
+      readers:
+        - pull:
+            exporter:
+              prometheus:
+                host: 0.0.0.0
+                port: 8888
  pipelines:
    traces:
      receivers: [otlp]
--- a/ui/src/lib/audio.svelte.ts
+++ b/ui/src/lib/audio.svelte.ts
@@ -160,6 +160,19 @@ class AudioStore {
 		return this.slug === slug && this.chapter === chapter;
 	}

+	// ── Announce-chapter navigation state ────────────────────────────────────
+	/**
+	 * When true, the <audio> element is playing a short announcement clip
+	 * (not chapter audio).  The next `onended` should navigate to
+	 * announcePendingSlug / announcePendingChapter instead of the normal
+	 * auto-next flow.
+	 */
+	announceNavigatePending = $state(false);
+	/** Target book slug for the pending announce-then-navigate transition. */
+	announcePendingSlug    = $state('');
+	/** Target chapter number for the pending announce-then-navigate transition. */
+	announcePendingChapter = $state(0);
+
 	/** Reset all next-chapter pre-fetch state. */
 	resetNextPrefetch() {
 		this.nextStatus = 'none';
--- a/ui/src/routes/+layout.svelte
+++ b/ui/src/routes/+layout.svelte
@@ -260,6 +260,11 @@
 		navigator.mediaSession.playbackState = audioStore.isPlaying ? 'playing' : 'paused';
 	});

+	// ── Announce-chapter safety timeout ──────────────────────────────────────
+	// Declared outside the onended handler so that handler can clear it if the
+	// clip completes before the timeout fires.
+	let announceTimeout = 0;
+
 	// ── Save audio time on pause/end (debounced 2s) ─────────────────────────
 	let audioTimeSaveTimer = 0;
 	function saveAudioTime() {
@@ -366,6 +371,22 @@
 	}}
 	onended={() => {
 		audioStore.isPlaying = false;
+
+		// ── If we just finished playing an announcement clip, navigate now ──
+		if (audioStore.announceNavigatePending) {
+			audioStore.announceNavigatePending = false;
+			clearTimeout(announceTimeout);
+			announceTimeout = 0;
+			const slug    = audioStore.announcePendingSlug;
+			const chapter = audioStore.announcePendingChapter;
+			audioStore.announcePendingSlug    = '';
+			audioStore.announcePendingChapter = 0;
+			goto(`/books/${slug}/chapters/${chapter}`).catch(() => {
+				audioStore.autoStartChapter = null;
+			});
+			return;
+		}
+
 		// Cancel any pending debounced save and reset the position to 0 for
 		// the chapter that just finished. Without this, the 2s debounce fires
 		// after navigation and saves currentTime≈duration, causing resume to
@@ -390,45 +411,53 @@
 			// Capture values synchronously before any async work — the AudioPlayer
 			// component will unmount during navigation, but we've already read what
 			// we need.
-			const targetSlug = audioStore.slug;
+			const targetSlug    = audioStore.slug;
 			const targetChapter = audioStore.nextChapter;
 			// Store the target chapter number so only the newly-mounted AudioPlayer
 			// for that chapter reacts — not the outgoing chapter's component.
 			audioStore.autoStartChapter = targetChapter;

-			// Announce the upcoming chapter via Web Speech API if enabled.
 			const doNavigate = () => {
 				goto(`/books/${targetSlug}/chapters/${targetChapter}`).catch(() => {
 					audioStore.autoStartChapter = null;
 				});
 			};

-			if (audioStore.announceChapter && typeof window !== 'undefined' && 'speechSynthesis' in window) {
-				const nextInfo = audioStore.chapters.find((c) => c.number === targetChapter);
+			// Announce via a real audio clip so the audio session stays alive on
+			// iOS Safari / Chrome Android (speechSynthesis is silently muted after
+			// onended because the audio session has been released).
+			if (audioStore.announceChapter) {
+				const nextInfo  = audioStore.chapters.find((c) => c.number === targetChapter);
 				const titlePart = nextInfo?.title ? ` — ${nextInfo.title}` : '';
-				const text = `Chapter ${targetChapter}${titlePart}`;
-				window.speechSynthesis.cancel();
-				const utterance = new SpeechSynthesisUtterance(text);
+				const text      = `Chapter ${targetChapter}${titlePart}`;

-				// Guard: ensure doNavigate can only fire once even if both
-				// onend and the timeout fire, or onerror fires after onend.
-				let navigated = false;
-				const safeNavigate = () => {
-					if (navigated) return;
-					navigated = true;
-					clearTimeout(announceTimeout);
-					doNavigate();
-				};
+				// Always request MP3 — universally supported and the backend
+				// auto-selects the right TTS engine from the voice ID.
+				const qs       = new URLSearchParams({ text, voice: audioStore.voice, format: 'mp3' });
+				const announceUrl = `/api/announce?${qs}`;

-				// Hard fallback: if speechSynthesis silently drops the utterance
-				// (common on Chrome Android due to gesture policy, or when the
-				// browser is busy fetching the next chapter's audio), navigate
-				// anyway after a generous 8-second window.
-				const announceTimeout = setTimeout(safeNavigate, 8000);
+				// Store pending navigation target so the next onended (from the
+				// announcement clip) knows where to go.
+				audioStore.announcePendingSlug    = targetSlug;
+				audioStore.announcePendingChapter = targetChapter;
+				audioStore.announceNavigatePending = true;

-				utterance.onend = safeNavigate;
-				utterance.onerror = safeNavigate;
-				window.speechSynthesis.speak(utterance);
+				// Safety timeout: if the clip never loads/ends (network issue,
+				// browser policy, unsupported codec), navigate anyway after 10s.
+				clearTimeout(announceTimeout);
+				announceTimeout = setTimeout(() => {
+					if (audioStore.announceNavigatePending) {
+						audioStore.announceNavigatePending = false;
+						audioStore.announcePendingSlug    = '';
+						audioStore.announcePendingChapter = 0;
+						doNavigate();
+					}
+				}, 10_000) as unknown as number;
+
+				// Point the persistent <audio> element at the announcement clip.
+				// The $effect in the layout that watches audioStore.audioUrl will
+				// pick this up, set audioEl.src, and call play().
+				audioStore.audioUrl = announceUrl;
 			} else {
 				doNavigate();
 			}
--- a/ui/src/routes/api/announce/+server.ts
+++ b/ui/src/routes/api/announce/+server.ts
@@ -0,0 +1,39 @@
+import { error } from '@sveltejs/kit';
+import type { RequestHandler } from './$types';
+import { backendFetch } from '$lib/server/scraper';
+
+/**
+ * GET /api/announce?text=...&voice=...&format=...
+ *
+ * Forwards the request to the backend's GET /api/tts-announce and pipes the
+ * audio body back to the browser unchanged.
+ * No paywall: the clip is a few spoken words, not chapter audio.
+ * Responds 400 when `text` is missing or empty; a non-OK backend response is
+ * re-raised with the same status code.
+ */
+export const GET: RequestHandler = async ({ url }) => {
+	const params = url.searchParams;
+
+	const text = params.get('text') ?? '';
+	if (text.length === 0) error(400, 'text is required');
+
+	// Build the upstream query in a fixed order: text, optional voice, format.
+	const upstream = new URLSearchParams({ text });
+	const voice = params.get('voice');
+	if (voice) upstream.set('voice', voice);
+	upstream.set('format', params.get('format') ?? 'mp3');
+
+	const upstreamRes = await backendFetch(`/api/tts-announce?${upstream}`);
+	// Surface backend failures with the backend's own status code.
+	if (!upstreamRes.ok) {
+		error(upstreamRes.status as Parameters<typeof error>[0], 'TTS announce failed');
+	}
+
+	// Stream the backend body straight through; the clip is never cached.
+	const headers = new Headers();
+	headers.set('Content-Type', upstreamRes.headers.get('Content-Type') ?? 'audio/mpeg');
+	headers.set('Cache-Control', 'no-store');
+	headers.set('X-Accel-Buffering', 'no');
+
+	return new Response(upstreamRes.body, { status: 200, headers });
+};