Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
85492fae73 | ||
|
|
559b6234e7 |
@@ -9,6 +9,7 @@ package backend
|
||||
// handleGetRanking, handleGetCover
|
||||
// handleBookPreview, handleChapterText, handleChapterTextPreview, handleChapterMarkdown, handleReindex
|
||||
// handleAudioGenerate, handleAudioStatus, handleAudioProxy, handleAudioStream
|
||||
// handleTTSAnnounce
|
||||
// handleVoices
|
||||
// handlePresignChapter, handlePresignAudio, handlePresignVoiceSample
|
||||
// handlePresignAvatarUpload, handlePresignAvatar
|
||||
@@ -904,7 +905,119 @@ func (s *Server) handleAudioStream(w http.ResponseWriter, r *http.Request) {
|
||||
// on its next poll as soon as the MinIO object is present.
|
||||
}
|
||||
|
||||
// handleAudioPreview handles GET /api/audio-preview/{slug}/{n}.
|
||||
// handleTTSAnnounce handles GET /api/tts-announce.
|
||||
//
|
||||
// Streams a short TTS clip for arbitrary text — used by the UI to announce
|
||||
// the upcoming chapter number/title through the real <audio> element instead
|
||||
// of the Web Speech API (which is silently muted on mobile after the audio
|
||||
// session ends).
|
||||
//
|
||||
// Query params:
|
||||
// - text — the text to synthesize (required, max 300 chars)
|
||||
// - voice — voice ID (defaults to server default)
|
||||
// - format — "mp3" or "wav" (default "mp3")
|
||||
//
|
||||
// No MinIO caching — announcement clips are tiny and ephemeral.
|
||||
func (s *Server) handleTTSAnnounce(w http.ResponseWriter, r *http.Request) {
|
||||
text := r.URL.Query().Get("text")
|
||||
if text == "" {
|
||||
jsonError(w, http.StatusBadRequest, "text is required")
|
||||
return
|
||||
}
|
||||
if len(text) > 300 {
|
||||
text = text[:300]
|
||||
}
|
||||
|
||||
voice := r.URL.Query().Get("voice")
|
||||
if voice == "" {
|
||||
voice = s.cfg.DefaultVoice
|
||||
}
|
||||
|
||||
format := r.URL.Query().Get("format")
|
||||
if format != "wav" {
|
||||
format = "mp3"
|
||||
}
|
||||
|
||||
contentType := "audio/mpeg"
|
||||
if format == "wav" {
|
||||
contentType = "audio/wav"
|
||||
}
|
||||
|
||||
var (
|
||||
audioStream io.ReadCloser
|
||||
err error
|
||||
)
|
||||
|
||||
if format == "wav" {
|
||||
if cfai.IsCFAIVoice(voice) {
|
||||
if s.deps.CFAI == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "cloudflare AI TTS not configured")
|
||||
return
|
||||
}
|
||||
audioStream, err = s.deps.CFAI.StreamAudioWAV(r.Context(), text, voice)
|
||||
} else if pockettts.IsPocketTTSVoice(voice) {
|
||||
if s.deps.PocketTTS == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "pocket-tts not configured")
|
||||
return
|
||||
}
|
||||
audioStream, err = s.deps.PocketTTS.StreamAudioWAV(r.Context(), text, voice)
|
||||
} else {
|
||||
if s.deps.Kokoro == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "kokoro not configured")
|
||||
return
|
||||
}
|
||||
audioStream, err = s.deps.Kokoro.StreamAudioWAV(r.Context(), text, voice)
|
||||
}
|
||||
} else {
|
||||
if cfai.IsCFAIVoice(voice) {
|
||||
if s.deps.CFAI == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "cloudflare AI TTS not configured")
|
||||
return
|
||||
}
|
||||
audioStream, err = s.deps.CFAI.StreamAudioMP3(r.Context(), text, voice)
|
||||
} else if pockettts.IsPocketTTSVoice(voice) {
|
||||
if s.deps.PocketTTS == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "pocket-tts not configured")
|
||||
return
|
||||
}
|
||||
audioStream, err = s.deps.PocketTTS.StreamAudioMP3(r.Context(), text, voice)
|
||||
} else {
|
||||
if s.deps.Kokoro == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "kokoro not configured")
|
||||
return
|
||||
}
|
||||
audioStream, err = s.deps.Kokoro.StreamAudioMP3(r.Context(), text, voice)
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
s.deps.Log.Error("handleTTSAnnounce: TTS stream failed", "voice", voice, "err", err)
|
||||
jsonError(w, http.StatusInternalServerError, "tts stream failed")
|
||||
return
|
||||
}
|
||||
defer audioStream.Close()
|
||||
|
||||
w.Header().Set("Content-Type", contentType)
|
||||
w.Header().Set("Cache-Control", "no-store")
|
||||
w.Header().Set("X-Accel-Buffering", "no")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
|
||||
flusher, canFlush := w.(http.Flusher)
|
||||
buf := make([]byte, 32*1024)
|
||||
for {
|
||||
nr, readErr := audioStream.Read(buf)
|
||||
if nr > 0 {
|
||||
if _, writeErr := w.Write(buf[:nr]); writeErr != nil {
|
||||
return
|
||||
}
|
||||
if canFlush {
|
||||
flusher.Flush()
|
||||
}
|
||||
}
|
||||
if readErr != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
//
|
||||
// CF AI voices are batch-only and can take 1-2+ minutes to generate a full
|
||||
// chapter. This endpoint generates only the FIRST chunk of text (~1 800 chars,
|
||||
|
||||
@@ -180,6 +180,8 @@ func (s *Server) ListenAndServe(ctx context.Context) error {
|
||||
// Streaming audio: serves from MinIO if cached, else streams live TTS
|
||||
// while simultaneously uploading to MinIO for future requests.
|
||||
mux.HandleFunc("GET /api/audio-stream/{slug}/{n}", s.handleAudioStream)
|
||||
// TTS for arbitrary short text (chapter announcements) — no MinIO caching.
|
||||
mux.HandleFunc("GET /api/tts-announce", s.handleTTSAnnounce)
|
||||
// CF AI preview: generates only the first ~1 800-char chunk so the client
|
||||
// can start playing immediately while the full audio is generated by the runner.
|
||||
mux.HandleFunc("GET /api/audio-preview/{slug}/{n}", s.handleAudioPreview)
|
||||
|
||||
@@ -55,7 +55,13 @@ service:
|
||||
extensions: [health_check, pprof]
|
||||
telemetry:
|
||||
metrics:
|
||||
address: 0.0.0.0:8888
|
||||
# otel-collector v0.103+ replaced `address` with `readers`
|
||||
readers:
|
||||
- pull:
|
||||
exporter:
|
||||
prometheus:
|
||||
host: 0.0.0.0
|
||||
port: 8888
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
|
||||
@@ -160,6 +160,19 @@ class AudioStore {
|
||||
return this.slug === slug && this.chapter === chapter;
|
||||
}
|
||||
|
||||
// ── Announce-chapter navigation state ────────────────────────────────────
|
||||
/**
|
||||
* When true, the <audio> element is playing a short announcement clip
|
||||
* (not chapter audio). The next `onended` should navigate to
|
||||
* announcePendingSlug / announcePendingChapter instead of the normal
|
||||
* auto-next flow.
|
||||
*/
|
||||
announceNavigatePending = $state(false);
|
||||
/** Target book slug for the pending announce-then-navigate transition. */
|
||||
announcePendingSlug = $state('');
|
||||
/** Target chapter number for the pending announce-then-navigate transition. */
|
||||
announcePendingChapter = $state(0);
|
||||
|
||||
/** Reset all next-chapter pre-fetch state. */
|
||||
resetNextPrefetch() {
|
||||
this.nextStatus = 'none';
|
||||
|
||||
@@ -260,6 +260,11 @@
|
||||
navigator.mediaSession.playbackState = audioStore.isPlaying ? 'playing' : 'paused';
|
||||
});
|
||||
|
||||
// ── Announce-chapter safety timeout ──────────────────────────────────────
|
||||
// Module-level so the onended handler can clear it if the clip completes
|
||||
// before the timeout fires.
|
||||
let announceTimeout = 0;
|
||||
|
||||
// ── Save audio time on pause/end (debounced 2s) ─────────────────────────
|
||||
let audioTimeSaveTimer = 0;
|
||||
function saveAudioTime() {
|
||||
@@ -366,6 +371,22 @@
|
||||
}}
|
||||
onended={() => {
|
||||
audioStore.isPlaying = false;
|
||||
|
||||
// ── If we just finished playing an announcement clip, navigate now ──
|
||||
if (audioStore.announceNavigatePending) {
|
||||
audioStore.announceNavigatePending = false;
|
||||
clearTimeout(announceTimeout);
|
||||
announceTimeout = 0;
|
||||
const slug = audioStore.announcePendingSlug;
|
||||
const chapter = audioStore.announcePendingChapter;
|
||||
audioStore.announcePendingSlug = '';
|
||||
audioStore.announcePendingChapter = 0;
|
||||
goto(`/books/${slug}/chapters/${chapter}`).catch(() => {
|
||||
audioStore.autoStartChapter = null;
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// Cancel any pending debounced save and reset the position to 0 for
|
||||
// the chapter that just finished. Without this, the 2s debounce fires
|
||||
// after navigation and saves currentTime≈duration, causing resume to
|
||||
@@ -390,45 +411,53 @@
|
||||
// Capture values synchronously before any async work — the AudioPlayer
|
||||
// component will unmount during navigation, but we've already read what
|
||||
// we need.
|
||||
const targetSlug = audioStore.slug;
|
||||
const targetSlug = audioStore.slug;
|
||||
const targetChapter = audioStore.nextChapter;
|
||||
// Store the target chapter number so only the newly-mounted AudioPlayer
|
||||
// for that chapter reacts — not the outgoing chapter's component.
|
||||
audioStore.autoStartChapter = targetChapter;
|
||||
|
||||
// Announce the upcoming chapter via Web Speech API if enabled.
|
||||
const doNavigate = () => {
|
||||
goto(`/books/${targetSlug}/chapters/${targetChapter}`).catch(() => {
|
||||
audioStore.autoStartChapter = null;
|
||||
});
|
||||
};
|
||||
|
||||
if (audioStore.announceChapter && typeof window !== 'undefined' && 'speechSynthesis' in window) {
|
||||
const nextInfo = audioStore.chapters.find((c) => c.number === targetChapter);
|
||||
// Announce via a real audio clip so the audio session stays alive on
|
||||
// iOS Safari / Chrome Android (speechSynthesis is silently muted after
|
||||
// onended because the audio session has been released).
|
||||
if (audioStore.announceChapter) {
|
||||
const nextInfo = audioStore.chapters.find((c) => c.number === targetChapter);
|
||||
const titlePart = nextInfo?.title ? ` — ${nextInfo.title}` : '';
|
||||
const text = `Chapter ${targetChapter}${titlePart}`;
|
||||
window.speechSynthesis.cancel();
|
||||
const utterance = new SpeechSynthesisUtterance(text);
|
||||
const text = `Chapter ${targetChapter}${titlePart}`;
|
||||
|
||||
// Guard: ensure doNavigate can only fire once even if both
|
||||
// onend and the timeout fire, or onerror fires after onend.
|
||||
let navigated = false;
|
||||
const safeNavigate = () => {
|
||||
if (navigated) return;
|
||||
navigated = true;
|
||||
clearTimeout(announceTimeout);
|
||||
doNavigate();
|
||||
};
|
||||
// Always request MP3 — universally supported and the backend
|
||||
// auto-selects the right TTS engine from the voice ID.
|
||||
const qs = new URLSearchParams({ text, voice: audioStore.voice, format: 'mp3' });
|
||||
const announceUrl = `/api/announce?${qs}`;
|
||||
|
||||
// Hard fallback: if speechSynthesis silently drops the utterance
|
||||
// (common on Chrome Android due to gesture policy, or when the
|
||||
// browser is busy fetching the next chapter's audio), navigate
|
||||
// anyway after a generous 8-second window.
|
||||
const announceTimeout = setTimeout(safeNavigate, 8000);
|
||||
// Store pending navigation target so the next onended (from the
|
||||
// announcement clip) knows where to go.
|
||||
audioStore.announcePendingSlug = targetSlug;
|
||||
audioStore.announcePendingChapter = targetChapter;
|
||||
audioStore.announceNavigatePending = true;
|
||||
|
||||
utterance.onend = safeNavigate;
|
||||
utterance.onerror = safeNavigate;
|
||||
window.speechSynthesis.speak(utterance);
|
||||
// Safety timeout: if the clip never loads/ends (network issue,
|
||||
// browser policy, unsupported codec), navigate anyway after 10s.
|
||||
clearTimeout(announceTimeout);
|
||||
announceTimeout = setTimeout(() => {
|
||||
if (audioStore.announceNavigatePending) {
|
||||
audioStore.announceNavigatePending = false;
|
||||
audioStore.announcePendingSlug = '';
|
||||
audioStore.announcePendingChapter = 0;
|
||||
doNavigate();
|
||||
}
|
||||
}, 10_000) as unknown as number;
|
||||
|
||||
// Point the persistent <audio> element at the announcement clip.
|
||||
// The $effect in the layout that watches audioStore.audioUrl will
|
||||
// pick this up, set audioEl.src, and call play().
|
||||
audioStore.audioUrl = announceUrl;
|
||||
} else {
|
||||
doNavigate();
|
||||
}
|
||||
|
||||
39
ui/src/routes/api/announce/+server.ts
Normal file
39
ui/src/routes/api/announce/+server.ts
Normal file
@@ -0,0 +1,39 @@
|
||||
import { error } from '@sveltejs/kit';
|
||||
import type { RequestHandler } from './$types';
|
||||
import { backendFetch } from '$lib/server/scraper';
|
||||
|
||||
/**
 * GET /api/announce?text=...&voice=...&format=...
 *
 * Pass-through to the backend's GET /api/tts-announce endpoint.
 *
 * - `text` is mandatory; a missing or empty value is rejected with 400.
 * - `voice` is forwarded only when the caller supplied a non-empty value.
 * - `format` is always forwarded and falls back to "mp3" when absent.
 *
 * No paywall — this is a short announcement clip (a few words), not chapter audio.
 * No MinIO caching — the backend response body is streamed straight back.
 * A non-OK backend response is re-raised with the backend's status code.
 */
export const GET: RequestHandler = async ({ url }) => {
	const incoming = url.searchParams;

	const text = incoming.get('text') ?? '';
	if (!text) error(400, 'text is required');

	const voice = incoming.get('voice');
	const format = incoming.get('format') ?? 'mp3';

	// Build the outgoing query in the order text, voice, format.
	const qs = new URLSearchParams({ text });
	if (voice) qs.set('voice', voice);
	qs.set('format', format);

	const backendRes = await backendFetch(`/api/tts-announce?${qs}`);
	if (!backendRes.ok) {
		error(backendRes.status as Parameters<typeof error>[0], 'TTS announce failed');
	}

	const headers = {
		'Content-Type': backendRes.headers.get('Content-Type') ?? 'audio/mpeg',
		'Cache-Control': 'no-store',
		'X-Accel-Buffering': 'no'
	};

	return new Response(backendRes.body, { status: 200, headers });
};
|
||||
Reference in New Issue
Block a user