Fetch voices from Kokoro API at runtime; replace select with styled voice card grid
This commit is contained in:
@@ -38,6 +38,10 @@ type Server struct {
|
||||
kokoroURL string // Kokoro-FastAPI base URL, e.g. http://kokoro:8880
|
||||
kokoroVoice string // default voice, e.g. af_bella
|
||||
|
||||
// voiceMu guards cachedVoices.
|
||||
voiceMu sync.RWMutex
|
||||
cachedVoices []string // populated on first request from Kokoro /v1/audio/voices
|
||||
|
||||
// audioMu guards audioCache and audioInFlight.
|
||||
// audioCache maps a cache key to the Kokoro download filename returned by
|
||||
// POST /v1/audio/speech with return_download_link=true.
|
||||
@@ -62,6 +66,45 @@ func New(addr string, oCfg orchestrator.Config, novel scraper.NovelScraper, log
|
||||
}
|
||||
}
|
||||
|
||||
// voices returns the list of available Kokoro voices. On the first call it
|
||||
// fetches GET /v1/audio/voices from the Kokoro service and caches the result.
|
||||
// If the fetch fails (Kokoro not up yet, network error, etc.) it falls back to
|
||||
// the hardcoded kokoroVoices list so the UI is never empty.
|
||||
func (s *Server) voices() []string {
|
||||
s.voiceMu.RLock()
|
||||
cached := s.cachedVoices
|
||||
s.voiceMu.RUnlock()
|
||||
if len(cached) > 0 {
|
||||
return cached
|
||||
}
|
||||
|
||||
if s.kokoroURL != "" {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, s.kokoroURL+"/v1/audio/voices", nil)
|
||||
if err == nil {
|
||||
req.Header.Set("Accept", "application/json")
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err == nil {
|
||||
defer resp.Body.Close()
|
||||
var payload struct {
|
||||
Voices []string `json:"voices"`
|
||||
}
|
||||
if resp.StatusCode == http.StatusOK && json.NewDecoder(resp.Body).Decode(&payload) == nil && len(payload.Voices) > 0 {
|
||||
s.voiceMu.Lock()
|
||||
s.cachedVoices = payload.Voices
|
||||
s.voiceMu.Unlock()
|
||||
s.log.Info("fetched kokoro voices", "count", len(payload.Voices))
|
||||
return payload.Voices
|
||||
}
|
||||
}
|
||||
}
|
||||
s.log.Warn("could not fetch kokoro voices, using built-in list")
|
||||
}
|
||||
|
||||
return kokoroVoices
|
||||
}
|
||||
|
||||
// ListenAndServe starts the HTTP server and blocks until the provided context
|
||||
// is cancelled.
|
||||
func (s *Server) ListenAndServe(ctx context.Context) error {
|
||||
|
||||
@@ -65,6 +65,64 @@ var kokoroVoices = []string{
|
||||
"zm_yunjian", "zm_yunxi", "zm_yunxia", "zm_yunyang",
|
||||
}
|
||||
|
||||
// voiceInfo is the display metadata derived from a single Kokoro voice ID.
type voiceInfo struct {
	// ID is the raw voice ID, e.g. "af_bella".
	ID string
	// Name is the display name, e.g. "Bella".
	Name string
	// Lang is the language label, e.g. "EN-US" ("?" when parseVoice cannot tell).
	Lang string
	// Gender is "F" or "M" ("?" when parseVoice cannot tell).
	Gender string
}
|
||||
|
||||
// langLabel maps the first letter of a Kokoro voice ID (the language code in
// the {lang}{gender}_{name} pattern) to a human-readable language tag.
var langLabel = map[string]string{
	// English variants.
	"a": "EN-US",
	"b": "EN-GB",

	// Other languages.
	"e": "ES",
	"f": "FR",
	"h": "HI",
	"i": "IT",
	"j": "JA",
	"p": "PT",
	"z": "ZH",
}
|
||||
|
||||
// parseVoice decodes a Kokoro voice ID into display metadata.
|
||||
// IDs follow the pattern {lang}{gender}_{name} e.g. "af_bella".
|
||||
func parseVoice(id string) voiceInfo {
|
||||
v := voiceInfo{ID: id, Name: id, Lang: "?", Gender: "?"}
|
||||
if len(id) < 3 || id[2] != '_' {
|
||||
return v
|
||||
}
|
||||
lc := string(id[0])
|
||||
gc := string(id[1])
|
||||
name := id[3:]
|
||||
if l, ok := langLabel[lc]; ok {
|
||||
v.Lang = l
|
||||
}
|
||||
switch gc {
|
||||
case "f":
|
||||
v.Gender = "F"
|
||||
case "m":
|
||||
v.Gender = "M"
|
||||
}
|
||||
// Capitalise name, replace underscores with spaces.
|
||||
if len(name) > 0 {
|
||||
runes := []rune(name)
|
||||
runes[0] -= 'a' - 'A'
|
||||
v.Name = strings.ReplaceAll(string(runes), "_", " ")
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
// parseVoices converts a slice of raw voice IDs to voiceInfo structs.
|
||||
func parseVoices(ids []string) []voiceInfo {
|
||||
out := make([]voiceInfo, len(ids))
|
||||
for i, id := range ids {
|
||||
out[i] = parseVoice(id)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// ─── shared layout ────────────────────────────────────────────────────────────
|
||||
|
||||
const layoutHead = `<!DOCTYPE html>
|
||||
@@ -1932,16 +1990,32 @@ const chapterTmpl = `
|
||||
role="dialog"
|
||||
aria-label="Reader settings"
|
||||
hidden
|
||||
class="absolute right-[max(0.5rem,calc(50%-32rem+0.5rem))] bottom-[calc(100%+0.25rem)] min-w-[260px] bg-zinc-900 border border-zinc-800 rounded-xl p-4 shadow-2xl z-[100]">
|
||||
<label class="block mb-3.5">
|
||||
<span class="block text-xs text-zinc-500 mb-1.5">Voice</span>
|
||||
<select id="tts-voice"
|
||||
class="w-full rounded-lg bg-zinc-800 border border-zinc-700 px-2 py-1.5 text-sm text-zinc-200 outline-none">
|
||||
class="absolute right-[max(0.5rem,calc(50%-32rem+0.5rem))] bottom-[calc(100%+0.25rem)] w-[min(320px,calc(100vw-1rem))] bg-zinc-900 border border-zinc-800 rounded-xl p-4 shadow-2xl z-[100]">
|
||||
<!-- hidden native select keeps existing JS working unchanged -->
|
||||
<select id="tts-voice" class="sr-only" aria-hidden="true" tabindex="-1">
|
||||
{{range .Voices}}
|
||||
<option value="{{.}}"{{if eq . $.DefaultVoice}} selected{{end}}>{{.}}</option>
|
||||
<option value="{{.ID}}"{{if eq .ID $.DefaultVoice}} selected{{end}}>{{.Name}}</option>
|
||||
{{end}}
|
||||
</select>
|
||||
</label>
|
||||
|
||||
<div class="mb-3.5">
|
||||
<span class="block text-xs text-zinc-500 mb-2">Voice</span>
|
||||
<div id="voice-grid" class="grid grid-cols-2 gap-1.5 max-h-48 overflow-y-auto pr-0.5">
|
||||
{{range .Voices}}
|
||||
<button type="button"
|
||||
data-voice="{{.ID}}"
|
||||
onclick="selectVoice(this)"
|
||||
class="voice-btn flex items-center gap-2 px-2.5 py-1.5 rounded-lg border text-left transition-colors
|
||||
{{if eq .ID $.DefaultVoice}}border-amber-500 bg-amber-500/10 text-amber-300{{else}}border-zinc-700 bg-zinc-800 text-zinc-300 hover:border-zinc-500 hover:text-zinc-100{{end}}">
|
||||
<span class="flex-1 min-w-0">
|
||||
<span class="block text-[0.8rem] font-medium leading-tight truncate">{{.Name}}</span>
|
||||
<span class="block text-[0.65rem] text-zinc-500 leading-tight">{{.Lang}} · {{.Gender}}</span>
|
||||
</span>
|
||||
</button>
|
||||
{{end}}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<label class="block mb-3.5">
|
||||
<span class="block text-xs text-zinc-500 mb-1.5">Speed — <span id="tts-speed-label">1.0×</span></span>
|
||||
<input id="tts-speed" type="range"
|
||||
@@ -2620,6 +2694,41 @@ const chapterTmpl = `
|
||||
document.addEventListener('touchend', window.__ttsDoubleTap, { passive: true });
|
||||
}());
|
||||
}());
|
||||
|
||||
// ── Voice card picker ─────────────────────────────────────────────────────────
|
||||
// selectVoice makes the clicked voice card the active one.
//   btn: the .voice-btn element that was clicked; its data-voice attribute
//        holds the voice ID.
// It mirrors the choice into the hidden #tts-voice select, stores it in
// localStorage under 'tts_voice', and restyles every card in #voice-grid.
// Does nothing when the select, the grid, or btn is missing.
window.selectVoice = function (btn) {
  var voiceSel = document.getElementById('tts-voice');
  var grid = document.getElementById('voice-grid');
  if (!voiceSel || !grid || !btn) return;

  var voice = btn.dataset.voice;

  // Update hidden select so voiceSel.value works in the existing TTS code.
  voiceSel.value = voice;
  // Persist to localStorage using same key as the TTS IIFE.
  try { localStorage.setItem('tts_voice', voice); } catch(_) {}

  // These two lists mirror the class sets the template renders for the
  // default and non-default cards. The hover classes belong to the idle set,
  // so they are swapped together with it; otherwise the active card keeps the
  // idle hover look and the previously active card never gains it.
  var activeClasses = ['border-amber-500', 'bg-amber-500/10', 'text-amber-300'];
  var idleClasses = [
    'border-zinc-700', 'bg-zinc-800', 'text-zinc-300',
    'hover:border-zinc-500', 'hover:text-zinc-100'
  ];

  // Swap active styling across all cards.
  grid.querySelectorAll('.voice-btn').forEach(function (b) {
    var active = b === btn;
    activeClasses.forEach(function (c) { b.classList.toggle(c, active); });
    idleClasses.forEach(function (c) { b.classList.toggle(c, !active); });
  });
};
|
||||
|
||||
// On page load, sync voice grid selection to the restored localStorage value.
// Reads 'tts_voice' from localStorage and, if a card in #voice-grid carries
// that voice ID in data-voice, activates it through window.selectVoice.
// Does nothing when the select or grid is missing, when nothing is stored, or
// when no card matches.
(function syncVoiceGrid() {
  var voiceSel = document.getElementById('tts-voice');
  var grid = document.getElementById('voice-grid');
  if (!voiceSel || !grid) return;

  var saved = null;
  try { saved = localStorage.getItem('tts_voice'); } catch(_) {}
  if (!saved) return;

  // Compare dataset values instead of splicing the stored string into a CSS
  // selector: a value containing a quote or backslash would make
  // querySelector throw and abort the rest of this script.
  var cards = grid.querySelectorAll('[data-voice]');
  for (var i = 0; i < cards.length; i++) {
    if (cards[i].dataset.voice === saved) {
      window.selectVoice(cards[i]);
      return;
    }
  }
})();
|
||||
</script>`
|
||||
|
||||
func (s *Server) handleChapter(w http.ResponseWriter, r *http.Request) {
|
||||
@@ -2665,7 +2774,7 @@ func (s *Server) handleChapter(w http.ResponseWriter, r *http.Request) {
|
||||
Title string
|
||||
ChapterDate string
|
||||
AllChapters interface{}
|
||||
Voices []string
|
||||
Voices []voiceInfo
|
||||
DefaultVoice string
|
||||
Cover string
|
||||
}{
|
||||
@@ -2677,7 +2786,7 @@ func (s *Server) handleChapter(w http.ResponseWriter, r *http.Request) {
|
||||
Title: chapterTitle,
|
||||
ChapterDate: chapterDate,
|
||||
AllChapters: chapters,
|
||||
Voices: kokoroVoices,
|
||||
Voices: parseVoices(s.voices()),
|
||||
DefaultVoice: s.kokoroVoice,
|
||||
Cover: coverURL,
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user