Fetch voices from Kokoro API at runtime; replace select with styled voice card grid
Some checks failed
CI / Lint (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Build (push) Has been cancelled

This commit is contained in:
Admin
2026-03-02 10:18:43 +05:00
parent 2107b6e6b8
commit 7589866965
2 changed files with 162 additions and 10 deletions

View File

@@ -38,6 +38,10 @@ type Server struct {
kokoroURL string // Kokoro-FastAPI base URL, e.g. http://kokoro:8880
kokoroVoice string // default voice, e.g. af_bella
// voiceMu guards cachedVoices.
voiceMu sync.RWMutex
cachedVoices []string // populated on first request from Kokoro /v1/audio/voices
// audioMu guards audioCache and audioInFlight.
// audioCache maps a cache key to the Kokoro download filename returned by
// POST /v1/audio/speech with return_download_link=true.
@@ -62,6 +66,45 @@ func New(addr string, oCfg orchestrator.Config, novel scraper.NovelScraper, log
}
}
// voices returns the list of available Kokoro voices.
//
// A previously cached, non-empty list is returned as-is. Otherwise, when
// kokoroURL is configured, it issues GET {kokoroURL}/v1/audio/voices with a
// 5-second timeout and caches a successful, non-empty result. On any failure
// (request construction, transport error, non-200 status, undecodable body,
// empty list) it logs a warning that names the cause and falls back to the
// hardcoded kokoroVoices list so the UI is never empty. Failures are not
// cached, so the next call retries the fetch.
func (s *Server) voices() []string {
	s.voiceMu.RLock()
	cached := s.cachedVoices
	s.voiceMu.RUnlock()
	if len(cached) > 0 {
		return cached
	}
	if s.kokoroURL == "" {
		// No Kokoro service configured: nothing to fetch and nothing to warn about.
		return kokoroVoices
	}

	// fallback logs why the fetch was abandoned and yields the built-in list.
	fallback := func(kv ...interface{}) []string {
		s.log.Warn("could not fetch kokoro voices, using built-in list", kv...)
		return kokoroVoices
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, s.kokoroURL+"/v1/audio/voices", nil)
	if err != nil {
		return fallback("err", err)
	}
	req.Header.Set("Accept", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return fallback("err", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fallback("status", resp.StatusCode)
	}

	var payload struct {
		Voices []string `json:"voices"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
		return fallback("err", err)
	}
	if len(payload.Voices) == 0 {
		return fallback("reason", "empty voice list")
	}

	s.voiceMu.Lock()
	s.cachedVoices = payload.Voices
	s.voiceMu.Unlock()
	s.log.Info("fetched kokoro voices", "count", len(payload.Voices))
	return payload.Voices
}
// ListenAndServe starts the HTTP server and blocks until the provided context
// is cancelled.
func (s *Server) ListenAndServe(ctx context.Context) error {

View File

@@ -65,6 +65,64 @@ var kokoroVoices = []string{
"zm_yunjian", "zm_yunxi", "zm_yunxia", "zm_yunyang",
}
// voiceInfo holds the parsed display metadata for a single Kokoro voice.
type voiceInfo struct {
	ID     string // raw voice ID, e.g. "af_bella"
	Name   string // display name, e.g. "Bella"; the raw ID when it cannot be parsed
	Lang   string // language label, e.g. "EN-US"; "?" when unknown
	Gender string // "F" or "M"; "?" when unknown
}

// langLabel maps the first character of a voice ID (the language code) to a
// human-readable language tag.
var langLabel = map[string]string{
	"a": "EN-US",
	"b": "EN-GB",
	"e": "ES",
	"f": "FR",
	"h": "HI",
	"i": "IT",
	"j": "JA",
	"p": "PT",
	"z": "ZH",
}

// parseVoice decodes a Kokoro voice ID into display metadata.
//
// IDs follow the pattern {lang}{gender}_{name}, e.g. "af_bella". An ID that is
// shorter than three bytes or has no '_' at index 2 is returned unparsed:
// Name is the raw ID and Lang/Gender are "?". An unrecognised language or
// gender character leaves the corresponding field as "?". An empty name part
// (e.g. "af_") leaves Name as the raw ID.
func parseVoice(id string) voiceInfo {
	v := voiceInfo{ID: id, Name: id, Lang: "?", Gender: "?"}
	if len(id) < 3 || id[2] != '_' {
		return v
	}

	if label, ok := langLabel[id[:1]]; ok {
		v.Lang = label
	}
	switch id[1] {
	case 'f':
		v.Gender = "F"
	case 'm':
		v.Gender = "M"
	}

	// Capitalise the first letter and replace underscores with spaces.
	// strings.ToUpper is used rather than byte arithmetic so that names that
	// already start with an upper-case letter, a digit, or a non-ASCII letter
	// are not turned into garbage.
	if name := id[3:]; name != "" {
		runes := []rune(name)
		initial := strings.ToUpper(string(runes[:1]))
		v.Name = strings.ReplaceAll(initial+string(runes[1:]), "_", " ")
	}
	return v
}
// parseVoices decodes every raw Kokoro voice ID in ids with parseVoice and
// returns the results in input order. The returned slice is never nil and
// always has exactly len(ids) entries.
func parseVoices(ids []string) []voiceInfo {
	infos := make([]voiceInfo, 0, len(ids))
	for _, raw := range ids {
		infos = append(infos, parseVoice(raw))
	}
	return infos
}
// ─── shared layout ────────────────────────────────────────────────────────────
const layoutHead = `<!DOCTYPE html>
@@ -1932,16 +1990,32 @@ const chapterTmpl = `
role="dialog"
aria-label="Reader settings"
hidden
class="absolute right-[max(0.5rem,calc(50%-32rem+0.5rem))] bottom-[calc(100%+0.25rem)] min-w-[260px] bg-zinc-900 border border-zinc-800 rounded-xl p-4 shadow-2xl z-[100]">
<label class="block mb-3.5">
<span class="block text-xs text-zinc-500 mb-1.5">Voice</span>
<select id="tts-voice"
class="w-full rounded-lg bg-zinc-800 border border-zinc-700 px-2 py-1.5 text-sm text-zinc-200 outline-none">
class="absolute right-[max(0.5rem,calc(50%-32rem+0.5rem))] bottom-[calc(100%+0.25rem)] w-[min(320px,calc(100vw-1rem))] bg-zinc-900 border border-zinc-800 rounded-xl p-4 shadow-2xl z-[100]">
<!-- hidden native select keeps existing JS working unchanged -->
<select id="tts-voice" class="sr-only" aria-hidden="true" tabindex="-1">
{{range .Voices}}
<option value="{{.ID}}"{{if eq .ID $.DefaultVoice}} selected{{end}}>{{.Name}}</option>
{{end}}
</select>
<div class="mb-3.5">
<span class="block text-xs text-zinc-500 mb-2">Voice</span>
<div id="voice-grid" class="grid grid-cols-2 gap-1.5 max-h-48 overflow-y-auto pr-0.5">
{{range .Voices}}
<option value="{{.}}"{{if eq . $.DefaultVoice}} selected{{end}}>{{.}}</option>
<button type="button"
data-voice="{{.ID}}"
onclick="selectVoice(this)"
class="voice-btn flex items-center gap-2 px-2.5 py-1.5 rounded-lg border text-left transition-colors
{{if eq .ID $.DefaultVoice}}border-amber-500 bg-amber-500/10 text-amber-300{{else}}border-zinc-700 bg-zinc-800 text-zinc-300 hover:border-zinc-500 hover:text-zinc-100{{end}}">
<span class="flex-1 min-w-0">
<span class="block text-[0.8rem] font-medium leading-tight truncate">{{.Name}}</span>
<span class="block text-[0.65rem] text-zinc-500 leading-tight">{{.Lang}} · {{.Gender}}</span>
</span>
</button>
{{end}}
</select>
</label>
</div>
</div>
<label class="block mb-3.5">
<span class="block text-xs text-zinc-500 mb-1.5">Speed — <span id="tts-speed-label">1.0×</span></span>
<input id="tts-speed" type="range"
@@ -2620,6 +2694,41 @@ const chapterTmpl = `
document.addEventListener('touchend', window.__ttsDoubleTap, { passive: true });
}());
}());
// ── Voice card picker ─────────────────────────────────────────────────────────
window.selectVoice = function (btn) {
var voiceSel = document.getElementById('tts-voice');
var grid = document.getElementById('voice-grid');
if (!voiceSel || !grid) return;
// Update hidden select so voiceSel.value works in the existing TTS code.
voiceSel.value = btn.dataset.voice;
// Persist to localStorage using same key as the TTS IIFE.
try { localStorage.setItem('tts_voice', btn.dataset.voice); } catch(_) {}
// Swap active styling across all cards.
grid.querySelectorAll('.voice-btn').forEach(function (b) {
var active = b === btn;
b.classList.toggle('border-amber-500', active);
b.classList.toggle('bg-amber-500/10', active);
b.classList.toggle('text-amber-300', active);
b.classList.toggle('border-zinc-700', !active);
b.classList.toggle('bg-zinc-800', !active);
b.classList.toggle('text-zinc-300', !active);
});
};
// On page load, sync voice grid selection to the restored localStorage value.
(function syncVoiceGrid() {
var voiceSel = document.getElementById('tts-voice');
var grid = document.getElementById('voice-grid');
if (!voiceSel || !grid) return;
var saved = null;
try { saved = localStorage.getItem('tts_voice'); } catch(_) {}
if (!saved) return;
var btn = grid.querySelector('[data-voice="' + saved + '"]');
if (btn) window.selectVoice(btn);
})();
</script>`
func (s *Server) handleChapter(w http.ResponseWriter, r *http.Request) {
@@ -2665,7 +2774,7 @@ func (s *Server) handleChapter(w http.ResponseWriter, r *http.Request) {
Title string
ChapterDate string
AllChapters interface{}
Voices []string
Voices []voiceInfo
DefaultVoice string
Cover string
}{
@@ -2677,7 +2786,7 @@ func (s *Server) handleChapter(w http.ResponseWriter, r *http.Request) {
Title: chapterTitle,
ChapterDate: chapterDate,
AllChapters: chapters,
Voices: kokoroVoices,
Voices: parseVoices(s.voices()),
DefaultVoice: s.kokoroVoice,
Cover: coverURL,
})