Compare commits

...

1 Commits

Author SHA1 Message Date
Admin
aaa008ac99 feat: add Cloudflare AI TTS engine (aura-2-en) with voice grouping in UI
All checks were successful
Release / Test backend (push) Successful in 43s
Release / Check ui (push) Successful in 43s
Release / Docker / caddy (push) Successful in 46s
Release / Docker / backend (push) Successful in 2m45s
Release / Docker / runner (push) Successful in 2m53s
Release / Docker / ui (push) Successful in 2m5s
Release / Gitea Release (push) Successful in 41s
2026-04-04 11:12:55 +05:00
12 changed files with 402 additions and 12 deletions

View File

@@ -26,6 +26,7 @@ import (
"github.com/hibiken/asynq"
"github.com/libnovel/backend/internal/asynqqueue"
"github.com/libnovel/backend/internal/backend"
"github.com/libnovel/backend/internal/cfai"
"github.com/libnovel/backend/internal/config"
"github.com/libnovel/backend/internal/kokoro"
"github.com/libnovel/backend/internal/meili"
@@ -114,6 +115,15 @@ func run() error {
log.Info("POCKET_TTS_URL not set — pocket-tts voices unavailable in backend")
}
// ── Cloudflare Workers AI (voice sample generation + audio-stream live TTS) ──
var cfaiClient cfai.Client
if cfg.CFAI.AccountID != "" && cfg.CFAI.APIToken != "" {
cfaiClient = cfai.New(cfg.CFAI.AccountID, cfg.CFAI.APIToken, cfg.CFAI.Model)
log.Info("cloudflare AI TTS enabled", "model", cfg.CFAI.Model)
} else {
log.Info("CFAI_ACCOUNT_ID/CFAI_API_TOKEN not set — CF AI voices unavailable in backend")
}
// ── Meilisearch (search reads only; indexing is the runner's job) ────────
var searchIndex meili.Client
if cfg.Meilisearch.URL != "" {
@@ -163,6 +173,7 @@ func run() error {
SearchIndex: searchIndex,
Kokoro: kokoroClient,
PocketTTS: pocketTTSClient,
CFAI: cfaiClient,
Log: log,
},
)

View File

@@ -23,6 +23,7 @@ import (
"github.com/getsentry/sentry-go"
"github.com/libnovel/backend/internal/asynqqueue"
"github.com/libnovel/backend/internal/browser"
"github.com/libnovel/backend/internal/cfai"
"github.com/libnovel/backend/internal/config"
"github.com/libnovel/backend/internal/kokoro"
"github.com/libnovel/backend/internal/libretranslate"
@@ -130,6 +131,15 @@ func run() error {
log.Warn("POCKET_TTS_URL not set — pocket-tts voice tasks will fail")
}
// ── Cloudflare Workers AI ────────────────────────────────────────────────
var cfaiClient cfai.Client
if cfg.CFAI.AccountID != "" && cfg.CFAI.APIToken != "" {
cfaiClient = cfai.New(cfg.CFAI.AccountID, cfg.CFAI.APIToken, cfg.CFAI.Model)
log.Info("cloudflare AI TTS enabled", "model", cfg.CFAI.Model)
} else {
log.Info("CFAI_ACCOUNT_ID/CFAI_API_TOKEN not set — CF AI voice tasks will fail")
}
// ── LibreTranslate ──────────────────────────────────────────────────────
ltClient := libretranslate.New(cfg.LibreTranslate.URL, cfg.LibreTranslate.APIKey)
if ltClient != nil {
@@ -191,6 +201,7 @@ func run() error {
Novel: novel,
Kokoro: kokoroClient,
PocketTTS: pocketTTSClient,
CFAI: cfaiClient,
LibreTranslate: ltClient,
Log: log,
}

View File

@@ -44,6 +44,7 @@ import (
"strings"
"time"
"github.com/libnovel/backend/internal/cfai"
"github.com/libnovel/backend/internal/domain"
"github.com/libnovel/backend/internal/kokoro"
"github.com/libnovel/backend/internal/meili"
@@ -774,7 +775,13 @@ func (s *Server) handleAudioStream(w http.ResponseWriter, r *http.Request) {
// Open the TTS stream (WAV or MP3 depending on format param).
var audioStream io.ReadCloser
if format == "wav" {
if pockettts.IsPocketTTSVoice(voice) {
if cfai.IsCFAIVoice(voice) {
if s.deps.CFAI == nil {
jsonError(w, http.StatusServiceUnavailable, "cloudflare AI TTS not configured")
return
}
audioStream, err = s.deps.CFAI.StreamAudioWAV(r.Context(), text, voice)
} else if pockettts.IsPocketTTSVoice(voice) {
if s.deps.PocketTTS == nil {
jsonError(w, http.StatusServiceUnavailable, "pocket-tts not configured")
return
@@ -788,7 +795,13 @@ func (s *Server) handleAudioStream(w http.ResponseWriter, r *http.Request) {
audioStream, err = s.deps.Kokoro.StreamAudioWAV(r.Context(), text, voice)
}
} else {
if pockettts.IsPocketTTSVoice(voice) {
if cfai.IsCFAIVoice(voice) {
if s.deps.CFAI == nil {
jsonError(w, http.StatusServiceUnavailable, "cloudflare AI TTS not configured")
return
}
audioStream, err = s.deps.CFAI.StreamAudioMP3(r.Context(), text, voice)
} else if pockettts.IsPocketTTSVoice(voice) {
if s.deps.PocketTTS == nil {
jsonError(w, http.StatusServiceUnavailable, "pocket-tts not configured")
return
@@ -1343,6 +1356,9 @@ func (s *Server) handlePresignVoiceSample(w http.ResponseWriter, r *http.Request
}
key := kokoro.VoiceSampleKey(voice)
if cfai.IsCFAIVoice(voice) {
key = cfai.VoiceSampleKey(voice)
}
// Generate sample on demand when it is not in MinIO yet.
if !s.deps.AudioStore.AudioExists(r.Context(), key) {
@@ -1352,7 +1368,13 @@ func (s *Server) handlePresignVoiceSample(w http.ResponseWriter, r *http.Request
mp3 []byte
err error
)
if pockettts.IsPocketTTSVoice(voice) {
if cfai.IsCFAIVoice(voice) {
if s.deps.CFAI == nil {
jsonError(w, http.StatusServiceUnavailable, "cloudflare AI TTS not configured")
return
}
mp3, err = s.deps.CFAI.GenerateAudio(r.Context(), voiceSampleText, voice)
} else if pockettts.IsPocketTTSVoice(voice) {
if s.deps.PocketTTS == nil {
jsonError(w, http.StatusServiceUnavailable, "pocket-tts not configured")
return

View File

@@ -30,6 +30,7 @@ import (
sentryhttp "github.com/getsentry/sentry-go/http"
"github.com/libnovel/backend/internal/bookstore"
"github.com/libnovel/backend/internal/cfai"
"github.com/libnovel/backend/internal/domain"
"github.com/libnovel/backend/internal/kokoro"
"github.com/libnovel/backend/internal/meili"
@@ -69,6 +70,9 @@ type Dependencies struct {
// PocketTTS is the pocket-tts client (used for voice list only in the backend;
// audio generation is done by the runner).
PocketTTS pockettts.Client
// CFAI is the Cloudflare Workers AI TTS client (used for voice sample
// generation and audio-stream live TTS; audio task generation is done by the runner).
CFAI cfai.Client
// Log is the structured logger.
Log *slog.Logger
}
@@ -338,6 +342,23 @@ func (s *Server) voices(ctx context.Context) []domain.Voice {
}
}
// ── Cloudflare AI voices ──────────────────────────────────────────────────
if s.deps.CFAI != nil {
for _, speaker := range cfai.Speakers() {
gender := "m"
if cfai.IsFemale(speaker) {
gender = "f"
}
result = append(result, domain.Voice{
ID: cfai.VoiceID(speaker),
Engine: "cfai",
Lang: "en",
Gender: gender,
})
}
s.deps.Log.Info("backend: loaded CF AI voices", "count", len(cfai.Speakers()))
}
s.voiceMu.Lock()
s.cachedVoices = result
s.voiceMu.Unlock()

View File

@@ -0,0 +1,214 @@
// Package cfai provides a client for Cloudflare Workers AI Text-to-Speech models.
//
// The Cloudflare Workers AI REST API is used to run TTS models:
//
// POST https://api.cloudflare.com/client/v4/accounts/{accountID}/ai/run/{model}
// Authorization: Bearer {apiToken}
// Content-Type: application/json
// { "text": "...", "speaker": "luna" }
//
// → 200 audio/mpeg — raw MP3 bytes
//
// Currently supported model: @cf/deepgram/aura-2-en (40 English speakers).
// Voice IDs are prefixed with "cfai:" to distinguish them from Kokoro/pocket-tts
// voices (e.g. "cfai:luna", "cfai:orion").
//
// The API is batch-only (no streaming), so GenerateAudio waits for the full
// response. There is no 100-second Cloudflare proxy timeout because we are
// calling the Cloudflare API directly, not routing through a Cloudflare-proxied
// homelab tunnel.
package cfai
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
)
// DefaultModel names the Cloudflare Workers AI text-to-speech model that New
// falls back to when the caller passes an empty model.
const DefaultModel = "@cf/deepgram/aura-2-en"

// voicePrefix namespaces every CF AI voice ID (for example "cfai:luna") so the
// IDs can be told apart from the voices of the other TTS engines.
const voicePrefix = "cfai:"
// aura2Speakers lists, in alphabetical order, the bare speaker names this
// package offers for the aura-2-en model. Speakers and ListVoices are both
// derived from it, so the order here is the order callers see.
var aura2Speakers = []string{
	"amalthea", "andromeda", "apollo", "arcas",
	"aries", "asteria", "athena", "atlas",
	"aurora", "callista", "cora", "cordelia",
	"delia", "draco", "electra", "harmonia",
	"helena", "hera", "hermes", "hyperion",
	"iris", "janus", "juno", "jupiter",
	"luna", "mars", "minerva", "neptune",
	"odysseus", "ophelia", "orion", "orpheus",
	"pandora", "phoebe", "pluto", "saturn",
	"thalia", "theia", "vesta", "zeus",
}
// femaleSpeakers is the set of aura-2-en speaker names that are feminine
// voices. IsFemale reports false for every name that is not a key of this set.
//
// Membership follows Deepgram's published Aura-2 voice catalogue, which lists
// "janus" as a feminine voice and "aries" as a masculine one.
// NOTE(review): re-check against the catalogue whenever the speaker list changes.
var femaleSpeakers = map[string]struct{}{
	"amalthea": {}, "andromeda": {}, "asteria": {}, "athena": {},
	"aurora": {}, "callista": {}, "cora": {}, "cordelia": {},
	"delia": {}, "electra": {}, "harmonia": {}, "helena": {},
	"hera": {}, "iris": {}, "janus": {}, "juno": {},
	"luna": {}, "minerva": {}, "ophelia": {}, "pandora": {},
	"phoebe": {}, "thalia": {}, "theia": {}, "vesta": {},
}
// IsCFAIVoice reports whether voice is served by the Cloudflare AI client.
// CF AI voices use the "cfai:" prefix, e.g. "cfai:luna".
func IsCFAIVoice(voice string) bool {
return strings.HasPrefix(voice, voicePrefix)
}
// SpeakerName returns the bare speaker name for a CF AI voice ID by dropping
// the leading "cfai:" prefix. A voice without that prefix is returned as is.
func SpeakerName(voice string) string {
	if !strings.HasPrefix(voice, voicePrefix) {
		return voice
	}
	return voice[len(voicePrefix):]
}
// VoiceID builds the namespaced voice ID for a bare speaker name by putting
// the "cfai:" prefix in front of it. The speaker is not inspected, so a name
// that already carries the prefix is prefixed again.
func VoiceID(speaker string) string {
	id := voicePrefix
	id += speaker
	return id
}
// VoiceSampleKey derives the object-store key under which the sample MP3 for
// a CF AI voice is kept. Every rune other than an ASCII letter, an ASCII
// digit, '_' or '-' is replaced by '_', so "cfai:luna" becomes
// "_voice-samples/cfai_luna.mp3".
func VoiceSampleKey(voice string) string {
	sanitise := func(r rune) rune {
		switch {
		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
			return r
		case r == '_', r == '-':
			return r
		default:
			return '_'
		}
	}
	return fmt.Sprintf("_voice-samples/%s.mp3", strings.Map(sanitise, voice))
}
// IsFemale reports whether the given CF AI voice ID (with or without prefix)
// is a female speaker.
func IsFemale(voice string) bool {
speaker := SpeakerName(voice)
_, ok := femaleSpeakers[speaker]
return ok
}
// Speakers returns the bare aura-2-en speaker names. The result is a fresh
// copy, so callers may modify it without affecting the package-level list.
func Speakers() []string {
	names := make([]string, 0, len(aura2Speakers))
	names = append(names, aura2Speakers...)
	return names
}
// Client describes the operations available against Cloudflare Workers AI
// text-to-speech. Voices are addressed by their namespaced ID, e.g. "cfai:luna".
type Client interface {
	// ListVoices reports every available voice ID, each carrying the "cfai:"
	// prefix.
	ListVoices(ctx context.Context) ([]string, error)

	// GenerateAudio synthesises text with the given voice and returns the raw
	// MP3 bytes of the complete utterance.
	GenerateAudio(ctx context.Context, text, voice string) ([]byte, error)

	// StreamAudioMP3 offers a stream-shaped view over the batch API: the whole
	// MP3 response is buffered first and then handed back as an io.ReadCloser,
	// so callers need no special case for this engine.
	StreamAudioMP3(ctx context.Context, text, voice string) (io.ReadCloser, error)

	// StreamAudioWAV exists for parity with the other engines. The CF AI model
	// only produces MP3, so the returned reader yields MP3 bytes, not WAV.
	StreamAudioWAV(ctx context.Context, text, voice string) (io.ReadCloser, error)
}
// httpClient implements Client on top of the Cloudflare REST API.
type httpClient struct {
	// http performs the outgoing requests.
	http *http.Client
	// model is the Workers AI model ID placed in the request URL.
	model string
	// accountID is the Cloudflare account placed in the request URL.
	accountID string
	// apiToken is sent as the bearer token of every request.
	apiToken string
}
// New builds a Client bound to one Cloudflare account and API token. An empty
// model selects DefaultModel. The underlying HTTP client uses a five-minute
// timeout.
func New(accountID, apiToken, model string) Client {
	client := &httpClient{
		accountID: accountID,
		apiToken:  apiToken,
		model:     DefaultModel,
		http:      &http.Client{Timeout: 5 * time.Minute},
	}
	if model != "" {
		client.model = model
	}
	return client
}
// GenerateAudio sends text to the Cloudflare Workers AI TTS endpoint and
// returns the response body, which the API delivers as MP3 bytes.
//
// voice may carry the "cfai:" prefix; it is stripped before the request is
// built. When the remaining speaker name is empty, "luna" is used.
//
// An error is returned when text is empty, when the request cannot be built
// or sent, when the server answers with anything other than 200 OK (the
// message then includes the status code and the start of the response body),
// or when the response body cannot be read.
func (c *httpClient) GenerateAudio(ctx context.Context, text, voice string) ([]byte, error) {
	if text == "" {
		return nil, fmt.Errorf("cfai: empty text")
	}

	speaker := SpeakerName(voice)
	if speaker == "" {
		speaker = "luna"
	}

	payload, err := json.Marshal(map[string]any{
		"text":    text,
		"speaker": speaker,
	})
	if err != nil {
		return nil, fmt.Errorf("cfai: marshal request: %w", err)
	}

	endpoint := fmt.Sprintf("https://api.cloudflare.com/client/v4/accounts/%s/ai/run/%s",
		c.accountID, c.model)
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return nil, fmt.Errorf("cfai: build request: %w", err)
	}
	req.Header.Set("Authorization", "Bearer "+c.apiToken)
	req.Header.Set("Content-Type", "application/json")

	resp, err := c.http.Do(req)
	if err != nil {
		return nil, fmt.Errorf("cfai: request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Only the start of the error body is kept: it is used purely for the
		// error message, and an unbounded read would let a misbehaving server
		// make this call allocate arbitrarily much memory.
		const maxErrorBody = 4 << 10
		// A read failure is ignored on purpose; the status code alone is
		// enough to report the failure.
		detail, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
		return nil, fmt.Errorf("cfai: server returned %d: %s", resp.StatusCode, strings.TrimSpace(string(detail)))
	}

	mp3, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("cfai: read response: %w", err)
	}
	return mp3, nil
}
// StreamAudioMP3 runs a full GenerateAudio call and exposes the resulting MP3
// bytes through an io.ReadCloser whose Close is a no-op. Nothing is streamed
// from the network; the audio is already in memory when this returns.
func (c *httpClient) StreamAudioMP3(ctx context.Context, text, voice string) (io.ReadCloser, error) {
	data, err := c.GenerateAudio(ctx, text, voice)
	if err == nil {
		return io.NopCloser(bytes.NewReader(data)), nil
	}
	return nil, err
}
// StreamAudioWAV delegates to StreamAudioMP3, so despite its name the returned
// reader yields MP3 data: the CF AI aura-2-en model does not produce WAV.
// NOTE(review): callers that label the stream as WAV should confirm their
// consumers accept MP3 content.
func (c *httpClient) StreamAudioWAV(ctx context.Context, text, voice string) (io.ReadCloser, error) {
	stream, err := c.StreamAudioMP3(ctx, text, voice)
	if err != nil {
		return nil, err
	}
	return stream, nil
}
// ListVoices reports every CF AI voice ID, formed by passing each known
// speaker name through VoiceID. The list is built in memory; the context is
// unused and the error is always nil.
func (c *httpClient) ListVoices(_ context.Context) ([]string, error) {
	voices := make([]string, 0, len(aura2Speakers))
	for _, speaker := range aura2Speakers {
		voices = append(voices, VoiceID(speaker))
	}
	return voices, nil
}

View File

@@ -66,6 +66,18 @@ type PocketTTS struct {
URL string
}
// CFAI holds the credentials and model selection for Cloudflare Workers AI TTS.
// Load fills it from the CFAI_ACCOUNT_ID, CFAI_API_TOKEN and CFAI_TTS_MODEL
// environment variables.
type CFAI struct {
// AccountID is the Cloudflare account ID.
// An empty string disables CF AI generation.
AccountID string
// APIToken is a Workers AI API token with Workers AI Read+Edit permissions.
// Like AccountID, it must be non-empty for the CF AI client to be created.
APIToken string
// Model is the Workers AI TTS model ID.
// Defaults to "@cf/deepgram/aura-2-en" when empty.
Model string
}
// LibreTranslate holds connection settings for a self-hosted LibreTranslate instance.
type LibreTranslate struct {
// URL is the base URL of the LibreTranslate instance, e.g. https://translate.libnovel.cc
@@ -153,6 +165,7 @@ type Config struct {
MinIO MinIO
Kokoro Kokoro
PocketTTS PocketTTS
CFAI CFAI
LibreTranslate LibreTranslate
HTTP HTTP
Runner Runner
@@ -203,6 +216,12 @@ func Load() Config {
URL: envOr("POCKET_TTS_URL", ""),
},
CFAI: CFAI{
AccountID: envOr("CFAI_ACCOUNT_ID", ""),
APIToken: envOr("CFAI_API_TOKEN", ""),
Model: envOr("CFAI_TTS_MODEL", ""),
},
LibreTranslate: LibreTranslate{
URL: envOr("LIBRETRANSLATE_URL", ""),
APIKey: envOr("LIBRETRANSLATE_API_KEY", ""),

View File

@@ -27,6 +27,7 @@ import (
"go.opentelemetry.io/otel/codes"
"github.com/libnovel/backend/internal/bookstore"
"github.com/libnovel/backend/internal/cfai"
"github.com/libnovel/backend/internal/domain"
"github.com/libnovel/backend/internal/kokoro"
"github.com/libnovel/backend/internal/libretranslate"
@@ -112,6 +113,9 @@ type Dependencies struct {
// PocketTTS is the pocket-tts client (CPU, kyutai voices: alba, marius, etc.).
// If nil, pocket-tts voice tasks will fail with a clear error.
PocketTTS pockettts.Client
// CFAI is the Cloudflare Workers AI TTS client (cfai:* prefixed voices).
// If nil, CF AI voice tasks will fail with a clear error.
CFAI cfai.Client
// LibreTranslate is the machine translation client.
// If nil, translation tasks will fail with a clear error.
LibreTranslate libretranslate.Client
@@ -555,6 +559,18 @@ func (r *Runner) runAudioTask(ctx context.Context, task domain.AudioTask) {
return
}
log.Info("runner: audio generated via pocket-tts", "voice", task.Voice)
} else if cfai.IsCFAIVoice(task.Voice) {
if r.deps.CFAI == nil {
fail("cloudflare AI client not configured (CFAI_ACCOUNT_ID/CFAI_API_TOKEN empty)")
return
}
var genErr error
audioData, genErr = r.deps.CFAI.GenerateAudio(ctx, text, task.Voice)
if genErr != nil {
fail(fmt.Sprintf("cfai generate: %v", genErr))
return
}
log.Info("runner: audio generated via cloudflare AI", "voice", task.Voice)
} else {
if r.deps.Kokoro == nil {
fail("kokoro client not configured (KOKORO_URL is empty)")

View File

@@ -13,6 +13,11 @@
# - RUNNER_SKIP_INITIAL_CATALOGUE_REFRESH=true
# - REDIS_ADDR → rediss://redis.libnovel.cc:6380 (prod Redis via Caddy TLS proxy)
# - LibreTranslate service for machine translation (internal network only)
#
# extra_hosts pins storage.libnovel.cc and pb.libnovel.cc to the prod server IP
# (165.22.70.138) so that large PutObject uploads and PocketBase writes bypass
# Cloudflare's 100-second proxy timeout entirely. TLS still terminates at Caddy
# on prod; the TLS certificate is valid for the domain names so SNI works fine.
services:
libretranslate:
@@ -35,6 +40,12 @@ services:
stop_grace_period: 135s
depends_on:
- libretranslate
# Pin prod subdomains to the prod server IP to bypass Cloudflare's 100s
# proxy timeout. Large MP3 PutObject uploads and PocketBase writes go
# directly to Caddy on prod; TLS and SNI still work normally.
extra_hosts:
- "storage.libnovel.cc:165.22.70.138"
- "pb.libnovel.cc:165.22.70.138"
environment:
# ── PocketBase ──────────────────────────────────────────────────────────
POCKETBASE_URL: "https://pb.libnovel.cc"
@@ -63,6 +74,10 @@ services:
# ── Pocket TTS ──────────────────────────────────────────────────────────
POCKET_TTS_URL: "${POCKET_TTS_URL}"
# ── Cloudflare Workers AI TTS ────────────────────────────────────────────
CFAI_ACCOUNT_ID: "${CFAI_ACCOUNT_ID}"
CFAI_API_TOKEN: "${CFAI_API_TOKEN}"
# ── LibreTranslate (internal Docker network) ────────────────────────────
LIBRETRANSLATE_URL: "http://libretranslate:5000"
LIBRETRANSLATE_API_KEY: "${LIBRETRANSLATE_API_KEY}"

View File

@@ -86,6 +86,7 @@
// ── Derived: voices grouped by engine ──────────────────────────────────
const kokoroVoices = $derived(voices.filter((v) => v.engine === 'kokoro'));
const pocketVoices = $derived(voices.filter((v) => v.engine === 'pocket-tts'));
const cfaiVoices = $derived(voices.filter((v) => v.engine === 'cfai'));
// ── Voice selector state ────────────────────────────────────────────────
let showVoicePanel = $state(false);
@@ -98,6 +99,7 @@
* Human-readable label for a voice.
* Kokoro: "af_bella" → "Bella (US F)"
* Pocket-TTS: "alba" → "Alba (EN F)"
* CF AI: "cfai:luna" → "Luna (EN F)"
* Falls back gracefully if called with a bare string (e.g. from the store default).
*/
function voiceLabel(v: Voice | string): string {
@@ -110,6 +112,14 @@
return kokoroLabelFromId(v);
}
if (v.engine === 'cfai') {
// "cfai:luna" → "Luna (EN F)"
const speaker = v.id.startsWith('cfai:') ? v.id.slice(5) : v.id;
const name = speaker.replace(/\b\w/g, (c) => c.toUpperCase());
const genderLabel = v.gender.toUpperCase();
return `${name} (EN ${genderLabel})`;
}
if (v.engine === 'pocket-tts') {
const langLabel = v.lang.toUpperCase().replace('-', '');
const genderLabel = v.gender.toUpperCase();
@@ -844,6 +854,16 @@
{@render voiceRow(v)}
{/each}
{/if}
<!-- Cloudflare AI section -->
{#if cfaiVoices.length > 0}
<div class="px-3 py-1.5 bg-(--color-surface-2)/70 border-b border-(--color-border)/50 {kokoroVoices.length > 0 || pocketVoices.length > 0 ? 'border-t border-(--color-border)' : ''}">
<span class="text-[10px] font-semibold text-(--color-muted) uppercase tracking-widest">Cloudflare AI</span>
</div>
{#each cfaiVoices as v (v.id)}
{@render voiceRow(v)}
{/each}
{/if}
</div>
<div class="px-3 py-2 border-t border-(--color-border) bg-(--color-surface-2)/50">
<p class="text-xs text-(--color-muted)">

View File

@@ -12,7 +12,7 @@
export interface Voice {
/** Voice identifier passed to TTS clients (e.g. "af_bella", "alba"). */
id: string;
/** TTS engine: "kokoro" | "pocket-tts". */
/** TTS engine: "kokoro" | "pocket-tts" | "cfai". */
engine: string;
/** Primary language tag (e.g. "en-us", "en-gb", "en", "es", "fr"). */
lang: string;

View File

@@ -57,6 +57,12 @@
return `${m}m ${s % 60}s`;
}
/**
 * Infers the TTS engine name to display for a stored voice ID.
 * IDs starting with "cfai:" are Cloudflare AI; of the rest, IDs that contain
 * an underscore are Kokoro and all others are Pocket TTS.
 */
function engineLabel(voice: string): string {
	const isCloudflare = voice.startsWith('cfai:');
	if (isCloudflare) {
		return 'CF AI';
	}
	return voice.includes('_') ? 'Kokoro' : 'Pocket TTS';
}
// ── Audio jobs stats + filter ────────────────────────────────────────────────
let jobsQ = $state('');
let filteredJobs = $derived(
@@ -160,6 +166,7 @@
<th class="px-4 py-3 text-left">Book</th>
<th class="px-4 py-3 text-right">Ch.</th>
<th class="px-4 py-3 text-left">Voice</th>
<th class="px-4 py-3 text-left">Engine</th>
<th class="px-4 py-3 text-left">Status</th>
<th class="px-4 py-3 text-left">Started</th>
<th class="px-4 py-3 text-left">Duration</th>
@@ -173,6 +180,7 @@
</td>
<td class="px-4 py-3 text-right text-(--color-muted)">{job.chapter}</td>
<td class="px-4 py-3 text-(--color-muted) font-mono text-xs">{job.voice}</td>
<td class="px-4 py-3 text-(--color-muted) text-xs">{engineLabel(job.voice)}</td>
<td class="px-4 py-3">
<span class="font-medium {jobStatusColor(job.status)}">{job.status}</span>
</td>
@@ -181,7 +189,7 @@
</tr>
{#if job.error_message}
<tr class="bg-(--color-danger)/10">
<td colspan="6" class="px-4 py-2 text-xs text-(--color-danger) font-mono">{job.error_message}</td>
<td colspan="7" class="px-4 py-2 text-xs text-(--color-danger) font-mono">{job.error_message}</td>
</tr>
{/if}
{/each}
@@ -202,6 +210,7 @@
<div class="grid grid-cols-2 gap-1 text-xs">
<span class="text-(--color-muted)">Chapter</span><span class="text-(--color-muted) text-right">{job.chapter}</span>
<span class="text-(--color-muted)">Voice</span><span class="text-(--color-muted) font-mono text-right truncate">{job.voice}</span>
<span class="text-(--color-muted)">Engine</span><span class="text-(--color-muted) text-right">{engineLabel(job.voice)}</span>
<span class="text-(--color-muted)">Started</span><span class="text-(--color-muted) text-right">{fmtDate(job.started)}</span>
<span class="text-(--color-muted)">Duration</span><span class="text-(--color-muted) text-right">{duration(job.started, job.finished)}</span>
</div>
@@ -236,6 +245,7 @@
<th class="px-4 py-3 text-left">Book</th>
<th class="px-4 py-3 text-left">Chapter</th>
<th class="px-4 py-3 text-left">Voice</th>
<th class="px-4 py-3 text-left">Engine</th>
<th class="px-4 py-3 text-left">Filename</th>
<th class="px-4 py-3 text-left">Updated</th>
</tr>
@@ -249,6 +259,7 @@
</td>
<td class="px-4 py-3 text-(--color-muted)">{parts.chapter}</td>
<td class="px-4 py-3 text-(--color-muted) font-mono text-xs">{parts.voice}</td>
<td class="px-4 py-3 text-(--color-muted) text-xs">{engineLabel(parts.voice)}</td>
<td class="px-4 py-3 text-(--color-muted) font-mono text-xs truncate max-w-[14rem]" title={entry.filename}>
{entry.filename}
</td>
@@ -267,11 +278,12 @@
<a href="/books/{parts.slug}" class="text-(--color-text) font-medium hover:text-(--color-brand) transition-colors block truncate">
{parts.slug}
</a>
<div class="grid grid-cols-2 gap-1 text-xs">
<span class="text-(--color-muted)">Chapter</span><span class="text-(--color-muted) text-right">{parts.chapter}</span>
<span class="text-(--color-muted)">Voice</span><span class="text-(--color-muted) font-mono text-right truncate">{parts.voice}</span>
<span class="text-(--color-muted)">Updated</span><span class="text-(--color-muted) text-right">{fmtDate(entry.updated)}</span>
</div>
<div class="grid grid-cols-2 gap-1 text-xs">
<span class="text-(--color-muted)">Chapter</span><span class="text-(--color-muted) text-right">{parts.chapter}</span>
<span class="text-(--color-muted)">Voice</span><span class="text-(--color-muted) font-mono text-right truncate">{parts.voice}</span>
<span class="text-(--color-muted)">Engine</span><span class="text-(--color-muted) text-right">{engineLabel(parts.voice)}</span>
<span class="text-(--color-muted)">Updated</span><span class="text-(--color-muted) text-right">{fmtDate(entry.updated)}</span>
</div>
{#if entry.filename}
<p class="text-xs text-(--color-muted) font-mono truncate" title={entry.filename}>{entry.filename}</p>
{/if}

View File

@@ -93,6 +93,30 @@
const kokoroVoices = $derived(voices.filter((v) => v.engine === 'kokoro'));
const pocketVoices = $derived(voices.filter((v) => v.engine === 'pocket-tts'));
const cfaiVoices = $derived(voices.filter((v) => v.engine === 'cfai'));
/**
 * Builds the human-readable label for a voice in the engine-grouped selector.
 * CF AI:      "cfai:luna" → "Luna (EN F)"
 * Pocket TTS: "alba"      → "Alba (EN F)"
 * Kokoro:     "af_bella"  → "Bella (US F)"
 * For CF AI and Pocket TTS the parenthesised part is omitted when the voice
 * has no gender; Kokoro shows "?" instead.
 */
function voiceLabel(v: Voice): string {
	const capitaliseWords = (text: string): string =>
		text.replace(/\b\w/g, (c) => c.toUpperCase());
	// CF AI and Pocket TTS share the same optional "(EN <gender>)" suffix.
	const englishSuffix = (): string => (v.gender ? ` (EN ${v.gender.toUpperCase()})` : '');

	switch (v.engine) {
		case 'cfai': {
			const speaker = v.id.startsWith('cfai:') ? v.id.slice(5) : v.id;
			return capitaliseWords(speaker) + englishSuffix();
		}
		case 'pocket-tts':
			return capitaliseWords(v.id.replace(/_/g, ' ')) + englishSuffix();
	}

	// Kokoro IDs look like "<2-letter prefix>_<name>"; the prefix selects the region tag.
	const regionByPrefix: Record<string, string> = {
		af: 'US', am: 'US', bf: 'UK', bm: 'UK',
		ef: 'ES', em: 'ES', ff: 'FR',
		hf: 'IN', hm: 'IN', 'if': 'IT', im: 'IT',
		jf: 'JP', jm: 'JP', pf: 'PT', pm: 'PT', zf: 'ZH', zm: 'ZH',
	};
	const prefix = v.id.slice(0, 2);
	const bareName = v.id.slice(3).replace(/^v0/, '').replace(/^([a-z])/, (c) => c.toUpperCase());
	const region = regionByPrefix[prefix] ?? prefix.toUpperCase();
	const genderTag = v.gender ? v.gender.toUpperCase() : '?';
	return `${bareName} (${region} ${genderTag})`;
}
$effect(() => {
fetch('/api/voices')
@@ -492,12 +516,17 @@
class="w-full bg-(--color-surface-3) border border-(--color-border) rounded-lg px-3 py-2 text-(--color-text) text-sm focus:outline-none focus:ring-2 focus:ring-(--color-brand)">
{#if kokoroVoices.length > 0}
<optgroup label="Kokoro (GPU)">
{#each kokoroVoices as v}<option value={v.id}>{v.id}</option>{/each}
{#each kokoroVoices as v}<option value={v.id}>{voiceLabel(v)}</option>{/each}
</optgroup>
{/if}
{#if pocketVoices.length > 0}
<optgroup label="Pocket TTS (CPU)">
{#each pocketVoices as v}<option value={v.id}>{v.id}</option>{/each}
{#each pocketVoices as v}<option value={v.id}>{voiceLabel(v)}</option>{/each}
</optgroup>
{/if}
{#if cfaiVoices.length > 0}
<optgroup label="Cloudflare AI">
{#each cfaiVoices as v}<option value={v.id}>{voiceLabel(v)}</option>{/each}
</optgroup>
{/if}
</select>