Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
aaa008ac99 |
@@ -26,6 +26,7 @@ import (
|
||||
"github.com/hibiken/asynq"
|
||||
"github.com/libnovel/backend/internal/asynqqueue"
|
||||
"github.com/libnovel/backend/internal/backend"
|
||||
"github.com/libnovel/backend/internal/cfai"
|
||||
"github.com/libnovel/backend/internal/config"
|
||||
"github.com/libnovel/backend/internal/kokoro"
|
||||
"github.com/libnovel/backend/internal/meili"
|
||||
@@ -114,6 +115,15 @@ func run() error {
|
||||
log.Info("POCKET_TTS_URL not set — pocket-tts voices unavailable in backend")
|
||||
}
|
||||
|
||||
// ── Cloudflare Workers AI (voice sample generation + audio-stream live TTS) ──
|
||||
var cfaiClient cfai.Client
|
||||
if cfg.CFAI.AccountID != "" && cfg.CFAI.APIToken != "" {
|
||||
cfaiClient = cfai.New(cfg.CFAI.AccountID, cfg.CFAI.APIToken, cfg.CFAI.Model)
|
||||
log.Info("cloudflare AI TTS enabled", "model", cfg.CFAI.Model)
|
||||
} else {
|
||||
log.Info("CFAI_ACCOUNT_ID/CFAI_API_TOKEN not set — CF AI voices unavailable in backend")
|
||||
}
|
||||
|
||||
// ── Meilisearch (search reads only; indexing is the runner's job) ────────
|
||||
var searchIndex meili.Client
|
||||
if cfg.Meilisearch.URL != "" {
|
||||
@@ -163,6 +173,7 @@ func run() error {
|
||||
SearchIndex: searchIndex,
|
||||
Kokoro: kokoroClient,
|
||||
PocketTTS: pocketTTSClient,
|
||||
CFAI: cfaiClient,
|
||||
Log: log,
|
||||
},
|
||||
)
|
||||
|
||||
@@ -23,6 +23,7 @@ import (
|
||||
"github.com/getsentry/sentry-go"
|
||||
"github.com/libnovel/backend/internal/asynqqueue"
|
||||
"github.com/libnovel/backend/internal/browser"
|
||||
"github.com/libnovel/backend/internal/cfai"
|
||||
"github.com/libnovel/backend/internal/config"
|
||||
"github.com/libnovel/backend/internal/kokoro"
|
||||
"github.com/libnovel/backend/internal/libretranslate"
|
||||
@@ -130,6 +131,15 @@ func run() error {
|
||||
log.Warn("POCKET_TTS_URL not set — pocket-tts voice tasks will fail")
|
||||
}
|
||||
|
||||
// ── Cloudflare Workers AI ────────────────────────────────────────────────
|
||||
var cfaiClient cfai.Client
|
||||
if cfg.CFAI.AccountID != "" && cfg.CFAI.APIToken != "" {
|
||||
cfaiClient = cfai.New(cfg.CFAI.AccountID, cfg.CFAI.APIToken, cfg.CFAI.Model)
|
||||
log.Info("cloudflare AI TTS enabled", "model", cfg.CFAI.Model)
|
||||
} else {
|
||||
log.Info("CFAI_ACCOUNT_ID/CFAI_API_TOKEN not set — CF AI voice tasks will fail")
|
||||
}
|
||||
|
||||
// ── LibreTranslate ──────────────────────────────────────────────────────
|
||||
ltClient := libretranslate.New(cfg.LibreTranslate.URL, cfg.LibreTranslate.APIKey)
|
||||
if ltClient != nil {
|
||||
@@ -191,6 +201,7 @@ func run() error {
|
||||
Novel: novel,
|
||||
Kokoro: kokoroClient,
|
||||
PocketTTS: pocketTTSClient,
|
||||
CFAI: cfaiClient,
|
||||
LibreTranslate: ltClient,
|
||||
Log: log,
|
||||
}
|
||||
|
||||
@@ -44,6 +44,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/libnovel/backend/internal/cfai"
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
"github.com/libnovel/backend/internal/kokoro"
|
||||
"github.com/libnovel/backend/internal/meili"
|
||||
@@ -774,7 +775,13 @@ func (s *Server) handleAudioStream(w http.ResponseWriter, r *http.Request) {
|
||||
// Open the TTS stream (WAV or MP3 depending on format param).
|
||||
var audioStream io.ReadCloser
|
||||
if format == "wav" {
|
||||
if pockettts.IsPocketTTSVoice(voice) {
|
||||
if cfai.IsCFAIVoice(voice) {
|
||||
if s.deps.CFAI == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "cloudflare AI TTS not configured")
|
||||
return
|
||||
}
|
||||
audioStream, err = s.deps.CFAI.StreamAudioWAV(r.Context(), text, voice)
|
||||
} else if pockettts.IsPocketTTSVoice(voice) {
|
||||
if s.deps.PocketTTS == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "pocket-tts not configured")
|
||||
return
|
||||
@@ -788,7 +795,13 @@ func (s *Server) handleAudioStream(w http.ResponseWriter, r *http.Request) {
|
||||
audioStream, err = s.deps.Kokoro.StreamAudioWAV(r.Context(), text, voice)
|
||||
}
|
||||
} else {
|
||||
if pockettts.IsPocketTTSVoice(voice) {
|
||||
if cfai.IsCFAIVoice(voice) {
|
||||
if s.deps.CFAI == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "cloudflare AI TTS not configured")
|
||||
return
|
||||
}
|
||||
audioStream, err = s.deps.CFAI.StreamAudioMP3(r.Context(), text, voice)
|
||||
} else if pockettts.IsPocketTTSVoice(voice) {
|
||||
if s.deps.PocketTTS == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "pocket-tts not configured")
|
||||
return
|
||||
@@ -1343,6 +1356,9 @@ func (s *Server) handlePresignVoiceSample(w http.ResponseWriter, r *http.Request
|
||||
}
|
||||
|
||||
key := kokoro.VoiceSampleKey(voice)
|
||||
if cfai.IsCFAIVoice(voice) {
|
||||
key = cfai.VoiceSampleKey(voice)
|
||||
}
|
||||
|
||||
// Generate sample on demand when it is not in MinIO yet.
|
||||
if !s.deps.AudioStore.AudioExists(r.Context(), key) {
|
||||
@@ -1352,7 +1368,13 @@ func (s *Server) handlePresignVoiceSample(w http.ResponseWriter, r *http.Request
|
||||
mp3 []byte
|
||||
err error
|
||||
)
|
||||
if pockettts.IsPocketTTSVoice(voice) {
|
||||
if cfai.IsCFAIVoice(voice) {
|
||||
if s.deps.CFAI == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "cloudflare AI TTS not configured")
|
||||
return
|
||||
}
|
||||
mp3, err = s.deps.CFAI.GenerateAudio(r.Context(), voiceSampleText, voice)
|
||||
} else if pockettts.IsPocketTTSVoice(voice) {
|
||||
if s.deps.PocketTTS == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "pocket-tts not configured")
|
||||
return
|
||||
|
||||
@@ -30,6 +30,7 @@ import (
|
||||
|
||||
sentryhttp "github.com/getsentry/sentry-go/http"
|
||||
"github.com/libnovel/backend/internal/bookstore"
|
||||
"github.com/libnovel/backend/internal/cfai"
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
"github.com/libnovel/backend/internal/kokoro"
|
||||
"github.com/libnovel/backend/internal/meili"
|
||||
@@ -69,6 +70,9 @@ type Dependencies struct {
|
||||
// PocketTTS is the pocket-tts client (used for voice list only in the backend;
|
||||
// audio generation is done by the runner).
|
||||
PocketTTS pockettts.Client
|
||||
// CFAI is the Cloudflare Workers AI TTS client (used for voice sample
|
||||
// generation and audio-stream live TTS; audio task generation is done by the runner).
|
||||
CFAI cfai.Client
|
||||
// Log is the structured logger.
|
||||
Log *slog.Logger
|
||||
}
|
||||
@@ -338,6 +342,23 @@ func (s *Server) voices(ctx context.Context) []domain.Voice {
|
||||
}
|
||||
}
|
||||
|
||||
// ── Cloudflare AI voices ──────────────────────────────────────────────────
|
||||
if s.deps.CFAI != nil {
|
||||
for _, speaker := range cfai.Speakers() {
|
||||
gender := "m"
|
||||
if cfai.IsFemale(speaker) {
|
||||
gender = "f"
|
||||
}
|
||||
result = append(result, domain.Voice{
|
||||
ID: cfai.VoiceID(speaker),
|
||||
Engine: "cfai",
|
||||
Lang: "en",
|
||||
Gender: gender,
|
||||
})
|
||||
}
|
||||
s.deps.Log.Info("backend: loaded CF AI voices", "count", len(cfai.Speakers()))
|
||||
}
|
||||
|
||||
s.voiceMu.Lock()
|
||||
s.cachedVoices = result
|
||||
s.voiceMu.Unlock()
|
||||
|
||||
214
backend/internal/cfai/client.go
Normal file
214
backend/internal/cfai/client.go
Normal file
@@ -0,0 +1,214 @@
|
||||
// Package cfai provides a client for Cloudflare Workers AI Text-to-Speech models.
|
||||
//
|
||||
// The Cloudflare Workers AI REST API is used to run TTS models:
|
||||
//
|
||||
// POST https://api.cloudflare.com/client/v4/accounts/{accountID}/ai/run/{model}
|
||||
// Authorization: Bearer {apiToken}
|
||||
// Content-Type: application/json
|
||||
// { "text": "...", "speaker": "luna" }
|
||||
//
|
||||
// → 200 audio/mpeg — raw MP3 bytes
|
||||
//
|
||||
// Currently supported model: @cf/deepgram/aura-2-en (40 English speakers).
|
||||
// Voice IDs are prefixed with "cfai:" to distinguish them from Kokoro/pocket-tts
|
||||
// voices (e.g. "cfai:luna", "cfai:orion").
|
||||
//
|
||||
// The API is batch-only (no streaming), so GenerateAudio waits for the full
|
||||
// response. There is no 100-second Cloudflare proxy timeout because we are
|
||||
// calling the Cloudflare API directly, not routing through a Cloudflare-proxied
|
||||
// homelab tunnel.
|
||||
package cfai
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// DefaultModel names the Cloudflare Workers AI text-to-speech model that is
// selected when the caller does not supply one.
const DefaultModel = "@cf/deepgram/aura-2-en"

// voicePrefix namespaces CF AI voice IDs, e.g. "cfai:luna".
const voicePrefix = "cfai:"
|
||||
|
||||
// aura2Speakers holds the bare speaker names this package offers for the
// aura-2-en model, in alphabetical order.
var aura2Speakers = []string{
	"amalthea", "andromeda", "apollo", "arcas", "aries",
	"asteria", "athena", "atlas", "aurora", "callista",
	"cora", "cordelia", "delia", "draco", "electra",
	"harmonia", "helena", "hera", "hermes", "hyperion",
	"iris", "janus", "juno", "jupiter", "luna",
	"mars", "minerva", "neptune", "odysseus", "ophelia",
	"orion", "orpheus", "pandora", "phoebe", "pluto",
	"saturn", "thalia", "theia", "vesta", "zeus",
}
|
||||
|
||||
// femaleSpeakers is the set of aura-2-en speaker names presented as female
// voices; every speaker not listed here is treated as male by IsFemale.
//
// NOTE(review): membership is meant to mirror Deepgram's published Aura-2
// voice catalogue, which lists "aries" as masculine and "janus" as feminine.
// Re-check against the catalogue whenever the speaker list is updated.
var femaleSpeakers = map[string]struct{}{
	"amalthea": {}, "andromeda": {}, "asteria": {}, "athena": {},
	"aurora": {}, "callista": {}, "cora": {}, "cordelia": {},
	"delia": {}, "electra": {}, "harmonia": {}, "helena": {},
	"hera": {}, "iris": {}, "janus": {}, "juno": {},
	"luna": {}, "minerva": {}, "ophelia": {}, "pandora": {},
	"phoebe": {}, "thalia": {}, "theia": {}, "vesta": {},
}
|
||||
|
||||
// IsCFAIVoice reports whether voice is served by the Cloudflare AI client.
|
||||
// CF AI voices use the "cfai:" prefix, e.g. "cfai:luna".
|
||||
func IsCFAIVoice(voice string) bool {
|
||||
return strings.HasPrefix(voice, voicePrefix)
|
||||
}
|
||||
|
||||
// SpeakerName strips the "cfai:" prefix and returns the bare speaker name.
|
||||
// If voice is not a CF AI voice the original string is returned unchanged.
|
||||
func SpeakerName(voice string) string {
|
||||
return strings.TrimPrefix(voice, voicePrefix)
|
||||
}
|
||||
|
||||
// VoiceID returns the full voice ID (with prefix) for a bare speaker name.
|
||||
func VoiceID(speaker string) string {
|
||||
return voicePrefix + speaker
|
||||
}
|
||||
|
||||
// VoiceSampleKey derives the object key under which the sample MP3 for a CF AI
// voice is stored. Every rune other than an ASCII letter, an ASCII digit, '_'
// or '-' is replaced by '_' before the name is placed under "_voice-samples/".
func VoiceSampleKey(voice string) string {
	sanitize := func(r rune) rune {
		switch {
		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
			return r
		case r == '_', r == '-':
			return r
		default:
			return '_'
		}
	}
	return fmt.Sprintf("_voice-samples/%s.mp3", strings.Map(sanitize, voice))
}
|
||||
|
||||
// IsFemale reports whether the given CF AI voice ID (with or without prefix)
|
||||
// is a female speaker.
|
||||
func IsFemale(voice string) bool {
|
||||
speaker := SpeakerName(voice)
|
||||
_, ok := femaleSpeakers[speaker]
|
||||
return ok
|
||||
}
|
||||
|
||||
// Speakers returns all available bare speaker names for aura-2-en.
|
||||
func Speakers() []string {
|
||||
out := make([]string, len(aura2Speakers))
|
||||
copy(out, aura2Speakers)
|
||||
return out
|
||||
}
|
||||
|
||||
// Client describes the text-to-speech operations offered by the Cloudflare
// Workers AI backend.
type Client interface {
	// GenerateAudio turns text into speech with the requested voice
	// (for example "cfai:luna") and yields the raw MP3 bytes.
	GenerateAudio(ctx context.Context, text, voice string) ([]byte, error)

	// StreamAudioMP3 exposes the generated MP3 as an io.ReadCloser. The CF AI
	// API is batch-only, so the complete response is buffered first; the
	// reader merely lets callers treat it like any other stream.
	StreamAudioMP3(ctx context.Context, text, voice string) (io.ReadCloser, error)

	// StreamAudioWAV exists for parity with the other engines. The CF AI model
	// only produces MP3, so the reader returned here carries those same MP3
	// bytes rather than WAV data.
	StreamAudioWAV(ctx context.Context, text, voice string) (io.ReadCloser, error)

	// ListVoices reports every available voice ID, each carrying the "cfai:"
	// prefix.
	ListVoices(ctx context.Context) ([]string, error)
}
|
||||
|
||||
// httpClient implements Client on top of the Cloudflare REST API.
type httpClient struct {
	accountID string       // Cloudflare account ID placed in the request path
	apiToken  string       // bearer token sent in the Authorization header
	model     string       // Workers AI model ID placed in the request path
	http      *http.Client // transport used for every API call
}
|
||||
|
||||
// New returns a Client for the given Cloudflare account and API token.
|
||||
// model defaults to DefaultModel when empty.
|
||||
func New(accountID, apiToken, model string) Client {
|
||||
if model == "" {
|
||||
model = DefaultModel
|
||||
}
|
||||
return &httpClient{
|
||||
accountID: accountID,
|
||||
apiToken: apiToken,
|
||||
model: model,
|
||||
http: &http.Client{Timeout: 5 * time.Minute},
|
||||
}
|
||||
}
|
||||
|
||||
// GenerateAudio calls the Cloudflare Workers AI TTS endpoint and returns MP3 bytes.
|
||||
func (c *httpClient) GenerateAudio(ctx context.Context, text, voice string) ([]byte, error) {
|
||||
if text == "" {
|
||||
return nil, fmt.Errorf("cfai: empty text")
|
||||
}
|
||||
speaker := SpeakerName(voice)
|
||||
if speaker == "" {
|
||||
speaker = "luna"
|
||||
}
|
||||
|
||||
body, err := json.Marshal(map[string]any{
|
||||
"text": text,
|
||||
"speaker": speaker,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cfai: marshal request: %w", err)
|
||||
}
|
||||
|
||||
url := fmt.Sprintf("https://api.cloudflare.com/client/v4/accounts/%s/ai/run/%s",
|
||||
c.accountID, c.model)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cfai: build request: %w", err)
|
||||
}
|
||||
req.Header.Set("Authorization", "Bearer "+c.apiToken)
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := c.http.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cfai: request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return nil, fmt.Errorf("cfai: server returned %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
|
||||
}
|
||||
|
||||
mp3, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cfai: read response: %w", err)
|
||||
}
|
||||
return mp3, nil
|
||||
}
|
||||
|
||||
// StreamAudioMP3 generates audio and wraps the MP3 bytes as an io.ReadCloser.
|
||||
func (c *httpClient) StreamAudioMP3(ctx context.Context, text, voice string) (io.ReadCloser, error) {
|
||||
mp3, err := c.GenerateAudio(ctx, text, voice)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return io.NopCloser(bytes.NewReader(mp3)), nil
|
||||
}
|
||||
|
||||
// StreamAudioWAV generates audio (MP3) and wraps it as an io.ReadCloser.
|
||||
// Note: the CF AI aura-2-en model returns MP3 regardless of the method name.
|
||||
func (c *httpClient) StreamAudioWAV(ctx context.Context, text, voice string) (io.ReadCloser, error) {
|
||||
return c.StreamAudioMP3(ctx, text, voice)
|
||||
}
|
||||
|
||||
// ListVoices returns all available CF AI voice IDs (with the "cfai:" prefix).
|
||||
func (c *httpClient) ListVoices(_ context.Context) ([]string, error) {
|
||||
ids := make([]string, len(aura2Speakers))
|
||||
for i, s := range aura2Speakers {
|
||||
ids[i] = VoiceID(s)
|
||||
}
|
||||
return ids, nil
|
||||
}
|
||||
@@ -66,6 +66,18 @@ type PocketTTS struct {
|
||||
URL string
|
||||
}
|
||||
|
||||
// CFAI groups the settings needed to reach Cloudflare Workers AI TTS.
type CFAI struct {
	// AccountID identifies the Cloudflare account.
	// Leaving it empty turns CF AI generation off.
	AccountID string

	// APIToken is the Workers AI API token; it needs Workers AI Read+Edit
	// permissions.
	APIToken string

	// Model selects the Workers AI TTS model ID.
	// When empty, "@cf/deepgram/aura-2-en" is used.
	Model string
}
|
||||
|
||||
// LibreTranslate holds connection settings for a self-hosted LibreTranslate instance.
|
||||
type LibreTranslate struct {
|
||||
// URL is the base URL of the LibreTranslate instance, e.g. https://translate.libnovel.cc
|
||||
@@ -153,6 +165,7 @@ type Config struct {
|
||||
MinIO MinIO
|
||||
Kokoro Kokoro
|
||||
PocketTTS PocketTTS
|
||||
CFAI CFAI
|
||||
LibreTranslate LibreTranslate
|
||||
HTTP HTTP
|
||||
Runner Runner
|
||||
@@ -203,6 +216,12 @@ func Load() Config {
|
||||
URL: envOr("POCKET_TTS_URL", ""),
|
||||
},
|
||||
|
||||
CFAI: CFAI{
|
||||
AccountID: envOr("CFAI_ACCOUNT_ID", ""),
|
||||
APIToken: envOr("CFAI_API_TOKEN", ""),
|
||||
Model: envOr("CFAI_TTS_MODEL", ""),
|
||||
},
|
||||
|
||||
LibreTranslate: LibreTranslate{
|
||||
URL: envOr("LIBRETRANSLATE_URL", ""),
|
||||
APIKey: envOr("LIBRETRANSLATE_API_KEY", ""),
|
||||
|
||||
@@ -27,6 +27,7 @@ import (
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
|
||||
"github.com/libnovel/backend/internal/bookstore"
|
||||
"github.com/libnovel/backend/internal/cfai"
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
"github.com/libnovel/backend/internal/kokoro"
|
||||
"github.com/libnovel/backend/internal/libretranslate"
|
||||
@@ -112,6 +113,9 @@ type Dependencies struct {
|
||||
// PocketTTS is the pocket-tts client (CPU, kyutai voices: alba, marius, etc.).
|
||||
// If nil, pocket-tts voice tasks will fail with a clear error.
|
||||
PocketTTS pockettts.Client
|
||||
// CFAI is the Cloudflare Workers AI TTS client (cfai:* prefixed voices).
|
||||
// If nil, CF AI voice tasks will fail with a clear error.
|
||||
CFAI cfai.Client
|
||||
// LibreTranslate is the machine translation client.
|
||||
// If nil, translation tasks will fail with a clear error.
|
||||
LibreTranslate libretranslate.Client
|
||||
@@ -555,6 +559,18 @@ func (r *Runner) runAudioTask(ctx context.Context, task domain.AudioTask) {
|
||||
return
|
||||
}
|
||||
log.Info("runner: audio generated via pocket-tts", "voice", task.Voice)
|
||||
} else if cfai.IsCFAIVoice(task.Voice) {
|
||||
if r.deps.CFAI == nil {
|
||||
fail("cloudflare AI client not configured (CFAI_ACCOUNT_ID/CFAI_API_TOKEN empty)")
|
||||
return
|
||||
}
|
||||
var genErr error
|
||||
audioData, genErr = r.deps.CFAI.GenerateAudio(ctx, text, task.Voice)
|
||||
if genErr != nil {
|
||||
fail(fmt.Sprintf("cfai generate: %v", genErr))
|
||||
return
|
||||
}
|
||||
log.Info("runner: audio generated via cloudflare AI", "voice", task.Voice)
|
||||
} else {
|
||||
if r.deps.Kokoro == nil {
|
||||
fail("kokoro client not configured (KOKORO_URL is empty)")
|
||||
|
||||
@@ -13,6 +13,11 @@
|
||||
# - RUNNER_SKIP_INITIAL_CATALOGUE_REFRESH=true
|
||||
# - REDIS_ADDR → rediss://redis.libnovel.cc:6380 (prod Redis via Caddy TLS proxy)
|
||||
# - LibreTranslate service for machine translation (internal network only)
|
||||
#
|
||||
# extra_hosts pins storage.libnovel.cc and pb.libnovel.cc to the prod server IP
|
||||
# (165.22.70.138) so that large PutObject uploads and PocketBase writes bypass
|
||||
# Cloudflare's 100-second proxy timeout entirely. TLS still terminates at Caddy
|
||||
# on prod; the TLS certificate is valid for the domain names so SNI works fine.
|
||||
|
||||
services:
|
||||
libretranslate:
|
||||
@@ -35,6 +40,12 @@ services:
|
||||
stop_grace_period: 135s
|
||||
depends_on:
|
||||
- libretranslate
|
||||
# Pin prod subdomains to the prod server IP to bypass Cloudflare's 100s
|
||||
# proxy timeout. Large MP3 PutObject uploads and PocketBase writes go
|
||||
# directly to Caddy on prod; TLS and SNI still work normally.
|
||||
extra_hosts:
|
||||
- "storage.libnovel.cc:165.22.70.138"
|
||||
- "pb.libnovel.cc:165.22.70.138"
|
||||
environment:
|
||||
# ── PocketBase ──────────────────────────────────────────────────────────
|
||||
POCKETBASE_URL: "https://pb.libnovel.cc"
|
||||
@@ -63,6 +74,10 @@ services:
|
||||
# ── Pocket TTS ──────────────────────────────────────────────────────────
|
||||
POCKET_TTS_URL: "${POCKET_TTS_URL}"
|
||||
|
||||
# ── Cloudflare Workers AI TTS ────────────────────────────────────────────
|
||||
CFAI_ACCOUNT_ID: "${CFAI_ACCOUNT_ID}"
|
||||
CFAI_API_TOKEN: "${CFAI_API_TOKEN}"
|
||||
|
||||
# ── LibreTranslate (internal Docker network) ────────────────────────────
|
||||
LIBRETRANSLATE_URL: "http://libretranslate:5000"
|
||||
LIBRETRANSLATE_API_KEY: "${LIBRETRANSLATE_API_KEY}"
|
||||
|
||||
@@ -86,6 +86,7 @@
|
||||
// ── Derived: voices grouped by engine ──────────────────────────────────
|
||||
const kokoroVoices = $derived(voices.filter((v) => v.engine === 'kokoro'));
|
||||
const pocketVoices = $derived(voices.filter((v) => v.engine === 'pocket-tts'));
|
||||
const cfaiVoices = $derived(voices.filter((v) => v.engine === 'cfai'));
|
||||
|
||||
// ── Voice selector state ────────────────────────────────────────────────
|
||||
let showVoicePanel = $state(false);
|
||||
@@ -98,6 +99,7 @@
|
||||
* Human-readable label for a voice.
|
||||
* Kokoro: "af_bella" → "Bella (US F)"
|
||||
* Pocket-TTS: "alba" → "Alba (EN F)"
|
||||
* CF AI: "cfai:luna" → "Luna (EN F)"
|
||||
* Falls back gracefully if called with a bare string (e.g. from the store default).
|
||||
*/
|
||||
function voiceLabel(v: Voice | string): string {
|
||||
@@ -110,6 +112,14 @@
|
||||
return kokoroLabelFromId(v);
|
||||
}
|
||||
|
||||
if (v.engine === 'cfai') {
|
||||
// "cfai:luna" → "Luna (EN F)"
|
||||
const speaker = v.id.startsWith('cfai:') ? v.id.slice(5) : v.id;
|
||||
const name = speaker.replace(/\b\w/g, (c) => c.toUpperCase());
|
||||
const genderLabel = v.gender.toUpperCase();
|
||||
return `${name} (EN ${genderLabel})`;
|
||||
}
|
||||
|
||||
if (v.engine === 'pocket-tts') {
|
||||
const langLabel = v.lang.toUpperCase().replace('-', '');
|
||||
const genderLabel = v.gender.toUpperCase();
|
||||
@@ -844,6 +854,16 @@
|
||||
{@render voiceRow(v)}
|
||||
{/each}
|
||||
{/if}
|
||||
|
||||
<!-- Cloudflare AI section -->
|
||||
{#if cfaiVoices.length > 0}
|
||||
<div class="px-3 py-1.5 bg-(--color-surface-2)/70 border-b border-(--color-border)/50 {kokoroVoices.length > 0 || pocketVoices.length > 0 ? 'border-t border-(--color-border)' : ''}">
|
||||
<span class="text-[10px] font-semibold text-(--color-muted) uppercase tracking-widest">Cloudflare AI</span>
|
||||
</div>
|
||||
{#each cfaiVoices as v (v.id)}
|
||||
{@render voiceRow(v)}
|
||||
{/each}
|
||||
{/if}
|
||||
</div>
|
||||
<div class="px-3 py-2 border-t border-(--color-border) bg-(--color-surface-2)/50">
|
||||
<p class="text-xs text-(--color-muted)">
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
export interface Voice {
|
||||
/** Voice identifier passed to TTS clients (e.g. "af_bella", "alba"). */
|
||||
id: string;
|
||||
/** TTS engine: "kokoro" | "pocket-tts". */
|
||||
/** TTS engine: "kokoro" | "pocket-tts" | "cfai". */
|
||||
engine: string;
|
||||
/** Primary language tag (e.g. "en-us", "en-gb", "en", "es", "fr"). */
|
||||
lang: string;
|
||||
|
||||
@@ -57,6 +57,12 @@
|
||||
return `${m}m ${s % 60}s`;
|
||||
}
|
||||
|
||||
/**
 * Display name of the TTS engine implied by a voice ID:
 * a "cfai:" prefix means CF AI, an ID containing "_" means Kokoro,
 * and anything else is shown as Pocket TTS.
 */
function engineLabel(voice: string): string {
	const isCloudflare = voice.startsWith('cfai:');
	if (isCloudflare) {
		return 'CF AI';
	}
	return voice.includes('_') ? 'Kokoro' : 'Pocket TTS';
}
|
||||
|
||||
// ── Audio jobs stats + filter ────────────────────────────────────────────────
|
||||
let jobsQ = $state('');
|
||||
let filteredJobs = $derived(
|
||||
@@ -160,6 +166,7 @@
|
||||
<th class="px-4 py-3 text-left">Book</th>
|
||||
<th class="px-4 py-3 text-right">Ch.</th>
|
||||
<th class="px-4 py-3 text-left">Voice</th>
|
||||
<th class="px-4 py-3 text-left">Engine</th>
|
||||
<th class="px-4 py-3 text-left">Status</th>
|
||||
<th class="px-4 py-3 text-left">Started</th>
|
||||
<th class="px-4 py-3 text-left">Duration</th>
|
||||
@@ -173,6 +180,7 @@
|
||||
</td>
|
||||
<td class="px-4 py-3 text-right text-(--color-muted)">{job.chapter}</td>
|
||||
<td class="px-4 py-3 text-(--color-muted) font-mono text-xs">{job.voice}</td>
|
||||
<td class="px-4 py-3 text-(--color-muted) text-xs">{engineLabel(job.voice)}</td>
|
||||
<td class="px-4 py-3">
|
||||
<span class="font-medium {jobStatusColor(job.status)}">{job.status}</span>
|
||||
</td>
|
||||
@@ -181,7 +189,7 @@
|
||||
</tr>
|
||||
{#if job.error_message}
|
||||
<tr class="bg-(--color-danger)/10">
|
||||
<td colspan="6" class="px-4 py-2 text-xs text-(--color-danger) font-mono">{job.error_message}</td>
|
||||
<td colspan="7" class="px-4 py-2 text-xs text-(--color-danger) font-mono">{job.error_message}</td>
|
||||
</tr>
|
||||
{/if}
|
||||
{/each}
|
||||
@@ -202,6 +210,7 @@
|
||||
<div class="grid grid-cols-2 gap-1 text-xs">
|
||||
<span class="text-(--color-muted)">Chapter</span><span class="text-(--color-muted) text-right">{job.chapter}</span>
|
||||
<span class="text-(--color-muted)">Voice</span><span class="text-(--color-muted) font-mono text-right truncate">{job.voice}</span>
|
||||
<span class="text-(--color-muted)">Engine</span><span class="text-(--color-muted) text-right">{engineLabel(job.voice)}</span>
|
||||
<span class="text-(--color-muted)">Started</span><span class="text-(--color-muted) text-right">{fmtDate(job.started)}</span>
|
||||
<span class="text-(--color-muted)">Duration</span><span class="text-(--color-muted) text-right">{duration(job.started, job.finished)}</span>
|
||||
</div>
|
||||
@@ -236,6 +245,7 @@
|
||||
<th class="px-4 py-3 text-left">Book</th>
|
||||
<th class="px-4 py-3 text-left">Chapter</th>
|
||||
<th class="px-4 py-3 text-left">Voice</th>
|
||||
<th class="px-4 py-3 text-left">Engine</th>
|
||||
<th class="px-4 py-3 text-left">Filename</th>
|
||||
<th class="px-4 py-3 text-left">Updated</th>
|
||||
</tr>
|
||||
@@ -249,6 +259,7 @@
|
||||
</td>
|
||||
<td class="px-4 py-3 text-(--color-muted)">{parts.chapter}</td>
|
||||
<td class="px-4 py-3 text-(--color-muted) font-mono text-xs">{parts.voice}</td>
|
||||
<td class="px-4 py-3 text-(--color-muted) text-xs">{engineLabel(parts.voice)}</td>
|
||||
<td class="px-4 py-3 text-(--color-muted) font-mono text-xs truncate max-w-[14rem]" title={entry.filename}>
|
||||
{entry.filename}
|
||||
</td>
|
||||
@@ -267,11 +278,12 @@
|
||||
<a href="/books/{parts.slug}" class="text-(--color-text) font-medium hover:text-(--color-brand) transition-colors block truncate">
|
||||
{parts.slug}
|
||||
</a>
|
||||
<div class="grid grid-cols-2 gap-1 text-xs">
|
||||
<span class="text-(--color-muted)">Chapter</span><span class="text-(--color-muted) text-right">{parts.chapter}</span>
|
||||
<span class="text-(--color-muted)">Voice</span><span class="text-(--color-muted) font-mono text-right truncate">{parts.voice}</span>
|
||||
<span class="text-(--color-muted)">Updated</span><span class="text-(--color-muted) text-right">{fmtDate(entry.updated)}</span>
|
||||
</div>
|
||||
<div class="grid grid-cols-2 gap-1 text-xs">
|
||||
<span class="text-(--color-muted)">Chapter</span><span class="text-(--color-muted) text-right">{parts.chapter}</span>
|
||||
<span class="text-(--color-muted)">Voice</span><span class="text-(--color-muted) font-mono text-right truncate">{parts.voice}</span>
|
||||
<span class="text-(--color-muted)">Engine</span><span class="text-(--color-muted) text-right">{engineLabel(parts.voice)}</span>
|
||||
<span class="text-(--color-muted)">Updated</span><span class="text-(--color-muted) text-right">{fmtDate(entry.updated)}</span>
|
||||
</div>
|
||||
{#if entry.filename}
|
||||
<p class="text-xs text-(--color-muted) font-mono truncate" title={entry.filename}>{entry.filename}</p>
|
||||
{/if}
|
||||
|
||||
@@ -93,6 +93,30 @@
|
||||
|
||||
const kokoroVoices = $derived(voices.filter((v) => v.engine === 'kokoro'));
|
||||
const pocketVoices = $derived(voices.filter((v) => v.engine === 'pocket-tts'));
|
||||
const cfaiVoices = $derived(voices.filter((v) => v.engine === 'cfai'));
|
||||
|
||||
/**
 * Human-readable label for a voice in the select box.
 *   CF AI:      "cfai:luna" → "Luna (EN F)"
 *   Pocket TTS: "alba"      → "Alba (EN F)"
 *   Kokoro:     "af_bella"  → "Bella (US F)"
 * The gender suffix is omitted for CF AI / Pocket TTS voices without a gender;
 * Kokoro voices show "?" instead.
 */
function voiceLabel(v: Voice): string {
	const titleCase = (s: string) => s.replace(/\b\w/g, (c) => c.toUpperCase());
	const enSuffix = v.gender ? ` (EN ${v.gender.toUpperCase()})` : '';

	switch (v.engine) {
		case 'cfai': {
			const speaker = v.id.startsWith('cfai:') ? v.id.slice(5) : v.id;
			return titleCase(speaker) + enSuffix;
		}
		case 'pocket-tts':
			return titleCase(v.id.replace(/_/g, ' ')) + enSuffix;
	}

	// Kokoro IDs look like "<lang><gender>_<name>": the two-letter prefix picks
	// the region label and the remainder after the underscore is the name.
	const regionByPrefix: Record<string, string> = {
		af: 'US', am: 'US', bf: 'UK', bm: 'UK',
		ef: 'ES', em: 'ES', ff: 'FR',
		hf: 'IN', hm: 'IN', 'if': 'IT', im: 'IT',
		jf: 'JP', jm: 'JP', pf: 'PT', pm: 'PT', zf: 'ZH', zm: 'ZH',
	};
	const prefix = v.id.slice(0, 2);
	const region = regionByPrefix[prefix] ?? prefix.toUpperCase();
	const displayName = v.id
		.slice(3)
		.replace(/^v0/, '')
		.replace(/^([a-z])/, (c) => c.toUpperCase());
	const genderTag = v.gender ? v.gender.toUpperCase() : '?';
	return `${displayName} (${region} ${genderTag})`;
}
|
||||
|
||||
$effect(() => {
|
||||
fetch('/api/voices')
|
||||
@@ -492,12 +516,17 @@
|
||||
class="w-full bg-(--color-surface-3) border border-(--color-border) rounded-lg px-3 py-2 text-(--color-text) text-sm focus:outline-none focus:ring-2 focus:ring-(--color-brand)">
|
||||
{#if kokoroVoices.length > 0}
|
||||
<optgroup label="Kokoro (GPU)">
|
||||
{#each kokoroVoices as v}<option value={v.id}>{v.id}</option>{/each}
|
||||
{#each kokoroVoices as v}<option value={v.id}>{voiceLabel(v)}</option>{/each}
|
||||
</optgroup>
|
||||
{/if}
|
||||
{#if pocketVoices.length > 0}
|
||||
<optgroup label="Pocket TTS (CPU)">
|
||||
{#each pocketVoices as v}<option value={v.id}>{v.id}</option>{/each}
|
||||
{#each pocketVoices as v}<option value={v.id}>{voiceLabel(v)}</option>{/each}
|
||||
</optgroup>
|
||||
{/if}
|
||||
{#if cfaiVoices.length > 0}
|
||||
<optgroup label="Cloudflare AI">
|
||||
{#each cfaiVoices as v}<option value={v.id}>{voiceLabel(v)}</option>{/each}
|
||||
</optgroup>
|
||||
{/if}
|
||||
</select>
|
||||
|
||||
Reference in New Issue
Block a user