Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d1b7d3e36c | ||
|
|
aaa008ac99 |
@@ -26,6 +26,7 @@ import (
|
||||
"github.com/hibiken/asynq"
|
||||
"github.com/libnovel/backend/internal/asynqqueue"
|
||||
"github.com/libnovel/backend/internal/backend"
|
||||
"github.com/libnovel/backend/internal/cfai"
|
||||
"github.com/libnovel/backend/internal/config"
|
||||
"github.com/libnovel/backend/internal/kokoro"
|
||||
"github.com/libnovel/backend/internal/meili"
|
||||
@@ -114,6 +115,24 @@ func run() error {
|
||||
log.Info("POCKET_TTS_URL not set — pocket-tts voices unavailable in backend")
|
||||
}
|
||||
|
||||
// ── Cloudflare Workers AI (voice sample generation + audio-stream live TTS) ──
|
||||
var cfaiClient cfai.Client
|
||||
if cfg.CFAI.AccountID != "" && cfg.CFAI.APIToken != "" {
|
||||
cfaiClient = cfai.New(cfg.CFAI.AccountID, cfg.CFAI.APIToken, cfg.CFAI.Model)
|
||||
log.Info("cloudflare AI TTS enabled", "model", cfg.CFAI.Model)
|
||||
} else {
|
||||
log.Info("CFAI_ACCOUNT_ID/CFAI_API_TOKEN not set — CF AI voices unavailable in backend")
|
||||
}
|
||||
|
||||
// ── Cloudflare Workers AI Image Generation ────────────────────────────────
|
||||
var imageGenClient cfai.ImageGenClient
|
||||
if cfg.CFAI.AccountID != "" && cfg.CFAI.APIToken != "" {
|
||||
imageGenClient = cfai.NewImageGen(cfg.CFAI.AccountID, cfg.CFAI.APIToken)
|
||||
log.Info("cloudflare AI image generation enabled")
|
||||
} else {
|
||||
log.Info("CFAI_ACCOUNT_ID/CFAI_API_TOKEN not set — image generation unavailable")
|
||||
}
|
||||
|
||||
// ── Meilisearch (search reads only; indexing is the runner's job) ────────
|
||||
var searchIndex meili.Client
|
||||
if cfg.Meilisearch.URL != "" {
|
||||
@@ -163,6 +182,8 @@ func run() error {
|
||||
SearchIndex: searchIndex,
|
||||
Kokoro: kokoroClient,
|
||||
PocketTTS: pocketTTSClient,
|
||||
CFAI: cfaiClient,
|
||||
ImageGen: imageGenClient,
|
||||
Log: log,
|
||||
},
|
||||
)
|
||||
|
||||
@@ -23,6 +23,7 @@ import (
|
||||
"github.com/getsentry/sentry-go"
|
||||
"github.com/libnovel/backend/internal/asynqqueue"
|
||||
"github.com/libnovel/backend/internal/browser"
|
||||
"github.com/libnovel/backend/internal/cfai"
|
||||
"github.com/libnovel/backend/internal/config"
|
||||
"github.com/libnovel/backend/internal/kokoro"
|
||||
"github.com/libnovel/backend/internal/libretranslate"
|
||||
@@ -130,6 +131,15 @@ func run() error {
|
||||
log.Warn("POCKET_TTS_URL not set — pocket-tts voice tasks will fail")
|
||||
}
|
||||
|
||||
// ── Cloudflare Workers AI ────────────────────────────────────────────────
|
||||
var cfaiClient cfai.Client
|
||||
if cfg.CFAI.AccountID != "" && cfg.CFAI.APIToken != "" {
|
||||
cfaiClient = cfai.New(cfg.CFAI.AccountID, cfg.CFAI.APIToken, cfg.CFAI.Model)
|
||||
log.Info("cloudflare AI TTS enabled", "model", cfg.CFAI.Model)
|
||||
} else {
|
||||
log.Info("CFAI_ACCOUNT_ID/CFAI_API_TOKEN not set — CF AI voice tasks will fail")
|
||||
}
|
||||
|
||||
// ── LibreTranslate ──────────────────────────────────────────────────────
|
||||
ltClient := libretranslate.New(cfg.LibreTranslate.URL, cfg.LibreTranslate.APIKey)
|
||||
if ltClient != nil {
|
||||
@@ -191,6 +201,7 @@ func run() error {
|
||||
Novel: novel,
|
||||
Kokoro: kokoroClient,
|
||||
PocketTTS: pocketTTSClient,
|
||||
CFAI: cfaiClient,
|
||||
LibreTranslate: ltClient,
|
||||
Log: log,
|
||||
}
|
||||
|
||||
@@ -44,6 +44,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/libnovel/backend/internal/cfai"
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
"github.com/libnovel/backend/internal/kokoro"
|
||||
"github.com/libnovel/backend/internal/meili"
|
||||
@@ -774,7 +775,13 @@ func (s *Server) handleAudioStream(w http.ResponseWriter, r *http.Request) {
|
||||
// Open the TTS stream (WAV or MP3 depending on format param).
|
||||
var audioStream io.ReadCloser
|
||||
if format == "wav" {
|
||||
if pockettts.IsPocketTTSVoice(voice) {
|
||||
if cfai.IsCFAIVoice(voice) {
|
||||
if s.deps.CFAI == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "cloudflare AI TTS not configured")
|
||||
return
|
||||
}
|
||||
audioStream, err = s.deps.CFAI.StreamAudioWAV(r.Context(), text, voice)
|
||||
} else if pockettts.IsPocketTTSVoice(voice) {
|
||||
if s.deps.PocketTTS == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "pocket-tts not configured")
|
||||
return
|
||||
@@ -788,7 +795,13 @@ func (s *Server) handleAudioStream(w http.ResponseWriter, r *http.Request) {
|
||||
audioStream, err = s.deps.Kokoro.StreamAudioWAV(r.Context(), text, voice)
|
||||
}
|
||||
} else {
|
||||
if pockettts.IsPocketTTSVoice(voice) {
|
||||
if cfai.IsCFAIVoice(voice) {
|
||||
if s.deps.CFAI == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "cloudflare AI TTS not configured")
|
||||
return
|
||||
}
|
||||
audioStream, err = s.deps.CFAI.StreamAudioMP3(r.Context(), text, voice)
|
||||
} else if pockettts.IsPocketTTSVoice(voice) {
|
||||
if s.deps.PocketTTS == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "pocket-tts not configured")
|
||||
return
|
||||
@@ -1343,6 +1356,9 @@ func (s *Server) handlePresignVoiceSample(w http.ResponseWriter, r *http.Request
|
||||
}
|
||||
|
||||
key := kokoro.VoiceSampleKey(voice)
|
||||
if cfai.IsCFAIVoice(voice) {
|
||||
key = cfai.VoiceSampleKey(voice)
|
||||
}
|
||||
|
||||
// Generate sample on demand when it is not in MinIO yet.
|
||||
if !s.deps.AudioStore.AudioExists(r.Context(), key) {
|
||||
@@ -1352,7 +1368,13 @@ func (s *Server) handlePresignVoiceSample(w http.ResponseWriter, r *http.Request
|
||||
mp3 []byte
|
||||
err error
|
||||
)
|
||||
if pockettts.IsPocketTTSVoice(voice) {
|
||||
if cfai.IsCFAIVoice(voice) {
|
||||
if s.deps.CFAI == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "cloudflare AI TTS not configured")
|
||||
return
|
||||
}
|
||||
mp3, err = s.deps.CFAI.GenerateAudio(r.Context(), voiceSampleText, voice)
|
||||
} else if pockettts.IsPocketTTSVoice(voice) {
|
||||
if s.deps.PocketTTS == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "pocket-tts not configured")
|
||||
return
|
||||
|
||||
234
backend/internal/backend/handlers_image.go
Normal file
234
backend/internal/backend/handlers_image.go
Normal file
@@ -0,0 +1,234 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/libnovel/backend/internal/cfai"
|
||||
)
|
||||
|
||||
// handleAdminImageGenModels handles GET /api/admin/image-gen/models.
|
||||
// Returns the list of supported Cloudflare AI image generation models.
|
||||
func (s *Server) handleAdminImageGenModels(w http.ResponseWriter, r *http.Request) {
|
||||
if s.deps.ImageGen == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "image generation not configured (CFAI_ACCOUNT_ID/CFAI_API_TOKEN missing)")
|
||||
return
|
||||
}
|
||||
models := s.deps.ImageGen.Models()
|
||||
writeJSON(w, 0, map[string]any{"models": models})
|
||||
}
|
||||
|
||||
// imageGenRequest is the JSON payload accepted by POST /api/admin/image-gen.
type imageGenRequest struct {
	// Prompt describes the image to generate. It must not be blank.
	Prompt string `json:"prompt"`

	// Model is the CF Workers AI model ID
	// (e.g. "@cf/black-forest-labs/flux-2-dev"). When empty, the handler
	// picks the recommended model for Type.
	Model string `json:"model"`

	// Type selects the kind of image: "cover" or "chapter".
	Type string `json:"type"`

	// Slug is the book slug. It is required for both cover and chapter images.
	Slug string `json:"slug"`

	// Chapter is the 1-based chapter number. Required when Type == "chapter".
	Chapter int `json:"chapter"`

	// ReferenceImageB64 optionally carries a base64-encoded PNG/JPEG
	// reference image. When present the img2img path is used.
	ReferenceImageB64 string `json:"reference_image_b64"`

	// NumSteps overrides the number of inference steps (documented default: 20).
	NumSteps int `json:"num_steps"`

	// Width and Height override the output dimensions (0 = model default).
	Width  int `json:"width"`
	Height int `json:"height"`

	// Guidance overrides the prompt guidance scale (0 = model default).
	Guidance float64 `json:"guidance"`

	// Strength applies to img2img: 0.0–1.0 (documented default: 0.75).
	Strength float64 `json:"strength"`

	// SaveToCover asks the handler to store the result as the book cover in
	// MinIO (overwriting any existing cover) and to set the book's cover URL.
	// Only honoured when Type == "cover".
	SaveToCover bool `json:"save_to_cover"`
}
|
||||
|
||||
// imageGenResponse is the JSON payload returned by POST /api/admin/image-gen.
type imageGenResponse struct {
	// ImageB64 is the generated image, base64-encoded (standard alphabet,
	// padded). The image format is reported in ContentType.
	ImageB64 string `json:"image_b64"`
	// ContentType is the MIME type detected by sniffImageContentType:
	// "image/png", "image/jpeg" or "image/webp".
	ContentType string `json:"content_type"`
	// Saved reports whether the image was persisted as the book cover.
	Saved bool `json:"saved"`
	// CoverURL is the URL the cover is now served from. It is only set when
	// Saved is true and is omitted from the JSON otherwise.
	CoverURL string `json:"cover_url,omitempty"`
	// Model is the ID of the model that produced the image.
	Model string `json:"model"`
	// Bytes is the size of the raw (un-encoded) image in bytes.
	Bytes int `json:"bytes"`
}
|
||||
|
||||
// handleAdminImageGen handles POST /api/admin/image-gen.
|
||||
//
|
||||
// Generates an image using Cloudflare Workers AI and optionally stores it.
|
||||
// Multipart/form-data is also accepted so the reference image can be uploaded
|
||||
// directly; otherwise the reference is expected as base64 JSON.
|
||||
func (s *Server) handleAdminImageGen(w http.ResponseWriter, r *http.Request) {
|
||||
if s.deps.ImageGen == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "image generation not configured (CFAI_ACCOUNT_ID/CFAI_API_TOKEN missing)")
|
||||
return
|
||||
}
|
||||
|
||||
var req imageGenRequest
|
||||
var refImageData []byte
|
||||
|
||||
ct := r.Header.Get("Content-Type")
|
||||
if strings.HasPrefix(ct, "multipart/form-data") {
|
||||
// Multipart: parse JSON fields from a "json" part + optional "reference" file part.
|
||||
if err := r.ParseMultipartForm(32 << 20); err != nil {
|
||||
jsonError(w, http.StatusBadRequest, "parse multipart: "+err.Error())
|
||||
return
|
||||
}
|
||||
if jsonPart := r.FormValue("json"); jsonPart != "" {
|
||||
if err := json.Unmarshal([]byte(jsonPart), &req); err != nil {
|
||||
jsonError(w, http.StatusBadRequest, "parse json field: "+err.Error())
|
||||
return
|
||||
}
|
||||
}
|
||||
if f, _, err := r.FormFile("reference"); err == nil {
|
||||
defer f.Close()
|
||||
refImageData, _ = io.ReadAll(f)
|
||||
}
|
||||
} else {
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
jsonError(w, http.StatusBadRequest, "parse body: "+err.Error())
|
||||
return
|
||||
}
|
||||
if req.ReferenceImageB64 != "" {
|
||||
var decErr error
|
||||
refImageData, decErr = base64.StdEncoding.DecodeString(req.ReferenceImageB64)
|
||||
if decErr != nil {
|
||||
// Try std without padding
|
||||
refImageData, decErr = base64.RawStdEncoding.DecodeString(req.ReferenceImageB64)
|
||||
if decErr != nil {
|
||||
jsonError(w, http.StatusBadRequest, "decode reference_image_b64: "+decErr.Error())
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if strings.TrimSpace(req.Prompt) == "" {
|
||||
jsonError(w, http.StatusBadRequest, "prompt is required")
|
||||
return
|
||||
}
|
||||
if req.Type != "cover" && req.Type != "chapter" {
|
||||
jsonError(w, http.StatusBadRequest, `type must be "cover" or "chapter"`)
|
||||
return
|
||||
}
|
||||
if req.Slug == "" {
|
||||
jsonError(w, http.StatusBadRequest, "slug is required")
|
||||
return
|
||||
}
|
||||
if req.Type == "chapter" && req.Chapter <= 0 {
|
||||
jsonError(w, http.StatusBadRequest, "chapter must be > 0 when type is chapter")
|
||||
return
|
||||
}
|
||||
|
||||
// Resolve model
|
||||
model := cfai.ImageModel(req.Model)
|
||||
if model == "" {
|
||||
if req.Type == "cover" {
|
||||
model = cfai.DefaultImageModel
|
||||
} else {
|
||||
model = cfai.ImageModelFlux2Klein4B
|
||||
}
|
||||
}
|
||||
|
||||
imgReq := cfai.ImageRequest{
|
||||
Prompt: req.Prompt,
|
||||
Model: model,
|
||||
NumSteps: req.NumSteps,
|
||||
Width: req.Width,
|
||||
Height: req.Height,
|
||||
Guidance: req.Guidance,
|
||||
Strength: req.Strength,
|
||||
}
|
||||
|
||||
s.deps.Log.Info("admin: image gen requested",
|
||||
"type", req.Type, "slug", req.Slug, "chapter", req.Chapter,
|
||||
"model", model, "has_reference", len(refImageData) > 0)
|
||||
|
||||
var imgData []byte
|
||||
var genErr error
|
||||
if len(refImageData) > 0 {
|
||||
imgData, genErr = s.deps.ImageGen.GenerateImageFromReference(r.Context(), imgReq, refImageData)
|
||||
} else {
|
||||
imgData, genErr = s.deps.ImageGen.GenerateImage(r.Context(), imgReq)
|
||||
}
|
||||
if genErr != nil {
|
||||
s.deps.Log.Error("admin: image gen failed", "err", genErr)
|
||||
jsonError(w, http.StatusBadGateway, "image generation failed: "+genErr.Error())
|
||||
return
|
||||
}
|
||||
|
||||
contentType := sniffImageContentType(imgData)
|
||||
|
||||
// ── Optional persistence ──────────────────────────────────────────────────
|
||||
var saved bool
|
||||
var coverURL string
|
||||
|
||||
if req.SaveToCover && req.Type == "cover" && s.deps.CoverStore != nil {
|
||||
if err := s.deps.CoverStore.PutCover(r.Context(), req.Slug, imgData, contentType); err != nil {
|
||||
s.deps.Log.Error("admin: save generated cover failed", "slug", req.Slug, "err", err)
|
||||
// Non-fatal: still return the image
|
||||
} else {
|
||||
saved = true
|
||||
coverURL = fmt.Sprintf("/api/cover/local/%s", req.Slug)
|
||||
s.deps.Log.Info("admin: generated cover saved", "slug", req.Slug, "bytes", len(imgData))
|
||||
}
|
||||
}
|
||||
|
||||
// Encode result as base64
|
||||
b64 := base64.StdEncoding.EncodeToString(imgData)
|
||||
|
||||
writeJSON(w, 0, imageGenResponse{
|
||||
ImageB64: b64,
|
||||
ContentType: contentType,
|
||||
Saved: saved,
|
||||
CoverURL: coverURL,
|
||||
Model: string(model),
|
||||
Bytes: len(imgData),
|
||||
})
|
||||
}
|
||||
|
||||
// sniffImageContentType inspects the leading magic bytes of data and returns
// the matching MIME type: "image/png", "image/jpeg" or "image/webp".
// Inputs shorter than four bytes, and any unrecognised signature, are
// reported as "image/png".
func sniffImageContentType(data []byte) string {
	const fallback = "image/png"
	if len(data) < 4 {
		return fallback
	}
	switch head := string(data[:4]); {
	case head == "\x89PNG":
		return "image/png"
	case head[:3] == "\xff\xd8\xff":
		return "image/jpeg"
	case head == "RIFF" && len(data) >= 12 && string(data[8:12]) == "WEBP":
		// RIFF container: the form type sits after the 4-byte size field.
		return "image/webp"
	}
	return fallback
}
|
||||
@@ -30,6 +30,7 @@ import (
|
||||
|
||||
sentryhttp "github.com/getsentry/sentry-go/http"
|
||||
"github.com/libnovel/backend/internal/bookstore"
|
||||
"github.com/libnovel/backend/internal/cfai"
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
"github.com/libnovel/backend/internal/kokoro"
|
||||
"github.com/libnovel/backend/internal/meili"
|
||||
@@ -69,6 +70,12 @@ type Dependencies struct {
|
||||
// PocketTTS is the pocket-tts client (used for voice list only in the backend;
|
||||
// audio generation is done by the runner).
|
||||
PocketTTS pockettts.Client
|
||||
// CFAI is the Cloudflare Workers AI TTS client (used for voice sample
|
||||
// generation and audio-stream live TTS; audio task generation is done by the runner).
|
||||
CFAI cfai.Client
|
||||
// ImageGen is the Cloudflare Workers AI image generation client.
|
||||
// If nil, image generation endpoints return 503.
|
||||
ImageGen cfai.ImageGenClient
|
||||
// Log is the structured logger.
|
||||
Log *slog.Logger
|
||||
}
|
||||
@@ -179,6 +186,10 @@ func (s *Server) ListenAndServe(ctx context.Context) error {
|
||||
mux.HandleFunc("POST /api/admin/audio/bulk", s.handleAdminAudioBulk)
|
||||
mux.HandleFunc("POST /api/admin/audio/cancel-bulk", s.handleAdminAudioCancelBulk)
|
||||
|
||||
// Admin image generation endpoints
|
||||
mux.HandleFunc("GET /api/admin/image-gen/models", s.handleAdminImageGenModels)
|
||||
mux.HandleFunc("POST /api/admin/image-gen", s.handleAdminImageGen)
|
||||
|
||||
// Voices list
|
||||
mux.HandleFunc("GET /api/voices", s.handleVoices)
|
||||
|
||||
@@ -338,6 +349,23 @@ func (s *Server) voices(ctx context.Context) []domain.Voice {
|
||||
}
|
||||
}
|
||||
|
||||
// ── Cloudflare AI voices ──────────────────────────────────────────────────
|
||||
if s.deps.CFAI != nil {
|
||||
for _, speaker := range cfai.Speakers() {
|
||||
gender := "m"
|
||||
if cfai.IsFemale(speaker) {
|
||||
gender = "f"
|
||||
}
|
||||
result = append(result, domain.Voice{
|
||||
ID: cfai.VoiceID(speaker),
|
||||
Engine: "cfai",
|
||||
Lang: "en",
|
||||
Gender: gender,
|
||||
})
|
||||
}
|
||||
s.deps.Log.Info("backend: loaded CF AI voices", "count", len(cfai.Speakers()))
|
||||
}
|
||||
|
||||
s.voiceMu.Lock()
|
||||
s.cachedVoices = result
|
||||
s.voiceMu.Unlock()
|
||||
|
||||
214
backend/internal/cfai/client.go
Normal file
214
backend/internal/cfai/client.go
Normal file
@@ -0,0 +1,214 @@
|
||||
// Package cfai provides a client for Cloudflare Workers AI Text-to-Speech models.
|
||||
//
|
||||
// The Cloudflare Workers AI REST API is used to run TTS models:
|
||||
//
|
||||
// POST https://api.cloudflare.com/client/v4/accounts/{accountID}/ai/run/{model}
|
||||
// Authorization: Bearer {apiToken}
|
||||
// Content-Type: application/json
|
||||
// { "text": "...", "speaker": "luna" }
|
||||
//
|
||||
// → 200 audio/mpeg — raw MP3 bytes
|
||||
//
|
||||
// Currently supported model: @cf/deepgram/aura-2-en (40 English speakers).
|
||||
// Voice IDs are prefixed with "cfai:" to distinguish them from Kokoro/pocket-tts
|
||||
// voices (e.g. "cfai:luna", "cfai:orion").
|
||||
//
|
||||
// The API is batch-only (no streaming), so GenerateAudio waits for the full
|
||||
// response. There is no 100-second Cloudflare proxy timeout because we are
|
||||
// calling the Cloudflare API directly, not routing through a Cloudflare-proxied
|
||||
// homelab tunnel.
|
||||
package cfai
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// DefaultModel is the Cloudflare Workers AI TTS model used when none is
// configured.
const DefaultModel = "@cf/deepgram/aura-2-en"

// voicePrefix namespaces CF AI voice IDs (e.g. "cfai:luna").
const voicePrefix = "cfai:"
|
||||
|
||||
// aura2Speakers lists, in alphabetical order, every bare speaker name this
// package exposes for the aura-2-en model.
var aura2Speakers = []string{
	"amalthea", "andromeda", "apollo", "arcas", "aries",
	"asteria", "athena", "atlas", "aurora", "callista",
	"cora", "cordelia", "delia", "draco", "electra",
	"harmonia", "helena", "hera", "hermes", "hyperion",
	"iris", "janus", "juno", "jupiter", "luna",
	"mars", "minerva", "neptune", "odysseus", "ophelia",
	"orion", "orpheus", "pandora", "phoebe", "pluto",
	"saturn", "thalia", "theia", "vesta", "zeus",
}
|
||||
|
||||
// femaleSpeakers is the set of aura-2-en speaker names that are female voices.
// Every speaker not listed here is treated as male by IsFemale.
//
// NOTE(review): "janus" is listed and "aries" is not, following Deepgram's
// published Aura-2 voice table (Janus: feminine, Aries: masculine). The
// previous set had the two swapped — confirm against the upstream voice list
// if it changes.
var femaleSpeakers = map[string]struct{}{
	"amalthea": {}, "andromeda": {}, "asteria": {}, "athena": {},
	"aurora": {}, "callista": {}, "cora": {}, "cordelia": {},
	"delia": {}, "electra": {}, "harmonia": {}, "helena": {},
	"hera": {}, "iris": {}, "janus": {}, "juno": {},
	"luna": {}, "minerva": {}, "ophelia": {}, "pandora": {},
	"phoebe": {}, "thalia": {}, "theia": {}, "vesta": {},
}
|
||||
|
||||
// IsCFAIVoice reports whether voice is served by the Cloudflare AI client.
|
||||
// CF AI voices use the "cfai:" prefix, e.g. "cfai:luna".
|
||||
func IsCFAIVoice(voice string) bool {
|
||||
return strings.HasPrefix(voice, voicePrefix)
|
||||
}
|
||||
|
||||
// SpeakerName strips the "cfai:" prefix and returns the bare speaker name.
|
||||
// If voice is not a CF AI voice the original string is returned unchanged.
|
||||
func SpeakerName(voice string) string {
|
||||
return strings.TrimPrefix(voice, voicePrefix)
|
||||
}
|
||||
|
||||
// VoiceID returns the full voice ID (with prefix) for a bare speaker name.
|
||||
func VoiceID(speaker string) string {
|
||||
return voicePrefix + speaker
|
||||
}
|
||||
|
||||
// VoiceSampleKey returns the object key under which the sample MP3 for a
// CF AI voice is stored: "_voice-samples/<safe>.mp3". <safe> is voice with
// every rune other than an ASCII letter, digit, '_' or '-' replaced by '_'.
func VoiceSampleKey(voice string) string {
	var safe strings.Builder
	safe.Grow(len(voice))
	for _, r := range voice {
		switch {
		case 'a' <= r && r <= 'z',
			'A' <= r && r <= 'Z',
			'0' <= r && r <= '9',
			r == '_', r == '-':
			safe.WriteRune(r)
		default:
			safe.WriteByte('_')
		}
	}
	return fmt.Sprintf("_voice-samples/%s.mp3", safe.String())
}
|
||||
|
||||
// IsFemale reports whether the given CF AI voice ID (with or without prefix)
|
||||
// is a female speaker.
|
||||
func IsFemale(voice string) bool {
|
||||
speaker := SpeakerName(voice)
|
||||
_, ok := femaleSpeakers[speaker]
|
||||
return ok
|
||||
}
|
||||
|
||||
// Speakers returns all available bare speaker names for aura-2-en.
|
||||
func Speakers() []string {
|
||||
out := make([]string, len(aura2Speakers))
|
||||
copy(out, aura2Speakers)
|
||||
return out
|
||||
}
|
||||
|
||||
// Client is the interface for Cloudflare Workers AI text-to-speech.
type Client interface {
	// GenerateAudio synthesises text with the given voice (e.g. "cfai:luna")
	// and returns the raw MP3 bytes.
	GenerateAudio(ctx context.Context, text, voice string) ([]byte, error)

	// StreamAudioMP3 exposes the generated MP3 as an io.ReadCloser. The CF AI
	// API is batch-only, so the full response is buffered first; the method
	// exists so callers can treat this engine like a streaming one.
	StreamAudioMP3(ctx context.Context, text, voice string) (io.ReadCloser, error)

	// StreamAudioWAV is not natively supported: the CF AI model returns MP3,
	// and this method hands back those same MP3 bytes as an io.ReadCloser.
	StreamAudioWAV(ctx context.Context, text, voice string) (io.ReadCloser, error)

	// ListVoices returns every available voice ID, each carrying the "cfai:"
	// prefix.
	ListVoices(ctx context.Context) ([]string, error)
}
|
||||
|
||||
// httpClient is the concrete Client that talks to the Cloudflare Workers AI
// REST API over HTTP.
type httpClient struct {
	accountID string       // Cloudflare account ID placed in the request URL
	apiToken  string       // bearer token sent in the Authorization header
	model     string       // model ID placed in the request URL
	http      *http.Client // underlying HTTP client used for every call
}
|
||||
|
||||
// New returns a Client for the given Cloudflare account and API token.
|
||||
// model defaults to DefaultModel when empty.
|
||||
func New(accountID, apiToken, model string) Client {
|
||||
if model == "" {
|
||||
model = DefaultModel
|
||||
}
|
||||
return &httpClient{
|
||||
accountID: accountID,
|
||||
apiToken: apiToken,
|
||||
model: model,
|
||||
http: &http.Client{Timeout: 5 * time.Minute},
|
||||
}
|
||||
}
|
||||
|
||||
// GenerateAudio calls the Cloudflare Workers AI TTS endpoint and returns MP3 bytes.
|
||||
func (c *httpClient) GenerateAudio(ctx context.Context, text, voice string) ([]byte, error) {
|
||||
if text == "" {
|
||||
return nil, fmt.Errorf("cfai: empty text")
|
||||
}
|
||||
speaker := SpeakerName(voice)
|
||||
if speaker == "" {
|
||||
speaker = "luna"
|
||||
}
|
||||
|
||||
body, err := json.Marshal(map[string]any{
|
||||
"text": text,
|
||||
"speaker": speaker,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cfai: marshal request: %w", err)
|
||||
}
|
||||
|
||||
url := fmt.Sprintf("https://api.cloudflare.com/client/v4/accounts/%s/ai/run/%s",
|
||||
c.accountID, c.model)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cfai: build request: %w", err)
|
||||
}
|
||||
req.Header.Set("Authorization", "Bearer "+c.apiToken)
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := c.http.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cfai: request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return nil, fmt.Errorf("cfai: server returned %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
|
||||
}
|
||||
|
||||
mp3, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cfai: read response: %w", err)
|
||||
}
|
||||
return mp3, nil
|
||||
}
|
||||
|
||||
// StreamAudioMP3 generates audio and wraps the MP3 bytes as an io.ReadCloser.
|
||||
func (c *httpClient) StreamAudioMP3(ctx context.Context, text, voice string) (io.ReadCloser, error) {
|
||||
mp3, err := c.GenerateAudio(ctx, text, voice)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return io.NopCloser(bytes.NewReader(mp3)), nil
|
||||
}
|
||||
|
||||
// StreamAudioWAV generates audio (MP3) and wraps it as an io.ReadCloser.
|
||||
// Note: the CF AI aura-2-en model returns MP3 regardless of the method name.
|
||||
func (c *httpClient) StreamAudioWAV(ctx context.Context, text, voice string) (io.ReadCloser, error) {
|
||||
return c.StreamAudioMP3(ctx, text, voice)
|
||||
}
|
||||
|
||||
// ListVoices returns all available CF AI voice IDs (with the "cfai:" prefix).
|
||||
func (c *httpClient) ListVoices(_ context.Context) ([]string, error) {
|
||||
ids := make([]string, len(aura2Speakers))
|
||||
for i, s := range aura2Speakers {
|
||||
ids[i] = VoiceID(s)
|
||||
}
|
||||
return ids, nil
|
||||
}
|
||||
312
backend/internal/cfai/image.go
Normal file
312
backend/internal/cfai/image.go
Normal file
@@ -0,0 +1,312 @@
|
||||
// Image generation via Cloudflare Workers AI text-to-image models.
|
||||
//
|
||||
// API reference:
|
||||
//
|
||||
// POST https://api.cloudflare.com/client/v4/accounts/{accountID}/ai/run/{model}
|
||||
// Authorization: Bearer {apiToken}
|
||||
// Content-Type: application/json
|
||||
//
|
||||
// Text-only request (all models):
|
||||
//
|
||||
// { "prompt": "...", "num_steps": 20 }
|
||||
//
|
||||
// Reference-image request:
|
||||
// - FLUX models: { "prompt": "...", "image_b64": "<base64>" }
|
||||
// - SD img2img: { "prompt": "...", "image": [r,g,b,a,...], "strength": 0.75 }
|
||||
//
|
||||
// All models return raw PNG bytes on success (Content-Type: image/png).
|
||||
//
|
||||
// Recommended models for LibNovel:
|
||||
// - Book covers (no reference): flux-2-dev, flux-2-klein-9b, lucid-origin
|
||||
// - Chapter images (speed): flux-2-klein-4b, flux-1-schnell
|
||||
// - With reference image: flux-2-dev, flux-2-klein-9b, sd-v1-5-img2img
|
||||
package cfai
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"image"
|
||||
_ "image/jpeg" // register JPEG decoder
|
||||
_ "image/png" // register PNG decoder
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ImageModel is the ID of a Cloudflare Workers AI text-to-image model.
type ImageModel string

// Supported image generation models.
const (
	// ImageModelFlux2Dev offers the best quality and multi-reference input.
	// Recommended for covers.
	ImageModelFlux2Dev ImageModel = "@cf/black-forest-labs/flux-2-dev"
	// ImageModelFlux2Klein9B is a 9B-parameter, multi-reference model.
	// Good for covers.
	ImageModelFlux2Klein9B ImageModel = "@cf/black-forest-labs/flux-2-klein-9b"
	// ImageModelFlux2Klein4B is ultra-fast with unified generation and
	// editing. Recommended for chapters.
	ImageModelFlux2Klein4B ImageModel = "@cf/black-forest-labs/flux-2-klein-4b"
	// ImageModelFlux1Schnell is the fastest model and is text-only.
	// Good for quick illustrations.
	ImageModelFlux1Schnell ImageModel = "@cf/black-forest-labs/flux-1-schnell"
	// ImageModelSDXLLightning provides fast 1024px generation.
	ImageModelSDXLLightning ImageModel = "@cf/bytedance/stable-diffusion-xl-lightning"
	// ImageModelSD15Img2Img is an explicit img2img model taking a flat RGBA
	// reference.
	ImageModelSD15Img2Img ImageModel = "@cf/runwayml/stable-diffusion-v1-5-img2img"
	// ImageModelSDXLBase is the Stability AI SDXL base model.
	ImageModelSDXLBase ImageModel = "@cf/stabilityai/stable-diffusion-xl-base-1.0"
	// ImageModelLucidOrigin is a Leonardo AI model with strong prompt
	// adherence.
	ImageModelLucidOrigin ImageModel = "@cf/leonardo/lucid-origin"
	// ImageModelPhoenix10 is a Leonardo AI model with accurate text rendering.
	ImageModelPhoenix10 ImageModel = "@cf/leonardo/phoenix-1.0"

	// DefaultImageModel is the model used for book covers when none is given.
	DefaultImageModel ImageModel = ImageModelFlux2Dev
)
|
||||
|
||||
// ImageModelInfo is the metadata published for one image generation model.
type ImageModelInfo struct {
	// ID is the model identifier.
	ID string `json:"id"`
	// Label is the human-readable model name.
	Label string `json:"label"`
	// Provider names the organisation behind the model.
	Provider string `json:"provider"`
	// SupportsRef reports whether the model accepts a reference image.
	SupportsRef bool `json:"supports_ref"`
	// RecommendedFor lists the image types the model suits:
	// "cover" and/or "chapter".
	RecommendedFor []string `json:"recommended_for"`
	// Description is a short summary of the model.
	Description string `json:"description"`
}
|
||||
|
||||
// AllImageModels returns metadata about every supported image model.
|
||||
func AllImageModels() []ImageModelInfo {
|
||||
return []ImageModelInfo{
|
||||
{
|
||||
ID: string(ImageModelFlux2Dev), Label: "FLUX.2 Dev", Provider: "Black Forest Labs",
|
||||
SupportsRef: true, RecommendedFor: []string{"cover"},
|
||||
Description: "Best quality; multi-reference editing. Recommended for book covers.",
|
||||
},
|
||||
{
|
||||
ID: string(ImageModelFlux2Klein9B), Label: "FLUX.2 Klein 9B", Provider: "Black Forest Labs",
|
||||
SupportsRef: true, RecommendedFor: []string{"cover"},
|
||||
Description: "9B parameters with multi-reference support.",
|
||||
},
|
||||
{
|
||||
ID: string(ImageModelFlux2Klein4B), Label: "FLUX.2 Klein 4B", Provider: "Black Forest Labs",
|
||||
SupportsRef: true, RecommendedFor: []string{"chapter"},
|
||||
Description: "Ultra-fast unified gen+edit. Recommended for chapter images.",
|
||||
},
|
||||
{
|
||||
ID: string(ImageModelFlux1Schnell), Label: "FLUX.1 Schnell", Provider: "Black Forest Labs",
|
||||
SupportsRef: false, RecommendedFor: []string{"chapter"},
|
||||
Description: "Fastest inference. Good for quick chapter illustrations.",
|
||||
},
|
||||
{
|
||||
ID: string(ImageModelSDXLLightning), Label: "SDXL Lightning", Provider: "ByteDance",
|
||||
SupportsRef: false, RecommendedFor: []string{"chapter"},
|
||||
Description: "Lightning-fast 1024px images in a few steps.",
|
||||
},
|
||||
{
|
||||
ID: string(ImageModelSD15Img2Img), Label: "SD 1.5 img2img", Provider: "RunwayML",
|
||||
SupportsRef: true, RecommendedFor: []string{"cover", "chapter"},
|
||||
Description: "Explicit img2img: generates from a reference image + prompt.",
|
||||
},
|
||||
{
|
||||
ID: string(ImageModelSDXLBase), Label: "SDXL Base 1.0", Provider: "Stability AI",
|
||||
SupportsRef: false, RecommendedFor: []string{"cover"},
|
||||
Description: "Stable Diffusion XL base model.",
|
||||
},
|
||||
{
|
||||
ID: string(ImageModelLucidOrigin), Label: "Lucid Origin", Provider: "Leonardo AI",
|
||||
SupportsRef: false, RecommendedFor: []string{"cover"},
|
||||
Description: "Highly prompt-responsive; strong graphic design and HD renders.",
|
||||
},
|
||||
{
|
||||
ID: string(ImageModelPhoenix10), Label: "Phoenix 1.0", Provider: "Leonardo AI",
|
||||
SupportsRef: false, RecommendedFor: []string{"cover"},
|
||||
Description: "Exceptional prompt adherence; accurate text rendering.",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// ImageRequest is the input to GenerateImage / GenerateImageFromReference.
|
||||
type ImageRequest struct {
|
||||
// Prompt is the text description of the desired image.
|
||||
Prompt string
|
||||
// Model is the CF Workers AI model. Defaults to DefaultImageModel when empty.
|
||||
Model ImageModel
|
||||
// NumSteps controls inference quality (default 20). Range: 1–20.
|
||||
NumSteps int
|
||||
// Width and Height in pixels. 0 = model default (typically 1024x1024).
|
||||
Width, Height int
|
||||
// Guidance controls prompt adherence (default 7.5).
|
||||
Guidance float64
|
||||
// Strength for img2img: 0.0 = copy reference, 1.0 = ignore reference (default 0.75).
|
||||
Strength float64
|
||||
}
|
||||
|
||||
// ImageGenClient generates images via Cloudflare Workers AI.
|
||||
type ImageGenClient interface {
|
||||
// GenerateImage creates an image from a text prompt only.
|
||||
// Returns raw PNG bytes.
|
||||
GenerateImage(ctx context.Context, req ImageRequest) ([]byte, error)
|
||||
|
||||
// GenerateImageFromReference creates an image from a text prompt + reference image.
|
||||
// refImage should be PNG or JPEG bytes. Returns raw PNG bytes.
|
||||
GenerateImageFromReference(ctx context.Context, req ImageRequest, refImage []byte) ([]byte, error)
|
||||
|
||||
// Models returns metadata about all supported image models.
|
||||
Models() []ImageModelInfo
|
||||
}
|
||||
|
||||
// imageGenHTTPClient implements ImageGenClient on top of the Cloudflare
// Workers AI REST API.
type imageGenHTTPClient struct {
	// accountID is interpolated into the API URL path.
	accountID string
	// apiToken is sent as a Bearer token on every request.
	apiToken string
	// http is the client used for all outgoing calls.
	http *http.Client
}
|
||||
|
||||
// NewImageGen returns an ImageGenClient for the given Cloudflare account.
|
||||
func NewImageGen(accountID, apiToken string) ImageGenClient {
|
||||
return &imageGenHTTPClient{
|
||||
accountID: accountID,
|
||||
apiToken: apiToken,
|
||||
http: &http.Client{Timeout: 5 * time.Minute},
|
||||
}
|
||||
}
|
||||
|
||||
// GenerateImage generates an image from text only.
|
||||
func (c *imageGenHTTPClient) GenerateImage(ctx context.Context, req ImageRequest) ([]byte, error) {
|
||||
req = applyImageDefaults(req)
|
||||
body := map[string]any{
|
||||
"prompt": req.Prompt,
|
||||
"num_steps": req.NumSteps,
|
||||
}
|
||||
if req.Width > 0 {
|
||||
body["width"] = req.Width
|
||||
}
|
||||
if req.Height > 0 {
|
||||
body["height"] = req.Height
|
||||
}
|
||||
if req.Guidance > 0 {
|
||||
body["guidance"] = req.Guidance
|
||||
}
|
||||
return c.callImageAPI(ctx, req.Model, body)
|
||||
}
|
||||
|
||||
// GenerateImageFromReference generates an image from a text prompt + reference image.
|
||||
func (c *imageGenHTTPClient) GenerateImageFromReference(ctx context.Context, req ImageRequest, refImage []byte) ([]byte, error) {
|
||||
if len(refImage) == 0 {
|
||||
return c.GenerateImage(ctx, req)
|
||||
}
|
||||
req = applyImageDefaults(req)
|
||||
|
||||
var body map[string]any
|
||||
if req.Model == ImageModelSD15Img2Img {
|
||||
pixels, err := decodeImageToRGBA(refImage)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cfai/image: decode reference: %w", err)
|
||||
}
|
||||
strength := req.Strength
|
||||
if strength <= 0 {
|
||||
strength = 0.75
|
||||
}
|
||||
body = map[string]any{
|
||||
"prompt": req.Prompt,
|
||||
"image": pixels,
|
||||
"strength": strength,
|
||||
"num_steps": req.NumSteps,
|
||||
}
|
||||
} else {
|
||||
b64 := base64.StdEncoding.EncodeToString(refImage)
|
||||
body = map[string]any{
|
||||
"prompt": req.Prompt,
|
||||
"image_b64": b64,
|
||||
"num_steps": req.NumSteps,
|
||||
}
|
||||
if req.Strength > 0 {
|
||||
body["strength"] = req.Strength
|
||||
}
|
||||
}
|
||||
if req.Width > 0 {
|
||||
body["width"] = req.Width
|
||||
}
|
||||
if req.Height > 0 {
|
||||
body["height"] = req.Height
|
||||
}
|
||||
if req.Guidance > 0 {
|
||||
body["guidance"] = req.Guidance
|
||||
}
|
||||
return c.callImageAPI(ctx, req.Model, body)
|
||||
}
|
||||
|
||||
// Models returns all supported image model metadata.
|
||||
func (c *imageGenHTTPClient) Models() []ImageModelInfo {
|
||||
return AllImageModels()
|
||||
}
|
||||
|
||||
func (c *imageGenHTTPClient) callImageAPI(ctx context.Context, model ImageModel, body map[string]any) ([]byte, error) {
|
||||
encoded, err := json.Marshal(body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cfai/image: marshal: %w", err)
|
||||
}
|
||||
url := fmt.Sprintf("https://api.cloudflare.com/client/v4/accounts/%s/ai/run/%s",
|
||||
c.accountID, string(model))
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(encoded))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cfai/image: build request: %w", err)
|
||||
}
|
||||
req.Header.Set("Authorization", "Bearer "+c.apiToken)
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
resp, err := c.http.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cfai/image: http: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
errBody, _ := io.ReadAll(resp.Body)
|
||||
msg := string(errBody)
|
||||
if len(msg) > 300 {
|
||||
msg = msg[:300]
|
||||
}
|
||||
return nil, fmt.Errorf("cfai/image: model %s returned %d: %s", model, resp.StatusCode, msg)
|
||||
}
|
||||
|
||||
data, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cfai/image: read response: %w", err)
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func applyImageDefaults(req ImageRequest) ImageRequest {
|
||||
if req.Model == "" {
|
||||
req.Model = DefaultImageModel
|
||||
}
|
||||
if req.NumSteps <= 0 {
|
||||
req.NumSteps = 20
|
||||
}
|
||||
return req
|
||||
}
|
||||
|
||||
// decodeImageToRGBA decodes an encoded image (any format registered with the
// image package) into a flat, row-major byte slice with four samples per
// pixel in R, G, B, A order. It is used to build the reference payload for
// the stable-diffusion-v1-5-img2img model.
//
// Samples are the high 8 bits of the values reported by color.Color.RGBA.
// A decode failure is returned wrapped with a "decode image:" prefix.
func decodeImageToRGBA(data []byte) ([]uint8, error) {
	src, _, err := image.Decode(bytes.NewReader(data))
	if err != nil {
		return nil, fmt.Errorf("decode image: %w", err)
	}

	rect := src.Bounds()
	out := make([]uint8, 0, rect.Dx()*rect.Dy()*4)
	for row := rect.Min.Y; row < rect.Max.Y; row++ {
		for col := rect.Min.X; col < rect.Max.X; col++ {
			// RGBA yields 16-bit channels; keep the most significant byte.
			r16, g16, b16, a16 := src.At(col, row).RGBA()
			out = append(out,
				uint8(r16>>8),
				uint8(g16>>8),
				uint8(b16>>8),
				uint8(a16>>8),
			)
		}
	}
	return out, nil
}
|
||||
@@ -66,6 +66,18 @@ type PocketTTS struct {
|
||||
URL string
|
||||
}
|
||||
|
||||
// CFAI holds the Cloudflare Workers AI credentials and the TTS model choice.
// The same account ID and token are also used to construct the Workers AI
// image-generation client.
type CFAI struct {
	// AccountID is the Cloudflare account ID.
	// Leaving it empty disables CF AI generation.
	AccountID string

	// APIToken is a Workers AI API token with Workers AI Read+Edit permissions.
	APIToken string

	// Model is the Workers AI TTS model ID.
	// When empty, "@cf/deepgram/aura-2-en" is used.
	Model string
}
|
||||
|
||||
// LibreTranslate holds connection settings for a self-hosted LibreTranslate instance.
|
||||
type LibreTranslate struct {
|
||||
// URL is the base URL of the LibreTranslate instance, e.g. https://translate.libnovel.cc
|
||||
@@ -153,6 +165,7 @@ type Config struct {
|
||||
MinIO MinIO
|
||||
Kokoro Kokoro
|
||||
PocketTTS PocketTTS
|
||||
CFAI CFAI
|
||||
LibreTranslate LibreTranslate
|
||||
HTTP HTTP
|
||||
Runner Runner
|
||||
@@ -203,6 +216,12 @@ func Load() Config {
|
||||
URL: envOr("POCKET_TTS_URL", ""),
|
||||
},
|
||||
|
||||
CFAI: CFAI{
|
||||
AccountID: envOr("CFAI_ACCOUNT_ID", ""),
|
||||
APIToken: envOr("CFAI_API_TOKEN", ""),
|
||||
Model: envOr("CFAI_TTS_MODEL", ""),
|
||||
},
|
||||
|
||||
LibreTranslate: LibreTranslate{
|
||||
URL: envOr("LIBRETRANSLATE_URL", ""),
|
||||
APIKey: envOr("LIBRETRANSLATE_API_KEY", ""),
|
||||
|
||||
@@ -27,6 +27,7 @@ import (
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
|
||||
"github.com/libnovel/backend/internal/bookstore"
|
||||
"github.com/libnovel/backend/internal/cfai"
|
||||
"github.com/libnovel/backend/internal/domain"
|
||||
"github.com/libnovel/backend/internal/kokoro"
|
||||
"github.com/libnovel/backend/internal/libretranslate"
|
||||
@@ -112,6 +113,9 @@ type Dependencies struct {
|
||||
// PocketTTS is the pocket-tts client (CPU, kyutai voices: alba, marius, etc.).
|
||||
// If nil, pocket-tts voice tasks will fail with a clear error.
|
||||
PocketTTS pockettts.Client
|
||||
// CFAI is the Cloudflare Workers AI TTS client (cfai:* prefixed voices).
|
||||
// If nil, CF AI voice tasks will fail with a clear error.
|
||||
CFAI cfai.Client
|
||||
// LibreTranslate is the machine translation client.
|
||||
// If nil, translation tasks will fail with a clear error.
|
||||
LibreTranslate libretranslate.Client
|
||||
@@ -555,6 +559,18 @@ func (r *Runner) runAudioTask(ctx context.Context, task domain.AudioTask) {
|
||||
return
|
||||
}
|
||||
log.Info("runner: audio generated via pocket-tts", "voice", task.Voice)
|
||||
} else if cfai.IsCFAIVoice(task.Voice) {
|
||||
if r.deps.CFAI == nil {
|
||||
fail("cloudflare AI client not configured (CFAI_ACCOUNT_ID/CFAI_API_TOKEN empty)")
|
||||
return
|
||||
}
|
||||
var genErr error
|
||||
audioData, genErr = r.deps.CFAI.GenerateAudio(ctx, text, task.Voice)
|
||||
if genErr != nil {
|
||||
fail(fmt.Sprintf("cfai generate: %v", genErr))
|
||||
return
|
||||
}
|
||||
log.Info("runner: audio generated via cloudflare AI", "voice", task.Voice)
|
||||
} else {
|
||||
if r.deps.Kokoro == nil {
|
||||
fail("kokoro client not configured (KOKORO_URL is empty)")
|
||||
|
||||
@@ -13,6 +13,11 @@
|
||||
# - RUNNER_SKIP_INITIAL_CATALOGUE_REFRESH=true
|
||||
# - REDIS_ADDR → rediss://redis.libnovel.cc:6380 (prod Redis via Caddy TLS proxy)
|
||||
# - LibreTranslate service for machine translation (internal network only)
|
||||
#
|
||||
# extra_hosts pins storage.libnovel.cc and pb.libnovel.cc to the prod server IP
|
||||
# (165.22.70.138) so that large PutObject uploads and PocketBase writes bypass
|
||||
# Cloudflare's 100-second proxy timeout entirely. TLS still terminates at Caddy
|
||||
# on prod; the TLS certificate is valid for the domain names so SNI works fine.
|
||||
|
||||
services:
|
||||
libretranslate:
|
||||
@@ -35,6 +40,12 @@ services:
|
||||
stop_grace_period: 135s
|
||||
depends_on:
|
||||
- libretranslate
|
||||
# Pin prod subdomains to the prod server IP to bypass Cloudflare's 100s
|
||||
# proxy timeout. Large MP3 PutObject uploads and PocketBase writes go
|
||||
# directly to Caddy on prod; TLS and SNI still work normally.
|
||||
extra_hosts:
|
||||
- "storage.libnovel.cc:165.22.70.138"
|
||||
- "pb.libnovel.cc:165.22.70.138"
|
||||
environment:
|
||||
# ── PocketBase ──────────────────────────────────────────────────────────
|
||||
POCKETBASE_URL: "https://pb.libnovel.cc"
|
||||
@@ -63,6 +74,10 @@ services:
|
||||
# ── Pocket TTS ──────────────────────────────────────────────────────────
|
||||
POCKET_TTS_URL: "${POCKET_TTS_URL}"
|
||||
|
||||
# ── Cloudflare Workers AI TTS ────────────────────────────────────────────
|
||||
CFAI_ACCOUNT_ID: "${CFAI_ACCOUNT_ID}"
|
||||
CFAI_API_TOKEN: "${CFAI_API_TOKEN}"
|
||||
|
||||
# ── LibreTranslate (internal Docker network) ────────────────────────────
|
||||
LIBRETRANSLATE_URL: "http://libretranslate:5000"
|
||||
LIBRETRANSLATE_API_KEY: "${LIBRETRANSLATE_API_KEY}"
|
||||
|
||||
@@ -362,6 +362,7 @@
|
||||
"admin_nav_audio": "Audio",
|
||||
"admin_nav_translation": "Translation",
|
||||
"admin_nav_changelog": "Changelog",
|
||||
"admin_nav_image_gen": "Image Gen",
|
||||
"admin_nav_feedback": "Feedback",
|
||||
"admin_nav_errors": "Errors",
|
||||
"admin_nav_analytics": "Analytics",
|
||||
|
||||
@@ -362,6 +362,7 @@
|
||||
"admin_nav_audio": "Audio",
|
||||
"admin_nav_translation": "Traduction",
|
||||
"admin_nav_changelog": "Modifications",
|
||||
"admin_nav_image_gen": "Image Gen",
|
||||
"admin_nav_feedback": "Retours",
|
||||
"admin_nav_errors": "Erreurs",
|
||||
"admin_nav_analytics": "Analytique",
|
||||
|
||||
@@ -362,6 +362,7 @@
|
||||
"admin_nav_audio": "Audio",
|
||||
"admin_nav_translation": "Terjemahan",
|
||||
"admin_nav_changelog": "Perubahan",
|
||||
"admin_nav_image_gen": "Image Gen",
|
||||
"admin_nav_feedback": "Masukan",
|
||||
"admin_nav_errors": "Kesalahan",
|
||||
"admin_nav_analytics": "Analitik",
|
||||
|
||||
@@ -362,6 +362,7 @@
|
||||
"admin_nav_audio": "Áudio",
|
||||
"admin_nav_translation": "Tradução",
|
||||
"admin_nav_changelog": "Alterações",
|
||||
"admin_nav_image_gen": "Image Gen",
|
||||
"admin_nav_feedback": "Feedback",
|
||||
"admin_nav_errors": "Erros",
|
||||
"admin_nav_analytics": "Análise",
|
||||
|
||||
@@ -362,6 +362,7 @@
|
||||
"admin_nav_audio": "Аудио",
|
||||
"admin_nav_translation": "Перевод",
|
||||
"admin_nav_changelog": "Изменения",
|
||||
"admin_nav_image_gen": "Image Gen",
|
||||
"admin_nav_feedback": "Отзывы",
|
||||
"admin_nav_errors": "Ошибки",
|
||||
"admin_nav_analytics": "Аналитика",
|
||||
|
||||
@@ -86,6 +86,7 @@
|
||||
// ── Derived: voices grouped by engine ──────────────────────────────────
|
||||
const kokoroVoices = $derived(voices.filter((v) => v.engine === 'kokoro'));
|
||||
const pocketVoices = $derived(voices.filter((v) => v.engine === 'pocket-tts'));
|
||||
const cfaiVoices = $derived(voices.filter((v) => v.engine === 'cfai'));
|
||||
|
||||
// ── Voice selector state ────────────────────────────────────────────────
|
||||
let showVoicePanel = $state(false);
|
||||
@@ -98,6 +99,7 @@
|
||||
* Human-readable label for a voice.
|
||||
* Kokoro: "af_bella" → "Bella (US F)"
|
||||
* Pocket-TTS: "alba" → "Alba (EN F)"
|
||||
* CF AI: "cfai:luna" → "Luna (EN F)"
|
||||
* Falls back gracefully if called with a bare string (e.g. from the store default).
|
||||
*/
|
||||
function voiceLabel(v: Voice | string): string {
|
||||
@@ -110,6 +112,14 @@
|
||||
return kokoroLabelFromId(v);
|
||||
}
|
||||
|
||||
if (v.engine === 'cfai') {
|
||||
// "cfai:luna" → "Luna (EN F)"
|
||||
const speaker = v.id.startsWith('cfai:') ? v.id.slice(5) : v.id;
|
||||
const name = speaker.replace(/\b\w/g, (c) => c.toUpperCase());
|
||||
const genderLabel = v.gender.toUpperCase();
|
||||
return `${name} (EN ${genderLabel})`;
|
||||
}
|
||||
|
||||
if (v.engine === 'pocket-tts') {
|
||||
const langLabel = v.lang.toUpperCase().replace('-', '');
|
||||
const genderLabel = v.gender.toUpperCase();
|
||||
@@ -844,6 +854,16 @@
|
||||
{@render voiceRow(v)}
|
||||
{/each}
|
||||
{/if}
|
||||
|
||||
<!-- Cloudflare AI section -->
|
||||
{#if cfaiVoices.length > 0}
|
||||
<div class="px-3 py-1.5 bg-(--color-surface-2)/70 border-b border-(--color-border)/50 {kokoroVoices.length > 0 || pocketVoices.length > 0 ? 'border-t border-(--color-border)' : ''}">
|
||||
<span class="text-[10px] font-semibold text-(--color-muted) uppercase tracking-widest">Cloudflare AI</span>
|
||||
</div>
|
||||
{#each cfaiVoices as v (v.id)}
|
||||
{@render voiceRow(v)}
|
||||
{/each}
|
||||
{/if}
|
||||
</div>
|
||||
<div class="px-3 py-2 border-t border-(--color-border) bg-(--color-surface-2)/50">
|
||||
<p class="text-xs text-(--color-muted)">
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
export interface Voice {
|
||||
/** Voice identifier passed to TTS clients (e.g. "af_bella", "alba"). */
|
||||
id: string;
|
||||
/** TTS engine: "kokoro" | "pocket-tts". */
|
||||
/** TTS engine: "kokoro" | "pocket-tts" | "cfai". */
|
||||
engine: string;
|
||||
/** Primary language tag (e.g. "en-us", "en-gb", "en", "es", "fr"). */
|
||||
lang: string;
|
||||
|
||||
@@ -6,7 +6,8 @@
|
||||
{ href: '/admin/scrape', label: () => m.admin_nav_scrape() },
|
||||
{ href: '/admin/audio', label: () => m.admin_nav_audio() },
|
||||
{ href: '/admin/translation', label: () => m.admin_nav_translation() },
|
||||
{ href: '/admin/changelog', label: () => m.admin_nav_changelog() }
|
||||
{ href: '/admin/changelog', label: () => m.admin_nav_changelog() },
|
||||
{ href: '/admin/image-gen', label: () => m.admin_nav_image_gen() }
|
||||
];
|
||||
|
||||
const externalLinks = [
|
||||
|
||||
@@ -57,6 +57,12 @@
|
||||
return `${m}m ${s % 60}s`;
|
||||
}
|
||||
|
||||
/**
 * Derives a display name for the TTS engine from a voice id:
 * a "cfai:" prefix means Cloudflare AI, an id containing "_" is treated as
 * Kokoro, and anything else as Pocket TTS.
 */
function engineLabel(voice: string): string {
	const isCloudflare = voice.startsWith('cfai:');
	if (isCloudflare) {
		return 'CF AI';
	}
	return voice.includes('_') ? 'Kokoro' : 'Pocket TTS';
}
|
||||
|
||||
// ── Audio jobs stats + filter ────────────────────────────────────────────────
|
||||
let jobsQ = $state('');
|
||||
let filteredJobs = $derived(
|
||||
@@ -160,6 +166,7 @@
|
||||
<th class="px-4 py-3 text-left">Book</th>
|
||||
<th class="px-4 py-3 text-right">Ch.</th>
|
||||
<th class="px-4 py-3 text-left">Voice</th>
|
||||
<th class="px-4 py-3 text-left">Engine</th>
|
||||
<th class="px-4 py-3 text-left">Status</th>
|
||||
<th class="px-4 py-3 text-left">Started</th>
|
||||
<th class="px-4 py-3 text-left">Duration</th>
|
||||
@@ -173,6 +180,7 @@
|
||||
</td>
|
||||
<td class="px-4 py-3 text-right text-(--color-muted)">{job.chapter}</td>
|
||||
<td class="px-4 py-3 text-(--color-muted) font-mono text-xs">{job.voice}</td>
|
||||
<td class="px-4 py-3 text-(--color-muted) text-xs">{engineLabel(job.voice)}</td>
|
||||
<td class="px-4 py-3">
|
||||
<span class="font-medium {jobStatusColor(job.status)}">{job.status}</span>
|
||||
</td>
|
||||
@@ -181,7 +189,7 @@
|
||||
</tr>
|
||||
{#if job.error_message}
|
||||
<tr class="bg-(--color-danger)/10">
|
||||
<td colspan="6" class="px-4 py-2 text-xs text-(--color-danger) font-mono">{job.error_message}</td>
|
||||
<td colspan="7" class="px-4 py-2 text-xs text-(--color-danger) font-mono">{job.error_message}</td>
|
||||
</tr>
|
||||
{/if}
|
||||
{/each}
|
||||
@@ -202,6 +210,7 @@
|
||||
<div class="grid grid-cols-2 gap-1 text-xs">
|
||||
<span class="text-(--color-muted)">Chapter</span><span class="text-(--color-muted) text-right">{job.chapter}</span>
|
||||
<span class="text-(--color-muted)">Voice</span><span class="text-(--color-muted) font-mono text-right truncate">{job.voice}</span>
|
||||
<span class="text-(--color-muted)">Engine</span><span class="text-(--color-muted) text-right">{engineLabel(job.voice)}</span>
|
||||
<span class="text-(--color-muted)">Started</span><span class="text-(--color-muted) text-right">{fmtDate(job.started)}</span>
|
||||
<span class="text-(--color-muted)">Duration</span><span class="text-(--color-muted) text-right">{duration(job.started, job.finished)}</span>
|
||||
</div>
|
||||
@@ -236,6 +245,7 @@
|
||||
<th class="px-4 py-3 text-left">Book</th>
|
||||
<th class="px-4 py-3 text-left">Chapter</th>
|
||||
<th class="px-4 py-3 text-left">Voice</th>
|
||||
<th class="px-4 py-3 text-left">Engine</th>
|
||||
<th class="px-4 py-3 text-left">Filename</th>
|
||||
<th class="px-4 py-3 text-left">Updated</th>
|
||||
</tr>
|
||||
@@ -249,6 +259,7 @@
|
||||
</td>
|
||||
<td class="px-4 py-3 text-(--color-muted)">{parts.chapter}</td>
|
||||
<td class="px-4 py-3 text-(--color-muted) font-mono text-xs">{parts.voice}</td>
|
||||
<td class="px-4 py-3 text-(--color-muted) text-xs">{engineLabel(parts.voice)}</td>
|
||||
<td class="px-4 py-3 text-(--color-muted) font-mono text-xs truncate max-w-[14rem]" title={entry.filename}>
|
||||
{entry.filename}
|
||||
</td>
|
||||
@@ -267,11 +278,12 @@
|
||||
<a href="/books/{parts.slug}" class="text-(--color-text) font-medium hover:text-(--color-brand) transition-colors block truncate">
|
||||
{parts.slug}
|
||||
</a>
|
||||
<div class="grid grid-cols-2 gap-1 text-xs">
|
||||
<span class="text-(--color-muted)">Chapter</span><span class="text-(--color-muted) text-right">{parts.chapter}</span>
|
||||
<span class="text-(--color-muted)">Voice</span><span class="text-(--color-muted) font-mono text-right truncate">{parts.voice}</span>
|
||||
<span class="text-(--color-muted)">Updated</span><span class="text-(--color-muted) text-right">{fmtDate(entry.updated)}</span>
|
||||
</div>
|
||||
<div class="grid grid-cols-2 gap-1 text-xs">
|
||||
<span class="text-(--color-muted)">Chapter</span><span class="text-(--color-muted) text-right">{parts.chapter}</span>
|
||||
<span class="text-(--color-muted)">Voice</span><span class="text-(--color-muted) font-mono text-right truncate">{parts.voice}</span>
|
||||
<span class="text-(--color-muted)">Engine</span><span class="text-(--color-muted) text-right">{engineLabel(parts.voice)}</span>
|
||||
<span class="text-(--color-muted)">Updated</span><span class="text-(--color-muted) text-right">{fmtDate(entry.updated)}</span>
|
||||
</div>
|
||||
{#if entry.filename}
|
||||
<p class="text-xs text-(--color-muted) font-mono truncate" title={entry.filename}>{entry.filename}</p>
|
||||
{/if}
|
||||
|
||||
28
ui/src/routes/admin/image-gen/+page.server.ts
Normal file
28
ui/src/routes/admin/image-gen/+page.server.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
import type { PageServerLoad } from './$types';
|
||||
import { backendFetch } from '$lib/server/scraper';
|
||||
import { log } from '$lib/server/logger';
|
||||
|
||||
/** Metadata describing one image model, as returned by the backend models endpoint. */
export interface ImageModelInfo {
	/** Model identifier sent back to the backend when generating. */
	id: string;
	/** Human-readable model name. */
	label: string;
	/** Organisation that publishes the model. */
	provider: string;
	/** Whether the model accepts a reference image. */
	supports_ref: boolean;
	/** Image kinds the model is recommended for: "cover" | "chapter". */
	recommended_for: string[];
	/** Short description of the model. */
	description: string;
}
|
||||
|
||||
/**
 * Loads the list of image models for the admin image-gen page.
 * Any failure (non-OK status, unreachable backend, unparsable body) is logged
 * as a warning and results in an empty model list rather than an error page.
 */
export const load: PageServerLoad = async () => {
	// parent layout already guards admin role
	try {
		const res = await backendFetch('/api/admin/image-gen/models');
		if (res.ok) {
			const payload = await res.json();
			return { models: (payload.models ?? []) as ImageModelInfo[] };
		}
		log.warn('admin/image-gen', 'failed to load models', { status: res.status });
	} catch (e) {
		log.warn('admin/image-gen', 'backend unreachable', { err: String(e) });
	}
	return { models: [] as ImageModelInfo[] };
};
|
||||
676
ui/src/routes/admin/image-gen/+page.svelte
Normal file
676
ui/src/routes/admin/image-gen/+page.svelte
Normal file
@@ -0,0 +1,676 @@
|
||||
<script lang="ts">
|
||||
import type { PageData } from './$types';
|
||||
import type { ImageModelInfo } from './+page.server';
|
||||
|
||||
let { data }: { data: PageData } = $props();
|
||||
|
||||
// ── Form state ───────────────────────────────────────────────────────────────
|
||||
type ImageType = 'cover' | 'chapter';
|
||||
let imageType = $state<ImageType>('cover');
|
||||
let slug = $state('');
|
||||
let chapter = $state<number>(1);
|
||||
let selectedModel = $state('');
|
||||
let prompt = $state('');
|
||||
let referenceFile = $state<File | null>(null);
|
||||
let referencePreviewUrl = $state('');
|
||||
|
||||
// Advanced
|
||||
let showAdvanced = $state(false);
|
||||
let numSteps = $state(20);
|
||||
let guidance = $state(7.5);
|
||||
let strength = $state(0.75);
|
||||
let width = $state(1024);
|
||||
let height = $state(1024);
|
||||
|
||||
// ── Generation state ─────────────────────────────────────────────────────────
|
||||
let generating = $state(false);
|
||||
let genError = $state('');
|
||||
let elapsedMs = $state(0);
|
||||
let elapsedInterval: ReturnType<typeof setInterval> | null = null;
|
||||
|
||||
// ── Result state ─────────────────────────────────────────────────────────────
|
||||
interface GenResult {
|
||||
imageSrc: string;
|
||||
model: string;
|
||||
bytes: number;
|
||||
contentType: string;
|
||||
saved: boolean;
|
||||
coverUrl: string;
|
||||
elapsedMs: number;
|
||||
slug: string;
|
||||
imageType: ImageType;
|
||||
chapter: number;
|
||||
}
|
||||
|
||||
let result = $state<GenResult | null>(null);
|
||||
let history = $state<GenResult[]>([]);
|
||||
|
||||
let saving = $state(false);
|
||||
let saveError = $state('');
|
||||
let saveSuccess = $state(false);
|
||||
|
||||
// ── Model helpers ────────────────────────────────────────────────────────────
|
||||
const models = data.models as ImageModelInfo[];
|
||||
|
||||
// Every model stays selectable regardless of whether a reference image is
// attached; models without reference support are flagged via refWarning
// instead of being hidden. (The previous ternary had two identical branches.)
let filteredModels = $derived(models);
|
||||
|
||||
let coverModels = $derived(filteredModels.filter((m) => m.recommended_for.includes('cover')));
|
||||
let chapterModels = $derived(filteredModels.filter((m) => m.recommended_for.includes('chapter')));
|
||||
let otherModels = $derived(
|
||||
filteredModels.filter(
|
||||
(m) => !m.recommended_for.includes('cover') && !m.recommended_for.includes('chapter')
|
||||
)
|
||||
);
|
||||
|
||||
// ── Keep the selected model valid for the current image type ────────────────
// Runs on first render and whenever the image type, model lists or selection
// change. If nothing is selected yet, or the selected model is not recommended
// for the current type, fall back to the first recommended model. This single
// effect also covers the "no model selected" case that previously had its own
// (redundant) effect.
$effect(() => {
	const preferred = imageType === 'cover' ? coverModels : chapterModels;
	if (preferred.length === 0) return;

	const current = models.find((m) => m.id === selectedModel);
	if (!current || !current.recommended_for.includes(imageType)) {
		selectedModel = preferred[0].id;
	}
});
|
||||
|
||||
// ── Prompt templates ────────────────────────────────────────────────────────
|
||||
let promptTemplate = $derived(
|
||||
imageType === 'cover'
|
||||
? `Book cover for "${slug || 'untitled novel'}", a fantasy adventure novel. Epic scene with dramatic lighting, professional book cover art, cinematic composition, highly detailed, 4K.`
|
||||
: `Illustration for chapter ${chapter} of "${slug || 'untitled novel'}". Dramatic moment, vivid colors, anime-inspired style, detailed background, cinematic lighting.`
|
||||
);
|
||||
|
||||
/** Overwrites the prompt field with the template derived from the current type, slug and chapter. */
function applyTemplate() {
	const suggested = promptTemplate;
	prompt = suggested;
}
|
||||
|
||||
// ── Reference image handling ─────────────────────────────────────────────────
|
||||
let dragOver = $state(false);
|
||||
|
||||
/**
 * Stores the chosen reference file (or clears it when null) and swaps the
 * preview object URL, revoking the previous one so it does not leak.
 */
function handleReferenceFile(file: File | null) {
	referenceFile = file;

	const stalePreview = referencePreviewUrl;
	if (stalePreview) {
		URL.revokeObjectURL(stalePreview);
	}

	if (file) {
		referencePreviewUrl = URL.createObjectURL(file);
	} else {
		referencePreviewUrl = '';
	}
}
|
||||
|
||||
/** Change handler for the file input: forwards the first picked file, or null when none. */
function onFileInput(e: Event) {
	const { files } = e.target as HTMLInputElement;
	const picked = files?.[0] ?? null;
	handleReferenceFile(picked);
}
|
||||
|
||||
/**
 * Drop handler for the reference area: cancels the browser default, clears
 * the drag highlight and accepts the first dropped file if it is an image.
 */
function onDrop(e: DragEvent) {
	e.preventDefault();
	dragOver = false;

	const dropped = e.dataTransfer?.files[0];
	if (!dropped) return;
	if (!dropped.type.startsWith('image/')) return;
	handleReferenceFile(dropped);
}
|
||||
|
||||
/**
 * Removes the current reference image and resets the native file input so the
 * same file can be picked again.
 */
function clearReference() {
	handleReferenceFile(null);

	const picker = document.getElementById('ref-file-input') as HTMLInputElement | null;
	if (picker) {
		picker.value = '';
	}
}
|
||||
|
||||
// ── Selected model info ──────────────────────────────────────────────────────
|
||||
let selectedModelInfo = $derived(models.find((m) => m.id === selectedModel) ?? null);
|
||||
let refWarning = $derived(
|
||||
referenceFile && selectedModelInfo && !selectedModelInfo.supports_ref
|
||||
? `${selectedModelInfo.label} does not support reference images. The reference will be ignored.`
|
||||
: ''
|
||||
);
|
||||
|
||||
// ── Generate ────────────────────────────────────────────────────────────────
|
||||
let canGenerate = $derived(prompt.trim().length > 0 && slug.trim().length > 0 && !generating);
|
||||
|
||||
/**
 * Sends the current form to the image-gen endpoint and stores the result.
 *
 * - Does nothing unless canGenerate is true.
 * - Resets previous result / error / save state and starts an elapsed-time
 *   ticker (200 ms) that is always stopped in the finally block.
 * - Sends multipart form data when a reference file is attached AND the
 *   selected model supports references; otherwise sends plain JSON.
 * - On success, publishes the result and prepends it to the history
 *   (capped at 5 entries).
 * - A non-OK status, a response without image data, or a thrown fetch error
 *   is reported through genError.
 */
async function generate() {
	if (!canGenerate) return;
	generating = true;
	genError = '';
	result = null;
	elapsedMs = 0;
	saveSuccess = false;
	saveError = '';

	const startTs = Date.now();
	elapsedInterval = setInterval(() => {
		elapsedMs = Date.now() - startTs;
	}, 200);

	try {
		const payload = {
			prompt: prompt.trim(),
			model: selectedModel,
			type: imageType,
			slug: slug.trim(),
			chapter: imageType === 'chapter' ? chapter : 0,
			num_steps: numSteps,
			guidance,
			strength,
			width,
			height
		};

		let res: Response;
		if (referenceFile && selectedModelInfo?.supports_ref) {
			const fd = new FormData();
			fd.append('json', JSON.stringify(payload));
			fd.append('reference', referenceFile);
			res = await fetch('/api/admin/image-gen', { method: 'POST', body: fd });
		} else {
			res = await fetch('/api/admin/image-gen', {
				method: 'POST',
				headers: { 'Content-Type': 'application/json' },
				body: JSON.stringify(payload)
			});
		}

		// An unparsable body degrades to {} so the status check below still runs.
		const body = await res.json().catch(() => ({}));
		if (!res.ok) {
			genError = body.error ?? body.message ?? `Error ${res.status}`;
			return;
		}

		// A 2xx response without image data (e.g. an empty or non-JSON body)
		// would otherwise render as "data:undefined;base64,undefined".
		if (typeof body.image_b64 !== 'string' || body.image_b64.length === 0) {
			genError = 'Backend returned no image data.';
			return;
		}
		// Fall back to PNG when the backend omits the content type; download()
		// already treats anything that is not JPEG as PNG.
		const contentType: string = body.content_type ?? 'image/png';

		const totalMs = Date.now() - startTs;
		const newResult: GenResult = {
			imageSrc: `data:${contentType};base64,${body.image_b64}`,
			model: body.model,
			bytes: body.bytes,
			contentType,
			saved: body.saved ?? false,
			coverUrl: body.cover_url ?? '',
			elapsedMs: totalMs,
			slug: slug.trim(),
			imageType,
			chapter
		};

		result = newResult;
		history = [newResult, ...history].slice(0, 5);
	} catch {
		genError = 'Network error.';
	} finally {
		generating = false;
		if (elapsedInterval) {
			clearInterval(elapsedInterval);
			elapsedInterval = null;
		}
	}
}
|
||||
|
||||
// ── Save as cover ────────────────────────────────────────────────────────────
|
||||
/**
 * Persists a cover for the slug of the current result.
 *
 * NOTE: this does not upload the image currently shown. It issues a fresh
 * generation request with the same prompt/model and save_to_cover=true, so
 * the backend generates and saves in one step. The saved cover may therefore
 * differ from the preview.
 * TODO: a dedicated save endpoint accepting the base64 payload would be
 * lighter; re-generation is accepted for now because usage is admin-only.
 */
async function saveAsCover() {
	if (!result || saving) return;

	saving = true;
	saveError = '';
	saveSuccess = false;

	try {
		const requestBody = JSON.stringify({
			prompt: prompt.trim(),
			model: result.model,
			type: 'cover',
			slug: result.slug,
			num_steps: numSteps,
			guidance,
			strength,
			width,
			height,
			save_to_cover: true
		});

		const res = await fetch('/api/admin/image-gen', {
			method: 'POST',
			headers: { 'Content-Type': 'application/json' },
			body: requestBody
		});
		// An unparsable body degrades to {} so the checks below still run.
		const reply = await res.json().catch(() => ({}));

		if (!res.ok) {
			saveError = reply.error ?? reply.message ?? `Error ${res.status}`;
			return;
		}

		if (!reply.saved) {
			saveError = 'Backend did not save the cover.';
		} else {
			saveSuccess = true;
			result = { ...result, saved: true, coverUrl: reply.cover_url ?? result.coverUrl };
		}
	} catch {
		saveError = 'Network error.';
	} finally {
		saving = false;
	}
}
|
||||
|
||||
// ── Download ─────────────────────────────────────────────────────────────────
|
||||
/**
 * Triggers a browser download of the current result via a temporary anchor.
 * The file is named "<slug>-cover.<ext>" for covers and "<slug>-ch<N>.<ext>"
 * for chapter images; the extension is "jpg" for image/jpeg and "png" otherwise.
 */
function download() {
	if (!result) return;

	const {
		imageSrc,
		contentType,
		imageType: kind,
		slug: bookSlug,
		chapter: chapterNo
	} = result;

	const ext = contentType === 'image/jpeg' ? 'jpg' : 'png';
	const suffix = kind === 'cover' ? 'cover' : `ch${chapterNo}`;

	const link = document.createElement('a');
	link.href = imageSrc;
	link.download = `${bookSlug}-${suffix}.${ext}`;
	link.click();
}
|
||||
|
||||
// ── Formatting helpers ───────────────────────────────────────────────────────
|
||||
/** Formats a duration: raw milliseconds below one second, otherwise seconds with one decimal. */
function fmtElapsed(ms: number) {
	return ms < 1000 ? `${ms}ms` : `${(ms / 1000).toFixed(1)}s`;
}
|
||||
|
||||
/**
 * Formats a byte count using binary units (B, KB, MB).
 *
 * @param b Size in bytes.
 * @returns `"<n> B"` below 1 KiB, `"<n.n> KB"` below 1 MiB, otherwise
 *          `"<n.nn> MB"`.
 */
function fmtBytes(b: number) {
	if (b < 1024) return `${b} B`;

	const kb = b / 1024;
	// toFixed(1) rounds anything from 1023.95 KB upward to "1024.0", which
	// would print "1024.0 KB"; hand those values to the MB branch instead.
	if (kb < 1023.95) return `${kb.toFixed(1)} KB`;

	return `${(kb / 1024).toFixed(2)} MB`;
}
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
<title>Image Gen — Admin</title>
|
||||
</svelte:head>
|
||||
|
||||
<div class="space-y-6 max-w-6xl">
|
||||
<!-- Header -->
|
||||
<div>
|
||||
<h1 class="text-2xl font-bold text-(--color-text)">Image Generation</h1>
|
||||
<p class="text-(--color-muted) text-sm mt-1">
|
||||
Generate book covers and chapter images using Cloudflare Workers AI.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<!-- Layout: form + result side by side on large screens -->
|
||||
<div class="grid grid-cols-1 lg:grid-cols-2 gap-6 items-start">
|
||||
<!-- ── Left: Form panel ──────────────────────────────────────────────────── -->
|
||||
<div class="space-y-4">
|
||||
<!-- Type toggle -->
|
||||
<div class="flex gap-1 bg-(--color-surface-2) rounded-lg p-1 w-fit border border-(--color-border)">
|
||||
{#each (['cover', 'chapter'] as const) as t}
|
||||
<button
|
||||
onclick={() => (imageType = t)}
|
||||
class="px-4 py-1.5 rounded-md text-sm font-medium transition-colors
|
||||
{imageType === t
|
||||
? 'bg-(--color-surface-3) text-(--color-text)'
|
||||
: 'text-(--color-muted) hover:text-(--color-text)'}"
|
||||
>
|
||||
{t === 'cover' ? 'Cover' : 'Chapter Image'}
|
||||
</button>
|
||||
{/each}
|
||||
</div>
|
||||
|
||||
<!-- Slug + chapter -->
|
||||
<div class="flex gap-3">
|
||||
<div class="flex-1 min-w-0 space-y-1">
|
||||
<label class="text-xs font-medium text-(--color-muted) uppercase tracking-wide" for="slug-input">
|
||||
Book slug
|
||||
</label>
|
||||
<input
|
||||
id="slug-input"
|
||||
type="text"
|
||||
bind:value={slug}
|
||||
placeholder="e.g. shadow-slave"
|
||||
class="w-full bg-(--color-surface-2) border border-(--color-border) rounded-lg px-3 py-2 text-(--color-text) text-sm placeholder-zinc-500 focus:outline-none focus:ring-2 focus:ring-(--color-brand)"
|
||||
/>
|
||||
</div>
|
||||
|
||||
{#if imageType === 'chapter'}
|
||||
<div class="w-24 space-y-1 shrink-0">
|
||||
<label class="text-xs font-medium text-(--color-muted) uppercase tracking-wide" for="chapter-input">
|
||||
Chapter
|
||||
</label>
|
||||
<input
|
||||
id="chapter-input"
|
||||
type="number"
|
||||
bind:value={chapter}
|
||||
min="1"
|
||||
class="w-full bg-(--color-surface-2) border border-(--color-border) rounded-lg px-3 py-2 text-(--color-text) text-sm focus:outline-none focus:ring-2 focus:ring-(--color-brand)"
|
||||
/>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<!-- Model selector -->
|
||||
<div class="space-y-1">
|
||||
<label class="text-xs font-medium text-(--color-muted) uppercase tracking-wide" for="model-select">
|
||||
Model
|
||||
</label>
|
||||
<select
|
||||
id="model-select"
|
||||
bind:value={selectedModel}
|
||||
class="w-full bg-(--color-surface-2) border border-(--color-border) rounded-lg px-3 py-2 text-(--color-text) text-sm focus:outline-none focus:ring-2 focus:ring-(--color-brand)"
|
||||
>
|
||||
{#if coverModels.length > 0}
|
||||
<optgroup label="Recommended for covers">
|
||||
{#each coverModels as m}
|
||||
<option value={m.id}>
|
||||
{m.label} — {m.provider}{m.supports_ref ? ' ★ref' : ''}
|
||||
</option>
|
||||
{/each}
|
||||
</optgroup>
|
||||
{/if}
|
||||
{#if chapterModels.length > 0}
|
||||
<optgroup label="Recommended for chapters">
|
||||
{#each chapterModels as m}
|
||||
<option value={m.id}>
|
||||
{m.label} — {m.provider}{m.supports_ref ? ' ★ref' : ''}
|
||||
</option>
|
||||
{/each}
|
||||
</optgroup>
|
||||
{/if}
|
||||
{#if otherModels.length > 0}
|
||||
<optgroup label="All models">
|
||||
{#each otherModels as m}
|
||||
<option value={m.id}>
|
||||
{m.label} — {m.provider}{m.supports_ref ? ' ★ref' : ''}
|
||||
</option>
|
||||
{/each}
|
||||
</optgroup>
|
||||
{/if}
|
||||
</select>
|
||||
{#if selectedModelInfo}
|
||||
<p class="text-xs text-(--color-muted)">{selectedModelInfo.description}</p>
|
||||
{/if}
|
||||
{#if refWarning}
|
||||
<p class="text-xs text-amber-400">{refWarning}</p>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<!-- Prompt -->
|
||||
<div class="space-y-1">
|
||||
<div class="flex items-center justify-between">
|
||||
<label class="text-xs font-medium text-(--color-muted) uppercase tracking-wide" for="prompt-input">
|
||||
Prompt
|
||||
</label>
|
||||
<button
|
||||
onclick={applyTemplate}
|
||||
class="text-xs text-(--color-brand) hover:text-(--color-brand-dim) transition-colors"
|
||||
>
|
||||
Use template
|
||||
</button>
|
||||
</div>
|
||||
<textarea
|
||||
id="prompt-input"
|
||||
bind:value={prompt}
|
||||
rows="5"
|
||||
placeholder="Describe the image to generate…"
|
||||
class="w-full bg-(--color-surface-2) border border-(--color-border) rounded-lg px-3 py-2 text-(--color-text) text-sm placeholder-zinc-500 focus:outline-none focus:ring-2 focus:ring-(--color-brand) resize-y"
|
||||
></textarea>
|
||||
</div>
|
||||
|
||||
<!-- Reference image drop zone -->
|
||||
<div class="space-y-1">
|
||||
<p class="text-xs font-medium text-(--color-muted) uppercase tracking-wide">
|
||||
Reference image <span class="normal-case font-normal text-(--color-muted)">(optional, img2img)</span>
|
||||
</p>
|
||||
{#if referenceFile && referencePreviewUrl}
|
||||
<div class="flex items-start gap-3 p-3 bg-(--color-surface-2) rounded-lg border border-(--color-border)">
|
||||
<img
|
||||
src={referencePreviewUrl}
|
||||
alt="Reference"
|
||||
class="w-16 h-16 object-cover rounded-md shrink-0 border border-(--color-border)"
|
||||
/>
|
||||
<div class="min-w-0 flex-1 space-y-0.5">
|
||||
<p class="text-sm text-(--color-text) truncate">{referenceFile.name}</p>
|
||||
<p class="text-xs text-(--color-muted)">{fmtBytes(referenceFile.size)}</p>
|
||||
</div>
|
||||
<button
|
||||
onclick={clearReference}
|
||||
class="text-(--color-muted) hover:text-(--color-text) transition-colors shrink-0"
|
||||
aria-label="Remove reference image"
|
||||
>
|
||||
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M6 18L18 6M6 6l12 12" />
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
{:else}
|
||||
<!-- Drop zone -->
|
||||
<label
|
||||
class="flex flex-col items-center justify-center gap-2 p-4 border-2 border-dashed rounded-lg cursor-pointer transition-colors
|
||||
{dragOver
|
||||
? 'border-(--color-brand) bg-(--color-brand)/5'
|
||||
: 'border-(--color-border) hover:border-(--color-brand)/50 hover:bg-(--color-surface-2)'}"
|
||||
ondragover={(e) => { e.preventDefault(); dragOver = true; }}
|
||||
ondragleave={() => { dragOver = false; }}
|
||||
ondrop={onDrop}
|
||||
>
|
||||
<svg class="w-6 h-6 text-(--color-muted)" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"
|
||||
d="M4 16l4.586-4.586a2 2 0 012.828 0L16 16m-2-2l1.586-1.586a2 2 0 012.828 0L20 14m-6-6h.01M6 20h12a2 2 0 002-2V6a2 2 0 00-2-2H6a2 2 0 00-2 2v12a2 2 0 002 2z" />
|
||||
</svg>
|
||||
<span class="text-xs text-(--color-muted)">Drop image or <span class="text-(--color-brand)">click to browse</span></span>
|
||||
<input
|
||||
id="ref-file-input"
|
||||
type="file"
|
||||
accept="image/png,image/jpeg,image/webp"
|
||||
onchange={onFileInput}
|
||||
class="sr-only"
|
||||
/>
|
||||
</label>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<!-- Advanced collapsible -->
|
||||
<div class="border border-(--color-border) rounded-lg overflow-hidden">
|
||||
<button
|
||||
onclick={() => (showAdvanced = !showAdvanced)}
|
||||
class="w-full flex items-center justify-between px-4 py-2.5 bg-(--color-surface-2) text-sm font-medium text-(--color-muted) hover:text-(--color-text) transition-colors"
|
||||
>
|
||||
Advanced options
|
||||
<svg
|
||||
class="w-4 h-4 transition-transform {showAdvanced ? 'rotate-180' : ''}"
|
||||
fill="none" stroke="currentColor" viewBox="0 0 24 24"
|
||||
>
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 9l-7 7-7-7" />
|
||||
</svg>
|
||||
</button>
|
||||
|
||||
{#if showAdvanced}
|
||||
<div class="px-4 py-4 bg-(--color-surface) space-y-4">
|
||||
<!-- num_steps -->
|
||||
<div class="space-y-1">
	<div class="flex justify-between">
		<!-- for/id pair ties the label to the slider, like the other form fields -->
		<label class="text-xs text-(--color-muted)" for="steps-input">Steps</label>
		<span class="text-xs text-(--color-text) font-mono">{numSteps}</span>
	</div>
	<input id="steps-input" type="range" min="1" max="20" step="1" bind:value={numSteps}
		class="w-full accent-(--color-brand)" />
</div>
|
||||
|
||||
<!-- guidance -->
|
||||
<div class="space-y-1">
	<div class="flex justify-between">
		<!-- for/id pair ties the label to the slider, like the other form fields -->
		<label class="text-xs text-(--color-muted)" for="guidance-input">Guidance</label>
		<span class="text-xs text-(--color-text) font-mono">{guidance.toFixed(1)}</span>
	</div>
	<input id="guidance-input" type="range" min="1" max="20" step="0.5" bind:value={guidance}
		class="w-full accent-(--color-brand)" />
</div>
|
||||
|
||||
<!-- strength (only when reference present) -->
|
||||
{#if referenceFile}
|
||||
<div class="space-y-1">
	<div class="flex justify-between">
		<!-- for/id pair ties the label to the slider, like the other form fields -->
		<label class="text-xs text-(--color-muted)" for="strength-input">Strength</label>
		<span class="text-xs text-(--color-text) font-mono">{strength.toFixed(2)}</span>
	</div>
	<input id="strength-input" type="range" min="0" max="1" step="0.05" bind:value={strength}
		class="w-full accent-(--color-brand)" />
	<p class="text-xs text-(--color-muted)">0 = copy reference · 1 = ignore reference</p>
</div>
|
||||
{/if}
|
||||
|
||||
<!-- width × height -->
|
||||
<div class="grid grid-cols-2 gap-3">
|
||||
<div class="space-y-1">
|
||||
<label class="text-xs text-(--color-muted)" for="width-input">Width</label>
|
||||
<input id="width-input" type="number" min="256" max="2048" step="64" bind:value={width}
|
||||
class="w-full bg-(--color-surface-2) border border-(--color-border) rounded-md px-3 py-1.5 text-(--color-text) text-sm focus:outline-none focus:ring-1 focus:ring-(--color-brand)" />
|
||||
</div>
|
||||
<div class="space-y-1">
|
||||
<label class="text-xs text-(--color-muted)" for="height-input">Height</label>
|
||||
<input id="height-input" type="number" min="256" max="2048" step="64" bind:value={height}
|
||||
class="w-full bg-(--color-surface-2) border border-(--color-border) rounded-md px-3 py-1.5 text-(--color-text) text-sm focus:outline-none focus:ring-1 focus:ring-(--color-brand)" />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<!-- Generate button -->
|
||||
<button
|
||||
onclick={generate}
|
||||
disabled={!canGenerate}
|
||||
class="w-full py-2.5 rounded-lg bg-(--color-brand) text-(--color-surface) font-semibold text-sm
|
||||
hover:bg-(--color-brand-dim) transition-colors disabled:opacity-50 disabled:cursor-not-allowed
|
||||
flex items-center justify-center gap-2"
|
||||
>
|
||||
{#if generating}
|
||||
<!-- Spinner -->
|
||||
<svg class="w-4 h-4 animate-spin" fill="none" viewBox="0 0 24 24">
|
||||
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4" />
|
||||
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8v8H4z" />
|
||||
</svg>
|
||||
Generating… {fmtElapsed(elapsedMs)}
|
||||
{:else}
|
||||
Generate
|
||||
{/if}
|
||||
</button>
|
||||
|
||||
{#if genError}
|
||||
<p class="text-sm text-(--color-danger) bg-(--color-danger)/10 rounded-lg px-3 py-2">{genError}</p>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<!-- ── Right: Result panel ────────────────────────────────────────────────── -->
|
||||
<div class="space-y-4">
|
||||
{#if result}
|
||||
<div class="bg-(--color-surface) border border-(--color-border) rounded-xl overflow-hidden">
|
||||
<!-- Image -->
|
||||
<img
|
||||
src={result.imageSrc}
|
||||
alt="Generated image"
|
||||
class="w-full object-contain max-h-[36rem] bg-zinc-950"
|
||||
/>
|
||||
|
||||
<!-- Meta bar -->
|
||||
<div class="px-4 py-3 border-t border-(--color-border) space-y-3">
|
||||
<div class="grid grid-cols-3 gap-2 text-xs">
|
||||
<div>
|
||||
<p class="text-(--color-muted)">Model</p>
|
||||
<p class="text-(--color-text) font-mono truncate" title={result.model}>
|
||||
{models.find((m) => m.id === result!.model)?.label ?? result.model}
|
||||
</p>
|
||||
</div>
|
||||
<div>
|
||||
<p class="text-(--color-muted)">Size</p>
|
||||
<p class="text-(--color-text)">{fmtBytes(result.bytes)}</p>
|
||||
</div>
|
||||
<div>
|
||||
<p class="text-(--color-muted)">Time</p>
|
||||
<p class="text-(--color-text)">{fmtElapsed(result.elapsedMs)}</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{#if result.saved}
|
||||
<p class="text-xs text-green-400">
|
||||
Cover saved →
|
||||
<a href={result.coverUrl} target="_blank" rel="noopener noreferrer"
|
||||
class="underline hover:text-green-300">{result.coverUrl}</a>
|
||||
</p>
|
||||
{/if}
|
||||
|
||||
{#if saveSuccess && !result.saved}
|
||||
<p class="text-xs text-green-400">Cover saved successfully.</p>
|
||||
{/if}
|
||||
{#if saveError}
|
||||
<p class="text-xs text-(--color-danger)">{saveError}</p>
|
||||
{/if}
|
||||
|
||||
<!-- Actions -->
|
||||
<div class="flex gap-2 flex-wrap">
|
||||
<button
|
||||
onclick={download}
|
||||
class="flex-1 px-3 py-1.5 rounded-md bg-(--color-surface-3) text-(--color-text) text-xs font-medium hover:bg-zinc-600 transition-colors"
|
||||
>
|
||||
Download
|
||||
</button>
|
||||
|
||||
{#if result.imageType === 'cover'}
|
||||
<button
|
||||
onclick={saveAsCover}
|
||||
disabled={saving || result.saved}
|
||||
class="flex-1 px-3 py-1.5 rounded-md bg-(--color-brand) text-(--color-surface) text-xs font-semibold
|
||||
hover:bg-(--color-brand-dim) transition-colors disabled:opacity-50"
|
||||
>
|
||||
{saving ? 'Saving…' : result.saved ? 'Saved ✓' : 'Save as cover'}
|
||||
</button>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{:else if generating}
|
||||
<!-- Placeholder while generating -->
|
||||
<div class="flex items-center justify-center bg-(--color-surface) border border-(--color-border) rounded-xl h-80">
|
||||
<div class="text-center space-y-3">
|
||||
<svg class="w-8 h-8 animate-spin mx-auto text-(--color-brand)" fill="none" viewBox="0 0 24 24">
|
||||
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4" />
|
||||
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8v8H4z" />
|
||||
</svg>
|
||||
<p class="text-sm text-(--color-muted)">Generating… {fmtElapsed(elapsedMs)}</p>
|
||||
</div>
|
||||
</div>
|
||||
{:else}
|
||||
<!-- Empty state -->
|
||||
<div class="flex items-center justify-center bg-(--color-surface) border border-(--color-border) border-dashed rounded-xl h-80">
|
||||
<p class="text-sm text-(--color-muted)">Generated image will appear here</p>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- History thumbnails -->
|
||||
{#if history.length > 0}
|
||||
<div class="space-y-2">
|
||||
<p class="text-xs font-semibold text-(--color-muted) uppercase tracking-widest">Session history</p>
|
||||
<div class="flex gap-2 flex-wrap">
|
||||
{#each history as h, i}
|
||||
<button
|
||||
onclick={() => result = h}
|
||||
class="relative rounded-md overflow-hidden border transition-colors shrink-0
|
||||
{result === h ? 'border-(--color-brand)' : 'border-(--color-border) hover:border-(--color-brand)/50'}"
|
||||
>
|
||||
<img
|
||||
src={h.imageSrc}
|
||||
alt="History {i + 1}"
|
||||
class="w-16 h-16 object-cover"
|
||||
/>
|
||||
{#if h.saved}
|
||||
<span class="absolute bottom-0.5 right-0.5 w-2.5 h-2.5 rounded-full bg-green-500 border border-(--color-surface)"></span>
|
||||
{/if}
|
||||
</button>
|
||||
{/each}
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
46
ui/src/routes/api/admin/image-gen/+server.ts
Normal file
46
ui/src/routes/api/admin/image-gen/+server.ts
Normal file
@@ -0,0 +1,46 @@
|
||||
/**
|
||||
* POST /api/admin/image-gen
|
||||
*
|
||||
* Admin-only proxy to the Go backend's image generation endpoint.
|
||||
* Transparently forwards the request body (JSON or multipart/form-data)
|
||||
* and returns the JSON response containing the base64-encoded image.
|
||||
*/
|
||||
|
||||
import { json, error } from '@sveltejs/kit';
|
||||
import type { RequestHandler } from './$types';
|
||||
import { log } from '$lib/server/logger';
|
||||
import { backendFetch } from '$lib/server/scraper';
|
||||
|
||||
/**
 * Relays an admin image-generation request to the backend.
 *
 * - Rejects callers that are not logged in as an admin with 403.
 * - multipart/form-data bodies are forwarded as raw bytes with the caller's
 *   content-type header (which carries the multipart boundary) so the backend
 *   can parse them; every other body is forwarded as text and labelled
 *   application/json.
 * - Responds 502 when reading the body or reaching the backend fails.
 * - Otherwise mirrors the backend's status code and JSON body. A body that is
 *   not valid JSON is logged and replaced by `{}`.
 */
export const POST: RequestHandler = async ({ request, locals }) => {
	if (!locals.user || locals.user.role !== 'admin') {
		throw error(403, 'Forbidden');
	}

	const ct = request.headers.get('content-type') ?? '';
	const isMultipart = ct.includes('multipart/form-data');

	let res: Response;
	try {
		// Raw bytes for multipart keep the payload and boundary intact.
		const body = isMultipart ? await request.arrayBuffer() : await request.text();
		res = await backendFetch('/api/admin/image-gen', {
			method: 'POST',
			headers: { 'content-type': isMultipart ? ct : 'application/json' },
			body
		});
	} catch (e) {
		log.error('admin/image-gen', 'backend proxy error', { err: String(e) });
		throw error(502, 'Could not reach backend');
	}

	// Still answer with the backend's status when its body is not JSON, but
	// leave a trace: previously the parse failure was swallowed silently.
	const data = await res.json().catch((e) => {
		log.error('admin/image-gen', 'backend returned non-JSON body', {
			status: res.status,
			err: String(e)
		});
		return {};
	});
	return json(data, { status: res.status });
};
|
||||
@@ -93,6 +93,30 @@
|
||||
|
||||
// Voice lists split by TTS engine; each one backs an <optgroup> in the voice picker.
const voicesFor = (engine: string) => voices.filter((v) => v.engine === engine);

const kokoroVoices = $derived(voicesFor('kokoro'));
const pocketVoices = $derived(voicesFor('pocket-tts'));
const cfaiVoices = $derived(voicesFor('cfai'));
|
||||
|
||||
/**
 * Builds the display label shown for a voice in the picker.
 *
 * - `cfai`: strips a leading `cfai:` from the id, capitalises each word and
 *   appends ` (EN <GENDER>)` when a gender is set.
 * - `pocket-tts`: turns underscores into spaces, capitalises each word and
 *   appends ` (EN <GENDER>)` when a gender is set.
 * - anything else is treated as a Kokoro id such as `af_bella`, which becomes
 *   `Bella (US F)`; a missing gender is shown as `?`.
 */
function voiceLabel(v: Voice): string {
	const capitalizeWords = (text: string) => text.replace(/\b\w/g, (ch) => ch.toUpperCase());
	const englishTag = () => (v.gender ? ` (EN ${v.gender.toUpperCase()})` : '');

	switch (v.engine) {
		case 'cfai': {
			const speaker = v.id.startsWith('cfai:') ? v.id.slice(5) : v.id;
			return capitalizeWords(speaker) + englishTag();
		}
		case 'pocket-tts':
			return capitalizeWords(v.id.replace(/_/g, ' ')) + englishTag();
	}

	// Kokoro: the first two characters of the id select the language tag,
	// and the name starts at the fourth character.
	const langMap: Record<string, string> = {
		af: 'US',
		am: 'US',
		bf: 'UK',
		bm: 'UK',
		ef: 'ES',
		em: 'ES',
		ff: 'FR',
		hf: 'IN',
		hm: 'IN',
		'if': 'IT',
		im: 'IT',
		jf: 'JP',
		jm: 'JP',
		pf: 'PT',
		pm: 'PT',
		zf: 'ZH',
		zm: 'ZH'
	};

	const code = v.id.slice(0, 2);
	const lang = langMap[code] ?? code.toUpperCase();
	const gender = v.gender ? v.gender.toUpperCase() : '?';
	// Drop a leading "v0" marker from the name, then upper-case its first letter.
	const name = v.id
		.slice(3)
		.replace(/^v0/, '')
		.replace(/^([a-z])/, (ch) => ch.toUpperCase());

	return `${name} (${lang} ${gender})`;
}
|
||||
|
||||
$effect(() => {
|
||||
fetch('/api/voices')
|
||||
@@ -492,12 +516,17 @@
|
||||
class="w-full bg-(--color-surface-3) border border-(--color-border) rounded-lg px-3 py-2 text-(--color-text) text-sm focus:outline-none focus:ring-2 focus:ring-(--color-brand)">
|
||||
{#if kokoroVoices.length > 0}
|
||||
<optgroup label="Kokoro (GPU)">
|
||||
{#each kokoroVoices as v}<option value={v.id}>{v.id}</option>{/each}
|
||||
{#each kokoroVoices as v}<option value={v.id}>{voiceLabel(v)}</option>{/each}
|
||||
</optgroup>
|
||||
{/if}
|
||||
{#if pocketVoices.length > 0}
|
||||
<optgroup label="Pocket TTS (CPU)">
|
||||
{#each pocketVoices as v}<option value={v.id}>{v.id}</option>{/each}
|
||||
{#each pocketVoices as v}<option value={v.id}>{voiceLabel(v)}</option>{/each}
|
||||
</optgroup>
|
||||
{/if}
|
||||
{#if cfaiVoices.length > 0}
|
||||
<optgroup label="Cloudflare AI">
|
||||
{#each cfaiVoices as v}<option value={v.id}>{voiceLabel(v)}</option>{/each}
|
||||
</optgroup>
|
||||
{/if}
|
||||
</select>
|
||||
|
||||
Reference in New Issue
Block a user