Compare commits

...

2 Commits

Author SHA1 Message Date
Admin
d1b7d3e36c feat: admin image generation via Cloudflare Workers AI
Some checks failed
Release / Test backend (push) Successful in 46s
Release / Check ui (push) Failing after 27s
Release / Docker / ui (push) Has been skipped
Release / Docker / caddy (push) Successful in 42s
Release / Docker / backend (push) Successful in 3m3s
Release / Docker / runner (push) Successful in 2m57s
Release / Gitea Release (push) Has been skipped
- Add cfai/image.go: ImageGenClient with GenerateImage, GenerateImageFromReference, AllImageModels (9 models)
- Add handlers_image.go: GET /api/admin/image-gen/models + POST /api/admin/image-gen (JSON + multipart)
- Wire ImageGen client in main.go + server.go Dependencies
- Add admin/image-gen SvelteKit page: type toggle, model selector, prompt, reference img2img, advanced options, result panel, history, save-as-cover, download
- Add SvelteKit proxy route api/admin/image-gen forwarding to Go backend
- Add admin_nav_image_gen message key to all 5 locale files
- Add Image Gen nav link to admin layout
2026-04-04 11:46:22 +05:00
Admin
aaa008ac99 feat: add Cloudflare AI TTS engine (aura-2-en) with voice grouping in UI
All checks were successful
Release / Test backend (push) Successful in 43s
Release / Check ui (push) Successful in 43s
Release / Docker / caddy (push) Successful in 46s
Release / Docker / backend (push) Successful in 2m45s
Release / Docker / runner (push) Successful in 2m53s
Release / Docker / ui (push) Successful in 2m5s
Release / Gitea Release (push) Successful in 41s
2026-04-04 11:12:55 +05:00
23 changed files with 1722 additions and 13 deletions

View File

@@ -26,6 +26,7 @@ import (
"github.com/hibiken/asynq"
"github.com/libnovel/backend/internal/asynqqueue"
"github.com/libnovel/backend/internal/backend"
"github.com/libnovel/backend/internal/cfai"
"github.com/libnovel/backend/internal/config"
"github.com/libnovel/backend/internal/kokoro"
"github.com/libnovel/backend/internal/meili"
@@ -114,6 +115,24 @@ func run() error {
log.Info("POCKET_TTS_URL not set — pocket-tts voices unavailable in backend")
}
// ── Cloudflare Workers AI (voice sample generation + audio-stream live TTS) ──
var cfaiClient cfai.Client
if cfg.CFAI.AccountID != "" && cfg.CFAI.APIToken != "" {
cfaiClient = cfai.New(cfg.CFAI.AccountID, cfg.CFAI.APIToken, cfg.CFAI.Model)
log.Info("cloudflare AI TTS enabled", "model", cfg.CFAI.Model)
} else {
log.Info("CFAI_ACCOUNT_ID/CFAI_API_TOKEN not set — CF AI voices unavailable in backend")
}
// ── Cloudflare Workers AI Image Generation ────────────────────────────────
var imageGenClient cfai.ImageGenClient
if cfg.CFAI.AccountID != "" && cfg.CFAI.APIToken != "" {
imageGenClient = cfai.NewImageGen(cfg.CFAI.AccountID, cfg.CFAI.APIToken)
log.Info("cloudflare AI image generation enabled")
} else {
log.Info("CFAI_ACCOUNT_ID/CFAI_API_TOKEN not set — image generation unavailable")
}
// ── Meilisearch (search reads only; indexing is the runner's job) ────────
var searchIndex meili.Client
if cfg.Meilisearch.URL != "" {
@@ -163,6 +182,8 @@ func run() error {
SearchIndex: searchIndex,
Kokoro: kokoroClient,
PocketTTS: pocketTTSClient,
CFAI: cfaiClient,
ImageGen: imageGenClient,
Log: log,
},
)

View File

@@ -23,6 +23,7 @@ import (
"github.com/getsentry/sentry-go"
"github.com/libnovel/backend/internal/asynqqueue"
"github.com/libnovel/backend/internal/browser"
"github.com/libnovel/backend/internal/cfai"
"github.com/libnovel/backend/internal/config"
"github.com/libnovel/backend/internal/kokoro"
"github.com/libnovel/backend/internal/libretranslate"
@@ -130,6 +131,15 @@ func run() error {
log.Warn("POCKET_TTS_URL not set — pocket-tts voice tasks will fail")
}
// ── Cloudflare Workers AI ────────────────────────────────────────────────
var cfaiClient cfai.Client
if cfg.CFAI.AccountID != "" && cfg.CFAI.APIToken != "" {
cfaiClient = cfai.New(cfg.CFAI.AccountID, cfg.CFAI.APIToken, cfg.CFAI.Model)
log.Info("cloudflare AI TTS enabled", "model", cfg.CFAI.Model)
} else {
log.Info("CFAI_ACCOUNT_ID/CFAI_API_TOKEN not set — CF AI voice tasks will fail")
}
// ── LibreTranslate ──────────────────────────────────────────────────────
ltClient := libretranslate.New(cfg.LibreTranslate.URL, cfg.LibreTranslate.APIKey)
if ltClient != nil {
@@ -191,6 +201,7 @@ func run() error {
Novel: novel,
Kokoro: kokoroClient,
PocketTTS: pocketTTSClient,
CFAI: cfaiClient,
LibreTranslate: ltClient,
Log: log,
}

View File

@@ -44,6 +44,7 @@ import (
"strings"
"time"
"github.com/libnovel/backend/internal/cfai"
"github.com/libnovel/backend/internal/domain"
"github.com/libnovel/backend/internal/kokoro"
"github.com/libnovel/backend/internal/meili"
@@ -774,7 +775,13 @@ func (s *Server) handleAudioStream(w http.ResponseWriter, r *http.Request) {
// Open the TTS stream (WAV or MP3 depending on format param).
var audioStream io.ReadCloser
if format == "wav" {
if pockettts.IsPocketTTSVoice(voice) {
if cfai.IsCFAIVoice(voice) {
if s.deps.CFAI == nil {
jsonError(w, http.StatusServiceUnavailable, "cloudflare AI TTS not configured")
return
}
audioStream, err = s.deps.CFAI.StreamAudioWAV(r.Context(), text, voice)
} else if pockettts.IsPocketTTSVoice(voice) {
if s.deps.PocketTTS == nil {
jsonError(w, http.StatusServiceUnavailable, "pocket-tts not configured")
return
@@ -788,7 +795,13 @@ func (s *Server) handleAudioStream(w http.ResponseWriter, r *http.Request) {
audioStream, err = s.deps.Kokoro.StreamAudioWAV(r.Context(), text, voice)
}
} else {
if pockettts.IsPocketTTSVoice(voice) {
if cfai.IsCFAIVoice(voice) {
if s.deps.CFAI == nil {
jsonError(w, http.StatusServiceUnavailable, "cloudflare AI TTS not configured")
return
}
audioStream, err = s.deps.CFAI.StreamAudioMP3(r.Context(), text, voice)
} else if pockettts.IsPocketTTSVoice(voice) {
if s.deps.PocketTTS == nil {
jsonError(w, http.StatusServiceUnavailable, "pocket-tts not configured")
return
@@ -1343,6 +1356,9 @@ func (s *Server) handlePresignVoiceSample(w http.ResponseWriter, r *http.Request
}
key := kokoro.VoiceSampleKey(voice)
if cfai.IsCFAIVoice(voice) {
key = cfai.VoiceSampleKey(voice)
}
// Generate sample on demand when it is not in MinIO yet.
if !s.deps.AudioStore.AudioExists(r.Context(), key) {
@@ -1352,7 +1368,13 @@ func (s *Server) handlePresignVoiceSample(w http.ResponseWriter, r *http.Request
mp3 []byte
err error
)
if pockettts.IsPocketTTSVoice(voice) {
if cfai.IsCFAIVoice(voice) {
if s.deps.CFAI == nil {
jsonError(w, http.StatusServiceUnavailable, "cloudflare AI TTS not configured")
return
}
mp3, err = s.deps.CFAI.GenerateAudio(r.Context(), voiceSampleText, voice)
} else if pockettts.IsPocketTTSVoice(voice) {
if s.deps.PocketTTS == nil {
jsonError(w, http.StatusServiceUnavailable, "pocket-tts not configured")
return

View File

@@ -0,0 +1,234 @@
package backend
import (
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"github.com/libnovel/backend/internal/cfai"
)
// handleAdminImageGenModels serves GET /api/admin/image-gen/models.
//
// It responds with a JSON object whose "models" key holds the metadata list
// reported by the configured image generation client. When no client is
// configured the handler answers 503 Service Unavailable.
func (s *Server) handleAdminImageGenModels(w http.ResponseWriter, r *http.Request) {
	gen := s.deps.ImageGen
	if gen == nil {
		jsonError(w, http.StatusServiceUnavailable, "image generation not configured (CFAI_ACCOUNT_ID/CFAI_API_TOKEN missing)")
		return
	}
	payload := map[string]any{"models": gen.Models()}
	writeJSON(w, 0, payload)
}
// imageGenRequest is the JSON body for POST /api/admin/image-gen.
// In multipart mode the same structure is read from the "json" form field.
type imageGenRequest struct {
// Prompt is the text description of the desired image. Must not be blank.
Prompt string `json:"prompt"`
// Model is the CF Workers AI model ID (e.g. "@cf/black-forest-labs/flux-2-dev").
// When empty, the handler picks the recommended model for the given type.
Model string `json:"model"`
// Type is either "cover" or "chapter"; any other value is rejected.
Type string `json:"type"`
// Slug is the book slug. Required for both cover and chapter requests.
Slug string `json:"slug"`
// Chapter number (1-based). Required (> 0) when type == "chapter".
Chapter int `json:"chapter"`
// ReferenceImageB64 is an optional base64-encoded PNG/JPEG reference image.
// Standard base64 is accepted with or without padding.
// When a reference image is present the img2img path is used.
ReferenceImageB64 string `json:"reference_image_b64"`
// NumSteps overrides inference steps (values <= 0 fall back to 20).
NumSteps int `json:"num_steps"`
// Width / Height override output dimensions (0 = model default).
Width int `json:"width"`
Height int `json:"height"`
// Guidance overrides prompt guidance scale (0 = model default).
Guidance float64 `json:"guidance"`
// Strength for img2img, expected in the range 0.0–1.0. A value of 0 means
// "not set": the SD 1.5 img2img path then uses 0.75, other models omit it.
Strength float64 `json:"strength"`
// SaveToCover when true stores the result as the book cover in MinIO
// (overwriting any existing cover) and sets the book's cover URL.
// Only honoured when type == "cover"; ignored otherwise.
SaveToCover bool `json:"save_to_cover"`
}
// imageGenResponse is the JSON body returned by POST /api/admin/image-gen.
type imageGenResponse struct {
// ImageB64 is the generated image, standard-base64 encoded. The underlying
// format is reported in ContentType (it is not necessarily PNG).
ImageB64 string `json:"image_b64"`
// ContentType is sniffed from the image bytes: "image/png", "image/jpeg"
// or "image/webp" ("image/png" when the format is not recognised).
ContentType string `json:"content_type"`
// Saved indicates whether the image was persisted to MinIO.
Saved bool `json:"saved"`
// CoverURL is the URL the cover is now served from (only set when Saved==true).
CoverURL string `json:"cover_url,omitempty"`
// Model is the model that was used (after default resolution).
Model string `json:"model"`
// Bytes is the raw (pre-base64) image size in bytes.
Bytes int `json:"bytes"`
}
// handleAdminImageGen handles POST /api/admin/image-gen.
//
// It generates an image using Cloudflare Workers AI and optionally stores it
// as the book cover. Two request encodings are accepted:
//
//   - application/json: an imageGenRequest body; the optional reference image
//     travels in reference_image_b64.
//   - multipart/form-data: the imageGenRequest is read from the "json" form
//     field and the optional reference image from the "reference" file part.
//     If no file part is sent, reference_image_b64 inside the "json" field is
//     honoured as well.
//
// Responses: 503 when image generation is not configured, 400 on malformed or
// invalid input, 502 when the upstream model call fails, otherwise an
// imageGenResponse. Failure to persist the cover is logged but not fatal: the
// generated image is still returned with saved=false.
func (s *Server) handleAdminImageGen(w http.ResponseWriter, r *http.Request) {
	if s.deps.ImageGen == nil {
		jsonError(w, http.StatusServiceUnavailable, "image generation not configured (CFAI_ACCOUNT_ID/CFAI_API_TOKEN missing)")
		return
	}

	var req imageGenRequest
	var refImageData []byte

	ct := r.Header.Get("Content-Type")
	if strings.HasPrefix(ct, "multipart/form-data") {
		// Multipart: parse JSON fields from a "json" part + optional "reference" file part.
		if err := r.ParseMultipartForm(32 << 20); err != nil {
			jsonError(w, http.StatusBadRequest, "parse multipart: "+err.Error())
			return
		}
		if jsonPart := r.FormValue("json"); jsonPart != "" {
			if err := json.Unmarshal([]byte(jsonPart), &req); err != nil {
				jsonError(w, http.StatusBadRequest, "parse json field: "+err.Error())
				return
			}
		}
		f, _, err := r.FormFile("reference")
		switch {
		case err == nil:
			data, readErr := io.ReadAll(f)
			f.Close()
			if readErr != nil {
				// A truncated upload must not silently degrade to a
				// text-only generation or a corrupt reference image.
				jsonError(w, http.StatusBadRequest, "read reference file: "+readErr.Error())
				return
			}
			refImageData = data
		case err != http.ErrMissingFile:
			// The part exists but could not be opened; absence alone is fine.
			jsonError(w, http.StatusBadRequest, "open reference file: "+err.Error())
			return
		}
	} else {
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			jsonError(w, http.StatusBadRequest, "parse body: "+err.Error())
			return
		}
	}

	// Base64 reference: used for JSON bodies and as a fallback for multipart
	// requests that carry no "reference" file part.
	if len(refImageData) == 0 && req.ReferenceImageB64 != "" {
		decoded, decErr := base64.StdEncoding.DecodeString(req.ReferenceImageB64)
		if decErr != nil {
			// Retry with the unpadded standard alphabet.
			decoded, decErr = base64.RawStdEncoding.DecodeString(req.ReferenceImageB64)
			if decErr != nil {
				jsonError(w, http.StatusBadRequest, "decode reference_image_b64: "+decErr.Error())
				return
			}
		}
		refImageData = decoded
	}

	// ── Validation ────────────────────────────────────────────────────────────
	if strings.TrimSpace(req.Prompt) == "" {
		jsonError(w, http.StatusBadRequest, "prompt is required")
		return
	}
	if req.Type != "cover" && req.Type != "chapter" {
		jsonError(w, http.StatusBadRequest, `type must be "cover" or "chapter"`)
		return
	}
	if req.Slug == "" {
		jsonError(w, http.StatusBadRequest, "slug is required")
		return
	}
	if req.Type == "chapter" && req.Chapter <= 0 {
		jsonError(w, http.StatusBadRequest, "chapter must be > 0 when type is chapter")
		return
	}

	// Resolve model: explicit choice wins, otherwise a per-type default.
	model := cfai.ImageModel(req.Model)
	if model == "" {
		if req.Type == "cover" {
			model = cfai.DefaultImageModel
		} else {
			model = cfai.ImageModelFlux2Klein4B
		}
	}

	imgReq := cfai.ImageRequest{
		Prompt:   req.Prompt,
		Model:    model,
		NumSteps: req.NumSteps,
		Width:    req.Width,
		Height:   req.Height,
		Guidance: req.Guidance,
		Strength: req.Strength,
	}

	s.deps.Log.Info("admin: image gen requested",
		"type", req.Type, "slug", req.Slug, "chapter", req.Chapter,
		"model", model, "has_reference", len(refImageData) > 0)

	var imgData []byte
	var genErr error
	if len(refImageData) > 0 {
		imgData, genErr = s.deps.ImageGen.GenerateImageFromReference(r.Context(), imgReq, refImageData)
	} else {
		imgData, genErr = s.deps.ImageGen.GenerateImage(r.Context(), imgReq)
	}
	if genErr != nil {
		s.deps.Log.Error("admin: image gen failed", "err", genErr)
		jsonError(w, http.StatusBadGateway, "image generation failed: "+genErr.Error())
		return
	}

	contentType := sniffImageContentType(imgData)

	// ── Optional persistence ──────────────────────────────────────────────────
	var saved bool
	var coverURL string
	if req.SaveToCover && req.Type == "cover" {
		if s.deps.CoverStore == nil {
			// Make the reason for saved=false visible to operators.
			s.deps.Log.Warn("admin: save_to_cover requested but no cover store configured", "slug", req.Slug)
		} else if err := s.deps.CoverStore.PutCover(r.Context(), req.Slug, imgData, contentType); err != nil {
			s.deps.Log.Error("admin: save generated cover failed", "slug", req.Slug, "err", err)
			// Non-fatal: still return the image
		} else {
			saved = true
			coverURL = fmt.Sprintf("/api/cover/local/%s", req.Slug)
			s.deps.Log.Info("admin: generated cover saved", "slug", req.Slug, "bytes", len(imgData))
		}
	}

	// Encode result as base64
	b64 := base64.StdEncoding.EncodeToString(imgData)
	writeJSON(w, 0, imageGenResponse{
		ImageB64:    b64,
		ContentType: contentType,
		Saved:       saved,
		CoverURL:    coverURL,
		Model:       string(model),
		Bytes:       len(imgData),
	})
}
// sniffImageContentType inspects the leading magic bytes of data and reports
// the matching MIME type: "image/png", "image/jpeg" or "image/webp".
// Anything shorter than four bytes, or with an unrecognised signature, is
// reported as "image/png".
func sniffImageContentType(data []byte) string {
	const fallback = "image/png"
	if len(data) < 4 {
		return fallback
	}
	switch {
	case string(data[:4]) == "\x89PNG":
		// PNG signature: 0x89 'P' 'N' 'G'.
		return "image/png"
	case string(data[:3]) == "\xff\xd8\xff":
		// JPEG start-of-image marker.
		return "image/jpeg"
	case len(data) >= 12 && string(data[:4]) == "RIFF" && string(data[8:12]) == "WEBP":
		// WebP: RIFF container with "WEBP" at offset 8.
		return "image/webp"
	}
	return fallback
}

View File

@@ -30,6 +30,7 @@ import (
sentryhttp "github.com/getsentry/sentry-go/http"
"github.com/libnovel/backend/internal/bookstore"
"github.com/libnovel/backend/internal/cfai"
"github.com/libnovel/backend/internal/domain"
"github.com/libnovel/backend/internal/kokoro"
"github.com/libnovel/backend/internal/meili"
@@ -69,6 +70,12 @@ type Dependencies struct {
// PocketTTS is the pocket-tts client (used for voice list only in the backend;
// audio generation is done by the runner).
PocketTTS pockettts.Client
// CFAI is the Cloudflare Workers AI TTS client (used for voice sample
// generation and audio-stream live TTS; audio task generation is done by the runner).
CFAI cfai.Client
// ImageGen is the Cloudflare Workers AI image generation client.
// If nil, image generation endpoints return 503.
ImageGen cfai.ImageGenClient
// Log is the structured logger.
Log *slog.Logger
}
@@ -179,6 +186,10 @@ func (s *Server) ListenAndServe(ctx context.Context) error {
mux.HandleFunc("POST /api/admin/audio/bulk", s.handleAdminAudioBulk)
mux.HandleFunc("POST /api/admin/audio/cancel-bulk", s.handleAdminAudioCancelBulk)
// Admin image generation endpoints
mux.HandleFunc("GET /api/admin/image-gen/models", s.handleAdminImageGenModels)
mux.HandleFunc("POST /api/admin/image-gen", s.handleAdminImageGen)
// Voices list
mux.HandleFunc("GET /api/voices", s.handleVoices)
@@ -338,6 +349,23 @@ func (s *Server) voices(ctx context.Context) []domain.Voice {
}
}
// ── Cloudflare AI voices ──────────────────────────────────────────────────
if s.deps.CFAI != nil {
for _, speaker := range cfai.Speakers() {
gender := "m"
if cfai.IsFemale(speaker) {
gender = "f"
}
result = append(result, domain.Voice{
ID: cfai.VoiceID(speaker),
Engine: "cfai",
Lang: "en",
Gender: gender,
})
}
s.deps.Log.Info("backend: loaded CF AI voices", "count", len(cfai.Speakers()))
}
s.voiceMu.Lock()
s.cachedVoices = result
s.voiceMu.Unlock()

View File

@@ -0,0 +1,214 @@
// Package cfai provides a client for Cloudflare Workers AI Text-to-Speech models.
//
// The Cloudflare Workers AI REST API is used to run TTS models:
//
// POST https://api.cloudflare.com/client/v4/accounts/{accountID}/ai/run/{model}
// Authorization: Bearer {apiToken}
// Content-Type: application/json
// { "text": "...", "speaker": "luna" }
//
// → 200 audio/mpeg — raw MP3 bytes
//
// Currently supported model: @cf/deepgram/aura-2-en (40 English speakers).
// Voice IDs are prefixed with "cfai:" to distinguish them from Kokoro/pocket-tts
// voices (e.g. "cfai:luna", "cfai:orion").
//
// The API is batch-only (no streaming), so GenerateAudio waits for the full
// response. There is no 100-second Cloudflare proxy timeout because we are
// calling the Cloudflare API directly, not routing through a Cloudflare-proxied
// homelab tunnel.
package cfai
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
)
const (
// DefaultModel is the Cloudflare Workers AI TTS model used by default.
// New substitutes it when called with an empty model string.
DefaultModel = "@cf/deepgram/aura-2-en"
// voicePrefix is the prefix used to namespace CF AI voice IDs, so that
// "cfai:luna" can be told apart from voices of other engines.
voicePrefix = "cfai:"
)
// aura2Speakers is the exhaustive list of speakers supported by aura-2-en
// (40 bare speaker names, without the "cfai:" prefix, in alphabetical order).
// Treat it as read-only: Speakers hands out copies and ListVoices derives
// prefixed IDs from it.
var aura2Speakers = []string{
"amalthea", "andromeda", "apollo", "arcas", "aries", "asteria",
"athena", "atlas", "aurora", "callista", "cora", "cordelia",
"delia", "draco", "electra", "harmonia", "helena", "hera",
"hermes", "hyperion", "iris", "janus", "juno", "jupiter",
"luna", "mars", "minerva", "neptune", "odysseus", "ophelia",
"orion", "orpheus", "pandora", "phoebe", "pluto", "saturn",
"thalia", "theia", "vesta", "zeus",
}
// femaleSpeakers is the set of aura-2-en speaker names that are female voices.
// Keys are bare speaker names (no "cfai:" prefix). Any speaker absent from
// this set is reported as not female by IsFemale.
// NOTE(review): the gender assignments are hand-maintained here — verify
// against the model's published speaker list when it changes.
var femaleSpeakers = map[string]struct{}{
"amalthea": {}, "andromeda": {}, "aries": {}, "asteria": {},
"athena": {}, "aurora": {}, "callista": {}, "cora": {},
"cordelia": {}, "delia": {}, "electra": {}, "harmonia": {},
"helena": {}, "hera": {}, "iris": {}, "juno": {},
"luna": {}, "minerva": {}, "ophelia": {}, "pandora": {},
"phoebe": {}, "thalia": {}, "theia": {}, "vesta": {},
}
// IsCFAIVoice reports whether voice belongs to the Cloudflare AI engine,
// i.e. whether it starts with the "cfai:" namespace prefix ("cfai:luna").
func IsCFAIVoice(voice string) bool {
	_, hasPrefix := strings.CutPrefix(voice, voicePrefix)
	return hasPrefix
}
// SpeakerName returns the bare speaker name for a CF AI voice ID by removing
// the leading "cfai:" prefix. A voice without that prefix is returned as is.
func SpeakerName(voice string) string {
	if !strings.HasPrefix(voice, voicePrefix) {
		return voice
	}
	return voice[len(voicePrefix):]
}
// VoiceID returns the full voice ID (with prefix) for a bare speaker name,
// e.g. "luna" becomes "cfai:luna". The speaker is not validated and the
// prefix is added unconditionally, so pass a bare name only.
func VoiceID(speaker string) string {
return voicePrefix + speaker
}
// VoiceSampleKey builds the MinIO object key under which the sample MP3 for a
// CF AI voice is stored. Every rune outside [A-Za-z0-9_-] is replaced with
// '_' so the voice ID is safe to embed in the key ("cfai:luna" becomes
// "_voice-samples/cfai_luna.mp3").
func VoiceSampleKey(voice string) string {
	var safe strings.Builder
	safe.Grow(len(voice))
	for _, r := range voice {
		switch {
		case 'a' <= r && r <= 'z',
			'A' <= r && r <= 'Z',
			'0' <= r && r <= '9',
			r == '_', r == '-':
			safe.WriteRune(r)
		default:
			safe.WriteByte('_')
		}
	}
	return fmt.Sprintf("_voice-samples/%s.mp3", safe.String())
}
// IsFemale reports whether the given CF AI voice ID (with or without prefix)
// is a female speaker.
func IsFemale(voice string) bool {
speaker := SpeakerName(voice)
_, ok := femaleSpeakers[speaker]
return ok
}
// Speakers returns the bare speaker names available for aura-2-en. The result
// is a fresh copy, so callers may modify it without affecting the package's
// internal list.
func Speakers() []string {
	names := make([]string, 0, len(aura2Speakers))
	names = append(names, aura2Speakers...)
	return names
}
// Client is the interface for interacting with Cloudflare Workers AI TTS.
// Voices are passed as full voice IDs (e.g. "cfai:luna").
type Client interface {
// GenerateAudio synthesises text using the given voice (e.g. "cfai:luna")
// and returns raw MP3 bytes.
GenerateAudio(ctx context.Context, text, voice string) ([]byte, error)
// StreamAudioMP3 is not natively supported by the CF AI batch API.
// It buffers the full response and returns an io.ReadCloser over the bytes,
// so callers can use it like a stream without special-casing.
StreamAudioMP3(ctx context.Context, text, voice string) (io.ReadCloser, error)
// StreamAudioWAV is not natively supported; the CF AI model returns MP3.
// This method returns the same MP3 bytes wrapped as an io.ReadCloser, so
// callers must not assume the payload is WAV-encoded.
StreamAudioWAV(ctx context.Context, text, voice string) (io.ReadCloser, error)
// ListVoices returns all available voice IDs (with the "cfai:" prefix).
ListVoices(ctx context.Context) ([]string, error)
}
// httpClient is the concrete CF AI HTTP client. Construct it with New.
type httpClient struct {
// accountID is the Cloudflare account ID placed in the request URL path.
accountID string
// apiToken is sent as the Bearer token in the Authorization header.
apiToken string
// model is the Workers AI model ID appended to the request URL.
model string
// http is the underlying HTTP client used for every API call.
http *http.Client
}
// New builds a Client bound to the given Cloudflare account ID and API token.
// An empty model selects DefaultModel. Requests issued by the client are
// subject to a five-minute overall timeout.
func New(accountID, apiToken, model string) Client {
	client := &httpClient{
		accountID: accountID,
		apiToken:  apiToken,
		model:     DefaultModel,
		http:      &http.Client{Timeout: 5 * time.Minute},
	}
	if model != "" {
		client.model = model
	}
	return client
}
// GenerateAudio calls the Cloudflare Workers AI TTS endpoint and returns the
// response body, which is expected to be MP3 audio.
//
// voice may be a full voice ID ("cfai:luna") or a bare speaker name; an empty
// speaker falls back to "luna". An error is returned for empty text, for
// transport failures, and for any non-200 response. In the last case the
// error message carries the status code and at most maxErrBody bytes of the
// upstream body, so a large error page cannot balloon logs or memory.
func (c *httpClient) GenerateAudio(ctx context.Context, text, voice string) ([]byte, error) {
	// maxErrBody caps how much of an upstream error body is read and reported.
	const maxErrBody = 300

	if text == "" {
		return nil, fmt.Errorf("cfai: empty text")
	}
	speaker := SpeakerName(voice)
	if speaker == "" {
		speaker = "luna"
	}

	payload, err := json.Marshal(map[string]any{
		"text":    text,
		"speaker": speaker,
	})
	if err != nil {
		return nil, fmt.Errorf("cfai: marshal request: %w", err)
	}

	endpoint := fmt.Sprintf("https://api.cloudflare.com/client/v4/accounts/%s/ai/run/%s",
		c.accountID, c.model)
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return nil, fmt.Errorf("cfai: build request: %w", err)
	}
	req.Header.Set("Authorization", "Bearer "+c.apiToken)
	req.Header.Set("Content-Type", "application/json")

	resp, err := c.http.Do(req)
	if err != nil {
		return nil, fmt.Errorf("cfai: request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best-effort: a failed read just yields a shorter diagnostic.
		errBody, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrBody))
		return nil, fmt.Errorf("cfai: server returned %d: %s", resp.StatusCode, strings.TrimSpace(string(errBody)))
	}

	mp3, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("cfai: read response: %w", err)
	}
	return mp3, nil
}
// StreamAudioMP3 synthesises the whole clip via GenerateAudio and exposes the
// buffered MP3 bytes through an io.ReadCloser whose Close is a no-op.
func (c *httpClient) StreamAudioMP3(ctx context.Context, text, voice string) (io.ReadCloser, error) {
	audio, genErr := c.GenerateAudio(ctx, text, voice)
	if genErr != nil {
		return nil, genErr
	}
	buffered := bytes.NewReader(audio)
	return io.NopCloser(buffered), nil
}
// StreamAudioWAV generates audio (MP3) and wraps it as an io.ReadCloser.
// Note: the CF AI aura-2-en model returns MP3 regardless of the method name;
// this method simply delegates to StreamAudioMP3 and performs no transcoding.
func (c *httpClient) StreamAudioWAV(ctx context.Context, text, voice string) (io.ReadCloser, error) {
return c.StreamAudioMP3(ctx, text, voice)
}
// ListVoices returns the full voice IDs ("cfai:" + speaker) for every
// aura-2-en speaker. The list is static, so no network call is made and the
// returned error is always nil.
func (c *httpClient) ListVoices(_ context.Context) ([]string, error) {
	voices := make([]string, 0, len(aura2Speakers))
	for _, name := range aura2Speakers {
		voices = append(voices, VoiceID(name))
	}
	return voices, nil
}

View File

@@ -0,0 +1,312 @@
// Image generation via Cloudflare Workers AI text-to-image models.
//
// API reference:
//
// POST https://api.cloudflare.com/client/v4/accounts/{accountID}/ai/run/{model}
// Authorization: Bearer {apiToken}
// Content-Type: application/json
//
// Text-only request (all models):
//
// { "prompt": "...", "num_steps": 20 }
//
// Reference-image request:
// - FLUX models: { "prompt": "...", "image_b64": "<base64>" }
// - SD img2img: { "prompt": "...", "image": [r,g,b,a,...], "strength": 0.75 }
//
// All models return raw PNG bytes on success (Content-Type: image/png).
//
// Recommended models for LibNovel:
// - Book covers (no reference): flux-2-dev, flux-2-klein-9b, lucid-origin
// - Chapter images (speed): flux-2-klein-4b, flux-1-schnell
// - With reference image: flux-2-dev, flux-2-klein-9b, sd-v1-5-img2img
package cfai
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"image"
_ "image/jpeg" // register JPEG decoder
_ "image/png" // register PNG decoder
"io"
"net/http"
"time"
)
// ImageModel identifies a Cloudflare Workers AI text-to-image model.
// Its value is the model ID as it appears in the API URL path
// (e.g. "@cf/black-forest-labs/flux-2-dev").
type ImageModel string
// Known image model IDs. The descriptions mirror the metadata returned by
// AllImageModels.
const (
// ImageModelFlux2Dev — best quality, multi-reference. Recommended for covers.
ImageModelFlux2Dev ImageModel = "@cf/black-forest-labs/flux-2-dev"
// ImageModelFlux2Klein9B — 9B params, multi-reference. Good for covers.
ImageModelFlux2Klein9B ImageModel = "@cf/black-forest-labs/flux-2-klein-9b"
// ImageModelFlux2Klein4B — ultra-fast, unified gen+edit. Recommended for chapters.
ImageModelFlux2Klein4B ImageModel = "@cf/black-forest-labs/flux-2-klein-4b"
// ImageModelFlux1Schnell — fastest, text-only. Good for quick illustrations.
ImageModelFlux1Schnell ImageModel = "@cf/black-forest-labs/flux-1-schnell"
// ImageModelSDXLLightning — fast 1024px generation.
ImageModelSDXLLightning ImageModel = "@cf/bytedance/stable-diffusion-xl-lightning"
// ImageModelSD15Img2Img — explicit img2img with flat RGBA reference.
// GenerateImageFromReference builds a different request body for this model.
ImageModelSD15Img2Img ImageModel = "@cf/runwayml/stable-diffusion-v1-5-img2img"
// ImageModelSDXLBase — Stability AI SDXL base.
ImageModelSDXLBase ImageModel = "@cf/stabilityai/stable-diffusion-xl-base-1.0"
// ImageModelLucidOrigin — Leonardo AI; strong prompt adherence.
ImageModelLucidOrigin ImageModel = "@cf/leonardo/lucid-origin"
// ImageModelPhoenix10 — Leonardo AI; accurate text rendering.
ImageModelPhoenix10 ImageModel = "@cf/leonardo/phoenix-1.0"
// DefaultImageModel is the default model for book-cover generation. It is
// also the model substituted when an ImageRequest leaves Model empty.
DefaultImageModel = ImageModelFlux2Dev
)
// ImageModelInfo describes a single image generation model. It is the element
// type of the model list exposed to the admin UI as JSON.
type ImageModelInfo struct {
// ID is the Workers AI model ID (the string form of an ImageModel).
ID string `json:"id"`
// Label is the human-readable model name.
Label string `json:"label"`
// Provider is the organisation that publishes the model.
Provider string `json:"provider"`
// SupportsRef reports whether the model is listed as accepting a reference image.
SupportsRef bool `json:"supports_ref"`
RecommendedFor []string `json:"recommended_for"` // "cover" and/or "chapter"
// Description is a short free-text summary of the model.
Description string `json:"description"`
}
// AllImageModels returns metadata about every supported image model.
// The list is static (nine entries, one per ImageModel constant) and a fresh
// slice is built on every call, so callers may modify the result freely.
func AllImageModels() []ImageModelInfo {
return []ImageModelInfo{
{
ID: string(ImageModelFlux2Dev), Label: "FLUX.2 Dev", Provider: "Black Forest Labs",
SupportsRef: true, RecommendedFor: []string{"cover"},
Description: "Best quality; multi-reference editing. Recommended for book covers.",
},
{
ID: string(ImageModelFlux2Klein9B), Label: "FLUX.2 Klein 9B", Provider: "Black Forest Labs",
SupportsRef: true, RecommendedFor: []string{"cover"},
Description: "9B parameters with multi-reference support.",
},
{
ID: string(ImageModelFlux2Klein4B), Label: "FLUX.2 Klein 4B", Provider: "Black Forest Labs",
SupportsRef: true, RecommendedFor: []string{"chapter"},
Description: "Ultra-fast unified gen+edit. Recommended for chapter images.",
},
{
ID: string(ImageModelFlux1Schnell), Label: "FLUX.1 Schnell", Provider: "Black Forest Labs",
SupportsRef: false, RecommendedFor: []string{"chapter"},
Description: "Fastest inference. Good for quick chapter illustrations.",
},
{
ID: string(ImageModelSDXLLightning), Label: "SDXL Lightning", Provider: "ByteDance",
SupportsRef: false, RecommendedFor: []string{"chapter"},
Description: "Lightning-fast 1024px images in a few steps.",
},
{
ID: string(ImageModelSD15Img2Img), Label: "SD 1.5 img2img", Provider: "RunwayML",
SupportsRef: true, RecommendedFor: []string{"cover", "chapter"},
Description: "Explicit img2img: generates from a reference image + prompt.",
},
{
ID: string(ImageModelSDXLBase), Label: "SDXL Base 1.0", Provider: "Stability AI",
SupportsRef: false, RecommendedFor: []string{"cover"},
Description: "Stable Diffusion XL base model.",
},
{
ID: string(ImageModelLucidOrigin), Label: "Lucid Origin", Provider: "Leonardo AI",
SupportsRef: false, RecommendedFor: []string{"cover"},
Description: "Highly prompt-responsive; strong graphic design and HD renders.",
},
{
ID: string(ImageModelPhoenix10), Label: "Phoenix 1.0", Provider: "Leonardo AI",
SupportsRef: false, RecommendedFor: []string{"cover"},
Description: "Exceptional prompt adherence; accurate text rendering.",
},
}
}
// ImageRequest is the input to GenerateImage / GenerateImageFromReference.
// Zero values mean "not set"; see the individual fields for what is applied.
type ImageRequest struct {
// Prompt is the text description of the desired image.
Prompt string
// Model is the CF Workers AI model. Defaults to DefaultImageModel when empty.
Model ImageModel
// NumSteps controls inference quality. Values <= 0 are replaced with 20.
// Intended range: 1–20 (the upper bound is not enforced by this package).
NumSteps int
// Width and Height in pixels. 0 = model default (typically 1024x1024);
// non-positive values are omitted from the request.
Width, Height int
// Guidance controls prompt adherence. Values <= 0 are omitted from the
// request so the model's own default applies (7.5 per the original note —
// TODO confirm against the model documentation).
Guidance float64
// Strength for img2img: 0.0 = copy reference, 1.0 = ignore reference.
// Values <= 0 mean "not set": the SD 1.5 img2img path then sends 0.75,
// all other models omit the field.
Strength float64
}
// ImageGenClient generates images via Cloudflare Workers AI.
type ImageGenClient interface {
// GenerateImage creates an image from a text prompt only.
// Returns the image bytes as delivered by the model (documented as raw PNG;
// callers that need the exact format should sniff the bytes).
GenerateImage(ctx context.Context, req ImageRequest) ([]byte, error)
// GenerateImageFromReference creates an image from a text prompt + reference image.
// refImage should be PNG or JPEG bytes. An empty refImage is equivalent to
// calling GenerateImage. Returns the image bytes as delivered by the model.
GenerateImageFromReference(ctx context.Context, req ImageRequest, refImage []byte) ([]byte, error)
// Models returns metadata about all supported image models.
Models() []ImageModelInfo
}
// imageGenHTTPClient is the concrete CF AI image generation client.
// Construct it with NewImageGen.
type imageGenHTTPClient struct {
// accountID is the Cloudflare account ID placed in the request URL path.
accountID string
// apiToken is sent as the Bearer token in the Authorization header.
apiToken string
// http is the underlying HTTP client used for every API call.
http *http.Client
}
// NewImageGen builds an ImageGenClient bound to the given Cloudflare account
// ID and API token. Requests issued by the client are subject to a
// five-minute overall timeout.
func NewImageGen(accountID, apiToken string) ImageGenClient {
	client := new(imageGenHTTPClient)
	client.accountID = accountID
	client.apiToken = apiToken
	client.http = &http.Client{Timeout: 5 * time.Minute}
	return client
}
// GenerateImage runs a text-to-image request. Defaults are applied to req
// first; width, height and guidance are sent only when they are positive so
// that the model's own defaults apply otherwise.
func (c *imageGenHTTPClient) GenerateImage(ctx context.Context, req ImageRequest) ([]byte, error) {
	req = applyImageDefaults(req)

	payload := make(map[string]any, 5)
	payload["prompt"] = req.Prompt
	payload["num_steps"] = req.NumSteps
	if w := req.Width; w > 0 {
		payload["width"] = w
	}
	if h := req.Height; h > 0 {
		payload["height"] = h
	}
	if g := req.Guidance; g > 0 {
		payload["guidance"] = g
	}

	return c.callImageAPI(ctx, req.Model, payload)
}
// GenerateImageFromReference generates an image from a text prompt plus a
// reference image (PNG or JPEG bytes).
//
// An empty refImage falls back to GenerateImage. For ImageModelSD15Img2Img the
// reference is decoded and sent as a flat numeric RGBA array under "image",
// with a strength that defaults to 0.75. Every other model receives the
// original bytes base64-encoded under "image_b64", and strength only when it
// is positive. Width, height and guidance are sent only when positive.
//
// An error is returned when the reference cannot be decoded (SD 1.5 path) or
// when the API call fails.
func (c *imageGenHTTPClient) GenerateImageFromReference(ctx context.Context, req ImageRequest, refImage []byte) ([]byte, error) {
	if len(refImage) == 0 {
		return c.GenerateImage(ctx, req)
	}
	req = applyImageDefaults(req)

	var body map[string]any
	if req.Model == ImageModelSD15Img2Img {
		pixels, err := decodeImageToRGBA(refImage)
		if err != nil {
			return nil, fmt.Errorf("cfai/image: decode reference: %w", err)
		}
		// encoding/json marshals []uint8 (i.e. []byte) as a base64 string,
		// not as a JSON array. Widen to []int so the payload really is the
		// numeric array [r,g,b,a,...] this request format calls for.
		// NOTE(review): confirm against the model docs whether the array
		// should hold decoded pixels or the encoded file bytes.
		values := make([]int, len(pixels))
		for i, p := range pixels {
			values[i] = int(p)
		}
		strength := req.Strength
		if strength <= 0 {
			strength = 0.75
		}
		body = map[string]any{
			"prompt":    req.Prompt,
			"image":     values,
			"strength":  strength,
			"num_steps": req.NumSteps,
		}
	} else {
		b64 := base64.StdEncoding.EncodeToString(refImage)
		body = map[string]any{
			"prompt":    req.Prompt,
			"image_b64": b64,
			"num_steps": req.NumSteps,
		}
		if req.Strength > 0 {
			body["strength"] = req.Strength
		}
	}

	if req.Width > 0 {
		body["width"] = req.Width
	}
	if req.Height > 0 {
		body["height"] = req.Height
	}
	if req.Guidance > 0 {
		body["guidance"] = req.Guidance
	}

	return c.callImageAPI(ctx, req.Model, body)
}
// Models returns all supported image model metadata. It delegates to
// AllImageModels and does not contact the API.
func (c *imageGenHTTPClient) Models() []ImageModelInfo {
return AllImageModels()
}
// callImageAPI POSTs body as JSON to the Workers AI run endpoint for model
// and returns the generated image bytes.
//
// Two response shapes are handled:
//   - a raw image body, returned unchanged;
//   - a JSON envelope carrying the image as base64 in "result.image" (or a
//     top-level "image"), which is decoded before being returned.
//
// A non-200 status yields an error containing the status code and at most 300
// bytes of the upstream body. A JSON response without an image field, or with
// an undecodable one, is reported as an error rather than being passed off as
// image data.
func (c *imageGenHTTPClient) callImageAPI(ctx context.Context, model ImageModel, body map[string]any) ([]byte, error) {
	// maxErrBody caps how much of an upstream error body is read and reported.
	const maxErrBody = 300

	encoded, err := json.Marshal(body)
	if err != nil {
		return nil, fmt.Errorf("cfai/image: marshal: %w", err)
	}

	url := fmt.Sprintf("https://api.cloudflare.com/client/v4/accounts/%s/ai/run/%s",
		c.accountID, string(model))
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(encoded))
	if err != nil {
		return nil, fmt.Errorf("cfai/image: build request: %w", err)
	}
	req.Header.Set("Authorization", "Bearer "+c.apiToken)
	req.Header.Set("Content-Type", "application/json")

	resp, err := c.http.Do(req)
	if err != nil {
		return nil, fmt.Errorf("cfai/image: http: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best-effort: a failed read just yields a shorter diagnostic.
		errBody, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrBody))
		return nil, fmt.Errorf("cfai/image: model %s returned %d: %s", model, resp.StatusCode, string(errBody))
	}

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("cfai/image: read response: %w", err)
	}

	// No supported image format starts with '{', so a leading brace means the
	// model answered with a JSON envelope instead of raw image bytes.
	trimmed := bytes.TrimSpace(data)
	if len(trimmed) == 0 || trimmed[0] != '{' {
		return data, nil
	}

	var envelope struct {
		Image  string `json:"image"`
		Result struct {
			Image string `json:"image"`
		} `json:"result"`
	}
	if err := json.Unmarshal(trimmed, &envelope); err != nil {
		return nil, fmt.Errorf("cfai/image: model %s: decode JSON response: %w", model, err)
	}
	b64 := envelope.Result.Image
	if b64 == "" {
		b64 = envelope.Image
	}
	if b64 == "" {
		return nil, fmt.Errorf("cfai/image: model %s: JSON response contains no image", model)
	}
	img, err := base64.StdEncoding.DecodeString(b64)
	if err != nil {
		return nil, fmt.Errorf("cfai/image: model %s: decode base64 image: %w", model, err)
	}
	return img, nil
}
// applyImageDefaults returns req with unset fields filled in: a non-positive
// NumSteps becomes 20 and an empty Model becomes DefaultImageModel. All other
// fields are returned unchanged.
func applyImageDefaults(req ImageRequest) ImageRequest {
	const defaultNumSteps = 20

	out := req
	if out.NumSteps <= 0 {
		out.NumSteps = defaultNumSteps
	}
	if out.Model == "" {
		out.Model = DefaultImageModel
	}
	return out
}
// decodeImageToRGBA decodes an encoded image (the original note names PNG and
// JPEG; the formats actually accepted are whichever decoders are registered
// with the image package) and flattens it into a byte slice of four bytes per
// pixel in R, G, B, A order, row by row starting at the top-left of the image
// bounds. It is used for the stable-diffusion-v1-5-img2img model.
//
// Channel values come from color.Color.RGBA, so they are alpha-premultiplied,
// and each 16-bit channel is reduced to its high 8 bits.
//
// NOTE(review): encoding/json marshals a []uint8 as a base64 string, not as a
// JSON array of numbers. Confirm the format the consuming API expects before
// placing this slice directly in a JSON body.
func decodeImageToRGBA(data []byte) ([]uint8, error) {
	decoded, _, err := image.Decode(bytes.NewReader(data))
	if err != nil {
		return nil, fmt.Errorf("decode image: %w", err)
	}

	rect := decoded.Bounds()
	flat := make([]uint8, 0, rect.Dx()*rect.Dy()*4)
	for y := rect.Min.Y; y < rect.Max.Y; y++ {
		for x := rect.Min.X; x < rect.Max.X; x++ {
			r, g, b, a := decoded.At(x, y).RGBA()
			flat = append(flat, uint8(r>>8), uint8(g>>8), uint8(b>>8), uint8(a>>8))
		}
	}
	return flat, nil
}

View File

@@ -66,6 +66,18 @@ type PocketTTS struct {
URL string
}
// CFAI holds credentials and model selection for Cloudflare Workers AI TTS.
// Load fills it from the CFAI_ACCOUNT_ID, CFAI_API_TOKEN and CFAI_TTS_MODEL
// environment variables, each defaulting to the empty string.
type CFAI struct {
// AccountID is the Cloudflare account ID.
// An empty string disables CF AI generation.
AccountID string
// APIToken is a Workers AI API token with Workers AI Read+Edit permissions.
APIToken string
// Model is the Workers AI TTS model ID.
// Defaults to "@cf/deepgram/aura-2-en" when empty.
// NOTE(review): Load leaves this empty when CFAI_TTS_MODEL is unset, so the
// default is presumably applied by the consumer of this config — confirm.
Model string
}
// LibreTranslate holds connection settings for a self-hosted LibreTranslate instance.
type LibreTranslate struct {
// URL is the base URL of the LibreTranslate instance, e.g. https://translate.libnovel.cc
@@ -153,6 +165,7 @@ type Config struct {
MinIO MinIO
Kokoro Kokoro
PocketTTS PocketTTS
CFAI CFAI
LibreTranslate LibreTranslate
HTTP HTTP
Runner Runner
@@ -203,6 +216,12 @@ func Load() Config {
URL: envOr("POCKET_TTS_URL", ""),
},
CFAI: CFAI{
AccountID: envOr("CFAI_ACCOUNT_ID", ""),
APIToken: envOr("CFAI_API_TOKEN", ""),
Model: envOr("CFAI_TTS_MODEL", ""),
},
LibreTranslate: LibreTranslate{
URL: envOr("LIBRETRANSLATE_URL", ""),
APIKey: envOr("LIBRETRANSLATE_API_KEY", ""),

View File

@@ -27,6 +27,7 @@ import (
"go.opentelemetry.io/otel/codes"
"github.com/libnovel/backend/internal/bookstore"
"github.com/libnovel/backend/internal/cfai"
"github.com/libnovel/backend/internal/domain"
"github.com/libnovel/backend/internal/kokoro"
"github.com/libnovel/backend/internal/libretranslate"
@@ -112,6 +113,9 @@ type Dependencies struct {
// PocketTTS is the pocket-tts client (CPU, kyutai voices: alba, marius, etc.).
// If nil, pocket-tts voice tasks will fail with a clear error.
PocketTTS pockettts.Client
// CFAI is the Cloudflare Workers AI TTS client (cfai:* prefixed voices).
// If nil, CF AI voice tasks will fail with a clear error.
CFAI cfai.Client
// LibreTranslate is the machine translation client.
// If nil, translation tasks will fail with a clear error.
LibreTranslate libretranslate.Client
@@ -555,6 +559,18 @@ func (r *Runner) runAudioTask(ctx context.Context, task domain.AudioTask) {
return
}
log.Info("runner: audio generated via pocket-tts", "voice", task.Voice)
} else if cfai.IsCFAIVoice(task.Voice) {
if r.deps.CFAI == nil {
fail("cloudflare AI client not configured (CFAI_ACCOUNT_ID/CFAI_API_TOKEN empty)")
return
}
var genErr error
audioData, genErr = r.deps.CFAI.GenerateAudio(ctx, text, task.Voice)
if genErr != nil {
fail(fmt.Sprintf("cfai generate: %v", genErr))
return
}
log.Info("runner: audio generated via cloudflare AI", "voice", task.Voice)
} else {
if r.deps.Kokoro == nil {
fail("kokoro client not configured (KOKORO_URL is empty)")

View File

@@ -13,6 +13,11 @@
# - RUNNER_SKIP_INITIAL_CATALOGUE_REFRESH=true
# - REDIS_ADDR → rediss://redis.libnovel.cc:6380 (prod Redis via Caddy TLS proxy)
# - LibreTranslate service for machine translation (internal network only)
#
# extra_hosts pins storage.libnovel.cc and pb.libnovel.cc to the prod server IP
# (165.22.70.138) so that large PutObject uploads and PocketBase writes bypass
# Cloudflare's 100-second proxy timeout entirely. TLS still terminates at Caddy
# on prod; the TLS certificate is valid for the domain names so SNI works fine.
services:
libretranslate:
@@ -35,6 +40,12 @@ services:
stop_grace_period: 135s
depends_on:
- libretranslate
# Pin prod subdomains to the prod server IP to bypass Cloudflare's 100s
# proxy timeout. Large MP3 PutObject uploads and PocketBase writes go
# directly to Caddy on prod; TLS and SNI still work normally.
extra_hosts:
- "storage.libnovel.cc:165.22.70.138"
- "pb.libnovel.cc:165.22.70.138"
environment:
# ── PocketBase ──────────────────────────────────────────────────────────
POCKETBASE_URL: "https://pb.libnovel.cc"
@@ -63,6 +74,10 @@ services:
# ── Pocket TTS ──────────────────────────────────────────────────────────
POCKET_TTS_URL: "${POCKET_TTS_URL}"
# ── Cloudflare Workers AI TTS ────────────────────────────────────────────
CFAI_ACCOUNT_ID: "${CFAI_ACCOUNT_ID}"
CFAI_API_TOKEN: "${CFAI_API_TOKEN}"
# ── LibreTranslate (internal Docker network) ────────────────────────────
LIBRETRANSLATE_URL: "http://libretranslate:5000"
LIBRETRANSLATE_API_KEY: "${LIBRETRANSLATE_API_KEY}"

View File

@@ -362,6 +362,7 @@
"admin_nav_audio": "Audio",
"admin_nav_translation": "Translation",
"admin_nav_changelog": "Changelog",
"admin_nav_image_gen": "Image Gen",
"admin_nav_feedback": "Feedback",
"admin_nav_errors": "Errors",
"admin_nav_analytics": "Analytics",

View File

@@ -362,6 +362,7 @@
"admin_nav_audio": "Audio",
"admin_nav_translation": "Traduction",
"admin_nav_changelog": "Modifications",
"admin_nav_image_gen": "Image Gen",
"admin_nav_feedback": "Retours",
"admin_nav_errors": "Erreurs",
"admin_nav_analytics": "Analytique",

View File

@@ -362,6 +362,7 @@
"admin_nav_audio": "Audio",
"admin_nav_translation": "Terjemahan",
"admin_nav_changelog": "Perubahan",
"admin_nav_image_gen": "Image Gen",
"admin_nav_feedback": "Masukan",
"admin_nav_errors": "Kesalahan",
"admin_nav_analytics": "Analitik",

View File

@@ -362,6 +362,7 @@
"admin_nav_audio": "Áudio",
"admin_nav_translation": "Tradução",
"admin_nav_changelog": "Alterações",
"admin_nav_image_gen": "Image Gen",
"admin_nav_feedback": "Feedback",
"admin_nav_errors": "Erros",
"admin_nav_analytics": "Análise",

View File

@@ -362,6 +362,7 @@
"admin_nav_audio": "Аудио",
"admin_nav_translation": "Перевод",
"admin_nav_changelog": "Изменения",
"admin_nav_image_gen": "Image Gen",
"admin_nav_feedback": "Отзывы",
"admin_nav_errors": "Ошибки",
"admin_nav_analytics": "Аналитика",

View File

@@ -86,6 +86,7 @@
// ── Derived: voices grouped by engine ──────────────────────────────────
const kokoroVoices = $derived(voices.filter((v) => v.engine === 'kokoro'));
const pocketVoices = $derived(voices.filter((v) => v.engine === 'pocket-tts'));
const cfaiVoices = $derived(voices.filter((v) => v.engine === 'cfai'));
// ── Voice selector state ────────────────────────────────────────────────
let showVoicePanel = $state(false);
@@ -98,6 +99,7 @@
* Human-readable label for a voice.
* Kokoro: "af_bella" → "Bella (US F)"
* Pocket-TTS: "alba" → "Alba (EN F)"
* CF AI: "cfai:luna" → "Luna (EN F)"
* Falls back gracefully if called with a bare string (e.g. from the store default).
*/
function voiceLabel(v: Voice | string): string {
@@ -110,6 +112,14 @@
return kokoroLabelFromId(v);
}
if (v.engine === 'cfai') {
// "cfai:luna" → "Luna (EN F)"
const speaker = v.id.startsWith('cfai:') ? v.id.slice(5) : v.id;
const name = speaker.replace(/\b\w/g, (c) => c.toUpperCase());
const genderLabel = v.gender.toUpperCase();
return `${name} (EN ${genderLabel})`;
}
if (v.engine === 'pocket-tts') {
const langLabel = v.lang.toUpperCase().replace('-', '');
const genderLabel = v.gender.toUpperCase();
@@ -844,6 +854,16 @@
{@render voiceRow(v)}
{/each}
{/if}
<!-- Cloudflare AI section -->
{#if cfaiVoices.length > 0}
<div class="px-3 py-1.5 bg-(--color-surface-2)/70 border-b border-(--color-border)/50 {kokoroVoices.length > 0 || pocketVoices.length > 0 ? 'border-t border-(--color-border)' : ''}">
<span class="text-[10px] font-semibold text-(--color-muted) uppercase tracking-widest">Cloudflare AI</span>
</div>
{#each cfaiVoices as v (v.id)}
{@render voiceRow(v)}
{/each}
{/if}
</div>
<div class="px-3 py-2 border-t border-(--color-border) bg-(--color-surface-2)/50">
<p class="text-xs text-(--color-muted)">

View File

@@ -12,7 +12,7 @@
export interface Voice {
/** Voice identifier passed to TTS clients (e.g. "af_bella", "alba"). */
id: string;
/** TTS engine: "kokoro" | "pocket-tts". */
/** TTS engine: "kokoro" | "pocket-tts" | "cfai". */
engine: string;
/** Primary language tag (e.g. "en-us", "en-gb", "en", "es", "fr"). */
lang: string;

View File

@@ -6,7 +6,8 @@
{ href: '/admin/scrape', label: () => m.admin_nav_scrape() },
{ href: '/admin/audio', label: () => m.admin_nav_audio() },
{ href: '/admin/translation', label: () => m.admin_nav_translation() },
{ href: '/admin/changelog', label: () => m.admin_nav_changelog() }
{ href: '/admin/changelog', label: () => m.admin_nav_changelog() },
{ href: '/admin/image-gen', label: () => m.admin_nav_image_gen() }
];
const externalLinks = [

View File

@@ -57,6 +57,12 @@
return `${m}m ${s % 60}s`;
}
/**
 * Derives a display name for the TTS engine from a voice id alone:
 * ids starting with "cfai:" map to "CF AI", ids containing an underscore map
 * to "Kokoro", and everything else (including an empty id) to "Pocket TTS".
 */
function engineLabel(voice: string): string {
	if (voice.startsWith('cfai:')) {
		return 'CF AI';
	}
	return voice.includes('_') ? 'Kokoro' : 'Pocket TTS';
}
// ── Audio jobs stats + filter ────────────────────────────────────────────────
let jobsQ = $state('');
let filteredJobs = $derived(
@@ -160,6 +166,7 @@
<th class="px-4 py-3 text-left">Book</th>
<th class="px-4 py-3 text-right">Ch.</th>
<th class="px-4 py-3 text-left">Voice</th>
<th class="px-4 py-3 text-left">Engine</th>
<th class="px-4 py-3 text-left">Status</th>
<th class="px-4 py-3 text-left">Started</th>
<th class="px-4 py-3 text-left">Duration</th>
@@ -173,6 +180,7 @@
</td>
<td class="px-4 py-3 text-right text-(--color-muted)">{job.chapter}</td>
<td class="px-4 py-3 text-(--color-muted) font-mono text-xs">{job.voice}</td>
<td class="px-4 py-3 text-(--color-muted) text-xs">{engineLabel(job.voice)}</td>
<td class="px-4 py-3">
<span class="font-medium {jobStatusColor(job.status)}">{job.status}</span>
</td>
@@ -181,7 +189,7 @@
</tr>
{#if job.error_message}
<tr class="bg-(--color-danger)/10">
<td colspan="6" class="px-4 py-2 text-xs text-(--color-danger) font-mono">{job.error_message}</td>
<td colspan="7" class="px-4 py-2 text-xs text-(--color-danger) font-mono">{job.error_message}</td>
</tr>
{/if}
{/each}
@@ -202,6 +210,7 @@
<div class="grid grid-cols-2 gap-1 text-xs">
<span class="text-(--color-muted)">Chapter</span><span class="text-(--color-muted) text-right">{job.chapter}</span>
<span class="text-(--color-muted)">Voice</span><span class="text-(--color-muted) font-mono text-right truncate">{job.voice}</span>
<span class="text-(--color-muted)">Engine</span><span class="text-(--color-muted) text-right">{engineLabel(job.voice)}</span>
<span class="text-(--color-muted)">Started</span><span class="text-(--color-muted) text-right">{fmtDate(job.started)}</span>
<span class="text-(--color-muted)">Duration</span><span class="text-(--color-muted) text-right">{duration(job.started, job.finished)}</span>
</div>
@@ -236,6 +245,7 @@
<th class="px-4 py-3 text-left">Book</th>
<th class="px-4 py-3 text-left">Chapter</th>
<th class="px-4 py-3 text-left">Voice</th>
<th class="px-4 py-3 text-left">Engine</th>
<th class="px-4 py-3 text-left">Filename</th>
<th class="px-4 py-3 text-left">Updated</th>
</tr>
@@ -249,6 +259,7 @@
</td>
<td class="px-4 py-3 text-(--color-muted)">{parts.chapter}</td>
<td class="px-4 py-3 text-(--color-muted) font-mono text-xs">{parts.voice}</td>
<td class="px-4 py-3 text-(--color-muted) text-xs">{engineLabel(parts.voice)}</td>
<td class="px-4 py-3 text-(--color-muted) font-mono text-xs truncate max-w-[14rem]" title={entry.filename}>
{entry.filename}
</td>
@@ -267,11 +278,12 @@
<a href="/books/{parts.slug}" class="text-(--color-text) font-medium hover:text-(--color-brand) transition-colors block truncate">
{parts.slug}
</a>
<div class="grid grid-cols-2 gap-1 text-xs">
<span class="text-(--color-muted)">Chapter</span><span class="text-(--color-muted) text-right">{parts.chapter}</span>
<span class="text-(--color-muted)">Voice</span><span class="text-(--color-muted) font-mono text-right truncate">{parts.voice}</span>
<span class="text-(--color-muted)">Updated</span><span class="text-(--color-muted) text-right">{fmtDate(entry.updated)}</span>
</div>
<div class="grid grid-cols-2 gap-1 text-xs">
<span class="text-(--color-muted)">Chapter</span><span class="text-(--color-muted) text-right">{parts.chapter}</span>
<span class="text-(--color-muted)">Voice</span><span class="text-(--color-muted) font-mono text-right truncate">{parts.voice}</span>
<span class="text-(--color-muted)">Engine</span><span class="text-(--color-muted) text-right">{engineLabel(parts.voice)}</span>
<span class="text-(--color-muted)">Updated</span><span class="text-(--color-muted) text-right">{fmtDate(entry.updated)}</span>
</div>
{#if entry.filename}
<p class="text-xs text-(--color-muted) font-mono truncate" title={entry.filename}>{entry.filename}</p>
{/if}

View File

@@ -0,0 +1,28 @@
import type { PageServerLoad } from './$types';
import { backendFetch } from '$lib/server/scraper';
import { log } from '$lib/server/logger';
export interface ImageModelInfo {
id: string;
label: string;
provider: string;
supports_ref: boolean;
recommended_for: string[]; // "cover" | "chapter"
description: string;
}
/**
 * Fetches the image model list from the backend for the admin image-gen page.
 * Never throws: a non-OK response or any fetch/parse failure is logged as a
 * warning and yields an empty model list.
 */
export const load: PageServerLoad = async () => {
	// parent layout already guards admin role
	const noModels = () => ({ models: [] as ImageModelInfo[] });
	try {
		const res = await backendFetch('/api/admin/image-gen/models');
		if (res.ok) {
			const payload = await res.json();
			return { models: (payload.models ?? []) as ImageModelInfo[] };
		}
		log.warn('admin/image-gen', 'failed to load models', { status: res.status });
		return noModels();
	} catch (e) {
		log.warn('admin/image-gen', 'backend unreachable', { err: String(e) });
		return noModels();
	}
};

View File

@@ -0,0 +1,676 @@
<script lang="ts">
import type { PageData } from './$types';
import type { ImageModelInfo } from './+page.server';
let { data }: { data: PageData } = $props();
// ── Form state ───────────────────────────────────────────────────────────────
type ImageType = 'cover' | 'chapter';
let imageType = $state<ImageType>('cover');
let slug = $state('');
let chapter = $state<number>(1);
let selectedModel = $state('');
let prompt = $state('');
let referenceFile = $state<File | null>(null);
let referencePreviewUrl = $state('');
// Advanced
let showAdvanced = $state(false);
let numSteps = $state(20);
let guidance = $state(7.5);
let strength = $state(0.75);
let width = $state(1024);
let height = $state(1024);
// ── Generation state ─────────────────────────────────────────────────────────
let generating = $state(false);
let genError = $state('');
let elapsedMs = $state(0);
let elapsedInterval: ReturnType<typeof setInterval> | null = null;
// ── Result state ─────────────────────────────────────────────────────────────
interface GenResult {
imageSrc: string;
model: string;
bytes: number;
contentType: string;
saved: boolean;
coverUrl: string;
elapsedMs: number;
slug: string;
imageType: ImageType;
chapter: number;
}
let result = $state<GenResult | null>(null);
let history = $state<GenResult[]>([]);
let saving = $state(false);
let saveError = $state('');
let saveSuccess = $state(false);
// ── Model helpers ────────────────────────────────────────────────────────────
const models = data.models as ImageModelInfo[];
let filteredModels = $derived(
referenceFile
? models // show all; warn on ones without ref support
: models
);
let coverModels = $derived(filteredModels.filter((m) => m.recommended_for.includes('cover')));
let chapterModels = $derived(filteredModels.filter((m) => m.recommended_for.includes('chapter')));
let otherModels = $derived(
filteredModels.filter(
(m) => !m.recommended_for.includes('cover') && !m.recommended_for.includes('chapter')
)
);
// ── Auto-select default model when type changes ──────────────────────────────
$effect(() => {
const preferred = imageType === 'cover' ? coverModels : chapterModels;
if (!selectedModel && preferred.length > 0) {
selectedModel = preferred[0].id;
}
});
// Reset model selection when type changes if current selection no longer fits
$effect(() => {
void imageType; // track
const preferred = imageType === 'cover' ? coverModels : chapterModels;
if (preferred.length > 0) {
// only auto-switch if current model isn't in preferred list for this type
const current = models.find((m) => m.id === selectedModel);
if (!current || !current.recommended_for.includes(imageType)) {
selectedModel = preferred[0].id;
}
}
});
// ── Prompt templates ────────────────────────────────────────────────────────
let promptTemplate = $derived(
imageType === 'cover'
? `Book cover for "${slug || 'untitled novel'}", a fantasy adventure novel. Epic scene with dramatic lighting, professional book cover art, cinematic composition, highly detailed, 4K.`
: `Illustration for chapter ${chapter} of "${slug || 'untitled novel'}". Dramatic moment, vivid colors, anime-inspired style, detailed background, cinematic lighting.`
);
// Replaces the prompt text with the current promptTemplate value, discarding
// whatever was typed in the prompt field before.
function applyTemplate() {
prompt = promptTemplate;
}
// ── Reference image handling ─────────────────────────────────────────────────
let dragOver = $state(false);
/**
 * Sets (or, with null, clears) the reference image and keeps its preview URL
 * in sync. The previous object URL, if any, is revoked before a new one is
 * created for the incoming file.
 */
function handleReferenceFile(file: File | null) {
	referenceFile = file;
	if (referencePreviewUrl) {
		URL.revokeObjectURL(referencePreviewUrl);
	}
	if (file) {
		referencePreviewUrl = URL.createObjectURL(file);
	} else {
		referencePreviewUrl = '';
	}
}
/** Change handler for the file picker: forwards the first chosen file, or null when none is selected. */
function onFileInput(e: Event) {
	const picker = e.target as HTMLInputElement;
	const chosen = picker.files?.[0] ?? null;
	handleReferenceFile(chosen);
}
/**
 * Drop handler for the reference drop zone. Always cancels the browser's
 * default drop behaviour and clears the drag highlight; only the first dropped
 * file is considered, and it is accepted only if its MIME type starts with
 * "image/".
 */
function onDrop(e: DragEvent) {
	e.preventDefault();
	dragOver = false;
	const dropped = e.dataTransfer?.files[0];
	if (!dropped || !dropped.type.startsWith('image/')) return;
	handleReferenceFile(dropped);
}
/**
 * Removes the current reference image and, if the file input with id
 * "ref-file-input" is present in the DOM, resets its value so the same file
 * can be picked again.
 */
function clearReference() {
	handleReferenceFile(null);
	const picker = document.getElementById('ref-file-input') as HTMLInputElement | null;
	if (picker) {
		picker.value = '';
	}
}
// ── Selected model info ──────────────────────────────────────────────────────
let selectedModelInfo = $derived(models.find((m) => m.id === selectedModel) ?? null);
let refWarning = $derived(
referenceFile && selectedModelInfo && !selectedModelInfo.supports_ref
? `${selectedModelInfo.label} does not support reference images. The reference will be ignored.`
: ''
);
// ── Generate ────────────────────────────────────────────────────────────────
let canGenerate = $derived(prompt.trim().length > 0 && slug.trim().length > 0 && !generating);
/**
 * Sends the current form to POST /api/admin/image-gen and publishes the
 * outcome.
 *
 * Does nothing unless canGenerate is true. While the request is in flight,
 * `generating` is set and `elapsedMs` is refreshed every 200 ms. The request
 * is multipart (fields "json" + "reference") when a reference file is present
 * and the selected model supports references; otherwise it is plain JSON.
 *
 * On success the image becomes `result` and is pushed to the front of
 * `history` (capped at 5 entries). On failure `genError` is set; this includes
 * a successful status whose body carries no image data.
 */
async function generate() {
	if (!canGenerate) return;
	generating = true;
	genError = '';
	result = null;
	elapsedMs = 0;
	saveSuccess = false;
	saveError = '';

	const startTs = Date.now();
	// Drives the live elapsed-time readout shown while generating.
	elapsedInterval = setInterval(() => {
		elapsedMs = Date.now() - startTs;
	}, 200);

	try {
		const payload = {
			prompt: prompt.trim(),
			model: selectedModel,
			type: imageType,
			slug: slug.trim(),
			chapter: imageType === 'chapter' ? chapter : 0,
			num_steps: numSteps,
			guidance,
			strength,
			width,
			height
		};

		let res: Response;
		if (referenceFile && selectedModelInfo?.supports_ref) {
			// No explicit Content-Type: the browser adds the multipart boundary.
			const fd = new FormData();
			fd.append('json', JSON.stringify(payload));
			fd.append('reference', referenceFile);
			res = await fetch('/api/admin/image-gen', { method: 'POST', body: fd });
		} else {
			res = await fetch('/api/admin/image-gen', {
				method: 'POST',
				headers: { 'Content-Type': 'application/json' },
				body: JSON.stringify(payload)
			});
		}

		// A body that is not valid JSON is treated as an empty object.
		const body = await res.json().catch(() => ({}));
		if (!res.ok) {
			genError = body.error ?? body.message ?? `Error ${res.status}`;
			return;
		}
		// Without this check a successful status with an unparsable or incomplete
		// body would be shown as a broken "data:undefined;base64,undefined" image
		// and recorded in the history.
		if (typeof body.image_b64 !== 'string' || body.image_b64.length === 0) {
			genError = 'Backend returned no image data.';
			return;
		}

		const totalMs = Date.now() - startTs;
		const newResult: GenResult = {
			imageSrc: `data:${body.content_type};base64,${body.image_b64}`,
			model: body.model,
			bytes: body.bytes,
			contentType: body.content_type,
			saved: body.saved ?? false,
			coverUrl: body.cover_url ?? '',
			elapsedMs: totalMs,
			slug: slug.trim(),
			imageType,
			chapter
		};
		result = newResult;
		history = [newResult, ...history].slice(0, 5);
	} catch {
		genError = 'Network error.';
	} finally {
		// Runs on every exit path, including the early returns above.
		generating = false;
		if (elapsedInterval) {
			clearInterval(elapsedInterval);
			elapsedInterval = null;
		}
	}
}
// ── Save as cover ────────────────────────────────────────────────────────────
// Asks the backend to generate a cover with save_to_cover=true and, if the
// reply reports saved, marks the displayed result as saved.
// Does nothing when there is no result or a save is already in progress.
// Failures are reported through saveError; `saving` is always reset at the end.
async function saveAsCover() {
if (!result || saving) return;
saving = true;
saveError = '';
saveSuccess = false;
try {
// NOTE(review): model and slug come from the displayed result, but prompt,
// steps, guidance, strength and size are read from the current form state,
// which may have been edited since that result was generated.
const payload = {
prompt: prompt.trim(),
model: result.model,
type: 'cover',
slug: result.slug,
num_steps: numSteps,
guidance,
strength,
width,
height,
save_to_cover: true
};
// This issues a second generation request with save_to_cover=true rather than
// uploading the image already on screen; the original note says the backend
// performs the save as part of that request.
// NOTE(review): the request is JSON-only (no reference image is sent) and the
// image in the reply is not displayed, so the saved cover may differ from the
// one shown — confirm this is acceptable.
// TODO: A lighter approach would be a dedicated save endpoint that accepts
// the base64 payload. For now re-gen is acceptable given admin-only usage.
const res = await fetch('/api/admin/image-gen', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(payload)
});
// A body that is not valid JSON is treated as an empty object.
const body = await res.json().catch(() => ({}));
if (!res.ok) {
saveError = body.error ?? body.message ?? `Error ${res.status}`;
return;
}
if (body.saved) {
saveSuccess = true;
// Replaces `result` with a new object; the matching entry in `history` is
// not updated here.
result = { ...result, saved: true, coverUrl: body.cover_url ?? result.coverUrl };
} else {
saveError = 'Backend did not save the cover.';
}
} catch {
saveError = 'Network error.';
} finally {
saving = false;
}
}
// ── Download ─────────────────────────────────────────────────────────────────
/**
 * Triggers a browser download of the displayed image through a temporary
 * anchor element. The file is named "<slug>-cover.<ext>" for covers and
 * "<slug>-ch<chapter>.<ext>" otherwise; the extension is "jpg" for
 * image/jpeg and "png" for every other content type.
 */
function download() {
	if (!result) return;
	const ext = result.contentType === 'image/jpeg' ? 'jpg' : 'png';
	const link = document.createElement('a');
	link.href = result.imageSrc;
	if (result.imageType === 'cover') {
		link.download = `${result.slug}-cover.${ext}`;
	} else {
		link.download = `${result.slug}-ch${result.chapter}.${ext}`;
	}
	link.click();
}
// ── Formatting helpers ───────────────────────────────────────────────────────
/** Formats a duration in milliseconds: "<n>ms" below one second, otherwise seconds with one decimal place. */
function fmtElapsed(ms: number) {
	return ms < 1000 ? `${ms}ms` : `${(ms / 1000).toFixed(1)}s`;
}
/**
 * Formats a byte count using 1024-based units: plain bytes below 1 KiB,
 * "KB" with one decimal below 1 MiB, and "MB" with two decimals above that.
 */
function fmtBytes(b: number) {
	const KIB = 1024;
	const MIB = KIB * KIB;
	if (b < KIB) {
		return `${b} B`;
	}
	return b < MIB ? `${(b / KIB).toFixed(1)} KB` : `${(b / MIB).toFixed(2)} MB`;
}
</script>
<svelte:head>
<title>Image Gen — Admin</title>
</svelte:head>
<div class="space-y-6 max-w-6xl">
<!-- Header -->
<div>
<h1 class="text-2xl font-bold text-(--color-text)">Image Generation</h1>
<p class="text-(--color-muted) text-sm mt-1">
Generate book covers and chapter images using Cloudflare Workers AI.
</p>
</div>
<!-- Layout: form + result side by side on large screens -->
<div class="grid grid-cols-1 lg:grid-cols-2 gap-6 items-start">
<!-- ── Left: Form panel ──────────────────────────────────────────────────── -->
<div class="space-y-4">
<!-- Type toggle -->
<div class="flex gap-1 bg-(--color-surface-2) rounded-lg p-1 w-fit border border-(--color-border)">
{#each (['cover', 'chapter'] as const) as t}
<button
onclick={() => (imageType = t)}
class="px-4 py-1.5 rounded-md text-sm font-medium transition-colors
{imageType === t
? 'bg-(--color-surface-3) text-(--color-text)'
: 'text-(--color-muted) hover:text-(--color-text)'}"
>
{t === 'cover' ? 'Cover' : 'Chapter Image'}
</button>
{/each}
</div>
<!-- Slug + chapter -->
<div class="flex gap-3">
<div class="flex-1 min-w-0 space-y-1">
<label class="text-xs font-medium text-(--color-muted) uppercase tracking-wide" for="slug-input">
Book slug
</label>
<input
id="slug-input"
type="text"
bind:value={slug}
placeholder="e.g. shadow-slave"
class="w-full bg-(--color-surface-2) border border-(--color-border) rounded-lg px-3 py-2 text-(--color-text) text-sm placeholder-zinc-500 focus:outline-none focus:ring-2 focus:ring-(--color-brand)"
/>
</div>
{#if imageType === 'chapter'}
<div class="w-24 space-y-1 shrink-0">
<label class="text-xs font-medium text-(--color-muted) uppercase tracking-wide" for="chapter-input">
Chapter
</label>
<input
id="chapter-input"
type="number"
bind:value={chapter}
min="1"
class="w-full bg-(--color-surface-2) border border-(--color-border) rounded-lg px-3 py-2 text-(--color-text) text-sm focus:outline-none focus:ring-2 focus:ring-(--color-brand)"
/>
</div>
{/if}
</div>
<!-- Model selector -->
<div class="space-y-1">
<label class="text-xs font-medium text-(--color-muted) uppercase tracking-wide" for="model-select">
Model
</label>
<select
id="model-select"
bind:value={selectedModel}
class="w-full bg-(--color-surface-2) border border-(--color-border) rounded-lg px-3 py-2 text-(--color-text) text-sm focus:outline-none focus:ring-2 focus:ring-(--color-brand)"
>
{#if coverModels.length > 0}
<optgroup label="Recommended for covers">
{#each coverModels as m}
<option value={m.id}>
{m.label}{m.provider}{m.supports_ref ? ' ★ref' : ''}
</option>
{/each}
</optgroup>
{/if}
{#if chapterModels.length > 0}
<optgroup label="Recommended for chapters">
{#each chapterModels as m}
<option value={m.id}>
{m.label}{m.provider}{m.supports_ref ? ' ★ref' : ''}
</option>
{/each}
</optgroup>
{/if}
{#if otherModels.length > 0}
<optgroup label="All models">
{#each otherModels as m}
<option value={m.id}>
{m.label}{m.provider}{m.supports_ref ? ' ★ref' : ''}
</option>
{/each}
</optgroup>
{/if}
</select>
{#if selectedModelInfo}
<p class="text-xs text-(--color-muted)">{selectedModelInfo.description}</p>
{/if}
{#if refWarning}
<p class="text-xs text-amber-400">{refWarning}</p>
{/if}
</div>
<!-- Prompt -->
<div class="space-y-1">
<div class="flex items-center justify-between">
<label class="text-xs font-medium text-(--color-muted) uppercase tracking-wide" for="prompt-input">
Prompt
</label>
<button
onclick={applyTemplate}
class="text-xs text-(--color-brand) hover:text-(--color-brand-dim) transition-colors"
>
Use template
</button>
</div>
<textarea
id="prompt-input"
bind:value={prompt}
rows="5"
placeholder="Describe the image to generate…"
class="w-full bg-(--color-surface-2) border border-(--color-border) rounded-lg px-3 py-2 text-(--color-text) text-sm placeholder-zinc-500 focus:outline-none focus:ring-2 focus:ring-(--color-brand) resize-y"
></textarea>
</div>
<!-- Reference image drop zone -->
<div class="space-y-1">
<p class="text-xs font-medium text-(--color-muted) uppercase tracking-wide">
Reference image <span class="normal-case font-normal text-(--color-muted)">(optional, img2img)</span>
</p>
{#if referenceFile && referencePreviewUrl}
<div class="flex items-start gap-3 p-3 bg-(--color-surface-2) rounded-lg border border-(--color-border)">
<img
src={referencePreviewUrl}
alt="Reference"
class="w-16 h-16 object-cover rounded-md shrink-0 border border-(--color-border)"
/>
<div class="min-w-0 flex-1 space-y-0.5">
<p class="text-sm text-(--color-text) truncate">{referenceFile.name}</p>
<p class="text-xs text-(--color-muted)">{fmtBytes(referenceFile.size)}</p>
</div>
<button
onclick={clearReference}
class="text-(--color-muted) hover:text-(--color-text) transition-colors shrink-0"
aria-label="Remove reference image"
>
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M6 18L18 6M6 6l12 12" />
</svg>
</button>
</div>
{:else}
<!-- Drop zone -->
<label
class="flex flex-col items-center justify-center gap-2 p-4 border-2 border-dashed rounded-lg cursor-pointer transition-colors
{dragOver
? 'border-(--color-brand) bg-(--color-brand)/5'
: 'border-(--color-border) hover:border-(--color-brand)/50 hover:bg-(--color-surface-2)'}"
ondragover={(e) => { e.preventDefault(); dragOver = true; }}
ondragleave={() => { dragOver = false; }}
ondrop={onDrop}
>
<svg class="w-6 h-6 text-(--color-muted)" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5"
d="M4 16l4.586-4.586a2 2 0 012.828 0L16 16m-2-2l1.586-1.586a2 2 0 012.828 0L20 14m-6-6h.01M6 20h12a2 2 0 002-2V6a2 2 0 00-2-2H6a2 2 0 00-2 2v12a2 2 0 002 2z" />
</svg>
<span class="text-xs text-(--color-muted)">Drop image or <span class="text-(--color-brand)">click to browse</span></span>
<input
id="ref-file-input"
type="file"
accept="image/png,image/jpeg,image/webp"
onchange={onFileInput}
class="sr-only"
/>
</label>
{/if}
</div>
<!-- Advanced collapsible -->
<div class="border border-(--color-border) rounded-lg overflow-hidden">
<button
onclick={() => (showAdvanced = !showAdvanced)}
class="w-full flex items-center justify-between px-4 py-2.5 bg-(--color-surface-2) text-sm font-medium text-(--color-muted) hover:text-(--color-text) transition-colors"
>
Advanced options
<svg
class="w-4 h-4 transition-transform {showAdvanced ? 'rotate-180' : ''}"
fill="none" stroke="currentColor" viewBox="0 0 24 24"
>
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 9l-7 7-7-7" />
</svg>
</button>
{#if showAdvanced}
<div class="px-4 py-4 bg-(--color-surface) space-y-4">
<!-- num_steps -->
<div class="space-y-1">
<div class="flex justify-between">
<label class="text-xs text-(--color-muted)">Steps</label>
<span class="text-xs text-(--color-text) font-mono">{numSteps}</span>
</div>
<input type="range" min="1" max="20" step="1" bind:value={numSteps}
class="w-full accent-(--color-brand)" />
</div>
<!-- guidance -->
<div class="space-y-1">
<div class="flex justify-between">
<label class="text-xs text-(--color-muted)">Guidance</label>
<span class="text-xs text-(--color-text) font-mono">{guidance.toFixed(1)}</span>
</div>
<input type="range" min="1" max="20" step="0.5" bind:value={guidance}
class="w-full accent-(--color-brand)" />
</div>
<!-- strength (only when reference present) -->
{#if referenceFile}
<div class="space-y-1">
<div class="flex justify-between">
<label class="text-xs text-(--color-muted)">Strength</label>
<span class="text-xs text-(--color-text) font-mono">{strength.toFixed(2)}</span>
</div>
<input type="range" min="0" max="1" step="0.05" bind:value={strength}
class="w-full accent-(--color-brand)" />
<p class="text-xs text-(--color-muted)">0 = copy reference · 1 = ignore reference</p>
</div>
{/if}
<!-- width × height -->
<div class="grid grid-cols-2 gap-3">
<div class="space-y-1">
<label class="text-xs text-(--color-muted)" for="width-input">Width</label>
<input id="width-input" type="number" min="256" max="2048" step="64" bind:value={width}
class="w-full bg-(--color-surface-2) border border-(--color-border) rounded-md px-3 py-1.5 text-(--color-text) text-sm focus:outline-none focus:ring-1 focus:ring-(--color-brand)" />
</div>
<div class="space-y-1">
<label class="text-xs text-(--color-muted)" for="height-input">Height</label>
<input id="height-input" type="number" min="256" max="2048" step="64" bind:value={height}
class="w-full bg-(--color-surface-2) border border-(--color-border) rounded-md px-3 py-1.5 text-(--color-text) text-sm focus:outline-none focus:ring-1 focus:ring-(--color-brand)" />
</div>
</div>
</div>
{/if}
</div>
<!-- Generate button -->
<button
onclick={generate}
disabled={!canGenerate}
class="w-full py-2.5 rounded-lg bg-(--color-brand) text-(--color-surface) font-semibold text-sm
hover:bg-(--color-brand-dim) transition-colors disabled:opacity-50 disabled:cursor-not-allowed
flex items-center justify-center gap-2"
>
{#if generating}
<!-- Spinner -->
<svg class="w-4 h-4 animate-spin" fill="none" viewBox="0 0 24 24">
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4" />
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8v8H4z" />
</svg>
Generating… {fmtElapsed(elapsedMs)}
{:else}
Generate
{/if}
</button>
{#if genError}
<p class="text-sm text-(--color-danger) bg-(--color-danger)/10 rounded-lg px-3 py-2">{genError}</p>
{/if}
</div>
<!-- ── Right: Result panel ────────────────────────────────────────────────── -->
<div class="space-y-4">
{#if result}
<div class="bg-(--color-surface) border border-(--color-border) rounded-xl overflow-hidden">
<!-- Image -->
<img
src={result.imageSrc}
alt="Generated image"
class="w-full object-contain max-h-[36rem] bg-zinc-950"
/>
<!-- Meta bar -->
<div class="px-4 py-3 border-t border-(--color-border) space-y-3">
<div class="grid grid-cols-3 gap-2 text-xs">
<div>
<p class="text-(--color-muted)">Model</p>
<p class="text-(--color-text) font-mono truncate" title={result.model}>
{models.find((m) => m.id === result!.model)?.label ?? result.model}
</p>
</div>
<div>
<p class="text-(--color-muted)">Size</p>
<p class="text-(--color-text)">{fmtBytes(result.bytes)}</p>
</div>
<div>
<p class="text-(--color-muted)">Time</p>
<p class="text-(--color-text)">{fmtElapsed(result.elapsedMs)}</p>
</div>
</div>
{#if result.saved}
<p class="text-xs text-green-400">
Cover saved &rarr;
<a href={result.coverUrl} target="_blank" rel="noopener noreferrer"
class="underline hover:text-green-300">{result.coverUrl}</a>
</p>
{/if}
{#if saveSuccess && !result.saved}
<p class="text-xs text-green-400">Cover saved successfully.</p>
{/if}
{#if saveError}
<p class="text-xs text-(--color-danger)">{saveError}</p>
{/if}
<!-- Actions -->
<div class="flex gap-2 flex-wrap">
<button
onclick={download}
class="flex-1 px-3 py-1.5 rounded-md bg-(--color-surface-3) text-(--color-text) text-xs font-medium hover:bg-zinc-600 transition-colors"
>
Download
</button>
{#if result.imageType === 'cover'}
<button
onclick={saveAsCover}
disabled={saving || result.saved}
class="flex-1 px-3 py-1.5 rounded-md bg-(--color-brand) text-(--color-surface) text-xs font-semibold
hover:bg-(--color-brand-dim) transition-colors disabled:opacity-50"
>
{saving ? 'Saving…' : result.saved ? 'Saved ✓' : 'Save as cover'}
</button>
{/if}
</div>
</div>
</div>
{:else if generating}
<!-- Placeholder while generating -->
<div class="flex items-center justify-center bg-(--color-surface) border border-(--color-border) rounded-xl h-80">
<div class="text-center space-y-3">
<svg class="w-8 h-8 animate-spin mx-auto text-(--color-brand)" fill="none" viewBox="0 0 24 24">
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4" />
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8v8H4z" />
</svg>
<p class="text-sm text-(--color-muted)">Generating… {fmtElapsed(elapsedMs)}</p>
</div>
</div>
{:else}
<!-- Empty state -->
<div class="flex items-center justify-center bg-(--color-surface) border border-(--color-border) border-dashed rounded-xl h-80">
<p class="text-sm text-(--color-muted)">Generated image will appear here</p>
</div>
{/if}
<!-- History thumbnails -->
{#if history.length > 0}
<div class="space-y-2">
<p class="text-xs font-semibold text-(--color-muted) uppercase tracking-widest">Session history</p>
<div class="flex gap-2 flex-wrap">
{#each history as h, i}
<button
onclick={() => result = h}
class="relative rounded-md overflow-hidden border transition-colors shrink-0
{result === h ? 'border-(--color-brand)' : 'border-(--color-border) hover:border-(--color-brand)/50'}"
>
<img
src={h.imageSrc}
alt="History {i + 1}"
class="w-16 h-16 object-cover"
/>
{#if h.saved}
<span class="absolute bottom-0.5 right-0.5 w-2.5 h-2.5 rounded-full bg-green-500 border border-(--color-surface)"></span>
{/if}
</button>
{/each}
</div>
</div>
{/if}
</div>
</div>
</div>

View File

@@ -0,0 +1,46 @@
/**
* POST /api/admin/image-gen
*
* Admin-only proxy to the Go backend's image generation endpoint.
* Transparently forwards the request body (JSON or multipart/form-data)
* and returns the JSON response containing the base64-encoded image.
*/
import { json, error } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { log } from '$lib/server/logger';
import { backendFetch } from '$lib/server/scraper';
/**
 * Proxies an admin image-generation request to the Go backend.
 *
 * Callers that are not signed in with the `admin` role are rejected with 403.
 * A multipart/form-data body is forwarded as raw bytes under the caller's
 * original content-type header, leaving multipart parsing to the backend.
 * Any other body is forwarded as text and labelled `application/json`.
 *
 * The backend's HTTP status is passed through together with its JSON payload.
 * If the backend body is not valid JSON, the failure is logged and an empty
 * object is returned with the backend's status.
 *
 * Throws 502 when reading the incoming body or calling the backend fails.
 */
export const POST: RequestHandler = async ({ request, locals }) => {
	if (locals.user?.role !== 'admin') {
		throw error(403, 'Forbidden');
	}

	const contentType = request.headers.get('content-type') ?? '';
	const isMultipart = contentType.includes('multipart/form-data');

	let res: Response;
	try {
		// Multipart must stay byte-exact; everything else is treated as JSON text.
		const body = isMultipart ? await request.arrayBuffer() : await request.text();
		res = await backendFetch('/api/admin/image-gen', {
			method: 'POST',
			headers: { 'content-type': isMultipart ? contentType : 'application/json' },
			body
		});
	} catch (e) {
		log.error('admin/image-gen', 'backend proxy error', { err: String(e) });
		throw error(502, 'Could not reach backend');
	}

	// Keep the best-effort `{}` fallback, but record the failure so that a
	// plain-text or HTML error from the backend is not lost without a trace.
	const data = await res.json().catch((e: unknown) => {
		log.error('admin/image-gen', 'backend returned non-JSON response', {
			status: res.status,
			err: String(e)
		});
		return {};
	});
	return json(data, { status: res.status });
};

View File

@@ -93,6 +93,30 @@
const kokoroVoices = $derived(voices.filter((v) => v.engine === 'kokoro'));
const pocketVoices = $derived(voices.filter((v) => v.engine === 'pocket-tts'));
const cfaiVoices = $derived(voices.filter((v) => v.engine === 'cfai'));
/**
 * Builds the display label for a TTS voice.
 *
 * - `cfai`: drops a leading "cfai:" from the id, capitalises each word and,
 *   when a gender is present, appends " (EN <GENDER>)".
 * - `pocket-tts`: turns underscores into spaces, capitalises each word and
 *   appends the same optional suffix.
 * - any other engine is handled as a Kokoro id: "af_bella" → "Bella (US F)".
 */
function voiceLabel(v: Voice): string {
	const capitaliseWords = (text: string): string =>
		text.replace(/\b\w/g, (c) => c.toUpperCase());
	const englishSuffix = v.gender ? ` (EN ${v.gender.toUpperCase()})` : '';

	switch (v.engine) {
		case 'cfai':
			return capitaliseWords(v.id.startsWith('cfai:') ? v.id.slice(5) : v.id) + englishSuffix;
		case 'pocket-tts':
			// Underscores count as word characters, so swap them for spaces first.
			return capitaliseWords(v.id.replace(/_/g, ' ')) + englishSuffix;
	}

	// Kokoro ids: two-letter prefix, a separator, then the name (optionally "v0"-prefixed).
	const langMap: Record<string, string> = {
		af: 'US',
		am: 'US',
		bf: 'UK',
		bm: 'UK',
		ef: 'ES',
		em: 'ES',
		ff: 'FR',
		hf: 'IN',
		hm: 'IN',
		'if': 'IT',
		im: 'IT',
		jf: 'JP',
		jm: 'JP',
		pf: 'PT',
		pm: 'PT',
		zf: 'ZH',
		zm: 'ZH',
	};
	const code = v.id.slice(0, 2);
	// Unknown prefixes fall back to the upper-cased prefix itself.
	const region = langMap[code] ?? code.toUpperCase();
	const bareName = v.id.slice(3).replace(/^v0/, '');
	const displayName = bareName.replace(/^([a-z])/, (c) => c.toUpperCase());
	const genderTag = v.gender ? v.gender.toUpperCase() : '?';
	return `${displayName} (${region} ${genderTag})`;
}
$effect(() => {
fetch('/api/voices')
@@ -492,12 +516,17 @@
class="w-full bg-(--color-surface-3) border border-(--color-border) rounded-lg px-3 py-2 text-(--color-text) text-sm focus:outline-none focus:ring-2 focus:ring-(--color-brand)">
{#if kokoroVoices.length > 0}
<optgroup label="Kokoro (GPU)">
{#each kokoroVoices as v}<option value={v.id}>{v.id}</option>{/each}
{#each kokoroVoices as v}<option value={v.id}>{voiceLabel(v)}</option>{/each}
</optgroup>
{/if}
{#if pocketVoices.length > 0}
<optgroup label="Pocket TTS (CPU)">
{#each pocketVoices as v}<option value={v.id}>{v.id}</option>{/each}
{#each pocketVoices as v}<option value={v.id}>{voiceLabel(v)}</option>{/each}
</optgroup>
{/if}
{#if cfaiVoices.length > 0}
<optgroup label="Cloudflare AI">
{#each cfaiVoices as v}<option value={v.id}>{voiceLabel(v)}</option>{/each}
</optgroup>
{/if}
</select>