Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6559a8c015 |
@@ -17,6 +17,10 @@
|
||||
// response. There is no 100-second Cloudflare proxy timeout because we are
|
||||
// calling the Cloudflare API directly, not routing through a Cloudflare-proxied
|
||||
// homelab tunnel.
|
||||
//
|
||||
// The aura-2-en model enforces a hard 2 000-character limit per request.
|
||||
// GenerateAudio transparently splits longer texts into sentence-boundary chunks
|
||||
// and concatenates the resulting MP3 frames.
|
||||
package cfai
|
||||
|
||||
import (
|
||||
@@ -145,6 +149,8 @@ func New(accountID, apiToken, model string) Client {
|
||||
}
|
||||
|
||||
// GenerateAudio calls the Cloudflare Workers AI TTS endpoint and returns MP3 bytes.
|
||||
// The aura-2-en model rejects inputs longer than 2 000 characters, so this method
|
||||
// splits the text into sentence-bounded chunks and concatenates the MP3 responses.
|
||||
func (c *httpClient) GenerateAudio(ctx context.Context, text, voice string) ([]byte, error) {
|
||||
if text == "" {
|
||||
return nil, fmt.Errorf("cfai: empty text")
|
||||
@@ -154,6 +160,20 @@ func (c *httpClient) GenerateAudio(ctx context.Context, text, voice string) ([]b
|
||||
speaker = "luna"
|
||||
}
|
||||
|
||||
chunks := splitText(text, 1800) // stay comfortably under the 2 000-char limit
|
||||
var combined []byte
|
||||
for _, chunk := range chunks {
|
||||
part, err := c.generateChunk(ctx, chunk, speaker)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
combined = append(combined, part...)
|
||||
}
|
||||
return combined, nil
|
||||
}
|
||||
|
||||
// generateChunk sends a single ≤2 000-character request and returns MP3 bytes.
|
||||
func (c *httpClient) generateChunk(ctx context.Context, text, speaker string) ([]byte, error) {
|
||||
body, err := json.Marshal(map[string]any{
|
||||
"text": text,
|
||||
"speaker": speaker,
|
||||
@@ -189,6 +209,87 @@ func (c *httpClient) GenerateAudio(ctx context.Context, text, voice string) ([]b
|
||||
return mp3, nil
|
||||
}
|
||||
|
||||
// splitText splits src into chunks of at most maxChars characters each.
|
||||
// It tries to break at paragraph boundaries first, then at sentence-ending
|
||||
// punctuation (. ! ?), and falls back to the nearest space.
|
||||
func splitText(src string, maxChars int) []string {
|
||||
if len(src) <= maxChars {
|
||||
return []string{src}
|
||||
}
|
||||
|
||||
var chunks []string
|
||||
remaining := src
|
||||
|
||||
for len(remaining) > 0 {
|
||||
if len(remaining) <= maxChars {
|
||||
chunks = append(chunks, strings.TrimSpace(remaining))
|
||||
break
|
||||
}
|
||||
|
||||
// Search window: the first maxChars bytes of remaining.
|
||||
// Use byte length here because the API limit is in bytes/chars for ASCII;
|
||||
// for safety we operate on rune-aware slices.
|
||||
window := remaining
|
||||
if len(window) > maxChars {
|
||||
// Trim to maxChars runes (not bytes), ensuring we don't split a multi-byte char.
|
||||
window = runeSlice(remaining, maxChars)
|
||||
}
|
||||
|
||||
cut := -1
|
||||
|
||||
// 1. Prefer paragraph break (\n\n or \n).
|
||||
if i := strings.LastIndex(window, "\n\n"); i > 0 {
|
||||
cut = i + 2
|
||||
} else if i := strings.LastIndex(window, "\n"); i > 0 {
|
||||
cut = i + 1
|
||||
}
|
||||
|
||||
// 2. Fall back to sentence-ending punctuation followed by a space.
|
||||
if cut < 0 {
|
||||
for _, punct := range []string{". ", "! ", "? ", ".\n", "!\n", "?\n"} {
|
||||
if i := strings.LastIndex(window, punct); i > 0 {
|
||||
candidate := i + len(punct)
|
||||
if cut < 0 || candidate > cut {
|
||||
cut = candidate
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Last resort: nearest space.
|
||||
if cut < 0 {
|
||||
if i := strings.LastIndex(window, " "); i > 0 {
|
||||
cut = i + 1
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Hard cut at maxChars runes if no boundary found.
|
||||
if cut < 0 {
|
||||
cut = len(window)
|
||||
}
|
||||
|
||||
chunk := strings.TrimSpace(remaining[:cut])
|
||||
if chunk != "" {
|
||||
chunks = append(chunks, chunk)
|
||||
}
|
||||
remaining = remaining[cut:]
|
||||
}
|
||||
|
||||
return chunks
|
||||
}
|
||||
|
||||
// runeSlice returns the first n runes of s as a string.
|
||||
func runeSlice(s string, n int) string {
|
||||
count := 0
|
||||
for i := range s {
|
||||
if count == n {
|
||||
return s[:i]
|
||||
}
|
||||
count++
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// StreamAudioMP3 generates audio and wraps the MP3 bytes as an io.ReadCloser.
|
||||
func (c *httpClient) StreamAudioMP3(ctx context.Context, text, voice string) (io.ReadCloser, error) {
|
||||
mp3, err := c.GenerateAudio(ctx, text, voice)
|
||||
|
||||
Reference in New Issue
Block a user