Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e0dec05885 |
@@ -19,3 +19,53 @@ func stripMarkdown(src string) string {
|
||||
src = regexp.MustCompile(`\n{3,}`).ReplaceAllString(src, "\n\n")
|
||||
return strings.TrimSpace(src)
|
||||
}
|
||||
|
||||
// chunkText splits text into chunks of at most maxChars bytes so that the TTS
// service receives natural prose fragments rather than arbitrary cuts.
//
// Limits are measured in bytes (len), not runes. Each cut is chosen inside the
// first maxChars bytes of the remaining text, in this order of preference:
//
//  1. right after the last sentence boundary (". ", "! ", "? " or "\n");
//  2. right after the last space or tab, so a word is never split;
//  3. at maxChars, moved back to the nearest UTF-8 rune boundary so that a
//     multi-byte character is never torn apart.
//
// A sentence longer than maxChars is therefore split across several chunks
// using rules 2 and 3; no text is dropped. Chunks produced by splitting are
// whitespace-trimmed and empty chunks are skipped.
//
// If text already fits, or maxChars is not positive (treated as "no limit"),
// the text is returned unchanged as a single chunk.
func chunkText(text string, maxChars int) []string {
	// A non-positive limit can never be satisfied; without this guard the loop
	// below would spin forever (0) or slice out of range (negative).
	if maxChars <= 0 || len(text) <= maxChars {
		return []string{text}
	}

	// Sentence-boundary delimiters; we split AFTER these sequences. The loop
	// below keeps the furthest cut point, so order is irrelevant, and "\n"
	// already covers ".\n", "!\n", "?\n" and "\n\n".
	delimiters := []string{". ", "! ", "? ", "\n"}

	var chunks []string
	remaining := text

	for len(remaining) > 0 {
		if len(remaining) <= maxChars {
			chunks = append(chunks, strings.TrimSpace(remaining))
			break
		}

		// Find the last sentence boundary within the maxChars window. The
		// window may end inside a multi-byte rune, which is harmless here
		// because every delimiter is ASCII.
		window := remaining[:maxChars]
		cutAt := -1
		for _, delim := range delimiters {
			if idx := strings.LastIndex(window, delim); idx > 0 && idx+len(delim) > cutAt {
				cutAt = idx + len(delim)
			}
		}

		if cutAt <= 0 {
			if idx := strings.LastIndexAny(window, " \t"); idx > 0 {
				// No sentence boundary: break between words instead.
				cutAt = idx + 1
			} else {
				// No whitespace either: hard-break, but never inside a rune.
				cutAt = runeSafeCut(remaining, maxChars)
			}
		}

		if chunk := strings.TrimSpace(remaining[:cutAt]); chunk != "" {
			chunks = append(chunks, chunk)
		}
		remaining = strings.TrimSpace(remaining[cutAt:])
	}

	return chunks
}

// runeSafeCut returns a byte offset near limit at which s can be split without
// cutting through a UTF-8 encoded rune. It requires 0 < limit < len(s) and
// always returns a value of at least 1, so callers are guaranteed progress.
//
// The offset is normally <= limit. It exceeds limit only when limit is smaller
// than the first rune of s, in which case that whole rune is returned. If the
// bytes around limit are not valid UTF-8, limit itself is returned.
func runeSafeCut(s string, limit int) int {
	const maxRuneLen = 4 // longest UTF-8 encoding of a single rune
	isContinuation := func(b byte) bool { return b&0xC0 == 0x80 }

	// Step back over continuation bytes to the start of the rune at limit.
	cut := limit
	for cut > 0 && limit-cut < maxRuneLen && isContinuation(s[cut]) {
		cut--
	}
	if cut > 0 {
		if isContinuation(s[cut]) {
			// More continuation bytes than any rune can have: not UTF-8.
			return limit
		}
		return cut
	}

	// limit falls inside the very first rune; emit that rune whole.
	cut = 1
	for cut < len(s) && cut < maxRuneLen && isContinuation(s[cut]) {
		cut++
	}
	return cut
}
|
||||
|
||||
|
||||
@@ -656,7 +656,7 @@ func (r *Runner) runAudioTask(ctx context.Context, task domain.AudioTask) {
|
||||
return
|
||||
}
|
||||
var genErr error
|
||||
audioData, genErr = r.deps.Kokoro.GenerateAudio(ctx, text, task.Voice)
|
||||
audioData, genErr = kokoroGenerateChunked(ctx, r.deps.Kokoro, text, task.Voice, log)
|
||||
if genErr != nil {
|
||||
fail(fmt.Sprintf("kokoro generate: %v", genErr))
|
||||
return
|
||||
@@ -685,6 +685,31 @@ func (r *Runner) runAudioTask(ctx context.Context, task domain.AudioTask) {
|
||||
log.Info("runner: audio task finished", "key", key)
|
||||
}
|
||||
|
||||
// kokoroGenerateChunked splits text into ~1 000-character sentence-boundary
|
||||
// chunks, calls Kokoro.GenerateAudio for each, and concatenates the raw MP3
|
||||
// bytes. This avoids EOF / timeout failures that occur when the Kokoro
|
||||
// FastAPI server receives very large inputs (e.g. a full imported PDF chapter).
|
||||
//
|
||||
// Concatenating raw MP3 frames is valid — MP3 is a frame-based format and
|
||||
// standard players handle multi-segment files correctly.
|
||||
func kokoroGenerateChunked(ctx context.Context, k kokoro.Client, text, voice string, log *slog.Logger) ([]byte, error) {
|
||||
const chunkSize = 1000
|
||||
|
||||
chunks := chunkText(text, chunkSize)
|
||||
log.Info("runner: kokoro chunked generation", "chunks", len(chunks), "total_chars", len(text))
|
||||
|
||||
var combined []byte
|
||||
for i, chunk := range chunks {
|
||||
data, err := k.GenerateAudio(ctx, chunk, voice)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("chunk %d/%d: %w", i+1, len(chunks), err)
|
||||
}
|
||||
combined = append(combined, data...)
|
||||
log.Info("runner: kokoro chunk done", "chunk", i+1, "of", len(chunks), "bytes", len(data))
|
||||
}
|
||||
return combined, nil
|
||||
}
|
||||
|
||||
// runImportTask executes one PDF/EPUB import task.
|
||||
// Preferred path: when task.ChaptersKey is set, it reads pre-parsed chapters
|
||||
// JSON from MinIO (written by the backend at upload time) and ingests them.
|
||||
|
||||
Reference in New Issue
Block a user