Files
libnovel/backend/internal/libretranslate/client.go
Admin 28ac8d8826
Some checks failed
CI / Backend (push) Failing after 11s
Release / Check ui (push) Failing after 51s
Release / Docker / ui (push) Has been skipped
CI / UI (push) Failing after 55s
Release / Test backend (push) Failing after 1m9s
Release / Docker / backend (push) Has been skipped
Release / Docker / runner (push) Has been skipped
Release / Docker / caddy (push) Failing after 28s
Release / Gitea Release (push) Has been skipped
CI / UI (pull_request) Failing after 42s
CI / Backend (pull_request) Successful in 3m45s
feat(translation): machine translation pipeline + admin bulk enqueue UI
- LibreTranslate client (chunks on blank lines, ≤4500 chars, 3-goroutine semaphore)
- Runner translation task loop (OTel, heartbeat, MinIO storage)
- PocketBase translation_jobs collection support (create/claim/finish/list)
- Per-chapter language switcher on chapter reader (EN/RU/ID/PT/FR, polls until done)
- Admin /admin/translation page: bulk enqueue form + live-polling jobs table
- New backend routes: POST /api/translation/{slug}/{n}, GET /api/translation/status,
  GET /api/translation/{slug}/{n}, GET /api/admin/translation/jobs,
  POST /api/admin/translation/bulk
- ListTranslationTasks added to taskqueue.Reader interface + store impl
- All builds and tests pass; svelte-check: 0 errors
2026-03-29 11:32:42 +05:00

182 lines
4.8 KiB
Go

// Package libretranslate provides an HTTP client for a self-hosted
// LibreTranslate instance. It handles text chunking, concurrent translation,
// and reassembly so callers can pass arbitrarily long markdown strings.
package libretranslate
import (
"bytes"
"context"
"encoding/json"
"fmt"
"net/http"
"strings"
"sync"
"time"
)
const (
// maxChunkBytes is the target maximum size of each chunk sent to
// LibreTranslate. LibreTranslate's default limit is 5000 characters;
// we stay comfortably below that.
maxChunkBytes = 4500
// concurrency is the number of simultaneous translation requests per chapter.
concurrency = 3
)
// Client translates text via LibreTranslate.
// A nil Client is valid — all calls return the original text unchanged.
type Client interface {
// Translate translates text from sourceLang to targetLang.
// text is a raw markdown string. The returned string is the translated
// markdown, reassembled in original paragraph order.
Translate(ctx context.Context, text, sourceLang, targetLang string) (string, error)
}
// New returns a Client for the given LibreTranslate URL.
// Returns nil when url is empty, which disables translation.
func New(url, apiKey string) Client {
if url == "" {
return nil
}
return &httpClient{
url: strings.TrimRight(url, "/"),
apiKey: apiKey,
http: &http.Client{Timeout: 60 * time.Second},
}
}
type httpClient struct {
url string
apiKey string
http *http.Client
}
// Translate splits text into paragraph chunks, translates them concurrently
// (up to concurrency goroutines), and reassembles in order.
func (c *httpClient) Translate(ctx context.Context, text, sourceLang, targetLang string) (string, error) {
paragraphs := splitParagraphs(text)
if len(paragraphs) == 0 {
return text, nil
}
chunks := binChunks(paragraphs, maxChunkBytes)
translated := make([]string, len(chunks))
errs := make([]error, len(chunks))
sem := make(chan struct{}, concurrency)
var wg sync.WaitGroup
for i, chunk := range chunks {
wg.Add(1)
sem <- struct{}{}
go func(idx int, chunkText string) {
defer wg.Done()
defer func() { <-sem }()
result, err := c.translateChunk(ctx, chunkText, sourceLang, targetLang)
translated[idx] = result
errs[idx] = err
}(i, chunk)
}
wg.Wait()
for _, err := range errs {
if err != nil {
return "", err
}
}
return strings.Join(translated, "\n\n"), nil
}
// translateChunk sends a single POST /translate request.
func (c *httpClient) translateChunk(ctx context.Context, text, sourceLang, targetLang string) (string, error) {
reqBody := map[string]string{
"q": text,
"source": sourceLang,
"target": targetLang,
"format": "html",
}
if c.apiKey != "" {
reqBody["api_key"] = c.apiKey
}
b, err := json.Marshal(reqBody)
if err != nil {
return "", fmt.Errorf("libretranslate: marshal request: %w", err)
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.url+"/translate", bytes.NewReader(b))
if err != nil {
return "", fmt.Errorf("libretranslate: build request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := c.http.Do(req)
if err != nil {
return "", fmt.Errorf("libretranslate: request: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
var errBody struct {
Error string `json:"error"`
}
_ = json.NewDecoder(resp.Body).Decode(&errBody)
return "", fmt.Errorf("libretranslate: status %d: %s", resp.StatusCode, errBody.Error)
}
var result struct {
TranslatedText string `json:"translatedText"`
}
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
return "", fmt.Errorf("libretranslate: decode response: %w", err)
}
return result.TranslatedText, nil
}
// splitParagraphs splits markdown text on blank lines, preserving non-empty paragraphs.
func splitParagraphs(text string) []string {
// Normalise line endings.
text = strings.ReplaceAll(text, "\r\n", "\n")
// Split on double newlines (blank lines between paragraphs).
parts := strings.Split(text, "\n\n")
var paragraphs []string
for _, p := range parts {
p = strings.TrimSpace(p)
if p != "" {
paragraphs = append(paragraphs, p)
}
}
return paragraphs
}
// binChunks groups paragraphs into chunks each at most maxBytes in length.
// Each chunk is a single string with paragraphs joined by "\n\n".
func binChunks(paragraphs []string, maxBytes int) []string {
var chunks []string
var current strings.Builder
for _, p := range paragraphs {
needed := len(p)
if current.Len() > 0 {
needed += 2 // for the "\n\n" separator
}
if current.Len()+needed > maxBytes && current.Len() > 0 {
// Flush current chunk.
chunks = append(chunks, current.String())
current.Reset()
}
if current.Len() > 0 {
current.WriteString("\n\n")
}
current.WriteString(p)
}
if current.Len() > 0 {
chunks = append(chunks, current.String())
}
return chunks
}