Some checks failed
CI / Backend (push) Failing after 11s
Release / Check ui (push) Failing after 51s
Release / Docker / ui (push) Has been skipped
CI / UI (push) Failing after 55s
Release / Test backend (push) Failing after 1m9s
Release / Docker / backend (push) Has been skipped
Release / Docker / runner (push) Has been skipped
Release / Docker / caddy (push) Failing after 28s
Release / Gitea Release (push) Has been skipped
CI / UI (pull_request) Failing after 42s
CI / Backend (pull_request) Successful in 3m45s
- LibreTranslate client (chunks on blank lines, ≤4500 chars, 3-goroutine semaphore)
- Runner translation task loop (OTel, heartbeat, MinIO storage)
- PocketBase translation_jobs collection support (create/claim/finish/list)
- Per-chapter language switcher on chapter reader (EN/RU/ID/PT/FR, polls until done)
- Admin /admin/translation page: bulk enqueue form + live-polling jobs table
- New backend routes: POST /api/translation/{slug}/{n}, GET /api/translation/status,
GET /api/translation/{slug}/{n}, GET /api/admin/translation/jobs,
POST /api/admin/translation/bulk
- ListTranslationTasks added to taskqueue.Reader interface + store impl
- All builds and tests pass; svelte-check: 0 errors
182 lines
4.8 KiB
Go
182 lines
4.8 KiB
Go
// Package libretranslate provides an HTTP client for a self-hosted
|
|
// LibreTranslate instance. It handles text chunking, concurrent translation,
|
|
// and reassembly so callers can pass arbitrarily long markdown strings.
|
|
package libretranslate
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/http"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
const (
	// concurrency bounds how many chunk requests a single Translate call
	// keeps in flight at the same time.
	concurrency = 3

	// maxChunkBytes is the size budget, measured in bytes, for one chunk
	// sent to LibreTranslate. LibreTranslate's default request limit is
	// 5000 characters; a 4500-byte chunk can never hold more characters
	// than that, which leaves comfortable headroom.
	maxChunkBytes = 4500
)
|
|
|
|
// Client is the translation capability exposed by this package; requests are
// served by a LibreTranslate instance.
//
// New returns a nil Client when no LibreTranslate URL is configured. Calling a
// method on a nil interface value panics in Go, so callers must check for nil
// and fall back to the untranslated text themselves.
type Client interface {
	// Translate converts text, a raw markdown string, from sourceLang to
	// targetLang. On success it returns the translated markdown with its
	// paragraphs reassembled in their original order.
	Translate(ctx context.Context, text string, sourceLang string, targetLang string) (string, error)
}
|
|
|
|
// New returns a Client for the given LibreTranslate URL.
|
|
// Returns nil when url is empty, which disables translation.
|
|
func New(url, apiKey string) Client {
|
|
if url == "" {
|
|
return nil
|
|
}
|
|
return &httpClient{
|
|
url: strings.TrimRight(url, "/"),
|
|
apiKey: apiKey,
|
|
http: &http.Client{Timeout: 60 * time.Second},
|
|
}
|
|
}
|
|
|
|
// httpClient is the Client implementation returned by New; it sends its
// requests to a LibreTranslate server over HTTP.
type httpClient struct {
	// url is the server base URL (New strips trailing slashes from it).
	// apiKey is added to request bodies as "api_key" when it is non-empty.
	url, apiKey string

	// http is the underlying HTTP client used to send every request.
	http *http.Client
}
|
|
|
|
// Translate splits text into paragraph chunks, translates them concurrently
|
|
// (up to concurrency goroutines), and reassembles in order.
|
|
func (c *httpClient) Translate(ctx context.Context, text, sourceLang, targetLang string) (string, error) {
|
|
paragraphs := splitParagraphs(text)
|
|
if len(paragraphs) == 0 {
|
|
return text, nil
|
|
}
|
|
chunks := binChunks(paragraphs, maxChunkBytes)
|
|
|
|
translated := make([]string, len(chunks))
|
|
errs := make([]error, len(chunks))
|
|
|
|
sem := make(chan struct{}, concurrency)
|
|
var wg sync.WaitGroup
|
|
|
|
for i, chunk := range chunks {
|
|
wg.Add(1)
|
|
sem <- struct{}{}
|
|
go func(idx int, chunkText string) {
|
|
defer wg.Done()
|
|
defer func() { <-sem }()
|
|
result, err := c.translateChunk(ctx, chunkText, sourceLang, targetLang)
|
|
translated[idx] = result
|
|
errs[idx] = err
|
|
}(i, chunk)
|
|
}
|
|
wg.Wait()
|
|
|
|
for _, err := range errs {
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
}
|
|
|
|
return strings.Join(translated, "\n\n"), nil
|
|
}
|
|
|
|
// translateChunk sends a single POST /translate request.
|
|
func (c *httpClient) translateChunk(ctx context.Context, text, sourceLang, targetLang string) (string, error) {
|
|
reqBody := map[string]string{
|
|
"q": text,
|
|
"source": sourceLang,
|
|
"target": targetLang,
|
|
"format": "html",
|
|
}
|
|
if c.apiKey != "" {
|
|
reqBody["api_key"] = c.apiKey
|
|
}
|
|
|
|
b, err := json.Marshal(reqBody)
|
|
if err != nil {
|
|
return "", fmt.Errorf("libretranslate: marshal request: %w", err)
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.url+"/translate", bytes.NewReader(b))
|
|
if err != nil {
|
|
return "", fmt.Errorf("libretranslate: build request: %w", err)
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
resp, err := c.http.Do(req)
|
|
if err != nil {
|
|
return "", fmt.Errorf("libretranslate: request: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
var errBody struct {
|
|
Error string `json:"error"`
|
|
}
|
|
_ = json.NewDecoder(resp.Body).Decode(&errBody)
|
|
return "", fmt.Errorf("libretranslate: status %d: %s", resp.StatusCode, errBody.Error)
|
|
}
|
|
|
|
var result struct {
|
|
TranslatedText string `json:"translatedText"`
|
|
}
|
|
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
|
return "", fmt.Errorf("libretranslate: decode response: %w", err)
|
|
}
|
|
return result.TranslatedText, nil
|
|
}
|
|
|
|
// splitParagraphs splits markdown text into paragraphs separated by blank
// lines and returns the non-empty paragraphs in order, each trimmed of
// surrounding whitespace. Single line breaks inside a paragraph are kept.
//
// A line counts as blank when it is empty or holds only whitespace, so a
// separator line that carries stray spaces or tabs still ends the paragraph.
// Both CRLF and bare CR line endings are normalised to LF first.
// The result is nil when text contains no non-blank content.
func splitParagraphs(text string) []string {
	// Normalise line endings; "\r\n" is listed first so it becomes one "\n".
	text = strings.NewReplacer("\r\n", "\n", "\r", "\n").Replace(text)

	var paragraphs []string
	var current []string // lines of the paragraph being collected

	flush := func() {
		if len(current) == 0 {
			return
		}
		paragraphs = append(paragraphs, strings.TrimSpace(strings.Join(current, "\n")))
		current = current[:0]
	}

	for _, line := range strings.Split(text, "\n") {
		if strings.TrimSpace(line) == "" {
			// Blank (or whitespace-only) line: paragraph boundary.
			flush()
			continue
		}
		current = append(current, line)
	}
	flush()

	return paragraphs
}
|
|
|
|
// binChunks groups paragraphs, in order, into chunks of at most maxBytes
// bytes. Paragraphs sharing a chunk are joined by "\n\n".
//
// A paragraph that is itself longer than maxBytes is first broken into
// smaller pieces (see splitOversized) so that no chunk exceeds the budget;
// each piece is then packed like an ordinary paragraph. When maxBytes is not
// positive no splitting is attempted and every paragraph becomes its own
// chunk.
func binChunks(paragraphs []string, maxBytes int) []string {
	var chunks []string
	var current strings.Builder

	// add appends piece to the current chunk, starting a new chunk first if
	// the piece (plus separator) would not fit.
	add := func(piece string) {
		needed := len(piece)
		if current.Len() > 0 {
			needed += 2 // for the "\n\n" separator
		}
		if current.Len() > 0 && current.Len()+needed > maxBytes {
			chunks = append(chunks, current.String())
			current.Reset()
		}
		if current.Len() > 0 {
			current.WriteString("\n\n")
		}
		current.WriteString(piece)
	}

	for _, p := range paragraphs {
		if maxBytes > 0 && len(p) > maxBytes {
			for _, piece := range splitOversized(p, maxBytes) {
				add(piece)
			}
			continue
		}
		add(p)
	}

	if current.Len() > 0 {
		chunks = append(chunks, current.String())
	}
	return chunks
}

// splitOversized breaks s into pieces of at most maxBytes bytes each. It cuts
// at the last space, tab or newline inside the budget when there is one
// (dropping the whitespace at the cut), otherwise at the last rune boundary,
// so a multi-byte UTF-8 sequence is never split. maxBytes must be positive.
// A single rune wider than maxBytes is emitted whole, since it cannot be
// divided.
func splitOversized(s string, maxBytes int) []string {
	const breakChars = " \t\n"

	var pieces []string
	for len(s) > maxBytes {
		hard, soft := 0, 0 // last rune start / last whitespace within budget
		for i, r := range s {
			if i > maxBytes {
				break
			}
			hard = i
			if r == ' ' || r == '\t' || r == '\n' {
				soft = i
			}
		}

		cut := hard
		if soft > 0 {
			cut = soft
		}
		if cut == 0 {
			// The first rune alone exceeds the budget: take it whole so the
			// loop always makes progress.
			cut = len(s)
			for i := range s {
				if i > 0 {
					cut = i
					break
				}
			}
		}

		if piece := strings.TrimRight(s[:cut], breakChars); piece != "" {
			pieces = append(pieces, piece)
		}
		s = strings.TrimLeft(s[cut:], breakChars)
	}
	if s != "" {
		pieces = append(pieces, s)
	}
	return pieces
}
|