// libnovel/backend/internal/libretranslate/client.go

// Package libretranslate provides an HTTP client for a self-hosted
// LibreTranslate instance. It handles text chunking, concurrent translation,
// and reassembly so callers can pass arbitrarily long markdown strings.
package libretranslate

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"strings"
	"sync"
	"time"
)

const (
	// maxChunkBytes is the target maximum size, in bytes, of each chunk sent
	// to LibreTranslate. It is compared against string byte lengths, so for
	// multi-byte text a chunk holds fewer characters than this number.
	// The value is chosen to stay comfortably below a 5000-character request
	// limit. NOTE(review): the limit is configurable on the server — confirm
	// the deployed instance's character limit.
	maxChunkBytes = 4500
	// concurrency is the number of simultaneous translation requests issued
	// by a single Translate call.
	concurrency = 3
)

// Client translates text via LibreTranslate.
//
// New returns a nil Client when no URL is configured. NOTE(review): a nil
// interface value has no method to dispatch to, so calling Translate on it
// panics rather than returning the text unchanged — callers presumably check
// for nil before calling; confirm at the call sites.
type Client interface {
	// Translate translates text from sourceLang to targetLang.
	// text is a raw markdown string. The returned string is the translated
	// markdown, reassembled in original paragraph order.
	Translate(ctx context.Context, text, sourceLang, targetLang string) (string, error)
}

// New builds a Client that talks to the LibreTranslate instance at url.
// Trailing slashes are stripped from url so endpoint paths can be appended
// directly. apiKey may be empty; it is only sent when set.
//
// An empty url means translation is disabled, in which case New returns nil.
func New(url, apiKey string) Client {
	if len(url) == 0 {
		return nil
	}

	client := &httpClient{
		apiKey: apiKey,
		// Each HTTP request is bounded to one minute.
		http: &http.Client{Timeout: 60 * time.Second},
	}
	client.url = strings.TrimRight(url, "/")
	return client
}

// httpClient is the Client implementation returned by New; it sends
// translation requests to a LibreTranslate server over HTTP.
type httpClient struct {
	// url is the server base URL with trailing slashes removed (see New).
	url    string
	// apiKey is sent as "api_key" in the request body when non-empty.
	apiKey string
	// http is the underlying HTTP client; New configures a 60-second timeout.
	http   *http.Client
}

// Translate splits text into paragraphs, packs them into chunks of at most
// maxChunkBytes where possible, translates the chunks concurrently (at most
// `concurrency` requests in flight), and joins the results with blank lines
// in the original chunk order.
//
// If text contains no non-blank paragraphs it is returned unchanged. A nil
// receiver also returns text unchanged.
//
// Translation is fail-fast: the first chunk error (or cancellation of ctx)
// cancels all in-flight requests, stops further chunks from being sent, and
// is returned as the error. On error the returned string is empty.
func (c *httpClient) Translate(ctx context.Context, text, sourceLang, targetLang string) (string, error) {
	// Guard against a typed-nil *httpClient stored in a Client interface.
	if c == nil {
		return text, nil
	}

	paragraphs := splitParagraphs(text)
	if len(paragraphs) == 0 {
		return text, nil
	}
	chunks := binChunks(paragraphs, maxChunkBytes)

	// A derived context lets the first failure abort every other request
	// instead of letting each one run until it fails or times out.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	// Each goroutine writes only its own index, so no locking is needed.
	translated := make([]string, len(chunks))

	var (
		wg       sync.WaitGroup
		errOnce  sync.Once
		firstErr error
	)
	// fail records the earliest error and cancels the remaining work.
	fail := func(err error) {
		errOnce.Do(func() {
			firstErr = err
			cancel()
		})
	}

	// sem bounds the number of in-flight requests.
	sem := make(chan struct{}, concurrency)

launch:
	for i, chunk := range chunks {
		// Wait for a free slot, but give up as soon as the context is done
		// so a failure does not keep dispatching the remaining chunks.
		select {
		case sem <- struct{}{}:
		case <-ctx.Done():
			fail(ctx.Err())
			break launch
		}

		wg.Add(1)
		go func(idx int, chunkText string) {
			defer wg.Done()
			defer func() { <-sem }()

			result, err := c.translateChunk(ctx, chunkText, sourceLang, targetLang)
			if err != nil {
				fail(err)
				return
			}
			translated[idx] = result
		}(i, chunk)
	}
	wg.Wait()

	if firstErr != nil {
		return "", firstErr
	}
	return strings.Join(translated, "\n\n"), nil
}

// translateChunk sends a single POST {url}/translate request for text and
// returns the "translatedText" field of the JSON response.
//
// The request body is JSON with "q", "source", "target" and "format" ("html")
// fields, plus "api_key" when the client has one configured. The request is
// bound to ctx.
//
// Any non-200 response is returned as an error containing the status code and
// the server's "error" message; when the response body carries no such
// message (for example an HTML error page from a proxy), the standard HTTP
// status text is used instead so the error is never left without a detail.
func (c *httpClient) translateChunk(ctx context.Context, text, sourceLang, targetLang string) (string, error) {
	reqBody := map[string]string{
		"q":      text,
		"source": sourceLang,
		"target": targetLang,
		"format": "html",
	}
	if c.apiKey != "" {
		reqBody["api_key"] = c.apiKey
	}

	b, err := json.Marshal(reqBody)
	if err != nil {
		return "", fmt.Errorf("libretranslate: marshal request: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.url+"/translate", bytes.NewReader(b))
	if err != nil {
		return "", fmt.Errorf("libretranslate: build request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := c.http.Do(req)
	if err != nil {
		return "", fmt.Errorf("libretranslate: request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		var errBody struct {
			Error string `json:"error"`
		}
		// Decoding is best effort: the body may not be JSON at all. A decode
		// failure is deliberately ignored because the status code is the
		// primary signal; the fallback below supplies the detail instead.
		_ = json.NewDecoder(resp.Body).Decode(&errBody)

		detail := errBody.Error
		if detail == "" {
			detail = http.StatusText(resp.StatusCode)
		}
		return "", fmt.Errorf("libretranslate: status %d: %s", resp.StatusCode, detail)
	}

	var result struct {
		TranslatedText string `json:"translatedText"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return "", fmt.Errorf("libretranslate: decode response: %w", err)
	}
	return result.TranslatedText, nil
}

// splitParagraphs splits markdown text into paragraphs separated by one or
// more blank lines and returns the non-empty paragraphs in order.
//
// A line is blank when it is empty or contains only whitespace, so a
// separator such as "\n \t\n" splits paragraphs just like "\n\n" does.
// Windows line endings ("\r\n") are normalised to "\n" first. Each returned
// paragraph has leading and trailing whitespace removed; line breaks and
// whitespace inside a paragraph are kept as-is.
//
// The result is nil when text holds no non-blank content.
func splitParagraphs(text string) []string {
	// Normalise line endings.
	text = strings.ReplaceAll(text, "\r\n", "\n")

	var (
		paragraphs []string
		current    []string // lines of the paragraph being collected
	)
	// flush emits the collected lines as one paragraph, if there are any.
	flush := func() {
		if len(current) == 0 {
			return
		}
		paragraphs = append(paragraphs, strings.TrimSpace(strings.Join(current, "\n")))
		current = current[:0]
	}

	for _, line := range strings.Split(text, "\n") {
		if strings.TrimSpace(line) == "" {
			// Blank (or whitespace-only) line: paragraph boundary.
			flush()
			continue
		}
		current = append(current, line)
	}
	flush()

	return paragraphs
}

// binChunks packs consecutive paragraphs into chunks, greedily filling each
// chunk until adding the next paragraph (plus its "\n\n" separator) would push
// it past maxBytes. Paragraph order is preserved and paragraphs within a chunk
// are joined by "\n\n".
//
// A paragraph is never split: one that is longer than maxBytes on its own is
// emitted as a single chunk that exceeds the limit. The result is nil when
// there is nothing to emit.
func binChunks(paragraphs []string, maxBytes int) []string {
	const sep = "\n\n"

	var (
		out []string
		buf strings.Builder
	)

	for _, para := range paragraphs {
		switch {
		case buf.Len() == 0:
			// Starting a fresh chunk: no separator, and never flush an
			// empty chunk even if para alone is over the limit.
		case buf.Len()+len(sep)+len(para) > maxBytes:
			// para does not fit; close the current chunk and start anew.
			out = append(out, buf.String())
			buf.Reset()
		default:
			buf.WriteString(sep)
		}
		buf.WriteString(para)
	}

	if buf.Len() > 0 {
		out = append(out, buf.String())
	}
	return out
}