Files
libnovel/backend/internal/cfai/text.go
Admin 0fc30d1328 fix: handle Llama 4 Scout array response shape in CF AI text decoder
Llama 4 Scout returns `result.response` as an array of objects
[{"generated_text":"..."}] instead of a plain string. Decode into
json.RawMessage and try both shapes; fall back to generated_text[0].

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 10:28:14 +05:00

254 lines
8.8 KiB
Go

// Text generation via Cloudflare Workers AI LLM models.
//
// API reference:
//
// POST https://api.cloudflare.com/client/v4/accounts/{accountID}/ai/run/{model}
// Authorization: Bearer {apiToken}
// Content-Type: application/json
//
// Request body (all models; "max_tokens" is included only when an output limit is set):
//
// { "messages": [{"role":"system","content":"..."},{"role":"user","content":"..."}], "max_tokens": 1024 }
//
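// Response (wrapped):
//
// { "result": { "response": "..." }, "success": true }
//
// Some models (e.g. Llama 4 Scout) return "response" as an array of objects instead:
//
// { "result": { "response": [{"generated_text":"..."}] }, "success": true }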
package cfai
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"time"
)
// TextModel is the identifier of a Cloudflare Workers AI text generation
// model. Its string value is what gets substituted into the run endpoint path.
type TextModel string

// Supported text generation models, grouped by provider.
const (
	// Google.

	// TextModelGemma4 is Google Gemma 4 (256k context).
	TextModelGemma4 TextModel = "@cf/google/gemma-4-26b-a4b-it"
	// TextModelGemma3_12B is Google Gemma 3 12B (80k context).
	TextModelGemma3_12B TextModel = "@cf/google/gemma-3-12b-it"

	// Meta.

	// TextModelLlama4Scout is Meta Llama 4 Scout 17B (multimodal).
	TextModelLlama4Scout TextModel = "@cf/meta/llama-4-scout-17b-16e-instruct"
	// TextModelLlama33_70B is Meta Llama 3.3 70B (fast fp8 variant).
	TextModelLlama33_70B TextModel = "@cf/meta/llama-3.3-70b-instruct-fp8-fast"
	// TextModelLlama32_3B is Meta Llama 3.2 3B (lightweight).
	TextModelLlama32_3B TextModel = "@cf/meta/llama-3.2-3b-instruct"

	// Qwen.

	// TextModelQwen3_30B is Qwen3 30B MoE (function calling).
	TextModelQwen3_30B TextModel = "@cf/qwen/qwen3-30b-a3b-fp8"
	// TextModelQwQ32B is the Qwen QwQ 32B reasoning model.
	TextModelQwQ32B TextModel = "@cf/qwen/qwq-32b"

	// Other providers.

	// TextModelMistralSmall is Mistral Small 3.1 24B (128k context).
	TextModelMistralSmall TextModel = "@cf/mistralai/mistral-small-3.1-24b-instruct"
	// TextModelDeepSeekR1 is the DeepSeek R1 distill of Qwen 32B.
	TextModelDeepSeekR1 TextModel = "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b"
	// TextModelGPTOSS120B is OpenAI gpt-oss-120b (high reasoning).
	TextModelGPTOSS120B TextModel = "@cf/openai/gpt-oss-120b"
	// TextModelGPTOSS20B is OpenAI gpt-oss-20b (lower latency).
	TextModelGPTOSS20B TextModel = "@cf/openai/gpt-oss-20b"
	// TextModelNemotron3 is NVIDIA Nemotron 3 120B (agentic).
	TextModelNemotron3 TextModel = "@cf/nvidia/nemotron-3-120b-a12b"

	// DefaultTextModel is the model used when a request does not name one.
	DefaultTextModel TextModel = TextModelLlama4Scout
)
// TextModelInfo is the metadata record for one text generation model.
// It serialises to JSON using the snake_case keys given in the field tags.
type TextModelInfo struct {
	// ID is the model identifier.
	ID string `json:"id"`
	// Label is the display name of the model.
	Label string `json:"label"`
	// Provider is the name of the organisation behind the model.
	Provider string `json:"provider"`
	// ContextSize is the maximum context length, in tokens.
	ContextSize int `json:"context_size"`
	// Description is a short free-text summary of the model.
	Description string `json:"description"`
}
// AllTextModels returns metadata about every supported text generation model.
//
// A fresh slice is built on every call, so callers may modify the result
// without affecting later calls. Each entry's Description is kept consistent
// with its ContextSize.
func AllTextModels() []TextModelInfo {
	return []TextModelInfo{
		{
			ID:          string(TextModelGemma4),
			Label:       "Gemma 4 26B",
			Provider:    "Google",
			ContextSize: 256000,
			Description: "Google's most intelligent open model family. 256k context, function calling.",
		},
		{
			ID:          string(TextModelLlama4Scout),
			Label:       "Llama 4 Scout 17B",
			Provider:    "Meta",
			ContextSize: 131000,
			Description: "Natively multimodal, 16 experts. Good all-purpose model with function calling.",
		},
		{
			ID:          string(TextModelLlama33_70B),
			Label:       "Llama 3.3 70B (fp8 fast)",
			Provider:    "Meta",
			ContextSize: 24000,
			Description: "Llama 3.3 70B quantized to fp8 for speed. Excellent instruction following.",
		},
		{
			ID:          string(TextModelQwen3_30B),
			Label:       "Qwen3 30B MoE",
			Provider:    "Qwen",
			ContextSize: 32768,
			Description: "MoE architecture with strong reasoning and instruction following.",
		},
		{
			ID:          string(TextModelMistralSmall),
			Label:       "Mistral Small 3.1 24B",
			Provider:    "MistralAI",
			ContextSize: 128000,
			Description: "Strong text performance with 128k context and function calling.",
		},
		{
			ID:          string(TextModelQwQ32B),
			Label:       "QwQ 32B (reasoning)",
			Provider:    "Qwen",
			ContextSize: 24000,
			Description: "Reasoning model — thinks before answering. Slower but more accurate.",
		},
		{
			ID:          string(TextModelDeepSeekR1),
			Label:       "DeepSeek R1 32B",
			Provider:    "DeepSeek",
			ContextSize: 80000,
			Description: "R1-distilled reasoning model. Outperforms o1-mini on many benchmarks.",
		},
		{
			ID:          string(TextModelGemma3_12B),
			Label:       "Gemma 3 12B",
			Provider:    "Google",
			ContextSize: 80000,
			// The description previously advertised "128k context", which
			// contradicted ContextSize above; it now matches the 80k limit.
			Description: "Multimodal, 80k context, multilingual (140+ languages).",
		},
		{
			ID:          string(TextModelGPTOSS120B),
			Label:       "GPT-OSS 120B",
			Provider:    "OpenAI",
			ContextSize: 128000,
			Description: "OpenAI open-weight model for production, general purpose, high reasoning.",
		},
		{
			ID:          string(TextModelGPTOSS20B),
			Label:       "GPT-OSS 20B",
			Provider:    "OpenAI",
			ContextSize: 128000,
			Description: "OpenAI open-weight model for lower latency and specialized use cases.",
		},
		{
			ID:          string(TextModelNemotron3),
			Label:       "Nemotron 3 120B",
			Provider:    "NVIDIA",
			ContextSize: 256000,
			Description: "Hybrid MoE with leading accuracy for multi-agent applications.",
		},
		{
			ID:          string(TextModelLlama32_3B),
			Label:       "Llama 3.2 3B",
			Provider:    "Meta",
			ContextSize: 80000,
			Description: "Lightweight model for simple tasks. Fast and cheap.",
		},
	}
}
// TextMessage is one entry of a chat conversation sent to a model.
type TextMessage struct {
	// Role names the speaker of the message: "system" or "user".
	Role string `json:"role"`
	// Content is the message text.
	Content string `json:"content"`
}
// TextRequest bundles the parameters accepted by Generate.
type TextRequest struct {
	// Model selects the CF Workers AI model by ID. An empty value means
	// DefaultTextModel is used.
	Model TextModel

	// Messages holds the conversation history (system and user messages).
	Messages []TextMessage

	// MaxTokens caps the length of the output; 0 leaves the model default
	// in place.
	MaxTokens int
}
// TextGenClient generates text via Cloudflare Workers AI LLM models.
type TextGenClient interface {
// Generate sends a chat-style request and returns the model's response text.
Generate(ctx context.Context, req TextRequest) (string, error)
// Models returns metadata about all supported text generation models.
Models() []TextModelInfo
}
// textGenHTTPClient implements CF AI text generation over HTTP.
type textGenHTTPClient struct {
	// accountID is the Cloudflare account ID placed in the endpoint URL.
	accountID string
	// apiToken is sent as the Bearer token on every request.
	apiToken string
	// http is the client used to execute requests.
	http *http.Client
}
// NewTextGen builds a TextGenClient bound to the given Cloudflare account ID
// and API token. The underlying HTTP client uses a five-minute timeout.
func NewTextGen(accountID, apiToken string) TextGenClient {
	client := new(textGenHTTPClient)
	client.accountID = accountID
	client.apiToken = apiToken
	client.http = &http.Client{Timeout: 5 * time.Minute}
	return client
}
// Generate sends req.Messages to the selected model and returns the response text.
//
// When req.Model is empty, DefaultTextModel is used. "max_tokens" is added to
// the request body only when req.MaxTokens is positive.
//
// An error is returned when the request cannot be built or sent, when the
// HTTP status is not 200 (the message carries at most the first 300 bytes of
// the body), when the response envelope cannot be decoded or reports
// success=false, or when "result.response" has an unrecognised shape.
func (c *textGenHTTPClient) Generate(ctx context.Context, req TextRequest) (string, error) {
	if req.Model == "" {
		req.Model = DefaultTextModel
	}

	body := map[string]any{
		"messages": req.Messages,
	}
	if req.MaxTokens > 0 {
		body["max_tokens"] = req.MaxTokens
	}
	encoded, err := json.Marshal(body)
	if err != nil {
		return "", fmt.Errorf("cfai/text: marshal: %w", err)
	}

	endpoint := fmt.Sprintf("https://api.cloudflare.com/client/v4/accounts/%s/ai/run/%s",
		c.accountID, string(req.Model))
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(encoded))
	if err != nil {
		return "", fmt.Errorf("cfai/text: build request: %w", err)
	}
	httpReq.Header.Set("Authorization", "Bearer "+c.apiToken)
	httpReq.Header.Set("Content-Type", "application/json")

	resp, err := c.http.Do(httpReq)
	if err != nil {
		return "", fmt.Errorf("cfai/text: http: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message, so
		// a failed read simply leaves the message shorter.
		errBody, _ := io.ReadAll(resp.Body)
		msg := string(errBody)
		if len(msg) > 300 {
			msg = msg[:300]
		}
		return "", fmt.Errorf("cfai/text: model %s returned %d: %s", req.Model, resp.StatusCode, msg)
	}

	return decodeTextResponse(req.Model, resp.Body)
}

// decodeTextResponse extracts the generated text from a CF AI response envelope read from r.
func decodeTextResponse(model TextModel, r io.Reader) (string, error) {
	// CF AI wraps responses: { "result": { "response": "..." }, "success": true }
	// Some models (e.g. Llama 4 Scout) return response as an array:
	// { "result": { "response": [{"generated_text":"..."}] } }
	var wrapper struct {
		Result struct {
			Response json.RawMessage `json:"response"`
		} `json:"result"`
		Success bool `json:"success"`
		// Errors is kept raw: the envelope reports errors as objects
		// ({"code":...,"message":...}), and decoding them into []string would
		// make Decode itself fail and hide the real error from the caller.
		Errors json.RawMessage `json:"errors"`
	}
	if err := json.NewDecoder(r).Decode(&wrapper); err != nil {
		return "", fmt.Errorf("cfai/text: decode response: %w", err)
	}
	if !wrapper.Success {
		return "", fmt.Errorf("cfai/text: model %s error: %s", model, wrapper.Errors)
	}

	raw := wrapper.Result.Response

	// Try plain string first.
	var text string
	if err := json.Unmarshal(raw, &text); err == nil {
		return text, nil
	}

	// Fall back: array of objects with a "generated_text" field; the first
	// element's text is returned.
	var arr []struct {
		GeneratedText string `json:"generated_text"`
	}
	if err := json.Unmarshal(raw, &arr); err == nil && len(arr) > 0 {
		return arr[0].GeneratedText, nil
	}

	return "", fmt.Errorf("cfai/text: model %s: unrecognised response shape: %s", model, raw)
}
// Models reports the metadata of every supported text generation model by
// delegating to AllTextModels.
func (c *textGenHTTPClient) Models() []TextModelInfo {
	models := AllTextModels()
	return models
}