Some checks failed
CI / Docker / caddy (pull_request) Failing after 50s
CI / Check ui (pull_request) Successful in 1m5s
Release / Check ui (push) Successful in 45s
Release / Test backend (push) Successful in 1m29s
CI / Docker / ui (pull_request) Successful in 1m28s
Release / Docker / backend (push) Successful in 3m14s
CI / Test backend (pull_request) Failing after 42s
CI / Docker / backend (pull_request) Has been skipped
CI / Docker / runner (pull_request) Has been skipped
Release / Docker / caddy (push) Successful in 6m48s
Release / Docker / ui (push) Successful in 2m8s
Release / Docker / runner (push) Successful in 2m51s
Release / Upload source maps (push) Failing after 53s
Release / Gitea Release (push) Has been skipped
Introduce a Redis-backed Asynq task queue so the runner consumes TTS
jobs pushed by the backend instead of polling PocketBase.
- backend/internal/asynqqueue: Producer and Consumer wrappers
- backend/internal/runner: AsynqRunner mux, per-instance Prometheus
registry (fixes duplicate-collector panic in tests), redisConnOpt
- backend/internal/config: REDIS_ADDR / REDIS_PASSWORD env vars
- backend/cmd/{backend,runner}/main.go: wire Redis when env set; fall
back to legacy poll mode when unset
- Caddyfile: caddy-l4 TCP proxy for redis.libnovel.cc:6380 → homelab
- caddy/Dockerfile: add --with github.com/mholt/caddy-l4
- docker-compose.yml: Caddy exposes 6380, backend/runner get Redis env
- homelab/runner/docker-compose.yml: Redis sidecar, runner depends_on
- homelab/otel/grafana: Grafana dashboards (backend, catalogue, runner)
and alerting rules / contact-points provisioning
378 lines
12 KiB
JSON
378 lines
12 KiB
JSON
{
|
|
"uid": "libnovel-runner",
|
|
"title": "Runner Operations",
|
|
"description": "Task queue health, throughput, TTS routing, and live logs for the homelab runner.",
|
|
"tags": ["libnovel", "runner"],
|
|
"timezone": "browser",
|
|
"refresh": "30s",
|
|
"time": { "from": "now-3h", "to": "now" },
|
|
"schemaVersion": 39,
|
|
"panels": [
|
|
{
|
|
"id": 1,
|
|
"type": "stat",
|
|
"title": "Tasks Running",
|
|
"gridPos": { "x": 0, "y": 0, "w": 4, "h": 4 },
|
|
"options": {
|
|
"reduceOptions": { "calcs": ["lastNotNull"] },
|
|
"colorMode": "background",
|
|
"graphMode": "none",
|
|
"textMode": "auto"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "green", "value": null },
|
|
{ "color": "yellow", "value": 1 },
|
|
{ "color": "red", "value": 3 }
|
|
]
|
|
},
|
|
"mappings": []
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "libnovel_runner_tasks_running",
|
|
"legendFormat": "running",
|
|
"instant": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 2,
|
|
"type": "stat",
|
|
"title": "Tasks Completed (total)",
|
|
"gridPos": { "x": 4, "y": 0, "w": 4, "h": 4 },
|
|
"options": {
|
|
"reduceOptions": { "calcs": ["lastNotNull"] },
|
|
"colorMode": "background",
|
|
"graphMode": "area",
|
|
"textMode": "auto"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": { "fixedColor": "green", "mode": "fixed" },
|
|
"thresholds": { "mode": "absolute", "steps": [] }
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "libnovel_runner_tasks_completed_total",
|
|
"legendFormat": "completed",
|
|
"instant": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 3,
|
|
"type": "stat",
|
|
"title": "Tasks Failed (total)",
|
|
"gridPos": { "x": 8, "y": 0, "w": 4, "h": 4 },
|
|
"options": {
|
|
"reduceOptions": { "calcs": ["lastNotNull"] },
|
|
"colorMode": "background",
|
|
"graphMode": "none",
|
|
"textMode": "auto"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "green", "value": null },
|
|
{ "color": "yellow", "value": 1 },
|
|
{ "color": "red", "value": 5 }
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "libnovel_runner_tasks_failed_total",
|
|
"legendFormat": "failed",
|
|
"instant": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 4,
|
|
"type": "stat",
|
|
"title": "Runner Uptime",
|
|
"gridPos": { "x": 12, "y": 0, "w": 4, "h": 4 },
|
|
"options": {
|
|
"reduceOptions": { "calcs": ["lastNotNull"] },
|
|
"colorMode": "value",
|
|
"graphMode": "none",
|
|
"textMode": "auto"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "s",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "red", "value": null },
|
|
{ "color": "yellow", "value": 60 },
|
|
{ "color": "green", "value": 300 }
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "libnovel_runner_uptime_seconds",
|
|
"legendFormat": "uptime",
|
|
"instant": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 5,
|
|
"type": "stat",
|
|
"title": "Task Failure Rate",
|
|
"gridPos": { "x": 16, "y": 0, "w": 4, "h": 4 },
|
|
"options": {
|
|
"reduceOptions": { "calcs": ["lastNotNull"] },
|
|
"colorMode": "background",
|
|
"graphMode": "none",
|
|
"textMode": "auto"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percentunit",
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{ "color": "green", "value": null },
|
|
{ "color": "yellow", "value": 0.05 },
|
|
{ "color": "red", "value": 0.2 }
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "libnovel_runner_tasks_failed_total / clamp_min(libnovel_runner_tasks_completed_total + libnovel_runner_tasks_failed_total, 1)",
|
|
"legendFormat": "failure rate",
|
|
"instant": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 6,
|
|
"type": "stat",
|
|
"title": "Runner Alive",
|
|
"gridPos": { "x": 20, "y": 0, "w": 4, "h": 4 },
|
|
"options": {
|
|
"reduceOptions": { "calcs": ["lastNotNull"] },
|
|
"colorMode": "background",
|
|
"graphMode": "none",
|
|
"textMode": "auto"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"mappings": [
|
|
{ "type": "value", "options": { "1": { "text": "UP", "color": "green" }, "0": { "text": "DOWN", "color": "red" } } }
|
|
],
|
|
"thresholds": { "mode": "absolute", "steps": [] }
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "up{job=\"libnovel-runner\"}",
|
|
"legendFormat": "runner",
|
|
"instant": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 10,
|
|
"type": "timeseries",
|
|
"title": "Task Throughput (per minute)",
|
|
"gridPos": { "x": 0, "y": 4, "w": 12, "h": 8 },
|
|
"options": {
|
|
"tooltip": { "mode": "multi" },
|
|
"legend": { "displayMode": "list", "placement": "bottom" }
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "ops",
|
|
"custom": { "lineWidth": 2, "fillOpacity": 10 }
|
|
},
|
|
"overrides": [
|
|
{ "matcher": { "id": "byName", "options": "failed" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] },
|
|
{ "matcher": { "id": "byName", "options": "completed" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] }
|
|
]
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "rate(libnovel_runner_tasks_completed_total[5m]) * 60",
|
|
"legendFormat": "completed"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "rate(libnovel_runner_tasks_failed_total[5m]) * 60",
|
|
"legendFormat": "failed"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "libnovel_runner_tasks_running",
|
|
"legendFormat": "running"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 11,
|
|
"type": "timeseries",
|
|
"title": "Audio Task Span Latency (p50 / p95 / p99)",
|
|
"gridPos": { "x": 12, "y": 4, "w": 12, "h": 8 },
|
|
"description": "End-to-end latency of runner.audio_task spans from Tempo span metrics.",
|
|
"options": {
|
|
"tooltip": { "mode": "multi" },
|
|
"legend": { "displayMode": "list", "placement": "bottom" }
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "s",
|
|
"custom": { "lineWidth": 2, "fillOpacity": 10 }
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "histogram_quantile(0.50, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.audio_task\"}[5m])) by (le))",
|
|
"legendFormat": "p50"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "histogram_quantile(0.95, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.audio_task\"}[5m])) by (le))",
|
|
"legendFormat": "p95"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "histogram_quantile(0.99, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.audio_task\"}[5m])) by (le))",
|
|
"legendFormat": "p99"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 20,
|
|
"type": "timeseries",
|
|
"title": "Scrape Task Span Latency (p50 / p95 / p99)",
|
|
"gridPos": { "x": 0, "y": 12, "w": 12, "h": 8 },
|
|
"description": "End-to-end latency of runner.scrape_task spans from Tempo span metrics.",
|
|
"options": {
|
|
"tooltip": { "mode": "multi" },
|
|
"legend": { "displayMode": "list", "placement": "bottom" }
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "s",
|
|
"custom": { "lineWidth": 2, "fillOpacity": 10 }
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "histogram_quantile(0.50, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.scrape_task\"}[5m])) by (le))",
|
|
"legendFormat": "p50"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "histogram_quantile(0.95, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.scrape_task\"}[5m])) by (le))",
|
|
"legendFormat": "p95"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "histogram_quantile(0.99, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.scrape_task\"}[5m])) by (le))",
|
|
"legendFormat": "p99"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 21,
|
|
"type": "timeseries",
|
|
"title": "Audio vs Scrape Task Rate",
|
|
"gridPos": { "x": 12, "y": 12, "w": 12, "h": 8 },
|
|
"description": "Relative throughput of audio generation vs book scraping.",
|
|
"options": {
|
|
"tooltip": { "mode": "multi" },
|
|
"legend": { "displayMode": "list", "placement": "bottom" }
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "ops",
|
|
"custom": { "lineWidth": 2, "fillOpacity": 10 }
|
|
}
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "sum(rate(traces_spanmetrics_calls_total{service=\"runner\", span_name=\"runner.audio_task\"}[5m]))",
|
|
"legendFormat": "audio tasks/s"
|
|
},
|
|
{
|
|
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
|
"expr": "sum(rate(traces_spanmetrics_calls_total{service=\"runner\", span_name=\"runner.scrape_task\"}[5m]))",
|
|
"legendFormat": "scrape tasks/s"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 30,
|
|
"type": "logs",
|
|
"title": "Runner Logs (errors & warnings)",
|
|
"gridPos": { "x": 0, "y": 20, "w": 24, "h": 10 },
|
|
"options": {
|
|
"showTime": true,
|
|
"showLabels": false,
|
|
"showCommonLabels": false,
|
|
"wrapLogMessage": true,
|
|
"prettifyLogMessage": true,
|
|
"enableLogDetails": true,
|
|
"sortOrder": "Descending",
|
|
"dedupStrategy": "none"
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "loki", "uid": "loki" },
|
|
"expr": "{service_name=\"runner\"} | json | level =~ `(WARN|ERROR|error|warn)`",
|
|
"legendFormat": ""
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 31,
|
|
"type": "logs",
|
|
"title": "Runner Logs (all)",
|
|
"gridPos": { "x": 0, "y": 30, "w": 24, "h": 10 },
|
|
"options": {
|
|
"showTime": true,
|
|
"showLabels": false,
|
|
"showCommonLabels": false,
|
|
"wrapLogMessage": true,
|
|
"prettifyLogMessage": true,
|
|
"enableLogDetails": true,
|
|
"sortOrder": "Descending",
|
|
"dedupStrategy": "none"
|
|
},
|
|
"targets": [
|
|
{
|
|
"datasource": { "type": "loki", "uid": "loki" },
|
|
"expr": "{service_name=\"runner\"} | json",
|
|
"legendFormat": ""
|
|
}
|
|
]
|
|
}
|
|
]
|
|
}
|