Files
libnovel/homelab/otel/grafana/provisioning/dashboards/runner.json
root e399b1ce01
Some checks failed
Release / Test backend (push) Successful in 41s
Release / Check ui (push) Failing after 32s
Release / Docker (push) Has been skipped
Release / Gitea Release (push) Has been skipped
feat: admin UX overhaul — status filters, retry/cancel, mobile cards, i18n, shelf pre-populate
- Admin layout: SVG icons, active highlight, divider between nav sections
- Scrape page: status filter pills with counts, text + status combined search
- Audio page: status filter pills, cancel jobs, retry failed jobs, mobile cards for cache tab
- Translation page: status filter pills (incl. cancelled), cancel + retry jobs, mobile cancel/retry cards, i18n for all labels
- AI Jobs page: fix concurrent cancel (Set instead of single slot), per-job cancel errors inline, full mobile card layout, i18n title/heading
- Text-gen page: tagline editable input + copy, warnings copy, i18n title/heading
- Book page: chapter cover Save button, audio monitor link, currentShelf pre-populated from server
- pocketbase.ts: add getBookShelf(), shelf field on UserLibraryEntry
- New API route: POST /api/admin/translation/bulk (proxy for translation retry)
- i18n: 15 new admin_translation_*, admin_ai_jobs_*, admin_text_gen_* keys across all 5 locales
2026-04-08 18:30:35 +05:00

378 lines
12 KiB
JSON

{
"uid": "libnovel-runner",
"title": "Runner Operations",
"description": "Task queue health, throughput, TTS routing, and live logs for the homelab runner.",
"tags": ["libnovel", "runner"],
"timezone": "browser",
"refresh": "30s",
"time": { "from": "now-3h", "to": "now" },
"schemaVersion": 39,
"panels": [
{
"id": 1,
"type": "stat",
"title": "Tasks Running",
"gridPos": { "x": 0, "y": 0, "w": 4, "h": 4 },
"options": {
"reduceOptions": { "calcs": ["lastNotNull"] },
"colorMode": "background",
"graphMode": "none",
"textMode": "auto"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 1 },
{ "color": "red", "value": 3 }
]
},
"mappings": []
}
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "runner_tasks_running",
"legendFormat": "running",
"instant": true
}
]
},
{
"id": 2,
"type": "stat",
"title": "Tasks Completed (total)",
"gridPos": { "x": 4, "y": 0, "w": 4, "h": 4 },
"options": {
"reduceOptions": { "calcs": ["lastNotNull"] },
"colorMode": "background",
"graphMode": "area",
"textMode": "auto"
},
"fieldConfig": {
"defaults": {
"color": { "fixedColor": "green", "mode": "fixed" },
"thresholds": { "mode": "absolute", "steps": [] }
}
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "runner_tasks_completed_total",
"legendFormat": "completed",
"instant": false,
"range": true
}
]
},
{
"id": 3,
"type": "stat",
"title": "Tasks Failed (total)",
"gridPos": { "x": 8, "y": 0, "w": 4, "h": 4 },
"options": {
"reduceOptions": { "calcs": ["lastNotNull"] },
"colorMode": "background",
"graphMode": "none",
"textMode": "auto"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 1 },
{ "color": "red", "value": 5 }
]
}
}
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "runner_tasks_failed_total",
"legendFormat": "failed",
"instant": true
}
]
},
{
"id": 4,
"type": "stat",
"title": "Runner Uptime",
"gridPos": { "x": 12, "y": 0, "w": 4, "h": 4 },
"options": {
"reduceOptions": { "calcs": ["lastNotNull"] },
"colorMode": "value",
"graphMode": "none",
"textMode": "auto"
},
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "red", "value": null },
{ "color": "yellow", "value": 60 },
{ "color": "green", "value": 300 }
]
}
}
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "runner_uptime_seconds",
"legendFormat": "uptime",
"instant": true
}
]
},
{
"id": 5,
"type": "stat",
"title": "Task Failure Rate",
"gridPos": { "x": 16, "y": 0, "w": 4, "h": 4 },
"options": {
"reduceOptions": { "calcs": ["lastNotNull"] },
"colorMode": "background",
"graphMode": "none",
"textMode": "auto"
},
"fieldConfig": {
"defaults": {
"unit": "percentunit",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 0.05 },
{ "color": "red", "value": 0.2 }
]
}
}
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "runner_tasks_failed_total / clamp_min(runner_tasks_completed_total + runner_tasks_failed_total, 1)",
"legendFormat": "failure rate",
"instant": true
}
]
},
{
"id": 6,
"type": "stat",
"title": "Runner Alive",
"gridPos": { "x": 20, "y": 0, "w": 4, "h": 4 },
"options": {
"reduceOptions": { "calcs": ["lastNotNull"] },
"colorMode": "background",
"graphMode": "none",
"textMode": "auto"
},
"fieldConfig": {
"defaults": {
"mappings": [
{ "type": "value", "options": { "1": { "text": "UP", "color": "green" }, "0": { "text": "DOWN", "color": "red" } } }
],
"thresholds": { "mode": "absolute", "steps": [] }
}
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "up{job=\"libnovel-runner\"}",
"legendFormat": "runner",
"instant": true
}
]
},
{
"id": 10,
"type": "timeseries",
"title": "Task Throughput (per minute)",
"gridPos": { "x": 0, "y": 4, "w": 12, "h": 8 },
"options": {
"tooltip": { "mode": "multi" },
"legend": { "displayMode": "list", "placement": "bottom" }
},
"fieldConfig": {
"defaults": {
"unit": "opm",
"custom": { "lineWidth": 2, "fillOpacity": 10 }
},
"overrides": [
{ "matcher": { "id": "byName", "options": "failed" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] },
{ "matcher": { "id": "byName", "options": "completed" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] },
{ "matcher": { "id": "byName", "options": "running" }, "properties": [{ "id": "unit", "value": "short" }] }
]
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "rate(runner_tasks_completed_total[5m]) * 60",
"legendFormat": "completed"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "rate(runner_tasks_failed_total[5m]) * 60",
"legendFormat": "failed"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "runner_tasks_running",
"legendFormat": "running"
}
]
},
{
"id": 11,
"type": "timeseries",
"title": "Audio Task Span Latency (p50 / p95 / p99)",
"gridPos": { "x": 12, "y": 4, "w": 12, "h": 8 },
"description": "End-to-end latency of runner.audio_task spans from Tempo span metrics.",
"options": {
"tooltip": { "mode": "multi" },
"legend": { "displayMode": "list", "placement": "bottom" }
},
"fieldConfig": {
"defaults": {
"unit": "s",
"custom": { "lineWidth": 2, "fillOpacity": 10 }
}
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "histogram_quantile(0.50, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.audio_task\"}[5m])) by (le))",
"legendFormat": "p50"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "histogram_quantile(0.95, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.audio_task\"}[5m])) by (le))",
"legendFormat": "p95"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "histogram_quantile(0.99, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.audio_task\"}[5m])) by (le))",
"legendFormat": "p99"
}
]
},
{
"id": 20,
"type": "timeseries",
"title": "Scrape Task Span Latency (p50 / p95 / p99)",
"gridPos": { "x": 0, "y": 12, "w": 12, "h": 8 },
"description": "End-to-end latency of runner.scrape_task spans from Tempo span metrics.",
"options": {
"tooltip": { "mode": "multi" },
"legend": { "displayMode": "list", "placement": "bottom" }
},
"fieldConfig": {
"defaults": {
"unit": "s",
"custom": { "lineWidth": 2, "fillOpacity": 10 }
}
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "histogram_quantile(0.50, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.scrape_task\"}[5m])) by (le))",
"legendFormat": "p50"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "histogram_quantile(0.95, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.scrape_task\"}[5m])) by (le))",
"legendFormat": "p95"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "histogram_quantile(0.99, sum(rate(traces_spanmetrics_latency_bucket{service=\"runner\", span_name=\"runner.scrape_task\"}[5m])) by (le))",
"legendFormat": "p99"
}
]
},
{
"id": 21,
"type": "timeseries",
"title": "Audio vs Scrape Task Rate",
"gridPos": { "x": 12, "y": 12, "w": 12, "h": 8 },
"description": "Relative throughput of audio generation vs book scraping.",
"options": {
"tooltip": { "mode": "multi" },
"legend": { "displayMode": "list", "placement": "bottom" }
},
"fieldConfig": {
"defaults": {
"unit": "ops",
"custom": { "lineWidth": 2, "fillOpacity": 10 }
}
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "sum(rate(traces_spanmetrics_calls_total{service=\"runner\", span_name=\"runner.audio_task\"}[5m]))",
"legendFormat": "audio tasks/s"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "sum(rate(traces_spanmetrics_calls_total{service=\"runner\", span_name=\"runner.scrape_task\"}[5m]))",
"legendFormat": "scrape tasks/s"
}
]
},
{
"id": 30,
"type": "logs",
"title": "Runner Logs (errors & warnings)",
"gridPos": { "x": 0, "y": 20, "w": 24, "h": 10 },
"options": {
"showTime": true,
"showLabels": false,
"showCommonLabels": false,
"wrapLogMessage": true,
"prettifyLogMessage": true,
"enableLogDetails": true,
"sortOrder": "Descending",
"dedupStrategy": "none"
},
"targets": [
{
"datasource": { "type": "loki", "uid": "loki" },
"expr": "{service_name=\"runner\"} |~ \"(?i)(error|warn)\"",
"legendFormat": ""
}
]
},
{
"id": 31,
"type": "logs",
"title": "Runner Logs (all)",
"gridPos": { "x": 0, "y": 30, "w": 24, "h": 10 },
"options": {
"showTime": true,
"showLabels": false,
"showCommonLabels": false,
"wrapLogMessage": true,
"prettifyLogMessage": true,
"enableLogDetails": true,
"sortOrder": "Descending",
"dedupStrategy": "none"
},
"targets": [
{
"datasource": { "type": "loki", "uid": "loki" },
"expr": "{service_name=\"runner\"}",
"legendFormat": ""
}
]
}
]
}