Files
libnovel/homelab/otel/grafana/provisioning/dashboards/backend.json
Admin fe1a933fd0
Some checks failed
CI / Docker / caddy (pull_request) Failing after 50s
CI / Check ui (pull_request) Successful in 1m5s
Release / Check ui (push) Successful in 45s
Release / Test backend (push) Successful in 1m29s
CI / Docker / ui (pull_request) Successful in 1m28s
Release / Docker / backend (push) Successful in 3m14s
CI / Test backend (pull_request) Failing after 42s
CI / Docker / backend (pull_request) Has been skipped
CI / Docker / runner (pull_request) Has been skipped
Release / Docker / caddy (push) Successful in 6m48s
Release / Docker / ui (push) Successful in 2m8s
Release / Docker / runner (push) Successful in 2m51s
Release / Upload source maps (push) Failing after 53s
Release / Gitea Release (push) Has been skipped
feat(queue): replace PocketBase polling with Asynq + Redis
Introduce a Redis-backed Asynq task queue so the runner consumes TTS
jobs pushed by the backend instead of polling PocketBase.

- backend/internal/asynqqueue: Producer and Consumer wrappers
- backend/internal/runner: AsynqRunner mux, per-instance Prometheus
  registry (fixes duplicate-collector panic in tests), redisConnOpt
- backend/internal/config: REDIS_ADDR / REDIS_PASSWORD env vars
- backend/cmd/{backend,runner}/main.go: wire Redis when env set; fall
  back to legacy poll mode when unset
- Caddyfile: caddy-l4 TCP proxy for redis.libnovel.cc:6380 → homelab
- caddy/Dockerfile: add --with github.com/mholt/caddy-l4
- docker-compose.yml: Caddy exposes 6380, backend/runner get Redis env
- homelab/runner/docker-compose.yml: Redis sidecar, runner depends_on
- homelab/otel/grafana: Grafana dashboards (backend, catalogue, runner)
  and alerting rules / contact-points provisioning
2026-03-28 14:32:40 +05:00

339 lines
12 KiB
JSON

{
"uid": "libnovel-backend",
"title": "Backend API",
"description": "Request rate, error rate, and latency for the LibNovel backend. Powered by Tempo span metrics and UI OTel instrumentation.",
"tags": ["libnovel", "backend", "api"],
"timezone": "browser",
"refresh": "30s",
"time": { "from": "now-3h", "to": "now" },
"schemaVersion": 39,
"panels": [
{
"id": 1,
"type": "stat",
"title": "Request Rate (RPS)",
"gridPos": { "x": 0, "y": 0, "w": 4, "h": 4 },
"options": {
"reduceOptions": { "calcs": ["lastNotNull"] },
"colorMode": "value",
"graphMode": "area",
"textMode": "auto"
},
"fieldConfig": {
"defaults": {
"unit": "reqps",
"color": { "mode": "thresholds" },
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }
}
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "sum(rate(traces_spanmetrics_calls_total{service=\"backend\"}[5m]))",
"legendFormat": "rps",
"instant": true
}
]
},
{
"id": 2,
"type": "stat",
"title": "Error Rate",
"gridPos": { "x": 4, "y": 0, "w": 4, "h": 4 },
"options": {
"reduceOptions": { "calcs": ["lastNotNull"] },
"colorMode": "background",
"graphMode": "none"
},
"fieldConfig": {
"defaults": {
"unit": "percentunit",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 0.01 },
{ "color": "red", "value": 0.05 }
]
}
}
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "sum(rate(traces_spanmetrics_calls_total{service=\"backend\", status_code=\"STATUS_CODE_ERROR\"}[5m])) / clamp_min(sum(rate(traces_spanmetrics_calls_total{service=\"backend\"}[5m])), 0.001)",
"legendFormat": "error rate",
"instant": true
}
]
},
{
"id": 3,
"type": "stat",
"title": "p50 Latency",
"gridPos": { "x": 8, "y": 0, "w": 4, "h": 4 },
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value", "graphMode": "area" },
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 0.2 },
{ "color": "red", "value": 1 }
]
}
}
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "histogram_quantile(0.50, sum(rate(traces_spanmetrics_latency_bucket{service=\"backend\"}[5m])) by (le))",
"legendFormat": "p50",
"instant": true
}
]
},
{
"id": 4,
"type": "stat",
"title": "p95 Latency",
"gridPos": { "x": 12, "y": 0, "w": 4, "h": 4 },
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value", "graphMode": "area" },
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 0.5 },
{ "color": "red", "value": 2 }
]
}
}
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "histogram_quantile(0.95, sum(rate(traces_spanmetrics_latency_bucket{service=\"backend\"}[5m])) by (le))",
"legendFormat": "p95",
"instant": true
}
]
},
{
"id": 5,
"type": "stat",
"title": "p99 Latency",
"gridPos": { "x": 16, "y": 0, "w": 4, "h": 4 },
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value", "graphMode": "area" },
"fieldConfig": {
"defaults": {
"unit": "s",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 1 },
{ "color": "red", "value": 5 }
]
}
}
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "histogram_quantile(0.99, sum(rate(traces_spanmetrics_latency_bucket{service=\"backend\"}[5m])) by (le))",
"legendFormat": "p99",
"instant": true
}
]
},
{
"id": 6,
"type": "stat",
"title": "5xx Errors / min",
"gridPos": { "x": 20, "y": 0, "w": 4, "h": 4 },
"options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "background", "graphMode": "none" },
"fieldConfig": {
"defaults": {
"unit": "short",
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 1 },
{ "color": "red", "value": 5 }
]
}
}
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "sum(rate(http_client_request_duration_seconds_count{job=\"ui\", server_address=\"backend\", http_response_status_code=~\"5..\"}[5m])) * 60",
"legendFormat": "5xx/min",
"instant": true
}
]
},
{
"id": 10,
"type": "timeseries",
"title": "Request Rate by Status",
"gridPos": { "x": 0, "y": 4, "w": 12, "h": 8 },
"options": {
"tooltip": { "mode": "multi" },
"legend": { "displayMode": "list", "placement": "bottom" }
},
"fieldConfig": {
"defaults": { "unit": "reqps", "custom": { "lineWidth": 2, "fillOpacity": 10 } },
"overrides": [
{ "matcher": { "id": "byFrameRefID", "options": "errors" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }
]
},
"targets": [
{
"refId": "success",
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "sum(rate(http_client_request_duration_seconds_count{job=\"ui\", server_address=\"backend\", http_response_status_code=~\"2..\"}[5m]))",
"legendFormat": "2xx"
},
{
"refId": "notfound",
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "sum(rate(http_client_request_duration_seconds_count{job=\"ui\", server_address=\"backend\", http_response_status_code=~\"4..\"}[5m]))",
"legendFormat": "4xx"
},
{
"refId": "errors",
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "sum(rate(http_client_request_duration_seconds_count{job=\"ui\", server_address=\"backend\", http_response_status_code=~\"5..\"}[5m]))",
"legendFormat": "5xx"
}
]
},
{
"id": 11,
"type": "timeseries",
"title": "Latency Percentiles (backend spans)",
"gridPos": { "x": 12, "y": 4, "w": 12, "h": 8 },
"options": {
"tooltip": { "mode": "multi" },
"legend": { "displayMode": "list", "placement": "bottom" }
},
"fieldConfig": {
"defaults": { "unit": "s", "custom": { "lineWidth": 2, "fillOpacity": 10 } }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "histogram_quantile(0.50, sum(rate(traces_spanmetrics_latency_bucket{service=\"backend\"}[5m])) by (le))",
"legendFormat": "p50"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "histogram_quantile(0.95, sum(rate(traces_spanmetrics_latency_bucket{service=\"backend\"}[5m])) by (le))",
"legendFormat": "p95"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "histogram_quantile(0.99, sum(rate(traces_spanmetrics_latency_bucket{service=\"backend\"}[5m])) by (le))",
"legendFormat": "p99"
}
]
},
{
"id": 12,
"type": "timeseries",
"title": "Requests / min by HTTP method (UI → Backend)",
"gridPos": { "x": 0, "y": 12, "w": 12, "h": 8 },
"options": {
"tooltip": { "mode": "multi" },
"legend": { "displayMode": "list", "placement": "bottom" }
},
"fieldConfig": {
"defaults": { "unit": "short", "custom": { "lineWidth": 2, "fillOpacity": 5 } }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "sum(rate(http_client_request_duration_seconds_count{job=\"ui\", server_address=\"backend\"}[5m])) by (http_request_method) * 60",
"legendFormat": "{{http_request_method}}"
}
]
},
{
"id": 13,
"type": "timeseries",
"title": "Requests / min — UI → PocketBase",
"gridPos": { "x": 12, "y": 12, "w": 12, "h": 8 },
"description": "Traffic from SvelteKit server to PocketBase (auth, collections, etc.).",
"options": {
"tooltip": { "mode": "multi" },
"legend": { "displayMode": "list", "placement": "bottom" }
},
"fieldConfig": {
"defaults": { "unit": "short", "custom": { "lineWidth": 2, "fillOpacity": 5 } }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "sum(rate(http_client_request_duration_seconds_count{job=\"ui\", server_address=\"pocketbase\"}[5m])) by (http_request_method, http_response_status_code) * 60",
"legendFormat": "{{http_request_method}} {{http_response_status_code}}"
}
]
},
{
"id": 14,
"type": "timeseries",
"title": "UI → Backend Latency (p50 / p95)",
"gridPos": { "x": 0, "y": 20, "w": 12, "h": 8 },
"description": "HTTP client latency as seen from the SvelteKit SSR layer calling backend.",
"options": {
"tooltip": { "mode": "multi" },
"legend": { "displayMode": "list", "placement": "bottom" }
},
"fieldConfig": {
"defaults": { "unit": "s", "custom": { "lineWidth": 2, "fillOpacity": 5 } }
},
"targets": [
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "histogram_quantile(0.50, sum(rate(http_client_request_duration_seconds_bucket{job=\"ui\", server_address=\"backend\"}[5m])) by (le))",
"legendFormat": "p50"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"expr": "histogram_quantile(0.95, sum(rate(http_client_request_duration_seconds_bucket{job=\"ui\", server_address=\"backend\"}[5m])) by (le))",
"legendFormat": "p95"
}
]
},
{
"id": 20,
"type": "logs",
"title": "Backend Errors",
"gridPos": { "x": 0, "y": 28, "w": 24, "h": 10 },
"options": {
"showTime": true,
"showLabels": false,
"wrapLogMessage": true,
"prettifyLogMessage": true,
"enableLogDetails": true,
"sortOrder": "Descending",
"dedupStrategy": "none"
},
"targets": [
{
"datasource": { "type": "loki", "uid": "loki" },
"expr": "{service_name=\"backend\"} | json | level =~ `(WARN|ERROR|error|warn)`",
"legendFormat": ""
}
]
}
]
}