skills/observability/grafana-dashboards/SKILL.md
Create and manage production Grafana dashboards for real-time visualization of system and application metrics. Use when building monitoring dashboards, visualizing metrics, or creating operational observability interfaces.
npx skillsauth add codewithbehnam/cc-docs grafana-dashboards
Install this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
Create and manage production-ready Grafana dashboards for comprehensive system observability.
Design effective Grafana dashboards for monitoring applications, infrastructure, and business metrics.
┌─────────────────────────────────────┐
│ Critical Metrics (Big Numbers) │
├─────────────────────────────────────┤
│ Key Trends (Time Series) │
├─────────────────────────────────────┤
│ Detailed Metrics (Tables/Heatmaps) │
└─────────────────────────────────────┘
{
  "dashboard": {
    "title": "API Monitoring",
    "tags": ["api", "production"],
    "timezone": "browser",
    "refresh": "30s",
    "panels": [
      {
        "title": "Request Rate",
        "type": "graph",
        "targets": [
          {
            "expr": "sum(rate(http_requests_total[5m])) by (service)",
            "legendFormat": "{{service}}"
          }
        ],
        "gridPos": { "x": 0, "y": 0, "w": 12, "h": 8 }
      },
      {
        "title": "Error Rate %",
        "type": "graph",
        "targets": [
          {
            "refId": "A",
            "expr": "(sum(rate(http_requests_total{status=~\"5..\"}[5m])) / sum(rate(http_requests_total[5m]))) * 100",
            "legendFormat": "Error Rate"
          }
        ],
        "alert": {
          "conditions": [
            {
              "evaluator": { "params": [5], "type": "gt" },
              "operator": { "type": "and" },
              "query": { "params": ["A", "5m", "now"] },
              "reducer": { "type": "avg" },
              "type": "query"
            }
          ]
        },
        "gridPos": { "x": 12, "y": 0, "w": 12, "h": 8 }
      },
      {
        "title": "P95 Latency",
        "type": "graph",
        "targets": [
          {
            "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le, service))",
            "legendFormat": "{{service}}"
          }
        ],
        "gridPos": { "x": 0, "y": 8, "w": 24, "h": 8 }
      }
    ]
  }
}
Reference: See assets/api-dashboard.json
{
  "type": "stat",
  "title": "Total Requests",
  "targets": [
    { "expr": "sum(http_requests_total)" }
  ],
  "options": {
    "reduceOptions": {
      "values": false,
      "calcs": [
        "lastNotNull"
      ]
    },
    "orientation": "auto",
    "textMode": "auto",
    "colorMode": "value"
  },
  "fieldConfig": {
    "defaults": {
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {
            "value": 0,
            "color": "green"
          },
          {
            "value": 80,
            "color": "yellow"
          },
          {
            "value": 90,
            "color": "red"
          }
        ]
      }
    }
  }
}
{
  "type": "graph",
  "title": "CPU Usage",
  "targets": [
    { "expr": "100 - (avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)" }
  ],
  "yaxes": [
    {
      "format": "percent",
      "max": 100,
      "min": 0
    },
    {
      "format": "short"
    }
  ]
}
{
  "type": "table",
  "title": "Service Status",
  "targets": [
    { "expr": "up", "format": "table", "instant": true }
  ],
  "transformations": [
    {
      "id": "organize",
      "options": {
        "excludeByName": {
          "Time": true
        },
        "indexByName": {},
        "renameByName": {
          "instance": "Instance",
          "job": "Service",
          "Value": "Status"
        }
      }
    }
  ]
}
{
  "type": "heatmap",
  "title": "Latency Heatmap",
  "targets": [
    {
      "expr": "sum(rate(http_request_duration_seconds_bucket[5m])) by (le)",
      "format": "heatmap"
    }
  ],
  "dataFormat": "tsbuckets",
  "yAxis": { "format": "s" }
}
{
  "templating": {
    "list": [
      {
        "name": "namespace",
        "type": "query",
        "datasource": "Prometheus",
        "query": "label_values(kube_pod_info, namespace)",
        "refresh": 1,
        "multi": false
      },
      {
        "name": "service",
        "type": "query",
        "datasource": "Prometheus",
        "query": "label_values(kube_service_info{namespace=\"$namespace\"}, service)",
        "refresh": 1,
        "multi": true
      }
    ]
  }
}
# Total per-second request rate over 5m windows, filtered by the $namespace and $service dashboard variables.
sum(rate(http_requests_total{namespace="$namespace", service=~"$service"}[5m]))
{
  "alert": {
    "name": "High Error Rate",
    "conditions": [
      {
        "evaluator": { "params": [5], "type": "gt" },
        "operator": { "type": "and" },
        "query": { "params": ["A", "5m", "now"] },
        "reducer": { "type": "avg" },
        "type": "query"
      }
    ],
    "executionErrorState": "alerting",
    "for": "5m",
    "frequency": "1m",
    "message": "Error rate is above 5%",
    "noDataState": "no_data",
    "notifications": [
      {
        "uid": "slack-channel"
      }
    ]
  }
}
dashboards.yml:
# Dashboard provisioning config: one file-based provider that reads
# dashboard JSON from the directory given in options.path.
apiVersion: 1

providers:
  - name: "default"
    orgId: 1
    folder: "General"
    type: file
    disableDeletion: false
    updateIntervalSeconds: 10
    allowUiUpdates: true
    options:
      path: /etc/grafana/dashboards
Key Panels:
Reference: See assets/infrastructure-dashboard.json
Key Panels:
Reference: See assets/database-dashboard.json
Key Panels:
# Folder that holds the dashboard declared below.
resource "grafana_folder" "monitoring" {
  title = "Production Monitoring"
}

# Dashboard whose JSON model is read from this module's dashboards/ directory
# and placed in the "monitoring" folder.
resource "grafana_dashboard" "api_monitoring" {
  folder      = grafana_folder.monitoring.id
  config_json = file("${path.module}/dashboards/api-monitoring.json")
}
# Copy every dashboard JSON file matched by the glob into the Grafana
# dashboards directory, then notify the "restart grafana" handler.
- name: Deploy Grafana dashboards
  copy:
    src: "{{ item }}"
    dest: /etc/grafana/dashboards/
  with_fileglob:
    - "dashboards/*.json"
  notify: restart grafana
prometheus-configuration - For metric collection
slo-implementation - For SLO dashboards
tools
macOS GUI automation CLI. Use steer to see the screen, click elements, type text, send hotkeys, scroll, drag, manage windows and apps, run OCR on Electron apps, and wait for UI conditions.
testing
Ship workflow: merge main, run tests, review diff, bump VERSION, update CHANGELOG, commit, push, create PR.
testing
Import cookies from your real browser (Comet, Chrome, Arc, Brave, Edge) into the headless browse session. Opens an interactive picker UI where you select which cookie domains to import. Use before QA testing authenticated pages.
development
Weekly engineering retrospective. Analyzes commit history, work patterns, and code quality metrics with persistent history and trend tracking. Team-aware: breaks down per-person contributions with praise and growth areas.