templates: add latency metrics to dashboard

Update the Grafana dashboard with metrics
for request latency, including error budget
burn for compose latency.
This commit is contained in:
Gianluca Zuccarelli 2021-11-01 10:45:22 +00:00 committed by Tom Gundersen
parent bb15007f35
commit 47c41a0b8d

View file

@ -29,10 +29,13 @@ data:
] ]
}, },
"editable": true, "editable": true,
"fiscalYearStartMonth": 0,
"gnetId": null, "gnetId": null,
"graphTooltip": 0, "graphTooltip": 0,
"iteration": 1635445778494, "id": 207,
"iteration": 1635760556540,
"links": [], "links": [],
"liveNow": false,
"panels": [ "panels": [
{ {
"collapsed": false, "collapsed": false,
@ -50,7 +53,7 @@ data:
}, },
{ {
"datasource": "${datasource}", "datasource": "${datasource}",
"description": "The percentage of successful compose requests for the selected time range and interval", "description": "The percentage of successful compose requests for the selected time range",
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"color": { "color": {
@ -123,22 +126,22 @@ data:
"text": {}, "text": {},
"textMode": "auto" "textMode": "auto"
}, },
"pluginVersion": "8.1.5", "pluginVersion": "8.2.1",
"targets": [ "targets": [
{ {
"exemplar": true, "exemplar": true,
"expr": "sum(increase(total_successful_compose_requests[$__range]))/sum(increase(total_compose_requests[$__range]))", "expr": "1 - sum(increase(total_failed_compose_requests[$__range]))/sum(increase(total_compose_requests[$__range]))",
"interval": "", "interval": "",
"legendFormat": "", "legendFormat": "",
"refId": "A" "refId": "A"
} }
], ],
"title": "Compose Success Rate", "title": "Compose Request Success Rate",
"type": "stat" "type": "stat"
}, },
{ {
"datasource": "${datasource}", "datasource": "${datasource}",
"description": "The number of total compose requests for the selected interval", "description": "The number of total compose requests for the selected date range",
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"color": { "color": {
@ -180,7 +183,7 @@ data:
"text": {}, "text": {},
"textMode": "auto" "textMode": "auto"
}, },
"pluginVersion": "8.1.5", "pluginVersion": "8.2.1",
"targets": [ "targets": [
{ {
"exemplar": true, "exemplar": true,
@ -195,7 +198,7 @@ data:
}, },
{ {
"datasource": "${datasource}", "datasource": "${datasource}",
"description": "The number of compose errors (as a percentage) over time for the selected time range and interval", "description": "The number of compose errors (as a percentage) over time for the selected time range",
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"color": { "color": {
@ -264,7 +267,7 @@ data:
"targets": [ "targets": [
{ {
"exemplar": true, "exemplar": true,
"expr": "1 - sum(increase(total_successful_compose_requests[$__range]))/sum(increase(total_compose_requests[$__range]))", "expr": "sum(increase(total_failed_compose_requests[$__range]))/sum(increase(total_compose_requests[$__range]))",
"interval": "", "interval": "",
"legendFormat": "", "legendFormat": "",
"refId": "A" "refId": "A"
@ -276,7 +279,7 @@ data:
{ {
"cacheTimeout": 1, "cacheTimeout": 1,
"datasource": "${datasource}", "datasource": "${datasource}",
"description": "How long will it take to consume all our budget if our error consumption remains at the current rate for the selected interval.", "description": "How long will it take to consume all our budget if our error consumption remains at the current rate for the selected date range.",
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"color": { "color": {
@ -293,6 +296,17 @@ data:
} }
}, },
"type": "special" "type": "special"
},
{
"options": {
"from": 672,
"result": {
"index": 1,
"text": "∞"
},
"to": 3360100
},
"type": "range"
} }
], ],
"thresholds": { "thresholds": {
@ -343,11 +357,11 @@ data:
}, },
"textMode": "auto" "textMode": "auto"
}, },
"pluginVersion": "8.1.5", "pluginVersion": "8.2.1",
"targets": [ "targets": [
{ {
"exemplar": true, "exemplar": true,
"expr": "28 * 24 * $stability_error_budget / ((1 - sum(rate(total_successful_compose_requests[$__range])) by (job) / sum(rate(total_compose_requests[$__range])) by (job)))", "expr": "28 * 24 * (1 - $stability_slo) / ((sum(rate(total_failed_compose_requests[$__range]))/ sum(rate(total_compose_requests[$__range]))) + 0.001)",
"format": "time_series", "format": "time_series",
"interval": "", "interval": "",
"intervalFactor": 1, "intervalFactor": 1,
@ -362,8 +376,8 @@ data:
}, },
{ {
"cacheTimeout": null, "cacheTimeout": null,
"datasource": "$datasource", "datasource": "${datasource}",
"description": "The percentage of error budget consumed for the selected time range and interval. ", "description": "The percentage of error budget consumed for the selected time range. ",
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"color": { "color": {
@ -446,7 +460,393 @@ data:
"targets": [ "targets": [
{ {
"exemplar": true, "exemplar": true,
"expr": "1 - ((sum(increase(total_successful_compose_requests[28d]))/sum(increase(total_compose_requests[28d]))) - $stability_slo)/ (1 - $stability_slo)", "expr": "1 - ((1 - sum(increase(total_failed_compose_requests[$__range]))/sum(increase(total_compose_requests[$__range]))) - $stability_slo)/ (1 - $stability_slo)",
"instant": false,
"interval": "",
"intervalFactor": 10,
"legendFormat": "errorbudget",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Error Budget Consumed",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 17
},
"id": 129,
"panels": [],
"title": "Compose Latency",
"type": "row"
},
{
"datasource": "${datasource}",
"description": "The 90th percentile latency of compose requests, in milliseconds, for the selected time range",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 0,
"mappings": [
{
"options": {
"match": "null",
"result": {
"index": 0,
"text": "N/A"
}
},
"type": "special"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": "175"
},
{
"color": "red",
"value": "200"
}
]
},
"unit": "ms"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 5,
"x": 0,
"y": 18
},
"id": 200,
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.2.1",
"targets": [
{
"exemplar": true,
"expr": "histogram_quantile(0.9, sum(rate(composer_http_duration_seconds_bucket[$__range])) by (le)) * 1000",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Compose Latency",
"type": "stat"
},
{
"datasource": "${datasource}",
"description": "The request latency for composer requests over the selected date range",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"axisLabel": "milliseconds",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 35,
"gradientMode": "scheme",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 3,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": "175"
},
{
"color": "red",
"value": "200"
}
]
},
"unit": "ms"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 19,
"x": 5,
"y": 18
},
"id": 201,
"options": {
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"exemplar": true,
"expr": "histogram_quantile(0.9, sum(rate(composer_http_duration_seconds_bucket[$__range])) by (le)) * 1000",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Compose Request Latency",
"type": "timeseries"
},
{
"cacheTimeout": 1,
"datasource": "${datasource}",
"description": "How long will it take to consume all our budget if our error consumption remains at the current rate for the selected date range.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 2,
"mappings": [
{
"options": {
"match": "null",
"result": {
"index": 0,
"text": "1.40 days"
}
},
"type": "special"
},
{
"options": {
"from": 672,
"result": {
"index": 1,
"text": "∞"
},
"to": 3360100
},
"type": "range"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "#EAB839",
"value": 40
},
{
"color": "green",
"value": 50
}
]
},
"unit": "h"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 4,
"x": 0,
"y": 26
},
"id": 198,
"interval": null,
"links": [],
"maxDataPoints": 100,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {
"valueSize": 80
},
"textMode": "auto"
},
"pluginVersion": "8.2.1",
"targets": [
{
"exemplar": true,
"expr": "28 * 24 * (1 - $latency_slo) / (1 - sum(rate(composer_http_duration_seconds_bucket{le=\"0.2\"}[$__range]))/sum(rate(composer_http_duration_seconds_count[$__range])))",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Error Budget Remaining",
"type": "stat"
},
{
"cacheTimeout": null,
"datasource": "${datasource}",
"description": "The percentage of error budget consumed for the selected time range. ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 100,
"gradientMode": "scheme",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 0,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 0.95
},
{
"color": "red",
"value": 1
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 20,
"x": 4,
"y": 26
},
"id": 199,
"links": [],
"options": {
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.1.5",
"targets": [
{
"exemplar": true,
"expr": "1 - ((sum(increase(composer_http_duration_seconds_bucket{le=\"0.2\"}[$__range]))/sum(increase(composer_http_duration_seconds_count[$__range]))) - $latency_slo)/ (1 - $latency_slo)",
"instant": false, "instant": false,
"interval": "", "interval": "",
"intervalFactor": 10, "intervalFactor": 10,
@ -461,14 +861,14 @@ data:
} }
], ],
"refresh": false, "refresh": false,
"schemaVersion": 30, "schemaVersion": 31,
"style": "dark", "style": "dark",
"tags": [], "tags": [],
"templating": { "templating": {
"list": [ "list": [
{ {
"current": { "current": {
"selected": false, "selected": true,
"text": "app-sre-prod-04-prometheus", "text": "app-sre-prod-04-prometheus",
"value": "app-sre-prod-04-prometheus" "value": "app-sre-prod-04-prometheus"
}, },
@ -492,7 +892,7 @@ data:
"auto_count": 30, "auto_count": 30,
"auto_min": "10s", "auto_min": "10s",
"current": { "current": {
"selected": true, "selected": false,
"text": "28d", "text": "28d",
"value": "28d" "value": "28d"
}, },
@ -570,12 +970,12 @@ data:
"type": "constant" "type": "constant"
}, },
{ {
"description": "Compose stability error budget", "description": "Compose latency SLO target",
"error": null, "error": null,
"hide": 2, "hide": 2,
"label": null, "label": null,
"name": "stability_error_budget", "name": "latency_slo",
"query": "0.05", "query": "0.9",
"skipUrlSync": false, "skipUrlSync": false,
"type": "constant" "type": "constant"
} }
@ -613,5 +1013,5 @@ data:
"timezone": "", "timezone": "",
"title": "Image Builder Composer", "title": "Image Builder Composer",
"uid": "cNGfs4Knz", "uid": "cNGfs4Knz",
"version": 1 "version": 2
} }