templates: add latency metrics to dashboard

Update the Grafana dashboard with metrics
for request latency, including error budget
burn for compose latency.
This commit is contained in:
Gianluca Zuccarelli 2021-11-01 10:45:22 +00:00 committed by Tom Gundersen
parent bb15007f35
commit 47c41a0b8d

View file

@ -29,10 +29,13 @@ data:
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": null,
"graphTooltip": 0,
"iteration": 1635445778494,
"id": 207,
"iteration": 1635760556540,
"links": [],
"liveNow": false,
"panels": [
{
"collapsed": false,
@ -50,7 +53,7 @@ data:
},
{
"datasource": "${datasource}",
"description": "The percentage of successful compose requests for the selected time range and interval",
"description": "The percentage of successful compose requests for the selected time range",
"fieldConfig": {
"defaults": {
"color": {
@ -123,22 +126,22 @@ data:
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.1.5",
"pluginVersion": "8.2.1",
"targets": [
{
"exemplar": true,
"expr": "sum(increase(total_successful_compose_requests[$__range]))/sum(increase(total_compose_requests[$__range]))",
"expr": "1 - sum(increase(total_failed_compose_requests[$__range]))/sum(increase(total_compose_requests[$__range]))",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Compose Success Rate",
"title": "Compose Request Success Rate",
"type": "stat"
},
{
"datasource": "${datasource}",
"description": "The number of total compose requests for the selected interval",
"description": "The number of total compose requests for the selected date range",
"fieldConfig": {
"defaults": {
"color": {
@ -180,7 +183,7 @@ data:
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.1.5",
"pluginVersion": "8.2.1",
"targets": [
{
"exemplar": true,
@ -195,7 +198,7 @@ data:
},
{
"datasource": "${datasource}",
"description": "The number of compose errors (as a percentage) over time for the selected time range and interval",
"description": "The number of compose errors (as a percentage) over time for the selected time range",
"fieldConfig": {
"defaults": {
"color": {
@ -264,7 +267,7 @@ data:
"targets": [
{
"exemplar": true,
"expr": "1 - sum(increase(total_successful_compose_requests[$__range]))/sum(increase(total_compose_requests[$__range]))",
"expr": "sum(increase(total_failed_compose_requests[$__range]))/sum(increase(total_compose_requests[$__range]))",
"interval": "",
"legendFormat": "",
"refId": "A"
@ -276,7 +279,7 @@ data:
{
"cacheTimeout": 1,
"datasource": "${datasource}",
"description": "How long will it take to consume all our budget if our error consumption remains at the current rate for the selected interval.",
"description": "How long will it take to consume all our budget if our error consumption remains at the current rate for the selected date range.",
"fieldConfig": {
"defaults": {
"color": {
@ -293,6 +296,17 @@ data:
}
},
"type": "special"
},
{
"options": {
"from": 672,
"result": {
"index": 1,
"text": "∞"
},
"to": 3360100
},
"type": "range"
}
],
"thresholds": {
@ -343,11 +357,11 @@ data:
},
"textMode": "auto"
},
"pluginVersion": "8.1.5",
"pluginVersion": "8.2.1",
"targets": [
{
"exemplar": true,
"expr": "28 * 24 * $stability_error_budget / ((1 - sum(rate(total_successful_compose_requests[$__range])) by (job) / sum(rate(total_compose_requests[$__range])) by (job)))",
"expr": "28 * 24 * (1 - $stability_slo) / ((sum(rate(total_failed_compose_requests[$__range]))/ sum(rate(total_compose_requests[$__range]))) + 0.001)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@ -362,8 +376,8 @@ data:
},
{
"cacheTimeout": null,
"datasource": "$datasource",
"description": "The percentage of error budget consumed for the selected time range and interval. ",
"datasource": "${datasource}",
"description": "The percentage of error budget consumed for the selected time range. ",
"fieldConfig": {
"defaults": {
"color": {
@ -446,7 +460,393 @@ data:
"targets": [
{
"exemplar": true,
"expr": "1 - ((sum(increase(total_successful_compose_requests[28d]))/sum(increase(total_compose_requests[28d]))) - $stability_slo)/ (1 - $stability_slo)",
"expr": "1 - ((1 - sum(increase(total_failed_compose_requests[$__range]))/sum(increase(total_compose_requests[$__range]))) - $stability_slo)/ (1 - $stability_slo)",
"instant": false,
"interval": "",
"intervalFactor": 10,
"legendFormat": "errorbudget",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Error Budget Consumed",
"type": "timeseries"
},
{
"collapsed": false,
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 17
},
"id": 129,
"panels": [],
"title": "Compose Latency",
"type": "row"
},
{
"datasource": "${datasource}",
"description": "The 90th percentile latency of composer requests for the selected time range",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 0,
"mappings": [
{
"options": {
"match": "null",
"result": {
"index": 0,
"text": "N/A"
}
},
"type": "special"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": "175"
},
{
"color": "red",
"value": "200"
}
]
},
"unit": "ms"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 5,
"x": 0,
"y": 18
},
"id": 200,
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.2.1",
"targets": [
{
"exemplar": true,
"expr": "histogram_quantile(0.9, sum(rate(composer_http_duration_seconds_bucket[$__range])) by (le)) * 1000",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Compose Latency",
"type": "stat"
},
{
"datasource": "${datasource}",
"description": "The request latency for composer requests over the selected date range",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"axisLabel": "latency",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 35,
"gradientMode": "scheme",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 3,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": "175"
},
{
"color": "red",
"value": "200"
}
]
},
"unit": "ms"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 19,
"x": 5,
"y": 18
},
"id": 201,
"options": {
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"targets": [
{
"exemplar": true,
"expr": "histogram_quantile(0.9, sum(rate(composer_http_duration_seconds_bucket[$__range])) by (le)) * 1000",
"interval": "",
"legendFormat": "",
"refId": "A"
}
],
"title": "Compose Request Latency",
"type": "timeseries"
},
{
"cacheTimeout": 1,
"datasource": "${datasource}",
"description": "How long it will take to consume the entire latency error budget if slow requests continue at the rate observed over the selected date range.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 2,
"mappings": [
{
"options": {
"match": "null",
"result": {
"index": 0,
"text": "1.40 days"
}
},
"type": "special"
},
{
"options": {
"from": 672,
"result": {
"index": 1,
"text": "∞"
},
"to": 3360100
},
"type": "range"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "#EAB839",
"value": 40
},
{
"color": "green",
"value": 50
}
]
},
"unit": "h"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 4,
"x": 0,
"y": 26
},
"id": 198,
"interval": null,
"links": [],
"maxDataPoints": 100,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {
"valueSize": 80
},
"textMode": "auto"
},
"pluginVersion": "8.2.1",
"targets": [
{
"exemplar": true,
"expr": "28 * 24 * (1 - $latency_slo) / (1 - sum(rate(composer_http_duration_seconds_bucket{le=\"0.2\"}[$__range]))/sum(rate(composer_http_duration_seconds_count[$__range])))",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"timeFrom": null,
"timeShift": null,
"title": "Error Budget Remaining",
"type": "stat"
},
{
"cacheTimeout": null,
"datasource": "${datasource}",
"description": "The percentage of the latency error budget consumed for the selected time range.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 100,
"gradientMode": "scheme",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 0,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": true,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "#EAB839",
"value": 0.95
},
{
"color": "red",
"value": 1
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 20,
"x": 4,
"y": 26
},
"id": 199,
"links": [],
"options": {
"legend": {
"calcs": [],
"displayMode": "hidden",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"pluginVersion": "8.1.5",
"targets": [
{
"exemplar": true,
"expr": "1 - ((sum(increase(composer_http_duration_seconds_bucket{le=\"0.2\"}[$__range]))/sum(increase(composer_http_duration_seconds_count[$__range]))) - $latency_slo)/ (1 - $latency_slo)",
"instant": false,
"interval": "",
"intervalFactor": 10,
@ -461,14 +861,14 @@ data:
}
],
"refresh": false,
"schemaVersion": 30,
"schemaVersion": 31,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {
"selected": false,
"selected": true,
"text": "app-sre-prod-04-prometheus",
"value": "app-sre-prod-04-prometheus"
},
@ -492,7 +892,7 @@ data:
"auto_count": 30,
"auto_min": "10s",
"current": {
"selected": true,
"selected": false,
"text": "28d",
"value": "28d"
},
@ -570,12 +970,12 @@ data:
"type": "constant"
},
{
"description": "Compose stability error budget",
"description": "Compose latency SLO target",
"error": null,
"hide": 2,
"label": null,
"name": "stability_error_budget",
"query": "0.05",
"name": "latency_slo",
"query": "0.9",
"skipUrlSync": false,
"type": "constant"
}
@ -613,5 +1013,5 @@ data:
"timezone": "",
"title": "Image Builder Composer",
"uid": "cNGfs4Knz",
"version": 1
"version": 2
}