templates: add latency metrics to dashboard
Update the Grafana dashboard with metrics for request latency, including error budget burn for compose latency.
This commit is contained in:
parent
bb15007f35
commit
47c41a0b8d
1 changed files with 422 additions and 22 deletions
|
|
@ -29,10 +29,13 @@ data:
|
|||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"iteration": 1635445778494,
|
||||
"id": 207,
|
||||
"iteration": 1635760556540,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": false,
|
||||
|
|
@ -50,7 +53,7 @@ data:
|
|||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"description": "The percentage of successful compose requests for the selected time range and interval",
|
||||
"description": "The percentage of successful compose requests for the selected time range",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
|
@ -123,22 +126,22 @@ data:
|
|||
"text": {},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "8.1.5",
|
||||
"pluginVersion": "8.2.1",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sum(increase(total_successful_compose_requests[$__range]))/sum(increase(total_compose_requests[$__range]))",
|
||||
"expr": "1 - sum(increase(total_failed_compose_requests[$__range]))/sum(increase(total_compose_requests[$__range]))",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Compose Success Rate",
|
||||
"title": "Compose Request Success Rate",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"description": "The number of total compose requests for the selected interval",
|
||||
"description": "The number of total compose requests for the selected date range",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
|
@ -180,7 +183,7 @@ data:
|
|||
"text": {},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "8.1.5",
|
||||
"pluginVersion": "8.2.1",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
|
|
@ -195,7 +198,7 @@ data:
|
|||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"description": "The number of compose errors (as a percentage) over time for the selected time range and interval",
|
||||
"description": "The number of compose errors (as a percentage) over time for the selected time range",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
|
@ -264,7 +267,7 @@ data:
|
|||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "1 - sum(increase(total_successful_compose_requests[$__range]))/sum(increase(total_compose_requests[$__range]))",
|
||||
"expr": "sum(increase(total_failed_compose_requests[$__range]))/sum(increase(total_compose_requests[$__range]))",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
|
|
@ -276,7 +279,7 @@ data:
|
|||
{
|
||||
"cacheTimeout": 1,
|
||||
"datasource": "${datasource}",
|
||||
"description": "How long will it take to consume all our budget if our error consumption remains at the current rate for the selected interval.",
|
||||
"description": "How long will it take to consume all our budget if our error consumption remains at the current rate for the selected date range.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
|
@ -293,6 +296,17 @@ data:
|
|||
}
|
||||
},
|
||||
"type": "special"
|
||||
},
|
||||
{
|
||||
"options": {
|
||||
"from": 672,
|
||||
"result": {
|
||||
"index": 1,
|
||||
"text": "∞"
|
||||
},
|
||||
"to": 3360100
|
||||
},
|
||||
"type": "range"
|
||||
}
|
||||
],
|
||||
"thresholds": {
|
||||
|
|
@ -343,11 +357,11 @@ data:
|
|||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "8.1.5",
|
||||
"pluginVersion": "8.2.1",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "28 * 24 * $stability_error_budget / ((1 - sum(rate(total_successful_compose_requests[$__range])) by (job) / sum(rate(total_compose_requests[$__range])) by (job)))",
|
||||
"expr": "28 * 24 * (1 - $stability_slo) / ((sum(rate(total_failed_compose_requests[$__range]))/ sum(rate(total_compose_requests[$__range]))) + 0.001)",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
|
|
@ -362,8 +376,8 @@ data:
|
|||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"datasource": "$datasource",
|
||||
"description": "The percentage of error budget consumed for the selected time range and interval. ",
|
||||
"datasource": "${datasource}",
|
||||
"description": "The percentage of error budget consumed for the selected time range. ",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
|
@ -446,7 +460,393 @@ data:
|
|||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "1 - ((sum(increase(total_successful_compose_requests[28d]))/sum(increase(total_compose_requests[28d]))) - $stability_slo)/ (1 - $stability_slo)",
|
||||
"expr": "1 - ((1 - sum(increase(total_failed_compose_requests[$__range]))/sum(increase(total_compose_requests[$__range]))) - $stability_slo)/ (1 - $stability_slo)",
|
||||
"instant": false,
|
||||
"interval": "",
|
||||
"intervalFactor": 10,
|
||||
"legendFormat": "errorbudget",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Error Budget Consumed",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"datasource": null,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 17
|
||||
},
|
||||
"id": 129,
|
||||
"panels": [],
|
||||
"title": "Compose Latency",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"description": "The percentage of successful compose requests for the selected time range",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"decimals": 0,
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"match": "null",
|
||||
"result": {
|
||||
"index": 0,
|
||||
"text": "N/A"
|
||||
}
|
||||
},
|
||||
"type": "special"
|
||||
}
|
||||
],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "#EAB839",
|
||||
"value": "175"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": "200"
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ms"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 5,
|
||||
"x": 0,
|
||||
"y": 18
|
||||
},
|
||||
"id": 200,
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"match": "null",
|
||||
"result": {
|
||||
"text": "N/A"
|
||||
}
|
||||
},
|
||||
"type": "special"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"text": {},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "8.2.1",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "histogram_quantile(0.9, sum(rate(composer_http_duration_seconds_bucket[$__range])) by (le)) * 1000",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Compose Latency",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": "${datasource}",
|
||||
"description": "The request latency for composer requests over the selected date range",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "seconds",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 35,
|
||||
"gradientMode": "scheme",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 3,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": true,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "#EAB839",
|
||||
"value": "175"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": "200"
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ms"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 19,
|
||||
"x": 5,
|
||||
"y": 18
|
||||
},
|
||||
"id": 201,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "hidden",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "histogram_quantile(0.9, sum(rate(composer_http_duration_seconds_bucket[$__range])) by (le)) * 1000",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Compose Request Latency",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": 1,
|
||||
"datasource": "${datasource}",
|
||||
"description": "How long will it take to consume all our budget if our error consumption remains at the current rate for the selected date range.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"decimals": 2,
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"match": "null",
|
||||
"result": {
|
||||
"index": 0,
|
||||
"text": "1.40 days"
|
||||
}
|
||||
},
|
||||
"type": "special"
|
||||
},
|
||||
{
|
||||
"options": {
|
||||
"from": 672,
|
||||
"result": {
|
||||
"index": 1,
|
||||
"text": "∞"
|
||||
},
|
||||
"to": 3360100
|
||||
},
|
||||
"type": "range"
|
||||
}
|
||||
],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "#EAB839",
|
||||
"value": 40
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 50
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "h"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 4,
|
||||
"x": 0,
|
||||
"y": 26
|
||||
},
|
||||
"id": 198,
|
||||
"interval": null,
|
||||
"links": [],
|
||||
"maxDataPoints": 100,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"text": {
|
||||
"valueSize": 80
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "8.2.1",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "28 * 24 * (1 - $latency_slo) / (1 - sum(rate(composer_http_duration_seconds_bucket{le=\"0.2\"}[$__range]))/sum(rate(composer_http_duration_seconds_count[$__range])))",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Error Budget Remaining",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"datasource": "${datasource}",
|
||||
"description": "The percentage of error budget consumed for the selected time range. ",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 100,
|
||||
"gradientMode": "scheme",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineStyle": {
|
||||
"fill": "solid"
|
||||
},
|
||||
"lineWidth": 0,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": true,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "#EAB839",
|
||||
"value": 0.95
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percentunit"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 20,
|
||||
"x": 4,
|
||||
"y": 26
|
||||
},
|
||||
"id": 199,
|
||||
"links": [],
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "hidden",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "8.1.5",
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "1 - ((sum(increase(composer_http_duration_seconds_bucket{le=\"0.2\"}[$__range]))/sum(increase(composer_http_duration_seconds_count[$__range]))) - $latency_slo)/ (1 - $latency_slo)",
|
||||
"instant": false,
|
||||
"interval": "",
|
||||
"intervalFactor": 10,
|
||||
|
|
@ -461,14 +861,14 @@ data:
|
|||
}
|
||||
],
|
||||
"refresh": false,
|
||||
"schemaVersion": 30,
|
||||
"schemaVersion": 31,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"selected": true,
|
||||
"text": "app-sre-prod-04-prometheus",
|
||||
"value": "app-sre-prod-04-prometheus"
|
||||
},
|
||||
|
|
@ -492,7 +892,7 @@ data:
|
|||
"auto_count": 30,
|
||||
"auto_min": "10s",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"selected": false,
|
||||
"text": "28d",
|
||||
"value": "28d"
|
||||
},
|
||||
|
|
@ -570,12 +970,12 @@ data:
|
|||
"type": "constant"
|
||||
},
|
||||
{
|
||||
"description": "Compose stability error budget",
|
||||
"description": "Compose latency SLO target",
|
||||
"error": null,
|
||||
"hide": 2,
|
||||
"label": null,
|
||||
"name": "stability_error_budget",
|
||||
"query": "0.05",
|
||||
"name": "latency_slo",
|
||||
"query": "0.9",
|
||||
"skipUrlSync": false,
|
||||
"type": "constant"
|
||||
}
|
||||
|
|
@ -613,5 +1013,5 @@ data:
|
|||
"timezone": "",
|
||||
"title": "Image Builder Composer",
|
||||
"uid": "cNGfs4Knz",
|
||||
"version": 1
|
||||
"version": 2
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue