From 4facdd8515e691cb37d6eeee10445bf53eca9865 Mon Sep 17 00:00:00 2001 From: chemavx Date: Sun, 26 Apr 2026 15:46:39 +0000 Subject: [PATCH] =?UTF-8?q?fix(monitoring):=20correct=20alert=20rule=20pip?= =?UTF-8?q?eline=20to=20A=E2=86=92B(reduce)=E2=86=92C(threshold)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Grafana threshold expression requires a scalar input, not a raw time series. Added explicit reduce step (type: reduce, reducer: last) as refId B between the Prometheus query (A) and the threshold check (C). All 4 rules updated: CrashLoopBackOff, Disco >80%, RAM >85%, Pod Failed. condition field changed from B → C on each rule. --- monitoring/configmap-grafana-alerting.yaml | 84 ++++++++++++++-------- 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/monitoring/configmap-grafana-alerting.yaml b/monitoring/configmap-grafana-alerting.yaml index c48c84f..1aa505f 100644 --- a/monitoring/configmap-grafana-alerting.yaml +++ b/monitoring/configmap-grafana-alerting.yaml @@ -43,7 +43,7 @@ data: - uid: homelab-crashloop title: "Pod CrashLoopBackOff" - condition: B + condition: C for: 1m noDataState: OK execErrState: Error @@ -66,18 +66,25 @@ data: relativeTimeRange: {from: 0, to: 0} datasourceUid: "-100" model: - type: threshold + type: reduce refId: B + expression: A + reducer: last + settings: + mode: "" + - refId: C + relativeTimeRange: {from: 0, to: 0} + datasourceUid: "-100" + model: + type: threshold + refId: C + expression: B conditions: - - type: query - evaluator: {params: [0], type: gt} - operator: {type: and} - query: {params: [A]} - reducer: {params: [], type: last} + - evaluator: {params: [0], type: gt} - uid: homelab-disk-high title: "Disco > 80%" - condition: B + condition: C for: 5m noDataState: NoData execErrState: Error @@ -100,18 +107,25 @@ data: relativeTimeRange: {from: 0, to: 0} datasourceUid: "-100" model: - type: threshold + type: reduce refId: B + expression: A + reducer: last + settings: + mode: "" + - refId: C + relativeTimeRange: {from: 0, to: 0} + datasourceUid: "-100" + model: + type: threshold + refId: C + expression: B conditions: - - type: query - evaluator: {params: [0.8], type: gt} - operator: {type: and} - query: {params: [A]} - reducer: {params: [], type: last} + - evaluator: {params: [0.8], type: gt} - uid: homelab-ram-high title: "RAM > 85%" - condition: B + condition: C for: 5m noDataState: NoData execErrState: Error @@ -134,18 +148,25 @@ data: relativeTimeRange: {from: 0, to: 0} datasourceUid: "-100" model: - type: threshold + type: reduce refId: B + expression: A + reducer: last + settings: + mode: "" + - refId: C + relativeTimeRange: {from: 0, to: 0} + datasourceUid: "-100" + model: + type: threshold + refId: C + expression: B conditions: - - type: query - evaluator: {params: [0.85], type: gt} - operator: {type: and} - query: {params: [A]} - reducer: {params: [], type: last} + - evaluator: {params: [0.85], type: gt} - uid: homelab-pod-failed title: "Pod Failed/Unknown" - condition: B + condition: C for: 3m noDataState: OK execErrState: Error @@ -168,11 +189,18 @@ data: relativeTimeRange: {from: 0, to: 0} datasourceUid: "-100" model: - type: threshold + type: reduce refId: B + expression: A + reducer: last + settings: + mode: "" + - refId: C + relativeTimeRange: {from: 0, to: 0} + datasourceUid: "-100" + model: + type: threshold + refId: C + expression: B conditions: - - type: query - evaluator: {params: [0], type: gt} - operator: {type: and} - query: {params: [A]} - reducer: {params: [], type: last} + - evaluator: {params: [0], type: gt}