# Patch for monitoring/configmap-grafana-alerting.yaml (blob 1aa505f -> 36e772d).
#
# What the six hunks change:
#   1. @@ -16  Adds a `message` template beside the bottoken/chatid/parseMode
#              settings. It loops over .Alerts: an alert whose .Status is
#              "firing" prints its `summary` annotation; any other status
#              prints "✅ Resuelto: <alertname>".
#   2. @@ -48  CrashLoop summary: emoji prefix, pod and namespace on separate
#              lines (joined with "\n").
#   3. @@ -89  Disk summary: emoji prefix, mountpoint, plus a "Uso:" line built
#              from `humanizePercentage $values.B.Value`; the instance label is
#              no longer part of the summary.
#   4. @@ -130 RAM summary: emoji prefix, instance, plus the same "Uso:" line.
#   5. @@ -171 Pod-phase summary: emoji prefix, pod, namespace and phase on
#              separate lines.
#   6. @@ -182 Appends `== 1` to the kube_pod_status_phase expression.
#
# NOTE(review): this copy has its line breaks and indentation collapsed; it
#   presumably has to be restored to one entry per line before `git apply` or
#   `patch` will accept it. The @@ line counts still describe the original layout.
# NOTE(review): `humanizePercentage` presumably expects a 0-1 ratio. Confirm that
#   refId B in the disk and RAM rules yields a ratio, not a 0-100 percentage or
#   a 0/1 threshold result (B's definition is not visible here).
# NOTE(review): parseMode is HTML, so the rendered message is presumably parsed
#   as HTML by the receiver. Confirm label values interpolated into `summary`
#   cannot contain `<`, `>` or `&`, or escape them.
diff --git a/monitoring/configmap-grafana-alerting.yaml b/monitoring/configmap-grafana-alerting.yaml index 1aa505f..36e772d 100644 --- a/monitoring/configmap-grafana-alerting.yaml +++ b/monitoring/configmap-grafana-alerting.yaml @@ -16,6 +16,7 @@ data: bottoken: "${TELEGRAM_BOT_TOKEN}" chatid: "5138407666" parseMode: HTML + message: "{{ range .Alerts }}{{ if eq .Status \"firing\" }}{{ .Annotations.summary }}\n{{ else }}✅ Resuelto: {{ .Labels.alertname }}\n{{ end }}{{ end }}" disableResolveMessage: false notification-policy.yaml: | @@ -48,7 +49,7 @@ data: noDataState: OK execErrState: Error annotations: - summary: "CrashLoopBackOff: {{ $labels.pod }} ({{ $labels.namespace }})" + summary: "🔄 CrashLoop: {{ $labels.pod }}\nNamespace: {{ $labels.namespace }}" description: "Container {{ $labels.container }} ha entrado en CrashLoopBackOff." labels: severity: critical @@ -89,7 +90,7 @@ data: noDataState: NoData execErrState: Error annotations: - summary: "Disco lleno: {{ $labels.mountpoint }} en {{ $labels.instance }}" + summary: "💾 Disco casi lleno: {{ $labels.mountpoint }}\nUso: {{ humanizePercentage $values.B.Value }}" description: "Filesystem {{ $labels.mountpoint }} supera el 80% de uso." labels: severity: warning @@ -130,7 +131,7 @@ data: noDataState: NoData execErrState: Error annotations: - summary: "RAM alta: {{ $labels.instance }}" + summary: "🧠 RAM alta: {{ $labels.instance }}\nUso: {{ humanizePercentage $values.B.Value }}" description: "Uso de RAM supera el 85% en {{ $labels.instance }}." labels: severity: warning @@ -171,7 +172,7 @@ data: noDataState: OK execErrState: Error annotations: - summary: "Pod en estado {{ $labels.phase }}: {{ $labels.pod }} ({{ $labels.namespace }})" + summary: "🚨 Pod caído: {{ $labels.pod }}\nNamespace: {{ $labels.namespace }}\nEstado: {{ $labels.phase }}" description: "Pod {{ $labels.namespace }}/{{ $labels.pod }} lleva más de 3 minutos en estado {{ $labels.phase }}." 
labels: severity: warning @@ -182,7 +183,7 @@ data: datasourceUid: prometheus model: editorMode: code - expr: "kube_pod_status_phase{phase!~\"Running|Succeeded\"}" + expr: "kube_pod_status_phase{phase!~\"Running|Succeeded\"} == 1" instant: true refId: A - refId: B