---
# ConfigMap holding Grafana alerting provisioning files. Each key under
# `data` is one provisioning document (contact points, notification policy,
# alert rules). How the keys are mounted into Grafana is not defined here.
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: monitoring
  name: grafana-alerting
data:
  # Contact-point provisioning: one Telegram receiver named
  # "Telegram Homelab". The bot token and chat id are ${VAR} placeholders, so
  # no secret value is stored in this ConfigMap.
  # NOTE(review): the placeholders only resolve if TELEGRAM_BOT_TOKEN and
  # TELEGRAM_CHAT_ID are set in the Grafana container environment - confirm.
  contact-points.yaml: |
    apiVersion: 1
    contactPoints:
      - orgId: 1
        name: Telegram Homelab
        receivers:
          - uid: telegram-homelab
            type: telegram
            settings:
              bottoken: "${TELEGRAM_BOT_TOKEN}"
              chatid: "${TELEGRAM_CHAT_ID}"
              # Telegram settings use snake_case keys; the previous
              # `parseMode` spelling is not a recognised setting.
              parse_mode: HTML
            # Also send a message when the alert resolves.
            disableResolveMessage: false

  # Notification-policy provisioning: a single root policy with no child
  # routes, so every alert is delivered to the "Telegram Homelab" contact
  # point.
  notification-policy.yaml: |
    apiVersion: 1
    policies:
      - orgId: 1
        receiver: Telegram Homelab
        # Alerts sharing these label values are batched into one notification.
        group_by: [alertname, namespace, pod]
        # Timing: initial wait, interval between updates of a group, and
        # interval between repeats of an unchanged notification.
        group_wait: 30s
        group_interval: 5m
        repeat_interval: 4h
        routes: []

  # Alert-rule provisioning: one rule group, "homelab-infra", stored in the
  # "Homelab Alerts" folder.
  # NOTE(review): rule annotations below contain `$labels`; some Grafana
  # versions apply environment-variable interpolation to `$` in provisioning
  # files - confirm the templates survive on the deployed version.
  alert-rules.yaml: |
    apiVersion: 1
    groups:
      - name: homelab-infra
        folder: Homelab Alerts
        orgId: 1
        # Evaluation interval applied to the rules of this group.
        interval: 1m
        rules:

          # Critical alert for containers waiting in CrashLoopBackOff.
          # Query A is an instant Prometheus query; expression B is the alert
          # condition "A > 0", which must hold for 1m before the rule fires.
          - uid: homelab-crashloop
            title: "Pod CrashLoopBackOff"
            condition: B
            for: 1m
            # An empty query result is treated as healthy.
            noDataState: OK
            execErrState: Error
            annotations:
              summary: "CrashLoopBackOff: {{ $labels.pod }} ({{ $labels.namespace }})"
              description: "Container {{ $labels.container }} ha entrado en CrashLoopBackOff."
            labels:
              severity: critical
            isPaused: false
            data:
              - refId: A
                relativeTimeRange:
                  from: 300
                  to: 0
                # NOTE(review): must equal the uid of the Prometheus data
                # source defined elsewhere - confirm.
                datasourceUid: prometheus
                model:
                  editorMode: code
                  expr: 'kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff"}'
                  instant: true
                  refId: A
              - refId: B
                relativeTimeRange:
                  from: 0
                  to: 0
                # Documented UID of Grafana's built-in expression engine
                # (replaces the legacy "-100" id).
                datasourceUid: __expr__
                model:
                  type: threshold
                  refId: B
                  # refId the threshold reads from. This key was missing, which
                  # left the threshold expression without an input.
                  expression: A
                  conditions:
                    - type: query
                      evaluator:
                        type: gt
                        params: [0]
                      operator:
                        type: and
                      query:
                        params: [A]
                      reducer:
                        type: last
                        params: []

          # Warning alert for filesystems above 80% usage.
          # Query A computes the used fraction (size - avail) / size per
          # filesystem, skipping tmpfs/overlay/squashfs/devtmpfs; expression B
          # is the alert condition "A > 0.8", which must hold for 5m.
          - uid: homelab-disk-high
            title: "Disco > 80%"
            condition: B
            for: 5m
            noDataState: NoData
            execErrState: Error
            annotations:
              summary: "Disco lleno: {{ $labels.mountpoint }} en {{ $labels.instance }}"
              description: "Filesystem {{ $labels.mountpoint }} supera el 80% de uso."
            labels:
              severity: warning
            isPaused: false
            data:
              - refId: A
                relativeTimeRange:
                  from: 300
                  to: 0
                # NOTE(review): must equal the uid of the Prometheus data
                # source defined elsewhere - confirm.
                datasourceUid: prometheus
                model:
                  editorMode: code
                  # Folded scalar: the three lines are joined with single
                  # spaces, producing the same one-line PromQL string as the
                  # previous escaped form.
                  expr: >-
                    (node_filesystem_size_bytes{fstype!~"tmpfs|overlay|squashfs|devtmpfs"}
                    - node_filesystem_avail_bytes{fstype!~"tmpfs|overlay|squashfs|devtmpfs"})
                    / node_filesystem_size_bytes{fstype!~"tmpfs|overlay|squashfs|devtmpfs"}
                  instant: true
                  refId: A
              - refId: B
                relativeTimeRange:
                  from: 0
                  to: 0
                # Documented UID of Grafana's built-in expression engine
                # (replaces the legacy "-100" id).
                datasourceUid: __expr__
                model:
                  type: threshold
                  refId: B
                  # refId the threshold reads from. This key was missing, which
                  # left the threshold expression without an input.
                  expression: A
                  conditions:
                    - type: query
                      evaluator:
                        type: gt
                        params: [0.8]
                      operator:
                        type: and
                      query:
                        params: [A]
                      reducer:
                        type: last
                        params: []

          # Warning alert for memory usage above 85%.
          # Query A computes the used fraction (MemTotal - MemAvailable) /
          # MemTotal; expression B is the alert condition "A > 0.85", which
          # must hold for 5m.
          - uid: homelab-ram-high
            title: "RAM > 85%"
            condition: B
            for: 5m
            noDataState: NoData
            execErrState: Error
            annotations:
              summary: "RAM alta: {{ $labels.instance }}"
              description: "Uso de RAM supera el 85% en {{ $labels.instance }}."
            labels:
              severity: warning
            isPaused: false
            data:
              - refId: A
                relativeTimeRange:
                  from: 300
                  to: 0
                # NOTE(review): must equal the uid of the Prometheus data
                # source defined elsewhere - confirm.
                datasourceUid: prometheus
                model:
                  editorMode: code
                  expr: '(node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes'
                  instant: true
                  refId: A
              - refId: B
                relativeTimeRange:
                  from: 0
                  to: 0
                # Documented UID of Grafana's built-in expression engine
                # (replaces the legacy "-100" id).
                datasourceUid: __expr__
                model:
                  type: threshold
                  refId: B
                  # refId the threshold reads from. This key was missing, which
                  # left the threshold expression without an input.
                  expression: A
                  conditions:
                    - type: query
                      evaluator:
                        type: gt
                        params: [0.85]
                      operator:
                        type: and
                      query:
                        params: [A]
                      reducer:
                        type: last
                        params: []

          # Warning alert for pods whose phase is neither Running nor
          # Succeeded. Query A selects kube_pod_status_phase series for those
          # phases; expression B is the alert condition "A > 0", which must
          # hold for 3m.
          # NOTE(review): the selector also matches the Pending phase although
          # the title only mentions Failed/Unknown - confirm this is intended.
          - uid: homelab-pod-failed
            title: "Pod Failed/Unknown"
            condition: B
            for: 3m
            # An empty query result is treated as healthy.
            noDataState: OK
            execErrState: Error
            annotations:
              summary: "Pod en estado {{ $labels.phase }}: {{ $labels.pod }} ({{ $labels.namespace }})"
              description: "Pod {{ $labels.namespace }}/{{ $labels.pod }} lleva más de 3 minutos en estado {{ $labels.phase }}."
            labels:
              severity: warning
            isPaused: false
            data:
              - refId: A
                relativeTimeRange:
                  from: 300
                  to: 0
                # NOTE(review): must equal the uid of the Prometheus data
                # source defined elsewhere - confirm.
                datasourceUid: prometheus
                model:
                  editorMode: code
                  expr: 'kube_pod_status_phase{phase!~"Running|Succeeded"}'
                  instant: true
                  refId: A
              - refId: B
                relativeTimeRange:
                  from: 0
                  to: 0
                # Documented UID of Grafana's built-in expression engine
                # (replaces the legacy "-100" id).
                datasourceUid: __expr__
                model:
                  type: threshold
                  refId: B
                  # refId the threshold reads from. This key was missing, which
                  # left the threshold expression without an input.
                  expression: A
                  conditions:
                    - type: query
                      evaluator:
                        type: gt
                        params: [0]
                      operator:
                        type: and
                      query:
                        params: [A]
                      reducer:
                        type: last
                        params: []