commit ff2e6cc98565478c384d1086931ee248c22c82dd Author: chemavx Date: Fri Apr 10 08:57:02 2026 +0000 feat: export all K8 Plus cluster manifests Namespaces: argocd, authentik, backup-system, cloudflare-ddns, gitea, homarr, monitoring, n8n, openclaw, polymarket-bot, vaultwarden Cluster-wide: clusterissuers, namespaces Secrets: redacted (structure only, data=REDACTED) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..201b64d --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +# Ignore auto-generated kube resources +**/configmap-kube-root-ca.crt.yaml +**/serviceaccount-default.yaml diff --git a/argocd/configmap-argocd-cm.yaml b/argocd/configmap-argocd-cm.yaml new file mode 100644 index 0000000..2225fa1 --- /dev/null +++ b/argocd/configmap-argocd-cm.yaml @@ -0,0 +1,53 @@ +apiVersion: v1 +data: + resource.customizations.ignoreResourceUpdates.ConfigMap: "jqPathExpressions:\n \ + \ # Ignore the cluster-autoscaler status\n - '.metadata.annotations.\"cluster-autoscaler.kubernetes.io/last-updated\"\ + '\n # Ignore the annotation of the legacy Leases election\n - '.metadata.annotations.\"\ + control-plane.alpha.kubernetes.io/leader\"'\n" + resource.customizations.ignoreResourceUpdates.Endpoints: "jsonPointers:\n - /metadata\n\ + \ - /subsets\n" + resource.customizations.ignoreResourceUpdates.all: "jsonPointers:\n - /status\n" + resource.customizations.ignoreResourceUpdates.apps_ReplicaSet: "jqPathExpressions:\n\ + \ - '.metadata.annotations.\"deployment.kubernetes.io/desired-replicas\"'\n \ + \ - '.metadata.annotations.\"deployment.kubernetes.io/max-replicas\"'\n - '.metadata.annotations.\"\ + rollout.argoproj.io/desired-replicas\"'\n" + resource.customizations.ignoreResourceUpdates.argoproj.io_Application: "jqPathExpressions:\n\ + \ - '.metadata.annotations.\"notified.notifications.argoproj.io\"'\n - '.metadata.annotations.\"\ + argocd.argoproj.io/refresh\"'\n - '.metadata.annotations.\"argocd.argoproj.io/hydrate\"\ + '\n - '.operation'\n" + 
resource.customizations.ignoreResourceUpdates.argoproj.io_Rollout: "jqPathExpressions:\n\ + \ - '.metadata.annotations.\"notified.notifications.argoproj.io\"'\n" + resource.customizations.ignoreResourceUpdates.autoscaling_HorizontalPodAutoscaler: "jqPathExpressions:\n\ + \ - '.metadata.annotations.\"autoscaling.alpha.kubernetes.io/behavior\"'\n -\ + \ '.metadata.annotations.\"autoscaling.alpha.kubernetes.io/conditions\"'\n -\ + \ '.metadata.annotations.\"autoscaling.alpha.kubernetes.io/metrics\"'\n - '.metadata.annotations.\"\ + autoscaling.alpha.kubernetes.io/current-metrics\"'\n" + resource.customizations.ignoreResourceUpdates.discovery.k8s.io_EndpointSlice: "jsonPointers:\n\ + \ - /metadata\n - /endpoints\n - /ports\n" + resource.exclusions: "### Network resources created by the Kubernetes control plane\ + \ and excluded to reduce the number of watched events and UI clutter\n- apiGroups:\n\ + \ - ''\n - discovery.k8s.io\n kinds:\n - Endpoints\n - EndpointSlice\n###\ + \ Internal Kubernetes resources excluded reduce the number of watched events\n\ + - apiGroups:\n - coordination.k8s.io\n kinds:\n - Lease\n### Internal Kubernetes\ + \ Authz/Authn resources excluded reduce the number of watched events\n- apiGroups:\n\ + \ - authentication.k8s.io\n - authorization.k8s.io\n kinds:\n - SelfSubjectReview\n\ + \ - TokenReview\n - LocalSubjectAccessReview\n - SelfSubjectAccessReview\n\ + \ - SelfSubjectRulesReview\n - SubjectAccessReview\n### Intermediate Certificate\ + \ Request excluded reduce the number of watched events\n- apiGroups:\n - certificates.k8s.io\n\ + \ kinds:\n - CertificateSigningRequest\n- apiGroups:\n - cert-manager.io\n\ + \ kinds:\n - CertificateRequest\n### Cilium internal resources excluded reduce\ + \ the number of watched events and UI Clutter\n- apiGroups:\n - cilium.io\n \ + \ kinds:\n - CiliumIdentity\n - CiliumEndpoint\n - CiliumEndpointSlice\n###\ + \ Kyverno intermediate and reporting resources excluded reduce the number of watched\ + \ 
events and improve performance\n- apiGroups:\n - kyverno.io\n - reports.kyverno.io\n\ + \ - wgpolicyk8s.io\n kinds:\n - PolicyReport\n - ClusterPolicyReport\n -\ + \ EphemeralReport\n - ClusterEphemeralReport\n - AdmissionReport\n - ClusterAdmissionReport\n\ + \ - BackgroundScanReport\n - ClusterBackgroundScanReport\n - UpdateRequest\n" +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/name: argocd-cm + app.kubernetes.io/part-of: argocd + name: argocd-cm + namespace: argocd + diff --git a/argocd/configmap-argocd-cmd-params-cm.yaml b/argocd/configmap-argocd-cmd-params-cm.yaml new file mode 100644 index 0000000..f7d1d6a --- /dev/null +++ b/argocd/configmap-argocd-cmd-params-cm.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +data: + server.insecure: 'true' +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/name: argocd-cmd-params-cm + app.kubernetes.io/part-of: argocd + name: argocd-cmd-params-cm + namespace: argocd + diff --git a/argocd/configmap-argocd-gpg-keys-cm.yaml b/argocd/configmap-argocd-gpg-keys-cm.yaml new file mode 100644 index 0000000..0cf33db --- /dev/null +++ b/argocd/configmap-argocd-gpg-keys-cm.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/name: argocd-gpg-keys-cm + app.kubernetes.io/part-of: argocd + name: argocd-gpg-keys-cm + namespace: argocd + diff --git a/argocd/configmap-argocd-notifications-cm.yaml b/argocd/configmap-argocd-notifications-cm.yaml new file mode 100644 index 0000000..bcac53d --- /dev/null +++ b/argocd/configmap-argocd-notifications-cm.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/component: notifications-controller + app.kubernetes.io/name: argocd-notifications-controller + app.kubernetes.io/part-of: argocd + name: argocd-notifications-cm + namespace: argocd + diff --git a/argocd/configmap-argocd-rbac-cm.yaml b/argocd/configmap-argocd-rbac-cm.yaml new file mode 100644 index 0000000..1a5229a --- /dev/null +++ 
b/argocd/configmap-argocd-rbac-cm.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/name: argocd-rbac-cm + app.kubernetes.io/part-of: argocd + name: argocd-rbac-cm + namespace: argocd + diff --git a/argocd/configmap-argocd-ssh-known-hosts-cm.yaml b/argocd/configmap-argocd-ssh-known-hosts-cm.yaml new file mode 100644 index 0000000..66d5fa3 --- /dev/null +++ b/argocd/configmap-argocd-ssh-known-hosts-cm.yaml @@ -0,0 +1,42 @@ +apiVersion: v1 +data: + ssh_known_hosts: '# This file was automatically generated by hack/update-ssh-known-hosts.sh. + DO NOT EDIT + + [ssh.github.com]:443 ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg= + + [ssh.github.com]:443 ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl + + [ssh.github.com]:443 ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk= + + bitbucket.org ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBPIQmuzMBuKdWeF4+a2sjSSpBK0iqitSQ+5BM9KhpexuGt20JpTVM7u5BDZngncgrqDMbWdxMWWOGtZ9UgbqgZE= + + bitbucket.org ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIazEu89wgQZ4bqs3d63QSMzYVa0MuJ2e2gKTKqu+UUO + + bitbucket.org ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAABgQDQeJzhupRu0u0cdegZIa8e86EG2qOCsIsD1Xw0xSeiPDlCr7kq97NLmMbpKTX6Esc30NuoqEEHCuc7yWtwp8dI76EEEB1VqY9QJq6vk+aySyboD5QF61I/1WeTwu+deCbgKMGbUijeXhtfbxSxm6JwGrXrhBdofTsbKRUsrN1WoNgUa8uqN1Vx6WAJw1JHPhglEGGHea6QICwJOAr/6mrui/oB7pkaWKHj3z7d1IC4KWLtY47elvjbaTlkN04Kc/5LFEirorGYVbt15kAUlqGM65pk6ZBxtaO3+30LVlORZkxOh+LKL/BvbZ/iRNhItLqNyieoQj/uh/7Iv4uyH/cV/0b4WDSd3DptigWq84lJubb9t/DnZlrJazxyDCulTmKdOR7vs9gMTo+uoIrPSb8ScTtvw65+odKAlBj59dhnVp9zd7QUojOpXlL62Aw56U4oO+FALuevvMjiWeavKhJqlR7i5n9srYcrNV7ttmDw7kf/97P5zauIhxcjX+xHv4M= + + github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg= + + github.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl + + github.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk= + + gitlab.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBFSMqzJeV9rUzU4kWitGjeR4PWSa29SPqJ1fVkhtj3Hw9xjLVXVYrU9QlYWrOLXBpQ6KWjbjTDTdDkoohFzgbEY= + + gitlab.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAfuCHKVTjquxvt6CM6tdG4SLp1Btn/nOeHHE5UOzRdf + + gitlab.com ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAABAQCsj2bNKTBSpIYDEGk9KxsGh3mySTRgMtXL583qmBpzeQ+jqCMRgBqB98u3z++J1sKlXHWfM9dyhSevkMwSbhoR8XIq/U0tCNyokEi/ueaBMCvbcTHhO7FcwzY92WK4Yt0aGROY5qX2UKSeOvuP4D6TPqKF1onrSzH9bx9XUf2lEdWT/ia1NEKjunUqu1xOB/StKDHMoX4/OKyIzuS0q/T1zOATthvasJFoPrAjkohTyaDUz2LN5JoH839hViyEG82yB+MjcFV5MU3N1l1QL3cVUCh93xSaua1N85qivl+siMkPGbO5xR/En4iEY6K2XPASUEMaieWVNTRCtJ4S8H+9 + + ssh.dev.azure.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC7Hr1oTWqNqOlzGJOfGJ4NakVyIzf1rXYd4d7wo6jBlkLvCA4odBlL0mDUyZ0/QUfTTqeu+tm22gOsv+VrVTMk6vwRU75gY/y9ut5Mb3bR5BV58dKXyq9A9UeB5Cakehn5Zgm6x1mKoVyf+FFn26iYqXJRgzIZZcZ5V6hrE0Qg39kZm4az48o0AUbf6Sp4SLdvnuMa2sVNwHBboS7EJkm57XQPVU3/QpyNLHbWDdzwtrlS+ez30S3AdYhLKEOxAG8weOnyrtLJAUen9mTkol8oII1edf7mWWbWVf0nBmly21+nZcmCTISQBtdcyPaEno7fFQMDD26/s0lfKob4Kw8H + + vs-ssh.visualstudio.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC7Hr1oTWqNqOlzGJOfGJ4NakVyIzf1rXYd4d7wo6jBlkLvCA4odBlL0mDUyZ0/QUfTTqeu+tm22gOsv+VrVTMk6vwRU75gY/y9ut5Mb3bR5BV58dKXyq9A9UeB5Cakehn5Zgm6x1mKoVyf+FFn26iYqXJRgzIZZcZ5V6hrE0Qg39kZm4az48o0AUbf6Sp4SLdvnuMa2sVNwHBboS7EJkm57XQPVU3/QpyNLHbWDdzwtrlS+ez30S3AdYhLKEOxAG8weOnyrtLJAUen9mTkol8oII1edf7mWWbWVf0nBmly21+nZcmCTISQBtdcyPaEno7fFQMDD26/s0lfKob4Kw8H + + ' +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/name: argocd-ssh-known-hosts-cm + app.kubernetes.io/part-of: argocd + name: argocd-ssh-known-hosts-cm + namespace: argocd + diff --git a/argocd/configmap-argocd-tls-certs-cm.yaml b/argocd/configmap-argocd-tls-certs-cm.yaml new file mode 100644 index 0000000..7efcccf --- /dev/null +++ b/argocd/configmap-argocd-tls-certs-cm.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/name: argocd-tls-certs-cm + app.kubernetes.io/part-of: argocd + name: argocd-tls-certs-cm + namespace: argocd + diff --git a/argocd/deployment-argocd-applicationset-controller.yaml b/argocd/deployment-argocd-applicationset-controller.yaml new file mode 100644 index 0000000..a6da8a6 --- /dev/null +++ 
b/argocd/deployment-argocd-applicationset-controller.yaml @@ -0,0 +1,288 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '3' + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"labels":{"app.kubernetes.io/component":"applicationset-controller","app.kubernetes.io/name":"argocd-applicationset-controller","app.kubernetes.io/part-of":"argocd"},"name":"argocd-applicationset-controller","namespace":"argocd"},"spec":{"selector":{"matchLabels":{"app.kubernetes.io/name":"argocd-applicationset-controller"}},"template":{"metadata":{"labels":{"app.kubernetes.io/name":"argocd-applicationset-controller"}},"spec":{"containers":[{"args":["/usr/local/bin/argocd-applicationset-controller"],"env":[{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_GLOBAL_PRESERVED_ANNOTATIONS","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.global.preserved.annotations","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_GLOBAL_PRESERVED_LABELS","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.global.preserved.labels","name":"argocd-cmd-params-cm","optional":true}}},{"name":"NAMESPACE","valueFrom":{"fieldRef":{"fieldPath":"metadata.namespace"}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_LEADER_ELECTION","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.enable.leader.election","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_REPO_SERVER","valueFrom":{"configMapKeyRef":{"key":"repo.server","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_POLICY","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.policy","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_POLICY_OVERRIDE","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.enable.policy.overr
ide","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_DEBUG","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.debug","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_LOGFORMAT","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.log.format","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_LOGLEVEL","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.log.level","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_LOG_FORMAT_TIMESTAMP","valueFrom":{"configMapKeyRef":{"key":"log.format.timestamp","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_DRY_RUN","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.dryrun","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_GIT_MODULES_ENABLED","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.enable.git.submodule","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_PROGRESSIVE_SYNCS","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.enable.progressive.syncs","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_TOKENREF_STRICT_MODE","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.enable.tokenref.strict.mode","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_NEW_GIT_FILE_GLOBBING","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.enable.new.git.file.globbing","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_REPO_SERVER_PLAINTEXT","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.repo.server.plaintext","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_REPO_SERVER_STRICT_TLS","valueFrom":{"configMapKeyRef":{"key
":"applicationsetcontroller.repo.server.strict.tls","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_REPO_SERVER_TIMEOUT_SECONDS","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.repo.server.timeout.seconds","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_CONCURRENT_RECONCILIATIONS","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.concurrent.reconciliations.max","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_NAMESPACES","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.namespaces","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_SCM_ROOT_CA_PATH","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.scm.root.ca.path","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_ALLOWED_SCM_PROVIDERS","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.allowed.scm.providers","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_SCM_PROVIDERS","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.enable.scm.providers","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_GITHUB_API_METRICS","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.enable.github.api.metrics","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_WEBHOOK_PARALLELISM_LIMIT","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.webhook.parallelism.limit","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_REQUEUE_AFTER","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.requeue.after","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_MAX_RESOURCES_STATUS_COUNT","valueFrom":{"configMapKeyRe
f":{"key":"applicationsetcontroller.status.max.resources.count","name":"argocd-cmd-params-cm","optional":true}}}],"image":"quay.io/argoproj/argocd:v3.3.6","imagePullPolicy":"Always","name":"argocd-applicationset-controller","ports":[{"containerPort":7000,"name":"webhook"},{"containerPort":8080,"name":"metrics"}],"securityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"seccompProfile":{"type":"RuntimeDefault"}},"volumeMounts":[{"mountPath":"/app/config/ssh","name":"ssh-known-hosts"},{"mountPath":"/app/config/tls","name":"tls-certs"},{"mountPath":"/app/config/gpg/source","name":"gpg-keys"},{"mountPath":"/app/config/gpg/keys","name":"gpg-keyring"},{"mountPath":"/tmp","name":"tmp"},{"mountPath":"/app/config/reposerver/tls","name":"argocd-repo-server-tls"},{"mountPath":"/home/argocd/params","name":"argocd-cmd-params-cm"}]}],"nodeSelector":{"kubernetes.io/os":"linux"},"serviceAccountName":"argocd-applicationset-controller","volumes":[{"configMap":{"name":"argocd-ssh-known-hosts-cm"},"name":"ssh-known-hosts"},{"configMap":{"name":"argocd-tls-certs-cm"},"name":"tls-certs"},{"configMap":{"name":"argocd-gpg-keys-cm"},"name":"gpg-keys"},{"emptyDir":{},"name":"gpg-keyring"},{"emptyDir":{},"name":"tmp"},{"name":"argocd-repo-server-tls","secret":{"items":[{"key":"tls.crt","path":"tls.crt"},{"key":"tls.key","path":"tls.key"},{"key":"ca.crt","path":"ca.crt"}],"optional":true,"secretName":"argocd-repo-server-tls"}},{"configMap":{"items":[{"key":"applicationsetcontroller.profile.enabled","path":"profiler.enabled"}],"name":"argocd-cmd-params-cm","optional":true},"name":"argocd-cmd-params-cm"}]}}}} + + ' + labels: + app.kubernetes.io/component: applicationset-controller + app.kubernetes.io/name: argocd-applicationset-controller + app.kubernetes.io/part-of: argocd + name: argocd-applicationset-controller + namespace: argocd +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + 
selector: + matchLabels: + app.kubernetes.io/name: argocd-applicationset-controller + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + annotations: + kubectl.kubernetes.io/restartedAt: '2026-04-09T06:58:23Z' + labels: + app.kubernetes.io/name: argocd-applicationset-controller + spec: + containers: + - args: + - /usr/local/bin/argocd-applicationset-controller + env: + - name: ARGOCD_APPLICATIONSET_CONTROLLER_GLOBAL_PRESERVED_ANNOTATIONS + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.global.preserved.annotations + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_GLOBAL_PRESERVED_LABELS + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.global.preserved.labels + name: argocd-cmd-params-cm + optional: true + - name: NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_LEADER_ELECTION + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.enable.leader.election + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_REPO_SERVER + valueFrom: + configMapKeyRef: + key: repo.server + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_POLICY + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.policy + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_POLICY_OVERRIDE + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.enable.policy.override + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_DEBUG + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.debug + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_LOGFORMAT + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.log.format + name: argocd-cmd-params-cm + optional: true + - 
name: ARGOCD_APPLICATIONSET_CONTROLLER_LOGLEVEL + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.log.level + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_LOG_FORMAT_TIMESTAMP + valueFrom: + configMapKeyRef: + key: log.format.timestamp + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_DRY_RUN + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.dryrun + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_GIT_MODULES_ENABLED + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.enable.git.submodule + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_PROGRESSIVE_SYNCS + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.enable.progressive.syncs + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_TOKENREF_STRICT_MODE + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.enable.tokenref.strict.mode + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_NEW_GIT_FILE_GLOBBING + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.enable.new.git.file.globbing + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_REPO_SERVER_PLAINTEXT + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.repo.server.plaintext + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_REPO_SERVER_STRICT_TLS + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.repo.server.strict.tls + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_REPO_SERVER_TIMEOUT_SECONDS + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.repo.server.timeout.seconds + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_CONCURRENT_RECONCILIATIONS + valueFrom: + configMapKeyRef: + key: 
applicationsetcontroller.concurrent.reconciliations.max + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_NAMESPACES + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.namespaces + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_SCM_ROOT_CA_PATH + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.scm.root.ca.path + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_ALLOWED_SCM_PROVIDERS + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.allowed.scm.providers + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_SCM_PROVIDERS + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.enable.scm.providers + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_GITHUB_API_METRICS + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.enable.github.api.metrics + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_WEBHOOK_PARALLELISM_LIMIT + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.webhook.parallelism.limit + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_REQUEUE_AFTER + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.requeue.after + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_MAX_RESOURCES_STATUS_COUNT + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.status.max.resources.count + name: argocd-cmd-params-cm + optional: true + image: quay.io/argoproj/argocd:v3.3.6 + imagePullPolicy: Always + name: argocd-applicationset-controller + ports: + - containerPort: 7000 + name: webhook + protocol: TCP + - containerPort: 8080 + name: metrics + protocol: TCP + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + 
readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /app/config/ssh + name: ssh-known-hosts + - mountPath: /app/config/tls + name: tls-certs + - mountPath: /app/config/gpg/source + name: gpg-keys + - mountPath: /app/config/gpg/keys + name: gpg-keyring + - mountPath: /tmp + name: tmp + - mountPath: /app/config/reposerver/tls + name: argocd-repo-server-tls + - mountPath: /home/argocd/params + name: argocd-cmd-params-cm + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/os: linux + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + serviceAccount: argocd-applicationset-controller + serviceAccountName: argocd-applicationset-controller + terminationGracePeriodSeconds: 30 + volumes: + - configMap: + defaultMode: 420 + name: argocd-ssh-known-hosts-cm + name: ssh-known-hosts + - configMap: + defaultMode: 420 + name: argocd-tls-certs-cm + name: tls-certs + - configMap: + defaultMode: 420 + name: argocd-gpg-keys-cm + name: gpg-keys + - emptyDir: {} + name: gpg-keyring + - emptyDir: {} + name: tmp + - name: argocd-repo-server-tls + secret: + defaultMode: 420 + items: + - key: tls.crt + path: tls.crt + - key: tls.key + path: tls.key + - key: ca.crt + path: ca.crt + optional: true + secretName: argocd-repo-server-tls + - configMap: + defaultMode: 420 + items: + - key: applicationsetcontroller.profile.enabled + path: profiler.enabled + name: argocd-cmd-params-cm + optional: true + name: argocd-cmd-params-cm + diff --git a/argocd/deployment-argocd-dex-server.yaml b/argocd/deployment-argocd-dex-server.yaml new file mode 100644 index 0000000..c26c5fd --- /dev/null +++ b/argocd/deployment-argocd-dex-server.yaml @@ -0,0 +1,151 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '1' + kubectl.kubernetes.io/last-applied-configuration: 
'{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"labels":{"app.kubernetes.io/component":"dex-server","app.kubernetes.io/name":"argocd-dex-server","app.kubernetes.io/part-of":"argocd"},"name":"argocd-dex-server","namespace":"argocd"},"spec":{"selector":{"matchLabels":{"app.kubernetes.io/name":"argocd-dex-server"}},"template":{"metadata":{"labels":{"app.kubernetes.io/name":"argocd-dex-server"}},"spec":{"affinity":{"podAntiAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"podAffinityTerm":{"labelSelector":{"matchLabels":{"app.kubernetes.io/part-of":"argocd"}},"topologyKey":"kubernetes.io/hostname"},"weight":5}]}},"containers":[{"command":["/shared/argocd-dex","rundex"],"env":[{"name":"ARGOCD_DEX_SERVER_LOGFORMAT","valueFrom":{"configMapKeyRef":{"key":"dexserver.log.format","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_DEX_SERVER_LOGLEVEL","valueFrom":{"configMapKeyRef":{"key":"dexserver.log.level","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_LOG_FORMAT_TIMESTAMP","valueFrom":{"configMapKeyRef":{"key":"log.format.timestamp","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_DEX_SERVER_DISABLE_TLS","valueFrom":{"configMapKeyRef":{"key":"dexserver.disable.tls","name":"argocd-cmd-params-cm","optional":true}}}],"image":"ghcr.io/dexidp/dex:v2.43.0","imagePullPolicy":"Always","name":"dex","ports":[{"containerPort":5556},{"containerPort":5557},{"containerPort":5558}],"securityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"seccompProfile":{"type":"RuntimeDefault"}},"volumeMounts":[{"mountPath":"/shared","name":"static-files"},{"mountPath":"/tmp","name":"dexconfig"},{"mountPath":"/tls","name":"argocd-dex-server-tls"}]}],"initContainers":[{"command":["/bin/cp","-n","/usr/local/bin/argocd","/shared/argocd-dex"],"image":"quay.io/argoproj/argocd:v3.3.6","imagePullPolicy":"Always","name":"copyutil","securityContext
":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"seccompProfile":{"type":"RuntimeDefault"}},"volumeMounts":[{"mountPath":"/shared","name":"static-files"},{"mountPath":"/tmp","name":"dexconfig"}]}],"nodeSelector":{"kubernetes.io/os":"linux"},"serviceAccountName":"argocd-dex-server","volumes":[{"emptyDir":{},"name":"static-files"},{"emptyDir":{},"name":"dexconfig"},{"name":"argocd-dex-server-tls","secret":{"items":[{"key":"tls.crt","path":"tls.crt"},{"key":"tls.key","path":"tls.key"},{"key":"ca.crt","path":"ca.crt"}],"optional":true,"secretName":"argocd-dex-server-tls"}}]}}}} + + ' + labels: + app.kubernetes.io/component: dex-server + app.kubernetes.io/name: argocd-dex-server + app.kubernetes.io/part-of: argocd + name: argocd-dex-server + namespace: argocd +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app.kubernetes.io/name: argocd-dex-server + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + labels: + app.kubernetes.io/name: argocd-dex-server + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchLabels: + app.kubernetes.io/part-of: argocd + topologyKey: kubernetes.io/hostname + weight: 5 + containers: + - command: + - /shared/argocd-dex + - rundex + env: + - name: ARGOCD_DEX_SERVER_LOGFORMAT + valueFrom: + configMapKeyRef: + key: dexserver.log.format + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_DEX_SERVER_LOGLEVEL + valueFrom: + configMapKeyRef: + key: dexserver.log.level + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_LOG_FORMAT_TIMESTAMP + valueFrom: + configMapKeyRef: + key: log.format.timestamp + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_DEX_SERVER_DISABLE_TLS + valueFrom: + configMapKeyRef: + key: dexserver.disable.tls + name: 
argocd-cmd-params-cm + optional: true + image: ghcr.io/dexidp/dex:v2.43.0 + imagePullPolicy: Always + name: dex + ports: + - containerPort: 5556 + protocol: TCP + - containerPort: 5557 + protocol: TCP + - containerPort: 5558 + protocol: TCP + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /shared + name: static-files + - mountPath: /tmp + name: dexconfig + - mountPath: /tls + name: argocd-dex-server-tls + dnsPolicy: ClusterFirst + initContainers: + - command: + - /bin/cp + - -n + - /usr/local/bin/argocd + - /shared/argocd-dex + image: quay.io/argoproj/argocd:v3.3.6 + imagePullPolicy: Always + name: copyutil + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /shared + name: static-files + - mountPath: /tmp + name: dexconfig + nodeSelector: + kubernetes.io/os: linux + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + serviceAccount: argocd-dex-server + serviceAccountName: argocd-dex-server + terminationGracePeriodSeconds: 30 + volumes: + - emptyDir: {} + name: static-files + - emptyDir: {} + name: dexconfig + - name: argocd-dex-server-tls + secret: + defaultMode: 420 + items: + - key: tls.crt + path: tls.crt + - key: tls.key + path: tls.key + - key: ca.crt + path: ca.crt + optional: true + secretName: argocd-dex-server-tls + diff --git a/argocd/deployment-argocd-notifications-controller.yaml b/argocd/deployment-argocd-notifications-controller.yaml new file mode 100644 index 0000000..508a0f1 --- /dev/null +++ 
b/argocd/deployment-argocd-notifications-controller.yaml @@ -0,0 +1,123 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '1' + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"labels":{"app.kubernetes.io/component":"notifications-controller","app.kubernetes.io/name":"argocd-notifications-controller","app.kubernetes.io/part-of":"argocd"},"name":"argocd-notifications-controller","namespace":"argocd"},"spec":{"selector":{"matchLabels":{"app.kubernetes.io/name":"argocd-notifications-controller"}},"strategy":{"type":"Recreate"},"template":{"metadata":{"labels":{"app.kubernetes.io/name":"argocd-notifications-controller"}},"spec":{"containers":[{"args":["/usr/local/bin/argocd-notifications"],"env":[{"name":"ARGOCD_NOTIFICATIONS_CONTROLLER_LOGFORMAT","valueFrom":{"configMapKeyRef":{"key":"notificationscontroller.log.format","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_NOTIFICATIONS_CONTROLLER_LOGLEVEL","valueFrom":{"configMapKeyRef":{"key":"notificationscontroller.log.level","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_LOG_FORMAT_TIMESTAMP","valueFrom":{"configMapKeyRef":{"key":"log.format.timestamp","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATION_NAMESPACES","valueFrom":{"configMapKeyRef":{"key":"application.namespaces","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_NOTIFICATION_CONTROLLER_SELF_SERVICE_NOTIFICATION_ENABLED","valueFrom":{"configMapKeyRef":{"key":"notificationscontroller.selfservice.enabled","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_NOTIFICATION_CONTROLLER_REPO_SERVER_PLAINTEXT","valueFrom":{"configMapKeyRef":{"key":"notificationscontroller.repo.server.plaintext","name":"argocd-cmd-params-cm","optional":true}}}],"image":"quay.io/argoproj/argocd:v3.3.6","imagePullPolicy":"Always","livenessProbe":{"tcpSocket":{"port":9001}
},"name":"argocd-notifications-controller","securityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true},"volumeMounts":[{"mountPath":"/app/config/tls","name":"tls-certs"},{"mountPath":"/app/config/reposerver/tls","name":"argocd-repo-server-tls"}],"workingDir":"/app"}],"nodeSelector":{"kubernetes.io/os":"linux"},"securityContext":{"runAsNonRoot":true,"seccompProfile":{"type":"RuntimeDefault"}},"serviceAccountName":"argocd-notifications-controller","volumes":[{"configMap":{"name":"argocd-tls-certs-cm"},"name":"tls-certs"},{"name":"argocd-repo-server-tls","secret":{"items":[{"key":"tls.crt","path":"tls.crt"},{"key":"tls.key","path":"tls.key"},{"key":"ca.crt","path":"ca.crt"}],"optional":true,"secretName":"argocd-repo-server-tls"}}]}}}} + + ' + labels: + app.kubernetes.io/component: notifications-controller + app.kubernetes.io/name: argocd-notifications-controller + app.kubernetes.io/part-of: argocd + name: argocd-notifications-controller + namespace: argocd +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app.kubernetes.io/name: argocd-notifications-controller + strategy: + type: Recreate + template: + metadata: + labels: + app.kubernetes.io/name: argocd-notifications-controller + spec: + containers: + - args: + - /usr/local/bin/argocd-notifications + env: + - name: ARGOCD_NOTIFICATIONS_CONTROLLER_LOGFORMAT + valueFrom: + configMapKeyRef: + key: notificationscontroller.log.format + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_NOTIFICATIONS_CONTROLLER_LOGLEVEL + valueFrom: + configMapKeyRef: + key: notificationscontroller.log.level + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_LOG_FORMAT_TIMESTAMP + valueFrom: + configMapKeyRef: + key: log.format.timestamp + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_NAMESPACES + valueFrom: + configMapKeyRef: + key: application.namespaces + name: 
argocd-cmd-params-cm + optional: true + - name: ARGOCD_NOTIFICATION_CONTROLLER_SELF_SERVICE_NOTIFICATION_ENABLED + valueFrom: + configMapKeyRef: + key: notificationscontroller.selfservice.enabled + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_NOTIFICATION_CONTROLLER_REPO_SERVER_PLAINTEXT + valueFrom: + configMapKeyRef: + key: notificationscontroller.repo.server.plaintext + name: argocd-cmd-params-cm + optional: true + image: quay.io/argoproj/argocd:v3.3.6 + imagePullPolicy: Always + livenessProbe: + failureThreshold: 3 + periodSeconds: 10 + successThreshold: 1 + tcpSocket: + port: 9001 + timeoutSeconds: 1 + name: argocd-notifications-controller + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /app/config/tls + name: tls-certs + - mountPath: /app/config/reposerver/tls + name: argocd-repo-server-tls + workingDir: /app + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/os: linux + restartPolicy: Always + schedulerName: default-scheduler + securityContext: + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + serviceAccount: argocd-notifications-controller + serviceAccountName: argocd-notifications-controller + terminationGracePeriodSeconds: 30 + volumes: + - configMap: + defaultMode: 420 + name: argocd-tls-certs-cm + name: tls-certs + - name: argocd-repo-server-tls + secret: + defaultMode: 420 + items: + - key: tls.crt + path: tls.crt + - key: tls.key + path: tls.key + - key: ca.crt + path: ca.crt + optional: true + secretName: argocd-repo-server-tls + diff --git a/argocd/deployment-argocd-redis.yaml b/argocd/deployment-argocd-redis.yaml new file mode 100644 index 0000000..ab66fed --- /dev/null +++ b/argocd/deployment-argocd-redis.yaml @@ -0,0 +1,109 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + 
deployment.kubernetes.io/revision: '1' + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"labels":{"app.kubernetes.io/component":"redis","app.kubernetes.io/name":"argocd-redis","app.kubernetes.io/part-of":"argocd"},"name":"argocd-redis","namespace":"argocd"},"spec":{"selector":{"matchLabels":{"app.kubernetes.io/name":"argocd-redis"}},"template":{"metadata":{"labels":{"app.kubernetes.io/name":"argocd-redis"}},"spec":{"affinity":{"podAntiAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"podAffinityTerm":{"labelSelector":{"matchLabels":{"app.kubernetes.io/name":"argocd-redis"}},"topologyKey":"kubernetes.io/hostname"},"weight":100},{"podAffinityTerm":{"labelSelector":{"matchLabels":{"app.kubernetes.io/part-of":"argocd"}},"topologyKey":"kubernetes.io/hostname"},"weight":5}]}},"containers":[{"args":["--save","","--appendonly","no","--requirepass + $(REDIS_PASSWORD)"],"env":[{"name":"REDIS_PASSWORD","valueFrom":{"secretKeyRef":{"key":"auth","name":"argocd-redis"}}}],"image":"public.ecr.aws/docker/library/redis:8.2.3-alpine","imagePullPolicy":"Always","name":"redis","ports":[{"containerPort":6379}],"securityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true}}],"initContainers":[{"command":["argocd","admin","redis-initial-password"],"image":"quay.io/argoproj/argocd:v3.3.6","imagePullPolicy":"IfNotPresent","name":"secret-init","securityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"seccompProfile":{"type":"RuntimeDefault"}}}],"nodeSelector":{"kubernetes.io/os":"linux"},"securityContext":{"runAsNonRoot":true,"runAsUser":999,"seccompProfile":{"type":"RuntimeDefault"}},"serviceAccountName":"argocd-redis"}}}} + + ' + labels: + app.kubernetes.io/component: redis + app.kubernetes.io/name: argocd-redis + app.kubernetes.io/part-of: argocd + name: argocd-redis + 
namespace: argocd +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app.kubernetes.io/name: argocd-redis + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + labels: + app.kubernetes.io/name: argocd-redis + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchLabels: + app.kubernetes.io/name: argocd-redis + topologyKey: kubernetes.io/hostname + weight: 100 + - podAffinityTerm: + labelSelector: + matchLabels: + app.kubernetes.io/part-of: argocd + topologyKey: kubernetes.io/hostname + weight: 5 + containers: + - args: + - --save + - '' + - --appendonly + - 'no' + - --requirepass $(REDIS_PASSWORD) + env: + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + key: auth + name: argocd-redis + image: public.ecr.aws/docker/library/redis:8.2.3-alpine + imagePullPolicy: Always + name: redis + ports: + - containerPort: 6379 + protocol: TCP + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + dnsPolicy: ClusterFirst + initContainers: + - command: + - argocd + - admin + - redis-initial-password + image: quay.io/argoproj/argocd:v3.3.6 + imagePullPolicy: IfNotPresent + name: secret-init + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + nodeSelector: + kubernetes.io/os: linux + restartPolicy: Always + schedulerName: default-scheduler + securityContext: + runAsNonRoot: true + runAsUser: 999 + seccompProfile: + type: RuntimeDefault + serviceAccount: argocd-redis + serviceAccountName: argocd-redis + 
terminationGracePeriodSeconds: 30 + diff --git a/argocd/deployment-argocd-repo-server.yaml b/argocd/deployment-argocd-repo-server.yaml new file mode 100644 index 0000000..d075632 --- /dev/null +++ b/argocd/deployment-argocd-repo-server.yaml @@ -0,0 +1,419 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '1' + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"labels":{"app.kubernetes.io/component":"repo-server","app.kubernetes.io/name":"argocd-repo-server","app.kubernetes.io/part-of":"argocd"},"name":"argocd-repo-server","namespace":"argocd"},"spec":{"selector":{"matchLabels":{"app.kubernetes.io/name":"argocd-repo-server"}},"template":{"metadata":{"labels":{"app.kubernetes.io/name":"argocd-repo-server"}},"spec":{"affinity":{"podAntiAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"podAffinityTerm":{"labelSelector":{"matchLabels":{"app.kubernetes.io/name":"argocd-repo-server"}},"topologyKey":"kubernetes.io/hostname"},"weight":100},{"podAffinityTerm":{"labelSelector":{"matchLabels":{"app.kubernetes.io/part-of":"argocd"}},"topologyKey":"kubernetes.io/hostname"},"weight":5}]}},"automountServiceAccountToken":false,"containers":[{"args":["/usr/local/bin/argocd-repo-server"],"env":[{"name":"REDIS_PASSWORD","valueFrom":{"secretKeyRef":{"key":"auth","name":"argocd-redis"}}},{"name":"ARGOCD_RECONCILIATION_TIMEOUT","valueFrom":{"configMapKeyRef":{"key":"timeout.reconciliation","name":"argocd-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_LOGFORMAT","valueFrom":{"configMapKeyRef":{"key":"reposerver.log.format","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_LOGLEVEL","valueFrom":{"configMapKeyRef":{"key":"reposerver.log.level","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_LOG_FORMAT_TIMESTAMP","valueFrom":{"configMapKeyRef":{"key":"log.format.timestamp","name":"argocd-cmd-params-cm","o
ptional":true}}},{"name":"ARGOCD_REPO_SERVER_PARALLELISM_LIMIT","valueFrom":{"configMapKeyRef":{"key":"reposerver.parallelism.limit","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_LISTEN_ADDRESS","valueFrom":{"configMapKeyRef":{"key":"reposerver.listen.address","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_LISTEN_METRICS_ADDRESS","valueFrom":{"configMapKeyRef":{"key":"reposerver.metrics.listen.address","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_DISABLE_TLS","valueFrom":{"configMapKeyRef":{"key":"reposerver.disable.tls","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_TLS_MIN_VERSION","valueFrom":{"configMapKeyRef":{"key":"reposerver.tls.minversion","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_TLS_MAX_VERSION","valueFrom":{"configMapKeyRef":{"key":"reposerver.tls.maxversion","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_TLS_CIPHERS","valueFrom":{"configMapKeyRef":{"key":"reposerver.tls.ciphers","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_CACHE_EXPIRATION","valueFrom":{"configMapKeyRef":{"key":"reposerver.repo.cache.expiration","name":"argocd-cmd-params-cm","optional":true}}},{"name":"REDIS_SERVER","valueFrom":{"configMapKeyRef":{"key":"redis.server","name":"argocd-cmd-params-cm","optional":true}}},{"name":"REDIS_COMPRESSION","valueFrom":{"configMapKeyRef":{"key":"redis.compression","name":"argocd-cmd-params-cm","optional":true}}},{"name":"REDISDB","valueFrom":{"configMapKeyRef":{"key":"redis.db","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_DEFAULT_CACHE_EXPIRATION","valueFrom":{"configMapKeyRef":{"key":"reposerver.default.cache.expiration","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_OTLP_ADDRESS","valueFrom":{"configMapKeyRef":{"key":"otlp.address","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_OTLP_INSECURE","va
lueFrom":{"configMapKeyRef":{"key":"otlp.insecure","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_OTLP_HEADERS","valueFrom":{"configMapKeyRef":{"key":"otlp.headers","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_OTLP_ATTRS","valueFrom":{"configMapKeyRef":{"key":"otlp.attrs","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_MAX_COMBINED_DIRECTORY_MANIFESTS_SIZE","valueFrom":{"configMapKeyRef":{"key":"reposerver.max.combined.directory.manifests.size","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_PLUGIN_TAR_EXCLUSIONS","valueFrom":{"configMapKeyRef":{"key":"reposerver.plugin.tar.exclusions","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_PLUGIN_USE_MANIFEST_GENERATE_PATHS","valueFrom":{"configMapKeyRef":{"key":"reposerver.plugin.use.manifest.generate.paths","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_ALLOW_OUT_OF_BOUNDS_SYMLINKS","valueFrom":{"configMapKeyRef":{"key":"reposerver.allow.oob.symlinks","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_TAR_SIZE","valueFrom":{"configMapKeyRef":{"key":"reposerver.streamed.manifest.max.tar.size","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_EXTRACTED_SIZE","valueFrom":{"configMapKeyRef":{"key":"reposerver.streamed.manifest.max.extracted.size","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_HELM_MANIFEST_MAX_EXTRACTED_SIZE","valueFrom":{"configMapKeyRef":{"key":"reposerver.helm.manifest.max.extracted.size","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_DISABLE_HELM_MANIFEST_MAX_EXTRACTED_SIZE","valueFrom":{"configMapKeyRef":{"key":"reposerver.disable.helm.manifest.max.extracted.size","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_OCI_MANIFEST_MAX_EXTRACTED_SIZE","
valueFrom":{"configMapKeyRef":{"key":"reposerver.oci.manifest.max.extracted.size","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_DISABLE_OCI_MANIFEST_MAX_EXTRACTED_SIZE","valueFrom":{"configMapKeyRef":{"key":"reposerver.disable.oci.manifest.max.extracted.size","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_OCI_LAYER_MEDIA_TYPES","valueFrom":{"configMapKeyRef":{"key":"reposerver.oci.layer.media.types","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REVISION_CACHE_LOCK_TIMEOUT","valueFrom":{"configMapKeyRef":{"key":"reposerver.revision.cache.lock.timeout","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_GIT_MODULES_ENABLED","valueFrom":{"configMapKeyRef":{"key":"reposerver.enable.git.submodule","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_GIT_LS_REMOTE_PARALLELISM_LIMIT","valueFrom":{"configMapKeyRef":{"key":"reposerver.git.lsremote.parallelism.limit","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_GIT_REQUEST_TIMEOUT","valueFrom":{"configMapKeyRef":{"key":"reposerver.git.request.timeout","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_ENABLE_BUILTIN_GIT_CONFIG","valueFrom":{"configMapKeyRef":{"key":"reposerver.enable.builtin.git.config","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_GRPC_MAX_SIZE_MB","valueFrom":{"configMapKeyRef":{"key":"reposerver.grpc.max.size","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_REPO_SERVER_INCLUDE_HIDDEN_DIRECTORIES","valueFrom":{"configMapKeyRef":{"key":"reposerver.include.hidden.directories","name":"argocd-cmd-params-cm","optional":true}}},{"name":"HELM_CACHE_HOME","value":"/helm-working-dir"},{"name":"HELM_CONFIG_HOME","value":"/helm-working-dir"},{"name":"HELM_DATA_HOME","value":"/helm-working-dir"}],"image":"quay.io/argoproj/argocd:v3.3.6","imagePullPolicy":"Always","livenessProbe":{"failureThreshold":3,"httpGet":{"path":"/healthz?full=tr
ue","port":8084},"initialDelaySeconds":30,"periodSeconds":30,"timeoutSeconds":5},"name":"argocd-repo-server","ports":[{"containerPort":8081},{"containerPort":8084}],"readinessProbe":{"httpGet":{"path":"/healthz","port":8084},"initialDelaySeconds":5,"periodSeconds":10},"securityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"seccompProfile":{"type":"RuntimeDefault"}},"volumeMounts":[{"mountPath":"/app/config/ssh","name":"ssh-known-hosts"},{"mountPath":"/app/config/tls","name":"tls-certs"},{"mountPath":"/app/config/gpg/source","name":"gpg-keys"},{"mountPath":"/app/config/gpg/keys","name":"gpg-keyring"},{"mountPath":"/app/config/reposerver/tls","name":"argocd-repo-server-tls"},{"mountPath":"/tmp","name":"tmp"},{"mountPath":"/helm-working-dir","name":"helm-working-dir"},{"mountPath":"/home/argocd/cmp-server/plugins","name":"plugins"}]}],"initContainers":[{"args":["/bin/cp + --update=none /usr/local/bin/argocd /var/run/argocd/argocd \u0026\u0026 /bin/ln + -s /var/run/argocd/argocd 
/var/run/argocd/argocd-cmp-server"],"command":["sh","-c"],"image":"quay.io/argoproj/argocd:v3.3.6","name":"copyutil","securityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"seccompProfile":{"type":"RuntimeDefault"}},"volumeMounts":[{"mountPath":"/var/run/argocd","name":"var-files"}]}],"nodeSelector":{"kubernetes.io/os":"linux"},"serviceAccountName":"argocd-repo-server","volumes":[{"configMap":{"name":"argocd-ssh-known-hosts-cm"},"name":"ssh-known-hosts"},{"configMap":{"name":"argocd-tls-certs-cm"},"name":"tls-certs"},{"configMap":{"name":"argocd-gpg-keys-cm"},"name":"gpg-keys"},{"emptyDir":{},"name":"gpg-keyring"},{"emptyDir":{},"name":"tmp"},{"emptyDir":{},"name":"helm-working-dir"},{"name":"argocd-repo-server-tls","secret":{"items":[{"key":"tls.crt","path":"tls.crt"},{"key":"tls.key","path":"tls.key"},{"key":"ca.crt","path":"ca.crt"}],"optional":true,"secretName":"argocd-repo-server-tls"}},{"emptyDir":{},"name":"var-files"},{"emptyDir":{},"name":"plugins"}]}}}} + + ' + labels: + app.kubernetes.io/component: repo-server + app.kubernetes.io/name: argocd-repo-server + app.kubernetes.io/part-of: argocd + name: argocd-repo-server + namespace: argocd +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app.kubernetes.io/name: argocd-repo-server + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + labels: + app.kubernetes.io/name: argocd-repo-server + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchLabels: + app.kubernetes.io/name: argocd-repo-server + topologyKey: kubernetes.io/hostname + weight: 100 + - podAffinityTerm: + labelSelector: + matchLabels: + app.kubernetes.io/part-of: argocd + topologyKey: kubernetes.io/hostname + weight: 5 + automountServiceAccountToken: false + containers: + - 
args: + - /usr/local/bin/argocd-repo-server + env: + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + key: auth + name: argocd-redis + - name: ARGOCD_RECONCILIATION_TIMEOUT + valueFrom: + configMapKeyRef: + key: timeout.reconciliation + name: argocd-cm + optional: true + - name: ARGOCD_REPO_SERVER_LOGFORMAT + valueFrom: + configMapKeyRef: + key: reposerver.log.format + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_LOGLEVEL + valueFrom: + configMapKeyRef: + key: reposerver.log.level + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_LOG_FORMAT_TIMESTAMP + valueFrom: + configMapKeyRef: + key: log.format.timestamp + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_PARALLELISM_LIMIT + valueFrom: + configMapKeyRef: + key: reposerver.parallelism.limit + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_LISTEN_ADDRESS + valueFrom: + configMapKeyRef: + key: reposerver.listen.address + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_LISTEN_METRICS_ADDRESS + valueFrom: + configMapKeyRef: + key: reposerver.metrics.listen.address + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_DISABLE_TLS + valueFrom: + configMapKeyRef: + key: reposerver.disable.tls + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_TLS_MIN_VERSION + valueFrom: + configMapKeyRef: + key: reposerver.tls.minversion + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_TLS_MAX_VERSION + valueFrom: + configMapKeyRef: + key: reposerver.tls.maxversion + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_TLS_CIPHERS + valueFrom: + configMapKeyRef: + key: reposerver.tls.ciphers + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_CACHE_EXPIRATION + valueFrom: + configMapKeyRef: + key: reposerver.repo.cache.expiration + name: argocd-cmd-params-cm + optional: true + - name: REDIS_SERVER + valueFrom: + configMapKeyRef: + key: 
redis.server + name: argocd-cmd-params-cm + optional: true + - name: REDIS_COMPRESSION + valueFrom: + configMapKeyRef: + key: redis.compression + name: argocd-cmd-params-cm + optional: true + - name: REDISDB + valueFrom: + configMapKeyRef: + key: redis.db + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_DEFAULT_CACHE_EXPIRATION + valueFrom: + configMapKeyRef: + key: reposerver.default.cache.expiration + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_OTLP_ADDRESS + valueFrom: + configMapKeyRef: + key: otlp.address + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_OTLP_INSECURE + valueFrom: + configMapKeyRef: + key: otlp.insecure + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_OTLP_HEADERS + valueFrom: + configMapKeyRef: + key: otlp.headers + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_OTLP_ATTRS + valueFrom: + configMapKeyRef: + key: otlp.attrs + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_MAX_COMBINED_DIRECTORY_MANIFESTS_SIZE + valueFrom: + configMapKeyRef: + key: reposerver.max.combined.directory.manifests.size + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_PLUGIN_TAR_EXCLUSIONS + valueFrom: + configMapKeyRef: + key: reposerver.plugin.tar.exclusions + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_PLUGIN_USE_MANIFEST_GENERATE_PATHS + valueFrom: + configMapKeyRef: + key: reposerver.plugin.use.manifest.generate.paths + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_ALLOW_OUT_OF_BOUNDS_SYMLINKS + valueFrom: + configMapKeyRef: + key: reposerver.allow.oob.symlinks + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_TAR_SIZE + valueFrom: + configMapKeyRef: + key: reposerver.streamed.manifest.max.tar.size + name: argocd-cmd-params-cm + optional: true + - name: 
ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_EXTRACTED_SIZE + valueFrom: + configMapKeyRef: + key: reposerver.streamed.manifest.max.extracted.size + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_HELM_MANIFEST_MAX_EXTRACTED_SIZE + valueFrom: + configMapKeyRef: + key: reposerver.helm.manifest.max.extracted.size + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_DISABLE_HELM_MANIFEST_MAX_EXTRACTED_SIZE + valueFrom: + configMapKeyRef: + key: reposerver.disable.helm.manifest.max.extracted.size + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_OCI_MANIFEST_MAX_EXTRACTED_SIZE + valueFrom: + configMapKeyRef: + key: reposerver.oci.manifest.max.extracted.size + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_DISABLE_OCI_MANIFEST_MAX_EXTRACTED_SIZE + valueFrom: + configMapKeyRef: + key: reposerver.disable.oci.manifest.max.extracted.size + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_OCI_LAYER_MEDIA_TYPES + valueFrom: + configMapKeyRef: + key: reposerver.oci.layer.media.types + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REVISION_CACHE_LOCK_TIMEOUT + valueFrom: + configMapKeyRef: + key: reposerver.revision.cache.lock.timeout + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_GIT_MODULES_ENABLED + valueFrom: + configMapKeyRef: + key: reposerver.enable.git.submodule + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_GIT_LS_REMOTE_PARALLELISM_LIMIT + valueFrom: + configMapKeyRef: + key: reposerver.git.lsremote.parallelism.limit + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_GIT_REQUEST_TIMEOUT + valueFrom: + configMapKeyRef: + key: reposerver.git.request.timeout + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_ENABLE_BUILTIN_GIT_CONFIG + valueFrom: + configMapKeyRef: + key: reposerver.enable.builtin.git.config + name: argocd-cmd-params-cm + optional: true + - name: 
ARGOCD_GRPC_MAX_SIZE_MB + valueFrom: + configMapKeyRef: + key: reposerver.grpc.max.size + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_REPO_SERVER_INCLUDE_HIDDEN_DIRECTORIES + valueFrom: + configMapKeyRef: + key: reposerver.include.hidden.directories + name: argocd-cmd-params-cm + optional: true + - name: HELM_CACHE_HOME + value: /helm-working-dir + - name: HELM_CONFIG_HOME + value: /helm-working-dir + - name: HELM_DATA_HOME + value: /helm-working-dir + image: quay.io/argoproj/argocd:v3.3.6 + imagePullPolicy: Always + livenessProbe: + failureThreshold: 3 + httpGet: + path: /healthz?full=true + port: 8084 + scheme: HTTP + initialDelaySeconds: 30 + periodSeconds: 30 + successThreshold: 1 + timeoutSeconds: 5 + name: argocd-repo-server + ports: + - containerPort: 8081 + protocol: TCP + - containerPort: 8084 + protocol: TCP + readinessProbe: + failureThreshold: 3 + httpGet: + path: /healthz + port: 8084 + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /app/config/ssh + name: ssh-known-hosts + - mountPath: /app/config/tls + name: tls-certs + - mountPath: /app/config/gpg/source + name: gpg-keys + - mountPath: /app/config/gpg/keys + name: gpg-keyring + - mountPath: /app/config/reposerver/tls + name: argocd-repo-server-tls + - mountPath: /tmp + name: tmp + - mountPath: /helm-working-dir + name: helm-working-dir + - mountPath: /home/argocd/cmp-server/plugins + name: plugins + dnsPolicy: ClusterFirst + initContainers: + - args: + - /bin/cp --update=none /usr/local/bin/argocd /var/run/argocd/argocd && /bin/ln + -s /var/run/argocd/argocd /var/run/argocd/argocd-cmp-server + command: + - sh + - -c + image: 
quay.io/argoproj/argocd:v3.3.6 + imagePullPolicy: IfNotPresent + name: copyutil + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /var/run/argocd + name: var-files + nodeSelector: + kubernetes.io/os: linux + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + serviceAccount: argocd-repo-server + serviceAccountName: argocd-repo-server + terminationGracePeriodSeconds: 30 + volumes: + - configMap: + defaultMode: 420 + name: argocd-ssh-known-hosts-cm + name: ssh-known-hosts + - configMap: + defaultMode: 420 + name: argocd-tls-certs-cm + name: tls-certs + - configMap: + defaultMode: 420 + name: argocd-gpg-keys-cm + name: gpg-keys + - emptyDir: {} + name: gpg-keyring + - emptyDir: {} + name: tmp + - emptyDir: {} + name: helm-working-dir + - name: argocd-repo-server-tls + secret: + defaultMode: 420 + items: + - key: tls.crt + path: tls.crt + - key: tls.key + path: tls.key + - key: ca.crt + path: ca.crt + optional: true + secretName: argocd-repo-server-tls + - emptyDir: {} + name: var-files + - emptyDir: {} + name: plugins + diff --git a/argocd/deployment-argocd-server.yaml b/argocd/deployment-argocd-server.yaml new file mode 100644 index 0000000..9e11ea0 --- /dev/null +++ b/argocd/deployment-argocd-server.yaml @@ -0,0 +1,447 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '2' + kubectl.kubernetes.io/last-applied-configuration: 
'{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"labels":{"app.kubernetes.io/component":"server","app.kubernetes.io/name":"argocd-server","app.kubernetes.io/part-of":"argocd"},"name":"argocd-server","namespace":"argocd"},"spec":{"selector":{"matchLabels":{"app.kubernetes.io/name":"argocd-server"}},"template":{"metadata":{"labels":{"app.kubernetes.io/name":"argocd-server"}},"spec":{"affinity":{"podAntiAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"podAffinityTerm":{"labelSelector":{"matchLabels":{"app.kubernetes.io/name":"argocd-server"}},"topologyKey":"kubernetes.io/hostname"},"weight":100},{"podAffinityTerm":{"labelSelector":{"matchLabels":{"app.kubernetes.io/part-of":"argocd"}},"topologyKey":"kubernetes.io/hostname"},"weight":5}]}},"containers":[{"args":["/usr/local/bin/argocd-server"],"env":[{"name":"REDIS_PASSWORD","valueFrom":{"secretKeyRef":{"key":"auth","name":"argocd-redis"}}},{"name":"ARGOCD_SERVER_INSECURE","valueFrom":{"configMapKeyRef":{"key":"server.insecure","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_BASEHREF","valueFrom":{"configMapKeyRef":{"key":"server.basehref","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_ROOTPATH","valueFrom":{"configMapKeyRef":{"key":"server.rootpath","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_LOGFORMAT","valueFrom":{"configMapKeyRef":{"key":"server.log.format","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_LOG_LEVEL","valueFrom":{"configMapKeyRef":{"key":"server.log.level","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_REPO_SERVER","valueFrom":{"configMapKeyRef":{"key":"repo.server","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_DEX_SERVER","valueFrom":{"configMapKeyRef":{"key":"server.dex.server","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_DISABLE_AUTH","valueFrom":{"configMapKeyRef":{"key":"server.di
sable.auth","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_ENABLE_GZIP","valueFrom":{"configMapKeyRef":{"key":"server.enable.gzip","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_REPO_SERVER_TIMEOUT_SECONDS","valueFrom":{"configMapKeyRef":{"key":"server.repo.server.timeout.seconds","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_X_FRAME_OPTIONS","valueFrom":{"configMapKeyRef":{"key":"server.x.frame.options","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_CONTENT_SECURITY_POLICY","valueFrom":{"configMapKeyRef":{"key":"server.content.security.policy","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_REPO_SERVER_PLAINTEXT","valueFrom":{"configMapKeyRef":{"key":"server.repo.server.plaintext","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_REPO_SERVER_STRICT_TLS","valueFrom":{"configMapKeyRef":{"key":"server.repo.server.strict.tls","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_DEX_SERVER_PLAINTEXT","valueFrom":{"configMapKeyRef":{"key":"server.dex.server.plaintext","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_DEX_SERVER_STRICT_TLS","valueFrom":{"configMapKeyRef":{"key":"server.dex.server.strict.tls","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_TLS_MIN_VERSION","valueFrom":{"configMapKeyRef":{"key":"server.tls.minversion","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_TLS_MAX_VERSION","valueFrom":{"configMapKeyRef":{"key":"server.tls.maxversion","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_TLS_CIPHERS","valueFrom":{"configMapKeyRef":{"key":"server.tls.ciphers","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_CONNECTION_STATUS_CACHE_EXPIRATION","valueFrom":{"configMapKeyRef":{"key":"server.connection.status.cache.expiration","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_OIDC_
CACHE_EXPIRATION","valueFrom":{"configMapKeyRef":{"key":"server.oidc.cache.expiration","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_STATIC_ASSETS","valueFrom":{"configMapKeyRef":{"key":"server.staticassets","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APP_STATE_CACHE_EXPIRATION","valueFrom":{"configMapKeyRef":{"key":"server.app.state.cache.expiration","name":"argocd-cmd-params-cm","optional":true}}},{"name":"REDIS_SERVER","valueFrom":{"configMapKeyRef":{"key":"redis.server","name":"argocd-cmd-params-cm","optional":true}}},{"name":"REDIS_COMPRESSION","valueFrom":{"configMapKeyRef":{"key":"redis.compression","name":"argocd-cmd-params-cm","optional":true}}},{"name":"REDISDB","valueFrom":{"configMapKeyRef":{"key":"redis.db","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_DEFAULT_CACHE_EXPIRATION","valueFrom":{"configMapKeyRef":{"key":"server.default.cache.expiration","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_MAX_COOKIE_NUMBER","valueFrom":{"configMapKeyRef":{"key":"server.http.cookie.maxnumber","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_LISTEN_ADDRESS","valueFrom":{"configMapKeyRef":{"key":"server.listen.address","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_METRICS_LISTEN_ADDRESS","valueFrom":{"configMapKeyRef":{"key":"server.metrics.listen.address","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_OTLP_ADDRESS","valueFrom":{"configMapKeyRef":{"key":"otlp.address","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_OTLP_INSECURE","valueFrom":{"configMapKeyRef":{"key":"otlp.insecure","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_OTLP_HEADERS","valueFrom":{"configMapKeyRef":{"key":"otlp.headers","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_OTLP_ATTRS","valueFrom":{"configMapKeyRef":{"key":"otlp.attrs","name":"argocd-cmd-params-cm","option
al":true}}},{"name":"ARGOCD_APPLICATION_NAMESPACES","valueFrom":{"configMapKeyRef":{"key":"application.namespaces","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_ENABLE_PROXY_EXTENSION","valueFrom":{"configMapKeyRef":{"key":"server.enable.proxy.extension","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_K8SCLIENT_RETRY_MAX","valueFrom":{"configMapKeyRef":{"key":"server.k8sclient.retry.max","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_K8SCLIENT_RETRY_BASE_BACKOFF","valueFrom":{"configMapKeyRef":{"key":"server.k8sclient.retry.base.backoff","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_API_CONTENT_TYPES","valueFrom":{"configMapKeyRef":{"key":"server.api.content.types","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SERVER_WEBHOOK_PARALLELISM_LIMIT","valueFrom":{"configMapKeyRef":{"key":"server.webhook.parallelism.limit","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_NEW_GIT_FILE_GLOBBING","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.enable.new.git.file.globbing","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_SCM_ROOT_CA_PATH","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.scm.root.ca.path","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_ALLOWED_SCM_PROVIDERS","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.allowed.scm.providers","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_SCM_PROVIDERS","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.enable.scm.providers","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_GITHUB_API_METRICS","valueFrom":{"configMapKeyRef":{"key":"applicationsetcontroller.enable.github.api.metrics","name":"argocd-cmd-params-cm","optional":true}}},{"name":"A
RGOCD_HYDRATOR_ENABLED","valueFrom":{"configMapKeyRef":{"key":"hydrator.enabled","name":"argocd-cmd-params-cm","optional":true}}},{"name":"ARGOCD_SYNC_WITH_REPLACE_ALLOWED","valueFrom":{"configMapKeyRef":{"key":"server.sync.replace.allowed","name":"argocd-cmd-params-cm","optional":true}}}],"image":"quay.io/argoproj/argocd:v3.3.6","imagePullPolicy":"Always","livenessProbe":{"httpGet":{"path":"/healthz?full=true","port":8080},"initialDelaySeconds":3,"periodSeconds":30,"timeoutSeconds":5},"name":"argocd-server","ports":[{"containerPort":8080},{"containerPort":8083}],"readinessProbe":{"httpGet":{"path":"/healthz","port":8080},"initialDelaySeconds":3,"periodSeconds":30},"securityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"seccompProfile":{"type":"RuntimeDefault"}},"volumeMounts":[{"mountPath":"/app/config/ssh","name":"ssh-known-hosts"},{"mountPath":"/app/config/tls","name":"tls-certs"},{"mountPath":"/app/config/server/tls","name":"argocd-repo-server-tls"},{"mountPath":"/app/config/dex/tls","name":"argocd-dex-server-tls"},{"mountPath":"/home/argocd","name":"plugins-home"},{"mountPath":"/tmp","name":"tmp"},{"mountPath":"/home/argocd/params","name":"argocd-cmd-params-cm"}]}],"nodeSelector":{"kubernetes.io/os":"linux"},"serviceAccountName":"argocd-server","volumes":[{"emptyDir":{},"name":"plugins-home"},{"emptyDir":{},"name":"tmp"},{"configMap":{"name":"argocd-ssh-known-hosts-cm"},"name":"ssh-known-hosts"},{"configMap":{"name":"argocd-tls-certs-cm"},"name":"tls-certs"},{"name":"argocd-repo-server-tls","secret":{"items":[{"key":"tls.crt","path":"tls.crt"},{"key":"tls.key","path":"tls.key"},{"key":"ca.crt","path":"ca.crt"}],"optional":true,"secretName":"argocd-repo-server-tls"}},{"name":"argocd-dex-server-tls","secret":{"items":[{"key":"tls.crt","path":"tls.crt"},{"key":"ca.crt","path":"ca.crt"}],"optional":true,"secretName":"argocd-dex-server-tls"}},{"configMap":{"items":[{"key":"server.profile.
enabled","path":"profiler.enabled"}],"name":"argocd-cmd-params-cm","optional":true},"name":"argocd-cmd-params-cm"}]}}}} + + ' + labels: + app.kubernetes.io/component: server + app.kubernetes.io/name: argocd-server + app.kubernetes.io/part-of: argocd + name: argocd-server + namespace: argocd +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app.kubernetes.io/name: argocd-server + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + annotations: + kubectl.kubernetes.io/restartedAt: '2026-04-08T19:44:20Z' + labels: + app.kubernetes.io/name: argocd-server + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchLabels: + app.kubernetes.io/name: argocd-server + topologyKey: kubernetes.io/hostname + weight: 100 + - podAffinityTerm: + labelSelector: + matchLabels: + app.kubernetes.io/part-of: argocd + topologyKey: kubernetes.io/hostname + weight: 5 + containers: + - args: + - /usr/local/bin/argocd-server + env: + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + key: auth + name: argocd-redis + - name: ARGOCD_SERVER_INSECURE + valueFrom: + configMapKeyRef: + key: server.insecure + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_BASEHREF + valueFrom: + configMapKeyRef: + key: server.basehref + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_ROOTPATH + valueFrom: + configMapKeyRef: + key: server.rootpath + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_LOGFORMAT + valueFrom: + configMapKeyRef: + key: server.log.format + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_LOG_LEVEL + valueFrom: + configMapKeyRef: + key: server.log.level + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_REPO_SERVER + valueFrom: + configMapKeyRef: + key: repo.server + name: argocd-cmd-params-cm + 
optional: true + - name: ARGOCD_SERVER_DEX_SERVER + valueFrom: + configMapKeyRef: + key: server.dex.server + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_DISABLE_AUTH + valueFrom: + configMapKeyRef: + key: server.disable.auth + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_ENABLE_GZIP + valueFrom: + configMapKeyRef: + key: server.enable.gzip + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_REPO_SERVER_TIMEOUT_SECONDS + valueFrom: + configMapKeyRef: + key: server.repo.server.timeout.seconds + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_X_FRAME_OPTIONS + valueFrom: + configMapKeyRef: + key: server.x.frame.options + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_CONTENT_SECURITY_POLICY + valueFrom: + configMapKeyRef: + key: server.content.security.policy + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_REPO_SERVER_PLAINTEXT + valueFrom: + configMapKeyRef: + key: server.repo.server.plaintext + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_REPO_SERVER_STRICT_TLS + valueFrom: + configMapKeyRef: + key: server.repo.server.strict.tls + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_DEX_SERVER_PLAINTEXT + valueFrom: + configMapKeyRef: + key: server.dex.server.plaintext + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_DEX_SERVER_STRICT_TLS + valueFrom: + configMapKeyRef: + key: server.dex.server.strict.tls + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_TLS_MIN_VERSION + valueFrom: + configMapKeyRef: + key: server.tls.minversion + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_TLS_MAX_VERSION + valueFrom: + configMapKeyRef: + key: server.tls.maxversion + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_TLS_CIPHERS + valueFrom: + configMapKeyRef: + key: server.tls.ciphers + name: argocd-cmd-params-cm + optional: true + - name: 
ARGOCD_SERVER_CONNECTION_STATUS_CACHE_EXPIRATION + valueFrom: + configMapKeyRef: + key: server.connection.status.cache.expiration + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_OIDC_CACHE_EXPIRATION + valueFrom: + configMapKeyRef: + key: server.oidc.cache.expiration + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_STATIC_ASSETS + valueFrom: + configMapKeyRef: + key: server.staticassets + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APP_STATE_CACHE_EXPIRATION + valueFrom: + configMapKeyRef: + key: server.app.state.cache.expiration + name: argocd-cmd-params-cm + optional: true + - name: REDIS_SERVER + valueFrom: + configMapKeyRef: + key: redis.server + name: argocd-cmd-params-cm + optional: true + - name: REDIS_COMPRESSION + valueFrom: + configMapKeyRef: + key: redis.compression + name: argocd-cmd-params-cm + optional: true + - name: REDISDB + valueFrom: + configMapKeyRef: + key: redis.db + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_DEFAULT_CACHE_EXPIRATION + valueFrom: + configMapKeyRef: + key: server.default.cache.expiration + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_MAX_COOKIE_NUMBER + valueFrom: + configMapKeyRef: + key: server.http.cookie.maxnumber + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_LISTEN_ADDRESS + valueFrom: + configMapKeyRef: + key: server.listen.address + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_METRICS_LISTEN_ADDRESS + valueFrom: + configMapKeyRef: + key: server.metrics.listen.address + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_OTLP_ADDRESS + valueFrom: + configMapKeyRef: + key: otlp.address + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_OTLP_INSECURE + valueFrom: + configMapKeyRef: + key: otlp.insecure + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_OTLP_HEADERS + valueFrom: + configMapKeyRef: + key: otlp.headers + name: 
argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_OTLP_ATTRS + valueFrom: + configMapKeyRef: + key: otlp.attrs + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_NAMESPACES + valueFrom: + configMapKeyRef: + key: application.namespaces + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_ENABLE_PROXY_EXTENSION + valueFrom: + configMapKeyRef: + key: server.enable.proxy.extension + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_K8SCLIENT_RETRY_MAX + valueFrom: + configMapKeyRef: + key: server.k8sclient.retry.max + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_K8SCLIENT_RETRY_BASE_BACKOFF + valueFrom: + configMapKeyRef: + key: server.k8sclient.retry.base.backoff + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_API_CONTENT_TYPES + valueFrom: + configMapKeyRef: + key: server.api.content.types + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SERVER_WEBHOOK_PARALLELISM_LIMIT + valueFrom: + configMapKeyRef: + key: server.webhook.parallelism.limit + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_NEW_GIT_FILE_GLOBBING + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.enable.new.git.file.globbing + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_SCM_ROOT_CA_PATH + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.scm.root.ca.path + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_ALLOWED_SCM_PROVIDERS + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.allowed.scm.providers + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_SCM_PROVIDERS + valueFrom: + configMapKeyRef: + key: applicationsetcontroller.enable.scm.providers + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATIONSET_CONTROLLER_ENABLE_GITHUB_API_METRICS + valueFrom: + configMapKeyRef: 
+ key: applicationsetcontroller.enable.github.api.metrics + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_HYDRATOR_ENABLED + valueFrom: + configMapKeyRef: + key: hydrator.enabled + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SYNC_WITH_REPLACE_ALLOWED + valueFrom: + configMapKeyRef: + key: server.sync.replace.allowed + name: argocd-cmd-params-cm + optional: true + image: quay.io/argoproj/argocd:v3.3.6 + imagePullPolicy: Always + livenessProbe: + failureThreshold: 3 + httpGet: + path: /healthz?full=true + port: 8080 + scheme: HTTP + initialDelaySeconds: 3 + periodSeconds: 30 + successThreshold: 1 + timeoutSeconds: 5 + name: argocd-server + ports: + - containerPort: 8080 + protocol: TCP + - containerPort: 8083 + protocol: TCP + readinessProbe: + failureThreshold: 3 + httpGet: + path: /healthz + port: 8080 + scheme: HTTP + initialDelaySeconds: 3 + periodSeconds: 30 + successThreshold: 1 + timeoutSeconds: 1 + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /app/config/ssh + name: ssh-known-hosts + - mountPath: /app/config/tls + name: tls-certs + - mountPath: /app/config/server/tls + name: argocd-repo-server-tls + - mountPath: /app/config/dex/tls + name: argocd-dex-server-tls + - mountPath: /home/argocd + name: plugins-home + - mountPath: /tmp + name: tmp + - mountPath: /home/argocd/params + name: argocd-cmd-params-cm + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/os: linux + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + serviceAccount: argocd-server + serviceAccountName: argocd-server + terminationGracePeriodSeconds: 30 + volumes: + - emptyDir: {} + name: plugins-home + - emptyDir: {} + name: tmp + - configMap: + defaultMode: 420 + name: 
argocd-ssh-known-hosts-cm + name: ssh-known-hosts + - configMap: + defaultMode: 420 + name: argocd-tls-certs-cm + name: tls-certs + - name: argocd-repo-server-tls + secret: + defaultMode: 420 + items: + - key: tls.crt + path: tls.crt + - key: tls.key + path: tls.key + - key: ca.crt + path: ca.crt + optional: true + secretName: argocd-repo-server-tls + - name: argocd-dex-server-tls + secret: + defaultMode: 420 + items: + - key: tls.crt + path: tls.crt + - key: ca.crt + path: ca.crt + optional: true + secretName: argocd-dex-server-tls + - configMap: + defaultMode: 420 + items: + - key: server.profile.enabled + path: profiler.enabled + name: argocd-cmd-params-cm + optional: true + name: argocd-cmd-params-cm + diff --git a/argocd/ingress-argocd-server.yaml b/argocd/ingress-argocd-server.yaml new file mode 100644 index 0000000..8632417 --- /dev/null +++ b/argocd/ingress-argocd-server.yaml @@ -0,0 +1,29 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"networking.k8s.io/v1","kind":"Ingress","metadata":{"annotations":{"cert-manager.io/cluster-issuer":"letsencrypt-prod","traefik.ingress.kubernetes.io/router.entrypoints":"websecure"},"name":"argocd-server","namespace":"argocd"},"spec":{"ingressClassName":"traefik","rules":[{"host":"argocd.chemavx.xyz","http":{"paths":[{"backend":{"service":{"name":"argocd-server","port":{"number":80}}},"path":"/","pathType":"Prefix"}]}}],"tls":[{"hosts":["argocd.chemavx.xyz"],"secretName":"argocd-tls"}]}} + + ' + traefik.ingress.kubernetes.io/router.entrypoints: websecure + name: argocd-server + namespace: argocd +spec: + ingressClassName: traefik + rules: + - host: argocd.chemavx.xyz + http: + paths: + - backend: + service: + name: argocd-server + port: + number: 80 + path: / + pathType: Prefix + tls: + - hosts: + - argocd.chemavx.xyz + secretName: argocd-tls + diff --git 
a/argocd/secret-argocd-notifications-secret.yaml b/argocd/secret-argocd-notifications-secret.yaml new file mode 100644 index 0000000..adb01e6 --- /dev/null +++ b/argocd/secret-argocd-notifications-secret.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +data: {} +kind: Secret +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Secret","metadata":{"annotations":{},"labels":{"app.kubernetes.io/component":"notifications-controller","app.kubernetes.io/name":"argocd-notifications-controller","app.kubernetes.io/part-of":"argocd"},"name":"argocd-notifications-secret","namespace":"argocd"},"type":"Opaque"} + + ' + labels: + app.kubernetes.io/component: notifications-controller + app.kubernetes.io/name: argocd-notifications-controller + app.kubernetes.io/part-of: argocd + name: argocd-notifications-secret + namespace: argocd +type: Opaque + diff --git a/argocd/secret-argocd-redis.yaml b/argocd/secret-argocd-redis.yaml new file mode 100644 index 0000000..bcdaadf --- /dev/null +++ b/argocd/secret-argocd-redis.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +data: + auth: REDACTED +kind: Secret +metadata: + name: argocd-redis + namespace: argocd +type: Opaque + diff --git a/argocd/secret-argocd-secret.yaml b/argocd/secret-argocd-secret.yaml new file mode 100644 index 0000000..1103827 --- /dev/null +++ b/argocd/secret-argocd-secret.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +data: + admin.password: REDACTED + admin.passwordMtime: REDACTED + server.secretkey: REDACTED + tls.crt: REDACTED + tls.key: REDACTED +kind: Secret +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Secret","metadata":{"annotations":{},"labels":{"app.kubernetes.io/name":"argocd-secret","app.kubernetes.io/part-of":"argocd"},"name":"argocd-secret","namespace":"argocd"},"type":"Opaque"} + + ' + labels: + app.kubernetes.io/name: argocd-secret + app.kubernetes.io/part-of: argocd + name: argocd-secret + namespace: argocd +type: 
Opaque + diff --git a/argocd/secret-argocd-tls.yaml b/argocd/secret-argocd-tls.yaml new file mode 100644 index 0000000..a7934d4 --- /dev/null +++ b/argocd/secret-argocd-tls.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +data: + tls.crt: REDACTED + tls.key: REDACTED +kind: Secret +metadata: + annotations: + cert-manager.io/alt-names: argocd.chemavx.xyz + cert-manager.io/certificate-name: argocd-tls + cert-manager.io/common-name: argocd.chemavx.xyz + cert-manager.io/ip-sans: '' + cert-manager.io/issuer-group: cert-manager.io + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod + cert-manager.io/uri-sans: '' + labels: + controller.cert-manager.io/fao: 'true' + name: argocd-tls + namespace: argocd +type: kubernetes.io/tls + diff --git a/argocd/secret-gitea-k8s-manifests.yaml b/argocd/secret-gitea-k8s-manifests.yaml new file mode 100644 index 0000000..92a53b4 --- /dev/null +++ b/argocd/secret-gitea-k8s-manifests.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +data: + insecure: REDACTED + password: REDACTED + type: REDACTED + url: REDACTED + username: REDACTED +kind: Secret +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Secret","metadata":{"annotations":{},"labels":{"argocd.argoproj.io/secret-type":"repository"},"name":"gitea-k8s-manifests","namespace":"argocd"},"stringData":{"insecure":"REDACTED","password":"REDACTED","type":"REDACTED","url":"REDACTED","username":"REDACTED"},"type":"Opaque"} + + ' + labels: + argocd.argoproj.io/secret-type: repository + name: gitea-k8s-manifests + namespace: argocd +type: Opaque + diff --git a/argocd/service-argocd-applicationset-controller.yaml b/argocd/service-argocd-applicationset-controller.yaml new file mode 100644 index 0000000..c7c06f1 --- /dev/null +++ b/argocd/service-argocd-applicationset-controller.yaml @@ -0,0 +1,31 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: 
applicationset-controller + app.kubernetes.io/name: argocd-applicationset-controller + app.kubernetes.io/part-of: argocd + name: argocd-applicationset-controller + namespace: argocd +spec: + clusterIP: 10.43.111.150 + clusterIPs: + - 10.43.111.150 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - name: webhook + port: 7000 + protocol: TCP + targetPort: webhook + - name: metrics + port: 8080 + protocol: TCP + targetPort: metrics + selector: + app.kubernetes.io/name: argocd-applicationset-controller + sessionAffinity: None + type: ClusterIP + diff --git a/argocd/service-argocd-dex-server.yaml b/argocd/service-argocd-dex-server.yaml new file mode 100644 index 0000000..63726c1 --- /dev/null +++ b/argocd/service-argocd-dex-server.yaml @@ -0,0 +1,36 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: dex-server + app.kubernetes.io/name: argocd-dex-server + app.kubernetes.io/part-of: argocd + name: argocd-dex-server + namespace: argocd +spec: + clusterIP: 10.43.125.110 + clusterIPs: + - 10.43.125.110 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - appProtocol: TCP + name: http + port: 5556 + protocol: TCP + targetPort: 5556 + - name: grpc + port: 5557 + protocol: TCP + targetPort: 5557 + - name: metrics + port: 5558 + protocol: TCP + targetPort: 5558 + selector: + app.kubernetes.io/name: argocd-dex-server + sessionAffinity: None + type: ClusterIP + diff --git a/argocd/service-argocd-metrics.yaml b/argocd/service-argocd-metrics.yaml new file mode 100644 index 0000000..77e4e52 --- /dev/null +++ b/argocd/service-argocd-metrics.yaml @@ -0,0 +1,27 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: metrics + app.kubernetes.io/name: argocd-metrics + app.kubernetes.io/part-of: argocd + name: argocd-metrics + namespace: argocd +spec: + clusterIP: 10.43.114.194 + clusterIPs: + - 10.43.114.194 + 
internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - name: metrics + port: 8082 + protocol: TCP + targetPort: 8082 + selector: + app.kubernetes.io/name: argocd-application-controller + sessionAffinity: None + type: ClusterIP + diff --git a/argocd/service-argocd-notifications-controller-metrics.yaml b/argocd/service-argocd-notifications-controller-metrics.yaml new file mode 100644 index 0000000..ac37655 --- /dev/null +++ b/argocd/service-argocd-notifications-controller-metrics.yaml @@ -0,0 +1,27 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: notifications-controller + app.kubernetes.io/name: argocd-notifications-controller-metrics + app.kubernetes.io/part-of: argocd + name: argocd-notifications-controller-metrics + namespace: argocd +spec: + clusterIP: 10.43.30.178 + clusterIPs: + - 10.43.30.178 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - name: metrics + port: 9001 + protocol: TCP + targetPort: 9001 + selector: + app.kubernetes.io/name: argocd-notifications-controller + sessionAffinity: None + type: ClusterIP + diff --git a/argocd/service-argocd-redis.yaml b/argocd/service-argocd-redis.yaml new file mode 100644 index 0000000..15c2171 --- /dev/null +++ b/argocd/service-argocd-redis.yaml @@ -0,0 +1,27 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: redis + app.kubernetes.io/name: argocd-redis + app.kubernetes.io/part-of: argocd + name: argocd-redis + namespace: argocd +spec: + clusterIP: 10.43.63.154 + clusterIPs: + - 10.43.63.154 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - name: tcp-redis + port: 6379 + protocol: TCP + targetPort: 6379 + selector: + app.kubernetes.io/name: argocd-redis + sessionAffinity: None + type: ClusterIP + diff --git a/argocd/service-argocd-repo-server.yaml b/argocd/service-argocd-repo-server.yaml new file mode 100644 
index 0000000..ffb084f --- /dev/null +++ b/argocd/service-argocd-repo-server.yaml @@ -0,0 +1,31 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: repo-server + app.kubernetes.io/name: argocd-repo-server + app.kubernetes.io/part-of: argocd + name: argocd-repo-server + namespace: argocd +spec: + clusterIP: 10.43.103.206 + clusterIPs: + - 10.43.103.206 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - name: server + port: 8081 + protocol: TCP + targetPort: 8081 + - name: metrics + port: 8084 + protocol: TCP + targetPort: 8084 + selector: + app.kubernetes.io/name: argocd-repo-server + sessionAffinity: None + type: ClusterIP + diff --git a/argocd/service-argocd-server-metrics.yaml b/argocd/service-argocd-server-metrics.yaml new file mode 100644 index 0000000..6b51163 --- /dev/null +++ b/argocd/service-argocd-server-metrics.yaml @@ -0,0 +1,27 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: server + app.kubernetes.io/name: argocd-server-metrics + app.kubernetes.io/part-of: argocd + name: argocd-server-metrics + namespace: argocd +spec: + clusterIP: 10.43.162.46 + clusterIPs: + - 10.43.162.46 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - name: metrics + port: 8083 + protocol: TCP + targetPort: 8083 + selector: + app.kubernetes.io/name: argocd-server + sessionAffinity: None + type: ClusterIP + diff --git a/argocd/service-argocd-server.yaml b/argocd/service-argocd-server.yaml new file mode 100644 index 0000000..24a8be0 --- /dev/null +++ b/argocd/service-argocd-server.yaml @@ -0,0 +1,31 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: server + app.kubernetes.io/name: argocd-server + app.kubernetes.io/part-of: argocd + name: argocd-server + namespace: argocd +spec: + clusterIP: 10.43.16.182 + clusterIPs: + - 10.43.16.182 + internalTrafficPolicy: Cluster + ipFamilies: 
+ - IPv4 + ipFamilyPolicy: SingleStack + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 8080 + - name: https + port: 443 + protocol: TCP + targetPort: 8080 + selector: + app.kubernetes.io/name: argocd-server + sessionAffinity: None + type: ClusterIP + diff --git a/argocd/serviceaccount-argocd-application-controller.yaml b/argocd/serviceaccount-argocd-application-controller.yaml new file mode 100644 index 0000000..9f40046 --- /dev/null +++ b/argocd/serviceaccount-argocd-application-controller.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: application-controller + app.kubernetes.io/name: argocd-application-controller + app.kubernetes.io/part-of: argocd + name: argocd-application-controller + namespace: argocd + diff --git a/argocd/serviceaccount-argocd-applicationset-controller.yaml b/argocd/serviceaccount-argocd-applicationset-controller.yaml new file mode 100644 index 0000000..ef8b103 --- /dev/null +++ b/argocd/serviceaccount-argocd-applicationset-controller.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: applicationset-controller + app.kubernetes.io/name: argocd-applicationset-controller + app.kubernetes.io/part-of: argocd + name: argocd-applicationset-controller + namespace: argocd + diff --git a/argocd/serviceaccount-argocd-dex-server.yaml b/argocd/serviceaccount-argocd-dex-server.yaml new file mode 100644 index 0000000..0f80dfa --- /dev/null +++ b/argocd/serviceaccount-argocd-dex-server.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: dex-server + app.kubernetes.io/name: argocd-dex-server + app.kubernetes.io/part-of: argocd + name: argocd-dex-server + namespace: argocd + diff --git a/argocd/serviceaccount-argocd-notifications-controller.yaml b/argocd/serviceaccount-argocd-notifications-controller.yaml new file mode 100644 index 0000000..cf55063 --- 
/dev/null +++ b/argocd/serviceaccount-argocd-notifications-controller.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: notifications-controller + app.kubernetes.io/name: argocd-notifications-controller + app.kubernetes.io/part-of: argocd + name: argocd-notifications-controller + namespace: argocd + diff --git a/argocd/serviceaccount-argocd-redis.yaml b/argocd/serviceaccount-argocd-redis.yaml new file mode 100644 index 0000000..94137ef --- /dev/null +++ b/argocd/serviceaccount-argocd-redis.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: redis + app.kubernetes.io/name: argocd-redis + app.kubernetes.io/part-of: argocd + name: argocd-redis + namespace: argocd + diff --git a/argocd/serviceaccount-argocd-repo-server.yaml b/argocd/serviceaccount-argocd-repo-server.yaml new file mode 100644 index 0000000..6985de1 --- /dev/null +++ b/argocd/serviceaccount-argocd-repo-server.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: repo-server + app.kubernetes.io/name: argocd-repo-server + app.kubernetes.io/part-of: argocd + name: argocd-repo-server + namespace: argocd + diff --git a/argocd/serviceaccount-argocd-server.yaml b/argocd/serviceaccount-argocd-server.yaml new file mode 100644 index 0000000..8c61215 --- /dev/null +++ b/argocd/serviceaccount-argocd-server.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: server + app.kubernetes.io/name: argocd-server + app.kubernetes.io/part-of: argocd + name: argocd-server + namespace: argocd + diff --git a/argocd/statefulset-argocd-application-controller.yaml b/argocd/statefulset-argocd-application-controller.yaml new file mode 100644 index 0000000..5d3419f --- /dev/null +++ b/argocd/statefulset-argocd-application-controller.yaml @@ -0,0 +1,382 @@ +apiVersion: apps/v1 +kind: StatefulSet 
+metadata: + labels: + app.kubernetes.io/component: application-controller + app.kubernetes.io/name: argocd-application-controller + app.kubernetes.io/part-of: argocd + name: argocd-application-controller + namespace: argocd +spec: + persistentVolumeClaimRetentionPolicy: + whenDeleted: Retain + whenScaled: Retain + podManagementPolicy: OrderedReady + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app.kubernetes.io/name: argocd-application-controller + serviceName: argocd-application-controller + template: + metadata: + labels: + app.kubernetes.io/name: argocd-application-controller + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchLabels: + app.kubernetes.io/name: argocd-application-controller + topologyKey: kubernetes.io/hostname + weight: 100 + - podAffinityTerm: + labelSelector: + matchLabels: + app.kubernetes.io/part-of: argocd + topologyKey: kubernetes.io/hostname + weight: 5 + containers: + - args: + - /usr/local/bin/argocd-application-controller + env: + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + key: auth + name: argocd-redis + - name: ARGOCD_CONTROLLER_REPLICAS + value: '1' + - name: ARGOCD_RECONCILIATION_TIMEOUT + valueFrom: + configMapKeyRef: + key: timeout.reconciliation + name: argocd-cm + optional: true + - name: ARGOCD_HARD_RECONCILIATION_TIMEOUT + valueFrom: + configMapKeyRef: + key: timeout.hard.reconciliation + name: argocd-cm + optional: true + - name: ARGOCD_RECONCILIATION_JITTER + valueFrom: + configMapKeyRef: + key: timeout.reconciliation.jitter + name: argocd-cm + optional: true + - name: ARGOCD_REPO_ERROR_GRACE_PERIOD_SECONDS + valueFrom: + configMapKeyRef: + key: controller.repo.error.grace.period.seconds + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_REPO_SERVER + valueFrom: + configMapKeyRef: + key: repo.server + name: argocd-cmd-params-cm + optional: true + - name: 
ARGOCD_APPLICATION_CONTROLLER_REPO_SERVER_TIMEOUT_SECONDS + valueFrom: + configMapKeyRef: + key: controller.repo.server.timeout.seconds + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_STATUS_PROCESSORS + valueFrom: + configMapKeyRef: + key: controller.status.processors + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_OPERATION_PROCESSORS + valueFrom: + configMapKeyRef: + key: controller.operation.processors + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_LOGFORMAT + valueFrom: + configMapKeyRef: + key: controller.log.format + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_LOGLEVEL + valueFrom: + configMapKeyRef: + key: controller.log.level + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_LOG_FORMAT_TIMESTAMP + valueFrom: + configMapKeyRef: + key: log.format.timestamp + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_METRICS_CACHE_EXPIRATION + valueFrom: + configMapKeyRef: + key: controller.metrics.cache.expiration + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_TIMEOUT_SECONDS + valueFrom: + configMapKeyRef: + key: controller.self.heal.timeout.seconds + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_BACKOFF_TIMEOUT_SECONDS + valueFrom: + configMapKeyRef: + key: controller.self.heal.backoff.timeout.seconds + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_BACKOFF_FACTOR + valueFrom: + configMapKeyRef: + key: controller.self.heal.backoff.factor + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_BACKOFF_CAP_SECONDS + valueFrom: + configMapKeyRef: + key: controller.self.heal.backoff.cap.seconds + name: argocd-cmd-params-cm + optional: true + - name: 
ARGOCD_APPLICATION_CONTROLLER_SELF_HEAL_BACKOFF_COOLDOWN_SECONDS + valueFrom: + configMapKeyRef: + key: controller.self.heal.backoff.cooldown.seconds + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_SYNC_WAVE_DELAY + valueFrom: + configMapKeyRef: + key: controller.sync.wave.delay.seconds + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_SYNC_TIMEOUT + valueFrom: + configMapKeyRef: + key: controller.sync.timeout.seconds + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_REPO_SERVER_PLAINTEXT + valueFrom: + configMapKeyRef: + key: controller.repo.server.plaintext + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_REPO_SERVER_STRICT_TLS + valueFrom: + configMapKeyRef: + key: controller.repo.server.strict.tls + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_PERSIST_RESOURCE_HEALTH + valueFrom: + configMapKeyRef: + key: controller.resource.health.persist + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APP_STATE_CACHE_EXPIRATION + valueFrom: + configMapKeyRef: + key: controller.app.state.cache.expiration + name: argocd-cmd-params-cm + optional: true + - name: REDIS_SERVER + valueFrom: + configMapKeyRef: + key: redis.server + name: argocd-cmd-params-cm + optional: true + - name: REDIS_COMPRESSION + valueFrom: + configMapKeyRef: + key: redis.compression + name: argocd-cmd-params-cm + optional: true + - name: REDISDB + valueFrom: + configMapKeyRef: + key: redis.db + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_DEFAULT_CACHE_EXPIRATION + valueFrom: + configMapKeyRef: + key: controller.default.cache.expiration + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_OTLP_ADDRESS + valueFrom: + configMapKeyRef: + key: otlp.address + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_OTLP_INSECURE + valueFrom: + configMapKeyRef: + 
key: otlp.insecure + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_OTLP_HEADERS + valueFrom: + configMapKeyRef: + key: otlp.headers + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_OTLP_ATTRS + valueFrom: + configMapKeyRef: + key: otlp.attrs + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_NAMESPACES + valueFrom: + configMapKeyRef: + key: application.namespaces + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_CONTROLLER_SHARDING_ALGORITHM + valueFrom: + configMapKeyRef: + key: controller.sharding.algorithm + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_KUBECTL_PARALLELISM_LIMIT + valueFrom: + configMapKeyRef: + key: controller.kubectl.parallelism.limit + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_K8SCLIENT_RETRY_MAX + valueFrom: + configMapKeyRef: + key: controller.k8sclient.retry.max + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_K8SCLIENT_RETRY_BASE_BACKOFF + valueFrom: + configMapKeyRef: + key: controller.k8sclient.retry.base.backoff + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_SERVER_SIDE_DIFF + valueFrom: + configMapKeyRef: + key: controller.diff.server.side + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_IGNORE_NORMALIZER_JQ_TIMEOUT + valueFrom: + configMapKeyRef: + key: controller.ignore.normalizer.jq.timeout + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_HYDRATOR_ENABLED + valueFrom: + configMapKeyRef: + key: hydrator.enabled + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_CLUSTER_CACHE_BATCH_EVENTS_PROCESSING + valueFrom: + configMapKeyRef: + key: controller.cluster.cache.batch.events.processing + name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_CLUSTER_CACHE_EVENTS_PROCESSING_INTERVAL + valueFrom: + configMapKeyRef: + key: controller.cluster.cache.events.processing.interval 
+ name: argocd-cmd-params-cm + optional: true + - name: ARGOCD_APPLICATION_CONTROLLER_COMMIT_SERVER + valueFrom: + configMapKeyRef: + key: commit.server + name: argocd-cmd-params-cm + optional: true + - name: KUBECACHEDIR + value: /tmp/kubecache + image: quay.io/argoproj/argocd:v3.3.6 + imagePullPolicy: Always + name: argocd-application-controller + ports: + - containerPort: 8082 + protocol: TCP + readinessProbe: + failureThreshold: 3 + httpGet: + path: /healthz + port: 8082 + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /app/config/controller/tls + name: argocd-repo-server-tls + - mountPath: /home/argocd + name: argocd-home + - mountPath: /home/argocd/params + name: argocd-cmd-params-cm + - mountPath: /tmp + name: argocd-application-controller-tmp + workingDir: /home/argocd + dnsPolicy: ClusterFirst + nodeSelector: + kubernetes.io/os: linux + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + serviceAccount: argocd-application-controller + serviceAccountName: argocd-application-controller + terminationGracePeriodSeconds: 30 + volumes: + - emptyDir: {} + name: argocd-home + - emptyDir: {} + name: argocd-application-controller-tmp + - name: argocd-repo-server-tls + secret: + defaultMode: 420 + items: + - key: tls.crt + path: tls.crt + - key: tls.key + path: tls.key + - key: ca.crt + path: ca.crt + optional: true + secretName: argocd-repo-server-tls + - configMap: + defaultMode: 420 + items: + - key: controller.profile.enabled + path: profiler.enabled + name: argocd-cmd-params-cm + optional: true + name: argocd-cmd-params-cm + updateStrategy: + rollingUpdate: + partition: 0 + type: 
RollingUpdate + diff --git a/authentik/deployment-authentik-redis.yaml b/authentik/deployment-authentik-redis.yaml new file mode 100644 index 0000000..2a6d86a --- /dev/null +++ b/authentik/deployment-authentik-redis.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '1' + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"name":"authentik-redis","namespace":"authentik"},"spec":{"replicas":1,"selector":{"matchLabels":{"app":"authentik-redis"}},"template":{"metadata":{"labels":{"app":"authentik-redis"}},"spec":{"containers":[{"command":["redis-server","--save","60","1","--loglevel","warning"],"image":"redis:alpine","name":"redis","ports":[{"containerPort":6379}],"resources":{"limits":{"cpu":"100m","memory":"128Mi"},"requests":{"cpu":"25m","memory":"64Mi"}}}]}}}} + + ' + name: authentik-redis + namespace: authentik +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: authentik-redis + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + labels: + app: authentik-redis + spec: + containers: + - command: + - redis-server + - --save + - '60' + - '1' + - --loglevel + - warning + image: redis:alpine + imagePullPolicy: IfNotPresent + name: redis + ports: + - containerPort: 6379 + protocol: TCP + resources: + limits: + cpu: 100m + memory: 128Mi + requests: + cpu: 25m + memory: 64Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 + diff --git a/authentik/deployment-authentik-server.yaml b/authentik/deployment-authentik-server.yaml new file mode 100644 index 0000000..9a0a709 --- /dev/null +++ b/authentik/deployment-authentik-server.yaml @@ -0,0 
+1,95 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '15' + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"name":"authentik-server","namespace":"authentik"},"spec":{"replicas":1,"selector":{"matchLabels":{"app":"authentik-server"}},"template":{"metadata":{"labels":{"app":"authentik-server"}},"spec":{"containers":[{"args":["server"],"env":[{"name":"AUTHENTIK_REDIS__HOST","value":"authentik-redis"},{"name":"AUTHENTIK_POSTGRESQL__HOST","value":"postgresql"},{"name":"AUTHENTIK_POSTGRESQL__USER","value":"authentik"},{"name":"AUTHENTIK_POSTGRESQL__NAME","value":"authentik"},{"name":"AUTHENTIK_POSTGRESQL__PASSWORD","valueFrom":{"secretKeyRef":{"key":"POSTGRES_PASSWORD","name":"authentik-secrets"}}},{"name":"AUTHENTIK_SECRET_KEY","valueFrom":{"secretKeyRef":{"key":"AUTHENTIK_SECRET_KEY","name":"authentik-secrets"}}},{"name":"AUTHENTIK_ERROR_REPORTING__ENABLED","value":"false"}],"image":"ghcr.io/goauthentik/server:2024.12.3","name":"server","ports":[{"containerPort":9000},{"containerPort":9443}],"resources":{"limits":{"cpu":"500m","memory":"1Gi"},"requests":{"cpu":"100m","memory":"512Mi"}},"volumeMounts":[{"mountPath":"/media","name":"media"}]}],"initContainers":[{"command":["sh","-c","until + pg_isready -h postgresql -U authentik; do echo waiting; sleep 2; done"],"image":"postgres:17-alpine","name":"wait-postgres"}],"volumes":[{"name":"media","persistentVolumeClaim":{"claimName":"authentik-media-pvc"}}]}}}} + + ' + name: authentik-server + namespace: authentik +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: authentik-server + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + annotations: + kubectl.kubernetes.io/restartedAt: '2026-04-09T11:10:52Z' + labels: + app: authentik-server + spec: + containers: + - args: + 
- server + env: + - name: AUTHENTIK_REDIS__HOST + value: authentik-redis + - name: AUTHENTIK_POSTGRESQL__HOST + value: postgresql + - name: AUTHENTIK_POSTGRESQL__USER + value: authentik + - name: AUTHENTIK_POSTGRESQL__NAME + value: authentik + - name: AUTHENTIK_POSTGRESQL__PASSWORD + valueFrom: + secretKeyRef: + key: POSTGRES_PASSWORD + name: authentik-secrets + - name: AUTHENTIK_SECRET_KEY + valueFrom: + secretKeyRef: + key: AUTHENTIK_SECRET_KEY + name: authentik-secrets + - name: AUTHENTIK_ERROR_REPORTING__ENABLED + value: 'false' + image: ghcr.io/goauthentik/server:2024.12.3 + imagePullPolicy: IfNotPresent + name: server + ports: + - containerPort: 9000 + protocol: TCP + - containerPort: 9443 + protocol: TCP + resources: + limits: + cpu: 500m + memory: 1Gi + requests: + cpu: 100m + memory: 512Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /media + name: media + dnsPolicy: ClusterFirst + initContainers: + - command: + - sh + - -c + - until pg_isready -h postgresql -U authentik; do echo waiting; sleep 2; done + image: postgres:17-alpine + imagePullPolicy: IfNotPresent + name: wait-postgres + resources: {} + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 + volumes: + - name: media + persistentVolumeClaim: + claimName: authentik-media-pvc + diff --git a/authentik/deployment-authentik-worker.yaml b/authentik/deployment-authentik-worker.yaml new file mode 100644 index 0000000..c989632 --- /dev/null +++ b/authentik/deployment-authentik-worker.yaml @@ -0,0 +1,90 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '6' + kubectl.kubernetes.io/last-applied-configuration: 
'{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"name":"authentik-worker","namespace":"authentik"},"spec":{"replicas":1,"selector":{"matchLabels":{"app":"authentik-worker"}},"template":{"metadata":{"labels":{"app":"authentik-worker"}},"spec":{"containers":[{"args":["worker"],"env":[{"name":"AUTHENTIK_REDIS__HOST","value":"authentik-redis"},{"name":"AUTHENTIK_POSTGRESQL__HOST","value":"postgresql"},{"name":"AUTHENTIK_POSTGRESQL__USER","value":"authentik"},{"name":"AUTHENTIK_POSTGRESQL__NAME","value":"authentik"},{"name":"AUTHENTIK_POSTGRESQL__PASSWORD","valueFrom":{"secretKeyRef":{"key":"POSTGRES_PASSWORD","name":"authentik-secrets"}}},{"name":"AUTHENTIK_SECRET_KEY","valueFrom":{"secretKeyRef":{"key":"AUTHENTIK_SECRET_KEY","name":"authentik-secrets"}}},{"name":"AUTHENTIK_ERROR_REPORTING__ENABLED","value":"false"}],"image":"ghcr.io/goauthentik/server:2024.12.3","name":"worker","resources":{"limits":{"cpu":"300m","memory":"512Mi"},"requests":{"cpu":"50m","memory":"256Mi"}},"volumeMounts":[{"mountPath":"/media","name":"media"}]}],"initContainers":[{"command":["sh","-c","until + pg_isready -h postgresql -U authentik; do echo waiting; sleep 2; done"],"image":"postgres:17-alpine","name":"wait-postgres"}],"volumes":[{"name":"media","persistentVolumeClaim":{"claimName":"authentik-media-pvc"}}]}}}} + + ' + name: authentik-worker + namespace: authentik +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: authentik-worker + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + annotations: + kubectl.kubernetes.io/restartedAt: '2026-04-08T20:03:35Z' + labels: + app: authentik-worker + spec: + containers: + - args: + - worker + env: + - name: AUTHENTIK_REDIS__HOST + value: authentik-redis + - name: AUTHENTIK_POSTGRESQL__HOST + value: postgresql + - name: AUTHENTIK_POSTGRESQL__USER + value: authentik + - name: AUTHENTIK_POSTGRESQL__NAME 
+ value: authentik + - name: AUTHENTIK_POSTGRESQL__PASSWORD + valueFrom: + secretKeyRef: + key: POSTGRES_PASSWORD + name: authentik-secrets + - name: AUTHENTIK_SECRET_KEY + valueFrom: + secretKeyRef: + key: AUTHENTIK_SECRET_KEY + name: authentik-secrets + - name: AUTHENTIK_ERROR_REPORTING__ENABLED + value: 'false' + image: ghcr.io/goauthentik/server:2024.12.3 + imagePullPolicy: IfNotPresent + name: worker + resources: + limits: + cpu: 300m + memory: 512Mi + requests: + cpu: 50m + memory: 256Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /media + name: media + dnsPolicy: ClusterFirst + initContainers: + - command: + - sh + - -c + - until pg_isready -h postgresql -U authentik; do echo waiting; sleep 2; done + image: postgres:17-alpine + imagePullPolicy: IfNotPresent + name: wait-postgres + resources: {} + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 + volumes: + - name: media + persistentVolumeClaim: + claimName: authentik-media-pvc + diff --git a/authentik/ingress-authentik.yaml b/authentik/ingress-authentik.yaml new file mode 100644 index 0000000..63a5d11 --- /dev/null +++ b/authentik/ingress-authentik.yaml @@ -0,0 +1,30 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + kubectl.kubernetes.io/last-applied-configuration: 
'{"apiVersion":"networking.k8s.io/v1","kind":"Ingress","metadata":{"annotations":{"cert-manager.io/cluster-issuer":"letsencrypt-prod","traefik.ingress.kubernetes.io/router.entrypoints":"websecure"},"name":"authentik","namespace":"authentik"},"spec":{"ingressClassName":"traefik","rules":[{"host":"authentik.chemavx.xyz","http":{"paths":[{"backend":{"service":{"name":"authentik-server","port":{"number":9000}}},"path":"/","pathType":"Prefix"}]}}],"tls":[{"hosts":["authentik.chemavx.xyz"],"secretName":"authentik-tls"}]}} + + ' + traefik.ingress.kubernetes.io/reload-timestamp: '1775738348' + traefik.ingress.kubernetes.io/router.entrypoints: websecure + name: authentik + namespace: authentik +spec: + ingressClassName: traefik + rules: + - host: auth.chemavx.xyz + http: + paths: + - backend: + service: + name: authentik-server + port: + number: 9000 + path: / + pathType: Prefix + tls: + - hosts: + - auth.chemavx.xyz + secretName: auth-tls + diff --git a/authentik/pvc-authentik-media-pvc.yaml b/authentik/pvc-authentik-media-pvc.yaml new file mode 100644 index 0000000..0dfd761 --- /dev/null +++ b/authentik/pvc-authentik-media-pvc.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"PersistentVolumeClaim","metadata":{"annotations":{},"name":"authentik-media-pvc","namespace":"authentik"},"spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"5Gi"}},"storageClassName":"local-path"}} + + ' + pv.kubernetes.io/bind-completed: 'yes' + pv.kubernetes.io/bound-by-controller: 'yes' + volume.beta.kubernetes.io/storage-provisioner: rancher.io/local-path + volume.kubernetes.io/selected-node: chemavx-k8 + volume.kubernetes.io/storage-provisioner: rancher.io/local-path + name: authentik-media-pvc + namespace: authentik +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi + storageClassName: local-path + volumeMode: Filesystem 
+ volumeName: pvc-2485eef0-a8bb-40c6-8013-86134841d095 + diff --git a/authentik/pvc-authentik-pg-pvc.yaml b/authentik/pvc-authentik-pg-pvc.yaml new file mode 100644 index 0000000..9734561 --- /dev/null +++ b/authentik/pvc-authentik-pg-pvc.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"PersistentVolumeClaim","metadata":{"annotations":{},"name":"authentik-pg-pvc","namespace":"authentik"},"spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"10Gi"}},"storageClassName":"local-path","volumeName":"authentik-pg-pv"}} + + ' + pv.kubernetes.io/bind-completed: 'yes' + name: authentik-pg-pvc + namespace: authentik +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: local-path + volumeMode: Filesystem + volumeName: authentik-pg-pv + diff --git a/authentik/secret-auth-tls.yaml b/authentik/secret-auth-tls.yaml new file mode 100644 index 0000000..1d089c7 --- /dev/null +++ b/authentik/secret-auth-tls.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +data: + tls.crt: REDACTED + tls.key: REDACTED +kind: Secret +metadata: + annotations: + cert-manager.io/alt-names: auth.chemavx.xyz + cert-manager.io/certificate-name: auth-tls + cert-manager.io/common-name: auth.chemavx.xyz + cert-manager.io/ip-sans: '' + cert-manager.io/issuer-group: cert-manager.io + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod + cert-manager.io/uri-sans: '' + labels: + controller.cert-manager.io/fao: 'true' + name: auth-tls + namespace: authentik +type: kubernetes.io/tls + diff --git a/authentik/secret-authentik-secrets.yaml b/authentik/secret-authentik-secrets.yaml new file mode 100644 index 0000000..8330427 --- /dev/null +++ b/authentik/secret-authentik-secrets.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +data: + AUTHENTIK_POSTGRESQL__PASSWORD: REDACTED + AUTHENTIK_SECRET_KEY: REDACTED + 
POSTGRES_PASSWORD: REDACTED +kind: Secret +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Secret","metadata":{"annotations":{},"name":"authentik-secrets","namespace":"authentik"},"stringData":{"AUTHENTIK_POSTGRESQL__PASSWORD":"REDACTED","AUTHENTIK_SECRET_KEY":"REDACTED","POSTGRES_PASSWORD":"REDACTED"},"type":"Opaque"} + + ' + name: authentik-secrets + namespace: authentik +type: Opaque + diff --git a/authentik/secret-authentik-tls.yaml b/authentik/secret-authentik-tls.yaml new file mode 100644 index 0000000..896b36f --- /dev/null +++ b/authentik/secret-authentik-tls.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +data: + tls.crt: REDACTED + tls.key: REDACTED +kind: Secret +metadata: + annotations: + cert-manager.io/alt-names: authentik.chemavx.xyz + cert-manager.io/certificate-name: authentik-tls + cert-manager.io/common-name: authentik.chemavx.xyz + cert-manager.io/ip-sans: '' + cert-manager.io/issuer-group: cert-manager.io + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod + cert-manager.io/uri-sans: '' + labels: + controller.cert-manager.io/fao: 'true' + name: authentik-tls + namespace: authentik +type: kubernetes.io/tls + diff --git a/authentik/service-authentik-redis.yaml b/authentik/service-authentik-redis.yaml new file mode 100644 index 0000000..bb00915 --- /dev/null +++ b/authentik/service-authentik-redis.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: authentik-redis + namespace: authentik +spec: + clusterIP: 10.43.156.120 + clusterIPs: + - 10.43.156.120 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - port: 6379 + protocol: TCP + targetPort: 6379 + selector: + app: authentik-redis + sessionAffinity: None + type: ClusterIP + diff --git a/authentik/service-authentik-server.yaml b/authentik/service-authentik-server.yaml new file mode 100644 index 0000000..e5e8f91 --- /dev/null
+++ b/authentik/service-authentik-server.yaml @@ -0,0 +1,27 @@ +apiVersion: v1 +kind: Service +metadata: + name: authentik-server + namespace: authentik +spec: + clusterIP: 10.43.135.224 + clusterIPs: + - 10.43.135.224 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - name: http + port: 9000 + protocol: TCP + targetPort: 9000 + - name: https + port: 9443 + protocol: TCP + targetPort: 9443 + selector: + app: authentik-server + sessionAffinity: None + type: ClusterIP + diff --git a/authentik/service-postgresql.yaml b/authentik/service-postgresql.yaml new file mode 100644 index 0000000..1285626 --- /dev/null +++ b/authentik/service-postgresql.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: postgresql + namespace: authentik +spec: + clusterIP: 10.43.75.133 + clusterIPs: + - 10.43.75.133 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - port: 5432 + protocol: TCP + targetPort: 5432 + selector: + app: postgresql + sessionAffinity: None + type: ClusterIP + diff --git a/authentik/statefulset-postgresql.yaml b/authentik/statefulset-postgresql.yaml new file mode 100644 index 0000000..2c86a88 --- /dev/null +++ b/authentik/statefulset-postgresql.yaml @@ -0,0 +1,74 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: postgresql + namespace: authentik +spec: + persistentVolumeClaimRetentionPolicy: + whenDeleted: Retain + whenScaled: Retain + podManagementPolicy: OrderedReady + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: postgresql + serviceName: postgresql + template: + metadata: + labels: + app: postgresql + spec: + containers: + - env: + - name: POSTGRES_USER + value: authentik + - name: POSTGRES_DB + value: authentik + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + key: POSTGRES_PASSWORD + name: authentik-secrets + - name: PGDATA + value: /var/lib/postgresql/data + image: postgres:17-alpine + imagePullPolicy: 
IfNotPresent + name: postgresql + ports: + - containerPort: 5432 + protocol: TCP + readinessProbe: + exec: + command: + - pg_isready + - -U + - authentik + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 1 + resources: {} + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /var/lib/postgresql/data + name: pg-data + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + securityContext: + fsGroup: 999 + runAsGroup: 999 + runAsUser: 999 + terminationGracePeriodSeconds: 30 + volumes: + - name: pg-data + persistentVolumeClaim: + claimName: authentik-pg-pvc + updateStrategy: + rollingUpdate: + partition: 0 + type: RollingUpdate + diff --git a/backup-system/serviceaccount-backup-sa.yaml b/backup-system/serviceaccount-backup-sa.yaml new file mode 100644 index 0000000..948f39e --- /dev/null +++ b/backup-system/serviceaccount-backup-sa.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: backup-sa + namespace: backup-system + diff --git a/cloudflare-ddns/deployment-cloudflare-ddns.yaml b/cloudflare-ddns/deployment-cloudflare-ddns.yaml new file mode 100644 index 0000000..b7d496b --- /dev/null +++ b/cloudflare-ddns/deployment-cloudflare-ddns.yaml @@ -0,0 +1,70 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '1' + kubectl.kubernetes.io/last-applied-configuration: 
'{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"name":"cloudflare-ddns","namespace":"cloudflare-ddns"},"spec":{"replicas":1,"selector":{"matchLabels":{"app":"cloudflare-ddns"}},"template":{"metadata":{"labels":{"app":"cloudflare-ddns"}},"spec":{"containers":[{"env":[{"name":"CF_API_TOKEN","valueFrom":{"secretKeyRef":{"key":"CF_API_TOKEN","name":"cloudflare-ddns-secret"}}},{"name":"DOMAINS","value":"chemavx.xyz,*.chemavx.xyz"},{"name":"PROXIED","value":"false"},{"name":"UPDATE_CRON","value":"@every + 5m"},{"name":"IP4_PROVIDER","value":"cloudflare.trace"}],"image":"favonia/cloudflare-ddns:latest","name":"cloudflare-ddns","resources":{"limits":{"cpu":"50m","memory":"64Mi"},"requests":{"cpu":"10m","memory":"32Mi"}},"securityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsGroup":1000,"runAsNonRoot":true,"runAsUser":1000}}]}}}} + + ' + name: cloudflare-ddns + namespace: cloudflare-ddns +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: cloudflare-ddns + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + labels: + app: cloudflare-ddns + spec: + containers: + - env: + - name: CF_API_TOKEN + valueFrom: + secretKeyRef: + key: CF_API_TOKEN + name: cloudflare-ddns-secret + - name: DOMAINS + value: chemavx.xyz,*.chemavx.xyz + - name: PROXIED + value: 'false' + - name: UPDATE_CRON + value: '@every 5m' + - name: IP4_PROVIDER + value: cloudflare.trace + image: favonia/cloudflare-ddns:latest + imagePullPolicy: Always + name: cloudflare-ddns + resources: + limits: + cpu: 50m + memory: 64Mi + requests: + cpu: 10m + memory: 32Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsGroup: 1000 + runAsNonRoot: true + runAsUser: 1000 + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: 
File + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 + diff --git a/cloudflare-ddns/secret-cloudflare-ddns-secret.yaml b/cloudflare-ddns/secret-cloudflare-ddns-secret.yaml new file mode 100644 index 0000000..3cd2ff3 --- /dev/null +++ b/cloudflare-ddns/secret-cloudflare-ddns-secret.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +data: + CF_API_TOKEN: REDACTED +kind: Secret +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Secret","metadata":{"annotations":{},"name":"cloudflare-ddns-secret","namespace":"cloudflare-ddns"},"stringData":{"CF_API_TOKEN":"REDACTED"},"type":"Opaque"} + + ' + name: cloudflare-ddns-secret + namespace: cloudflare-ddns +type: Opaque + diff --git a/cluster-wide/clusterissuers.yaml b/cluster-wide/clusterissuers.yaml new file mode 100644 index 0000000..d4ac01a --- /dev/null +++ b/cluster-wide/clusterissuers.yaml @@ -0,0 +1,48 @@ +apiVersion: v1 +items: +- apiVersion: cert-manager.io/v1 + kind: ClusterIssuer + metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"cert-manager.io/v1","kind":"ClusterIssuer","metadata":{"annotations":{},"name":"letsencrypt-prod"},"spec":{"acme":{"email":"admin@chemavx.xyz","privateKeySecretRef":{"name":"letsencrypt-prod-key"},"server":"https://acme-v02.api.letsencrypt.org/directory","solvers":[{"dns01":{"cloudflare":{"apiTokenSecretRef":{"key":"api-token","name":"cloudflare-api-token"}}}}]}}} + + ' + generation: 1 + name: letsencrypt-prod + spec: + acme: + email: admin@chemavx.xyz + privateKeySecretRef: + name: letsencrypt-prod-key + server: https://acme-v02.api.letsencrypt.org/directory + solvers: + - dns01: + cloudflare: + apiTokenSecretRef: + key: api-token + name: cloudflare-api-token +- apiVersion: cert-manager.io/v1 + kind: ClusterIssuer + metadata: + annotations: +
kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"cert-manager.io/v1","kind":"ClusterIssuer","metadata":{"annotations":{},"name":"letsencrypt-staging"},"spec":{"acme":{"email":"admin@chemavx.xyz","privateKeySecretRef":{"name":"letsencrypt-staging-key"},"server":"https://acme-staging-v02.api.letsencrypt.org/directory","solvers":[{"dns01":{"cloudflare":{"apiTokenSecretRef":{"key":"api-token","name":"cloudflare-api-token"}}}}]}}} + + ' + generation: 1 + name: letsencrypt-staging + spec: + acme: + email: admin@chemavx.xyz + privateKeySecretRef: + name: letsencrypt-staging-key + server: https://acme-staging-v02.api.letsencrypt.org/directory + solvers: + - dns01: + cloudflare: + apiTokenSecretRef: + key: api-token + name: cloudflare-api-token +kind: List +metadata: + resourceVersion: '' + diff --git a/cluster-wide/namespaces.yaml b/cluster-wide/namespaces.yaml new file mode 100644 index 0000000..9c24501 --- /dev/null +++ b/cluster-wide/namespaces.yaml @@ -0,0 +1,178 @@ +apiVersion: v1 +kind: Namespace +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Namespace","metadata":{"annotations":{},"name":"argocd"},"spec":{},"status":{}} + + ' + labels: + kubernetes.io/metadata.name: argocd + name: argocd +spec: + finalizers: + - kubernetes + +apiVersion: v1 +kind: Namespace +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Namespace","metadata":{"annotations":{},"name":"authentik"},"spec":{},"status":{}} + + ' + labels: + kubernetes.io/metadata.name: authentik + name: authentik +spec: + finalizers: + - kubernetes + +apiVersion: v1 +kind: Namespace +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Namespace","metadata":{"annotations":{},"name":"backup-system"},"spec":{},"status":{}} + + ' + labels: + kubernetes.io/metadata.name: backup-system + name: backup-system +spec: + finalizers: + - kubernetes 
+ +apiVersion: v1 +kind: Namespace +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Namespace","metadata":{"annotations":{},"name":"cert-manager"},"spec":{},"status":{}} + + ' + labels: + kubernetes.io/metadata.name: cert-manager + name: cert-manager +spec: + finalizers: + - kubernetes + +apiVersion: v1 +kind: Namespace +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Namespace","metadata":{"annotations":{},"name":"cloudflare-ddns"},"spec":{},"status":{}} + + ' + labels: + kubernetes.io/metadata.name: cloudflare-ddns + name: cloudflare-ddns +spec: + finalizers: + - kubernetes + +apiVersion: v1 +kind: Namespace +metadata: + labels: + kubernetes.io/metadata.name: default + name: default +spec: + finalizers: + - kubernetes + +apiVersion: v1 +kind: Namespace +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Namespace","metadata":{"annotations":{},"name":"gitea"},"spec":{},"status":{}} + + ' + labels: + kubernetes.io/metadata.name: gitea + name: gitea +spec: + finalizers: + - kubernetes + +apiVersion: v1 +kind: Namespace +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Namespace","metadata":{"annotations":{},"name":"homarr"},"spec":{},"status":{}} + + ' + labels: + kubernetes.io/metadata.name: homarr + name: homarr +spec: + finalizers: + - kubernetes + +apiVersion: v1 +kind: Namespace +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Namespace","metadata":{"annotations":{},"name":"monitoring"},"spec":{},"status":{}} + + ' + labels: + kubernetes.io/metadata.name: monitoring + name: monitoring +spec: + finalizers: + - kubernetes + +apiVersion: v1 +kind: Namespace +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: 
'{"apiVersion":"v1","kind":"Namespace","metadata":{"annotations":{},"name":"n8n"},"spec":{},"status":{}} + + ' + labels: + kubernetes.io/metadata.name: n8n + name: n8n +spec: + finalizers: + - kubernetes + +apiVersion: v1 +kind: Namespace +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Namespace","metadata":{"annotations":{},"name":"openclaw"},"spec":{},"status":{}} + + ' + labels: + kubernetes.io/metadata.name: openclaw + name: openclaw +spec: + finalizers: + - kubernetes + +apiVersion: v1 +kind: Namespace +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Namespace","metadata":{"annotations":{},"name":"polymarket-bot"},"spec":{},"status":{}} + + ' + labels: + kubernetes.io/metadata.name: polymarket-bot + name: polymarket-bot +spec: + finalizers: + - kubernetes + +apiVersion: v1 +kind: Namespace +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Namespace","metadata":{"annotations":{},"name":"vaultwarden"},"spec":{},"status":{}} + + ' + labels: + kubernetes.io/metadata.name: vaultwarden + name: vaultwarden +spec: + finalizers: + - kubernetes + diff --git a/default/secret-wildcard-chemavx-xyz-tls.yaml b/default/secret-wildcard-chemavx-xyz-tls.yaml new file mode 100644 index 0000000..5cc0b3a --- /dev/null +++ b/default/secret-wildcard-chemavx-xyz-tls.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +data: + tls.crt: REDACTED + tls.key: REDACTED +kind: Secret +metadata: + annotations: + cert-manager.io/alt-names: '*.chemavx.xyz,chemavx.xyz' + cert-manager.io/certificate-name: wildcard-chemavx-xyz + cert-manager.io/common-name: chemavx.xyz + cert-manager.io/ip-sans: '' + cert-manager.io/issuer-group: '' + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod + cert-manager.io/uri-sans: '' + labels: + controller.cert-manager.io/fao: 'true' + name: wildcard-chemavx-xyz-tls + 
namespace: default +type: kubernetes.io/tls + diff --git a/default/service-kubernetes.yaml b/default/service-kubernetes.yaml new file mode 100644 index 0000000..bba67a3 --- /dev/null +++ b/default/service-kubernetes.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + component: apiserver + provider: kubernetes + name: kubernetes + namespace: default +spec: + clusterIP: 10.43.0.1 + clusterIPs: + - 10.43.0.1 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - name: https + port: 443 + protocol: TCP + targetPort: 6443 + sessionAffinity: None + type: ClusterIP + diff --git a/gitea/ingress-gitea.yaml b/gitea/ingress-gitea.yaml new file mode 100644 index 0000000..e9cd17e --- /dev/null +++ b/gitea/ingress-gitea.yaml @@ -0,0 +1,29 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"networking.k8s.io/v1","kind":"Ingress","metadata":{"annotations":{"cert-manager.io/cluster-issuer":"letsencrypt-prod","traefik.ingress.kubernetes.io/router.entrypoints":"websecure"},"name":"gitea","namespace":"gitea"},"spec":{"ingressClassName":"traefik","rules":[{"host":"gitea.chemavx.xyz","http":{"paths":[{"backend":{"service":{"name":"gitea","port":{"number":3000}}},"path":"/","pathType":"Prefix"}]}}],"tls":[{"hosts":["gitea.chemavx.xyz"],"secretName":"gitea-tls"}]}} + + ' + traefik.ingress.kubernetes.io/router.entrypoints: websecure + name: gitea + namespace: gitea +spec: + ingressClassName: traefik + rules: + - host: git.chemavx.xyz + http: + paths: + - backend: + service: + name: gitea + port: + number: 3000 + path: / + pathType: Prefix + tls: + - hosts: + - git.chemavx.xyz + secretName: gitea-tls + diff --git a/gitea/pvc-gitea-data-pvc.yaml b/gitea/pvc-gitea-data-pvc.yaml new file mode 100644 index 0000000..f3e95ed --- /dev/null +++ b/gitea/pvc-gitea-data-pvc.yaml @@ -0,0 
+1,24 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"PersistentVolumeClaim","metadata":{"annotations":{},"name":"gitea-data-pvc","namespace":"gitea"},"spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"20Gi"}},"storageClassName":"local-path"}} + + ' + pv.kubernetes.io/bind-completed: 'yes' + pv.kubernetes.io/bound-by-controller: 'yes' + volume.beta.kubernetes.io/storage-provisioner: rancher.io/local-path + volume.kubernetes.io/selected-node: chemavx-k8 + volume.kubernetes.io/storage-provisioner: rancher.io/local-path + name: gitea-data-pvc + namespace: gitea +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi + storageClassName: local-path + volumeMode: Filesystem + volumeName: pvc-be542822-009d-4b9f-9313-aa4ed051834f + diff --git a/gitea/secret-gitea-tls.yaml b/gitea/secret-gitea-tls.yaml new file mode 100644 index 0000000..36f8712 --- /dev/null +++ b/gitea/secret-gitea-tls.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +data: + tls.crt: REDACTED + tls.key: REDACTED +kind: Secret +metadata: + annotations: + cert-manager.io/alt-names: git.chemavx.xyz + cert-manager.io/certificate-name: gitea-tls + cert-manager.io/common-name: git.chemavx.xyz + cert-manager.io/ip-sans: '' + cert-manager.io/issuer-group: cert-manager.io + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod + cert-manager.io/uri-sans: '' + labels: + controller.cert-manager.io/fao: 'true' + name: gitea-tls + namespace: gitea +type: kubernetes.io/tls + diff --git a/gitea/service-gitea.yaml b/gitea/service-gitea.yaml new file mode 100644 index 0000000..a5822c9 --- /dev/null +++ b/gitea/service-gitea.yaml @@ -0,0 +1,27 @@ +apiVersion: v1 +kind: Service +metadata: + name: gitea + namespace: gitea +spec: + clusterIP: 10.43.118.65 + clusterIPs: + - 10.43.118.65 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + 
ipFamilyPolicy: SingleStack + ports: + - name: http + port: 3000 + protocol: TCP + targetPort: 3000 + - name: ssh + port: 22 + protocol: TCP + targetPort: 22 + selector: + app: gitea + sessionAffinity: None + type: ClusterIP + diff --git a/gitea/statefulset-gitea.yaml b/gitea/statefulset-gitea.yaml new file mode 100644 index 0000000..7a7220d --- /dev/null +++ b/gitea/statefulset-gitea.yaml @@ -0,0 +1,91 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: gitea + namespace: gitea +spec: + persistentVolumeClaimRetentionPolicy: + whenDeleted: Retain + whenScaled: Retain + podManagementPolicy: OrderedReady + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: gitea + serviceName: gitea + template: + metadata: + labels: + app: gitea + spec: + containers: + - env: + - name: GITEA__database__DB_TYPE + value: sqlite3 + - name: GITEA__database__PATH + value: /data/gitea/gitea.db + - name: GITEA__server__DOMAIN + value: gitea.chemavx.xyz + - name: GITEA__server__ROOT_URL + value: https://gitea.chemavx.xyz + - name: GITEA__server__SSH_PORT + value: '22' + - name: GITEA__server__SSH_DOMAIN + value: gitea.chemavx.xyz + - name: GITEA__security__INSTALL_LOCK + value: 'true' + - name: GITEA__service__DISABLE_REGISTRATION + value: 'false' + - name: USER_UID + value: '1000' + - name: USER_GID + value: '1000' + image: gitea/gitea:latest + imagePullPolicy: Always + name: gitea + ports: + - containerPort: 3000 + protocol: TCP + - containerPort: 22 + protocol: TCP + resources: + limits: + cpu: 300m + memory: 512Mi + requests: + cpu: 50m + memory: 128Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /data + name: data + dnsPolicy: ClusterFirst + initContainers: + - command: + - sh + - -c + - mkdir -p /data/gitea/conf && chown -R 1000:1000 /data + image: busybox + imagePullPolicy: Always + name: init-dirs + resources: {} + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: 
File + volumeMounts: + - mountPath: /data + name: data + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 + volumes: + - name: data + persistentVolumeClaim: + claimName: gitea-data-pvc + updateStrategy: + rollingUpdate: + partition: 0 + type: RollingUpdate + diff --git a/homarr/deployment-homarr.yaml b/homarr/deployment-homarr.yaml new file mode 100644 index 0000000..5cb9419 --- /dev/null +++ b/homarr/deployment-homarr.yaml @@ -0,0 +1,80 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '5' + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"name":"homarr","namespace":"homarr"},"spec":{"replicas":1,"selector":{"matchLabels":{"app":"homarr"}},"template":{"metadata":{"labels":{"app":"homarr"}},"spec":{"containers":[{"env":[{"name":"SECRET_ENCRYPTION_KEY","value":"a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"},{"name":"AUTH_PROVIDERS","value":"credentials"},{"name":"BASE_URL","value":"https://homarr.chemavx.xyz"}],"image":"ghcr.io/homarr-labs/homarr:latest","name":"homarr","ports":[{"containerPort":7575}],"resources":{"limits":{"cpu":"300m","memory":"512Mi"},"requests":{"cpu":"50m","memory":"128Mi"}},"volumeMounts":[{"mountPath":"/appdata/db","name":"db"},{"mountPath":"/appdata/redis","name":"redis"}]}],"volumes":[{"name":"db","persistentVolumeClaim":{"claimName":"homarr-db-pvc"}},{"name":"redis","persistentVolumeClaim":{"claimName":"homarr-redis-pvc"}}]}}}} + + ' + name: homarr + namespace: homarr +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: homarr + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + annotations: + kubectl.kubernetes.io/restartedAt: '2026-04-09T20:09:03Z' + labels: + app: homarr + spec: + containers: + - env: + - name: SECRET_ENCRYPTION_KEY + 
value: 35337a2b6c59790e46be301ceeb67a29ef78272b962f8d86103f1c2caeed36b4 + - name: AUTH_PROVIDERS + value: oidc + - name: BASE_URL + value: https://home.chemavx.xyz + - name: AUTH_OIDC_ISSUER + value: https://auth.chemavx.xyz/application/o/homarr-oidc/ + - name: AUTH_OIDC_CLIENT_ID + value: THDiU3sUF562PXa9LcXCAiKSQG596ZfqymcWJIks + - name: AUTH_OIDC_CLIENT_SECRET + value: 9EQzeq0bzwlHbfDffCk7r8KYi2fjaxY3FTcjt0wkmd62PykN6bn35RzpowtnMqWvsNOIOzo1PZZfHYlWIrWsKLz8COAZwFwOPeyxwu0oSE3fbreJKeGbq7seAXkFoGE3 + - name: AUTH_OIDC_CLIENT_NAME + value: Authentik + - name: AUTH_OIDC_AUTO_LOGIN + value: 'true' + image: ghcr.io/homarr-labs/homarr:latest + imagePullPolicy: Always + name: homarr + ports: + - containerPort: 7575 + protocol: TCP + resources: + limits: + cpu: 500m + memory: 1Gi + requests: + cpu: 50m + memory: 256Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /appdata/db + name: db + - mountPath: /appdata/redis + name: redis + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 + volumes: + - name: db + persistentVolumeClaim: + claimName: homarr-db-pvc + - name: redis + persistentVolumeClaim: + claimName: homarr-redis-pvc + diff --git a/homarr/ingress-homarr.yaml b/homarr/ingress-homarr.yaml new file mode 100644 index 0000000..77f9fed --- /dev/null +++ b/homarr/ingress-homarr.yaml @@ -0,0 +1,29 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + kubectl.kubernetes.io/last-applied-configuration: 
'{"apiVersion":"networking.k8s.io/v1","kind":"Ingress","metadata":{"annotations":{"cert-manager.io/cluster-issuer":"letsencrypt-prod","traefik.ingress.kubernetes.io/router.entrypoints":"websecure"},"name":"homarr","namespace":"homarr"},"spec":{"ingressClassName":"traefik","rules":[{"host":"homarr.chemavx.xyz","http":{"paths":[{"backend":{"service":{"name":"homarr","port":{"number":7575}}},"path":"/","pathType":"Prefix"}]}}],"tls":[{"hosts":["homarr.chemavx.xyz"],"secretName":"homarr-tls"}]}} + + ' + traefik.ingress.kubernetes.io/router.entrypoints: websecure + name: homarr + namespace: homarr +spec: + ingressClassName: traefik + rules: + - host: home.chemavx.xyz + http: + paths: + - backend: + service: + name: homarr + port: + number: 7575 + path: / + pathType: Prefix + tls: + - hosts: + - home.chemavx.xyz + secretName: home-tls + diff --git a/homarr/pvc-homarr-db-pvc.yaml b/homarr/pvc-homarr-db-pvc.yaml new file mode 100644 index 0000000..4af59e3 --- /dev/null +++ b/homarr/pvc-homarr-db-pvc.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"PersistentVolumeClaim","metadata":{"annotations":{},"name":"homarr-db-pvc","namespace":"homarr"},"spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"1Gi"}},"storageClassName":"local-path"}} + + ' + pv.kubernetes.io/bind-completed: 'yes' + pv.kubernetes.io/bound-by-controller: 'yes' + volume.beta.kubernetes.io/storage-provisioner: rancher.io/local-path + volume.kubernetes.io/selected-node: chemavx-k8 + volume.kubernetes.io/storage-provisioner: rancher.io/local-path + name: homarr-db-pvc + namespace: homarr +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + storageClassName: local-path + volumeMode: Filesystem + volumeName: pvc-d69e810c-9549-40c1-b113-27a88b79ea0a + diff --git a/homarr/pvc-homarr-redis-pvc.yaml b/homarr/pvc-homarr-redis-pvc.yaml new file 
mode 100644 index 0000000..75626b2 --- /dev/null +++ b/homarr/pvc-homarr-redis-pvc.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"PersistentVolumeClaim","metadata":{"annotations":{},"name":"homarr-redis-pvc","namespace":"homarr"},"spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"500Mi"}},"storageClassName":"local-path"}} + + ' + pv.kubernetes.io/bind-completed: 'yes' + pv.kubernetes.io/bound-by-controller: 'yes' + volume.beta.kubernetes.io/storage-provisioner: rancher.io/local-path + volume.kubernetes.io/selected-node: chemavx-k8 + volume.kubernetes.io/storage-provisioner: rancher.io/local-path + name: homarr-redis-pvc + namespace: homarr +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Mi + storageClassName: local-path + volumeMode: Filesystem + volumeName: pvc-f51d5a13-ff66-4444-b96d-bd56b91a8b58 + diff --git a/homarr/secret-homarr-tls.yaml b/homarr/secret-homarr-tls.yaml new file mode 100644 index 0000000..a086316 --- /dev/null +++ b/homarr/secret-homarr-tls.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +data: + tls.crt: REDACTED + tls.key: REDACTED +kind: Secret +metadata: + annotations: + cert-manager.io/alt-names: homarr.chemavx.xyz + cert-manager.io/certificate-name: homarr-tls + cert-manager.io/common-name: homarr.chemavx.xyz + cert-manager.io/ip-sans: '' + cert-manager.io/issuer-group: cert-manager.io + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod + cert-manager.io/uri-sans: '' + labels: + controller.cert-manager.io/fao: 'true' + name: homarr-tls + namespace: homarr +type: kubernetes.io/tls + diff --git a/homarr/secret-home-tls.yaml b/homarr/secret-home-tls.yaml new file mode 100644 index 0000000..83acd35 --- /dev/null +++ b/homarr/secret-home-tls.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +data: + tls.crt: REDACTED + tls.key: REDACTED +kind: 
Secret +metadata: + annotations: + cert-manager.io/alt-names: home.chemavx.xyz + cert-manager.io/certificate-name: home-tls + cert-manager.io/common-name: home.chemavx.xyz + cert-manager.io/ip-sans: '' + cert-manager.io/issuer-group: cert-manager.io + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod + cert-manager.io/uri-sans: '' + labels: + controller.cert-manager.io/fao: 'true' + name: home-tls + namespace: homarr +type: kubernetes.io/tls + diff --git a/homarr/service-homarr.yaml b/homarr/service-homarr.yaml new file mode 100644 index 0000000..058c874 --- /dev/null +++ b/homarr/service-homarr.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: homarr + namespace: homarr +spec: + clusterIP: 10.43.127.238 + clusterIPs: + - 10.43.127.238 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - port: 7575 + protocol: TCP + targetPort: 7575 + selector: + app: homarr + sessionAffinity: None + type: ClusterIP + diff --git a/monitoring/configmap-kube-prometheus-stack-apiserver.yaml b/monitoring/configmap-kube-prometheus-stack-apiserver.yaml new file mode 100644 index 0000000..f93644c --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-apiserver.yaml @@ -0,0 +1,86 @@ +apiVersion: v1 +data: + apiserver.json: '{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"description":"The SLO (service level objective) and other metrics + displayed on this dashboard are for informational purposes only.","gridPos":{"h":2,"w":24,"x":0,"y":0},"id":1,"options":{"content":"The + SLO (service level objective) and other metrics displayed on this dashboard are + for informational purposes 
only."},"pluginVersion":"v11.4.0","title":"Notice","type":"text"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"description":"How many percent of requests (both read and write) in + 30 days have been answered successfully and fast enough?","fieldConfig":{"defaults":{"decimals":3,"unit":"percentunit"}},"gridPos":{"h":7,"w":8,"x":0,"y":2},"id":2,"interval":"1m","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"apiserver_request:availability30d{verb=\"all\", + cluster=\"$cluster\"}"}],"title":"Availability (30d) > 99.000%","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"description":"How much error budget is left looking at our 0.990% + availability guarantees?","fieldConfig":{"defaults":{"custom":{"fillOpacity":100},"decimals":3,"unit":"percentunit"}},"gridPos":{"h":7,"w":16,"x":8,"y":2},"id":3,"interval":"1m","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"100 + * (apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"} - 0.990000)","legendFormat":"errorbudget"}],"title":"ErrorBudget + (30d) > 99.000%","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"description":"How many percent of read requests (LIST,GET) in 30 days + have been answered successfully and fast enough?","fieldConfig":{"defaults":{"decimals":3,"unit":"percentunit"}},"gridPos":{"h":7,"w":6,"x":0,"y":9},"id":4,"interval":"1m","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"apiserver_request:availability30d{verb=\"read\", + cluster=\"$cluster\"}"}],"title":"Read Availability (30d)","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"description":"How many read requests (LIST,GET) per second do the + apiservers get by 
code?","fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"stacking":{"mode":"normal"}},"unit":"reqps"},"overrides":[{"matcher":{"id":"byRegexp","options":"/2../i"},"properties":[{"id":"color","value":"#56A64B"}]},{"matcher":{"id":"byRegexp","options":"/3../i"},"properties":[{"id":"color","value":"#F2CC0C"}]},{"matcher":{"id":"byRegexp","options":"/4../i"},"properties":[{"id":"color","value":"#3274D9"}]},{"matcher":{"id":"byRegexp","options":"/5../i"},"properties":[{"id":"color","value":"#E02F44"}]}]},"gridPos":{"h":7,"w":6,"x":6,"y":9},"id":5,"interval":"1m","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (code) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})","legendFormat":"{{ + code }}"}],"title":"Read SLI - Requests","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"description":"How many percent of read requests (LIST,GET) per second + are returned with errors (5xx)?","fieldConfig":{"defaults":{"min":0,"unit":"percentunit"}},"gridPos":{"h":7,"w":6,"x":12,"y":9},"id":6,"interval":"1m","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\",code=~\"5..\", + cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\", + cluster=\"$cluster\"})","legendFormat":"{{ resource }}"}],"title":"Read SLI - + Errors","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"description":"How many seconds is the 99th percentile for reading (LIST|GET) + a given 
resource?","fieldConfig":{"defaults":{"unit":"s"}},"gridPos":{"h":7,"w":6,"x":18,"y":9},"id":7,"interval":"1m","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{verb=\"read\", + cluster=\"$cluster\"}","legendFormat":"{{ resource }}"}],"title":"Read SLI - Duration","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"description":"How many percent of write requests (POST|PUT|PATCH|DELETE) + in 30 days have been answered successfully and fast enough?","fieldConfig":{"defaults":{"decimals":3,"unit":"percentunit"}},"gridPos":{"h":7,"w":6,"x":0,"y":16},"id":8,"interval":"1m","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"apiserver_request:availability30d{verb=\"write\", + cluster=\"$cluster\"}"}],"title":"Write Availability (30d)","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"description":"How many write requests (POST|PUT|PATCH|DELETE) per + second do the apiservers get by 
code?","fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"stacking":{"mode":"normal"}},"unit":"reqps"},"overrides":[{"matcher":{"id":"byRegexp","options":"/2../i"},"properties":[{"id":"color","value":"#56A64B"}]},{"matcher":{"id":"byRegexp","options":"/3../i"},"properties":[{"id":"color","value":"#F2CC0C"}]},{"matcher":{"id":"byRegexp","options":"/4../i"},"properties":[{"id":"color","value":"#3274D9"}]},{"matcher":{"id":"byRegexp","options":"/5../i"},"properties":[{"id":"color","value":"#E02F44"}]}]},"gridPos":{"h":7,"w":6,"x":6,"y":16},"id":9,"interval":"1m","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (code) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})","legendFormat":"{{ + code }}"}],"title":"Write SLI - Requests","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"description":"How many percent of write requests (POST|PUT|PATCH|DELETE) + per second are returned with errors (5xx)?","fieldConfig":{"defaults":{"min":0,"unit":"percentunit"}},"gridPos":{"h":7,"w":6,"x":12,"y":16},"id":10,"interval":"1m","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\",code=~\"5..\", + cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\", + cluster=\"$cluster\"})","legendFormat":"{{ resource }}"}],"title":"Write SLI - + Errors","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"description":"How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) + a given 
resource?","fieldConfig":{"defaults":{"unit":"s"}},"gridPos":{"h":7,"w":6,"x":18,"y":16},"id":11,"interval":"1m","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{verb=\"write\", + cluster=\"$cluster\"}","legendFormat":"{{ resource }}"}],"title":"Write SLI - + Duration","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"min":0,"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":23},"id":12,"interval":"1m","options":{"legend":{"asTable":true,"placement":"right","showLegend":false},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(workqueue_adds_total{job=\"apiserver\", + instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, + name)","legendFormat":"{{instance}} {{name}}"}],"title":"Work Queue Add Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":12,"y":23},"id":13,"interval":"1m","options":{"legend":{"asTable":true,"placement":"right","showLegend":false},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(workqueue_depth{job=\"apiserver\", + instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, + name)","legendFormat":"{{instance}} {{name}}"}],"title":"Work Queue Depth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"min":0,"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":30},"id":14,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(workqueue_queue_duration_seconds_bucket{job=\"apiserver\", instance=~\"$instance\", + cluster=\"$cluster\"}[$__rate_interval])) by (instance, name, le))","legendFormat":"{{instance}} + {{name}}"}],"title":"Work Queue Latency","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":0,"y":37},"id":15,"interval":"1m","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"process_resident_memory_bytes{job=\"apiserver\",instance=~\"$instance\", + cluster=\"$cluster\"}","legendFormat":"{{instance}}"}],"title":"Memory","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"min":0,"unit":"short"}},"gridPos":{"h":7,"w":8,"x":8,"y":37},"id":16,"interval":"1m","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"rate(process_cpu_seconds_total{job=\"apiserver\",instance=~\"$instance\", + cluster=\"$cluster\"}[$__rate_interval])","legendFormat":"{{instance}}"}],"title":"CPU + usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + 
--"},"fieldConfig":{"defaults":{"unit":"short"}},"gridPos":{"h":7,"w":8,"x":16,"y":37},"id":17,"interval":"1m","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"go_goroutines{job=\"apiserver\",instance=~\"$instance\", + cluster=\"$cluster\"}","legendFormat":"{{instance}}"}],"title":"Goroutines","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"label":"cluster","name":"cluster","query":"label_values(up{job=\"apiserver\"}, + cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"name":"instance","query":"label_values(up{job=\"apiserver\", + cluster=\"$cluster\"}, instance)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Kubernetes / API server","uid":"09ec8aa1e996d6ffcd6817bbaff4db1b"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-apiserver + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-cluster-total.yaml b/monitoring/configmap-kube-prometheus-stack-cluster-total.yaml new file mode 100644 
index 0000000..5504d2e --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-cluster-total.yaml @@ -0,0 +1,116 @@ +apiVersion: v1 +data: + cluster-total.json: '{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":0},"id":1,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (namespace) (\n (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Current + Rate of Bits Received","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":0},"id":2,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (namespace) (\n (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Current + Rate of Bits 
Transmitted","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/Bits/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byRegexp","options":"/Packets/"},"properties":[{"id":"unit","value":"pps"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill + down","url":"/d/8b7a8b326d7a6f1f04244066368c67af/kubernetes-networking-namespace-pods?${datasource:queryparam}&var-cluster=${cluster}&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":9,"w":24,"x":0,"y":9},"id":3,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (namespace) (\n (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (namespace) (\n (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"avg + by (namespace) (\n (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n 
)\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"avg + by (namespace) (\n (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (namespace) (\n rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (namespace) (\n rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (namespace) (\n rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (namespace) (\n rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * + on (cluster,namespace,pod) group_left ()\n topk by 
(cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","format":"table","instant":true}],"title":"Current + Status","transformations":[{"id":"joinByField","options":{"byField":"namespace","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"Time + 7":true,"Time 8":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time + 5":4,"Time 6":5,"Time 7":6,"Time 8":7,"Value #A":9,"Value #B":10,"Value #C":11,"Value + #D":12,"Value #E":13,"Value #F":14,"Value #G":15,"Value #H":16,"namespace":8},"renameByName":{"Value + #A":"Rx Bits","Value #B":"Tx Bits","Value #C":"Rx Bits (Avg)","Value #D":"Tx Bits + (Avg)","Value #E":"Rx Packets","Value #F":"Tx Packets","Value #G":"Rx Packets + Dropped","Value #H":"Tx Packets Dropped","namespace":"Namespace"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":18},"id":4,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"avg + by (namespace) (\n (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Average + Rate of Bits Received","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":18},"id":5,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"avg + by (namespace) (\n (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Average + Rate of Bits Transmitted","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":27},"id":6,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (namespace) (\n (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Receive + Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + 
--"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":27},"id":7,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (namespace) (\n (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Transmit + Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":36},"id":8,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (namespace) (\n rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Rate + of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":36},"id":9,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (namespace) (\n rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Rate + of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":45},"id":10,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (namespace) (\n rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Rate + of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":45},"id":11,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (namespace) (\n rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Rate + of Transmitted Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"percentunit"}},"gridPos":{"h":9,"w":12,"x":0,"y":54},"id":12,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (instance) (\n rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$__rate_interval]) + / rate(node_netstat_Tcp_OutSegs{cluster=\"$cluster\"}[$__rate_interval])\n)\n","legendFormat":"__auto"}],"title":"Rate + of TCP Retransmits out of all sent segments","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"percentunit"}},"gridPos":{"h":9,"w":12,"x":12,"y":54},"id":13,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (instance) (\n 
rate(node_netstat_TcpExt_TCPSynRetrans{cluster=\"$cluster\"}[$__rate_interval]) + / rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$__rate_interval])\n)\n","legendFormat":"__auto"}],"title":"Rate + of TCP SYN Retransmits out of all retransmits","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Kubernetes / Networking / Cluster","uid":"ff635a025bcfea7bc3dd4f508990a3e9"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-cluster-total + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-controller-manager.yaml b/monitoring/configmap-kube-prometheus-stack-controller-manager.yaml new file mode 100644 index 0000000..fdf3f36 --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-controller-manager.yaml @@ -0,0 +1,60 @@ +apiVersion: v1 +data: + controller-manager.json: 
'{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":0,"y":0},"id":1,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(up{cluster=\"$cluster\", + job=\"kube-controller-manager\"})","instant":true}],"title":"Up","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":20,"x":4,"y":0},"id":2,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(workqueue_adds_total{cluster=\"$cluster\", + job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) + by (cluster, instance, name)","legendFormat":"{{cluster}} {{instance}} {{name}}"}],"title":"Work + Queue Add Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":3,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(workqueue_depth{cluster=\"$cluster\", + job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) + by (cluster, instance, 
name)","legendFormat":"{{cluster}} {{instance}} {{name}}"}],"title":"Work + Queue Depth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":4,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", + instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name, le))","legendFormat":"{{cluster}} + {{instance}} {{name}}"}],"title":"Work Queue Latency","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":8,"x":0,"y":21},"id":5,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", + instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))","legendFormat":"2xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", + instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))","legendFormat":"3xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", + 
instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))","legendFormat":"4xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", + instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))","legendFormat":"5xx"}],"title":"Kube + API Request Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":16,"x":8,"y":21},"id":6,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", + instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, le))","legendFormat":"{{verb}}"}],"title":"Post + Request Latency 99th Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":7,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", + instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, le))","legendFormat":"{{verb}}"}],"title":"Get + Request Latency 99th Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":0,"y":35},"id":8,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"process_resident_memory_bytes{cluster=\"$cluster\", + job=\"kube-controller-manager\",instance=~\"$instance\"}","legendFormat":"{{instance}}"}],"title":"Memory","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":8,"y":35},"id":9,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"rate(process_cpu_seconds_total{cluster=\"$cluster\", + job=\"kube-controller-manager\",instance=~\"$instance\"}[$__rate_interval])","legendFormat":"{{instance}}"}],"title":"CPU + usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":16,"y":35},"id":10,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"go_goroutines{cluster=\"$cluster\", + 
job=\"kube-controller-manager\",instance=~\"$instance\"}","legendFormat":"{{instance}}"}],"title":"Goroutines","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kube-controller-manager\"}, + cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"instance","name":"instance","query":"label_values(up{cluster=\"$cluster\", + job=\"kube-controller-manager\"}, instance)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Kubernetes / Controller Manager","uid":"72e0e05bef5099e5f049b05fdc429ed4"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-controller-manager + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-etcd.yaml b/monitoring/configmap-kube-prometheus-stack-etcd.yaml new file mode 100644 index 0000000..6ee644a --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-etcd.yaml @@ -0,0 +1,68 @@ +apiVersion: v1 +data: + etcd.json: '{"description":"etcd sample Grafana dashboard with Prometheus","panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"gridPos":{"h":7,"w":6,"x":0,"y":0},"id":1,"interval":"1m","options":{"colorMode":"none","graphMode":"none","reduceOptions":{"calcs":["lastNotNull"]}},"pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(etcd_server_has_leader{job=~\".*etcd.*\", + job=\"$cluster\"})","legendFormat":"{{cluster}} - {{namespace}}\n"}],"title":"Up","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"ops"}},"gridPos":{"h":7,"w":10,"x":6,"y":0},"id":2,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(grpc_server_started_total{job=~\".*etcd.*\", + job=\"$cluster\",grpc_type=\"unary\"}[$__rate_interval]))","legendFormat":"RPC + rate"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(grpc_server_handled_total{job=~\".*etcd.*\", + job=\"$cluster\",grpc_type=\"unary\",grpc_code=~\"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded\"}[$__rate_interval]))","legendFormat":"RPC + failed rate"}],"title":"RPC rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"}}},"gridPos":{"h":7,"w":8,"x":16,"y":0},"id":3,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(grpc_server_started_total{job=~\".*etcd.*\",job=\"$cluster\",grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"}) + - sum(grpc_server_handled_total{job=\"$cluster\",grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"})","legendFormat":"Watch + 
streams"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(grpc_server_started_total{job=~\".*etcd.*\",job=\"$cluster\",grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"}) + - sum(grpc_server_handled_total{job=\"$cluster\",grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"})","legendFormat":"Lease + streams"}],"title":"Active streams","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":0,"y":25},"id":4,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"etcd_mvcc_db_total_size_in_bytes{job=~\".*etcd.*\", + job=\"$cluster\"}","legendFormat":"{{instance}} DB size"}],"title":"DB size","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"s"}},"gridPos":{"h":7,"w":8,"x":8,"y":25},"id":5,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"histogram_quantile(0.99, + sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~\".*etcd.*\", job=\"$cluster\"}[$__rate_interval])) + by (instance, le))","legendFormat":"{{instance}} WAL fsync"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"histogram_quantile(0.99, + sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~\".*etcd.*\", job=\"$cluster\"}[$__rate_interval])) + by (instance, le))","legendFormat":"{{instance}} DB fsync"}],"title":"Disk sync + duration","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":16,"y":25},"id":6,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"process_resident_memory_bytes{job=~\".*etcd.*\", + job=\"$cluster\"}","legendFormat":"{{instance}} resident memory"}],"title":"Memory","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"Bps"}},"gridPos":{"h":7,"w":6,"x":0,"y":50},"id":7,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(etcd_network_client_grpc_received_bytes_total{job=~\".*etcd.*\", + job=\"$cluster\"}[$__rate_interval])","legendFormat":"{{instance}} client traffic + in"}],"title":"Client traffic in","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"Bps"}},"gridPos":{"h":7,"w":6,"x":6,"y":50},"id":8,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(etcd_network_client_grpc_sent_bytes_total{job=~\".*etcd.*\", + job=\"$cluster\"}[$__rate_interval])","legendFormat":"{{instance}} client traffic + out"}],"title":"Client traffic out","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"Bps"}},"gridPos":{"h":7,"w":6,"x":12,"y":50},"id":9,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(etcd_network_peer_received_bytes_total{job=~\".*etcd.*\", + job=\"$cluster\"}[$__rate_interval])) by (instance)","legendFormat":"{{instance}} + peer traffic 
in"}],"title":"Peer traffic in","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"Bps"}},"gridPos":{"h":7,"w":6,"x":18,"y":50},"id":10,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(etcd_network_peer_sent_bytes_total{job=~\".*etcd.*\", + job=\"$cluster\"}[$__rate_interval])) by (instance)","legendFormat":"{{instance}} + peer traffic out"}],"title":"Peer traffic out","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"}}},"gridPos":{"h":7,"w":8,"x":0,"y":75},"id":11,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"changes(etcd_server_leader_changes_seen_total{job=~\".*etcd.*\", + job=\"$cluster\"}[1d])","legendFormat":"{{instance}} total leader elections per + day"}],"title":"Raft proposals","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"}}},"gridPos":{"h":7,"w":8,"x":8,"y":75},"id":12,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"changes(etcd_server_leader_changes_seen_total{job=~\".*etcd.*\", + job=\"$cluster\"}[1d])","legendFormat":"{{instance}} total leader elections per + day"}],"title":"Total leader elections per day","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"s"}},"gridPos":{"h":7,"w":8,"x":16,"y":75},"id":13,"interval":"1m","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"histogram_quantile(0.99, + sum by 
(instance, le) (rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~\".*etcd.*\", + job=\"$cluster\"}[$__rate_interval])))","legendFormat":"{{instance}} peer round + trip time"}],"title":"Peer round trip time","type":"timeseries"}],"refresh":"10s","schemaVersion":36,"tags":["etcd-mixin"],"templating":{"list":[{"label":"Data + Source","name":"datasource","query":"prometheus","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"label":"cluster","name":"cluster","query":"label_values(etcd_server_has_leader{job=~\".*etcd.*\"}, + job)","refresh":2,"type":"query","allValue":".*","hide":2}]},"time":{"from":"now-15m","to":"now"},"timezone": + "utc","title":"etcd","uid":"c2f4e12cdf69feb95caa41a5a1b423d9"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-etcd + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-grafana-config-dashboards.yaml b/monitoring/configmap-kube-prometheus-stack-grafana-config-dashboards.yaml new file mode 100644 index 0000000..9d5b9e4 --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-grafana-config-dashboards.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +data: + provider.yaml: "apiVersion: 1\nproviders:\n - name: 'sidecarProvider'\n orgId:\ + \ 1\n folder: ''\n folderUid: ''\n type: file\n disableDeletion: false\n\ + \ allowUiUpdates: false\n updateIntervalSeconds: 30\n options:\n \ + \ foldersFromFilesStructure: false\n path: /tmp/dashboards" +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: 
kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana + app.kubernetes.io/version: 12.4.2 + helm.sh/chart: grafana-11.5.0 + name: kube-prometheus-stack-grafana-config-dashboards + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-grafana-datasource.yaml b/monitoring/configmap-kube-prometheus-stack-grafana-datasource.yaml new file mode 100644 index 0000000..310eed1 --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-grafana-datasource.yaml @@ -0,0 +1,26 @@ +apiVersion: v1 +data: + datasource.yaml: "apiVersion: 1\ndatasources:\n- name: \"Prometheus\"\n type: prometheus\n\ + \ uid: prometheus\n url: http://kube-prometheus-stack-prometheus.monitoring:9090/\n\ + \ access: proxy\n isDefault: true\n jsonData:\n httpMethod: POST\n timeInterval:\ + \ 30s\n- name: \"Alertmanager\"\n type: alertmanager\n uid: alertmanager\n \ + \ url: http://kube-prometheus-stack-alertmanager.monitoring:9093/\n access: proxy\n\ + \ jsonData:\n handleGrafanaManagedAlerts: false\n implementation: prometheus" +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_datasource: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-grafana-datasource + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-grafana-overview.yaml b/monitoring/configmap-kube-prometheus-stack-grafana-overview.yaml new file mode 100644 index 0000000..4c27cb5 --- /dev/null +++ 
b/monitoring/configmap-kube-prometheus-stack-grafana-overview.yaml @@ -0,0 +1,43 @@ +apiVersion: v1 +data: + grafana-overview.json: '{"annotations":{"list":[{"builtIn":1,"datasource":{"type":"datasource","uid":"grafana"},"enable":true,"hide":true,"iconColor":"rgba(0, + 211, 255, 1)","name":"Annotations & Alerts","target":{"limit":100,"matchAny":false,"tags":[],"type":"dashboard"},"type":"dashboard"}]},"editable":true,"fiscalYearStartMonth":0,"graphTooltip":0,"id":23,"links":[],"panels":[{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"mappings":[],"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":6,"x":0,"y":0},"id":6,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"showPercentChange":false,"text":{},"textMode":"auto","wideLayout":true},"pluginVersion":"12.0.2","targets":[{"datasource":{"uid":"$datasource"},"expr":"grafana_alerting_result_total{job=~\"$job\", + instance=~\"$instance\", state=\"alerting\"}","instant":true,"interval":"1m","legendFormat":"","refId":"A"}],"title":"Firing + Alerts","type":"stat"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":6,"x":6,"y":0},"id":8,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"showPercentChange":false,"text":{},"textMode":"auto","wideLayout":true},"pluginVersion":"12.0.2","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum(grafana_stat_totals_dashboard{job=~\"$job\", + 
instance=~\"$instance\"})","interval":"1m","legendFormat":"","refId":"A"}],"title":"Dashboards","type":"stat"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"cellOptions":{"type":"auto"},"inspect":false},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":12,"x":12,"y":0},"id":10,"options":{"cellHeight":"sm","footer":{"countRows":false,"fields":"","reducer":["sum"],"show":false},"showHeader":true},"pluginVersion":"12.0.2","targets":[{"datasource":{"uid":"$datasource"},"expr":"grafana_build_info{job=~\"$job\", + instance=~\"$instance\"}","instant":true,"interval":"1m","legendFormat":"","refId":"A"}],"title":"Build + Info","transformations":[{"id":"labelsToFields","options":{}},{"id":"merge","options":{}},{"id":"organize","options":{"excludeByName":{"Time":true,"Value":true,"branch":true,"container":true,"goversion":true,"namespace":true,"pod":true,"revision":true},"indexByName":{"Time":7,"Value":11,"branch":4,"container":8,"edition":2,"goversion":6,"instance":1,"job":0,"namespace":9,"pod":10,"revision":5,"version":3},"renameByName":{}}}],"type":"table"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"normal"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"reqps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":5},"id":2,"options":{"alertThreshold":t
rue,"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"multi","sort":"none"}},"pluginVersion":"12.0.2","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum + by (status_code) (irate(grafana_http_request_duration_seconds_count{job=~\"$job\", + instance=~\"$instance\"}[1m])) ","interval":"1m","legendFormat":"{{status_code}}","refId":"A"}],"title":"RPS","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"ms"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":5},"id":4,"options":{"alertThreshold":true,"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"multi","sort":"none"}},"pluginVersion":"12.0.2","targets":[{"datasource":{"uid":"$datasource"},"exemplar":true,"expr":"histogram_quantile(0.99, + sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", + job=~\"$job\"}[$__rate_interval])) by (le)) * 1","interval":"1m","legendFormat":"99th + Percentile","refId":"A"},{"datasource":{"uid":"$datasource"},"exemplar":true,"expr":"histogram_quantile(0.50, + sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", + job=~\"$job\"}[$__rate_interval])) by (le)) * 1","interval":"1m","legendFormat":"50th + 
Percentile","refId":"B"},{"datasource":{"uid":"$datasource"},"exemplar":true,"expr":"sum(irate(grafana_http_request_duration_seconds_sum{instance=~\"$instance\", + job=~\"$job\"}[$__rate_interval])) * 1 / sum(irate(grafana_http_request_duration_seconds_count{instance=~\"$instance\", + job=~\"$job\"}[$__rate_interval]))","interval":"1m","legendFormat":"Average","refId":"C"}],"title":"Request + Latency","type":"timeseries"}],"preload":false,"refresh":"","schemaVersion":41,"tags":[],"templating":{"list":[{"current":{"text":"Prometheus","value":"prometheus"},"includeAll":false,"name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":".*","current":{"text":"All","value":["$__all"]},"datasource":"$datasource","definition":"label_values(grafana_build_info, + job)","includeAll":true,"multi":true,"name":"job","options":[],"query":{"query":"label_values(grafana_build_info, + job)","refId":"Billing Admin-job-Variable-Query"},"refresh":1,"regex":"","type":"query"},{"allValue":".*","current":{"text":"All","value":"$__all"},"datasource":"$datasource","definition":"label_values(grafana_build_info, + instance)","includeAll":true,"multi":true,"name":"instance","options":[],"query":{"query":"label_values(grafana_build_info, + instance)","refId":"Billing Admin-instance-Variable-Query"},"refresh":1,"regex":"","type":"query"}]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["10s","30s","1m","5m","15m","30m","1h","2h","1d"]},"timezone": + "utc","title":"Grafana Overview","uid":"6be0s85Mk","version":1}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + 
grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-grafana-overview + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-grafana.yaml b/monitoring/configmap-kube-prometheus-stack-grafana.yaml new file mode 100644 index 0000000..64920f6 --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-grafana.yaml @@ -0,0 +1,55 @@ +apiVersion: v1 +data: + grafana.ini: '[analytics] + + check_for_updates = true + + [log] + + mode = console + + [paths] + + data = /var/lib/grafana/ + + logs = /var/log/grafana + + plugins = /var/lib/grafana/plugins + + provisioning = /etc/grafana/provisioning + + [server] + + domain = grafana.chemavx.xyz + + root_url = https://grafana.chemavx.xyz + + [unified_storage] + + index_path = /var/lib/grafana-search/bleve + + [auth] + + disable_login_form = true + + [auth.proxy] + + enabled = true + + header_name = X-authentik-username + + auto_sign_up = true' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana + app.kubernetes.io/version: 12.4.2 + helm.sh/chart: grafana-11.5.0 + name: kube-prometheus-stack-grafana + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-k8s-coredns.yaml b/monitoring/configmap-kube-prometheus-stack-k8s-coredns.yaml new file mode 100644 index 0000000..51b98fc --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-k8s-coredns.yaml @@ -0,0 +1,109 @@ +apiVersion: v1 +data: + k8s-coredns.json: '{"annotations":{"list":[{"builtIn":1,"datasource":{"type":"datasource","uid":"grafana"},"enable":true,"hide":true,"iconColor":"rgba(0, + 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"A + dashboard for the CoreDNS DNS server with updated metrics for version 1.7.0+. 
Based + on the CoreDNS dashboard by buhay.","editable":true,"fiscalYearStartMonth":0,"gnetId":12539,"graphTooltip":0,"id":7,"links":[{"icon":"external + link","tags":[],"targetBlank":true,"title":"CoreDNS.io","type":"link","url":"https://coredns.io"}],"liveNow":false,"panels":[{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"normal"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"pps","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":8,"x":0,"y":0},"id":2,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum(rate(coredns_dns_request_count_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + by (proto) or\nsum(rate(coredns_dns_requests_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + by (proto)","format":"time_series","interval":"1m","intervalFactor":2,"legendFormat":"{{ + proto }}","refId":"A","step":60}],"title":"Requests 
(total)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"normal"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"pps","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":8,"x":8,"y":0},"id":4,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum(rate(coredns_dns_request_type_count_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + by (type) or \nsum(rate(coredns_dns_requests_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + by (type)","interval":"1m","intervalFactor":2,"legendFormat":"{{ type }}","refId":"A","step":60}],"title":"Requests + (by 
qtype)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"normal"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"pps","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":8,"x":16,"y":0},"id":6,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum(rate(coredns_dns_request_count_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + by (zone) or\nsum(rate(coredns_dns_requests_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + by (zone)","interval":"1m","intervalFactor":2,"legendFormat":"{{ zone }}","refId":"A","step":60}],"title":"Requests + (by 
zone)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"pps","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":12,"x":0,"y":7},"id":8,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum(rate(coredns_dns_request_do_count_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + or\nsum(rate(coredns_dns_do_requests_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m]))","interval":"1m","intervalFactor":2,"legendFormat":"DO","refId":"A","step":40},{"datasource":{"uid":"$datasource"},"expr":"sum(rate(coredns_dns_request_count_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + or\nsum(rate(coredns_dns_requests_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m]))","interval":"1m","intervalFactor":2,"legendFormat":"total","refId":"B","step":40}],"title":"Requests + (DO 
bit)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes","unitScale":true},"overrides":[{"matcher":{"id":"byName","options":"tcp:90"},"properties":[{"id":"unit","value":"short"}]},{"matcher":{"id":"byName","options":"tcp:99 + "},"properties":[{"id":"unit","value":"short"}]},{"matcher":{"id":"byName","options":"tcp:50"},"properties":[{"id":"unit","value":"short"}]}]},"gridPos":{"h":7,"w":6,"x":12,"y":7},"id":10,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.99, + (sum(rate(coredns_dns_request_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) + by (proto)) or (sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) + by (le,proto)))","interval":"1m","intervalFactor":2,"legendFormat":"{{ proto }}:99 + ","refId":"A","step":60},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.90, + (sum(rate(coredns_dns_request_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) + by (proto)) or 
(sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) + by (le,proto)))","intervalFactor":2,"legendFormat":"{{ proto }}:90","refId":"B","step":60},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.50, + (sum(rate(coredns_dns_request_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) + by (proto)) or (sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) + by (le,proto)))","intervalFactor":2,"legendFormat":"{{ proto }}:50","refId":"C","step":60}],"title":"Requests + (size, udp)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":6,"x":18,"y":7},"id":12,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.99, + (sum(rate(coredns_dns_request_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) + by (proto)) or 
(sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) + by (le,proto)))","format":"time_series","interval":"1m","intervalFactor":2,"legendFormat":"{{ + proto }}:99 ","refId":"A","step":60},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.90, + (sum(rate(coredns_dns_request_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) + by (proto)) or (sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) + by (le,proto)))","format":"time_series","interval":"1m","intervalFactor":2,"legendFormat":"{{ + proto }}:90","refId":"B","step":60},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.50, + (sum(rate(coredns_dns_request_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) + by (proto)) or (sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) + by (le,proto)))","format":"time_series","interval":"1m","intervalFactor":2,"legendFormat":"{{ + proto }}:50","refId":"C","step":60}],"title":"Requests 
(size,tcp)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"normal"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"pps","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":12,"x":0,"y":14},"id":14,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum(rate(coredns_dns_response_rcode_count_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + by (rcode) or\nsum(rate(coredns_dns_responses_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + by (rcode)","interval":"1m","intervalFactor":2,"legendFormat":"{{ rcode }}","refId":"A","step":40}],"title":"Responses + (by 
rcode)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"s","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":12,"x":12,"y":14},"id":32,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.99, + (sum(rate(coredns_dns_request_duration_seconds{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + by (job)) or (sum(rate(coredns_dns_request_duration_seconds_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + by (le, job)))","format":"time_series","intervalFactor":2,"legendFormat":"99%","refId":"A","step":40},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.90, + (sum(rate(coredns_dns_request_duration_seconds{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + by ()) or (sum(rate(coredns_dns_request_duration_seconds_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + by (le)))","format":"time_series","intervalFactor":2,"legendFormat":"90%","refId":"B","step":40},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.50, + 
(sum(rate(coredns_dns_request_duration_seconds{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + by ()) or (sum(rate(coredns_dns_request_duration_seconds_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + by (le)))","format":"time_series","intervalFactor":2,"legendFormat":"50%","refId":"C","step":40}],"title":"Responses + (duration)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes","unitScale":true},"overrides":[{"matcher":{"id":"byName","options":"tcp:50%"},"properties":[{"id":"unit","value":"short"}]},{"matcher":{"id":"byName","options":"tcp:90%"},"properties":[{"id":"unit","value":"short"}]},{"matcher":{"id":"byName","options":"tcp:99%"},"properties":[{"id":"unit","value":"short"}]}]},"gridPos":{"h":7,"w":12,"x":0,"y":21},"id":18,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.99, + (sum(rate(coredns_dns_response_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) + by (proto)) or 
(sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) + by (le,proto))) ","interval":"1m","intervalFactor":2,"legendFormat":"{{ proto + }}:99%","refId":"A","step":40},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.90, + (sum(rate(coredns_dns_response_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) + by (proto)) or (sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) + by (le,proto))) ","interval":"1m","intervalFactor":2,"legendFormat":"{{ proto + }}:90%","refId":"B","step":40},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.50, + (sum(rate(coredns_dns_response_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) + by (proto)) or (sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) + by (le,proto))) ","hide":false,"intervalFactor":2,"legendFormat":"{{ proto }}:50%","metric":"","refId":"C","step":40}],"title":"Responses + (size, 
udp)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":12,"x":12,"y":21},"id":20,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.99, + (sum(rate(coredns_dns_response_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) + by (proto)) or (sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) + by (le,proto))) ","format":"time_series","intervalFactor":2,"legendFormat":"{{ + proto }}:99%","refId":"A","step":40},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.90, + (sum(rate(coredns_dns_response_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) + by (proto)) or (sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) + by (le,proto))) ","format":"time_series","intervalFactor":2,"legendFormat":"{{ + proto 
}}:90%","refId":"B","step":40},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.50, + (sum(rate(coredns_dns_response_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) + by (proto)) or (sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) + by (le,proto))) ","format":"time_series","intervalFactor":2,"legendFormat":"{{ + proto }}:50%","metric":"","refId":"C","step":40}],"title":"Responses (size, tcp)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"normal"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"decbytes","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":12,"x":0,"y":28},"id":22,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum(coredns_cache_size{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}) + by (type) or\nsum(coredns_cache_entries{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}) + by (type)","interval":"1m","intervalFactor":2,"legendFormat":"{{ type }}","refId":"A","step":40}],"title":"Cache + 
(size)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"normal"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"pps","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":12,"x":12,"y":28},"id":24,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum(rate(coredns_cache_hits_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + by (type)","hide":false,"intervalFactor":2,"legendFormat":"hits:{{ type }}","refId":"A","step":40},{"datasource":{"uid":"$datasource"},"expr":"sum(rate(coredns_cache_misses_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) + by (type)","hide":false,"intervalFactor":2,"legendFormat":"misses","refId":"B","step":40}],"title":"Cache + 
(hitrate)","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["dns","coredns"],"templating":{"list":[{"current":{},"hide":0,"includeAll":false,"multi":false,"name":"datasource","options":[],"query":"prometheus","queryValue":"","refresh":1,"regex":"","skipUrlSync":false,"type":"datasource"},{"allValue":".*","current":{"selected":false,"text":"All","value":"$__all"},"datasource":{"type":"prometheus","uid":"$datasource"},"definition":"label_values(coredns_dns_requests_total, + cluster)","hide":2,"includeAll":true,"label":"Cluster","multi":false,"name":"cluster","options":[],"query":"label_values(coredns_dns_requests_total, + cluster)","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tagsQuery":"","type":"query","useTags":false},{"allValue":".*","current":{"selected":false,"text":"All","value":"$__all"},"datasource":{"type":"prometheus","uid":"${datasource}"},"definition":"label_values(coredns_dns_requests_total{cluster=~\"$cluster\"},job)","hide":0,"includeAll":true,"label":"Job","multi":false,"name":"job","options":[],"query":{"qryType":1,"query":"label_values(coredns_dns_requests_total{cluster=~\"$cluster\"},job)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":2,"regex":"","skipUrlSync":false,"sort":1,"type":"query"},{"allValue":".*","current":{"selected":false,"text":"All","value":"$__all"},"datasource":{"type":"prometheus","uid":"$datasource"},"definition":"label_values(coredns_dns_requests_total{job=~\"$job\",cluster=~\"$cluster\"}, + instance)","hide":0,"includeAll":true,"label":"Instance","multi":false,"name":"instance","options":[],"query":"label_values(coredns_dns_requests_total{job=~\"$job\",cluster=~\"$cluster\"}, + instance)","refresh":2,"regex":"","skipUrlSync":false,"sort":3,"tagValuesQuery":"","tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-3h","to":"now"},"timepicker":{"refresh_intervals":["10s","30s","1m","5m","15m","30m","1h","2h","1d"]},"timezone": + 
"utc","title":"CoreDNS","uid":"vkQ0UHxik","version":3,"weekStart":""}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-k8s-coredns + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-k8s-resources-cluster.yaml b/monitoring/configmap-kube-prometheus-stack-k8s-resources-cluster.yaml new file mode 100644 index 0000000..bbac670 --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-k8s-resources-cluster.yaml @@ -0,0 +1,186 @@ +apiVersion: v1 +data: + k8s-resources-cluster.json: '{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":0,"y":0},"id":1,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"cluster:node_cpu:ratio_rate5m{cluster=\"$cluster\"}","instant":true}],"title":"CPU + Utilisation","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":4,"y":0},"id":2,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) + / 
sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})","instant":true}],"title":"CPU + Requests Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":8,"y":0},"id":3,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) + / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})","instant":true}],"title":"CPU + Limits Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":12,"y":0},"id":4,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"1 + - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\",cluster=\"$cluster\"})","instant":true}],"title":"Memory + Utilisation","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":16,"y":0},"id":5,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) + / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})","instant":true}],"title":"Memory + Requests Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":20,"y":0},"id":6,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) + / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})","instant":true}],"title":"Memory + Limits Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}}},"gridPos":{"h":6,"w":24,"x":0,"y":6},"id":7,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\"})) + by (namespace)","legendFormat":"__auto"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill + down to pods","url":"/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":12},"id":8,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_owner{job=\"kube-state-metrics\", + cluster=\"$cluster\"}) by 
(namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) + by (workload, namespace)) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\"})) + by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) + by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\"})) + by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) + by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) + by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\"})) + by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) + by (namespace)","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"namespace","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"Time + 7":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 
5":4,"Time + 6":5,"Time 7":6,"Value #A":8,"Value #B":9,"Value #C":10,"Value #D":11,"Value #E":12,"Value + #F":13,"Value #G":14,"namespace":7},"renameByName":{"Value #A":"Pods","Value #B":"Workloads","Value + #C":"CPU Usage","Value #D":"CPU Requests","Value #E":"CPU Requests %","Value #F":"CPU + Limits","Value #G":"CPU Limits %","namespace":"Namespace"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":6,"w":24,"x":0,"y":18},"id":9,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_rss{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"})) by + (namespace)","legendFormat":"__auto"}],"title":"Memory","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Memory + Usage"},"properties":[{"id":"unit","value":"bytes"}]},{"matcher":{"id":"byName","options":"Memory + Requests"},"properties":[{"id":"unit","value":"bytes"}]},{"matcher":{"id":"byName","options":"Memory + Limits"},"properties":[{"id":"unit","value":"bytes"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill + down to 
pods","url":"/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":24},"id":10,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_owner{job=\"kube-state-metrics\", + cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) + by (workload, namespace)) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_rss{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"})) by + (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) + by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_rss{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"})) by + (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) + by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) + by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_rss{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"})) by + (namespace) / 
sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) + by (namespace)","format":"table","instant":true}],"title":"Memory Requests by + Namespace","transformations":[{"id":"joinByField","options":{"byField":"namespace","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"Time + 7":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time + 6":5,"Time 7":6,"Value #A":8,"Value #B":9,"Value #C":10,"Value #D":11,"Value #E":12,"Value + #F":13,"Value #G":14,"namespace":7},"renameByName":{"Value #A":"Pods","Value #B":"Workloads","Value + #C":"Memory Usage","Value #D":"Memory Requests","Value #E":"Memory Requests %","Value + #F":"Memory Limits","Value #G":"Memory Limits %","namespace":"Namespace"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/Bandwidth/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byRegexp","options":"/Packets/"},"properties":[{"id":"unit","value":"pps"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill + down to pods","url":"/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":30},"id":11,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 + * rate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval]))) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 + * rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + 
cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval]))) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) + by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) + by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) + by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) + by (namespace)","format":"table","instant":true}],"title":"Current Network Usage","transformations":[{"id":"joinByField","options":{"byField":"namespace","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true},"indexByName":{"Time + 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":7,"Value + #B":8,"Value #C":9,"Value #D":10,"Value #E":11,"Value #F":12,"namespace":6},"renameByName":{"Value + #A":"Current Receive Bandwidth","Value #B":"Current Transmit Bandwidth","Value + #C":"Rate of Received Packets","Value #D":"Rate of Transmitted Packets","Value + #E":"Rate of Received Packets Dropped","Value #F":"Rate of Transmitted Packets + 
Dropped","namespace":"Namespace"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":6,"w":24,"x":0,"y":36},"id":12,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 + * rate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval]))) by (namespace)","legendFormat":"__auto"}],"title":"Receive + Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":6,"w":24,"x":0,"y":42},"id":13,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 + * rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval]))) by (namespace)","legendFormat":"__auto"}],"title":"Transmit + Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":6,"w":24,"x":0,"y":48},"id":14,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"avg((8 + * rate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval]))) by (namespace)","legendFormat":"__auto"}],"title":"Average + Container Bandwidth by Namespace: Received","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":6,"w":24,"x":0,"y":54},"id":15,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"avg((8 + * rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval]))) by (namespace)","legendFormat":"__auto"}],"title":"Average + Container Bandwidth by Namespace: Transmitted","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":6,"w":24,"x":0,"y":60},"id":16,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) + by (namespace)","legendFormat":"__auto"}],"title":"Rate of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":6,"w":24,"x":0,"y":66},"id":17,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) + by (namespace)","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":6,"w":24,"x":0,"y":72},"id":18,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_dropped_total{job=\"kubelet\", + 
metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) + by (namespace)","legendFormat":"__auto"}],"title":"Rate of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":6,"w":24,"x":0,"y":78},"id":19,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) + by (namespace)","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets + Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"iops"}},"gridPos":{"h":6,"w":24,"x":0,"y":84},"id":20,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"ceil(sum + by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", 
namespace!=\"\"}[$__rate_interval])))","legendFormat":"__auto"}],"title":"IOPS(Reads+Writes)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"Bps"}},"gridPos":{"h":6,"w":24,"x":0,"y":90},"id":21,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","legendFormat":"__auto"}],"title":"ThroughPut(Read+Write)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/IOPS/"},"properties":[{"id":"unit","value":"iops"}]},{"matcher":{"id":"byRegexp","options":"/Throughput/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill + down to pods","url":"/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":96},"id":22,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + 
device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(namespace) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(namespace) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(namespace) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(namespace) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(namespace) 
(rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true}],"title":"Current + Storage IO","transformations":[{"id":"joinByField","options":{"byField":"namespace","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true},"indexByName":{"Time + 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":7,"Value + #B":8,"Value #C":9,"Value #D":10,"Value #E":11,"Value #F":12,"namespace":6},"renameByName":{"Value + #A":"IOPS(Reads)","Value #B":"IOPS(Writes)","Value #C":"IOPS(Reads + Writes)","Value + #D":"Throughput(Read)","Value #E":"Throughput(Write)","Value #F":"Throughput(Read + + Write)","namespace":"Namespace"}}}],"type":"table"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kube-state-metrics\"}, + cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Kubernetes / Compute Resources / Cluster","uid":"efa86fd1d0c121a26444b636a3f509a8"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: 
monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-k8s-resources-cluster + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-k8s-resources-multicluster.yaml b/monitoring/configmap-kube-prometheus-stack-k8s-resources-multicluster.yaml new file mode 100644 index 0000000..f71434f --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-k8s-resources-multicluster.yaml @@ -0,0 +1,83 @@ +apiVersion: v1 +data: + k8s-resources-multicluster.json: '{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":3,"w":4,"x":0,"y":0},"id":1,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(cluster:node_cpu:ratio_rate5m) + / count(cluster:node_cpu:ratio_rate5m)","instant":true}],"title":"CPU Utilisation","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":4,"y":0},"id":2,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", + resource=\"cpu\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", + resource=\"cpu\"})","instant":true}],"title":"CPU Requests 
Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":8,"y":0},"id":3,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", + resource=\"cpu\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", + resource=\"cpu\"})","instant":true}],"title":"CPU Limits Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":12,"y":0},"id":4,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"1 + - sum(:node_memory_MemAvailable_bytes:sum) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\"})","instant":true}],"title":"Memory + Utilisation","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":16,"y":0},"id":5,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", + resource=\"memory\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", + resource=\"memory\"})","instant":true}],"title":"Memory Requests Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":20,"y":0},"id":6,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", + resource=\"memory\"}) / 
sum(kube_node_status_allocatable{job=\"kube-state-metrics\", + resource=\"memory\"})","instant":true}],"title":"Memory Limits Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"}}},"gridPos":{"h":7,"w":24,"x":0,"y":1},"id":7,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m)) + by (cluster)","legendFormat":"__auto"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Cluster"},"properties":[{"id":"links","value":[{"title":"Drill + down","url":"/d/efa86fd1d0c121a26444b636a3f509a8/kubernetes-compute-resources-cluster?${datasource:queryparam}&var-cluster=${__data.fields.Cluster}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":2},"id":8,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m)) + by (cluster)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", + resource=\"cpu\"}) by (cluster)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m)) + by (cluster) / 
sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", + resource=\"cpu\"}) by (cluster)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", + resource=\"cpu\"}) by (cluster)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m)) + by (cluster) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", + resource=\"cpu\"}) by (cluster)","format":"table","instant":true}],"title":"CPU + Quota","transformations":[{"id":"joinByField","options":{"byField":"cluster","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time + 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value + #C":8,"Value #D":9,"Value #E":10,"cluster":5},"renameByName":{"Value #A":"CPU + Usage","Value #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value + #E":"CPU Limits %","cluster":"Cluster"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bytes"}},"gridPos":{"h":7,"w":24,"x":0,"y":3},"id":9,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_rss{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", container!=\"\"})) by (cluster)","legendFormat":"__auto"}],"title":"Memory + Usage (w/o cache)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Cluster"},"properties":[{"id":"links","value":[{"title":"Drill + down","url":"/d/efa86fd1d0c121a26444b636a3f509a8/kubernetes-compute-resources-cluster?${datasource:queryparam}&var-cluster=${__data.fields.Cluster}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":4},"id":10,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_rss{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", container!=\"\"})) by (cluster)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", + resource=\"memory\"}) by (cluster)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_rss{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", container!=\"\"})) by (cluster) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", + resource=\"memory\"}) by (cluster)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", + resource=\"memory\"}) by (cluster)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_rss{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", container!=\"\"})) by (cluster) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", + resource=\"memory\"}) by (cluster)","format":"table","instant":true}],"title":"Memory + Requests by 
Cluster","transformations":[{"id":"joinByField","options":{"byField":"cluster","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time + 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value + #C":8,"Value #D":9,"Value #E":10,"cluster":5},"renameByName":{"Value #A":"Memory + Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value #D":"Memory + Limits","Value #E":"Memory Limits %","cluster":"Cluster"}}}],"type":"table"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Kubernetes / Compute Resources / Multi-Cluster","uid":"b59e6c9f2fcbe2e16d77fc492374cc4f"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-k8s-resources-multicluster + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-k8s-resources-namespace.yaml b/monitoring/configmap-kube-prometheus-stack-k8s-resources-namespace.yaml new file mode 100644 index 0000000..ecb5002 --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-k8s-resources-namespace.yaml @@ -0,0 +1,197 @@ +apiVersion: v1 +data: + k8s-resources-namespace.json: 
'{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":6,"x":0,"y":0},"id":1,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + namespace=\"$namespace\"})) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})","instant":true}],"title":"CPU + Utilisation (from requests)","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":6,"x":6,"y":0},"id":2,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + namespace=\"$namespace\"})) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})","instant":true}],"title":"CPU + Utilisation (from limits)","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":6,"x":12,"y":0},"id":3,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"kubelet\", + 
metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", + image!=\"\"})) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})","instant":true}],"title":"Memory + Utilisation (from requests)","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":6,"x":18,"y":0},"id":4,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", + image!=\"\"})) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})","instant":true}],"title":"Memory + Utilisation (from limits)","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}},"overrides":[{"matcher":{"id":"byFrameRefID","options":"B"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]},{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"orange","mode":"fixed"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":5,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by 
(cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + namespace=\"$namespace\"})) by (pod)","legendFormat":"__auto"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\", + namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"}))","legendFormat":"quota + - requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\", + namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"}))","legendFormat":"quota + - limits"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill + down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":6,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + namespace=\"$namespace\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", + namespace=\"$namespace\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, 
container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + namespace=\"$namespace\"})) by (pod) / sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", + namespace=\"$namespace\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", + namespace=\"$namespace\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + namespace=\"$namespace\"})) by (pod) / sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", + namespace=\"$namespace\"})) by (pod)","format":"table","instant":true}],"title":"CPU + Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time + 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value + #C":8,"Value #D":9,"Value #E":10,"pod":5},"renameByName":{"Value #A":"CPU Usage","Value + #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value + #E":"CPU Limits %","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"},"overrides":[{"matcher":{"id":"byFrameRefID","options":"B"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]},{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"orange","mode":"fixed"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":7,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", + container!=\"\", image!=\"\"})) by (pod)","legendFormat":"__auto"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\", + namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"}))","legendFormat":"quota + - requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\", + namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"}))","legendFormat":"quota + - limits"}],"title":"Memory Usage (w/o cache)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill + down to 
pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":8,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", + container!=\"\", image!=\"\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", + namespace=\"$namespace\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", + image!=\"\"})) by (pod) / sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", + namespace=\"$namespace\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", + namespace=\"$namespace\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", + image!=\"\"})) by (pod) / sum(max by (cluster, namespace, 
pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", + namespace=\"$namespace\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_rss{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"})) + by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_cache{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"})) + by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_swap{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"})) + by (pod)","format":"table","instant":true}],"title":"Memory Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"Time + 7":true,"Time 8":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time + 5":4,"Time 6":5,"Time 7":6,"Time 8":7,"Value #A":9,"Value #B":10,"Value #C":11,"Value + #D":12,"Value #E":13,"Value #F":14,"Value #G":15,"Value #H":16,"pod":8},"renameByName":{"Value + #A":"Memory Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value + #D":"Memory Limits","Value #E":"Memory Limits %","Value #F":"Memory Usage (RSS)","Value + #G":"Memory Usage (Cache)","Value #H":"Memory Usage (Swap)","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/Bandwidth/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byRegexp","options":"/Packets/"},"properties":[{"id":"unit","value":"pps"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill + down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":35},"id":9,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 + * rate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 + * rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) + by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) + by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) + by 
(pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) + by (pod)","format":"table","instant":true}],"title":"Current Network Usage","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true},"indexByName":{"Time + 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":7,"Value + #B":8,"Value #C":9,"Value #D":10,"Value #E":11,"Value #F":12,"pod":6},"renameByName":{"Value + #A":"Current Receive Bandwidth","Value #B":"Current Transmit Bandwidth","Value + #C":"Rate of Received Packets","Value #D":"Rate of Transmitted Packets","Value + #E":"Rate of Received Packets Dropped","Value #F":"Rate of Transmitted Packets + Dropped","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":42},"id":10,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 + * rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))) + by (pod)","legendFormat":"__auto"}],"title":"Receive Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":42},"id":11,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 + * rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))) + by (pod)","legendFormat":"__auto"}],"title":"Transmit Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":0,"y":49},"id":12,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) + by (pod)","legendFormat":"__auto"}],"title":"Rate of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":12,"y":49},"id":13,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", 
namespace=\"$namespace\"}[$__rate_interval])) + by (pod)","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":0,"y":56},"id":14,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) + by (pod)","legendFormat":"__auto"}],"title":"Rate of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":12,"y":56},"id":15,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) + by (pod)","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"iops"}},"gridPos":{"h":7,"w":12,"x":0,"y":63},"id":16,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"ceil(sum + by(pod) (rate(container_fs_reads_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))","legendFormat":"__auto"}],"title":"IOPS(Reads+Writes)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"Bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":63},"id":17,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","legendFormat":"__auto"}],"title":"ThroughPut(Read+Write)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/IOPS/"},"properties":[{"id":"unit","value":"iops"}]},{"matcher":{"id":"byRegexp","options":"/Throughput/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill + down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":70},"id":18,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", 
namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true}],"title":"Current + Storage IO","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true},"indexByName":{"Time + 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":7,"Value + #B":8,"Value #C":9,"Value #D":10,"Value #E":11,"Value #F":12,"pod":6},"renameByName":{"Value + #A":"IOPS(Reads)","Value #B":"IOPS(Writes)","Value #C":"IOPS(Reads + 
Writes)","Value + #D":"Throughput(Read)","Value #E":"Throughput(Write)","Value #F":"Throughput(Read + + Write)","pod":"Pod"}}}],"type":"table"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kube-state-metrics\"}, + cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"namespace","name":"namespace","query":"label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", + cluster=\"$cluster\"}, namespace)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Kubernetes / Compute Resources / Namespace (Pods)","uid":"85a562078cdf77779eaa1add43ccec1e"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-k8s-resources-namespace + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-k8s-resources-node.yaml b/monitoring/configmap-kube-prometheus-stack-k8s-resources-node.yaml new file mode 100644 index 0000000..6eaf716 --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-k8s-resources-node.yaml @@ -0,0 +1,99 @@ +apiVersion: v1 +data: + k8s-resources-node.json: 
'{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true,"stacking":{"mode":"normal"}}},"overrides":[{"matcher":{"id":"byName","options":"max + capacity"},"properties":[{"id":"color","value":{"fixedColor":"red","mode":"fixed"}},{"id":"custom.stacking","value":{"mode":"none"}},{"id":"custom.hideFrom","value":{"legend":false,"tooltip":true,"viz":false}},{"id":"custom.lineStyle","value":{"dash":[10,10],"fill":"dash"}}]},{"matcher":{"id":"byName","options":"max + allocatable"},"properties":[{"id":"color","value":{"fixedColor":"super-light-red","mode":"fixed"}},{"id":"custom.stacking","value":{"mode":"none"}},{"id":"custom.hideFrom","value":{"legend":false,"tooltip":true,"viz":false}},{"id":"custom.lineStyle","value":{"dash":[10,10],"fill":"dash"}},{"id":"custom.fillOpacity","value":0}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":0},"id":1,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_node_status_capacity{cluster=\"$cluster\", + job=\"kube-state-metrics\", node=~\"$node\", resource=\"cpu\"})","legendFormat":"max + capacity"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_node_status_allocatable{cluster=\"$cluster\", + job=\"kube-state-metrics\", node=~\"$node\", resource=\"cpu\"})","legendFormat":"max + allocatable"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + node=~\"$node\"})) by 
(pod)","legendFormat":"{{pod}}"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill + down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":6},"id":2,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + node=~\"$node\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", + node=~\"$node\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + node=~\"$node\"})) by (pod) / sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", + node=~\"$node\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", + node=~\"$node\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, 
container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + node=~\"$node\"})) by (pod) / sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", + node=~\"$node\"})) by (pod)","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"renameByName":{"Value + #A":"CPU Usage","Value #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU + Limits","Value #E":"CPU Limits %","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true,"stacking":{"mode":"normal"}},"unit":"bytes"},"overrides":[{"matcher":{"id":"byName","options":"max + capacity"},"properties":[{"id":"color","value":{"fixedColor":"red","mode":"fixed"}},{"id":"custom.stacking","value":{"mode":"none"}},{"id":"custom.hideFrom","value":{"legend":false,"tooltip":true,"viz":false}},{"id":"custom.lineStyle","value":{"dash":[10,10],"fill":"dash"}}]},{"matcher":{"id":"byName","options":"max + 
allocatable"},"properties":[{"id":"color","value":{"fixedColor":"super-light-red","mode":"fixed"}},{"id":"custom.stacking","value":{"mode":"none"}},{"id":"custom.hideFrom","value":{"legend":false,"tooltip":true,"viz":false}},{"id":"custom.lineStyle","value":{"dash":[10,10],"fill":"dash"}},{"id":"custom.fillOpacity","value":0}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":12},"id":3,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_node_status_capacity{cluster=\"$cluster\", + job=\"kube-state-metrics\", node=~\"$node\", resource=\"memory\"})","legendFormat":"max + capacity"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_node_status_allocatable{cluster=\"$cluster\", + job=\"kube-state-metrics\", node=~\"$node\", resource=\"memory\"})","legendFormat":"max + allocatable"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", + node=~\"$node\", container!=\"\"})) by (pod)","legendFormat":"{{pod}}"}],"title":"Memory + Usage (w/cache)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true,"stacking":{"mode":"normal"}},"unit":"bytes"},"overrides":[{"matcher":{"id":"byName","options":"max + capacity"},"properties":[{"id":"color","value":{"fixedColor":"red","mode":"fixed"}},{"id":"custom.stacking","value":{"mode":"none"}},{"id":"custom.hideFrom","value":{"legend":false,"tooltip":true,"viz":false}},{"id":"custom.lineStyle","value":{"dash":[10,10],"fill":"dash"}}]},{"matcher":{"id":"byName","options":"max + 
allocatable"},"properties":[{"id":"color","value":{"fixedColor":"super-light-red","mode":"fixed"}},{"id":"custom.stacking","value":{"mode":"none"}},{"id":"custom.hideFrom","value":{"legend":false,"tooltip":true,"viz":false}},{"id":"custom.lineStyle","value":{"dash":[10,10],"fill":"dash"}},{"id":"custom.fillOpacity","value":0}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":18},"id":4,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_node_status_capacity{cluster=\"$cluster\", + job=\"kube-state-metrics\", node=~\"$node\", resource=\"memory\"})","legendFormat":"max + capacity"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_node_status_allocatable{cluster=\"$cluster\", + job=\"kube-state-metrics\", node=~\"$node\", resource=\"memory\"})","legendFormat":"max + allocatable"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", + node=~\"$node\", container!=\"\"})) by (pod)","legendFormat":"{{pod}}"}],"title":"Memory + Usage (w/o cache)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill + down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":24},"id":5,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + 
by (cluster, namespace, pod, container)(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", + node=~\"$node\",container!=\"\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", + node=~\"$node\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", + node=~\"$node\",container!=\"\"})) by (pod) / sum(max by (cluster, namespace, + pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", + node=~\"$node\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", + node=~\"$node\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", + node=~\"$node\",container!=\"\"})) by (pod) / sum(max by (cluster, namespace, + pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", + node=~\"$node\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", + node=~\"$node\",container!=\"\"})) by 
(pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", + node=~\"$node\",container!=\"\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", + node=~\"$node\",container!=\"\"})) by (pod)","format":"table","instant":true}],"title":"Memory + Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"Time + 7":true,"Time 8":true},"renameByName":{"Value #A":"Memory Usage","Value #B":"Memory + Requests","Value #C":"Memory Requests %","Value #D":"Memory Limits","Value #E":"Memory + Limits %","Value #F":"Memory Usage (RSS)","Value #G":"Memory Usage (Cache)","Value + #H":"Memory Usage (Swap)","pod":"Pod"}}}],"type":"table"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kube-state-metrics\"}, + cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"node","multi":true,"name":"node","query":"label_values(kube_node_info{cluster=\"$cluster\"}, + node)","refresh":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Kubernetes / Compute Resources / Node (Pods)","uid":"200ac8fdbfbb74b39aff88118e4d1c2c"}' 
+kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-k8s-resources-node + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-k8s-resources-pod.yaml b/monitoring/configmap-kube-prometheus-stack-k8s-resources-pod.yaml new file mode 100644 index 0000000..fb773e9 --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-k8s-resources-pod.yaml @@ -0,0 +1,184 @@ +apiVersion: v1 +data: + k8s-resources-pod.json: '{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}},"overrides":[{"matcher":{"id":"byFrameRefID","options":"B"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]},{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"orange","mode":"fixed"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, 
namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{namespace=\"$namespace\", + pod=\"$pod\", cluster=\"$cluster\", container!=\"\"})) by (container)","legendFormat":"__auto"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n","legendFormat":"requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n","legendFormat":"limits"}],"title":"CPU + Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"axisColorMode":"thresholds","axisSoftMax":1,"axisSoftMin":0,"fillOpacity":10,"showPoints":"never","spanNulls":true,"thresholdsStyle":{"mode":"dashed+area"}},"unit":"percentunit"},"overrides":[{"matcher":{"id":"byFrameRefID","options":"A"},"properties":[{"id":"thresholds","value":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":0.25}]}},{"id":"color","value":{"mode":"thresholds","seriesBy":"lastNotNull"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":2,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(increase(container_cpu_cfs_throttled_periods_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", + cluster=\"$cluster\"}[$__rate_interval])) by (container) /sum(increase(container_cpu_cfs_periods_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", 
container!=\"\", + cluster=\"$cluster\"}[$__rate_interval])) by (container)","legendFormat":"__auto"}],"title":"CPU + Throttling","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":3,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{namespace=\"$namespace\", + pod=\"$pod\", cluster=\"$cluster\", container!=\"\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", + namespace=\"$namespace\", pod=\"$pod\", container!=\"\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + namespace=\"$namespace\", pod=\"$pod\", container!=\"\"})) by (container) / sum(max + by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", + namespace=\"$namespace\", pod=\"$pod\", container!=\"\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", + namespace=\"$namespace\", pod=\"$pod\", container!=\"\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by 
(cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + namespace=\"$namespace\", pod=\"$pod\", container!=\"\"})) by (container) / sum(max + by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", + namespace=\"$namespace\", pod=\"$pod\", container!=\"\"})) by (container)","format":"table","instant":true}],"title":"CPU + Quota","transformations":[{"id":"joinByField","options":{"byField":"container","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time + 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value + #C":8,"Value #D":9,"Value #E":10,"container":5},"renameByName":{"Value #A":"CPU + Usage","Value #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value + #E":"CPU Limits %","container":"Container"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"},"overrides":[{"matcher":{"id":"byFrameRefID","options":"B"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]},{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"orange","mode":"fixed"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":4,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, 
pod, container)(container_memory_working_set_bytes{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", + pod=\"$pod\", container!=\"\", image!=\"\"})) by (container)","legendFormat":"__auto"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n","legendFormat":"requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n","legendFormat":"limits"}],"title":"Memory + Usage (WSS)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":5,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", + pod=\"$pod\", container!=\"\", image!=\"\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", + namespace=\"$namespace\", pod=\"$pod\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", 
namespace=\"$namespace\", + pod=\"$pod\", image!=\"\"})) by (container) / sum(max by (cluster, namespace, + pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", + namespace=\"$namespace\", pod=\"$pod\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", + namespace=\"$namespace\", pod=\"$pod\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", + pod=\"$pod\", container!=\"\", image!=\"\"})) by (container) / sum(max by (cluster, + namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", + namespace=\"$namespace\", pod=\"$pod\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_rss{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", + pod=\"$pod\", container != \"\", container != \"POD\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, container)(container_memory_cache{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", + pod=\"$pod\", container != \"\", container != \"POD\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max + by (cluster, namespace, pod, 
container)(container_memory_swap{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", + pod=\"$pod\", container != \"\", container != \"POD\"})) by (container)","format":"table","instant":true}],"title":"Memory + Quota","transformations":[{"id":"joinByField","options":{"byField":"container","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"Time + 7":true,"Time 8":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time + 5":4,"Time 6":5,"Time 7":6,"Time 8":7,"Value #A":9,"Value #B":10,"Value #C":11,"Value + #D":12,"Value #E":13,"Value #F":14,"Value #G":15,"Value #H":16,"container":8},"renameByName":{"Value + #A":"Memory Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value + #D":"Memory Limits","Value #E":"Memory Limits %","Value #F":"Memory Usage (RSS)","Value + #G":"Memory Usage (Cache)","Value #H":"Memory Usage (Swap)","container":"Container"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":35},"id":6,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 + * irate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))) + by (pod)","legendFormat":"__auto"}],"title":"Receive Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":35},"id":7,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 + * rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))) + by (pod)","legendFormat":"__auto"}],"title":"Transmit Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":0,"y":42},"id":8,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", + pod=~\"$pod\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate + of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":12,"y":42},"id":9,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", + pod=~\"$pod\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate + of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":0,"y":49},"id":10,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", + pod=~\"$pod\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate + of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":12,"y":49},"id":11,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", + pod=~\"$pod\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate + of Transmitted Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"iops"}},"gridPos":{"h":7,"w":12,"x":0,"y":56},"id":12,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"ceil(sum + by(pod) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))","legendFormat":"Reads"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"ceil(sum + by(pod) (rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))","legendFormat":"Writes"}],"title":"IOPS + (Pod)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- 
Mixed + --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"Bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":56},"id":13,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(pod) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))","legendFormat":"Reads"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(pod) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))","legendFormat":"Writes"}],"title":"ThroughPut + (Pod)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"iops"}},"gridPos":{"h":7,"w":12,"x":0,"y":63},"id":14,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"ceil(sum + by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + container!=\"\", 
cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))","legendFormat":"__auto"}],"title":"IOPS + (Containers)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"Bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":63},"id":15,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","legendFormat":"__auto"}],"title":"ThroughPut + (Containers)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/IOPS/"},"properties":[{"id":"unit","value":"iops"}]},{"matcher":{"id":"byRegexp","options":"/Throughput/"},"properties":[{"id":"unit","value":"Bps"}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":70},"id":16,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(container) 
(rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\",device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(container) (rate(container_fs_reads_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + + rate(container_fs_writes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(container) (rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by(container) (rate(container_fs_reads_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + 
device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + + rate(container_fs_writes_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", + container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true}],"title":"Current + Storage IO","transformations":[{"id":"joinByField","options":{"byField":"container","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true},"indexByName":{"Time + 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":7,"Value + #B":8,"Value #C":9,"Value #D":10,"Value #E":11,"Value #F":12,"container":6},"renameByName":{"Value + #A":"IOPS(Reads)","Value #B":"IOPS(Writes)","Value #C":"IOPS(Reads + Writes)","Value + #D":"Throughput(Read)","Value #E":"Throughput(Write)","Value #F":"Throughput(Read + + Write)","container":"Container"}}}],"type":"table"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kube-state-metrics\"}, + cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"namespace","name":"namespace","query":"label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", + cluster=\"$cluster\"}, 
namespace)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"pod","name":"pod","query":"label_values(kube_pod_info{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\"}, pod)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Kubernetes / Compute Resources / Pod","uid":"6581e46e4e5c7ba40a07646395ef7b23"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-k8s-resources-pod + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-k8s-resources-workload.yaml b/monitoring/configmap-kube-prometheus-stack-k8s-resources-workload.yaml new file mode 100644 index 0000000..c69a3ac --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-k8s-resources-workload.yaml @@ -0,0 +1,200 @@ +apiVersion: v1 +data: + k8s-resources-workload.json: '{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}}},"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + namespace=\"$namespace\"})\n * on(cluster, namespace, pod)\n group_left(workload, + workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) + by (pod)\n","legendFormat":"__auto"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill + down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":2,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + namespace=\"$namespace\"})\n * on(cluster, namespace, pod)\n group_left(workload, + workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) + by 
(pod)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(kube_pod_container_resource_requests{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})\n * on(cluster, + namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) + by (pod)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + namespace=\"$namespace\"})\n * on(cluster, namespace, pod)\n group_left(workload, + workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) + by (pod)\n/sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_requests{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})\n * on(cluster, + namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) + by (pod)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(kube_pod_container_resource_limits{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})\n * on(cluster, + namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) + by 
(pod)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + namespace=\"$namespace\"})\n * on(cluster, namespace, pod)\n group_left(workload, + workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) + by (pod)\n/sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_limits{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})\n * on(cluster, + namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) + by (pod)\n","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time + 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value + #C":8,"Value #D":9,"Value #E":10,"pod":5},"renameByName":{"Value #A":"CPU Usage","Value + #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value + #E":"CPU Limits %","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":3,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(container_memory_working_set_bytes{cluster=\"$cluster\", + namespace=\"$namespace\", container!=\"\", image!=\"\"})\n * on(cluster, namespace, + pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) + by (pod)\n","legendFormat":"__auto"}],"title":"Memory Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill + down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":4,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(container_memory_working_set_bytes{cluster=\"$cluster\", + namespace=\"$namespace\", container!=\"\", image!=\"\"})\n * on(cluster, namespace, + pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) + by 
(pod)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(kube_pod_container_resource_requests{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})\n * on(cluster, + namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) + by (pod)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(container_memory_working_set_bytes{cluster=\"$cluster\", + namespace=\"$namespace\", container!=\"\", image!=\"\"})\n * on(cluster, namespace, + pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) + by (pod)\n/sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_requests{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})\n * on(cluster, + namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) + by (pod)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(kube_pod_container_resource_limits{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})\n * on(cluster, + namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) + by 
(pod)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(container_memory_working_set_bytes{cluster=\"$cluster\", + namespace=\"$namespace\", container!=\"\", image!=\"\"})\n * on(cluster, namespace, + pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) + by (pod)\n/sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_limits{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})\n * on(cluster, + namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) + by (pod)\n","format":"table","instant":true}],"title":"Memory Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time + 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":9,"Value #B":10,"Value + #C":11,"Value #D":12,"Value #E":13,"pod":8},"renameByName":{"Value #A":"Memory + Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value #D":"Memory + Limits","Value #E":"Memory Limits %","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/Bandwidth/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byRegexp","options":"/Packets/"},"properties":[{"id":"unit","value":"pps"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill + down to 
pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":5,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum((8 + * rate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) + by (pod))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum((8 + * rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) + by (pod))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_receive_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) + by (pod))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_transmit_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", 
namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) + by (pod))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_receive_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) + by (pod))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_transmit_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) + by (pod))\n","format":"table","instant":true}],"title":"Current Network Usage","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true},"indexByName":{"Time + 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":7,"Value + #B":8,"Value #C":9,"Value #D":10,"Value #E":11,"Value #F":12,"pod":6},"renameByName":{"Value + #A":"Current Receive Bandwidth","Value #B":"Current Transmit Bandwidth","Value + #C":"Rate of Received Packets","Value #D":"Rate of Transmitted Packets","Value + #E":"Rate of Received Packets 
Dropped","Value #F":"Rate of Transmitted Packets + Dropped","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":35},"id":6,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum((8 + * rate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) + by (pod))\n","legendFormat":"__auto"}],"title":"Receive Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":35},"id":7,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum((8 + * rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) + by (pod))\n","legendFormat":"__auto"}],"title":"Transmit 
Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":42},"id":8,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(avg((8 + * rate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) + by (pod))\n","legendFormat":"__auto"}],"title":"Average Container Bandwidth by + Pod: Received","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":42},"id":9,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(avg((8 + * rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) + by (pod))\n","legendFormat":"__auto"}],"title":"Average Container Bandwidth by + Pod: 
Transmitted","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":0,"y":49},"id":10,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_receive_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) + by (pod))\n","legendFormat":"__auto"}],"title":"Rate of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":12,"y":49},"id":11,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_transmit_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) + by (pod))\n","legendFormat":"__auto"}],"title":"Rate of Transmitted 
Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":0,"y":56},"id":12,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_receive_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) + by (pod))\n","legendFormat":"__auto"}],"title":"Rate of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":12,"y":56},"id":13,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_transmit_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) + by (pod))\n","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets 
Dropped","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kube-state-metrics\"}, + cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"namespace","name":"namespace","query":"label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", + cluster=\"$cluster\"}, namespace)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"workload_type","name":"type","query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\"}, workload_type)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"workload","name":"workload","query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}, workload)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Kubernetes / Compute Resources / Workload","uid":"a164a7f0339f99e89cea5cb47e9be617"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: 
kube-prometheus-stack + name: kube-prometheus-stack-k8s-resources-workload + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-k8s-resources-workloads-namespace.yaml b/monitoring/configmap-kube-prometheus-stack-k8s-resources-workloads-namespace.yaml new file mode 100644 index 0000000..892f432 --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-k8s-resources-workloads-namespace.yaml @@ -0,0 +1,208 @@ +apiVersion: v1 +data: + k8s-resources-workloads-namespace.json: '{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}},"overrides":[{"matcher":{"id":"byFrameRefID","options":"B"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]},{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"orange","mode":"fixed"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + namespace=\"$namespace\"})\n* on(cluster, namespace, pod)\n group_left(workload, + workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, 
workload_type)\n","legendFormat":"{{workload}} + - {{workload_type}}"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\", + namespace=\"$namespace\", type=\"hard\",resource=~\"requests.cpu|cpu\"}))","legendFormat":"quota + - requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\", + namespace=\"$namespace\", type=\"hard\",resource=~\"limits.cpu\"}))","legendFormat":"quota + - limits"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Workload"},"properties":[{"id":"links","value":[{"title":"Drill + down to workloads","url":"/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-type=${__data.fields.Type}&var-workload=${__data.fields.Workload}"}]}]},{"matcher":{"id":"byName","options":"Running + Pods"},"properties":[{"id":"unit","value":"none"}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":2,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + namespace=\"$namespace\"})\n* on(cluster, namespace, pod)\n group_left(workload, + workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, 
workload_type)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(kube_pod_container_resource_requests{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})\n* on(cluster, + namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + namespace=\"$namespace\"})\n* on(cluster, namespace, pod)\n group_left(workload, + workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n max + by (cluster, namespace, pod, container)(kube_pod_container_resource_requests{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})\n* on(cluster, + namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(kube_pod_container_resource_limits{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})\n* on(cluster, + namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, 
workload_type)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", + namespace=\"$namespace\"})\n* on(cluster, namespace, pod)\n group_left(workload, + workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n max + by (cluster, namespace, pod, container)(kube_pod_container_resource_limits{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})\n* on(cluster, + namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true}],"title":"CPU + Quota","transformations":[{"id":"joinByField","options":{"byField":"workload","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"workload_type + 2":true,"workload_type 3":true,"workload_type 4":true,"workload_type 5":true,"workload_type + 6":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time + 6":5,"Value #A":8,"Value #B":9,"Value #C":10,"Value #D":11,"Value #E":12,"Value + #F":13,"workload":6,"workload_type 1":7,"workload_type 2":14,"workload_type 3":15,"workload_type + 4":16,"workload_type 5":17,"workload_type 6":18},"renameByName":{"Value #A":"Running + Pods","Value #B":"CPU Usage","Value #C":"CPU Requests","Value #D":"CPU Requests + %","Value #E":"CPU Limits","Value #F":"CPU Limits %","workload":"Workload","workload_type + 1":"Type"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed + 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"},"overrides":[{"matcher":{"id":"byFrameRefID","options":"B"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]},{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"orange","mode":"fixed"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":3,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", + container!=\"\", image!=\"\"})\n * on(cluster, namespace, pod)\n group_left(workload, + workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","legendFormat":"{{workload}} + - {{workload_type}}"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\", + namespace=\"$namespace\", type=\"hard\",resource=~\"requests.memory|memory\"}))","legendFormat":"quota + - requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\", + namespace=\"$namespace\", type=\"hard\",resource=~\"limits.memory\"}))","legendFormat":"quota + - limits"}],"title":"Memory Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Workload"},"properties":[{"id":"links","value":[{"title":"Drill + down to workloads","url":"/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-type=${__data.fields.Type}&var-workload=${__data.fields.Workload}"}]}]},{"matcher":{"id":"byName","options":"Running + Pods"},"properties":[{"id":"unit","value":"none"}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":4,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", + container!=\"\", image!=\"\"})\n * on(cluster, namespace, pod)\n group_left(workload, + workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(kube_pod_container_resource_requests{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})\n* on(cluster, + namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, 
workload_type)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", + container!=\"\", image!=\"\"})\n * on(cluster, namespace, pod)\n group_left(workload, + workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n max + by (cluster, namespace, pod, container)(kube_pod_container_resource_requests{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})\n* on(cluster, + namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(kube_pod_container_resource_limits{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})\n* on(cluster, + namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max + by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", + container!=\"\", image!=\"\"})\n * on(cluster, namespace, pod)\n group_left(workload, + workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", 
workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n max + by (cluster, namespace, pod, container)(kube_pod_container_resource_limits{job=\"kube-state-metrics\", + cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})\n* on(cluster, + namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true}],"title":"Memory + Quota","transformations":[{"id":"joinByField","options":{"byField":"workload","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"workload_type + 2":true,"workload_type 3":true,"workload_type 4":true,"workload_type 5":true,"workload_type + 6":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time + 6":5,"Value #A":8,"Value #B":9,"Value #C":10,"Value #D":11,"Value #E":12,"Value + #F":13,"workload":6,"workload_type 1":7,"workload_type 2":14,"workload_type 3":15,"workload_type + 4":16,"workload_type 5":17,"workload_type 6":18},"renameByName":{"Value #A":"Running + Pods","Value #B":"Memory Usage","Value #C":"Memory Requests","Value #D":"Memory + Requests %","Value #E":"Memory Limits","Value #F":"Memory Limits %","workload":"Workload","workload_type + 1":"Type"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/Bandwidth/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byRegexp","options":"/Packets/"},"properties":[{"id":"unit","value":"pps"}]},{"matcher":{"id":"byName","options":"Workload"},"properties":[{"id":"links","value":[{"title":"Drill + down to 
workloads","url":"/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-type=${__data.fields.Type}&var-workload=${__data.fields.Workload}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":5,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum((8 + * rate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum((8 + * rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_receive_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_transmit_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", 
namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_receive_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_transmit_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true}],"title":"Current + Network Usage","transformations":[{"id":"joinByField","options":{"byField":"workload","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true},"indexByName":{"Time + 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":7,"Value + #B":8,"Value #C":9,"Value #D":10,"Value #E":11,"Value #F":12,"workload":6},"renameByName":{"Value + #A":"Current Receive Bandwidth","Value #B":"Current Transmit Bandwidth","Value + #C":"Rate of Received Packets","Value #D":"Rate of Transmitted Packets","Value + #E":"Rate of Received Packets Dropped","Value #F":"Rate of Transmitted Packets + 
Dropped","workload":"Workload"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":35},"id":6,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum((8 + * rate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Receive + Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":35},"id":7,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum((8 + * rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Transmit + 
Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":42},"id":8,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(avg((8 + * rate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Average + Container Bandwidth by Workload: Received","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":42},"id":9,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(avg((8 + * rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Average + Container Bandwidth by Workload: 
Transmitted","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":0,"y":49},"id":10,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_receive_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Rate + of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":12,"y":49},"id":11,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_transmit_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Rate + of Transmitted 
Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":0,"y":56},"id":12,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_receive_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Rate + of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":12,"y":56},"id":13,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_transmit_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Rate + of Transmitted Packets 
Dropped","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kube-state-metrics\"}, + cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"namespace","name":"namespace","query":"label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", + cluster=\"$cluster\"}, namespace)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"workload_type","name":"type","query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\".+\"}, workload_type)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Kubernetes / Compute Resources / Namespace (Workloads)","uid":"a87fb0d919ec0ea5f6543124e16c42a5"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-k8s-resources-workloads-namespace + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-kubelet.yaml b/monitoring/configmap-kube-prometheus-stack-kubelet.yaml new file mode 100644 index 0000000..ea6e319 --- /dev/null +++ 
b/monitoring/configmap-kube-prometheus-stack-kubelet.yaml @@ -0,0 +1,117 @@ +apiVersion: v1 +data: + kubelet.json: '{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":0,"y":0},"id":1,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kubelet_node_name{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\"})","instant":true}],"title":"Running + Kubelets","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":4,"y":0},"id":2,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kubelet_running_pods{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})","instant":true}],"title":"Running + Pods","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":8,"y":0},"id":3,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kubelet_running_containers{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"})","instant":true}],"title":"Running + Containers","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed + 
--"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":12,"y":0},"id":4,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(volume_manager_total_volumes{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})","instant":true}],"title":"Actual + Volume Count","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":16,"y":0},"id":5,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(volume_manager_total_volumes{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})","instant":true}],"title":"Desired + Volume Count","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":20,"y":0},"id":6,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubelet_node_config_error{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval]))","instant":true}],"title":"Config + Error Count","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed + 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":7},"id":7,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (operation_type, + instance)","legendFormat":"{{instance}} {{operation_type}}"}],"title":"Operation + Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":12,"y":7},"id":8,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, + operation_type)","legendFormat":"{{instance}} {{operation_type}}"}],"title":"Operation + Error Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":9,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + 
sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, + operation_type, le))","legendFormat":"{{instance}} {{operation_type}}"}],"title":"Operation + Duration 99th quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":21},"id":10,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)","legendFormat":"{{instance}} + pod"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)","legendFormat":"{{instance}} + worker"}],"title":"Pod Start Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":12,"x":12,"y":21},"id":11,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(kubelet_pod_start_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", + 
metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, + le))","legendFormat":"{{instance}} pod"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, + le))","legendFormat":"{{instance}} worker"}],"title":"Pod Start Duration","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":28},"id":12,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, + operation_name, volume_plugin)","legendFormat":"{{instance}} {{operation_name}} + {{volume_plugin}}"}],"title":"Storage Operation Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":12,"y":28},"id":13,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, + operation_name, 
volume_plugin)","legendFormat":"{{instance}} {{operation_name}} + {{volume_plugin}}"}],"title":"Storage Operation Error Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":35},"id":14,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", + metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, + operation_name, volume_plugin, le))","legendFormat":"{{instance}} {{operation_name}} + {{volume_plugin}}"}],"title":"Storage Operation Duration 99th quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":42},"id":15,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) + by (instance, operation_type)","legendFormat":"{{operation_type}}"}],"title":"Cgroup + manager operation rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":12,"x":12,"y":42},"id":16,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) + by (instance, operation_type, le))","legendFormat":"{{instance}} {{operation_type}}"}],"title":"Cgroup + manager 99th quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":49},"id":17,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) + by (instance)","legendFormat":"{{instance}}"}],"title":"PLEG relist rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":12,"x":12,"y":49},"id":18,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, + le))","legendFormat":"{{instance}}"}],"title":"PLEG relist interval","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":56},"id":19,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, + le))","legendFormat":"{{instance}}"}],"title":"PLEG relist duration","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":24,"x":0,"y":63},"id":20,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))","legendFormat":"2xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))","legendFormat":"3xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))","legendFormat":"4xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))","legendFormat":"5xx"}],"title":"RPC + rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":70},"id":21,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + 
sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, + verb, le))","legendFormat":"{{instance}} {{verb}}"}],"title":"Request duration + 99th quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":0,"y":77},"id":22,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\",instance=~\"$instance\"}","legendFormat":"{{instance}}"}],"title":"Memory","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":8,"y":77},"id":23,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])","legendFormat":"{{instance}}"}],"title":"CPU + usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":16,"y":77},"id":24,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"go_goroutines{cluster=\"$cluster\",job=\"kubelet\", + metrics_path=\"/metrics\",instance=~\"$instance\"}","legendFormat":"{{instance}}"}],"title":"Goroutines","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kubelet\", + metrics_path=\"/metrics\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"instance","name":"instance","query":"label_values(up{job=\"kubelet\", + metrics_path=\"/metrics\",cluster=\"$cluster\"}, instance)","refresh":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Kubernetes / Kubelet","uid":"3138fa155d5915769fbded898ac09fd9"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: 
kube-prometheus-stack-kubelet + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-namespace-by-pod.yaml b/monitoring/configmap-kube-prometheus-stack-namespace-by-pod.yaml new file mode 100644 index 0000000..5538cb8 --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-namespace-by-pod.yaml @@ -0,0 +1,93 @@ +apiVersion: v1 +data: + namespace-by-pod.json: '{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"displayName":"$namespace","max":10000000000,"min":0,"thresholds":{"steps":[{"color":"dark-green","index":0,"value":null},{"color":"dark-yellow","index":1,"value":5000000000},{"color":"dark-red","index":2,"value":7000000000}]},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":0},"id":1,"interval":"1m","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + (\n (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Current + Rate of Bits Received","type":"gauge"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"displayName":"$namespace","max":10000000000,"min":0,"thresholds":{"steps":[{"color":"dark-green","index":0,"value":null},{"color":"dark-yellow","index":1,"value":5000000000},{"color":"dark-red","index":2,"value":7000000000}]},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":0},"id":2,"interval":"1m","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + (\n (8 * 
rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Current + Rate of Bits Transmitted","type":"gauge"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/Bandwidth/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byRegexp","options":"/Packets/"},"properties":[{"id":"unit","value":"pps"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill + down","url":"/d/7a18067ce943a40ae25454675c19ff5c/kubernetes-networking-pod?${datasource:queryparam}&var-cluster=${cluster}&var-namespace=${namespace}&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":9,"w":24,"x":0,"y":9},"id":3,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (pod) (\n (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (pod) (\n (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (pod) (\n 
rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (pod) (\n rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (pod) (\n rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (pod) (\n rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","format":"table","instant":true}],"title":"Current + Network Usage","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true},"indexByName":{"Time + 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":7,"Value + #B":8,"Value #C":9,"Value #D":10,"Value #E":11,"Value 
#F":12,"pod":6},"renameByName":{"Value + #A":"Current Receive Bandwidth","Value #B":"Current Transmit Bandwidth","Value + #C":"Rate of Received Packets","Value #D":"Rate of Transmitted Packets","Value + #E":"Rate of Received Packets Dropped","Value #F":"Rate of Transmitted Packets + Dropped","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":18},"id":4,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (pod) (\n (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Receive + Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":18},"id":5,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (pod) (\n (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Transmit + Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + 
--"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":27},"id":6,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (pod) (\n rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Rate + of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":27},"id":7,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (pod) (\n rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Rate + of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":36},"id":8,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + 
by (pod) (\n rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Rate + of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":36},"id":9,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + by (pod) (\n rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Rate + of Transmitted Packets Dropped","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\"}, 
cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"allValue":".+","current":{"selected":false,"text":"kube-system","value":"kube-system"},"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"namespace","name":"namespace","query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, + namespace)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Kubernetes / Networking / Namespace (Pods)","uid":"8b7a8b326d7a6f1f04244066368c67af"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-namespace-by-pod + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-namespace-by-workload.yaml b/monitoring/configmap-kube-prometheus-stack-namespace-by-workload.yaml new file mode 100644 index 0000000..bec2c9b --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-namespace-by-workload.yaml @@ -0,0 +1,138 @@ +apiVersion: v1 +data: + namespace-by-workload.json: '{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"color":{"fixedColor":"green","mode":"fixed"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":0},"id":1,"interval":"1m","options":{"displayMode":"basic","showUnfilled":false},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum((8 + * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n* + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* + on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Current + Rate of Bits Received","type":"bargauge"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"color":{"fixedColor":"green","mode":"fixed"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":0},"id":2,"interval":"1m","options":{"displayMode":"basic","showUnfilled":false},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum((8 + * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n* + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* + on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Current + Rate of Bits Transmitted","type":"bargauge"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/Bits/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byRegexp","options":"/Packets/"},"properties":[{"id":"unit","value":"pps"}]},{"matcher":{"id":"byName","options":"Workload"},"properties":[{"id":"links","value":[{"title":"Drill + down","url":"/d/728bf77cc1166d2f3133bf25846876cc/kubernetes-networking-workload?${datasource:queryparam}&var-cluster=${cluster}&var-namespace=${namespace}&var-type=${__data.fields.Type}&var-workload=${__data.fields.Workload}"}]}]}]},"gridPos":{"h":9,"w":24,"x":0,"y":9},"id":3,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(\n sum + by (workload, workload_type) (\n (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n * + on (cluster, namespace, pod) group_left\n kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"}\n * + on (cluster, namespace, pod) group_left (workload, workload_type)\n namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + workload=~\".+\", workload_type=~\"$type\"}\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(\n sum + by (workload, workload_type) (\n (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n * + on (cluster, namespace, pod) group_left\n kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"}\n * + on (cluster, namespace, pod) group_left (workload, workload_type)\n namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + workload=~\".+\", workload_type=~\"$type\"}\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(\n avg + by (workload, workload_type) (\n 
(8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n * + on (cluster, namespace, pod) group_left\n kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"}\n * + on (cluster, namespace, pod) group_left (workload, workload_type)\n namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + workload=~\".+\", workload_type=~\"$type\"}\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(\n avg + by (workload, workload_type) (\n (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n * + on (cluster, namespace, pod) group_left\n kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"}\n * + on (cluster, namespace, pod) group_left (workload, workload_type)\n namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + workload=~\".+\", workload_type=~\"$type\"}\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(\n sum + by (workload, workload_type) (\n (1 * rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n * + on (cluster, namespace, pod) group_left\n kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"}\n * + on (cluster, namespace, pod) group_left (workload, workload_type)\n namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + workload=~\".+\", workload_type=~\"$type\"}\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(\n sum + by (workload, workload_type) (\n (1 * rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n * + on 
(cluster, namespace, pod) group_left\n kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"}\n * + on (cluster, namespace, pod) group_left (workload, workload_type)\n namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + workload=~\".+\", workload_type=~\"$type\"}\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(\n sum + by (workload, workload_type) (\n (1 * rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n * + on (cluster, namespace, pod) group_left\n kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"}\n * + on (cluster, namespace, pod) group_left (workload, workload_type)\n namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + workload=~\".+\", workload_type=~\"$type\"}\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(\n sum + by (workload, workload_type) (\n (1 * rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n * + on (cluster, namespace, pod) group_left\n kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"}\n * + on (cluster, namespace, pod) group_left (workload, workload_type)\n namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + workload=~\".+\", workload_type=~\"$type\"}\n )\n)\n","format":"table","instant":true}],"title":"Current + Status","transformations":[{"id":"joinByField","options":{"byField":"workload","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time + 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"Time + 7":true,"Time 8":true,"workload_type 2":true,"workload_type 3":true,"workload_type + 
4":true,"workload_type 5":true,"workload_type 6":true,"workload_type 7":true,"workload_type + 8":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time + 6":5,"Time 7":6,"Time 8":7,"Value #A":10,"Value #B":11,"Value #C":12,"Value #D":13,"Value + #E":14,"Value #F":15,"Value #G":16,"Value #H":17,"workload":8,"workload_type 1":9,"workload_type + 2":18,"workload_type 3":19,"workload_type 4":20,"workload_type 5":21,"workload_type + 6":22,"workload_type 7":23,"workload_type 8":24},"renameByName":{"Value #A":"Rx + Bits","Value #B":"Tx Bits","Value #C":"Rx Bits (Avg)","Value #D":"Tx Bits (Avg)","Value + #E":"Rx Packets","Value #F":"Tx Packets","Value #G":"Rx Packets Dropped","Value + #H":"Tx Packets Dropped","workload":"Workload","workload_type 1":"Type"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":18},"id":4,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum((8 + * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n* + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* + on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Receive + Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":18},"id":5,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum((8 + * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n* + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* + on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Transmit + Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":27},"id":6,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(avg((8 + * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n* + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* + on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + workload=~\".+\", 
workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Average + Container Bandwidth by Workload: Received","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":27},"id":7,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(avg((8 + * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n* + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* + on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Average + Container Bandwidth by Workload: Transmitted","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":36},"id":8,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum(rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) 
(kube_pod_info{host_network=\"false\"})\n )\n* + on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Rate + of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":36},"id":9,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum(rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* + on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Rate + of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":45},"id":10,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum(rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* + on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Rate + of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":45},"id":11,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum(rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* + on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) + (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* + on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", + 
workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Rate + of Transmitted Packets Dropped","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"current":{"selected":false,"text":"kube-system","value":"kube-system"},"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"namespace","name":"namespace","query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, + namespace)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"workload_type","name":"type","query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=\"$namespace\", workload=~\".+\"}, workload_type)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Kubernetes / Networking / Namespace (Workload)","uid":"bbb2a765a623ae38130206c7d94a160f"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-namespace-by-workload + namespace: monitoring + 
diff --git a/monitoring/configmap-kube-prometheus-stack-node-cluster-rsrc-use.yaml b/monitoring/configmap-kube-prometheus-stack-node-cluster-rsrc-use.yaml new file mode 100644 index 0000000..5543fba --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-node-cluster-rsrc-use.yaml @@ -0,0 +1,54 @@ +apiVersion: v1 +data: + node-cluster-rsrc-use.json: '{"graphTooltip":1,"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":1,"panels":[],"title":"CPU","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":1},"id":2,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"((\n instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", + cluster=~\"$cluster\"}\n *\n instance:node_num_cpu:sum{job=\"node-exporter\", + cluster=~\"$cluster\"}\n) != 0 )\n/ scalar(sum(instance:node_num_cpu:sum{job=\"node-exporter\", + cluster=~\"$cluster\"}))\n","legendFormat":"{{ instance }}"}],"title":"CPU Utilisation","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":12,"y":1},"id":3,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n instance:node_load1_per_cpu:ratio{job=\"node-exporter\", + cluster=~\"$cluster\"}\n / scalar(count(instance:node_load1_per_cpu:ratio{job=\"node-exporter\", + cluster=~\"$cluster\"}))\n) != 0\n","legendFormat":"{{ instance }}"}],"title":"CPU + Saturation (Load1 per 
CPU)","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":8},"id":4,"panels":[],"title":"Memory","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":9},"id":5,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n instance:node_memory_utilisation:ratio{job=\"node-exporter\", + cluster=~\"$cluster\"}\n / scalar(count(instance:node_memory_utilisation:ratio{job=\"node-exporter\", + cluster=~\"$cluster\"}))\n) != 0\n","legendFormat":"{{ instance }}"}],"title":"Memory + Utilisation","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"rds"}},"gridPos":{"h":7,"w":12,"x":12,"y":9},"id":6,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", + cluster=~\"$cluster\"}","legendFormat":"{{ instance }}"}],"title":"Memory Saturation + (Major Page 
Faults)","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":16},"id":7,"panels":[],"title":"Network","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"Bps"},"overrides":[{"matcher":{"id":"byRegexp","options":"/Transmit/"},"properties":[{"id":"custom.transform","value":"negative-Y"}]}]},"gridPos":{"h":7,"w":12,"x":0,"y":17},"id":8,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\", + cluster=~\"$cluster\"} != 0","legendFormat":"{{ instance }} Receive"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\", + cluster=~\"$cluster\"} != 0","legendFormat":"{{ instance }} Transmit"}],"title":"Network + Utilisation (Bytes Receive/Transmit)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"Bps"},"overrides":[{"matcher":{"id":"byRegexp","options":"/Transmit/"},"properties":[{"id":"custom.transform","value":"negative-Y"}]}]},"gridPos":{"h":7,"w":12,"x":12,"y":17},"id":9,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\", + cluster=~\"$cluster\"} != 0","legendFormat":"{{ instance }} Receive"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\", + cluster=~\"$cluster\"} != 
0","legendFormat":"{{ instance }} Transmit"}],"title":"Network + Saturation (Drops Receive/Transmit)","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":24},"id":10,"panels":[],"title":"Disk + IO","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":25},"id":11,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", + cluster=~\"$cluster\"}\n/ scalar(count(instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", + cluster=~\"$cluster\"}))\n","legendFormat":"{{ instance }} {{device}}"}],"title":"Disk + IO Utilisation","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":12,"y":25},"id":12,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", + cluster=~\"$cluster\"}\n/ scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", + cluster=~\"$cluster\"}))\n","legendFormat":"{{ instance }} {{device}}"}],"title":"Disk + IO Saturation","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":34},"id":13,"panels":[],"title":"Disk + 
Space","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":24,"x":0,"y":35},"id":14,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum + without (device) (\n max without (fstype, mountpoint) ((\n node_filesystem_size_bytes{job=\"node-exporter\", + fstype!=\"\", mountpoint!=\"\", cluster=~\"$cluster\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", + fstype!=\"\", mountpoint!=\"\", cluster=~\"$cluster\"}\n ) != 0)\n)\n/ scalar(sum(max + without (fstype, mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", + fstype!=\"\", mountpoint!=\"\", cluster=~\"$cluster\"})))\n","legendFormat":"{{ + instance }}"}],"title":"Disk Space Utilisation","type":"timeseries"}],"refresh":"30s","schemaVersion":39,"tags":["node-exporter-mixin"],"templating":{"list":[{"name":"datasource","query":"prometheus","type":"datasource"},{"allValue":".*","datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"includeAll":true,"name":"cluster","query":"label_values(node_time_seconds, + cluster)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Node Exporter / USE Method / Cluster","uid":"3e97d1d02672cdd0861f4c97c64f89b2"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: 
kube-prometheus-stack-node-cluster-rsrc-use + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-node-rsrc-use.yaml b/monitoring/configmap-kube-prometheus-stack-node-rsrc-use.yaml new file mode 100644 index 0000000..e38aca3 --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-node-rsrc-use.yaml @@ -0,0 +1,49 @@ +apiVersion: v1 +data: + node-rsrc-use.json: '{"graphTooltip":1,"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":1,"panels":[],"title":"CPU","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":1},"id":2,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"Utilisation"}],"title":"CPU + Utilisation","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":12,"y":1},"id":3,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_load1_per_cpu:ratio{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"Saturation"}],"title":"CPU + Saturation (Load1 per 
CPU)","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":8},"id":4,"panels":[],"title":"Memory","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":9},"id":5,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_memory_utilisation:ratio{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"Utilisation"}],"title":"Memory + Utilisation","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"rds"}},"gridPos":{"h":7,"w":12,"x":12,"y":9},"id":6,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"Major page + Faults"}],"title":"Memory Saturation (Major Page 
Faults)","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":16},"id":7,"panels":[],"title":"Network","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"Bps"},"overrides":[{"matcher":{"id":"byRegexp","options":"/Transmit/"},"properties":[{"id":"custom.transform","value":"negative-Y"}]}]},"gridPos":{"h":7,"w":12,"x":0,"y":17},"id":8,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_network_receive_bytes_physical:rate5m{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"Receive"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_network_transmit_bytes_physical:rate5m{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"Transmit"}],"title":"Network + Utilisation (Bytes Receive/Transmit)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"Bps"},"overrides":[{"matcher":{"id":"byRegexp","options":"/Transmit/"},"properties":[{"id":"custom.transform","value":"negative-Y"}]}]},"gridPos":{"h":7,"w":12,"x":12,"y":17},"id":9,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_network_receive_drop_physical:rate5m{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"Receive"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_network_transmit_drop_physical:rate5m{job=\"node-exporter\", + instance=\"$instance\", 
cluster=~\"$cluster\"} != 0","legendFormat":"Transmit"}],"title":"Network + Saturation (Drops Receive/Transmit)","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":24},"id":10,"panels":[],"title":"Disk + IO","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":25},"id":11,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"{{device}}"}],"title":"Disk + IO Utilisation","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":12,"y":25},"id":12,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"{{device}}"}],"title":"Disk + IO Saturation","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":34},"id":13,"panels":[],"title":"Disk + 
Space","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":24,"x":0,"y":35},"id":14,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sort_desc(1 + -\n (\n max without (mountpoint, fstype) (node_filesystem_avail_bytes{job=\"node-exporter\", + fstype!=\"\", instance=\"$instance\", cluster=~\"$cluster\"})\n /\n max + without (mountpoint, fstype) (node_filesystem_size_bytes{job=\"node-exporter\", + fstype!=\"\", instance=\"$instance\", cluster=~\"$cluster\"})\n ) != 0\n)\n","legendFormat":"{{device}}"}],"title":"Disk + Space Utilisation","type":"timeseries"}],"refresh":"30s","schemaVersion":39,"tags":["node-exporter-mixin"],"templating":{"list":[{"name":"datasource","query":"prometheus","type":"datasource"},{"allValue":".*","datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"includeAll":true,"name":"cluster","query":"label_values(node_time_seconds, + cluster)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"name":"instance","query":"label_values(node_exporter_build_info{job=\"node-exporter\", + cluster=~\"$cluster\"}, instance)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Node Exporter / USE Method / Node","uid":"fac67cfbe174d3ef53eb473d73d9212f"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + 
grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-node-rsrc-use + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-nodes-aix.yaml b/monitoring/configmap-kube-prometheus-stack-nodes-aix.yaml new file mode 100644 index 0000000..6795e5a --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-nodes-aix.yaml @@ -0,0 +1,70 @@ +apiVersion: v1 +data: + nodes-aix.json: '{"graphTooltip":1,"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":1,"panels":[],"title":"CPU","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"normal"}},"max":1,"min":0,"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":1},"id":2,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n (1 + - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", + instance=\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])))\n/ ignoring(cpu) + group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", + mode=\"idle\", instance=\"$instance\", cluster=~\"$cluster\"})\n)\n","intervalFactor":5,"legendFormat":"{{cpu}}"}],"title":"CPU + Usage","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":12,"y":1},"id":3,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load1{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"1m load average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load5{job=\"node-exporter\", + 
instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"5m load average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load15{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"15m load average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"count(node_cpu_seconds_total{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\", mode=\"idle\"})","legendFormat":"logical + cores"}],"title":"Load Average","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":8},"id":4,"title":"Memory","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"none"}},"min":0,"unit":"bytes"}},"gridPos":{"h":7,"w":18,"x":0,"y":9},"id":5,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_memory_total_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"Physical Memory"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n node_memory_total_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"} -\n node_memory_available_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}\n)\n","legendFormat":"Memory Used"}],"title":"Memory + Usage","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"max":100,"min":0,"thresholds":{"steps":[{"color":"rgba(50, + 172, 45, 0.97)"},{"color":"rgba(237, 129, 40, 0.89)","value":80},{"color":"rgba(245, + 54, 54, 0.9)","value":90}]},"unit":"percent"}},"gridPos":{"h":7,"w":6,"x":18,"y":9},"id":6,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"100 + -\n(\n avg(node_memory_available_bytes{job=\"node-exporter\", instance=\"$instance\", + 
cluster=~\"$cluster\"}) /\n avg(node_memory_total_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"})\n * 100\n)\n"}],"title":"Memory + Usage","type":"gauge"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":18},"id":7,"panels":[],"title":"Disk","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0},"overrides":[{"matcher":{"id":"byRegexp","options":"/ + read| written/"},"properties":[{"id":"unit","value":"Bps"}]},{"matcher":{"id":"byRegexp","options":"/ + io time/"},"properties":[{"id":"unit","value":"percentunit"}]}]},"gridPos":{"h":7,"w":12,"x":0,"y":19},"id":8,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_read_bytes_total{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} + read"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_written_bytes_total{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} + written"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_io_time_seconds_total{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} + io time"}],"title":"Disk 
I/O","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"thresholds":{"steps":[{"color":"green"},{"color":"yellow","value":0.8},{"color":"red","value":0.9}]},"unit":"decbytes"},"overrides":[{"matcher":{"id":"byName","options":"Mounted + on"},"properties":[{"id":"custom.width","value":260}]},{"matcher":{"id":"byName","options":"Size"},"properties":[{"id":"custom.width","value":93}]},{"matcher":{"id":"byName","options":"Used"},"properties":[{"id":"custom.width","value":72}]},{"matcher":{"id":"byName","options":"Available"},"properties":[{"id":"custom.width","value":88}]},{"matcher":{"id":"byName","options":"Used, + %"},"properties":[{"id":"unit","value":"percentunit"},{"id":"custom.cellOptions","value":{"type":"gauge"}},{"id":"max","value":1},{"id":"min","value":0}]}]},"gridPos":{"h":7,"w":12,"x":12,"y":19},"id":9,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"max + by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", + cluster=~\"$cluster\", fstype!=\"\", mountpoint!=\"\"})\n","format":"table","instant":true,"legendFormat":""},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"max + by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", + cluster=~\"$cluster\", fstype!=\"\", mountpoint!=\"\"})\n","format":"table","instant":true,"legendFormat":""}],"title":"Disk + Space Usage","transformations":[{"id":"groupBy","options":{"fields":{"Value #A":{"aggregations":["lastNotNull"],"operation":"aggregate"},"Value + #B":{"aggregations":["lastNotNull"],"operation":"aggregate"},"mountpoint":{"aggregations":[],"operation":"groupby"}}}},{"id":"merge"},{"id":"calculateField","options":{"alias":"Used","binary":{"left":"Value + #A (lastNotNull)","operator":"-","reducer":"sum","right":"Value #B 
(lastNotNull)"},"mode":"binary","reduce":{"reducer":"sum"}}},{"id":"calculateField","options":{"alias":"Used, + %","binary":{"left":"Used","operator":"/","reducer":"sum","right":"Value #A (lastNotNull)"},"mode":"binary","reduce":{"reducer":"sum"}}},{"id":"organize","options":{"excludeByName":{},"indexByName":{},"renameByName":{"Value + #A (lastNotNull)":"Size","Value #B (lastNotNull)":"Available","mountpoint":"Mounted + on"}}},{"id":"sortBy","options":{"fields":{},"sort":[{"field":"Mounted on"}]}}],"type":"table"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":26},"id":10,"panels":[],"title":"Network","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"description":"Network + received (bits/s)","fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0,"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":27},"id":11,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_network_receive_bytes_total{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\", device!=\"lo\"}[$__rate_interval]) + * 8","intervalFactor":1,"legendFormat":"{{device}}"}],"title":"Network Received","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"description":"Network + transmitted (bits/s)","fieldConfig":{"defaults":{"custom":{"fillOpacity":0},"min":0,"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":27},"id":12,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_network_transmit_bytes_total{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\", device!=\"lo\"}[$__rate_interval]) + * 8","intervalFactor":1,"legendFormat":"{{device}}"}],"title":"Network 
Transmitted","type":"timeseries"}],"refresh":"30s","schemaVersion":39,"tags":["node-exporter-mixin"],"templating":{"list":[{"name":"datasource","query":"prometheus","type":"datasource"},{"allValue":".*","datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"includeAll":true,"label":"Cluster","name":"cluster","query":"label_values(node_uname_info{job=\"node-exporter\", + sysname!=\"Darwin\"}, cluster)","refresh":2,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"label":"Instance","name":"instance","query":"label_values(node_uname_info{job=\"node-exporter\", + cluster=~\"$cluster\", sysname!=\"Darwin\"}, instance)","refresh":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Node Exporter / AIX","uid":"7e0a61e486f727d763fb1d86fdd629c2"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-nodes-aix + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-nodes-darwin.yaml b/monitoring/configmap-kube-prometheus-stack-nodes-darwin.yaml new file mode 100644 index 0000000..e508d6c --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-nodes-darwin.yaml @@ -0,0 +1,78 @@ +apiVersion: v1 +data: + nodes-darwin.json: 
'{"graphTooltip":1,"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":1,"panels":[],"title":"CPU","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"normal"}},"max":1,"min":0,"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":1},"id":2,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n (1 + - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", + instance=\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])))\n/ ignoring(cpu) + group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", + mode=\"idle\", instance=\"$instance\", cluster=~\"$cluster\"})\n)\n","intervalFactor":5,"legendFormat":"{{cpu}}"}],"title":"CPU + Usage","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":12,"y":1},"id":3,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load1{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"1m load average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load5{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"5m load average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load15{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"15m load average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"count(node_cpu_seconds_total{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\", mode=\"idle\"})","legendFormat":"logical + 
cores"}],"title":"Load Average","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":8},"id":4,"title":"Memory","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"none"}},"min":0,"unit":"bytes"}},"gridPos":{"h":7,"w":18,"x":0,"y":9},"id":5,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_memory_total_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"Physical Memory"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n node_memory_internal_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"} -\n node_memory_purgeable_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"} +\n node_memory_wired_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"} +\n node_memory_compressed_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}\n)\n","legendFormat":"Memory Used"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n node_memory_internal_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"} -\n node_memory_purgeable_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}\n)\n","legendFormat":"App Memory"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_memory_wired_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"Wired Memory"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_memory_compressed_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"Compressed"}],"title":"Memory + 
Usage","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"max":100,"min":0,"thresholds":{"steps":[{"color":"rgba(50, + 172, 45, 0.97)"},{"color":"rgba(237, 129, 40, 0.89)","value":80},{"color":"rgba(245, + 54, 54, 0.9)","value":90}]},"unit":"percent"}},"gridPos":{"h":7,"w":6,"x":18,"y":9},"id":6,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n (\n avg(node_memory_internal_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}) -\n avg(node_memory_purgeable_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}) +\n avg(node_memory_wired_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}) +\n avg(node_memory_compressed_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"})\n ) /\n avg(node_memory_total_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"})\n)\n*\n100\n"}],"title":"Memory + Usage","type":"gauge"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":18},"id":7,"panels":[],"title":"Disk","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0},"overrides":[{"matcher":{"id":"byRegexp","options":"/ + read| written/"},"properties":[{"id":"unit","value":"Bps"}]},{"matcher":{"id":"byRegexp","options":"/ + io time/"},"properties":[{"id":"unit","value":"percentunit"}]}]},"gridPos":{"h":7,"w":12,"x":0,"y":19},"id":8,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_read_bytes_total{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} + 
read"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_written_bytes_total{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} + written"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_io_time_seconds_total{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} + io time"}],"title":"Disk I/O","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"thresholds":{"steps":[{"color":"green"},{"color":"yellow","value":0.8},{"color":"red","value":0.9}]},"unit":"decbytes"},"overrides":[{"matcher":{"id":"byName","options":"Mounted + on"},"properties":[{"id":"custom.width","value":260}]},{"matcher":{"id":"byName","options":"Size"},"properties":[{"id":"custom.width","value":93}]},{"matcher":{"id":"byName","options":"Used"},"properties":[{"id":"custom.width","value":72}]},{"matcher":{"id":"byName","options":"Available"},"properties":[{"id":"custom.width","value":88}]},{"matcher":{"id":"byName","options":"Used, + %"},"properties":[{"id":"unit","value":"percentunit"},{"id":"custom.cellOptions","value":{"type":"gauge"}},{"id":"max","value":1},{"id":"min","value":0}]}]},"gridPos":{"h":7,"w":12,"x":12,"y":19},"id":9,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"max + by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", + cluster=~\"$cluster\", fstype!=\"\", mountpoint!=\"\"})\n","format":"table","instant":true,"legendFormat":""},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"max + by (mountpoint) 
(node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", + cluster=~\"$cluster\", fstype!=\"\", mountpoint!=\"\"})\n","format":"table","instant":true,"legendFormat":""}],"title":"Disk + Space Usage","transformations":[{"id":"groupBy","options":{"fields":{"Value #A":{"aggregations":["lastNotNull"],"operation":"aggregate"},"Value + #B":{"aggregations":["lastNotNull"],"operation":"aggregate"},"mountpoint":{"aggregations":[],"operation":"groupby"}}}},{"id":"merge"},{"id":"calculateField","options":{"alias":"Used","binary":{"left":"Value + #A (lastNotNull)","operator":"-","reducer":"sum","right":"Value #B (lastNotNull)"},"mode":"binary","reduce":{"reducer":"sum"}}},{"id":"calculateField","options":{"alias":"Used, + %","binary":{"left":"Used","operator":"/","reducer":"sum","right":"Value #A (lastNotNull)"},"mode":"binary","reduce":{"reducer":"sum"}}},{"id":"organize","options":{"excludeByName":{},"indexByName":{},"renameByName":{"Value + #A (lastNotNull)":"Size","Value #B (lastNotNull)":"Available","mountpoint":"Mounted + on"}}},{"id":"sortBy","options":{"fields":{},"sort":[{"field":"Mounted on"}]}}],"type":"table"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":26},"id":10,"panels":[],"title":"Network","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"description":"Network + received (bits/s)","fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0,"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":27},"id":11,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_network_receive_bytes_total{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\", device!=\"lo\"}[$__rate_interval]) + * 8","intervalFactor":1,"legendFormat":"{{device}}"}],"title":"Network Received","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"description":"Network + transmitted 
(bits/s)","fieldConfig":{"defaults":{"custom":{"fillOpacity":0},"min":0,"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":27},"id":12,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_network_transmit_bytes_total{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\", device!=\"lo\"}[$__rate_interval]) + * 8","intervalFactor":1,"legendFormat":"{{device}}"}],"title":"Network Transmitted","type":"timeseries"}],"refresh":"30s","schemaVersion":39,"tags":["node-exporter-mixin"],"templating":{"list":[{"name":"datasource","query":"prometheus","type":"datasource"},{"allValue":".*","datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"includeAll":true,"label":"Cluster","name":"cluster","query":"label_values(node_uname_info{job=\"node-exporter\", + sysname=\"Darwin\"}, cluster)","refresh":2,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"label":"Instance","name":"instance","query":"label_values(node_uname_info{job=\"node-exporter\", + cluster=~\"$cluster\", sysname=\"Darwin\"}, instance)","refresh":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Node Exporter / MacOS","uid":"629701ea43bf69291922ea45f4a87d37"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-nodes-darwin + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-nodes.yaml b/monitoring/configmap-kube-prometheus-stack-nodes.yaml new file mode 100644 
index 0000000..3f3d173 --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-nodes.yaml @@ -0,0 +1,74 @@ +apiVersion: v1 +data: + nodes.json: '{"graphTooltip":1,"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":1,"panels":[],"title":"CPU","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"normal"}},"max":1,"min":0,"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":1},"id":2,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n (1 + - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", + instance=\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])))\n/ ignoring(cpu) + group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", + mode=\"idle\", instance=\"$instance\", cluster=~\"$cluster\"})\n)\n","intervalFactor":5,"legendFormat":"{{cpu}}"}],"title":"CPU + Usage","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":12,"y":1},"id":3,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load1{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"1m load average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load5{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"5m load average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load15{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"15m load 
average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"count(node_cpu_seconds_total{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\", mode=\"idle\"})","legendFormat":"logical + cores"}],"title":"Load Average","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":8},"id":4,"title":"Memory","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"normal"}},"min":0,"unit":"bytes"}},"gridPos":{"h":7,"w":18,"x":0,"y":9},"id":5,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n node_memory_MemTotal_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}\n-\n node_memory_MemFree_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}\n-\n node_memory_Buffers_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}\n-\n node_memory_Cached_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}\n)\n","legendFormat":"memory used"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_memory_Buffers_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"memory buffers"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_memory_Cached_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"memory cached"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_memory_MemFree_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"memory free"}],"title":"Memory + Usage","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"max":100,"min":0,"thresholds":{"steps":[{"color":"rgba(50, + 172, 45, 0.97)"},{"color":"rgba(237, 
129, 40, 0.89)","value":80},{"color":"rgba(245, + 54, 54, 0.9)","value":90}]},"unit":"percent"}},"gridPos":{"h":7,"w":6,"x":18,"y":9},"id":6,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"100 + -\n(\n avg(node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\", + cluster=~\"$cluster\"}) /\n avg(node_memory_MemTotal_bytes{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\"})\n* 100\n)\n"}],"title":"Memory + Usage","type":"gauge"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":18},"id":7,"panels":[],"title":"Disk","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0},"overrides":[{"matcher":{"id":"byRegexp","options":"/ + read| written/"},"properties":[{"id":"unit","value":"Bps"}]},{"matcher":{"id":"byRegexp","options":"/ + io time/"},"properties":[{"id":"unit","value":"percentunit"}]}]},"gridPos":{"h":7,"w":12,"x":0,"y":19},"id":8,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_read_bytes_total{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} + read"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_written_bytes_total{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} + written"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_io_time_seconds_total{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\", 
device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} + io time"}],"title":"Disk I/O","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"thresholds":{"steps":[{"color":"green"},{"color":"yellow","value":0.8},{"color":"red","value":0.9}]},"unit":"decbytes"},"overrides":[{"matcher":{"id":"byName","options":"Mounted + on"},"properties":[{"id":"custom.width","value":260}]},{"matcher":{"id":"byName","options":"Size"},"properties":[{"id":"custom.width","value":93}]},{"matcher":{"id":"byName","options":"Used"},"properties":[{"id":"custom.width","value":72}]},{"matcher":{"id":"byName","options":"Available"},"properties":[{"id":"custom.width","value":88}]},{"matcher":{"id":"byName","options":"Used, + %"},"properties":[{"id":"unit","value":"percentunit"},{"id":"custom.cellOptions","value":{"type":"gauge"}},{"id":"max","value":1},{"id":"min","value":0}]}]},"gridPos":{"h":7,"w":12,"x":12,"y":19},"id":9,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"max + by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", + cluster=~\"$cluster\", fstype!=\"\", mountpoint!=\"\"})\n","format":"table","instant":true,"legendFormat":""},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"max + by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", + cluster=~\"$cluster\", fstype!=\"\", mountpoint!=\"\"})\n","format":"table","instant":true,"legendFormat":""}],"title":"Disk + Space Usage","transformations":[{"id":"groupBy","options":{"fields":{"Value #A":{"aggregations":["lastNotNull"],"operation":"aggregate"},"Value + 
#B":{"aggregations":["lastNotNull"],"operation":"aggregate"},"mountpoint":{"aggregations":[],"operation":"groupby"}}}},{"id":"merge"},{"id":"calculateField","options":{"alias":"Used","binary":{"left":"Value + #A (lastNotNull)","operator":"-","reducer":"sum","right":"Value #B (lastNotNull)"},"mode":"binary","reduce":{"reducer":"sum"}}},{"id":"calculateField","options":{"alias":"Used, + %","binary":{"left":"Used","operator":"/","reducer":"sum","right":"Value #A (lastNotNull)"},"mode":"binary","reduce":{"reducer":"sum"}}},{"id":"organize","options":{"excludeByName":{},"indexByName":{},"renameByName":{"Value + #A (lastNotNull)":"Size","Value #B (lastNotNull)":"Available","mountpoint":"Mounted + on"}}},{"id":"sortBy","options":{"fields":{},"sort":[{"field":"Mounted on"}]}}],"type":"table"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":26},"id":10,"panels":[],"title":"Network","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"description":"Network + received (bits/s)","fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0,"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":27},"id":11,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_network_receive_bytes_total{job=\"node-exporter\", + instance=\"$instance\", cluster=~\"$cluster\", device!=\"lo\"}[$__rate_interval]) + * 8","intervalFactor":1,"legendFormat":"{{device}}"}],"title":"Network Received","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"description":"Network + transmitted (bits/s)","fieldConfig":{"defaults":{"custom":{"fillOpacity":0},"min":0,"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":27},"id":12,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_network_transmit_bytes_total{job=\"node-exporter\", + instance=\"$instance\", 
cluster=~\"$cluster\", device!=\"lo\"}[$__rate_interval]) + * 8","intervalFactor":1,"legendFormat":"{{device}}"}],"title":"Network Transmitted","type":"timeseries"}],"refresh":"30s","schemaVersion":39,"tags":["node-exporter-mixin"],"templating":{"list":[{"name":"datasource","query":"prometheus","type":"datasource"},{"allValue":".*","datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"includeAll":true,"label":"Cluster","name":"cluster","query":"label_values(node_uname_info{job=\"node-exporter\", + sysname!=\"Darwin\"}, cluster)","refresh":2,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"label":"Instance","name":"instance","query":"label_values(node_uname_info{job=\"node-exporter\", + cluster=~\"$cluster\", sysname!=\"Darwin\"}, instance)","refresh":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Node Exporter / Nodes","uid":"7d57716318ee0dddbac5a7f451fb7753"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-nodes + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-persistentvolumesusage.yaml b/monitoring/configmap-kube-prometheus-stack-persistentvolumesusage.yaml new file mode 100644 index 0000000..3039f82 --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-persistentvolumesusage.yaml @@ -0,0 +1,57 @@ +apiVersion: v1 +data: + persistentvolumesusage.json: 
'{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":7,"w":18,"y":0},"id":1,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(\n sum + without(instance, node) (topk(1, (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum + without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n","legendFormat":"Used + Space"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n","legendFormat":"Free + Space"}],"title":"Volume Space Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"max":100,"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":0},{"color":"orange","value":80},{"color":"red","value":90}]},"unit":"percent"}},"gridPos":{"h":7,"w":6,"x":18,"y":0},"id":2,"interval":"1m","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"max + without(instance,node) (\n(\n topk(1, 
kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n topk(1, + kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", + namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n/\ntopk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* + 100)\n","instant":true}],"title":"Volume Space Usage","type":"gauge"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"none"}},"gridPos":{"h":7,"w":18,"y":7},"id":3,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum + without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))","legendFormat":"Used + inodes"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(\n sum + without(instance, node) (topk(1, (kubelet_volume_stats_inodes{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum + without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n","legendFormat":"Free + inodes"}],"title":"Volume inodes Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"max":100,"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":0},{"color":"orange","value":80},{"color":"red","value":90}]},"unit":"percent"}},"gridPos":{"h":7,"w":6,"x":18,"y":7},"id":4,"interval":"1m","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"max + without(instance,node) (\ntopk(1, kubelet_volume_stats_inodes_used{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n/\ntopk(1, + kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", + namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n","instant":true}],"title":"Volume + inodes Usage","type":"gauge"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"label":"cluster","name":"cluster","query":"label_values(kubelet_volume_stats_capacity_bytes{job=\"kubelet\", + metrics_path=\"/metrics\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"Namespace","name":"namespace","query":"label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\"}, namespace)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"PersistentVolumeClaim","name":"volume","query":"label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", + job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\"}, 
persistentvolumeclaim)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Kubernetes / Persistent Volumes","uid":"919b92a8e8041bd567af9edab12c840c"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-persistentvolumesusage + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-pod-total.yaml b/monitoring/configmap-kube-prometheus-stack-pod-total.yaml new file mode 100644 index 0000000..0ab2781 --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-pod-total.yaml @@ -0,0 +1,54 @@ +apiVersion: v1 +data: + pod-total.json: '{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"displayName":"$pod","max":10000000000,"min":0,"thresholds":{"steps":[{"color":"dark-green","index":0,"value":null},{"color":"dark-yellow","index":1,"value":5000000000},{"color":"dark-red","index":2,"value":7000000000}]},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":0},"id":1,"interval":"1m","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 + * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", + pod=~\"$pod\"}[$__rate_interval])))","legendFormat":"__auto"}],"title":"Current + Rate of Bits 
Received","type":"gauge"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"displayName":"$pod","max":10000000000,"min":0,"thresholds":{"steps":[{"color":"dark-green","index":0,"value":null},{"color":"dark-yellow","index":1,"value":5000000000},{"color":"dark-red","index":2,"value":7000000000}]},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":0},"id":2,"interval":"1m","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 + * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", + pod=~\"$pod\"}[$__rate_interval])))","legendFormat":"__auto"}],"title":"Current + Rate of Bits Transmitted","type":"gauge"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":9},"id":3,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 + * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", + pod=~\"$pod\"}[$__rate_interval]))) by (pod)","legendFormat":"__auto"}],"title":"Receive + Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":9},"id":4,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 + * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", + pod=~\"$pod\"}[$__rate_interval]))) by 
(pod)","legendFormat":"__auto"}],"title":"Transmit + Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":18},"id":5,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", + pod=~\"$pod\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate + of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":18},"id":6,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", + pod=~\"$pod\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate + of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":27},"id":7,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", + pod=~\"$pod\"}[$__rate_interval])) by 
(pod)","legendFormat":"__auto"}],"title":"Rate + of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":27},"id":8,"interval":"1m","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", + pod=~\"$pod\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate + of Transmitted Packets Dropped","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"allValue":".+","current":{"selected":false,"text":"kube-system","value":"kube-system"},"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"namespace","name":"namespace","query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, + namespace)","refresh":2,"sort":1,"type":"query"},{"current":{"selected":false,"text":"kube-system","value":"kube-system"},"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"pod","name":"pod","query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, + pod)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + 
"utc","title":"Kubernetes / Networking / Pod","uid":"7a18067ce943a40ae25454675c19ff5c"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-pod-total + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-prometheus.yaml b/monitoring/configmap-kube-prometheus-stack-prometheus.yaml new file mode 100644 index 0000000..910d837 --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-prometheus.yaml @@ -0,0 +1,59 @@ +apiVersion: v1 +data: + prometheus.json: '{"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":1,"panels":[],"title":"Prometheus + 
Stats","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"decimals":2,"displayName":"","unit":"short"},"overrides":[{"matcher":{"id":"byName","options":"Time"},"properties":[{"id":"displayName","value":"Time"},{"id":"custom.align","value":null},{"id":"custom.hidden","value":"true"}]},{"matcher":{"id":"byName","options":"cluster"},"properties":[{"id":"custom.align","value":null},{"id":"unit","value":"short"},{"id":"decimals","value":2},{"id":"displayName","value":"Cluster"}]},{"matcher":{"id":"byName","options":"job"},"properties":[{"id":"custom.align","value":null},{"id":"unit","value":"short"},{"id":"decimals","value":2},{"id":"displayName","value":"Job"}]},{"matcher":{"id":"byName","options":"instance"},"properties":[{"id":"displayName","value":"Instance"},{"id":"custom.align","value":null},{"id":"unit","value":"short"},{"id":"decimals","value":2}]},{"matcher":{"id":"byName","options":"version"},"properties":[{"id":"displayName","value":"Version"},{"id":"custom.align","value":null},{"id":"unit","value":"short"},{"id":"decimals","value":2}]},{"matcher":{"id":"byName","options":"Value + #A"},"properties":[{"id":"displayName","value":"Count"},{"id":"custom.align","value":null},{"id":"unit","value":"short"},{"id":"decimals","value":2},{"id":"custom.hidden","value":"true"}]},{"matcher":{"id":"byName","options":"Value + #B"},"properties":[{"id":"displayName","value":"Uptime"},{"id":"custom.align","value":null},{"id":"unit","value":"s"}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":1},"id":2,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"count + by (cluster, job, instance, version) (prometheus_build_info{cluster=~\"$cluster\", + job=~\"$job\", instance=~\"$instance\"})","format":"table","instant":true,"legendFormat":""},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"max + by (cluster, job, instance) (time() - process_start_time_seconds{cluster=~\"$cluster\", + 
job=~\"$job\", instance=~\"$instance\"})","format":"table","instant":true,"legendFormat":""}],"title":"Prometheus + Stats","type":"table"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":8},"id":3,"panels":[],"title":"Discovery","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never"},"min":0,"unit":"ms"}},"gridPos":{"h":7,"w":12,"x":0,"y":9},"id":4,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(prometheus_target_sync_length_seconds_sum{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[5m])) + by (cluster, job, scrape_job, instance) * 1e3","format":"time_series","legendFormat":"{{cluster}}:{{job}}:{{instance}}:{{scrape_job}}"}],"title":"Target + Sync","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"lineWidth":0,"showPoints":"never","stacking":{"mode":"normal"}},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":12,"y":9},"id":5,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum + by (cluster, job, instance) (prometheus_sd_discovered_targets{cluster=~\"$cluster\", + 
job=~\"$job\",instance=~\"$instance\"})","format":"time_series","legendFormat":"{{cluster}}:{{job}}:{{instance}}"}],"title":"Targets","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":16},"id":6,"panels":[],"title":"Retrieval","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never"},"min":0,"unit":"ms"}},"gridPos":{"h":7,"w":8,"x":0,"y":17},"id":7,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(prometheus_target_interval_length_seconds_sum{cluster=~\"$cluster\", + job=~\"$job\",instance=~\"$instance\"}[5m]) / rate(prometheus_target_interval_length_seconds_count{cluster=~\"$cluster\", + job=~\"$job\",instance=~\"$instance\"}[5m]) * 1e3","format":"time_series","legendFormat":"{{cluster}}:{{job}}:{{instance}} + {{interval}} configured"}],"title":"Average Scrape Interval Duration","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"lineWidth":0,"showPoints":"never","stacking":{"mode":"normal"}},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":8,"x":8,"y":17},"id":8,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum + by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))","format":"time_series","legendFormat":"exceeded + body size limit: {{cluster}} {{job}} {{instance}}"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum + by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_sample_limit_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))","format":"time_series","legendFormat":"exceeded + sample limit: 
{{cluster}} {{job}} {{instance}}"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum + by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))","format":"time_series","legendFormat":"duplicate + timestamp: {{cluster}} {{job}} {{instance}}"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum + by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_bounds_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))","format":"time_series","legendFormat":"out + of bounds: {{cluster}} {{job}} {{instance}}"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum + by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_order_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))","format":"time_series","legendFormat":"out + of order: {{cluster}} {{job}} {{instance}}"}],"title":"Scrape failures","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"lineWidth":0,"showPoints":"never","stacking":{"mode":"normal"}},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":8,"x":16,"y":17},"id":9,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(prometheus_tsdb_head_samples_appended_total{cluster=~\"$cluster\", + job=~\"$job\",instance=~\"$instance\"}[5m])","format":"time_series","legendFormat":"{{cluster}} + {{job}} {{instance}}"}],"title":"Appended 
Samples","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":24},"id":10,"panels":[],"title":"Storage","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"lineWidth":0,"showPoints":"never","stacking":{"mode":"normal"}},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":0,"y":25},"id":11,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"prometheus_tsdb_head_series{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}","format":"time_series","legendFormat":"{{cluster}} + {{job}} {{instance}} head series"}],"title":"Head Series","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"lineWidth":0,"showPoints":"never","stacking":{"mode":"normal"}},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":12,"y":25},"id":12,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"prometheus_tsdb_head_chunks{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}","format":"time_series","legendFormat":"{{cluster}} + {{job}} {{instance}} head chunks"}],"title":"Head 
Chunks","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":32},"id":13,"panels":[],"title":"Query","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"lineWidth":0,"showPoints":"never","stacking":{"mode":"normal"}},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":0,"y":33},"id":14,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(prometheus_engine_query_duration_seconds_count{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\",slice=\"inner_eval\"}[5m])","format":"time_series","legendFormat":"{{cluster}} + {{job}} {{instance}}"}],"title":"Query Rate","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"lineWidth":0,"showPoints":"never","stacking":{"mode":"normal"}},"min":0,"unit":"ms"}},"gridPos":{"h":7,"w":12,"x":12,"y":33},"id":15,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"max + by (slice) (prometheus_engine_query_duration_seconds{quantile=\"0.9\",cluster=~\"$cluster\", + job=~\"$job\",instance=~\"$instance\"}) * 1e3","format":"time_series","legendFormat":"{{slice}}"}],"title":"Stage + Duration","type":"timeseries"}],"schemaVersion":39,"tags":["prometheus-mixin"],"templating":{"list":[{"current":{"selected":false,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","type":"datasource"},{"allValue":".*","current":{"selected":false,"text":["$__all"],"value":["$__all"]},"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"includeAll":true,"label":"cluster","multi":true,"name":"cluster","query":"label_values(prometheus_build_info{}, + 
cluster)","refresh":2,"sort":2,"type":"query"},{"allValue":".+","datasource":{"type":"prometheus","uid":"${datasource}"},"includeAll":true,"label":"job","multi":true,"name":"job","query":"label_values(prometheus_build_info{cluster=~\"$cluster\"}, + job)","refresh":2,"sort":2,"type":"query"},{"allValue":".+","datasource":{"type":"prometheus","uid":"${datasource}"},"includeAll":true,"label":"instance","multi":true,"name":"instance","query":"label_values(prometheus_build_info{cluster=~\"$cluster\", + job=~\"$job\"}, instance)","refresh":2,"sort":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["60s"]},"timezone": + "utc","title":"Prometheus / Overview","uid":"9fa0d141-d019-4ad7-8bc5-42196ee308bd"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-prometheus + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-proxy.yaml b/monitoring/configmap-kube-prometheus-stack-proxy.yaml new file mode 100644 index 0000000..80f119b --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-proxy.yaml @@ -0,0 +1,62 @@ +apiVersion: v1 +data: + proxy.json: '{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":0,"y":0},"id":1,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(up{cluster=\"$cluster\", + job=\"kube-proxy\"})","instant":true}],"title":"Up","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":10,"x":4,"y":0},"id":2,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", + job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))","legendFormat":"rate"}],"title":"Rules + Sync Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":10,"x":14,"y":0},"id":3,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", + job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))","legendFormat":"{{instance}}"}],"title":"Rules + Sync Latency 99th Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":7},"id":4,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", + job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))","legendFormat":"rate"}],"title":"Network + Programming Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":12,"x":12,"y":7},"id":5,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", + job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval])) by (instance, + le))","legendFormat":"{{instance}}"}],"title":"Network Programming Latency 99th + Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":8,"x":0,"y":14},"id":6,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kube-proxy\", + instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))","legendFormat":"2xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kube-proxy\", + instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))","legendFormat":"3xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kube-proxy\", + instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))","legendFormat":"4xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kube-proxy\", + instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))","legendFormat":"5xx"}],"title":"Kube + API Request Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":16,"x":8,"y":14},"id":7,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) + by (verb, 
le))","legendFormat":"{{verb}}"}],"title":"Post Request Latency 99th + Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":8,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", + instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, le))","legendFormat":"{{verb}}"}],"title":"Get + Request Latency 99th Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":0,"y":28},"id":9,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"process_resident_memory_bytes{cluster=\"$cluster\", + job=\"kube-proxy\",instance=~\"$instance\"}","legendFormat":"{{instance}}"}],"title":"Memory","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":8,"y":28},"id":10,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"rate(process_cpu_seconds_total{cluster=\"$cluster\", + job=\"kube-proxy\",instance=~\"$instance\"}[$__rate_interval])","legendFormat":"{{instance}}"}],"title":"CPU + usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":16,"y":28},"id":11,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"go_goroutines{cluster=\"$cluster\", + job=\"kube-proxy\",instance=~\"$instance\"}","legendFormat":"{{instance}}"}],"title":"Goroutines","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kube-proxy\"}, + cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"allValue":".+","datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"instance","name":"instance","query":"label_values(up{job=\"kube-proxy\", + cluster=\"$cluster\", job=\"kube-proxy\"}, 
instance)","refresh":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Kubernetes / Proxy","uid":"632e265de029684c40b21cb76bca4f94"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-proxy + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-scheduler.yaml b/monitoring/configmap-kube-prometheus-stack-scheduler.yaml new file mode 100644 index 0000000..41992b4 --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-scheduler.yaml @@ -0,0 +1,72 @@ +apiVersion: v1 +data: + scheduler.json: '{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":0,"y":0},"id":1,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(up{cluster=\"$cluster\", + job=\"kube-scheduler\"})","instant":true}],"title":"Up","type":"stat"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":10,"x":4,"y":0},"id":2,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(scheduler_scheduling_attempt_duration_seconds_count{cluster=\"$cluster\", + job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, + instance)","legendFormat":"{{cluster}} {{instance}} e2e"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(scheduler_pod_scheduling_sli_duration_seconds_count{cluster=\"$cluster\", + job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, + instance)","legendFormat":"{{cluster}} {{instance}} binding"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", + job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, + instance)","legendFormat":"{{cluster}} {{instance}} scheduling algorithm"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", + job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, + instance)","legendFormat":"{{cluster}} {{instance}} volume"}],"title":"Scheduling + Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":10,"x":14,"y":0},"id":3,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket{cluster=\"$cluster\", + job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, + instance, le))","legendFormat":"{{cluster}} {{instance}} e2e"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(scheduler_pod_scheduling_sli_duration_seconds_bucket{cluster=\"$cluster\", + job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, + instance, le))","legendFormat":"{{cluster}} {{instance}} binding"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", + job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, + instance, le))","legendFormat":"{{cluster}} {{instance}} scheduling algorithm"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", + job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, + instance, le))","legendFormat":"{{cluster}} {{instance}} volume"}],"title":"Scheduling + latency 99th Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":8,"x":0,"y":7},"id":4,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", + job=\"kube-scheduler\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))","legendFormat":"2xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", + job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))","legendFormat":"3xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", + job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))","legendFormat":"4xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", + job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))","legendFormat":"5xx"}],"title":"Kube + API Request Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":16,"x":8,"y":7},"id":5,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", + instance=~\"$instance\", 
verb=\"POST\"}[$__rate_interval])) by (verb, le))","legendFormat":"{{verb}}"}],"title":"Post + Request Latency 99th Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":6,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, + sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", + instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, le))","legendFormat":"{{verb}}"}],"title":"Get + Request Latency 99th Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":0,"y":21},"id":7,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"process_resident_memory_bytes{cluster=\"$cluster\", + job=\"kube-scheduler\", instance=~\"$instance\"}","legendFormat":"{{instance}}"}],"title":"Memory","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":8,"y":21},"id":8,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"rate(process_cpu_seconds_total{cluster=\"$cluster\", + job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])","legendFormat":"{{instance}}"}],"title":"CPU + usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":16,"y":21},"id":9,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"go_goroutines{cluster=\"$cluster\", + job=\"kube-scheduler\",instance=~\"$instance\"}","legendFormat":"{{instance}}"}],"title":"Goroutines","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kube-scheduler\"}, + cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"allValue":".+","datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"instance","name":"instance","query":"label_values(up{job=\"kube-scheduler\", + cluster=\"$cluster\"}, 
instance)","refresh":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Kubernetes / Scheduler","uid":"2e6b6a3b4bddf1427b3a55aa1311c656"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-scheduler + namespace: monitoring + diff --git a/monitoring/configmap-kube-prometheus-stack-workload-total.yaml b/monitoring/configmap-kube-prometheus-stack-workload-total.yaml new file mode 100644 index 0000000..dfe1c07 --- /dev/null +++ b/monitoring/configmap-kube-prometheus-stack-workload-total.yaml @@ -0,0 +1,83 @@ +apiVersion: v1 +data: + workload-total.json: '{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"color":{"fixedColor":"green","mode":"fixed"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":0},"id":1,"interval":"1m","options":{"displayMode":"basic","showUnfilled":false},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum((8 + * rate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", + workload=~\"$workload\", workload_type=~\"$type\"}) by 
(pod))\n","legendFormat":"__auto"}],"title":"Current + Rate of Bits Received","type":"bargauge"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"color":{"fixedColor":"green","mode":"fixed"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":0},"id":2,"interval":"1m","options":{"displayMode":"basic","showUnfilled":false},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum((8 + * rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", + workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Current + Rate of Bits Transmitted","type":"bargauge"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"color":{"fixedColor":"green","mode":"fixed"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":9},"id":3,"interval":"1m","options":{"displayMode":"basic","showUnfilled":false},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(avg((8 + * rate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", + workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Average + Rate of Bits Received","type":"bargauge"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"color":{"fixedColor":"green","mode":"fixed"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":9},"id":4,"interval":"1m","options":{"displayMode":"basic","showUnfilled":false},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(avg((8 + * rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", + workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Average + Rate of Bits Transmitted","type":"bargauge"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":18},"id":5,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum((8 + * rate(container_network_receive_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", + workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Receive + Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":18},"id":6,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum((8 + * rate(container_network_transmit_bytes_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", + cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n* on (cluster, + namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", + workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Transmit + Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed + --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":27},"id":7,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum(rate(container_network_receive_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", + workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Rate + of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":27},"id":8,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum(rate(container_network_transmit_packets_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", + workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Rate + of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":36},"id":9,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum(rate(container_network_receive_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", + workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Rate + of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- + Mixed 
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":36},"id":10,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum(rate(container_network_transmit_packets_dropped_total{job=\"kubelet\", + metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* + on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", + workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Rate + of Transmitted Packets Dropped","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data + source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"label":"cluster","name":"cluster","query":"label_values(kube_pod_info{job=\"kube-state-metrics\"}, + cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"allValue":".+","current":{"selected":false,"text":"kube-system","value":"kube-system"},"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"namespace","name":"namespace","query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, + namespace)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"workload","name":"workload","query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + 
namespace=~\"$namespace\", workload=~\".+\"}, workload)","refresh":2,"sort":1,"type":"query"},{"allValue":".+","datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"workload_type","name":"type","query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", + namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": + "utc","title":"Kubernetes / Networking / Workload","uid":"728bf77cc1166d2f3133bf25846876cc"}' +kind: ConfigMap +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-grafana + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + grafana_dashboard: '1' + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-workload-total + namespace: monitoring + diff --git a/monitoring/configmap-prometheus-kube-prometheus-stack-prometheus-rulefiles-0.yaml b/monitoring/configmap-prometheus-kube-prometheus-stack-prometheus-rulefiles-0.yaml new file mode 100644 index 0000000..fbee06e --- /dev/null +++ b/monitoring/configmap-prometheus-kube-prometheus-stack-prometheus-rulefiles-0.yaml @@ -0,0 +1,1705 @@ +apiVersion: v1 +data: + monitoring-kube-prometheus-stack-config-reloaders-eae692b3-e0b3-459e-8981-8dc6d7da6055.yaml: "groups:\n\ + - name: config-reloaders\n rules:\n - alert: ConfigReloaderSidecarErrors\n \ + \ annotations:\n description: |-\n Errors encountered while the\ + \ {{$labels.pod}} config-reloader sidecar attempts to sync config in {{$labels.namespace}}\ + \ namespace.\n As a result, configuration for service running in {{$labels.pod}}\ + \ may be stale and cannot be updated anymore.\n runbook_url: 
https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/configreloadersidecarerrors\n\ + \ summary: config-reloader sidecar has not had a successful reload for 10m\n\ + \ expr: max_over_time(reloader_last_reload_successful{namespace=~\".+\"}[5m])\ + \ == 0\n for: 10m\n labels:\n severity: warning\n" + monitoring-kube-prometheus-stack-etcd-de0d66c3-becc-4bd9-8ab6-dff75f452f02.yaml: "groups:\n\ + - name: etcd\n rules:\n - alert: etcdMembersDown\n annotations:\n description:\ + \ 'etcd cluster \"{{ $labels.job }}\": members are down ({{ $value\n }}).'\n\ + \ summary: etcd cluster members are down.\n expr: |-\n max without\ + \ (endpoint) (\n sum without (instance, pod) (up{job=~\".*etcd.*\"} ==\ + \ bool 0)\n or\n count without (To) (\n sum without (instance,\ + \ pod) (rate(etcd_network_peer_sent_failures_total{job=~\".*etcd.*\"}[120s]))\ + \ > 0.01\n )\n )\n > 0\n for: 20m\n labels:\n severity:\ + \ warning\n - alert: etcdInsufficientMembers\n annotations:\n description:\ + \ 'etcd cluster \"{{ $labels.job }}\": insufficient members ({{ $value\n \ + \ }}).'\n summary: etcd cluster has insufficient number of members.\n \ + \ expr: sum(up{job=~\".*etcd.*\"} == bool 1) without (instance, pod) < ((count(up{job=~\"\ + .*etcd.*\"})\n without (instance, pod) + 1) / 2)\n for: 3m\n labels:\n\ + \ severity: critical\n - alert: etcdNoLeader\n annotations:\n description:\ + \ 'etcd cluster \"{{ $labels.job }}\": member {{ $labels.instance }}\n \ + \ has no leader.'\n summary: etcd cluster has no leader.\n expr: etcd_server_has_leader{job=~\"\ + .*etcd.*\"} == 0\n for: 1m\n labels:\n severity: critical\n - alert:\ + \ etcdHighNumberOfLeaderChanges\n annotations:\n description: 'etcd cluster\ + \ \"{{ $labels.job }}\": {{ $value }} leader changes\n within the last\ + \ 15 minutes. 
Frequent elections may be a sign of insufficient\n resources,\ + \ high network latency, or disruptions by other components and should\n \ + \ be investigated.'\n summary: etcd cluster has high number of leader changes.\n\ + \ expr: increase((max without (instance, pod) (etcd_server_leader_changes_seen_total{job=~\"\ + .*etcd.*\"})\n or 0*absent(etcd_server_leader_changes_seen_total{job=~\"\ + .*etcd.*\"}))[15m:1m])\n >= 4\n for: 5m\n labels:\n severity:\ + \ warning\n - alert: etcdHighNumberOfFailedGRPCRequests\n annotations:\n \ + \ description: 'etcd cluster \"{{ $labels.job }}\": {{ $value }}% of requests\ + \ for\n {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance\ + \ }}.'\n summary: etcd cluster has high number of failed grpc requests.\n\ + \ expr: |-\n 100 * sum(rate(grpc_server_handled_total{job=~\".*etcd.*\"\ + , grpc_code=~\"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded\"\ + }[5m])) without (grpc_type, grpc_code)\n /\n sum(rate(grpc_server_handled_total{job=~\"\ + .*etcd.*\"}[5m])) without (grpc_type, grpc_code)\n > 1\n for: 10m\n\ + \ labels:\n severity: warning\n - alert: etcdHighNumberOfFailedGRPCRequests\n\ + \ annotations:\n description: 'etcd cluster \"{{ $labels.job }}\": {{\ + \ $value }}% of requests for\n {{ $labels.grpc_method }} failed on etcd\ + \ instance {{ $labels.instance }}.'\n summary: etcd cluster has high number\ + \ of failed grpc requests.\n expr: |-\n 100 * sum(rate(grpc_server_handled_total{job=~\"\ + .*etcd.*\", grpc_code=~\"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded\"\ + }[5m])) without (grpc_type, grpc_code)\n /\n sum(rate(grpc_server_handled_total{job=~\"\ + .*etcd.*\"}[5m])) without (grpc_type, grpc_code)\n > 5\n for: 5m\n \ + \ labels:\n severity: critical\n - alert: etcdGRPCRequestsSlow\n annotations:\n\ + \ description: 'etcd cluster \"{{ $labels.job }}\": 99th percentile of gRPC\ + \ requests\n is {{ $value 
}}s on etcd instance {{ $labels.instance }} for\ + \ {{ $labels.grpc_method\n }} method.'\n summary: etcd grpc requests\ + \ are slow\n expr: |-\n histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~\"\ + .*etcd.*\", grpc_method!=\"Defragment\", grpc_type=\"unary\"}[5m])) without(grpc_type))\n\ + \ > 0.15\n for: 10m\n labels:\n severity: critical\n - alert:\ + \ etcdMemberCommunicationSlow\n annotations:\n description: 'etcd cluster\ + \ \"{{ $labels.job }}\": member communication with {{\n $labels.To }} is\ + \ taking {{ $value }}s on etcd instance {{ $labels.instance\n }}.'\n \ + \ summary: etcd cluster member communication is slow.\n expr: |-\n \ + \ histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~\"\ + .*etcd.*\"}[5m]))\n > 0.15\n for: 10m\n labels:\n severity: warning\n\ + \ - alert: etcdHighNumberOfFailedProposals\n annotations:\n description:\ + \ 'etcd cluster \"{{ $labels.job }}\": {{ $value }} proposal failures\n \ + \ within the last 30 minutes on etcd instance {{ $labels.instance }}.'\n \ + \ summary: etcd cluster has high number of proposal failures.\n expr: rate(etcd_server_proposals_failed_total{job=~\"\ + .*etcd.*\"}[15m]) > 5\n for: 15m\n labels:\n severity: warning\n -\ + \ alert: etcdHighFsyncDurations\n annotations:\n description: 'etcd cluster\ + \ \"{{ $labels.job }}\": 99th percentile fsync durations\n are {{ $value\ + \ }}s on etcd instance {{ $labels.instance }}.'\n summary: etcd cluster 99th\ + \ percentile fsync durations are too high.\n expr: |-\n histogram_quantile(0.99,\ + \ rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~\".*etcd.*\"}[5m]))\n\ + \ > 0.5\n for: 10m\n labels:\n severity: warning\n - alert: etcdHighFsyncDurations\n\ + \ annotations:\n description: 'etcd cluster \"{{ $labels.job }}\": 99th\ + \ percentile fsync durations\n are {{ $value }}s on etcd instance {{ $labels.instance\ + \ }}.'\n summary: etcd cluster 99th percentile fsync durations are too high.\n\ 
+ \ expr: |-\n histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~\"\ + .*etcd.*\"}[5m]))\n > 1\n for: 10m\n labels:\n severity: critical\n\ + \ - alert: etcdHighCommitDurations\n annotations:\n description: 'etcd\ + \ cluster \"{{ $labels.job }}\": 99th percentile commit durations\n {{\ + \ $value }}s on etcd instance {{ $labels.instance }}.'\n summary: etcd cluster\ + \ 99th percentile commit durations are too high.\n expr: |-\n histogram_quantile(0.99,\ + \ rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~\".*etcd.*\"}[5m]))\n\ + \ > 0.25\n for: 10m\n labels:\n severity: warning\n - alert:\ + \ etcdDatabaseQuotaLowSpace\n annotations:\n description: 'etcd cluster\ + \ \"{{ $labels.job }}\": database size exceeds the defined\n quota on etcd\ + \ instance {{ $labels.instance }}, please defrag or increase the\n quota\ + \ as the writes to etcd will be disabled when it is full.'\n summary: etcd\ + \ cluster database is running full.\n expr: (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~\"\ + .*etcd.*\"}[5m]) /\n last_over_time(etcd_server_quota_backend_bytes{job=~\"\ + .*etcd.*\"}[5m]))*100 >\n 95\n for: 10m\n labels:\n severity:\ + \ critical\n - alert: etcdExcessiveDatabaseGrowth\n annotations:\n description:\ + \ 'etcd cluster \"{{ $labels.job }}\": Predicting running out of disk\n \ + \ space in the next four hours, based on write observations within the past\n\ + \ four hours on etcd instance {{ $labels.instance }}, please check as it\ + \ might\n be disruptive.'\n summary: etcd cluster database growing\ + \ very fast.\n expr: predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~\"\ + .*etcd.*\"}[4h], 4*60*60)\n > etcd_server_quota_backend_bytes{job=~\".*etcd.*\"\ + }\n for: 10m\n labels:\n severity: warning\n - alert: etcdDatabaseHighFragmentationRatio\n\ + \ annotations:\n description: 'etcd cluster \"{{ $labels.job }}\": database\ + \ size in use on instance\n {{ $labels.instance }} is {{ $value | 
humanizePercentage\ + \ }} of the actual\n allocated disk space, please run defragmentation (e.g.\ + \ etcdctl defrag) to\n retrieve the unused fragmented disk space.'\n \ + \ runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation\n\ + \ summary: etcd database size in use is less than 50% of the actual allocated\n\ + \ storage.\n expr: (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~\"\ + .*etcd.*\"}[5m])\n / last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~\"\ + .*etcd.*\"}[5m])) < 0.5\n and etcd_mvcc_db_total_size_in_use_in_bytes{job=~\"\ + .*etcd.*\"} > 104857600\n for: 10m\n labels:\n severity: warning\n" + monitoring-kube-prometheus-stack-general.rules-f627c7c8-ea4b-4b56-98b8-e667d6567e7b.yaml: "groups:\n\ + - name: general.rules\n rules:\n - alert: TargetDown\n annotations:\n \ + \ description: '{{ printf \"%.4g\" $value }}% of the {{ $labels.job }}/{{ $labels.service\n\ + \ }} targets in {{ $labels.namespace }} namespace are down.'\n runbook_url:\ + \ https://runbooks.prometheus-operator.dev/runbooks/general/targetdown\n \ + \ summary: One or more targets are unreachable.\n expr: 100 * (count(up ==\ + \ 0) BY (cluster, job, namespace, service) / count(up)\n BY (cluster, job,\ + \ namespace, service)) > 10\n for: 10m\n labels:\n severity: warning\n\ + \ - alert: Watchdog\n annotations:\n description: |\n This is\ + \ an alert meant to ensure that the entire alerting pipeline is functional.\n\ + \ This alert is always firing, therefore it should always be firing in\ + \ Alertmanager\n and always fire against a receiver. There are integrations\ + \ with various notification\n mechanisms that send a notification when\ + \ this alert is not firing. 
For example the\n \"DeadMansSnitch\" integration\ + \ in PagerDuty.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog\n\ + \ summary: An alert that should always be firing to certify that Alertmanager\n\ + \ is working properly.\n expr: vector(1)\n labels:\n severity:\ + \ none\n - alert: InfoInhibitor\n annotations:\n description: |\n \ + \ This is an alert that is used to inhibit info alerts.\n By themselves,\ + \ the info-level alerts are sometimes very noisy, but they are relevant when combined\ + \ with\n other alerts.\n This alert fires whenever there's a severity=\"\ + info\" alert, and stops firing when another alert with a\n severity of\ + \ 'warning' or 'critical' starts firing on the same namespace.\n This alert\ + \ should be routed to a null receiver and configured to inhibit alerts with severity=\"\ + info\".\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor\n\ + \ summary: Info-level alert inhibition.\n expr: group by (namespace) (ALERTS{severity\ + \ = \"info\"} == 1) unless on (namespace)\n group by (namespace) (ALERTS{alertname\ + \ != \"InfoInhibitor\", alertstate = \"firing\",\n severity =~ \"warning|critical\"\ + } == 1)\n labels:\n severity: none\n" + monitoring-kube-prometheus-stack-k8s.rules.container-cpu-usage-seconds-tot-715fb365-db24-4478-8fdf-40df40c31616.yaml: "groups:\n\ + - name: k8s.rules.container_cpu_usage_seconds_total\n rules:\n - expr: |-\n\ + \ sum by (cluster, namespace, pod, container) (\n rate(container_cpu_usage_seconds_total{job=\"\ + kubelet\", metrics_path=\"/metrics/cadvisor\", image!=\"\"}[5m])\n ) * on\ + \ (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod)\ + \ (\n 1, max by (cluster, namespace, pod, node) (kube_pod_info{node!=\"\ + \"})\n )\n record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m\n\ + \ - expr: |-\n sum by (cluster, namespace, pod, container) (\n 
irate(container_cpu_usage_seconds_total{job=\"\ + kubelet\", metrics_path=\"/metrics/cadvisor\", image!=\"\"}[5m])\n ) * on\ + \ (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod)\ + \ (\n 1, max by (cluster, namespace, pod, node) (kube_pod_info{node!=\"\ + \"})\n )\n record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate\n" + monitoring-kube-prometheus-stack-k8s.rules.container-memory-cache-c548651c-95bc-4bc3-a2d8-6fb97abc9ec3.yaml: "groups:\n\ + - name: k8s.rules.container_memory_cache\n rules:\n - expr: |-\n container_memory_cache{job=\"\ + kubelet\", metrics_path=\"/metrics/cadvisor\", image!=\"\"}\n * on (cluster,\ + \ namespace, pod) group_left(node) topk by (cluster, namespace, pod) (1,\n \ + \ max by (cluster, namespace, pod, node) (kube_pod_info{node!=\"\"})\n \ + \ )\n record: node_namespace_pod_container:container_memory_cache\n" + monitoring-kube-prometheus-stack-k8s.rules.container-memory-rss-1baf12c0-1dce-4867-86be-91c1ee948313.yaml: "groups:\n\ + - name: k8s.rules.container_memory_rss\n rules:\n - expr: |-\n container_memory_rss{job=\"\ + kubelet\", metrics_path=\"/metrics/cadvisor\", image!=\"\"}\n * on (cluster,\ + \ namespace, pod) group_left(node) topk by (cluster, namespace, pod) (1,\n \ + \ max by (cluster, namespace, pod, node) (kube_pod_info{node!=\"\"})\n \ + \ )\n record: node_namespace_pod_container:container_memory_rss\n" + monitoring-kube-prometheus-stack-k8s.rules.container-memory-swap-dd2afe1d-3a0d-44ed-b97a-0fc8e29e111e.yaml: "groups:\n\ + - name: k8s.rules.container_memory_swap\n rules:\n - expr: |-\n container_memory_swap{job=\"\ + kubelet\", metrics_path=\"/metrics/cadvisor\", image!=\"\"}\n * on (cluster,\ + \ namespace, pod) group_left(node) topk by (cluster, namespace, pod) (1,\n \ + \ max by (cluster, namespace, pod, node) (kube_pod_info{node!=\"\"})\n \ + \ )\n record: node_namespace_pod_container:container_memory_swap\n" + 
monitoring-kube-prometheus-stack-k8s.rules.container-memory-working-set-by-b44b3b4f-1d0e-466a-9b08-a078ed6f1588.yaml: "groups:\n\ + - name: k8s.rules.container_memory_working_set_bytes\n rules:\n - expr: |-\n\ + \ container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\"\ + , image!=\"\"}\n * on (cluster, namespace, pod) group_left(node) topk by\ + \ (cluster, namespace, pod) (1,\n max by (cluster, namespace, pod, node)\ + \ (kube_pod_info{node!=\"\"})\n )\n record: node_namespace_pod_container:container_memory_working_set_bytes\n" + monitoring-kube-prometheus-stack-k8s.rules.container-resource-f779d497-1ce1-46cf-8234-c68eca5f1472.yaml: "groups:\n\ + - name: k8s.rules.container_resource\n rules:\n - expr: |-\n kube_pod_container_resource_requests{resource=\"\ + memory\",job=\"kube-state-metrics\"} * on (namespace, pod, cluster)\n group_left()\ + \ max by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"\ + Pending|Running\"} == 1)\n )\n record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests\n\ + \ - expr: |-\n sum by (namespace, cluster) (\n sum by (namespace,\ + \ pod, cluster) (\n max by (namespace, pod, container, cluster) (\n\ + \ kube_pod_container_resource_requests{resource=\"memory\",job=\"\ + kube-state-metrics\"}\n ) * on (namespace, pod, cluster) group_left()\ + \ max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"\ + Pending|Running\"} == 1\n )\n )\n )\n record: namespace_memory:kube_pod_container_resource_requests:sum\n\ + \ - expr: |-\n kube_pod_container_resource_requests{resource=\"cpu\",job=\"\ + kube-state-metrics\"} * on (namespace, pod, cluster)\n group_left() max\ + \ by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"\ + } == 1)\n )\n record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests\n\ + \ - expr: |-\n sum by (namespace, cluster) (\n sum by (namespace,\ + \ pod, cluster) (\n max by (namespace, pod, 
container, cluster) (\n\ + \ kube_pod_container_resource_requests{resource=\"cpu\",job=\"\ + kube-state-metrics\"}\n ) * on (namespace, pod, cluster) group_left()\ + \ max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"\ + Pending|Running\"} == 1\n )\n )\n )\n record: namespace_cpu:kube_pod_container_resource_requests:sum\n\ + \ - expr: |-\n kube_pod_container_resource_limits{resource=\"memory\",job=\"\ + kube-state-metrics\"} * on (namespace, pod, cluster)\n group_left() max\ + \ by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"\ + } == 1)\n )\n record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits\n\ + \ - expr: |-\n sum by (namespace, cluster) (\n sum by (namespace,\ + \ pod, cluster) (\n max by (namespace, pod, container, cluster) (\n\ + \ kube_pod_container_resource_limits{resource=\"memory\",job=\"\ + kube-state-metrics\"}\n ) * on (namespace, pod, cluster) group_left()\ + \ max by (namespace, pod, cluster) (\n kube_pod_status_phase{phase=~\"\ + Pending|Running\"} == 1\n )\n )\n )\n record: namespace_memory:kube_pod_container_resource_limits:sum\n\ + \ - expr: |-\n kube_pod_container_resource_limits{resource=\"cpu\",job=\"\ + kube-state-metrics\"} * on (namespace, pod, cluster)\n group_left() max\ + \ by (namespace, pod, cluster) (\n (kube_pod_status_phase{phase=~\"Pending|Running\"\ + } == 1)\n )\n record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits\n\ + \ - expr: |-\n sum by (namespace, cluster) (\n sum by (namespace,\ + \ pod, cluster) (\n max by (namespace, pod, container, cluster) (\n\ + \ kube_pod_container_resource_limits{resource=\"cpu\",job=\"kube-state-metrics\"\ + }\n ) * on (namespace, pod, cluster) group_left() max by (namespace,\ + \ pod, cluster) (\n kube_pod_status_phase{phase=~\"Pending|Running\"\ + } == 1\n )\n )\n )\n record: namespace_cpu:kube_pod_container_resource_limits:sum\n" + 
monitoring-kube-prometheus-stack-k8s.rules.pod-owner-6921c522-c42e-43f6-8c4c-296d6fd5994e.yaml: "groups:\n\ + - name: k8s.rules.pod_owner\n rules:\n - expr: |-\n max by (cluster, namespace,\ + \ workload, pod) (\n label_replace(\n label_replace(\n \ + \ kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"ReplicaSet\"},\n\ + \ \"replicaset\", \"$1\", \"owner_name\", \"(.*)\"\n ) * on\ + \ (cluster, replicaset, namespace) group_left(owner_name) topk by (cluster, replicaset,\ + \ namespace) (\n 1, max by (cluster, replicaset, namespace, owner_name)\ + \ (\n kube_replicaset_owner{job=\"kube-state-metrics\", owner_kind=\"\ + \"}\n )\n ),\n \"workload\", \"$1\", \"replicaset\"\ + , \"(.*)\"\n )\n )\n labels:\n workload_type: replicaset\n\ + \ record: namespace_workload_pod:kube_pod_owner:relabel\n - expr: |-\n \ + \ max by (cluster, namespace, workload, pod) (\n label_replace(\n \ + \ label_replace(\n kube_pod_owner{job=\"kube-state-metrics\"\ + , owner_kind=\"ReplicaSet\"},\n \"replicaset\", \"$1\", \"owner_name\"\ + , \"(.*)\"\n ) * on (replicaset, namespace, cluster) group_left(owner_name)\ + \ topk by (cluster, replicaset, namespace) (\n 1, max by (cluster,\ + \ replicaset, namespace, owner_name) (\n kube_replicaset_owner{job=\"\ + kube-state-metrics\", owner_kind=\"Deployment\"}\n )\n ),\n\ + \ \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n )\n\ + \ labels:\n workload_type: deployment\n record: namespace_workload_pod:kube_pod_owner:relabel\n\ + \ - expr: |-\n max by (cluster, namespace, workload, pod) (\n label_replace(\n\ + \ kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"DaemonSet\"\ + },\n \"workload\", \"$1\", \"owner_name\", \"(.*)\"\n )\n \ + \ )\n labels:\n workload_type: daemonset\n record: namespace_workload_pod:kube_pod_owner:relabel\n\ + \ - expr: |-\n max by (cluster, namespace, workload, pod) (\n label_replace(\n\ + \ kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"StatefulSet\"\ + },\n \"workload\", \"$1\", \"owner_name\", 
\"(.*)\")\n )\n labels:\n\ + \ workload_type: statefulset\n record: namespace_workload_pod:kube_pod_owner:relabel\n\ + \ - expr: |-\n group by (cluster, namespace, workload, pod) (\n label_join(\n\ + \ group by (cluster, namespace, job_name, pod, owner_name) (\n \ + \ label_join(\n kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"\ + Job\"}\n , \"job_name\", \"\", \"owner_name\")\n )\n \ + \ * on (cluster, namespace, job_name) group_left()\n group by (cluster,\ + \ namespace, job_name) (\n kube_job_owner{job=\"kube-state-metrics\"\ + , owner_kind=~\"Pod|\"}\n )\n , \"workload\", \"\", \"owner_name\"\ + )\n )\n labels:\n workload_type: job\n record: namespace_workload_pod:kube_pod_owner:relabel\n\ + \ - expr: |-\n max by (cluster, namespace, workload, pod) (\n label_replace(\n\ + \ kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"\", owner_name=\"\ + \"},\n \"workload\", \"$1\", \"pod\", \"(.+)\")\n )\n labels:\n\ + \ workload_type: barepod\n record: namespace_workload_pod:kube_pod_owner:relabel\n\ + \ - expr: |-\n max by (cluster, namespace, workload, pod) (\n label_replace(\n\ + \ kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"Node\"},\n\ + \ \"workload\", \"$1\", \"pod\", \"(.+)\")\n )\n labels:\n \ + \ workload_type: staticpod\n record: namespace_workload_pod:kube_pod_owner:relabel\n\ + \ - expr: |-\n group by (cluster, namespace, workload, workload_type, pod)\ + \ (\n label_join(\n label_join(\n group by (cluster,\ + \ namespace, job_name, pod) (\n label_join(\n kube_pod_owner{job=\"\ + kube-state-metrics\", owner_kind=\"Job\"}\n , \"job_name\", \"\"\ + , \"owner_name\")\n )\n * on (cluster, namespace, job_name)\ + \ group_left(owner_kind, owner_name)\n group by (cluster, namespace,\ + \ job_name, owner_kind, owner_name) (\n kube_job_owner{job=\"kube-state-metrics\"\ + , owner_kind!=\"Pod\", owner_kind!=\"\"}\n )\n , \"workload\"\ + , \"\", \"owner_name\")\n , \"workload_type\", \"\", \"owner_kind\")\n\n\ + \ OR\n\n label_replace(\n 
label_replace(\n label_replace(\n\ + \ kube_pod_owner{job=\"kube-state-metrics\", owner_kind=\"ReplicaSet\"\ + }\n , \"replicaset\", \"$1\", \"owner_name\", \"(.+)\"\n \ + \ )\n * on (cluster, namespace, replicaset) group_left(owner_kind,\ + \ owner_name)\n group by (cluster, namespace, replicaset, owner_kind,\ + \ owner_name) (\n kube_replicaset_owner{job=\"kube-state-metrics\"\ + , owner_kind!=\"Deployment\", owner_kind!=\"\"}\n )\n , \"\ + workload\", \"$1\", \"owner_name\", \"(.+)\")\n OR\n label_replace(\n\ + \ group by (cluster, namespace, pod, owner_name, owner_kind) (\n \ + \ kube_pod_owner{job=\"kube-state-metrics\", owner_kind!=\"ReplicaSet\"\ + , owner_kind!=\"DaemonSet\", owner_kind!=\"StatefulSet\", owner_kind!=\"Job\"\ + , owner_kind!=\"Node\", owner_kind!=\"\"}\n )\n , \"workload\"\ + , \"$1\", \"owner_name\", \"(.+)\"\n )\n , \"workload_type\",\ + \ \"$1\", \"owner_kind\", \"(.+)\")\n )\n record: namespace_workload_pod:kube_pod_owner:relabel\n" + monitoring-kube-prometheus-stack-kube-apiserver-availability.rules-fe1708b8-332c-4d9d-be30-1a6e49774f5a.yaml: "groups:\n\ + - interval: 3m\n name: kube-apiserver-availability.rules\n rules:\n - expr:\ + \ avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 *\n \ + \ 30\n record: code_verb:apiserver_request_total:increase30d\n - expr:\ + \ sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~\"\ + LIST|GET\"})\n labels:\n verb: read\n record: code:apiserver_request_total:increase30d\n\ + \ - expr: sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~\"\ + POST|PUT|PATCH|DELETE\"})\n labels:\n verb: write\n record: code:apiserver_request_total:increase30d\n\ + \ - expr: sum by (cluster, verb, scope, le) (increase(apiserver_request_sli_duration_seconds_bucket[1h]))\n\ + \ record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h\n\ + \ - expr: sum by (cluster, verb, scope, le) 
(avg_over_time(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h[30d])\n\ + \ * 24 * 30)\n record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d\n\ + \ - expr: sum by (cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h{le=\"\ + +Inf\"})\n record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h\n\ + \ - expr: sum by (cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le=\"\ + +Inf\"})\n record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d\n\ + \ - expr: |-\n 1 - (\n (\n # write too slow\n sum\ + \ by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"\ + POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"\ + POST|PUT|PATCH|DELETE\",le=~\"1(\\\\.0)?\"} or vector(0))\n ) +\n \ + \ (\n # read too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"\ + LIST|GET\"})\n -\n (\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"\ + LIST|GET\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"} or vector(0))\n \ + \ +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"\ + LIST|GET\",scope=\"namespace\",le=~\"5(\\\\.0)?\"} or vector(0))\n \ + \ +\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"\ + LIST|GET\",scope=\"cluster\",le=~\"30(\\\\.0)?\"} or vector(0))\n )\n\ + \ ) +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{code=~\"\ + 5..\"} or vector(0))\n )\n /\n sum by (cluster) (code:apiserver_request_total:increase30d)\n\ + \ labels:\n verb: all\n record: 
apiserver_request:availability30d\n\ + \ - expr: |-\n 1 - (\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"\ + LIST|GET\"})\n -\n (\n # too slow\n sum by (cluster)\ + \ (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"\ + LIST|GET\",scope=~\"resource|\",le=~\"1(\\\\.0)?\"} or vector(0))\n +\n\ + \ sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"\ + LIST|GET\",scope=\"namespace\",le=~\"5(\\\\.0)?\"} or vector(0))\n +\n\ + \ sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"\ + LIST|GET\",scope=\"cluster\",le=~\"30(\\\\.0)?\"} or vector(0))\n )\n \ + \ +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"\ + read\",code=~\"5..\"} or vector(0))\n )\n /\n sum by (cluster)\ + \ (code:apiserver_request_total:increase30d{verb=\"read\"})\n labels:\n \ + \ verb: read\n record: apiserver_request:availability30d\n - expr: |-\n\ + \ 1 - (\n (\n # too slow\n sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~\"\ + POST|PUT|PATCH|DELETE\"})\n -\n sum by (cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~\"\ + POST|PUT|PATCH|DELETE\",le=~\"1(\\\\.0)?\"} or vector(0))\n )\n \ + \ +\n # errors\n sum by (cluster) (code:apiserver_request_total:increase30d{verb=\"\ + write\",code=~\"5..\"} or vector(0))\n )\n /\n sum by (cluster)\ + \ (code:apiserver_request_total:increase30d{verb=\"write\"})\n labels:\n \ + \ verb: write\n record: apiserver_request:availability30d\n - expr: sum\ + \ by (cluster,code,resource) (rate(apiserver_request_total{job=\"apiserver\",verb=~\"\ + LIST|GET\"}[5m]))\n labels:\n verb: read\n record: code_resource:apiserver_request_total:rate5m\n\ + \ - expr: sum by (cluster,code,resource) (rate(apiserver_request_total{job=\"\ + 
apiserver\",verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n labels:\n verb: write\n\ + \ record: code_resource:apiserver_request_total:rate5m\n - expr: sum by (cluster,\ + \ code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET|POST|PUT|PATCH|DELETE\"\ + ,code=~\"2..\"}[1h]))\n record: code_verb:apiserver_request_total:increase1h\n\ + \ - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job=\"\ + apiserver\",verb=~\"LIST|GET|POST|PUT|PATCH|DELETE\",code=~\"3..\"}[1h]))\n \ + \ record: code_verb:apiserver_request_total:increase1h\n - expr: sum by (cluster,\ + \ code, verb) (increase(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET|POST|PUT|PATCH|DELETE\"\ + ,code=~\"4..\"}[1h]))\n record: code_verb:apiserver_request_total:increase1h\n\ + \ - expr: sum by (cluster, code, verb) (increase(apiserver_request_total{job=\"\ + apiserver\",verb=~\"LIST|GET|POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n \ + \ record: code_verb:apiserver_request_total:increase1h\n" + monitoring-kube-prometheus-stack-kube-apiserver-burnrate.rules-5b37d5cc-ec92-44ef-8b83-84bd7039e174.yaml: "groups:\n\ + - name: kube-apiserver-burnrate.rules\n rules:\n - expr: |-\n (\n \ + \ (\n # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"\ + apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"\ + }[1d]))\n -\n (\n (\n sum by (cluster)\ + \ (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\",verb=~\"\ + LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=~\"resource|\"\ + ,le=~\"1(\\\\.0)?\"}[1d]))\n or\n vector(0)\n \ + \ )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,scope=\"namespace\",le=~\"5(\\\\.0)?\"}[1d]))\n +\n sum\ + \ by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\"\ + 
,verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"\ + cluster\",le=~\"30(\\\\.0)?\"}[1d]))\n )\n )\n +\n \ + \ # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\ + ,verb=~\"LIST|GET\",code=~\"5..\"}[1d]))\n )\n /\n sum by (cluster)\ + \ (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1d]))\n\ + \ labels:\n verb: read\n record: apiserver_request:burnrate1d\n -\ + \ expr: |-\n (\n (\n # too slow\n sum by (cluster)\ + \ (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"\ + LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[1h]))\n \ + \ -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[1h]))\n or\n \ + \ vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,scope=\"namespace\",le=~\"5(\\\\.0)?\"}[1h]))\n +\n sum\ + \ by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\"\ + ,verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"\ + cluster\",le=~\"30(\\\\.0)?\"}[1h]))\n )\n )\n +\n \ + \ # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\ + ,verb=~\"LIST|GET\",code=~\"5..\"}[1h]))\n )\n /\n sum by (cluster)\ + \ (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[1h]))\n\ + \ labels:\n verb: read\n record: apiserver_request:burnrate1h\n -\ + \ expr: |-\n (\n (\n # too slow\n sum by (cluster)\ + \ (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"\ + LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[2h]))\n \ + \ -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + 
apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[2h]))\n or\n \ + \ vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,scope=\"namespace\",le=~\"5(\\\\.0)?\"}[2h]))\n +\n sum\ + \ by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\"\ + ,verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"\ + cluster\",le=~\"30(\\\\.0)?\"}[2h]))\n )\n )\n +\n \ + \ # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\ + ,verb=~\"LIST|GET\",code=~\"5..\"}[2h]))\n )\n /\n sum by (cluster)\ + \ (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n\ + \ labels:\n verb: read\n record: apiserver_request:burnrate2h\n -\ + \ expr: |-\n (\n (\n # too slow\n sum by (cluster)\ + \ (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"\ + LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[30m]))\n \ + \ -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[30m]))\n or\n \ + \ vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,scope=\"namespace\",le=~\"5(\\\\.0)?\"}[30m]))\n +\n sum\ + \ by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\"\ + ,verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"\ + cluster\",le=~\"30(\\\\.0)?\"}[30m]))\n )\n )\n +\n \ + \ # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"\ + apiserver\",verb=~\"LIST|GET\",code=~\"5..\"}[30m]))\n )\n /\n \ + \ sum by (cluster) 
(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"\ + }[30m]))\n labels:\n verb: read\n record: apiserver_request:burnrate30m\n\ + \ - expr: |-\n (\n (\n # too slow\n sum by (cluster)\ + \ (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"\ + LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[3d]))\n \ + \ -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[3d]))\n or\n \ + \ vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,scope=\"namespace\",le=~\"5(\\\\.0)?\"}[3d]))\n +\n sum\ + \ by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\"\ + ,verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"\ + cluster\",le=~\"30(\\\\.0)?\"}[3d]))\n )\n )\n +\n \ + \ # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\ + ,verb=~\"LIST|GET\",code=~\"5..\"}[3d]))\n )\n /\n sum by (cluster)\ + \ (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n\ + \ labels:\n verb: read\n record: apiserver_request:burnrate3d\n -\ + \ expr: |-\n (\n (\n # too slow\n sum by (cluster)\ + \ (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"\ + LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[5m]))\n \ + \ -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[5m]))\n or\n \ + \ vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + 
apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,scope=\"namespace\",le=~\"5(\\\\.0)?\"}[5m]))\n +\n sum\ + \ by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\"\ + ,verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"\ + cluster\",le=~\"30(\\\\.0)?\"}[5m]))\n )\n )\n +\n \ + \ # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\ + ,verb=~\"LIST|GET\",code=~\"5..\"}[5m]))\n )\n /\n sum by (cluster)\ + \ (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[5m]))\n\ + \ labels:\n verb: read\n record: apiserver_request:burnrate5m\n -\ + \ expr: |-\n (\n (\n # too slow\n sum by (cluster)\ + \ (rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"\ + LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"}[6h]))\n \ + \ -\n (\n (\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,scope=~\"resource|\",le=~\"1(\\\\.0)?\"}[6h]))\n or\n \ + \ vector(0)\n )\n +\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,scope=\"namespace\",le=~\"5(\\\\.0)?\"}[6h]))\n +\n sum\ + \ by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"apiserver\"\ + ,verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\",scope=\"\ + cluster\",le=~\"30(\\\\.0)?\"}[6h]))\n )\n )\n +\n \ + \ # errors\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\ + ,verb=~\"LIST|GET\",code=~\"5..\"}[6h]))\n )\n /\n sum by (cluster)\ + \ (rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[6h]))\n\ + \ labels:\n verb: read\n record: apiserver_request:burnrate6h\n -\ + \ expr: |-\n (\n (\n # too slow\n sum by (cluster)\ + \ 
(rate(apiserver_request_sli_duration_seconds_count{job=\"apiserver\",verb=~\"\ + POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"}[1d]))\n\ + \ -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,le=~\"1(\\\\.0)?\"}[1d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1d]))\n )\n \ + \ /\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\ + ,verb=~\"POST|PUT|PATCH|DELETE\"}[1d]))\n labels:\n verb: write\n record:\ + \ apiserver_request:burnrate1d\n - expr: |-\n (\n (\n #\ + \ too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"\ + }[1h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,le=~\"1(\\\\.0)?\"}[1h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[1h]))\n )\n \ + \ /\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\ + ,verb=~\"POST|PUT|PATCH|DELETE\"}[1h]))\n labels:\n verb: write\n record:\ + \ apiserver_request:burnrate1h\n - expr: |-\n (\n (\n #\ + \ too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"\ + }[2h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,le=~\"1(\\\\.0)?\"}[2h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"\ + 
apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[2h]))\n )\n \ + \ /\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\ + ,verb=~\"POST|PUT|PATCH|DELETE\"}[2h]))\n labels:\n verb: write\n record:\ + \ apiserver_request:burnrate2h\n - expr: |-\n (\n (\n #\ + \ too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"\ + }[30m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,le=~\"1(\\\\.0)?\"}[30m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[30m]))\n )\n \ + \ /\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\ + ,verb=~\"POST|PUT|PATCH|DELETE\"}[30m]))\n labels:\n verb: write\n \ + \ record: apiserver_request:burnrate30m\n - expr: |-\n (\n (\n \ + \ # too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"\ + }[3d]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,le=~\"1(\\\\.0)?\"}[3d]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[3d]))\n )\n \ + \ /\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\ + ,verb=~\"POST|PUT|PATCH|DELETE\"}[3d]))\n labels:\n verb: write\n record:\ + \ apiserver_request:burnrate3d\n - expr: |-\n (\n (\n #\ + \ too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"\ + 
}[5m]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,le=~\"1(\\\\.0)?\"}[5m]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[5m]))\n )\n \ + \ /\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\ + ,verb=~\"POST|PUT|PATCH|DELETE\"}[5m]))\n labels:\n verb: write\n record:\ + \ apiserver_request:burnrate5m\n - expr: |-\n (\n (\n #\ + \ too slow\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_count{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"\ + }[6h]))\n -\n sum by (cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"\ + ,le=~\"1(\\\\.0)?\"}[6h]))\n )\n +\n sum by (cluster) (rate(apiserver_request_total{job=\"\ + apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",code=~\"5..\"}[6h]))\n )\n \ + \ /\n sum by (cluster) (rate(apiserver_request_total{job=\"apiserver\"\ + ,verb=~\"POST|PUT|PATCH|DELETE\"}[6h]))\n labels:\n verb: write\n record:\ + \ apiserver_request:burnrate6h\n" + monitoring-kube-prometheus-stack-kube-apiserver-histogram.rules-3df1ba86-8ec2-4750-a04f-5e59108c7ba3.yaml: "groups:\n\ + - name: kube-apiserver-histogram.rules\n rules:\n - expr: histogram_quantile(0.99,\ + \ sum by (cluster, le, resource) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + apiserver\",verb=~\"LIST|GET\",subresource!~\"proxy|attach|log|exec|portforward\"\ + }[5m])))\n > 0\n labels:\n quantile: \"0.99\"\n verb: read\n\ + \ record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile\n\ + \ - expr: histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_sli_duration_seconds_bucket{job=\"\ + 
apiserver\",verb=~\"POST|PUT|PATCH|DELETE\",subresource!~\"proxy|attach|log|exec|portforward\"\ + }[5m])))\n > 0\n labels:\n quantile: \"0.99\"\n verb: write\n\ + \ record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile\n" + monitoring-kube-prometheus-stack-kube-apiserver-slos-be960e5a-cbba-488b-b2a9-b89b70183179.yaml: "groups:\n\ + - name: kube-apiserver-slos\n rules:\n - alert: KubeAPIErrorBudgetBurn\n \ + \ annotations:\n description: The API server is burning too much error budget\ + \ on cluster {{ $labels.cluster\n }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn\n\ + \ summary: The API server is burning too much error budget.\n expr: |-\n\ + \ sum by (cluster) (apiserver_request:burnrate1h) > (14.40 * 0.01000)\n \ + \ and on (cluster)\n sum by (cluster) (apiserver_request:burnrate5m)\ + \ > (14.40 * 0.01000)\n for: 2m\n labels:\n long: 1h\n severity:\ + \ critical\n short: 5m\n - alert: KubeAPIErrorBudgetBurn\n annotations:\n\ + \ description: The API server is burning too much error budget on cluster\ + \ {{ $labels.cluster\n }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn\n\ + \ summary: The API server is burning too much error budget.\n expr: |-\n\ + \ sum by (cluster) (apiserver_request:burnrate6h) > (6.00 * 0.01000)\n \ + \ and on (cluster)\n sum by (cluster) (apiserver_request:burnrate30m)\ + \ > (6.00 * 0.01000)\n for: 15m\n labels:\n long: 6h\n severity:\ + \ critical\n short: 30m\n - alert: KubeAPIErrorBudgetBurn\n annotations:\n\ + \ description: The API server is burning too much error budget on cluster\ + \ {{ $labels.cluster\n }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn\n\ + \ summary: The API server is burning too much error budget.\n expr: |-\n\ + \ sum by (cluster) (apiserver_request:burnrate1d) > (3.00 * 0.01000)\n \ + \ and on (cluster)\n sum by 
(cluster) (apiserver_request:burnrate2h) >\ + \ (3.00 * 0.01000)\n for: 1h\n labels:\n long: 1d\n severity:\ + \ warning\n short: 2h\n - alert: KubeAPIErrorBudgetBurn\n annotations:\n\ + \ description: The API server is burning too much error budget on cluster\ + \ {{ $labels.cluster\n }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapierrorbudgetburn\n\ + \ summary: The API server is burning too much error budget.\n expr: |-\n\ + \ sum by (cluster) (apiserver_request:burnrate3d) > (1.00 * 0.01000)\n \ + \ and on (cluster)\n sum by (cluster) (apiserver_request:burnrate6h) >\ + \ (1.00 * 0.01000)\n for: 3h\n labels:\n long: 3d\n severity:\ + \ warning\n short: 6h\n" + monitoring-kube-prometheus-stack-kube-prometheus-general.rules-cdf9488c-4fa3-4c7a-9be4-ddcaee437598.yaml: "groups:\n\ + - name: kube-prometheus-general.rules\n rules:\n - expr: count without(instance,\ + \ pod, node) (up == 1)\n record: count:up1\n - expr: count without(instance,\ + \ pod, node) (up == 0)\n record: count:up0\n" + monitoring-kube-prometheus-stack-kube-prometheus-node-recording.rules-14de50cd-57b8-4248-a7c5-054469786b93.yaml: "groups:\n\ + - name: kube-prometheus-node-recording.rules\n rules:\n - expr: sum(rate(node_cpu_seconds_total{mode!=\"\ + idle\",mode!=\"iowait\",mode!=\"steal\"}[3m]))\n BY (instance)\n record:\ + \ instance:node_cpu:rate:sum\n - expr: sum(rate(node_network_receive_bytes_total[3m]))\ + \ BY (instance)\n record: instance:node_network_receive_bytes:rate:sum\n -\ + \ expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)\n record:\ + \ instance:node_network_transmit_bytes:rate:sum\n - expr: sum(rate(node_cpu_seconds_total{mode!=\"\ + idle\",mode!=\"iowait\",mode!=\"steal\"}[5m]))\n WITHOUT (cpu, mode) / ON\ + \ (instance) GROUP_LEFT() count(sum(node_cpu_seconds_total)\n BY (instance,\ + \ cpu)) BY (instance)\n record: instance:node_cpu:ratio\n - expr: sum(rate(node_cpu_seconds_total{mode!=\"\ + 
idle\",mode!=\"iowait\",mode!=\"steal\"}[5m]))\n record: cluster:node_cpu:sum_rate5m\n\ + \ - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY\ + \ (instance,\n cpu))\n record: cluster:node_cpu:ratio\n" + monitoring-kube-prometheus-stack-kube-scheduler.rules-da454218-3276-463d-abe3-7043553c8f35.yaml: "groups:\n\ + - name: kube-scheduler.rules\n rules:\n - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket{job=\"\ + kube-scheduler\"}[5m]))\n without(instance, pod))\n labels:\n quantile:\ + \ \"0.99\"\n record: cluster_quantile:scheduler_scheduling_attempt_duration_seconds:histogram_quantile\n\ + \ - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job=\"\ + kube-scheduler\"}[5m]))\n without(instance, pod))\n labels:\n quantile:\ + \ \"0.99\"\n record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile\n\ + \ - expr: histogram_quantile(0.99, sum(rate(scheduler_pod_scheduling_sli_duration_seconds_bucket{job=\"\ + kube-scheduler\"}[5m]))\n without(instance, pod))\n labels:\n quantile:\ + \ \"0.99\"\n record: cluster_quantile:scheduler_pod_scheduling_sli_duration_seconds:histogram_quantile\n\ + \ - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket{job=\"\ + kube-scheduler\"}[5m]))\n without(instance, pod))\n labels:\n quantile:\ + \ \"0.9\"\n record: cluster_quantile:scheduler_scheduling_attempt_duration_seconds:histogram_quantile\n\ + \ - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job=\"\ + kube-scheduler\"}[5m]))\n without(instance, pod))\n labels:\n quantile:\ + \ \"0.9\"\n record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile\n\ + \ - expr: histogram_quantile(0.9, sum(rate(scheduler_pod_scheduling_sli_duration_seconds_bucket{job=\"\ + kube-scheduler\"}[5m]))\n without(instance, pod))\n labels:\n 
quantile:\ + \ \"0.9\"\n record: cluster_quantile:scheduler_pod_scheduling_sli_duration_seconds:histogram_quantile\n\ + \ - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket{job=\"\ + kube-scheduler\"}[5m]))\n without(instance, pod))\n labels:\n quantile:\ + \ \"0.5\"\n record: cluster_quantile:scheduler_scheduling_attempt_duration_seconds:histogram_quantile\n\ + \ - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job=\"\ + kube-scheduler\"}[5m]))\n without(instance, pod))\n labels:\n quantile:\ + \ \"0.5\"\n record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile\n\ + \ - expr: histogram_quantile(0.5, sum(rate(scheduler_pod_scheduling_sli_duration_seconds_bucket{job=\"\ + kube-scheduler\"}[5m]))\n without(instance, pod))\n labels:\n quantile:\ + \ \"0.5\"\n record: cluster_quantile:scheduler_pod_scheduling_sli_duration_seconds:histogram_quantile\n" + monitoring-kube-prometheus-stack-kube-state-metrics-9826e852-e343-4d08-9f0c-4c5896358ba2.yaml: "groups:\n\ + - name: kube-state-metrics\n rules:\n - alert: KubeStateMetricsListErrors\n\ + \ annotations:\n description: kube-state-metrics is experiencing errors\ + \ at an elevated rate in\n list operations. 
This is likely causing it to\ + \ not be able to expose metrics\n about Kubernetes objects correctly or\ + \ at all.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricslisterrors\n\ + \ summary: kube-state-metrics is experiencing errors in list operations.\n\ + \ expr: |-\n (sum(rate(kube_state_metrics_list_total{job=\"kube-state-metrics\"\ + ,result=\"error\"}[5m])) by (cluster)\n /\n sum(rate(kube_state_metrics_list_total{job=\"\ + kube-state-metrics\"}[5m])) by (cluster))\n > 0.01\n for: 15m\n labels:\n\ + \ severity: critical\n - alert: KubeStateMetricsWatchErrors\n annotations:\n\ + \ description: kube-state-metrics is experiencing errors at an elevated rate\ + \ in\n watch operations. This is likely causing it to not be able to expose\ + \ metrics\n about Kubernetes objects correctly or at all.\n runbook_url:\ + \ https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricswatcherrors\n\ + \ summary: kube-state-metrics is experiencing errors in watch operations.\n\ + \ expr: |-\n (sum(rate(kube_state_metrics_watch_total{job=\"kube-state-metrics\"\ + ,result=\"error\"}[5m])) by (cluster)\n /\n sum(rate(kube_state_metrics_watch_total{job=\"\ + kube-state-metrics\"}[5m])) by (cluster))\n > 0.01\n for: 15m\n labels:\n\ + \ severity: critical\n - alert: KubeStateMetricsShardingMismatch\n annotations:\n\ + \ description: kube-state-metrics pods are running with different --total-shards\n\ + \ configuration, some Kubernetes objects may be exposed multiple times\ + \ or not\n exposed at all.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardingmismatch\n\ + \ summary: kube-state-metrics sharding is misconfigured.\n expr: stdvar\ + \ (kube_state_metrics_total_shards{job=\"kube-state-metrics\"}) by (cluster)\n\ + \ != 0\n for: 15m\n labels:\n severity: critical\n - alert: KubeStateMetricsShardsMissing\n\ + \ annotations:\n description: 
kube-state-metrics shards are missing, some\ + \ Kubernetes objects\n are not being exposed.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardsmissing\n\ + \ summary: kube-state-metrics shards are missing.\n expr: |-\n 2^max(kube_state_metrics_total_shards{job=\"\ + kube-state-metrics\"}) by (cluster) - 1\n -\n sum( 2 ^ max by (cluster,\ + \ shard_ordinal) (kube_state_metrics_shard_ordinal{job=\"kube-state-metrics\"\ + }) ) by (cluster)\n != 0\n for: 15m\n labels:\n severity: critical\n" + monitoring-kube-prometheus-stack-kubelet.rules-931b5e0c-2d70-4f8b-9987-5b5cfaac8845.yaml: "groups:\n\ + - name: kubelet.rules\n rules:\n - expr: |-\n histogram_quantile(\n \ + \ 0.99,\n sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job=\"\ + kubelet\", metrics_path=\"/metrics\"}[5m])) by (cluster, instance, le)\n \ + \ * on (cluster, instance) group_left (node)\n max by (cluster, instance,\ + \ node) (kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"})\n \ + \ )\n labels:\n quantile: \"0.99\"\n record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile\n\ + \ - expr: |-\n histogram_quantile(\n 0.9,\n sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job=\"\ + kubelet\", metrics_path=\"/metrics\"}[5m])) by (cluster, instance, le)\n \ + \ * on (cluster, instance) group_left (node)\n max by (cluster, instance,\ + \ node) (kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"})\n \ + \ )\n labels:\n quantile: \"0.9\"\n record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile\n\ + \ - expr: |-\n histogram_quantile(\n 0.5,\n sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job=\"\ + kubelet\", metrics_path=\"/metrics\"}[5m])) by (cluster, instance, le)\n \ + \ * on (cluster, instance) group_left (node)\n max by (cluster, instance,\ + \ node) (kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"})\n \ + \ )\n labels:\n quantile: \"0.5\"\n record: 
node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile\n" + monitoring-kube-prometheus-stack-kubernetes-apps-eaf9c0ce-babd-40f5-913f-7c8c14272dcc.yaml: "groups:\n\ + - name: kubernetes-apps\n rules:\n - alert: KubePodCrashLooping\n annotations:\n\ + \ description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container\n\ + \ }}) is in waiting state (reason: \"CrashLoopBackOff\") on cluster {{\ + \ $labels.cluster\n }}.'\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping\n\ + \ summary: Pod is crash looping.\n expr: max_over_time(kube_pod_container_status_waiting_reason{reason=\"\ + CrashLoopBackOff\",\n job=\"kube-state-metrics\", namespace=~\".*\"}[5m])\ + \ >= 1\n for: 15m\n labels:\n severity: warning\n - alert: KubePodNotReady\n\ + \ annotations:\n description: Pod {{ $labels.namespace }}/{{ $labels.pod\ + \ }} has been in a non-ready\n state for longer than 15 minutes on cluster\ + \ {{ $labels.cluster }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready\n\ + \ summary: Pod has been in a non-ready state for more than 15 minutes.\n\ + \ expr: |-\n sum by (namespace, pod, job, cluster) (\n max by (namespace,\ + \ pod, job, cluster) (\n kube_pod_status_phase{job=\"kube-state-metrics\"\ + , namespace=~\".*\", phase=~\"Pending|Unknown\"}\n ) * on (namespace, pod,\ + \ cluster) group_left(owner_kind) topk by (namespace, pod, cluster) (\n \ + \ 1, max by (namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!=\"\ + Job\"})\n )\n ) > 0\n for: 15m\n labels:\n severity: warning\n\ + \ - alert: KubeDeploymentGenerationMismatch\n annotations:\n description:\ + \ Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment\n \ + \ }} does not match, this indicates that the Deployment has failed but has\ + \ not\n been rolled back on cluster {{ $labels.cluster }}.\n runbook_url:\ + \ 
https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch\n\ + \ summary: Deployment generation mismatch due to possible roll-back\n \ + \ expr: |-\n kube_deployment_status_observed_generation{job=\"kube-state-metrics\"\ + , namespace=~\".*\"}\n !=\n kube_deployment_metadata_generation{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n for: 15m\n labels:\n severity:\ + \ warning\n - alert: KubeDeploymentReplicasMismatch\n annotations:\n \ + \ description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has\n\ + \ not matched the expected number of replicas for longer than 15 minutes\ + \ on\n cluster {{ $labels.cluster }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch\n\ + \ summary: Deployment has not matched the expected number of replicas.\n\ + \ expr: |-\n (\n kube_deployment_spec_replicas{job=\"kube-state-metrics\"\ + , namespace=~\".*\"}\n >\n kube_deployment_status_replicas_available{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n ) and (\n changes(kube_deployment_status_replicas_updated{job=\"\ + kube-state-metrics\", namespace=~\".*\"}[10m])\n ==\n 0\n \ + \ )\n for: 15m\n labels:\n severity: warning\n - alert: KubeDeploymentRolloutStuck\n\ + \ annotations:\n description: Rollout of deployment {{ $labels.namespace\ + \ }}/{{ $labels.deployment\n }} is not progressing for longer than 15 minutes\ + \ on cluster {{ $labels.cluster\n }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck\n\ + \ summary: Deployment rollout is not progressing.\n expr: |-\n kube_deployment_status_condition{condition=\"\ + Progressing\", status=\"false\",job=\"kube-state-metrics\", namespace=~\".*\"\ + }\n != 0\n for: 15m\n labels:\n severity: warning\n - alert:\ + \ KubeStatefulSetReplicasMismatch\n annotations:\n description: StatefulSet\ + \ {{ $labels.namespace }}/{{ $labels.statefulset }} has\n not matched the\ 
+ \ expected number of replicas for longer than 15 minutes on\n cluster {{\ + \ $labels.cluster }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch\n\ + \ summary: StatefulSet has not matched the expected number of replicas.\n\ + \ expr: |-\n (\n kube_statefulset_status_replicas_ready{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n !=\n kube_statefulset_replicas{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n ) and (\n changes(kube_statefulset_status_replicas_updated{job=\"\ + kube-state-metrics\", namespace=~\".*\"}[10m])\n ==\n 0\n \ + \ )\n for: 15m\n labels:\n severity: warning\n - alert: KubeStatefulSetGenerationMismatch\n\ + \ annotations:\n description: StatefulSet generation for {{ $labels.namespace\ + \ }}/{{ $labels.statefulset\n }} does not match, this indicates that the\ + \ StatefulSet has failed but has\n not been rolled back on cluster {{ $labels.cluster\ + \ }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch\n\ + \ summary: StatefulSet generation mismatch due to possible roll-back\n \ + \ expr: |-\n kube_statefulset_status_observed_generation{job=\"kube-state-metrics\"\ + , namespace=~\".*\"}\n !=\n kube_statefulset_metadata_generation{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n for: 15m\n labels:\n severity:\ + \ warning\n - alert: KubeStatefulSetUpdateNotRolledOut\n annotations:\n \ + \ description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }}\ + \ update\n has not been rolled out on cluster {{ $labels.cluster }}.\n\ + \ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout\n\ + \ summary: StatefulSet update has not been rolled out.\n expr: |-\n \ + \ (\n max by (namespace, statefulset, job, cluster) (\n kube_statefulset_status_current_revision{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n unless\n 
kube_statefulset_status_update_revision{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n )\n * on (namespace,\ + \ statefulset, job, cluster)\n (\n kube_statefulset_replicas{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n !=\n kube_statefulset_status_replicas_updated{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n )\n ) and on (namespace,\ + \ statefulset, job, cluster) (\n changes(kube_statefulset_status_replicas_updated{job=\"\ + kube-state-metrics\", namespace=~\".*\"}[5m])\n ==\n 0\n \ + \ )\n for: 15m\n labels:\n severity: warning\n - alert: KubeDaemonSetRolloutStuck\n\ + \ annotations:\n description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset\ + \ }} has not\n finished or progressed for at least 15m on cluster {{ $labels.cluster\ + \ }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck\n\ + \ summary: DaemonSet rollout is stuck.\n expr: |-\n (\n (\n\ + \ kube_daemonset_status_current_number_scheduled{job=\"kube-state-metrics\"\ + , namespace=~\".*\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n ) or (\n kube_daemonset_status_number_misscheduled{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n !=\n 0\n \ + \ ) or (\n kube_daemonset_status_updated_number_scheduled{job=\"kube-state-metrics\"\ + , namespace=~\".*\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n ) or (\n kube_daemonset_status_number_available{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n !=\n kube_daemonset_status_desired_number_scheduled{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n )\n ) and (\n changes(kube_daemonset_status_updated_number_scheduled{job=\"\ + kube-state-metrics\", namespace=~\".*\"}[5m])\n ==\n 0\n \ + \ )\n for: 15m\n labels:\n severity: warning\n - alert: KubeContainerWaiting\n\ + \ annotations:\n description: 'pod/{{ $labels.pod }} in namespace {{ 
$labels.namespace\ + \ }} on\n container {{ $labels.container}} has been in waiting state for\ + \ longer than\n 1 hour. (reason: \"{{ $labels.reason }}\") on cluster {{\ + \ $labels.cluster }}.'\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting\n\ + \ summary: Pod container waiting longer than 1 hour\n expr: kube_pod_container_status_waiting_reason{reason!=\"\ + CrashLoopBackOff\", job=\"kube-state-metrics\",\n namespace=~\".*\"} > 0\n\ + \ for: 1h\n labels:\n severity: warning\n - alert: KubeDaemonSetNotScheduled\n\ + \ annotations:\n description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace\ + \ }}/{{ $labels.daemonset\n }} are not scheduled on cluster {{ $labels.cluster\ + \ }}.'\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled\n\ + \ summary: DaemonSet pods are not scheduled.\n expr: |-\n kube_daemonset_status_desired_number_scheduled{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n -\n kube_daemonset_status_current_number_scheduled{job=\"\ + kube-state-metrics\", namespace=~\".*\"} > 0\n for: 10m\n labels:\n \ + \ severity: warning\n - alert: KubeDaemonSetMisScheduled\n annotations:\n\ + \ description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{\ + \ $labels.daemonset\n }} are running where they are not supposed to run\ + \ on cluster {{ $labels.cluster\n }}.'\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled\n\ + \ summary: DaemonSet pods are misscheduled.\n expr: kube_daemonset_status_number_misscheduled{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n > 0\n for: 15m\n labels:\n\ + \ severity: warning\n - alert: KubeJobNotCompleted\n annotations:\n \ + \ description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking\ + \ more\n than {{ \"43200\" | humanizeDuration }} to complete on cluster\ + \ {{ $labels.cluster\n }}.\n runbook_url: 
https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted\n\ + \ summary: Job did not complete in time\n expr: |-\n time() - max\ + \ by (namespace, job_name, cluster) (kube_job_status_start_time{job=\"kube-state-metrics\"\ + , namespace=~\".*\"}\n and\n kube_job_status_active{job=\"kube-state-metrics\"\ + , namespace=~\".*\"} > 0) > 43200\n labels:\n severity: warning\n - alert:\ + \ KubeJobFailed\n annotations:\n description: Job {{ $labels.namespace\ + \ }}/{{ $labels.job_name }} failed to complete.\n Removing failed job after\ + \ investigation should clear this alert on cluster\n {{ $labels.cluster\ + \ }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed\n\ + \ summary: Job failed to complete.\n expr: kube_job_failed{job=\"kube-state-metrics\"\ + , namespace=~\".*\"} > 0\n for: 15m\n labels:\n severity: warning\n\ + \ - alert: KubeHpaReplicasMismatch\n annotations:\n description: HPA\ + \ {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}\n has\ + \ not matched the desired number of replicas for longer than 15 minutes\n \ + \ on cluster {{ $labels.cluster }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch\n\ + \ summary: HPA has not matched desired number of replicas.\n expr: |-\n\ + \ (kube_horizontalpodautoscaler_status_desired_replicas{job=\"kube-state-metrics\"\ + , namespace=~\".*\"}\n !=\n kube_horizontalpodautoscaler_status_current_replicas{job=\"\ + kube-state-metrics\", namespace=~\".*\"})\n and\n (kube_horizontalpodautoscaler_status_current_replicas{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n >\n kube_horizontalpodautoscaler_spec_min_replicas{job=\"\ + kube-state-metrics\", namespace=~\".*\"})\n and\n (kube_horizontalpodautoscaler_status_current_replicas{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n <\n kube_horizontalpodautoscaler_spec_max_replicas{job=\"\ + kube-state-metrics\", namespace=~\".*\"})\n and\n 
changes(kube_horizontalpodautoscaler_status_current_replicas{job=\"\ + kube-state-metrics\", namespace=~\".*\"}[15m]) == 0\n for: 15m\n labels:\n\ + \ severity: warning\n - alert: KubeHpaMaxedOut\n annotations:\n \ + \ description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler\ + \ }}\n has been running at max replicas for longer than 15 minutes on\ + \ cluster {{\n $labels.cluster }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout\n\ + \ summary: HPA is running at max replicas\n expr: |-\n (\n \ + \ kube_horizontalpodautoscaler_status_current_replicas{job=\"kube-state-metrics\"\ + , namespace=~\".*\"}\n ==\n kube_horizontalpodautoscaler_spec_max_replicas{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n )\n and on (namespace, horizontalpodautoscaler)\ + \ (\n kube_horizontalpodautoscaler_spec_max_replicas{job=\"kube-state-metrics\"\ + , namespace=~\".*\"}\n !=\n kube_horizontalpodautoscaler_spec_min_replicas{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n )\n for: 15m\n labels:\n\ + \ severity: warning\n - alert: KubePdbNotEnoughHealthyPods\n annotations:\n\ + \ description: PDB {{ $labels.cluster }}/{{ $labels.namespace }}/{{ $labels.poddisruptionbudget\n\ + \ }} expects {{ $value }} more healthy pods. 
The desired number of healthy\ + \ pods\n has not been met for at least 15m.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepdbnotenoughhealthypods\n\ + \ summary: PDB does not have enough healthy pods.\n expr: |-\n (\n\ + \ kube_poddisruptionbudget_status_desired_healthy{job=\"kube-state-metrics\"\ + , namespace=~\".*\"}\n -\n kube_poddisruptionbudget_status_current_healthy{job=\"\ + kube-state-metrics\", namespace=~\".*\"}\n )\n > 0\n for: 15m\n \ + \ labels:\n severity: warning\n" + monitoring-kube-prometheus-stack-kubernetes-resources-b694afc5-821c-4800-a61c-a61d36f5c15f.yaml: "groups:\n\ + - name: kubernetes-resources\n rules:\n - alert: KubeCPUOvercommit\n annotations:\n\ + \ description: Cluster {{ $labels.cluster }} has overcommitted CPU resource\ + \ requests\n for Pods by {{ printf \"%.2f\" $value }} CPU shares and cannot\ + \ tolerate node\n failure.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuovercommit\n\ + \ summary: Cluster has overcommitted CPU resource requests.\n expr: |-\n\ + \ # Non-HA clusters.\n (\n (\n sum by (cluster) (namespace_cpu:kube_pod_container_resource_requests:sum{})\n\ + \ -\n sum by (cluster) (kube_node_status_allocatable{job=\"\ + kube-state-metrics\",resource=\"cpu\"}) > 0\n )\n and\n count\ + \ by (cluster) (max by (cluster, node) (kube_node_role{job=\"kube-state-metrics\"\ + , role=\"control-plane\"})) < 3\n )\n or\n # HA clusters.\n \ + \ (\n sum by (cluster) (namespace_cpu:kube_pod_container_resource_requests:sum{})\n\ + \ -\n (\n # Skip clusters with only one allocatable node.\n\ + \ (\n sum by (cluster) (kube_node_status_allocatable{job=\"\ + kube-state-metrics\",resource=\"cpu\"})\n -\n max by (cluster)\ + \ (kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\"})\n\ + \ ) > 0\n ) > 0\n )\n for: 10m\n labels:\n severity:\ + \ warning\n - alert: KubeMemoryOvercommit\n annotations:\n description:\ + \ Cluster {{ $labels.cluster }} has 
overcommitted memory resource\n requests\ + \ for Pods by {{ $value | humanize }} bytes and cannot tolerate node\n \ + \ failure.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit\n\ + \ summary: Cluster has overcommitted memory resource requests.\n expr:\ + \ |-\n # Non-HA clusters.\n (\n (\n sum by (cluster)\ + \ (namespace_memory:kube_pod_container_resource_requests:sum{})\n -\n\ + \ sum by (cluster) (kube_node_status_allocatable{job=\"kube-state-metrics\"\ + ,resource=\"memory\"}) > 0\n )\n and\n count by (cluster)\ + \ (max by (cluster, node) (kube_node_role{job=\"kube-state-metrics\", role=\"\ + control-plane\"})) < 3\n )\n or\n # HA clusters.\n (\n \ + \ sum by (cluster) (namespace_memory:kube_pod_container_resource_requests:sum{})\n\ + \ -\n (\n # Skip clusters with only one allocatable node.\n\ + \ (\n sum by (cluster) (kube_node_status_allocatable{job=\"\ + kube-state-metrics\",resource=\"memory\"})\n -\n max by\ + \ (cluster) (kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"\ + memory\"})\n ) > 0\n ) > 0\n )\n for: 10m\n labels:\n\ + \ severity: warning\n - alert: KubeCPUQuotaOvercommit\n annotations:\n\ + \ description: Cluster {{ $labels.cluster }} has overcommitted CPU resource\ + \ requests\n for Namespaces.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit\n\ + \ summary: Cluster has overcommitted CPU resource requests.\n expr: |-\n\ + \ sum by (cluster) (\n min without(resource) (kube_resourcequota{job=\"\ + kube-state-metrics\", type=\"hard\", resource=~\"(cpu|requests.cpu)\"})\n \ + \ )\n /\n sum by (cluster) (\n kube_node_status_allocatable{resource=\"\ + cpu\", job=\"kube-state-metrics\"}\n ) > 1.5\n for: 5m\n labels:\n\ + \ severity: warning\n - alert: KubeMemoryQuotaOvercommit\n annotations:\n\ + \ description: Cluster {{ $labels.cluster }} has overcommitted memory resource\n\ + \ requests for Namespaces.\n runbook_url: 
https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit\n\ + \ summary: Cluster has overcommitted memory resource requests.\n expr:\ + \ |-\n sum by (cluster) (\n min without(resource) (kube_resourcequota{job=\"\ + kube-state-metrics\", type=\"hard\", resource=~\"(memory|requests.memory)\"})\n\ + \ )\n /\n sum by (cluster) (\n kube_node_status_allocatable{resource=\"\ + memory\", job=\"kube-state-metrics\"}\n ) > 1.5\n for: 5m\n labels:\n\ + \ severity: warning\n - alert: KubeQuotaAlmostFull\n annotations:\n \ + \ description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage\n\ + \ }} of its {{ $labels.resource }} quota on cluster {{ $labels.cluster\ + \ }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull\n\ + \ summary: Namespace quota is going to be full.\n expr: |-\n max\ + \ without (instance, job, type) (\n kube_resourcequota{job=\"kube-state-metrics\"\ + , type=\"used\"}\n )\n / on (cluster, namespace, resource, resourcequota)\ + \ group_left()\n (\n max without (instance, job, type) (\n \ + \ kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"}\n ) >\ + \ 0\n )\n > 0.9 < 1\n for: 15m\n labels:\n severity: info\n\ + \ - alert: KubeQuotaFullyUsed\n annotations:\n description: Namespace\ + \ {{ $labels.namespace }} is using {{ $value | humanizePercentage\n }}\ + \ of its {{ $labels.resource }} quota on cluster {{ $labels.cluster }}.\n \ + \ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused\n\ + \ summary: Namespace quota is fully used.\n expr: |-\n max without\ + \ (instance, job, type) (\n kube_resourcequota{job=\"kube-state-metrics\"\ + , type=\"used\"}\n )\n / on (cluster, namespace, resource, resourcequota)\ + \ group_left()\n (\n max without (instance, job, type) (\n \ + \ kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"}\n ) >\ + \ 0\n )\n == 1\n for: 15m\n labels:\n severity: info\n -\ + \ 
alert: KubeQuotaExceeded\n annotations:\n description: Namespace {{\ + \ $labels.namespace }} is using {{ $value | humanizePercentage\n }} of\ + \ its {{ $labels.resource }} quota on cluster {{ $labels.cluster }}.\n runbook_url:\ + \ https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded\n\ + \ summary: Namespace quota has exceeded the limits.\n expr: |-\n \ + \ max without (instance, job, type) (\n kube_resourcequota{job=\"kube-state-metrics\"\ + , type=\"used\"}\n )\n / on (cluster, namespace, resource, resourcequota)\ + \ group_left()\n (\n max without (instance, job, type) (\n \ + \ kube_resourcequota{job=\"kube-state-metrics\", type=\"hard\"}\n ) >\ + \ 0\n ) > 1\n for: 15m\n labels:\n severity: warning\n - alert:\ + \ CPUThrottlingHigh\n annotations:\n description: '{{ $value | humanizePercentage\ + \ }} throttling of CPU in namespace\n {{ $labels.namespace }} for container\ + \ {{ $labels.container }} in pod {{ $labels.pod\n }} on cluster {{ $labels.cluster\ + \ }}.'\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh\n\ + \ summary: Processes experience elevated CPU throttling.\n expr: |-\n\ + \ sum without (id, metrics_path, name, image, endpoint, job, node) (\n \ + \ topk by (cluster, namespace, pod, container, instance) (1,\n increase(\n\ + \ container_cpu_cfs_throttled_periods_total{container!=\"\", job=\"\ + kubelet\", metrics_path=\"/metrics/cadvisor\", }\n [5m])\n )\n\ + \ )\n / on (cluster, namespace, pod, container, instance) group_left\n\ + \ sum without (id, metrics_path, name, image, endpoint, job, node) (\n \ + \ topk by (cluster, namespace, pod, container, instance) (1,\n increase(\n\ + \ container_cpu_cfs_periods_total{job=\"kubelet\", metrics_path=\"\ + /metrics/cadvisor\", }\n [5m])\n )\n )\n > ( 25 / 100\ + \ )\n for: 15m\n labels:\n severity: info\n" + monitoring-kube-prometheus-stack-kubernetes-storage-a21970f1-cefe-4cfc-876a-1833115df2e4.yaml: "groups:\n\ + - name: 
kubernetes-storage\n rules:\n - alert: KubePersistentVolumeFillingUp\n\ + \ annotations:\n description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim\n\ + \ }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on\ + \ Cluster\n {{ . }} {{- end }} is only {{ $value | humanizePercentage }}\ + \ free.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup\n\ + \ summary: PersistentVolume is filling up.\n expr: |-\n (\n \ + \ kubelet_volume_stats_available_bytes{job=\"kubelet\", namespace=~\".*\", metrics_path=\"\ + /metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\"\ + , namespace=~\".*\", metrics_path=\"/metrics\"}\n ) < 0.03\n and\n \ + \ kubelet_volume_stats_used_bytes{job=\"kubelet\", namespace=~\".*\", metrics_path=\"\ + /metrics\"} > 0\n unless on (cluster, namespace, persistentvolumeclaim)\n\ + \ kube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} ==\ + \ 1\n unless on (cluster, namespace, persistentvolumeclaim)\n kube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"\ + true\"} == 1\n for: 1m\n labels:\n severity: critical\n - alert: KubePersistentVolumeFillingUp\n\ + \ annotations:\n description: Based on recent sampling, the PersistentVolume\ + \ claimed by {{ $labels.persistentvolumeclaim\n }} in Namespace {{ $labels.namespace\ + \ }} {{ with $labels.cluster -}} on Cluster\n {{ . }} {{- end }} is expected\ + \ to fill up within four days. 
Currently {{ $value\n | humanizePercentage\ + \ }} is available.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup\n\ + \ summary: PersistentVolume is filling up.\n expr: |-\n (\n \ + \ kubelet_volume_stats_available_bytes{job=\"kubelet\", namespace=~\".*\", metrics_path=\"\ + /metrics\"}\n /\n kubelet_volume_stats_capacity_bytes{job=\"kubelet\"\ + , namespace=~\".*\", metrics_path=\"/metrics\"}\n ) < 0.15\n and\n \ + \ kubelet_volume_stats_used_bytes{job=\"kubelet\", namespace=~\".*\", metrics_path=\"\ + /metrics\"} > 0\n and\n predict_linear(kubelet_volume_stats_available_bytes{job=\"\ + kubelet\", namespace=~\".*\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) <\ + \ 0\n unless on (cluster, namespace, persistentvolumeclaim)\n kube_persistentvolumeclaim_access_mode{\ + \ access_mode=\"ReadOnlyMany\"} == 1\n unless on (cluster, namespace, persistentvolumeclaim)\n\ + \ kube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"}\ + \ == 1\n for: 1h\n labels:\n severity: warning\n - alert: KubePersistentVolumeInodesFillingUp\n\ + \ annotations:\n description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim\n\ + \ }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on\ + \ Cluster\n {{ . 
}} {{- end }} only has {{ $value | humanizePercentage\ + \ }} free inodes.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup\n\ + \ summary: PersistentVolumeInodes are filling up.\n expr: |-\n (\n\ + \ kubelet_volume_stats_inodes_free{job=\"kubelet\", namespace=~\".*\",\ + \ metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_inodes{job=\"\ + kubelet\", namespace=~\".*\", metrics_path=\"/metrics\"}\n ) < 0.03\n \ + \ and\n kubelet_volume_stats_inodes_used{job=\"kubelet\", namespace=~\"\ + .*\", metrics_path=\"/metrics\"} > 0\n unless on (cluster, namespace, persistentvolumeclaim)\n\ + \ kube_persistentvolumeclaim_access_mode{ access_mode=\"ReadOnlyMany\"} ==\ + \ 1\n unless on (cluster, namespace, persistentvolumeclaim)\n kube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"\ + true\"} == 1\n for: 1m\n labels:\n severity: critical\n - alert: KubePersistentVolumeInodesFillingUp\n\ + \ annotations:\n description: Based on recent sampling, the PersistentVolume\ + \ claimed by {{ $labels.persistentvolumeclaim\n }} in Namespace {{ $labels.namespace\ + \ }} {{ with $labels.cluster -}} on Cluster\n {{ . }} {{- end }} is expected\ + \ to run out of inodes within four days. 
Currently\n {{ $value | humanizePercentage\ + \ }} of its inodes are free.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup\n\ + \ summary: PersistentVolumeInodes are filling up.\n expr: |-\n (\n\ + \ kubelet_volume_stats_inodes_free{job=\"kubelet\", namespace=~\".*\",\ + \ metrics_path=\"/metrics\"}\n /\n kubelet_volume_stats_inodes{job=\"\ + kubelet\", namespace=~\".*\", metrics_path=\"/metrics\"}\n ) < 0.15\n \ + \ and\n kubelet_volume_stats_inodes_used{job=\"kubelet\", namespace=~\"\ + .*\", metrics_path=\"/metrics\"} > 0\n and\n predict_linear(kubelet_volume_stats_inodes_free{job=\"\ + kubelet\", namespace=~\".*\", metrics_path=\"/metrics\"}[6h], 4 * 24 * 3600) <\ + \ 0\n unless on (cluster, namespace, persistentvolumeclaim)\n kube_persistentvolumeclaim_access_mode{\ + \ access_mode=\"ReadOnlyMany\"} == 1\n unless on (cluster, namespace, persistentvolumeclaim)\n\ + \ kube_persistentvolumeclaim_labels{label_excluded_from_alerts=\"true\"}\ + \ == 1\n for: 1h\n labels:\n severity: warning\n - alert: KubePersistentVolumeErrors\n\ + \ annotations:\n description: The persistent volume {{ $labels.persistentvolume\ + \ }} {{ with $labels.cluster\n -}} on Cluster {{ . 
}} {{- end }} has status\ + \ {{ $labels.phase }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeerrors\n\ + \ summary: PersistentVolume is having issues with provisioning.\n expr:\ + \ kube_persistentvolume_status_phase{phase=~\"Failed|Pending\",job=\"kube-state-metrics\"\ + }\n > 0\n for: 5m\n labels:\n severity: critical\n" + monitoring-kube-prometheus-stack-kubernetes-system-26e1e614-9a40-44cd-8622-cff2d1258a88.yaml: "groups:\n\ + - name: kubernetes-system\n rules:\n - alert: KubeVersionMismatch\n annotations:\n\ + \ description: There are {{ $value }} different semantic versions of Kubernetes\n\ + \ components running on cluster {{ $labels.cluster }}.\n runbook_url:\ + \ https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch\n\ + \ summary: Different semantic versions of Kubernetes components running.\n\ + \ expr: count by (cluster) (count by (git_version, cluster) (label_replace(kubernetes_build_info{job!~\"\ + kube-dns|coredns\"},\"git_version\",\"$1\",\"git_version\",\"(v[0-9]*.[0-9]*).*\"\ + )))\n > 1\n for: 15m\n labels:\n severity: warning\n - alert:\ + \ KubeClientErrors\n annotations:\n description: Kubernetes API server\ + \ client '{{ $labels.job }}/{{ $labels.instance\n }}' is experiencing {{\ + \ $value | humanizePercentage }} errors on cluster {{\n $labels.cluster\ + \ }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors\n\ + \ summary: Kubernetes API server client is experiencing errors.\n expr:\ + \ |-\n (sum(rate(rest_client_requests_total{job=\"apiserver\",code=~\"5..\"\ + }[5m])) by (cluster, instance, job, namespace)\n /\n sum(rate(rest_client_requests_total{job=\"\ + apiserver\"}[5m])) by (cluster, instance, job, namespace))\n > 0.01\n \ + \ for: 15m\n labels:\n severity: warning\n" + monitoring-kube-prometheus-stack-kubernetes-system-apiserver-f9528ec5-b467-4c90-829a-f4603a9bc7d5.yaml: "groups:\n\ + - name: 
kubernetes-system-apiserver\n rules:\n - alert: KubeClientCertificateExpiration\n\ + \ annotations:\n description: A client certificate used to authenticate\ + \ to kubernetes apiserver\n is expiring in less than 7.0 days on cluster\ + \ {{ $labels.cluster }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration\n\ + \ summary: Client certificate is about to expire.\n expr: |-\n histogram_quantile(0.01,\ + \ sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"\ + apiserver\"}[5m]))) < 604800\n and\n on (job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job=\"\ + apiserver\"} > 0\n for: 5m\n labels:\n severity: warning\n - alert:\ + \ KubeClientCertificateExpiration\n annotations:\n description: A client\ + \ certificate used to authenticate to kubernetes apiserver\n is expiring\ + \ in less than 24.0 hours on cluster {{ $labels.cluster }}.\n runbook_url:\ + \ https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration\n\ + \ summary: Client certificate is about to expire.\n expr: |-\n histogram_quantile(0.01,\ + \ sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job=\"\ + apiserver\"}[5m]))) < 86400\n and\n on (job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job=\"\ + apiserver\"} > 0\n for: 5m\n labels:\n severity: critical\n - alert:\ + \ KubeAggregatedAPIErrors\n annotations:\n description: Kubernetes aggregated\ + \ API {{ $labels.instance }}/{{ $labels.name\n }} has reported {{ $labels.reason\ + \ }} errors on cluster {{ $labels.cluster\n }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors\n\ + \ summary: Kubernetes aggregated API has reported errors.\n expr: sum\ + \ by (cluster, instance, name, 
reason)(increase(aggregator_unavailable_apiservice_total{job=\"\ + apiserver\"}[1m]))\n > 0\n for: 10m\n labels:\n severity: warning\n\ + \ - alert: KubeAggregatedAPIDown\n annotations:\n description: Kubernetes\ + \ aggregated API {{ $labels.name }}/{{ $labels.namespace\n }} has been\ + \ only {{ $value | humanize }}% available over the last 10m on cluster\n \ + \ {{ $labels.cluster }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown\n\ + \ summary: Kubernetes aggregated API is down.\n expr: (1 - max by (name,\ + \ namespace, cluster)(avg_over_time(aggregator_unavailable_apiservice{job=\"apiserver\"\ + }[10m])))\n * 100 < 85\n for: 5m\n labels:\n severity: warning\n\ + \ - alert: KubeAPIDown\n annotations:\n description: KubeAPI has disappeared\ + \ from Prometheus target discovery.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapidown\n\ + \ summary: Target disappeared from Prometheus target discovery.\n expr:\ + \ absent(up{job=\"apiserver\"})\n for: 15m\n labels:\n severity: critical\n\ + \ - alert: KubeAPITerminatedRequests\n annotations:\n description: The\ + \ kubernetes apiserver has terminated {{ $value | humanizePercentage\n \ + \ }} of its incoming requests on cluster {{ $labels.cluster }}.\n runbook_url:\ + \ https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests\n\ + \ summary: The kubernetes apiserver has terminated {{ $value | humanizePercentage\n\ + \ }} of its incoming requests.\n expr: sum by (cluster) (rate(apiserver_request_terminations_total{job=\"\ + apiserver\"}[10m]))\n / ( sum by (cluster) (rate(apiserver_request_total{job=\"\ + apiserver\"}[10m])) +\n sum by (cluster) (rate(apiserver_request_terminations_total{job=\"\ + apiserver\"}[10m]))\n ) > 0.20\n for: 5m\n labels:\n severity:\ + \ warning\n" + monitoring-kube-prometheus-stack-kubernetes-system-controller-manager-965b603b-05c3-4e36-9b70-30adcbb5400d.yaml: "groups:\n\ + - 
name: kubernetes-system-controller-manager\n rules:\n - alert: KubeControllerManagerDown\n\ + \ annotations:\n description: KubeControllerManager has disappeared from\ + \ Prometheus target discovery.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontrollermanagerdown\n\ + \ summary: Target disappeared from Prometheus target discovery.\n expr:\ + \ absent(up{job=\"kube-controller-manager\"})\n for: 15m\n labels:\n \ + \ severity: critical\n" + monitoring-kube-prometheus-stack-kubernetes-system-kube-proxy-b946dd15-5f3f-490e-a788-25134043fbfb.yaml: "groups:\n\ + - name: kubernetes-system-kube-proxy\n rules:\n - alert: KubeProxyDown\n \ + \ annotations:\n description: KubeProxy has disappeared from Prometheus target\ + \ discovery.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeproxydown\n\ + \ summary: Target disappeared from Prometheus target discovery.\n expr:\ + \ absent(up{job=\"kube-proxy\"})\n for: 15m\n labels:\n severity: critical\n" + monitoring-kube-prometheus-stack-kubernetes-system-kubelet-3f5c198a-b883-4aa8-8f72-3001b24a1138.yaml: "groups:\n\ + - name: kubernetes-system-kubelet\n rules:\n - alert: KubeNodeNotReady\n \ + \ annotations:\n description: '{{ $labels.node }} has been unready for more\ + \ than 15 minutes on\n cluster {{ $labels.cluster }}.'\n runbook_url:\ + \ https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodenotready\n\ + \ summary: Node is not ready.\n expr: |-\n kube_node_status_condition{job=\"\ + kube-state-metrics\",condition=\"Ready\",status=\"true\"} == 0\n and on (cluster,\ + \ node)\n kube_node_spec_unschedulable{job=\"kube-state-metrics\"} == 0\n\ + \ for: 15m\n labels:\n severity: warning\n - alert: KubeNodePressure\n\ + \ annotations:\n description: '{{ $labels.node }} on cluster {{ $labels.cluster\ + \ }} has active\n Condition {{ $labels.condition }}. 
This is caused by\ + \ resource usage exceeding\n eviction thresholds.'\n runbook_url:\ + \ https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodepressure\n\ + \ summary: Node has as active Condition.\n expr: |-\n kube_node_status_condition{job=\"\ + kube-state-metrics\",condition=~\"(MemoryPressure|DiskPressure|PIDPressure)\"\ + ,status=\"true\"} == 1\n and on (cluster, node)\n kube_node_spec_unschedulable{job=\"\ + kube-state-metrics\"} == 0\n for: 10m\n labels:\n severity: info\n\ + \ - alert: KubeNodeUnreachable\n annotations:\n description: '{{ $labels.node\ + \ }} is unreachable and some workloads may be rescheduled\n on cluster\ + \ {{ $labels.cluster }}.'\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodeunreachable\n\ + \ summary: Node is unreachable.\n expr: (kube_node_spec_taint{job=\"kube-state-metrics\"\ + ,key=\"node.kubernetes.io/unreachable\",effect=\"NoSchedule\"}\n unless ignoring(key,value)\ + \ kube_node_spec_taint{job=\"kube-state-metrics\",key=~\"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn\"\ + })\n == 1\n for: 15m\n labels:\n severity: warning\n - alert:\ + \ KubeletTooManyPods\n annotations:\n description: Kubelet '{{ $labels.node\ + \ }}' is running at {{ $value | humanizePercentage\n }} of its Pod capacity\ + \ on cluster {{ $labels.cluster }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods\n\ + \ summary: Kubelet is running at capacity.\n expr: |-\n (\n \ + \ max by (cluster, instance) (\n kubelet_running_pods{job=\"kubelet\"\ + , metrics_path=\"/metrics\"} > 1\n )\n * on (cluster, instance)\ + \ group_left(node)\n max by (cluster, instance, node) (\n kubelet_node_name{job=\"\ + kubelet\", metrics_path=\"/metrics\"}\n )\n )\n / on (cluster,\ + \ node) group_left()\n max by (cluster, node) (\n kube_node_status_capacity{job=\"\ + kube-state-metrics\", resource=\"pods\"} != 1\n 
) > 0.95\n for: 15m\n\ + \ labels:\n severity: info\n - alert: KubeNodeReadinessFlapping\n \ + \ annotations:\n description: The readiness status of node {{ $labels.node\ + \ }} has changed {{\n $value }} times in the last 15 minutes on cluster\ + \ {{ $labels.cluster }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping\n\ + \ summary: Node readiness status is flapping.\n expr: |-\n sum(changes(kube_node_status_condition{job=\"\ + kube-state-metrics\",status=\"true\",condition=\"Ready\"}[15m])) by (cluster,\ + \ node) > 2\n and on (cluster, node)\n kube_node_spec_unschedulable{job=\"\ + kube-state-metrics\"} == 0\n for: 15m\n labels:\n severity: warning\n\ + \ - alert: KubeNodeEviction\n annotations:\n description: Node {{ $labels.node\ + \ }} on {{ $labels.cluster }} is evicting Pods\n due to {{ $labels.eviction_signal\ + \ }}. Eviction occurs when eviction thresholds\n are crossed, typically\ + \ caused by Pods exceeding RAM/ephemeral-storage limits.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodeeviction\n\ + \ summary: Node is evicting pods.\n expr: |-\n sum(rate(kubelet_evictions{job=\"\ + kubelet\", metrics_path=\"/metrics\"}[15m])) by (cluster, eviction_signal, instance)\n\ + \ * on (cluster, instance) group_left(node)\n max by (cluster, instance,\ + \ node) (\n kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"\ + }\n )\n > 0\n for: 0s\n labels:\n severity: info\n - alert:\ + \ KubeletPlegDurationHigh\n annotations:\n description: The Kubelet Pod\ + \ Lifecycle Event Generator has a 99th percentile\n duration of {{ $value\ + \ }} seconds on node {{ $labels.node }} on cluster {{\n $labels.cluster\ + \ }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletplegdurationhigh\n\ + \ summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist.\n\ + \ expr: 
node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile=\"\ + 0.99\"}\n >= 10\n for: 5m\n labels:\n severity: warning\n - alert:\ + \ KubeletPodStartUpLatencyHigh\n annotations:\n description: Kubelet Pod\ + \ startup 99th percentile latency is {{ $value }} seconds\n on node {{\ + \ $labels.node }} on cluster {{ $labels.cluster }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh\n\ + \ summary: Kubelet Pod startup latency is too high.\n expr: |-\n \ + \ histogram_quantile(0.99,\n sum by (cluster, instance, le) (\n \ + \ topk by (cluster, instance, le, operation_type) (1,\n rate(kubelet_pod_worker_duration_seconds_bucket{job=\"\ + kubelet\", metrics_path=\"/metrics\"}[5m])\n )\n )\n )\n\ + \ * on (cluster, instance) group_left(node)\n topk by (cluster, instance,\ + \ node) (1,\n kubelet_node_name{job=\"kubelet\", metrics_path=\"/metrics\"\ + }\n )\n > 60\n for: 15m\n labels:\n severity: warning\n \ + \ - alert: KubeletClientCertificateExpiration\n annotations:\n description:\ + \ Client certificate for Kubelet on node {{ $labels.node }} expires\n in\ + \ {{ $value | humanizeDuration }} on cluster {{ $labels.cluster }}.\n runbook_url:\ + \ https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration\n\ + \ summary: Kubelet client certificate is about to expire.\n expr: kubelet_certificate_manager_client_ttl_seconds\ + \ < 604800\n labels:\n severity: warning\n - alert: KubeletClientCertificateExpiration\n\ + \ annotations:\n description: Client certificate for Kubelet on node {{\ + \ $labels.node }} expires\n in {{ $value | humanizeDuration }} on cluster\ + \ {{ $labels.cluster }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration\n\ + \ summary: Kubelet client certificate is about to expire.\n expr: kubelet_certificate_manager_client_ttl_seconds\ + \ < 86400\n labels:\n severity: critical\n - 
alert: KubeletServerCertificateExpiration\n\ + \ annotations:\n description: Server certificate for Kubelet on node {{\ + \ $labels.node }} expires\n in {{ $value | humanizeDuration }} on cluster\ + \ {{ $labels.cluster }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration\n\ + \ summary: Kubelet server certificate is about to expire.\n expr: kubelet_certificate_manager_server_ttl_seconds\ + \ < 604800\n labels:\n severity: warning\n - alert: KubeletServerCertificateExpiration\n\ + \ annotations:\n description: Server certificate for Kubelet on node {{\ + \ $labels.node }} expires\n in {{ $value | humanizeDuration }} on cluster\ + \ {{ $labels.cluster }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration\n\ + \ summary: Kubelet server certificate is about to expire.\n expr: kubelet_certificate_manager_server_ttl_seconds\ + \ < 86400\n labels:\n severity: critical\n - alert: KubeletClientCertificateRenewalErrors\n\ + \ annotations:\n description: Kubelet on node {{ $labels.node }} has failed\ + \ to renew its client\n certificate ({{ $value | humanize }} errors in\ + \ the last 5 minutes) on cluster\n {{ $labels.cluster }}.\n runbook_url:\ + \ https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificaterenewalerrors\n\ + \ summary: Kubelet has failed to renew its client certificate.\n expr:\ + \ increase(kubelet_certificate_manager_client_expiration_renew_errors[5m])\n \ + \ > 0\n for: 15m\n labels:\n severity: warning\n - alert: KubeletServerCertificateRenewalErrors\n\ + \ annotations:\n description: Kubelet on node {{ $labels.node }} has failed\ + \ to renew its server\n certificate ({{ $value | humanize }} errors in\ + \ the last 5 minutes) on cluster\n {{ $labels.cluster }}.\n runbook_url:\ + \ https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificaterenewalerrors\n\ + \ summary: Kubelet 
has failed to renew its server certificate.\n expr:\ + \ increase(kubelet_server_expiration_renew_errors[5m]) > 0\n for: 15m\n \ + \ labels:\n severity: warning\n - alert: KubeletDown\n annotations:\n\ + \ description: Kubelet has disappeared from Prometheus target discovery on\ + \ cluster\n {{ $labels.cluster }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletdown\n\ + \ summary: Target disappeared from Prometheus target discovery.\n expr:\ + \ |-\n count by (cluster) (kube_node_info{job=\"kube-state-metrics\"})\n\ + \ unless on (cluster)\n count by (cluster) (up{job=\"kubelet\", metrics_path=\"\ + /metrics\"} == 1)\n for: 15m\n labels:\n severity: critical\n" + monitoring-kube-prometheus-stack-kubernetes-system-scheduler-d7bc55b3-9301-4c17-81d7-76c4590104da.yaml: "groups:\n\ + - name: kubernetes-system-scheduler\n rules:\n - alert: KubeSchedulerDown\n\ + \ annotations:\n description: KubeScheduler has disappeared from Prometheus\ + \ target discovery.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeschedulerdown\n\ + \ summary: Target disappeared from Prometheus target discovery.\n expr:\ + \ absent(up{job=\"kube-scheduler\"})\n for: 15m\n labels:\n severity:\ + \ critical\n" + monitoring-kube-prometheus-stack-node-exporter-bb0e2fd6-3e20-4883-9c47-3d8d2acb1ac3.yaml: "groups:\n\ + - name: node-exporter\n rules:\n - alert: NodeFilesystemSpaceFillingUp\n \ + \ annotations:\n description: Filesystem on {{ $labels.device }}, mounted\ + \ on {{ $labels.mountpoint\n }}, at {{ $labels.instance }} has only {{\ + \ printf \"%.2f\" $value }}% available\n space left and is filling up.\n\ + \ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup\n\ + \ summary: Filesystem is predicted to run out of space within the next 24\ + \ hours.\n expr: |-\n (\n node_filesystem_avail_bytes{job=\"node-exporter\"\ + ,fstype!=\"\",mountpoint!=\"\"} / 
node_filesystem_size_bytes{job=\"node-exporter\"\ + ,fstype!=\"\",mountpoint!=\"\"} * 100 < 15\n and\n predict_linear(node_filesystem_avail_bytes{job=\"\ + node-exporter\",fstype!=\"\",mountpoint!=\"\"}[6h], 24*60*60) < 0\n and\n\ + \ node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\",mountpoint!=\"\ + \"} == 0\n )\n for: 1h\n labels:\n severity: warning\n - alert:\ + \ NodeFilesystemSpaceFillingUp\n annotations:\n description: Filesystem\ + \ on {{ $labels.device }}, mounted on {{ $labels.mountpoint\n }}, at {{\ + \ $labels.instance }} has only {{ printf \"%.2f\" $value }}% available\n \ + \ space left and is filling up fast.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup\n\ + \ summary: Filesystem is predicted to run out of space within the next 4\ + \ hours.\n expr: |-\n (\n node_filesystem_avail_bytes{job=\"node-exporter\"\ + ,fstype!=\"\",mountpoint!=\"\"} / node_filesystem_size_bytes{job=\"node-exporter\"\ + ,fstype!=\"\",mountpoint!=\"\"} * 100 < 10\n and\n predict_linear(node_filesystem_avail_bytes{job=\"\ + node-exporter\",fstype!=\"\",mountpoint!=\"\"}[6h], 4*60*60) < 0\n and\n\ + \ node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\",mountpoint!=\"\ + \"} == 0\n )\n for: 1h\n labels:\n severity: critical\n - alert:\ + \ NodeFilesystemAlmostOutOfSpace\n annotations:\n description: Filesystem\ + \ on {{ $labels.device }}, mounted on {{ $labels.mountpoint\n }}, at {{\ + \ $labels.instance }} has only {{ printf \"%.2f\" $value }}% available\n \ + \ space left.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace\n\ + \ summary: Filesystem has less than 5% space left.\n expr: |-\n (\n\ + \ node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\",mountpoint!=\"\ + \"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\",mountpoint!=\"\ + \"} * 100 < 5\n and\n node_filesystem_readonly{job=\"node-exporter\"\ + 
,fstype!=\"\",mountpoint!=\"\"} == 0\n )\n for: 30m\n labels:\n \ + \ severity: warning\n - alert: NodeFilesystemAlmostOutOfSpace\n annotations:\n\ + \ description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint\n\ + \ }}, at {{ $labels.instance }} has only {{ printf \"%.2f\" $value }}%\ + \ available\n space left.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace\n\ + \ summary: Filesystem has less than 3% space left.\n expr: |-\n (\n\ + \ node_filesystem_avail_bytes{job=\"node-exporter\",fstype!=\"\",mountpoint!=\"\ + \"} / node_filesystem_size_bytes{job=\"node-exporter\",fstype!=\"\",mountpoint!=\"\ + \"} * 100 < 3\n and\n node_filesystem_readonly{job=\"node-exporter\"\ + ,fstype!=\"\",mountpoint!=\"\"} == 0\n )\n for: 30m\n labels:\n \ + \ severity: critical\n - alert: NodeFilesystemFilesFillingUp\n annotations:\n\ + \ description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint\n\ + \ }}, at {{ $labels.instance }} has only {{ printf \"%.2f\" $value }}%\ + \ available\n inodes left and is filling up.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup\n\ + \ summary: Filesystem is predicted to run out of inodes within the next 24\ + \ hours.\n expr: |-\n (\n node_filesystem_files_free{job=\"node-exporter\"\ + ,fstype!=\"\",mountpoint!=\"\"} / node_filesystem_files{job=\"node-exporter\"\ + ,fstype!=\"\",mountpoint!=\"\"} * 100 < 40\n and\n predict_linear(node_filesystem_files_free{job=\"\ + node-exporter\",fstype!=\"\",mountpoint!=\"\"}[6h], 24*60*60) < 0\n and\n\ + \ node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\",mountpoint!=\"\ + \"} == 0\n )\n for: 1h\n labels:\n severity: warning\n - alert:\ + \ NodeFilesystemFilesFillingUp\n annotations:\n description: Filesystem\ + \ on {{ $labels.device }}, mounted on {{ $labels.mountpoint\n }}, at {{\ + \ $labels.instance }} has only {{ printf \"%.2f\" $value }}% 
available\n \ + \ inodes left and is filling up fast.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup\n\ + \ summary: Filesystem is predicted to run out of inodes within the next 4\ + \ hours.\n expr: |-\n (\n node_filesystem_files_free{job=\"node-exporter\"\ + ,fstype!=\"\",mountpoint!=\"\"} / node_filesystem_files{job=\"node-exporter\"\ + ,fstype!=\"\",mountpoint!=\"\"} * 100 < 20\n and\n predict_linear(node_filesystem_files_free{job=\"\ + node-exporter\",fstype!=\"\",mountpoint!=\"\"}[6h], 4*60*60) < 0\n and\n\ + \ node_filesystem_readonly{job=\"node-exporter\",fstype!=\"\",mountpoint!=\"\ + \"} == 0\n )\n for: 1h\n labels:\n severity: critical\n - alert:\ + \ NodeFilesystemAlmostOutOfFiles\n annotations:\n description: Filesystem\ + \ on {{ $labels.device }}, mounted on {{ $labels.mountpoint\n }}, at {{\ + \ $labels.instance }} has only {{ printf \"%.2f\" $value }}% available\n \ + \ inodes left.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles\n\ + \ summary: Filesystem has less than 5% inodes left.\n expr: |-\n \ + \ (\n node_filesystem_files_free{job=\"node-exporter\",fstype!=\"\",mountpoint!=\"\ + \"} / node_filesystem_files{job=\"node-exporter\",fstype!=\"\",mountpoint!=\"\"\ + } * 100 < 5\n and\n node_filesystem_readonly{job=\"node-exporter\"\ + ,fstype!=\"\",mountpoint!=\"\"} == 0\n )\n for: 1h\n labels:\n \ + \ severity: warning\n - alert: NodeFilesystemAlmostOutOfFiles\n annotations:\n\ + \ description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint\n\ + \ }}, at {{ $labels.instance }} has only {{ printf \"%.2f\" $value }}%\ + \ available\n inodes left.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles\n\ + \ summary: Filesystem has less than 3% inodes left.\n expr: |-\n \ + \ (\n node_filesystem_files_free{job=\"node-exporter\",fstype!=\"\",mountpoint!=\"\ + \"} / 
node_filesystem_files{job=\"node-exporter\",fstype!=\"\",mountpoint!=\"\"\ + } * 100 < 3\n and\n node_filesystem_readonly{job=\"node-exporter\"\ + ,fstype!=\"\",mountpoint!=\"\"} == 0\n )\n for: 1h\n labels:\n \ + \ severity: critical\n - alert: NodeNetworkReceiveErrs\n annotations:\n \ + \ description: '{{ $labels.instance }} interface {{ $labels.device }} has\ + \ encountered\n {{ printf \"%.0f\" $value }} receive errors in the last\ + \ two minutes.'\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs\n\ + \ summary: Network interface is reporting many receive errors.\n expr:\ + \ rate(node_network_receive_errs_total{job=\"node-exporter\"}[2m]) / rate(node_network_receive_packets_total{job=\"\ + node-exporter\"}[2m])\n > 0.01\n for: 1h\n labels:\n severity:\ + \ warning\n - alert: NodeNetworkTransmitErrs\n annotations:\n description:\ + \ '{{ $labels.instance }} interface {{ $labels.device }} has encountered\n \ + \ {{ printf \"%.0f\" $value }} transmit errors in the last two minutes.'\n\ + \ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs\n\ + \ summary: Network interface is reporting many transmit errors.\n expr:\ + \ rate(node_network_transmit_errs_total{job=\"node-exporter\"}[2m]) / rate(node_network_transmit_packets_total{job=\"\ + node-exporter\"}[2m])\n > 0.01\n for: 1h\n labels:\n severity:\ + \ warning\n - alert: NodeHighNumberConntrackEntriesUsed\n annotations:\n \ + \ description: '{{ $labels.instance }} {{ $value | humanizePercentage }} of\ + \ conntrack\n entries are used.'\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused\n\ + \ summary: Number of conntrack are getting close to the limit.\n expr:\ + \ (node_nf_conntrack_entries{job=\"node-exporter\"} / node_nf_conntrack_entries_limit)\n\ + \ > 0.75\n labels:\n severity: warning\n - alert: NodeTextFileCollectorScrapeError\n\ + \ annotations:\n description: Node 
Exporter text file collector on {{\ + \ $labels.instance }} failed\n to scrape.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror\n\ + \ summary: Node Exporter text file collector failed to scrape.\n expr:\ + \ node_textfile_scrape_error{job=\"node-exporter\"} == 1\n labels:\n severity:\ + \ warning\n - alert: NodeClockSkewDetected\n annotations:\n description:\ + \ Clock at {{ $labels.instance }} is out of sync by more than 0.05s.\n \ + \ Ensure NTP is configured correctly on this host.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected\n\ + \ summary: Clock skew detected.\n expr: |-\n (\n node_timex_offset_seconds{job=\"\ + node-exporter\"} > 0.05\n and\n deriv(node_timex_offset_seconds{job=\"\ + node-exporter\"}[5m]) >= 0\n )\n or\n (\n node_timex_offset_seconds{job=\"\ + node-exporter\"} < -0.05\n and\n deriv(node_timex_offset_seconds{job=\"\ + node-exporter\"}[5m]) <= 0\n )\n for: 10m\n labels:\n severity:\ + \ warning\n - alert: NodeClockNotSynchronising\n annotations:\n description:\ + \ Clock at {{ $labels.instance }} is not synchronising. Ensure NTP\n is\ + \ configured on this host.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising\n\ + \ summary: Clock not synchronising.\n expr: |-\n min_over_time(node_timex_sync_status{job=\"\ + node-exporter\"}[5m]) == 0\n and\n node_timex_maxerror_seconds{job=\"\ + node-exporter\"} >= 16\n for: 10m\n labels:\n severity: warning\n \ + \ - alert: NodeRAIDDegraded\n annotations:\n description: RAID array '{{\ + \ $labels.device }}' at {{ $labels.instance }} is\n in degraded state due\ + \ to one or more disks failures. 
Number of spare drives\n is insufficient\ + \ to fix issue automatically.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded\n\ + \ summary: RAID Array is degraded.\n expr: node_md_disks_required{job=\"\ + node-exporter\",device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"\ + }\n - ignoring (state) (node_md_disks{state=\"active\",job=\"node-exporter\"\ + ,device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"\ + })\n > 0\n for: 15m\n labels:\n severity: critical\n - alert:\ + \ NodeRAIDDiskFailure\n annotations:\n description: At least one device\ + \ in RAID array at {{ $labels.instance }} failed.\n Array '{{ $labels.device\ + \ }}' needs attention and possibly a disk swap.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure\n\ + \ summary: Failed device in RAID array.\n expr: node_md_disks{state=\"\ + failed\",job=\"node-exporter\",device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"\ + }\n > 0\n labels:\n severity: warning\n - alert: NodeFileDescriptorLimit\n\ + \ annotations:\n description: File descriptors limit at {{ $labels.instance\ + \ }} is currently at\n {{ printf \"%.2f\" $value }}%.\n runbook_url:\ + \ https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit\n\ + \ summary: Kernel is predicted to exhaust file descriptors limit soon.\n\ + \ expr: |-\n (\n node_filefd_allocated{job=\"node-exporter\"} *\ + \ 100 / node_filefd_maximum{job=\"node-exporter\"} > 70\n )\n for: 15m\n\ + \ labels:\n severity: warning\n - alert: NodeFileDescriptorLimit\n \ + \ annotations:\n description: File descriptors limit at {{ $labels.instance\ + \ }} is currently at\n {{ printf \"%.2f\" $value }}%.\n runbook_url:\ + \ https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit\n\ + \ summary: Kernel is predicted to exhaust file descriptors limit soon.\n\ + \ expr: |-\n (\n 
node_filefd_allocated{job=\"node-exporter\"} *\ + \ 100 / node_filefd_maximum{job=\"node-exporter\"} > 90\n )\n for: 15m\n\ + \ labels:\n severity: critical\n - alert: NodeCPUHighUsage\n annotations:\n\ + \ description: |\n CPU usage at {{ $labels.instance }} has been above\ + \ 90% for the last 15 minutes, is currently at {{ printf \"%.2f\" $value }}%.\n\ + \ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage\n\ + \ summary: High CPU usage.\n expr: sum without(mode) (avg without (cpu)\ + \ (rate(node_cpu_seconds_total{job=\"node-exporter\",\n mode!~\"idle|iowait\"\ + }[2m]))) * 100 > 90\n for: 15m\n labels:\n severity: info\n - alert:\ + \ NodeSystemSaturation\n annotations:\n description: |\n System\ + \ load per core at {{ $labels.instance }} has been above 2 for the last 15 minutes,\ + \ is currently at {{ printf \"%.2f\" $value }}.\n This might indicate this\ + \ instance resources saturation and can cause it becoming unresponsive.\n \ + \ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemsaturation\n\ + \ summary: System saturated, load per core is very high.\n expr: |-\n\ + \ node_load1{job=\"node-exporter\"}\n / count without (cpu, mode) (node_cpu_seconds_total{job=\"\ + node-exporter\", mode=\"idle\"}) > 2\n for: 15m\n labels:\n severity:\ + \ warning\n - alert: NodeMemoryMajorPagesFaults\n annotations:\n description:\ + \ |\n Memory major pages are occurring at very high rate at {{ $labels.instance\ + \ }}, 500 major page faults per second for the last 15 minutes, is currently at\ + \ {{ printf \"%.2f\" $value }}.\n Please check that there is enough memory\ + \ available at this instance.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememorymajorpagesfaults\n\ + \ summary: Memory major page faults are occurring at very high rate.\n \ + \ expr: rate(node_vmstat_pgmajfault{job=\"node-exporter\"}[5m]) > 500\n for:\ + \ 15m\n labels:\n severity: warning\n - alert: 
NodeMemoryHighUtilization\n\ + \ annotations:\n description: |\n Memory is filling up at {{ $labels.instance\ + \ }}, has been above 90% for the last 15 minutes, is currently at {{ printf \"\ + %.2f\" $value }}%.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememoryhighutilization\n\ + \ summary: Host is running out of memory.\n expr: 100 - (node_memory_MemAvailable_bytes{job=\"\ + node-exporter\"} / node_memory_MemTotal_bytes{job=\"node-exporter\"}\n *\ + \ 100) > 90\n for: 15m\n labels:\n severity: warning\n - alert: NodeDiskIOSaturation\n\ + \ annotations:\n description: |\n Disk IO queue (aqu-sq) is high\ + \ on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the\ + \ last 30 minutes, is currently at {{ printf \"%.2f\" $value }}.\n This\ + \ symptom might indicate disk saturation.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodediskiosaturation\n\ + \ summary: Disk IO queue is high.\n expr: rate(node_disk_io_time_weighted_seconds_total{job=\"\ + node-exporter\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"\ + }[5m])\n > 10\n for: 30m\n labels:\n severity: warning\n - alert:\ + \ NodeSystemdServiceFailed\n annotations:\n description: Systemd service\ + \ {{ $labels.name }} has entered failed state at\n {{ $labels.instance\ + \ }}\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemdservicefailed\n\ + \ summary: Systemd service has entered failed state.\n expr: node_systemd_unit_state{job=\"\ + node-exporter\", state=\"failed\"} == 1\n for: 5m\n labels:\n severity:\ + \ warning\n - alert: NodeSystemdServiceCrashlooping\n annotations:\n \ + \ description: Systemd service {{ $labels.name }} has being restarted too many\n\ + \ times at {{ $labels.instance }} for the last 15 minutes. 
Please check\ + \ if service\n is crash looping.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemdservicecrashlooping\n\ + \ summary: Systemd service keeps restaring, possibly crash looping.\n \ + \ expr: increase(node_systemd_service_restart_total{job=\"node-exporter\"}[5m])\ + \ >\n 2\n for: 15m\n labels:\n severity: warning\n - alert: NodeBondingDegraded\n\ + \ annotations:\n description: Bonding interface {{ $labels.master }} on\ + \ {{ $labels.instance }}\n is in degraded state due to one or more slave\ + \ failures.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodebondingdegraded\n\ + \ summary: Bonding interface is degraded.\n expr: (node_bonding_slaves{job=\"\ + node-exporter\"} - node_bonding_active{job=\"node-exporter\"})\n != 0\n \ + \ for: 5m\n labels:\n severity: warning\n" + monitoring-kube-prometheus-stack-node-exporter.rules-501fd5f0-0366-455a-80cc-5e208856f211.yaml: "groups:\n\ + - name: node-exporter.rules\n rules:\n - expr: |-\n count without (cpu,\ + \ mode) (\n node_cpu_seconds_total{job=\"node-exporter\",mode=\"idle\"\ + }\n )\n record: instance:node_num_cpu:sum\n - expr: |-\n 1 - avg\ + \ without (cpu) (\n sum without (mode) (rate(node_cpu_seconds_total{job=\"\ + node-exporter\", mode=~\"idle|iowait|steal\"}[5m]))\n )\n record: instance:node_cpu_utilisation:rate5m\n\ + \ - expr: |-\n (\n node_load1{job=\"node-exporter\"}\n /\n \ + \ instance:node_num_cpu:sum{job=\"node-exporter\"}\n )\n record:\ + \ instance:node_load1_per_cpu:ratio\n - expr: |-\n 1 - (\n (\n \ + \ node_memory_MemAvailable_bytes{job=\"node-exporter\"}\n or\n\ + \ (\n node_memory_Buffers_bytes{job=\"node-exporter\"}\n \ + \ +\n node_memory_Cached_bytes{job=\"node-exporter\"}\n \ + \ +\n node_memory_MemFree_bytes{job=\"node-exporter\"}\n\ + \ +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n \ + \ )\n )\n /\n node_memory_MemTotal_bytes{job=\"node-exporter\"\ + }\n )\n record: instance:node_memory_utilisation:ratio\n - 
expr: rate(node_vmstat_pgmajfault{job=\"\ + node-exporter\"}[5m])\n record: instance:node_vmstat_pgmajfault:rate5m\n -\ + \ expr: rate(node_disk_io_time_seconds_total{job=\"node-exporter\", device=~\"\ + (/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[5m])\n\ + \ record: instance_device:node_disk_io_time_seconds:rate5m\n - expr: rate(node_disk_io_time_weighted_seconds_total{job=\"\ + node-exporter\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"\ + }[5m])\n record: instance_device:node_disk_io_time_weighted_seconds:rate5m\n\ + \ - expr: |-\n sum without (device) (\n rate(node_network_receive_bytes_total{job=\"\ + node-exporter\", device!=\"lo\"}[5m])\n )\n record: instance:node_network_receive_bytes_excluding_lo:rate5m\n\ + \ - expr: |-\n sum without (device) (\n rate(node_network_transmit_bytes_total{job=\"\ + node-exporter\", device!=\"lo\"}[5m])\n )\n record: instance:node_network_transmit_bytes_excluding_lo:rate5m\n\ + \ - expr: |-\n sum without (device) (\n rate(node_network_receive_drop_total{job=\"\ + node-exporter\", device!=\"lo\"}[5m])\n )\n record: instance:node_network_receive_drop_excluding_lo:rate5m\n\ + \ - expr: |-\n sum without (device) (\n rate(node_network_transmit_drop_total{job=\"\ + node-exporter\", device!=\"lo\"}[5m])\n )\n record: instance:node_network_transmit_drop_excluding_lo:rate5m\n\ + \ - expr: |-\n sum without (device) (\n rate(node_network_receive_bytes_total{job=\"\ + node-exporter\", device!~\"lo|veth.+\"}[5m])\n )\n record: instance:node_network_receive_bytes_physical:rate5m\n\ + \ - expr: |-\n sum without (device) (\n rate(node_network_transmit_bytes_total{job=\"\ + node-exporter\", device!~\"lo|veth.+\"}[5m])\n )\n record: instance:node_network_transmit_bytes_physical:rate5m\n\ + \ - expr: |-\n sum without (device) (\n rate(node_network_receive_drop_total{job=\"\ + node-exporter\", device!~\"lo|veth.+\"}[5m])\n )\n record: 
instance:node_network_receive_drop_physical:rate5m\n\ + \ - expr: |-\n sum without (device) (\n rate(node_network_transmit_drop_total{job=\"\ + node-exporter\", device!~\"lo|veth.+\"}[5m])\n )\n record: instance:node_network_transmit_drop_physical:rate5m\n" + monitoring-kube-prometheus-stack-node-network-6268a5b5-2d1a-4ed0-b8b3-a03a12b3390d.yaml: "groups:\n\ + - name: node-network\n rules:\n - alert: NodeNetworkInterfaceFlapping\n annotations:\n\ + \ description: Network interface \"{{ $labels.device }}\" changing its up\ + \ status\n often on node-exporter {{ $labels.namespace }}/{{ $labels.pod\ + \ }}\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/nodenetworkinterfaceflapping\n\ + \ summary: Network interface is often changing its status\n expr: changes(node_network_up{job=\"\ + node-exporter\",device!~\"veth.+\"}[2m]) > 2\n for: 2m\n labels:\n \ + \ severity: warning\n" + monitoring-kube-prometheus-stack-node.rules-a9b43f41-87eb-4fea-856f-1f513c9d1ee3.yaml: "groups:\n\ + - name: node.rules\n rules:\n - expr: |-\n topk by (cluster, namespace,\ + \ pod) (1,\n max by (cluster, node, namespace, pod) (\n label_replace(kube_pod_info{job=\"\ + kube-state-metrics\",node!=\"\"}, \"pod\", \"$1\", \"pod\", \"(.*)\")\n ))\n\ + \ record: 'node_namespace_pod:kube_pod_info:'\n - expr: |-\n count by\ + \ (cluster, node) (\n node_cpu_seconds_total{mode=\"idle\",job=\"node-exporter\"\ + }\n * on (cluster, namespace, pod) group_left(node)\n topk by (cluster,\ + \ namespace, pod) (1, node_namespace_pod:kube_pod_info:)\n )\n record:\ + \ node:node_num_cpu:sum\n - expr: |-\n sum(\n node_memory_MemAvailable_bytes{job=\"\ + node-exporter\"} or\n (\n node_memory_Buffers_bytes{job=\"node-exporter\"\ + } +\n node_memory_Cached_bytes{job=\"node-exporter\"} +\n node_memory_MemFree_bytes{job=\"\ + node-exporter\"} +\n node_memory_Slab_bytes{job=\"node-exporter\"}\n\ + \ )\n ) by (cluster)\n record: :node_memory_MemAvailable_bytes:sum\n\ + \ - expr: |-\n avg by 
(cluster, node) (\n sum without (mode) (\n\ + \ rate(node_cpu_seconds_total{mode!=\"idle\",mode!=\"iowait\",mode!=\"\ + steal\",job=\"node-exporter\"}[5m])\n )\n )\n record: node:node_cpu_utilization:ratio_rate5m\n\ + \ - expr: |-\n avg by (cluster) (\n node:node_cpu_utilization:ratio_rate5m\n\ + \ )\n record: cluster:node_cpu:ratio_rate5m\n" + monitoring-kube-prometheus-stack-prometheus-7347b3a7-f0fa-4d43-bd9e-c3f35a7087b9.yaml: "groups:\n\ + - name: prometheus\n rules:\n - alert: PrometheusBadConfig\n annotations:\n\ + \ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed\ + \ to\n reload its configuration.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusbadconfig\n\ + \ summary: Failed Prometheus configuration reload.\n expr: |-\n #\ + \ Without max_over_time, failed scrapes could create false negatives, see\n \ + \ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for\ + \ details.\n max_over_time(prometheus_config_last_reload_successful{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m]) == 0\n for:\ + \ 10m\n labels:\n severity: critical\n - alert: PrometheusSDRefreshFailure\n\ + \ annotations:\n description: Prometheus {{$labels.namespace}}/{{$labels.pod}}\ + \ has failed to\n refresh SD with mechanism {{$labels.mechanism}}.\n \ + \ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheussdrefreshfailure\n\ + \ summary: Failed Prometheus SD refresh.\n expr: increase(prometheus_sd_refresh_failures_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[10m])\n > 0\n\ + \ for: 20m\n labels:\n severity: warning\n - alert: PrometheusKubernetesListWatchFailures\n\ + \ annotations:\n description: Kubernetes service discovery of Prometheus\ + \ {{$labels.namespace}}/{{$labels.pod}}\n is experiencing {{ printf \"\ + %.0f\" $value }} failures with LIST/WATCH requests\n to the Kubernetes\ + \ API in the last 5 
minutes.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuskuberneteslistwatchfailures\n\ + \ summary: Requests in Kubernetes SD are failing.\n expr: increase(prometheus_sd_kubernetes_failures_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m])\n > 0\n\ + \ for: 15m\n labels:\n severity: warning\n - alert: PrometheusNotificationQueueRunningFull\n\ + \ annotations:\n description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}}\n\ + \ is running full.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotificationqueuerunningfull\n\ + \ summary: Prometheus alert notification queue predicted to run full in less\ + \ than\n 30m.\n expr: |-\n # Without min_over_time, failed scrapes\ + \ could create false negatives, see\n # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0\ + \ for details.\n (\n predict_linear(prometheus_notifications_queue_length{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m], 60 * 30)\n \ + \ >\n min_over_time(prometheus_notifications_queue_capacity{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m])\n )\n \ + \ for: 15m\n labels:\n severity: warning\n - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers\n\ + \ annotations:\n description: '{{ printf \"%.1f\" $value }}% of alerts\ + \ sent by Prometheus {{$labels.namespace}}/{{$labels.pod}}\n to Alertmanager\ + \ {{$labels.alertmanager}} were affected by errors.'\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstosomealertmanagers\n\ + \ summary: More than 1% of alerts sent by Prometheus to a specific Alertmanager\n\ + \ were affected by errors.\n expr: |-\n (\n rate(prometheus_notifications_errors_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m])\n /\n \ + \ 
rate(prometheus_notifications_sent_total{job=\"kube-prometheus-stack-prometheus\"\ + ,namespace=\"monitoring\"}[5m])\n )\n * 100\n > 1\n for: 15m\n\ + \ labels:\n severity: warning\n - alert: PrometheusNotConnectedToAlertmanagers\n\ + \ annotations:\n description: Prometheus {{$labels.namespace}}/{{$labels.pod}}\ + \ is not connected\n to any Alertmanagers.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotconnectedtoalertmanagers\n\ + \ summary: Prometheus is not connected to any Alertmanagers.\n expr: |-\n\ + \ # Without max_over_time, failed scrapes could create false negatives, see\n\ + \ # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0\ + \ for details.\n max_over_time(prometheus_notifications_alertmanagers_discovered{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m]) < 1\n for:\ + \ 10m\n labels:\n severity: warning\n - alert: PrometheusTSDBReloadsFailing\n\ + \ annotations:\n description: Prometheus {{$labels.namespace}}/{{$labels.pod}}\ + \ has detected {{$value\n | humanize}} reload failures over the last 3h.\n\ + \ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbreloadsfailing\n\ + \ summary: Prometheus has issues reloading blocks from disk.\n expr: increase(prometheus_tsdb_reloads_failures_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[3h])\n > 0\n\ + \ for: 4h\n labels:\n severity: warning\n - alert: PrometheusTSDBCompactionsFailing\n\ + \ annotations:\n description: Prometheus {{$labels.namespace}}/{{$labels.pod}}\ + \ has detected {{$value\n | humanize}} compaction failures over the last\ + \ 3h.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbcompactionsfailing\n\ + \ summary: Prometheus has issues compacting blocks.\n expr: increase(prometheus_tsdb_compactions_failed_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[3h])\n > 
0\n\ + \ for: 4h\n labels:\n severity: warning\n - alert: PrometheusNotIngestingSamples\n\ + \ annotations:\n description: Prometheus {{$labels.namespace}}/{{$labels.pod}}\ + \ is not ingesting\n samples.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotingestingsamples\n\ + \ summary: Prometheus is not ingesting samples.\n expr: |-\n (\n\ + \ sum without(type) (rate(prometheus_tsdb_head_samples_appended_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m])) <= 0\n \ + \ and\n (\n sum without(scrape_job) (prometheus_target_metadata_cache_entries{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}) > 0\n or\n\ + \ sum without(rule_group) (prometheus_rule_group_rules{job=\"kube-prometheus-stack-prometheus\"\ + ,namespace=\"monitoring\"}) > 0\n )\n )\n for: 10m\n labels:\n\ + \ severity: warning\n - alert: PrometheusDuplicateTimestamps\n annotations:\n\ + \ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping\ + \ {{\n printf \"%.4g\" $value }} samples/s with different values but duplicated\ + \ timestamp.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusduplicatetimestamps\n\ + \ summary: Prometheus is dropping samples with duplicate timestamps.\n \ + \ expr: rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m])\n > 0\n\ + \ for: 10m\n labels:\n severity: warning\n - alert: PrometheusOutOfOrderTimestamps\n\ + \ annotations:\n description: Prometheus {{$labels.namespace}}/{{$labels.pod}}\ + \ is dropping {{\n printf \"%.4g\" $value }} samples/s with timestamps\ + \ arriving out of order.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusoutofordertimestamps\n\ + \ summary: Prometheus drops samples with out-of-order timestamps.\n expr:\ + \ 
rate(prometheus_target_scrapes_sample_out_of_order_total{job=\"kube-prometheus-stack-prometheus\"\ + ,namespace=\"monitoring\"}[5m])\n > 0\n for: 10m\n labels:\n severity:\ + \ warning\n - alert: PrometheusRemoteStorageFailures\n annotations:\n \ + \ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send\n\ + \ {{ printf \"%.1f\" $value }}% of the samples to {{ $labels.remote_name}}:{{\n\ + \ $labels.url }}\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotestoragefailures\n\ + \ summary: Prometheus fails to send samples to remote storage.\n expr:\ + \ |-\n (\n (rate(prometheus_remote_storage_failed_samples_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m]))\n /\n\ + \ (\n (rate(prometheus_remote_storage_failed_samples_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m]))\n +\n\ + \ (rate(prometheus_remote_storage_succeeded_samples_total{job=\"kube-prometheus-stack-prometheus\"\ + ,namespace=\"monitoring\"}[5m]) or rate(prometheus_remote_storage_samples_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m]))\n )\n\ + \ )\n * 100\n > 1\n for: 15m\n labels:\n severity: critical\n\ + \ - alert: PrometheusRemoteWriteBehind\n annotations:\n description:\ + \ Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is\n {{\ + \ printf \"%.1f\" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url\n\ + \ }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritebehind\n\ + \ summary: Prometheus remote write is behind.\n expr: |-\n # Without\ + \ max_over_time, failed scrapes could create false negatives, see\n # 
https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0\ + \ for details.\n (\n max_over_time(prometheus_remote_storage_queue_highest_timestamp_seconds{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m])\n -\n \ + \ max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m])\n )\n \ + \ > 120\n for: 15m\n labels:\n severity: critical\n - alert: PrometheusRemoteWriteDesiredShards\n\ + \ annotations:\n description: Prometheus {{$labels.namespace}}/{{$labels.pod}}\ + \ remote write desired\n shards calculation wants to run {{ $value }} shards\ + \ for queue {{ $labels.remote_name}}:{{\n $labels.url }}, which is more\ + \ than the max of {{ printf `prometheus_remote_storage_shards_max{instance=\"\ + %s\",job=\"kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}`\n \ + \ $labels.instance | query | first | value }}.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritedesiredshards\n\ + \ summary: Prometheus remote write desired shards calculation wants to run\ + \ more\n than configured max shards.\n expr: |-\n # Without max_over_time,\ + \ failed scrapes could create false negatives, see\n # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0\ + \ for details.\n (\n max_over_time(prometheus_remote_storage_shards_desired{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m])\n >\n \ + \ max_over_time(prometheus_remote_storage_shards_max{job=\"kube-prometheus-stack-prometheus\"\ + ,namespace=\"monitoring\"}[5m])\n )\n for: 15m\n labels:\n severity:\ + \ warning\n - alert: PrometheusRuleFailures\n annotations:\n description:\ + \ Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to\n evaluate\ + \ {{ printf \"%.0f\" $value }} rules in the last 5m.\n runbook_url: 
https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusrulefailures\n\ + \ summary: Prometheus is failing rule evaluations.\n expr: increase(prometheus_rule_evaluation_failures_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m])\n > 0\n\ + \ for: 15m\n labels:\n severity: critical\n - alert: PrometheusMissingRuleEvaluations\n\ + \ annotations:\n description: Prometheus {{$labels.namespace}}/{{$labels.pod}}\ + \ has missed {{\n printf \"%.0f\" $value }} rule group evaluations in the\ + \ last 5m.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusmissingruleevaluations\n\ + \ summary: Prometheus is missing rule evaluations due to slow rule group\ + \ evaluation.\n expr: increase(prometheus_rule_group_iterations_missed_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m])\n > 0\n\ + \ for: 15m\n labels:\n severity: warning\n - alert: PrometheusTargetLimitHit\n\ + \ annotations:\n description: Prometheus {{$labels.namespace}}/{{$labels.pod}}\ + \ has dropped {{\n printf \"%.0f\" $value }} targets because the number\ + \ of targets exceeded the\n configured target_limit.\n runbook_url:\ + \ https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetlimithit\n\ + \ summary: Prometheus has dropped targets because some scrape configs have\ + \ exceeded\n the targets limit.\n expr: increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m])\n > 0\n\ + \ for: 15m\n labels:\n severity: warning\n - alert: PrometheusLabelLimitHit\n\ + \ annotations:\n description: Prometheus {{$labels.namespace}}/{{$labels.pod}}\ + \ has dropped {{\n printf \"%.0f\" $value }} targets because some samples\ + \ exceeded the configured\n label_limit, label_name_length_limit or label_value_length_limit.\n\ + \ runbook_url: 
https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuslabellimithit\n\ + \ summary: Prometheus has dropped targets because some scrape configs have\ + \ exceeded\n the labels limit.\n expr: increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m])\n > 0\n\ + \ for: 15m\n labels:\n severity: warning\n - alert: PrometheusScrapeBodySizeLimitHit\n\ + \ annotations:\n description: Prometheus {{$labels.namespace}}/{{$labels.pod}}\ + \ has failed {{\n printf \"%.0f\" $value }} scrapes in the last 5m because\ + \ some targets exceeded\n the configured body_size_limit.\n runbook_url:\ + \ https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapebodysizelimithit\n\ + \ summary: Prometheus has dropped some targets that exceeded body size limit.\n\ + \ expr: increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m])\n > 0\n\ + \ for: 15m\n labels:\n severity: warning\n - alert: PrometheusScrapeSampleLimitHit\n\ + \ annotations:\n description: Prometheus {{$labels.namespace}}/{{$labels.pod}}\ + \ has failed {{\n printf \"%.0f\" $value }} scrapes in the last 5m because\ + \ some targets exceeded\n the configured sample_limit.\n runbook_url:\ + \ https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusscrapesamplelimithit\n\ + \ summary: Prometheus has failed scrapes that have exceeded the configured\ + \ sample\n limit.\n expr: increase(prometheus_target_scrapes_exceeded_sample_limit_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m])\n > 0\n\ + \ for: 15m\n labels:\n severity: warning\n - alert: PrometheusTargetSyncFailure\n\ + \ annotations:\n description: '{{ printf \"%.0f\" $value }} targets in\ + \ Prometheus {{$labels.namespace}}/{{$labels.pod}}\n have failed to sync\ + \ because invalid configuration was supplied.'\n 
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetsyncfailure\n\ + \ summary: Prometheus has failed to sync targets.\n expr: increase(prometheus_target_sync_failed_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[30m])\n > 0\n\ + \ for: 5m\n labels:\n severity: critical\n - alert: PrometheusHighQueryLoad\n\ + \ annotations:\n description: Prometheus {{$labels.namespace}}/{{$labels.pod}}\ + \ query API has\n less than 20% available capacity in its query engine\ + \ for the last 15 minutes.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheushighqueryload\n\ + \ summary: Prometheus is reaching its maximum capacity serving concurrent\ + \ requests.\n expr: avg_over_time(prometheus_engine_queries{job=\"kube-prometheus-stack-prometheus\"\ + ,namespace=\"monitoring\"}[5m])\n / max_over_time(prometheus_engine_queries_concurrent_max{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\"}[5m])\n > 0.8\n\ + \ for: 15m\n labels:\n severity: warning\n - alert: PrometheusErrorSendingAlertsToAnyAlertmanager\n\ + \ annotations:\n description: '{{ printf \"%.1f\" $value }}% minimum errors\ + \ while sending alerts\n from Prometheus {{$labels.namespace}}/{{$labels.pod}}\ + \ to any Alertmanager.'\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstoanyalertmanager\n\ + \ summary: Prometheus encounters more than 3% errors sending alerts to any\ + \ Alertmanager.\n expr: |-\n min without (alertmanager) (\n rate(prometheus_notifications_errors_total{job=\"\ + kube-prometheus-stack-prometheus\",namespace=\"monitoring\",alertmanager!~``}[5m])\n\ + \ /\n rate(prometheus_notifications_sent_total{job=\"kube-prometheus-stack-prometheus\"\ + ,namespace=\"monitoring\",alertmanager!~``}[5m])\n )\n * 100\n \ + \ > 3\n for: 15m\n labels:\n severity: critical\n" + 
monitoring-kube-prometheus-stack-prometheus-operator-6e123f81-1bd4-4e2f-b8ae-c1287bf5cd37.yaml: "groups:\n\ + - name: prometheus-operator\n rules:\n - alert: PrometheusOperatorListErrors\n\ + \ annotations:\n description: Errors while performing List operations\ + \ in controller {{$labels.controller}}\n in {{$labels.namespace}} namespace.\n\ + \ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorlisterrors\n\ + \ summary: Errors while performing list operations in controller.\n expr:\ + \ (sum by (cluster,controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job=\"\ + kube-prometheus-stack-operator\",namespace=\"monitoring\"}[10m]))\n / sum\ + \ by (cluster,controller,namespace) (rate(prometheus_operator_list_operations_total{job=\"\ + kube-prometheus-stack-operator\",namespace=\"monitoring\"}[10m])))\n > 0.4\n\ + \ for: 15m\n labels:\n severity: warning\n - alert: PrometheusOperatorWatchErrors\n\ + \ annotations:\n description: Errors while performing watch operations\ + \ in controller {{$labels.controller}}\n in {{$labels.namespace}} namespace.\n\ + \ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorwatcherrors\n\ + \ summary: Errors while performing watch operations in controller.\n expr:\ + \ (sum by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job=\"\ + kube-prometheus-stack-operator\",namespace=\"monitoring\"}[5m]))\n / sum\ + \ by (cluster,controller,namespace) (rate(prometheus_operator_watch_operations_total{job=\"\ + kube-prometheus-stack-operator\",namespace=\"monitoring\"}[5m])))\n > 0.4\n\ + \ for: 15m\n labels:\n severity: warning\n - alert: PrometheusOperatorSyncFailed\n\ + \ annotations:\n description: Controller {{ $labels.controller }} in {{\ + \ $labels.namespace }}\n namespace fails to reconcile {{ $value }} objects.\n\ + \ runbook_url: 
https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorsyncfailed\n\ + \ summary: Last controller reconciliation failed\n expr: min_over_time(prometheus_operator_syncs{status=\"\ + failed\",job=\"kube-prometheus-stack-operator\",namespace=\"monitoring\"}[5m])\n\ + \ > 0\n for: 10m\n labels:\n severity: warning\n - alert: PrometheusOperatorReconcileErrors\n\ + \ annotations:\n description: '{{ $value | humanizePercentage }} of reconciling\ + \ operations failed\n for {{ $labels.controller }} controller in {{ $labels.namespace\ + \ }} namespace.'\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorreconcileerrors\n\ + \ summary: Errors while reconciling objects.\n expr: (sum by (cluster,controller,namespace)\ + \ (rate(prometheus_operator_reconcile_errors_total{job=\"kube-prometheus-stack-operator\"\ + ,namespace=\"monitoring\"}[5m])))\n / (sum by (cluster,controller,namespace)\ + \ (rate(prometheus_operator_reconcile_operations_total{job=\"kube-prometheus-stack-operator\"\ + ,namespace=\"monitoring\"}[5m])))\n > 0.1\n for: 10m\n labels:\n \ + \ severity: warning\n - alert: PrometheusOperatorStatusUpdateErrors\n annotations:\n\ + \ description: '{{ $value | humanizePercentage }} of status update operations\n\ + \ failed for {{ $labels.controller }} controller in {{ $labels.namespace\ + \ }}\n namespace.'\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorstatusupdateerrors\n\ + \ summary: Errors while updating objects status.\n expr: (sum by (cluster,controller,namespace)\ + \ (rate(prometheus_operator_status_update_errors_total{job=\"kube-prometheus-stack-operator\"\ + ,namespace=\"monitoring\"}[5m])))\n / (sum by (cluster,controller,namespace)\ + \ (rate(prometheus_operator_status_update_operations_total{job=\"kube-prometheus-stack-operator\"\ + ,namespace=\"monitoring\"}[5m])))\n > 0.1\n for: 10m\n labels:\n \ + \ severity: 
warning\n - alert: PrometheusOperatorNodeLookupErrors\n annotations:\n\ + \ description: Errors while reconciling Prometheus in {{ $labels.namespace\ + \ }}\n Namespace.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornodelookuperrors\n\ + \ summary: Errors while reconciling Prometheus.\n expr: rate(prometheus_operator_node_address_lookup_errors_total{job=\"\ + kube-prometheus-stack-operator\",namespace=\"monitoring\"}[5m])\n > 0.1\n\ + \ for: 10m\n labels:\n severity: warning\n - alert: PrometheusOperatorNotReady\n\ + \ annotations:\n description: Prometheus operator in {{ $labels.namespace\ + \ }} namespace isn't\n ready to reconcile {{ $labels.controller }} resources.\n\ + \ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornotready\n\ + \ summary: Prometheus operator not ready\n expr: min by (cluster,controller,namespace)\ + \ (max_over_time(prometheus_operator_ready{job=\"kube-prometheus-stack-operator\"\ + ,namespace=\"monitoring\"}[5m])\n == 0)\n for: 5m\n labels:\n \ + \ severity: warning\n - alert: PrometheusOperatorRejectedResources\n annotations:\n\ + \ description: Prometheus operator in {{ $labels.namespace }} namespace rejected\n\ + \ {{ printf \"%0.0f\" $value }} {{ $labels.controller }}/{{ $labels.resource\ + \ }}\n resources.\n runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorrejectedresources\n\ + \ summary: Resources rejected by Prometheus operator\n expr: min_over_time(prometheus_operator_managed_resources{state=\"\ + rejected\",job=\"kube-prometheus-stack-operator\",namespace=\"monitoring\"}[5m])\n\ + \ > 0\n for: 5m\n labels:\n severity: warning\n" +kind: ConfigMap +metadata: + labels: + app.kubernetes.io/managed-by: prometheus-operator + managed-by: prometheus-operator + prometheus-name: kube-prometheus-stack-prometheus + name: prometheus-kube-prometheus-stack-prometheus-rulefiles-0 + 
namespace: monitoring + ownerReferences: + - apiVersion: monitoring.coreos.com/v1 + blockOwnerDeletion: true + controller: true + kind: Prometheus + name: kube-prometheus-stack-prometheus + uid: f0355616-4bfa-4409-8b5f-c1c815ee7a2a + diff --git a/monitoring/daemonset-kube-prometheus-stack-prometheus-node-exporter.yaml b/monitoring/daemonset-kube-prometheus-stack-prometheus-node-exporter.yaml new file mode 100644 index 0000000..724ac61 --- /dev/null +++ b/monitoring/daemonset-kube-prometheus-stack-prometheus-node-exporter.yaml @@ -0,0 +1,143 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + annotations: + deprecated.daemonset.template.generation: '1' + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app.kubernetes.io/component: metrics + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/part-of: prometheus-node-exporter + app.kubernetes.io/version: 1.11.1 + helm.sh/chart: prometheus-node-exporter-4.53.1 + release: kube-prometheus-stack + name: kube-prometheus-stack-prometheus-node-exporter + namespace: monitoring +spec: + revisionHistoryLimit: 10 + selector: + matchLabels: + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/name: prometheus-node-exporter + template: + metadata: + annotations: + cluster-autoscaler.kubernetes.io/safe-to-evict: 'true' + labels: + app.kubernetes.io/component: metrics + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/part-of: prometheus-node-exporter + app.kubernetes.io/version: 1.11.1 + helm.sh/chart: prometheus-node-exporter-4.53.1 + jobLabel: node-exporter + release: kube-prometheus-stack + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: 
eks.amazonaws.com/compute-type + operator: NotIn + values: + - fargate + - key: type + operator: NotIn + values: + - virtual-kubelet + automountServiceAccountToken: false + containers: + - args: + - --path.procfs=/host/proc + - --path.sysfs=/host/sys + - --path.rootfs=/host/root + - --path.udev.data=/host/root/run/udev/data + - --web.listen-address=[$(HOST_IP)]:9100 + - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run/containerd/.+|var/lib/docker/.+|var/lib/kubelet/.+)($|/) + - --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs|erofs)$ + env: + - name: HOST_IP + value: 0.0.0.0 + image: quay.io/prometheus/node-exporter:v1.11.1 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 3 + httpGet: + path: / + port: http-metrics + scheme: HTTP + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + name: node-exporter + ports: + - containerPort: 9100 + name: http-metrics + protocol: TCP + readinessProbe: + failureThreshold: 3 + httpGet: + path: / + port: http-metrics + scheme: HTTP + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + resources: {} + securityContext: + readOnlyRootFilesystem: true + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /host/proc + name: proc + readOnly: true + - mountPath: /host/sys + name: sys + readOnly: true + - mountPath: /host/root + mountPropagation: HostToContainer + name: root + readOnly: true + dnsPolicy: ClusterFirst + hostNetwork: true + hostPID: true + nodeSelector: + kubernetes.io/os: linux + restartPolicy: Always + schedulerName: default-scheduler + securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + serviceAccount: kube-prometheus-stack-prometheus-node-exporter + serviceAccountName: 
kube-prometheus-stack-prometheus-node-exporter + terminationGracePeriodSeconds: 30 + tolerations: + - effect: NoSchedule + operator: Exists + volumes: + - hostPath: + path: /proc + type: '' + name: proc + - hostPath: + path: /sys + type: '' + name: sys + - hostPath: + path: / + type: '' + name: root + updateStrategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + type: RollingUpdate + diff --git a/monitoring/deployment-kube-prometheus-stack-grafana.yaml b/monitoring/deployment-kube-prometheus-stack-grafana.yaml new file mode 100644 index 0000000..19ef1d2 --- /dev/null +++ b/monitoring/deployment-kube-prometheus-stack-grafana.yaml @@ -0,0 +1,273 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '2' + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana + app.kubernetes.io/version: 12.4.2 + helm.sh/chart: grafana-11.5.0 + name: kube-prometheus-stack-grafana + namespace: monitoring +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/name: grafana + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + annotations: + checksum/config: b835787868e1c1e3616db22add4a94b4f3a7b8ccedf24d3e48c323334a2b023c + checksum/sc-dashboard-provider-config: e70bf6a851099d385178a76de9757bb0bef8299da6d8443602590e44f05fdf24 + checksum/secret: bed677784356b2af7fb0d87455db21f077853059b594101a4f6532bfbd962a7f + kubectl.kubernetes.io/default-container: grafana + kubectl.kubernetes.io/restartedAt: '2026-04-09T07:16:07Z' + labels: + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/name: grafana + app.kubernetes.io/version: 12.4.2 + helm.sh/chart: grafana-11.5.0 + 
spec: + automountServiceAccountToken: true + containers: + - env: + - name: METHOD + value: WATCH + - name: LABEL + value: grafana_dashboard + - name: LABEL_VALUE + value: '1' + - name: FOLDER + value: /tmp/dashboards + - name: RESOURCE + value: both + - name: NAMESPACE + value: ALL + - name: REQ_USERNAME + valueFrom: + secretKeyRef: + key: admin-user + name: kube-prometheus-stack-grafana + - name: REQ_PASSWORD + valueFrom: + secretKeyRef: + key: admin-password + name: kube-prometheus-stack-grafana + - name: REQ_URL + value: http://localhost:3000/api/admin/provisioning/dashboards/reload + - name: REQ_METHOD + value: POST + image: quay.io/kiwigrid/k8s-sidecar:2.6.0 + imagePullPolicy: IfNotPresent + name: grafana-sc-dashboard + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /tmp/dashboards + name: sc-dashboard-volume + - env: + - name: METHOD + value: WATCH + - name: LABEL + value: grafana_datasource + - name: LABEL_VALUE + value: '1' + - name: FOLDER + value: /etc/grafana/provisioning/datasources + - name: RESOURCE + value: both + - name: REQ_USERNAME + valueFrom: + secretKeyRef: + key: admin-user + name: kube-prometheus-stack-grafana + - name: REQ_PASSWORD + valueFrom: + secretKeyRef: + key: admin-password + name: kube-prometheus-stack-grafana + - name: REQ_URL + value: http://localhost:3000/api/admin/provisioning/datasources/reload + - name: REQ_METHOD + value: POST + image: quay.io/kiwigrid/k8s-sidecar:2.6.0 + imagePullPolicy: IfNotPresent + name: grafana-sc-datasources + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /etc/grafana/provisioning/datasources + name: 
sc-datasources-volume + - env: + - name: POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: GF_SECURITY_ADMIN_USER + valueFrom: + secretKeyRef: + key: admin-user + name: kube-prometheus-stack-grafana + - name: GF_SECURITY_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + key: admin-password + name: kube-prometheus-stack-grafana + - name: GF_PATHS_DATA + value: /var/lib/grafana/ + - name: GF_PATHS_LOGS + value: /var/log/grafana + - name: GF_PATHS_PLUGINS + value: /var/lib/grafana/plugins + - name: GF_PATHS_PROVISIONING + value: /etc/grafana/provisioning + - name: GF_UNIFIED_STORAGE_INDEX_PATH + value: /var/lib/grafana-search/bleve + image: docker.io/grafana/grafana:12.4.2 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 10 + httpGet: + path: /api/health + port: grafana + scheme: HTTP + initialDelaySeconds: 60 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 30 + name: grafana + ports: + - containerPort: 3000 + name: grafana + protocol: TCP + - containerPort: 9094 + name: gossip-tcp + protocol: TCP + - containerPort: 9094 + name: gossip-udp + protocol: UDP + - containerPort: 6060 + name: profiling + protocol: TCP + readinessProbe: + failureThreshold: 3 + httpGet: + path: /api/health + port: grafana + scheme: HTTP + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + privileged: false + seccompProfile: + type: RuntimeDefault + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /etc/grafana/grafana.ini + name: config + subPath: grafana.ini + - mountPath: /var/lib/grafana + name: storage + - mountPath: /var/lib/grafana-search + name: search + - mountPath: /tmp/dashboards + name: sc-dashboard-volume + - mountPath: /etc/grafana/provisioning/dashboards/sc-dashboardproviders.yaml + name: sc-dashboard-provider + subPath: provider.yaml + - mountPath: 
/etc/grafana/provisioning/datasources + name: sc-datasources-volume + dnsPolicy: ClusterFirst + enableServiceLinks: true + initContainers: + - command: + - chown + - -R + - 472:472 + - /var/lib/grafana + image: docker.io/library/busybox:1.37.0 + imagePullPolicy: IfNotPresent + name: init-chown-data + resources: {} + securityContext: + capabilities: + add: + - CHOWN + drop: + - ALL + readOnlyRootFilesystem: false + runAsNonRoot: false + runAsUser: 0 + seccompProfile: + type: RuntimeDefault + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /var/lib/grafana + name: storage + restartPolicy: Always + schedulerName: default-scheduler + securityContext: + fsGroup: 472 + runAsGroup: 472 + runAsNonRoot: true + runAsUser: 472 + serviceAccount: kube-prometheus-stack-grafana + serviceAccountName: kube-prometheus-stack-grafana + shareProcessNamespace: false + terminationGracePeriodSeconds: 30 + volumes: + - configMap: + defaultMode: 420 + name: kube-prometheus-stack-grafana + name: config + - name: storage + persistentVolumeClaim: + claimName: kube-prometheus-stack-grafana + - emptyDir: {} + name: search + - emptyDir: {} + name: sc-dashboard-volume + - configMap: + defaultMode: 420 + name: kube-prometheus-stack-grafana-config-dashboards + name: sc-dashboard-provider + - emptyDir: {} + name: sc-datasources-volume + diff --git a/monitoring/deployment-kube-prometheus-stack-kube-state-metrics.yaml b/monitoring/deployment-kube-prometheus-stack-kube-state-metrics.yaml new file mode 100644 index 0000000..0b503fd --- /dev/null +++ b/monitoring/deployment-kube-prometheus-stack-kube-state-metrics.yaml @@ -0,0 +1,101 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '1' + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app.kubernetes.io/component: metrics + app.kubernetes.io/instance: kube-prometheus-stack + 
app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/version: 2.18.0 + helm.sh/chart: kube-state-metrics-7.2.2 + release: kube-prometheus-stack + name: kube-prometheus-stack-kube-state-metrics + namespace: monitoring +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/name: kube-state-metrics + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + labels: + app.kubernetes.io/component: metrics + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/version: 2.18.0 + helm.sh/chart: kube-state-metrics-7.2.2 + release: kube-prometheus-stack + spec: + automountServiceAccountToken: true + containers: + - args: + - --port=8080 + - --resources=certificatesigningrequests,configmaps,cronjobs,daemonsets,deployments,endpointslices,horizontalpodautoscalers,ingresses,jobs,leases,limitranges,mutatingwebhookconfigurations,namespaces,networkpolicies,nodes,persistentvolumeclaims,persistentvolumes,poddisruptionbudgets,pods,replicasets,replicationcontrollers,resourcequotas,secrets,services,statefulsets,storageclasses,validatingwebhookconfigurations,volumeattachments + image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.18.0 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 3 + httpGet: + path: /livez + port: http + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 5 + name: kube-state-metrics + ports: + - containerPort: 8080 + name: http + protocol: TCP + - containerPort: 8081 + name: metrics + protocol: TCP + readinessProbe: + failureThreshold: 3 + httpGet: + path: /readyz + port: 
metrics + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 5 + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + seccompProfile: + type: RuntimeDefault + serviceAccount: kube-prometheus-stack-kube-state-metrics + serviceAccountName: kube-prometheus-stack-kube-state-metrics + terminationGracePeriodSeconds: 30 + diff --git a/monitoring/deployment-kube-prometheus-stack-operator.yaml b/monitoring/deployment-kube-prometheus-stack-operator.yaml new file mode 100644 index 0000000..190f8aa --- /dev/null +++ b/monitoring/deployment-kube-prometheus-stack-operator.yaml @@ -0,0 +1,126 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '1' + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-operator + app.kubernetes.io/component: prometheus-operator + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: kube-prometheus-stack-prometheus-operator + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-operator + namespace: monitoring +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: kube-prometheus-stack-operator + release: kube-prometheus-stack + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + labels: + app: 
kube-prometheus-stack-operator + app.kubernetes.io/component: prometheus-operator + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: kube-prometheus-stack-prometheus-operator + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + heritage: Helm + release: kube-prometheus-stack + spec: + automountServiceAccountToken: true + containers: + - args: + - --kubelet-service=kube-system/kube-prometheus-stack-kubelet + - --kubelet-endpoints=true + - --kubelet-endpointslice=false + - --localhost=127.0.0.1 + - --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.90.1 + - --config-reloader-cpu-request=0 + - --config-reloader-cpu-limit=0 + - --config-reloader-memory-request=0 + - --config-reloader-memory-limit=0 + - --thanos-default-base-image=quay.io/thanos/thanos:v0.41.0 + - --secret-field-selector=type!=kubernetes.io/dockercfg,type!=kubernetes.io/service-account-token,type!=helm.sh/release.v1 + - --web.enable-tls=true + - --web.cert-file=/cert/cert + - --web.key-file=/cert/key + - --web.listen-address=:10250 + - --web.tls-min-version=VersionTLS13 + env: + - name: GOGC + value: '30' + image: quay.io/prometheus-operator/prometheus-operator:v0.90.1 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 3 + httpGet: + path: /healthz + port: https + scheme: HTTPS + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + name: kube-prometheus-stack + ports: + - containerPort: 10250 + name: https + protocol: TCP + readinessProbe: + failureThreshold: 3 + httpGet: + path: /healthz + port: https + scheme: HTTPS + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - 
mountPath: /cert + name: tls-secret + readOnly: true + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + seccompProfile: + type: RuntimeDefault + serviceAccount: kube-prometheus-stack-operator + serviceAccountName: kube-prometheus-stack-operator + terminationGracePeriodSeconds: 30 + volumes: + - name: tls-secret + secret: + defaultMode: 420 + secretName: kube-prometheus-stack-admission + diff --git a/monitoring/deployment-uptime-kuma.yaml b/monitoring/deployment-uptime-kuma.yaml new file mode 100644 index 0000000..5a0d0e2 --- /dev/null +++ b/monitoring/deployment-uptime-kuma.yaml @@ -0,0 +1,58 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '2' + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"name":"uptime-kuma","namespace":"monitoring"},"spec":{"replicas":1,"selector":{"matchLabels":{"app":"uptime-kuma"}},"template":{"metadata":{"labels":{"app":"uptime-kuma"}},"spec":{"containers":[{"image":"louislam/uptime-kuma:1","name":"uptime-kuma","ports":[{"containerPort":3001}],"resources":{"limits":{"cpu":"300m","memory":"512Mi"},"requests":{"cpu":"50m","memory":"128Mi"}},"volumeMounts":[{"mountPath":"/app/data","name":"data"}]}],"volumes":[{"name":"data","persistentVolumeClaim":{"claimName":"uptime-kuma-pvc"}}]}}}} + + ' + name: uptime-kuma + namespace: monitoring +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: uptime-kuma + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + annotations: + kubectl.kubernetes.io/restartedAt: '2026-04-09T20:22:58Z' + labels: + app: uptime-kuma + spec: + containers: + - image: louislam/uptime-kuma:1 + imagePullPolicy: IfNotPresent + name: uptime-kuma + 
ports: + - containerPort: 3001 + protocol: TCP + resources: + limits: + cpu: 300m + memory: 512Mi + requests: + cpu: 50m + memory: 128Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /app/data + name: data + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 + volumes: + - name: data + persistentVolumeClaim: + claimName: uptime-kuma-pvc + diff --git a/monitoring/ingress-kube-prometheus-stack-grafana.yaml b/monitoring/ingress-kube-prometheus-stack-grafana.yaml new file mode 100644 index 0000000..cb3264f --- /dev/null +++ b/monitoring/ingress-kube-prometheus-stack-grafana.yaml @@ -0,0 +1,35 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.middlewares: authentik-authentik-forward-auth@kubernetescrd + labels: + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana + app.kubernetes.io/version: 12.4.2 + helm.sh/chart: grafana-11.5.0 + name: kube-prometheus-stack-grafana + namespace: monitoring +spec: + ingressClassName: traefik + rules: + - host: grafana.chemavx.xyz + http: + paths: + - backend: + service: + name: kube-prometheus-stack-grafana + port: + number: 80 + path: / + pathType: Prefix + tls: + - hosts: + - grafana.chemavx.xyz + secretName: grafana-tls + diff --git a/monitoring/ingress-prometheus.yaml b/monitoring/ingress-prometheus.yaml new file mode 100644 index 0000000..591844b --- /dev/null +++ b/monitoring/ingress-prometheus.yaml @@ -0,0 +1,29 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod 
+ kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"networking.k8s.io/v1","kind":"Ingress","metadata":{"annotations":{"cert-manager.io/cluster-issuer":"letsencrypt-prod","traefik.ingress.kubernetes.io/router.entrypoints":"websecure"},"name":"prometheus","namespace":"monitoring"},"spec":{"ingressClassName":"traefik","rules":[{"host":"prometheus.chemavx.xyz","http":{"paths":[{"backend":{"service":{"name":"kube-prometheus-stack-prometheus","port":{"number":9090}}},"path":"/","pathType":"Prefix"}]}}],"tls":[{"hosts":["prometheus.chemavx.xyz"],"secretName":"prometheus-tls"}]}} + + ' + traefik.ingress.kubernetes.io/router.entrypoints: websecure + name: prometheus + namespace: monitoring +spec: + ingressClassName: traefik + rules: + - host: prometheus.chemavx.xyz + http: + paths: + - backend: + service: + name: kube-prometheus-stack-prometheus + port: + number: 9090 + path: / + pathType: Prefix + tls: + - hosts: + - prometheus.chemavx.xyz + secretName: prometheus-tls + diff --git a/monitoring/ingress-uptime-kuma-redirect.yaml b/monitoring/ingress-uptime-kuma-redirect.yaml new file mode 100644 index 0000000..7fc399c --- /dev/null +++ b/monitoring/ingress-uptime-kuma-redirect.yaml @@ -0,0 +1,30 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + kubectl.kubernetes.io/last-applied-configuration: 
'{"apiVersion":"networking.k8s.io/v1","kind":"Ingress","metadata":{"annotations":{"cert-manager.io/cluster-issuer":"letsencrypt-prod","traefik.ingress.kubernetes.io/router.entrypoints":"websecure","traefik.ingress.kubernetes.io/router.middlewares":"monitoring-redirect-to-status@kubernetescrd"},"name":"uptime-kuma-redirect","namespace":"monitoring"},"spec":{"ingressClassName":"traefik","rules":[{"host":"uptime.chemavx.xyz","http":{"paths":[{"backend":{"service":{"name":"uptime-kuma","port":{"number":3001}}},"path":"/","pathType":"Prefix"}]}}],"tls":[{"hosts":["uptime.chemavx.xyz"],"secretName":"uptime-kuma-redirect-tls"}]}} + + ' + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.middlewares: monitoring-redirect-to-status@kubernetescrd + name: uptime-kuma-redirect + namespace: monitoring +spec: + ingressClassName: traefik + rules: + - host: uptime.chemavx.xyz + http: + paths: + - backend: + service: + name: uptime-kuma + port: + number: 3001 + path: / + pathType: Prefix + tls: + - hosts: + - uptime.chemavx.xyz + secretName: uptime-kuma-redirect-tls + diff --git a/monitoring/ingress-uptime-kuma.yaml b/monitoring/ingress-uptime-kuma.yaml new file mode 100644 index 0000000..10b4b1a --- /dev/null +++ b/monitoring/ingress-uptime-kuma.yaml @@ -0,0 +1,40 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + kubectl.kubernetes.io/last-applied-configuration: 
'{"apiVersion":"networking.k8s.io/v1","kind":"Ingress","metadata":{"annotations":{"cert-manager.io/cluster-issuer":"letsencrypt-prod","traefik.ingress.kubernetes.io/router.entrypoints":"websecure"},"name":"uptime-kuma","namespace":"monitoring"},"spec":{"ingressClassName":"traefik","rules":[{"host":"status.chemavx.xyz","http":{"paths":[{"backend":{"service":{"name":"uptime-kuma","port":{"number":3001}}},"path":"/","pathType":"Prefix"}]}}],"tls":[{"hosts":["status.chemavx.xyz"],"secretName":"uptime-kuma-tls"}]}} + + ' + traefik.ingress.kubernetes.io/router.entrypoints: websecure + name: uptime-kuma + namespace: monitoring +spec: + ingressClassName: traefik + rules: + - host: status.chemavx.xyz + http: + paths: + - backend: + service: + name: uptime-kuma + port: + number: 3001 + path: / + pathType: Prefix + - host: home.chemavx.xyz + http: + paths: + - backend: + service: + name: uptime-kuma + port: + number: 3001 + path: / + pathType: Prefix + tls: + - hosts: + - status.chemavx.xyz + - home.chemavx.xyz + secretName: uptime-kuma-tls + diff --git a/monitoring/pvc-kube-prometheus-stack-grafana.yaml b/monitoring/pvc-kube-prometheus-stack-grafana.yaml new file mode 100644 index 0000000..66784a7 --- /dev/null +++ b/monitoring/pvc-kube-prometheus-stack-grafana.yaml @@ -0,0 +1,29 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + pv.kubernetes.io/bind-completed: 'yes' + pv.kubernetes.io/bound-by-controller: 'yes' + volume.beta.kubernetes.io/storage-provisioner: rancher.io/local-path + volume.kubernetes.io/selected-node: chemavx-k8 + volume.kubernetes.io/storage-provisioner: rancher.io/local-path + labels: + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana + app.kubernetes.io/version: 12.4.2 + helm.sh/chart: grafana-11.5.0 + name: kube-prometheus-stack-grafana + namespace: monitoring 
+spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi + storageClassName: local-path + volumeMode: Filesystem + volumeName: pvc-fdd38be9-12b5-4dd8-b83a-2f9a9ffc61c2 + diff --git a/monitoring/pvc-prometheus-kube-prometheus-stack-prometheus-db-prometheus-kube-prometheus-stack-prometheus-0.yaml b/monitoring/pvc-prometheus-kube-prometheus-stack-prometheus-db-prometheus-kube-prometheus-stack-prometheus-0.yaml new file mode 100644 index 0000000..2639bd9 --- /dev/null +++ b/monitoring/pvc-prometheus-kube-prometheus-stack-prometheus-db-prometheus-kube-prometheus-stack-prometheus-0.yaml @@ -0,0 +1,28 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + annotations: + pv.kubernetes.io/bind-completed: 'yes' + pv.kubernetes.io/bound-by-controller: 'yes' + volume.beta.kubernetes.io/storage-provisioner: rancher.io/local-path + volume.kubernetes.io/selected-node: chemavx-k8 + volume.kubernetes.io/storage-provisioner: rancher.io/local-path + labels: + app.kubernetes.io/instance: kube-prometheus-stack-prometheus + app.kubernetes.io/managed-by: prometheus-operator + app.kubernetes.io/name: prometheus + operator.prometheus.io/name: kube-prometheus-stack-prometheus + operator.prometheus.io/shard: '0' + prometheus: kube-prometheus-stack-prometheus + name: prometheus-kube-prometheus-stack-prometheus-db-prometheus-kube-prometheus-stack-prometheus-0 + namespace: monitoring +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi + storageClassName: local-path + volumeMode: Filesystem + volumeName: pvc-9b25f4cf-48ee-4c15-80e9-2d13833f586d + diff --git a/monitoring/pvc-uptime-kuma-pvc.yaml b/monitoring/pvc-uptime-kuma-pvc.yaml new file mode 100644 index 0000000..e9d55bd --- /dev/null +++ b/monitoring/pvc-uptime-kuma-pvc.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: 
'{"apiVersion":"v1","kind":"PersistentVolumeClaim","metadata":{"annotations":{},"name":"uptime-kuma-pvc","namespace":"monitoring"},"spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"5Gi"}},"storageClassName":"local-path"}} + + ' + pv.kubernetes.io/bind-completed: 'yes' + pv.kubernetes.io/bound-by-controller: 'yes' + volume.beta.kubernetes.io/storage-provisioner: rancher.io/local-path + volume.kubernetes.io/selected-node: chemavx-k8 + volume.kubernetes.io/storage-provisioner: rancher.io/local-path + name: uptime-kuma-pvc + namespace: monitoring +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi + storageClassName: local-path + volumeMode: Filesystem + volumeName: pvc-fb1de67d-f175-482b-9dd8-7adf3fbfb982 + diff --git a/monitoring/secret-grafana-tls.yaml b/monitoring/secret-grafana-tls.yaml new file mode 100644 index 0000000..e556e87 --- /dev/null +++ b/monitoring/secret-grafana-tls.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +data: + tls.crt: REDACTED + tls.key: REDACTED +kind: Secret +metadata: + annotations: + cert-manager.io/alt-names: grafana.chemavx.xyz + cert-manager.io/certificate-name: grafana-tls + cert-manager.io/common-name: grafana.chemavx.xyz + cert-manager.io/ip-sans: '' + cert-manager.io/issuer-group: cert-manager.io + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod + cert-manager.io/uri-sans: '' + labels: + controller.cert-manager.io/fao: 'true' + name: grafana-tls + namespace: monitoring +type: kubernetes.io/tls + diff --git a/monitoring/secret-kube-prometheus-stack-admission.yaml b/monitoring/secret-kube-prometheus-stack-admission.yaml new file mode 100644 index 0000000..30c24a4 --- /dev/null +++ b/monitoring/secret-kube-prometheus-stack-admission.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +data: + ca: REDACTED + cert: REDACTED + key: REDACTED +kind: Secret +metadata: + name: kube-prometheus-stack-admission + namespace: monitoring +type: Opaque + diff --git 
a/monitoring/secret-kube-prometheus-stack-grafana.yaml b/monitoring/secret-kube-prometheus-stack-grafana.yaml new file mode 100644 index 0000000..1a1aa34 --- /dev/null +++ b/monitoring/secret-kube-prometheus-stack-grafana.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +data: + admin-password: REDACTED + admin-user: REDACTED + ldap-toml: REDACTED +kind: Secret +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app.kubernetes.io/component: admin-secret + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana + app.kubernetes.io/version: 12.4.2 + helm.sh/chart: grafana-11.5.0 + name: kube-prometheus-stack-grafana + namespace: monitoring +type: Opaque + diff --git a/monitoring/secret-prometheus-kube-prometheus-stack-prometheus-thanos-prometheus-http-client-file.yaml b/monitoring/secret-prometheus-kube-prometheus-stack-prometheus-thanos-prometheus-http-client-file.yaml new file mode 100644 index 0000000..528d24b --- /dev/null +++ b/monitoring/secret-prometheus-kube-prometheus-stack-prometheus-thanos-prometheus-http-client-file.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +data: + prometheus.http-client-file.yaml: REDACTED +kind: Secret +metadata: + labels: + app.kubernetes.io/managed-by: prometheus-operator + managed-by: prometheus-operator + name: prometheus-kube-prometheus-stack-prometheus-thanos-prometheus-http-client-file + namespace: monitoring + ownerReferences: + - apiVersion: monitoring.coreos.com/v1 + blockOwnerDeletion: true + controller: true + kind: Prometheus + name: kube-prometheus-stack-prometheus + uid: f0355616-4bfa-4409-8b5f-c1c815ee7a2a +type: Opaque + diff --git a/monitoring/secret-prometheus-kube-prometheus-stack-prometheus-tls-assets-0.yaml b/monitoring/secret-prometheus-kube-prometheus-stack-prometheus-tls-assets-0.yaml new file mode 100644 index 0000000..4537a02 --- /dev/null +++ 
b/monitoring/secret-prometheus-kube-prometheus-stack-prometheus-tls-assets-0.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +data: + 0_monitoring_kube-prometheus-stack-admission_ca: REDACTED +kind: Secret +metadata: + labels: + app.kubernetes.io/managed-by: prometheus-operator + managed-by: prometheus-operator + name: prometheus-kube-prometheus-stack-prometheus-tls-assets-0 + namespace: monitoring + ownerReferences: + - apiVersion: monitoring.coreos.com/v1 + blockOwnerDeletion: true + controller: true + kind: Prometheus + name: kube-prometheus-stack-prometheus + uid: f0355616-4bfa-4409-8b5f-c1c815ee7a2a +type: Opaque + diff --git a/monitoring/secret-prometheus-kube-prometheus-stack-prometheus-web-config.yaml b/monitoring/secret-prometheus-kube-prometheus-stack-prometheus-web-config.yaml new file mode 100644 index 0000000..c7adfcc --- /dev/null +++ b/monitoring/secret-prometheus-kube-prometheus-stack-prometheus-web-config.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +data: + web-config.yaml: REDACTED +kind: Secret +metadata: + labels: + app.kubernetes.io/managed-by: prometheus-operator + managed-by: prometheus-operator + name: prometheus-kube-prometheus-stack-prometheus-web-config + namespace: monitoring + ownerReferences: + - apiVersion: monitoring.coreos.com/v1 + blockOwnerDeletion: true + controller: true + kind: Prometheus + name: kube-prometheus-stack-prometheus + uid: f0355616-4bfa-4409-8b5f-c1c815ee7a2a +type: Opaque + diff --git a/monitoring/secret-prometheus-kube-prometheus-stack-prometheus.yaml b/monitoring/secret-prometheus-kube-prometheus-stack-prometheus.yaml new file mode 100644 index 0000000..acc1687 --- /dev/null +++ b/monitoring/secret-prometheus-kube-prometheus-stack-prometheus.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +data: + prometheus.yaml.gz: REDACTED +kind: Secret +metadata: + labels: + app.kubernetes.io/managed-by: prometheus-operator + managed-by: prometheus-operator + name: prometheus-kube-prometheus-stack-prometheus + namespace: monitoring + ownerReferences: 
+ - apiVersion: monitoring.coreos.com/v1 + blockOwnerDeletion: true + controller: true + kind: Prometheus + name: kube-prometheus-stack-prometheus + uid: f0355616-4bfa-4409-8b5f-c1c815ee7a2a +type: Opaque + diff --git a/monitoring/secret-prometheus-tls.yaml b/monitoring/secret-prometheus-tls.yaml new file mode 100644 index 0000000..87f9c35 --- /dev/null +++ b/monitoring/secret-prometheus-tls.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +data: + tls.crt: REDACTED + tls.key: REDACTED +kind: Secret +metadata: + annotations: + cert-manager.io/alt-names: prometheus.chemavx.xyz + cert-manager.io/certificate-name: prometheus-tls + cert-manager.io/common-name: prometheus.chemavx.xyz + cert-manager.io/ip-sans: '' + cert-manager.io/issuer-group: cert-manager.io + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod + cert-manager.io/uri-sans: '' + labels: + controller.cert-manager.io/fao: 'true' + name: prometheus-tls + namespace: monitoring +type: kubernetes.io/tls + diff --git a/monitoring/secret-uptime-kuma-redirect-tls.yaml b/monitoring/secret-uptime-kuma-redirect-tls.yaml new file mode 100644 index 0000000..7011c11 --- /dev/null +++ b/monitoring/secret-uptime-kuma-redirect-tls.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +data: + tls.crt: REDACTED + tls.key: REDACTED +kind: Secret +metadata: + annotations: + cert-manager.io/alt-names: uptime.chemavx.xyz + cert-manager.io/certificate-name: uptime-kuma-redirect-tls + cert-manager.io/common-name: uptime.chemavx.xyz + cert-manager.io/ip-sans: '' + cert-manager.io/issuer-group: cert-manager.io + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod + cert-manager.io/uri-sans: '' + labels: + controller.cert-manager.io/fao: 'true' + name: uptime-kuma-redirect-tls + namespace: monitoring +type: kubernetes.io/tls + diff --git a/monitoring/secret-uptime-kuma-tls.yaml b/monitoring/secret-uptime-kuma-tls.yaml new file mode 100644 index 0000000..9c06246 --- /dev/null +++ 
b/monitoring/secret-uptime-kuma-tls.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +data: + tls.crt: REDACTED + tls.key: REDACTED +kind: Secret +metadata: + annotations: + cert-manager.io/alt-names: home.chemavx.xyz,status.chemavx.xyz + cert-manager.io/certificate-name: uptime-kuma-tls + cert-manager.io/common-name: status.chemavx.xyz + cert-manager.io/ip-sans: '' + cert-manager.io/issuer-group: cert-manager.io + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod + cert-manager.io/uri-sans: '' + labels: + controller.cert-manager.io/fao: 'true' + name: uptime-kuma-tls + namespace: monitoring +type: kubernetes.io/tls + diff --git a/monitoring/service-kube-prometheus-stack-grafana.yaml b/monitoring/service-kube-prometheus-stack-grafana.yaml new file mode 100644 index 0000000..4c484e0 --- /dev/null +++ b/monitoring/service-kube-prometheus-stack-grafana.yaml @@ -0,0 +1,33 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana + app.kubernetes.io/version: 12.4.2 + helm.sh/chart: grafana-11.5.0 + name: kube-prometheus-stack-grafana + namespace: monitoring +spec: + clusterIP: 10.43.245.163 + clusterIPs: + - 10.43.245.163 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - name: http-web + port: 80 + protocol: TCP + targetPort: grafana + selector: + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/name: grafana + sessionAffinity: None + type: ClusterIP + diff --git a/monitoring/service-kube-prometheus-stack-kube-state-metrics.yaml b/monitoring/service-kube-prometheus-stack-kube-state-metrics.yaml new file mode 100644 index 0000000..ed1387c --- /dev/null +++ b/monitoring/service-kube-prometheus-stack-kube-state-metrics.yaml @@ -0,0 +1,36 @@ 
+apiVersion: v1 +kind: Service +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app.kubernetes.io/component: metrics + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/version: 2.18.0 + helm.sh/chart: kube-state-metrics-7.2.2 + release: kube-prometheus-stack + name: kube-prometheus-stack-kube-state-metrics + namespace: monitoring +spec: + clusterIP: 10.43.15.240 + clusterIPs: + - 10.43.15.240 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - name: http + port: 8080 + protocol: TCP + targetPort: http + selector: + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/name: kube-state-metrics + sessionAffinity: None + type: ClusterIP + diff --git a/monitoring/service-kube-prometheus-stack-operator.yaml b/monitoring/service-kube-prometheus-stack-operator.yaml new file mode 100644 index 0000000..c94345d --- /dev/null +++ b/monitoring/service-kube-prometheus-stack-operator.yaml @@ -0,0 +1,38 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-operator + app.kubernetes.io/component: prometheus-operator + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: kube-prometheus-stack-prometheus-operator + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-operator + namespace: monitoring +spec: + clusterIP: 10.43.87.49 + clusterIPs: + - 10.43.87.49 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack 
+ ports: + - name: https + port: 443 + protocol: TCP + targetPort: https + selector: + app: kube-prometheus-stack-operator + release: kube-prometheus-stack + sessionAffinity: None + type: ClusterIP + diff --git a/monitoring/service-kube-prometheus-stack-prometheus-node-exporter.yaml b/monitoring/service-kube-prometheus-stack-prometheus-node-exporter.yaml new file mode 100644 index 0000000..6e76fa9 --- /dev/null +++ b/monitoring/service-kube-prometheus-stack-prometheus-node-exporter.yaml @@ -0,0 +1,38 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + prometheus.io/scrape: 'true' + labels: + app.kubernetes.io/component: metrics + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/part-of: prometheus-node-exporter + app.kubernetes.io/version: 1.11.1 + helm.sh/chart: prometheus-node-exporter-4.53.1 + jobLabel: node-exporter + release: kube-prometheus-stack + name: kube-prometheus-stack-prometheus-node-exporter + namespace: monitoring +spec: + clusterIP: 10.43.196.58 + clusterIPs: + - 10.43.196.58 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - name: http-metrics + port: 9100 + protocol: TCP + targetPort: 9100 + selector: + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/name: prometheus-node-exporter + sessionAffinity: None + type: ClusterIP + diff --git a/monitoring/service-kube-prometheus-stack-prometheus.yaml b/monitoring/service-kube-prometheus-stack-prometheus.yaml new file mode 100644 index 0000000..d9ef452 --- /dev/null +++ b/monitoring/service-kube-prometheus-stack-prometheus.yaml @@ -0,0 +1,42 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: 
kube-prometheus-stack-prometheus + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + heritage: Helm + release: kube-prometheus-stack + self-monitor: 'true' + name: kube-prometheus-stack-prometheus + namespace: monitoring +spec: + clusterIP: 10.43.156.49 + clusterIPs: + - 10.43.156.49 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - name: http-web + port: 9090 + protocol: TCP + targetPort: 9090 + - appProtocol: http + name: reloader-web + port: 8080 + protocol: TCP + targetPort: reloader-web + selector: + app.kubernetes.io/name: prometheus + operator.prometheus.io/name: kube-prometheus-stack-prometheus + sessionAffinity: None + type: ClusterIP + diff --git a/monitoring/service-prometheus-operated.yaml b/monitoring/service-prometheus-operated.yaml new file mode 100644 index 0000000..5bc571e --- /dev/null +++ b/monitoring/service-prometheus-operated.yaml @@ -0,0 +1,32 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/managed-by: prometheus-operator + managed-by: prometheus-operator + operated-prometheus: 'true' + name: prometheus-operated + namespace: monitoring + ownerReferences: + - apiVersion: monitoring.coreos.com/v1 + kind: Prometheus + name: kube-prometheus-stack-prometheus + uid: f0355616-4bfa-4409-8b5f-c1c815ee7a2a +spec: + clusterIP: None + clusterIPs: + - None + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - name: http-web + port: 9090 + protocol: TCP + targetPort: http-web + selector: + app.kubernetes.io/name: prometheus + sessionAffinity: None + type: ClusterIP + diff --git a/monitoring/service-uptime-kuma.yaml b/monitoring/service-uptime-kuma.yaml new file mode 100644 index 0000000..56ad6a3 --- /dev/null +++ b/monitoring/service-uptime-kuma.yaml @@ -0,0 +1,22 @@ 
+apiVersion: v1 +kind: Service +metadata: + name: uptime-kuma + namespace: monitoring +spec: + clusterIP: 10.43.198.60 + clusterIPs: + - 10.43.198.60 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - port: 3001 + protocol: TCP + targetPort: 3001 + selector: + app: uptime-kuma + sessionAffinity: None + type: ClusterIP + diff --git a/monitoring/serviceaccount-kube-prometheus-stack-grafana.yaml b/monitoring/serviceaccount-kube-prometheus-stack-grafana.yaml new file mode 100644 index 0000000..95e0daf --- /dev/null +++ b/monitoring/serviceaccount-kube-prometheus-stack-grafana.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +automountServiceAccountToken: true +kind: ServiceAccount +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: grafana + app.kubernetes.io/version: 12.4.2 + helm.sh/chart: grafana-11.5.0 + name: kube-prometheus-stack-grafana + namespace: monitoring + diff --git a/monitoring/serviceaccount-kube-prometheus-stack-kube-state-metrics.yaml b/monitoring/serviceaccount-kube-prometheus-stack-kube-state-metrics.yaml new file mode 100644 index 0000000..3f71059 --- /dev/null +++ b/monitoring/serviceaccount-kube-prometheus-stack-kube-state-metrics.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +automountServiceAccountToken: true +kind: ServiceAccount +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app.kubernetes.io/component: metrics + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/part-of: kube-state-metrics + app.kubernetes.io/version: 2.18.0 + helm.sh/chart: kube-state-metrics-7.2.2 + release: kube-prometheus-stack + name: 
kube-prometheus-stack-kube-state-metrics + namespace: monitoring + diff --git a/monitoring/serviceaccount-kube-prometheus-stack-operator.yaml b/monitoring/serviceaccount-kube-prometheus-stack-operator.yaml new file mode 100644 index 0000000..41fdb40 --- /dev/null +++ b/monitoring/serviceaccount-kube-prometheus-stack-operator.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +automountServiceAccountToken: true +kind: ServiceAccount +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-operator + app.kubernetes.io/component: prometheus-operator + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: kube-prometheus-stack-prometheus-operator + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-operator + namespace: monitoring + diff --git a/monitoring/serviceaccount-kube-prometheus-stack-prometheus-node-exporter.yaml b/monitoring/serviceaccount-kube-prometheus-stack-prometheus-node-exporter.yaml new file mode 100644 index 0000000..ecdf537 --- /dev/null +++ b/monitoring/serviceaccount-kube-prometheus-stack-prometheus-node-exporter.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +automountServiceAccountToken: false +kind: ServiceAccount +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app.kubernetes.io/component: metrics + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: prometheus-node-exporter + app.kubernetes.io/part-of: prometheus-node-exporter + app.kubernetes.io/version: 1.11.1 + helm.sh/chart: prometheus-node-exporter-4.53.1 + release: kube-prometheus-stack + name: kube-prometheus-stack-prometheus-node-exporter + namespace: 
monitoring + diff --git a/monitoring/serviceaccount-kube-prometheus-stack-prometheus.yaml b/monitoring/serviceaccount-kube-prometheus-stack-prometheus.yaml new file mode 100644 index 0000000..a61dc5f --- /dev/null +++ b/monitoring/serviceaccount-kube-prometheus-stack-prometheus.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +automountServiceAccountToken: true +kind: ServiceAccount +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + labels: + app: kube-prometheus-stack-prometheus + app.kubernetes.io/component: prometheus + app.kubernetes.io/instance: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: kube-prometheus-stack-prometheus + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + heritage: Helm + release: kube-prometheus-stack + name: kube-prometheus-stack-prometheus + namespace: monitoring + diff --git a/monitoring/statefulset-prometheus-kube-prometheus-stack-prometheus.yaml b/monitoring/statefulset-prometheus-kube-prometheus-stack-prometheus.yaml new file mode 100644 index 0000000..75f3f57 --- /dev/null +++ b/monitoring/statefulset-prometheus-kube-prometheus-stack-prometheus.yaml @@ -0,0 +1,309 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + prometheus-operator-input-hash: '11102801613401654018' + labels: + app: kube-prometheus-stack-prometheus + app.kubernetes.io/instance: kube-prometheus-stack-prometheus + app.kubernetes.io/managed-by: prometheus-operator + app.kubernetes.io/name: prometheus + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 83.2.0 + chart: kube-prometheus-stack-83.2.0 + heritage: Helm + managed-by: prometheus-operator + operator.prometheus.io/mode: server + operator.prometheus.io/name: kube-prometheus-stack-prometheus + 
operator.prometheus.io/shard: '0' + prometheus: kube-prometheus-stack-prometheus + release: kube-prometheus-stack + name: prometheus-kube-prometheus-stack-prometheus + namespace: monitoring + ownerReferences: + - apiVersion: monitoring.coreos.com/v1 + blockOwnerDeletion: true + controller: true + kind: Prometheus + name: kube-prometheus-stack-prometheus + uid: f0355616-4bfa-4409-8b5f-c1c815ee7a2a +spec: + persistentVolumeClaimRetentionPolicy: + whenDeleted: Retain + whenScaled: Retain + podManagementPolicy: Parallel + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app.kubernetes.io/instance: kube-prometheus-stack-prometheus + app.kubernetes.io/managed-by: prometheus-operator + app.kubernetes.io/name: prometheus + operator.prometheus.io/name: kube-prometheus-stack-prometheus + operator.prometheus.io/shard: '0' + prometheus: kube-prometheus-stack-prometheus + serviceName: prometheus-operated + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: prometheus + labels: + app.kubernetes.io/instance: kube-prometheus-stack-prometheus + app.kubernetes.io/managed-by: prometheus-operator + app.kubernetes.io/name: prometheus + app.kubernetes.io/version: 3.11.1 + operator.prometheus.io/name: kube-prometheus-stack-prometheus + operator.prometheus.io/shard: '0' + prometheus: kube-prometheus-stack-prometheus + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: + - prometheus + - key: app.kubernetes.io/instance + operator: In + values: + - kube-prometheus-stack-prometheus + topologyKey: kubernetes.io/hostname + weight: 100 + automountServiceAccountToken: true + containers: + - args: + - --config.file=/etc/prometheus/config_out/prometheus.env.yaml + - --web.enable-lifecycle + - --web.external-url=http://kube-prometheus-stack-prometheus.monitoring:9090 + - --web.route-prefix=/ + - 
--storage.tsdb.retention.time=30d + - --storage.tsdb.path=/prometheus + - --storage.tsdb.wal-compression + - --web.config.file=/etc/prometheus/web_config/web-config.yaml + image: quay.io/prometheus/prometheus:v3.11.1 + imagePullPolicy: IfNotPresent + livenessProbe: + failureThreshold: 6 + httpGet: + path: /-/healthy + port: http-web + scheme: HTTP + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 3 + name: prometheus + ports: + - containerPort: 9090 + name: http-web + protocol: TCP + readinessProbe: + failureThreshold: 3 + httpGet: + path: /-/ready + port: http-web + scheme: HTTP + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 3 + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + startupProbe: + failureThreshold: 60 + httpGet: + path: /-/ready + port: http-web + scheme: HTTP + periodSeconds: 15 + successThreshold: 1 + timeoutSeconds: 3 + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/prometheus/config_out + name: config-out + readOnly: true + - mountPath: /etc/prometheus/certs + name: tls-assets + readOnly: true + - mountPath: /prometheus + name: prometheus-kube-prometheus-stack-prometheus-db + subPath: prometheus-db + - mountPath: /etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0 + name: prometheus-kube-prometheus-stack-prometheus-rulefiles-0 + readOnly: true + - mountPath: /etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-1 + name: prometheus-kube-prometheus-stack-prometheus-rulefiles-1 + readOnly: true + - mountPath: /etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-2 + name: prometheus-kube-prometheus-stack-prometheus-rulefiles-2 + readOnly: true + - mountPath: /etc/prometheus/web_config/web-config.yaml + name: web-config + readOnly: true + subPath: web-config.yaml + - args: + - 
--listen-address=:8080 + - --reload-url=http://127.0.0.1:9090/-/reload + - --config-file=/etc/prometheus/config/prometheus.yaml.gz + - --config-envsubst-file=/etc/prometheus/config_out/prometheus.env.yaml + - --watched-dir=/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0 + - --watched-dir=/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-1 + - --watched-dir=/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-2 + command: + - /bin/prometheus-config-reloader + env: + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: SHARD + value: '0' + image: quay.io/prometheus-operator/prometheus-config-reloader:v0.90.1 + imagePullPolicy: IfNotPresent + name: config-reloader + ports: + - containerPort: 8080 + name: reloader-web + protocol: TCP + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/prometheus/config + name: config + - mountPath: /etc/prometheus/config_out + name: config-out + - mountPath: /etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0 + name: prometheus-kube-prometheus-stack-prometheus-rulefiles-0 + - mountPath: /etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-1 + name: prometheus-kube-prometheus-stack-prometheus-rulefiles-1 + - mountPath: /etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-2 + name: prometheus-kube-prometheus-stack-prometheus-rulefiles-2 + dnsPolicy: ClusterFirst + initContainers: + - args: + - --watch-interval=0 + - --listen-address=:8081 + - --config-file=/etc/prometheus/config/prometheus.yaml.gz + - --config-envsubst-file=/etc/prometheus/config_out/prometheus.env.yaml + - 
--watched-dir=/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0 + - --watched-dir=/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-1 + - --watched-dir=/etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-2 + command: + - /bin/prometheus-config-reloader + env: + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: SHARD + value: '0' + image: quay.io/prometheus-operator/prometheus-config-reloader:v0.90.1 + imagePullPolicy: IfNotPresent + name: init-config-reloader + ports: + - containerPort: 8081 + name: reloader-init + protocol: TCP + resources: {} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /etc/prometheus/config + name: config + - mountPath: /etc/prometheus/config_out + name: config-out + - mountPath: /etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-0 + name: prometheus-kube-prometheus-stack-prometheus-rulefiles-0 + - mountPath: /etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-1 + name: prometheus-kube-prometheus-stack-prometheus-rulefiles-1 + - mountPath: /etc/prometheus/rules/prometheus-kube-prometheus-stack-prometheus-rulefiles-2 + name: prometheus-kube-prometheus-stack-prometheus-rulefiles-2 + restartPolicy: Always + schedulerName: default-scheduler + securityContext: + fsGroup: 2000 + runAsGroup: 2000 + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + serviceAccount: kube-prometheus-stack-prometheus + serviceAccountName: kube-prometheus-stack-prometheus + shareProcessNamespace: false + terminationGracePeriodSeconds: 600 + volumes: + - name: config + secret: + defaultMode: 420 + secretName: prometheus-kube-prometheus-stack-prometheus + - name: tls-assets + 
projected: + defaultMode: 420 + sources: + - secret: + name: prometheus-kube-prometheus-stack-prometheus-tls-assets-0 + - emptyDir: + medium: Memory + name: config-out + - configMap: + defaultMode: 420 + name: prometheus-kube-prometheus-stack-prometheus-rulefiles-0 + optional: true + name: prometheus-kube-prometheus-stack-prometheus-rulefiles-0 + - configMap: + defaultMode: 420 + name: prometheus-kube-prometheus-stack-prometheus-rulefiles-1 + optional: true + name: prometheus-kube-prometheus-stack-prometheus-rulefiles-1 + - configMap: + defaultMode: 420 + name: prometheus-kube-prometheus-stack-prometheus-rulefiles-2 + optional: true + name: prometheus-kube-prometheus-stack-prometheus-rulefiles-2 + - name: web-config + secret: + defaultMode: 420 + secretName: prometheus-kube-prometheus-stack-prometheus-web-config + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: prometheus-kube-prometheus-stack-prometheus-db + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi + storageClassName: local-path + volumeMode: Filesystem + status: + phase: Pending + diff --git a/n8n/deployment-n8n.yaml b/n8n/deployment-n8n.yaml new file mode 100644 index 0000000..5b97a31 --- /dev/null +++ b/n8n/deployment-n8n.yaml @@ -0,0 +1,84 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '2' + kubectl.kubernetes.io/last-applied-configuration: 
'{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"name":"n8n","namespace":"n8n"},"spec":{"replicas":1,"selector":{"matchLabels":{"app":"n8n"}},"template":{"metadata":{"labels":{"app":"n8n"}},"spec":{"containers":[{"env":[{"name":"N8N_ENCRYPTION_KEY","valueFrom":{"secretKeyRef":{"key":"encryption-key","name":"n8n-secret"}}},{"name":"N8N_HOST","value":"n8n.chemavx.xyz"},{"name":"N8N_PORT","value":"5678"},{"name":"N8N_PROTOCOL","value":"https"},{"name":"WEBHOOK_URL","value":"https://n8n.chemavx.xyz/"},{"name":"N8N_USER_FOLDER","value":"/home/node/.n8n"},{"name":"NODE_FUNCTION_ALLOW_EXTERNAL","value":"*"},{"name":"GENERIC_TIMEZONE","value":"Europe/Madrid"},{"name":"DB_TYPE","value":"sqlite"},{"name":"DB_SQLITE_DATABASE","value":"/home/node/.n8n/database.sqlite"}],"image":"n8nio/n8n:latest","name":"n8n","ports":[{"containerPort":5678}],"resources":{"limits":{"cpu":"500m","memory":"1Gi"},"requests":{"cpu":"100m","memory":"256Mi"}},"volumeMounts":[{"mountPath":"/home/node/.n8n","name":"n8n-data"}]}],"securityContext":{"fsGroup":1000,"runAsUser":1000},"volumes":[{"name":"n8n-data","persistentVolumeClaim":{"claimName":"n8n-pvc"}}]}}}} + + ' + name: n8n + namespace: n8n +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: n8n + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + annotations: + kubectl.kubernetes.io/restartedAt: '2026-04-09T17:05:44Z' + labels: + app: n8n + spec: + containers: + - env: + - name: N8N_ENCRYPTION_KEY + valueFrom: + secretKeyRef: + key: encryption-key + name: n8n-secret + - name: N8N_HOST + value: n8n.chemavx.xyz + - name: N8N_PORT + value: '5678' + - name: N8N_PROTOCOL + value: https + - name: WEBHOOK_URL + value: https://n8n.chemavx.xyz/ + - name: N8N_USER_FOLDER + value: /home/node/.n8n + - name: NODE_FUNCTION_ALLOW_EXTERNAL + value: '*' + - name: GENERIC_TIMEZONE + value: Europe/Madrid + - name: 
DB_TYPE + value: sqlite + - name: DB_SQLITE_DATABASE + value: /home/node/.n8n/database.sqlite + image: n8nio/n8n:latest + imagePullPolicy: Always + name: n8n + ports: + - containerPort: 5678 + protocol: TCP + resources: + limits: + cpu: 500m + memory: 1Gi + requests: + cpu: 100m + memory: 256Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /home/node/.n8n + name: n8n-data + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + securityContext: + fsGroup: 1000 + runAsUser: 1000 + terminationGracePeriodSeconds: 30 + volumes: + - name: n8n-data + persistentVolumeClaim: + claimName: n8n-pvc + diff --git a/n8n/ingress-n8n.yaml b/n8n/ingress-n8n.yaml new file mode 100644 index 0000000..d6d8cff --- /dev/null +++ b/n8n/ingress-n8n.yaml @@ -0,0 +1,29 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"networking.k8s.io/v1","kind":"Ingress","metadata":{"annotations":{"cert-manager.io/cluster-issuer":"letsencrypt-prod","traefik.ingress.kubernetes.io/router.entrypoints":"websecure"},"name":"n8n","namespace":"n8n"},"spec":{"ingressClassName":"traefik","rules":[{"host":"n8n.chemavx.xyz","http":{"paths":[{"backend":{"service":{"name":"n8n","port":{"number":5678}}},"path":"/","pathType":"Prefix"}]}}],"tls":[{"hosts":["n8n.chemavx.xyz"],"secretName":"n8n-tls"}]}} + + ' + traefik.ingress.kubernetes.io/router.entrypoints: websecure + name: n8n + namespace: n8n +spec: + ingressClassName: traefik + rules: + - host: n8n.chemavx.xyz + http: + paths: + - backend: + service: + name: n8n + port: + number: 5678 + path: / + pathType: Prefix + tls: + - hosts: + - n8n.chemavx.xyz + secretName: n8n-tls + diff --git a/n8n/pvc-n8n-pvc.yaml b/n8n/pvc-n8n-pvc.yaml new file mode 100644 index 0000000..44f67e0 --- /dev/null +++ b/n8n/pvc-n8n-pvc.yaml @@ -0,0 +1,20 
@@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"PersistentVolumeClaim","metadata":{"annotations":{},"name":"n8n-pvc","namespace":"n8n"},"spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"10Gi"}},"storageClassName":"local-path","volumeName":"n8n-pv"}} + + ' + pv.kubernetes.io/bind-completed: 'yes' + name: n8n-pvc + namespace: n8n +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: local-path + volumeMode: Filesystem + volumeName: n8n-pv + diff --git a/n8n/secret-n8n-secret.yaml b/n8n/secret-n8n-secret.yaml new file mode 100644 index 0000000..408c286 --- /dev/null +++ b/n8n/secret-n8n-secret.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +data: + encryption-key: REDACTED +kind: Secret +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Secret","metadata":{"annotations":{},"name":"n8n-secret","namespace":"n8n"},"stringData":{"encryption-key":"REDACTED"},"type":"Opaque"} + + ' + name: n8n-secret + namespace: n8n +type: Opaque + diff --git a/n8n/secret-n8n-tls.yaml b/n8n/secret-n8n-tls.yaml new file mode 100644 index 0000000..737a7c5 --- /dev/null +++ b/n8n/secret-n8n-tls.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +data: + tls.crt: REDACTED + tls.key: REDACTED +kind: Secret +metadata: + annotations: + cert-manager.io/alt-names: n8n.chemavx.xyz + cert-manager.io/certificate-name: n8n-tls + cert-manager.io/common-name: n8n.chemavx.xyz + cert-manager.io/ip-sans: '' + cert-manager.io/issuer-group: cert-manager.io + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod + cert-manager.io/uri-sans: '' + labels: + controller.cert-manager.io/fao: 'true' + name: n8n-tls + namespace: n8n +type: kubernetes.io/tls + diff --git a/n8n/service-n8n.yaml b/n8n/service-n8n.yaml new file mode 100644 index
0000000..4526cc4 --- /dev/null +++ b/n8n/service-n8n.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: n8n + namespace: n8n +spec: + clusterIP: 10.43.176.217 + clusterIPs: + - 10.43.176.217 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - port: 5678 + protocol: TCP + targetPort: 5678 + selector: + app: n8n + sessionAffinity: None + type: ClusterIP + diff --git a/openclaw/deployment-openclaw.yaml b/openclaw/deployment-openclaw.yaml new file mode 100644 index 0000000..fd62988 --- /dev/null +++ b/openclaw/deployment-openclaw.yaml @@ -0,0 +1,72 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '5' + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"name":"openclaw","namespace":"openclaw"},"spec":{"replicas":1,"selector":{"matchLabels":{"app":"openclaw"}},"template":{"metadata":{"labels":{"app":"openclaw"}},"spec":{"containers":[{"env":[{"name":"OPENCLAW_DATA_DIR","value":"/data"}],"image":"ghcr.io/openclaw/openclaw:2026.3.2","name":"openclaw","ports":[{"containerPort":18789}],"resources":{"limits":{"cpu":"500m","memory":"1Gi"},"requests":{"cpu":"100m","memory":"256Mi"}},"volumeMounts":[{"mountPath":"/data","name":"data"}]}],"securityContext":{"fsGroup":1000,"runAsUser":1000},"volumes":[{"name":"data","persistentVolumeClaim":{"claimName":"openclaw-pvc"}}]}}}} + + ' + name: openclaw + namespace: openclaw +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: openclaw + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + annotations: + kubectl.kubernetes.io/restartedAt: '2026-04-09T18:59:08Z' + labels: + app: openclaw + spec: + containers: + - env: + - name: OPENCLAW_DATA_DIR + value: /data + - name: NODE_OPTIONS + value: --max-old-space-size=1536 + - name: 
OPENCLAW_STATE_DIR + value: /data + - name: OPENCLAW_TOKEN + valueFrom: + secretKeyRef: + key: OPENCLAW_TOKEN + name: openclaw-token + image: ghcr.io/openclaw/openclaw:2026.3.2 + imagePullPolicy: IfNotPresent + name: openclaw + ports: + - containerPort: 18789 + protocol: TCP + resources: + limits: + cpu: '1' + memory: 2Gi + requests: + cpu: 100m + memory: 512Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /data + name: data + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + securityContext: + fsGroup: 1000 + runAsUser: 1000 + terminationGracePeriodSeconds: 30 + volumes: + - name: data + persistentVolumeClaim: + claimName: openclaw-pvc + diff --git a/openclaw/ingress-openclaw.yaml b/openclaw/ingress-openclaw.yaml new file mode 100644 index 0000000..d6011b5 --- /dev/null +++ b/openclaw/ingress-openclaw.yaml @@ -0,0 +1,29 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"networking.k8s.io/v1","kind":"Ingress","metadata":{"annotations":{"cert-manager.io/cluster-issuer":"letsencrypt-prod","traefik.ingress.kubernetes.io/router.entrypoints":"websecure"},"name":"openclaw","namespace":"openclaw"},"spec":{"ingressClassName":"traefik","rules":[{"host":"openclaw.chemavx.xyz","http":{"paths":[{"backend":{"service":{"name":"openclaw","port":{"number":18789}}},"path":"/","pathType":"Prefix"}]}}],"tls":[{"hosts":["openclaw.chemavx.xyz"],"secretName":"openclaw-tls"}]}} + + ' + traefik.ingress.kubernetes.io/router.entrypoints: websecure + name: openclaw + namespace: openclaw +spec: + ingressClassName: traefik + rules: + - host: openclaw.chemavx.xyz + http: + paths: + - backend: + service: + name: openclaw + port: + number: 18789 + path: / + pathType: Prefix + tls: + - hosts: + - openclaw.chemavx.xyz + secretName: openclaw-tls + diff --git 
a/openclaw/pvc-openclaw-pvc.yaml b/openclaw/pvc-openclaw-pvc.yaml new file mode 100644 index 0000000..3834d5c --- /dev/null +++ b/openclaw/pvc-openclaw-pvc.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"PersistentVolumeClaim","metadata":{"annotations":{},"name":"openclaw-pvc","namespace":"openclaw"},"spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"20Gi"}},"storageClassName":"local-path","volumeName":"openclaw-pv"}} + + ' + pv.kubernetes.io/bind-completed: 'yes' + name: openclaw-pvc + namespace: openclaw +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 20Gi + storageClassName: local-path + volumeMode: Filesystem + volumeName: openclaw-pv + diff --git a/openclaw/secret-openclaw-tls.yaml b/openclaw/secret-openclaw-tls.yaml new file mode 100644 index 0000000..acb1ff6 --- /dev/null +++ b/openclaw/secret-openclaw-tls.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +data: + tls.crt: REDACTED + tls.key: REDACTED +kind: Secret +metadata: + annotations: + cert-manager.io/alt-names: openclaw.chemavx.xyz + cert-manager.io/certificate-name: openclaw-tls + cert-manager.io/common-name: openclaw.chemavx.xyz + cert-manager.io/ip-sans: '' + cert-manager.io/issuer-group: cert-manager.io + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod + cert-manager.io/uri-sans: '' + labels: + controller.cert-manager.io/fao: 'true' + name: openclaw-tls + namespace: openclaw +type: kubernetes.io/tls + diff --git a/openclaw/secret-openclaw-token.yaml b/openclaw/secret-openclaw-token.yaml new file mode 100644 index 0000000..48f666b --- /dev/null +++ b/openclaw/secret-openclaw-token.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +data: + OPENCLAW_TOKEN: REDACTED +kind: Secret +metadata: + name: openclaw-token + namespace: openclaw +type: Opaque + diff --git a/openclaw/service-openclaw.yaml 
b/openclaw/service-openclaw.yaml new file mode 100644 index 0000000..6cdafd0 --- /dev/null +++ b/openclaw/service-openclaw.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: openclaw + namespace: openclaw +spec: + clusterIP: 10.43.34.126 + clusterIPs: + - 10.43.34.126 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - port: 18789 + protocol: TCP + targetPort: 18789 + selector: + app: openclaw + sessionAffinity: None + type: ClusterIP + diff --git a/polymarket-bot/deployment-api.yaml b/polymarket-bot/deployment-api.yaml new file mode 100644 index 0000000..7e71638 --- /dev/null +++ b/polymarket-bot/deployment-api.yaml @@ -0,0 +1,59 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '1' + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"name":"api","namespace":"polymarket-bot"},"spec":{"replicas":1,"selector":{"matchLabels":{"app":"api"}},"template":{"metadata":{"labels":{"app":"api"}},"spec":{"containers":[{"command":["uvicorn","api.main:app","--host","0.0.0.0","--port","8000"],"envFrom":[{"secretRef":{"name":"bot-secrets"}}],"image":"docker.io/library/polymarket-bot-api:v2","imagePullPolicy":"Never","name":"api","ports":[{"containerPort":8000}],"resources":{"limits":{"cpu":"500m","memory":"512Mi"},"requests":{"cpu":"100m","memory":"256Mi"}}}]}}}} + + ' + name: api + namespace: polymarket-bot +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: api + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + labels: + app: api + spec: + containers: + - command: + - uvicorn + - api.main:app + - --host + - 0.0.0.0 + - --port + - '8000' + envFrom: + - secretRef: + name: bot-secrets + image: docker.io/library/polymarket-bot-api:v2 + imagePullPolicy: Never + name: 
api + ports: + - containerPort: 8000 + protocol: TCP + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 100m + memory: 256Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 + diff --git a/polymarket-bot/deployment-bot.yaml b/polymarket-bot/deployment-bot.yaml new file mode 100644 index 0000000..e5bb679 --- /dev/null +++ b/polymarket-bot/deployment-bot.yaml @@ -0,0 +1,53 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '1' + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"name":"bot","namespace":"polymarket-bot"},"spec":{"replicas":1,"selector":{"matchLabels":{"app":"bot"}},"template":{"metadata":{"labels":{"app":"bot"}},"spec":{"containers":[{"command":["python3","-m","bot.main"],"envFrom":[{"secretRef":{"name":"bot-secrets"}}],"image":"docker.io/library/polymarket-bot:v5","imagePullPolicy":"Never","name":"bot","resources":{"limits":{"cpu":"500m","memory":"512Mi"},"requests":{"cpu":"100m","memory":"256Mi"}}}]}}}} + + ' + name: bot + namespace: polymarket-bot +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: bot + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + labels: + app: bot + spec: + containers: + - command: + - python3 + - -m + - bot.main + envFrom: + - secretRef: + name: bot-secrets + image: docker.io/library/polymarket-bot:v5 + imagePullPolicy: Never + name: bot + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 100m + memory: 256Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: 
default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 + diff --git a/polymarket-bot/deployment-dashboard.yaml b/polymarket-bot/deployment-dashboard.yaml new file mode 100644 index 0000000..e1c1e50 --- /dev/null +++ b/polymarket-bot/deployment-dashboard.yaml @@ -0,0 +1,49 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '1' + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"name":"dashboard","namespace":"polymarket-bot"},"spec":{"replicas":1,"selector":{"matchLabels":{"app":"dashboard"}},"template":{"metadata":{"labels":{"app":"dashboard"}},"spec":{"containers":[{"image":"docker.io/library/polymarket-bot-dashboard:v3","imagePullPolicy":"Never","name":"dashboard","ports":[{"containerPort":80}],"resources":{"limits":{"cpu":"200m","memory":"128Mi"},"requests":{"cpu":"50m","memory":"64Mi"}}}]}}}} + + ' + name: dashboard + namespace: polymarket-bot +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: dashboard + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + labels: + app: dashboard + spec: + containers: + - image: docker.io/library/polymarket-bot-dashboard:v3 + imagePullPolicy: Never + name: dashboard + ports: + - containerPort: 80 + protocol: TCP + resources: + limits: + cpu: 200m + memory: 128Mi + requests: + cpu: 50m + memory: 64Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 + diff --git a/polymarket-bot/ingress-polymarket-bot.yaml b/polymarket-bot/ingress-polymarket-bot.yaml new file mode 100644 index 0000000..4ff16db --- /dev/null +++ b/polymarket-bot/ingress-polymarket-bot.yaml @@ -0,0 +1,37 @@ +apiVersion: 
networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"networking.k8s.io/v1","kind":"Ingress","metadata":{"annotations":{"cert-manager.io/cluster-issuer":"letsencrypt-prod","traefik.ingress.kubernetes.io/router.entrypoints":"websecure","traefik.ingress.kubernetes.io/router.tls":"true"},"name":"polymarket-bot","namespace":"polymarket-bot"},"spec":{"ingressClassName":"traefik","rules":[{"host":"polymarket.chemavx.xyz","http":{"paths":[{"backend":{"service":{"name":"api","port":{"number":8000}}},"path":"/api","pathType":"Prefix"},{"backend":{"service":{"name":"dashboard","port":{"number":80}}},"path":"/","pathType":"Prefix"}]}}],"tls":[{"hosts":["polymarket.chemavx.xyz"],"secretName":"polymarket-tls"}]}} + + ' + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: 'true' + name: polymarket-bot + namespace: polymarket-bot +spec: + ingressClassName: traefik + rules: + - host: polymarket.chemavx.xyz + http: + paths: + - backend: + service: + name: api + port: + number: 8000 + path: /api + pathType: Prefix + - backend: + service: + name: dashboard + port: + number: 80 + path: / + pathType: Prefix + tls: + - hosts: + - polymarket.chemavx.xyz + secretName: polymarket-tls + diff --git a/polymarket-bot/pvc-data-postgres-0.yaml b/polymarket-bot/pvc-data-postgres-0.yaml new file mode 100644 index 0000000..62b1975 --- /dev/null +++ b/polymarket-bot/pvc-data-postgres-0.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + annotations: + pv.kubernetes.io/bind-completed: 'yes' + pv.kubernetes.io/bound-by-controller: 'yes' + volume.beta.kubernetes.io/storage-provisioner: rancher.io/local-path + volume.kubernetes.io/selected-node: chemavx-k8 + volume.kubernetes.io/storage-provisioner: rancher.io/local-path + labels: + app: postgres + name: data-postgres-0 + namespace: polymarket-bot 
+spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi + storageClassName: local-path + volumeMode: Filesystem + volumeName: pvc-327bbc6d-dcc6-46b4-9d78-db950f71eb98 + diff --git a/polymarket-bot/secret-bot-secrets.yaml b/polymarket-bot/secret-bot-secrets.yaml new file mode 100644 index 0000000..586cc51 --- /dev/null +++ b/polymarket-bot/secret-bot-secrets.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +data: + COINGECKO_API_KEY: REDACTED + DATABASE_URL: REDACTED + PAPER_BANKROLL: REDACTED + PAPER_MODE: REDACTED + POLYMARKET_API_KEY: REDACTED + POLYMARKET_PASSPHRASE: REDACTED + POLYMARKET_SECRET: REDACTED + TELEGRAM_BOT_TOKEN: REDACTED + TELEGRAM_CHAT_ID: REDACTED + WALLET_PRIVATE_KEY: REDACTED +kind: Secret +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Secret","metadata":{"annotations":{},"name":"bot-secrets","namespace":"polymarket-bot"},"stringData":{"COINGECKO_API_KEY":"REDACTED","DATABASE_URL":"REDACTED","PAPER_BANKROLL":"REDACTED","PAPER_MODE":"REDACTED","POLYMARKET_API_KEY":"REDACTED","POLYMARKET_PASSPHRASE":"REDACTED","POLYMARKET_SECRET":"REDACTED","TELEGRAM_BOT_TOKEN":"REDACTED","TELEGRAM_CHAT_ID":"REDACTED","WALLET_PRIVATE_KEY":"REDACTED"},"type":"Opaque"} + + ' + name: bot-secrets + namespace: polymarket-bot +type: Opaque + diff --git a/polymarket-bot/secret-polymarket-tls.yaml b/polymarket-bot/secret-polymarket-tls.yaml new file mode 100644 index 0000000..ae8b928 --- /dev/null +++ b/polymarket-bot/secret-polymarket-tls.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +data: + tls.crt: REDACTED + tls.key: REDACTED +kind: Secret +metadata: + annotations: + cert-manager.io/alt-names: polymarket.chemavx.xyz + cert-manager.io/certificate-name: polymarket-tls + cert-manager.io/common-name: polymarket.chemavx.xyz + cert-manager.io/ip-sans: '' + cert-manager.io/issuer-group:
cert-manager.io + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod + cert-manager.io/uri-sans: '' + labels: + controller.cert-manager.io/fao: 'true' + name: polymarket-tls + namespace: polymarket-bot +type: kubernetes.io/tls + diff --git a/polymarket-bot/service-api.yaml b/polymarket-bot/service-api.yaml new file mode 100644 index 0000000..247db59 --- /dev/null +++ b/polymarket-bot/service-api.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: api + namespace: polymarket-bot +spec: + clusterIP: 10.43.201.4 + clusterIPs: + - 10.43.201.4 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - port: 8000 + protocol: TCP + targetPort: 8000 + selector: + app: api + sessionAffinity: None + type: ClusterIP + diff --git a/polymarket-bot/service-dashboard.yaml b/polymarket-bot/service-dashboard.yaml new file mode 100644 index 0000000..6ac31ef --- /dev/null +++ b/polymarket-bot/service-dashboard.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: dashboard + namespace: polymarket-bot +spec: + clusterIP: 10.43.62.6 + clusterIPs: + - 10.43.62.6 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - port: 80 + protocol: TCP + targetPort: 80 + selector: + app: dashboard + sessionAffinity: None + type: ClusterIP + diff --git a/polymarket-bot/service-postgres.yaml b/polymarket-bot/service-postgres.yaml new file mode 100644 index 0000000..236d107 --- /dev/null +++ b/polymarket-bot/service-postgres.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Service +metadata: + name: postgres + namespace: polymarket-bot +spec: + clusterIP: 10.43.238.53 + clusterIPs: + - 10.43.238.53 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - port: 5432 + protocol: TCP + targetPort: 5432 + selector: + app: postgres + sessionAffinity: None + type: ClusterIP + diff --git 
a/polymarket-bot/statefulset-postgres.yaml b/polymarket-bot/statefulset-postgres.yaml new file mode 100644 index 0000000..7563daa --- /dev/null +++ b/polymarket-bot/statefulset-postgres.yaml @@ -0,0 +1,76 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: postgres + namespace: polymarket-bot +spec: + persistentVolumeClaimRetentionPolicy: + whenDeleted: Retain + whenScaled: Retain + podManagementPolicy: OrderedReady + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: postgres + serviceName: postgres + template: + metadata: + labels: + app: postgres + spec: + containers: + - env: + - name: POSTGRES_USER + value: bot + - name: POSTGRES_PASSWORD + value: bot + - name: POSTGRES_DB + value: polymarket + image: postgres:16-alpine + imagePullPolicy: IfNotPresent + name: postgres + ports: + - containerPort: 5432 + protocol: TCP + readinessProbe: + exec: + command: + - pg_isready + - -U + - bot + failureThreshold: 3 + initialDelaySeconds: 5 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 1 + resources: {} + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /var/lib/postgresql/data + name: data + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 + updateStrategy: + rollingUpdate: + partition: 0 + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: data + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi + volumeMode: Filesystem + status: + phase: Pending + diff --git a/vaultwarden/deployment-vaultwarden.yaml b/vaultwarden/deployment-vaultwarden.yaml new file mode 100644 index 0000000..eb92d61 --- /dev/null +++ b/vaultwarden/deployment-vaultwarden.yaml @@ -0,0 +1,64 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: '2' + 
kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"name":"vaultwarden","namespace":"vaultwarden"},"spec":{"replicas":1,"selector":{"matchLabels":{"app":"vaultwarden"}},"template":{"metadata":{"labels":{"app":"vaultwarden"}},"spec":{"containers":[{"env":[{"name":"LOG_LEVEL","value":"warn"},{"name":"WEBSOCKET_ENABLED","value":"true"}],"envFrom":[{"secretRef":{"name":"vaultwarden-secret"}}],"image":"vaultwarden/server:latest","name":"vaultwarden","ports":[{"containerPort":80}],"resources":{"limits":{"cpu":"200m","memory":"256Mi"},"requests":{"cpu":"25m","memory":"64Mi"}},"volumeMounts":[{"mountPath":"/data","name":"data"}]}],"volumes":[{"name":"data","persistentVolumeClaim":{"claimName":"vaultwarden-pvc"}}]}}}} + + ' + name: vaultwarden + namespace: vaultwarden +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: vaultwarden + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + labels: + app: vaultwarden + spec: + containers: + - env: + - name: LOG_LEVEL + value: warn + - name: WEBSOCKET_ENABLED + value: 'true' + envFrom: + - secretRef: + name: vaultwarden-secret + image: vaultwarden/server:latest + imagePullPolicy: Always + name: vaultwarden + ports: + - containerPort: 80 + protocol: TCP + resources: + limits: + cpu: 200m + memory: 256Mi + requests: + cpu: 25m + memory: 64Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /data + name: data + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 + volumes: + - name: data + persistentVolumeClaim: + claimName: vaultwarden-pvc + diff --git a/vaultwarden/ingress-vaultwarden.yaml b/vaultwarden/ingress-vaultwarden.yaml new file mode 100644 index 0000000..e295956 --- /dev/null +++ 
b/vaultwarden/ingress-vaultwarden.yaml @@ -0,0 +1,29 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + cert-manager.io/cluster-issuer: letsencrypt-prod + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"networking.k8s.io/v1","kind":"Ingress","metadata":{"annotations":{"cert-manager.io/cluster-issuer":"letsencrypt-prod","traefik.ingress.kubernetes.io/router.entrypoints":"websecure"},"name":"vaultwarden","namespace":"vaultwarden"},"spec":{"ingressClassName":"traefik","rules":[{"host":"vaultwarden.chemavx.xyz","http":{"paths":[{"backend":{"service":{"name":"vaultwarden","port":{"number":80}}},"path":"/","pathType":"Prefix"}]}}],"tls":[{"hosts":["vaultwarden.chemavx.xyz"],"secretName":"vaultwarden-tls"}]}} + + ' + traefik.ingress.kubernetes.io/router.entrypoints: websecure + name: vaultwarden + namespace: vaultwarden +spec: + ingressClassName: traefik + rules: + - host: vaultwarden.chemavx.xyz + http: + paths: + - backend: + service: + name: vaultwarden + port: + number: 80 + path: / + pathType: Prefix + tls: + - hosts: + - vaultwarden.chemavx.xyz + secretName: vaultwarden-tls + diff --git a/vaultwarden/pvc-vaultwarden-pvc.yaml b/vaultwarden/pvc-vaultwarden-pvc.yaml new file mode 100644 index 0000000..68fceb5 --- /dev/null +++ b/vaultwarden/pvc-vaultwarden-pvc.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"PersistentVolumeClaim","metadata":{"annotations":{},"name":"vaultwarden-pvc","namespace":"vaultwarden"},"spec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"5Gi"}},"storageClassName":"local-path"}} + + ' + pv.kubernetes.io/bind-completed: 'yes' + pv.kubernetes.io/bound-by-controller: 'yes' + volume.beta.kubernetes.io/storage-provisioner: rancher.io/local-path + volume.kubernetes.io/selected-node: chemavx-k8 + volume.kubernetes.io/storage-provisioner: rancher.io/local-path + 
name: vaultwarden-pvc + namespace: vaultwarden +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi + storageClassName: local-path + volumeMode: Filesystem + volumeName: pvc-419712d2-1b7c-4931-8fce-6b762b3b57a0 + diff --git a/vaultwarden/secret-vaultwarden-secret.yaml b/vaultwarden/secret-vaultwarden-secret.yaml new file mode 100644 index 0000000..745376f --- /dev/null +++ b/vaultwarden/secret-vaultwarden-secret.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +data: + ADMIN_TOKEN: REDACTED + DOMAIN: REDACTED + SIGNUPS_ALLOWED: REDACTED +kind: Secret +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: '{"apiVersion":"v1","kind":"Secret","metadata":{"annotations":{},"name":"vaultwarden-secret","namespace":"vaultwarden"},"stringData":{"DOMAIN":"REDACTED","SIGNUPS_ALLOWED":"REDACTED"},"type":"Opaque"} + + ' + name: vaultwarden-secret + namespace: vaultwarden +type: Opaque + diff --git a/vaultwarden/secret-vaultwarden-tls.yaml b/vaultwarden/secret-vaultwarden-tls.yaml new file mode 100644 index 0000000..2886fd6 --- /dev/null +++ b/vaultwarden/secret-vaultwarden-tls.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +data: + tls.crt: REDACTED + tls.key: REDACTED +kind: Secret +metadata: + annotations: + cert-manager.io/alt-names: vaultwarden.chemavx.xyz + cert-manager.io/certificate-name: vaultwarden-tls + cert-manager.io/common-name: vaultwarden.chemavx.xyz + cert-manager.io/ip-sans: '' + cert-manager.io/issuer-group: cert-manager.io + cert-manager.io/issuer-kind: ClusterIssuer + cert-manager.io/issuer-name: letsencrypt-prod + cert-manager.io/uri-sans: '' + labels: + controller.cert-manager.io/fao: 'true' + name: vaultwarden-tls + namespace: vaultwarden +type: kubernetes.io/tls + diff --git a/vaultwarden/service-vaultwarden.yaml b/vaultwarden/service-vaultwarden.yaml new file mode 100644 index 0000000..3bcdc00 --- /dev/null +++ b/vaultwarden/service-vaultwarden.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Service
+metadata: + name: vaultwarden + namespace: vaultwarden +spec: + clusterIP: 10.43.195.22 + clusterIPs: + - 10.43.195.22 + internalTrafficPolicy: Cluster + ipFamilies: + - IPv4 + ipFamilyPolicy: SingleStack + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 + selector: + app: vaultwarden + sessionAffinity: None + type: ClusterIP +