User Tools

Site Tools


сервис_victoriametrics

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
сервис_victoriametrics [2025/03/28 12:47]
val [Метрики]
сервис_victoriametrics [2025/03/29 09:32] (current)
val [Уведомления]
Line 56: Line 56:
  
 kube1:~/vm# helm show values vm/victoria-metrics-alert > vm-alert-values.yaml kube1:~/vm# helm show values vm/victoria-metrics-alert > vm-alert-values.yaml
 +
 +$ wget -qO - https://raw.githubusercontent.com/samber/awesome-prometheus-alerts/master/dist/rules/host-and-hardware/node-exporter.yml | sed 's/^/    /'
 +
 +$ wget -qO - https://raw.githubusercontent.com/samber/awesome-prometheus-alerts/master/dist/rules/kubernetes/kubestate-exporter.yml | sed 's/^/    /'
  
 kube1:~/vm# cat vm-alert-values.yaml kube1:~/vm# cat vm-alert-values.yaml
Line 75: Line 79:
 #      groups: [] #      groups: []
       groups:       groups:
-      - name: node_exporter_alerts+      - name: NodeExporter
 ...    ...   
-      - name: vm_k8s_alerts+      - name: KubestateExporter
         rules:         rules:
-        - alert: ​CriticalCPU +        - alert: ​KubernetesContainerOomKiller 
-          expr: sum by (kubernetes_io_hostname) (rate (container_cpu_usage_seconds_total[1m])) / sum (machine_cpu_cores) * 100 > 40 +          expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1
-          for: 1m+          for: 0m
           labels:           labels:
-            severity: ​"​critical"​+            severity: ​warning
           annotations:​           annotations:​
-            summary: ​"​CriticalCPU ​{{ $labels.instance }}" +            summary: ​Kubernetes Container oom killer (instance ​{{ $labels.instance }}) 
- +            description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKill"
-        - alert: CriticalFS +
-          expr: container_fs_usage_bytes{device=~"^/dev/[sv]d[a-z][1-9]$"} / container_fs_limit_bytes * 100 > 80 +
-          for: 1m +
-          labels: +
-            severity: "​critical"​ +
-          annotations:​ +
-            summary: "CriticalFS {{ $labels.instance }}" +
- +
-        - alert: CriticalMEM +
-          expr: sum by (kubernetes_io_hostname) (container_memory_working_set_bytes) / sum (machine_memory_bytes) * 100 > 80 +
-          for: 1m +
-          labels: +
-            severity: "​critical"​ +
-          annotations:​ +
-            summary: "​CriticalMEM ​{{ $labels.instance ​}}" +
 ... ...
 alertmanager:​ alertmanager:​
Line 182: Line 170:
  
 Builder->​ Filter: kubernetes.pod_name = my-debian Builder->​ Filter: kubernetes.pod_name = my-debian
 +</​code>​
 +
 +====== Черновик ======
 +
 +<​code>​
 +        - alert: CriticalCPU
 +          expr: sum by (kubernetes_io_hostname) (rate (container_cpu_usage_seconds_total[1m])) / sum (machine_cpu_cores) * 100 > 40
 +          for: 1m
 +          labels:
 +            severity: "critical"
 +          annotations:
 +            summary: "CriticalCPU {{ $labels.instance }}"
 +
 +        - alert: CriticalFS
 +          expr: container_fs_usage_bytes{device=~"^/dev/[sv]d[a-z][1-9]$"} / container_fs_limit_bytes * 100 > 80
 +          for: 1m
 +          labels:
 +            severity: "critical"
 +          annotations:
 +            summary: "CriticalFS {{ $labels.instance }}"
 +
 +        - alert: CriticalMEM
 +          expr: sum by (kubernetes_io_hostname) (container_memory_working_set_bytes) / sum (machine_memory_bytes) * 100 > 80
 +          for: 1m
 +          labels:
 +            severity: "critical"
 +          annotations:
 +            summary: "CriticalMEM {{ $labels.instance }}"
 </​code>​ </​code>​
сервис_victoriametrics.1743155260.txt.gz · Last modified: 2025/03/28 12:47 by val