User Tools

Site Tools


сервис_victoriametrics

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
сервис_victoriametrics [2025/03/25 13:57]
val [Метрики]
сервис_victoriametrics [2025/03/29 09:32] (current)
val [Уведомления]
Line 9: Line 9:
   * [[https://​docs.victoriametrics.com/​guides/​k8s-monitoring-via-vm-single/​|Kubernetes monitoring via VictoriaMetrics Single]]   * [[https://​docs.victoriametrics.com/​guides/​k8s-monitoring-via-vm-single/​|Kubernetes monitoring via VictoriaMetrics Single]]
  
 +  * [[https://​docs.victoriametrics.com/​scrape_config_examples/​]]
  
 +  * [[Система Kubernetes#​kube-state-metrics]]
 <​code>​ <​code>​
 (venv1) server# ansible all -f 4 -m apt -a '​pkg=prometheus-node-exporter state=present update_cache=true'​ -i /​root/​kubespray/​inventory/​mycluster/​hosts.yaml (venv1) server# ansible all -f 4 -m apt -a '​pkg=prometheus-node-exporter state=present update_cache=true'​ -i /​root/​kubespray/​inventory/​mycluster/​hosts.yaml
Line 19: Line 21:
 </​code><​code>​ </​code><​code>​
 ... ...
 +        - job_name: kube-state-metrics
 +          kubernetes_sd_configs:​
 +          - role: pod
 +          relabel_configs:​
 +          - source_labels:​ [__meta_kubernetes_pod_container_name]
 +            regex: kube-state-metrics
 +            action: keep
 +          - source_labels:​ [__meta_kubernetes_pod_container_port_number]
 +            regex: "​8080"​
 +            action: keep
 +
         - job_name: node-exporter         - job_name: node-exporter
           static_configs:​           static_configs:​
Line 30: Line 43:
 kube1:~/vm# kubectl scale --replicas 1 statefulset vmsingle-victoria-metrics-single-server -n vm kube1:~/vm# kubectl scale --replicas 1 statefulset vmsingle-victoria-metrics-single-server -n vm
  
-cmder> kubectl ​-n vm port-forward svc/​vmsingle-victoria-metrics-single-server 8428 -n vm+cmder> kubectl port-forward svc/​vmsingle-victoria-metrics-single-server 8428 -n vm
 </​code>​ </​code>​
  
Line 37: Line 50:
 ===== Уведомления ===== ===== Уведомления =====
  
-  * [[Сервис Prometheus#​prometheus-alertmanager]]+  * [[Сервис Prometheus#​prometheus-alertmanager]] ​(ссылки на правила)
  
 <​code>​ <​code>​
Line 43: Line 56:
  
 kube1:~/vm# helm show values vm/​victoria-metrics-alert > vm-alert-values.yaml kube1:~/vm# helm show values vm/​victoria-metrics-alert > vm-alert-values.yaml
 +
 +$ wget -qO - https://​raw.githubusercontent.com/​samber/​awesome-prometheus-alerts/​master/​dist/​rules/​host-and-hardware/​node-exporter.yml | sed '​s/​^/ ​   /'
 +
 +$ wget -qO - https://​raw.githubusercontent.com/​samber/​awesome-prometheus-alerts/​master/​dist/​rules/​kubernetes/​kubestate-exporter.yml | sed '​s/​^/ ​   /'
  
 kube1:~/vm# cat vm-alert-values.yaml kube1:~/vm# cat vm-alert-values.yaml
Line 62: Line 79:
 #      groups: [] #      groups: []
       groups:       groups:
-      - name: node_exporter_alerts+      - name: NodeExporter
 ...    ...   
-      - name: vm_k8s_alerts+      - name: KubestateExporter
         rules:         rules:
-        - alert: ​CriticalCPU +        - alert: ​KubernetesContainerOomKiller 
-          expr: sum by (kubernetes_io_hostname) (rate (container_cpu_usage_seconds_total[1m])) / sum (machine_cpu_cores) * 100 > 40 +          expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1
-          for: 1m+          for: 0m
           labels:           labels:
-            severity: ​"​critical"​+            severity: ​warning
           annotations:​           annotations:​
-            summary: ​"​CriticalCPU ​{{ $labels.instance }}" +            summary: ​Kubernetes Container oom killer (instance ​{{ $labels.instance }}) 
- +            description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKill"
-        - alert: CriticalFS +
-          expr: container_fs_usage_bytes{device=~"^/dev/[sv]d[a-z][1-9]$"} / container_fs_limit_bytes * 100 > 80 +
-          for: 1m +
-          labels: +
-            severity: "critical" +
-          annotations: +
-            summary: "CriticalFS {{ $labels.instance }}" +
- +
-        - alert: CriticalMEM +
-          expr: sum by (kubernetes_io_hostname) (container_memory_working_set_bytes) / sum (machine_memory_bytes) * 100 > 80 +
-          for: 1m +
-          labels: +
-            severity: "critical" +
-          annotations: +
-            summary: "CriticalMEM {{ $labels.instance }}" +
 ... ...
 alertmanager:​ alertmanager:​
Line 155: Line 156:
  
 VMUI-> Log Query: kubernetes.pod_name:​ my-debian VMUI-> Log Query: kubernetes.pod_name:​ my-debian
-</​code><​code>​+</​code>​ 
 + 
 +==== Подключение Grafana ==== 
 + 
 +<​code>​
 Grafana -> Connections -> Grafana -> Connections ->
 Add new connection: VictoriaLogs ->  Add new connection: VictoriaLogs -> 
Line 165: Line 170:
  
 Builder->​ Filter: kubernetes.pod_name = my-debian Builder->​ Filter: kubernetes.pod_name = my-debian
 +</​code>​
 +
 +====== Черновик ======
 +
 +<​code>​
 +        - alert: CriticalCPU
 +          expr: sum by (kubernetes_io_hostname) (rate (container_cpu_usage_seconds_total[1m])) / sum (machine_cpu_cores) * 100 > 40
 +          for: 1m
 +          labels:
 +            severity: "​critical"​
 +          annotations:​
 +            summary: "​CriticalCPU {{ $labels.instance }}"
 +
 +        - alert: CriticalFS
 +          expr: container_fs_usage_bytes{device=~"​^/​dev/​[sv]d[a-z][1-9]$"​} / container_fs_limit_bytes * 100 > 80
 +          for: 1m
 +          labels:
 +            severity: "​critical"​
 +          annotations:​
 +            summary: "​CriticalFS {{ $labels.instance }}"
 +
 +        - alert: CriticalMEM
 +          expr: sum by (kubernetes_io_hostname) (container_memory_working_set_bytes) / sum (machine_memory_bytes) * 100 > 80
 +          for: 1m
 +          labels:
 +            severity: "​critical"​
 +          annotations:​
 +            summary: "​CriticalMEM {{ $labels.instance }}"
 </​code>​ </​code>​
сервис_victoriametrics.1742900274.txt.gz · Last modified: 2025/03/25 13:57 by val