сервис_victoriametrics

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
сервис_victoriametrics [2025/03/25 13:07]
val
сервис_victoriametrics [2025/03/29 09:32] (current)
val [Уведомления]
Line 1: Line 1:
 ====== Сервис VictoriaMetrics ====== ====== Сервис VictoriaMetrics ======
  
-==== Метрики ====+===== Метрики ​=====
  
   * [[https://​docs.victoriametrics.com/​sd_configs/​|vmagent and single-node VictoriaMetrics supports the following Prometheus-compatible service discovery]]   * [[https://​docs.victoriametrics.com/​sd_configs/​|vmagent and single-node VictoriaMetrics supports the following Prometheus-compatible service discovery]]
 +
 +  * [[https://​docs.victoriametrics.com/​guides/​k8s-monitoring-via-vm-cluster/​|Kubernetes monitoring with VictoriaMetrics Cluster]]
  
   * [[https://​docs.victoriametrics.com/​guides/​k8s-monitoring-via-vm-single/​|Kubernetes monitoring via VictoriaMetrics Single]]   * [[https://​docs.victoriametrics.com/​guides/​k8s-monitoring-via-vm-single/​|Kubernetes monitoring via VictoriaMetrics Single]]
  
 +  * [[https://​docs.victoriametrics.com/​scrape_config_examples/​]]
  
 +  * [[Система Kubernetes#​kube-state-metrics]]
 <​code>​ <​code>​
 (venv1) server# ansible all -f 4 -m apt -a '​pkg=prometheus-node-exporter state=present update_cache=true'​ -i /​root/​kubespray/​inventory/​mycluster/​hosts.yaml (venv1) server# ansible all -f 4 -m apt -a '​pkg=prometheus-node-exporter state=present update_cache=true'​ -i /​root/​kubespray/​inventory/​mycluster/​hosts.yaml
Line 17: Line 21:
 </​code><​code>​ </​code><​code>​
 ... ...
 +        - job_name: kube-state-metrics
 +          kubernetes_sd_configs:​
 +          - role: pod
 +          relabel_configs:​
 +          - source_labels:​ [__meta_kubernetes_pod_container_name]
 +            regex: kube-state-metrics
 +            action: keep
 +          - source_labels:​ [__meta_kubernetes_pod_container_port_number]
 +            regex: "​8080"​
 +            action: keep
 +
         - job_name: node-exporter         - job_name: node-exporter
           static_configs:​           static_configs:​
Line 26: Line 41:
 kube1:~/vm# helm upgrade -i vmsingle vm/​victoria-metrics-single -f guide-vmsingle-values.yaml -n vm --create-namespace kube1:~/vm# helm upgrade -i vmsingle vm/​victoria-metrics-single -f guide-vmsingle-values.yaml -n vm --create-namespace
  
-kube1:~/vm# kubectl scale --replicas 1 statefulset vmsingle-victoria-metrics-single-server+kube1:~/vm# kubectl scale --replicas 1 statefulset vmsingle-victoria-metrics-single-server ​-n vm
  
-cmder> kubectl ​-n vm port-forward svc/​vmsingle-victoria-metrics-single-server 8428+cmder> kubectl port-forward svc/​vmsingle-victoria-metrics-single-server 8428 -n vm 
 +</​code>​
  
-kube1# helm repo add grafana https://grafana.github.io/helm-charts +  * Сервис Grafana в [[Сервис Grafana#Kubernetes]]
-kube1# helm repo update+
  
-kube1:~/vm# cat my-grafana-values.yaml +===== Уведомления ​=====
-</​code><​code>​ +
-  datasources:​ +
-    datasources.yaml:​ +
-      apiVersion: 1 +
-      datasources:​ +
-        - name: victoriametrics +
-          type: prometheus +
-          orgId: 1 +
-          url: http://​vmsingle-victoria-metrics-single-server:​8428 +
-          access: proxy +
-          isDefault: true +
-          updateIntervalSeconds:​ 10 +
-          editable: true +
- +
-  dashboardProviders:​ +
-   ​dashboardproviders.yaml:​ +
-     ​apiVersion:​ 1 +
-     ​providers:​ +
-     - name: '​default'​ +
-       ​orgId:​ 1 +
-       ​folder:​ ''​ +
-       type: file +
-       ​disableDeletion:​ true +
-       ​editable:​ true +
-       ​options:​ +
-         path: /​var/​lib/​grafana/​dashboards/​default +
- +
-  dashboards:​ +
-    default: +
-      victoriametrics:​ +
-        gnetId: 10229 +
-        revision: 22 +
-        datasource: victoriametrics +
-      kubernetes:​ +
-        gnetId: 14205 +
-        revision: 1 +
-        datasource: victoriametrics +
-      node-exporter:​ +
-        gnetId: 1860 +
-        revision: 37 +
-        datasource: victoriametrics +
-</​code><​code>​ +
-kube1:~/vm# helm upgrade -i my-grafana grafana/​grafana -f my-grafana-values.yaml -n vm --create-namespace +
- +
-kube1# kubectl get secret --namespace vm my-grafana -o jsonpath="​{.data.admin-password}"​ | base64 --decode ; echo +
- +
-cmder> kubectl -n vm port-forward svc/​vmsingle-victoria-metrics-single-server 3000:80 +
-</​code>​ +
- +
-  * [[https://​docs.victoriametrics.com/​guides/​k8s-monitoring-via-vm-cluster/​|Kubernetes monitoring with VictoriaMetrics Cluster]] +
-==== Уведомления ====+
  
-  * [[Сервис Prometheus#​prometheus-alertmanager]]+  * [[Сервис Prometheus#​prometheus-alertmanager]] ​(ссылки на правила)
  
 <​code>​ <​code>​
Line 92: Line 56:
  
 kube1:~/vm# helm show values vm/​victoria-metrics-alert > vm-alert-values.yaml kube1:~/vm# helm show values vm/​victoria-metrics-alert > vm-alert-values.yaml
 +
 +$ wget -qO - https://​raw.githubusercontent.com/​samber/​awesome-prometheus-alerts/​master/​dist/​rules/​host-and-hardware/​node-exporter.yml | sed '​s/​^/ ​   /'
 +
 +$ wget -qO - https://​raw.githubusercontent.com/​samber/​awesome-prometheus-alerts/​master/​dist/​rules/​kubernetes/​kubestate-exporter.yml | sed '​s/​^/ ​   /'
  
 kube1:~/vm# cat vm-alert-values.yaml kube1:~/vm# cat vm-alert-values.yaml
Line 111: Line 79:
 #      groups: [] #      groups: []
       groups:       groups:
-      - name: node_exporter_alerts+      - name: NodeExporter
 ...    ...   
-      - name: vm_k8s_alerts+      - name: KubestateExporter
         rules:         rules:
-        - alert: ​CriticalCPU +        - alert: ​KubernetesContainerOomKiller 
-          expr: sum by (kubernetes_io_hostname) (rate (container_cpu_usage_seconds_total[1m])) / sum (machine_cpu_cores) * 100 > 40 +          expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1
-          for: 1m+          for: 0m
           labels:           labels:
-            severity: ​"​critical"​+            severity: ​warning
           annotations:​           annotations:​
-            summary: ​"​CriticalCPU ​{{ $labels.instance }}" +            summary: ​Kubernetes Container oom killer (instance ​{{ $labels.instance }}) 
- +            description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKill"
-        - alert: CriticalFS +
-          expr: container_fs_usage_bytes{device=~"^/dev/[sv]d[a-z][1-9]$"} / container_fs_limit_bytes * 100 > 80 +
-          for: 1m +
-          labels: +
-            severity: "​critical"​ +
-          annotations:​ +
-            summary: "CriticalFS {{ $labels.instance }}" +
- +
-        - alert: CriticalMEM +
-          expr: sum by (kubernetes_io_hostname) (container_memory_working_set_bytes) / sum (machine_memory_bytes) * 100 > 80 +
-          for: 1m +
-          labels: +
-            severity: "​critical"​ +
-          annotations:​ +
-            summary: "​CriticalMEM ​{{ $labels.instance ​}}" +
 ... ...
 alertmanager:​ alertmanager:​
Line 183: Line 135:
   * [[Команда dd]] для нехватки места на диске   * [[Команда dd]] для нехватки места на диске
  
-==== Журналы ====+===== Журналы ​=====
  
   * [[https://​docs.victoriametrics.com/​helm/​victorialogs-single/​]]   * [[https://​docs.victoriametrics.com/​helm/​victorialogs-single/​]]
Line 204: Line 156:
  
 VMUI-> Log Query: kubernetes.pod_name:​ my-debian VMUI-> Log Query: kubernetes.pod_name:​ my-debian
-</​code><​code>​+</​code>​ 
 + 
 +==== Подключение Grafana ==== 
 + 
 +<​code>​
 Grafana -> Connections -> Grafana -> Connections ->
 Add new connection: VictoriaLogs ->  Add new connection: VictoriaLogs -> 
Line 214: Line 170:
  
 Builder->​ Filter: kubernetes.pod_name = my-debian Builder->​ Filter: kubernetes.pod_name = my-debian
 +</​code>​
 +
 +====== Черновик ======
 +
 +<​code>​
 +        - alert: CriticalCPU
 +          expr: sum by (kubernetes_io_hostname) (rate (container_cpu_usage_seconds_total[1m])) / sum (machine_cpu_cores) * 100 > 40
 +          for: 1m
 +          labels:
 +            severity: "​critical"​
 +          annotations:​
 +            summary: "​CriticalCPU {{ $labels.instance }}"
 +
 +        - alert: CriticalFS
 +          expr: container_fs_usage_bytes{device=~"​^/​dev/​[sv]d[a-z][1-9]$"​} / container_fs_limit_bytes * 100 > 80
 +          for: 1m
 +          labels:
 +            severity: "​critical"​
 +          annotations:​
 +            summary: "​CriticalFS {{ $labels.instance }}"
 +
 +        - alert: CriticalMEM
 +          expr: sum by (kubernetes_io_hostname) (container_memory_working_set_bytes) / sum (machine_memory_bytes) * 100 > 80
 +          for: 1m
 +          labels:
 +            severity: "​critical"​
 +          annotations:​
 +            summary: "​CriticalMEM {{ $labels.instance }}"
 </​code>​ </​code>​
сервис_victoriametrics.1742897252.txt.gz · Last modified: 2025/03/25 13:07 by val