User Tools

Site Tools


сервис_victoriametrics

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Next revision
Previous revision
сервис_victoriametrics [2025/03/25 13:05]
val created
сервис_victoriametrics [2025/03/29 09:32] (current)
val [Уведомления]
Line 1: Line 1:
 ====== Сервис VictoriaMetrics ====== ====== Сервис VictoriaMetrics ======
  
 +===== Метрики =====
 +
 +  * [[https://​docs.victoriametrics.com/​sd_configs/​|vmagent and single-node VictoriaMetrics supports the following Prometheus-compatible service discovery]]
 +
 +  * [[https://​docs.victoriametrics.com/​guides/​k8s-monitoring-via-vm-cluster/​|Kubernetes monitoring with VictoriaMetrics Cluster]]
 +
 +  * [[https://​docs.victoriametrics.com/​guides/​k8s-monitoring-via-vm-single/​|Kubernetes monitoring via VictoriaMetrics Single]]
 +
 +  * [[https://​docs.victoriametrics.com/​scrape_config_examples/​]]
 +
 +  * [[Система Kubernetes#​kube-state-metrics]]
 +<​code>​
 +(venv1) server# ansible all -f 4 -m apt -a '​pkg=prometheus-node-exporter state=present update_cache=true'​ -i /​root/​kubespray/​inventory/​mycluster/​hosts.yaml
 +
 +kube1# helm repo add vm https://​victoriametrics.github.io/​helm-charts/​
 +kube1# helm repo update
 +
 +kube1:~/vm# cat guide-vmsingle-values.yaml
 +</​code><​code>​
 +...
 +        - job_name: kube-state-metrics
 +          kubernetes_sd_configs:​
 +          - role: pod
 +          relabel_configs:​
 +          - source_labels:​ [__meta_kubernetes_pod_container_name]
 +            regex: kube-state-metrics
 +            action: keep
 +          - source_labels:​ [__meta_kubernetes_pod_container_port_number]
 +            regex: "​8080"​
 +            action: keep
 +
 +        - job_name: node-exporter
 +          static_configs:​
 +            - targets:
 +              - kube1.corpX.un:​9100
 +              - kube2.corpX.un:​9100
 +              - kube3.corpX.un:​9100
 +</​code><​code>​
 +kube1:~/vm# helm upgrade -i vmsingle vm/​victoria-metrics-single -f guide-vmsingle-values.yaml -n vm --create-namespace
 +
 +kube1:~/vm# kubectl scale --replicas 1 statefulset vmsingle-victoria-metrics-single-server -n vm
 +
 +cmder> kubectl port-forward svc/​vmsingle-victoria-metrics-single-server 8428 -n vm
 +</​code>​
 +
 +  * Сервис Grafana в [[Сервис Grafana#​Kubernetes]]
 +
 +===== Уведомления =====
 +
 +  * [[Сервис Prometheus#​prometheus-alertmanager]] (ссылки на правила)
 +
 +<​code>​
 +(venv1) server# ansible-playbook /​root/​conf/​ansible/​roles/​mail.yml
 +
 +kube1:~/vm# helm show values vm/​victoria-metrics-alert > vm-alert-values.yaml
 +
 +$ wget -qO - https://​raw.githubusercontent.com/​samber/​awesome-prometheus-alerts/​master/​dist/​rules/​host-and-hardware/​node-exporter.yml | sed '​s/​^/ ​   /'
 +
 +$ wget -qO - https://​raw.githubusercontent.com/​samber/​awesome-prometheus-alerts/​master/​dist/​rules/​kubernetes/​kubestate-exporter.yml | sed '​s/​^/ ​   /'
 +
 +kube1:~/vm# cat vm-alert-values.yaml
 +</​code><​code>​
 +...
 +server:
 +...
 +  datasource:
 +    url: "​http://​vmsingle-victoria-metrics-single-server:​8428"​
 +#    url: "​http://​vmsingle-victoria-metrics-single-server.vm.svc.cluster.local:​8428"​
 +#    url: "​http://​vmcluster-victoria-metrics-cluster-vmselect.vm.svc.cluster.local:​8481/​select/​0/​prometheus/"​
 +...
 +  notifier:
 +    alertmanager:​
 +      url: "​http://​alertmanager:​9093"​
 +...
 +  config:
 +    alerts:
 +#      groups: []
 +      groups:
 +      - name: NodeExporter
 +...   
 +      - name: KubestateExporter
 +        rules:
 +        - alert: KubernetesContainerOomKiller
 +          expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="​OOMKilled"​}[10m]) == 1
 +          for: 0m
 +          labels:
 +            severity: warning
 +          annotations:​
 +            summary: Kubernetes Container oom killer (instance {{ $labels.instance }})
 +            description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled"
 +...
 +alertmanager:​
 +...
 +  enabled: true
 +...
 +  config:
 +    global:
 +      smtp_smarthost:​ '​server.corpX.un:​25'​
 +      smtp_from: '​alertmanager@corpX.un'​
 +      smtp_require_tls:​ false
 +
 +    route:
 +      group_wait: 30s
 +      group_interval:​ 5m
 +      repeat_interval:​ 3h
 +      receiver: team-mails
 +
 +    receivers:
 +    - name: '​team-mails'​
 +      email_configs:​
 +      - to: '​student@corpX.un'​
 +        send_resolved:​ true
 +...
 +</​code><​code>​
 +kube1:~/vm# helm upgrade -i vma vm/​victoria-metrics-alert -f vm-alert-values.yaml -n vm
 +
 +kube1:~/vm# kubectl -n vm exec -ti pods/​vma-victoria-metrics-alert-server-<​TAB>​ -- sh
 +</​code><​code>​
 +/ # cat /​config/​alert-rules.yaml
 +...
 +</​code><​code>​
 +cmder> kubectl -n vm port-forward svc/​vma-victoria-metrics-alert-server 8880
 +
 +kube1:~/vm# kubectl -n vm exec -ti pods/​vma-victoria-metrics-alert-alertmanager-<​TAB>​ -- sh
 +</​code><​code>​
 +/​alertmanager $ cat /​config/​alertmanager.yaml
 +...
 +</​code><​code>​
 +cmder> kubectl -n vm port-forward svc/​vma-victoria-metrics-alert-alertmanager 9093
 +</​code>​
 +
 +  * Используем [[Система Kubernetes#Базовые объекты k8s]] для стресс-тестирования из [[Основы администрирования систем Linux#Модуль 11. Анализ производительности и оптимизация системы]]
 +  * [[Команда dd]] для имитации нехватки места на диске
 +
 +===== Журналы =====
 +
 +  * [[https://​docs.victoriametrics.com/​helm/​victorialogs-single/​]]
 +  * [[https://​docs.victoriametrics.com/​victorialogs/​logsql-examples/​]] ​
 +
 +<​code>​
 +kube1:~/vm# helm show values vm/​victoria-logs-single > vls-values.yaml
 +
 +kube1:~/vm# cat vls-values.yaml
 +</​code><​code>​
 +...
 +vector:
 +...
 +  enabled: true
 +...
 +</​code><​code>​
 +kube1:~/vm# helm upgrade -i vls oci://​ghcr.io/​victoriametrics/​helm-charts/​victoria-logs-single -f vls-values.yaml -n vm
 +
 +cmder> kubectl port-forward svc/vls-victoria-logs-single-server 9428 -n vm
 +
 +VMUI -> Log Query: kubernetes.pod_name: my-debian
 +</​code>​
 +
 +==== Подключение Grafana ====
 +
 +<​code>​
 +Grafana -> Connections ->
 +Add new connection: VictoriaLogs -> 
 +
 +Install -> New Datasources
 +http://​vls-victoria-logs-single-server:​9428 -> 
 +
 +Explore -> LogsQL: _time:5m
 +
 +Builder -> Filter: kubernetes.pod_name = my-debian
 +</​code>​
 +
 +====== Черновик ======
 +
 +<​code>​
 +        - alert: CriticalCPU
 +          expr: sum by (kubernetes_io_hostname) (rate (container_cpu_usage_seconds_total[1m])) / sum (machine_cpu_cores) * 100 > 40
 +          for: 1m
 +          labels:
 +            severity: "​critical"​
 +          annotations:​
 +            summary: "​CriticalCPU {{ $labels.instance }}"
 +
 +        - alert: CriticalFS
 +          expr: container_fs_usage_bytes{device=~"​^/​dev/​[sv]d[a-z][1-9]$"​} / container_fs_limit_bytes * 100 > 80
 +          for: 1m
 +          labels:
 +            severity: "​critical"​
 +          annotations:​
 +            summary: "​CriticalFS {{ $labels.instance }}"
 +
 +        - alert: CriticalMEM
 +          expr: sum by (kubernetes_io_hostname) (container_memory_working_set_bytes) / sum (machine_memory_bytes) * 100 > 80
 +          for: 1m
 +          labels:
 +            severity: "​critical"​
 +          annotations:​
 +            summary: "​CriticalMEM {{ $labels.instance }}"
 +</​code>​
сервис_victoriametrics.1742897146.txt.gz · Last modified: 2025/03/25 13:05 by val