This shows you the differences between two versions of the page.
| Next revision | Previous revision | ||
|
сервис_victoriametrics [2025/03/25 13:05] val created |
сервис_victoriametrics [2025/03/29 09:32] (current) val [Уведомления] |
||
|---|---|---|---|
| Line 1: | Line 1: | ||
| ====== Сервис VictoriaMetrics ====== | ====== Сервис VictoriaMetrics ====== | ||
| + | ===== Метрики ===== | ||
| + | |||
| + | * [[https://docs.victoriametrics.com/sd_configs/|vmagent and single-node VictoriaMetrics support the following Prometheus-compatible service discovery options]] | ||
| + | |||
| + | * [[https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/|Kubernetes monitoring with VictoriaMetrics Cluster]] | ||
| + | |||
| + | * [[https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-single/|Kubernetes monitoring via VictoriaMetrics Single]] | ||
| + | |||
| + | * [[https://docs.victoriametrics.com/scrape_config_examples/]] | ||
| + | |||
| + | * [[Система Kubernetes#kube-state-metrics]] | ||
| + | <code> | ||
| + | (venv1) server# ansible all -f 4 -m apt -a 'pkg=prometheus-node-exporter state=present update_cache=true' -i /root/kubespray/inventory/mycluster/hosts.yaml | ||
| + | |||
| + | kube1# helm repo add vm https://victoriametrics.github.io/helm-charts/ | ||
| + | kube1# helm repo update | ||
| + | |||
| + | kube1:~/vm# cat guide-vmsingle-values.yaml | ||
| + | </code><code> | ||
| + | ... | ||
| + | - job_name: kube-state-metrics | ||
| + | kubernetes_sd_configs: | ||
| + | - role: pod | ||
| + | relabel_configs: | ||
| + | - source_labels: [__meta_kubernetes_pod_container_name] | ||
| + | regex: kube-state-metrics | ||
| + | action: keep | ||
| + | - source_labels: [__meta_kubernetes_pod_container_port_number] | ||
| + | regex: "8080" | ||
| + | action: keep | ||
| + | |||
| + | - job_name: node-exporter | ||
| + | static_configs: | ||
| + | - targets: | ||
| + | - kube1.corpX.un:9100 | ||
| + | - kube2.corpX.un:9100 | ||
| + | - kube3.corpX.un:9100 | ||
| + | </code><code> | ||
| + | kube1:~/vm# helm upgrade -i vmsingle vm/victoria-metrics-single -f guide-vmsingle-values.yaml -n vm --create-namespace | ||
| + | |||
| + | kube1:~/vm# kubectl scale --replicas 1 statefulset vmsingle-victoria-metrics-single-server -n vm | ||
| + | |||
| + | cmder> kubectl port-forward svc/vmsingle-victoria-metrics-single-server 8428 -n vm | ||
| + | </code> | ||
| + | |||
| + | * [[Сервис Grafana#Kubernetes|Сервис Grafana в Kubernetes]] | ||
| + | |||
| + | ===== Уведомления ===== | ||
| + | |||
| + | * [[Сервис Prometheus#prometheus-alertmanager]] (ссылки на правила) | ||
| + | |||
| + | <code> | ||
| + | (venv1) server# ansible-playbook /root/conf/ansible/roles/mail.yml | ||
| + | |||
| + | kube1:~/vm# helm show values vm/victoria-metrics-alert > vm-alert-values.yaml | ||
| + | |||
| + | $ wget -qO - https://raw.githubusercontent.com/samber/awesome-prometheus-alerts/master/dist/rules/host-and-hardware/node-exporter.yml | sed 's/^/ /' | ||
| + | |||
| + | $ wget -qO - https://raw.githubusercontent.com/samber/awesome-prometheus-alerts/master/dist/rules/kubernetes/kubestate-exporter.yml | sed 's/^/ /' | ||
| + | |||
| + | kube1:~/vm# cat vm-alert-values.yaml | ||
| + | </code><code> | ||
| + | ... | ||
| + | server: | ||
| + | ... | ||
| + | datasource: | ||
| + | url: "http://vmsingle-victoria-metrics-single-server:8428" | ||
| + | # url: "http://vmsingle-victoria-metrics-single-server.vm.svc.cluster.local:8428" | ||
| + | # url: "http://vmcluster-victoria-metrics-cluster-vmselect.vm.svc.cluster.local:8481/select/0/prometheus/" | ||
| + | ... | ||
| + | notifier: | ||
| + | alertmanager: | ||
| + | url: "http://alertmanager:9093" | ||
| + | ... | ||
| + | config: | ||
| + | alerts: | ||
| + | # groups: [] | ||
| + | groups: | ||
| + | - name: NodeExporter | ||
| + | ... | ||
| + | - name: KubestateExporter | ||
| + | rules: | ||
| + | - alert: KubernetesContainerOomKiller | ||
| + | expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1 | ||
| + | for: 0m | ||
| + | labels: | ||
| + | severity: warning | ||
| + | annotations: | ||
| + | summary: Kubernetes Container oom killer (instance {{ $labels.instance }}) | ||
| + | description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled" | ||
| + | ... | ||
| + | alertmanager: | ||
| + | ... | ||
| + | enabled: true | ||
| + | ... | ||
| + | config: | ||
| + | global: | ||
| + | smtp_smarthost: 'server.corpX.un:25' | ||
| + | smtp_from: 'alertmanager@corpX.un' | ||
| + | smtp_require_tls: false | ||
| + | |||
| + | route: | ||
| + | group_wait: 30s | ||
| + | group_interval: 5m | ||
| + | repeat_interval: 3h | ||
| + | receiver: team-mails | ||
| + | |||
| + | receivers: | ||
| + | - name: 'team-mails' | ||
| + | email_configs: | ||
| + | - to: 'student@corpX.un' | ||
| + | send_resolved: true | ||
| + | ... | ||
| + | </code><code> | ||
| + | kube1:~/vm# helm upgrade -i vma vm/victoria-metrics-alert -f vm-alert-values.yaml -n vm | ||
| + | |||
| + | kube1:~/vm# kubectl -n vm exec -ti pods/vma-victoria-metrics-alert-server-<TAB> -- sh | ||
| + | </code><code> | ||
| + | / # cat /config/alert-rules.yaml | ||
| + | ... | ||
| + | </code><code> | ||
| + | cmder> kubectl -n vm port-forward svc/vma-victoria-metrics-alert-server 8880 | ||
| + | |||
| + | kube1:~/vm# kubectl -n vm exec -ti pods/vma-victoria-metrics-alert-alertmanager-<TAB> -- sh | ||
| + | </code><code> | ||
| + | /alertmanager $ cat /config/alertmanager.yaml | ||
| + | ... | ||
| + | </code><code> | ||
| + | cmder> kubectl -n vm port-forward svc/vma-victoria-metrics-alert-alertmanager 9093 | ||
| + | </code> | ||
| + | |||
| + | * Используем [[Система Kubernetes#Базовые объекты k8s]] для стресс-тестирования из [[Основы администрирования систем Linux#Модуль 11. Анализ производительности и оптимизация системы]] | ||
| + | * [[Команда dd]] для нехватки места на диске | ||
| + | |||
| + | ===== Журналы ===== | ||
| + | |||
| + | * [[https://docs.victoriametrics.com/helm/victorialogs-single/]] | ||
| + | * [[https://docs.victoriametrics.com/victorialogs/logsql-examples/]] | ||
| + | |||
| + | <code> | ||
| + | kube1:~/vm# helm show values vm/victoria-logs-single > vls-values.yaml | ||
| + | |||
| + | kube1:~/vm# cat vls-values.yaml | ||
| + | </code><code> | ||
| + | ... | ||
| + | vector: | ||
| + | ... | ||
| + | enabled: true | ||
| + | ... | ||
| + | </code><code> | ||
| + | kube1:~/vm# helm upgrade -i vls oci://ghcr.io/victoriametrics/helm-charts/victoria-logs-single -f vls-values.yaml -n vm | ||
| + | |||
| + | cmder> kubectl port-forward svc/vls-victoria-logs-single-server 9428 -n vm | ||
| + | |||
| + | VMUI -> Log Query: kubernetes.pod_name: my-debian | ||
| + | </code> | ||
| + | |||
| + | ==== Подключение Grafana ==== | ||
| + | |||
| + | <code> | ||
| + | Grafana -> Connections -> | ||
| + | Add new connection: VictoriaLogs -> | ||
| + | |||
| + | Install -> New Datasources | ||
| + | http://vls-victoria-logs-single-server:9428 -> | ||
| + | |||
| + | Explore -> LogsQL: _time:5m | ||
| + | |||
| + | Builder -> Filter: kubernetes.pod_name = my-debian | ||
| + | </code> | ||
| + | |||
| + | ====== Черновик ====== | ||
| + | |||
| + | <code> | ||
| + | - alert: CriticalCPU | ||
| + | expr: sum by (kubernetes_io_hostname) (rate (container_cpu_usage_seconds_total[1m])) / sum (machine_cpu_cores) * 100 > 40 | ||
| + | for: 1m | ||
| + | labels: | ||
| + | severity: "critical" | ||
| + | annotations: | ||
| + | summary: "CriticalCPU {{ $labels.instance }}" | ||
| + | |||
| + | - alert: CriticalFS | ||
| + | expr: container_fs_usage_bytes{device=~"^/dev/[sv]d[a-z][1-9]$"} / container_fs_limit_bytes * 100 > 80 | ||
| + | for: 1m | ||
| + | labels: | ||
| + | severity: "critical" | ||
| + | annotations: | ||
| + | summary: "CriticalFS {{ $labels.instance }}" | ||
| + | |||
| + | - alert: CriticalMEM | ||
| + | expr: sum by (kubernetes_io_hostname) (container_memory_working_set_bytes) / sum (machine_memory_bytes) * 100 > 80 | ||
| + | for: 1m | ||
| + | labels: | ||
| + | severity: "critical" | ||
| + | annotations: | ||
| + | summary: "CriticalMEM {{ $labels.instance }}" | ||
| + | </code> | ||