This shows you the differences between two versions of the page.
| Both sides previous revision Previous revision Next revision | Previous revision | ||
|
сервис_victoriametrics [2025/03/25 13:35] val [Метрики] |
сервис_victoriametrics [2025/03/29 09:32] (current) val [Уведомления] |
||
|---|---|---|---|
| Line 4: | Line 4: | ||
| * [[https://docs.victoriametrics.com/sd_configs/|vmagent and single-node VictoriaMetrics support the following Prometheus-compatible service discovery options]] | * [[https://docs.victoriametrics.com/sd_configs/|vmagent and single-node VictoriaMetrics support the following Prometheus-compatible service discovery options]] | ||
| + | |||
| + | * [[https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/|Kubernetes monitoring with VictoriaMetrics Cluster]] | ||
| * [[https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-single/|Kubernetes monitoring via VictoriaMetrics Single]] | * [[https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-single/|Kubernetes monitoring via VictoriaMetrics Single]] | ||
| + | * [[https://docs.victoriametrics.com/scrape_config_examples/]] | ||
| + | * [[Система Kubernetes#kube-state-metrics]] | ||
| <code> | <code> | ||
| (venv1) server# ansible all -f 4 -m apt -a 'pkg=prometheus-node-exporter state=present update_cache=true' -i /root/kubespray/inventory/mycluster/hosts.yaml | (venv1) server# ansible all -f 4 -m apt -a 'pkg=prometheus-node-exporter state=present update_cache=true' -i /root/kubespray/inventory/mycluster/hosts.yaml | ||
| Line 17: | Line 21: | ||
| </code><code> | </code><code> | ||
| ... | ... | ||
| + | - job_name: kube-state-metrics | ||
| + | kubernetes_sd_configs: | ||
| + | - role: pod | ||
| + | relabel_configs: | ||
| + | - source_labels: [__meta_kubernetes_pod_container_name] | ||
| + | regex: kube-state-metrics | ||
| + | action: keep | ||
| + | - source_labels: [__meta_kubernetes_pod_container_port_number] | ||
| + | regex: "8080" | ||
| + | action: keep | ||
| + | |||
| - job_name: node-exporter | - job_name: node-exporter | ||
| static_configs: | static_configs: | ||
| Line 28: | Line 43: | ||
| kube1:~/vm# kubectl scale --replicas 1 statefulset vmsingle-victoria-metrics-single-server -n vm | kube1:~/vm# kubectl scale --replicas 1 statefulset vmsingle-victoria-metrics-single-server -n vm | ||
| - | cmder> kubectl -n vm port-forward svc/vmsingle-victoria-metrics-single-server 8428 -n vm | + | cmder> kubectl port-forward svc/vmsingle-victoria-metrics-single-server 8428 -n vm |
| + | </code> | ||
| - | kube1# helm repo add grafana https://grafana.github.io/helm-charts | + | * [[Сервис Grafana#Kubernetes|Сервис Grafana в Kubernetes]] |
| - | kube1# helm repo update | + | |
| - | kube1:~/vm# cat my-grafana-values.yaml | ||
| - | </code><code> | ||
| - | datasources: | ||
| - | datasources.yaml: | ||
| - | apiVersion: 1 | ||
| - | datasources: | ||
| - | - name: victoriametrics | ||
| - | type: prometheus | ||
| - | orgId: 1 | ||
| - | url: http://vmsingle-victoria-metrics-single-server:8428 | ||
| - | access: proxy | ||
| - | isDefault: true | ||
| - | updateIntervalSeconds: 10 | ||
| - | editable: true | ||
| - | |||
| - | dashboardProviders: | ||
| - | dashboardproviders.yaml: | ||
| - | apiVersion: 1 | ||
| - | providers: | ||
| - | - name: 'default' | ||
| - | orgId: 1 | ||
| - | folder: '' | ||
| - | type: file | ||
| - | disableDeletion: true | ||
| - | editable: true | ||
| - | options: | ||
| - | path: /var/lib/grafana/dashboards/default | ||
| - | |||
| - | dashboards: | ||
| - | default: | ||
| - | victoriametrics: | ||
| - | gnetId: 10229 | ||
| - | revision: 22 | ||
| - | datasource: victoriametrics | ||
| - | kubernetes: | ||
| - | gnetId: 14205 | ||
| - | revision: 1 | ||
| - | datasource: victoriametrics | ||
| - | node-exporter: | ||
| - | gnetId: 1860 | ||
| - | revision: 37 | ||
| - | datasource: victoriametrics | ||
| - | </code><code> | ||
| - | kube1:~/vm# helm upgrade -i my-grafana grafana/grafana -f my-grafana-values.yaml -n vm --create-namespace | ||
| - | |||
| - | kube1# kubectl get secret --namespace vm my-grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo | ||
| - | |||
| - | cmder> kubectl -n vm port-forward svc/vmsingle-victoria-metrics-single-server 3000:80 | ||
| - | </code> | ||
| - | |||
| - | * [[https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/|Kubernetes monitoring with VictoriaMetrics Cluster]] | ||
| ===== Уведомления ===== | ===== Уведомления ===== | ||
| - | * [[Сервис Prometheus#prometheus-alertmanager]] | + | * [[Сервис Prometheus#prometheus-alertmanager]] (ссылки на правила) |
| <code> | <code> | ||
| Line 92: | Line 56: | ||
| kube1:~/vm# helm show values vm/victoria-metrics-alert > vm-alert-values.yaml | kube1:~/vm# helm show values vm/victoria-metrics-alert > vm-alert-values.yaml | ||
| + | |||
| + | $ wget -qO - https://raw.githubusercontent.com/samber/awesome-prometheus-alerts/master/dist/rules/host-and-hardware/node-exporter.yml | sed 's/^/ /' | ||
| + | |||
| + | $ wget -qO - https://raw.githubusercontent.com/samber/awesome-prometheus-alerts/master/dist/rules/kubernetes/kubestate-exporter.yml | sed 's/^/ /' | ||
| kube1:~/vm# cat vm-alert-values.yaml | kube1:~/vm# cat vm-alert-values.yaml | ||
| Line 111: | Line 79: | ||
| # groups: [] | # groups: [] | ||
| groups: | groups: | ||
| - | - name: node_exporter_alerts | + | - name: NodeExporter |
| ... | ... | ||
| - | - name: vm_k8s_alerts | + | - name: KubestateExporter |
| rules: | rules: | ||
| - | - alert: CriticalCPU | + | - alert: KubernetesContainerOomKiller |
| - | expr: sum by (kubernetes_io_hostname) (rate (container_cpu_usage_seconds_total[1m])) / sum (machine_cpu_cores) * 100 > 40 | + | expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) == 1 |
| - | for: 1m | + | for: 0m |
| labels: | labels: | ||
| - | severity: "critical" | + | severity: warning |
| annotations: | annotations: | ||
| - | summary: "CriticalCPU {{ $labels.instance }}" | + | summary: Kubernetes Container oom killer (instance {{ $labels.instance }}) |
| - | + | description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled" | |
| - | - alert: CriticalFS | + | |
| - | expr: container_fs_usage_bytes{device=~"^/dev/[sv]d[a-z][1-9]$"} / container_fs_limit_bytes * 100 > 80 | + | |
| - | for: 1m | + | |
| - | labels: | + | |
| - | severity: "critical" | + | |
| - | annotations: | + | |
| - | summary: "CriticalFS {{ $labels.instance }}" | + | |
| - | + | ||
| - | - alert: CriticalMEM | + | |
| - | expr: sum by (kubernetes_io_hostname) (container_memory_working_set_bytes) / sum (machine_memory_bytes) * 100 > 80 | + | |
| - | for: 1m | + | |
| - | labels: | + | |
| - | severity: "critical" | + | |
| - | annotations: | + | |
| - | summary: "CriticalMEM {{ $labels.instance }}" | + | |
| ... | ... | ||
| alertmanager: | alertmanager: | ||
| Line 204: | Line 156: | ||
| VMUI-> Log Query: kubernetes.pod_name: my-debian | VMUI-> Log Query: kubernetes.pod_name: my-debian | ||
| - | </code><code> | + | </code> |
| + | |||
| + | ==== Подключение Grafana ==== | ||
| + | |||
| + | <code> | ||
| Grafana -> Connections -> | Grafana -> Connections -> | ||
| Add new connection: VictoriaLogs -> | Add new connection: VictoriaLogs -> | ||
| Line 214: | Line 170: | ||
| Builder-> Filter: kubernetes.pod_name = my-debian | Builder-> Filter: kubernetes.pod_name = my-debian | ||
| + | </code> | ||
| + | |||
| + | ====== Черновик ====== | ||
| + | |||
| + | <code> | ||
| + | - alert: CriticalCPU | ||
| + | expr: sum by (kubernetes_io_hostname) (rate (container_cpu_usage_seconds_total[1m])) / sum (machine_cpu_cores) * 100 > 40 | ||
| + | for: 1m | ||
| + | labels: | ||
| + | severity: "critical" | ||
| + | annotations: | ||
| + | summary: "CriticalCPU {{ $labels.instance }}" | ||
| + | |||
| + | - alert: CriticalFS | ||
| + | expr: container_fs_usage_bytes{device=~"^/dev/[sv]d[a-z][1-9]$"} / container_fs_limit_bytes * 100 > 80 | ||
| + | for: 1m | ||
| + | labels: | ||
| + | severity: "critical" | ||
| + | annotations: | ||
| + | summary: "CriticalFS {{ $labels.instance }}" | ||
| + | |||
| + | - alert: CriticalMEM | ||
| + | expr: sum by (kubernetes_io_hostname) (container_memory_working_set_bytes) / sum (machine_memory_bytes) * 100 > 80 | ||
| + | for: 1m | ||
| + | labels: | ||
| + | severity: "critical" | ||
| + | annotations: | ||
| + | summary: "CriticalMEM {{ $labels.instance }}" | ||
| </code> | </code> | ||