This is an old revision of the document!
kube1# kubectl logs pods/my-debian -f
kube1:~# kubectl describe nodes kubeN
kube1:~/metrics-server# curl -L https://github.com/kubernetes-sigs/metrics-server/releases/download/v0.7.2/components.yaml | tee metrics-server-components.yaml
kube1:~/metrics-server# cat metrics-server-components.yaml
...
containers:
- args:
- --cert-dir=/tmp
- --kubelet-insecure-tls # add this
...
kube1:~/metrics-server# kubectl apply -f metrics-server-components.yaml
kube1# kubectl get pods -A | grep metrics-server
kube1# kubectl top pod #-n kube-system
kube1# kubectl top pod -A --sort-by=memory
kube1# kubectl top node
(venv1) server# ansible all -f 4 -m apt -a 'pkg=prometheus-node-exporter state=present update_cache=true' -i /root/kubespray/inventory/mycluster/hosts.yaml
kube1# helm repo add vm https://victoriametrics.github.io/helm-charts/
kube1# helm repo update
kube1:~/vm# cat guide-vmsingle-values.yaml
...
- job_name: node-exporter
static_configs:
- targets:
- kube1.corpX.un:9100
- kube2.corpX.un:9100
- kube3.corpX.un:9100
- kube4.corpX.un:9100
kube1:~/vm# helm upgrade -i vmsingle vm/victoria-metrics-single -f guide-vmsingle-values.yaml -n vm --create-namespace
kube1:~/vm# kubectl -n vm scale --replicas 1 statefulset vmsingle-victoria-metrics-single-server
cmder> kubectl -n vm port-forward svc/vmsingle-victoria-metrics-single-server 8428
kube1:~/vm# helm show values vm/victoria-metrics-alert > values-vm-alert.yaml
kube1:~/vm# cat values-vm-alert.yaml
...
server:
...
datasource:
url: "http://vmsingle-victoria-metrics-single-server:8428"
# url: "http://vmsingle-victoria-metrics-single-server.default.svc.cluster.local:8428"
# url: "http://vmcluster-victoria-metrics-cluster-vmselect.default.svc.cluster.local:8481/select/0/prometheus/"
...
notifier:
alertmanager:
url: "http://alertmanager:9093"
...
config:
alerts:
groups:
- name: alert.rules
rules:
- alert: CriticalCPU
expr: sum by (kubernetes_io_hostname) (rate (container_cpu_usage_seconds_total[1m])) / sum (machine_cpu_cores) * 100 > 40
for: 1m
labels:
severity: "critical"
annotations:
summary: "CriticalCPU {{ $labels.instance }}"
- alert: CriticalFS
expr: container_fs_usage_bytes{device=~"^/dev/[sv]d[a-z][1-9]$"} / container_fs_limit_bytes * 100 > 80
for: 1m
labels:
severity: "critical"
annotations:
summary: "CriticalFS {{ $labels.instance }}"
- alert: CriticalMEM
expr: sum by (kubernetes_io_hostname) (container_memory_working_set_bytes) / sum (machine_memory_bytes) * 100 > 80
for: 1m
labels:
severity: "critical"
annotations:
summary: "CriticalMEM {{ $labels.instance }}"
...
alertmanager:
...
enabled: true
...
config:
global:
smtp_smarthost: 'server.corpX.un:25'
smtp_from: 'alertmanager@corpX.un'
smtp_require_tls: false
route:
group_wait: 30s
group_interval: 5m
repeat_interval: 3h
receiver: team-X-mails
receivers:
- name: 'team-X-mails'
email_configs:
- to: 'student@corpX.un'
send_resolved: true
...
kube1:~/vm# helm upgrade -i vma vm/victoria-metrics-alert -f values-vm-alert.yaml
kube1:~/vm# kubectl exec -ti pods/vma-victoria-metrics-alert-server-<TAB> -- sh
/ # cat /config/alert-rules.yaml
...
kube1:~/vm# kubectl exec -ti pods/vma-victoria-metrics-alert-alertmanager-<TAB> -- sh
/alertmanager $ cat /config/alertmanager.yaml
...
kube1:~/vm# helm show values vm/victoria-logs-single > values-vls.yaml
kube1:~/vm# cat values-vls.yaml
...
vector:
...
enabled: true
...
kube1:~/vm# helm upgrade -i vls oci://ghcr.io/victoriametrics/helm-charts/victoria-logs-single -f values-vls.yaml
cmder$ kubectl port-forward svc/vls-victoria-logs-single-server 9428
Forwarding from 127.0.0.1:9428 -> 9428
...
Grafana -> Connections -> Add new connection: VictoriaLogs -> Install -> New Datasource: http://vls-victoria-logs-single-server:9428 -> Explore -> LogsQL: _time:5m Builder -> Filter: kubernetes.pod_name = my-debian
http://loki.loki-stack.svc.cluster.local:3100
http://loki-prometheus-server.loki-stack.svc.cluster.local:80
kube1:~/loki-stack# helm pull grafana/loki-stack
kube1:~/loki-stack# less loki-stack/charts/loki/values.yaml
persistence:
enabled: false
accessModes:
- ReadWriteOnce
size: 10Gi
kube1:~/loki-stack# cat values.yaml
loki:
persistence:
enabled: true
prometheus:
enabled: true
alertmanager:
config:
global:
smtp_smarthost: 'server.corpX.un:25'
smtp_from: 'alertmanager@corpX.un'
smtp_require_tls: false
templates:
- '/etc/alertmanager/*.tmpl'
route:
group_wait: 30s
group_interval: 5m
repeat_interval: 3h
receiver: team-X-mails
receivers:
- name: 'team-X-mails'
email_configs:
- to: 'student@corpX.un'
send_resolved: true
serverFiles:
alerting_rules.yml:
groups:
kube1:~/loki-stack# helm upgrade --install loki --namespace=loki-stack grafana/loki-stack --create-namespace -f values.yaml
### helm delete loki --namespace=loki-stack