# apt install prometheus
# less /etc/prometheus/prometheus.yml
...
global:
  # Interval between scrapes of each target.
  scrape_interval: 15s
...
# promtool check config /etc/prometheus/prometheus.yml
# service prometheus restart
!!! Ссылки содержат DNS имена
# apt install prometheus-alertmanager
# cat /etc/prometheus/alertmanager.yml
...
global:
  smtp_smarthost: 'localhost:25'
  smtp_from: 'prometheus@server.corpX.un'
  # Mail is handed to localhost:25 without requiring TLS; SMTP auth stays disabled.
  smtp_require_tls: false
  # smtp_auth_username: 'alertmanager'
  # smtp_auth_password: 'password'
...
  # A default receiver
  # NOTE(review): this key appears to belong to the elided `route:` section - confirm.
  receiver: team-X-mails
...
receivers:
  - name: 'team-X-mails'
    email_configs:
      - to: 'student@corpX.un'
        # Also send a mail when the alert is resolved.
        send_resolved: true
...
# service prometheus-alertmanager restart
# cat /etc/prometheus/first_rules.yml
# Alerting rules; every rule must hold for one minute (`for: 1m`) before it
# fires, and every alert is labelled severity "critical".
groups:
  - name: alert.rules
    rules:
      # A scrape target reports up == 0.
      - alert: InstanceDown
        expr: up == 0
        for: 1m
        labels:
          severity: "critical"
        annotations:
          summary: "Endpoint {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."

      # A probe reports probe_success == 0.
      - alert: EndpointDown
        expr: probe_success == 0
        for: 1m
        labels:
          severity: "critical"
        annotations:
          summary: "Endpoint {{ $labels.instance }} down"

      # Per-second rate of ifInOctets on instance "router" exceeds 125000
      # (125000 octets/s * 8 = 1 Mbit/s).
      - alert: CriticalTraffic
        expr: rate(ifInOctets{instance="router"}[1m])>125000
        for: 1m
        labels:
          severity: "critical"
        annotations:
          summary: "CriticalTraffic {{ $labels.instance }}"
# cat /etc/prometheus/prometheus.yml
...
# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['localhost:9093']

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "first_rules.yml"
  # - "second_rules.yml"
...
... Checking /etc/prometheus/first_rules.yml SUCCESS: N rules found ...
$ df /
...
/dev/mapper/debian--vg-root 15662008 1877488 12969212 13% /
...
# TYPE node_filesystem_free_bytes gauge
node_filesystem_free_bytes{device="/dev/mapper/debian--vg-root",fstype="ext4",mountpoint="/"} = (15662008 - 1877488) * 1024
$ cat /sys/class/net/eth0/statistics/rx_bytes
или
$ cat /sys/class/net/bond0/statistics/rx_bytes
# TYPE node_network_receive_bytes_total counter
node_network_receive_bytes_total{device="bond0"}
# less /etc/prometheus/prometheus.yml
...
  - job_name: node
    # If prometheus-node-exporter is installed, grab stats about the local
    # machine by default.
    static_configs:
      - targets: ['localhost:9100']
8*rate(node_network_receive_bytes_total[1m])
8*rate(node_network_receive_bytes_total{device="bond0"}[1m])
8*rate(node_network_receive_bytes_total{device="eth0",instance="localhost:9100",job="node"}[1m])
# apt install prometheus-blackbox-exporter
# cat /etc/prometheus/blackbox.yml
...
  # NOTE(review): presumably nested under the elided `modules:` key - confirm.
  http_2xx:
    prober: http
    http:
      # Probe over IPv4.
      preferred_ip_protocol: "ip4"
...
# service prometheus-blackbox-exporter restart
# cat /etc/prometheus/prometheus.yml
...
  # HTTP probes. The relabel rules copy each listed target into the `target`
  # URL parameter, keep it as the `instance` label, and point the actual
  # scrape at localhost:9115/probe.
  - job_name: check_http
    metrics_path: /probe
    params:
      module: [http_2xx]
    static_configs:
      - targets:
          - https://val.bmstu.ru
          - https://ya.ru
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: localhost:9115

  # SSH banner probes against port 22 of the switches; same relabeling as above.
  - job_name: check_ssh
    metrics_path: /probe
    params:
      module: [ssh_banner]
    static_configs:
      - targets:
          - switch1:22
          - switch2:22
          - switch3:22
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: localhost:9115
probe_success... probe_duration_seconds... probe_http_duration_seconds...
# cat /etc/prometheus/prometheus.yml
...
  # ICMP probes; the target list is read from a file instead of being inlined.
  - job_name: check_ping
    metrics_path: /probe
    params:
      module: [icmp]
    file_sd_configs:
      # Both entries are commented out, so the `files` list is empty as written;
      # uncomment the one matching the target file that was created.
      - files:
#          - switchs.yml
#          - switchs.json
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: localhost:9115
# cat /etc/prometheus/switchs.json
[
  {
    "targets": [
      "switch1",
      "switch2",
      "switch3"
    ]
  }
]
# cat /etc/prometheus/switchs.yml
# One target group listing the three switches.
- targets:
    - switch1
    - switch2
    - switch3
# apt install prometheus-snmp-exporter
# cat /etc/prometheus/snmp.yml
# SNMP exporter configuration: one auth profile and one module (if_mib).
auths:
  public_v2:
    community: public
    version: 2

modules:
  if_mib:
    # Subtrees to walk: the two octet counters and the description column
    # used by the lookups below.
    walk:
      - 1.3.6.1.2.1.2.2.1.10
      - 1.3.6.1.2.1.2.2.1.16
      - 1.3.6.1.2.1.2.2.1.2
    metrics:
      # Counter indexed by ifIndex, with ifDescr attached as an extra label.
      - name: ifInOctets
        oid: 1.3.6.1.2.1.2.2.1.10
        type: counter
        indexes:
          - labelname: ifIndex
            type: Integer
        lookups:
          - labels:
              - ifIndex
            labelname: ifDescr
            oid: 1.3.6.1.2.1.2.2.1.2
            type: DisplayString

      # Same index and lookup as ifInOctets.
      - name: ifOutOctets
        oid: 1.3.6.1.2.1.2.2.1.16
        type: counter
        indexes:
          - labelname: ifIndex
            type: Integer
        lookups:
          - labels:
              - ifIndex
            labelname: ifDescr
            oid: 1.3.6.1.2.1.2.2.1.2
            type: DisplayString
# cp /usr/share/doc/prometheus-snmp-exporter/examples/generator.yml .
может понадобиться удалить все modules, кроме if_mib
# prometheus-snmp-generator generate
# cp snmp.yml /etc/prometheus/snmp.yml
# service prometheus-snmp-exporter restart
# curl 'http://127.0.0.1:9116/snmp?target=router'
# cat /etc/prometheus/prometheus.yml
...
  # SNMP scrape of "router". The relabel rules pass the device name as the
  # `target` URL parameter, keep it as the `instance` label, and send the
  # request to localhost:9116/snmp.
  - job_name: 'snmp'
    static_configs:
      - targets:
          - router
    metrics_path: /snmp
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: localhost:9116
rate(ifInOctets{ifDescr="FastEthernet0/0",ifIndex="1",instance="router",job="snmp"}[1m])
или
rate(ifOutOctets{ifIndex="5",instance="router",job="snmp"}[1m])
8*rate(ifInOctets{ifDescr="FastEthernet0/0",instance="router"}[1m])
или
8*rate(ifOutOctets{ifDescr="Port-channel1",instance="router"}[1m])
# apt install prometheus-pushgateway
# cat /etc/prometheus/prometheus.yml
...
  - job_name: 'pushgateway'
    # Keep the labels carried by the scraped metrics when they conflict with
    # the labels Prometheus would attach to this target.
    honor_labels: true
    static_configs:
      - targets: ['localhost:9091']
# cat ip_dhcp_binding.sh
#!/bin/sh
# Counts the lines of `show ip dhcp binding` on the DHCP server that contain
# the network prefix and pushes the count to the Pushgateway on
# 127.0.0.1:9091 as metric `ip_dhcp_binding`, grouped by
# job=cisco_dhcp, dhcp_server=<server> and net=<prefix>.

# Presumably keeps curl from sending the loopback request through a proxy.
unset http_proxy

DHCP_SERVER=router
NET=192.168

# -F matches the prefix literally (the dots are not regex wildcards);
# -c prints the number of matching lines, replacing `grep | wc -l`.
COUNT=$(rsh "${DHCP_SERVER}" show ip dhcp binding | grep -c -F -- "${NET}")

# The here-document supplies the sample line that curl posts as the request body.
cat << EOF | curl --data-binary @- "http://127.0.0.1:9091/metrics/job/cisco_dhcp/dhcp_server/${DHCP_SERVER}/net/${NET}"
ip_dhcp_binding ${COUNT}
EOF
ip_dhcp_binding{dhcp_server="router",job="cisco_dhcp",net="192.168"}
# crontab -l
* * * * * /root/ip_dhcp_binding.sh