This is an old revision of the document!
# apt install prometheus
# promtool check config /etc/prometheus/prometheus.yml
# apt install prometheus-alertmanager
# cat /etc/prometheus/alertmanager.yml
...
# SMTP settings used for all e-mail notifications.
global:
  smtp_smarthost: 'localhost:25'
  smtp_from: 'prometheus@server.corpX.un'
  # Local MTA speaks plain SMTP, so TLS must be switched off explicitly.
  smtp_require_tls: false
...
  # A default receiver
  receiver: team-X-mails
...
receivers:
  - name: 'team-X-mails'
    email_configs:
      # Also send a follow-up mail when the alert clears.
      - send_resolved: true
        to: 'student@corpX.un'
...
# cat /etc/prometheus/first_rules.yml
groups:
  - name: alert.rules
    rules:
      # Fires when a scrape target has been unreachable for one minute.
      - alert: InstanceDown
        expr: up == 0
        for: 1m
        labels:
          severity: "critical"
        annotations:
          summary: "Endpoint {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."
      # Fires when a blackbox-exporter probe fails for one minute.
      - alert: EndpointDown
        expr: probe_success == 0
        for: 1m
        labels:
          severity: "critical"
        annotations:
          summary: "Endpoint {{ $labels.instance }} down"
      # 125000 bytes/s sustained over 1m is roughly 1 Mbit/s inbound.
      - alert: CriticalTraffic
        expr: rate(if_in_octets{instance="router"}[1m]) > 125000
        for: 1m
        labels:
          severity: "critical"
        annotations:
          summary: "CriticalTraffic {{ $labels.instance }}"
# cat /etc/prometheus/prometheus.yml
...
# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['localhost:9093']

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "first_rules.yml"
  # - "second_rules.yml"
...
# promtool check config /etc/prometheus/prometheus.yml
... Checking /etc/prometheus/first_rules.yml SUCCESS: N rules found ...
$ df ... /dev/mapper/debian--vg-root 15662008 1877488 12969212 13% / ... node_filesystem_free_bytes{device="/dev/mapper/debian--vg-root",fstype="ext4",mountpoint="/"} = (15662008 - 1877488) * 1024
$ cat /sys/class/net/eth1/statistics/rx_bytes node_network_receive_bytes_total{device="eth1"}
# less /etc/prometheus/prometheus.yml
...
  - job_name: node
    # If prometheus-node-exporter is installed, grab stats about the local
    # machine by default.
    static_configs:
      - targets: ['localhost:9100']
8*rate(node_network_receive_bytes_total[1m]) 8*rate(node_network_receive_bytes_total{device="eth1"}[1m]) 8*rate(node_network_receive_bytes_total{device="eth1",instance="localhost:9100",job="node"}[1m])
# apt install prometheus-blackbox-exporter
# cat /etc/prometheus/blackbox.yml
...
  # Probe module: plain HTTP GET, success on any 2xx response.
  http_2xx:
    prober: http
    http:
      # Force IPv4; the default is ip6, which fails on v4-only targets.
      preferred_ip_protocol: "ip4"
...
# service prometheus-blackbox-exporter restart
# cat /etc/prometheus/prometheus.yml
- job_name: check_ssh metrics_path: /probe params: module: [ssh_banner] static_configs: - targets: - switch1:22 - switch2:22 - switch3:22 relabel_configs: - source_labels: [__address__] target_label: __param_target - source_labels: [__param_target] target_label: instance - target_label: __address__ replacement: 127.0.0.1:9115 - job_name: check_http metrics_path: /probe params: module: [http_2xx] static_configs: - targets: - http://val.bmstu.ru - https://ya.ru relabel_configs: - source_labels: [__address__] target_label: __param_target - source_labels: [__param_target] target_label: instance - target_label: __address__ replacement: 127.0.0.1:9115
probe_success... probe_duration_seconds... probe_http_duration_seconds...
# cat /etc/prometheus/prometheus.yml
...
  - job_name: check_ping
    metrics_path: /probe
    params:
      module: [icmp]
    file_sd_configs:
      # Uncomment one of the target lists below; leaving 'files:' empty
      # (null) makes promtool reject the configuration.
      - files:
        # - switchs.yml
        # - switchs.json
    relabel_configs:
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      - target_label: __address__
        replacement: localhost:9115
# cat /etc/prometheus/switchs.json
[
  {
    "targets": [ "switch1", "switch2", "switch3" ]
  }
]
# cat /etc/prometheus/switchs.yml
- targets:
    - switch1
    - switch2
    - switch3
# apt install prometheus-snmp-exporter
# cat /etc/prometheus/snmp.yml
# if_mib:  # the default module; lets you omit module= in the HTTP request
snmp_in_out_octets:
  version: 2
  auth:
    community: public
  # OIDs walked on each scrape (IF-MIB interface table).
  walk:
    - 1.3.6.1.2.1.2.2.1.10   # ifInOctets
    - 1.3.6.1.2.1.2.2.1.16   # ifOutOctets
    - 1.3.6.1.2.1.2.2.1.2    # ifDescr (used only for label lookups)
  metrics:
    - name: if_in_octets
      oid: 1.3.6.1.2.1.2.2.1.10
      type: counter
      indexes:
        - labelname: ifIndex
          type: Integer
      # Resolve the numeric ifIndex to a human-readable ifDescr label.
      lookups:
        - labels:
            - ifIndex
          labelname: ifDescr
          oid: 1.3.6.1.2.1.2.2.1.2
          type: DisplayString
    - name: if_out_octets
      oid: 1.3.6.1.2.1.2.2.1.16
      type: counter
      indexes:
        - labelname: ifIndex
          type: Integer
      lookups:
        - labels:
            - ifIndex
          labelname: ifDescr
          oid: 1.3.6.1.2.1.2.2.1.2
          type: DisplayString
# service prometheus-snmp-exporter restart
# curl --noproxy 127.0.0.1 'http://127.0.0.1:9116/snmp?target=router&module=snmp_in_out_octets'
# cat /etc/prometheus/prometheus.yml
...
  - job_name: 'snmp'
    static_configs:
      - targets:
          - router
    metrics_path: /snmp
    params:
      module: [snmp_in_out_octets]
    relabel_configs:
      # Pass the device name as ?target= to the SNMP exporter ...
      - source_labels: [__address__]
        target_label: __param_target
      - source_labels: [__param_target]
        target_label: instance
      # ... while the scrape itself goes to the exporter on the server.
      - target_label: __address__
        replacement: server:9116
# promtool check config /etc/prometheus/prometheus.yml
# service prometheus restart
rate(if_in_octets{ifDescr="FastEthernet1/1",ifIndex="3",instance="router",job="snmp"}[1m]) 8*rate(if_in_octets{ifDescr="FastEthernet1/1",instance="router"}[1m])
# apt install prometheus-pushgateway
# cat /etc/prometheus/prometheus.yml
...
  - job_name: 'Pushgateway'
    # Keep the job/instance labels supplied by pushing clients instead of
    # overwriting them with this scrape's own labels.
    honor_labels: true
    static_configs:
      - targets: ['localhost:9091']
# cat ip_dhcp_binding.sh
#!/bin/sh
# Count active DHCP leases on the Cisco router for the given prefix and
# push the figure to the local Pushgateway as metric ip_dhcp_binding.
# Intended to run from cron on the Prometheus server.

# Make sure curl talks to 127.0.0.1 directly, not through a proxy.
unset http_proxy

DHCP_SERVER=router
NET=192.168

# Lines of "show ip dhcp binding" output that belong to the ${NET} prefix.
COUNT=$(rsh "${DHCP_SERVER}" show ip dhcp binding | grep -c "${NET}")

# Grouping labels in the URL path become labels on the pushed metric.
cat << EOF | curl --data-binary @- "http://127.0.0.1:9091/metrics/job/cisco_dhcp/dhcp_server/${DHCP_SERVER}/net/${NET}"
ip_dhcp_binding ${COUNT}
EOF
ip_dhcp_binding{dhcp_server="router",job="cisco_dhcp",net="192.168"}