From c1613be214aaf431ffc77ebf36fbd471103e6b46 Mon Sep 17 00:00:00 2001
From: enggnr <129082112+enggnr@users.noreply.github.com>
Date: Fri, 26 May 2023 20:16:51 +0530
Subject: [PATCH] Netdata Alert configs

Add custom Netdata health alerts:

- cpu.conf: 1-minute average of the user/system CPU dimensions, plus
  rolling 5-minute anomaly-rate templates (per dimension and per chart).
- disks.conf: used space as a percentage of used + available space.
- network.conf: any dropped packets within the last 30 minutes.
- ram.conf: 1-minute average of the `used` dimension of system.ram,
  expressed as a percentage of all dimensions of the chart.

---
 home/dot_config/netdata/health.d/cpu.conf     | 32 +++++++++++++++++++
 home/dot_config/netdata/health.d/disks.conf   |  7 ++++
 home/dot_config/netdata/health.d/network.conf |  6 ++++
 home/dot_config/netdata/health.d/ram.conf     |  8 +++++
 4 files changed, 53 insertions(+)
 create mode 100644 home/dot_config/netdata/health.d/cpu.conf
 create mode 100644 home/dot_config/netdata/health.d/disks.conf
 create mode 100644 home/dot_config/netdata/health.d/network.conf
 create mode 100644 home/dot_config/netdata/health.d/ram.conf

diff --git a/home/dot_config/netdata/health.d/cpu.conf b/home/dot_config/netdata/health.d/cpu.conf
new file mode 100644
index 00000000..af46be20
--- /dev/null
+++ b/home/dot_config/netdata/health.d/cpu.conf
@@ -0,0 +1,32 @@
+alarm: cpu_usage
+on: system.cpu
+lookup: average -1m percentage foreach user,system
+units: %
+every: 5m
+warn: $this > 50
+crit: $this > 70
+info: CPU utilization of users or the system
+
+template: ml_5min_cpu_dims
+on: system.cpu
+os: linux
+hosts: *
+lookup: average -5m anomaly-bit foreach *
+calc: $this
+units: %
+every: 30s
+warn: $this > (($status >= $WARNING) ? (5) : (20))
+crit: $this > (($status == $CRITICAL) ? (20) : (100))
+info: Rolling 5min anomaly rate for each system.cpu dimension
+
+template: ml_5min_cpu_chart
+on: system.cpu
+os: linux
+hosts: *
+lookup: average -5m anomaly-bit of *
+calc: $this
+units: %
+every: 30s
+warn: $this > (($status >= $WARNING) ? (5) : (20))
+crit: $this > (($status == $CRITICAL) ? (20) : (100))
+info: Rolling 5min anomaly rate for system.cpu chart
diff --git a/home/dot_config/netdata/health.d/disks.conf b/home/dot_config/netdata/health.d/disks.conf
new file mode 100644
index 00000000..12a3c805
--- /dev/null
+++ b/home/dot_config/netdata/health.d/disks.conf
@@ -0,0 +1,7 @@
+template: disk_full_percent
+on: disk.space
+calc: $used * 100 / ($avail + $used)
+every: 60m
+warn: $this > 70
+crit: $this > 85
+info: Disk usage on the system
diff --git a/home/dot_config/netdata/health.d/network.conf b/home/dot_config/netdata/health.d/network.conf
new file mode 100644
index 00000000..ca64ffe2
--- /dev/null
+++ b/home/dot_config/netdata/health.d/network.conf
@@ -0,0 +1,6 @@
+template: 30min_packet_drops
+on: net.drops
+lookup: sum -30m unaligned absolute
+every: 10s
+crit: $this > 0
+info: Dropped network packets
diff --git a/home/dot_config/netdata/health.d/ram.conf b/home/dot_config/netdata/health.d/ram.conf
new file mode 100644
index 00000000..7721472b
--- /dev/null
+++ b/home/dot_config/netdata/health.d/ram.conf
@@ -0,0 +1,8 @@
+alarm: ram_usage
+on: system.ram
+lookup: average -1m percentage of used
+units: %
+every: 5m
+warn: $this > 50
+crit: $this > 90
+info: RAM utilization of the system