scripts/xymon/xymon.procs.alert.sh

123 lines
11 KiB
Bash
Executable File
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/sh
# Vars {{{
# Debug switch. The rest of the script tests it with `-eq 0`, so 0 means
# "debug messages on, keep the temporary directory".
DEBUG='0'
# Description of the alert being handled.
# NOTE(review): these look like the values an Xymon alert normally provides;
# here they are hard-coded — confirm whether that is intended.
BBSVCNAME='procs'
BBHOSTNAME='HOST.DOMAIN.ORG'
BBCOLORLEVEL='red'
# Alert message body, as one multi-line string: a status header, then one
# "&<color> <name> (found N, req. ...)" line per monitored process, then a
# process listing.
# NOTE(review): this looks like a captured sample message used as a fixture
# (the last line appears to be cut off mid-word) — confirm whether the value
# should come from the environment instead of being hard-coded.
BBALPHAMSG="HOST.DOMAIN.ORG:procs red [729848]
red Fri Aug 10 15:35:26 CEST 2018 - Processes NOT ok
&green systemd-journald (found 1, req. between 1 and 1)
&green systemd-logind (found 1, req. between 1 and 1)
&green CRON (found 1, req. between 1 and 999)
&yellow ATD (found 0, req. between 1 and 999)
&green MTA-Stretch (found 1, req. between 1 and 1)
&green SSHD (found 3, req. between 1 and 20)
&green SSSD (found 1, req. between 1 and 1)
&red Fail2Ban (found 0, req. 1 or more)
&red tftpd-hpa (found 0, req. between 1 and 1)
&yellow ARPwatch (found 0, req. between 1 and 1)
PID PPID USER STARTED S PRI %CPU TIME %MEM RSZ VSZ CMD
1 0 root Aug 02 S 19 0.0 00:00:03 0.5 5492 217412 /sbin/init
43 1 root Aug 02 S 19 0.0 00:00:06 2.8 30068 124220 /lib/systemd/systemd-journald
89 1 root Aug 02 S 19 0.0 00:00:00 0.1 1500 22560 /sbin/dhclient -4 -v -pf /run/dhclient.eth0.pid -lf /var/lib/dhcp/dhclient.eth0.leases -I -df /var/lib/dhcp/dhclient6.eth0.leases eth0
121 1 daemon Aug 02 S 19 0.0 00:00:00 0.1 1636 27968 /usr/sbin/atd -f -l 9.6 -b 98
125 1 root Aug 02 S 19 0.0 00:00:01 0.1 1724 321836 /usr/sbin/rsyslogd -n
126 1 root Aug 02 S 19 0.0 00:00:00 0.1 1980 29668 /usr/sbin/cron -f
137 1 root Aug 02 S 19 0.0 00:00:03 0.3 3328 171944 /usr/sbin/sssd -i -f
284 137 root Aug 02 S 19 0.0 00:00:07 0.7 7768 212360 \\_ /usr/lib/x86_64-linux-gnu/sssd/sssd_be --domain ur1 --uid 0 --gid 0 --debug-to-files
308 137 root Aug 02 S 19 0.0 00:00:09 0.3 4028 168376 \\_ /usr/lib/x86_64-linux-gnu/sssd/sssd_nss --uid 0 --gid 0 --debug-to-files
309 137 root Aug 02 S 19 0.0 00:00:03 0.3 3628 147912 \\_ /usr/lib/x86_64-linux-gnu/sssd/sssd_pam --uid 0 --gid 0 --debug-to-files
310 137 root Aug 02 S 19 0.0 00:00:03 0.2 2200 141356 \\_ /usr/lib/x86_64-linux-gnu/sssd/sssd_autofs --uid 0 --gid 0 --debug-to-files
198 1 root Aug 02 S 19 0.0 00:00:00 0.2 2288 71988 /usr/sbin/sshd -D
594703 198 root 15:34:47 S 19 0.0 00:00:00 0.6 7280 134176 \\_ sshd: USER [priv]
594918 594703 USER 15:34:48 S 19 0.0 00:00:00 0.3 3980 134176 \\_ sshd: USER@pts/2
594919 594918 USER 15:34:48 S 19 0.0 00:00:00 0.6 6316 62044 \\_ zsh
204 1 root Aug 02 S 19 0.0 00:00:00 0.0 892 17076 /usr/sbin/in.tftpd --listen --user tftp --address [::]:69 --secure --port-range 2070:2076 /srv/tftp
331 1 root Aug 02 S 19 0.0 00:00:00 0.1 1468 14316 /sbin/agetty -o -p -- \\u --noclear --keep-baud console 115200,38400,9600 linux
332 1 root Aug 02 S 19 0.0 00:00:00 0.1 1576 14316 /sbin/agetty -o -p -- \\u --noclear --keep-baud tty1 115200,38400,9600 linux
1322 1 root Aug 02 S 19 0.0 00:00:00 0.1 1780 68928 /lib/systemd/systemd --user
1324 1322 root Aug 02 S 19 0.0 00:00:00 0.0 96 111572 \\_ (sd-pam)
1643 1 arpwatch Aug 02 S 19 0.0 00:00:01 0.3 4084 36084 /usr/sbin/arpwatch -u arpwatch -N -p
2105 1 root Aug 02 S 19 0.0 00:00:03 0.3 3484 83252 /usr/lib/postfix/sbin/master -w
2107 2105 postfix Aug 02 S 19 0.0 00:00:00 0.5 5704 95656 \\_ qmgr -l -t unix -u
4516 2105 postfix Aug 02 S 19 0.0 00:00:00 0.6 6396 99844 \\_ tlsmgr -l -t unix -u -c
592131 2105 postfix 14:14:06 S 19 0.0 00:00:00 0.6 6548 95480 \\_ pickup -l -t unix -u -c
594683 2105 postfix 15:32:49 S 19 0.0 00:00:00 0.6 6420 95480 \\_ showq -t unix -u -c
2206 1 message+ Aug 02 S 19 0.0 00:00:00 0.1 1068 67636 /usr/bin/dbus-daemon --system --address=systemd: --nofork --nopidfile --systemd-activation
2207 1 root Aug 02 S 19 0.0 00:00:00 0.3 3808 73508 /lib/systemd/systemd-logind
7896 1 USER Aug 02 S 19 0.0 00:00:00 0.1 1820 68932 /lib/systemd/systemd --user
7898 7896 USER Aug 02 S 19 0.0 00:00:00 0.0 256 132260 \\_ (sd-pam)
163996 1 netdata Aug 03 S 19 0.8 01:30:54 7.2 75652 229332 /usr/sbin/netdata -D
164014 163996 netdata Aug 03 S 19 0.4 00:46:51 0.7 8136 113012 \\_ /usr/bin/python /usr/lib/x86_64-linux-gnu/netdata/plugins.d/python.d.plugin 1
587208 163996 netdata 11:51:34 R 19 0.3 00:00:37 0.2 2632 75012 \\_ /usr/lib/x86_64-linux-gnu/netdata/plugins.d/apps.plugin 1
594189 163996 netdata 15:17:36 S 19 0.0 00:00:01 0.2 2688 9696 \\_ bash /usr/lib/x86_64-linux-gnu/netdata/plugins.d/tc-qos-helper.sh 1
594627 1 xymon 15:30:50 S 19 0.0 00:00:00 0.0 724 4292 sh -c vmstat 300 2 1>/var/lib/xymon/tmp/xymon_vmstat.HOST.DOMAIN.ORG.594578 2>&1; mv /var/lib/xymon/tmp/xymon_vmstat.HOST.DOMAIN.ORG.594578 /var/lib/xymon/tmp/xymon_vmstat.HOST.DOMAIN.ORG
594629 594627 xymon 15:30:50 S 19 0.0 00:00:00 0.1 1376 24900 \\_ vmstat 300 2
595038 1 xymon 15:35:26 S 19 0.0 00:00:00 0.1 1572 4272 /usr/lib/xymon/client/bin/xymonlaunch --config=/etc/xymon/clientlaunch.cfg --log=/var/log/xymon/clientlaunch.log --pidfile=/var/run/xymon/clientlaunch.pid
595043 595038 xymon 15:35:26 S 19 0.0 00:00:00 0.1 1432 4292 \\_ /bin/sh /usr/lib/xymon/client/bin/xymonclient.sh
595065 595043 xymon 15:35:26 S 19 0.0 00:00:00 0.1 1584 4292 | \\_ /bin/sh /usr/lib/xymon/client/bin/xymonclient-linux.sh
595107 595065 xymon 15:35:26 R 19 0.0 00:00:00 0.2 2784 44404 | \\_ ps -Aww f -o pid,ppid,user,start,state,pri,pcpu,time:12,pmem,rsz:10,vsz:10,cmd
595044 595038 xymon 15:35:26 S 19 0.0 00:00:00 0.4 5188 18300 \\_ /usr/bin/perl -w /usr/lib/xymon/client/ext/apt
595081 595044 xymon 15:35:26 R 19 0.0 00:00:00 3.9 40956 70040 \\_ apt-cache policy acl adduser apt apt-listchanges apt-transport-https apt-utils aptitude aptitude-common arpwatch at base-files base-passwd bash bash-completion bind9-host binutils bsd-mailx bsdmainutils bsdutils bzip2 ca-certificates coreutils cpio cpp cpp-6 cracklib-runtime cron curl dash dbus dctrl-tools debconf debconf-i18n debian-archive-keyring debian-faq debian-goodies debianutils debsecan debsums dh-python diffutils dirmngr distro-info-data dmidecode dmsetup doc-debian dpkg e2fslibs:amd64 e2fsprogs ed etckeeper fail2ban file findutils fontconfig-config fonts-dejavu-core fonts-font-awesome fping gcc-6-base:amd64 gettext-base git git-man gnupg gnupg-agent gnutls-bin gpgv grep groff-base gzip hobbit-plugins hostname htop iftop ifupdown init init-system-helpers DOMAINoute2 iputils-ping isc-dhcp-client isc-dhcp-common kmod krb5-locales less libacl1:amd64 libapparmor1:amd64 libapt-inst2.0:amd64 libapt-pkg5.0:amd64 libasprintf0v5:amd64 libassuan0:amd64 libattr1:amd64 libaudit-common libaudit1:amd64 libavahi-client3:amd64 libavahi-common-data:amd64 libavahi-common3:amd64 libbasicobjects0:amd64 libbind9-140:amd64 libblkid1:amd64 libboost-filesystem1.62.0:amd64 libboost-iostreams1.62.0:amd64 libboost-system1.62.0:amd64 libbsd0:amd64 libbz2-1.0:amd64 libc-ares2:amd64 libc-bin libc-l10n libc6:amd64 libcap-ng0:amd64 libcap2-bin libcap2:amd64 libclass-isa-perl libcollection4:amd64 libcomerr2:amd64 libcrack2:amd64 libcryptsetup4:amd64 libcups2:amd64 libcurl3-gnutls:amd64 libcurl3:amd64 libcwidget3v5:amd64 libdb5.3:amd64 libdbus-1-3:amd64 libdebconfclient0:amd64 libdevmapper1.02.1:amd64 libdhash1:amd64 libdns-export162 libdns162:amd64 libdpkg-perl libdrm2:amd64 libedit2:amd64 libelf1:amd64 liberror-perl libestr0 libev4 libevent-2.0-5:amd64 libexpat1:amd64 libfas"
# Private scratch directory for this run. Abort if it cannot be created:
# with an empty ${temp_dir} every path below would be rooted at "/"
# (e.g. "/debug.stdout") and the script would write there.
temp_dir=$(mktemp -d -t xymon-procs-alert-XXXXXX.tmp) || {
  printf '%s\n' "ERROR: unable to create the temporary directory." >&2
  exit 1
}
# Files receiving the debug messages.
debug_stdout="${temp_dir}/debug.stdout"
debug_stderr="${temp_dir}/debug.stderr"
# File receiving the list of processes reported in error.
service_list="${temp_dir}/services.error.list"
# }}}
# Create log files
# Both files exist from here on, even if nothing is ever logged to them.
touch "${debug_stdout}" "${debug_stderr}"
# Manage only procs probe {{{
# Only the "procs" test is handled; for any other test the script logs a
# message (when debug is on), cleans up and exits successfully.
# Debug messages are written when DEBUG equals 0; the temporary directory is
# removed when DEBUG is anything else.
# The color escapes use the octal form \033: the script runs under /bin/sh and
# "\e" is not an escape sequence POSIX printf is required to understand.
# NOTE(review): host and test names are concatenated without a separator in
# the messages below — confirm this is intended.
if [ "${BBSVCNAME}" = "procs" ]; then
  [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG: ${BBHOSTNAME}${BBSVCNAME} error" >> "${debug_stdout}"
else
  [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG: ${BBHOSTNAME}${BBSVCNAME} probe is not managed." >> "${debug_stderr}"
  [ "${DEBUG}" -eq "0" ] || rm -rf -- "${temp_dir}"
  exit 0
fi
# }}}
#######################################
# Parse one (lowercased) process status line of the error list, e.g.
#   "atd (found 0, req. between 1 and 999)"
#   "fail2ban (found 0, req. 1 or more)"
# Parameter expansion is used instead of space-delimited fields so that a
# name containing spaces is kept whole.
# Globals (written): service_name, process_found, process_min, process_max,
#   process_pattern. All are reset first, so values from a previous line
#   never leak into the current one.
# Arguments: $1 - the line to parse
# Returns: 0 if the line matches a known pattern, 1 otherwise
#######################################
parse_process_line() {
  service_name=""
  process_found=""
  process_min=""
  process_max=""
  process_pattern=""

  case "${1}" in
    *" (found "*", req. between "*" and "*")"*)
      process_pattern="req. between"
      ;;
    *" (found "*", req. "*" or more)"*)
      process_pattern="req. .* or more"
      ;;
    *)
      return 1
      ;;
  esac

  # Name: everything before the last " (found ".
  service_name="${1% (found *}"
  # Remainder: "<found>, req. <limits>)".
  _ppl_rest="${1##* (found }"
  process_found="${_ppl_rest%%,*}"
  # Keep only "<limits>": drop the "…, req. " prefix and the closing ")".
  _ppl_rest="${_ppl_rest#*, req. }"
  _ppl_rest="${_ppl_rest%%)*}"

  if [ "${process_pattern}" = "req. between" ]; then
    _ppl_rest="${_ppl_rest#between }"
    process_min="${_ppl_rest%% and *}"
    process_max="${_ppl_rest##* and }"
  else
    process_min="${_ppl_rest% or more}"
    process_max="nolimit"
  fi
  unset _ppl_rest
  return 0
}

# Get the list of processes with an error
# Keep the "&red"/"&yellow" lines, drop the color tag and lowercase the rest.
# printf is used rather than echo: the message holds backslashes, which some
# /bin/sh implementations of echo would interpret.
printf '%s\n' "${BBALPHAMSG}" | grep -E "&(red|yellow)" | cut -d" " -f2- | tr '[:upper:]' '[:lower:]' > "${service_list}"
# If any error on a process
if [ -s "${service_list}" ]; then
  [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG: process list — Some processes seems to be in error." >> "${debug_stdout}"
  while IFS= read -r line; do
    # Skip lines in an unknown format instead of reporting stale values.
    if ! parse_process_line "${line}"; then
      [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG: while process loop — Unknown pattern, line ignored: ${line}" >> "${debug_stderr}"
      continue
    fi
    [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG: while process loop — Pattern \"${process_pattern}\"." >> "${debug_stdout}"
    [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG: while process loop — Found ${process_found} process(es) for ${service_name} service and require between ${process_min} and ${process_max}." >> "${debug_stdout}"
  done < "${service_list}"
else
  [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG: process list — No error on any process." >> "${debug_stdout}"
fi
# Drop the error log when nothing was written to it
if ! [ -s "${debug_stderr}" ]; then
  rm -f "${debug_stderr}"
fi
# Keep temp_dir only while debugging (DEBUG equal to 0); remove it otherwise
if ! [ "${DEBUG}" -eq "0" ]; then
  rm -rf -- "${temp_dir}"
fi
exit 0