Restart services that are in error, and also restart xymon-client
Also disable debug mode and add some explanatory comments.
This commit is contained in:
parent
d7887bd72a
commit
0f404c3a33
|
@ -1,67 +1,23 @@
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
# Purpose {{{
|
||||||
|
## If the Xymon server reports that a service is in error on a remote host, try to restart that service.
|
||||||
|
## 1. Create an SSH key pair for the xymon user {{{
|
||||||
|
# sudo mkdir -p -- /var/lib/xymon/.ssh/
|
||||||
|
# sudo ssh-keygen -f /var/lib/xymon/.ssh/id_rsa -N '' -q
|
||||||
|
# sudo chown -R xymon:xymon /var/lib/xymon/.ssh/
|
||||||
|
## }}}
|
||||||
|
## 2. Remote user {{{
|
||||||
|
# Ensure the ${REMOTE_SSH_USER} user exists on the remote hosts and is allowed to connect over SSH.
|
||||||
|
# Restrict SSH access to a single SSH key, accepted only from the Xymon server IP (~${REMOTE_SSH_USER}/.ssh/authorized_keys):
|
||||||
|
## from="IP.SRV.XYM.ON" ssh-rsa AAAAA…
|
||||||
|
# Allow the sudo commands needed to restart services (/etc/sudoers.d/xymon-ssh):
|
||||||
|
## xymon-ssh ALL=(root:root) NOPASSWD: /bin/systemctl restart *
|
||||||
|
## }}}
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
# Vars {{{
|
# Vars {{{
|
||||||
DEBUG=0
|
DEBUG=1
|
||||||
|
|
||||||
BBCOLORLEVEL="red"
|
|
||||||
BBHOSTNAME="HOST.DOMAIN.ORG"
|
|
||||||
BBSVCNAME="procs"
|
|
||||||
BBALPHAMSG="HOST.DOMAIN.ORG:procs red [729848]
|
|
||||||
red Fri Aug 10 15:35:26 CEST 2018 - Processes NOT ok
|
|
||||||
&green systemd-journald (found 1, req. between 1 and 1)
|
|
||||||
&green systemd-logind (found 1, req. between 1 and 1)
|
|
||||||
&green CRON (found 1, req. between 1 and 999)
|
|
||||||
&yellow ATD (found 0, req. between 1 and 999)
|
|
||||||
&green MTA-Stretch (found 1, req. between 1 and 1)
|
|
||||||
&green SSHD (found 3, req. between 1 and 20)
|
|
||||||
&green SSSD (found 1, req. between 1 and 1)
|
|
||||||
&red Fail2Ban (found 0, req. 1 or more)
|
|
||||||
&red tftpd-hpa (found 0, req. between 1 and 1)
|
|
||||||
&yellow ARPwatch (found 0, req. between 1 and 1)
|
|
||||||
|
|
||||||
PID PPID USER STARTED S PRI %CPU TIME %MEM RSZ VSZ CMD
|
|
||||||
1 0 root Aug 02 S 19 0.0 00:00:03 0.5 5492 217412 /sbin/init
|
|
||||||
43 1 root Aug 02 S 19 0.0 00:00:06 2.8 30068 124220 /lib/systemd/systemd-journald
|
|
||||||
89 1 root Aug 02 S 19 0.0 00:00:00 0.1 1500 22560 /sbin/dhclient -4 -v -pf /run/dhclient.eth0.pid -lf /var/lib/dhcp/dhclient.eth0.leases -I -df /var/lib/dhcp/dhclient6.eth0.leases eth0
|
|
||||||
121 1 daemon Aug 02 S 19 0.0 00:00:00 0.1 1636 27968 /usr/sbin/atd -f -l 9.6 -b 98
|
|
||||||
125 1 root Aug 02 S 19 0.0 00:00:01 0.1 1724 321836 /usr/sbin/rsyslogd -n
|
|
||||||
126 1 root Aug 02 S 19 0.0 00:00:00 0.1 1980 29668 /usr/sbin/cron -f
|
|
||||||
137 1 root Aug 02 S 19 0.0 00:00:03 0.3 3328 171944 /usr/sbin/sssd -i -f
|
|
||||||
284 137 root Aug 02 S 19 0.0 00:00:07 0.7 7768 212360 \\_ /usr/lib/x86_64-linux-gnu/sssd/sssd_be --domain ur1 --uid 0 --gid 0 --debug-to-files
|
|
||||||
308 137 root Aug 02 S 19 0.0 00:00:09 0.3 4028 168376 \\_ /usr/lib/x86_64-linux-gnu/sssd/sssd_nss --uid 0 --gid 0 --debug-to-files
|
|
||||||
309 137 root Aug 02 S 19 0.0 00:00:03 0.3 3628 147912 \\_ /usr/lib/x86_64-linux-gnu/sssd/sssd_pam --uid 0 --gid 0 --debug-to-files
|
|
||||||
310 137 root Aug 02 S 19 0.0 00:00:03 0.2 2200 141356 \\_ /usr/lib/x86_64-linux-gnu/sssd/sssd_autofs --uid 0 --gid 0 --debug-to-files
|
|
||||||
198 1 root Aug 02 S 19 0.0 00:00:00 0.2 2288 71988 /usr/sbin/sshd -D
|
|
||||||
594703 198 root 15:34:47 S 19 0.0 00:00:00 0.6 7280 134176 \\_ sshd: USER [priv]
|
|
||||||
594918 594703 USER 15:34:48 S 19 0.0 00:00:00 0.3 3980 134176 \\_ sshd: USER@pts/2
|
|
||||||
594919 594918 USER 15:34:48 S 19 0.0 00:00:00 0.6 6316 62044 \\_ zsh
|
|
||||||
204 1 root Aug 02 S 19 0.0 00:00:00 0.0 892 17076 /usr/sbin/in.tftpd --listen --user tftp --address [::]:69 --secure --port-range 2070:2076 /srv/tftp
|
|
||||||
331 1 root Aug 02 S 19 0.0 00:00:00 0.1 1468 14316 /sbin/agetty -o -p -- \\u --noclear --keep-baud console 115200,38400,9600 linux
|
|
||||||
332 1 root Aug 02 S 19 0.0 00:00:00 0.1 1576 14316 /sbin/agetty -o -p -- \\u --noclear --keep-baud tty1 115200,38400,9600 linux
|
|
||||||
1322 1 root Aug 02 S 19 0.0 00:00:00 0.1 1780 68928 /lib/systemd/systemd --user
|
|
||||||
1324 1322 root Aug 02 S 19 0.0 00:00:00 0.0 96 111572 \\_ (sd-pam)
|
|
||||||
1643 1 arpwatch Aug 02 S 19 0.0 00:00:01 0.3 4084 36084 /usr/sbin/arpwatch -u arpwatch -N -p
|
|
||||||
2105 1 root Aug 02 S 19 0.0 00:00:03 0.3 3484 83252 /usr/lib/postfix/sbin/master -w
|
|
||||||
2107 2105 postfix Aug 02 S 19 0.0 00:00:00 0.5 5704 95656 \\_ qmgr -l -t unix -u
|
|
||||||
4516 2105 postfix Aug 02 S 19 0.0 00:00:00 0.6 6396 99844 \\_ tlsmgr -l -t unix -u -c
|
|
||||||
592131 2105 postfix 14:14:06 S 19 0.0 00:00:00 0.6 6548 95480 \\_ pickup -l -t unix -u -c
|
|
||||||
594683 2105 postfix 15:32:49 S 19 0.0 00:00:00 0.6 6420 95480 \\_ showq -t unix -u -c
|
|
||||||
2206 1 message+ Aug 02 S 19 0.0 00:00:00 0.1 1068 67636 /usr/bin/dbus-daemon --system --address=systemd: --nofork --nopidfile --systemd-activation
|
|
||||||
2207 1 root Aug 02 S 19 0.0 00:00:00 0.3 3808 73508 /lib/systemd/systemd-logind
|
|
||||||
7896 1 USER Aug 02 S 19 0.0 00:00:00 0.1 1820 68932 /lib/systemd/systemd --user
|
|
||||||
7898 7896 USER Aug 02 S 19 0.0 00:00:00 0.0 256 132260 \\_ (sd-pam)
|
|
||||||
163996 1 netdata Aug 03 S 19 0.8 01:30:54 7.2 75652 229332 /usr/sbin/netdata -D
|
|
||||||
164014 163996 netdata Aug 03 S 19 0.4 00:46:51 0.7 8136 113012 \\_ /usr/bin/python /usr/lib/x86_64-linux-gnu/netdata/plugins.d/python.d.plugin 1
|
|
||||||
587208 163996 netdata 11:51:34 R 19 0.3 00:00:37 0.2 2632 75012 \\_ /usr/lib/x86_64-linux-gnu/netdata/plugins.d/apps.plugin 1
|
|
||||||
594189 163996 netdata 15:17:36 S 19 0.0 00:00:01 0.2 2688 9696 \\_ bash /usr/lib/x86_64-linux-gnu/netdata/plugins.d/tc-qos-helper.sh 1
|
|
||||||
594627 1 xymon 15:30:50 S 19 0.0 00:00:00 0.0 724 4292 sh -c vmstat 300 2 1>/var/lib/xymon/tmp/xymon_vmstat.HOST.DOMAIN.ORG.594578 2>&1; mv /var/lib/xymon/tmp/xymon_vmstat.HOST.DOMAIN.ORG.594578 /var/lib/xymon/tmp/xymon_vmstat.HOST.DOMAIN.ORG
|
|
||||||
594629 594627 xymon 15:30:50 S 19 0.0 00:00:00 0.1 1376 24900 \\_ vmstat 300 2
|
|
||||||
595038 1 xymon 15:35:26 S 19 0.0 00:00:00 0.1 1572 4272 /usr/lib/xymon/client/bin/xymonlaunch --config=/etc/xymon/clientlaunch.cfg --log=/var/log/xymon/clientlaunch.log --pidfile=/var/run/xymon/clientlaunch.pid
|
|
||||||
595043 595038 xymon 15:35:26 S 19 0.0 00:00:00 0.1 1432 4292 \\_ /bin/sh /usr/lib/xymon/client/bin/xymonclient.sh
|
|
||||||
595065 595043 xymon 15:35:26 S 19 0.0 00:00:00 0.1 1584 4292 | \\_ /bin/sh /usr/lib/xymon/client/bin/xymonclient-linux.sh
|
|
||||||
595107 595065 xymon 15:35:26 R 19 0.0 00:00:00 0.2 2784 44404 | \\_ ps -Aww f -o pid,ppid,user,start,state,pri,pcpu,time:12,pmem,rsz:10,vsz:10,cmd
|
|
||||||
595044 595038 xymon 15:35:26 S 19 0.0 00:00:00 0.4 5188 18300 \\_ /usr/bin/perl -w /usr/lib/xymon/client/ext/apt
|
|
||||||
595081 595044 xymon 15:35:26 R 19 0.0 00:00:00 3.9 40956 70040 \\_ apt-cache policy acl adduser apt apt-listchanges apt-transport-https apt-utils aptitude aptitude-common arpwatch at base-files base-passwd bash bash-completion bind9-host binutils bsd-mailx bsdmainutils bsdutils bzip2 ca-certificates coreutils cpio cpp cpp-6 cracklib-runtime cron curl dash dbus dctrl-tools debconf debconf-i18n debian-archive-keyring debian-faq debian-goodies debianutils debsecan debsums dh-python diffutils dirmngr distro-info-data dmidecode dmsetup doc-debian dpkg e2fslibs:amd64 e2fsprogs ed etckeeper fail2ban file findutils fontconfig-config fonts-dejavu-core fonts-font-awesome fping gcc-6-base:amd64 gettext-base git git-man gnupg gnupg-agent gnutls-bin gpgv grep groff-base gzip hobbit-plugins hostname htop iftop ifupdown init init-system-helpers DOMAINoute2 iputils-ping isc-dhcp-client isc-dhcp-common kmod krb5-locales less libacl1:amd64 libapparmor1:amd64 libapt-inst2.0:amd64 libapt-pkg5.0:amd64 libasprintf0v5:amd64 libassuan0:amd64 libattr1:amd64 libaudit-common libaudit1:amd64 libavahi-client3:amd64 libavahi-common-data:amd64 libavahi-common3:amd64 libbasicobjects0:amd64 libbind9-140:amd64 libblkid1:amd64 libboost-filesystem1.62.0:amd64 libboost-iostreams1.62.0:amd64 libboost-system1.62.0:amd64 libbsd0:amd64 libbz2-1.0:amd64 libc-ares2:amd64 libc-bin libc-l10n libc6:amd64 libcap-ng0:amd64 libcap2-bin libcap2:amd64 libclass-isa-perl libcollection4:amd64 libcomerr2:amd64 libcrack2:amd64 libcryptsetup4:amd64 libcups2:amd64 libcurl3-gnutls:amd64 libcurl3:amd64 libcwidget3v5:amd64 libdb5.3:amd64 libdbus-1-3:amd64 libdebconfclient0:amd64 libdevmapper1.02.1:amd64 libdhash1:amd64 libdns-export162 libdns162:amd64 libdpkg-perl libdrm2:amd64 libedit2:amd64 libelf1:amd64 liberror-perl libestr0 libev4 libevent-2.0-5:amd64 libexpat1:amd64 libfas"
|
|
||||||
|
|
||||||
temp_dir=$(mktemp -d -t xymon-procs-alert-XXXXXX.tmp)
|
temp_dir=$(mktemp -d -t xymon-procs-alert-XXXXXX.tmp)
|
||||||
debug_stdout="${temp_dir}/debug.stdout"
|
debug_stdout="${temp_dir}/debug.stdout"
|
||||||
|
@ -108,7 +64,22 @@ if [ -s "${service_list}" ]; then
|
||||||
fi
|
fi
|
||||||
## }}}
|
## }}}
|
||||||
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : while process loop — Found ${process_found} process(es) for ${service_name} service and require between ${process_min} and ${process_max}." >> "${debug_stdout}"
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : while process loop — Found ${process_found} process(es) for ${service_name} service and require between ${process_min} and ${process_max}." >> "${debug_stdout}"
|
||||||
|
# Restart service if needed {{{
|
||||||
|
if [ "${process_found}" ] && [ "${process_min}" ] && [ "${process_found}" -lt "${process_min}" ]; then
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : while process loop — ${service_name} need to be restarted." >> "${debug_stdout}"
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : while process loop — ssh -n -o StrictHostKeyChecking=no ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart ${service_name}.service" >> "${debug_stdout}"
|
||||||
|
ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo systemctl restart ${service_name}.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
|
||||||
|
else
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : while process loop — ${service_name} service is not managed." >> "${debug_stdout}"
|
||||||
|
fi
|
||||||
|
# }}}
|
||||||
done < "${service_list}"
|
done < "${service_list}"
|
||||||
|
|
||||||
|
# Also restart the xymon-client service {{{
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : process list — xymon-client also need to be restarted." >> "${debug_stdout}"
|
||||||
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : process list — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart xymon-client.service" >> "${debug_stdout}"
|
||||||
|
ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}"@"${BBHOSTNAME}" "sudo systemctl restart xymon-client.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
|
||||||
|
# }}}
|
||||||
else
|
else
|
||||||
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : process list — No error on any process." >> "${debug_stdout}"
|
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6s\e[m\n' "DEBUG : process list — No error on any process." >> "${debug_stdout}"
|
||||||
fi
|
fi
|
||||||
|
|
Loading…
Reference in New Issue