From c6427499869924768104af6b3722e1950737dfd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gardais=20J=C3=A9r=C3=A9my?= Date: Fri, 17 Aug 2018 10:45:48 +0200 Subject: [PATCH] Add a script to restart service after a xymon err Only for libs probe. --- xymon/sample.xymon.libs.alert | 30 +++++++++ xymon/xymon.libs.alert.sh | 113 ++++++++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 xymon/sample.xymon.libs.alert create mode 100755 xymon/xymon.libs.alert.sh diff --git a/xymon/sample.xymon.libs.alert b/xymon/sample.xymon.libs.alert new file mode 100644 index 0000000..8e2c103 --- /dev/null +++ b/xymon/sample.xymon.libs.alert @@ -0,0 +1,30 @@ +BBCOLORLEVEL: yellow +BBALPHAMSG: HOST.DOMAIN.ORG:libs yellow [0] +yellow Thu Aug 16 16:41:56 2018 - libs NOT ok +&yellow Machine should be rebooted. Running not the newest installed kernel: + + Running kernel: 4.9.0-7-amd64, version #1 SMP Debian 4.9.110-3+deb9u1 (2018-08-03) + Newest installed kernel: 4.9.0-7-amd64, version #1 SMP Debian 4.9.110-3+deb9u2 (2018-08-13) + +&yellow The following processes have libs linked that were upgraded: + +root: + /lib/systemd/systemd-udevd (546) +systemd-timesync: + /usr/lib/postfix/qmgr (52880) + + +See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=libs + +ACKCODE: 0 +RCPT: 1234567890 +BBHOSTNAME: HOST.DOMAIN.ORG +MACHIP: 129020027106 +BBSVCNAME: libs +BBSVCNUM: 0 +BBHOSTSVC: HOST.DOMAIN.ORG.libs +BBHOSTSVCCOMMAS: HOST.DOMAIN.ORG.libs +BBNUMERIC: 0001290200271060 +RECOVERED: 0 +DOWNSECS: 36320 +DOWNSECSMSG: diff --git a/xymon/xymon.libs.alert.sh b/xymon/xymon.libs.alert.sh new file mode 100755 index 0000000..373a1f1 --- /dev/null +++ b/xymon/xymon.libs.alert.sh @@ -0,0 +1,113 @@ +#!/bin/sh +# Purpose {{{ +## If Xymon server says that a service is in error on a remote host, try to restart this service. +## 1. 
## 1. Create an SSH key pair for the xymon user {{{
# sudo mkdir -p -- /var/lib/xymon/.ssh/
# sudo ssh-keygen -f /var/lib/xymon/.ssh/id_rsa -N '' -q
# sudo chown -R xymon:xymon /var/lib/xymon/.ssh/
## }}}
## 2. Remote user {{{
# Ensure the ${REMOTE_SSH_USER} account exists on the remote hosts and is allowed to connect with SSH.
# Restrict the SSH access to a single SSH key from the Xymon server IP (~${REMOTE_SSH_USER}/.ssh/authorized_keys) :
## from="IP.SRV.XYM.ON" ssh-rsa AAAAA…
# Allow sudo commands to restart services (/etc/sudoers.d/xymon-ssh) :
## xymon-ssh ALL=(root:root) NOPASSWD: /bin/systemctl restart *
## }}}

# }}}
# Vars {{{
# DEBUG follows shell truthiness: 0 prints the debug messages, any other
# integer silences them.
DEBUG=0

# Not referenced by the code below; kept as documented configuration.
LOCAL_SSH_USER="xymon"
REMOTE_SSH_USER="xymon-ssh"

# Sample "libs" alert message, used when the script is run by hand.
SAMPLE_ALPHAMSG="HOST.DOMAIN.ORG:libs yellow [0]
yellow Thu Aug 16 16:41:56 2018 - libs NOT ok
&yellow Machine should be rebooted. Running not the newest installed kernel:

 Running kernel: 4.9.0-7-amd64, version #1 SMP Debian 4.9.110-3+deb9u1 (2018-08-03)
 Newest installed kernel: 4.9.0-7-amd64, version #1 SMP Debian 4.9.110-3+deb9u2 (2018-08-13)

&yellow The following processes have libs linked that were upgraded:

root:
 /lib/systemd/systemd-udevd (546)
systemd-timesync:
 /usr/lib/postfix/qmgr (52880)


See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=libs"

# Xymon hands the alert details to SCRIPT recipients through environment
# variables (see alerts.cfg(5)). Honour them when present and fall back to the
# sample values otherwise, instead of always overwriting them.
BBHOSTNAME="${BBHOSTNAME:-HOST.DOMAIN.ORG}"
BBSVCNAME="${BBSVCNAME:-libs}"
BBALPHAMSG="${BBALPHAMSG:-${SAMPLE_ALPHAMSG}}"

# Both are set by main() once the temporary directory exists.
temp_dir=""
service_list=""
# }}}

# Functions {{{
# Print a highlighted debug line on stdout when DEBUG is 0.
#   $1 - message to display
# \033 is used for ESC because \e is not a POSIX printf escape.
debug_message() {
  if [ "${DEBUG}" -eq 0 ]; then
    printf '\033[1;35m%-6s\033[m\n' "DEBUG : ${1}"
  fi
}

# Read an alert message on stdin and print the path of every listed binary,
# one per line. A binary line looks like "<whitespace>/path/to/bin (PID)";
# any amount of leading whitespace is accepted.
extract_bin_paths() {
  sed -n 's|^[[:space:]]\{1,\}\(/.*\) (.*)|\1|p'
}

# Print the service name (without ".service") matching a binary path.
#   $1 - absolute path of the binary
# Returns 1 and prints nothing when the binary is not in the list below.
service_for_binary() {
  case "${1}" in
    # Arpwatch
    '/usr/sbin/arpwatch')
      printf '%s\n' "arpwatch"
      ;;
    # Dbus
    '/usr/bin/dbus-daemon')
      printf '%s\n' "dbus"
      ;;
    # Postfix
    '/usr/lib/postfix/sbin/pickup' | '/usr/lib/postfix/qmgr' | '/usr/lib/postfix/sbin/tlsmgr')
      printf '%s\n' "postfix"
      ;;
    # Nginx
    '/usr/sbin/nginx')
      printf '%s\n' "nginx"
      ;;
    # Ntp
    '/usr/sbin/ntpd')
      printf '%s\n' "ntp"
      ;;
    # Systemd-journald
    '/lib/systemd/systemd-journald')
      printf '%s\n' "systemd-journald"
      ;;
    # Systemd-logind
    '/lib/systemd/systemd-logind')
      printf '%s\n' "systemd-logind"
      ;;
    # Anything else is not managed
    *)
      return 1
      ;;
  esac
}

# Remove the temporary directory, if one was created. Safe to call twice.
cleanup() {
  if [ -n "${temp_dir}" ]; then
    rm -rf -- "${temp_dir}"
    temp_dir=""
  fi
}

# Analyse ${BBALPHAMSG} and log what should be done on ${BBHOSTNAME}.
# The ssh/systemctl commands are only written as debug messages, they are not
# executed.
# Returns 1 if the temporary directory can't be created, 0 otherwise.
main() {
  debug_message "${BBHOSTNAME} — ${BBSVCNAME} error"

  # Temp dir for this alert. The template ends with the X's so that it is
  # accepted by GNU, BSD and busybox mktemp alike.
  temp_dir=$(mktemp -d "${TMPDIR:-/tmp}/xymon-libs-alert.XXXXXX") || {
    printf '%s\n' "ERROR : unable to create a temporary directory." >&2
    return 1
  }
  # Clean up on every exit path. The signal traps turn a fatal signal into a
  # regular exit, in case the running sh does not fire EXIT traps on signals.
  trap cleanup EXIT
  trap 'exit 1' HUP INT TERM
  service_list="${temp_dir}/services.error.list"

  # Check if host need to reboot
  case "${BBALPHAMSG}" in
    *'&yellow Machine should be reboot'*)
      debug_message "Test kernel — The host need to be rebooted."
      ;;
  esac

  # Check if a service need to restart
  case "${BBALPHAMSG}" in
    *'&yellow The following processes'*)
      debug_message "Test service — Some services need to be restarted."
      # Get the list of binaries path
      printf '%s\n' "${BBALPHAMSG}" | extract_bin_paths > "${service_list}"

      # Space separated list of the services already handled, so that a
      # service owning several binaries is restarted only once.
      handled_services=" "
      while IFS= read -r bin_path; do
        debug_message "while bin_path loop — ${bin_path} use old libs."
        if ! process=$(service_for_binary "${bin_path}"); then
          # No known service for this binary: nothing to restart.
          debug_message "while bin_path loop — ${bin_path} is not managed, skipped."
          continue
        fi
        case "${handled_services}" in
          *" ${process} "*) continue ;;
        esac
        handled_services="${handled_services}${process} "
        debug_message "while bin_path loop — ${process} need to be restarted."
        debug_message "while bin_path loop — ssh -o StrictHostKeyChecking=no ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart ${process}.service"
      done < "${service_list}"

      # Also restart xymon-client service
      debug_message "Test service — xymon-client also need to be restarted."
      debug_message "Test service — ssh -o StrictHostKeyChecking=no ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart xymon-client.service"
      ;;
  esac

  cleanup
  return 0
}
# }}}

main "$@"