Add a script to restart services after a Xymon error
Only for the libs probe.
This commit is contained in:
parent
76b628c04d
commit
c642749986
|
@ -0,0 +1,30 @@
|
|||
BBCOLORLEVEL: yellow
|
||||
BBALPHAMSG: HOST.DOMAIN.ORG:libs yellow [0]
|
||||
yellow Thu Aug 16 16:41:56 2018 - libs NOT ok
|
||||
&yellow Machine should be rebooted. Running not the newest installed kernel:
|
||||
|
||||
Running kernel: 4.9.0-7-amd64, version #1 SMP Debian 4.9.110-3+deb9u1 (2018-08-03)
|
||||
Newest installed kernel: 4.9.0-7-amd64, version #1 SMP Debian 4.9.110-3+deb9u2 (2018-08-13)
|
||||
|
||||
&yellow The following processes have libs linked that were upgraded:
|
||||
|
||||
root:
|
||||
/lib/systemd/systemd-udevd (546)
|
||||
systemd-timesync:
|
||||
/usr/lib/postfix/qmgr (52880)
|
||||
|
||||
|
||||
See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=libs
|
||||
|
||||
ACKCODE: 0
|
||||
RCPT: 1234567890
|
||||
BBHOSTNAME: HOST.DOMAIN.ORG
|
||||
MACHIP: 129020027106
|
||||
BBSVCNAME: libs
|
||||
BBSVCNUM: 0
|
||||
BBHOSTSVC: HOST.DOMAIN.ORG.libs
|
||||
BBHOSTSVCCOMMAS: HOST.DOMAIN.ORG.libs
|
||||
BBNUMERIC: 0001290200271060
|
||||
RECOVERED: 0
|
||||
DOWNSECS: 36320
|
||||
DOWNSECSMSG:
|
|
@ -0,0 +1,113 @@
|
|||
#!/bin/sh
# Purpose {{{
## If the Xymon server says that a service is in error on a remote host,
## try to restart this service.
## 1. Create a ssh keyring for xymon user {{{
# sudo mkdir -p -- /var/lib/xymon/.ssh/
# sudo ssh-keygen -f /var/lib/xymon/.ssh/id_rsa -N '' -q
# sudo chown -R xymon:xymon /var/lib/xymon/.ssh/
## }}}
## 2. Remote user {{{
# Ensure the ${REMOTE_SSH_USER} user is available on remote hosts and allowed to connect with SSH.
# Restrict the SSH access to a single SSH key from the Xymon server IP (~${REMOTE_SSH_USER}/.ssh/authorized_keys) :
## from="IP.SRV.XYM.ON" ssh-rsa AAAAA…
# Allow sudo commands to restart services (/etc/sudoers.d/xymon-ssh) :
## xymon-ssh ALL=(root:root) NOPASSWD: /bin/systemctl restart *
## }}}
## 3. Current state {{{
# The ssh commands are only printed as debug messages; nothing is executed
# on the remote host by this script.
## }}}
# }}}

# Vars {{{
# 0 enables the debug messages, any other integer silences them.
DEBUG=0

# Not referenced below yet; kept next to REMOTE_SSH_USER (see "Purpose").
LOCAL_SSH_USER="xymon"
REMOTE_SSH_USER="xymon-ssh"

# The values already present in the environment win; the sample values below
# are used only when the variable is unset (e.g. when run by hand).
BBHOSTNAME="${BBHOSTNAME-HOST.DOMAIN.ORG}"
BBSVCNAME="${BBSVCNAME-libs}"
if [ -z "${BBALPHAMSG+set}" ]; then
  BBALPHAMSG="HOST.DOMAIN.ORG:libs yellow [0]
yellow Thu Aug 16 16:41:56 2018 - libs NOT ok
&yellow Machine should be rebooted. Running not the newest installed kernel:

Running kernel: 4.9.0-7-amd64, version #1 SMP Debian 4.9.110-3+deb9u1 (2018-08-03)
Newest installed kernel: 4.9.0-7-amd64, version #1 SMP Debian 4.9.110-3+deb9u2 (2018-08-13)

&yellow The following processes have libs linked that were upgraded:

root:
/lib/systemd/systemd-udevd (546)
systemd-timesync:
/usr/lib/postfix/qmgr (52880)


See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=libs"
fi

# Temp dir for this alert. "mktemp -d" creates the directory itself, so a
# failure is fatal: continuing with an empty ${temp_dir} would make
# ${service_list} point to the filesystem root.
temp_dir=$(mktemp -d -t xymon-libs-alert-XXXXXX.tmp) || {
  printf '%s\n' "ERROR : unable to create a temporary directory." >&2
  exit 1
}
# Remove the temp dir on every exit path.
trap 'rm -rf -- "${temp_dir}"' EXIT

service_list="${temp_dir}/services.error.list"
# }}}

# debug_message {{{
## Print "DEBUG : <message>" in bold magenta on stdout when DEBUG is 0.
## Arguments: $1 - text of the message
## Returns:   always 0
debug_message() {
  if [ "${DEBUG}" -eq "0" ]; then
    # \033 (octal ESC) is used because \e is not a POSIX printf escape.
    printf '\033[1;35m%-6s\033[m\n' "DEBUG : ${1}"
  fi
  return 0
}
# }}}

# service_for_binary {{{
## Print the name of the systemd service (without ".service") that owns a
## binary, or "NOT.MANAGED" if the binary is not in the list below.
## Arguments: $1 - absolute path of the binary
service_for_binary() {
  case "${1}" in
    # Arpwatch
    '/usr/sbin/arpwatch' )
      printf '%s\n' "arpwatch"
      ;;
    # Dbus
    '/usr/bin/dbus-daemon' )
      printf '%s\n' "dbus"
      ;;
    # Postfix (daemons directly in /usr/lib/postfix/ or in its sbin/ subdir)
    '/usr/lib/postfix/pickup' | '/usr/lib/postfix/sbin/pickup' | \
    '/usr/lib/postfix/qmgr' | '/usr/lib/postfix/sbin/qmgr' | \
    '/usr/lib/postfix/tlsmgr' | '/usr/lib/postfix/sbin/tlsmgr' )
      printf '%s\n' "postfix"
      ;;
    # Nginx
    '/usr/sbin/nginx' )
      printf '%s\n' "nginx"
      ;;
    # Ntp
    '/usr/sbin/ntpd' )
      printf '%s\n' "ntp"
      ;;
    # Systemd-journald
    '/lib/systemd/systemd-journald' )
      printf '%s\n' "systemd-journald"
      ;;
    # Systemd-logind
    '/lib/systemd/systemd-logind' )
      printf '%s\n' "systemd-logind"
      ;;
    # default
    * )
      printf '%s\n' "NOT.MANAGED"
      ;;
  esac
}
# }}}

# main {{{
## Analyse ${BBALPHAMSG} and report (as debug messages) the reboot need and
## the ssh commands that would restart the services using outdated libs.
## Globals: BBHOSTNAME, BBSVCNAME, BBALPHAMSG, REMOTE_SSH_USER, service_list (read)
## Returns: always 0
main() {
  debug_message "${BBHOSTNAME} — ${BBSVCNAME} error"

  # Check if host need to reboot
  # (printf instead of echo: the message is arbitrary text, and some echo
  # implementations interpret backslashes; -F matches the "&" literally.)
  if printf '%s\n' "${BBALPHAMSG}" | grep -qF -- '&yellow Machine should be reboot' ; then
    debug_message "Test kernel — The host need to be rebooted."
  fi

  # Check if a service need to restart
  if printf '%s\n' "${BBALPHAMSG}" | grep -qF -- '&yellow The following processes' ; then
    debug_message "Test service — Some services need to be restarted."

    # Get the list of binaries path: lines such as "/path/to/bin (PID)",
    # whatever the amount of leading whitespace.
    printf '%s\n' "${BBALPHAMSG}" \
      | sed -n 's|^[[:space:]]*\(/.*\) (.*)|\1|p' > "${service_list}"

    while IFS= read -r bin_path; do
      debug_message "while bin_path loop — ${bin_path} use old libs."

      process=$(service_for_binary "${bin_path}")

      # Never build a restart command for a unit that does not exist.
      if [ "${process}" = "NOT.MANAGED" ]; then
        debug_message "while bin_path loop — ${bin_path} is not managed, nothing to restart."
        continue
      fi

      debug_message "while bin_path loop — ${process} need to be restarted."
      debug_message "while bin_path loop — ssh -o StrictHostKeyChecking=no ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart ${process}.service"
    done < "${service_list}"

    # Also restart xymon-client service
    debug_message "Test service — xymon-client also need to be restarted."
    debug_message "Test service — ssh -o StrictHostKeyChecking=no ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart xymon-client.service"
  fi

  return 0
}
# }}}

# main returns 0, so the script ends with exit status 0; the EXIT trap then
# removes ${temp_dir}.
main "$@"
|
Loading…
Reference in New Issue