scripts/xymon/xymon.libs.alert.sh

117 lines
4.1 KiB
Bash
Executable File
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/sh
# Purpose {{{
## If the Xymon server reports that a service is in error on a remote host, try to restart that service.
## 1. Create an SSH key pair for the xymon user {{{
# sudo mkdir -p -- /var/lib/xymon/.ssh/
# sudo ssh-keygen -f /var/lib/xymon/.ssh/id_rsa -N '' -q
# sudo chown -R xymon:xymon /var/lib/xymon/.ssh/
## }}}
## 2. Remote user {{{
# Ensure that the ${REMOTE_SSH_USER} user exists on the remote hosts and is allowed to connect over SSH.
# Restrict its SSH access to a single SSH key coming from the Xymon server IP (~${REMOTE_SSH_USER}/.ssh/authorized_keys):
## from="IP.SRV.XYM.ON" ssh-rsa AAAAA…
# Allow sudo commands to restart services (/etc/sudoers.d/xymon-ssh):
## xymon-ssh ALL=(root:root) NOPASSWD: /bin/systemctl restart *
## }}}
# }}}
# Vars {{{
# DEBUG=0 enables the debug trace: every debug line in this script is guarded
# by `[ "${DEBUG}" -eq "0" ]`, and the temp directory is kept in that case.
DEBUG=0
# Local account name; not referenced in the rest of the visible script.
LOCAL_SSH_USER="xymon"
# Remote account shown in the ssh commands written to the debug trace.
REMOTE_SSH_USER="xymon-ssh"
# Hard-coded sample alert data: host name, test name and status message.
# NOTE(review): these look like the variables Xymon hands to alert scripts;
# confirm whether overriding them with fixed values is intended.
BBHOSTNAME="HOST.DOMAIN.ORG"
BBSVCNAME="libs"
BBALPHAMSG="HOST.DOMAIN.ORG:libs yellow [0]
yellow Thu Aug 16 16:41:56 2018 - libs NOT ok
&yellow Machine should be rebooted. Running not the newest installed kernel:
Running kernel: 4.9.0-7-amd64, version #1 SMP Debian 4.9.110-3+deb9u1 (2018-08-03)
Newest installed kernel: 4.9.0-7-amd64, version #1 SMP Debian 4.9.110-3+deb9u2 (2018-08-13)
&yellow The following processes have libs linked that were upgraded:
root:
/lib/systemd/systemd-udevd (546)
systemd-timesync:
/usr/lib/postfix/qmgr (52880)
See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=libs"
# Private working directory for this run. Stop right away when it cannot be
# created: with an empty temp_dir the paths below would point at "/".
temp_dir=$(mktemp -d -t xymon-libs-alert-XXXXXX.tmp) || {
  printf '%s\n' "ERROR: unable to create a temporary directory" >&2
  exit 1
}
# Debug trace file and list of binaries reported as using old libs.
debug_stdout="${temp_dir}/debug.stdout"
service_list="${temp_dir}/services.error.list"
# }}}
# Temp dir for this alert
# Create the working directory only when it does not exist yet.
if [ ! -d "${temp_dir}" ] ; then
  mkdir -p -- "${temp_dir}"
fi
# First debug line: ">" starts a fresh trace file for this run.
# ESC is written as \033 because POSIX printf does not define \e and this
# script runs under /bin/sh.
[ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG: ${BBHOSTNAME}${BBSVCNAME} error" > "${debug_stdout}"
# Check if the host needs a reboot

# Succeed (status 0) when the status message passed as $1 contains the
# "&yellow Machine should be reboot" marker, fail otherwise.
# The marker is matched as a fixed string (grep -F): a backslash in front of
# an ordinary character such as "&" has no defined meaning in an ERE.
# printf is used instead of echo so that backslashes inside the message are
# never interpreted.
libs_msg_wants_reboot() {
  printf '%s\n' "$1" | grep -qF -- '&yellow Machine should be reboot'
}

if libs_msg_wants_reboot "${BBALPHAMSG}" ; then
  # ESC is written as \033 because POSIX printf does not define \e and this
  # script runs under /bin/sh.
  [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG: Test kernel — The host need to be rebooted." >> "${debug_stdout}"
fi
# Check if a service needs to restart

# Print one debug line on stdout, wrapped in the ANSI sequences
# ESC[1;35m ... ESC[m.
# ESC is written as \033 because POSIX printf does not define \e and this
# script runs under /bin/sh.
#   $1 - text of the line
libs_debug_line() {
  printf '\033[1;35m%-6s\033[m\n' "$1"
}

# Read a status message on stdin and print, one per line, the path of every
# line of the form "<optional blanks>/path/to/binary (something)".
# Leading whitespace is optional so that both indented and non-indented
# listings are recognised.
libs_extract_bin_paths() {
  sed -n 's/^[[:space:]]*\(\/.*\) (.*)/\1/p'
}

# Print the service name matching the binary path given as $1, or
# "NOT.MANAGED" when the path is not in the table below.
libs_service_for_bin() {
  # Match binaries path and services name {{{
  case "$1" in
    # Arpwatch
    '/usr/sbin/arpwatch' )
      printf '%s\n' "arpwatch"
      ;;
    # Dbus
    '/usr/bin/dbus-daemon' )
      printf '%s\n' "dbus"
      ;;
    # Postfix
    '/usr/lib/postfix/sbin/pickup' | '/usr/lib/postfix/qmgr' | '/usr/lib/postfix/sbin/tlsmgr' )
      printf '%s\n' "postfix"
      ;;
    # Nginx
    '/usr/sbin/nginx' )
      printf '%s\n' "nginx"
      ;;
    # Ntp
    '/usr/sbin/ntpd' )
      printf '%s\n' "ntp"
      ;;
    # Systemd-journald
    '/lib/systemd/systemd-journald' )
      printf '%s\n' "systemd-journald"
      ;;
    # Systemd-logind
    '/lib/systemd/systemd-logind' )
      printf '%s\n' "systemd-logind"
      ;;
    # default
    * )
      printf '%s\n' "NOT.MANAGED"
      ;;
  esac
  # }}}
}

# The marker is matched as a fixed string (grep -F); printf replaces echo so
# that backslashes inside the message are never interpreted.
if printf '%s\n' "${BBALPHAMSG}" | grep -qF -- '&yellow The following processes' ; then
  [ "${DEBUG}" -eq "0" ] && libs_debug_line "DEBUG: Test service — Some services need to be restarted." >> "${debug_stdout}"
  # Get the list of binary paths
  printf '%s\n' "${BBALPHAMSG}" | libs_extract_bin_paths > "${service_list}"
  while IFS= read -r bin_path; do
    [ "${DEBUG}" -eq "0" ] && libs_debug_line "DEBUG: while bin_path loop — ${bin_path} use old libs." >> "${debug_stdout}"
    process=$(libs_service_for_bin "${bin_path}")
    # Only the command line is traced; nothing is executed on the remote host here.
    [ "${DEBUG}" -eq "0" ] && libs_debug_line "DEBUG: while bin_path loop — ${process} need to be restarted." >> "${debug_stdout}"
    [ "${DEBUG}" -eq "0" ] && libs_debug_line "DEBUG: while bin_path loop — ssh -o StrictHostKeyChecking=no ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart ${process}.service" >> "${debug_stdout}"
  done < "${service_list}"
  # Also restart xymon-client service
  [ "${DEBUG}" -eq "0" ] && libs_debug_line "DEBUG: Test service — xymon-client also need to be restarted." >> "${debug_stdout}"
  [ "${DEBUG}" -eq "0" ] && libs_debug_line "DEBUG: Test service — ssh -o StrictHostKeyChecking=no ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart xymon-client.service" >> "${debug_stdout}"
fi
# Drop the working directory unless the DEBUG test succeeds (DEBUG=0), in
# which case it is kept; then always report success.
if ! [ "${DEBUG}" -eq "0" ] ; then
  rm -rf -- "${temp_dir}"
fi
exit 0