2018-08-17 10:45:48 +02:00
|
|
|
|
#!/bin/sh
|
|
|
|
|
# Purpose {{{
## If the Xymon server reports that a service is in error on a remote host, try to restart that service.
## 1. Create an SSH key pair for the xymon user {{{
# sudo mkdir -p -- /var/lib/xymon/.ssh/
# sudo ssh-keygen -f /var/lib/xymon/.ssh/id_rsa -N '' -q
# sudo chown -R xymon:xymon /var/lib/xymon/.ssh/
## }}}
## 2. Remote user {{{
# Ensure that the ${REMOTE_SSH_USER} user exists on the remote hosts and is allowed to connect with SSH.
# Restrict the SSH access to a single SSH key coming from the Xymon server IP (~${REMOTE_SSH_USER}/.ssh/authorized_keys):
## from="IP.SRV.XYM.ON" ssh-rsa AAAAA…
# Allow the sudo commands needed to restart services (/etc/sudoers.d/xymon-ssh):
## xymon-ssh ALL=(root:root) NOPASSWD: /bin/systemctl restart *
## }}}

# }}}
|
|
|
|
|
# Vars {{{
# Debug switch. Note the inverted convention used throughout this script:
#   0     → debug messages are written and ${temp_dir} is kept after the run;
#   other → no debug messages, ${temp_dir} is removed before exiting.
DEBUG=1

# Account names. REMOTE_SSH_USER is the login used for the SSH connections to
# the monitored hosts. LOCAL_SSH_USER is not referenced in the visible part of
# this script — presumably the local account owning the SSH key; TODO confirm.
LOCAL_SSH_USER="xymon"
REMOTE_SSH_USER="xymon-ssh"

# Private working directory. Abort right away if it cannot be created:
# otherwise every path below would silently point to the filesystem root
# (e.g. "/debug.stdout").
if ! temp_dir=$(mktemp -d -t xymon-libs-alert-XXXXXX.tmp); then
  printf '%s\n' "Unable to create a temporary directory with mktemp." >&2
  exit 1
fi

# Files stored in the working directory:
#   debug_stdout → debug messages and standard output of the SSH commands
#   debug_stderr → error messages and standard error of the SSH commands
#   service_list → paths of the binaries reported by the probe
debug_stdout="${temp_dir}/debug.stdout"
debug_stderr="${temp_dir}/debug.stderr"
service_list="${temp_dir}/services.error.list"
# }}}

# Create log files
touch "${debug_stdout}" "${debug_stderr}"
|
2018-08-17 10:45:48 +02:00
|
|
|
|
|
2018-08-17 11:42:38 +02:00
|
|
|
|
# Manage only libs probe {{{
# Only alerts raised for the "libs" probe (${BBSVCNAME}) are handled. For any
# other probe: log it (debug mode only), remove the working directory unless
# debug mode is enabled, and stop successfully.
#
# The colour escapes are written as \033 rather than \e: \e is not a POSIX
# printf escape and is printed literally by dash.
if [ "${BBSVCNAME}" = "libs" ]; then
  [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG : ${BBHOSTNAME} — ${BBSVCNAME} error" >> "${debug_stdout}"
else
  [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG : ${BBHOSTNAME} — ${BBSVCNAME} probe is not managed." >> "${debug_stderr}"
  # Keep the working directory only when debug mode (DEBUG=0) is enabled.
  [ "${DEBUG}" -eq "0" ] || rm -rf -- "${temp_dir}"
  exit 0
fi
# }}}
|
|
|
|
|
|
2018-08-17 11:06:21 +02:00
|
|
|
|
# Check if the host needs to reboot {{{

# host_needs_reboot
# Succeeds (status 0) when the alert message ${BBALPHAMSG} contains the
# literal marker "&yellow Machine should be reboot", fails otherwise.
#   - printf is used instead of echo so that backslashes in the message are
#     never interpreted (dash's echo expands escapes such as \c or \n).
#   - grep -F matches the marker as a fixed string, so "&" needs no escaping.
host_needs_reboot() {
  printf '%s\n' "${BBALPHAMSG}" | grep -qF -- '&yellow Machine should be reboot'
}

# Nothing is done on the remote host here: the need for a reboot is only
# reported in the debug log.
if host_needs_reboot; then
  # \033 instead of \e: \e is not a POSIX printf escape (dash prints it as is).
  [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG : Test kernel — The host need to be rebooted." >> "${debug_stdout}"
fi
# }}}
|
2018-08-17 10:45:48 +02:00
|
|
|
|
|
|
|
|
|
# Check if a service needs to restart

# Match binaries path and services name {{{
# service_for_binary BIN_PATH
# Prints the name of the service to restart for the binary BIN_PATH (the name
# is later used as "<name>.service" with systemctl), or "NOT.MANAGED" when the
# binary is not known to this script.
service_for_binary() {
  case "${1}" in
    # Apache2
    '/usr/sbin/apache2' )
      printf '%s\n' "apache2"
      ;;
    # Arpwatch
    '/usr/sbin/arpwatch' )
      printf '%s\n' "arpwatch"
      ;;
    # Dbus
    '/usr/bin/dbus-daemon' )
      printf '%s\n' "dbus"
      ;;
    # lvmetad - LVM metadata cache daemon
    '/sbin/lvmetad' )
      printf '%s\n' "lvm2-lvmetad"
      ;;
    # Netdata
    '/usr/sbin/netdata' )
      printf '%s\n' "netdata"
      ;;
    # blkmapd - pNFS block layout mapping daemon
    '/usr/sbin/blkmapd' )
      printf '%s\n' "nfs-utils"
      ;;
    # Mumble-server
    '/usr/sbin/murmurd' )
      printf '%s\n' "mumble-server"
      ;;
    # Nginx
    '/usr/sbin/nginx' )
      printf '%s\n' "nginx"
      ;;
    # Ntp
    '/usr/sbin/ntpd' )
      printf '%s\n' "ntp"
      ;;
    # Nslcd
    '/usr/sbin/nslcd' )
      printf '%s\n' "nslcd"
      ;;
    # PHP-FPM 7.0
    '/usr/sbin/php-fpm7.0' )
      printf '%s\n' "php7.0-fpm"
      ;;
    # PHP-FPM 7.3
    '/usr/sbin/php-fpm7.3' )
      printf '%s\n' "php7.3-fpm"
      ;;
    # Postfix
    '/usr/lib/postfix/sbin/pickup' | '/usr/lib/postfix/qmgr' | '/usr/lib/postfix/sbin/tlsmgr' | '/usr/lib/postfix/sbin/qmgr' )
      printf '%s\n' "postfix"
      ;;
    # Rdnssd
    '/sbin/rdnssd' )
      printf '%s\n' "rdnssd"
      ;;
    # Systemd-journald
    '/lib/systemd/systemd-journald' )
      printf '%s\n' "systemd-journald"
      ;;
    # Systemd-logind
    '/lib/systemd/systemd-logind' )
      printf '%s\n' "systemd-logind"
      ;;
    # default
    * )
      printf '%s\n' "NOT.MANAGED"
      ;;
  esac
}
# }}}

# Restart a remote service {{{
# restart_remote_service CONTEXT SERVICE
# Runs "sudo systemctl restart SERVICE.service" on ${BBHOSTNAME} over SSH as
# ${REMOTE_SSH_USER}. Standard output is appended to ${debug_stdout} and
# standard error to ${debug_stderr}. CONTEXT is only used as a label in the
# debug message. Returns the exit status of ssh.
#   - The debug message and the real command are kept side by side so that the
#     logged command line always matches the one that is executed.
#   - "ssh -n" keeps ssh from reading stdin, which matters because the caller
#     runs inside a "while read" loop fed from a file.
#   - NOTE(review): host key verification is disabled (StrictHostKeyChecking=no
#     with UserKnownHostsFile=/dev/null); confirm this is acceptable.
restart_remote_service() {
  [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG : ${1} — ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart ${2}.service" >> "${debug_stdout}"
  ssh -n -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "${REMOTE_SSH_USER}@${BBHOSTNAME}" "sudo systemctl restart ${2}.service" >> "${debug_stdout}" 2>> "${debug_stderr}"
}
# }}}

# Restart services if needed {{{
# restart_outdated_services
# When the alert message ${BBALPHAMSG} contains the marker
# "&yellow The following processes":
#   1. extract the binary paths from the message into ${service_list}
#      (lines shaped like " /path/to/binary (something)");
#   2. restart, once each, the services matching those binaries;
#   3. restart xymon-client on the remote host as well.
# Does nothing when the marker is absent.
# Uses the global variables bin_path, service_name and restarted_services
# (POSIX sh has no local variables).
restart_outdated_services() {
  # printf instead of echo: backslashes in the message must not be interpreted
  # (dash's echo expands escapes). grep -F: the marker is a fixed string.
  printf '%s\n' "${BBALPHAMSG}" | grep -qF -- '&yellow The following processes' || return 0

  # \033 instead of \e in every format: \e is not a POSIX printf escape.
  [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG : Test service — Some services need to be restarted." >> "${debug_stdout}"

  # Get the list of binaries path
  printf '%s\n' "${BBALPHAMSG}" | sed -n 's/^ \(\/.*\) (.*)/\1/p' > "${service_list}"

  # Space-delimited list of the services already restarted during this run.
  # A full list (rather than only the previous service) prevents a second
  # restart when binaries of one service are not listed consecutively.
  restarted_services=" "

  while IFS= read -r bin_path; do
    [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG : while bin_path loop — ${bin_path} use old libs." >> "${debug_stdout}"

    service_name=$(service_for_binary "${bin_path}")

    if [ "${service_name}" = "NOT.MANAGED" ]; then
      [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG : while bin_path loop — service for ${bin_path} is not managed." >> "${debug_stdout}"
      continue
    fi

    case "${restarted_services}" in
      *" ${service_name} "* )
        [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG : while bin_path loop — ${service_name} was already restarted." >> "${debug_stdout}"
        ;;
      * )
        [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG : while bin_path loop — ${service_name} need to be restarted." >> "${debug_stdout}"
        restart_remote_service "while bin_path loop" "${service_name}"
        restarted_services="${restarted_services}${service_name} "
        ;;
    esac
  done < "${service_list}"

  # Also restart xymon-client service
  [ "${DEBUG}" -eq "0" ] && printf '\033[1;35m%-6s\033[m\n' "DEBUG : Test service — xymon-client also need to be restarted." >> "${debug_stdout}"
  restart_remote_service "Test service" "xymon-client"
}
# }}}

restart_outdated_services
|
|
|
|
|
|
2018-08-21 08:48:44 +02:00
|
|
|
|
# Drop the error log when nothing was written to it (missing or empty file).
if [ ! -s "${debug_stderr}" ]; then
  rm -f "${debug_stderr}"
fi

# Remove the working directory unless debug mode (DEBUG=0) is enabled.
if ! [ "${DEBUG}" -eq "0" ]; then
  rm -rf -- "${temp_dir}"
fi

# Always end with a success status.
exit 0
|