#!/bin/sh

# Purpose {{{
## When the Xymon server reports the "libs" test in error for a remote host,
## find out which services still use outdated libraries and work out the
## command needed to restart each of them over SSH.
##
## NOTE: this script only *logs* the ssh/systemctl commands (to a debug file
## in a temporary directory); it does not execute anything on the remote host.
##
## 1. Create an SSH key pair for the xymon user {{{
#   sudo mkdir -p -- /var/lib/xymon/.ssh/
#   sudo ssh-keygen -f /var/lib/xymon/.ssh/id_rsa -N '' -q
#   sudo chown -R xymon:xymon /var/lib/xymon/.ssh/
## }}}
## 2. Remote user {{{
# Make sure the ${REMOTE_SSH_USER} account exists on the remote hosts and is
# allowed to connect with SSH.
# Restrict the SSH access to a single SSH key coming from the Xymon server IP
# (~${REMOTE_SSH_USER}/.ssh/authorized_keys) :
##   from="IP.SRV.XYM.ON" ssh-rsa AAAAA…
# Allow the sudo commands needed to restart services
# (/etc/sudoers.d/xymon-ssh) :
##   xymon-ssh ALL=(root:root) NOPASSWD: /bin/systemctl restart *
## }}}
# }}}

# Vars {{{
# DEBUG=0 means "debug enabled": messages are written to ${debug_stdout} and
# the temporary directory is kept. Any other integer disables the messages
# and removes the temporary directory at the end.
DEBUG=0

# Not referenced below; kept because it names the local account described in
# the setup notes above.
# shellcheck disable=SC2034
LOCAL_SSH_USER="xymon"
REMOTE_SSH_USER="xymon-ssh"

# Sample alert text, used only when BBALPHAMSG is not provided by the caller.
sample_alert_message="HOST.DOMAIN.ORG:libs yellow [0]
yellow Thu Aug 16 16:41:56 2018 - libs NOT ok

&yellow Machine should be rebooted. Running not the newest installed kernel:

  Running kernel: 4.9.0-7-amd64, version #1 SMP Debian 4.9.110-3+deb9u1 (2018-08-03)
  Newest installed kernel: 4.9.0-7-amd64, version #1 SMP Debian 4.9.110-3+deb9u2 (2018-08-13)

&yellow The following processes have libs linked that were upgraded:

root:
  /lib/systemd/systemd-udevd (546)
systemd-timesync:
  /usr/lib/postfix/qmgr (52880)

See http://localhost/xymon-cgi/svcstatus.sh?HOST=HOST.DOMAIN.ORG&SERVICE=libs"

# Alert data: honour the values found in the environment and fall back to the
# sample values above when they are missing or empty.
BBHOSTNAME="${BBHOSTNAME:-HOST.DOMAIN.ORG}"
BBSVCNAME="${BBSVCNAME:-libs}"
BBALPHAMSG="${BBALPHAMSG:-${sample_alert_message}}"

# Set by main() once the temporary directory exists.
temp_dir=""
debug_stdout=""
service_list=""
# }}}

# Functions {{{

#######################################
# Append one coloured debug line to the debug file (only when DEBUG is 0).
# Globals:   DEBUG (read), debug_stdout (read)
# Arguments: $1 - message text (the "DEBUG : " prefix is added here)
# Returns:   0 when debug is disabled, otherwise the status of the write
#######################################
debug_message() {
  [ "${DEBUG}" -eq "0" ] || return 0
  # \033 is the octal form of ESC; '\e' is not a POSIX printf escape.
  printf '\033[1;35m%-6s\033[m\n' "DEBUG : ${1}" >> "${debug_stdout}"
}

#######################################
# Tell whether the alert message contains a fixed string.
# Globals:   BBALPHAMSG (read)
# Arguments: $1 - literal text to look for
# Returns:   0 if found, non-zero otherwise
#######################################
alert_mentions() {
  # printf instead of echo: the message must reach grep unmodified.
  printf '%s\n' "${BBALPHAMSG}" | grep -qF -- "${1}"
}

#######################################
# Extract the binary paths from an alert message.
# A path line is: leading whitespace, an absolute path, a space and a
# parenthesised suffix — e.g. "  /usr/sbin/nginx (1234)".
# Arguments: $1 - alert message text
# Outputs:   one absolute path per line on stdout
#######################################
list_outdated_binaries() {
  printf '%s\n' "${1}" \
    | sed -n 's/^[[:space:]]\{1,\}\(\/.*\) (.*)/\1/p'
}

#######################################
# Map a binary path to the name of the service to restart.
# Arguments: $1 - absolute path of the binary
# Outputs:   the service name, or "NOT.MANAGED" for unknown binaries
#######################################
service_for_binary() {
  case "${1}" in
    '/usr/sbin/arpwatch')
      printf '%s\n' "arpwatch" ;;
    '/usr/bin/dbus-daemon')
      printf '%s\n' "dbus" ;;
    '/usr/lib/postfix/sbin/pickup' \
      | '/usr/lib/postfix/qmgr' \
      | '/usr/lib/postfix/sbin/tlsmgr')
      printf '%s\n' "postfix" ;;
    '/usr/sbin/nginx')
      printf '%s\n' "nginx" ;;
    '/usr/sbin/ntpd')
      printf '%s\n' "ntp" ;;
    '/lib/systemd/systemd-journald')
      printf '%s\n' "systemd-journald" ;;
    '/lib/systemd/systemd-logind')
      printf '%s\n' "systemd-logind" ;;
    *)
      printf '%s\n' "NOT.MANAGED" ;;
  esac
}

#######################################
# Entry point: analyse the alert and log what should be restarted.
# Globals:   DEBUG, BBHOSTNAME, BBSVCNAME, BBALPHAMSG, REMOTE_SSH_USER (read)
#            temp_dir, debug_stdout, service_list (written)
# Returns:   0 on success, 1 if the temporary directory cannot be created
#######################################
main() {
  # Temp dir for this alert {{{
  temp_dir=$(mktemp -d -t xymon-libs-alert-XXXXXX.tmp) || temp_dir=""
  if [ -z "${temp_dir}" ] || [ ! -d "${temp_dir}" ]; then
    # Without this check an empty temp_dir would send the log to "/debug.stdout".
    printf '%s\n' "xymon-libs-alert: unable to create a temporary directory" >&2
    return 1
  fi
  debug_stdout="${temp_dir}/debug.stdout"
  service_list="${temp_dir}/services.error.list"
  # }}}

  debug_message "${BBHOSTNAME} — ${BBSVCNAME} error"

  # Check if the host needs a reboot {{{
  if alert_mentions '&yellow Machine should be reboot'; then
    debug_message "Test kernel — The host need to be rebooted."
  fi
  # }}}

  # Check if some services need a restart {{{
  if alert_mentions '&yellow The following processes'; then
    debug_message "Test service — Some services need to be restarted."

    # The list is stored in a file so it remains available next to the
    # debug log when the temporary directory is kept.
    list_outdated_binaries "${BBALPHAMSG}" > "${service_list}"

    while IFS= read -r bin_path; do
      debug_message "while bin_path loop — ${bin_path} use old libs."

      service_name=$(service_for_binary "${bin_path}")

      if [ "${service_name}" = "NOT.MANAGED" ]; then
        debug_message "while bin_path loop — service for ${bin_path} is not managed."
      else
        debug_message "while bin_path loop — ${service_name} need to be restarted."
        # Logged only: the command is not executed.
        debug_message "while bin_path loop — ssh -o StrictHostKeyChecking=no ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart ${service_name}.service"
      fi
    done < "${service_list}"

    # The xymon-client service is always reported as well {{{
    debug_message "Test service — xymon-client also need to be restarted."
    debug_message "Test service — ssh -o StrictHostKeyChecking=no ${REMOTE_SSH_USER}@${BBHOSTNAME} sudo systemctl restart xymon-client.service"
    # }}}
  fi
  # }}}

  # Keep the temporary directory only when debugging (DEBUG=0).
  if [ "${DEBUG}" -ne "0" ]; then
    rm -rf -- "${temp_dir:?}"
  fi

  return 0
}
# }}}

main "$@"