scripts/cluster/apt.apply.update.sh

434 lines
11 KiB
Bash
Executable File
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/sh
# This script will try to apply APT upgrades if all conditions are satisfied:
# 1. All SGE queues are disabled
# 2. No SGE jobs are running
# 3. No processes related to Maco are running
# This script can be called by a cronjob (e.g. hourly)
# Vars {{{
## Script identity.
## Every command substitution is double-quoted: the inner $(dirname …) is an
## ordinary command argument and would be word-split on a path containing
## whitespace, and some /bin/sh implementations also field-split the operands
## of `readonly`.
readonly PROGNAME="$(basename -- "${0}")"
readonly PROGDIR="$(readlink -m -- "$(dirname -- "${0}")")"
## Original command line (joined into one string) and number of arguments
readonly ARGS="${*}"
readonly NBARGS="${#}"
## Debug state: 0 means enabled. Default to 1 when the environment gives none
[ -z "${DEBUG}" ] && DEBUG=1
## Export DEBUG for sub-script
export DEBUG
# APT temp file to monitor
readonly APT_TMP_FILE="/tmp/.apt.upgrade"
## Colors (escape sequences kept as text, expanded later by printf '%b')
readonly PURPLE='\033[1;35m'
readonly RED='\033[0;31m'
readonly RESET='\033[0m'
readonly COLOR_DEBUG="${PURPLE}"
# }}}
usage() { # {{{
  ## Write the help text to standard output.
  ## Each printf argument becomes exactly one output line.
  printf '%s\n' \
    "usage: ${PROGNAME} [-d|-h]" \
    "Apply any APT package upgrade if the host is free:" \
    "* All SGE queues are disable" \
    "* No SGE jobs are running" \
    "* No other upgrades are running" \
    "EXAMPLES:" \
    "- Apply upgrade on the current host" \
    "${PROGNAME}" \
    "OPTIONS:" \
    "-d,--debug" \
    "Enable debug messages." \
    "-h,--help" \
    "Print this help message."
}
# }}}
debug_message() { # {{{
  ## Print a purple "DEBUG <script name>: <message>" line on standard output
  ## when debugging is enabled (DEBUG=0); otherwise print nothing.
  ## Arguments: $1 - message text (backslash escapes are expanded by %b)
  ## Returns:   always 0, so it can be used anywhere without side effects
  local_message="${1}"
  ## String comparison: a non-numeric DEBUG inherited from the environment
  ## simply disables debugging instead of making `[` complain on stderr.
  if [ "${DEBUG}" = "0" ]; then
    ## \033 rather than \e: \e is not one of the escapes POSIX defines for
    ## printf, and shells such as dash print it literally.
    printf '\033[1;35m%-6b\033[m\n' "DEBUG ${PROGNAME}: ${local_message}"
  fi
  return 0
}
# }}}
define_vars() { # {{{
  ## Fill in the global settings that were not provided by the caller
  ## (environment variable,…): sge_hostname, sge_queues_name and
  ## maco_proc_pattern.

  ## Process pattern to monitor
  maco_proc_pattern="(/opt/maco/bin/maco.autoupdate.sh)"

  ## Host to inspect: fall back to the local fully qualified name
  [ -n "${sge_hostname}" ] || sge_hostname="$(hostname -f)"

  ## Queues to inspect: fall back to the queues qhost reports for that host
  ## (value between single quotes on every "queue name" line of the XML)
  if [ -z "${sge_queues_name}" ]; then
    sge_queues_name="$(qhost -h "${sge_hostname:=/dev/null}" -q -xml \
      | grep "queue name" \
      | cut -d"'" -f2 )"
  fi
}
# }}}
is_sge_host() { # {{{
  ## Tell whether this host is usable as a SGE submit host.
  ## Returns: 0 if qconf is available and `qconf -ss` lists this host's
  ##          fully qualified name, 1 otherwise.
  return_is_sge_host="1"
  ## Check if SGE commands (qconf) are available
  if [ "$(command -v qconf)" ]; then
    debug_message "is_sge_host SGE seems present on this host."
    local_is_sge_host_fqdn="$(hostname -f)"
    ### And verify if the host is fully configured as a submit host.
    ### The name is quoted and matched as a fixed string, so the dots of the
    ### FQDN are literal and cannot match a different host name; an empty
    ### name is rejected up front because an empty pattern could match.
    if [ -n "${local_is_sge_host_fqdn}" ] \
      && qconf -ss 2>/dev/null \
        | grep --word-regexp --fixed-strings --quiet -- "${local_is_sge_host_fqdn}"; then
      debug_message "is_sge_host The host seems configured as a SGE submit host."
      return_is_sge_host="0"
    else
      debug_message "is_sge_host This host is not yet configured as a SGE submit host."
    fi
  else
    debug_message "is_sge_host SGE is not present on this host."
  fi
  return "${return_is_sge_host}"
}
# }}}
is_apt_upgrade_absent() { # {{{
  ## Tell whether the APT upgrade marker file (APT_TMP_FILE) is missing.
  ## Returns: 0 if the file does not exist, 1 if it does.
  if [ -f "${APT_TMP_FILE}" ]; then
    ## Marker found
    return_apt_upgrade_absent="1"
    debug_message "is_apt_upgrade_absent APT upgrade seems available for this system."
  else
    ## No marker
    return_apt_upgrade_absent="0"
    debug_message "is_apt_upgrade_absent NO APT upgrade available for this system."
  fi
  return "${return_apt_upgrade_absent}"
}
# }}}
is_apt_upgrade_present() { # {{{
  ## Tell whether the APT upgrade marker file (APT_TMP_FILE) exists.
  ## Returns: 0 if the file exists, 1 if it does not.
  ## The debug messages are tagged with this function's own name (they used
  ## to carry the "is_apt_upgrade_absent" tag).
  if [ -f "${APT_TMP_FILE}" ]; then
    return_apt_upgrade_present="0"
    debug_message "is_apt_upgrade_present APT upgrade seems available for this system."
  else
    return_apt_upgrade_present="1"
    debug_message "is_apt_upgrade_present NO APT upgrade available for this system."
  fi
  return "${return_apt_upgrade_present}"
}
# }}}
is_queue_enable() { # {{{
  ## Tell whether one SGE queue instance is enabled.
  ## Arguments: $1 - SGE host name, $2 - SGE queue name
  ## Returns:   0 if the queue is enable, 1 if it is disable.
  ## Exits the script with status 2 when the state cannot be determined.
  local_sge_hostname="${1}"
  local_sge_queue_name="${2}"
  ## An empty queue or host name is replaced by /dev/null
  : "${local_sge_queue_name:=/dev/null}" "${local_sge_hostname:=/dev/null}"

  ## Ask qstat for this queue instance restricted to the 'disable' state,
  ## with a fake_user to avoid pending jobs for this queue, and count the
  ## lines it prints.
  local_sge_queue_test=$(qstat -f -qs d -q "${local_sge_queue_name}@${local_sge_hostname}" -u fake_user \
    | wc -l)

  if [ "${local_sge_queue_test}" = "0" ]; then
    ## Nothing listed: the queue is not disable
    local_sge_queue_state="enable"
    return_queue_enable="0"
  elif [ "${local_sge_queue_test}" = "3" ]; then
    ## Header + queue name listed: the queue is disable
    local_sge_queue_state="disable"
    return_queue_enable="1"
  else
    ## Any other line count is unexpected
    printf '%b\n' "${RED}Not able to determine the state of ${local_sge_queue_name}@${local_sge_hostname} queue (command return ${local_sge_queue_test} lines).${RESET}"
    exit 2
  fi

  ## Report what was found
  debug_message "is_queue_enable SGE queue: ${RED}${local_sge_queue_name}${COLOR_DEBUG} state is: ${RED}${local_sge_queue_state}${COLOR_DEBUG}."
  return "${return_queue_enable}"
}
# }}}
is_sge_master_available() { # {{{
  ## Probe the SGE master (sge_master_uri:sge_master_port) with Netcat.
  ##   -z: only scan for a listening daemon, without sending any data
  ##   -w 10: give up after 10 seconds
  ## Returns: 0 if the probe succeeds, 1 otherwise.
  if ! nc -z -w 10 "${sge_master_uri}" "${sge_master_port}"; then
    return_is_sge_master_available="1"
    debug_message "is_sge_master_available SGE Master (${sge_master_uri}:${sge_master_port}) is not reachable from this host."
    return "${return_is_sge_master_available}"
  fi
  return_is_sge_master_available="0"
  debug_message "is_sge_master_available SGE Master (${sge_master_uri}:${sge_master_port}) is reachable from this host."
  return "${return_is_sge_master_available}"
}
# }}}
is_any_queue_enable() { # {{{
  ## Tell whether at least one of the given queues is enabled on a host.
  ## Arguments: $1 - SGE host name
  ##            $2 - whitespace-separated list of queue names
  ## Returns:   0 if any queue is enable, 1 if none is.
  local_any_queue_enable_hostname="${1}"
  local_any_queue_enable_name="${2}"
  ## Assume that every queue is disable until one proves otherwise
  return_any_queue_enable="1"
  ## Every queue is tested, even after an enabled one was found.
  ## The list is expanded unquoted on purpose, to split it into names.
  for loop_enable_queue in ${local_any_queue_enable_name}; do
    if is_queue_enable "${local_any_queue_enable_hostname}" "${loop_enable_queue}"; then
      return_any_queue_enable="0"
    fi
  done
  return "${return_any_queue_enable}"
}
# }}}
is_job_running() { # {{{
  ## Tell whether SGE jobs occupy slots on a host.
  ## Arguments: $1 - SGE host name (an empty name is replaced by /dev/null)
  ## Returns:   0 if the highest slots_used value is anything but "0"
  ##            (including no value at all), 1 if it is exactly "0".
  local_sge_hostname="${1}"
  ## Take the slots_used value of every queue from the qhost XML report,
  ## sort the values numerically and keep the last (greatest) one.
  local_sge_slots_used=$(qhost -h "${local_sge_hostname:=/dev/null}" -q -xml \
    | grep slots_used \
    | sed 's;.*<queuevalue.*>\(.*\)</queuevalue>;\1;' \
    | sort -n \
    | tail -n 1)
  if [ "${local_sge_slots_used}" = "0" ]; then
    ## No jobs are running
    return_job_running="1"
  else
    ## Some jobs are running
    return_job_running="0"
  fi
  ## Report what was found
  debug_message "is_job_running jobs running on ${local_sge_hostname} host: ${RED}${local_sge_slots_used}${COLOR_DEBUG}."
  return "${return_job_running}"
}
# }}}
is_proc_running() { # {{{
  ## Tell whether any process matches a pattern on the current host.
  ## Arguments: $1 - pattern given to `pgrep -f` (matched on full command
  ##                 lines)
  ## Returns:   0 if at least one process matches, 1 if none does.
  local_proc_pattern="${1}"
  ## One PID per line, so the line count is the number of matches
  local_count_proc_pattern="$(pgrep -f -- "${local_proc_pattern}" | wc -l)"
  if [ "${local_count_proc_pattern}" = "0" ]; then
    ## No procs related to this pattern are running
    return_proc_running="1"
  else
    ## At least one proc seems running
    return_proc_running="0"
  fi
  ## Report what was found
  debug_message "is_proc_running procs running (with the pattern: ${RED}${local_proc_pattern}${COLOR_DEBUG}) on the current host: ${RED}${local_count_proc_pattern}${COLOR_DEBUG}."
  return "${return_proc_running}"
}
# }}}
prepare_host() { # {{{
  ## Get the host ready for the upgrade by creating /etc/nologin.
  ## Returns: the exit status of touch.
  debug_message "prepare_host Forbid SSH logins,…."
  touch /etc/nologin
}
# }}}
upgrade_system() { # {{{
  ## Refresh the package lists, then run a non-interactive full-upgrade.
  ## Everything both commands print is appended to APT_TMP_FILE.
  ## Returns: the exit status of the full-upgrade command.
  debug_message "upgrade_system Try to apply APT upgrades."
  {
    ## First update repositories to get all available upgrades
    aptitude update
    ## And apply full-upgrade
    DEBIAN_FRONTEND=noninteractive aptitude -y -o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confold full-upgrade
  } >> "${APT_TMP_FILE}" 2>&1
}
# }}}
clean_host() { # {{{
  ## Clean up after the upgrade: run `aptitude clean` (output appended to
  ## APT_TMP_FILE) and, only if that succeeds, remove APT_TMP_FILE and
  ## /etc/nologin.
  ## Returns: the status of `aptitude clean` when it fails, otherwise the
  ##          status of rm.
  debug_message "clean_host Try to clean temp files, downloaded packages,…."
  aptitude clean >> "${APT_TMP_FILE}" 2>&1 || return "${?}"
  rm -f -- "${APT_TMP_FILE}" /etc/nologin
}
# }}}
main() { # {{{
  ## Run every precondition check, then upgrade, clean up and reboot.
  ## Exits 0 as soon as a precondition is not met (nothing is changed on the
  ## host in that case) and exits 50 if upgrade_system fails.
  ## Returns: the status of `clean_host && systemctl reboot`.

  ## SGE master address: keep a value given by the caller (environment
  ## variable,…), else the first line of act_qmaster, else "localhost"
  if [ -z "${sge_master_uri}" ]; then
    sge_master_uri=$(grep --max-count=1 -- "" /var/lib/gridengine/default/common/act_qmaster 2>/dev/null || echo "localhost")
  fi
  ## SGE master port: keep a value given by the caller, else 6444
  if [ -z "${sge_master_port}" ]; then
    sge_master_port="6444"
  fi

  ## If SGE Master is not reachable from this host: Exit
  is_sge_master_available \
    || exit 0

  ## If SGE is not yet available on this host: Exit
  is_sge_host \
    || exit 0

  ## Define all vars
  define_vars

  ## If NO APT package upgrade is available: Exit
  is_apt_upgrade_absent \
    && exit 0

  ## If any SGE queue is enable: Exit
  is_any_queue_enable "${sge_hostname}" "${sge_queues_name}" \
    && exit 0

  ## If any SGE job runs: Exit
  is_job_running "${sge_hostname}" \
    && exit 0

  ## If anything related to maco is currently running: Exit
  is_proc_running "${maco_proc_pattern}" \
    && exit 0

  ## Prepare the host for upgrade
  prepare_host

  ## Try to upgrade the system
  ### If error: Exit 50
  ### (no trailing backslash here: it used to continue this command onto
  ### the comment line that followed)
  upgrade_system \
    || exit 50

  ## Finish by cleaning temp files
  ### and reboot the system
  clean_host \
    && systemctl reboot
}
# }}}
# Manage arguments # {{{
# This code can't be in a function due to argument management
if [ "${NBARGS}" -eq "0" ]; then
  debug_message "Arguments management No arguments/options to manage."
else
  ## Number of options handled so far
  manage_arg="0"

  ## The first argument has to be an option (start with a "-"),
  ## otherwise print the help message and exit
  if ! printf -- '%s' "${1}" | grep -q -E -- "^-+"; then
    printf '%b\n' "${RED}Invalid option: ${1}${RESET}"
    printf '%b\n' "---"
    usage
    exit 1
  fi

  ## Consume the leading options one by one; stop at the first argument
  ## that does not start with a "-" (or when none is left)
  while printf -- '%s' "${1}" | grep -q -E -- "^-+"; do
    case "${1}" in
      -h|--help ) ## help
        usage
        ## Exit after help informations
        exit 0
        ;;
      -d|--debug ) ## debug
        DEBUG=0
        ;;
      * ) ## unknown option
        printf '%b\n' "${RED}Invalid option: ${1}${RESET}"
        printf '%b\n' "---"
        usage
        exit 1
        ;;
    esac

    debug_message "Arguments management ${RED}${1}${COLOR_DEBUG} option managed."

    ## Move to the next argument
    shift
    manage_arg=$((manage_arg+1))
  done

  debug_message "Arguments management ${RED}${manage_arg}${COLOR_DEBUG} argument(s) successfully managed."
fi
# }}}
## Run the checks and the upgrade; main exits by itself on most paths, so
## reaching the last line means main returned.
main
exit 255