scripts/cluster/apt.apply.update.sh

434 lines
11 KiB
Bash
Raw Normal View History

2020-06-17 16:35:11 +02:00
#!/bin/sh
# This script will try to apply APT upgrades if all conditions are satisfied:
# 1. All SGE queues are disabled
# 2. No SGE jobs are running
# 3. No process related to Maco is running
# This script can be called by a cronjob (e.g. hourly)
# Vars {{{
## Script identity and original command line.
## Every command substitution is double-quoted: some /bin/sh implementations
## word-split unquoted "readonly VAR=$(…)" arguments, and the inner
## $(dirname …) would be split by any shell, which breaks as soon as the
## script path contains a space.
readonly PROGNAME="$(basename "${0}")"
readonly PROGDIR="$(readlink -m "$(dirname "${0}")")"
readonly ARGS="${*}"
readonly NBARGS="${#}"

## Debug messages are disabled (=1) unless the caller already provided DEBUG
## (0 enables them, see debug_message and the -d option).
: "${DEBUG:=1}"
## Export DEBUG for sub-script
export DEBUG

# APT temp file to monitor
## NOTE(review): fixed, predictable name under /tmp that this script appends
## to and removes — confirm that is acceptable for the account running it.
readonly APT_TMP_FILE="/tmp/.apt.upgrade"

## Colors (escape sequences, interpreted later through printf '%b')
readonly PURPLE='\033[1;35m'
readonly RED='\033[0;31m'
readonly RESET='\033[0m'
readonly COLOR_DEBUG="${PURPLE}"
# }}}
usage() { # {{{
  ## Print the help message on stdout.
  ## Reads PROGNAME for the command name shown in the text.
  ## The heredoc body and its terminator stay at column 0 so the text is
  ## emitted exactly as written, and only help text lives between the markers
  ## (everything inside a heredoc is printed verbatim).
  cat <<EOF
usage: $PROGNAME [-d|-h]

Apply any APT package upgrade if the host is free:
* All SGE queues are disable
* No SGE jobs are running
* No other upgrades are running

EXAMPLES:
- Apply upgrade on the current host
${PROGNAME}

OPTIONS:
-d,--debug
Enable debug messages.
-h,--help
Print this help message.
EOF
}
# }}}
debug_message() { # {{{
  ## Print a debug line on stdout when DEBUG is enabled (=0).
  ## $1: message text; backslash escapes inside it are interpreted (%b), so
  ##     callers may embed the colour constants.
  ## Always returns 0 so that a disabled debug mode never looks like a failure
  ## to the caller.
  local_message="${1}"

  ## The colour codes are written as octal \033: "\e" is not a POSIX printf
  ## escape and some /bin/sh implementations print it literally.
  if [ "${DEBUG}" -eq "0" ]; then
    printf '\033[1;35m%-6b\033[m\n' "DEBUG ${PROGNAME}: ${local_message}"
  fi

  return 0
}
# }}}
2022-07-11 11:34:47 +02:00
define_vars() { # {{{
  ## Fill in every setting the environment did not already provide.
  ## Sets: sge_hostname, sge_queues_name, maco_proc_pattern.

  ## Host to inspect: falls back to the FQDN reported by the local machine
  [ -n "${sge_hostname}" ] || sge_hostname="$(hostname -f)"

  ## Queues to inspect: falls back to the queue names qhost reports for the host
  ### (value between the first pair of single quotes on each "queue name" line)
  [ -n "${sge_queues_name}" ] || sge_queues_name="$(qhost -h "${sge_hostname:=/dev/null}" -q -xml \
    | grep "queue name" \
    | cut -d"'" -f2 )"

  ## Process pattern to monitor
  maco_proc_pattern="(/opt/maco/bin/maco.autoupdate.sh)"
}
# }}}
2021-01-26 18:23:01 +01:00
is_sge_host() { # {{{
  ## Tell whether this host has the SGE tools and is listed as a submit host.
  ## Returns 0 when `qconf -ss` lists the name printed by `hostname -f`,
  ## 1 otherwise (qconf missing, host name unknown, or host not listed).
  ## Sets: return_is_sge_host.
  return_is_sge_host="1"

  ## Check if SGE commands (qconf) are available
  if ! command -v qconf > /dev/null 2>&1; then
    debug_message "is_sge_host SGE is not present on this host."
    return "${return_is_sge_host}"
  fi
  debug_message "is_sge_host SGE seems present on this host."

  ### And verify if the host is fully configured as a submit host
  local_is_sge_host_fqdn="$(hostname -f)"
  #### The name is matched as a fixed string and as a whole word: used as a
  #### regular expression, its dots would match any character. An empty name
  #### is never looked up because it cannot identify this host.
  if [ -n "${local_is_sge_host_fqdn}" ] \
    && qconf -ss 2>/dev/null \
      | grep --fixed-strings --word-regexp --quiet -- "${local_is_sge_host_fqdn}"; then
    return_is_sge_host="0"
    debug_message "is_sge_host The host seems configured as a SGE submit host."
  else
    debug_message "is_sge_host This host is not yet configured as a SGE submit host."
  fi

  return "${return_is_sge_host}"
}
# }}}
2020-06-18 16:39:14 +02:00
is_apt_upgrade_absent() { # {{{
  ## Tell whether the APT upgrade marker file (APT_TMP_FILE) is missing.
  ## Returns 0 when the file does not exist (as a regular file), 1 when it does.
  ## Sets: return_apt_upgrade_absent.
  if [ -f "${APT_TMP_FILE}" ]; then
    ### Marker file found
    debug_message "is_apt_upgrade_absent APT upgrade seems available for this system."
    return_apt_upgrade_absent="1"
  else
    ### No marker file
    debug_message "is_apt_upgrade_absent NO APT upgrade available for this system."
    return_apt_upgrade_absent="0"
  fi

  return "${return_apt_upgrade_absent}"
}
# }}}
is_apt_upgrade_present() { # {{{
  ## Tell whether the APT upgrade marker file (APT_TMP_FILE) exists.
  ## Returns 0 when the file exists (as a regular file), 1 otherwise.
  ## Sets: return_apt_upgrade_present.
  ## Debug messages carry this function's own name so traces point at the
  ## check that actually ran.
  if [ -f "${APT_TMP_FILE}" ]; then
    return_apt_upgrade_present="0"
    debug_message "is_apt_upgrade_present APT upgrade seems available for this system."
  else
    return_apt_upgrade_present="1"
    debug_message "is_apt_upgrade_present NO APT upgrade available for this system."
  fi

  return "${return_apt_upgrade_present}"
}
# }}}
2020-06-18 16:39:14 +02:00
is_queue_enable() { # {{{
  ## Tell whether one SGE queue instance is enabled.
  ## $1: host name, $2: queue name (each falls back to /dev/null when empty).
  ## Returns 0 when the queue is enabled, 1 when it is disabled.
  ## Exits the script with status 2 when the state cannot be determined.
  ## Sets: return_queue_enable, local_sge_queue_state.
  local_sge_hostname="${1:-/dev/null}"
  local_sge_queue_name="${2:-/dev/null}"

  ## Ask qstat for queues in the 'disable' state, restricted to this queue
  ## instance, with a fake_user to avoid pending jobs for this queue
  ### Only the number of returned lines is kept
  local_sge_queue_test=$(qstat -f -qs d -q "${local_sge_queue_name}@${local_sge_hostname}" -u fake_user \
    | wc -l)

  if [ "${local_sge_queue_test}" = "0" ]; then
    ### Nothing listed: the queue is not disabled
    local_sge_queue_state="enable"
    return_queue_enable="0"
  elif [ "${local_sge_queue_test}" = "3" ]; then
    ### Header + queue line: the queue is disabled
    local_sge_queue_state="disable"
    return_queue_enable="1"
  else
    ### Any other line count is unexpected: give up
    printf '%b\n' "${RED}Not able to determine the state of ${local_sge_queue_name}@${local_sge_hostname} queue (command return ${local_sge_queue_test} lines).${RESET}"
    exit 2
  fi

  ## Simple debug message to show the current values
  debug_message "is_queue_enable SGE queue: ${RED}${local_sge_queue_name}${COLOR_DEBUG} state is: ${RED}${local_sge_queue_state}${COLOR_DEBUG}."

  return "${return_queue_enable}"
}
# }}}
is_sge_master_available() { # {{{
  ## Tell whether the SGE master (sge_qmaster) answers on
  ## sge_master_uri:sge_master_port from this host.
  ## Returns 0 when Netcat can reach it, 1 otherwise.
  ## Sets: return_is_sge_master_available.
  return_is_sge_master_available="1"

  ## Probe with Netcat
  ### -z: Only scan for listening daemons, without sending any data to them.
  ### -w 10: Timeout the test after 10 seconds.
  nc -z -w 10 "${sge_master_uri}" "${sge_master_port}" \
    && return_is_sge_master_available="0"

  if [ "${return_is_sge_master_available}" = "0" ]; then
    debug_message "is_sge_master_available SGE Master (${sge_master_uri}:${sge_master_port}) is reachable from this host."
  else
    debug_message "is_sge_master_available SGE Master (${sge_master_uri}:${sge_master_port}) is not reachable from this host."
  fi

  return "${return_is_sge_master_available}"
}
# }}}
is_any_queue_enable() { # {{{
  ## Tell whether at least one of the given queues is enabled on a host.
  ## $1: host name, $2: whitespace-separated list of queue names.
  ## Returns 0 when any queue is enabled, 1 when none is (or the list is empty).
  ## Sets: return_any_queue_enable.
  local_any_queue_enable_hostname="${1}"
  local_any_queue_enable_name="${2}"

  ## Assume every queue is disabled until one proves otherwise
  return_any_queue_enable="1"

  ## Every queue is tested, even after a first enabled one was found
  ### (the list is intentionally left unquoted to split it into names)
  for loop_enable_queue in ${local_any_queue_enable_name}; do
    if is_queue_enable "${local_any_queue_enable_hostname}" "${loop_enable_queue}"; then
      return_any_queue_enable="0"
    fi
  done

  return "${return_any_queue_enable}"
}
# }}}
2020-06-18 16:39:14 +02:00
is_job_running() { # {{{
  ## Tell whether SGE jobs are using slots on a host.
  ## $1: host name (falls back to /dev/null for the qhost call when empty).
  ## Returns 0 when jobs seem to be running, 1 when no slot is used.
  ## Sets: local_sge_slots_used, return_job_running.
  local_sge_hostname="${1}"

  ## List SGE informations about the host
  ### And get the number of used slots from all queues
  ### Sort the results
  ### Only get the last result (greater number of used slots)
  ### The pipeline is kept free of anything but its stages: extra words after
  ### "grep slots_used" would be taken as file names and hide qhost's output.
  local_sge_slots_used=$(qhost -h "${local_sge_hostname:=/dev/null}" -q -xml \
    | grep slots_used \
    | sed 's;.*<queuevalue.*>\(.*\)</queuevalue>;\1;' \
    | sort -n \
    | tail -n 1)

  case "${local_sge_slots_used}" in
    0 ) ## No jobs are running
      return_job_running="1"
      ;;
    * ) ## Some jobs are running
      ### An empty or unreadable answer also lands here, so the host is
      ### considered busy whenever the slot count is unknown.
      return_job_running="0"
      ;;
  esac

  ## Simple debug message to show the current values
  debug_message "is_job_running jobs running on ${local_sge_hostname} host: ${RED}${local_sge_slots_used}${COLOR_DEBUG}."

  return "${return_job_running}"
}
# }}}
2020-06-18 16:39:14 +02:00
is_proc_running() { # {{{
  ## Tell whether any process matches a pattern on the current host.
  ## $1: pattern handed to `pgrep -f` (matched against full command lines).
  ## Returns 0 when at least one process matches, 1 when none does.
  ## Sets: local_count_proc_pattern, return_proc_running.
  local_proc_pattern="${1}"

  ## One line per matching PID
  local_count_proc_pattern="$(pgrep -f -- "${local_proc_pattern}" | wc -l)"

  if [ "${local_count_proc_pattern}" = "0" ]; then
    ### No procs related to this pattern are running
    return_proc_running="1"
  else
    ### At least one proc seems running
    return_proc_running="0"
  fi

  ## Simple debug message to show the current values
  debug_message "is_proc_running procs running (with the pattern: ${RED}${local_proc_pattern}${COLOR_DEBUG}) on the current host: ${RED}${local_count_proc_pattern}${COLOR_DEBUG}."

  return "${return_proc_running}"
}
# }}}
2021-01-26 18:23:01 +01:00
prepare_host() { # {{{
  ## Get the host ready for an upgrade by creating /etc/nologin.
  ## NOTE(review): per the debug text this is meant to forbid SSH logins while
  ## the upgrade runs; the file is only removed again by clean_host.
  debug_message "prepare_host Forbid SSH logins,…."

  touch /etc/nologin
}
# }}}
upgrade_system() { # {{{
  ## Refresh the package lists, then run a non-interactive full-upgrade.
  ## Output of both aptitude calls (stdout and stderr) is appended to
  ## APT_TMP_FILE.
  ## Returns the status of the full-upgrade; the status of the preceding
  ## update is not checked.
  debug_message "upgrade_system Try to apply APT upgrades."

  {
    ## First update repositories to get all available upgrades
    aptitude update
    ## And apply full-upgrade
    ### force-confdef/force-confold are handed to dpkg so configuration file
    ### prompts are answered without user interaction
    DEBIAN_FRONTEND=noninteractive aptitude -y -o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confold full-upgrade
  } >> "${APT_TMP_FILE}" 2>&1
}
# }}}
clean_host() { # {{{
  ## Clean the APT package cache, then remove APT_TMP_FILE and /etc/nologin.
  ## The two files are only removed when `aptitude clean` succeeded.
  ## Returns the status of `aptitude clean` when it fails, else the status
  ## of the removal.
  debug_message "clean_host Try to clean temp files, downloaded packages,…."

  ## Stop here, keeping both files, if the cache cannot be cleaned
  aptitude clean >> "${APT_TMP_FILE}" 2>&1 \
    || return

  rm -f -- "${APT_TMP_FILE}" /etc/nologin
}
# }}}
main() { # {{{
  ## Apply pending APT upgrades and reboot, but only when the host is idle.
  ## Exits 0 (nothing done) when: the SGE master is unreachable, SGE is not
  ## set up on this host, no upgrade is pending, a queue is still enabled,
  ## a job is running, or a maco process is running.
  ## Exits 50 when the upgrade itself fails.
  ## Otherwise returns the status of the final clean + reboot step.

  ## Test if SGE Master is reachable {{{
  ### If sge_master_uri wasn't defined (environment variable,…) {{{
  if [ -z "${sge_master_uri}" ]; then
    ## Get SGE master from current configuration
    ## (first line of act_qmaster, or "localhost" when it cannot be read)
    sge_master_uri=$(grep --max-count=1 -- "" /var/lib/gridengine/default/common/act_qmaster 2>/dev/null || echo "localhost")
  fi
  ### }}}
  ### If sge_master_port wasn't defined (environment variable,…) {{{
  if [ -z "${sge_master_port}" ]; then
    ## Use the default value for sge_master_port
    sge_master_port="6444"
  fi
  ### }}}
  ### If SGE Master is not reachable from this host {{{
  #### Exit
  is_sge_master_available || exit 0
  ### }}}
  ## }}}

  ## If SGE is not yet available on this host {{{
  ### Exit
  is_sge_host || exit 0
  ## }}}

  ## Define all vars
  define_vars

  ## If NO APT package upgrade is available
  ### Exit
  if is_apt_upgrade_absent; then
    exit 0
  fi

  ## If any SGE queue is enable
  ### Exit
  if is_any_queue_enable "${sge_hostname}" "${sge_queues_name}"; then
    exit 0
  fi

  ## If any SGE job runs
  ### Exit
  if is_job_running "${sge_hostname}"; then
    exit 0
  fi

  ## If anything related to maco is currently running
  ### Exit
  if is_proc_running "${maco_proc_pattern}"; then
    exit 0
  fi

  ## Prepare the host for upgrade
  prepare_host

  ## Try to upgrade the system
  ### If error: Exit 50
  ### The statement ends on this line, without a trailing line continuation:
  ### anything glued after "exit 50" becomes an extra argument, which makes
  ### the shell refuse to exit and carry on to the reboot below.
  ### NOTE(review): the host is left as prepare_host set it up when this
  ### exit is taken — confirm that is intended.
  upgrade_system || exit 50

  ## Finish by cleaning temp files
  ### and reboot the system
  clean_host \
    && systemctl reboot
}
# }}}
2022-07-11 11:34:47 +02:00
# Manage arguments # {{{
# This code can't be in a function due to argument management
# (it reads and shifts the script's own positional parameters).
if [ "${NBARGS}" -ne "0" ]; then
  ## Number of options handled so far
  manage_arg="0"

  ## The first argument must be an option (start with a "-");
  ## otherwise print the help message and exit
  printf -- '%s' "${1}" | grep -q -E -- "^-+" || {
    printf '%b\n' "${RED}Invalid option: ${1}${RESET}"
    printf '%b\n' "---"
    usage
    exit 1
  }

  # Parse all options (start with a "-") one by one
  ## The loop stops at the first argument that is not an option
  while printf -- '%s' "${1}" | grep -q -E -- "^-+"; do
    case "${1}" in
      -d|--debug ) ## debug
        DEBUG=0
        ;;
      -h|--help ) ## help
        usage
        ## Exit after help informations
        exit 0
        ;;
      * ) ## unknown option
        printf '%b\n' "${RED}Invalid option: ${1}${RESET}"
        printf '%b\n' "---"
        usage
        exit 1
        ;;
    esac

    debug_message "Arguments management ${RED}${1}${COLOR_DEBUG} option managed."

    ## Move to the next argument
    shift
    manage_arg=$((manage_arg+1))
  done

  debug_message "Arguments management ${RED}${manage_arg}${COLOR_DEBUG} argument(s) successfully managed."
else
  debug_message "Arguments management No arguments/options to manage."
fi
# }}}

## Run the upgrade workflow
main

## Reached whenever main returns instead of exiting by itself
exit 255