scripts/cluster/sge.enable.host.queue.sh

476 lines
13 KiB
Bash
Executable File
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/sh
# Vars {{{
## Script name and directory.
## The value is assigned first and marked readonly afterwards: some /bin/sh
## implementations field-split the arguments of "readonly NAME=$(cmd)", which
## breaks on paths containing spaces, and the combined form also hides the
## exit status of the command substitution.
PROGNAME="$(basename "${0}")"
readonly PROGNAME
PROGDIR="$(readlink -m "$(dirname "${0}")")"
readonly PROGDIR
## Snapshot of the command line taken before any "shift"
readonly ARGS="${*}"
readonly NBARGS="${#}"
## Keep DEBUG if already defined (by parent script,…); 1 means disabled
: "${DEBUG:=1}"
## Keep OUTPUT_MESSAGE if already defined; 0 means messages are displayed
: "${OUTPUT_MESSAGE:=0}"
# Maco temp files (their presence means an upgrade is pending)
readonly MACO_TMP_FILE="/tmp/.maco.upgrade"
readonly MACO_TMP_URGENT_FILE="/tmp/.maco.urgent.upgrade"
# APT temp file to monitor
readonly APT_TMP_FILE="/tmp/.apt.upgrade"
## Colors (escape sequences are interpreted later by printf '%b')
readonly PURPLE='\033[1;35m'
readonly RED='\033[0;31m'
readonly RESET='\033[0m'
readonly COLOR_DEBUG="${PURPLE}"
# }}}
usage() { # {{{
  ## Write the help text (synopsis, examples, options) on standard output.
  ## Reads PROGNAME; takes no argument.
  ## The here-document body stays at column 0 so its text is emitted as-is.
  cat <<- EOF
usage: ${PROGNAME} [--help] [-d|-f|-h|-q] [hostname]
Try to enable all SGE queues of the current host (default),
if no pending upgrades (Maco, APT) are present,
or to the one passed as first argument (no upgrades checking).
EXAMPLES:
- Enable SGE's queue(s) of the current host
${PROGNAME}
- Enable SGE's queue(s) of "marvin.domain.tld" host
${PROGNAME} marvin.domain.tld
${PROGNAME} -h marvin.domain.tld
OPTIONS:
-d,--debug
Enable debug messages.
-f,--force
Force to (re)enable a queue even if it was
previously manually disabled (by a user).
--help
Print this help message.
-h,--host,--hostname SGE_HOST_TO_MANAGE
Manage SGE's queue(s) of "SGE_HOST_TO_MANAGE" host.
-q,--quiet
Disable messages on standard output (except for error).
EOF
}
# }}}
debug_message() { # {{{
  ## Print a purple "DEBUG <script>: <message>" line on standard output
  ## when DEBUG is enabled (=0).
  ## Arguments: $1 - text to print (backslash escapes are expanded by %b)
  ## Returns:   always 0, so a disabled debug mode never looks like a
  ##            failure to the caller (same contract as message()).
  local_debug_message="${1}"

  if [ "${DEBUG}" -eq "0" ]; then
    ## Octal \033 is used instead of \e: \e is an extension that the
    ## printf of a plain /bin/sh is not required to understand.
    printf '\033[1;35m%-6b\033[m\n' "DEBUG ${PROGNAME}: ${local_debug_message}"
  fi

  return 0
}
# }}}
message() { # {{{
  ## Print an informational line on standard output unless quiet mode is on.
  ## Arguments: $1 - text to print (backslash escapes are expanded by %b)
  ## Returns:   always 0
  local_message_text="${1}"

  ## OUTPUT_MESSAGE=0 means "messages are displayed"
  if [ "${OUTPUT_MESSAGE}" -eq "0" ]; then
    printf '%b\n' "${local_message_text}"
  fi

  return 0
}
# }}}
define_vars() { # {{{
  ## Compute the global settings used by main().
  ## Globals read:    sge_hostname, FORCE_MODE (both optional, set by the
  ##                  argument parsing)
  ## Globals written: sge_hostname, CHECK_UPGRADE, CHECK_PROCESS, FORCE_MODE,
  ##                  sge_queues_name, sge_queues_name_print,
  ##                  maco_proc_pattern, apt_proc_pattern
  ## All flags follow the shell convention: 0 = enabled, 1 = disabled.

  ## If sge_hostname wasn't defined
  if [ -z "${sge_hostname}" ]; then
    ## Use local host for sge_hostname
    sge_hostname="$(hostname -f)"
  fi

  ## If the host to manage is the current one
  if is_current_host "${sge_hostname}"; then
    debug_message "define_vars ${sge_hostname} is the current host."
    ## Enable to verify if pending upgrades are present
    CHECK_UPGRADE="0"
    ## Enable to verify if upgrades are running
    CHECK_PROCESS="0"
  else ## In case of a remote host
    debug_message "define_vars ${sge_hostname} is not the current host."
    ## Force to (re)enable SGE queue(s) in any case
    FORCE_MODE="0"
  fi

  ## If FORCE_MODE was defined and enabled
  if [ -n "${FORCE_MODE}" ] && [ "${FORCE_MODE}" -eq "0" ]; then
    ## Disable upgrade checking (remote host, asked behaviour,…)
    CHECK_UPGRADE="1"
    ## Disable process checking (remote host, asked behaviour,…)
    CHECK_PROCESS="1"
  else
    ## Ensure to define a value
    FORCE_MODE="1"
  fi

  ## Get all queues name (one per line) with a single qhost call
  sge_queues_name="$(qhost -h "${sge_hostname:=/dev/null}" -q -xml \
    | grep "queue name" \
    | cut -d"'" -f2)"

  ## Space separated variant for display; derived from the list above
  ## rather than from a second qhost call, so both variables always
  ## describe the same set of queues.
  if [ -n "${sge_queues_name}" ]; then
    sge_queues_name_print="$(printf '%s\n' "${sge_queues_name}" \
      | tr -s '\n' ' ')"
  else
    sge_queues_name_print=""
  fi

  ## List of process pattern to monitor
  maco_proc_pattern="(/opt/maco/bin/maco.autoupdate.sh)"
  apt_proc_pattern="(aptitude.*full-upgrade|/usr/bin/dpkg.*--configure|dpkg-deb|/bin/sh /usr/lib/needrestart/dpkg-status)"
}
# }}}
is_current_host() { # {{{
  ## Tell whether a host name is the FQDN of the machine running the script.
  ## Arguments: $1 - host name to compare
  ## Returns:   0 if $1 equals the output of "hostname -f", 1 otherwise
  local_host_to_test="${1}"
  local_this_fqdn=$(hostname -f)

  ## Plain string comparison, no DNS resolution or alias handling
  if [ "${local_host_to_test}" != "${local_this_fqdn}" ]; then
    return 1
  fi

  return 0
}
# }}}
is_apt_upgrade_present() { # {{{
  ## Tell whether an APT upgrade is pending, based on the presence of the
  ## marker file APT_TMP_FILE.
  ## Returns: 0 if the marker file exists, 1 otherwise
  ## Debug lines are tagged with this function's real name (they used to
  ## mention a nonexistent "is_apt_upgrade_absent").

  ### Check if temp APT upgrade file exists
  if [ -f "${APT_TMP_FILE}" ]; then
    return_apt_upgrade_present="0"
    debug_message "is_apt_upgrade_present APT upgrade seems available for this system."
  else
    ## No pending upgrade
    return_apt_upgrade_present="1"
    debug_message "is_apt_upgrade_present NO APT upgrade available for this system."
  fi

  return "${return_apt_upgrade_present}"
}
# }}}
is_maco_upgrade_present() { # {{{
  ## Tell whether a Maco upgrade (regular or urgent) is pending, based on
  ## the presence of the marker files MACO_TMP_FILE / MACO_TMP_URGENT_FILE.
  ## Returns: 0 if one of the marker files exists, 1 otherwise

  ## The regular marker is tested first and wins when both are present
  if [ -f "${MACO_TMP_FILE}" ]; then
    debug_message "is_maco_upgrade_present Maco upgrade seems available."
    return 0
  fi

  if [ -f "${MACO_TMP_URGENT_FILE}" ]; then
    debug_message "is_maco_upgrade_present Maco urgent upgrade seems available."
    return 0
  fi

  ## Neither marker exists
  debug_message "is_maco_upgrade_present No Maco upgrade require."
  return 1
}
# }}}
is_proc_running() { # {{{
  ## Tell whether at least one process matches a pattern.
  ## Arguments: $1 - extended regex matched by pgrep against full command lines
  ## Returns:   0 if pgrep lists one or more processes, 1 if it lists none
  local_proc_pattern="${1}"

  ## Number of PIDs printed by pgrep (one per line)
  local_count_proc_pattern="$(pgrep -f -- "${local_proc_pattern}" | wc -l)"

  if [ "${local_count_proc_pattern}" = "0" ]; then
    ## No procs related to this pattern are running
    return_proc_running="1"
  else
    ## At least one proc seems running
    return_proc_running="0"
  fi

  ## Simple debug message to valid current variables
  debug_message "is_proc_running procs running (with the pattern: ${RED}${local_proc_pattern}${COLOR_DEBUG}) on the current host: ${RED}${local_count_proc_pattern}${COLOR_DEBUG}."

  return "${return_proc_running}"
}
# }}}
is_queue_enable() { # {{{
  ## Tell whether one SGE queue instance is enabled.
  ## Arguments: $1 - host name of the queue instance
  ##            $2 - queue name
  ## Returns:   0 if the queue is enabled, 1 if it is disabled
  ## Exits:     2 (whole script) when the qstat output has an unexpected
  ##            number of lines
  local_queue_enable_hostname="${1}"
  local_queue_enable_name="${2}"

  ## List all queues with 'disable' state and filter to the expected queue name
  ## with a fake_user to avoid pending jobs for this queue
  ### And count returned lines
  local_queue_enable_test=$(qstat -f -qs d -q "${local_queue_enable_name:=/dev/null}@${local_queue_enable_hostname:=/dev/null}" -u fake_user \
    | wc -l)

  case "${local_queue_enable_test}" in
    0 ) ## No result so the queue is enable
      local_sge_queue_state="enable"
      return_queue_enable="0"
      ;;
    3 ) ## Results (header + queue name) so the queue is disable
      local_sge_queue_state="disable"
      return_queue_enable="1"
      ;;
    * ) ## Unexpected result
      ## Report the queue that was actually tested (the message used to read
      ## a variable owned by enable_sge_queue, printing a stale or
      ## "/dev/null" queue name and overwriting that variable).
      printf '%b\n' "${RED}Not able to determine the state of ${local_queue_enable_name:=/dev/null}@${local_queue_enable_hostname:=/dev/null} queue (command return ${local_queue_enable_test} lines).${RESET}"
      exit 2
      ;;
  esac

  ## Simple debug message to valid current variables
  debug_message "is_queue_enable SGE queue: ${RED}${local_queue_enable_name:=/dev/null}${COLOR_DEBUG} state is: ${RED}${local_sge_queue_state:=/dev/null}${COLOR_DEBUG}."

  return "${return_queue_enable}"
}
# }}}
is_queue_disable() { # {{{
  ## Tell whether one SGE queue instance is disabled.
  ## Arguments: $1 - host name of the queue instance
  ##            $2 - queue name
  ## Globals:   sge_queue_deactivator (read) - marker file removed as soon
  ##            as the queue is seen enabled
  ## Returns:   0 if the queue is disabled, 1 if it is enabled
  ## Exits:     3 (whole script) when the qstat output has an unexpected
  ##            number of lines
  local_queue_disable_hostname="${1}"
  local_queue_disable_name="${2}"

  ## Ask qstat for the disabled instances of this queue only; the fake user
  ## keeps pending jobs out of the listing. Only the line count is kept.
  local_queue_disable_test=$(qstat -f -qs d -q "${local_queue_disable_name:=/dev/null}@${local_queue_disable_hostname:=/dev/null}" -u fake_user \
    | wc -l)

  if [ "${local_queue_disable_test}" = "0" ]; then
    ## Empty listing: the queue is enabled
    local_sge_queue_state="enable"
    return_queue_disable="1"
    ## Ensure to remove any previously setted file
    rm -f -- "${sge_queue_deactivator}"
  elif [ "${local_queue_disable_test}" = "3" ]; then
    ## Three lines (header + queue name): the queue is disabled
    local_sge_queue_state="disable"
    return_queue_disable="0"
  else
    ## Any other line count cannot be interpreted
    printf '%b\n' "${RED}Not able to determine the state of ${local_queue_disable_name:=/dev/null}@${local_queue_disable_hostname:=/dev/null} queue (command return ${local_queue_disable_test} lines).${RESET}"
    exit 3
  fi

  ## Simple debug message to valid current variables
  debug_message "is_queue_disable SGE queue: ${RED}${local_queue_disable_name:=/dev/null}${COLOR_DEBUG} state is: ${RED}${local_sge_queue_state:=/dev/null}${COLOR_DEBUG}."

  return "${return_queue_disable}"
}
# }}}
is_all_queue_enable() { # {{{
  ## Tell whether every queue of a list is enabled on a host.
  ## Arguments: $1 - host name
  ##            $2 - whitespace separated list of queue names
  ## Returns:   0 if is_queue_enable succeeds for all queues (or the list
  ##            is empty), 1 if it fails for at least one
  local_all_queue_enable_hostname="${1}"
  local_all_queue_enable_name="${2}"

  ## Optimistic start: flipped to 1 by the first queue found not enabled
  return_all_queue_enable="0"

  ## The list is left unquoted on purpose: one iteration per queue name.
  ## Every queue is tested, even after a first failure.
  for loop_enable_queue in ${local_all_queue_enable_name}; do
    if ! is_queue_enable "${local_all_queue_enable_hostname}" "${loop_enable_queue}"; then
      return_all_queue_enable="1"
    fi
  done

  return "${return_all_queue_enable}"
}
# }}}
enable_sge_queue() { # {{{
  ## Enable one SGE queue instance with qmod, unless it was disabled by hand.
  ## Arguments: $1 - host name of the queue instance
  ##            $2 - queue name
  ## Globals:   sge_queue_deactivator (read) - marker file whose presence
  ##            means the queue was disabled by a script
  ##            FORCE_MODE (read) - 0 to enable the queue in any case
  ## Returns:   0 if qmod succeeded, the exit status of qmod if it failed,
  ##            1 if the queue was left untouched (manually disabled)
  local_sge_hostname="${1}"
  local_sge_queue_name="${2}"

  ## If the queue was previously disabled by another script OR if FORCE_MODE is enable
  if [ -f "${sge_queue_deactivator}" ] || [ "${FORCE_MODE}" -eq "0" ]; then
    debug_message "enable_sge_queue Previously disabled by a script (or FORCE is enable), try to enable SGE queue: ${RED}${local_sge_queue_name:=/dev/null}@${local_sge_hostname:=/dev/null}${COLOR_DEBUG}."

    ## SGE command to enable the queue
    ## NOTE(review): the qmod manual documents "-e" for enabling; confirm
    ## that the installed qmod accepts "--enable".
    qmod --enable "${local_sge_queue_name}@${local_sge_hostname}" > /dev/null
    ## Always record the status: a failing qmod used to leave this variable
    ## unset, or holding the value computed for a previous queue.
    return_enable_queue="${?}"

    if [ "${return_enable_queue}" -eq "0" ]; then
      message "Enable SGE queue: ${RED}${local_sge_queue_name:=/dev/null}@${local_sge_hostname:=/dev/null}${RESET}"
    fi
  else
    message "SGE queue: ${RED}${local_sge_queue_name:=/dev/null}@${local_sge_hostname:=/dev/null}${RESET} was manually disabled, please re-enable it ${RED}manually${RESET} (or use --force option)."
    return_enable_queue="1"
  fi

  return "${return_enable_queue}"
}
# }}}
main() { # {{{
  ## Entry point: enable every disabled SGE queue of the selected host.
  ## Exits 0 (nothing to do) when an upgrade is pending or running, or when
  ## all queues are already enabled; exits 4 when a queue that should have
  ## been enabled is still disabled.

  ## Define all vars according the selected options
  define_vars

  ## Pending upgrades (APT first, then Maco): leave the queues alone and
  ## wait for the upgrade to be applied
  if [ "${CHECK_UPGRADE}" -eq "0" ]; then
    if is_apt_upgrade_present; then
      exit 0
    fi
    if is_maco_upgrade_present; then
      exit 0
    fi
  fi

  ## Running upgrades (APT first, then Maco): same decision
  if [ "${CHECK_PROCESS}" -eq "0" ]; then
    if is_proc_running "${apt_proc_pattern}"; then
      exit 0
    fi
    if is_proc_running "${maco_proc_pattern}"; then
      exit 0
    fi
  fi

  ## Simple debug message with color to valid current variables
  debug_message "main Try to manage SGE queue(s): ${RED}${sge_queues_name_print:=/dev/null}${COLOR_DEBUG}for host: ${RED}${sge_hostname:=/dev/null}${COLOR_DEBUG}."

  ## Nothing to do if the queue(s) are already enable
  if is_all_queue_enable "${sge_hostname}" "${sge_queues_name}"; then
    exit 0
  fi

  ## Test all queues one by one (list left unquoted to split on whitespace)
  for loop_queue in ${sge_queues_name}; do
    ## File previously set if the queue was disabled by a script
    ## "automatically disabled" for an upgrade
    sge_queue_deactivator="/opt/ipr/cluster/.sge.${loop_queue}.disable"

    ## A disabled queue gets one enable attempt
    if is_queue_disable "${sge_hostname}" "${loop_queue}"; then
      enable_sge_queue "${sge_hostname}" "${loop_queue}"
    fi

    ## Don't consider manually disabled queue as an error except if FORCE_MODE was specified
    if [ -f "${sge_queue_deactivator}" ] || [ "${FORCE_MODE}" -eq "0" ]; then
      ## If the queue is still disable: report it and exit with error
      is_queue_disable "${sge_hostname}" "${loop_queue}" \
        && {
          printf '%b\n' "${RED}ERROR ${loop_queue}@${sge_hostname} is still disable.${RESET}"
          exit 4
        }
    fi
  done
}
# }}}
# Manage arguments # {{{
# This code can't be in a function due to arguments
# Accepted forms (see usage): an optional hostname, then options, then an
# optional hostname again. The last hostname given wins.
if [ ! "${NBARGS}" -eq "0" ]; then
  manage_arg="0"

  ## If the first argument is not an option
  if ! printf -- '%s' "${1}" | grep -q -E -- "^-+"; then
    ## Use this argument for sge_hostname
    sge_hostname="${1}"
    ## Switch to the next argument
    shift
    manage_arg=$((manage_arg+1))
  fi

  # Parse all options (start with a "-") one by one
  while printf -- '%s' "${1}" | grep -q -E -- "^-+"; do
    case "${1}" in
      -d|--debug ) ## debug
        DEBUG=0
        ;;
      -f|--force ) ## Force to enable SGE queue
        FORCE_MODE=0
        ;;
      --help ) ## help
        usage
        ## Exit after help informations
        exit 0
        ;;
      -h|--host|--hostname ) ## Specify a different host to manage
        ## The option needs a value: without this check the two "shift"
        ## (here and at the end of the loop) run on an empty argument list,
        ## which aborts the script in some shells.
        if [ "${#}" -lt "2" ]; then
          printf '%b\n' "${RED}Option ${1} requires an argument.${RESET}"
          printf '%b\n' "---"
          usage
          exit 1
        fi
        ## Move to the next argument
        shift
        ## Override previous definition of sge_hostname
        sge_hostname="${1}"
        ;;
      -q|--quiet ) ## Silent mode
        ## Avoid to display any message on standard output
        OUTPUT_MESSAGE=1
        ;;
      -- ) ## End of options list
        ## End the while loop ("--" itself is dropped after the loop)
        break
        ;;
      * ) ## unknow option
        printf '%b\n' "${RED}Invalid option: ${1}${RESET}"
        printf '%b\n' "---"
        usage
        exit 1
        ;;
    esac

    debug_message "Arguments management ${RED}${1}${COLOR_DEBUG} option managed."

    ## Move to the next argument
    shift
    manage_arg=$((manage_arg+1))
  done

  ## Drop the end-of-options marker left in place by the "--" branch
  if [ "${1}" = "--" ]; then
    shift
  fi

  ## usage advertises "[options] [hostname]": honour a hostname given after
  ## the options instead of silently ignoring it
  if [ "${#}" -gt "0" ]; then
    sge_hostname="${1}"
    debug_message "Arguments management ${RED}${1}${COLOR_DEBUG} used as hostname."
    shift
    manage_arg=$((manage_arg+1))
  fi

  debug_message "Arguments management ${RED}${manage_arg}${COLOR_DEBUG} argument(s) successfully managed."
else
  debug_message "Arguments management No arguments/options to manage."
fi
# }}}
main
exit 0