#!/bin/sh

# Vars {{{
# Name of this script, its absolute directory and a snapshot of the
# command line (taken before any "shift").
PROGNAME=$(basename "${0}")
readonly PROGNAME
PROGDIR=$(readlink -m "$(dirname "${0}")")
readonly PROGDIR
readonly ARGS="${*}"
readonly NBARGS="${#}"

## Keep DEBUG if it is already defined (by a parent script,…).
## 0 = debug messages enabled, 1 = disabled (default).
: "${DEBUG:=1}"

# Whether regular messages are displayed: 0 = yes (default), 1 = no.
: "${OUTPUT_MESSAGE:=0}"

# Maco temp files (their presence means a Maco upgrade is pending)
readonly MACO_TMP_FILE="/tmp/.maco.upgrade"
readonly MACO_TMP_URGENT_FILE="/tmp/.maco.urgent.upgrade"

# APT temp file to monitor (its presence means an APT upgrade is pending)
readonly APT_TMP_FILE="/tmp/.apt.upgrade"

## Colors (escape sequences, expanded later by printf '%b')
readonly PURPLE='\033[1;35m'
readonly RED='\033[0;31m'
readonly RESET='\033[0m'
readonly COLOR_DEBUG="${PURPLE}"
# }}}

usage() { # {{{

  ## Print the help message on standard output.
  ## Reads PROGNAME. The text is deliberately kept at column 0 so that
  ## the output does not depend on tab stripping by the here-document.
  cat << EOF
usage: $PROGNAME [--help] [-d|-f|-h|-q] [hostname]

Try to enable all SGE queues of the current host (default),
if no pending upgrades (Maco, APT) are present,
or to the one passed as first argument (no upgrades checking).

EXAMPLES :
- Enable SGE's queue(s) of the current host
${PROGNAME}

- Enable SGE's queue(s) of "marvin.domain.tld" host
${PROGNAME} marvin.domain.tld
${PROGNAME} -h marvin.domain.tld

OPTIONS :
-d,--debug
Enable debug messages.

-f,--force
Try to (re)enable a queue even if it was previously
manually disabled (by a user) and avoid all checks
(pending upgrades, running processes, files,…).

--help
Print this help message.

-h,--host,--hostname SGE_HOST_TO_MANAGE
Manage SGE's queue(s) of "SGE_HOST_TO_MANAGE" host.

-q,--quiet
Disable messages on standard output (except for error).

EOF

}
# }}}
debug_message() { # {{{

  ## Print a debug line (bold purple) on standard output when DEBUG is
  ## enabled (=0).
  ## $1 : text to print (backslash escapes are interpreted by '%b').
  ## Always returns 0 when DEBUG is disabled, like message() does, so a
  ## silent debug call never looks like a failure to its caller.
  local_debug_message="${1}"

  [ "${DEBUG}" -eq "0" ] || return 0

  ## '\033' is used instead of '\e': the latter is not a POSIX printf escape.
  printf '\033[1;35m%-6b\033[m\n' "DEBUG − ${PROGNAME} : ${local_debug_message}"

}
# }}}
message() { # {{{

  ## Print a message on standard output when OUTPUT_MESSAGE is enabled (=0).
  ## $1 : text to print (backslash escapes are interpreted by '%b').
  ## Always returns 0.
  local_message="${1}"

  if [ "${OUTPUT_MESSAGE}" -eq "0" ]; then
    printf '%b\n' "${local_message}"
  fi

  return 0

}
# }}}
define_vars() { # {{{

  ## Set the global variables used by main(), from sge_hostname and
  ## FORCE_MODE (both may have been set by the argument parsing):
  ##   sge_hostname, CHECK_UPGRADE, CHECK_FILE, CHECK_PROCESS, FORCE_MODE,
  ##   sge_queues_name, sge_queues_name_print, *_proc_pattern,
  ##   file_nologin_path, cluster_dir, sge_queue_flag_pattern.
  ## Flags follow the script convention: 0 = enabled, 1 = disabled.

  ## If sge_hostname wasn't defined, use the local host
  if [ -z "${sge_hostname}" ]; then
    sge_hostname="$(hostname -f)"
  fi

  if is_current_host "${sge_hostname}"; then
    debug_message "define_vars − ${sge_hostname} is the current host."
    ## Local host: verify pending upgrades, files and running processes
    CHECK_UPGRADE="0"
    CHECK_FILE="0"
    CHECK_PROCESS="0"
  else
    debug_message "define_vars − ${sge_hostname} is not the current host."
    ## Remote host: force to (re)enable SGE queue(s) in any case
    FORCE_MODE="0"
  fi

  if [ -n "${FORCE_MODE}" ] && [ "${FORCE_MODE}" -eq "0" ]; then
    ## FORCE_MODE enabled (remote host, asked behaviour,…): disable all checks
    CHECK_UPGRADE="1"
    CHECK_FILE="1"
    CHECK_PROCESS="1"
  else
    ## Ensure to define a value
    FORCE_MODE="1"
  fi

  ## Names of all queues of the host, one per line
  sge_queues_name="$(qhost -h "${sge_hostname:=/dev/null}" -q -xml \
    | grep "queue name" \
    | cut -d"'" -f2)"

  ## Same list on a single line (space separated, for messages). It is
  ## derived from the list above rather than from a second qhost call, so
  ## both variables always describe the same answer.
  sge_queues_name_print=""
  if [ -n "${sge_queues_name}" ]; then
    sge_queues_name_print="$(printf '%s\n' "${sge_queues_name}" \
      | tr -s '\n' ' ')"
  fi

  ## List of process patterns to monitor
  maco_proc_pattern="(/opt/maco/bin/maco.autoupdate.sh)"
  apt_proc_pattern="(aptitude.*full-upgrade|/usr/bin/dpkg.*--configure|dpkg-deb|/bin/sh /usr/lib/needrestart/dpkg-status)"
  sge_proc_pattern="(/usr/lib/gridengine/sge_execd)"

  ## List of files to monitor
  file_nologin_path="/etc/nologin"
  cluster_dir="/opt/ipr/cluster"
  sge_queue_flag_pattern="${cluster_dir}/.sge.*.disable"

}
# }}}
is_current_host() { # {{{

  ## Test if a host name is the current host.
  ## $1 : host name, compared as-is with the output of "hostname -f".
  ## Returns 0 if both strings are equal, 1 otherwise.
  local_current_host="${1}"
  local_current_fqdn=$(hostname -f)

  if [ "${local_current_host}" = "${local_current_fqdn}" ]; then
    return 0
  fi

  return 1

}
# }}}
is_apt_upgrade_present() { # {{{

  ## Test if an APT upgrade is pending, i.e. if APT_TMP_FILE exists as a
  ## regular file.
  ## Returns 0 if the file exists, 1 otherwise.

  ## No pending upgrade by default
  return_apt_upgrade_present="1"

  if [ -f "${APT_TMP_FILE}" ]; then
    return_apt_upgrade_present="0"
    debug_message "is_apt_upgrade_present − APT upgrade seems available for this system."
  else
    debug_message "is_apt_upgrade_present − NO APT upgrade available for this system."
  fi

  return "${return_apt_upgrade_present}"

}
# }}}
is_maco_upgrade_present() { # {{{

  ## Test if a Maco upgrade is pending, i.e. if MACO_TMP_FILE or
  ## MACO_TMP_URGENT_FILE exists as a regular file.
  ## Returns 0 if one of them exists, 1 otherwise.

  ## No pending upgrades by default
  return_maco_upgrade_present="1"

  if [ -f "${MACO_TMP_FILE}" ]; then
    ## Regular upgrade flag (checked first, so it wins in the debug message)
    return_maco_upgrade_present="0"
    debug_message "is_maco_upgrade_present − Maco upgrade seems available."
  elif [ -f "${MACO_TMP_URGENT_FILE}" ]; then
    ## Urgent upgrade flag
    return_maco_upgrade_present="0"
    debug_message "is_maco_upgrade_present − Maco urgent upgrade seems available."
  else
    debug_message "is_maco_upgrade_present − No Maco upgrade require."
  fi

  return "${return_maco_upgrade_present}"

}
# }}}
is_proc_running() { # {{{

  ## Test if at least one process matches a pattern.
  ## $1 : extended regular expression matched by "pgrep -f" against the
  ##      full command line of the processes.
  ## Returns 0 if at least one process matches, 1 otherwise.
  local_proc_pattern="${1}"

  local_count_proc_pattern="$(pgrep -f -- "${local_proc_pattern}" | wc -l)"

  ## Numeric test: still correct if "wc -l" pads its output with blanks
  if [ "${local_count_proc_pattern}" -eq "0" ]; then
    ## No procs related to this pattern are running
    return_proc_running="1"
  else
    ## At least one proc seems running
    return_proc_running="0"
  fi

  ## Simple debug message to validate current variables
  debug_message "is_proc_running − procs running (with the pattern: ${RED}${local_proc_pattern}${COLOR_DEBUG}) on the current host: ${RED}${local_count_proc_pattern}${COLOR_DEBUG}."

  return "${return_proc_running}"

}
# }}}
is_file_present() { # {{{

  ## Test if a path exists.
  ## $1 : path; it may be a glob pattern, which is why it is expanded
  ##      unquoted below.
  ## Returns 0 if "find" succeeds on every resulting path, 1 otherwise
  ## (an unmatched pattern is passed literally to find, which then fails).
  local_file_present="${1}"

  ## File doesn't exist by default
  return_is_file_present="1"

  # shellcheck disable=SC2086
  if find ${local_file_present} > /dev/null 2>&1; then
    return_is_file_present="0"
    debug_message "is_file_present − The file ${RED}${local_file_present}${COLOR_DEBUG} exists."
  else
    debug_message "is_file_present − The file ${RED}${local_file_present}${COLOR_DEBUG} doesn't exist."
  fi

  return "${return_is_file_present}"

}
# }}}
is_file_absent() { # {{{

  ## Test if a path does NOT exist (opposite of is_file_present).
  ## $1 : path; it may be a glob pattern, which is why it is expanded
  ##      unquoted below.
  ## Returns 0 if "find" fails on the resulting path(s), 1 if it succeeds.
  local_file_absent="${1}"

  ## File exists by default
  return_is_file_absent="1"

  # shellcheck disable=SC2086
  if find ${local_file_absent} > /dev/null 2>&1; then
    debug_message "is_file_absent − The file ${RED}${local_file_absent}${COLOR_DEBUG} exists."
  else
    return_is_file_absent="0"
    debug_message "is_file_absent − The file ${RED}${local_file_absent}${COLOR_DEBUG} doesn't exist."
  fi

  return "${return_is_file_absent}"

}
# }}}
is_queue_enable() { # {{{

  ## Test if a SGE queue instance is enabled.
  ## $1 : SGE host name
  ## $2 : SGE queue name
  ## (an empty argument is replaced by "/dev/null")
  ## Returns 0 if the queue is enabled, 1 if it is disabled.
  ## Exits the script with status 2 if the state can't be determined.
  local_queue_enable_hostname="${1:-/dev/null}"
  local_queue_enable_name="${2:-/dev/null}"

  ## List all queues with 'disable' state and filter to the expected queue
  ## name, with a fake_user to avoid pending jobs for this queue,
  ## and count returned lines
  local_queue_enable_test=$(qstat -f -qs d -q "${local_queue_enable_name}@${local_queue_enable_hostname}" -u fake_user \
    | wc -l)

  case "${local_queue_enable_test}" in
    0 ) ## No result so the queue is enabled
      local_sge_queue_state="enable"
      return_queue_enable="0"
      ;;
    3 ) ## Results (header + queue name) so the queue is disabled
      local_sge_queue_state="disable"
      return_queue_enable="1"
      ;;
    * ) ## Unexpected result
      ## Report the queue that was actually tested
      printf '%b\n' "${RED}Not able to determine the state of ${local_queue_enable_name}@${local_queue_enable_hostname} queue (command return ${local_queue_enable_test} lines).${RESET}"
      exit 2
      ;;
  esac

  ## Simple debug message to validate current variables
  debug_message "is_queue_enable − SGE queue: ${RED}${local_queue_enable_name}${COLOR_DEBUG} state is: ${RED}${local_sge_queue_state}${COLOR_DEBUG}."

  return "${return_queue_enable}"

}
# }}}
is_queue_disable() { # {{{

  ## Test if a SGE queue instance is disabled.
  ## $1 : SGE host name
  ## $2 : SGE queue name
  ## (an empty argument is replaced by "/dev/null")
  ## Reads the global sge_queue_flag_file: when the queue is found enabled,
  ## that flag file is removed.
  ## Returns 0 if the queue is disabled, 1 if it is enabled.
  ## Exits the script with status 3 if the state can't be determined.
  local_queue_disable_hostname="${1:-/dev/null}"
  local_queue_disable_name="${2:-/dev/null}"

  ## List all queues with 'disable' state and filter to the expected queue
  ## name, with a fake_user to avoid pending jobs for this queue,
  ## and count returned lines
  local_queue_disable_test=$(qstat -f -qs d -q "${local_queue_disable_name}@${local_queue_disable_hostname}" -u fake_user \
    | wc -l)

  case "${local_queue_disable_test}" in
    0 ) ## No result so the queue is enabled
      local_sge_queue_state="enable"
      return_queue_disable="1"

      ## Ensure to remove any previously set flag file
      ## (nothing to remove when the caller did not define one)
      if [ -n "${sge_queue_flag_file}" ]; then
        rm -f -- "${sge_queue_flag_file}"
      fi
      ;;
    3 ) ## Results (header + queue name) so the queue is disabled
      local_sge_queue_state="disable"
      return_queue_disable="0"
      ;;
    * ) ## Unexpected result
      printf '%b\n' "${RED}Not able to determine the state of ${local_queue_disable_name}@${local_queue_disable_hostname} queue (command return ${local_queue_disable_test} lines).${RESET}"
      exit 3
      ;;
  esac

  ## Simple debug message to validate current variables
  debug_message "is_queue_disable − SGE queue: ${RED}${local_queue_disable_name}${COLOR_DEBUG} state is: ${RED}${local_sge_queue_state}${COLOR_DEBUG}."

  return "${return_queue_disable}"

}
# }}}
is_all_queue_enable() { # {{{

  ## Test if all the given SGE queues of a host are enabled.
  ## $1 : SGE host name
  ## $2 : list of queue names separated by blanks/newlines (split on
  ##      purpose by the unquoted expansion in the loop)
  ## Returns 0 if every queue is enabled (also when the list is empty),
  ## 1 if at least one queue is not.
  local_all_queue_enable_hostname="${1}"
  local_all_queue_enable_name="${2}"

  ## By default, all queues are enabled
  return_all_queue_enable="0"

  ## Every queue is tested, even after a first disabled one was found
  for loop_enable_queue in ${local_all_queue_enable_name}; do
    if ! is_queue_enable "${local_all_queue_enable_hostname}" "${loop_enable_queue}"; then
      return_all_queue_enable="1"
    fi
  done

  return "${return_all_queue_enable}"

}
# }}}
enable_sge_queue() { # {{{

  ## Try to enable a SGE queue instance.
  ## $1 : SGE host name
  ## $2 : SGE queue name
  ## (an empty argument is replaced by "/dev/null")
  ## Reads the globals sge_queue_flag_file and FORCE_MODE: the queue is
  ## only enabled if the flag file exists or if FORCE_MODE is enabled (=0).
  ## Returns 0 if the queue was enabled, 1 if qmod failed or if the queue
  ## is considered as manually disabled.
  local_sge_hostname="${1:-/dev/null}"
  local_sge_queue_name="${2:-/dev/null}"

  ## Failure by default, so a qmod error is never reported as a success
  return_enable_queue="1"

  ## If the queue was previously disabled by another script OR if FORCE_MODE is enabled
  if [ -f "${sge_queue_flag_file}" ] || [ "${FORCE_MODE}" -eq "0" ]; then
    debug_message "enable_sge_queue − Previously disabled by a script (or FORCE is enable), try to enable SGE queue: ${RED}${local_sge_queue_name}@${local_sge_hostname}${COLOR_DEBUG}."

    ## SGE command to enable the queue
    ## NOTE(review): Grid Engine documents this action as "qmod -e";
    ## confirm that the installed qmod accepts "--enable".
    if qmod --enable "${local_sge_queue_name}@${local_sge_hostname}" > /dev/null; then
      message "Enable SGE queue: ${RED}${local_sge_queue_name}@${local_sge_hostname}${RESET}"
      return_enable_queue="0"
    fi
  else
    message "SGE queue: ${RED}${local_sge_queue_name}@${local_sge_hostname}${RESET} was manually disabled, please re-enable it ${RED}manually${RESET} (or use --force option)."
  fi

  return "${return_enable_queue}"

}
# }}}
main() { # {{{

  ## Enable the SGE queue(s) of the managed host when nothing forbids it.
  ## Exit codes set here:
  ##   0 : nothing to do (pending upgrade, nologin file, no flag file,
  ##       upgrade process running, queues already enabled)
  ##   4 : sge_execd was not running and its service could not be started
  ##   5 : a queue is still disabled after the attempt to enable it
  ## (is_queue_enable and is_queue_disable may also exit with 2 or 3.)

  ## Define all vars according to the selected options
  define_vars

  ## If we need to watch for upgrades
  if [ "${CHECK_UPGRADE}" -eq "0" ]; then
    ## APT package upgrade available: wait for it to be applied
    if is_apt_upgrade_present; then
      exit 0
    fi

    ## Maco upgrade present: wait for it to be applied
    if is_maco_upgrade_present; then
      exit 0
    fi
  fi

  ## If we need to watch files
  if [ "${CHECK_FILE}" -eq "0" ]; then
    ## nologin file exists (error on upgrade,…)
    if is_file_present "${file_nologin_path}"; then
      exit 0
    fi

    ## All SGE queue(s) were manually disabled (not any flag file)
    if is_file_absent "${sge_queue_flag_pattern}"; then
      exit 0
    fi
  fi

  ## If we need to watch for processes
  if [ "${CHECK_PROCESS}" -eq "0" ]; then
    ## Anything related to APT is currently running
    if is_proc_running "${apt_proc_pattern}"; then
      exit 0
    fi

    ## Anything related to maco is currently running
    if is_proc_running "${maco_proc_pattern}"; then
      exit 0
    fi

    ## If nothing related to SGE is currently running,
    ## try to start the SGE execd systemd service
    ## and exit with error if the service can't start
    if ! is_proc_running "${sge_proc_pattern}"; then
      systemctl --quiet start sge_execd.service > /dev/null 2>&1 \
        || exit 4
    fi
  fi

  ## Simple debug message with color to validate current variables
  debug_message "main − Try to manage SGE queue(s): ${RED}${sge_queues_name_print:=/dev/null}${COLOR_DEBUG}for host: ${RED}${sge_hostname:=/dev/null}${COLOR_DEBUG}."

  ## If the queue(s) are already enabled, remove any potential flag file
  ## and exit
  if is_all_queue_enable "${sge_hostname}" "${sge_queues_name}"; then
    ## The pattern must stay unquoted to be expanded to the flag files.
    ## Nothing is removed when no queue was found: an empty list proves
    ## nothing about the real state of the queues.
    if [ -n "${sge_queues_name}" ]; then
      # shellcheck disable=SC2086
      rm -f -- ${sge_queue_flag_pattern}
    fi
    exit 0
  fi

  ## Test all queues one by one
  for loop_queue in ${sge_queues_name}; do

    ## File previously set if the queue was disabled by a script
    ## ("automatically disabled" for an upgrade)
    sge_queue_flag_file="${cluster_dir}/.sge.${loop_queue}.disable"

    ## If the queue is disabled, try to enable it
    is_queue_disable "${sge_hostname}" "${loop_queue}" \
      && enable_sge_queue "${sge_hostname}" "${loop_queue}"

    ## Don't consider manually disabled queue as an error except if FORCE_MODE was specified
    if [ -f "${sge_queue_flag_file}" ] || [ "${FORCE_MODE}" -eq "0" ]; then
      ## If the queue is still disabled, exit with error
      if is_queue_disable "${sge_hostname}" "${loop_queue}"; then
        printf '%b\n' "${RED}ERROR ${loop_queue}@${sge_hostname} is still disable.${RESET}"
        exit 5
      fi
    fi

  done

}
# }}}

# Manage arguments # {{{
# This code can't be in a function due to arguments
#
# Sets DEBUG, FORCE_MODE, OUTPUT_MESSAGE and sge_hostname from the command
# line. The host to manage may be given as first argument, with -h/--host,
# or after the options (as documented by usage); the last one wins.
# Exits with status 1 on an invalid option or a missing option argument.

if [ ! "${NBARGS}" -eq "0" ]; then

  manage_arg="0"

  ## If the first argument is not an option, use it for sge_hostname
  case "${1}" in
    -* )
      ;;
    * )
      sge_hostname="${1}"

      ## Switch to the next argument
      shift
      manage_arg=$((manage_arg+1))
      ;;
  esac

  # Parse all options (start with a "-") one by one
  while [ "${#}" -gt "0" ]; do

    case "${1}" in
      -d|--debug ) ## debug
        DEBUG=0
        ;;
      -f|--force ) ## Force to enable SGE queue
        FORCE_MODE=0
        ;;
      --help ) ## help
        usage
        ## Exit after help informations
        exit 0
        ;;
      -h|--host|--hostname ) ## Specify a different host to manage
        ## The host name is mandatory: don't shift past the last argument
        if [ "${#}" -lt "2" ]; then
          printf '%b\n' "${RED}Missing hostname after option: ${1}${RESET}"
          printf '%b\n' "---"
          usage
          exit 1
        fi
        ## Move to the next argument
        shift
        ## Override previous definition of sge_hostname
        sge_hostname="${1}"
        ;;
      -q|--quiet ) ## Silent mode
        ## Avoid to display any message on standard output
        OUTPUT_MESSAGE=1
        ;;
      -- ) ## End of options list
        ## Drop the separator and end the while loop
        shift
        manage_arg=$((manage_arg+1))
        break
        ;;
      -* ) ## unknown option
        printf '%b\n' "${RED}Invalid option: ${1}${RESET}"
        printf '%b\n' "---"
        usage
        exit 1
        ;;
      * ) ## Not an option: end the while loop
        break
        ;;
    esac

    debug_message "Arguments management − ${RED}${1}${COLOR_DEBUG} option managed."

    ## Move to the next argument
    shift
    manage_arg=$((manage_arg+1))

  done

  ## A host name is also accepted after the options
  if [ "${#}" -gt "0" ] && [ -n "${1}" ]; then
    sge_hostname="${1}"
    shift
    manage_arg=$((manage_arg+1))
  fi

  debug_message "Arguments management − ${RED}${manage_arg}${COLOR_DEBUG} argument(s) successfully managed."
else
  debug_message "Arguments management − No arguments/options to manage."
fi

# }}}

main

exit 0