scripts/cluster/sge.disable.host.queue.sh

412 lines
11 KiB
Bash
Raw Permalink Normal View History

#!/bin/sh
# Vars {{{
## Script identity and invocation details.
## The command substitutions are quoted on purpose: an unquoted word given
## to `readonly` may be field-split (script path containing spaces).
readonly PROGNAME="$(basename "${0}")"
readonly PROGDIR="$(readlink -m "$(dirname "${0}")")"
readonly ARGS="${*}"
readonly NBARGS="${#}"

## Keep DEBUG if it is already defined (by parent script,…),
## otherwise disable debug messages (0 = enabled, 1 = disabled).
: "${DEBUG:=1}"

## Disable FORCE_MODE by default (0 = enabled, 1 = disabled)
FORCE_MODE=1

## Colors
readonly PURPLE='\033[1;35m'
readonly RED='\033[0;31m'
readonly RESET='\033[0m'
readonly COLOR_DEBUG="${PURPLE}"
# }}}
## Print the help message on standard output.
## Reads the PROGNAME global for the script name.
## The here-document body stays in column 0 so the printed text does not
## depend on tab stripping.
usage() { # {{{
  cat <<- EOF
usage: $PROGNAME [--help] [-d|-f|-h] [hostname]
Try to disable all SGE queues of the current host (default)
or the one passed as first argument.
EXAMPLES:
- Disable SGE's queue(s) of the current host
${PROGNAME}
- Disable SGE's queue(s) of "marvin.domain.tld" host
${PROGNAME} marvin.domain.tld
${PROGNAME} -h marvin.domain.tld
OPTIONS:
-d,--debug
Enable debug messages.
-f,--force
Force mode, don't manage anything related to a localhost.
--help
Print this help message.
-h,--host,--hostname SGE_HOST_TO_MANAGE
Manage SGE's queue(s) of "SGE_HOST_TO_MANAGE" host.
EOF
}
# }}}
## Print a debug message on standard output when DEBUG is enabled (=0).
## Arguments: $1 - text of the message (backslash escapes are interpreted
##                 because it is printed with the %b conversion).
## Globals:   DEBUG (read), PROGNAME (read).
## Returns:   always 0, so it can be used anywhere without changing the
##            status seen by the caller.
debug_message() { # {{{
  local_debug_message="${1}"

  ## An unset/empty DEBUG is handled as "disabled".
  ## \033 is used instead of \e: \e is not a POSIX printf escape and is
  ## printed literally by some /bin/sh implementations.
  if [ "${DEBUG:-1}" -eq "0" ]; then
    printf '\033[1;35m%-6b\033[m\n' "DEBUG ${PROGNAME}: ${local_debug_message}"
  fi

  return 0
}
# }}}
## Define the global variables used by the rest of the script.
## Globals read:    sge_hostname (optional), FORCE_MODE.
## Globals written: sge_hostname, LOCAL_HOST (0 = host to manage is the
##                  current one, 1 = remote host or force mode),
##                  sge_queues_name (one queue per line),
##                  sge_queues_name_print (same list, space separated).
## Calls:           is_current_host, debug_message, hostname, qhost.
define_vars() { # {{{
  ## If sge_hostname wasn't defined, use the local host
  if [ -z "${sge_hostname}" ]; then
    sge_hostname="$(hostname -f)"
  fi

  ## If the given host to manage is the current one {{{
  if is_current_host "${sge_hostname}"; then
    debug_message "define_vars ${sge_hostname} is the current host."
    LOCAL_HOST="0"
  else ## In case of a remote host
    debug_message "define_vars ${sge_hostname} is not the current host."
    LOCAL_HOST="1"
  fi
  ## }}}

  ## If FORCE_MODE was defined and enabled {{{
  if [ "${FORCE_MODE}" -eq "0" ]; then
    debug_message "define_vars FORCE MODE is enable, don't manage anything related to a localhost."
    ## Don't manage configuration specific to a localhost
    LOCAL_HOST="1"
  fi
  ## }}}

  ## Get all queues name {{{
  ## qhost is queried only once; ":=" makes sge_hostname fall back to
  ## /dev/null if it is still empty at this point.
  sge_queues_name="$(qhost -h "${sge_hostname:=/dev/null}" -q -xml \
    | grep "queue name" \
    | cut -d"'" -f2 )"

  ## Printable version: the same names on a single line, each one followed
  ## by a space (empty when no queue was found).
  sge_queues_name_print=""
  if [ -n "${sge_queues_name}" ]; then
    sge_queues_name_print="$(printf '%s\n' "${sge_queues_name}" \
      | tr -s '\n' ' ' )"
  fi
  ## }}}
}
# }}}
2021-12-22 11:03:26 +01:00
## Check that the current host can be used with SGE.
## Returns: 0 if the qconf command is available and the FQDN of this host
##          is found in the output of `qconf -ss`, 1 otherwise.
## Globals written: return_is_sge_host, local_sge_host_fqdn.
## Calls:   debug_message, hostname, qconf.
is_sge_host() { # {{{
  return_is_sge_host="1"

  ## Check if SGE commands (qconf) are available
  if command -v qconf > /dev/null 2>&1; then
    debug_message "is_sge_host SGE seems present on this host."

    local_sge_host_fqdn="$(hostname -f)"

    ### And verify if the host is fully configured as a submit host.
    ### The name is searched as a fixed string (its dots are not regex
    ### wildcards); an empty name is refused because an empty pattern
    ### would match any output.
    if [ -n "${local_sge_host_fqdn}" ] \
      && qconf -ss 2>/dev/null \
        | grep --fixed-strings --word-regexp --quiet -- "${local_sge_host_fqdn}"; then
      debug_message "is_sge_host The host seems configured as a SGE submit host."
      return_is_sge_host="0"
    else
      debug_message "is_sge_host This host is not yet configured as a SGE submit host."
    fi
  else
    debug_message "is_sge_host SGE is not present on this host."
  fi

  return "${return_is_sge_host}"
}
# }}}
## Tell if the SGE master (sge_qmaster) can be reached from this host.
## Globals read:    sge_master_uri, sge_master_port.
## Globals written: return_is_sge_master_available.
## Returns: 0 when the port answers, 1 otherwise.
is_sge_master_available() { # {{{
  ## Pessimistic default, switched to 0 only if Netcat succeeds
  return_is_sge_master_available="1"

  ## Netcat options:
  ### -z: Only scan for listening daemons, without sending any data to them.
  ### -w 10: Timeout the test after 10 seconds.
  nc -z -w 10 "${sge_master_uri}" "${sge_master_port}" \
    && return_is_sge_master_available="0"

  case "${return_is_sge_master_available}" in
    0 )
      debug_message "is_sge_master_available SGE Master (${sge_master_uri}:${sge_master_port}) is reachable from this host."
      ;;
    * )
      debug_message "is_sge_master_available SGE Master (${sge_master_uri}:${sge_master_port}) is not reachable from this host."
      ;;
  esac

  return "${return_is_sge_master_available}"
}
# }}}
## Compare a host name with the FQDN of the machine running the script.
## Arguments: $1 - host name to compare.
## Returns:   0 if $1 is exactly the output of `hostname -f`, 1 otherwise.
is_current_host() { # {{{
  local_current_fqdn=$(hostname -f)
  local_current_host="${1}"

  ## Literal (quoted pattern) comparison of both names
  case "${local_current_host}" in
    "${local_current_fqdn}" ) local_current_host_return="0" ;;
    * ) local_current_host_return="1" ;;
  esac

  return "${local_current_host_return}"
}
# }}}
## Tell if a SGE queue instance is enabled.
## Arguments: $1 - host name of the queue instance.
##            $2 - queue name.
## Returns:   0 if the queue is enabled, 1 if it is disabled.
## Exits:     with status 2 when the state can't be determined.
## Globals written: local_queue_enable_hostname, local_queue_enable_name,
##                  local_queue_enable_test, local_sge_queue_state,
##                  return_queue_enable.
is_queue_enable() { # {{{
  local_queue_enable_hostname="${1}"
  local_queue_enable_name="${2}"

  ## List all queues with 'disable' state and filter to the expected queue name
  ## with a fake_user to avoid pending jobs for this queue
  ### And count returned lines
  local_queue_enable_test=$(qstat -f -qs d -q "${local_queue_enable_name:=/dev/null}@${local_queue_enable_hostname:=/dev/null}" -u fake_user \
    | wc -l)

  case "${local_queue_enable_test}" in
    0 ) ## No result so the queue is enable
      local_sge_queue_state="enable"
      return_queue_enable="0"
      ;;
    3 ) ## Results (header + queue name) so the queue is disable
      local_sge_queue_state="disable"
      return_queue_enable="1"
      ;;
    * ) ## Unexpected result
      ## Report the queue that was tested by this function (its own
      ## variable, not the one set by disable_sge_queue).
      printf '%b\n' "${RED}Not able to determine the state of ${local_queue_enable_name:=/dev/null}@${local_queue_enable_hostname:=/dev/null} queue (command return ${local_queue_enable_test} lines).${RESET}"
      exit 2
      ;;
  esac

  ## Simple debug message to valid current variables
  debug_message "is_queue_enable SGE queue: ${RED}${local_queue_enable_name:=/dev/null}${COLOR_DEBUG} state is: ${RED}${local_sge_queue_state:=/dev/null}${COLOR_DEBUG}."

  return "${return_queue_enable}"
}
# }}}
## Tell if a SGE queue instance is disabled.
## Arguments: $1 - host name of the queue instance.
##            $2 - queue name.
## Returns:   0 if the queue is disabled, 1 if it is enabled.
## Exits:     with status 3 when the state can't be determined.
is_queue_disable() { # {{{
  local_queue_disable_hostname="${1}"
  local_queue_disable_name="${2}"

  ## Ask qstat for the queues in 'disable' state, restricted to the
  ## expected queue instance and to a fake_user (no pending jobs listed),
  ## then count the lines of the answer.
  local_queue_disable_test=$(qstat -f -qs d \
      -q "${local_queue_disable_name:=/dev/null}@${local_queue_disable_hostname:=/dev/null}" \
      -u fake_user \
    | wc -l)

  if [ "${local_queue_disable_test}" = "3" ]; then
    ## Results (header + queue name) so the queue is disable
    return_queue_disable="0"
    local_sge_queue_state="disable"
  elif [ "${local_queue_disable_test}" = "0" ]; then
    ## No result so the queue is enable
    return_queue_disable="1"
    local_sge_queue_state="enable"
  else
    ## Unexpected result
    printf '%b\n' "${RED}Not able to determine the state of ${local_queue_disable_name:=/dev/null}@${local_queue_disable_hostname:=/dev/null} queue (command return ${local_queue_disable_test} lines).${RESET}"
    exit 3
  fi

  ## Simple debug message to valid current variables
  debug_message "is_queue_disable SGE queue: ${RED}${local_queue_disable_name:=/dev/null}${COLOR_DEBUG} state is: ${RED}${local_sge_queue_state:=/dev/null}${COLOR_DEBUG}."

  return "${return_queue_disable}"
}
# }}}
## Tell if every queue of a host is disabled.
## Arguments: $1 - host name of the queue instances.
##            $2 - whitespace separated list of queue names.
## Returns:   0 if is_queue_disable succeeded for every queue (also when
##            the list is empty), 1 if at least one queue is not disabled.
is_all_queue_disable() { # {{{
  local_all_queue_disable_hostname="${1}"
  local_all_queue_disable_name="${2}"

  ## Optimistic default: all queues are disable
  return_all_queue_disable="0"

  ## Every queue is tested, even after a first enabled one was found.
  ## The list is left unquoted on purpose to get one word per queue.
  for loop_disable_queue in ${local_all_queue_disable_name}; do
    if ! is_queue_disable "${local_all_queue_disable_hostname}" "${loop_disable_queue}"; then
      ### A queue is not disable, remember it
      return_all_queue_disable="1"
    fi
  done

  return "${return_all_queue_disable}"
}
# }}}
## Try to disable one SGE queue instance.
## Arguments: $1 - host name of the queue instance.
##            $2 - queue name.
## Globals read: LOCAL_HOST, PROGNAME, sge_queue_deactivator.
## Returns:   0 if the qmod command succeeded, 1 otherwise.
disable_sge_queue() { # {{{
  local_sge_hostname="${1}"
  local_sge_queue_name="${2}"

  ## Failure until qmod says otherwise
  return_disable_queue="1"

  ## Simple debug message to valid current variables
  debug_message "disable_sge_queue Try to disable SGE queue: ${RED}${local_sge_queue_name:=/dev/null}@${local_sge_hostname:=/dev/null}${COLOR_DEBUG}."

  ## SGE command to disable the queue; leave right away if it fails
  qmod --disable "${local_sge_queue_name}@${local_sge_hostname}" > /dev/null \
    || return "${return_disable_queue}"

  return_disable_queue="0"

  ## When the managed host is the local one, write a file saying that the
  ## queue was disabled by this script
  if [ "${LOCAL_HOST}" -eq "0" ]; then
    printf '%s' "by ${PROGNAME} script" > "${sge_queue_deactivator}"
  fi

  return "${return_disable_queue}"
}
# }}}
## Disable every SGE queue of the host to manage.
## Globals read:    sge_master_uri, sge_master_port (both optional),
##                  sge_hostname, sge_queues_name, sge_queues_name_print.
## Globals written: sge_master_uri, sge_master_port, sge_queue_deactivator.
## Exits:   0 if the SGE master is not reachable, if this host is not a
##          SGE host or if all queues are already disable;
##          4 if a queue is still enable after the attempt to disable it.
## Returns: 0 once every queue was handled.
main() { # {{{
  ## Test if SGE Master is reachable {{{
  ### If sge_master_uri wasn't defined (environment variable,…) {{{
  if [ -z "${sge_master_uri}" ]; then
    ## Get SGE master from current configuration (first line of the
    ## act_qmaster file), "localhost" if the file can't be read
    sge_master_uri=$(grep --max-count=1 -- "" /var/lib/gridengine/default/common/act_qmaster 2>/dev/null || echo "localhost")
  fi
  ### }}}
  ### If sge_master_port wasn't defined (environment variable,…) {{{
  if [ -z "${sge_master_port}" ]; then
    ## Use the default value for sge_master_port
    sge_master_port="6444"
  fi
  ### }}}
  ### If SGE Master is not reachable from this host {{{
  #### Exit
  is_sge_master_available \
    || exit 0
  ### }}}
  ## }}}

  ## If SGE is not yet available on this host {{{
  ### Exit
  is_sge_host \
    || exit 0
  ## }}}

  ## Define all vars according the selected options
  define_vars

  ## Simple debug message with color to valid current variables
  debug_message "main Try to manage SGE queue(s): ${RED}${sge_queues_name_print:=/dev/null}${COLOR_DEBUG}for host: ${RED}${sge_hostname:=/dev/null}${COLOR_DEBUG}."

  ## If the queue(s) are already disable
  ### Exit
  is_all_queue_disable "${sge_hostname}" "${sge_queues_name}" \
    && exit 0

  ## Test all queues one by one (unquoted on purpose: one word per queue)
  for loop_queue in ${sge_queues_name}; do
    ## File that says this queue was disabled by a script
    ## Can be used by other script to verify the queue wasn't disable by a user/admin
    sge_queue_deactivator="/opt/ipr/cluster/.sge.${loop_queue}.disable"

    ## If the queue is enable
    ### Try to disable it
    if is_queue_enable "${sge_hostname}" "${loop_queue}"; then
      disable_sge_queue "${sge_hostname}" "${loop_queue}"
    fi

    ## If the queue is still enable
    ### Exit with error (even if the message can't be printed)
    if is_queue_enable "${sge_hostname}" "${loop_queue}"; then
      printf '%b\n' "${RED}ERROR ${loop_queue}@${sge_hostname} is still enable.${RESET}"
      exit 4
    fi
  done

  ## Explicit success: the status of the last test of the loop must not
  ## leak to the caller
  return 0
}
# }}}
# Manage arguments # {{{
## Parse the command line; the script's arguments are handed over with "$@".
## Arguments: the arguments of the script.
## Globals written: sge_hostname, DEBUG, FORCE_MODE, manage_arg.
## Exits: 0 after --help, 1 on an invalid option or when -h|--host|--hostname
##        is given without a host name.
## The host to manage can be given as the first argument, as the value of
## -h|--host|--hostname or as the argument following the options (the last
## one given wins).
parse_args() {
  if [ "${#}" -eq "0" ]; then
    debug_message "Arguments management No arguments/options to manage."
    return 0
  fi

  manage_arg="0"

  ## If the first argument is not an option
  case "${1}" in
    -* ) ## An option, handled by the loop below
      ;;
    * )
      ## Use this argument for sge_hostname
      sge_hostname="${1}"
      ## Switch to the next argument
      shift
      manage_arg=$((manage_arg+1))
      ;;
  esac

  # Parse all options (start with a "-") one by one
  while [ "${#}" -gt "0" ]; do
    case "${1}" in
      -d|--debug ) ## debug
        DEBUG=0
        ;;
      -f|--force ) ## Force to disable SGE queue
        FORCE_MODE=0
        ;;
      --help ) ## help
        usage
        ## Exit after help informations
        exit 0
        ;;
      -h|--host|--hostname ) ## Specify a different host to manage
        ## The option is useless without its value
        if [ "${#}" -lt "2" ] || [ -z "${2}" ]; then
          printf '%b\n' "${RED}Option ${1} requires a hostname.${RESET}"
          printf '%b\n' "---"
          usage
          exit 1
        fi
        ## Move to the next argument
        shift
        ## Override previous definition of sge_hostname
        sge_hostname="${1}"
        ;;
      -- ) ## End of options list
        ## Drop the separator and end the while loop
        shift
        break
        ;;
      -* ) ## unknow option
        printf '%b\n' "${RED}Invalid option: ${1}${RESET}"
        printf '%b\n' "---"
        usage
        exit 1
        ;;
      * ) ## Not an option: end the while loop
        break
        ;;
    esac

    debug_message "Arguments management ${RED}${1}${COLOR_DEBUG} option managed."

    ## Move to the next argument
    shift
    manage_arg=$((manage_arg+1))
  done

  ## A host name given after the options ("[-d|-h] [hostname]" in usage)
  if [ "${#}" -gt "0" ] && [ -n "${1}" ]; then
    sge_hostname="${1}"
    manage_arg=$((manage_arg+1))
  fi

  debug_message "Arguments management ${RED}${manage_arg}${COLOR_DEBUG} argument(s) successfully managed."
}

parse_args "$@"
# }}}
## Entry point: run the main logic.
## main and the functions it calls end the script by themselves (exit with
## a non-zero status) when an error is detected.
main
## Reaching this line means that nothing called exit with an error
exit 0