scripts/cluster/sge.disable.host.queue.sh

412 lines
11 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/sh
# Vars {{{
readonly PROGNAME=$(basename "${0}")
readonly PROGDIR=$(readlink -m $(dirname "${0}"))
readonly ARGS="${*}"
readonly NBARGS="${#}"
## Test if DEBUG is already defined (by parent script,…)
[ -z "${DEBUG}" ] && DEBUG=1
## Disable FORCE_MODE by default
FORCE_MODE=1
## Colors
readonly PURPLE='\033[1;35m'
readonly RED='\033[0;31m'
readonly RESET='\033[0m'
readonly COLOR_DEBUG="${PURPLE}"
# }}}
usage() { # {{{
cat <<- EOF
usage: $PROGNAME [--help] [-d|-h] [hostname]
Try to disable all SGE queues of the current host (default)
or the one passed as first argument.
EXAMPLES:
- Disable SGE's queue(s) of the current host
${PROGNAME}
- Disable SGE's queue(s) of "marvin.domain.tld" host
${PROGNAME} marvin.domain.tld
${PROGNAME} -h marvin.domain.tld
OPTIONS:
-d,--debug
Enable debug messages.
--help
Print this help message.
-h,--host,--hostname SGE_HOST_TO_MANAGE
Manage SGE's queue(s) of "SGE_HOST_TO_MANAGE" host.
EOF
}
# }}}
debug_message() { # {{{
local_debug_message="${1}"
## Print message if DEBUG is enable (=0)
[ "${DEBUG}" -eq "0" ] && printf '\e[1;35m%-6b\e[m\n' "DEBUG ${PROGNAME}: ${local_debug_message}"
return 0
}
# }}}
define_vars() { # {{{
## If sge_hostname wasn't defined {{{
if [ -z "${sge_hostname}" ]; then
## Use local host for sge_hostname
sge_hostname="$(hostname -f)"
fi
## }}}
## If the given host to manage is the current one {{{
if is_current_host "${sge_hostname}" ; then
debug_message "define_vars \
${sge_hostname} is the current host."
LOCAL_HOST="0"
else ## In case of a remote host
debug_message "define_vars \
${sge_hostname} is not the current host."
LOCAL_HOST="1"
fi
## }}}
## If FORCE_MODE was defined and enabled {{{
if [ "${FORCE_MODE}" -eq "0" ]; then
debug_message "define_vars \
FORCE MODE is enable, don't manage anything related to a localhost."
## Don't manage configuration specific to a localhost
LOCAL_HOST="1"
fi
## }}}
## Get all queues name {{{
sge_queues_name="$(qhost -h "${sge_hostname:=/dev/null}" -q -xml \
| grep "queue name" \
| cut -d"'" -f2 )"
sge_queues_name_print="$(qhost -h "${sge_hostname:=/dev/null}" -q -xml \
| grep "queue name" \
| cut -d"'" -f2 \
| tr -s '\n' ' ' )"
## }}}
}
# }}}
is_sge_host() { # {{{
## Check if SGE commands (qconf) are available
if [ "$(command -v qconf)" ]; then
debug_message "is_sge_host \
SGE seems present on this host."
### And verify if the host is fully configured as a submit host
if qconf -ss 2>/dev/null | grep --word-regexp --quiet $(hostname -f); then
debug_message "is_sge_host \
The host seems configured as a SGE submit host."
return_is_sge_host="0"
else
return_is_sge_host="1"
debug_message "is_sge_host \
This host is not yet configured as a SGE submit host."
fi
else
return_is_sge_host="1"
debug_message "is_sge_host \
SGE is not present on this host."
fi
return "${return_is_sge_host}"
}
# }}}
is_sge_master_available() { # {{{
## Check with Netcat if SGE master (sge_qmaster) is reachable from this host.
### -z: Only scan for listening daemons, without sending any data to them.
### -w 10: Timeout the test after 10 seconds.
if nc -z -w 10 "${sge_master_uri}" "${sge_master_port}"; then
return_is_sge_master_available="0"
debug_message "is_sge_master_available \
SGE Master (${sge_master_uri}:${sge_master_port}) is reachable from this host."
else
return_is_sge_master_available="1"
debug_message "is_sge_master_available \
SGE Master (${sge_master_uri}:${sge_master_port}) is not reachable from this host."
fi
return "${return_is_sge_master_available}"
}
# }}}
is_current_host() { # {{{
local_current_host="${1}"
local_current_fqdn=$(hostname -f)
## Test if the sge_host to manage is the current host
if [ "${local_current_host}" = "${local_current_fqdn}" ]; then
local_current_host_return="0"
else
local_current_host_return="1"
fi
return "${local_current_host_return}"
}
# }}}
is_queue_enable() { # {{{
local_queue_enable_hostname="${1}"
local_queue_enable_name="${2}"
## List all queues with 'disable' state and filter to the expected queue name
## with a fake_user to avoid pending jobs for this queue
### And count returned lines
local_queue_enable_test=$(qstat -f -qs d -q "${local_queue_enable_name:=/dev/null}@${local_queue_enable_hostname:=/dev/null}" -u fake_user \
| wc -l)
case "${local_queue_enable_test}" in
0 ) ## No result so the queue is enable
local_sge_queue_state="enable"
return_queue_enable="0"
;;
3 ) ## Results (header + queue name) so the queue is disable
local_sge_queue_state="disable"
return_queue_enable="1"
;;
* ) ## Unexpected result
printf '%b\n' "${RED}Not able to determine the state of ${local_sge_queue_name:=/dev/null}@${local_queue_enable_hostname:=/dev/null} queue (command return ${local_queue_enable_test} lines).${RESET}"
exit 2
;;
esac
## Simple debug message to valid current variables
debug_message "is_queue_enable \
SGE queue: ${RED}${local_queue_enable_name:=/dev/null}${COLOR_DEBUG} \
state is: ${RED}${local_sge_queue_state:=/dev/null}${COLOR_DEBUG}."
return "${return_queue_enable}"
}
# }}}
is_queue_disable() { # {{{
local_queue_disable_hostname="${1}"
local_queue_disable_name="${2}"
## List all queues with 'disable' state and filter to the expected queue name
## add a fake_user to avoid pending jobs for this queue
### And count returned lines
local_queue_disable_test=$(qstat -f -qs d -q "${local_queue_disable_name:=/dev/null}@${local_queue_disable_hostname:=/dev/null}" -u fake_user \
| wc -l)
case "${local_queue_disable_test}" in
0 ) ## No result so the queue is enable
local_sge_queue_state="enable"
return_queue_disable="1"
;;
3 ) ## Results (header + queue name) so the queue is disable
local_sge_queue_state="disable"
return_queue_disable="0"
;;
* ) ## Unexpected result
printf '%b\n' "${RED}Not able to determine the state of ${local_queue_disable_name:=/dev/null}@${local_queue_disable_hostname:=/dev/null} queue (command return ${local_queue_disable_test} lines).${RESET}"
exit 3
;;
esac
## Simple debug message to valid current variables
debug_message "is_queue_disable \
SGE queue: ${RED}${local_queue_disable_name:=/dev/null}${COLOR_DEBUG} \
state is: ${RED}${local_sge_queue_state:=/dev/null}${COLOR_DEBUG}."
return "${return_queue_disable}"
}
# }}}
is_all_queue_disable() { # {{{
local_all_queue_disable_hostname="${1}"
local_all_queue_disable_name="${2}"
## By default, all queues are disable
return_all_queue_disable="0"
## Test all queues one by one
for loop_disable_queue in ${local_all_queue_disable_name}; do
### If a queue is not disable
#### Change the return value
is_queue_disable "${local_all_queue_disable_hostname}" "${loop_disable_queue}" \
|| return_all_queue_disable="1"
done
return "${return_all_queue_disable}"
}
# }}}
disable_sge_queue() { # {{{
local_sge_hostname="${1}"
local_sge_queue_name="${2}"
return_disable_queue="1"
## Simple debug message to valid current variables
debug_message "disable_sge_queue \
Try to disable SGE queue: ${RED}${local_sge_queue_name:=/dev/null}@${local_sge_hostname:=/dev/null}${COLOR_DEBUG}."
## SGE command to disable the queue
## fix returned value
## create a file for local queue if run from the host
if qmod --disable "${local_sge_queue_name}@${local_sge_hostname}" > /dev/null; then
return_disable_queue="0"
[ "${LOCAL_HOST}" -eq "0" ] && printf '%s' "by ${PROGNAME} script" > "${sge_queue_deactivator}"
fi
return "${return_disable_queue}"
}
# }}}
main() { # {{{
## Test if SGE Master is reachable {{{
### If sge_master_uri wasn't defined (environment variable,…) {{{
if [ -z "${sge_master_uri}" ]; then
## Get SGE master from current configuration
sge_master_uri=$(grep --max-count=1 -- "" /var/lib/gridengine/default/common/act_qmaster 2>/dev/null || echo "localhost")
fi
### }}}
### If sge_master_port wasn't defined (environment variable,…) {{{
if [ -z "${sge_master_port}" ]; then
## Use local host for sge_master_port
sge_master_port="6444"
fi
### }}}
### If SGE Master is not reachable from this host {{{
#### Exit
is_sge_master_available \
|| exit 0
### }}}
## }}}
## If SGE is not yet available on this host {{{
### Exit
is_sge_host \
|| exit 0
## }}}
## Define all vars according the selected options
define_vars
## Simple debug message with color to valid current variables
debug_message "main Try to manage \
SGE queue(s): ${RED}${sge_queues_name_print:=/dev/null}${COLOR_DEBUG}\
for host: ${RED}${sge_hostname:=/dev/null}${COLOR_DEBUG}."
## If the queue(s) are already disable
### Exit
is_all_queue_disable "${sge_hostname}" "${sge_queues_name}" \
&& exit 0
## Test all queues one by one
for loop_queue in ${sge_queues_name}; do
## File that says this queue was disabled by a script
## Can be used by other script to verify the queue wasn't disable by a user/admin
sge_queue_deactivator="/opt/ipr/cluster/.sge.${loop_queue}.disable"
## If the queue is enable
### Try to disable it
is_queue_enable "${sge_hostname}" "${loop_queue}" \
&& disable_sge_queue "${sge_hostname}" "${loop_queue}"
## If the queue is still enable
### Exit with error
is_queue_enable "${sge_hostname}" "${loop_queue}" \
&& printf '%b\n' "${RED}ERROR ${loop_queue}@${sge_hostname} is still enable.${RESET}" \
&& exit 4
done
}
# }}}
# Manage arguments # {{{
# This code can't be in a function due to arguments
if [ ! "${NBARGS}" -eq "0" ]; then
manage_arg="0"
## If the first argument is not an option
if ! printf -- '%s' "${1}" | grep -q -E -- "^-+";
then
## Use this argument for sge_hostname
sge_hostname="${1}"
## Switch to the next argument
shift
manage_arg=$((manage_arg+1))
fi
# Parse all options (start with a "-") one by one
while printf -- '%s' "${1}" | grep -q -E -- "^-+"; do
case "${1}" in
-d|--debug ) ## debug
DEBUG=0
;;
-f|--force ) ## Force to disable SGE queue
FORCE_MODE=0
;;
--help ) ## help
usage
## Exit after help informations
exit 0
;;
-h|--host|--hostname ) ## Specify a different host to manage
## Move to the next argument
shift
## Override previous definition of sge_hostname
sge_hostname="${1}"
;;
-- ) ## End of options list
## End the while loop
break
;;
* ) ## unknow option
printf '%b\n' "${RED}Invalid option: ${1}${RESET}"
printf '%b\n' "---"
usage
exit 1
;;
esac
debug_message "Arguments management \
${RED}${1}${COLOR_DEBUG} option managed."
## Move to the next argument
shift
manage_arg=$((manage_arg+1))
done
debug_message "Arguments management \
${RED}${manage_arg}${COLOR_DEBUG} argument(s) successfully managed."
else
debug_message "Arguments management \
No arguments/options to manage."
fi
# }}}
main
exit 0