Ensure queue is disabled before starting sge_execd
This commit is contained in:
parent
9d31d4ab02
commit
2340ced9b8
|
@ -114,7 +114,7 @@ define_vars() { # {{{
|
||||||
fi
|
fi
|
||||||
## }}}
|
## }}}
|
||||||
|
|
||||||
## If the host to manage is the current one
|
## If the host to manage is the current one {{{
|
||||||
if is_current_host "${sge_hostname}" ; then
|
if is_current_host "${sge_hostname}" ; then
|
||||||
debug_message "define_vars − \
|
debug_message "define_vars − \
|
||||||
${sge_hostname} is the current host."
|
${sge_hostname} is the current host."
|
||||||
|
@ -130,8 +130,9 @@ ${sge_hostname} is not the current host."
|
||||||
## Force to (re)enable SGE queue(s) in any case
|
## Force to (re)enable SGE queue(s) in any case
|
||||||
FORCE_MODE="0"
|
FORCE_MODE="0"
|
||||||
fi
|
fi
|
||||||
|
## }}}
|
||||||
|
|
||||||
## If FORCE_MODE was defined and enabled
|
## If FORCE_MODE was defined and enabled {{{
|
||||||
if [ -n "${FORCE_MODE}" ] && [ "${FORCE_MODE}" -eq "0" ]; then
|
if [ -n "${FORCE_MODE}" ] && [ "${FORCE_MODE}" -eq "0" ]; then
|
||||||
## Disable upgrade checking (remote host, asked behaviour,…)
|
## Disable upgrade checking (remote host, asked behaviour,…)
|
||||||
CHECK_UPGRADE="1"
|
CHECK_UPGRADE="1"
|
||||||
|
@ -143,8 +144,9 @@ ${sge_hostname} is not the current host."
|
||||||
## Ensure to define a value
|
## Ensure to define a value
|
||||||
FORCE_MODE="1"
|
FORCE_MODE="1"
|
||||||
fi
|
fi
|
||||||
|
## }}}
|
||||||
|
|
||||||
## Get all queue names
|
## Get all queue names {{{
|
||||||
sge_queues_name="$(qhost -h "${sge_hostname:=/dev/null}" -q -xml \
|
sge_queues_name="$(qhost -h "${sge_hostname:=/dev/null}" -q -xml \
|
||||||
| grep "queue name" \
|
| grep "queue name" \
|
||||||
| cut -d"'" -f2 )"
|
| cut -d"'" -f2 )"
|
||||||
|
@ -152,16 +154,21 @@ ${sge_hostname} is not the current host."
|
||||||
| grep "queue name" \
|
| grep "queue name" \
|
||||||
| cut -d"'" -f2 \
|
| cut -d"'" -f2 \
|
||||||
| tr -s '\n' ' ' )"
|
| tr -s '\n' ' ' )"
|
||||||
|
## }}}
|
||||||
|
|
||||||
## List of process pattern to monitor
|
## List of process pattern to monitor {{{
|
||||||
maco_proc_pattern="(/opt/maco/bin/maco.autoupdate.sh)"
|
maco_proc_pattern="(/opt/maco/bin/maco.autoupdate.sh)"
|
||||||
apt_proc_pattern="(aptitude.*full-upgrade|/usr/bin/dpkg.*--configure|dpkg-deb|/bin/sh /usr/lib/needrestart/dpkg-status)"
|
apt_proc_pattern="(aptitude.*full-upgrade|/usr/bin/dpkg.*--configure|dpkg-deb|/bin/sh /usr/lib/needrestart/dpkg-status)"
|
||||||
sge_proc_pattern="(/usr/lib/gridengine/sge_execd)"
|
sge_proc_pattern="(/usr/lib/gridengine/sge_execd)"
|
||||||
|
## }}}
|
||||||
## List of files to monitor
|
## List of files to monitor {{{
|
||||||
file_nologin_path="/etc/nologin"
|
file_nologin_path="/etc/nologin"
|
||||||
cluster_dir="/opt/ipr/cluster"
|
cluster_dir="/opt/ipr/cluster"
|
||||||
sge_queue_flag_pattern="${cluster_dir}/.sge.*.disable"
|
sge_queue_flag_pattern="${cluster_dir}/.sge.*.disable"
|
||||||
|
## }}}
|
||||||
|
|
||||||
|
## Script used to disable SGE queue(s)
|
||||||
|
sge_disable_host_queue_script="${PROGDIR}/sge.disable.host.queue.sh"
|
||||||
}
|
}
|
||||||
# }}}
|
# }}}
|
||||||
is_sge_host() { # {{{
|
is_sge_host() { # {{{
|
||||||
|
@ -520,59 +527,73 @@ main() { # {{{
|
||||||
|
|
||||||
## If we need to watch for processes
|
## If we need to watch for processes
|
||||||
if [ "${CHECK_PROCESS}" -eq "0" ]; then
|
if [ "${CHECK_PROCESS}" -eq "0" ]; then
|
||||||
## If nothing related to SGE is currently running
|
## Ensure the SGE queue(s) are really disabled, without creating any flag file
|
||||||
|
sh "${sge_disable_host_queue_script}" --force
|
||||||
|
|
||||||
|
## Wait a few seconds
|
||||||
|
sleep "${sleep_delay}"
|
||||||
|
|
||||||
|
## If nothing related to SGE is currently running {{{
|
||||||
### Try to start the SGE execd systemd service
|
### Try to start the SGE execd systemd service
|
||||||
### Exit with error if the service can't start
|
### Exit with error if the service can't start
|
||||||
is_proc_running "${sge_proc_pattern}" \
|
is_proc_running "${sge_proc_pattern}" \
|
||||||
|| systemctl --quiet start sge_execd.service > /dev/null 2>&1 \
|
|| systemctl --quiet start sge_execd.service > /dev/null 2>&1 \
|
||||||
|| exit 4
|
|| exit 4
|
||||||
|
## }}}
|
||||||
|
|
||||||
## Wait some seconds
|
## Wait a few seconds
|
||||||
sleep "${sleep_delay}"
|
sleep "${sleep_delay}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
## If we need to watch for upgrades
|
## If we need to watch for upgrades
|
||||||
if [ "${CHECK_UPGRADE}" -eq "0" ]; then
|
if [ "${CHECK_UPGRADE}" -eq "0" ]; then
|
||||||
## If APT package upgrade is available
|
## If APT package upgrade is available {{{
|
||||||
### Exit (wait for APT upgrade to be applied)
|
### Exit (wait for APT upgrade to be applied)
|
||||||
is_apt_upgrade_present \
|
is_apt_upgrade_present \
|
||||||
&& exit 0
|
&& exit 0
|
||||||
|
## }}}
|
||||||
|
|
||||||
## If Maco upgrade is present
|
## If Maco upgrade is present {{{
|
||||||
### Exit (wait for Maco upgrade to be applied)
|
### Exit (wait for Maco upgrade to be applied)
|
||||||
is_maco_upgrade_present \
|
is_maco_upgrade_present \
|
||||||
&& exit 0
|
&& exit 0
|
||||||
|
## }}}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
## If Maco status is ok, CONTINUE
|
## If Maco status is ok, CONTINUE {{{
|
||||||
### Else Exit (wait for upgrade/maintenance)
|
### Else Exit (wait for upgrade/maintenance)
|
||||||
is_maco_status_ok \
|
is_maco_status_ok \
|
||||||
|| exit 0
|
|| exit 0
|
||||||
|
## }}}
|
||||||
|
|
||||||
## If we need to watch for processes
|
## If we need to watch for processes
|
||||||
if [ "${CHECK_PROCESS}" -eq "0" ]; then
|
if [ "${CHECK_PROCESS}" -eq "0" ]; then
|
||||||
## If anything related to APT is currently running
|
## If anything related to APT is currently running {{{
|
||||||
### Exit
|
### Exit
|
||||||
is_proc_running "${apt_proc_pattern}" \
|
is_proc_running "${apt_proc_pattern}" \
|
||||||
&& exit 0
|
&& exit 0
|
||||||
|
## }}}
|
||||||
|
|
||||||
## If anything related to maco is currently running
|
## If anything related to maco is currently running {{{
|
||||||
### Exit
|
### Exit
|
||||||
is_proc_running "${maco_proc_pattern}" \
|
is_proc_running "${maco_proc_pattern}" \
|
||||||
&& exit 0
|
&& exit 0
|
||||||
|
## }}}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
## If we need to watch files
|
## If we need to watch files
|
||||||
if [ "${CHECK_FILE}" -eq "0" ]; then
|
if [ "${CHECK_FILE}" -eq "0" ]; then
|
||||||
## If nologin file exist (error on upgrade,…)
|
## If nologin file exist (error on upgrade,…) {{{
|
||||||
### Exit
|
### Exit
|
||||||
is_file_present "${file_nologin_path}" \
|
is_file_present "${file_nologin_path}" \
|
||||||
&& exit 0
|
&& exit 0
|
||||||
|
## }}}
|
||||||
|
|
||||||
## If all SGE queue(s) were manually disabled (not any flag file)
|
## If all SGE queue(s) were manually disabled (not any flag file) {{{
|
||||||
### Exit
|
### Exit
|
||||||
is_file_absent "${sge_queue_flag_pattern}" \
|
is_file_absent "${sge_queue_flag_pattern}" \
|
||||||
&& exit 0
|
&& exit 0
|
||||||
|
## }}}
|
||||||
fi
|
fi
|
||||||
|
|
||||||
## Simple debug message with color to validate the current variables
|
## Simple debug message with color to validate the current variables
|
||||||
|
@ -580,13 +601,14 @@ main() { # {{{
|
||||||
SGE queue(s): ${RED}${sge_queues_name_print:=/dev/null}${COLOR_DEBUG}\
|
SGE queue(s): ${RED}${sge_queues_name_print:=/dev/null}${COLOR_DEBUG}\
|
||||||
for host: ${RED}${sge_hostname:=/dev/null}${COLOR_DEBUG}."
|
for host: ${RED}${sge_hostname:=/dev/null}${COLOR_DEBUG}."
|
||||||
|
|
||||||
## If the queue(s) are already enabled
|
## If the queue(s) are already enabled {{{
|
||||||
### Ensure to remove any potential flag file
|
### Ensure to remove any potential flag file
|
||||||
# shellcheck disable=SC2086
|
# shellcheck disable=SC2086
|
||||||
### Exit
|
### Exit
|
||||||
is_all_queue_enable "${sge_hostname}" "${sge_queues_name}" \
|
is_all_queue_enable "${sge_hostname}" "${sge_queues_name}" \
|
||||||
&& find ${sge_queue_flag_pattern} -delete \
|
&& find ${sge_queue_flag_pattern} -delete \
|
||||||
&& exit 0
|
&& exit 0
|
||||||
|
## }}}
|
||||||
|
|
||||||
## Test all queues one by one
|
## Test all queues one by one
|
||||||
for loop_queue in ${sge_queues_name}; do
|
for loop_queue in ${sge_queues_name}; do
|
||||||
|
@ -595,12 +617,13 @@ for host: ${RED}${sge_hostname:=/dev/null}${COLOR_DEBUG}."
|
||||||
## "automatically disabled" for an upgrade
|
## "automatically disabled" for an upgrade
|
||||||
sge_queue_flag_file="${cluster_dir}/.sge.${loop_queue}.disable"
|
sge_queue_flag_file="${cluster_dir}/.sge.${loop_queue}.disable"
|
||||||
|
|
||||||
## If the queue is disabled
|
## If the queue is disabled {{{
|
||||||
### Try to enable it
|
### Try to enable it
|
||||||
is_queue_disable "${sge_hostname}" "${loop_queue}" \
|
is_queue_disable "${sge_hostname}" "${loop_queue}" \
|
||||||
&& enable_sge_queue "${sge_hostname}" "${loop_queue}"
|
&& enable_sge_queue "${sge_hostname}" "${loop_queue}"
|
||||||
|
## }}}
|
||||||
|
|
||||||
## Don't consider manually disabled queue as an error except if FORCE_MODE was specified
|
## Don't consider manually disabled queue as an error except if FORCE_MODE was specified {{{
|
||||||
if [ -f "${sge_queue_flag_file}" ] || [ "${FORCE_MODE}" -eq "0" ]; then
|
if [ -f "${sge_queue_flag_file}" ] || [ "${FORCE_MODE}" -eq "0" ]; then
|
||||||
## If the queue is still disable
|
## If the queue is still disable
|
||||||
### Exit with error
|
### Exit with error
|
||||||
|
@ -608,6 +631,7 @@ for host: ${RED}${sge_hostname:=/dev/null}${COLOR_DEBUG}."
|
||||||
&& printf '%b\n' "${RED}ERROR ${loop_queue}@${sge_hostname} is still disable.${RESET}" \
|
&& printf '%b\n' "${RED}ERROR ${loop_queue}@${sge_hostname} is still disable.${RESET}" \
|
||||||
&& exit 5
|
&& exit 5
|
||||||
fi
|
fi
|
||||||
|
## }}}
|
||||||
|
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue