#!/bin/sh
#
# SGE: Sun Grid Engine check - Xymon external script test
#
##### Purpose is to report back to a central server, all Sun
##### Grid Engine software faults.
#####
#
# version 0.4
#
# BIG BROTHER / XXXXXXXXXXXXXXXX status
#
# Written by Butch Deal
#            Daniel Gomez
#            Jérémy Gardais
#
# v0.4  09/06/20 clean, correction,… for Xymon 4.3.28
# v0.3e 10/14/08 cut down on the number of qhost runs
# v0.3d 03/31/06 added alarm/suspend state identification
# v0.3c 03/01/06 propagated yellow state upon UNAVAILABLE queue instances
# v0.3b 01/31/06 fixed yellow warning queue status for ambiguous config test
# v0.3a 01/31/06 added unknown queue status and ambiguous config test
# v0.3  01/26/06 fixed status reporting and optimized job status
# v0.2  08/03/05 flag disabled queues as clear
# v0.1  07/28/05 authored

########################################
# NOTE
# The version v0.4 has only been tested with Xymon (server and client) 4.2.x.
#
# The color status with respect to queue status is arbitrary and should be
# reviewed for your particular environment.
#
# Tested on :
#   Solaris & Linux
#   Linux only (for v0.4)
########################################

########################################
# INSTALLATION
# step 1 - copy to Xymon client's ext dir
# step 2 - create a new clientlaunch.d/sge.cfg file
# step 3 - restart Xymon client
#
# NOTE - the TEST variable in the configuration section is the name used
#        as the column header.
########################################

##################################
# CONFIGURE IT HERE
##################################

# File name of this plugin; used as a prefix for diagnostics on stderr.
PLUGIN_NAME=${0##*/}
readonly PLUGIN_NAME

# Name of the test, i.e. the column header on the Xymon display.
readonly TEST="sge"

# Work files:
#   PLUGIN_RESULT - body of the status message
#   PLUGIN_STATE  - one "<rank>&<colour> <text>" line per finding; the line
#                   with the lowest rank decides the colour of the column
readonly PLUGIN_RESULT="${XYMONTMP}/${MACHINEDOTS}.${TEST}.plugin_result"
readonly PLUGIN_STATE="${XYMONTMP}/${MACHINEDOTS}.${TEST}.plugin_state"

# SGE client commands, resolved once from PATH (empty when not found).
# QSELECT is resolved and exported like the others but is not called by
# this script.
QSTAT=$(command -v qstat)
QHOST=$(command -v qhost)
QSELECT=$(command -v qselect)
readonly QSTAT QHOST QSELECT
export QSTAT QHOST QSELECT

# define colours for graphics
# Comment these out if using older BB versions
CLEAR_PIC="&clear"
RED_PIC="&red"
YELLOW_PIC="&yellow"
GREEN_PIC="&green"
UNKNOWN_PIC="&purple"

##################################
# Start of script
##################################

#####
# get_header TITLE [COMMAND]
#   Print a section heading: an empty line, TITLE, an empty line.
#   COMMAND (the command that produced the section) is accepted so callers
#   can document it, but it is not displayed.
#####
get_header() {
  printf '\n%s\n\n' "$1"
}

#####
# get_header_small TITLE [COMMAND]
#   Same output as get_header; kept as a separate entry point.
#####
get_header_small() {
  printf '\n%s\n\n' "$1"
}

#####
# get_footer
#   Print the empty line that closes a section.
#####
get_footer() {
  printf '\n'
}

#####
# classify_queue_state STATE
#   Map the state column of a "qhost -q" queue line to a verdict.
#   Sets (globals, POSIX sh has no local variables):
#     q_rank - "<rank>&<colour>" as stored in PLUGIN_STATE (lower rank wins)
#     q_pic  - colour icon shown in front of the message
#     q_text - human readable verdict
#   The order of the patterns matters: the first match wins, so e.g. "au"
#   is reported as ALARM rather than UNAVAILABLE.
#####
classify_queue_state() {
  case "$1" in
    *d*)
      q_rank="4&clear";  q_pic=${CLEAR_PIC};   q_text="is DISABLED" ;;
    *E*)
      q_rank="1&red";    q_pic=${RED_PIC};     q_text="is in ERROR!" ;;
    *c*)
      q_rank="2&yellow"; q_pic=${YELLOW_PIC}
      q_text="has an ambigious configuration!" ;;
    *[aA]*)
      q_rank="2&yellow"; q_pic=${YELLOW_PIC};  q_text="is in ALARM" ;;
    *[sS]*)
      q_rank="2&yellow"; q_pic=${YELLOW_PIC};  q_text="is SUSPENDED" ;;
    *u*)
      q_rank="2&yellow"; q_pic=${YELLOW_PIC};  q_text="is UNAVAILABLE!" ;;
    "")
      q_rank="3&green";  q_pic=${GREEN_PIC};   q_text="is OK" ;;
    *)
      q_rank="5&purple"; q_pic=${UNKNOWN_PIC}; q_text="is UNKNOWN" ;;
  esac
}

#####
##### Get Status proc - used to get all responses
#####
#   Writes the report (jobs, host, queue instances) to stdout and appends
#   one line per finding to PLUGIN_STATE. Always leaves at least one line
#   in PLUGIN_STATE so that a colour can be derived.
#####
get_status() {
  # Without the SGE client commands nothing can be checked. Report that as
  # a finding instead of trying to run an empty command name.
  if [ -z "${QSTAT}" ] || [ -z "${QHOST}" ]; then
    printf '%s\n' "1&red SGE client commands not found" >> "${PLUGIN_STATE}"
    echo ""
    echo "$RED_PIC qstat and/or qhost not found in PATH" \
      "(qstat='${QSTAT}', qhost='${QHOST}')"
    return 0
  fi

  # HOST is not set by this script; fall back to MACHINEDOTS when the
  # environment does not provide it.
  sge_host=${HOST:-${MACHINEDOTS}}

  ###
  ### Check the jobs
  ###
  get_header "Jobs" "$QSTAT -l hostname=$MACHINEDOTS"
  # Run qstat once and reuse its output.
  running_jobs=$("${QSTAT}" -l hostname="${MACHINEDOTS}" -s r -u '*')
  if [ -z "${running_jobs}" ]; then
    echo "No Running Jobs"
  else
    printf '%s\n' "${running_jobs}"
  fi
  get_footer

  ###
  ### Check the host
  ###
  get_header "Host" "$QHOST -h $MACHINEDOTS"
  "${QHOST}" -h "${MACHINEDOTS}" | grep -v "global"
  get_footer

  ###
  ### Check queue instance states
  ###
  get_header "Queue Instance Status Report"
  echo ""
  if queue_report=$("${QHOST}" -h "${MACHINEDOTS}" -q); then
    # Queue lines start at line 5 of the output; "-n +5" is the standard
    # spelling of the obsolete "tail +5".
    printf '%s\n' "${queue_report}" | tail -n +5 |
      while IFS= read -r queue_line; do
        # Split the line into whitespace separated fields with globbing
        # disabled: field 1 is the queue name, field 4 the state.
        # This overwrites the positional parameters, which get_status
        # does not use.
        set -f
        # shellcheck disable=SC2086
        set -- ${queue_line}
        set +f
        queue_name=${1:-}
        queue_state=${4:-}

        # Blank lines carry no queue instance.
        [ -n "${queue_name}" ] || continue

        classify_queue_state "${queue_state}"
        printf '%s %s@%s %s\n' \
          "${q_rank}" "${queue_name}" "${sge_host}" "${q_text}" \
          >> "${PLUGIN_STATE}"
        printf '%s %s@%s %s\n%s\n' \
          "${q_pic}" "${queue_name}" "${sge_host}" "${q_text}" \
          "${queue_line}"
      done
  else
    printf '%s\n' "1&red qhost failed" >> "${PLUGIN_STATE}"
    echo "$RED_PIC ${QHOST} -h ${MACHINEDOTS} -q failed"
  fi

  # Nothing recorded means no queue instance was listed for this host.
  if [ ! -s "${PLUGIN_STATE}" ]; then
    printf '%s\n' "4&clear no queue instance on ${sge_host}" \
      >> "${PLUGIN_STATE}"
    echo "$CLEAR_PIC No queue instance found on ${sge_host}"
  fi
  get_footer
#####
##### End of get_status proc
#####
}

#####
##### Main body
#####
#   Builds the report, derives the colour and sends the status message.
#   Returns 1 without touching any file when the Xymon environment is
#   missing, otherwise the exit status of the XYMON command.
#####
main() {
  if [ -z "${XYMON:-}" ] || [ -z "${XYMSRV:-}" ] || [ -z "${XYMONTMP:-}" ] ||
     [ -z "${MACHINE:-}" ] || [ -z "${MACHINEDOTS:-}" ]; then
    echo "${PLUGIN_NAME}: XYMON, XYMSRV, XYMONTMP, MACHINE and MACHINEDOTS" \
      "must be set; run this script from the Xymon client" >&2
    return 1
  fi

  # Start from an empty state file.
  : > "${PLUGIN_STATE}" || return 1

  get_status > "${PLUGIN_RESULT}"

  # Set the global color according to the highest alert: state lines start
  # with "<rank>&<colour>", the lowest rank sorts first and the colour
  # starts at the third character.
  COLOR=$(awk '{ print $1 }' "${PLUGIN_STATE}" \
    | LC_ALL=C sort -u | head -n 1 | cut -c3-)
  COLOR=${COLOR:-clear}

  # NOW USE THE XYMON COMMAND TO SEND THE DATA ACROSS
  # XYMON and DATE are expanded unquoted on purpose, exactly as before.
  report=$(cat "${PLUGIN_RESULT}")
  # shellcheck disable=SC2086
  $XYMON "${XYMSRV}" \
    "status ${MACHINE}.${TEST} ${COLOR} $(${DATE:-date}) ${report}"
  send_rc=$?

  # Clean up our mess
  rm -f -- "${PLUGIN_RESULT}" "${PLUGIN_STATE}"

  return "${send_rc}"
}

main

##############################################
# end of script
##############################################