#!/bin/sh
#
# SGE: Sun Grid Engine check - BB external script test
#
##### Purpose is to report back to a central server, all Sun
##### Grid Engine software faults.
#####
#
# version 0.3
#
# BIG BROTHER / XXXXXXXXXXXXXXXX status
#
# Written by Butch Deal
#            Daniel Gomez
#
# v0.3e 10/14/08 cut down on the number of qhost runs
# v0.3d 03/31/06 added alarm/suspend state identification
# v0.3c 03/01/06 propagated yellow state upon UNAVAILABLE queue instances
# v0.3b 01/31/06 fixed yellow warning queue status for ambiguous config test
# v0.3a 01/31/06 added unknown queue status and ambiguous config test
# v0.3  01/26/06 fixed status reporting and optimized job status
# v0.2  08/03/05 flag disabled queues as clear
# v0.1  07/28/05 authored

########################################
# NOTE
# This has been tested with BB 1.9e and Xymon 4.2.x
#
# The color status with respect to queue status is arbitrary and should be
# reviewed for your particular environment.
#
# Tested on :
#   Solaris & Linux
#
# The script sticks to plain Bourne-shell constructs (backticks, "[ ]",
# no "local") because it runs under /bin/sh on those platforms.
########################################

########################################
# INSTALLATION
# step 1 - update bb-bbexttab to include this script
#
# step 4 - restart Big Brother
#
# NOTE - the TEST variable in the configuration section, this is the name used
# as the column header.
########################################

##################################
# CONFIGURE IT HERE
##################################
TEST="sge"
BBPROG="$0"; export BBPROG

SGEBIN=/usr/local/bin
QSTAT=${SGEBIN}/qstat
QHOST=${SGEBIN}/qhost
QSELECT=${SGEBIN}/qselect
export SGEBIN QSTAT QHOST QSELECT

# define colours for graphics
# Comment these out if using older BB versions
CLEAR_PIC="&clear"
RED_PIC="&red"
YELLOW_PIC="&yellow"
GREEN_PIC="&green"
UNKNOWN_PIC="&purple"

##################################
# Start of script
##################################
# NOTE(review): BBHOME is assigned unconditionally, so any value inherited
# from the environment is replaced and the "is not set" test below can never
# fire. Confirm whether the hard-coded path is intended for your site.
BBHOME="/var/BB/bb"; export BBHOME

if test ! "$BBHOME"
then
  echo "template: BBHOME is not set"
  exit 1
fi

if test ! -d "$BBHOME"
then
  echo "template: BBHOME is invalid"
  exit 1
fi

if test ! "$BBTMP"                      # GET DEFINITIONS IF NEEDED
then
  # echo "*** LOADING BBDEF ***"
  . "$BBHOME/etc/bbdef.sh"              # INCLUDE STANDARD DEFINITIONS
fi

#####
##### get_header - print a section heading
#####   $1 - section title (printed)
#####   $2 - command that produced the section (accepted, not printed)
##### Output: an empty line, the title, and another empty line.
##### To show the command as well, print "$1 ($2)" instead of "$1".
##### A banner alternative is:
#####   echo "============== $1 =============="
#####   echo "--- ($2) ---"
#####
get_header()
{
  echo ""
  echo "$1
"
}

#####
##### get_header_small - print a section heading
#####   $1 - section title (printed)
#####   $2 - command that produced the section (accepted, not printed)
##### As written here it emits exactly the same text as get_header.
#####
get_header_small()
{
  echo ""
  echo "$1
"
}

#####
##### get_footer - close a section with an empty line
#####
get_footer()
{
  echo ""
}

#####
##### Get Status proc - used to get all responses
#####
##### Writes the report body to stdout and leaves the overall page colour
##### in the exported global COLOR.
##### Reads: SGEBIN QSTAT QHOST MACHINEDOTS HOST GREP AWK and the *_PIC tags.
#####
get_status()
{
  #####
  ##### Setup some variables for use later
  #####
  COLOR="green"

  # Check defaults have been set
  if [ "$SGEBIN" = "" ]; then
    SGEBIN=/usr/local/bin
    echo ""
    echo "$YELLOW_PIC SGEBIN command is not defined in etc/bbsys.local - using default: $SGEBIN"
  fi

  if [ "$QSTAT" = "" ]; then
    QSTAT=${SGEBIN}/qstat
    echo ""
    echo "$YELLOW_PIC QSTAT command is not defined in etc/bbsys.local - using default: $QSTAT"
  fi

  if [ "$QHOST" = "" ]; then
    QHOST=${SGEBIN}/qhost
    echo ""
    echo "$YELLOW_PIC QHOST command is not defined in etc/bbsys.local - using default: $QHOST"
  fi

  ###
  ### Check the jobs
  ###
  get_header "Jobs" "$QSTAT -l hostname=$MACHINEDOTS"
  # Run qstat once and reuse the captured listing for both the emptiness
  # test and the report.
  running_jobs=`${QSTAT} -l hostname="${MACHINEDOTS}" -s r`
  if [ -z "$running_jobs" ]; then
    echo "No Running Jobs"
  else
    echo "$running_jobs"
  fi
  get_footer

  ###
  ### Check the host
  ###
  get_header "Host" "$METAHS -i"
  ${QHOST} -h "${MACHINEDOTS}" | ${GREP} -v "global"
  get_footer

  ###
  ### Identify queue memberships
  ###
  #get_header "Queue Membership" "$QHOST -q"
  #${QHOST} -h ${MACHINEDOTS} -q | ${AWK} 'NR > 4'
  #get_footer

  ###
  ### Check queue instance states
  ###
  # One qhost run, one awk pass: skip the first four lines of the listing
  # (the old "tail +5" did the same, but that offset syntax is rejected by
  # POSIX-conforming tail) and emit "<queue> <state>" for every remaining
  # non-blank row. Column 1 is taken as the queue name and column 4 as the
  # state letters; an empty state is written as "-" so that each row always
  # yields exactly two words.
  queue_rows=`${QHOST} -h "${MACHINEDOTS}" -q | ${AWK} '
    NR > 4 && NF > 0 { if ($4 == "") print $1, "-"; else print $1, $4 }'`

  queueMsg=""
  # Walk the words two at a time. Name and state come from the same row, so
  # a queue can no longer pick up the state of another queue whose name
  # merely contains it (which the former grep lookup allowed).
  set -- $queue_rows
  while [ $# -ge 2 ]; do
    Qset=$1
    qstate=$2
    shift 2

    # Order determines which state letter wins for this queue instance.
    # "level" is the page colour the state asks for; it stays empty for
    # states that are reported but do not influence the page colour.
    level=""
    case "$qstate" in
      *d*)    level="clear";  pic=$CLEAR_PIC;   text="is DISABLED" ;;
      *E*)    level="red";    pic=$RED_PIC;     text="is in ERROR!" ;;
      *c*)    level="yellow"; pic=$YELLOW_PIC;  text="has an ambigious configuration!" ;;
      *[aA]*)                 pic=$YELLOW_PIC;  text="is in ALARM" ;;
      *[sS]*)                 pic=$YELLOW_PIC;  text="is SUSPENDED" ;;
      *u*)    level="yellow"; pic=$YELLOW_PIC;  text="is UNAVAILABLE!" ;;
      -)                      pic=$GREEN_PIC;   text="is OK" ;;
      *)                      pic=$UNKNOWN_PIC; text="is UNKNOWN" ;;
    esac

    queueMsg="$queueMsg
$pic $Qset@$HOST $text"

    # Only ever escalate the page colour: green < clear < yellow < red.
    # Previously each queue overwrote COLOR, so a disabled or unavailable
    # queue listed after one in ERROR would hide the red status.
    case "$COLOR:$level" in
      red:* | *:) ;;
      *:red | green:* | clear:yellow) COLOR=$level ;;
    esac
  done

  get_header "Queue Instance Status Report"
  echo "$queueMsg"
  get_footer

  #####
  ##### Make sure to export COLOR so that it gets back to "central"
  #####
  export COLOR

#####
##### End of get_status proc
#####
}

#####
##### Main body
#####
# get_status runs in this shell (only its stdout is redirected), so the
# COLOR it sets is visible to the status command below.
get_status > "$BBTMP/$MACHINE.$TEST"

# NOW USE THE BB COMMAND TO SEND THE DATA ACROSS
$BB $BBDISP "status $MACHINE.$TEST $COLOR `$DATE` `$CAT "$BBTMP/$MACHINE.$TEST"`"
# For testing only
# echo $BB $BBDISP "status $BBTMP/$MACHINE.$TEST $COLOR `$DATE` `$CAT $BBTMP/$MACHINE.$TEST` ">/tmp/qtmp


# Clean up our mess
# Checking for existence of each file since the whole test may be optional
# and may not actually run on every client
#
if [ -f "$BBTMP/$MACHINE.$TEST" ]; then
  $RM "$BBTMP/$MACHINE.$TEST"
fi
##############################################
# end of script
##############################################