New sge.sh Xymon's script
From https://wiki.xymonton.org/doku.php/monitors:sge
This commit is contained in:
parent
74b932506d
commit
fa02d6d485
|
@ -0,0 +1,264 @@
|
|||
#!/bin/sh
|
||||
#
|
||||
# SGE: Sun Grid Engine check - BB external script test
|
||||
#
|
||||
##### Purpose is to report back to a central server, all Sun
|
||||
##### Grid Engine software faults.
|
||||
#####
|
||||
#
|
||||
# version 0.3
|
||||
#
|
||||
# BIG BROTHER / XXXXXXXXXXXXXXXX status
|
||||
#
|
||||
# Written by Butch Deal <butchdeal@yahoo.com>
|
||||
# Daniel Gomez <dgomez@tigr.org,daniel@ixplosive.com>
|
||||
#
|
||||
# v0.3e 10/14/08 cut down on the number of qhost runs
|
||||
# v0.3d 03/31/06 added alarm/suspend state identification
|
||||
# v0.3c 03/01/06 propagated yellow state upon UNAVAILABLE queue instances
|
||||
# v0.3b 01/31/06 fixed yellow warning queue status for ambiguous config test
|
||||
# v0.3a 01/31/06 added unknown queue status and ambiguous config test
|
||||
# v0.3 01/26/06 fixed status reporting and optimized job status
|
||||
# v0.2 08/03/05 flag disabled queues as clear
|
||||
# v0.1 07/28/05 authored
|
||||
|
||||
########################################
|
||||
# NOTE
|
||||
# This has been tested with BB 1.9e and Xymon 4.2.x
|
||||
#
|
||||
# The color status with respect to queue status is arbitrary and should be
|
||||
# reviewed for your particular environment.
|
||||
#
|
||||
# Tested on :
|
||||
# Solaris & Linux
|
||||
########################################
|
||||
|
||||
########################################
|
||||
# INSTALLATION
|
||||
# step 1 - update bb-bbexttab to include this script
|
||||
#
|
||||
# step 2 - restart Big Brother
|
||||
#
|
||||
# NOTE - the TEST variable in the configuration section, this is the name used
|
||||
# as the column header.
|
||||
########################################
|
||||
|
||||
##################################
|
||||
# CONFIGURE IT HERE
|
||||
##################################
|
||||
# Name of this test; it is used as the column header on the display and as
# the suffix of the temporary files written under $BBTMP.
TEST="sge"
BBPROG="$0"; export BBPROG

# Directory holding the Grid Engine client commands, and the commands
# themselves.  Adjust SGEBIN for your site.
SGEBIN=/usr/local/bin
QSTAT=${SGEBIN}/qstat
QHOST=${SGEBIN}/qhost
QSELECT=${SGEBIN}/qselect
export SGEBIN QSTAT QHOST QSELECT

# define colours for graphics
# Comment these out if using older BB versions
CLEAR_PIC="&clear"
RED_PIC="&red"
YELLOW_PIC="&yellow"
GREEN_PIC="&green"
UNKNOWN_PIC="&purple"
|
||||
|
||||
##################################
|
||||
# Start of script
|
||||
##################################
|
||||
# Honour a BBHOME handed down by the BB/Xymon environment and fall back to
# the stock location only when none is supplied.  (An unconditional
# assignment here would silently override the caller's BBHOME.)
BBHOME="${BBHOME:-/var/BB/bb}"; export BBHOME

# BBHOME can no longer be empty at this point, so only its validity as a
# directory needs checking.
if test ! -d "$BBHOME"
then
	echo "template: BBHOME is invalid"
	exit 1
fi

if test ! "$BBTMP"			# GET DEFINITIONS IF NEEDED
then
	# echo "*** LOADING BBDEF ***"
	. "$BBHOME/etc/bbdef.sh"	# INCLUDE STANDARD DEFINITIONS
fi
|
||||
|
||||
#####
##### get_header - print a large section heading for the status report
#####
# Arguments: $1 - heading text
#            $2 - command behind the section; only the commented-out
#                 variants below display it
# Outputs:   one empty line, then the heading as an HTML fragment
get_header()
{
	# printf keeps the output independent of how the running shell's
	# echo treats backslashes.
	printf '\n'
	#echo "<FONT SIZE=+2><b>$1</b></FONT> ($2)<BR>"
	printf '%s\n' "<FONT SIZE=+2><b>$1</b></FONT> <BR>"
	# If you do not want the header in a bigger font use line below instead
	#echo "<b>$1</b> ($2)"
	# If you want the "Paul Luzzi" look uncomment this section and comment
	# out the above sections:
	#echo "<P><DIV ALIGN=\"CENTER\"><HR>"
	#echo "<B>============== $1 ==============</B>"
	#echo "<B>--- ($2) ---</B>"
	#echo "<HR></DIV>"
	#echo "<BLOCKQUOTE>"
}
|
||||
#####
##### get_header_small - print a medium-sized section heading
#####
# Arguments: $1 - heading text
#            $2 - command behind the section; only the commented-out
#                 variants below display it
# Outputs:   one empty line, then the heading as an HTML fragment
get_header_small()
{
	# printf keeps the output independent of how the running shell's
	# echo treats backslashes.
	printf '\n'
	#echo "<FONT SIZE=+2><b>$1</b></FONT> ($2)<BR>"
	printf '%s\n' "<FONT SIZE=+1><b>$1</b></FONT> <BR>"
	# If you do not want the header in a bigger font use line below instead
	# echo "<b>$1</b> ($2)"
	# If you want the "Paul Luzzi" look uncomment this section and comment
	# out the above sections:
	#echo "<P><DIV ALIGN=\"CENTER\"><HR>"
	#echo "<B>============== $1 ==============</B>"
	#echo "<B>--- ($2) ---</B>"
	#echo "<HR></DIV>"
	#echo "<BLOCKQUOTE>"
}
|
||||
|
||||
|
||||
#####
##### get_footer - close a report section opened with get_header
#####
# Arguments: none
# Outputs:   one empty line
get_footer()
{
	printf '\n'
	# If you want the "Paul Luzzi" look uncomment this section and comment
	# out the above sections:
	#echo "</BLOCKQUOTE>"
}
|
||||
|
||||
#####
|
||||
##### Get Status proc - used to get all responses
|
||||
#####
|
||||
# _sge_raise_color - raise the global COLOR to $1 unless COLOR is already
# more severe.  Precedence used here: red > yellow > clear > green.
_sge_raise_color()
{
	case "$1" in
		red)
			COLOR="red"
			;;
		yellow)
			if [ "$COLOR" != "red" ]; then
				COLOR="yellow"
			fi
			;;
		clear)
			if [ "$COLOR" = "green" ]; then
				COLOR="clear"
			fi
			;;
	esac
}

# get_status - write the report body to stdout and set COLOR.
#
# Globals read:    SGEBIN QSTAT QHOST GREP AWK RM BBTMP MACHINE MACHINEDOTS
#                  TEST HOST and the *_PIC colour tags
# Globals written: COLOR (exported), queueMsg, queueTriggered, jobs
# Outputs:         three sections - running jobs, host line, and the state
#                  of every queue instance on this host
#
# Call it in the current shell (a plain redirection is fine, a command
# substitution or pipe is not), otherwise COLOR never reaches the caller.
get_status()
{
	#####
	##### Setup some variables for use later
	#####
	COLOR="green"
	queueMsg=""
	# queueTriggered is kept for compatibility; nothing in this function
	# reads it back.
	queueTriggered=false
	# The messages name the host as $HOST; fall back to MACHINEDOTS so
	# they stay meaningful when HOST is not provided by the environment.
	sge_host="${HOST:-$MACHINEDOTS}"
	sge_qfile="$BBTMP/$MACHINE.$TEST.QSTATE"

	# Check defaults have been set
	if [ "$SGEBIN" = "" ]; then
		SGEBIN=/usr/local/bin
		echo ""
		echo "$YELLOW_PIC SGEBIN command is not defined in etc/bbsys.local - using default: $SGEBIN"
	fi

	if [ "$QSTAT" = "" ]; then
		QSTAT=${SGEBIN}/qstat
		echo ""
		echo "$YELLOW_PIC QSTAT command is not defined in etc/bbsys.local - using default: $QSTAT"
	fi

	if [ "$QHOST" = "" ]; then
		QHOST=${SGEBIN}/qhost
		echo ""
		echo "$YELLOW_PIC QHOST command is not defined in etc/bbsys.local - using default: $QHOST"
	fi

	###
	### Check the jobs
	###
	get_header "Jobs" "$QSTAT -l hostname=$MACHINEDOTS"
	# qstat is run once and its captured output reused, so the emptiness
	# test and the printed listing always describe the same snapshot.
	jobs=$(${QSTAT} -l hostname="${MACHINEDOTS}" -s r)
	if [ -z "$jobs" ]; then
		echo "No Running Jobs"
	else
		printf '%s\n' "$jobs"
	fi
	get_footer

	###
	### Check the host
	###
	get_header "Host" "$QHOST -h $MACHINEDOTS"
	${QHOST} -h "${MACHINEDOTS}" | ${GREP} -v "global"
	get_footer

	###
	### Check queue instance states
	###
	# Keep everything from line 5 of "qhost -q" onwards (the original
	# "tail +5"); awk is used because the "+N" form of tail is rejected
	# by current GNU tail.  Presumably the first four lines are the
	# header, the global line and the host line - confirm for your SGE.
	${QHOST} -h "${MACHINEDOTS}" -q | ${AWK} 'NR >= 5' > "$sge_qfile"

	# Each remaining line is read as: name, type, slots, state.  Taking
	# the state from the same line as the name avoids a second lookup
	# that could match several queues sharing a name prefix.
	# The loop is fed by redirection rather than a pipe so that COLOR and
	# queueMsg are assigned in this shell.
	while read -r Qset sge_qtype sge_qslots qstate sge_rest; do
		if [ -z "$Qset" ]; then
			continue
		fi

		# Order determines which state is reported for a queue
		# instance; COLOR is only ever raised, never lowered, so
		# the worst queue decides the overall status.
		case "$qstate" in
			*d*)
				_sge_raise_color clear
				queueMsg="$queueMsg<BR>$CLEAR_PIC $Qset@$sge_host is DISABLED"
				queueTriggered=true
				;;
			*E*)
				_sge_raise_color red
				queueMsg="$queueMsg<BR>$RED_PIC $Qset@$sge_host is in ERROR!"
				queueTriggered=true
				;;
			*c*)
				_sge_raise_color yellow
				queueMsg="$queueMsg<BR>$YELLOW_PIC $Qset@$sge_host has an ambigious configuration!"
				queueTriggered=true
				;;
			*[aA]*)
				# Reported only; does not change COLOR.
				queueMsg="$queueMsg<BR>$YELLOW_PIC $Qset@$sge_host is in ALARM"
				;;
			*[sS]*)
				# Reported only; does not change COLOR.
				queueMsg="$queueMsg<BR>$YELLOW_PIC $Qset@$sge_host is SUSPENDED"
				;;
			*u*)
				_sge_raise_color yellow
				queueMsg="$queueMsg<BR>$YELLOW_PIC $Qset@$sge_host is UNAVAILABLE!"
				queueTriggered=true
				;;
			"")
				queueMsg="$queueMsg<BR>$GREEN_PIC $Qset@$sge_host is OK"
				;;
			*)
				queueMsg="$queueMsg<BR>$UNKNOWN_PIC $Qset@$sge_host is UNKNOWN"
				queueTriggered=true
				;;
		esac
	done < "$sge_qfile"

	if [ -f "$sge_qfile" ]; then
		$RM "$sge_qfile"
	fi

	get_header "Queue Instance Status Report"
	echo "$queueMsg"
	get_footer

	#####
	##### Make sure to export COLOR so that it gets back to "central"
	#####
	export COLOR

	#####
	##### End of get_status proc
	#####
}
|
||||
|
||||
#####
|
||||
##### Main body
|
||||
#####
|
||||
# Build the report body.  get_status is called in this shell (only its
# stdout is redirected) so the COLOR it sets is available below.
get_status > "$BBTMP/$MACHINE.$TEST"

# NOW USE THE BB COMMAND TO SEND THE DATA ACROSS
$BB $BBDISP "status $MACHINE.$TEST $COLOR $($DATE) $($CAT "$BBTMP/$MACHINE.$TEST")"
#For testing only
# echo $BB $BBDISP "status $BBTMP/$MACHINE.$TEST $COLOR `$DATE` `$CAT $BBTMP/$MACHINE.$TEST` ">/tmp/qtmp

# Clean up our mess
# Checking for existence of each file since the whole test may be optional
# and may not actually run on every client
#
if [ -f "$BBTMP/$MACHINE.$TEST" ]; then
	$RM "$BBTMP/$MACHINE.$TEST"
fi
|
||||
##############################################
|
||||
# end of script
|
||||
##############################################
|
Loading…
Reference in New Issue