Add sge plugin from http://git.ipr.univ-rennes1.fr
https://git.ipr.univ-rennes1.fr/cellinfo/scripts/src/master/xymon/plugins/client/ext/sge.sh
This commit is contained in:
parent
3912e0c8c0
commit
33239e88f4
|
@ -1,7 +1,8 @@
|
||||||
## v1.x.y
|
## v1.5.0
|
||||||
|
|
||||||
### Enhancements
|
### Enhancements
|
||||||
* Possibility to define URLs in order to get the latest version of SMART's scripts.
|
* Possibility to define URLs in order to get the latest version of SMART's scripts.
|
||||||
|
* Add sge plugin from https://git.ipr.univ-rennes1.fr/cellinfo/scripts/src/master/xymon/plugins/client/ext/sge.sh
|
||||||
|
|
||||||
## v1.4.0
|
## v1.4.0
|
||||||
|
|
||||||
|
|
15
README.md
15
README.md
|
@ -94,6 +94,20 @@ network interface states.
|
||||||
* **xymon_cli__plug_netstats_tpl** : Template used to generate the previous config file [default : `etc/xymon/clientlaunch.d/netstats.cfg.j2`].
|
* **xymon_cli__plug_netstats_tpl** : Template used to generate the previous config file [default : `etc/xymon/clientlaunch.d/netstats.cfg.j2`].
|
||||||
* **xymon_cli__plug_netstats_interval** : Time between each run of the `netstats` plugin [default : `5m`].
|
* **xymon_cli__plug_netstats_interval** : Time between each run of the `netstats` plugin [default : `5m`].
|
||||||
|
|
||||||
|
#### SGE
|
||||||
|
|
||||||
|
Variables for sge plugin from [ipr-cnrs.scripts][sge plugin source].
|
||||||
|
The plugin checks the health status of SGE queues and displays information about
|
||||||
|
SGE jobs and host.
|
||||||
|
|
||||||
|
* **xymon_cli__plug_sge_state** : The state of plugin `sge` [default : `False`].
|
||||||
|
* **xymon_cli__plug_sge_script_path** : Path to the `sge` script [default : `'/usr/lib/xymon/client/ext/sge.sh'`].
|
||||||
|
* **xymon_cli__plug_sge_script_tpl** : Template used to generate the previous script [default : `'usr/lib/xymon/client/ext/sge.sh.j2'`].
|
||||||
|
* **xymon_cli__plug_sge_script_url** : Use a remote file to get the previous script instead of a template [default : `''`].
|
||||||
|
* **xymon_cli__plug_sge_path** : Configuration file for the `sge` plugin [default : `'/etc/xymon/clientlaunch.d/sge.cfg'`].
|
||||||
|
* **xymon_cli__plug_sge_tpl** : Template used to generate the previous config file [default : `'etc/xymon/clientlaunch.d/sge.cfg.j2'`].
|
||||||
|
* **xymon_cli__plug_sge_interval** : Time between each run of the `sge` plugin [default : `'5m'`].
|
||||||
|
|
||||||
#### Smartoverall
|
#### Smartoverall
|
||||||
|
|
||||||
Variables for Smartoverall plugin from [ipr-cnrs.scripts][smartoverall plugin source].
|
Variables for Smartoverall plugin from [ipr-cnrs.scripts][smartoverall plugin source].
|
||||||
|
@ -194,6 +208,7 @@ Jérémy Gardais
|
||||||
[wtfpl website]: http://www.wtfpl.net/about/
|
[wtfpl website]: http://www.wtfpl.net/about/
|
||||||
[ipr website]: https://ipr.univ-rennes1.fr/
|
[ipr website]: https://ipr.univ-rennes1.fr/
|
||||||
|
|
||||||
|
[sge plugin source]: https://git.ipr.univ-rennes1.fr/cellinfo/scripts/src/master/xymon/plugins/client/ext/sge.sh
|
||||||
[smartoverall plugin source]: https://git.ipr.univ-rennes1.fr/cellinfo/scripts/src/master/xymon/plugins/client/ext/smartoverall
|
[smartoverall plugin source]: https://git.ipr.univ-rennes1.fr/cellinfo/scripts/src/master/xymon/plugins/client/ext/smartoverall
|
||||||
[smart plugin source]: https://github.com/skazi0/xymon-plugins
|
[smart plugin source]: https://github.com/skazi0/xymon-plugins
|
||||||
[zfs plugin source]: https://wiki.xymonton.org/doku.php/monitors:bb-zfs
|
[zfs plugin source]: https://wiki.xymonton.org/doku.php/monitors:bb-zfs
|
||||||
|
|
|
@ -112,6 +112,17 @@ xymon_cli__plug_netstats_path: '/etc/xymon/clientlaunch.d/netstats.cfg'
|
||||||
xymon_cli__plug_netstats_tpl: 'etc/xymon/clientlaunch.d/netstats.cfg.j2'
|
xymon_cli__plug_netstats_tpl: 'etc/xymon/clientlaunch.d/netstats.cfg.j2'
|
||||||
xymon_cli__plug_netstats_interval: '5m'
|
xymon_cli__plug_netstats_interval: '5m'
|
||||||
## ]]]
|
## ]]]
|
||||||
|
## Plugin SGE [[[
|
||||||
|
### From https://git.ipr.univ-rennes1.fr/cellinfo/scripts/src/master/xymon/plugins/client/ext/sge.sh
|
||||||
|
xymon_cli__plug_sge_state: False
|
||||||
|
xymon_cli__plug_sge_script_path: '/usr/lib/xymon/client/ext/sge.sh'
|
||||||
|
xymon_cli__plug_sge_script_tpl: 'usr/lib/xymon/client/ext/sge.sh.j2'
|
||||||
|
xymon_cli__plug_sge_script_url: ''
|
||||||
|
xymon_cli__plug_sge_path: '/etc/xymon/clientlaunch.d/sge.cfg'
|
||||||
|
xymon_cli__plug_sge_tpl: 'etc/xymon/clientlaunch.d/sge.cfg.j2'
|
||||||
|
xymon_cli__plug_sge_interval: '5m'
|
||||||
|
|
||||||
|
## ]]]
|
||||||
## Plugin smartoverall [[[
|
## Plugin smartoverall [[[
|
||||||
### From https://git.ipr.univ-rennes1.fr/cellinfo/scripts/src/master/xymon/plugins/client/ext/smartoverall
|
### From https://git.ipr.univ-rennes1.fr/cellinfo/scripts/src/master/xymon/plugins/client/ext/smartoverall
|
||||||
### And based on https://www.xymon.com/xymon-cgi/viewconf.sh?smart
|
### And based on https://www.xymon.com/xymon-cgi/viewconf.sh?smart
|
||||||
|
|
|
@ -197,6 +197,42 @@
|
||||||
xymon_plug_manage|bool)
|
xymon_plug_manage|bool)
|
||||||
notify: restart xymon-client service
|
notify: restart xymon-client service
|
||||||
|
|
||||||
|
# Manage sge plugin [[[1
|
||||||
|
- name: PLUGIN sge config
|
||||||
|
template:
|
||||||
|
src: '{{ xymon_cli__plug_sge_tpl }}'
|
||||||
|
dest: '{{ xymon_cli__plug_sge_path }}'
|
||||||
|
owner: root
|
||||||
|
group: root
|
||||||
|
mode: 0644
|
||||||
|
when: (xymon_cli_manage|bool and
|
||||||
|
xymon_plug_manage|bool)
|
||||||
|
notify: restart xymon-client service
|
||||||
|
|
||||||
|
- name: PLUGIN sge script file from template
|
||||||
|
template:
|
||||||
|
src: '{{ xymon_cli__plug_sge_script_tpl }}'
|
||||||
|
dest: '{{ xymon_cli__plug_sge_script_path }}'
|
||||||
|
owner: root
|
||||||
|
group: xymon
|
||||||
|
mode: 0755
|
||||||
|
when: (xymon_cli_manage|bool and
|
||||||
|
xymon_plug_manage|bool and
|
||||||
|
xymon_cli__plug_sge_script_url|length == 0 )
|
||||||
|
notify: restart xymon-client service
|
||||||
|
|
||||||
|
- name: PLUGIN sge script file from URL
|
||||||
|
get_url:
|
||||||
|
url: '{{ xymon_cli__plug_sge_script_url }}'
|
||||||
|
dest: '{{ xymon_cli__plug_sge_script_path }}'
|
||||||
|
owner: root
|
||||||
|
group: xymon
|
||||||
|
mode: 0755
|
||||||
|
when: (xymon_cli_manage|bool and
|
||||||
|
xymon_plug_manage|bool and
|
||||||
|
xymon_cli__plug_sge_script_url|length > 0 )
|
||||||
|
notify: restart xymon-client service
|
||||||
|
|
||||||
# Manage smartoverall plugin [[[1
|
# Manage smartoverall plugin [[[1
|
||||||
- name: PLUGIN smartoverall packages
|
- name: PLUGIN smartoverall packages
|
||||||
package:
|
package:
|
||||||
|
@ -295,7 +331,6 @@
|
||||||
xymon_cli__plug_smart_script_url|length > 0 )
|
xymon_cli__plug_smart_script_url|length > 0 )
|
||||||
notify: restart xymon-client service
|
notify: restart xymon-client service
|
||||||
|
|
||||||
|
|
||||||
# Manage zfs plugin [[[1
|
# Manage zfs plugin [[[1
|
||||||
- name: PLUGIN zfs config file
|
- name: PLUGIN zfs config file
|
||||||
template:
|
template:
|
||||||
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
[sge]
|
||||||
|
# {{ ansible_managed }}
|
||||||
|
## From ipr-cnrs.xymon role
|
||||||
|
{{ '#DISABLED' if xymon_cli__plug_sge_state | bool else 'DISABLED' }}
|
||||||
|
ENVFILE /etc/xymon/xymonclient.cfg
|
||||||
|
CMD {{ xymon_cli__plug_sge_script_path }}
|
||||||
|
LOGFILE /var/log/xymon/sge.log
|
||||||
|
INTERVAL {{ xymon_cli__plug_sge_interval }}
|
|
@ -0,0 +1,252 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
# {{ ansible_managed }}
|
||||||
|
# From ipr-cnrs.xymon role
|
||||||
|
# https://git.ipr.univ-rennes1.fr/cellinfo/scripts/src/master/xymon/plugins/client/ext/sge.sh
|
||||||
|
#
|
||||||
|
# SGE: Sun Grid Engine check - Xymon external script test
|
||||||
|
#
|
||||||
|
##### Purpose is to report back to a central server, all Sun
|
||||||
|
##### Grid Engine software faults.
|
||||||
|
#####
|
||||||
|
#
|
||||||
|
# version 0.4
|
||||||
|
#
|
||||||
|
# BIG BROTHER / XXXXXXXXXXXXXXXX status
|
||||||
|
#
|
||||||
|
# Written by Butch Deal <butchdeal@yahoo.com>
|
||||||
|
# Daniel Gomez <dgomez@tigr.org,daniel@ixplosive.com>
|
||||||
|
# Jérémy Gardais <jeremy.gardais@univ-rennes1.fr>
|
||||||
|
#
|
||||||
|
# v0.4 09/06/20 clean, correction,… for Xymon 4.3.28
|
||||||
|
# v0.3e 10/14/08 cut down on the number of qhost runs
|
||||||
|
# v0.3d 03/31/06 added alarm/suspend state identification
|
||||||
|
# v0.3c 03/01/06 propagated yellow state upon UNAVAILABLE queue instances
|
||||||
|
# v0.3b 01/31/06 fixed yellow warning queue status for ambiguous config test
|
||||||
|
# v0.3a 01/31/06 added unknown queue status and ambiguous config test
|
||||||
|
# v0.3 01/26/06 fixed status reporting and optimized job status
|
||||||
|
# v0.2 08/03/05 flag disabled queues as clear
|
||||||
|
# v0.1 07/28/05 authored
|
||||||
|
|
||||||
|
########################################
|
||||||
|
# NOTE
|
||||||
|
# The version v0.4 has only been tested with Xymon (server and client) 4.2.x.
|
||||||
|
#
|
||||||
|
# The color status with respects to queue status is arbitrary and should be
|
||||||
|
# reviewed for your particular environment.
|
||||||
|
#
|
||||||
|
# Tested on :
|
||||||
|
# Solaris & Linux
|
||||||
|
# Linux only (for v0.4)
|
||||||
|
########################################
|
||||||
|
|
||||||
|
########################################
|
||||||
|
# INSTALLATION
|
||||||
|
# step 1 - copy to Xymon client's ext dir
|
||||||
|
# step 2 - New clientlaunch.d/sge.cfg file
|
||||||
|
# step 3 - restart Xymon client
|
||||||
|
#
|
||||||
|
# NOTE - the TEST variable in the configuration section, this is the name used
|
||||||
|
# as the column header.
|
||||||
|
########################################
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# CONFIGURE IT HERE
|
||||||
|
##################################
|
||||||
|
# Basename of the path this script was invoked with.
readonly PLUGIN_NAME=$(basename "${0}")
|
||||||
|
|
||||||
|
# Test name: part of the temporary file name below and of the
# "status ${MACHINE}.${TEST}" message sent at the end of the script.
readonly TEST="sge"
|
||||||
|
# File that receives the output of get_status before it is sent.
# XYMONTMP and MACHINEDOTS are not set in this script; they are expected
# to come from the environment.
readonly PLUGIN_RESULT="${XYMONTMP}/${MACHINEDOTS}.${TEST}.plugin_result"
|
||||||
|
|
||||||
|
# Resolve the SGE tools through PATH. `command -v` prints nothing when a tool
# is not found, so each variable may be empty. They are readonly from here on.
readonly QSTAT=$(command -v qstat)
|
||||||
|
readonly QHOST=$(command -v qhost)
|
||||||
|
readonly QSELECT=$(command -v qselect)
|
||||||
|
export QSTAT QHOST QSELECT
|
||||||
|
|
||||||
|
# define colours for graphics
|
||||||
|
# Comment these out if using older BB versions
|
||||||
|
CLEAR_PIC="&clear"
|
||||||
|
RED_PIC="&red"
|
||||||
|
YELLOW_PIC="&yellow"
|
||||||
|
GREEN_PIC="&green"
|
||||||
|
UNKNOWN_PIC="&purple"
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# Start of script
|
||||||
|
##################################
|
||||||
|
|
||||||
|
# get_header TITLE [COMMAND]
# Print an empty line followed by TITLE ($1) as a bold HTML heading at
# FONT SIZE=+2. Callers may pass the command that produced the section as $2;
# it is only referenced by the commented-out variants below.
get_header()
|
||||||
|
{
|
||||||
|
echo ""
|
||||||
|
#echo "<FONT SIZE=+2><b>$1</b></FONT> ($2)<BR>"
|
||||||
|
echo "<FONT SIZE=+2><b>$1</b></FONT> <BR>"
|
||||||
|
# If you do not want the header in a bigger font use line below instead
|
||||||
|
#echo "<b>$1</b> ($2)"
|
||||||
|
# If you want the "Paul Luzzi" look uncomment this section and comment
|
||||||
|
# out the above sections:
|
||||||
|
#echo "<P><DIV ALIGN=\"CENTER\"><HR>"
|
||||||
|
#echo "<B>============== $1 ==============</B>"
|
||||||
|
#echo "<B>--- ($2) ---</B>"
|
||||||
|
#echo "<HR></DIV>"
|
||||||
|
#echo "<BLOCKQUOTE>"
|
||||||
|
}
|
||||||
|
# get_header_small TITLE [COMMAND]
# Same output as get_header but one font step smaller: an empty line followed
# by TITLE ($1) as a bold HTML heading at FONT SIZE=+1. $2 is only referenced
# by the commented-out variants below.
get_header_small()
|
||||||
|
{
|
||||||
|
echo ""
|
||||||
|
#echo "<FONT SIZE=+2><b>$1</b></FONT> ($2)<BR>"
|
||||||
|
echo "<FONT SIZE=+1><b>$1</b></FONT> <BR>"
|
||||||
|
# If you do not want the header in a bigger font use line below instead
|
||||||
|
# echo "<b>$1</b> ($2)"
|
||||||
|
# If you want the "Paul Luzzi" look uncomment this section and comment
|
||||||
|
# out the above sections:
|
||||||
|
#echo "<P><DIV ALIGN=\"CENTER\"><HR>"
|
||||||
|
#echo "<B>============== $1 ==============</B>"
|
||||||
|
#echo "<B>--- ($2) ---</B>"
|
||||||
|
#echo "<HR></DIV>"
|
||||||
|
#echo "<BLOCKQUOTE>"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# get_footer
# Close a report section: prints a single empty line. The closing
# </BLOCKQUOTE> is only emitted if the commented-out line below is enabled.
get_footer()
|
||||||
|
{
|
||||||
|
echo ""
|
||||||
|
# If you want the "Paul Luzzi" look uncomment this section and comment
|
||||||
|
# out the above sections:
|
||||||
|
#echo "</BLOCKQUOTE>"
|
||||||
|
}
|
||||||
|
|
||||||
|
#####
|
||||||
|
##### Get Status proc - used to get all responses
|
||||||
|
#####
|
||||||
|
get_status()
{
  # Build the body of the Xymon "sge" report on stdout, in this order:
  #   1. jobs currently running on this host (qstat),
  #   2. the qhost summary line(s) for this host,
  #   3. one status line per queue instance found on this host.
  #
  # Globals read:    QSTAT, QHOST, QSELECT, MACHINEDOTS, HOST, METAHS,
  #                  PLUGIN_RESULT, CLEAR_PIC, RED_PIC, YELLOW_PIC,
  #                  GREEN_PIC, UNKNOWN_PIC
  # Globals written: COLOR (exported), queueMsg, queueTriggered
  # Files:           "${PLUGIN_RESULT}.QSTATE" is created, then removed.
  #
  # Call it in the current shell (plain redirection, no pipe or $(...)),
  # otherwise the COLOR value set here is lost to the caller.
  # POSIX sh has no `local`, so the lowercase helper variables are globals.

  COLOR="green"

  # QSTAT/QHOST/QSELECT are readonly once the configuration section has run.
  # Re-assigning a readonly variable makes a POSIX sh (e.g. dash) abort the
  # whole script, so the fallback lookup works on private copies instead.
  qstat_cmd="${QSTAT:-}"
  if [ -z "${qstat_cmd}" ]; then
    qstat_cmd=$(command -v qstat)
    echo ""
    echo "$YELLOW_PIC QSTAT command is not defined in etc/bbsys.local - using default: ${qstat_cmd}"
  fi

  qhost_cmd="${QHOST:-}"
  if [ -z "${qhost_cmd}" ]; then
    qhost_cmd=$(command -v qhost)
    echo ""
    echo "$YELLOW_PIC QHOST command is not defined in etc/bbsys.local - using default: ${qhost_cmd}"
  fi

  qselect_cmd="${QSELECT:-}"
  if [ -z "${qselect_cmd}" ]; then
    qselect_cmd=$(command -v qselect)
    echo ""
    echo "$YELLOW_PIC QSELECT command is not defined in etc/bbsys.local - using default: ${qselect_cmd}"
  fi

  ###
  ### Check the jobs
  ###
  get_header "Jobs" "${qstat_cmd} -l hostname=$MACHINEDOTS"
  # Run qstat once and print the captured text, so the emptiness test and the
  # report always describe the same snapshot.
  running_jobs=$(${qstat_cmd} -l hostname="${MACHINEDOTS}" -s r -u \*)
  if [ -z "${running_jobs}" ]; then
    echo "No Running Jobs"
  else
    printf '%s\n' "${running_jobs}"
  fi
  get_footer

  ###
  ### Check the host
  ###
  get_header "Host" "$METAHS -i"
  ${qhost_cmd} -h "${MACHINEDOTS}" | grep -v "global"
  get_footer

  ###
  ### Check queue instance states
  ###
  queueTriggered=false
  queueMsg=""
  qstate_file="${PLUGIN_RESULT}.QSTATE"

  # Drop the first four lines of `qhost -q` so that only the queue lines
  # remain. `tail -n +5` is the POSIX spelling; the historical `tail +5` is
  # rejected or treated as a file name by current implementations.
  ${qhost_cmd} -h "${MACHINEDOTS}" -q | tail -n +5 > "${qstate_file}"

  # Each remaining line is read as: <queue> <type> <slots> [<state>]. Reading
  # the fields directly gives every queue its own state, where a substring
  # grep on the queue name could also pick up e.g. "all.q2" for "all.q".
  # The loop is fed by redirection (not a pipe) so the variables it sets
  # survive the loop.
  while read -r Qset _qtype _slots qstate _rest || [ -n "${Qset}" ]; do
    [ -n "${Qset}" ] || continue

    # Order determines more significant color status for one queue.
    # NOTE(review): COLOR is overwritten by every queue that sets it, so the
    # last such queue wins (an ERROR queue followed by a DISABLED one ends up
    # "clear"). Kept as in the original - confirm whether a worst-state
    # aggregation is wanted.
    case "${qstate}" in
      *d*)
        COLOR="clear"
        queueMsg="${queueMsg}<BR>${CLEAR_PIC} ${Qset}@${HOST} is DISABLED"
        queueTriggered=true
        ;;
      *E*)
        COLOR="red"
        queueMsg="${queueMsg}<BR>${RED_PIC} ${Qset}@${HOST} is in ERROR!"
        queueTriggered=true
        ;;
      *c*)
        COLOR="yellow"
        queueMsg="${queueMsg}<BR>${YELLOW_PIC} ${Qset}@${HOST} has an ambigious configuration!"
        queueTriggered=true
        ;;
      *a*|*A*)
        # Alarm states are reported but do not change COLOR.
        queueMsg="${queueMsg}<BR>${YELLOW_PIC} ${Qset}@${HOST} is in ALARM"
        ;;
      *s*|*S*)
        # Suspended states are reported but do not change COLOR.
        queueMsg="${queueMsg}<BR>${YELLOW_PIC} ${Qset}@${HOST} is SUSPENDED"
        ;;
      *u*)
        COLOR="yellow"
        queueMsg="${queueMsg}<BR>${YELLOW_PIC} ${Qset}@${HOST} is UNAVAILABLE!"
        queueTriggered=true
        ;;
      "")
        queueMsg="${queueMsg}<BR>${GREEN_PIC} ${Qset}@${HOST} is OK"
        ;;
      *)
        queueMsg="${queueMsg}<BR>${UNKNOWN_PIC} ${Qset}@${HOST} is UNKNOWN"
        queueTriggered=true
        ;;
    esac
  done < "${qstate_file}"

  rm -f -- "${qstate_file}"

  get_header "Queue Instance Status Report"
  echo "$queueMsg"
  get_footer

  #####
  ##### Make sure to export COLOR so that it gets back to "central"
  #####
  export COLOR
}
|
||||||
|
|
||||||
|
#####
|
||||||
|
##### Main body
|
||||||
|
#####
|
||||||
|
get_status > "${PLUGIN_RESULT}"
|
||||||
|
|
||||||
|
# NOW USE THE XYMON COMMAND TO SEND THE DATA ACROSS
|
||||||
|
# The result path is quoted so that a space or glob character in it cannot split the cat argument.
$XYMON "${XYMSRV}" "status ${MACHINE}.${TEST} ${COLOR} $($DATE) $(cat "${PLUGIN_RESULT}")"
|
||||||
|
#For testing only
|
||||||
|
# echo $XYMON "${XYMSRV}" "status ${MACHINE}.${TEST} ${COLOR} $($DATE) $(cat ${PLUGIN_RESULT})" > /tmp/sgetmp
|
||||||
|
|
||||||
|
# Clean up our mess
|
||||||
|
# Checking for existence of each file since the whole test may be optional
|
||||||
|
# and may not actually run on every client
|
||||||
|
#
|
||||||
|
if [ -f "${PLUGIN_RESULT}" ]; then
|
||||||
|
rm -f -- "${PLUGIN_RESULT}"
|
||||||
|
fi
|
||||||
|
##############################################
|
||||||
|
# end of script
|
||||||
|
##############################################
|
Loading…
Reference in New Issue