diff --git a/CHANGELOG.md b/CHANGELOG.md index 64e6453..72f79ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,8 @@ -## v1.x.y +## v1.5.0 ### Enhancements * Possibility to define URLs in order to get the latest version of SMART's scripts. +* Add sge plugin from https://git.ipr.univ-rennes1.fr/cellinfo/scripts/src/master/xymon/plugins/client/ext/sge.sh ## v1.4.0 diff --git a/README.md b/README.md index 45795b9..0929b6a 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,20 @@ network interface states. * **xymon_cli__plug_netstats_tpl** : Template used to generate the previous config file [default : `etc/xymon/clientlaunch.d/netstats.cfg.j2`]. * **xymon_cli__plug_netstats_interval** : Time between each run of the `netstats` plugin [default : `5m`]. +#### SGE + +Variables for sge plugin from [ipr-cnrs.scripts][sge plugin source]. +The plugin checks health status for SGE queues and displays information about +SGE jobs and host. + +* **xymon_cli__plug_sge_state** : The state of plugin `sge` [default : `False`]. +* **xymon_cli__plug_sge_script_path** : Path to the `sge` script [default : `'/usr/lib/xymon/client/ext/sge.sh'`]. +* **xymon_cli__plug_sge_script_tpl** : Template used to generate the previous script [default : `'usr/lib/xymon/client/ext/sge.sh.j2'`]. +* **xymon_cli__plug_sge_script_url** : Use a remote file to get the previous script instead of a template [default : `''`]. +* **xymon_cli__plug_sge_path** : Configuration file for the `sge` plugin [default : `'/etc/xymon/clientlaunch.d/sge.cfg'`]. +* **xymon_cli__plug_sge_tpl** : Template used to generate the previous config file [default : `'etc/xymon/clientlaunch.d/sge.cfg.j2'`]. +* **xymon_cli__plug_sge_interval** : Time between each run of the `sge` plugin [default : `'5m'`]. + #### Smartoverall Variables for Smartoverall plugin from [ipr-cnrs.scripts][smartoverall plugin source]. 
@@ -194,6 +208,7 @@ Jérémy Gardais [wtfpl website]: http://www.wtfpl.net/about/ [ipr website]: https://ipr.univ-rennes1.fr/ +[sge plugin source]: https://git.ipr.univ-rennes1.fr/cellinfo/scripts/src/master/xymon/plugins/client/ext/sge.sh [smartoverall plugin source]: https://git.ipr.univ-rennes1.fr/cellinfo/scripts/src/master/xymon/plugins/client/ext/smartoverall [smart plugin source]: https://github.com/skazi0/xymon-plugins [zfs plugin source]: https://wiki.xymonton.org/doku.php/monitors:bb-zfs diff --git a/defaults/main.yml b/defaults/main.yml index 3f052df..4bf05e5 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -112,6 +112,17 @@ xymon_cli__plug_netstats_path: '/etc/xymon/clientlaunch.d/netstats.cfg' xymon_cli__plug_netstats_tpl: 'etc/xymon/clientlaunch.d/netstats.cfg.j2' xymon_cli__plug_netstats_interval: '5m' ## ]]] +## Plugin SGE [[[ +### From https://git.ipr.univ-rennes1.fr/cellinfo/scripts/src/master/xymon/plugins/client/ext/sge.sh +xymon_cli__plug_sge_state: False +xymon_cli__plug_sge_script_path: '/usr/lib/xymon/client/ext/sge.sh' +xymon_cli__plug_sge_script_tpl: 'usr/lib/xymon/client/ext/sge.sh.j2' +xymon_cli__plug_sge_script_url: '' +xymon_cli__plug_sge_path: '/etc/xymon/clientlaunch.d/sge.cfg' +xymon_cli__plug_sge_tpl: 'etc/xymon/clientlaunch.d/sge.cfg.j2' +xymon_cli__plug_sge_interval: '5m' + + # ]]] ## Plugin smartoverall [[[ ### From https://git.ipr.univ-rennes1.fr/cellinfo/scripts/src/master/xymon/plugins/client/ext/smartoverall ### And based on https://www.xymon.com/xymon-cgi/viewconf.sh?smart diff --git a/tasks/main.yml b/tasks/main.yml index 4b48186..3319be4 100644 --- a/tasks/main.yml +++ b/tasks/main.yml @@ -197,6 +197,42 @@ xymon_plug_manage|bool) notify: restart xymon-client service +# Manage sge plugin [[[1 +- name: PLUGIN sge config + template: + src: '{{ xymon_cli__plug_sge_tpl }}' + dest: '{{ xymon_cli__plug_sge_path }}' + owner: root + group: root + mode: 0644 + when: (xymon_cli_manage|bool and + xymon_plug_manage|bool) + 
notify: restart xymon-client service + +- name: PLUGIN sge script file from template + template: + src: '{{ xymon_cli__plug_sge_script_tpl }}' + dest: '{{ xymon_cli__plug_sge_script_path }}' + owner: root + group: xymon + mode: 0755 + when: (xymon_cli_manage|bool and + xymon_plug_manage|bool and + xymon_cli__plug_sge_script_url|length == 0 ) + notify: restart xymon-client service + +- name: PLUGIN sge script file from URL + get_url: + url: '{{ xymon_cli__plug_sge_script_url }}' + dest: '{{ xymon_cli__plug_sge_script_path }}' + owner: root + group: xymon + mode: 0755 + when: (xymon_cli_manage|bool and + xymon_plug_manage|bool and + xymon_cli__plug_sge_script_url|length > 0 ) + notify: restart xymon-client service + # Manage smartoverall plugin [[[1 - name: PLUGIN smartoverall packages package: @@ -295,7 +331,6 @@ xymon_cli__plug_smart_script_url|length > 0 ) notify: restart xymon-client service - # Manage zfs plugin [[[1 - name: PLUGIN zfs config file template: diff --git a/templates/etc/xymon/clientlaunch.d/sge.cfg.j2 b/templates/etc/xymon/clientlaunch.d/sge.cfg.j2 new file mode 100644 index 0000000..7dd0393 --- /dev/null +++ b/templates/etc/xymon/clientlaunch.d/sge.cfg.j2 @@ -0,0 +1,8 @@ +[sge] + # {{ ansible_managed }} + ## From ipr-cnrs.xymon role + {{ '#DISABLED' if xymon_cli__plug_sge_state else 'DISABLED' }} + ENVFILE /etc/xymon/xymonclient.cfg + CMD {{ xymon_cli__plug_sge_script_path }} + LOGFILE /var/log/xymon/sge.log + INTERVAL {{ xymon_cli__plug_sge_interval }} diff --git a/templates/usr/lib/xymon/client/ext/sge.sh.j2 b/templates/usr/lib/xymon/client/ext/sge.sh.j2 new file mode 100755 index 0000000..ed46c05 --- /dev/null +++ b/templates/usr/lib/xymon/client/ext/sge.sh.j2 @@ -0,0 +1,252 @@ +#!/bin/sh + +# {{ ansible_managed }} +# From ipr-cnrs.xymon role +# https://git.ipr.univ-rennes1.fr/cellinfo/scripts/src/master/xymon/plugins/client/ext/sge.sh +# +# SGE: Sun Grid Engine check - Xymon external script test +# +##### Purpose is to report back to a central 
server, all Sun +##### Grid Engine software faults. +##### +# +# version 0.4 +# +# BIG BROTHER / XXXXXXXXXXXXXXXX status +# +# Written by Butch Deal +# Daniel Gomez +# Jérémy Gardais +# +# v0.4 09/06/20 clean, correction,… for Xymon 4.3.28 +# v0.3e 10/14/08 cut down on the number of qhost runs +# v0.3d 03/31/06 added alarm/suspend state identification +# v0.3c 03/01/06 propagated yellow state upon UNAVAILABLE queue instances +# v0.3b 01/31/06 fixed yellow warning queue status for ambiguous config test +# v0.3a 01/31/06 added unknown queue status and ambiguous config test +# v0.3 01/26/06 fixed status reporting and optimized job status +# v0.2 08/03/05 flag disabled queues as clear +# v0.1 07/28/05 authored + +######################################## +# NOTE +# The version v0.4 has only been tested with Xymon (server and client) 4.2.x. +# +# The color status with respect to queue status is arbitrary and should be +# reviewed for your particular environment. +# +# Tested on : +# Solaris & Linux +# Linux only (for v0.4) +######################################## + +######################################## +# INSTALLATION +# step 1 - copy to Xymon client's ext dir +# step 2 - New clientlaunch.d/sge.cfg file +# step 3 - restart Xymon client +# +# NOTE - the TEST variable in the configuration section is the name used +# as the column header. 
+########################################
+
+##################################
+# CONFIGURE IT HERE
+##################################
+# Name under which this plugin was invoked.
+readonly PLUGIN_NAME=$(basename "${0}")
+
+# Xymon column name, and the scratch file that receives the status body.
+# XYMONTMP and MACHINEDOTS are not set in this script; they are read from the
+# environment (presumably the ENVFILE of the clientlaunch entry - confirm).
+readonly TEST="sge"
+readonly PLUGIN_RESULT="${XYMONTMP}/${MACHINEDOTS}.${TEST}.plugin_result"
+
+# Paths of the SGE commands; each one is empty when the command is not found.
+readonly QSTAT=$(command -v qstat)
+readonly QHOST=$(command -v qhost)
+readonly QSELECT=$(command -v qselect)
+export QSTAT QHOST QSELECT
+
+# define colours for graphics
+# Comment these out if using older BB versions
+CLEAR_PIC="&clear"
+RED_PIC="&red"
+YELLOW_PIC="&yellow"
+GREEN_PIC="&green"
+UNKNOWN_PIC="&purple"
+
+##################################
+# Start of script
+##################################
+
+# NOTE(review): the HTML markup inside the echo strings of get_header,
+# get_header_small, get_footer and the queue messages was missing from the
+# reviewed copy (the strings were split over several lines, which left
+# unterminated quotes). It has been reconstructed; confirm the exact tags
+# against the upstream sge.sh before merging.
+
+# Print a section title.
+#   $1 - title text
+#   $2 - command shown next to the title (only used by the commented variants)
+get_header()
+{
+  echo ""
+  #echo "<FONT SIZE=+2><b>$1</b></FONT> ($2)<BR>"
+  echo "<FONT SIZE=+2><b>$1</b></FONT><BR>"
+  # If you do not want the header in a bigger font use line below instead
+  #echo "<b>$1</b> ($2)"
+  # If you want the "Paul Luzzi" look uncomment this section and comment
+  # out the above sections:
+  #echo "<P><DIV ALIGN=\"CENTER\"><HR>"
+  #echo "<B>============== $1 ==============</B>"
+  #echo "<B>--- ($2) ---</B>"
+  #echo "<HR></DIV>"
+  #echo "<BLOCKQUOTE>"
+}
+
+# Same as get_header with a smaller title. Not called by this script.
+#   $1 - title text
+#   $2 - command shown next to the title (only used by the commented variants)
+get_header_small()
+{
+  echo ""
+  #echo "<FONT SIZE=+1><b>$1</b></FONT> ($2)<BR>"
+  echo "<FONT SIZE=+1><b>$1</b></FONT><BR>"
+  # If you do not want the header in a bigger font use line below instead
+  # echo "<b>$1</b> ($2)"
+  # If you want the "Paul Luzzi" look uncomment this section and comment
+  # out the above sections:
+  #echo "<P><DIV ALIGN=\"CENTER\"><HR>"
+  #echo "<B>============== $1 ==============</B>"
+  #echo "<B>--- ($2) ---</B>"
+  #echo "<HR></DIV>"
+  #echo "<BLOCKQUOTE>"
+}
+
+
+# Close a section opened by get_header (an empty line by default).
+get_footer()
+{
+  echo ""
+  # If you want the "Paul Luzzi" look uncomment this section and comment
+  # out the above sections:
+  #echo "</BLOCKQUOTE>"
+}
+
+#####
+##### Get Status proc - used to get all responses
+#####
+# Writes the status body (running jobs, host line, queue instance report) to
+# stdout and leaves the colour to report in the global COLOR.
+get_status()
+{
+  #####
+  ##### Setup some variables for use later
+  #####
+  COLOR="green"
+  queueMsg=""
+  # Host label for the queue messages: HOST when the environment provides it,
+  # otherwise MACHINEDOTS, which is already the host name given to qstat/qhost.
+  sge_host="${HOST:-$MACHINEDOTS}"
+
+  # Report SGE commands that were not found. QSTAT, QHOST and QSELECT are
+  # readonly, so they must not be assigned again here: POSIX sh treats that as
+  # an error (dash aborts the script), and "command -v" would return the same
+  # empty value anyway.
+  if [ -z "${QSTAT}" ]; then
+    echo ""
+    echo "$YELLOW_PIC qstat command not found in PATH"
+  fi
+
+  if [ -z "${QHOST}" ]; then
+    echo ""
+    echo "$YELLOW_PIC qhost command not found in PATH"
+  fi
+
+  if [ -z "${QSELECT}" ]; then
+    echo ""
+    echo "$YELLOW_PIC qselect command not found in PATH"
+  fi
+
+  # Every check below needs qstat and qhost: stop here with a warning status.
+  if [ -z "${QSTAT}" ] || [ -z "${QHOST}" ]; then
+    COLOR="yellow"
+    export COLOR
+    return 0
+  fi
+
+  ###
+  ### Check the jobs
+  ###
+  get_header "Jobs" "$QSTAT -l hostname=$MACHINEDOTS"
+  # Run qstat once and print the captured list instead of running it twice.
+  jobs=$("${QSTAT}" -l hostname="${MACHINEDOTS}" -s r -u \*)
+  if [ -z "$jobs" ]; then
+    echo "No Running Jobs"
+  else
+    printf '%s\n' "$jobs"
+  fi
+  get_footer
+
+  ###
+  ### Check the host
+  ###
+  get_header "Host" "$METAHS -i"
+  "${QHOST}" -h "${MACHINEDOTS}" | grep -v "global"
+  get_footer
+
+  ###
+  ### Identify queue memberships
+  ###
+  #get_header "Queue Membership" "$QHOST -q"
+  #${QHOST} -h ${MACHINEDOTS} -q | tail -n +5
+  #get_footer
+
+  ###
+  ### Check queue instance states
+  ###
+  queueTriggered=false
+  # Keep the queue lines of a single qhost run (everything from line 5 on).
+  # "tail -n +5" is the portable spelling of the obsolete "tail +5".
+  "${QHOST}" -h "${MACHINEDOTS}" -q | tail -n +5 > "${PLUGIN_RESULT}.QSTATE"
+  Queueset=$(awk '{ print $1 }' "${PLUGIN_RESULT}.QSTATE")
+  for Qset in $Queueset; do
+    # Exact match on the queue name, so that a name which is a substring (or a
+    # regex match) of another queue name does not pick up the wrong state.
+    qstate=$(awk -v q="$Qset" '$1 == q { print $4 }' "${PLUGIN_RESULT}.QSTATE")
+
+    # Order determines more significant color status
+    # NOTE(review): COLOR is overwritten by each queue in turn, so a later
+    # DISABLED queue hides an earlier ERROR one - confirm this is intended.
+    case "${qstate}" in
+      *d*)
+        COLOR="clear"
+        queueMsg="${queueMsg}<br>$CLEAR_PIC $Qset@$sge_host is DISABLED"
+        queueTriggered=true
+        ;;
+      *E*)
+        COLOR="red"
+        queueMsg="${queueMsg}<br>$RED_PIC $Qset@$sge_host is in ERROR!"
+        queueTriggered=true
+        ;;
+      *c*)
+        COLOR="yellow"
+        queueMsg="${queueMsg}<br>$YELLOW_PIC $Qset@$sge_host has an ambigious configuration!"
+        queueTriggered=true
+        ;;
+      *a*|*A*)
+        queueMsg="${queueMsg}<br>$YELLOW_PIC $Qset@$sge_host is in ALARM"
+        ;;
+      *s*|*S*)
+        queueMsg="${queueMsg}<br>$YELLOW_PIC $Qset@$sge_host is SUSPENDED"
+        ;;
+      *u*)
+        COLOR="yellow"
+        queueMsg="${queueMsg}<br>$YELLOW_PIC $Qset@$sge_host is UNAVAILABLE!"
+        queueTriggered=true
+        ;;
+      "")
+        queueMsg="${queueMsg}<br>$GREEN_PIC $Qset@$sge_host is OK"
+        ;;
+      *)
+        queueMsg="${queueMsg}<br>$UNKNOWN_PIC $Qset@$sge_host is UNKNOWN"
+        queueTriggered=true
+        ;;
+    esac
+  done
+
+  rm -f -- "${PLUGIN_RESULT}.QSTATE"
+
+  get_header "Queue Instance Status Report"
+  echo "$queueMsg"
+  get_footer
+
+  #####
+  ##### Make sure to export COLOR so that it gets back to "central"
+  #####
+  export COLOR
+
+#####
+##### End of get_status proc
+#####
+}
+
+#####
+##### Main body
+#####
+# get_status runs in the current shell (only its stdout is redirected), so the
+# COLOR it sets is available for the status message below.
+get_status > "${PLUGIN_RESULT}"
+
+# NOW USE THE XYMON COMMAND TO SEND THE DATA ACROSS
+# XYMON, XYMSRV, MACHINE and DATE are read from the environment.
+$XYMON "${XYMSRV}" "status ${MACHINE}.${TEST} ${COLOR} $($DATE) $(cat "${PLUGIN_RESULT}")"
+#For testing only
+# echo $XYMON "${XYMSRV}" "status ${MACHINE}.${TEST} ${COLOR} $($DATE) $(cat "${PLUGIN_RESULT}")" > /tmp/sgetmp
+
+# Clean up our mess
+# "rm -f" is silent when the file does not exist, which covers the case where
+# the whole test is optional and did not actually run on this client.
+#
+rm -f -- "${PLUGIN_RESULT}"
+##############################################
+# end of script
+##############################################