295 lines
11 KiB
Bash
Executable File
295 lines
11 KiB
Bash
Executable File
#!/bin/sh
|
||
# .. vim: foldmarker=[[[,]]]:foldmethod=marker
|
||
|
||
# NOTE: Must be run as root, so you probably need to setup sudo for this.
|
||
|
||
# This script is mostly intended to be used with Xymon, and rather for devices unknown to the smartmontools database.
|
||
# Based on xymon.com's script : https://www.xymon.com/xymon-cgi/viewconf.sh?smart
|
||
# The script will scan all devices compatible with SMART and for each disk, it will : [[[
|
||
# * try to guess the expected TYPE (even megaraid,…).
|
||
# * display health status.
|
||
# * set a "clear" state for incompatible device.
|
||
# * display last selftests.
|
||
# * set an "error" state if no selftest is recorded.
|
||
# * display basic information.
|
||
# * recommend a more advanced SMART script if the disk is known to smartmontools's database (drivedb.h), or redirect to smartmontools's FAQ if not.
|
||
# ]]]
|
||
# Things the script CAN'T do : [[[
|
||
# * ensure a recent selftest was run.
|
||
# * compare current value with vendor's one (for failure prediction or error).
|
||
# * give detail about errors.
|
||
# * Take a look at this more advanced script for such features : https://github.com/skazi0/xymon-plugins/blob/master/client/ext/smart
|
||
# ]]]
|
||
|
||
# Vars [[[
## Debug switch: "0" enables the DEBUG messages, any other integer disables
## them (the script tests it with: [ "${debug}" -eq "0" ]).
debug="1"

## Colors [[[
## ANSI escape sequences; they are expanded by printf because the script
## places them inside the format string.
c_redb='\033[1;31m'
c_magentab='\033[1;35m'
c_reset='\033[0m'
## ]]]

## File name of this script, used as the test name in the Xymon status message.
plugin_name="${0##*/}"

## Work files.
## NOTE(review): XYMONTMP and MACHINEDOTS are presumably exported by the Xymon
## client environment; when they are unset these paths start at "/".
### Detailed, per device, report
plugin_result="${XYMONTMP}/${MACHINEDOTS}.smartoverall.plugin_result"
### One summary line per device (weighted color, device, type)
plugin_state="${XYMONTMP}/${MACHINEDOTS}.smartoverall.plugin_state"
### Cached output of "smartctl --scan"
device_list="${XYMONTMP}/${MACHINEDOTS}.smartoverall.dscan"
## List of devices known from the smartmontools base and compatible with test logging
## This file might be used by a more advanced script such as skazi0's one
drivedb_list="${XYMONTMP}/${MACHINEDOTS}.smart.drivedb.list"

# By default, don't empty files newer than 10 hours (600 minutes)
default_mtime_minutes="600"
# ]]]
|
||
|
||
# Functions
|
||
## Create or empty a file if it's too old [[[
## First argument (required): Absolute path to the file
## Second argument (optional): Maximum number of minutes since last
##   modification (defaults to ${default_mtime_minutes})
## Globals read: debug, default_mtime_minutes, c_magentab, c_reset
## Result: the file exists afterwards. It is created when missing, emptied
##   when its last modification is older than the limit, untouched otherwise.
## Returns 0; exits the script with status 1 on a wrong number of arguments.
## Debug messages go to stderr, because callers may run with stdout
## redirected to a data file.
regenerate_if_too_old() {
  ## Set variables according to the number of passed arguments [[[
  case $# in
    1 )
      _file="${1}"
      [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : regenerate_if_too_old func − Use default_mtime_minutes value: ${default_mtime_minutes}." >&2
      _max_mtime_minutes="${default_mtime_minutes}"
      ;;
    2 )
      _file="${1}"
      _max_mtime_minutes="${2}"
      ;;
    * )
      ## Neither zero nor more than two arguments make sense here
      [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : regenerate_if_too_old func − Need at least 1 argument." >&2
      exit 1
      ;;
  esac
  ## ]]]

  ## A missing file has no modification time to compare: just create it
  if [ ! -e "${_file}" ]; then
    [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : regenerate_if_too_old func − Need to create ${_file}." >&2
    true > "${_file}"
    return 0
  fi

  _current_timestamp=$(date "+%s")
  ## If the modification time can't be read, fall back to 0 so the file is
  ## considered too old and gets regenerated
  _file_mtime_timestamp=$(stat --format="%Y" -- "${_file}") || _file_mtime_timestamp="0"

  ## Subtract last modification timestamp of the file from current timestamp
  _file_mtime_seconds=$(( _current_timestamp - _file_mtime_timestamp ))
  ## Get maximum allowed mtime in seconds
  _max_mtime_seconds=$(( _max_mtime_minutes * 60 ))

  ## Compare last modification mtime with the maximum allowed
  if [ "${_file_mtime_seconds}" -gt "${_max_mtime_seconds}" ]; then
    [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : regenerate_if_too_old func − Need to empty or create ${_file} last modification happened ${_file_mtime_seconds} seconds ago (maximum is ${_max_mtime_seconds})." >&2
    true > "${_file}"
  else
    [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : regenerate_if_too_old func − Don't need to empty ${_file} last modification happened ${_file_mtime_seconds} seconds ago (maximum is ${_max_mtime_seconds})." >&2
  fi

  return 0
}
## ]]]
|
||
## Test if a disk really supports SMART [[[
## Smartctl can give a health status even without a full support
## of SMART for some type (eg. scsi or megaraid).
## Example : SMART support is: Unavailable - device lacks SMART capability.
##
## First argument: path of the device (DEVICE for smartctl)
## Second argument: type to test (value given to "smartctl -d")
## Globals read: XYMONTMP, MACHINEDOTS, debug, c_magentab, c_reset
## Globals set: smart_support_msg, empty when every "SMART support is:" line
##   reports Enabled or Available, otherwise the reason of the failure.
## Returns 0 when SMART seems fully supported, 1 otherwise.
## Debug messages go to stderr, because callers may run with stdout
## redirected to a data file.
is_disk_support_smart() {
  _disk="${1}"
  _type="${2}"

  ## The answer depends on both the device and the tested type, and several
  ## disks can share one device path (eg. /dev/bus/0 with megaraid,N), so
  ## the cache file is named after the device AND the type.
  _type_tag=$(printf '%s' "${_type}" | tr -c 'A-Za-z0-9.,+_-' '_')
  _smarctl_support_result="${XYMONTMP}/${MACHINEDOTS}.smartoverall.support.$(basename "${_disk}").${_type_tag}"

  smart_support_msg=""

  [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : is_disk_support_smart func − check if SMART is supported on : ${_disk}." >&2

  ## Create or empty previous file only if older than 24h (1440 minutes)
  regenerate_if_too_old "${_smarctl_support_result}" 1440

  ## Grep only "support" lines from disk's information only if the file was emptied
  if [ ! -s "${_smarctl_support_result}" ]; then
    smartctl -d "${_type}" -i -- "${_disk}" | grep -E -- "^SMART support is:" >> "${_smarctl_support_result}"
  fi

  if [ -s "${_smarctl_support_result}" ]; then
    ## Parse all "support" lines, keep the last one that is neither
    ## "Enabled" nor "Available"
    while IFS= read -r _LINE; do
      case "${_LINE}" in
        *Enabled*|*Available*) ;;
        *) smart_support_msg="${_LINE}" ;;
      esac
    done < "${_smarctl_support_result}"
  else
    ## No "support" line at all
    smart_support_msg="smartctl was not able to open ${_disk} DEVICE with ${_type} TYPE."
  fi

  ## Clean temp files
  ### As the Xymon's tmpdir is used to store log files, no need to delete them at
  ### the end of the script. They will be emptied, reused or regenerated (if older
  ### than the expected interval) at the next run.

  if [ -z "${smart_support_msg}" ]; then
    [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : is_disk_support_smart func − SMART seems fully supported on : ${_disk} with ${_type} type." >&2
    return 0
  fi

  [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : is_disk_support_smart func − SMART is not fully supported on : ${_disk} with ${_type} type. See smartctl informations :\n${smart_support_msg}" >&2
  return 1
}
## ]]]
|
||
## Test the type of disk with smartctl [[[
## Because the scanned one might not be the one to use
##
## First argument: path of the device
## Second argument: type reported for this device by the scan (may be empty)
## Globals set:
##   TYPE: first type ("auto", then the scanned one) for which
##     is_disk_support_smart leaves smart_support_msg empty; empty if none.
##   SMART_SUPPORT_MSG: empty on success, otherwise the message of the last
##     tested type.
## Returns 0 when a type was found, 1 otherwise.
choose_correct_type() {
  _disk="${1}"
  _scanned_type="${2}"
  _default_type="auto"

  TYPE=""
  SMART_SUPPORT_MSG=""

  ## Build the list of candidates in the function's own positional parameters.
  ## An empty scanned type can't be given to smartctl, and testing the default
  ## type twice is useless.
  set -- "${_default_type}"
  if [ -n "${_scanned_type}" ] && [ "${_scanned_type}" != "${_default_type}" ]; then
    set -- "$@" "${_scanned_type}"
  fi

  for _test_type in "$@"; do
    is_disk_support_smart "${_disk}" "${_test_type}"

    ## If no message, the type is correct
    if [ -z "${smart_support_msg}" ]; then
      TYPE="${_test_type}"
      SMART_SUPPORT_MSG=""
      return 0
    fi

    ## Remember why this type was rejected
    SMART_SUPPORT_MSG="${smart_support_msg}"
  done

  return 1
}
## ]]]
|
||
|
||
# Main: scan the devices, build the summary and the detailed report, then
# send the whole status to the Xymon server.

# Create or empty previous files
: > "${plugin_result}"
: > "${plugin_state}"
## Create or empty previous file only if older than 24h (1440 minutes)
regenerate_if_too_old "${device_list}" 1440
regenerate_if_too_old "${drivedb_list}" 1440

# Get the list of all available devices if the previous list was emptied
if [ ! -s "${device_list}" ]; then
  smartctl --scan >> "${device_list}"
fi

# If the file is not empty
if [ -s "${device_list}" ]; then
  ## The standard output of the whole loop is appended to ${plugin_state}:
  ## only summary lines may be written to stdout here, debug goes to stderr.
  while IFS= read -r LINE; do
    ## Ignore empty and commented lines
    case "${LINE}" in
      ''|'#'*) continue ;;
    esac

    ## Get device path
    DISK=$(printf '%s\n' "${LINE}" | cut -d" " -f1)
    ## Try to determine the best type
    SCANNED_TYPE=$(printf '%s\n' "${LINE}" | cut -d" " -f3)
    choose_correct_type "${DISK}" "${SCANNED_TYPE}"

    ## Never reuse the messages of the previous device
    DDRIVEDB_MSG=""
    DSELFTEST_MSG=""
    DSELFTEST=""

    ## If no correct type was found for this device
    if [ -z "${TYPE}" ]; then
      [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : SMART is not fully supported." >&2
      DRES=$(printf '%s\n%s' "SMART Health Status can't be determine because of:" "${SMART_SUPPORT_MSG}")
      ### Bit 1 (value 2) gives the "clear" color below
      DCODE="2"
      TYPE="unsupported"
      ### Still try to display information about unsupported device (eg. RAID controller,…)
      DID="unsupported-${DISK}"
      DINFO=$(smartctl -i -d "${SCANNED_TYPE}" "${DISK}" | grep -v -E "^smartctl|^Copyright|^$" || printf '%s' "Can't get informations due to no SMART support.")
    else
      [ "${debug}" -eq "0" ] && printf "${c_magentab}%-6b${c_reset}\n" "DEBUG : SMART seems fully supported, proceed normally." >&2
      ### Get SMART Health Status and return code
      DRES=$(smartctl -H -d "${TYPE}" -n standby "${DISK}")
      DCODE=$?
      ### Get disk's information once, then extract the serial number from it
      DINFO=$(smartctl -i -d "${TYPE}" "${DISK}" | grep -v -E "^smartctl|^Copyright|^$")
      DID=$(printf '%s\n' "${DINFO}" | awk '/.erial .umber:/ { print $NF }')

      ## If the model of the disk is known from smartmontools database
      if smartctl -d "${TYPE}" -P show "${DISK}" | grep -qi -- "drive found in"; then
        disk_in_drivedb="1"
        DDRIVEDB_MSG="&green Device is known in smartmontools database. You might consider using a more advanced plugin such as:
https://github.com/skazi0/xymon-plugins/blob/master/client/ext/smart"
      else
        disk_in_drivedb="0"
        DDRIVEDB_MSG="&clear Device is unknown or not complete in smartmontools database. Please take a look to the FAQ:
https://www.smartmontools.org/wiki/FAQ#SmartmontoolsDatabase"
      fi

      ## Read the selftest log once, the tests below work on this copy
      DSELFTEST=$(smartctl -d "${TYPE}" -l selftest "${DISK}" | grep -v -E -- "^smartctl|^Copyright|^$")
      ## If no selftest has been recorded
      if printf '%s\n' "${DSELFTEST}" | grep -qi -- "No self-tests"; then
        DSELFTEST_MSG="&red No self-tests recorded:"
        ### Bit 3 (value 8) gives the "red" color below
        DCODE="8"
      ## If the device doesn't support test logging
      elif printf '%s\n' "${DSELFTEST}" | grep -qEi -- "does not support.*logging"; then
        DSELFTEST_MSG="&clear Test logging are not supported:"
      ### If the device is also known from smartmontools database
      ### and not already present in the list of compatible disks
      ### (exact, whole line match: /dev/sda is not /dev/sdaa)
      elif [ "${disk_in_drivedb}" = "1" ] &&
        ! grep -q -x -F -- "${DISK}" "${drivedb_list}"
      then
        printf '%s\n' "${DISK}" >> "${drivedb_list}"
      fi
    fi

    ## Test health status
    DSTBY=$(( DCODE & 2 ))
    DFAIL=$(( DCODE & 8 ))
    DWARN=$(( DCODE & 32 ))

    ## According to health, give a weight to each color to easily get the page status
    if [ "${DSTBY}" -ne 0 ]; then
      COLOR="4&clear"
    elif [ "${DFAIL}" -ne 0 ]; then
      COLOR="1&red"
    elif [ "${DWARN}" -ne 0 ]; then
      COLOR="2&yellow"
    else
      COLOR="3&green"
    fi

    ## Avoid duplicate device: skip it if its identifier is already in the report.
    ## An empty identifier would match every line, so it is never a duplicate.
    if [ -n "${DID}" ] && grep -q -F -- "${DID}" "${plugin_result}"; then
      continue
    fi

    ## For summary
    printf '%s\n' "${COLOR} ${DISK} ${TYPE}"

    ## For detailed information
    {
      ### Same line as the summary, without the weight
      printf '%s\n' "${COLOR} ${DISK} ${TYPE}" | cut -c2-
      printf '\n'
      printf '%s\n' "${DRES}" | grep -v -E "^smartctl|^Copyright|^$|^==="
      printf '%s\n' "${DDRIVEDB_MSG}"
      printf '%s\n' "${DINFO}"
      printf '%s\n' "${DSELFTEST_MSG}"
      printf '%s\n' "${DSELFTEST}" | head -n6
      printf '%s\n' "------------------------------------------------------------"
      printf '\n'
      printf '\n'
    } >> "${plugin_result}"
  done < "${device_list}" >> "${plugin_state}"

# If the file is empty
else
  printf '%s\n' "1&red Error while scanning devices with smartctl" >> "${plugin_state}"
fi

# Set the global color according to the highest alert
# (lowest weight first, then drop the weight and the "&")
COLOR=$(awk '{print $1}' "${plugin_state}" | sort -u | head -n 1 | cut -c3-)

# Send information to Xymon server
# NOTE(review): ${XYMON} is left unquoted as in the original; confirm whether
# it may carry arguments.
$XYMON "${XYMSRV}" "status ${MACHINE}.${plugin_name} ${COLOR} SMART health check

$(cut -c2- < "${plugin_state}")

==================== Detailed status ====================

$(cat "${plugin_result}")
"

## Clean temp files
### As the Xymon's tmpdir is used to store log files, no need to delete them at
### the end of the script. They will be emptied, reused or regenerated (if older
### than the expected interval) at the next run.

exit 0
|