203 lines
5.9 KiB
Perl
Executable File
203 lines
5.9 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
# $Id: sensors 70 2011-11-25 09:21:18Z skazi $
|
|
# Author: Jacek Tomasiak <jacek.tomasiak@gmail.com>
|
|
# https://github.com/skazi0/xymon-plugins/blob/master/client/ext/smart
|
|
|
|
use strict;
|
|
# add script's directory to module search path for Hobbit.pm on non-debian systems
|
|
use FindBin;
|
|
use lib $FindBin::Bin;
|
|
|
|
use Hobbit;
|
|
use Data::Dumper;
|
|
|
|
# Report object from the Hobbit module; 'smart' is presumably the name of the
# test/column the report is sent under -- confirm against Hobbit.pm.
my $bb = Hobbit->new('smart');

# File that caches the auto-detected list of disks, one device path per line.
my $temp_disk_list = "$ENV{'XYMONTMP'}/$ENV{'MACHINEDOTS'}.smart.drivedb.list";
# Disks to check. Filled by load_config() from a DISKS="..." line, or by
# auto-detection further down when the config names none.
my @disks = ();
# Error count per disk that is already known (SMARTOLDERROR[disk] N in the
# config); only errors beyond this count are reported as new.
# Must be initialised with an empty list: `= {}` would store a single bogus
# key (the stringified hash reference) in the hash.
my %olderr = ();

# Lifetime of the smartctl output cache; overridable with SMARTCACHETIME=N.
my $CACHETIME = 10; # minutes
my $CACHEFILE = "$ENV{'XYMONTMP'}/$ENV{'MACHINEDOTS'}.smart.cache";

# A missing config file is not an error: load_config() simply returns and the
# defaults above stay in effect.
load_config("$ENV{'XYMONTMP'}/logfetch.$ENV{'MACHINEDOTS'}.cfg");
|
|
|
|
# Fall back to disk detection if nothing is defined in the config.
# Detection is done ONLY in that case, so a host whose disks are all listed in
# the config (and which may have no /dev/sd* nodes at all) is not aborted by a
# failing detection step.
unless (@disks)
{
    # Maximum age of the cached disk list, in seconds (600 s = 10 minutes).
    my $disk_list_max_age = 600;

    my @disks_stat  = stat($temp_disk_list);
    my $disks_mtime = scalar @disks_stat ? $disks_stat[9] : 0;

    # Regenerate the list when the file is missing (mtime 0) or too old.
    if (time() - $disks_mtime > $disk_list_max_age)
    {
        # Whole-disk nodes only: any name containing a digit (e.g. /dev/sda1)
        # is skipped. Done in Perl rather than through `ls | grep` so that no
        # environment-derived path is handed to a shell.
        my @detected = grep { !/[0-9]/ } glob('/dev/sd*');

        # Fail before touching the list file, so that an empty list is never
        # cached and then trusted until it expires.
        die "no /dev/sd* disks found to create $temp_disk_list" unless @detected;

        open(my $list_out, '>', $temp_disk_list)
            or die "Could not create file '$temp_disk_list' $!";
        foreach my $dev (@detected)
        {
            print {$list_out} "$dev\n";
        }
        close($list_out)
            or die "Could not write file '$temp_disk_list' $!";
    }

    ## Put temp_disk_list content to disks array
    open(my $fh, '<:encoding(UTF-8)', $temp_disk_list)
        or die "Could not open file '$temp_disk_list' $!";
    while (my $row = <$fh>)
    {
        chomp $row;
        # ignore blank lines so smartctl is never run with an empty device name
        push(@disks, $row) if length $row;
    }
    close($fh);
}
|
|
|
|
my @stat = stat($CACHEFILE);
my $mtime = scalar @stat ? $stat[9] : 0;
# Regenerate the smartctl cache if it is missing (mtime 0) or older than
# $CACHETIME minutes.
if (time() - $mtime > $CACHETIME * 60)
{
    # Build the new cache in a temporary file and rename it into place only
    # once it is complete. A die() half-way through then never leaves a
    # truncated cache with a fresh mtime behind.
    my $tmpfile = "$CACHEFILE.tmp";
    open(my $out, '>', $tmpfile) or die "cannot open $tmpfile: $!";

    foreach my $name (@disks)
    {
        my $banner = ('=' x 20) . " $name " . ('=' x 20) . "\n";
        print {$out} $banner;

        # NOTE: $name (from the config file or from disk detection) is
        # interpolated into a shell command line as-is.
        my @output = `sudo smartctl -AHi -l error -l selftest $name 2>&1`
            or die "smartctl produced no output for $name";

        my $ncv = '';          # name/value lines emitted in a hidden comment
        my $newerr = 1;        # errors beyond the known count; 1 until proven otherwise
        my $ponhours = undef;  # Power_On_Hours raw value (leading integer)
        my $lasttest = undef;  # lifetime hours of the first self-test entry seen

        foreach my $line (@output)
        {
            # skip header
            next if ($line =~ /smartctl|Copyright|Home page|===/);

            if ($line =~ /.*overall-health.*:\s*(.*)/)
            {
                # anything other than exactly 'PASSED' is reported red
                my $lstatus = ($1 eq 'PASSED') ? 'green' : 'red';
                print {$out} "&$lstatus $line";
            }
            elsif ($line =~ /^\s*(\d+)\s+(\S+)\s+\S+\s+(\d+)\s+(\d+)\s+(\d+)\s+\S+\s+\S+\s+(\S+)\s+(.*)$/)
            {
                # attribute table row: id, name, flag, value, worst, thresh,
                # type, updated, when_failed, raw_value
                my ($aname, $value, $worst, $thresh, $failure, $raw) = ($2, $3, $4, $5, $6, $7);
                my $lstatus = 'green';
                if ($aname =~ /Current_Pending_Sector|Offline_Uncorrectable/ and int($raw) > 0)
                {
                    $lstatus = 'yellow';
                }
                elsif ($aname =~ /Power_On_Hours/)
                {
                    # keep only the leading integer; stays undef when the raw
                    # value does not start with digits
                    ($ponhours) = $raw =~ /^(\d+)/;
                }
                if ($failure =~ /FAIL/)
                {
                    $lstatus = 'red';
                }

                print {$out} "&$lstatus $line";

                $ncv .= "$name-$aname-value : $value\n";
                $ncv .= "$name-$aname-worst : $worst\n";
                $ncv .= "$name-$aname-thresh : $thresh\n";
                $ncv .= "$name-$aname-raw : $raw\n";
            }
            elsif ($line =~ /^\s*No Errors Logged\s*$/)
            {
                $newerr = 0;
                print {$out} "&green $line";
            }
            elsif ($line =~ /Error Count:\s*(\d+)/)
            {
                # only errors beyond the count acknowledged in the config
                # (SMARTOLDERROR) are treated as new; no entry means 0
                $newerr = $1 - ($olderr{$name} || 0);
                my $lstatus = $newerr > 0 ? 'red' : 'green';
                print {$out} "&$lstatus $line";
            }
            elsif ($line =~ /^\s*Error \d+ occurred/)
            {
                my $lstatus = $newerr > 0 ? 'red' : 'green';
                print {$out} "&$lstatus $line";
            }
            elsif ($line =~ /^\s*#\s*\d+\s+(Conveyance offline|Extended offline|Short offline|Extended captive)\s+(.*)\s+\d+%\s+(\d+)/)
            {
                my $status = $2;
                my $lifetime = $3;
                my $lstatus = 'red';
                # only the first self-test entry seen is remembered
                $lasttest = $lifetime if (!defined($lasttest));
                $lstatus = 'yellow' if ($status =~ /Aborted by host|Interrupted \(host reset\)/);
                # NOTE(review): 'Interrupted (host reset)' is listed for both
                # yellow and green; this later test wins, so it ends up green.
                $lstatus = 'green' if ($status =~ /Completed without error|Self-test routine in progress|Interrupted \(host reset\)/);
                print {$out} "&$lstatus $line";
            }
            else
            {
                print {$out} " $line";
            }
        }

        # test status footer
        my $lasttestcolor = 'green';
        my $lasttestmsg;
        if (!defined($lasttest))
        {
            $lasttestcolor = 'yellow';
            $lasttestmsg = 'no test performed';
        }
        elsif (!defined($ponhours))
        {
            # without a current power-on hour count the age cannot be computed
            $lasttestcolor = 'yellow';
            $lasttestmsg = "at $lasttest power-on hours (current Power_On_Hours unknown)";
        }
        else
        {
            # The self-test lifetime is compared modulo 65536. A negative
            # difference means the test ran before the counter last wrapped.
            my $lasttestage = $ponhours % 65536 - $lasttest;
            $lasttestage += 65536 if ($lasttestage < 0);
            $lasttestmsg = "$lasttestage hours ago";
            if ($lasttestage > 24 * 7)
            {
                $lasttestcolor = 'red';
            }
            elsif ($lasttestage > 24 * 2)
            {
                $lasttestcolor = 'yellow';
            }
        }
        print {$out} "&$lasttestcolor Last Self-test: $lasttestmsg\n";

        # hidden output for ncv
        print {$out} "<!--\n$ncv\n-->\n";
    }

    # buffered write errors only surface at close, so check it
    close($out) or die "cannot write $tmpfile: $!";
    rename($tmpfile, $CACHEFILE)
        or die "cannot rename $tmpfile to $CACHEFILE: $!";
}
|
|
|
|
# Send cached content: replay the cache file through the Hobbit object.
# Lines that start with "&<color>" go through color_print() with that color;
# all other lines are passed to print() unchanged.
{
    # three-argument open with an explicit read mode, so the file name can
    # never be interpreted as a mode or a pipe
    open(my $in, '<', $CACHEFILE) or die "cannot open $CACHEFILE: $!";
    while (my $line = <$in>)
    {
        if ($line =~ /^\s*&(\S+)/)
        {
            $bb->color_print($1, $line);
        }
        else
        {
            $bb->print($line);
        }
    }
    close($in);
}

$bb->send;
|
|
|
|
# load_config($path)
#
# Reads optional settings from the config file at $path and stores them in the
# script-level variables:
#   DISKS="a b c"           -> @disks      (whitespace-separated device list)
#   SMARTOLDERROR[disk] N   -> $olderr{disk} = N
#   SMARTCACHETIME=N        -> $CACHETIME  = N
# Lines whose first non-blank character is '#' are ignored.
# Returns silently, changing nothing, when the file cannot be opened.
sub load_config
{
    my $path = shift;

    open(my $cfg, '<', $path) or return;
    while (my $line = <$cfg>)
    {
        next if ($line =~ /^\s*#/);
        if ($line =~ /DISKS\s*=\s*['"](.*?)["']/)
        {
            # split ' ' discards leading whitespace, so a value such as
            # " /dev/sda /dev/sdb" does not produce an empty first disk name
            @disks = split(' ', $1);
        }
        if ($line =~ /SMARTOLDERROR\[([\w\/]+)\]\s+(\d+)/)
        {
            $olderr{$1} = $2;
        }
        if ($line =~ /SMARTCACHETIME=(\d+)/)
        {
            $CACHETIME = $1;
        }
    }
    close($cfg);
}
|