source: trunk/noc/nagios/cnagios.pl @ 2014

Last change on this file since 2014 was 1093, checked in by quentin, 17 years ago
finger status@sipb-noc DTRTs
File size: 6.0 KB
Line 
1#
2# the current cnagios.pl for UW-HEP
3#
4
5use strict;
6
7#------------------------------------------------------------------
8
# Abbreviate the plugin output of a host check.
#
# Takes the raw output string as its only argument and returns the
# shortened string.  Output that matches none of the patterns below is
# returned untouched.
sub host_plugin_hook {
  my $text = $_[0];

  # fixed phrases: each is looked for once, in this order
  my @phrase_rules = (
    [ qr/\(Host assumed to be up\)/,                           'assumed up' ],
    [ qr/\(Host check timed out\)/,                            'timed out'  ],
    [ qr/\(Not enough data to determine host status yet\)/,    'none'       ],
    [ qr/\(No Information Returned From Host Check\)/,         'none'       ],
  );
  for my $rule (@phrase_rules) {
    my ($pattern, $short) = @{$rule};
    $text =~ s/$pattern/$short/;
  }

  # ping summary: keep only the packet loss percentage
  $text =~ s/Ping .*? - (\d+)% packet loss.*/$1% pkt loss/;

  return $text;
}
20
21#------------------------------------------------------------------
22
# Abbreviate the plugin output of a service check.
#
# Takes the raw output string as its only argument and returns the
# shortened string.  The rules below are applied top to bottom to the
# same string, so a later rule sees the result of every earlier one;
# keep that in mind when adding or reordering rules.  Output that
# matches no rule is returned untouched.
sub service_plugin_hook {
  local($_) = $_[0];

  # generic...
  s/Plugin timed out after \d+ seconds/timed out/;

  # check_pingwithperl...
  s/.* (\d+)% packet loss, \d+.\d+ ms ave rtt/$1% pkt loss/;

  # check_tcp...
  s/.* (\d+\.\d+) second[s]? response time.*/$1 sec response/;
  s/.* (\d+) second[s]? response time.*/$1 sec response/;
  s/.* (\d+\.\d+) sec[s]? response time.*/$1 sec response/;

  # check_ftp
  s/.*Invalid response from host/bad response/;

  # check_ssh...
  s/.* (.*?OpenSSH.*?) .*/$1/;
  s/.*OpenSSH_3.5p1.*/OpenSSH_3.5p1/;
  s/.*Connection refused.*/connection refused/i;

  # check_netsnmp_disk & check_netsnmp_bigdisk...
  # also works for check_dcache_usage...
  # Sizes are converted to whole GB and right-aligned to at least four
  # characters.  "%4s" pads but never truncates, so values of 10000 GB
  # and above stay correct (they are just wider than the column).
  while ( $_ =~ /(\d+\.\d+) TB/ ) {
     my $tb = $1;
     my $gb = sprintf("%4s",int($tb *1024));
     # \Q..\E: the captured number contains a "." that must match literally
     $_ =~ s/\Q$tb\E TB/$gb GB/;
  }
  while ( $_ =~ /(\d+\.\d+) GB/ ) {
     my $gb = $1;
     my $gb_new = sprintf("%4s",int($gb));
     $_ =~ s/\Q$gb\E GB/$gb_new GB/;
  }
  s/.*?(\d+ GB total,).*?,(\s*\d+ GB avail)/$1$2/;

  # check_netsnmp_loadave...
  s/.* load average: (\d+\.\d+).*/$1 loadave/;

  # check_ntp...
  s/.* Offset ([-]*\d+\.\d+) secs.*/$1 sec offset/;
  s/.* stratum (\d+), offset ([-]*\d+\.\d+).*/stratum $1, $2 sec offset/;
  s/.*Jitter\s+too high.*/jittering/;
  s/.*desynchronized peer server.*/desynchronized peer server/i;
  s/.*probably down.*/down/;

  # check_dhcp et al...
  s/.* Received \d+ DHCPOFFER.*max lease time = (\d+) sec.*/$1 sec lease time/;
  s/.* \d+ in use, (\d+) free/$1 free leases/;
  if ( s/DHCP problem: (.*)/$1/ ) { $_ = lc($_); }

  # check_afs_*...
  s/File Server Performance/Performance/;
  s/.* (\d+ blocked) connections/$1/;
  s/(.*?) AFS (\/.*)/$1 $2/;
  s/(.*?) AFS Volume Quotas/$1 AFS Volumes/;
  s/(\d+) processes running normally/$1 ok processes/;
  s/one process running normally/one ok process/;
  s/% used/%/g;
  # strip only the literal "user." volume prefix; an unescaped dot here
  # would also eat into unrelated words such as "users"
  s/user\.//g;
  s/(\d+) volumes under quota/$1 ok volumes/;
  s/db version (\d+.\d+)/db $1/;

  # check_condor_client...
  s/.* vm1 = .*?\/(\S+), vm2 = .*?\/(\S+),.*/$1\/$2/;
  s/.* vm1 = .*?\/(\S+), vm2 = .*?\/(\S+).*/$1\/$2/;
  s/.* cpu = (\S+)/$1/;
  s/CondorQueue.*?(\d+ job[s]?, \d+ running).*/$1/;
  s/.*?No condor status.*/no condor status/;

  # check_condor_pool...
  s/.*?(\d+) nodes.*/$1 nodes/;

  # check_condor_queue...
  s/.*?(\d+ idle, \d+ held)/$1/;

  # check_nsr...
  s/.*?(\d+\.\d+ GB), (\d+ saves) since.*/$1, $2/;
  s/.*?(\d+ GB), (\d+ saves) since.*/$1, $2/;
  s/(\d+ GB avail)able, \d+ GB total/$1/;

  # check_hpjd...
  s/.*? - \(\".*\"\)/printer okay/;
  if ( s/(.*)\s+\(\".*\"\)/$1/ ) { $_ = lc($_); }

  # check_LPRng_queue...
  s/(\d+) active job[s]?/$1 active/;
  s/(\d+) stalled job[s]?/$1 stalled/;
  s/(\d+) spooled job[s]?/$1 spooled/;
  s/(\d+) incoming job[s]?/$1 incoming/;

  # check_jug_*...
  s/(\d+) JugRPC processes.*/$1 processes/;
  s/.*JugJobs.*?(\d+) running.*/$1 running/;
  s/.*Jug Storage.*?(\d+) unassigned.*/$1 unassigned/;

  # check_dcache*...
  s/.*no status available.*/not found/;
  s/.*not found in the cellInfo.*/not found/;
  s/service is (.*)/$1/;
  # the leading match must be non-greedy, otherwise it swallows all but
  # the last digit of the number being captured
  s/.*?(\d+) ms ave ping time/$1 ms ping time/;
  s/.*?(\d+) ms ping time/$1 ms ping time/;

  # (my) check_traffic & check_ifHighSpeed_traffic...
  # makes columnized XXX.XX Mbps output...
  s/.*? (.*) Traffic/$1 Traffic/;
  s/Internet Traffic/Traffic/;
  if ( $_ =~ /(\d+\.\d+) Gbps in/ ) {
    my $rate = $1;
    my $gbps = sprintf("%6.6s",$rate);
    $_ =~ s/\Q$rate\E Gbps in/$gbps Gbps in/;
  }
  if ( $_ =~ /(\d+\.\d+) Gbps out/ ) {
    my $rate = $1;
    my $gbps = sprintf("%6.6s",$rate);
    $_ =~ s/\Q$rate\E Gbps out/$gbps Gbps out/;
  }
  if ( $_ =~ /(\d+\.\d+) Mbps in/ ) {
    my $rate = $1;
    my $mbps = sprintf("%6.6s",$rate);
    $_ =~ s/\Q$rate\E Mbps in/$mbps Mbps in/;
  }
  if ( $_ =~ /(\d+\.\d+) Mbps out/ ) {
    my $rate = $1;
    my $mbps = sprintf("%6.6s",$rate);
    $_ =~ s/\Q$rate\E Mbps out/$mbps Mbps out/;
  }
  # Kbps figures are rescaled to Mbps (two decimals) so every rate in
  # the column carries the same unit
  while ( $_ =~ /(\d+\.\d+) Kbps/ ) {
    my $rate = $1;
    my $mbps = sprintf("%.2f",$rate/1000);
    $mbps = sprintf("%6.6s",$mbps);
    $_ =~ s/\Q$rate\E Kbps/$mbps Mbps/;
  }
  s/\d+\.\d+ bps/  0.00 Mbps/g;

  # check_airport...
  s/(.*? AirPort) Usage/$1/;
  s/no connected clients/no clients/;
  s/(\d+) connected clients/$1 clients/;

  # check_netsnmp_raid...
  s/.*connect failed.*/connect failed/;
  s/.*degraded.*/degraded/;
  s/.*degraded/degraded/;
  s/.*rebuilding.*/rebuilding/;
  s/.*rebuilding/rebuilding/;
  s/.*built.*/building/;
  s/.*built/building/;
  s/.*optimal.*/optimal/;
  s/.*optimal/optimal/;

  # check_ip_routing_with_mtr
  s/\S+ to \S+ hop not found, first hop out is (\S+)/hop is $1/i;

  # check_phedex
  # non-greedy lead-in so the whole agent count is kept, not just its
  # last digit
  s/.*?(\d+ UP agents).*/$1/;

  # plugin generic...
  s/.*no response.*/connection timed out/i;
  s/.*no route to host.*/no route to host/i;
  s/Socket timeout.*/socket timed out/;

  # nagios generic...
  s/\(Service Check Timed Out\)/check timed out/;
  s/\(No output returned from plugin\)/no output from plugin/;
  s/Service check scheduled for.*/none/;
  s/No data yet.*/no data yet/;
  s/\.$//;

  # generic generic...
  # drop everything up to and including the status tag
  s/.*?OK - //i;
  s/.*?WARNING - //i;
  s/.*?CRITICAL - //i;
  s/.*?UNKNOWN - //i;

  return $_;

}
204
205#------------------------------------------------------------------
206
207# this sub is used for host/service/plugin-output
208# filtering... it should not change...
209
# Match $str against $regex for the output filter.
#
#   mode 0: returns 0 when $str matches $regex, 1 when it does not
#   mode 1: returns 0 when $str does not match $regex, 1 when it does
#   other : returns 2 without looking at the pattern
sub regex_hook {
  my ($str, $regex, $mode) = @_;

  # the pattern is only evaluated inside the branch that needs it
  return ( $str =~ /$regex/ ? 0 : 1 ) if $mode == 0;
  return ( $str =~ /$regex/ ? 1 : 0 ) if $mode == 1;

  return 2;
}
220
Note: See TracBrowser for help on using the repository browser.