|  |   | 
| (2 intermediate revisions by 2 users not shown) | 
| Line 1: | Line 1: | 
|  | {{DISPLAYTITLE:Monitoring /proc/user_beancounters with nagios}} |  | {{DISPLAYTITLE:Monitoring /proc/user_beancounters with nagios}} | 
| − | To be added locally on the VZ HN to <code>/etc/nagios/nrpe_local.conf</code>. | + | To be added locally on the VZ HN to <code>/etc/nagios/nrpe_local.conf</code> and as a standalone script run with cron. | 
|  |  |  |  | 
| − | Works as nagios-plugin with option '-f' or reports an increase of a failcnt-value by mail if run e.g. as a cronjob with option '-t'. We use it with both cases to be sure that we see a peak in case it happened between the nagios-checks. | + | Works as a Nagios plugin with option '-c', or reports an increase of a failcnt value by mail when run, e.g., as a cron job with option '-i'. We use it in both ways so that a peak that occurs between two Nagios checks is still noticed. | 
|  |  |  |  | 
| − | <source lang=python>
 | + | The most current version of this script is available at http://github.com/peletiah/openvz/tree/master | 
| − | #!/usr/bin/python
 |  | 
| − | # Copyright (C) 2008 Christian Benke
 |  | 
| − | # Distributed under the terms of the GNU General Public License v2
 |  | 
| − | # v0.2 2008-04-04
 |  | 
| − | # Christian Benke <c.benke  gmail  com>
 |  | 
|  |  |  |  | 
| − | import string
 |  | 
| − | import pickle
 |  | 
| − | import sys
 |  | 
| − | import getopt
 |  | 
| − | import re
 |  | 
|  |  |  |  | 
| − | veid=''
 |  | 
| − | current_data=dict()
 |  | 
| − | opts=None
 |  | 
| − | beancounter_data=None
 |  | 
| − | picklefilepath='/tmp/beancounters_pickledump'
 |  | 
|  |  |  |  | 
| − | #-------- find the hostname for each veid ---:
 | + | ==Alternative Script== | 
| − |   |  | 
| − | def find_veid(veid):
 |  | 
| − |         veid_conf=open('/etc/vz/conf/' + str(veid) + '.conf','r')
 |  | 
| − |         if int(veid) !=0:
 |  | 
| − |                 for line in veid_conf:
 |  | 
| − |                         if "HOSTNAME" in line:
 |  | 
| − |                                 quotes=re.compile("\"")
 |  | 
| − |                                 line=quotes.sub("",line)
 |  | 
| − |                                 linefeed=re.compile("\n")
 |  | 
| − |                                 line=linefeed.sub("",line)
 |  | 
| − |                                 fqdn=re.split('=',line)
 |  | 
| − |                                 hostname=re.split('\.',fqdn[1])[0]
 |  | 
| − |                                 return hostname
 |  | 
| − |         else:
 |  | 
| − |                 hostname='OpenVZ HN'
 |  | 
| − |                 return hostname
 |  | 
| − |   |  | 
| − | # ---------- send mail in case of a counter-change
 |  | 
| − |   |  | 
| − | def send_mail(count_change):
 |  | 
| − |         sendmail = "/usr/lib/sendmail" # sendmail location
 |  | 
| − |         import os
 |  | 
| − |         p = os.popen("%s -t" % sendmail, "w")
 |  | 
| − |         p.write("From: root\n")
 |  | 
| − |         p.write("To: to@example.com\n")
 |  | 
| − |         p.write("Subject: Beancounters changed in the last 5 minutes\n")
 |  | 
| − |         p.write("\n") # blank line separating headers from body
 |  | 
| − |         p.write("The Beancounter-failcnt value of the following veid(s) and resource(s) has \n")
 |  | 
| − |         p.write("increased in the last 5 minutes:\n\n")
 |  | 
| − |         p.write(count_change)
 |  | 
| − |         sts = p.close()
 |  | 
| − |         if sts is not None:
 |  | 
| − |                 print "Sendmail exit status", sts
 |  | 
| − |   |  | 
| − | #---------- compare the failcnt-values
 |  | 
| − |   |  | 
| − | def cntcheck(data_read,current_data,veid,fields,count,count_change):
 |  | 
| − |         if data_read and count == True and data_read is not '0': #comparing counters of new data with previous run
 |  | 
| − |                 if data_read[veid][fields[0]][5] < current_data[veid][fields[0]][5]:
 |  | 
| − |                         hostname=find_veid(veid)
 |  | 
| − |                         count_change=str(count_change) + str(hostname) + ': ' + str(fields[0]) + ' failcnt has changed from ' + data_read[veid][fields[0]][5] + ' to ' + str(current_data[veid][fields[0]][5]) + '\n'
 |  | 
| − |         return count_change
 |  | 
| − |   |  | 
| − | #---------- compare the current value with barrier/limit value
 |  | 
| − |   |  | 
| − | def barriercheck(data_read,current_data,veid,fields,count,barrier_break):
 |  | 
| − |         if count == False:      #comparing current level with barrier/limit
 |  | 
| − |                 if current_data[veid][fields[0]][0] == 'oomguarpages': #for oomguarpages and physpages only the limit-value is relevant
 |  | 
| − |                         if int(current_data[veid][fields[0]][1]) > int(current_data[veid][fields[0]][4])*0.9:
 |  | 
| − |                                 hostname=find_veid(veid)
 |  | 
| − |                                 barrier_break = str(barrier_break) + str(hostname) + ': ' + str(current_data[veid][fields[0]][0]) + ' '
 |  | 
| − |                 elif current_data[veid][fields[0]][0] == 'physpages':
 |  | 
| − |                         if int(current_data[veid][fields[0]][1]) > int(current_data[veid][fields[0]][4])*0.9:
 |  | 
| − |                                 hostname=find_veid(veid)
 |  | 
| − |                                 barrier_break = str(barrier_break) + str(hostname) + ': ' + str(current_data[veid][fields[0]][0]) + ' '
 |  | 
| − |                 else:
 |  | 
| − |                         if int(current_data[veid][fields[0]][1]) > int(current_data[veid][fields[0]][3])*0.9:
 |  | 
| − |                                 hostname=find_veid(veid)
 |  | 
| − |                                 barrier_break = str(barrier_break) + str(hostname) + ': ' + str(current_data[veid][fields[0]][0]) + ' '
 |  | 
| − |         return barrier_break
 |  | 
| − |   |  | 
| − |   |  | 
| − | #------------ read user_beancounter and handle the result of the comparison subroutines
 |  | 
| − |   |  | 
| − | def compare_data(beancounter_data,data_read,count):
 |  | 
| − |         count_change=str()
 |  | 
| − |         barrier_break=str()
 |  | 
| − |         for line in beancounter_data:
 |  | 
| − |                 if 'Version' in line or 'uid' in line or 'dummy' in line:
 |  | 
| − |                         continue
 |  | 
| − |                 else:
 |  | 
| − |                         fields=line.split( )
 |  | 
| − |                         if len(fields) == 7:
 |  | 
| − |                                 i=0
 |  | 
| − |                                 veid=int(fields[0][:-1])
 |  | 
| − |                                 fields.pop(0) #remove the first element
 |  | 
| − |                                 current_data[veid]=dict()
 |  | 
| − |                                 current_data[veid][fields[0]]=fields
 |  | 
| − |                         else:
 |  | 
| − |                                 i=i+1
 |  | 
| − |                                 current_data[veid][fields[0]]=fields
 |  | 
| − |   |  | 
| − |                 # ------ check barrier/limit
 |  | 
| − |                         barrier_break=barriercheck(data_read,current_data,veid,fields,count,barrier_break)
 |  | 
| − |   |  | 
| − |   |  | 
| − |                 # ------ check failcnt
 |  | 
| − |                         count_change=cntcheck(data_read,current_data,veid,fields,count,count_change)
 |  | 
| − |   |  | 
| − |         if barrier_break and count == False:
 |  | 
| − |                 print barrier_break
 |  | 
| − |                 sys.exit(2)
 |  | 
| − |         elif count == False:
 |  | 
| − |                 print 'All Beancounters OK'
 |  | 
| − |                 sys.exit(0)
 |  | 
| − |   |  | 
| − |         if count_change and count == True:
 |  | 
| − |                 send_mail(count_change)
 |  | 
| − |                 return current_data
 |  | 
| − |         elif count == True:
 |  | 
| − |                 return current_data
 |  | 
| − |   |  | 
| − |   |  | 
| − | # ----- pickle data - read or write
 |  | 
| − |   |  | 
| − | def pickle_data(current_data,action,count,picklefilepath):
 |  | 
| − |         try:
 |  | 
| − |                 picklefile = None
 |  | 
| − |                 if action == 'write':
 |  | 
| − |                         if current_data:
 |  | 
| − |                                 picklefile=open(picklefilepath,'w')
 |  | 
| − |                                 pickle.dump(current_data, picklefile)
 |  | 
| − |                                 picklefile.close()
 |  | 
| − |                                 return
 |  | 
| − |                         else:
 |  | 
| − |                                 print 'current_data is empty: ' + str(current_data)
 |  | 
| − |                 elif action == 'read':
 |  | 
| − |                         picklefile=open(picklefilepath,'r')
 |  | 
| − |                         data_read=pickle.load(picklefile)
 |  | 
| − |                         picklefile.close()
 |  | 
| − |                         if data_read:
 |  | 
| − |                                 return data_read
 |  | 
| − |                         else:
 |  | 
| − |                                 print 'DATA_READ IS NONE:' + str(data_read)
 |  | 
| − |                                 return data_read
 |  | 
| − |         except IOError:
 |  | 
| − |                 current_data = compare_data(beancounter_data,'0',count)
 |  | 
| − |                 picklefile=open(picklefilepath,'w')
 |  | 
| − |                 pickle.dump(current_data,picklefile)
 |  | 
| − |                 picklefile.close()
 |  | 
| − |   |  | 
| − | # ------- print script usage
 |  | 
| − |   |  | 
| − | def usage(prog="check_beancounter.py"):
 |  | 
| − |     print """
 |  | 
| − | check_beancounter.py : Check if failcounters increase or resource-values break barriers or limits
 |  | 
| − |   |  | 
| − |  check_beancounter.py [-tfh]
 |  | 
| − |   |  | 
| − |  -h                  print this message
 |  | 
| − |   |  | 
| − |  -t                  Check if failcnt-values have increased since the last run
 |  | 
| − |  -f                  Check if current value of a resource is higher than barrier/limit
 |  | 
| − |  """
 |  | 
| − |   |  | 
| − |   |  | 
| − | opts=getopt.getopt(sys.argv[1:], 'thf')
 |  | 
| − | if opts:
 |  | 
| − |         if opts[0]==[]:
 |  | 
| − |                 usage(); sys.exit(0)
 |  | 
| − |         elif opts[0][0][0]=='-h':
 |  | 
| − |                 usage(); sys.exit(0)
 |  | 
| − |         elif opts[0][0][0]=='-t':
 |  | 
| − |                 count=True
 |  | 
| − |         elif opts[0][0][0]=='-f':
 |  | 
| − |                 count=False
 |  | 
| − |   |  | 
| − | beancounter_data=open('/proc/user_beancounters','r')
 |  | 
| − | data_read=pickle_data(current_data,'read',count,picklefilepath)
 |  | 
| − | current_data = compare_data(beancounter_data,data_read,count)
 |  | 
| − | pickle_data(current_data,'write',count,picklefilepath)
 |  | 
| − | </source>
 |  | 
|  |  |  |  | 
|  | + | Here is an alternative script, also written in Python: | 
|  | + | [http://www.kbrandt.com/2008/10/openvz-beancounters-nagios-script.html OpenVZ Nagios Bean Counters Script] | 
|  |  |  |  | 
|  |  |  |  |