|
|
| (9 intermediate revisions by 3 users not shown) |
| Line 1: |
Line 1: |
| − | ==== Monitoring /proc/user_beancounters with nagios ====
| + | {{DISPLAYTITLE:Monitoring /proc/user_beancounters with nagios}} |
| | + | This script is meant to be registered locally on the OpenVZ hardware node (VZ HN) in <code>/etc/nagios/nrpe_local.conf</code> and, in addition, to be run as a standalone script from cron. |
| | | | |
| − | To be added locally on the VZ HN to /etc/nagios/nrpe_local.conf<br>
| + | With option '-c' the script works as a Nagios plugin; with option '-i' (e.g. when run as a cron job) it reports any increase of a failcnt value by mail. We use both modes so that we still notice a peak that occurred between two Nagios checks. |
| − | Works as nagios-plugin with option '-f' or reports an increase of a failcnt-value by mail if run e.g. as a cronjob with option '-t'. We use it with both cases to be sure that we see a peak in case it happened between the nagios-checks: | |
| | | | |
| − | <source lang=python>
| + | The most current version of this script is available at http://github.com/peletiah/openvz/tree/master |
| − | #!/usr/bin/python
| |
| − | # Copyright (C) 2008 Christian Benke
| |
| − | # Distributed under the terms of the GNU General Public License v2
| |
| − | # v0.1 2008-04-03
| |
| − | # Christian Benke <benkokakao gmail com>
| |
| | | | |
| − | import string
| |
| − | import pickle
| |
| − | import sys
| |
| − | import getopt
| |
| − | import re
| |
| − | import smtplib
| |
| − | import socket
| |
| | | | |
| − | veid=''
| |
| − | current_data=dict()
| |
| − | opts=None
| |
| − | beancounter_data=open('/proc/user_beancounters','r')
| |
| − | picklefilepath='/tmp/beancounters_pickledump'
| |
| | | | |
| − | #-------- find the hostname for each veid ---:
| + | ==Alternative Script== |
| − | | |
| − | def find_veid(veid):
| |
| − | veid_conf=open('/etc/vz/conf/' + str(veid) + '.conf','r')
| |
| − | for line in veid_conf:
| |
| − | if "HOSTNAME" in line:
| |
| − | quotes=re.compile("\"")
| |
| − | line=quotes.sub("",line)
| |
| − | linefeed=re.compile("\n")
| |
| − | line=linefeed.sub("",line)
| |
| − | fqdn=re.split('=',line)
| |
| − | hostname=re.split('\.',fqdn[1])[0]
| |
| − | return hostname
| |
| − | | |
| − | # ---------- send mail in case of a counter-change
| |
| − | def send_mail(count_change):
| |
| − | mailfrom = 'root@' + str(host)
| |
| − | mailto = 'to@example.com'
| |
| − | mailsubject = 'Beancounters changed in the last 5 minutes'
| |
| − | mailbody = 'The Beancounter-failcnt value of the following veid(s) and resource(s) has \nincreased in the last 5 minutes:\n\n'
| |
| − | server = smtplib.SMTP('localhost')
| |
| − | server.sendmail(mailfrom, [mailto], '''\
| |
| − | From:''' + mailfrom + '''\
| |
| − | \nTo:''' + mailto + '''\
| |
| − | \nSubject:''' + mailsubject + '''\
| |
| − | | |
| − | \n''' + mailbody + count_change)
| |
| − | server.quit()
| |
| − | | |
| − | #------------read raw and compare data from user_beancounters
| |
| − | | |
| − | def compare_data(beancounter_data,data_read,count):
| |
| − | barrier_break=str()
| |
| − | count_change=str()
| |
| − | for line in beancounter_data:
| |
| − | if 'Version' in line or 'uid' in line or 'dummy' in line:
| |
| − | continue
| |
| − | else:
| |
| − | fields=line.split( )
| |
| − | if len(fields) == 7:
| |
| − | i=0
| |
| − | veid=int(fields[0][:-1])
| |
| − | fields.pop(0) #remove the first element
| |
| − | current_data[veid]=dict()
| |
| − | current_data[veid][fields[0]]=fields
| |
| − | else:
| |
| − | i=i+1
| |
| − | current_data[veid][fields[0]]=fields
| |
| − | if data_read and count == True and data_read is not '0': #comparing counters of new data with previous run
| |
| − | if data_read[veid][fields[0]][5] < current_data[veid][fields[0]][5]:
| |
| − | if int(veid) != 0:
| |
| − | hostname=find_veid(veid)
| |
| − | else:
| |
| − | hostname='OpenVZ Hardware Node'
| |
| − | count_change=str(count_change) + str(hostname) + ': ' + str(fields[0]) + ' failcnt has changed from ' + data_read[veid][fields[0]][5]
| |
| − | + ' to ' + str(current_data[veid][fields[0]][5]) + '\n'
| |
| − | | |
| − | if count == False: #comparing current level with barrier/limit
| |
| − | if current_data[veid][fields[0]][0] == 'oomguarpages': #for oomguarpages and physpages only the limit-value is relevant
| |
| − | if int(current_data[veid][fields[0]][1]) > int(current_data[veid][fields[0]][4])*0.9:
| |
| − | barrier_break = str(barrier_break) + str(veid) + ': ' + str(current_data[veid][fields[0]][0]) + ' '
| |
| − | elif current_data[veid][fields[0]][0] == 'physpages':
| |
| − | if int(current_data[veid][fields[0]][1]) > int(current_data[veid][fields[0]][4])*0.9:
| |
| − | barrier_break = str(barrier_break) + str(veid) + ': ' + str(current_data[veid][fields[0]][0]) + ' '
| |
| − | else:
| |
| − | if int(current_data[veid][fields[0]][1]) > int(current_data[veid][fields[0]][3])*0.9:
| |
| − | barrier_break = str(barrier_break) + str(veid) + ': ' + str(current_data[veid][fields[0]][0]) + ' '
| |
| − | if barrier_break and count == False:
| |
| − | print barrier_break
| |
| − | sys.exit(2)
| |
| − | elif count == False:
| |
| − | print 'All Beancounters OK'
| |
| − | sys.exit(0)
| |
| − | | |
| − | if count_change and count == True:
| |
| − | send_mail(count_change)
| |
| − | return current_data
| |
| − | elif count == True:
| |
| − | return current_data
| |
| − | | |
| − | | |
| − | # ----- pickle data - read or write
| |
| − | | |
| − | def pickle_data(current_data,action,count,picklefilepath):
| |
| − | try:
| |
| − | picklefile = None
| |
| − | if action == 'write':
| |
| − | if current_data:
| |
| − | picklefile=open(picklefilepath,'w')
| |
| − | pickle.dump(current_data, picklefile)
| |
| − | picklefile.close()
| |
| − | return
| |
| − | else:
| |
| − | print 'current_data is empty: ' + str(current_data)
| |
| − | elif action == 'read':
| |
| − | picklefile=open(picklefilepath,'r')
| |
| − | data_read=pickle.load(picklefile)
| |
| − | picklefile.close()
| |
| − | if data_read:
| |
| − | return data_read
| |
| − | else:
| |
| − | print 'DATA_READ IS NONE:' + str(data_read)
| |
| − | return data_read
| |
| − | except IOError:
| |
| − | current_data = compare_data(beancounter_data,'0',count)
| |
| − | picklefile=open(picklefilepath,'w')
| |
| − | pickle.dump(current_data,picklefile)
| |
| − | picklefile.close()
| |
| − | | |
| − | # ------- print script usage
| |
| − | | |
| − | def usage(prog="check_beancounters.py"):
| |
| − | print """
| |
| − | check_beancounters.py : Check if resource-values break barriers or limits and failcounters increase
| |
| − | | |
| − | check_beancounters.py [-tfh]
| |
| − | | |
| − | -h print this message
| |
| − | | |
| − | -t Check if failcnt-values have increased since the last run
| |
| − | -f Check if current value of a resource is higher than barrier/limit
| |
| − | """
| |
| − | | |
| − | | |
| − | opts=getopt.getopt(sys.argv[1:], 'thf')
| |
| − | if opts:
| |
| − | if opts[0]==[]:
| |
| − | usage(); sys.exit(0)
| |
| − | elif opts[0][0][0]=='-h':
| |
| − | usage(); sys.exit(0)
| |
| − | elif opts[0][0][0]=='-t':
| |
| − | count=True
| |
| − | elif opts[0][0][0]=='-f':
| |
| − | count=False
| |
| − | | |
| − | | |
| − | data_read=pickle_data(current_data,'read',count,picklefilepath)
| |
| − | current_data = compare_data(beancounter_data,data_read,count)
| |
| − | pickle_data(current_data,'write',count,picklefilepath)
| |
| − | </source>
| |
| | | | |
| | + | Here is an alternative script, also written in Python: |
| | + | [http://www.kbrandt.com/2008/10/openvz-beancounters-nagios-script.html OpenVZ Nagios Bean Counters Script] |
| | | | |
| | | | |