Open main menu

OpenVZ Virtuozzo Containers Wiki β

Changes

Monitoring /proc/user beancounters with nagios

7,453 bytes removed, 11:21, 24 November 2008
no edit summary
The script works as a Nagios plugin when called with option '-f', or reports an increase of a failcnt value by mail when run (e.g. as a cron job) with option '-t'. We use it in both modes so that we also notice a peak that occurs between two Nagios checks.
The most current version of this script is available at http://github.com/peletiah/openvz/tree/master
<source lang=python>#!/usr/bin/python
# Copyright (C) 2008 Christian Benke
# Distributed under the terms of the GNU General Public License v2
# v0.2 2008-04-04
# Christian Benke <c.benke gmail com>
import string
import pickle
import sys
import getopt
import re
# Module-level state shared by the functions of this script.

# Placeholder for the container id; starts out as an empty string.
veid = ''

# Parsed beancounter values, filled in by compare_data().
current_data = {}

# Result of the command-line parsing; set when the script runs.
opts = None

# Open handle on the beancounter file; set when the script runs.
beancounter_data = None

# File in which the values of the previous run are pickled.
picklefilepath = '/tmp/beancounters_pickledump'
# NOTE: everything below runs unchanged on Python 2.6+ and Python 3; print()
# is only ever called with a single string argument for that reason.

# Positions of the values inside one parsed resource row
# [resource, current, maxheld, barrier, limit, failcnt].
_CURRENT = 1
_BARRIER = 3
_LIMIT = 4
_FAILCNT = 5

# Resources that barriercheck() compares against the limit column instead of
# the barrier column.
_LIMIT_ONLY_RESOURCES = ('oomguarpages', 'physpages')


# -------- find the hostname for each veid
def find_veid(veid):
    """Return a short display name for the container *veid*.

    veid 0 is reported as 'OpenVZ HN' without touching any file.  For every
    other veid the HOSTNAME line of /etc/vz/conf/<veid>.conf is read, double
    quotes are removed and the part before the first dot is returned.

    If the config file cannot be read or holds no usable HOSTNAME line, the
    veid itself is returned as a string so that a report still identifies
    the container.
    """
    if int(veid) == 0:
        return 'OpenVZ HN'

    conf_path = '/etc/vz/conf/' + str(veid) + '.conf'
    try:
        with open(conf_path, 'r') as veid_conf:
            for line in veid_conf:
                if "HOSTNAME" not in line:
                    continue
                parts = line.replace('"', '').replace('\n', '').split('=')
                if len(parts) > 1:
                    return parts[1].split('.')[0]
    except IOError:
        # Missing or unreadable config: fall back to the numeric id rather
        # than aborting the whole check.
        return str(veid)
    return str(veid)


# ---------- send mail in case of a counter-change
def send_mail(count_change, recipient="to@example.com", sender="root",
              sendmail="/usr/lib/sendmail"):
    """Mail the failcnt report *count_change* through the sendmail binary.

    count_change -- report text, one line per changed counter
    recipient    -- address placed in the To: header
    sender       -- address placed in the From: header
    sendmail     -- path of the sendmail executable (invoked with '-t')

    Prints the exit status when sendmail reports one; returns None.
    """
    import os

    message = (
        "From: " + sender + "\n"
        "To: " + recipient + "\n"
        "Subject: Beancounters changed in the last 5 minutes\n"
        "\n"  # blank line separating headers from body
        "The Beancounter-failcnt value of the following veid(s) and resource(s) has \n"
        "increased in the last 5 minutes:\n\n"
    ) + count_change

    pipe = os.popen("%s -t" % sendmail, "w")
    pipe.write(message)
    status = pipe.close()
    if status is not None:
        print("Sendmail exit status %s" % status)


# ---------- compare the failcnt-values
def cntcheck(data_read, current_data, veid, fields, count, count_change):
    """Append a line to *count_change* if the failcnt of a resource grew.

    data_read     -- snapshot of the previous run ({veid: {resource: row}}),
                     or a false value / the string '0' when none exists
    current_data  -- snapshot of the current run, same layout
    veid          -- container id of the row being checked
    fields        -- parsed row; fields[0] is the resource name
    count         -- only when true is the comparison performed
    count_change  -- report text accumulated so far

    Returns the (possibly extended) report text.
    """
    if not count or not data_read or data_read == '0':
        return count_change

    resource = fields[0]
    try:
        previous = data_read[veid][resource][_FAILCNT]
    except (KeyError, IndexError, TypeError):
        # Container or resource did not exist in the previous snapshot, so
        # there is nothing to compare against.
        return count_change
    current = current_data[veid][resource][_FAILCNT]

    # The values are strings taken from the file; compare them numerically,
    # otherwise '9' < '10' is False and the increase goes unnoticed.
    if int(previous) < int(current):
        hostname = find_veid(veid)
        count_change = (str(count_change) + str(hostname) + ': ' +
                        str(resource) + ' failcnt has changed from ' +
                        str(previous) + ' to ' + str(current) + '\n')
    return count_change


# ---------- compare the current value with barrier/limit value
def barriercheck(data_read, current_data, veid, fields, count, barrier_break):
    """Append '<host>: <resource> ' to *barrier_break* when usage is high.

    The current value of the resource is compared with 90% of its barrier;
    for 'oomguarpages' and 'physpages' the limit is used instead.  The check
    only runs when *count* is false.  *data_read* is accepted for symmetry
    with cntcheck() and is not used.

    Returns the (possibly extended) report text.
    """
    if count:
        return barrier_break

    row = current_data[veid][fields[0]]
    resource = row[0]
    if resource in _LIMIT_ONLY_RESOURCES:
        threshold_index = _LIMIT
    else:
        threshold_index = _BARRIER

    if int(row[_CURRENT]) > int(row[threshold_index]) * 0.9:
        hostname = find_veid(veid)
        barrier_break = (str(barrier_break) + str(hostname) + ': ' +
                         str(resource) + ' ')
    return barrier_break


# ------------ read user_beancounter and handle the result of the comparison
# subroutines
def compare_data(beancounter_data, data_read, count):
    """Parse the beancounter lines and run the barrier and failcnt checks.

    beancounter_data -- iterable of lines (e.g. the open beancounter file)
    data_read        -- snapshot of the previous run, see cntcheck()
    count            -- true: failcnt mode, false: barrier/limit mode

    The parsed rows are stored in the module-level dict ``current_data``.

    In barrier/limit mode the function prints the result and terminates the
    process: exit status 2 with the list of offending resources, otherwise
    exit status 0 with 'All Beancounters OK'.  In failcnt mode it mails the
    report if any counter increased and returns ``current_data``.
    """
    count_change = ''
    barrier_break = ''
    veid = None

    for line in beancounter_data:
        if 'Version' in line or 'uid' in line or 'dummy' in line:
            continue
        fields = line.split()
        if len(fields) == 7:
            # First row of a container: '<veid>:' precedes the resource.
            veid = int(fields[0][:-1])
            fields.pop(0)
            current_data[veid] = {}
        if veid is None or len(fields) < 6:
            # Blank/malformed line, or a row before any container header.
            continue
        current_data[veid][fields[0]] = fields

        # ------ check barrier/limit
        barrier_break = barriercheck(data_read, current_data, veid, fields,
                                     count, barrier_break)
        # ------ check failcnt
        count_change = cntcheck(data_read, current_data, veid, fields,
                                count, count_change)

    if not count:
        if barrier_break:
            print(barrier_break)
            sys.exit(2)
        print('All Beancounters OK')
        sys.exit(0)

    if count_change:
        send_mail(count_change)
    return current_data


# ----- pickle data - read or write
def pickle_data(current_data, action, count, picklefilepath):
    """Read or write the snapshot stored in *picklefilepath*.

    current_data   -- snapshot to store when action is 'write'
    action         -- 'write' or 'read'
    count          -- mode flag, handed on to compare_data() on recovery
    picklefilepath -- path of the pickle file

    'read' returns the stored snapshot, 'write' returns None.  If the file
    cannot be opened or decoded, a fresh snapshot is produced with
    compare_data() from the module-level ``beancounter_data`` and written
    to the file; None is returned in that case.

    SECURITY NOTE(review): pickle.load() executes whatever the file tells it
    to.  The default path used by this script lives in /tmp, where another
    local user could plant a file - consider a directory only root can
    write to.
    """
    try:
        if action == 'write':
            if current_data:
                # Binary mode is what pickle expects on every Python version.
                with open(picklefilepath, 'wb') as picklefile:
                    pickle.dump(current_data, picklefile)
                return None
            print('current_data is empty: ' + str(current_data))
        elif action == 'read':
            with open(picklefilepath, 'rb') as picklefile:
                data_read = pickle.load(picklefile)
            if not data_read:
                print('DATA_READ IS NONE:' + str(data_read))
            return data_read
    except (IOError, EOFError, pickle.UnpicklingError):
        # No usable snapshot yet (first run, or a truncated/corrupt file):
        # build one now; '0' tells cntcheck() there is nothing to compare.
        current_data = compare_data(beancounter_data, '0', count)
        with open(picklefilepath, 'wb') as picklefile:
            pickle.dump(current_data, picklefile)
    return None


# ------- print script usage
def usage(prog="check_beancounter.py"):
    """Print the command-line help, using *prog* as the program name."""
    print("""%(prog)s : Check if failcounters increase or resource-values break barriers or limits

 %(prog)s [-tfh]

 -h print this message

 -t Check if failcnt-values have increased since the last run
 -f Check if current value of a resource is higher than barrier/limit
 """ % {'prog': prog})


def main(argv=None):
    """Parse the command line and run the selected check.

    Only the first option given is evaluated: '-t' selects the failcnt
    comparison, '-f' the barrier/limit check, '-h' or no option prints the
    usage text and exits with status 0.  An unknown option prints the error
    plus the usage text and exits with status 3.
    """
    global opts, beancounter_data, current_data

    if argv is None:
        argv = sys.argv[1:]
    try:
        opts = getopt.getopt(argv, 'thf')
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(3)

    flags = [flag for flag, _value in opts[0]]
    if not flags or flags[0] == '-h':
        usage()
        sys.exit(0)
    count = flags[0] == '-t'  # '-t' -> True, '-f' -> False

    beancounter_data = open('/proc/user_beancounters', 'r')
    try:
        data_read = pickle_data(current_data, 'read', count, picklefilepath)
        current_data = compare_data(beancounter_data, data_read, count)
        pickle_data(current_data, 'write', count, picklefilepath)
    finally:
        # Also reached when compare_data() leaves through sys.exit().
        beancounter_data.close()


if __name__ == "__main__":
    main()
# End of the script; the tag closing the wiki code listing follows: </source>
Alternative Script
Here is an alternative script, also written in Python:
14
edits