Changes

Monitoring /proc/user_beancounters with nagios

887 bytes added, 15:48, 4 April 2008

no edit summary

# Distributed under the terms of the GNU General Public License v2

# v0.2 2008-04-04

# Christian Benke <benkokakao gmail com>

def find_veid(veid):
    """Return a short, human-readable name for the container *veid*.

    veid 0 is reported as 'OpenVZ HN' without touching the filesystem.
    For any other veid the first line of /etc/vz/conf/<veid>.conf that
    contains "HOSTNAME" is parsed: double quotes and the newline are
    removed, and the text between the first '=' and the first '.' that
    follows is returned (i.e. the host part of the configured name).

    If the config has no usable HOSTNAME line, the veid itself is returned
    as a string so that callers always have something meaningful to print.

    Raises IOError if the container config file cannot be opened and
    ValueError if *veid* is not numeric.
    """
    # The hardware node needs no config lookup, so decide this before
    # opening any file.
    if int(veid) == 0:
        return 'OpenVZ HN'

    veid_conf = open('/etc/vz/conf/' + str(veid) + '.conf', 'r')
    # try/finally instead of "with" keeps this usable on old Python 2
    # interpreters while still guaranteeing the handle is closed.
    try:
        for line in veid_conf:
            if "HOSTNAME" not in line:
                continue
            parts = line.replace('"', '').replace('\n', '').split('=')
            if len(parts) < 2:
                # "HOSTNAME" mentioned without an assignment; keep looking.
                continue
            return parts[1].split('.')[0]
    finally:
        veid_conf.close()

    # No HOSTNAME entry found: fall back to the numeric id.
    return str(veid)

# ---------- send mail in case of a counter-change

def send_mail(count_change):
    """Mail the failcnt report *count_change* through the local sendmail.

    The message is piped to "/usr/lib/sendmail -t", which takes the
    recipient from the "To:" header written below. If closing the pipe
    yields a non-None exit status, that status is printed.
    """
    import os

    sendmail = "/usr/lib/sendmail"  # sendmail location
    p = os.popen("%s -t" % sendmail, "w")
    try:
        p.write("From: root\n")
        p.write("To: benke@inqnet.at\n")
        p.write("Subject: Beancounters changed in the last 5 minutes\n")
        p.write("\n")  # blank line separating headers from body
        p.write("The Beancounter-failcnt value of the following veid(s) and resource(s) has \n")
        p.write("increased in the last 5 minutes:\n\n")
        p.write(count_change)
    finally:
        # Always close the pipe, even if a write failed, so the child
        # process is reaped.
        sts = p.close()
    if sts is not None:
        # Single pre-formatted argument: prints identically whether print
        # is a statement or a function.
        print("Sendmail exit status %s" % sts)


#---------- compare the failcnt-values
def cntcheck(data_read,current_data,veid,fields,count,count_change):
    """Append a line to *count_change* if a failcnt value has increased.

    data_read    -- counters stored by the previous run (may be empty or '0')
    current_data -- counters parsed in this run
    veid, fields -- container id and the split beancounter row; fields[0]
                    is the resource name and index 5 of a row is compared
                    as the fail counter
    count        -- the comparison only runs when this equals True
    count_change -- report text accumulated so far

    Returns the (possibly extended) report text.
    """
    # '0' is compared by value; an identity test against a string literal
    # only works by accident of interning.
    if data_read and count == True and data_read != '0':
        previous = data_read[veid][fields[0]][5]
        current = current_data[veid][fields[0]][5]
        # Compare numerically: as strings, '9' < '10' would be False.
        if int(previous) < int(current):
            hostname = find_veid(veid)
            count_change = (str(count_change) + str(hostname) + ': '
                            + str(fields[0]) + ' failcnt has changed from '
                            + str(previous) + ' to ' + str(current) + '\n')
    return count_change


#---------- compare the current value with barrier/limit value
def barriercheck(data_read,current_data,veid,fields,count,barrier_break):
    """Append the resource to *barrier_break* if it exceeds 90% of its bound.

    For 'oomguarpages' and 'physpages' the value at index 4 of the row is
    used as the bound (only the limit is relevant for them); every other
    resource is checked against index 3. Index 1 is the value being
    checked. The check only runs when *count* equals False. *data_read* is
    accepted for signature symmetry with cntcheck and is not used.

    Returns the (possibly extended) space-separated list of
    "<host>: <resource>" entries.
    """
    if count == False:
        row = current_data[veid][fields[0]]
        resource = row[0]
        if resource in ('oomguarpages', 'physpages'):
            bound = row[4]
        else:
            bound = row[3]
        if int(row[1]) > int(bound) * 0.9:
            hostname = find_veid(veid)
            # Trailing space keeps consecutive entries from running together.
            barrier_break = (str(barrier_break) + str(hostname) + ': '
                             + str(resource) + ' ')
    return barrier_break

~~\n''' + mailbody + count_change)~~

~~server.quit()~~

#------------read user_beancounter and handle the result of the comparison subroutines

def compare_data(beancounter_data,data_read,count):

count_change=str()

barrier_break=str()

~~count_change=str()~~

for line in beancounter_data:

if 'Version' in line or 'uid' in line or 'dummy' in line:

i=i+1

current_data[veid][fields[0]]=fields

~~if data_read and count == True and data_read is not '0': #comparing counters of new data with previous run~~

~~if data_read[veid][fields[0]][5] < current_data[veid][fields[0]][5]:~~

~~if int(veid) != 0:~~

~~hostname=find_veid(veid)~~

~~else:~~

~~hostname='OpenVZ Hardware Node'~~

~~count_change=str(count_change) + str(hostname) + ': ' + str(fields[0]) + ' failcnt has changed from ' + data_read[veid][fields[0]][5]~~

~~+ ' to ' + str(current_data[veid][fields[0]][5]) + '\n'~~

~~if count == False:~~ #~~comparing current level with barrier/limit~~------ check failcnt ~~if current_data[veid][fields[0]][0] =~~barrier_break= ~~'oomguarpages': #for oomguarpages and physpages only the limit-value is relevant~~ ~~if int~~barriercheck(data_read,current_data[,veid][,fields~~[0]][1]) > int(current_data[veid][fields[0]][4])*0.9:~~ ~~barrier_break = str(~~,count,barrier_break) ~~+ str(veid) + ': ' + str(current_data[veid][fields[0]][0]) + ' '~~ ~~elif current_data[veid][fields[0]][0] == 'physpages':~~ ~~if int(current_data[veid][fields[0]][1]) > int(current_data[veid][fields[0]][4])*0.9:~~ ~~barrier_break = str(barrier_break) + str(veid) + ': ' + str(current_data[veid][fields[0]][0]) + ' '~~ # ------ check barrier/limit ~~else:~~ ~~if int~~count_change=cntcheck(data_read,current_data[,veid][,fields~~[0]][1]~~,count,count_change) ~~> int(current_data[veid][fields[0]][3])*0.9:~~ ~~barrier_break = str(barrier_break) + str(veid) + ': ' + str(current_data[veid][fields[0]][0]) + ' '~~

if barrier_break and count == False:

print barrier_break

if count_change and count == True:

print 'sending mail'

send_mail(count_change)

print 'mail sent'

return current_data

elif count == True:

# ------- print script usage

def usage(prog="~~check_beancounters~~check_beancounter.py"):

print """

~~check_beancounters~~check_beancounter.py : Check if failcounters increase or resource-values break barriers or limits ~~and failcounters increase~~

~~check_beancounters~~check_beancounter.py [-tfh]

-h print this message

Benko

14

edits

OpenVZ Virtuozzo Containers Wiki β

Changes

Monitoring /proc/user_beancounters with nagios

OpenVZ Virtuozzo Containers Wiki ^β