Changes

Jump to: navigation, search

Monitoring /proc/user_beancounters with Nagios

887 bytes added, 15:48, 4 April 2008
no edit summary
# Copyright (C) 2008 Christian Benke
# Distributed under the terms of the GNU General Public License v2
# v0.2 2008-04-04
# Christian Benke <benkokakao gmail com>
def find_veid(veid, conf_dir='/etc/vz/conf'):
    """Return the short hostname configured for container *veid*.

    The name is read from the ``HOSTNAME`` entry of ``<conf_dir>/<veid>.conf``:
    double quotes are removed and only the part before the first dot is kept
    (``HOSTNAME="web1.example.com"`` gives ``web1``).

    Args:
        veid: Container id, as an int or a numeric string.
        conf_dir: Directory holding the per-container ``.conf`` files.

    Returns:
        ``'OpenVZ HN'`` for veid 0, the short hostname when a ``HOSTNAME``
        entry is found, otherwise the veid itself as a string so callers
        still get a usable label.

    Raises:
        ValueError: if *veid* is not numeric.
        IOError: if the container's config file cannot be opened.
    """
    import os  # local import: the file's import block is not visible here

    # veid 0 is reported with a fixed label; no config file is needed, so
    # do not try to open one.
    if int(veid) == 0:
        return 'OpenVZ HN'

    veid_conf = open(os.path.join(conf_dir, str(veid) + '.conf'), 'r')
    try:
        for line in veid_conf:
            # A line mentioning HOSTNAME without '=' cannot carry a value.
            if "HOSTNAME" not in line or '=' not in line:
                continue
            cleaned = line.replace('"', '').replace('\n', '')
            fqdn = cleaned.split('=')[1].strip()
            return fqdn.split('.')[0]
    finally:
        veid_conf.close()

    # No HOSTNAME entry: fall back to the id instead of returning None.
    return str(veid)
# ---------- send mail in case of a counter-change
 
def send_mail(count_change):
    """Mail a report of increased failcnt values through the local sendmail.

    Args:
        count_change: Report text, one line per changed counter; written
            verbatim as the end of the message body.

    The message is piped to ``/usr/lib/sendmail -t``, which takes the
    recipients from the headers written below. A non-None exit status from
    the pipe is printed.
    """
    import os

    sendmail = "/usr/lib/sendmail"  # sendmail location
    p = os.popen("%s -t" % sendmail, "w")
    # NOTE(review): the sender address has no domain part - confirm intended.
    p.write("From: root@\n")
    p.write("To: benke@inqnet.at\n")
    p.write("Subject: Beancounters changed in the last 5 minutes\n")
    p.write("\n")  # blank line separating headers from body
    p.write("The Beancounter-failcnt value of the following veid(s) and resource(s) has \n")
    p.write("increased in the last 5 minutes:\n\n")
    p.write(count_change)
    sts = p.close()
    if sts is not None:
        print("Sendmail exit status %s" % sts)


#---------- compare the failcnt-values
def cntcheck(data_read, current_data, veid, fields, count, count_change):
    """Append a report line when a resource's failcnt grew since the last run.

    Args:
        data_read: Counters of the previous run, indexed as
            ``data_read[veid][resource]``; may be empty or ``'0'`` when no
            previous data exists.
        current_data: Counters of this run, same layout.
        veid: Container id used as key into both mappings.
        fields: Columns of the current line; ``fields[0]`` is the resource
            name and index 5 of a stored entry is its failcnt.
        count: Only when this equals True is the comparison performed.
        count_change: Report text accumulated so far.

    Returns:
        *count_change*, extended by one line if the failcnt increased.
    """
    if data_read and count == True and data_read != '0':
        # comparing counters of new data with previous run; compare as
        # numbers, since as strings '9' would not be smaller than '10'
        previous = data_read[veid][fields[0]][5]
        current = current_data[veid][fields[0]][5]
        if int(previous) < int(current):
            hostname = find_veid(veid)
            count_change = (str(count_change) + str(hostname) + ': '
                            + str(fields[0]) + ' failcnt has changed from '
                            + str(previous) + ' to ' + str(current) + '\n')
    return count_change


#---------- compare the current value with barrier/limit value
def barriercheck(data_read, current_data, veid, fields, count, barrier_break):
    """Append the resource to the report when it exceeds 90% of its threshold.

    Args:
        data_read: Unused; kept so the signature parallels ``cntcheck``.
        current_data: Counters of this run, indexed as
            ``current_data[veid][resource]``. Index 0 of an entry is the
            resource name, index 1 the value that is checked, index 3 the
            barrier and index 4 the limit.
        veid: Container id used as key into *current_data*.
        fields: Columns of the current line; ``fields[0]`` is the resource.
        count: Only when this equals False is the check performed.
        barrier_break: Report text accumulated so far.

    Returns:
        *barrier_break*, extended by ``'<hostname>: <resource> '`` if the
        value is above 90% of the relevant threshold.
    """
    if count == False:
        # comparing current level with barrier/limit
        entry = current_data[veid][fields[0]]
        if entry[0] in ('oomguarpages', 'physpages'):
            # for oomguarpages and physpages only the limit-value is relevant
            threshold = entry[4]
        else:
            threshold = entry[3]
        if int(entry[1]) > int(threshold) * 0.9:
            hostname = find_veid(veid)
            barrier_break = (str(barrier_break) + str(hostname) + ': '
                             + str(entry[0]) + ' ')
    return barrier_break
#------------ read data from user_beancounters and handle the result of the comparison subroutines
def compare_data(beancounter_data,data_read,count):
count_change=str()
barrier_break=str()
count_change=str()
for line in beancounter_data:
if 'Version' in line or 'uid' in line or 'dummy' in line:
i=i+1
current_data[veid][fields[0]]=fields
if data_read and count == True and data_read is not '0': #comparing counters of new data with previous run
if data_read[veid][fields[0]][5] < current_data[veid][fields[0]][5]:
if int(veid) != 0:
hostname=find_veid(veid)
else:
hostname='OpenVZ Hardware Node'
count_change=str(count_change) + str(hostname) + ': ' + str(fields[0]) + ' failcnt has changed from ' + data_read[veid][fields[0]][5]
+ ' to ' + str(current_data[veid][fields[0]][5]) + '\n'
if count == False: #comparing current level with barrier/limit------ check failcnt if current_data[veid][fields[0]][0] =barrier_break= 'oomguarpages': #for oomguarpages and physpages only the limit-value is relevant if intbarriercheck(data_read,current_data[,veid][,fields[0]][1]) > int(current_data[veid][fields[0]][4])*0.9: barrier_break = str(,count,barrier_break) + str(veid) + ': ' + str(current_data[veid][fields[0]][0]) + ' ' elif current_data[veid][fields[0]][0] == 'physpages': if int(current_data[veid][fields[0]][1]) > int(current_data[veid][fields[0]][4])*0.9: barrier_break = str(barrier_break) + str(veid) + ': ' + str(current_data[veid][fields[0]][0]) + ' ' # ------ check barrier/limit else: if intcount_change=cntcheck(data_read,current_data[,veid][,fields[0]][1],count,count_change) > int(current_data[veid][fields[0]][3])*0.9: barrier_break = str(barrier_break) + str(veid) + ': ' + str(current_data[veid][fields[0]][0]) + ' '
if barrier_break and count == False:
print barrier_break
if count_change and count == True:
print 'sending mail'
send_mail(count_change)
print 'mail sent'
return current_data
elif count == True:
# ------- print script usage
def usage(prog="check_beancounterscheck_beancounter.py"):
print """
check_beancounterscheck_beancounter.py : Check if failcounters increase or resource-values break barriers or limits and failcounters increase
check_beancounterscheck_beancounter.py [-tfh]
-h print this message
14
edits

Navigation menu