Editing Monitoring /proc/user beancounters with nagios

Jump to: navigation, search

Warning: You are not logged in. Your IP address will be publicly visible if you make any edits. If you log in or create an account, your edits will be attributed to your username, along with other benefits.

The edit can be undone. Please check the comparison below to verify that this is what you want to do, and then save the changes below to finish undoing the edit.
Latest revision Your text
Line 4: Line 4:
 
Works as a Nagios plugin with option '-f', or reports an increase of a failcnt value by mail when run (e.g. as a cron job) with option '-t'. We use both modes so that we still notice a peak that happened between two Nagios checks.
 
Works as a Nagios plugin with option '-f', or reports an increase of a failcnt value by mail when run (e.g. as a cron job) with option '-t'. We use both modes so that we still notice a peak that happened between two Nagios checks.
  
The most current version of this script is available at http://github.com/peletiah/openvz/tree/master
+
<source lang=python>
 +
#!/usr/bin/python
 +
# Copyright (C) 2008 Christian Benke
 +
# Distributed under the terms of the GNU General Public License v2
 +
# v0.2 2008-04-04
 +
# Christian Benke <c.benke  gmail  com>
  
 +
import string
 +
import pickle
 +
import sys
 +
import getopt
 +
import re
  
 +
# Module-level state shared by the functions of this script.
veid = ''                # placeholder; compare_data() parses the real id per line
current_data = dict()    # {veid: {resource: [resource, held, maxheld, barrier, limit, failcnt]}}
opts = None              # replaced by the getopt result at the bottom of the script
beancounter_data = None  # replaced by the open /proc/user_beancounters handle

# NOTE(review): a fixed, predictable name in /tmp can be pre-created or
# symlinked by any local user, and this file is later passed to pickle.load().
# Consider moving it to a directory only root can write to.
picklefilepath = '/tmp/beancounters_pickledump'
  
==Alternative Script==
+
#-------- find the hostname for each veid ---:
 +
 
 +
def find_veid(veid, conf_dir='/etc/vz/conf/'):
    """Return the short hostname configured for container *veid*.

    veid 0 is reported as 'OpenVZ HN' without reading any file.  For every
    other id the file ``<conf_dir>/<veid>.conf`` is scanned for its
    ``HOSTNAME=`` line and the part of the value before the first dot is
    returned.

    veid     -- container id (int or numeric string)
    conf_dir -- directory holding the per-container ``.conf`` files

    Returns the short hostname, or ``str(veid)`` when the config file has no
    HOSTNAME entry, so callers always get a printable label.
    Raises IOError/OSError when the config file cannot be opened.
    """
    import os

    # Decide before opening anything: the node itself needs no config lookup.
    if int(veid) == 0:
        return 'OpenVZ HN'

    conf_path = os.path.join(conf_dir, str(veid) + '.conf')
    # "with" closes the handle on every exit path, including the early return.
    with open(conf_path, 'r') as veid_conf:
        for line in veid_conf:
            key, sep, value = line.strip().partition('=')
            # Exact key match, so commented-out "#HOSTNAME=..." lines are ignored.
            if not sep or key.strip() != 'HOSTNAME':
                continue
            fqdn = value.strip().strip('"\'')
            return fqdn.split('.')[0]

    return str(veid)
 +
 
 +
# ---------- send mail in case of a counter-change
 +
 
 +
def send_mail(count_change, sendmail="/usr/lib/sendmail", recipient="to@example.com"):
    """Mail a failcnt report by piping a message into sendmail.

    count_change -- report text (str) appended to the fixed introduction
    sendmail     -- sendmail command; it is run through the shell as
                    "<sendmail> -t"
    recipient    -- address written into the To: header

    Returns None.  A non-zero exit status of the command is printed to
    stdout rather than raised.
    """
    import os

    message = "".join([
        "From: root\n",
        "To: %s\n" % recipient,
        "Subject: Beancounters changed in the last 5 minutes\n",
        "\n",  # blank line separating headers from body
        "The Beancounter-failcnt value of the following veid(s) and resource(s) has \n",
        "increased in the last 5 minutes:\n\n",
        count_change,
    ])

    # -t tells sendmail to take the recipients from the message headers.
    p = os.popen("%s -t" % sendmail, "w")
    try:
        p.write(message)
    finally:
        # Always reap the child, even if the write failed.
        sts = p.close()
    if sts is not None:
        print("Sendmail exit status %s" % sts)
 +
 
 +
#---------- compare the failcnt-values
 +
 
 +
def cntcheck(data_read, current_data, veid, fields, count, count_change):
    """Append a report line to *count_change* if a failcnt value has grown.

    data_read    -- snapshot of the previous run ({veid: {resource: fields}}),
                    or a false value / the string '0' when there is none
    current_data -- snapshot being built in this run, same layout
    veid         -- container id the current line belongs to
    fields       -- columns of the current line; fields[0] is the resource
                    name and index 5 of a stored row is its failcnt
    count        -- true when the script runs in failcnt mode
    count_change -- report text accumulated so far

    Returns *count_change*, extended by one line when the failcnt of
    fields[0] is higher now than in *data_read*.
    """
    # '0' is the marker pickle_data() passes when no previous dump exists.
    if not count or not data_read or data_read == '0':
        return count_change

    resource = fields[0]
    try:
        previous = data_read[veid][resource][5]
    except (KeyError, IndexError):
        # Container or resource was not present at the last run:
        # there is nothing to compare against.
        return count_change
    current = current_data[veid][resource][5]

    # The values are strings taken from the file; compare them as numbers.
    # As strings '9' < '10' is False and '10' < '9' is True.
    if int(previous) < int(current):
        hostname = find_veid(veid)
        count_change = (str(count_change) + str(hostname) + ': ' + str(resource)
                        + ' failcnt has changed from ' + str(previous)
                        + ' to ' + str(current) + '\n')
    return count_change
 +
 
 +
#---------- compare the current value with barrier/limit value
 +
 
 +
def barriercheck(data_read, current_data, veid, fields, count, barrier_break, ratio=0.9):
    """Append "<host>: <resource> " to *barrier_break* if usage is too high.

    data_read     -- unused; kept so the signature mirrors cntcheck()
    current_data  -- snapshot of this run ({veid: {resource: row}})
    veid          -- container id the current line belongs to
    fields        -- columns of the current line; fields[0] is the resource
    count         -- true in failcnt mode, in which case nothing is checked
    barrier_break -- alarm text accumulated so far
    ratio         -- fraction of the barrier/limit above which to alarm

    Index 1 of the stored row is compared against index 4 for oomguarpages
    and physpages, and against index 3 for every other resource.

    Returns *barrier_break*, extended when the value exceeds the threshold.
    """
    if count:
        return barrier_break

    row = current_data[veid][fields[0]]
    resource = row[0]
    # for oomguarpages and physpages only the limit-value is relevant
    threshold_index = 4 if resource in ('oomguarpages', 'physpages') else 3

    if int(row[1]) > int(row[threshold_index]) * ratio:
        hostname = find_veid(veid)
        barrier_break = str(barrier_break) + str(hostname) + ': ' + str(resource) + ' '
    return barrier_break
 +
 
 +
 
 +
#------------ read user_beancounter and handle the result of the comparison subroutines
 +
 
 +
def compare_data(beancounter_data, data_read, count):
    """Parse the beancounter lines and run the check selected by *count*.

    beancounter_data -- iterable of lines (the open /proc/user_beancounters)
    data_read        -- snapshot of the previous run, passed on to the checks
    count            -- true: failcnt mode, false: barrier/limit mode

    Every parsed row is stored in the module-level ``current_data`` dict as
    ``current_data[veid][resource] = fields``; that dict is mutated in place.

    Barrier/limit mode never returns: it prints the result and exits with
    status 2 when a threshold was exceeded, otherwise with status 0.
    Failcnt mode mails the changes (if any) via send_mail() and returns
    ``current_data``.
    """
    count_change = str()
    barrier_break = str()
    veid = None  # id of the container whose rows are currently being read

    for line in beancounter_data:
        if 'Version' in line or 'uid' in line or 'dummy' in line:
            continue

        fields = line.split()
        if len(fields) == 7:
            # First row of a container carries "<veid>:" as an extra column.
            veid = int(fields[0][:-1])
            fields.pop(0)  # remove the first element
            current_data[veid] = dict()
        elif veid is None or len(fields) < 6:
            # Blank or truncated line, or a row before any container header:
            # the checks below index up to column 5, so skip it.
            continue
        current_data[veid][fields[0]] = fields

        # ------ check barrier/limit
        barrier_break = barriercheck(data_read, current_data, veid, fields, count, barrier_break)

        # ------ check failcnt
        count_change = cntcheck(data_read, current_data, veid, fields, count, count_change)

    if not count:
        if barrier_break:
            print(barrier_break)
            sys.exit(2)
        print('All Beancounters OK')
        sys.exit(0)

    if count_change:
        send_mail(count_change)
    return current_data
 +
 
 +
 
 +
# ----- pickle data - read or write
 +
 
 +
def pickle_data(current_data, action, count, picklefilepath):
    """Write the current snapshot to disk or read the previous one back.

    current_data   -- snapshot to store (only used for action 'write')
    action         -- 'write' or 'read'
    count          -- mode flag, passed through to compare_data() on fallback
    picklefilepath -- path of the dump file

    'write' dumps a non-empty *current_data* and returns None; an empty one
    is only reported on stdout.  'read' returns the unpickled snapshot.

    If the file cannot be opened, or holds no complete pickle, a fresh
    snapshot is built with compare_data() from the module-level
    ``beancounter_data`` and written to *picklefilepath*; None is returned.
    """
    try:
        if action == 'write':
            if current_data:
                # Pickle streams are bytes: binary mode is required on
                # Python 3 and harmless on Python 2.
                with open(picklefilepath, 'wb') as picklefile:
                    pickle.dump(current_data, picklefile)
                return
            else:
                print('current_data is empty: ' + str(current_data))
        elif action == 'read':
            # NOTE(review): pickle.load() can execute code embedded in the
            # file; make sure only trusted users can write picklefilepath.
            with open(picklefilepath, 'rb') as picklefile:
                data_read = pickle.load(picklefile)
            if not data_read:
                print('DATA_READ IS NONE:' + str(data_read))
            return data_read
    except (IOError, EOFError, pickle.UnpicklingError):
        # No usable previous dump (missing, empty or truncated file):
        # start over from the current counters.
        current_data = compare_data(beancounter_data, '0', count)
        with open(picklefilepath, 'wb') as picklefile:
            pickle.dump(current_data, picklefile)
 +
 
 +
# ------- print script usage
 +
 
 +
def usage(prog="check_beancounter.py"):
    """Print the command-line help text to stdout.

    prog -- script name shown in the help text
    """
    print("""
%(prog)s : Check if failcounters increase or resource-values break barriers or limits

%(prog)s [-tfh]

-h                  print this message

-t                  Check if failcnt-values have increased since the last run
-f                  Check if current value of a resource is higher than barrier/limit
""" % {'prog': prog})
 +
 
 +
 
 +
# ------- command-line handling and main flow
try:
    opts = getopt.getopt(sys.argv[1:], 'thf')
except getopt.GetoptError as err:
    # Unknown switch: explain instead of dying with a traceback.
    # Exit status 3 is what Nagios treats as UNKNOWN.
    print(str(err))
    usage()
    sys.exit(3)

# Only the first switch on the command line selects the mode.
flags = [flag for flag, _value in opts[0]]
if not flags or flags[0] == '-h':
    usage()
    sys.exit(0)
count = (flags[0] == '-t')  # True: failcnt mode (-t), False: barrier/limit mode (-f)

# beancounter_data stays a module-level name because pickle_data() falls back
# to it when no previous dump can be read.  The "with" block closes the file
# even when compare_data() leaves through sys.exit().
with open('/proc/user_beancounters', 'r') as beancounter_data:
    data_read = pickle_data(current_data, 'read', count, picklefilepath)
    current_data = compare_data(beancounter_data, data_read, count)
pickle_data(current_data, 'write', count, picklefilepath)
 +
</source>
  
 
Here is an alternative script, also written in python:
 
Here is an alternative script, also written in python:

Please note that all contributions to OpenVZ Virtuozzo Containers Wiki may be edited, altered, or removed by other contributors. If you don't want your writing to be edited mercilessly, then don't submit it here.
If you are going to add external links to an article, read the External links policy first!

To edit this page, please answer the question that appears below (more info):

Cancel Editing help (opens in new window)