Editing Monitoring /proc/user beancounters with nagios

Jump to: navigation, search

Warning: You are not logged in. Your IP address will be publicly visible if you make any edits. If you log in or create an account, your edits will be attributed to your username, along with other benefits.

The edit can be undone. Please check the comparison below to verify that this is what you want to do, and then save the changes below to finish undoing the edit.
Latest revision Your text
Line 1: Line 1:
{{DISPLAYTITLE:Monitoring /proc/user_beancounters with nagios}}
+
==== Monitoring /proc/user_beancounters with nagios ====
Install this script locally on the OpenVZ hardware node (HN): register it as a command in <code>/etc/nagios/nrpe_local.conf</code> and, in addition, run it as a standalone script from cron.
 
  
Works as a Nagios plugin with option '-c', or reports an increase of a failcnt value by mail if run (e.g. as a cron job) with option '-i'. We use both modes so that a peak which occurs between two Nagios checks is still noticed.
+
Add the check locally on the OpenVZ hardware node (HN) in /etc/nagios/nrpe_local.conf.<br>
 +
Works as a Nagios plugin with option '-f', or reports an increase of a failcnt value by mail if run (e.g. as a cron job) with option '-t'. We use both modes so that a peak which occurs between two Nagios checks is still noticed:
  
The most current version of this script is available at http://github.com/peletiah/openvz/tree/master
+
<source lang=python>
 +
#!/usr/bin/python
 +
# Copyright (C) 2008 Christian Benke
 +
# Distributed under the terms of the GNU General Public License v2
 +
# v0.1 2008-04-03
 +
# Christian Benke <benkokakao  gmail  com>
  
 +
import string
 +
import pickle
 +
import sys
 +
import getopt
 +
import re
 +
import smtplib
 +
import socket
  
 +
# ----- module-level state shared by the functions below
veid=''
# Parsed beancounter values: {veid: {resource name: list of column strings}}.
current_data=dict()
# Replaced by the result of getopt.getopt() once the command line is parsed.
opts=None
# NOTE: opened at import time; the handle is consumed by compare_data().
beancounter_data=open('/proc/user_beancounters','r')
# NOTE(review): fixed file name in /tmp holding pickled data -- consider a
# directory that only the monitoring user can write to.
picklefilepath='/tmp/beancounters_pickledump'
  
==Alternative Script==
+
#-------- find the hostname for each veid ---:
 +
 
 +
def find_veid(veid, conf_dir='/etc/vz/conf'):
    """Return the short hostname configured for the container *veid*.

    Reads ``<conf_dir>/<veid>.conf`` and looks for its ``HOSTNAME=...``
    assignment. Quotes around the value are removed and only the part
    before the first dot is returned (``web1.example.com`` -> ``web1``).

    veid     -- container id (anything ``str()`` turns into the file name)
    conf_dir -- directory holding the per-container ``.conf`` files

    Returns None when the file has no HOSTNAME assignment.
    Raises IOError when the configuration file cannot be opened.
    """
    conf_path = conf_dir.rstrip('/') + '/' + str(veid) + '.conf'
    # "with" guarantees the file is closed, also on the early return.
    with open(conf_path, 'r') as veid_conf:
        for raw_line in veid_conf:
            line = raw_line.strip()
            if line.startswith('#'):
                # A commented-out HOSTNAME must not win over the real one.
                continue
            key, separator, value = line.partition('=')
            if not separator or key.strip() != 'HOSTNAME':
                continue
            fqdn = value.replace('"', '').strip()
            return fqdn.split('.')[0]
    return None
 +
 
 +
# ---------- send mail in case of a counter-change
 +
def send_mail(count_change):
    """Mail a report about increased failcnt values via the local SMTP server.

    count_change -- text appended to the mail body; the caller builds one
                    line per resource whose failcnt value went up.

    The sender is ``root@<local host name>``. The original code referred to
    an undefined name ``host`` here, which raised NameError as soon as a
    mail had to be sent; the local host name is now taken from
    ``socket.gethostname()``.
    """
    mailfrom = 'root@' + socket.gethostname()
    mailto = 'to@example.com'
    mailsubject = 'Beancounters changed in the last 5 minutes'
    mailbody = 'The Beancounter-failcnt value of the following veid(s) and resource(s) has \nincreased in the last 5 minutes:\n\n'
    # Header lines, an empty line, then the body.
    message = (
        'From:' + mailfrom +
        '\nTo:' + mailto +
        '\nSubject:' + mailsubject +
        '\n\n' + mailbody + count_change
    )
    server = smtplib.SMTP('localhost')
    try:
        server.sendmail(mailfrom, [mailto], message)
    finally:
        # Close the SMTP session even when sending fails.
        server.quit()
 +
 
 +
#------------read raw and compare data from user_beancounters
 +
 
 +
def _container_label(veid):
    """Return a human-readable name for *veid* to use in the change report."""
    if int(veid) == 0:
        return 'OpenVZ Hardware Node'
    try:
        hostname = find_veid(veid)
    except (IOError, OSError):
        # An unreadable container config must not suppress the alert.
        hostname = None
    return hostname or 'veid ' + str(veid)


def compare_data(beancounter_data,data_read,count):
    """Parse the beancounter lines and run one of the two checks.

    beancounter_data -- iterable of lines in /proc/user_beancounters format
    data_read        -- snapshot from the previous run as returned by this
                        function, or a false value / the string '0' when
                        there is nothing to compare against
    count            -- True: compare failcnt values with *data_read* and
                        mail every increase; False: act as Nagios check and
                        compare the current value with barrier/limit

    Each parsed line is stored as ``{veid: {resource: fields}}`` where
    *fields* is the list of column strings without the leading ``veid:``
    column: [0] resource name, [1] current value, [3] barrier, [4] limit,
    [5] failcnt.

    With ``count`` false the function never returns: it prints the result
    and calls ``sys.exit(2)`` when a resource exceeds 90% of its threshold,
    ``sys.exit(0)`` otherwise. With ``count`` true it returns the parsed
    data after (possibly) sending a mail through ``send_mail``.
    """
    # The parsed values are kept in the module-level dict: on the very first
    # run this function is called twice on the same, already consumed file
    # and the second call has to return what the first one collected.
    global current_data
    try:
        results = current_data
    except NameError:
        results = current_data = {}

    # For these resources only the limit is relevant, not the barrier.
    limit_only = ('oomguarpages', 'physpages')
    have_previous = bool(data_read) and data_read != '0'

    barrier_break = []
    count_change = []
    veid = None
    for line in beancounter_data:
        if 'Version' in line or 'uid' in line or 'dummy' in line:
            continue
        fields = line.split()
        if len(fields) == 7:
            # First line of a container: "<veid>: resource held ...".
            veid = int(fields[0][:-1])
            fields.pop(0)  # drop the veid column
            results[veid] = {}
        elif len(fields) != 6 or veid is None:
            # Blank or malformed line, or data before the first veid line.
            continue
        resource = fields[0]
        results[veid][resource] = fields

        if count:
            if not have_previous:
                continue
            # A container or resource that did not exist during the previous
            # run has nothing to be compared with.
            previous = data_read.get(veid, {}).get(resource)
            if previous is None:
                continue
            # Compare numerically: as strings '9' < '10' is False.
            if int(previous[5]) < int(fields[5]):
                count_change.append(
                    str(_container_label(veid)) + ': ' + str(resource) +
                    ' failcnt has changed from ' + str(previous[5]) +
                    ' to ' + str(fields[5]) + '\n')
        else:
            threshold = fields[4] if resource in limit_only else fields[3]
            if int(fields[1]) > int(threshold) * 0.9:
                barrier_break.append(str(veid) + ': ' + str(resource) + ' ')

    if not count:
        if barrier_break:
            print(''.join(barrier_break))
            sys.exit(2)
        print('All Beancounters OK')
        sys.exit(0)

    if count_change:
        send_mail(''.join(count_change))
    return results
 +
 
 +
 
 +
# ----- pickle data - read or write
 +
 
 +
def pickle_data(current_data,action,count,picklefilepath):
    """Store or load the beancounter snapshot kept between two runs.

    current_data   -- snapshot to store (only used for action 'write')
    action         -- 'write' to dump *current_data*, 'read' to load the
                      previous snapshot; any other value does nothing
    count          -- passed on to ``compare_data`` on the first run
    picklefilepath -- path of the pickle file

    'write' returns None; an empty snapshot is reported on stdout and not
    written. 'read' returns the stored snapshot. When the file is missing
    or cannot be unpickled, this is treated as the first run: a snapshot
    is taken with ``compare_data(beancounter_data, '0', count)``, stored,
    and None is returned.

    Raises IOError when the pickle file cannot be written.
    """
    # NOTE(review): the script keeps this file in /tmp; pickle.load() runs
    # whatever a pickle tells it to, so the file should live in a directory
    # that only the monitoring user can write to.
    if action == 'write':
        if not current_data:
            print('current_data is empty: ' + str(current_data))
            return None
        # Binary mode is what pickle expects; "with" closes on errors too.
        with open(picklefilepath, 'wb') as picklefile:
            pickle.dump(current_data, picklefile)
        return None

    if action != 'read':
        return None

    try:
        with open(picklefilepath, 'rb') as picklefile:
            data_read = pickle.load(picklefile)
    except (IOError, EOFError, pickle.UnpicklingError):
        # No usable snapshot yet (first run or truncated file): create one.
        current_data = compare_data(beancounter_data, '0', count)
        with open(picklefilepath, 'wb') as picklefile:
            pickle.dump(current_data, picklefile)
        return None

    if not data_read:
        print('DATA_READ IS NONE:' + str(data_read))
    return data_read
 +
 
 +
# ------- print script usage
 +
 
 +
def usage(prog="check_beancounters.py"):
    """Print the command-line help to stdout.

    prog -- program name shown in the help text (previously ignored; the
            default reproduces the original output unchanged)
    """
    print("""
%(prog)s : Check if resource-values break barriers or limits and failcounters increase

%(prog)s [-tfh]

-h                  print this message

-t                  Check if failcnt-values have increased since the last run
-f                  Check if current value of a resource is higher than barrier/limit
""" % {'prog': prog})
 +
 
 +
 
 +
# ----- parse the command line and run the selected check

try:
    opts=getopt.getopt(sys.argv[1:], 'thf')
except getopt.GetoptError:
    # Unknown option: show the help instead of a traceback. Exit status 3
    # is reported as UNKNOWN by Nagios.
    usage(); sys.exit(3)

if opts[0]==[]:
    # Without a mode there is nothing to check; exiting with 0 would make
    # Nagios show a misconfigured check as OK.
    usage(); sys.exit(3)
elif opts[0][0][0]=='-h':
    usage(); sys.exit(0)
elif opts[0][0][0]=='-t':
    count=True      # failcnt comparison, reports by mail
elif opts[0][0][0]=='-f':
    count=False     # Nagios check against barrier/limit

try:
    data_read=pickle_data(current_data,'read',count,picklefilepath)
    # With count == False compare_data() ends the script via sys.exit().
    current_data = compare_data(beancounter_data,data_read,count)
    pickle_data(current_data,'write',count,picklefilepath)
finally:
    beancounter_data.close()
 +
</source>
  
Here is an alternative script, also written in Python:
 
[http://www.kbrandt.com/2008/10/openvz-beancounters-nagios-script.html OpenVZ Nagios Bean Counters Script]
 
  
  

Please note that all contributions to OpenVZ Virtuozzo Containers Wiki may be edited, altered, or removed by other contributors. If you don't want your writing to be edited mercilessly, then don't submit it here.
If you are going to add external links to an article, read the External links policy first!

To edit this page, please answer the question that appears below (more info):

Cancel Editing help (opens in new window)