Difference between revisions of "Ubstat system call"
|  (fixed level heading for API) |  (use <source> tag instead of pre where appropriate) | ||
| Line 19: | Line 19: | ||
| The system call description is | The system call description is | ||
| − | < | + | <source lang="c"> | 
| long ubstat(int func, unsigned long luid, void *notif, void *buf, int size); | long ubstat(int func, unsigned long luid, void *notif, void *buf, int size); | ||
| − | </ | + | </source> | 
| The macros and data typed used are declared in <code>include/ub/ub_stat.h</code> file. | The macros and data typed used are declared in <code>include/ub/ub_stat.h</code> file. | ||
| Line 49: | Line 49: | ||
| '''1. <code>UBSTAT_READ_ONE</code>''' format is | '''1. <code>UBSTAT_READ_ONE</code>''' format is | ||
| − | < | + | <source lang="c"> | 
|          typedef unsigned long ubstattime_t; |          typedef unsigned long ubstattime_t; | ||
| Line 62: | Line 62: | ||
|                  ubstatparm_t    param[1]; |                  ubstatparm_t    param[1]; | ||
|          } |          } | ||
| − | </ | + | </source> | 
| It contains the time period for which the stats are returned and the <code>maxheld</code> and <code>failcnt</code> for the resource. | It contains the time period for which the stats are returned and the <code>maxheld</code> and <code>failcnt</code> for the resource. | ||
| '''2. <code>UBSTAT_READ_ALL</code>''' format is | '''2. <code>UBSTAT_READ_ALL</code>''' format is | ||
| − | < | + | <source lang="c"> | 
|          typedef unsigned long ubstattime_t; |          typedef unsigned long ubstattime_t; | ||
| Line 80: | Line 80: | ||
|                  ubstatparm_t    param[UB_RESOURCES]; |                  ubstatparm_t    param[UB_RESOURCES]; | ||
|          } |          } | ||
| − | </ | + | </source> | 
| It contains the same info as the <code>UBSTAT_READ_ONE</code> does, but for all the resources. | It contains the same info as the <code>UBSTAT_READ_ONE</code> does, but for all the resources. | ||
| '''3. <code>UBSTAT_READ_FULL</code>''' format is | '''3. <code>UBSTAT_READ_FULL</code>''' format is | ||
| − | < | + | <source lang="c"> | 
|          typedef unsigned long ubstattime_t; |          typedef unsigned long ubstattime_t; | ||
| Line 104: | Line 104: | ||
|                  ubstatparmf_t    param[UB_RESOURCES]; |                  ubstatparmf_t    param[UB_RESOURCES]; | ||
|          } |          } | ||
| − | </ | + | </source> | 
| It contains the extended info for all the resources. | It contains the extended info for all the resources. | ||
| Line 113: | Line 113: | ||
| '''6. UBSTAT_GETTIME''' format is | '''6. UBSTAT_GETTIME''' format is | ||
| − | < | + | <source lang="c"> | 
|          typedef unsigned long ubstattime_t; |          typedef unsigned long ubstattime_t; | ||
| Line 121: | Line 121: | ||
|                  ubstattime_t    cur_time; |                  ubstattime_t    cur_time; | ||
|          }; |          }; | ||
| − | </ | + | </source> | 
| It returns the time interval within which the stats are collected and the current time. | It returns the time interval within which the stats are collected and the current time. | ||
| Line 131: | Line 131: | ||
| The <code>notif</code> should point to | The <code>notif</code> should point to | ||
| − | < | + | <source lang="c"> | 
|          typedef struct { |          typedef struct { | ||
|                  long            maxinterval; |                  long            maxinterval; | ||
|                  int             signum; |                  int             signum; | ||
|          } ubnotifrq_t; |          } ubnotifrq_t; | ||
| − | </ | + | </source> | 
| The <code>maxinterval</code> is the time after which the notification will be delivered. It should be more than 1 (second). | The <code>maxinterval</code> is the time after which the notification will be delivered. It should be more than 1 (second). | ||
| Line 147: | Line 147: | ||
| == Demo == | == Demo == | ||
| The following program demonstrates how you can (but not should) use the described API. This example is deliberately made very stupid and simple to demonstrate the main idea and will only work on x86_64. | The following program demonstrates how you can (but not should) use the described API. This example is deliberately made very stupid and simple to demonstrate the main idea and will only work on x86_64. | ||
| − | < | + | <source lang="c"> | 
| #include <stdio.h> | #include <stdio.h> | ||
| #include <unistd.h> | #include <unistd.h> | ||
| Line 285: | Line 285: | ||
| 	return 0; | 	return 0; | ||
| } | } | ||
| − | </ | + | </source> | 
| == Implementation constraints == | == Implementation constraints == | ||
Revision as of 16:04, 5 February 2010
This article describes an interesting system call which was designed to pick beancounters statistics.
Contents
Overview
The system call appeared in the very first version of the OpenVZ. Its API is rather sloppy, but this is something we have to live with due to backward compatibility reasons.
The main intention of this system call is to allow a user space process to get the beancounters statistics periodically. These statistics include the fields observed in the /proc/user_beancounters file and one more field - the so called minheld value, which is the opposite of the maxheld one. As well as exporting the statistics, the system call also notifies the task that the desired period has elapsed. The notification is performed by sending a signal to the process, and this notification is one-shot.
How it works
The typical usage of this call is in performing the following steps.
- Request the amount of resources
- Get the IDs of all the living beancounters
- Setup a handler for some signal (e.g. USR1)
- Perform a system call to setup the notification
- Go do something (or sleep for ever)
In a signal handler one should just perform a respective system call to get the stats and schedule the next notification (yes, they are performed in one go; see below for more details).
API
The system call description is
long ubstat(int func, unsigned long luid, void *notif, void *buf, int size);
The macros and data types used are declared in the include/ub/ub_stat.h file.
Arguments description
func is like cmd in the ioctl system call. It can be one of
- UBSTAT_READ_ONE to read basic stats for one resource. The desired resource itself should be or-ed with the func
- UBSTAT_READ_ALL to read basic stats about all the resources
- UBSTAT_READ_FULL to read extended stats about all the resources
- UBSTAT_UBLIST to get the ids of the beancounters
- UBSTAT_UBPARMNUM to get the number of resources used by the kernel
- UBSTAT_GETTIME
See below for what basic and extended stats mean.
luid is the desired beancounter ID. Only one beancounter can be checked at one call.
notif is the pointer to a ubnotifrq_t structure which describes the notification details (see below).
buf is the pointer to a chunk of memory, which will contain the data requested.
size is the buf memory size.
The statistics format
The format of the data returned into the buffer depends on the function requested.
1. UBSTAT_READ_ONE format is
        typedef unsigned long ubstattime_t;
        typedef struct {
                unsigned long   maxheld;
                unsigned long   failcnt;
        } ubstatparm_t;
        struct {
                ubstattime_t    start_time;
                ubstattime_t    end_time;
                ubstatparm_t    param[1];
        }
It contains the time period for which the stats are returned and the maxheld and failcnt for the resource.
2. UBSTAT_READ_ALL format is
        typedef unsigned long ubstattime_t;
        typedef struct {
                unsigned long   maxheld;
                unsigned long   failcnt;
        } ubstatparm_t;
        struct {
                ubstattime_t    start_time;
                ubstattime_t    end_time;
                ubstatparm_t    param[UB_RESOURCES];
        }
It contains the same info as the UBSTAT_READ_ONE does, but for all the resources.
3. UBSTAT_READ_FULL format is
        typedef unsigned long ubstattime_t;
        typedef struct {
                unsigned long   barrier;
                unsigned long   limit;
                unsigned long   held;
                unsigned long   maxheld;
                unsigned long   minheld;
                unsigned long   failcnt;
                unsigned long __unused1;
                unsigned long __unused2;
       } ubstatparmf_t;
        struct {
                ubstattime_t    start_time;
                ubstattime_t    end_time;
                ubstatparmf_t    param[UB_RESOURCES];
        }
It contains the extended info for all the resources.
4. UBSTAT_UBLIST treats the buf to point to the unsigned long array.
5. UBSTAT_UBPARMNUM ignores the buf.
6. UBSTAT_GETTIME format is
        typedef unsigned long ubstattime_t;
        struct {
                ubstattime_t    start_time;
                ubstattime_t    end_time;
                ubstattime_t    cur_time;
        };
It returns the time interval within which the stats are collected and the current time.
All the times used are in seconds.
Notification
The notification info is passed via the notif argument and is set up for all the functions except UBSTAT_UBLIST and UBSTAT_UBPARMNUM. The notification is one-shot, but note that once you have requested the statistics the next shot is scheduled at the same time.
The notif should point to
        typedef struct {
                long            maxinterval;
                int             signum;
        } ubnotifrq_t;
The maxinterval is the time after which the notification will be delivered. It should be more than 1 (second).
The signum is the signal that will be sent to notify.
Return value
The system call returns -1 in case an error has occurred. In case of UBSTAT_UBPARMNUM it returns UB_RESOURCES, and in all other cases it returns the number of bytes written to the buf.
Demo
The following program demonstrates how you can (but not should) use the described API. This example is deliberately made very stupid and simple to demonstrate the main idea and will only work on x86_64.
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <ub_stat.h>
#include <signal.h>
/* Size in bytes of the result buffer handed to the ubstat call. */
#define UBSTAT_BUFSIZE	4096
/* Signal requested for the "period elapsed" notification. */
#define UBSTAT_NOTIFSIG	SIGUSR1
/* Length of the per-resource param[] arrays used by the printers below. */
#define UB_RESOURCES	24
/* Buffer the ubstat call fills and the print_stat_* helpers read. */
static char ubstat_buf[UBSTAT_BUFSIZE];
/* Beancounter id and function code; set in do_ubstat(), reused by do_notify(). */
static int luid, func;
/* Notification request passed along with every ubstat call. */
static ubnotifrq_t notif;
/* Printer matching the selected function code; chosen in res2func(). */
static void (*print_stat)(void *buf);
/* Thin wrapper around syscall(); 503 is the syscall number hard-coded by this demo. */
#define sys_ubstat(args...)	syscall(503, ## args)
/*
 * Print the command-line synopsis and the meaning of the special
 * <resource> values to stdout.
 */
static void usage(void)
{
	static const char *const help[] = {
		"ubstat <id> <period> <resource>\n",
		"resource can be either num or:\n",
		"-1 for all maxheld and failcnt\n",
		"-2 for all at all\n",
	};
	size_t i;

	for (i = 0; i < sizeof(help) / sizeof(help[0]); i++)
		fputs(help[i], stdout);
}
/*
 * Print maxheld and failcnt for a single resource.
 *
 * buf is interpreted as two ubstattime_t timestamps followed by one
 * ubstatparm_t entry; only the entry is printed.
 */
static void print_stat_one(void *buf)
{
	struct ub_one_reply {
		ubstattime_t    start_time;
		ubstattime_t    end_time;
		ubstatparm_t    param[1];
	};
	const struct ub_one_reply *reply = buf;
	const ubstatparm_t *entry = &reply->param[0];

	printf("maxheld: %lu\n", entry->maxheld);
	printf("failcnt: %lu\n", entry->failcnt);
}
static void print_stat_all(void *buf)
{
	struct {
		ubstattime_t    start_time;
		ubstattime_t    end_time;
		ubstatparm_t    param[UB_RESOURCES];
	} *data;
	int res;
	data = buf;
	for (res = 0; res < UB_RESOURCES; res++) {
		printf("res %d\n", res);
		printf("\tmaxheld: %lu\n", data->param[res].maxheld);
		printf("\tfailcnt: %lu\n", data->param[res].failcnt);
	}
}
static void print_stat_full(void *buf)
{
	struct {
		ubstattime_t    start_time;
		ubstattime_t    end_time;
		ubstatparmf_t    param[UB_RESOURCES];
	} *data;
	int res;
	data = buf;
	for (res = 0; res < UB_RESOURCES; res++) {
		printf("res %d\n", res);
		printf("minheld: %lu\n", data->param[res].minheld);
		printf("maxheld: %lu\n", data->param[res].maxheld);
		printf("failcnt: %lu\n", data->param[res].failcnt);
	}
}
/*
 * Translate the <resource> command-line value into a ubstat function
 * code and select the matching printer in print_stat.
 *
 *   resource >= 0  -> UBSTAT_READ_ONE or-ed with the resource number
 *   resource == -1 -> UBSTAT_READ_ALL
 *   resource == -2 -> UBSTAT_READ_FULL
 *
 * Any other negative value prints an error and exits with status 1.
 */
static inline int res2func(int resource)
{
	int code;

	switch (resource) {
	case -1:
		print_stat = print_stat_all;
		code = UBSTAT_READ_ALL;
		break;
	case -2:
		print_stat = print_stat_full;
		code = UBSTAT_READ_FULL;
		break;
	default:
		if (resource < 0) {
			printf("Bad resource %d\n", resource);
			exit(1);
		}
		print_stat = print_stat_one;
		code = UBSTAT_READ_ONE | resource;
		break;
	}

	return code;
}
static void do_notify(int x)
{
	int err;
	err = sys_ubstat(func, luid, (unsigned long)¬if,
			ubstat_buf, UBSTAT_BUFSIZE);
	if (err < 0) {
		perror("Can't set stat");
		exit(0);
	}
	print_stat(ubstat_buf);
}
/*
 * Configure the demo and perform the first statistics request.
 *
 * id       - beancounter id, stored in luid
 * period   - notification interval, stored in notif.maxinterval
 * resource - resource selector, translated by res2func()
 *
 * Installs do_notify() as the handler for UBSTAT_NOTIFSIG and then
 * calls it once directly so that the first request is issued.
 *
 * Returns 0. Failures inside res2func() or do_notify() terminate the
 * process instead of being reported here.
 */
static int do_ubstat(int id, int period, int resource)
{
	luid = id;
	func = res2func(resource);
	notif.maxinterval = period;
	notif.signum = UBSTAT_NOTIFSIG;
	signal(UBSTAT_NOTIFSIG, do_notify);
	do_notify(0);
	return 0;
}
/*
 * Entry point: ubstat <id> <period> <resource>
 *
 * With no arguments the usage text is printed and the program returns
 * 0. With fewer than three arguments the usage text is printed and the
 * program returns 1, so argv[] is never read past the supplied
 * arguments. Otherwise the three arguments are converted with atoi(),
 * the first request is issued via do_ubstat(), and the process sleeps
 * forever while do_notify() handles the notification signals.
 */
int main(int argc, char **argv)
{
	int id, period, res;

	if (argc < 4) {
		usage();
		/* Plain "ubstat" is a help request; a partial command line is an error. */
		return argc == 1 ? 0 : 1;
	}
	id = atoi(argv[1]);
	period = atoi(argv[2]);
	res = atoi(argv[3]);
	do_ubstat(id, period, res);
	while (1)
		sleep(10);
	return 0;
}
Implementation constraints
Unfortunately the API is not architecture independent and thus 32-bit application will simply not work on x86_64.
