Home | History | Annotate | Download | only in sys
      1     0  stevel /*
      2     0  stevel  * CDDL HEADER START
      3     0  stevel  *
      4     0  stevel  * The contents of this file are subject to the terms of the
      5  2951   elowe  * Common Development and Distribution License (the "License").
      6  2951   elowe  * You may not use this file except in compliance with the License.
      7     0  stevel  *
      8     0  stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9     0  stevel  * or http://www.opensolaris.org/os/licensing.
     10     0  stevel  * See the License for the specific language governing permissions
     11     0  stevel  * and limitations under the License.
     12     0  stevel  *
     13     0  stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14     0  stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15     0  stevel  * If applicable, add the following below this CDDL HEADER, with the
     16     0  stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17     0  stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18     0  stevel  *
     19     0  stevel  * CDDL HEADER END
     20     0  stevel  */
     21     0  stevel /*
     22  2951   elowe  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
     23     0  stevel  * Use is subject to license terms.
     24     0  stevel  */
     25     0  stevel 
     26     0  stevel #ifndef	_SYS_KSTAT_H
     27     0  stevel #define	_SYS_KSTAT_H
     28     0  stevel 
     29     0  stevel #pragma ident	"%Z%%M%	%I%	%E% SMI"
     30     0  stevel 
     31     0  stevel /*
     32     0  stevel  * Definition of general kernel statistics structures and /dev/kstat ioctls
     33     0  stevel  */
     34     0  stevel 
     35     0  stevel #include <sys/types.h>
     36     0  stevel #include <sys/time.h>
     37     0  stevel 
     38     0  stevel #ifdef	__cplusplus
     39     0  stevel extern "C" {
     40     0  stevel #endif
     41     0  stevel 
     42     0  stevel typedef int	kid_t;		/* unique kstat ID (KID) */
     43     0  stevel 
     44     0  stevel /*
     45     0  stevel  * Kernel statistics driver (/dev/kstat) ioctls
     46     0  stevel  */
     47     0  stevel 
     48     0  stevel #define	KSTAT_IOC_BASE		('K' << 8)	/* 'K' above the low 8 bits */
     49     0  stevel 
     50     0  stevel #define	KSTAT_IOC_CHAIN_ID	(KSTAT_IOC_BASE | 0x01)	/* arg: NULL */
     51     0  stevel #define	KSTAT_IOC_READ		(KSTAT_IOC_BASE | 0x02)	/* arg: kstat_t * */
     52     0  stevel #define	KSTAT_IOC_WRITE		(KSTAT_IOC_BASE | 0x03)	/* arg: kstat_t * */
     53     0  stevel 
     54     0  stevel /*
     55     0  stevel  * /dev/kstat ioctl usage (kd denotes /dev/kstat descriptor):
     56     0  stevel  *
     57     0  stevel  *	kcid = ioctl(kd, KSTAT_IOC_CHAIN_ID, NULL);
     58     0  stevel  *	kcid = ioctl(kd, KSTAT_IOC_READ, kstat_t *);
     59     0  stevel  *	kcid = ioctl(kd, KSTAT_IOC_WRITE, kstat_t *);
     60     0  stevel  */
     61     0  stevel 
     62     0  stevel #define	KSTAT_STRLEN	31	/* 30 chars + NUL; must be 16 * n - 1 */
     63     0  stevel 
     64     0  stevel /*
     65     0  stevel  * The generic kstat header; each kstat is this header plus a data section
     66     0  stevel  */
     67     0  stevel 
     68     0  stevel typedef struct kstat {
     69     0  stevel 	/*
     70     0  stevel 	 * Fields relevant to both kernel and user
     71     0  stevel 	 */
     72     0  stevel 	hrtime_t	ks_crtime;	/* creation time (from gethrtime()) */
     73     0  stevel 	struct kstat	*ks_next;	/* kstat chain linkage */
     74     0  stevel 	kid_t		ks_kid;		/* unique kstat ID */
     75     0  stevel 	char		ks_module[KSTAT_STRLEN]; /* provider module name */
     76     0  stevel 	uchar_t		ks_resv;	/* reserved, currently just padding */
     77     0  stevel 	int		ks_instance;	/* provider module's instance */
     78     0  stevel 	char		ks_name[KSTAT_STRLEN]; /* kstat name */
     79     0  stevel 	uchar_t		ks_type;	/* kstat data type (KSTAT_TYPE_*) */
     80     0  stevel 	char		ks_class[KSTAT_STRLEN]; /* kstat class */
     81     0  stevel 	uchar_t		ks_flags;	/* kstat flags (KSTAT_FLAG_*) */
     82     0  stevel 	void		*ks_data;	/* kstat type-specific data */
     83     0  stevel 	uint_t		ks_ndata;	/* # of type-specific data records */
     84     0  stevel 	size_t		ks_data_size;	/* total size of kstat data section */
     85     0  stevel 	hrtime_t	ks_snaptime;	/* time of last data snapshot */
     86     0  stevel 	/*
     87     0  stevel 	 * Fields relevant to kernel only
     88     0  stevel 	 */
     89     0  stevel 	int		(*ks_update)(struct kstat *, int); /* dynamic update */
     90     0  stevel 	void		*ks_private;	/* arbitrary provider-private data */
     91     0  stevel 	int		(*ks_snapshot)(struct kstat *, void *, int); /* data snapshot */
     92     0  stevel 	void		*ks_lock;	/* kmutex_t *; protects kstat's data */
     93     0  stevel } kstat_t;
     94     0  stevel 
     95     0  stevel #ifdef _SYSCALL32
     96     0  stevel 
     97     0  stevel typedef int32_t kid32_t;	/* kstat ID as stored in kstat32_t */
     98     0  stevel 
     99     0  stevel typedef struct kstat32 {
    100     0  stevel 	/*
    101     0  stevel 	 * Fields relevant to both kernel and user (fixed-width 32-bit types)
    102     0  stevel 	 */
    103     0  stevel 	hrtime_t	ks_crtime;
    104     0  stevel 	caddr32_t	ks_next;		/* struct kstat pointer */
    105     0  stevel 	kid32_t		ks_kid;
    106     0  stevel 	char		ks_module[KSTAT_STRLEN];
    107     0  stevel 	uint8_t		ks_resv;
    108     0  stevel 	int32_t		ks_instance;
    109     0  stevel 	char		ks_name[KSTAT_STRLEN];
    110     0  stevel 	uint8_t		ks_type;
    111     0  stevel 	char		ks_class[KSTAT_STRLEN];
    112     0  stevel 	uint8_t		ks_flags;
    113     0  stevel 	caddr32_t	ks_data;		/* type-specific data */
    114     0  stevel 	uint32_t	ks_ndata;
    115     0  stevel 	size32_t	ks_data_size;
    116     0  stevel 	hrtime_t	ks_snaptime;
    117     0  stevel 	/*
    118     0  stevel 	 * Fields relevant to kernel only (only needed here for padding)
    119     0  stevel 	 */
    120     0  stevel 	int32_t		_ks_update;		/* padding only */
    121     0  stevel 	caddr32_t	_ks_private;		/* padding only */
    122     0  stevel 	int32_t		_ks_snapshot;		/* padding only */
    123     0  stevel 	caddr32_t	_ks_lock;		/* padding only */
    124     0  stevel } kstat32_t;
    125     0  stevel 
    126     0  stevel #endif	/* _SYSCALL32 */
    127     0  stevel 
    128     0  stevel /*
    129     0  stevel  * kstat structure and locking strategy
    130     0  stevel  *
    131     0  stevel  * Each kstat consists of a header section (a kstat_t) and a data section.
    132     0  stevel  * The system maintains a set of kstats, protected by kstat_chain_lock.
    133     0  stevel  * kstat_chain_lock protects all additions to/deletions from this set,
    134     0  stevel  * as well as all changes to kstat headers.  kstat data sections are
    135     0  stevel  * *optionally* protected by the per-kstat ks_lock.  If ks_lock is non-NULL,
    136     0  stevel  * kstat clients (e.g. /dev/kstat) will acquire this lock for all of their
    137     0  stevel  * operations on that kstat.  It is up to the kstat provider to decide whether
    138     0  stevel  * guaranteeing consistent data to kstat clients is sufficiently important
    139     0  stevel  * to justify the locking cost.  Note, however, that most statistic updates
    140     0  stevel  * already occur under one of the provider's mutexes, so if the provider sets
    141     0  stevel  * ks_lock to point to that mutex, then kstat data locking is free.
    142     0  stevel  *
    143     0  stevel  * NOTE: variable-size kstats MUST employ kstat data locking, to prevent
    144     0  stevel  * data-size races with kstat clients.
    145     0  stevel  *
    146     0  stevel  * NOTE: ks_lock is really of type (kmutex_t *); it is declared as (void *)
    147     0  stevel  * in the kstat header so that users don't have to be exposed to all of the
    148     0  stevel  * kernel's lock-related data structures.
    149     0  stevel  */
    150     0  stevel 
    151     0  stevel #if	defined(_KERNEL)
    152     0  stevel 
    153     0  stevel #define	KSTAT_ENTER(k)	\
    154     0  stevel 	{ kmutex_t *lp = (k)->ks_lock; if (lp) mutex_enter(lp); }
    155     0  stevel 
    156     0  stevel #define	KSTAT_EXIT(k)	\
    157     0  stevel 	{ kmutex_t *lp = (k)->ks_lock; if (lp) mutex_exit(lp); }
    158     0  stevel 
    159     0  stevel #define	KSTAT_UPDATE(k, rw)		((k)->ks_update((k), (rw)))
    160     0  stevel 
    161     0  stevel #define	KSTAT_SNAPSHOT(k, buf, rw)	((k)->ks_snapshot((k), (buf), (rw)))
    162     0  stevel 
    163     0  stevel #endif	/* defined(_KERNEL) */
    164     0  stevel 
    165     0  stevel /*
    166     0  stevel  * kstat time
    167     0  stevel  *
    168     0  stevel  * All times associated with kstats (e.g. creation time, snapshot time,
    169     0  stevel  * kstat_timer_t and kstat_io_t timestamps, etc.) are 64-bit nanosecond values,
    170     0  stevel  * as returned by gethrtime().  The accuracy of these timestamps is machine
    171     0  stevel  * dependent, but the precision (units) is the same across all platforms.
    172     0  stevel  */
    173     0  stevel 
    174     0  stevel /*
    175     0  stevel  * kstat identity (KID)
    176     0  stevel  *
    177     0  stevel  * Each kstat is assigned a unique KID (kstat ID) when it is added to the
    178     0  stevel  * global kstat chain.  The KID is used as a cookie by /dev/kstat to
    179     0  stevel  * request information about the corresponding kstat.  There is also
    180     0  stevel  * an identity associated with the entire kstat chain, kstat_chain_id,
    181     0  stevel  * which is bumped each time a kstat is added or deleted.  /dev/kstat uses
    182     0  stevel  * the chain ID to detect changes in the kstat chain (e.g., a new disk
    183     0  stevel  * coming online) between ioctl()s.
    184     0  stevel  */
    185     0  stevel 
    186     0  stevel /*
    187     0  stevel  * kstat module, kstat instance
    188     0  stevel  *
    189     0  stevel  * ks_module and ks_instance contain the name and instance of the module
    190     0  stevel  * that created the kstat.  In cases where there can only be one instance,
    191     0  stevel  * ks_instance is 0.  The kernel proper (/kernel/unix) uses "unix" as its
    192     0  stevel  * module name.
    193     0  stevel  */
    194     0  stevel 
    195     0  stevel /*
    196     0  stevel  * kstat name
    197     0  stevel  *
    198     0  stevel  * ks_name gives a meaningful name to a kstat.  The full kstat namespace
    199     0  stevel  * is module.instance.name, so the name only need be unique within a
    200     0  stevel  * module.  kstat_create() will fail if you try to create a kstat with
    201     0  stevel  * an already-used (ks_module, ks_instance, ks_name) triplet.  Spaces are
    202     0  stevel  * allowed in kstat names, but strongly discouraged, since they hinder
    203     0  stevel  * awk-style processing at user level.
    204     0  stevel  */
    205     0  stevel 
    206     0  stevel /*
    207     0  stevel  * kstat type
    208     0  stevel  *
    209     0  stevel  * The kstat mechanism provides several flavors of kstat data, defined
    210     0  stevel  * below.  The "raw" kstat type is just treated as an array of bytes; you
    211     0  stevel  * can use this to export any kind of data you want.
    212     0  stevel  *
    213     0  stevel  * Some kstat types allow multiple data structures per kstat, e.g.
    214     0  stevel  * KSTAT_TYPE_NAMED; others do not.  This is part of the spec for each
    215     0  stevel  * kstat data type.
    216     0  stevel  *
    217     0  stevel  * User-level tools should *not* rely on the #define KSTAT_NUM_TYPES.  To
    218     0  stevel  * get this information, read out the standard system kstat "kstat_types".
    219     0  stevel  */
    220     0  stevel 
    221     0  stevel #define	KSTAT_TYPE_RAW		0	/* raw bytes; can be anything */
    222     0  stevel 					/* ks_ndata >= 1 */
    223     0  stevel #define	KSTAT_TYPE_NAMED	1	/* name/value pairs (kstat_named_t) */
    224     0  stevel 					/* ks_ndata >= 1 */
    225     0  stevel #define	KSTAT_TYPE_INTR		2	/* interrupt stats (kstat_intr_t) */
    226     0  stevel 					/* ks_ndata == 1 */
    227     0  stevel #define	KSTAT_TYPE_IO		3	/* I/O statistics (kstat_io_t) */
    228     0  stevel 					/* ks_ndata == 1 */
    229     0  stevel #define	KSTAT_TYPE_TIMER	4	/* event timer (kstat_timer_t) */
    230     0  stevel 					/* ks_ndata >= 1 */
    231     0  stevel 
    232     0  stevel #define	KSTAT_NUM_TYPES		5	/* number of types above */
    233     0  stevel 
    234     0  stevel /*
    235     0  stevel  * kstat class
    236     0  stevel  *
    237     0  stevel  * Each kstat can be characterized as belonging to some broad class
    238     0  stevel  * of statistics, e.g. disk, tape, net, vm, streams, etc.  This field
    239     0  stevel  * can be used as a filter to extract related kstats.  The following
    240     0  stevel  * values are currently in use: disk, tape, net, controller, vm, kvm,
    241     0  stevel  * hat, streams, kstat, and misc.  (The kstat class encompasses things
    242     0  stevel  * like kstat_types.)
    243     0  stevel  */
    244     0  stevel 
    245     0  stevel /*
    246     0  stevel  * kstat flags
    247     0  stevel  *
    248     0  stevel  * Any of the following flags may be passed to kstat_create().  They are
    249     0  stevel  * all zero by default.
    250     0  stevel  *
    251     0  stevel  *	KSTAT_FLAG_VIRTUAL:
    252     0  stevel  *
    253     0  stevel  *		Tells kstat_create() not to allocate memory for the
    254     0  stevel  *		kstat data section; instead, you will set the ks_data
    255     0  stevel  *		field to point to the data you wish to export.  This
    256     0  stevel  *		provides a convenient way to export existing data
    257     0  stevel  *		structures.
    258     0  stevel  *
    259     0  stevel  *	KSTAT_FLAG_VAR_SIZE:
    260     0  stevel  *
    261     0  stevel  *		The size of the kstat you are creating will vary over time.
    262     0  stevel  *		For example, you may want to use the kstat mechanism to
    263     0  stevel  *		export a linked list.  NOTE: The kstat framework does not
    264     0  stevel  *		manage the data section, so all variable-size kstats must be
    265     0  stevel  *		virtual kstats.  Moreover, variable-size kstats MUST employ
    266     0  stevel  *		kstat data locking to prevent data-size races with kstat
    267     0  stevel  *		clients.  See the section on "kstat snapshot" for details.
    268     0  stevel  *
    269     0  stevel  *	KSTAT_FLAG_WRITABLE:
    270     0  stevel  *
    271     0  stevel  *		Makes the kstat's data section writable by root.
    272     0  stevel  *		The ks_snapshot routine (see below) does not need to check for
    273     0  stevel  *		this; permission checking is handled in the kstat driver.
    274     0  stevel  *
    275     0  stevel  *	KSTAT_FLAG_PERSISTENT:
    276     0  stevel  *
    277     0  stevel  *		Indicates that this kstat is to be persistent over time.
    278     0  stevel  *		For persistent kstats, kstat_delete() simply marks the
    279     0  stevel  *		kstat as dormant; a subsequent kstat_create() reactivates
    280     0  stevel  *		the kstat.  This feature is provided so that statistics
    281     0  stevel  *		are not lost across driver close/open (e.g., raw disk I/O
    282     0  stevel  *		on a disk with no mounted partitions.)
    283     0  stevel  *		NOTE: Persistent kstats cannot be virtual, since ks_data
    284     0  stevel  *		points to garbage as soon as the driver goes away.
    285     0  stevel  *
    286     0  stevel  * The following flags are maintained by the kstat framework:
    287     0  stevel  *
    288     0  stevel  *	KSTAT_FLAG_DORMANT:
    289     0  stevel  *
    290     0  stevel  *		For persistent kstats, indicates that the kstat is in the
    291     0  stevel  *		dormant state (e.g., the corresponding device is closed).
    292     0  stevel  *
    293     0  stevel  *	KSTAT_FLAG_INVALID:
    294     0  stevel  *
    295     0  stevel  *		This flag is set when a kstat is in a transitional state,
    296     0  stevel  *		e.g. between kstat_create() and kstat_install().
    297     0  stevel  *		kstat clients must not attempt to access the kstat's data
    298     0  stevel  *		if this flag is set.
    299     0  stevel  */
    300     0  stevel 
    301     0  stevel #define	KSTAT_FLAG_VIRTUAL		0x01	/* caller supplies ks_data */
    302     0  stevel #define	KSTAT_FLAG_VAR_SIZE		0x02	/* data size varies over time */
    303     0  stevel #define	KSTAT_FLAG_WRITABLE		0x04	/* data writable by root */
    304     0  stevel #define	KSTAT_FLAG_PERSISTENT		0x08	/* kstat_delete() only marks dormant */
    305     0  stevel #define	KSTAT_FLAG_DORMANT		0x10	/* framework: persistent, dormant */
    306     0  stevel #define	KSTAT_FLAG_INVALID		0x20	/* framework: do not access data */
    307     0  stevel 
    308     0  stevel /*
    309     0  stevel  * Dynamic update support
    310     0  stevel  *
    311     0  stevel  * The kstat mechanism allows for an optional ks_update function to update
    312     0  stevel  * kstat data.  This is useful for drivers where the underlying device
    313     0  stevel  * keeps cheap hardware stats, but extraction is expensive.  Instead of
    314     0  stevel  * constantly keeping the kstat data section up to date, you can supply a
    315     0  stevel  * ks_update function which updates the kstat's data section on demand.
    316     0  stevel  * To take advantage of this feature, simply set the ks_update field before
    317     0  stevel  * calling kstat_install().
    318     0  stevel  *
    319     0  stevel  * The ks_update function, if supplied, must have the following structure:
    320     0  stevel  *
    321     0  stevel  *	int
    322     0  stevel  *	foo_kstat_update(kstat_t *ksp, int rw)
    323     0  stevel  *	{
    324     0  stevel  *		if (rw == KSTAT_WRITE) {
    325     0  stevel  *			... update the native stats from ksp->ks_data;
    326     0  stevel  *				return EACCES if you don't support this
    327     0  stevel  *		} else {
    328     0  stevel  *			... update ksp->ks_data from the native stats
    329     0  stevel  *		}
    330     0  stevel  *	}
    331     0  stevel  *
    332     0  stevel  * The ks_update return codes are: 0 for success, EACCES if you don't allow
    333     0  stevel  * KSTAT_WRITE, and EIO for any other type of error.
    334     0  stevel  *
    335     0  stevel  * In general, the ks_update function may need to refer to provider-private
    336     0  stevel  * data; for example, it may need a pointer to the provider's raw statistics.
    337     0  stevel  * The ks_private field is available for this purpose.  Its use is entirely
    338     0  stevel  * at the provider's discretion.
    339     0  stevel  *
    340     0  stevel  * All variable-size kstats MUST supply a ks_update routine, which computes
    341     0  stevel  * and sets ks_data_size (and ks_ndata if that is meaningful), since these
    342     0  stevel  * are needed to perform kstat snapshots (see below).
    343     0  stevel  *
    344     0  stevel  * No kstat locking should be done inside the ks_update routine.  The caller
    345     0  stevel  * will already be holding the kstat's ks_lock (to ensure consistent data).
    346     0  stevel  */
    347     0  stevel 
    348     0  stevel #define	KSTAT_READ	0	/* rw: copy kstat data out to the buffer */
    349     0  stevel #define	KSTAT_WRITE	1	/* rw: copy the buffer into the kstat */
    350     0  stevel 
    351     0  stevel /*
    352     0  stevel  * Kstat snapshot
    353     0  stevel  *
    354     0  stevel  * In order to get a consistent view of a kstat's data, clients must obey
    355     0  stevel  * the kstat's locking strategy.  However, these clients may need to perform
    356     0  stevel  * operations on the data which could cause a fault (e.g. copyout()), or
    357     0  stevel  * operations which are simply expensive.  Doing so could cause deadlock
    358     0  stevel  * (e.g. if you're holding a disk's kstat lock which is ultimately required
    359     0  stevel  * to resolve a copyout() fault), performance degradation (since the providers'
    360     0  stevel  * activity is serialized at the kstat lock), device timing problems, etc.
    361     0  stevel  *
    362     0  stevel  * To avoid these problems, kstat data is provided via snapshots.  Taking
    363     0  stevel  * a snapshot is a simple process: allocate a wired-down kernel buffer,
    364     0  stevel  * acquire the kstat's data lock, copy the data into the buffer ("take the
    365     0  stevel  * snapshot"), and release the lock.  This ensures that the kstat's data lock
    366     0  stevel  * will be held as briefly as possible, and that no faults will occur while
    367     0  stevel  * the lock is held.
    368     0  stevel  *
    369     0  stevel  * Normally, the snapshot is taken by default_kstat_snapshot(), which
    370     0  stevel  * timestamps the data (sets ks_snaptime), copies it, and does a little
    371     0  stevel  * massaging to deal with incomplete transactions on i/o kstats.  However,
    372     0  stevel  * this routine only works for kstats with contiguous data (the typical case).
    373     0  stevel  * If you create a kstat whose data is, say, a linked list, you must provide
    374     0  stevel  * your own ks_snapshot routine.  The routine you supply must have the
    375     0  stevel  * following prototype (replace "foo" with something appropriate):
    376     0  stevel  *
    377     0  stevel  *	int foo_kstat_snapshot(kstat_t *ksp, void *buf, int rw);
    378     0  stevel  *
    379     0  stevel  * The minimal snapshot routine -- one which copies contiguous data that
    380     0  stevel  * doesn't need any massaging -- would be this:
    381     0  stevel  *
    382     0  stevel  *	ksp->ks_snaptime = gethrtime();
    383     0  stevel  *	if (rw == KSTAT_WRITE)
    384     0  stevel  *		bcopy(buf, ksp->ks_data, ksp->ks_data_size);
    385     0  stevel  *	else
    386     0  stevel  *		bcopy(ksp->ks_data, buf, ksp->ks_data_size);
    387     0  stevel  *	return (0);
    388     0  stevel  *
    389     0  stevel  * A more illuminating example is taking a snapshot of a linked list:
    390     0  stevel  *
    391     0  stevel  *	ksp->ks_snaptime = gethrtime();
    392     0  stevel  *	if (rw == KSTAT_WRITE)
    393     0  stevel  *		return (EACCES);		... See below ...
    394     0  stevel  *	for (foo = first_foo; foo; foo = foo->next) {
    395     0  stevel  *		bcopy((char *) foo, (char *) buf, sizeof (struct foo));
    396     0  stevel  *		buf = ((struct foo *) buf) + 1;
    397     0  stevel  *	}
    398     0  stevel  *	return (0);
    399     0  stevel  *
    400     0  stevel  * In the example above, we have decided that we don't want to allow
    401     0  stevel  * KSTAT_WRITE access, so we return EACCES if this is attempted.
    402     0  stevel  *
    403     0  stevel  * The key points are:
    404     0  stevel  *
    405     0  stevel  *	(1) ks_snaptime must be set (via gethrtime()) to timestamp the data.
    406     0  stevel  *	(2) Data gets copied from the kstat to the buffer on KSTAT_READ,
    407     0  stevel  *		and from the buffer to the kstat on KSTAT_WRITE.
    408     0  stevel  *	(3) ks_snapshot return values are: 0 for success, EACCES if you
    409     0  stevel  *		don't allow KSTAT_WRITE, and EIO for any other type of error.
    410     0  stevel  *
    411     0  stevel  * Named kstats (see section on "Named statistics" below) containing long
    412     0  stevel  * strings (KSTAT_DATA_STRING) need special handling.  The kstat driver
    413     0  stevel  * assumes that all strings are copied into the buffer after the array of
    414     0  stevel  * named kstats, and the pointers (KSTAT_NAMED_STR_PTR()) are updated to point
    415     0  stevel  * into the copy within the buffer. The default snapshot routine does this,
    416     0  stevel  * but overriding routines should contain at least the following:
    417     0  stevel  *
    418     0  stevel  * if (rw == KSTAT_READ) {
    419     0  stevel  * 	kstat_named_t *knp = buf;
    420     0  stevel  * 	char *end = (char *)(knp + ksp->ks_ndata);
    421     0  stevel  * 	uint_t i;
    422     0  stevel  *
    423     0  stevel  * 	... Do the regular copy ...
    424     0  stevel  * 	bcopy(ksp->ks_data, buf, sizeof (kstat_named_t) * ksp->ks_ndata);
    425     0  stevel  * 	for (i = 0; i < ksp->ks_ndata; i++, knp++) {
    426     0  stevel  *		if (knp->data_type == KSTAT_DATA_STRING &&
    427     0  stevel  *		    KSTAT_NAMED_STR_PTR(knp) != NULL) {
    428     0  stevel  *			bcopy(KSTAT_NAMED_STR_PTR(knp), end,
    429     0  stevel  *			    KSTAT_NAMED_STR_BUFLEN(knp));
    430     0  stevel  *			KSTAT_NAMED_STR_PTR(knp) = end;
    431     0  stevel  *			end += KSTAT_NAMED_STR_BUFLEN(knp);
    432     0  stevel  *		}
    433     0  stevel  *	}
    434     0  stevel  * }
    435     0  stevel  */
    436     0  stevel 
    437     0  stevel /*
    438     0  stevel  * Named statistics.
    439     0  stevel  *
    440     0  stevel  * List of arbitrary name=value statistics.
    441     0  stevel  */
    442     0  stevel 
    443     0  stevel typedef struct kstat_named {
    444     0  stevel 	char	name[KSTAT_STRLEN];	/* name of counter */
    445     0  stevel 	uchar_t	data_type;		/* data type (KSTAT_DATA_*) */
    446     0  stevel 	union {
    447     0  stevel 		char		c[16];	/* enough for 128-bit ints */
    448     0  stevel 		int32_t		i32;
    449     0  stevel 		uint32_t	ui32;
    450     0  stevel 		struct {
    451     0  stevel 			union {
    452     0  stevel 				char 		*ptr;	/* NUL-term string */
    453     0  stevel #if defined(_KERNEL) && defined(_MULTI_DATAMODEL)
    454     0  stevel 				caddr32_t	ptr32;
    455     0  stevel #endif
    456     0  stevel 				char 		__pad[8]; /* 64-bit padding */
    457     0  stevel 			} addr;
    458     0  stevel 			uint32_t	len;	/* # bytes for strlen + '\0' */
    459   457     bmc 		} str;			/* long string (KSTAT_DATA_STRING) */
    460     0  stevel /*
    461     0  stevel  * The int64_t and uint64_t types are not valid for a maximally conformant
    462     0  stevel  * 32-bit compilation environment (cc -Xc) using compilers prior to the
    463     0  stevel  * introduction of C99 conforming compiler (reference ISO/IEC 9899:1990).
    464     0  stevel  * In these cases, the visibility of i64 and ui64 is only permitted for
    465     0  stevel  * 64-bit compilation environments or 32-bit non-maximally conformant
    466     0  stevel  * C89 or C90 ANSI C compilation environments (cc -Xt and cc -Xa). In the
    467     0  stevel  * C99 ANSI C compilation environment, the long long type is supported.
    468     0  stevel  * The _INT64_TYPE is defined by the implementation (see sys/int_types.h).
    469     0  stevel  */
    470     0  stevel #if defined(_INT64_TYPE)
    471     0  stevel 		int64_t		i64;
    472     0  stevel 		uint64_t	ui64;
    473     0  stevel #endif
    474     0  stevel 		long		l;
    475     0  stevel 		ulong_t		ul;
    476     0  stevel 
    477     0  stevel 		/* These structure members are obsolete */
    478     0  stevel 
    479     0  stevel 		longlong_t	ll;
    480     0  stevel 		u_longlong_t	ull;
    481     0  stevel 		float		f;
    482     0  stevel 		double		d;
    483     0  stevel 	} value;			/* value of counter */
    484     0  stevel } kstat_named_t;
    485     0  stevel 
    486     0  stevel #define	KSTAT_DATA_CHAR		0
    487     0  stevel #define	KSTAT_DATA_INT32	1
    488     0  stevel #define	KSTAT_DATA_UINT32	2
    489     0  stevel #define	KSTAT_DATA_INT64	3
    490     0  stevel #define	KSTAT_DATA_UINT64	4
    491     0  stevel 
    492     0  stevel #if !defined(_LP64)	/* not LP64: long aliases the 32-bit types */
    493     0  stevel #define	KSTAT_DATA_LONG		KSTAT_DATA_INT32
    494     0  stevel #define	KSTAT_DATA_ULONG	KSTAT_DATA_UINT32
    495     0  stevel #else
    496     0  stevel #if !defined(_KERNEL)	/* LP64 userland: long aliases the 64-bit types */
    497     0  stevel #define	KSTAT_DATA_LONG		KSTAT_DATA_INT64
    498     0  stevel #define	KSTAT_DATA_ULONG	KSTAT_DATA_UINT64
    499     0  stevel #else			/* LP64 kernel: long gets its own type codes */
    500     0  stevel #define	KSTAT_DATA_LONG		7	/* only visible to the kernel */
    501     0  stevel #define	KSTAT_DATA_ULONG	8	/* only visible to the kernel */
    502     0  stevel #endif	/* !_KERNEL */
    503     0  stevel #endif	/* !_LP64 */
    504     0  stevel 
    505     0  stevel /*
    506     0  stevel  * Statistics exporting named kstats with long strings (KSTAT_DATA_STRING)
    507     0  stevel  * may not make the assumption that ks_data_size is equal to (ks_ndata * sizeof
    508     0  stevel  * (kstat_named_t)).  ks_data_size in these cases is equal to the sum of the
    509     0  stevel  * amount of space required to store the strings (i.e., the sum of
    510     0  stevel  * KSTAT_NAMED_STR_BUFLEN() for all KSTAT_DATA_STRING statistics) plus the
    511     0  stevel  * space required to store the kstat_named_t's.
    512     0  stevel  *
    513     0  stevel  * The default update routine will update ks_data_size automatically for
    514     0  stevel  * variable-length kstats containing long strings (using the default update
    515     0  stevel  * routine only makes sense if the string is the only thing that is changing
    516     0  stevel  * in size, and ks_ndata is constant).  Fixed-length kstats containing long
    517     0  stevel  * strings must explicitly change ks_data_size (after creation but before
    518     0  stevel  * initialization) to reflect the correct amount of space required for the
    519     0  stevel  * long strings and the kstat_named_t's.
    520     0  stevel  */
    521     0  stevel #define	KSTAT_DATA_STRING	9	/* long string; see value.str */
    522     0  stevel 
    523     0  stevel /* These types are obsolete */
    524     0  stevel 
    525     0  stevel #define	KSTAT_DATA_LONGLONG	KSTAT_DATA_INT64	/* alias */
    526     0  stevel #define	KSTAT_DATA_ULONGLONG	KSTAT_DATA_UINT64	/* alias */
    527     0  stevel #define	KSTAT_DATA_FLOAT	5
    528     0  stevel #define	KSTAT_DATA_DOUBLE	6
    529     0  stevel 
    530     0  stevel #define	KSTAT_NAMED_PTR(kptr)	((kstat_named_t *)((kptr)->ks_data))
    531     0  stevel 
    532     0  stevel /*
    533     0  stevel  * String pointer stored in a named kstat; expands to an assignable lvalue.
    534     0  stevel  */
    535   457     bmc #define	KSTAT_NAMED_STR_PTR(knptr) ((*(knptr)).value.str.addr.ptr)
    536     0  stevel 
    537     0  stevel /*
    538     0  stevel  * Number of buffer bytes needed to hold the string of a named kstat
    539     0  stevel  * (its value.str.len field).
    540     0  stevel  */
    541   457     bmc #define	KSTAT_NAMED_STR_BUFLEN(knptr) ((*(knptr)).value.str.len)
    542     0  stevel 
    543     0  stevel /*
    544     0  stevel  * Interrupt statistics.
    545     0  stevel  *
    546     0  stevel  * An interrupt is a hard interrupt (sourced from the hardware device
    547     0  stevel  * itself), a soft interrupt (induced by the system via the use of
    548     0  stevel  * some system interrupt source), a watchdog interrupt (induced by
    549     0  stevel  * a periodic timer call), spurious (an interrupt entry point was
    550     0  stevel  * entered but there was no interrupt condition to service),
    551     0  stevel  * or multiple service (an interrupt condition was detected and
    552     0  stevel  * serviced just prior to returning from any of the other types).
    553     0  stevel  *
    554     0  stevel  * Measurement of the spurious class of interrupts is useful for
    555     0  stevel  * autovectored devices in order to pinpoint any interrupt latency
    556     0  stevel  * problems in a particular system configuration.
    557     0  stevel  *
    558     0  stevel  * Devices that have more than one interrupt of the same
    559     0  stevel  * type should use multiple structures.
    560     0  stevel  */
    561     0  stevel 
    562     0  stevel #define	KSTAT_INTR_HARD			0
    563     0  stevel #define	KSTAT_INTR_SOFT			1
    564     0  stevel #define	KSTAT_INTR_WATCHDOG		2
    565     0  stevel #define	KSTAT_INTR_SPURIOUS		3
    566     0  stevel #define	KSTAT_INTR_MULTSVC		4
    567     0  stevel 
    568     0  stevel #define	KSTAT_NUM_INTRS			5
    569     0  stevel 
    570     0  stevel typedef struct kstat_intr {
    571     0  stevel 	uint_t	intrs[KSTAT_NUM_INTRS];	/* counters, indexed by KSTAT_INTR_* */
    572     0  stevel } kstat_intr_t;
    573     0  stevel 
    574     0  stevel #define	KSTAT_INTR_PTR(kptr)	((kstat_intr_t *)(kptr)->ks_data)
    575     0  stevel 
    576     0  stevel /*
    577     0  stevel  * I/O statistics.
    578     0  stevel  */
    579     0  stevel 
    580     0  stevel typedef struct kstat_io {
    581     0  stevel 
    582     0  stevel 	/*
    583     0  stevel 	 * Basic counters.
    584     0  stevel 	 *
    585     0  stevel 	 * The counters should be updated at the end of service
    586     0  stevel 	 * (e.g., just prior to calling biodone()).
    587     0  stevel 	 */
    588     0  stevel 
    589     0  stevel 	u_longlong_t	nread;		/* number of bytes read */
    590     0  stevel 	u_longlong_t	nwritten;	/* number of bytes written */
    591     0  stevel 	uint_t		reads;		/* number of read operations */
    592     0  stevel 	uint_t		writes;		/* number of write operations */
    593     0  stevel 
    594     0  stevel 	/*
    595     0  stevel 	 * Accumulated time and queue length statistics.
    596     0  stevel 	 *
    597     0  stevel 	 * Accumulated time statistics are kept as a running sum
    598     0  stevel 	 * of "active" time.  Queue length statistics are kept as a
    599     0  stevel 	 * running sum of the product of queue length and elapsed time
    600     0  stevel 	 * at that length -- i.e., a Riemann sum for queue length
    601     0  stevel 	 * integrated against time.  (You can also think of the active time
    602     0  stevel 	 * as a Riemann sum, for the boolean function (queue_length > 0)
    603     0  stevel 	 * integrated against time, or you can think of it as the
    604     0  stevel 	 * Lebesgue measure of the set on which queue_length > 0.)
    605     0  stevel 	 *
    606     0  stevel 	 *		^
    607     0  stevel 	 *		|			_________
    608     0  stevel 	 *		8			| i4	|
    609     0  stevel 	 *		|			|	|
    610     0  stevel 	 *	Queue	6			|	|
    611     0  stevel 	 *	Length	|	_________	|	|
    612     0  stevel 	 *		4	| i2	|_______|	|
    613     0  stevel 	 *		|	|	    i3		|
    614     0  stevel 	 *		2_______|			|
    615     0  stevel 	 *		|    i1				|
    616     0  stevel 	 *		|_______________________________|
    617     0  stevel 	 *		Time->	t1	t2	t3	t4
    618     0  stevel 	 *
    619     0  stevel 	 * At each change of state (entry or exit from the queue),
    620     0  stevel 	 * we add the elapsed time (since the previous state change)
    621     0  stevel 	 * to the active time if the queue length was non-zero during
    622     0  stevel 	 * that interval; and we add the product of the elapsed time
    623     0  stevel 	 * times the queue length to the running length*time sum.
    624     0  stevel 	 *
    625     0  stevel 	 * This method is generalizable to measuring residency
    626     0  stevel 	 * in any defined system: instead of queue lengths, think
    627     0  stevel 	 * of "outstanding RPC calls to server X".
    628     0  stevel 	 *
    629     0  stevel 	 * A large number of I/O subsystems have at least two basic
    630     0  stevel 	 * "lists" of transactions they manage: one for transactions
    631     0  stevel 	 * that have been accepted for processing but for which processing
    632     0  stevel 	 * has yet to begin, and one for transactions which are actively
    633     0  stevel 	 * being processed (but not done). For this reason, two cumulative
    634     0  stevel 	 * time statistics are defined here: wait (pre-service) time,
    635     0  stevel 	 * and run (service) time.
    636     0  stevel 	 *
    637     0  stevel 	 * All times are 64-bit nanoseconds (hrtime_t), as returned by
    638     0  stevel 	 * gethrtime().
    639     0  stevel 	 *
    640     0  stevel 	 * The units of cumulative busy time are accumulated nanoseconds.
    641     0  stevel 	 * The units of cumulative length*time products are elapsed time
    642     0  stevel 	 * times queue length.
    643     0  stevel 	 *
    644     0  stevel 	 * Updates to the fields below are performed implicitly by calls to
    645     0  stevel 	 * these six functions:
    646     0  stevel 	 *
    647     0  stevel 	 *	kstat_waitq_enter()
    648     0  stevel 	 *	kstat_waitq_exit()
    649     0  stevel 	 *	kstat_runq_enter()
    650     0  stevel 	 *	kstat_runq_exit()
    651     0  stevel 	 *
    652     0  stevel 	 *	kstat_waitq_to_runq()		(see below)
    653     0  stevel 	 *	kstat_runq_back_to_waitq()	(see below)
    654     0  stevel 	 *
    655     0  stevel 	 * Since kstat_waitq_exit() is typically followed immediately
    656     0  stevel 	 * by kstat_runq_enter(), there is a single kstat_waitq_to_runq()
    657     0  stevel 	 * function which performs both operations.  This is a performance
    658     0  stevel 	 * win since only one timestamp is required.
    659     0  stevel 	 *
    660     0  stevel 	 * In some instances, it may be necessary to move a request from
    661     0  stevel 	 * the run queue back to the wait queue, e.g. for write throttling.
    662     0  stevel 	 * For these situations, call kstat_runq_back_to_waitq().
    663     0  stevel 	 *
    664     0  stevel 	 * These fields should never be updated by any other means.
    665     0  stevel 	 */
    666     0  stevel 
    667     0  stevel 	hrtime_t wtime;		/* cumulative wait (pre-service) time */
    668     0  stevel 	hrtime_t wlentime;	/* cumulative wait length*time product */
    669     0  stevel 	hrtime_t wlastupdate;	/* last time wait queue changed */
    670     0  stevel 	hrtime_t rtime;		/* cumulative run (service) time */
    671     0  stevel 	hrtime_t rlentime;	/* cumulative run length*time product */
    672     0  stevel 	hrtime_t rlastupdate;	/* last time run queue changed */
    673     0  stevel 
    674     0  stevel 	uint_t	wcnt;		/* count of elements in wait state */
    675     0  stevel 	uint_t	rcnt;		/* count of elements in run state */
    676     0  stevel 
    677     0  stevel } kstat_io_t;
    678     0  stevel 
    679     0  stevel #define	KSTAT_IO_PTR(kptr)	((kstat_io_t *)(kptr)->ks_data)
    680     0  stevel 
    681     0  stevel /*
    682     0  stevel  * Event timer statistics - cumulative elapsed time and number of events.
    683     0  stevel  *
    684     0  stevel  * Updates to these fields are performed implicitly by calls to
    685     0  stevel  * kstat_timer_start() and kstat_timer_stop() (declared under _KERNEL).
    686     0  stevel  */
    687     0  stevel 
    688     0  stevel typedef struct kstat_timer {
    689     0  stevel 	char		name[KSTAT_STRLEN];	/* event name */
    690     0  stevel 	uchar_t		resv;			/* reserved */
    691     0  stevel 	u_longlong_t	num_events;		/* number of events */
    692     0  stevel 	hrtime_t	elapsed_time;		/* cumulative elapsed time */
    693     0  stevel 	hrtime_t	min_time;		/* shortest event duration */
    694     0  stevel 	hrtime_t	max_time;		/* longest event duration */
    695     0  stevel 	hrtime_t	start_time;		/* previous event start time */
    696     0  stevel 	hrtime_t	stop_time;		/* previous event stop time */
    697     0  stevel } kstat_timer_t;
    698     0  stevel 
    699     0  stevel #define	KSTAT_TIMER_PTR(kptr)	((kstat_timer_t *)(kptr)->ks_data)
    700     0  stevel 
    701     0  stevel #if	defined(_KERNEL)
    702     0  stevel 
    703     0  stevel #include <sys/t_lock.h>
    704     0  stevel 
    705     0  stevel extern kid_t	kstat_chain_id;		/* bumped at each state change */
    706     0  stevel extern void	kstat_init(void);	/* initialize kstat framework */
    707     0  stevel 
    708     0  stevel /*
    709     0  stevel  * Adding and deleting kstats.
    710     0  stevel  *
    711     0  stevel  * The typical sequence to add a kstat is:
    712     0  stevel  *
    713     0  stevel  *	ksp = kstat_create(module, instance, name, class, type, ndata, flags);
    714     0  stevel  *	if (ksp) {
    715     0  stevel  *		... provider initialization, if necessary
    716     0  stevel  *		kstat_install(ksp);
    717     0  stevel  *	}
    718     0  stevel  *
    719     0  stevel  * There are three logically distinct steps here:
    720     0  stevel  *
    721     0  stevel  * Step 1: System Initialization (kstat_create)
    722     0  stevel  *
    723     0  stevel  * kstat_create() performs system initialization.  kstat_create()
    724     0  stevel  * allocates memory for the entire kstat (header plus data), initializes
    725     0  stevel  * all header fields, initializes the data section to all zeroes, assigns
    726     0  stevel  * a unique KID, and puts the kstat onto the system's kstat chain.
    727     0  stevel  * The returned kstat is marked invalid (KSTAT_FLAG_INVALID is set),
    728     0  stevel  * because the provider (caller) has not yet had a chance to initialize
    729     0  stevel  * the data section.
    730     0  stevel  *
    731     0  stevel  * By default, kstats are exported to all zones on the system.  A kstat may be
    732     0  stevel  * created via kstat_create_zone() to specify a zone to which the statistics
    733     0  stevel  * should be exported.  kstat_zone_add() may be used to specify additional
    734     0  stevel  * zones to which the statistics are to be exported.
    735     0  stevel  *
    736     0  stevel  * Step 2: Provider Initialization
    737     0  stevel  *
    738     0  stevel  * The provider performs any necessary initialization of the data section,
    739     0  stevel  * e.g. setting the name fields in a KSTAT_TYPE_NAMED.  Virtual kstats set
    740     0  stevel  * the ks_data field at this time.  The provider may also set the ks_update,
    741     0  stevel  * ks_snapshot, ks_private, and ks_lock fields if necessary.
    742     0  stevel  *
    743     0  stevel  * Step 3: Installation (kstat_install)
    744     0  stevel  *
    745     0  stevel  * Once the kstat is completely initialized, kstat_install() clears the
    746     0  stevel  * INVALID flag, thus making the kstat accessible to the outside world.
    747     0  stevel  * kstat_install() also clears the DORMANT flag for persistent kstats.
    748     0  stevel  *
    749     0  stevel  * Removing a kstat from the system
    750     0  stevel  *
    751     0  stevel  * kstat_delete(ksp) removes ksp from the kstat chain and frees all
    752     0  stevel  * associated system resources.  NOTE: When you call kstat_delete(),
    753     0  stevel  * you must NOT be holding that kstat's ks_lock.  Otherwise, you may
    754     0  stevel  * deadlock with a kstat reader.
    755     0  stevel  *
    756     0  stevel  * Persistent kstats
    757     0  stevel  *
    758     0  stevel  * From the provider's point of view, persistence is transparent.  The only
    759     0  stevel  * difference between ephemeral (normal) kstats and persistent kstats
    760     0  stevel  * is that you pass KSTAT_FLAG_PERSISTENT to kstat_create().  Magically,
    761     0  stevel  * this has the effect of making your data visible even when you're
    762     0  stevel  * not home.  Persistence is important to tools like iostat, which want
    763     0  stevel  * to get a meaningful picture of disk activity.  Without persistence,
    764     0  stevel  * raw disk i/o statistics could never accumulate: they would come and
    765     0  stevel  * go with each open/close of the raw device.
    766     0  stevel  *
    767     0  stevel  * The magic of persistence works by slightly altering the behavior of
    768     0  stevel  * kstat_create() and kstat_delete().  The first call to kstat_create()
    769     0  stevel  * creates a new kstat, as usual.  However, kstat_delete() does not
    770     0  stevel  * actually delete the kstat: it performs one final update of the data
    771     0  stevel  * (i.e., calls the ks_update routine), marks the kstat as dormant, and
    772     0  stevel  * sets the ks_lock, ks_update, ks_private, and ks_snapshot fields back
    773     0  stevel  * to their default values (since they might otherwise point to garbage,
    774     0  stevel  * e.g. if the provider is going away).  kstat clients can still access
    775     0  stevel  * the dormant kstat just like a live kstat; they just continue to see
    776     0  stevel  * the final data values as long as the kstat remains dormant.
    777     0  stevel  * All subsequent kstat_create() calls simply find the already-existing,
    778     0  stevel  * dormant kstat and return a pointer to it, without altering any fields.
    779     0  stevel  * The provider then performs its usual initialization sequence, and
    780     0  stevel  * calls kstat_install().  kstat_install() uses the old data values to
    781     0  stevel  * initialize the native data (i.e., ks_update is called with KSTAT_WRITE),
    782     0  stevel  * thus making it seem like you were never gone.
    783     0  stevel  */
    784     0  stevel 
    785  2951   elowe extern kstat_t *kstat_create(const char *, int, const char *, const char *,
    786  2951   elowe     uchar_t, uint_t, uchar_t);
    787  2951   elowe extern kstat_t *kstat_create_zone(const char *, int, const char *,
    788  2951   elowe     const char *, uchar_t, uint_t, uchar_t, zoneid_t);
    789     0  stevel extern void kstat_install(kstat_t *);	/* clears KSTAT_FLAG_INVALID */
    790     0  stevel extern void kstat_delete(kstat_t *);	/* do not hold ks_lock when calling */
    791     0  stevel extern void kstat_named_setstr(kstat_named_t *knp, const char *src);
    792  2951   elowe extern void kstat_set_string(char *, const char *);
    793  2951   elowe extern void kstat_delete_byname(const char *, int, const char *);
    794  2951   elowe extern void kstat_delete_byname_zone(const char *, int, const char *, zoneid_t);
    795  2951   elowe extern void kstat_named_init(kstat_named_t *, const char *, uchar_t);
    796  2951   elowe extern void kstat_timer_init(kstat_timer_t *, const char *);
    797     0  stevel extern void kstat_waitq_enter(kstat_io_t *);
    798     0  stevel extern void kstat_waitq_exit(kstat_io_t *);
    799     0  stevel extern void kstat_runq_enter(kstat_io_t *);
    800     0  stevel extern void kstat_runq_exit(kstat_io_t *);
    801     0  stevel extern void kstat_waitq_to_runq(kstat_io_t *);	/* waitq exit + runq enter */
    802     0  stevel extern void kstat_runq_back_to_waitq(kstat_io_t *);	/* e.g. throttling */
    803     0  stevel extern void kstat_timer_start(kstat_timer_t *);	/* updates kstat_timer_t */
    804     0  stevel extern void kstat_timer_stop(kstat_timer_t *);	/* updates kstat_timer_t */
    805     0  stevel 
    806     0  stevel extern void kstat_zone_add(kstat_t *, zoneid_t);	/* export to a zone */
    807     0  stevel extern void kstat_zone_remove(kstat_t *, zoneid_t);
    808     0  stevel extern int kstat_zone_find(kstat_t *, zoneid_t);
    809     0  stevel 
    810     0  stevel extern kstat_t *kstat_hold_bykid(kid_t kid, zoneid_t);
    811  2951   elowe extern kstat_t *kstat_hold_byname(const char *, int, const char *, zoneid_t);
    812     0  stevel extern void kstat_rele(kstat_t *);
    813     0  stevel 
    814     0  stevel #endif	/* defined(_KERNEL) */
    815     0  stevel 
    816     0  stevel #ifdef	__cplusplus
    817     0  stevel }
    818     0  stevel #endif
    819     0  stevel 
    820     0  stevel #endif	/* _SYS_KSTAT_H */
    821