1 0 stevel /* 2 0 stevel * CDDL HEADER START 3 0 stevel * 4 0 stevel * The contents of this file are subject to the terms of the 5 2951 elowe * Common Development and Distribution License (the "License"). 6 2951 elowe * You may not use this file except in compliance with the License. 7 0 stevel * 8 0 stevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 0 stevel * or http://www.opensolaris.org/os/licensing. 10 0 stevel * See the License for the specific language governing permissions 11 0 stevel * and limitations under the License. 12 0 stevel * 13 0 stevel * When distributing Covered Code, include this CDDL HEADER in each 14 0 stevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 0 stevel * If applicable, add the following below this CDDL HEADER, with the 16 0 stevel * fields enclosed by brackets "[]" replaced with your own identifying 17 0 stevel * information: Portions Copyright [yyyy] [name of copyright owner] 18 0 stevel * 19 0 stevel * CDDL HEADER END 20 0 stevel */ 21 0 stevel /* 22 2951 elowe * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 0 stevel * Use is subject to license terms. 
24 0 stevel */ 25 0 stevel 26 0 stevel #ifndef _SYS_KSTAT_H 27 0 stevel #define _SYS_KSTAT_H 28 0 stevel 29 0 stevel #pragma ident "%Z%%M% %I% %E% SMI" 30 0 stevel 31 0 stevel /* 32 0 stevel * Definition of general kernel statistics structures and /dev/kstat ioctls 33 0 stevel */ 34 0 stevel 35 0 stevel #include <sys/types.h> 36 0 stevel #include <sys/time.h> 37 0 stevel 38 0 stevel #ifdef __cplusplus 39 0 stevel extern "C" { 40 0 stevel #endif 41 0 stevel 42 0 stevel typedef int kid_t; /* unique kstat id */ 43 0 stevel 44 0 stevel /* 45 0 stevel * Kernel statistics driver (/dev/kstat) ioctls 46 0 stevel */ 47 0 stevel 48 0 stevel #define KSTAT_IOC_BASE ('K' << 8) 49 0 stevel 50 0 stevel #define KSTAT_IOC_CHAIN_ID KSTAT_IOC_BASE | 0x01 51 0 stevel #define KSTAT_IOC_READ KSTAT_IOC_BASE | 0x02 52 0 stevel #define KSTAT_IOC_WRITE KSTAT_IOC_BASE | 0x03 53 0 stevel 54 0 stevel /* 55 0 stevel * /dev/kstat ioctl usage (kd denotes /dev/kstat descriptor): 56 0 stevel * 57 0 stevel * kcid = ioctl(kd, KSTAT_IOC_CHAIN_ID, NULL); 58 0 stevel * kcid = ioctl(kd, KSTAT_IOC_READ, kstat_t *); 59 0 stevel * kcid = ioctl(kd, KSTAT_IOC_WRITE, kstat_t *); 60 0 stevel */ 61 0 stevel 62 0 stevel #define KSTAT_STRLEN 31 /* 30 chars + NULL; must be 16 * n - 1 */ 63 0 stevel 64 0 stevel /* 65 0 stevel * The generic kstat header 66 0 stevel */ 67 0 stevel 68 0 stevel typedef struct kstat { 69 0 stevel /* 70 0 stevel * Fields relevant to both kernel and user 71 0 stevel */ 72 0 stevel hrtime_t ks_crtime; /* creation time (from gethrtime()) */ 73 0 stevel struct kstat *ks_next; /* kstat chain linkage */ 74 0 stevel kid_t ks_kid; /* unique kstat ID */ 75 0 stevel char ks_module[KSTAT_STRLEN]; /* provider module name */ 76 0 stevel uchar_t ks_resv; /* reserved, currently just padding */ 77 0 stevel int ks_instance; /* provider module's instance */ 78 0 stevel char ks_name[KSTAT_STRLEN]; /* kstat name */ 79 0 stevel uchar_t ks_type; /* kstat data type */ 80 0 stevel char ks_class[KSTAT_STRLEN]; /* 
kstat class */ 81 0 stevel uchar_t ks_flags; /* kstat flags */ 82 0 stevel void *ks_data; /* kstat type-specific data */ 83 0 stevel uint_t ks_ndata; /* # of type-specific data records */ 84 0 stevel size_t ks_data_size; /* total size of kstat data section */ 85 0 stevel hrtime_t ks_snaptime; /* time of last data shapshot */ 86 0 stevel /* 87 0 stevel * Fields relevant to kernel only 88 0 stevel */ 89 0 stevel int (*ks_update)(struct kstat *, int); /* dynamic update */ 90 0 stevel void *ks_private; /* arbitrary provider-private data */ 91 0 stevel int (*ks_snapshot)(struct kstat *, void *, int); 92 0 stevel void *ks_lock; /* protects this kstat's data */ 93 0 stevel } kstat_t; 94 0 stevel 95 0 stevel #ifdef _SYSCALL32 96 0 stevel 97 0 stevel typedef int32_t kid32_t; 98 0 stevel 99 0 stevel typedef struct kstat32 { 100 0 stevel /* 101 0 stevel * Fields relevant to both kernel and user 102 0 stevel */ 103 0 stevel hrtime_t ks_crtime; 104 0 stevel caddr32_t ks_next; /* struct kstat pointer */ 105 0 stevel kid32_t ks_kid; 106 0 stevel char ks_module[KSTAT_STRLEN]; 107 0 stevel uint8_t ks_resv; 108 0 stevel int32_t ks_instance; 109 0 stevel char ks_name[KSTAT_STRLEN]; 110 0 stevel uint8_t ks_type; 111 0 stevel char ks_class[KSTAT_STRLEN]; 112 0 stevel uint8_t ks_flags; 113 0 stevel caddr32_t ks_data; /* type-specific data */ 114 0 stevel uint32_t ks_ndata; 115 0 stevel size32_t ks_data_size; 116 0 stevel hrtime_t ks_snaptime; 117 0 stevel /* 118 0 stevel * Fields relevant to kernel only (only needed here for padding) 119 0 stevel */ 120 0 stevel int32_t _ks_update; 121 0 stevel caddr32_t _ks_private; 122 0 stevel int32_t _ks_snapshot; 123 0 stevel caddr32_t _ks_lock; 124 0 stevel } kstat32_t; 125 0 stevel 126 0 stevel #endif /* _SYSCALL32 */ 127 0 stevel 128 0 stevel /* 129 0 stevel * kstat structure and locking strategy 130 0 stevel * 131 0 stevel * Each kstat consists of a header section (a kstat_t) and a data section. 
132 0 stevel * The system maintains a set of kstats, protected by kstat_chain_lock. 133 0 stevel * kstat_chain_lock protects all additions to/deletions from this set, 134 0 stevel * as well as all changes to kstat headers. kstat data sections are 135 0 stevel * *optionally* protected by the per-kstat ks_lock. If ks_lock is non-NULL, 136 0 stevel * kstat clients (e.g. /dev/kstat) will acquire this lock for all of their 137 0 stevel * operations on that kstat. It is up to the kstat provider to decide whether 138 0 stevel * guaranteeing consistent data to kstat clients is sufficiently important 139 0 stevel * to justify the locking cost. Note, however, that most statistic updates 140 0 stevel * already occur under one of the provider's mutexes, so if the provider sets 141 0 stevel * ks_lock to point to that mutex, then kstat data locking is free. 142 0 stevel * 143 0 stevel * NOTE: variable-size kstats MUST employ kstat data locking, to prevent 144 0 stevel * data-size races with kstat clients. 145 0 stevel * 146 0 stevel * NOTE: ks_lock is really of type (kmutex_t *); it is declared as (void *) 147 0 stevel * in the kstat header so that users don't have to be exposed to all of the 148 0 stevel * kernel's lock-related data structures. 149 0 stevel */ 150 0 stevel 151 0 stevel #if defined(_KERNEL) 152 0 stevel 153 0 stevel #define KSTAT_ENTER(k) \ 154 0 stevel { kmutex_t *lp = (k)->ks_lock; if (lp) mutex_enter(lp); } 155 0 stevel 156 0 stevel #define KSTAT_EXIT(k) \ 157 0 stevel { kmutex_t *lp = (k)->ks_lock; if (lp) mutex_exit(lp); } 158 0 stevel 159 0 stevel #define KSTAT_UPDATE(k, rw) (*(k)->ks_update)((k), (rw)) 160 0 stevel 161 0 stevel #define KSTAT_SNAPSHOT(k, buf, rw) (*(k)->ks_snapshot)((k), (buf), (rw)) 162 0 stevel 163 0 stevel #endif /* defined(_KERNEL) */ 164 0 stevel 165 0 stevel /* 166 0 stevel * kstat time 167 0 stevel * 168 0 stevel * All times associated with kstats (e.g. 
creation time, snapshot time, 169 0 stevel * kstat_timer_t and kstat_io_t timestamps, etc.) are 64-bit nanosecond values, 170 0 stevel * as returned by gethrtime(). The accuracy of these timestamps is machine 171 0 stevel * dependent, but the precision (units) is the same across all platforms. 172 0 stevel */ 173 0 stevel 174 0 stevel /* 175 0 stevel * kstat identity (KID) 176 0 stevel * 177 0 stevel * Each kstat is assigned a unique KID (kstat ID) when it is added to the 178 0 stevel * global kstat chain. The KID is used as a cookie by /dev/kstat to 179 0 stevel * request information about the corresponding kstat. There is also 180 0 stevel * an identity associated with the entire kstat chain, kstat_chain_id, 181 0 stevel * which is bumped each time a kstat is added or deleted. /dev/kstat uses 182 0 stevel * the chain ID to detect changes in the kstat chain (e.g., a new disk 183 0 stevel * coming online) between ioctl()s. 184 0 stevel */ 185 0 stevel 186 0 stevel /* 187 0 stevel * kstat module, kstat instance 188 0 stevel * 189 0 stevel * ks_module and ks_instance contain the name and instance of the module 190 0 stevel * that created the kstat. In cases where there can only be one instance, 191 0 stevel * ks_instance is 0. The kernel proper (/kernel/unix) uses "unix" as its 192 0 stevel * module name. 193 0 stevel */ 194 0 stevel 195 0 stevel /* 196 0 stevel * kstat name 197 0 stevel * 198 0 stevel * ks_name gives a meaningful name to a kstat. The full kstat namespace 199 0 stevel * is module.instance.name, so the name only need be unique within a 200 0 stevel * module. kstat_create() will fail if you try to create a kstat with 201 0 stevel * an already-used (ks_module, ks_instance, ks_name) triplet. Spaces are 202 0 stevel * allowed in kstat names, but strongly discouraged, since they hinder 203 0 stevel * awk-style processing at user level. 
204 0 stevel */ 205 0 stevel 206 0 stevel /* 207 0 stevel * kstat type 208 0 stevel * 209 0 stevel * The kstat mechanism provides several flavors of kstat data, defined 210 0 stevel * below. The "raw" kstat type is just treated as an array of bytes; you 211 0 stevel * can use this to export any kind of data you want. 212 0 stevel * 213 0 stevel * Some kstat types allow multiple data structures per kstat, e.g. 214 0 stevel * KSTAT_TYPE_NAMED; others do not. This is part of the spec for each 215 0 stevel * kstat data type. 216 0 stevel * 217 0 stevel * User-level tools should *not* rely on the #define KSTAT_NUM_TYPES. To 218 0 stevel * get this information, read out the standard system kstat "kstat_types". 219 0 stevel */ 220 0 stevel 221 0 stevel #define KSTAT_TYPE_RAW 0 /* can be anything */ 222 0 stevel /* ks_ndata >= 1 */ 223 0 stevel #define KSTAT_TYPE_NAMED 1 /* name/value pair */ 224 0 stevel /* ks_ndata >= 1 */ 225 0 stevel #define KSTAT_TYPE_INTR 2 /* interrupt statistics */ 226 0 stevel /* ks_ndata == 1 */ 227 0 stevel #define KSTAT_TYPE_IO 3 /* I/O statistics */ 228 0 stevel /* ks_ndata == 1 */ 229 0 stevel #define KSTAT_TYPE_TIMER 4 /* event timer */ 230 0 stevel /* ks_ndata >= 1 */ 231 0 stevel 232 0 stevel #define KSTAT_NUM_TYPES 5 233 0 stevel 234 0 stevel /* 235 0 stevel * kstat class 236 0 stevel * 237 0 stevel * Each kstat can be characterized as belonging to some broad class 238 0 stevel * of statistics, e.g. disk, tape, net, vm, streams, etc. This field 239 0 stevel * can be used as a filter to extract related kstats. The following 240 0 stevel * values are currently in use: disk, tape, net, controller, vm, kvm, 241 0 stevel * hat, streams, kstat, and misc. (The kstat class encompasses things 242 0 stevel * like kstat_types.) 243 0 stevel */ 244 0 stevel 245 0 stevel /* 246 0 stevel * kstat flags 247 0 stevel * 248 0 stevel * Any of the following flags may be passed to kstat_create(). They are 249 0 stevel * all zero by default. 
 *
 *	KSTAT_FLAG_VIRTUAL:
 *
 *		Tells kstat_create() not to allocate memory for the
 *		kstat data section; instead, you will set the ks_data
 *		field to point to the data you wish to export. This
 *		provides a convenient way to export existing data
 *		structures.
 *
 *	KSTAT_FLAG_VAR_SIZE:
 *
 *		The size of the kstat you are creating will vary over time.
 *		For example, you may want to use the kstat mechanism to
 *		export a linked list. NOTE: The kstat framework does not
 *		manage the data section, so all variable-size kstats must be
 *		virtual kstats. Moreover, variable-size kstats MUST employ
 *		kstat data locking to prevent data-size races with kstat
 *		clients. See the section on "kstat snapshot" for details.
 *
 *	KSTAT_FLAG_WRITABLE:
 *
 *		Makes the kstat's data section writable by root.
 *		The ks_snapshot routine (see below) does not need to check for
 *		this; permission checking is handled in the kstat driver.
 *
 *	KSTAT_FLAG_PERSISTENT:
 *
 *		Indicates that this kstat is to be persistent over time.
 *		For persistent kstats, kstat_delete() simply marks the
 *		kstat as dormant; a subsequent kstat_create() reactivates
 *		the kstat. This feature is provided so that statistics
 *		are not lost across driver close/open (e.g., raw disk I/O
 *		on a disk with no mounted partitions.)
 *		NOTE: Persistent kstats cannot be virtual, since ks_data
 *		points to garbage as soon as the driver goes away.
285 0 stevel * 286 0 stevel * The following flags are maintained by the kstat framework: 287 0 stevel * 288 0 stevel * KSTAT_FLAG_DORMANT: 289 0 stevel * 290 0 stevel * For persistent kstats, indicates that the kstat is in the 291 0 stevel * dormant state (e.g., the corresponding device is closed). 292 0 stevel * 293 0 stevel * KSTAT_FLAG_INVALID: 294 0 stevel * 295 0 stevel * This flag is set when a kstat is in a transitional state, 296 0 stevel * e.g. between kstat_create() and kstat_install(). 297 0 stevel * kstat clients must not attempt to access the kstat's data 298 0 stevel * if this flag is set. 299 0 stevel */ 300 0 stevel 301 0 stevel #define KSTAT_FLAG_VIRTUAL 0x01 302 0 stevel #define KSTAT_FLAG_VAR_SIZE 0x02 303 0 stevel #define KSTAT_FLAG_WRITABLE 0x04 304 0 stevel #define KSTAT_FLAG_PERSISTENT 0x08 305 0 stevel #define KSTAT_FLAG_DORMANT 0x10 306 0 stevel #define KSTAT_FLAG_INVALID 0x20 307 0 stevel 308 0 stevel /* 309 0 stevel * Dynamic update support 310 0 stevel * 311 0 stevel * The kstat mechanism allows for an optional ks_update function to update 312 0 stevel * kstat data. This is useful for drivers where the underlying device 313 0 stevel * keeps cheap hardware stats, but extraction is expensive. Instead of 314 0 stevel * constantly keeping the kstat data section up to date, you can supply a 315 0 stevel * ks_update function which updates the kstat's data section on demand. 316 0 stevel * To take advantage of this feature, simply set the ks_update field before 317 0 stevel * calling kstat_install(). 318 0 stevel * 319 0 stevel * The ks_update function, if supplied, must have the following structure: 320 0 stevel * 321 0 stevel * int 322 0 stevel * foo_kstat_update(kstat_t *ksp, int rw) 323 0 stevel * { 324 0 stevel * if (rw == KSTAT_WRITE) { 325 0 stevel * ... update the native stats from ksp->ks_data; 326 0 stevel * return EACCES if you don't support this 327 0 stevel * } else { 328 0 stevel * ... 
update ksp->ks_data from the native stats 329 0 stevel * } 330 0 stevel * } 331 0 stevel * 332 0 stevel * The ks_update return codes are: 0 for success, EACCES if you don't allow 333 0 stevel * KSTAT_WRITE, and EIO for any other type of error. 334 0 stevel * 335 0 stevel * In general, the ks_update function may need to refer to provider-private 336 0 stevel * data; for example, it may need a pointer to the provider's raw statistics. 337 0 stevel * The ks_private field is available for this purpose. Its use is entirely 338 0 stevel * at the provider's discretion. 339 0 stevel * 340 0 stevel * All variable-size kstats MUST supply a ks_update routine, which computes 341 0 stevel * and sets ks_data_size (and ks_ndata if that is meaningful), since these 342 0 stevel * are needed to perform kstat snapshots (see below). 343 0 stevel * 344 0 stevel * No kstat locking should be done inside the ks_update routine. The caller 345 0 stevel * will already be holding the kstat's ks_lock (to ensure consistent data). 346 0 stevel */ 347 0 stevel 348 0 stevel #define KSTAT_READ 0 349 0 stevel #define KSTAT_WRITE 1 350 0 stevel 351 0 stevel /* 352 0 stevel * Kstat snapshot 353 0 stevel * 354 0 stevel * In order to get a consistent view of a kstat's data, clients must obey 355 0 stevel * the kstat's locking strategy. However, these clients may need to perform 356 0 stevel * operations on the data which could cause a fault (e.g. copyout()), or 357 0 stevel * operations which are simply expensive. Doing so could cause deadlock 358 0 stevel * (e.g. if you're holding a disk's kstat lock which is ultimately required 359 0 stevel * to resolve a copyout() fault), performance degradation (since the providers' 360 0 stevel * activity is serialized at the kstat lock), device timing problems, etc. 361 0 stevel * 362 0 stevel * To avoid these problems, kstat data is provided via snapshots. 
Taking 363 0 stevel * a snapshot is a simple process: allocate a wired-down kernel buffer, 364 0 stevel * acquire the kstat's data lock, copy the data into the buffer ("take the 365 0 stevel * snapshot"), and release the lock. This ensures that the kstat's data lock 366 0 stevel * will be held as briefly as possible, and that no faults will occur while 367 0 stevel * the lock is held. 368 0 stevel * 369 0 stevel * Normally, the snapshot is taken by default_kstat_snapshot(), which 370 0 stevel * timestamps the data (sets ks_snaptime), copies it, and does a little 371 0 stevel * massaging to deal with incomplete transactions on i/o kstats. However, 372 0 stevel * this routine only works for kstats with contiguous data (the typical case). 373 0 stevel * If you create a kstat whose data is, say, a linked list, you must provide 374 0 stevel * your own ks_snapshot routine. The routine you supply must have the 375 0 stevel * following prototype (replace "foo" with something appropriate): 376 0 stevel * 377 0 stevel * int foo_kstat_snapshot(kstat_t *ksp, void *buf, int rw); 378 0 stevel * 379 0 stevel * The minimal snapshot routine -- one which copies contiguous data that 380 0 stevel * doesn't need any massaging -- would be this: 381 0 stevel * 382 0 stevel * ksp->ks_snaptime = gethrtime(); 383 0 stevel * if (rw == KSTAT_WRITE) 384 0 stevel * bcopy(buf, ksp->ks_data, ksp->ks_data_size); 385 0 stevel * else 386 0 stevel * bcopy(ksp->ks_data, buf, ksp->ks_data_size); 387 0 stevel * return (0); 388 0 stevel * 389 0 stevel * A more illuminating example is taking a snapshot of a linked list: 390 0 stevel * 391 0 stevel * ksp->ks_snaptime = gethrtime(); 392 0 stevel * if (rw == KSTAT_WRITE) 393 0 stevel * return (EACCES); ... See below ... 
 *	for (foo = first_foo; foo; foo = foo->next) {
 *		bcopy((char *) foo, (char *) buf, sizeof (struct foo));
 *		buf = ((struct foo *) buf) + 1;
 *	}
 *	return (0);
 *
 * In the example above, we have decided that we don't want to allow
 * KSTAT_WRITE access, so we return EACCES if this is attempted.
 *
 * The key points are:
 *
 *	(1) ks_snaptime must be set (via gethrtime()) to timestamp the data.
 *	(2) Data gets copied from the kstat to the buffer on KSTAT_READ,
 *		and from the buffer to the kstat on KSTAT_WRITE.
 *	(3) ks_snapshot return values are: 0 for success, EACCES if you
 *		don't allow KSTAT_WRITE, and EIO for any other type of error.
 *
 * Named kstats (see section on "Named statistics" below) containing long
 * strings (KSTAT_DATA_STRING) need special handling. The kstat driver
 * assumes that all strings are copied into the buffer after the array of
 * named kstats, and the pointers (KSTAT_NAMED_STR_PTR()) are updated to point
 * into the copy within the buffer. The default snapshot routine does this,
 * but overriding routines should contain at least the following:
 *
 *	if (rw == KSTAT_READ) {
 *		kstat_named_t *knp = buf;
 *		char *end = (char *)(knp + ksp->ks_ndata);
 *		uint_t i;
 *
 *		... Do the regular copy ...
424 0 stevel * bcopy(ksp->ks_data, buf, sizeof (kstat_named_t) * ksp->ks_ndata); 425 0 stevel * 426 0 stevel * for (i = 0; i < ksp->ks_ndata; i++, knp++) { 427 0 stevel * if (knp[i].data_type == KSTAT_DATA_STRING && 428 0 stevel * KSTAT_NAMED_STR_PTR(knp) != NULL) { 429 0 stevel * bcopy(KSTAT_NAMED_STR_PTR(knp), end, 430 0 stevel * KSTAT_NAMED_STR_BUFLEN(knp)); 431 0 stevel * KSTAT_NAMED_STR_PTR(knp) = end; 432 0 stevel * end += KSTAT_NAMED_STR_BUFLEN(knp); 433 0 stevel * } 434 0 stevel * } 435 0 stevel */ 436 0 stevel 437 0 stevel /* 438 0 stevel * Named statistics. 439 0 stevel * 440 0 stevel * List of arbitrary name=value statistics. 441 0 stevel */ 442 0 stevel 443 0 stevel typedef struct kstat_named { 444 0 stevel char name[KSTAT_STRLEN]; /* name of counter */ 445 0 stevel uchar_t data_type; /* data type */ 446 0 stevel union { 447 0 stevel char c[16]; /* enough for 128-bit ints */ 448 0 stevel int32_t i32; 449 0 stevel uint32_t ui32; 450 0 stevel struct { 451 0 stevel union { 452 0 stevel char *ptr; /* NULL-term string */ 453 0 stevel #if defined(_KERNEL) && defined(_MULTI_DATAMODEL) 454 0 stevel caddr32_t ptr32; 455 0 stevel #endif 456 0 stevel char __pad[8]; /* 64-bit padding */ 457 0 stevel } addr; 458 0 stevel uint32_t len; /* # bytes for strlen + '\0' */ 459 457 bmc } str; 460 0 stevel /* 461 0 stevel * The int64_t and uint64_t types are not valid for a maximally conformant 462 0 stevel * 32-bit compilation environment (cc -Xc) using compilers prior to the 463 0 stevel * introduction of C99 conforming compiler (reference ISO/IEC 9899:1990). 464 0 stevel * In these cases, the visibility of i64 and ui64 is only permitted for 465 0 stevel * 64-bit compilation environments or 32-bit non-maximally conformant 466 0 stevel * C89 or C90 ANSI C compilation environments (cc -Xt and cc -Xa). In the 467 0 stevel * C99 ANSI C compilation environment, the long long type is supported. 
468 0 stevel * The _INT64_TYPE is defined by the implementation (see sys/int_types.h). 469 0 stevel */ 470 0 stevel #if defined(_INT64_TYPE) 471 0 stevel int64_t i64; 472 0 stevel uint64_t ui64; 473 0 stevel #endif 474 0 stevel long l; 475 0 stevel ulong_t ul; 476 0 stevel 477 0 stevel /* These structure members are obsolete */ 478 0 stevel 479 0 stevel longlong_t ll; 480 0 stevel u_longlong_t ull; 481 0 stevel float f; 482 0 stevel double d; 483 0 stevel } value; /* value of counter */ 484 0 stevel } kstat_named_t; 485 0 stevel 486 0 stevel #define KSTAT_DATA_CHAR 0 487 0 stevel #define KSTAT_DATA_INT32 1 488 0 stevel #define KSTAT_DATA_UINT32 2 489 0 stevel #define KSTAT_DATA_INT64 3 490 0 stevel #define KSTAT_DATA_UINT64 4 491 0 stevel 492 0 stevel #if !defined(_LP64) 493 0 stevel #define KSTAT_DATA_LONG KSTAT_DATA_INT32 494 0 stevel #define KSTAT_DATA_ULONG KSTAT_DATA_UINT32 495 0 stevel #else 496 0 stevel #if !defined(_KERNEL) 497 0 stevel #define KSTAT_DATA_LONG KSTAT_DATA_INT64 498 0 stevel #define KSTAT_DATA_ULONG KSTAT_DATA_UINT64 499 0 stevel #else 500 0 stevel #define KSTAT_DATA_LONG 7 /* only visible to the kernel */ 501 0 stevel #define KSTAT_DATA_ULONG 8 /* only visible to the kernel */ 502 0 stevel #endif /* !_KERNEL */ 503 0 stevel #endif /* !_LP64 */ 504 0 stevel 505 0 stevel /* 506 0 stevel * Statistics exporting named kstats with long strings (KSTAT_DATA_STRING) 507 0 stevel * may not make the assumption that ks_data_size is equal to (ks_ndata * sizeof 508 0 stevel * (kstat_named_t)). ks_data_size in these cases is equal to the sum of the 509 0 stevel * amount of space required to store the strings (ie, the sum of 510 0 stevel * KSTAT_NAMED_STR_BUFLEN() for all KSTAT_DATA_STRING statistics) plus the 511 0 stevel * space required to store the kstat_named_t's. 
512 0 stevel * 513 0 stevel * The default update routine will update ks_data_size automatically for 514 0 stevel * variable-length kstats containing long strings (using the default update 515 0 stevel * routine only makes sense if the string is the only thing that is changing 516 0 stevel * in size, and ks_ndata is constant). Fixed-length kstats containing long 517 0 stevel * strings must explicitly change ks_data_size (after creation but before 518 0 stevel * initialization) to reflect the correct amount of space required for the 519 0 stevel * long strings and the kstat_named_t's. 520 0 stevel */ 521 0 stevel #define KSTAT_DATA_STRING 9 522 0 stevel 523 0 stevel /* These types are obsolete */ 524 0 stevel 525 0 stevel #define KSTAT_DATA_LONGLONG KSTAT_DATA_INT64 526 0 stevel #define KSTAT_DATA_ULONGLONG KSTAT_DATA_UINT64 527 0 stevel #define KSTAT_DATA_FLOAT 5 528 0 stevel #define KSTAT_DATA_DOUBLE 6 529 0 stevel 530 0 stevel #define KSTAT_NAMED_PTR(kptr) ((kstat_named_t *)(kptr)->ks_data) 531 0 stevel 532 0 stevel /* 533 0 stevel * Retrieve the pointer of the string contained in the given named kstat. 534 0 stevel */ 535 457 bmc #define KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.str.addr.ptr) 536 0 stevel 537 0 stevel /* 538 0 stevel * Retrieve the length of the buffer required to store the string in the given 539 0 stevel * named kstat. 540 0 stevel */ 541 457 bmc #define KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.str.len) 542 0 stevel 543 0 stevel /* 544 0 stevel * Interrupt statistics. 
545 0 stevel * 546 0 stevel * An interrupt is a hard interrupt (sourced from the hardware device 547 0 stevel * itself), a soft interrupt (induced by the system via the use of 548 0 stevel * some system interrupt source), a watchdog interrupt (induced by 549 0 stevel * a periodic timer call), spurious (an interrupt entry point was 550 0 stevel * entered but there was no interrupt condition to service), 551 0 stevel * or multiple service (an interrupt condition was detected and 552 0 stevel * serviced just prior to returning from any of the other types). 553 0 stevel * 554 0 stevel * Measurement of the spurious class of interrupts is useful for 555 0 stevel * autovectored devices in order to pinpoint any interrupt latency 556 0 stevel * problems in a particular system configuration. 557 0 stevel * 558 0 stevel * Devices that have more than one interrupt of the same 559 0 stevel * type should use multiple structures. 560 0 stevel */ 561 0 stevel 562 0 stevel #define KSTAT_INTR_HARD 0 563 0 stevel #define KSTAT_INTR_SOFT 1 564 0 stevel #define KSTAT_INTR_WATCHDOG 2 565 0 stevel #define KSTAT_INTR_SPURIOUS 3 566 0 stevel #define KSTAT_INTR_MULTSVC 4 567 0 stevel 568 0 stevel #define KSTAT_NUM_INTRS 5 569 0 stevel 570 0 stevel typedef struct kstat_intr { 571 0 stevel uint_t intrs[KSTAT_NUM_INTRS]; /* interrupt counters */ 572 0 stevel } kstat_intr_t; 573 0 stevel 574 0 stevel #define KSTAT_INTR_PTR(kptr) ((kstat_intr_t *)(kptr)->ks_data) 575 0 stevel 576 0 stevel /* 577 0 stevel * I/O statistics. 578 0 stevel */ 579 0 stevel 580 0 stevel typedef struct kstat_io { 581 0 stevel 582 0 stevel /* 583 0 stevel * Basic counters. 584 0 stevel * 585 0 stevel * The counters should be updated at the end of service 586 0 stevel * (e.g., just prior to calling biodone()). 
587 0 stevel */ 588 0 stevel 589 0 stevel u_longlong_t nread; /* number of bytes read */ 590 0 stevel u_longlong_t nwritten; /* number of bytes written */ 591 0 stevel uint_t reads; /* number of read operations */ 592 0 stevel uint_t writes; /* number of write operations */ 593 0 stevel 594 0 stevel /* 595 0 stevel * Accumulated time and queue length statistics. 596 0 stevel * 597 0 stevel * Accumulated time statistics are kept as a running sum 598 0 stevel * of "active" time. Queue length statistics are kept as a 599 0 stevel * running sum of the product of queue length and elapsed time 600 0 stevel * at that length -- i.e., a Riemann sum for queue length 601 0 stevel * integrated against time. (You can also think of the active time 602 0 stevel * as a Riemann sum, for the boolean function (queue_length > 0) 603 0 stevel * integrated against time, or you can think of it as the 604 0 stevel * Lebesgue measure of the set on which queue_length > 0.) 605 0 stevel * 606 0 stevel * ^ 607 0 stevel * | _________ 608 0 stevel * 8 | i4 | 609 0 stevel * | | | 610 0 stevel * Queue 6 | | 611 0 stevel * Length | _________ | | 612 0 stevel * 4 | i2 |_______| | 613 0 stevel * | | i3 | 614 0 stevel * 2_______| | 615 0 stevel * | i1 | 616 0 stevel * |_______________________________| 617 0 stevel * Time-> t1 t2 t3 t4 618 0 stevel * 619 0 stevel * At each change of state (entry or exit from the queue), 620 0 stevel * we add the elapsed time (since the previous state change) 621 0 stevel * to the active time if the queue length was non-zero during 622 0 stevel * that interval; and we add the product of the elapsed time 623 0 stevel * times the queue length to the running length*time sum. 624 0 stevel * 625 0 stevel * This method is generalizable to measuring residency 626 0 stevel * in any defined system: instead of queue lengths, think 627 0 stevel * of "outstanding RPC calls to server X". 
628 0 stevel * 629 0 stevel * A large number of I/O subsystems have at least two basic 630 0 stevel * "lists" of transactions they manage: one for transactions 631 0 stevel * that have been accepted for processing but for which processing 632 0 stevel * has yet to begin, and one for transactions which are actively 633 0 stevel * being processed (but not done). For this reason, two cumulative 634 0 stevel * time statistics are defined here: wait (pre-service) time, 635 0 stevel * and run (service) time. 636 0 stevel * 637 0 stevel * All times are 64-bit nanoseconds (hrtime_t), as returned by 638 0 stevel * gethrtime(). 639 0 stevel * 640 0 stevel * The units of cumulative busy time are accumulated nanoseconds. 641 0 stevel * The units of cumulative length*time products are elapsed time 642 0 stevel * times queue length. 643 0 stevel * 644 0 stevel * Updates to the fields below are performed implicitly by calls to 645 0 stevel * these five functions: 646 0 stevel * 647 0 stevel * kstat_waitq_enter() 648 0 stevel * kstat_waitq_exit() 649 0 stevel * kstat_runq_enter() 650 0 stevel * kstat_runq_exit() 651 0 stevel * 652 0 stevel * kstat_waitq_to_runq() (see below) 653 0 stevel * kstat_runq_back_to_waitq() (see below) 654 0 stevel * 655 0 stevel * Since kstat_waitq_exit() is typically followed immediately 656 0 stevel * by kstat_runq_enter(), there is a single kstat_waitq_to_runq() 657 0 stevel * function which performs both operations. This is a performance 658 0 stevel * win since only one timestamp is required. 659 0 stevel * 660 0 stevel * In some instances, it may be necessary to move a request from 661 0 stevel * the run queue back to the wait queue, e.g. for write throttling. 662 0 stevel * For these situations, call kstat_runq_back_to_waitq(). 663 0 stevel * 664 0 stevel * These fields should never be updated by any other means. 
665 0 stevel */ 666 0 stevel 667 0 stevel hrtime_t wtime; /* cumulative wait (pre-service) time */ 668 0 stevel hrtime_t wlentime; /* cumulative wait length*time product */ 669 0 stevel hrtime_t wlastupdate; /* last time wait queue changed */ 670 0 stevel hrtime_t rtime; /* cumulative run (service) time */ 671 0 stevel hrtime_t rlentime; /* cumulative run length*time product */ 672 0 stevel hrtime_t rlastupdate; /* last time run queue changed */ 673 0 stevel 674 0 stevel uint_t wcnt; /* count of elements in wait state */ 675 0 stevel uint_t rcnt; /* count of elements in run state */ 676 0 stevel 677 0 stevel } kstat_io_t; 678 0 stevel 679 0 stevel #define KSTAT_IO_PTR(kptr) ((kstat_io_t *)(kptr)->ks_data) 680 0 stevel 681 0 stevel /* 682 0 stevel * Event timer statistics - cumulative elapsed time and number of events. 683 0 stevel * 684 0 stevel * Updates to these fields are performed implicitly by calls to 685 0 stevel * kstat_timer_start() and kstat_timer_stop(). 686 0 stevel */ 687 0 stevel 688 0 stevel typedef struct kstat_timer { 689 0 stevel char name[KSTAT_STRLEN]; /* event name */ 690 0 stevel uchar_t resv; /* reserved */ 691 0 stevel u_longlong_t num_events; /* number of events */ 692 0 stevel hrtime_t elapsed_time; /* cumulative elapsed time */ 693 0 stevel hrtime_t min_time; /* shortest event duration */ 694 0 stevel hrtime_t max_time; /* longest event duration */ 695 0 stevel hrtime_t start_time; /* previous event start time */ 696 0 stevel hrtime_t stop_time; /* previous event stop time */ 697 0 stevel } kstat_timer_t; 698 0 stevel 699 0 stevel #define KSTAT_TIMER_PTR(kptr) ((kstat_timer_t *)(kptr)->ks_data) 700 0 stevel 701 0 stevel #if defined(_KERNEL) 702 0 stevel 703 0 stevel #include <sys/t_lock.h> 704 0 stevel 705 0 stevel extern kid_t kstat_chain_id; /* bumped at each state change */ 706 0 stevel extern void kstat_init(void); /* initialize kstat framework */ 707 0 stevel 708 0 stevel /* 709 0 stevel * Adding and deleting kstats. 
710 0 stevel * 711 0 stevel * The typical sequence to add a kstat is: 712 0 stevel * 713 0 stevel * ksp = kstat_create(module, instance, name, class, type, ndata, flags); 714 0 stevel * if (ksp) { 715 0 stevel * ... provider initialization, if necessary 716 0 stevel * kstat_install(ksp); 717 0 stevel * } 718 0 stevel * 719 0 stevel * There are three logically distinct steps here: 720 0 stevel * 721 0 stevel * Step 1: System Initialization (kstat_create) 722 0 stevel * 723 0 stevel * kstat_create() performs system initialization. kstat_create() 724 0 stevel * allocates memory for the entire kstat (header plus data), initializes 725 0 stevel * all header fields, initializes the data section to all zeroes, assigns 726 0 stevel * a unique KID, and puts the kstat onto the system's kstat chain. 727 0 stevel * The returned kstat is marked invalid (KSTAT_FLAG_INVALID is set), 728 0 stevel * because the provider (caller) has not yet had a chance to initialize 729 0 stevel * the data section. 730 0 stevel * 731 0 stevel * By default, kstats are exported to all zones on the system. A kstat may be 732 0 stevel * created via kstat_create_zone() to specify a zone to which the statistics 733 0 stevel * should be exported. kstat_zone_add() may be used to specify additional 734 0 stevel * zones to which the statistics are to be exported. 735 0 stevel * 736 0 stevel * Step 2: Provider Initialization 737 0 stevel * 738 0 stevel * The provider performs any necessary initialization of the data section, 739 0 stevel * e.g. setting the name fields in a KSTAT_TYPE_NAMED. Virtual kstats set 740 0 stevel * the ks_data field at this time. The provider may also set the ks_update, 741 0 stevel * ks_snapshot, ks_private, and ks_lock fields if necessary. 
742 0 stevel * 743 0 stevel * Step 3: Installation (kstat_install) 744 0 stevel * 745 0 stevel * Once the kstat is completely initialized, kstat_install() clears the 746 0 stevel * INVALID flag, thus making the kstat accessible to the outside world. 747 0 stevel * kstat_install() also clears the DORMANT flag for persistent kstats. 748 0 stevel * 749 0 stevel * Removing a kstat from the system 750 0 stevel * 751 0 stevel * kstat_delete(ksp) removes ksp from the kstat chain and frees all 752 0 stevel * associated system resources. NOTE: When you call kstat_delete(), 753 0 stevel * you must NOT be holding that kstat's ks_lock. Otherwise, you may 754 0 stevel * deadlock with a kstat reader. 755 0 stevel * 756 0 stevel * Persistent kstats 757 0 stevel * 758 0 stevel * From the provider's point of view, persistence is transparent. The only 759 0 stevel * difference between ephemeral (normal) kstats and persistent kstats 760 0 stevel * is that you pass KSTAT_FLAG_PERSISTENT to kstat_create(). Magically, 761 0 stevel * this has the effect of making your data visible even when you're 762 0 stevel * not home. Persistence is important to tools like iostat, which want 763 0 stevel * to get a meaningful picture of disk activity. Without persistence, 764 0 stevel * raw disk i/o statistics could never accumulate: they would come and 765 0 stevel * go with each open/close of the raw device. 766 0 stevel * 767 0 stevel * The magic of persistence works by slightly altering the behavior of 768 0 stevel * kstat_create() and kstat_delete(). The first call to kstat_create() 769 0 stevel * creates a new kstat, as usual. 
However, kstat_delete() does not 770 0 stevel * actually delete the kstat: it performs one final update of the data 771 0 stevel * (i.e., calls the ks_update routine), marks the kstat as dormant, and 772 0 stevel * sets the ks_lock, ks_update, ks_private, and ks_snapshot fields back 773 0 stevel * to their default values (since they might otherwise point to garbage, 774 0 stevel * e.g. if the provider is going away). kstat clients can still access 775 0 stevel * the dormant kstat just like a live kstat; they just continue to see 776 0 stevel * the final data values as long as the kstat remains dormant. 777 0 stevel * All subsequent kstat_create() calls simply find the already-existing, 778 0 stevel * dormant kstat and return a pointer to it, without altering any fields. 779 0 stevel * The provider then performs its usual initialization sequence, and 780 0 stevel * calls kstat_install(). kstat_install() uses the old data values to 781 0 stevel * initialize the native data (i.e., ks_update is called with KSTAT_WRITE), 782 0 stevel * thus making it seem like you were never gone. 
783 0 stevel */ 784 0 stevel 785 2951 elowe extern kstat_t *kstat_create(const char *, int, const char *, const char *, 786 2951 elowe uchar_t, uint_t, uchar_t); /* args: module, instance, name, class, type, ndata, flags */ 787 2951 elowe extern kstat_t *kstat_create_zone(const char *, int, const char *, 788 2951 elowe const char *, uchar_t, uint_t, uchar_t, zoneid_t); /* like kstat_create(), plus the zone the statistics are exported to */ 789 0 stevel extern void kstat_install(kstat_t *); /* clears the INVALID flag once the provider has initialized the kstat */ 790 0 stevel extern void kstat_delete(kstat_t *); /* caller must NOT hold the kstat's ks_lock */ 791 0 stevel extern void kstat_named_setstr(kstat_named_t *knp, const char *src); 792 2951 elowe extern void kstat_set_string(char *, const char *); 793 2951 elowe extern void kstat_delete_byname(const char *, int, const char *); 794 2951 elowe extern void kstat_delete_byname_zone(const char *, int, const char *, zoneid_t); 795 2951 elowe extern void kstat_named_init(kstat_named_t *, const char *, uchar_t); 796 2951 elowe extern void kstat_timer_init(kstat_timer_t *, const char *); /* The next six routines are the only sanctioned way to update kstat_io_t's time and count fields. */ 797 0 stevel extern void kstat_waitq_enter(kstat_io_t *); 798 0 stevel extern void kstat_waitq_exit(kstat_io_t *); 799 0 stevel extern void kstat_runq_enter(kstat_io_t *); 800 0 stevel extern void kstat_runq_exit(kstat_io_t *); 801 0 stevel extern void kstat_waitq_to_runq(kstat_io_t *); /* waitq exit + runq enter in one call */ 802 0 stevel extern void kstat_runq_back_to_waitq(kstat_io_t *); /* moves a request from the run queue back to the wait queue */ 803 0 stevel extern void kstat_timer_start(kstat_timer_t *); /* kstat_timer_t fields are updated via start/stop */ 804 0 stevel extern void kstat_timer_stop(kstat_timer_t *); 805 0 stevel 806 0 stevel extern void kstat_zone_add(kstat_t *, zoneid_t); /* export the kstat to an additional zone */ 807 0 stevel extern void kstat_zone_remove(kstat_t *, zoneid_t); 808 0 stevel extern int kstat_zone_find(kstat_t *, zoneid_t); 809 0 stevel 810 0 stevel extern kstat_t *kstat_hold_bykid(kid_t kid, zoneid_t); /* NOTE(review): presumably each hold is paired with kstat_rele() - confirm */ 811 2951 elowe extern kstat_t *kstat_hold_byname(const char *, int, const char *, zoneid_t); 812 0 stevel extern void kstat_rele(kstat_t *); 813 0 stevel 814 0 stevel #endif /* defined(_KERNEL) */ 815 0 stevel 816 0 stevel #ifdef __cplusplus 817 0 stevel } 818 0 stevel #endif /* __cplusplus */ 819 0 stevel 820 0 stevel #endif /* _SYS_KSTAT_H */ 821