Home | History | Annotate | Download | only in common
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <stdlib.h>
     27 #include <strings.h>
     28 #include <errno.h>
     29 #include <unistd.h>
     30 #include <limits.h>
     31 #include <assert.h>
     32 #include <ctype.h>
     33 #include <alloca.h>
     34 #include <dt_impl.h>
     35 
     36 #define	DT_MASK_LO 0x00000000FFFFFFFFULL
     37 
     38 /*
     39  * We declare this here because (1) we need it and (2) we want to avoid a
     40  * dependency on libm in libdtrace.
     41  */
     42 static long double
     43 dt_fabsl(long double x)
     44 {
     45 	if (x < 0)
     46 		return (-x);
     47 
     48 	return (x);
     49 }
     50 
     51 /*
     52  * 128-bit arithmetic functions needed to support the stddev() aggregating
     53  * action.
     54  */
     55 static int
     56 dt_gt_128(uint64_t *a, uint64_t *b)
     57 {
     58 	return (a[1] > b[1] || (a[1] == b[1] && a[0] > b[0]));
     59 }
     60 
     61 static int
     62 dt_ge_128(uint64_t *a, uint64_t *b)
     63 {
     64 	return (a[1] > b[1] || (a[1] == b[1] && a[0] >= b[0]));
     65 }
     66 
     67 static int
     68 dt_le_128(uint64_t *a, uint64_t *b)
     69 {
     70 	return (a[1] < b[1] || (a[1] == b[1] && a[0] <= b[0]));
     71 }
     72 
     73 /*
     74  * Shift the 128-bit value in a by b. If b is positive, shift left.
     75  * If b is negative, shift right.
     76  */
     77 static void
     78 dt_shift_128(uint64_t *a, int b)
     79 {
     80 	uint64_t mask;
     81 
     82 	if (b == 0)
     83 		return;
     84 
     85 	if (b < 0) {
     86 		b = -b;
     87 		if (b >= 64) {
     88 			a[0] = a[1] >> (b - 64);
     89 			a[1] = 0;
     90 		} else {
     91 			a[0] >>= b;
     92 			mask = 1LL << (64 - b);
     93 			mask -= 1;
     94 			a[0] |= ((a[1] & mask) << (64 - b));
     95 			a[1] >>= b;
     96 		}
     97 	} else {
     98 		if (b >= 64) {
     99 			a[1] = a[0] << (b - 64);
    100 			a[0] = 0;
    101 		} else {
    102 			a[1] <<= b;
    103 			mask = a[0] >> (64 - b);
    104 			a[1] |= mask;
    105 			a[0] <<= b;
    106 		}
    107 	}
    108 }
    109 
    110 static int
    111 dt_nbits_128(uint64_t *a)
    112 {
    113 	int nbits = 0;
    114 	uint64_t tmp[2];
    115 	uint64_t zero[2] = { 0, 0 };
    116 
    117 	tmp[0] = a[0];
    118 	tmp[1] = a[1];
    119 
    120 	dt_shift_128(tmp, -1);
    121 	while (dt_gt_128(tmp, zero)) {
    122 		dt_shift_128(tmp, -1);
    123 		nbits++;
    124 	}
    125 
    126 	return (nbits);
    127 }
    128 
    129 static void
    130 dt_subtract_128(uint64_t *minuend, uint64_t *subtrahend, uint64_t *difference)
    131 {
    132 	uint64_t result[2];
    133 
    134 	result[0] = minuend[0] - subtrahend[0];
    135 	result[1] = minuend[1] - subtrahend[1] -
    136 	    (minuend[0] < subtrahend[0] ? 1 : 0);
    137 
    138 	difference[0] = result[0];
    139 	difference[1] = result[1];
    140 }
    141 
    142 static void
    143 dt_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
    144 {
    145 	uint64_t result[2];
    146 
    147 	result[0] = addend1[0] + addend2[0];
    148 	result[1] = addend1[1] + addend2[1] +
    149 	    (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0);
    150 
    151 	sum[0] = result[0];
    152 	sum[1] = result[1];
    153 }
    154 
    155 /*
    156  * The basic idea is to break the 2 64-bit values into 4 32-bit values,
    157  * use native multiplication on those, and then re-combine into the
    158  * resulting 128-bit value.
    159  *
    160  * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
    161  *     hi1 * hi2 << 64 +
    162  *     hi1 * lo2 << 32 +
    163  *     hi2 * lo1 << 32 +
    164  *     lo1 * lo2
    165  */
    166 static void
    167 dt_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
    168 {
    169 	uint64_t hi1, hi2, lo1, lo2;
    170 	uint64_t tmp[2];
    171 
    172 	hi1 = factor1 >> 32;
    173 	hi2 = factor2 >> 32;
    174 
    175 	lo1 = factor1 & DT_MASK_LO;
    176 	lo2 = factor2 & DT_MASK_LO;
    177 
    178 	product[0] = lo1 * lo2;
    179 	product[1] = hi1 * hi2;
    180 
    181 	tmp[0] = hi1 * lo2;
    182 	tmp[1] = 0;
    183 	dt_shift_128(tmp, 32);
    184 	dt_add_128(product, tmp, product);
    185 
    186 	tmp[0] = hi2 * lo1;
    187 	tmp[1] = 0;
    188 	dt_shift_128(tmp, 32);
    189 	dt_add_128(product, tmp, product);
    190 }
    191 
    192 /*
    193  * This is long-hand division.
    194  *
    195  * We initialize subtrahend by shifting divisor left as far as possible. We
    196  * loop, comparing subtrahend to dividend:  if subtrahend is smaller, we
    197  * subtract and set the appropriate bit in the result.  We then shift
    198  * subtrahend right by one bit for the next comparison.
    199  */
    200 static void
    201 dt_divide_128(uint64_t *dividend, uint64_t divisor, uint64_t *quotient)
    202 {
    203 	uint64_t result[2] = { 0, 0 };
    204 	uint64_t remainder[2];
    205 	uint64_t subtrahend[2];
    206 	uint64_t divisor_128[2];
    207 	uint64_t mask[2] = { 1, 0 };
    208 	int log = 0;
    209 
    210 	assert(divisor != 0);
    211 
    212 	divisor_128[0] = divisor;
    213 	divisor_128[1] = 0;
    214 
    215 	remainder[0] = dividend[0];
    216 	remainder[1] = dividend[1];
    217 
    218 	subtrahend[0] = divisor;
    219 	subtrahend[1] = 0;
    220 
    221 	while (divisor > 0) {
    222 		log++;
    223 		divisor >>= 1;
    224 	}
    225 
    226 	dt_shift_128(subtrahend, 128 - log);
    227 	dt_shift_128(mask, 128 - log);
    228 
    229 	while (dt_ge_128(remainder, divisor_128)) {
    230 		if (dt_ge_128(remainder, subtrahend)) {
    231 			dt_subtract_128(remainder, subtrahend, remainder);
    232 			result[0] |= mask[0];
    233 			result[1] |= mask[1];
    234 		}
    235 
    236 		dt_shift_128(subtrahend, -1);
    237 		dt_shift_128(mask, -1);
    238 	}
    239 
    240 	quotient[0] = result[0];
    241 	quotient[1] = result[1];
    242 }
    243 
    244 /*
    245  * This is the long-hand method of calculating a square root.
    246  * The algorithm is as follows:
    247  *
    248  * 1. Group the digits by 2 from the right.
    249  * 2. Over the leftmost group, find the largest single-digit number
    250  *    whose square is less than that group.
    251  * 3. Subtract the result of the previous step (2 or 4, depending) and
    252  *    bring down the next two-digit group.
    253  * 4. For the result R we have so far, find the largest single-digit number
    254  *    x such that 2 * R * 10 * x + x^2 is less than the result from step 3.
    255  *    (Note that this is doubling R and performing a decimal left-shift by 1
    256  *    and searching for the appropriate decimal to fill the one's place.)
    257  *    The value x is the next digit in the square root.
    258  * Repeat steps 3 and 4 until the desired precision is reached.  (We're
    259  * dealing with integers, so the above is sufficient.)
    260  *
    261  * In decimal, the square root of 582,734 would be calculated as so:
    262  *
    263  *     __7__6__3
    264  *    | 58 27 34
    265  *     -49       (7^2 == 49 => 7 is the first digit in the square root)
    266  *      --
    267  *       9 27    (Subtract and bring down the next group.)
    268  * 146   8 76    (2 * 7 * 10 * 6 + 6^2 == 876 => 6 is the next digit in
    269  *      -----     the square root)
    270  *         51 34 (Subtract and bring down the next group.)
    271  * 1523    45 69 (2 * 76 * 10 * 3 + 3^2 == 4569 => 3 is the next digit in
    272  *         -----  the square root)
    273  *          5 65 (remainder)
    274  *
    275  * The above algorithm applies similarly in binary, but note that the
    276  * only possible non-zero value for x in step 4 is 1, so step 4 becomes a
    277  * simple decision: is 2 * R * 2 * 1 + 1^2 (aka R << 2 + 1) less than the
    278  * preceding difference?
    279  *
    280  * In binary, the square root of 11011011 would be calculated as so:
    281  *
    282  *     __1__1__1__0
    283  *    | 11 01 10 11
    284  *      01          (0 << 2 + 1 == 1 < 11 => this bit is 1)
    285  *      --
    286  *      10 01 10 11
    287  * 101   1 01       (1 << 2 + 1 == 101 < 1001 => next bit is 1)
    288  *      -----
    289  *       1 00 10 11
    290  * 1101    11 01    (11 << 2 + 1 == 1101 < 10010 => next bit is 1)
    291  *       -------
    292  *          1 01 11
    293  * 11101    1 11 01 (111 << 2 + 1 == 11101 > 10111 => last bit is 0)
    294  *
    295  */
    296 static uint64_t
    297 dt_sqrt_128(uint64_t *square)
    298 {
    299 	uint64_t result[2] = { 0, 0 };
    300 	uint64_t diff[2] = { 0, 0 };
    301 	uint64_t one[2] = { 1, 0 };
    302 	uint64_t next_pair[2];
    303 	uint64_t next_try[2];
    304 	uint64_t bit_pairs, pair_shift;
    305 	int i;
    306 
    307 	bit_pairs = dt_nbits_128(square) / 2;
    308 	pair_shift = bit_pairs * 2;
    309 
    310 	for (i = 0; i <= bit_pairs; i++) {
    311 		/*
    312 		 * Bring down the next pair of bits.
    313 		 */
    314 		next_pair[0] = square[0];
    315 		next_pair[1] = square[1];
    316 		dt_shift_128(next_pair, -pair_shift);
    317 		next_pair[0] &= 0x3;
    318 		next_pair[1] = 0;
    319 
    320 		dt_shift_128(diff, 2);
    321 		dt_add_128(diff, next_pair, diff);
    322 
    323 		/*
    324 		 * next_try = R << 2 + 1
    325 		 */
    326 		next_try[0] = result[0];
    327 		next_try[1] = result[1];
    328 		dt_shift_128(next_try, 2);
    329 		dt_add_128(next_try, one, next_try);
    330 
    331 		if (dt_le_128(next_try, diff)) {
    332 			dt_subtract_128(diff, next_try, diff);
    333 			dt_shift_128(result, 1);
    334 			dt_add_128(result, one, result);
    335 		} else {
    336 			dt_shift_128(result, 1);
    337 		}
    338 
    339 		pair_shift -= 2;
    340 	}
    341 
    342 	assert(result[1] == 0);
    343 
    344 	return (result[0]);
    345 }
    346 
    347 uint64_t
    348 dt_stddev(uint64_t *data, uint64_t normal)
    349 {
    350 	uint64_t avg_of_squares[2];
    351 	uint64_t square_of_avg[2];
    352 	int64_t norm_avg;
    353 	uint64_t diff[2];
    354 
    355 	/*
    356 	 * The standard approximation for standard deviation is
    357 	 * sqrt(average(x**2) - average(x)**2), i.e. the square root
    358 	 * of the average of the squares minus the square of the average.
    359 	 */
    360 	dt_divide_128(data + 2, normal, avg_of_squares);
    361 	dt_divide_128(avg_of_squares, data[0], avg_of_squares);
    362 
    363 	norm_avg = (int64_t)data[1] / (int64_t)normal / (int64_t)data[0];
    364 
    365 	if (norm_avg < 0)
    366 		norm_avg = -norm_avg;
    367 
    368 	dt_multiply_128((uint64_t)norm_avg, (uint64_t)norm_avg, square_of_avg);
    369 
    370 	dt_subtract_128(avg_of_squares, square_of_avg, diff);
    371 
    372 	return (dt_sqrt_128(diff));
    373 }
    374 
/*
 * Classify the probe described by data as a flow entry, a flow return, or
 * neither, and record the outcome in data:  dtpda_prefix receives the
 * prefix string to print, dtpda_flow the resulting flow kind, and
 * dtpda_indent is reduced by two for a return (when it is positive).
 *
 * last is the EPID of the previously processed record (or DTRACE_EPIDNONE);
 * buf and offs locate the current record in the trace buffer so that the
 * following record can be examined.
 *
 * Returns 0 on success, or the non-zero result of dt_epid_lookup() if the
 * next record's EPID cannot be resolved.
 */
static int
dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,
    dtrace_bufdesc_t *buf, size_t offs)
{
	dtrace_probedesc_t *pd = data->dtpda_pdesc, *npd;
	dtrace_eprobedesc_t *epd = data->dtpda_edesc, *nepd;
	char *p = pd->dtpd_provider, *n = pd->dtpd_name, *sub;
	dtrace_flowkind_t flow = DTRACEFLOW_NONE;
	const char *str = NULL;
	/* Index 1 of each pair is used when the provider is "syscall". */
	static const char *e_str[2] = { " -> ", " => " };
	static const char *r_str[2] = { " <- ", " <= " };
	static const char *ent = "entry", *ret = "return";
	/* Lengths of ent and ret, computed once on the first call. */
	static int entlen = 0, retlen = 0;
	dtrace_epid_t next, id = epd->dtepd_epid;
	int rval;

	if (entlen == 0) {
		assert(retlen == 0);
		entlen = strlen(ent);
		retlen = strlen(ret);
	}

	/*
	 * If the name of the probe is "entry" or ends with "-entry", we
	 * treat it as an entry; if it is "return" or ends with "-return",
	 * we treat it as a return.  (This allows application-provided probes
	 * like "method-entry" or "function-entry" to participate in flow
	 * indentation -- without accidentally misinterpreting popular probe
	 * names like "carpentry", "gentry" or "Coventry".)
	 */
	if ((sub = strstr(n, ent)) != NULL && sub[entlen] == '\0' &&
	    (sub == n || sub[-1] == '-')) {
		flow = DTRACEFLOW_ENTRY;
		str = e_str[strcmp(p, "syscall") == 0];
	} else if ((sub = strstr(n, ret)) != NULL && sub[retlen] == '\0' &&
	    (sub == n || sub[-1] == '-')) {
		flow = DTRACEFLOW_RETURN;
		str = r_str[strcmp(p, "syscall") == 0];
	}

	/*
	 * If we're going to indent this, we need to check the ID of our last
	 * call.  If we're looking at the same probe ID but a different EPID,
	 * we _don't_ want to indent.  (Yes, there are some minor holes in
	 * this scheme -- it's a heuristic.)
	 */
	if (flow == DTRACEFLOW_ENTRY) {
		if ((last != DTRACE_EPIDNONE && id != last &&
		    pd->dtpd_id == dtp->dt_pdesc[last]->dtpd_id))
			flow = DTRACEFLOW_NONE;
	}

	/*
	 * If we're going to unindent this, it's more difficult to see if
	 * we don't actually want to unindent it -- we need to look at the
	 * _next_ EPID.
	 */
	if (flow == DTRACEFLOW_RETURN) {
		/* Step past the current record to reach the one that follows. */
		offs += epd->dtepd_size;

		do {
			if (offs >= buf->dtbd_size) {
				/*
				 * We're at the end -- maybe.  If the oldest
				 * record is non-zero, we need to wrap.
				 */
				if (buf->dtbd_oldest != 0) {
					offs = 0;
				} else {
					goto out;
				}
			}

			next = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);

			/* Skip over DTRACE_EPIDNONE words one at a time. */
			if (next == DTRACE_EPIDNONE)
				offs += sizeof (id);
		} while (next == DTRACE_EPIDNONE);

		if ((rval = dt_epid_lookup(dtp, next, &nepd, &npd)) != 0)
			return (rval);

		/* Same probe ID but a different EPID follows:  don't unindent. */
		if (next != id && npd->dtpd_id == pd->dtpd_id)
			flow = DTRACEFLOW_NONE;
	}

out:
	if (flow == DTRACEFLOW_ENTRY || flow == DTRACEFLOW_RETURN) {
		data->dtpda_prefix = str;
	} else {
		data->dtpda_prefix = "| ";
	}

	if (flow == DTRACEFLOW_RETURN && data->dtpda_indent > 0)
		data->dtpda_indent -= 2;

	data->dtpda_flow = flow;

	return (0);
}
    475 
/*
 * Do-nothing function that ignores any arguments and always returns
 * DTRACE_CONSUME_THIS.  NOTE(review): presumably installed as the
 * per-probe consume callback when the caller supplies none -- confirm at
 * the point of use.
 */
static int
dt_nullprobe()
{
	return (DTRACE_CONSUME_THIS);
}
    481 
/*
 * Do-nothing function that ignores any arguments and always returns
 * DTRACE_CONSUME_NEXT.  NOTE(review): presumably installed as the
 * per-record consume callback when the caller supplies none -- confirm at
 * the point of use.
 */
static int
dt_nullrec()
{
	return (DTRACE_CONSUME_NEXT);
}
    487 
    488 int
    489 dt_print_quantline(dtrace_hdl_t *dtp, FILE *fp, int64_t val,
    490     uint64_t normal, long double total, char positives, char negatives)
    491 {
    492 	long double f;
    493 	uint_t depth, len = 40;
    494 
    495 	const char *ats = "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@";
    496 	const char *spaces = "                                        ";
    497 
    498 	assert(strlen(ats) == len && strlen(spaces) == len);
    499 	assert(!(total == 0 && (positives || negatives)));
    500 	assert(!(val < 0 && !negatives));
    501 	assert(!(val > 0 && !positives));
    502 	assert(!(val != 0 && total == 0));
    503 
    504 	if (!negatives) {
    505 		if (positives) {
    506 			f = (dt_fabsl((long double)val) * len) / total;
    507 			depth = (uint_t)(f + 0.5);
    508 		} else {
    509 			depth = 0;
    510 		}
    511 
    512 		return (dt_printf(dtp, fp, "|%s%s %-9lld\n", ats + len - depth,
    513 		    spaces + depth, (long long)val / normal));
    514 	}
    515 
    516 	if (!positives) {
    517 		f = (dt_fabsl((long double)val) * len) / total;
    518 		depth = (uint_t)(f + 0.5);
    519 
    520 		return (dt_printf(dtp, fp, "%s%s| %-9lld\n", spaces + depth,
    521 		    ats + len - depth, (long long)val / normal));
    522 	}
    523 
    524 	/*
    525 	 * If we're here, we have both positive and negative bucket values.
    526 	 * To express this graphically, we're going to generate both positive
    527 	 * and negative bars separated by a centerline.  These bars are half
    528 	 * the size of normal quantize()/lquantize() bars, so we divide the
    529 	 * length in half before calculating the bar length.
    530 	 */
    531 	len /= 2;
    532 	ats = &ats[len];
    533 	spaces = &spaces[len];
    534 
    535 	f = (dt_fabsl((long double)val) * len) / total;
    536 	depth = (uint_t)(f + 0.5);
    537 
    538 	if (val <= 0) {
    539 		return (dt_printf(dtp, fp, "%s%s|%*s %-9lld\n", spaces + depth,
    540 		    ats + len - depth, len, "", (long long)val / normal));
    541 	} else {
    542 		return (dt_printf(dtp, fp, "%20s|%s%s %-9lld\n", "",
    543 		    ats + len - depth, spaces + depth,
    544 		    (long long)val / normal));
    545 	}
    546 }
    547 
    548 int
    549 dt_print_quantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
    550     size_t size, uint64_t normal)
    551 {
    552 	const int64_t *data = addr;
    553 	int i, first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;
    554 	long double total = 0;
    555 	char positives = 0, negatives = 0;
    556 
    557 	if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
    558 		return (dt_set_errno(dtp, EDT_DMISMATCH));
    559 
    560 	while (first_bin < DTRACE_QUANTIZE_NBUCKETS - 1 && data[first_bin] == 0)
    561 		first_bin++;
    562 
    563 	if (first_bin == DTRACE_QUANTIZE_NBUCKETS - 1) {
    564 		/*
    565 		 * There isn't any data.  This is possible if (and only if)
    566 		 * negative increment values have been used.  In this case,
    567 		 * we'll print the buckets around 0.
    568 		 */
    569 		first_bin = DTRACE_QUANTIZE_ZEROBUCKET - 1;
    570 		last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 1;
    571 	} else {
    572 		if (first_bin > 0)
    573 			first_bin--;
    574 
    575 		while (last_bin > 0 && data[last_bin] == 0)
    576 			last_bin--;
    577 
    578 		if (last_bin < DTRACE_QUANTIZE_NBUCKETS - 1)
    579 			last_bin++;
    580 	}
    581 
    582 	for (i = first_bin; i <= last_bin; i++) {
    583 		positives |= (data[i] > 0);
    584 		negatives |= (data[i] < 0);
    585 		total += dt_fabsl((long double)data[i]);
    586 	}
    587 
    588 	if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
    589 	    "------------- Distribution -------------", "count") < 0)
    590 		return (-1);
    591 
    592 	for (i = first_bin; i <= last_bin; i++) {
    593 		if (dt_printf(dtp, fp, "%16lld ",
    594 		    (long long)DTRACE_QUANTIZE_BUCKETVAL(i)) < 0)
    595 			return (-1);
    596 
    597 		if (dt_print_quantline(dtp, fp, data[i], normal, total,
    598 		    positives, negatives) < 0)
    599 			return (-1);
    600 	}
    601 
    602 	return (0);
    603 }
    604 
    605 int
    606 dt_print_lquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
    607     size_t size, uint64_t normal)
    608 {
    609 	const int64_t *data = addr;
    610 	int i, first_bin, last_bin, base;
    611 	uint64_t arg;
    612 	long double total = 0;
    613 	uint16_t step, levels;
    614 	char positives = 0, negatives = 0;
    615 
    616 	if (size < sizeof (uint64_t))
    617 		return (dt_set_errno(dtp, EDT_DMISMATCH));
    618 
    619 	arg = *data++;
    620 	size -= sizeof (uint64_t);
    621 
    622 	base = DTRACE_LQUANTIZE_BASE(arg);
    623 	step = DTRACE_LQUANTIZE_STEP(arg);
    624 	levels = DTRACE_LQUANTIZE_LEVELS(arg);
    625 
    626 	first_bin = 0;
    627 	last_bin = levels + 1;
    628 
    629 	if (size != sizeof (uint64_t) * (levels + 2))
    630 		return (dt_set_errno(dtp, EDT_DMISMATCH));
    631 
    632 	while (first_bin <= levels + 1 && data[first_bin] == 0)
    633 		first_bin++;
    634 
    635 	if (first_bin > levels + 1) {
    636 		first_bin = 0;
    637 		last_bin = 2;
    638 	} else {
    639 		if (first_bin > 0)
    640 			first_bin--;
    641 
    642 		while (last_bin > 0 && data[last_bin] == 0)
    643 			last_bin--;
    644 
    645 		if (last_bin < levels + 1)
    646 			last_bin++;
    647 	}
    648 
    649 	for (i = first_bin; i <= last_bin; i++) {
    650 		positives |= (data[i] > 0);
    651 		negatives |= (data[i] < 0);
    652 		total += dt_fabsl((long double)data[i]);
    653 	}
    654 
    655 	if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
    656 	    "------------- Distribution -------------", "count") < 0)
    657 		return (-1);
    658 
    659 	for (i = first_bin; i <= last_bin; i++) {
    660 		char c[32];
    661 		int err;
    662 
    663 		if (i == 0) {
    664 			(void) snprintf(c, sizeof (c), "< %d",
    665 			    base / (uint32_t)normal);
    666 			err = dt_printf(dtp, fp, "%16s ", c);
    667 		} else if (i == levels + 1) {
    668 			(void) snprintf(c, sizeof (c), ">= %d",
    669 			    base + (levels * step));
    670 			err = dt_printf(dtp, fp, "%16s ", c);
    671 		} else {
    672 			err = dt_printf(dtp, fp, "%16d ",
    673 			    base + (i - 1) * step);
    674 		}
    675 
    676 		if (err < 0 || dt_print_quantline(dtp, fp, data[i], normal,
    677 		    total, positives, negatives) < 0)
    678 			return (-1);
    679 	}
    680 
    681 	return (0);
    682 }
    683 
    684 /*ARGSUSED*/
    685 static int
    686 dt_print_average(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
    687     size_t size, uint64_t normal)
    688 {
    689 	/* LINTED - alignment */
    690 	int64_t *data = (int64_t *)addr;
    691 
    692 	return (dt_printf(dtp, fp, " %16lld", data[0] ?
    693 	    (long long)(data[1] / (int64_t)normal / data[0]) : 0));
    694 }
    695 
    696 /*ARGSUSED*/
    697 static int
    698 dt_print_stddev(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
    699     size_t size, uint64_t normal)
    700 {
    701 	/* LINTED - alignment */
    702 	uint64_t *data = (uint64_t *)addr;
    703 
    704 	return (dt_printf(dtp, fp, " %16llu", data[0] ?
    705 	    (unsigned long long) dt_stddev(data, normal) : 0));
    706 }
    707 
    708 /*ARGSUSED*/
    709 int
    710 dt_print_bytes(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
    711     size_t nbytes, int width, int quiet)
    712 {
    713 	/*
    714 	 * If the byte stream is a series of printable characters, followed by
    715 	 * a terminating byte, we print it out as a string.  Otherwise, we
    716 	 * assume that it's something else and just print the bytes.
    717 	 */
    718 	int i, j, margin = 5;
    719 	char *c = (char *)addr;
    720 
    721 	if (nbytes == 0)
    722 		return (0);
    723 
    724 	if (dtp->dt_options[DTRACEOPT_RAWBYTES] != DTRACEOPT_UNSET)
    725 		goto raw;
    726 
    727 	for (i = 0; i < nbytes; i++) {
    728 		/*
    729 		 * We define a "printable character" to be one for which
    730 		 * isprint(3C) returns non-zero, isspace(3C) returns non-zero,
    731 		 * or a character which is either backspace or the bell.
    732 		 * Backspace and the bell are regrettably special because
    733 		 * they fail the first two tests -- and yet they are entirely
    734 		 * printable.  These are the only two control characters that
    735 		 * have meaning for the terminal and for which isprint(3C) and
    736 		 * isspace(3C) return 0.
    737 		 */
    738 		if (isprint(c[i]) || isspace(c[i]) ||
    739 		    c[i] == '\b' || c[i] == '\a')
    740 			continue;
    741 
    742 		if (c[i] == '\0' && i > 0) {
    743 			/*
    744 			 * This looks like it might be a string.  Before we
    745 			 * assume that it is indeed a string, check the
    746 			 * remainder of the byte range; if it contains
    747 			 * additional non-nul characters, we'll assume that
    748 			 * it's a binary stream that just happens to look like
    749 			 * a string, and we'll print out the individual bytes.
    750 			 */
    751 			for (j = i + 1; j < nbytes; j++) {
    752 				if (c[j] != '\0')
    753 					break;
    754 			}
    755 
    756 			if (j != nbytes)
    757 				break;
    758 
    759 			if (quiet)
    760 				return (dt_printf(dtp, fp, "%s", c));
    761 			else
    762 				return (dt_printf(dtp, fp, "  %-*s", width, c));
    763 		}
    764 
    765 		break;
    766 	}
    767 
    768 	if (i == nbytes) {
    769 		/*
    770 		 * The byte range is all printable characters, but there is
    771 		 * no trailing nul byte.  We'll assume that it's a string and
    772 		 * print it as such.
    773 		 */
    774 		char *s = alloca(nbytes + 1);
    775 		bcopy(c, s, nbytes);
    776 		s[nbytes] = '\0';
    777 		return (dt_printf(dtp, fp, "  %-*s", width, s));
    778 	}
    779 
    780 raw:
    781 	if (dt_printf(dtp, fp, "\n%*s      ", margin, "") < 0)
    782 		return (-1);
    783 
    784 	for (i = 0; i < 16; i++)
    785 		if (dt_printf(dtp, fp, "  %c", "0123456789abcdef"[i]) < 0)
    786 			return (-1);
    787 
    788 	if (dt_printf(dtp, fp, "  0123456789abcdef\n") < 0)
    789 		return (-1);
    790 
    791 
    792 	for (i = 0; i < nbytes; i += 16) {
    793 		if (dt_printf(dtp, fp, "%*s%5x:", margin, "", i) < 0)
    794 			return (-1);
    795 
    796 		for (j = i; j < i + 16 && j < nbytes; j++) {
    797 			if (dt_printf(dtp, fp, " %02x", (uchar_t)c[j]) < 0)
    798 				return (-1);
    799 		}
    800 
    801 		while (j++ % 16) {
    802 			if (dt_printf(dtp, fp, "   ") < 0)
    803 				return (-1);
    804 		}
    805 
    806 		if (dt_printf(dtp, fp, "  ") < 0)
    807 			return (-1);
    808 
    809 		for (j = i; j < i + 16 && j < nbytes; j++) {
    810 			if (dt_printf(dtp, fp, "%c",
    811 			    c[j] < ' ' || c[j] > '~' ? '.' : c[j]) < 0)
    812 				return (-1);
    813 		}
    814 
    815 		if (dt_printf(dtp, fp, "\n") < 0)
    816 			return (-1);
    817 	}
    818 
    819 	return (0);
    820 }
    821 
    822 int
    823 dt_print_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
    824     caddr_t addr, int depth, int size)
    825 {
    826 	dtrace_syminfo_t dts;
    827 	GElf_Sym sym;
    828 	int i, indent;
    829 	char c[PATH_MAX * 2];
    830 	uint64_t pc;
    831 
    832 	if (dt_printf(dtp, fp, "\n") < 0)
    833 		return (-1);
    834 
    835 	if (format == NULL)
    836 		format = "%s";
    837 
    838 	if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
    839 		indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
    840 	else
    841 		indent = _dtrace_stkindent;
    842 
    843 	for (i = 0; i < depth; i++) {
    844 		switch (size) {
    845 		case sizeof (uint32_t):
    846 			/* LINTED - alignment */
    847 			pc = *((uint32_t *)addr);
    848 			break;
    849 
    850 		case sizeof (uint64_t):
    851 			/* LINTED - alignment */
    852 			pc = *((uint64_t *)addr);
    853 			break;
    854 
    855 		default:
    856 			return (dt_set_errno(dtp, EDT_BADSTACKPC));
    857 		}
    858 
    859 		if (pc == NULL)
    860 			break;
    861 
    862 		addr += size;
    863 
    864 		if (dt_printf(dtp, fp, "%*s", indent, "") < 0)
    865 			return (-1);
    866 
    867 		if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
    868 			if (pc > sym.st_value) {
    869 				(void) snprintf(c, sizeof (c), "%s`%s+0x%llx",
    870 				    dts.dts_object, dts.dts_name,
    871 				    pc - sym.st_value);
    872 			} else {
    873 				(void) snprintf(c, sizeof (c), "%s`%s",
    874 				    dts.dts_object, dts.dts_name);
    875 			}
    876 		} else {
    877 			/*
    878 			 * We'll repeat the lookup, but this time we'll specify
    879 			 * a NULL GElf_Sym -- indicating that we're only
    880 			 * interested in the containing module.
    881 			 */
    882 			if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
    883 				(void) snprintf(c, sizeof (c), "%s`0x%llx",
    884 				    dts.dts_object, pc);
    885 			} else {
    886 				(void) snprintf(c, sizeof (c), "0x%llx", pc);
    887 			}
    888 		}
    889 
    890 		if (dt_printf(dtp, fp, format, c) < 0)
    891 			return (-1);
    892 
    893 		if (dt_printf(dtp, fp, "\n") < 0)
    894 			return (-1);
    895 	}
    896 
    897 	return (0);
    898 }
    899 
    900 int
    901 dt_print_ustack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
    902     caddr_t addr, uint64_t arg)
    903 {
    904 	/* LINTED - alignment */
    905 	uint64_t *pc = (uint64_t *)addr;
    906 	uint32_t depth = DTRACE_USTACK_NFRAMES(arg);
    907 	uint32_t strsize = DTRACE_USTACK_STRSIZE(arg);
    908 	const char *strbase = addr + (depth + 1) * sizeof (uint64_t);
    909 	const char *str = strsize ? strbase : NULL;
    910 	int err = 0;
    911 
    912 	char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2];
    913 	struct ps_prochandle *P;
    914 	GElf_Sym sym;
    915 	int i, indent;
    916 	pid_t pid;
    917 
    918 	if (depth == 0)
    919 		return (0);
    920 
    921 	pid = (pid_t)*pc++;
    922 
    923 	if (dt_printf(dtp, fp, "\n") < 0)
    924 		return (-1);
    925 
    926 	if (format == NULL)
    927 		format = "%s";
    928 
    929 	if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
    930 		indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
    931 	else
    932 		indent = _dtrace_stkindent;
    933 
    934 	/*
    935 	 * Ultimately, we need to add an entry point in the library vector for
    936 	 * determining <symbol, offset> from <pid, address>.  For now, if
    937 	 * this is a vector open, we just print the raw address or string.
    938 	 */
    939 	if (dtp->dt_vector == NULL)
    940 		P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
    941 	else
    942 		P = NULL;
    943 
    944 	if (P != NULL)
    945 		dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
    946 
    947 	for (i = 0; i < depth && pc[i] != NULL; i++) {
    948 		const prmap_t *map;
    949 
    950 		if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
    951 			break;
    952 
    953 		if (P != NULL && Plookup_by_addr(P, pc[i],
    954 		    name, sizeof (name), &sym) == 0) {
    955 			(void) Pobjname(P, pc[i], objname, sizeof (objname));
    956 
    957 			if (pc[i] > sym.st_value) {
    958 				(void) snprintf(c, sizeof (c),
    959 				    "%s`%s+0x%llx", dt_basename(objname), name,
    960 				    (u_longlong_t)(pc[i] - sym.st_value));
    961 			} else {
    962 				(void) snprintf(c, sizeof (c),
    963 				    "%s`%s", dt_basename(objname), name);
    964 			}
    965 		} else if (str != NULL && str[0] != '\0' && str[0] != '@' &&
    966 		    (P != NULL && ((map = Paddr_to_map(P, pc[i])) == NULL ||
    967 		    (map->pr_mflags & MA_WRITE)))) {
    968 			/*
    969 			 * If the current string pointer in the string table
    970 			 * does not point to an empty string _and_ the program
    971 			 * counter falls in a writable region, we'll use the
    972 			 * string from the string table instead of the raw
    973 			 * address.  This last condition is necessary because
    974 			 * some (broken) ustack helpers will return a string
    975 			 * even for a program counter that they can't
    976 			 * identify.  If we have a string for a program
    977 			 * counter that falls in a segment that isn't
    978 			 * writable, we assume that we have fallen into this
    979 			 * case and we refuse to use the string.
    980 			 */
    981 			(void) snprintf(c, sizeof (c), "%s", str);
    982 		} else {
    983 			if (P != NULL && Pobjname(P, pc[i], objname,
    984 			    sizeof (objname)) != NULL) {
    985 				(void) snprintf(c, sizeof (c), "%s`0x%llx",
    986 				    dt_basename(objname), (u_longlong_t)pc[i]);
    987 			} else {
    988 				(void) snprintf(c, sizeof (c), "0x%llx",
    989 				    (u_longlong_t)pc[i]);
    990 			}
    991 		}
    992 
    993 		if ((err = dt_printf(dtp, fp, format, c)) < 0)
    994 			break;
    995 
    996 		if ((err = dt_printf(dtp, fp, "\n")) < 0)
    997 			break;
    998 
    999 		if (str != NULL && str[0] == '@') {
   1000 			/*
   1001 			 * If the first character of the string is an "at" sign,
   1002 			 * then the string is inferred to be an annotation --
   1003 			 * and it is printed out beneath the frame and offset
   1004 			 * with brackets.
   1005 			 */
   1006 			if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
   1007 				break;
   1008 
   1009 			(void) snprintf(c, sizeof (c), "  [ %s ]", &str[1]);
   1010 
   1011 			if ((err = dt_printf(dtp, fp, format, c)) < 0)
   1012 				break;
   1013 
   1014 			if ((err = dt_printf(dtp, fp, "\n")) < 0)
   1015 				break;
   1016 		}
   1017 
   1018 		if (str != NULL) {
   1019 			str += strlen(str) + 1;
   1020 			if (str - strbase >= strsize)
   1021 				str = NULL;
   1022 		}
   1023 	}
   1024 
   1025 	if (P != NULL) {
   1026 		dt_proc_unlock(dtp, P);
   1027 		dt_proc_release(dtp, P);
   1028 	}
   1029 
   1030 	return (err);
   1031 }
   1032 
   1033 static int
   1034 dt_print_usym(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr, dtrace_actkind_t act)
   1035 {
   1036 	/* LINTED - alignment */
   1037 	uint64_t pid = ((uint64_t *)addr)[0];
   1038 	/* LINTED - alignment */
   1039 	uint64_t pc = ((uint64_t *)addr)[1];
   1040 	const char *format = "  %-50s";
   1041 	char *s;
   1042 	int n, len = 256;
   1043 
   1044 	if (act == DTRACEACT_USYM && dtp->dt_vector == NULL) {
   1045 		struct ps_prochandle *P;
   1046 
   1047 		if ((P = dt_proc_grab(dtp, pid,
   1048 		    PGRAB_RDONLY | PGRAB_FORCE, 0)) != NULL) {
   1049 			GElf_Sym sym;
   1050 
   1051 			dt_proc_lock(dtp, P);
   1052 
   1053 			if (Plookup_by_addr(P, pc, NULL, 0, &sym) == 0)
   1054 				pc = sym.st_value;
   1055 
   1056 			dt_proc_unlock(dtp, P);
   1057 			dt_proc_release(dtp, P);
   1058 		}
   1059 	}
   1060 
   1061 	do {
   1062 		n = len;
   1063 		s = alloca(n);
   1064 	} while ((len = dtrace_uaddr2str(dtp, pid, pc, s, n)) > n);
   1065 
   1066 	return (dt_printf(dtp, fp, format, s));
   1067 }
   1068 
   1069 int
   1070 dt_print_umod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
   1071 {
   1072 	/* LINTED - alignment */
   1073 	uint64_t pid = ((uint64_t *)addr)[0];
   1074 	/* LINTED - alignment */
   1075 	uint64_t pc = ((uint64_t *)addr)[1];
   1076 	int err = 0;
   1077 
   1078 	char objname[PATH_MAX], c[PATH_MAX * 2];
   1079 	struct ps_prochandle *P;
   1080 
   1081 	if (format == NULL)
   1082 		format = "  %-50s";
   1083 
   1084 	/*
   1085 	 * See the comment in dt_print_ustack() for the rationale for
   1086 	 * printing raw addresses in the vectored case.
   1087 	 */
   1088 	if (dtp->dt_vector == NULL)
   1089 		P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
   1090 	else
   1091 		P = NULL;
   1092 
   1093 	if (P != NULL)
   1094 		dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
   1095 
   1096 	if (P != NULL && Pobjname(P, pc, objname, sizeof (objname)) != NULL) {
   1097 		(void) snprintf(c, sizeof (c), "%s", dt_basename(objname));
   1098 	} else {
   1099 		(void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
   1100 	}
   1101 
   1102 	err = dt_printf(dtp, fp, format, c);
   1103 
   1104 	if (P != NULL) {
   1105 		dt_proc_unlock(dtp, P);
   1106 		dt_proc_release(dtp, P);
   1107 	}
   1108 
   1109 	return (err);
   1110 }
   1111 
   1112 static int
   1113 dt_print_sym(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
   1114 {
   1115 	/* LINTED - alignment */
   1116 	uint64_t pc = *((uint64_t *)addr);
   1117 	dtrace_syminfo_t dts;
   1118 	GElf_Sym sym;
   1119 	char c[PATH_MAX * 2];
   1120 
   1121 	if (format == NULL)
   1122 		format = "  %-50s";
   1123 
   1124 	if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
   1125 		(void) snprintf(c, sizeof (c), "%s`%s",
   1126 		    dts.dts_object, dts.dts_name);
   1127 	} else {
   1128 		/*
   1129 		 * We'll repeat the lookup, but this time we'll specify a
   1130 		 * NULL GElf_Sym -- indicating that we're only interested in
   1131 		 * the containing module.
   1132 		 */
   1133 		if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
   1134 			(void) snprintf(c, sizeof (c), "%s`0x%llx",
   1135 			    dts.dts_object, (u_longlong_t)pc);
   1136 		} else {
   1137 			(void) snprintf(c, sizeof (c), "0x%llx",
   1138 			    (u_longlong_t)pc);
   1139 		}
   1140 	}
   1141 
   1142 	if (dt_printf(dtp, fp, format, c) < 0)
   1143 		return (-1);
   1144 
   1145 	return (0);
   1146 }
   1147 
   1148 int
   1149 dt_print_mod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
   1150 {
   1151 	/* LINTED - alignment */
   1152 	uint64_t pc = *((uint64_t *)addr);
   1153 	dtrace_syminfo_t dts;
   1154 	char c[PATH_MAX * 2];
   1155 
   1156 	if (format == NULL)
   1157 		format = "  %-50s";
   1158 
   1159 	if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
   1160 		(void) snprintf(c, sizeof (c), "%s", dts.dts_object);
   1161 	} else {
   1162 		(void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
   1163 	}
   1164 
   1165 	if (dt_printf(dtp, fp, format, c) < 0)
   1166 		return (-1);
   1167 
   1168 	return (0);
   1169 }
   1170 
   1171 typedef struct dt_normal {
   1172 	dtrace_aggvarid_t dtnd_id;
   1173 	uint64_t dtnd_normal;
   1174 } dt_normal_t;
   1175 
   1176 static int
   1177 dt_normalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
   1178 {
   1179 	dt_normal_t *normal = arg;
   1180 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
   1181 	dtrace_aggvarid_t id = normal->dtnd_id;
   1182 
   1183 	if (agg->dtagd_nrecs == 0)
   1184 		return (DTRACE_AGGWALK_NEXT);
   1185 
   1186 	if (agg->dtagd_varid != id)
   1187 		return (DTRACE_AGGWALK_NEXT);
   1188 
   1189 	((dtrace_aggdata_t *)aggdata)->dtada_normal = normal->dtnd_normal;
   1190 	return (DTRACE_AGGWALK_NORMALIZE);
   1191 }
   1192 
   1193 static int
   1194 dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
   1195 {
   1196 	dt_normal_t normal;
   1197 	caddr_t addr;
   1198 
   1199 	/*
   1200 	 * We (should) have two records:  the aggregation ID followed by the
   1201 	 * normalization value.
   1202 	 */
   1203 	addr = base + rec->dtrd_offset;
   1204 
   1205 	if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
   1206 		return (dt_set_errno(dtp, EDT_BADNORMAL));
   1207 
   1208 	/* LINTED - alignment */
   1209 	normal.dtnd_id = *((dtrace_aggvarid_t *)addr);
   1210 	rec++;
   1211 
   1212 	if (rec->dtrd_action != DTRACEACT_LIBACT)
   1213 		return (dt_set_errno(dtp, EDT_BADNORMAL));
   1214 
   1215 	if (rec->dtrd_arg != DT_ACT_NORMALIZE)
   1216 		return (dt_set_errno(dtp, EDT_BADNORMAL));
   1217 
   1218 	addr = base + rec->dtrd_offset;
   1219 
   1220 	switch (rec->dtrd_size) {
   1221 	case sizeof (uint64_t):
   1222 		/* LINTED - alignment */
   1223 		normal.dtnd_normal = *((uint64_t *)addr);
   1224 		break;
   1225 	case sizeof (uint32_t):
   1226 		/* LINTED - alignment */
   1227 		normal.dtnd_normal = *((uint32_t *)addr);
   1228 		break;
   1229 	case sizeof (uint16_t):
   1230 		/* LINTED - alignment */
   1231 		normal.dtnd_normal = *((uint16_t *)addr);
   1232 		break;
   1233 	case sizeof (uint8_t):
   1234 		normal.dtnd_normal = *((uint8_t *)addr);
   1235 		break;
   1236 	default:
   1237 		return (dt_set_errno(dtp, EDT_BADNORMAL));
   1238 	}
   1239 
   1240 	(void) dtrace_aggregate_walk(dtp, dt_normalize_agg, &normal);
   1241 
   1242 	return (0);
   1243 }
   1244 
   1245 static int
   1246 dt_denormalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
   1247 {
   1248 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
   1249 	dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
   1250 
   1251 	if (agg->dtagd_nrecs == 0)
   1252 		return (DTRACE_AGGWALK_NEXT);
   1253 
   1254 	if (agg->dtagd_varid != id)
   1255 		return (DTRACE_AGGWALK_NEXT);
   1256 
   1257 	return (DTRACE_AGGWALK_DENORMALIZE);
   1258 }
   1259 
   1260 static int
   1261 dt_clear_agg(const dtrace_aggdata_t *aggdata, void *arg)
   1262 {
   1263 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
   1264 	dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
   1265 
   1266 	if (agg->dtagd_nrecs == 0)
   1267 		return (DTRACE_AGGWALK_NEXT);
   1268 
   1269 	if (agg->dtagd_varid != id)
   1270 		return (DTRACE_AGGWALK_NEXT);
   1271 
   1272 	return (DTRACE_AGGWALK_CLEAR);
   1273 }
   1274 
   1275 typedef struct dt_trunc {
   1276 	dtrace_aggvarid_t dttd_id;
   1277 	uint64_t dttd_remaining;
   1278 } dt_trunc_t;
   1279 
   1280 static int
   1281 dt_trunc_agg(const dtrace_aggdata_t *aggdata, void *arg)
   1282 {
   1283 	dt_trunc_t *trunc = arg;
   1284 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
   1285 	dtrace_aggvarid_t id = trunc->dttd_id;
   1286 
   1287 	if (agg->dtagd_nrecs == 0)
   1288 		return (DTRACE_AGGWALK_NEXT);
   1289 
   1290 	if (agg->dtagd_varid != id)
   1291 		return (DTRACE_AGGWALK_NEXT);
   1292 
   1293 	if (trunc->dttd_remaining == 0)
   1294 		return (DTRACE_AGGWALK_REMOVE);
   1295 
   1296 	trunc->dttd_remaining--;
   1297 	return (DTRACE_AGGWALK_NEXT);
   1298 }
   1299 
   1300 static int
   1301 dt_trunc(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
   1302 {
   1303 	dt_trunc_t trunc;
   1304 	caddr_t addr;
   1305 	int64_t remaining;
   1306 	int (*func)(dtrace_hdl_t *, dtrace_aggregate_f *, void *);
   1307 
   1308 	/*
   1309 	 * We (should) have two records:  the aggregation ID followed by the
   1310 	 * number of aggregation entries after which the aggregation is to be
   1311 	 * truncated.
   1312 	 */
   1313 	addr = base + rec->dtrd_offset;
   1314 
   1315 	if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
   1316 		return (dt_set_errno(dtp, EDT_BADTRUNC));
   1317 
   1318 	/* LINTED - alignment */
   1319 	trunc.dttd_id = *((dtrace_aggvarid_t *)addr);
   1320 	rec++;
   1321 
   1322 	if (rec->dtrd_action != DTRACEACT_LIBACT)
   1323 		return (dt_set_errno(dtp, EDT_BADTRUNC));
   1324 
   1325 	if (rec->dtrd_arg != DT_ACT_TRUNC)
   1326 		return (dt_set_errno(dtp, EDT_BADTRUNC));
   1327 
   1328 	addr = base + rec->dtrd_offset;
   1329 
   1330 	switch (rec->dtrd_size) {
   1331 	case sizeof (uint64_t):
   1332 		/* LINTED - alignment */
   1333 		remaining = *((int64_t *)addr);
   1334 		break;
   1335 	case sizeof (uint32_t):
   1336 		/* LINTED - alignment */
   1337 		remaining = *((int32_t *)addr);
   1338 		break;
   1339 	case sizeof (uint16_t):
   1340 		/* LINTED - alignment */
   1341 		remaining = *((int16_t *)addr);
   1342 		break;
   1343 	case sizeof (uint8_t):
   1344 		remaining = *((int8_t *)addr);
   1345 		break;
   1346 	default:
   1347 		return (dt_set_errno(dtp, EDT_BADNORMAL));
   1348 	}
   1349 
   1350 	if (remaining < 0) {
   1351 		func = dtrace_aggregate_walk_valsorted;
   1352 		remaining = -remaining;
   1353 	} else {
   1354 		func = dtrace_aggregate_walk_valrevsorted;
   1355 	}
   1356 
   1357 	assert(remaining >= 0);
   1358 	trunc.dttd_remaining = remaining;
   1359 
   1360 	(void) func(dtp, dt_trunc_agg, &trunc);
   1361 
   1362 	return (0);
   1363 }
   1364 
   1365 static int
   1366 dt_print_datum(dtrace_hdl_t *dtp, FILE *fp, dtrace_recdesc_t *rec,
   1367     caddr_t addr, size_t size, uint64_t normal)
   1368 {
   1369 	int err;
   1370 	dtrace_actkind_t act = rec->dtrd_action;
   1371 
   1372 	switch (act) {
   1373 	case DTRACEACT_STACK:
   1374 		return (dt_print_stack(dtp, fp, NULL, addr,
   1375 		    rec->dtrd_arg, rec->dtrd_size / rec->dtrd_arg));
   1376 
   1377 	case DTRACEACT_USTACK:
   1378 	case DTRACEACT_JSTACK:
   1379 		return (dt_print_ustack(dtp, fp, NULL, addr, rec->dtrd_arg));
   1380 
   1381 	case DTRACEACT_USYM:
   1382 	case DTRACEACT_UADDR:
   1383 		return (dt_print_usym(dtp, fp, addr, act));
   1384 
   1385 	case DTRACEACT_UMOD:
   1386 		return (dt_print_umod(dtp, fp, NULL, addr));
   1387 
   1388 	case DTRACEACT_SYM:
   1389 		return (dt_print_sym(dtp, fp, NULL, addr));
   1390 
   1391 	case DTRACEACT_MOD:
   1392 		return (dt_print_mod(dtp, fp, NULL, addr));
   1393 
   1394 	case DTRACEAGG_QUANTIZE:
   1395 		return (dt_print_quantize(dtp, fp, addr, size, normal));
   1396 
   1397 	case DTRACEAGG_LQUANTIZE:
   1398 		return (dt_print_lquantize(dtp, fp, addr, size, normal));
   1399 
   1400 	case DTRACEAGG_AVG:
   1401 		return (dt_print_average(dtp, fp, addr, size, normal));
   1402 
   1403 	case DTRACEAGG_STDDEV:
   1404 		return (dt_print_stddev(dtp, fp, addr, size, normal));
   1405 
   1406 	default:
   1407 		break;
   1408 	}
   1409 
   1410 	switch (size) {
   1411 	case sizeof (uint64_t):
   1412 		err = dt_printf(dtp, fp, " %16lld",
   1413 		    /* LINTED - alignment */
   1414 		    (long long)*((uint64_t *)addr) / normal);
   1415 		break;
   1416 	case sizeof (uint32_t):
   1417 		/* LINTED - alignment */
   1418 		err = dt_printf(dtp, fp, " %8d", *((uint32_t *)addr) /
   1419 		    (uint32_t)normal);
   1420 		break;
   1421 	case sizeof (uint16_t):
   1422 		/* LINTED - alignment */
   1423 		err = dt_printf(dtp, fp, " %5d", *((uint16_t *)addr) /
   1424 		    (uint32_t)normal);
   1425 		break;
   1426 	case sizeof (uint8_t):
   1427 		err = dt_printf(dtp, fp, " %3d", *((uint8_t *)addr) /
   1428 		    (uint32_t)normal);
   1429 		break;
   1430 	default:
   1431 		err = dt_print_bytes(dtp, fp, addr, size, 50, 0);
   1432 		break;
   1433 	}
   1434 
   1435 	return (err);
   1436 }
   1437 
   1438 int
   1439 dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
   1440 {
   1441 	int i, aggact = 0;
   1442 	dt_print_aggdata_t *pd = arg;
   1443 	const dtrace_aggdata_t *aggdata = aggsdata[0];
   1444 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
   1445 	FILE *fp = pd->dtpa_fp;
   1446 	dtrace_hdl_t *dtp = pd->dtpa_dtp;
   1447 	dtrace_recdesc_t *rec;
   1448 	dtrace_actkind_t act;
   1449 	caddr_t addr;
   1450 	size_t size;
   1451 
   1452 	/*
   1453 	 * Iterate over each record description in the key, printing the traced
   1454 	 * data, skipping the first datum (the tuple member created by the
   1455 	 * compiler).
   1456 	 */
   1457 	for (i = 1; i < agg->dtagd_nrecs; i++) {
   1458 		rec = &agg->dtagd_rec[i];
   1459 		act = rec->dtrd_action;
   1460 		addr = aggdata->dtada_data + rec->dtrd_offset;
   1461 		size = rec->dtrd_size;
   1462 
   1463 		if (DTRACEACT_ISAGG(act)) {
   1464 			aggact = i;
   1465 			break;
   1466 		}
   1467 
   1468 		if (dt_print_datum(dtp, fp, rec, addr, size, 1) < 0)
   1469 			return (-1);
   1470 
   1471 		if (dt_buffered_flush(dtp, NULL, rec, aggdata,
   1472 		    DTRACE_BUFDATA_AGGKEY) < 0)
   1473 			return (-1);
   1474 	}
   1475 
   1476 	assert(aggact != 0);
   1477 
   1478 	for (i = (naggvars == 1 ? 0 : 1); i < naggvars; i++) {
   1479 		uint64_t normal;
   1480 
   1481 		aggdata = aggsdata[i];
   1482 		agg = aggdata->dtada_desc;
   1483 		rec = &agg->dtagd_rec[aggact];
   1484 		act = rec->dtrd_action;
   1485 		addr = aggdata->dtada_data + rec->dtrd_offset;
   1486 		size = rec->dtrd_size;
   1487 
   1488 		assert(DTRACEACT_ISAGG(act));
   1489 		normal = aggdata->dtada_normal;
   1490 
   1491 		if (dt_print_datum(dtp, fp, rec, addr, size, normal) < 0)
   1492 			return (-1);
   1493 
   1494 		if (dt_buffered_flush(dtp, NULL, rec, aggdata,
   1495 		    DTRACE_BUFDATA_AGGVAL) < 0)
   1496 			return (-1);
   1497 
   1498 		if (!pd->dtpa_allunprint)
   1499 			agg->dtagd_flags |= DTRACE_AGD_PRINTED;
   1500 	}
   1501 
   1502 	if (dt_printf(dtp, fp, "\n") < 0)
   1503 		return (-1);
   1504 
   1505 	if (dt_buffered_flush(dtp, NULL, NULL, aggdata,
   1506 	    DTRACE_BUFDATA_AGGFORMAT | DTRACE_BUFDATA_AGGLAST) < 0)
   1507 		return (-1);
   1508 
   1509 	return (0);
   1510 }
   1511 
   1512 int
   1513 dt_print_agg(const dtrace_aggdata_t *aggdata, void *arg)
   1514 {
   1515 	dt_print_aggdata_t *pd = arg;
   1516 	dtrace_aggdesc_t *agg = aggdata->dtada_desc;
   1517 	dtrace_aggvarid_t aggvarid = pd->dtpa_id;
   1518 
   1519 	if (pd->dtpa_allunprint) {
   1520 		if (agg->dtagd_flags & DTRACE_AGD_PRINTED)
   1521 			return (0);
   1522 	} else {
   1523 		/*
   1524 		 * If we're not printing all unprinted aggregations, then the
   1525 		 * aggregation variable ID denotes a specific aggregation
   1526 		 * variable that we should print -- skip any other aggregations
   1527 		 * that we encounter.
   1528 		 */
   1529 		if (agg->dtagd_nrecs == 0)
   1530 			return (0);
   1531 
   1532 		if (aggvarid != agg->dtagd_varid)
   1533 			return (0);
   1534 	}
   1535 
   1536 	return (dt_print_aggs(&aggdata, 1, arg));
   1537 }
   1538 
   1539 int
   1540 dt_setopt(dtrace_hdl_t *dtp, const dtrace_probedata_t *data,
   1541     const char *option, const char *value)
   1542 {
   1543 	int len, rval;
   1544 	char *msg;
   1545 	const char *errstr;
   1546 	dtrace_setoptdata_t optdata;
   1547 
   1548 	bzero(&optdata, sizeof (optdata));
   1549 	(void) dtrace_getopt(dtp, option, &optdata.dtsda_oldval);
   1550 
   1551 	if (dtrace_setopt(dtp, option, value) == 0) {
   1552 		(void) dtrace_getopt(dtp, option, &optdata.dtsda_newval);
   1553 		optdata.dtsda_probe = data;
   1554 		optdata.dtsda_option = option;
   1555 		optdata.dtsda_handle = dtp;
   1556 
   1557 		if ((rval = dt_handle_setopt(dtp, &optdata)) != 0)
   1558 			return (rval);
   1559 
   1560 		return (0);
   1561 	}
   1562 
   1563 	errstr = dtrace_errmsg(dtp, dtrace_errno(dtp));
   1564 	len = strlen(option) + strlen(value) + strlen(errstr) + 80;
   1565 	msg = alloca(len);
   1566 
   1567 	(void) snprintf(msg, len, "couldn't set option \"%s\" to \"%s\": %s\n",
   1568 	    option, value, errstr);
   1569 
   1570 	if ((rval = dt_handle_liberr(dtp, data, msg)) == 0)
   1571 		return (0);
   1572 
   1573 	return (rval);
   1574 }
   1575 
   1576 static int
   1577 dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu, dtrace_bufdesc_t *buf,
   1578     dtrace_consume_probe_f *efunc, dtrace_consume_rec_f *rfunc, void *arg)
   1579 {
   1580 	dtrace_epid_t id;
   1581 	size_t offs, start = buf->dtbd_oldest, end = buf->dtbd_size;
   1582 	int flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET);
   1583 	int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
   1584 	int rval, i, n;
   1585 	dtrace_epid_t last = DTRACE_EPIDNONE;
   1586 	dtrace_probedata_t data;
   1587 	uint64_t drops;
   1588 	caddr_t addr;
   1589 
   1590 	bzero(&data, sizeof (data));
   1591 	data.dtpda_handle = dtp;
   1592 	data.dtpda_cpu = cpu;
   1593 
   1594 again:
   1595 	for (offs = start; offs < end; ) {
   1596 		dtrace_eprobedesc_t *epd;
   1597 
   1598 		/*
   1599 		 * We're guaranteed to have an ID.
   1600 		 */
   1601 		id = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
   1602 
   1603 		if (id == DTRACE_EPIDNONE) {
   1604 			/*
   1605 			 * This is filler to assure proper alignment of the
   1606 			 * next record; we simply ignore it.
   1607 			 */
   1608 			offs += sizeof (id);
   1609 			continue;
   1610 		}
   1611 
   1612 		if ((rval = dt_epid_lookup(dtp, id, &data.dtpda_edesc,
   1613 		    &data.dtpda_pdesc)) != 0)
   1614 			return (rval);
   1615 
   1616 		epd = data.dtpda_edesc;
   1617 		data.dtpda_data = buf->dtbd_data + offs;
   1618 
   1619 		if (data.dtpda_edesc->dtepd_uarg != DT_ECB_DEFAULT) {
   1620 			rval = dt_handle(dtp, &data);
   1621 
   1622 			if (rval == DTRACE_CONSUME_NEXT)
   1623 				goto nextepid;
   1624 
   1625 			if (rval == DTRACE_CONSUME_ERROR)
   1626 				return (-1);
   1627 		}
   1628 
   1629 		if (flow)
   1630 			(void) dt_flowindent(dtp, &data, last, buf, offs);
   1631 
   1632 		rval = (*efunc)(&data, arg);
   1633 
   1634 		if (flow) {
   1635 			if (data.dtpda_flow == DTRACEFLOW_ENTRY)
   1636 				data.dtpda_indent += 2;
   1637 		}
   1638 
   1639 		if (rval == DTRACE_CONSUME_NEXT)
   1640 			goto nextepid;
   1641 
   1642 		if (rval == DTRACE_CONSUME_ABORT)
   1643 			return (dt_set_errno(dtp, EDT_DIRABORT));
   1644 
   1645 		if (rval != DTRACE_CONSUME_THIS)
   1646 			return (dt_set_errno(dtp, EDT_BADRVAL));
   1647 
   1648 		for (i = 0; i < epd->dtepd_nrecs; i++) {
   1649 			dtrace_recdesc_t *rec = &epd->dtepd_rec[i];
   1650 			dtrace_actkind_t act = rec->dtrd_action;
   1651 
   1652 			data.dtpda_data = buf->dtbd_data + offs +
   1653 			    rec->dtrd_offset;
   1654 			addr = data.dtpda_data;
   1655 
   1656 			if (act == DTRACEACT_LIBACT) {
   1657 				uint64_t arg = rec->dtrd_arg;
   1658 				dtrace_aggvarid_t id;
   1659 
   1660 				switch (arg) {
   1661 				case DT_ACT_CLEAR:
   1662 					/* LINTED - alignment */
   1663 					id = *((dtrace_aggvarid_t *)addr);
   1664 					(void) dtrace_aggregate_walk(dtp,
   1665 					    dt_clear_agg, &id);
   1666 					continue;
   1667 
   1668 				case DT_ACT_DENORMALIZE:
   1669 					/* LINTED - alignment */
   1670 					id = *((dtrace_aggvarid_t *)addr);
   1671 					(void) dtrace_aggregate_walk(dtp,
   1672 					    dt_denormalize_agg, &id);
   1673 					continue;
   1674 
   1675 				case DT_ACT_FTRUNCATE:
   1676 					if (fp == NULL)
   1677 						continue;
   1678 
   1679 					(void) fflush(fp);
   1680 					(void) ftruncate(fileno(fp), 0);
   1681 					(void) fseeko(fp, 0, SEEK_SET);
   1682 					continue;
   1683 
   1684 				case DT_ACT_NORMALIZE:
   1685 					if (i == epd->dtepd_nrecs - 1)
   1686 						return (dt_set_errno(dtp,
   1687 						    EDT_BADNORMAL));
   1688 
   1689 					if (dt_normalize(dtp,
   1690 					    buf->dtbd_data + offs, rec) != 0)
   1691 						return (-1);
   1692 
   1693 					i++;
   1694 					continue;
   1695 
   1696 				case DT_ACT_SETOPT: {
   1697 					uint64_t *opts = dtp->dt_options;
   1698 					dtrace_recdesc_t *valrec;
   1699 					uint32_t valsize;
   1700 					caddr_t val;
   1701 					int rv;
   1702 
   1703 					if (i == epd->dtepd_nrecs - 1) {
   1704 						return (dt_set_errno(dtp,
   1705 						    EDT_BADSETOPT));
   1706 					}
   1707 
   1708 					valrec = &epd->dtepd_rec[++i];
   1709 					valsize = valrec->dtrd_size;
   1710 
   1711 					if (valrec->dtrd_action != act ||
   1712 					    valrec->dtrd_arg != arg) {
   1713 						return (dt_set_errno(dtp,
   1714 						    EDT_BADSETOPT));
   1715 					}
   1716 
   1717 					if (valsize > sizeof (uint64_t)) {
   1718 						val = buf->dtbd_data + offs +
   1719 						    valrec->dtrd_offset;
   1720 					} else {
   1721 						val = "1";
   1722 					}
   1723 
   1724 					rv = dt_setopt(dtp, &data, addr, val);
   1725 
   1726 					if (rv != 0)
   1727 						return (-1);
   1728 
   1729 					flow = (opts[DTRACEOPT_FLOWINDENT] !=
   1730 					    DTRACEOPT_UNSET);
   1731 					quiet = (opts[DTRACEOPT_QUIET] !=
   1732 					    DTRACEOPT_UNSET);
   1733 
   1734 					continue;
   1735 				}
   1736 
   1737 				case DT_ACT_TRUNC:
   1738 					if (i == epd->dtepd_nrecs - 1)
   1739 						return (dt_set_errno(dtp,
   1740 						    EDT_BADTRUNC));
   1741 
   1742 					if (dt_trunc(dtp,
   1743 					    buf->dtbd_data + offs, rec) != 0)
   1744 						return (-1);
   1745 
   1746 					i++;
   1747 					continue;
   1748 
   1749 				default:
   1750 					continue;
   1751 				}
   1752 			}
   1753 
   1754 			rval = (*rfunc)(&data, rec, arg);
   1755 
   1756 			if (rval == DTRACE_CONSUME_NEXT)
   1757 				continue;
   1758 
   1759 			if (rval == DTRACE_CONSUME_ABORT)
   1760 				return (dt_set_errno(dtp, EDT_DIRABORT));
   1761 
   1762 			if (rval != DTRACE_CONSUME_THIS)
   1763 				return (dt_set_errno(dtp, EDT_BADRVAL));
   1764 
   1765 			if (act == DTRACEACT_STACK) {
   1766 				int depth = rec->dtrd_arg;
   1767 
   1768 				if (dt_print_stack(dtp, fp, NULL, addr, depth,
   1769 				    rec->dtrd_size / depth) < 0)
   1770 					return (-1);
   1771 				goto nextrec;
   1772 			}
   1773 
   1774 			if (act == DTRACEACT_USTACK ||
   1775 			    act == DTRACEACT_JSTACK) {
   1776 				if (dt_print_ustack(dtp, fp, NULL,
   1777 				    addr, rec->dtrd_arg) < 0)
   1778 					return (-1);
   1779 				goto nextrec;
   1780 			}
   1781 
   1782 			if (act == DTRACEACT_SYM) {
   1783 				if (dt_print_sym(dtp, fp, NULL, addr) < 0)
   1784 					return (-1);
   1785 				goto nextrec;
   1786 			}
   1787 
   1788 			if (act == DTRACEACT_MOD) {
   1789 				if (dt_print_mod(dtp, fp, NULL, addr) < 0)
   1790 					return (-1);
   1791 				goto nextrec;
   1792 			}
   1793 
   1794 			if (act == DTRACEACT_USYM || act == DTRACEACT_UADDR) {
   1795 				if (dt_print_usym(dtp, fp, addr, act) < 0)
   1796 					return (-1);
   1797 				goto nextrec;
   1798 			}
   1799 
   1800 			if (act == DTRACEACT_UMOD) {
   1801 				if (dt_print_umod(dtp, fp, NULL, addr) < 0)
   1802 					return (-1);
   1803 				goto nextrec;
   1804 			}
   1805 
   1806 			if (DTRACEACT_ISPRINTFLIKE(act)) {
   1807 				void *fmtdata;
   1808 				int (*func)(dtrace_hdl_t *, FILE *, void *,
   1809 				    const dtrace_probedata_t *,
   1810 				    const dtrace_recdesc_t *, uint_t,
   1811 				    const void *buf, size_t);
   1812 
   1813 				if ((fmtdata = dt_format_lookup(dtp,
   1814 				    rec->dtrd_format)) == NULL)
   1815 					goto nofmt;
   1816 
   1817 				switch (act) {
   1818 				case DTRACEACT_PRINTF:
   1819 					func = dtrace_fprintf;
   1820 					break;
   1821 				case DTRACEACT_PRINTA:
   1822 					func = dtrace_fprinta;
   1823 					break;
   1824 				case DTRACEACT_SYSTEM:
   1825 					func = dtrace_system;
   1826 					break;
   1827 				case DTRACEACT_FREOPEN:
   1828 					func = dtrace_freopen;
   1829 					break;
   1830 				}
   1831 
   1832 				n = (*func)(dtp, fp, fmtdata, &data,
   1833 				    rec, epd->dtepd_nrecs - i,
   1834 				    (uchar_t *)buf->dtbd_data + offs,
   1835 				    buf->dtbd_size - offs);
   1836 
   1837 				if (n < 0)
   1838 					return (-1); /* errno is set for us */
   1839 
   1840 				if (n > 0)
   1841 					i += n - 1;
   1842 				goto nextrec;
   1843 			}
   1844 
   1845 nofmt:
   1846 			if (act == DTRACEACT_PRINTA) {
   1847 				dt_print_aggdata_t pd;
   1848 				dtrace_aggvarid_t *aggvars;
   1849 				int j, naggvars = 0;
   1850 				size_t size = ((epd->dtepd_nrecs - i) *
   1851 				    sizeof (dtrace_aggvarid_t));
   1852 
   1853 				if ((aggvars = dt_alloc(dtp, size)) == NULL)
   1854 					return (-1);
   1855 
   1856 				/*
   1857 				 * This might be a printa() with multiple
   1858 				 * aggregation variables.  We need to scan
   1859 				 * forward through the records until we find
   1860 				 * a record from a different statement.
   1861 				 */
   1862 				for (j = i; j < epd->dtepd_nrecs; j++) {
   1863 					dtrace_recdesc_t *nrec;
   1864 					caddr_t naddr;
   1865 
   1866 					nrec = &epd->dtepd_rec[j];
   1867 
   1868 					if (nrec->dtrd_uarg != rec->dtrd_uarg)
   1869 						break;
   1870 
   1871 					if (nrec->dtrd_action != act) {
   1872 						return (dt_set_errno(dtp,
   1873 						    EDT_BADAGG));
   1874 					}
   1875 
   1876 					naddr = buf->dtbd_data + offs +
   1877 					    nrec->dtrd_offset;
   1878 
   1879 					aggvars[naggvars++] =
   1880 					    /* LINTED - alignment */
   1881 					    *((dtrace_aggvarid_t *)naddr);
   1882 				}
   1883 
   1884 				i = j - 1;
   1885 				bzero(&pd, sizeof (pd));
   1886 				pd.dtpa_dtp = dtp;
   1887 				pd.dtpa_fp = fp;
   1888 
   1889 				assert(naggvars >= 1);
   1890 
   1891 				if (naggvars == 1) {
   1892 					pd.dtpa_id = aggvars[0];
   1893 					dt_free(dtp, aggvars);
   1894 
   1895 					if (dt_printf(dtp, fp, "\n") < 0 ||
   1896 					    dtrace_aggregate_walk_sorted(dtp,
   1897 					    dt_print_agg, &pd) < 0)
   1898 						return (-1);
   1899 					goto nextrec;
   1900 				}
   1901 
   1902 				if (dt_printf(dtp, fp, "\n") < 0 ||
   1903 				    dtrace_aggregate_walk_joined(dtp, aggvars,
   1904 				    naggvars, dt_print_aggs, &pd) < 0) {
   1905 					dt_free(dtp, aggvars);
   1906 					return (-1);
   1907 				}
   1908 
   1909 				dt_free(dtp, aggvars);
   1910 				goto nextrec;
   1911 			}
   1912 
   1913 			switch (rec->dtrd_size) {
   1914 			case sizeof (uint64_t):
   1915 				n = dt_printf(dtp, fp,
   1916 				    quiet ? "%lld" : " %16lld",
   1917 				    /* LINTED - alignment */
   1918 				    *((unsigned long long *)addr));
   1919 				break;
   1920 			case sizeof (uint32_t):
   1921 				n = dt_printf(dtp, fp, quiet ? "%d" : " %8d",
   1922 				    /* LINTED - alignment */
   1923 				    *((uint32_t *)addr));
   1924 				break;
   1925 			case sizeof (uint16_t):
   1926 				n = dt_printf(dtp, fp, quiet ? "%d" : " %5d",
   1927 				    /* LINTED - alignment */
   1928 				    *((uint16_t *)addr));
   1929 				break;
   1930 			case sizeof (uint8_t):
   1931 				n = dt_printf(dtp, fp, quiet ? "%d" : " %3d",
   1932 				    *((uint8_t *)addr));
   1933 				break;
   1934 			default:
   1935 				n = dt_print_bytes(dtp, fp, addr,
   1936 				    rec->dtrd_size, 33, quiet);
   1937 				break;
   1938 			}
   1939 
   1940 			if (n < 0)
   1941 				return (-1); /* errno is set for us */
   1942 
   1943 nextrec:
   1944 			if (dt_buffered_flush(dtp, &data, rec, NULL, 0) < 0)
   1945 				return (-1); /* errno is set for us */
   1946 		}
   1947 
   1948 		/*
   1949 		 * Call the record callback with a NULL record to indicate
   1950 		 * that we're done processing this EPID.
   1951 		 */
   1952 		rval = (*rfunc)(&data, NULL, arg);
   1953 nextepid:
   1954 		offs += epd->dtepd_size;
   1955 		last = id;
   1956 	}
   1957 
   1958 	if (buf->dtbd_oldest != 0 && start == buf->dtbd_oldest) {
   1959 		end = buf->dtbd_oldest;
   1960 		start = 0;
   1961 		goto again;
   1962 	}
   1963 
   1964 	if ((drops = buf->dtbd_drops) == 0)
   1965 		return (0);
   1966 
   1967 	/*
   1968 	 * Explicitly zero the drops to prevent us from processing them again.
   1969 	 */
   1970 	buf->dtbd_drops = 0;
   1971 
   1972 	return (dt_handle_cpudrop(dtp, cpu, DTRACEDROP_PRINCIPAL, drops));
   1973 }
   1974 
   1975 typedef struct dt_begin {
   1976 	dtrace_consume_probe_f *dtbgn_probefunc;
   1977 	dtrace_consume_rec_f *dtbgn_recfunc;
   1978 	void *dtbgn_arg;
   1979 	dtrace_handle_err_f *dtbgn_errhdlr;
   1980 	void *dtbgn_errarg;
   1981 	int dtbgn_beginonly;
   1982 } dt_begin_t;
   1983 
   1984 static int
   1985 dt_consume_begin_probe(const dtrace_probedata_t *data, void *arg)
   1986 {
   1987 	dt_begin_t *begin = (dt_begin_t *)arg;
   1988 	dtrace_probedesc_t *pd = data->dtpda_pdesc;
   1989 
   1990 	int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
   1991 	int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
   1992 
   1993 	if (begin->dtbgn_beginonly) {
   1994 		if (!(r1 && r2))
   1995 			return (DTRACE_CONSUME_NEXT);
   1996 	} else {
   1997 		if (r1 && r2)
   1998 			return (DTRACE_CONSUME_NEXT);
   1999 	}
   2000 
   2001 	/*
   2002 	 * We have a record that we're interested in.  Now call the underlying
   2003 	 * probe function...
   2004 	 */
   2005 	return (begin->dtbgn_probefunc(data, begin->dtbgn_arg));
   2006 }
   2007 
   2008 static int
   2009 dt_consume_begin_record(const dtrace_probedata_t *data,
   2010     const dtrace_recdesc_t *rec, void *arg)
   2011 {
   2012 	dt_begin_t *begin = (dt_begin_t *)arg;
   2013 
   2014 	return (begin->dtbgn_recfunc(data, rec, begin->dtbgn_arg));
   2015 }
   2016 
   2017 static int
   2018 dt_consume_begin_error(const dtrace_errdata_t *data, void *arg)
   2019 {
   2020 	dt_begin_t *begin = (dt_begin_t *)arg;
   2021 	dtrace_probedesc_t *pd = data->dteda_pdesc;
   2022 
   2023 	int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
   2024 	int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
   2025 
   2026 	if (begin->dtbgn_beginonly) {
   2027 		if (!(r1 && r2))
   2028 			return (DTRACE_HANDLE_OK);
   2029 	} else {
   2030 		if (r1 && r2)
   2031 			return (DTRACE_HANDLE_OK);
   2032 	}
   2033 
   2034 	return (begin->dtbgn_errhdlr(data, begin->dtbgn_errarg));
   2035 }
   2036 
   2037 static int
   2038 dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf,
   2039     dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
   2040 {
   2041 	/*
   2042 	 * There's this idea that the BEGIN probe should be processed before
   2043 	 * everything else, and that the END probe should be processed after
   2044 	 * anything else.  In the common case, this is pretty easy to deal
   2045 	 * with.  However, a situation may arise where the BEGIN enabling and
   2046 	 * END enabling are on the same CPU, and some enabling in the middle
   2047 	 * occurred on a different CPU.  To deal with this (blech!) we need to
   2048 	 * consume the BEGIN buffer up until the end of the BEGIN probe, and
   2049 	 * then set it aside.  We will then process every other CPU, and then
   2050 	 * we'll return to the BEGIN CPU and process the rest of the data
   2051 	 * (which will inevitably include the END probe, if any).  Making this
   2052 	 * even more complicated (!) is the library's ERROR enabling.  Because
   2053 	 * this enabling is processed before we even get into the consume call
   2054 	 * back, any ERROR firing would result in the library's ERROR enabling
   2055 	 * being processed twice -- once in our first pass (for BEGIN probes),
   2056 	 * and again in our second pass (for everything but BEGIN probes).  To
   2057 	 * deal with this, we interpose on the ERROR handler to assure that we
   2058 	 * only process ERROR enablings induced by BEGIN enablings in the
   2059 	 * first pass, and that we only process ERROR enablings _not_ induced
   2060 	 * by BEGIN enablings in the second pass.
   2061 	 */
   2062 	dt_begin_t begin;
   2063 	processorid_t cpu = dtp->dt_beganon;
   2064 	dtrace_bufdesc_t nbuf;
   2065 	int rval, i;
   2066 	static int max_ncpus;
   2067 	dtrace_optval_t size;
   2068 
   2069 	dtp->dt_beganon = -1;
   2070 
   2071 	if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
   2072 		/*
   2073 		 * We really don't expect this to fail, but it is at least
   2074 		 * technically possible for this to fail with ENOENT.  In this
   2075 		 * case, we just drive on...
   2076 		 */
   2077 		if (errno == ENOENT)
   2078 			return (0);
   2079 
   2080 		return (dt_set_errno(dtp, errno));
   2081 	}
   2082 
   2083 	if (!dtp->dt_stopped || buf->dtbd_cpu != dtp->dt_endedon) {
   2084 		/*
   2085 		 * This is the simple case.  We're either not stopped, or if
   2086 		 * we are, we actually processed any END probes on another
   2087 		 * CPU.  We can simply consume this buffer and return.
   2088 		 */
   2089 		return (dt_consume_cpu(dtp, fp, cpu, buf, pf, rf, arg));
   2090 	}
   2091 
   2092 	begin.dtbgn_probefunc = pf;
   2093 	begin.dtbgn_recfunc = rf;
   2094 	begin.dtbgn_arg = arg;
   2095 	begin.dtbgn_beginonly = 1;
   2096 
   2097 	/*
   2098 	 * We need to interpose on the ERROR handler to be sure that we
   2099 	 * only process ERRORs induced by BEGIN.
   2100 	 */
   2101 	begin.dtbgn_errhdlr = dtp->dt_errhdlr;
   2102 	begin.dtbgn_errarg = dtp->dt_errarg;
   2103 	dtp->dt_errhdlr = dt_consume_begin_error;
   2104 	dtp->dt_errarg = &begin;
   2105 
   2106 	rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
   2107 	    dt_consume_begin_record, &begin);
   2108 
   2109 	dtp->dt_errhdlr = begin.dtbgn_errhdlr;
   2110 	dtp->dt_errarg = begin.dtbgn_errarg;
   2111 
   2112 	if (rval != 0)
   2113 		return (rval);
   2114 
   2115 	/*
   2116 	 * Now allocate a new buffer.  We'll use this to deal with every other
   2117 	 * CPU.
   2118 	 */
   2119 	bzero(&nbuf, sizeof (dtrace_bufdesc_t));
   2120 	(void) dtrace_getopt(dtp, "bufsize", &size);
   2121 	if ((nbuf.dtbd_data = malloc(size)) == NULL)
   2122 		return (dt_set_errno(dtp, EDT_NOMEM));
   2123 
   2124 	if (max_ncpus == 0)
   2125 		max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
   2126 
   2127 	for (i = 0; i < max_ncpus; i++) {
   2128 		nbuf.dtbd_cpu = i;
   2129 
   2130 		if (i == cpu)
   2131 			continue;
   2132 
   2133 		if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &nbuf) == -1) {
   2134 			/*
   2135 			 * If we failed with ENOENT, it may be because the
   2136 			 * CPU was unconfigured -- this is okay.  Any other
   2137 			 * error, however, is unexpected.
   2138 			 */
   2139 			if (errno == ENOENT)
   2140 				continue;
   2141 
   2142 			free(nbuf.dtbd_data);
   2143 
   2144 			return (dt_set_errno(dtp, errno));
   2145 		}
   2146 
   2147 		if ((rval = dt_consume_cpu(dtp, fp,
   2148 		    i, &nbuf, pf, rf, arg)) != 0) {
   2149 			free(nbuf.dtbd_data);
   2150 			return (rval);
   2151 		}
   2152 	}
   2153 
   2154 	free(nbuf.dtbd_data);
   2155 
   2156 	/*
   2157 	 * Okay -- we're done with the other buffers.  Now we want to
   2158 	 * reconsume the first buffer -- but this time we're looking for
   2159 	 * everything _but_ BEGIN.  And of course, in order to only consume
   2160 	 * those ERRORs _not_ associated with BEGIN, we need to reinstall our
   2161 	 * ERROR interposition function...
   2162 	 */
   2163 	begin.dtbgn_beginonly = 0;
   2164 
   2165 	assert(begin.dtbgn_errhdlr == dtp->dt_errhdlr);
   2166 	assert(begin.dtbgn_errarg == dtp->dt_errarg);
   2167 	dtp->dt_errhdlr = dt_consume_begin_error;
   2168 	dtp->dt_errarg = &begin;
   2169 
   2170 	rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
   2171 	    dt_consume_begin_record, &begin);
   2172 
   2173 	dtp->dt_errhdlr = begin.dtbgn_errhdlr;
   2174 	dtp->dt_errarg = begin.dtbgn_errarg;
   2175 
   2176 	return (rval);
   2177 }
   2178 
   2179 int
   2180 dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,
   2181     dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
   2182 {
   2183 	dtrace_bufdesc_t *buf = &dtp->dt_buf;
   2184 	dtrace_optval_t size;
   2185 	static int max_ncpus;
   2186 	int i, rval;
   2187 	dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_SWITCHRATE];
   2188 	hrtime_t now = gethrtime();
   2189 
   2190 	if (dtp->dt_lastswitch != 0) {
   2191 		if (now - dtp->dt_lastswitch < interval)
   2192 			return (0);
   2193 
   2194 		dtp->dt_lastswitch += interval;
   2195 	} else {
   2196 		dtp->dt_lastswitch = now;
   2197 	}
   2198 
   2199 	if (!dtp->dt_active)
   2200 		return (dt_set_errno(dtp, EINVAL));
   2201 
   2202 	if (max_ncpus == 0)
   2203 		max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
   2204 
   2205 	if (pf == NULL)
   2206 		pf = (dtrace_consume_probe_f *)dt_nullprobe;
   2207 
   2208 	if (rf == NULL)
   2209 		rf = (dtrace_consume_rec_f *)dt_nullrec;
   2210 
   2211 	if (buf->dtbd_data == NULL) {
   2212 		(void) dtrace_getopt(dtp, "bufsize", &size);
   2213 		if ((buf->dtbd_data = malloc(size)) == NULL)
   2214 			return (dt_set_errno(dtp, EDT_NOMEM));
   2215 
   2216 		buf->dtbd_size = size;
   2217 	}
   2218 
   2219 	/*
   2220 	 * If we have just begun, we want to first process the CPU that
   2221 	 * executed the BEGIN probe (if any).
   2222 	 */
   2223 	if (dtp->dt_active && dtp->dt_beganon != -1) {
   2224 		buf->dtbd_cpu = dtp->dt_beganon;
   2225 		if ((rval = dt_consume_begin(dtp, fp, buf, pf, rf, arg)) != 0)
   2226 			return (rval);
   2227 	}
   2228 
   2229 	for (i = 0; i < max_ncpus; i++) {
   2230 		buf->dtbd_cpu = i;
   2231 
   2232 		/*
   2233 		 * If we have stopped, we want to process the CPU on which the
   2234 		 * END probe was processed only _after_ we have processed
   2235 		 * everything else.
   2236 		 */
   2237 		if (dtp->dt_stopped && (i == dtp->dt_endedon))
   2238 			continue;
   2239 
   2240 		if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
   2241 			/*
   2242 			 * If we failed with ENOENT, it may be because the
   2243 			 * CPU was unconfigured -- this is okay.  Any other
   2244 			 * error, however, is unexpected.
   2245 			 */
   2246 			if (errno == ENOENT)
   2247 				continue;
   2248 
   2249 			return (dt_set_errno(dtp, errno));
   2250 		}
   2251 
   2252 		if ((rval = dt_consume_cpu(dtp, fp, i, buf, pf, rf, arg)) != 0)
   2253 			return (rval);
   2254 	}
   2255 
   2256 	if (!dtp->dt_stopped)
   2257 		return (0);
   2258 
   2259 	buf->dtbd_cpu = dtp->dt_endedon;
   2260 
   2261 	if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
   2262 		/*
   2263 		 * This _really_ shouldn't fail, but it is strictly speaking
   2264 		 * possible for this to return ENOENT if the CPU that called
   2265 		 * the END enabling somehow managed to become unconfigured.
   2266 		 * It's unclear how the user can possibly expect anything
   2267 		 * rational to happen in this case -- the state has been thrown
   2268 		 * out along with the unconfigured CPU -- so we'll just drive
   2269 		 * on...
   2270 		 */
   2271 		if (errno == ENOENT)
   2272 			return (0);
   2273 
   2274 		return (dt_set_errno(dtp, errno));
   2275 	}
   2276 
   2277 	return (dt_consume_cpu(dtp, fp, dtp->dt_endedon, buf, pf, rf, arg));
   2278 }
   2279