Home | History | Annotate | Download | only in lockstat
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 #include <stdio.h>
     29 #include <stddef.h>
     30 #include <stdlib.h>
     31 #include <stdarg.h>
     32 #include <string.h>
     33 #include <strings.h>
     34 #include <ctype.h>
     35 #include <fcntl.h>
     36 #include <unistd.h>
     37 #include <errno.h>
     38 #include <limits.h>
     39 #include <sys/types.h>
     40 #include <sys/modctl.h>
     41 #include <sys/stat.h>
     42 #include <sys/wait.h>
     43 #include <dtrace.h>
     44 #include <sys/lockstat.h>
     45 #include <alloca.h>
     46 #include <signal.h>
     47 #include <assert.h>
     48 
     49 #define	LOCKSTAT_OPTSTR	"x:bths:n:d:i:l:f:e:ckwWgCHEATID:RpPo:V"
     50 
     51 #define	LS_MAX_STACK_DEPTH	50
     52 #define	LS_MAX_EVENTS		64
     53 
     54 typedef struct lsrec {
     55 	struct lsrec	*ls_next;	/* next in hash chain */
     56 	uintptr_t	ls_lock;	/* lock address */
     57 	uintptr_t	ls_caller;	/* caller address */
     58 	uint32_t	ls_count;	/* cumulative event count */
     59 	uint32_t	ls_event;	/* type of event */
     60 	uintptr_t	ls_refcnt;	/* cumulative reference count */
     61 	uint64_t	ls_time;	/* cumulative event duration */
     62 	uint32_t	ls_hist[64];	/* log2(duration) histogram */
     63 	uintptr_t	ls_stack[LS_MAX_STACK_DEPTH];
     64 } lsrec_t;
     65 
     66 typedef struct lsdata {
     67 	struct lsrec	*lsd_next;	/* next available */
     68 	int		lsd_count;	/* number of records */
     69 } lsdata_t;
     70 
     71 /*
     72  * Definitions for the types of experiments which can be run.  They are
     73  * listed in increasing order of memory cost and processing time cost.
     74  * The numerical value of each type is the number of bytes needed per record.
     75  */
     76 #define	LS_BASIC	offsetof(lsrec_t, ls_time)
     77 #define	LS_TIME		offsetof(lsrec_t, ls_hist[0])
     78 #define	LS_HIST		offsetof(lsrec_t, ls_stack[0])
     79 #define	LS_STACK(depth)	offsetof(lsrec_t, ls_stack[depth])
     80 
     81 static void report_stats(FILE *, lsrec_t **, size_t, uint64_t, uint64_t);
     82 static void report_trace(FILE *, lsrec_t **);
     83 
     84 extern int symtab_init(void);
     85 extern char *addr_to_sym(uintptr_t, uintptr_t *, size_t *);
     86 extern uintptr_t sym_to_addr(char *name);
     87 extern size_t sym_size(char *name);
     88 extern char *strtok_r(char *, const char *, char **);
     89 
     90 #define	DEFAULT_NRECS	10000
     91 #define	DEFAULT_HZ	97
     92 #define	MAX_HZ		1000
     93 #define	MIN_AGGSIZE	(16 * 1024)
     94 #define	MAX_AGGSIZE	(32 * 1024 * 1024)
     95 
     96 static int g_stkdepth;
     97 static int g_topn = INT_MAX;
     98 static hrtime_t g_elapsed;
     99 static int g_rates = 0;
    100 static int g_pflag = 0;
    101 static int g_Pflag = 0;
    102 static int g_wflag = 0;
    103 static int g_Wflag = 0;
    104 static int g_cflag = 0;
    105 static int g_kflag = 0;
    106 static int g_gflag = 0;
    107 static int g_Vflag = 0;
    108 static int g_tracing = 0;
    109 static size_t g_recsize;
    110 static size_t g_nrecs;
    111 static int g_nrecs_used;
    112 static uchar_t g_enabled[LS_MAX_EVENTS];
    113 static hrtime_t g_min_duration[LS_MAX_EVENTS];
    114 static dtrace_hdl_t *g_dtp;
    115 static char *g_predicate;
    116 static char *g_ipredicate;
    117 static char *g_prog;
    118 static int g_proglen;
    119 static int g_dropped;
    120 
    121 typedef struct ls_event_info {
    122 	char	ev_type;
    123 	char	ev_lhdr[20];
    124 	char	ev_desc[80];
    125 	char	ev_units[10];
    126 	char	ev_name[DTRACE_NAMELEN];
    127 	char	*ev_predicate;
    128 	char	*ev_acquire;
    129 } ls_event_info_t;
    130 
    131 static ls_event_info_t g_event_info[LS_MAX_EVENTS] = {
    132 	{ 'C',	"Lock",	"Adaptive mutex spin",			"nsec",
    133 	    "lockstat:::adaptive-spin" },
    134 	{ 'C',	"Lock",	"Adaptive mutex block",			"nsec",
    135 	    "lockstat:::adaptive-block" },
    136 	{ 'C',	"Lock",	"Spin lock spin",			"nsec",
    137 	    "lockstat:::spin-spin" },
    138 	{ 'C',	"Lock",	"Thread lock spin",			"nsec",
    139 	    "lockstat:::thread-spin" },
    140 	{ 'C',	"Lock",	"R/W writer blocked by writer",		"nsec",
    141 	    "lockstat:::rw-block", "arg2 == 0 && arg3 == 1" },
    142 	{ 'C',	"Lock",	"R/W writer blocked by readers",	"nsec",
    143 	    "lockstat:::rw-block", "arg2 == 0 && arg3 == 0 && arg4" },
    144 	{ 'C',	"Lock",	"R/W reader blocked by writer",		"nsec",
    145 	    "lockstat:::rw-block", "arg2 != 0 && arg3 == 1" },
    146 	{ 'C',	"Lock",	"R/W reader blocked by write wanted",	"nsec",
    147 	    "lockstat:::rw-block", "arg2 != 0 && arg3 == 0 && arg4" },
    148 	{ 'C',	"Lock",	"Unknown event (type 8)",		"units"	},
    149 	{ 'C',	"Lock",	"Unknown event (type 9)",		"units"	},
    150 	{ 'C',	"Lock",	"Unknown event (type 10)",		"units"	},
    151 	{ 'C',	"Lock",	"Unknown event (type 11)",		"units"	},
    152 	{ 'C',	"Lock",	"Unknown event (type 12)",		"units"	},
    153 	{ 'C',	"Lock",	"Unknown event (type 13)",		"units"	},
    154 	{ 'C',	"Lock",	"Unknown event (type 14)",		"units"	},
    155 	{ 'C',	"Lock",	"Unknown event (type 15)",		"units"	},
    156 	{ 'C',	"Lock",	"Unknown event (type 16)",		"units"	},
    157 	{ 'C',	"Lock",	"Unknown event (type 17)",		"units"	},
    158 	{ 'C',	"Lock",	"Unknown event (type 18)",		"units"	},
    159 	{ 'C',	"Lock",	"Unknown event (type 19)",		"units"	},
    160 	{ 'C',	"Lock",	"Unknown event (type 20)",		"units"	},
    161 	{ 'C',	"Lock",	"Unknown event (type 21)",		"units"	},
    162 	{ 'C',	"Lock",	"Unknown event (type 22)",		"units"	},
    163 	{ 'C',	"Lock",	"Unknown event (type 23)",		"units"	},
    164 	{ 'C',	"Lock",	"Unknown event (type 24)",		"units"	},
    165 	{ 'C',	"Lock",	"Unknown event (type 25)",		"units"	},
    166 	{ 'C',	"Lock",	"Unknown event (type 26)",		"units"	},
    167 	{ 'C',	"Lock",	"Unknown event (type 27)",		"units"	},
    168 	{ 'C',	"Lock",	"Unknown event (type 28)",		"units"	},
    169 	{ 'C',	"Lock",	"Unknown event (type 29)",		"units"	},
    170 	{ 'C',	"Lock",	"Unknown event (type 30)",		"units"	},
    171 	{ 'C',	"Lock",	"Unknown event (type 31)",		"units"	},
    172 	{ 'H',	"Lock",	"Adaptive mutex hold",			"nsec",
    173 	    "lockstat:::adaptive-release", NULL,
    174 	    "lockstat:::adaptive-acquire" },
    175 	{ 'H',	"Lock",	"Spin lock hold",			"nsec",
    176 	    "lockstat:::spin-release", NULL,
    177 	    "lockstat:::spin-acquire" },
    178 	{ 'H',	"Lock",	"R/W writer hold",			"nsec",
    179 	    "lockstat:::rw-release", "arg1 == 0",
    180 	    "lockstat:::rw-acquire" },
    181 	{ 'H',	"Lock",	"R/W reader hold",			"nsec",
    182 	    "lockstat:::rw-release", "arg1 != 0",
    183 	    "lockstat:::rw-acquire" },
    184 	{ 'H',	"Lock",	"Unknown event (type 36)",		"units"	},
    185 	{ 'H',	"Lock",	"Unknown event (type 37)",		"units"	},
    186 	{ 'H',	"Lock",	"Unknown event (type 38)",		"units"	},
    187 	{ 'H',	"Lock",	"Unknown event (type 39)",		"units"	},
    188 	{ 'H',	"Lock",	"Unknown event (type 40)",		"units"	},
    189 	{ 'H',	"Lock",	"Unknown event (type 41)",		"units"	},
    190 	{ 'H',	"Lock",	"Unknown event (type 42)",		"units"	},
    191 	{ 'H',	"Lock",	"Unknown event (type 43)",		"units"	},
    192 	{ 'H',	"Lock",	"Unknown event (type 44)",		"units"	},
    193 	{ 'H',	"Lock",	"Unknown event (type 45)",		"units"	},
    194 	{ 'H',	"Lock",	"Unknown event (type 46)",		"units"	},
    195 	{ 'H',	"Lock",	"Unknown event (type 47)",		"units"	},
    196 	{ 'H',	"Lock",	"Unknown event (type 48)",		"units"	},
    197 	{ 'H',	"Lock",	"Unknown event (type 49)",		"units"	},
    198 	{ 'H',	"Lock",	"Unknown event (type 50)",		"units"	},
    199 	{ 'H',	"Lock",	"Unknown event (type 51)",		"units"	},
    200 	{ 'H',	"Lock",	"Unknown event (type 52)",		"units"	},
    201 	{ 'H',	"Lock",	"Unknown event (type 53)",		"units"	},
    202 	{ 'H',	"Lock",	"Unknown event (type 54)",		"units"	},
    203 	{ 'H',	"Lock",	"Unknown event (type 55)",		"units"	},
    204 	{ 'I',	"CPU+PIL", "Profiling interrupt",		"nsec",
    205 	    "profile:::profile-97", NULL },
    206 	{ 'I',	"Lock",	"Unknown event (type 57)",		"units"	},
    207 	{ 'I',	"Lock",	"Unknown event (type 58)",		"units"	},
    208 	{ 'I',	"Lock",	"Unknown event (type 59)",		"units"	},
    209 	{ 'E',	"Lock",	"Recursive lock entry detected",	"(N/A)",
    210 	    "lockstat:::rw-release", NULL, "lockstat:::rw-acquire" },
    211 	{ 'E',	"Lock",	"Lockstat enter failure",		"(N/A)"	},
    212 	{ 'E',	"Lock",	"Lockstat exit failure",		"nsec"	},
    213 	{ 'E',	"Lock",	"Lockstat record failure",		"(N/A)"	},
    214 };
    215 
    216 static void
    217 fail(int do_perror, const char *message, ...)
    218 {
    219 	va_list args;
    220 	int save_errno = errno;
    221 
    222 	va_start(args, message);
    223 	(void) fprintf(stderr, "lockstat: ");
    224 	(void) vfprintf(stderr, message, args);
    225 	va_end(args);
    226 	if (do_perror)
    227 		(void) fprintf(stderr, ": %s", strerror(save_errno));
    228 	(void) fprintf(stderr, "\n");
    229 	exit(2);
    230 }
    231 
    232 static void
    233 dfail(const char *message, ...)
    234 {
    235 	va_list args;
    236 
    237 	va_start(args, message);
    238 	(void) fprintf(stderr, "lockstat: ");
    239 	(void) vfprintf(stderr, message, args);
    240 	va_end(args);
    241 	(void) fprintf(stderr, ": %s\n",
    242 	    dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
    243 
    244 	exit(2);
    245 }
    246 
    247 static void
    248 show_events(char event_type, char *desc)
    249 {
    250 	int i, first = -1, last;
    251 
    252 	for (i = 0; i < LS_MAX_EVENTS; i++) {
    253 		ls_event_info_t *evp = &g_event_info[i];
    254 		if (evp->ev_type != event_type ||
    255 		    strncmp(evp->ev_desc, "Unknown event", 13) == 0)
    256 			continue;
    257 		if (first == -1)
    258 			first = i;
    259 		last = i;
    260 	}
    261 
    262 	(void) fprintf(stderr,
    263 	    "\n%s events (lockstat -%c or lockstat -e %d-%d):\n\n",
    264 	    desc, event_type, first, last);
    265 
    266 	for (i = first; i <= last; i++)
    267 		(void) fprintf(stderr,
    268 		    "%4d = %s\n", i, g_event_info[i].ev_desc);
    269 }
    270 
    271 static void
    272 usage(void)
    273 {
    274 	(void) fprintf(stderr,
    275 	    "Usage: lockstat [options] command [args]\n"
    276 	    "\nEvent selection options:\n\n"
    277 	    "  -C              watch contention events [on by default]\n"
    278 	    "  -E              watch error events [off by default]\n"
    279 	    "  -H              watch hold events [off by default]\n"
    280 	    "  -I              watch interrupt events [off by default]\n"
    281 	    "  -A              watch all lock events [equivalent to -CH]\n"
    282 	    "  -e event_list   only watch the specified events (shown below);\n"
    283 	    "                  <event_list> is a comma-separated list of\n"
    284 	    "                  events or ranges of events, e.g. 1,4-7,35\n"
    285 	    "  -i rate         interrupt rate for -I [default: %d Hz]\n"
    286 	    "\nData gathering options:\n\n"
    287 	    "  -b              basic statistics (lock, caller, event count)\n"
    288 	    "  -t              timing for all events [default]\n"
    289 	    "  -h              histograms for event times\n"
    290 	    "  -s depth        stack traces <depth> deep\n"
    291 	    "  -x opt[=val]    enable or modify DTrace options\n"
    292 	    "\nData filtering options:\n\n"
    293 	    "  -n nrecords     maximum number of data records [default: %d]\n"
    294 	    "  -l lock[,size]  only watch <lock>, which can be specified as a\n"
    295 	    "                  symbolic name or hex address; <size> defaults\n"
    296 	    "                  to the ELF symbol size if available, 1 if not\n"
    297 	    "  -f func[,size]  only watch events generated by <func>\n"
    298 	    "  -d duration     only watch events longer than <duration>\n"
    299 	    "  -T              trace (rather than sample) events\n"
    300 	    "\nData reporting options:\n\n"
    301 	    "  -c              coalesce lock data for arrays like pse_mutex[]\n"
    302 	    "  -k              coalesce PCs within functions\n"
    303 	    "  -g              show total events generated by function\n"
    304 	    "  -w              wherever: don't distinguish events by caller\n"
    305 	    "  -W              whichever: don't distinguish events by lock\n"
    306 	    "  -R              display rates rather than counts\n"
    307 	    "  -p              parsable output format (awk(1)-friendly)\n"
    308 	    "  -P              sort lock data by (count * avg_time) product\n"
    309 	    "  -D n            only display top <n> events of each type\n"
    310 	    "  -o filename     send output to <filename>\n",
    311 	    DEFAULT_HZ, DEFAULT_NRECS);
    312 
    313 	show_events('C', "Contention");
    314 	show_events('H', "Hold-time");
    315 	show_events('I', "Interrupt");
    316 	show_events('E', "Error");
    317 	(void) fprintf(stderr, "\n");
    318 
    319 	exit(1);
    320 }
    321 
    322 static int
    323 lockcmp(lsrec_t *a, lsrec_t *b)
    324 {
    325 	int i;
    326 
    327 	if (a->ls_event < b->ls_event)
    328 		return (-1);
    329 	if (a->ls_event > b->ls_event)
    330 		return (1);
    331 
    332 	for (i = g_stkdepth - 1; i >= 0; i--) {
    333 		if (a->ls_stack[i] < b->ls_stack[i])
    334 			return (-1);
    335 		if (a->ls_stack[i] > b->ls_stack[i])
    336 			return (1);
    337 	}
    338 
    339 	if (a->ls_caller < b->ls_caller)
    340 		return (-1);
    341 	if (a->ls_caller > b->ls_caller)
    342 		return (1);
    343 
    344 	if (a->ls_lock < b->ls_lock)
    345 		return (-1);
    346 	if (a->ls_lock > b->ls_lock)
    347 		return (1);
    348 
    349 	return (0);
    350 }
    351 
    352 static int
    353 countcmp(lsrec_t *a, lsrec_t *b)
    354 {
    355 	if (a->ls_event < b->ls_event)
    356 		return (-1);
    357 	if (a->ls_event > b->ls_event)
    358 		return (1);
    359 
    360 	return (b->ls_count - a->ls_count);
    361 }
    362 
    363 static int
    364 timecmp(lsrec_t *a, lsrec_t *b)
    365 {
    366 	if (a->ls_event < b->ls_event)
    367 		return (-1);
    368 	if (a->ls_event > b->ls_event)
    369 		return (1);
    370 
    371 	if (a->ls_time < b->ls_time)
    372 		return (1);
    373 	if (a->ls_time > b->ls_time)
    374 		return (-1);
    375 
    376 	return (0);
    377 }
    378 
    379 static int
    380 lockcmp_anywhere(lsrec_t *a, lsrec_t *b)
    381 {
    382 	if (a->ls_event < b->ls_event)
    383 		return (-1);
    384 	if (a->ls_event > b->ls_event)
    385 		return (1);
    386 
    387 	if (a->ls_lock < b->ls_lock)
    388 		return (-1);
    389 	if (a->ls_lock > b->ls_lock)
    390 		return (1);
    391 
    392 	return (0);
    393 }
    394 
    395 static int
    396 lock_and_count_cmp_anywhere(lsrec_t *a, lsrec_t *b)
    397 {
    398 	if (a->ls_event < b->ls_event)
    399 		return (-1);
    400 	if (a->ls_event > b->ls_event)
    401 		return (1);
    402 
    403 	if (a->ls_lock < b->ls_lock)
    404 		return (-1);
    405 	if (a->ls_lock > b->ls_lock)
    406 		return (1);
    407 
    408 	return (b->ls_count - a->ls_count);
    409 }
    410 
    411 static int
    412 sitecmp_anylock(lsrec_t *a, lsrec_t *b)
    413 {
    414 	int i;
    415 
    416 	if (a->ls_event < b->ls_event)
    417 		return (-1);
    418 	if (a->ls_event > b->ls_event)
    419 		return (1);
    420 
    421 	for (i = g_stkdepth - 1; i >= 0; i--) {
    422 		if (a->ls_stack[i] < b->ls_stack[i])
    423 			return (-1);
    424 		if (a->ls_stack[i] > b->ls_stack[i])
    425 			return (1);
    426 	}
    427 
    428 	if (a->ls_caller < b->ls_caller)
    429 		return (-1);
    430 	if (a->ls_caller > b->ls_caller)
    431 		return (1);
    432 
    433 	return (0);
    434 }
    435 
    436 static int
    437 site_and_count_cmp_anylock(lsrec_t *a, lsrec_t *b)
    438 {
    439 	int i;
    440 
    441 	if (a->ls_event < b->ls_event)
    442 		return (-1);
    443 	if (a->ls_event > b->ls_event)
    444 		return (1);
    445 
    446 	for (i = g_stkdepth - 1; i >= 0; i--) {
    447 		if (a->ls_stack[i] < b->ls_stack[i])
    448 			return (-1);
    449 		if (a->ls_stack[i] > b->ls_stack[i])
    450 			return (1);
    451 	}
    452 
    453 	if (a->ls_caller < b->ls_caller)
    454 		return (-1);
    455 	if (a->ls_caller > b->ls_caller)
    456 		return (1);
    457 
    458 	return (b->ls_count - a->ls_count);
    459 }
    460 
    461 static void
    462 mergesort(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **a, lsrec_t **b, int n)
    463 {
    464 	int m = n / 2;
    465 	int i, j;
    466 
    467 	if (m > 1)
    468 		mergesort(cmp, a, b, m);
    469 	if (n - m > 1)
    470 		mergesort(cmp, a + m, b + m, n - m);
    471 	for (i = m; i > 0; i--)
    472 		b[i - 1] = a[i - 1];
    473 	for (j = m - 1; j < n - 1; j++)
    474 		b[n + m - j - 2] = a[j + 1];
    475 	while (i < j)
    476 		*a++ = cmp(b[i], b[j]) < 0 ? b[i++] : b[j--];
    477 	*a = b[i];
    478 }
    479 
    480 static void
    481 coalesce(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **lock, int n)
    482 {
    483 	int i, j;
    484 	lsrec_t *target, *current;
    485 
    486 	target = lock[0];
    487 
    488 	for (i = 1; i < n; i++) {
    489 		current = lock[i];
    490 		if (cmp(current, target) != 0) {
    491 			target = current;
    492 			continue;
    493 		}
    494 		current->ls_event = LS_MAX_EVENTS;
    495 		target->ls_count += current->ls_count;
    496 		target->ls_refcnt += current->ls_refcnt;
    497 		if (g_recsize < LS_TIME)
    498 			continue;
    499 		target->ls_time += current->ls_time;
    500 		if (g_recsize < LS_HIST)
    501 			continue;
    502 		for (j = 0; j < 64; j++)
    503 			target->ls_hist[j] += current->ls_hist[j];
    504 	}
    505 }
    506 
    507 static void
    508 coalesce_symbol(uintptr_t *addrp)
    509 {
    510 	uintptr_t symoff;
    511 	size_t symsize;
    512 
    513 	if (addr_to_sym(*addrp, &symoff, &symsize) != NULL && symoff < symsize)
    514 		*addrp -= symoff;
    515 }
    516 
    517 static void
    518 predicate_add(char **pred, char *what, char *cmp, uintptr_t value)
    519 {
    520 	char *new;
    521 	int len, newlen;
    522 
    523 	if (what == NULL)
    524 		return;
    525 
    526 	if (*pred == NULL) {
    527 		*pred = malloc(1);
    528 		*pred[0] = '\0';
    529 	}
    530 
    531 	len = strlen(*pred);
    532 	newlen = len + strlen(what) + 32 + strlen("( && )");
    533 	new = malloc(newlen);
    534 
    535 	if (*pred[0] != '\0') {
    536 		if (cmp != NULL) {
    537 			(void) sprintf(new, "(%s) && (%s %s 0x%p)",
    538 			    *pred, what, cmp, (void *)value);
    539 		} else {
    540 			(void) sprintf(new, "(%s) && (%s)", *pred, what);
    541 		}
    542 	} else {
    543 		if (cmp != NULL) {
    544 			(void) sprintf(new, "%s %s 0x%p",
    545 			    what, cmp, (void *)value);
    546 		} else {
    547 			(void) sprintf(new, "%s", what);
    548 		}
    549 	}
    550 
    551 	free(*pred);
    552 	*pred = new;
    553 }
    554 
    555 static void
    556 predicate_destroy(char **pred)
    557 {
    558 	free(*pred);
    559 	*pred = NULL;
    560 }
    561 
    562 static void
    563 filter_add(char **filt, char *what, uintptr_t base, uintptr_t size)
    564 {
    565 	char buf[256], *c = buf, *new;
    566 	int len, newlen;
    567 
    568 	if (*filt == NULL) {
    569 		*filt = malloc(1);
    570 		*filt[0] = '\0';
    571 	}
    572 
    573 	(void) sprintf(c, "%s(%s >= 0x%p && %s < 0x%p)", *filt[0] != '\0' ?
    574 	    " || " : "", what, (void *)base, what, (void *)(base + size));
    575 
    576 	newlen = (len = strlen(*filt) + 1) + strlen(c);
    577 	new = malloc(newlen);
    578 	bcopy(*filt, new, len);
    579 	(void) strcat(new, c);
    580 	free(*filt);
    581 	*filt = new;
    582 }
    583 
    584 static void
    585 filter_destroy(char **filt)
    586 {
    587 	free(*filt);
    588 	*filt = NULL;
    589 }
    590 
    591 static void
    592 dprog_add(const char *fmt, ...)
    593 {
    594 	va_list args;
    595 	int size, offs;
    596 	char c;
    597 
    598 	va_start(args, fmt);
    599 	size = vsnprintf(&c, 1, fmt, args) + 1;
    600 
    601 	if (g_proglen == 0) {
    602 		offs = 0;
    603 	} else {
    604 		offs = g_proglen - 1;
    605 	}
    606 
    607 	g_proglen = offs + size;
    608 
    609 	if ((g_prog = realloc(g_prog, g_proglen)) == NULL)
    610 		fail(1, "failed to reallocate program text");
    611 
    612 	(void) vsnprintf(&g_prog[offs], size, fmt, args);
    613 }
    614 
    615 /*
    616  * This function may read like an open sewer, but keep in mind that programs
    617  * that generate other programs are rarely pretty.  If one has the unenviable
    618  * task of maintaining or -- worse -- extending this code, use the -V option
    619  * to examine the D program as generated by this function.
    620  */
    621 static void
    622 dprog_addevent(int event)
    623 {
    624 	ls_event_info_t *info = &g_event_info[event];
    625 	char *pred = NULL;
    626 	char stack[20];
    627 	const char *arg0, *caller;
    628 	char *arg1 = "arg1";
    629 	char buf[80];
    630 	hrtime_t dur;
    631 	int depth;
    632 
    633 	if (info->ev_name[0] == '\0')
    634 		return;
    635 
    636 	if (info->ev_type == 'I') {
    637 		/*
    638 		 * For interrupt events, arg0 (normally the lock pointer) is
    639 		 * the CPU address plus the current pil, and arg1 (normally
    640 		 * the number of nanoseconds) is the number of nanoseconds
    641 		 * late -- and it's stored in arg2.
    642 		 */
    643 		arg0 = "(uintptr_t)curthread->t_cpu + \n"
    644 		    "\t    curthread->t_cpu->cpu_profile_pil";
    645 		caller = "(uintptr_t)arg0";
    646 		arg1 = "arg2";
    647 	} else {
    648 		arg0 = "(uintptr_t)arg0";
    649 		caller = "caller";
    650 	}
    651 
    652 	if (g_recsize > LS_HIST) {
    653 		for (depth = 0; g_recsize > LS_STACK(depth); depth++)
    654 			continue;
    655 
    656 		if (g_tracing) {
    657 			(void) sprintf(stack, "\tstack(%d);\n", depth);
    658 		} else {
    659 			(void) sprintf(stack, ", stack(%d)", depth);
    660 		}
    661 	} else {
    662 		(void) sprintf(stack, "");
    663 	}
    664 
    665 	if (info->ev_acquire != NULL) {
    666 		/*
    667 		 * If this is a hold event, we need to generate an additional
    668 		 * clause for the acquire; the clause for the release will be
    669 		 * generated with the aggregating statement, below.
    670 		 */
    671 		dprog_add("%s\n", info->ev_acquire);
    672 		predicate_add(&pred, info->ev_predicate, NULL, 0);
    673 		predicate_add(&pred, g_predicate, NULL, 0);
    674 		if (pred != NULL)
    675 			dprog_add("/%s/\n", pred);
    676 
    677 		dprog_add("{\n");
    678 		(void) sprintf(buf, "self->ev%d[(uintptr_t)arg0]", event);
    679 
    680 		if (info->ev_type == 'H') {
    681 			dprog_add("\t%s = timestamp;\n", buf);
    682 		} else {
    683 			/*
    684 			 * If this isn't a hold event, it's the recursive
    685 			 * error event.  For this, we simply bump the
    686 			 * thread-local, per-lock count.
    687 			 */
    688 			dprog_add("\t%s++;\n", buf);
    689 		}
    690 
    691 		dprog_add("}\n\n");
    692 		predicate_destroy(&pred);
    693 		pred = NULL;
    694 
    695 		if (info->ev_type == 'E') {
    696 			/*
    697 			 * If this is the recursive lock error event, we need
    698 			 * to generate an additional clause to decrement the
    699 			 * thread-local, per-lock count.  This assures that we
    700 			 * only execute the aggregating clause if we have
    701 			 * recursive entry.
    702 			 */
    703 			dprog_add("%s\n", info->ev_name);
    704 			dprog_add("/%s/\n{\n\t%s--;\n}\n\n", buf, buf);
    705 		}
    706 
    707 		predicate_add(&pred, buf, NULL, 0);
    708 
    709 		if (info->ev_type == 'H') {
    710 			(void) sprintf(buf, "timestamp -\n\t    "
    711 			    "self->ev%d[(uintptr_t)arg0]", event);
    712 		}
    713 
    714 		arg1 = buf;
    715 	} else {
    716 		predicate_add(&pred, info->ev_predicate, NULL, 0);
    717 		if (info->ev_type != 'I')
    718 			predicate_add(&pred, g_predicate, NULL, 0);
    719 		else
    720 			predicate_add(&pred, g_ipredicate, NULL, 0);
    721 	}
    722 
    723 	if ((dur = g_min_duration[event]) != 0)
    724 		predicate_add(&pred, arg1, ">=", dur);
    725 
    726 	dprog_add("%s\n", info->ev_name);
    727 
    728 	if (pred != NULL)
    729 		dprog_add("/%s/\n", pred);
    730 	predicate_destroy(&pred);
    731 
    732 	dprog_add("{\n");
    733 
    734 	if (g_tracing) {
    735 		dprog_add("\ttrace(%dULL);\n", event);
    736 		dprog_add("\ttrace(%s);\n", arg0);
    737 		dprog_add("\ttrace(%s);\n", caller);
    738 		dprog_add(stack);
    739 	} else {
    740 		/*
    741 		 * The ordering here is important:  when we process the
    742 		 * aggregate, we count on the fact that @avg appears before
    743 		 * @hist in program order to assure that @avg is assigned the
    744 		 * first aggregation variable ID and @hist assigned the
    745 		 * second; see the comment in process_aggregate() for details.
    746 		 */
    747 		dprog_add("\t@avg[%dULL, %s, %s%s] = avg(%s);\n",
    748 		    event, arg0, caller, stack, arg1);
    749 
    750 		if (g_recsize >= LS_HIST) {
    751 			dprog_add("\t@hist[%dULL, %s, %s%s] = quantize"
    752 			    "(%s);\n", event, arg0, caller, stack, arg1);
    753 		}
    754 	}
    755 
    756 	if (info->ev_acquire != NULL)
    757 		dprog_add("\tself->ev%d[arg0] = 0;\n", event);
    758 
    759 	dprog_add("}\n\n");
    760 }
    761 
    762 static void
    763 dprog_compile()
    764 {
    765 	dtrace_prog_t *prog;
    766 	dtrace_proginfo_t info;
    767 
    768 	if (g_Vflag) {
    769 		(void) fprintf(stderr, "lockstat: vvvv D program vvvv\n");
    770 		(void) fputs(g_prog, stderr);
    771 		(void) fprintf(stderr, "lockstat: ^^^^ D program ^^^^\n");
    772 	}
    773 
    774 	if ((prog = dtrace_program_strcompile(g_dtp, g_prog,
    775 	    DTRACE_PROBESPEC_NAME, 0, 0, NULL)) == NULL)
    776 		dfail("failed to compile program");
    777 
    778 	if (dtrace_program_exec(g_dtp, prog, &info) == -1)
    779 		dfail("failed to enable probes");
    780 
    781 	if (dtrace_go(g_dtp) != 0)
    782 		dfail("couldn't start tracing");
    783 }
    784 
    785 static void
    786 status_fire(void)
    787 {}
    788 
    789 static void
    790 status_init(void)
    791 {
    792 	dtrace_optval_t val, status, agg;
    793 	struct sigaction act;
    794 	struct itimerspec ts;
    795 	struct sigevent ev;
    796 	timer_t tid;
    797 
    798 	if (dtrace_getopt(g_dtp, "statusrate", &status) == -1)
    799 		dfail("failed to get 'statusrate'");
    800 
    801 	if (dtrace_getopt(g_dtp, "aggrate", &agg) == -1)
    802 		dfail("failed to get 'statusrate'");
    803 
    804 	/*
    805 	 * We would want to awaken at a rate that is the GCD of the statusrate
    806 	 * and the aggrate -- but that seems a bit absurd.  Instead, we'll
    807 	 * simply awaken at a rate that is the more frequent of the two, which
    808 	 * assures that we're never later than the interval implied by the
    809 	 * more frequent rate.
    810 	 */
    811 	val = status < agg ? status : agg;
    812 
    813 	(void) sigemptyset(&act.sa_mask);
    814 	act.sa_flags = 0;
    815 	act.sa_handler = status_fire;
    816 	(void) sigaction(SIGUSR1, &act, NULL);
    817 
    818 	ev.sigev_notify = SIGEV_SIGNAL;
    819 	ev.sigev_signo = SIGUSR1;
    820 
    821 	if (timer_create(CLOCK_REALTIME, &ev, &tid) == -1)
    822 		dfail("cannot create CLOCK_REALTIME timer");
    823 
    824 	ts.it_value.tv_sec = val / NANOSEC;
    825 	ts.it_value.tv_nsec = val % NANOSEC;
    826 	ts.it_interval = ts.it_value;
    827 
    828 	if (timer_settime(tid, TIMER_RELTIME, &ts, NULL) == -1)
    829 		dfail("cannot set time on CLOCK_REALTIME timer");
    830 }
    831 
    832 static void
    833 status_check(void)
    834 {
    835 	if (!g_tracing && dtrace_aggregate_snap(g_dtp) != 0)
    836 		dfail("failed to snap aggregate");
    837 
    838 	if (dtrace_status(g_dtp) == -1)
    839 		dfail("dtrace_status()");
    840 }
    841 
    842 static void
    843 lsrec_fill(lsrec_t *lsrec, const dtrace_recdesc_t *rec, int nrecs, caddr_t data)
    844 {
    845 	bzero(lsrec, g_recsize);
    846 	lsrec->ls_count = 1;
    847 
    848 	if ((g_recsize > LS_HIST && nrecs < 4) || (nrecs < 3))
    849 		fail(0, "truncated DTrace record");
    850 
    851 	if (rec->dtrd_size != sizeof (uint64_t))
    852 		fail(0, "bad event size in first record");
    853 
    854 	/* LINTED - alignment */
    855 	lsrec->ls_event = (uint32_t)*((uint64_t *)(data + rec->dtrd_offset));
    856 	rec++;
    857 
    858 	if (rec->dtrd_size != sizeof (uintptr_t))
    859 		fail(0, "bad lock address size in second record");
    860 
    861 	/* LINTED - alignment */
    862 	lsrec->ls_lock = *((uintptr_t *)(data + rec->dtrd_offset));
    863 	rec++;
    864 
    865 	if (rec->dtrd_size != sizeof (uintptr_t))
    866 		fail(0, "bad caller size in third record");
    867 
    868 	/* LINTED - alignment */
    869 	lsrec->ls_caller = *((uintptr_t *)(data + rec->dtrd_offset));
    870 	rec++;
    871 
    872 	if (g_recsize > LS_HIST) {
    873 		int frames, i;
    874 		pc_t *stack;
    875 
    876 		frames = rec->dtrd_size / sizeof (pc_t);
    877 		/* LINTED - alignment */
    878 		stack = (pc_t *)(data + rec->dtrd_offset);
    879 
    880 		for (i = 1; i < frames; i++)
    881 			lsrec->ls_stack[i - 1] = stack[i];
    882 	}
    883 }
    884 
    885 /*ARGSUSED*/
    886 static int
    887 count_aggregate(const dtrace_aggdata_t *agg, void *arg)
    888 {
    889 	*((size_t *)arg) += 1;
    890 
    891 	return (DTRACE_AGGWALK_NEXT);
    892 }
    893 
    894 static int
    895 process_aggregate(const dtrace_aggdata_t *agg, void *arg)
    896 {
    897 	const dtrace_aggdesc_t *aggdesc = agg->dtada_desc;
    898 	caddr_t data = agg->dtada_data;
    899 	lsdata_t *lsdata = arg;
    900 	lsrec_t *lsrec = lsdata->lsd_next;
    901 	const dtrace_recdesc_t *rec;
    902 	uint64_t *avg, *quantized;
    903 	int i, j;
    904 
    905 	assert(lsdata->lsd_count < g_nrecs);
    906 
    907 	/*
    908 	 * Aggregation variable IDs are guaranteed to be generated in program
    909 	 * order, and they are guaranteed to start from DTRACE_AGGVARIDNONE
    910 	 * plus one.  As "avg" appears before "hist" in program order, we know
    911 	 * that "avg" will be allocated the first aggregation variable ID, and
    912 	 * "hist" will be allocated the second aggregation variable ID -- and
    913 	 * we therefore use the aggregation variable ID to differentiate the
    914 	 * cases.
    915 	 */
    916 	if (aggdesc->dtagd_varid > DTRACE_AGGVARIDNONE + 1) {
    917 		/*
    918 		 * If this is the histogram entry.  We'll copy the quantized
    919 		 * data into lc_hist, and jump over the rest.
    920 		 */
    921 		rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
    922 
    923 		if (aggdesc->dtagd_varid != DTRACE_AGGVARIDNONE + 2)
    924 			fail(0, "bad variable ID in aggregation record");
    925 
    926 		if (rec->dtrd_size !=
    927 		    DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
    928 			fail(0, "bad quantize size in aggregation record");
    929 
    930 		/* LINTED - alignment */
    931 		quantized = (uint64_t *)(data + rec->dtrd_offset);
    932 
    933 		for (i = DTRACE_QUANTIZE_ZEROBUCKET, j = 0;
    934 		    i < DTRACE_QUANTIZE_NBUCKETS; i++, j++)
    935 			lsrec->ls_hist[j] = quantized[i];
    936 
    937 		goto out;
    938 	}
    939 
    940 	lsrec_fill(lsrec, &aggdesc->dtagd_rec[1],
    941 	    aggdesc->dtagd_nrecs - 1, data);
    942 
    943 	rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
    944 
    945 	if (rec->dtrd_size != 2 * sizeof (uint64_t))
    946 		fail(0, "bad avg size in aggregation record");
    947 
    948 	/* LINTED - alignment */
    949 	avg = (uint64_t *)(data + rec->dtrd_offset);
    950 	lsrec->ls_count = (uint32_t)avg[0];
    951 	lsrec->ls_time = (uintptr_t)avg[1];
    952 
    953 	if (g_recsize >= LS_HIST)
    954 		return (DTRACE_AGGWALK_NEXT);
    955 
    956 out:
    957 	lsdata->lsd_next = (lsrec_t *)((uintptr_t)lsrec + g_recsize);
    958 	lsdata->lsd_count++;
    959 
    960 	return (DTRACE_AGGWALK_NEXT);
    961 }
    962 
    963 static int
    964 process_trace(const dtrace_probedata_t *pdata, void *arg)
    965 {
    966 	lsdata_t *lsdata = arg;
    967 	lsrec_t *lsrec = lsdata->lsd_next;
    968 	dtrace_eprobedesc_t *edesc = pdata->dtpda_edesc;
    969 	caddr_t data = pdata->dtpda_data;
    970 
    971 	if (lsdata->lsd_count >= g_nrecs)
    972 		return (DTRACE_CONSUME_NEXT);
    973 
    974 	lsrec_fill(lsrec, edesc->dtepd_rec, edesc->dtepd_nrecs, data);
    975 
    976 	lsdata->lsd_next = (lsrec_t *)((uintptr_t)lsrec + g_recsize);
    977 	lsdata->lsd_count++;
    978 
    979 	return (DTRACE_CONSUME_NEXT);
    980 }
    981 
    982 static int
    983 process_data(FILE *out, char *data)
    984 {
    985 	lsdata_t lsdata;
    986 
    987 	/* LINTED - alignment */
    988 	lsdata.lsd_next = (lsrec_t *)data;
    989 	lsdata.lsd_count = 0;
    990 
    991 	if (g_tracing) {
    992 		if (dtrace_consume(g_dtp, out,
    993 		    process_trace, NULL, &lsdata) != 0)
    994 			dfail("failed to consume buffer");
    995 
    996 		return (lsdata.lsd_count);
    997 	}
    998 
    999 	if (dtrace_aggregate_walk_keyvarsorted(g_dtp,
   1000 	    process_aggregate, &lsdata) != 0)
   1001 		dfail("failed to walk aggregate");
   1002 
   1003 	return (lsdata.lsd_count);
   1004 }
   1005 
   1006 /*ARGSUSED*/
   1007 static int
   1008 drophandler(const dtrace_dropdata_t *data, void *arg)
   1009 {
   1010 	g_dropped++;
   1011 	(void) fprintf(stderr, "lockstat: warning: %s", data->dtdda_msg);
   1012 	return (DTRACE_HANDLE_OK);
   1013 }
   1014 
   1015 int
   1016 main(int argc, char **argv)
   1017 {
   1018 	char *data_buf;
   1019 	lsrec_t *lsp, **current, **first, **sort_buf, **merge_buf;
   1020 	FILE *out = stdout;
   1021 	char c;
   1022 	pid_t child;
   1023 	int status;
   1024 	int i, j;
   1025 	hrtime_t duration;
   1026 	char *addrp, *offp, *sizep, *evp, *lastp, *p;
   1027 	uintptr_t addr;
   1028 	size_t size, off;
   1029 	int events_specified = 0;
   1030 	int exec_errno = 0;
   1031 	uint32_t event;
   1032 	char *filt = NULL, *ifilt = NULL;
   1033 	static uint64_t ev_count[LS_MAX_EVENTS + 1];
   1034 	static uint64_t ev_time[LS_MAX_EVENTS + 1];
   1035 	dtrace_optval_t aggsize;
   1036 	char aggstr[10];
   1037 	long ncpus;
   1038 	int dynvar = 0;
   1039 	int err;
   1040 
   1041 	if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
   1042 		fail(0, "cannot open dtrace library: %s",
   1043 		    dtrace_errmsg(NULL, err));
   1044 	}
   1045 
   1046 	if (dtrace_handle_drop(g_dtp, &drophandler, NULL) == -1)
   1047 		dfail("couldn't establish drop handler");
   1048 
   1049 	if (symtab_init() == -1)
   1050 		fail(1, "can't load kernel symbols");
   1051 
   1052 	g_nrecs = DEFAULT_NRECS;
   1053 
   1054 	while ((c = getopt(argc, argv, LOCKSTAT_OPTSTR)) != EOF) {
   1055 		switch (c) {
   1056 		case 'b':
   1057 			g_recsize = LS_BASIC;
   1058 			break;
   1059 
   1060 		case 't':
   1061 			g_recsize = LS_TIME;
   1062 			break;
   1063 
   1064 		case 'h':
   1065 			g_recsize = LS_HIST;
   1066 			break;
   1067 
   1068 		case 's':
   1069 			if (!isdigit(optarg[0]))
   1070 				usage();
   1071 			g_stkdepth = atoi(optarg);
   1072 			if (g_stkdepth > LS_MAX_STACK_DEPTH)
   1073 				fail(0, "max stack depth is %d",
   1074 				    LS_MAX_STACK_DEPTH);
   1075 			g_recsize = LS_STACK(g_stkdepth);
   1076 			break;
   1077 
   1078 		case 'n':
   1079 			if (!isdigit(optarg[0]))
   1080 				usage();
   1081 			g_nrecs = atoi(optarg);
   1082 			break;
   1083 
   1084 		case 'd':
   1085 			if (!isdigit(optarg[0]))
   1086 				usage();
   1087 			duration = atoll(optarg);
   1088 
   1089 			/*
   1090 			 * XXX -- durations really should be per event
   1091 			 * since the units are different, but it's hard
   1092 			 * to express this nicely in the interface.
   1093 			 * Not clear yet what the cleanest solution is.
   1094 			 */
   1095 			for (i = 0; i < LS_MAX_EVENTS; i++)
   1096 				if (g_event_info[i].ev_type != 'E')
   1097 					g_min_duration[i] = duration;
   1098 
   1099 			break;
   1100 
   1101 		case 'i':
   1102 			if (!isdigit(optarg[0]))
   1103 				usage();
   1104 			i = atoi(optarg);
   1105 			if (i <= 0)
   1106 				usage();
   1107 			if (i > MAX_HZ)
   1108 				fail(0, "max interrupt rate is %d Hz", MAX_HZ);
   1109 
   1110 			for (j = 0; j < LS_MAX_EVENTS; j++)
   1111 				if (strcmp(g_event_info[j].ev_desc,
   1112 				    "Profiling interrupt") == 0)
   1113 					break;
   1114 
   1115 			(void) sprintf(g_event_info[j].ev_name,
   1116 			    "profile:::profile-%d", i);
   1117 			break;
   1118 
   1119 		case 'l':
   1120 		case 'f':
   1121 			addrp = strtok(optarg, ",");
   1122 			sizep = strtok(NULL, ",");
   1123 			addrp = strtok(optarg, ",+");
   1124 			offp = strtok(NULL, ",");
   1125 
   1126 			size = sizep ? strtoul(sizep, NULL, 0) : 1;
   1127 			off = offp ? strtoul(offp, NULL, 0) : 0;
   1128 
   1129 			if (addrp[0] == '0') {
   1130 				addr = strtoul(addrp, NULL, 16) + off;
   1131 			} else {
   1132 				addr = sym_to_addr(addrp) + off;
   1133 				if (sizep == NULL)
   1134 					size = sym_size(addrp) - off;
   1135 				if (addr - off == 0)
   1136 					fail(0, "symbol '%s' not found", addrp);
   1137 				if (size == 0)
   1138 					size = 1;
   1139 			}
   1140 
   1141 
   1142 			if (c == 'l') {
   1143 				filter_add(&filt, "arg0", addr, size);
   1144 			} else {
   1145 				filter_add(&filt, "caller", addr, size);
   1146 				filter_add(&ifilt, "arg0", addr, size);
   1147 			}
   1148 			break;
   1149 
   1150 		case 'e':
   1151 			evp = strtok_r(optarg, ",", &lastp);
   1152 			while (evp) {
   1153 				int ev1, ev2;
   1154 				char *evp2;
   1155 
   1156 				(void) strtok(evp, "-");
   1157 				evp2 = strtok(NULL, "-");
   1158 				ev1 = atoi(evp);
   1159 				ev2 = evp2 ? atoi(evp2) : ev1;
   1160 				if ((uint_t)ev1 >= LS_MAX_EVENTS ||
   1161 				    (uint_t)ev2 >= LS_MAX_EVENTS || ev1 > ev2)
   1162 					fail(0, "-e events out of range");
   1163 				for (i = ev1; i <= ev2; i++)
   1164 					g_enabled[i] = 1;
   1165 				evp = strtok_r(NULL, ",", &lastp);
   1166 			}
   1167 			events_specified = 1;
   1168 			break;
   1169 
   1170 		case 'c':
   1171 			g_cflag = 1;
   1172 			break;
   1173 
   1174 		case 'k':
   1175 			g_kflag = 1;
   1176 			break;
   1177 
   1178 		case 'w':
   1179 			g_wflag = 1;
   1180 			break;
   1181 
   1182 		case 'W':
   1183 			g_Wflag = 1;
   1184 			break;
   1185 
   1186 		case 'g':
   1187 			g_gflag = 1;
   1188 			break;
   1189 
   1190 		case 'C':
   1191 		case 'E':
   1192 		case 'H':
   1193 		case 'I':
   1194 			for (i = 0; i < LS_MAX_EVENTS; i++)
   1195 				if (g_event_info[i].ev_type == c)
   1196 					g_enabled[i] = 1;
   1197 			events_specified = 1;
   1198 			break;
   1199 
   1200 		case 'A':
   1201 			for (i = 0; i < LS_MAX_EVENTS; i++)
   1202 				if (strchr("CH", g_event_info[i].ev_type))
   1203 					g_enabled[i] = 1;
   1204 			events_specified = 1;
   1205 			break;
   1206 
   1207 		case 'T':
   1208 			g_tracing = 1;
   1209 			break;
   1210 
   1211 		case 'D':
   1212 			if (!isdigit(optarg[0]))
   1213 				usage();
   1214 			g_topn = atoi(optarg);
   1215 			break;
   1216 
   1217 		case 'R':
   1218 			g_rates = 1;
   1219 			break;
   1220 
   1221 		case 'p':
   1222 			g_pflag = 1;
   1223 			break;
   1224 
   1225 		case 'P':
   1226 			g_Pflag = 1;
   1227 			break;
   1228 
   1229 		case 'o':
   1230 			if ((out = fopen(optarg, "w")) == NULL)
   1231 				fail(1, "error opening file");
   1232 			break;
   1233 
   1234 		case 'V':
   1235 			g_Vflag = 1;
   1236 			break;
   1237 
   1238 		default:
   1239 			if (strchr(LOCKSTAT_OPTSTR, c) == NULL)
   1240 				usage();
   1241 		}
   1242 	}
   1243 
   1244 	if (filt != NULL) {
   1245 		predicate_add(&g_predicate, filt, NULL, 0);
   1246 		filter_destroy(&filt);
   1247 	}
   1248 
   1249 	if (ifilt != NULL) {
   1250 		predicate_add(&g_ipredicate, ifilt, NULL, 0);
   1251 		filter_destroy(&ifilt);
   1252 	}
   1253 
   1254 	if (g_recsize == 0) {
   1255 		if (g_gflag) {
   1256 			g_stkdepth = LS_MAX_STACK_DEPTH;
   1257 			g_recsize = LS_STACK(g_stkdepth);
   1258 		} else {
   1259 			g_recsize = LS_TIME;
   1260 		}
   1261 	}
   1262 
   1263 	if (g_gflag && g_recsize <= LS_STACK(0))
   1264 		fail(0, "'-g' requires at least '-s 1' data gathering");
   1265 
   1266 	/*
   1267 	 * Make sure the alignment is reasonable
   1268 	 */
   1269 	g_recsize = -(-g_recsize & -sizeof (uint64_t));
   1270 
   1271 	for (i = 0; i < LS_MAX_EVENTS; i++) {
   1272 		/*
   1273 		 * If no events were specified, enable -C.
   1274 		 */
   1275 		if (!events_specified && g_event_info[i].ev_type == 'C')
   1276 			g_enabled[i] = 1;
   1277 	}
   1278 
   1279 	for (i = 0; i < LS_MAX_EVENTS; i++) {
   1280 		if (!g_enabled[i])
   1281 			continue;
   1282 
   1283 		if (g_event_info[i].ev_acquire != NULL) {
   1284 			/*
   1285 			 * If we've enabled a hold event, we must explicitly
   1286 			 * allocate dynamic variable space.
   1287 			 */
   1288 			dynvar = 1;
   1289 		}
   1290 
   1291 		dprog_addevent(i);
   1292 	}
   1293 
   1294 	/*
   1295 	 * Make sure there are remaining arguments to specify a child command
   1296 	 * to execute.
   1297 	 */
   1298 	if (argc <= optind)
   1299 		usage();
   1300 
   1301 	if ((ncpus = sysconf(_SC_NPROCESSORS_ONLN)) == -1)
   1302 		dfail("couldn't determine number of online CPUs");
   1303 
   1304 	/*
   1305 	 * By default, we set our data buffer size to be the number of records
   1306 	 * multiplied by the size of the record, doubled to account for some
   1307 	 * DTrace slop and divided by the number of CPUs.  We silently clamp
   1308 	 * the aggregation size at both a minimum and a maximum to prevent
   1309 	 * absurdly low or high values.
   1310 	 */
   1311 	if ((aggsize = (g_nrecs * g_recsize * 2) / ncpus) < MIN_AGGSIZE)
   1312 		aggsize = MIN_AGGSIZE;
   1313 
   1314 	if (aggsize > MAX_AGGSIZE)
   1315 		aggsize = MAX_AGGSIZE;
   1316 
   1317 	(void) sprintf(aggstr, "%lld", (long long)aggsize);
   1318 
   1319 	if (!g_tracing) {
   1320 		if (dtrace_setopt(g_dtp, "bufsize", "4k") == -1)
   1321 			dfail("failed to set 'bufsize'");
   1322 
   1323 		if (dtrace_setopt(g_dtp, "aggsize", aggstr) == -1)
   1324 			dfail("failed to set 'aggsize'");
   1325 
   1326 		if (dynvar) {
   1327 			/*
   1328 			 * If we're using dynamic variables, we set our
   1329 			 * dynamic variable size to be one megabyte per CPU,
   1330 			 * with a hard-limit of 32 megabytes.  This may still
   1331 			 * be too small in some cases, but it can be tuned
   1332 			 * manually via -x if need be.
   1333 			 */
   1334 			(void) sprintf(aggstr, "%ldm", ncpus < 32 ? ncpus : 32);
   1335 
   1336 			if (dtrace_setopt(g_dtp, "dynvarsize", aggstr) == -1)
   1337 				dfail("failed to set 'dynvarsize'");
   1338 		}
   1339 	} else {
   1340 		if (dtrace_setopt(g_dtp, "bufsize", aggstr) == -1)
   1341 			dfail("failed to set 'bufsize'");
   1342 	}
   1343 
   1344 	if (dtrace_setopt(g_dtp, "statusrate", "10sec") == -1)
   1345 		dfail("failed to set 'statusrate'");
   1346 
   1347 	optind = 1;
   1348 	while ((c = getopt(argc, argv, LOCKSTAT_OPTSTR)) != EOF) {
   1349 		switch (c) {
   1350 		case 'x':
   1351 			if ((p = strchr(optarg, '=')) != NULL)
   1352 				*p++ = '\0';
   1353 
   1354 			if (dtrace_setopt(g_dtp, optarg, p) != 0)
   1355 				dfail("failed to set -x %s", optarg);
   1356 			break;
   1357 		}
   1358 	}
   1359 
   1360 	argc -= optind;
   1361 	argv += optind;
   1362 
   1363 	dprog_compile();
   1364 	status_init();
   1365 
   1366 	g_elapsed = -gethrtime();
   1367 
   1368 	/*
   1369 	 * Spawn the specified command and wait for it to complete.
   1370 	 */
   1371 	child = fork();
   1372 	if (child == -1)
   1373 		fail(1, "cannot fork");
   1374 	if (child == 0) {
   1375 		(void) dtrace_close(g_dtp);
   1376 		(void) execvp(argv[0], &argv[0]);
   1377 		exec_errno = errno;
   1378 		exit(127);
   1379 	}
   1380 
   1381 	while (waitpid(child, &status, WEXITED) != child)
   1382 		status_check();
   1383 
   1384 	g_elapsed += gethrtime();
   1385 
   1386 	if (WIFEXITED(status)) {
   1387 		if (WEXITSTATUS(status) != 0) {
   1388 			if (exec_errno != 0) {
   1389 				errno = exec_errno;
   1390 				fail(1, "could not execute %s", argv[0]);
   1391 			}
   1392 			(void) fprintf(stderr,
   1393 			    "lockstat: warning: %s exited with code %d\n",
   1394 			    argv[0], WEXITSTATUS(status));
   1395 		}
   1396 	} else {
   1397 		(void) fprintf(stderr,
   1398 		    "lockstat: warning: %s died on signal %d\n",
   1399 		    argv[0], WTERMSIG(status));
   1400 	}
   1401 
   1402 	if (dtrace_stop(g_dtp) == -1)
   1403 		dfail("failed to stop dtrace");
   1404 
   1405 	/*
   1406 	 * Before we read out the results, we need to allocate our buffer.
   1407 	 * If we're tracing, then we'll just use the precalculated size.  If
   1408 	 * we're not, then we'll take a snapshot of the aggregate, and walk
   1409 	 * it to count the number of records.
   1410 	 */
   1411 	if (!g_tracing) {
   1412 		if (dtrace_aggregate_snap(g_dtp) != 0)
   1413 			dfail("failed to snap aggregate");
   1414 
   1415 		g_nrecs = 0;
   1416 
   1417 		if (dtrace_aggregate_walk(g_dtp,
   1418 		    count_aggregate, &g_nrecs) != 0)
   1419 			dfail("failed to walk aggregate");
   1420 	}
   1421 
   1422 	if ((data_buf = memalign(sizeof (uint64_t),
   1423 	    (g_nrecs + 1) * g_recsize)) == NULL)
   1424 		fail(1, "Memory allocation failed");
   1425 
   1426 	/*
   1427 	 * Read out the DTrace data.
   1428 	 */
   1429 	g_nrecs_used = process_data(out, data_buf);
   1430 
   1431 	if (g_nrecs_used > g_nrecs || g_dropped)
   1432 		(void) fprintf(stderr, "lockstat: warning: "
   1433 		    "ran out of data records (use -n for more)\n");
   1434 
   1435 	/* LINTED - alignment */
   1436 	for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
   1437 	    /* LINTED - alignment */
   1438 	    lsp = (lsrec_t *)((char *)lsp + g_recsize)) {
   1439 		ev_count[lsp->ls_event] += lsp->ls_count;
   1440 		ev_time[lsp->ls_event] += lsp->ls_time;
   1441 	}
   1442 
   1443 	/*
   1444 	 * If -g was specified, convert stacks into individual records.
   1445 	 */
   1446 	if (g_gflag) {
   1447 		lsrec_t *newlsp, *oldlsp;
   1448 
   1449 		newlsp = memalign(sizeof (uint64_t),
   1450 		    g_nrecs_used * LS_TIME * (g_stkdepth + 1));
   1451 		if (newlsp == NULL)
   1452 			fail(1, "Cannot allocate space for -g processing");
   1453 		lsp = newlsp;
   1454 		/* LINTED - alignment */
   1455 		for (i = 0, oldlsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
   1456 		    /* LINTED - alignment */
   1457 		    oldlsp = (lsrec_t *)((char *)oldlsp + g_recsize)) {
   1458 			int fr;
   1459 			int caller_in_stack = 0;
   1460 
   1461 			if (oldlsp->ls_count == 0)
   1462 				continue;
   1463 
   1464 			for (fr = 0; fr < g_stkdepth; fr++) {
   1465 				if (oldlsp->ls_stack[fr] == 0)
   1466 					break;
   1467 				if (oldlsp->ls_stack[fr] == oldlsp->ls_caller)
   1468 					caller_in_stack = 1;
   1469 				bcopy(oldlsp, lsp, LS_TIME);
   1470 				lsp->ls_caller = oldlsp->ls_stack[fr];
   1471 				/* LINTED - alignment */
   1472 				lsp = (lsrec_t *)((char *)lsp + LS_TIME);
   1473 			}
   1474 			if (!caller_in_stack) {
   1475 				bcopy(oldlsp, lsp, LS_TIME);
   1476 				/* LINTED - alignment */
   1477 				lsp = (lsrec_t *)((char *)lsp + LS_TIME);
   1478 			}
   1479 		}
   1480 		g_nrecs = g_nrecs_used =
   1481 		    ((uintptr_t)lsp - (uintptr_t)newlsp) / LS_TIME;
   1482 		g_recsize = LS_TIME;
   1483 		g_stkdepth = 0;
   1484 		free(data_buf);
   1485 		data_buf = (char *)newlsp;
   1486 	}
   1487 
   1488 	if ((sort_buf = calloc(2 * (g_nrecs + 1),
   1489 	    sizeof (void *))) == NULL)
   1490 		fail(1, "Sort buffer allocation failed");
   1491 	merge_buf = sort_buf + (g_nrecs + 1);
   1492 
   1493 	/*
   1494 	 * Build the sort buffer, discarding zero-count records along the way.
   1495 	 */
   1496 	/* LINTED - alignment */
   1497 	for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
   1498 	    /* LINTED - alignment */
   1499 	    lsp = (lsrec_t *)((char *)lsp + g_recsize)) {
   1500 		if (lsp->ls_count == 0)
   1501 			lsp->ls_event = LS_MAX_EVENTS;
   1502 		sort_buf[i] = lsp;
   1503 	}
   1504 
   1505 	if (g_nrecs_used == 0)
   1506 		exit(0);
   1507 
   1508 	/*
   1509 	 * Add a sentinel after the last record
   1510 	 */
   1511 	sort_buf[i] = lsp;
   1512 	lsp->ls_event = LS_MAX_EVENTS;
   1513 
   1514 	if (g_tracing) {
   1515 		report_trace(out, sort_buf);
   1516 		return (0);
   1517 	}
   1518 
   1519 	/*
   1520 	 * Application of -g may have resulted in multiple records
   1521 	 * with the same signature; coalesce them.
   1522 	 */
   1523 	if (g_gflag) {
   1524 		mergesort(lockcmp, sort_buf, merge_buf, g_nrecs_used);
   1525 		coalesce(lockcmp, sort_buf, g_nrecs_used);
   1526 	}
   1527 
   1528 	/*
   1529 	 * Coalesce locks within the same symbol if -c option specified.
   1530 	 * Coalesce PCs within the same function if -k option specified.
   1531 	 */
   1532 	if (g_cflag || g_kflag) {
   1533 		for (i = 0; i < g_nrecs_used; i++) {
   1534 			int fr;
   1535 			lsp = sort_buf[i];
   1536 			if (g_cflag)
   1537 				coalesce_symbol(&lsp->ls_lock);
   1538 			if (g_kflag) {
   1539 				for (fr = 0; fr < g_stkdepth; fr++)
   1540 					coalesce_symbol(&lsp->ls_stack[fr]);
   1541 				coalesce_symbol(&lsp->ls_caller);
   1542 			}
   1543 		}
   1544 		mergesort(lockcmp, sort_buf, merge_buf, g_nrecs_used);
   1545 		coalesce(lockcmp, sort_buf, g_nrecs_used);
   1546 	}
   1547 
   1548 	/*
   1549 	 * Coalesce callers if -w option specified
   1550 	 */
   1551 	if (g_wflag) {
   1552 		mergesort(lock_and_count_cmp_anywhere,
   1553 		    sort_buf, merge_buf, g_nrecs_used);
   1554 		coalesce(lockcmp_anywhere, sort_buf, g_nrecs_used);
   1555 	}
   1556 
   1557 	/*
   1558 	 * Coalesce locks if -W option specified
   1559 	 */
   1560 	if (g_Wflag) {
   1561 		mergesort(site_and_count_cmp_anylock,
   1562 		    sort_buf, merge_buf, g_nrecs_used);
   1563 		coalesce(sitecmp_anylock, sort_buf, g_nrecs_used);
   1564 	}
   1565 
   1566 	/*
   1567 	 * Sort data by contention count (ls_count) or total time (ls_time),
   1568 	 * depending on g_Pflag.  Override g_Pflag if time wasn't measured.
   1569 	 */
   1570 	if (g_recsize < LS_TIME)
   1571 		g_Pflag = 0;
   1572 
   1573 	if (g_Pflag)
   1574 		mergesort(timecmp, sort_buf, merge_buf, g_nrecs_used);
   1575 	else
   1576 		mergesort(countcmp, sort_buf, merge_buf, g_nrecs_used);
   1577 
   1578 	/*
   1579 	 * Display data by event type
   1580 	 */
   1581 	first = &sort_buf[0];
   1582 	while ((event = (*first)->ls_event) < LS_MAX_EVENTS) {
   1583 		current = first;
   1584 		while ((lsp = *current)->ls_event == event)
   1585 			current++;
   1586 		report_stats(out, first, current - first, ev_count[event],
   1587 		    ev_time[event]);
   1588 		first = current;
   1589 	}
   1590 
   1591 	return (0);
   1592 }
   1593 
   1594 static char *
   1595 format_symbol(char *buf, uintptr_t addr, int show_size)
   1596 {
   1597 	uintptr_t symoff;
   1598 	char *symname;
   1599 	size_t symsize;
   1600 
   1601 	symname = addr_to_sym(addr, &symoff, &symsize);
   1602 
   1603 	if (show_size && symoff == 0)
   1604 		(void) sprintf(buf, "%s[%ld]", symname, (long)symsize);
   1605 	else if (symoff == 0)
   1606 		(void) sprintf(buf, "%s", symname);
   1607 	else if (symoff < 16 && bcmp(symname, "cpu[", 4) == 0)	/* CPU+PIL */
   1608 		(void) sprintf(buf, "%s+%ld", symname, (long)symoff);
   1609 	else if (symoff <= symsize || (symoff < 256 && addr != symoff))
   1610 		(void) sprintf(buf, "%s+0x%llx", symname,
   1611 		    (unsigned long long)symoff);
   1612 	else
   1613 		(void) sprintf(buf, "0x%llx", (unsigned long long)addr);
   1614 	return (buf);
   1615 }
   1616 
   1617 static void
   1618 report_stats(FILE *out, lsrec_t **sort_buf, size_t nrecs, uint64_t total_count,
   1619 	uint64_t total_time)
   1620 {
   1621 	uint32_t event = sort_buf[0]->ls_event;
   1622 	lsrec_t *lsp;
   1623 	double ptotal = 0.0;
   1624 	double percent;
   1625 	int i, j, fr;
   1626 	int displayed;
   1627 	int first_bin, last_bin, max_bin_count, total_bin_count;
   1628 	int rectype;
   1629 	char buf[256];
   1630 	char lhdr[80], chdr[80];
   1631 
   1632 	rectype = g_recsize;
   1633 
   1634 	if (g_topn == 0) {
   1635 		(void) fprintf(out, "%20llu %s\n",
   1636 		    g_rates == 0 ? total_count :
   1637 		    ((unsigned long long)total_count * NANOSEC) / g_elapsed,
   1638 		    g_event_info[event].ev_desc);
   1639 		return;
   1640 	}
   1641 
   1642 	(void) sprintf(lhdr, "%s%s",
   1643 	    g_Wflag ? "Hottest " : "", g_event_info[event].ev_lhdr);
   1644 	(void) sprintf(chdr, "%s%s",
   1645 	    g_wflag ? "Hottest " : "", "Caller");
   1646 
   1647 	if (!g_pflag)
   1648 		(void) fprintf(out,
   1649 		    "\n%s: %.0f events in %.3f seconds (%.0f events/sec)\n\n",
   1650 		    g_event_info[event].ev_desc, (double)total_count,
   1651 		    (double)g_elapsed / NANOSEC,
   1652 		    (double)total_count * NANOSEC / g_elapsed);
   1653 
   1654 	if (!g_pflag && rectype < LS_HIST) {
   1655 		(void) sprintf(buf, "%s", g_event_info[event].ev_units);
   1656 		(void) fprintf(out, "%5s %4s %4s %4s %8s %-22s %-24s\n",
   1657 		    g_rates ? "ops/s" : "Count",
   1658 		    g_gflag ? "genr" : "indv",
   1659 		    "cuml", "rcnt", rectype >= LS_TIME ? buf : "", lhdr, chdr);
   1660 		(void) fprintf(out, "---------------------------------"
   1661 		    "----------------------------------------------\n");
   1662 	}
   1663 
   1664 	displayed = 0;
   1665 	for (i = 0; i < nrecs; i++) {
   1666 		lsp = sort_buf[i];
   1667 
   1668 		if (displayed++ >= g_topn)
   1669 			break;
   1670 
   1671 		if (g_pflag) {
   1672 			int j;
   1673 
   1674 			(void) fprintf(out, "%u %u",
   1675 			    lsp->ls_event, lsp->ls_count);
   1676 			(void) fprintf(out, " %s",
   1677 			    format_symbol(buf, lsp->ls_lock, g_cflag));
   1678 			(void) fprintf(out, " %s",
   1679 			    format_symbol(buf, lsp->ls_caller, 0));
   1680 			(void) fprintf(out, " %f",
   1681 			    (double)lsp->ls_refcnt / lsp->ls_count);
   1682 			if (rectype >= LS_TIME)
   1683 				(void) fprintf(out, " %llu",
   1684 				    (unsigned long long)lsp->ls_time);
   1685 			if (rectype >= LS_HIST) {
   1686 				for (j = 0; j < 64; j++)
   1687 					(void) fprintf(out, " %u",
   1688 					    lsp->ls_hist[j]);
   1689 			}
   1690 			for (j = 0; j < LS_MAX_STACK_DEPTH; j++) {
   1691 				if (rectype <= LS_STACK(j) ||
   1692 				    lsp->ls_stack[j] == 0)
   1693 					break;
   1694 				(void) fprintf(out, " %s",
   1695 				    format_symbol(buf, lsp->ls_stack[j], 0));
   1696 			}
   1697 			(void) fprintf(out, "\n");
   1698 			continue;
   1699 		}
   1700 
   1701 		if (rectype >= LS_HIST) {
   1702 			(void) fprintf(out, "---------------------------------"
   1703 			    "----------------------------------------------\n");
   1704 			(void) sprintf(buf, "%s",
   1705 			    g_event_info[event].ev_units);
   1706 			(void) fprintf(out, "%5s %4s %4s %4s %8s %-22s %-24s\n",
   1707 			    g_rates ? "ops/s" : "Count",
   1708 			    g_gflag ? "genr" : "indv",
   1709 			    "cuml", "rcnt", buf, lhdr, chdr);
   1710 		}
   1711 
   1712 		if (g_Pflag && total_time != 0)
   1713 			percent = (lsp->ls_time * 100.00) / total_time;
   1714 		else
   1715 			percent = (lsp->ls_count * 100.00) / total_count;
   1716 
   1717 		ptotal += percent;
   1718 
   1719 		if (rectype >= LS_TIME)
   1720 			(void) sprintf(buf, "%llu",
   1721 			    (unsigned long long)(lsp->ls_time / lsp->ls_count));
   1722 		else
   1723 			buf[0] = '\0';
   1724 
   1725 		(void) fprintf(out, "%5llu ",
   1726 		    g_rates == 0 ? lsp->ls_count :
   1727 		    ((uint64_t)lsp->ls_count * NANOSEC) / g_elapsed);
   1728 
   1729 		(void) fprintf(out, "%3.0f%% ", percent);
   1730 
   1731 		if (g_gflag)
   1732 			(void) fprintf(out, "---- ");
   1733 		else
   1734 			(void) fprintf(out, "%3.0f%% ", ptotal);
   1735 
   1736 		(void) fprintf(out, "%4.2f %8s ",
   1737 		    (double)lsp->ls_refcnt / lsp->ls_count, buf);
   1738 
   1739 		(void) fprintf(out, "%-22s ",
   1740 		    format_symbol(buf, lsp->ls_lock, g_cflag));
   1741 
   1742 		(void) fprintf(out, "%-24s\n",
   1743 		    format_symbol(buf, lsp->ls_caller, 0));
   1744 
   1745 		if (rectype < LS_HIST)
   1746 			continue;
   1747 
   1748 		(void) fprintf(out, "\n");
   1749 		(void) fprintf(out, "%10s %31s %-9s %-24s\n",
   1750 		    g_event_info[event].ev_units,
   1751 		    "------ Time Distribution ------",
   1752 		    g_rates ? "ops/s" : "count",
   1753 		    rectype > LS_STACK(0) ? "Stack" : "");
   1754 
   1755 		first_bin = 0;
   1756 		while (lsp->ls_hist[first_bin] == 0)
   1757 			first_bin++;
   1758 
   1759 		last_bin = 63;
   1760 		while (lsp->ls_hist[last_bin] == 0)
   1761 			last_bin--;
   1762 
   1763 		max_bin_count = 0;
   1764 		total_bin_count = 0;
   1765 		for (j = first_bin; j <= last_bin; j++) {
   1766 			total_bin_count += lsp->ls_hist[j];
   1767 			if (lsp->ls_hist[j] > max_bin_count)
   1768 				max_bin_count = lsp->ls_hist[j];
   1769 		}
   1770 
   1771 		/*
   1772 		 * If we went a few frames below the caller, ignore them
   1773 		 */
   1774 		for (fr = 3; fr > 0; fr--)
   1775 			if (lsp->ls_stack[fr] == lsp->ls_caller)
   1776 				break;
   1777 
   1778 		for (j = first_bin; j <= last_bin; j++) {
   1779 			uint_t depth = (lsp->ls_hist[j] * 30) / total_bin_count;
   1780 			(void) fprintf(out, "%10llu |%s%s %-9u ",
   1781 			    1ULL << j,
   1782 			    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" + 30 - depth,
   1783 			    "                              " + depth,
   1784 			    g_rates == 0 ? lsp->ls_hist[j] :
   1785 			    (uint_t)(((uint64_t)lsp->ls_hist[j] * NANOSEC) /
   1786 			    g_elapsed));
   1787 			if (rectype <= LS_STACK(fr) || lsp->ls_stack[fr] == 0) {
   1788 				(void) fprintf(out, "\n");
   1789 				continue;
   1790 			}
   1791 			(void) fprintf(out, "%-24s\n",
   1792 			    format_symbol(buf, lsp->ls_stack[fr], 0));
   1793 			fr++;
   1794 		}
   1795 		while (rectype > LS_STACK(fr) && lsp->ls_stack[fr] != 0) {
   1796 			(void) fprintf(out, "%15s %-36s %-24s\n", "", "",
   1797 			    format_symbol(buf, lsp->ls_stack[fr], 0));
   1798 			fr++;
   1799 		}
   1800 	}
   1801 
   1802 	if (!g_pflag)
   1803 		(void) fprintf(out, "---------------------------------"
   1804 		    "----------------------------------------------\n");
   1805 
   1806 	(void) fflush(out);
   1807 }
   1808 
   1809 static void
   1810 report_trace(FILE *out, lsrec_t **sort_buf)
   1811 {
   1812 	lsrec_t *lsp;
   1813 	int i, fr;
   1814 	int rectype;
   1815 	char buf[256], buf2[256];
   1816 
   1817 	rectype = g_recsize;
   1818 
   1819 	if (!g_pflag) {
   1820 		(void) fprintf(out, "%5s  %7s  %11s  %-24s  %-24s\n",
   1821 		    "Event", "Time", "Owner", "Lock", "Caller");
   1822 		(void) fprintf(out, "---------------------------------"
   1823 		    "----------------------------------------------\n");
   1824 	}
   1825 
   1826 	for (i = 0; i < g_nrecs_used; i++) {
   1827 
   1828 		lsp = sort_buf[i];
   1829 
   1830 		if (lsp->ls_event >= LS_MAX_EVENTS || lsp->ls_count == 0)
   1831 			continue;
   1832 
   1833 		(void) fprintf(out, "%2d  %10llu  %11p  %-24s  %-24s\n",
   1834 		    lsp->ls_event, (unsigned long long)lsp->ls_time,
   1835 		    (void *)lsp->ls_next,
   1836 		    format_symbol(buf, lsp->ls_lock, 0),
   1837 		    format_symbol(buf2, lsp->ls_caller, 0));
   1838 
   1839 		if (rectype <= LS_STACK(0))
   1840 			continue;
   1841 
   1842 		/*
   1843 		 * If we went a few frames below the caller, ignore them
   1844 		 */
   1845 		for (fr = 3; fr > 0; fr--)
   1846 			if (lsp->ls_stack[fr] == lsp->ls_caller)
   1847 				break;
   1848 
   1849 		while (rectype > LS_STACK(fr) && lsp->ls_stack[fr] != 0) {
   1850 			(void) fprintf(out, "%53s  %-24s\n", "",
   1851 			    format_symbol(buf, lsp->ls_stack[fr], 0));
   1852 			fr++;
   1853 		}
   1854 		(void) fprintf(out, "\n");
   1855 	}
   1856 
   1857 	(void) fflush(out);
   1858 }
   1859