Home | History | Annotate | Download | only in libmicro
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms
      5  * of the Common Development and Distribution License
      6  * (the "License").  You may not use this file except
      7  * in compliance with the License.
      8  *
      9  * You can obtain a copy of the license at
     10  * src/OPENSOLARIS.LICENSE
     11  * or http://www.opensolaris.org/os/licensing.
     12  * See the License for the specific language governing
     13  * permissions and limitations under the License.
     14  *
     15  * When distributing Covered Code, include this CDDL
     16  * HEADER in each file and include the License file at
     17  * usr/src/OPENSOLARIS.LICENSE.  If applicable,
     18  * add the following below this CDDL HEADER, with the
     19  * fields enclosed by brackets "[]" replaced with your
     20  * own identifying information: Portions Copyright [yyyy]
     21  * [name of copyright owner]
     22  *
     23  * CDDL HEADER END
     24  */
     25 
     26 /*
     27  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     28  * Use is subject to license terms.
     29  */
     30 
     31 /*
     32  * benchmarking routines
     33  */
     34 
     35 #include <sys/types.h>
     36 #include <sys/time.h>
     37 #include <sys/ipc.h>
     38 #include <sys/sem.h>
     39 #include <sys/mman.h>
     40 #include <sys/wait.h>
     41 #include <ctype.h>
     42 #include <string.h>
     43 #include <strings.h>
     44 #include <signal.h>
     45 #include <stdio.h>
     46 #include <unistd.h>
     47 #include <stdlib.h>
     48 #include <poll.h>
     49 #include <pthread.h>
     50 #include <dlfcn.h>
     51 #include <errno.h>
     52 #include <sys/resource.h>
     53 #include <math.h>
     54 #include <limits.h>
     55 
     56 #ifdef	__sun
     57 #include <sys/elf.h>
     58 #endif
     59 
     60 #include "libmicro.h"
     61 
     62 
     63 /*
     64  * user visible globals
     65  */
     66 
/* argc/argv as handed to actual_main(), saved for benchmark code */
int				lm_argc = 0;
char **				lm_argv = NULL;

/*
 * Option values.  Each is set by the command-line flag of the same
 * letter in actual_main(); those with an lm_def* counterpart below are
 * first seeded from it.
 */
int				lm_opt1;	/* -1: run workers in-process, no fork */
int				lm_optA;	/* -A: align with clock (periodic poll) */
int				lm_optB;	/* -B: batch size; 0 means compute it */
int				lm_optC = 100;	/* -C: minimum number of samples */
int				lm_optD;	/* -D: run duration in msecs */
int				lm_optE;	/* -E: echo test name to stderr */
int				lm_optH;	/* -H: suppress the result header */
int				lm_optI;	/* -I: nsecs per op, for batch sizing */
int				lm_optL = 0;	/* -L: print the argument line */
int				lm_optM = 0;	/* -M: report mean, not median */
char				*lm_optN;	/* -N: test name (else lm_procname) */
int				lm_optP;	/* -P: number of processes */
int				lm_optS;	/* -S: print detailed statistics */
int				lm_optT;	/* -T: threads per process */
int				lm_optW;	/* -W: print warnings; also sets -S */

/* defaults copied into the matching lm_opt* before option parsing */
int				lm_def1 = 0;
int				lm_defB = 0; /* use lm_nsecs_per_op */
int				lm_defD = 10;
int				lm_defH = 0;
char				*lm_defN = NULL;
int				lm_defP = 1;

int				lm_defS = 0;
int				lm_defT = 1;

/*
 * default on fast platform, should be overridden by individual
 * benchmarks if significantly wrong in either direction.
 */

int				lm_nsecs_per_op = 5;

char				*lm_procpath;	/* directory of the running binary */
char				lm_procname[STRSIZE];	/* basename of argv[0] */
char				lm_usage[STRSIZE];	/* appended to usage() output */
char				lm_optstr[STRSIZE];	/* appended to the getopt string */
char				lm_header[STRSIZE];	/* appended to the result header */
/* bytes of per-thread data; actual_main() rejects (size_t)-1 as "not set" */
size_t				lm_tsdsize = 0;
    109 
    110 
    111 /*
    112  *  Globals we do not export to the user
    113  */
    114 
/* shared barrier created in actual_main(); read by every worker thread */
static barrier_t		*lm_barrier;
/* lm_optP child pids, filled by the fork loop in actual_main() */
static pid_t			*pids = NULL;
/* lm_optT thread ids; slot 0 is never written (it is the calling thread) */
static pthread_t		*tids = NULL;
/* index of this process among the workers; -1 until a worker is started */
static int			pindex = -1;
/* base of the mmap'd per-thread data segment */
static void			*tsdseg = NULL;
/* lm_tsdsize rounded up to a multiple of 128 */
static size_t			tsdsize = 0;

#ifdef USE_RDTSC
/* tick rate parsed from the LIBMICRO_HZ environment variable */
static long long		lm_hz = 0;
#endif
    125 
    126 
    127 /*
    128  * Forward references
    129  */
    130 
static void 		worker_process();
static void 		usage();
static void 		print_stats(barrier_t *);
static void 		print_histo(barrier_t *);
static int 		remove_outliers(double *, int, stats_t *);
/* set once at the top of actual_main(), before any option is parsed */
static long long	nsecs_overhead;
static long long	nsecs_resolution;
static long long	get_nsecs_overhead();
static int		crunch_stats(double *, int, stats_t *);
static void 		compute_stats(barrier_t *);
    141 /*
    142  * main routine; renamed in this file to allow linking with other
    143  * files
    144  */
    145 
    146 int
    147 actual_main(int argc, char *argv[])
    148 {
    149 	int			i;
    150 	int			opt;
    151 	extern char		*optarg;
    152 	char			*tmp;
    153 	char			optstr[256];
    154 	barrier_t		*b;
    155 	long long		startnsecs;
    156 
    157 #ifdef USE_RDTSC
    158 	if (getenv("LIBMICRO_HZ") == NULL) {
    159 		(void) printf("LIBMICRO_HZ needed but not set\n");
    160 		exit(1);
    161 	}
    162 	lm_hz = strtoll(getenv("LIBMICRO_HZ"), NULL, 10);
    163 #endif
    164 
    165 	startnsecs = getnsecs();
    166 
    167 	lm_argc = argc;
    168 	lm_argv = argv;
    169 
    170 	/* before we do anything */
    171 	(void) benchmark_init();
    172 
    173 
    174 	nsecs_overhead = get_nsecs_overhead();
    175 	nsecs_resolution = get_nsecs_resolution();
    176 
    177 	/*
    178 	 * Set defaults
    179 	 */
    180 
    181 	lm_opt1	= lm_def1;
    182 	lm_optB	= lm_defB;
    183 	lm_optD	= lm_defD;
    184 	lm_optH	= lm_defH;
    185 	lm_optN	= lm_defN;
    186 	lm_optP	= lm_defP;
    187 
    188 	lm_optS	= lm_defS;
    189 	lm_optT	= lm_defT;
    190 
    191 	/*
    192 	 * squirrel away the path to the current
    193 	 * binary in a way that works on both
    194 	 * Linux and Solaris
    195 	 */
    196 
    197 	if (*argv[0] == '/') {
    198 		lm_procpath = strdup(argv[0]);
    199 		*strrchr(lm_procpath, '/') = 0;
    200 	} else {
    201 		char path[1024];
    202 		(void) getcwd(path, 1024);
    203 		(void) strcat(path, "/");
    204 		(void) strcat(path, argv[0]);
    205 		*strrchr(path, '/') = 0;
    206 		lm_procpath = strdup(path);
    207 	}
    208 
    209 	/*
    210 	 * name of binary
    211 	 */
    212 
    213 	if ((tmp = strrchr(argv[0], '/')) == NULL)
    214 		(void) strcpy(lm_procname, argv[0]);
    215 	else
    216 		(void) strcpy(lm_procname, tmp + 1);
    217 
    218 	if (lm_optN == NULL) {
    219 		lm_optN = lm_procname;
    220 	}
    221 
    222 	/*
    223 	 * Parse command line arguments
    224 	 */
    225 
    226 	(void) sprintf(optstr, "1AB:C:D:EHI:LMN:P:RST:VW?%s", lm_optstr);
    227 	while ((opt = getopt(argc, argv, optstr)) != -1) {
    228 		switch (opt) {
    229 		case '1':
    230 			lm_opt1 = 1;
    231 			break;
    232 		case 'A':
    233 			lm_optA = 1;
    234 			break;
    235 		case 'B':
    236 			lm_optB = sizetoint(optarg);
    237 			break;
    238 		case 'C':
    239 			lm_optC = sizetoint(optarg);
    240 			break;
    241 		case 'D':
    242 			lm_optD = sizetoint(optarg);
    243 			break;
    244 		case 'E':
    245 			lm_optE = 1;
    246 			break;
    247 		case 'H':
    248 			lm_optH = 1;
    249 			break;
    250 		case 'I':
    251 			lm_optI = sizetoint(optarg);
    252 			break;
    253 		case 'L':
    254 			lm_optL = 1;
    255 			break;
    256 		case 'M':
    257 			lm_optM = 1;
    258 			break;
    259 		case 'N':
    260 			lm_optN = optarg;
    261 			break;
    262 		case 'P':
    263 			lm_optP = sizetoint(optarg);
    264 			break;
    265 		case 'S':
    266 			lm_optS = 1;
    267 			break;
    268 		case 'T':
    269 			lm_optT = sizetoint(optarg);
    270 			break;
    271 		case 'V':
    272 			(void) printf("%s\n", LIBMICRO_VERSION);
    273 			exit(0);
    274 			break;
    275 		case 'W':
    276 			lm_optW = 1;
    277 			lm_optS = 1;
    278 			break;
    279 		case '?':
    280 			usage();
    281 			exit(0);
    282 			break;
    283 		default:
    284 			if (benchmark_optswitch(opt, optarg) == -1) {
    285 				usage();
    286 				exit(0);
    287 			}
    288 		}
    289 	}
    290 
    291 	/* deal with implicit and overriding options */
    292 	if (lm_opt1 && lm_optP > 1) {
    293 		lm_optP = 1;
    294 		(void) printf("warning: -1 overrides -P\n");
    295 	}
    296 
    297 	if (lm_optE) {
    298 		(void) fprintf(stderr, "Running:%20s", lm_optN);
    299 		(void) fflush(stderr);
    300 	}
    301 
    302 	if (lm_optB == 0) {
    303 		/*
    304 		 * neither benchmark or user has specified the number
    305 		 * of cnts/sample, so use computed value
    306 		 */
    307 		if (lm_optI)
    308 			lm_nsecs_per_op = lm_optI;
    309 
    310 		lm_optB = nsecs_resolution * 100 / lm_nsecs_per_op;
    311 		if (lm_optB == 0)
    312 			lm_optB = 1;
    313 	}
    314 
    315 	/*
    316 	 * now that the options are set
    317 	 */
    318 
    319 	if (benchmark_initrun() == -1) {
    320 		exit(1);
    321 	}
    322 
    323 	/* allocate dynamic data */
    324 	pids = (pid_t *)malloc(lm_optP * sizeof (pid_t));
    325 	if (pids == NULL) {
    326 		perror("malloc(pids)");
    327 		exit(1);
    328 	}
    329 	tids = (pthread_t *)malloc(lm_optT * sizeof (pthread_t));
    330 	if (tids == NULL) {
    331 		perror("malloc(tids)");
    332 		exit(1);
    333 	}
    334 
    335 	/* check that the case defines lm_tsdsize before proceeding */
    336 	if (lm_tsdsize == (size_t)-1) {
    337 		(void) fprintf(stderr, "error in benchmark_init: "
    338 		    "lm_tsdsize not set\n");
    339 		exit(1);
    340 	}
    341 
    342 	/* round up tsdsize to nearest 128 to eliminate false sharing */
    343 	tsdsize = ((lm_tsdsize + 127) / 128) * 128;
    344 
    345 	/* allocate sufficient TSD for each thread in each process */
    346 	tsdseg = (void *)mmap(NULL, lm_optT * lm_optP * tsdsize + 8192,
    347 	    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0L);
    348 	if (tsdseg == NULL) {
    349 		perror("mmap(tsd)");
    350 		exit(1);
    351 	}
    352 
    353 	/* initialise worker synchronisation */
    354 	b = barrier_create(lm_optT * lm_optP, DATASIZE);
    355 	if (b == NULL) {
    356 		perror("barrier_create()");
    357 		exit(1);
    358 	}
    359 	lm_barrier = b;
    360 	b->ba_flag = 1;
    361 
    362 	/* need this here so that parent and children can call exit() */
    363 	(void) fflush(stdout);
    364 	(void) fflush(stderr);
    365 
    366 	/* when we started and when to stop */
    367 
    368 	b->ba_starttime = getnsecs();
    369 	b->ba_deadline = (long long) (b->ba_starttime + (lm_optD * 1000000LL));
    370 
    371 	/* do the work */
    372 	if (lm_opt1) {
    373 		/* single process, non-fork mode */
    374 		pindex = 0;
    375 		worker_process();
    376 	} else {
    377 		/* create worker processes */
    378 		for (i = 0; i < lm_optP; i++) {
    379 			pids[i] = fork();
    380 
    381 			switch (pids[i]) {
    382 			case 0:
    383 				pindex = i;
    384 				worker_process();
    385 				exit(0);
    386 				break;
    387 			case -1:
    388 				perror("fork");
    389 				exit(1);
    390 				break;
    391 			default:
    392 				continue;
    393 			}
    394 		}
    395 
    396 		/* wait for worker processes */
    397 		for (i = 0; i < lm_optP; i++) {
    398 			if (pids[i] > 0) {
    399 				(void) waitpid(pids[i], NULL, 0);
    400 			}
    401 		}
    402 	}
    403 
    404 	b->ba_endtime = getnsecs();
    405 
    406 	/* compute results */
    407 
    408 	compute_stats(b);
    409 
    410 	/* print arguments benchmark was invoked with ? */
    411 	if (lm_optL) {
    412 		int l;
    413 		(void) printf("# %s ", argv[0]);
    414 		for (l = 1; l < argc; l++) {
    415 			(void) printf("%s ", argv[l]);
    416 		}
    417 		(void) printf("\n");
    418 	}
    419 
    420 	/* print result header (unless suppressed) */
    421 	if (!lm_optH) {
    422 		(void) printf("%12s %3s %3s %12s %12s %8s %8s %s\n",
    423 		    "", "prc", "thr",
    424 		    "usecs/call",
    425 		    "samples", "errors", "cnt/samp", lm_header);
    426 	}
    427 
    428 	/* print result */
    429 
    430 	(void) printf("%-12s %3d %3d %12.5f %12d %8lld %8d %s\n",
    431 	    lm_optN, lm_optP, lm_optT,
    432 	    (lm_optM?b->ba_corrected.st_mean:b->ba_corrected.st_median),
    433 	    b->ba_batches, b->ba_errors, lm_optB,
    434 	    benchmark_result());
    435 
    436 	if (lm_optS) {
    437 		print_stats(b);
    438 	}
    439 
    440 	/* just incase something goes awry */
    441 	(void) fflush(stdout);
    442 	(void) fflush(stderr);
    443 
    444 	/* cleanup by stages */
    445 	(void) benchmark_finirun();
    446 	(void) barrier_destroy(b);
    447 	(void) benchmark_fini();
    448 
    449 	if (lm_optE) {
    450 		(void) fprintf(stderr, " for %12.5f seconds\n",
    451 		    (double)(getnsecs() - startnsecs) /
    452 		    1.e9);
    453 		(void) fflush(stderr);
    454 	}
    455 	return (0);
    456 }
    457 
    458 void *
    459 worker_thread(void *arg)
    460 {
    461 	result_t		r;
    462 	long long 		last_sleep = 0;
    463 	long long		t;
    464 
    465 	r.re_errors = benchmark_initworker(arg);
    466 
    467 	while (lm_barrier->ba_flag) {
    468 		r.re_count = 0;
    469 		r.re_errors += benchmark_initbatch(arg);
    470 
    471 		/* sync to clock */
    472 
    473 		if (lm_optA && ((t = getnsecs()) - last_sleep) > 75000000LL) {
    474 			(void) poll(0, 0, 10);
    475 			last_sleep = t;
    476 		}
    477 		/* wait for it ... */
    478 		(void) barrier_queue(lm_barrier, NULL);
    479 
    480 		/* time the test */
    481 		r.re_t0 = getnsecs();
    482 		(void) benchmark(arg, &r);
    483 		r.re_t1 = getnsecs();
    484 
    485 		/* time to stop? */
    486 		if (r.re_t1 > lm_barrier->ba_deadline &&
    487 		    (!lm_optC || lm_optC < lm_barrier->ba_batches)) {
    488 			lm_barrier->ba_flag = 0;
    489 		}
    490 
    491 		/* record results and sync */
    492 		(void) barrier_queue(lm_barrier, &r);
    493 
    494 		(void) benchmark_finibatch(arg);
    495 
    496 		r.re_errors = 0;
    497 	}
    498 
    499 	(void) benchmark_finiworker(arg);
    500 
    501 	return (0);
    502 }
    503 
    504 void
    505 worker_process()
    506 {
    507 	int			i;
    508 	void			*tsd;
    509 
    510 	for (i = 1; i < lm_optT; i++) {
    511 		tsd = gettsd(pindex, i);
    512 		if (pthread_create(&tids[i], NULL, worker_thread, tsd) != 0) {
    513 			perror("pthread_create");
    514 			exit(1);
    515 		}
    516 	}
    517 
    518 	tsd = gettsd(pindex, 0);
    519 	(void) worker_thread(tsd);
    520 
    521 	for (i = 1; i < lm_optT; i++) {
    522 		(void) pthread_join(tids[i], NULL);
    523 	}
    524 }
    525 
    526 void
    527 usage()
    528 {
    529 	(void) printf(
    530 	    "usage: %s\n"
    531 	    "       [-1] (single process; overrides -P > 1)\n"
    532 	    "       [-A] (align with clock)\n"
    533 	    "       [-B batch-size (default %d)]\n"
    534 	    "       [-C minimum number of samples (default 0)]\n"
    535 	    "       [-D duration in msecs (default %ds)]\n"
    536 	    "       [-E (echo name to stderr)]\n"
    537 	    "       [-H] (suppress headers)\n"
    538 	    "       [-I] nsecs per op (used to compute batch size)"
    539 	    "       [-L] (print argument line)\n"
    540 	    "       [-M] (reports mean rather than median)\n"
    541 	    "       [-N test-name (default '%s')]\n"
    542 	    "       [-P processes (default %d)]\n"
    543 	    "       [-S] (print detailed stats)\n"
    544 	    "       [-T threads (default %d)]\n"
    545 	    "       [-V] (print the libMicro version and exit)\n"
    546 	    "       [-W] (flag possible benchmark problems)\n"
    547 	    "%s\n",
    548 	    lm_procname,
    549 	    lm_defB, lm_defD, lm_procname, lm_defP, lm_defT,
    550 	    lm_usage);
    551 }
    552 
    553 void
    554 print_warnings(barrier_t *b)
    555 {
    556 	int head = 0;
    557 	int increase;
    558 
    559 	if (b->ba_quant) {
    560 		if (!head++) {
    561 			(void) printf("#\n# WARNINGS\n");
    562 		}
    563 		increase = (int)(floor((nsecs_resolution * 100.0) /
    564 		    ((double)lm_optB * b->ba_corrected.st_median * 1000.0)) +
    565 		    1.0);
    566 		(void) printf("#     Quantization error likely;"
    567 		    "increase batch size (-B option) %dX to avoid.\n",
    568 		    increase);
    569 	}
    570 
    571 	/*
    572 	 * XXX should warn on median != mean by a lot
    573 	 */
    574 
    575 	if (b->ba_errors) {
    576 		if (!head++) {
    577 			(void) printf("#\n# WARNINGS\n");
    578 		}
    579 		(void) printf("#     Errors occured during benchmark.\n");
    580 	}
    581 }
    582 
    583 void
    584 print_stats(barrier_t *b)
    585 {
    586 	(void) printf("#\n");
    587 	(void) printf("# STATISTICS         %12s          %12s\n",
    588 	    "usecs/call (raw)",
    589 	    "usecs/call (outliers removed)");
    590 
    591 	if (b->ba_count == 0) {
    592 		(void) printf("zero samples\n");
    593 		return;
    594 	}
    595 
    596 	(void) printf("#                    min %12.5f            %12.5f\n",
    597 	    b->ba_raw.st_min,
    598 	    b->ba_corrected.st_min);
    599 
    600 	(void) printf("#                    max %12.5f            %12.5f\n",
    601 	    b->ba_raw.st_max,
    602 	    b->ba_corrected.st_max);
    603 	(void) printf("#                   mean %12.5f            %12.5f\n",
    604 	    b->ba_raw.st_mean,
    605 	    b->ba_corrected.st_mean);
    606 	(void) printf("#                 median %12.5f            %12.5f\n",
    607 	    b->ba_raw.st_median,
    608 	    b->ba_corrected.st_median);
    609 	(void) printf("#                 stddev %12.5f            %12.5f\n",
    610 	    b->ba_raw.st_stddev,
    611 	    b->ba_corrected.st_stddev);
    612 	(void) printf("#         standard error %12.5f            %12.5f\n",
    613 	    b->ba_raw.st_stderr,
    614 	    b->ba_corrected.st_stderr);
    615 	(void) printf("#   99%% confidence level %12.5f            %12.5f\n",
    616 	    b->ba_raw.st_99confidence,
    617 	    b->ba_corrected.st_99confidence);
    618 	(void) printf("#                   skew %12.5f            %12.5f\n",
    619 	    b->ba_raw.st_skew,
    620 	    b->ba_corrected.st_skew);
    621 	(void) printf("#               kurtosis %12.5f            %12.5f\n",
    622 	    b->ba_raw.st_kurtosis,
    623 	    b->ba_corrected.st_kurtosis);
    624 
    625 	(void) printf("#       time correlation %12.5f            %12.5f\n",
    626 	    b->ba_raw.st_timecorr,
    627 	    b->ba_corrected.st_timecorr);
    628 	(void) printf("#\n");
    629 
    630 	(void) printf("#           elasped time %12.5f\n", (b->ba_endtime -
    631 	    b->ba_starttime) / 1.0e9);
    632 	(void) printf("#      number of samples %12d\n",   b->ba_batches);
    633 	(void) printf("#     number of outliers %12d\n", b->ba_outliers);
    634 	(void) printf("#      getnsecs overhead %12d\n", (int)nsecs_overhead);
    635 
    636 	(void) printf("#\n");
    637 	(void) printf("# DISTRIBUTION\n");
    638 
    639 	print_histo(b);
    640 
    641 	if (lm_optW) {
    642 		print_warnings(b);
    643 	}
    644 }
    645 
    646 void
    647 update_stats(barrier_t *b, result_t *r)
    648 {
    649 	double			time;
    650 	double			nsecs_per_call;
    651 
    652 	if (b->ba_waiters == 0) {
    653 		/* first thread only */
    654 		b->ba_t0 = r->re_t0;
    655 		b->ba_t1 = r->re_t1;
    656 		b->ba_count0 = 0;
    657 		b->ba_errors0 = 0;
    658 	} else {
    659 		/* all but first thread */
    660 		if (r->re_t0 < b->ba_t0) {
    661 			b->ba_t0 = r->re_t0;
    662 		}
    663 		if (r->re_t1 > b->ba_t1) {
    664 			b->ba_t1 = r->re_t1;
    665 		}
    666 	}
    667 
    668 	b->ba_count0  += r->re_count;
    669 	b->ba_errors0 += r->re_errors;
    670 
    671 	if (b->ba_waiters == b->ba_hwm - 1) {
    672 		/* last thread only */
    673 
    674 
    675 		time = (double)b->ba_t1 - (double)b->ba_t0 -
    676 		    (double)nsecs_overhead;
    677 
    678 		if (time < 100 * nsecs_resolution)
    679 			b->ba_quant++;
    680 
    681 		/*
    682 		 * normalize by procs * threads if not -U
    683 		 */
    684 
    685 		nsecs_per_call = time / (double)b->ba_count0 *
    686 		    (double)(lm_optT * lm_optP);
    687 
    688 		b->ba_count  += b->ba_count0;
    689 		b->ba_errors += b->ba_errors0;
    690 
    691 		b->ba_data[b->ba_batches % b->ba_datasize] =
    692 		    nsecs_per_call;
    693 
    694 		b->ba_batches++;
    695 	}
    696 }
    697 
    698 #ifdef USE_SEMOP
    699 barrier_t *
    700 barrier_create(int hwm, int datasize)
    701 {
    702 	struct sembuf		s[1];
    703 	barrier_t		*b;
    704 
    705 	/*LINTED*/
    706 	b = (barrier_t *)mmap(NULL,
    707 	    sizeof (barrier_t) + (datasize - 1) * sizeof (double),
    708 	    PROT_READ | PROT_WRITE,
    709 	    MAP_SHARED | MAP_ANON, -1, 0L);
    710 	if (b == (barrier_t *)MAP_FAILED) {
    711 		return (NULL);
    712 	}
    713 	b->ba_datasize = datasize;
    714 
    715 	b->ba_flag  = 0;
    716 	b->ba_hwm   = hwm;
    717 	b->ba_semid = semget(IPC_PRIVATE, 3, 0600);
    718 	if (b->ba_semid == -1) {
    719 		(void) munmap((void *)b, sizeof (barrier_t));
    720 		return (NULL);
    721 	}
    722 
    723 	/* [hwm - 1, 0, 0] */
    724 	s[0].sem_num = 0;
    725 	s[0].sem_op  = hwm - 1;
    726 	s[0].sem_flg = 0;
    727 	if (semop(b->ba_semid, s, 1) == -1) {
    728 		perror("semop(1)");
    729 		(void) semctl(b->ba_semid, 0, IPC_RMID);
    730 		(void) munmap((void *)b, sizeof (barrier_t));
    731 		return (NULL);
    732 	}
    733 
    734 	b->ba_waiters = 0;
    735 	b->ba_phase = 0;
    736 
    737 	b->ba_count = 0;
    738 	b->ba_errors = 0;
    739 
    740 	return (b);
    741 }
    742 
    743 int
    744 barrier_destroy(barrier_t *b)
    745 {
    746 	(void) semctl(b->ba_semid, 0, IPC_RMID);
    747 	(void) munmap((void *)b, sizeof (barrier_t));
    748 
    749 	return (0);
    750 }
    751 
/*
 * Blocks the caller until all ba_hwm participants have arrived.  When
 * r is not NULL it is folded into the shared statistics first.
 * Returns 0 on success, or -1 (after perror) if a semop call fails.
 *
 * Roles of the three semaphores in the set:
 *   s0  lock: holds hwm-1 when free; a caller takes all hwm-1 units
 *   s1  arrival counter for the current round
 *   s2  release gate the early arrivals sleep on
 */
int
barrier_queue(barrier_t *b, result_t *r)
{
	struct sembuf		s[2];

	/*
	 * {s0(-(hwm-1))}
	 * if ! nowait {s1(-(hwm-1))}
	 *   (all other threads)
	 *   update shared stats
	 *   {s0(hwm-1), s1(1)}
	 *   {s0(1), s2(-1)}
	 * else
	 *   (last thread)
	 *   update shared stats
	 *   {s2(hwm-1)}
	 */

	/* take the lock: blocks until s0 holds all hwm-1 units */
	s[0].sem_num = 0;
	s[0].sem_op  = -(b->ba_hwm - 1);
	s[0].sem_flg = 0;
	if (semop(b->ba_semid, s, 1) == -1) {
		perror("semop(2)");
		return (-1);
	}

	/*
	 * Non-blocking attempt to take hwm-1 units from s1.  It only
	 * succeeds (and resets s1) when hwm-1 others have arrived,
	 * i.e. when the caller is the last one in.
	 */
	s[0].sem_num = 1;
	s[0].sem_op  = -(b->ba_hwm - 1);
	s[0].sem_flg = IPC_NOWAIT;
	if (semop(b->ba_semid, s, 1) == -1) {
		if (errno != EAGAIN) {
			perror("semop(3)");
			return (-1);
		}

		/* all but the last thread */

		if (r != NULL) {
			update_stats(b, r);
		}

		b->ba_waiters++;

		/* in one step: drop the lock and count this arrival */
		s[0].sem_num = 0;
		s[0].sem_op  = b->ba_hwm - 1;
		s[0].sem_flg = 0;
		s[1].sem_num = 1;
		s[1].sem_op  = 1;
		s[1].sem_flg = 0;
		if (semop(b->ba_semid, s, 2) == -1) {
			perror("semop(4)");
			return (-1);
		}

		/*
		 * Sleep on the gate.  On release each waiter also
		 * hands one unit back to s0; the last arrival never
		 * returns the hwm-1 units it took, so the lock only
		 * becomes free again once all hwm-1 waiters have left.
		 */
		s[0].sem_num = 0;
		s[0].sem_op  = 1;
		s[0].sem_flg = 0;
		s[1].sem_num = 2;
		s[1].sem_op  = -1;
		s[1].sem_flg = 0;
		if (semop(b->ba_semid, s, 2) == -1) {
			perror("semop(5)");
			return (-1);
		}

	} else {
		/* the last thread */

		if (r != NULL) {
			update_stats(b, r);
		}

		b->ba_waiters = 0;
		b->ba_phase++;

		/* open the gate for the hwm-1 sleepers; the lock stays held */
		s[0].sem_num = 2;
		s[0].sem_op  = b->ba_hwm - 1;
		s[0].sem_flg = 0;
		if (semop(b->ba_semid, s, 1) == -1) {
			perror("semop(6)");
			return (-1);
		}
	}

	return (0);
}
    838 
    839 #else /* USE_SEMOP */
    840 
    841 barrier_t *
    842 barrier_create(int hwm, int datasize)
    843 {
    844 	pthread_mutexattr_t	attr;
    845 	pthread_condattr_t	cattr;
    846 	barrier_t		*b;
    847 
    848 	/*LINTED*/
    849 	b = (barrier_t *)mmap(NULL,
    850 	    sizeof (barrier_t) + (datasize - 1) * sizeof (double),
    851 	    PROT_READ | PROT_WRITE,
    852 	    MAP_SHARED | MAP_ANON, -1, 0L);
    853 	if (b == (barrier_t *)MAP_FAILED) {
    854 		return (NULL);
    855 	}
    856 	b->ba_datasize = datasize;
    857 
    858 	b->ba_hwm = hwm;
    859 	b->ba_flag  = 0;
    860 
    861 	(void) pthread_mutexattr_init(&attr);
    862 	(void) pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
    863 
    864 	(void) pthread_condattr_init(&cattr);
    865 	(void) pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED);
    866 
    867 	(void) pthread_mutex_init(&b->ba_lock, &attr);
    868 	(void) pthread_cond_init(&b->ba_cv, &cattr);
    869 
    870 	b->ba_waiters = 0;
    871 	b->ba_phase = 0;
    872 
    873 	b->ba_count = 0;
    874 	b->ba_errors = 0;
    875 
    876 	return (b);
    877 }
    878 
    879 int
    880 barrier_destroy(barrier_t *b)
    881 {
    882 	(void) munmap((void *)b, sizeof (barrier_t));
    883 
    884 	return (0);
    885 }
    886 
    887 int
    888 barrier_queue(barrier_t *b, result_t *r)
    889 {
    890 	int			phase;
    891 
    892 	(void) pthread_mutex_lock(&b->ba_lock);
    893 
    894 	if (r != NULL) {
    895 		update_stats(b, r);
    896 	}
    897 
    898 	phase = b->ba_phase;
    899 
    900 	b->ba_waiters++;
    901 	if (b->ba_hwm == b->ba_waiters) {
    902 		b->ba_waiters = 0;
    903 		b->ba_phase++;
    904 		(void) pthread_cond_broadcast(&b->ba_cv);
    905 	}
    906 
    907 	while (b->ba_phase == phase) {
    908 		(void) pthread_cond_wait(&b->ba_cv, &b->ba_lock);
    909 	}
    910 
    911 	(void) pthread_mutex_unlock(&b->ba_lock);
    912 	return (0);
    913 }
    914 #endif /* USE_SEMOP */
    915 
    916 int
    917 gettindex()
    918 {
    919 	int			i;
    920 
    921 	if (tids == NULL) {
    922 		return (-1);
    923 	}
    924 
    925 	for (i = 1; i < lm_optT; i++) {
    926 		if (pthread_self() == tids[i]) {
    927 			return (i);
    928 		}
    929 	}
    930 
    931 	return (0);
    932 }
    933 
/*
 * Returns this process's worker index as set by actual_main(): the
 * fork loop index in a child, 0 in -1 (non-fork) mode, and -1 in a
 * process that never became a worker.
 */
int
getpindex()
{
	return (pindex);
}
    939 
    940 void *
    941 gettsd(int p, int t)
    942 {
    943 	if ((p < 0) || (p >= lm_optP) || (t < 0) || (t >= lm_optT))
    944 		return (NULL);
    945 
    946 	return ((void *)((unsigned long)tsdseg +
    947 	    (((p * lm_optT) + t) * tsdsize)));
    948 }
    949 
    950 #ifdef USE_GETHRTIME
/*
 * USE_GETHRTIME variant: returns the value of gethrtime() unchanged
 * (gethrtime(3C) reports nanoseconds).
 */
long long
getnsecs()
{
	return (gethrtime());
}
    956 
/*
 * USE_GETHRTIME variant: returns gethrtime() divided by 1000, i.e.
 * its nanosecond reading truncated to microseconds.
 */
long long
getusecs()
{
	return (gethrtime() / 1000);
}
    962 
    963 #elif USE_RDTSC /* USE_GETHRTIME */
    964 
    965 __inline__ long long
    966 rdtsc(void)
    967 {
    968 	unsigned long long x;
    969 	__asm__ volatile(".byte 0x0f, 0x31" : "=A" (x));
    970 	return (x);
    971 }
    972 
    973 long long
    974 getusecs()
    975 {
    976 	return (rdtsc() * 1000000 / lm_hz);
    977 }
    978 
    979 long long
    980 getnsecs()
    981 {
    982 	return (rdtsc() * 1000000000 / lm_hz);
    983 }
    984 
    985 #else /* USE_GETHRTIME */
    986 
    987 long long
    988 getusecs()
    989 {
    990 	struct timeval		tv;
    991 
    992 	(void) gettimeofday(&tv, NULL);
    993 
    994 	return ((long long)tv.tv_sec * 1000000LL + (long long) tv.tv_usec);
    995 }
    996 
    997 long long
    998 getnsecs()
    999 {
   1000 	struct timeval		tv;
   1001 
   1002 	(void) gettimeofday(&tv, NULL);
   1003 
   1004 	return ((long long)tv.tv_sec * 1000000000LL +
   1005 	    (long long) tv.tv_usec * 1000LL);
   1006 }
   1007 
   1008 #endif /* USE_GETHRTIME */
   1009 
   1010 int
   1011 setfdlimit(int limit)
   1012 {
   1013 	struct rlimit rlimit;
   1014 
   1015 	if (getrlimit(RLIMIT_NOFILE, &rlimit) < 0) {
   1016 		perror("getrlimit");
   1017 		exit(1);
   1018 	}
   1019 
   1020 	if (rlimit.rlim_cur > limit)
   1021 		return (0); /* no worries */
   1022 
   1023 	rlimit.rlim_cur = limit;
   1024 
   1025 	if (rlimit.rlim_max < limit)
   1026 		rlimit.rlim_max = limit;
   1027 
   1028 	if (setrlimit(RLIMIT_NOFILE, &rlimit) < 0) {
   1029 		perror("setrlimit");
   1030 		exit(3);
   1031 	}
   1032 
   1033 	return (0);
   1034 }
   1035 
   1036 
   1037 #define	KILOBYTE		1024
   1038 #define	MEGABYTE		(KILOBYTE * KILOBYTE)
   1039 #define	GIGABYTE		(KILOBYTE * MEGABYTE)
   1040 
   1041 long long
   1042 sizetoll(const char *arg)
   1043 {
   1044 	int			len = strlen(arg);
   1045 	int			i;
   1046 	long long		mult = 1;
   1047 
   1048 	if (len && isalpha(arg[len - 1])) {
   1049 		switch (arg[len - 1]) {
   1050 
   1051 		case 'k':
   1052 		case 'K':
   1053 			mult = KILOBYTE;
   1054 			break;
   1055 		case 'm':
   1056 		case 'M':
   1057 			mult = MEGABYTE;
   1058 			break;
   1059 		case 'g':
   1060 		case 'G':
   1061 			mult = GIGABYTE;
   1062 			break;
   1063 		default:
   1064 			return (-1);
   1065 		}
   1066 
   1067 		for (i = 0; i < len - 1; i++)
   1068 			if (!isdigit(arg[i]))
   1069 				return (-1);
   1070 	}
   1071 
   1072 	return (mult * strtoll(arg, NULL, 10));
   1073 }
   1074 
   1075 int
   1076 sizetoint(const char *arg)
   1077 {
   1078 	int			len = strlen(arg);
   1079 	int			i;
   1080 	long long		mult = 1;
   1081 
   1082 	if (len && isalpha(arg[len - 1])) {
   1083 		switch (arg[len - 1]) {
   1084 
   1085 		case 'k':
   1086 		case 'K':
   1087 			mult = KILOBYTE;
   1088 			break;
   1089 		case 'm':
   1090 		case 'M':
   1091 			mult = MEGABYTE;
   1092 			break;
   1093 		case 'g':
   1094 		case 'G':
   1095 			mult = GIGABYTE;
   1096 			break;
   1097 		default:
   1098 			return (-1);
   1099 		}
   1100 
   1101 		for (i = 0; i < len - 1; i++)
   1102 			if (!isdigit(arg[i]))
   1103 				return (-1);
   1104 	}
   1105 
   1106 	return (mult * atoi(arg));
   1107 }
   1108 
   1109 static void
   1110 print_bar(long count, long total)
   1111 {
   1112 	int			i;
   1113 
   1114 	(void) putchar_unlocked(count ? '*' : ' ');
   1115 	for (i = 1; i < (32 * count) / total; i++)
   1116 		(void) putchar_unlocked('*');
   1117 	for (; i < 32; i++)
   1118 		(void) putchar_unlocked(' ');
   1119 }
   1120 
   1121 static int
   1122 doublecmp(const void *p1, const void *p2)
   1123 {
   1124 	double a = *((double *)p1);
   1125 	double b = *((double *)p2);
   1126 
   1127 	if (a > b)
   1128 		return (1);
   1129 	if (a < b)
   1130 		return (-1);
   1131 	return (0);
   1132 }
   1133 
   1134 static void
   1135 print_histo(barrier_t *b)
   1136 {
   1137 	int			n;
   1138 	int			i;
   1139 	int			j;
   1140 	int			last;
   1141 	long long		maxcount;
   1142 	double			sum;
   1143 	long long		min;
   1144 	long long		scale;
   1145 	double			x;
   1146 	long long		y;
   1147 	long long		count;
   1148 	int			i95;
   1149 	double			p95;
   1150 	double			r95;
   1151 	double			m95;
   1152 	histo_t			*histo;
   1153 
   1154 	(void) printf("#	%12s %12s %32s %12s\n", "counts", "usecs/call",
   1155 	    "", "means");
   1156 
   1157 	/* calculate how much data we've captured */
   1158 	n = b->ba_batches > b->ba_datasize ? b->ba_datasize : b->ba_batches;
   1159 
   1160 	/* find the 95th percentile - index, value and range */
   1161 	qsort((void *)b->ba_data, n, sizeof (double), doublecmp);
   1162 	min = b->ba_data[0] + 0.000001;
   1163 	i95 = n * 95 / 100;
   1164 	p95 = b->ba_data[i95];
   1165 	r95 = p95 - min + 1;
   1166 
   1167 	/* find a suitable min and scale */
   1168 	i = 0;
   1169 	x = r95 / (HISTOSIZE - 1);
   1170 	while (x >= 10.0) {
   1171 		x /= 10.0;
   1172 		i++;
   1173 	}
   1174 	y = x + 0.9999999999;
   1175 	while (i > 0) {
   1176 		y *= 10;
   1177 		i--;
   1178 	}
   1179 	min /= y;
   1180 	min *= y;
   1181 	scale = y * (HISTOSIZE - 1);
   1182 	if (scale < (HISTOSIZE - 1)) {
   1183 		scale = (HISTOSIZE - 1);
   1184 	}
   1185 
   1186 	/* create and initialise the histogram */
   1187 	histo = malloc(HISTOSIZE * sizeof (histo_t));
   1188 	for (i = 0; i < HISTOSIZE; i++) {
   1189 		histo[i].sum = 0.0;
   1190 		histo[i].count = 0;
   1191 	}
   1192 
   1193 	/* populate the histogram */
   1194 	last = 0;
   1195 	sum = 0.0;
   1196 	count = 0;
   1197 	for (i = 0; i < i95; i++) {
   1198 		j = (HISTOSIZE - 1) * (b->ba_data[i] - min) / scale;
   1199 
   1200 		if (j >= HISTOSIZE) {
   1201 			(void) printf("panic!\n");
   1202 			j = HISTOSIZE - 1;
   1203 		}
   1204 
   1205 		histo[j].sum += b->ba_data[i];
   1206 		histo[j].count++;
   1207 
   1208 		sum += b->ba_data[i];
   1209 		count++;
   1210 	}
   1211 	m95 = sum / count;
   1212 
   1213 	/* find the larges bucket */
   1214 	maxcount = 0;
   1215 	for (i = 0; i < HISTOSIZE; i++)
   1216 		if (histo[i].count > 0) {
   1217 			last = i;
   1218 			if (histo[i].count > maxcount)
   1219 				maxcount = histo[i].count;
   1220 		}
   1221 
   1222 	/* print the buckets */
   1223 	for (i = 0; i <= last; i++) {
   1224 		(void) printf("#       %12lld %12.5f |", histo[i].count,
   1225 		    (min + scale * (double)i / (HISTOSIZE - 1)));
   1226 
   1227 		print_bar(histo[i].count, maxcount);
   1228 
   1229 		if (histo[i].count > 0)
   1230 			(void) printf("%12.5f\n",
   1231 			    histo[i].sum / histo[i].count);
   1232 		else
   1233 			(void) printf("%12s\n", "-");
   1234 	}
   1235 
   1236 	/* find the mean of values beyond the 95th percentile */
   1237 	sum = 0.0;
   1238 	count = 0;
   1239 	for (i = i95; i < n; i++) {
   1240 		sum += b->ba_data[i];
   1241 		count++;
   1242 	}
   1243 
   1244 	/* print the >95% bucket summary */
   1245 	(void) printf("#\n");
   1246 	(void) printf("#       %12lld %12s |", count, "> 95%");
   1247 	print_bar(count, maxcount);
   1248 	if (count > 0)
   1249 		(void) printf("%12.5f\n", sum / count);
   1250 	else
   1251 		(void) printf("%12s\n", "-");
   1252 	(void) printf("#\n");
   1253 	(void) printf("#       %12s %12.5f\n", "mean of 95%", m95);
   1254 	(void) printf("#       %12s %12.5f\n", "95th %ile", p95);
   1255 
   1256 	/* quantify any buffer overflow */
   1257 	if (b->ba_batches > b->ba_datasize)
   1258 		(void) printf("#       %12s %12d\n", "data dropped",
   1259 		    b->ba_batches - b->ba_datasize);
   1260 }
   1261 
   1262 static void
   1263 compute_stats(barrier_t *b)
   1264 {
   1265 	int i;
   1266 
   1267 	if (b->ba_batches > b->ba_datasize)
   1268 		b->ba_batches = b->ba_datasize;
   1269 
   1270 	/*
   1271 	 * convert to usecs/call
   1272 	 */
   1273 
   1274 	for (i = 0; i < b->ba_batches; i++)
   1275 		b->ba_data[i] /= 1000.0;
   1276 
   1277 	/*
   1278 	 * do raw stats
   1279 	 */
   1280 
   1281 	(void) crunch_stats(b->ba_data, b->ba_batches, &b->ba_raw);
   1282 
   1283 	/*
   1284 	 * recursively apply 3 sigma rule to remove outliers
   1285 	 */
   1286 
   1287 	b->ba_corrected = b->ba_raw;
   1288 	b->ba_outliers = 0;
   1289 
   1290 	if (b->ba_batches > 40) { /* remove outliers */
   1291 		int removed;
   1292 
   1293 		do {
   1294 			removed = remove_outliers(b->ba_data, b->ba_batches,
   1295 			    &b->ba_corrected);
   1296 			b->ba_outliers += removed;
   1297 			b->ba_batches -= removed;
   1298 			(void) crunch_stats(b->ba_data, b->ba_batches,
   1299 			    &b->ba_corrected);
   1300 			} while (removed != 0 && b->ba_batches > 40);
   1301 	}
   1302 
   1303 }
   1304 
   1305 /*
   1306  * routine to compute various statistics on array of doubles.
   1307  */
   1308 
   1309 static int
   1310 crunch_stats(double *data, int count, stats_t *stats)
   1311 {
   1312 	double a;
   1313 	double std;
   1314 	double diff;
   1315 	double sk;
   1316 	double ku;
   1317 	double mean;
   1318 	int i;
   1319 	int bytes;
   1320 	double *dupdata;
   1321 
   1322 	/*
   1323 	 * first we need the mean
   1324 	 */
   1325 
   1326 	mean = 0.0;
   1327 
   1328 	for (i = 0; i < count; i++) {
   1329 		mean += data[i];
   1330 	}
   1331 
   1332 	mean /= count;
   1333 
   1334 	stats->st_mean = mean;
   1335 
   1336 	/*
   1337 	 * malloc and sort so we can do median
   1338 	 */
   1339 
   1340 	dupdata = malloc(bytes = sizeof (double) * count);
   1341 	(void) memcpy(dupdata, data, bytes);
   1342 	qsort((void *)dupdata, count, sizeof (double), doublecmp);
   1343 	stats->st_median   = dupdata[count/2];
   1344 
   1345 	/*
   1346 	 * reuse dupdata to compute time correlation of data to
   1347 	 * detect interesting time-based trends
   1348 	 */
   1349 
   1350 	for (i = 0; i < count; i++)
   1351 		dupdata[i] = (double)i;
   1352 
   1353 	(void) fit_line(dupdata, data, count, &a, &stats->st_timecorr);
   1354 	free(dupdata);
   1355 
   1356 	std = 0.0;
   1357 	sk  = 0.0;
   1358 	ku  = 0.0;
   1359 
   1360 	stats->st_max = -1;
   1361 	stats->st_min = 1.0e99; /* hard to find portable values */
   1362 
   1363 	for (i = 0; i < count; i++) {
   1364 		if (data[i] > stats->st_max)
   1365 			stats->st_max = data[i];
   1366 		if (data[i] < stats->st_min)
   1367 			stats->st_min = data[i];
   1368 
   1369 		diff = data[i] - mean;
   1370 		std += diff * diff;
   1371 		sk  += diff * diff * diff;
   1372 		ku  += diff * diff * diff * diff;
   1373 	}
   1374 
   1375 	stats->st_stddev   = std = sqrt(std/(double)(count - 1));
   1376 	stats->st_stderr   = std / sqrt(count);
   1377 	stats->st_99confidence = stats->st_stderr * 2.326;
   1378 	stats->st_skew	   = sk / (std * std * std) / (double)(count);
   1379 	stats->st_kurtosis = ku / (std * std * std * std) /
   1380 	    (double)(count) - 3;
   1381 
   1382 	return (0);
   1383 }
   1384 
   1385 /*
   1386  * does a least squares fit to the set of points x, y and
   1387  * fits a line y = a + bx.  Returns a, b
   1388  */
   1389 
   1390 int
   1391 fit_line(double *x, double *y, int count, double *a, double *b)
   1392 {
   1393 	double sumx, sumy, sumxy, sumx2;
   1394 	double denom;
   1395 	int i;
   1396 
   1397 	sumx = sumy = sumxy = sumx2 = 0.0;
   1398 
   1399 	for (i = 0; i < count; i++) {
   1400 		sumx	+= x[i];
   1401 		sumx2	+= x[i] * x[i];
   1402 		sumy	+= y[i];
   1403 		sumxy	+= x[i] * y[i];
   1404 	}
   1405 
   1406 	denom = count * sumx2 - sumx * sumx;
   1407 
   1408 	if (denom == 0.0)
   1409 		return (-1);
   1410 
   1411 	*a = (sumy * sumx2 - sumx * sumxy) / denom;
   1412 
   1413 	*b = (count * sumxy - sumx * sumy) / denom;
   1414 
   1415 	return (0);
   1416 }
   1417 
   1418 /*
   1419  * empty function for measurement purposes
   1420  */
   1421 
   1422 int
   1423 nop()
   1424 {
   1425 	return (1);
   1426 }
   1427 
   1428 #define	NSECITER 1000
   1429 
   1430 static long long
   1431 get_nsecs_overhead()
   1432 {
   1433 	long long s;
   1434 
   1435 	double data[NSECITER];
   1436 	stats_t stats;
   1437 
   1438 	int i;
   1439 	int count;
   1440 	int outliers;
   1441 
   1442 	(void) getnsecs(); /* warmup */
   1443 	(void) getnsecs(); /* warmup */
   1444 	(void) getnsecs(); /* warmup */
   1445 
   1446 	i = 0;
   1447 
   1448 	count = NSECITER;
   1449 
   1450 	for (i = 0; i < count; i++) {
   1451 		s = getnsecs();
   1452 		data[i] = getnsecs() - s;
   1453 	}
   1454 
   1455 	(void) crunch_stats(data, count, &stats);
   1456 
   1457 	while ((outliers = remove_outliers(data, count, &stats)) != 0) {
   1458 		count -= outliers;
   1459 		(void) crunch_stats(data, count, &stats);
   1460 	}
   1461 
   1462 	return ((long long)stats.st_mean);
   1463 
   1464 }
   1465 
   1466 long long
   1467 get_nsecs_resolution()
   1468 {
   1469 	long long y[1000];
   1470 
   1471 	int i, j, nops, res;
   1472 	long long start, stop;
   1473 
   1474 	/*
   1475 	 * first, figure out how many nops to use
   1476 	 * to get any delta between time measurements.
   1477 	 * use a minimum of one.
   1478 	 */
   1479 
   1480 	/*
   1481 	 * warm cache
   1482 	 */
   1483 
   1484 	stop = start = getnsecs();
   1485 
   1486 	for (i = 1; i < 10000000; i++) {
   1487 		start = getnsecs();
   1488 		for (j = i; j; j--)
   1489 			;
   1490 		stop = getnsecs();
   1491 		if (stop > start)
   1492 			break;
   1493 	}
   1494 
   1495 	nops = i;
   1496 
   1497 	/*
   1498 	 * now collect data at linearly varying intervals
   1499 	 */
   1500 
   1501 	for (i = 0; i < 1000; i++) {
   1502 		start = getnsecs();
   1503 		for (j = nops * i; j; j--)
   1504 			;
   1505 		stop = getnsecs();
   1506 		y[i] = stop - start;
   1507 	}
   1508 
   1509 	/*
   1510 	 * find smallest positive difference between samples;
   1511 	 * this is the timer resolution
   1512 	 */
   1513 
   1514 	res = 1<<30;
   1515 
   1516 	for (i = 1; i < 1000; i++) {
   1517 		int diff = y[i] - y[i-1];
   1518 
   1519 		if (diff > 0 && res > diff)
   1520 			res = diff;
   1521 
   1522 	}
   1523 
   1524 	return (res);
   1525 }
   1526 
   1527 /*
   1528  * remove any data points from the array more than 3 sigma out
   1529  */
   1530 
   1531 static int
   1532 remove_outliers(double *data, int count, stats_t *stats)
   1533 {
   1534 	double outmin = stats->st_mean - 3 * stats->st_stddev;
   1535 	double outmax = stats->st_mean + 3 * stats->st_stddev;
   1536 
   1537 	int i, j, outliers;
   1538 
   1539 	for (outliers = i = j = 0; i < count; i++)
   1540 		if (data[i] > outmax || data[i] < outmin)
   1541 			outliers++;
   1542 		else
   1543 			data[j++] = data[i];
   1544 
   1545 	return (outliers);
   1546 }
   1547