Home | History | Annotate | Download | only in libmicro
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms
      5  * of the Common Development and Distribution License
      6  * (the "License").  You may not use this file except
      7  * in compliance with the License.
      8  *
      9  * You can obtain a copy of the license at
     10  * src/OPENSOLARIS.LICENSE
     11  * or http://www.opensolaris.org/os/licensing.
     12  * See the License for the specific language governing
     13  * permissions and limitations under the License.
     14  *
     15  * When distributing Covered Code, include this CDDL
     16  * HEADER in each file and include the License file at
     17  * usr/src/OPENSOLARIS.LICENSE.  If applicable,
     18  * add the following below this CDDL HEADER, with the
     19  * fields enclosed by brackets "[]" replaced with your
     20  * own identifying information: Portions Copyright [yyyy]
     21  * [name of copyright owner]
     22  *
     23  * CDDL HEADER END
     24  */
     25 
     26 /*
     27  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     28  * Use is subject to license terms.
     29  */
     30 
/*
 * Routine to benchmark cache-to-cache transfer times.  It uses
 * Solaris-specific features to find and bind to the CPUs in the
 * current processor set, so it is not likely to work elsewhere.
 */
     36 
     37 
     38 #include <unistd.h>
     39 #include <stdlib.h>
     40 #include <stdio.h>
     41 #include <fcntl.h>
     42 #include <string.h>
     43 #include <sys/processor.h>
     44 #include <sys/types.h>
     45 #include <stdio.h>
     46 #include <errno.h>
     47 #include <sys/pset.h>
     48 
     49 #include "libmicro.h"
     50 
     51 static long			opts = 1024*512;
     52 
     53 typedef struct {
     54 	long			**ts_data;
     55 	long			ts_result;
     56 	pthread_mutex_t		ts_lock;
     57 } tsd_t;
     58 
     59 static unsigned int ncpu = 1024;
     60 
     61 static tsd_t *thread_data[1024];
     62 static processorid_t cpus[1024];
     63 
     64 int traverse_ptrchain(long **, int, int);
     65 
     66 int
     67 benchmark_init()
     68 {
     69 	lm_tsdsize = sizeof (tsd_t);
     70 
     71 	(void) sprintf(lm_optstr, "s:");
     72 
     73 	(void) sprintf(lm_usage,
     74 	    "       [-s size] size of access area in bytes"
     75 	    " (default %ld)\n"
     76 	    "notes: measures cache to cache transfer times on Solaris\n",
     77 	    opts);
     78 
     79 	(void) sprintf(lm_header, "%8s", "size");
     80 
     81 	return (0);
     82 }
     83 
     84 int
     85 benchmark_optswitch(int opt, char *optarg)
     86 {
     87 	switch (opt) {
     88 	case 's':
     89 		opts = sizetoint(optarg);
     90 		break;
     91 	default:
     92 		return (-1);
     93 	}
     94 
     95 	return (0);
     96 }
     97 
     98 int
     99 benchmark_initrun()
    100 {
    101 	if (pset_info(PS_MYID, NULL, &ncpu, cpus) < 0) {
    102 		perror("pset_info");
    103 		return (1);
    104 	}
    105 
    106 	return (0);
    107 }
    108 
    109 int
    110 benchmark_initworker(void *tsd)
    111 {
    112 	tsd_t			*ts = (tsd_t *)tsd;
    113 	int i, j;
    114 	processorid_t cpu;
    115 
    116 	ts->ts_data = malloc(opts);
    117 
    118 	if (ts->ts_data == NULL) {
    119 		return (1);
    120 	}
    121 
    122 	(void) pthread_mutex_init(&ts->ts_lock, NULL);
    123 
    124 
    125 	if (processor_bind(P_LWPID, P_MYID,
    126 	    cpu = cpus[(pthread_self() - 1) % ncpu],
    127 	    NULL) < 0) {
    128 		perror("processor_bind:");
    129 		return (1);
    130 	}
    131 
    132 	(void) printf("# thread %d using processor %d\n", pthread_self(), cpu);
    133 
    134 	/*
    135 	 * use lmbench style backwards stride
    136 	 */
    137 
    138 	for (i = 0; i < opts / sizeof (long); i++) {
    139 		j = i - 128;
    140 		if (j < 0)
    141 			j = j + opts / sizeof (long);
    142 		ts->ts_data[i] = (long *)&(ts->ts_data[j]);
    143 	}
    144 
    145 	thread_data[pthread_self() - 1] = ts;
    146 
    147 	return (0);
    148 }
    149 
    150 /*
    151  * here we go in order for each thread, causing inherent serialization
    152  * this is normally not a good idea, but in this case we're trying to
    153  * measure cache-to-cache transfer times, and if we run threads in
    154  * parallel we're likely to see saturation effects rather than cache-to-cache,
    155  * esp. on wimpy memory platforms like P4.
    156  */
    157 
    158 
    159 /*ARGSUSED*/
    160 int
    161 benchmark(void *tsd, result_t *res)
    162 {
    163 	tsd_t			*ts;
    164 	int			i, j;
    165 	int 			count = opts / 128 / sizeof (long);
    166 
    167 	for (j = 0; j < lm_optB; j++)
    168 		for (i = 0; i < lm_optT; i++) {
    169 			ts = thread_data[i];
    170 			(void) pthread_mutex_lock(&ts->ts_lock);
    171 			ts->ts_result += traverse_ptrchain(
    172 			    (long **)ts->ts_data, count, 0);
    173 			(void) pthread_mutex_unlock(&ts->ts_lock);
    174 		}
    175 
    176 	res->re_count = lm_optB * lm_optT * count;
    177 
    178 	return (0);
    179 }
    180 
    181 int
    182 traverse_ptrchain(long **ptr, int count, int value)
    183 {
    184 	int i;
    185 
    186 	for (i = 0; i < count; i += 10) {
    187 		*ptr = *ptr + value;
    188 		ptr = (long **)*ptr;
    189 		*ptr = *ptr + value;
    190 		ptr = (long **)*ptr;
    191 		*ptr = *ptr + value;
    192 		ptr = (long **)*ptr;
    193 		*ptr = *ptr + value;
    194 		ptr = (long **)*ptr;
    195 		*ptr = *ptr + value;
    196 		ptr = (long **)*ptr;
    197 		*ptr = *ptr + value;
    198 		ptr = (long **)*ptr;
    199 		*ptr = *ptr + value;
    200 		ptr = (long **)*ptr;
    201 		*ptr = *ptr + value;
    202 		ptr = (long **)*ptr;
    203 		*ptr = *ptr + value;
    204 		ptr = (long **)*ptr;
    205 		*ptr = *ptr + value;
    206 		ptr = (long **)*ptr;
    207 		*ptr = *ptr + value;
    208 	}
    209 	return ((int)*ptr); /* bogus return */
    210 }
    211 
    212 
    213 char *
    214 benchmark_result()
    215 {
    216 	static char  result[256];
    217 
    218 	(void) sprintf(result, "%8ld ", opts);
    219 
    220 
    221 	return (result);
    222 }
    223