1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms 5 * of the Common Development and Distribution License 6 * (the "License"). You may not use this file except 7 * in compliance with the License. 8 * 9 * You can obtain a copy of the license at 10 * src/OPENSOLARIS.LICENSE 11 * or http://www.opensolaris.org/os/licensing. 12 * See the License for the specific language governing 13 * permissions and limitations under the License. 14 * 15 * When distributing Covered Code, include this CDDL 16 * HEADER in each file and include the License file at 17 * usr/src/OPENSOLARIS.LICENSE. If applicable, 18 * add the following below this CDDL HEADER, with the 19 * fields enclosed by brackets "[]" replaced with your 20 * own identifying information: Portions Copyright [yyyy] 21 * [name of copyright owner] 22 * 23 * CDDL HEADER END 24 */ 25 26 /* 27 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 29 */ 30 31 /* 32 * routine to benchmark cache-to-cache transfer times... uses 33 * solaris features to find and bind to cpus in the current 34 * processor set, so not likely to work elsewhere. 35 */ 36 37 38 #include <unistd.h> 39 #include <stdlib.h> 40 #include <stdio.h> 41 #include <fcntl.h> 42 #include <string.h> 43 #include <sys/processor.h> 44 #include <sys/types.h> 45 #include <stdio.h> 46 #include <errno.h> 47 #include <sys/pset.h> 48 49 #include "libmicro.h" 50 51 static long opts = 1024*512; 52 53 typedef struct { 54 long **ts_data; 55 long ts_result; 56 pthread_mutex_t ts_lock; 57 } tsd_t; 58 59 static unsigned int ncpu = 1024; 60 61 static tsd_t *thread_data[1024]; 62 static processorid_t cpus[1024]; 63 64 int traverse_ptrchain(long **, int, int); 65 66 int 67 benchmark_init() 68 { 69 lm_tsdsize = sizeof (tsd_t); 70 71 (void) sprintf(lm_optstr, "s:"); 72 73 (void) sprintf(lm_usage, 74 " [-s size] size of access area in bytes" 75 " (default %ld)\n" 76 "notes: measures cache to cache transfer times on Solaris\n", 77 opts); 78 79 (void) sprintf(lm_header, "%8s", "size"); 80 81 return (0); 82 } 83 84 int 85 benchmark_optswitch(int opt, char *optarg) 86 { 87 switch (opt) { 88 case 's': 89 opts = sizetoint(optarg); 90 break; 91 default: 92 return (-1); 93 } 94 95 return (0); 96 } 97 98 int 99 benchmark_initrun() 100 { 101 if (pset_info(PS_MYID, NULL, &ncpu, cpus) < 0) { 102 perror("pset_info"); 103 return (1); 104 } 105 106 return (0); 107 } 108 109 int 110 benchmark_initworker(void *tsd) 111 { 112 tsd_t *ts = (tsd_t *)tsd; 113 int i, j; 114 processorid_t cpu; 115 116 ts->ts_data = malloc(opts); 117 118 if (ts->ts_data == NULL) { 119 return (1); 120 } 121 122 (void) pthread_mutex_init(&ts->ts_lock, NULL); 123 124 125 if (processor_bind(P_LWPID, P_MYID, 126 cpu = cpus[(pthread_self() - 1) % ncpu], 127 NULL) < 0) { 128 perror("processor_bind:"); 129 return (1); 130 } 131 132 (void) printf("# thread %d using processor %d\n", pthread_self(), cpu); 133 134 /* 135 * use lmbench style backwards stride 136 */ 137 138 for (i = 0; i < opts / sizeof (long); i++) { 139 j = i - 128; 140 if (j < 0) 141 j = j + opts / sizeof (long); 142 ts->ts_data[i] = (long *)&(ts->ts_data[j]); 143 } 144 145 thread_data[pthread_self() - 1] = ts; 146 147 return (0); 148 } 149 150 /* 151 * here we go in order for each thread, causing inherent serialization 152 * this is normally not a good idea, but in this case we're trying to 153 * measure cache-to-cache transfer times, and if we run threads in 154 * parallel we're likely to see saturation effects rather than cache-to-cache, 155 * esp. on wimpy memory platforms like P4. 156 */ 157 158 159 /*ARGSUSED*/ 160 int 161 benchmark(void *tsd, result_t *res) 162 { 163 tsd_t *ts; 164 int i, j; 165 int count = opts / 128 / sizeof (long); 166 167 for (j = 0; j < lm_optB; j++) 168 for (i = 0; i < lm_optT; i++) { 169 ts = thread_data[i]; 170 (void) pthread_mutex_lock(&ts->ts_lock); 171 ts->ts_result += traverse_ptrchain( 172 (long **)ts->ts_data, count, 0); 173 (void) pthread_mutex_unlock(&ts->ts_lock); 174 } 175 176 res->re_count = lm_optB * lm_optT * count; 177 178 return (0); 179 } 180 181 int 182 traverse_ptrchain(long **ptr, int count, int value) 183 { 184 int i; 185 186 for (i = 0; i < count; i += 10) { 187 *ptr = *ptr + value; 188 ptr = (long **)*ptr; 189 *ptr = *ptr + value; 190 ptr = (long **)*ptr; 191 *ptr = *ptr + value; 192 ptr = (long **)*ptr; 193 *ptr = *ptr + value; 194 ptr = (long **)*ptr; 195 *ptr = *ptr + value; 196 ptr = (long **)*ptr; 197 *ptr = *ptr + value; 198 ptr = (long **)*ptr; 199 *ptr = *ptr + value; 200 ptr = (long **)*ptr; 201 *ptr = *ptr + value; 202 ptr = (long **)*ptr; 203 *ptr = *ptr + value; 204 ptr = (long **)*ptr; 205 *ptr = *ptr + value; 206 ptr = (long **)*ptr; 207 *ptr = *ptr + value; 208 } 209 return ((int)*ptr); /* bogus return */ 210 } 211 212 213 char * 214 benchmark_result() 215 { 216 static char result[256]; 217 218 (void) sprintf(result, "%8ld ", opts); 219 220 221 return (result); 222 } 223