Home | History | Annotate | Download | only in i386
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License, Version 1.0 only
      6  * (the "License").  You may not use this file except in compliance
      7  * with the License.
      8  *
      9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
     10  * or http://www.opensolaris.org/os/licensing.
     11  * See the License for the specific language governing permissions
     12  * and limitations under the License.
     13  *
     14  * When distributing Covered Code, include this CDDL HEADER in each
     15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     16  * If applicable, add the following below this CDDL HEADER, with the
     17  * fields enclosed by brackets "[]" replaced with your own identifying
     18  * information: Portions Copyright [yyyy] [name of copyright owner]
     19  *
     20  * CDDL HEADER END
     21  */
     22 /*
     23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     28 
     29 #include <sys/types.h>
     30 #include <string.h>
     31 #include <alloca.h>
     32 #include <stdlib.h>
     33 #include <stdio.h>
     34 #include <libintl.h>
     35 
     36 #include "libcpc.h"
     37 #include "libcpc_impl.h"
     38 
     39 /*
     40  * Configuration data for Pentium Pro performance counters.
     41  *
     42  * Definitions taken from [3].  See the reference to
     43  * understand what any of these settings actually means.
     44  *
     45  * [3] "Pentium Pro Family Developer's Manual, Volume 3:
     46  *     Operating Systems Writer's Manual," January 1996
     47  */
     48 
     49 #define	V_P5	(1u << 0)		/* specific to Pentium cpus */
     50 #define	V_P5mmx	(1u << 1)		/* " MMX instructions */
     51 #define	V_P6	(1u << 2)		/* specific to Pentium II cpus */
     52 #define	V_P6mmx	(1u << 3)		/* " MMX instructions */
     53 #define	V_END	0
     54 
     55 /*
     56  * map from "cpu version" to flag bits
     57  */
     58 static const uint_t cpuvermap[] = {
     59 	V_P5,		/* CPC_PENTIUM */
     60 	V_P5 | V_P5mmx,	/* CPC_PENTIUM_MMX */
     61 	V_P6,		/* CPC_PENTIUM_PRO */
     62 	V_P6 | V_P6mmx,	/* CPC_PENTIUM_PRO_MMX */
     63 };
     64 
     65 struct nametable {
     66 	const uint_t	ver;
     67 	const uint8_t	bits;
     68 	const char	*name;
     69 };
     70 
     71 /*
     72  * Basic Pentium events
     73  */
     74 #define	P5_EVENTS(v)						\
     75 	{v,		0x0,	"data_read"},			\
     76 	{v,		0x1,	"data_write"},			\
     77 	{v,		0x2,	"data_tlb_miss"},		\
     78 	{v,		0x3,	"data_read_miss"},		\
     79 	{v,		0x4,	"data_write_miss"},		\
     80 	{v,		0x5,	"write_hit_to_M_or_E"},		\
     81 	{v,		0x6,	"dcache_lines_wrback"},		\
     82 	{v,		0x7,	"external_snoops"},		\
     83 	{v,		0x8,	"external_dcache_snoop_hits"},	\
     84 	{v,		0x9,	"memory_access_in_both_pipes"},	\
     85 	{v,		0xa,	"bank_conflicts"},		\
     86 	{v,		0xb,	"misaligned_ref"},		\
     87 	{v,		0xc,	"code_read"},			\
     88 	{v,		0xd,	"code_tlb_miss"},		\
     89 	{v,		0xe,	"code_cache_miss"},		\
     90 	{v,		0xf,	"any_segreg_loaded"},		\
     91 	{v,		0x12,	"branches"},			\
     92 	{v,		0x13,	"btb_hits"},			\
     93 	{v,		0x14,	"taken_or_btb_hit"},		\
     94 	{v,		0x15,	"pipeline_flushes"},		\
     95 	{v,		0x16,	"instr_exec"},			\
     96 	{v,		0x17,	"instr_exec_V_pipe"},		\
     97 	{v,		0x18,	"clks_bus_cycle"},		\
     98 	{v,		0x19,	"clks_full_wbufs"},		\
     99 	{v,		0x1a,	"pipe_stall_read"},		\
    100 	{v,		0x1b,	"stall_on_write_ME"},		\
    101 	{v,		0x1c,	"locked_bus_cycle"},		\
    102 	{v,		0x1d,	"io_rw_cycles"},		\
    103 	{v,		0x1e,	"reads_noncache_mem"},		\
    104 	{v,		0x1f,	"pipeline_agi_stalls"},		\
    105 	{v,		0x22,	"flops"},			\
    106 	{v,		0x23,	"bp_match_dr0"},		\
    107 	{v,		0x24,	"bp_match_dr1"},		\
    108 	{v,		0x25,	"bp_match_dr2"},		\
    109 	{v,		0x26,	"bp_match_dr3"},		\
    110 	{v,		0x27,	"hw_intrs"},			\
    111 	{v,		0x28,	"data_rw"},			\
    112 	{v,		0x29,	"data_rw_miss"}
    113 
    114 static const struct nametable P5mmx_names0[] = {
    115 	P5_EVENTS(V_P5),
    116 	{V_P5mmx,	0x2a,	"bus_ownership_latency"},
    117 	{V_P5mmx,	0x2b,	"mmx_instr_upipe"},
    118 	{V_P5mmx,	0x2c,	"cache_M_line_sharing"},
    119 	{V_P5mmx,	0x2d,	"emms_instr"},
    120 	{V_P5mmx,	0x2e,	"bus_util_processor"},
    121 	{V_P5mmx,	0x2f,	"sat_mmx_instr"},
    122 	{V_P5mmx,	0x30,	"clks_not_HLT"},
    123 	{V_P5mmx,	0x31,	"mmx_data_read"},
    124 	{V_P5mmx,	0x32,	"clks_fp_stall"},
    125 	{V_P5mmx,	0x33,	"d1_starv_fifo_0"},
    126 	{V_P5mmx,	0x34,	"mmx_data_write"},
    127 	{V_P5mmx,	0x35,	"pipe_flush_wbp"},
    128 	{V_P5mmx,	0x36,	"mmx_misalign_data_refs"},
    129 	{V_P5mmx,	0x37,	"rets_pred_incorrect"},
    130 	{V_P5mmx,	0x38,	"mmx_multiply_unit_interlock"},
    131 	{V_P5mmx,	0x39,	"rets"},
    132 	{V_P5mmx,	0x3a,	"btb_false_entries"},
    133 	{V_P5mmx,	0x3b,	"clocks_stall_full_wb"},
    134 	{V_END}
    135 };
    136 
    137 static const struct nametable P5mmx_names1[] = {
    138 	P5_EVENTS(V_P5),
    139 	{V_P5mmx,	0x2a,	"bus_ownership_transfers"},
    140 	{V_P5mmx,	0x2b,	"mmx_instr_vpipe"},
    141 	{V_P5mmx,	0x2c,	"cache_lint_sharing"},
    142 	{V_P5mmx,	0x2d,	"mmx_fp_transitions"},
    143 	{V_P5mmx,	0x2e,	"writes_noncache_mem"},
    144 	{V_P5mmx,	0x2f,	"sats_performed"},
    145 	{V_P5mmx,	0x30,	"clks_dcache_tlb_miss"},
    146 	{V_P5mmx,	0x31,	"mmx_data_read_miss"},
    147 	{V_P5mmx,	0x32,	"taken_br"},
    148 	{V_P5mmx,	0x33,	"d1_starv_fifo_1"},
    149 	{V_P5mmx,	0x34,	"mmx_data_write_miss"},
    150 	{V_P5mmx,	0x35,	"pipe_flush_wbp_wb"},
    151 	{V_P5mmx,	0x36,	"mmx_pipe_stall_data_read"},
    152 	{V_P5mmx,	0x37,	"rets_pred"},
    153 	{V_P5mmx,	0x38,	"movd_movq_stall"},
    154 	{V_P5mmx,	0x39,	"rsb_overflow"},
    155 	{V_P5mmx,	0x3a,	"btb_mispred_nt"},
    156 	{V_P5mmx,	0x3b,	"mmx_stall_write_ME"},
    157 	{V_END}
    158 };
    159 
    160 static const struct nametable *P5mmx_names[2] = {
    161 	P5mmx_names0,
    162 	P5mmx_names1
    163 };
    164 
    165 /*
    166  * Pentium Pro and Pentium II events
    167  */
    168 static const struct nametable P6_names[] = {
    169 	/*
    170 	 * Data cache unit
    171 	 */
    172 	{V_P6,		0x43,	"data_mem_refs"},
    173 	{V_P6,		0x45,	"dcu_lines_in"},
    174 	{V_P6,		0x46,	"dcu_m_lines_in"},
    175 	{V_P6,		0x47,	"dcu_m_lines_out"},
    176 	{V_P6,		0x48,	"dcu_miss_outstanding"},
    177 
    178 	/*
    179 	 * Instruction fetch unit
    180 	 */
    181 	{V_P6,		0x80,	"ifu_ifetch"},
    182 	{V_P6,		0x81,	"ifu_ifetch_miss"},
    183 	{V_P6,		0x85,	"itlb_miss"},
    184 	{V_P6,		0x86,	"ifu_mem_stall"},
    185 	{V_P6,		0x87,	"ild_stall"},
    186 
    187 	/*
    188 	 * L2 cache
    189 	 */
    190 	{V_P6,		0x28,	"l2_ifetch"},
    191 	{V_P6,		0x29,	"l2_ld"},
    192 	{V_P6,		0x2a,	"l2_st"},
    193 	{V_P6,		0x24,	"l2_lines_in"},
    194 	{V_P6,		0x26,	"l2_lines_out"},
    195 	{V_P6,		0x25,	"l2_m_lines_inm"},
    196 	{V_P6,		0x27,	"l2_m_lines_outm"},
    197 	{V_P6,		0x2e,	"l2_rqsts"},
    198 	{V_P6,		0x21,	"l2_ads"},
    199 	{V_P6,		0x22,	"l2_dbus_busy"},
    200 	{V_P6,		0x23,	"l2_dbus_busy_rd"},
    201 
    202 	/*
    203 	 * External bus logic
    204 	 */
    205 	{V_P6,		0x62,	"bus_drdy_clocks"},
    206 	{V_P6,		0x63,	"bus_lock_clocks"},
    207 	{V_P6,		0x60,	"bus_req_outstanding"},
    208 	{V_P6,		0x65,	"bus_tran_brd"},
    209 	{V_P6,		0x66,	"bus_tran_rfo"},
    210 	{V_P6,		0x67,	"bus_trans_wb"},
    211 	{V_P6,		0x68,	"bus_tran_ifetch"},
    212 	{V_P6,		0x69,	"bus_tran_inval"},
    213 	{V_P6,		0x6a,	"bus_tran_pwr"},
    214 	{V_P6,		0x6b,	"bus_trans_p"},
    215 	{V_P6,		0x6c,	"bus_trans_io"},
    216 	{V_P6,		0x6d,	"bus_tran_def"},
    217 	{V_P6,		0x6e,	"bus_tran_burst"},
    218 	{V_P6,		0x70,	"bus_tran_any"},
    219 	{V_P6,		0x6f,	"bus_tran_mem"},
    220 	{V_P6,		0x64,	"bus_data_rcv"},
    221 	{V_P6,		0x61,	"bus_bnr_drv"},
    222 	{V_P6,		0x7a,	"bus_hit_drv"},
    223 	{V_P6,		0x7b,	"bus_hitm_drv"},
    224 	{V_P6,		0x7e,	"bus_snoop_stall"},
    225 
    226 	/*
    227 	 * Floating point unit
    228 	 */
    229 	{V_P6,		0xc1,	"flops"},		/* 0 only */
    230 	{V_P6,		0x10,	"fp_comp_ops_exe"},	/* 0 only */
    231 	{V_P6,		0x11,	"fp_assist"},		/* 1 only */
    232 	{V_P6,		0x12,	"mul"},			/* 1 only */
    233 	{V_P6,		0x13,	"div"},			/* 1 only */
    234 	{V_P6,		0x14,	"cycles_div_busy"},	/* 0 only */
    235 
    236 	/*
    237 	 * Memory ordering
    238 	 */
    239 	{V_P6,		0x3,	"ld_blocks"},
    240 	{V_P6,		0x4,	"sb_drains"},
    241 	{V_P6,		0x5,	"misalign_mem_ref"},
    242 
    243 	/*
    244 	 * Instruction decoding and retirement
    245 	 */
    246 	{V_P6,		0xc0,	"inst_retired"},
    247 	{V_P6,		0xc2,	"uops_retired"},
    248 	{V_P6,		0xd0,	"inst_decoder"},
    249 
    250 	/*
    251 	 * Interrupts
    252 	 */
    253 	{V_P6,		0xc8,	"hw_int_rx"},
    254 	{V_P6,		0xc6,	"cycles_int_masked"},
    255 	{V_P6,		0xc7,	"cycles_int_pending_and_masked"},
    256 
    257 	/*
    258 	 * Branches
    259 	 */
    260 	{V_P6,		0xc4,	"br_inst_retired"},
    261 	{V_P6,		0xc5,	"br_miss_pred_retired"},
    262 	{V_P6,		0xc9,	"br_taken_retired"},
    263 	{V_P6,		0xca,	"br_miss_pred_taken_ret"},
    264 	{V_P6,		0xe0,	"br_inst_decoded"},
    265 	{V_P6,		0xe2,	"btb_misses"},
    266 	{V_P6,		0xe4,	"br_bogus"},
    267 	{V_P6,		0xe6,	"baclears"},
    268 
    269 	/*
    270 	 * Stalls
    271 	 */
    272 	{V_P6,		0xa2,	"resource_stalls"},
    273 	{V_P6,		0xd2,	"partial_rat_stalls"},
    274 
    275 	/*
    276 	 * Segment register loads
    277 	 */
    278 	{V_P6,		0x6,	"segment_reg_loads"},
    279 
    280 	/*
    281 	 * Clocks
    282 	 */
    283 	{V_P6,		0x79,	"cpu_clk_unhalted"},
    284 
    285 	/*
    286 	 * MMX
    287 	 */
    288 	{V_P6mmx,	0xb0,	"mmx_instr_exec"},
    289 	{V_P6mmx,	0xb1,	"mmx_sat_instr_exec"},
    290 	{V_P6mmx,	0xb2,	"mmx_uops_exec"},
    291 	{V_P6mmx,	0xb3,	"mmx_instr_type_exec"},
    292 	{V_P6mmx,	0xcc,	"fp_mmx_trans"},
    293 	{V_P6mmx,	0xcd,	"mmx_assists"},
    294 	{V_P6mmx,	0xce,	"mmx_instr_ret"},
    295 	{V_P6mmx,	0xd4,	"seg_rename_stalls"},
    296 	{V_P6mmx,	0xd5,	"seg_reg_renames"},
    297 	{V_P6mmx,	0xd6,	"ret_seg_renames"},
    298 
    299 	{V_END}
    300 };
    301 
    302 #define	MAPCPUVER(cpuver)	(cpuvermap[(cpuver) - CPC_PENTIUM])
    303 
    304 static int
    305 validargs(int cpuver, int regno)
    306 {
    307 	if (regno < 0 || regno > 1)
    308 		return (0);
    309 	cpuver -= CPC_PENTIUM;
    310 	if (cpuver < 0 ||
    311 	    cpuver >= sizeof (cpuvermap) / sizeof (cpuvermap[0]))
    312 		return (0);
    313 	return (1);
    314 }
    315 
    316 /*ARGSUSED*/
    317 static int
    318 versionmatch(int cpuver, int regno, const struct nametable *n)
    319 {
    320 	if (!validargs(cpuver, regno) || (n->ver & MAPCPUVER(cpuver)) == 0)
    321 		return (0);
    322 
    323 	switch (MAPCPUVER(cpuver)) {
    324 	case V_P5:
    325 	case V_P5 | V_P5mmx:
    326 		break;
    327 	case V_P6:
    328 	case V_P6 | V_P6mmx:
    329 		switch (n->bits) {
    330 		case 0xc1:	/* flops */
    331 		case 0x10:	/* fp_comp_ops_exe */
    332 		case 0x14:	/* cycles_div_busy */
    333 			/* only reg0 counts these */
    334 			if (regno == 1)
    335 				return (0);
    336 			break;
    337 		case 0x11:	/* fp_assist */
    338 		case 0x12:	/* mul */
    339 		case 0x13:	/* div */
    340 			/* only 1 can count these */
    341 			if (regno == 0)
    342 				return (0);
    343 			break;
    344 		default:
    345 			break;
    346 		}
    347 		break;
    348 	default:
    349 		return (0);
    350 	}
    351 
    352 	return (1);
    353 }
    354 
    355 static const struct nametable *
    356 getnametable(int cpuver, int regno)
    357 {
    358 	const struct nametable *n;
    359 
    360 	if (!validargs(cpuver, regno))
    361 		return (NULL);
    362 
    363 	switch (MAPCPUVER(cpuver)) {
    364 	case V_P5:
    365 	case V_P5 | V_P5mmx:
    366 		n = P5mmx_names[regno];
    367 		break;
    368 	case V_P6:
    369 	case V_P6 | V_P6mmx:
    370 		n = P6_names;
    371 		break;
    372 	default:
    373 		n = NULL;
    374 		break;
    375 	}
    376 
    377 	return (n);
    378 }
    379 
    380 void
    381 cpc_walk_names(int cpuver, int regno, void *arg,
    382     void (*action)(void *, int, const char *, uint8_t))
    383 {
    384 	const struct nametable *n;
    385 
    386 	if ((n = getnametable(cpuver, regno)) == NULL)
    387 		return;
    388 	for (; n->ver != V_END; n++)
    389 		if (versionmatch(cpuver, regno, n))
    390 			action(arg, regno, n->name, n->bits);
    391 }
    392 
    393 const char *
    394 __cpc_reg_to_name(int cpuver, int regno, uint8_t bits)
    395 {
    396 	const struct nametable *n;
    397 
    398 	if ((n = getnametable(cpuver, regno)) == NULL)
    399 		return (NULL);
    400 	for (; n->ver != V_END; n++)
    401 		if (bits == n->bits && versionmatch(cpuver, regno, n))
    402 			return (n->name);
    403 	return (NULL);
    404 }
    405 
    406 /*
    407  * Register names can be specified as strings or even as numbers
    408  */
    409 int
    410 __cpc_name_to_reg(int cpuver, int regno, const char *name, uint8_t *bits)
    411 {
    412 	const struct nametable *n;
    413 	char *eptr = NULL;
    414 	long value;
    415 
    416 	if ((n = getnametable(cpuver, regno)) == NULL || name == NULL)
    417 		return (-1);
    418 	for (; n->ver != V_END; n++)
    419 		if (strcmp(name, n->name) == 0 &&
    420 		    versionmatch(cpuver, regno, n)) {
    421 			*bits = n->bits;
    422 			return (0);
    423 		}
    424 
    425 	value = strtol(name, &eptr, 0);
    426 	if (name != eptr && value >= 0 && value <= UINT8_MAX) {
    427 		*bits = (uint8_t)value;
    428 		return (0);
    429 	}
    430 
    431 	return (-1);
    432 }
    433 
    434 const char *
    435 cpc_getcciname(int cpuver)
    436 {
    437 	if (validargs(cpuver, 0))
    438 		switch (MAPCPUVER(cpuver)) {
    439 		case V_P5:
    440 			return ("Pentium");
    441 		case V_P5 | V_P5mmx:
    442 			return ("Pentium with MMX");
    443 		case V_P6:
    444 			return ("Pentium Pro, Pentium II");
    445 		case V_P6 | V_P6mmx:
    446 			return ("Pentium Pro with MMX, Pentium II");
    447 		default:
    448 			break;
    449 		}
    450 	return (NULL);
    451 }
    452 
    453 const char *
    454 cpc_getcpuref(int cpuver)
    455 {
    456 	if (validargs(cpuver, 0))
    457 		switch (MAPCPUVER(cpuver)) {
    458 		case V_P5:
    459 		case V_P5 | V_P5mmx:
    460 			return (gettext(
    461 			    "See Appendix A.2 of the \"Intel Architecture "
    462 			    "Software Developer's Manual,\" 243192, 1997"));
    463 		case V_P6:
    464 		case V_P6 | V_P6mmx:
    465 			return (gettext(
    466 			    "See Appendix A.1 of the \"Intel Architecture "
    467 			    "Software Developer's Manual,\" 243192, 1997"));
    468 		default:
    469 			break;
    470 		}
    471 	return (NULL);
    472 }
    473 
    474 /*
    475  * This is a functional interface to allow CPUs with fewer %pic registers
    476  * to share the same data structure as those with more %pic registers
    477  * within the same instruction set family.
    478  */
    479 uint_t
    480 cpc_getnpic(int cpuver)
    481 {
    482 	switch (cpuver) {
    483 	case CPC_PENTIUM:
    484 	case CPC_PENTIUM_MMX:
    485 	case CPC_PENTIUM_PRO:
    486 	case CPC_PENTIUM_PRO_MMX:
    487 #define	EVENT	((cpc_event_t *)0)
    488 		return (sizeof (EVENT->ce_pic) / sizeof	(EVENT->ce_pic[0]));
    489 #undef	EVENT
    490 	default:
    491 		return (0);
    492 	}
    493 }
    494 
    495 #define	BITS(v, u, l)	\
    496 	(((v) >> (l)) & ((1 << (1 + (u) - (l))) - 1))
    497 
    498 #include "getcpuid.h"
    499 
    500 /*
    501  * Return the version of the current processor.
    502  *
    503  * Version -1 is defined as 'not performance counter capable'
    504  */
    505 int
    506 cpc_getcpuver(void)
    507 {
    508 	static int ver = -1;
    509 	uint32_t maxeax;
    510 	uint32_t vbuf[4];
    511 
    512 	if (ver != -1)
    513 		return (ver);
    514 
    515 	maxeax = cpc_getcpuid(0, &vbuf[0], &vbuf[2], &vbuf[1]);
    516 	{
    517 		char *vendor = (char *)vbuf;
    518 		vendor[12] = '\0';
    519 
    520 		if (strcmp(vendor, "GenuineIntel") != 0)
    521 			return (ver);
    522 	}
    523 
    524 	if (maxeax >= 1) {
    525 		int family, model;
    526 		uint32_t eax, ebx, ecx, edx;
    527 
    528 		eax = cpc_getcpuid(1, &ebx, &ecx, &edx);
    529 
    530 		if ((family = BITS(eax, 11, 8)) == 0xf)
    531 			family = BITS(eax, 27, 20);
    532 		if ((model = BITS(eax, 7, 4)) == 0xf)
    533 			model = BITS(eax, 19, 16);
    534 
    535 		/*
    536 		 * map family and model into the performance
    537 		 * counter architectures we currently understand.
    538 		 *
    539 		 * See application note AP485 (from developer.intel.com)
    540 		 * for further explanation.
    541 		 */
    542 		switch (family) {
    543 		case 5:		/* Pentium and Pentium with MMX */
    544 			ver = model < 4 ?
    545 				CPC_PENTIUM : CPC_PENTIUM_MMX;
    546 			break;
    547 		case 6:		/* Pentium Pro and Pentium II and III */
    548 			ver = BITS(edx, 23, 23) ?	   /* mmx check */
    549 				CPC_PENTIUM_PRO_MMX : CPC_PENTIUM_PRO;
    550 			break;
    551 		default:
    552 		case 0xf:	/* Pentium IV */
    553 			break;
    554 		}
    555 	}
    556 
    557 	return (ver);
    558 }
    559