Home | History | Annotate | Download | only in pcbe
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * Performance Counter Back-End for Intel processors supporting Architectural
     28  * Performance Monitoring.
     29  */
     30 
     31 #include <sys/cpuvar.h>
     32 #include <sys/param.h>
     33 #include <sys/cpc_impl.h>
     34 #include <sys/cpc_pcbe.h>
     35 #include <sys/modctl.h>
     36 #include <sys/inttypes.h>
     37 #include <sys/systm.h>
     38 #include <sys/cmn_err.h>
     39 #include <sys/x86_archext.h>
     40 #include <sys/sdt.h>
     41 #include <sys/archsystm.h>
     42 #include <sys/privregs.h>
     43 #include <sys/ddi.h>
     44 #include <sys/sunddi.h>
     45 #include <sys/cred.h>
     46 #include <sys/policy.h>
     47 
/*
 * Forward declarations of the back-end entry points.  Every function
 * listed here except core_pcbe_init() is referenced by the core_pcbe_ops
 * table in this file.
 */
static int core_pcbe_init(void);
static uint_t core_pcbe_ncounters(void);
static const char *core_pcbe_impl_name(void);
static const char *core_pcbe_cpuref(void);
static char *core_pcbe_list_events(uint_t picnum);
static char *core_pcbe_list_attrs(void);
static uint64_t core_pcbe_event_coverage(char *event);
static uint64_t core_pcbe_overflow_bitmap(void);
static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
    void *token);
static void core_pcbe_program(void *token);
static void core_pcbe_allstop(void);
static void core_pcbe_sample(void *token);
static void core_pcbe_free(void *config);
     63 
/* Boolean constants */
#define	FALSE	0
#define	TRUE	1

/* Counter Type (the value kept in core_pcbe_config_t.core_pictype) */
#define	CORE_GPC	0	/* General-Purpose Counter (GPC) */
#define	CORE_FFC	1	/* Fixed-Function Counter (FFC) */
     70 
/*
 * MSR Addresses
 *
 * The *_BASE_* values are the registers of the first counter of each kind.
 * NOTE(review): presumably the registers of later counters are found by
 * adding the counter number to the base -- confirm against the users.
 */
#define	GPC_BASE_PMC		0x00c1	/* First GPC */
#define	GPC_BASE_PES		0x0186	/* First GPC Event Select register */
#define	FFC_BASE_PMC		0x0309	/* First FFC */
#define	PERF_FIXED_CTR_CTRL	0x038d	/* Used to enable/disable FFCs */
#define	PERF_GLOBAL_STATUS	0x038e	/* Overflow status register */
#define	PERF_GLOBAL_CTRL	0x038f	/* Used to enable/disable counting */
#define	PERF_GLOBAL_OVF_CTRL	0x0390	/* Used to clear overflow status */
     79 
/*
 * Processor Event Select register fields
 *
 * Single-bit flags occupy bits 16 through 23.  Note that CORE_ANYTHR
 * (bit 21) is listed last, out of bit order.
 */
#define	CORE_USR	(1ULL << 16)	/* Count while not in ring 0 */
#define	CORE_OS		(1ULL << 17)	/* Count while in ring 0 */
#define	CORE_EDGE	(1ULL << 18)	/* Enable edge detection */
#define	CORE_PC		(1ULL << 19)	/* Enable pin control */
#define	CORE_INT	(1ULL << 20)	/* Enable interrupt on overflow */
#define	CORE_EN		(1ULL << 22)	/* Enable counting */
#define	CORE_INV	(1ULL << 23)	/* Invert the CMASK */
#define	CORE_ANYTHR	(1ULL << 21)	/* Count event for any thread on core */

/*
 * Position and width of the unit mask (UMASK) and counter mask (CMASK)
 * fields.  NOTE(review): the *_MASK values are 8 bits wide and unshifted,
 * so they are presumably applied to a value before it is shifted into
 * place -- confirm against the users.
 */
#define	CORE_UMASK_SHIFT	8
#define	CORE_UMASK_MASK		0xffu
#define	CORE_CMASK_SHIFT	24
#define	CORE_CMASK_MASK		0xffu
     96 
/*
 * Fixed-function counter attributes
 *
 * These mirror the CORE_OS/CORE_USR/CORE_ANYTHR/CORE_INT flags of the
 * general-purpose event select register, but occupy the low four bits of
 * each counter's field in the fixed-function control register.
 */
#define	CORE_FFC_OS_EN	(1ULL << 0)	/* Count while in ring 0 */
#define	CORE_FFC_USR_EN	(1ULL << 1)	/* Count while not in ring 0 */
#define	CORE_FFC_ANYTHR	(1ULL << 2)	/* Count event for any thread on core */
#define	CORE_FFC_PMI	(1ULL << 3)	/* Enable interrupt on overflow */
    104 
/*
 * Number of bits for specifying each FFC's attributes in the control register
 * (one bit for each of the four CORE_FFC_* attribute flags).
 */
#define	CORE_FFC_ATTR_SIZE	4
    109 
/*
 * CondChgd and OvfBuffer fields of global status and overflow control registers
 * (the two most significant bits, 63 and 62).
 */
#define	CONDCHGD	(1ULL << 63)
#define	OVFBUFFER	(1ULL << 62)
#define	MASK_CONDCHGD_OVFBUFFER	(CONDCHGD | OVFBUFFER)

/*
 * All-zero control value.
 * NOTE(review): presumably written to PERF_GLOBAL_CTRL to stop every
 * counter -- confirm against the users.
 */
#define	ALL_STOPPED	0ULL
    118 
    119 #define	BITMASK_XBITS(x)	((1ull << (x)) - 1ull)
    120 
/*
 * Only the lower 32-bits can be written to in the general-purpose
 * counters.  The higher bits are extended from bit 31; all ones if
 * bit 31 is one and all zeros otherwise.
 *
 * The fixed-function counters do not have this restriction.
 *
 * The mask selects bits 31 through (width_gpc - 1), i.e. every bit of a
 * general-purpose counter except the low 31.  It reads the file-scope
 * variable width_gpc each time it is expanded.
 */
#define	BITS_EXTENDED_FROM_31	(BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31))
    129 
    130 #define	WRMSR(msr, value)						\
    131 	wrmsr((msr), (value));						\
    132 	DTRACE_PROBE2(wrmsr, uint64_t, (msr), uint64_t, (value));
    133 
    134 #define	RDMSR(msr, value)						\
    135 	(value) = rdmsr((msr));						\
    136 	DTRACE_PROBE2(rdmsr, uint64_t, (msr), uint64_t, (value));
    137 
/*
 * Configuration of a single counter.
 * NOTE(review): presumably created by core_pcbe_configure() and released
 * by core_pcbe_free() -- confirm against those functions.
 */
typedef struct core_pcbe_config {
	/* NOTE(review): presumably the raw counter value -- confirm */
	uint64_t	core_rawpic;
	uint64_t	core_ctl;	/* Event Select bits */
	uint64_t	core_pmc;	/* Counter register address */
	uint64_t	core_pes;	/* Event Select register address */
	/* NOTE(review): presumably the counter (pic) number -- confirm */
	uint_t		core_picno;
	uint8_t		core_pictype;	/* CORE_GPC or CORE_FFC */
} core_pcbe_config_t;
    146 
/*
 * Operations vector for this back end.  The initializer is positional, so
 * the entries must stay in the order of the members of pcbe_ops_t; the
 * trailing comment on each line names the member being initialized.
 */
pcbe_ops_t core_pcbe_ops = {
	PCBE_VER_1,			/* pcbe_ver */
	CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE,	/* pcbe_caps */
	core_pcbe_ncounters,		/* pcbe_ncounters */
	core_pcbe_impl_name,		/* pcbe_impl_name */
	core_pcbe_cpuref,		/* pcbe_cpuref */
	core_pcbe_list_events,		/* pcbe_list_events */
	core_pcbe_list_attrs,		/* pcbe_list_attrs */
	core_pcbe_event_coverage,	/* pcbe_event_coverage */
	core_pcbe_overflow_bitmap,	/* pcbe_overflow_bitmap */
	core_pcbe_configure,		/* pcbe_configure */
	core_pcbe_program,		/* pcbe_program */
	core_pcbe_allstop,		/* pcbe_allstop */
	core_pcbe_sample,		/* pcbe_sample */
	core_pcbe_free			/* pcbe_free */
};
    163 
/*
 * One entry of an event name table: the event's name, the event select
 * bits that are restricted for it (0 if none; see ALL_CORES and ALL_AGENTS),
 * and its 8-bit event number.
 */
struct nametable_core_uarch {
	const char	*name;
	uint64_t	restricted_bits;
	uint8_t		event_num;
};

/* event_num value of the sentinel entry that ends each name table */
#define	NT_END	0xFF
    171 
/*
 * Counting an event for all cores or all bus agents requires cpc_cpu privileges
 *
 * These are the values used in the restricted_bits column of the event
 * name tables.
 */
#define	ALL_CORES	(1ULL << 15)
#define	ALL_AGENTS	(1ULL << 13)
    177 
/*
 * The events listed in the following table can be counted on all
 * general-purpose counters on processors that are of Penryn and Merom Family
 *
 * Each entry is { name, restricted_bits, event_num }.  The table ends with
 * a sentinel entry whose name is empty and whose event_num is NT_END.
 */
static const struct nametable_core_uarch cmn_gpc_events_core_uarch[] = {
	/* Alphabetical order of event name */

	{ "baclears",			0x0,	0xe6 },
	{ "bogus_br",			0x0,	0xe4 },
	{ "br_bac_missp_exec",		0x0,	0x8a },

	{ "br_call_exec",		0x0,	0x92 },
	{ "br_call_missp_exec",		0x0,	0x93 },
	{ "br_cnd_exec",		0x0,	0x8b },

	{ "br_cnd_missp_exec",		0x0,	0x8c },
	{ "br_ind_call_exec",		0x0,	0x94 },
	{ "br_ind_exec",		0x0,	0x8d },

	{ "br_ind_missp_exec",		0x0,	0x8e },
	{ "br_inst_decoded",		0x0,	0xe0 },
	{ "br_inst_exec",		0x0,	0x88 },

	{ "br_inst_retired",		0x0,	0xc4 },
	{ "br_inst_retired_mispred",	0x0,	0xc5 },
	{ "br_missp_exec",		0x0,	0x89 },

	{ "br_ret_bac_missp_exec",	0x0,	0x91 },
	{ "br_ret_exec",		0x0,	0x8f },
	{ "br_ret_missp_exec",		0x0,	0x90 },

	{ "br_tkn_bubble_1",		0x0,	0x97 },
	{ "br_tkn_bubble_2",		0x0,	0x98 },
	{ "bus_bnr_drv",		ALL_AGENTS,	0x61 },

	{ "bus_data_rcv",		ALL_CORES,	0x64 },
	{ "bus_drdy_clocks",		ALL_AGENTS,	0x62 },
	{ "bus_hit_drv",		ALL_AGENTS,	0x7a },

	{ "bus_hitm_drv",		ALL_AGENTS,	0x7b },
	{ "bus_io_wait",		ALL_CORES,	0x7f },
	{ "bus_lock_clocks",		ALL_CORES | ALL_AGENTS,	0x63 },

	{ "bus_request_outstanding",	ALL_CORES | ALL_AGENTS,	0x60 },
	{ "bus_trans_any",		ALL_CORES | ALL_AGENTS,	0x70 },
	{ "bus_trans_brd",		ALL_CORES | ALL_AGENTS,	0x65 },

	{ "bus_trans_burst",		ALL_CORES | ALL_AGENTS,	0x6e },
	{ "bus_trans_def",		ALL_CORES | ALL_AGENTS,	0x6d },
	{ "bus_trans_ifetch",		ALL_CORES | ALL_AGENTS,	0x68 },

	{ "bus_trans_inval",		ALL_CORES | ALL_AGENTS,	0x69 },
	{ "bus_trans_io",		ALL_CORES | ALL_AGENTS,	0x6c },
	{ "bus_trans_mem",		ALL_CORES | ALL_AGENTS,	0x6f },

	{ "bus_trans_p",		ALL_CORES | ALL_AGENTS,	0x6b },
	{ "bus_trans_pwr",		ALL_CORES | ALL_AGENTS,	0x6a },
	{ "bus_trans_rfo",		ALL_CORES | ALL_AGENTS,	0x66 },

	{ "bus_trans_wb",		ALL_CORES | ALL_AGENTS,	0x67 },
	{ "busq_empty",			ALL_CORES,	0x7d },
	{ "cmp_snoop",			ALL_CORES,	0x78 },

	{ "cpu_clk_unhalted",		0x0,	0x3c },
	{ "cycles_int",			0x0,	0xc6 },
	{ "cycles_l1i_mem_stalled",	0x0,	0x86 },

	{ "dtlb_misses",		0x0,	0x08 },
	{ "eist_trans",			0x0,	0x3a },
	{ "esp",			0x0,	0xab },

	{ "ext_snoop",			ALL_AGENTS,	0x77 },
	{ "fp_mmx_trans",		0x0,	0xcc },
	{ "hw_int_rcv",			0x0,	0xc8 },

	{ "ild_stall",			0x0,	0x87 },
	{ "inst_queue",			0x0,	0x83 },
	{ "inst_retired",		0x0,	0xc0 },

	{ "itlb",			0x0,	0x82 },
	{ "itlb_miss_retired",		0x0,	0xc9 },
	{ "l1d_all_ref",		0x0,	0x43 },

	{ "l1d_cache_ld",		0x0,	0x40 },
	{ "l1d_cache_lock",		0x0,	0x42 },
	{ "l1d_cache_st",		0x0,	0x41 },

	{ "l1d_m_evict",		0x0,	0x47 },
	{ "l1d_m_repl",			0x0,	0x46 },
	{ "l1d_pend_miss",		0x0,	0x48 },

	{ "l1d_prefetch",		0x0,	0x4e },
	{ "l1d_repl",			0x0,	0x45 },
	{ "l1d_split",			0x0,	0x49 },

	{ "l1i_misses",			0x0,	0x81 },
	{ "l1i_reads",			0x0,	0x80 },
	{ "l2_ads",			ALL_CORES,	0x21 },

	{ "l2_dbus_busy_rd",		ALL_CORES,	0x23 },
	{ "l2_ifetch",			ALL_CORES,	0x28 },
	{ "l2_ld",			ALL_CORES,	0x29 },

	{ "l2_lines_in",		ALL_CORES,	0x24 },
	{ "l2_lines_out",		ALL_CORES,	0x26 },
	{ "l2_lock",			ALL_CORES,	0x2b },

	{ "l2_m_lines_in",		ALL_CORES,	0x25 },
	{ "l2_m_lines_out",		ALL_CORES,	0x27 },
	{ "l2_no_req",			ALL_CORES,	0x32 },

	{ "l2_reject_busq",		ALL_CORES,	0x30 },
	{ "l2_rqsts",			ALL_CORES,	0x2e },
	{ "l2_st",			ALL_CORES,	0x2a },

	{ "load_block",			0x0,	0x03 },
	{ "load_hit_pre",		0x0,	0x4c },
	{ "machine_nukes",		0x0,	0xc3 },

	{ "macro_insts",		0x0,	0xaa },
	{ "memory_disambiguation",	0x0,	0x09 },
	{ "misalign_mem_ref",		0x0,	0x05 },
	{ "page_walks",			0x0,	0x0c },

	{ "pref_rqsts_dn",		0x0,	0xf8 },
	{ "pref_rqsts_up",		0x0,	0xf0 },
	{ "rat_stalls",			0x0,	0xd2 },

	{ "resource_stalls",		0x0,	0xdc },
	{ "rs_uops_dispatched",		0x0,	0xa0 },
	{ "seg_reg_renames",		0x0,	0xd5 },

	{ "seg_rename_stalls",		0x0,	0xd4 },
	{ "segment_reg_loads",		0x0,	0x06 },
	{ "simd_assist",		0x0,	0xcd },

	{ "simd_comp_inst_retired",	0x0,	0xca },
	{ "simd_inst_retired",		0x0,	0xc7 },
	{ "simd_instr_retired",		0x0,	0xce },

	{ "simd_sat_instr_retired",	0x0,	0xcf },
	{ "simd_sat_uop_exec",		0x0,	0xb1 },
	{ "simd_uop_type_exec",		0x0,	0xb3 },

	{ "simd_uops_exec",		0x0,	0xb0 },
	{ "snoop_stall_drv",		ALL_CORES | ALL_AGENTS,	0x7e },
	{ "sse_pre_exec",		0x0,	0x07 },

	{ "sse_pre_miss",		0x0,	0x4b },
	{ "store_block",		0x0,	0x04 },
	{ "thermal_trip",		0x0,	0x3b },

	{ "uops_retired",		0x0,	0xc2 },
	{ "x87_ops_retired",		0x0,	0xc1 },
	{ "",				0x0,	NT_END }
};
    334 
/*
 * If any of the pic specific events require privileges, make sure to add a
 * check in configure_gpc() to find whether an event hard-coded as a number by
 * the user has any privilege requirements
 *
 * Events specific to pic0.  Entries are { name, restricted_bits, event_num };
 * none of them has restricted bits.  The table ends with an NT_END sentinel.
 */
static const struct nametable_core_uarch pic0_events[] = {
	/* Alphabetical order of event name */

	{ "cycles_div_busy",		0x0,	0x14 },
	{ "fp_comp_ops_exe",		0x0,	0x10 },
	{ "idle_during_div",		0x0,	0x18 },

	{ "mem_load_retired",		0x0,	0xcb },
	{ "rs_uops_dispatched_port",	0x0,	0xa1 },
	{ "",				0x0,	NT_END }
};
    351 
/*
 * Events specific to pic1.  Entries are { name, restricted_bits, event_num };
 * none of them has restricted bits.  The table ends with an NT_END sentinel.
 */
static const struct nametable_core_uarch pic1_events[] = {
	/* Alphabetical order of event name */

	{ "delayed_bypass",	0x0,	0x19 },
	{ "div",		0x0,	0x13 },
	{ "fp_assist",		0x0,	0x11 },

	{ "mul",		0x0,	0x12 },
	{ "",			0x0,	NT_END }
};
    362 
/*
 * Names of the fixed-function counter events, as NULL-terminated arrays.
 * FFC entries must be in order
 *
 * The two tables differ only in the name of the second entry
 * ("cpu_clk_unhalted.core" versus "cpu_clk_unhalted.thread").
 */
char *ffc_names_non_htt[] = {
	"instr_retired.any",
	"cpu_clk_unhalted.core",
	"cpu_clk_unhalted.ref",
	NULL
};

char *ffc_names_htt[] = {
	"instr_retired.any",
	"cpu_clk_unhalted.thread",
	"cpu_clk_unhalted.ref",
	NULL
};

/*
 * NOTE(review): presumably set at initialization to one of the two tables
 * above, depending on whether hyper-threading is present -- confirm.
 */
char **ffc_names = NULL;
    379 
/*
 * File-scope state describing the performance monitoring hardware.
 * NOTE(review): the meanings below are inferred from the names only;
 * presumably all of these are filled in by core_pcbe_init() -- confirm.
 */
static char	**gpc_names = NULL;
static uint32_t	versionid;
static uint64_t	num_gpc;	/* presumably number of GPCs */
static uint64_t	width_gpc;	/* GPC width in bits (BITS_EXTENDED_FROM_31) */
static uint64_t	mask_gpc;
static uint64_t	num_ffc;	/* presumably number of FFCs */
static uint64_t	width_ffc;	/* presumably FFC width in bits */
static uint64_t	mask_ffc;
static uint_t	total_pmc;
static uint64_t	control_ffc;
static uint64_t	control_gpc;
static uint64_t	control_mask;
static uint32_t	arch_events_vector;
    393 
    394 #define	IMPL_NAME_LEN 100
    395 static char core_impl_name[IMPL_NAME_LEN];
    396 
    397 static const char *core_cpuref =
    398 	"See Appendix A of the \"Intel 64 and IA-32 Architectures Software" \
    399 	" Developer's Manual Volume 3B: System Programming Guide, Part 2\"" \
    400 	" Order Number: 253669-026US, Februrary 2008";
    401 
/*
 * One entry of a processor event table: the event select code and unit
 * mask that identify the event, a bitmask of the counters that can count it
 * (built from the C0..C3 and C_ALL macros), and the event's name.
 */
struct events_table_t {
	uint8_t		eventselect;
	uint8_t		unitmask;
	uint64_t	supported_counters;
	const char	*name;
};
    408 
    409 /* Used to describe which counters support an event */
    410 #define	C(x) (1 << (x))
    411 #define	C0 C(0)
    412 #define	C1 C(1)
    413 #define	C2 C(2)
    414 #define	C3 C(3)
    415 #define	C_ALL 0xFFFFFFFFFFFFFFFF
    416 
/*
 * Architectural events
 *
 * Initializer entries shared by both architectural event tables.  Each
 * entry is { eventselect, unitmask, supported_counters, name }; all of these
 * events are marked as supported on every counter (C_ALL).  The macro
 * expands to a comma-separated list with no trailing comma.
 */
#define	ARCH_EVENTS_COMMON					\
	{ 0xc0, 0x00, C_ALL, "inst_retired.any_p" },		\
	{ 0x3c, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" },	\
	{ 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" },	\
	{ 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" },	\
	{ 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" },	\
	{ 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" }
    425 
/*
 * Architectural event tables.  Both consist of one unhalted-clock entry
 * (event 0x3c, unit mask 0x00) followed by ARCH_EVENTS_COMMON; they differ
 * only in the name given to that first entry.  Neither table has a
 * terminating sentinel entry.
 */
const struct events_table_t arch_events_table_non_htt[] = {
	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.core" },
	ARCH_EVENTS_COMMON
};

const struct events_table_t arch_events_table_htt[] = {
	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" },
	ARCH_EVENTS_COMMON
};

/*
 * NOTE(review): presumably pointed at one of the two tables above during
 * initialization; the two counters that follow are presumably set there
 * as well -- confirm.
 */
const struct events_table_t *arch_events_table = NULL;
static uint64_t known_arch_events;
static uint64_t known_ffc_num;
    439 
/*
 * Initializer entries for a table of struct events_table_t.  Each entry is
 * { eventselect, unitmask, supported_counters, name }.  Every event here is
 * supported either on counters 0-3 (C0|C1|C2|C3) or only on counters 0 and 1
 * (C0|C1).  The macro expands to a comma-separated list with no trailing
 * comma.
 * NOTE(review): going by the name, these are presumably the events of
 * family 6, model 26 processors -- confirm.
 */
#define	EVENTS_FAM6_MOD26						\
									\
{ 0x80, 0x04, C0|C1|C2|C3, "l1i.cycles_stalled" },			\
{ 0x80, 0x01, C0|C1|C2|C3, "l1i.hits" },				\
{ 0x80, 0x02, C0|C1|C2|C3, "l1i.misses" },				\
									\
{ 0x80, 0x03, C0|C1|C2|C3, "l1i.reads" },				\
{ 0x82, 0x01, C0|C1|C2|C3, "large_itlb.hit" },				\
{ 0x87, 0x0F, C0|C1|C2|C3, "ild_stall.any" },				\
									\
{ 0x87, 0x04, C0|C1|C2|C3, "ild_stall.iq_full" },			\
{ 0x87, 0x01, C0|C1|C2|C3, "ild_stall.lcp" },				\
{ 0x87, 0x02, C0|C1|C2|C3, "ild_stall.mru" },				\
									\
{ 0x87, 0x08, C0|C1|C2|C3, "ild_stall.regen" },				\
{ 0xE6, 0x02, C0|C1|C2|C3, "baclear.bad_target" },			\
{ 0xE6, 0x01, C0|C1|C2|C3, "baclear.clear" },				\
									\
{ 0xE8, 0x01, C0|C1|C2|C3, "bpu_clears.early" },			\
{ 0xE8, 0x02, C0|C1|C2|C3, "bpu_clears.late" },				\
{ 0xE5, 0x01, C0|C1|C2|C3, "bpu_missed_call_ret" },			\
									\
{ 0xE0, 0x01, C0|C1|C2|C3, "br_inst_decoded" },				\
{ 0x88, 0x7F, C0|C1|C2|C3, "br_inst_exec.any" },			\
{ 0x88, 0x01, C0|C1|C2|C3, "br_inst_exec.cond" },			\
									\
{ 0x88, 0x02, C0|C1|C2|C3, "br_inst_exec.direct" },			\
{ 0x88, 0x10, C0|C1|C2|C3, "br_inst_exec.direct_near_call" },		\
{ 0x88, 0x20, C0|C1|C2|C3, "br_inst_exec.indirect_near_call" },		\
									\
{ 0x88, 0x04, C0|C1|C2|C3, "br_inst_exec.indirect_non_call" },		\
{ 0x88, 0x30, C0|C1|C2|C3, "br_inst_exec.near_calls" },			\
{ 0x88, 0x07, C0|C1|C2|C3, "br_inst_exec.non_calls" },			\
									\
{ 0x88, 0x08, C0|C1|C2|C3, "br_inst_exec.return_near" },		\
{ 0x88, 0x40, C0|C1|C2|C3, "br_inst_exec.taken" },			\
{ 0x89, 0x7F, C0|C1|C2|C3, "br_misp_exec.any" },			\
									\
{ 0x89, 0x01, C0|C1|C2|C3, "br_misp_exec.cond" },			\
{ 0x89, 0x02, C0|C1|C2|C3, "br_misp_exec.direct" },			\
{ 0x89, 0x10, C0|C1|C2|C3, "br_misp_exec.direct_near_call" },		\
									\
{ 0x89, 0x20, C0|C1|C2|C3, "br_misp_exec.indirect_near_call" },		\
{ 0x89, 0x04, C0|C1|C2|C3, "br_misp_exec.indirect_non_call" },		\
{ 0x89, 0x30, C0|C1|C2|C3, "br_misp_exec.near_calls" },			\
									\
{ 0x89, 0x07, C0|C1|C2|C3, "br_misp_exec.non_calls" },			\
{ 0x89, 0x08, C0|C1|C2|C3, "br_misp_exec.return_near" },		\
{ 0x89, 0x40, C0|C1|C2|C3, "br_misp_exec.taken" },			\
									\
{ 0x17, 0x01, C0|C1|C2|C3, "inst_queue_writes" },			\
{ 0x1E, 0x01, C0|C1|C2|C3, "inst_queue_write_cycles" },			\
{ 0xA7, 0x01, C0|C1|C2|C3, "baclear_force_iq" },			\
									\
{ 0xD0, 0x01, C0|C1|C2|C3, "macro_insts.decoded" },			\
{ 0xA6, 0x01, C0|C1|C2|C3, "macro_insts.fusions_decoded" },		\
{ 0x19, 0x01, C0|C1|C2|C3, "two_uop_insts_decoded" },			\
									\
{ 0x18, 0x01, C0|C1|C2|C3, "inst_decoded.dec0" },			\
{ 0xD1, 0x04, C0|C1|C2|C3, "uops_decoded.esp_folding" },		\
{ 0xD1, 0x08, C0|C1|C2|C3, "uops_decoded.esp_sync" },			\
									\
{ 0xD1, 0x02, C0|C1|C2|C3, "uops_decoded.ms" },				\
{ 0x20, 0x01, C0|C1|C2|C3, "lsd_overflow" },				\
{ 0x0E, 0x01, C0|C1|C2|C3, "uops_issued.any" },				\
									\
{ 0x0E, 0x02, C0|C1|C2|C3, "uops_issued.fused" },			\
{ 0xA2, 0x20, C0|C1|C2|C3, "resource_stalls.fpcw" },			\
{ 0xA2, 0x02, C0|C1|C2|C3, "resource_stalls.load" },			\
									\
{ 0xA2, 0x40, C0|C1|C2|C3, "resource_stalls.mxcsr" },			\
{ 0xA2, 0x04, C0|C1|C2|C3, "resource_stalls.rs_full" },			\
{ 0xA2, 0x08, C0|C1|C2|C3, "resource_stalls.store" },			\
									\
{ 0xA2, 0x01, C0|C1|C2|C3, "resource_stalls.any" },			\
{ 0xD2, 0x01, C0|C1|C2|C3, "rat_stalls.flags" },			\
{ 0xD2, 0x02, C0|C1|C2|C3, "rat_stalls.registers" },			\
									\
{ 0xD2, 0x04, C0|C1|C2|C3, "rat_stalls.rob_read_port" },		\
{ 0xD2, 0x08, C0|C1|C2|C3, "rat_stalls.scoreboard" },			\
{ 0xD2, 0x0F, C0|C1|C2|C3, "rat_stalls.any" },				\
									\
{ 0xD4, 0x01, C0|C1|C2|C3, "seg_rename_stalls" },			\
{ 0xD5, 0x01, C0|C1|C2|C3, "es_reg_renames" },				\
{ 0x10, 0x02, C0|C1|C2|C3, "fp_comp_ops_exe.mmx" },			\
									\
{ 0x10, 0x80, C0|C1|C2|C3, "fp_comp_ops_exe.sse_double_precision" },	\
{ 0x10, 0x04, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp" },			\
{ 0x10, 0x10, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_packed" },		\
									\
{ 0x10, 0x20, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_scalar" },		\
{ 0x10, 0x40, C0|C1|C2|C3, "fp_comp_ops_exe.sse_single_precision" },	\
{ 0x10, 0x08, C0|C1|C2|C3, "fp_comp_ops_exe.sse2_integer" },		\
									\
{ 0x10, 0x01, C0|C1|C2|C3, "fp_comp_ops_exe.x87" },			\
{ 0x14, 0x01, C0|C1|C2|C3, "arith.cycles_div_busy" },			\
{ 0x14, 0x02, C0|C1|C2|C3, "arith.mul" },				\
									\
{ 0x12, 0x04, C0|C1|C2|C3, "simd_int_128.pack" },			\
{ 0x12, 0x20, C0|C1|C2|C3, "simd_int_128.packed_arith" },		\
{ 0x12, 0x10, C0|C1|C2|C3, "simd_int_128.packed_logical" },		\
									\
{ 0x12, 0x01, C0|C1|C2|C3, "simd_int_128.packed_mpy" },			\
{ 0x12, 0x02, C0|C1|C2|C3, "simd_int_128.packed_shift" },		\
{ 0x12, 0x40, C0|C1|C2|C3, "simd_int_128.shuffle_move" },		\
									\
{ 0x12, 0x08, C0|C1|C2|C3, "simd_int_128.unpack" },			\
{ 0xFD, 0x04, C0|C1|C2|C3, "simd_int_64.pack" },			\
{ 0xFD, 0x20, C0|C1|C2|C3, "simd_int_64.packed_arith" },		\
									\
{ 0xFD, 0x10, C0|C1|C2|C3, "simd_int_64.packed_logical" },		\
{ 0xFD, 0x01, C0|C1|C2|C3, "simd_int_64.packed_mpy" },			\
{ 0xFD, 0x02, C0|C1|C2|C3, "simd_int_64.packed_shift" },		\
									\
{ 0xFD, 0x40, C0|C1|C2|C3, "simd_int_64.shuffle_move" },		\
{ 0xFD, 0x08, C0|C1|C2|C3, "simd_int_64.unpack" },			\
{ 0xB1, 0x01, C0|C1|C2|C3, "uops_executed.port0" },			\
									\
{ 0xB1, 0x02, C0|C1|C2|C3, "uops_executed.port1" },			\
{ 0x40, 0x04, C0|C1, "l1d_cache_ld.e_state" },				\
{ 0x40, 0x01, C0|C1, "l1d_cache_ld.i_state" },				\
									\
{ 0x40, 0x08, C0|C1, "l1d_cache_ld.m_state" },				\
{ 0x40, 0x0F, C0|C1, "l1d_cache_ld.mesi" },				\
{ 0x40, 0x02, C0|C1, "l1d_cache_ld.s_state" },				\
									\
{ 0x41, 0x04, C0|C1, "l1d_cache_st.e_state" },				\
{ 0x41, 0x08, C0|C1, "l1d_cache_st.m_state" },				\
{ 0x41, 0x0F, C0|C1, "l1d_cache_st.mesi" },				\
									\
{ 0x41, 0x02, C0|C1, "l1d_cache_st.s_state" },				\
{ 0x42, 0x04, C0|C1, "l1d_cache_lock.e_state" },			\
{ 0x42, 0x01, C0|C1, "l1d_cache_lock.hit" },				\
									\
{ 0x42, 0x08, C0|C1, "l1d_cache_lock.m_state" },			\
{ 0x42, 0x02, C0|C1, "l1d_cache_lock.s_state" },			\
{ 0x43, 0x01, C0|C1, "l1d_all_ref.any" },				\
									\
{ 0x43, 0x02, C0|C1, "l1d_all_ref.cacheable" },				\
{ 0x4B, 0x01, C0|C1, "mmx2_mem_exec.nta" },				\
{ 0x4C, 0x01, C0|C1, "load_hit_pre" },					\
									\
{ 0x4E, 0x02, C0|C1, "l1d_prefetch.miss" },				\
{ 0x4E, 0x01, C0|C1, "l1d_prefetch.requests" },				\
{ 0x4E, 0x04, C0|C1, "l1d_prefetch.triggers" },				\
									\
{ 0x51, 0x04, C0|C1, "l1d.m_evict" },					\
{ 0x51, 0x02, C0|C1, "l1d.m_repl" },					\
{ 0x51, 0x08, C0|C1, "l1d.m_snoop_evict" },				\
									\
{ 0x51, 0x01, C0|C1, "l1d.repl" },					\
{ 0x52, 0x01, C0|C1, "l1d_cache_prefetch_lock_fb_hit" },		\
{ 0x53, 0x01, C0|C1, "l1d_cache_lock_fb_hit" },				\
									\
{ 0x63, 0x02, C0|C1, "cache_lock_cycles.l1d" },				\
{ 0x63, 0x01, C0|C1, "cache_lock_cycles.l1d_l2" },			\
{ 0x06, 0x04, C0|C1|C2|C3, "store_blocks.at_ret" },			\
									\
{ 0x06, 0x08, C0|C1|C2|C3, "store_blocks.l1d_block" },			\
{ 0x06, 0x01, C0|C1|C2|C3, "store_blocks.not_sta" },			\
{ 0x06, 0x02, C0|C1|C2|C3, "store_blocks.sta" },			\
									\
{ 0x13, 0x07, C0|C1|C2|C3, "load_dispatch.any" },			\
{ 0x13, 0x04, C0|C1|C2|C3, "load_dispatch.mob" },			\
{ 0x13, 0x01, C0|C1|C2|C3, "load_dispatch.rs" },			\
									\
{ 0x13, 0x02, C0|C1|C2|C3, "load_dispatch.rs_delayed" },		\
{ 0x08, 0x01, C0|C1|C2|C3, "dtlb_load_misses.any" },			\
{ 0x08, 0x20, C0|C1|C2|C3, "dtlb_load_misses.pde_miss" },		\
									\
{ 0x08, 0x02, C0|C1|C2|C3, "dtlb_load_misses.walk_completed" },		\
{ 0x49, 0x01, C0|C1|C2|C3, "dtlb_misses.any" },				\
{ 0x49, 0x10, C0|C1|C2|C3, "dtlb_misses.stlb_hit" },			\
									\
{ 0x49, 0x02, C0|C1|C2|C3, "dtlb_misses.walk_completed" },		\
{ 0x4F, 0x02, C0|C1|C2|C3, "ept.epde_miss" },				\
{ 0x4F, 0x08, C0|C1|C2|C3, "ept.epdpe_miss" },				\
									\
{ 0x85, 0x01, C0|C1|C2|C3, "itlb_misses.any" },				\
{ 0x85, 0x02, C0|C1|C2|C3, "itlb_misses.walk_completed" },		\
{ 0x24, 0xAA, C0|C1|C2|C3, "l2_rqsts.miss" },				\
									\
{ 0x24, 0xFF, C0|C1|C2|C3, "l2_rqsts.references" },			\
{ 0x24, 0x10, C0|C1|C2|C3, "l2_rqsts.ifetch_hit" },			\
{ 0x24, 0x20, C0|C1|C2|C3, "l2_rqsts.ifetch_miss" },			\
									\
{ 0x24, 0x30, C0|C1|C2|C3, "l2_rqsts.ifetches" },			\
{ 0x24, 0x01, C0|C1|C2|C3, "l2_rqsts.ld_hit" },				\
{ 0x24, 0x02, C0|C1|C2|C3, "l2_rqsts.ld_miss" },			\
									\
{ 0x24, 0x03, C0|C1|C2|C3, "l2_rqsts.loads" },				\
{ 0x24, 0x40, C0|C1|C2|C3, "l2_rqsts.prefetch_hit" },			\
{ 0x24, 0x80, C0|C1|C2|C3, "l2_rqsts.prefetch_miss" },			\
									\
{ 0x24, 0xC0, C0|C1|C2|C3, "l2_rqsts.prefetches" },			\
{ 0x24, 0x04, C0|C1|C2|C3, "l2_rqsts.rfo_hit" },			\
{ 0x24, 0x08, C0|C1|C2|C3, "l2_rqsts.rfo_miss" },			\
									\
{ 0x24, 0x0C, C0|C1|C2|C3, "l2_rqsts.rfos" },				\
{ 0x26, 0xFF, C0|C1|C2|C3, "l2_data_rqsts.any" },			\
{ 0x26, 0x04, C0|C1|C2|C3, "l2_data_rqsts.demand.e_state" },		\
									\
{ 0x26, 0x01, C0|C1|C2|C3, "l2_data_rqsts.demand.i_state" },		\
{ 0x26, 0x08, C0|C1|C2|C3, "l2_data_rqsts.demand.m_state" },		\
{ 0x26, 0x0F, C0|C1|C2|C3, "l2_data_rqsts.demand.mesi" },		\
									\
{ 0x26, 0x02, C0|C1|C2|C3, "l2_data_rqsts.demand.s_state" },		\
{ 0x26, 0x40, C0|C1|C2|C3, "l2_data_rqsts.prefetch.e_state" },		\
{ 0x26, 0x10, C0|C1|C2|C3, "l2_data_rqsts.prefetch.i_state" },		\
									\
{ 0x26, 0x80, C0|C1|C2|C3, "l2_data_rqsts.prefetch.m_state" },		\
{ 0x26, 0xF0, C0|C1|C2|C3, "l2_data_rqsts.prefetch.mesi" },		\
{ 0x26, 0x20, C0|C1|C2|C3, "l2_data_rqsts.prefetch.s_state" },		\
									\
{ 0x27, 0x40, C0|C1|C2|C3, "l2_write.lock.e_state" },			\
{ 0x27, 0x10, C0|C1|C2|C3, "l2_write.lock.i_state" },			\
{ 0x27, 0x20, C0|C1|C2|C3, "l2_write.lock.s_state" },			\
									\
{ 0x27, 0x0E, C0|C1|C2|C3, "l2_write.rfo.hit" },			\
{ 0x27, 0x01, C0|C1|C2|C3, "l2_write.rfo.i_state" },			\
{ 0x27, 0x08, C0|C1|C2|C3, "l2_write.rfo.m_state" },			\
									\
{ 0x27, 0x0F, C0|C1|C2|C3, "l2_write.rfo.mesi" },			\
{ 0x27, 0x02, C0|C1|C2|C3, "l2_write.rfo.s_state" },			\
{ 0x28, 0x04, C0|C1|C2|C3, "l1d_wb_l2.e_state" },			\
									\
{ 0x28, 0x01, C0|C1|C2|C3, "l1d_wb_l2.i_state" },			\
{ 0x28, 0x08, C0|C1|C2|C3, "l1d_wb_l2.m_state" },			\
{ 0xF0, 0x80, C0|C1|C2|C3, "l2_transactions.any" },			\
									\
{ 0xF0, 0x20, C0|C1|C2|C3, "l2_transactions.fill" },			\
{ 0xF0, 0x04, C0|C1|C2|C3, "l2_transactions.ifetch" },			\
{ 0xF0, 0x10, C0|C1|C2|C3, "l2_transactions.l1d_wb" },			\
									\
{ 0xF0, 0x01, C0|C1|C2|C3, "l2_transactions.load" },			\
{ 0xF0, 0x08, C0|C1|C2|C3, "l2_transactions.prefetch" },		\
{ 0xF0, 0x02, C0|C1|C2|C3, "l2_transactions.rfo" },			\
									\
{ 0xF0, 0x40, C0|C1|C2|C3, "l2_transactions.wb" },			\
{ 0xF1, 0x07, C0|C1|C2|C3, "l2_lines_in.any" },				\
{ 0xF1, 0x04, C0|C1|C2|C3, "l2_lines_in.e_state" },			\
									\
{ 0xF1, 0x02, C0|C1|C2|C3, "l2_lines_in.s_state" },			\
{ 0xF2, 0x0F, C0|C1|C2|C3, "l2_lines_out.any" },			\
{ 0xF2, 0x01, C0|C1|C2|C3, "l2_lines_out.demand_clean" },		\
									\
{ 0xF2, 0x02, C0|C1|C2|C3, "l2_lines_out.demand_dirty" },		\
{ 0xF2, 0x04, C0|C1|C2|C3, "l2_lines_out.prefetch_clean" },		\
{ 0x6C, 0x01, C0|C1|C2|C3, "io_transactions" },				\
									\
{ 0xB0, 0x80, C0|C1|C2|C3, "offcore_requests.any" },			\
{ 0xB0, 0x10, C0|C1|C2|C3, "offcore_requests.any.rfo" },		\
{ 0xB0, 0x40, C0|C1|C2|C3, "offcore_requests.l1d_writeback" },		\
									\
{ 0xB8, 0x01, C0|C1|C2|C3, "snoop_response.hit" },			\
{ 0xB8, 0x02, C0|C1|C2|C3, "snoop_response.hite" },			\
{ 0xB8, 0x04, C0|C1|C2|C3, "snoop_response.hitm" },			\
									\
{ 0xF4, 0x10, C0|C1|C2|C3, "sq_misc.split_lock" },			\
{ 0x0B, 0x01, C0|C1|C2|C3, "mem_inst_retired.loads" },			\
{ 0x0B, 0x02, C0|C1|C2|C3, "mem_inst_retired.stores" },			\
									\
{ 0xC0, 0x04, C0|C1|C2|C3, "inst_retired.mmx" },			\
{ 0xC0, 0x02, C0|C1|C2|C3, "inst_retired.x87" },			\
{ 0xC7, 0x04, C0|C1|C2|C3, "ssex_uops_retired.packed_double" },		\
									\
{ 0xC7, 0x01, C0|C1|C2|C3, "ssex_uops_retired.packed_single" },		\
{ 0xC7, 0x08, C0|C1|C2|C3, "ssex_uops_retired.scalar_double" },		\
{ 0xC7, 0x02, C0|C1|C2|C3, "ssex_uops_retired.scalar_single" },		\
									\
{ 0xC7, 0x10, C0|C1|C2|C3, "ssex_uops_retired.vector_integer" },	\
{ 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.any" },			\
{ 0xC2, 0x04, C0|C1|C2|C3, "uops_retired.macro_fused" },		\
									\
{ 0xC8, 0x20, C0|C1|C2|C3, "itlb_miss_retired" },			\
{ 0xCB, 0x80, C0|C1|C2|C3, "mem_load_retired.dtlb_miss" },		\
{ 0xCB, 0x40, C0|C1|C2|C3, "mem_load_retired.hit_lfb" },		\
									\
{ 0xCB, 0x01, C0|C1|C2|C3, "mem_load_retired.l1d_hit" },		\
{ 0xCB, 0x02, C0|C1|C2|C3, "mem_load_retired.l2_hit" },			\
{ 0xCB, 0x10, C0|C1|C2|C3, "mem_load_retired.llc_miss" },		\
									\
{ 0xCB, 0x04, C0|C1|C2|C3, "mem_load_retired.llc_unshared_hit" },	\
{ 0xCB, 0x08, C0|C1|C2|C3, "mem_load_retired.other_core_l2_hit_hitm" },	\
{ 0x0F, 0x02, C0|C1|C2|C3, "mem_uncore_retired.other_core_l2_hitm" },	\
									\
{ 0x0F, 0x08, C0|C1|C2|C3, "mem_uncore_retired.remote_cache_local_home_hit" },\
{ 0x0F, 0x10, C0|C1|C2|C3, "mem_uncore_retired.remote_dram" },		\
{ 0x0F, 0x20, C0|C1|C2|C3, "mem_uncore_retired.local_dram" },		\
									\
{ 0x0C, 0x01, C0|C1|C2|C3, "mem_store_retired.dtlb_miss" },		\
{ 0xC4, 0x01, C0|C1|C2|C3, "br_inst_retired.conditional" },		\
{ 0xC4, 0x02, C0|C1|C2|C3, "br_inst_retired.near_call" },		\
									\
{ 0xC5, 0x02, C0|C1|C2|C3, "br_misp_retired.near_call" },		\
{ 0xDB, 0x01, C0|C1|C2|C3, "uop_unfusion" },				\
{ 0xF7, 0x01, C0|C1|C2|C3, "fp_assist.all" },				\
									\
{ 0xF7, 0x04, C0|C1|C2|C3, "fp_assist.input" },				\
{ 0xF7, 0x02, C0|C1|C2|C3, "fp_assist.output" },			\
{ 0xCC, 0x03, C0|C1|C2|C3, "fp_mmx_trans.any" },			\
									\
{ 0xCC, 0x01, C0|C1|C2|C3, "fp_mmx_trans.to_fp" },			\
{ 0xCC, 0x02, C0|C1|C2|C3, "fp_mmx_trans.to_mmx" },			\
{ 0xC3, 0x04, C0|C1|C2|C3, "machine_clears.smc" }
    745 
        /*
         * Non-architectural event rows for Family 6 Model 28.  The macro is
         * expanded inside events_fam6_mod28[], which appends the NT_END
         * terminator row.  Every row here names C0|C1 as the counters able to
         * count the event.
         *
         * NOTE(review): rows appear to be laid out as
         * { event select, unit mask, supported counters, name } -- confirm
         * against the definition of struct events_table_t.
         */
    746 #define	EVENTS_FAM6_MOD28						\
    747 	{ 0x2,  0x81, C0|C1, "store_forwards.good" },                   \
    748 	{ 0x6,  0x0,  C0|C1, "segment_reg_loads.any" },                 \
    749 	{ 0x7,  0x1,  C0|C1, "prefetch.prefetcht0" },                   \
    750 	{ 0x7,  0x6,  C0|C1, "prefetch.sw_l2" },                        \
    751 	{ 0x7,  0x8,  C0|C1, "prefetch.prefetchnta" },                  \
    752 	{ 0x8,  0x7,  C0|C1, "data_tlb_misses.dtlb_miss" },             \
    753 	{ 0x8,  0x5,  C0|C1, "data_tlb_misses.dtlb_miss_ld" },          \
    754 	{ 0x8,  0x9,  C0|C1, "data_tlb_misses.l0_dtlb_miss_ld" },	\
    755 	{ 0x8,  0x6,  C0|C1, "data_tlb_misses.dtlb_miss_st" },          \
    756 	{ 0xC,  0x3,  C0|C1, "page_walks.cycles" },                     \
    757 	{ 0x10, 0x1,  C0|C1, "x87_comp_ops_exe.any.s" },                \
    758 	{ 0x10, 0x81, C0|C1, "x87_comp_ops_exe.any.ar" },               \
    759 	{ 0x11, 0x1,  C0|C1, "fp_assist" },                             \
    760 	{ 0x11, 0x81, C0|C1, "fp_assist.ar" },                          \
    761 	{ 0x12, 0x1,  C0|C1, "mul.s" },                                 \
    762 	{ 0x12, 0x81, C0|C1, "mul.ar" },                                \
    763 	{ 0x13, 0x1,  C0|C1, "div.s" },                                 \
    764 	{ 0x13, 0x81, C0|C1, "div.ar" },                                \
    765 	{ 0x14, 0x1,  C0|C1, "cycles_div_busy" },                       \
    766 	{ 0x21, 0x0,  C0|C1, "l2_ads" },                      		\
    767 	{ 0x22, 0x0,  C0|C1, "l2_dbus_busy" },                		\
    768 	{ 0x24, 0x0,  C0|C1, "l2_lines_in" },   			\
    769 	{ 0x25, 0x0,  C0|C1, "l2_m_lines_in" },               		\
    770 	{ 0x26, 0x0,  C0|C1, "l2_lines_out" },  			\
    771 	{ 0x27, 0x0,  C0|C1, "l2_m_lines_out" },			\
    772 	{ 0x28, 0x0,  C0|C1, "l2_ifetch" },  				\
    773 	{ 0x29, 0x0,  C0|C1, "l2_ld" },					\
    774 	{ 0x2A, 0x0,  C0|C1, "l2_st" },      				\
    775 	{ 0x2B, 0x0,  C0|C1, "l2_lock" },    				\
    776 	{ 0x2E, 0x0,  C0|C1, "l2_rqsts" },             			\
    777 	{ 0x2E, 0x41, C0|C1, "l2_rqsts.self.demand.i_state" },		\
    778 	{ 0x2E, 0x4F, C0|C1, "l2_rqsts.self.demand.mesi" },		\
    779 	{ 0x30, 0x0,  C0|C1, "l2_reject_bus_q" },			\
    780 	{ 0x32, 0x0,  C0|C1, "l2_no_req" },                   		\
    781 	{ 0x3A, 0x0,  C0|C1, "eist_trans" },                            \
    782 	{ 0x3B, 0xC0, C0|C1, "thermal_trip" },                          \
    783 	{ 0x3C, 0x0,  C0|C1, "cpu_clk_unhalted.core_p" },               \
    784 	{ 0x3C, 0x1,  C0|C1, "cpu_clk_unhalted.bus" },                  \
    785 	{ 0x3C, 0x2,  C0|C1, "cpu_clk_unhalted.no_other" },             \
    786 	{ 0x40, 0x21, C0|C1, "l1d_cache.ld" },                          \
    787 	{ 0x40, 0x22, C0|C1, "l1d_cache.st" },                          \
    788 	{ 0x60, 0x0,  C0|C1, "bus_request_outstanding" },		\
    789 	{ 0x61, 0x0,  C0|C1, "bus_bnr_drv" },                		\
    790 	{ 0x62, 0x0,  C0|C1, "bus_drdy_clocks" },            		\
    791 	{ 0x63, 0x0,  C0|C1, "bus_lock_clocks" },  			\
    792 	{ 0x64, 0x0,  C0|C1, "bus_data_rcv" },                		\
    793 	{ 0x65, 0x0,  C0|C1, "bus_trans_brd" },    			\
    794 	{ 0x66, 0x0,  C0|C1, "bus_trans_rfo" },    			\
    795 	{ 0x67, 0x0,  C0|C1, "bus_trans_wb" },     			\
    796 	{ 0x68, 0x0,  C0|C1, "bus_trans_ifetch" }, 			\
    797 	{ 0x69, 0x0,  C0|C1, "bus_trans_inval" },  			\
    798 	{ 0x6A, 0x0,  C0|C1, "bus_trans_pwr" },				\
    799 	{ 0x6B, 0x0,  C0|C1, "bus_trans_p" },      			\
    800 	{ 0x6C, 0x0,  C0|C1, "bus_trans_io" },     			\
    801 	{ 0x6D, 0x0,  C0|C1, "bus_trans_def" },    			\
    802 	{ 0x6E, 0x0,  C0|C1, "bus_trans_burst" },  			\
    803 	{ 0x6F, 0x0,  C0|C1, "bus_trans_mem" },    			\
    804 	{ 0x70, 0x0,  C0|C1, "bus_trans_any" },    			\
    805 	{ 0x77, 0x0,  C0|C1, "ext_snoop" },     			\
    806 	{ 0x7A, 0x0,  C0|C1, "bus_hit_drv" },                		\
    807 	{ 0x7B, 0x0,  C0|C1, "bus_hitm_drv" },               		\
    808 	{ 0x7D, 0x0,  C0|C1, "busq_empty" },                  		\
    809 	{ 0x7E, 0x0,  C0|C1, "snoop_stall_drv" },  			\
    810 	{ 0x7F, 0x0,  C0|C1, "bus_io_wait" },				\
    811 	{ 0x80, 0x3,  C0|C1, "icache.accesses" },                       \
    812 	{ 0x80, 0x2,  C0|C1, "icache.misses" },                         \
    813 	{ 0x82, 0x4,  C0|C1, "itlb.flush" },                            \
    814 	{ 0x82, 0x2,  C0|C1, "itlb.misses" },                           \
    815 	{ 0xAA, 0x2,  C0|C1, "macro_insts.cisc_decoded" },              \
    816 	{ 0xAA, 0x3,  C0|C1, "macro_insts.all_decoded" },               \
    817 	{ 0xB0, 0x0,  C0|C1, "simd_uops_exec.s" },                      \
    818 	{ 0xB0, 0x80, C0|C1, "simd_uops_exec.ar" },                     \
    819 	{ 0xB1, 0x0,  C0|C1, "simd_sat_uop_exec.s" },                   \
    820 	{ 0xB1, 0x80, C0|C1, "simd_sat_uop_exec.ar" },                  \
    821 	{ 0xB3, 0x1,  C0|C1, "simd_uop_type_exec.mul.s" },              \
    822 	{ 0xB3, 0x81, C0|C1, "simd_uop_type_exec.mul.ar" },             \
    823 	{ 0xB3, 0x02, C0|C1, "simd_uop_type_exec.shift.s" },            \
    824 	{ 0xB3, 0x82, C0|C1, "simd_uop_type_exec.shift.ar" },           \
    825 	{ 0xB3, 0x04, C0|C1, "simd_uop_type_exec.pack.s" },             \
    826 	{ 0xB3, 0x84, C0|C1, "simd_uop_type_exec.pack.ar" },            \
    827 	{ 0xB3, 0x08, C0|C1, "simd_uop_type_exec.unpack.s" },           \
    828 	{ 0xB3, 0x88, C0|C1, "simd_uop_type_exec.unpack.ar" },          \
    829 	{ 0xB3, 0x10, C0|C1, "simd_uop_type_exec.logical.s" },          \
    830 	{ 0xB3, 0x90, C0|C1, "simd_uop_type_exec.logical.ar" },         \
    831 	{ 0xB3, 0x20, C0|C1, "simd_uop_type_exec.arithmetic.s" },       \
    832 	{ 0xB3, 0xA0, C0|C1, "simd_uop_type_exec.arithmetic.ar" },      \
    833 	{ 0xC2, 0x10, C0|C1, "uops_retired.any" },                      \
    834 	{ 0xC3, 0x1,  C0|C1, "machine_clears.smc" },                    \
    835 	{ 0xC4, 0x0,  C0|C1, "br_inst_retired.any" },                   \
    836 	{ 0xC4, 0x1,  C0|C1, "br_inst_retired.pred_not_taken" },        \
    837 	{ 0xC4, 0x2,  C0|C1, "br_inst_retired.mispred_not_taken" },     \
    838 	{ 0xC4, 0x4,  C0|C1, "br_inst_retired.pred_taken" },            \
    839 	{ 0xC4, 0x8,  C0|C1, "br_inst_retired.mispred_taken" },         \
    840 	{ 0xC4, 0xA,  C0|C1, "br_inst_retired.mispred" },               \
    841 	{ 0xC4, 0xC,  C0|C1, "br_inst_retired.taken" },                 \
    842 	{ 0xC4, 0xF,  C0|C1, "br_inst_retired.any1" },                  \
    843 	{ 0xC6, 0x1,  C0|C1, "cycles_int_masked.cycles_int_masked" },   \
    844 	{ 0xC6, 0x2,  C0|C1,						\
    845 		"cycles_int_masked.cycles_int_pending_and_masked" },	\
    846 	{ 0xC7, 0x1,  C0|C1, "simd_inst_retired.packed_single" },       \
    847 	{ 0xC7, 0x2,  C0|C1, "simd_inst_retired.scalar_single" },      	\
    848 	{ 0xC7, 0x4,  C0|C1, "simd_inst_retired.packed_double" },       \
    849 	{ 0xC7, 0x8,  C0|C1, "simd_inst_retired.scalar_double" },       \
    850 	{ 0xC7, 0x10, C0|C1, "simd_inst_retired.vector" },              \
    851 	{ 0xC7, 0x1F, C0|C1, "simd_inst_retired.any" },                 \
    852 	{ 0xC8, 0x00, C0|C1, "hw_int_rcv" },                            \
    853 	{ 0xCA, 0x1,  C0|C1, "simd_comp_inst_retired.packed_single" },  \
    854 	{ 0xCA, 0x2,  C0|C1, "simd_comp_inst_retired.scalar_single" }, 	\
    855 	{ 0xCA, 0x4,  C0|C1, "simd_comp_inst_retired.packed_double" },  \
    856 	{ 0xCA, 0x8,  C0|C1, "simd_comp_inst_retired.scalar_double" },  \
    857 	{ 0xCB, 0x1,  C0|C1, "mem_load_retired.l2_hit" },               \
    858 	{ 0xCB, 0x2,  C0|C1, "mem_load_retired.l2_miss" },              \
    859 	{ 0xCB, 0x4,  C0|C1, "mem_load_retired.dtlb_miss" },           	\
    860 	{ 0xCD, 0x0,  C0|C1, "simd_assist" },                           \
    861 	{ 0xCE, 0x0,  C0|C1, "simd_instr_retired" },                    \
    862 	{ 0xCF, 0x0,  C0|C1, "simd_sat_instr_retired" },                \
    863 	{ 0xE0, 0x1,  C0|C1, "br_inst_decoded" },                       \
    864 	{ 0xE4, 0x1,  C0|C1, "bogus_br" },                             	\
    865 	{ 0xE6, 0x1,  C0|C1, "baclears.any" }
    866 
        /*
         * Model-specific (non-architectural) event table in use.  It stays NULL
         * unless core_pcbe_init() recognizes the CPU model (26 or 28); every
         * reader checks for NULL before walking it.
         */
    867 static const struct events_table_t *events_table = NULL;
    868 
        /* Non-architectural events for model 26, closed by an NT_END row. */
    869 const struct events_table_t events_fam6_mod26[] = {
    870 	EVENTS_FAM6_MOD26,
    871 	{ NT_END, 0, 0, "" }
    872 };
    873 
        /* Non-architectural events for model 28, closed by an NT_END row. */
    874 const struct events_table_t events_fam6_mod28[] = {
    875 	EVENTS_FAM6_MOD28,
    876 	{ NT_END, 0, 0, "" }
    877 };
    878 
    879 /*
    880  * Initialize string containing list of supported general-purpose counter
    881  * events for processors of Penryn and Merom Family
    882  */
    883 static void
    884 pcbe_init_core_uarch()
    885 {
    886 	const struct nametable_core_uarch	*n;
    887 	const struct nametable_core_uarch	*picspecific_events;
    888 	size_t			common_size;
    889 	size_t			size;
    890 	uint64_t		i;
    891 
    892 	gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
    893 
    894 	/* Calculate space needed to save all the common event names */
    895 	common_size = 0;
    896 	for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END; n++) {
    897 		common_size += strlen(n->name) + 1;
    898 	}
    899 
    900 	for (i = 0; i < num_gpc; i++) {
    901 		size = 0;
    902 		switch (i) {
    903 			case 0:
    904 				picspecific_events = pic0_events;
    905 				break;
    906 			case 1:
    907 				picspecific_events = pic1_events;
    908 				break;
    909 			default:
    910 				picspecific_events = NULL;
    911 				break;
    912 		}
    913 		if (picspecific_events != NULL) {
    914 			for (n = picspecific_events;
    915 			    n->event_num != NT_END;
    916 			    n++) {
    917 				size += strlen(n->name) + 1;
    918 			}
    919 		}
    920 
    921 		gpc_names[i] =
    922 		    kmem_alloc(size + common_size + 1, KM_SLEEP);
    923 
    924 		gpc_names[i][0] = '\0';
    925 		if (picspecific_events != NULL) {
    926 			for (n = picspecific_events;
    927 			    n->event_num != NT_END;
    928 			    n++) {
    929 				(void) strcat(gpc_names[i], n->name);
    930 				(void) strcat(gpc_names[i], ",");
    931 			}
    932 		}
    933 		for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END;
    934 		    n++) {
    935 			(void) strcat(gpc_names[i], n->name);
    936 			(void) strcat(gpc_names[i], ",");
    937 		}
    938 		/*
    939 		 * Remove trailing comma.
    940 		 */
    941 		gpc_names[i][common_size + size - 1] = '\0';
    942 	}
    943 }
    944 
        /*
         * Back-end initialization.
         *
         * Queries CPUID leaf 0xA (Architectural Performance Monitoring) and
         * fills in the file-level description of the counters: versionid, the
         * number, width and masks of the fixed-function (FFC) and
         * general-purpose (GPC) counters, total_pmc, the implementation name
         * and, for each GPC, a comma-separated list of countable event names
         * (gpc_names[]).
         *
         * Returns 0 on success.  Returns -1 when the CPU vendor is not Intel,
         * CPUID does not report leaf 0xA, the reported version id is 0, or the
         * counter counts do not fit the 64-bit overflow bitmap.  No memory has
         * been allocated on any of the failure returns.
         */
    945 static int
    946 core_pcbe_init(void)
    947 {
    948 	struct cpuid_regs	cp;
    949 	size_t			size;
    950 	uint64_t		i;
    951 	uint64_t		j;
    952 	uint64_t		arch_events_vector_length;
    953 	size_t			arch_events_string_length;
    954 
    955 	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
    956 		return (-1);
    957 
    958 	/* Obtain Basic CPUID information */
    959 	cp.cp_eax = 0x0;
    960 	(void) __cpuid_insn(&cp);
    961 
    962 	/* No Architectural Performance Monitoring Leaf returned by CPUID */
    963 	if (cp.cp_eax < 0xa) {
    964 		return (-1);
    965 	}
    966 
    967 	/* Obtain the Architectural Performance Monitoring Leaf */
    968 	cp.cp_eax = 0xa;
    969 	(void) __cpuid_insn(&cp);
    970 
        	/* The version id is the low byte of EAX */
    971 	versionid = cp.cp_eax & 0xFF;
    972 
    973 	/*
    974 	 * Fixed-Function Counters (FFC)
    975 	 *
    976 	 * All Family 6 Model 15 and Model 23 processors have fixed-function
    977 	 * counters.  These counters were made Architectural with
    978 	 * Family 6 Model 15 Stepping 9.
    979 	 */
    980 	switch (versionid) {
    981 
    982 		case 0:
    983 			return (-1);
    984 
    985 		case 2:
    986 			num_ffc = cp.cp_edx & 0x1F;
    987 			width_ffc = (cp.cp_edx >> 5) & 0xFF;
    988 
    989 			/*
    990 			 * Some processors have an erratum (AW34) where
    991 			 * versionid is reported as 2 when actually 1.
    992 			 * In this case, fixed-function counters are
    993 			 * model-specific as in Version 1.
    994 			 */
    995 			if (num_ffc != 0) {
    996 				break;
    997 			}
    998 			/* FALLTHROUGH */
    999 		case 1:
        			/* Version 1: fixed values, not read from CPUID */
   1000 			num_ffc = 3;
   1001 			width_ffc = 40;
   1002 			versionid = 1;
   1003 			break;
   1004 
   1005 		default:
   1006 			num_ffc = cp.cp_edx & 0x1F;
   1007 			width_ffc = (cp.cp_edx >> 5) & 0xFF;
   1008 			break;
   1009 	}
   1010 
   1011 
   1012 	if (num_ffc >= 64)
   1013 		return (-1);
   1014 
   1015 	/* Set HTT-specific names of architectural & FFC events */
   1016 	if (x86_feature & X86_HTT) {
   1017 		ffc_names = ffc_names_htt;
   1018 		arch_events_table = arch_events_table_htt;
   1019 		known_arch_events =
   1020 		    sizeof (arch_events_table_htt) /
   1021 		    sizeof (struct events_table_t);
   1022 		known_ffc_num =
   1023 		    sizeof (ffc_names_htt) / sizeof (char *);
   1024 	} else {
   1025 		ffc_names = ffc_names_non_htt;
   1026 		arch_events_table = arch_events_table_non_htt;
   1027 		known_arch_events =
   1028 		    sizeof (arch_events_table_non_htt) /
   1029 		    sizeof (struct events_table_t);
   1030 		known_ffc_num =
   1031 		    sizeof (ffc_names_non_htt) / sizeof (char *);
   1032 	}
   1033 
   1034 	if (num_ffc >= known_ffc_num) {
   1035 		/*
   1036 		 * The system seems to have more fixed-function counters than
   1037 		 * what this PCBE is able to handle correctly.  Default to the
   1038 		 * maximum number of fixed-function counters that this driver
   1039 		 * is aware of.
        		 *
        		 * NOTE(review): the "- 1" suggests the ffc_names_*[] arrays
        		 * end in a terminating entry that sizeof counts -- confirm
        		 * against their definitions.
   1040 		 */
   1041 		num_ffc = known_ffc_num - 1;
   1042 	}
   1043 
   1044 	mask_ffc = BITMASK_XBITS(width_ffc);
   1045 	control_ffc = BITMASK_XBITS(num_ffc);
   1046 
   1047 	/*
   1048 	 * General Purpose Counters (GPC)
        	 *
        	 * EAX[15:8] is the number of counters, EAX[23:16] their width.
   1049 	 */
   1050 	num_gpc = (cp.cp_eax >> 8) & 0xFF;
   1051 	width_gpc = (cp.cp_eax >> 16) & 0xFF;
   1052 
   1053 	if (num_gpc >= 64)
   1054 		return (-1);
   1055 
   1056 	mask_gpc = BITMASK_XBITS(width_gpc);
   1057 
   1058 	control_gpc = BITMASK_XBITS(num_gpc);
   1059 
        	/* FFC control bits sit in the upper 32 bits, GPC bits in the lower */
   1060 	control_mask = (control_ffc << 32) | control_gpc;
   1061 
   1062 	total_pmc = num_gpc + num_ffc;
   1063 	if (total_pmc > 64) {
   1064 		/* Too wide for the overflow bitmap */
   1065 		return (-1);
   1066 	}
   1067 
   1068 	/* GPC events for Family 6 Models 15, 23 & 29 only */
   1069 	if ((cpuid_getfamily(CPU) == 6) &&
   1070 	    ((cpuid_getmodel(CPU) == 15) || (cpuid_getmodel(CPU) == 23) ||
   1071 	    (cpuid_getmodel(CPU) == 29))) {
   1072 		(void) snprintf(core_impl_name, IMPL_NAME_LEN,
   1073 		    "Core Microarchitecture");
   1074 		pcbe_init_core_uarch();
   1075 		return (0);
   1076 	}
   1077 
   1078 	(void) snprintf(core_impl_name, IMPL_NAME_LEN,
   1079 	    "Intel Arch PerfMon v%d on Family %d Model %d",
   1080 	    versionid, cpuid_getfamily(CPU), cpuid_getmodel(CPU));
   1081 
   1082 	/*
   1083 	 * Architectural events
        	 *
        	 * EAX[31:24] is the length of the event vector returned in EBX.
   1084 	 */
   1085 	arch_events_vector_length = (cp.cp_eax >> 24) & 0xFF;
   1086 
   1087 	ASSERT(known_arch_events == arch_events_vector_length);
   1088 
   1089 	/*
   1090 	 * To handle the case where a new performance monitoring setup is run
   1091 	 * on a non-debug kernel
        	 *
        	 * Both counts are clamped to the smaller of the two.
   1092 	 */
   1093 	if (known_arch_events > arch_events_vector_length) {
   1094 		known_arch_events = arch_events_vector_length;
   1095 	} else {
   1096 		arch_events_vector_length = known_arch_events;
   1097 	}
   1098 
        	/*
        	 * Events whose bit in this vector is clear are the ones treated as
        	 * supported by the loops below and by find_gpcevent().
        	 */
   1099 	arch_events_vector = cp.cp_ebx &
   1100 	    BITMASK_XBITS(arch_events_vector_length);
   1101 
   1102 	/*
   1103 	 * Process architectural and non-architectural events using GPC
   1104 	 */
   1105 	if (num_gpc > 0) {
   1106 
   1107 		gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
   1108 
   1109 		/* Calculate space required for the architectural gpc events */
   1110 		arch_events_string_length = 0;
   1111 		for (i = 0; i < known_arch_events; i++) {
   1112 			if (((1U << i) & arch_events_vector) == 0) {
   1113 				arch_events_string_length +=
   1114 				    strlen(arch_events_table[i].name) + 1;
   1115 			}
   1116 		}
   1117 
   1118 		/* Non-architectural events list */
        		/* Any other model leaves events_table NULL */
   1119 		if (cpuid_getmodel(CPU) == 26) {
   1120 			events_table = events_fam6_mod26;
   1121 		} else if (cpuid_getmodel(CPU) == 28) {
   1122 			events_table = events_fam6_mod28;
   1123 		}
   1124 
   1125 		for (i = 0; i < num_gpc; i++) {
   1126 
   1127 			/*
   1128 			 * Determine length of all supported event names
   1129 			 * (architectural + non-architectural)
   1130 			 */
   1131 			size = arch_events_string_length;
   1132 			for (j = 0; events_table != NULL &&
   1133 			    events_table[j].eventselect != NT_END;
   1134 			    j++) {
   1135 				if (C(i) & events_table[j].supported_counters) {
   1136 					size += strlen(events_table[j].name) +
   1137 					    1;
   1138 				}
   1139 			}
   1140 
   1141 			/* Allocate memory for this pics list */
   1142 			gpc_names[i] = kmem_alloc(size + 1, KM_SLEEP);
   1143 			gpc_names[i][0] = '\0';
        			/* No events for this counter: keep the empty string */
   1144 			if (size == 0) {
   1145 				continue;
   1146 			}
   1147 
   1148 			/*
   1149 			 * Create the list of all supported events
   1150 			 * (architectural + non-architectural)
   1151 			 */
   1152 			for (j = 0; j < known_arch_events; j++) {
   1153 				if (((1U << j) & arch_events_vector) == 0) {
   1154 					(void) strcat(gpc_names[i],
   1155 					    arch_events_table[j].name);
   1156 					(void) strcat(gpc_names[i], ",");
   1157 				}
   1158 			}
   1159 
   1160 			for (j = 0; events_table != NULL &&
   1161 			    events_table[j].eventselect != NT_END;
   1162 			    j++) {
   1163 				if (C(i) & events_table[j].supported_counters) {
   1164 					(void) strcat(gpc_names[i],
   1165 					    events_table[j].name);
   1166 					(void) strcat(gpc_names[i], ",");
   1167 				}
   1168 			}
   1169 
   1170 			/* Remove trailing comma */
   1171 			gpc_names[i][size - 1] = '\0';
   1172 		}
   1173 	}
   1174 	/*
   1175 	 * Fixed-function Counters (FFC) are already listed individually in
   1176 	 * ffc_names[]
   1177 	 */
   1178 	return (0);
   1179 }
   1180 
   1181 static uint_t core_pcbe_ncounters()
   1182 {
   1183 	return (total_pmc);
   1184 }
   1185 
   1186 static const char *core_pcbe_impl_name(void)
   1187 {
   1188 	return (core_impl_name);
   1189 }
   1190 
   1191 static const char *core_pcbe_cpuref(void)
   1192 {
   1193 	return (core_cpuref);
   1194 }
   1195 
   1196 static char *core_pcbe_list_events(uint_t picnum)
   1197 {
   1198 	ASSERT(picnum < cpc_ncounters);
   1199 
   1200 	if (picnum < num_gpc) {
   1201 		return (gpc_names[picnum]);
   1202 	} else {
   1203 		return (ffc_names[picnum - num_gpc]);
   1204 	}
   1205 }
   1206 
   1207 static char *core_pcbe_list_attrs(void)
   1208 {
   1209 	if (versionid >= 3) {
   1210 		return ("edge,inv,umask,cmask,anythr");
   1211 	} else {
   1212 		return ("edge,pc,inv,umask,cmask");
   1213 	}
   1214 }
   1215 
   1216 static const struct nametable_core_uarch *
   1217 find_gpcevent_core_uarch(char *name,
   1218     const struct nametable_core_uarch *nametable)
   1219 {
   1220 	const struct nametable_core_uarch *n;
   1221 	int compare_result = -1;
   1222 
   1223 	for (n = nametable; n->event_num != NT_END; n++) {
   1224 		compare_result = strcmp(name, n->name);
   1225 		if (compare_result <= 0) {
   1226 			break;
   1227 		}
   1228 	}
   1229 
   1230 	if (compare_result == 0) {
   1231 		return (n);
   1232 	}
   1233 
   1234 	return (NULL);
   1235 }
   1236 
   1237 static const struct events_table_t *
   1238 find_gpcevent(char *name)
   1239 {
   1240 	int i;
   1241 
   1242 	/* Search architectural events */
   1243 	for (i = 0; i < known_arch_events; i++) {
   1244 		if (strcmp(name, arch_events_table[i].name) == 0) {
   1245 			if (((1U << i) & arch_events_vector) == 0) {
   1246 				return (&arch_events_table[i]);
   1247 			}
   1248 		}
   1249 	}
   1250 
   1251 	/* Search non-architectural events */
   1252 	if (events_table != NULL) {
   1253 		for (i = 0; events_table[i].eventselect != NT_END; i++) {
   1254 			if (strcmp(name, events_table[i].name) == 0) {
   1255 				return (&events_table[i]);
   1256 			}
   1257 		}
   1258 	}
   1259 
   1260 	return (NULL);
   1261 }
   1262 static uint64_t
   1263 core_pcbe_event_coverage(char *event)
   1264 {
   1265 	uint64_t bitmap;
   1266 	uint64_t bitmask;
   1267 	const struct events_table_t *n;
   1268 	int i;
   1269 
   1270 	bitmap = 0;
   1271 
   1272 	/* Is it an event that a GPC can track? */
   1273 	if (versionid >= 3) {
   1274 		n = find_gpcevent(event);
   1275 		if (n != NULL) {
   1276 			bitmap |= (n->supported_counters &
   1277 			    BITMASK_XBITS(num_gpc));
   1278 		}
   1279 	} else {
   1280 		if (find_gpcevent_core_uarch(event, cmn_gpc_events_core_uarch)
   1281 		    != NULL) {
   1282 			bitmap |= BITMASK_XBITS(num_gpc);
   1283 		} else if (find_gpcevent_core_uarch(event, pic0_events) !=
   1284 		    NULL) {
   1285 			bitmap |= 1ULL;
   1286 		} else if (find_gpcevent_core_uarch(event, pic1_events) !=
   1287 		    NULL) {
   1288 			bitmap |= 1ULL << 1;
   1289 		}
   1290 	}
   1291 
   1292 	/* Check if the event can be counted in the fixed-function counters */
   1293 	if (num_ffc > 0) {
   1294 		bitmask = 1ULL << num_gpc;
   1295 		for (i = 0; i < num_ffc; i++) {
   1296 			if (strcmp(event, ffc_names[i]) == 0) {
   1297 				bitmap |= bitmask;
   1298 			}
   1299 			bitmask = bitmask << 1;
   1300 		}
   1301 	}
   1302 
   1303 	return (bitmap);
   1304 }
   1305 
   1306 static uint64_t
   1307 core_pcbe_overflow_bitmap(void)
   1308 {
   1309 	uint64_t interrupt_status;
   1310 	uint64_t intrbits_ffc;
   1311 	uint64_t intrbits_gpc;
   1312 	extern int kcpc_hw_overflow_intr_installed;
   1313 	uint64_t overflow_bitmap;
   1314 
   1315 	RDMSR(PERF_GLOBAL_STATUS, interrupt_status);
   1316 	WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status);
   1317 
   1318 	interrupt_status = interrupt_status & control_mask;
   1319 	intrbits_ffc = (interrupt_status >> 32) & control_ffc;
   1320 	intrbits_gpc = interrupt_status & control_gpc;
   1321 	overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc;
   1322 
   1323 	ASSERT(kcpc_hw_overflow_intr_installed);
   1324 	(*kcpc_hw_enable_cpc_intr)();
   1325 
   1326 	return (overflow_bitmap);
   1327 }
   1328 
   1329 static int
   1330 check_cpc_securitypolicy(core_pcbe_config_t *conf,
   1331     const struct nametable_core_uarch *n)
   1332 {
   1333 	if (conf->core_ctl & n->restricted_bits) {
   1334 		if (secpolicy_cpc_cpu(crgetcred()) != 0) {
   1335 			return (CPC_ATTR_REQUIRES_PRIVILEGE);
   1336 		}
   1337 	}
   1338 	return (0);
   1339 }
   1340 
        /*
         * Build the configuration for one general-purpose counter (GPC).
         *
         *   picnum  - index of the GPC to configure
         *   event   - event name, or a numeric event code that ddi_strtol()
         *             can parse
         *   preset  - initial counter value
         *   flags   - CPC_COUNT_USER / CPC_COUNT_SYSTEM / CPC_OVF_NOTIFY_EMT
         *   nattrs, attrs - optional attributes: umask, edge, inv, cmask and,
         *             from version 3 on, anythr
         *   data    - on success, *data receives a kmem_alloc()ed
         *             core_pcbe_config_t
         *
         * Returns 0 on success, otherwise CPC_ATTRIBUTE_OUT_OF_RANGE,
         * CPC_PIC_NOT_CAPABLE, CPC_INVALID_EVENT, CPC_INVALID_ATTRIBUTE or
         * CPC_ATTR_REQUIRES_PRIVILEGE.  The configuration is allocated only
         * after every check has passed, so nothing is left allocated on an
         * error return.
         */
   1341 static int
   1342 configure_gpc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
   1343     uint_t nattrs, kcpc_attr_t *attrs, void **data)
   1344 {
   1345 	core_pcbe_config_t	conf;
   1346 	const struct nametable_core_uarch	*n;
   1347 	const struct nametable_core_uarch	*m;
   1348 	const struct nametable_core_uarch	*picspecific_events;
        	/* Stand-in entry for a raw event code that matches no table row */
   1349 	struct nametable_core_uarch	nt_raw = { "", 0x0, 0x0 };
   1350 	uint_t			i;
   1351 	long			event_num;
   1352 	const struct events_table_t *eventcode;
   1353 
   1354 	if (((preset & BITS_EXTENDED_FROM_31) != 0) &&
   1355 	    ((preset & BITS_EXTENDED_FROM_31) !=
   1356 	    BITS_EXTENDED_FROM_31)) {
   1357 
   1358 		/*
   1359 		 * Bits beyond bit-31 in the general-purpose counters can only
   1360 		 * be written to by extension of bit 31.  We cannot preset
   1361 		 * these bits to any value other than all 1s or all 0s.
   1362 		 */
   1363 		return (CPC_ATTRIBUTE_OUT_OF_RANGE);
   1364 	}
   1365 
        	/*
        	 * Version 3 and later look the event up with find_gpcevent();
        	 * earlier versions use the Core microarchitecture name tables.
        	 * "n" is only assigned, and only used, on the earlier-version path.
        	 */
   1366 	if (versionid >= 3) {
   1367 		eventcode = find_gpcevent(event);
   1368 		if (eventcode != NULL) {
   1369 			if ((C(picnum) & eventcode->supported_counters) == 0) {
   1370 				return (CPC_PIC_NOT_CAPABLE);
   1371 			}
        			/* Start from the table's event select and unit mask */
   1372 			conf.core_ctl = eventcode->eventselect;
   1373 			conf.core_ctl |= eventcode->unitmask <<
   1374 			    CORE_UMASK_SHIFT;
   1375 		} else {
   1376 			/* Event specified as raw event code */
   1377 			if (ddi_strtol(event, NULL, 0, &event_num) != 0) {
   1378 				return (CPC_INVALID_EVENT);
   1379 			}
        			/* Only the low 8 bits of a raw code are used */
   1380 			conf.core_ctl = event_num & 0xFF;
   1381 		}
   1382 	} else {
        		/* Common events first, then the table specific to this pic */
   1383 		n = find_gpcevent_core_uarch(event, cmn_gpc_events_core_uarch);
   1384 		if (n == NULL) {
   1385 			switch (picnum) {
   1386 				case 0:
   1387 					picspecific_events = pic0_events;
   1388 					break;
   1389 				case 1:
   1390 					picspecific_events = pic1_events;
   1391 					break;
   1392 				default:
   1393 					picspecific_events = NULL;
   1394 					break;
   1395 			}
   1396 			if (picspecific_events != NULL) {
   1397 				n = find_gpcevent_core_uarch(event,
   1398 				    picspecific_events);
   1399 			}
   1400 		}
   1401 		if (n == NULL) {
   1402 			/*
   1403 			 * Check if this is a case where the event was
   1404 			 * specified directly by its event number instead of
   1405 			 * its name string.
   1406 			 */
   1407 			if (ddi_strtol(event, NULL, 0, &event_num) != 0) {
   1408 				return (CPC_INVALID_EVENT);
   1409 			}
   1410 
   1411 			event_num = event_num & 0xFF;
   1412 
   1413 			/*
   1414 			 * Search the event table to find out if the event
   1415 			 * specified has any privilege requirements.  Currently
   1416 			 * none of the pic-specific counters have any privilege
   1417 			 * requirements.  Hence only the table
   1418 			 * cmn_gpc_events_core_uarch is searched.
   1419 			 */
   1420 			for (m = cmn_gpc_events_core_uarch;
   1421 			    m->event_num != NT_END;
   1422 			    m++) {
   1423 				if (event_num == m->event_num) {
   1424 					break;
   1425 				}
   1426 			}
   1427 			if (m->event_num == NT_END) {
   1428 				nt_raw.event_num = (uint8_t)event_num;
   1429 				n = &nt_raw;
   1430 			} else {
   1431 				n = m;
   1432 			}
   1433 		}
   1434 		conf.core_ctl = n->event_num; /* Event Select */
   1435 	}
   1436 
   1437 
   1438 	conf.core_picno = picnum;
   1439 	conf.core_pictype = CORE_GPC;
   1440 	conf.core_rawpic = preset & mask_gpc;
   1441 
        	/* Each GPC has its own event-select and counter register */
   1442 	conf.core_pes = GPC_BASE_PES + picnum;
   1443 	conf.core_pmc = GPC_BASE_PMC + picnum;
   1444 
        	/*
        	 * Fold the attributes into the control word.  Any name not handled
        	 * below is rejected.
        	 *
        	 * NOTE(review): core_pcbe_list_attrs() lists "pc" for versions below
        	 * 3, but there is no branch for it here, so it is rejected as
        	 * CPC_INVALID_ATTRIBUTE.
        	 */
   1445 	for (i = 0; i < nattrs; i++) {
   1446 		if (strncmp(attrs[i].ka_name, "umask", 6) == 0) {
   1447 			if ((attrs[i].ka_val | CORE_UMASK_MASK) !=
   1448 			    CORE_UMASK_MASK) {
   1449 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
   1450 			}
   1451 			/* Clear out the default umask */
   1452 			conf.core_ctl &= ~ (CORE_UMASK_MASK <<
   1453 			    CORE_UMASK_SHIFT);
   1454 			/* Use the user provided umask */
   1455 			conf.core_ctl |= attrs[i].ka_val <<
   1456 			    CORE_UMASK_SHIFT;
   1457 		} else  if (strncmp(attrs[i].ka_name, "edge", 6) == 0) {
   1458 			if (attrs[i].ka_val != 0)
   1459 				conf.core_ctl |= CORE_EDGE;
   1460 		} else if (strncmp(attrs[i].ka_name, "inv", 4) == 0) {
   1461 			if (attrs[i].ka_val != 0)
   1462 				conf.core_ctl |= CORE_INV;
   1463 		} else if (strncmp(attrs[i].ka_name, "cmask", 6) == 0) {
   1464 			if ((attrs[i].ka_val | CORE_CMASK_MASK) !=
   1465 			    CORE_CMASK_MASK) {
   1466 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
   1467 			}
   1468 			conf.core_ctl |= attrs[i].ka_val <<
   1469 			    CORE_CMASK_SHIFT;
   1470 		} else if (strncmp(attrs[i].ka_name, "anythr", 7) ==
   1471 		    0) {
        			/* anythr needs version 3 and a privileged caller */
   1472 			if (versionid < 3)
   1473 				return (CPC_INVALID_ATTRIBUTE);
   1474 			if (secpolicy_cpc_cpu(crgetcred()) != 0) {
   1475 				return (CPC_ATTR_REQUIRES_PRIVILEGE);
   1476 			}
   1477 			if (attrs[i].ka_val != 0)
   1478 				conf.core_ctl |= CORE_ANYTHR;
   1479 		} else {
   1480 			return (CPC_INVALID_ATTRIBUTE);
   1481 		}
   1482 	}
   1483 
   1484 	if (flags & CPC_COUNT_USER)
   1485 		conf.core_ctl |= CORE_USR;
   1486 	if (flags & CPC_COUNT_SYSTEM)
   1487 		conf.core_ctl |= CORE_OS;
   1488 	if (flags & CPC_OVF_NOTIFY_EMT)
   1489 		conf.core_ctl |= CORE_INT;
   1490 	conf.core_ctl |= CORE_EN;
   1491 
        	/*
        	 * The restricted-bits check runs on the finished control word, so
        	 * it sees the bits set by both attributes and flags.
        	 */
   1492 	if (versionid < 3) {
   1493 		if (check_cpc_securitypolicy(&conf, n) != 0) {
   1494 			return (CPC_ATTR_REQUIRES_PRIVILEGE);
   1495 		}
   1496 	}
   1497 
        	/* Hand the caller a heap copy of the finished configuration */
   1498 	*data = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
   1499 	*((core_pcbe_config_t *)*data) = conf;
   1500 
   1501 	return (0);
   1502 }
   1503 
   1504 static int
   1505 configure_ffc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
   1506     uint_t nattrs, kcpc_attr_t *attrs, void **data)
   1507 {
   1508 	core_pcbe_config_t	*conf;
   1509 	uint_t			i;
   1510 
   1511 	if (picnum - num_gpc >= num_ffc) {
   1512 		return (CPC_INVALID_PICNUM);
   1513 	}
   1514 
   1515 	if (strcmp(ffc_names[picnum-num_gpc], event) != 0) {
   1516 		return (CPC_INVALID_EVENT);
   1517 	}
   1518 
   1519 	if ((versionid < 3) && (nattrs != 0)) {
   1520 		return (CPC_INVALID_ATTRIBUTE);
   1521 	}
   1522 
   1523 	conf = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
   1524 	conf->core_ctl = 0;
   1525 
   1526 	for (i = 0; i < nattrs; i++) {
   1527 		if (strncmp(attrs[i].ka_name, "anythr", 7) == 0) {
   1528 			if (secpolicy_cpc_cpu(crgetcred()) != 0) {
   1529 				return (CPC_ATTR_REQUIRES_PRIVILEGE);
   1530 			}
   1531 			if (attrs[i].ka_val != 0) {
   1532 				conf->core_ctl |= CORE_FFC_ANYTHR;
   1533 			}
   1534 		} else {
   1535 			kmem_free(conf, sizeof (core_pcbe_config_t));
   1536 			return (CPC_INVALID_ATTRIBUTE);
   1537 		}
   1538 	}
   1539 
   1540 	conf->core_picno = picnum;
   1541 	conf->core_pictype = CORE_FFC;
   1542 	conf->core_rawpic = preset & mask_ffc;
   1543 	conf->core_pmc = FFC_BASE_PMC + (picnum - num_gpc);
   1544 
   1545 	/* All fixed-function counters have the same control register */
   1546 	conf->core_pes = PERF_FIXED_CTR_CTRL;
   1547 
   1548 	if (flags & CPC_COUNT_USER)
   1549 		conf->core_ctl |= CORE_FFC_USR_EN;
   1550 	if (flags & CPC_COUNT_SYSTEM)
   1551 		conf->core_ctl |= CORE_FFC_OS_EN;
   1552 	if (flags & CPC_OVF_NOTIFY_EMT)
   1553 		conf->core_ctl |= CORE_FFC_PMI;
   1554 
   1555 	*data = conf;
   1556 	return (0);
   1557 }
   1558 
   1559 /*ARGSUSED*/
   1560 static int
   1561 core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
   1562     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
   1563     void *token)
   1564 {
   1565 	int			ret;
   1566 	core_pcbe_config_t	*conf;
   1567 
   1568 	/*
   1569 	 * If we've been handed an existing configuration, we need only preset
   1570 	 * the counter value.
   1571 	 */
   1572 	if (*data != NULL) {
   1573 		conf = *data;
   1574 		ASSERT(conf->core_pictype == CORE_GPC ||
   1575 		    conf->core_pictype == CORE_FFC);
   1576 		if (conf->core_pictype == CORE_GPC)
   1577 			conf->core_rawpic = preset & mask_gpc;
   1578 		else /* CORE_FFC */
   1579 			conf->core_rawpic = preset & mask_ffc;
   1580 		return (0);
   1581 	}
   1582 
   1583 	if (picnum >= total_pmc) {
   1584 		return (CPC_INVALID_PICNUM);
   1585 	}
   1586 
   1587 	if (picnum < num_gpc) {
   1588 		ret = configure_gpc(picnum, event, preset, flags,
   1589 		    nattrs, attrs, data);
   1590 	} else {
   1591 		ret = configure_ffc(picnum, event, preset, flags,
   1592 		    nattrs, attrs, data);
   1593 	}
   1594 	return (ret);
   1595 }
   1596 
   1597 static void
   1598 core_pcbe_program(void *token)
   1599 {
   1600 	core_pcbe_config_t	*cfg;
   1601 	uint64_t		perf_global_ctrl;
   1602 	uint64_t		perf_fixed_ctr_ctrl;
   1603 	uint64_t		curcr4;
   1604 
   1605 	core_pcbe_allstop();
   1606 
   1607 	curcr4 = getcr4();
   1608 	if (kcpc_allow_nonpriv(token))
   1609 		/* Allow RDPMC at any ring level */
   1610 		setcr4(curcr4 | CR4_PCE);
   1611 	else
   1612 		/* Allow RDPMC only at ring 0 */
   1613 		setcr4(curcr4 & ~CR4_PCE);
   1614 
   1615 	/* Clear any overflow indicators before programming the counters */
   1616 	WRMSR(PERF_GLOBAL_OVF_CTRL, MASK_CONDCHGD_OVFBUFFER | control_mask);
   1617 
   1618 	cfg = NULL;
   1619 	perf_global_ctrl = 0;
   1620 	perf_fixed_ctr_ctrl = 0;
   1621 	cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, NULL);
   1622 	while (cfg != NULL) {
   1623 		ASSERT(cfg->core_pictype == CORE_GPC ||
   1624 		    cfg->core_pictype == CORE_FFC);
   1625 
   1626 		if (cfg->core_pictype == CORE_GPC) {
   1627 			/*
   1628 			 * General-purpose counter registers have write
   1629 			 * restrictions where only the lower 32-bits can be
   1630 			 * written to.  The rest of the relevant bits are
   1631 			 * written to by extension from bit 31 (all ZEROS if
   1632 			 * bit-31 is ZERO and all ONE if bit-31 is ONE).  This
   1633 			 * makes it possible to write to the counter register
   1634 			 * only values that have all ONEs or all ZEROs in the
   1635 			 * higher bits.
   1636 			 */
   1637 			if (((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 0) ||
   1638 			    ((cfg->core_rawpic & BITS_EXTENDED_FROM_31) ==
   1639 			    BITS_EXTENDED_FROM_31)) {
   1640 				/*
   1641 				 * Straighforward case where the higher bits
   1642 				 * are all ZEROs or all ONEs.
   1643 				 */
   1644 				WRMSR(cfg->core_pmc,
   1645 				    (cfg->core_rawpic & mask_gpc));
   1646 			} else {
   1647 				/*
   1648 				 * The high order bits are not all the same.
   1649 				 * We save what is currently in the registers
   1650 				 * and do not write to it.  When we want to do
   1651 				 * a read from this register later (in
   1652 				 * core_pcbe_sample()), we subtract the value
   1653 				 * we save here to get the actual event count.
   1654 				 *
   1655 				 * NOTE: As a result, we will not get overflow
   1656 				 * interrupts as expected.
   1657 				 */
   1658 				RDMSR(cfg->core_pmc, cfg->core_rawpic);
   1659 				cfg->core_rawpic = cfg->core_rawpic & mask_gpc;
   1660 			}
   1661 			WRMSR(cfg->core_pes, cfg->core_ctl);
   1662 			perf_global_ctrl |= 1ull << cfg->core_picno;
   1663 		} else {
   1664 			/*
   1665 			 * Unlike the general-purpose counters, all relevant
   1666 			 * bits of fixed-function counters can be written to.
   1667 			 */
   1668 			WRMSR(cfg->core_pmc, cfg->core_rawpic & mask_ffc);
   1669 
   1670 			/*
   1671 			 * Collect the control bits for all the
   1672 			 * fixed-function counters and write it at one shot
   1673 			 * later in this function
   1674 			 */
   1675 			perf_fixed_ctr_ctrl |= cfg->core_ctl <<
   1676 			    ((cfg->core_picno - num_gpc) * CORE_FFC_ATTR_SIZE);
   1677 			perf_global_ctrl |=
   1678 			    1ull << (cfg->core_picno - num_gpc + 32);
   1679 		}
   1680 
   1681 		cfg = (core_pcbe_config_t *)
   1682 		    kcpc_next_config(token, cfg, NULL);
   1683 	}
   1684 
   1685 	/* Enable all the counters */
   1686 	WRMSR(PERF_FIXED_CTR_CTRL, perf_fixed_ctr_ctrl);
   1687 	WRMSR(PERF_GLOBAL_CTRL, perf_global_ctrl);
   1688 }
   1689 
   1690 static void
   1691 core_pcbe_allstop(void)
   1692 {
   1693 	/* Disable all the counters together */
   1694 	WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED);
   1695 
   1696 	setcr4(getcr4() & ~CR4_PCE);
   1697 }
   1698 
   1699 static void
   1700 core_pcbe_sample(void *token)
   1701 {
   1702 	uint64_t		*daddr;
   1703 	uint64_t		curpic;
   1704 	core_pcbe_config_t	*cfg;
   1705 	uint64_t			counter_mask;
   1706 
   1707 	cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr);
   1708 	while (cfg != NULL) {
   1709 		ASSERT(cfg->core_pictype == CORE_GPC ||
   1710 		    cfg->core_pictype == CORE_FFC);
   1711 
   1712 		curpic = rdmsr(cfg->core_pmc);
   1713 
   1714 		DTRACE_PROBE4(core__pcbe__sample,
   1715 		    uint64_t, cfg->core_pmc,
   1716 		    uint64_t, curpic,
   1717 		    uint64_t, cfg->core_rawpic,
   1718 		    uint64_t, *daddr);
   1719 
   1720 		if (cfg->core_pictype == CORE_GPC) {
   1721 			counter_mask = mask_gpc;
   1722 		} else {
   1723 			counter_mask = mask_ffc;
   1724 		}
   1725 		curpic = curpic & counter_mask;
   1726 		if (curpic >= cfg->core_rawpic) {
   1727 			*daddr += curpic - cfg->core_rawpic;
   1728 		} else {
   1729 			/* Counter overflowed since our last sample */
   1730 			*daddr += counter_mask - (cfg->core_rawpic - curpic) +
   1731 			    1;
   1732 		}
   1733 		cfg->core_rawpic = *daddr & counter_mask;
   1734 
   1735 		cfg =
   1736 		    (core_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr);
   1737 	}
   1738 }
   1739 
   1740 static void
   1741 core_pcbe_free(void *config)
   1742 {
   1743 	kmem_free(config, sizeof (core_pcbe_config_t));
   1744 }
   1745 
   1746 static struct modlpcbe core_modlpcbe = {
   1747 	&mod_pcbeops,
   1748 	"Core Performance Counters",
   1749 	&core_pcbe_ops
   1750 };
   1751 
   1752 static struct modlinkage core_modl = {
   1753 	MODREV_1,
   1754 	&core_modlpcbe,
   1755 };
   1756 
   1757 int
   1758 _init(void)
   1759 {
   1760 	if (core_pcbe_init() != 0) {
   1761 		return (ENOTSUP);
   1762 	}
   1763 	return (mod_install(&core_modl));
   1764 }
   1765 
   1766 int
   1767 _fini(void)
   1768 {
   1769 	return (mod_remove(&core_modl));
   1770 }
   1771 
   1772 int
   1773 _info(struct modinfo *mi)
   1774 {
   1775 	return (mod_info(&core_modl, mi));
   1776 }
   1777