Home | History | Annotate | Download | only in pcbe
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * This file contains preset event names from the Performance Application
     28  * Programming Interface v3.5 which included the following notice:
     29  *
     30  *                             Copyright (c) 2005,6
     31  *                           Innovative Computing Labs
     32  *                         Computer Science Department,
     33  *                            University of Tennessee,
     34  *                                 Knoxville, TN.
     35  *                              All Rights Reserved.
     36  *
     37  *
     38  * Redistribution and use in source and binary forms, with or without
     39  * modification, are permitted provided that the following conditions are met:
     40  *
     41  *    * Redistributions of source code must retain the above copyright notice,
     42  *      this list of conditions and the following disclaimer.
     43  *    * Redistributions in binary form must reproduce the above copyright
     44  *      notice, this list of conditions and the following disclaimer in the
     45  *      documentation and/or other materials provided with the distribution.
     46  *    * Neither the name of the University of Tennessee nor the names of its
     47  *      contributors may be used to endorse or promote products derived from
     48  *      this software without specific prior written permission.
     49  *
     50  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     51  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     53  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     54  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     55  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     56  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     57  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     58  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     59  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     60  * POSSIBILITY OF SUCH DAMAGE.
     61  *
     62  *
     63  * This open source software license conforms to the BSD License template.
     64  */
     65 
     66 
     67 /*
     68  * Performance Counter Back-End for Intel processors supporting Architectural
     69  * Performance Monitoring.
     70  */
     71 
     72 #include <sys/cpuvar.h>
     73 #include <sys/param.h>
     74 #include <sys/cpc_impl.h>
     75 #include <sys/cpc_pcbe.h>
     76 #include <sys/modctl.h>
     77 #include <sys/inttypes.h>
     78 #include <sys/systm.h>
     79 #include <sys/cmn_err.h>
     80 #include <sys/x86_archext.h>
     81 #include <sys/sdt.h>
     82 #include <sys/archsystm.h>
     83 #include <sys/privregs.h>
     84 #include <sys/ddi.h>
     85 #include <sys/sunddi.h>
     86 #include <sys/cred.h>
     87 #include <sys/policy.h>
     88 
/*
 * Forward declarations of the back-end entry points.  Every function except
 * core_pcbe_init() is installed in the core_pcbe_ops vector defined below.
 */
static int core_pcbe_init(void);
static uint_t core_pcbe_ncounters(void);
static const char *core_pcbe_impl_name(void);
static const char *core_pcbe_cpuref(void);
static char *core_pcbe_list_events(uint_t picnum);
static char *core_pcbe_list_attrs(void);
static uint64_t core_pcbe_event_coverage(char *event);
static uint64_t core_pcbe_overflow_bitmap(void);
static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
    void *token);
static void core_pcbe_program(void *token);
static void core_pcbe_allstop(void);
static void core_pcbe_sample(void *token);
static void core_pcbe_free(void *config);
    104 
/* Local boolean constants */
#define	FALSE	0
#define	TRUE	1

/* Counter Type (the values kept in core_pcbe_config_t.core_pictype) */
#define	CORE_GPC	0	/* General-Purpose Counter (GPC) */
#define	CORE_FFC	1	/* Fixed-Function Counter (FFC) */

/*
 * MSR Addresses
 *
 * The *_BASE_* values are the addresses of the first register of each kind.
 * NOTE(review): the remaining registers presumably follow at consecutive
 * addresses -- confirm against the code that fills in core_pmc/core_pes.
 */
#define	GPC_BASE_PMC		0x00c1	/* First GPC */
#define	GPC_BASE_PES		0x0186	/* First GPC Event Select register */
#define	FFC_BASE_PMC		0x0309	/* First FFC */
#define	PERF_FIXED_CTR_CTRL	0x038d	/* Used to enable/disable FFCs */
#define	PERF_GLOBAL_STATUS	0x038e	/* Overflow status register */
#define	PERF_GLOBAL_CTRL	0x038f	/* Used to enable/disable counting */
#define	PERF_GLOBAL_OVF_CTRL	0x0390	/* Used to clear overflow status */
    120 
/*
 * Processor Event Select register fields
 *
 * The flags are single-bit masks.  They are not listed in strict bit order:
 * CORE_ANYTHR is bit 21, which lies between CORE_INT (20) and CORE_EN (22).
 */
#define	CORE_USR	(1ULL << 16)	/* Count while not in ring 0 */
#define	CORE_OS		(1ULL << 17)	/* Count while in ring 0 */
#define	CORE_EDGE	(1ULL << 18)	/* Enable edge detection */
#define	CORE_PC		(1ULL << 19)	/* Enable pin control */
#define	CORE_INT	(1ULL << 20)	/* Enable interrupt on overflow */
#define	CORE_EN		(1ULL << 22)	/* Enable counting */
#define	CORE_INV	(1ULL << 23)	/* Invert the CMASK */
#define	CORE_ANYTHR	(1ULL << 21)	/* Count event for any thread on core */

/* The unit mask is an 8-bit field that starts at bit 8 */
#define	CORE_UMASK_SHIFT	8
#define	CORE_UMASK_MASK		0xffu
/* The CMASK is an 8-bit field that starts at bit 24 */
#define	CORE_CMASK_SHIFT	24
#define	CORE_CMASK_MASK		0xffu
    137 
/*
 * Fixed-function counter attributes
 *
 * The OS/USR enables have the same meaning as CORE_OS/CORE_USR have for the
 * general-purpose counters: OS counts in ring 0, USR counts everywhere else.
 */
#define	CORE_FFC_OS_EN	(1ULL << 0)	/* Count while in ring 0 */
#define	CORE_FFC_USR_EN	(1ULL << 1)	/* Count while not in ring 0 */
#define	CORE_FFC_ANYTHR	(1ULL << 2)	/* Count event for any thread on core */
#define	CORE_FFC_PMI	(1ULL << 3)	/* Enable interrupt on overflow */

/*
 * Number of bits for specifying each FFC's attributes in the control register
 * (the four attribute bits above fit exactly in one such group).
 */
#define	CORE_FFC_ATTR_SIZE	4
    150 
/*
 * CondChgd and OvfBuffer fields of global status and overflow control registers
 * (the two most significant bits of a 64-bit value).
 */
#define	CONDCHGD	(1ULL << 63)
#define	OVFBUFFER	(1ULL << 62)
#define	MASK_CONDCHGD_OVFBUFFER	(CONDCHGD | OVFBUFFER)

/* Control value with no enable bit set */
#define	ALL_STOPPED	0ULL
    159 
    160 #define	BITMASK_XBITS(x)	((1ull << (x)) - 1ull)
    161 
/*
 * Only the lower 32-bits can be written to in the general-purpose
 * counters.  The higher bits are extended from bit 31; all ones if
 * bit 31 is one and all zeros otherwise.
 *
 * The fixed-function counters do not have this restriction.
 *
 * The mask below selects bits 31 through (width_gpc - 1).  It reads the
 * file-scope variable width_gpc each time it is expanded.
 */
#define	BITS_EXTENDED_FROM_31	(BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31))
    170 
    171 #define	WRMSR(msr, value)						\
    172 	wrmsr((msr), (value));						\
    173 	DTRACE_PROBE2(wrmsr, uint64_t, (msr), uint64_t, (value));
    174 
    175 #define	RDMSR(msr, value)						\
    176 	(value) = rdmsr((msr));						\
    177 	DTRACE_PROBE2(rdmsr, uint64_t, (msr), uint64_t, (value));
    178 
/*
 * Configuration record for one counter.
 * NOTE(review): presumably created by core_pcbe_configure() and released by
 * core_pcbe_free() -- confirm against those functions.
 */
typedef struct core_pcbe_config {
	/* NOTE(review): presumably the raw counter value -- confirm */
	uint64_t	core_rawpic;
	uint64_t	core_ctl;	/* Event Select bits */
	uint64_t	core_pmc;	/* Counter register address */
	uint64_t	core_pes;	/* Event Select register address */
	/* NOTE(review): presumably the counter (pic) number -- confirm */
	uint_t		core_picno;
	uint8_t		core_pictype;	/* CORE_GPC or CORE_FFC */
} core_pcbe_config_t;
    187 
/*
 * Operations vector for this back-end.  The initializer is positional; the
 * trailing comment on each line names the pcbe_ops_t slot being filled.  The
 * capability word sets both CPC_CAP_OVERFLOW_INTERRUPT and
 * CPC_CAP_OVERFLOW_PRECISE.
 */
pcbe_ops_t core_pcbe_ops = {
	PCBE_VER_1,			/* pcbe_ver */
	CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE,	/* pcbe_caps */
	core_pcbe_ncounters,		/* pcbe_ncounters */
	core_pcbe_impl_name,		/* pcbe_impl_name */
	core_pcbe_cpuref,		/* pcbe_cpuref */
	core_pcbe_list_events,		/* pcbe_list_events */
	core_pcbe_list_attrs,		/* pcbe_list_attrs */
	core_pcbe_event_coverage,	/* pcbe_event_coverage */
	core_pcbe_overflow_bitmap,	/* pcbe_overflow_bitmap */
	core_pcbe_configure,		/* pcbe_configure */
	core_pcbe_program,		/* pcbe_program */
	core_pcbe_allstop,		/* pcbe_allstop */
	core_pcbe_sample,		/* pcbe_sample */
	core_pcbe_free			/* pcbe_free */
};
    204 
/*
 * One row of a Core-microarchitecture event name table.
 */
struct nametable_core_uarch {
	const char	*name;		/* event name */
	uint64_t	restricted_bits; /* ALL_CORES/ALL_AGENTS bits, or 0 */
	uint8_t		event_num;	/* event number; NT_END ends a table */
};

/* event_num value used by the last (sentinel) row of every name table */
#define	NT_END	0xFF

/*
 * Counting an event for all cores or all bus agents requires cpc_cpu privileges
 */
#define	ALL_CORES	(1ULL << 15)
#define	ALL_AGENTS	(1ULL << 13)
    218 
/*
 * One row of a generic (PAPI_*) event table: the generic name together with
 * the event number and unit mask it stands for.  As in the name tables, an
 * event_num of NT_END marks the last row.
 */
struct generic_events {
	const char	*name;		/* generic event name */
	uint8_t		event_num;	/* event number, or NT_END */
	uint8_t		umask;		/* unit mask */
};
    224 
/*
 * Generic events, each given as { name, event number, unit mask }.  The
 * trailing comment names the event (and unit mask) the row stands for.
 * The table is terminated by a row whose event number is NT_END.
 */
static const struct generic_events cmn_generic_events[] = {
	{ "PAPI_tot_cyc", 0x3c, 0x00 }, /* cpu_clk_unhalted.thread_p/core */
	{ "PAPI_tot_ins", 0xc0, 0x00 }, /* inst_retired.any_p		  */
	{ "PAPI_br_ins",  0xc4, 0x0c }, /* br_inst_retired.taken	  */
	{ "PAPI_br_msp",  0xc5, 0x00 }, /* br_inst_retired.mispred	  */
	{ "PAPI_br_ntk",  0xc4, 0x03 },
			/* br_inst_retired.pred_not_taken|mispred_not_taken */
	{ "PAPI_br_prc",  0xc4, 0x05 },
				/* br_inst_retired.pred_not_taken|pred_taken */
	{ "PAPI_hw_int",  0xc8, 0x00 }, /* hw_int_rcv			  */
	{ "PAPI_tot_iis", 0xaa, 0x01 }, /* macro_insts.decoded		  */
	{ "PAPI_l1_dca",  0x43, 0x01 }, /* l1d_all_ref			  */
	{ "PAPI_l1_icm",  0x81, 0x00 }, /* l1i_misses			  */
	{ "PAPI_l1_icr",  0x80, 0x00 }, /* l1i_reads			  */
	{ "PAPI_l1_tcw",  0x41, 0x0f }, /* l1d_cache_st.mesi		  */
	{ "PAPI_l2_stm",  0x2a, 0x41 }, /* l2_st.self.i_state		  */
	{ "PAPI_l2_tca",  0x2e, 0x4f }, /* l2_rqsts.self.demand.mesi	  */
	{ "PAPI_l2_tch",  0x2e, 0x4e }, /* l2_rqsts.mes			  */
	{ "PAPI_l2_tcm",  0x2e, 0x41 }, /* l2_rqsts.self.demand.i_state   */
	{ "PAPI_l2_tcw",  0x2a, 0x4f }, /* l2_st.self.mesi		  */
	{ "PAPI_ld_ins",  0xc0, 0x01 }, /* inst_retired.loads		  */
	{ "PAPI_lst_ins", 0xc0, 0x03 }, /* inst_retired.loads|stores	  */
	{ "PAPI_sr_ins",  0xc0, 0x02 }, /* inst_retired.stores		  */
	{ "PAPI_tlb_dm",  0x08, 0x01 }, /* dtlb_misses.any		  */
	{ "PAPI_tlb_im",  0x82, 0x12 }, /* itlb.small_miss|large_miss	  */
	{ "PAPI_tlb_tl",  0x0c, 0x03 }, /* page_walks			  */
	{ "",		  NT_END, 0  }
};
    253 
/*
 * Generic events for pic0.  The single entry uses event number 0xcb, which
 * is the number pic0_events assigns to mem_load_retired.
 * NOTE(review): presumably these can only be counted on counter 0 -- confirm
 * against the users of this table.
 */
static const struct generic_events generic_events_pic0[] = {
	{ "PAPI_l1_dcm",  0xcb, 0x01 }, /* mem_load_retired.l1d_miss */
	{ "",		  NT_END, 0  }
};
    258 
/*
 * The events listed in the following table can be counted on all
 * general-purpose counters on processors that are of Penryn and Merom Family
 *
 * Each row is { name, restricted_bits, event_num }.  restricted_bits carries
 * ALL_CORES and/or ALL_AGENTS for the events that have them (0x0 otherwise).
 * The last row has an empty name and an event_num of NT_END.
 */
static const struct nametable_core_uarch cmn_gpc_events_core_uarch[] = {
	/* Alphabetical order of event name */

	{ "baclears",			0x0,	0xe6 },
	{ "bogus_br",			0x0,	0xe4 },
	{ "br_bac_missp_exec",		0x0,	0x8a },

	{ "br_call_exec",		0x0,	0x92 },
	{ "br_call_missp_exec",		0x0,	0x93 },
	{ "br_cnd_exec",		0x0,	0x8b },

	{ "br_cnd_missp_exec",		0x0,	0x8c },
	{ "br_ind_call_exec",		0x0,	0x94 },
	{ "br_ind_exec",		0x0,	0x8d },

	{ "br_ind_missp_exec",		0x0,	0x8e },
	{ "br_inst_decoded",		0x0,	0xe0 },
	{ "br_inst_exec",		0x0,	0x88 },

	{ "br_inst_retired",		0x0,	0xc4 },
	{ "br_inst_retired_mispred",	0x0,	0xc5 },
	{ "br_missp_exec",		0x0,	0x89 },

	{ "br_ret_bac_missp_exec",	0x0,	0x91 },
	{ "br_ret_exec",		0x0,	0x8f },
	{ "br_ret_missp_exec",		0x0,	0x90 },

	{ "br_tkn_bubble_1",		0x0,	0x97 },
	{ "br_tkn_bubble_2",		0x0,	0x98 },
	{ "bus_bnr_drv",		ALL_AGENTS,	0x61 },

	{ "bus_data_rcv",		ALL_CORES,	0x64 },
	{ "bus_drdy_clocks",		ALL_AGENTS,	0x62 },
	{ "bus_hit_drv",		ALL_AGENTS,	0x7a },

	{ "bus_hitm_drv",		ALL_AGENTS,	0x7b },
	{ "bus_io_wait",		ALL_CORES,	0x7f },
	{ "bus_lock_clocks",		ALL_CORES | ALL_AGENTS,	0x63 },

	{ "bus_request_outstanding",	ALL_CORES | ALL_AGENTS,	0x60 },
	{ "bus_trans_any",		ALL_CORES | ALL_AGENTS,	0x70 },
	{ "bus_trans_brd",		ALL_CORES | ALL_AGENTS,	0x65 },

	{ "bus_trans_burst",		ALL_CORES | ALL_AGENTS,	0x6e },
	{ "bus_trans_def",		ALL_CORES | ALL_AGENTS,	0x6d },
	{ "bus_trans_ifetch",		ALL_CORES | ALL_AGENTS,	0x68 },

	{ "bus_trans_inval",		ALL_CORES | ALL_AGENTS,	0x69 },
	{ "bus_trans_io",		ALL_CORES | ALL_AGENTS,	0x6c },
	{ "bus_trans_mem",		ALL_CORES | ALL_AGENTS,	0x6f },

	{ "bus_trans_p",		ALL_CORES | ALL_AGENTS,	0x6b },
	{ "bus_trans_pwr",		ALL_CORES | ALL_AGENTS,	0x6a },
	{ "bus_trans_rfo",		ALL_CORES | ALL_AGENTS,	0x66 },

	{ "bus_trans_wb",		ALL_CORES | ALL_AGENTS,	0x67 },
	{ "busq_empty",			ALL_CORES,	0x7d },
	{ "cmp_snoop",			ALL_CORES,	0x78 },

	{ "cpu_clk_unhalted",		0x0,	0x3c },
	{ "cycles_int",			0x0,	0xc6 },
	{ "cycles_l1i_mem_stalled",	0x0,	0x86 },

	{ "dtlb_misses",		0x0,	0x08 },
	{ "eist_trans",			0x0,	0x3a },
	{ "esp",			0x0,	0xab },

	{ "ext_snoop",			ALL_AGENTS,	0x77 },
	{ "fp_mmx_trans",		0x0,	0xcc },
	{ "hw_int_rcv",			0x0,	0xc8 },

	{ "ild_stall",			0x0,	0x87 },
	{ "inst_queue",			0x0,	0x83 },
	{ "inst_retired",		0x0,	0xc0 },

	{ "itlb",			0x0,	0x82 },
	{ "itlb_miss_retired",		0x0,	0xc9 },
	{ "l1d_all_ref",		0x0,	0x43 },

	{ "l1d_cache_ld",		0x0,	0x40 },
	{ "l1d_cache_lock",		0x0,	0x42 },
	{ "l1d_cache_st",		0x0,	0x41 },

	{ "l1d_m_evict",		0x0,	0x47 },
	{ "l1d_m_repl",			0x0,	0x46 },
	{ "l1d_pend_miss",		0x0,	0x48 },

	{ "l1d_prefetch",		0x0,	0x4e },
	{ "l1d_repl",			0x0,	0x45 },
	{ "l1d_split",			0x0,	0x49 },

	{ "l1i_misses",			0x0,	0x81 },
	{ "l1i_reads",			0x0,	0x80 },
	{ "l2_ads",			ALL_CORES,	0x21 },

	{ "l2_dbus_busy_rd",		ALL_CORES,	0x23 },
	{ "l2_ifetch",			ALL_CORES,	0x28 },
	{ "l2_ld",			ALL_CORES,	0x29 },

	{ "l2_lines_in",		ALL_CORES,	0x24 },
	{ "l2_lines_out",		ALL_CORES,	0x26 },
	{ "l2_lock",			ALL_CORES,	0x2b },

	{ "l2_m_lines_in",		ALL_CORES,	0x25 },
	{ "l2_m_lines_out",		ALL_CORES,	0x27 },
	{ "l2_no_req",			ALL_CORES,	0x32 },

	{ "l2_reject_busq",		ALL_CORES,	0x30 },
	{ "l2_rqsts",			ALL_CORES,	0x2e },
	{ "l2_st",			ALL_CORES,	0x2a },

	{ "load_block",			0x0,	0x03 },
	{ "load_hit_pre",		0x0,	0x4c },
	{ "machine_nukes",		0x0,	0xc3 },

	{ "macro_insts",		0x0,	0xaa },
	{ "memory_disambiguation",	0x0,	0x09 },
	{ "misalign_mem_ref",		0x0,	0x05 },
	{ "page_walks",			0x0,	0x0c },

	{ "pref_rqsts_dn",		0x0,	0xf8 },
	{ "pref_rqsts_up",		0x0,	0xf0 },
	{ "rat_stalls",			0x0,	0xd2 },

	{ "resource_stalls",		0x0,	0xdc },
	{ "rs_uops_dispatched",		0x0,	0xa0 },
	{ "seg_reg_renames",		0x0,	0xd5 },

	{ "seg_rename_stalls",		0x0,	0xd4 },
	{ "segment_reg_loads",		0x0,	0x06 },
	{ "simd_assist",		0x0,	0xcd },

	{ "simd_comp_inst_retired",	0x0,	0xca },
	{ "simd_inst_retired",		0x0,	0xc7 },
	{ "simd_instr_retired",		0x0,	0xce },

	{ "simd_sat_instr_retired",	0x0,	0xcf },
	{ "simd_sat_uop_exec",		0x0,	0xb1 },
	{ "simd_uop_type_exec",		0x0,	0xb3 },

	{ "simd_uops_exec",		0x0,	0xb0 },
	{ "snoop_stall_drv",		ALL_CORES | ALL_AGENTS,	0x7e },
	{ "sse_pre_exec",		0x0,	0x07 },

	{ "sse_pre_miss",		0x0,	0x4b },
	{ "store_block",		0x0,	0x04 },
	{ "thermal_trip",		0x0,	0x3b },

	{ "uops_retired",		0x0,	0xc2 },
	{ "x87_ops_retired",		0x0,	0xc1 },
	{ "",				0x0,	NT_END }
};
    415 
/*
 * If any of the pic specific events require privileges, make sure to add a
 * check in configure_gpc() to find whether an event hard-coded as a number by
 * the user has any privilege requirements
 *
 * (As listed here, every pic-specific event has restricted_bits of 0x0.)
 *
 * Rows are { name, restricted_bits, event_num }; the NT_END row ends the table.
 */
static const struct nametable_core_uarch pic0_events[] = {
	/* Alphabetical order of event name */

	{ "cycles_div_busy",		0x0,	0x14 },
	{ "fp_comp_ops_exe",		0x0,	0x10 },
	{ "idle_during_div",		0x0,	0x18 },

	{ "mem_load_retired",		0x0,	0xcb },
	{ "rs_uops_dispatched_port",	0x0,	0xa1 },
	{ "",				0x0,	NT_END }
};
    432 
/*
 * Counterpart of pic0_events for pic1, in the same row format and with the
 * same NT_END terminator.
 * NOTE(review): presumably these events can only be counted on counter 1 --
 * confirm against the users of this table.
 */
static const struct nametable_core_uarch pic1_events[] = {
	/* Alphabetical order of event name */

	{ "delayed_bypass",	0x0,	0x19 },
	{ "div",		0x0,	0x13 },
	{ "fp_assist",		0x0,	0x11 },

	{ "mul",		0x0,	0x12 },
	{ "",			0x0,	NT_END }
};
    443 
/*
 * FFC entries must be in order
 *
 * All three arrays below are NULL-terminated.  The two name lists differ only
 * in their second entry (".core" versus ".thread").
 */
static char *ffc_names_non_htt[] = {
	"instr_retired.any",
	"cpu_clk_unhalted.core",
	"cpu_clk_unhalted.ref",
	NULL
};

static char *ffc_names_htt[] = {
	"instr_retired.any",
	"cpu_clk_unhalted.thread",
	"cpu_clk_unhalted.ref",
	NULL
};

/*
 * Generic names, position for position with the lists above; the empty
 * string in the third slot means that entry has no generic name.
 * NOTE(review): the positional pairing is inferred from the matching array
 * lengths -- confirm against the code that indexes these arrays.
 */
static char *ffc_genericnames[] = {
	"PAPI_tot_ins",
	"PAPI_tot_cyc",
	"",
	NULL
};
    465 
/*
 * File-scope state of the back-end.  Nothing is given a value here apart from
 * the three NULL pointers.
 * NOTE(review): presumably all of it is filled in by core_pcbe_init() from
 * what the processor reports -- confirm.  The *_gpc and *_ffc variables
 * appear to describe the general-purpose and fixed-function counters
 * respectively; width_gpc is the one consumed by BITS_EXTENDED_FROM_31.
 */
static char	**ffc_names = NULL;
static char	**ffc_allnames = NULL;
static char	**gpc_names = NULL;
static uint32_t	versionid;
static uint64_t	num_gpc;
static uint64_t	width_gpc;
static uint64_t	mask_gpc;
static uint64_t	num_ffc;
static uint64_t	width_ffc;
static uint64_t	mask_ffc;
static uint_t	total_pmc;
static uint64_t	control_ffc;
static uint64_t	control_gpc;
static uint64_t	control_mask;
static uint32_t	arch_events_vector;
    481 
    482 #define	IMPL_NAME_LEN 100
    483 static char core_impl_name[IMPL_NAME_LEN];
    484 
    485 static const char *core_cpuref =
    486 	"See Appendix A of the \"Intel 64 and IA-32 Architectures Software" \
    487 	" Developer's Manual Volume 3B: System Programming Guide, Part 2\"" \
    488 	" Order Number: 253669-026US, Februrary 2008";
    489 
/*
 * One row of an event table keyed by event select and unit mask.
 */
struct events_table_t {
	uint8_t		eventselect;	/* event select code */
	uint8_t		unitmask;	/* unit mask */
	uint64_t	supported_counters; /* bit mask built from C0..C3/C_ALL */
	const char	*name;		/* event name */
};
    496 
    497 /* Used to describe which counters support an event */
    498 #define	C(x) (1 << (x))
    499 #define	C0 C(0)
    500 #define	C1 C(1)
    501 #define	C2 C(2)
    502 #define	C3 C(3)
    503 #define	C_ALL 0xFFFFFFFFFFFFFFFF
    504 
/*
 * Architectural events
 *
 * ARCH_EVENTS_COMMON expands to the six rows shared by both tables below.
 * Each row is { event select, unit mask, supported counters, name }, and all
 * of them are marked as countable on every counter (C_ALL).
 */
#define	ARCH_EVENTS_COMMON					\
	{ 0xc0, 0x00, C_ALL, "inst_retired.any_p" },		\
	{ 0x3c, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" },	\
	{ 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" },	\
	{ 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" },	\
	{ 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" },	\
	{ 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" }

/*
 * The two tables differ only in the name given to their first row (event
 * 0x3c, unit mask 0x00); the common rows follow it in both.
 */
static const struct events_table_t arch_events_table_non_htt[] = {
	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.core" },
	ARCH_EVENTS_COMMON
};

static const struct events_table_t arch_events_table_htt[] = {
	{ 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" },
	ARCH_EVENTS_COMMON
};
    523 
/*
 * Generic names for the architectural events.  The trailing comments give
 * the architectural event each slot stands for, in the same order as the
 * rows of the arch_events_table_* arrays; an empty string means the event
 * has no generic name.
 */
static char *arch_genevents_table[] = {
	"PAPI_tot_cyc", /* cpu_clk_unhalted.thread_p/core */
	"PAPI_tot_ins", /* inst_retired.any_p		  */
	"",		/* cpu_clk_unhalted.ref_p	  */
	"",		/* longest_lat_cache.reference	  */
	"",		/* longest_lat_cache.miss	  */
	"",		/* br_inst_retired.all_branches	  */
	"",		/* br_misp_retired.all_branches	  */
};

/*
 * NOTE(review): arch_events_table is presumably pointed at one of the two
 * arch_events_table_* arrays during initialization, and the two counters
 * below presumably hold how many architectural events and fixed-function
 * counters this file knows about -- confirm against core_pcbe_init().
 */
static const struct events_table_t *arch_events_table = NULL;
static uint64_t known_arch_events;
static uint64_t known_ffc_num;
    537 
    538 #define	GENERICEVENTS_FAM6_NHM						       \
    539 { 0xc4, 0x01, C0|C1|C2|C3, "PAPI_br_cn" },   /* br_inst_retired.conditional */ \
    540 { 0x1d, 0x01, C0|C1|C2|C3, "PAPI_hw_int" },  /* hw_int.rcx		    */ \
    541 { 0x17, 0x01, C0|C1|C2|C3, "PAPI_tot_iis" }, /* inst_queue_writes	    */ \
    542 { 0x43, 0x01, C0|C1,	   "PAPI_l1_dca" },  /* l1d_all_ref.any		    */ \
    543 { 0x24, 0x03, C0|C1|C2|C3, "PAPI_l1_dcm" },  /* l2_rqsts. loads and rfos    */ \
    544 { 0x40, 0x0f, C0|C1|C2|C3, "PAPI_l1_dcr" },  /* l1d_cache_ld.mesi	    */ \
    545 { 0x41, 0x0f, C0|C1|C2|C3, "PAPI_l1_dcw" },  /* l1d_cache_st.mesi	    */ \
    546 { 0x80, 0x03, C0|C1|C2|C3, "PAPI_l1_ica" },  /* l1i.reads		    */ \
    547 { 0x80, 0x01, C0|C1|C2|C3, "PAPI_l1_ich" },  /* l1i.hits		    */ \
    548 { 0x80, 0x02, C0|C1|C2|C3, "PAPI_l1_icm" },  /* l1i.misses		    */ \
    549 { 0x80, 0x03, C0|C1|C2|C3, "PAPI_l1_icr" },  /* l1i.reads		    */ \
    550 { 0x24, 0x33, C0|C1|C2|C3, "PAPI_l1_ldm" },  /* l2_rqsts. loads and ifetches */\
    551 { 0x24, 0xff, C0|C1|C2|C3, "PAPI_l1_tcm" },  /* l2_rqsts.references	    */ \
    552 { 0x24, 0x02, C0|C1|C2|C3, "PAPI_l2_ldm" },  /* l2_rqsts.ld_miss	    */ \
    553 { 0x24, 0x08, C0|C1|C2|C3, "PAPI_l2_stm" },  /* l2_rqsts.rfo_miss	    */ \
    554 { 0x24, 0x3f, C0|C1|C2|C3, "PAPI_l2_tca" },				       \
    555 				/* l2_rqsts. loads, rfos and ifetches */       \
    556 { 0x24, 0x15, C0|C1|C2|C3, "PAPI_l2_tch" },				       \
    557 				/* l2_rqsts. ld_hit, rfo_hit and ifetch_hit */ \
    558 { 0x24, 0x2a, C0|C1|C2|C3, "PAPI_l2_tcm" },				       \
    559 			/* l2_rqsts. ld_miss, rfo_miss and ifetch_miss */      \
    560 { 0x24, 0x33, C0|C1|C2|C3, "PAPI_l2_tcr" },  /* l2_rqsts. loads and ifetches */\
    561 { 0x24, 0x0c, C0|C1|C2|C3, "PAPI_l2_tcw" },  /* l2_rqsts.rfos		    */ \
    562 { 0x2e, 0x4f, C0|C1|C2|C3, "PAPI_l3_tca" },  /* l3_lat_cache.reference	    */ \
    563 { 0x2e, 0x41, C0|C1|C2|C3, "PAPI_l3_tcm" },  /* l3_lat_cache.misses	    */ \
    564 { 0x0b, 0x01, C0|C1|C2|C3, "PAPI_ld_ins" },  /* mem_inst_retired.loads	    */ \
    565 { 0x0b, 0x03, C0|C1|C2|C3, "PAPI_lst_ins" },				       \
    566 				/* mem_inst_retired.loads and stores	    */ \
    567 { 0x26, 0xf0, C0|C1|C2|C3, "PAPI_prf_dm" },  /* l2_data_rqsts.prefetch.mesi */ \
    568 { 0x0b, 0x02, C0|C1|C2|C3, "PAPI_sr_ins" },  /* mem_inst_retired.stores	    */ \
    569 { 0x49, 0x01, C0|C1|C2|C3, "PAPI_tlb_dm" },  /* dtlb_misses.any		    */ \
    570 { 0x85, 0x01, C0|C1|C2|C3, "PAPI_tlb_im" }   /* itlb_misses.any		    */
    571 
    572 
    573 #define	EVENTS_FAM6_NHM							\
    574 									\
    575 { 0x80, 0x04, C0|C1|C2|C3, "l1i.cycles_stalled" },			\
    576 { 0x80, 0x01, C0|C1|C2|C3, "l1i.hits" },				\
    577 { 0x80, 0x02, C0|C1|C2|C3, "l1i.misses" },				\
    578 									\
    579 { 0x80, 0x03, C0|C1|C2|C3, "l1i.reads" },				\
    580 { 0x82, 0x01, C0|C1|C2|C3, "large_itlb.hit" },				\
    581 { 0x87, 0x0F, C0|C1|C2|C3, "ild_stall.any" },				\
    582 									\
    583 { 0x87, 0x04, C0|C1|C2|C3, "ild_stall.iq_full" },			\
    584 { 0x87, 0x01, C0|C1|C2|C3, "ild_stall.lcp" },				\
    585 { 0x87, 0x02, C0|C1|C2|C3, "ild_stall.mru" },				\
    586 									\
    587 { 0x87, 0x08, C0|C1|C2|C3, "ild_stall.regen" },				\
    588 { 0xE6, 0x02, C0|C1|C2|C3, "baclear.bad_target" },			\
    589 { 0xE6, 0x01, C0|C1|C2|C3, "baclear.clear" },				\
    590 									\
    591 { 0xE8, 0x01, C0|C1|C2|C3, "bpu_clears.early" },			\
    592 { 0xE8, 0x02, C0|C1|C2|C3, "bpu_clears.late" },				\
    593 { 0xE5, 0x01, C0|C1|C2|C3, "bpu_missed_call_ret" },			\
    594 									\
    595 { 0xE0, 0x01, C0|C1|C2|C3, "br_inst_decoded" },				\
    596 { 0x88, 0x7F, C0|C1|C2|C3, "br_inst_exec.any" },			\
    597 { 0x88, 0x01, C0|C1|C2|C3, "br_inst_exec.cond" },			\
    598 									\
    599 { 0x88, 0x02, C0|C1|C2|C3, "br_inst_exec.direct" },			\
    600 { 0x88, 0x10, C0|C1|C2|C3, "br_inst_exec.direct_near_call" },		\
    601 { 0x88, 0x20, C0|C1|C2|C3, "br_inst_exec.indirect_near_call" },		\
    602 									\
    603 { 0x88, 0x04, C0|C1|C2|C3, "br_inst_exec.indirect_non_call" },		\
    604 { 0x88, 0x30, C0|C1|C2|C3, "br_inst_exec.near_calls" },			\
    605 { 0x88, 0x07, C0|C1|C2|C3, "br_inst_exec.non_calls" },			\
    606 									\
    607 { 0x88, 0x08, C0|C1|C2|C3, "br_inst_exec.return_near" },		\
    608 { 0x88, 0x40, C0|C1|C2|C3, "br_inst_exec.taken" },			\
    609 { 0x89, 0x7F, C0|C1|C2|C3, "br_misp_exec.any" },			\
    610 									\
    611 { 0x89, 0x01, C0|C1|C2|C3, "br_misp_exec.cond" },			\
    612 { 0x89, 0x02, C0|C1|C2|C3, "br_misp_exec.direct" },			\
    613 { 0x89, 0x10, C0|C1|C2|C3, "br_misp_exec.direct_near_call" },		\
    614 									\
    615 { 0x89, 0x20, C0|C1|C2|C3, "br_misp_exec.indirect_near_call" },		\
    616 { 0x89, 0x04, C0|C1|C2|C3, "br_misp_exec.indirect_non_call" },		\
    617 { 0x89, 0x30, C0|C1|C2|C3, "br_misp_exec.near_calls" },			\
    618 									\
    619 { 0x89, 0x07, C0|C1|C2|C3, "br_misp_exec.non_calls" },			\
    620 { 0x89, 0x08, C0|C1|C2|C3, "br_misp_exec.return_near" },		\
    621 { 0x89, 0x40, C0|C1|C2|C3, "br_misp_exec.taken" },			\
    622 									\
    623 { 0x17, 0x01, C0|C1|C2|C3, "inst_queue_writes" },			\
    624 { 0x1E, 0x01, C0|C1|C2|C3, "inst_queue_write_cycles" },			\
    625 { 0xA7, 0x01, C0|C1|C2|C3, "baclear_force_iq" },			\
    626 									\
    627 { 0xD0, 0x01, C0|C1|C2|C3, "macro_insts.decoded" },			\
    628 { 0xA6, 0x01, C0|C1|C2|C3, "macro_insts.fusions_decoded" },		\
    629 { 0x19, 0x01, C0|C1|C2|C3, "two_uop_insts_decoded" },			\
    630 									\
    631 { 0x18, 0x01, C0|C1|C2|C3, "inst_decoded.dec0" },			\
    632 { 0xD1, 0x04, C0|C1|C2|C3, "uops_decoded.esp_folding" },		\
    633 { 0xD1, 0x08, C0|C1|C2|C3, "uops_decoded.esp_sync" },			\
    634 									\
    635 { 0xD1, 0x02, C0|C1|C2|C3, "uops_decoded.ms" },				\
    636 { 0x20, 0x01, C0|C1|C2|C3, "lsd_overflow" },				\
    637 { 0x0E, 0x01, C0|C1|C2|C3, "uops_issued.any" },				\
    638 									\
    639 { 0x0E, 0x02, C0|C1|C2|C3, "uops_issued.fused" },			\
    640 { 0xA2, 0x20, C0|C1|C2|C3, "resource_stalls.fpcw" },			\
    641 { 0xA2, 0x02, C0|C1|C2|C3, "resource_stalls.load" },			\
    642 									\
    643 { 0xA2, 0x40, C0|C1|C2|C3, "resource_stalls.mxcsr" },			\
    644 { 0xA2, 0x04, C0|C1|C2|C3, "resource_stalls.rs_full" },			\
    645 { 0xA2, 0x08, C0|C1|C2|C3, "resource_stalls.store" },			\
    646 									\
    647 { 0xA2, 0x01, C0|C1|C2|C3, "resource_stalls.any" },			\
    648 { 0xD2, 0x01, C0|C1|C2|C3, "rat_stalls.flags" },			\
    649 { 0xD2, 0x02, C0|C1|C2|C3, "rat_stalls.registers" },			\
    650 									\
    651 { 0xD2, 0x04, C0|C1|C2|C3, "rat_stalls.rob_read_port" },		\
    652 { 0xD2, 0x08, C0|C1|C2|C3, "rat_stalls.scoreboard" },			\
    653 { 0xD2, 0x0F, C0|C1|C2|C3, "rat_stalls.any" },				\
    654 									\
    655 { 0xD4, 0x01, C0|C1|C2|C3, "seg_rename_stalls" },			\
    656 { 0xD5, 0x01, C0|C1|C2|C3, "es_reg_renames" },				\
    657 { 0x10, 0x02, C0|C1|C2|C3, "fp_comp_ops_exe.mmx" },			\
    658 									\
    659 { 0x10, 0x80, C0|C1|C2|C3, "fp_comp_ops_exe.sse_double_precision" },	\
    660 { 0x10, 0x04, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp" },			\
    661 { 0x10, 0x10, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_packed" },		\
    662 									\
    663 { 0x10, 0x20, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_scalar" },		\
    664 { 0x10, 0x40, C0|C1|C2|C3, "fp_comp_ops_exe.sse_single_precision" },	\
    665 { 0x10, 0x08, C0|C1|C2|C3, "fp_comp_ops_exe.sse2_integer" },		\
    666 									\
    667 { 0x10, 0x01, C0|C1|C2|C3, "fp_comp_ops_exe.x87" },			\
    668 { 0x14, 0x01, C0|C1|C2|C3, "arith.cycles_div_busy" },			\
    669 { 0x14, 0x02, C0|C1|C2|C3, "arith.mul" },				\
    670 									\
    671 { 0x12, 0x04, C0|C1|C2|C3, "simd_int_128.pack" },			\
    672 { 0x12, 0x20, C0|C1|C2|C3, "simd_int_128.packed_arith" },		\
    673 { 0x12, 0x10, C0|C1|C2|C3, "simd_int_128.packed_logical" },		\
    674 									\
    675 { 0x12, 0x01, C0|C1|C2|C3, "simd_int_128.packed_mpy" },			\
    676 { 0x12, 0x02, C0|C1|C2|C3, "simd_int_128.packed_shift" },		\
    677 { 0x12, 0x40, C0|C1|C2|C3, "simd_int_128.shuffle_move" },		\
    678 									\
    679 { 0x12, 0x08, C0|C1|C2|C3, "simd_int_128.unpack" },			\
    680 { 0xFD, 0x04, C0|C1|C2|C3, "simd_int_64.pack" },			\
    681 { 0xFD, 0x20, C0|C1|C2|C3, "simd_int_64.packed_arith" },		\
    682 									\
    683 { 0xFD, 0x10, C0|C1|C2|C3, "simd_int_64.packed_logical" },		\
    684 { 0xFD, 0x01, C0|C1|C2|C3, "simd_int_64.packed_mpy" },			\
    685 { 0xFD, 0x02, C0|C1|C2|C3, "simd_int_64.packed_shift" },		\
    686 									\
    687 { 0xFD, 0x40, C0|C1|C2|C3, "simd_int_64.shuffle_move" },		\
    688 { 0xFD, 0x08, C0|C1|C2|C3, "simd_int_64.unpack" },			\
    689 { 0xB1, 0x01, C0|C1|C2|C3, "uops_executed.port0" },			\
    690 									\
    691 { 0xB1, 0x02, C0|C1|C2|C3, "uops_executed.port1" },			\
    692 { 0x40, 0x04, C0|C1, "l1d_cache_ld.e_state" },				\
    693 { 0x40, 0x01, C0|C1, "l1d_cache_ld.i_state" },				\
    694 									\
    695 { 0x40, 0x08, C0|C1, "l1d_cache_ld.m_state" },				\
    696 { 0x40, 0x0F, C0|C1, "l1d_cache_ld.mesi" },				\
    697 { 0x40, 0x02, C0|C1, "l1d_cache_ld.s_state" },				\
    698 									\
    699 { 0x41, 0x04, C0|C1, "l1d_cache_st.e_state" },				\
    700 { 0x41, 0x08, C0|C1, "l1d_cache_st.m_state" },				\
    701 { 0x41, 0x0F, C0|C1, "l1d_cache_st.mesi" },				\
    702 									\
    703 { 0x41, 0x02, C0|C1, "l1d_cache_st.s_state" },				\
    704 { 0x42, 0x04, C0|C1, "l1d_cache_lock.e_state" },			\
    705 { 0x42, 0x01, C0|C1, "l1d_cache_lock.hit" },				\
    706 									\
    707 { 0x42, 0x08, C0|C1, "l1d_cache_lock.m_state" },			\
    708 { 0x42, 0x02, C0|C1, "l1d_cache_lock.s_state" },			\
    709 { 0x43, 0x01, C0|C1, "l1d_all_ref.any" },				\
    710 									\
    711 { 0x43, 0x02, C0|C1, "l1d_all_ref.cacheable" },				\
    712 { 0x4B, 0x01, C0|C1, "mmx2_mem_exec.nta" },				\
    713 { 0x4C, 0x01, C0|C1, "load_hit_pre" },					\
    714 									\
    715 { 0x4E, 0x02, C0|C1, "l1d_prefetch.miss" },				\
    716 { 0x4E, 0x01, C0|C1, "l1d_prefetch.requests" },				\
    717 { 0x4E, 0x04, C0|C1, "l1d_prefetch.triggers" },				\
    718 									\
    719 { 0x51, 0x04, C0|C1, "l1d.m_evict" },					\
    720 { 0x51, 0x02, C0|C1, "l1d.m_repl" },					\
    721 { 0x51, 0x08, C0|C1, "l1d.m_snoop_evict" },				\
    722 									\
    723 { 0x51, 0x01, C0|C1, "l1d.repl" },					\
    724 { 0x52, 0x01, C0|C1, "l1d_cache_prefetch_lock_fb_hit" },		\
    725 { 0x53, 0x01, C0|C1, "l1d_cache_lock_fb_hit" },				\
    726 									\
    727 { 0x63, 0x02, C0|C1, "cache_lock_cycles.l1d" },				\
    728 { 0x63, 0x01, C0|C1, "cache_lock_cycles.l1d_l2" },			\
    729 { 0x06, 0x04, C0|C1|C2|C3, "store_blocks.at_ret" },			\
    730 									\
    731 { 0x06, 0x08, C0|C1|C2|C3, "store_blocks.l1d_block" },			\
    732 { 0x06, 0x01, C0|C1|C2|C3, "store_blocks.not_sta" },			\
    733 { 0x06, 0x02, C0|C1|C2|C3, "store_blocks.sta" },			\
    734 									\
    735 { 0x13, 0x07, C0|C1|C2|C3, "load_dispatch.any" },			\
    736 { 0x13, 0x04, C0|C1|C2|C3, "load_dispatch.mob" },			\
    737 { 0x13, 0x01, C0|C1|C2|C3, "load_dispatch.rs" },			\
    738 									\
    739 { 0x13, 0x02, C0|C1|C2|C3, "load_dispatch.rs_delayed" },		\
    740 { 0x08, 0x01, C0|C1|C2|C3, "dtlb_load_misses.any" },			\
    741 { 0x08, 0x20, C0|C1|C2|C3, "dtlb_load_misses.pde_miss" },		\
    742 									\
    743 { 0x08, 0x02, C0|C1|C2|C3, "dtlb_load_misses.walk_completed" },		\
    744 { 0x49, 0x01, C0|C1|C2|C3, "dtlb_misses.any" },				\
    745 { 0x49, 0x10, C0|C1|C2|C3, "dtlb_misses.stlb_hit" },			\
    746 									\
    747 { 0x49, 0x02, C0|C1|C2|C3, "dtlb_misses.walk_completed" },		\
    748 { 0x4F, 0x02, C0|C1|C2|C3, "ept.epde_miss" },				\
    749 { 0x4F, 0x08, C0|C1|C2|C3, "ept.epdpe_miss" },				\
    750 									\
    751 { 0x85, 0x01, C0|C1|C2|C3, "itlb_misses.any" },				\
    752 { 0x85, 0x02, C0|C1|C2|C3, "itlb_misses.walk_completed" },		\
    753 { 0x24, 0xAA, C0|C1|C2|C3, "l2_rqsts.miss" },				\
    754 									\
    755 { 0x24, 0xFF, C0|C1|C2|C3, "l2_rqsts.references" },			\
    756 { 0x24, 0x10, C0|C1|C2|C3, "l2_rqsts.ifetch_hit" },			\
    757 { 0x24, 0x20, C0|C1|C2|C3, "l2_rqsts.ifetch_miss" },			\
    758 									\
    759 { 0x24, 0x30, C0|C1|C2|C3, "l2_rqsts.ifetches" },			\
    760 { 0x24, 0x01, C0|C1|C2|C3, "l2_rqsts.ld_hit" },				\
    761 { 0x24, 0x02, C0|C1|C2|C3, "l2_rqsts.ld_miss" },			\
    762 									\
    763 { 0x24, 0x03, C0|C1|C2|C3, "l2_rqsts.loads" },				\
    764 { 0x24, 0x40, C0|C1|C2|C3, "l2_rqsts.prefetch_hit" },			\
    765 { 0x24, 0x80, C0|C1|C2|C3, "l2_rqsts.prefetch_miss" },			\
    766 									\
    767 { 0x24, 0xC0, C0|C1|C2|C3, "l2_rqsts.prefetches" },			\
    768 { 0x24, 0x04, C0|C1|C2|C3, "l2_rqsts.rfo_hit" },			\
    769 { 0x24, 0x08, C0|C1|C2|C3, "l2_rqsts.rfo_miss" },			\
    770 									\
    771 { 0x24, 0x0C, C0|C1|C2|C3, "l2_rqsts.rfos" },				\
    772 { 0x26, 0xFF, C0|C1|C2|C3, "l2_data_rqsts.any" },			\
    773 { 0x26, 0x04, C0|C1|C2|C3, "l2_data_rqsts.demand.e_state" },		\
    774 									\
    775 { 0x26, 0x01, C0|C1|C2|C3, "l2_data_rqsts.demand.i_state" },		\
    776 { 0x26, 0x08, C0|C1|C2|C3, "l2_data_rqsts.demand.m_state" },		\
    777 { 0x26, 0x0F, C0|C1|C2|C3, "l2_data_rqsts.demand.mesi" },		\
    778 									\
    779 { 0x26, 0x02, C0|C1|C2|C3, "l2_data_rqsts.demand.s_state" },		\
    780 { 0x26, 0x40, C0|C1|C2|C3, "l2_data_rqsts.prefetch.e_state" },		\
    781 { 0x26, 0x10, C0|C1|C2|C3, "l2_data_rqsts.prefetch.i_state" },		\
    782 									\
    783 { 0x26, 0x80, C0|C1|C2|C3, "l2_data_rqsts.prefetch.m_state" },		\
    784 { 0x26, 0xF0, C0|C1|C2|C3, "l2_data_rqsts.prefetch.mesi" },		\
    785 { 0x26, 0x20, C0|C1|C2|C3, "l2_data_rqsts.prefetch.s_state" },		\
    786 									\
    787 { 0x27, 0x40, C0|C1|C2|C3, "l2_write.lock.e_state" },			\
    788 { 0x27, 0x10, C0|C1|C2|C3, "l2_write.lock.i_state" },			\
    789 { 0x27, 0x20, C0|C1|C2|C3, "l2_write.lock.s_state" },			\
    790 									\
    791 { 0x27, 0x0E, C0|C1|C2|C3, "l2_write.rfo.hit" },			\
    792 { 0x27, 0x01, C0|C1|C2|C3, "l2_write.rfo.i_state" },			\
    793 { 0x27, 0x08, C0|C1|C2|C3, "l2_write.rfo.m_state" },			\
    794 									\
    795 { 0x27, 0x0F, C0|C1|C2|C3, "l2_write.rfo.mesi" },			\
    796 { 0x27, 0x02, C0|C1|C2|C3, "l2_write.rfo.s_state" },			\
    797 { 0x28, 0x04, C0|C1|C2|C3, "l1d_wb_l2.e_state" },			\
    798 									\
    799 { 0x28, 0x01, C0|C1|C2|C3, "l1d_wb_l2.i_state" },			\
    800 { 0x28, 0x08, C0|C1|C2|C3, "l1d_wb_l2.m_state" },			\
    801 { 0xF0, 0x80, C0|C1|C2|C3, "l2_transactions.any" },			\
    802 									\
    803 { 0xF0, 0x20, C0|C1|C2|C3, "l2_transactions.fill" },			\
    804 { 0xF0, 0x04, C0|C1|C2|C3, "l2_transactions.ifetch" },			\
    805 { 0xF0, 0x10, C0|C1|C2|C3, "l2_transactions.l1d_wb" },			\
    806 									\
    807 { 0xF0, 0x01, C0|C1|C2|C3, "l2_transactions.load" },			\
    808 { 0xF0, 0x08, C0|C1|C2|C3, "l2_transactions.prefetch" },		\
    809 { 0xF0, 0x02, C0|C1|C2|C3, "l2_transactions.rfo" },			\
    810 									\
    811 { 0xF0, 0x40, C0|C1|C2|C3, "l2_transactions.wb" },			\
    812 { 0xF1, 0x07, C0|C1|C2|C3, "l2_lines_in.any" },				\
    813 { 0xF1, 0x04, C0|C1|C2|C3, "l2_lines_in.e_state" },			\
    814 									\
    815 { 0xF1, 0x02, C0|C1|C2|C3, "l2_lines_in.s_state" },			\
    816 { 0xF2, 0x0F, C0|C1|C2|C3, "l2_lines_out.any" },			\
    817 { 0xF2, 0x01, C0|C1|C2|C3, "l2_lines_out.demand_clean" },		\
    818 									\
    819 { 0xF2, 0x02, C0|C1|C2|C3, "l2_lines_out.demand_dirty" },		\
    820 { 0xF2, 0x04, C0|C1|C2|C3, "l2_lines_out.prefetch_clean" },		\
    821 { 0x6C, 0x01, C0|C1|C2|C3, "io_transactions" },				\
    822 									\
    823 { 0xB0, 0x80, C0|C1|C2|C3, "offcore_requests.any" },			\
    824 { 0xB0, 0x10, C0|C1|C2|C3, "offcore_requests.any.rfo" },		\
    825 { 0xB0, 0x40, C0|C1|C2|C3, "offcore_requests.l1d_writeback" },		\
    826 									\
    827 { 0xB8, 0x01, C0|C1|C2|C3, "snoop_response.hit" },			\
    828 { 0xB8, 0x02, C0|C1|C2|C3, "snoop_response.hite" },			\
    829 { 0xB8, 0x04, C0|C1|C2|C3, "snoop_response.hitm" },			\
    830 									\
    831 { 0xF4, 0x10, C0|C1|C2|C3, "sq_misc.split_lock" },			\
    832 { 0x0B, 0x01, C0|C1|C2|C3, "mem_inst_retired.loads" },			\
    833 { 0x0B, 0x02, C0|C1|C2|C3, "mem_inst_retired.stores" },			\
    834 									\
    835 { 0xC0, 0x04, C0|C1|C2|C3, "inst_retired.mmx" },			\
    836 { 0xC0, 0x02, C0|C1|C2|C3, "inst_retired.x87" },			\
    837 { 0xC7, 0x04, C0|C1|C2|C3, "ssex_uops_retired.packed_double" },		\
    838 									\
    839 { 0xC7, 0x01, C0|C1|C2|C3, "ssex_uops_retired.packed_single" },		\
    840 { 0xC7, 0x08, C0|C1|C2|C3, "ssex_uops_retired.scalar_double" },		\
    841 { 0xC7, 0x02, C0|C1|C2|C3, "ssex_uops_retired.scalar_single" },		\
    842 									\
    843 { 0xC7, 0x10, C0|C1|C2|C3, "ssex_uops_retired.vector_integer" },	\
    844 { 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.any" },			\
    845 { 0xC2, 0x04, C0|C1|C2|C3, "uops_retired.macro_fused" },		\
    846 									\
    847 { 0xC8, 0x20, C0|C1|C2|C3, "itlb_miss_retired" },			\
    848 { 0xCB, 0x80, C0|C1|C2|C3, "mem_load_retired.dtlb_miss" },		\
    849 { 0xCB, 0x40, C0|C1|C2|C3, "mem_load_retired.hit_lfb" },		\
    850 									\
    851 { 0xCB, 0x01, C0|C1|C2|C3, "mem_load_retired.l1d_hit" },		\
    852 { 0xCB, 0x02, C0|C1|C2|C3, "mem_load_retired.l2_hit" },			\
    853 { 0xCB, 0x10, C0|C1|C2|C3, "mem_load_retired.llc_miss" },		\
    854 									\
    855 { 0xCB, 0x04, C0|C1|C2|C3, "mem_load_retired.llc_unshared_hit" },	\
    856 { 0xCB, 0x08, C0|C1|C2|C3, "mem_load_retired.other_core_l2_hit_hitm" },	\
    857 { 0x0F, 0x02, C0|C1|C2|C3, "mem_uncore_retired.other_core_l2_hitm" },	\
    858 									\
    859 { 0x0F, 0x08, C0|C1|C2|C3, "mem_uncore_retired.remote_cache_local_home_hit" },\
    860 { 0x0F, 0x10, C0|C1|C2|C3, "mem_uncore_retired.remote_dram" },		\
    861 { 0x0F, 0x20, C0|C1|C2|C3, "mem_uncore_retired.local_dram" },		\
    862 									\
    863 { 0x0C, 0x01, C0|C1|C2|C3, "mem_store_retired.dtlb_miss" },		\
    864 { 0xC4, 0x01, C0|C1|C2|C3, "br_inst_retired.conditional" },		\
    865 { 0xC4, 0x02, C0|C1|C2|C3, "br_inst_retired.near_call" },		\
    866 									\
    867 { 0xC5, 0x02, C0|C1|C2|C3, "br_misp_retired.near_call" },		\
    868 { 0xDB, 0x01, C0|C1|C2|C3, "uop_unfusion" },				\
    869 { 0xF7, 0x01, C0|C1|C2|C3, "fp_assist.all" },				\
    870 									\
    871 { 0xF7, 0x04, C0|C1|C2|C3, "fp_assist.input" },				\
    872 { 0xF7, 0x02, C0|C1|C2|C3, "fp_assist.output" },			\
    873 { 0xCC, 0x03, C0|C1|C2|C3, "fp_mmx_trans.any" },			\
    874 									\
    875 { 0xCC, 0x01, C0|C1|C2|C3, "fp_mmx_trans.to_fp" },			\
    876 { 0xCC, 0x02, C0|C1|C2|C3, "fp_mmx_trans.to_mmx" },			\
    877 { 0xC3, 0x04, C0|C1|C2|C3, "machine_clears.smc" }
    878 
    879 #define	GENERICEVENTS_FAM6_MOD28					       \
    880 { 0xc4, 0x00, C0|C1, "PAPI_br_ins" },	/* br_inst_retired.any */	       \
    881 { 0xc5, 0x00, C0|C1, "PAPI_br_msp" },	/* br_inst_retired.mispred */	       \
    882 { 0xc4, 0x03, C0|C1, "PAPI_br_ntk" },					       \
    883 			/* br_inst_retired.pred_not_taken|mispred_not_taken */ \
    884 { 0xc4, 0x05, C0|C1, "PAPI_br_prc" },					       \
    885 			/* br_inst_retired.pred_not_taken|pred_taken */	       \
    886 { 0xc8, 0x00, C0|C1, "PAPI_hw_int" },	/* hw_int_rcv */	      	       \
    887 { 0xaa, 0x03, C0|C1, "PAPI_tot_iis" },	/* macro_insts.all_decoded */	       \
    888 { 0x40, 0x23, C0|C1, "PAPI_l1_dca" },	/* l1d_cache.l1|st */	      	       \
    889 { 0x2a, 0x41, C0|C1, "PAPI_l2_stm" },	/* l2_st.self.i_state */	       \
    890 { 0x2e, 0x4f, C0|C1, "PAPI_l2_tca" },	/* longest_lat_cache.reference */      \
    891 { 0x2e, 0x4e, C0|C1, "PAPI_l2_tch" },   /* l2_rqsts.mes */		       \
    892 { 0x2e, 0x41, C0|C1, "PAPI_l2_tcm" },	/* longest_lat_cache.miss */	       \
    893 { 0x2a, 0x4f, C0|C1, "PAPI_l2_tcw" },	/* l2_st.self.mesi */		       \
    894 { 0x08, 0x07, C0|C1, "PAPI_tlb_dm" },	/* data_tlb_misses.dtlb.miss */	       \
    895 { 0x82, 0x02, C0|C1, "PAPI_tlb_im" }	/* itlb.misses */
    896 
    897 
    898 #define	EVENTS_FAM6_MOD28						\
    899 	{ 0x2,  0x81, C0|C1, "store_forwards.good" },                   \
    900 	{ 0x6,  0x0,  C0|C1, "segment_reg_loads.any" },                 \
    901 	{ 0x7,  0x1,  C0|C1, "prefetch.prefetcht0" },                   \
    902 	{ 0x7,  0x6,  C0|C1, "prefetch.sw_l2" },                        \
    903 	{ 0x7,  0x8,  C0|C1, "prefetch.prefetchnta" },                  \
    904 	{ 0x8,  0x7,  C0|C1, "data_tlb_misses.dtlb_miss" },             \
    905 	{ 0x8,  0x5,  C0|C1, "data_tlb_misses.dtlb_miss_ld" },          \
    906 	{ 0x8,  0x9,  C0|C1, "data_tlb_misses.l0_dtlb_miss_ld" },	\
    907 	{ 0x8,  0x6,  C0|C1, "data_tlb_misses.dtlb_miss_st" },          \
    908 	{ 0xC,  0x3,  C0|C1, "page_walks.cycles" },                     \
    909 	{ 0x10, 0x1,  C0|C1, "x87_comp_ops_exe.any.s" },                \
    910 	{ 0x10, 0x81, C0|C1, "x87_comp_ops_exe.any.ar" },               \
    911 	{ 0x11, 0x1,  C0|C1, "fp_assist" },                             \
    912 	{ 0x11, 0x81, C0|C1, "fp_assist.ar" },                          \
    913 	{ 0x12, 0x1,  C0|C1, "mul.s" },                                 \
    914 	{ 0x12, 0x81, C0|C1, "mul.ar" },                                \
    915 	{ 0x13, 0x1,  C0|C1, "div.s" },                                 \
    916 	{ 0x13, 0x81, C0|C1, "div.ar" },                                \
    917 	{ 0x14, 0x1,  C0|C1, "cycles_div_busy" },                       \
    918 	{ 0x21, 0x0,  C0|C1, "l2_ads" },                      		\
    919 	{ 0x22, 0x0,  C0|C1, "l2_dbus_busy" },                		\
    920 	{ 0x24, 0x0,  C0|C1, "l2_lines_in" },   			\
    921 	{ 0x25, 0x0,  C0|C1, "l2_m_lines_in" },               		\
    922 	{ 0x26, 0x0,  C0|C1, "l2_lines_out" },  			\
    923 	{ 0x27, 0x0,  C0|C1, "l2_m_lines_out" },			\
    924 	{ 0x28, 0x0,  C0|C1, "l2_ifetch" },  				\
    925 	{ 0x29, 0x0,  C0|C1, "l2_ld" },					\
    926 	{ 0x2A, 0x0,  C0|C1, "l2_st" },      				\
    927 	{ 0x2B, 0x0,  C0|C1, "l2_lock" },    				\
    928 	{ 0x2E, 0x0,  C0|C1, "l2_rqsts" },             			\
    929 	{ 0x2E, 0x41, C0|C1, "l2_rqsts.self.demand.i_state" },		\
    930 	{ 0x2E, 0x4F, C0|C1, "l2_rqsts.self.demand.mesi" },		\
    931 	{ 0x30, 0x0,  C0|C1, "l2_reject_bus_q" },			\
    932 	{ 0x32, 0x0,  C0|C1, "l2_no_req" },                   		\
    933 	{ 0x3A, 0x0,  C0|C1, "eist_trans" },                            \
    934 	{ 0x3B, 0xC0, C0|C1, "thermal_trip" },                          \
    935 	{ 0x3C, 0x0,  C0|C1, "cpu_clk_unhalted.core_p" },               \
    936 	{ 0x3C, 0x1,  C0|C1, "cpu_clk_unhalted.bus" },                  \
    937 	{ 0x3C, 0x2,  C0|C1, "cpu_clk_unhalted.no_other" },             \
    938 	{ 0x40, 0x21, C0|C1, "l1d_cache.ld" },                          \
    939 	{ 0x40, 0x22, C0|C1, "l1d_cache.st" },                          \
    940 	{ 0x60, 0x0,  C0|C1, "bus_request_outstanding" },		\
    941 	{ 0x61, 0x0,  C0|C1, "bus_bnr_drv" },                		\
    942 	{ 0x62, 0x0,  C0|C1, "bus_drdy_clocks" },            		\
    943 	{ 0x63, 0x0,  C0|C1, "bus_lock_clocks" },  			\
    944 	{ 0x64, 0x0,  C0|C1, "bus_data_rcv" },                		\
    945 	{ 0x65, 0x0,  C0|C1, "bus_trans_brd" },    			\
    946 	{ 0x66, 0x0,  C0|C1, "bus_trans_rfo" },    			\
    947 	{ 0x67, 0x0,  C0|C1, "bus_trans_wb" },     			\
    948 	{ 0x68, 0x0,  C0|C1, "bus_trans_ifetch" }, 			\
    949 	{ 0x69, 0x0,  C0|C1, "bus_trans_inval" },  			\
    950 	{ 0x6A, 0x0,  C0|C1, "bus_trans_pwr" },				\
    951 	{ 0x6B, 0x0,  C0|C1, "bus_trans_p" },      			\
    952 	{ 0x6C, 0x0,  C0|C1, "bus_trans_io" },     			\
    953 	{ 0x6D, 0x0,  C0|C1, "bus_trans_def" },    			\
    954 	{ 0x6E, 0x0,  C0|C1, "bus_trans_burst" },  			\
    955 	{ 0x6F, 0x0,  C0|C1, "bus_trans_mem" },    			\
    956 	{ 0x70, 0x0,  C0|C1, "bus_trans_any" },    			\
    957 	{ 0x77, 0x0,  C0|C1, "ext_snoop" },     			\
    958 	{ 0x7A, 0x0,  C0|C1, "bus_hit_drv" },                		\
    959 	{ 0x7B, 0x0,  C0|C1, "bus_hitm_drv" },               		\
    960 	{ 0x7D, 0x0,  C0|C1, "busq_empty" },                  		\
    961 	{ 0x7E, 0x0,  C0|C1, "snoop_stall_drv" },  			\
    962 	{ 0x7F, 0x0,  C0|C1, "bus_io_wait" },				\
    963 	{ 0x80, 0x3,  C0|C1, "icache.accesses" },                       \
    964 	{ 0x80, 0x2,  C0|C1, "icache.misses" },                         \
    965 	{ 0x82, 0x4,  C0|C1, "itlb.flush" },                            \
    966 	{ 0x82, 0x2,  C0|C1, "itlb.misses" },                           \
    967 	{ 0xAA, 0x2,  C0|C1, "macro_insts.cisc_decoded" },              \
    968 	{ 0xAA, 0x3,  C0|C1, "macro_insts.all_decoded" },               \
    969 	{ 0xB0, 0x0,  C0|C1, "simd_uops_exec.s" },                      \
    970 	{ 0xB0, 0x80, C0|C1, "simd_uops_exec.ar" },                     \
    971 	{ 0xB1, 0x0,  C0|C1, "simd_sat_uop_exec.s" },                   \
    972 	{ 0xB1, 0x80, C0|C1, "simd_sat_uop_exec.ar" },                  \
    973 	{ 0xB3, 0x1,  C0|C1, "simd_uop_type_exec.mul.s" },              \
    974 	{ 0xB3, 0x81, C0|C1, "simd_uop_type_exec.mul.ar" },             \
    975 	{ 0xB3, 0x02, C0|C1, "simd_uop_type_exec.shift.s" },            \
    976 	{ 0xB3, 0x82, C0|C1, "simd_uop_type_exec.shift.ar" },           \
    977 	{ 0xB3, 0x04, C0|C1, "simd_uop_type_exec.pack.s" },             \
    978 	{ 0xB3, 0x84, C0|C1, "simd_uop_type_exec.pack.ar" },            \
    979 	{ 0xB3, 0x08, C0|C1, "simd_uop_type_exec.unpack.s" },           \
    980 	{ 0xB3, 0x88, C0|C1, "simd_uop_type_exec.unpack.ar" },          \
    981 	{ 0xB3, 0x10, C0|C1, "simd_uop_type_exec.logical.s" },          \
    982 	{ 0xB3, 0x90, C0|C1, "simd_uop_type_exec.logical.ar" },         \
    983 	{ 0xB3, 0x20, C0|C1, "simd_uop_type_exec.arithmetic.s" },       \
    984 	{ 0xB3, 0xA0, C0|C1, "simd_uop_type_exec.arithmetic.ar" },      \
    985 	{ 0xC2, 0x10, C0|C1, "uops_retired.any" },                      \
    986 	{ 0xC3, 0x1,  C0|C1, "machine_clears.smc" },                    \
    987 	{ 0xC4, 0x0,  C0|C1, "br_inst_retired.any" },                   \
    988 	{ 0xC4, 0x1,  C0|C1, "br_inst_retired.pred_not_taken" },        \
    989 	{ 0xC4, 0x2,  C0|C1, "br_inst_retired.mispred_not_taken" },     \
    990 	{ 0xC4, 0x4,  C0|C1, "br_inst_retired.pred_taken" },            \
    991 	{ 0xC4, 0x8,  C0|C1, "br_inst_retired.mispred_taken" },         \
    992 	{ 0xC4, 0xA,  C0|C1, "br_inst_retired.mispred" },               \
    993 	{ 0xC4, 0xC,  C0|C1, "br_inst_retired.taken" },                 \
    994 	{ 0xC4, 0xF,  C0|C1, "br_inst_retired.any1" },                  \
    995 	{ 0xC6, 0x1,  C0|C1, "cycles_int_masked.cycles_int_masked" },   \
    996 	{ 0xC6, 0x2,  C0|C1,						\
    997 		"cycles_int_masked.cycles_int_pending_and_masked" },	\
    998 	{ 0xC7, 0x1,  C0|C1, "simd_inst_retired.packed_single" },       \
    999 	{ 0xC7, 0x2,  C0|C1, "simd_inst_retired.scalar_single" },      	\
   1000 	{ 0xC7, 0x4,  C0|C1, "simd_inst_retired.packed_double" },       \
   1001 	{ 0xC7, 0x8,  C0|C1, "simd_inst_retired.scalar_double" },       \
   1002 	{ 0xC7, 0x10, C0|C1, "simd_inst_retired.vector" },              \
   1003 	{ 0xC7, 0x1F, C0|C1, "simd_inst_retired.any" },                 \
   1004 	{ 0xC8, 0x00, C0|C1, "hw_int_rcv" },                            \
   1005 	{ 0xCA, 0x1,  C0|C1, "simd_comp_inst_retired.packed_single" },  \
   1006 	{ 0xCA, 0x2,  C0|C1, "simd_comp_inst_retired.scalar_single" }, 	\
   1007 	{ 0xCA, 0x4,  C0|C1, "simd_comp_inst_retired.packed_double" },  \
   1008 	{ 0xCA, 0x8,  C0|C1, "simd_comp_inst_retired.scalar_double" },  \
   1009 	{ 0xCB, 0x1,  C0|C1, "mem_load_retired.l2_hit" },               \
   1010 	{ 0xCB, 0x2,  C0|C1, "mem_load_retired.l2_miss" },              \
   1011 	{ 0xCB, 0x4,  C0|C1, "mem_load_retired.dtlb_miss" },           	\
   1012 	{ 0xCD, 0x0,  C0|C1, "simd_assist" },                           \
   1013 	{ 0xCE, 0x0,  C0|C1, "simd_instr_retired" },                    \
   1014 	{ 0xCF, 0x0,  C0|C1, "simd_sat_instr_retired" },                \
   1015 	{ 0xE0, 0x1,  C0|C1, "br_inst_decoded" },                       \
   1016 	{ 0xE4, 0x1,  C0|C1, "bogus_br" },                             	\
   1017 	{ 0xE6, 0x1,  C0|C1, "baclears.any" }
   1018 
   1019 static const struct events_table_t *events_table = NULL;
   1020 
   1021 const struct events_table_t events_fam6_nhm[] = {
   1022 	GENERICEVENTS_FAM6_NHM,
   1023 	EVENTS_FAM6_NHM,
   1024 	{ NT_END, 0, 0, "" }
   1025 };
   1026 
   1027 const struct events_table_t events_fam6_mod28[] = {
   1028 	GENERICEVENTS_FAM6_MOD28,
   1029 	EVENTS_FAM6_MOD28,
   1030 	{ NT_END, 0, 0, "" }
   1031 };
   1032 
   1033 /*
   1034  * Initialize string containing list of supported general-purpose counter
   1035  * events for processors of Penryn and Merom Family
   1036  */
   1037 static void
   1038 pcbe_init_core_uarch()
   1039 {
   1040 	const struct nametable_core_uarch	*n;
   1041 	const struct generic_events		*k;
   1042 	const struct nametable_core_uarch	*picspecific_events;
   1043 	const struct generic_events		*picspecific_genericevents;
   1044 	size_t			common_size;
   1045 	size_t			size;
   1046 	uint64_t		i;
   1047 
   1048 	gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
   1049 
   1050 	/* Calculate space needed to save all the common event names */
   1051 	common_size = 0;
   1052 	for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END; n++) {
   1053 		common_size += strlen(n->name) + 1;
   1054 	}
   1055 
   1056 	for (k = cmn_generic_events; k->event_num != NT_END; k++) {
   1057 		common_size += strlen(k->name) + 1;
   1058 	}
   1059 
   1060 	for (i = 0; i < num_gpc; i++) {
   1061 		size = 0;
   1062 		picspecific_genericevents = NULL;
   1063 
   1064 		switch (i) {
   1065 			case 0:
   1066 				picspecific_events = pic0_events;
   1067 				picspecific_genericevents = generic_events_pic0;
   1068 				break;
   1069 			case 1:
   1070 				picspecific_events = pic1_events;
   1071 				break;
   1072 			default:
   1073 				picspecific_events = NULL;
   1074 				break;
   1075 		}
   1076 		if (picspecific_events != NULL) {
   1077 			for (n = picspecific_events;
   1078 			    n->event_num != NT_END;
   1079 			    n++) {
   1080 				size += strlen(n->name) + 1;
   1081 			}
   1082 		}
   1083 		if (picspecific_genericevents != NULL) {
   1084 			for (k = picspecific_genericevents;
   1085 			    k->event_num != NT_END; k++) {
   1086 				size += strlen(k->name) + 1;
   1087 			}
   1088 		}
   1089 
   1090 		gpc_names[i] =
   1091 		    kmem_alloc(size + common_size + 1, KM_SLEEP);
   1092 
   1093 		gpc_names[i][0] = '\0';
   1094 		if (picspecific_events != NULL) {
   1095 			for (n = picspecific_events;
   1096 			    n->event_num != NT_END; n++) {
   1097 				(void) strcat(gpc_names[i], n->name);
   1098 				(void) strcat(gpc_names[i], ",");
   1099 			}
   1100 		}
   1101 		if (picspecific_genericevents != NULL) {
   1102 			for (k = picspecific_genericevents;
   1103 			    k->event_num != NT_END; k++) {
   1104 				(void) strcat(gpc_names[i], k->name);
   1105 				(void) strcat(gpc_names[i], ",");
   1106 			}
   1107 		}
   1108 		for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END;
   1109 		    n++) {
   1110 			(void) strcat(gpc_names[i], n->name);
   1111 			(void) strcat(gpc_names[i], ",");
   1112 		}
   1113 		for (k = cmn_generic_events; k->event_num != NT_END; k++) {
   1114 			(void) strcat(gpc_names[i], k->name);
   1115 			(void) strcat(gpc_names[i], ",");
   1116 		}
   1117 
   1118 		/*
   1119 		 * Remove trailing comma.
   1120 		 */
   1121 		gpc_names[i][common_size + size - 1] = '\0';
   1122 	}
   1123 }
   1124 
   1125 static int
   1126 core_pcbe_init(void)
   1127 {
   1128 	struct cpuid_regs	cp;
   1129 	size_t			size;
   1130 	uint64_t		i;
   1131 	uint64_t		j;
   1132 	uint64_t		arch_events_vector_length;
   1133 	size_t			arch_events_string_length;
   1134 	uint_t			model;
   1135 
   1136 	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
   1137 		return (-1);
   1138 
   1139 	/* Obtain Basic CPUID information */
   1140 	cp.cp_eax = 0x0;
   1141 	(void) __cpuid_insn(&cp);
   1142 
   1143 	/* No Architectural Performance Monitoring Leaf returned by CPUID */
   1144 	if (cp.cp_eax < 0xa) {
   1145 		return (-1);
   1146 	}
   1147 
   1148 	/* Obtain the Architectural Performance Monitoring Leaf */
   1149 	cp.cp_eax = 0xa;
   1150 	(void) __cpuid_insn(&cp);
   1151 
   1152 	versionid = cp.cp_eax & 0xFF;
   1153 
   1154 	/*
   1155 	 * Fixed-Function Counters (FFC)
   1156 	 *
   1157 	 * All Family 6 Model 15 and Model 23 processors have fixed-function
   1158 	 * counters.  These counters were made Architectural with
   1159 	 * Family 6 Model 15 Stepping 9.
   1160 	 */
   1161 	switch (versionid) {
   1162 
   1163 		case 0:
   1164 			return (-1);
   1165 
   1166 		case 2:
   1167 			num_ffc = cp.cp_edx & 0x1F;
   1168 			width_ffc = (cp.cp_edx >> 5) & 0xFF;
   1169 
   1170 			/*
   1171 			 * Some processors have an errata (AW34) where
   1172 			 * versionid is reported as 2 when actually 1.
   1173 			 * In this case, fixed-function counters are
   1174 			 * model-specific as in Version 1.
   1175 			 */
   1176 			if (num_ffc != 0) {
   1177 				break;
   1178 			}
   1179 			/* FALLTHROUGH */
   1180 		case 1:
   1181 			num_ffc = 3;
   1182 			width_ffc = 40;
   1183 			versionid = 1;
   1184 			break;
   1185 
   1186 		default:
   1187 			num_ffc = cp.cp_edx & 0x1F;
   1188 			width_ffc = (cp.cp_edx >> 5) & 0xFF;
   1189 			break;
   1190 	}
   1191 
   1192 
   1193 	if (num_ffc >= 64)
   1194 		return (-1);
   1195 
   1196 	/* Set HTT-specific names of architectural & FFC events */
   1197 	if (x86_feature & X86_HTT) {
   1198 		ffc_names = ffc_names_htt;
   1199 		arch_events_table = arch_events_table_htt;
   1200 		known_arch_events =
   1201 		    sizeof (arch_events_table_htt) /
   1202 		    sizeof (struct events_table_t);
   1203 		known_ffc_num =
   1204 		    sizeof (ffc_names_htt) / sizeof (char *);
   1205 	} else {
   1206 		ffc_names = ffc_names_non_htt;
   1207 		arch_events_table = arch_events_table_non_htt;
   1208 		known_arch_events =
   1209 		    sizeof (arch_events_table_non_htt) /
   1210 		    sizeof (struct events_table_t);
   1211 		known_ffc_num =
   1212 		    sizeof (ffc_names_non_htt) / sizeof (char *);
   1213 	}
   1214 
   1215 	if (num_ffc >= known_ffc_num) {
   1216 		/*
   1217 		 * The system seems to have more fixed-function counters than
   1218 		 * what this PCBE is able to handle correctly.  Default to the
   1219 		 * maximum number of fixed-function counters that this driver
   1220 		 * is aware of.
   1221 		 */
   1222 		num_ffc = known_ffc_num - 1;
   1223 	}
   1224 
   1225 	mask_ffc = BITMASK_XBITS(width_ffc);
   1226 	control_ffc = BITMASK_XBITS(num_ffc);
   1227 
   1228 	/*
   1229 	 * General Purpose Counters (GPC)
   1230 	 */
   1231 	num_gpc = (cp.cp_eax >> 8) & 0xFF;
   1232 	width_gpc = (cp.cp_eax >> 16) & 0xFF;
   1233 
   1234 	if (num_gpc >= 64)
   1235 		return (-1);
   1236 
   1237 	mask_gpc = BITMASK_XBITS(width_gpc);
   1238 
   1239 	control_gpc = BITMASK_XBITS(num_gpc);
   1240 
   1241 	control_mask = (control_ffc << 32) | control_gpc;
   1242 
   1243 	total_pmc = num_gpc + num_ffc;
   1244 	if (total_pmc > 64) {
   1245 		/* Too wide for the overflow bitmap */
   1246 		return (-1);
   1247 	}
   1248 
   1249 	/* FFC names */
   1250 	ffc_allnames = kmem_alloc(num_ffc * sizeof (char *), KM_SLEEP);
   1251 	for (i = 0; i < num_ffc; i++) {
   1252 		ffc_allnames[i] = kmem_alloc(
   1253 		    strlen(ffc_names[i]) + strlen(ffc_genericnames[i]) + 2,
   1254 		    KM_SLEEP);
   1255 
   1256 		ffc_allnames[i][0] = '\0';
   1257 		(void) strcat(ffc_allnames[i], ffc_names[i]);
   1258 
   1259 		/* Check if this ffc has a generic name */
   1260 		if (strcmp(ffc_genericnames[i], "") != 0) {
   1261 			(void) strcat(ffc_allnames[i], ",");
   1262 			(void) strcat(ffc_allnames[i], ffc_genericnames[i]);
   1263 		}
   1264 	}
   1265 
   1266 	/* GPC events for Family 6 Models 15, 23 and 29 only */
   1267 	if ((cpuid_getfamily(CPU) == 6) &&
   1268 	    ((cpuid_getmodel(CPU) == 15) || (cpuid_getmodel(CPU) == 23) ||
   1269 	    (cpuid_getmodel(CPU) == 29))) {
   1270 		(void) snprintf(core_impl_name, IMPL_NAME_LEN,
   1271 		    "Core Microarchitecture");
   1272 		pcbe_init_core_uarch();
   1273 		return (0);
   1274 	}
   1275 
   1276 	(void) snprintf(core_impl_name, IMPL_NAME_LEN,
   1277 	    "Intel Arch PerfMon v%d on Family %d Model %d",
   1278 	    versionid, cpuid_getfamily(CPU), cpuid_getmodel(CPU));
   1279 
   1280 	/*
   1281 	 * Architectural events
   1282 	 */
   1283 	arch_events_vector_length = (cp.cp_eax >> 24) & 0xFF;
   1284 
   1285 	ASSERT(known_arch_events == arch_events_vector_length);
   1286 
   1287 	/*
   1288 	 * To handle the case where a new performance monitoring setup is run
   1289 	 * on a non-debug kernel
   1290 	 */
   1291 	if (known_arch_events > arch_events_vector_length) {
   1292 		known_arch_events = arch_events_vector_length;
   1293 	} else {
   1294 		arch_events_vector_length = known_arch_events;
   1295 	}
   1296 
   1297 	arch_events_vector = cp.cp_ebx &
   1298 	    BITMASK_XBITS(arch_events_vector_length);
   1299 
   1300 	/*
   1301 	 * Process architectural and non-architectural events using GPC
   1302 	 */
   1303 	if (num_gpc > 0) {
   1304 
   1305 		gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
   1306 
   1307 		/* Calculate space required for the architectural gpc events */
   1308 		arch_events_string_length = 0;
   1309 		for (i = 0; i < known_arch_events; i++) {
   1310 			if (((1U << i) & arch_events_vector) == 0) {
   1311 				arch_events_string_length +=
   1312 				    strlen(arch_events_table[i].name) + 1;
   1313 				if (strcmp(arch_genevents_table[i], "") != 0) {
   1314 					arch_events_string_length +=
   1315 					    strlen(arch_genevents_table[i]) + 1;
   1316 				}
   1317 			}
   1318 		}
   1319 
   1320 		/* Non-architectural events list */
   1321 		model = cpuid_getmodel(CPU);
   1322 		switch (model) {
   1323 			/* Nehalem */
   1324 			case 26:
   1325 			case 30:
   1326 			case 31:
   1327 			/* Westmere */
   1328 			case 37:
   1329 			case 44:
   1330 			/* Nehalem-EX */
   1331 			case 46:
   1332 				events_table = events_fam6_nhm;
   1333 				break;
   1334 			case 28:
   1335 				events_table = events_fam6_mod28;
   1336 				break;
   1337 		}
   1338 
   1339 		for (i = 0; i < num_gpc; i++) {
   1340 
   1341 			/*
   1342 			 * Determine length of all supported event names
   1343 			 * (architectural + non-architectural)
   1344 			 */
   1345 			size = arch_events_string_length;
   1346 			for (j = 0; events_table != NULL &&
   1347 			    events_table[j].eventselect != NT_END;
   1348 			    j++) {
   1349 				if (C(i) & events_table[j].supported_counters) {
   1350 					size += strlen(events_table[j].name) +
   1351 					    1;
   1352 				}
   1353 			}
   1354 
   1355 			/* Allocate memory for this pics list */
   1356 			gpc_names[i] = kmem_alloc(size + 1, KM_SLEEP);
   1357 			gpc_names[i][0] = '\0';
   1358 			if (size == 0) {
   1359 				continue;
   1360 			}
   1361 
   1362 			/*
   1363 			 * Create the list of all supported events
   1364 			 * (architectural + non-architectural)
   1365 			 */
   1366 			for (j = 0; j < known_arch_events; j++) {
   1367 				if (((1U << j) & arch_events_vector) == 0) {
   1368 					(void) strcat(gpc_names[i],
   1369 					    arch_events_table[j].name);
   1370 					(void) strcat(gpc_names[i], ",");
   1371 					if (strcmp(
   1372 					    arch_genevents_table[j], "")
   1373 					    != 0) {
   1374 						(void) strcat(gpc_names[i],
   1375 						    arch_genevents_table[j]);
   1376 						(void) strcat(gpc_names[i],
   1377 						    ",");
   1378 					}
   1379 				}
   1380 			}
   1381 
   1382 			for (j = 0; events_table != NULL &&
   1383 			    events_table[j].eventselect != NT_END;
   1384 			    j++) {
   1385 				if (C(i) & events_table[j].supported_counters) {
   1386 					(void) strcat(gpc_names[i],
   1387 					    events_table[j].name);
   1388 					(void) strcat(gpc_names[i], ",");
   1389 				}
   1390 			}
   1391 
   1392 			/* Remove trailing comma */
   1393 			gpc_names[i][size - 1] = '\0';
   1394 		}
   1395 	}
   1396 
   1397 	return (0);
   1398 }
   1399 
   1400 static uint_t core_pcbe_ncounters()
   1401 {
   1402 	return (total_pmc);
   1403 }
   1404 
   1405 static const char *core_pcbe_impl_name(void)
   1406 {
   1407 	return (core_impl_name);
   1408 }
   1409 
   1410 static const char *core_pcbe_cpuref(void)
   1411 {
   1412 	return (core_cpuref);
   1413 }
   1414 
   1415 static char *core_pcbe_list_events(uint_t picnum)
   1416 {
   1417 	ASSERT(picnum < cpc_ncounters);
   1418 
   1419 	if (picnum < num_gpc) {
   1420 		return (gpc_names[picnum]);
   1421 	} else {
   1422 		return (ffc_allnames[picnum - num_gpc]);
   1423 	}
   1424 }
   1425 
   1426 static char *core_pcbe_list_attrs(void)
   1427 {
   1428 	if (versionid >= 3) {
   1429 		return ("edge,inv,umask,cmask,anythr");
   1430 	} else {
   1431 		return ("edge,pc,inv,umask,cmask");
   1432 	}
   1433 }
   1434 
   1435 static const struct nametable_core_uarch *
   1436 find_gpcevent_core_uarch(char *name,
   1437     const struct nametable_core_uarch *nametable)
   1438 {
   1439 	const struct nametable_core_uarch *n;
   1440 	int compare_result = -1;
   1441 
   1442 	for (n = nametable; n->event_num != NT_END; n++) {
   1443 		compare_result = strcmp(name, n->name);
   1444 		if (compare_result <= 0) {
   1445 			break;
   1446 		}
   1447 	}
   1448 
   1449 	if (compare_result == 0) {
   1450 		return (n);
   1451 	}
   1452 
   1453 	return (NULL);
   1454 }
   1455 
   1456 static const struct generic_events *
   1457 find_generic_events(char *name, const struct generic_events *table)
   1458 {
   1459 	const struct generic_events *n;
   1460 
   1461 	for (n = table; n->event_num != NT_END; n++) {
   1462 		if (strcmp(name, n->name) == 0) {
   1463 			return (n);
   1464 		};
   1465 	}
   1466 
   1467 	return (NULL);
   1468 }
   1469 
   1470 static const struct events_table_t *
   1471 find_gpcevent(char *name)
   1472 {
   1473 	int i;
   1474 
   1475 	/* Search architectural events */
   1476 	for (i = 0; i < known_arch_events; i++) {
   1477 		if (strcmp(name, arch_events_table[i].name) == 0 ||
   1478 		    strcmp(name, arch_genevents_table[i]) == 0) {
   1479 			if (((1U << i) & arch_events_vector) == 0) {
   1480 				return (&arch_events_table[i]);
   1481 			}
   1482 		}
   1483 	}
   1484 
   1485 	/* Search non-architectural events */
   1486 	if (events_table != NULL) {
   1487 		for (i = 0; events_table[i].eventselect != NT_END; i++) {
   1488 			if (strcmp(name, events_table[i].name) == 0) {
   1489 				return (&events_table[i]);
   1490 			}
   1491 		}
   1492 	}
   1493 
   1494 	return (NULL);
   1495 }
   1496 
   1497 static uint64_t
   1498 core_pcbe_event_coverage(char *event)
   1499 {
   1500 	uint64_t bitmap;
   1501 	uint64_t bitmask;
   1502 	const struct events_table_t *n;
   1503 	int i;
   1504 
   1505 	bitmap = 0;
   1506 
   1507 	/* Is it an event that a GPC can track? */
   1508 	if (versionid >= 3) {
   1509 		n = find_gpcevent(event);
   1510 		if (n != NULL) {
   1511 			bitmap |= (n->supported_counters &
   1512 			    BITMASK_XBITS(num_gpc));
   1513 		}
   1514 	} else {
   1515 		if (find_generic_events(event, cmn_generic_events) != NULL) {
   1516 			bitmap |= BITMASK_XBITS(num_gpc);
   1517 		} if (find_generic_events(event, generic_events_pic0) != NULL) {
   1518 			bitmap |= 1ULL;
   1519 		} else if (find_gpcevent_core_uarch(event,
   1520 		    cmn_gpc_events_core_uarch) != NULL) {
   1521 			bitmap |= BITMASK_XBITS(num_gpc);
   1522 		} else if (find_gpcevent_core_uarch(event, pic0_events) !=
   1523 		    NULL) {
   1524 			bitmap |= 1ULL;
   1525 		} else if (find_gpcevent_core_uarch(event, pic1_events) !=
   1526 		    NULL) {
   1527 			bitmap |= 1ULL << 1;
   1528 		}
   1529 	}
   1530 
   1531 	/* Check if the event can be counted in the fixed-function counters */
   1532 	if (num_ffc > 0) {
   1533 		bitmask = 1ULL << num_gpc;
   1534 		for (i = 0; i < num_ffc; i++) {
   1535 			if (strcmp(event, ffc_names[i]) == 0) {
   1536 				bitmap |= bitmask;
   1537 			} else if (strcmp(event, ffc_genericnames[i]) == 0) {
   1538 				bitmap |= bitmask;
   1539 			}
   1540 			bitmask = bitmask << 1;
   1541 		}
   1542 	}
   1543 
   1544 	return (bitmap);
   1545 }
   1546 
   1547 static uint64_t
   1548 core_pcbe_overflow_bitmap(void)
   1549 {
   1550 	uint64_t interrupt_status;
   1551 	uint64_t intrbits_ffc;
   1552 	uint64_t intrbits_gpc;
   1553 	extern int kcpc_hw_overflow_intr_installed;
   1554 	uint64_t overflow_bitmap;
   1555 
   1556 	RDMSR(PERF_GLOBAL_STATUS, interrupt_status);
   1557 	WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status);
   1558 
   1559 	interrupt_status = interrupt_status & control_mask;
   1560 	intrbits_ffc = (interrupt_status >> 32) & control_ffc;
   1561 	intrbits_gpc = interrupt_status & control_gpc;
   1562 	overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc;
   1563 
   1564 	ASSERT(kcpc_hw_overflow_intr_installed);
   1565 	(*kcpc_hw_enable_cpc_intr)();
   1566 
   1567 	return (overflow_bitmap);
   1568 }
   1569 
   1570 static int
   1571 check_cpc_securitypolicy(core_pcbe_config_t *conf,
   1572     const struct nametable_core_uarch *n)
   1573 {
   1574 	if (conf->core_ctl & n->restricted_bits) {
   1575 		if (secpolicy_cpc_cpu(crgetcred()) != 0) {
   1576 			return (CPC_ATTR_REQUIRES_PRIVILEGE);
   1577 		}
   1578 	}
   1579 	return (0);
   1580 }
   1581 
   1582 static int
   1583 configure_gpc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
   1584     uint_t nattrs, kcpc_attr_t *attrs, void **data)
   1585 {
   1586 	core_pcbe_config_t	conf;
   1587 	const struct nametable_core_uarch	*n;
   1588 	const struct generic_events *k = NULL;
   1589 	const struct nametable_core_uarch	*m;
   1590 	const struct nametable_core_uarch	*picspecific_events;
   1591 	struct nametable_core_uarch	nt_raw = { "", 0x0, 0x0 };
   1592 	uint_t			i;
   1593 	long			event_num;
   1594 	const struct events_table_t *eventcode;
   1595 
   1596 	if (((preset & BITS_EXTENDED_FROM_31) != 0) &&
   1597 	    ((preset & BITS_EXTENDED_FROM_31) !=
   1598 	    BITS_EXTENDED_FROM_31)) {
   1599 
   1600 		/*
   1601 		 * Bits beyond bit-31 in the general-purpose counters can only
   1602 		 * be written to by extension of bit 31.  We cannot preset
   1603 		 * these bits to any value other than all 1s or all 0s.
   1604 		 */
   1605 		return (CPC_ATTRIBUTE_OUT_OF_RANGE);
   1606 	}
   1607 
   1608 	if (versionid >= 3) {
   1609 		eventcode = find_gpcevent(event);
   1610 		if (eventcode != NULL) {
   1611 			if ((C(picnum) & eventcode->supported_counters) == 0) {
   1612 				return (CPC_PIC_NOT_CAPABLE);
   1613 			}
   1614 			if (nattrs > 0 &&
   1615 			    (strncmp("PAPI_", event, 5) == 0)) {
   1616 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
   1617 			}
   1618 			conf.core_ctl = eventcode->eventselect;
   1619 			conf.core_ctl |= eventcode->unitmask <<
   1620 			    CORE_UMASK_SHIFT;
   1621 		} else {
   1622 			/* Event specified as raw event code */
   1623 			if (ddi_strtol(event, NULL, 0, &event_num) != 0) {
   1624 				return (CPC_INVALID_EVENT);
   1625 			}
   1626 			conf.core_ctl = event_num & 0xFF;
   1627 		}
   1628 	} else {
   1629 		if ((k = find_generic_events(event, cmn_generic_events)) !=
   1630 		    NULL ||
   1631 		    (picnum == 0 &&
   1632 		    (k = find_generic_events(event, generic_events_pic0)) !=
   1633 		    NULL)) {
   1634 			if (nattrs > 0) {
   1635 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
   1636 			}
   1637 			conf.core_ctl = k->event_num;
   1638 			conf.core_ctl |= k->umask << CORE_UMASK_SHIFT;
   1639 		} else {
   1640 			/* Not a generic event */
   1641 
   1642 			n = find_gpcevent_core_uarch(event,
   1643 			    cmn_gpc_events_core_uarch);
   1644 			if (n == NULL) {
   1645 				switch (picnum) {
   1646 					case 0:
   1647 						picspecific_events =
   1648 						    pic0_events;
   1649 						break;
   1650 					case 1:
   1651 						picspecific_events =
   1652 						    pic1_events;
   1653 						break;
   1654 					default:
   1655 						picspecific_events = NULL;
   1656 						break;
   1657 				}
   1658 				if (picspecific_events != NULL) {
   1659 					n = find_gpcevent_core_uarch(event,
   1660 					    picspecific_events);
   1661 				}
   1662 			}
   1663 			if (n == NULL) {
   1664 
   1665 				/*
   1666 				 * Check if this is a case where the event was
   1667 				 * specified directly by its event number
   1668 				 * instead of its name string.
   1669 				 */
   1670 				if (ddi_strtol(event, NULL, 0, &event_num) !=
   1671 				    0) {
   1672 					return (CPC_INVALID_EVENT);
   1673 				}
   1674 
   1675 				event_num = event_num & 0xFF;
   1676 
   1677 				/*
   1678 				 * Search the event table to find out if the
   1679 				 * event specified has an privilege
   1680 				 * requirements.  Currently none of the
   1681 				 * pic-specific counters have any privilege
   1682 				 * requirements.  Hence only the table
   1683 				 * cmn_gpc_events_core_uarch is searched.
   1684 				 */
   1685 				for (m = cmn_gpc_events_core_uarch;
   1686 				    m->event_num != NT_END;
   1687 				    m++) {
   1688 					if (event_num == m->event_num) {
   1689 						break;
   1690 					}
   1691 				}
   1692 				if (m->event_num == NT_END) {
   1693 					nt_raw.event_num = (uint8_t)event_num;
   1694 					n = &nt_raw;
   1695 				} else {
   1696 					n = m;
   1697 				}
   1698 			}
   1699 			conf.core_ctl = n->event_num; /* Event Select */
   1700 		}
   1701 	}
   1702 
   1703 
   1704 	conf.core_picno = picnum;
   1705 	conf.core_pictype = CORE_GPC;
   1706 	conf.core_rawpic = preset & mask_gpc;
   1707 
   1708 	conf.core_pes = GPC_BASE_PES + picnum;
   1709 	conf.core_pmc = GPC_BASE_PMC + picnum;
   1710 
   1711 	for (i = 0; i < nattrs; i++) {
   1712 		if (strncmp(attrs[i].ka_name, "umask", 6) == 0) {
   1713 			if ((attrs[i].ka_val | CORE_UMASK_MASK) !=
   1714 			    CORE_UMASK_MASK) {
   1715 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
   1716 			}
   1717 			/* Clear out the default umask */
   1718 			conf.core_ctl &= ~ (CORE_UMASK_MASK <<
   1719 			    CORE_UMASK_SHIFT);
   1720 			/* Use the user provided umask */
   1721 			conf.core_ctl |= attrs[i].ka_val <<
   1722 			    CORE_UMASK_SHIFT;
   1723 		} else  if (strncmp(attrs[i].ka_name, "edge", 6) == 0) {
   1724 			if (attrs[i].ka_val != 0)
   1725 				conf.core_ctl |= CORE_EDGE;
   1726 		} else if (strncmp(attrs[i].ka_name, "inv", 4) == 0) {
   1727 			if (attrs[i].ka_val != 0)
   1728 				conf.core_ctl |= CORE_INV;
   1729 		} else if (strncmp(attrs[i].ka_name, "cmask", 6) == 0) {
   1730 			if ((attrs[i].ka_val | CORE_CMASK_MASK) !=
   1731 			    CORE_CMASK_MASK) {
   1732 				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
   1733 			}
   1734 			conf.core_ctl |= attrs[i].ka_val <<
   1735 			    CORE_CMASK_SHIFT;
   1736 		} else if (strncmp(attrs[i].ka_name, "anythr", 7) ==
   1737 		    0) {
   1738 			if (versionid < 3)
   1739 				return (CPC_INVALID_ATTRIBUTE);
   1740 			if (secpolicy_cpc_cpu(crgetcred()) != 0) {
   1741 				return (CPC_ATTR_REQUIRES_PRIVILEGE);
   1742 			}
   1743 			if (attrs[i].ka_val != 0)
   1744 				conf.core_ctl |= CORE_ANYTHR;
   1745 		} else {
   1746 			return (CPC_INVALID_ATTRIBUTE);
   1747 		}
   1748 	}
   1749 
   1750 	if (flags & CPC_COUNT_USER)
   1751 		conf.core_ctl |= CORE_USR;
   1752 	if (flags & CPC_COUNT_SYSTEM)
   1753 		conf.core_ctl |= CORE_OS;
   1754 	if (flags & CPC_OVF_NOTIFY_EMT)
   1755 		conf.core_ctl |= CORE_INT;
   1756 	conf.core_ctl |= CORE_EN;
   1757 
   1758 	if (versionid < 3 && k == NULL) {
   1759 		if (check_cpc_securitypolicy(&conf, n) != 0) {
   1760 			return (CPC_ATTR_REQUIRES_PRIVILEGE);
   1761 		}
   1762 	}
   1763 
   1764 	*data = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
   1765 	*((core_pcbe_config_t *)*data) = conf;
   1766 
   1767 	return (0);
   1768 }
   1769 
   1770 static int
   1771 configure_ffc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
   1772     uint_t nattrs, kcpc_attr_t *attrs, void **data)
   1773 {
   1774 	core_pcbe_config_t	*conf;
   1775 	uint_t			i;
   1776 
   1777 	if (picnum - num_gpc >= num_ffc) {
   1778 		return (CPC_INVALID_PICNUM);
   1779 	}
   1780 
   1781 	if ((strcmp(ffc_names[picnum-num_gpc], event) != 0) &&
   1782 	    (strcmp(ffc_genericnames[picnum-num_gpc], event) != 0)) {
   1783 		return (CPC_INVALID_EVENT);
   1784 	}
   1785 
   1786 	if ((versionid < 3) && (nattrs != 0)) {
   1787 		return (CPC_INVALID_ATTRIBUTE);
   1788 	}
   1789 
   1790 	conf = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
   1791 	conf->core_ctl = 0;
   1792 
   1793 	for (i = 0; i < nattrs; i++) {
   1794 		if (strncmp(attrs[i].ka_name, "anythr", 7) == 0) {
   1795 			if (secpolicy_cpc_cpu(crgetcred()) != 0) {
   1796 				return (CPC_ATTR_REQUIRES_PRIVILEGE);
   1797 			}
   1798 			if (attrs[i].ka_val != 0) {
   1799 				conf->core_ctl |= CORE_FFC_ANYTHR;
   1800 			}
   1801 		} else {
   1802 			kmem_free(conf, sizeof (core_pcbe_config_t));
   1803 			return (CPC_INVALID_ATTRIBUTE);
   1804 		}
   1805 	}
   1806 
   1807 	conf->core_picno = picnum;
   1808 	conf->core_pictype = CORE_FFC;
   1809 	conf->core_rawpic = preset & mask_ffc;
   1810 	conf->core_pmc = FFC_BASE_PMC + (picnum - num_gpc);
   1811 
   1812 	/* All fixed-function counters have the same control register */
   1813 	conf->core_pes = PERF_FIXED_CTR_CTRL;
   1814 
   1815 	if (flags & CPC_COUNT_USER)
   1816 		conf->core_ctl |= CORE_FFC_USR_EN;
   1817 	if (flags & CPC_COUNT_SYSTEM)
   1818 		conf->core_ctl |= CORE_FFC_OS_EN;
   1819 	if (flags & CPC_OVF_NOTIFY_EMT)
   1820 		conf->core_ctl |= CORE_FFC_PMI;
   1821 
   1822 	*data = conf;
   1823 	return (0);
   1824 }
   1825 
   1826 /*ARGSUSED*/
   1827 static int
   1828 core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
   1829     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
   1830     void *token)
   1831 {
   1832 	int			ret;
   1833 	core_pcbe_config_t	*conf;
   1834 
   1835 	/*
   1836 	 * If we've been handed an existing configuration, we need only preset
   1837 	 * the counter value.
   1838 	 */
   1839 	if (*data != NULL) {
   1840 		conf = *data;
   1841 		ASSERT(conf->core_pictype == CORE_GPC ||
   1842 		    conf->core_pictype == CORE_FFC);
   1843 		if (conf->core_pictype == CORE_GPC)
   1844 			conf->core_rawpic = preset & mask_gpc;
   1845 		else /* CORE_FFC */
   1846 			conf->core_rawpic = preset & mask_ffc;
   1847 		return (0);
   1848 	}
   1849 
   1850 	if (picnum >= total_pmc) {
   1851 		return (CPC_INVALID_PICNUM);
   1852 	}
   1853 
   1854 	if (picnum < num_gpc) {
   1855 		ret = configure_gpc(picnum, event, preset, flags,
   1856 		    nattrs, attrs, data);
   1857 	} else {
   1858 		ret = configure_ffc(picnum, event, preset, flags,
   1859 		    nattrs, attrs, data);
   1860 	}
   1861 	return (ret);
   1862 }
   1863 
   1864 static void
   1865 core_pcbe_program(void *token)
   1866 {
   1867 	core_pcbe_config_t	*cfg;
   1868 	uint64_t		perf_global_ctrl;
   1869 	uint64_t		perf_fixed_ctr_ctrl;
   1870 	uint64_t		curcr4;
   1871 
   1872 	core_pcbe_allstop();
   1873 
   1874 	curcr4 = getcr4();
   1875 	if (kcpc_allow_nonpriv(token))
   1876 		/* Allow RDPMC at any ring level */
   1877 		setcr4(curcr4 | CR4_PCE);
   1878 	else
   1879 		/* Allow RDPMC only at ring 0 */
   1880 		setcr4(curcr4 & ~CR4_PCE);
   1881 
   1882 	/* Clear any overflow indicators before programming the counters */
   1883 	WRMSR(PERF_GLOBAL_OVF_CTRL, MASK_CONDCHGD_OVFBUFFER | control_mask);
   1884 
   1885 	cfg = NULL;
   1886 	perf_global_ctrl = 0;
   1887 	perf_fixed_ctr_ctrl = 0;
   1888 	cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, NULL);
   1889 	while (cfg != NULL) {
   1890 		ASSERT(cfg->core_pictype == CORE_GPC ||
   1891 		    cfg->core_pictype == CORE_FFC);
   1892 
   1893 		if (cfg->core_pictype == CORE_GPC) {
   1894 			/*
   1895 			 * General-purpose counter registers have write
   1896 			 * restrictions where only the lower 32-bits can be
   1897 			 * written to.  The rest of the relevant bits are
   1898 			 * written to by extension from bit 31 (all ZEROS if
   1899 			 * bit-31 is ZERO and all ONE if bit-31 is ONE).  This
   1900 			 * makes it possible to write to the counter register
   1901 			 * only values that have all ONEs or all ZEROs in the
   1902 			 * higher bits.
   1903 			 */
   1904 			if (((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 0) ||
   1905 			    ((cfg->core_rawpic & BITS_EXTENDED_FROM_31) ==
   1906 			    BITS_EXTENDED_FROM_31)) {
   1907 				/*
   1908 				 * Straighforward case where the higher bits
   1909 				 * are all ZEROs or all ONEs.
   1910 				 */
   1911 				WRMSR(cfg->core_pmc,
   1912 				    (cfg->core_rawpic & mask_gpc));
   1913 			} else {
   1914 				/*
   1915 				 * The high order bits are not all the same.
   1916 				 * We save what is currently in the registers
   1917 				 * and do not write to it.  When we want to do
   1918 				 * a read from this register later (in
   1919 				 * core_pcbe_sample()), we subtract the value
   1920 				 * we save here to get the actual event count.
   1921 				 *
   1922 				 * NOTE: As a result, we will not get overflow
   1923 				 * interrupts as expected.
   1924 				 */
   1925 				RDMSR(cfg->core_pmc, cfg->core_rawpic);
   1926 				cfg->core_rawpic = cfg->core_rawpic & mask_gpc;
   1927 			}
   1928 			WRMSR(cfg->core_pes, cfg->core_ctl);
   1929 			perf_global_ctrl |= 1ull << cfg->core_picno;
   1930 		} else {
   1931 			/*
   1932 			 * Unlike the general-purpose counters, all relevant
   1933 			 * bits of fixed-function counters can be written to.
   1934 			 */
   1935 			WRMSR(cfg->core_pmc, cfg->core_rawpic & mask_ffc);
   1936 
   1937 			/*
   1938 			 * Collect the control bits for all the
   1939 			 * fixed-function counters and write it at one shot
   1940 			 * later in this function
   1941 			 */
   1942 			perf_fixed_ctr_ctrl |= cfg->core_ctl <<
   1943 			    ((cfg->core_picno - num_gpc) * CORE_FFC_ATTR_SIZE);
   1944 			perf_global_ctrl |=
   1945 			    1ull << (cfg->core_picno - num_gpc + 32);
   1946 		}
   1947 
   1948 		cfg = (core_pcbe_config_t *)
   1949 		    kcpc_next_config(token, cfg, NULL);
   1950 	}
   1951 
   1952 	/* Enable all the counters */
   1953 	WRMSR(PERF_FIXED_CTR_CTRL, perf_fixed_ctr_ctrl);
   1954 	WRMSR(PERF_GLOBAL_CTRL, perf_global_ctrl);
   1955 }
   1956 
   1957 static void
   1958 core_pcbe_allstop(void)
   1959 {
   1960 	/* Disable all the counters together */
   1961 	WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED);
   1962 
   1963 	setcr4(getcr4() & ~CR4_PCE);
   1964 }
   1965 
   1966 static void
   1967 core_pcbe_sample(void *token)
   1968 {
   1969 	uint64_t		*daddr;
   1970 	uint64_t		curpic;
   1971 	core_pcbe_config_t	*cfg;
   1972 	uint64_t			counter_mask;
   1973 
   1974 	cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr);
   1975 	while (cfg != NULL) {
   1976 		ASSERT(cfg->core_pictype == CORE_GPC ||
   1977 		    cfg->core_pictype == CORE_FFC);
   1978 
   1979 		curpic = rdmsr(cfg->core_pmc);
   1980 
   1981 		DTRACE_PROBE4(core__pcbe__sample,
   1982 		    uint64_t, cfg->core_pmc,
   1983 		    uint64_t, curpic,
   1984 		    uint64_t, cfg->core_rawpic,
   1985 		    uint64_t, *daddr);
   1986 
   1987 		if (cfg->core_pictype == CORE_GPC) {
   1988 			counter_mask = mask_gpc;
   1989 		} else {
   1990 			counter_mask = mask_ffc;
   1991 		}
   1992 		curpic = curpic & counter_mask;
   1993 		if (curpic >= cfg->core_rawpic) {
   1994 			*daddr += curpic - cfg->core_rawpic;
   1995 		} else {
   1996 			/* Counter overflowed since our last sample */
   1997 			*daddr += counter_mask - (cfg->core_rawpic - curpic) +
   1998 			    1;
   1999 		}
   2000 		cfg->core_rawpic = *daddr & counter_mask;
   2001 
   2002 		cfg =
   2003 		    (core_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr);
   2004 	}
   2005 }
   2006 
   2007 static void
   2008 core_pcbe_free(void *config)
   2009 {
   2010 	kmem_free(config, sizeof (core_pcbe_config_t));
   2011 }
   2012 
   2013 static struct modlpcbe core_modlpcbe = {
   2014 	&mod_pcbeops,
   2015 	"Core Performance Counters",
   2016 	&core_pcbe_ops
   2017 };
   2018 
   2019 static struct modlinkage core_modl = {
   2020 	MODREV_1,
   2021 	&core_modlpcbe,
   2022 };
   2023 
   2024 int
   2025 _init(void)
   2026 {
   2027 	if (core_pcbe_init() != 0) {
   2028 		return (ENOTSUP);
   2029 	}
   2030 	return (mod_install(&core_modl));
   2031 }
   2032 
   2033 int
   2034 _fini(void)
   2035 {
   2036 	return (mod_remove(&core_modl));
   2037 }
   2038 
   2039 int
   2040 _info(struct modinfo *mi)
   2041 {
   2042 	return (mod_info(&core_modl, mi));
   2043 }
   2044