Home | History | Annotate | Download | only in cpu
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/types.h>
     27 #include <sys/systm.h>
     28 #include <sys/ddi.h>
     29 #include <sys/sysmacros.h>
     30 #include <sys/archsystm.h>
     31 #include <sys/vmsystm.h>
     32 #include <sys/machparam.h>
     33 #include <sys/machsystm.h>
     34 #include <sys/machthread.h>
     35 #include <sys/cpu.h>
     36 #include <sys/cmp.h>
     37 #include <sys/elf_SPARC.h>
     38 #include <vm/vm_dep.h>
     39 #include <vm/hat_sfmmu.h>
     40 #include <vm/seg_kpm.h>
     41 #include <sys/cpuvar.h>
     42 #include <sys/cheetahregs.h>
     43 #include <sys/us3_module.h>
     44 #include <sys/async.h>
     45 #include <sys/cmn_err.h>
     46 #include <sys/debug.h>
     47 #include <sys/dditypes.h>
     48 #include <sys/prom_debug.h>
     49 #include <sys/prom_plat.h>
     50 #include <sys/cpu_module.h>
     51 #include <sys/sysmacros.h>
     52 #include <sys/intreg.h>
     53 #include <sys/clock.h>
     54 #include <sys/platform_module.h>
     55 #include <sys/machtrap.h>
     56 #include <sys/ontrap.h>
     57 #include <sys/panic.h>
     58 #include <sys/memlist.h>
     59 #include <sys/bootconf.h>
     60 #include <sys/ivintr.h>
     61 #include <sys/atomic.h>
     62 #include <sys/taskq.h>
     63 #include <sys/note.h>
     64 #include <sys/ndifm.h>
     65 #include <sys/ddifm.h>
     66 #include <sys/fm/protocol.h>
     67 #include <sys/fm/util.h>
     68 #include <sys/fm/cpu/UltraSPARC-III.h>
     69 #include <sys/fpras_impl.h>
     70 #include <sys/dtrace.h>
     71 #include <sys/watchpoint.h>
     72 #include <sys/plat_ecc_unum.h>
     73 #include <sys/cyclic.h>
     74 #include <sys/errorq.h>
     75 #include <sys/errclassify.h>
     76 #include <sys/pghw.h>
     77 #include <sys/clock_impl.h>
     78 
     79 #ifdef	CHEETAHPLUS_ERRATUM_25
     80 #include <sys/xc_impl.h>
     81 #endif	/* CHEETAHPLUS_ERRATUM_25 */
     82 
     83 ch_cpu_logout_t	clop_before_flush;
     84 ch_cpu_logout_t	clop_after_flush;
     85 uint_t	flush_retries_done = 0;
     86 /*
     87  * Note that 'Cheetah PRM' refers to:
     88  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
     89  */
     90 
     91 /*
     92  * Per CPU pointers to physical address of TL>0 logout data areas.
     93  * These pointers have to be in the kernel nucleus to avoid MMU
     94  * misses.
     95  */
     96 uint64_t ch_err_tl1_paddrs[NCPU];
     97 
     98 /*
     99  * One statically allocated structure to use during startup/DR
    100  * to prevent unnecessary panics.
    101  */
    102 ch_err_tl1_data_t ch_err_tl1_data;
    103 
    104 /*
    105  * Per CPU pending error at TL>0, used by level15 softint handler
    106  */
    107 uchar_t ch_err_tl1_pending[NCPU];
    108 
    109 /*
    110  * For deferred CE re-enable after trap.
    111  */
    112 taskq_t		*ch_check_ce_tq;
    113 
    114 /*
    115  * Internal functions.
    116  */
    117 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
    118 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
    119 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
    120     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
    121 static int cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt,
    122     uint64_t t_afsr_bit);
    123 static int clear_ecc(struct async_flt *ecc);
    124 #if defined(CPU_IMP_ECACHE_ASSOC)
    125 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
    126 #endif
    127 int cpu_ecache_set_size(struct cpu *cp);
    128 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
    129 int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
    130 uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
    131 int cpu_ectag_pa_to_subblk_state(int cachesize,
    132 				uint64_t subaddr, uint64_t tag);
    133 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
    134 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
    135 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
    136 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
    137 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
    138 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
    139 static int cpu_get_mem_unum_synd(int synd_code, struct async_flt *, char *buf);
    140 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
    141 static void cpu_scrubphys(struct async_flt *aflt);
    142 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
    143     int *, int *);
    144 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
    145 static void cpu_ereport_init(struct async_flt *aflt);
    146 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
    147 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
    148 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
    149     uint64_t nceen, ch_cpu_logout_t *clop);
    150 static int cpu_ce_delayed_ec_logout(uint64_t);
    151 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
    152 static int cpu_error_is_ecache_data(int, uint64_t);
    153 static void cpu_fmri_cpu_set(nvlist_t *, int);
    154 static int cpu_error_to_resource_type(struct async_flt *aflt);
    155 
    156 #ifdef	CHEETAHPLUS_ERRATUM_25
    157 static int mondo_recover_proc(uint16_t, int);
    158 static void cheetah_nudge_init(void);
    159 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
    160     cyc_time_t *when);
    161 static void cheetah_nudge_buddy(void);
    162 #endif	/* CHEETAHPLUS_ERRATUM_25 */
    163 
    164 #if defined(CPU_IMP_L1_CACHE_PARITY)
    165 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
    166 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
    167 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
    168     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
    169 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
    170 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
    171 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
    172 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
    173 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
    174 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
    175 #endif	/* CPU_IMP_L1_CACHE_PARITY */
    176 
    177 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
    178     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    179     int *segsp, int *banksp, int *mcidp);
    180 
    181 /*
    182  * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs.  The array is indexed by a 9-bit syndrome.  The entries
    184  * of this array have the following semantics:
    185  *
    186  *      00-127  The number of the bad bit, when only one bit is bad.
    187  *      128     ECC bit C0 is bad.
    188  *      129     ECC bit C1 is bad.
    189  *      130     ECC bit C2 is bad.
    190  *      131     ECC bit C3 is bad.
    191  *      132     ECC bit C4 is bad.
    192  *      133     ECC bit C5 is bad.
    193  *      134     ECC bit C6 is bad.
    194  *      135     ECC bit C7 is bad.
    195  *      136     ECC bit C8 is bad.
    196  *	137-143 reserved for Mtag Data and ECC.
    197  *      144(M2) Two bits are bad within a nibble.
    198  *      145(M3) Three bits are bad within a nibble.
 *      146(M4) Four bits are bad within a nibble.
    200  *      147(M)  Multiple bits (5 or more) are bad.
    201  *      148     NO bits are bad.
    202  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
    203  */
    204 
    205 #define	C0	128
    206 #define	C1	129
    207 #define	C2	130
    208 #define	C3	131
    209 #define	C4	132
    210 #define	C5	133
    211 #define	C6	134
    212 #define	C7	135
    213 #define	C8	136
    214 #define	MT0	137	/* Mtag Data bit 0 */
    215 #define	MT1	138
    216 #define	MT2	139
    217 #define	MTC0	140	/* Mtag Check bit 0 */
    218 #define	MTC1	141
    219 #define	MTC2	142
    220 #define	MTC3	143
    221 #define	M2	144
    222 #define	M3	145
    223 #define	M4	146
    224 #define	M	147
    225 #define	NA	148
    226 #if defined(JALAPENO) || defined(SERRANO)
    227 #define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
    228 #define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
    229 #define	SLAST	S003MEM	/* last special syndrome */
    230 #else /* JALAPENO || SERRANO */
    231 #define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
    232 #define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
    233 #define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
    234 #define	SLAST	S11C	/* last special syndrome */
    235 #endif /* JALAPENO || SERRANO */
#if defined(JALAPENO) || defined(SERRANO)
#define	BPAR0	152	/* syndromes 152 through 167 are for bus parity */
#define	BPAR15	167	/* last of the bus parity syndromes */
#endif	/* JALAPENO || SERRANO */
    240 
    241 static uint8_t ecc_syndrome_tab[] =
    242 {
    243 NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
    244 C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
    245 C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
    246 M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
    247 C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
    248 M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
    249 M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
    250 #if defined(JALAPENO) || defined(SERRANO)
    251 116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
    252 #else	/* JALAPENO || SERRANO */
    253 116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
    254 #endif	/* JALAPENO || SERRANO */
    255 C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
    256 M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
    257 M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
    258 103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
    259 M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
    260 102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
    261 98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
    262 M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
    263 C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
    264 #if defined(JALAPENO) || defined(SERRANO)
    265 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
    266 #else	/* JALAPENO || SERRANO */
    267 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
    268 #endif	/* JALAPENO || SERRANO */
    269 M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
    270 94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
    271 M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
    272 89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
    273 86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
    274 M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
    275 M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
    276 77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
    277 74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
    278 M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
    279 80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
    280 M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
    281 M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
    282 111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
    283 };
    284 
    285 #define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
    286 
    287 #if !(defined(JALAPENO) || defined(SERRANO))
    288 /*
    289  * This table is used to determine which bit(s) is(are) bad when a Mtag
 * error occurs.  The array is indexed by a 4-bit ECC syndrome. The entries
    291  * of this array have the following semantics:
    292  *
    293  *      -1	Invalid mtag syndrome.
    294  *      137     Mtag Data 0 is bad.
    295  *      138     Mtag Data 1 is bad.
    296  *      139     Mtag Data 2 is bad.
    297  *      140     Mtag ECC 0 is bad.
    298  *      141     Mtag ECC 1 is bad.
    299  *      142     Mtag ECC 2 is bad.
    300  *      143     Mtag ECC 3 is bad.
 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Table 11-6.
    302  */
    303 short mtag_syndrome_tab[] =
    304 {
    305 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
    306 };
    307 
    308 #define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
    309 
    310 #else /* !(JALAPENO || SERRANO) */
    311 
    312 #define	BSYND_TBL_SIZE	16
    313 
    314 #endif /* !(JALAPENO || SERRANO) */
    315 
    316 /*
    317  * Types returned from cpu_error_to_resource_type()
    318  */
    319 #define	ERRTYPE_UNKNOWN		0
    320 #define	ERRTYPE_CPU		1
    321 #define	ERRTYPE_MEMORY		2
    322 #define	ERRTYPE_ECACHE_DATA	3
    323 
    324 /*
    325  * CE initial classification and subsequent action lookup table
    326  */
    327 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
    328 static int ce_disp_inited;
    329 
    330 /*
    331  * Set to disable leaky and partner check for memory correctables
    332  */
    333 int ce_xdiag_off;
    334 
    335 /*
    336  * The following are not incremented atomically so are indicative only
    337  */
    338 static int ce_xdiag_drops;
    339 static int ce_xdiag_lkydrops;
    340 static int ce_xdiag_ptnrdrops;
    341 static int ce_xdiag_bad;
    342 
    343 /*
    344  * CE leaky check callback structure
    345  */
    346 typedef struct {
    347 	struct async_flt *lkycb_aflt;
    348 	errorq_t *lkycb_eqp;
    349 	errorq_elem_t *lkycb_eqep;
    350 } ce_lkychk_cb_t;
    351 
    352 /*
    353  * defines for various ecache_flush_flag's
    354  */
    355 #define	ECACHE_FLUSH_LINE	1
    356 #define	ECACHE_FLUSH_ALL	2
    357 
    358 /*
    359  * STICK sync
    360  */
    361 #define	STICK_ITERATION 10
    362 #define	MAX_TSKEW	1
    363 #define	EV_A_START	0
    364 #define	EV_A_END	1
    365 #define	EV_B_START	2
    366 #define	EV_B_END	3
    367 #define	EVENTS		4
    368 
    369 static int64_t stick_iter = STICK_ITERATION;
    370 static int64_t stick_tsk = MAX_TSKEW;
    371 
    372 typedef enum {
    373 	EVENT_NULL = 0,
    374 	SLAVE_START,
    375 	SLAVE_CONT,
    376 	MASTER_START
    377 } event_cmd_t;
    378 
    379 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
    380 static int64_t timestamp[EVENTS];
    381 static volatile int slave_done;
    382 
    383 #ifdef DEBUG
    384 #define	DSYNC_ATTEMPTS 64
    385 typedef struct {
    386 	int64_t	skew_val[DSYNC_ATTEMPTS];
    387 } ss_t;
    388 
    389 ss_t stick_sync_stats[NCPU];
    390 #endif /* DEBUG */
    391 
    392 uint_t cpu_impl_dual_pgsz = 0;
    393 #if defined(CPU_IMP_DUAL_PAGESIZE)
    394 uint_t disable_dual_pgsz = 0;
    395 #endif	/* CPU_IMP_DUAL_PAGESIZE */
    396 
    397 /*
    398  * Save the cache bootup state for use when internal
    399  * caches are to be re-enabled after an error occurs.
    400  */
    401 uint64_t cache_boot_state;
    402 
    403 /*
    404  * PA[22:0] represent Displacement in Safari configuration space.
    405  */
    406 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
    407 
    408 bus_config_eclk_t bus_config_eclk[] = {
    409 #if defined(JALAPENO) || defined(SERRANO)
    410 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
    411 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
    412 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
    413 #else /* JALAPENO || SERRANO */
    414 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
    415 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
    416 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
    417 #endif /* JALAPENO || SERRANO */
    418 	{0, 0}
    419 };
    420 
    421 /*
    422  * Interval for deferred CEEN reenable
    423  */
    424 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
    425 
    426 /*
    427  * set in /etc/system to control logging of user BERR/TO's
    428  */
    429 int cpu_berr_to_verbose = 0;
    430 
    431 /*
    432  * set to 0 in /etc/system to defer CEEN reenable for all CEs
    433  */
    434 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
    435 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
    436 
    437 /*
    438  * Set of all offline cpus
    439  */
    440 cpuset_t cpu_offline_set;
    441 
    442 static void cpu_delayed_check_ce_errors(void *);
    443 static void cpu_check_ce_errors(void *);
    444 void cpu_error_ecache_flush(ch_async_flt_t *);
    445 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
    446 static void cpu_log_and_clear_ce(ch_async_flt_t *);
    447 void cpu_ce_detected(ch_cpu_errors_t *, int);
    448 
    449 /*
    450  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
    451  * memory refresh interval of current DIMMs (64ms).  After initial fix that
    452  * gives at least one full refresh cycle in which the cell can leak
    453  * (whereafter further refreshes simply reinforce any incorrect bit value).
    454  */
    455 clock_t cpu_ce_lkychk_timeout_usec = 128000;
    456 
    457 /*
    458  * CE partner check partner caching period in seconds
    459  */
    460 int cpu_ce_ptnr_cachetime_sec = 60;
    461 
    462 /*
    463  * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
    464  */
    465 #define	CH_SET_TRAP(ttentry, ttlabel)			\
    466 		bcopy((const void *)&ttlabel, &ttentry, 32);		\
    467 		flush_instr_mem((caddr_t)&ttentry, 32);
    468 
    469 static int min_ecache_size;
    470 static uint_t priv_hcl_1;
    471 static uint_t priv_hcl_2;
    472 static uint_t priv_hcl_4;
    473 static uint_t priv_hcl_8;
    474 
    475 void
    476 cpu_setup(void)
    477 {
    478 	extern int at_flags;
    479 	extern int cpc_has_overflow_intr;
    480 
    481 	/*
    482 	 * Setup chip-specific trap handlers.
    483 	 */
    484 	cpu_init_trap();
    485 
    486 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
    487 
    488 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
    489 
    490 	/*
    491 	 * save the cache bootup state.
    492 	 */
    493 	cache_boot_state = get_dcu() & DCU_CACHE;
    494 
    495 	/*
    496 	 * Due to the number of entries in the fully-associative tlb
    497 	 * this may have to be tuned lower than in spitfire.
    498 	 */
    499 	pp_slots = MIN(8, MAXPP_SLOTS);
    500 
    501 	/*
    502 	 * Block stores do not invalidate all pages of the d$, pagecopy
    503 	 * et. al. need virtual translations with virtual coloring taken
    504 	 * into consideration.  prefetch/ldd will pollute the d$ on the
    505 	 * load side.
    506 	 */
    507 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
    508 
    509 	if (use_page_coloring) {
    510 		do_pg_coloring = 1;
    511 	}
    512 
    513 	isa_list =
    514 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
    515 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
    516 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
    517 
    518 	/*
    519 	 * On Panther-based machines, this should
    520 	 * also include AV_SPARC_POPC too
    521 	 */
    522 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
    523 
    524 	/*
    525 	 * On cheetah, there's no hole in the virtual address space
    526 	 */
    527 	hole_start = hole_end = 0;
    528 
    529 	/*
    530 	 * The kpm mapping window.
    531 	 * kpm_size:
    532 	 *	The size of a single kpm range.
    533 	 *	The overall size will be: kpm_size * vac_colors.
    534 	 * kpm_vbase:
    535 	 *	The virtual start address of the kpm range within the kernel
    536 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
    537 	 */
    538 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
    539 	kpm_size_shift = 43;
    540 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
    541 	kpm_smallpages = 1;
    542 
    543 	/*
    544 	 * The traptrace code uses either %tick or %stick for
    545 	 * timestamping.  We have %stick so we can use it.
    546 	 */
    547 	traptrace_use_stick = 1;
    548 
    549 	/*
    550 	 * Cheetah has a performance counter overflow interrupt
    551 	 */
    552 	cpc_has_overflow_intr = 1;
    553 
    554 #if defined(CPU_IMP_DUAL_PAGESIZE)
    555 	/*
    556 	 * Use Cheetah+ and later dual page size support.
    557 	 */
    558 	if (!disable_dual_pgsz) {
    559 		cpu_impl_dual_pgsz = 1;
    560 	}
    561 #endif	/* CPU_IMP_DUAL_PAGESIZE */
    562 
    563 	/*
    564 	 * Declare that this architecture/cpu combination does fpRAS.
    565 	 */
    566 	fpras_implemented = 1;
    567 
    568 	/*
    569 	 * Setup CE lookup table
    570 	 */
    571 	CE_INITDISPTBL_POPULATE(ce_disp_table);
    572 	ce_disp_inited = 1;
    573 }
    574 
    575 /*
    576  * Called by setcpudelay
    577  */
    578 void
    579 cpu_init_tick_freq(void)
    580 {
    581 	/*
    582 	 * For UltraSPARC III and beyond we want to use the
    583 	 * system clock rate as the basis for low level timing,
    584 	 * due to support of mixed speed CPUs and power managment.
    585 	 */
    586 	if (system_clock_freq == 0)
    587 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
    588 
    589 	sys_tick_freq = system_clock_freq;
    590 }
    591 
    592 #ifdef CHEETAHPLUS_ERRATUM_25
    593 /*
    594  * Tunables
    595  */
    596 int cheetah_bpe_off = 0;
    597 int cheetah_sendmondo_recover = 1;
    598 int cheetah_sendmondo_fullscan = 0;
    599 int cheetah_sendmondo_recover_delay = 5;
    600 
    601 #define	CHEETAH_LIVELOCK_MIN_DELAY	1
    602 
    603 /*
    604  * Recovery Statistics
    605  */
    606 typedef struct cheetah_livelock_entry	{
    607 	int cpuid;		/* fallen cpu */
    608 	int buddy;		/* cpu that ran recovery */
    609 	clock_t lbolt;		/* when recovery started */
    610 	hrtime_t recovery_time;	/* time spent in recovery */
    611 } cheetah_livelock_entry_t;
    612 
    613 #define	CHEETAH_LIVELOCK_NENTRY	32
    614 
    615 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
    616 int cheetah_livelock_entry_nxt;
    617 
    618 #define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
    619 	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
    620 	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
    621 		cheetah_livelock_entry_nxt = 0;				\
    622 	}								\
    623 }
    624 
    625 #define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	statp->item = val
    626 
    627 struct {
    628 	hrtime_t hrt;		/* maximum recovery time */
    629 	int recovery;		/* recovered */
    630 	int full_claimed;	/* maximum pages claimed in full recovery */
    631 	int proc_entry;		/* attempted to claim TSB */
    632 	int proc_tsb_scan;	/* tsb scanned */
    633 	int proc_tsb_partscan;	/* tsb partially scanned */
    634 	int proc_tsb_fullscan;	/* whole tsb scanned */
    635 	int proc_claimed;	/* maximum pages claimed in tsb scan */
    636 	int proc_user;		/* user thread */
    637 	int proc_kernel;	/* kernel thread */
    638 	int proc_onflt;		/* bad stack */
    639 	int proc_cpu;		/* null cpu */
    640 	int proc_thread;	/* null thread */
    641 	int proc_proc;		/* null proc */
    642 	int proc_as;		/* null as */
    643 	int proc_hat;		/* null hat */
    644 	int proc_hat_inval;	/* hat contents don't make sense */
    645 	int proc_hat_busy;	/* hat is changing TSBs */
    646 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
    647 	int proc_cnum_bad;	/* cnum out of range */
    648 	int proc_cnum;		/* last cnum processed */
    649 	tte_t proc_tte;		/* last tte processed */
    650 } cheetah_livelock_stat;
    651 
    652 #define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++
    653 
    654 #define	CHEETAH_LIVELOCK_STATSET(item, value)		\
    655 	cheetah_livelock_stat.item = value
    656 
    657 #define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
    658 	if (value > cheetah_livelock_stat.item)		\
    659 		cheetah_livelock_stat.item = value;	\
    660 }
    661 
/*
 * Attempt to recover a cpu by claiming every cache line as saved
 * in the TSB that the non-responsive cpu is using. Since we can't
 * grab any adaptive lock, this is at best an attempt to do so. Because
 * we don't grab any locks, we must operate under the protection of
 * on_fault().
 *
 * cpuid indexes the cpu[] array to find the target cpu.  bn selects which
 * NACK/BUSY bits of the interrupt dispatch status register (as read by
 * getidsr()) are examined, and is passed through to shipit().
 *
 * The scan covers the TSB list of the hat belonging to the thread that is
 * current on the target cpu and, for a user hat, the kernel TSB afterwards.
 * It stops early as soon as neither the NACK nor the BUSY bit for bn is
 * set.
 *
 * Return 1 if cpuid could be recovered, 0 if failed.
 */
int
mondo_recover_proc(uint16_t cpuid, int bn)
{
	label_t ljb;
	cpu_t *cp;
	kthread_t *t;
	proc_t *p;
	struct as *as;
	struct hat *hat;
	uint_t  cnum;
	struct tsb_info *tsbinfop;
	struct tsbe *tsbep;
	caddr_t tsbp;
	caddr_t end_tsbp;
	uint64_t paddr;
	uint64_t idsr;
	u_longlong_t pahi, palo;
	int pages_claimed = 0;
	tte_t tsbe_tte;
	int tried_kernel_tsb = 0;
	mmu_ctx_t *mmu_ctxp;

	CHEETAH_LIVELOCK_STAT(proc_entry);

	/*
	 * A nonzero return from on_fault() means one of the unlocked
	 * dereferences below faulted; count it and give up.
	 */
	if (on_fault(&ljb)) {
		CHEETAH_LIVELOCK_STAT(proc_onflt);
		goto badstruct;
	}

	/*
	 * Walk cpu -> thread -> proc -> as -> hat, bailing out (with a
	 * per-step statistic) at the first NULL link.
	 */
	if ((cp = cpu[cpuid]) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_cpu);
		goto badstruct;
	}

	if ((t = cp->cpu_thread) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_thread);
		goto badstruct;
	}

	if ((p = ttoproc(t)) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_proc);
		goto badstruct;
	}

	if ((as = p->p_as) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_as);
		goto badstruct;
	}

	if ((hat = as->a_hat) == NULL) {
		CHEETAH_LIVELOCK_STAT(proc_hat);
		goto badstruct;
	}

	/*
	 * Pick the first TSB to scan: the head of the hat's TSB list for a
	 * user hat, or the kernel TSB when the hat is ksfmmup.
	 */
	if (hat != ksfmmup) {
		CHEETAH_LIVELOCK_STAT(proc_user);
		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
			goto badstruct;
		}
		tsbinfop = hat->sfmmu_tsb;
		if (tsbinfop == NULL) {
			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
			goto badstruct;
		}
		tsbp = tsbinfop->tsb_va;
		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
	} else {
		CHEETAH_LIVELOCK_STAT(proc_kernel);
		tsbinfop = NULL;
		tsbp = ktsb_base;
		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
	}

	/* Verify as */
	if (hat->sfmmu_as != as) {
		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
		goto badstruct;
	}

	/*
	 * Look up the hat's context number for the target cpu's MMU context
	 * domain and reject it if it is invalid or out of range.
	 */
	mmu_ctxp = CPU_MMU_CTXP(cp);
	ASSERT(mmu_ctxp);
	cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);

	/*
	 * NOTE(review): cnum is declared uint_t, so the (cnum < 0) term can
	 * never be true; only the other two comparisons are effective.
	 */
	if ((cnum < 0) || (cnum == INVALID_CONTEXT) ||
	    (cnum >= mmu_ctxp->mmu_nctxs)) {
		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
		goto badstruct;
	}

	/* One iteration per TSB: [tsbp, end_tsbp) is the TSB being scanned. */
	do {
		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);

		/*
		 * Skip TSBs being relocated.  This is important because
		 * we want to avoid the following deadlock scenario:
		 *
		 * 1) when we came in we set ourselves to "in recover" state.
		 * 2) when we try to touch TSB being relocated the mapping
		 *    will be in the suspended state so we'll spin waiting
		 *    for it to be unlocked.
		 * 3) when the CPU that holds the TSB mapping locked tries to
		 *    unlock it it will send a xtrap which will fail to xcall
		 *    us or the CPU we're trying to recover, and will in turn
		 *    enter the mondo code.
		 * 4) since we are still spinning on the locked mapping
		 *    no further progress will be made and the system will
		 *    inevitably hard hang.
		 *
		 * A TSB not being relocated can't begin being relocated
		 * while we're accessing it because we check
		 * sendmondo_in_recover before relocating TSBs.
		 */
		if (hat != ksfmmup &&
		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
			goto next_tsbinfo;
		}

		for (tsbep = (struct tsbe *)tsbp;
		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
			/* Work on a local copy of the entry's tte. */
			tsbe_tte = tsbep->tte_data;

			if (tsbe_tte.tte_val == 0) {
				/*
				 * Invalid tte
				 */
				continue;
			}
			if (tsbe_tte.tte_se) {
				/*
				 * Don't want device registers
				 */
				continue;
			}
			if (tsbe_tte.tte_cp == 0) {
				/*
				 * Must be cached in E$
				 */
				continue;
			}
			if (tsbep->tte_tag.tag_invalid != 0) {
				/*
				 * Invalid tag, ignore this entry.
				 */
				continue;
			}
			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
			/*
			 * If neither NACK nor BUSY is set for bn there is
			 * nothing left to recover; stop after a partial scan.
			 */
			idsr = getidsr();
			if ((idsr & (IDSR_NACK_BIT(bn) |
			    IDSR_BUSY_BIT(bn))) == 0) {
				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
				goto done;
			}
			/* Build the physical address from the tte's PA fields. */
			pahi = tsbe_tte.tte_pahi;
			palo = tsbe_tte.tte_palo;
			paddr = (uint64_t)((pahi << 32) |
			    (palo << MMU_PAGESHIFT));
			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
			    CH_ECACHE_SUBBLK_SIZE);
			/*
			 * This tests the idsr value sampled before
			 * claimlines(); it is not re-read here.
			 */
			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
				shipit(cpuid, bn);
			}
			pages_claimed++;
		}
next_tsbinfo:
		/*
		 * Advance to the hat's next TSB.  Once the list is exhausted,
		 * fall back to the kernel TSB exactly once; tried_kernel_tsb
		 * records that the kernel TSB has been scanned.
		 */
		if (tsbinfop != NULL)
			tsbinfop = tsbinfop->tsb_next;
		if (tsbinfop != NULL) {
			tsbp = tsbinfop->tsb_va;
			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
		} else if (tsbp == ktsb_base) {
			tried_kernel_tsb = 1;
		} else if (!tried_kernel_tsb) {
			tsbp = ktsb_base;
			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
			hat = ksfmmup;
			tsbinfop = NULL;
		}
	} while (tsbinfop != NULL ||
	    ((tsbp == ktsb_base) && !tried_kernel_tsb));

	/*
	 * Every TSB was scanned without an early exit.  Success is decided
	 * by a final read of the dispatch status.
	 */
	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
	no_fault();
	idsr = getidsr();
	if ((idsr & (IDSR_NACK_BIT(bn) |
	    IDSR_BUSY_BIT(bn))) == 0) {
		return (1);
	} else {
		return (0);
	}

done:
	/* Early exit: NACK and BUSY both cleared part-way through a TSB. */
	no_fault();
	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
	return (1);

badstruct:
	/* A structure check failed or a fault was caught; nothing recovered. */
	no_fault();
	return (0);
}
    874 
    875 /*
    876  * Attempt to claim ownership, temporarily, of every cache line that a
    877  * non-responsive cpu might be using.  This might kick that cpu out of
    878  * this state.
    879  *
    880  * The return value indicates to the caller if we have exhausted all recovery
    881  * techniques. If 1 is returned, it is useless to call this function again
    882  * even for a different target CPU.
    883  */
    884 int
    885 mondo_recover(uint16_t cpuid, int bn)
    886 {
    887 	struct memseg *seg;
    888 	uint64_t begin_pa, end_pa, cur_pa;
    889 	hrtime_t begin_hrt, end_hrt;
    890 	int retval = 0;
    891 	int pages_claimed = 0;
    892 	cheetah_livelock_entry_t *histp;
    893 	uint64_t idsr;
    894 
    895 	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
    896 		/*
    897 		 * Wait while recovery takes place
    898 		 */
    899 		while (sendmondo_in_recover) {
    900 			drv_usecwait(1);
    901 		}
    902 		/*
    903 		 * Assume we didn't claim the whole memory. If
    904 		 * the target of this caller is not recovered,
    905 		 * it will come back.
    906 		 */
    907 		return (retval);
    908 	}
    909 
    910 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp);
    911 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, LBOLT_WAITFREE);
    912 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
    913 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
    914 
    915 	begin_hrt = gethrtime_waitfree();
    916 	/*
    917 	 * First try to claim the lines in the TSB the target
    918 	 * may have been using.
    919 	 */
    920 	if (mondo_recover_proc(cpuid, bn) == 1) {
    921 		/*
    922 		 * Didn't claim the whole memory
    923 		 */
    924 		goto done;
    925 	}
    926 
    927 	/*
    928 	 * We tried using the TSB. The target is still
    929 	 * not recovered. Check if complete memory scan is
    930 	 * enabled.
    931 	 */
    932 	if (cheetah_sendmondo_fullscan == 0) {
    933 		/*
    934 		 * Full memory scan is disabled.
    935 		 */
    936 		retval = 1;
    937 		goto done;
    938 	}
    939 
    940 	/*
    941 	 * Try claiming the whole memory.
    942 	 */
    943 	for (seg = memsegs; seg; seg = seg->next) {
    944 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
    945 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
    946 		for (cur_pa = begin_pa; cur_pa < end_pa;
    947 		    cur_pa += MMU_PAGESIZE) {
    948 			idsr = getidsr();
    949 			if ((idsr & (IDSR_NACK_BIT(bn) |
    950 			    IDSR_BUSY_BIT(bn))) == 0) {
    951 				/*
    952 				 * Didn't claim all memory
    953 				 */
    954 				goto done;
    955 			}
    956 			claimlines(cur_pa, MMU_PAGESIZE,
    957 			    CH_ECACHE_SUBBLK_SIZE);
    958 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
    959 				shipit(cpuid, bn);
    960 			}
    961 			pages_claimed++;
    962 		}
    963 	}
    964 
    965 	/*
    966 	 * We did all we could.
    967 	 */
    968 	retval = 1;
    969 
    970 done:
    971 	/*
    972 	 * Update statistics
    973 	 */
    974 	end_hrt = gethrtime_waitfree();
    975 	CHEETAH_LIVELOCK_STAT(recovery);
    976 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
    977 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
    978 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
    979 	    (end_hrt -  begin_hrt));
    980 
    981 	while (cas32(&sendmondo_in_recover, 1, 0) != 1)
    982 		;
    983 
    984 	return (retval);
    985 }
    986 
    987 /*
    988  * This is called by the cyclic framework when this CPU becomes online
    989  */
    990 /*ARGSUSED*/
    991 static void
    992 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
    993 {
    994 
    995 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
    996 	hdlr->cyh_level = CY_LOW_LEVEL;
    997 	hdlr->cyh_arg = NULL;
    998 
    999 	/*
   1000 	 * Stagger the start time
   1001 	 */
   1002 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
   1003 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
   1004 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
   1005 	}
   1006 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
   1007 }
   1008 
   1009 /*
   1010  * Create a low level cyclic to send a xtrap to the next cpu online.
   1011  * However, there's no need to have this running on a uniprocessor system.
   1012  */
   1013 static void
   1014 cheetah_nudge_init(void)
   1015 {
   1016 	cyc_omni_handler_t hdlr;
   1017 
   1018 	if (max_ncpus == 1) {
   1019 		return;
   1020 	}
   1021 
   1022 	hdlr.cyo_online = cheetah_nudge_onln;
   1023 	hdlr.cyo_offline = NULL;
   1024 	hdlr.cyo_arg = NULL;
   1025 
   1026 	mutex_enter(&cpu_lock);
   1027 	(void) cyclic_add_omni(&hdlr);
   1028 	mutex_exit(&cpu_lock);
   1029 }
   1030 
   1031 /*
   1032  * Cyclic handler to wake up buddy
   1033  */
   1034 void
   1035 cheetah_nudge_buddy(void)
   1036 {
   1037 	/*
   1038 	 * Disable kernel preemption to protect the cpu list
   1039 	 */
   1040 	kpreempt_disable();
   1041 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
   1042 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
   1043 		    0, 0);
   1044 	}
   1045 	kpreempt_enable();
   1046 }
   1047 
   1048 #endif	/* CHEETAHPLUS_ERRATUM_25 */
   1049 
#ifdef SEND_MONDO_STATS
/*
 * Mondo dispatch statistics, present only when SEND_MONDO_STATS is defined.
 *
 * send_one_mondo() histograms the tick count of each dispatch: a delta
 * below 8192 ticks increments x_one_stimes[delta >> 7] (64 buckets of 128
 * ticks each); anything longer increments x_one_ltimes[(delta >> 13) & 0xf]
 * (16 buckets of 8192 ticks each, with longer deltas wrapping around).
 *
 * NOTE(review): the x_set_* and x_nack_stimes arrays are not updated by
 * send_one_mondo(); presumably they are the corresponding counters for
 * the multi-target dispatch path -- confirm against their users.
 */
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif
   1058 
   1059 /*
   1060  * Note: A version of this function is used by the debugger via the KDI,
   1061  * and must be kept in sync with this version.  Any changes made to this
   1062  * function to support new chips or to accomodate errata must also be included
   1063  * in the KDI-specific version.  See us3_kdi.c.
   1064  */
   1065 void
   1066 send_one_mondo(int cpuid)
   1067 {
   1068 	int busy, nack;
   1069 	uint64_t idsr, starttick, endtick, tick, lasttick;
   1070 	uint64_t busymask;
   1071 #ifdef	CHEETAHPLUS_ERRATUM_25
   1072 	int recovered = 0;
   1073 #endif
   1074 
   1075 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
   1076 	starttick = lasttick = gettick();
   1077 	shipit(cpuid, 0);
   1078 	endtick = starttick + xc_tick_limit;
   1079 	busy = nack = 0;
   1080 #if defined(JALAPENO) || defined(SERRANO)
   1081 	/*
   1082 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
   1083 	 * will be used for dispatching interrupt. For now, assume
   1084 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
   1085 	 * issues with respect to BUSY/NACK pair usage.
   1086 	 */
   1087 	busymask  = IDSR_BUSY_BIT(cpuid);
   1088 #else /* JALAPENO || SERRANO */
   1089 	busymask = IDSR_BUSY;
   1090 #endif /* JALAPENO || SERRANO */
   1091 	for (;;) {
   1092 		idsr = getidsr();
   1093 		if (idsr == 0)
   1094 			break;
   1095 
   1096 		tick = gettick();
   1097 		/*
   1098 		 * If there is a big jump between the current tick
   1099 		 * count and lasttick, we have probably hit a break
   1100 		 * point.  Adjust endtick accordingly to avoid panic.
   1101 		 */
   1102 		if (tick > (lasttick + xc_tick_jump_limit))
   1103 			endtick += (tick - lasttick);
   1104 		lasttick = tick;
   1105 		if (tick > endtick) {
   1106 			if (panic_quiesce)
   1107 				return;
   1108 #ifdef	CHEETAHPLUS_ERRATUM_25
   1109 			if (cheetah_sendmondo_recover && recovered == 0) {
   1110 				if (mondo_recover(cpuid, 0)) {
   1111 					/*
   1112 					 * We claimed the whole memory or
   1113 					 * full scan is disabled.
   1114 					 */
   1115 					recovered++;
   1116 				}
   1117 				tick = gettick();
   1118 				endtick = tick + xc_tick_limit;
   1119 				lasttick = tick;
   1120 				/*
   1121 				 * Recheck idsr
   1122 				 */
   1123 				continue;
   1124 			} else
   1125 #endif	/* CHEETAHPLUS_ERRATUM_25 */
   1126 			{
   1127 				cmn_err(CE_PANIC, "send mondo timeout "
   1128 				    "(target 0x%x) [%d NACK %d BUSY]",
   1129 				    cpuid, nack, busy);
   1130 			}
   1131 		}
   1132 
   1133 		if (idsr & busymask) {
   1134 			busy++;
   1135 			continue;
   1136 		}
   1137 		drv_usecwait(1);
   1138 		shipit(cpuid, 0);
   1139 		nack++;
   1140 		busy = 0;
   1141 	}
   1142 #ifdef SEND_MONDO_STATS
   1143 	{
   1144 		int n = gettick() - starttick;
   1145 		if (n < 8192)
   1146 			x_one_stimes[n >> 7]++;
   1147 		else
   1148 			x_one_ltimes[(n >> 13) & 0xf]++;
   1149 	}
   1150 #endif
   1151 }
   1152 
/*
 * No-op: the body is empty, so calling syncfpu() here has no effect.
 */
void
syncfpu(void)
{
}
   1157 
   1158 /*
   1159  * Return processor specific async error structure
   1160  * size used.
   1161  */
   1162 int
   1163 cpu_aflt_size(void)
   1164 {
   1165 	return (sizeof (ch_async_flt_t));
   1166 }
   1167 
/*
 * Tunable governing the checking of other cpu logout areas during panic for
 * potential syndrome 71 generating errors.  It is initialized to 1.
 * NOTE(review): presumably clearing it to 0 disables the check; the code
 * that tests it is not adjacent to this declaration -- confirm there.
 */
int enable_check_other_cpus_logout = 1;
   1173 
   1174 /*
   1175  * Check other cpus logout area for potential synd 71 generating
   1176  * errors.
   1177  */
   1178 static void
   1179 cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
   1180     ch_cpu_logout_t *clop)
   1181 {
   1182 	struct async_flt *aflt;
   1183 	ch_async_flt_t ch_flt;
   1184 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
   1185 
   1186 	if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
   1187 		return;
   1188 	}
   1189 
   1190 	bzero(&ch_flt, sizeof (ch_async_flt_t));
   1191 
   1192 	t_afar = clop->clo_data.chd_afar;
   1193 	t_afsr = clop->clo_data.chd_afsr;
   1194 	t_afsr_ext = clop->clo_data.chd_afsr_ext;
   1195 #if defined(SERRANO)
   1196 	ch_flt.afar2 = clop->clo_data.chd_afar2;
   1197 #endif	/* SERRANO */
   1198 
   1199 	/*
   1200 	 * In order to simplify code, we maintain this afsr_errs
   1201 	 * variable which holds the aggregate of AFSR and AFSR_EXT
   1202 	 * sticky bits.
   1203 	 */
   1204 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
   1205 	    (t_afsr & C_AFSR_ALL_ERRS);
   1206 
   1207 	/* Setup the async fault structure */
   1208 	aflt = (struct async_flt *)&ch_flt;
   1209 	aflt->flt_id = gethrtime_waitfree();
   1210 	ch_flt.afsr_ext = t_afsr_ext;
   1211 	ch_flt.afsr_errs = t_afsr_errs;
   1212 	aflt->flt_stat = t_afsr;
   1213 	aflt->flt_addr = t_afar;
   1214 	aflt->flt_bus_id = cpuid;
   1215 	aflt->flt_inst = cpuid;
   1216 	aflt->flt_pc = tpc;
   1217 	aflt->flt_prot = AFLT_PROT_NONE;
   1218 	aflt->flt_class = CPU_FAULT;
   1219 	aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
   1220 	aflt->flt_tl = tl;
   1221 	aflt->flt_status = ecc_type;
   1222 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
   1223 
   1224 	/*
   1225 	 * Queue events on the async event queue, one event per error bit.
   1226 	 * If no events are queued, queue an event to complain.
   1227 	 */
   1228 	if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
   1229 		ch_flt.flt_type = CPU_INV_AFSR;
   1230 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
   1231 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
   1232 		    aflt->flt_panic);
   1233 	}
   1234 
   1235 	/*
   1236 	 * Zero out + invalidate CPU logout.
   1237 	 */
   1238 	bzero(clop, sizeof (ch_cpu_logout_t));
   1239 	clop->clo_data.chd_afar = LOGOUT_INVALID;
   1240 }
   1241 
   1242 /*
   1243  * Check the logout areas of all other cpus for unlogged errors.
   1244  */
   1245 static void
   1246 cpu_check_other_cpus_logout(void)
   1247 {
   1248 	int i, j;
   1249 	processorid_t myid;
   1250 	struct cpu *cp;
   1251 	ch_err_tl1_data_t *cl1p;
   1252 
   1253 	myid = CPU->cpu_id;
   1254 	for (i = 0; i < NCPU; i++) {
   1255 		cp = cpu[i];
   1256 
   1257 		if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
   1258 		    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
   1259 			continue;
   1260 		}
   1261 
   1262 		/*
   1263 		 * Check each of the tl>0 logout areas
   1264 		 */
   1265 		cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
   1266 		for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
   1267 			if (cl1p->ch_err_tl1_flags == 0)
   1268 				continue;
   1269 
   1270 			cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
   1271 			    1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
   1272 		}
   1273 
   1274 		/*
   1275 		 * Check each of the remaining logout areas
   1276 		 */
   1277 		cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
   1278 		    CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
   1279 		cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
   1280 		    CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
   1281 		cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
   1282 		    CPU_PRIVATE_PTR(cp, chpr_async_logout));
   1283 	}
   1284 }
   1285 
   1286 /*
   1287  * The fast_ecc_err handler transfers control here for UCU, UCC events.
   1288  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
   1289  * flush the error that caused the UCU/UCC, then again here at the end to
   1290  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
   1291  * the probability of getting a TL>1 Fast ECC trap when we're fielding
   1292  * another Fast ECC trap.
   1293  *
   1294  * Cheetah+ also handles: TSCE: No additional processing required.
   1295  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
   1296  *
   1297  * Note that the p_clo_flags input is only valid in cases where the
   1298  * cpu_private struct is not yet initialized (since that is the only
   1299  * time that information cannot be obtained from the logout struct.)
   1300  */
   1301 /*ARGSUSED*/
   1302 void
   1303 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
   1304 {
   1305 	ch_cpu_logout_t *clop;
   1306 	uint64_t ceen, nceen;
   1307 
   1308 	/*
   1309 	 * Get the CPU log out info. If we can't find our CPU private
   1310 	 * pointer, then we will have to make due without any detailed
   1311 	 * logout information.
   1312 	 */
   1313 	if (CPU_PRIVATE(CPU) == NULL) {
   1314 		clop = NULL;
   1315 		ceen = p_clo_flags & EN_REG_CEEN;
   1316 		nceen = p_clo_flags & EN_REG_NCEEN;
   1317 	} else {
   1318 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
   1319 		ceen = clop->clo_flags & EN_REG_CEEN;
   1320 		nceen = clop->clo_flags & EN_REG_NCEEN;
   1321 	}
   1322 
   1323 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
   1324 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
   1325 }
   1326 
   1327 /*
   1328  * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
   1329  * ECC at TL>0.  Need to supply either a error register pointer or a
   1330  * cpu logout structure pointer.
   1331  */
   1332 static void
   1333 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
   1334     uint64_t nceen, ch_cpu_logout_t *clop)
   1335 {
   1336 	struct async_flt *aflt;
   1337 	ch_async_flt_t ch_flt;
   1338 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
   1339 	char pr_reason[MAX_REASON_STRING];
   1340 	ch_cpu_errors_t cpu_error_regs;
   1341 
   1342 	bzero(&ch_flt, sizeof (ch_async_flt_t));
   1343 	/*
   1344 	 * If no cpu logout data, then we will have to make due without
   1345 	 * any detailed logout information.
   1346 	 */
   1347 	if (clop == NULL) {
   1348 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
   1349 		get_cpu_error_state(&cpu_error_regs);
   1350 		set_cpu_error_state(&cpu_error_regs);
   1351 		t_afar = cpu_error_regs.afar;
   1352 		t_afsr = cpu_error_regs.afsr;
   1353 		t_afsr_ext = cpu_error_regs.afsr_ext;
   1354 #if defined(SERRANO)
   1355 		ch_flt.afar2 = cpu_error_regs.afar2;
   1356 #endif	/* SERRANO */
   1357 	} else {
   1358 		t_afar = clop->clo_data.chd_afar;
   1359 		t_afsr = clop->clo_data.chd_afsr;
   1360 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
   1361 #if defined(SERRANO)
   1362 		ch_flt.afar2 = clop->clo_data.chd_afar2;
   1363 #endif	/* SERRANO */
   1364 	}
   1365 
   1366 	/*
   1367 	 * In order to simplify code, we maintain this afsr_errs
   1368 	 * variable which holds the aggregate of AFSR and AFSR_EXT
   1369 	 * sticky bits.
   1370 	 */
   1371 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
   1372 	    (t_afsr & C_AFSR_ALL_ERRS);
   1373 	pr_reason[0] = '\0';
   1374 
   1375 	/* Setup the async fault structure */
   1376 	aflt = (struct async_flt *)&ch_flt;
   1377 	aflt->flt_id = gethrtime_waitfree();
   1378 	ch_flt.afsr_ext = t_afsr_ext;
   1379 	ch_flt.afsr_errs = t_afsr_errs;
   1380 	aflt->flt_stat = t_afsr;
   1381 	aflt->flt_addr = t_afar;
   1382 	aflt->flt_bus_id = getprocessorid();
   1383 	aflt->flt_inst = CPU->cpu_id;
   1384 	aflt->flt_pc = tpc;
   1385 	aflt->flt_prot = AFLT_PROT_NONE;
   1386 	aflt->flt_class = CPU_FAULT;
   1387 	aflt->flt_priv = priv;
   1388 	aflt->flt_tl = tl;
   1389 	aflt->flt_status = ECC_F_TRAP;
   1390 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
   1391 
   1392 	/*
   1393 	 * XXXX - Phenomenal hack to get around Solaris not getting all the
   1394 	 * cmn_err messages out to the console.  The situation is a UCU (in
   1395 	 * priv mode) which causes a WDU which causes a UE (on the retry).
   1396 	 * The messages for the UCU and WDU are enqueued and then pulled off
   1397 	 * the async queue via softint and syslogd starts to process them
   1398 	 * but doesn't get them to the console.  The UE causes a panic, but
   1399 	 * since the UCU/WDU messages are already in transit, those aren't
   1400 	 * on the async queue.  The hack is to check if we have a matching
   1401 	 * WDU event for the UCU, and if it matches, we're more than likely
   1402 	 * going to panic with a UE, unless we're under protection.  So, we
   1403 	 * check to see if we got a matching WDU event and if we're under
   1404 	 * protection.
   1405 	 *
   1406 	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
   1407 	 * looks like this:
   1408 	 *    UCU->WDU->UE
   1409 	 * For Panther, it could look like either of these:
   1410 	 *    UCU---->WDU->L3_WDU->UE
   1411 	 *    L3_UCU->WDU->L3_WDU->UE
   1412 	 */
   1413 	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
   1414 	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
   1415 	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
   1416 		get_cpu_error_state(&cpu_error_regs);
   1417 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
   1418 			aflt->flt_panic |=
   1419 			    ((cpu_error_regs.afsr & C_AFSR_WDU) &&
   1420 			    (cpu_error_regs.afsr_ext & C_AFSR_L3_WDU) &&
   1421 			    (cpu_error_regs.afar == t_afar));
   1422 			aflt->flt_panic |= ((clop == NULL) &&
   1423 			    (t_afsr_errs & C_AFSR_WDU) &&
   1424 			    (t_afsr_errs & C_AFSR_L3_WDU));
   1425 		} else {
   1426 			aflt->flt_panic |=
   1427 			    ((cpu_error_regs.afsr & C_AFSR_WDU) &&
   1428 			    (cpu_error_regs.afar == t_afar));
   1429 			aflt->flt_panic |= ((clop == NULL) &&
   1430 			    (t_afsr_errs & C_AFSR_WDU));
   1431 		}
   1432 	}
   1433 
   1434 	/*
   1435 	 * Queue events on the async event queue, one event per error bit.
   1436 	 * If no events are queued or no Fast ECC events are on in the AFSR,
   1437 	 * queue an event to complain.
   1438 	 */
   1439 	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
   1440 	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
   1441 		ch_flt.flt_type = CPU_INV_AFSR;
   1442 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
   1443 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
   1444 		    aflt->flt_panic);
   1445 	}
   1446 
   1447 	/*
   1448 	 * Zero out + invalidate CPU logout.
   1449 	 */
   1450 	if (clop) {
   1451 		bzero(clop, sizeof (ch_cpu_logout_t));
   1452 		clop->clo_data.chd_afar = LOGOUT_INVALID;
   1453 	}
   1454 
   1455 	/*
   1456 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
   1457 	 * or disrupting errors have happened.  We do this because if a
   1458 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
   1459 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
   1460 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
   1461 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
   1462 	 * deferred or disrupting error happening between checking the AFSR and
   1463 	 * enabling NCEEN/CEEN.
   1464 	 *
   1465 	 * Note: CEEN and NCEEN are only reenabled if they were on when trap
   1466 	 * taken.
   1467 	 */
   1468 	set_error_enable(get_error_enable() | (nceen | ceen));
   1469 	if (clear_errors(&ch_flt)) {
   1470 		aflt->flt_panic |= ((ch_flt.afsr_errs &
   1471 		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
   1472 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
   1473 		    NULL);
   1474 	}
   1475 
   1476 	/*
   1477 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
   1478 	 * be logged as part of the panic flow.
   1479 	 */
   1480 	if (aflt->flt_panic)
   1481 		fm_panic("%sError(s)", pr_reason);
   1482 
   1483 	/*
   1484 	 * Flushing the Ecache here gets the part of the trap handler that
   1485 	 * is run at TL=1 out of the Ecache.
   1486 	 */
   1487 	cpu_flush_ecache();
   1488 }
   1489 
   1490 /*
   1491  * This is called via sys_trap from pil15_interrupt code if the
   1492  * corresponding entry in ch_err_tl1_pending is set.  Checks the
   1493  * various ch_err_tl1_data structures for valid entries based on the bit
   1494  * settings in the ch_err_tl1_flags entry of the structure.
   1495  */
   1496 /*ARGSUSED*/
   1497 void
   1498 cpu_tl1_error(struct regs *rp, int panic)
   1499 {
   1500 	ch_err_tl1_data_t *cl1p, cl1;
   1501 	int i, ncl1ps;
   1502 	uint64_t me_flags;
   1503 	uint64_t ceen, nceen;
   1504 
   1505 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
   1506 		cl1p = &ch_err_tl1_data;
   1507 		ncl1ps = 1;
   1508 	} else if (CPU_PRIVATE(CPU) != NULL) {
   1509 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
   1510 		ncl1ps = CH_ERR_TL1_TLMAX;
   1511 	} else {
   1512 		ncl1ps = 0;
   1513 	}
   1514 
   1515 	for (i = 0; i < ncl1ps; i++, cl1p++) {
   1516 		if (cl1p->ch_err_tl1_flags == 0)
   1517 			continue;
   1518 
   1519 		/*
   1520 		 * Grab a copy of the logout data and invalidate
   1521 		 * the logout area.
   1522 		 */
   1523 		cl1 = *cl1p;
   1524 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
   1525 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
   1526 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
   1527 
   1528 		/*
   1529 		 * Log "first error" in ch_err_tl1_data.
   1530 		 */
   1531 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
   1532 			ceen = get_error_enable() & EN_REG_CEEN;
   1533 			nceen = get_error_enable() & EN_REG_NCEEN;
   1534 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
   1535 			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
   1536 		}
   1537 #if defined(CPU_IMP_L1_CACHE_PARITY)
   1538 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
   1539 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
   1540 			    (caddr_t)cl1.ch_err_tl1_tpc);
   1541 		}
   1542 #endif	/* CPU_IMP_L1_CACHE_PARITY */
   1543 
   1544 		/*
   1545 		 * Log "multiple events" in ch_err_tl1_data.  Note that
   1546 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
   1547 		 * if the structure is busy, we just do the cache flushing
   1548 		 * we have to do and then do the retry.  So the AFSR/AFAR
   1549 		 * at this point *should* have some relevant info.  If there
   1550 		 * are no valid errors in the AFSR, we'll assume they've
   1551 		 * already been picked up and logged.  For I$/D$ parity,
   1552 		 * we just log an event with an "Unknown" (NULL) TPC.
   1553 		 */
   1554 		if (me_flags & CH_ERR_FECC) {
   1555 			ch_cpu_errors_t cpu_error_regs;
   1556 			uint64_t t_afsr_errs;
   1557 
   1558 			/*
   1559 			 * Get the error registers and see if there's
   1560 			 * a pending error.  If not, don't bother
   1561 			 * generating an "Invalid AFSR" error event.
   1562 			 */
   1563 			get_cpu_error_state(&cpu_error_regs);
   1564 			t_afsr_errs = (cpu_error_regs.afsr_ext &
   1565 			    C_AFSR_EXT_ALL_ERRS) |
   1566 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
   1567 			if (t_afsr_errs != 0) {
   1568 				ceen = get_error_enable() & EN_REG_CEEN;
   1569 				nceen = get_error_enable() & EN_REG_NCEEN;
   1570 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
   1571 				    1, ceen, nceen, NULL);
   1572 			}
   1573 		}
   1574 #if defined(CPU_IMP_L1_CACHE_PARITY)
   1575 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
   1576 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
   1577 		}
   1578 #endif	/* CPU_IMP_L1_CACHE_PARITY */
   1579 	}
   1580 }
   1581 
/*
 * Called from Fast ECC TL>0 handler in case of fatal error.
 * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
 * but if we don't, we'll panic with something reasonable.
 *
 * rp is handed through to cpu_tl1_error() unchanged, with 1 as its second
 * argument; the flags argument is not used.
 */
/*ARGSUSED*/
void
cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
{
	cpu_tl1_error(rp, 1);
	/*
	 * Should never return, but just in case.
	 */
	fm_panic("Unsurvivable ECC Error at TL>0");
}
   1597 
   1598 /*
   1599  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
   1600  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
   1601  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
   1602  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
   1603  *
   1604  * Cheetah+ also handles (No additional processing required):
   1605  *    DUE, DTO, DBERR	(NCEEN controlled)
   1606  *    THCE		(CEEN and ET_ECC_en controlled)
   1607  *    TUE		(ET_ECC_en controlled)
   1608  *
   1609  * Panther further adds:
   1610  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
   1611  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
   1612  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
   1613  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
   1614  *    THCE			(CEEN and L2_tag_ECC_en controlled)
   1615  *    L3_THCE			(CEEN and ET_ECC_en controlled)
   1616  *
   1617  * Note that the p_clo_flags input is only valid in cases where the
   1618  * cpu_private struct is not yet initialized (since that is the only
   1619  * time that information cannot be obtained from the logout struct.)
   1620  */
   1621 /*ARGSUSED*/
   1622 void
   1623 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
   1624 {
   1625 	struct async_flt *aflt;
   1626 	ch_async_flt_t ch_flt;
   1627 	char pr_reason[MAX_REASON_STRING];
   1628 	ch_cpu_logout_t *clop;
   1629 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
   1630 	ch_cpu_errors_t cpu_error_regs;
   1631 
   1632 	bzero(&ch_flt, sizeof (ch_async_flt_t));
   1633 	/*
   1634 	 * Get the CPU log out info. If we can't find our CPU private
   1635 	 * pointer, then we will have to make due without any detailed
   1636 	 * logout information.
   1637 	 */
   1638 	if (CPU_PRIVATE(CPU) == NULL) {
   1639 		clop = NULL;
   1640 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
   1641 		get_cpu_error_state(&cpu_error_regs);
   1642 		set_cpu_error_state(&cpu_error_regs);
   1643 		t_afar = cpu_error_regs.afar;
   1644 		t_afsr = cpu_error_regs.afsr;
   1645 		t_afsr_ext = cpu_error_regs.afsr_ext;
   1646 #if defined(SERRANO)
   1647 		ch_flt.afar2 = cpu_error_regs.afar2;
   1648 #endif	/* SERRANO */
   1649 	} else {
   1650 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
   1651 		t_afar = clop->clo_data.chd_afar;
   1652 		t_afsr = clop->clo_data.chd_afsr;
   1653 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
   1654 #if defined(SERRANO)
   1655 		ch_flt.afar2 = clop->clo_data.chd_afar2;
   1656 #endif	/* SERRANO */
   1657 	}
   1658 
   1659 	/*
   1660 	 * In order to simplify code, we maintain this afsr_errs
   1661 	 * variable which holds the aggregate of AFSR and AFSR_EXT
   1662 	 * sticky bits.
   1663 	 */
   1664 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
   1665 	    (t_afsr & C_AFSR_ALL_ERRS);
   1666 
   1667 	pr_reason[0] = '\0';
   1668 	/* Setup the async fault structure */
   1669 	aflt = (struct async_flt *)&ch_flt;
   1670 	ch_flt.afsr_ext = t_afsr_ext;
   1671 	ch_flt.afsr_errs = t_afsr_errs;
   1672 	aflt->flt_stat = t_afsr;
   1673 	aflt->flt_addr = t_afar;
   1674 	aflt->flt_pc = (caddr_t)rp->r_pc;
   1675 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
   1676 	aflt->flt_tl = 0;
   1677 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
   1678 
   1679 	/*
   1680 	 * If this trap is a result of one of the errors not masked
   1681 	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
   1682 	 * indicate that a timeout is to be set later.
   1683 	 */
   1684 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
   1685 	    !aflt->flt_panic)
   1686 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
   1687 	else
   1688 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
   1689 
   1690 	/*
   1691 	 * log the CE and clean up
   1692 	 */
   1693 	cpu_log_and_clear_ce(&ch_flt);
   1694 
   1695 	/*
   1696 	 * We re-enable CEEN (if required) and check if any disrupting errors
   1697 	 * have happened.  We do this because if a disrupting error had occurred
   1698 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
   1699 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
   1700 	 * we enable CEEN *before* checking the AFSR to avoid the small window
   1701 	 * of a error happening between checking the AFSR and enabling CEEN.
   1702 	 */
   1703 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
   1704 		set_error_enable(get_error_enable() | EN_REG_CEEN);
   1705 	if (clear_errors(&ch_flt)) {
   1706 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
   1707 		    NULL);
   1708 	}
   1709 
   1710 	/*
   1711 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
   1712 	 * be logged as part of the panic flow.
   1713 	 */
   1714 	if (aflt->flt_panic)
   1715 		fm_panic("%sError(s)", pr_reason);
   1716 }
   1717 
   1718 /*
   1719  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
   1720  * L3_EDU:BLD, TO, and BERR events.
   1721  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
   1722  *
   1723  * Cheetah+: No additional errors handled.
   1724  *
   1725  * Note that the p_clo_flags input is only valid in cases where the
   1726  * cpu_private struct is not yet initialized (since that is the only
   1727  * time that information cannot be obtained from the logout struct.)
   1728  */
   1729 /*ARGSUSED*/
   1730 void
   1731 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
   1732 {
   1733 	ushort_t ttype, tl;
   1734 	ch_async_flt_t ch_flt;
   1735 	struct async_flt *aflt;
   1736 	int trampolined = 0;
   1737 	char pr_reason[MAX_REASON_STRING];
   1738 	ch_cpu_logout_t *clop;
   1739 	uint64_t ceen, clo_flags;
   1740 	uint64_t log_afsr;
   1741 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
   1742 	ch_cpu_errors_t cpu_error_regs;
   1743 	int expected = DDI_FM_ERR_UNEXPECTED;
   1744 	ddi_acc_hdl_t *hp;
   1745 
   1746 	/*
   1747 	 * We need to look at p_flag to determine if the thread detected an
   1748 	 * error while dumping core.  We can't grab p_lock here, but it's ok
   1749 	 * because we just need a consistent snapshot and we know that everyone
   1750 	 * else will store a consistent set of bits while holding p_lock.  We
   1751 	 * don't have to worry about a race because SDOCORE is set once prior
   1752 	 * to doing i/o from the process's address space and is never cleared.
   1753 	 */
   1754 	uint_t pflag = ttoproc(curthread)->p_flag;
   1755 
   1756 	bzero(&ch_flt, sizeof (ch_async_flt_t));
   1757 	/*
   1758 	 * Get the CPU log out info. If we can't find our CPU private
   1759 	 * pointer then we will have to make due without any detailed
   1760 	 * logout information.
   1761 	 */
   1762 	if (CPU_PRIVATE(CPU) == NULL) {
   1763 		clop = NULL;
   1764 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
   1765 		get_cpu_error_state(&cpu_error_regs);
   1766 		set_cpu_error_state(&cpu_error_regs);
   1767 		t_afar = cpu_error_regs.afar;
   1768 		t_afsr = cpu_error_regs.afsr;
   1769 		t_afsr_ext = cpu_error_regs.afsr_ext;
   1770 #if defined(SERRANO)
   1771 		ch_flt.afar2 = cpu_error_regs.afar2;
   1772 #endif	/* SERRANO */
   1773 		clo_flags = p_clo_flags;
   1774 	} else {
   1775 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
   1776 		t_afar = clop->clo_data.chd_afar;
   1777 		t_afsr = clop->clo_data.chd_afsr;
   1778 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
   1779 #if defined(SERRANO)
   1780 		ch_flt.afar2 = clop->clo_data.chd_afar2;
   1781 #endif	/* SERRANO */
   1782 		clo_flags = clop->clo_flags;
   1783 	}
   1784 
   1785 	/*
   1786 	 * In order to simplify code, we maintain this afsr_errs
   1787 	 * variable which holds the aggregate of AFSR and AFSR_EXT
   1788 	 * sticky bits.
   1789 	 */
   1790 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
   1791 	    (t_afsr & C_AFSR_ALL_ERRS);
   1792 	pr_reason[0] = '\0';
   1793 
   1794 	/*
   1795 	 * Grab information encoded into our clo_flags field.
   1796 	 */
   1797 	ceen = clo_flags & EN_REG_CEEN;
   1798 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
   1799 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
   1800 
   1801 	/*
   1802 	 * handle the specific error
   1803 	 */
   1804 	aflt = (struct async_flt *)&ch_flt;
   1805 	aflt->flt_id = gethrtime_waitfree();
   1806 	aflt->flt_bus_id = getprocessorid();
   1807 	aflt->flt_inst = CPU->cpu_id;
   1808 	ch_flt.afsr_ext = t_afsr_ext;
   1809 	ch_flt.afsr_errs = t_afsr_errs;
   1810 	aflt->flt_stat = t_afsr;
   1811 	aflt->flt_addr = t_afar;
   1812 	aflt->flt_pc = (caddr_t)rp->r_pc;
   1813 	aflt->flt_prot = AFLT_PROT_NONE;
   1814 	aflt->flt_class = CPU_FAULT;
   1815 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
   1816 	aflt->flt_tl = (uchar_t)tl;
   1817 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
   1818 	    C_AFSR_PANIC(t_afsr_errs));
   1819 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
   1820 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
   1821 
   1822 	/*
   1823 	 * If the trap occurred in privileged mode at TL=0, we need to check to
   1824 	 * see if we were executing in the kernel under on_trap() or t_lofault
   1825 	 * protection.  If so, modify the saved registers so that we return
   1826 	 * from the trap to the appropriate trampoline routine.
   1827 	 */
   1828 	if (aflt->flt_priv && tl == 0) {
   1829 		if (curthread->t_ontrap != NULL) {
   1830 			on_trap_data_t *otp = curthread->t_ontrap;
   1831 
   1832 			if (otp->ot_prot & OT_DATA_EC) {
   1833 				aflt->flt_prot = AFLT_PROT_EC;
   1834 				otp->ot_trap |= OT_DATA_EC;
   1835 				rp->r_pc = otp->ot_trampoline;
   1836 				rp->r_npc = rp->r_pc + 4;
   1837 				trampolined = 1;
   1838 			}
   1839 
   1840 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
   1841 			    (otp->ot_prot & OT_DATA_ACCESS)) {
   1842 				aflt->flt_prot = AFLT_PROT_ACCESS;
   1843 				otp->ot_trap |= OT_DATA_ACCESS;
   1844 				rp->r_pc = otp->ot_trampoline;
   1845 				rp->r_npc = rp->r_pc + 4;
   1846 				trampolined = 1;
   1847 				/*
   1848 				 * for peeks and caut_gets errors are expected
   1849 				 */
   1850 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
   1851 				if (!hp)
   1852 					expected = DDI_FM_ERR_PEEK;
   1853 				else if (hp->ah_acc.devacc_attr_access ==
   1854 				    DDI_CAUTIOUS_ACC)
   1855 					expected = DDI_FM_ERR_EXPECTED;
   1856 			}
   1857 
   1858 		} else if (curthread->t_lofault) {
   1859 			aflt->flt_prot = AFLT_PROT_COPY;
   1860 			rp->r_g1 = EFAULT;
   1861 			rp->r_pc = curthread->t_lofault;
   1862 			rp->r_npc = rp->r_pc + 4;
   1863 			trampolined = 1;
   1864 		}
   1865 	}
   1866 
   1867 	/*
   1868 	 * If we're in user mode or we're doing a protected copy, we either
   1869 	 * want the ASTON code below to send a signal to the user process
   1870 	 * or we want to panic if aft_panic is set.
   1871 	 *
   1872 	 * If we're in privileged mode and we're not doing a copy, then we
   1873 	 * need to check if we've trampolined.  If we haven't trampolined,
   1874 	 * we should panic.
   1875 	 */
   1876 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
   1877 		if (t_afsr_errs &
   1878 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
   1879 		    ~(C_AFSR_BERR | C_AFSR_TO)))
   1880 			aflt->flt_panic |= aft_panic;
   1881 	} else if (!trampolined) {
   1882 			aflt->flt_panic = 1;
   1883 	}
   1884 
   1885 	/*
   1886 	 * If we've trampolined due to a privileged TO or BERR, or if an
   1887 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
   1888 	 * event for that TO or BERR.  Queue all other events (if any) besides
   1889 	 * the TO/BERR.  Since we may not be enqueing any events, we need to
   1890 	 * ignore the number of events queued.  If we haven't trampolined due
   1891 	 * to a TO or BERR, just enqueue events normally.
   1892 	 */
   1893 	log_afsr = t_afsr_errs;
   1894 	if (trampolined) {
   1895 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
   1896 	} else if (!aflt->flt_priv) {
   1897 		/*
   1898 		 * User mode, suppress messages if
   1899 		 * cpu_berr_to_verbose is not set.
   1900 		 */
   1901 		if (!cpu_berr_to_verbose)
   1902 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
   1903 	}
   1904 
   1905 	/*
   1906 	 * Log any errors that occurred
   1907 	 */
   1908 	if (((log_afsr &
   1909 	    ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
   1910 	    cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
   1911 	    (t_afsr_errs & (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
   1912 		ch_flt.flt_type = CPU_INV_AFSR;
   1913 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
   1914 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
   1915 		    aflt->flt_panic);
   1916 	}
   1917 
   1918 	/*
   1919 	 * Zero out + invalidate CPU logout.
   1920 	 */
   1921 	if (clop) {
   1922 		bzero(clop, sizeof (ch_cpu_logout_t));
   1923 		clop->clo_data.chd_afar = LOGOUT_INVALID;
   1924 	}
   1925 
   1926 #if defined(JALAPENO) || defined(SERRANO)
   1927 	/*
   1928 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
   1929 	 * IO errors that may have resulted in this trap.
   1930 	 */
   1931 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
   1932 		cpu_run_bus_error_handlers(aflt, expected);
   1933 	}
   1934 
   1935 	/*
   1936 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
   1937 	 * line from the Ecache.  We also need to query the bus nexus for
   1938 	 * fatal errors.  Attempts to do diagnostic read on caches may
   1939 	 * introduce more errors (especially when the module is bad).
   1940 	 */
   1941 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
   1942 		/*
   1943 		 * Ask our bus nexus friends if they have any fatal errors.  If
   1944 		 * so, they will log appropriate error messages.
   1945 		 */
   1946 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
   1947 			aflt->flt_panic = 1;
   1948 
   1949 		/*
   1950 		 * We got a UE or RUE and are panicking, save the fault PA in
   1951 		 * a known location so that the platform specific panic code
   1952 		 * can check for copyback errors.
   1953 		 */
   1954 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
   1955 			panic_aflt = *aflt;
   1956 		}
   1957 	}
   1958 
   1959 	/*
   1960 	 * Flush Ecache line or entire Ecache
   1961 	 */
   1962 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
   1963 		cpu_error_ecache_flush(&ch_flt);
   1964 #else /* JALAPENO || SERRANO */
   1965 	/*
   1966 	 * UE/BERR/TO: Call our bus nexus friends to check for
   1967 	 * IO errors that may have resulted in this trap.
   1968 	 */
   1969 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
   1970 		cpu_run_bus_error_handlers(aflt, expected);
   1971 	}
   1972 
   1973 	/*
   1974 	 * UE: If the UE is in memory, we need to flush the bad
   1975 	 * line from the Ecache.  We also need to query the bus nexus for
   1976 	 * fatal errors.  Attempts to do diagnostic read on caches may
   1977 	 * introduce more errors (especially when the module is bad).
   1978 	 */
   1979 	if (t_afsr & C_AFSR_UE) {
   1980 		/*
   1981 		 * Ask our legacy bus nexus friends if they have any fatal
   1982 		 * errors.  If so, they will log appropriate error messages.
   1983 		 */
   1984 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
   1985 			aflt->flt_panic = 1;
   1986 
   1987 		/*
   1988 		 * We got a UE and are panicking, save the fault PA in a known
   1989 		 * location so that the platform specific panic code can check
   1990 		 * for copyback errors.
   1991 		 */
   1992 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
   1993 			panic_aflt = *aflt;
   1994 		}
   1995 	}
   1996 
   1997 	/*
   1998 	 * Flush Ecache line or entire Ecache
   1999 	 */
   2000 	if (t_afsr_errs &
   2001 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
   2002 		cpu_error_ecache_flush(&ch_flt);
   2003 #endif /* JALAPENO || SERRANO */
   2004 
   2005 	/*
   2006 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
   2007 	 * or disrupting errors have happened.  We do this because if a
   2008 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
   2009 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
   2010 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
   2011 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
   2012 	 * deferred or disrupting error happening between checking the AFSR and
   2013 	 * enabling NCEEN/CEEN.
   2014 	 *
   2015 	 * Note: CEEN reenabled only if it was on when trap taken.
   2016 	 */
   2017 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
   2018 	if (clear_errors(&ch_flt)) {
   2019 		/*
   2020 		 * Check for secondary errors, and avoid panicking if we
   2021 		 * have them
   2022 		 */
   2023 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
   2024 		    t_afar) == 0) {
   2025 			aflt->flt_panic |= ((ch_flt.afsr_errs &
   2026 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
   2027 		}
   2028 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
   2029 		    NULL);
   2030 	}
   2031 
   2032 	/*
   2033 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
   2034 	 * be logged as part of the panic flow.
   2035 	 */
   2036 	if (aflt->flt_panic)
   2037 		fm_panic("%sError(s)", pr_reason);
   2038 
   2039 	/*
   2040 	 * If we queued an error and we are going to return from the trap and
   2041 	 * the error was in user mode or inside of a copy routine, set AST flag
   2042 	 * so the queue will be drained before returning to user mode.  The
   2043 	 * AST processing will also act on our failure policy.
   2044 	 */
   2045 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
   2046 		int pcb_flag = 0;
   2047 
   2048 		if (t_afsr_errs &
   2049 		    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS &
   2050 		    ~(C_AFSR_BERR | C_AFSR_TO)))
   2051 			pcb_flag |= ASYNC_HWERR;
   2052 
   2053 		if (t_afsr & C_AFSR_BERR)
   2054 			pcb_flag |= ASYNC_BERR;
   2055 
   2056 		if (t_afsr & C_AFSR_TO)
   2057 			pcb_flag |= ASYNC_BTO;
   2058 
   2059 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
   2060 		aston(curthread);
   2061 	}
   2062 }
   2063 
   2064 #if defined(CPU_IMP_L1_CACHE_PARITY)
   2065 /*
   2066  * Handling of data and instruction parity errors (traps 0x71, 0x72).
   2067  *
   2068  * For Panther, P$ data parity errors during floating point load hits
   2069  * are also detected (reported as TT 0x71) and handled by this trap
   2070  * handler.
   2071  *
   2072  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
   2073  * is available.
   2074  */
   2075 /*ARGSUSED*/
   2076 void
   2077 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
   2078 {
   2079 	ch_async_flt_t ch_flt;
   2080 	struct async_flt *aflt;
   2081 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
   2082 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
   2083 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
   2084 	char *error_class;
   2085 
   2086 	/*
   2087 	 * Log the error.
   2088 	 * For icache parity errors the fault address is the trap PC.
   2089 	 * For dcache/pcache parity errors the instruction would have to
   2090 	 * be decoded to determine the address and that isn't possible
   2091 	 * at high PIL.
   2092 	 */
   2093 	bzero(&ch_flt, sizeof (ch_async_flt_t));
   2094 	aflt = (struct async_flt *)&ch_flt;
   2095 	aflt->flt_id = gethrtime_waitfree();
   2096 	aflt->flt_bus_id = getprocessorid();
   2097 	aflt->flt_inst = CPU->cpu_id;
   2098 	aflt->flt_pc = tpc;
   2099 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
   2100 	aflt->flt_prot = AFLT_PROT_NONE;
   2101 	aflt->flt_class = CPU_FAULT;
   2102 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
   2103 	aflt->flt_tl = tl;
   2104 	aflt->flt_panic = panic;
   2105 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
   2106 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
   2107 
   2108 	if (iparity) {
   2109 		cpu_icache_parity_info(&ch_flt);
   2110 		if (ch_flt.parity_data.ipe.cpl_off != -1)
   2111 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
   2112 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
   2113 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
   2114 		else
   2115 			error_class = FM_EREPORT_CPU_USIII_IPE;
   2116 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
   2117 	} else {
   2118 		cpu_dcache_parity_info(&ch_flt);
   2119 		if (ch_flt.parity_data.dpe.cpl_off != -1)
   2120 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
   2121 		else if (ch_flt.parity_data.dpe.cpl_way != -1)
   2122 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
   2123 		else
   2124 			error_class = FM_EREPORT_CPU_USIII_DPE;
   2125 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
   2126 		/*
   2127 		 * For panther we also need to check the P$ for parity errors.
   2128 		 */
   2129 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
   2130 			cpu_pcache_parity_info(&ch_flt);
   2131 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
   2132 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
   2133 				aflt->flt_payload =
   2134 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
   2135 			}
   2136 		}
   2137 	}
   2138 
   2139 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
   2140 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
   2141 
   2142 	if (iparity) {
   2143 		/*
   2144 		 * Invalidate entire I$.
   2145 		 * This is required due to the use of diagnostic ASI
   2146 		 * accesses that may result in a loss of I$ coherency.
   2147 		 */
   2148 		if (cache_boot_state & DCU_IC) {
   2149 			flush_icache();
   2150 		}
   2151 		/*
   2152 		 * According to section P.3.1 of the Panther PRM, we
   2153 		 * need to do a little more for recovery on those
   2154 		 * CPUs after encountering an I$ parity error.
   2155 		 */
   2156 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
   2157 			flush_ipb();
   2158 			correct_dcache_parity(dcache_size,
   2159 			    dcache_linesize);
   2160 			flush_pcache();
   2161 		}
   2162 	} else {
   2163 		/*
   2164 		 * Since the valid bit is ignored when checking parity the
   2165 		 * D$ data and tag must also be corrected.  Set D$ data bits
   2166 		 * to zero and set utag to 0, 1, 2, 3.
   2167 		 */
   2168 		correct_dcache_parity(dcache_size, dcache_linesize);
   2169 
   2170 		/*
   2171 		 * According to section P.3.3 of the Panther PRM, we
   2172 		 * need to do a little more for recovery on those
   2173 		 * CPUs after encountering a D$ or P$ parity error.
   2174 		 *
   2175 		 * As far as clearing P$ parity errors, it is enough to
   2176 		 * simply invalidate all entries in the P$ since P$ parity
   2177 		 * error traps are only generated for floating point load
   2178 		 * hits.
   2179 		 */
   2180 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
   2181 			flush_icache();
   2182 			flush_ipb();
   2183 			flush_pcache();
   2184 		}
   2185 	}
   2186 
   2187 	/*
   2188 	 * Invalidate entire D$ if it was enabled.
   2189 	 * This is done to avoid stale data in the D$ which might
   2190 	 * occur with the D$ disabled and the trap handler doing
   2191 	 * stores affecting lines already in the D$.
   2192 	 */
   2193 	if (cache_boot_state & DCU_DC) {
   2194 		flush_dcache();
   2195 	}
   2196 
   2197 	/*
   2198 	 * Restore caches to their bootup state.
   2199 	 */
   2200 	set_dcu(get_dcu() | cache_boot_state);
   2201 
   2202 	/*
   2203 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
   2204 	 * be logged as part of the panic flow.
   2205 	 */
   2206 	if (aflt->flt_panic)
   2207 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
   2208 
   2209 	/*
   2210 	 * If this error occurred at TL>0 then flush the E$ here to reduce
   2211 	 * the chance of getting an unrecoverable Fast ECC error.  This
   2212 	 * flush will evict the part of the parity trap handler that is run
   2213 	 * at TL>1.
   2214 	 */
   2215 	if (tl) {
   2216 		cpu_flush_ecache();
   2217 	}
   2218 }
   2219 
   2220 /*
   2221  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
   2222  * to indicate which portions of the captured data should be in the ereport.
   2223  */
   2224 void
   2225 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
   2226 {
   2227 	int way = ch_flt->parity_data.ipe.cpl_way;
   2228 	int offset = ch_flt->parity_data.ipe.cpl_off;
   2229 	int tag_index;
   2230 	struct async_flt *aflt = (struct async_flt *)ch_flt;
   2231 
   2232 
   2233 	if ((offset != -1) || (way != -1)) {
   2234 		/*
   2235 		 * Parity error in I$ tag or data
   2236 		 */
   2237 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
   2238 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
   2239 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
   2240 			    PN_ICIDX_TO_WAY(tag_index);
   2241 		else
   2242 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
   2243 			    CH_ICIDX_TO_WAY(tag_index);
   2244 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
   2245 		    IC_LOGFLAG_MAGIC;
   2246 	} else {
   2247 		/*
   2248 		 * Parity error was not identified.
   2249 		 * Log tags and data for all ways.
   2250 		 */
   2251 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
   2252 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
   2253 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
   2254 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
   2255 				    PN_ICIDX_TO_WAY(tag_index);
   2256 			else
   2257 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
   2258 				    CH_ICIDX_TO_WAY(tag_index);
   2259 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
   2260 			    IC_LOGFLAG_MAGIC;
   2261 		}
   2262 	}
   2263 }
   2264 
   2265 /*
   2266  * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
   2267  * to indicate which portions of the captured data should be in the ereport.
   2268  */
   2269 void
   2270 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
   2271 {
   2272 	int way = ch_flt->parity_data.dpe.cpl_way;
   2273 	int offset = ch_flt->parity_data.dpe.cpl_off;
   2274 	int tag_index;
   2275 
   2276 	if (offset != -1) {
   2277 		/*
   2278 		 * Parity error in D$ or P$ data array.
   2279 		 *
   2280 		 * First check to see whether the parity error is in D$ or P$
   2281 		 * since P$ data parity errors are reported in Panther using
   2282 		 * the same trap.
   2283 		 */
   2284 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
   2285 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
   2286 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
   2287 			    CH_PCIDX_TO_WAY(tag_index);
   2288 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
   2289 			    PC_LOGFLAG_MAGIC;
   2290 		} else {
   2291 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
   2292 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
   2293 			    CH_DCIDX_TO_WAY(tag_index);
   2294 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
   2295 			    DC_LOGFLAG_MAGIC;
   2296 		}
   2297 	} else if (way != -1) {
   2298 		/*
   2299 		 * Parity error in D$ tag.
   2300 		 */
   2301 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
   2302 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
   2303 		    CH_DCIDX_TO_WAY(tag_index);
   2304 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
   2305 		    DC_LOGFLAG_MAGIC;
   2306 	}
   2307 }
   2308 #endif	/* CPU_IMP_L1_CACHE_PARITY */
   2309 
   2310 /*
   2311  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
   2312  * post-process CPU events that are dequeued.  As such, it can be invoked
   2313  * from softint context, from AST processing in the trap() flow, or from the
   2314  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
   2315  * Historically this entry point was used to log the actual cmn_err(9F) text;
   2316  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
   2317  * With FMA this function now also returns a flag which indicates to the
   2318  * caller whether the ereport should be posted (1) or suppressed (0).
   2319  */
   2320 static int
   2321 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
   2322 {
   2323 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
   2324 	struct async_flt *aflt = (struct async_flt *)flt;
   2325 	uint64_t errors;
   2326 	extern void memscrub_induced_error(void);
   2327 
   2328 	switch (ch_flt->flt_type) {
   2329 	case CPU_INV_AFSR:
   2330 		/*
   2331 		 * If it is a disrupting trap and the AFSR is zero, then
   2332 		 * the event has probably already been noted. Do not post
   2333 		 * an ereport.
   2334 		 */
   2335 		if ((aflt->flt_status & ECC_C_TRAP) &&
   2336 		    (!(aflt->flt_stat & C_AFSR_MASK)))
   2337 			return (0);
   2338 		else
   2339 			return (1);
   2340 	case CPU_TO:
   2341 	case CPU_BERR:
   2342 	case CPU_FATAL:
   2343 	case CPU_FPUERR:
   2344 		return (1);
   2345 
   2346 	case CPU_UE_ECACHE_RETIRE:
   2347 		cpu_log_err(aflt);
   2348 		cpu_page_retire(ch_flt);
   2349 		return (1);
   2350 
   2351 	/*
   2352 	 * Cases where we may want to suppress logging or perform
   2353 	 * extended diagnostics.
   2354 	 */
   2355 	case CPU_CE:
   2356 	case CPU_EMC:
   2357 		/*
   2358 		 * We want to skip logging and further classification
   2359 		 * only if ALL the following conditions are true:
   2360 		 *
   2361 		 *	1. There is only one error
   2362 		 *	2. That error is a correctable memory error
   2363 		 *	3. The error is caused by the memory scrubber (in
   2364 		 *	   which case the error will have occurred under
   2365 		 *	   on_trap protection)
   2366 		 *	4. The error is on a retired page
   2367 		 *
   2368 		 * Note: AFLT_PROT_EC is used places other than the memory
   2369 		 * scrubber.  However, none of those errors should occur
   2370 		 * on a retired page.
   2371 		 */
   2372 		if ((ch_flt->afsr_errs &
   2373 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
   2374 		    aflt->flt_prot == AFLT_PROT_EC) {
   2375 
   2376 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
   2377 				if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
   2378 
   2379 				/*
   2380 				 * Since we're skipping logging, we'll need
   2381 				 * to schedule the re-enabling of CEEN
   2382 				 */
   2383 				(void) timeout(cpu_delayed_check_ce_errors,
   2384 				    (void *)(uintptr_t)aflt->flt_inst,
   2385 				    drv_usectohz((clock_t)cpu_ceen_delay_secs
   2386 				    * MICROSEC));
   2387 				}
   2388 
   2389 				/*
   2390 				 * Inform memscrubber - scrubbing induced
   2391 				 * CE on a retired page.
   2392 				 */
   2393 				memscrub_induced_error();
   2394 				return (0);
   2395 			}
   2396 		}
   2397 
   2398 		/*
   2399 		 * Perform/schedule further classification actions, but
   2400 		 * only if the page is healthy (we don't want bad
   2401 		 * pages inducing too much diagnostic activity).  If we could
   2402 		 * not find a page pointer then we also skip this.  If
   2403 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
   2404 		 * to copy and recirculate the event (for further diagnostics)
   2405 		 * and we should not proceed to log it here.
   2406 		 *
   2407 		 * This must be the last step here before the cpu_log_err()
   2408 		 * below - if an event recirculates cpu_ce_log_err() will
   2409 		 * not call the current function but just proceed directly
   2410 		 * to cpu_ereport_post after the cpu_log_err() avoided below.
   2411 		 *
   2412 		 * Note: Check cpu_impl_async_log_err if changing this
   2413 		 */
   2414 		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
   2415 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
   2416 			    CE_XDIAG_SKIP_NOPP);
   2417 		} else {
   2418 			if (errors != PR_OK) {
   2419 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
   2420 				    CE_XDIAG_SKIP_PAGEDET);
   2421 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
   2422 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
   2423 				return (0);
   2424 			}
   2425 		}
   2426 		/*FALLTHRU*/
   2427 
   2428 	/*
   2429 	 * Cases where we just want to report the error and continue.
   2430 	 */
   2431 	case CPU_CE_ECACHE:
   2432 	case CPU_UE_ECACHE:
   2433 	case CPU_IV:
   2434 	case CPU_ORPH:
   2435 		cpu_log_err(aflt);
   2436 		return (1);
   2437 
   2438 	/*
   2439 	 * Cases where we want to fall through to handle panicking.
   2440 	 */
   2441 	case CPU_UE:
   2442 		/*
   2443 		 * We want to skip logging in the same conditions as the
   2444 		 * CE case.  In addition, we want to make sure we're not
   2445 		 * panicking.
   2446 		 */
   2447 		if (!panicstr && (ch_flt->afsr_errs &
   2448 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
   2449 		    aflt->flt_prot == AFLT_PROT_EC) {
   2450 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
   2451 				/* Zero the address to clear the error */
   2452 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
   2453 				/*
   2454 				 * Inform memscrubber - scrubbing induced
   2455 				 * UE on a retired page.
   2456 				 */
   2457 				memscrub_induced_error();
   2458 				return (0);
   2459 			}
   2460 		}
   2461 		cpu_log_err(aflt);
   2462 		break;
   2463 
   2464 	default:
   2465 		/*
   2466 		 * If the us3_common.c code doesn't know the flt_type, it may
   2467 		 * be an implementation-specific code.  Call into the impldep
   2468 		 * backend to find out what to do: if it tells us to continue,
   2469 		 * break and handle as if falling through from a UE; if not,
   2470 		 * the impldep backend has handled the error and we're done.
   2471 		 */
   2472 		switch (cpu_impl_async_log_err(flt, eqep)) {
   2473 		case CH_ASYNC_LOG_DONE:
   2474 			return (1);
   2475 		case CH_ASYNC_LOG_RECIRC:
   2476 			return (0);
   2477 		case CH_ASYNC_LOG_CONTINUE:
   2478 			break; /* continue on to handle UE-like error */
   2479 		default:
   2480 			cmn_err(CE_WARN, "discarding error 0x%p with "
   2481 			    "invalid fault type (0x%x)",
   2482 			    (void *)aflt, ch_flt->flt_type);
   2483 			return (0);
   2484 		}
   2485 	}
   2486 
   2487 	/* ... fall through from the UE case */
   2488 
   2489 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
   2490 		if (!panicstr) {
   2491 			cpu_page_retire(ch_flt);
   2492 		} else {
   2493 			/*
   2494 			 * Clear UEs on panic so that we don't
   2495 			 * get haunted by them during panic or
   2496 			 * after reboot
   2497 			 */
   2498 			cpu_clearphys(aflt);
   2499 			(void) clear_errors(NULL);
   2500 		}
   2501 	}
   2502 
   2503 	return (1);
   2504 }
   2505 
   2506 /*
   2507  * Retire the bad page that may contain the flushed error.
   2508  */
   2509 void
   2510 cpu_page_retire(ch_async_flt_t *ch_flt)
   2511 {
   2512 	struct async_flt *aflt = (struct async_flt *)ch_flt;
   2513 	(void) page_retire(aflt->flt_addr, PR_UE);
   2514 }
   2515 
   2516 /*
   2517  * Return true if the error specified in the AFSR indicates
   2518  * an E$ data error (L2$ for Cheetah/Cheetah+/Jaguar, L3$
   2519  * for Panther, none for Jalapeno/Serrano).
   2520  */
   2521 /* ARGSUSED */
   2522 static int
   2523 cpu_error_is_ecache_data(int cpuid, uint64_t t_afsr)
   2524 {
   2525 #if defined(JALAPENO) || defined(SERRANO)
   2526 	return (0);
   2527 #elif defined(CHEETAH_PLUS)
   2528 	if (IS_PANTHER(cpunodes[cpuid].implementation))
   2529 		return ((t_afsr & C_AFSR_EXT_L3_DATA_ERRS) != 0);
   2530 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
   2531 #else	/* CHEETAH_PLUS */
   2532 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
   2533 #endif
   2534 }
   2535 
   2536 /*
   2537  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
   2538  * generic event post-processing for correctable and uncorrectable memory,
   2539  * E$, and MTag errors.  Historically this entry point was used to log bits of
   2540  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
   2541  * converted into an ereport.  In addition, it transmits the error to any
   2542  * platform-specific service-processor FRU logging routines, if available.
   2543  */
   2544 void
   2545 cpu_log_err(struct async_flt *aflt)
   2546 {
   2547 	char unum[UNUM_NAMLEN];
   2548 	int synd_status, synd_code, afar_status;
   2549 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
   2550 
   2551 	if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit))
   2552 		aflt->flt_status |= ECC_ECACHE;
   2553 	else
   2554 		aflt->flt_status &= ~ECC_ECACHE;
   2555 	/*
   2556 	 * Determine syndrome status.
   2557 	 */
   2558 	synd_status = afsr_to_synd_status(aflt->flt_inst,
   2559 	    ch_flt->afsr_errs, ch_flt->flt_bit);
   2560 
   2561 	/*
   2562 	 * Determine afar status.
   2563 	 */
   2564 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
   2565 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
   2566 		    ch_flt->flt_bit);
   2567 	else
   2568 		afar_status = AFLT_STAT_INVALID;
   2569 
   2570 	synd_code = synd_to_synd_code(synd_status,
   2571 	    aflt->flt_synd, ch_flt->flt_bit);
   2572 
   2573 	/*
   2574 	 * If afar status is not invalid do a unum lookup.
   2575 	 */
   2576 	if (afar_status != AFLT_STAT_INVALID) {
   2577 		(void) cpu_get_mem_unum_synd(synd_code, aflt, unum);
   2578 	} else {
   2579 		unum[0] = '\0';
   2580 	}
   2581 
   2582 	/*
   2583 	 * Do not send the fruid message (plat_ecc_error_data_t)
   2584 	 * to the SC if it can handle the enhanced error information
   2585 	 * (plat_ecc_error2_data_t) or when the tunable
   2586 	 * ecc_log_fruid_enable is set to 0.
   2587 	 */
   2588 
   2589 	if (&plat_ecc_capability_sc_get &&
   2590 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
   2591 		if (&plat_log_fruid_error)
   2592 			plat_log_fruid_error(synd_code, aflt, unum,
   2593 			    ch_flt->flt_bit);
   2594 	}
   2595 
   2596 	if (aflt->flt_func != NULL)
   2597 		aflt->flt_func(aflt, unum);
   2598 
   2599 	if (afar_status != AFLT_STAT_INVALID)
   2600 		cpu_log_diag_info(ch_flt);
   2601 
   2602 	/*
   2603 	 * If we have a CEEN error , we do not reenable CEEN until after
   2604 	 * we exit the trap handler. Otherwise, another error may
   2605 	 * occur causing the handler to be entered recursively.
   2606 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
   2607 	 * to try and ensure that the CPU makes progress in the face
   2608 	 * of a CE storm.
   2609 	 */
   2610 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
   2611 		(void) timeout(cpu_delayed_check_ce_errors,
   2612 		    (void *)(uintptr_t)aflt->flt_inst,
   2613 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
   2614 	}
   2615 }
   2616 
   2617 /*
   2618  * Invoked by error_init() early in startup and therefore before
   2619  * startup_errorq() is called to drain any error Q -
   2620  *
   2621  * startup()
   2622  *   startup_end()
   2623  *     error_init()
   2624  *       cpu_error_init()
   2625  * errorq_init()
   2626  *   errorq_drain()
   2627  * start_other_cpus()
   2628  *
   2629  * The purpose of this routine is to create error-related taskqs.  Taskqs
   2630  * are used for this purpose because cpu_lock can't be grabbed from interrupt
   2631  * context.
   2632  */
   2633 void
   2634 cpu_error_init(int items)
   2635 {
   2636 	/*
   2637 	 * Create taskq(s) to reenable CE
   2638 	 */
   2639 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
   2640 	    items, items, TASKQ_PREPOPULATE);
   2641 }
   2642 
   2643 void
   2644 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
   2645 {
   2646 	char unum[UNUM_NAMLEN];
   2647 	int len;
   2648 
   2649 	switch (aflt->flt_class) {
   2650 	case CPU_FAULT:
   2651 		cpu_ereport_init(aflt);
   2652 		if (cpu_async_log_err(aflt, eqep))
   2653 			cpu_ereport_post(aflt);
   2654 		break;
   2655 
   2656 	case BUS_FAULT:
   2657 		if (aflt->flt_func != NULL) {
   2658 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
   2659 			    unum, UNUM_NAMLEN, &len);
   2660 			aflt->flt_func(aflt, unum);
   2661 		}
   2662 		break;
   2663 
   2664 	case RECIRC_CPU_FAULT:
   2665 		aflt->flt_class = CPU_FAULT;
   2666 		cpu_log_err(aflt);
   2667 		cpu_ereport_post(aflt);
   2668 		break;
   2669 
   2670 	case RECIRC_BUS_FAULT:
   2671 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
   2672 		/*FALLTHRU*/
   2673 	default:
   2674 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
   2675 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
   2676 		return;
   2677 	}
   2678 }
   2679 
   2680 /*
   2681  * Scrub and classify a CE.  This function must not modify the
   2682  * fault structure passed to it but instead should return the classification
   2683  * information.
   2684  */
   2685 
   2686 static uchar_t
   2687 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
   2688 {
   2689 	uchar_t disp = CE_XDIAG_EXTALG;
   2690 	on_trap_data_t otd;
   2691 	uint64_t orig_err;
   2692 	ch_cpu_logout_t *clop;
   2693 
   2694 	/*
   2695 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
   2696 	 * this, but our other callers have not.  Disable preemption to
   2697 	 * avoid CPU migration so that we restore CEEN on the correct
   2698 	 * cpu later.
   2699 	 *
   2700 	 * CEEN is cleared so that further CEs that our instruction and
   2701 	 * data footprint induce do not cause use to either creep down
   2702 	 * kernel stack to the point of overflow, or do so much CE
   2703 	 * notification as to make little real forward progress.
   2704 	 *
   2705 	 * NCEEN must not be cleared.  However it is possible that
   2706 	 * our accesses to the flt_addr may provoke a bus error or timeout
   2707 	 * if the offending address has just been unconfigured as part of
   2708 	 * a DR action.  So we must operate under on_trap protection.
   2709 	 */
   2710 	kpreempt_disable();
   2711 	orig_err = get_error_enable();
   2712 	if (orig_err & EN_REG_CEEN)
   2713 		set_error_enable(orig_err & ~EN_REG_CEEN);
   2714 
   2715 	/*
   2716 	 * Our classification algorithm includes the line state before
   2717 	 * the scrub; we'd like this captured after the detection and
   2718 	 * before the algorithm below - the earlier the better.
   2719 	 *
   2720 	 * If we've come from a cpu CE trap then this info already exists
   2721 	 * in the cpu logout area.
   2722 	 *
   2723 	 * For a CE detected by memscrub for which there was no trap
   2724 	 * (running with CEEN off) cpu_log_and_clear_ce has called
   2725 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
   2726 	 * marked the fault structure as incomplete as a flag to later
   2727 	 * logging code.
   2728 	 *
   2729 	 * If called directly from an IO detected CE there has been
   2730 	 * no line data capture.  In this case we logout to the cpu logout
   2731 	 * area - that's appropriate since it's the cpu cache data we need
   2732 	 * for classification.  We thus borrow the cpu logout area for a
   2733 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
   2734 	 * this time (we will invalidate it again below).
   2735 	 *
   2736 	 * If called from the partner check xcall handler then this cpu
   2737 	 * (the partner) has not necessarily experienced a CE at this
   2738 	 * address.  But we want to capture line state before its scrub
   2739 	 * attempt since we use that in our classification.
   2740 	 */
   2741 	if (logout_tried == B_FALSE) {
   2742 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
   2743 			disp |= CE_XDIAG_NOLOGOUT;
   2744 	}
   2745 
   2746 	/*
   2747 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
   2748 	 * no longer be valid (if DR'd since the initial event) so we
   2749 	 * perform this scrub under on_trap protection.  If this access is
   2750 	 * ok then further accesses below will also be ok - DR cannot
   2751 	 * proceed while this thread is active (preemption is disabled);
   2752 	 * to be safe we'll nonetheless use on_trap again below.
   2753 	 */
   2754 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
   2755 		cpu_scrubphys(ecc);
   2756 	} else {
   2757 		no_trap();
   2758 		if (orig_err & EN_REG_CEEN)
   2759 			set_error_enable(orig_err);
   2760 		kpreempt_enable();
   2761 		return (disp);
   2762 	}
   2763 	no_trap();
   2764 
   2765 	/*
   2766 	 * Did the casx read of the scrub log a CE that matches the AFAR?
   2767 	 * Note that it's quite possible that the read sourced the data from
   2768 	 * another cpu.
   2769 	 */
   2770 	if (clear_ecc(ecc))
   2771 		disp |= CE_XDIAG_CE1;
   2772 
   2773 	/*
   2774 	 * Read the data again.  This time the read is very likely to
   2775 	 * come from memory since the scrub induced a writeback to memory.
   2776 	 */
   2777 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
   2778 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
   2779 	} else {
   2780 		no_trap();
   2781 		if (orig_err & EN_REG_CEEN)
   2782 			set_error_enable(orig_err);
   2783 		kpreempt_enable();
   2784 		return (disp);
   2785 	}
   2786 	no_trap();
   2787 
   2788 	/* Did that read induce a CE that matches the AFAR? */
   2789 	if (clear_ecc(ecc))
   2790 		disp |= CE_XDIAG_CE2;
   2791 
   2792 	/*
   2793 	 * Look at the logout information and record whether we found the
   2794 	 * line in l2/l3 cache.  For Panther we are interested in whether
   2795 	 * we found it in either cache (it won't reside in both but
   2796 	 * it is possible to read it that way given the moving target).
   2797 	 */
   2798 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
   2799 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
   2800 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
   2801 		int hit, level;
   2802 		int state;
   2803 		int totalsize;
   2804 		ch_ec_data_t *ecp;
   2805 
   2806 		/*
   2807 		 * If hit is nonzero then a match was found and hit will
   2808 		 * be one greater than the index which hit.  For Panther we
   2809 		 * also need to pay attention to level to see which of l2$ or
   2810 		 * l3$ it hit in.
   2811 		 */
   2812 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
   2813 		    0, &level);
   2814 
   2815 		if (hit) {
   2816 			--hit;
   2817 			disp |= CE_XDIAG_AFARMATCH;
   2818 
   2819 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
   2820 				if (level == 2)
   2821 					ecp = &clop->clo_data.chd_l2_data[hit];
   2822 				else
   2823 					ecp = &clop->clo_data.chd_ec_data[hit];
   2824 			} else {
   2825 				ASSERT(level == 2);
   2826 				ecp = &clop->clo_data.chd_ec_data[hit];
   2827 			}
   2828 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
   2829 			state = cpu_ectag_pa_to_subblk_state(totalsize,
   2830 			    ecc->flt_addr, ecp->ec_tag);
   2831 
   2832 			/*
   2833 			 * Cheetah variants use different state encodings -
   2834 			 * the CH_ECSTATE_* defines vary depending on the
   2835 			 * module we're compiled for.  Translate into our
   2836 			 * one true version.  Conflate Owner-Shared state
   2837 			 * of SSM mode with Owner as victimisation of such
   2838 			 * lines may cause a writeback.
   2839 			 */
   2840 			switch (state) {
   2841 			case CH_ECSTATE_MOD:
   2842 				disp |= EC_STATE_M;
   2843 				break;
   2844 
   2845 			case CH_ECSTATE_OWN:
   2846 			case CH_ECSTATE_OWS:
   2847 				disp |= EC_STATE_O;
   2848 				break;
   2849 
   2850 			case CH_ECSTATE_EXL:
   2851 				disp |= EC_STATE_E;
   2852 				break;
   2853 
   2854 			case CH_ECSTATE_SHR:
   2855 				disp |= EC_STATE_S;
   2856 				break;
   2857 
   2858 			default:
   2859 				disp |= EC_STATE_I;
   2860 				break;
   2861 			}
   2862 		}
   2863 
   2864 		/*
   2865 		 * If we initiated the delayed logout then we are responsible
   2866 		 * for invalidating the logout area.
   2867 		 */
   2868 		if (logout_tried == B_FALSE) {
   2869 			bzero(clop, sizeof (ch_cpu_logout_t));
   2870 			clop->clo_data.chd_afar = LOGOUT_INVALID;
   2871 		}
   2872 	}
   2873 
   2874 	/*
   2875 	 * Re-enable CEEN if we turned it off.
   2876 	 */
   2877 	if (orig_err & EN_REG_CEEN)
   2878 		set_error_enable(orig_err);
   2879 	kpreempt_enable();
   2880 
   2881 	return (disp);
   2882 }
   2883 
   2884 /*
   2885  * Scrub a correctable memory error and collect data for classification
   2886  * of CE type.  This function is called in the detection path, ie tl0 handling
   2887  * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
   2888  */
   2889 void
   2890 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
   2891 {
   2892 	/*
   2893 	 * Cheetah CE classification does not set any bits in flt_status.
   2894 	 * Instead we will record classification datapoints in flt_disp.
   2895 	 */
   2896 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
   2897 
   2898 	/*
   2899 	 * To check if the error detected by IO is persistent, sticky or
   2900 	 * intermittent.  This is noticed by clear_ecc().
   2901 	 */
   2902 	if (ecc->flt_status & ECC_IOBUS)
   2903 		ecc->flt_stat = C_AFSR_MEMORY;
   2904 
   2905 	/*
   2906 	 * Record information from this first part of the algorithm in
   2907 	 * flt_disp.
   2908 	 */
   2909 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
   2910 }
   2911 
   2912 /*
   2913  * Select a partner to perform a further CE classification check from.
   2914  * Must be called with kernel preemption disabled (to stop the cpu list
   2915  * from changing).  The detecting cpu we are partnering has cpuid
   2916  * aflt->flt_inst; we might not be running on the detecting cpu.
   2917  *
   2918  * Restrict choice to active cpus in the same cpu partition as ourselves in
   2919  * an effort to stop bad cpus in one partition causing other partitions to
   2920  * perform excessive diagnostic activity.  Actually since the errorq drain
   2921  * is run from a softint most of the time and that is a global mechanism
   2922  * this isolation is only partial.  Return NULL if we fail to find a
   2923  * suitable partner.
   2924  *
   2925  * We prefer a partner that is in a different latency group to ourselves as
   2926  * we will share fewer datapaths.  If such a partner is unavailable then
   2927  * choose one in the same lgroup but prefer a different chip and only allow
   2928  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
   2929  * flags includes PTNR_SELFOK then permit selection of the original detector.
   2930  *
   2931  * We keep a cache of the last partner selected for a cpu, and we'll try to
   2932  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
   2933  * have passed since that selection was made.  This provides the benefit
   2934  * of the point-of-view of different partners over time but without
   2935  * requiring frequent cpu list traversals.
   2936  */
   2937 
   2938 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
   2939 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
   2940 
   2941 static cpu_t *
   2942 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
   2943 {
   2944 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
   2945 	hrtime_t lasttime, thistime;
   2946 
   2947 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
   2948 
   2949 	dtcr = cpu[aflt->flt_inst];
   2950 
   2951 	/*
   2952 	 * Short-circuit for the following cases:
   2953 	 *	. the dtcr is not flagged active
   2954 	 *	. there is just one cpu present
   2955 	 *	. the detector has disappeared
   2956 	 *	. we were given a bad flt_inst cpuid; this should not happen
   2957 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
   2958 	 *	  reason to panic.
   2959 	 *	. there is just one cpu left online in the cpu partition
   2960 	 *
   2961 	 * If we return NULL after this point then we do not update the
   2962 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
   2963 	 * again next time; this is the case where the only other cpu online
   2964 	 * in the detector's partition is on the same chip as the detector
   2965 	 * and since CEEN re-enable is throttled even that case should not
   2966 	 * hurt performance.
   2967 	 */
   2968 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
   2969 		return (NULL);
   2970 	}
   2971 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
   2972 		if (flags & PTNR_SELFOK) {
   2973 			*typep = CE_XDIAG_PTNR_SELF;
   2974 			return (dtcr);
   2975 		} else {
   2976 			return (NULL);
   2977 		}
   2978 	}
   2979 
   2980 	thistime = gethrtime();
   2981 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
   2982 
   2983 	/*
   2984 	 * Select a starting point.
   2985 	 */
   2986 	if (!lasttime) {
   2987 		/*
   2988 		 * We've never selected a partner for this detector before.
   2989 		 * Start the scan at the next online cpu in the same cpu
   2990 		 * partition.
   2991 		 */
   2992 		sp = dtcr->cpu_next_part;
   2993 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
   2994 		/*
   2995 		 * Our last selection has not aged yet.  If this partner:
   2996 		 *	. is still a valid cpu,
   2997 		 *	. is still in the same partition as the detector
   2998 		 *	. is still marked active
   2999 		 *	. satisfies the 'flags' argument criteria
   3000 		 * then select it again without updating the timestamp.
   3001 		 */
   3002 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
   3003 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
   3004 		    !cpu_flagged_active(sp->cpu_flags) ||
   3005 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
   3006 		    (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) &&
   3007 		    !(flags & PTNR_SIBLINGOK))) {
   3008 			sp = dtcr->cpu_next_part;
   3009 		} else {
   3010 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
   3011 				*typep = CE_XDIAG_PTNR_REMOTE;
   3012 			} else if (sp == dtcr) {
   3013 				*typep = CE_XDIAG_PTNR_SELF;
   3014 			} else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) {
   3015 				*typep = CE_XDIAG_PTNR_SIBLING;
   3016 			} else {
   3017 				*typep = CE_XDIAG_PTNR_LOCAL;
   3018 			}
   3019 			return (sp);
   3020 		}
   3021 	} else {
   3022 		/*
   3023 		 * Our last selection has aged.  If it is nonetheless still a
   3024 		 * valid cpu then start the scan at the next cpu in the
   3025 		 * partition after our last partner.  If the last selection
   3026 		 * is no longer a valid cpu then go with our default.  In
   3027 		 * this way we slowly cycle through possible partners to
   3028 		 * obtain multiple viewpoints over time.
   3029 		 */
   3030 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
   3031 		if (sp == NULL) {
   3032 			sp = dtcr->cpu_next_part;
   3033 		} else {
   3034 			sp = sp->cpu_next_part;		/* may be dtcr */
   3035 			if (sp->cpu_part != dtcr->cpu_part)
   3036 				sp = dtcr;
   3037 		}
   3038 	}
   3039 
   3040 	/*
   3041 	 * We have a proposed starting point for our search, but if this
   3042 	 * cpu is offline then its cpu_next_part will point to itself
   3043 	 * so we can't use that to iterate over cpus in this partition in
   3044 	 * the loop below.  We still want to avoid iterating over cpus not
   3045 	 * in our partition, so in the case that our starting point is offline
   3046 	 * we will repoint it to be the detector itself;  and if the detector
   3047 	 * happens to be offline we'll return NULL from the following loop.
   3048 	 */
   3049 	if (!cpu_flagged_active(sp->cpu_flags)) {
   3050 		sp = dtcr;
   3051 	}
   3052 
   3053 	ptnr = sp;
   3054 	locptnr = NULL;
   3055 	sibptnr = NULL;
   3056 	do {
   3057 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
   3058 			continue;
   3059 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
   3060 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
   3061 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
   3062 			*typep = CE_XDIAG_PTNR_REMOTE;
   3063 			return (ptnr);
   3064 		}
   3065 		if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) {
   3066 			if (sibptnr == NULL)
   3067 				sibptnr = ptnr;
   3068 			continue;
   3069 		}
   3070 		if (locptnr == NULL)
   3071 			locptnr = ptnr;
   3072 	} while ((ptnr = ptnr->cpu_next_part) != sp);
   3073 
   3074 	/*
   3075 	 * A foreign partner has already been returned if one was available.
   3076 	 *
   3077 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
   3078 	 * detector, is active, and is not a sibling of the detector.
   3079 	 *
   3080 	 * If sibptnr is not NULL it is a sibling of the detector, and is
   3081 	 * active.
   3082 	 *
   3083 	 * If we have to resort to using the detector itself we have already
   3084 	 * checked that it is active.
   3085 	 */
   3086 	if (locptnr) {
   3087 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
   3088 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
   3089 		*typep = CE_XDIAG_PTNR_LOCAL;
   3090 		return (locptnr);
   3091 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
   3092 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
   3093 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
   3094 		*typep = CE_XDIAG_PTNR_SIBLING;
   3095 		return (sibptnr);
   3096 	} else if (flags & PTNR_SELFOK) {
   3097 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
   3098 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
   3099 		*typep = CE_XDIAG_PTNR_SELF;
   3100 		return (dtcr);
   3101 	}
   3102 
   3103 	return (NULL);
   3104 }
   3105 
   3106 /*
   3107  * Cross call handler that is requested to run on the designated partner of
   3108  * a cpu that experienced a possibly sticky or possibly persistnet CE.
   3109  */
   3110 static void
   3111 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
   3112 {
   3113 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
   3114 }
   3115 
   3116 /*
   3117  * The associated errorqs are never destroyed so we do not need to deal with
   3118  * them disappearing before this timeout fires.  If the affected memory
   3119  * has been DR'd out since the original event the scrub algrithm will catch
   3120  * any errors and return null disposition info.  If the original detecting
   3121  * cpu has been DR'd out then ereport detector info will not be able to
   3122  * lookup CPU type;  with a small timeout this is unlikely.
   3123  */
   3124 static void
   3125 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
   3126 {
   3127 	struct async_flt *aflt = cbarg->lkycb_aflt;
   3128 	uchar_t disp;
   3129 	cpu_t *cp;
   3130 	int ptnrtype;
   3131 
   3132 	kpreempt_disable();
   3133 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
   3134 	    &ptnrtype)) {
   3135 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
   3136 		    (uint64_t)&disp);
   3137 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
   3138 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
   3139 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
   3140 	} else {
   3141 		ce_xdiag_lkydrops++;
   3142 		if (ncpus > 1)
   3143 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
   3144 			    CE_XDIAG_SKIP_NOPTNR);
   3145 	}
   3146 	kpreempt_enable();
   3147 
   3148 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
   3149 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
   3150 }
   3151 
   3152 /*
   3153  * Called from errorq drain code when processing a CE error, both from
   3154  * CPU and PCI drain functions.  Decide what further classification actions,
   3155  * if any, we will perform.  Perform immediate actions now, and schedule
   3156  * delayed actions as required.  Note that we are no longer necessarily running
   3157  * on the detecting cpu, and that the async_flt structure will not persist on
   3158  * return from this function.
   3159  *
   3160  * Calls to this function should aim to be self-throtlling in some way.  With
   3161  * the delayed re-enable of CEEN the absolute rate of calls should not
   3162  * be excessive.  Callers should also avoid performing in-depth classification
   3163  * for events in pages that are already known to be suspect.
   3164  *
   3165  * We return nonzero to indicate that the event has been copied and
   3166  * recirculated for further testing.  The caller should not log the event
   3167  * in this case - it will be logged when further test results are available.
   3168  *
   3169  * Our possible contexts are that of errorq_drain: below lock level or from
   3170  * panic context.  We can assume that the cpu we are running on is online.
   3171  */
   3172 
   3173 
   3174 #ifdef DEBUG
   3175 static int ce_xdiag_forceaction;
   3176 #endif
   3177 
   3178 int
   3179 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
   3180     errorq_elem_t *eqep, size_t afltoffset)
   3181 {
   3182 	ce_dispact_t dispact, action;
   3183 	cpu_t *cp;
   3184 	uchar_t dtcrinfo, disp;
   3185 	int ptnrtype;
   3186 
   3187 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
   3188 		ce_xdiag_drops++;
   3189 		return (0);
   3190 	} else if (!aflt->flt_in_memory) {
   3191 		ce_xdiag_drops++;
   3192 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
   3193 		return (0);
   3194 	}
   3195 
   3196 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
   3197 
   3198 	/*
   3199 	 * Some correctable events are not scrubbed/classified, such as those
   3200 	 * noticed at the tail of cpu_deferred_error.  So if there is no
   3201 	 * initial detector classification go no further.
   3202 	 */
   3203 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
   3204 		ce_xdiag_drops++;
   3205 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
   3206 		return (0);
   3207 	}
   3208 
   3209 	dispact = CE_DISPACT(ce_disp_table,
   3210 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
   3211 	    CE_XDIAG_STATE(dtcrinfo),
   3212 	    CE_XDIAG_CE1SEEN(dtcrinfo),
   3213 	    CE_XDIAG_CE2SEEN(dtcrinfo));
   3214 
   3215 
   3216 	action = CE_ACT(dispact);	/* bad lookup caught below */
   3217 #ifdef DEBUG
   3218 	if (ce_xdiag_forceaction != 0)
   3219 		action = ce_xdiag_forceaction;
   3220 #endif
   3221 
   3222 	switch (action) {
   3223 	case CE_ACT_LKYCHK: {
   3224 		caddr_t ndata;
   3225 		errorq_elem_t *neqep;
   3226 		struct async_flt *ecc;
   3227 		ce_lkychk_cb_t *cbargp;
   3228 
   3229 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
   3230 			ce_xdiag_lkydrops++;
   3231 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
   3232 			    CE_XDIAG_SKIP_DUPFAIL);
   3233 			break;
   3234 		}
   3235 		ecc = (struct async_flt *)(ndata + afltoffset);
   3236 
   3237 		ASSERT(ecc->flt_class == CPU_FAULT ||
   3238 		    ecc->flt_class == BUS_FAULT);
   3239 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
   3240 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
   3241 
   3242 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
   3243 		cbargp->lkycb_aflt = ecc;
   3244 		cbargp->lkycb_eqp = eqp;
   3245 		cbargp->lkycb_eqep = neqep;
   3246 
   3247 		(void) timeout((void (*)(void *))ce_lkychk_cb,
   3248 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
   3249 		return (1);
   3250 	}
   3251 
   3252 	case CE_ACT_PTNRCHK:
   3253 		kpreempt_disable();	/* stop cpu list changing */
   3254 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
   3255 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
   3256 			    (uint64_t)aflt, (uint64_t)&disp);
   3257 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
   3258 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
   3259 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
   3260 		} else if (ncpus > 1) {
   3261 			ce_xdiag_ptnrdrops++;
   3262 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
   3263 			    CE_XDIAG_SKIP_NOPTNR);
   3264 		} else {
   3265 			ce_xdiag_ptnrdrops++;
   3266 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
   3267 			    CE_XDIAG_SKIP_UNIPROC);
   3268 		}
   3269 		kpreempt_enable();
   3270 		break;
   3271 
   3272 	case CE_ACT_DONE:
   3273 		break;
   3274 
   3275 	case CE_ACT(CE_DISP_BAD):
   3276 	default:
   3277 #ifdef DEBUG
   3278 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
   3279 #endif
   3280 		ce_xdiag_bad++;
   3281 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
   3282 		break;
   3283 	}
   3284 
   3285 	return (0);
   3286 }
   3287 
   3288 /*
   3289  * We route all errors through a single switch statement.
   3290  */
   3291 void
   3292 cpu_ue_log_err(struct async_flt *aflt)
   3293 {
   3294 	switch (aflt->flt_class) {
   3295 	case CPU_FAULT:
   3296 		cpu_ereport_init(aflt);
   3297 		if (cpu_async_log_err(aflt, NULL))
   3298 			cpu_ereport_post(aflt);
   3299 		break;
   3300 
   3301 	case BUS_FAULT:
   3302 		bus_async_log_err(aflt);
   3303 		break;
   3304 
   3305 	default:
   3306 		cmn_err(CE_WARN, "discarding async error %p with invalid "
   3307 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
   3308 		return;
   3309 	}
   3310 }
   3311 
   3312 /*
   3313  * Routine for panic hook callback from panic_idle().
   3314  */
   3315 void
   3316 cpu_async_panic_callb(void)
   3317 {
   3318 	ch_async_flt_t ch_flt;
   3319 	struct async_flt *aflt;
   3320 	ch_cpu_errors_t cpu_error_regs;
   3321 	uint64_t afsr_errs;
   3322 
   3323 	get_cpu_error_state(&cpu_error_regs);
   3324 
   3325 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
   3326 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
   3327 
   3328 	if (afsr_errs) {
   3329 
   3330 		bzero(&ch_flt, sizeof (ch_async_flt_t));
   3331 		aflt = (struct async_flt *)&ch_flt;
   3332 		aflt->flt_id = gethrtime_waitfree();
   3333 		aflt->flt_bus_id = getprocessorid();
   3334 		aflt->flt_inst = CPU->cpu_id;
   3335 		aflt->flt_stat = cpu_error_regs.afsr;
   3336 		aflt->flt_addr = cpu_error_regs.afar;
   3337 		aflt->flt_prot = AFLT_PROT_NONE;
   3338 		aflt->flt_class = CPU_FAULT;
   3339 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
   3340 		aflt->flt_panic = 1;
   3341 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
   3342 		ch_flt.afsr_errs = afsr_errs;
   3343 #if defined(SERRANO)
   3344 		ch_flt.afar2 = cpu_error_regs.afar2;
   3345 #endif	/* SERRANO */
   3346 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
   3347 	}
   3348 }
   3349 
   3350 /*
   3351  * Routine to convert a syndrome into a syndrome code.
   3352  */
   3353 static int
   3354 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
   3355 {
   3356 	if (synd_status == AFLT_STAT_INVALID)
   3357 		return (-1);
   3358 
   3359 	/*
   3360 	 * Use the syndrome to index the appropriate syndrome table,
   3361 	 * to get the code indicating which bit(s) is(are) bad.
   3362 	 */
   3363 	if (afsr_bit &
   3364 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
   3365 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
   3366 #if defined(JALAPENO) || defined(SERRANO)
   3367 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
   3368 				return (-1);
   3369 			else
   3370 				return (BPAR0 + synd);
   3371 #else /* JALAPENO || SERRANO */
   3372 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
   3373 				return (-1);
   3374 			else
   3375 				return (mtag_syndrome_tab[synd]);
   3376 #endif /* JALAPENO || SERRANO */
   3377 		} else {
   3378 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
   3379 				return (-1);
   3380 			else
   3381 				return (ecc_syndrome_tab[synd]);
   3382 		}
   3383 	} else {
   3384 		return (-1);
   3385 	}
   3386 }
   3387 
   3388 int
   3389 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
   3390 {
   3391 	if (&plat_get_mem_sid)
   3392 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
   3393 	else
   3394 		return (ENOTSUP);
   3395 }
   3396 
   3397 int
   3398 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
   3399 {
   3400 	if (&plat_get_mem_offset)
   3401 		return (plat_get_mem_offset(flt_addr, offp));
   3402 	else
   3403 		return (ENOTSUP);
   3404 }
   3405 
   3406 int
   3407 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
   3408 {
   3409 	if (&plat_get_mem_addr)
   3410 		return (plat_get_mem_addr(unum, sid, offset, addrp));
   3411 	else
   3412 		return (ENOTSUP);
   3413 }
   3414 
   3415 /*
   3416  * Routine to return a string identifying the physical name
   3417  * associated with a memory/cache error.
   3418  */
   3419 int
   3420 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
   3421     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
   3422     ushort_t flt_status, char *buf, int buflen, int *lenp)
   3423 {
   3424 	int synd_code;
   3425 	int ret;
   3426 
   3427 	/*
   3428 	 * An AFSR of -1 defaults to a memory syndrome.
   3429 	 */
   3430 	if (flt_stat == (uint64_t)-1)
   3431 		flt_stat = C_AFSR_CE;
   3432 
   3433 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
   3434 
   3435 	/*
   3436 	 * Syndrome code must be either a single-bit error code
   3437 	 * (0...143) or -1 for unum lookup.
   3438 	 */
   3439 	if (synd_code < 0 || synd_code >= M2)
   3440 		synd_code = -1;
   3441 	if (&plat_get_mem_unum) {
   3442 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
   3443 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
   3444 			buf[0] = '\0';
   3445 			*lenp = 0;
   3446 		}
   3447 
   3448 		return (ret);
   3449 	}
   3450 
   3451 	return (ENOTSUP);
   3452 }
   3453 
   3454 /*
   3455  * Wrapper for cpu_get_mem_unum() routine that takes an
   3456  * async_flt struct rather than explicit arguments.
   3457  */
   3458 int
   3459 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
   3460     char *buf, int buflen, int *lenp)
   3461 {
   3462 	/*
   3463 	 * If we come thru here for an IO bus error aflt->flt_stat will
   3464 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
   3465 	 * so it will interpret this as a memory error.
   3466 	 */
   3467 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
   3468 	    (aflt->flt_class == BUS_FAULT) ?
   3469 	    (uint64_t)-1 : ((ch_async_flt_t *)aflt)->flt_bit,
   3470 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
   3471 	    aflt->flt_status, buf, buflen, lenp));
   3472 }
   3473 
   3474 /*
   3475  * Return unum string given synd_code and async_flt into
   3476  * the buf with size UNUM_NAMLEN
   3477  */
   3478 static int
   3479 cpu_get_mem_unum_synd(int synd_code, struct async_flt *aflt, char *buf)
   3480 {
   3481 	int ret, len;
   3482 
   3483 	/*
   3484 	 * Syndrome code must be either a single-bit error code
   3485 	 * (0...143) or -1 for unum lookup.
   3486 	 */
   3487 	if (synd_code < 0 || synd_code >= M2)
   3488 		synd_code = -1;
   3489 	if (&plat_get_mem_unum) {
   3490 		if ((ret = plat_get_mem_unum(synd_code, aflt->flt_addr,
   3491 		    aflt->flt_bus_id, aflt->flt_in_memory,
   3492 		    aflt->flt_status, buf, UNUM_NAMLEN, &len)) != 0) {
   3493 			buf[0] = '\0';
   3494 		}
   3495 		return (ret);
   3496 	}
   3497 
   3498 	buf[0] = '\0';
   3499 	return (ENOTSUP);
   3500 }
   3501 
   3502 /*
   3503  * This routine is a more generic interface to cpu_get_mem_unum()
   3504  * that may be used by other modules (e.g. the 'mm' driver, through
   3505  * the 'MEM_NAME' ioctl, which is used by fmd to resolve unum's
   3506  * for Jalapeno/Serrano FRC/RCE or FRU/RUE paired events).
   3507  */
   3508 int
   3509 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
   3510     char *buf, int buflen, int *lenp)
   3511 {
   3512 	int synd_status, flt_in_memory, ret;
   3513 	ushort_t flt_status = 0;
   3514 	char unum[UNUM_NAMLEN];
   3515 	uint64_t t_afsr_errs;
   3516 
   3517 	/*
   3518 	 * Check for an invalid address.
   3519 	 */
   3520 	if (afar == (uint64_t)-1)
   3521 		return (ENXIO);
   3522 
   3523 	if (synd == (uint64_t)-1)
   3524 		synd_status = AFLT_STAT_INVALID;
   3525 	else
   3526 		synd_status = AFLT_STAT_VALID;
   3527 
   3528 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
   3529 	    pf_is_memory(afar >> MMU_PAGESHIFT);
   3530 
   3531 	/*
   3532 	 * Get aggregate AFSR for call to cpu_error_is_ecache_data.
   3533 	 */
   3534 	if (*afsr == (uint64_t)-1)
   3535 		t_afsr_errs = C_AFSR_CE;
   3536 	else {
   3537 		t_afsr_errs = (*afsr & C_AFSR_ALL_ERRS);
   3538 #if defined(CHEETAH_PLUS)
   3539 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
   3540 			t_afsr_errs |= (*(afsr + 1) & C_AFSR_EXT_ALL_ERRS);
   3541 #endif	/* CHEETAH_PLUS */
   3542 	}
   3543 
   3544 	/*
   3545 	 * Turn on ECC_ECACHE if error type is E$ Data.
   3546 	 */
   3547 	if (cpu_error_is_ecache_data(CPU->cpu_id, t_afsr_errs))
   3548 		flt_status |= ECC_ECACHE;
   3549 
   3550 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, t_afsr_errs, afar,
   3551 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
   3552 	if (ret != 0)
   3553 		return (ret);
   3554 
   3555 	if (*lenp >= buflen)
   3556 		return (ENAMETOOLONG);
   3557 
   3558 	(void) strncpy(buf, unum, buflen);
   3559 
   3560 	return (0);
   3561 }
   3562 
   3563 /*
   3564  * Routine to return memory information associated
   3565  * with a physical address and syndrome.
   3566  */
   3567 int
   3568 cpu_get_mem_info(uint64_t synd, uint64_t afar,
   3569     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
   3570     int *segsp, int *banksp, int *mcidp)
   3571 {
   3572 	int synd_status, synd_code;
   3573 
   3574 	if (afar == (uint64_t)-1)
   3575 		return (ENXIO);
   3576 
   3577 	if (synd == (uint64_t)-1)
   3578 		synd_status = AFLT_STAT_INVALID;
   3579 	else
   3580 		synd_status = AFLT_STAT_VALID;
   3581 
   3582 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
   3583 
   3584 	if (p2get_mem_info != NULL)
   3585 		return ((p2get_mem_info)(synd_code, afar,
   3586 		    mem_sizep, seg_sizep, bank_sizep,
   3587 		    segsp, banksp, mcidp));
   3588 	else
   3589 		return (ENOTSUP);
   3590 }
   3591 
   3592 /*
   3593  * Routine to return a string identifying the physical
   3594  * name associated with a cpuid.
   3595  */
   3596 int
   3597 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
   3598 {
   3599 	int ret;
   3600 	char unum[UNUM_NAMLEN];
   3601 
   3602 	if (&plat_get_cpu_unum) {
   3603 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
   3604 		    != 0)
   3605 			return (ret);
   3606 	} else {
   3607 		return (ENOTSUP);
   3608 	}
   3609 
   3610 	if (*lenp >= buflen)
   3611 		return (ENAMETOOLONG);
   3612 
   3613 	(void) strncpy(buf, unum, buflen);
   3614 
   3615 	return (0);
   3616 }
   3617 
   3618 /*
   3619  * This routine exports the name buffer size.
   3620  */
   3621 size_t
   3622 cpu_get_name_bufsize()
   3623 {
   3624 	return (UNUM_NAMLEN);
   3625 }
   3626 
   3627 /*
   3628  * Historical function, apparantly not used.
   3629  */
   3630 /* ARGSUSED */
   3631 void
   3632 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
   3633 {}
   3634 
   3635 /*
   3636  * Historical function only called for SBus errors in debugging.
   3637  */
   3638 /*ARGSUSED*/
   3639 void
   3640 read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
   3641 {}
   3642 
   3643 /*
   3644  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
   3645  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
   3646  * an async fault structure argument is passed in, the captured error state
   3647  * (AFSR, AFAR) info will be returned in the structure.
   3648  */
   3649 int
   3650 clear_errors(ch_async_flt_t *ch_flt)
   3651 {
   3652 	struct async_flt *aflt = (struct async_flt *)ch_flt;
   3653 	ch_cpu_errors_t	cpu_error_regs;
   3654 
   3655 	get_cpu_error_state(&cpu_error_regs);
   3656 
   3657 	if (ch_flt != NULL) {
   3658 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
   3659 		aflt->flt_addr = cpu_error_regs.afar;
   3660 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
   3661 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
   3662 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
   3663 #if defined(SERRANO)
   3664 		ch_flt->afar2 = cpu_error_regs.afar2;
   3665 #endif	/* SERRANO */
   3666 	}
   3667 
   3668 	set_cpu_error_state(&cpu_error_regs);
   3669 
   3670 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
   3671 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
   3672 }
   3673 
   3674 /*
   3675  * Clear any AFSR error bits, and check for persistence.
   3676  *
   3677  * It would be desirable to also insist that syndrome match.  PCI handling
   3678  * has already filled flt_synd.  For errors trapped by CPU we only fill
   3679  * flt_synd when we queue the event, so we do not have a valid flt_synd
   3680  * during initial classification (it is valid if we're called as part of
   3681  * subsequent low-pil additional classification attempts).  We could try
   3682  * to determine which syndrome to use: we know we're only called for
   3683  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
   3684  * would be esynd/none and esynd/msynd, respectively.  If that is
   3685  * implemented then what do we do in the case that we do experience an
   3686  * error on the same afar but with different syndrome?  At the very least
   3687  * we should count such occurences.  Anyway, for now, we'll leave it as
   3688  * it has been for ages.
   3689  */
   3690 static int
   3691 clear_ecc(struct async_flt *aflt)
   3692 {
   3693 	ch_cpu_errors_t	cpu_error_regs;
   3694 
   3695 	/*
   3696 	 * Snapshot the AFSR and AFAR and clear any errors
   3697 	 */
   3698 	get_cpu_error_state(&cpu_error_regs);
   3699 	set_cpu_error_state(&cpu_error_regs);
   3700 
   3701 	/*
   3702 	 * If any of the same memory access error bits are still on and
   3703 	 * the AFAR matches, return that the error is persistent.
   3704 	 */
   3705 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
   3706 	    cpu_error_regs.afar == aflt->flt_addr);
   3707 }
   3708 
   3709 /*
   3710  * Turn off all cpu error detection, normally only used for panics.
   3711  */
   3712 void
   3713 cpu_disable_errors(void)
   3714 {
   3715 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
   3716 
   3717 	/*
   3718 	 * With error detection now turned off, check the other cpus
   3719 	 * logout areas for any unlogged errors.
   3720 	 */
   3721 	if (enable_check_other_cpus_logout) {
   3722 		cpu_check_other_cpus_logout();
   3723 		/*
   3724 		 * Make a second pass over the logout areas, in case
   3725 		 * there is a failing CPU in an error-trap loop which
   3726 		 * will write to the logout area once it is emptied.
   3727 		 */
   3728 		cpu_check_other_cpus_logout();
   3729 	}
   3730 }
   3731 
   3732 /*
   3733  * Enable errors.
   3734  */
   3735 void
   3736 cpu_enable_errors(void)
   3737 {
   3738 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
   3739 }
   3740 
   3741 /*
   3742  * Flush the entire ecache using displacement flush by reading through a
   3743  * physical address range twice as large as the Ecache.
   3744  */
   3745 void
   3746 cpu_flush_ecache(void)
   3747 {
   3748 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
   3749 	    cpunodes[CPU->cpu_id].ecache_linesize);
   3750 }
   3751 
   3752 /*
   3753  * Return CPU E$ set size - E$ size divided by the associativity.
   3754  * We use this function in places where the CPU_PRIVATE ptr may not be
   3755  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
   3756  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
   3757  * up before the kernel switches from OBP's to the kernel's trap table, so
   3758  * we don't have to worry about cpunodes being unitialized.
   3759  */
   3760 int
   3761 cpu_ecache_set_size(struct cpu *cp)
   3762 {
   3763 	if (CPU_PRIVATE(cp))
   3764 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
   3765 
   3766 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
   3767 }
   3768 
   3769 /*
   3770  * Flush Ecache line.
   3771  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
   3772  * Uses normal displacement flush for Cheetah.
   3773  */
   3774 static void
   3775 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
   3776 {
   3777 	struct async_flt *aflt = (struct async_flt *)ch_flt;
   3778 	int ec_set_size = cpu_ecache_set_size(CPU);
   3779 
   3780 	ecache_flush_line(aflt->flt_addr, ec_set_size);
   3781 }
   3782 
   3783 /*
   3784  * Scrub physical address.
   3785  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
   3786  * Ecache or direct-mapped Ecache.
   3787  */
   3788 static void
   3789 cpu_scrubphys(struct async_flt *aflt)
   3790 {
   3791 	int ec_set_size = cpu_ecache_set_size(CPU);
   3792 
   3793 	scrubphys(aflt->flt_addr, ec_set_size);
   3794 }
   3795 
   3796 /*
   3797  * Clear physical address.
   3798  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
   3799  * Ecache or direct-mapped Ecache.
   3800  */
   3801 void
   3802 cpu_clearphys(struct async_flt *aflt)
   3803 {
   3804 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
   3805 	int ec_set_size = cpu_ecache_set_size(CPU);
   3806 
   3807 
   3808 	clearphys(aflt->flt_addr, ec_set_size, lsize);
   3809 }
   3810 
   3811 #if defined(CPU_IMP_ECACHE_ASSOC)
   3812 /*
   3813  * Check for a matching valid line in all the sets.
   3814  * If found, return set# + 1. Otherwise return 0.
   3815  */
   3816 static int
   3817 cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
   3818 {
   3819 	struct async_flt *aflt = (struct async_flt *)ch_flt;
   3820 	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
   3821 	int ec_set_size = cpu_ecache_set_size(CPU);
   3822 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
   3823 	int nway = cpu_ecache_nway();
   3824 	int i;
   3825 
   3826 	for (i = 0; i < nway; i++, ecp++) {
   3827 		if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
   3828 		    (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
   3829 		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
   3830 			return (i+1);
   3831 	}
   3832 	return (0);
   3833 }
   3834 #endif /* CPU_IMP_ECACHE_ASSOC */
   3835 
   3836 /*
   3837  * Check whether a line in the given logout info matches the specified
   3838  * fault address.  If reqval is set then the line must not be Invalid.
   3839  * Returns 0 on failure;  on success (way + 1) is returned an *level is
   3840  * set to 2 for l2$ or 3 for l3$.
   3841  */
   3842 static int
   3843 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
   3844 {
   3845 	ch_diag_data_t *cdp = data;
   3846 	ch_ec_data_t *ecp;
   3847 	int totalsize, ec_set_size;
   3848 	int i, ways;
   3849 	int match = 0;
   3850 	int tagvalid;
   3851 	uint64_t addr, tagpa;
   3852 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
   3853 
   3854 	/*
   3855 	 * Check the l2$ logout data
   3856 	 */
   3857 	if (ispanther) {
   3858 		ecp = &cdp->chd_l2_data[0];
   3859 		ec_set_size = PN_L2_SET_SIZE;
   3860 		ways = PN_L2_NWAYS;
   3861 	} else {
   3862 		ecp = &cdp->chd_ec_data[0];
   3863 		ec_set_size = cpu_ecache_set_size(CPU);
   3864 		ways = cpu_ecache_nway();
   3865 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
   3866 	}
   3867 	/* remove low order PA bits from fault address not used in PA tag */
   3868 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
   3869 	for (i = 0; i < ways; i++, ecp++) {
   3870 		if (ispanther) {
   3871 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
   3872 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
   3873 		} else {
   3874 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
   3875 			tagvalid = !cpu_ectag_line_invalid(totalsize,
   3876 			    ecp->ec_tag);
   3877 		}
   3878 		if (tagpa == addr && (!reqval || tagvalid)) {
   3879 			match = i + 1;
   3880 			*level = 2;
   3881 			break;
   3882 		}
   3883 	}
   3884 
   3885 	if (match || !ispanther)
   3886 		return (match);
   3887 
   3888 	/* For Panther we also check the l3$ */
   3889 	ecp = &cdp->chd_ec_data[0];
   3890 	ec_set_size = PN_L3_SET_SIZE;
   3891 	ways = PN_L3_NWAYS;
   3892 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
   3893 
   3894 	for (i = 0; i < ways; i++, ecp++) {
   3895 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
   3896 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
   3897 			match = i + 1;
   3898 			*level = 3;
   3899 			break;
   3900 		}
   3901 	}
   3902 
   3903 	return (match);
   3904 }
   3905 
   3906 #if defined(CPU_IMP_L1_CACHE_PARITY)
   3907 /*
   3908  * Record information related to the source of an Dcache Parity Error.
   3909  */
   3910 static void
   3911 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
   3912 {
   3913 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
   3914 	int index;
   3915 
   3916 	/*
   3917 	 * Since instruction decode cannot be done at high PIL
   3918 	 * just examine the entire Dcache to locate the error.
   3919 	 */
   3920 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
   3921 		ch_flt->parity_data.dpe.cpl_way = -1;
   3922 		ch_flt->parity_data.dpe.cpl_off = -1;
   3923 	}
   3924 	for (index = 0; index < dc_set_size; index += dcache_linesize)
   3925 		cpu_dcache_parity_check(ch_flt, index);
   3926 }
   3927 
   3928 /*
   3929  * Check all ways of the Dcache at a specified index for good parity.
   3930  */
   3931 static void
   3932 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
   3933 {
   3934 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
   3935 	uint64_t parity_bits, pbits, data_word;
   3936 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
   3937 	int way, word, data_byte;
   3938 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
   3939 	ch_dc_data_t tmp_dcp;
   3940 
   3941 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
   3942 		/*
   3943 		 * Perform diagnostic read.
   3944 		 */
   3945 		get_dcache_dtag(index + way * dc_set_size,
   3946 		    (uint64_t *)&tmp_dcp);
   3947 
   3948 		/*
   3949 		 * Check tag for even parity.
   3950 		 * Sum of 1 bits (including parity bit) should be even.
   3951 		 */
   3952 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
   3953 			/*
   3954 			 * If this is the first error log detailed information
   3955 			 * about it and check the snoop tag. Otherwise just
   3956 			 * record the fact that we found another error.
   3957 			 */
   3958 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
   3959 				ch_flt->parity_data.dpe.cpl_way = way;
   3960 				ch_flt->parity_data.dpe.cpl_cache =
   3961 				    CPU_DC_PARITY;
   3962 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
   3963 
   3964 				if (popc64(tmp_dcp.dc_sntag &
   3965 				    CHP_DCSNTAG_PARMASK) & 1) {
   3966 					ch_flt->parity_data.dpe.cpl_tag |=
   3967 					    CHP_DC_SNTAG;
   3968 					ch_flt->parity_data.dpe.cpl_lcnt++;
   3969 				}
   3970 
   3971 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
   3972 			}
   3973 
   3974 			ch_flt->parity_data.dpe.cpl_lcnt++;
   3975 		}
   3976 
   3977 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
   3978 			/*
   3979 			 * Panther has more parity bits than the other
   3980 			 * processors for covering dcache data and so each
   3981 			 * byte of data in each word has its own parity bit.
   3982 			 */
   3983 			parity_bits = tmp_dcp.dc_pn_data_parity;
   3984 			for (word = 0; word < 4; word++) {
   3985 				data_word = tmp_dcp.dc_data[word];
   3986 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
   3987 				for (data_byte = 0; data_byte < 8;
   3988 				    data_byte++) {
   3989 					if (((popc64(data_word &
   3990 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
   3991 					    (pbits & 1)) {
   3992 						cpu_record_dc_data_parity(
   3993 						    ch_flt, dcp, &tmp_dcp, way,
   3994 						    word);
   3995 					}
   3996 					pbits >>= 1;
   3997 					data_word >>= 8;
   3998 				}
   3999 				parity_bits >>= 8;
   4000 			}
   4001 		} else {
   4002 			/*
   4003 			 * Check data array for even parity.
   4004 			 * The 8 parity bits are grouped into 4 pairs each
   4005 			 * of which covers a 64-bit word.  The endianness is
   4006 			 * reversed -- the low-order parity bits cover the
   4007 			 * high-order data words.
   4008 			 */
   4009 			parity_bits = tmp_dcp.dc_utag >> 8;
   4010 			for (word = 0; word < 4; word++) {
   4011 				pbits = (parity_bits >> (6 - word * 2)) & 3;
   4012 				if ((popc64(tmp_dcp.dc_data[word]) +
   4013 				    parity_bits_popc[pbits]) & 1) {
   4014 					cpu_record_dc_data_parity(ch_flt, dcp,
   4015 					    &tmp_dcp, way, word);
   4016 				}
   4017 			}
   4018 		}
   4019 	}
   4020 }
   4021 
   4022 static void
   4023 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
   4024     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
   4025 {
   4026 	/*
   4027 	 * If this is the first error log detailed information about it.
   4028 	 * Otherwise just record the fact that we found another error.
   4029 	 */
   4030 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
   4031 		ch_flt->parity_data.dpe.cpl_way = way;
   4032 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
   4033 		ch_flt->parity_data.dpe.cpl_off = word * 8;
   4034 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
   4035 	}
   4036 	ch_flt->parity_data.dpe.cpl_lcnt++;
   4037 }
   4038 
   4039 /*
   4040  * Record information related to the source of an Icache Parity Error.
   4041  *
   4042  * Called with the Icache disabled so any diagnostic accesses are safe.
   4043  */
   4044 static void
   4045 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
   4046 {
   4047 	int	ic_set_size;
   4048 	int	ic_linesize;
   4049 	int	index;
   4050 
   4051 	if (CPU_PRIVATE(CPU)) {
   4052 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
   4053 		    CH_ICACHE_NWAY;
   4054 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
   4055 	} else {
   4056 		ic_set_size = icache_size / CH_ICACHE_NWAY;
   4057 		ic_linesize = icache_linesize;
   4058 	}
   4059 
   4060 	ch_flt->parity_data.ipe.cpl_way = -1;
   4061 	ch_flt->parity_data.ipe.cpl_off = -1;
   4062 
   4063 	for (index = 0; index < ic_set_size; index += ic_linesize)
   4064 		cpu_icache_parity_check(ch_flt, index);
   4065 }
   4066 
   4067 /*
   4068  * Check all ways of the Icache at a specified index for good parity.
   4069  */
   4070 static void
   4071 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
   4072 {
   4073 	uint64_t parmask, pn_inst_parity;
   4074 	int ic_set_size;
   4075 	int ic_linesize;
   4076 	int flt_index, way, instr, num_instr;
   4077 	struct async_flt *aflt = (struct async_flt *)ch_flt;
   4078 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
   4079 	ch_ic_data_t tmp_icp;
   4080 
   4081 	if (CPU_PRIVATE(CPU)) {
   4082 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
   4083 		    CH_ICACHE_NWAY;
   4084 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
   4085 	} else {
   4086 		ic_set_size = icache_size / CH_ICACHE_NWAY;
   4087 		ic_linesize = icache_linesize;
   4088 	}
   4089 
   4090 	/*
   4091 	 * Panther has twice as many instructions per icache line and the
   4092 	 * instruction parity bit is in a different location.
   4093 	 */
   4094 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
   4095 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
   4096 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
   4097 	} else {
   4098 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
   4099 		pn_inst_parity = 0;
   4100 	}
   4101 
   4102 	/*
   4103 	 * Index at which we expect to find the parity error.
   4104 	 */
   4105 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
   4106 
   4107 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
   4108 		/*
   4109 		 * Diagnostic reads expect address argument in ASI format.
   4110 		 */
   4111 		get_icache_dtag(2 * (index + way * ic_set_size),
   4112 		    (uint64_t *)&tmp_icp);
   4113 
   4114 		/*
   4115 		 * If this is the index in which we expect to find the
   4116 		 * error log detailed information about each of the ways.
   4117 		 * This information will be displayed later if we can't
   4118 		 * determine the exact way in which the error is located.
   4119 		 */
   4120 		if (flt_index == index)
   4121 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
   4122 
   4123 		/*
   4124 		 * Check tag for even parity.
   4125 		 * Sum of 1 bits (including parity bit) should be even.
   4126 		 */
   4127 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
   4128 			/*
   4129 			 * If this way is the one in which we expected
   4130 			 * to find the error record the way and check the
   4131 			 * snoop tag. Otherwise just record the fact we
   4132 			 * found another error.
   4133 			 */
   4134 			if (flt_index == index) {
   4135 				ch_flt->parity_data.ipe.cpl_way = way;
   4136 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
   4137 
   4138 				if (popc64(tmp_icp.ic_sntag &
   4139 				    CHP_ICSNTAG_PARMASK) & 1) {
   4140 					ch_flt->parity_data.ipe.cpl_tag |=
   4141 					    CHP_IC_SNTAG;
   4142 					ch_flt->parity_data.ipe.cpl_lcnt++;
   4143 				}
   4144 
   4145 			}
   4146 			ch_flt->parity_data.ipe.cpl_lcnt++;
   4147 			continue;
   4148 		}
   4149 
   4150 		/*
   4151 		 * Check instruction data for even parity.
   4152 		 * Bits participating in parity differ for PC-relative
   4153 		 * versus non-PC-relative instructions.
   4154 		 */
   4155 		for (instr = 0; instr < num_instr; instr++) {
   4156 			parmask = (tmp_icp.ic_data[instr] &
   4157 			    CH_ICDATA_PRED_ISPCREL) ?
   4158 			    (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
   4159 			    (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
   4160 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
   4161 				/*
   4162 				 * If this way is the one in which we expected
   4163 				 * to find the error record the way and offset.
   4164 				 * Otherwise just log the fact we found another
   4165 				 * error.
   4166 				 */
   4167 				if (flt_index == index) {
   4168 					ch_flt->parity_data.ipe.cpl_way = way;
   4169 					ch_flt->parity_data.ipe.cpl_off =
   4170 					    instr * 4;
   4171 				}
   4172 				ch_flt->parity_data.ipe.cpl_lcnt++;
   4173 				continue;
   4174 			}
   4175 		}
   4176 	}
   4177 }
   4178 
   4179 /*
   4180  * Record information related to the source of an Pcache Parity Error.
   4181  */
   4182 static void
   4183 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
   4184 {
   4185 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
   4186 	int index;
   4187 
   4188 	/*
   4189 	 * Since instruction decode cannot be done at high PIL just
   4190 	 * examine the entire Pcache to check for any parity errors.
   4191 	 */
   4192 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
   4193 		ch_flt->parity_data.dpe.cpl_way = -1;
   4194 		ch_flt->parity_data.dpe.cpl_off = -1;
   4195 	}
   4196 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
   4197 		cpu_pcache_parity_check(ch_flt, index);
   4198 }
   4199 
   4200 /*
   4201  * Check all ways of the Pcache at a specified index for good parity.
   4202  */
   4203 static void
   4204 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
   4205 {
   4206 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
   4207 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
   4208 	int way, word, pbit, parity_bits;
   4209 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
   4210 	ch_pc_data_t tmp_pcp;
   4211 
   4212 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
   4213 		/*
   4214 		 * Perform diagnostic read.
   4215 		 */
   4216 		get_pcache_dtag(index + way * pc_set_size,
   4217 		    (uint64_t *)&tmp_pcp);
   4218 		/*
   4219 		 * Check data array for odd parity. There are 8 parity
   4220 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
   4221 		 * of those bits covers exactly 8 bytes of the data
   4222 		 * array:
   4223 		 *
   4224 		 *	parity bit	P$ data bytes covered
   4225 		 *	----------	---------------------
   4226 		 *	50		63:56
   4227 		 *	51		55:48
   4228 		 *	52		47:40
   4229 		 *	53		39:32
   4230 		 *	54		31:24
   4231 		 *	55		23:16
   4232 		 *	56		15:8
   4233 		 *	57		7:0
   4234 		 */
   4235 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
   4236 		for (word = 0; word < pc_data_words; word++) {
   4237 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
   4238 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
   4239 				/*
   4240 				 * If this is the first error log detailed
   4241 				 * information about it. Otherwise just record
   4242 				 * the fact that we found another error.
   4243 				 */
   4244 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
   4245 					ch_flt->parity_data.dpe.cpl_way = way;
   4246 					ch_flt->parity_data.dpe.cpl_cache =
   4247 					    CPU_PC_PARITY;
   4248 					ch_flt->parity_data.dpe.cpl_off =
   4249 					    word * sizeof (uint64_t);
   4250 					bcopy(&tmp_pcp, pcp,
   4251 					    sizeof (ch_pc_data_t));
   4252 				}
   4253 				ch_flt->parity_data.dpe.cpl_lcnt++;
   4254 			}
   4255 		}
   4256 	}
   4257 }
   4258 
   4259 
   4260 /*
   4261  * Add L1 Data cache data to the ereport payload.
   4262  */
   4263 static void
   4264 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
   4265 {
   4266 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
   4267 	ch_dc_data_t *dcp;
   4268 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
   4269 	uint_t nelem;
   4270 	int i, ways_to_check, ways_logged = 0;
   4271 
   4272 	/*
   4273 	 * If this is an D$ fault then there may be multiple
   4274 	 * ways captured in the ch_parity_log_t structure.
   4275 	 * Otherwise, there will be at most one way captured
   4276 	 * in the ch_diag_data_t struct.
   4277 	 * Check each way to see if it should be encoded.
   4278 	 */
   4279 	if (ch_flt->flt_type == CPU_DC_PARITY)
   4280 		ways_to_check = CH_DCACHE_NWAY;
   4281 	else
   4282 		ways_to_check = 1;
   4283 	for (i = 0; i < ways_to_check; i++) {
   4284 		if (ch_flt->flt_type == CPU_DC_PARITY)
   4285 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
   4286 		else
   4287 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
   4288 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
   4289 			bcopy(dcp, &dcdata[ways_logged],
   4290 			    sizeof (ch_dc_data_t));
   4291 			ways_logged++;
   4292 		}
   4293 	}
   4294 
   4295 	/*
   4296 	 * Add the dcache data to the payload.
   4297 	 */
   4298 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
   4299 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
   4300 	if (ways_logged != 0) {
   4301 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
   4302 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
   4303 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
   4304 	}
   4305 }
   4306 
   4307 /*
   4308  * Add L1 Instruction cache data to the ereport payload.
   4309  */
   4310 static void
   4311 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
   4312 {
   4313 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
   4314 	ch_ic_data_t *icp;
   4315 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
   4316 	uint_t nelem;
   4317 	int i, ways_to_check, ways_logged = 0;
   4318 
   4319 	/*
   4320 	 * If this is an I$ fault then there may be multiple
   4321 	 * ways captured in the ch_parity_log_t structure.
   4322 	 * Otherwise, there will be at most one way captured
   4323 	 * in the ch_diag_data_t struct.
   4324 	 * Check each way to see if it should be encoded.
   4325 	 */
   4326 	if (ch_flt->flt_type == CPU_IC_PARITY)
   4327 		ways_to_check = CH_ICACHE_NWAY;
   4328 	else
   4329 		ways_to_check = 1;
   4330 	for (i = 0; i < ways_to_check; i++) {
   4331 		if (ch_flt->flt_type == CPU_IC_PARITY)
   4332 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
   4333 		else
   4334 			icp = &ch_flt->flt_diag_data.chd_ic_data;
   4335 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
   4336 			bcopy(icp, &icdata[ways_logged],
   4337 			    sizeof (ch_ic_data_t));
   4338 			ways_logged++;
   4339 		}
   4340 	}
   4341 
   4342 	/*
   4343 	 * Add the icache data to the payload.
   4344 	 */
   4345 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
   4346 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
   4347 	if (ways_logged != 0) {
   4348 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
   4349 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
   4350 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
   4351 	}
   4352 }
   4353 
   4354 #endif	/* CPU_IMP_L1_CACHE_PARITY */
   4355 
   4356 /*
   4357  * Add ecache data to payload.
   4358  */
   4359 static void
   4360 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
   4361 {
   4362 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
   4363 	ch_ec_data_t *ecp;
   4364 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
   4365 	uint_t nelem;
   4366 	int i, ways_logged = 0;
   4367 
   4368 	/*
   4369 	 * Check each way to see if it should be encoded
   4370 	 * and concatinate it into a temporary buffer.
   4371 	 */
   4372 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
   4373 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
   4374 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
   4375 			bcopy(ecp, &ecdata[ways_logged],
   4376 			    sizeof (ch_ec_data_t));
   4377 			ways_logged++;
   4378 		}
   4379 	}
   4380 
   4381 	/*
   4382 	 * Panther CPUs have an additional level of cache and so
   4383 	 * what we just collected was the L3 (ecache) and not the
   4384 	 * L2 cache.
   4385 	 */
   4386 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
   4387 		/*
   4388 		 * Add the L3 (ecache) data to the payload.
   4389 		 */
   4390 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
   4391 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
   4392 		if (ways_logged != 0) {
   4393 			nelem = sizeof (ch_ec_data_t) /
   4394 			    sizeof (uint64_t) * ways_logged;
   4395 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
   4396 			    DATA_TYPE_UINT64_ARRAY, nelem,
   4397 			    (uint64_t *)ecdata, NULL);
   4398 		}
   4399 
   4400 		/*
   4401 		 * Now collect the L2 cache.
   4402 		 */
   4403 		ways_logged = 0;
   4404 		for (i = 0; i < PN_L2_NWAYS; i++) {
   4405 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
   4406 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
   4407 				bcopy(ecp, &ecdata[ways_logged],
   4408 				    sizeof (ch_ec_data_t));
   4409 				ways_logged++;
   4410 			}
   4411 		}
   4412 	}
   4413 
   4414 	/*
   4415 	 * Add the L2 cache data to the payload.
   4416 	 */
   4417 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
   4418 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
   4419 	if (ways_logged != 0) {
   4420 		nelem = sizeof (ch_ec_data_t) /
   4421 		    sizeof (uint64_t) * ways_logged;
   4422 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
   4423 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
   4424 	}
   4425 }
   4426 
   4427 /*
   4428  * Initialize cpu scheme for specified cpu.
   4429  */
   4430 static void
   4431 cpu_fmri_cpu_set(nvlist_t *cpu_fmri, int cpuid)
   4432 {
   4433 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
   4434 	uint8_t mask;
   4435 
   4436 	mask = cpunodes[cpuid].version;
   4437 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
   4438 	    (u_longlong_t)cpunodes[cpuid].device_id);
   4439 	(void) fm_fmri_cpu_set(cpu_fmri, FM_CPU_SCHEME_VERSION, NULL,
   4440 	    cpuid, &mask, (const char *)sbuf);
   4441 }
   4442 
   4443 /*
   4444  * Returns ereport resource type.
   4445  */
   4446 static int
   4447 cpu_error_to_resource_type(struct async_flt *aflt)
   4448 {
   4449 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
   4450 
   4451 	switch (ch_flt->flt_type) {
   4452 
   4453 	case CPU_CE_ECACHE:
   4454 	case CPU_UE_ECACHE:
   4455 	case CPU_UE_ECACHE_RETIRE:
   4456 	case CPU_ORPH:
   4457 		/*
   4458 		 * If AFSR error bit indicates L2$ Data for Cheetah,
   4459 		 * Cheetah+ or Jaguar, or L3$ Data for Panther, return
   4460 		 * E$ Data type, otherwise, return CPU type.
   4461 		 */
   4462 		if (cpu_error_is_ecache_data(aflt->flt_inst,
   4463 		    ch_flt->flt_bit))
   4464 			return (ERRTYPE_ECACHE_DATA);
   4465 		return (ERRTYPE_CPU);
   4466 
   4467 	case CPU_CE:
   4468 	case CPU_UE:
   4469 	case CPU_EMC:
   4470 	case CPU_DUE:
   4471 	case CPU_RCE:
   4472 	case CPU_RUE:
   4473 	case CPU_FRC:
   4474 	case CPU_FRU:
   4475 		return (ERRTYPE_MEMORY);
   4476 
   4477 	case CPU_IC_PARITY:
   4478 	case CPU_DC_PARITY:
   4479 	case CPU_FPUERR:
   4480 	case CPU_PC_PARITY:
   4481 	case CPU_ITLB_PARITY:
   4482 	case CPU_DTLB_PARITY:
   4483 		return (ERRTYPE_CPU);
   4484 	}
   4485 	return (ERRTYPE_UNKNOWN);
   4486 }
   4487 
/*
 * Encode the data saved in the ch_async_flt_t struct into
 * the FM ereport payload.
 *
 * Each bit set in aflt->flt_payload selects one member (or group of
 * members) to add to the payload nvlist; members are added in the order
 * in which the tests appear below.
 *
 *   aflt	 fault being encoded; also viewed as a ch_async_flt_t.
 *   payload	 nvlist that receives the ereport members.
 *   resource	 nvlist filled in with the mem or cpu FMRI when the
 *		 RESOURCE flag is set and a resource can be identified.
 *   afar_status out: AFLT_STAT_INVALID unless the AFAR_STATUS flag is set,
 *		 in which case it is the result of afsr_to_afar_status().
 *   synd_status out: AFLT_STAT_INVALID unless the SYND_STATUS flag is set,
 *		 in which case it is the result of afsr_to_synd_status().
 *
 * Side effect: when a memory or E$ data resource is looked up, the
 * ECC_ECACHE bit in aflt->flt_status is set or cleared to match the
 * resource type.
 */
static void
cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
	nvlist_t *resource, int *afar_status, int *synd_status)
{
	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
	/* Both out parameters default to "invalid" until computed below. */
	*synd_status = AFLT_STAT_INVALID;
	*afar_status = AFLT_STAT_INVALID;

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
	}

	/* The extended AFSR is only reported for Panther. */
	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
		    ch_flt->flt_bit);
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
		    DATA_TYPE_BOOLEAN_VALUE,
		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
	}

	/* ME is reported as whether C_AFSR_ME is set in the saved AFSR. */
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
		    DATA_TYPE_BOOLEAN_VALUE,
		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
		*synd_status = afsr_to_synd_status(aflt->flt_inst,
		    ch_flt->afsr_errs, ch_flt->flt_bit);
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
	}

	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
		cpu_payload_add_ecache(aflt, payload);

	/* COPYFUNCTION is taken from the low byte of flt_status. */
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
	}

	/* HOWDETECTED is taken from flt_status shifted right by 8 bits. */
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
	}

	/* The instruction block is reported as 16 32-bit words of flt_fpdata. */
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
		    DATA_TYPE_UINT32_ARRAY, 16,
		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
	}

#if defined(CPU_IMP_L1_CACHE_PARITY)
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
		cpu_payload_add_dcache(aflt, payload);
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
		cpu_payload_add_icache(aflt, payload);
#endif	/* CPU_IMP_L1_CACHE_PARITY */

#if defined(CHEETAH_PLUS)
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
		cpu_payload_add_pcache(aflt, payload);
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
		cpu_payload_add_tlb(aflt, payload);
#endif	/* CHEETAH_PLUS */
	/*
	 * Create the FMRI that goes into the payload
	 * and contains the unum info if necessary.
	 */
	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) {
		char unum[UNUM_NAMLEN] = "";
		char sid[DIMM_SERIAL_ID_LEN] = "";
		int len, ret, rtype, synd_code;
		/* All-ones offset is what is passed on if no offset is found. */
		uint64_t offset = (uint64_t)-1;

		rtype = cpu_error_to_resource_type(aflt);
		switch (rtype) {

		case ERRTYPE_MEMORY:
		case ERRTYPE_ECACHE_DATA:

			/*
			 * Memory errors, do unum lookup
			 *
			 * No resource is added when the AFAR status is
			 * invalid (which includes the case where the
			 * AFAR_STATUS flag was not set above).
			 */
			if (*afar_status == AFLT_STAT_INVALID)
				break;

			/*
			 * Make ECC_ECACHE in flt_status agree with the
			 * resource type before the unum lookup.
			 */
			if (rtype == ERRTYPE_ECACHE_DATA)
				aflt->flt_status |= ECC_ECACHE;
			else
				aflt->flt_status &= ~ECC_ECACHE;

			synd_code = synd_to_synd_code(*synd_status,
			    aflt->flt_synd, ch_flt->flt_bit);

			/* Without a unum there is no resource to report. */
			if (cpu_get_mem_unum_synd(synd_code, aflt, unum) != 0)
				break;

			ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN,
			    &len);

			/*
			 * The offset is only looked up when the serial id
			 * lookup succeeded; the serial id itself is only
			 * passed on in that case too.
			 */
			if (ret == 0) {
				(void) cpu_get_mem_offset(aflt->flt_addr,
				    &offset);
			}

			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
			    NULL, unum, (ret == 0) ? sid : NULL, offset);
			fm_payload_set(payload,
			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
			    DATA_TYPE_NVLIST, resource, NULL);
			break;

		case ERRTYPE_CPU:
			/*
			 * On-board processor array error, add cpu resource.
			 */
			cpu_fmri_cpu_set(resource, aflt->flt_inst);
			fm_payload_set(payload,
			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
			    DATA_TYPE_NVLIST, resource, NULL);
			break;
		}
	}
}
   4664 
   4665 /*
   4666  * Initialize the way info if necessary.
   4667  */
   4668 void
   4669 cpu_ereport_init(struct async_flt *aflt)
   4670 {
   4671 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
   4672 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
   4673 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
   4674 	int i;
   4675 
   4676 	/*
   4677 	 * Initialize the info in the CPU logout structure.
   4678 	 * The I$/D$ way information is not initialized here
   4679 	 * since it is captured in the logout assembly code.
   4680 	 */
   4681 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
   4682 		(ecp + i)->ec_way = i;
   4683 
   4684 	for (i = 0; i < PN_L2_NWAYS; i++)
   4685 		(l2p + i)->ec_way = i;
   4686 }
   4687 
   4688 /*
   4689  * Returns whether fault address is valid for this error bit and
   4690  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
   4691  */
   4692 int
   4693 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
   4694 {
   4695 	struct async_flt *aflt = (struct async_flt *)ch_flt;
   4696 
   4697 	return ((t_afsr_bit & C_AFSR_MEMORY) &&
   4698 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
   4699 	    AFLT_STAT_VALID &&
   4700 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
   4701 }
   4702 
   4703 /*
   4704  * Returns whether fault address is valid based on the error bit for the
   4705  * one event being queued and whether the address is "in memory".
   4706  */
   4707 static int
   4708 cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
   4709 {
   4710 	struct async_flt *aflt = (struct async_flt *)ch_flt;
   4711 	int afar_status;
   4712 	uint64_t afsr_errs, afsr_ow, *ow_bits;
   4713 
   4714 	if (!(t_afsr_bit & C_AFSR_MEMORY) ||
   4715 	    !pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
   4716 		return (0);
   4717 
   4718 	afsr_errs = ch_flt->afsr_errs;
   4719 	afar_status = afsr_to_afar_status(afsr_errs, t_afsr_bit);
   4720 
   4721 	switch (afar_status) {
   4722 	case AFLT_STAT_VALID:
   4723 		return (1);
   4724 
   4725 	case AFLT_STAT_AMBIGUOUS:
   4726 		/*
   4727 		 * Status is ambiguous since another error bit (or bits)
   4728 		 * of equal priority to the specified bit on in the afsr,
   4729 		 * so check those bits. Return 1 only if the bits on in the
   4730 		 * same class as the t_afsr_bit are also C_AFSR_MEMORY bits.
   4731 		 * Otherwise not all the equal priority bits are for memory
   4732 		 * errors, so return 0.
   4733 		 */
   4734 		ow_bits = afar_overwrite;
   4735 		while ((afsr_ow = *ow_bits++) != 0) {
   4736 			/*
   4737 			 * Get other bits that are on in t_afsr_bit's priority
   4738 			 * class to check for Memory Error bits only.
   4739 			 */
   4740 			if (afsr_ow & t_afsr_bit) {
   4741 				if ((afsr_errs & afsr_ow) & ~C_AFSR_MEMORY)
   4742 					return (0);
   4743 				else
   4744 					return (1);
   4745 			}
   4746 		}
   4747 		/*FALLTHRU*/
   4748 
   4749 	default:
   4750 		return (0);
   4751 	}
   4752 }
   4753 
   4754 static void
   4755 cpu_log_diag_info(ch_async_flt_t *ch_flt)
   4756 {
   4757 	struct async_flt *aflt = (struct async_flt *)ch_flt;
   4758 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
   4759 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
   4760 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
   4761 #if defined(CPU_IMP_ECACHE_ASSOC)
   4762 	int i, nway;
   4763 #endif /* CPU_IMP_ECACHE_ASSOC */
   4764 
   4765 	/*
   4766 	 * Check if the CPU log out captured was valid.
   4767 	 */
   4768 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
   4769 	    ch_flt->flt_data_incomplete)
   4770 		return;
   4771 
   4772 #if defined(CPU_IMP_ECACHE_ASSOC)
   4773 	nway = cpu_ecache_nway();
   4774 	i =  cpu_ecache_line_valid(ch_flt);
   4775 	if (i == 0 || i > nway) {
   4776 		for (i = 0; i < nway; i++)
   4777 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
   4778 	} else
   4779 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
   4780 #else /* CPU_IMP_ECACHE_ASSOC */
   4781 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
   4782 #endif /* CPU_IMP_ECACHE_ASSOC */
   4783 
   4784 #if defined(CHEETAH_PLUS)
   4785 	pn_cpu_log_diag_l2_info(ch_flt);
   4786 #endif /* CHEETAH_PLUS */
   4787 
   4788 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
   4789 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
   4790 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
   4791 	}
   4792 
   4793 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
   4794 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
   4795 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
   4796 		else
   4797 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
   4798 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
   4799 	}
   4800 }
   4801 
/*
 * Cheetah ECC calculation.
 *
 * We only need to do the calculation on the data bits and can ignore check
 * bit and Mtag bit terms in the calculation.
 *
 * The table has one row per generated ECC bit (9 rows).  Each row holds
 * two 64-bit masks: column 0 is applied to the low-order 64 data bits and
 * column 1 to the high-order 64 data bits of a 128-bit chunk (see
 * us3_gen_ecc()).
 */
static uint64_t ch_ecc_table[9][2] = {
	/*
	 * low order 64-bits   high-order 64-bits
	 */
	{ 0x46bffffeccd1177f, 0x488800022100014c },
	{ 0x42fccc81331ff77f, 0x14424f1010249184 },
	{ 0x8898827c222f1ffe, 0x22c1222808184aaf },
	{ 0xf7632203e131ccf1, 0xe1241121848292b8 },
	{ 0x7f5511421b113809, 0x901c88d84288aafe },
	{ 0x1d49412184882487, 0x8f338c87c044c6ef },
	{ 0xf552181014448344, 0x7ff8f4443e411911 },
	{ 0x2189240808f24228, 0xfeeff8cc81333f42 },
	{ 0x3280008440001112, 0xfee88b337ffffd62 },
};
   4822 
   4823 /*
   4824  * 64-bit population count, use well-known popcnt trick.
   4825  * We could use the UltraSPARC V9 POPC instruction, but some
   4826  * CPUs including Cheetahplus and Jaguar do not support that
   4827  * instruction.
   4828  */
   4829 int
   4830 popc64(uint64_t val)
   4831 {
   4832 	int cnt;
   4833 
   4834 	for (cnt = 0; val != 0; val &= val - 1)
   4835 		cnt++;
   4836 	return (cnt);
   4837 }
   4838 
   4839 /*
   4840  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
   4841  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
   4842  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
   4843  * instead of doing all the xor's.
   4844  */
   4845 uint32_t
   4846 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
   4847 {
   4848 	int bitno, s;
   4849 	int synd = 0;
   4850 
   4851 	for (bitno = 0; bitno < 9; bitno++) {
   4852 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
   4853 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
   4854 		synd |= (s << bitno);
   4855 	}
   4856 	return (synd);
   4857 
   4858 }
   4859 
   4860 /*
   4861  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
   4862  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
   4863  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
   4864  */
   4865 static void
   4866 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
   4867     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
   4868 {
   4869 	struct async_flt *aflt = (struct async_flt *)ch_flt;
   4870 
   4871 	if (reason &&
   4872 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
   4873 		(void) strcat(reason, eccp->ec_reason);
   4874 	}
   4875 
   4876 	ch_flt->flt_bit = eccp->ec_afsr_bit;
   4877 	ch_flt->flt_type = eccp->ec_flt_type;
   4878 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
   4879 		ch_flt->flt_diag_data = *cdp;
   4880 	else
   4881 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
   4882 	aflt->flt_in_memory =
   4883 	    cpu_flt_in_memory_one_event(ch_flt, ch_flt->flt_bit);
   4884 
   4885 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
   4886 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
   4887 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
   4888 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
   4889 	else
   4890 		aflt->flt_synd = 0;
   4891 
   4892 	aflt->flt_payload = eccp->ec_err_payload;
   4893 
   4894 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
   4895 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
   4896 		cpu_errorq_dispatch(eccp->ec_err_class,
   4897 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
   4898 		    aflt->flt_panic);
   4899 	else
   4900 		cpu_errorq_dispatch(eccp->ec_err_class,
   4901 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
   4902 		    aflt->flt_panic);
   4903 }
   4904 
/*
 * Queue events on async event queue one event per error bit.  First we
 * queue the events that we "expect" for the given trap, then we queue events
 * that we may not expect.  Return number of events queued.
 *
 *   ch_flt	  fault template; its AFSR/AFAR related fields are rewritten
 *		  for each event before it is handed to cpu_queue_one_event().
 *   reason	  passed through unchanged to cpu_queue_one_event().
 *   t_afsr_errs  error bits to queue; bits outside the known error masks
 *		  (and the ME bit) are masked off first.
 *   clop	  CPU logout data, or NULL when none is available.
 *
 * The primary AFAR, AFSR, AFSR_EXT and combined error bits are saved on
 * entry so that they can be restored into ch_flt after the Cheetah+ shadow
 * pass has overwritten them.
 */
int
cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
    ch_cpu_logout_t *clop)
{
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	ecc_type_to_info_t *eccp;
	int nevents = 0;
	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
#if defined(CHEETAH_PLUS)
	uint64_t orig_t_afsr_errs;
#endif
	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
	ch_diag_data_t *cdp = NULL;

	/* Restrict to known error bits; ME itself is never queued. */
	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);

#if defined(CHEETAH_PLUS)
	orig_t_afsr_errs = t_afsr_errs;

	/*
	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
	 */
	if (clop != NULL) {
		/*
		 * Set the AFSR and AFAR fields to the shadow registers.  The
		 * flt_addr and flt_stat fields will be reset to the primaries
		 * below, but the sdw_addr and sdw_stat will stay as the
		 * secondaries.
		 */
		cdp = &clop->clo_sdw_data;
		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);

		/*
		 * If the primary and shadow AFSR differ, tag the shadow as
		 * the first fault.
		 * (The comparison is made on the AFAR and on the combined
		 * error bits.)
		 */
		if ((primary_afar != cdp->chd_afar) ||
		    (primary_afsr_errs != ch_flt->afsr_errs)) {
			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
		}

		/*
		 * Check AFSR bits as well as AFSR_EXT bits in order of
		 * the AFAR overwrite priority. Our stored AFSR_EXT value
		 * is expected to be zero for those CPUs which do not have
		 * an AFSR_EXT register.
		 *
		 * Only the first event queued here carries the shadow
		 * diagnostic data; cdp is cleared afterwards.
		 */
		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
			if ((eccp->ec_afsr_bit &
			    (ch_flt->afsr_errs & t_afsr_errs)) &&
			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
				cdp = NULL;
				t_afsr_errs &= ~eccp->ec_afsr_bit;
				nevents++;
			}
		}

		/*
		 * If the ME bit is on in the primary AFSR turn all the
		 * error bits on again that may set the ME bit to make
		 * sure we see the ME AFSR error logs.
		 */
		if ((primary_afsr & C_AFSR_ME) != 0)
			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
	}
#endif	/* CHEETAH_PLUS */

	/* From here on the primary logout data is the default source. */
	if (clop != NULL)
		cdp = &clop->clo_data;

	/*
	 * Queue expected errors, error bit and fault type must match
	 * in the ecc_type_to_info table.
	 *
	 * Each queued bit is removed from t_afsr_errs, and the loop ends
	 * early once no bits remain.  Outside of SERRANO builds cdp is
	 * cleared after the first event so later events carry no
	 * diagnostic data.
	 */
	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
	    eccp++) {
		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
		    (eccp->ec_flags & aflt->flt_status) != 0) {
#if defined(SERRANO)
			/*
			 * For FRC/FRU errors on Serrano the afar2 captures
			 * the address and the associated data is
			 * in the shadow logout area.
			 */
			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
				if (clop != NULL)
					cdp = &clop->clo_sdw_data;
				aflt->flt_addr = ch_flt->afar2;
			} else {
				if (clop != NULL)
					cdp = &clop->clo_data;
				aflt->flt_addr = primary_afar;
			}
#else	/* SERRANO */
			aflt->flt_addr = primary_afar;
#endif	/* SERRANO */
			/* Restore the primary register values saved on entry. */
			aflt->flt_stat = primary_afsr;
			ch_flt->afsr_ext = primary_afsr_ext;
			ch_flt->afsr_errs = primary_afsr_errs;
			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
			cdp = NULL;
			t_afsr_errs &= ~eccp->ec_afsr_bit;
			nevents++;
		}
	}

	/*
	 * Queue unexpected errors, error bit only match.
	 *
	 * This pass picks up whatever bits are still left in t_afsr_errs,
	 * regardless of the ec_flags/flt_status match required above.
	 */
	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
	    eccp++) {
		if (eccp->ec_afsr_bit & t_afsr_errs) {
#if defined(SERRANO)
			/*
			 * For FRC/FRU errors on Serrano the afar2 captures
			 * the address and the associated data is
			 * in the shadow logout area.
			 */
			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
				if (clop != NULL)
					cdp = &clop->clo_sdw_data;
				aflt->flt_addr = ch_flt->afar2;
			} else {
				if (clop != NULL)
					cdp = &clop->clo_data;
				aflt->flt_addr = primary_afar;
			}
#else	/* SERRANO */
			aflt->flt_addr = primary_afar;
#endif	/* SERRANO */
			/* Restore the primary register values saved on entry. */
			aflt->flt_stat = primary_afsr;
			ch_flt->afsr_ext = primary_afsr_ext;
			ch_flt->afsr_errs = primary_afsr_errs;
			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
			cdp = NULL;
			t_afsr_errs &= ~eccp->ec_afsr_bit;
			nevents++;
		}
	}
	return (nevents);
}
   5057 
   5058 /*
   5059  * Return trap type number.
   5060  */
   5061 uint8_t
   5062 flt_to_trap_type(struct async_flt *aflt)
   5063 {
   5064 	if (aflt->flt_status & ECC_I_TRAP)
   5065 		return (TRAP_TYPE_ECC_I);
   5066 	if (aflt->flt_status & ECC_D_TRAP)
   5067 		return (TRAP_TYPE_ECC_D);
   5068 	if (aflt->flt_status & ECC_F_TRAP)
   5069 		return (TRAP_TYPE_ECC_F);
   5070 	if (aflt->flt_status & ECC_C_TRAP)
   5071 		return (TRAP_TYPE_ECC_C);
   5072 	if (aflt->flt_status & ECC_DP_TRAP)
   5073 		return (TRAP_TYPE_ECC_DP);
   5074 	if (aflt->flt_status & ECC_IP_TRAP)
   5075 		return (TRAP_TYPE_ECC_IP);
   5076 	if (aflt->flt_status & ECC_ITLB_TRAP)
   5077 		return (TRAP_TYPE_ECC_ITLB);
   5078 	if (aflt->flt_status & ECC_DTLB_TRAP)
   5079 		return (TRAP_TYPE_ECC_DTLB);
   5080 	return (TRAP_TYPE_UNKNOWN);
   5081 }
   5082 
/*
 * Decide an error type based on detector and leaky/partner tests.
 * The following array is used for quick translation - it must
 * stay in sync with ce_dispact_t.
 *
 * flt_to_error_type() indexes this array directly with CE_DISP_* values,
 * so the order of the description strings below is significant.
 */

static char *cetypes[] = {
	CE_DISP_DESC_U,
	CE_DISP_DESC_I,
	CE_DISP_DESC_PP,
	CE_DISP_DESC_P,
	CE_DISP_DESC_L,
	CE_DISP_DESC_PS,
	CE_DISP_DESC_S
};
   5098 
   5099 char *
   5100 flt_to_error_type(struct async_flt *aflt)
   5101 {
   5102 	ce_dispact_t dispact, disp;
   5103 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
   5104 
   5105 	/*
   5106 	 * The memory payload bundle is shared by some events that do
   5107 	 * not perform any classification.  For those flt_disp will be
   5108 	 * 0 and we will return "unknown".
   5109 	 */
   5110 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
   5111 		return (cetypes[CE_DISP_UNKNOWN]);
   5112 
   5113 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
   5114 
   5115 	/*
   5116 	 * It is also possible that no scrub/classification was performed
   5117 	 * by the detector, for instance where a disrupting error logged
   5118 	 * in the AFSR while CEEN was off in cpu_deferred_error.
   5119 	 */
   5120 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
   5121 		return (cetypes[CE_DISP_UNKNOWN]);
   5122 
   5123 	/*
   5124 	 * Lookup type in initial classification/action table
   5125 	 */
   5126 	dispact = CE_DISPACT(ce_disp_table,
   5127 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
   5128 	    CE_XDIAG_STATE(dtcrinfo),
   5129 	    CE_XDIAG_CE1SEEN(dtcrinfo),
   5130 	    CE_XDIAG_CE2SEEN(dtcrinfo));
   5131 
   5132 	/*
   5133 	 * A bad lookup is not something to panic production systems for.
   5134 	 */
   5135 	ASSERT(dispact != CE_DISP_BAD);
   5136 	if (dispact == CE_DISP_BAD)
   5137 		return (cetypes[CE_DISP_UNKNOWN]);
   5138 
   5139 	disp = CE_DISP(dispact);
   5140 
   5141 	switch (disp) {
   5142 	case CE_DISP_UNKNOWN:
   5143 	case CE_DISP_INTERMITTENT:
   5144 		break;
   5145 
   5146 	case CE_DISP_POSS_PERS:
   5147 		/*
   5148 		 * "Possible persistent" errors to which we have applied a valid
   5149 		 * leaky test can be separated into "persistent" or "leaky".
   5150 		 */
   5151 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
   5152 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
   5153 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
   5154 			    CE_XDIAG_CE2SEEN(lkyinfo))
   5155 				disp = CE_DISP_LEAKY;
   5156 			else
   5157 				disp = CE_DISP_PERS;
   5158 		}
   5159 		break;
   5160 
   5161 	case CE_DISP_POSS_STICKY:
   5162 		/*
   5163 		 * Promote "possible sticky" results that have been
   5164 		 * confirmed by a partner test to "sticky".  Unconfirmed
   5165 		 * "possible sticky" events are left at that status - we do not
   5166 		 * guess at any bad reader/writer etc status here.
   5167 		 */
   5168 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
   5169 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
   5170 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
   5171 			disp = CE_DISP_STICKY;
   5172 
   5173 		/*
   5174 		 * Promote "possible sticky" results on a uniprocessor
   5175 		 * to "sticky"
   5176 		 */
   5177 		if (disp == CE_DISP_POSS_STICKY &&
   5178 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
   5179 			disp = CE_DISP_STICKY;
   5180 		break;
   5181 
   5182 	default:
   5183 		disp = CE_DISP_UNKNOWN;
   5184 		break;
   5185 	}
   5186 
   5187 	return (cetypes[disp]);
   5188 }
   5189 
   5190 /*
   5191  * Given the entire afsr, the specific bit to check and a prioritized list of
   5192  * error bits, determine the validity of the various overwrite priority
   5193  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
   5194  * different overwrite priorities.
   5195  *
   5196  * Given a specific afsr error bit and the entire afsr, there are three cases:
   5197  *   INVALID:	The specified bit is lower overwrite priority than some other
   5198  *		error bit which is on in the afsr (or IVU/IVC).
   5199  *   VALID:	The specified bit is higher priority than all other error bits
   5200  *		which are on in the afsr.
   5201  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
   5202  *		bit is on in the afsr.
   5203  */
   5204 int
   5205 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
   5206 {
   5207 	uint64_t afsr_ow;
   5208 
   5209 	while ((afsr_ow = *ow_bits++) != 0) {
   5210 		/*
   5211 		 * If bit is in the priority class, check to see if another
   5212 		 * bit in the same class is on => ambiguous.  Otherwise,
   5213 		 * the value is valid.  If the bit is not on at this priority
   5214 		 * class, but a higher priority bit is on, then the value is
   5215 		 * invalid.
   5216 		 */
   5217 		if (afsr_ow & afsr_bit) {
   5218 			/*
   5219 			 * If equal pri bit is on, ambiguous.
   5220 			 */
   5221 			if (afsr & (afsr_ow & ~afsr_bit))
   5222 				return (AFLT_STAT_AMBIGUOUS);
   5223 			return (AFLT_STAT_VALID);
   5224 		} else if (afsr & afsr_ow)
   5225 			break;
   5226 	}
   5227 
   5228 	/*
   5229 	 * We didn't find a match or a higher priority bit was on.  Not
   5230 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
   5231 	 */
   5232 	return (AFLT_STAT_INVALID);
   5233 }
   5234 
   5235 static int
   5236 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
   5237 {
   5238 #if defined(SERRANO)
   5239 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
   5240 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
   5241 	else
   5242 #endif	/* SERRANO */
   5243 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
   5244 }
   5245 
   5246 static int
   5247 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
   5248 {
   5249 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
   5250 }
   5251 
   5252 static int
   5253 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
   5254 {
   5255 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
   5256 }
   5257 
   5258 static int
   5259 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
   5260 {
   5261 #ifdef lint
   5262 	cpuid = cpuid;
   5263 #endif
   5264 #if defined(CHEETAH_PLUS)
   5265 	/*
   5266 	 * The M_SYND overwrite policy is combined with the E_SYND overwrite
   5267 	 * policy for Cheetah+ and separate for Panther CPUs.
   5268 	 */
   5269 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
   5270 		if (IS_PANTHER(cpunodes[cpuid].implementation))
   5271 			return (afsr_to_msynd_status(afsr, afsr_bit));
   5272 		else
   5273 			return (afsr_to_esynd_status(afsr, afsr_bit));
   5274 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
   5275 		if (IS_PANTHER(cpunodes[cpuid].implementation))
   5276 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
   5277 		else
   5278 			return (afsr_to_esynd_status(afsr, afsr_bit));
   5279 #else /* CHEETAH_PLUS */
   5280 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
   5281 		return (afsr_to_msynd_status(afsr, afsr_bit));
   5282 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
   5283 		return (afsr_to_esynd_status(afsr, afsr_bit));
   5284 #endif /* CHEETAH_PLUS */
   5285 	} else {
   5286 		return (AFLT_STAT_INVALID);
   5287 	}
   5288 }
   5289 
/*
 * Slave CPU stick synchronization.
 *
 * Counterpart of sticksync_master().  The two sides hand control back and
 * forth by spinning on the shared stick_sync_cmd variable and exchange
 * measurements through the shared timestamp[] array:
 *  - the slave stamps EV_A_START, then posts MASTER_START
 *  - the master stamps EV_A_END and EV_B_START, then posts SLAVE_CONT
 *  - the slave stamps EV_B_END and computes the skew for that round
 *
 * After stick_iter rounds the average skew is compared with stick_tsk.
 * If it is larger, the local STICK is adjusted via stick_adj() and the
 * whole measurement is repeated; otherwise slave_done is set, which also
 * releases the master from its outer loop.
 *
 * Kernel preemption is disabled for the duration.
 */
void
sticksync_slave(void)
{
	int 		i;
	int		tries = 0;	/* only consumed by the DEBUG stats */
	int64_t		tskew;		/* skew measured in one round */
	int64_t		av_tskew;	/* running sum, then the average */

	kpreempt_disable();
	/* wait for the master side */
	while (stick_sync_cmd != SLAVE_START)
		;
	/*
	 * Synchronization should only take a few tries at most. But in the
	 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
	 * without its stick synchronized wouldn't be a good citizen.
	 */
	while (slave_done == 0) {
		/*
		 * Time skew calculation.
		 */
		av_tskew = tskew = 0;

		for (i = 0; i < stick_iter; i++) {
			/* make location hot */
			timestamp[EV_A_START] = 0;
			stick_timestamp(&timestamp[EV_A_START]);

			/* tell the master we're ready */
			stick_sync_cmd = MASTER_START;

			/* and wait */
			while (stick_sync_cmd != SLAVE_CONT)
				;
			/* Event B end */
			stick_timestamp(&timestamp[EV_B_END]);

			/*
			 * calculate time skew: half the difference between
			 * the master-to-slave interval (event B) and the
			 * slave-to-master interval (event A)
			 */
			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
			    - (timestamp[EV_A_END] - timestamp[EV_A_START]))
			    / 2;

			/* keep running count */
			av_tskew += tskew;
		} /* for */

		/*
		 * Adjust stick for time skew if not within the max allowed;
		 * otherwise we're all done.
		 */
		if (stick_iter != 0)
			av_tskew = av_tskew/stick_iter;
		if (ABS(av_tskew) > stick_tsk) {
			/*
			 * If the skew is 1 (the slave's STICK register
			 * is 1 STICK ahead of the master's), stick_adj
			 * could fail to adjust the slave's STICK register
			 * if the STICK read on the slave happens to
			 * align with the increment of the STICK.
			 * Therefore, we increment the skew to 2.
			 */
			if (av_tskew == 1)
				av_tskew++;
			stick_adj(-av_tskew);
		} else
			slave_done = 1;
#ifdef DEBUG
		/* record the skew seen on each of the first few attempts */
		if (tries < DSYNC_ATTEMPTS)
			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
			    av_tskew;
		++tries;
#endif /* DEBUG */
#ifdef lint
		tries = tries;
#endif

	} /* while */

	/* allow the master to finish */
	stick_sync_cmd = EVENT_NULL;
	kpreempt_enable();
}
   5375 
/*
 * Master CPU side of stick synchronization.
 *  - timestamp end of Event A
 *  - timestamp beginning of Event B
 *
 * Counterpart of sticksync_slave().  The master clears slave_done and
 * posts SLAVE_START, then for each of stick_iter rounds waits for the
 * slave to post MASTER_START, records EV_A_END and EV_B_START in the
 * shared timestamp[] array and posts SLAVE_CONT.  After the rounds it
 * spins until stick_sync_cmd is no longer SLAVE_CONT, and starts another
 * set of rounds unless the slave has set slave_done in the meantime.
 *
 * Kernel preemption is disabled for the duration.
 */
void
sticksync_master(void)
{
	int		i;

	kpreempt_disable();
	/* tell the slave we've started */
	slave_done = 0;
	stick_sync_cmd = SLAVE_START;

	while (slave_done == 0) {
		for (i = 0; i < stick_iter; i++) {
			/* wait for the slave */
			while (stick_sync_cmd != MASTER_START)
				;
			/* Event A end */
			stick_timestamp(&timestamp[EV_A_END]);

			/* make location hot */
			timestamp[EV_B_START] = 0;
			stick_timestamp(&timestamp[EV_B_START]);

			/* tell the slave to continue */
			stick_sync_cmd = SLAVE_CONT;
		} /* for */

		/* wait while slave calculates time skew */
		while (stick_sync_cmd == SLAVE_CONT)
			;
	} /* while */
	kpreempt_enable();
}
   5413 
/*
 * Cheetah/Cheetah+ have a disrupting error for copybacks, so we don't need
 * to do the Spitfire hack of xcall'ing all the cpus to ask them to check
 * for them.  Also, in cpu_async_panic_callb, each cpu checks for CPU events
 * on its way to panic idle.
 *
 * This is therefore intentionally a no-op; aflt is unused.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{}
   5424 
/*
 * kmem cache to which cpu_uninit_private() returns a cpu's
 * cheetah_private_t.
 */
struct kmem_cache *ch_private_cache;
   5426 
   5427 /*
   5428  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
   5429  * deallocate the scrubber data structures and cpu_private data structure.
   5430  */
   5431 void
   5432 cpu_uninit_private(struct cpu *cp)
   5433 {
   5434 	cheetah_private_t *chprp = CPU_PRIVATE(cp);
   5435 
   5436 	ASSERT(chprp);
   5437 	cpu_uninit_ecache_scrub_dr(cp);
   5438 	CPU_PRIVATE(cp) = NULL;
   5439 	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
   5440 	kmem_cache_free(ch_private_cache, chprp);
   5441 	cmp_delete_cpu(cp->cpu_id);
   5442 
   5443 }
   5444 
/*
 * Cheetah Cache Scrubbing
 *
 * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
 * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
 * protected by either parity or ECC.
 *
 * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
 * cache per second). Due to the specifics of how the I$ control
 * logic works with respect to the ASI used to scrub I$ lines, the entire
 * I$ is scanned at once.
 */
   5457 
/*
 * Tuneables to enable and disable the scrubbing of the caches, and to tune
 * scrubbing behavior.  These may be changed via /etc/system or using mdb
 * on a running system.
 */
int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */

/*
 * The following are the PIL levels that the softints/cross traps will fire at.
 */
uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */

#if defined(JALAPENO)

/*
 * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
 * on Jalapeno.
 */
int ecache_scrub_enable = 0;

#else	/* JALAPENO */

/*
 * With all other cpu types, E$ scrubbing is on by default
 */
int ecache_scrub_enable = 1;

#endif	/* JALAPENO */


#if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)

/*
 * The I$ scrubber tends to cause latency problems for real-time SW, so it
 * is disabled by default on non-Cheetah systems
 */
int icache_scrub_enable = 0;

/*
 * Tuneables specifying the scrub calls per second and the scan rate
 * for each cache
 *
 * The cyclic times are set during boot based on the following values.
 * Changing these values in mdb after this time will have no effect.  If
 * a different value is desired, it must be set in /etc/system before a
 * reboot.
 */
int ecache_calls_a_sec = 1;		/* E$ scrub calls per second */
int dcache_calls_a_sec = 2;		/* D$ scrub calls per second */
int icache_calls_a_sec = 2;		/* I$ scrub calls per second */

int ecache_scan_rate_idle = 1;		/* E$ scan rate (in tenths of a %) */
int ecache_scan_rate_busy = 1;		/* E$ scan rate (in tenths of a %) */
int dcache_scan_rate_idle = 1;		/* D$ scan rate (in tenths of a %) */
int dcache_scan_rate_busy = 1;		/* D$ scan rate (in tenths of a %) */
int icache_scan_rate_idle = 1;		/* I$ scan rate (in tenths of a %) */
int icache_scan_rate_busy = 1;		/* I$ scan rate (in tenths of a %) */

#else	/* CHEETAH_PLUS || JALAPENO || SERRANO */

int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */

int ecache_calls_a_sec = 100;		/* E$ scrub calls per second */
int dcache_calls_a_sec = 100;		/* D$ scrub calls per second */
int icache_calls_a_sec = 100;		/* I$ scrub calls per second */

int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */

#endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
   5534 
/*
 * In order to scrub on offline cpus, a cross trap is sent.  The handler will
 * increment the outstanding request counter and schedule a softint to run
 * the scrubber.  do_scrub_offline() is what sends this cross trap.
 */
extern xcfunc_t cache_scrubreq_tl1;

/*
 * These are the softint functions for each cache scrubber.  They are
 * registered with add_softintr() in cpu_init_cache_scrub(), with arg1
 * pointing at the matching cache_scrub_info[] entry.
 */
static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
   5548 
/*
 * The cache scrub info table contains cache specific information
 * and allows for some of the scrub code to be table driven, reducing
 * duplication of cache similar code.
 *
 * This table keeps a copy of the value in the calls per second variable
 * (?cache_calls_a_sec).  This makes it much more difficult for someone
 * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
 * mdb in a misguided attempt to disable the scrubber).
 *
 * csi_freq, csi_inum and the two cyclic IDs start out as 0 and are filled
 * in by cpu_init_cache_scrub().
 */
struct scrub_info {
	int		*csi_enable;	/* scrubber enable flag */
	int		csi_freq;	/* scrubber calls per second */
	int		csi_index;	/* index to chsm_outstanding[] */
	uint64_t	csi_inum;	/* scrubber interrupt number */
	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
	/* two-character name plus the terminating NUL */
	char		csi_name[3];	/* cache name for this scrub entry */
} cache_scrub_info[] = {
{ &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
{ &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
{ &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
};
   5572 
   5573 /*
   5574  * If scrubbing is enabled, increment the outstanding request counter.  If it
   5575  * is 1 (meaning there were no previous requests outstanding), call
   5576  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
   5577  * a self trap.
   5578  */
   5579 static void
   5580 do_scrub(struct scrub_info *csi)
   5581 {
   5582 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
   5583 	int index = csi->csi_index;
   5584 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
   5585 
   5586 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
   5587 		if (atomic_add_32_nv(outstanding, 1) == 1) {
   5588 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
   5589 			    csi->csi_inum, 0);
   5590 		}
   5591 	}
   5592 }
   5593 
   5594 /*
   5595  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
   5596  * cross-trap the offline cpus.
   5597  */
   5598 static void
   5599 do_scrub_offline(struct scrub_info *csi)
   5600 {
   5601 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
   5602 
   5603 	if (CPUSET_ISNULL(cpu_offline_set)) {
   5604 		/*
   5605 		 * No offline cpus - nothing to do
   5606 		 */
   5607 		return;
   5608 	}
   5609 
   5610 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
   5611 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
   5612 		    csi->csi_index);
   5613 	}
   5614 }
   5615 
   5616 /*
   5617  * This is the initial setup for the scrubber cyclics - it sets the
   5618  * interrupt level, frequency, and function to call.
   5619  */
   5620 /*ARGSUSED*/
   5621 static void
   5622 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
   5623     cyc_time_t *when)
   5624 {
   5625 	struct scrub_info *csi = (struct scrub_info *)arg;
   5626 
   5627 	ASSERT(csi != NULL);
   5628 	hdlr->cyh_func = (cyc_func_t)do_scrub;
   5629 	hdlr->cyh_level = CY_LOW_LEVEL;
   5630 	hdlr->cyh_arg = arg;
   5631 
   5632 	when->cyt_when = 0;	/* Start immediately */
   5633 	when->cyt_interval = NANOSEC / csi->csi_freq;
   5634 }
   5635 
   5636 /*
   5637  * Initialization for cache scrubbing.
   5638  * This routine is called AFTER all cpus have had cpu_init_private called
   5639  * to initialize their private data areas.
   5640  */
   5641 void
   5642 cpu_init_cache_scrub(void)
   5643 {
   5644 	int i;
   5645 	struct scrub_info *csi;
   5646 	cyc_omni_handler_t omni_hdlr;
   5647 	cyc_handler_t offline_hdlr;
   5648 	cyc_time_t when;
   5649 
   5650 	/*
   5651 	 * save away the maximum number of lines for the D$
   5652 	 */
   5653 	dcache_nlines = dcache_size / dcache_linesize;
   5654 
   5655 	/*
   5656 	 * register the softints for the cache scrubbing
   5657 	 */
   5658 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
   5659 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
   5660 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E], SOFTINT_MT);
   5661 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
   5662 
   5663 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
   5664 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
   5665 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D], SOFTINT_MT);
   5666 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
   5667 
   5668 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
   5669 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
   5670 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I], SOFTINT_MT);
   5671 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
   5672 
   5673 	/*
   5674 	 * start the scrubbing for all the caches
   5675 	 */
   5676 	mutex_enter(&cpu_lock);
   5677 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
   5678 
   5679 		csi = &cache_scrub_info[i];
   5680 
   5681 		if (!(*csi->csi_enable))
   5682 			continue;
   5683 
   5684 		/*
   5685 		 * force the following to be true:
   5686 		 *	1 <= calls_a_sec <= hz
   5687 		 */
   5688 		if (csi->csi_freq > hz) {
   5689 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
   5690 			    "(%d); resetting to hz (%d)", csi->csi_name,
   5691 			    csi->csi_freq, hz);
   5692 			csi->csi_freq = hz;
   5693 		} else if (csi->csi_freq < 1) {
   5694 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
   5695 			    "(%d); resetting to 1", csi->csi_name,
   5696 			    csi->csi_freq);
   5697 			csi->csi_freq = 1;
   5698 		}
   5699 
   5700 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
   5701 		omni_hdlr.cyo_offline = NULL;
   5702 		omni_hdlr.cyo_arg = (void *)csi;
   5703 
   5704 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
   5705 		offline_hdlr.cyh_arg = (void *)csi;
   5706 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
   5707 
   5708 		when.cyt_when = 0;	/* Start immediately */
   5709 		when.cyt_interval = NANOSEC / csi->csi_freq;
   5710 
   5711 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
   5712 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
   5713 	}
   5714 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
   5715 	mutex_exit(&cpu_lock);
   5716 }
   5717 
   5718 /*
   5719  * Indicate that the specified cpu is idle.
   5720  */
   5721 void
   5722 cpu_idle_ecache_scrub(struct cpu *cp)
   5723 {
   5724 	if (CPU_PRIVATE(cp) != NULL) {
   5725 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
   5726 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
   5727 	}
   5728 }
   5729 
   5730 /*
   5731  * Indicate that the specified cpu is busy.
   5732  */
   5733 void
   5734 cpu_busy_ecache_scrub(struct cpu *cp)
   5735 {
   5736 	if (CPU_PRIVATE(cp) != NULL) {
   5737 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
   5738 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
   5739 	}
   5740 }
   5741 
   5742 /*
   5743  * Initialization for cache scrubbing for the specified cpu.
   5744  */
   5745 void
   5746 cpu_init_ecache_scrub_dr(struct cpu *cp)
   5747 {
   5748 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
   5749 	int cpuid = cp->cpu_id;
   5750 
   5751 	/* initialize the number of lines in the caches */
   5752 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
   5753 	    cpunodes[cpuid].ecache_linesize;
   5754 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
   5755 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
   5756 
   5757 	/*
   5758 	 * do_scrub() and do_scrub_offline() check both the global
   5759 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
   5760 	 * check this value before scrubbing.  Currently, we use it to
   5761 	 * disable the E$ scrubber on multi-core cpus or while running at
   5762 	 * slowed speed.  For now, just turn everything on and allow
   5763 	 * cpu_init_private() to change it if necessary.
   5764 	 */
   5765 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
   5766 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
   5767 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
   5768 
   5769 	cpu_busy_ecache_scrub(cp);
   5770 }
   5771 
   5772 /*
   5773  * Un-initialization for cache scrubbing for the specified cpu.
   5774  */
   5775 static void
   5776 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
   5777 {
   5778 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
   5779 
   5780 	/*
   5781 	 * un-initialize bookkeeping for cache scrubbing
   5782 	 */
   5783 	bzero(csmp, sizeof (ch_scrub_misc_t));
   5784 
   5785 	cpu_idle_ecache_scrub(cp);
   5786 }
   5787 
   5788 /*
   5789  * Called periodically on each CPU to scrub the D$.
   5790  */
   5791 static void
   5792 scrub_dcache(int how_many)
   5793 {
   5794 	int i;
   5795 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
   5796 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
   5797 
   5798 	/*
   5799 	 * scrub the desired number of lines
   5800 	 */
   5801 	for (i = 0; i < how_many; i++) {
   5802 		/*
   5803 		 * scrub a D$ line
   5804 		 */
   5805 		dcache_inval_line(index);
   5806 
   5807 		/*
   5808 		 * calculate the next D$ line to scrub, assumes
   5809 		 * that dcache_nlines is a power of 2
   5810 		 */
   5811 		index = (index + 1) & (dcache_nlines - 1);
   5812 	}
   5813 
   5814 	/*
   5815 	 * set the scrub index for the next visit
   5816 	 */
   5817 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
   5818 }
   5819 
   5820 /*
   5821  * Handler for D$ scrub inum softint. Call scrub_dcache until
   5822  * we decrement the outstanding request count to zero.
   5823  */
   5824 /*ARGSUSED*/
   5825 static uint_t
   5826 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
   5827 {
   5828 	int i;
   5829 	int how_many;
   5830 	int outstanding;
   5831 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
   5832 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
   5833 	struct scrub_info *csi = (struct scrub_info *)arg1;
   5834 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
   5835 	    dcache_scan_rate_idle : dcache_scan_rate_busy;
   5836 
   5837 	/*
   5838 	 * The scan rates are expressed in units of tenths of a
   5839 	 * percent.  A scan rate of 1000 (100%) means the whole
   5840 	 * cache is scanned every second.
   5841 	 */
   5842 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
   5843 
   5844 	do {
   5845 		outstanding = *countp;
   5846 		for (i = 0; i < outstanding; i++) {
   5847 			scrub_dcache(how_many);
   5848 		}
   5849 	} while (atomic_add_32_nv(countp, -outstanding));
   5850 
   5851 	return (DDI_INTR_CLAIMED);
   5852 }
   5853 
   5854 /*
   5855  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
   5856  * by invalidating lines. Due to the characteristics of the ASI which
   5857  * is used to invalidate an I$ line, the entire I$ must be invalidated
   5858  * vs. an individual I$ line.
   5859  */
   5860 static void
   5861 scrub_icache(int how_many)
   5862 {
   5863 	int i;
   5864 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
   5865 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
   5866 	int icache_nlines = csmp->chsm_icache_nlines;
   5867 
   5868 	/*
   5869 	 * scrub the desired number of lines
   5870 	 */
   5871 	for (i = 0; i < how_many; i++) {
   5872 		/*
   5873 		 * since the entire I$ must be scrubbed at once,
   5874 		 * wait until the index wraps to zero to invalidate
   5875 		 * the entire I$
   5876 		 */
   5877 		if (index == 0) {
   5878 			icache_inval_all();
   5879 		}
   5880 
   5881 		/*
   5882 		 * calculate the next I$ line to scrub, assumes
   5883 		 * that chsm_icache_nlines is a power of 2
   5884 		 */
   5885 		index = (index + 1) & (icache_nlines - 1);
   5886 	}
   5887 
   5888 	/*
   5889 	 * set the scrub index for the next visit
   5890 	 */
   5891 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
   5892 }
   5893 
   5894 /*
   5895  * Handler for I$ scrub inum softint. Call scrub_icache until
   5896  * we decrement the outstanding request count to zero.
   5897  */
   5898 /*ARGSUSED*/
   5899 static uint_t
   5900 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
   5901 {
   5902 	int i;
   5903 	int how_many;
   5904 	int outstanding;
   5905 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
   5906 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
   5907 	struct scrub_info *csi = (struct scrub_info *)arg1;
   5908 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
   5909 	    icache_scan_rate_idle : icache_scan_rate_busy;
   5910 	int icache_nlines = csmp->chsm_icache_nlines;
   5911 
   5912 	/*
   5913 	 * The scan rates are expressed in units of tenths of a
   5914 	 * percent.  A scan rate of 1000 (100%) means the whole
   5915 	 * cache is scanned every second.
   5916 	 */
   5917 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
   5918 
   5919 	do {
   5920 		outstanding = *countp;
   5921 		for (i = 0; i < outstanding; i++) {
   5922 			scrub_icache(how_many);
   5923 		}
   5924 	} while (atomic_add_32_nv(countp, -outstanding));
   5925 
   5926 	return (DDI_INTR_CLAIMED);
   5927 }
   5928 
   5929 /*
   5930  * Called periodically on each CPU to scrub the E$.
   5931  */
   5932 static void
   5933 scrub_ecache(int how_many)
   5934 {
   5935 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
   5936 	int i;
   5937 	int cpuid = CPU->cpu_id;
   5938 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
   5939 	int nlines = csmp->chsm_ecache_nlines;
   5940 	int linesize = cpunodes[cpuid].ecache_linesize;
   5941 	int ec_set_size = cpu_ecache_set_size(CPU);
   5942 
   5943 	/*
   5944 	 * scrub the desired number of lines
   5945 	 */
   5946 	for (i = 0; i < how_many; i++) {
   5947 		/*
   5948 		 * scrub the E$ line
   5949 		 */
   5950 		ecache_flush_line(ecache_flushaddr + (index * linesize),
   5951 		    ec_set_size);
   5952 
   5953 		/*
   5954 		 * calculate the next E$ line to scrub based on twice
   5955 		 * the number of E$ lines (to displace lines containing
   5956 		 * flush area data), assumes that the number of lines
   5957 		 * is a power of 2
   5958 		 */
   5959 		index = (index + 1) & ((nlines << 1) - 1);
   5960 	}
   5961 
   5962 	/*
   5963 	 * set the ecache scrub index for the next visit
   5964 	 */
   5965 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
   5966 }
   5967 
   5968 /*
   5969  * Handler for E$ scrub inum softint. Call the E$ scrubber until
   5970  * we decrement the outstanding request count to zero.
   5971  *
   5972  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
   5973  * become negative after the atomic_add_32_nv().  This is not a problem, as
   5974  * the next trip around the loop won't scrub anything, and the next add will
   5975  * reset the count back to zero.
   5976  */
   5977 /*ARGSUSED*/
   5978 static uint_t
   5979 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
   5980 {
   5981 	int i;
   5982 	int how_many;
   5983 	int outstanding;
   5984 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
   5985 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
   5986 	struct scrub_info *csi = (struct scrub_info *)arg1;
   5987 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
   5988 	    ecache_scan_rate_idle : ecache_scan_rate_busy;
   5989 	int ecache_nlines = csmp->chsm_ecache_nlines;
   5990 
   5991 	/*
   5992 	 * The scan rates are expressed in units of tenths of a
   5993 	 * percent.  A scan rate of 1000 (100%) means the whole
   5994 	 * cache is scanned every second.
   5995 	 */
   5996 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
   5997 
   5998 	do {
   5999 		outstanding = *countp;
   6000 		for (i = 0; i < outstanding; i++) {
   6001 			scrub_ecache(how_many);
   6002 		}
   6003 	} while (atomic_add_32_nv(countp, -outstanding));
   6004 
   6005 	return (DDI_INTR_CLAIMED);
   6006 }
   6007 
   6008 /*
   6009  * Timeout function to reenable CE
   6010  */
   6011 static void
   6012 cpu_delayed_check_ce_errors(void *arg)
   6013 {
   6014 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
   6015 	    TQ_NOSLEEP)) {
   6016 		(void) timeout(cpu_delayed_check_ce_errors, arg,
   6017 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
   6018 	}
   6019 }
   6020 
   6021 /*
   6022  * CE Deferred Re-enable after trap.
   6023  *
   6024  * When the CPU gets a disrupting trap for any of the errors
   6025  * controlled by the CEEN bit, CEEN is disabled in the trap handler
   6026  * immediately. To eliminate the possibility of multiple CEs causing
   6027  * recursive stack overflow in the trap handler, we cannot
   6028  * reenable CEEN while still running in the trap handler. Instead,
   6029  * after a CE is logged on a CPU, we schedule a timeout function,
   6030  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
   6031  * seconds. This function will check whether any further CEs
   6032  * have occurred on that CPU, and if none have, will reenable CEEN.
   6033  *
   6034  * If further CEs have occurred while CEEN is disabled, another
   6035  * timeout will be scheduled. This is to ensure that the CPU can
   6036  * make progress in the face of CE 'storms', and that it does not
   6037  * spend all its time logging CE errors.
   6038  */
   6039 static void
   6040 cpu_check_ce_errors(void *arg)
   6041 {
   6042 	int	cpuid = (int)(uintptr_t)arg;
   6043 	cpu_t	*cp;
   6044 
   6045 	/*
   6046 	 * We acquire cpu_lock.
   6047 	 */
   6048 	ASSERT(curthread->t_pil == 0);
   6049 
   6050 	/*
   6051 	 * verify that the cpu is still around, DR
   6052 	 * could have got there first ...
   6053 	 */
   6054 	mutex_enter(&cpu_lock);
   6055 	cp = cpu_get(cpuid);
   6056 	if (cp == NULL) {
   6057 		mutex_exit(&cpu_lock);
   6058 		return;
   6059 	}
   6060 	/*
   6061 	 * make sure we don't migrate across CPUs
   6062 	 * while checking our CE status.
   6063 	 */
   6064 	kpreempt_disable();
   6065 
   6066 	/*
   6067 	 * If we are running on the CPU that got the
   6068 	 * CE, we can do the checks directly.
   6069 	 */
   6070 	if (cp->cpu_id == CPU->cpu_id) {
   6071 		mutex_exit(&cpu_lock);
   6072 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
   6073 		kpreempt_enable();
   6074 		return;
   6075 	}
   6076 	kpreempt_enable();
   6077 
   6078 	/*
   6079 	 * send an x-call to get the CPU that originally
   6080 	 * got the CE to do the necessary checks. If we can't
   6081 	 * send the x-call, reschedule the timeout, otherwise we
   6082 	 * lose CEEN forever on that CPU.
   6083 	 */
   6084 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
   6085 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
   6086 		    TIMEOUT_CEEN_CHECK, 0);
   6087 		mutex_exit(&cpu_lock);
   6088 	} else {
   6089 		/*
   6090 		 * When the CPU is not accepting xcalls, or
   6091 		 * the processor is offlined, we don't want to
   6092 		 * incur the extra overhead of trying to schedule the
   6093 		 * CE timeout indefinitely. However, we don't want to lose
   6094 		 * CE checking forever.
   6095 		 *
   6096 		 * Keep rescheduling the timeout, accepting the additional
   6097 		 * overhead as the cost of correctness in the case where we get
   6098 		 * a CE, disable CEEN, offline the CPU during the
   6099 		 * the timeout interval, and then online it at some
   6100 		 * point in the future. This is unlikely given the short
   6101 		 * cpu_ceen_delay_secs.
   6102 		 */
   6103 		mutex_exit(&cpu_lock);
   6104 		(void) timeout(cpu_delayed_check_ce_errors,
   6105 		    (void *)(uintptr_t)cp->cpu_id,
   6106 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
   6107 	}
   6108 }
   6109 
   6110 /*
   6111  * This routine will check whether CEs have occurred while
   6112  * CEEN is disabled. Any CEs detected will be logged and, if
   6113  * possible, scrubbed.
   6114  *
   6115  * The memscrubber will also use this routine to clear any errors
   6116  * caused by its scrubbing with CEEN disabled.
   6117  *
   6118  * flag == SCRUBBER_CEEN_CHECK
   6119  *		called from memscrubber, just check/scrub, no reset
   6120  *		paddr 	physical addr. for start of scrub pages
   6121  *		vaddr 	virtual addr. for scrub area
   6122  *		psz	page size of area to be scrubbed
   6123  *
   6124  * flag == TIMEOUT_CEEN_CHECK
   6125  *		timeout function has triggered, reset timeout or CEEN
   6126  *
   6127  * Note: We must not migrate cpus during this function.  This can be
   6128  * achieved by one of:
   6129  *    - invoking as target of an x-call in which case we're at XCALL_PIL
   6130  *	The flag value must be first xcall argument.
   6131  *    - disabling kernel preemption.  This should be done for very short
   6132  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
   6133  *	scrub an extended area with cpu_check_block.  The call for
   6134  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
   6135  *	brief for this case.
   6136  *    - binding to a cpu, eg with thread_affinity_set().  This is used
   6137  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
   6138  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
   6139  */
   6140 void
   6141 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
   6142 {
   6143 	ch_cpu_errors_t	cpu_error_regs;
   6144 	uint64_t	ec_err_enable;
   6145 	uint64_t	page_offset;
   6146 
   6147 	/* Read AFSR */
   6148 	get_cpu_error_state(&cpu_error_regs);
   6149 
   6150 	/*
   6151 	 * If no CEEN errors have occurred during the timeout
   6152 	 * interval, it is safe to re-enable CEEN and exit.
   6153 	 */
   6154 	if (((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) |
   6155 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_CECC_ERRS)) == 0) {
   6156 		if (flag == TIMEOUT_CEEN_CHECK &&
   6157 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
   6158 			set_error_enable(ec_err_enable | EN_REG_CEEN);
   6159 		return;
   6160 	}
   6161 
   6162 	/*
   6163 	 * Ensure that CEEN was not reenabled (maybe by DR) before
   6164 	 * we log/clear the error.
   6165 	 */
   6166 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
   6167 		set_error_enable(ec_err_enable & ~EN_REG_CEEN);
   6168 
   6169 	/*
   6170 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
   6171 	 * timeout will be rescheduled when the error is logged.
   6172 	 */
   6173 	if (!((cpu_error_regs.afsr & cpu_ce_not_deferred) |
   6174 	    (cpu_error_regs.afsr_ext & cpu_ce_not_deferred_ext)))
   6175 		cpu_ce_detected(&cpu_error_regs,
   6176 		    CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
   6177 	else
   6178 		cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
   6179 
   6180 	/*
   6181 	 * If the memory scrubber runs while CEEN is
   6182 	 * disabled, (or if CEEN is disabled during the
   6183 	 * scrub as a result of a CE being triggered by
   6184 	 * it), the range being scrubbed will not be
   6185 	 * completely cleaned. If there are multiple CEs
   6186 	 * in the range at most two of these will be dealt
   6187 	 * with, (one by the trap handler and one by the
   6188 	 * timeout). It is also possible that none are dealt
   6189 	 * with, (CEEN disabled and another CE occurs before
   6190 	 * the timeout triggers). So to ensure that the
   6191 	 * memory is actually scrubbed, we have to access each
   6192 	 * memory location in the range and then check whether
   6193 	 * that access causes a CE.
   6194 	 */
   6195 	if (flag == SCRUBBER_CEEN_CHECK && va) {
   6196 		if ((cpu_error_regs.afar >= pa) &&
   6197 		    (cpu_error_regs.afar < (pa + psz))) {
   6198 			/*
   6199 			 * Force a load from physical memory for each
   6200 			 * 64-byte block, then check AFSR to determine
   6201 			 * whether this access caused an error.
   6202 			 *
   6203 			 * This is a slow way to do a scrub, but as it will
   6204 			 * only be invoked when the memory scrubber actually
   6205 			 * triggered a CE, it should not happen too
   6206 			 * frequently.
   6207 			 *
   6208 			 * cut down what we need to check as the scrubber
   6209 			 * has verified up to AFAR, so get it's offset
   6210 			 * into the page and start there.
   6211 			 */
   6212 			page_offset = (uint64_t)(cpu_error_regs.afar &
   6213 			    (psz - 1));
   6214 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
   6215 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
   6216 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
   6217 			    psz);
   6218 		}
   6219 	}
   6220 
   6221 	/*
   6222 	 * Reset error enable if this CE is not masked.
   6223 	 */
   6224 	if ((flag == TIMEOUT_CEEN_CHECK) &&
   6225 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
   6226 		set_error_enable(ec_err_enable | EN_REG_CEEN);
   6227 
   6228 }
   6229 
   6230 /*
   6231  * Attempt a cpu logout for an error that we did not trap for, such
   6232  * as a CE noticed with CEEN off.  It is assumed that we are still running
   6233  * on the cpu that took the error and that we cannot migrate.  Returns
   6234  * 0 on success, otherwise nonzero.
   6235  */
   6236 static int
   6237 cpu_ce_delayed_ec_logout(uint64_t afar)
   6238 {
   6239 	ch_cpu_logout_t *clop;
   6240 
   6241 	if (CPU_PRIVATE(CPU) == NULL)
   6242 		return (0);
   6243 
   6244 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
   6245 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
   6246 	    LOGOUT_INVALID)
   6247 		return (0);
   6248 
   6249 	cpu_delayed_logout(afar, clop);
   6250 	return (1);
   6251 }
   6252 
   6253 /*
   6254  * We got an error while CEEN was disabled. We
   6255  * need to clean up after it and log whatever
   6256  * information we have on the CE.
   6257  */
   6258 void
   6259 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
   6260 {
   6261 	ch_async_flt_t 	ch_flt;
   6262 	struct async_flt *aflt;
   6263 	char 		pr_reason[MAX_REASON_STRING];
   6264 
   6265 	bzero(&ch_flt, sizeof (ch_async_flt_t));
   6266 	ch_flt.flt_trapped_ce = flag;
   6267 	aflt = (struct async_flt *)&ch_flt;
   6268 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
   6269 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
   6270 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
   6271 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
   6272 	aflt->flt_addr = cpu_error_regs->afar;
   6273 #if defined(SERRANO)
   6274 	ch_flt.afar2 = cpu_error_regs->afar2;
   6275 #endif	/* SERRANO */
   6276 	aflt->flt_pc = NULL;
   6277 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
   6278 	aflt->flt_tl = 0;
   6279 	aflt->flt_panic = 0;
   6280 	cpu_log_and_clear_ce(&ch_flt);
   6281 
   6282 	/*
   6283 	 * check if we caused any errors during cleanup
   6284 	 */
   6285 	if (clear_errors(&ch_flt)) {
   6286 		pr_reason[0] = '\0';
   6287 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
   6288 		    NULL);
   6289 	}
   6290 }
   6291 
   6292 /*
   6293  * Log/clear CEEN-controlled disrupting errors
   6294  */
   6295 static void
   6296 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
   6297 {
   6298 	struct async_flt *aflt;
   6299 	uint64_t afsr, afsr_errs;
   6300 	ch_cpu_logout_t *clop;
   6301 	char 		pr_reason[MAX_REASON_STRING];
   6302 	on_trap_data_t	*otp = curthread->t_ontrap;
   6303 
   6304 	aflt = (struct async_flt *)ch_flt;
   6305 	afsr = aflt->flt_stat;
   6306 	afsr_errs = ch_flt->afsr_errs;
   6307 	aflt->flt_id = gethrtime_waitfree();
   6308 	aflt->flt_bus_id = getprocessorid();
   6309 	aflt->flt_inst = CPU->cpu_id;
   6310 	aflt->flt_prot = AFLT_PROT_NONE;
   6311 	aflt->flt_class = CPU_FAULT;
   6312 	aflt->flt_status = ECC_C_TRAP;
   6313 
   6314 	pr_reason[0] = '\0';
   6315 	/*
   6316 	 * Get the CPU log out info for Disrupting Trap.
   6317 	 */
   6318 	if (CPU_PRIVATE(CPU) == NULL) {
   6319 		clop = NULL;
   6320 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
   6321 	} else {
   6322 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
   6323 	}
   6324 
   6325 	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
   6326 		ch_cpu_errors_t cpu_error_regs;
   6327 
   6328 		get_cpu_error_state(&cpu_error_regs);
   6329 		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
   6330 		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
   6331 		clop->clo_data.chd_afar = cpu_error_regs.afar;
   6332 		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
   6333 		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
   6334 		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
   6335 		clop->clo_sdw_data.chd_afsr_ext =
   6336 		    cpu_error_regs.shadow_afsr_ext;
   6337 #if defined(SERRANO)
   6338 		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
   6339 #endif	/* SERRANO */
   6340 		ch_flt->flt_data_incomplete = 1;
   6341 
   6342 		/*
   6343 		 * The logging/clear code expects AFSR/AFAR to be cleared.
   6344 		 * The trap handler does it for CEEN enabled errors
   6345 		 * so we need to do it here.
   6346 		 */
   6347 		set_cpu_error_state(&cpu_error_regs);
   6348 	}
   6349 
   6350 #if defined(JALAPENO) || defined(SERRANO)
   6351 	/*
   6352 	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
   6353 	 * For Serrano, even thou we do have the AFAR, we still do the
   6354 	 * scrub on the RCE side since that's where the error type can
   6355 	 * be properly classified as intermittent, persistent, etc.
   6356 	 *
   6357 	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
   6358 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
   6359 	 * the flt_status bits.
   6360 	 */
   6361 	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
   6362 	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
   6363 	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
   6364 		cpu_ce_scrub_mem_err(aflt, B_TRUE);
   6365 	}
   6366 #else /* JALAPENO || SERRANO */
   6367 	/*
   6368 	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
   6369 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
   6370 	 * the flt_status bits.
   6371 	 */
   6372 	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
   6373 		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
   6374 		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
   6375 			cpu_ce_scrub_mem_err(aflt, B_TRUE);
   6376 		}
   6377 	}
   6378 
   6379 #endif /* JALAPENO || SERRANO */
   6380 
   6381 	/*
   6382 	 * Update flt_prot if this error occurred under on_trap protection.
   6383 	 */
   6384 	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
   6385 		aflt->flt_prot = AFLT_PROT_EC;
   6386 
   6387 	/*
   6388 	 * Queue events on the async event queue, one event per error bit.
   6389 	 */
   6390 	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
   6391 	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
   6392 		ch_flt->flt_type = CPU_INV_AFSR;
   6393 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
   6394 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
   6395 		    aflt->flt_panic);
   6396 	}
   6397 
   6398 	/*
   6399 	 * Zero out + invalidate CPU logout.
   6400 	 */
   6401 	if (clop) {
   6402 		bzero(clop, sizeof (ch_cpu_logout_t));
   6403 		clop->clo_data.chd_afar = LOGOUT_INVALID;
   6404 	}
   6405 
   6406 	/*
   6407 	 * If either a CPC, WDC or EDC error has occurred while CEEN
   6408 	 * was disabled, we need to flush either the entire
   6409 	 * E$ or an E$ line.
   6410 	 */
   6411 #if defined(JALAPENO) || defined(SERRANO)
   6412 	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
   6413 #else	/* JALAPENO || SERRANO */
   6414 	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
   6415 	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
   6416 #endif	/* JALAPENO || SERRANO */
   6417 		cpu_error_ecache_flush(ch_flt);
   6418 
   6419 }
   6420 
   6421 /*
   6422  * depending on the error type, we determine whether we
   6423  * need to flush the entire ecache or just a line.
   6424  */
   6425 static int
   6426 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
   6427 {
   6428 	struct async_flt *aflt;
   6429 	uint64_t	afsr;
   6430 	uint64_t	afsr_errs = ch_flt->afsr_errs;
   6431 
   6432 	aflt = (struct async_flt *)ch_flt;
   6433 	afsr = aflt->flt_stat;
   6434 
   6435 	/*
   6436 	 * If we got multiple errors, no point in trying
   6437 	 * the individual cases, just flush the whole cache
   6438 	 */
   6439 	if (afsr & C_AFSR_ME) {
   6440 		return (ECACHE_FLUSH_ALL);
   6441 	}
   6442 
   6443 	/*
   6444 	 * If either a CPC, WDC or EDC error has occurred while CEEN
   6445 	 * was disabled, we need to flush entire E$. We can't just
   6446 	 * flush the cache line affected as the ME bit
   6447 	 * is not set when multiple correctable errors of the same
   6448 	 * type occur, so we might have multiple CPC or EDC errors,
   6449 	 * with only the first recorded.
   6450 	 */
   6451 #if defined(JALAPENO) || defined(SERRANO)
   6452 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
   6453 #else	/* JALAPENO || SERRANO */
   6454 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
   6455 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
   6456 #endif	/* JALAPENO || SERRANO */
   6457 		return (ECACHE_FLUSH_ALL);
   6458 	}
   6459 
   6460 #if defined(JALAPENO) || defined(SERRANO)
   6461 	/*
   6462 	 * If only UE or RUE is set, flush the Ecache line, otherwise
   6463 	 * flush the entire Ecache.
   6464 	 */
   6465 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
   6466 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
   6467 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
   6468 			return (ECACHE_FLUSH_LINE);
   6469 		} else {
   6470 			return (ECACHE_FLUSH_ALL);
   6471 		}
   6472 	}
   6473 #else /* JALAPENO || SERRANO */
   6474 	/*
   6475 	 * If UE only is set, flush the Ecache line, otherwise
   6476 	 * flush the entire Ecache.
   6477 	 */
   6478 	if (afsr_errs & C_AFSR_UE) {
   6479 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
   6480 		    C_AFSR_UE) {
   6481 			return (ECACHE_FLUSH_LINE);
   6482 		} else {
   6483 			return (ECACHE_FLUSH_ALL);
   6484 		}
   6485 	}
   6486 #endif /* JALAPENO || SERRANO */
   6487 
   6488 	/*
   6489 	 * EDU: If EDU only is set, flush the ecache line, otherwise
   6490 	 * flush the entire Ecache.
   6491 	 */
   6492 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
   6493 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
   6494 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
   6495 			return (ECACHE_FLUSH_LINE);
   6496 		} else {
   6497 			return (ECACHE_FLUSH_ALL);
   6498 		}
   6499 	}
   6500 
   6501 	/*
   6502 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
   6503 	 * flush the entire Ecache.
   6504 	 */
   6505 	if (afsr_errs & C_AFSR_BERR) {
   6506 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
   6507 			return (ECACHE_FLUSH_LINE);
   6508 		} else {
   6509 			return (ECACHE_FLUSH_ALL);
   6510 		}
   6511 	}
   6512 
   6513 	return (0);
   6514 }
   6515 
   6516 void
   6517 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
   6518 {
   6519 	int	ecache_flush_flag =
   6520 	    cpu_error_ecache_flush_required(ch_flt);
   6521 
   6522 	/*
   6523 	 * Flush Ecache line or entire Ecache based on above checks.
   6524 	 */
   6525 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
   6526 		cpu_flush_ecache();
   6527 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
   6528 		cpu_flush_ecache_line(ch_flt);
   6529 	}
   6530 
   6531 }
   6532 
   6533 /*
   6534  * Extract the PA portion from the E$ tag.
   6535  */
   6536 uint64_t
   6537 cpu_ectag_to_pa(int setsize, uint64_t tag)
   6538 {
   6539 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
   6540 		return (JG_ECTAG_TO_PA(setsize, tag));
   6541 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
   6542 		return (PN_L3TAG_TO_PA(tag));
   6543 	else
   6544 		return (CH_ECTAG_TO_PA(setsize, tag));
   6545 }
   6546 
   6547 /*
   6548  * Convert the E$ tag PA into an E$ subblock index.
   6549  */
   6550 int
   6551 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
   6552 {
   6553 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
   6554 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
   6555 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
   6556 		/* Panther has only one subblock per line */
   6557 		return (0);
   6558 	else
   6559 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
   6560 }
   6561 
   6562 /*
   6563  * All subblocks in an E$ line must be invalid for
   6564  * the line to be invalid.
   6565  */
   6566 int
   6567 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
   6568 {
   6569 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
   6570 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
   6571 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
   6572 		return (PN_L3_LINE_INVALID(tag));
   6573 	else
   6574 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
   6575 }
   6576 
   6577 /*
   6578  * Extract state bits for a subblock given the tag.  Note that for Panther
   6579  * this works on both l2 and l3 tags.
   6580  */
   6581 int
   6582 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
   6583 {
   6584 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
   6585 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
   6586 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
   6587 		return (tag & CH_ECSTATE_MASK);
   6588 	else
   6589 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
   6590 }
   6591 
   6592 /*
   6593  * Cpu specific initialization.
   6594  */
   6595 void
   6596 cpu_mp_init(void)
   6597 {
   6598 #ifdef	CHEETAHPLUS_ERRATUM_25
   6599 	if (cheetah_sendmondo_recover) {
   6600 		cheetah_nudge_init();
   6601 	}
   6602 #endif
   6603 }
   6604 
   6605 void
   6606 cpu_ereport_post(struct async_flt *aflt)
   6607 {
   6608 	char *cpu_type, buf[FM_MAX_CLASS];
   6609 	nv_alloc_t *nva = NULL;
   6610 	nvlist_t *ereport, *detector, *resource;
   6611 	errorq_elem_t *eqep;
   6612 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
   6613 	char unum[UNUM_NAMLEN];
   6614 	int synd_code;
   6615 	uint8_t msg_type;
   6616 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
   6617 
   6618 	if (aflt->flt_panic || panicstr) {
   6619 		eqep = errorq_reserve(ereport_errorq);
   6620 		if (eqep == NULL)
   6621 			return;
   6622 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
   6623 		nva = errorq_elem_nva(ereport_errorq, eqep);
   6624 	} else {
   6625 		ereport = fm_nvlist_create(nva);
   6626 	}
   6627 
   6628 	/*
   6629 	 * Create the scheme "cpu" FMRI.
   6630 	 */
   6631 	detector = fm_nvlist_create(nva);
   6632 	resource = fm_nvlist_create(nva);
   6633 	switch (cpunodes[aflt->flt_inst].implementation) {
   6634 	case CHEETAH_IMPL:
   6635 		cpu_type = FM_EREPORT_CPU_USIII;
   6636 		break;
   6637 	case CHEETAH_PLUS_IMPL:
   6638 		cpu_type = FM_EREPORT_CPU_USIIIplus;
   6639 		break;
   6640 	case JALAPENO_IMPL:
   6641 		cpu_type = FM_EREPORT_CPU_USIIIi;
   6642 		break;
   6643 	case SERRANO_IMPL:
   6644 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
   6645 		break;
   6646 	case JAGUAR_IMPL:
   6647 		cpu_type = FM_EREPORT_CPU_USIV;
   6648 		break;
   6649 	case PANTHER_IMPL:
   6650 		cpu_type = FM_EREPORT_CPU_USIVplus;
   6651 		break;
   6652 	default:
   6653 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
   6654 		break;
   6655 	}
   6656 
   6657 	cpu_fmri_cpu_set(detector, aflt->flt_inst);
   6658 
   6659 	/*
   6660 	 * Encode all the common data into the ereport.
   6661 	 */
   6662 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
   6663 	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
   6664 
   6665 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
   6666 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
   6667 	    detector, NULL);
   6668 
   6669 	/*
   6670 	 * Encode the error specific data that was saved in
   6671 	 * the async_flt structure into the ereport.
   6672 	 */
   6673 	cpu_payload_add_aflt(aflt, ereport, resource,
   6674 	    &plat_ecc_ch_flt.ecaf_afar_status,
   6675 	    &plat_ecc_ch_flt.ecaf_synd_status);
   6676 
   6677 	if (aflt->flt_panic || panicstr) {
   6678 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
   6679 	} else {
   6680 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
   6681 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
   6682 		fm_nvlist_destroy(detector, FM_NVA_FREE);
   6683 		fm_nvlist_destroy(resource, FM_NVA_FREE);
   6684 	}
   6685 	/*
   6686 	 * Send the enhanced error information (plat_ecc_error2_data_t)
   6687 	 * to the SC olny if it can process it.
   6688 	 */
   6689 
   6690 	if (&plat_ecc_capability_sc_get &&
   6691 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
   6692 		msg_type = cpu_flt_bit_to_plat_error(aflt);
   6693 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
   6694 			/*
   6695 			 * If afar status is not invalid do a unum lookup.
   6696 			 */
   6697 			if (plat_ecc_ch_flt.ecaf_afar_status !=
   6698 			    AFLT_STAT_INVALID) {
   6699 				synd_code = synd_to_synd_code(
   6700 				    plat_ecc_ch_flt.ecaf_synd_status,
   6701 				    aflt->flt_synd, ch_flt->flt_bit);
   6702 				(void) cpu_get_mem_unum_synd(synd_code,
   6703 				    aflt, unum);
   6704 			} else {
   6705 				unum[0] = '\0';
   6706 			}
   6707 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
   6708 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
   6709 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
   6710 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
   6711 			    ch_flt->flt_sdw_afsr_ext;
   6712 
   6713 			if (&plat_log_fruid_error2)
   6714 				plat_log_fruid_error2(msg_type, unum, aflt,
   6715 				    &plat_ecc_ch_flt);
   6716 		}
   6717 	}
   6718 }
   6719 
   6720 void
   6721 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
   6722 {
   6723 	int status;
   6724 	ddi_fm_error_t de;
   6725 
   6726 	bzero(&de, sizeof (ddi_fm_error_t));
   6727 
   6728 	de.fme_version = DDI_FME_VERSION;
   6729 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
   6730 	    FM_ENA_FMT1);
   6731 	de.fme_flag = expected;
   6732 	de.fme_bus_specific = (void *)aflt->flt_addr;
   6733 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
   6734 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
   6735 		aflt->flt_panic = 1;
   6736 }
   6737 
   6738 void
   6739 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
   6740     errorq_t *eqp, uint_t flag)
   6741 {
   6742 	struct async_flt *aflt = (struct async_flt *)payload;
   6743 
   6744 	aflt->flt_erpt_class = error_class;
   6745 	errorq_dispatch(eqp, payload, payload_sz, flag);
   6746 }
   6747 
   6748 /*
   6749  * This routine may be called by the IO module, but does not do
   6750  * anything in this cpu module. The SERD algorithm is handled by
   6751  * cpumem-diagnosis engine instead.
   6752  */
   6753 /*ARGSUSED*/
   6754 void
   6755 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
   6756 {}
   6757 
   6758 void
   6759 adjust_hw_copy_limits(int ecache_size)
   6760 {
   6761 	/*
   6762 	 * Set hw copy limits.
   6763 	 *
   6764 	 * /etc/system will be parsed later and can override one or more
   6765 	 * of these settings.
   6766 	 *
   6767 	 * At this time, ecache size seems only mildly relevant.
   6768 	 * We seem to run into issues with the d-cache and stalls
   6769 	 * we see on misses.
   6770 	 *
   6771 	 * Cycle measurement indicates that 2 byte aligned copies fare
   6772 	 * little better than doing things with VIS at around 512 bytes.
   6773 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
   6774 	 * aligned is faster whenever the source and destination data
   6775 	 * in cache and the total size is less than 2 Kbytes.  The 2K
   6776 	 * limit seems to be driven by the 2K write cache.
   6777 	 * When more than 2K of copies are done in non-VIS mode, stores
   6778 	 * backup in the write cache.  In VIS mode, the write cache is
   6779 	 * bypassed, allowing faster cache-line writes aligned on cache
   6780 	 * boundaries.
   6781 	 *
   6782 	 * In addition, in non-VIS mode, there is no prefetching, so
   6783 	 * for larger copies, the advantage of prefetching to avoid even
   6784 	 * occasional cache misses is enough to justify using the VIS code.
   6785 	 *
   6786 	 * During testing, it was discovered that netbench ran 3% slower
   6787 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
   6788 	 * applications, data is only used once (copied to the output
   6789 	 * buffer, then copied by the network device off the system).  Using
   6790 	 * the VIS copy saves more L2 cache state.  Network copies are
   6791 	 * around 1.3K to 1.5K in size for historical reasons.
   6792 	 *
   6793 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
   6794 	 * aligned copy even for large caches and 8 MB ecache.  The
   6795 	 * infrastructure to allow different limits for different sized
   6796 	 * caches is kept to allow further tuning in later releases.
   6797 	 */
   6798 
   6799 	if (min_ecache_size == 0 && use_hw_bcopy) {
   6800 		/*
   6801 		 * First time through - should be before /etc/system
   6802 		 * is read.
   6803 		 * Could skip the checks for zero but this lets us
   6804 		 * preserve any debugger rewrites.
   6805 		 */
   6806 		if (hw_copy_limit_1 == 0) {
   6807 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
   6808 			priv_hcl_1 = hw_copy_limit_1;
   6809 		}
   6810 		if (hw_copy_limit_2 == 0) {
   6811 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
   6812 			priv_hcl_2 = hw_copy_limit_2;
   6813 		}
   6814 		if (hw_copy_limit_4 == 0) {
   6815 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
   6816 			priv_hcl_4 = hw_copy_limit_4;
   6817 		}
   6818 		if (hw_copy_limit_8 == 0) {
   6819 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
   6820 			priv_hcl_8 = hw_copy_limit_8;
   6821 		}
   6822 		min_ecache_size = ecache_size;
   6823 	} else {
   6824 		/*
   6825 		 * MP initialization. Called *after* /etc/system has
   6826 		 * been parsed. One CPU has already been initialized.
   6827 		 * Need to cater for /etc/system having scragged one
   6828 		 * of our values.
   6829 		 */
   6830 		if (ecache_size == min_ecache_size) {
   6831 			/*
   6832 			 * Same size ecache. We do nothing unless we
   6833 			 * have a pessimistic ecache setting. In that
   6834 			 * case we become more optimistic (if the cache is
   6835 			 * large enough).
   6836 			 */
   6837 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
   6838 				/*
   6839 				 * Need to adjust hw_copy_limit* from our
   6840 				 * pessimistic uniprocessor value to a more
   6841 				 * optimistic UP value *iff* it hasn't been
   6842 				 * reset.
   6843 				 */
   6844 				if ((ecache_size > 1048576) &&
   6845 				    (priv_hcl_8 == hw_copy_limit_8)) {
   6846 					if (ecache_size <= 2097152)
   6847 						hw_copy_limit_8 = 4 *
   6848 						    VIS_COPY_THRESHOLD;
   6849 					else if (ecache_size <= 4194304)
   6850 						hw_copy_limit_8 = 4 *
   6851 						    VIS_COPY_THRESHOLD;
   6852 					else
   6853 						hw_copy_limit_8 = 4 *
   6854 						    VIS_COPY_THRESHOLD;
   6855 					priv_hcl_8 = hw_copy_limit_8;
   6856 				}
   6857 			}
   6858 		} else if (ecache_size < min_ecache_size) {
   6859 			/*
   6860 			 * A different ecache size. Can this even happen?
   6861 			 */
   6862 			if (priv_hcl_8 == hw_copy_limit_8) {
   6863 				/*
   6864 				 * The previous value that we set
   6865 				 * is unchanged (i.e., it hasn't been
   6866 				 * scragged by /etc/system). Rewrite it.
   6867 				 */
   6868 				if (ecache_size <= 1048576)
   6869 					hw_copy_limit_8 = 8 *
   6870 					    VIS_COPY_THRESHOLD;
   6871 				else if (ecache_size <= 2097152)
   6872 					hw_copy_limit_8 = 8 *
   6873 					    VIS_COPY_THRESHOLD;
   6874 				else if (ecache_size <= 4194304)
   6875 					hw_copy_limit_8 = 8 *
   6876 					    VIS_COPY_THRESHOLD;
   6877 				else
   6878 					hw_copy_limit_8 = 10 *
   6879 					    VIS_COPY_THRESHOLD;
   6880 				priv_hcl_8 = hw_copy_limit_8;
   6881 				min_ecache_size = ecache_size;
   6882 			}
   6883 		}
   6884 	}
   6885 }
   6886 
   6887 /*
   6888  * Called from illegal instruction trap handler to see if we can attribute
   6889  * the trap to a fpras check.
   6890  */
   6891 int
   6892 fpras_chktrap(struct regs *rp)
   6893 {
   6894 	int op;
   6895 	struct fpras_chkfngrp *cgp;
   6896 	uintptr_t tpc = (uintptr_t)rp->r_pc;
   6897 
   6898 	if (fpras_chkfngrps == NULL)
   6899 		return (0);
   6900 
   6901 	cgp = &fpras_chkfngrps[CPU->cpu_id];
   6902 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
   6903 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
   6904 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
   6905 			break;
   6906 	}
   6907 	if (op == FPRAS_NCOPYOPS)
   6908 		return (0);
   6909 
   6910 	/*
   6911 	 * This is an fpRAS failure caught through an illegal
   6912 	 * instruction - trampoline.
   6913 	 */
   6914 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
   6915 	rp->r_npc = rp->r_pc + 4;
   6916 	return (1);
   6917 }
   6918 
   6919 /*
   6920  * fpras_failure is called when a fpras check detects a bad calculation
   6921  * result or an illegal instruction trap is attributed to an fpras
   6922  * check.  In all cases we are still bound to CPU.
   6923  */
   6924 int
   6925 fpras_failure(int op, int how)
   6926 {
   6927 	int use_hw_bcopy_orig, use_hw_bzero_orig;
   6928 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
   6929 	ch_async_flt_t ch_flt;
   6930 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
   6931 	struct fpras_chkfn *sfp, *cfp;
   6932 	uint32_t *sip, *cip;
   6933 	int i;
   6934 
   6935 	/*
   6936 	 * We're running on a sick CPU.  Avoid further FPU use at least for
   6937 	 * the time in which we dispatch an ereport and (if applicable) panic.
   6938 	 */
   6939 	use_hw_bcopy_orig = use_hw_bcopy;
   6940 	use_hw_bzero_orig = use_hw_bzero;
   6941 	hcl1_orig = hw_copy_limit_1;
   6942 	hcl2_orig = hw_copy_limit_2;
   6943 	hcl4_orig = hw_copy_limit_4;
   6944 	hcl8_orig = hw_copy_limit_8;
   6945 	use_hw_bcopy = use_hw_bzero = 0;
   6946 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
   6947 	    hw_copy_limit_8 = 0;
   6948 
   6949 	bzero(&ch_flt, sizeof (ch_async_flt_t));
   6950 	aflt->flt_id = gethrtime_waitfree();
   6951 	aflt->flt_class = CPU_FAULT;
   6952 	aflt->flt_inst = CPU->cpu_id;
   6953 	aflt->flt_status = (how << 8) | op;
   6954 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
   6955 	ch_flt.flt_type = CPU_FPUERR;
   6956 
   6957 	/*
   6958 	 * We must panic if the copy operation had no lofault protection -
   6959 	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
   6960 	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
   6961 	 */
   6962 	aflt->flt_panic = (curthread->t_lofault == NULL);
   6963 
   6964 	/*
   6965 	 * XOR the source instruction block with the copied instruction
   6966 	 * block - this will show us which bit(s) are corrupted.
   6967 	 */
   6968 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
   6969 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
   6970 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
   6971 		sip = &sfp->fpras_blk0[0];
   6972 		cip = &cfp->fpras_blk0[0];
   6973 	} else {
   6974 		sip = &sfp->fpras_blk1[0];
   6975 		cip = &cfp->fpras_blk1[0];
   6976 	}
   6977 	for (i = 0; i < 16; ++i, ++sip, ++cip)
   6978 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
   6979 
   6980 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
   6981 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
   6982 
   6983 	if (aflt->flt_panic)
   6984 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
   6985 
   6986 	/*
   6987 	 * We get here for copyin/copyout and kcopy or bcopy where the
   6988 	 * caller has used on_fault.  We will flag the error so that
   6989 	 * the process may be killed  The trap_async_hwerr mechanism will
   6990 	 * take appropriate further action (such as a reboot, contract
   6991 	 * notification etc).  Since we may be continuing we will
   6992 	 * restore the global hardware copy acceleration switches.
   6993 	 *
   6994 	 * When we return from this function to the copy function we want to
   6995 	 * avoid potentially bad data being used, ie we want the affected
   6996 	 * copy function to return an error.  The caller should therefore
   6997 	 * invoke its lofault handler (which always exists for these functions)
   6998 	 * which will return the appropriate error.
   6999 	 */
   7000 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
   7001 	aston(curthread);
   7002 
   7003 	use_hw_bcopy = use_hw_bcopy_orig;
   7004 	use_hw_bzero = use_hw_bzero_orig;
   7005 	hw_copy_limit_1 = hcl1_orig;
   7006 	hw_copy_limit_2 = hcl2_orig;
   7007 	hw_copy_limit_4 = hcl4_orig;
   7008 	hw_copy_limit_8 = hcl8_orig;
   7009 
   7010 	return (1);
   7011 }
   7012 
   7013 #define	VIS_BLOCKSIZE		64
   7014 
   7015 int
   7016 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
   7017 {
   7018 	int ret, watched;
   7019 
   7020 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
   7021 	ret = dtrace_blksuword32(addr, data, 0);
   7022 	if (watched)
   7023 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
   7024 
   7025 	return (ret);
   7026 }
   7027 
   7028 /*
   7029  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
   7030  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
   7031  * CEEN from the EER to disable traps for further disrupting error types
   7032  * on that cpu.  We could cross-call instead, but that has a larger
   7033  * instruction and data footprint than cross-trapping, and the cpu is known
   7034  * to be faulted.
   7035  */
   7036 
   7037 void
   7038 cpu_faulted_enter(struct cpu *cp)
   7039 {
   7040 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
   7041 }
   7042 
   7043 /*
   7044  * Called when a cpu leaves the CPU_FAULTED state to return to one of
   7045  * offline, spare, or online (by the cpu requesting this state change).
   7046  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
   7047  * disrupting error bits that have accumulated without trapping, then
   7048  * we cross-trap to re-enable CEEN controlled traps.
   7049  */
   7050 void
   7051 cpu_faulted_exit(struct cpu *cp)
   7052 {
   7053 	ch_cpu_errors_t cpu_error_regs;
   7054 
   7055 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
   7056 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
   7057 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
   7058 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
   7059 	    (uint64_t)&cpu_error_regs, 0);
   7060 
   7061 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
   7062 }
   7063 
   7064 /*
   7065  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
   7066  * the errors in the original AFSR, 0 otherwise.
   7067  *
   7068  * For all procs if the initial error was a BERR or TO, then it is possible
   7069  * that we may have caused a secondary BERR or TO in the process of logging the
   7070  * inital error via cpu_run_bus_error_handlers().  If this is the case then
   7071  * if the request was protected then a panic is still not necessary, if not
   7072  * protected then aft_panic is already set - so either way there's no need
   7073  * to set aft_panic for the secondary error.
   7074  *
   7075  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
   7076  * a store merge, then the error handling code will call cpu_deferred_error().
   7077  * When clear_errors() is called, it will determine that secondary errors have
   7078  * occurred - in particular, the store merge also caused a EDU and WDU that
   7079  * weren't discovered until this point.
   7080  *
   7081  * We do three checks to verify that we are in this case.  If we pass all three
   7082  * checks, we return 1 to indicate that we should not panic.  If any unexpected
   7083  * errors occur, we return 0.
   7084  *
   7085  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
   7086  * handled in cpu_disrupting_errors().  Since this function is not even called
   7087  * in the case we are interested in, we just return 0 for these processors.
   7088  */
   7089 /*ARGSUSED*/
   7090 static int
   7091 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
   7092     uint64_t t_afar)
   7093 {
   7094 #if defined(CHEETAH_PLUS)
   7095 #else	/* CHEETAH_PLUS */
   7096 	struct async_flt *aflt = (struct async_flt *)ch_flt;
   7097 #endif	/* CHEETAH_PLUS */
   7098 
   7099 	/*
   7100 	 * Was the original error a BERR or TO and only a BERR or TO
   7101 	 * (multiple errors are also OK)
   7102 	 */
   7103 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
   7104 		/*
   7105 		 * Is the new error a BERR or TO and only a BERR or TO
   7106 		 * (multiple errors are also OK)
   7107 		 */
   7108 		if ((ch_flt->afsr_errs &
   7109 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
   7110 			return (1);
   7111 	}
   7112 
   7113 #if defined(CHEETAH_PLUS)
   7114 	return (0);
   7115 #else	/* CHEETAH_PLUS */
   7116 	/*
   7117 	 * Now look for secondary effects of a UE on cheetah/jalapeno
   7118 	 *
   7119 	 * Check the original error was a UE, and only a UE.  Note that
   7120 	 * the ME bit will cause us to fail this check.
   7121 	 */
   7122 	if (t_afsr_errs != C_AFSR_UE)
   7123 		return (0);
   7124 
   7125 	/*
   7126 	 * Check the secondary errors were exclusively an EDU and/or WDU.
   7127 	 */
   7128 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
   7129 		return (0);
   7130 
   7131 	/*
   7132 	 * Check the AFAR of the original error and secondary errors
   7133 	 * match to the 64-byte boundary
   7134 	 */
   7135 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
   7136 		return (0);
   7137 
   7138 	/*
   7139 	 * We've passed all the checks, so it's a secondary error!
   7140 	 */
   7141 	return (1);
   7142 #endif	/* CHEETAH_PLUS */
   7143 }
   7144 
   7145 /*
   7146  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
   7147  * is checked for any valid errors.  If found, the error type is
   7148  * returned. If not found, the flt_type is checked for L1$ parity errors.
   7149  */
   7150 /*ARGSUSED*/
   7151 static uint8_t
   7152 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
   7153 {
   7154 #if defined(JALAPENO)
   7155 	/*
   7156 	 * Currently, logging errors to the SC is not supported on Jalapeno
   7157 	 */
   7158 	return (PLAT_ECC_ERROR2_NONE);
   7159 #else
   7160 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
   7161 
   7162 	switch (ch_flt->flt_bit) {
   7163 	case C_AFSR_CE:
   7164 		return (PLAT_ECC_ERROR2_CE);
   7165 	case C_AFSR_UCC:
   7166 	case C_AFSR_EDC:
   7167 	case C_AFSR_WDC:
   7168 	case C_AFSR_CPC:
   7169 		return (PLAT_ECC_ERROR2_L2_CE);
   7170 	case C_AFSR_EMC:
   7171 		return (PLAT_ECC_ERROR2_EMC);
   7172 	case C_AFSR_IVC:
   7173 		return (PLAT_ECC_ERROR2_IVC);
   7174 	case C_AFSR_UE:
   7175 		return (PLAT_ECC_ERROR2_UE);
   7176 	case C_AFSR_UCU:
   7177 	case C_AFSR_EDU:
   7178 	case C_AFSR_WDU:
   7179 	case C_AFSR_CPU:
   7180 		return (PLAT_ECC_ERROR2_L2_UE);
   7181 	case C_AFSR_IVU:
   7182 		return (PLAT_ECC_ERROR2_IVU);
   7183 	case C_AFSR_TO:
   7184 		return (PLAT_ECC_ERROR2_TO);
   7185 	case C_AFSR_BERR:
   7186 		return (PLAT_ECC_ERROR2_BERR);
   7187 #if defined(CHEETAH_PLUS)
   7188 	case C_AFSR_L3_EDC:
   7189 	case C_AFSR_L3_UCC:
   7190 	case C_AFSR_L3_CPC:
   7191 	case C_AFSR_L3_WDC:
   7192 		return (PLAT_ECC_ERROR2_L3_CE);
   7193 	case C_AFSR_IMC:
   7194 		return (PLAT_ECC_ERROR2_IMC);
   7195 	case C_AFSR_TSCE:
   7196 		return (PLAT_ECC_ERROR2_L2_TSCE);
   7197 	case C_AFSR_THCE:
   7198 		return (PLAT_ECC_ERROR2_L2_THCE);
   7199 	case C_AFSR_L3_MECC:
   7200 		return (PLAT_ECC_ERROR2_L3_MECC);
   7201 	case C_AFSR_L3_THCE:
   7202 		return (PLAT_ECC_ERROR2_L3_THCE);
   7203 	case C_AFSR_L3_CPU:
   7204 	case C_AFSR_L3_EDU:
   7205 	case C_AFSR_L3_UCU:
   7206 	case C_AFSR_L3_WDU:
   7207 		return (PLAT_ECC_ERROR2_L3_UE);
   7208 	case C_AFSR_DUE:
   7209 		return (PLAT_ECC_ERROR2_DUE);
   7210 	case C_AFSR_DTO:
   7211 		return (PLAT_ECC_ERROR2_DTO);
   7212 	case C_AFSR_DBERR:
   7213 		return (PLAT_ECC_ERROR2_DBERR);
   7214 #endif	/* CHEETAH_PLUS */
   7215 	default:
   7216 		switch (ch_flt->flt_type) {
   7217 #if defined(CPU_IMP_L1_CACHE_PARITY)
   7218 		case CPU_IC_PARITY:
   7219 			return (PLAT_ECC_ERROR2_IPE);
   7220 		case CPU_DC_PARITY:
   7221 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
   7222 				if (ch_flt->parity_data.dpe.cpl_cache ==
   7223 				    CPU_PC_PARITY) {
   7224 					return (PLAT_ECC_ERROR2_PCACHE);
   7225 				}
   7226 			}
   7227 			return (PLAT_ECC_ERROR2_DPE);
   7228 #endif /* CPU_IMP_L1_CACHE_PARITY */
   7229 		case CPU_ITLB_PARITY:
   7230 			return (PLAT_ECC_ERROR2_ITLB);
   7231 		case CPU_DTLB_PARITY:
   7232 			return (PLAT_ECC_ERROR2_DTLB);
   7233 		default:
   7234 			return (PLAT_ECC_ERROR2_NONE);
   7235 		}
   7236 	}
   7237 #endif	/* JALAPENO */
   7238 }
   7239