Home | History | Annotate | Download | only in cpu
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/types.h>
     27 #include <sys/systm.h>
     28 #include <sys/archsystm.h>
     29 #include <sys/machparam.h>
     30 #include <sys/machsystm.h>
     31 #include <sys/cpu.h>
     32 #include <sys/elf_SPARC.h>
     33 #include <vm/hat_sfmmu.h>
     34 #include <vm/seg_kpm.h>
     35 #include <vm/page.h>
     36 #include <vm/vm_dep.h>
     37 #include <sys/cpuvar.h>
     38 #include <sys/spitregs.h>
     39 #include <sys/async.h>
     40 #include <sys/cmn_err.h>
     41 #include <sys/debug.h>
     42 #include <sys/dditypes.h>
     43 #include <sys/sunddi.h>
     44 #include <sys/cpu_module.h>
     45 #include <sys/prom_debug.h>
     46 #include <sys/vmsystm.h>
     47 #include <sys/prom_plat.h>
     48 #include <sys/sysmacros.h>
     49 #include <sys/intreg.h>
     50 #include <sys/machtrap.h>
     51 #include <sys/ontrap.h>
     52 #include <sys/ivintr.h>
     53 #include <sys/atomic.h>
     54 #include <sys/panic.h>
     55 #include <sys/ndifm.h>
     56 #include <sys/fm/protocol.h>
     57 #include <sys/fm/util.h>
     58 #include <sys/fm/cpu/UltraSPARC-II.h>
     59 #include <sys/ddi.h>
     60 #include <sys/ecc_kstat.h>
     61 #include <sys/watchpoint.h>
     62 #include <sys/dtrace.h>
     63 #include <sys/errclassify.h>
     64 
     65 uint_t	cpu_impl_dual_pgsz = 0;
     66 
     67 /*
     68  * Structure for the 8 byte ecache data dump and the associated AFSR state.
     69  * There will be 8 of these structures used to dump an ecache line (64 bytes).
     70  */
     71 typedef struct sf_ec_data_elm {
     72 	uint64_t ec_d8;
     73 	uint64_t ec_afsr;
     74 } ec_data_t;
     75 
     76 /*
     77  * Define spitfire (Ultra I/II) specific asynchronous error structure
     78  */
     79 typedef struct spitfire_async_flt {
     80 	struct async_flt cmn_asyncflt;	/* common - see sun4u/sys/async.h */
     81 	ushort_t flt_type;		/* types of faults - cpu specific */
     82 	ec_data_t flt_ec_data[8];	/* for E$ or mem dump/state */
     83 	uint64_t flt_ec_tag;		/* E$ tag info */
     84 	int flt_ec_lcnt;		/* number of bad E$ lines */
     85 	ushort_t flt_sdbh;		/* UDBH reg */
     86 	ushort_t flt_sdbl;		/* UDBL reg */
     87 } spitf_async_flt;
     88 
     89 /*
     90  * Prototypes for support routines in spitfire_asm.s:
     91  */
     92 extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize);
     93 extern uint64_t get_lsu(void);
     94 extern void set_lsu(uint64_t ncc);
     95 extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
     96 				uint64_t *oafsr, uint64_t *acc_afsr);
     97 extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr);
     98 extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr,
     99 				uint64_t *acc_afsr);
    100 extern uint64_t read_and_clear_afsr();
    101 extern void write_ec_tag_parity(uint32_t id);
    102 extern void write_hb_ec_tag_parity(uint32_t id);
    103 
    104 /*
    105  * Spitfire module routines:
    106  */
    107 static void cpu_async_log_err(void *flt);
    108 /*PRINTFLIKE6*/
    109 static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
    110     uint_t logflags, const char *endstr, const char *fmt, ...);
    111 
    112 static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err);
    113 static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum);
    114 static void cpu_log_ecmem_info(spitf_async_flt *spf_flt);
    115 
    116 static void log_ce_err(struct async_flt *aflt, char *unum);
    117 static void log_ue_err(struct async_flt *aflt, char *unum);
    118 static void check_misc_err(spitf_async_flt *spf_flt);
    119 static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes);
    120 static int check_ecc(struct async_flt *aflt);
    121 static uint_t get_cpu_status(uint64_t arg);
    122 static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr);
    123 static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag,
    124 		int *m, uint64_t *afsr);
    125 static void ecache_kstat_init(struct cpu *cp);
    126 static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag,
    127 		uint64_t paddr, int mpb, uint64_t);
    128 static uint64_t ecache_scrub_misc_err(int, uint64_t);
    129 static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t);
    130 static void ecache_page_retire(void *);
    131 static int ecc_kstat_update(kstat_t *ksp, int rw);
    132 static int ce_count_unum(int status, int len, char *unum);
    133 static void add_leaky_bucket_timeout(void);
    134 static int synd_to_synd_code(int synd_status, ushort_t synd);
    135 
    136 extern uint_t read_all_memscrub;
    137 extern void memscrub_run(void);
    138 
    139 static uchar_t	isus2i;			/* set if sabre */
    140 static uchar_t	isus2e;			/* set if hummingbird */
    141 
    142 /*
    143  * Default ecache mask and shift settings for Spitfire.  If we detect a
    144  * different CPU implementation, we will modify these values at boot time.
    145  */
    146 static uint64_t cpu_ec_tag_mask		= S_ECTAG_MASK;
    147 static uint64_t cpu_ec_state_mask	= S_ECSTATE_MASK;
    148 static uint64_t cpu_ec_par_mask		= S_ECPAR_MASK;
    149 static int cpu_ec_par_shift		= S_ECPAR_SHIFT;
    150 static int cpu_ec_tag_shift		= S_ECTAG_SHIFT;
    151 static int cpu_ec_state_shift		= S_ECSTATE_SHIFT;
    152 static uchar_t cpu_ec_state_exl		= S_ECSTATE_EXL;
    153 static uchar_t cpu_ec_state_mod		= S_ECSTATE_MOD;
    154 static uchar_t cpu_ec_state_shr		= S_ECSTATE_SHR;
    155 static uchar_t cpu_ec_state_own		= S_ECSTATE_OWN;
    156 
    157 /*
    158  * Default ecache state bits for Spitfire.  These individual bits indicate if
    159  * the given line is in any of the valid or modified states, respectively.
    160  * Again, we modify these at boot if we detect a different CPU.
    161  */
    162 static uchar_t cpu_ec_state_valid	= S_ECSTATE_VALID;
    163 static uchar_t cpu_ec_state_dirty	= S_ECSTATE_DIRTY;
    164 static uchar_t cpu_ec_parity		= S_EC_PARITY;
    165 static uchar_t cpu_ec_state_parity	= S_ECSTATE_PARITY;
    166 
    167 /*
    168  * This table is used to determine which bit(s) is(are) bad when an ECC
     169  * error occurs.  The array is indexed by an 8-bit syndrome.  The entries
    170  * of this array have the following semantics:
    171  *
    172  *      00-63   The number of the bad bit, when only one bit is bad.
    173  *      64      ECC bit C0 is bad.
    174  *      65      ECC bit C1 is bad.
    175  *      66      ECC bit C2 is bad.
    176  *      67      ECC bit C3 is bad.
    177  *      68      ECC bit C4 is bad.
    178  *      69      ECC bit C5 is bad.
    179  *      70      ECC bit C6 is bad.
    180  *      71      ECC bit C7 is bad.
    181  *      72      Two bits are bad.
    182  *      73      Three bits are bad.
    183  *      74      Four bits are bad.
    184  *      75      More than Four bits are bad.
    185  *      76      NO bits are bad.
    186  * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
    187  */
    188 
    189 #define	C0	64
    190 #define	C1	65
    191 #define	C2	66
    192 #define	C3	67
    193 #define	C4	68
    194 #define	C5	69
    195 #define	C6	70
    196 #define	C7	71
    197 #define	M2	72
    198 #define	M3	73
    199 #define	M4	74
    200 #define	MX	75
    201 #define	NA	76
    202 
    203 #define	SYND_IS_SINGLE_BIT_DATA(synd_code)	((synd_code >= 0) && \
    204 						    (synd_code < C0))
    205 #define	SYND_IS_SINGLE_BIT_CHK(synd_code)	((synd_code >= C0) && \
    206 						    (synd_code <= C7))
    207 
    208 static char ecc_syndrome_tab[] =
    209 {
    210 	NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
    211 	C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
    212 	C5, M2, M2, 33, M2, 61,  4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
    213 	M2,  0,  1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
    214 	C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
    215 	M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
    216 	M2, MX, 36, M2,  7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
    217 	M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
    218 	C7, M2, M2, 47, M2, 63, MX, M2, M2,  6, 55, M2, 35, M2, M2, 43,
    219 	M2,  5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
    220 	M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
    221 	M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
    222 	M2,  8, 13, M2,  2, M2, M2, M3,  3, M2, M2, M3, M2, MX, MX, M2,
    223 	M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
    224 	M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
    225 	M4, 12,  9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
    226 };
    227 
    228 #define	SYND_TBL_SIZE 256
    229 
    230 /*
    231  * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
    232  * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
    233  */
    234 #define	UDBL_REG	0x8000
    235 #define	UDBL(synd)	((synd & UDBL_REG) >> 15)
    236 #define	SYND(synd)	(synd & 0x7FFF)
    237 
    238 /*
    239  * These error types are specific to Spitfire and are used internally for the
    240  * spitfire fault structure flt_type field.
    241  */
    242 #define	CPU_UE_ERR		0	/* uncorrectable errors - UEs */
    243 #define	CPU_EDP_LDP_ERR		1	/* LDP or EDP parity error */
    244 #define	CPU_WP_ERR		2	/* WP parity error */
    245 #define	CPU_BTO_BERR_ERR	3	/* bus timeout errors */
    246 #define	CPU_PANIC_CP_ERR	4	/* cp error from panic polling */
    247 #define	CPU_TRAPPING_CP_ERR	5	/* for sabre/hbird only, cp error */
    248 #define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
    249 #define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
    250 #define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
    251 #define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
    252 #define	CPU_ORPHAN_CP_ERR	10	/* Orphan CP error */
    253 #define	CPU_ECACHE_ADDR_PAR_ERR	11	/* Ecache Address parity error */
    254 #define	CPU_ECACHE_STATE_ERR	12	/* Ecache state error */
    255 #define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
    256 #define	CPU_ECACHE_TAG_ERR	14	/* Scrub the E$ tag, if state clean */
    257 #define	CPU_ADDITIONAL_ERR	15	/* Additional errors occurred */
    258 
    259 /*
    260  * Macro to access the "Spitfire cpu private" data structure.
    261  */
    262 #define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))
    263 
    264 /*
    265  * set to 0 to disable automatic retiring of pages on
    266  * DIMMs that have excessive soft errors
    267  */
    268 int automatic_page_removal = 1;
    269 
    270 /*
    271  * Heuristic for figuring out which module to replace.
    272  * Relative likelihood that this P_SYND indicates that this module is bad.
    273  * We call it a "score", though, not a relative likelihood.
    274  *
    275  * Step 1.
    276  * Assign a score to each byte of P_SYND according to the following rules:
    277  * If no bits on (0x00) or all bits on (0xFF), then give it a 5.
    278  * If one bit on, give it a 95.
    279  * If seven bits on, give it a 10.
    280  * If two bits on:
    281  *   in different nybbles, a 90
    282  *   in same nybble, but unaligned, 85
    283  *   in same nybble and as an aligned pair, 80
    284  * If six bits on, look at the bits that are off:
    285  *   in same nybble and as an aligned pair, 15
    286  *   in same nybble, but unaligned, 20
    287  *   in different nybbles, a 25
    288  * If three bits on:
     289  *   in different nybbles, no aligned pairs, 75
     290  *   in different nybbles, one aligned pair, 70
     291  *   in the same nybble, 65
     292  * If five bits on, look at the bits that are off:
     293  *   in the same nybble, 30
     294  *   in different nybbles, one aligned pair, 35
     295  *   in different nybbles, no aligned pairs, 40
    296  * If four bits on:
    297  *   all in one nybble, 45
    298  *   as two aligned pairs, 50
    299  *   one aligned pair, 55
    300  *   no aligned pairs, 60
    301  *
    302  * Step 2:
    303  * Take the higher of the two scores (one for each byte) as the score
    304  * for the module.
    305  *
    306  * Print the score for each module, and field service should replace the
    307  * module with the highest score.
    308  */
    309 
    310 /*
    311  * In the table below, the first row/column comment indicates the
    312  * number of bits on in that nybble; the second row/column comment is
    313  * the hex digit.
    314  */
    315 
    316 static int
    317 p_synd_score_table[256] = {
    318 	/* 0   1   1   2   1   2   2   3   1   2   2   3   2   3   3   4 */
    319 	/* 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  A,  B,  C,  D,  E,  F */
    320 /* 0 0 */  5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
    321 /* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
    322 /* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
    323 /* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
    324 /* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
    325 /* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
    326 /* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
    327 /* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
    328 /* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
    329 /* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
    330 /* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
    331 /* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
    332 /* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
    333 /* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
    334 /* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
    335 /* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10,  5,
    336 };
    337 
    338 int
    339 ecc_psynd_score(ushort_t p_synd)
    340 {
    341 	int i, j, a, b;
    342 
    343 	i = p_synd & 0xFF;
    344 	j = (p_synd >> 8) & 0xFF;
    345 
    346 	a = p_synd_score_table[i];
    347 	b = p_synd_score_table[j];
    348 
    349 	return (a > b ? a : b);
    350 }
    351 
    352 /*
    353  * Async Fault Logging
    354  *
    355  * To ease identifying, reading, and filtering async fault log messages, the
    356  * label [AFT#] is now prepended to each async fault message.  These messages
    357  * and the logging rules are implemented by cpu_aflt_log(), below.
    358  *
    359  * [AFT0] - Tag for log messages that are associated with corrected ECC errors.
    360  *          This includes both corrected ECC memory and ecache faults.
    361  *
    362  * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
    363  *          else except CE errors) with a priority of 1 (highest).  This tag
    364  *          is also used for panic messages that result from an async fault.
    365  *
    366  * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
    367  * [AFT3]   or parity errors.  For example, AFT2 is used for the actual dump
    368  *          of the E-$ data and tags.
    369  *
    370  * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
    371  * printed on the console.  To send all AFT logs to both the log and the
    372  * console, set aft_verbose = 1.
    373  */
    374 
    375 #define	CPU_FLTCPU		0x0001	/* print flt_inst as a CPU id */
    376 #define	CPU_SPACE		0x0002	/* print flt_status (data or instr) */
    377 #define	CPU_ERRID		0x0004	/* print flt_id */
    378 #define	CPU_TL			0x0008	/* print flt_tl */
    379 #define	CPU_ERRID_FIRST 	0x0010	/* print flt_id first in message */
    380 #define	CPU_AFSR		0x0020	/* print flt_stat as decoded %afsr */
    381 #define	CPU_AFAR		0x0040	/* print flt_addr as %afar */
    382 #define	CPU_AF_PSYND		0x0080	/* print flt_stat %afsr.PSYND */
    383 #define	CPU_AF_ETS		0x0100	/* print flt_stat %afsr.ETS */
    384 #define	CPU_UDBH		0x0200	/* print flt_sdbh and syndrome */
    385 #define	CPU_UDBL		0x0400	/* print flt_sdbl and syndrome */
    386 #define	CPU_FAULTPC		0x0800	/* print flt_pc */
    387 #define	CPU_SYND		0x1000	/* print flt_synd and unum */
    388 
    389 #define	CMN_LFLAGS	(CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL |	\
    390 				CPU_AFSR | CPU_AFAR | CPU_AF_PSYND |	\
    391 				CPU_AF_ETS | CPU_UDBH | CPU_UDBL |	\
    392 				CPU_FAULTPC)
    393 #define	UE_LFLAGS	(CMN_LFLAGS | CPU_SYND)
    394 #define	CE_LFLAGS	(UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL &	\
    395 				~CPU_SPACE)
    396 #define	PARERR_LFLAGS	(CMN_LFLAGS)
    397 #define	WP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL)
    398 #define	CP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL &		\
    399 				~CPU_FLTCPU & ~CPU_FAULTPC)
    400 #define	BERRTO_LFLAGS	(CMN_LFLAGS)
    401 #define	NO_LFLAGS	(0)
    402 
    403 #define	AFSR_FMTSTR0	"\020\1ME"
    404 #define	AFSR_FMTSTR1	"\020\040PRIV\037ISAP\036ETP\035IVUE\034TO"	\
    405 			"\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
    406 #define	UDB_FMTSTR	"\020\012UE\011CE"
    407 
    408 /*
    409  * Save the cache bootup state for use when internal
    410  * caches are to be re-enabled after an error occurs.
    411  */
    412 uint64_t	cache_boot_state = 0;
    413 
    414 /*
    415  * PA[31:0] represent Displacement in UPA configuration space.
    416  */
    417 uint_t	root_phys_addr_lo_mask = 0xffffffff;
    418 
    419 /*
    420  * Spitfire legacy globals
    421  */
    422 int	itlb_entries;
    423 int	dtlb_entries;
    424 
    425 void
    426 cpu_setup(void)
    427 {
    428 	extern int page_retire_messages;
    429 	extern int page_retire_first_ue;
    430 	extern int at_flags;
    431 #if defined(SF_ERRATA_57)
    432 	extern caddr_t errata57_limit;
    433 #endif
    434 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
    435 
    436 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1;
    437 
    438 	/*
    439 	 * Spitfire isn't currently FMA-aware, so we have to enable the
    440 	 * page retirement messages. We also change the default policy
    441 	 * for UE retirement to allow clearing of transient errors.
    442 	 */
    443 	page_retire_messages = 1;
    444 	page_retire_first_ue = 0;
    445 
    446 	/*
    447 	 * save the cache bootup state.
    448 	 */
    449 	cache_boot_state = get_lsu() & (LSU_IC | LSU_DC);
    450 
    451 	if (use_page_coloring) {
    452 		do_pg_coloring = 1;
    453 	}
    454 
    455 	/*
    456 	 * Tune pp_slots to use up to 1/8th of the tlb entries.
    457 	 */
    458 	pp_slots = MIN(8, MAXPP_SLOTS);
    459 
    460 	/*
    461 	 * Block stores invalidate all pages of the d$ so pagecopy
    462 	 * et. al. do not need virtual translations with virtual
    463 	 * coloring taken into consideration.
    464 	 */
    465 	pp_consistent_coloring = 0;
    466 
    467 	isa_list =
    468 	    "sparcv9+vis sparcv9 "
    469 	    "sparcv8plus+vis sparcv8plus "
    470 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
    471 
    472 	cpu_hwcap_flags = AV_SPARC_VIS;
    473 
    474 	/*
    475 	 * On Spitfire, there's a hole in the address space
    476 	 * that we must never map (the hardware only support 44-bits of
    477 	 * virtual address).  Later CPUs are expected to have wider
    478 	 * supported address ranges.
    479 	 *
    480 	 * See address map on p23 of the UltraSPARC 1 user's manual.
    481 	 */
    482 	hole_start = (caddr_t)0x80000000000ull;
    483 	hole_end = (caddr_t)0xfffff80000000000ull;
    484 
    485 	/*
    486 	 * A spitfire call bug requires us to be a further 4Gbytes of
    487 	 * firewall from the spec.
    488 	 *
    489 	 * See Spitfire Errata #21
    490 	 */
    491 	hole_start = (caddr_t)((uintptr_t)hole_start - (1ul << 32));
    492 	hole_end = (caddr_t)((uintptr_t)hole_end + (1ul << 32));
    493 
    494 	/*
    495 	 * The kpm mapping window.
    496 	 * kpm_size:
    497 	 *	The size of a single kpm range.
    498 	 *	The overall size will be: kpm_size * vac_colors.
    499 	 * kpm_vbase:
    500 	 *	The virtual start address of the kpm range within the kernel
    501 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
    502 	 */
    503 	kpm_size = (size_t)(2ull * 1024 * 1024 * 1024 * 1024); /* 2TB */
    504 	kpm_size_shift = 41;
    505 	kpm_vbase = (caddr_t)0xfffffa0000000000ull; /* 16EB - 6TB */
    506 
    507 	/*
    508 	 * All UltraSPARC platforms should use small kpm page as default, as
    509 	 * the KPM large page VAC conflict code has no value to maintain. The
    510 	 * new generation of SPARC no longer have VAC conflict issue.
    511 	 */
    512 	kpm_smallpages = 1;
    513 
    514 #if defined(SF_ERRATA_57)
    515 	errata57_limit = (caddr_t)0x80000000ul;
    516 #endif
    517 
    518 	/*
    519 	 * Disable text by default.
    520 	 * Note that the other defaults are set in sun4u/vm/mach_vm_dep.c.
    521 	 */
    522 	max_utext_lpsize = MMU_PAGESIZE;
    523 }
    524 
    525 static int
    526 getintprop(pnode_t node, char *name, int deflt)
    527 {
    528 	int	value;
    529 
    530 	switch (prom_getproplen(node, name)) {
    531 	case 0:
    532 		value = 1;	/* boolean properties */
    533 		break;
    534 
    535 	case sizeof (int):
    536 		(void) prom_getprop(node, name, (caddr_t)&value);
    537 		break;
    538 
    539 	default:
    540 		value = deflt;
    541 		break;
    542 	}
    543 
    544 	return (value);
    545 }
    546 
    547 /*
    548  * Set the magic constants of the implementation.
    549  */
    550 void
    551 cpu_fiximp(pnode_t dnode)
    552 {
    553 	extern int vac_size, vac_shift;
    554 	extern uint_t vac_mask;
    555 	extern int dcache_line_mask;
    556 	int i, a;
    557 	static struct {
    558 		char	*name;
    559 		int	*var;
    560 	} prop[] = {
    561 		"dcache-size",		&dcache_size,
    562 		"dcache-line-size",	&dcache_linesize,
    563 		"icache-size",		&icache_size,
    564 		"icache-line-size",	&icache_linesize,
    565 		"ecache-size",		&ecache_size,
    566 		"ecache-line-size",	&ecache_alignsize,
    567 		"ecache-associativity", &ecache_associativity,
    568 		"#itlb-entries",	&itlb_entries,
    569 		"#dtlb-entries",	&dtlb_entries,
    570 		};
    571 
    572 	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++) {
    573 		if ((a = getintprop(dnode, prop[i].name, -1)) != -1) {
    574 			*prop[i].var = a;
    575 		}
    576 	}
    577 
    578 	ecache_setsize = ecache_size / ecache_associativity;
    579 
    580 	vac_size = S_VAC_SIZE;
    581 	vac_mask = MMU_PAGEMASK & (vac_size - 1);
    582 	i = 0; a = vac_size;
    583 	while (a >>= 1)
    584 		++i;
    585 	vac_shift = i;
    586 	shm_alignment = vac_size;
    587 	vac = 1;
    588 
    589 	dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1);
    590 
    591 	/*
    592 	 * UltraSPARC I & II have ecache sizes running
    593 	 * as follows: .25 MB, .5 MB, 1 MB, 2 MB, 4 MB
    594 	 * and 8 MB. Adjust the copyin/copyout limits
    595 	 * according to the cache size. The magic number
    596 	 * of VIS_COPY_THRESHOLD comes from the copyin/copyout code
    597 	 * and its floor of VIS_COPY_THRESHOLD bytes before it will use
    598 	 * VIS instructions.
    599 	 *
    600 	 * We assume that all CPUs on the system have the same size
    601 	 * ecache. We're also called very early in the game.
    602 	 * /etc/system will be parsed *after* we're called so
    603 	 * these values can be overwritten.
    604 	 */
    605 
    606 	hw_copy_limit_1 = VIS_COPY_THRESHOLD;
    607 	if (ecache_size <= 524288) {
    608 		hw_copy_limit_2 = VIS_COPY_THRESHOLD;
    609 		hw_copy_limit_4 = VIS_COPY_THRESHOLD;
    610 		hw_copy_limit_8 = VIS_COPY_THRESHOLD;
    611 	} else if (ecache_size == 1048576) {
    612 		hw_copy_limit_2 = 1024;
    613 		hw_copy_limit_4 = 1280;
    614 		hw_copy_limit_8 = 1536;
    615 	} else if (ecache_size == 2097152) {
    616 		hw_copy_limit_2 = 1536;
    617 		hw_copy_limit_4 = 2048;
    618 		hw_copy_limit_8 = 2560;
    619 	} else if (ecache_size == 4194304) {
    620 		hw_copy_limit_2 = 2048;
    621 		hw_copy_limit_4 = 2560;
    622 		hw_copy_limit_8 = 3072;
    623 	} else {
    624 		hw_copy_limit_2 = 2560;
    625 		hw_copy_limit_4 = 3072;
    626 		hw_copy_limit_8 = 3584;
    627 	}
    628 }
    629 
    630 /*
    631  * Called by setcpudelay
    632  */
    633 void
    634 cpu_init_tick_freq(void)
    635 {
    636 	/*
    637 	 * Determine the cpu frequency by calling
    638 	 * tod_get_cpufrequency. Use an approximate freqency
    639 	 * value computed by the prom if the tod module
    640 	 * is not initialized and loaded yet.
    641 	 */
    642 	if (tod_ops.tod_get_cpufrequency != NULL) {
    643 		mutex_enter(&tod_lock);
    644 		sys_tick_freq = tod_ops.tod_get_cpufrequency();
    645 		mutex_exit(&tod_lock);
    646 	} else {
    647 #if defined(HUMMINGBIRD)
    648 		/*
    649 		 * the hummingbird version of %stick is used as the basis for
    650 		 * low level timing; this provides an independent constant-rate
    651 		 * clock for general system use, and frees power mgmt to set
    652 		 * various cpu clock speeds.
    653 		 */
    654 		if (system_clock_freq == 0)
    655 			cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
    656 			    system_clock_freq);
    657 		sys_tick_freq = system_clock_freq;
    658 #else /* SPITFIRE */
    659 		sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
    660 #endif
    661 	}
    662 }
    663 
    664 
    665 void shipit(int upaid);
    666 extern uint64_t xc_tick_limit;
    667 extern uint64_t xc_tick_jump_limit;
    668 
    669 #ifdef SEND_MONDO_STATS
    670 uint64_t x_early[NCPU][64];
    671 #endif
    672 
    673 /*
    674  * Note: A version of this function is used by the debugger via the KDI,
    675  * and must be kept in sync with this version.  Any changes made to this
    676  * function to support new chips or to accomodate errata must also be included
    677  * in the KDI-specific version.  See spitfire_kdi.c.
    678  */
    679 void
    680 send_one_mondo(int cpuid)
    681 {
    682 	uint64_t idsr, starttick, endtick;
    683 	int upaid, busy, nack;
    684 	uint64_t tick, tick_prev;
    685 	ulong_t ticks;
    686 
    687 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
    688 	upaid = CPUID_TO_UPAID(cpuid);
    689 	tick = starttick = gettick();
    690 	shipit(upaid);
    691 	endtick = starttick + xc_tick_limit;
    692 	busy = nack = 0;
    693 	for (;;) {
    694 		idsr = getidsr();
    695 		if (idsr == 0)
    696 			break;
    697 		/*
    698 		 * When we detect an irregular tick jump, we adjust
    699 		 * the timer window to the current tick value.
    700 		 */
    701 		tick_prev = tick;
    702 		tick = gettick();
    703 		ticks = tick - tick_prev;
    704 		if (ticks > xc_tick_jump_limit) {
    705 			endtick = tick + xc_tick_limit;
    706 		} else if (tick > endtick) {
    707 			if (panic_quiesce)
    708 				return;
    709 			cmn_err(CE_PANIC,
    710 			    "send mondo timeout (target 0x%x) [%d NACK %d "
    711 			    "BUSY]", upaid, nack, busy);
    712 		}
    713 		if (idsr & IDSR_BUSY) {
    714 			busy++;
    715 			continue;
    716 		}
    717 		drv_usecwait(1);
    718 		shipit(upaid);
    719 		nack++;
    720 		busy = 0;
    721 	}
    722 #ifdef SEND_MONDO_STATS
    723 	x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++;
    724 #endif
    725 }
    726 
    727 void
    728 send_mondo_set(cpuset_t set)
    729 {
    730 	int i;
    731 
    732 	for (i = 0; i < NCPU; i++)
    733 		if (CPU_IN_SET(set, i)) {
    734 			send_one_mondo(i);
    735 			CPUSET_DEL(set, i);
    736 			if (CPUSET_ISNULL(set))
    737 				break;
    738 		}
    739 }
    740 
    741 void
    742 syncfpu(void)
    743 {
    744 }
    745 
    746 /*
    747  * Determine the size of the CPU module's error structure in bytes.  This is
    748  * called once during boot to initialize the error queues.
    749  */
    750 int
    751 cpu_aflt_size(void)
    752 {
    753 	/*
    754 	 * We need to determine whether this is a sabre, Hummingbird or a
    755 	 * Spitfire/Blackbird impl and set the appropriate state variables for
    756 	 * ecache tag manipulation.  We can't do this in cpu_setup() as it is
    757 	 * too early in the boot flow and the cpunodes are not initialized.
    758 	 * This routine will be called once after cpunodes[] is ready, so do
    759 	 * it here.
    760 	 */
    761 	if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) {
    762 		isus2i = 1;
    763 		cpu_ec_tag_mask = SB_ECTAG_MASK;
    764 		cpu_ec_state_mask = SB_ECSTATE_MASK;
    765 		cpu_ec_par_mask = SB_ECPAR_MASK;
    766 		cpu_ec_par_shift = SB_ECPAR_SHIFT;
    767 		cpu_ec_tag_shift = SB_ECTAG_SHIFT;
    768 		cpu_ec_state_shift = SB_ECSTATE_SHIFT;
    769 		cpu_ec_state_exl = SB_ECSTATE_EXL;
    770 		cpu_ec_state_mod = SB_ECSTATE_MOD;
    771 
    772 		/* These states do not exist in sabre - set to 0xFF */
    773 		cpu_ec_state_shr = 0xFF;
    774 		cpu_ec_state_own = 0xFF;
    775 
    776 		cpu_ec_state_valid = SB_ECSTATE_VALID;
    777 		cpu_ec_state_dirty = SB_ECSTATE_DIRTY;
    778 		cpu_ec_state_parity = SB_ECSTATE_PARITY;
    779 		cpu_ec_parity = SB_EC_PARITY;
    780 	} else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) {
    781 		isus2e = 1;
    782 		cpu_ec_tag_mask = HB_ECTAG_MASK;
    783 		cpu_ec_state_mask = HB_ECSTATE_MASK;
    784 		cpu_ec_par_mask = HB_ECPAR_MASK;
    785 		cpu_ec_par_shift = HB_ECPAR_SHIFT;
    786 		cpu_ec_tag_shift = HB_ECTAG_SHIFT;
    787 		cpu_ec_state_shift = HB_ECSTATE_SHIFT;
    788 		cpu_ec_state_exl = HB_ECSTATE_EXL;
    789 		cpu_ec_state_mod = HB_ECSTATE_MOD;
    790 
    791 		/* These states do not exist in hummingbird - set to 0xFF */
    792 		cpu_ec_state_shr = 0xFF;
    793 		cpu_ec_state_own = 0xFF;
    794 
    795 		cpu_ec_state_valid = HB_ECSTATE_VALID;
    796 		cpu_ec_state_dirty = HB_ECSTATE_DIRTY;
    797 		cpu_ec_state_parity = HB_ECSTATE_PARITY;
    798 		cpu_ec_parity = HB_EC_PARITY;
    799 	}
    800 
    801 	return (sizeof (spitf_async_flt));
    802 }
    803 
    804 
    805 /*
    806  * Correctable ecc error trap handler
    807  */
    808 /*ARGSUSED*/
    809 void
    810 cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
    811 	uint_t p_afsr_high, uint_t p_afar_high)
    812 {
    813 	ushort_t sdbh, sdbl;
    814 	ushort_t e_syndh, e_syndl;
    815 	spitf_async_flt spf_flt;
    816 	struct async_flt *ecc;
    817 	int queue = 1;
    818 
    819 	uint64_t t_afar = p_afar;
    820 	uint64_t t_afsr = p_afsr;
    821 
    822 	/*
    823 	 * Note: the Spitfire data buffer error registers
    824 	 * (upper and lower halves) are or'ed into the upper
    825 	 * word of the afsr by ce_err().
    826 	 */
    827 	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
    828 	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);
    829 
    830 	e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
    831 	e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);
    832 
    833 	t_afsr &= S_AFSR_MASK;
    834 	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */
    835 
    836 	/* Setup the async fault structure */
    837 	bzero(&spf_flt, sizeof (spitf_async_flt));
    838 	ecc = (struct async_flt *)&spf_flt;
    839 	ecc->flt_id = gethrtime_waitfree();
    840 	ecc->flt_stat = t_afsr;
    841 	ecc->flt_addr = t_afar;
    842 	ecc->flt_status = ECC_C_TRAP;
    843 	ecc->flt_bus_id = getprocessorid();
    844 	ecc->flt_inst = CPU->cpu_id;
    845 	ecc->flt_pc = (caddr_t)rp->r_pc;
    846 	ecc->flt_func = log_ce_err;
    847 	ecc->flt_in_memory =
    848 	    (pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0;
    849 	spf_flt.flt_sdbh = sdbh;
    850 	spf_flt.flt_sdbl = sdbl;
    851 
    852 	/*
    853 	 * Check for fatal conditions.
    854 	 */
    855 	check_misc_err(&spf_flt);
    856 
    857 	/*
    858 	 * Pananoid checks for valid AFSR and UDBs
    859 	 */
    860 	if ((t_afsr & P_AFSR_CE) == 0) {
    861 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
    862 		    "** Panic due to CE bit not set in the AFSR",
    863 		    "  Corrected Memory Error on");
    864 	}
    865 
    866 	/*
    867 	 * We want to skip logging only if ALL the following
    868 	 * conditions are true:
    869 	 *
    870 	 *	1. There is only one error
    871 	 *	2. That error is a correctable memory error
    872 	 *	3. The error is caused by the memory scrubber (in which case
    873 	 *	    the error will have occurred under on_trap protection)
    874 	 *	4. The error is on a retired page
    875 	 *
    876 	 * Note: OT_DATA_EC is used places other than the memory scrubber.
    877 	 * However, none of those errors should occur on a retired page.
    878 	 */
    879 	if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
    880 	    curthread->t_ontrap != NULL) {
    881 
    882 		if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
    883 			if (page_retire_check(ecc->flt_addr, NULL) == 0) {
    884 				queue = 0;
    885 			}
    886 		}
    887 	}
    888 
    889 	if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
    890 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
    891 		    "** Panic due to CE bits not set in the UDBs",
    892 		    " Corrected Memory Error on");
    893 	}
    894 
    895 	if ((sdbh >> 8) & 1) {
    896 		ecc->flt_synd = e_syndh;
    897 		ce_scrub(ecc);
    898 		if (queue) {
    899 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
    900 			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
    901 		}
    902 	}
    903 
    904 	if ((sdbl >> 8) & 1) {
    905 		ecc->flt_addr = t_afar | 0x8;	/* Sabres do not have a UDBL */
    906 		ecc->flt_synd = e_syndl | UDBL_REG;
    907 		ce_scrub(ecc);
    908 		if (queue) {
    909 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
    910 			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
    911 		}
    912 	}
    913 
    914 	/*
    915 	 * Re-enable all error trapping (CEEN currently cleared).
    916 	 */
    917 	clr_datapath();
    918 	set_asyncflt(P_AFSR_CE);
    919 	set_error_enable(EER_ENABLE);
    920 }
    921 
    922 /*
    923  * Cpu specific CE logging routine
    924  */
    925 static void
    926 log_ce_err(struct async_flt *aflt, char *unum)
    927 {
    928 	spitf_async_flt spf_flt;
    929 
    930 	if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) {
    931 		return;
    932 	}
    933 
    934 	spf_flt.cmn_asyncflt = *aflt;
    935 	cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum,
    936 	    " Corrected Memory Error detected by");
    937 }
    938 
    939 /*
    940  * Spitfire does not perform any further CE classification refinement
    941  */
    942 /*ARGSUSED*/
    943 int
    944 ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep,
    945     size_t afltoffset)
    946 {
    947 	return (0);
    948 }
    949 
    950 char *
    951 flt_to_error_type(struct async_flt *aflt)
    952 {
    953 	if (aflt->flt_status & ECC_INTERMITTENT)
    954 		return (ERR_TYPE_DESC_INTERMITTENT);
    955 	if (aflt->flt_status & ECC_PERSISTENT)
    956 		return (ERR_TYPE_DESC_PERSISTENT);
    957 	if (aflt->flt_status & ECC_STICKY)
    958 		return (ERR_TYPE_DESC_STICKY);
    959 	return (ERR_TYPE_DESC_UNKNOWN);
    960 }
    961 
    962 /*
    963  * Called by correctable ecc error logging code to print out
    964  * the stick/persistent/intermittent status of the error.
    965  */
    966 static void
    967 cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum)
    968 {
    969 	ushort_t status;
    970 	char *status1_str = "Memory";
    971 	char *status2_str = "Intermittent";
    972 	struct async_flt *aflt = (struct async_flt *)spf_flt;
    973 
    974 	status = aflt->flt_status;
    975 
    976 	if (status & ECC_ECACHE)
    977 		status1_str = "Ecache";
    978 
    979 	if (status & ECC_STICKY)
    980 		status2_str = "Sticky";
    981 	else if (status & ECC_PERSISTENT)
    982 		status2_str = "Persistent";
    983 
    984 	cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST,
    985 	    NULL, " Corrected %s Error on %s is %s",
    986 	    status1_str, unum, status2_str);
    987 }
    988 
    989 /*
    990  * check for a valid ce syndrome, then call the
    991  * displacement flush scrubbing code, and then check the afsr to see if
    992  * the error was persistent or intermittent. Reread the afar/afsr to see
    993  * if the error was not scrubbed successfully, and is therefore sticky.
    994  */
    995 /*ARGSUSED1*/
    996 void
    997 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout)
    998 {
    999 	uint64_t eer, afsr;
   1000 	ushort_t status;
   1001 
   1002 	ASSERT(getpil() > LOCK_LEVEL);
   1003 
   1004 	/*
   1005 	 * It is possible that the flt_addr is not a valid
   1006 	 * physical address. To deal with this, we disable
   1007 	 * NCEEN while we scrub that address. If this causes
   1008 	 * a TIMEOUT/BERR, we know this is an invalid
   1009 	 * memory location.
   1010 	 */
   1011 	kpreempt_disable();
   1012 	eer = get_error_enable();
   1013 	if (eer & (EER_CEEN | EER_NCEEN))
   1014 		set_error_enable(eer & ~(EER_CEEN | EER_NCEEN));
   1015 
   1016 	/*
   1017 	 * To check if the error detected by IO is persistent, sticky or
   1018 	 * intermittent.
   1019 	 */
   1020 	if (ecc->flt_status & ECC_IOBUS) {
   1021 		ecc->flt_stat = P_AFSR_CE;
   1022 	}
   1023 
   1024 	scrubphys(P2ALIGN(ecc->flt_addr, 64),
   1025 	    cpunodes[CPU->cpu_id].ecache_size);
   1026 
   1027 	get_asyncflt(&afsr);
   1028 	if (afsr & (P_AFSR_TO | P_AFSR_BERR)) {
   1029 		/*
   1030 		 * Must ensure that we don't get the TIMEOUT/BERR
   1031 		 * when we reenable NCEEN, so we clear the AFSR.
   1032 		 */
   1033 		set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR));
   1034 		if (eer & (EER_CEEN | EER_NCEEN))
   1035 			set_error_enable(eer);
   1036 		kpreempt_enable();
   1037 		return;
   1038 	}
   1039 
   1040 	if (eer & EER_NCEEN)
   1041 		set_error_enable(eer & ~EER_CEEN);
   1042 
   1043 	/*
   1044 	 * Check and clear any ECC errors from the scrub.  If the scrub did
   1045 	 * not trip over the error, mark it intermittent.  If the scrub did
   1046 	 * trip the error again and it did not scrub away, mark it sticky.
   1047 	 * Otherwise mark it persistent.
   1048 	 */
   1049 	if (check_ecc(ecc) != 0) {
   1050 		cpu_read_paddr(ecc, 0, 1);
   1051 
   1052 		if (check_ecc(ecc) != 0)
   1053 			status = ECC_STICKY;
   1054 		else
   1055 			status = ECC_PERSISTENT;
   1056 	} else
   1057 		status = ECC_INTERMITTENT;
   1058 
   1059 	if (eer & (EER_CEEN | EER_NCEEN))
   1060 		set_error_enable(eer);
   1061 	kpreempt_enable();
   1062 
   1063 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
   1064 	ecc->flt_status |= status;
   1065 }
   1066 
   1067 /*
   1068  * get the syndrome and unum, and then call the routines
   1069  * to check the other cpus and iobuses, and then do the error logging.
   1070  */
   1071 /*ARGSUSED1*/
   1072 void
   1073 cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep)
   1074 {
   1075 	char unum[UNUM_NAMLEN];
   1076 	int len = 0;
   1077 	int ce_verbose = 0;
   1078 	int err;
   1079 
   1080 	ASSERT(ecc->flt_func != NULL);
   1081 
   1082 	/* Get the unum string for logging purposes */
   1083 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum,
   1084 	    UNUM_NAMLEN, &len);
   1085 
   1086 	/* Call specific error logging routine */
   1087 	(void) (*ecc->flt_func)(ecc, unum);
   1088 
   1089 	/*
   1090 	 * Count errors per unum.
   1091 	 * Non-memory errors are all counted via a special unum string.
   1092 	 */
   1093 	if ((err = ce_count_unum(ecc->flt_status, len, unum)) != PR_OK &&
   1094 	    automatic_page_removal) {
   1095 		(void) page_retire(ecc->flt_addr, err);
   1096 	}
   1097 
   1098 	if (ecc->flt_panic) {
   1099 		ce_verbose = 1;
   1100 	} else if ((ecc->flt_class == BUS_FAULT) ||
   1101 	    (ecc->flt_stat & P_AFSR_CE)) {
   1102 		ce_verbose = (ce_verbose_memory > 0);
   1103 	} else {
   1104 		ce_verbose = 1;
   1105 	}
   1106 
   1107 	if (ce_verbose) {
   1108 		spitf_async_flt sflt;
   1109 		int synd_code;
   1110 
   1111 		sflt.cmn_asyncflt = *ecc;	/* for cpu_aflt_log() */
   1112 
   1113 		cpu_ce_log_status(&sflt, unum);
   1114 
   1115 		synd_code = synd_to_synd_code(AFLT_STAT_VALID,
   1116 		    SYND(ecc->flt_synd));
   1117 
   1118 		if (SYND_IS_SINGLE_BIT_DATA(synd_code)) {
   1119 			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
   1120 			    NULL, " ECC Data Bit %2d was in error "
   1121 			    "and corrected", synd_code);
   1122 		} else if (SYND_IS_SINGLE_BIT_CHK(synd_code)) {
   1123 			cpu_aflt_log(CE_CONT, 0, &sflt, CPU_ERRID_FIRST,
   1124 			    NULL, " ECC Check Bit %2d was in error "
   1125 			    "and corrected", synd_code - C0);
   1126 		} else {
   1127 			/*
   1128 			 * These are UE errors - we shouldn't be getting CE
   1129 			 * traps for these; handle them in case of bad h/w.
   1130 			 */
   1131 			switch (synd_code) {
   1132 			case M2:
   1133 				cpu_aflt_log(CE_CONT, 0, &sflt,
   1134 				    CPU_ERRID_FIRST, NULL,
   1135 				    " Two ECC Bits were in error");
   1136 				break;
   1137 			case M3:
   1138 				cpu_aflt_log(CE_CONT, 0, &sflt,
   1139 				    CPU_ERRID_FIRST, NULL,
   1140 				    " Three ECC Bits were in error");
   1141 				break;
   1142 			case M4:
   1143 				cpu_aflt_log(CE_CONT, 0, &sflt,
   1144 				    CPU_ERRID_FIRST, NULL,
   1145 				    " Four ECC Bits were in error");
   1146 				break;
   1147 			case MX:
   1148 				cpu_aflt_log(CE_CONT, 0, &sflt,
   1149 				    CPU_ERRID_FIRST, NULL,
   1150 				    " More than Four ECC bits were "
   1151 				    "in error");
   1152 				break;
   1153 			default:
   1154 				cpu_aflt_log(CE_CONT, 0, &sflt,
   1155 				    CPU_ERRID_FIRST, NULL,
   1156 				    " Unknown fault syndrome %d",
   1157 				    synd_code);
   1158 				break;
   1159 			}
   1160 		}
   1161 	}
   1162 
   1163 	/* Display entire cache line, if valid address */
   1164 	if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR)
   1165 		read_ecc_data(ecc, 1, 1);
   1166 }
   1167 
   1168 /*
   1169  * We route all errors through a single switch statement.
   1170  */
   1171 void
   1172 cpu_ue_log_err(struct async_flt *aflt)
   1173 {
   1174 
   1175 	switch (aflt->flt_class) {
   1176 	case CPU_FAULT:
   1177 		cpu_async_log_err(aflt);
   1178 		break;
   1179 
   1180 	case BUS_FAULT:
   1181 		bus_async_log_err(aflt);
   1182 		break;
   1183 
   1184 	default:
   1185 		cmn_err(CE_WARN, "discarding async error 0x%p with invalid "
   1186 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
   1187 		break;
   1188 	}
   1189 }
   1190 
/*
 * Values for action variable in cpu_async_error().  The variable starts out
 * as ACTION_NONE and records what the handler decided to do on the way out
 * of the trap.
 */
/* Initial value: no follow-up action has been chosen */
#define	ACTION_NONE		0
/* The saved pc/npc were redirected to an on_trap trampoline or t_lofault */
#define	ACTION_TRAMPOLINE	1
/* NOTE(review): presumably "set the AST flags"; the use is not visible here */
#define	ACTION_AST_FLAGS	2
   1195 
   1196 /*
   1197  * Access error trap handler for asynchronous cpu errors.  This routine is
   1198  * called to handle a data or instruction access error.  All fatal errors are
   1199  * completely handled by this routine (by panicking).  Non fatal error logging
   1200  * is queued for later processing either via AST or softint at a lower PIL.
   1201  * In case of panic, the error log queue will also be processed as part of the
   1202  * panic flow to ensure all errors are logged.  This routine is called with all
   1203  * errors disabled at PIL15.  The AFSR bits are cleared and the UDBL and UDBH
   1204  * error bits are also cleared.  The hardware has also disabled the I and
   1205  * D-caches for us, so we must re-enable them before returning.
   1206  *
   1207  * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP:
   1208  *
   1209  *		_______________________________________________________________
   1210  *		|        Privileged tl0		|         Unprivileged	      |
   1211  *		| Protected	| Unprotected	| Protected	| Unprotected |
   1212  *		|on_trap|lofault|		|		|	      |
   1213  * -------------|-------|-------+---------------+---------------+-------------|
   1214  *		|	|	|		|		|	      |
   1215  * UE/LDP/EDP	| L,T,p	| L,R,p	| L,P		| n/a		| L,R,p	      |
   1216  *		|	|	|		|		|	      |
   1217  * TO/BERR	| T	| S	| L,P		| n/a		| S	      |
   1218  *		|	|	|		|		|	      |
   1219  * WP		| L,M,p | L,M,p	| L,M,p		| n/a		| L,M,p       |
   1220  *		|	|	|		|		|	      |
   1221  * CP (IIi/IIe)	| L,P	| L,P	| L,P		| n/a		| L,P	      |
   1222  * ____________________________________________________________________________
   1223  *
   1224  *
   1225  * Action codes:
   1226  *
   1227  * L - log
   1228  * M - kick off memscrubber if flt_in_memory
   1229  * P - panic
   1230  * p - panic if US-IIi or US-IIe (Sabre); overrides R and M
   1231  * R - i)  if aft_panic is set, panic
   1232  *     ii) otherwise, send hwerr event to contract and SIGKILL to process
   1233  * S - send SIGBUS to process
   1234  * T - trampoline
   1235  *
   1236  * Special cases:
   1237  *
   1238  * 1) if aft_testfatal is set, all faults result in a panic regardless
   1239  *    of type (even WP), protection (even on_trap), or privilege.
   1240  */
   1241 /*ARGSUSED*/
   1242 void
   1243 cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
   1244 	uint_t p_afsr_high, uint_t p_afar_high)
   1245 {
   1246 	ushort_t sdbh, sdbl, ttype, tl;
   1247 	spitf_async_flt spf_flt;
   1248 	struct async_flt *aflt;
   1249 	char pr_reason[28];
   1250 	uint64_t oafsr;
   1251 	uint64_t acc_afsr = 0;			/* accumulated afsr */
   1252 	int action = ACTION_NONE;
   1253 	uint64_t t_afar = p_afar;
   1254 	uint64_t t_afsr = p_afsr;
   1255 	int expected = DDI_FM_ERR_UNEXPECTED;
   1256 	ddi_acc_hdl_t *hp;
   1257 
   1258 	/*
   1259 	 * We need to look at p_flag to determine if the thread detected an
   1260 	 * error while dumping core.  We can't grab p_lock here, but it's ok
   1261 	 * because we just need a consistent snapshot and we know that everyone
   1262 	 * else will store a consistent set of bits while holding p_lock.  We
   1263 	 * don't have to worry about a race because SDOCORE is set once prior
   1264 	 * to doing i/o from the process's address space and is never cleared.
   1265 	 */
   1266 	uint_t pflag = ttoproc(curthread)->p_flag;
   1267 
   1268 	pr_reason[0] = '\0';
   1269 
   1270 	/*
   1271 	 * Note: the Spitfire data buffer error registers
   1272 	 * (upper and lower halves) are or'ed into the upper
   1273 	 * word of the afsr by async_err() if P_AFSR_UE is set.
   1274 	 */
   1275 	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
   1276 	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);
   1277 
   1278 	/*
   1279 	 * Grab the ttype encoded in <63:53> of the saved
   1280 	 * afsr passed from async_err()
   1281 	 */
   1282 	ttype = (ushort_t)((t_afsr >> 53) & 0x1FF);
   1283 	tl = (ushort_t)(t_afsr >> 62);
   1284 
   1285 	t_afsr &= S_AFSR_MASK;
   1286 	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */
   1287 
   1288 	/*
   1289 	 * Initialize most of the common and CPU-specific structure.  We derive
   1290 	 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit.  The
   1291 	 * initial setting of aflt->flt_panic is based on TL: we must panic if
   1292 	 * the error occurred at TL > 0.  We also set flt_panic if the test/demo
   1293 	 * tuneable aft_testfatal is set (not the default).
   1294 	 */
   1295 	bzero(&spf_flt, sizeof (spitf_async_flt));
   1296 	aflt = (struct async_flt *)&spf_flt;
   1297 	aflt->flt_id = gethrtime_waitfree();
   1298 	aflt->flt_stat = t_afsr;
   1299 	aflt->flt_addr = t_afar;
   1300 	aflt->flt_bus_id = getprocessorid();
   1301 	aflt->flt_inst = CPU->cpu_id;
   1302 	aflt->flt_pc = (caddr_t)rp->r_pc;
   1303 	aflt->flt_prot = AFLT_PROT_NONE;
   1304 	aflt->flt_class = CPU_FAULT;
   1305 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
   1306 	aflt->flt_tl = (uchar_t)tl;
   1307 	aflt->flt_panic = (tl != 0 || aft_testfatal != 0);
   1308 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
   1309 
   1310 	/*
   1311 	 * Set flt_status based on the trap type.  If we end up here as the
   1312 	 * result of a UE detected by the CE handling code, leave status 0.
   1313 	 */
   1314 	switch (ttype) {
   1315 	case T_DATA_ERROR:
   1316 		aflt->flt_status = ECC_D_TRAP;
   1317 		break;
   1318 	case T_INSTR_ERROR:
   1319 		aflt->flt_status = ECC_I_TRAP;
   1320 		break;
   1321 	}
   1322 
   1323 	spf_flt.flt_sdbh = sdbh;
   1324 	spf_flt.flt_sdbl = sdbl;
   1325 
   1326 	/*
   1327 	 * Check for fatal async errors.
   1328 	 */
   1329 	check_misc_err(&spf_flt);
   1330 
   1331 	/*
   1332 	 * If the trap occurred in privileged mode at TL=0, we need to check to
   1333 	 * see if we were executing in the kernel under on_trap() or t_lofault
   1334 	 * protection.  If so, modify the saved registers so that we return
   1335 	 * from the trap to the appropriate trampoline routine.
   1336 	 */
   1337 	if (aflt->flt_priv && tl == 0) {
   1338 		if (curthread->t_ontrap != NULL) {
   1339 			on_trap_data_t *otp = curthread->t_ontrap;
   1340 
   1341 			if (otp->ot_prot & OT_DATA_EC) {
   1342 				aflt->flt_prot = AFLT_PROT_EC;
   1343 				otp->ot_trap |= OT_DATA_EC;
   1344 				rp->r_pc = otp->ot_trampoline;
   1345 				rp->r_npc = rp->r_pc + 4;
   1346 				action = ACTION_TRAMPOLINE;
   1347 			}
   1348 
   1349 			if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) &&
   1350 			    (otp->ot_prot & OT_DATA_ACCESS)) {
   1351 				aflt->flt_prot = AFLT_PROT_ACCESS;
   1352 				otp->ot_trap |= OT_DATA_ACCESS;
   1353 				rp->r_pc = otp->ot_trampoline;
   1354 				rp->r_npc = rp->r_pc + 4;
   1355 				action = ACTION_TRAMPOLINE;
   1356 				/*
   1357 				 * for peeks and caut_gets errors are expected
   1358 				 */
   1359 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
   1360 				if (!hp)
   1361 					expected = DDI_FM_ERR_PEEK;
   1362 				else if (hp->ah_acc.devacc_attr_access ==
   1363 				    DDI_CAUTIOUS_ACC)
   1364 					expected = DDI_FM_ERR_EXPECTED;
   1365 			}
   1366 
   1367 		} else if (curthread->t_lofault) {
   1368 			aflt->flt_prot = AFLT_PROT_COPY;
   1369 			rp->r_g1 = EFAULT;
   1370 			rp->r_pc = curthread->t_lofault;
   1371 			rp->r_npc = rp->r_pc + 4;
   1372 			action = ACTION_TRAMPOLINE;
   1373 		}
   1374 	}
   1375 
   1376 	/*
   1377 	 * Determine if this error needs to be treated as fatal.  Note that
   1378 	 * multiple errors detected upon entry to this trap handler does not
   1379 	 * necessarily warrant a panic.  We only want to panic if the trap
   1380 	 * happened in privileged mode and not under t_ontrap or t_lofault
   1381 	 * protection.  The exception is WP: if we *only* get WP, it is not
   1382 	 * fatal even if the trap occurred in privileged mode, except on Sabre.
   1383 	 *
   1384 	 * aft_panic, if set, effectively makes us treat usermode
   1385 	 * UE/EDP/LDP faults as if they were privileged - so we will
   1386 	 * panic instead of sending a contract event.  A lofault-protected
   1387 	 * fault will normally follow the contract event; if aft_panic is
   1388 	 * set this will be changed to a panic.
   1389 	 *
   1390 	 * For usermode BERR/BTO errors, eg from processes performing device
   1391 	 * control through mapped device memory, we need only deliver
   1392 	 * a SIGBUS to the offending process.
   1393 	 *
   1394 	 * Some additional flt_panic reasons (eg, WP on Sabre) will be
   1395 	 * checked later; for now we implement the common reasons.
   1396 	 */
   1397 	if (aflt->flt_prot == AFLT_PROT_NONE) {
   1398 		/*
   1399 		 * Beware - multiple bits may be set in AFSR
   1400 		 */
   1401 		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) {
   1402 			if (aflt->flt_priv || aft_panic)
   1403 				aflt->flt_panic = 1;
   1404 		}
   1405 
   1406 		if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
   1407 			if (aflt->flt_priv)
   1408 				aflt->flt_panic = 1;
   1409 		}
   1410 	} else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) {
   1411 		aflt->flt_panic = 1;
   1412 	}
   1413 
   1414 	/*
   1415 	 * UE/BERR/TO: Call our bus nexus friends to check for
   1416 	 * IO errors that may have resulted in this trap.
   1417 	 */
   1418 	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) {
   1419 		cpu_run_bus_error_handlers(aflt, expected);
   1420 	}
   1421 
   1422 	/*
   1423 	 * Handle UE: If the UE is in memory, we need to flush the bad line from
   1424 	 * the E-cache.  We also need to query the bus nexus for fatal errors.
   1425 	 * For sabre, we will panic on UEs. Attempts to do diagnostic read on
   1426 	 * caches may introduce more parity errors (especially when the module
   1427 	 * is bad) and in sabre there is no guarantee that such errors
   1428 	 * (if introduced) are written back as poisoned data.
   1429 	 */
   1430 	if (t_afsr & P_AFSR_UE) {
   1431 		int i;
   1432 
   1433 		(void) strcat(pr_reason, "UE ");
   1434 
   1435 		spf_flt.flt_type = CPU_UE_ERR;
   1436 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
   1437 		    MMU_PAGESHIFT)) ? 1: 0;
   1438 
   1439 		/*
   1440 		 * With UE, we have the PA of the fault.
   1441 		 * Let's do a diagnostic read to get the ecache
   1442 		 * data and tag info of the bad line for logging.
   1443 		 */
   1444 		if (aflt->flt_in_memory) {
   1445 			uint32_t ec_set_size;
   1446 			uchar_t state;
   1447 			uint32_t ecache_idx;
   1448 			uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64);
   1449 
   1450 			/* touch the line to put it in ecache */
   1451 			acc_afsr |= read_and_clear_afsr();
   1452 			(void) lddphys(faultpa);
   1453 			acc_afsr |= (read_and_clear_afsr() &
   1454 			    ~(P_AFSR_EDP | P_AFSR_UE));
   1455 
   1456 			ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
   1457 			    ecache_associativity;
   1458 
   1459 			for (i = 0; i < ecache_associativity; i++) {
   1460 				ecache_idx = i * ec_set_size +
   1461 				    (aflt->flt_addr % ec_set_size);
   1462 				get_ecache_dtag(P2ALIGN(ecache_idx, 64),
   1463 				    (uint64_t *)&spf_flt.flt_ec_data[0],
   1464 				    &spf_flt.flt_ec_tag, &oafsr, &acc_afsr);
   1465 				acc_afsr |= oafsr;
   1466 
   1467 				state = (uchar_t)((spf_flt.flt_ec_tag &
   1468 				    cpu_ec_state_mask) >> cpu_ec_state_shift);
   1469 
   1470 				if ((state & cpu_ec_state_valid) &&
   1471 				    ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) ==
   1472 				    ((uint64_t)aflt->flt_addr >>
   1473 				    cpu_ec_tag_shift)))
   1474 					break;
   1475 			}
   1476 
   1477 			/*
   1478 			 * Check to see if the ecache tag is valid for the
   1479 			 * fault PA. In the very unlikely event where the
   1480 			 * line could be victimized, no ecache info will be
   1481 			 * available. If this is the case, capture the line
   1482 			 * from memory instead.
   1483 			 */
   1484 			if ((state & cpu_ec_state_valid) == 0 ||
   1485 			    (spf_flt.flt_ec_tag & cpu_ec_tag_mask) !=
   1486 			    ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) {
   1487 				for (i = 0; i < 8; i++, faultpa += 8) {
   1488 					ec_data_t *ecdptr;
   1489 
   1490 					ecdptr = &spf_flt.flt_ec_data[i];
   1491 					acc_afsr |= read_and_clear_afsr();
   1492 					ecdptr->ec_d8 = lddphys(faultpa);
   1493 					acc_afsr |= (read_and_clear_afsr() &
   1494 					    ~(P_AFSR_EDP | P_AFSR_UE));
   1495 					ecdptr->ec_afsr = 0;
   1496 							/* null afsr value */
   1497 				}
   1498 
   1499 				/*
   1500 				 * Mark tag invalid to indicate mem dump
   1501 				 * when we print out the info.
   1502 				 */
   1503 				spf_flt.flt_ec_tag = AFLT_INV_ADDR;
   1504 			}
   1505 			spf_flt.flt_ec_lcnt = 1;
   1506 
   1507 			/*
   1508 			 * Flush out the bad line
   1509 			 */
   1510 			flushecacheline(P2ALIGN(aflt->flt_addr, 64),
   1511 			    cpunodes[CPU->cpu_id].ecache_size);
   1512 
   1513 			acc_afsr |= clear_errors(NULL, NULL);
   1514 		}
   1515 
   1516 		/*
   1517 		 * Ask our bus nexus friends if they have any fatal errors. If
   1518 		 * so, they will log appropriate error messages and panic as a
   1519 		 * result. We then queue an event for each UDB that reports a
   1520 		 * UE. Each UE reported in a UDB will have its own log message.
   1521 		 *
   1522 		 * Note from kbn: In the case where there are multiple UEs
   1523 		 * (ME bit is set) - the AFAR address is only accurate to
   1524 		 * the 16-byte granularity. One cannot tell whether the AFAR
   1525 		 * belongs to the UDBH or UDBL syndromes. In this case, we
   1526 		 * always report the AFAR address to be 16-byte aligned.
   1527 		 *
   1528 		 * If we're on a Sabre, there is no SDBL, but it will always
   1529 		 * read as zero, so the sdbl test below will safely fail.
   1530 		 */
   1531 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e)
   1532 			aflt->flt_panic = 1;
   1533 
   1534 		if (sdbh & P_DER_UE) {
   1535 			aflt->flt_synd = sdbh & P_DER_E_SYND;
   1536 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
   1537 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
   1538 			    aflt->flt_panic);
   1539 		}
   1540 		if (sdbl & P_DER_UE) {
   1541 			aflt->flt_synd = sdbl & P_DER_E_SYND;
   1542 			aflt->flt_synd |= UDBL_REG;	/* indicates UDBL */
   1543 			if (!(aflt->flt_stat & P_AFSR_ME))
   1544 				aflt->flt_addr |= 0x8;
   1545 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
   1546 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
   1547 			    aflt->flt_panic);
   1548 		}
   1549 
   1550 		/*
   1551 		 * We got a UE and are panicking, save the fault PA in a known
   1552 		 * location so that the platform specific panic code can check
   1553 		 * for copyback errors.
   1554 		 */
   1555 		if (aflt->flt_panic && aflt->flt_in_memory) {
   1556 			panic_aflt = *aflt;
   1557 		}
   1558 	}
   1559 
   1560 	/*
   1561 	 * Handle EDP and LDP: Locate the line with bad parity and enqueue an
   1562 	 * async error for logging. For Sabre, we panic on EDP or LDP.
   1563 	 */
   1564 	if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) {
   1565 		spf_flt.flt_type = CPU_EDP_LDP_ERR;
   1566 
   1567 		if (t_afsr & P_AFSR_EDP)
   1568 			(void) strcat(pr_reason, "EDP ");
   1569 
   1570 		if (t_afsr & P_AFSR_LDP)
   1571 			(void) strcat(pr_reason, "LDP ");
   1572 
   1573 		/*
   1574 		 * Here we have no PA to work with.
   1575 		 * Scan each line in the ecache to look for
   1576 		 * the one with bad parity.
   1577 		 */
   1578 		aflt->flt_addr = AFLT_INV_ADDR;
   1579 		scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
   1580 		    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
   1581 		acc_afsr |= (oafsr & ~P_AFSR_WP);
   1582 
   1583 		/*
   1584 		 * If we found a bad PA, update the state to indicate if it is
   1585 		 * memory or I/O space.  This code will be important if we ever
   1586 		 * support cacheable frame buffers.
   1587 		 */
   1588 		if (aflt->flt_addr != AFLT_INV_ADDR) {
   1589 			aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
   1590 			    MMU_PAGESHIFT)) ? 1 : 0;
   1591 		}
   1592 
   1593 		if (isus2i || isus2e)
   1594 			aflt->flt_panic = 1;
   1595 
   1596 		cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ?
   1597 		    FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP,
   1598 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
   1599 		    aflt->flt_panic);
   1600 	}
   1601 
   1602 	/*
   1603 	 * Timeout and bus error handling.  There are two cases to consider:
   1604 	 *
   1605 	 * (1) If we are in the kernel protected by ddi_peek or ddi_poke,we
   1606 	 * have already modified the saved registers so that we will return
   1607 	 * from the trap to the appropriate trampoline routine; otherwise panic.
   1608 	 *
   1609 	 * (2) In user mode, we can simply use our AST mechanism to deliver
   1610 	 * a SIGBUS.  We do not log the occurrence - processes performing
   1611 	 * device control would generate lots of uninteresting messages.
   1612 	 */
   1613 	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
   1614 		if (t_afsr & P_AFSR_TO)
   1615 			(void) strcat(pr_reason, "BTO ");
   1616 
   1617 		if (t_afsr & P_AFSR_BERR)
   1618 			(void) strcat(pr_reason, "BERR ");
   1619 
   1620 		spf_flt.flt_type = CPU_BTO_BERR_ERR;
   1621 		if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) {
   1622 			cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
   1623 			    FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR,
   1624 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
   1625 			    aflt->flt_panic);
   1626 		}
   1627 	}
   1628 
   1629 	/*
   1630 	 * Handle WP: WP happens when the ecache is victimized and a parity
   1631 	 * error was detected on a writeback.  The data in question will be
   1632 	 * poisoned as a UE will be written back.  The PA is not logged and
   1633 	 * it is possible that it doesn't belong to the trapped thread.  The
   1634 	 * WP trap is not fatal, but it could be fatal to someone that
   1635 	 * subsequently accesses the toxic page.  We set read_all_memscrub
   1636 	 * to force the memscrubber to read all of memory when it awakens.
   1637 	 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
   1638 	 * UE back to poison the data.
   1639 	 */
   1640 	if (t_afsr & P_AFSR_WP) {
   1641 		(void) strcat(pr_reason, "WP ");
   1642 		if (isus2i || isus2e) {
   1643 			aflt->flt_panic = 1;
   1644 		} else {
   1645 			read_all_memscrub = 1;
   1646 		}
   1647 		spf_flt.flt_type = CPU_WP_ERR;
   1648 		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP,
   1649 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
   1650 		    aflt->flt_panic);
   1651 	}
   1652 
   1653 	/*
   1654 	 * Handle trapping CP error: In Sabre/Hummingbird, parity error in
   1655 	 * the ecache on a copyout due to a PCI DMA read is signaled as a CP.
   1656 	 * This is fatal.
   1657 	 */
   1658 
   1659 	if (t_afsr & P_AFSR_CP) {
   1660 		if (isus2i || isus2e) {
   1661 			(void) strcat(pr_reason, "CP ");
   1662 			aflt->flt_panic = 1;
   1663 			spf_flt.flt_type = CPU_TRAPPING_CP_ERR;
   1664 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
   1665 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
   1666 			    aflt->flt_panic);
   1667 		} else {
   1668 			/*
   1669 			 * Orphan CP: Happens due to signal integrity problem
   1670 			 * on a CPU, where a CP is reported, without reporting
   1671 			 * its associated UE. This is handled by locating the
   1672 			 * bad parity line and would kick off the memscrubber
   1673 			 * to find the UE if in memory or in another's cache.
   1674 			 */
   1675 			spf_flt.flt_type = CPU_ORPHAN_CP_ERR;
   1676 			(void) strcat(pr_reason, "ORPHAN_CP ");
   1677 
   1678 			/*
   1679 			 * Here we have no PA to work with.
   1680 			 * Scan each line in the ecache to look for
   1681 			 * the one with bad parity.
   1682 			 */
   1683 			aflt->flt_addr = AFLT_INV_ADDR;
   1684 			scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
   1685 			    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt,
   1686 			    &oafsr);
   1687 			acc_afsr |= oafsr;
   1688 
   1689 			/*
   1690 			 * If we found a bad PA, update the state to indicate
   1691 			 * if it is memory or I/O space.
   1692 			 */
   1693 			if (aflt->flt_addr != AFLT_INV_ADDR) {
   1694 				aflt->flt_in_memory =
   1695 				    (pf_is_memory(aflt->flt_addr >>
   1696 				    MMU_PAGESHIFT)) ? 1 : 0;
   1697 			}
   1698 			read_all_memscrub = 1;
   1699 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
   1700 			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
   1701 			    aflt->flt_panic);
   1702 
   1703 		}
   1704 	}
   1705 
   1706 	/*
   1707 	 * If we queued an error other than WP or CP and we are going to return
   1708 	 * from the trap and the error was in user mode or inside of a
   1709 	 * copy routine, set AST flag so the queue will be drained before
   1710 	 * returning to user mode.
   1711 	 *
   1712 	 * For UE/LDP/EDP, the AST processing will SIGKILL the process
   1713 	 * and send an event to its process contract.
   1714 	 *
   1715 	 * For BERR/BTO, the AST processing will SIGBUS the process.  There
   1716 	 * will have been no error queued in this case.
   1717 	 */
   1718 	if ((t_afsr &
   1719 	    (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) &&
   1720 	    (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) {
   1721 			int pcb_flag = 0;
   1722 
   1723 			if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP))
   1724 				pcb_flag |= ASYNC_HWERR;
   1725 
   1726 			if (t_afsr & P_AFSR_BERR)
   1727 				pcb_flag |= ASYNC_BERR;
   1728 
   1729 			if (t_afsr & P_AFSR_TO)
   1730 				pcb_flag |= ASYNC_BTO;
   1731 
   1732 			ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
   1733 			aston(curthread);
   1734 			action = ACTION_AST_FLAGS;
   1735 	}
   1736 
   1737 	/*
   1738 	 * In response to a deferred error, we must do one of three things:
   1739 	 * (1) set the AST flags, (2) trampoline, or (3) panic.  action is
   1740 	 * set in cases (1) and (2) - check that either action is set or
   1741 	 * (3) is true.
   1742 	 *
   1743 	 * On II, the WP writes poisoned data back to memory, which will
   1744 	 * cause a UE and a panic or reboot when read.  In this case, we
   1745 	 * don't need to panic at this time.  On IIi and IIe,
   1746 	 * aflt->flt_panic is already set above.
   1747 	 */
   1748 	ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) ||
   1749 	    (t_afsr & P_AFSR_WP));
   1750 
   1751 	/*
   1752 	 * Make a final sanity check to make sure we did not get any more async
   1753 	 * errors and accumulate the afsr.
   1754 	 */
   1755 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
   1756 	    cpunodes[CPU->cpu_id].ecache_linesize);
   1757 	(void) clear_errors(&spf_flt, NULL);
   1758 
   1759 	/*
   1760 	 * Take care of a special case: If there is a UE in the ecache flush
   1761 	 * area, we'll see it in flush_ecache().  This will trigger the
   1762 	 * CPU_ADDITIONAL_ERRORS case below.
   1763 	 *
   1764 	 * This could occur if the original error was a UE in the flush area,
   1765 	 * or if the original error was an E$ error that was flushed out of
   1766 	 * the E$ in scan_ecache().
   1767 	 *
   1768 	 * If it's at the same address that we're already logging, then it's
   1769 	 * probably one of these cases.  Clear the bit so we don't trip over
   1770 	 * it on the additional errors case, which could cause an unnecessary
   1771 	 * panic.
   1772 	 */
   1773 	if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar)
   1774 		acc_afsr |= aflt->flt_stat & ~P_AFSR_UE;
   1775 	else
   1776 		acc_afsr |= aflt->flt_stat;
   1777 
   1778 	/*
	 * Check the accumulated afsr for the important bits.
	 * Make sure the spf_flt.flt_type value is set, and
	 * enqueue an error.
   1782 	 */
   1783 	if (acc_afsr &
   1784 	    (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) {
   1785 		if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP |
   1786 		    P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP |
   1787 		    P_AFSR_ISAP))
   1788 			aflt->flt_panic = 1;
   1789 
   1790 		spf_flt.flt_type = CPU_ADDITIONAL_ERR;
   1791 		aflt->flt_stat = acc_afsr;
   1792 		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN,
   1793 		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
   1794 		    aflt->flt_panic);
   1795 	}
   1796 
   1797 	/*
   1798 	 * If aflt->flt_panic is set at this point, we need to panic as the
   1799 	 * result of a trap at TL > 0, or an error we determined to be fatal.
   1800 	 * We've already enqueued the error in one of the if-clauses above,
   1801 	 * and it will be dequeued and logged as part of the panic flow.
   1802 	 */
   1803 	if (aflt->flt_panic) {
   1804 		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST,
   1805 		    "See previous message(s) for details", " %sError(s)",
   1806 		    pr_reason);
   1807 	}
   1808 
   1809 	/*
   1810 	 * Before returning, we must re-enable errors, and
   1811 	 * reset the caches to their boot-up state.
   1812 	 */
   1813 	set_lsu(get_lsu() | cache_boot_state);
   1814 	set_error_enable(EER_ENABLE);
   1815 }
   1816 
   1817 /*
   1818  * Check for miscellaneous fatal errors and call CE_PANIC if any are seen.
   1819  * This routine is shared by the CE and UE handling code.
   1820  */
   1821 static void
   1822 check_misc_err(spitf_async_flt *spf_flt)
   1823 {
   1824 	struct async_flt *aflt = (struct async_flt *)spf_flt;
   1825 	char *fatal_str = NULL;
   1826 
   1827 	/*
   1828 	 * The ISAP and ETP errors are supposed to cause a POR
   1829 	 * from the system, so in theory we never, ever see these messages.
   1830 	 * ISAP, ETP and IVUE are considered to be fatal.
   1831 	 */
   1832 	if (aflt->flt_stat & P_AFSR_ISAP)
   1833 		fatal_str = " System Address Parity Error on";
   1834 	else if (aflt->flt_stat & P_AFSR_ETP)
   1835 		fatal_str = " Ecache Tag Parity Error on";
   1836 	else if (aflt->flt_stat & P_AFSR_IVUE)
   1837 		fatal_str = " Interrupt Vector Uncorrectable Error on";
   1838 	if (fatal_str != NULL) {
   1839 		cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS,
   1840 		    NULL, fatal_str);
   1841 	}
   1842 }
   1843 
   1844 /*
   1845  * Routine to convert a syndrome into a syndrome code.
   1846  */
   1847 static int
   1848 synd_to_synd_code(int synd_status, ushort_t synd)
   1849 {
   1850 	if (synd_status != AFLT_STAT_VALID)
   1851 		return (-1);
   1852 
   1853 	/*
   1854 	 * Use the 8-bit syndrome to index the ecc_syndrome_tab
   1855 	 * to get the code indicating which bit(s) is(are) bad.
   1856 	 */
   1857 	if ((synd == 0) || (synd >= SYND_TBL_SIZE))
   1858 		return (-1);
   1859 	else
   1860 		return (ecc_syndrome_tab[synd]);
   1861 }
   1862 
/*
 * Memory serial-id lookup stub.  This CPU module does not implement it:
 * none of the arguments is examined or written, and ENOTSUP is returned
 * unconditionally.
 */
/* ARGSUSED */
int
cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
{
	return (ENOTSUP);
}
   1869 
/*
 * Fault-address-to-offset lookup stub.  This CPU module does not
 * implement it: *offp is left untouched and ENOTSUP is returned
 * unconditionally.
 */
/* ARGSUSED */
int
cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
{
	return (ENOTSUP);
}
   1876 
/*
 * unum/sid/offset-to-address lookup stub.  This CPU module does not
 * implement it: *addrp is left untouched and ENOTSUP is returned
 * unconditionally.
 */
/* ARGSUSED */
int
cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
{
	return (ENOTSUP);
}
   1883 
   1884 /*
   1885  * Routine to return a string identifying the physical name
   1886  * associated with a memory/cache error.
   1887  */
   1888 /* ARGSUSED */
   1889 int
   1890 cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr,
   1891     uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status,
   1892     char *buf, int buflen, int *lenp)
   1893 {
   1894 	short synd_code;
   1895 	int ret;
   1896 
   1897 	if (flt_in_memory) {
   1898 		synd_code = synd_to_synd_code(synd_status, synd);
   1899 		if (synd_code == -1) {
   1900 			ret = EINVAL;
   1901 		} else if (prom_get_unum(synd_code, P2ALIGN(afar, 8),
   1902 		    buf, buflen, lenp) != 0) {
   1903 			ret = EIO;
   1904 		} else if (*lenp <= 1) {
   1905 			ret = EINVAL;
   1906 		} else {
   1907 			ret = 0;
   1908 		}
   1909 	} else {
   1910 		ret = ENOTSUP;
   1911 	}
   1912 
   1913 	if (ret != 0) {
   1914 		buf[0] = '\0';
   1915 		*lenp = 0;
   1916 	}
   1917 
   1918 	return (ret);
   1919 }
   1920 
/*
 * Wrapper for cpu_get_mem_unum() routine that takes an
 * async_flt struct rather than explicit arguments.
 *
 * The syndrome is taken from SYND(aflt->flt_synd), the afsr/afar from
 * flt_stat/flt_addr, and flt_bus_id is what gets passed in the cpuid
 * position.  synd_status, buf, buflen and lenp are forwarded unchanged,
 * and the return value is that of cpu_get_mem_unum().
 */
int
cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
    char *buf, int buflen, int *lenp)
{
	return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd),
	    aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id,
	    aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp));
}
   1933 
   1934 /*
   1935  * This routine is a more generic interface to cpu_get_mem_unum(),
   1936  * that may be used by other modules (e.g. mm).
   1937  */
   1938 int
   1939 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
   1940 		char *buf, int buflen, int *lenp)
   1941 {
   1942 	int synd_status, flt_in_memory, ret;
   1943 	char unum[UNUM_NAMLEN];
   1944 
   1945 	/*
   1946 	 * Check for an invalid address.
   1947 	 */
   1948 	if (afar == (uint64_t)-1)
   1949 		return (ENXIO);
   1950 
   1951 	if (synd == (uint64_t)-1)
   1952 		synd_status = AFLT_STAT_INVALID;
   1953 	else
   1954 		synd_status = AFLT_STAT_VALID;
   1955 
   1956 	flt_in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 1 : 0;
   1957 
   1958 	if ((ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
   1959 	    CPU->cpu_id, flt_in_memory, 0, unum, UNUM_NAMLEN, lenp))
   1960 	    != 0)
   1961 		return (ret);
   1962 
   1963 	if (*lenp >= buflen)
   1964 		return (ENAMETOOLONG);
   1965 
   1966 	(void) strncpy(buf, unum, buflen);
   1967 
   1968 	return (0);
   1969 }
   1970 
/*
 * Routine to return memory information associated
 * with a physical address and syndrome.
 *
 * Not implemented by this CPU module: none of the output pointers is
 * written and ENOTSUP is returned unconditionally.
 */
/* ARGSUSED */
int
cpu_get_mem_info(uint64_t synd, uint64_t afar,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp)
{
	return (ENOTSUP);
}
   1983 
/*
 * Routine to return a string identifying the physical
 * name associated with a cpuid.
 *
 * Not implemented by this CPU module: buf and *lenp are left untouched
 * and ENOTSUP is returned unconditionally.
 */
/* ARGSUSED */
int
cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	return (ENOTSUP);
}
   1994 
   1995 /*
   1996  * This routine returns the size of the kernel's FRU name buffer.
   1997  */
   1998 size_t
   1999 cpu_get_name_bufsize()
   2000 {
   2001 	return (UNUM_NAMLEN);
   2002 }
   2003 
   2004 /*
   2005  * Cpu specific log func for UEs.
   2006  */
   2007 static void
   2008 log_ue_err(struct async_flt *aflt, char *unum)
   2009 {
   2010 	spitf_async_flt *spf_flt = (spitf_async_flt *)aflt;
   2011 	int len = 0;
   2012 
   2013 #ifdef DEBUG
   2014 	int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0;
   2015 
   2016 	/*
   2017 	 * Paranoid Check for priv mismatch
   2018 	 * Only applicable for UEs
   2019 	 */
   2020 	if (afsr_priv != aflt->flt_priv) {
   2021 		/*
   2022 		 * The priv bits in %tstate and %afsr did not match; we expect
   2023 		 * this to be very rare, so flag it with a message.
   2024 		 */
   2025 		cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL,
   2026 		    ": PRIV bit in TSTATE and AFSR mismatched; "
   2027 		    "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0);
   2028 
   2029 		/* update saved afsr to reflect the correct priv */
   2030 		aflt->flt_stat &= ~P_AFSR_PRIV;
   2031 		if (aflt->flt_priv)
   2032 			aflt->flt_stat |= P_AFSR_PRIV;
   2033 	}
   2034 #endif /* DEBUG */
   2035 
   2036 	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum,
   2037 	    UNUM_NAMLEN, &len);
   2038 
   2039 	cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum,
   2040 	    " Uncorrectable Memory Error on");
   2041 
   2042 	if (SYND(aflt->flt_synd) == 0x3) {
   2043 		cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL,
   2044 		    " Syndrome 0x3 indicates that this may not be a "
   2045 		    "memory module problem");
   2046 	}
   2047 
   2048 	if (aflt->flt_in_memory)
   2049 		cpu_log_ecmem_info(spf_flt);
   2050 }
   2051 
   2052 
   2053 /*
   2054  * The cpu_async_log_err() function is called via the ue_drain() function to
   2055  * handle logging for CPU events that are dequeued.  As such, it can be invoked
   2056  * from softint context, from AST processing in the trap() flow, or from the
   2057  * panic flow.  We decode the CPU-specific data, and log appropriate messages.
   2058  */
   2059 static void
   2060 cpu_async_log_err(void *flt)
   2061 {
   2062 	spitf_async_flt *spf_flt = (spitf_async_flt *)flt;
   2063 	struct async_flt *aflt = (struct async_flt *)flt;
   2064 	char unum[UNUM_NAMLEN];
   2065 	char *space;
   2066 	char *ecache_scrub_logstr = NULL;
   2067 
   2068 	switch (spf_flt->flt_type) {
   2069 	case CPU_UE_ERR:
   2070 		/*
   2071 		 * We want to skip logging only if ALL the following
   2072 		 * conditions are true:
   2073 		 *
   2074 		 *	1. We are not panicking
   2075 		 *	2. There is only one error
   2076 		 *	3. That error is a memory error
   2077 		 *	4. The error is caused by the memory scrubber (in
   2078 		 *	   which case the error will have occurred under
   2079 		 *	   on_trap protection)
   2080 		 *	5. The error is on a retired page
   2081 		 *
   2082 		 * Note 1: AFLT_PROT_EC is used places other than the memory
   2083 		 * scrubber.  However, none of those errors should occur
   2084 		 * on a retired page.
   2085 		 *
   2086 		 * Note 2: In the CE case, these errors are discarded before
   2087 		 * the errorq.  In the UE case, we must wait until now --
   2088 		 * softcall() grabs a mutex, which we can't do at a high PIL.
   2089 		 */
   2090 		if (!panicstr &&
   2091 		    (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE &&
   2092 		    aflt->flt_prot == AFLT_PROT_EC) {
   2093 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
   2094 				/* Zero the address to clear the error */
   2095 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
   2096 				return;
   2097 			}
   2098 		}
   2099 
   2100 		/*
   2101 		 * Log the UE and check for causes of this UE error that
   2102 		 * don't cause a trap (Copyback error).  cpu_async_error()
   2103 		 * has already checked the i/o buses for us.
   2104 		 */
   2105 		log_ue_err(aflt, unum);
   2106 		if (aflt->flt_in_memory)
   2107 			cpu_check_allcpus(aflt);
   2108 		break;
   2109 
   2110 	case CPU_EDP_LDP_ERR:
   2111 		if (aflt->flt_stat & P_AFSR_EDP)
   2112 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
   2113 			    NULL, " EDP event on");
   2114 
   2115 		if (aflt->flt_stat & P_AFSR_LDP)
   2116 			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
   2117 			    NULL, " LDP event on");
   2118 
   2119 		/* Log ecache info if exist */
   2120 		if (spf_flt->flt_ec_lcnt > 0) {
   2121 			cpu_log_ecmem_info(spf_flt);
   2122 
   2123 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
   2124 			    NULL, " AFAR was derived from E$Tag");
   2125 		} else {
   2126 			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
   2127 			    NULL, " No error found in ecache (No fault "
   2128 			    "PA available)");
   2129 		}
   2130 		break;
   2131 
   2132 	case CPU_WP_ERR:
   2133 		/*
   2134 		 * If the memscrub thread hasn't yet read
   2135 		 * all of memory, as we requested in the
   2136 		 * trap handler, then give it a kick to
   2137 		 * make sure it does.
   2138 		 */
   2139 		if (!isus2i && !isus2e && read_all_memscrub)
   2140 			memscrub_run();
   2141 
   2142 		cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL,
   2143 		    " WP event on");
   2144 		return;
   2145 
   2146 	case CPU_BTO_BERR_ERR:
   2147 		/*
   2148 		 * A bus timeout or error occurred that was in user mode or not
   2149 		 * in a protected kernel code region.
   2150 		 */
   2151 		if (aflt->flt_stat & P_AFSR_BERR) {
   2152 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
   2153 			    spf_flt, BERRTO_LFLAGS, NULL,
   2154 			    " Bus Error on System Bus in %s mode from",
   2155 			    aflt->flt_priv ? "privileged" : "user");
   2156 		}
   2157 
   2158 		if (aflt->flt_stat & P_AFSR_TO) {
   2159 			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
   2160 			    spf_flt, BERRTO_LFLAGS, NULL,
   2161 			    " Timeout on System Bus in %s mode from",
   2162 			    aflt->flt_priv ? "privileged" : "user");
   2163 		}
   2164 
   2165 		return;
   2166 
   2167 	case CPU_PANIC_CP_ERR:
   2168 		/*
   2169 		 * Process the Copyback (CP) error info (if any) obtained from
   2170 		 * polling all the cpus in the panic flow. This case is only
   2171 		 * entered if we are panicking.
   2172 		 */
   2173 		ASSERT(panicstr != NULL);
   2174 		ASSERT(aflt->flt_id == panic_aflt.flt_id);
   2175 
   2176 		/* See which space - this info may not exist */
   2177 		if (panic_aflt.flt_status & ECC_D_TRAP)
   2178 			space = "Data ";
   2179 		else if (panic_aflt.flt_status & ECC_I_TRAP)
   2180 			space = "Instruction ";
   2181 		else
   2182 			space = "";
   2183 
   2184 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
   2185 		    " AFAR was derived from UE report,"
   2186 		    " CP event on CPU%d (caused %saccess error on %s%d)",
   2187 		    aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ?
   2188 		    "IOBUS" : "CPU", panic_aflt.flt_bus_id);
   2189 
   2190 		if (spf_flt->flt_ec_lcnt > 0)
   2191 			cpu_log_ecmem_info(spf_flt);
   2192 		else
   2193 			cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST,
   2194 			    NULL, " No cache dump available");
   2195 
   2196 		return;
   2197 
   2198 	case CPU_TRAPPING_CP_ERR:
   2199 		/*
   2200 		 * For sabre only.  This is a copyback ecache parity error due
   2201 		 * to a PCI DMA read.  We should be panicking if we get here.
   2202 		 */
   2203 		ASSERT(panicstr != NULL);
   2204 		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
   2205 		    " AFAR was derived from UE report,"
   2206 		    " CP event on CPU%d (caused Data access error "
   2207 		    "on PCIBus)", aflt->flt_inst);
   2208 		return;
   2209 
   2210 		/*
   2211 		 * We log the ecache lines of the following states,
   2212 		 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and
   2213 		 * dirty_bad_busy if ecache_scrub_verbose is set and panic
   2214 		 * in addition to logging if ecache_scrub_panic is set.
   2215 		 */
   2216 	case CPU_BADLINE_CI_ERR:
   2217 		ecache_scrub_logstr = "CBI";
   2218 		/* FALLTHRU */
   2219 
   2220 	case CPU_BADLINE_CB_ERR:
   2221 		if (ecache_scrub_logstr == NULL)
   2222 			ecache_scrub_logstr = "CBB";
   2223 		/* FALLTHRU */
   2224 
   2225 	case CPU_BADLINE_DI_ERR:
   2226 		if (ecache_scrub_logstr == NULL)
   2227 			ecache_scrub_logstr = "DBI";
   2228 		/* FALLTHRU */
   2229 
   2230 	case CPU_BADLINE_DB_ERR:
   2231 		if (ecache_scrub_logstr == NULL)
   2232 			ecache_scrub_logstr = "DBB";
   2233 
   2234 		cpu_aflt_log(CE_NOTE, 2, spf_flt,
   2235 		    (CPU_ERRID_FIRST | CPU_FLTCPU), NULL,
   2236 		    " %s event on", ecache_scrub_logstr);
   2237 		cpu_log_ecmem_info(spf_flt);
   2238 
   2239 		return;
   2240 
   2241 	case CPU_ORPHAN_CP_ERR:
   2242 		/*
   2243 		 * Orphan CPs, where the CP bit is set, but when a CPU
   2244 		 * doesn't report a UE.
   2245 		 */
   2246 		if (read_all_memscrub)
   2247 			memscrub_run();
   2248 
   2249 		cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU),
   2250 		    NULL, " Orphan CP event on");
   2251 
   2252 		/* Log ecache info if exist */
   2253 		if (spf_flt->flt_ec_lcnt > 0)
   2254 			cpu_log_ecmem_info(spf_flt);
   2255 		else
   2256 			cpu_aflt_log(CE_NOTE, 2, spf_flt,
   2257 			    (CP_LFLAGS | CPU_FLTCPU), NULL,
   2258 			    " No error found in ecache (No fault "
   2259 			    "PA available");
   2260 		return;
   2261 
   2262 	case CPU_ECACHE_ADDR_PAR_ERR:
   2263 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
   2264 		    " E$ Tag Address Parity error on");
   2265 		cpu_log_ecmem_info(spf_flt);
   2266 		return;
   2267 
   2268 	case CPU_ECACHE_STATE_ERR:
   2269 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
   2270 		    " E$ Tag State Parity error on");
   2271 		cpu_log_ecmem_info(spf_flt);
   2272 		return;
   2273 
   2274 	case CPU_ECACHE_TAG_ERR:
   2275 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
   2276 		    " E$ Tag scrub event on");
   2277 		cpu_log_ecmem_info(spf_flt);
   2278 		return;
   2279 
   2280 	case CPU_ECACHE_ETP_ETS_ERR:
   2281 		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
   2282 		    " AFSR.ETP is set and AFSR.ETS is zero on");
   2283 		cpu_log_ecmem_info(spf_flt);
   2284 		return;
   2285 
   2286 
   2287 	case CPU_ADDITIONAL_ERR:
   2288 		cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL,
   2289 		    " Additional errors detected during error processing on");
   2290 		return;
   2291 
   2292 	default:
   2293 		cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown "
   2294 		    "fault type %x", (void *)spf_flt, spf_flt->flt_type);
   2295 		return;
   2296 	}
   2297 
   2298 	/* ... fall through from the UE, EDP, or LDP cases */
   2299 
   2300 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
   2301 		if (!panicstr) {
   2302 			(void) page_retire(aflt->flt_addr, PR_UE);
   2303 		} else {
   2304 			/*
   2305 			 * Clear UEs on panic so that we don't
   2306 			 * get haunted by them during panic or
   2307 			 * after reboot
   2308 			 */
   2309 			clearphys(P2ALIGN(aflt->flt_addr, 64),
   2310 			    cpunodes[CPU->cpu_id].ecache_size,
   2311 			    cpunodes[CPU->cpu_id].ecache_linesize);
   2312 
   2313 			(void) clear_errors(NULL, NULL);
   2314 		}
   2315 	}
   2316 
   2317 	/*
   2318 	 * Log final recover message
   2319 	 */
   2320 	if (!panicstr) {
   2321 		if (!aflt->flt_priv) {
   2322 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
   2323 			    NULL, " Above Error is in User Mode"
   2324 			    "\n    and is fatal: "
   2325 			    "will SIGKILL process and notify contract");
   2326 		} else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) {
   2327 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
   2328 			    NULL, " Above Error detected while dumping core;"
   2329 			    "\n    core file will be truncated");
   2330 		} else if (aflt->flt_prot == AFLT_PROT_COPY) {
   2331 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
   2332 			    NULL, " Above Error is due to Kernel access"
   2333 			    "\n    to User space and is fatal: "
   2334 			    "will SIGKILL process and notify contract");
   2335 		} else if (aflt->flt_prot == AFLT_PROT_EC) {
   2336 			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL,
   2337 			    " Above Error detected by protected Kernel code"
   2338 			    "\n    that will try to clear error from system");
   2339 		}
   2340 	}
   2341 }
   2342 
   2343 
   2344 /*
   2345  * Check all cpus for non-trapping UE-causing errors
   2346  * In Ultra I/II, we look for copyback errors (CPs)
   2347  */
   2348 void
   2349 cpu_check_allcpus(struct async_flt *aflt)
   2350 {
   2351 	spitf_async_flt cp;
   2352 	spitf_async_flt *spf_cpflt = &cp;
   2353 	struct async_flt *cpflt = (struct async_flt *)&cp;
   2354 	int pix;
   2355 
   2356 	cpflt->flt_id = aflt->flt_id;
   2357 	cpflt->flt_addr = aflt->flt_addr;
   2358 
   2359 	for (pix = 0; pix < NCPU; pix++) {
   2360 		if (CPU_XCALL_READY(pix)) {
   2361 			xc_one(pix, (xcfunc_t *)get_cpu_status,
   2362 			    (uint64_t)cpflt, 0);
   2363 
   2364 			if (cpflt->flt_stat & P_AFSR_CP) {
   2365 				char *space;
   2366 
   2367 				/* See which space - this info may not exist */
   2368 				if (aflt->flt_status & ECC_D_TRAP)
   2369 					space = "Data ";
   2370 				else if (aflt->flt_status & ECC_I_TRAP)
   2371 					space = "Instruction ";
   2372 				else
   2373 					space = "";
   2374 
   2375 				cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS,
   2376 				    NULL, " AFAR was derived from UE report,"
   2377 				    " CP event on CPU%d (caused %saccess "
   2378 				    "error on %s%d)", pix, space,
   2379 				    (aflt->flt_status & ECC_IOBUS) ?
   2380 				    "IOBUS" : "CPU", aflt->flt_bus_id);
   2381 
   2382 				if (spf_cpflt->flt_ec_lcnt > 0)
   2383 					cpu_log_ecmem_info(spf_cpflt);
   2384 				else
   2385 					cpu_aflt_log(CE_WARN, 2, spf_cpflt,
   2386 					    CPU_ERRID_FIRST, NULL,
   2387 					    " No cache dump available");
   2388 			}
   2389 		}
   2390 	}
   2391 }
   2392 
#ifdef DEBUG
/*
 * DEBUG-only test knob: when non-zero, get_cpu_status() ORs P_AFSR_CP
 * into the AFSR value it captured, so the copyback-error handling runs
 * even though the hardware did not report a CP.
 */
int test_mp_cp = 0;
#endif
   2396 
   2397 /*
   2398  * Cross-call callback routine to tell a CPU to read its own %afsr to check
   2399  * for copyback errors and capture relevant information.
   2400  */
   2401 static uint_t
   2402 get_cpu_status(uint64_t arg)
   2403 {
   2404 	struct async_flt *aflt = (struct async_flt *)arg;
   2405 	spitf_async_flt *spf_flt = (spitf_async_flt *)arg;
   2406 	uint64_t afsr;
   2407 	uint32_t ec_idx;
   2408 	uint64_t sdbh, sdbl;
   2409 	int i;
   2410 	uint32_t ec_set_size;
   2411 	uchar_t valid;
   2412 	ec_data_t ec_data[8];
   2413 	uint64_t ec_tag, flt_addr_tag, oafsr;
   2414 	uint64_t *acc_afsr = NULL;
   2415 
   2416 	get_asyncflt(&afsr);
   2417 	if (CPU_PRIVATE(CPU) != NULL) {
   2418 		acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
   2419 		afsr |= *acc_afsr;
   2420 		*acc_afsr = 0;
   2421 	}
   2422 
   2423 #ifdef DEBUG
   2424 	if (test_mp_cp)
   2425 		afsr |= P_AFSR_CP;
   2426 #endif
   2427 	aflt->flt_stat = afsr;
   2428 
   2429 	if (afsr & P_AFSR_CP) {
   2430 		/*
   2431 		 * Capture the UDBs
   2432 		 */
   2433 		get_udb_errors(&sdbh, &sdbl);
   2434 		spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF);
   2435 		spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF);
   2436 
   2437 		/*
   2438 		 * Clear CP bit before capturing ecache data
   2439 		 * and AFSR info.
   2440 		 */
   2441 		set_asyncflt(P_AFSR_CP);
   2442 
   2443 		/*
   2444 		 * See if we can capture the ecache line for the
   2445 		 * fault PA.
   2446 		 *
   2447 		 * Return a valid matching ecache line, if any.
   2448 		 * Otherwise, return the first matching ecache
   2449 		 * line marked invalid.
   2450 		 */
   2451 		flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift;
   2452 		ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
   2453 		    ecache_associativity;
   2454 		spf_flt->flt_ec_lcnt = 0;
   2455 
   2456 		for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size);
   2457 		    i < ecache_associativity; i++, ec_idx += ec_set_size) {
   2458 			get_ecache_dtag(P2ALIGN(ec_idx, 64),
   2459 			    (uint64_t *)&ec_data[0], &ec_tag, &oafsr,
   2460 			    acc_afsr);
   2461 
   2462 			if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag)
   2463 				continue;
   2464 
   2465 			valid = cpu_ec_state_valid &
   2466 			    (uchar_t)((ec_tag & cpu_ec_state_mask) >>
   2467 			    cpu_ec_state_shift);
   2468 
   2469 			if (valid || spf_flt->flt_ec_lcnt == 0) {
   2470 				spf_flt->flt_ec_tag = ec_tag;
   2471 				bcopy(&ec_data, &spf_flt->flt_ec_data,
   2472 				    sizeof (ec_data));
   2473 				spf_flt->flt_ec_lcnt = 1;
   2474 
   2475 				if (valid)
   2476 					break;
   2477 			}
   2478 		}
   2479 	}
   2480 	return (0);
   2481 }
   2482 
   2483 /*
   2484  * CPU-module callback for the non-panicking CPUs.  This routine is invoked
   2485  * from panic_idle() as part of the other CPUs stopping themselves when a
   2486  * panic occurs.  We need to be VERY careful what we do here, since panicstr
   2487  * is NOT set yet and we cannot blow through locks.  If panic_aflt is set
   2488  * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for
   2489  * CP error information.
   2490  */
   2491 void
   2492 cpu_async_panic_callb(void)
   2493 {
   2494 	spitf_async_flt cp;
   2495 	struct async_flt *aflt = (struct async_flt *)&cp;
   2496 	uint64_t *scrub_afsr;
   2497 
   2498 	if (panic_aflt.flt_id != 0) {
   2499 		aflt->flt_addr = panic_aflt.flt_addr;
   2500 		(void) get_cpu_status((uint64_t)aflt);
   2501 
   2502 		if (CPU_PRIVATE(CPU) != NULL) {
   2503 			scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
   2504 			if (*scrub_afsr & P_AFSR_CP) {
   2505 				aflt->flt_stat |= *scrub_afsr;
   2506 				*scrub_afsr = 0;
   2507 			}
   2508 		}
   2509 		if (aflt->flt_stat & P_AFSR_CP) {
   2510 			aflt->flt_id = panic_aflt.flt_id;
   2511 			aflt->flt_panic = 1;
   2512 			aflt->flt_inst = CPU->cpu_id;
   2513 			aflt->flt_class = CPU_FAULT;
   2514 			cp.flt_type = CPU_PANIC_CP_ERR;
   2515 			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
   2516 			    (void *)&cp, sizeof (cp), ue_queue,
   2517 			    aflt->flt_panic);
   2518 		}
   2519 	}
   2520 }
   2521 
/*
 * Turn off all cpu error detection, normally only used for panics.
 *
 * Hands set_error_enable_tl1 to xt_all() with EER_DISABLE and
 * EER_SET_ABSOLUTE as its two arguments (presumably a cross-trap to
 * every CPU -- confirm against xt_all()).
 */
void
cpu_disable_errors(void)
{
	xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE);
}
   2530 
/*
 * Enable errors.
 *
 * Counterpart of cpu_disable_errors(): hands set_error_enable_tl1 to
 * xt_all() with EER_ENABLE and EER_SET_ABSOLUTE as its two arguments.
 */
void
cpu_enable_errors(void)
{
	xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE);
}
   2539 
   2540 static void
   2541 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
   2542 {
   2543 	uint64_t aligned_addr = P2ALIGN(ecc->flt_addr, 8);
   2544 	int i, loop = 1;
   2545 	ushort_t ecc_0;
   2546 	uint64_t paddr;
   2547 	uint64_t data;
   2548 
   2549 	if (verbose)
   2550 		loop = 8;
   2551 	for (i = 0; i < loop; i++) {
   2552 		paddr = aligned_addr + (i * 8);
   2553 		data = lddphys(paddr);
   2554 		if (verbose) {
   2555 			if (ce_err) {
   2556 				ecc_0 = ecc_gen((uint32_t)(data>>32),
   2557 				    (uint32_t)data);
   2558 				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
   2559 				    NULL, "    Paddr 0x%" PRIx64 ", "
   2560 				    "Data 0x%08x.%08x, ECC 0x%x", paddr,
   2561 				    (uint32_t)(data>>32), (uint32_t)data,
   2562 				    ecc_0);
   2563 			} else {
   2564 				cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
   2565 				    NULL, "    Paddr 0x%" PRIx64 ", "
   2566 				    "Data 0x%08x.%08x", paddr,
   2567 				    (uint32_t)(data>>32), (uint32_t)data);
   2568 			}
   2569 		}
   2570 	}
   2571 }
   2572 
/*
 * Per-check-bit data masks used by ecc_gen().  For entry i, ecc_gen()
 * ANDs the high data word with .hi and the low data word with .lo; the
 * parity of the bits that survive becomes bit i of the generated check
 * byte.
 */
static struct {		/* sec-ded-s4ed ecc code */
	uint_t hi, lo;
} ecc_code[8] = {
	{ 0xee55de23U, 0x16161161U },
	{ 0x55eede93U, 0x61612212U },
	{ 0xbb557b8cU, 0x49494494U },
	{ 0x55bb7b6cU, 0x94948848U },
	{ 0x16161161U, 0xee55de23U },
	{ 0x61612212U, 0x55eede93U },
	{ 0x49494494U, 0xbb557b8cU },
	{ 0x94948848U, 0x55bb7b6cU }
};
   2585 
   2586 static ushort_t
   2587 ecc_gen(uint_t high_bytes, uint_t low_bytes)
   2588 {
   2589 	int i, j;
   2590 	uchar_t checker, bit_mask;
   2591 	struct {
   2592 		uint_t hi, lo;
   2593 	} hex_data, masked_data[8];
   2594 
   2595 	hex_data.hi = high_bytes;
   2596 	hex_data.lo = low_bytes;
   2597 
   2598 	/* mask out bits according to sec-ded-s4ed ecc code */
   2599 	for (i = 0; i < 8; i++) {
   2600 		masked_data[i].hi = hex_data.hi & ecc_code[i].hi;
   2601 		masked_data[i].lo = hex_data.lo & ecc_code[i].lo;
   2602 	}
   2603 
   2604 	/*
   2605 	 * xor all bits in masked_data[i] to get bit_i of checker,
   2606 	 * where i = 0 to 7
   2607 	 */
   2608 	checker = 0;
   2609 	for (i = 0; i < 8; i++) {
   2610 		bit_mask = 1 << i;
   2611 		for (j = 0; j < 32; j++) {
   2612 			if (masked_data[i].lo & 1) checker ^= bit_mask;
   2613 			if (masked_data[i].hi & 1) checker ^= bit_mask;
   2614 			masked_data[i].hi >>= 1;
   2615 			masked_data[i].lo >>= 1;
   2616 		}
   2617 	}
   2618 	return (checker);
   2619 }
   2620 
   2621 /*
   2622  * Flush the entire ecache using displacement flush by reading through a
   2623  * physical address range as large as the ecache.
   2624  */
   2625 void
   2626 cpu_flush_ecache(void)
   2627 {
   2628 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
   2629 	    cpunodes[CPU->cpu_id].ecache_linesize);
   2630 }
   2631 
   2632 /*
   2633  * read and display the data in the cache line where the
   2634  * original ce error occurred.
   2635  * This routine is mainly used for debugging new hardware.
   2636  */
   2637 void
   2638 read_ecc_data(struct async_flt *ecc, short verbose, short ce_err)
   2639 {
   2640 	kpreempt_disable();
   2641 	/* disable ECC error traps */
   2642 	set_error_enable(EER_ECC_DISABLE);
   2643 
   2644 	/*
   2645 	 * flush the ecache
   2646 	 * read the data
   2647 	 * check to see if an ECC error occured
   2648 	 */
   2649 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
   2650 	    cpunodes[CPU->cpu_id].ecache_linesize);
   2651 	set_lsu(get_lsu() | cache_boot_state);
   2652 	cpu_read_paddr(ecc, verbose, ce_err);
   2653 	(void) check_ecc(ecc);
   2654 
   2655 	/* enable ECC error traps */
   2656 	set_error_enable(EER_ENABLE);
   2657 	kpreempt_enable();
   2658 }
   2659 
   2660 /*
   2661  * Check the AFSR bits for UE/CE persistence.
   2662  * If UE or CE errors are detected, the routine will
   2663  * clears all the AFSR sticky bits (except CP for
   2664  * spitfire/blackbird) and the UDBs.
   2665  * if ce_debug or ue_debug is set, log any ue/ce errors detected.
   2666  */
   2667 static int
   2668 check_ecc(struct async_flt *ecc)
   2669 {
   2670 	uint64_t t_afsr;
   2671 	uint64_t t_afar;
   2672 	uint64_t udbh;
   2673 	uint64_t udbl;
   2674 	ushort_t udb;
   2675 	int persistent = 0;
   2676 
   2677 	/*
   2678 	 * Capture the AFSR, AFAR and UDBs info
   2679 	 */
   2680 	get_asyncflt(&t_afsr);
   2681 	get_asyncaddr(&t_afar);
   2682 	t_afar &= SABRE_AFAR_PA;
   2683 	get_udb_errors(&udbh, &udbl);
   2684 
   2685 	if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) {
   2686 		/*
   2687 		 * Clear the errors
   2688 		 */
   2689 		clr_datapath();
   2690 
   2691 		if (isus2i || isus2e)
   2692 			set_asyncflt(t_afsr);
   2693 		else
   2694 			set_asyncflt(t_afsr & ~P_AFSR_CP);
   2695 
   2696 		/*
   2697 		 * determine whether to check UDBH or UDBL for persistence
   2698 		 */
   2699 		if (ecc->flt_synd & UDBL_REG) {
   2700 			udb = (ushort_t)udbl;
   2701 			t_afar |= 0x8;
   2702 		} else {
   2703 			udb = (ushort_t)udbh;
   2704 		}
   2705 
   2706 		if (ce_debug || ue_debug) {
   2707 			spitf_async_flt spf_flt; /* for logging */
   2708 			struct async_flt *aflt =
   2709 			    (struct async_flt *)&spf_flt;
   2710 
   2711 			/* Package the info nicely in the spf_flt struct */
   2712 			bzero(&spf_flt, sizeof (spitf_async_flt));
   2713 			aflt->flt_stat = t_afsr;
   2714 			aflt->flt_addr = t_afar;
   2715 			spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF);
   2716 			spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF);
   2717 
   2718 			cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR |
   2719 			    CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL,
   2720 			    " check_ecc: Dumping captured error states ...");
   2721 		}
   2722 
   2723 		/*
   2724 		 * if the fault addresses don't match, not persistent
   2725 		 */
   2726 		if (t_afar != ecc->flt_addr) {
   2727 			return (persistent);
   2728 		}
   2729 
   2730 		/*
   2731 		 * check for UE persistence
   2732 		 * since all DIMMs in the bank are identified for a UE,
   2733 		 * there's no reason to check the syndrome
   2734 		 */
   2735 		if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) {
   2736 			persistent = 1;
   2737 		}
   2738 
   2739 		/*
   2740 		 * check for CE persistence
   2741 		 */
   2742 		if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) {
   2743 			if ((udb & P_DER_E_SYND) ==
   2744 			    (ecc->flt_synd & P_DER_E_SYND)) {
   2745 				persistent = 1;
   2746 			}
   2747 		}
   2748 	}
   2749 	return (persistent);
   2750 }
   2751 
   2752 #ifdef HUMMINGBIRD
   2753 #define	HB_FULL_DIV		1
   2754 #define	HB_HALF_DIV		2
   2755 #define	HB_LOWEST_DIV		8
   2756 #define	HB_ECLK_INVALID		0xdeadbad
   2757 static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = {
   2758 	HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID,
   2759 	HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID,
   2760 	HB_ECLK_8 };
   2761 
   2762 #define	HB_SLOW_DOWN		0
   2763 #define	HB_SPEED_UP		1
   2764 
   2765 #define	SET_ESTAR_MODE(mode)					\
   2766 	stdphysio(HB_ESTAR_MODE, (mode));			\
   2767 	/*							\
   2768 	 * PLL logic requires minimum of 16 clock		\
   2769 	 * cycles to lock to the new clock speed.		\
   2770 	 * Wait 1 usec to satisfy this requirement.		\
   2771 	 */							\
   2772 	drv_usecwait(1);
   2773 
   2774 #define	CHANGE_REFRESH_COUNT(direction, cur_div, new_div)	\
   2775 {								\
   2776 	volatile uint64_t data;					\
   2777 	uint64_t count, new_count;				\
   2778 	clock_t delay;						\
   2779 	data = lddphysio(HB_MEM_CNTRL0);			\
   2780 	count = (data & HB_REFRESH_COUNT_MASK) >> 		\
   2781 	    HB_REFRESH_COUNT_SHIFT;				\
   2782 	new_count = (HB_REFRESH_INTERVAL *			\
   2783 	    cpunodes[CPU->cpu_id].clock_freq) /			\
   2784 	    (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\
   2785 	data = (data & ~HB_REFRESH_COUNT_MASK) |		\
   2786 	    (new_count << HB_REFRESH_COUNT_SHIFT);		\
   2787 	stdphysio(HB_MEM_CNTRL0, data);				\
   2788 	data = lddphysio(HB_MEM_CNTRL0);        		\
   2789 	/*							\
   2790 	 * If we are slowing down the cpu and Memory		\
   2791 	 * Self Refresh is not enabled, it is required		\
   2792 	 * to wait for old refresh count to count-down and	\
   2793 	 * new refresh count to go into effect (let new value	\
   2794 	 * counts down once).					\
   2795 	 */							\
   2796 	if ((direction) == HB_SLOW_DOWN &&			\
   2797 	    (data & HB_SELF_REFRESH_MASK) == 0) {		\
   2798 		/*						\
   2799 		 * Each count takes 64 cpu clock cycles		\
   2800 		 * to decrement.  Wait for current refresh	\
   2801 		 * count plus new refresh count at current	\
   2802 		 * cpu speed to count down to zero.  Round	\
   2803 		 * up the delay time.				\
   2804 		 */						\
   2805 		delay = ((HB_REFRESH_CLOCKS_PER_COUNT *		\
   2806 		    (count + new_count) * MICROSEC * (cur_div)) /\
   2807 		    cpunodes[CPU->cpu_id].clock_freq) + 1;	\
   2808 		drv_usecwait(delay);				\
   2809 	}							\
   2810 }
   2811 
   2812 #define	SET_SELF_REFRESH(bit)					\
   2813 {								\
   2814 	volatile uint64_t data;					\
   2815 	data = lddphysio(HB_MEM_CNTRL0);			\
   2816 	data = (data & ~HB_SELF_REFRESH_MASK) |			\
   2817 	    ((bit) << HB_SELF_REFRESH_SHIFT);			\
   2818 	stdphysio(HB_MEM_CNTRL0, data);				\
   2819 	data = lddphysio(HB_MEM_CNTRL0);			\
   2820 }
   2821 #endif	/* HUMMINGBIRD */
   2822 
   2823 /* ARGSUSED */
   2824 void
   2825 cpu_change_speed(uint64_t new_divisor, uint64_t arg2)
   2826 {
   2827 #ifdef HUMMINGBIRD
   2828 	uint64_t cur_mask, cur_divisor = 0;
   2829 	volatile uint64_t reg;
   2830 	processor_info_t *pi = &(CPU->cpu_type_info);
   2831 	int index;
   2832 
   2833 	if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) ||
   2834 	    (hb_eclk[new_divisor] == HB_ECLK_INVALID)) {
   2835 		cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx",
   2836 		    new_divisor);
   2837 		return;
   2838 	}
   2839 
   2840 	reg = lddphysio(HB_ESTAR_MODE);
   2841 	cur_mask = reg & HB_ECLK_MASK;
   2842 	for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) {
   2843 		if (hb_eclk[index] == cur_mask) {
   2844 			cur_divisor = index;
   2845 			break;
   2846 		}
   2847 	}
   2848 
   2849 	if (cur_divisor == 0)
   2850 		cmn_err(CE_PANIC, "cpu_change_speed: current divisor "
   2851 		    "can't be determined!");
   2852 
   2853 	/*
   2854 	 * If we are already at the requested divisor speed, just
   2855 	 * return.
   2856 	 */
   2857 	if (cur_divisor == new_divisor)
   2858 		return;
   2859 
   2860 	if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) {
   2861 		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
   2862 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
   2863 		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);
   2864 
   2865 	} else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
   2866 		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
   2867 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
   2868 		/* LINTED: E_FALSE_LOGICAL_EXPR */
   2869 		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
   2870 
   2871 	} else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) {
   2872 		/*
   2873 		 * Transition to 1/2 speed first, then to
   2874 		 * lower speed.
   2875 		 */
   2876 		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV);
   2877 		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
   2878 		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);
   2879 
   2880 		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor);
   2881 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
   2882 
   2883 	} else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
   2884 		/*
   2885 		 * Transition to 1/2 speed first, then to
   2886 		 * full speed.
   2887 		 */
   2888 		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
   2889 		/* LINTED: E_FALSE_LOGICAL_EXPR */
   2890 		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV);
   2891 
   2892 		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
   2893 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
   2894 		/* LINTED: E_FALSE_LOGICAL_EXPR */
   2895 		CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor);
   2896 
   2897 	} else if (cur_divisor < new_divisor) {
   2898 		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
   2899 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
   2900 
   2901 	} else if (cur_divisor > new_divisor) {
   2902 		SET_ESTAR_MODE(hb_eclk[new_divisor]);
   2903 		/* LINTED: E_FALSE_LOGICAL_EXPR */
   2904 		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
   2905 	}
   2906 	CPU->cpu_m.divisor = (uchar_t)new_divisor;
   2907 	cpu_set_curr_clock(((uint64_t)pi->pi_clock * 1000000) / new_divisor);
   2908 #endif
   2909 }
   2910 
   2911 /*
   2912  * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird,
   2913  * we clear all the sticky bits. If a non-null pointer to a async fault
   2914  * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs)
   2915  * info will be returned in the structure.  If a non-null pointer to a
   2916  * uint64_t is passed in, this will be updated if the CP bit is set in the
   2917  * AFSR.  The afsr will be returned.
   2918  */
   2919 static uint64_t
   2920 clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr)
   2921 {
   2922 	struct async_flt *aflt = (struct async_flt *)spf_flt;
   2923 	uint64_t afsr;
   2924 	uint64_t udbh, udbl;
   2925 
   2926 	get_asyncflt(&afsr);
   2927 
   2928 	if ((acc_afsr != NULL) && (afsr & P_AFSR_CP))
   2929 		*acc_afsr |= afsr;
   2930 
   2931 	if (spf_flt != NULL) {
   2932 		aflt->flt_stat = afsr;
   2933 		get_asyncaddr(&aflt->flt_addr);
   2934 		aflt->flt_addr &= SABRE_AFAR_PA;
   2935 
   2936 		get_udb_errors(&udbh, &udbl);
   2937 		spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF);
   2938 		spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF);
   2939 	}
   2940 
   2941 	set_asyncflt(afsr);		/* clear afsr */
   2942 	clr_datapath();			/* clear udbs */
   2943 	return (afsr);
   2944 }
   2945 
   2946 /*
   2947  * Scan the ecache to look for bad lines.  If found, the afsr, afar, e$ data
   2948  * tag of the first bad line will be returned. We also return the old-afsr
   2949  * (before clearing the sticky bits). The linecnt data will be updated to
   2950  * indicate the number of bad lines detected.
   2951  */
   2952 static void
   2953 scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data,
   2954 	uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr)
   2955 {
   2956 	ec_data_t t_ecdata[8];
   2957 	uint64_t t_etag, oafsr;
   2958 	uint64_t pa = AFLT_INV_ADDR;
   2959 	uint32_t i, j, ecache_sz;
   2960 	uint64_t acc_afsr = 0;
   2961 	uint64_t *cpu_afsr = NULL;
   2962 
   2963 	if (CPU_PRIVATE(CPU) != NULL)
   2964 		cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
   2965 
   2966 	*linecnt = 0;
   2967 	ecache_sz = cpunodes[CPU->cpu_id].ecache_size;
   2968 
   2969 	for (i = 0; i < ecache_sz; i += 64) {
   2970 		get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr,
   2971 		    cpu_afsr);
   2972 		acc_afsr |= oafsr;
   2973 
   2974 		/*
   2975 		 * Scan through the whole 64 bytes line in 8 8-byte chunks
   2976 		 * looking for the first occurrence of an EDP error.  The AFSR
   2977 		 * info is captured for each 8-byte chunk.  Note that for
   2978 		 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in
   2979 		 * 16-byte chunk granularity (i.e. the AFSR will be the same
   2980 		 * for the high and low 8-byte words within the 16-byte chunk).
   2981 		 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte
   2982 		 * granularity and only PSYND bits [7:0] are used.
   2983 		 */
   2984 		for (j = 0; j < 8; j++) {
   2985 			ec_data_t *ecdptr = &t_ecdata[j];
   2986 
   2987 			if (ecdptr->ec_afsr & P_AFSR_EDP) {
   2988 				uint64_t errpa;
   2989 				ushort_t psynd;
   2990 				uint32_t ec_set_size = ecache_sz /
   2991 				    ecache_associativity;
   2992 
   2993 				/*
   2994 				 * For Spitfire/Blackbird, we need to look at
   2995 				 * the PSYND to make sure that this 8-byte chunk
   2996 				 * is the right one.  PSYND bits [15:8] belong
   2997 				 * to the upper 8-byte (even) chunk.  Bits
   2998 				 * [7:0] belong to the lower 8-byte chunk (odd).
   2999 				 */
   3000 				psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
   3001 				if (!isus2i && !isus2e) {
   3002 					if (j & 0x1)
   3003 						psynd = psynd & 0xFF;
   3004 					else
   3005 						psynd = psynd >> 8;
   3006 
   3007 					if (!psynd)
   3008 						continue; /* wrong chunk */
   3009 				}
   3010 
   3011 				/* Construct the PA */
   3012 				errpa = ((t_etag & cpu_ec_tag_mask) <<
   3013 				    cpu_ec_tag_shift) | ((i | (j << 3)) %
   3014 				    ec_set_size);
   3015 
   3016 				/* clean up the cache line */
   3017 				flushecacheline(P2ALIGN(errpa, 64),
   3018 				    cpunodes[CPU->cpu_id].ecache_size);
   3019 
   3020 				oafsr = clear_errors(NULL, cpu_afsr);
   3021 				acc_afsr |= oafsr;
   3022 
   3023 				(*linecnt)++;
   3024 
   3025 				/*
   3026 				 * Capture the PA for the first bad line found.
   3027 				 * Return the ecache dump and tag info.
   3028 				 */
   3029 				if (pa == AFLT_INV_ADDR) {
   3030 					int k;
   3031 
   3032 					pa = errpa;
   3033 					for (k = 0; k < 8; k++)
   3034 						ecache_data[k] = t_ecdata[k];
   3035 					*ecache_tag = t_etag;
   3036 				}
   3037 				break;
   3038 			}
   3039 		}
   3040 	}
   3041 	*t_afar = pa;
   3042 	*t_afsr = acc_afsr;
   3043 }
   3044 
   3045 static void
   3046 cpu_log_ecmem_info(spitf_async_flt *spf_flt)
   3047 {
   3048 	struct async_flt *aflt = (struct async_flt *)spf_flt;
   3049 	uint64_t ecache_tag = spf_flt->flt_ec_tag;
   3050 	char linestr[30];
   3051 	char *state_str;
   3052 	int i;
   3053 
   3054 	/*
   3055 	 * Check the ecache tag to make sure it
   3056 	 * is valid. If invalid, a memory dump was
   3057 	 * captured instead of a ecache dump.
   3058 	 */
   3059 	if (spf_flt->flt_ec_tag != AFLT_INV_ADDR) {
   3060 		uchar_t eparity = (uchar_t)
   3061 		    ((ecache_tag & cpu_ec_par_mask) >> cpu_ec_par_shift);
   3062 
   3063 		uchar_t estate = (uchar_t)
   3064 		    ((ecache_tag & cpu_ec_state_mask) >> cpu_ec_state_shift);
   3065 
   3066 		if (estate == cpu_ec_state_shr)
   3067 			state_str = "Shared";
   3068 		else if (estate == cpu_ec_state_exl)
   3069 			state_str = "Exclusive";
   3070 		else if (estate == cpu_ec_state_own)
   3071 			state_str = "Owner";
   3072 		else if (estate == cpu_ec_state_mod)
   3073 			state_str = "Modified";
   3074 		else
   3075 			state_str = "Invalid";
   3076 
   3077 		if (spf_flt->flt_ec_lcnt > 1) {
   3078 			(void) snprintf(linestr, sizeof (linestr),
   3079 			    "Badlines found=%d", spf_flt->flt_ec_lcnt);
   3080 		} else {
   3081 			linestr[0] = '\0';
   3082 		}
   3083 
   3084 		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
   3085 		    " PA=0x%08x.%08x\n    E$tag 0x%08x.%08x E$State: %s "
   3086 		    "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32),
   3087 		    (uint32_t)aflt->flt_addr, (uint32_t)(ecache_tag >> 32),
   3088 		    (uint32_t)ecache_tag, state_str,
   3089 		    (uint32_t)eparity, linestr);
   3090 	} else {
   3091 		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
   3092 		    " E$tag != PA from AFAR; E$line was victimized"
   3093 		    "\n    dumping memory from PA 0x%08x.%08x instead",
   3094 		    (uint32_t)(P2ALIGN(aflt->flt_addr, 64) >> 32),
   3095 		    (uint32_t)P2ALIGN(aflt->flt_addr, 64));
   3096 	}
   3097 
   3098 	/*
   3099 	 * Dump out all 8 8-byte ecache data captured
   3100 	 * For each 8-byte data captured, we check the
   3101 	 * captured afsr's parity syndrome to find out
   3102 	 * which 8-byte chunk is bad. For memory dump, the
   3103 	 * AFSR values were initialized to 0.
   3104 	 */
   3105 	for (i = 0; i < 8; i++) {
   3106 		ec_data_t *ecdptr;
   3107 		uint_t offset;
   3108 		ushort_t psynd;
   3109 		ushort_t bad;
   3110 		uint64_t edp;
   3111 
   3112 		offset = i << 3;	/* multiply by 8 */
   3113 		ecdptr = &spf_flt->flt_ec_data[i];
   3114 		psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
   3115 		edp = ecdptr->ec_afsr & P_AFSR_EDP;
   3116 
   3117 		/*
   3118 		 * For Sabre/Hummingbird, parity synd is captured only
   3119 		 * in [7:0] of AFSR.PSYND for each 8-byte chunk.
   3120 		 * For spitfire/blackbird, AFSR.PSYND is captured
   3121 		 * in 16-byte granularity. [15:8] represent
   3122 		 * the upper 8 byte and [7:0] the lower 8 byte.
   3123 		 */
   3124 		if (isus2i || isus2e || (i & 0x1))
   3125 			bad = (psynd & 0xFF);		/* check bits [7:0] */
   3126 		else
   3127 			bad = (psynd & 0xFF00);		/* check bits [15:8] */
   3128 
   3129 		if (bad && edp) {
   3130 			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
   3131 			    " E$Data (0x%02x): 0x%08x.%08x "
   3132 			    "*Bad* PSYND=0x%04x", offset,
   3133 			    (uint32_t)(ecdptr->ec_d8 >> 32),
   3134 			    (uint32_t)ecdptr->ec_d8, psynd);
   3135 		} else {
   3136 			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
   3137 			    " E$Data (0x%02x): 0x%08x.%08x", offset,
   3138 			    (uint32_t)(ecdptr->ec_d8 >> 32),
   3139 			    (uint32_t)ecdptr->ec_d8);
   3140 		}
   3141 	}
   3142 }
   3143 
   3144 /*
   3145  * Common logging function for all cpu async errors.  This function allows the
   3146  * caller to generate a single cmn_err() call that logs the appropriate items
   3147  * from the fault structure, and implements our rules for AFT logging levels.
   3148  *
   3149  *	ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT)
   3150  *	tagnum: 0, 1, 2, .. generate the [AFT#] tag
   3151  *	spflt: pointer to spitfire async fault structure
   3152  *	logflags: bitflags indicating what to output
   3153  *	endstr: a end string to appear at the end of this log
   3154  *	fmt: a format string to appear at the beginning of the log
   3155  *
   3156  * The logflags allows the construction of predetermined output from the spflt
   3157  * structure.  The individual data items always appear in a consistent order.
   3158  * Note that either or both of the spflt structure pointer and logflags may be
   3159  * NULL or zero respectively, indicating that the predetermined output
   3160  * substrings are not requested in this log.  The output looks like this:
   3161  *
   3162  *	[AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU>
   3163  *	<CPU_SPACE><CPU_ERRID>
   3164  *	newline+4spaces<CPU_AFSR><CPU_AFAR>
   3165  *	newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC>
   3166  *	newline+4spaces<CPU_UDBH><CPU_UDBL>
   3167  *	newline+4spaces<CPU_SYND>
   3168  *	newline+4spaces<endstr>
   3169  *
   3170  * Note that <endstr> may not start on a newline if we are logging <CPU_PSYND>;
   3171  * it is assumed that <endstr> will be the unum string in this case.  The size
   3172  * of our intermediate formatting buf[] is based on the worst case of all flags
   3173  * being enabled.  We pass the caller's varargs directly to vcmn_err() for
   3174  * formatting so we don't need additional stack space to format them here.
   3175  */
   3176 /*PRINTFLIKE6*/
   3177 static void
   3178 cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags,
   3179 	const char *endstr, const char *fmt, ...)
   3180 {
   3181 	struct async_flt *aflt = (struct async_flt *)spflt;
   3182 	char buf[400], *p, *q; /* see comments about buf[] size above */
   3183 	va_list ap;
   3184 	int console_log_flag;
   3185 
   3186 	if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) &&
   3187 	    (aflt->flt_stat & P_AFSR_LEVEL1)) ||
   3188 	    (aflt->flt_panic)) {
   3189 		console_log_flag = (tagnum < 2) || aft_verbose;
   3190 	} else {
   3191 		int verbose = ((aflt->flt_class == BUS_FAULT) ||
   3192 		    (aflt->flt_stat & P_AFSR_CE)) ?
   3193 		    ce_verbose_memory : ce_verbose_other;
   3194 
   3195 		if (!verbose)
   3196 			return;
   3197 
   3198 		console_log_flag = (verbose > 1);
   3199 	}
   3200 
   3201 	if (console_log_flag)
   3202 		(void) sprintf(buf, "[AFT%d]", tagnum);
   3203 	else
   3204 		(void) sprintf(buf, "![AFT%d]", tagnum);
   3205 
   3206 	p = buf + strlen(buf);	/* current buffer position */
   3207 	q = buf + sizeof (buf);	/* pointer past end of buffer */
   3208 
   3209 	if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) {
   3210 		(void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x",
   3211 		    (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id);
   3212 		p += strlen(p);
   3213 	}
   3214 
   3215 	/*
   3216 	 * Copy the caller's format string verbatim into buf[].  It will be
   3217 	 * formatted by the call to vcmn_err() at the end of this function.
   3218 	 */
   3219 	if (fmt != NULL && p < q) {
   3220 		(void) strncpy(p, fmt, (size_t)(q - p - 1));
   3221 		buf[sizeof (buf) - 1] = '\0';
   3222 		p += strlen(p);
   3223 	}
   3224 
   3225 	if (spflt != NULL) {
   3226 		if (logflags & CPU_FLTCPU) {
   3227 			(void) snprintf(p, (size_t)(q - p), " CPU%d",
   3228 			    aflt->flt_inst);
   3229 			p += strlen(p);
   3230 		}
   3231 
   3232 		if (logflags & CPU_SPACE) {
   3233 			if (aflt->flt_status & ECC_D_TRAP)
   3234 				(void) snprintf(p, (size_t)(q - p),
   3235 				    " Data access");
   3236 			else if (aflt->flt_status & ECC_I_TRAP)
   3237 				(void) snprintf(p, (size_t)(q - p),
   3238 				    " Instruction access");
   3239 			p += strlen(p);
   3240 		}
   3241 
   3242 		if (logflags & CPU_TL) {
   3243 			(void) snprintf(p, (size_t)(q - p), " at TL%s",
   3244 			    aflt->flt_tl ? ">0" : "=0");
   3245 			p += strlen(p);
   3246 		}
   3247 
   3248 		if (logflags & CPU_ERRID) {
   3249 			(void) snprintf(p, (size_t)(q - p),
   3250 			    ", errID 0x%08x.%08x",
   3251 			    (uint32_t)(aflt->flt_id >> 32),
   3252 			    (uint32_t)aflt->flt_id);
   3253 			p += strlen(p);
   3254 		}
   3255 
   3256 		if (logflags & CPU_AFSR) {
   3257 			(void) snprintf(p, (size_t)(q - p),
   3258 			    "\n    AFSR 0x%08b.%08b",
   3259 			    (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0,
   3260 			    (uint32_t)aflt->flt_stat, AFSR_FMTSTR1);
   3261 			p += strlen(p);
   3262 		}
   3263 
   3264 		if (logflags & CPU_AFAR) {
   3265 			(void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x",
   3266 			    (uint32_t)(aflt->flt_addr >> 32),
   3267 			    (uint32_t)aflt->flt_addr);
   3268 			p += strlen(p);
   3269 		}
   3270 
   3271 		if (logflags & CPU_AF_PSYND) {
   3272 			ushort_t psynd = (ushort_t)
   3273 			    (aflt->flt_stat & P_AFSR_P_SYND);
   3274 
   3275 			(void) snprintf(p, (size_t)(q - p),
   3276 			    "\n    AFSR.PSYND 0x%04x(Score %02d)",
   3277 			    psynd, ecc_psynd_score(psynd));
   3278 			p += strlen(p);
   3279 		}
   3280 
   3281 		if (logflags & CPU_AF_ETS) {
   3282 			(void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x",
   3283 			    (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16));
   3284 			p += strlen(p);
   3285 		}
   3286 
   3287 		if (logflags & CPU_FAULTPC) {
   3288 			(void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p",
   3289 			    (void *)aflt->flt_pc);
   3290 			p += strlen(p);
   3291 		}
   3292 
   3293 		if (logflags & CPU_UDBH) {
   3294 			(void) snprintf(p, (size_t)(q - p),
   3295 			    "\n    UDBH 0x%04b UDBH.ESYND 0x%02x",
   3296 			    spflt->flt_sdbh, UDB_FMTSTR,
   3297 			    spflt->flt_sdbh & 0xFF);
   3298 			p += strlen(p);
   3299 		}
   3300 
   3301 		if (logflags & CPU_UDBL) {
   3302 			(void) snprintf(p, (size_t)(q - p),
   3303 			    " UDBL 0x%04b UDBL.ESYND 0x%02x",
   3304 			    spflt->flt_sdbl, UDB_FMTSTR,
   3305 			    spflt->flt_sdbl & 0xFF);
   3306 			p += strlen(p);
   3307 		}
   3308 
   3309 		if (logflags & CPU_SYND) {
   3310 			ushort_t synd = SYND(aflt->flt_synd);
   3311 
   3312 			(void) snprintf(p, (size_t)(q - p),
   3313 			    "\n    %s Syndrome 0x%x Memory Module ",
   3314 			    UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd);
   3315 			p += strlen(p);
   3316 		}
   3317 	}
   3318 
   3319 	if (endstr != NULL) {
   3320 		if (!(logflags & CPU_SYND))
   3321 			(void) snprintf(p, (size_t)(q - p), "\n    %s", endstr);
   3322 		else
   3323 			(void) snprintf(p, (size_t)(q - p), "%s", endstr);
   3324 		p += strlen(p);
   3325 	}
   3326 
   3327 	if (ce_code == CE_CONT && (p < q - 1))
   3328 		(void) strcpy(p, "\n"); /* add final \n if needed */
   3329 
   3330 	va_start(ap, fmt);
   3331 	vcmn_err(ce_code, buf, ap);
   3332 	va_end(ap);
   3333 }
   3334 
   3335 /*
   3336  * Ecache Scrubbing
   3337  *
   3338  * The basic idea is to prevent lines from sitting in the ecache long enough
   3339  * to build up soft errors which can lead to ecache parity errors.
   3340  *
   3341  * The following rules are observed when flushing the ecache:
   3342  *
   3343  * 1. When the system is busy, flush bad clean lines
   3344  * 2. When the system is idle, flush all clean lines
   3345  * 3. When the system is idle, flush good dirty lines
   3346  * 4. Never flush bad dirty lines.
   3347  *
   3348  *	modify	parity	busy   idle
   3349  *	----------------------------
   3350  *	clean	good		X
   3351  * 	clean	bad	X	X
   3352  * 	dirty	good		X
   3353  *	dirty	bad
   3354  *
   3355  * Bad or good refers to whether a line has an E$ parity error or not.
   3356  * Clean or dirty refers to the state of the modified bit.  We currently
   3357  * default the scan rate to 100 (scan 10% of the cache per second).
   3358  *
   3359  * The following are E$ states and actions.
   3360  *
   3361  * We encode our state as a 3-bit number, consisting of:
   3362  *	ECACHE_STATE_MODIFIED	(0=clean, 1=dirty)
   3363  *	ECACHE_STATE_PARITY	(0=good,  1=bad)
   3364  *	ECACHE_STATE_BUSY	(0=idle,  1=busy)
   3365  *
   3366  * We associate a flushing and a logging action with each state.
   3367  *
   3368  * E$ actions are different for Spitfire and Sabre/Hummingbird modules.
   3369  * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored
   3370  * E$ only, in addition to value being set by ec_flush.
   3371  */
   3372 
   3373 #define	ALWAYS_FLUSH		0x1	/* flush E$ line on all E$ types */
   3374 #define	NEVER_FLUSH		0x0	/* never the flush the E$ line */
   3375 #define	MIRROR_FLUSH		0xF	/* flush E$ line on mirrored E$ only */
   3376 
   3377 struct {
   3378 	char	ec_flush;		/* whether to flush or not */
   3379 	char	ec_log;			/* ecache logging */
   3380 	char	ec_log_type;		/* log type info */
   3381 } ec_action[] = {	/* states of the E$ line in M P B */
   3382 	{ ALWAYS_FLUSH, 0, 0 },			 /* 0 0 0 clean_good_idle */
   3383 	{ MIRROR_FLUSH, 0, 0 },			 /* 0 0 1 clean_good_busy */
   3384 	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */
   3385 	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */
   3386 	{ ALWAYS_FLUSH, 0, 0 },			 /* 1 0 0 dirty_good_idle */
   3387 	{ MIRROR_FLUSH, 0, 0 },			 /* 1 0 1 dirty_good_busy */
   3388 	{ NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR },	 /* 1 1 0 dirty_bad_idle */
   3389 	{ NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR }	 /* 1 1 1 dirty_bad_busy */
   3390 };
   3391 
   3392 /*
   3393  * Offsets into the ec_action[] that determines clean_good_busy and
   3394  * dirty_good_busy lines.
   3395  */
   3396 #define	ECACHE_CGB_LINE		1	/* E$ clean_good_busy line */
   3397 #define	ECACHE_DGB_LINE		5	/* E$ dirty_good_busy line */
   3398 
   3399 /*
   3400  * We are flushing lines which are Clean_Good_Busy and also the lines
   3401  * Dirty_Good_Busy. And we only follow it for non-mirrored E$.
   3402  */
   3403 #define	CGB(x, m)	(((x) == ECACHE_CGB_LINE) && (m != ECACHE_CPU_MIRROR))
   3404 #define	DGB(x, m)	(((x) == ECACHE_DGB_LINE) && (m != ECACHE_CPU_MIRROR))
   3405 
   3406 #define	ECACHE_STATE_MODIFIED	0x4
   3407 #define	ECACHE_STATE_PARITY	0x2
   3408 #define	ECACHE_STATE_BUSY	0x1
   3409 
   3410 /*
   3411  * If ecache is mirrored ecache_calls_a_sec and ecache_scan_rate are reduced.
   3412  */
   3413 int ecache_calls_a_sec_mirrored = 1;
   3414 int ecache_lines_per_call_mirrored = 1;
   3415 
   3416 int ecache_scrub_enable = 1;	/* ecache scrubbing is on by default */
   3417 int ecache_scrub_verbose = 1;		/* prints clean and dirty lines */
   3418 int ecache_scrub_panic = 0;		/* panics on a clean and dirty line */
   3419 int ecache_calls_a_sec = 100;		/* scrubber calls per sec */
   3420 int ecache_scan_rate = 100;		/* scan rate (in tenths of a percent) */
   3421 int ecache_idle_factor = 1;		/* increase the scan rate when idle */
   3422 int ecache_flush_clean_good_busy = 50;	/* flush rate (in percent) */
   3423 int ecache_flush_dirty_good_busy = 100;	/* flush rate (in percent) */
   3424 
   3425 volatile int ec_timeout_calls = 1;	/* timeout calls */
   3426 
   3427 /*
   3428  * Interrupt number and pil for ecache scrubber cross-trap calls.
   3429  */
   3430 static uint64_t ecache_scrub_inum;
   3431 uint_t ecache_scrub_pil = PIL_9;
   3432 
   3433 /*
   3434  * Kstats for the E$ scrubber.
   3435  */
   3436 typedef struct ecache_kstat {
   3437 	kstat_named_t clean_good_idle;		/* # of lines scrubbed */
   3438 	kstat_named_t clean_good_busy;		/* # of lines skipped */
   3439 	kstat_named_t clean_bad_idle;		/* # of lines scrubbed */
   3440 	kstat_named_t clean_bad_busy;		/* # of lines scrubbed */
   3441 	kstat_named_t dirty_good_idle;		/* # of lines scrubbed */
   3442 	kstat_named_t dirty_good_busy;		/* # of lines skipped */
   3443 	kstat_named_t dirty_bad_idle;		/* # of lines skipped */
   3444 	kstat_named_t dirty_bad_busy;		/* # of lines skipped */
   3445 	kstat_named_t invalid_lines;		/* # of invalid lines */
   3446 	kstat_named_t clean_good_busy_flush;    /* # of lines scrubbed */
   3447 	kstat_named_t dirty_good_busy_flush;    /* # of lines scrubbed */
   3448 	kstat_named_t tags_cleared;		/* # of E$ tags cleared */
   3449 } ecache_kstat_t;
   3450 
   3451 static ecache_kstat_t ec_kstat_template = {
   3452 	{ "clean_good_idle", KSTAT_DATA_ULONG },
   3453 	{ "clean_good_busy", KSTAT_DATA_ULONG },
   3454 	{ "clean_bad_idle", KSTAT_DATA_ULONG },
   3455 	{ "clean_bad_busy", KSTAT_DATA_ULONG },
   3456 	{ "dirty_good_idle", KSTAT_DATA_ULONG },
   3457 	{ "dirty_good_busy", KSTAT_DATA_ULONG },
   3458 	{ "dirty_bad_idle", KSTAT_DATA_ULONG },
   3459 	{ "dirty_bad_busy", KSTAT_DATA_ULONG },
   3460 	{ "invalid_lines", KSTAT_DATA_ULONG },
   3461 	{ "clean_good_busy_flush", KSTAT_DATA_ULONG },
   3462 	{ "dirty_good_busy_flush", KSTAT_DATA_ULONG },
   3463 	{ "ecache_tags_cleared", KSTAT_DATA_ULONG }
   3464 };
   3465 
   3466 struct kmem_cache *sf_private_cache;
   3467 
   3468 /*
   3469  * Called periodically on each CPU to scan the ecache once a sec.
   3470  * adjusting the ecache line index appropriately
   3471  */
   3472 void
   3473 scrub_ecache_line()
   3474 {
   3475 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
   3476 	int cpuid = CPU->cpu_id;
   3477 	uint32_t index = ssmp->ecache_flush_index;
   3478 	uint64_t ec_size = cpunodes[cpuid].ecache_size;
   3479 	size_t ec_linesize = cpunodes[cpuid].ecache_linesize;
   3480 	int nlines = ssmp->ecache_nlines;
   3481 	uint32_t ec_set_size = ec_size / ecache_associativity;
   3482 	int ec_mirror = ssmp->ecache_mirror;
   3483 	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
   3484 
   3485 	int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0;
   3486 	int mpb;		/* encode Modified, Parity, Busy for action */
   3487 	uchar_t state;
   3488 	uint64_t ec_tag, paddr, oafsr, tafsr, nafsr;
   3489 	uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
   3490 	ec_data_t ec_data[8];
   3491 	kstat_named_t *ec_knp;
   3492 
   3493 	switch (ec_mirror) {
   3494 		default:
   3495 		case ECACHE_CPU_NON_MIRROR:
   3496 			/*
   3497 			 * The E$ scan rate is expressed in units of tenths of
   3498 			 * a percent.  ecache_scan_rate = 1000 (100%) means the
   3499 			 * whole cache is scanned every second.
   3500 			 */
   3501 			scan_lines = (nlines * ecache_scan_rate) /
   3502 			    (1000 * ecache_calls_a_sec);
   3503 			if (!(ssmp->ecache_busy)) {
   3504 				if (ecache_idle_factor > 0) {
   3505 					scan_lines *= ecache_idle_factor;
   3506 				}
   3507 			} else {
   3508 				flush_clean_busy = (scan_lines *
   3509 				    ecache_flush_clean_good_busy) / 100;
   3510 				flush_dirty_busy = (scan_lines *
   3511 				    ecache_flush_dirty_good_busy) / 100;
   3512 			}
   3513 
   3514 			ec_timeout_calls = (ecache_calls_a_sec ?
   3515 			    ecache_calls_a_sec : 1);
   3516 			break;
   3517 
   3518 		case ECACHE_CPU_MIRROR:
   3519 			scan_lines = ecache_lines_per_call_mirrored;
   3520 			ec_timeout_calls = (ecache_calls_a_sec_mirrored ?
   3521 			    ecache_calls_a_sec_mirrored : 1);
   3522 			break;
   3523 	}
   3524 
   3525 	/*
   3526 	 * The ecache scrubber algorithm operates by reading and
   3527 	 * decoding the E$ tag to determine whether the corresponding E$ line
   3528 	 * can be scrubbed. There is a implicit assumption in the scrubber
   3529 	 * logic that the E$ tag is valid. Unfortunately, this assertion is
   3530 	 * flawed since the E$ tag may also be corrupted and have parity errors
   3531 	 * The scrubber logic is enhanced to check the validity of the E$ tag
   3532 	 * before scrubbing. When a parity error is detected in the E$ tag,
   3533 	 * it is possible to recover and scrub the tag under certain conditions
   3534 	 * so that a ETP error condition can be avoided.
   3535 	 */
   3536 
   3537 	for (mpb = line = 0; line < scan_lines; line++, mpb = 0) {
   3538 		/*
   3539 		 * We get the old-AFSR before clearing the AFSR sticky bits
   3540 		 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}
   3541 		 * If CP bit is set in the old-AFSR, we log an Orphan CP event.
   3542 		 */
   3543 		ec_tag = get_ecache_tag(index, &nafsr, acc_afsr);
   3544 		state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
   3545 		    cpu_ec_state_shift);
   3546 
   3547 		/*
   3548 		 * ETP is set try to scrub the ecache tag.
   3549 		 */
   3550 		if (nafsr & P_AFSR_ETP) {
   3551 			ecache_scrub_tag_err(nafsr, state, index);
   3552 		} else if (state & cpu_ec_state_valid) {
   3553 			/*
   3554 			 * ETP is not set, E$ tag is valid.
   3555 			 * Proceed with the E$ scrubbing.
   3556 			 */
   3557 			if (state & cpu_ec_state_dirty)
   3558 				mpb |= ECACHE_STATE_MODIFIED;
   3559 
   3560 			tafsr = check_ecache_line(index, acc_afsr);
   3561 
   3562 			if (tafsr & P_AFSR_EDP) {
   3563 				mpb |= ECACHE_STATE_PARITY;
   3564 
   3565 				if (ecache_scrub_verbose ||
   3566 				    ecache_scrub_panic) {
   3567 					get_ecache_dtag(P2ALIGN(index, 64),
   3568 					    (uint64_t *)&ec_data[0],
   3569 					    &ec_tag, &oafsr, acc_afsr);
   3570 				}
   3571 			}
   3572 
   3573 			if (ssmp->ecache_busy)
   3574 				mpb |= ECACHE_STATE_BUSY;
   3575 
   3576 			ec_knp = (kstat_named_t *)ec_ksp + mpb;
   3577 			ec_knp->value.ul++;
   3578 
   3579 			paddr = ((ec_tag & cpu_ec_tag_mask) <<
   3580 			    cpu_ec_tag_shift) | (index % ec_set_size);
   3581 
   3582 			/*
   3583 			 * We flush the E$ lines depending on the ec_flush,
   3584 			 * we additionally flush clean_good_busy and
   3585 			 * dirty_good_busy lines for mirrored E$.
   3586 			 */
   3587 			if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) {
   3588 				flushecacheline(paddr, ec_size);
   3589 			} else if ((ec_mirror == ECACHE_CPU_MIRROR) &&
   3590 			    (ec_action[mpb].ec_flush == MIRROR_FLUSH)) {
   3591 				flushecacheline(paddr, ec_size);
   3592 			} else if (ec_action[mpb].ec_flush == NEVER_FLUSH) {
   3593 				softcall(ecache_page_retire, (void *)paddr);
   3594 			}
   3595 
   3596 			/*
   3597 			 * Conditionally flush both the clean_good and
   3598 			 * dirty_good lines when busy.
   3599 			 */
   3600 			if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) {
   3601 				flush_clean_busy--;
   3602 				flushecacheline(paddr, ec_size);
   3603 				ec_ksp->clean_good_busy_flush.value.ul++;
   3604 			} else if (DGB(mpb, ec_mirror) &&
   3605 			    (flush_dirty_busy > 0)) {
   3606 				flush_dirty_busy--;
   3607 				flushecacheline(paddr, ec_size);
   3608 				ec_ksp->dirty_good_busy_flush.value.ul++;
   3609 			}
   3610 
   3611 			if (ec_action[mpb].ec_log && (ecache_scrub_verbose ||
   3612 			    ecache_scrub_panic)) {
   3613 				ecache_scrub_log(ec_data, ec_tag, paddr, mpb,
   3614 				    tafsr);
   3615 			}
   3616 
   3617 		} else {
   3618 			ec_ksp->invalid_lines.value.ul++;
   3619 		}
   3620 
   3621 		if ((index += ec_linesize) >= ec_size)
   3622 			index = 0;
   3623 
   3624 	}
   3625 
   3626 	/*
   3627 	 * set the ecache scrub index for the next time around
   3628 	 */
   3629 	ssmp->ecache_flush_index = index;
   3630 
   3631 	if (*acc_afsr & P_AFSR_CP) {
   3632 		uint64_t ret_afsr;
   3633 
   3634 		ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr);
   3635 		if ((ret_afsr & P_AFSR_CP) == 0)
   3636 			*acc_afsr = 0;
   3637 	}
   3638 }
   3639 
   3640 /*
   3641  * Handler for ecache_scrub_inum softint.  Call scrub_ecache_line until
   3642  * we decrement the outstanding request count to zero.
   3643  */
   3644 
   3645 /*ARGSUSED*/
   3646 uint_t
   3647 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
   3648 {
   3649 	int i;
   3650 	int outstanding;
   3651 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
   3652 	uint32_t *countp = &ssmp->ec_scrub_outstanding;
   3653 
   3654 	do {
   3655 		outstanding = *countp;
   3656 		ASSERT(outstanding > 0);
   3657 		for (i = 0; i < outstanding; i++)
   3658 			scrub_ecache_line();
   3659 	} while (atomic_add_32_nv(countp, -outstanding));
   3660 
   3661 	return (DDI_INTR_CLAIMED);
   3662 }
   3663 
   3664 /*
   3665  * force each cpu to perform an ecache scrub, called from a timeout
   3666  */
   3667 extern xcfunc_t ecache_scrubreq_tl1;
   3668 
   3669 void
   3670 do_scrub_ecache_line(void)
   3671 {
   3672 	long delta;
   3673 
   3674 	if (ecache_calls_a_sec > hz)
   3675 		ecache_calls_a_sec = hz;
   3676 	else if (ecache_calls_a_sec <= 0)
   3677 		ecache_calls_a_sec = 1;
   3678 
   3679 	if (ecache_calls_a_sec_mirrored > hz)
   3680 		ecache_calls_a_sec_mirrored = hz;
   3681 	else if (ecache_calls_a_sec_mirrored <= 0)
   3682 		ecache_calls_a_sec_mirrored = 1;
   3683 
   3684 	if (ecache_scrub_enable) {
   3685 		xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0);
   3686 		delta = hz / ec_timeout_calls;
   3687 	} else {
   3688 		delta = hz;
   3689 	}
   3690 
   3691 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
   3692 	    delta);
   3693 }
   3694 
   3695 /*
   3696  * initialization for ecache scrubbing
   3697  * This routine is called AFTER all cpus have had cpu_init_private called
   3698  * to initialize their private data areas.
   3699  */
   3700 void
   3701 cpu_init_cache_scrub(void)
   3702 {
   3703 	if (ecache_calls_a_sec > hz) {
   3704 		cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); "
   3705 		    "resetting to hz (%d)", ecache_calls_a_sec, hz);
   3706 		ecache_calls_a_sec = hz;
   3707 	}
   3708 
   3709 	/*
   3710 	 * Register softint for ecache scrubbing.
   3711 	 */
   3712 	ecache_scrub_inum = add_softintr(ecache_scrub_pil,
   3713 	    scrub_ecache_line_intr, NULL, SOFTINT_MT);
   3714 
   3715 	/*
   3716 	 * kick off the scrubbing using realtime timeout
   3717 	 */
   3718 	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
   3719 	    hz / ecache_calls_a_sec);
   3720 }
   3721 
   3722 /*
   3723  * Unset the busy flag for this cpu.
   3724  */
   3725 void
   3726 cpu_idle_ecache_scrub(struct cpu *cp)
   3727 {
   3728 	if (CPU_PRIVATE(cp) != NULL) {
   3729 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
   3730 		    sfpr_scrub_misc);
   3731 		ssmp->ecache_busy = ECACHE_CPU_IDLE;
   3732 	}
   3733 }
   3734 
   3735 /*
   3736  * Set the busy flag for this cpu.
   3737  */
   3738 void
   3739 cpu_busy_ecache_scrub(struct cpu *cp)
   3740 {
   3741 	if (CPU_PRIVATE(cp) != NULL) {
   3742 		spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp,
   3743 		    sfpr_scrub_misc);
   3744 		ssmp->ecache_busy = ECACHE_CPU_BUSY;
   3745 	}
   3746 }
   3747 
   3748 /*
   3749  * initialize the ecache scrubber data structures
   3750  * The global entry point cpu_init_private replaces this entry point.
   3751  *
   3752  */
   3753 static void
   3754 cpu_init_ecache_scrub_dr(struct cpu *cp)
   3755 {
   3756 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
   3757 	int cpuid = cp->cpu_id;
   3758 
   3759 	/*
   3760 	 * intialize bookkeeping for cache scrubbing
   3761 	 */
   3762 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
   3763 
   3764 	ssmp->ecache_flush_index = 0;
   3765 
   3766 	ssmp->ecache_nlines =
   3767 	    cpunodes[cpuid].ecache_size / cpunodes[cpuid].ecache_linesize;
   3768 
   3769 	/*
   3770 	 * Determine whether we are running on mirrored SRAM
   3771 	 */
   3772 
   3773 	if (cpunodes[cpuid].msram == ECACHE_CPU_MIRROR)
   3774 		ssmp->ecache_mirror = ECACHE_CPU_MIRROR;
   3775 	else
   3776 		ssmp->ecache_mirror = ECACHE_CPU_NON_MIRROR;
   3777 
   3778 	cpu_busy_ecache_scrub(cp);
   3779 
   3780 	/*
   3781 	 * initialize the kstats
   3782 	 */
   3783 	ecache_kstat_init(cp);
   3784 }
   3785 
   3786 /*
   3787  * uninitialize the ecache scrubber data structures
   3788  * The global entry point cpu_uninit_private replaces this entry point.
   3789  */
   3790 static void
   3791 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
   3792 {
   3793 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
   3794 
   3795 	if (ssmp->ecache_ksp != NULL) {
   3796 		kstat_delete(ssmp->ecache_ksp);
   3797 		ssmp->ecache_ksp = NULL;
   3798 	}
   3799 
   3800 	/*
   3801 	 * un-initialize bookkeeping for cache scrubbing
   3802 	 */
   3803 	bzero(ssmp, sizeof (spitfire_scrub_misc_t));
   3804 
   3805 	cpu_idle_ecache_scrub(cp);
   3806 }
   3807 
   3808 struct kmem_cache *sf_private_cache;
   3809 
   3810 /*
   3811  * Cpu private initialization.  This includes allocating the cpu_private
   3812  * data structure, initializing it, and initializing the scrubber for this
   3813  * cpu.  This is called once for EVERY cpu, including CPU 0. This function
   3814  * calls cpu_init_ecache_scrub_dr to init the scrubber.
   3815  * We use kmem_cache_create for the spitfire private data structure because it
   3816  * needs to be allocated on a S_ECACHE_MAX_LSIZE (64) byte boundary.
   3817  */
   3818 void
   3819 cpu_init_private(struct cpu *cp)
   3820 {
   3821 	spitfire_private_t *sfprp;
   3822 
   3823 	ASSERT(CPU_PRIVATE(cp) == NULL);
   3824 
   3825 	/*
   3826 	 * If the sf_private_cache has not been created, create it.
   3827 	 */
   3828 	if (sf_private_cache == NULL) {
   3829 		sf_private_cache = kmem_cache_create("sf_private_cache",
   3830 		    sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE, NULL,
   3831 		    NULL, NULL, NULL, NULL, 0);
   3832 		ASSERT(sf_private_cache);
   3833 	}
   3834 
   3835 	sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP);
   3836 
   3837 	bzero(sfprp, sizeof (spitfire_private_t));
   3838 
   3839 	cpu_init_ecache_scrub_dr(cp);
   3840 }
   3841 
   3842 /*
   3843  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
   3844  * deallocate the scrubber data structures and cpu_private data structure.
   3845  * For now, this function just calls cpu_unint_ecache_scrub_dr to uninit
   3846  * the scrubber for the specified cpu.
   3847  */
   3848 void
   3849 cpu_uninit_private(struct cpu *cp)
   3850 {
   3851 	ASSERT(CPU_PRIVATE(cp));
   3852 
   3853 	cpu_uninit_ecache_scrub_dr(cp);
   3854 	kmem_cache_free(sf_private_cache, CPU_PRIVATE(cp));
   3855 	CPU_PRIVATE(cp) = NULL;
   3856 }
   3857 
   3858 /*
   3859  * initialize the ecache kstats for each cpu
   3860  */
   3861 static void
   3862 ecache_kstat_init(struct cpu *cp)
   3863 {
   3864 	struct kstat *ksp;
   3865 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
   3866 
   3867 	ASSERT(ssmp != NULL);
   3868 
   3869 	if ((ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc",
   3870 	    KSTAT_TYPE_NAMED,
   3871 	    sizeof (ecache_kstat_t) / sizeof (kstat_named_t),
   3872 	    KSTAT_FLAG_WRITABLE)) == NULL) {
   3873 		ssmp->ecache_ksp = NULL;
   3874 		cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id);
   3875 		return;
   3876 	}
   3877 
   3878 	ssmp->ecache_ksp = ksp;
   3879 	bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t));
   3880 	kstat_install(ksp);
   3881 }
   3882 
   3883 /*
   3884  * log the bad ecache information
   3885  */
   3886 static void
   3887 ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb,
   3888 		uint64_t afsr)
   3889 {
   3890 	spitf_async_flt spf_flt;
   3891 	struct async_flt *aflt;
   3892 	int i;
   3893 	char *class;
   3894 
   3895 	bzero(&spf_flt, sizeof (spitf_async_flt));
   3896 	aflt = &spf_flt.cmn_asyncflt;
   3897 
   3898 	for (i = 0; i < 8; i++) {
   3899 		spf_flt.flt_ec_data[i] = ec_data[i];
   3900 	}
   3901 
   3902 	spf_flt.flt_ec_tag = ec_tag;
   3903 
   3904 	if (mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) {
   3905 		spf_flt.flt_type = ec_action[mpb].ec_log_type;
   3906 	} else spf_flt.flt_type = (ushort_t)mpb;
   3907 
   3908 	aflt->flt_inst = CPU->cpu_id;
   3909 	aflt->flt_class = CPU_FAULT;
   3910 	aflt->flt_id = gethrtime_waitfree();
   3911 	aflt->flt_addr = paddr;
   3912 	aflt->flt_stat = afsr;
   3913 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
   3914 
   3915 	switch (mpb) {
   3916 	case CPU_ECACHE_TAG_ERR:
   3917 	case CPU_ECACHE_ADDR_PAR_ERR:
   3918 	case CPU_ECACHE_ETP_ETS_ERR:
   3919 	case CPU_ECACHE_STATE_ERR:
   3920 		class = FM_EREPORT_CPU_USII_ESCRUB_TAG;
   3921 		break;
   3922 	default:
   3923 		class = FM_EREPORT_CPU_USII_ESCRUB_DATA;
   3924 		break;
   3925 	}
   3926 
   3927 	cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt),
   3928 	    ue_queue, aflt->flt_panic);
   3929 
   3930 	if (aflt->flt_panic)
   3931 		cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$"
   3932 		    "line detected");
   3933 }
   3934 
   3935 /*
   3936  * Process an ecache error that occured during the E$ scrubbing.
   3937  * We do the ecache scan to find the bad line, flush the bad line
   3938  * and start the memscrubber to find any UE (in memory or in another cache)
   3939  */
   3940 static uint64_t
   3941 ecache_scrub_misc_err(int type, uint64_t afsr)
   3942 {
   3943 	spitf_async_flt spf_flt;
   3944 	struct async_flt *aflt;
   3945 	uint64_t oafsr;
   3946 
   3947 	bzero(&spf_flt, sizeof (spitf_async_flt));
   3948 	aflt = &spf_flt.cmn_asyncflt;
   3949 
   3950 	/*
   3951 	 * Scan each line in the cache to look for the one
   3952 	 * with bad parity
   3953 	 */
   3954 	aflt->flt_addr = AFLT_INV_ADDR;
   3955 	scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
   3956 	    &spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
   3957 
   3958 	if (oafsr & P_AFSR_CP) {
   3959 		uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
   3960 		*cp_afsr |= oafsr;
   3961 	}
   3962 
   3963 	/*
   3964 	 * If we found a bad PA, update the state to indicate if it is
   3965 	 * memory or I/O space.
   3966 	 */
   3967 	if (aflt->flt_addr != AFLT_INV_ADDR) {
   3968 		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
   3969 		    MMU_PAGESHIFT)) ? 1 : 0;
   3970 	}
   3971 
   3972 	spf_flt.flt_type = (ushort_t)type;
   3973 
   3974 	aflt->flt_inst = CPU->cpu_id;
   3975 	aflt->flt_class = CPU_FAULT;
   3976 	aflt->flt_id = gethrtime_waitfree();
   3977 	aflt->flt_status = afsr;
   3978 	aflt->flt_panic = (uchar_t)ecache_scrub_panic;
   3979 
   3980 	/*
   3981 	 * We have the bad line, flush that line and start
   3982 	 * the memscrubber.
   3983 	 */
   3984 	if (spf_flt.flt_ec_lcnt > 0) {
   3985 		flushecacheline(P2ALIGN(aflt->flt_addr, 64),
   3986 		    cpunodes[CPU->cpu_id].ecache_size);
   3987 		read_all_memscrub = 1;
   3988 		memscrub_run();
   3989 	}
   3990 
   3991 	cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ?
   3992 	    FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN,
   3993 	    (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic);
   3994 
   3995 	return (oafsr);
   3996 }
   3997 
   3998 static void
   3999 ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index)
   4000 {
   4001 	ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT;
   4002 	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
   4003 	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
   4004 	uint64_t ec_tag, paddr, oafsr;
   4005 	ec_data_t ec_data[8];
   4006 	int cpuid = CPU->cpu_id;
   4007 	uint32_t ec_set_size = cpunodes[cpuid].ecache_size /
   4008 	    ecache_associativity;
   4009 	uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
   4010 
   4011 	get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag,
   4012 	    &oafsr, cpu_afsr);
   4013 	paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
   4014 	    (index % ec_set_size);
   4015 
   4016 	/*
   4017 	 * E$ tag state has good parity
   4018 	 */
   4019 	if ((afsr_ets & cpu_ec_state_parity) == 0) {
   4020 		if (afsr_ets & cpu_ec_parity) {
   4021 			/*
   4022 			 * E$ tag state bits indicate the line is clean,
   4023 			 * invalidate the E$ tag and continue.
   4024 			 */
   4025 			if (!(state & cpu_ec_state_dirty)) {
   4026 				/*
   4027 				 * Zero the tag and mark the state invalid
   4028 				 * with good parity for the tag.
   4029 				 */
   4030 				if (isus2i || isus2e)
   4031 					write_hb_ec_tag_parity(index);
   4032 				else
   4033 					write_ec_tag_parity(index);
   4034 
   4035 				/* Sync with the dual tag */
   4036 				flushecacheline(0,
   4037 				    cpunodes[CPU->cpu_id].ecache_size);
   4038 				ec_ksp->tags_cleared.value.ul++;
   4039 				ecache_scrub_log(ec_data, ec_tag, paddr,
   4040 				    CPU_ECACHE_TAG_ERR, afsr);
   4041 				return;
   4042 			} else {
   4043 				ecache_scrub_log(ec_data, ec_tag, paddr,
   4044 				    CPU_ECACHE_ADDR_PAR_ERR, afsr);
   4045 				cmn_err(CE_PANIC, " E$ tag address has bad"
   4046 				    " parity");
   4047 			}
   4048 		} else if ((afsr_ets & cpu_ec_parity) == 0) {
   4049 			/*
   4050 			 * ETS is zero but ETP is set
   4051 			 */
   4052 			ecache_scrub_log(ec_data, ec_tag, paddr,
   4053 			    CPU_ECACHE_ETP_ETS_ERR, afsr);
   4054 			cmn_err(CE_PANIC, "AFSR.ETP is set and"
   4055 			    " AFSR.ETS is zero");
   4056 		}
   4057 	} else {
   4058 		/*
   4059 		 * E$ tag state bit has a bad parity
   4060 		 */
   4061 		ecache_scrub_log(ec_data, ec_tag, paddr,
   4062 		    CPU_ECACHE_STATE_ERR, afsr);
   4063 		cmn_err(CE_PANIC, "E$ tag state has bad parity");
   4064 	}
   4065 }
   4066 
   4067 static void
   4068 ecache_page_retire(void *arg)
   4069 {
   4070 	uint64_t paddr = (uint64_t)arg;
   4071 	(void) page_retire(paddr, PR_UE);
   4072 }
   4073 
   4074 void
   4075 sticksync_slave(void)
   4076 {}
   4077 
   4078 void
   4079 sticksync_master(void)
   4080 {}
   4081 
   4082 /*ARGSUSED*/
   4083 void
   4084 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp)
   4085 {}
   4086 
   4087 void
   4088 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
   4089 {
   4090 	int status;
   4091 	ddi_fm_error_t de;
   4092 
   4093 	bzero(&de, sizeof (ddi_fm_error_t));
   4094 
   4095 	de.fme_version = DDI_FME_VERSION;
   4096 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
   4097 	    FM_ENA_FMT1);
   4098 	de.fme_flag = expected;
   4099 	de.fme_bus_specific = (void *)aflt->flt_addr;
   4100 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
   4101 
   4102 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
   4103 		aflt->flt_panic = 1;
   4104 }
   4105 
   4106 /*ARGSUSED*/
   4107 void
   4108 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
   4109     errorq_t *eqp, uint_t flag)
   4110 {
   4111 	struct async_flt *aflt = (struct async_flt *)payload;
   4112 
   4113 	aflt->flt_erpt_class = error_class;
   4114 	errorq_dispatch(eqp, payload, payload_sz, flag);
   4115 }
   4116 
   4117 #define	MAX_SIMM	8
   4118 
   4119 struct ce_info {
   4120 	char    name[UNUM_NAMLEN];
   4121 	uint64_t intermittent_total;
   4122 	uint64_t persistent_total;
   4123 	uint64_t sticky_total;
   4124 	unsigned short leaky_bucket_cnt;
   4125 };
   4126 
   4127 /*
   4128  * Separately-defined structure for use in reporting the ce_info
   4129  * to SunVTS without exposing the internal layout and implementation
   4130  * of struct ce_info.
   4131  */
   4132 static struct ecc_error_info ecc_error_info_data = {
   4133 	{ "version", KSTAT_DATA_UINT32 },
   4134 	{ "maxcount", KSTAT_DATA_UINT32 },
   4135 	{ "count", KSTAT_DATA_UINT32 }
   4136 };
   4137 static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) /
   4138     sizeof (struct kstat_named);
   4139 
   4140 #if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN
   4141 #error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN"
   4142 #endif
   4143 
   4144 struct ce_info  *mem_ce_simm = NULL;
   4145 size_t mem_ce_simm_size = 0;
   4146 
   4147 /*
   4148  * Default values for the number of CE's allowed per interval.
   4149  * Interval is defined in minutes
   4150  * SOFTERR_MIN_TIMEOUT is defined in microseconds
   4151  */
   4152 #define	SOFTERR_LIMIT_DEFAULT		2
   4153 #define	SOFTERR_INTERVAL_DEFAULT	1440		/* This is 24 hours */
   4154 #define	SOFTERR_MIN_TIMEOUT		(60 * MICROSEC)	/* This is 1 minute */
   4155 #define	TIMEOUT_NONE			((timeout_id_t)0)
   4156 #define	TIMEOUT_SET			((timeout_id_t)1)
   4157 
   4158 /*
   4159  * timeout identifer for leaky_bucket
   4160  */
   4161 static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE;
   4162 
   4163 /*
   4164  * Tunables for maximum number of allowed CE's in a given time
   4165  */
   4166 int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
   4167 int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
   4168 
   4169 void
   4170 cpu_mp_init(void)
   4171 {
   4172 	size_t size = cpu_aflt_size();
   4173 	size_t i;
   4174 	kstat_t *ksp;
   4175 
   4176 	/*
   4177 	 * Initialize the CE error handling buffers.
   4178 	 */
   4179 	mem_ce_simm_size = MAX_SIMM * max_ncpus;
   4180 	size = sizeof (struct ce_info) * mem_ce_simm_size;
   4181 	mem_ce_simm = kmem_zalloc(size, KM_SLEEP);
   4182 
   4183 	ksp = kstat_create("unix", 0, "ecc-info", "misc",
   4184 	    KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL);
   4185 	if (ksp != NULL) {
   4186 		ksp->ks_data = (struct kstat_named *)&ecc_error_info_data;
   4187 		ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER;
   4188 		ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size;
   4189 		ecc_error_info_data.count.value.ui32 = 0;
   4190 		kstat_install(ksp);
   4191 	}
   4192 
   4193 	for (i = 0; i < mem_ce_simm_size; i++) {
   4194 		struct kstat_ecc_mm_info *kceip;
   4195 
   4196 		kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info),
   4197 		    KM_SLEEP);
   4198 		ksp = kstat_create("mm", i, "ecc-info", "misc",
   4199 		    KSTAT_TYPE_NAMED,
   4200 		    sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t),
   4201 		    KSTAT_FLAG_VIRTUAL);
   4202 		if (ksp != NULL) {
   4203 			/*
   4204 			 * Re-declare ks_data_size to include room for the
   4205 			 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE
   4206 			 * set.
   4207 			 */
   4208 			ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) +
   4209 			    KSTAT_CE_UNUM_NAMLEN;
   4210 			ksp->ks_data = kceip;
   4211 			kstat_named_init(&kceip->name,
   4212 			    "name", KSTAT_DATA_STRING);
   4213 			kstat_named_init(&kceip->intermittent_total,
   4214 			    "intermittent_total", KSTAT_DATA_UINT64);
   4215 			kstat_named_init(&kceip->persistent_total,
   4216 			    "persistent_total", KSTAT_DATA_UINT64);
   4217 			kstat_named_init(&kceip->sticky_total,
   4218 			    "sticky_total", KSTAT_DATA_UINT64);
   4219 			/*
   4220 			 * Use the default snapshot routine as it knows how to
   4221 			 * deal with named kstats with long strings.
   4222 			 */
   4223 			ksp->ks_update = ecc_kstat_update;
   4224 			kstat_install(ksp);
   4225 		} else {
   4226 			kmem_free(kceip, sizeof (struct kstat_ecc_mm_info));
   4227 		}
   4228 	}
   4229 }
   4230 
   4231 /*ARGSUSED*/
   4232 static void
   4233 leaky_bucket_timeout(void *arg)
   4234 {
   4235 	int i;
   4236 	struct ce_info *psimm = mem_ce_simm;
   4237 
   4238 	for (i = 0; i < mem_ce_simm_size; i++) {
   4239 		if (psimm[i].leaky_bucket_cnt > 0)
   4240 			atomic_add_16(&psimm[i].leaky_bucket_cnt, -1);
   4241 	}
   4242 	add_leaky_bucket_timeout();
   4243 }
   4244 
   4245 static void
   4246 add_leaky_bucket_timeout(void)
   4247 {
   4248 	long timeout_in_microsecs;
   4249 
   4250 	/*
   4251 	 * create timeout for next leak.
   4252 	 *
   4253 	 * The timeout interval is calculated as follows
   4254 	 *
   4255 	 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit
   4256 	 *
   4257 	 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds
   4258 	 * in a minute), then multiply this by MICROSEC to get the interval
   4259 	 * in microseconds.  Divide this total by ecc_softerr_limit so that
   4260 	 * the timeout interval is accurate to within a few microseconds.
   4261 	 */
   4262 
   4263 	if (ecc_softerr_limit <= 0)
   4264 		ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
   4265 	if (ecc_softerr_interval <= 0)
   4266 		ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;
   4267 
   4268 	timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) /
   4269 	    ecc_softerr_limit;
   4270 
   4271 	if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT)
   4272 		timeout_in_microsecs = SOFTERR_MIN_TIMEOUT;
   4273 
   4274 	leaky_bucket_timeout_id = timeout(leaky_bucket_timeout,
   4275 	    (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs));
   4276 }
   4277 
   4278 /*
   4279  * Legacy Correctable ECC Error Hash
   4280  *
   4281  * All of the code below this comment is used to implement a legacy array
   4282  * which counted intermittent, persistent, and sticky CE errors by unum,
   4283  * and then was later extended to publish the data as a kstat for SunVTS.
   4284  * All of this code is replaced by FMA, and remains here until such time
   4285  * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed.
   4286  *
   4287  * Errors are saved in three buckets per-unum:
   4288  * (1) sticky - scrub was unsuccessful, cannot be scrubbed
   4289  *     This could represent a problem, and is immediately printed out.
   4290  * (2) persistent - was successfully scrubbed
   4291  *     These errors use the leaky bucket algorithm to determine
   4292  *     if there is a serious problem.
   4293  * (3) intermittent - may have originated from the cpu or upa/safari bus,
   4294  *     and does not necessarily indicate any problem with the dimm itself,
   4295  *     is critical information for debugging new hardware.
   4296  *     Because we do not know if it came from the dimm, it would be
   4297  *     inappropriate to include these in the leaky bucket counts.
   4298  *
   4299  * If the E$ line was modified before the scrub operation began, then the
   4300  * displacement flush at the beginning of scrubphys() will cause the modified
   4301  * line to be written out, which will clean up the CE.  Then, any subsequent
   4302  * read will not cause an error, which will cause persistent errors to be
   4303  * identified as intermittent.
   4304  *
   4305  * If a DIMM is going bad, it will produce true persistents as well as
   4306  * false intermittents, so these intermittents can be safely ignored.
   4307  *
   4308  * If the error count is excessive for a DIMM, this function will return
   4309  * PR_MCE, and the CPU module may then decide to remove that page from use.
   4310  */
   4311 static int
   4312 ce_count_unum(int status, int len, char *unum)
   4313 {
   4314 	int i;
   4315 	struct ce_info *psimm = mem_ce_simm;
   4316 	int page_status = PR_OK;
   4317 
   4318 	ASSERT(psimm != NULL);
   4319 
   4320 	if (len <= 0 ||
   4321 	    (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0)
   4322 		return (page_status);
   4323 
   4324 	/*
   4325 	 * Initialize the leaky_bucket timeout
   4326 	 */
   4327 	if (casptr(&leaky_bucket_timeout_id,
   4328 	    TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE)
   4329 		add_leaky_bucket_timeout();
   4330 
   4331 	for (i = 0; i < mem_ce_simm_size; i++) {
   4332 		if (psimm[i].name[0] == '\0') {
   4333 			/*
   4334 			 * Hit the end of the valid entries, add
   4335 			 * a new one.
   4336 			 */
   4337 			(void) strncpy(psimm[i].name, unum, len);
   4338 			if (status & ECC_STICKY) {
   4339 				/*
   4340 				 * Sticky - the leaky bucket is used to track
   4341 				 * soft errors.  Since a sticky error is a
   4342 				 * hard error and likely to be retired soon,
   4343 				 * we do not count it in the leaky bucket.
   4344 				 */
   4345 				psimm[i].leaky_bucket_cnt = 0;
   4346 				psimm[i].intermittent_total = 0;
   4347 				psimm[i].persistent_total = 0;
   4348 				psimm[i].sticky_total = 1;
   4349 				cmn_err(CE_NOTE,
   4350 				    "[AFT0] Sticky Softerror encountered "
   4351 				    "on Memory Module %s\n", unum);
   4352 				page_status = PR_MCE;
   4353 			} else if (status & ECC_PERSISTENT) {
   4354 				psimm[i].leaky_bucket_cnt = 1;
   4355 				psimm[i].intermittent_total = 0;
   4356 				psimm[i].persistent_total = 1;
   4357 				psimm[i].sticky_total = 0;
   4358 			} else {
   4359 				/*
   4360 				 * Intermittent - Because the scrub operation
   4361 				 * cannot find the error in the DIMM, we will
   4362 				 * not count these in the leaky bucket
   4363 				 */
   4364 				psimm[i].leaky_bucket_cnt = 0;
   4365 				psimm[i].intermittent_total = 1;
   4366 				psimm[i].persistent_total = 0;
   4367 				psimm[i].sticky_total = 0;
   4368 			}
   4369 			ecc_error_info_data.count.value.ui32++;
   4370 			break;
   4371 		} else if (strncmp(unum, psimm[i].name, len) == 0) {
   4372 			/*
   4373 			 * Found an existing entry for the current
   4374 			 * memory module, adjust the counts.
   4375 			 */
   4376 			if (status & ECC_STICKY) {
   4377 				psimm[i].sticky_total++;
   4378 				cmn_err(CE_NOTE,
   4379 				    "[AFT0] Sticky Softerror encountered "
   4380 				    "on Memory Module %s\n", unum);
   4381 				page_status = PR_MCE;
   4382 			} else if (status & ECC_PERSISTENT) {
   4383 				int new_value;
   4384 
   4385 				new_value = atomic_add_16_nv(
   4386 				    &psimm[i].leaky_bucket_cnt, 1);
   4387 				psimm[i].persistent_total++;
   4388 				if (new_value > ecc_softerr_limit) {
   4389 					cmn_err(CE_NOTE, "[AFT0] Most recent %d"
   4390 					    " soft errors from Memory Module"
   4391 					    " %s exceed threshold (N=%d,"
   4392 					    " T=%dh:%02dm) triggering page"
   4393 					    " retire", new_value, unum,
   4394 					    ecc_softerr_limit,
   4395 					    ecc_softerr_interval / 60,
   4396 					    ecc_softerr_interval % 60);
   4397 					atomic_add_16(
   4398 					    &psimm[i].leaky_bucket_cnt, -1);
   4399 					page_status = PR_MCE;
   4400 				}
   4401 			} else { /* Intermittent */
   4402 				psimm[i].intermittent_total++;
   4403 			}
   4404 			break;
   4405 		}
   4406 	}
   4407 
   4408 	if (i >= mem_ce_simm_size)
   4409 		cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of "
   4410 		    "space.\n");
   4411 
   4412 	return (page_status);
   4413 }
   4414 
   4415 /*
   4416  * Function to support counting of IO detected CEs.
   4417  */
   4418 void
   4419 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
   4420 {
   4421 	int err;
   4422 
   4423 	err = ce_count_unum(ecc->flt_status, len, unum);
   4424 	if (err != PR_OK && automatic_page_removal) {
   4425 		(void) page_retire(ecc->flt_addr, err);
   4426 	}
   4427 }
   4428 
   4429 static int
   4430 ecc_kstat_update(kstat_t *ksp, int rw)
   4431 {
   4432 	struct kstat_ecc_mm_info *kceip = ksp->ks_data;
   4433 	struct ce_info *ceip = mem_ce_simm;
   4434 	int i = ksp->ks_instance;
   4435 
   4436 	if (rw == KSTAT_WRITE)
   4437 		return (EACCES);
   4438 
   4439 	ASSERT(ksp->ks_data != NULL);
   4440 	ASSERT(i < mem_ce_simm_size && i >= 0);
   4441 
   4442 	/*
   4443 	 * Since we're not using locks, make sure that we don't get partial
   4444 	 * data. The name is always copied before the counters are incremented
   4445 	 * so only do this update routine if at least one of the counters is
   4446 	 * non-zero, which ensures that ce_count_unum() is done, and the
   4447 	 * string is fully copied.
   4448 	 */
   4449 	if (ceip[i].intermittent_total == 0 &&
   4450 	    ceip[i].persistent_total == 0 &&
   4451 	    ceip[i].sticky_total == 0) {
   4452 		/*
   4453 		 * Uninitialized or partially initialized. Ignore.
   4454 		 * The ks_data buffer was allocated via kmem_zalloc,
   4455 		 * so no need to bzero it.
   4456 		 */
   4457 		return (0);
   4458 	}
   4459 
   4460 	kstat_named_setstr(&kceip->name, ceip[i].name);
   4461 	kceip->intermittent_total.value.ui64 = ceip[i].intermittent_total;
   4462 	kceip->persistent_total.value.ui64 = ceip[i].persistent_total;
   4463 	kceip->sticky_total.value.ui64 = ceip[i].sticky_total;
   4464 
   4465 	return (0);
   4466 }
   4467 
   4468 #define	VIS_BLOCKSIZE		64
   4469 
   4470 int
   4471 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
   4472 {
   4473 	int ret, watched;
   4474 
   4475 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
   4476 	ret = dtrace_blksuword32(addr, data, 0);
   4477 	if (watched)
   4478 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
   4479 
   4480 	return (ret);
   4481 }
   4482 
   4483 /*ARGSUSED*/
   4484 void
   4485 cpu_faulted_enter(struct cpu *cp)
   4486 {
   4487 }
   4488 
   4489 /*ARGSUSED*/
   4490 void
   4491 cpu_faulted_exit(struct cpu *cp)
   4492 {
   4493 }
   4494 
   4495 /*ARGSUSED*/
   4496 void
   4497 mmu_init_kernel_pgsz(struct hat *hat)
   4498 {
   4499 }
   4500 
   4501 size_t
   4502 mmu_get_kernel_lpsize(size_t lpsize)
   4503 {
   4504 	uint_t tte;
   4505 
   4506 	if (lpsize == 0) {
   4507 		/* no setting for segkmem_lpsize in /etc/system: use default */
   4508 		return (MMU_PAGESIZE4M);
   4509 	}
   4510 
   4511 	for (tte = TTE8K; tte <= TTE4M; tte++) {
   4512 		if (lpsize == TTEBYTES(tte))
   4513 			return (lpsize);
   4514 	}
   4515 
   4516 	return (TTEBYTES(TTE8K));
   4517 }
   4518