Home | History | Annotate | Download | only in cpu
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * Support for Olympus-C (SPARC64-VI) and Jupiter (SPARC64-VII).
     28  */
     29 
     30 #include <sys/types.h>
     31 #include <sys/systm.h>
     32 #include <sys/ddi.h>
     33 #include <sys/sysmacros.h>
     34 #include <sys/archsystm.h>
     35 #include <sys/vmsystm.h>
     36 #include <sys/machparam.h>
     37 #include <sys/machsystm.h>
     38 #include <sys/machthread.h>
     39 #include <sys/cpu.h>
     40 #include <sys/cmp.h>
     41 #include <sys/elf_SPARC.h>
     42 #include <vm/vm_dep.h>
     43 #include <vm/hat_sfmmu.h>
     44 #include <vm/seg_kpm.h>
     45 #include <vm/seg_kmem.h>
     46 #include <sys/cpuvar.h>
     47 #include <sys/opl_olympus_regs.h>
     48 #include <sys/opl_module.h>
     49 #include <sys/async.h>
     50 #include <sys/cmn_err.h>
     51 #include <sys/debug.h>
     52 #include <sys/dditypes.h>
     53 #include <sys/cpu_module.h>
     54 #include <sys/sysmacros.h>
     55 #include <sys/intreg.h>
     56 #include <sys/clock.h>
     57 #include <sys/platform_module.h>
     58 #include <sys/ontrap.h>
     59 #include <sys/panic.h>
     60 #include <sys/memlist.h>
     61 #include <sys/ndifm.h>
     62 #include <sys/ddifm.h>
     63 #include <sys/fm/protocol.h>
     64 #include <sys/fm/util.h>
     65 #include <sys/fm/cpu/SPARC64-VI.h>
     66 #include <sys/dtrace.h>
     67 #include <sys/watchpoint.h>
     68 #include <sys/promif.h>
     69 
     70 /*
     71  * Internal functions.
     72  */
     73 static int cpu_sync_log_err(void *flt);
     74 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *);
     75 static void opl_cpu_sync_error(struct regs *, ulong_t, ulong_t, uint_t, uint_t);
     76 static int  cpu_flt_in_memory(opl_async_flt_t *, uint64_t);
     77 static int prom_SPARC64VII_support_enabled(void);
     78 static void opl_ta3();
     79 static int plat_prom_preserve_kctx_is_supported(void);
     80 
     81 /*
     82  * Error counters resetting interval, in seconds.
     83  */
     84 static int opl_async_check_interval = 60;		/* 1 min */
     85 
        /*
         * NOTE(review): no use of this flag is visible in this part of the
         * file; the name suggests it tells common code that the CPU supports
         * two programmable page sizes -- confirm at its point of use.
         */
     86 uint_t cpu_impl_dual_pgsz = 1;
     87 
     88 /*
     89  * PA[22:0] represent Displacement in Jupiter
     90  * configuration space.  The mask has exactly those 23 low bits set.
     91  */
     92 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
     93 
     94 /*
     95  * Set in /etc/system to control logging of user BERR/TO's.
     96  */
     97 int cpu_berr_to_verbose = 0;
     98 
     99 /*
    100  * Set to 1 if booted with all Jupiter cpus (all-Jupiter features enabled).
    101  * cpu_fix_alljupiter() is what turns it on.
    102  */
    102 int cpu_alljupiter = 0;
    103 
    104 /*
    105  * The sfmmu_cext field to be used by processes in a shared context domain.
    106  * mmu_init_large_pages() rewrites it when a supported ISM page size is
         * selected, and mmu_init_scd() copies it into an SCD's hat.
         */
    107 static uchar_t shctx_cext = TAGACCEXT_MKSZPAIR(DEFAULT_ISM_PAGESZC, TTE8K);
    108 
        /*
         * Not referenced in the visible part of this file.
         * NOTE(review): the priv_hcl_N names look like saved hardware-copy-limit
         * values for 1/2/4/8-byte aligned copies -- confirm where they are set.
         */
    109 static int min_ecache_size;
    110 static uint_t priv_hcl_1;
    111 static uint_t priv_hcl_2;
    112 static uint_t priv_hcl_4;
    113 static uint_t priv_hcl_8;
    114 
    115 /*
    116  * Olympus error log.  opl_cpu0_log_setup is set to 1 by cpu_setup() after
    117  * opl_error_setup() has been given the physical address of opl_cpu0_err_log.
         */
    118 static opl_errlog_t	*opl_err_log;
    119 static int		opl_cpu0_log_setup;
    120 
    121 /*
    122  * OPL ta 3 save area.
    123  */
    124 char	*opl_ta3_save;
    125 
    126 /*
    127  * UE is classified into four classes (MEM, CHANNEL, CPU, PATH).
    128  * No other ecc_type_info entry may be inserted between the following
    129  * four UE classes.
         *
         * Each table entry is written as seven consecutive initializers without
         * inner braces.  NOTE(review): from the values used, the columns appear
         * to be: status-register bit, short tag, trap-class flags, fault type,
         * long description, ereport payload selector, ereport class -- confirm
         * against the ecc_type_to_info_t declaration.  The first eight entries
         * are synchronous (SFSR_*) errors, the rest are urgent (UGESR_*) errors,
         * and the table is terminated by an all-zero/NULL entry.
    130  */
    131 ecc_type_to_info_t ecc_type_to_info[] = {
    132 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
    133 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
    134 	FM_EREPORT_CPU_UE_MEM,
    135 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
    136 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
    137 	FM_EREPORT_CPU_UE_CHANNEL,
    138 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
    139 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
    140 	FM_EREPORT_CPU_UE_CPU,
    141 	SFSR_UE,	"UE ",	(OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_UE,
    142 	"Uncorrectable ECC",  FM_EREPORT_PAYLOAD_SYNC,
    143 	FM_EREPORT_CPU_UE_PATH,
    144 	SFSR_BERR, "BERR ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
    145 	"Bus Error",  FM_EREPORT_PAYLOAD_SYNC,
    146 	FM_EREPORT_CPU_BERR,
    147 	SFSR_TO, "TO ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
    148 	"Bus Timeout",  FM_EREPORT_PAYLOAD_SYNC,
    149 	FM_EREPORT_CPU_BTO,
    150 	SFSR_TLB_MUL, "TLB_MUL ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
    151 	"TLB MultiHit",  FM_EREPORT_PAYLOAD_SYNC,
    152 	FM_EREPORT_CPU_MTLB,
    153 	SFSR_TLB_PRT, "TLB_PRT ", (OPL_ECC_SYNC_TRAP), OPL_CPU_SYNC_OTHERS,
    154 	"TLB Parity",  FM_EREPORT_PAYLOAD_SYNC,
    155 	FM_EREPORT_CPU_TLBP,
    156 
    157 	UGESR_IAUG_CRE, "IAUG_CRE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
    158 	"IAUG CRE",  FM_EREPORT_PAYLOAD_URGENT,
    159 	FM_EREPORT_CPU_CRE,
    160 	UGESR_IAUG_TSBCTXT, "IAUG_TSBCTXT",
    161 	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
    162 	"IAUG TSBCTXT",  FM_EREPORT_PAYLOAD_URGENT,
    163 	FM_EREPORT_CPU_TSBCTX,
    164 	UGESR_IUG_TSBP, "IUG_TSBP", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
    165 	"IUG TSBP",  FM_EREPORT_PAYLOAD_URGENT,
    166 	FM_EREPORT_CPU_TSBP,
    167 	UGESR_IUG_PSTATE, "IUG_PSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
    168 	"IUG PSTATE",  FM_EREPORT_PAYLOAD_URGENT,
    169 	FM_EREPORT_CPU_PSTATE,
    170 	UGESR_IUG_TSTATE, "IUG_TSTATE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
    171 	"IUG TSTATE",  FM_EREPORT_PAYLOAD_URGENT,
    172 	FM_EREPORT_CPU_TSTATE,
    173 	UGESR_IUG_F, "IUG_F", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
    174 	"IUG FREG",  FM_EREPORT_PAYLOAD_URGENT,
    175 	FM_EREPORT_CPU_IUG_F,
    176 	UGESR_IUG_R, "IUG_R", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
    177 	"IUG RREG",  FM_EREPORT_PAYLOAD_URGENT,
    178 	FM_EREPORT_CPU_IUG_R,
    179 	UGESR_AUG_SDC, "AUG_SDC", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
    180 	"AUG SDC",  FM_EREPORT_PAYLOAD_URGENT,
    181 	FM_EREPORT_CPU_SDC,
    182 	UGESR_IUG_WDT, "IUG_WDT", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
    183 	"IUG WDT",  FM_EREPORT_PAYLOAD_URGENT,
    184 	FM_EREPORT_CPU_WDT,
    185 	UGESR_IUG_DTLB, "IUG_DTLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
    186 	"IUG DTLB",  FM_EREPORT_PAYLOAD_URGENT,
    187 	FM_EREPORT_CPU_DTLB,
    188 	UGESR_IUG_ITLB, "IUG_ITLB", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
    189 	"IUG ITLB",  FM_EREPORT_PAYLOAD_URGENT,
    190 	FM_EREPORT_CPU_ITLB,
    191 	UGESR_IUG_COREERR, "IUG_COREERR",
    192 	OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
    193 	"IUG COREERR",  FM_EREPORT_PAYLOAD_URGENT,
    194 	FM_EREPORT_CPU_CORE,
    195 	UGESR_MULTI_DAE, "MULTI_DAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
    196 	"MULTI DAE",  FM_EREPORT_PAYLOAD_URGENT,
    197 	FM_EREPORT_CPU_DAE,
    198 	UGESR_MULTI_IAE, "MULTI_IAE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
    199 	"MULTI IAE",  FM_EREPORT_PAYLOAD_URGENT,
    200 	FM_EREPORT_CPU_IAE,
    201 	UGESR_MULTI_UGE, "MULTI_UGE", OPL_ECC_URGENT_TRAP, OPL_CPU_URGENT,
    202 	"MULTI UGE",  FM_EREPORT_PAYLOAD_URGENT,
    203 	FM_EREPORT_CPU_UGE,
    204 	0,		NULL,		0,		0,
    205 	NULL,  0,	   0,
    206 };
    207 
        /*
         * Function-pointer hook taking a syndrome code and a physical address
         * and returning memory/segment/bank sizes, segment and bank counts and
         * an mcid through the pointer arguments.  It is not assigned anywhere
         * in the visible part of this file.
         * NOTE(review): presumably installed by platform code -- confirm, and
         * check for NULL before calling.
         */
    208 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
    209 		uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    210 		int *segsp, int *banksp, int *mcidp);
    211 
    212 
    213 /*
    214  * Setup trap handlers for 0xA, 0x32, 0x40 trap types
    215  * and "ta 3" and "ta 4".
         *
         * The iae and dae trap-table slots receive opl_serr_instr and the asdat
         * slots receive opl_ugerr_instr, each for both the tt0_ and tt1_ tables.
         * NOTE(review): tt0_/tt1_ presumably name the TL=0 and TL>0 trap tables
         * -- confirm against the trap table source.
    216  */
    217 void
    218 cpu_init_trap(void)
    219 {
        	/* Instruction and data access error slots. */
    220 	OPL_SET_TRAP(tt0_iae, opl_serr_instr);
    221 	OPL_SET_TRAP(tt1_iae, opl_serr_instr);
    222 	OPL_SET_TRAP(tt0_dae, opl_serr_instr);
    223 	OPL_SET_TRAP(tt1_dae, opl_serr_instr);
        	/* asdat slots get the urgent-error handler. */
    224 	OPL_SET_TRAP(tt0_asdat, opl_ugerr_instr);
    225 	OPL_SET_TRAP(tt1_asdat, opl_ugerr_instr);
        	/* "ta 3": the flushw slot is replaced by opl_ta3_instr. */
    226 	OPL_SET_TRAP(tt0_flushw, opl_ta3_instr);
        	/* "ta 4": opl_cleanw_patch is patched with opl_ta4_instr. */
    227 	OPL_PATCH_28(opl_cleanw_patch, opl_ta4_instr);
    228 }
    229 
    230 static int
    231 getintprop(pnode_t node, char *name, int deflt)
    232 {
    233 	int	value;
    234 
    235 	switch (prom_getproplen(node, name)) {
    236 	case sizeof (int):
    237 		(void) prom_getprop(node, name, (caddr_t)&value);
    238 		break;
    239 
    240 	default:
    241 		value = deflt;
    242 		break;
    243 	}
    244 
    245 	return (value);
    246 }
    247 
    248 /*
    249  * Set the magic constants of the implementation.
    250  */
    251 /*ARGSUSED*/
    252 void
    253 cpu_fiximp(pnode_t dnode)
    254 {
    255 	int i, a;
    256 	extern int vac_size, vac_shift;
    257 	extern uint_t vac_mask;
    258 
    259 	static struct {
    260 		char	*name;
    261 		int	*var;
    262 		int	defval;
    263 	} prop[] = {
    264 		"l1-dcache-size", &dcache_size, OPL_DCACHE_SIZE,
    265 		"l1-dcache-line-size", &dcache_linesize, OPL_DCACHE_LSIZE,
    266 		"l1-icache-size", &icache_size, OPL_ICACHE_SIZE,
    267 		"l1-icache-line-size", &icache_linesize, OPL_ICACHE_LSIZE,
    268 		"l2-cache-size", &ecache_size, OPL_ECACHE_SIZE,
    269 		"l2-cache-line-size", &ecache_alignsize, OPL_ECACHE_LSIZE,
    270 		"l2-cache-associativity", &ecache_associativity, OPL_ECACHE_NWAY
    271 	};
    272 
    273 	for (i = 0; i < sizeof (prop) / sizeof (prop[0]); i++)
    274 		*prop[i].var = getintprop(dnode, prop[i].name, prop[i].defval);
    275 
    276 	ecache_setsize = ecache_size / ecache_associativity;
    277 
    278 	vac_size = OPL_VAC_SIZE;
    279 	vac_mask = MMU_PAGEMASK & (vac_size - 1);
    280 	i = 0; a = vac_size;
    281 	while (a >>= 1)
    282 		++i;
    283 	vac_shift = i;
    284 	shm_alignment = vac_size;
    285 	vac = 1;
    286 }
    287 
    288 /*
    289  * Enable features for Jupiter-only domains.
    290  */
    291 void
    292 cpu_fix_alljupiter(void)
    293 {
    294 	if (!prom_SPARC64VII_support_enabled()) {
    295 		/*
    296 		 * Do not enable all-Jupiter features and do not turn on
    297 		 * the cpu_alljupiter flag.
    298 		 */
    299 		return;
    300 	}
    301 
    302 	cpu_alljupiter = 1;
    303 
    304 	/*
    305 	 * Enable ima hwcap for Jupiter-only domains.  DR will prevent
    306 	 * addition of Olympus-C to all-Jupiter domains to preserve ima
    307 	 * hwcap semantics.
    308 	 */
    309 	cpu_hwcap_flags |= AV_SPARC_IMA;
    310 
    311 	/*
    312 	 * Enable shared context support.
    313 	 */
    314 	shctx_on = 1;
    315 }
    316 
    317 #ifdef	OLYMPUS_C_REV_B_ERRATA_XCALL
    318 /*
    319  * Quick and dirty way to redefine locally in
    320  * OPL the value of IDSR_BN_SETS to 31 instead
    321  * of the standard 32 value. This is to work around
    322  * a problem in REV_B of the Olympus_c processor in handling
    323  * more than 31 xcall broadcasts.
    324  */
    325 #undef	IDSR_BN_SETS
    326 #define	IDSR_BN_SETS    31
    327 #endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
    328 
        /*
         * Send a mondo to every CPU in 'set' (which must not be empty).
         *
         * Each target is dispatched with shipit(cpu, slot), and cpuids[slot]
         * records which CPU currently occupies a slot.  At most IDSR_BN_SETS
         * slots are used; with OLYMPUS_C_REV_A_ERRATA_XCALL the limit is bn_sets,
         * which drops to 1 on an Olympus-C rev A part.
         *
         * After the initial dispatch the function polls getidsr():
         *  - when NCPU > IDSR_BN_SETS and targets remain, slots that are neither
         *    busy nor nacked are reused for the remaining CPUs, walking downward
         *    from 'index' (the highest-numbered CPU that was not shipped);
         *  - while any tracked slot is busy, the loop just polls again;
         *  - otherwise every nacked slot is re-shipped after spinning for
         *    sys_clock_mhz ticks.
         * The loop ends once the IDSR reads zero (and, when NCPU > IDSR_BN_SETS,
         * every target has been shipped).
         *
         * If the loop runs past xc_tick_limit ticks (extended when a jump larger
         * than xc_tick_jump_limit is seen between polls), the function returns
         * silently when panic_quiesce is set and otherwise reports the pending
         * CPUs and panics.
         *
         * NOTE(review): the x_* counters used under SEND_MONDO_STATS are defined
         * further down in this file; confirm a declaration is in scope here
         * before building with SEND_MONDO_STATS.
         */
    329 void
    330 send_mondo_set(cpuset_t set)
    331 {
    332 	int lo, busy, nack, shipped = 0;
    333 	uint16_t i, cpuids[IDSR_BN_SETS];
    334 	uint64_t idsr, nackmask = 0, busymask, curnack, curbusy;
    335 	uint64_t starttick, endtick, tick, lasttick;
    336 #if (NCPU > IDSR_BN_SETS)
    337 	int index = 0;
    338 	int ncpuids = 0;
    339 #endif
    340 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
    341 	int bn_sets = IDSR_BN_SETS;
    342 	uint64_t ver;
    343 
    344 	ASSERT(NCPU > bn_sets);
    345 #endif
    346 
    347 	ASSERT(!CPUSET_ISNULL(set));
    348 	starttick = lasttick = gettick();
    349 
    350 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
        	/* Olympus-C rev A: use a single dispatch slot. */
    351 	ver = ultra_getver();
    352 	if (((ULTRA_VER_IMPL(ver)) == OLYMPUS_C_IMPL) &&
    353 	    ((OLYMPUS_REV_MASK(ver)) == OLYMPUS_C_A))
    354 		bn_sets = 1;
    355 #endif
    356 
    357 #if (NCPU <= IDSR_BN_SETS)
        	/* Every possible CPU fits in a slot: ship to all targets now. */
    358 	for (i = 0; i < NCPU; i++)
    359 		if (CPU_IN_SET(set, i)) {
    360 			shipit(i, shipped);
    361 			nackmask |= IDSR_NACK_BIT(shipped);
    362 			cpuids[shipped++] = i;
    363 			CPUSET_DEL(set, i);
    364 			if (CPUSET_ISNULL(set))
    365 				break;
    366 		}
    367 	CPU_STATS_ADDQ(CPU, sys, xcalls, shipped);
    368 #else
    369 	for (i = 0; i < NCPU; i++)
    370 		if (CPU_IN_SET(set, i)) {
    371 			ncpuids++;
    372 
    373 			/*
    374 			 * Ship only to the first (IDSR_BN_SETS) CPUs.  If we
    375 			 * find we have shipped to more than (IDSR_BN_SETS)
    376 			 * CPUs, set "index" to the highest numbered CPU in
    377 			 * the set so we can ship to other CPUs a bit later on.
    378 			 */
    379 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
    380 			if (shipped < bn_sets) {
    381 #else
    382 			if (shipped < IDSR_BN_SETS) {
    383 #endif
    384 				shipit(i, shipped);
    385 				nackmask |= IDSR_NACK_BIT(shipped);
    386 				cpuids[shipped++] = i;
    387 				CPUSET_DEL(set, i);
    388 				if (CPUSET_ISNULL(set))
    389 					break;
    390 			} else
    391 				index = (int)i;
    392 		}
    393 
    394 	CPU_STATS_ADDQ(CPU, sys, xcalls, ncpuids);
    395 #endif
    396 
    397 	busymask = IDSR_NACK_TO_BUSY(nackmask);
    398 	busy = nack = 0;
    399 	endtick = starttick + xc_tick_limit;
    400 	for (;;) {
    401 		idsr = getidsr();
    402 #if (NCPU <= IDSR_BN_SETS)
    403 		if (idsr == 0)
    404 			break;
    405 #else
    406 		if (idsr == 0 && shipped == ncpuids)
    407 			break;
    408 #endif
    409 		tick = gettick();
    410 		/*
    411 		 * If there is a big jump between the current tick
    412 		 * count and lasttick, we have probably hit a break
    413 		 * point.  Adjust endtick accordingly to avoid panic.
    414 		 */
    415 		if (tick > (lasttick + xc_tick_jump_limit))
    416 			endtick += (tick - lasttick);
    417 		lasttick = tick;
    418 		if (tick > endtick) {
    419 			if (panic_quiesce)
    420 				return;
    421 			cmn_err(CE_CONT, "send mondo timeout [%d NACK %d "
    422 			    "BUSY]\nIDSR 0x%" PRIx64 "  cpuids:",
    423 			    nack, busy, idsr);
        			/* List the CPU behind every slot still nacked or busy. */
    424 #ifdef	OLYMPUS_C_REV_A_ERRATA_XCALL
    425 			for (i = 0; i < bn_sets; i++) {
    426 #else
    427 			for (i = 0; i < IDSR_BN_SETS; i++) {
    428 #endif
    429 				if (idsr & (IDSR_NACK_BIT(i) |
    430 				    IDSR_BUSY_BIT(i))) {
    431 					cmn_err(CE_CONT, " 0x%x", cpuids[i]);
    432 				}
    433 			}
    434 			cmn_err(CE_CONT, "\n");
    435 			cmn_err(CE_PANIC, "send_mondo_set: timeout");
    436 		}
    437 		curnack = idsr & nackmask;
    438 		curbusy = idsr & busymask;
    439 
    440 #ifdef OLYMPUS_C_REV_B_ERRATA_XCALL
    441 		/*
    442 		 * Only proceed to send more xcalls if all the
    443 		 * cpus in the previous IDSR_BN_SETS were completed.
    444 		 */
    445 		if (curbusy) {
    446 			busy++;
    447 			continue;
    448 		}
    449 #endif /* OLYMPUS_C_REV_B_ERRATA_XCALL */
    450 
    451 #if (NCPU > IDSR_BN_SETS)
    452 		if (shipped < ncpuids) {
    453 			uint64_t cpus_left;
    454 			uint16_t next = (uint16_t)index;
    455 
        			/* Busy-bit positions of slots neither busy nor nacked. */
    456 			cpus_left = ~(IDSR_NACK_TO_BUSY(curnack) | curbusy) &
    457 			    busymask;
    458 
    459 			if (cpus_left) {
    460 				do {
    461 					/*
    462 					 * Sequence through and ship to the
    463 					 * remainder of the CPUs in the system
    464 					 * (e.g. other than the first
    465 					 * (IDSR_BN_SETS)) in reverse order.
    466 					 */
    467 					lo = lowbit(cpus_left) - 1;
    468 					i = IDSR_BUSY_IDX(lo);
    469 					shipit(next, i);
    470 					shipped++;
    471 					cpuids[i] = next;
    472 
    473 					/*
    474 					 * If we've processed all the CPUs,
    475 					 * exit the loop now and save
    476 					 * instructions.
    477 					 */
    478 					if (shipped == ncpuids)
    479 						break;
    480 
        					/* Find the next lower CPU still in set. */
    481 					for ((index = ((int)next - 1));
    482 					    index >= 0; index--)
    483 						if (CPU_IN_SET(set, index)) {
    484 							next = (uint16_t)index;
    485 							break;
    486 						}
    487 
    488 					cpus_left &= ~(1ull << lo);
    489 				} while (cpus_left);
    490 				continue;
    491 			}
    492 		}
    493 #endif
    494 #ifndef	OLYMPUS_C_REV_B_ERRATA_XCALL
    495 		if (curbusy) {
    496 			busy++;
    497 			continue;
    498 		}
    499 #endif	/* OLYMPUS_C_REV_B_ERRATA_XCALL */
    500 #ifdef SEND_MONDO_STATS
    501 		{
    502 			int n = gettick() - starttick;
    503 			if (n < 8192)
    504 				x_nack_stimes[n >> 7]++;
    505 		}
    506 #endif
        		/* Spin for sys_clock_mhz ticks before retrying nacked slots. */
    507 		while (gettick() < (tick + sys_clock_mhz))
    508 			;
        		/* Re-ship every slot whose NACK bit is set. */
    509 		do {
    510 			lo = lowbit(curnack) - 1;
    511 			i = IDSR_NACK_IDX(lo);
    512 			shipit(cpuids[i], i);
    513 			curnack &= ~(1ull << lo);
    514 		} while (curnack);
    515 		nack++;
    516 		busy = 0;
    517 	}
    518 #ifdef SEND_MONDO_STATS
    519 	{
    520 		int n = gettick() - starttick;
    521 		if (n < 8192)
    522 			x_set_stimes[n >> 7]++;
    523 		else
    524 			x_set_ltimes[(n >> 13) & 0xf]++;
    525 	}
    526 	x_set_cpus[shipped]++;
    527 #endif
    528 }
    529 
    530 /*
    531  * Cpu private initialization.
    532  */
    533 void
    534 cpu_init_private(struct cpu *cp)
    535 {
    536 	if (!((IS_OLYMPUS_C(cpunodes[cp->cpu_id].implementation)) ||
    537 	    (IS_JUPITER(cpunodes[cp->cpu_id].implementation)))) {
    538 		cmn_err(CE_PANIC, "CPU%d Impl %d: Only SPARC64-VI(I) is "
    539 		    "supported", cp->cpu_id,
    540 		    cpunodes[cp->cpu_id].implementation);
    541 	}
    542 
    543 	adjust_hw_copy_limits(cpunodes[cp->cpu_id].ecache_size);
    544 }
    545 
    546 void
    547 cpu_setup(void)
    548 {
    549 	extern int at_flags;
    550 	extern int cpc_has_overflow_intr;
    551 	uint64_t cpu0_log;
    552 	extern	 uint64_t opl_cpu0_err_log;
    553 
    554 	/*
    555 	 * Initialize Error log Scratch register for error handling.
    556 	 */
    557 
    558 	cpu0_log = va_to_pa(&opl_cpu0_err_log);
    559 	opl_error_setup(cpu0_log);
    560 	opl_cpu0_log_setup = 1;
    561 
    562 	/*
    563 	 * Enable MMU translating multiple page sizes for
    564 	 * sITLB and sDTLB.
    565 	 */
    566 	cpu_early_feature_init();
    567 
    568 	/*
    569 	 * Setup chip-specific trap handlers.
    570 	 */
    571 	cpu_init_trap();
    572 
    573 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
    574 
    575 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
    576 
    577 	/*
    578 	 * Due to the number of entries in the fully-associative tlb
    579 	 * this may have to be tuned lower than in spitfire.
    580 	 */
    581 	pp_slots = MIN(8, MAXPP_SLOTS);
    582 
    583 	/*
    584 	 * Block stores do not invalidate all pages of the d$, pagecopy
    585 	 * et. al. need virtual translations with virtual coloring taken
    586 	 * into consideration.  prefetch/ldd will pollute the d$ on the
    587 	 * load side.
    588 	 */
    589 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
    590 
    591 	if (use_page_coloring) {
    592 		do_pg_coloring = 1;
    593 	}
    594 
    595 	isa_list =
    596 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
    597 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
    598 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
    599 
    600 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2 |
    601 	    AV_SPARC_POPC | AV_SPARC_FMAF;
    602 
    603 	/*
    604 	 * On SPARC64-VI, there's no hole in the virtual address space
    605 	 */
    606 	hole_start = hole_end = 0;
    607 
    608 	/*
    609 	 * The kpm mapping window.
    610 	 * kpm_size:
    611 	 *	The size of a single kpm range.
    612 	 *	The overall size will be: kpm_size * vac_colors.
    613 	 * kpm_vbase:
    614 	 *	The virtual start address of the kpm range within the kernel
    615 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
    616 	 */
    617 	kpm_size = (size_t)(128ull * 1024 * 1024 * 1024 * 1024); /* 128TB */
    618 	kpm_size_shift = 47;
    619 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
    620 	kpm_smallpages = 1;
    621 
    622 	/*
    623 	 * The traptrace code uses either %tick or %stick for
    624 	 * timestamping.  We have %stick so we can use it.
    625 	 */
    626 	traptrace_use_stick = 1;
    627 
    628 	/*
    629 	 * SPARC64-VI has a performance counter overflow interrupt
    630 	 */
    631 	cpc_has_overflow_intr = 1;
    632 
    633 	/*
    634 	 * Declare that this architecture/cpu combination does not support
    635 	 * fpRAS.
    636 	 */
    637 	fpras_implemented = 0;
    638 }
    639 
    640 /*
    641  * Called by setcpudelay
    642  */
    643 void
    644 cpu_init_tick_freq(void)
    645 {
    646 	/*
    647 	 * For SPARC64-VI we want to use the system clock rate as
    648 	 * the basis for low level timing, due to support of mixed
    649 	 * speed CPUs and power managment.
    650 	 */
    651 	if (system_clock_freq == 0)
    652 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
    653 
    654 	sys_tick_freq = system_clock_freq;
    655 }
    656 
    657 #ifdef SEND_MONDO_STATS
        /*
         * Histogram counters, kept only when SEND_MONDO_STATS is defined.
         * send_one_mondo() (x_one_*) and send_mondo_set() (x_set_*) bucket the
         * ticks elapsed since the start of the call: x_*_stimes[n >> 7] when
         * n < 8192, x_*_ltimes[(n >> 13) & 0xf] otherwise.  x_nack_stimes[] is
         * bumped the same way (n < 8192 only) each time send_mondo_set() is
         * about to retry nacked slots, and x_set_cpus[] is indexed by the
         * number of mondos shipped by one send_mondo_set() call.
         */
    658 uint32_t x_one_stimes[64];
    659 uint32_t x_one_ltimes[16];
    660 uint32_t x_set_stimes[64];
    661 uint32_t x_set_ltimes[16];
    662 uint32_t x_set_cpus[NCPU];
    663 uint32_t x_nack_stimes[64];
    664 #endif
    665 
    666 /*
    667  * Note: A version of this function is used by the debugger via the KDI,
    668  * and must be kept in sync with this version.  Any changes made to this
    669  * function to support new chips or to accomodate errata must also be included
    670  * in the KDI-specific version.  See us3_kdi.c.
    671  */
    672 void
    673 send_one_mondo(int cpuid)
    674 {
    675 	int busy, nack;
    676 	uint64_t idsr, starttick, endtick, tick, lasttick;
    677 	uint64_t busymask;
    678 
    679 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
    680 	starttick = lasttick = gettick();
    681 	shipit(cpuid, 0);
    682 	endtick = starttick + xc_tick_limit;
    683 	busy = nack = 0;
    684 	busymask = IDSR_BUSY;
    685 	for (;;) {
    686 		idsr = getidsr();
    687 		if (idsr == 0)
    688 			break;
    689 
    690 		tick = gettick();
    691 		/*
    692 		 * If there is a big jump between the current tick
    693 		 * count and lasttick, we have probably hit a break
    694 		 * point.  Adjust endtick accordingly to avoid panic.
    695 		 */
    696 		if (tick > (lasttick + xc_tick_jump_limit))
    697 			endtick += (tick - lasttick);
    698 		lasttick = tick;
    699 		if (tick > endtick) {
    700 			if (panic_quiesce)
    701 				return;
    702 			cmn_err(CE_PANIC, "send mondo timeout (target 0x%x) "
    703 			    "[%d NACK %d BUSY]", cpuid, nack, busy);
    704 		}
    705 
    706 		if (idsr & busymask) {
    707 			busy++;
    708 			continue;
    709 		}
    710 		drv_usecwait(1);
    711 		shipit(cpuid, 0);
    712 		nack++;
    713 		busy = 0;
    714 	}
    715 #ifdef SEND_MONDO_STATS
    716 	{
    717 		int n = gettick() - starttick;
    718 		if (n < 8192)
    719 			x_one_stimes[n >> 7]++;
    720 		else
    721 			x_one_ltimes[(n >> 13) & 0xf]++;
    722 	}
    723 #endif
    724 }
    725 
    726 /*
    727  * init_mmu_page_sizes is set to one after the bootup time initialization
    728  * via mmu_init_mmu_page_sizes, to indicate that mmu_page_sizes has a
    729  * valid value.
    730  *
    731  * mmu_disable_ism_large_pages and mmu_disable_large_pages are the mmu-specific
    732  * versions of disable_ism_large_pages and disable_large_pages, and feed back
    733  * into those two hat variables at hat initialization time.
    734  *
         * Each mmu_disable_* variable is a bit mask with bit (1 << TTEnn) set for
         * every page size that is disabled; mmu_large_pages_disabled() returns
         * them.  By default nothing is disabled for ordinary mappings; ISM and
         * automatic data selection have 64K, 512K, 32M and 256M disabled; and
         * automatic text selection has 64K and 512K disabled.
    735  */
    736 int init_mmu_page_sizes = 0;
    737 
    738 static uint_t mmu_disable_large_pages = 0;
    739 static uint_t mmu_disable_ism_large_pages = ((1 << TTE64K) |
    740 	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
    741 static uint_t mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
    742 	(1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
    743 static uint_t mmu_disable_auto_text_large_pages = ((1 << TTE64K) |
    744 	(1 << TTE512K));
    745 
    746 /*
    747  * Re-initialize mmu_page_sizes and friends, for SPARC64-VI mmu support.
    748  * Called during very early bootup from check_cpus_set().
    749  * Can be called to verify that mmu_page_sizes are set up correctly.
    750  *
    751  * Set Olympus defaults. We do not use the function parameter.
    752  */
    753 /*ARGSUSED*/
    754 void
    755 mmu_init_scd(sf_scd_t *scdp)
    756 {
    757 	scdp->scd_sfmmup->sfmmu_cext = shctx_cext;
    758 }
    759 
    760 /*ARGSUSED*/
    761 int
    762 mmu_init_mmu_page_sizes(int32_t not_used)
    763 {
    764 	if (!init_mmu_page_sizes) {
    765 		mmu_page_sizes = MMU_PAGE_SIZES;
    766 		mmu_hashcnt = MAX_HASHCNT;
    767 		mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE;
    768 		mmu_exported_pagesize_mask = (1 << TTE8K) |
    769 		    (1 << TTE64K) | (1 << TTE512K) | (1 << TTE4M) |
    770 		    (1 << TTE32M) | (1 << TTE256M);
    771 		init_mmu_page_sizes = 1;
    772 		return (0);
    773 	}
    774 	return (1);
    775 }
    776 
    777 /* SPARC64-VI worst case DTLB parameters */
    778 #ifndef	LOCKED_DTLB_ENTRIES
    779 #define	LOCKED_DTLB_ENTRIES	5	/* 2 user TSBs, 2 nucleus, + OBP */
    780 #endif
    781 #define	TOTAL_DTLB_ENTRIES	32
        /* The two AVAIL_*M_ENTRIES macros are not used by the table below. */
    782 #define	AVAIL_32M_ENTRIES	0
    783 #define	AVAIL_256M_ENTRIES	0
    784 #define	AVAIL_DTLB_ENTRIES	(TOTAL_DTLB_ENTRIES - LOCKED_DTLB_ENTRIES)
        /*
         * Per-page-size TTE count thresholds, indexed by page size code.
         * mmu_setup_page_sizes() falls back to the base page size for any
         * chosen size whose TTE count is below its threshold.  Every size uses
         * the same value, AVAIL_DTLB_ENTRIES.
         */
    785 static uint64_t ttecnt_threshold[MMU_PAGE_SIZES] = {
    786 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
    787 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES,
    788 	AVAIL_DTLB_ENTRIES, AVAIL_DTLB_ENTRIES};
    789 
    790 /*
    791  * The function returns the mmu-specific values for the
    792  * hat's disable_large_pages, disable_ism_large_pages, and
    793  * disable_auto_data_large_pages and
    794  * disable_text_data_large_pages variables.
    795  */
    796 uint_t
    797 mmu_large_pages_disabled(uint_t flag)
    798 {
    799 	uint_t pages_disable = 0;
    800 	extern int use_text_pgsz64K;
    801 	extern int use_text_pgsz512K;
    802 
    803 	if (flag == HAT_LOAD) {
    804 		pages_disable =  mmu_disable_large_pages;
    805 	} else if (flag == HAT_LOAD_SHARE) {
    806 		pages_disable = mmu_disable_ism_large_pages;
    807 	} else if (flag == HAT_AUTO_DATA) {
    808 		pages_disable = mmu_disable_auto_data_large_pages;
    809 	} else if (flag == HAT_AUTO_TEXT) {
    810 		pages_disable = mmu_disable_auto_text_large_pages;
    811 		if (use_text_pgsz512K) {
    812 			pages_disable &= ~(1 << TTE512K);
    813 		}
    814 		if (use_text_pgsz64K) {
    815 			pages_disable &= ~(1 << TTE64K);
    816 		}
    817 	}
    818 	return (pages_disable);
    819 }
    820 
    821 /*
    822  * mmu_init_large_pages is called with the desired ism_pagesize parameter.
    823  * It may be called from set_platform_defaults, if some value other than 4M
    824  * is desired.  mmu_ism_pagesize is the tunable.  If it has a bad value,
    825  * then only warn, since it would be bad form to panic due to a user typo.
    826  *
    827  * The function re-initializes the mmu_disable_ism_large_pages variable.
    828  */
    829 void
    830 mmu_init_large_pages(size_t ism_pagesize)
    831 {
    832 
    833 	switch (ism_pagesize) {
    834 	case MMU_PAGESIZE4M:
    835 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
    836 		    (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
    837 		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
    838 		    (1 << TTE512K) | (1 << TTE32M) | (1 << TTE256M));
    839 		shctx_cext = TAGACCEXT_MKSZPAIR(TTE4M, TTE8K);
    840 		break;
    841 	case MMU_PAGESIZE32M:
    842 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
    843 		    (1 << TTE512K) | (1 << TTE256M));
    844 		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
    845 		    (1 << TTE512K) | (1 << TTE4M) | (1 << TTE256M));
    846 		adjust_data_maxlpsize(ism_pagesize);
    847 		shctx_cext = TAGACCEXT_MKSZPAIR(TTE32M, TTE8K);
    848 		break;
    849 	case MMU_PAGESIZE256M:
    850 		mmu_disable_ism_large_pages = ((1 << TTE64K) |
    851 		    (1 << TTE512K) | (1 << TTE32M));
    852 		mmu_disable_auto_data_large_pages = ((1 << TTE64K) |
    853 		    (1 << TTE512K) | (1 << TTE4M) | (1 << TTE32M));
    854 		adjust_data_maxlpsize(ism_pagesize);
    855 		shctx_cext = TAGACCEXT_MKSZPAIR(TTE256M, TTE8K);
    856 		break;
    857 	default:
    858 		cmn_err(CE_WARN, "Unrecognized mmu_ism_pagesize value 0x%lx",
    859 		    ism_pagesize);
    860 		break;
    861 	}
    862 }
    863 
    864 /*
    865  * Function to reprogram the TLBs when page sizes used
    866  * by a process change significantly.
    867  */
    868 static void
    869 mmu_setup_page_sizes(struct hat *hat, uint64_t *ttecnt, uint8_t *tmp_pgsz)
    870 {
    871 	uint8_t pgsz0, pgsz1;
    872 
    873 	/*
    874 	 * Don't program 2nd dtlb for kernel and ism hat
    875 	 */
    876 	ASSERT(hat->sfmmu_ismhat == NULL);
    877 	ASSERT(hat != ksfmmup);
    878 
    879 	/*
    880 	 * hat->sfmmu_pgsz[] is an array whose elements
    881 	 * contain a sorted order of page sizes.  Element
    882 	 * 0 is the most commonly used page size, followed
    883 	 * by element 1, and so on.
    884 	 *
    885 	 * ttecnt[] is an array of per-page-size page counts
    886 	 * mapped into the process.
    887 	 *
    888 	 * If the HAT's choice for page sizes is unsuitable,
    889 	 * we can override it here.  The new values written
    890 	 * to the array will be handed back to us later to
    891 	 * do the actual programming of the TLB hardware.
    892 	 *
    893 	 */
    894 	pgsz0 = (uint8_t)MIN(tmp_pgsz[0], tmp_pgsz[1]);
    895 	pgsz1 = (uint8_t)MAX(tmp_pgsz[0], tmp_pgsz[1]);
    896 
    897 	/*
    898 	 * This implements PAGESIZE programming of the sTLB
    899 	 * if large TTE counts don't exceed the thresholds.
    900 	 */
    901 	if (ttecnt[pgsz0] < ttecnt_threshold[pgsz0])
    902 		pgsz0 = page_szc(MMU_PAGESIZE);
    903 	if (ttecnt[pgsz1] < ttecnt_threshold[pgsz1])
    904 		pgsz1 = page_szc(MMU_PAGESIZE);
    905 	tmp_pgsz[0] = pgsz0;
    906 	tmp_pgsz[1] = pgsz1;
    907 	/* otherwise, accept what the HAT chose for us */
    908 }
    909 
    910 /*
    911  * The HAT calls this function when an MMU context is allocated so that we
    912  * can reprogram the large TLBs appropriately for the new process using
    913  * the context.
    914  *
    915  * The caller must hold the HAT lock.
    916  */
    917 void
    918 mmu_set_ctx_page_sizes(struct hat *hat)
    919 {
    920 	uint8_t pgsz0, pgsz1;
    921 	uint8_t new_cext;
    922 
    923 	ASSERT(sfmmu_hat_lock_held(hat));
    924 	/*
    925 	 * Don't program 2nd dtlb for kernel and ism hat
    926 	 */
    927 	if (hat->sfmmu_ismhat || hat == ksfmmup)
    928 		return;
    929 
    930 	/*
    931 	 * If supported, reprogram the TLBs to a larger pagesize.
    932 	 */
    933 	if (hat->sfmmu_scdp != NULL) {
    934 		new_cext = hat->sfmmu_scdp->scd_sfmmup->sfmmu_cext;
    935 		ASSERT(new_cext == shctx_cext);
    936 	} else {
    937 		pgsz0 = hat->sfmmu_pgsz[0];
    938 		pgsz1 = hat->sfmmu_pgsz[1];
    939 		ASSERT(pgsz0 < mmu_page_sizes);
    940 		ASSERT(pgsz1 < mmu_page_sizes);
    941 		new_cext = TAGACCEXT_MKSZPAIR(pgsz1, pgsz0);
    942 	}
    943 	if (hat->sfmmu_cext != new_cext) {
    944 #ifdef DEBUG
    945 		int i;
    946 		/*
    947 		 * assert cnum should be invalid, this is because pagesize
    948 		 * can only be changed after a proc's ctxs are invalidated.
    949 		 */
    950 		for (i = 0; i < max_mmu_ctxdoms; i++) {
    951 			ASSERT(hat->sfmmu_ctxs[i].cnum == INVALID_CONTEXT);
    952 		}
    953 #endif /* DEBUG */
    954 		hat->sfmmu_cext = new_cext;
    955 	}
    956 	/*
    957 	 * sfmmu_setctx_sec() will take care of the
    958 	 * rest of the dirty work for us.
    959 	 */
    960 }
    961 
    962 /*
    963  * This function assumes that there are either four or six supported page
    964  * sizes and at most two programmable TLBs, so we need to decide which
    965  * page sizes are most important and then adjust the TLB page sizes
    966  * accordingly (if supported).
    967  *
    968  * If these assumptions change, this function will need to be
    969  * updated to support whatever the new limits are.
    970  */
    971 void
    972 mmu_check_page_sizes(sfmmu_t *sfmmup, uint64_t *ttecnt)
    973 {
    974 	uint64_t sortcnt[MMU_PAGE_SIZES];
    975 	uint8_t tmp_pgsz[MMU_PAGE_SIZES];
    976 	uint8_t i, j, max;
    977 	uint16_t oldval, newval;
    978 
    979 	/*
    980 	 * We only consider reprogramming the TLBs if one or more of
    981 	 * the two most used page sizes changes and we're using
    982 	 * large pages in this process.
    983 	 */
    984 	if (SFMMU_LGPGS_INUSE(sfmmup)) {
    985 		/* Sort page sizes. */
    986 		for (i = 0; i < mmu_page_sizes; i++) {
    987 			sortcnt[i] = ttecnt[i];
    988 		}
    989 		for (j = 0; j < mmu_page_sizes; j++) {
    990 			for (i = mmu_page_sizes - 1, max = 0; i > 0; i--) {
    991 				if (sortcnt[i] > sortcnt[max])
    992 					max = i;
    993 			}
    994 			tmp_pgsz[j] = max;
    995 			sortcnt[max] = 0;
    996 		}
    997 
    998 		oldval = sfmmup->sfmmu_pgsz[0] << 8 | sfmmup->sfmmu_pgsz[1];
    999 
   1000 		mmu_setup_page_sizes(sfmmup, ttecnt, tmp_pgsz);
   1001 
   1002 		/* Check 2 largest values after the sort. */
   1003 		newval = tmp_pgsz[0] << 8 | tmp_pgsz[1];
   1004 		if (newval != oldval) {
   1005 			sfmmu_reprog_pgsz_arr(sfmmup, tmp_pgsz);
   1006 		}
   1007 	}
   1008 }
   1009 
   1010 /*
   1011  * Return processor specific async error structure
   1012  * size used.
   1013  */
   1014 int
   1015 cpu_aflt_size(void)
   1016 {
   1017 	return (sizeof (opl_async_flt_t));
   1018 }
   1019 
   1020 /*
   1021  * The cpu_sync_log_err() function is called via the [uc]e_drain() function to
   1022  * post-process CPU events that are dequeued.  As such, it can be invoked
   1023  * from softint context, from AST processing in the trap() flow, or from the
   1024  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
   1025  * Historically this entry point was used to log the actual cmn_err(9F) text;
   1026  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
   1027  * With FMA this function now also returns a flag which indicates to the
   1028  * caller whether the ereport should be posted (1) or suppressed (0).
   1029  */
   1030 /*ARGSUSED*/
   1031 static int
   1032 cpu_sync_log_err(void *flt)
   1033 {
   1034 	opl_async_flt_t *opl_flt = (opl_async_flt_t *)flt;
   1035 	struct async_flt *aflt = (struct async_flt *)flt;
   1036 
   1037 	/*
   1038 	 * No extra processing of urgent error events.
   1039 	 * Always generate ereports for these events.
   1040 	 */
   1041 	if (aflt->flt_status == OPL_ECC_URGENT_TRAP)
   1042 		return (1);
   1043 
   1044 	/*
   1045 	 * Additional processing for synchronous errors.
   1046 	 */
   1047 	switch (opl_flt->flt_type) {
   1048 	case OPL_CPU_INV_SFSR:
   1049 		return (1);
   1050 
   1051 	case OPL_CPU_SYNC_UE:
   1052 		/*
   1053 		 * The validity: SFSR_MK_UE bit has been checked
   1054 		 * in opl_cpu_sync_error()
   1055 		 * No more check is required.
   1056 		 *
   1057 		 * opl_flt->flt_eid_mod and flt_eid_sid have been set by H/W,
   1058 		 * and they have been retrieved in cpu_queue_events()
   1059 		 */
   1060 
   1061 		if (opl_flt->flt_eid_mod == OPL_ERRID_MEM) {
   1062 			ASSERT(aflt->flt_in_memory);
   1063 			/*
   1064 			 * We want to skip logging only if ALL the following
   1065 			 * conditions are true:
   1066 			 *
   1067 			 *	1. We are not panicing already.
   1068 			 *	2. The error is a memory error.
   1069 			 *	3. There is only one error.
   1070 			 *	4. The error is on a retired page.
   1071 			 *	5. The error occurred under on_trap
   1072 			 *	protection AFLT_PROT_EC
   1073 			 */
   1074 			if (!panicstr && aflt->flt_prot == AFLT_PROT_EC &&
   1075 			    page_retire_check(aflt->flt_addr, NULL) == 0) {
   1076 				/*
   1077 				 * Do not log an error from
   1078 				 * the retired page
   1079 				 */
   1080 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
   1081 				return (0);
   1082 			}
   1083 			if (!panicstr)
   1084 				cpu_page_retire(opl_flt);
   1085 		}
   1086 		return (1);
   1087 
   1088 	case OPL_CPU_SYNC_OTHERS:
   1089 		/*
   1090 		 * For the following error cases, the processor HW does
   1091 		 * not set the flt_eid_mod/flt_eid_sid. Instead, SW will attempt
   1092 		 * to assign appropriate values here to reflect what we
   1093 		 * think is the most likely cause of the problem w.r.t to
   1094 		 * the particular error event.  For Buserr and timeout
   1095 		 * error event, we will assign OPL_ERRID_CHANNEL as the
   1096 		 * most likely reason.  For TLB parity or multiple hit
   1097 		 * error events, we will assign the reason as
   1098 		 * OPL_ERRID_CPU (cpu related problem) and set the
   1099 		 * flt_eid_sid to point to the cpuid.
   1100 		 */
   1101 
   1102 		if (opl_flt->flt_bit & (SFSR_BERR|SFSR_TO)) {
   1103 			/*
   1104 			 * flt_eid_sid will not be used for this case.
   1105 			 */
   1106 			opl_flt->flt_eid_mod = OPL_ERRID_CHANNEL;
   1107 		}
   1108 		if (opl_flt->flt_bit & (SFSR_TLB_MUL|SFSR_TLB_PRT)) {
   1109 			opl_flt->flt_eid_mod = OPL_ERRID_CPU;
   1110 			opl_flt->flt_eid_sid = aflt->flt_inst;
   1111 		}
   1112 
   1113 		/*
   1114 		 * In case of no effective error bit
   1115 		 */
   1116 		if ((opl_flt->flt_bit & SFSR_ERRS) == 0) {
   1117 			opl_flt->flt_eid_mod = OPL_ERRID_CPU;
   1118 			opl_flt->flt_eid_sid = aflt->flt_inst;
   1119 		}
   1120 		break;
   1121 
   1122 		default:
   1123 			return (1);
   1124 	}
   1125 	return (1);
   1126 }
   1127 
   1128 /*
   1129  * Retire the bad page that may contain the flushed error.
   1130  */
   1131 void
   1132 cpu_page_retire(opl_async_flt_t *opl_flt)
   1133 {
   1134 	struct async_flt *aflt = (struct async_flt *)opl_flt;
   1135 	(void) page_retire(aflt->flt_addr, PR_UE);
   1136 }
   1137 
   1138 /*
   1139  * Invoked by error_init() early in startup and therefore before
   1140  * startup_errorq() is called to drain any error Q -
   1141  *
   1142  * startup()
   1143  *   startup_end()
   1144  *     error_init()
   1145  *       cpu_error_init()
   1146  * errorq_init()
   1147  *   errorq_drain()
   1148  * start_other_cpus()
   1149  *
   1150  * The purpose of this routine is to create error-related taskqs.  Taskqs
   1151  * are used for this purpose because cpu_lock can't be grabbed from interrupt
   1152  * context.
   1153  *
   1154  */
   1155 /*ARGSUSED*/
   1156 void
   1157 cpu_error_init(int items)
   1158 {
   1159 	opl_err_log = (opl_errlog_t *)
   1160 	    kmem_alloc(ERRLOG_ALLOC_SZ, KM_SLEEP);
   1161 	if ((uint64_t)opl_err_log & MMU_PAGEOFFSET)
   1162 		cmn_err(CE_PANIC, "The base address of the error log "
   1163 		    "is not page aligned");
   1164 }
   1165 
   1166 /*
   1167  * We route all errors through a single switch statement.
   1168  */
   1169 void
   1170 cpu_ue_log_err(struct async_flt *aflt)
   1171 {
   1172 	switch (aflt->flt_class) {
   1173 	case CPU_FAULT:
   1174 		if (cpu_sync_log_err(aflt))
   1175 			cpu_ereport_post(aflt);
   1176 		break;
   1177 
   1178 	case BUS_FAULT:
   1179 		bus_async_log_err(aflt);
   1180 		break;
   1181 
   1182 	default:
   1183 		cmn_err(CE_WARN, "discarding async error %p with invalid "
   1184 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
   1185 		return;
   1186 	}
   1187 }
   1188 
   1189 /*
   1190  * Routine for panic hook callback from panic_idle().
   1191  *
   1192  * Nothing to do here.
   1193  */
   1194 void
   1195 cpu_async_panic_callb(void)
   1196 {
   1197 }
   1198 
   1199 /*
   1200  * Routine to return a string identifying the physical name
   1201  * associated with a memory/cache error.
   1202  */
   1203 /*ARGSUSED*/
   1204 int
   1205 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
   1206     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
   1207     ushort_t flt_status, char *buf, int buflen, int *lenp)
   1208 {
   1209 	int synd_code;
   1210 	int ret;
   1211 
   1212 	/*
   1213 	 * An AFSR of -1 defaults to a memory syndrome.
   1214 	 */
   1215 	synd_code = (int)flt_synd;
   1216 
   1217 	if (&plat_get_mem_unum) {
   1218 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
   1219 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
   1220 			buf[0] = '\0';
   1221 			*lenp = 0;
   1222 		}
   1223 		return (ret);
   1224 	}
   1225 	buf[0] = '\0';
   1226 	*lenp = 0;
   1227 	return (ENOTSUP);
   1228 }
   1229 
   1230 /*
   1231  * Wrapper for cpu_get_mem_unum() routine that takes an
   1232  * async_flt struct rather than explicit arguments.
   1233  */
   1234 int
   1235 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
   1236     char *buf, int buflen, int *lenp)
   1237 {
   1238 	/*
   1239 	 * We always pass -1 so that cpu_get_mem_unum will interpret this as a
   1240 	 * memory error.
   1241 	 */
   1242 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
   1243 	    (uint64_t)-1,
   1244 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
   1245 	    aflt->flt_status, buf, buflen, lenp));
   1246 }
   1247 
   1248 /*
   1249  * This routine is a more generic interface to cpu_get_mem_unum()
   1250  * that may be used by other modules (e.g. mm).
   1251  */
   1252 /*ARGSUSED*/
   1253 int
   1254 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
   1255     char *buf, int buflen, int *lenp)
   1256 {
   1257 	int synd_status, flt_in_memory, ret;
   1258 	ushort_t flt_status = 0;
   1259 	char unum[UNUM_NAMLEN];
   1260 
   1261 	/*
   1262 	 * Check for an invalid address.
   1263 	 */
   1264 	if (afar == (uint64_t)-1)
   1265 		return (ENXIO);
   1266 
   1267 	if (synd == (uint64_t)-1)
   1268 		synd_status = AFLT_STAT_INVALID;
   1269 	else
   1270 		synd_status = AFLT_STAT_VALID;
   1271 
   1272 	flt_in_memory = (*afsr & SFSR_MEMORY) &&
   1273 	    pf_is_memory(afar >> MMU_PAGESHIFT);
   1274 
   1275 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
   1276 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
   1277 	if (ret != 0)
   1278 		return (ret);
   1279 
   1280 	if (*lenp >= buflen)
   1281 		return (ENAMETOOLONG);
   1282 
   1283 	(void) strncpy(buf, unum, buflen);
   1284 
   1285 	return (0);
   1286 }
   1287 
   1288 /*
   1289  * Routine to return memory information associated
   1290  * with a physical address and syndrome.
   1291  */
   1292 /*ARGSUSED*/
   1293 int
   1294 cpu_get_mem_info(uint64_t synd, uint64_t afar,
   1295     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
   1296     int *segsp, int *banksp, int *mcidp)
   1297 {
   1298 	int synd_code = (int)synd;
   1299 
   1300 	if (afar == (uint64_t)-1)
   1301 		return (ENXIO);
   1302 
   1303 	if (p2get_mem_info != NULL)
   1304 		return ((p2get_mem_info)(synd_code, afar, mem_sizep, seg_sizep,
   1305 		    bank_sizep, segsp, banksp, mcidp));
   1306 	else
   1307 		return (ENOTSUP);
   1308 }
   1309 
   1310 /*
   1311  * Routine to return a string identifying the physical
   1312  * name associated with a cpuid.
   1313  */
   1314 int
   1315 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
   1316 {
   1317 	int ret;
   1318 	char unum[UNUM_NAMLEN];
   1319 
   1320 	if (&plat_get_cpu_unum) {
   1321 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN,
   1322 		    lenp)) != 0)
   1323 			return (ret);
   1324 	} else {
   1325 		return (ENOTSUP);
   1326 	}
   1327 
   1328 	if (*lenp >= buflen)
   1329 		return (ENAMETOOLONG);
   1330 
   1331 	(void) strncpy(buf, unum, *lenp);
   1332 
   1333 	return (0);
   1334 }
   1335 
   1336 /*
   1337  * This routine exports the name buffer size.
   1338  */
   1339 size_t
   1340 cpu_get_name_bufsize()
   1341 {
   1342 	return (UNUM_NAMLEN);
   1343 }
   1344 
   1345 /*
   1346  * Flush the entire ecache by ASI_L2_CNTL.U2_FLUSH
   1347  */
   1348 void
   1349 cpu_flush_ecache(void)
   1350 {
   1351 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
   1352 	    cpunodes[CPU->cpu_id].ecache_linesize);
   1353 }
   1354 
   1355 static uint8_t
   1356 flt_to_trap_type(struct async_flt *aflt)
   1357 {
   1358 	if (aflt->flt_status & OPL_ECC_ISYNC_TRAP)
   1359 		return (TRAP_TYPE_ECC_I);
   1360 	if (aflt->flt_status & OPL_ECC_DSYNC_TRAP)
   1361 		return (TRAP_TYPE_ECC_D);
   1362 	if (aflt->flt_status & OPL_ECC_URGENT_TRAP)
   1363 		return (TRAP_TYPE_URGENT);
   1364 	return (TRAP_TYPE_UNKNOWN);
   1365 }
   1366 
   1367 /*
   1368  * Encode the data saved in the opl_async_flt_t struct into
   1369  * the FM ereport payload.
   1370  */
   1371 /* ARGSUSED */
   1372 static void
   1373 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
   1374 		nvlist_t *resource)
   1375 {
   1376 	opl_async_flt_t *opl_flt = (opl_async_flt_t *)aflt;
   1377 	char unum[UNUM_NAMLEN];
   1378 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
   1379 	int len;
   1380 
   1381 
   1382 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFSR) {
   1383 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFSR,
   1384 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
   1385 	}
   1386 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SFAR) {
   1387 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SFAR,
   1388 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
   1389 	}
   1390 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_UGESR) {
   1391 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_UGESR,
   1392 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
   1393 	}
   1394 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
   1395 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
   1396 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
   1397 	}
   1398 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
   1399 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
   1400 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
   1401 	}
   1402 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
   1403 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
   1404 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
   1405 	}
   1406 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
   1407 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
   1408 		    DATA_TYPE_BOOLEAN_VALUE,
   1409 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
   1410 	}
   1411 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_FLT_STATUS) {
   1412 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_FLT_STATUS,
   1413 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_status, NULL);
   1414 	}
   1415 
   1416 	switch (opl_flt->flt_eid_mod) {
   1417 	case OPL_ERRID_CPU:
   1418 		(void) snprintf(sbuf, sizeof (sbuf), "%llX",
   1419 		    (u_longlong_t)cpunodes[opl_flt->flt_eid_sid].device_id);
   1420 		(void) fm_fmri_cpu_set(resource, FM_CPU_SCHEME_VERSION,
   1421 		    NULL, opl_flt->flt_eid_sid,
   1422 		    (uint8_t *)&cpunodes[opl_flt->flt_eid_sid].version, sbuf);
   1423 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
   1424 		    DATA_TYPE_NVLIST, resource, NULL);
   1425 		break;
   1426 
   1427 	case OPL_ERRID_CHANNEL:
   1428 		/*
   1429 		 * No resource is created but the cpumem DE will find
   1430 		 * the defective path by retreiving EID from SFSR which is
   1431 		 * included in the payload.
   1432 		 */
   1433 		break;
   1434 
   1435 	case OPL_ERRID_MEM:
   1436 		(void) cpu_get_mem_unum_aflt(0, aflt, unum, UNUM_NAMLEN, &len);
   1437 		(void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION, NULL,
   1438 		    unum, NULL, (uint64_t)-1);
   1439 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
   1440 		    DATA_TYPE_NVLIST, resource, NULL);
   1441 		break;
   1442 
   1443 	case OPL_ERRID_PATH:
   1444 		/*
   1445 		 * No resource is created but the cpumem DE will find
   1446 		 * the defective path by retreiving EID from SFSR which is
   1447 		 * included in the payload.
   1448 		 */
   1449 		break;
   1450 	}
   1451 }
   1452 
   1453 /*
   1454  * Returns whether fault address is valid for this error bit and
   1455  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
   1456  */
   1457 /*ARGSUSED*/
   1458 static int
   1459 cpu_flt_in_memory(opl_async_flt_t *opl_flt, uint64_t t_afsr_bit)
   1460 {
   1461 	struct async_flt *aflt = (struct async_flt *)opl_flt;
   1462 
   1463 	if (aflt->flt_status & (OPL_ECC_SYNC_TRAP)) {
   1464 		return ((t_afsr_bit & SFSR_MEMORY) &&
   1465 		    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
   1466 	}
   1467 	return (0);
   1468 }
   1469 
   1470 /*
   1471  * In OPL SCF does the stick synchronization.
   1472  */
   1473 void
   1474 sticksync_slave(void)
   1475 {
   1476 }
   1477 
   1478 /*
   1479  * In OPL SCF does the stick synchronization.
   1480  */
   1481 void
   1482 sticksync_master(void)
   1483 {
   1484 }
   1485 
   1486 /*
   1487  * Cpu private unitialization.  OPL cpus do not use the private area.
   1488  */
   1489 void
   1490 cpu_uninit_private(struct cpu *cp)
   1491 {
   1492 	cmp_delete_cpu(cp->cpu_id);
   1493 }
   1494 
   1495 /*
   1496  * Always flush an entire cache.
   1497  */
   1498 void
   1499 cpu_error_ecache_flush(void)
   1500 {
   1501 	cpu_flush_ecache();
   1502 }
   1503 
   1504 void
   1505 cpu_ereport_post(struct async_flt *aflt)
   1506 {
   1507 	char *cpu_type, buf[FM_MAX_CLASS];
   1508 	nv_alloc_t *nva = NULL;
   1509 	nvlist_t *ereport, *detector, *resource;
   1510 	errorq_elem_t *eqep;
   1511 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
   1512 
   1513 	if (aflt->flt_panic || panicstr) {
   1514 		eqep = errorq_reserve(ereport_errorq);
   1515 		if (eqep == NULL)
   1516 			return;
   1517 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
   1518 		nva = errorq_elem_nva(ereport_errorq, eqep);
   1519 	} else {
   1520 		ereport = fm_nvlist_create(nva);
   1521 	}
   1522 
   1523 	/*
   1524 	 * Create the scheme "cpu" FMRI.
   1525 	 */
   1526 	detector = fm_nvlist_create(nva);
   1527 	resource = fm_nvlist_create(nva);
   1528 	switch (cpunodes[aflt->flt_inst].implementation) {
   1529 	case OLYMPUS_C_IMPL:
   1530 		cpu_type = FM_EREPORT_CPU_SPARC64_VI;
   1531 		break;
   1532 	case JUPITER_IMPL:
   1533 		cpu_type = FM_EREPORT_CPU_SPARC64_VII;
   1534 		break;
   1535 	default:
   1536 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
   1537 		break;
   1538 	}
   1539 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
   1540 	    (u_longlong_t)cpunodes[aflt->flt_inst].device_id);
   1541 	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
   1542 	    aflt->flt_inst, (uint8_t *)&cpunodes[aflt->flt_inst].version,
   1543 	    sbuf);
   1544 
   1545 	/*
   1546 	 * Encode all the common data into the ereport.
   1547 	 */
   1548 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
   1549 	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
   1550 
   1551 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
   1552 	    fm_ena_generate(aflt->flt_id, FM_ENA_FMT1), detector, NULL);
   1553 
   1554 	/*
   1555 	 * Encode the error specific data that was saved in
   1556 	 * the async_flt structure into the ereport.
   1557 	 */
   1558 	cpu_payload_add_aflt(aflt, ereport, resource);
   1559 
   1560 	if (aflt->flt_panic || panicstr) {
   1561 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
   1562 	} else {
   1563 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
   1564 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
   1565 		fm_nvlist_destroy(detector, FM_NVA_FREE);
   1566 		fm_nvlist_destroy(resource, FM_NVA_FREE);
   1567 	}
   1568 }
   1569 
   1570 void
   1571 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
   1572 {
   1573 	int status;
   1574 	ddi_fm_error_t de;
   1575 
   1576 	bzero(&de, sizeof (ddi_fm_error_t));
   1577 
   1578 	de.fme_version = DDI_FME_VERSION;
   1579 	de.fme_ena = fm_ena_generate(aflt->flt_id, FM_ENA_FMT1);
   1580 	de.fme_flag = expected;
   1581 	de.fme_bus_specific = (void *)aflt->flt_addr;
   1582 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
   1583 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
   1584 		aflt->flt_panic = 1;
   1585 }
   1586 
   1587 void
   1588 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
   1589     errorq_t *eqp, uint_t flag)
   1590 {
   1591 	struct async_flt *aflt = (struct async_flt *)payload;
   1592 
   1593 	aflt->flt_erpt_class = error_class;
   1594 	errorq_dispatch(eqp, payload, payload_sz, flag);
   1595 }
   1596 
   1597 void
   1598 adjust_hw_copy_limits(int ecache_size)
   1599 {
   1600 	/*
   1601 	 * Set hw copy limits.
   1602 	 *
   1603 	 * /etc/system will be parsed later and can override one or more
   1604 	 * of these settings.
   1605 	 *
   1606 	 * At this time, ecache size seems only mildly relevant.
   1607 	 * We seem to run into issues with the d-cache and stalls
   1608 	 * we see on misses.
   1609 	 *
   1610 	 * Cycle measurement indicates that 2 byte aligned copies fare
   1611 	 * little better than doing things with VIS at around 512 bytes.
   1612 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
   1613 	 * aligned is faster whenever the source and destination data
   1614 	 * in cache and the total size is less than 2 Kbytes.  The 2K
   1615 	 * limit seems to be driven by the 2K write cache.
   1616 	 * When more than 2K of copies are done in non-VIS mode, stores
   1617 	 * backup in the write cache.  In VIS mode, the write cache is
   1618 	 * bypassed, allowing faster cache-line writes aligned on cache
   1619 	 * boundaries.
   1620 	 *
   1621 	 * In addition, in non-VIS mode, there is no prefetching, so
   1622 	 * for larger copies, the advantage of prefetching to avoid even
   1623 	 * occasional cache misses is enough to justify using the VIS code.
   1624 	 *
   1625 	 * During testing, it was discovered that netbench ran 3% slower
   1626 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
   1627 	 * applications, data is only used once (copied to the output
   1628 	 * buffer, then copied by the network device off the system).  Using
   1629 	 * the VIS copy saves more L2 cache state.  Network copies are
   1630 	 * around 1.3K to 1.5K in size for historical reasons.
   1631 	 *
   1632 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
   1633 	 * aligned copy even for large caches and 8 MB ecache.  The
   1634 	 * infrastructure to allow different limits for different sized
   1635 	 * caches is kept to allow further tuning in later releases.
   1636 	 */
   1637 
   1638 	if (min_ecache_size == 0 && use_hw_bcopy) {
   1639 		/*
   1640 		 * First time through - should be before /etc/system
   1641 		 * is read.
   1642 		 * Could skip the checks for zero but this lets us
   1643 		 * preserve any debugger rewrites.
   1644 		 */
   1645 		if (hw_copy_limit_1 == 0) {
   1646 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
   1647 			priv_hcl_1 = hw_copy_limit_1;
   1648 		}
   1649 		if (hw_copy_limit_2 == 0) {
   1650 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
   1651 			priv_hcl_2 = hw_copy_limit_2;
   1652 		}
   1653 		if (hw_copy_limit_4 == 0) {
   1654 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
   1655 			priv_hcl_4 = hw_copy_limit_4;
   1656 		}
   1657 		if (hw_copy_limit_8 == 0) {
   1658 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
   1659 			priv_hcl_8 = hw_copy_limit_8;
   1660 		}
   1661 		min_ecache_size = ecache_size;
   1662 	} else {
   1663 		/*
   1664 		 * MP initialization. Called *after* /etc/system has
   1665 		 * been parsed. One CPU has already been initialized.
   1666 		 * Need to cater for /etc/system having scragged one
   1667 		 * of our values.
   1668 		 */
   1669 		if (ecache_size == min_ecache_size) {
   1670 			/*
   1671 			 * Same size ecache. We do nothing unless we
   1672 			 * have a pessimistic ecache setting. In that
   1673 			 * case we become more optimistic (if the cache is
   1674 			 * large enough).
   1675 			 */
   1676 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
   1677 				/*
   1678 				 * Need to adjust hw_copy_limit* from our
   1679 				 * pessimistic uniprocessor value to a more
   1680 				 * optimistic UP value *iff* it hasn't been
   1681 				 * reset.
   1682 				 */
   1683 				if ((ecache_size > 1048576) &&
   1684 				    (priv_hcl_8 == hw_copy_limit_8)) {
   1685 					if (ecache_size <= 2097152)
   1686 						hw_copy_limit_8 = 4 *
   1687 						    VIS_COPY_THRESHOLD;
   1688 					else if (ecache_size <= 4194304)
   1689 						hw_copy_limit_8 = 4 *
   1690 						    VIS_COPY_THRESHOLD;
   1691 					else
   1692 						hw_copy_limit_8 = 4 *
   1693 						    VIS_COPY_THRESHOLD;
   1694 					priv_hcl_8 = hw_copy_limit_8;
   1695 				}
   1696 			}
   1697 		} else if (ecache_size < min_ecache_size) {
   1698 			/*
   1699 			 * A different ecache size. Can this even happen?
   1700 			 */
   1701 			if (priv_hcl_8 == hw_copy_limit_8) {
   1702 				/*
   1703 				 * The previous value that we set
   1704 				 * is unchanged (i.e., it hasn't been
   1705 				 * scragged by /etc/system). Rewrite it.
   1706 				 */
   1707 				if (ecache_size <= 1048576)
   1708 					hw_copy_limit_8 = 8 *
   1709 					    VIS_COPY_THRESHOLD;
   1710 				else if (ecache_size <= 2097152)
   1711 					hw_copy_limit_8 = 8 *
   1712 					    VIS_COPY_THRESHOLD;
   1713 				else if (ecache_size <= 4194304)
   1714 					hw_copy_limit_8 = 8 *
   1715 					    VIS_COPY_THRESHOLD;
   1716 				else
   1717 					hw_copy_limit_8 = 10 *
   1718 					    VIS_COPY_THRESHOLD;
   1719 				priv_hcl_8 = hw_copy_limit_8;
   1720 				min_ecache_size = ecache_size;
   1721 			}
   1722 		}
   1723 	}
   1724 }
   1725 
   1726 #define	VIS_BLOCKSIZE		64
   1727 
   1728 int
   1729 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
   1730 {
   1731 	int ret, watched;
   1732 
   1733 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
   1734 	ret = dtrace_blksuword32(addr, data, 0);
   1735 	if (watched)
   1736 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
   1737 
   1738 	return (ret);
   1739 }
   1740 
   1741 void
   1742 opl_cpu_reg_init()
   1743 {
   1744 	uint64_t	this_cpu_log;
   1745 
   1746 	if (cpu[getprocessorid()] == &cpu0 && opl_cpu0_log_setup == 1) {
   1747 		/*
   1748 		 * Support for "ta 3"
   1749 		 */
   1750 		opl_ta3();
   1751 
   1752 		/*
   1753 		 * If we are being called at boot time on cpu0 the error
   1754 		 * log is already set up in cpu_setup. Clear the
   1755 		 * opl_cpu0_log_setup flag so that a subsequent DR of cpu0 will
   1756 		 * do the proper initialization.
   1757 		 */
   1758 		opl_cpu0_log_setup = 0;
   1759 		return;
   1760 	}
   1761 
   1762 	/*
   1763 	 * Initialize Error log Scratch register for error handling.
   1764 	 */
   1765 
   1766 	this_cpu_log = va_to_pa((void*)(((uint64_t)opl_err_log) +
   1767 	    ERRLOG_BUFSZ * (getprocessorid())));
   1768 	opl_error_setup(this_cpu_log);
   1769 }
   1770 
   1771 /*
   1772  * Queue one event in ue_queue based on ecc_type_to_info entry.
   1773  */
   1774 static void
   1775 cpu_queue_one_event(opl_async_flt_t *opl_flt, char *reason,
   1776     ecc_type_to_info_t *eccp)
   1777 {
   1778 	struct async_flt *aflt = (struct async_flt *)opl_flt;
   1779 
   1780 	if (reason &&
   1781 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
   1782 		(void) strcat(reason, eccp->ec_reason);
   1783 	}
   1784 
   1785 	opl_flt->flt_bit = eccp->ec_afsr_bit;
   1786 	opl_flt->flt_type = eccp->ec_flt_type;
   1787 	aflt->flt_in_memory = cpu_flt_in_memory(opl_flt, opl_flt->flt_bit);
   1788 	aflt->flt_payload = eccp->ec_err_payload;
   1789 
   1790 	ASSERT(aflt->flt_status & (OPL_ECC_SYNC_TRAP|OPL_ECC_URGENT_TRAP));
   1791 	cpu_errorq_dispatch(eccp->ec_err_class, (void *)opl_flt,
   1792 	    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
   1793 }
   1794 
   1795 /*
   1796  * Queue events on async event queue one event per error bit.
   1797  * Return number of events queued.
   1798  */
   1799 int
   1800 cpu_queue_events(opl_async_flt_t *opl_flt, char *reason, uint64_t t_afsr_errs)
   1801 {
   1802 	struct async_flt *aflt = (struct async_flt *)opl_flt;
   1803 	ecc_type_to_info_t *eccp;
   1804 	int nevents = 0;
   1805 
   1806 	/*
   1807 	 * Queue expected errors, error bit and fault type must must match
   1808 	 * in the ecc_type_to_info table.
   1809 	 */
   1810 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
   1811 	    eccp++) {
   1812 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
   1813 		    (eccp->ec_flags & aflt->flt_status) != 0) {
   1814 			/*
   1815 			 * UE error event can be further
   1816 			 * classified/breakdown into finer granularity
   1817 			 * based on the flt_eid_mod value set by HW.  We do
   1818 			 * special handling here so that we can report UE
   1819 			 * error in finer granularity as ue_mem,
   1820 			 * ue_channel, ue_cpu or ue_path.
   1821 			 */
   1822 			if (eccp->ec_flt_type == OPL_CPU_SYNC_UE) {
   1823 				opl_flt->flt_eid_mod = (aflt->flt_stat &
   1824 				    SFSR_EID_MOD) >> SFSR_EID_MOD_SHIFT;
   1825 				opl_flt->flt_eid_sid = (aflt->flt_stat &
   1826 				    SFSR_EID_SID) >> SFSR_EID_SID_SHIFT;
   1827 				/*
   1828 				 * Need to advance eccp pointer by flt_eid_mod
   1829 				 * so that we get an appropriate ecc pointer
   1830 				 *
   1831 				 * EID			# of advances
   1832 				 * ----------------------------------
   1833 				 * OPL_ERRID_MEM	0
   1834 				 * OPL_ERRID_CHANNEL	1
   1835 				 * OPL_ERRID_CPU	2
   1836 				 * OPL_ERRID_PATH	3
   1837 				 */
   1838 				eccp += opl_flt->flt_eid_mod;
   1839 			}
   1840 			cpu_queue_one_event(opl_flt, reason, eccp);
   1841 			t_afsr_errs &= ~eccp->ec_afsr_bit;
   1842 			nevents++;
   1843 		}
   1844 	}
   1845 
   1846 	return (nevents);
   1847 }
   1848 
   1849 /*
   1850  * Sync. error wrapper functions.
   1851  * We use these functions in order to transfer here from the
   1852  * nucleus trap handler information about trap type (data or
   1853  * instruction) and trap level (0 or above 0). This way we
   1854  * get rid of using SFSR's reserved bits.
   1855  */
   1856 
   1857 #define	OPL_SYNC_TL0	0
   1858 #define	OPL_SYNC_TL1	1
   1859 #define	OPL_ISYNC_ERR	0
   1860 #define	OPL_DSYNC_ERR	1
   1861 
   1862 void
   1863 opl_cpu_isync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
   1864 {
   1865 	uint64_t t_sfar = p_sfar;
   1866 	uint64_t t_sfsr = p_sfsr;
   1867 
   1868 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
   1869 	    OPL_SYNC_TL0, OPL_ISYNC_ERR);
   1870 }
   1871 
   1872 void
   1873 opl_cpu_isync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
   1874 {
   1875 	uint64_t t_sfar = p_sfar;
   1876 	uint64_t t_sfsr = p_sfsr;
   1877 
   1878 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
   1879 	    OPL_SYNC_TL1, OPL_ISYNC_ERR);
   1880 }
   1881 
   1882 void
   1883 opl_cpu_dsync_tl0_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
   1884 {
   1885 	uint64_t t_sfar = p_sfar;
   1886 	uint64_t t_sfsr = p_sfsr;
   1887 
   1888 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
   1889 	    OPL_SYNC_TL0, OPL_DSYNC_ERR);
   1890 }
   1891 
   1892 void
   1893 opl_cpu_dsync_tl1_error(struct regs *rp, ulong_t p_sfar, ulong_t p_sfsr)
   1894 {
   1895 	uint64_t t_sfar = p_sfar;
   1896 	uint64_t t_sfsr = p_sfsr;
   1897 
   1898 	opl_cpu_sync_error(rp, t_sfar, t_sfsr,
   1899 	    OPL_SYNC_TL1, OPL_DSYNC_ERR);
   1900 }
   1901 
   1902 /*
   1903  * The fj sync err handler transfers control here for UE, BERR, TO, TLB_MUL
   1904  * and TLB_PRT.
   1905  * This function is designed based on cpu_deferred_error().
   1906  */
   1907 
   1908 static void
   1909 opl_cpu_sync_error(struct regs *rp, ulong_t t_sfar, ulong_t t_sfsr,
   1910     uint_t tl, uint_t derr)
   1911 {
   1912 	opl_async_flt_t opl_flt;
   1913 	struct async_flt *aflt;
   1914 	int trampolined = 0;
   1915 	char pr_reason[MAX_REASON_STRING];
   1916 	uint64_t log_sfsr;
   1917 	int expected = DDI_FM_ERR_UNEXPECTED;
   1918 	ddi_acc_hdl_t *hp;
   1919 
   1920 	/*
   1921 	 * We need to look at p_flag to determine if the thread detected an
   1922 	 * error while dumping core.  We can't grab p_lock here, but it's ok
   1923 	 * because we just need a consistent snapshot and we know that everyone
   1924 	 * else will store a consistent set of bits while holding p_lock.  We
   1925 	 * don't have to worry about a race because SDOCORE is set once prior
   1926 	 * to doing i/o from the process's address space and is never cleared.
   1927 	 */
   1928 	uint_t pflag = ttoproc(curthread)->p_flag;
   1929 
   1930 	pr_reason[0] = '\0';
   1931 
   1932 	/*
   1933 	 * handle the specific error
   1934 	 */
   1935 	bzero(&opl_flt, sizeof (opl_async_flt_t));
   1936 	aflt = (struct async_flt *)&opl_flt;
   1937 	aflt->flt_id = gethrtime_waitfree();
   1938 	aflt->flt_bus_id = getprocessorid();
   1939 	aflt->flt_inst = CPU->cpu_id;
   1940 	aflt->flt_stat = t_sfsr;
   1941 	aflt->flt_addr = t_sfar;
   1942 	aflt->flt_pc = (caddr_t)rp->r_pc;
   1943 	aflt->flt_prot = (uchar_t)AFLT_PROT_NONE;
   1944 	aflt->flt_class = (uchar_t)CPU_FAULT;
   1945 	aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate &
   1946 	    TSTATE_PRIV) ? 1 : 0));
   1947 	aflt->flt_tl = (uchar_t)tl;
   1948 	aflt->flt_panic = (uchar_t)(tl != 0 || aft_testfatal != 0 ||
   1949 	    (t_sfsr & (SFSR_TLB_MUL|SFSR_TLB_PRT)) != 0);
   1950 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
   1951 	aflt->flt_status = (derr) ? OPL_ECC_DSYNC_TRAP : OPL_ECC_ISYNC_TRAP;
   1952 
   1953 	/*
   1954 	 * If SFSR.FV is not set, both SFSR and SFAR/SFPAR values are uncertain.
   1955 	 * So, clear all error bits to avoid mis-handling and force the system
   1956 	 * panicked.
   1957 	 * We skip all the procedures below down to the panic message call.
   1958 	 */
   1959 	if (!(t_sfsr & SFSR_FV)) {
   1960 		opl_flt.flt_type = OPL_CPU_INV_SFSR;
   1961 		aflt->flt_panic = 1;
   1962 		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
   1963 		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt,
   1964 		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
   1965 		fm_panic("%sErrors(s)", "invalid SFSR");
   1966 	}
   1967 
   1968 	/*
   1969 	 * If either UE and MK bit is off, this is not valid UE error.
   1970 	 * If it is not valid UE error, clear UE & MK_UE bits to prevent
   1971 	 * mis-handling below.
   1972 	 * aflt->flt_stat keeps the original bits as a reference.
   1973 	 */
   1974 	if ((t_sfsr & (SFSR_MK_UE|SFSR_UE)) !=
   1975 	    (SFSR_MK_UE|SFSR_UE)) {
   1976 		t_sfsr &= ~(SFSR_MK_UE|SFSR_UE);
   1977 	}
   1978 
   1979 	/*
   1980 	 * If the trap occurred in privileged mode at TL=0, we need to check to
   1981 	 * see if we were executing in the kernel under on_trap() or t_lofault
   1982 	 * protection.  If so, modify the saved registers so that we return
   1983 	 * from the trap to the appropriate trampoline routine.
   1984 	 */
   1985 	if (!aflt->flt_panic && aflt->flt_priv && tl == 0) {
   1986 		if (curthread->t_ontrap != NULL) {
   1987 			on_trap_data_t *otp = curthread->t_ontrap;
   1988 
   1989 			if (otp->ot_prot & OT_DATA_EC) {
   1990 				aflt->flt_prot = (uchar_t)AFLT_PROT_EC;
   1991 				otp->ot_trap |= (ushort_t)OT_DATA_EC;
   1992 				rp->r_pc = otp->ot_trampoline;
   1993 				rp->r_npc = rp->r_pc + 4;
   1994 				trampolined = 1;
   1995 			}
   1996 
   1997 			if ((t_sfsr & (SFSR_TO | SFSR_BERR)) &&
   1998 			    (otp->ot_prot & OT_DATA_ACCESS)) {
   1999 				aflt->flt_prot = (uchar_t)AFLT_PROT_ACCESS;
   2000 				otp->ot_trap |= (ushort_t)OT_DATA_ACCESS;
   2001 				rp->r_pc = otp->ot_trampoline;
   2002 				rp->r_npc = rp->r_pc + 4;
   2003 				trampolined = 1;
   2004 				/*
   2005 				 * for peeks and caut_gets errors are expected
   2006 				 */
   2007 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
   2008 				if (!hp)
   2009 					expected = DDI_FM_ERR_PEEK;
   2010 				else if (hp->ah_acc.devacc_attr_access ==
   2011 				    DDI_CAUTIOUS_ACC)
   2012 					expected = DDI_FM_ERR_EXPECTED;
   2013 			}
   2014 
   2015 		} else if (curthread->t_lofault) {
   2016 			aflt->flt_prot = AFLT_PROT_COPY;
   2017 			rp->r_g1 = EFAULT;
   2018 			rp->r_pc = curthread->t_lofault;
   2019 			rp->r_npc = rp->r_pc + 4;
   2020 			trampolined = 1;
   2021 		}
   2022 	}
   2023 
   2024 	/*
   2025 	 * If we're in user mode or we're doing a protected copy, we either
   2026 	 * want the ASTON code below to send a signal to the user process
   2027 	 * or we want to panic if aft_panic is set.
   2028 	 *
   2029 	 * If we're in privileged mode and we're not doing a copy, then we
   2030 	 * need to check if we've trampolined.  If we haven't trampolined,
   2031 	 * we should panic.
   2032 	 */
   2033 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
   2034 		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
   2035 			aflt->flt_panic |= aft_panic;
   2036 	} else if (!trampolined) {
   2037 		aflt->flt_panic = 1;
   2038 	}
   2039 
   2040 	/*
   2041 	 * If we've trampolined due to a privileged TO or BERR, or if an
   2042 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
   2043 	 * event for that TO or BERR.  Queue all other events (if any) besides
   2044 	 * the TO/BERR.
   2045 	 */
   2046 	log_sfsr = t_sfsr;
   2047 	if (trampolined) {
   2048 		log_sfsr &= ~(SFSR_TO | SFSR_BERR);
   2049 	} else if (!aflt->flt_priv) {
   2050 		/*
   2051 		 * User mode, suppress messages if
   2052 		 * cpu_berr_to_verbose is not set.
   2053 		 */
   2054 		if (!cpu_berr_to_verbose)
   2055 			log_sfsr &= ~(SFSR_TO | SFSR_BERR);
   2056 	}
   2057 
   2058 	if (((log_sfsr & SFSR_ERRS) && (cpu_queue_events(&opl_flt, pr_reason,
   2059 	    t_sfsr) == 0)) || ((t_sfsr & SFSR_ERRS) == 0)) {
   2060 		opl_flt.flt_type = OPL_CPU_INV_SFSR;
   2061 		aflt->flt_payload = FM_EREPORT_PAYLOAD_SYNC;
   2062 		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_SFSR, (void *)&opl_flt,
   2063 		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
   2064 	}
   2065 
   2066 	if (t_sfsr & (SFSR_UE|SFSR_TO|SFSR_BERR)) {
   2067 		cpu_run_bus_error_handlers(aflt, expected);
   2068 	}
   2069 
   2070 	/*
   2071 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
   2072 	 * be logged as part of the panic flow.
   2073 	 */
   2074 	if (aflt->flt_panic) {
   2075 		if (pr_reason[0] == 0)
   2076 			strcpy(pr_reason, "invalid SFSR ");
   2077 
   2078 		fm_panic("%sErrors(s)", pr_reason);
   2079 	}
   2080 
   2081 	/*
   2082 	 * If we queued an error and we are going to return from the trap and
   2083 	 * the error was in user mode or inside of a copy routine, set AST flag
   2084 	 * so the queue will be drained before returning to user mode.  The
   2085 	 * AST processing will also act on our failure policy.
   2086 	 */
   2087 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
   2088 		int pcb_flag = 0;
   2089 
   2090 		if (t_sfsr & (SFSR_ERRS & ~(SFSR_BERR | SFSR_TO)))
   2091 			pcb_flag |= ASYNC_HWERR;
   2092 
   2093 		if (t_sfsr & SFSR_BERR)
   2094 			pcb_flag |= ASYNC_BERR;
   2095 
   2096 		if (t_sfsr & SFSR_TO)
   2097 			pcb_flag |= ASYNC_BTO;
   2098 
   2099 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
   2100 		aston(curthread);
   2101 	}
   2102 }
   2103 
   2104 /*ARGSUSED*/
   2105 void
   2106 opl_cpu_urgent_error(struct regs *rp, ulong_t p_ugesr, ulong_t tl)
   2107 {
   2108 	opl_async_flt_t opl_flt;
   2109 	struct async_flt *aflt;
   2110 	char pr_reason[MAX_REASON_STRING];
   2111 
   2112 	/* normalize tl */
   2113 	tl = (tl >= 2 ? 1 : 0);
   2114 	pr_reason[0] = '\0';
   2115 
   2116 	bzero(&opl_flt, sizeof (opl_async_flt_t));
   2117 	aflt = (struct async_flt *)&opl_flt;
   2118 	aflt->flt_id = gethrtime_waitfree();
   2119 	aflt->flt_bus_id = getprocessorid();
   2120 	aflt->flt_inst = CPU->cpu_id;
   2121 	aflt->flt_stat = p_ugesr;
   2122 	aflt->flt_pc = (caddr_t)rp->r_pc;
   2123 	aflt->flt_class = (uchar_t)CPU_FAULT;
   2124 	aflt->flt_tl = tl;
   2125 	aflt->flt_priv = (uchar_t)(tl == 1 ? 1 : ((rp->r_tstate & TSTATE_PRIV) ?
   2126 	    1 : 0));
   2127 	aflt->flt_status = OPL_ECC_URGENT_TRAP;
   2128 	aflt->flt_panic = 1;
   2129 	/*
   2130 	 * HW does not set mod/sid in case of urgent error.
   2131 	 * So we have to set it here.
   2132 	 */
   2133 	opl_flt.flt_eid_mod = OPL_ERRID_CPU;
   2134 	opl_flt.flt_eid_sid = aflt->flt_inst;
   2135 
   2136 	if (cpu_queue_events(&opl_flt, pr_reason, p_ugesr) == 0) {
   2137 		opl_flt.flt_type = OPL_CPU_INV_UGESR;
   2138 		aflt->flt_payload = FM_EREPORT_PAYLOAD_URGENT;
   2139 		cpu_errorq_dispatch(FM_EREPORT_CPU_INV_URG, (void *)&opl_flt,
   2140 		    sizeof (opl_async_flt_t), ue_queue, aflt->flt_panic);
   2141 	}
   2142 
   2143 	fm_panic("Urgent Error");
   2144 }
   2145 
   2146 /*
   2147  * Initialization error counters resetting.
   2148  */
   2149 /* ARGSUSED */
   2150 static void
   2151 opl_ras_online(void *arg, cpu_t *cp, cyc_handler_t *hdlr, cyc_time_t *when)
   2152 {
   2153 	hdlr->cyh_func = (cyc_func_t)ras_cntr_reset;
   2154 	hdlr->cyh_level = CY_LOW_LEVEL;
   2155 	hdlr->cyh_arg = (void *)(uintptr_t)cp->cpu_id;
   2156 
   2157 	when->cyt_when = cp->cpu_id * (((hrtime_t)NANOSEC * 10)/ NCPU);
   2158 	when->cyt_interval = (hrtime_t)NANOSEC * opl_async_check_interval;
   2159 }
   2160 
   2161 void
   2162 cpu_mp_init(void)
   2163 {
   2164 	cyc_omni_handler_t hdlr;
   2165 
   2166 	hdlr.cyo_online = opl_ras_online;
   2167 	hdlr.cyo_offline = NULL;
   2168 	hdlr.cyo_arg = NULL;
   2169 	mutex_enter(&cpu_lock);
   2170 	(void) cyclic_add_omni(&hdlr);
   2171 	mutex_exit(&cpu_lock);
   2172 }
   2173 
/*
 * When zero, mmu_init_kernel_pgsz() uses TTE8K for every kernel context
 * page-size field instead of the segkmem_lpsize page size code.  It is
 * also reset to zero there if OBP cannot preserve the kernel context.
 */
int heaplp_use_stlb = 0;
   2175 
   2176 void
   2177 mmu_init_kernel_pgsz(struct hat *hat)
   2178 {
   2179 	uint_t tte = page_szc(segkmem_lpsize);
   2180 	uchar_t new_cext_primary, new_cext_nucleus;
   2181 
   2182 	if (heaplp_use_stlb == 0) {
   2183 		/* do not reprogram stlb */
   2184 		tte = TTE8K;
   2185 	} else if (!plat_prom_preserve_kctx_is_supported()) {
   2186 		/* OBP does not support non-zero primary context */
   2187 		tte = TTE8K;
   2188 		heaplp_use_stlb = 0;
   2189 	}
   2190 
   2191 	new_cext_nucleus = TAGACCEXT_MKSZPAIR(tte, TTE8K);
   2192 	new_cext_primary = TAGACCEXT_MKSZPAIR(TTE8K, tte);
   2193 
   2194 	hat->sfmmu_cext = new_cext_primary;
   2195 	kcontextreg = ((uint64_t)new_cext_nucleus << CTXREG_NEXT_SHIFT) |
   2196 	    ((uint64_t)new_cext_primary << CTXREG_EXT_SHIFT);
   2197 }
   2198 
   2199 size_t
   2200 mmu_get_kernel_lpsize(size_t lpsize)
   2201 {
   2202 	uint_t tte;
   2203 
   2204 	if (lpsize == 0) {
   2205 		/* no setting for segkmem_lpsize in /etc/system: use default */
   2206 		return (MMU_PAGESIZE4M);
   2207 	}
   2208 
   2209 	for (tte = TTE8K; tte <= TTE4M; tte++) {
   2210 		if (lpsize == TTEBYTES(tte))
   2211 			return (lpsize);
   2212 	}
   2213 
   2214 	return (TTEBYTES(TTE8K));
   2215 }
   2216 
/*
 * Support for ta 3.
 * We allocate here a buffer for each cpu
 * for saving the current register window.
 */
typedef struct win_regs {
	uint64_t l[8];	/* presumably the window's local registers -- confirm */
	uint64_t i[8];	/* presumably the window's in registers -- confirm */
} win_regs_t;
   2226 static void
   2227 opl_ta3(void)
   2228 {
   2229 	/*
   2230 	 * opl_ta3 should only be called once at boot time.
   2231 	 */
   2232 	if (opl_ta3_save == NULL)
   2233 		opl_ta3_save = (char *)kmem_alloc(NCPU * sizeof (win_regs_t),
   2234 		    KM_SLEEP);
   2235 }
   2236 
   2237 /*
   2238  * The following are functions that are unused in
   2239  * OPL cpu module. They are defined here to resolve
   2240  * dependencies in the "unix" module.
   2241  * Unused functions that should never be called in
   2242  * OPL are coded with ASSERT(0).
   2243  */
   2244 
   2245 void
   2246 cpu_disable_errors(void)
   2247 {}
   2248 
   2249 void
   2250 cpu_enable_errors(void)
   2251 { ASSERT(0); }
   2252 
   2253 /*ARGSUSED*/
   2254 void
   2255 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t t)
   2256 { ASSERT(0); }
   2257 
   2258 /*ARGSUSED*/
   2259 void
   2260 cpu_faulted_enter(struct cpu *cp)
   2261 {}
   2262 
   2263 /*ARGSUSED*/
   2264 void
   2265 cpu_faulted_exit(struct cpu *cp)
   2266 {}
   2267 
   2268 /*ARGSUSED*/
   2269 void
   2270 cpu_check_allcpus(struct async_flt *aflt)
   2271 {}
   2272 
   2273 /*ARGSUSED*/
   2274 void
   2275 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *t)
   2276 { ASSERT(0); }
   2277 
   2278 /*ARGSUSED*/
   2279 void
   2280 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
   2281 { ASSERT(0); }
   2282 
   2283 /*ARGSUSED*/
   2284 void
   2285 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
   2286 { ASSERT(0); }
   2287 
   2288 /*ARGSUSED*/
   2289 void
   2290 cpu_busy_ecache_scrub(struct cpu *cp)
   2291 {}
   2292 
   2293 /*ARGSUSED*/
   2294 void
   2295 cpu_idle_ecache_scrub(struct cpu *cp)
   2296 {}
   2297 
   2298 /* ARGSUSED */
   2299 void
   2300 cpu_change_speed(uint64_t divisor, uint64_t arg2)
   2301 { ASSERT(0); }
   2302 
   2303 void
   2304 cpu_init_cache_scrub(void)
   2305 {}
   2306 
   2307 /* ARGSUSED */
   2308 int
   2309 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
   2310 {
   2311 	if (&plat_get_mem_sid) {
   2312 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
   2313 	} else {
   2314 		return (ENOTSUP);
   2315 	}
   2316 }
   2317 
   2318 /* ARGSUSED */
   2319 int
   2320 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
   2321 {
   2322 	if (&plat_get_mem_addr) {
   2323 		return (plat_get_mem_addr(unum, sid, offset, addrp));
   2324 	} else {
   2325 		return (ENOTSUP);
   2326 	}
   2327 }
   2328 
   2329 /* ARGSUSED */
   2330 int
   2331 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
   2332 {
   2333 	if (&plat_get_mem_offset) {
   2334 		return (plat_get_mem_offset(flt_addr, offp));
   2335 	} else {
   2336 		return (ENOTSUP);
   2337 	}
   2338 }
   2339 
   2340 /*ARGSUSED*/
   2341 void
   2342 itlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
   2343 { ASSERT(0); }
   2344 
   2345 /*ARGSUSED*/
   2346 void
   2347 dtlb_rd_entry(uint_t entry, tte_t *tte, uint64_t *va_tag)
   2348 { ASSERT(0); }
   2349 
   2350 /*ARGSUSED*/
   2351 void
   2352 read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
   2353 { ASSERT(0); }
   2354 
   2355 /*ARGSUSED*/
   2356 int
   2357 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
   2358     errorq_elem_t *eqep, size_t afltoffset)
   2359 {
   2360 	ASSERT(0);
   2361 	return (0);
   2362 }
   2363 
   2364 /*ARGSUSED*/
   2365 char *
   2366 flt_to_error_type(struct async_flt *aflt)
   2367 {
   2368 	ASSERT(0);
   2369 	return (NULL);
   2370 }
   2371 
   2372 #define	PROM_SPARC64VII_MODE_PROPNAME	"SPARC64-VII-mode"
   2373 
   2374 /*
   2375  * Check for existence of OPL OBP property that indicates
   2376  * SPARC64-VII support. By default, only enable Jupiter
   2377  * features if the property is present.   It will be
   2378  * present in all-Jupiter domains by OBP if the domain has
   2379  * been selected by the user on the system controller to
   2380  * run in Jupiter mode.  Basically, this OBP property must
   2381  * be present to turn on the cpu_alljupiter flag.
   2382  */
   2383 static int
   2384 prom_SPARC64VII_support_enabled(void)
   2385 {
   2386 	int val;
   2387 
   2388 	return ((prom_getprop(prom_rootnode(), PROM_SPARC64VII_MODE_PROPNAME,
   2389 	    (caddr_t)&val) == 0) ? 1 : 0);
   2390 }
   2391 
   2392 #define	PROM_KCTX_PRESERVED_PROPNAME	"context0-page-size-preserved"
   2393 
   2394 /*
   2395  * Check for existence of OPL OBP property that indicates support for
   2396  * preserving Solaris kernel page sizes when entering OBP.  We need to
   2397  * check the prom tree since the ddi tree is not yet built when the
   2398  * platform startup sequence is called.
   2399  */
   2400 static int
   2401 plat_prom_preserve_kctx_is_supported(void)
   2402 {
   2403 	pnode_t		pnode;
   2404 	int		val;
   2405 
   2406 	/*
   2407 	 * Check for existence of context0-page-size-preserved property
   2408 	 * in virtual-memory prom node.
   2409 	 */
   2410 	pnode = (pnode_t)prom_getphandle(prom_mmu_ihandle());
   2411 	return ((prom_getprop(pnode, PROM_KCTX_PRESERVED_PROPNAME,
   2412 	    (caddr_t)&val) == 0) ? 1 : 0);
   2413 }
   2414