Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/machsystm.h>
     27 #include <sys/archsystm.h>
     28 #include <sys/vm.h>
     29 #include <sys/cpu.h>
     30 #include <sys/cpupart.h>
     31 #include <sys/cmt.h>
     32 #include <sys/bitset.h>
     33 #include <sys/reboot.h>
     34 #include <sys/kdi.h>
     35 #include <sys/bootconf.h>
     36 #include <sys/memlist_plat.h>
     37 #include <sys/memlist_impl.h>
     38 #include <sys/prom_plat.h>
     39 #include <sys/prom_isa.h>
     40 #include <sys/autoconf.h>
     41 #include <sys/intreg.h>
     42 #include <sys/ivintr.h>
     43 #include <sys/fpu/fpusystm.h>
     44 #include <sys/iommutsb.h>
     45 #include <vm/vm_dep.h>
     46 #include <vm/seg_kmem.h>
     47 #include <vm/seg_kpm.h>
     48 #include <vm/seg_map.h>
     49 #include <vm/seg_kp.h>
     50 #include <sys/sysconf.h>
     51 #include <vm/hat_sfmmu.h>
     52 #include <sys/kobj.h>
     53 #include <sys/sun4asi.h>
     54 #include <sys/clconf.h>
     55 #include <sys/platform_module.h>
     56 #include <sys/panic.h>
     57 #include <sys/cpu_sgnblk_defs.h>
     58 #include <sys/clock.h>
     59 #include <sys/fpras_impl.h>
     60 #include <sys/prom_debug.h>
     61 #include <sys/traptrace.h>
     62 #include <sys/memnode.h>
     63 #include <sys/mem_cage.h>
     64 
/*
 * fpRAS implementation structures.
 *
 * fpras_chkfngrps_base holds the pointer returned by kmem_alloc() in
 * mach_fpras(); fpras_chkfngrps is the 64-byte aligned address within
 * that allocation that is actually used.  fpras_frequency and
 * fpras_interval both start out at -1; mach_fpras() replaces a negative
 * fpras_frequency with FPRAS_DEFAULT_FREQUENCY and then derives
 * fpras_interval from it.
 */
struct fpras_chkfn *fpras_chkfnaddrs[FPRAS_NCOPYOPS];
struct fpras_chkfngrp *fpras_chkfngrps;
struct fpras_chkfngrp *fpras_chkfngrps_base;
int fpras_frequency = -1;
int64_t fpras_interval = -1;
     73 
/*
 * Increase unix symbol table size as a work around for 6828121.
 *
 * NOTE(review): nothing in the code visible here reads or writes this
 * variable; presumably it exists only so that the symbol is present.
 */
int alloc_mem_bermuda_triangle;
     78 
/*
 * Halt idling cpus optimization
 *
 * This optimization is only enabled on platforms that have
 * CPU halt support. The cpu_halt_cpu() routine is provided
 * by the cpu module and is referenced here through a pragma weak.
 * The presence of this routine automatically enables the halt idling
 * cpus functionality if the global switch enable_halt_idle_cpus
 * is set (default is set); see mach_cpu_halt_idle().
 */
#pragma weak	cpu_halt_cpu
extern void	cpu_halt_cpu();
     92 
/*
 * Defines for the idle_state_transition DTrace probe
 *
 * The probe fires when the CPU undergoes an idle state change (e.g. halting).
 * The argument passed is the state to which the CPU is transitioning.
 *
 * The states are defined here.
 */
#define	IDLE_STATE_NORMAL 0
#define	IDLE_STATE_HALTED 1

/*
 * Global switch consulted by mach_cpu_halt_idle(); when zero the halt
 * idling hooks are never installed.
 */
int		enable_halt_idle_cpus = 1; /* global switch */
    105 
/*
 * Prepare the current CPU for the kernel trap table: initialize its
 * interrupt request free list, set the window state to WSTATE_KERN,
 * and then hand the address of trap_table to the PROM.
 */
void
setup_trap_table(void)
{
	intr_init(CPU);			/* init interrupt request free list */
	setwstate(WSTATE_KERN);
	prom_set_traptable(&trap_table);
}
    113 
    114 void
    115 mach_fpras()
    116 {
    117 	if (fpras_implemented && !fpras_disable) {
    118 		int i;
    119 		struct fpras_chkfngrp *fcgp;
    120 		size_t chkfngrpsallocsz;
    121 
    122 		/*
    123 		 * Note that we size off of NCPU and setup for
    124 		 * all those possibilities regardless of whether
    125 		 * the cpu id is present or not.  We do this so that
    126 		 * we don't have any construction or destruction
    127 		 * activity to perform at DR time, and it's not
    128 		 * costly in memory.  We require block alignment.
    129 		 */
    130 		chkfngrpsallocsz = NCPU * sizeof (struct fpras_chkfngrp);
    131 		fpras_chkfngrps_base = kmem_alloc(chkfngrpsallocsz, KM_SLEEP);
    132 		if (IS_P2ALIGNED((uintptr_t)fpras_chkfngrps_base, 64)) {
    133 			fpras_chkfngrps = fpras_chkfngrps_base;
    134 		} else {
    135 			kmem_free(fpras_chkfngrps_base, chkfngrpsallocsz);
    136 			chkfngrpsallocsz += 64;
    137 			fpras_chkfngrps_base = kmem_alloc(chkfngrpsallocsz,
    138 			    KM_SLEEP);
    139 			fpras_chkfngrps = (struct fpras_chkfngrp *)
    140 			    P2ROUNDUP((uintptr_t)fpras_chkfngrps_base, 64);
    141 		}
    142 
    143 		/*
    144 		 * Copy our check function into place for each copy operation
    145 		 * and each cpu id.
    146 		 */
    147 		fcgp = &fpras_chkfngrps[0];
    148 		for (i = 0; i < FPRAS_NCOPYOPS; ++i)
    149 			bcopy((void *)fpras_chkfn_type1, &fcgp->fpras_fn[i],
    150 			    sizeof (struct fpras_chkfn));
    151 		for (i = 1; i < NCPU; ++i)
    152 			*(&fpras_chkfngrps[i]) = *fcgp;
    153 
    154 		/*
    155 		 * At definition fpras_frequency is set to -1, and it will
    156 		 * still have that value unless changed in /etc/system (not
    157 		 * strictly supported, but not preventable).  The following
    158 		 * both sets the default and sanity checks anything from
    159 		 * /etc/system.
    160 		 */
    161 		if (fpras_frequency < 0)
    162 			fpras_frequency = FPRAS_DEFAULT_FREQUENCY;
    163 
    164 		/*
    165 		 * Now calculate fpras_interval.  When fpras_interval
    166 		 * becomes non-negative fpras checks will commence
    167 		 * (copies before this point in boot will bypass fpras).
    168 		 * Our stores of instructions must be visible; no need
    169 		 * to flush as they're never been executed before.
    170 		 */
    171 		membar_producer();
    172 		fpras_interval = (fpras_frequency == 0) ?
    173 		    0 : sys_tick_freq / fpras_frequency;
    174 	}
    175 }
    176 
    177 void
    178 mach_hw_copy_limit(void)
    179 {
    180 	if (!fpu_exists) {
    181 		use_hw_bcopy = 0;
    182 		hw_copy_limit_1 = 0;
    183 		hw_copy_limit_2 = 0;
    184 		hw_copy_limit_4 = 0;
    185 		hw_copy_limit_8 = 0;
    186 		use_hw_bzero = 0;
    187 	}
    188 }
    189 
    190 void
    191 load_tod_module()
    192 {
    193 	/*
    194 	 * Load tod driver module for the tod part found on this system.
    195 	 * Recompute the cpu frequency/delays based on tod as tod part
    196 	 * tends to keep time more accurately.
    197 	 */
    198 	if (tod_module_name == NULL || modload("tod", tod_module_name) == -1)
    199 		halt("Can't load tod module");
    200 }
    201 
    202 void
    203 mach_memscrub(void)
    204 {
    205 	/*
    206 	 * Startup memory scrubber, if not running fpu emulation code.
    207 	 */
    208 
    209 #ifndef _HW_MEMSCRUB_SUPPORT
    210 	if (fpu_exists) {
    211 		if (memscrub_init()) {
    212 			cmn_err(CE_WARN,
    213 			    "Memory scrubber failed to initialize");
    214 		}
    215 	}
    216 #endif /* _HW_MEMSCRUB_SUPPORT */
    217 }
    218 
/*
 * Halt the present CPU until awoken via an interrupt.
 * This routine should only be invoked if cpu_halt_cpu()
 * exists and is supported, see mach_cpu_halt_idle()
 *
 * Takes no arguments and returns nothing.  It returns without halting
 * if disp_anywork() reports work; otherwise it returns once something is
 * runnable on the local dispatch queue, or this CPU's bit has been
 * removed from the partition's halted set (online case), or the CPU is
 * no longer flagged CPU_OFFLINE (offline case).
 */
void
cpu_halt(void)
{
	cpu_t *cpup = CPU;
	processorid_t cpu_sid = cpup->cpu_seqid;
	cpupart_t *cp = cpup->cpu_part;
	int hset_update = 1;	/* cleared below when this CPU is offline */
	/* volatile so that the loop below re-reads the count each pass */
	volatile int *p = &cpup->cpu_disp->disp_nrunnable;
	uint_t s;		/* value handed back to enable_vec_intr() */

	/*
	 * If this CPU is online then we should note our halting
	 * by adding ourselves to the partition's halted CPU
	 * bitset. This allows other CPUs to find/awaken us when
	 * work becomes available.  An offline CPU never enters the
	 * bitset.
	 */
	if (CPU->cpu_flags & CPU_OFFLINE)
		hset_update = 0;

	/*
	 * Add ourselves to the partition's halted CPUs bitset
	 * and set our HALTED flag, if necessary.
	 *
	 * When a thread becomes runnable, it is placed on the queue
	 * and then the halted cpu bitset is checked to determine who
	 * (if anyone) should be awoken. We therefore need to first
	 * add ourselves to the halted bitset, and then check if there
	 * is any work available.  The order is important to prevent a race
	 * that can lead to work languishing on a run queue somewhere while
	 * this CPU remains halted.
	 *
	 * Either the producing CPU will see we're halted and will awaken us,
	 * or this CPU will see the work available in disp_anywork()
	 */
	if (hset_update) {
		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
		membar_producer();
		bitset_atomic_add(&cp->cp_haltset, cpu_sid);
	}

	/*
	 * Check to make sure there's really nothing to do.
	 * Work destined for this CPU may become available after
	 * this check. We'll be notified through the clearing of our
	 * bit in the halted CPU bitset, and a poke.
	 *
	 * If there is work, undo the halted flag and bitset entry set
	 * above before returning.
	 */
	if (disp_anywork()) {
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	/*
	 * We're on our way to being halted.  Wait until something becomes
	 * runnable locally or we are awakened (i.e. removed from the halt
	 * set).  Note that the call to cpu_halt_cpu() can return even if we
	 * have nothing to do.
	 *
	 * Disable interrupts now, so that we'll awaken immediately
	 * after halting if someone tries to poke us between now and
	 * the time we actually halt.
	 *
	 * We check for the presence of our bit after disabling interrupts.
	 * If it's cleared, we'll return. If the bit is cleared after
	 * we check then the poke will pop us out of the halted state.
	 * Also, if the offlined CPU has been brought back on-line, then
	 * we return as well.
	 *
	 * The ordering of the poke and the clearing of the bit by cpu_wakeup
	 * is important.
	 * cpu_wakeup() must clear, then poke.
	 * cpu_halt() must disable interrupts, then check for the bit.
	 *
	 * The check for anything locally runnable is here for performance
	 * and isn't needed for correctness. disp_nrunnable ought to be
	 * in our cache still, so it's inexpensive to check, and if there
	 * is anything runnable we won't have to wait for the poke.
	 *
	 * Any interrupt will awaken the cpu from halt. Looping here
	 * will filter spurious interrupts that wake us up, but don't
	 * represent a need for us to head back out to idle().  This
	 * will enable the idle loop to be more efficient and sleep in
	 * the processor pipeline for a larger percent of the time,
	 * which returns useful cycles to the peer hardware strand
	 * that shares the pipeline.
	 */
	s = disable_vec_intr();
	while (*p == 0 &&
	    ((hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid)) ||
	    (!hset_update && (CPU->cpu_flags & CPU_OFFLINE)))) {

		DTRACE_PROBE1(idle__state__transition,
		    uint_t, IDLE_STATE_HALTED);
		(void) cpu_halt_cpu();
		DTRACE_PROBE1(idle__state__transition,
		    uint_t, IDLE_STATE_NORMAL);

		/*
		 * Briefly re-enable interrupts after each wakeup, then
		 * disable them again before the loop condition is
		 * re-evaluated.
		 */
		enable_vec_intr(s);
		s = disable_vec_intr();
	}

	/*
	 * We're no longer halted.  Clear the halted flag and make sure our
	 * bit is out of the halted set (the loop may have ended because of
	 * local work rather than because the bit was cleared).
	 */
	enable_vec_intr(s);
	if (hset_update) {
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
	}
}
    336 
/*
 * If "cpu" is halted, then wake it up clearing its halted bit in advance.
 * Otherwise, see if other CPUs in the cpu partition are halted and need to
 * be woken up so that they can steal the thread we placed on this CPU.
 * This function is only used on MP systems.
 * This function should only be invoked if cpu_halt_cpu()
 * exists and is supported, see mach_cpu_halt_idle()
 *
 * cpu:   the CPU targeted for wakeup.
 * bound: non-zero when the wakeup is for a bound thread, in which case
 *        no CPU other than "cpu" is awoken.
 */
static void
cpu_wakeup(cpu_t *cpu, int bound)
{
	uint_t		cpu_found;
	processorid_t	cpu_sid;
	cpupart_t	*cp;

	cp = cpu->cpu_part;
	cpu_sid = cpu->cpu_seqid;
	if (bitset_in_set(&cp->cp_haltset, cpu_sid)) {
		/*
		 * Clear the halted bit for that CPU since it will be
		 * poked in a moment.  The clear must come before the
		 * poke; cpu_halt() relies on that order.
		 */
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		/*
		 * We may find the current CPU present in the halted cpu bitset
		 * if we're in the context of an interrupt that occurred
		 * before we had a chance to clear our bit in cpu_halt().
		 * Poking ourself is obviously unnecessary, since if
		 * we're here, we're not halted.
		 */
		if (cpu != CPU)
			poke_cpu(cpu->cpu_id);
		return;
	} else {
		/*
		 * This cpu isn't halted, but it's idle or undergoing a
		 * context switch. No need to awaken anyone else.
		 */
		if (cpu->cpu_thread == cpu->cpu_idle_thread ||
		    cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL)
			return;
	}

	/*
	 * No need to wake up other CPUs if this is for a bound thread.
	 */
	if (bound)
		return;

	/*
	 * The CPU specified for wakeup isn't currently halted, so check
	 * to see if there are any other halted CPUs in the partition,
	 * and if there are then awaken one.
	 *
	 * If possible, try to select a CPU close to the target, since this
	 * will likely trigger a migration.
	 *
	 * bitset_find() returning (uint_t)-1 means the halted set is empty.
	 * A negative return from bitset_atomic_test_and_del() makes us look
	 * again -- presumably the bit was cleared by someone else between
	 * the find and the delete; confirm against the bitset implementation.
	 */
	do {
		cpu_found = bitset_find(&cp->cp_haltset);
		if (cpu_found == (uint_t)-1)
			return;
	} while (bitset_atomic_test_and_del(&cp->cp_haltset, cpu_found) < 0);

	/* As above, there is no point in poking the CPU we are running on. */
	if (cpu_found != CPU->cpu_seqid)
		poke_cpu(cpu_seq[cpu_found]->cpu_id);
}
    403 
    404 void
    405 mach_cpu_halt_idle(void)
    406 {
    407 	if (enable_halt_idle_cpus) {
    408 		if (&cpu_halt_cpu) {
    409 			idle_cpu = cpu_halt;
    410 			disp_enq_thread = cpu_wakeup;
    411 		}
    412 	}
    413 }
    414 
/*
 * Stub: interrupt mondo queues are not applicable to sun4u.
 * The cp argument is ignored and the function always returns 0.
 */
/*ARGSUSED*/
int
cpu_intrq_setup(struct cpu *cp)
{
	/* Interrupt mondo queues not applicable to sun4u */
	return (0);
}
    422 
/*
 * Stub: interrupt mondo queues are not applicable to sun4u, so there is
 * nothing to clean up.  The cp argument is ignored.
 */
/*ARGSUSED*/
void
cpu_intrq_cleanup(struct cpu *cp)
{
	/* Interrupt mondo queues not applicable to sun4u */
}
    429 
/*
 * Stub: interrupt/error queues are not applicable to sun4u, so there is
 * nothing to register.  The cp argument is ignored.
 */
/*ARGSUSED*/
void
cpu_intrq_register(struct cpu *cp)
{
	/* Interrupt/error queues not applicable to sun4u */
}
    436 
/*
 * Stub: hypervisor traptrace buffer setup is not applicable to sun4u.
 * The cpuid argument is ignored.
 */
/*ARGSUSED*/
void
mach_htraptrace_setup(int cpuid)
{
	/* Setup hypervisor traptrace buffer, not applicable to sun4u */
}
    443 
/*
 * Stub: enabling/disabling hypervisor traptracing is not applicable to
 * sun4u.  The cpuid argument is ignored.
 */
/*ARGSUSED*/
void
mach_htraptrace_configure(int cpuid)
{
	/* enable/disable hypervisor traptracing, not applicable to sun4u */
}
    450 
/*
 * Stub: hypervisor traptrace buffer cleanup is not applicable to sun4u.
 * The cpuid argument is ignored.
 */
/*ARGSUSED*/
void
mach_htraptrace_cleanup(int cpuid)
{
	/* cleanup hypervisor traptrace buffer, not applicable to sun4u */
}
    457 
/*
 * Stub: intentionally empty in this file; the Machine Description
 * framework exists only on sun4v.
 */
void
mach_descrip_startup_init(void)
{
	/*
	 * Only for sun4v.
	 * Initialize Machine description framework during startup.
	 */
}
/*
 * Stub: intentionally empty in this file; the Machine Description
 * framework exists only on sun4v.
 */
void
mach_descrip_startup_fini(void)
{
	/*
	 * Only for sun4v.
	 * Clean up Machine Description framework during startup.
	 */
}
    474 
/*
 * Stub: intentionally empty in this file; the Machine Description
 * framework exists only on sun4v.
 */
void
mach_descrip_init(void)
{
	/*
	 * Only for sun4v.
	 * Initialize Machine description framework.
	 */
}
    483 
/*
 * Stub: there are no hypervisor services to set up on sun4u.
 */
void
hsvc_setup(void)
{
	/* Setup hypervisor services, not applicable to sun4u */
}
    489 
/*
 * Stub: hook for loading machine class specific drivers; it loads
 * nothing in this file.
 */
void
load_mach_drivers(void)
{
	/* Currently no machine class (sun4u) specific drivers to load */
}
    495 
    496 /*
    497  * Return true if the machine we're running on is a Positron.
    498  * (Positron is an unsupported developers platform.)
    499  */
    500 int
    501 iam_positron(void)
    502 {
    503 	char model[32];
    504 	const char proto_model[] = "SUNW,501-2732";
    505 	pnode_t root = prom_rootnode();
    506 
    507 	if (prom_getproplen(root, "model") != sizeof (proto_model))
    508 		return (0);
    509 
    510 	(void) prom_getprop(root, "model", model);
    511 	if (strcmp(model, proto_model) == 0)
    512 		return (1);
    513 	return (0);
    514 }
    515 
    516 /*
    517  * Find a physically contiguous area of twice the largest ecache size
    518  * to be used while doing displacement flush of ecaches.
    519  */
    520 uint64_t
    521 ecache_flush_address(void)
    522 {
    523 	struct memlist *pmem;
    524 	uint64_t flush_size;
    525 	uint64_t ret_val;
    526 
    527 	flush_size = ecache_size * 2;
    528 	for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
    529 		ret_val = P2ROUNDUP(pmem->ml_address, ecache_size);
    530 		if (ret_val + flush_size <= pmem->ml_address + pmem->ml_size)
    531 			return (ret_val);
    532 	}
    533 	return ((uint64_t)-1);
    534 }
    535 
    536 /*
    537  * Called with the memlist lock held to say that phys_install has
    538  * changed.
    539  */
    540 void
    541 phys_install_has_changed(void)
    542 {
    543 	/*
    544 	 * Get the new address into a temporary just in case panicking
    545 	 * involves use of ecache_flushaddr.
    546 	 */
    547 	uint64_t new_addr;
    548 
    549 	new_addr = ecache_flush_address();
    550 	if (new_addr == (uint64_t)-1) {
    551 		cmn_err(CE_PANIC,
    552 		    "ecache_flush_address(): failed, ecache_size=%x",
    553 		    ecache_size);
    554 		/*NOTREACHED*/
    555 	}
    556 	ecache_flushaddr = new_addr;
    557 	membar_producer();
    558 }
    559