Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/machsystm.h>
     28 #include <sys/archsystm.h>
     29 #include <sys/prom_plat.h>
     30 #include <sys/promif.h>
     31 #include <sys/vm.h>
     32 #include <sys/cpu.h>
     33 #include <sys/bitset.h>
     34 #include <sys/cpupart.h>
     35 #include <sys/disp.h>
     36 #include <sys/hypervisor_api.h>
     37 #include <sys/traptrace.h>
     38 #include <sys/modctl.h>
     39 #include <sys/ldoms.h>
     40 #include <sys/cpu_module.h>
     41 #include <sys/mutex_impl.h>
     42 #include <sys/rwlock.h>
     43 #include <sys/sdt.h>
     44 #include <sys/cmt.h>
     45 #include <vm/vm_dep.h>
     46 
/*
 * Hypervisor traptrace switch.  It defaults to 1 when the kernel is built
 * with TRAPTRACE and to 0 otherwise, and is tested by
 * mach_htraptrace_setup(), mach_htraptrace_configure() and
 * mach_htraptrace_cleanup() in this file.
 */
#ifdef TRAPTRACE
int mach_htraptrace_enable = 1;
#else
int mach_htraptrace_enable = 0;
#endif
/*
 * Set to 1 by mach_htraptrace_configure() once hypervisor tracing has been
 * enabled on the htrap_tr0 buffer, and reset to 0 when tracing on that
 * buffer is disabled again.
 */
int htrap_tr0_inuse = 0;
extern char htrap_tr0[];	/* prealloc buf for boot cpu */

/*
 * Base of the MMU fault status areas.  ndata_alloc_mmfsa() allocates
 * MMFSA_SIZE bytes for each of max_ncpus CPUs, and setup_trap_table()
 * selects the slot of the running CPU by cpu_id.
 */
caddr_t	mmu_fault_status_area;

/* Defined elsewhere; called at the end of setup_trap_table(). */
extern void sfmmu_set_tsbs(void);
/*
 * CPU IDLE optimization variables/routines
 *
 * When enable_halt_idle_cpus is non-zero, mach_cpu_halt_idle() installs
 * cpu_halt() as idle_cpu and cpu_wakeup() as disp_enq_thread.
 */
static int enable_halt_idle_cpus = 1;

/*
 * Defines for the idle_state_transition DTrace probe
 *
 * The probe fires when the CPU undergoes an idle state change (e.g. hv yield).
 * The argument passed is the state to which the CPU is transitioning.
 *
 * The states are defined here.
 */
#define	IDLE_STATE_NORMAL 0
#define	IDLE_STATE_YIELDED 1

/*
 * sun4v defaults for the clock tick tunables.  startup_platform() applies
 * them only when the corresponding tunable is still 0.
 */
#define	SUN4V_CLOCK_TICK_THRESHOLD	64
#define	SUN4V_CLOCK_TICK_NCPUS		64

extern int	clock_tick_threshold;
extern int	clock_tick_ncpus;
     79 
     80 void
     81 setup_trap_table(void)
     82 {
     83 	caddr_t mmfsa_va;
     84 	extern	 caddr_t mmu_fault_status_area;
     85 	mmfsa_va =
     86 	    mmu_fault_status_area + (MMFSA_SIZE * CPU->cpu_id);
     87 
     88 	intr_init(CPU);		/* init interrupt request free list */
     89 	setwstate(WSTATE_KERN);
     90 	set_mmfsa_scratchpad(mmfsa_va);
     91 	prom_set_mmfsa_traptable(&trap_table, va_to_pa(mmfsa_va));
     92 	sfmmu_set_tsbs();
     93 }
     94 
     95 void
     96 phys_install_has_changed(void)
     97 {
     98 
     99 }
    100 
/*
 * Halt the present CPU until awoken via an interrupt
 *
 * Installed as the idle_cpu routine by mach_cpu_halt_idle().  The CPU
 * advertises itself in its partition's halted-CPU bitset (unless it is
 * offline), re-checks for work, and then yields to the hypervisor until
 * something is runnable locally or it has been removed from the bitset.
 */
static void
cpu_halt(void)
{
	cpu_t *cpup = CPU;
	processorid_t cpu_sid = cpup->cpu_seqid;
	cpupart_t *cp = cpup->cpu_part;
	/* non-zero when this CPU takes part in the halted-CPU bitset protocol */
	int hset_update = 1;
	/* local run queue length, polled in the yield loop below */
	volatile int *p = &cpup->cpu_disp->disp_nrunnable;
	/* value returned by disable_vec_intr(), handed back to enable_vec_intr() */
	uint_t s;

	/*
	 * If this CPU is online then we should note our halting
	 * by adding ourselves to the partition's halted CPU
	 * bitset. This allows other CPUs to find/awaken us when
	 * work becomes available.
	 */
	if (CPU->cpu_flags & CPU_OFFLINE)
		hset_update = 0;

	/*
	 * Add ourselves to the partition's halted CPUs bitset
	 * and set our HALTED flag, if necessary.
	 *
	 * When a thread becomes runnable, it is placed on the queue
	 * and then the halted cpu bitset is checked to determine who
	 * (if anyone) should be awoken. We therefore need to first
	 * add ourselves to the halted bitset, and then check if there
	 * is any work available.  The order is important to prevent a race
	 * that can lead to work languishing on a run queue somewhere while
	 * this CPU remains halted.
	 *
	 * Either the producing CPU will see we're halted and will awaken us,
	 * or this CPU will see the work available in disp_anywork()
	 */
	if (hset_update) {
		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
		/* the flag store is issued before the bitset update */
		membar_producer();
		bitset_atomic_add(&cp->cp_haltset, cpu_sid);
	}

	/*
	 * Check to make sure there's really nothing to do.
	 * Work destined for this CPU may become available after
	 * this check. We'll be notified through the clearing of our
	 * bit in the halted CPU bitset, and a poke.
	 */
	if (disp_anywork()) {
		/* work showed up: undo the halted state and go back to idle() */
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	/*
	 * We're on our way to being halted.  Wait until something becomes
	 * runnable locally or we are awakened (i.e. removed from the halt
	 * set).  Note that the call to hv_cpu_yield() can return even if we
	 * have nothing to do.
	 *
	 * Disable interrupts now, so that we'll awaken immediately
	 * after halting if someone tries to poke us between now and
	 * the time we actually halt.
	 *
	 * We check for the presence of our bit after disabling interrupts.
	 * If it's cleared, we'll return. If the bit is cleared after
	 * we check then the poke will pop us out of the halted state.
	 * Also, if the offlined CPU has been brought back on-line, then
	 * we return as well.
	 *
	 * The ordering of the poke and the clearing of the bit by cpu_wakeup
	 * is important.
	 * cpu_wakeup() must clear, then poke.
	 * cpu_halt() must disable interrupts, then check for the bit.
	 *
	 * The check for anything locally runnable is here for performance
	 * and isn't needed for correctness. disp_nrunnable ought to be
	 * in our cache still, so it's inexpensive to check, and if there
	 * is anything runnable we won't have to wait for the poke.
	 *
	 * Any interrupt will awaken the cpu from halt. Looping here
	 * will filter spurious interrupts that wake us up, but don't
	 * represent a need for us to head back out to idle().  This
	 * will enable the idle loop to be more efficient and sleep in
	 * the processor pipeline for a larger percent of the time,
	 * which returns useful cycles to the peer hardware strand
	 * that shares the pipeline.
	 */
	s = disable_vec_intr();
	while (*p == 0 &&
	    ((hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid)) ||
	    (!hset_update && (CPU->cpu_flags & CPU_OFFLINE)))) {

		/* the probes bracket the hypervisor yield */
		DTRACE_PROBE1(idle__state__transition,
		    uint_t, IDLE_STATE_YIELDED);
		(void) hv_cpu_yield();
		DTRACE_PROBE1(idle__state__transition,
		    uint_t, IDLE_STATE_NORMAL);

		/*
		 * Briefly re-enable vector interrupts (presumably so the
		 * interrupt that ended the yield can be taken), then
		 * disable them again before the loop condition is
		 * re-evaluated.
		 */
		enable_vec_intr(s);
		s = disable_vec_intr();
	}

	/*
	 * We're no longer halted
	 */
	enable_vec_intr(s);
	if (hset_update) {
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
	}
}
    216 
    217 /*
    218  * If "cpu" is halted, then wake it up clearing its halted bit in advance.
    219  * Otherwise, see if other CPUs in the cpu partition are halted and need to
    220  * be woken up so that they can steal the thread we placed on this CPU.
    221  * This function is only used on MP systems.
    222  */
    223 static void
    224 cpu_wakeup(cpu_t *cpu, int bound)
    225 {
    226 	uint_t		cpu_found;
    227 	processorid_t	cpu_sid;
    228 	cpupart_t	*cp;
    229 
    230 	cp = cpu->cpu_part;
    231 	cpu_sid = cpu->cpu_seqid;
    232 	if (bitset_in_set(&cp->cp_haltset, cpu_sid)) {
    233 		/*
    234 		 * Clear the halted bit for that CPU since it will be
    235 		 * poked in a moment.
    236 		 */
    237 		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
    238 		/*
    239 		 * We may find the current CPU present in the halted cpu bitset
    240 		 * if we're in the context of an interrupt that occurred
    241 		 * before we had a chance to clear our bit in cpu_halt().
    242 		 * Poking ourself is obviously unnecessary, since if
    243 		 * we're here, we're not halted.
    244 		 */
    245 		if (cpu != CPU)
    246 			poke_cpu(cpu->cpu_id);
    247 		return;
    248 	} else {
    249 		/*
    250 		 * This cpu isn't halted, but it's idle or undergoing a
    251 		 * context switch. No need to awaken anyone else.
    252 		 */
    253 		if (cpu->cpu_thread == cpu->cpu_idle_thread ||
    254 		    cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL)
    255 			return;
    256 	}
    257 
    258 	/*
    259 	 * No need to wake up other CPUs if this is for a bound thread.
    260 	 */
    261 	if (bound)
    262 		return;
    263 
    264 	/*
    265 	 * The CPU specified for wakeup isn't currently halted, so check
    266 	 * to see if there are any other halted CPUs in the partition,
    267 	 * and if there are then awaken one.
    268 	 */
    269 	do {
    270 		cpu_found = bitset_find(&cp->cp_haltset);
    271 		if (cpu_found == (uint_t)-1)
    272 			return;
    273 	} while (bitset_atomic_test_and_del(&cp->cp_haltset, cpu_found) < 0);
    274 
    275 	if (cpu_found != CPU->cpu_seqid)
    276 		poke_cpu(cpu_seq[cpu_found]->cpu_id);
    277 }
    278 
    279 void
    280 mach_cpu_halt_idle(void)
    281 {
    282 	if (enable_halt_idle_cpus) {
    283 		idle_cpu = cpu_halt;
    284 		disp_enq_thread = cpu_wakeup;
    285 	}
    286 }
    287 
    288 int
    289 ndata_alloc_mmfsa(struct memlist *ndata)
    290 {
    291 	size_t	size;
    292 
    293 	size = MMFSA_SIZE * max_ncpus;
    294 	mmu_fault_status_area = ndata_alloc(ndata, size, ecache_alignsize);
    295 	if (mmu_fault_status_area == NULL)
    296 		return (-1);
    297 	return (0);
    298 }
    299 
    300 void
    301 mach_memscrub(void)
    302 {
    303 	/* no memscrub support for sun4v for now */
    304 }
    305 
    306 void
    307 mach_fpras()
    308 {
    309 	/* no fpras support for sun4v for now */
    310 }
    311 
    312 void
    313 mach_hw_copy_limit(void)
    314 {
    315 	/* HW copy limits set by individual CPU module */
    316 }
    317 
    318 /*
    319  * We need to enable soft ring functionality on Niagara platforms since
    320  * one strand can't handle interrupts for a 1Gb NIC. So set the tunable
    321  * mac_soft_ring_enable by default on this platform.
    322  * mac_soft_ring_enable variable is defined in space.c and used by MAC
    323  * module. This tunable in concert with mac_soft_ring_count (declared
    324  * in mac.h) will configure the number of fanout soft rings for a link.
    325  */
    326 extern boolean_t mac_soft_ring_enable;
    327 void
    328 startup_platform(void)
    329 {
    330 	mac_soft_ring_enable = B_TRUE;
    331 	if (clock_tick_threshold == 0)
    332 		clock_tick_threshold = SUN4V_CLOCK_TICK_THRESHOLD;
    333 	if (clock_tick_ncpus == 0)
    334 		clock_tick_ncpus = SUN4V_CLOCK_TICK_NCPUS;
    335 	/* set per-platform constants for mutex_backoff */
    336 	mutex_backoff_base = 1;
    337 	mutex_cap_factor = 4;
    338 	if (l2_cache_node_count() > 1) {
    339 		/* VF for example */
    340 		mutex_backoff_base = 2;
    341 		mutex_cap_factor = 64;
    342 	}
    343 	rw_lock_backoff = default_lock_backoff;
    344 	rw_lock_delay = default_lock_delay;
    345 }
    346 
    347 /*
    348  * This function sets up hypervisor traptrace buffer
    349  * This routine is called by the boot cpu only
    350  */
    351 void
    352 mach_htraptrace_setup(int cpuid)
    353 {
    354 	TRAP_TRACE_CTL	*ctlp;
    355 	int bootcpuid = getprocessorid(); /* invoked on boot cpu only */
    356 
    357 	if (mach_htraptrace_enable && ((cpuid != bootcpuid) ||
    358 	    !htrap_tr0_inuse)) {
    359 		ctlp = &trap_trace_ctl[cpuid];
    360 		ctlp->d.hvaddr_base = (cpuid == bootcpuid) ? htrap_tr0 :
    361 		    contig_mem_alloc_align(HTRAP_TSIZE, HTRAP_TSIZE);
    362 		if (ctlp->d.hvaddr_base == NULL) {
    363 			ctlp->d.hlimit = 0;
    364 			ctlp->d.hpaddr_base = NULL;
    365 			cmn_err(CE_WARN, "!cpu%d: failed to allocate HV "
    366 			    "traptrace buffer", cpuid);
    367 		} else {
    368 			ctlp->d.hlimit = HTRAP_TSIZE;
    369 			ctlp->d.hpaddr_base = va_to_pa(ctlp->d.hvaddr_base);
    370 		}
    371 	}
    372 }
    373 
    374 /*
    375  * This function enables or disables the hypervisor traptracing
    376  */
    377 void
    378 mach_htraptrace_configure(int cpuid)
    379 {
    380 	uint64_t ret;
    381 	uint64_t prev_buf, prev_bufsize;
    382 	uint64_t prev_enable;
    383 	uint64_t size;
    384 	TRAP_TRACE_CTL	*ctlp;
    385 
    386 	ctlp = &trap_trace_ctl[cpuid];
    387 	if (mach_htraptrace_enable) {
    388 		if ((ctlp->d.hvaddr_base != NULL) &&
    389 		    ((ctlp->d.hvaddr_base != htrap_tr0) ||
    390 		    (!htrap_tr0_inuse))) {
    391 			ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize);
    392 			if ((ret == H_EOK) && (prev_bufsize != 0)) {
    393 				cmn_err(CE_CONT,
    394 				    "!cpu%d: previous HV traptrace buffer of "
    395 				    "size 0x%lx at address 0x%lx", cpuid,
    396 				    prev_bufsize, prev_buf);
    397 			}
    398 
    399 			ret = hv_ttrace_buf_conf(ctlp->d.hpaddr_base,
    400 			    ctlp->d.hlimit /
    401 			    (sizeof (struct htrap_trace_record)), &size);
    402 			if (ret == H_EOK) {
    403 				ret = hv_ttrace_enable(\
    404 				    (uint64_t)TRAP_TENABLE_ALL, &prev_enable);
    405 				if (ret != H_EOK) {
    406 					cmn_err(CE_WARN,
    407 					    "!cpu%d: HV traptracing not "
    408 					    "enabled, ta: 0x%x returned error: "
    409 					    "%ld", cpuid, TTRACE_ENABLE, ret);
    410 				} else {
    411 					if (ctlp->d.hvaddr_base == htrap_tr0)
    412 						htrap_tr0_inuse = 1;
    413 				}
    414 			} else {
    415 				cmn_err(CE_WARN,
    416 				    "!cpu%d: HV traptrace buffer not "
    417 				    "configured, ta: 0x%x returned error: %ld",
    418 				    cpuid, TTRACE_BUF_CONF, ret);
    419 			}
    420 			/*
    421 			 * set hvaddr_base to NULL when traptrace buffer
    422 			 * registration fails
    423 			 */
    424 			if (ret != H_EOK) {
    425 				ctlp->d.hvaddr_base = NULL;
    426 				ctlp->d.hlimit = 0;
    427 				ctlp->d.hpaddr_base = NULL;
    428 			}
    429 		}
    430 	} else {
    431 		ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize);
    432 		if ((ret == H_EOK) && (prev_bufsize != 0)) {
    433 			ret = hv_ttrace_enable((uint64_t)TRAP_TDISABLE_ALL,
    434 			    &prev_enable);
    435 			if (ret == H_EOK) {
    436 				if (ctlp->d.hvaddr_base == htrap_tr0)
    437 					htrap_tr0_inuse = 0;
    438 				ctlp->d.hvaddr_base = NULL;
    439 				ctlp->d.hlimit = 0;
    440 				ctlp->d.hpaddr_base = NULL;
    441 			} else
    442 				cmn_err(CE_WARN,
    443 				    "!cpu%d: HV traptracing is not disabled, "
    444 				    "ta: 0x%x returned error: %ld",
    445 				    cpuid, TTRACE_ENABLE, ret);
    446 		}
    447 	}
    448 }
    449 
    450 /*
    451  * This function cleans up the hypervisor traptrace buffer
    452  */
    453 void
    454 mach_htraptrace_cleanup(int cpuid)
    455 {
    456 	if (mach_htraptrace_enable) {
    457 		TRAP_TRACE_CTL *ctlp;
    458 		caddr_t httrace_buf_va;
    459 
    460 		ASSERT(cpuid < max_ncpus);
    461 		ctlp = &trap_trace_ctl[cpuid];
    462 		httrace_buf_va = ctlp->d.hvaddr_base;
    463 		if (httrace_buf_va == htrap_tr0) {
    464 			bzero(httrace_buf_va, HTRAP_TSIZE);
    465 		} else if (httrace_buf_va != NULL) {
    466 			contig_mem_free(httrace_buf_va, HTRAP_TSIZE);
    467 		}
    468 		ctlp->d.hvaddr_base = NULL;
    469 		ctlp->d.hlimit = 0;
    470 		ctlp->d.hpaddr_base = NULL;
    471 	}
    472 }
    473 
    474 /*
    475  * Load any required machine class (sun4v) specific drivers.
    476  */
    477 void
    478 load_mach_drivers(void)
    479 {
    480 	/*
    481 	 * We don't want to load these LDOMs-specific
    482 	 * modules if domaining is not supported.  Also,
    483 	 * we must be able to run on non-LDOMs firmware.
    484 	 */
    485 	if (!domaining_supported())
    486 		return;
    487 
    488 	/*
    489 	 * Load the core domain services module
    490 	 */
    491 	if (modload("misc", "ds") == -1)
    492 		cmn_err(CE_NOTE, "!'ds' module failed to load");
    493 
    494 	/*
    495 	 * Load the rest of the domain services
    496 	 */
    497 	if (modload("misc", "fault_iso") == -1)
    498 		cmn_err(CE_NOTE, "!'fault_iso' module failed to load");
    499 
    500 	if (modload("misc", "platsvc") == -1)
    501 		cmn_err(CE_NOTE, "!'platsvc' module failed to load");
    502 
    503 	if (domaining_enabled() && modload("misc", "dr_cpu") == -1)
    504 		cmn_err(CE_NOTE, "!'dr_cpu' module failed to load");
    505 
    506 	if (modload("misc", "dr_io") == -1)
    507 		cmn_err(CE_NOTE, "!'dr_io' module failed to load");
    508 
    509 	if (modload("misc", "dr_mem") == -1)
    510 		cmn_err(CE_NOTE, "!'dr_mem' module failed to load");
    511 
    512 	/*
    513 	 * Attempt to attach any virtual device servers. These
    514 	 * drivers must be loaded at start of day so that they
    515 	 * can respond to any updates to the machine description.
    516 	 *
    517 	 * Since it is quite likely that a domain will not support
    518 	 * one or more of these servers, failures are ignored.
    519 	 */
    520 
    521 	/* virtual disk server */
    522 	(void) i_ddi_attach_hw_nodes("vds");
    523 
    524 	/* virtual network switch */
    525 	(void) i_ddi_attach_hw_nodes("vsw");
    526 
    527 	/* virtual console concentrator */
    528 	(void) i_ddi_attach_hw_nodes("vcc");
    529 }
    530