Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * Platform specific implementation code
     28  * Currently only suspend to RAM is supported (ACPI S3)
     29  */
     30 
     31 #define	SUNDDI_IMPL
     32 
     33 #include <sys/types.h>
     34 #include <sys/promif.h>
     35 #include <sys/prom_isa.h>
     36 #include <sys/prom_plat.h>
     37 #include <sys/cpuvar.h>
     38 #include <sys/pte.h>
     39 #include <vm/hat.h>
     40 #include <vm/page.h>
     41 #include <vm/as.h>
     42 #include <sys/cpr.h>
     43 #include <sys/kmem.h>
     44 #include <sys/clock.h>
     45 #include <sys/kmem.h>
     46 #include <sys/panic.h>
     47 #include <vm/seg_kmem.h>
     48 #include <sys/cpu_module.h>
     49 #include <sys/callb.h>
     50 #include <sys/machsystm.h>
     51 #include <sys/vmsystm.h>
     52 #include <sys/systm.h>
     53 #include <sys/archsystm.h>
     54 #include <sys/stack.h>
     55 #include <sys/fs/ufs_fs.h>
     56 #include <sys/memlist.h>
     57 #include <sys/bootconf.h>
     58 #include <sys/thread.h>
     59 #include <sys/x_call.h>
     60 #include <sys/smp_impldefs.h>
     61 #include <vm/vm_dep.h>
     62 #include <sys/psm.h>
     63 #include <sys/epm.h>
     64 #include <sys/cpr_wakecode.h>
     65 #include <sys/x86_archext.h>
     66 #include <sys/reboot.h>
     67 #include <sys/acpi/acpi.h>
     68 #include <sys/acpica.h>
     69 
/* printf-style format used by prt_other_cpus() when printing kgsbase */
#define	AFMT	"%lx"

/* Defined elsewhere; both are written by i_cpr_pre_resume_cpus() */
extern int	flushes_require_xcalls;
extern cpuset_t	cpu_ready_set;

#if defined(__amd64)
/* Its offset from wc_rm_start is handed to init_real_mode_platter() */
extern void	*wc_long_mode_64(void);
#endif	/* __amd64 */
extern int	tsc_gethrtime_enable;
extern	void	i_cpr_start_cpu(void);

ushort_t	cpr_mach_type = CPR_MACHTYPE_X86;
/* Function pointer preset to i_cpr_start_cpu() */
void		(*cpr_start_cpu_func)(void) = i_cpr_start_cpu;

/*
 * Table of saved per-cpu wakeup contexts.  Allocated on demand by
 * i_cpr_alloc_cpus() with ncpus entries and released by i_cpr_free_cpus().
 */
static wc_cpu_t	*wc_other_cpus = NULL;
/*
 * Set of cpus that have reached i_cpr_start_cpu() during resume;
 * i_cpr_pre_resume_cpus() waits for each cpu to show up here.
 */
static cpuset_t procset;

static void
init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt);

static int i_cpr_platform_alloc(psm_state_request_t *req);
static void i_cpr_platform_free(psm_state_request_t *req);
static int i_cpr_save_apic(psm_state_request_t *req);
static int i_cpr_restore_apic(psm_state_request_t *req);
static int wait_for_set(cpuset_t *set, int who);

static	void i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu);
void i_cpr_restore_stack(kthread_t *t, greg_t *save_stack);

/*
 * Select which of t_stkbase/t_stk is treated as the start and which as
 * the end of a thread's stack.  i_cpr_save_stack() and
 * i_cpr_restore_stack() use the pair to size and copy the stack.
 */
#ifdef STACK_GROWTH_DOWN
#define	CPR_GET_STACK_START(t) ((t)->t_stkbase)
#define	CPR_GET_STACK_END(t) ((t)->t_stk)
#else
#define	CPR_GET_STACK_START(t) ((t)->t_stk)
#define	CPR_GET_STACK_END(t) ((t)->t_stkbase)
#endif	/* STACK_GROWTH_DOWN */
    106 
    107 /*
    108  * restart paused slave cpus
    109  */
    110 void
    111 i_cpr_machdep_setup(void)
    112 {
    113 	if (ncpus > 1) {
    114 		CPR_DEBUG(CPR_DEBUG1, ("MP restarted...\n"));
    115 		mutex_enter(&cpu_lock);
    116 		start_cpus();
    117 		mutex_exit(&cpu_lock);
    118 	}
    119 }
    120 
    121 
/*
 * Stop all interrupt activities in the system.
 * Implemented as a call to spl7(); the value spl7() returns is discarded.
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}
    130 
/*
 * Set machine up to take interrupts.
 * Implemented as a call to spl0(); the value spl0() returns is discarded.
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}
    139 
/*
 * Save miscellaneous information which needs to be written to the
 * state file.  This information is required to re-initialize
 * kernel/prom handshaking.
 *
 * Nothing is saved here: the body only asserts on a local that is
 * always zero, so the assertion cannot hold when it is evaluated.
 * NOTE(review): presumably this marks the routine as "must not be
 * called" for the suspend-to-RAM-only implementation -- confirm.
 */
void
i_cpr_save_machdep_info(void)
{
	int notcalled = 0;
	ASSERT(notcalled);
}
    151 
    152 
/*
 * Empty on this platform; the function does nothing.
 */
void
i_cpr_set_tbr(void)
{
}
    157 
    158 
/*
 * Return the id of the boot cpu.  This is hard-wired to 0.
 */
processorid_t
i_cpr_bootcpuid(void)
{
	return (0);
}
    164 
    165 /*
    166  * cpu0 should contain bootcpu info
    167  */
    168 cpu_t *
    169 i_cpr_bootcpu(void)
    170 {
    171 	ASSERT(MUTEX_HELD(&cpu_lock));
    172 
    173 	return (cpu_get(i_cpr_bootcpuid()));
    174 }
    175 
/*
 *	Save context for the specified CPU
 *
 * arg carries the index of this cpu's slot in wc_other_cpus (cast to a
 * pointer by the caller).  The routine saves the APIC state, a copy of
 * the current thread's stack and the cpu context into that slot.
 * Always returns NULL.
 *
 * NOTE(review): the slot address is computed before the ASSERT below is
 * evaluated, and the ASSERT checks against NCPU while i_cpr_alloc_cpus()
 * sizes the table with ncpus -- confirm the index can never reach ncpus.
 */
void *
i_cpr_save_context(void *arg)
{
	long	index = (long)arg;
	psm_state_request_t *papic_state;
	int resuming;
	int	ret;
	wc_cpu_t	*wc_cpu = wc_other_cpus + index;

	PMD(PMD_SX, ("i_cpr_save_context() index = %ld\n", index))

	ASSERT(index < NCPU);

	papic_state = &(wc_cpu)->wc_apic_state;

	/* failures below are only caught by ASSERT */
	ret = i_cpr_platform_alloc(papic_state);
	ASSERT(ret == 0);

	ret = i_cpr_save_apic(papic_state);
	ASSERT(ret == 0);

	i_cpr_save_stack(curthread, wc_cpu);

	/*
	 * wc_save_context returns twice, once when suspending and
	 * once when resuming.  As tested here (and in i_cpr_power_down()),
	 * a zero return is treated as the resume pass and a non-zero
	 * return as the suspend pass.
	 */
	resuming = (wc_save_context(wc_cpu) == 0);

	/*
	 * do NOT call any functions after this point, because doing so
	 * will modify the stack that we are running on
	 *
	 * NOTE(review): the code below does make function calls on both
	 * passes; confirm exactly what this warning is meant to forbid.
	 */

	if (resuming) {

		ret = i_cpr_restore_apic(papic_state);
		ASSERT(ret == 0);

		i_cpr_platform_free(papic_state);

		/*
		 * Enable interrupts on this cpu.
		 * Do not bind interrupts to this CPU's local APIC until
		 * the CPU is ready to receive interrupts.
		 */
		ASSERT(CPU->cpu_id != i_cpr_bootcpuid());
		mutex_enter(&cpu_lock);
		cpu_enable_intr(CPU);
		mutex_exit(&cpu_lock);

		/*
		 * Setting the bit in cpu_ready_set must be the last operation
		 * in processor initialization; the boot CPU will continue to
		 * boot once it sees this bit set for all active CPUs.
		 */
		CPUSET_ATOMIC_ADD(cpu_ready_set, CPU->cpu_id);

		PMD(PMD_SX,
		    ("i_cpr_save_context() resuming cpu %d in cpu_ready_set\n",
		    CPU->cpu_id))
	} else {
		/*
		 * Disable interrupts on this CPU so that PSM knows not to bind
		 * interrupts here on resume until the CPU has executed
		 * cpu_enable_intr() (above) in the resume path.
		 * We explicitly do not grab cpu_lock here because at this point
		 * in the suspend process, the boot cpu owns cpu_lock and all
		 * other cpus are also executing in the pause thread (only
		 * modifying their respective CPU structure).
		 */
		(void) cpu_disable_intr(CPU);
	}

	/* the value printed is the derived "resuming" flag, not the raw return */
	PMD(PMD_SX, ("i_cpr_save_context: wc_save_context returns %d\n",
	    resuming))

	return (NULL);
}
    259 
    260 static ushort_t *warm_reset_vector = NULL;
    261 
    262 static ushort_t *
    263 map_warm_reset_vector()
    264 {
    265 	/*LINTED*/
    266 	if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
    267 	    sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
    268 		return (NULL);
    269 
    270 	/*
    271 	 * setup secondary cpu bios boot up vector
    272 	 */
    273 	*warm_reset_vector = (ushort_t)((caddr_t)
    274 	    /*LINTED*/
    275 	    ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
    276 	    + ((ulong_t)rm_platter_va & 0xf));
    277 	warm_reset_vector++;
    278 	*warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);
    279 
    280 	--warm_reset_vector;
    281 	return (warm_reset_vector);
    282 }
    283 
    284 void
    285 i_cpr_pre_resume_cpus()
    286 {
    287 	/*
    288 	 * this is a cut down version of start_other_cpus()
    289 	 * just do the initialization to wake the other cpus
    290 	 */
    291 	unsigned who;
    292 	int boot_cpuid = i_cpr_bootcpuid();
    293 	uint32_t		code_length = 0;
    294 	caddr_t			wakevirt = rm_platter_va;
    295 	/*LINTED*/
    296 	wakecode_t		*wp = (wakecode_t *)wakevirt;
    297 	char *str = "i_cpr_pre_resume_cpus";
    298 	extern int get_tsc_ready();
    299 	int err;
    300 
    301 	/*LINTED*/
    302 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
    303 
    304 	/*
    305 	 * If startup wasn't able to find a page under 1M, we cannot
    306 	 * proceed.
    307 	 */
    308 	if (rm_platter_va == 0) {
    309 		cmn_err(CE_WARN, "Cannot suspend the system because no "
    310 		    "memory below 1M could be found for processor startup");
    311 		return;
    312 	}
    313 
    314 	/*
    315 	 * Copy the real mode code at "real_mode_start" to the
    316 	 * page at rm_platter_va.
    317 	 */
    318 	warm_reset_vector = map_warm_reset_vector();
    319 	if (warm_reset_vector == NULL) {
    320 		PMD(PMD_SX, ("i_cpr_pre_resume_cpus() returning #2\n"))
    321 		return;
    322 	}
    323 
    324 	flushes_require_xcalls = 1;
    325 
    326 	/*
    327 	 * We lock our affinity to the master CPU to ensure that all slave CPUs
    328 	 * do their TSC syncs with the same CPU.
    329 	 */
    330 
    331 	affinity_set(CPU_CURRENT);
    332 
    333 	/*
    334 	 * Mark the boot cpu as being ready and in the procset, since we are
    335 	 * running on that cpu.
    336 	 */
    337 	CPUSET_ONLY(cpu_ready_set, boot_cpuid);
    338 	CPUSET_ONLY(procset, boot_cpuid);
    339 
    340 	for (who = 0; who < ncpus; who++) {
    341 
    342 		wc_cpu_t	*cpup = wc_other_cpus + who;
    343 		wc_desctbr_t	gdt;
    344 
    345 		if (who == boot_cpuid)
    346 			continue;
    347 
    348 		if (!CPU_IN_SET(mp_cpus, who))
    349 			continue;
    350 
    351 		PMD(PMD_SX, ("%s() waking up %d cpu\n", str, who))
    352 
    353 		bcopy(cpup, &(wp->wc_cpu), sizeof (wc_cpu_t));
    354 
    355 		gdt.base = cpup->wc_gdt_base;
    356 		gdt.limit = cpup->wc_gdt_limit;
    357 
    358 #if defined(__amd64)
    359 		code_length = (uint32_t)wc_long_mode_64 - (uint32_t)wc_rm_start;
    360 #else
    361 		code_length = 0;
    362 #endif
    363 
    364 		init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);
    365 
    366 		if ((err = mach_cpuid_start(who, rm_platter_va)) != 0) {
    367 			cmn_err(CE_WARN, "cpu%d: failed to start during "
    368 			    "suspend/resume error %d", who, err);
    369 			continue;
    370 		}
    371 
    372 		PMD(PMD_SX, ("%s() #1 waiting for %d in procset\n", str, who))
    373 
    374 		if (!wait_for_set(&procset, who))
    375 			continue;
    376 
    377 		PMD(PMD_SX, ("%s() %d cpu started\n", str, who))
    378 
    379 		PMD(PMD_SX, ("%s() tsc_ready = %d\n", str, get_tsc_ready()))
    380 
    381 		if (tsc_gethrtime_enable) {
    382 			PMD(PMD_SX, ("%s() calling tsc_sync_master\n", str))
    383 			tsc_sync_master(who);
    384 		}
    385 
    386 		PMD(PMD_SX, ("%s() waiting for %d in cpu_ready_set\n", str,
    387 		    who))
    388 		/*
    389 		 * Wait for cpu to declare that it is ready, we want the
    390 		 * cpus to start serially instead of in parallel, so that
    391 		 * they do not contend with each other in wc_rm_start()
    392 		 */
    393 		if (!wait_for_set(&cpu_ready_set, who))
    394 			continue;
    395 
    396 		/*
    397 		 * do not need to re-initialize dtrace using dtrace_cpu_init
    398 		 * function
    399 		 */
    400 		PMD(PMD_SX, ("%s() cpu %d now ready\n", str, who))
    401 	}
    402 
    403 	affinity_clear();
    404 
    405 	PMD(PMD_SX, ("%s() all cpus now ready\n", str))
    406 
    407 }
    408 
/*
 * Undo the mapping made by map_warm_reset_vector().  The size passed to
 * psm_unmap_phys() is the same sizeof (ushort_t *) that was used when the
 * mapping was created.
 *
 * Note that the parameter shadows the file-scope warm_reset_vector; this
 * function does not modify the file-scope pointer.
 */
static void
unmap_warm_reset_vector(ushort_t *warm_reset_vector)
{
	psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
}
    414 
    415 /*
    416  * We need to setup a 1:1 (virtual to physical) mapping for the
    417  * page containing the wakeup code.
    418  */
    419 static struct as *save_as;	/* when switching to kas */
    420 
    421 static void
    422 unmap_wakeaddr_1to1(uint64_t wakephys)
    423 {
    424 	uintptr_t	wp = (uintptr_t)wakephys;
    425 	hat_setup(save_as->a_hat, 0);	/* switch back from kernel hat */
    426 	hat_unload(kas.a_hat, (caddr_t)wp, PAGESIZE, HAT_UNLOAD);
    427 }
    428 
    429 void
    430 i_cpr_post_resume_cpus()
    431 {
    432 	uint64_t	wakephys = rm_platter_pa;
    433 
    434 	if (warm_reset_vector != NULL)
    435 		unmap_warm_reset_vector(warm_reset_vector);
    436 
    437 	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
    438 	    HAT_UNLOAD);
    439 
    440 	/*
    441 	 * cmi_post_mpstartup() is only required upon boot not upon
    442 	 * resume from RAM
    443 	 */
    444 
    445 	PT(PT_UNDO1to1);
    446 	/* Tear down 1:1 mapping for wakeup code */
    447 	unmap_wakeaddr_1to1(wakephys);
    448 }
    449 
/*
 * Empty on this platform; flag is not used (hence ARGSUSED).
 */
/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
}
    455 
/*
 * Always returns 0.
 * NOTE(review): presumably 0 means "reusable statefiles are not
 * supported" -- confirm against the callers.
 */
int
i_cpr_reusable_supported(void)
{
	return (0);
}
    461 static void
    462 map_wakeaddr_1to1(uint64_t wakephys)
    463 {
    464 	uintptr_t	wp = (uintptr_t)wakephys;
    465 	hat_devload(kas.a_hat, (caddr_t)wp, PAGESIZE, btop(wakephys),
    466 	    (PROT_READ|PROT_WRITE|PROT_EXEC|HAT_STORECACHING_OK|HAT_NOSYNC),
    467 	    HAT_LOAD);
    468 	save_as = curthread->t_procp->p_as;
    469 	hat_setup(kas.a_hat, 0);	/* switch to kernel-only hat */
    470 }
    471 
    472 
    473 void
    474 prt_other_cpus()
    475 {
    476 	int	who;
    477 
    478 	if (ncpus == 1) {
    479 		PMD(PMD_SX, ("prt_other_cpus() other cpu table empty for "
    480 		    "uniprocessor machine\n"))
    481 		return;
    482 	}
    483 
    484 	for (who = 0; who < ncpus; who++) {
    485 
    486 		wc_cpu_t	*cpup = wc_other_cpus + who;
    487 
    488 		PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
    489 		    "idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
    490 		    AFMT ", sp=%lx\n", who,
    491 		    (void *)cpup->wc_gdt_base, cpup->wc_gdt_limit,
    492 		    (void *)cpup->wc_idt_base, cpup->wc_idt_limit,
    493 		    (long)cpup->wc_ldt, (long)cpup->wc_tr,
    494 		    (long)cpup->wc_kgsbase, (long)cpup->wc_rsp))
    495 	}
    496 }
    497 
    498 /*
    499  * Power down the system.
    500  */
    501 int
    502 i_cpr_power_down(int sleeptype)
    503 {
    504 	caddr_t		wakevirt = rm_platter_va;
    505 	uint64_t	wakephys = rm_platter_pa;
    506 	ulong_t		saved_intr;
    507 	uint32_t	code_length = 0;
    508 	wc_desctbr_t	gdt;
    509 	/*LINTED*/
    510 	wakecode_t	*wp = (wakecode_t *)wakevirt;
    511 	/*LINTED*/
    512 	rm_platter_t	*wcpp = (rm_platter_t *)wakevirt;
    513 	wc_cpu_t	*cpup = &(wp->wc_cpu);
    514 	dev_info_t	*ppm;
    515 	int		ret = 0;
    516 	power_req_t	power_req;
    517 	char *str =	"i_cpr_power_down";
    518 #if defined(__amd64)
    519 	/*LINTED*/
    520 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
    521 #endif
    522 	extern int	cpr_suspend_succeeded;
    523 	extern void	kernel_wc_code();
    524 
    525 	ASSERT(sleeptype == CPR_TORAM);
    526 	ASSERT(CPU->cpu_id == 0);
    527 
    528 	if ((ppm = PPM(ddi_root_node())) == NULL) {
    529 		PMD(PMD_SX, ("%s: root node not claimed\n", str))
    530 		return (ENOTTY);
    531 	}
    532 
    533 	PMD(PMD_SX, ("Entering %s()\n", str))
    534 
    535 	PT(PT_IC);
    536 	saved_intr = intr_clear();
    537 
    538 	PT(PT_1to1);
    539 	/* Setup 1:1 mapping for wakeup code */
    540 	map_wakeaddr_1to1(wakephys);
    541 
    542 	PMD(PMD_SX, ("ncpus=%d\n", ncpus))
    543 
    544 	PMD(PMD_SX, ("wc_rm_end - wc_rm_start=%lx WC_CODESIZE=%x\n",
    545 	    ((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)), WC_CODESIZE))
    546 
    547 	PMD(PMD_SX, ("wakevirt=%p, wakephys=%x\n",
    548 	    (void *)wakevirt, (uint_t)wakephys))
    549 
    550 	ASSERT(((size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start)) <
    551 	    WC_CODESIZE);
    552 
    553 	bzero(wakevirt, PAGESIZE);
    554 
    555 	/* Copy code to rm_platter */
    556 	bcopy((caddr_t)wc_rm_start, wakevirt,
    557 	    (size_t)((uint_t)wc_rm_end - (uint_t)wc_rm_start));
    558 
    559 	prt_other_cpus();
    560 
    561 #if defined(__amd64)
    562 
    563 	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
    564 	    (ulong_t)real_mode_platter->rm_cr4, (ulong_t)getcr4()))
    565 	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
    566 	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
    567 
    568 	real_mode_platter->rm_cr4 = getcr4();
    569 	real_mode_platter->rm_pdbr = getcr3();
    570 
    571 	rmp_gdt_init(real_mode_platter);
    572 
    573 	/*
    574 	 * Since the CPU needs to jump to protected mode using an identity
    575 	 * mapped address, we need to calculate it here.
    576 	 */
    577 	real_mode_platter->rm_longmode64_addr = rm_platter_pa +
    578 	    ((uint32_t)wc_long_mode_64 - (uint32_t)wc_rm_start);
    579 
    580 	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
    581 	    (ulong_t)real_mode_platter->rm_cr4, getcr4()))
    582 
    583 	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
    584 	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
    585 
    586 	PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
    587 	    (ulong_t)real_mode_platter->rm_longmode64_addr))
    588 
    589 #endif
    590 
    591 	PT(PT_SC);
    592 	if (wc_save_context(cpup)) {
    593 
    594 		ret = i_cpr_platform_alloc(&(wc_other_cpus->wc_apic_state));
    595 		if (ret != 0)
    596 			return (ret);
    597 
    598 		ret = i_cpr_save_apic(&(wc_other_cpus->wc_apic_state));
    599 		PMD(PMD_SX, ("%s: i_cpr_save_apic() returned %d\n", str, ret))
    600 		if (ret != 0)
    601 			return (ret);
    602 
    603 		PMD(PMD_SX, ("wakephys=%x, kernel_wc_code=%p\n",
    604 		    (uint_t)wakephys, (void *)&kernel_wc_code))
    605 		PMD(PMD_SX, ("virtaddr=%lx, retaddr=%lx\n",
    606 		    (long)cpup->wc_virtaddr, (long)cpup->wc_retaddr))
    607 		PMD(PMD_SX, ("ebx=%x, edi=%x, esi=%x, ebp=%x, esp=%x\n",
    608 		    cpup->wc_ebx, cpup->wc_edi, cpup->wc_esi, cpup->wc_ebp,
    609 		    cpup->wc_esp))
    610 		PMD(PMD_SX, ("cr0=%lx, cr3=%lx, cr4=%lx\n",
    611 		    (long)cpup->wc_cr0, (long)cpup->wc_cr3,
    612 		    (long)cpup->wc_cr4))
    613 		PMD(PMD_SX, ("cs=%x, ds=%x, es=%x, ss=%x, fs=%lx, gs=%lx, "
    614 		    "flgs=%lx\n", cpup->wc_cs, cpup->wc_ds, cpup->wc_es,
    615 		    cpup->wc_ss, (long)cpup->wc_fs, (long)cpup->wc_gs,
    616 		    (long)cpup->wc_eflags))
    617 
    618 		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
    619 		    "kgbase=%lx\n", (void *)cpup->wc_gdt_base,
    620 		    cpup->wc_gdt_limit, (void *)cpup->wc_idt_base,
    621 		    cpup->wc_idt_limit, (long)cpup->wc_ldt,
    622 		    (long)cpup->wc_tr, (long)cpup->wc_kgsbase))
    623 
    624 		gdt.base = cpup->wc_gdt_base;
    625 		gdt.limit = cpup->wc_gdt_limit;
    626 
    627 #if defined(__amd64)
    628 		code_length = (uint32_t)wc_long_mode_64 -
    629 		    (uint32_t)wc_rm_start;
    630 #else
    631 		code_length = 0;
    632 #endif
    633 
    634 		init_real_mode_platter(0, code_length, cpup->wc_cr4, gdt);
    635 
    636 #if defined(__amd64)
    637 		PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
    638 		    (ulong_t)wcpp->rm_cr4, getcr4()))
    639 
    640 		PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
    641 		    (ulong_t)wcpp->rm_pdbr, getcr3()))
    642 
    643 		PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
    644 		    (ulong_t)wcpp->rm_longmode64_addr))
    645 
    646 		PMD(PMD_SX,
    647 		    ("real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64]=%lx\n",
    648 		    (ulong_t)wcpp->rm_temp_gdt[TEMPGDT_KCODE64]))
    649 #endif
    650 
    651 		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
    652 		    "kgsbase=%lx\n", (void *)wcpp->rm_gdt_base,
    653 		    wcpp->rm_gdt_lim, (void *)wcpp->rm_idt_base,
    654 		    wcpp->rm_idt_lim, (long)cpup->wc_ldt, (long)cpup->wc_tr,
    655 		    (long)cpup->wc_kgsbase))
    656 
    657 		power_req.request_type = PMR_PPM_ENTER_SX;
    658 		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
    659 		power_req.req.ppm_power_enter_sx_req.test_point =
    660 		    cpr_test_point;
    661 		power_req.req.ppm_power_enter_sx_req.wakephys = wakephys;
    662 
    663 		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_ENTER_SX\n", str))
    664 		PT(PT_PPMCTLOP);
    665 		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
    666 		    &power_req, &ret);
    667 		PMD(PMD_SX, ("%s: returns %d\n", str, ret))
    668 
    669 		/*
    670 		 * If it works, we get control back to the else branch below
    671 		 * If we get control back here, it didn't work.
    672 		 * XXX return EINVAL here?
    673 		 */
    674 
    675 		unmap_wakeaddr_1to1(wakephys);
    676 		intr_restore(saved_intr);
    677 
    678 		return (ret);
    679 	} else {
    680 		cpr_suspend_succeeded = 1;
    681 
    682 		power_req.request_type = PMR_PPM_EXIT_SX;
    683 		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
    684 
    685 		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_EXIT_SX\n", str))
    686 		PT(PT_PPMCTLOP);
    687 		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
    688 		    &power_req, &ret);
    689 		PMD(PMD_SX, ("%s: returns %d\n", str, ret))
    690 
    691 		ret = i_cpr_restore_apic(&(wc_other_cpus->wc_apic_state));
    692 		/*
    693 		 * the restore should never fail, if the saved suceeded
    694 		 */
    695 		ASSERT(ret == 0);
    696 
    697 		i_cpr_platform_free(&(wc_other_cpus->wc_apic_state));
    698 
    699 		/*
    700 		 * Enable interrupts on boot cpu.
    701 		 */
    702 		ASSERT(CPU->cpu_id == i_cpr_bootcpuid());
    703 		mutex_enter(&cpu_lock);
    704 		cpu_enable_intr(CPU);
    705 		mutex_exit(&cpu_lock);
    706 
    707 		PT(PT_INTRRESTORE);
    708 		intr_restore(saved_intr);
    709 		PT(PT_CPU);
    710 
    711 		return (ret);
    712 	}
    713 }
    714 
    715 /*
    716  * Stop all other cpu's before halting or rebooting. We pause the cpu's
    717  * instead of sending a cross call.
    718  * Stolen from sun4/os/mp_states.c
    719  */
    720 
    721 static int cpu_are_paused;	/* sic */
    722 
    723 void
    724 i_cpr_stop_other_cpus(void)
    725 {
    726 	mutex_enter(&cpu_lock);
    727 	if (cpu_are_paused) {
    728 		mutex_exit(&cpu_lock);
    729 		return;
    730 	}
    731 	pause_cpus(NULL);
    732 	cpu_are_paused = 1;
    733 
    734 	mutex_exit(&cpu_lock);
    735 }
    736 
    737 int
    738 i_cpr_is_supported(int sleeptype)
    739 {
    740 	extern int cpr_supported_override;
    741 	extern int cpr_platform_enable;
    742 	extern int pm_S3_enabled;
    743 
    744 	if (sleeptype != CPR_TORAM)
    745 		return (0);
    746 
    747 	/*
    748 	 * The next statement tests if a specific platform has turned off
    749 	 * cpr support.
    750 	 */
    751 	if (cpr_supported_override)
    752 		return (0);
    753 
    754 	/*
    755 	 * If a platform has specifically turned on cpr support ...
    756 	 */
    757 	if (cpr_platform_enable)
    758 		return (1);
    759 
    760 	return (pm_S3_enabled);
    761 }
    762 
/*
 * Empty on this platform; the function does nothing.
 */
void
i_cpr_bitmap_cleanup(void)
{
}
    767 
/*
 * Empty on this platform; the function does nothing.
 */
void
i_cpr_free_memory_resources(void)
{
}
    772 
    773 /*
    774  * Needed only for S3 so far
    775  */
    776 static int
    777 i_cpr_platform_alloc(psm_state_request_t *req)
    778 {
    779 #ifdef DEBUG
    780 	char	*str = "i_cpr_platform_alloc";
    781 #endif
    782 
    783 	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
    784 
    785 	if (psm_state == NULL) {
    786 		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
    787 		return (0);
    788 	}
    789 
    790 	req->psr_cmd = PSM_STATE_ALLOC;
    791 	return ((*psm_state)(req));
    792 }
    793 
    794 /*
    795  * Needed only for S3 so far
    796  */
    797 static void
    798 i_cpr_platform_free(psm_state_request_t *req)
    799 {
    800 #ifdef DEBUG
    801 	char	*str = "i_cpr_platform_free";
    802 #endif
    803 
    804 	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
    805 
    806 	if (psm_state == NULL) {
    807 		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
    808 		return;
    809 	}
    810 
    811 	req->psr_cmd = PSM_STATE_FREE;
    812 	(void) (*psm_state)(req);
    813 }
    814 
    815 static int
    816 i_cpr_save_apic(psm_state_request_t *req)
    817 {
    818 #ifdef DEBUG
    819 	char	*str = "i_cpr_save_apic";
    820 #endif
    821 
    822 	if (psm_state == NULL) {
    823 		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
    824 		return (0);
    825 	}
    826 
    827 	req->psr_cmd = PSM_STATE_SAVE;
    828 	return ((*psm_state)(req));
    829 }
    830 
    831 static int
    832 i_cpr_restore_apic(psm_state_request_t *req)
    833 {
    834 #ifdef DEBUG
    835 	char	*str = "i_cpr_restore_apic";
    836 #endif
    837 
    838 	if (psm_state == NULL) {
    839 		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
    840 		return (0);
    841 	}
    842 
    843 	req->psr_cmd = PSM_STATE_RESTORE;
    844 	return ((*psm_state)(req));
    845 }
    846 
    847 
    848 /* stop lint complaining about offset not being used in 32bit mode */
    849 #if !defined(__amd64)
    850 /*ARGSUSED*/
    851 #endif
    852 static void
    853 init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt)
    854 {
    855 	/*LINTED*/
    856 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
    857 
    858 	/*
    859 	 * Fill up the real mode platter to make it easy for real mode code to
    860 	 * kick it off. This area should really be one passed by boot to kernel
    861 	 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
    862 	 * have identical physical and virtual address in paged mode.
    863 	 */
    864 
    865 	real_mode_platter->rm_pdbr = getcr3();
    866 	real_mode_platter->rm_cpu = cpun;
    867 	real_mode_platter->rm_cr4 = cr4;
    868 
    869 	real_mode_platter->rm_gdt_base = gdt.base;
    870 	real_mode_platter->rm_gdt_lim = gdt.limit;
    871 
    872 #if defined(__amd64)
    873 	real_mode_platter->rm_x86feature = x86_feature;
    874 
    875 	if (getcr3() > 0xffffffffUL)
    876 		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
    877 		    "located above 4G in physical memory (@ 0x%llx).",
    878 		    (unsigned long long)getcr3());
    879 
    880 	/*
    881 	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
    882 	 * by code in real_mode_start():
    883 	 *
    884 	 * GDT[0]:  NULL selector
    885 	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
    886 	 *
    887 	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
    888 	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
    889 	 * a course of action as any other, though it may cause the entire
    890 	 * platform to reset in some cases...
    891 	 */
    892 	real_mode_platter->rm_temp_gdt[0] = 0ULL;
    893 	real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
    894 
    895 	real_mode_platter->rm_temp_gdt_lim = (ushort_t)
    896 	    (sizeof (real_mode_platter->rm_temp_gdt) - 1);
    897 	real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
    898 	    (uint32_t)(&((rm_platter_t *)0)->rm_temp_gdt);
    899 
    900 	real_mode_platter->rm_temp_idt_lim = 0;
    901 	real_mode_platter->rm_temp_idt_base = 0;
    902 
    903 	/*
    904 	 * Since the CPU needs to jump to protected mode using an identity
    905 	 * mapped address, we need to calculate it here.
    906 	 */
    907 	real_mode_platter->rm_longmode64_addr = rm_platter_pa + offset;
    908 #endif	/* __amd64 */
    909 
    910 	/* return; */
    911 }
    912 
    913 void
    914 i_cpr_start_cpu(void)
    915 {
    916 
    917 	struct cpu *cp = CPU;
    918 
    919 	char *str = "i_cpr_start_cpu";
    920 	extern void init_cpu_syscall(struct cpu *cp);
    921 
    922 	PMD(PMD_SX, ("%s() called\n", str))
    923 
    924 	PMD(PMD_SX, ("%s() #0 cp->cpu_base_spl %d\n", str,
    925 	    cp->cpu_base_spl))
    926 
    927 	mutex_enter(&cpu_lock);
    928 	if (cp == i_cpr_bootcpu()) {
    929 		mutex_exit(&cpu_lock);
    930 		PMD(PMD_SX,
    931 		    ("%s() called on bootcpu nothing to do!\n", str))
    932 		return;
    933 	}
    934 	mutex_exit(&cpu_lock);
    935 
    936 	/*
    937 	 * We need to Sync PAT with cpu0's PAT. We have to do
    938 	 * this with interrupts disabled.
    939 	 */
    940 	if (x86_feature & X86_PAT)
    941 		pat_sync();
    942 
    943 	/*
    944 	 * Initialize this CPU's syscall handlers
    945 	 */
    946 	init_cpu_syscall(cp);
    947 
    948 	PMD(PMD_SX, ("%s() #1 cp->cpu_base_spl %d\n", str, cp->cpu_base_spl))
    949 
    950 	/*
    951 	 * Do not need to call cpuid_pass2(), cpuid_pass3(), cpuid_pass4() or
    952 	 * init_cpu_info(), since the work that they do is only needed to
    953 	 * be done once at boot time
    954 	 */
    955 
    956 
    957 	mutex_enter(&cpu_lock);
    958 	CPUSET_ADD(procset, cp->cpu_id);
    959 	mutex_exit(&cpu_lock);
    960 
    961 	PMD(PMD_SX, ("%s() #2 cp->cpu_base_spl %d\n", str,
    962 	    cp->cpu_base_spl))
    963 
    964 	if (tsc_gethrtime_enable) {
    965 		PMD(PMD_SX, ("%s() calling tsc_sync_slave\n", str))
    966 		tsc_sync_slave();
    967 	}
    968 
    969 	PMD(PMD_SX, ("%s() cp->cpu_id %d, cp->cpu_intr_actv %d\n", str,
    970 	    cp->cpu_id, cp->cpu_intr_actv))
    971 	PMD(PMD_SX, ("%s() #3 cp->cpu_base_spl %d\n", str,
    972 	    cp->cpu_base_spl))
    973 
    974 	(void) spl0();		/* enable interrupts */
    975 
    976 	PMD(PMD_SX, ("%s() #4 cp->cpu_base_spl %d\n", str,
    977 	    cp->cpu_base_spl))
    978 
    979 	/*
    980 	 * Set up the CPU module for this CPU.  This can't be done before
    981 	 * this CPU is made CPU_READY, because we may (in heterogeneous systems)
    982 	 * need to go load another CPU module.  The act of attempting to load
    983 	 * a module may trigger a cross-call, which will ASSERT unless this
    984 	 * cpu is CPU_READY.
    985 	 */
    986 
    987 	/*
    988 	 * cmi already been init'd (during boot), so do not need to do it again
    989 	 */
    990 #ifdef PM_REINITMCAONRESUME
    991 	if (x86_feature & X86_MCA)
    992 		cmi_mca_init();
    993 #endif
    994 
    995 	PMD(PMD_SX, ("%s() returning\n", str))
    996 
    997 	/* return; */
    998 }
    999 
   1000 void
   1001 i_cpr_alloc_cpus(void)
   1002 {
   1003 	char *str = "i_cpr_alloc_cpus";
   1004 
   1005 	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
   1006 	/*
   1007 	 * we allocate this only when we actually need it to save on
   1008 	 * kernel memory
   1009 	 */
   1010 
   1011 	if (wc_other_cpus == NULL) {
   1012 		wc_other_cpus = kmem_zalloc(ncpus * sizeof (wc_cpu_t),
   1013 		    KM_SLEEP);
   1014 	}
   1015 
   1016 }
   1017 
   1018 void
   1019 i_cpr_free_cpus(void)
   1020 {
   1021 	int index;
   1022 	wc_cpu_t *wc_cpu;
   1023 
   1024 	if (wc_other_cpus != NULL) {
   1025 		for (index = 0; index < ncpus; index++) {
   1026 			wc_cpu = wc_other_cpus + index;
   1027 			if (wc_cpu->wc_saved_stack != NULL) {
   1028 				kmem_free(wc_cpu->wc_saved_stack,
   1029 				    wc_cpu->wc_saved_stack_size);
   1030 			}
   1031 		}
   1032 
   1033 		kmem_free((void *) wc_other_cpus, ncpus * sizeof (wc_cpu_t));
   1034 		wc_other_cpus = NULL;
   1035 	}
   1036 }
   1037 
/*
 * wrapper for acpica_ddi_save_resources()
 *
 * dip is passed through unchanged; nothing is returned.
 */
void
i_cpr_save_configuration(dev_info_t *dip)
{
	acpica_ddi_save_resources(dip);
}
   1046 
/*
 * wrapper for acpica_ddi_restore_resources()
 *
 * dip is passed through unchanged; nothing is returned.
 */
void
i_cpr_restore_configuration(dev_info_t *dip)
{
	acpica_ddi_restore_resources(dip);
}
   1055 
   1056 static int
   1057 wait_for_set(cpuset_t *set, int who)
   1058 {
   1059 	int delays;
   1060 	char *str = "wait_for_set";
   1061 
   1062 	for (delays = 0; !CPU_IN_SET(*set, who); delays++) {
   1063 		if (delays == 500) {
   1064 			/*
   1065 			 * After five seconds, things are probably
   1066 			 * looking a bit bleak - explain the hang.
   1067 			 */
   1068 			cmn_err(CE_NOTE, "cpu%d: started, "
   1069 			    "but not running in the kernel yet", who);
   1070 			PMD(PMD_SX, ("%s() %d cpu started "
   1071 			    "but not running in the kernel yet\n",
   1072 			    str, who))
   1073 		} else if (delays > 2000) {
   1074 			/*
   1075 			 * We waited at least 20 seconds, bail ..
   1076 			 */
   1077 			cmn_err(CE_WARN, "cpu%d: timed out", who);
   1078 			PMD(PMD_SX, ("%s() %d cpu timed out\n",
   1079 			    str, who))
   1080 			return (0);
   1081 		}
   1082 
   1083 		/*
   1084 		 * wait at least 10ms, then check again..
   1085 		 */
   1086 		drv_usecwait(10000);
   1087 	}
   1088 
   1089 	return (1);
   1090 }
   1091 
   1092 static	void
   1093 i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu)
   1094 {
   1095 	size_t	stack_size;	/* size of stack */
   1096 	caddr_t	start = CPR_GET_STACK_START(t);	/* stack start */
   1097 	caddr_t	end = CPR_GET_STACK_END(t);	/* stack end  */
   1098 
   1099 	stack_size = (size_t)end - (size_t)start;
   1100 
   1101 	if (wc_cpu->wc_saved_stack_size < stack_size) {
   1102 		if (wc_cpu->wc_saved_stack != NULL) {
   1103 			kmem_free(wc_cpu->wc_saved_stack,
   1104 			    wc_cpu->wc_saved_stack_size);
   1105 		}
   1106 		wc_cpu->wc_saved_stack = kmem_zalloc(stack_size, KM_SLEEP);
   1107 		wc_cpu->wc_saved_stack_size = stack_size;
   1108 	}
   1109 
   1110 	bcopy(start, wc_cpu->wc_saved_stack, stack_size);
   1111 }
   1112 
   1113 void
   1114 i_cpr_restore_stack(kthread_t *t, greg_t *save_stack)
   1115 {
   1116 	size_t	stack_size;	/* size of stack */
   1117 	caddr_t	start = CPR_GET_STACK_START(t);	/* stack start */
   1118 	caddr_t	end = CPR_GET_STACK_END(t);	/* stack end  */
   1119 
   1120 	stack_size = (size_t)end - (size_t)start;
   1121 
   1122 	bcopy(save_stack, start, stack_size);
   1123 }
   1124