Home | History | Annotate | Download | only in cpr
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 /*
     29  * This module contains the guts of checkpoint-resume mechanism.
     30  * All code in this module is platform independent.
     31  */
     32 
     33 #include <sys/types.h>
     34 #include <sys/errno.h>
     35 #include <sys/callb.h>
     36 #include <sys/processor.h>
     37 #include <sys/machsystm.h>
     38 #include <sys/clock.h>
     39 #include <sys/vfs.h>
     40 #include <sys/kmem.h>
     41 #include <nfs/lm.h>
     42 #include <sys/systm.h>
     43 #include <sys/cpr.h>
     44 #include <sys/bootconf.h>
     45 #include <sys/cyclic.h>
     46 #include <sys/filio.h>
     47 #include <sys/fs/ufs_filio.h>
     48 #include <sys/epm.h>
     49 #include <sys/modctl.h>
     50 #include <sys/reboot.h>
     51 #include <sys/kdi.h>
     52 #include <sys/promif.h>
     53 #include <sys/srn.h>
     54 #include <sys/cpr_impl.h>
     55 
     56 #define	PPM(dip) ((dev_info_t *)DEVI(dip)->devi_pm_ppm)
     57 
     58 extern struct cpr_terminator cpr_term;
     59 
     60 extern int cpr_alloc_statefile(int);
     61 extern void cpr_start_kernel_threads(void);
     62 extern void cpr_abbreviate_devpath(char *, char *);
     63 extern void cpr_convert_promtime(cpr_time_t *);
     64 extern void cpr_send_notice(void);
     65 extern void cpr_set_bitmap_size(void);
     66 extern void cpr_stat_init();
     67 extern void cpr_statef_close(void);
     68 extern void flush_windows(void);
     69 extern void (*srn_signal)(int, int);
     70 extern void init_cpu_syscall(struct cpu *);
     71 extern void i_cpr_pre_resume_cpus();
     72 extern void i_cpr_post_resume_cpus();
     73 extern int cpr_is_ufs(struct vfs *);
     74 
     75 extern int pm_powering_down;
     76 extern kmutex_t srn_clone_lock;
     77 extern int srn_inuse;
     78 
     79 static int cpr_suspend(int);
     80 static int cpr_resume(int);
     81 static void cpr_suspend_init(int);
     82 #if defined(__x86)
     83 static int cpr_suspend_cpus(void);
     84 static void cpr_resume_cpus(void);
     85 #endif
     86 static int cpr_all_online(void);
     87 static void cpr_restore_offline(void);
     88 
/* Filled in by cpr_tod_get() near the start of cpr_suspend(). */
     89 cpr_time_t wholecycle_tv;
/*
 * Read by cpr_resume() to decide which resume-only steps run, e.g.
 * i_cpr_machdep_setup() and cpr_resume_cpus() versus cpr_unpause_cpus().
 * It is not assigned in the visible part of this file.
 */
     90 int cpr_suspend_succeeded;
/*
 * NOTE(review): neither of the next two is referenced in the visible part
 * of this file; presumably they are maintained by platform code -- confirm.
 */
     91 pfn_t curthreadpfn;
     92 int curthreadremapped;
     93 
     94 extern cpuset_t cpu_ready_set;
     95 extern void *(*cpu_pause_func)(void *);
     96 
     97 extern processorid_t i_cpr_bootcpuid(void);
     98 extern cpu_t *i_cpr_bootcpu(void);
     99 extern void tsc_adjust_delta(hrtime_t tdelta);
    100 extern void tsc_resume(void);
    101 extern int tsc_resume_in_cyclic;
    102 
    103 /*
    104  * Set this variable to 1 to have device drivers resume in a
    105  * uniprocessor environment. This allows drivers that assume
    106  * they resume on a UP machine to continue to work. It should be
    107  * deprecated once the broken drivers are fixed.
    108  */
    109 int cpr_resume_uniproc = 0;
    110 
    111 /*
    112  * save or restore abort_enable;  this prevents a drop
    113  * to kadb or prom during cpr_resume_devices() when
    114  * there is no kbd present;  see abort_sequence_enter()
    115  */
    116 static void
    117 cpr_sae(int stash)
    118 {
    119 	static int saved_ae = -1;
    120 
    121 	if (stash) {
    122 		saved_ae = abort_enable;
    123 		abort_enable = 0;
    124 	} else if (saved_ae != -1) {
    125 		abort_enable = saved_ae;
    126 		saved_ae = -1;
    127 	}
    128 }
    129 
    130 
    131 /*
    132  * The main switching point for cpr, this routine starts the ckpt
    133  * and state file saving routines; on resume the control is
    134  * returned back to here and it then calls the resume routine.
    135  */
    136 int
    137 cpr_main(int sleeptype)
    138 {
    139 	int rc, rc2;
    140 	label_t saveq;
    141 	klwp_t *tlwp = ttolwp(curthread);
    142 
    143 	if (sleeptype == CPR_TODISK) {
    144 		if ((rc = cpr_default_setup(1)) != 0)
    145 			return (rc);
    146 		ASSERT(tlwp);
    147 		saveq = tlwp->lwp_qsav;
    148 	}
    149 
    150 	if (sleeptype == CPR_TORAM) {
    151 		rc = cpr_suspend(sleeptype);
    152 		PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
    153 		if (rc == 0) {
    154 			int i_cpr_power_down(int sleeptype);
    155 
    156 			/*
    157 			 * From this point on, we should be at a high
    158 			 * spl, interrupts disabled, and all but one
    159 			 * cpu's paused (effectively UP/single threaded).
    160 			 * So this is were we want to put ASSERTS()
    161 			 * to let us know otherwise.
    162 			 */
    163 			ASSERT(cpus_paused());
    164 
    165 			/*
    166 			 * Now do the work of actually putting this
    167 			 * machine to sleep!
    168 			 */
    169 			rc = i_cpr_power_down(sleeptype);
    170 			if (rc == 0) {
    171 				PMD(PMD_SX, ("back from succssful suspend\n"))
    172 			}
    173 			/*
    174 			 * We do care about the return value from cpr_resume
    175 			 * at this point, as it will tell us if one of the
    176 			 * resume functions failed (cpr_resume_devices())
    177 			 * However, for this to return and _not_ panic, means
    178 			 * that we must be in one of the test functions.  So
    179 			 * check for that and return an appropriate message.
    180 			 */
    181 			rc2 = cpr_resume(sleeptype);
    182 			if (rc2 != 0) {
    183 				ASSERT(cpr_test_point > 0);
    184 				cmn_err(CE_NOTE,
    185 				    "cpr_resume returned non-zero: %d\n", rc2);
    186 				PMD(PMD_SX, ("cpr_resume rets %x\n", rc2))
    187 			}
    188 			ASSERT(!cpus_paused());
    189 		} else {
    190 			PMD(PMD_SX, ("failed suspend, resuming\n"))
    191 			rc = cpr_resume(sleeptype);
    192 		}
    193 		return (rc);
    194 	}
    195 	/*
    196 	 * Remember where we are for resume after reboot
    197 	 */
    198 	if (!setjmp(&tlwp->lwp_qsav)) {
    199 		/*
    200 		 * try to checkpoint the system, if failed return back
    201 		 * to userland, otherwise power off.
    202 		 */
    203 		rc = cpr_suspend(sleeptype);
    204 		if (rc || cpr_reusable_mode) {
    205 			/*
    206 			 * We don't really want to go down, or
    207 			 * something went wrong in suspend, do what we can
    208 			 * to put the system back to an operable state then
    209 			 * return back to userland.
    210 			 */
    211 			PMD(PMD_SX, ("failed suspend, resuming\n"))
    212 			(void) cpr_resume(sleeptype);
    213 			PMD(PMD_SX, ("back from failed suspend resume\n"))
    214 		}
    215 	} else {
    216 		/*
    217 		 * This is the resumed side of longjmp, restore the previous
    218 		 * longjmp pointer if there is one so this will be transparent
    219 		 * to the world.
    220 		 * This path is only for CPR_TODISK, where we reboot
    221 		 */
    222 		ASSERT(sleeptype == CPR_TODISK);
    223 		tlwp->lwp_qsav = saveq;
    224 		CPR->c_flags &= ~C_SUSPENDING;
    225 		CPR->c_flags |= C_RESUMING;
    226 
    227 		/*
    228 		 * resume the system back to the original state
    229 		 */
    230 		rc = cpr_resume(sleeptype);
    231 		PMD(PMD_SX, ("back from successful suspend; resume rets %x\n",
    232 		    rc))
    233 	}
    234 
    235 	(void) cpr_default_setup(0);
    236 
    237 	return (rc);
    238 }
    239 
    240 
    241 #if defined(__sparc)
    242 
    243 /*
    244  * check/disable or re-enable UFS logging
    245  */
    246 static void
    247 cpr_log_status(int enable, int *svstat, vnode_t *vp)
    248 {
    249 	int cmd, status, error;
    250 	char *str, *able;
    251 	fiolog_t fl;
    252 	refstr_t *mntpt;
    253 
    254 	str = "cpr_log_status";
    255 	bzero(&fl, sizeof (fl));
    256 	fl.error = FIOLOG_ENONE;
    257 
    258 	/*
    259 	 * when disabling, first get and save logging status (0 or 1)
    260 	 */
    261 	if (enable == 0) {
    262 		if (error = VOP_IOCTL(vp, _FIOISLOG,
    263 		    (uintptr_t)&status, FKIOCTL, CRED(), NULL, NULL)) {
    264 			mntpt = vfs_getmntpoint(vp->v_vfsp);
    265 			prom_printf("%s: \"%s\", cant get logging "
    266 			    "status, error %d\n", str, refstr_value(mntpt),
    267 			    error);
    268 			refstr_rele(mntpt);
    269 			return;
    270 		}
    271 		*svstat = status;
    272 		if (cpr_debug & CPR_DEBUG5) {
    273 			mntpt = vfs_getmntpoint(vp->v_vfsp);
    274 			errp("%s: \"%s\", logging status = %d\n",
    275 			    str, refstr_value(mntpt), status);
    276 			refstr_rele(mntpt);
    277 		};
    278 
    279 		able = "disable";
    280 		cmd = _FIOLOGDISABLE;
    281 	} else {
    282 		able = "enable";
    283 		cmd = _FIOLOGENABLE;
    284 	}
    285 
    286 	/*
    287 	 * disable or re-enable logging when the saved status is 1
    288 	 */
    289 	if (*svstat == 1) {
    290 		error = VOP_IOCTL(vp, cmd, (uintptr_t)&fl,
    291 		    FKIOCTL, CRED(), NULL, NULL);
    292 		if (error) {
    293 			mntpt = vfs_getmntpoint(vp->v_vfsp);
    294 			prom_printf("%s: \"%s\", cant %s logging, error %d\n",
    295 			    str, refstr_value(mntpt), able, error);
    296 			refstr_rele(mntpt);
    297 		} else {
    298 			if (cpr_debug & CPR_DEBUG5) {
    299 				mntpt = vfs_getmntpoint(vp->v_vfsp);
    300 				errp("%s: \"%s\", logging is now %sd\n",
    301 				    str, refstr_value(mntpt), able);
    302 				refstr_rele(mntpt);
    303 			};
    304 		}
    305 	}
    306 
    307 	/*
    308 	 * when enabling logging, reset the saved status
    309 	 * to unknown for next time
    310 	 */
    311 	if (enable)
    312 		*svstat = -1;
    313 }
    314 
    315 /*
    316  * enable/disable UFS logging on filesystems containing cpr_default_path
    317  * and cpr statefile.  since the statefile can be on any fs, that fs
    318  * needs to be handled separately.  this routine and cprboot expect that
    319  * CPR_CONFIG and CPR_DEFAULT both reside on the same fs, rootfs.  cprboot
    320  * is loaded from the device with rootfs and uses the same device to open
    321  * both CPR_CONFIG and CPR_DEFAULT (see common/support.c).  moving either
    322  * file outside of rootfs would cause errors during cprboot, plus cpr and
    323  * fsck problems with the new fs if logging were enabled.
    324  */
    325 
    326 static int
    327 cpr_ufs_logging(int enable)
    328 {
    329 	static int def_status = -1, sf_status = -1;
    330 	struct vfs *vfsp;
    331 	char *fname;
    332 	vnode_t *vp;
    333 	int error;
    334 
    335 	if (cpr_reusable_mode)
    336 		return (0);
    337 
    338 	if (error = cpr_open_deffile(FREAD, &vp))
    339 		return (error);
    340 	vfsp = vp->v_vfsp;
    341 	if (!cpr_is_ufs(vfsp)) {
    342 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
    343 		VN_RELE(vp);
    344 		return (0);
    345 	}
    346 
    347 	cpr_log_status(enable, &def_status, vp);
    348 	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
    349 	VN_RELE(vp);
    350 
    351 	fname = cpr_build_statefile_path();
    352 	if (fname == NULL)
    353 		return (ENOENT);
    354 	if (error = vn_open(fname, UIO_SYSSPACE, FCREAT|FWRITE,
    355 	    0600, &vp, CRCREAT, 0)) {
    356 		prom_printf("cpr_ufs_logging: cant open/create \"%s\", "
    357 		    "error %d\n", fname, error);
    358 		return (error);
    359 	}
    360 
    361 	/*
    362 	 * check logging status for the statefile if it resides
    363 	 * on a different fs and the type is a regular file
    364 	 */
    365 	if (vp->v_vfsp != vfsp && vp->v_type == VREG)
    366 		cpr_log_status(enable, &sf_status, vp);
    367 	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
    368 	VN_RELE(vp);
    369 
    370 	return (0);
    371 }
    372 #endif
    373 
    374 
    375 /*
    376  * Check if klmmod is loaded and call a lock manager service; if klmmod
    377  * is not loaded, the services aren't needed and a call would trigger a
    378  * modload, which would block since another thread would never run.
    379  */
    380 static void
    381 cpr_lock_mgr(void (*service)(void))
    382 {
    383 	if (mod_find_by_filename(NULL, "misc/klmmod") != NULL)
    384 		(*service)();
    385 }
    386 
    387 int
    388 cpr_suspend_cpus(void)
    389 {
    390 	int	ret = 0;
    391 	extern void *i_cpr_save_context(void *arg);
    392 
    393 	mutex_enter(&cpu_lock);
    394 
    395 	/*
    396 	 * the machine could not have booted without a bootcpu
    397 	 */
    398 	ASSERT(i_cpr_bootcpu() != NULL);
    399 
    400 	/*
    401 	 * bring all the offline cpus online
    402 	 */
    403 	if ((ret = cpr_all_online())) {
    404 		mutex_exit(&cpu_lock);
    405 		return (ret);
    406 	}
    407 
    408 	/*
    409 	 * Set the affinity to be the boot processor
    410 	 * This is cleared in either cpr_resume_cpus() or cpr_unpause_cpus()
    411 	 */
    412 	affinity_set(i_cpr_bootcpuid());
    413 
    414 	ASSERT(CPU->cpu_id == 0);
    415 
    416 	PMD(PMD_SX, ("curthread running on bootcpu\n"))
    417 
    418 	/*
    419 	 * pause all other running CPUs and save the CPU state at the sametime
    420 	 */
    421 	cpu_pause_func = i_cpr_save_context;
    422 	pause_cpus(NULL);
    423 
    424 	mutex_exit(&cpu_lock);
    425 
    426 	return (0);
    427 }
    428 
    429 /*
    430  * Take the system down to a checkpointable state and write
    431  * the state file, the following are sequentially executed:
    432  *
    433  *    - Request all user threads to stop themselves
    434  *    - push out and invalidate user pages
    435  *    - bring statefile inode incore to prevent a miss later
    436  *    - request all daemons to stop
    437  *    - check and make sure all threads are stopped
    438  *    - sync the file system
    439  *    - suspend all devices
    440  *    - block intrpts
    441  *    - dump system state and memory to state file
    442  *    - SPARC code will not be called with CPR_TORAM, caller filters
    443  */
    444 static int
    445 cpr_suspend(int sleeptype)
    446 {
    447 #if defined(__sparc)
    448 	int sf_realloc, nverr;
    449 #endif
    450 	int	rc = 0;
    451 	int	skt_rc = 0;
    452 
    453 	PMD(PMD_SX, ("cpr_suspend %x\n", sleeptype))
    454 	cpr_set_substate(C_ST_SUSPEND_BEGIN);
    455 
    456 	cpr_suspend_init(sleeptype);
    457 
    458 	cpr_save_time();
    459 
    460 	cpr_tod_get(&wholecycle_tv);
    461 	CPR_STAT_EVENT_START("Suspend Total");
    462 
    463 	i_cpr_alloc_cpus();
    464 
    465 #if defined(__sparc)
    466 	ASSERT(sleeptype == CPR_TODISK);
    467 	if (!cpr_reusable_mode) {
    468 		/*
    469 		 * We need to validate default file before fs
    470 		 * functionality is disabled.
    471 		 */
    472 		if (rc = cpr_validate_definfo(0))
    473 			return (rc);
    474 	}
    475 	i_cpr_save_machdep_info();
    476 #endif
    477 
    478 	PMD(PMD_SX, ("cpr_suspend: stop scans\n"))
    479 	/* Stop PM scans ASAP */
    480 	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_CHKPT);
    481 
    482 	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_SUSPEND,
    483 	    NULL, NULL, PM_DEP_WAIT, NULL, 0);
    484 
    485 #if defined(__sparc)
    486 	ASSERT(sleeptype == CPR_TODISK);
    487 	cpr_set_substate(C_ST_MP_OFFLINE);
    488 	if (rc = cpr_mp_offline())
    489 		return (rc);
    490 #endif
    491 	/*
    492 	 * Ask Xorg to suspend the frame buffer, and wait for it to happen
    493 	 */
    494 	mutex_enter(&srn_clone_lock);
    495 	if (srn_signal) {
    496 		PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
    497 		    "SRN_SUSPEND_REQ)\n"))
    498 		srn_inuse = 1;	/* because *(srn_signal) cv_waits */
    499 		(*srn_signal)(SRN_TYPE_APM, SRN_SUSPEND_REQ);
    500 		srn_inuse = 0;
    501 	} else {
    502 		PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
    503 	}
    504 	mutex_exit(&srn_clone_lock);
    505 
    506 	/*
    507 	 * Ask the user threads to stop by themselves, but
    508 	 * if they don't or can't after 3 retries, we give up on CPR.
    509 	 * The 3 retry is not a random number because 2 is possible if
    510 	 * a thread has been forked before the parent thread is stopped.
    511 	 */
    512 	CPR_DEBUG(CPR_DEBUG1, "\nstopping user threads...");
    513 	CPR_STAT_EVENT_START("  stop users");
    514 	cpr_set_substate(C_ST_STOP_USER_THREADS);
    515 	PMD(PMD_SX, ("cpr_suspend: stop user threads\n"))
    516 	if (rc = cpr_stop_user_threads())
    517 		return (rc);
    518 	CPR_STAT_EVENT_END("  stop users");
    519 	CPR_DEBUG(CPR_DEBUG1, "done\n");
    520 
    521 	PMD(PMD_SX, ("cpr_suspend: save direct levels\n"))
    522 	pm_save_direct_levels();
    523 
    524 	/*
    525 	 * User threads are stopped.  We will start communicating with the
    526 	 * user via prom_printf (some debug output may have already happened)
    527 	 * so let anybody who cares know about this (bug 4096122)
    528 	 */
    529 	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_CHKPT);
    530 
    531 	PMD(PMD_SX, ("cpr_suspend: send notice\n"))
    532 #ifndef DEBUG
    533 	cpr_send_notice();
    534 	if (cpr_debug)
    535 		prom_printf("\n");
    536 #endif
    537 
    538 	PMD(PMD_SX, ("cpr_suspend: POST USER callback\n"))
    539 	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_CHKPT);
    540 
    541 	/*
    542 	 * Reattach any drivers which originally exported the
    543 	 * no-involuntary-power-cycles property.  We need to do this before
    544 	 * stopping kernel threads because modload is implemented using
    545 	 * a kernel thread.
    546 	 */
    547 	cpr_set_substate(C_ST_PM_REATTACH_NOINVOL);
    548 	PMD(PMD_SX, ("cpr_suspend: reattach noinvol\n"))
    549 	if (!pm_reattach_noinvol())
    550 		return (ENXIO);
    551 
    552 #if defined(__sparc)
    553 	ASSERT(sleeptype == CPR_TODISK);
    554 	/*
    555 	 * if ufs logging is enabled, we need to disable before
    556 	 * stopping kernel threads so that ufs delete and roll
    557 	 * threads can do the work.
    558 	 */
    559 	cpr_set_substate(C_ST_DISABLE_UFS_LOGGING);
    560 	if (rc = cpr_ufs_logging(0))
    561 		return (rc);
    562 
    563 	/*
    564 	 * Use sync_all to swap out all user pages and find out how much
    565 	 * extra space needed for user pages that don't have back store
    566 	 * space left.
    567 	 */
    568 	CPR_STAT_EVENT_START("  swapout upages");
    569 	vfs_sync(SYNC_ALL);
    570 	CPR_STAT_EVENT_END("  swapout upages");
    571 
    572 	cpr_set_bitmap_size();
    573 
    574 alloc_statefile:
    575 	/*
    576 	 * If our last state was C_ST_DUMP_NOSPC, we're trying to
    577 	 * realloc the statefile, otherwise this is the first attempt.
    578 	 */
    579 	sf_realloc = (CPR->c_substate == C_ST_DUMP_NOSPC) ? 1 : 0;
    580 
    581 	CPR_STAT_EVENT_START("  alloc statefile");
    582 	cpr_set_substate(C_ST_STATEF_ALLOC);
    583 	if (rc = cpr_alloc_statefile(sf_realloc)) {
    584 		if (sf_realloc)
    585 			errp("realloc failed\n");
    586 		return (rc);
    587 	}
    588 	CPR_STAT_EVENT_END("  alloc statefile");
    589 
    590 	/*
    591 	 * Sync the filesystem to preserve its integrity.
    592 	 *
    593 	 * This sync is also used to flush out all B_DELWRI buffers
    594 	 * (fs cache) which are mapped and neither dirty nor referenced
    595 	 * before cpr_invalidate_pages destroys them.
    596 	 * fsflush does similar thing.
    597 	 */
    598 	sync();
    599 
    600 	/*
    601 	 * destroy all clean file mapped kernel pages
    602 	 */
    603 	CPR_STAT_EVENT_START("  clean pages");
    604 	CPR_DEBUG(CPR_DEBUG1, ("cleaning up mapped pages..."));
    605 	(void) callb_execute_class(CB_CL_CPR_VM, CB_CODE_CPR_CHKPT);
    606 	CPR_DEBUG(CPR_DEBUG1, ("done\n"));
    607 	CPR_STAT_EVENT_END("  clean pages");
    608 #endif
    609 
    610 
    611 	/*
    612 	 * Hooks needed by lock manager prior to suspending.
    613 	 * Refer to code for more comments.
    614 	 */
    615 	PMD(PMD_SX, ("cpr_suspend: lock mgr\n"))
    616 	cpr_lock_mgr(lm_cprsuspend);
    617 
    618 	/*
    619 	 * Now suspend all the devices
    620 	 */
    621 	CPR_STAT_EVENT_START("  stop drivers");
    622 	CPR_DEBUG(CPR_DEBUG1, "suspending drivers...");
    623 	cpr_set_substate(C_ST_SUSPEND_DEVICES);
    624 	pm_powering_down = 1;
    625 	PMD(PMD_SX, ("cpr_suspend: suspending devices\n"))
    626 	rc = cpr_suspend_devices(ddi_root_node());
    627 	pm_powering_down = 0;
    628 	if (rc)
    629 		return (rc);
    630 	CPR_DEBUG(CPR_DEBUG1, "done\n");
    631 	CPR_STAT_EVENT_END("  stop drivers");
    632 
    633 	/*
    634 	 * Stop all daemon activities
    635 	 */
    636 	cpr_set_substate(C_ST_STOP_KERNEL_THREADS);
    637 	PMD(PMD_SX, ("cpr_suspend: stopping kernel threads\n"))
    638 	if (skt_rc = cpr_stop_kernel_threads())
    639 		return (skt_rc);
    640 
    641 	PMD(PMD_SX, ("cpr_suspend: POST KERNEL callback\n"))
    642 	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_CHKPT);
    643 
    644 	PMD(PMD_SX, ("cpr_suspend: reattach noinvol fini\n"))
    645 	pm_reattach_noinvol_fini();
    646 
    647 	cpr_sae(1);
    648 
    649 	PMD(PMD_SX, ("cpr_suspend: CPR CALLOUT callback\n"))
    650 	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);
    651 
    652 	if (sleeptype == CPR_TODISK) {
    653 		/*
    654 		 * It's safer to do tod_get before we disable all intr.
    655 		 */
    656 		CPR_STAT_EVENT_START("  write statefile");
    657 	}
    658 
    659 	/*
    660 	 * it's time to ignore the outside world, stop the real time
    661 	 * clock and disable any further intrpt activity.
    662 	 */
    663 	PMD(PMD_SX, ("cpr_suspend: handle xc\n"))
    664 	i_cpr_handle_xc(1);	/* turn it on to disable xc assertion */
    665 
    666 	mutex_enter(&cpu_lock);
    667 	PMD(PMD_SX, ("cpr_suspend: cyclic suspend\n"))
    668 	cyclic_suspend();
    669 	mutex_exit(&cpu_lock);
    670 
    671 	/*
    672 	 * Due to the different methods of resuming the system between
    673 	 * CPR_TODISK (boot cprboot on SPARC, which reloads kernel image)
    674 	 * and CPR_TORAM (restart via reset into existing kernel image)
    675 	 * cpus are not suspended and restored in the SPARC case, since it
    676 	 * is necessary to restart the cpus and pause them before restoring
    677 	 * the OBP image
    678 	 */
    679 
    680 #if defined(__x86)
    681 
    682 	/* pause aux cpus */
    683 	PMD(PMD_SX, ("pause aux cpus\n"))
    684 
    685 	cpr_set_substate(C_ST_MP_PAUSED);
    686 
    687 	if ((rc = cpr_suspend_cpus()) != 0)
    688 		return (rc);
    689 #endif
    690 
    691 	PMD(PMD_SX, ("cpr_suspend: stop intr\n"))
    692 	i_cpr_stop_intr();
    693 	CPR_DEBUG(CPR_DEBUG1, "interrupt is stopped\n");
    694 
    695 	/*
    696 	 * Since we will now disable the mechanism that causes prom_printfs
    697 	 * to power up (if needed) the console fb/monitor, we assert that
    698 	 * it must be up now.
    699 	 */
    700 	ASSERT(pm_cfb_is_up());
    701 	PMD(PMD_SX, ("cpr_suspend: prom suspend prepost\n"))
    702 	prom_suspend_prepost();
    703 
    704 #if defined(__sparc)
    705 	/*
    706 	 * getting ready to write ourself out, flush the register
    707 	 * windows to make sure that our stack is good when we
    708 	 * come back on the resume side.
    709 	 */
    710 	flush_windows();
    711 #endif
    712 
    713 	/*
    714 	 * For S3, we're done
    715 	 */
    716 	if (sleeptype == CPR_TORAM) {
    717 		PMD(PMD_SX, ("cpr_suspend rets %x\n", rc))
    718 		cpr_set_substate(C_ST_NODUMP);
    719 		return (rc);
    720 	}
    721 #if defined(__sparc)
    722 	/*
    723 	 * FATAL: NO MORE MEMORY ALLOCATION ALLOWED AFTER THIS POINT!!!
    724 	 *
    725 	 * The system is quiesced at this point, we are ready to either dump
    726 	 * to the state file for a extended sleep or a simple shutdown for
    727 	 * systems with non-volatile memory.
    728 	 */
    729 
    730 	/*
    731 	 * special handling for reusable:
    732 	 */
    733 	if (cpr_reusable_mode) {
    734 		cpr_set_substate(C_ST_SETPROPS_1);
    735 		if (nverr = cpr_set_properties(1))
    736 			return (nverr);
    737 	}
    738 
    739 	cpr_set_substate(C_ST_DUMP);
    740 	rc = cpr_dump(C_VP);
    741 
    742 	/*
    743 	 * if any error occurred during dump, more
    744 	 * special handling for reusable:
    745 	 */
    746 	if (rc && cpr_reusable_mode) {
    747 		cpr_set_substate(C_ST_SETPROPS_0);
    748 		if (nverr = cpr_set_properties(0))
    749 			return (nverr);
    750 	}
    751 
    752 	if (rc == ENOSPC) {
    753 		cpr_set_substate(C_ST_DUMP_NOSPC);
    754 		(void) cpr_resume(sleeptype);
    755 		goto alloc_statefile;
    756 	} else if (rc == 0) {
    757 		if (cpr_reusable_mode) {
    758 			cpr_set_substate(C_ST_REUSABLE);
    759 			longjmp(&ttolwp(curthread)->lwp_qsav);
    760 		} else
    761 			rc = cpr_set_properties(1);
    762 	}
    763 #endif
    764 	PMD(PMD_SX, ("cpr_suspend: return %d\n", rc))
    765 	return (rc);
    766 }
    767 
    768 void
    769 cpr_resume_cpus(void)
    770 {
    771 	/*
    772 	 * this is a cut down version of start_other_cpus()
    773 	 * just do the initialization to wake the other cpus
    774 	 */
    775 
    776 #if defined(__x86)
    777 	/*
    778 	 * Initialize our syscall handlers
    779 	 */
    780 	init_cpu_syscall(CPU);
    781 
    782 #endif
    783 
    784 	i_cpr_pre_resume_cpus();
    785 
    786 	/*
    787 	 * Restart the paused cpus
    788 	 */
    789 	mutex_enter(&cpu_lock);
    790 	start_cpus();
    791 	mutex_exit(&cpu_lock);
    792 
    793 	i_cpr_post_resume_cpus();
    794 
    795 	mutex_enter(&cpu_lock);
    796 	/*
    797 	 * Restore this cpu to use the regular cpu_pause(), so that
    798 	 * online and offline will work correctly
    799 	 */
    800 	cpu_pause_func = NULL;
    801 
    802 	/*
    803 	 * clear the affinity set in cpr_suspend_cpus()
    804 	 */
    805 	affinity_clear();
    806 
    807 	/*
    808 	 * offline all the cpus that were brought online during suspend
    809 	 */
    810 	cpr_restore_offline();
    811 
    812 	mutex_exit(&cpu_lock);
    813 }
    814 
    815 void
    816 cpr_unpause_cpus(void)
    817 {
    818 	/*
    819 	 * Now restore the system back to what it was before we suspended
    820 	 */
    821 
    822 	PMD(PMD_SX, ("cpr_unpause_cpus: restoring system\n"))
    823 
    824 	mutex_enter(&cpu_lock);
    825 
    826 	/*
    827 	 * Restore this cpu to use the regular cpu_pause(), so that
    828 	 * online and offline will work correctly
    829 	 */
    830 	cpu_pause_func = NULL;
    831 
    832 	/*
    833 	 * Restart the paused cpus
    834 	 */
    835 	start_cpus();
    836 
    837 	/*
    838 	 * clear the affinity set in cpr_suspend_cpus()
    839 	 */
    840 	affinity_clear();
    841 
    842 	/*
    843 	 * offline all the cpus that were brought online during suspend
    844 	 */
    845 	cpr_restore_offline();
    846 
    847 	mutex_exit(&cpu_lock);
    848 }
    849 
    850 /*
    851  * Bring the system back up from a checkpoint, at this point
    852  * the VM has been minimally restored by boot, the following
    853  * are executed sequentially:
    854  *
    855  *    - machdep setup and enable interrupts (mp startup if it's mp)
    856  *    - resume all devices
    857  *    - restart daemons
    858  *    - put all threads back on run queue
    859  */
    860 static int
    861 cpr_resume(int sleeptype)
    862 {
    863 	cpr_time_t pwron_tv, *ctp;
    864 	char *str;
    865 	int rc = 0;
    866 
    867 	/*
    868 	 * The following switch is used to resume the system
    869 	 * that was suspended to a different level.
    870 	 */
    871 	CPR_DEBUG(CPR_DEBUG1, "\nEntering cpr_resume...\n");
    872 	PMD(PMD_SX, ("cpr_resume %x\n", sleeptype))
    873 
    874 	/*
    875 	 * Note:
    876 	 *
    877 	 * The rollback labels rb_xyz do not represent the cpr resume
    878 	 * state when event 'xyz' has happened. Instead they represent
    879 	 * the state during cpr suspend when event 'xyz' was being
    880 	 * entered (and where cpr suspend failed). The actual call that
    881 	 * failed may also need to be partially rolled back, since they
    882 	 * aren't atomic in most cases.  In other words, rb_xyz means
    883 	 * "roll back all cpr suspend events that happened before 'xyz',
    884 	 * and the one that caused the failure, if necessary."
    885 	 */
    886 	switch (CPR->c_substate) {
    887 #if defined(__sparc)
    888 	case C_ST_DUMP:
    889 		/*
    890 		 * This is most likely a full-fledged cpr_resume after
    891 		 * a complete and successful cpr suspend. Just roll back
    892 		 * everything.
    893 		 */
    894 		ASSERT(sleeptype == CPR_TODISK);
    895 		break;
    896 
    897 	case C_ST_REUSABLE:
    898 	case C_ST_DUMP_NOSPC:
    899 	case C_ST_SETPROPS_0:
    900 	case C_ST_SETPROPS_1:
    901 		/*
    902 		 * C_ST_REUSABLE and C_ST_DUMP_NOSPC are the only two
    903 		 * special switch cases here. The other two do not have
    904 		 * any state change during cpr_suspend() that needs to
    905 		 * be rolled back. But these are exit points from
    906 		 * cpr_suspend, so theoretically (or in the future), it
    907 		 * is possible that a need for roll back of a state
    908 		 * change arises between these exit points.
    909 		 */
    910 		ASSERT(sleeptype == CPR_TODISK);
    911 		goto rb_dump;
    912 #endif
    913 
    914 	case C_ST_NODUMP:
    915 		PMD(PMD_SX, ("cpr_resume: NODUMP\n"))
    916 		goto rb_nodump;
    917 
    918 	case C_ST_STOP_KERNEL_THREADS:
    919 		PMD(PMD_SX, ("cpr_resume: STOP_KERNEL_THREADS\n"))
    920 		goto rb_stop_kernel_threads;
    921 
    922 	case C_ST_SUSPEND_DEVICES:
    923 		PMD(PMD_SX, ("cpr_resume: SUSPEND_DEVICES\n"))
    924 		goto rb_suspend_devices;
    925 
    926 #if defined(__sparc)
    927 	case C_ST_STATEF_ALLOC:
    928 		ASSERT(sleeptype == CPR_TODISK);
    929 		goto rb_statef_alloc;
    930 
    931 	case C_ST_DISABLE_UFS_LOGGING:
    932 		ASSERT(sleeptype == CPR_TODISK);
    933 		goto rb_disable_ufs_logging;
    934 #endif
    935 
    936 	case C_ST_PM_REATTACH_NOINVOL:
    937 		PMD(PMD_SX, ("cpr_resume: REATTACH_NOINVOL\n"))
    938 		goto rb_pm_reattach_noinvol;
    939 
    940 	case C_ST_STOP_USER_THREADS:
    941 		PMD(PMD_SX, ("cpr_resume: STOP_USER_THREADS\n"))
    942 		goto rb_stop_user_threads;
    943 
    944 #if defined(__sparc)
    945 	case C_ST_MP_OFFLINE:
    946 		PMD(PMD_SX, ("cpr_resume: MP_OFFLINE\n"))
    947 		goto rb_mp_offline;
    948 #endif
    949 
    950 #if defined(__x86)
    951 	case C_ST_MP_PAUSED:
    952 		PMD(PMD_SX, ("cpr_resume: MP_PAUSED\n"))
    953 		goto rb_mp_paused;
    954 #endif
    955 
    956 
    957 	default:
    958 		PMD(PMD_SX, ("cpr_resume: others\n"))
    959 		goto rb_others;
    960 	}
    961 
    962 rb_all:
    963 	/*
    964 	 * perform platform-dependent initialization
    965 	 */
    966 	if (cpr_suspend_succeeded)
    967 		i_cpr_machdep_setup();
    968 
    969 	/*
    970 	 * system did not really go down if we jump here
    971 	 */
    972 rb_dump:
    973 	/*
    974 	 * IMPORTANT:  SENSITIVE RESUME SEQUENCE
    975 	 *
    976 	 * DO NOT ADD ANY INITIALIZATION STEP BEFORE THIS POINT!!
    977 	 */
    978 rb_nodump:
    979 	/*
    980 	 * If we did suspend to RAM, we didn't generate a dump
    981 	 */
    982 	PMD(PMD_SX, ("cpr_resume: CPR DMA callback\n"))
    983 	(void) callb_execute_class(CB_CL_CPR_DMA, CB_CODE_CPR_RESUME);
    984 	if (cpr_suspend_succeeded) {
    985 		PMD(PMD_SX, ("cpr_resume: CPR RPC callback\n"))
    986 		(void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);
    987 	}
    988 
    989 	prom_resume_prepost();
    990 #if !defined(__sparc)
    991 	/*
    992 	 * Need to sync the software clock with the hardware clock.
    993 	 * On Sparc, this occurs in the sparc-specific cbe.  However
    994 	 * on x86 this needs to be handled _before_ we bring other cpu's
    995 	 * back online.  So we call a resume function in timestamp.c
    996 	 */
    997 	if (tsc_resume_in_cyclic == 0)
    998 		tsc_resume();
    999 
   1000 #endif
   1001 
   1002 #if defined(__sparc)
   1003 	if (cpr_suspend_succeeded && (boothowto & RB_DEBUG))
   1004 		kdi_dvec_cpr_restart();
   1005 #endif
   1006 
   1007 
   1008 #if defined(__x86)
   1009 rb_mp_paused:
   1010 	PT(PT_RMPO);
   1011 	PMD(PMD_SX, ("resume aux cpus\n"))
   1012 
   1013 	if (cpr_suspend_succeeded) {
   1014 		cpr_resume_cpus();
   1015 	} else {
   1016 		cpr_unpause_cpus();
   1017 	}
   1018 #endif
   1019 
   1020 	/*
   1021 	 * let the tmp callout catch up.
   1022 	 */
   1023 	PMD(PMD_SX, ("cpr_resume: CPR CALLOUT callback\n"))
   1024 	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_RESUME);
   1025 
   1026 	i_cpr_enable_intr();
   1027 
   1028 	mutex_enter(&cpu_lock);
   1029 	PMD(PMD_SX, ("cpr_resume: cyclic resume\n"))
   1030 	cyclic_resume();
   1031 	mutex_exit(&cpu_lock);
   1032 
   1033 	PMD(PMD_SX, ("cpr_resume: handle xc\n"))
   1034 	i_cpr_handle_xc(0);	/* turn it off to allow xc assertion */
   1035 
   1036 	PMD(PMD_SX, ("cpr_resume: CPR POST KERNEL callback\n"))
   1037 	(void) callb_execute_class(CB_CL_CPR_POST_KERNEL, CB_CODE_CPR_RESUME);
   1038 
   1039 	/*
   1040 	 * statistics gathering
   1041 	 */
   1042 	if (cpr_suspend_succeeded) {
   1043 		/*
   1044 		 * Prevent false alarm in tod_validate() due to tod
   1045 		 * value change between suspend and resume
   1046 		 */
   1047 		cpr_tod_fault_reset();
   1048 
   1049 		cpr_convert_promtime(&pwron_tv);
   1050 
   1051 		ctp = &cpr_term.tm_shutdown;
   1052 		if (sleeptype == CPR_TODISK)
   1053 			CPR_STAT_EVENT_END_TMZ("  write statefile", ctp);
   1054 		CPR_STAT_EVENT_END_TMZ("Suspend Total", ctp);
   1055 
   1056 		CPR_STAT_EVENT_START_TMZ("Resume Total", &pwron_tv);
   1057 
   1058 		str = "  prom time";
   1059 		CPR_STAT_EVENT_START_TMZ(str, &pwron_tv);
   1060 		ctp = &cpr_term.tm_cprboot_start;
   1061 		CPR_STAT_EVENT_END_TMZ(str, ctp);
   1062 
   1063 		str = "  read statefile";
   1064 		CPR_STAT_EVENT_START_TMZ(str, ctp);
   1065 		ctp = &cpr_term.tm_cprboot_end;
   1066 		CPR_STAT_EVENT_END_TMZ(str, ctp);
   1067 	}
   1068 
   1069 rb_stop_kernel_threads:
   1070 	/*
   1071 	 * Put all threads back to where they belong; get the kernel
   1072 	 * daemons straightened up too. Note that the callback table
   1073 	 * locked during cpr_stop_kernel_threads() is released only
   1074 	 * in cpr_start_kernel_threads(). Ensure modunloading is
   1075 	 * disabled before starting kernel threads, we don't want
   1076 	 * modunload thread to start changing device tree underneath.
   1077 	 */
   1078 	PMD(PMD_SX, ("cpr_resume: modunload disable\n"))
   1079 	modunload_disable();
   1080 	PMD(PMD_SX, ("cpr_resume: start kernel threads\n"))
   1081 	cpr_start_kernel_threads();
   1082 
   1083 rb_suspend_devices:
   1084 	CPR_DEBUG(CPR_DEBUG1, "resuming devices...");
   1085 	CPR_STAT_EVENT_START("  start drivers");
   1086 
   1087 	PMD(PMD_SX,
   1088 	    ("cpr_resume: rb_suspend_devices: cpr_resume_uniproc = %d\n",
   1089 	    cpr_resume_uniproc))
   1090 
   1091 #if defined(__x86)
   1092 	/*
   1093 	 * If cpr_resume_uniproc is set, then pause all the other cpus
   1094 	 * apart from the current cpu, so that broken drivers that think
   1095 	 * that they are on a uniprocessor machine will resume
   1096 	 */
   1097 	if (cpr_resume_uniproc) {
   1098 		mutex_enter(&cpu_lock);
   1099 		pause_cpus(NULL);
   1100 		mutex_exit(&cpu_lock);
   1101 	}
   1102 #endif
   1103 
   1104 	/*
   1105 	 * The policy here is to continue resume everything we can if we did
   1106 	 * not successfully finish suspend; and panic if we are coming back
   1107 	 * from a fully suspended system.
   1108 	 */
   1109 	PMD(PMD_SX, ("cpr_resume: resume devices\n"))
   1110 	rc = cpr_resume_devices(ddi_root_node(), 0);
   1111 
   1112 	cpr_sae(0);
   1113 
   1114 	str = "Failed to resume one or more devices.";
   1115 
   1116 	if (rc) {
   1117 		if (CPR->c_substate == C_ST_DUMP ||
   1118 		    (sleeptype == CPR_TORAM &&
   1119 		    CPR->c_substate == C_ST_NODUMP)) {
   1120 			if (cpr_test_point == FORCE_SUSPEND_TO_RAM) {
   1121 				PMD(PMD_SX, ("cpr_resume: resume device "
   1122 				    "warn\n"))
   1123 				cpr_err(CE_WARN, str);
   1124 			} else {
   1125 				PMD(PMD_SX, ("cpr_resume: resume device "
   1126 				    "panic\n"))
   1127 				cpr_err(CE_PANIC, str);
   1128 			}
   1129 		} else {
   1130 			PMD(PMD_SX, ("cpr_resume: resume device warn\n"))
   1131 			cpr_err(CE_WARN, str);
   1132 		}
   1133 	}
   1134 
   1135 	CPR_STAT_EVENT_END("  start drivers");
   1136 	CPR_DEBUG(CPR_DEBUG1, "done\n");
   1137 
   1138 #if defined(__x86)
   1139 	/*
   1140 	 * If cpr_resume_uniproc is set, then unpause all the processors
   1141 	 * that were paused before resuming the drivers
   1142 	 */
   1143 	if (cpr_resume_uniproc) {
   1144 		mutex_enter(&cpu_lock);
   1145 		start_cpus();
   1146 		mutex_exit(&cpu_lock);
   1147 	}
   1148 #endif
   1149 
   1150 	/*
   1151 	 * If we had disabled modunloading in this cpr resume cycle (i.e. we
   1152 	 * resumed from a state earlier than C_ST_SUSPEND_DEVICES), re-enable
   1153 	 * modunloading now.
   1154 	 */
   1155 	if (CPR->c_substate != C_ST_SUSPEND_DEVICES) {
   1156 		PMD(PMD_SX, ("cpr_resume: modload enable\n"))
   1157 		modunload_enable();
   1158 	}
   1159 
   1160 	/*
   1161 	 * Hooks needed by lock manager prior to resuming.
   1162 	 * Refer to code for more comments.
   1163 	 */
   1164 	PMD(PMD_SX, ("cpr_resume: lock mgr\n"))
   1165 	cpr_lock_mgr(lm_cprresume);
   1166 
   1167 #if defined(__sparc)
   1168 	/*
   1169 	 * This is a partial (half) resume during cpr suspend, we
   1170 	 * haven't yet given up on the suspend. On return from here,
   1171 	 * cpr_suspend() will try to reallocate and retry the suspend.
   1172 	 */
   1173 	if (CPR->c_substate == C_ST_DUMP_NOSPC) {
   1174 		return (0);
   1175 	}
   1176 
   1177 	if (sleeptype == CPR_TODISK) {
   1178 rb_statef_alloc:
   1179 		cpr_statef_close();
   1180 
   1181 rb_disable_ufs_logging:
   1182 		/*
   1183 		 * if ufs logging was disabled, re-enable
   1184 		 */
   1185 		(void) cpr_ufs_logging(1);
   1186 	}
   1187 #endif
   1188 
   1189 rb_pm_reattach_noinvol:
   1190 	/*
   1191 	 * When pm_reattach_noinvol() succeeds, modunload_thread will
   1192 	 * remain disabled until after cpr suspend passes the
   1193 	 * C_ST_STOP_KERNEL_THREADS state. If any failure happens before
   1194 	 * cpr suspend reaches this state, we'll need to enable modunload
   1195 	 * thread during rollback.
   1196 	 */
   1197 	if (CPR->c_substate == C_ST_DISABLE_UFS_LOGGING ||
   1198 	    CPR->c_substate == C_ST_STATEF_ALLOC ||
   1199 	    CPR->c_substate == C_ST_SUSPEND_DEVICES ||
   1200 	    CPR->c_substate == C_ST_STOP_KERNEL_THREADS) {
   1201 		PMD(PMD_SX, ("cpr_resume: reattach noinvol fini\n"))
   1202 		pm_reattach_noinvol_fini();
   1203 	}
   1204 
   1205 	PMD(PMD_SX, ("cpr_resume: CPR POST USER callback\n"))
   1206 	(void) callb_execute_class(CB_CL_CPR_POST_USER, CB_CODE_CPR_RESUME);
   1207 	PMD(PMD_SX, ("cpr_resume: CPR PROMPRINTF callback\n"))
   1208 	(void) callb_execute_class(CB_CL_CPR_PROMPRINTF, CB_CODE_CPR_RESUME);
   1209 
   1210 	PMD(PMD_SX, ("cpr_resume: restore direct levels\n"))
   1211 	pm_restore_direct_levels();
   1212 
   1213 rb_stop_user_threads:
   1214 	CPR_DEBUG(CPR_DEBUG1, "starting user threads...");
   1215 	PMD(PMD_SX, ("cpr_resume: starting user threads\n"))
   1216 	cpr_start_user_threads();
   1217 	CPR_DEBUG(CPR_DEBUG1, "done\n");
   1218 	/*
   1219 	 * Ask Xorg to resume the frame buffer, and wait for it to happen
   1220 	 */
   1221 	mutex_enter(&srn_clone_lock);
   1222 	if (srn_signal) {
   1223 		PMD(PMD_SX, ("cpr_suspend: (*srn_signal)(..., "
   1224 		    "SRN_NORMAL_RESUME)\n"))
   1225 		srn_inuse = 1;		/* because (*srn_signal) cv_waits */
   1226 		(*srn_signal)(SRN_TYPE_APM, SRN_NORMAL_RESUME);
   1227 		srn_inuse = 0;
   1228 	} else {
   1229 		PMD(PMD_SX, ("cpr_suspend: srn_signal NULL\n"))
   1230 	}
   1231 	mutex_exit(&srn_clone_lock);
   1232 
   1233 #if defined(__sparc)
   1234 rb_mp_offline:
   1235 	if (cpr_mp_online())
   1236 		cpr_err(CE_WARN, "Failed to online all the processors.");
   1237 #endif
   1238 
   1239 rb_others:
   1240 	PMD(PMD_SX, ("cpr_resume: dep thread\n"))
   1241 	pm_dispatch_to_dep_thread(PM_DEP_WK_CPR_RESUME, NULL, NULL,
   1242 	    PM_DEP_WAIT, NULL, 0);
   1243 
   1244 	PMD(PMD_SX, ("cpr_resume: CPR PM callback\n"))
   1245 	(void) callb_execute_class(CB_CL_CPR_PM, CB_CODE_CPR_RESUME);
   1246 
   1247 	if (cpr_suspend_succeeded) {
   1248 		cpr_stat_record_events();
   1249 	}
   1250 
   1251 #if defined(__sparc)
   1252 	if (sleeptype == CPR_TODISK && !cpr_reusable_mode)
   1253 		cpr_clear_definfo();
   1254 #endif
   1255 
   1256 	i_cpr_free_cpus();
   1257 	CPR_DEBUG(CPR_DEBUG1, "Sending SIGTHAW...");
   1258 	PMD(PMD_SX, ("cpr_resume: SIGTHAW\n"))
   1259 	cpr_signal_user(SIGTHAW);
   1260 	CPR_DEBUG(CPR_DEBUG1, "done\n");
   1261 
   1262 	CPR_STAT_EVENT_END("Resume Total");
   1263 
   1264 	CPR_STAT_EVENT_START_TMZ("WHOLE CYCLE", &wholecycle_tv);
   1265 	CPR_STAT_EVENT_END("WHOLE CYCLE");
   1266 
   1267 	if (cpr_debug & CPR_DEBUG1)
   1268 		cmn_err(CE_CONT, "\nThe system is back where you left!\n");
   1269 
   1270 	CPR_STAT_EVENT_START("POST CPR DELAY");
   1271 
   1272 #ifdef CPR_STAT
   1273 	ctp = &cpr_term.tm_shutdown;
   1274 	CPR_STAT_EVENT_START_TMZ("PWROFF TIME", ctp);
   1275 	CPR_STAT_EVENT_END_TMZ("PWROFF TIME", &pwron_tv);
   1276 
   1277 	CPR_STAT_EVENT_PRINT();
   1278 #endif /* CPR_STAT */
   1279 
   1280 	PMD(PMD_SX, ("cpr_resume returns %x\n", rc))
   1281 	return (rc);
   1282 }
   1283 
   1284 static void
   1285 cpr_suspend_init(int sleeptype)
   1286 {
   1287 	cpr_time_t *ctp;
   1288 
   1289 	cpr_stat_init();
   1290 
   1291 	/*
   1292 	 * If cpr_suspend() failed before cpr_dump() gets a chance
   1293 	 * to reinitialize the terminator of the statefile,
   1294 	 * the values of the old terminator will still linger around.
   1295 	 * Since the terminator contains information that we need to
   1296 	 * decide whether suspend succeeded or not, we need to
   1297 	 * reinitialize it as early as possible.
   1298 	 */
   1299 	cpr_term.real_statef_size = 0;
   1300 	ctp = &cpr_term.tm_shutdown;
   1301 	bzero(ctp, sizeof (*ctp));
   1302 	ctp = &cpr_term.tm_cprboot_start;
   1303 	bzero(ctp, sizeof (*ctp));
   1304 	ctp = &cpr_term.tm_cprboot_end;
   1305 	bzero(ctp, sizeof (*ctp));
   1306 
   1307 	if (sleeptype == CPR_TODISK) {
   1308 		/*
   1309 		 * Lookup the physical address of our thread structure.
   1310 		 * This should never be invalid and the entire thread structure
   1311 		 * is expected to reside within the same pfn.
   1312 		 */
   1313 		curthreadpfn = hat_getpfnum(kas.a_hat, (caddr_t)curthread);
   1314 		ASSERT(curthreadpfn != PFN_INVALID);
   1315 		ASSERT(curthreadpfn == hat_getpfnum(kas.a_hat,
   1316 		    (caddr_t)curthread + sizeof (kthread_t) - 1));
   1317 	}
   1318 
   1319 	cpr_suspend_succeeded = 0;
   1320 }
   1321 
   1322 /*
   1323  * bring all the offline cpus online
   1324  */
   1325 static int
   1326 cpr_all_online(void)
   1327 {
   1328 	int	rc = 0;
   1329 
   1330 #ifdef	__sparc
   1331 	/*
   1332 	 * do nothing
   1333 	 */
   1334 #else
   1335 
   1336 	cpu_t	*cp;
   1337 
   1338 	ASSERT(MUTEX_HELD(&cpu_lock));
   1339 
   1340 	cp = cpu_list;
   1341 	do {
   1342 		cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
   1343 		if (!CPU_ACTIVE(cp)) {
   1344 			if ((rc = cpu_online(cp)) != 0)
   1345 				break;
   1346 			CPU_SET_CPR_FLAGS(cp, CPU_CPR_ONLINE);
   1347 		}
   1348 	} while ((cp = cp->cpu_next) != cpu_list);
   1349 
   1350 	if (rc) {
   1351 		/*
   1352 		 * an online operation failed so offline the cpus
   1353 		 * that were onlined above to restore the system
   1354 		 * to its original state
   1355 		 */
   1356 		cpr_restore_offline();
   1357 	}
   1358 #endif
   1359 	return (rc);
   1360 }
   1361 
   1362 /*
   1363  * offline all the cpus that were brought online by cpr_all_online()
   1364  */
   1365 static void
   1366 cpr_restore_offline(void)
   1367 {
   1368 
   1369 #ifdef	__sparc
   1370 	/*
   1371 	 * do nothing
   1372 	 */
   1373 #else
   1374 
   1375 	cpu_t	*cp;
   1376 	int	rc = 0;
   1377 
   1378 	ASSERT(MUTEX_HELD(&cpu_lock));
   1379 
   1380 	cp = cpu_list;
   1381 	do {
   1382 		if (CPU_CPR_IS_ONLINE(cp)) {
   1383 			rc =  cpu_offline(cp, 0);
   1384 			/*
   1385 			 * this offline should work, since the cpu was
   1386 			 * offline originally and was successfully onlined
   1387 			 * by cpr_all_online()
   1388 			 */
   1389 			ASSERT(rc == 0);
   1390 			cp->cpu_cpr_flags &= ~CPU_CPR_ONLINE;
   1391 		}
   1392 	} while ((cp = cp->cpu_next) != cpu_list);
   1393 
   1394 #endif
   1395 
   1396 }
   1397