Home | History | Annotate | Download | only in cpr
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 #include <sys/types.h>
     29 #include <sys/thread.h>
     30 #include <sys/conf.h>
     31 #include <sys/cpuvar.h>
     32 #include <sys/cpr.h>
     33 #include <sys/user.h>
     34 #include <sys/cmn_err.h>
     35 #include <sys/callb.h>
     36 
     37 extern void utstop_init(void);
     38 extern void add_one_utstop(void);
     39 extern void utstop_timedwait(long ticks);
     40 
     41 static void cpr_stop_user(int);
     42 static int cpr_check_user_threads(void);
     43 
     44 /*
     45  * CPR user thread related support routines
     46  */
     47 void
     48 cpr_signal_user(int sig)
     49 {
     50 /*
     51  * The signal SIGTHAW and SIGFREEZE cannot be sent to every thread yet
     52  * since openwin is catching every signal and default action is to exit.
     53  * We also need to implement the true SIGFREEZE and SIGTHAW to stop threads.
     54  */
     55 	struct proc *p;
     56 
     57 	mutex_enter(&pidlock);
     58 
     59 	for (p = practive; p; p = p->p_next) {
     60 		/* only user threads */
     61 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
     62 		    p == proc_init || p == ttoproc(curthread))
     63 			continue;
     64 
     65 		mutex_enter(&p->p_lock);
     66 		sigtoproc(p, NULL, sig);
     67 		mutex_exit(&p->p_lock);
     68 	}
     69 	mutex_exit(&pidlock);
     70 
     71 	DELAY(MICROSEC);
     72 }
     73 
     74 /* max wait time for user thread stop */
     75 #define	CPR_UTSTOP_WAIT		hz
     76 #define	CPR_UTSTOP_RETRY	4
     77 static int count;
     78 
     79 int
     80 cpr_stop_user_threads()
     81 {
     82 	utstop_init();
     83 
     84 	count = 0;
     85 	do {
     86 		if (++count > CPR_UTSTOP_RETRY)
     87 			return (ESRCH);
     88 		cpr_stop_user(count * count * CPR_UTSTOP_WAIT);
     89 	} while (cpr_check_user_threads() &&
     90 	    (count < CPR_UTSTOP_RETRY || CPR->c_fcn != AD_CPR_FORCE));
     91 
     92 	return (0);
     93 }
     94 
     95 /*
     96  * This routine tries to stop all user threads before we get rid of all
     97  * its pages.It goes through allthreads list and set the TP_CHKPT flag
     98  * for all user threads and make them runnable. If all of the threads
     99  * can be stopped within the max wait time, CPR will proceed. Otherwise
    100  * CPR is aborted after a few of similiar retries.
    101  */
    102 static void
    103 cpr_stop_user(int wait)
    104 {
    105 	kthread_id_t tp;
    106 	proc_t *p;
    107 
    108 	/* The whole loop below needs to be atomic */
    109 	mutex_enter(&pidlock);
    110 
    111 	/* faster this way */
    112 	tp = curthread->t_next;
    113 	do {
    114 		/* kernel threads will be handled later */
    115 		p = ttoproc(tp);
    116 		if (p->p_as == &kas || p->p_stat == SZOMB)
    117 			continue;
    118 
    119 		/*
    120 		 * If the thread is stopped (by CPR) already, do nothing;
    121 		 * if running, mark TP_CHKPT;
    122 		 * if sleeping normally, mark TP_CHKPT and setrun;
    123 		 * if sleeping non-interruptable, mark TP_CHKPT only for now;
    124 		 * if sleeping with t_wchan0 != 0 etc, virtually stopped,
    125 		 * do nothing.
    126 		 */
    127 
    128 		/* p_lock is needed for modifying t_proc_flag */
    129 		mutex_enter(&p->p_lock);
    130 		thread_lock(tp); /* needed to check CPR_ISTOPPED */
    131 
    132 		if (tp->t_state == TS_STOPPED) {
    133 			/*
    134 			 * if already stopped by other reasons, add this new
    135 			 * reason to it.
    136 			 */
    137 			if (tp->t_schedflag & TS_RESUME)
    138 				tp->t_schedflag &= ~TS_RESUME;
    139 		} else {
    140 
    141 			tp->t_proc_flag |= TP_CHKPT;
    142 
    143 			thread_unlock(tp);
    144 			mutex_exit(&p->p_lock);
    145 			add_one_utstop();
    146 			mutex_enter(&p->p_lock);
    147 			thread_lock(tp);
    148 
    149 			aston(tp);
    150 
    151 			if (ISWAKEABLE(tp) || ISWAITING(tp)) {
    152 				setrun_locked(tp);
    153 			}
    154 		}
    155 		/*
    156 		 * force the thread into the kernel if it is not already there.
    157 		 */
    158 		if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
    159 			poke_cpu(tp->t_cpu->cpu_id);
    160 		thread_unlock(tp);
    161 		mutex_exit(&p->p_lock);
    162 
    163 	} while ((tp = tp->t_next) != curthread);
    164 	mutex_exit(&pidlock);
    165 
    166 	utstop_timedwait(wait);
    167 }
    168 
    169 /*
    170  * Checks and makes sure all user threads are stopped
    171  */
    172 static int
    173 cpr_check_user_threads()
    174 {
    175 	kthread_id_t tp;
    176 	int rc = 0;
    177 
    178 	mutex_enter(&pidlock);
    179 	tp = curthread->t_next;
    180 	do {
    181 		if (ttoproc(tp)->p_as == &kas || ttoproc(tp)->p_stat == SZOMB)
    182 			continue;
    183 
    184 		thread_lock(tp);
    185 		/*
    186 		 * make sure that we are off all the queues and in a stopped
    187 		 * state.
    188 		 */
    189 		if (!CPR_ISTOPPED(tp)) {
    190 			thread_unlock(tp);
    191 			mutex_exit(&pidlock);
    192 
    193 			if (count == CPR_UTSTOP_RETRY) {
    194 			CPR_DEBUG(CPR_DEBUG1, "Suspend failed: "
    195 			    "cannot stop uthread\n");
    196 			cpr_err(CE_WARN, "Suspend cannot stop "
    197 			    "process %s (%p:%x).",
    198 			    ttoproc(tp)->p_user.u_psargs, (void *)tp,
    199 			    tp->t_state);
    200 			cpr_err(CE_WARN, "Process may be waiting for"
    201 			    " network request, please try again.");
    202 			}
    203 
    204 			CPR_DEBUG(CPR_DEBUG2, "cant stop t=%p state=%x pfg=%x "
    205 			    "sched=%x\n", (void *)tp, tp->t_state,
    206 			    tp->t_proc_flag, tp->t_schedflag);
    207 			CPR_DEBUG(CPR_DEBUG2, "proc %p state=%x pid=%d\n",
    208 			    (void *)ttoproc(tp), ttoproc(tp)->p_stat,
    209 			    ttoproc(tp)->p_pidp->pid_id);
    210 			return (1);
    211 		}
    212 		thread_unlock(tp);
    213 
    214 	} while ((tp = tp->t_next) != curthread && rc == 0);
    215 
    216 	mutex_exit(&pidlock);
    217 	return (0);
    218 }
    219 
    220 
    221 /*
    222  * start all threads that were stopped for checkpoint.
    223  */
    224 void
    225 cpr_start_user_threads()
    226 {
    227 	kthread_id_t tp;
    228 	proc_t *p;
    229 
    230 	mutex_enter(&pidlock);
    231 	tp = curthread->t_next;
    232 	do {
    233 		p = ttoproc(tp);
    234 		/*
    235 		 * kernel threads are callback'ed rather than setrun.
    236 		 */
    237 		if (ttoproc(tp)->p_as == &kas) continue;
    238 		/*
    239 		 * t_proc_flag should have been cleared. Just to make sure here
    240 		 */
    241 		mutex_enter(&p->p_lock);
    242 		tp->t_proc_flag &= ~TP_CHKPT;
    243 		mutex_exit(&p->p_lock);
    244 
    245 		thread_lock(tp);
    246 		if (CPR_ISTOPPED(tp)) {
    247 
    248 			/*
    249 			 * put it back on the runq
    250 			 */
    251 			tp->t_schedflag |= TS_RESUME;
    252 			setrun_locked(tp);
    253 		}
    254 		thread_unlock(tp);
    255 		/*
    256 		 * DEBUG - Keep track of current and next thread pointer.
    257 		 */
    258 	} while ((tp = tp->t_next) != curthread);
    259 
    260 	mutex_exit(&pidlock);
    261 }
    262 
    263 
    264 /*
    265  * re/start kernel threads
    266  */
    267 void
    268 cpr_start_kernel_threads(void)
    269 {
    270 	CPR_DEBUG(CPR_DEBUG1, "starting kernel daemons...");
    271 	(void) callb_execute_class(CB_CL_CPR_DAEMON, CB_CODE_CPR_RESUME);
    272 	CPR_DEBUG(CPR_DEBUG1, "done\n");
    273 
    274 	/* see table lock below */
    275 	callb_unlock_table();
    276 }
    277 
    278 
    279 /*
    280  * Stop kernel threads by using the callback mechanism.  If any thread
    281  * cannot be stopped, return failure.
    282  */
    283 int
    284 cpr_stop_kernel_threads(void)
    285 {
    286 	caddr_t	name;
    287 
    288 	callb_lock_table();	/* Note: we unlock the table in resume. */
    289 
    290 	CPR_DEBUG(CPR_DEBUG1, "stopping kernel daemons...");
    291 	if ((name = callb_execute_class(CB_CL_CPR_DAEMON,
    292 	    CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) {
    293 		cpr_err(CE_WARN,
    294 		    "Could not stop \"%s\" kernel thread.  "
    295 		    "Please try again later.", name);
    296 		return (EBUSY);
    297 	}
    298 
    299 	CPR_DEBUG(CPR_DEBUG1, ("done\n"));
    300 	return (0);
    301 }
    302 
    303 /*
    304  * Check to see that kernel threads are stopped.
    305  * This should be called while CPU's are paused, and the caller is
    306  * effectively running single user, or else we are virtually guaranteed
    307  * to fail.  The routine should not ASSERT on the paused state or spl
    308  * level, as there may be a use for this to verify that things are running
    309  * again.
    310  */
    311 int
    312 cpr_threads_are_stopped(void)
    313 {
    314 	caddr_t	name;
    315 	kthread_id_t tp;
    316 	proc_t *p;
    317 
    318 	/*
    319 	 * We think we stopped all the kernel threads.  Just in case
    320 	 * someone is not playing by the rules, take a spin through
    321 	 * the threadlist and see if we can account for everybody.
    322 	 */
    323 	mutex_enter(&pidlock);
    324 	tp = curthread->t_next;
    325 	do {
    326 		p = ttoproc(tp);
    327 		if (p->p_as != &kas)
    328 			continue;
    329 
    330 		if (tp->t_flag & T_INTR_THREAD)
    331 			continue;
    332 
    333 		if (! callb_is_stopped(tp, &name)) {
    334 			mutex_exit(&pidlock);
    335 			cpr_err(CE_WARN,
    336 			    "\"%s\" kernel thread not stopped.", name);
    337 			return (EBUSY);
    338 		}
    339 	} while ((tp = tp->t_next) != curthread);
    340 
    341 	mutex_exit(&pidlock);
    342 	return (0);
    343 }
    344