OpenGrok

Cross Reference: panic.c
xref: /onnv/onnv-gate/usr/src/uts/common/os/panic.c
Home | History | Annotate | Line # | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
     23  */
     24 
     25 /*
     26  * When the operating system detects that it is in an invalid state, a panic
     27  * is initiated in order to minimize potential damage to user data and to
     28  * facilitate debugging.  There are three major tasks to be performed in
     29  * a system panic: recording information about the panic in memory (and thus
     30  * making it part of the crash dump), synchronizing the file systems to
     31  * preserve user file data, and generating the crash dump.  We define the
     32  * system to be in one of four states with respect to the panic code:
     33  *
     34  * CALM    - the state of the system prior to any thread initiating a panic
     35  *
     36  * QUIESCE - the state of the system when the first thread to initiate
     37  *           a system panic records information about the cause of the panic
     38  *           and renders the system quiescent by stopping other processors
     39  *
     40  * SYNC    - the state of the system when we synchronize the file systems
     41  * DUMP    - the state when we generate the crash dump.
     42  *
     43  * The transitions between these states are irreversible: once we begin
     44  * panicking, we only make one attempt to perform the actions associated with
     45  * each state.
     46  *
     47  * The panic code itself must be re-entrant because actions taken during any
     48  * state may lead to another system panic.  Additionally, any Solaris
     49  * thread may initiate a panic at any time, and so we must have synchronization
     50  * between threads which attempt to initiate a state transition simultaneously.
     51  * The panic code makes use of a special locking primitive, a trigger, to
     52  * perform this synchronization.  A trigger is simply a word which is set
     53  * atomically and can only be set once.  We declare three triggers, one for
     54  * each transition between the four states.  When a thread enters the panic
     55  * code it attempts to set each trigger; if it fails it moves on to the
     56  * next trigger.  A special case is the first trigger: if two threads race
     57  * to perform the transition to QUIESCE, the losing thread may execute before
     58  * the winner has a chance to stop its CPU.  To solve this problem, we have
     59  * the loser look ahead to see if any other triggers are set; if not, it
     60  * presumes a panic is underway and simply spins.  Unfortunately, since we
     61  * are panicking, it is not possible to know this with absolute certainty.
     62  *
     63  * There are two common reasons for re-entering the panic code once a panic
     64  * has been initiated: (1) after we debug_enter() at the end of QUIESCE,
     65  * the operator may type "sync" instead of "go", and the PROM's sync callback
     66  * routine will invoke panic(); (2) if the clock routine decides that sync
     67  * or dump is not making progress, it will invoke panic() to force a timeout.
     68  * The design assumes that a third possibility, another thread causing an
     69  * unrelated panic while sync or dump is still underway, is extremely unlikely.
     70  * If this situation occurs, we may end up triggering dump while sync is
     71  * still in progress.  This third case is considered extremely unlikely because
     72  * all other CPUs are stopped and low-level interrupts have been blocked.
     73  *
     74  * The panic code is entered via a call directly to the vpanic() function,
     75  * or its varargs wrappers panic() and cmn_err(9F).  The vpanic routine
     76  * is implemented in assembly language to record the current machine
     77  * registers, attempt to set the trigger for the QUIESCE state, and
     78  * if successful, switch stacks on to the panic_stack before calling into
     79  * the common panicsys() routine.  The first thread to initiate a panic
     80  * is allowed to make use of the reserved panic_stack so that executing
     81  * the panic code itself does not overwrite valuable data on that thread's
     82  * stack *ahead* of the current stack pointer.  This data will be preserved
     83  * in the crash dump and may prove invaluable in determining what this
     84  * thread has previously been doing.  The first thread, saved in panic_thread,
     85  * is also responsible for stopping the other CPUs as quickly as possible,
     86  * and then setting the various panic_* variables.  Most important among
     87  * these is panicstr, which allows threads to subsequently bypass held
     88  * locks so that we can proceed without ever blocking.  We must stop the
     89  * other CPUs *prior* to setting panicstr in case threads running there are
     90  * currently spinning to acquire a lock; we want that state to be preserved.
     91  * Every thread which initiates a panic has its T_PANIC flag set so we can
     92  * identify all such threads in the crash dump.
     93  *
     94  * The panic_thread is also allowed to make use of the special memory buffer
     95  * panicbuf, which on machines with appropriate hardware is preserved across
     96  * reboots.  We allow the panic_thread to store its register set and panic
     97  * message in this buffer, so even if we fail to obtain a crash dump we will
     98  * be able to examine the machine after reboot and determine some of the
     99  * state at the time of the panic.  If we do get a dump, the panic buffer
    100  * data is structured so that a debugger can easily consume the information
    101  * therein (see <sys/panic.h>).
    102  *
    103  * Each platform or architecture is required to implement the functions
    104  * panic_savetrap() to record trap-specific information to panicbuf,
    105  * panic_saveregs() to record a register set to panicbuf, panic_stopcpus()
    106  * to halt all CPUs but the panicking CPU, panic_quiesce_hw() to perform
    107  * miscellaneous platform-specific tasks *after* panicstr is set,
    108  * panic_showtrap() to print trap-specific information to the console,
    109  * and panic_dump_hw() to perform platform tasks prior to calling dumpsys().
    110  *
    111  * A Note on Word Formation, courtesy of the Oxford Guide to English Usage:
    112  *
    113  * Words ending in -c interpose k before suffixes which otherwise would
    114  * indicate a soft c, and thus the verb and adjective forms of 'panic' are
    115  * spelled "panicked", "panicking", and "panicky" respectively.  Use of
    116  * the ill-conceived "panicing" and "panic'd" is discouraged.
    117  */
    118 
    119 #include <sys/types.h>
    120 #include <sys/varargs.h>
    121 #include <sys/sysmacros.h>
    122 #include <sys/cmn_err.h>
    123 #include <sys/cpuvar.h>
    124 #include <sys/thread.h>
    125 #include <sys/t_lock.h>
    126 #include <sys/cred.h>
    127 #include <sys/systm.h>
    128 #include <sys/archsystm.h>
    129 #include <sys/uadmin.h>
    130 #include <sys/callb.h>
    131 #include <sys/vfs.h>
    132 #include <sys/log.h>
    133 #include <sys/disp.h>
    134 #include <sys/param.h>
    135 #include <sys/dumphdr.h>
    136 #include <sys/ftrace.h>
    137 #include <sys/reboot.h>
    138 #include <sys/debug.h>
    139 #include <sys/stack.h>
    140 #include <sys/spl.h>
    141 #include <sys/errorq.h>
    142 #include <sys/panic.h>
    143 #include <sys/fm/util.h>
    144 #include <sys/clock_impl.h>
    145 
    146 /*
    147  * Panic variables which are set once during the QUIESCE state by the
    148  * first thread to initiate a panic.  These are examined by post-mortem
    149  * debugging tools; the inconsistent use of 'panic' versus 'panic_' in
    150  * the variable naming is historical and allows legacy tools to work.
         *
         * Apart from panic_stack, each of these is assigned in the
         * on_panic_stack branch of panicsys(), after panic_stopcpus() has been
         * called.  panic_schedflag, panic_bound_cpu and panic_preempt hold the
         * values the thread had on entry to panicsys(), before panicsys()
         * modified the corresponding thread fields.
    151  */
    152 #pragma align STACK_ALIGN(panic_stack)
    153 char panic_stack[PANICSTKSIZE];		/* reserved stack for panic_thread */
    154 kthread_t *panic_thread;		/* first thread to call panicsys() */
    155 cpu_t panic_cpu;			/* copy of cpu from first panicsys() */
    156 label_t panic_regs;			/* setjmp label from panic_thread */
    157 struct regs *panic_reg;			/* regs struct from first panicsys() */
    158 char *volatile panicstr;		/* format string to first panicsys() */
    159 va_list panicargs;			/* arguments to first panicsys() */
    160 clock_t panic_lbolt;			/* lbolt at time of panic */
    161 int64_t panic_lbolt64;			/* lbolt64 at time of panic */
    162 hrtime_t panic_hrtime;			/* hrtime at time of panic */
    163 timespec_t panic_hrestime;		/* hrestime at time of panic */
    164 int panic_ipl;				/* ipl on panic_cpu at time of panic */
    165 ushort_t panic_schedflag;		/* panic_thread's prior t_schedflag */
    166 cpu_t *panic_bound_cpu;			/* panic_thread's prior t_bound_cpu */
    167 char panic_preempt;			/* panic_thread's prior t_preempt */
    168 
    169 /*
    170  * Panic variables which can be set via /etc/system or patched while
    171  * the system is in operation.  Again, the inconsistent names are
         * historical.  panicsys() only reads these.
    172  */
    173 char *panic_bootstr = NULL;		/* mdboot string to use after panic */
    174 int panic_bootfcn = AD_BOOT;		/* mdboot function to use after panic */
    175 int halt_on_panic = 0;  		/* halt after dump instead of reboot? */
    176 int nopanicdebug = 0;			/* reboot instead of call debugger? */
    177 int in_sync = 0;			/* skip vfs_syncall() and just dump? */
    178 
    179 /*
    180  * The do_polled_io flag is set by the panic code to inform the SCSI subsystem
    181  * to use polled mode instead of interrupt-driven i/o.  panicsys() sets it
         * to 1 immediately before calling vfs_syncall() and again immediately
         * before calling dumpsys().
    182  */
    183 int do_polled_io = 0;
    184 
    185 /*
    186  * The panic_forced flag is set by the uadmin A_DUMP code to inform the
    187  * panic subsystem that it should not attempt an initial debug_enter.
         * panicsys() only tests it, in the first-panicker path; the later
         * "continue to reboot" debug_enter() does not consult it.
    188  */
    189 int panic_forced = 0;
    190 
    191 /*
    192  * Triggers for panic state transitions:
         *
         * panic_sync and panic_dump are claimed with panic_trigger() inside
         * panicsys().  panic_quiesce is not written in the visible part of this
         * file; per the block comment at the top of the file it is set by the
         * assembly vpanic() routine before panicsys() is called.
    193  */
    194 int panic_quiesce;			/* trigger for CALM    -> QUIESCE */
    195 int panic_sync;				/* trigger for QUIESCE -> SYNC */
    196 int panic_dump;				/* trigger for SYNC    -> DUMP */
    197 
    198 /*
    199  * Variable signifying quiesce(9E) is in progress.
         * NOTE(review): nothing in the visible part of this file reads or
         * writes it; presumably it is maintained by code elsewhere -- confirm.
    200  */
    201 volatile int quiesce_active = 0;
    202 
        /*
         * Common panic routine, reached from vpanic() (see the block comment at
         * the top of this file).  It advances the panic state machine as far as
         * the calling thread is entitled to take it:
         *
         *  - on_panic_stack != 0: first-panicker path.  Record the trap or
         *    register state and the formatted message in panicbuf, call
         *    panic_stopcpus(), then set panicstr and the other one-time panic_*
         *    variables, run the CB_CL_PANIC callbacks, flush log_intrq and print
         *    unconsumed log_consq messages, print the panic message and register
         *    trace, and optionally enter the debugger.
         *  - otherwise, if panic_dump, panic_sync or panicstr is already set,
         *    this is a re-entry into a panic in progress: print the message only.
         *  - otherwise jump straight to the spin loop at the bottom.
         *
         * Unless it spins, the thread then syncs the file systems (if in_sync is
         * zero and it wins the panic_sync trigger), takes the crash dump (if it
         * wins the panic_dump trigger), and calls mdboot().
         *
         * format, alist  - printf-style panic message and its arguments
         * rp             - register set passed to panic_saveregs() and
         *                  traceregs(), and remembered in panic_reg
         * on_panic_stack - nonzero selects the first-panicker path; per the file
         *                  header, vpanic() switches to panic_stack only after
         *                  winning the QUIESCE trigger
         *
         * There is no return statement: control ends in mdboot() or, should that
         * come back, in the infinite loop at the spin label.
         */
    203 void
    204 panicsys(const char *format, va_list alist, struct regs *rp, int on_panic_stack)
    205 {
        	/*
        	 * Result of spl8(); used below as the priority level that was in
        	 * effect before the panic (see panic_ipl and the final splx()).
        	 */
    206 	int s = spl8();
    207 	kthread_t *t = curthread;
    208 	cpu_t *cp = CPU;
    209 
    210 	caddr_t intr_stack = NULL;
        	/* Assigned together with intr_stack; read only if that is non-NULL. */
    211 	uint_t intr_actv;
    212 
        	/*
        	 * Snapshot the thread's scheduling fields before they are modified
        	 * below; the first panicker publishes them as panic_schedflag,
        	 * panic_bound_cpu and panic_preempt.
        	 */
    213 	ushort_t schedflag = t->t_schedflag;
    214 	cpu_t *bound_cpu = t->t_bound_cpu;
    215 	char preempt = t->t_preempt;
    216 
        	/*
        	 * Save this thread's context in t_pcb (copied into panic_regs by
        	 * the first panicker) and flag the thread with T_PANIC so that it
        	 * can be identified in the crash dump.
        	 */
    217 	(void) setjmp(&t->t_pcb);
    218 	t->t_flag |= T_PANIC;
    219 
        	/*
        	 * Set TS_DONT_SWAP, bind the thread to the current CPU and
        	 * increment t_preempt -- presumably to keep the panicking thread
        	 * resident and running on this CPU; confirm against the dispatcher.
        	 */
    220 	t->t_schedflag |= TS_DONT_SWAP;
    221 	t->t_bound_cpu = cp;
    222 	t->t_preempt++;
    223 
    224 	panic_enter_hw(s);
    225 
    226 	/*
    227 	 * If we're on the interrupt stack and an interrupt thread is available
    228 	 * in this CPU's pool, preserve the interrupt stack by detaching an
    229 	 * interrupt thread and making its stack the intr_stack.
    230 	 */
    231 	if (CPU_ON_INTR(cp) && cp->cpu_intr_thread != NULL) {
    232 		kthread_t *it = cp->cpu_intr_thread;
    233 
        		/* Remember the original values for the panic_cpu copy below. */
    234 		intr_stack = cp->cpu_intr_stack;
    235 		intr_actv = cp->cpu_intr_actv;
    236 
    237 		cp->cpu_intr_stack = thread_stk_init(it->t_stk);
    238 		cp->cpu_intr_thread = it->t_link;
    239 
    240 		/*
    241 		 * Clear only the high level bits of cpu_intr_actv.
    242 		 * We want to indicate that high-level interrupts are
    243 		 * not active without destroying the low-level interrupt
    244 		 * information stored there.
    245 		 */
    246 		cp->cpu_intr_actv &= ((1 << (LOCK_LEVEL + 1)) - 1);
    247 	}
    248 
    249 	/*
    250 	 * Record one-time panic information and quiesce the other CPUs.
    251 	 * Then print out the panic message and stack trace.
    252 	 */
    253 	if (on_panic_stack) {
    254 		panic_data_t *pdp = (panic_data_t *)panicbuf;
    255 
    256 		pdp->pd_version = PANICBUFVERS;
    257 		pdp->pd_msgoff = sizeof (panic_data_t) - sizeof (panic_nv_t);
    258 
        		/*
        		 * NOTE(review): strncpy() leaves pd_uuid unterminated if
        		 * dump_get_uuid() returns sizeof (pd_uuid) or more characters;
        		 * confirm the UUID always fits with its terminator.
        		 */
    259 		(void) strncpy(pdp->pd_uuid, dump_get_uuid(),
    260 		    sizeof (pdp->pd_uuid));
    261 
        		/*
        		 * Trap-specific state takes precedence over the plain
        		 * register set when the thread has a t_panic_trap.
        		 */
    262 		if (t->t_panic_trap != NULL)
    263 			panic_savetrap(pdp, t->t_panic_trap);
    264 		else
    265 			panic_saveregs(pdp, rp);
    266 
        		/*
        		 * Format the message into panicbuf at pd_msgoff, which is
        		 * re-read here after the save routine above was given pdp.
        		 *
        		 * NOTE(review): alist is handed to vsnprintf() here and is
        		 * reused by va_copy() and vprintf() below; this relies on the
        		 * callee not consuming the caller's va_list -- confirm.
        		 */
    267 		(void) vsnprintf(&panicbuf[pdp->pd_msgoff],
    268 		    PANICBUFSIZE - pdp->pd_msgoff, format, alist);
    269 
    270 		/*
    271 		 * Call into the platform code to stop the other CPUs.
    272 		 * We currently have all interrupts blocked, and expect that
    273 		 * the platform code will lower ipl only as far as needed to
    274 		 * perform cross-calls, and will acquire as *few* locks as is
    275 		 * possible -- panicstr is not set so we can still deadlock.
    276 		 */
    277 		panic_stopcpus(cp, t, s);
    278 
        		/*
        		 * Per the file header, panicstr must not be set until the
        		 * other CPUs have been stopped; publish it and the rest of
        		 * the one-time panic_* state only now.
        		 */
    279 		panicstr = (char *)format;
    280 		va_copy(panicargs, alist);
    281 		panic_lbolt = LBOLT_NO_ACCOUNT;
    282 		panic_lbolt64 = LBOLT_NO_ACCOUNT64;
    283 		panic_hrestime = hrestime;
    284 		panic_hrtime = gethrtime_waitfree();
    285 		panic_thread = t;
    286 		panic_regs = t->t_pcb;
    287 		panic_reg = rp;
    288 		panic_cpu = *cp;
    289 		panic_ipl = spltoipl(s);
    290 		panic_schedflag = schedflag;
    291 		panic_bound_cpu = bound_cpu;
    292 		panic_preempt = preempt;
    293 
        		/*
        		 * panic_cpu is a copy of *cp taken after the interrupt stack
        		 * was swapped above; put the original interrupt stack and
        		 * active mask back into the copy.
        		 */
    294 		if (intr_stack != NULL) {
    295 			panic_cpu.cpu_intr_stack = intr_stack;
    296 			panic_cpu.cpu_intr_actv = intr_actv;
    297 		}
    298 
    299 		/*
    300 		 * Lower ipl to 10 to keep clock() from running, but allow
    301 		 * keyboard interrupts to enter the debugger.  These callbacks
    302 		 * are executed with panicstr set so they can bypass locks.
    303 		 */
    304 		splx(ipltospl(CLOCK_LEVEL));
    305 		panic_quiesce_hw(pdp);
    306 		(void) FTRACE_STOP();
    307 		(void) callb_execute_class(CB_CL_PANIC, NULL);
    308 
    309 		if (log_intrq != NULL)
    310 			log_flushq(log_intrq);
    311 
    312 		/*
    313 		 * If log_consq has been initialized and syslogd has started,
    314 		 * print any messages in log_consq that haven't been consumed.
    315 		 */
    316 		if (log_consq != NULL && log_consq != log_backlogq)
    317 			log_printq(log_consq);
    318 
    319 		fm_banner();
    320 
    321 #if defined(__x86)
    322 		/*
    323 		 * A hypervisor panic originates outside of Solaris, so we
    324 		 * don't want to prepend the panic message with misleading
    325 		 * pointers from within Solaris.
    326 		 */
    327 		if (!IN_XPV_PANIC())
    328 #endif
    329 			printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id,
    330 			    (void *)t);
        		/* Not governed by the conditional above: always printed. */
    331 		vprintf(format, alist);
    332 		printf("\n\n");
    333 
    334 		if (t->t_panic_trap != NULL) {
    335 			panic_showtrap(t->t_panic_trap);
    336 			printf("\n");
    337 		}
    338 
    339 		traceregs(rp);
    340 		printf("\n");
    341 
        		/*
        		 * Offer the debugger when boothowto has RB_DEBUG or obpdebug
        		 * is set, unless nopanicdebug or panic_forced says otherwise.
        		 * The two messages differ only in whether a dump device
        		 * (dumpvp) is configured.
        		 */
    342 		if (((boothowto & RB_DEBUG) || obpdebug) &&
    343 		    !nopanicdebug && !panic_forced) {
    344 			if (dumpvp != NULL) {
    345 				debug_enter("panic: entering debugger "
    346 				    "(continue to save dump)");
    347 			} else {
    348 				debug_enter("panic: entering debugger "
    349 				    "(no dump device, continue to reboot)");
    350 			}
    351 		}
    352 
    353 	} else if (panic_dump != 0 || panic_sync != 0 || panicstr != NULL) {
        		/*
        		 * Not the first panicker, but a panic is visibly under way:
        		 * announce this re-entry and continue to sync/dump below.
        		 */
    354 		printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id, (void *)t);
    355 		vprintf(format, alist);
    356 		printf("\n");
    357 	} else
        		/*
        		 * Lost the race for the first trigger and no later trigger or
        		 * panicstr is set yet: presume a panic is under way and spin.
        		 */
    358 		goto spin;
    359 
    360 	/*
    361 	 * Prior to performing sync or dump, we make sure that do_polled_io is
    362 	 * set, but we'll leave ipl at 10; deadman(), a CY_HIGH_LEVEL cyclic,
    363 	 * will re-enter panic if we are not making progress with sync or dump.
    364 	 */
    365 
    366 	/*
    367 	 * Sync the filesystems.  Reset t_cred if not set because much of
    368 	 * the filesystem code depends on CRED() being valid.
    369 	 */
    370 	if (!in_sync && panic_trigger(&panic_sync)) {
    371 		if (t->t_cred == NULL)
    372 			t->t_cred = kcred;
    373 		splx(ipltospl(CLOCK_LEVEL));
    374 		do_polled_io = 1;
    375 		vfs_syncall();
    376 	}
    377 
    378 	/*
    379 	 * Take the crash dump.  If the dump trigger is already set, try to
    380 	 * enter the debugger again before rebooting the system.
    381 	 */
    382 	if (panic_trigger(&panic_dump)) {
    383 		panic_dump_hw(s);
    384 		splx(ipltospl(CLOCK_LEVEL));
    385 		errorq_panic();
    386 		do_polled_io = 1;
    387 		dumpsys();
    388 	} else if (((boothowto & RB_DEBUG) || obpdebug) && !nopanicdebug) {
    389 		debug_enter("panic: entering debugger (continue to reboot)");
    390 	} else
    391 		printf("dump aborted: please record the above information!\n");
    392 
        	/*
        	 * halt_on_panic forces AD_HALT; otherwise use the tunable boot
        	 * function and boot string.
        	 */
    393 	if (halt_on_panic)
    394 		mdboot(A_REBOOT, AD_HALT, NULL, B_FALSE);
    395 	else
    396 		mdboot(A_REBOOT, panic_bootfcn, panic_bootstr, B_FALSE);
    397 spin:
    398 	/*
    399 	 * Restore ipl to at most CLOCK_LEVEL so we don't end up spinning
    400 	 * and unable to jump into the debugger.
    401 	 */
    402 	splx(MIN(s, ipltospl(CLOCK_LEVEL)));
    403 	for (;;)
    404 		;
    405 }
    406 
        /*
         * Varargs front end to vpanic(): collects the arguments that follow
         * format into a va_list and passes both on.  The block comment at the
         * top of this file describes vpanic() as the assembly-language entry
         * point that records the machine registers before calling panicsys().
         *
         * format - printf-style panic message; the remaining arguments are its
         *          parameters
         *
         * NOTE(review): panicsys() ends in mdboot() or an infinite loop, so
         * vpanic() is presumably not expected to return and the va_end() below
         * is reached only if it does -- confirm.
         */
    407 void
    408 panic(const char *format, ...)
    409 {
    410 	va_list alist;
    411 
    412 	va_start(alist, format);
    413 	vpanic(format, alist);
    414 	va_end(alist);
    415 }
    416