1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
/*
 * When the operating system detects that it is in an invalid state, a panic
 * is initiated in order to minimize potential damage to user data and to
 * facilitate debugging. There are three major tasks to be performed in
 * a system panic: recording information about the panic in memory (and thus
 * making it part of the crash dump), synchronizing the file systems to
 * preserve user file data, and generating the crash dump. We define the
 * system to be in one of four states with respect to the panic code:
 *
 * CALM    - the state of the system prior to any thread initiating a panic
 *
 * QUIESCE - the state of the system when the first thread to initiate
 *           a system panic records information about the cause of the panic
 *           and renders the system quiescent by stopping other processors
 *
 * SYNC    - the state of the system when we synchronize the file systems
 * DUMP    - the state when we generate the crash dump.
 *
 * The transitions between these states are irreversible: once we begin
 * panicking, we only make one attempt to perform the actions associated with
 * each state.
 *
 * The panic code itself must be re-entrant because actions taken during any
 * state may lead to another system panic. Additionally, any Solaris
 * thread may initiate a panic at any time, and so we must have synchronization
 * between threads which attempt to initiate a state transition simultaneously.
 * The panic code makes use of a special locking primitive, a trigger, to
 * perform this synchronization. A trigger is simply a word which is set
 * atomically and can only be set once. We declare three triggers, one for
 * each transition between the four states. When a thread enters the panic
 * code it attempts to set each trigger; if it fails it moves on to the
 * next trigger. A special case is the first trigger: if two threads race
 * to perform the transition to QUIESCE, the losing thread may execute before
 * the winner has a chance to stop its CPU. To solve this problem, we have
 * the loser look ahead to see if any other triggers are set; if not, it
 * presumes a panic is underway and simply spins. Unfortunately, since we
 * are panicking, it is not possible to know this with absolute certainty.
 *
 * There are two common reasons for re-entering the panic code once a panic
 * has been initiated: (1) after we debug_enter() at the end of QUIESCE,
 * the operator may type "sync" instead of "go", and the PROM's sync callback
 * routine will invoke panic(); (2) if the clock routine decides that sync
 * or dump is not making progress, it will invoke panic() to force a timeout.
 * The design assumes that a third possibility, another thread causing an
 * unrelated panic while sync or dump is still underway, is extremely unlikely.
 * If this situation occurs, we may end up triggering dump while sync is
 * still in progress. This third case is considered extremely unlikely because
 * all other CPUs are stopped and low-level interrupts have been blocked.
 *
 * The panic code is entered via a call directly to the vpanic() function,
 * or its varargs wrappers panic() and cmn_err(9F). The vpanic routine
 * is implemented in assembly language to record the current machine
 * registers, attempt to set the trigger for the QUIESCE state, and
 * if successful, switch stacks on to the panic_stack before calling into
 * the common panicsys() routine. The first thread to initiate a panic
 * is allowed to make use of the reserved panic_stack so that executing
 * the panic code itself does not overwrite valuable data on that thread's
 * stack *ahead* of the current stack pointer. This data will be preserved
 * in the crash dump and may prove invaluable in determining what this
 * thread has previously been doing. The first thread, saved in panic_thread,
 * is also responsible for stopping the other CPUs as quickly as possible,
 * and then setting the various panic_* variables. Most important among
 * these is panicstr, which allows threads to subsequently bypass held
 * locks so that we can proceed without ever blocking. We must stop the
 * other CPUs *prior* to setting panicstr in case threads running there are
 * currently spinning to acquire a lock; we want that state to be preserved.
 * Every thread which initiates a panic has its T_PANIC flag set so we can
 * identify all such threads in the crash dump.
 *
 * The panic_thread is also allowed to make use of the special memory buffer
 * panicbuf, which on machines with appropriate hardware is preserved across
 * reboots. We allow the panic_thread to store its register set and panic
 * message in this buffer, so even if we fail to obtain a crash dump we will
 * be able to examine the machine after reboot and determine some of the
 * state at the time of the panic. If we do get a dump, the panic buffer
 * data is structured so that a debugger can easily consume the information
 * therein (see <sys/panic.h>).
 *
 * Each platform or architecture is required to implement the functions
 * panic_savetrap() to record trap-specific information to panicbuf,
 * panic_saveregs() to record a register set to panicbuf, panic_stopcpus()
 * to halt all CPUs but the panicking CPU, panic_quiesce_hw() to perform
 * miscellaneous platform-specific tasks *after* panicstr is set,
 * panic_showtrap() to print trap-specific information to the console,
 * and panic_dump_hw() to perform platform tasks prior to calling dumpsys().
 *
 * A Note on Word Formation, courtesy of the Oxford Guide to English Usage:
 *
 * Words ending in -c interpose k before suffixes which otherwise would
 * indicate a soft c, and thus the verb and adjective forms of 'panic' are
 * spelled "panicked", "panicking", and "panicky" respectively. Use of
 * the ill-conceived "panicing" and "panic'd" is discouraged.
 */
118
119 #include <sys/types.h>
120 #include <sys/varargs.h>
121 #include <sys/sysmacros.h>
122 #include <sys/cmn_err.h>
123 #include <sys/cpuvar.h>
124 #include <sys/thread.h>
125 #include <sys/t_lock.h>
126 #include <sys/cred.h>
127 #include <sys/systm.h>
128 #include <sys/archsystm.h>
129 #include <sys/uadmin.h>
130 #include <sys/callb.h>
131 #include <sys/vfs.h>
132 #include <sys/log.h>
133 #include <sys/disp.h>
134 #include <sys/param.h>
135 #include <sys/dumphdr.h>
136 #include <sys/ftrace.h>
137 #include <sys/reboot.h>
138 #include <sys/debug.h>
139 #include <sys/stack.h>
140 #include <sys/spl.h>
141 #include <sys/errorq.h>
142 #include <sys/panic.h>
143 #include <sys/fm/util.h>
144 #include <sys/clock_impl.h>
145
146 /*
147 * Panic variables which are set once during the QUIESCE state by the
148 * first thread to initiate a panic. These are examined by post-mortem
149 * debugging tools; the inconsistent use of 'panic' versus 'panic_' in
150 * the variable naming is historical and allows legacy tools to work.
151 */
152 #pragma align STACK_ALIGN(panic_stack)
153 char panic_stack[PANICSTKSIZE]; /* reserved stack for panic_thread */
154 kthread_t *panic_thread; /* first thread to call panicsys() */
155 cpu_t panic_cpu; /* cpu from first call to panicsys() */
156 label_t panic_regs; /* setjmp label from panic_thread */
157 struct regs *panic_reg; /* regs struct from first panicsys() */
158 char *volatile panicstr; /* format string to first panicsys() */
159 va_list panicargs; /* arguments to first panicsys() */
160 clock_t panic_lbolt; /* lbolt at time of panic */
161 int64_t panic_lbolt64; /* lbolt64 at time of panic */
162 hrtime_t panic_hrtime; /* hrtime at time of panic */
163 timespec_t panic_hrestime; /* hrestime at time of panic */
164 int panic_ipl; /* ipl on panic_cpu at time of panic */
165 ushort_t panic_schedflag; /* t_schedflag for panic_thread */
166 cpu_t *panic_bound_cpu; /* t_bound_cpu for panic_thread */
167 char panic_preempt; /* t_preempt for panic_thread */
168
169 /*
170 * Panic variables which can be set via /etc/system or patched while
171 * the system is in operation. Again, the stupid names are historic.
172 */
173 char *panic_bootstr = NULL; /* mdboot string to use after panic */
174 int panic_bootfcn = AD_BOOT; /* mdboot function to use after panic */
175 int halt_on_panic = 0; /* halt after dump instead of reboot? */
176 int nopanicdebug = 0; /* reboot instead of call debugger? */
177 int in_sync = 0; /* skip vfs_syncall() and just dump? */
178
/*
 * The do_polled_io flag is set by the panic code to inform the SCSI subsystem
 * to use polled mode instead of interrupt-driven i/o. panicsys() sets it
 * just before calling vfs_syncall() and again just before calling dumpsys().
 */
int do_polled_io = 0;

/*
 * The panic_forced flag is set by the uadmin A_DUMP code to inform the
 * panic subsystem that it should not attempt an initial debug_enter.
 */
int panic_forced = 0;
190
/*
 * Triggers for panic state transitions. Each is a word which is set
 * atomically and can only be set once; all three start out clear.
 */
int panic_quiesce;		/* trigger for CALM -> QUIESCE */
int panic_sync;			/* trigger for QUIESCE -> SYNC */
int panic_dump;			/* trigger for SYNC -> DUMP */
197
/*
 * Variable signifying quiesce(9E) is in progress.
 */
volatile int quiesce_active = 0;
202
203 void
panicsys(const char * format,va_list alist,struct regs * rp,int on_panic_stack)204 panicsys(const char *format, va_list alist, struct regs *rp, int on_panic_stack)
205 {
206 int s = spl8();
207 kthread_t *t = curthread;
208 cpu_t *cp = CPU;
209
210 caddr_t intr_stack = NULL;
211 uint_t intr_actv;
212
213 ushort_t schedflag = t->t_schedflag;
214 cpu_t *bound_cpu = t->t_bound_cpu;
215 char preempt = t->t_preempt;
216
217 (void) setjmp(&t->t_pcb);
218 t->t_flag |= T_PANIC;
219
220 t->t_schedflag |= TS_DONT_SWAP;
221 t->t_bound_cpu = cp;
222 t->t_preempt++;
223
224 panic_enter_hw(s);
225
226 /*
227 * If we're on the interrupt stack and an interrupt thread is available
228 * in this CPU's pool, preserve the interrupt stack by detaching an
229 * interrupt thread and making its stack the intr_stack.
230 */
231 if (CPU_ON_INTR(cp) && cp->cpu_intr_thread != NULL) {
232 kthread_t *it = cp->cpu_intr_thread;
233
234 intr_stack = cp->cpu_intr_stack;
235 intr_actv = cp->cpu_intr_actv;
236
237 cp->cpu_intr_stack = thread_stk_init(it->t_stk);
238 cp->cpu_intr_thread = it->t_link;
239
240 /*
241 * Clear only the high level bits of cpu_intr_actv.
242 * We want to indicate that high-level interrupts are
243 * not active without destroying the low-level interrupt
244 * information stored there.
245 */
246 cp->cpu_intr_actv &= ((1 << (LOCK_LEVEL + 1)) - 1);
247 }
248
249 /*
250 * Record one-time panic information and quiesce the other CPUs.
251 * Then print out the panic message and stack trace.
252 */
253 if (on_panic_stack) {
254 panic_data_t *pdp = (panic_data_t *)panicbuf;
255
256 pdp->pd_version = PANICBUFVERS;
257 pdp->pd_msgoff = sizeof (panic_data_t) - sizeof (panic_nv_t);
258
259 (void) strncpy(pdp->pd_uuid, dump_get_uuid(),
260 sizeof (pdp->pd_uuid));
261
262 if (t->t_panic_trap != NULL)
263 panic_savetrap(pdp, t->t_panic_trap);
264 else
265 panic_saveregs(pdp, rp);
266
267 (void) vsnprintf(&panicbuf[pdp->pd_msgoff],
268 PANICBUFSIZE - pdp->pd_msgoff, format, alist);
269
270 /*
271 * Call into the platform code to stop the other CPUs.
272 * We currently have all interrupts blocked, and expect that
273 * the platform code will lower ipl only as far as needed to
274 * perform cross-calls, and will acquire as *few* locks as is
275 * possible -- panicstr is not set so we can still deadlock.
276 */
277 panic_stopcpus(cp, t, s);
278
279 panicstr = (char *)format;
280 va_copy(panicargs, alist);
281 panic_lbolt = LBOLT_NO_ACCOUNT;
282 panic_lbolt64 = LBOLT_NO_ACCOUNT64;
283 panic_hrestime = hrestime;
284 panic_hrtime = gethrtime_waitfree();
285 panic_thread = t;
286 panic_regs = t->t_pcb;
287 panic_reg = rp;
288 panic_cpu = *cp;
289 panic_ipl = spltoipl(s);
290 panic_schedflag = schedflag;
291 panic_bound_cpu = bound_cpu;
292 panic_preempt = preempt;
293
294 if (intr_stack != NULL) {
295 panic_cpu.cpu_intr_stack = intr_stack;
296 panic_cpu.cpu_intr_actv = intr_actv;
297 }
298
299 /*
300 * Lower ipl to 10 to keep clock() from running, but allow
301 * keyboard interrupts to enter the debugger. These callbacks
302 * are executed with panicstr set so they can bypass locks.
303 */
304 splx(ipltospl(CLOCK_LEVEL));
305 panic_quiesce_hw(pdp);
306 (void) FTRACE_STOP();
307 (void) callb_execute_class(CB_CL_PANIC, NULL);
308
309 if (log_intrq != NULL)
310 log_flushq(log_intrq);
311
312 /*
313 * If log_consq has been initialized and syslogd has started,
314 * print any messages in log_consq that haven't been consumed.
315 */
316 if (log_consq != NULL && log_consq != log_backlogq)
317 log_printq(log_consq);
318
319 fm_banner();
320
321 #if defined(__x86)
322 /*
323 * A hypervisor panic originates outside of Solaris, so we
324 * don't want to prepend the panic message with misleading
325 * pointers from within Solaris.
326 */
327 if (!IN_XPV_PANIC())
328 #endif
329 printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id,
330 (void *)t);
331 vprintf(format, alist);
332 printf("\n\n");
333
334 if (t->t_panic_trap != NULL) {
335 panic_showtrap(t->t_panic_trap);
336 printf("\n");
337 }
338
339 traceregs(rp);
340 printf("\n");
341
342 if (((boothowto & RB_DEBUG) || obpdebug) &&
343 !nopanicdebug && !panic_forced) {
344 if (dumpvp != NULL) {
345 debug_enter("panic: entering debugger "
346 "(continue to save dump)");
347 } else {
348 debug_enter("panic: entering debugger "
349 "(no dump device, continue to reboot)");
350 }
351 }
352
353 } else if (panic_dump != 0 || panic_sync != 0 || panicstr != NULL) {
354 printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id, (void *)t);
355 vprintf(format, alist);
356 printf("\n");
357 } else
358 goto spin;
359
360 /*
361 * Prior to performing sync or dump, we make sure that do_polled_io is
362 * set, but we'll leave ipl at 10; deadman(), a CY_HIGH_LEVEL cyclic,
363 * will re-enter panic if we are not making progress with sync or dump.
364 */
365
366 /*
367 * Sync the filesystems. Reset t_cred if not set because much of
368 * the filesystem code depends on CRED() being valid.
369 */
370 if (!in_sync && panic_trigger(&panic_sync)) {
371 if (t->t_cred == NULL)
372 t->t_cred = kcred;
373 splx(ipltospl(CLOCK_LEVEL));
374 do_polled_io = 1;
375 vfs_syncall();
376 }
377
378 /*
379 * Take the crash dump. If the dump trigger is already set, try to
380 * enter the debugger again before rebooting the system.
381 */
382 if (panic_trigger(&panic_dump)) {
383 panic_dump_hw(s);
384 splx(ipltospl(CLOCK_LEVEL));
385 errorq_panic();
386 do_polled_io = 1;
387 dumpsys();
388 } else if (((boothowto & RB_DEBUG) || obpdebug) && !nopanicdebug) {
389 debug_enter("panic: entering debugger (continue to reboot)");
390 } else
391 printf("dump aborted: please record the above information!\n");
392
393 if (halt_on_panic)
394 mdboot(A_REBOOT, AD_HALT, NULL, B_FALSE);
395 else
396 mdboot(A_REBOOT, panic_bootfcn, panic_bootstr, B_FALSE);
397 spin:
398 /*
399 * Restore ipl to at most CLOCK_LEVEL so we don't end up spinning
400 * and unable to jump into the debugger.
401 */
402 splx(MIN(s, ipltospl(CLOCK_LEVEL)));
403 for (;;)
404 ;
405 }
406
/*
 * Varargs wrapper around vpanic(): collect the arguments that follow
 * format into a va_list and hand them to vpanic().
 *
 * panicsys() never returns, so the va_end() below is presumably never
 * reached when vpanic() ends up there; it is kept to pair with va_start().
 */
void
panic(const char *format, ...)
{
	va_list alist;

	va_start(alist, format);
	vpanic(format, alist);
	va_end(alist);
}
416