1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/param.h>
28 #include <sys/types.h>
29 #include <sys/sysmacros.h>
30 #include <sys/systm.h>
31 #include <sys/errno.h>
32 #include <sys/vfs.h>
33 #include <sys/vnode.h>
34 #include <sys/swap.h>
35 #include <sys/file.h>
36 #include <sys/proc.h>
37 #include <sys/var.h>
38 #include <sys/uadmin.h>
39 #include <sys/signal.h>
40 #include <sys/time.h>
41 #include <vm/seg_kmem.h>
42 #include <sys/modctl.h>
43 #include <sys/callb.h>
44 #include <sys/dumphdr.h>
45 #include <sys/debug.h>
46 #include <sys/ftrace.h>
47 #include <sys/cmn_err.h>
48 #include <sys/panic.h>
49 #include <sys/ddi.h>
50 #include <sys/sunddi.h>
51 #include <sys/policy.h>
52 #include <sys/zone.h>
53 #include <sys/condvar.h>
54 #include <sys/thread.h>
55 #include <sys/sdt.h>
56
/*
 * Administrivia system call.  We provide this in two flavors: one for calling
 * from the system call path (uadmin), and the other for calling from elsewhere
 * within the kernel (kadmin).  Callers must beware that certain uadmin cmd
 * values (specifically A_SWAPCTL) are only supported by uadmin and not kadmin.
 */
63
64 extern ksema_t fsflush_sema;
65 kmutex_t ualock;
66 kcondvar_t uacond;
67 kthread_t *ua_shutdown_thread = NULL;
68
69 int sys_shutdown = 0;
70 volatile int fastreboot_dryrun = 0;
71
72 /*
73 * Kill all user processes in said zone. A special argument of ALL_ZONES is
74 * passed in when the system as a whole is shutting down. The lack of per-zone
75 * process lists is likely to make the following a performance bottleneck on a
76 * system with many zones.
77 */
78 void
killall(zoneid_t zoneid)79 killall(zoneid_t zoneid)
80 {
81 proc_t *p;
82
83 ASSERT(zoneid != GLOBAL_ZONEID);
84 /*
85 * Kill all processes except kernel daemons and ourself.
86 * Make a first pass to stop all processes so they won't
87 * be trying to restart children as we kill them.
88 */
89 mutex_enter(&pidlock);
90 for (p = practive; p != NULL; p = p->p_next) {
91 if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
92 p->p_exec != NULLVP && /* kernel daemons */
93 p->p_as != &kas &&
94 p->p_stat != SZOMB) {
95 mutex_enter(&p->p_lock);
96 p->p_flag |= SNOWAIT;
97 sigtoproc(p, NULL, SIGSTOP);
98 mutex_exit(&p->p_lock);
99 }
100 }
101 p = practive;
102 while (p != NULL) {
103 if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
104 p->p_exec != NULLVP && /* kernel daemons */
105 p->p_as != &kas &&
106 p->p_stat != SIDL &&
107 p->p_stat != SZOMB) {
108 mutex_enter(&p->p_lock);
109 if (sigismember(&p->p_sig, SIGKILL)) {
110 mutex_exit(&p->p_lock);
111 p = p->p_next;
112 } else {
113 sigtoproc(p, NULL, SIGKILL);
114 mutex_exit(&p->p_lock);
115 (void) cv_reltimedwait(&p->p_srwchan_cv,
116 &pidlock, hz, TR_CLOCK_TICK);
117 p = practive;
118 }
119 } else {
120 p = p->p_next;
121 }
122 }
123 mutex_exit(&pidlock);
124 }
125
126 int
kadmin(int cmd,int fcn,void * mdep,cred_t * credp)127 kadmin(int cmd, int fcn, void *mdep, cred_t *credp)
128 {
129 int error = 0;
130 char *buf;
131 size_t buflen = 0;
132 boolean_t invoke_cb = B_FALSE;
133
134 /*
135 * We might be called directly by the kernel's fault-handling code, so
136 * we can't assert that the caller is in the global zone.
137 */
138
139 /*
140 * Make sure that cmd is one of the valid <sys/uadmin.h> command codes
141 * and that we have appropriate privileges for this action.
142 */
143 switch (cmd) {
144 case A_FTRACE:
145 case A_SHUTDOWN:
146 case A_REBOOT:
147 case A_REMOUNT:
148 case A_FREEZE:
149 case A_DUMP:
150 case A_SDTTEST:
151 case A_CONFIG:
152 if (secpolicy_sys_config(credp, B_FALSE) != 0)
153 return (EPERM);
154 break;
155
156 default:
157 return (EINVAL);
158 }
159
160 /*
161 * Serialize these operations on ualock. If it is held, the
162 * system should shutdown, reboot, or remount shortly, unless there is
163 * an error. We need a cv rather than just a mutex because proper
164 * functioning of A_REBOOT relies on being able to interrupt blocked
165 * userland callers.
166 *
167 * We only clear ua_shutdown_thread after A_REMOUNT or A_CONFIG.
168 * Other commands should never return.
169 */
170 if (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_REMOUNT ||
171 cmd == A_CONFIG) {
172 mutex_enter(&ualock);
173 while (ua_shutdown_thread != NULL) {
174 if (cv_wait_sig(&uacond, &ualock) == 0) {
175 /*
176 * If we were interrupted, leave, and handle
177 * the signal (or exit, depending on what
178 * happened)
179 */
180 mutex_exit(&ualock);
181 return (EINTR);
182 }
183 }
184 ua_shutdown_thread = curthread;
185 mutex_exit(&ualock);
186 }
187
188 switch (cmd) {
189 case A_SHUTDOWN:
190 {
191 proc_t *p = ttoproc(curthread);
192
193 /*
194 * Release (almost) all of our own resources if we are called
195 * from a user context, however if we are calling kadmin() from
196 * a kernel context then we do not release these resources.
197 */
198 if (p != &p0) {
199 proc_is_exiting(p);
200 if ((error = exitlwps(0)) != 0) {
201 /*
202 * Another thread in this process also called
203 * exitlwps().
204 */
205 mutex_enter(&ualock);
206 ua_shutdown_thread = NULL;
207 cv_signal(&uacond);
208 mutex_exit(&ualock);
209 return (error);
210 }
211 mutex_enter(&p->p_lock);
212 p->p_flag |= SNOWAIT;
213 sigfillset(&p->p_ignore);
214 curthread->t_lwp->lwp_cursig = 0;
215 curthread->t_lwp->lwp_extsig = 0;
216 if (p->p_exec) {
217 vnode_t *exec_vp = p->p_exec;
218 p->p_exec = NULLVP;
219 mutex_exit(&p->p_lock);
220 VN_RELE(exec_vp);
221 } else {
222 mutex_exit(&p->p_lock);
223 }
224
225 pollcleanup();
226 closeall(P_FINFO(curproc));
227 relvm();
228
229 } else {
230 /*
231 * Reset t_cred if not set because much of the
232 * filesystem code depends on CRED() being valid.
233 */
234 if (curthread->t_cred == NULL)
235 curthread->t_cred = kcred;
236 }
237
238 /* indicate shutdown in progress */
239 sys_shutdown = 1;
240
241 /*
242 * Communcate that init shouldn't be restarted.
243 */
244 zone_shutdown_global();
245
246 killall(ALL_ZONES);
247 /*
248 * If we are calling kadmin() from a kernel context then we
249 * do not release these resources.
250 */
251 if (ttoproc(curthread) != &p0) {
252 VN_RELE(PTOU(curproc)->u_cdir);
253 if (PTOU(curproc)->u_rdir)
254 VN_RELE(PTOU(curproc)->u_rdir);
255 if (PTOU(curproc)->u_cwd)
256 refstr_rele(PTOU(curproc)->u_cwd);
257
258 PTOU(curproc)->u_cdir = rootdir;
259 PTOU(curproc)->u_rdir = NULL;
260 PTOU(curproc)->u_cwd = NULL;
261 }
262
263 /*
264 * Allow the reboot/halt/poweroff code a chance to do
265 * anything it needs to whilst we still have filesystems
266 * mounted, like loading any modules necessary for later
267 * performing the actual poweroff.
268 */
269 if ((mdep != NULL) && (*(char *)mdep == '/')) {
270 buf = i_convert_boot_device_name(mdep, NULL, &buflen);
271 mdpreboot(cmd, fcn, buf);
272 } else
273 mdpreboot(cmd, fcn, mdep);
274
275 /*
276 * Allow fsflush to finish running and then prevent it
277 * from ever running again so that vfs_unmountall() and
278 * vfs_syncall() can acquire the vfs locks they need.
279 */
280 sema_p(&fsflush_sema);
281 (void) callb_execute_class(CB_CL_UADMIN_PRE_VFS, NULL);
282
283 vfs_unmountall();
284 (void) VFS_MOUNTROOT(rootvfs, ROOT_UNMOUNT);
285 vfs_syncall();
286
287 dump_ereports();
288 dump_messages();
289
290 invoke_cb = B_TRUE;
291
292 /* FALLTHROUGH */
293 }
294
295 case A_REBOOT:
296 if ((mdep != NULL) && (*(char *)mdep == '/')) {
297 buf = i_convert_boot_device_name(mdep, NULL, &buflen);
298 mdboot(cmd, fcn, buf, invoke_cb);
299 } else
300 mdboot(cmd, fcn, mdep, invoke_cb);
301 /* no return expected */
302 break;
303
304 case A_CONFIG:
305 switch (fcn) {
306 case AD_UPDATE_BOOT_CONFIG:
307 #ifndef __sparc
308 {
309 extern void fastboot_update_config(const char *);
310
311 fastboot_update_config(mdep);
312 }
313 #endif
314
315 break;
316 }
317 /* Let other threads enter the shutdown path now */
318 mutex_enter(&ualock);
319 ua_shutdown_thread = NULL;
320 cv_signal(&uacond);
321 mutex_exit(&ualock);
322 break;
323
324 case A_REMOUNT:
325 (void) VFS_MOUNTROOT(rootvfs, ROOT_REMOUNT);
326 /* Let other threads enter the shutdown path now */
327 mutex_enter(&ualock);
328 ua_shutdown_thread = NULL;
329 cv_signal(&uacond);
330 mutex_exit(&ualock);
331 break;
332
333 case A_FREEZE:
334 {
335 /*
336 * This is the entrypoint for all suspend/resume actions.
337 */
338 extern int cpr(int, void *);
339
340 if (modload("misc", "cpr") == -1)
341 return (ENOTSUP);
342 /* Let the CPR module decide what to do with mdep */
343 error = cpr(fcn, mdep);
344 break;
345 }
346
347 case A_FTRACE:
348 {
349 switch (fcn) {
350 case AD_FTRACE_START:
351 (void) FTRACE_START();
352 break;
353 case AD_FTRACE_STOP:
354 (void) FTRACE_STOP();
355 break;
356 default:
357 error = EINVAL;
358 }
359 break;
360 }
361
362 case A_DUMP:
363 {
364 if (fcn == AD_NOSYNC) {
365 in_sync = 1;
366 break;
367 }
368
369 panic_bootfcn = fcn;
370 panic_forced = 1;
371
372 if ((mdep != NULL) && (*(char *)mdep == '/')) {
373 panic_bootstr = i_convert_boot_device_name(mdep,
374 NULL, &buflen);
375 } else
376 panic_bootstr = mdep;
377
378 #ifndef __sparc
379 extern void fastboot_update_and_load(int, char *);
380
381 fastboot_update_and_load(fcn, mdep);
382 #endif
383
384 panic("forced crash dump initiated at user request");
385 /*NOTREACHED*/
386 }
387
388 case A_SDTTEST:
389 {
390 DTRACE_PROBE7(test, int, 1, int, 2, int, 3, int, 4, int, 5,
391 int, 6, int, 7);
392 break;
393 }
394
395 default:
396 error = EINVAL;
397 }
398
399 return (error);
400 }
401
402 int
uadmin(int cmd,int fcn,uintptr_t mdep)403 uadmin(int cmd, int fcn, uintptr_t mdep)
404 {
405 int error = 0, rv = 0;
406 size_t nbytes = 0;
407 cred_t *credp = CRED();
408 char *bootargs = NULL;
409 int reset_status = 0;
410
411 if (cmd == A_SHUTDOWN && fcn == AD_FASTREBOOT_DRYRUN) {
412 ddi_walk_devs(ddi_root_node(), check_driver_quiesce,
413 &reset_status);
414 if (reset_status != 0)
415 return (EIO);
416 else
417 return (0);
418 }
419
420 /*
421 * The swapctl system call doesn't have its own entry point: it uses
422 * uadmin as a wrapper so we just call it directly from here.
423 */
424 if (cmd == A_SWAPCTL) {
425 if (get_udatamodel() == DATAMODEL_NATIVE)
426 error = swapctl(fcn, (void *)mdep, &rv);
427 #if defined(_SYSCALL32_IMPL)
428 else
429 error = swapctl32(fcn, (void *)mdep, &rv);
430 #endif /* _SYSCALL32_IMPL */
431 return (error ? set_errno(error) : rv);
432 }
433
434 /*
435 * Certain subcommands intepret a non-NULL mdep value as a pointer to
436 * a boot string. We pull that in as bootargs, if applicable.
437 */
438 if (mdep != NULL &&
439 (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_DUMP ||
440 cmd == A_FREEZE || cmd == A_CONFIG)) {
441 bootargs = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP);
442 if ((error = copyinstr((const char *)mdep, bootargs,
443 BOOTARGS_MAX, &nbytes)) != 0) {
444 kmem_free(bootargs, BOOTARGS_MAX);
445 return (set_errno(error));
446 }
447 }
448
449 /*
450 * Invoke the appropriate kadmin() routine.
451 */
452 if (getzoneid() != GLOBAL_ZONEID)
453 error = zone_kadmin(cmd, fcn, bootargs, credp);
454 else
455 error = kadmin(cmd, fcn, bootargs, credp);
456
457 if (bootargs != NULL)
458 kmem_free(bootargs, BOOTARGS_MAX);
459 return (error ? set_errno(error) : 0);
460 }
461