1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/sysmacros.h>
31 #include <sys/systm.h>
32 #include <sys/cred.h>
33 #include <sys/user.h>
34 #include <sys/errno.h>
35 #include <sys/proc.h>
36 #include <sys/ucontext.h>
37 #include <sys/procfs.h>
38 #include <sys/vnode.h>
39 #include <sys/acct.h>
40 #include <sys/var.h>
41 #include <sys/cmn_err.h>
42 #include <sys/debug.h>
43 #include <sys/wait.h>
44 #include <sys/siginfo.h>
45 #include <sys/procset.h>
46 #include <sys/class.h>
47 #include <sys/file.h>
48 #include <sys/session.h>
49 #include <sys/kmem.h>
50 #include <sys/vtrace.h>
51 #include <sys/prsystm.h>
52 #include <sys/ipc.h>
53 #include <sys/sem_impl.h>
54 #include <c2/audit.h>
55 #include <sys/aio_impl.h>
56 #include <vm/as.h>
57 #include <sys/poll.h>
58 #include <sys/door.h>
59 #include <sys/lwpchan_impl.h>
60 #include <sys/utrap.h>
61 #include <sys/task.h>
62 #include <sys/exacct.h>
63 #include <sys/cyclic.h>
64 #include <sys/schedctl.h>
65 #include <sys/rctl.h>
66 #include <sys/contract_impl.h>
67 #include <sys/contract/process_impl.h>
68 #include <sys/list.h>
69 #include <sys/dtrace.h>
70 #include <sys/pool.h>
71 #include <sys/sdt.h>
72 #include <sys/corectl.h>
73 #include <sys/brand.h>
74 #include <sys/libc_kernel.h>
75
76 /*
77 * convert code/data pair into old style wait status
78 */
79 int
wstat(int code,int data)80 wstat(int code, int data)
81 {
82 int stat = (data & 0377);
83
84 switch (code) {
85 case CLD_EXITED:
86 stat <<= 8;
87 break;
88 case CLD_DUMPED:
89 stat |= WCOREFLG;
90 break;
91 case CLD_KILLED:
92 break;
93 case CLD_TRAPPED:
94 case CLD_STOPPED:
95 stat <<= 8;
96 stat |= WSTOPFLG;
97 break;
98 case CLD_CONTINUED:
99 stat = WCONTFLG;
100 break;
101 default:
102 cmn_err(CE_PANIC, "wstat: bad code");
103 /* NOTREACHED */
104 }
105 return (stat);
106 }
107
108 static char *
exit_reason(char * buf,size_t bufsz,int what,int why)109 exit_reason(char *buf, size_t bufsz, int what, int why)
110 {
111 switch (why) {
112 case CLD_EXITED:
113 (void) snprintf(buf, bufsz, "exited with status %d", what);
114 break;
115 case CLD_KILLED:
116 (void) snprintf(buf, bufsz, "exited on fatal signal %d", what);
117 break;
118 case CLD_DUMPED:
119 (void) snprintf(buf, bufsz, "core dumped on signal %d", what);
120 break;
121 default:
122 (void) snprintf(buf, bufsz, "encountered unknown error "
123 "(%d, %d)", why, what);
124 break;
125 }
126
127 return (buf);
128 }
129
130 /*
131 * exit system call: pass back caller's arg.
132 */
133 void
rexit(int rval)134 rexit(int rval)
135 {
136 exit(CLD_EXITED, rval);
137 }
138
139 /*
140 * Called by proc_exit() when a zone's init exits, presumably because
141 * it failed. As long as the given zone is still in the "running"
142 * state, we will re-exec() init, but first we need to reset things
143 * which are usually inherited across exec() but will break init's
144 * assumption that it is being exec()'d from a virgin process. Most
145 * importantly this includes closing all file descriptors (exec only
146 * closes those marked close-on-exec) and resetting signals (exec only
147 * resets handled signals, and we need to clear any signals which
148 * killed init). Anything else that exec(2) says would be inherited,
149 * but would affect the execution of init, needs to be reset.
150 */
151 static int
restart_init(int what,int why)152 restart_init(int what, int why)
153 {
154 kthread_t *t = curthread;
155 klwp_t *lwp = ttolwp(t);
156 proc_t *p = ttoproc(t);
157 user_t *up = PTOU(p);
158
159 vnode_t *oldcd, *oldrd;
160 int i, err;
161 char reason_buf[64];
162
163 /*
164 * Let zone admin (and global zone admin if this is for a non-global
165 * zone) know that init has failed and will be restarted.
166 */
167 zcmn_err(p->p_zone->zone_id, CE_WARN,
168 "init(1M) %s: restarting automatically",
169 exit_reason(reason_buf, sizeof (reason_buf), what, why));
170
171 if (!INGLOBALZONE(p)) {
172 cmn_err(CE_WARN, "init(1M) for zone %s (pid %d) %s: "
173 "restarting automatically",
174 p->p_zone->zone_name, p->p_pid, reason_buf);
175 }
176
177 /*
178 * Remove any fpollinfo_t's for this (last) thread from our file
179 * descriptors so closeall() can ASSERT() that they're all gone.
180 * Then close all open file descriptors in the process.
181 */
182 pollcleanup();
183 closeall(P_FINFO(p));
184
185 /*
186 * Grab p_lock and begin clearing miscellaneous global process
187 * state that needs to be reset before we exec the new init(1M).
188 */
189
190 mutex_enter(&p->p_lock);
191 prbarrier(p);
192
193 p->p_flag &= ~(SKILLED | SEXTKILLED | SEXITING | SDOCORE);
194 up->u_cmask = CMASK;
195
196 sigemptyset(&t->t_hold);
197 sigemptyset(&t->t_sig);
198 sigemptyset(&t->t_extsig);
199
200 sigemptyset(&p->p_sig);
201 sigemptyset(&p->p_extsig);
202
203 sigdelq(p, t, 0);
204 sigdelq(p, NULL, 0);
205
206 if (p->p_killsqp) {
207 siginfofree(p->p_killsqp);
208 p->p_killsqp = NULL;
209 }
210
211 /*
212 * Reset any signals that are ignored back to the default disposition.
213 * Other u_signal members will be cleared when exec calls sigdefault().
214 */
215 for (i = 1; i < NSIG; i++) {
216 if (up->u_signal[i - 1] == SIG_IGN) {
217 up->u_signal[i - 1] = SIG_DFL;
218 sigemptyset(&up->u_sigmask[i - 1]);
219 }
220 }
221
222 /*
223 * Clear the current signal, any signal info associated with it, and
224 * any signal information from contracts and/or contract templates.
225 */
226 lwp->lwp_cursig = 0;
227 lwp->lwp_extsig = 0;
228 if (lwp->lwp_curinfo != NULL) {
229 siginfofree(lwp->lwp_curinfo);
230 lwp->lwp_curinfo = NULL;
231 }
232 lwp_ctmpl_clear(lwp);
233
234 /*
235 * Reset both the process root directory and the current working
236 * directory to the root of the zone just as we do during boot.
237 */
238 VN_HOLD(p->p_zone->zone_rootvp);
239 oldrd = up->u_rdir;
240 up->u_rdir = p->p_zone->zone_rootvp;
241
242 VN_HOLD(p->p_zone->zone_rootvp);
243 oldcd = up->u_cdir;
244 up->u_cdir = p->p_zone->zone_rootvp;
245
246 if (up->u_cwd != NULL) {
247 refstr_rele(up->u_cwd);
248 up->u_cwd = NULL;
249 }
250
251 mutex_exit(&p->p_lock);
252
253 if (oldrd != NULL)
254 VN_RELE(oldrd);
255 if (oldcd != NULL)
256 VN_RELE(oldcd);
257
258 /* Free the controlling tty. (freectty() always assumes curproc.) */
259 ASSERT(p == curproc);
260 (void) freectty(B_TRUE);
261
262 /*
263 * Now exec() the new init(1M) on top of the current process. If we
264 * succeed, the caller will treat this like a successful system call.
265 * If we fail, we issue messages and the caller will proceed with exit.
266 */
267 err = exec_init(p->p_zone->zone_initname, NULL);
268
269 if (err == 0)
270 return (0);
271
272 zcmn_err(p->p_zone->zone_id, CE_WARN,
273 "failed to restart init(1M) (err=%d): system reboot required", err);
274
275 if (!INGLOBALZONE(p)) {
276 cmn_err(CE_WARN, "failed to restart init(1M) for zone %s "
277 "(pid %d, err=%d): zoneadm(1M) boot required",
278 p->p_zone->zone_name, p->p_pid, err);
279 }
280
281 return (-1);
282 }
283
284 /*
285 * Release resources.
286 * Enter zombie state.
287 * Wake up parent and init processes,
288 * and dispose of children.
289 */
290 void
exit(int why,int what)291 exit(int why, int what)
292 {
293 /*
294 * If proc_exit() fails, then some other lwp in the process
295 * got there first. We just have to call lwp_exit() to allow
296 * the other lwp to finish exiting the process. Otherwise we're
297 * restarting init, and should return.
298 */
299 if (proc_exit(why, what) != 0) {
300 mutex_enter(&curproc->p_lock);
301 ASSERT(curproc->p_flag & SEXITLWPS);
302 lwp_exit();
303 /* NOTREACHED */
304 }
305 }
306
307 /*
308 * Set the SEXITING flag on the process, after making sure /proc does
309 * not have it locked. This is done in more places than proc_exit(),
310 * so it is a separate function.
311 */
312 void
proc_is_exiting(proc_t * p)313 proc_is_exiting(proc_t *p)
314 {
315 mutex_enter(&p->p_lock);
316 prbarrier(p);
317 p->p_flag |= SEXITING;
318 mutex_exit(&p->p_lock);
319 }
320
321 /*
322 * Return value:
323 * 1 - exitlwps() failed, call (or continue) lwp_exit()
324 * 0 - restarting init. Return through system call path
325 */
326 int
proc_exit(int why,int what)327 proc_exit(int why, int what)
328 {
329 kthread_t *t = curthread;
330 klwp_t *lwp = ttolwp(t);
331 proc_t *p = ttoproc(t);
332 zone_t *z = p->p_zone;
333 timeout_id_t tmp_id;
334 int rv;
335 proc_t *q;
336 task_t *tk;
337 vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
338 sigqueue_t *sqp;
339 lwpdir_t *lwpdir;
340 uint_t lwpdir_sz;
341 tidhash_t *tidhash;
342 uint_t tidhash_sz;
343 ret_tidhash_t *ret_tidhash;
344 refstr_t *cwd;
345 hrtime_t hrutime, hrstime;
346 int evaporate;
347
348 /*
349 * Stop and discard the process's lwps except for the current one,
350 * unless some other lwp beat us to it. If exitlwps() fails then
351 * return and the calling lwp will call (or continue in) lwp_exit().
352 */
353 proc_is_exiting(p);
354 if (exitlwps(0) != 0)
355 return (1);
356
357 mutex_enter(&p->p_lock);
358 if (p->p_ttime > 0) {
359 /*
360 * Account any remaining ticks charged to this process
361 * on its way out.
362 */
363 (void) task_cpu_time_incr(p->p_task, p->p_ttime);
364 p->p_ttime = 0;
365 }
366 mutex_exit(&p->p_lock);
367
368 DTRACE_PROC(lwp__exit);
369 DTRACE_PROC1(exit, int, why);
370
371 /*
372 * Will perform any brand specific proc exit processing, since this
373 * is always the last lwp, will also perform lwp_exit and free brand
374 * data
375 */
376 if (PROC_IS_BRANDED(p)) {
377 lwp_detach_brand_hdlrs(lwp);
378 brand_clearbrand(p, B_FALSE);
379 }
380
381 /*
382 * Don't let init exit unless zone_start_init() failed its exec, or
383 * we are shutting down the zone or the machine.
384 *
385 * Since we are single threaded, we don't need to lock the
386 * following accesses to zone_proc_initpid.
387 */
388 if (p->p_pid == z->zone_proc_initpid) {
389 if (z->zone_boot_err == 0 &&
390 zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
391 zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN &&
392 z->zone_restart_init == B_TRUE &&
393 restart_init(what, why) == 0)
394 return (0);
395 /*
396 * Since we didn't or couldn't restart init, we clear
397 * the zone's init state and proceed with exit
398 * processing.
399 */
400 z->zone_proc_initpid = -1;
401 }
402
403 lwp_pcb_exit();
404
405 /*
406 * Allocate a sigqueue now, before we grab locks.
407 * It will be given to sigcld(), below.
408 * Special case: If we will be making the process disappear
409 * without a trace because it is either:
410 * * an exiting SSYS process, or
411 * * a posix_spawn() vfork child who requests it,
412 * we don't bother to allocate a useless sigqueue.
413 */
414 evaporate = (p->p_flag & SSYS) || ((p->p_flag & SVFORK) &&
415 why == CLD_EXITED && what == _EVAPORATE);
416 if (!evaporate)
417 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
418
419 /*
420 * revoke any doors created by the process.
421 */
422 if (p->p_door_list)
423 door_exit();
424
425 /*
426 * Release schedctl data structures.
427 */
428 if (p->p_pagep)
429 schedctl_proc_cleanup();
430
431 /*
432 * make sure all pending kaio has completed.
433 */
434 if (p->p_aio)
435 aio_cleanup_exit();
436
437 /*
438 * discard the lwpchan cache.
439 */
440 if (p->p_lcp != NULL)
441 lwpchan_destroy_cache(0);
442
443 /*
444 * Clean up any DTrace helper actions or probes for the process.
445 */
446 if (p->p_dtrace_helpers != NULL) {
447 ASSERT(dtrace_helpers_cleanup != NULL);
448 (*dtrace_helpers_cleanup)();
449 }
450
451 /* untimeout the realtime timers */
452 if (p->p_itimer != NULL)
453 timer_exit();
454
455 if ((tmp_id = p->p_alarmid) != 0) {
456 p->p_alarmid = 0;
457 (void) untimeout(tmp_id);
458 }
459
460 /*
461 * Remove any fpollinfo_t's for this (last) thread from our file
462 * descriptors so closeall() can ASSERT() that they're all gone.
463 */
464 pollcleanup();
465
466 if (p->p_rprof_cyclic != CYCLIC_NONE) {
467 mutex_enter(&cpu_lock);
468 cyclic_remove(p->p_rprof_cyclic);
469 mutex_exit(&cpu_lock);
470 }
471
472 mutex_enter(&p->p_lock);
473
474 /*
475 * Clean up any DTrace probes associated with this process.
476 */
477 if (p->p_dtrace_probes) {
478 ASSERT(dtrace_fasttrap_exit_ptr != NULL);
479 dtrace_fasttrap_exit_ptr(p);
480 }
481
482 while ((tmp_id = p->p_itimerid) != 0) {
483 p->p_itimerid = 0;
484 mutex_exit(&p->p_lock);
485 (void) untimeout(tmp_id);
486 mutex_enter(&p->p_lock);
487 }
488
489 lwp_cleanup();
490
491 /*
492 * We are about to exit; prevent our resource associations from
493 * being changed.
494 */
495 pool_barrier_enter();
496
497 /*
498 * Block the process against /proc now that we have really
499 * acquired p->p_lock (to manipulate p_tlist at least).
500 */
501 prbarrier(p);
502
503 sigfillset(&p->p_ignore);
504 sigemptyset(&p->p_siginfo);
505 sigemptyset(&p->p_sig);
506 sigemptyset(&p->p_extsig);
507 sigemptyset(&t->t_sig);
508 sigemptyset(&t->t_extsig);
509 sigemptyset(&p->p_sigmask);
510 sigdelq(p, t, 0);
511 lwp->lwp_cursig = 0;
512 lwp->lwp_extsig = 0;
513 p->p_flag &= ~(SKILLED | SEXTKILLED);
514 if (lwp->lwp_curinfo) {
515 siginfofree(lwp->lwp_curinfo);
516 lwp->lwp_curinfo = NULL;
517 }
518
519 t->t_proc_flag |= TP_LWPEXIT;
520 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
521 prlwpexit(t); /* notify /proc */
522 lwp_hash_out(p, t->t_tid);
523 prexit(p);
524
525 p->p_lwpcnt = 0;
526 p->p_tlist = NULL;
527 sigqfree(p);
528 term_mstate(t);
529 p->p_mterm = gethrtime();
530
531 exec_vp = p->p_exec;
532 execdir_vp = p->p_execdir;
533 p->p_exec = NULLVP;
534 p->p_execdir = NULLVP;
535 mutex_exit(&p->p_lock);
536
537 pr_free_watched_pages(p);
538
539 closeall(P_FINFO(p));
540
541 /* Free the controlling tty. (freectty() always assumes curproc.) */
542 ASSERT(p == curproc);
543 (void) freectty(B_TRUE);
544
545 #if defined(__sparc)
546 if (p->p_utraps != NULL)
547 utrap_free(p);
548 #endif
549 if (p->p_semacct) /* IPC semaphore exit */
550 semexit(p);
551 rv = wstat(why, what);
552
553 acct(rv & 0xff);
554 exacct_commit_proc(p, rv);
555
556 /*
557 * Release any resources associated with C2 auditing
558 */
559 if (AU_AUDITING()) {
560 /*
561 * audit exit system call
562 */
563 audit_exit(why, what);
564 }
565
566 /*
567 * Free address space.
568 */
569 relvm();
570
571 if (exec_vp) {
572 /*
573 * Close this executable which has been opened when the process
574 * was created by getproc().
575 */
576 (void) VOP_CLOSE(exec_vp, FREAD, 1, (offset_t)0, CRED(), NULL);
577 VN_RELE(exec_vp);
578 }
579 if (execdir_vp)
580 VN_RELE(execdir_vp);
581
582 /*
583 * Release held contracts.
584 */
585 contract_exit(p);
586
587 /*
588 * Depart our encapsulating process contract.
589 */
590 if ((p->p_flag & SSYS) == 0) {
591 ASSERT(p->p_ct_process);
592 contract_process_exit(p->p_ct_process, p, rv);
593 }
594
595 /*
596 * Remove pool association, and block if requested by pool_do_bind.
597 */
598 mutex_enter(&p->p_lock);
599 ASSERT(p->p_pool->pool_ref > 0);
600 atomic_add_32(&p->p_pool->pool_ref, -1);
601 p->p_pool = pool_default;
602 /*
603 * Now that our address space has been freed and all other threads
604 * in this process have exited, set the PEXITED pool flag. This
605 * tells the pools subsystems to ignore this process if it was
606 * requested to rebind this process to a new pool.
607 */
608 p->p_poolflag |= PEXITED;
609 pool_barrier_exit();
610 mutex_exit(&p->p_lock);
611
612 mutex_enter(&pidlock);
613
614 /*
615 * Delete this process from the newstate list of its parent. We
616 * will put it in the right place in the sigcld in the end.
617 */
618 delete_ns(p->p_parent, p);
619
620 /*
621 * Reassign the orphans to the next of kin.
622 * Don't rearrange init's orphanage.
623 */
624 if ((q = p->p_orphan) != NULL && p != proc_init) {
625
626 proc_t *nokp = p->p_nextofkin;
627
628 for (;;) {
629 q->p_nextofkin = nokp;
630 if (q->p_nextorph == NULL)
631 break;
632 q = q->p_nextorph;
633 }
634 q->p_nextorph = nokp->p_orphan;
635 nokp->p_orphan = p->p_orphan;
636 p->p_orphan = NULL;
637 }
638
639 /*
640 * Reassign the children to init.
641 * Don't try to assign init's children to init.
642 */
643 if ((q = p->p_child) != NULL && p != proc_init) {
644 struct proc *np;
645 struct proc *initp = proc_init;
646 boolean_t setzonetop = B_FALSE;
647
648 if (!INGLOBALZONE(curproc))
649 setzonetop = B_TRUE;
650
651 pgdetach(p);
652
653 do {
654 np = q->p_sibling;
655 /*
656 * Delete it from its current parent new state
657 * list and add it to init new state list
658 */
659 delete_ns(q->p_parent, q);
660
661 q->p_ppid = 1;
662 q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
663 if (setzonetop) {
664 mutex_enter(&q->p_lock);
665 q->p_flag |= SZONETOP;
666 mutex_exit(&q->p_lock);
667 }
668 q->p_parent = initp;
669
670 /*
671 * Since q will be the first child,
672 * it will not have a previous sibling.
673 */
674 q->p_psibling = NULL;
675 if (initp->p_child) {
676 initp->p_child->p_psibling = q;
677 }
678 q->p_sibling = initp->p_child;
679 initp->p_child = q;
680 if (q->p_proc_flag & P_PR_PTRACE) {
681 mutex_enter(&q->p_lock);
682 sigtoproc(q, NULL, SIGKILL);
683 mutex_exit(&q->p_lock);
684 }
685 /*
686 * sigcld() will add the child to parents
687 * newstate list.
688 */
689 if (q->p_stat == SZOMB)
690 sigcld(q, NULL);
691 } while ((q = np) != NULL);
692
693 p->p_child = NULL;
694 ASSERT(p->p_child_ns == NULL);
695 }
696
697 TRACE_1(TR_FAC_PROC, TR_PROC_EXIT, "proc_exit: %p", p);
698
699 mutex_enter(&p->p_lock);
700 CL_EXIT(curthread); /* tell the scheduler that curthread is exiting */
701
702 /*
703 * Have our task accummulate our resource usage data before they
704 * become contaminated by p_cacct etc., and before we renounce
705 * membership of the task.
706 *
707 * We do this regardless of whether or not task accounting is active.
708 * This is to avoid having nonsense data reported for this task if
709 * task accounting is subsequently enabled. The overhead is minimal;
710 * by this point, this process has accounted for the usage of all its
711 * LWPs. We nonetheless do the work here, and under the protection of
712 * pidlock, so that the movement of the process's usage to the task
713 * happens at the same time as the removal of the process from the
714 * task, from the point of view of exacct_snapshot_task_usage().
715 */
716 exacct_update_task_mstate(p);
717
718 hrutime = mstate_aggr_state(p, LMS_USER);
719 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
720 p->p_utime = (clock_t)NSEC_TO_TICK(hrutime) + p->p_cutime;
721 p->p_stime = (clock_t)NSEC_TO_TICK(hrstime) + p->p_cstime;
722
723 p->p_acct[LMS_USER] += p->p_cacct[LMS_USER];
724 p->p_acct[LMS_SYSTEM] += p->p_cacct[LMS_SYSTEM];
725 p->p_acct[LMS_TRAP] += p->p_cacct[LMS_TRAP];
726 p->p_acct[LMS_TFAULT] += p->p_cacct[LMS_TFAULT];
727 p->p_acct[LMS_DFAULT] += p->p_cacct[LMS_DFAULT];
728 p->p_acct[LMS_KFAULT] += p->p_cacct[LMS_KFAULT];
729 p->p_acct[LMS_USER_LOCK] += p->p_cacct[LMS_USER_LOCK];
730 p->p_acct[LMS_SLEEP] += p->p_cacct[LMS_SLEEP];
731 p->p_acct[LMS_WAIT_CPU] += p->p_cacct[LMS_WAIT_CPU];
732 p->p_acct[LMS_STOPPED] += p->p_cacct[LMS_STOPPED];
733
734 p->p_ru.minflt += p->p_cru.minflt;
735 p->p_ru.majflt += p->p_cru.majflt;
736 p->p_ru.nswap += p->p_cru.nswap;
737 p->p_ru.inblock += p->p_cru.inblock;
738 p->p_ru.oublock += p->p_cru.oublock;
739 p->p_ru.msgsnd += p->p_cru.msgsnd;
740 p->p_ru.msgrcv += p->p_cru.msgrcv;
741 p->p_ru.nsignals += p->p_cru.nsignals;
742 p->p_ru.nvcsw += p->p_cru.nvcsw;
743 p->p_ru.nivcsw += p->p_cru.nivcsw;
744 p->p_ru.sysc += p->p_cru.sysc;
745 p->p_ru.ioch += p->p_cru.ioch;
746
747 p->p_stat = SZOMB;
748 p->p_proc_flag &= ~P_PR_PTRACE;
749 p->p_wdata = what;
750 p->p_wcode = (char)why;
751
752 cdir = PTOU(p)->u_cdir;
753 rdir = PTOU(p)->u_rdir;
754 cwd = PTOU(p)->u_cwd;
755
756 ASSERT(cdir != NULL || p->p_parent == &p0);
757
758 /*
759 * Release resource controls, as they are no longer enforceable.
760 */
761 rctl_set_free(p->p_rctls);
762
763 /*
764 * Decrement tk_nlwps counter for our task.max-lwps resource control.
765 * An extended accounting record, if that facility is active, is
766 * scheduled to be written. We cannot give up task and project
767 * membership at this point because that would allow zombies to escape
768 * from the max-processes resource controls. Zombies stay in their
769 * current task and project until the process table slot is released
770 * in freeproc().
771 */
772 tk = p->p_task;
773
774 mutex_enter(&p->p_zone->zone_nlwps_lock);
775 tk->tk_nlwps--;
776 tk->tk_proj->kpj_nlwps--;
777 p->p_zone->zone_nlwps--;
778 mutex_exit(&p->p_zone->zone_nlwps_lock);
779
780 /*
781 * Clear the lwp directory and the lwpid hash table
782 * now that /proc can't bother us any more.
783 * We free the memory below, after dropping p->p_lock.
784 */
785 lwpdir = p->p_lwpdir;
786 lwpdir_sz = p->p_lwpdir_sz;
787 tidhash = p->p_tidhash;
788 tidhash_sz = p->p_tidhash_sz;
789 ret_tidhash = p->p_ret_tidhash;
790 p->p_lwpdir = NULL;
791 p->p_lwpfree = NULL;
792 p->p_lwpdir_sz = 0;
793 p->p_tidhash = NULL;
794 p->p_tidhash_sz = 0;
795 p->p_ret_tidhash = NULL;
796
797 /*
798 * If the process has context ops installed, call the exit routine
799 * on behalf of this last remaining thread. Normally exitpctx() is
800 * called during thread_exit() or lwp_exit(), but because this is the
801 * last thread in the process, we must call it here. By the time
802 * thread_exit() is called (below), the association with the relevant
803 * process has been lost.
804 *
805 * We also free the context here.
806 */
807 if (p->p_pctx) {
808 kpreempt_disable();
809 exitpctx(p);
810 kpreempt_enable();
811
812 freepctx(p, 0);
813 }
814
815 /*
816 * curthread's proc pointer is changed to point to the 'sched'
817 * process for the corresponding zone, except in the case when
818 * the exiting process is in fact a zsched instance, in which
819 * case the proc pointer is set to p0. We do so, so that the
820 * process still points at the right zone when we call the VN_RELE()
821 * below.
822 *
823 * This is because curthread's original proc pointer can be freed as
824 * soon as the child sends a SIGCLD to its parent. We use zsched so
825 * that for user processes, even in the final moments of death, the
826 * process is still associated with its zone.
827 */
828 if (p != t->t_procp->p_zone->zone_zsched)
829 t->t_procp = t->t_procp->p_zone->zone_zsched;
830 else
831 t->t_procp = &p0;
832
833 mutex_exit(&p->p_lock);
834 if (!evaporate) {
835 p->p_pidflag &= ~CLDPEND;
836 sigcld(p, sqp);
837 } else {
838 /*
839 * Do what sigcld() would do if the disposition
840 * of the SIGCHLD signal were set to be ignored.
841 */
842 cv_broadcast(&p->p_srwchan_cv);
843 freeproc(p);
844 }
845 mutex_exit(&pidlock);
846
847 /*
848 * We don't release u_cdir and u_rdir until SZOMB is set.
849 * This protects us against dofusers().
850 */
851 if (cdir)
852 VN_RELE(cdir);
853 if (rdir)
854 VN_RELE(rdir);
855 if (cwd)
856 refstr_rele(cwd);
857
858 /*
859 * task_rele() may ultimately cause the zone to go away (or
860 * may cause the last user process in a zone to go away, which
861 * signals zsched to go away). So prior to this call, we must
862 * no longer point at zsched.
863 */
864 t->t_procp = &p0;
865
866 kmem_free(lwpdir, lwpdir_sz * sizeof (lwpdir_t));
867 kmem_free(tidhash, tidhash_sz * sizeof (tidhash_t));
868 while (ret_tidhash != NULL) {
869 ret_tidhash_t *next = ret_tidhash->rth_next;
870 kmem_free(ret_tidhash->rth_tidhash,
871 ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
872 kmem_free(ret_tidhash, sizeof (*ret_tidhash));
873 ret_tidhash = next;
874 }
875
876 thread_exit();
877 /* NOTREACHED */
878 }
879
880 /*
881 * Format siginfo structure for wait system calls.
882 */
883 void
winfo(proc_t * pp,k_siginfo_t * ip,int waitflag)884 winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
885 {
886 ASSERT(MUTEX_HELD(&pidlock));
887
888 bzero(ip, sizeof (k_siginfo_t));
889 ip->si_signo = SIGCLD;
890 ip->si_code = pp->p_wcode;
891 ip->si_pid = pp->p_pid;
892 ip->si_ctid = PRCTID(pp);
893 ip->si_zoneid = pp->p_zone->zone_id;
894 ip->si_status = pp->p_wdata;
895 ip->si_stime = pp->p_stime;
896 ip->si_utime = pp->p_utime;
897
898 if (waitflag) {
899 pp->p_wcode = 0;
900 pp->p_wdata = 0;
901 pp->p_pidflag &= ~CLDPEND;
902 }
903 }
904
905 /*
906 * Wait system call.
907 * Search for a terminated (zombie) child,
908 * finally lay it to rest, and collect its status.
909 * Look also for stopped children,
910 * and pass back status from them.
911 */
912 int
waitid(idtype_t idtype,id_t id,k_siginfo_t * ip,int options)913 waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
914 {
915 int found;
916 proc_t *cp, *pp;
917 int proc_gone;
918 int waitflag = !(options & WNOWAIT);
919
920 /*
921 * Obsolete flag, defined here only for binary compatibility
922 * with old statically linked executables. Delete this when
923 * we no longer care about these old and broken applications.
924 */
925 #define _WNOCHLD 0400
926 options &= ~_WNOCHLD;
927
928 if (options == 0 || (options & ~WOPTMASK))
929 return (EINVAL);
930
931 switch (idtype) {
932 case P_PID:
933 case P_PGID:
934 if (id < 0 || id >= maxpid)
935 return (EINVAL);
936 /* FALLTHROUGH */
937 case P_ALL:
938 break;
939 default:
940 return (EINVAL);
941 }
942
943 pp = ttoproc(curthread);
944
945 /*
946 * lock parent mutex so that sibling chain can be searched.
947 */
948 mutex_enter(&pidlock);
949
950 /*
951 * if we are only looking for exited processes and child_ns list
952 * is empty no reason to look at all children.
953 */
954 if (idtype == P_ALL &&
955 (options & ~WNOWAIT) == (WNOHANG | WEXITED) &&
956 pp->p_child_ns == NULL) {
957 if (pp->p_child) {
958 mutex_exit(&pidlock);
959 bzero(ip, sizeof (k_siginfo_t));
960 return (0);
961 }
962 mutex_exit(&pidlock);
963 return (ECHILD);
964 }
965
966 while (pp->p_child != NULL) {
967
968 proc_gone = 0;
969
970 for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
971 if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
972 continue;
973 if (idtype == P_PID && id != cp->p_pid)
974 continue;
975 if (idtype == P_PGID && id != cp->p_pgrp)
976 continue;
977
978 switch (cp->p_wcode) {
979
980 case CLD_TRAPPED:
981 case CLD_STOPPED:
982 case CLD_CONTINUED:
983 cmn_err(CE_PANIC,
984 "waitid: wrong state %d on the p_newstate"
985 " list", cp->p_wcode);
986 break;
987
988 case CLD_EXITED:
989 case CLD_DUMPED:
990 case CLD_KILLED:
991 if (!(options & WEXITED)) {
992 /*
993 * Count how many are already gone
994 * for good.
995 */
996 proc_gone++;
997 break;
998 }
999 if (!waitflag) {
1000 winfo(cp, ip, 0);
1001 } else {
1002 winfo(cp, ip, 1);
1003 freeproc(cp);
1004 }
1005 mutex_exit(&pidlock);
1006 if (waitflag) { /* accept SIGCLD */
1007 sigcld_delete(ip);
1008 sigcld_repost();
1009 }
1010 return (0);
1011 }
1012
1013 if (idtype == P_PID)
1014 break;
1015 }
1016
1017 /*
1018 * Wow! None of the threads on the p_sibling_ns list were
1019 * interesting threads. Check all the kids!
1020 */
1021 found = 0;
1022 for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
1023 if (idtype == P_PID && id != cp->p_pid)
1024 continue;
1025 if (idtype == P_PGID && id != cp->p_pgrp)
1026 continue;
1027
1028 switch (cp->p_wcode) {
1029 case CLD_TRAPPED:
1030 if (!(options & WTRAPPED))
1031 break;
1032 winfo(cp, ip, waitflag);
1033 mutex_exit(&pidlock);
1034 if (waitflag) { /* accept SIGCLD */
1035 sigcld_delete(ip);
1036 sigcld_repost();
1037 }
1038 return (0);
1039
1040 case CLD_STOPPED:
1041 if (!(options & WSTOPPED))
1042 break;
1043 /* Is it still stopped? */
1044 mutex_enter(&cp->p_lock);
1045 if (!jobstopped(cp)) {
1046 mutex_exit(&cp->p_lock);
1047 break;
1048 }
1049 mutex_exit(&cp->p_lock);
1050 winfo(cp, ip, waitflag);
1051 mutex_exit(&pidlock);
1052 if (waitflag) { /* accept SIGCLD */
1053 sigcld_delete(ip);
1054 sigcld_repost();
1055 }
1056 return (0);
1057
1058 case CLD_CONTINUED:
1059 if (!(options & WCONTINUED))
1060 break;
1061 winfo(cp, ip, waitflag);
1062 mutex_exit(&pidlock);
1063 if (waitflag) { /* accept SIGCLD */
1064 sigcld_delete(ip);
1065 sigcld_repost();
1066 }
1067 return (0);
1068
1069 case CLD_EXITED:
1070 case CLD_DUMPED:
1071 case CLD_KILLED:
1072 if (idtype != P_PID &&
1073 (cp->p_pidflag & CLDWAITPID))
1074 continue;
1075 /*
1076 * Don't complain if a process was found in
1077 * the first loop but we broke out of the loop
1078 * because of the arguments passed to us.
1079 */
1080 if (proc_gone == 0) {
1081 cmn_err(CE_PANIC,
1082 "waitid: wrong state on the"
1083 " p_child list");
1084 } else {
1085 break;
1086 }
1087 }
1088
1089 found++;
1090
1091 if (idtype == P_PID)
1092 break;
1093 }
1094
1095 /*
1096 * If we found no interesting processes at all,
1097 * break out and return ECHILD.
1098 */
1099 if (found + proc_gone == 0)
1100 break;
1101
1102 if (options & WNOHANG) {
1103 mutex_exit(&pidlock);
1104 bzero(ip, sizeof (k_siginfo_t));
1105 /*
1106 * We should set ip->si_signo = SIGCLD,
1107 * but there is an SVVS test that expects
1108 * ip->si_signo to be zero in this case.
1109 */
1110 return (0);
1111 }
1112
1113 /*
1114 * If we found no processes of interest that could
1115 * change state while we wait, we don't wait at all.
1116 * Get out with ECHILD according to SVID.
1117 */
1118 if (found == proc_gone)
1119 break;
1120
1121 if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
1122 mutex_exit(&pidlock);
1123 return (EINTR);
1124 }
1125 }
1126 mutex_exit(&pidlock);
1127 return (ECHILD);
1128 }
1129
1130 int
waitsys(idtype_t idtype,id_t id,siginfo_t * infop,int options)1131 waitsys(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1132 {
1133 int error;
1134 k_siginfo_t info;
1135
1136 if (error = waitid(idtype, id, &info, options))
1137 return (set_errno(error));
1138 if (copyout(&info, infop, sizeof (k_siginfo_t)))
1139 return (set_errno(EFAULT));
1140 return (0);
1141 }
1142
1143 #ifdef _SYSCALL32_IMPL
1144
1145 int
waitsys32(idtype_t idtype,id_t id,siginfo_t * infop,int options)1146 waitsys32(idtype_t idtype, id_t id, siginfo_t *infop, int options)
1147 {
1148 int error;
1149 k_siginfo_t info;
1150 siginfo32_t info32;
1151
1152 if (error = waitid(idtype, id, &info, options))
1153 return (set_errno(error));
1154 siginfo_kto32(&info, &info32);
1155 if (copyout(&info32, infop, sizeof (info32)))
1156 return (set_errno(EFAULT));
1157 return (0);
1158 }
1159
1160 #endif /* _SYSCALL32_IMPL */
1161
1162 void
proc_detach(proc_t * p)1163 proc_detach(proc_t *p)
1164 {
1165 proc_t *q;
1166
1167 ASSERT(MUTEX_HELD(&pidlock));
1168
1169 q = p->p_parent;
1170 ASSERT(q != NULL);
1171
1172 /*
1173 * Take it off the newstate list of its parent
1174 */
1175 delete_ns(q, p);
1176
1177 if (q->p_child == p) {
1178 q->p_child = p->p_sibling;
1179 /*
1180 * If the parent has no children, it better not
1181 * have any with new states either!
1182 */
1183 ASSERT(q->p_child ? 1 : q->p_child_ns == NULL);
1184 }
1185
1186 if (p->p_sibling) {
1187 p->p_sibling->p_psibling = p->p_psibling;
1188 }
1189
1190 if (p->p_psibling) {
1191 p->p_psibling->p_sibling = p->p_sibling;
1192 }
1193 }
1194
1195 /*
1196 * Remove zombie children from the process table.
1197 */
1198 void
freeproc(proc_t * p)1199 freeproc(proc_t *p)
1200 {
1201 proc_t *q;
1202 task_t *tk;
1203
1204 ASSERT(p->p_stat == SZOMB);
1205 ASSERT(p->p_tlist == NULL);
1206 ASSERT(MUTEX_HELD(&pidlock));
1207
1208 sigdelq(p, NULL, 0);
1209 if (p->p_killsqp) {
1210 siginfofree(p->p_killsqp);
1211 p->p_killsqp = NULL;
1212 }
1213
1214 prfree(p); /* inform /proc */
1215
1216 /*
1217 * Don't free the init processes.
1218 * Other dying processes will access it.
1219 */
1220 if (p == proc_init)
1221 return;
1222
1223
1224 /*
1225 * We wait until now to free the cred structure because a
1226 * zombie process's credentials may be examined by /proc.
1227 * No cred locking needed because there are no threads at this point.
1228 */
1229 upcount_dec(crgetruid(p->p_cred), crgetzoneid(p->p_cred));
1230 crfree(p->p_cred);
1231 if (p->p_corefile != NULL) {
1232 corectl_path_rele(p->p_corefile);
1233 p->p_corefile = NULL;
1234 }
1235 if (p->p_content != NULL) {
1236 corectl_content_rele(p->p_content);
1237 p->p_content = NULL;
1238 }
1239
1240 if (p->p_nextofkin && !((p->p_nextofkin->p_flag & SNOWAIT) ||
1241 (PTOU(p->p_nextofkin)->u_signal[SIGCLD - 1] == SIG_IGN))) {
1242 /*
1243 * This should still do the right thing since p_utime/stime
1244 * get set to the correct value on process exit, so it
1245 * should get properly updated
1246 */
1247 p->p_nextofkin->p_cutime += p->p_utime;
1248 p->p_nextofkin->p_cstime += p->p_stime;
1249
1250 p->p_nextofkin->p_cacct[LMS_USER] += p->p_acct[LMS_USER];
1251 p->p_nextofkin->p_cacct[LMS_SYSTEM] += p->p_acct[LMS_SYSTEM];
1252 p->p_nextofkin->p_cacct[LMS_TRAP] += p->p_acct[LMS_TRAP];
1253 p->p_nextofkin->p_cacct[LMS_TFAULT] += p->p_acct[LMS_TFAULT];
1254 p->p_nextofkin->p_cacct[LMS_DFAULT] += p->p_acct[LMS_DFAULT];
1255 p->p_nextofkin->p_cacct[LMS_KFAULT] += p->p_acct[LMS_KFAULT];
1256 p->p_nextofkin->p_cacct[LMS_USER_LOCK]
1257 += p->p_acct[LMS_USER_LOCK];
1258 p->p_nextofkin->p_cacct[LMS_SLEEP] += p->p_acct[LMS_SLEEP];
1259 p->p_nextofkin->p_cacct[LMS_WAIT_CPU]
1260 += p->p_acct[LMS_WAIT_CPU];
1261 p->p_nextofkin->p_cacct[LMS_STOPPED] += p->p_acct[LMS_STOPPED];
1262
1263 p->p_nextofkin->p_cru.minflt += p->p_ru.minflt;
1264 p->p_nextofkin->p_cru.majflt += p->p_ru.majflt;
1265 p->p_nextofkin->p_cru.nswap += p->p_ru.nswap;
1266 p->p_nextofkin->p_cru.inblock += p->p_ru.inblock;
1267 p->p_nextofkin->p_cru.oublock += p->p_ru.oublock;
1268 p->p_nextofkin->p_cru.msgsnd += p->p_ru.msgsnd;
1269 p->p_nextofkin->p_cru.msgrcv += p->p_ru.msgrcv;
1270 p->p_nextofkin->p_cru.nsignals += p->p_ru.nsignals;
1271 p->p_nextofkin->p_cru.nvcsw += p->p_ru.nvcsw;
1272 p->p_nextofkin->p_cru.nivcsw += p->p_ru.nivcsw;
1273 p->p_nextofkin->p_cru.sysc += p->p_ru.sysc;
1274 p->p_nextofkin->p_cru.ioch += p->p_ru.ioch;
1275
1276 }
1277
1278 q = p->p_nextofkin;
1279 if (q && q->p_orphan == p)
1280 q->p_orphan = p->p_nextorph;
1281 else if (q) {
1282 for (q = q->p_orphan; q; q = q->p_nextorph)
1283 if (q->p_nextorph == p)
1284 break;
1285 ASSERT(q && q->p_nextorph == p);
1286 q->p_nextorph = p->p_nextorph;
1287 }
1288
1289 /*
1290 * The process table slot is being freed, so it is now safe to give up
1291 * task and project membership.
1292 */
1293 mutex_enter(&p->p_lock);
1294 tk = p->p_task;
1295 task_detach(p);
1296 mutex_exit(&p->p_lock);
1297
1298 proc_detach(p);
1299 pid_exit(p, tk); /* frees pid and proc structure */
1300
1301 task_rele(tk);
1302 }
1303
1304 /*
1305 * Delete process "child" from the newstate list of process "parent"
1306 */
1307 void
delete_ns(proc_t * parent,proc_t * child)1308 delete_ns(proc_t *parent, proc_t *child)
1309 {
1310 proc_t **ns;
1311
1312 ASSERT(MUTEX_HELD(&pidlock));
1313 ASSERT(child->p_parent == parent);
1314 for (ns = &parent->p_child_ns; *ns != NULL; ns = &(*ns)->p_sibling_ns) {
1315 if (*ns == child) {
1316
1317 ASSERT((*ns)->p_parent == parent);
1318
1319 *ns = child->p_sibling_ns;
1320 child->p_sibling_ns = NULL;
1321 return;
1322 }
1323 }
1324 }
1325
1326 /*
1327 * Add process "child" to the new state list of process "parent"
1328 */
1329 void
add_ns(proc_t * parent,proc_t * child)1330 add_ns(proc_t *parent, proc_t *child)
1331 {
1332 ASSERT(child->p_sibling_ns == NULL);
1333 child->p_sibling_ns = parent->p_child_ns;
1334 parent->p_child_ns = child;
1335 }
1336