xref: /netbsd-src/sys/kern/kern_resource.c (revision 267197ec1eebfcb9810ea27a89625b6ddf68e3e7)
1 /*	$NetBSD: kern_resource.c,v 1.132 2008/01/29 12:41:59 yamt Exp $	*/
2 
3 /*-
4  * Copyright (c) 1982, 1986, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  * (c) UNIX System Laboratories, Inc.
7  * All or some portions of this file are derived from material licensed
8  * to the University of California by American Telephone and Telegraph
9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10  * the permission of UNIX System Laboratories, Inc.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)kern_resource.c	8.8 (Berkeley) 2/14/95
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: kern_resource.c,v 1.132 2008/01/29 12:41:59 yamt Exp $");
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/file.h>
46 #include <sys/resourcevar.h>
47 #include <sys/malloc.h>
48 #include <sys/kmem.h>
49 #include <sys/namei.h>
50 #include <sys/pool.h>
51 #include <sys/proc.h>
52 #include <sys/sysctl.h>
53 #include <sys/timevar.h>
54 #include <sys/kauth.h>
55 #include <sys/atomic.h>
56 #include <sys/mount.h>
57 #include <sys/syscallargs.h>
58 
59 #include <uvm/uvm_extern.h>
60 
/*
 * Maximum process data and stack limits.
 * They are variables so they are patchable.
 */
rlim_t maxdmap = MAXDSIZ;
rlim_t maxsmap = MAXSSIZ;

/* Per-uid accounting hash table; see uid_find() and uid_init(). */
struct uihashhead *uihashtbl;
u_long uihash;		/* size of hash table - 1 */
kmutex_t uihashtbl_lock;	/* protects uihashtbl and its chains */

/* Allocation caches for plimit and pstats structures. */
static pool_cache_t plimit_cache;
static pool_cache_t pstats_cache;
74 
75 void
76 resource_init(void)
77 {
78 
79 	plimit_cache = pool_cache_init(sizeof(struct plimit), 0, 0, 0,
80 	    "plimitpl", NULL, IPL_NONE, NULL, NULL, NULL);
81 	pstats_cache = pool_cache_init(sizeof(struct pstats), 0, 0, 0,
82 	    "pstatspl", NULL, IPL_NONE, NULL, NULL, NULL);
83 }
84 
85 /*
86  * Resource controls and accounting.
87  */
88 
/*
 * getpriority(2): return the lowest (i.e. most favoured) nice value
 * among the processes selected by "which"/"who".  Returns ESRCH if
 * no process matched.
 */
int
sys_getpriority(struct lwp *l, const struct sys_getpriority_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) which;
		syscallarg(id_t) who;
	} */
	struct proc *curp = l->l_proc, *p;
	/* Sentinel above any possible nice value: means "none found". */
	int low = NZERO + PRIO_MAX + 1;
	int who = SCARG(uap, who);

	mutex_enter(&proclist_lock);
	switch (SCARG(uap, which)) {
	case PRIO_PROCESS:
		/* who == 0 selects the calling process. */
		if (who == 0)
			p = curp;
		else
			p = p_find(who, PFIND_LOCKED);
		if (p != NULL)
			low = p->p_nice;
		break;

	case PRIO_PGRP: {
		struct pgrp *pg;

		/* who == 0 selects the caller's process group. */
		if (who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pg_find(who, PFIND_LOCKED)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (p->p_nice < low)
				low = p->p_nice;
		}
		break;
	}

	case PRIO_USER:
		/* who == 0 means the caller's effective uid. */
		if (who == 0)
			who = (int)kauth_cred_geteuid(l->l_cred);
		PROCLIST_FOREACH(p, &allproc) {
			/* p_mutex stabilizes p_cred for the euid check. */
			mutex_enter(&p->p_mutex);
			if (kauth_cred_geteuid(p->p_cred) ==
			    (uid_t)who && p->p_nice < low)
				low = p->p_nice;
			mutex_exit(&p->p_mutex);
		}
		break;

	default:
		mutex_exit(&proclist_lock);
		return (EINVAL);
	}
	mutex_exit(&proclist_lock);

	if (low == NZERO + PRIO_MAX + 1)
		return (ESRCH);
	/* Convert from the kernel's biased range back to user range. */
	*retval = low - NZERO;
	return (0);
}
148 
149 /* ARGSUSED */
150 int
151 sys_setpriority(struct lwp *l, const struct sys_setpriority_args *uap, register_t *retval)
152 {
153 	/* {
154 		syscallarg(int) which;
155 		syscallarg(id_t) who;
156 		syscallarg(int) prio;
157 	} */
158 	struct proc *curp = l->l_proc, *p;
159 	int found = 0, error = 0;
160 	int who = SCARG(uap, who);
161 
162 	mutex_enter(&proclist_lock);
163 	switch (SCARG(uap, which)) {
164 	case PRIO_PROCESS:
165 		if (who == 0)
166 			p = curp;
167 		else
168 			p = p_find(who, PFIND_LOCKED);
169 		if (p != 0) {
170 			mutex_enter(&p->p_mutex);
171 			error = donice(l, p, SCARG(uap, prio));
172 			mutex_exit(&p->p_mutex);
173 		}
174 		found++;
175 		break;
176 
177 	case PRIO_PGRP: {
178 		struct pgrp *pg;
179 
180 		if (who == 0)
181 			pg = curp->p_pgrp;
182 		else if ((pg = pg_find(who, PFIND_LOCKED)) == NULL)
183 			break;
184 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
185 			mutex_enter(&p->p_mutex);
186 			error = donice(l, p, SCARG(uap, prio));
187 			mutex_exit(&p->p_mutex);
188 			found++;
189 		}
190 		break;
191 	}
192 
193 	case PRIO_USER:
194 		if (who == 0)
195 			who = (int)kauth_cred_geteuid(l->l_cred);
196 		PROCLIST_FOREACH(p, &allproc) {
197 			mutex_enter(&p->p_mutex);
198 			if (kauth_cred_geteuid(p->p_cred) ==
199 			    (uid_t)SCARG(uap, who)) {
200 				error = donice(l, p, SCARG(uap, prio));
201 				found++;
202 			}
203 			mutex_exit(&p->p_mutex);
204 		}
205 		break;
206 
207 	default:
208 		error = EINVAL;
209 		break;
210 	}
211 	mutex_exit(&proclist_lock);
212 	if (found == 0)
213 		return (ESRCH);
214 	return (error);
215 }
216 
217 /*
218  * Renice a process.
219  *
220  * Call with the target process' credentials locked.
221  */
222 int
223 donice(struct lwp *l, struct proc *chgp, int n)
224 {
225 	kauth_cred_t cred = l->l_cred;
226 	int onice;
227 
228 	KASSERT(mutex_owned(&chgp->p_mutex));
229 
230 	if (n > PRIO_MAX)
231 		n = PRIO_MAX;
232 	if (n < PRIO_MIN)
233 		n = PRIO_MIN;
234 	n += NZERO;
235 	onice = chgp->p_nice;
236 	onice = chgp->p_nice;
237 
238   again:
239 	if (kauth_authorize_process(cred, KAUTH_PROCESS_NICE, chgp,
240 	    KAUTH_ARG(n), NULL, NULL))
241 		return (EACCES);
242 	mutex_spin_enter(&chgp->p_smutex);
243 	if (onice != chgp->p_nice) {
244 		mutex_spin_exit(&chgp->p_smutex);
245 		goto again;
246 	}
247 	sched_nice(chgp, n);
248 	mutex_spin_exit(&chgp->p_smutex);
249 	return (0);
250 }
251 
252 /* ARGSUSED */
253 int
254 sys_setrlimit(struct lwp *l, const struct sys_setrlimit_args *uap, register_t *retval)
255 {
256 	/* {
257 		syscallarg(int) which;
258 		syscallarg(const struct rlimit *) rlp;
259 	} */
260 	int which = SCARG(uap, which);
261 	struct rlimit alim;
262 	int error;
263 
264 	error = copyin(SCARG(uap, rlp), &alim, sizeof(struct rlimit));
265 	if (error)
266 		return (error);
267 	return (dosetrlimit(l, l->l_proc, which, &alim));
268 }
269 
/*
 * Validate and install a new resource limit "which" for process p.
 * Clamps the requested values against system maxima, adjusts the
 * stack mapping for RLIMIT_STACK, and stores the result in the
 * process's (privatised) plimit.
 */
int
dosetrlimit(struct lwp *l, struct proc *p, int which, struct rlimit *limp)
{
	struct rlimit *alimp;
	int error;

	if ((u_int)which >= RLIM_NLIMITS)
		return (EINVAL);

	/*
	 * NOTE(review): if rlim_t is an unsigned type these checks can
	 * never fire -- confirm against the rlim_t definition.
	 */
	if (limp->rlim_cur < 0 || limp->rlim_max < 0)
		return (EINVAL);

	if (limp->rlim_cur > limp->rlim_max) {
		/*
		 * This is programming error. According to SUSv2, we should
		 * return error in this case.
		 */
		return (EINVAL);
	}

	alimp = &p->p_rlimit[which];
	/* if we don't change the value, no need to limcopy() */
	if (limp->rlim_cur == alimp->rlim_cur &&
	    limp->rlim_max == alimp->rlim_max)
		return 0;

	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT,
	    p, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_SET), limp, KAUTH_ARG(which));
	if (error)
		return (error);

	/* Give p a private copy-on-write plimit before modifying it. */
	lim_privatise(p, false);
	/* p->p_limit is now unchangeable */
	alimp = &p->p_rlimit[which];

	switch (which) {

	case RLIMIT_DATA:
		/* Clamp to the patchable system-wide data ceiling. */
		if (limp->rlim_cur > maxdmap)
			limp->rlim_cur = maxdmap;
		if (limp->rlim_max > maxdmap)
			limp->rlim_max = maxdmap;
		break;

	case RLIMIT_STACK:
		/* Clamp to the patchable system-wide stack ceiling. */
		if (limp->rlim_cur > maxsmap)
			limp->rlim_cur = maxsmap;
		if (limp->rlim_max > maxsmap)
			limp->rlim_max = maxsmap;

		/*
		 * Return EINVAL if the new stack size limit is lower than
		 * current usage. Otherwise, the process would get SIGSEGV the
		 * moment it would try to access anything on it's current stack.
		 * This conforms to SUSv2.
		 */
		if (limp->rlim_cur < p->p_vmspace->vm_ssize * PAGE_SIZE
		    || limp->rlim_max < p->p_vmspace->vm_ssize * PAGE_SIZE) {
			return (EINVAL);
		}

		/*
		 * Stack is allocated to the max at exec time with
		 * only "rlim_cur" bytes accessible (In other words,
		 * allocates stack dividing two contiguous regions at
		 * "rlim_cur" bytes boundary).
		 *
		 * Since allocation is done in terms of page, roundup
		 * "rlim_cur" (otherwise, contiguous regions
		 * overlap).  If stack limit is going up make more
		 * accessible, if going down make inaccessible.
		 */
		limp->rlim_cur = round_page(limp->rlim_cur);
		if (limp->rlim_cur != alimp->rlim_cur) {
			vaddr_t addr;
			vsize_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > alimp->rlim_cur) {
				/* Growing: make the new span accessible. */
				prot = VM_PROT_READ | VM_PROT_WRITE;
				size = limp->rlim_cur - alimp->rlim_cur;
				addr = (vaddr_t)p->p_vmspace->vm_minsaddr -
				    limp->rlim_cur;
			} else {
				/* Shrinking: revoke access to the excess. */
				prot = VM_PROT_NONE;
				size = alimp->rlim_cur - limp->rlim_cur;
				addr = (vaddr_t)p->p_vmspace->vm_minsaddr -
				     alimp->rlim_cur;
			}
			(void) uvm_map_protect(&p->p_vmspace->vm_map,
			    addr, addr+size, prot, false);
		}
		break;

	case RLIMIT_NOFILE:
		/* Clamp to the global open-file limit. */
		if (limp->rlim_cur > maxfiles)
			limp->rlim_cur = maxfiles;
		if (limp->rlim_max > maxfiles)
			limp->rlim_max = maxfiles;
		break;

	case RLIMIT_NPROC:
		/* Clamp to the global process limit. */
		if (limp->rlim_cur > maxproc)
			limp->rlim_cur = maxproc;
		if (limp->rlim_max > maxproc)
			limp->rlim_max = maxproc;
		break;
	}

	/* Publish the new values under the plimit lock. */
	mutex_enter(&p->p_limit->pl_lock);
	*alimp = *limp;
	mutex_exit(&p->p_limit->pl_lock);
	return (0);
}
384 
385 /* ARGSUSED */
386 int
387 sys_getrlimit(struct lwp *l, const struct sys_getrlimit_args *uap, register_t *retval)
388 {
389 	/* {
390 		syscallarg(int) which;
391 		syscallarg(struct rlimit *) rlp;
392 	} */
393 	struct proc *p = l->l_proc;
394 	int which = SCARG(uap, which);
395 	struct rlimit rl;
396 
397 	if ((u_int)which >= RLIM_NLIMITS)
398 		return (EINVAL);
399 
400 	mutex_enter(&p->p_mutex);
401 	memcpy(&rl, &p->p_rlimit[which], sizeof(rl));
402 	mutex_exit(&p->p_mutex);
403 
404 	return copyout(&rl, SCARG(uap, rlp), sizeof(rl));
405 }
406 
/*
 * Transform the running time and tick information in proc p into user,
 * system, and interrupt time usage.
 *
 * Should be called with p->p_smutex held unless called from exit1().
 */
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp,
    struct timeval *ip, struct timeval *rp)
{
	uint64_t u, st, ut, it, tot;
	struct lwp *l;
	struct bintime tm;
	struct timeval tv;

	/* Snapshot the statclock tick counters under the spin lock. */
	mutex_spin_enter(&p->p_stmutex);
	st = p->p_sticks;
	ut = p->p_uticks;
	it = p->p_iticks;
	mutex_spin_exit(&p->p_stmutex);

	tm = p->p_rtime;

	/* Accumulate the run time of every LWP in the process. */
	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		lwp_lock(l);
		bintime_add(&tm, &l->l_rtime);
		if ((l->l_flag & LW_RUNNING) != 0) {
			struct bintime diff;
			/*
			 * Adjust for the current time slice.  This is
			 * actually fairly important since the error
			 * here is on the order of a time quantum,
			 * which is much greater than the sampling
			 * error.
			 */
			binuptime(&diff);
			bintime_sub(&diff, &l->l_stime);
			bintime_add(&tm, &diff);
		}
		lwp_unlock(l);
	}

	tot = st + ut + it;
	/* Total accumulated run time, in microseconds. */
	bintime2timeval(&tm, &tv);
	u = (uint64_t)tv.tv_sec * 1000000ul + tv.tv_usec;

	if (tot == 0) {
		/* No ticks, so can't use to share time out, split 50-50 */
		st = ut = u / 2;
	} else {
		/* Apportion real time in the ratio of the tick counts. */
		st = (u * st) / tot;
		ut = (u * ut) / tot;
	}
	if (sp != NULL) {
		sp->tv_sec = st / 1000000;
		sp->tv_usec = st % 1000000;
	}
	if (up != NULL) {
		up->tv_sec = ut / 1000000;
		up->tv_usec = ut % 1000000;
	}
	if (ip != NULL) {
		if (it != 0)
			it = (u * it) / tot;
		ip->tv_sec = it / 1000000;
		ip->tv_usec = it % 1000000;
	}
	if (rp != NULL) {
		*rp = tv;
	}
}
478 
479 /* ARGSUSED */
480 int
481 sys_getrusage(struct lwp *l, const struct sys_getrusage_args *uap, register_t *retval)
482 {
483 	/* {
484 		syscallarg(int) who;
485 		syscallarg(struct rusage *) rusage;
486 	} */
487 	struct rusage ru;
488 	struct proc *p = l->l_proc;
489 
490 	switch (SCARG(uap, who)) {
491 	case RUSAGE_SELF:
492 		mutex_enter(&p->p_smutex);
493 		memcpy(&ru, &p->p_stats->p_ru, sizeof(ru));
494 		calcru(p, &ru.ru_utime, &ru.ru_stime, NULL, NULL);
495 		mutex_exit(&p->p_smutex);
496 		break;
497 
498 	case RUSAGE_CHILDREN:
499 		mutex_enter(&p->p_smutex);
500 		memcpy(&ru, &p->p_stats->p_cru, sizeof(ru));
501 		mutex_exit(&p->p_smutex);
502 		break;
503 
504 	default:
505 		return EINVAL;
506 	}
507 
508 	return copyout(&ru, SCARG(uap, rusage), sizeof(ru));
509 }
510 
511 void
512 ruadd(struct rusage *ru, struct rusage *ru2)
513 {
514 	long *ip, *ip2;
515 	int i;
516 
517 	timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
518 	timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
519 	if (ru->ru_maxrss < ru2->ru_maxrss)
520 		ru->ru_maxrss = ru2->ru_maxrss;
521 	ip = &ru->ru_first; ip2 = &ru2->ru_first;
522 	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
523 		*ip++ += *ip2++;
524 }
525 
/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork,
 * and copy when a limit is changed.
 *
 * Unfortunately (due to PL_SHAREMOD) it is possibly for the structure
 * we are copying to change beneath our feet!
 */
struct plimit *
lim_copy(struct plimit *lim)
{
	struct plimit *newlim;
	char *corename;
	size_t alen, len;

	newlim = pool_cache_get(plimit_cache, PR_WAITOK);
	mutex_init(&newlim->pl_lock, MUTEX_DEFAULT, IPL_NONE);
	newlim->pl_flags = 0;
	newlim->pl_refcnt = 1;
	newlim->pl_sv_limit = NULL;

	mutex_enter(&lim->pl_lock);
	memcpy(newlim->pl_rlimit, lim->pl_rlimit,
	    sizeof(struct rlimit) * RLIM_NLIMITS);

	/*
	 * The source core name can change whenever we drop pl_lock to
	 * allocate, so loop until the buffer allocated is big enough
	 * for the name observed with the lock held.
	 */
	alen = 0;
	corename = NULL;
	for (;;) {
		if (lim->pl_corename == defcorename) {
			/* The default name is static; share the pointer. */
			newlim->pl_corename = defcorename;
			break;
		}
		len = strlen(lim->pl_corename) + 1;
		if (len <= alen) {
			/* Buffer is large enough: copy and keep it. */
			newlim->pl_corename = corename;
			memcpy(corename, lim->pl_corename, len);
			corename = NULL;
			break;
		}
		/* Need a (larger) buffer; cannot malloc under pl_lock. */
		mutex_exit(&lim->pl_lock);
		if (corename != NULL)
			free(corename, M_TEMP);
		alen = len;
		corename = malloc(alen, M_TEMP, M_WAITOK);
		mutex_enter(&lim->pl_lock);
	}
	mutex_exit(&lim->pl_lock);
	/* Free any buffer left over from a lost race. */
	if (corename != NULL)
		free(corename, M_TEMP);
	return newlim;
}
577 
/*
 * Take an additional reference to a plimit structure; released
 * with limfree().
 */
void
lim_addref(struct plimit *lim)
{
	atomic_inc_uint(&lim->pl_refcnt);
}
583 
/*
 * Give a process it's own private plimit structure.
 * This will only be shared (in fork) if modifications are to be shared.
 */
void
lim_privatise(struct proc *p, bool set_shared)
{
	struct plimit *lim, *newlim;

	lim = p->p_limit;
	if (lim->pl_flags & PL_WRITEABLE) {
		/* Already private; at most update the sharing flag. */
		if (set_shared)
			lim->pl_flags |= PL_SHAREMOD;
		return;
	}

	if (set_shared && lim->pl_flags & PL_SHAREMOD)
		return;

	/* Copy without holding p_mutex: lim_copy() may sleep. */
	newlim = lim_copy(lim);

	mutex_enter(&p->p_mutex);
	if (p->p_limit->pl_flags & PL_WRITEABLE) {
		/* Someone crept in while we were busy */
		mutex_exit(&p->p_mutex);
		limfree(newlim);
		if (set_shared)
			p->p_limit->pl_flags |= PL_SHAREMOD;
		return;
	}

	/*
	 * Since most accesses to p->p_limit aren't locked, we must not
	 * delete the old limit structure yet.
	 */
	newlim->pl_sv_limit = p->p_limit;
	newlim->pl_flags |= PL_WRITEABLE;
	if (set_shared)
		newlim->pl_flags |= PL_SHAREMOD;
	p->p_limit = newlim;
	mutex_exit(&p->p_mutex);
}
626 
/*
 * Drop a reference to a plimit structure, destroying it -- and any
 * saved predecessors chained through pl_sv_limit -- once the last
 * reference is gone.
 */
void
limfree(struct plimit *lim)
{
	struct plimit *sv_lim;

	do {
		if (atomic_dec_uint_nv(&lim->pl_refcnt) > 0)
			return;
		/* Last reference: release the core name and structure. */
		if (lim->pl_corename != defcorename)
			free(lim->pl_corename, M_TEMP);
		sv_lim = lim->pl_sv_limit;
		mutex_destroy(&lim->pl_lock);
		pool_cache_put(plimit_cache, lim);
	} while ((lim = sv_lim) != NULL);
}
642 
643 struct pstats *
644 pstatscopy(struct pstats *ps)
645 {
646 
647 	struct pstats *newps;
648 
649 	newps = pool_cache_get(pstats_cache, PR_WAITOK);
650 
651 	memset(&newps->pstat_startzero, 0,
652 	(unsigned) ((char *)&newps->pstat_endzero -
653 		    (char *)&newps->pstat_startzero));
654 	memcpy(&newps->pstat_startcopy, &ps->pstat_startcopy,
655 	((char *)&newps->pstat_endcopy -
656 	 (char *)&newps->pstat_startcopy));
657 
658 	return (newps);
659 
660 }
661 
/*
 * Release a pstats structure back to its allocation cache.
 */
void
pstatsfree(struct pstats *ps)
{

	pool_cache_put(pstats_cache, ps);
}
668 
669 /*
670  * sysctl interface in five parts
671  */
672 
673 /*
674  * a routine for sysctl proc subtree helpers that need to pick a valid
675  * process by pid.
676  */
677 static int
678 sysctl_proc_findproc(struct lwp *l, struct proc **p2, pid_t pid)
679 {
680 	struct proc *ptmp;
681 	int error = 0;
682 
683 	if (pid == PROC_CURPROC)
684 		ptmp = l->l_proc;
685 	else if ((ptmp = pfind(pid)) == NULL)
686 		error = ESRCH;
687 
688 	*p2 = ptmp;
689 	return (error);
690 }
691 
692 /*
693  * sysctl helper routine for setting a process's specific corefile
694  * name.  picks the process based on the given pid and checks the
695  * correctness of the new value.
696  */
697 static int
698 sysctl_proc_corename(SYSCTLFN_ARGS)
699 {
700 	struct proc *ptmp;
701 	struct plimit *lim;
702 	int error = 0, len;
703 	char *cname;
704 	char *ocore;
705 	char *tmp;
706 	struct sysctlnode node;
707 
708 	/*
709 	 * is this all correct?
710 	 */
711 	if (namelen != 0)
712 		return (EINVAL);
713 	if (name[-1] != PROC_PID_CORENAME)
714 		return (EINVAL);
715 
716 	/*
717 	 * whom are we tweaking?
718 	 */
719 	error = sysctl_proc_findproc(l, &ptmp, (pid_t)name[-2]);
720 	if (error)
721 		return (error);
722 
723 	/* XXX-elad */
724 	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, ptmp,
725 	    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
726 	if (error)
727 		return (error);
728 
729 	if (newp == NULL) {
730 		error = kauth_authorize_process(l->l_cred,
731 		    KAUTH_PROCESS_CORENAME, ptmp,
732 		    KAUTH_ARG(KAUTH_REQ_PROCESS_CORENAME_GET), NULL, NULL);
733 		if (error)
734 			return (error);
735 	}
736 
737 	/*
738 	 * let them modify a temporary copy of the core name
739 	 */
740 	cname = PNBUF_GET();
741 	lim = ptmp->p_limit;
742 	mutex_enter(&lim->pl_lock);
743 	strlcpy(cname, lim->pl_corename, MAXPATHLEN);
744 	mutex_exit(&lim->pl_lock);
745 
746 	node = *rnode;
747 	node.sysctl_data = cname;
748 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
749 
750 	/*
751 	 * if that failed, or they have nothing new to say, or we've
752 	 * heard it before...
753 	 */
754 	if (error || newp == NULL)
755 		goto done;
756 	lim = ptmp->p_limit;
757 	mutex_enter(&lim->pl_lock);
758 	error = strcmp(cname, lim->pl_corename);
759 	mutex_exit(&lim->pl_lock);
760 	if (error == 0)
761 		/* Unchanged */
762 		goto done;
763 
764 	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CORENAME,
765 	    ptmp, KAUTH_ARG(KAUTH_REQ_PROCESS_CORENAME_SET), cname, NULL);
766 	if (error)
767 		return (error);
768 
769 	/*
770 	 * no error yet and cname now has the new core name in it.
771 	 * let's see if it looks acceptable.  it must be either "core"
772 	 * or end in ".core" or "/core".
773 	 */
774 	len = strlen(cname);
775 	if (len < 4) {
776 		error = EINVAL;
777 	} else if (strcmp(cname + len - 4, "core") != 0) {
778 		error = EINVAL;
779 	} else if (len > 4 && cname[len - 5] != '/' && cname[len - 5] != '.') {
780 		error = EINVAL;
781 	}
782 	if (error != 0) {
783 		goto done;
784 	}
785 
786 	/*
787 	 * hmm...looks good.  now...where do we put it?
788 	 */
789 	tmp = malloc(len + 1, M_TEMP, M_WAITOK|M_CANFAIL);
790 	if (tmp == NULL) {
791 		error = ENOMEM;
792 		goto done;
793 	}
794 	memcpy(tmp, cname, len + 1);
795 
796 	lim_privatise(ptmp, false);
797 	lim = ptmp->p_limit;
798 	mutex_enter(&lim->pl_lock);
799 	ocore = lim->pl_corename;
800 	lim->pl_corename = tmp;
801 	mutex_exit(&lim->pl_lock);
802 	if (ocore != defcorename)
803 		free(ocore, M_TEMP);
804 
805 done:
806 	PNBUF_PUT(cname);
807 	return error;
808 }
809 
810 /*
811  * sysctl helper routine for checking/setting a process's stop flags,
812  * one for fork and one for exec.
813  */
814 static int
815 sysctl_proc_stop(SYSCTLFN_ARGS)
816 {
817 	struct proc *ptmp;
818 	int i, f, error = 0;
819 	struct sysctlnode node;
820 
821 	if (namelen != 0)
822 		return (EINVAL);
823 
824 	error = sysctl_proc_findproc(l, &ptmp, (pid_t)name[-2]);
825 	if (error)
826 		return (error);
827 
828 	/* XXX-elad */
829 	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, ptmp,
830 	    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
831 	if (error)
832 		return (error);
833 
834 	switch (rnode->sysctl_num) {
835 	case PROC_PID_STOPFORK:
836 		f = PS_STOPFORK;
837 		break;
838 	case PROC_PID_STOPEXEC:
839 		f = PS_STOPEXEC;
840 		break;
841 	case PROC_PID_STOPEXIT:
842 		f = PS_STOPEXIT;
843 		break;
844 	default:
845 		return (EINVAL);
846 	}
847 
848 	i = (ptmp->p_flag & f) ? 1 : 0;
849 	node = *rnode;
850 	node.sysctl_data = &i;
851 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
852 	if (error || newp == NULL)
853 		return (error);
854 
855 	mutex_enter(&ptmp->p_smutex);
856 	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_STOPFLAG,
857 	    ptmp, KAUTH_ARG(f), NULL, NULL);
858 	if (error)
859 		return (error);
860 	if (i)
861 		ptmp->p_sflag |= f;
862 	else
863 		ptmp->p_sflag &= ~f;
864 	mutex_exit(&ptmp->p_smutex);
865 
866 	return (0);
867 }
868 
/*
 * sysctl helper routine for a process's rlimits as exposed by sysctl.
 */
static int
sysctl_proc_plimit(SYSCTLFN_ARGS)
{
	struct proc *ptmp;
	u_int limitno;
	int which, error = 0;
        struct rlimit alim;
	struct sysctlnode node;

	if (namelen != 0)
		return (EINVAL);

	/*
	 * The path is .../<pid>/rlimit/<resource>/<soft|hard>, so the
	 * negative name[] indices walk back up the requested node path.
	 */
	which = name[-1];
	if (which != PROC_PID_LIMIT_TYPE_SOFT &&
	    which != PROC_PID_LIMIT_TYPE_HARD)
		return (EINVAL);

	/* Resource nodes are numbered from 1; rlimit indices from 0. */
	limitno = name[-2] - 1;
	if (limitno >= RLIM_NLIMITS)
		return (EINVAL);

	if (name[-3] != PROC_PID_LIMIT)
		return (EINVAL);

	error = sysctl_proc_findproc(l, &ptmp, (pid_t)name[-4]);
	if (error)
		return (error);

	/* XXX-elad */
	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, ptmp,
	    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
	if (error)
		return (error);

	/* Check if we can view limits. */
	if (newp == NULL) {
		error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT,
		    ptmp, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_GET), &alim,
		    KAUTH_ARG(which));
		if (error)
			return (error);
	}

	node = *rnode;
	memcpy(&alim, &ptmp->p_rlimit[limitno], sizeof(alim));
	if (which == PROC_PID_LIMIT_TYPE_HARD)
		node.sysctl_data = &alim.rlim_max;
	else
		node.sysctl_data = &alim.rlim_cur;

	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return (error);

	/* dosetrlimit() performs the SET authorization and validation. */
	return (dosetrlimit(l, ptmp, limitno, &alim));
}
928 
/*
 * and finally, the actually glue that sticks it to the tree
 */
SYSCTL_SETUP(sysctl_proc_setup, "sysctl proc subtree setup")
{

	/* Root of the proc.* tree and the magic "curproc" node. */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "proc", NULL,
		       NULL, 0, NULL, 0,
		       CTL_PROC, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_ANYNUMBER,
		       CTLTYPE_NODE, "curproc",
		       SYSCTL_DESCR("Per-process settings"),
		       NULL, 0, NULL, 0,
		       CTL_PROC, PROC_CURPROC, CTL_EOL);

	/* Core file name: serviced by sysctl_proc_corename(). */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
		       CTLTYPE_STRING, "corename",
		       SYSCTL_DESCR("Core file name"),
		       sysctl_proc_corename, 0, NULL, MAXPATHLEN,
		       CTL_PROC, PROC_CURPROC, PROC_PID_CORENAME, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "rlimit",
		       SYSCTL_DESCR("Process limits"),
		       NULL, 0, NULL, 0,
		       CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, CTL_EOL);

/*
 * Create one resource node with "soft" and "hard" leaves under
 * proc.curproc.rlimit, each serviced by sysctl_proc_plimit().
 */
#define create_proc_plimit(s, n) do {					\
	sysctl_createv(clog, 0, NULL, NULL,				\
		       CTLFLAG_PERMANENT,				\
		       CTLTYPE_NODE, s,					\
		       SYSCTL_DESCR("Process " s " limits"),		\
		       NULL, 0, NULL, 0,				\
		       CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n,	\
		       CTL_EOL);					\
	sysctl_createv(clog, 0, NULL, NULL,				\
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, \
		       CTLTYPE_QUAD, "soft",				\
		       SYSCTL_DESCR("Process soft " s " limit"),	\
		       sysctl_proc_plimit, 0, NULL, 0,			\
		       CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n,	\
		       PROC_PID_LIMIT_TYPE_SOFT, CTL_EOL);		\
	sysctl_createv(clog, 0, NULL, NULL,				\
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, \
		       CTLTYPE_QUAD, "hard",				\
		       SYSCTL_DESCR("Process hard " s " limit"),	\
		       sysctl_proc_plimit, 0, NULL, 0,			\
		       CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n,	\
		       PROC_PID_LIMIT_TYPE_HARD, CTL_EOL);		\
	} while (0/*CONSTCOND*/)

	create_proc_plimit("cputime",		PROC_PID_LIMIT_CPU);
	create_proc_plimit("filesize",		PROC_PID_LIMIT_FSIZE);
	create_proc_plimit("datasize",		PROC_PID_LIMIT_DATA);
	create_proc_plimit("stacksize",		PROC_PID_LIMIT_STACK);
	create_proc_plimit("coredumpsize",	PROC_PID_LIMIT_CORE);
	create_proc_plimit("memoryuse",		PROC_PID_LIMIT_RSS);
	create_proc_plimit("memorylocked",	PROC_PID_LIMIT_MEMLOCK);
	create_proc_plimit("maxproc",		PROC_PID_LIMIT_NPROC);
	create_proc_plimit("descriptors",	PROC_PID_LIMIT_NOFILE);
	create_proc_plimit("sbsize",		PROC_PID_LIMIT_SBSIZE);

#undef create_proc_plimit

	/* Stop-at-event flags: serviced by sysctl_proc_stop(). */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
		       CTLTYPE_INT, "stopfork",
		       SYSCTL_DESCR("Stop process at fork(2)"),
		       sysctl_proc_stop, 0, NULL, 0,
		       CTL_PROC, PROC_CURPROC, PROC_PID_STOPFORK, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
		       CTLTYPE_INT, "stopexec",
		       SYSCTL_DESCR("Stop process at execve(2)"),
		       sysctl_proc_stop, 0, NULL, 0,
		       CTL_PROC, PROC_CURPROC, PROC_PID_STOPEXEC, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
		       CTLTYPE_INT, "stopexit",
		       SYSCTL_DESCR("Stop process before completing exit"),
		       sysctl_proc_stop, 0, NULL, 0,
		       CTL_PROC, PROC_CURPROC, PROC_PID_STOPEXIT, CTL_EOL);
}
1016 
/*
 * Initialise the per-uid accounting machinery: the hash table lock
 * and the entry for uid 0.
 */
void
uid_init(void)
{

	/*
	 * XXXSMP This could be at IPL_SOFTNET, but for now we want
	 * to to be deadlock free, so it must be at IPL_VM.
	 */
	mutex_init(&uihashtbl_lock, MUTEX_DEFAULT, IPL_VM);

	/*
	 * Ensure that uid 0 is always in the user hash table, as
	 * sbreserve() expects it available from interrupt context.
	 */
	(void)uid_find(0);
}
1033 
/*
 * Look up (and create on first use) the accounting record for "uid".
 * Allocation cannot be done while holding uihashtbl_lock, so the
 * lookup is retried after allocating; the loser of a race frees its
 * spare entry.
 */
struct uidinfo *
uid_find(uid_t uid)
{
	struct uidinfo *uip;
	struct uidinfo *newuip = NULL;
	struct uihashhead *uipp;

	uipp = UIHASH(uid);

again:
	mutex_enter(&uihashtbl_lock);
	LIST_FOREACH(uip, uipp, ui_hash)
		if (uip->ui_uid == uid) {
			mutex_exit(&uihashtbl_lock);
			/* Found: discard any spare we allocated. */
			if (newuip) {
				mutex_destroy(&newuip->ui_lock);
				kmem_free(newuip, sizeof(*newuip));
			}
			return uip;
		}
	if (newuip == NULL) {
		mutex_exit(&uihashtbl_lock);
		/* Must not be called from interrupt context. */
		newuip = kmem_zalloc(sizeof(*newuip), KM_SLEEP);
		/* XXX this could be IPL_SOFTNET */
		mutex_init(&newuip->ui_lock, MUTEX_DEFAULT, IPL_VM);
		goto again;
	}
	uip = newuip;

	/* Not present and we hold a preallocated entry: insert it. */
	LIST_INSERT_HEAD(uipp, uip, ui_hash);
	uip->ui_uid = uid;
	mutex_exit(&uihashtbl_lock);

	return uip;
}
1070 
1071 /*
1072  * Change the count associated with number of processes
1073  * a given user is using.
1074  */
1075 int
1076 chgproccnt(uid_t uid, int diff)
1077 {
1078 	struct uidinfo *uip;
1079 
1080 	if (diff == 0)
1081 		return 0;
1082 
1083 	uip = uid_find(uid);
1084 	mutex_enter(&uip->ui_lock);
1085 	uip->ui_proccnt += diff;
1086 	KASSERT(uip->ui_proccnt >= 0);
1087 	mutex_exit(&uip->ui_lock);
1088 	return uip->ui_proccnt;
1089 }
1090 
1091 int
1092 chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t xmax)
1093 {
1094 	rlim_t nsb;
1095 
1096 	mutex_enter(&uip->ui_lock);
1097 	nsb = uip->ui_sbsize + to - *hiwat;
1098 	if (to > *hiwat && nsb > xmax) {
1099 		mutex_exit(&uip->ui_lock);
1100 		return 0;
1101 	}
1102 	*hiwat = to;
1103 	uip->ui_sbsize = nsb;
1104 	KASSERT(uip->ui_sbsize >= 0);
1105 	mutex_exit(&uip->ui_lock);
1106 	return 1;
1107 }
1108