xref: /dflybsd-src/sys/kern/kern_resource.c (revision 6c7d9e6aea3e6576cd6ce8fe650e679d71d24861)
1 /*-
2  * Copyright (c) 1982, 1986, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
35  * $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $
36  */
37 
38 #include "opt_compat.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/sysproto.h>
43 #include <sys/file.h>
44 #include <sys/kern_syscall.h>
45 #include <sys/kernel.h>
46 #include <sys/resourcevar.h>
47 #include <sys/malloc.h>
48 #include <sys/proc.h>
49 #include <sys/priv.h>
50 #include <sys/time.h>
51 #include <sys/lockf.h>
52 
53 #include <vm/vm.h>
54 #include <vm/vm_param.h>
55 #include <sys/lock.h>
56 #include <vm/pmap.h>
57 #include <vm/vm_map.h>
58 
59 #include <sys/thread2.h>
60 #include <sys/spinlock2.h>
61 
62 static int donice (struct proc *chgp, int n);
63 static int doionice (struct proc *chgp, int n);
64 
65 static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
66 #define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
67 static struct spinlock uihash_lock;
68 static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
69 static u_long uihash;		/* size of hash table - 1 */
70 
71 static struct uidinfo	*uicreate (uid_t uid);
72 static struct uidinfo	*uilookup (uid_t uid);
73 
74 /*
75  * Resource controls and accounting.
76  */
77 
/*
 * Scan state shared between sys_getpriority() and getpriority_callback().
 */
struct getpriority_info {
	int low;	/* lowest nice value seen so far */
	int who;	/* uid being matched against p_ucred->cr_uid */
};

static int getpriority_callback(struct proc *p, void *data);
84 
85 /*
86  * MPALMOSTSAFE
87  */
88 int
89 sys_getpriority(struct getpriority_args *uap)
90 {
91 	struct getpriority_info info;
92 	thread_t curtd = curthread;
93 	struct proc *curp = curproc;
94 	struct proc *p;
95 	struct pgrp *pg;
96 	int low = PRIO_MAX + 1;
97 	int error;
98 
99 	switch (uap->which) {
100 	case PRIO_PROCESS:
101 		if (uap->who == 0) {
102 			low = curp->p_nice;
103 		} else {
104 			p = pfind(uap->who);
105 			if (p) {
106 				lwkt_gettoken_shared(&p->p_token);
107 				if (PRISON_CHECK(curtd->td_ucred, p->p_ucred))
108 					low = p->p_nice;
109 				lwkt_reltoken(&p->p_token);
110 				PRELE(p);
111 			}
112 		}
113 		break;
114 	case PRIO_PGRP:
115 		if (uap->who == 0) {
116 			lwkt_gettoken_shared(&curp->p_token);
117 			pg = curp->p_pgrp;
118 			pgref(pg);
119 			lwkt_reltoken(&curp->p_token);
120 		} else if ((pg = pgfind(uap->who)) == NULL) {
121 			break;
122 		} /* else ref held from pgfind */
123 
124 		lwkt_gettoken_shared(&pg->pg_token);
125 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
126 			if (PRISON_CHECK(curtd->td_ucred, p->p_ucred) &&
127 			    p->p_nice < low) {
128 				low = p->p_nice;
129 			}
130 		}
131 		lwkt_reltoken(&pg->pg_token);
132 		pgrel(pg);
133 		break;
134 	case PRIO_USER:
135 		if (uap->who == 0)
136 			uap->who = curtd->td_ucred->cr_uid;
137 		info.low = low;
138 		info.who = uap->who;
139 		allproc_scan(getpriority_callback, &info);
140 		low = info.low;
141 		break;
142 
143 	default:
144 		error = EINVAL;
145 		goto done;
146 	}
147 	if (low == PRIO_MAX + 1) {
148 		error = ESRCH;
149 		goto done;
150 	}
151 	uap->sysmsg_result = low;
152 	error = 0;
153 done:
154 	return (error);
155 }
156 
157 /*
158  * Figure out the current lowest nice priority for processes owned
159  * by the specified user.
160  */
161 static
162 int
163 getpriority_callback(struct proc *p, void *data)
164 {
165 	struct getpriority_info *info = data;
166 
167 	lwkt_gettoken_shared(&p->p_token);
168 	if (PRISON_CHECK(curthread->td_ucred, p->p_ucred) &&
169 	    p->p_ucred->cr_uid == info->who &&
170 	    p->p_nice < info->low) {
171 		info->low = p->p_nice;
172 	}
173 	lwkt_reltoken(&p->p_token);
174 	return(0);
175 }
176 
/*
 * Scan state shared between sys_setpriority() and setpriority_callback().
 */
struct setpriority_info {
	int prio;	/* nice value to apply */
	int who;	/* uid being matched against p_ucred->cr_uid */
	int error;	/* last non-zero donice() result, else 0 */
	int found;	/* number of processes donice() was applied to */
};

static int setpriority_callback(struct proc *p, void *data);
185 
186 /*
187  * MPALMOSTSAFE
188  */
189 int
190 sys_setpriority(struct setpriority_args *uap)
191 {
192 	struct setpriority_info info;
193 	thread_t curtd = curthread;
194 	struct proc *curp = curproc;
195 	struct proc *p;
196 	struct pgrp *pg;
197 	int found = 0, error = 0;
198 
199 	switch (uap->which) {
200 	case PRIO_PROCESS:
201 		if (uap->who == 0) {
202 			lwkt_gettoken(&curp->p_token);
203 			error = donice(curp, uap->prio);
204 			found++;
205 			lwkt_reltoken(&curp->p_token);
206 		} else {
207 			p = pfind(uap->who);
208 			if (p) {
209 				lwkt_gettoken(&p->p_token);
210 				if (PRISON_CHECK(curtd->td_ucred, p->p_ucred)) {
211 					error = donice(p, uap->prio);
212 					found++;
213 				}
214 				lwkt_reltoken(&p->p_token);
215 				PRELE(p);
216 			}
217 		}
218 		break;
219 	case PRIO_PGRP:
220 		if (uap->who == 0) {
221 			lwkt_gettoken_shared(&curp->p_token);
222 			pg = curp->p_pgrp;
223 			pgref(pg);
224 			lwkt_reltoken(&curp->p_token);
225 		} else if ((pg = pgfind(uap->who)) == NULL) {
226 			break;
227 		} /* else ref held from pgfind */
228 
229 		lwkt_gettoken(&pg->pg_token);
230 restart:
231 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
232 			PHOLD(p);
233 			lwkt_gettoken(&p->p_token);
234 			if (p->p_pgrp == pg &&
235 			    PRISON_CHECK(curtd->td_ucred, p->p_ucred)) {
236 				error = donice(p, uap->prio);
237 				found++;
238 			}
239 			lwkt_reltoken(&p->p_token);
240 			if (p->p_pgrp != pg) {
241 				PRELE(p);
242 				goto restart;
243 			}
244 			PRELE(p);
245 		}
246 		lwkt_reltoken(&pg->pg_token);
247 		pgrel(pg);
248 		break;
249 	case PRIO_USER:
250 		if (uap->who == 0)
251 			uap->who = curtd->td_ucred->cr_uid;
252 		info.prio = uap->prio;
253 		info.who = uap->who;
254 		info.error = 0;
255 		info.found = 0;
256 		allproc_scan(setpriority_callback, &info);
257 		error = info.error;
258 		found = info.found;
259 		break;
260 	default:
261 		error = EINVAL;
262 		found = 1;
263 		break;
264 	}
265 
266 	if (found == 0)
267 		error = ESRCH;
268 	return (error);
269 }
270 
271 static
272 int
273 setpriority_callback(struct proc *p, void *data)
274 {
275 	struct setpriority_info *info = data;
276 	int error;
277 
278 	lwkt_gettoken(&p->p_token);
279 	if (p->p_ucred->cr_uid == info->who &&
280 	    PRISON_CHECK(curthread->td_ucred, p->p_ucred)) {
281 		error = donice(p, info->prio);
282 		if (error)
283 			info->error = error;
284 		++info->found;
285 	}
286 	lwkt_reltoken(&p->p_token);
287 	return(0);
288 }
289 
290 /*
291  * Caller must hold chgp->p_token
292  */
293 static int
294 donice(struct proc *chgp, int n)
295 {
296 	struct ucred *cr = curthread->td_ucred;
297 	struct lwp *lp;
298 
299 	if (cr->cr_uid && cr->cr_ruid &&
300 	    cr->cr_uid != chgp->p_ucred->cr_uid &&
301 	    cr->cr_ruid != chgp->p_ucred->cr_uid)
302 		return (EPERM);
303 	if (n > PRIO_MAX)
304 		n = PRIO_MAX;
305 	if (n < PRIO_MIN)
306 		n = PRIO_MIN;
307 	if (n < chgp->p_nice && priv_check_cred(cr, PRIV_SCHED_SETPRIORITY, 0))
308 		return (EACCES);
309 	chgp->p_nice = n;
310 	FOREACH_LWP_IN_PROC(lp, chgp) {
311 		LWPHOLD(lp);
312 		chgp->p_usched->resetpriority(lp);
313 		LWPRELE(lp);
314 	}
315 	return (0);
316 }
317 
318 
/*
 * Scan state shared between sys_ioprio_get() and ioprio_get_callback().
 */
struct ioprio_get_info {
	int high;	/* highest p_ionice value seen so far */
	int who;	/* uid being matched against p_ucred->cr_uid */
};

static int ioprio_get_callback(struct proc *p, void *data);
325 
326 /*
327  * MPALMOSTSAFE
328  */
329 int
330 sys_ioprio_get(struct ioprio_get_args *uap)
331 {
332 	struct ioprio_get_info info;
333 	thread_t curtd = curthread;
334 	struct proc *curp = curproc;
335 	struct proc *p;
336 	struct pgrp *pg;
337 	int high = IOPRIO_MIN-2;
338 	int error;
339 
340 	switch (uap->which) {
341 	case PRIO_PROCESS:
342 		if (uap->who == 0) {
343 			high = curp->p_ionice;
344 		} else {
345 			p = pfind(uap->who);
346 			if (p) {
347 				lwkt_gettoken_shared(&p->p_token);
348 				if (PRISON_CHECK(curtd->td_ucred, p->p_ucred))
349 					high = p->p_ionice;
350 				lwkt_reltoken(&p->p_token);
351 				PRELE(p);
352 			}
353 		}
354 		break;
355 	case PRIO_PGRP:
356 		if (uap->who == 0) {
357 			lwkt_gettoken_shared(&curp->p_token);
358 			pg = curp->p_pgrp;
359 			pgref(pg);
360 			lwkt_reltoken(&curp->p_token);
361 		} else if ((pg = pgfind(uap->who)) == NULL) {
362 			break;
363 		} /* else ref held from pgfind */
364 
365 		lwkt_gettoken_shared(&pg->pg_token);
366 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
367 			if (PRISON_CHECK(curtd->td_ucred, p->p_ucred) &&
368 			    p->p_nice > high)
369 				high = p->p_ionice;
370 		}
371 		lwkt_reltoken(&pg->pg_token);
372 		pgrel(pg);
373 		break;
374 	case PRIO_USER:
375 		if (uap->who == 0)
376 			uap->who = curtd->td_ucred->cr_uid;
377 		info.high = high;
378 		info.who = uap->who;
379 		allproc_scan(ioprio_get_callback, &info);
380 		high = info.high;
381 		break;
382 	default:
383 		error = EINVAL;
384 		goto done;
385 	}
386 	if (high == IOPRIO_MIN-2) {
387 		error = ESRCH;
388 		goto done;
389 	}
390 	uap->sysmsg_result = high;
391 	error = 0;
392 done:
393 	return (error);
394 }
395 
396 /*
397  * Figure out the current lowest nice priority for processes owned
398  * by the specified user.
399  */
400 static
401 int
402 ioprio_get_callback(struct proc *p, void *data)
403 {
404 	struct ioprio_get_info *info = data;
405 
406 	lwkt_gettoken_shared(&p->p_token);
407 	if (PRISON_CHECK(curthread->td_ucred, p->p_ucred) &&
408 	    p->p_ucred->cr_uid == info->who &&
409 	    p->p_ionice > info->high) {
410 		info->high = p->p_ionice;
411 	}
412 	lwkt_reltoken(&p->p_token);
413 	return(0);
414 }
415 
416 
/*
 * Scan state shared between sys_ioprio_set() and ioprio_set_callback().
 */
struct ioprio_set_info {
	int prio;	/* I/O priority to apply */
	int who;	/* uid being matched against p_ucred->cr_uid */
	int error;	/* last non-zero doionice() result, else 0 */
	int found;	/* number of processes doionice() was applied to */
};

static int ioprio_set_callback(struct proc *p, void *data);
425 
426 /*
427  * MPALMOSTSAFE
428  */
429 int
430 sys_ioprio_set(struct ioprio_set_args *uap)
431 {
432 	struct ioprio_set_info info;
433 	thread_t curtd = curthread;
434 	struct proc *curp = curproc;
435 	struct proc *p;
436 	struct pgrp *pg;
437 	int found = 0, error = 0;
438 
439 	switch (uap->which) {
440 	case PRIO_PROCESS:
441 		if (uap->who == 0) {
442 			lwkt_gettoken(&curp->p_token);
443 			error = doionice(curp, uap->prio);
444 			lwkt_reltoken(&curp->p_token);
445 			found++;
446 		} else {
447 			p = pfind(uap->who);
448 			if (p) {
449 				lwkt_gettoken(&p->p_token);
450 				if (PRISON_CHECK(curtd->td_ucred, p->p_ucred)) {
451 					error = doionice(p, uap->prio);
452 					found++;
453 				}
454 				lwkt_reltoken(&p->p_token);
455 				PRELE(p);
456 			}
457 		}
458 		break;
459 	case PRIO_PGRP:
460 		if (uap->who == 0) {
461 			lwkt_gettoken_shared(&curp->p_token);
462 			pg = curp->p_pgrp;
463 			pgref(pg);
464 			lwkt_reltoken(&curp->p_token);
465 		} else if ((pg = pgfind(uap->who)) == NULL) {
466 			break;
467 		} /* else ref held from pgfind */
468 
469 		lwkt_gettoken(&pg->pg_token);
470 restart:
471 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
472 			PHOLD(p);
473 			lwkt_gettoken(&p->p_token);
474 			if (p->p_pgrp == pg &&
475 			    PRISON_CHECK(curtd->td_ucred, p->p_ucred)) {
476 				error = doionice(p, uap->prio);
477 				found++;
478 			}
479 			lwkt_reltoken(&p->p_token);
480 			if (p->p_pgrp != pg) {
481 				PRELE(p);
482 				goto restart;
483 			}
484 			PRELE(p);
485 		}
486 		lwkt_reltoken(&pg->pg_token);
487 		pgrel(pg);
488 		break;
489 	case PRIO_USER:
490 		if (uap->who == 0)
491 			uap->who = curtd->td_ucred->cr_uid;
492 		info.prio = uap->prio;
493 		info.who = uap->who;
494 		info.error = 0;
495 		info.found = 0;
496 		allproc_scan(ioprio_set_callback, &info);
497 		error = info.error;
498 		found = info.found;
499 		break;
500 	default:
501 		error = EINVAL;
502 		found = 1;
503 		break;
504 	}
505 
506 	if (found == 0)
507 		error = ESRCH;
508 	return (error);
509 }
510 
511 static
512 int
513 ioprio_set_callback(struct proc *p, void *data)
514 {
515 	struct ioprio_set_info *info = data;
516 	int error;
517 
518 	lwkt_gettoken(&p->p_token);
519 	if (p->p_ucred->cr_uid == info->who &&
520 	    PRISON_CHECK(curthread->td_ucred, p->p_ucred)) {
521 		error = doionice(p, info->prio);
522 		if (error)
523 			info->error = error;
524 		++info->found;
525 	}
526 	lwkt_reltoken(&p->p_token);
527 	return(0);
528 }
529 
530 int
531 doionice(struct proc *chgp, int n)
532 {
533 	struct ucred *cr = curthread->td_ucred;
534 
535 	if (cr->cr_uid && cr->cr_ruid &&
536 	    cr->cr_uid != chgp->p_ucred->cr_uid &&
537 	    cr->cr_ruid != chgp->p_ucred->cr_uid)
538 		return (EPERM);
539 	if (n > IOPRIO_MAX)
540 		n = IOPRIO_MAX;
541 	if (n < IOPRIO_MIN)
542 		n = IOPRIO_MIN;
543 	if (n < chgp->p_ionice &&
544 	    priv_check_cred(cr, PRIV_SCHED_SETPRIORITY, 0))
545 		return (EACCES);
546 	chgp->p_ionice = n;
547 
548 	return (0);
549 
550 }
551 
552 /*
553  * MPALMOSTSAFE
554  */
555 int
556 sys_lwp_rtprio(struct lwp_rtprio_args *uap)
557 {
558 	struct ucred *cr = curthread->td_ucred;
559 	struct proc *p;
560 	struct lwp *lp;
561 	struct rtprio rtp;
562 	int error;
563 
564 	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
565 	if (error)
566 		return error;
567 	if (uap->pid < 0)
568 		return EINVAL;
569 
570 	if (uap->pid == 0) {
571 		p = curproc;
572 		PHOLD(p);
573 	} else {
574 		p = pfind(uap->pid);
575 	}
576 	if (p == NULL) {
577 		error = ESRCH;
578 		goto done;
579 	}
580 	lwkt_gettoken(&p->p_token);
581 
582 	if (uap->tid < -1) {
583 		error = EINVAL;
584 		goto done;
585 	}
586 	if (uap->tid == -1) {
587 		/*
588 		 * sadly, tid can be 0 so we can't use 0 here
589 		 * like sys_rtprio()
590 		 */
591 		lp = curthread->td_lwp;
592 	} else {
593 		lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid);
594 		if (lp == NULL) {
595 			error = ESRCH;
596 			goto done;
597 		}
598 	}
599 
600 	switch (uap->function) {
601 	case RTP_LOOKUP:
602 		error = copyout(&lp->lwp_rtprio, uap->rtp,
603 				sizeof(struct rtprio));
604 		break;
605 	case RTP_SET:
606 		if (cr->cr_uid && cr->cr_ruid &&
607 		    cr->cr_uid != p->p_ucred->cr_uid &&
608 		    cr->cr_ruid != p->p_ucred->cr_uid) {
609 			error = EPERM;
610 			break;
611 		}
612 		/* disallow setting rtprio in most cases if not superuser */
613 		if (priv_check_cred(cr, PRIV_SCHED_RTPRIO, 0)) {
614 			/* can't set someone else's */
615 			if (uap->pid) { /* XXX */
616 				error = EPERM;
617 				break;
618 			}
619 			/* can't set realtime priority */
620 /*
621  * Realtime priority has to be restricted for reasons which should be
622  * obvious. However, for idle priority, there is a potential for
623  * system deadlock if an idleprio process gains a lock on a resource
624  * that other processes need (and the idleprio process can't run
625  * due to a CPU-bound normal process). Fix me! XXX
626  */
627  			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
628 				error = EPERM;
629 				break;
630 			}
631 		}
632 		switch (rtp.type) {
633 #ifdef RTP_PRIO_FIFO
634 		case RTP_PRIO_FIFO:
635 #endif
636 		case RTP_PRIO_REALTIME:
637 		case RTP_PRIO_NORMAL:
638 		case RTP_PRIO_IDLE:
639 			if (rtp.prio > RTP_PRIO_MAX) {
640 				error = EINVAL;
641 			} else {
642 				lp->lwp_rtprio = rtp;
643 				error = 0;
644 			}
645 			break;
646 		default:
647 			error = EINVAL;
648 			break;
649 		}
650 		break;
651 	default:
652 		error = EINVAL;
653 		break;
654 	}
655 
656 done:
657 	if (p) {
658 		lwkt_reltoken(&p->p_token);
659 		PRELE(p);
660 	}
661 	return (error);
662 }
663 
664 /*
665  * Set realtime priority
666  *
667  * MPALMOSTSAFE
668  */
669 int
670 sys_rtprio(struct rtprio_args *uap)
671 {
672 	struct ucred *cr = curthread->td_ucred;
673 	struct proc *p;
674 	struct lwp *lp;
675 	struct rtprio rtp;
676 	int error;
677 
678 	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
679 	if (error)
680 		return (error);
681 
682 	if (uap->pid == 0) {
683 		p = curproc;
684 		PHOLD(p);
685 	} else {
686 		p = pfind(uap->pid);
687 	}
688 
689 	if (p == NULL) {
690 		error = ESRCH;
691 		goto done;
692 	}
693 	lwkt_gettoken(&p->p_token);
694 
695 	/* XXX lwp */
696 	lp = FIRST_LWP_IN_PROC(p);
697 	switch (uap->function) {
698 	case RTP_LOOKUP:
699 		error = copyout(&lp->lwp_rtprio, uap->rtp,
700 				sizeof(struct rtprio));
701 		break;
702 	case RTP_SET:
703 		if (cr->cr_uid && cr->cr_ruid &&
704 		    cr->cr_uid != p->p_ucred->cr_uid &&
705 		    cr->cr_ruid != p->p_ucred->cr_uid) {
706 			error = EPERM;
707 			break;
708 		}
709 		/* disallow setting rtprio in most cases if not superuser */
710 		if (priv_check_cred(cr, PRIV_SCHED_RTPRIO, 0)) {
711 			/* can't set someone else's */
712 			if (uap->pid) {
713 				error = EPERM;
714 				break;
715 			}
716 			/* can't set realtime priority */
717 /*
718  * Realtime priority has to be restricted for reasons which should be
719  * obvious. However, for idle priority, there is a potential for
720  * system deadlock if an idleprio process gains a lock on a resource
721  * that other processes need (and the idleprio process can't run
722  * due to a CPU-bound normal process). Fix me! XXX
723  */
724 			if (RTP_PRIO_IS_REALTIME(rtp.type)) {
725 				error = EPERM;
726 				break;
727 			}
728 		}
729 		switch (rtp.type) {
730 #ifdef RTP_PRIO_FIFO
731 		case RTP_PRIO_FIFO:
732 #endif
733 		case RTP_PRIO_REALTIME:
734 		case RTP_PRIO_NORMAL:
735 		case RTP_PRIO_IDLE:
736 			if (rtp.prio > RTP_PRIO_MAX) {
737 				error = EINVAL;
738 				break;
739 			}
740 			lp->lwp_rtprio = rtp;
741 			error = 0;
742 			break;
743 		default:
744 			error = EINVAL;
745 			break;
746 		}
747 		break;
748 	default:
749 		error = EINVAL;
750 		break;
751 	}
752 done:
753 	if (p) {
754 		lwkt_reltoken(&p->p_token);
755 		PRELE(p);
756 	}
757 
758 	return (error);
759 }
760 
761 /*
762  * MPSAFE
763  */
764 int
765 sys_setrlimit(struct __setrlimit_args *uap)
766 {
767 	struct rlimit alim;
768 	int error;
769 
770 	error = copyin(uap->rlp, &alim, sizeof(alim));
771 	if (error)
772 		return (error);
773 
774 	error = kern_setrlimit(uap->which, &alim);
775 
776 	return (error);
777 }
778 
779 /*
780  * MPSAFE
781  */
782 int
783 sys_getrlimit(struct __getrlimit_args *uap)
784 {
785 	struct rlimit lim;
786 	int error;
787 
788 	error = kern_getrlimit(uap->which, &lim);
789 
790 	if (error == 0)
791 		error = copyout(&lim, uap->rlp, sizeof(*uap->rlp));
792 	return error;
793 }
794 
795 /*
796  * Transform the running time and tick information in lwp lp's thread into user,
797  * system, and interrupt time usage.
798  *
799  * Since we are limited to statclock tick granularity this is a statisical
800  * calculation which will be correct over the long haul, but should not be
801  * expected to measure fine grained deltas.
802  *
803  * It is possible to catch a lwp in the midst of being created, so
804  * check whether lwp_thread is NULL or not.
805  */
806 void
807 calcru(struct lwp *lp, struct timeval *up, struct timeval *sp)
808 {
809 	struct thread *td;
810 
811 	/*
812 	 * Calculate at the statclock level.  YYY if the thread is owned by
813 	 * another cpu we need to forward the request to the other cpu, or
814 	 * have a token to interlock the information in order to avoid racing
815 	 * thread destruction.
816 	 */
817 	if ((td = lp->lwp_thread) != NULL) {
818 		crit_enter();
819 		up->tv_sec = td->td_uticks / 1000000;
820 		up->tv_usec = td->td_uticks % 1000000;
821 		sp->tv_sec = td->td_sticks / 1000000;
822 		sp->tv_usec = td->td_sticks % 1000000;
823 		crit_exit();
824 	}
825 }
826 
827 /*
828  * Aggregate resource statistics of all lwps of a process.
829  *
830  * proc.p_ru keeps track of all statistics directly related to a proc.  This
831  * consists of RSS usage and nswap information and aggregate numbers for all
832  * former lwps of this proc.
833  *
834  * proc.p_cru is the sum of all stats of reaped children.
835  *
836  * lwp.lwp_ru contains the stats directly related to one specific lwp, meaning
837  * packet, scheduler switch or page fault counts, etc.  This information gets
838  * added to lwp.lwp_proc.p_ru when the lwp exits.
839  */
840 void
841 calcru_proc(struct proc *p, struct rusage *ru)
842 {
843 	struct timeval upt, spt;
844 	long *rip1, *rip2;
845 	struct lwp *lp;
846 
847 	*ru = p->p_ru;
848 
849 	FOREACH_LWP_IN_PROC(lp, p) {
850 		calcru(lp, &upt, &spt);
851 		timevaladd(&ru->ru_utime, &upt);
852 		timevaladd(&ru->ru_stime, &spt);
853 		for (rip1 = &ru->ru_first, rip2 = &lp->lwp_ru.ru_first;
854 		     rip1 <= &ru->ru_last;
855 		     rip1++, rip2++)
856 			*rip1 += *rip2;
857 	}
858 }
859 
860 
861 /*
862  * MPALMOSTSAFE
863  */
864 int
865 sys_getrusage(struct getrusage_args *uap)
866 {
867 	struct proc *p = curproc;
868 	struct rusage ru;
869 	struct rusage *rup;
870 	int error;
871 
872 	lwkt_gettoken(&p->p_token);
873 
874 	switch (uap->who) {
875 	case RUSAGE_SELF:
876 		rup = &ru;
877 		calcru_proc(p, rup);
878 		error = 0;
879 		break;
880 	case RUSAGE_CHILDREN:
881 		rup = &p->p_cru;
882 		error = 0;
883 		break;
884 	default:
885 		error = EINVAL;
886 		break;
887 	}
888 	lwkt_reltoken(&p->p_token);
889 
890 	if (error == 0)
891 		error = copyout(rup, uap->rusage, sizeof(struct rusage));
892 	return (error);
893 }
894 
895 void
896 ruadd(struct rusage *ru, struct rusage *ru2)
897 {
898 	long *ip, *ip2;
899 	int i;
900 
901 	timevaladd(&ru->ru_utime, &ru2->ru_utime);
902 	timevaladd(&ru->ru_stime, &ru2->ru_stime);
903 	if (ru->ru_maxrss < ru2->ru_maxrss)
904 		ru->ru_maxrss = ru2->ru_maxrss;
905 	ip = &ru->ru_first; ip2 = &ru2->ru_first;
906 	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
907 		*ip++ += *ip2++;
908 }
909 
910 /*
911  * Find the uidinfo structure for a uid.  This structure is used to
912  * track the total resource consumption (process count, socket buffer
913  * size, etc.) for the uid and impose limits.
914  */
915 void
916 uihashinit(void)
917 {
918 	spin_init(&uihash_lock, "uihashinit");
919 	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
920 }
921 
922 /*
923  * NOTE: Must be called with uihash_lock held
924  *
925  * MPSAFE
926  */
927 static struct uidinfo *
928 uilookup(uid_t uid)
929 {
930 	struct	uihashhead *uipp;
931 	struct	uidinfo *uip;
932 
933 	uipp = UIHASH(uid);
934 	LIST_FOREACH(uip, uipp, ui_hash) {
935 		if (uip->ui_uid == uid)
936 			break;
937 	}
938 	return (uip);
939 }
940 
941 /*
942  * Helper function to creat ea uid that could not be found.
943  * This function will properly deal with races.
944  *
945  * MPSAFE
946  */
947 static struct uidinfo *
948 uicreate(uid_t uid)
949 {
950 	struct	uidinfo *uip, *tmp;
951 
952 	/*
953 	 * Allocate space and check for a race
954 	 */
955 	uip = kmalloc(sizeof(*uip), M_UIDINFO, M_WAITOK|M_ZERO);
956 
957 	/*
958 	 * Initialize structure and enter it into the hash table
959 	 */
960 	spin_init(&uip->ui_lock, "uicreate");
961 	uip->ui_uid = uid;
962 	uip->ui_ref = 1;	/* we're returning a ref */
963 	varsymset_init(&uip->ui_varsymset, NULL);
964 
965 	/*
966 	 * Somebody may have already created the uidinfo for this
967 	 * uid. If so, return that instead.
968 	 */
969 	spin_lock(&uihash_lock);
970 	tmp = uilookup(uid);
971 	if (tmp != NULL) {
972 		uihold(tmp);
973 		spin_unlock(&uihash_lock);
974 
975 		spin_uninit(&uip->ui_lock);
976 		varsymset_clean(&uip->ui_varsymset);
977 		kfree(uip, M_UIDINFO);
978 		uip = tmp;
979 	} else {
980 		LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
981 		spin_unlock(&uihash_lock);
982 	}
983 	return (uip);
984 }
985 
986 /*
987  *
988  *
989  * MPSAFE
990  */
991 struct uidinfo *
992 uifind(uid_t uid)
993 {
994 	struct	uidinfo *uip;
995 
996 	spin_lock(&uihash_lock);
997 	uip = uilookup(uid);
998 	if (uip == NULL) {
999 		spin_unlock(&uihash_lock);
1000 		uip = uicreate(uid);
1001 	} else {
1002 		uihold(uip);
1003 		spin_unlock(&uihash_lock);
1004 	}
1005 	return (uip);
1006 }
1007 
1008 /*
1009  * Helper funtion to remove a uidinfo whos reference count is
1010  * transitioning from 1->0.  The reference count is 1 on call.
1011  *
1012  * Zero is returned on success, otherwise non-zero and the
1013  * uiphas not been removed.
1014  *
1015  * MPSAFE
1016  */
1017 static __inline int
1018 uifree(struct uidinfo *uip)
1019 {
1020 	/*
1021 	 * If we are still the only holder after acquiring the uihash_lock
1022 	 * we can safely unlink the uip and destroy it.  Otherwise we lost
1023 	 * a race and must fail.
1024 	 */
1025 	spin_lock(&uihash_lock);
1026 	if (uip->ui_ref != 1) {
1027 		spin_unlock(&uihash_lock);
1028 		return(-1);
1029 	}
1030 	LIST_REMOVE(uip, ui_hash);
1031 	spin_unlock(&uihash_lock);
1032 
1033 	/*
1034 	 * The uip is now orphaned and we can destroy it at our
1035 	 * leisure.
1036 	 */
1037 	if (uip->ui_sbsize != 0)
1038 		kprintf("freeing uidinfo: uid = %d, sbsize = %jd\n",
1039 		    uip->ui_uid, (intmax_t)uip->ui_sbsize);
1040 	if (uip->ui_proccnt != 0)
1041 		kprintf("freeing uidinfo: uid = %d, proccnt = %ld\n",
1042 		    uip->ui_uid, uip->ui_proccnt);
1043 
1044 	varsymset_clean(&uip->ui_varsymset);
1045 	lockuninit(&uip->ui_varsymset.vx_lock);
1046 	spin_uninit(&uip->ui_lock);
1047 	kfree(uip, M_UIDINFO);
1048 	return(0);
1049 }
1050 
1051 /*
1052  * MPSAFE
1053  */
1054 void
1055 uihold(struct uidinfo *uip)
1056 {
1057 	atomic_add_int(&uip->ui_ref, 1);
1058 	KKASSERT(uip->ui_ref >= 0);
1059 }
1060 
1061 /*
1062  * NOTE: It is important for us to not drop the ref count to 0
1063  *	 because this can cause a 2->0/2->0 race with another
1064  *	 concurrent dropper.  Losing the race in that situation
1065  *	 can cause uip to become stale for one of the other
1066  *	 threads.
1067  *
1068  * MPSAFE
1069  */
1070 void
1071 uidrop(struct uidinfo *uip)
1072 {
1073 	int ref;
1074 
1075 	KKASSERT(uip->ui_ref > 0);
1076 
1077 	for (;;) {
1078 		ref = uip->ui_ref;
1079 		cpu_ccfence();
1080 		if (ref == 1) {
1081 			if (uifree(uip) == 0)
1082 				break;
1083 		} else if (atomic_cmpset_int(&uip->ui_ref, ref, ref - 1)) {
1084 			break;
1085 		}
1086 		/* else retry */
1087 	}
1088 }
1089 
/*
 * Drop the reference on the uidinfo currently stored in *puip and then
 * store nuip there.  No reference is taken on nuip here.
 */
void
uireplace(struct uidinfo **puip, struct uidinfo *nuip)
{
	struct uidinfo *old = *puip;

	uidrop(old);
	*puip = nuip;
}
1096 
1097 /*
1098  * Change the count associated with number of processes
1099  * a given user is using.  When 'max' is 0, don't enforce a limit
1100  */
1101 int
1102 chgproccnt(struct uidinfo *uip, int diff, int max)
1103 {
1104 	int ret;
1105 	spin_lock(&uip->ui_lock);
1106 	/* don't allow them to exceed max, but allow subtraction */
1107 	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
1108 		ret = 0;
1109 	} else {
1110 		uip->ui_proccnt += diff;
1111 		if (uip->ui_proccnt < 0)
1112 			kprintf("negative proccnt for uid = %d\n", uip->ui_uid);
1113 		ret = 1;
1114 	}
1115 	spin_unlock(&uip->ui_lock);
1116 	return ret;
1117 }
1118 
1119 /*
1120  * Change the total socket buffer size a user has used.
1121  */
1122 int
1123 chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t max)
1124 {
1125 	rlim_t new;
1126 
1127 #ifdef __x86_64__
1128 	rlim_t sbsize;
1129 
1130 	sbsize = atomic_fetchadd_long(&uip->ui_sbsize, to - *hiwat);
1131 	new = sbsize + to - *hiwat;
1132 #else
1133 	spin_lock(&uip->ui_lock);
1134 	new = uip->ui_sbsize + to - *hiwat;
1135 	uip->ui_sbsize = new;
1136 	spin_unlock(&uip->ui_lock);
1137 #endif
1138 	KKASSERT(new >= 0);
1139 
1140 	/*
1141 	 * If we are trying to increase the socket buffer size
1142 	 * Scale down the hi water mark when we exceed the user's
1143 	 * allowed socket buffer space.
1144 	 *
1145 	 * We can't scale down too much or we will blow up atomic packet
1146 	 * operations.
1147 	 */
1148 	if (to > *hiwat && to > MCLBYTES && new > max) {
1149 		to = to * max / new;
1150 		if (to < MCLBYTES)
1151 			to = MCLBYTES;
1152 	}
1153 	*hiwat = to;
1154 	return (1);
1155 }
1156