xref: /dflybsd-src/sys/kern/kern_resource.c (revision bc76a771df54af7e361532b257cecc26227736b4)
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $
 * $DragonFly: src/sys/kern/kern_resource.c,v 1.19 2004/04/10 20:55:23 dillon Exp $
 */

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
#include <sys/kern_syscall.h>
#include <sys/kernel.h>
#include <sys/resourcevar.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/time.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

#include <sys/thread2.h>

static int donice (struct proc *chgp, int n);

static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* size of hash table - 1 */

static struct uidinfo	*uicreate (uid_t uid);
static struct uidinfo	*uilookup (uid_t uid);

/*
 * Resource controls and accounting.
 */

int
getpriority(struct getpriority_args *uap)
{
	struct proc *curp = curproc;
	struct proc *p;
	int low = PRIO_MAX + 1;

	switch (uap->which) {
	case PRIO_PROCESS:
		if (uap->who == 0)
			p = curp;
		else
			p = pfind(uap->who);
		if (p == NULL)
			break;
		if (!PRISON_CHECK(curp->p_ucred, p->p_ucred))
			break;
		low = p->p_nice;
		break;

	case PRIO_PGRP:
	{
		struct pgrp *pg;

		if (uap->who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(uap->who)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (PRISON_CHECK(curp->p_ucred, p->p_ucred) &&
			    p->p_nice < low)
				low = p->p_nice;
		}
		break;
	}
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		FOREACH_PROC_IN_SYSTEM(p)
			if (PRISON_CHECK(curp->p_ucred, p->p_ucred) &&
			    p->p_ucred->cr_uid == uap->who &&
			    p->p_nice < low)
				low = p->p_nice;
		break;

	default:
		return (EINVAL);
	}
	if (low == PRIO_MAX + 1)
		return (ESRCH);
	uap->sysmsg_result = low;
	return (0);
}
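/*
 * Usage sketch (hypothetical userland caller, not part of this file).
 * The syscall reports the lowest (most favorable) nice value among all
 * matching processes, and -1 is a legitimate result, so a caller must
 * clear errno before the call and re-test it afterwards:
 *
 *	errno = 0;
 *	nice = getpriority(PRIO_USER, 1001);
 *	if (nice == -1 && errno != 0)
 *		err(1, "getpriority");		(ESRCH: nothing matched)
 */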

/* ARGSUSED */
int
setpriority(struct setpriority_args *uap)
{
	struct proc *curp = curproc;
	struct proc *p;
	int found = 0, error = 0;

	switch (uap->which) {

	case PRIO_PROCESS:
		if (uap->who == 0)
			p = curp;
		else
			p = pfind(uap->who);
		if (p == NULL)
			break;
		if (!PRISON_CHECK(curp->p_ucred, p->p_ucred))
			break;
		error = donice(p, uap->prio);
		found++;
		break;

	case PRIO_PGRP:
	{
		struct pgrp *pg;

		if (uap->who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(uap->who)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (PRISON_CHECK(curp->p_ucred, p->p_ucred)) {
				error = donice(p, uap->prio);
				found++;
			}
		}
		break;
	}
	case PRIO_USER:
		if (uap->who == 0)
			uap->who = curp->p_ucred->cr_uid;
		FOREACH_PROC_IN_SYSTEM(p)
			if (p->p_ucred->cr_uid == uap->who &&
			    PRISON_CHECK(curp->p_ucred, p->p_ucred)) {
				error = donice(p, uap->prio);
				found++;
			}
		break;

	default:
		return (EINVAL);
	}
	if (found == 0)
		return (ESRCH);
	return (error);
}
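/*
 * Usage sketch (hypothetical userland caller): renice every process in
 * the caller's own process group to nice 10.  If nothing matched the
 * syscall fails with ESRCH; otherwise it returns the error from the
 * last donice() attempt (0 on success):
 *
 *	if (setpriority(PRIO_PGRP, 0, 10) < 0)
 *		err(1, "setpriority");
 */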

static int
donice(struct proc *chgp, int n)
{
	struct proc *curp = curproc;
	struct ucred *cr = curp->p_ucred;

	/*
	 * Unless one of the caller's uids is root, the target process
	 * must be owned by the caller's effective or real uid.
	 */
	if (cr->cr_uid && cr->cr_ruid &&
	    cr->cr_uid != chgp->p_ucred->cr_uid &&
	    cr->cr_ruid != chgp->p_ucred->cr_uid)
		return (EPERM);
	/* clamp to [PRIO_MIN, PRIO_MAX]; lowering nice requires superuser */
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	if (n < chgp->p_nice && suser_cred(cr, 0))
		return (EACCES);
	chgp->p_nice = n;
	(void)resetpriority(chgp);
	return (0);
}
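/*
 * Worked example (illustrative): an unprivileged user with uid 1001 may
 * renice one of its own processes from 0 to 10, since raising nice is
 * always allowed, but moving it from 10 back to 0 fails with EACCES
 * because lowering nice requires superuser.  A request of n = 25 is
 * silently clamped to PRIO_MAX (20) before that comparison.
 */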

/*
 * Set realtime priority
 */
/* ARGSUSED */
int
rtprio(struct rtprio_args *uap)
{
	struct proc *curp = curproc;
	struct proc *p;
	struct ucred *cr = curp->p_ucred;
	struct rtprio rtp;
	int error;

	error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
	if (error)
		return (error);

	if (uap->pid == 0)
		p = curp;
	else
		p = pfind(uap->pid);

	if (p == NULL)
		return (ESRCH);

	switch (uap->function) {
	case RTP_LOOKUP:
		return (copyout(&p->p_rtprio, uap->rtp, sizeof(struct rtprio)));
	case RTP_SET:
		if (cr->cr_uid && cr->cr_ruid &&
		    cr->cr_uid != p->p_ucred->cr_uid &&
		    cr->cr_ruid != p->p_ucred->cr_uid)
			return (EPERM);
		/* disallow setting rtprio in most cases if not superuser */
		if (suser_cred(cr, 0)) {
			/* can't set someone else's */
			if (uap->pid)
				return (EPERM);
			/* can't set realtime priority */
/*
 * Realtime priority has to be restricted for reasons which should be
 * obvious. However, for idle priority, there is a potential for
 * system deadlock if an idleprio process gains a lock on a resource
 * that other processes need (and the idleprio process can't run
 * due to a CPU-bound normal process). Fix me! XXX
 */
			if (RTP_PRIO_IS_REALTIME(rtp.type))
				return (EPERM);
		}
		switch (rtp.type) {
#ifdef RTP_PRIO_FIFO
		case RTP_PRIO_FIFO:
#endif
		case RTP_PRIO_REALTIME:
		case RTP_PRIO_NORMAL:
		case RTP_PRIO_IDLE:
			if (rtp.prio > RTP_PRIO_MAX)
				return (EINVAL);
			p->p_rtprio = rtp;
			return (0);
		default:
			return (EINVAL);
		}

	default:
		return (EINVAL);
	}
}
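/*
 * Usage sketch (hypothetical userland caller): move the current process
 * to idle priority.  The userland wrapper takes (function, pid, rtp),
 * with pid 0 meaning the caller itself:
 *
 *	struct rtprio rtp;
 *
 *	rtp.type = RTP_PRIO_IDLE;
 *	rtp.prio = RTP_PRIO_MAX;	(least favorable slot in the class)
 *	if (rtprio(RTP_SET, 0, &rtp) < 0)
 *		err(1, "rtprio");
 */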

int
setrlimit(struct __setrlimit_args *uap)
{
	struct rlimit alim;
	int error;

	error = copyin(uap->rlp, &alim, sizeof(alim));
	if (error)
		return (error);

	error = kern_setrlimit(uap->which, &alim);

	return (error);
}

int
kern_setrlimit(u_int which, struct rlimit *limp)
{
	struct proc *p = curproc;
	struct rlimit *alimp;
	int error;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);
	alimp = &p->p_rlimit[which];

	/*
	 * Preserve historical bugs by treating negative limits as unsigned.
	 */
	if (limp->rlim_cur < 0)
		limp->rlim_cur = RLIM_INFINITY;
	if (limp->rlim_max < 0)
		limp->rlim_max = RLIM_INFINITY;

	if (limp->rlim_cur > alimp->rlim_max ||
	    limp->rlim_max > alimp->rlim_max)
		if ((error = suser_cred(p->p_ucred, PRISON_ROOT)))
			return (error);
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;
	if (p->p_limit->p_refcnt > 1 &&
	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
		p->p_limit->p_refcnt--;
		p->p_limit = limcopy(p->p_limit);
		alimp = &p->p_rlimit[which];
	}

	switch (which) {

	case RLIMIT_CPU:
		if (limp->rlim_cur > RLIM_INFINITY / (rlim_t)1000000)
			p->p_limit->p_cpulimit = RLIM_INFINITY;
		else
			p->p_limit->p_cpulimit =
			    (rlim_t)1000000 * limp->rlim_cur;
		break;
	case RLIMIT_DATA:
		if (limp->rlim_cur > maxdsiz)
			limp->rlim_cur = maxdsiz;
		if (limp->rlim_max > maxdsiz)
			limp->rlim_max = maxdsiz;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > maxssiz)
			limp->rlim_cur = maxssiz;
		if (limp->rlim_max > maxssiz)
			limp->rlim_max = maxssiz;
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If the stack limit is
		 * raised, make more of the stack accessible; if it is
		 * lowered, make the excess inaccessible.
		 */
		if (limp->rlim_cur != alimp->rlim_cur) {
			vm_offset_t addr;
			vm_size_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > alimp->rlim_cur) {
				prot = VM_PROT_ALL;
				size = limp->rlim_cur - alimp->rlim_cur;
				addr = USRSTACK - limp->rlim_cur;
			} else {
				prot = VM_PROT_NONE;
				size = alimp->rlim_cur - limp->rlim_cur;
				addr = USRSTACK - alimp->rlim_cur;
			}
			addr = trunc_page(addr);
			size = round_page(size);
			(void) vm_map_protect(&p->p_vmspace->vm_map,
					      addr, addr+size, prot, FALSE);
		}
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfilesperproc)
			limp->rlim_cur = maxfilesperproc;
		if (limp->rlim_max > maxfilesperproc)
			limp->rlim_max = maxfilesperproc;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxprocperuid)
			limp->rlim_cur = maxprocperuid;
		if (limp->rlim_max > maxprocperuid)
			limp->rlim_max = maxprocperuid;
		if (limp->rlim_cur < 1)
			limp->rlim_cur = 1;
		if (limp->rlim_max < 1)
			limp->rlim_max = 1;
		break;
	}
	*alimp = *limp;
	return (0);
}
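/*
 * Worked example (illustrative numbers): suppose the RLIMIT_STACK soft
 * limit is raised from 8 MB to 16 MB.  Then prot = VM_PROT_ALL,
 * size = 16 MB - 8 MB = 8 MB, and addr = USRSTACK - 16 MB, so the
 * previously fenced-off 8 MB region just below the old stack floor
 * becomes accessible.  Lowering the limit runs the same arithmetic in
 * reverse with VM_PROT_NONE.
 */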

/*
 * The rlimit indexed by which is returned in the second argument.
 */
int
kern_getrlimit(u_int which, struct rlimit *limp)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;

	if (which >= RLIM_NLIMITS)
		return (EINVAL);

	*limp = p->p_rlimit[which];

	return (0);
}

int
getrlimit(struct __getrlimit_args *uap)
{
	struct rlimit lim;
	int error;

	error = kern_getrlimit(uap->which, &lim);

	if (error == 0)
		error = copyout(&lim, uap->rlp, sizeof(*uap->rlp));
	return (error);
}

/*
 * Transform the running time and tick information in proc p into user,
 * system, and interrupt time usage.
 *
 * Since we are limited to statclock tick granularity this is a statistical
 * calculation which will be correct over the long haul, but should not be
 * expected to measure fine-grained deltas.
 */
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp,
	struct timeval *ip)
{
	struct thread *td = p->p_thread;

	/*
	 * Calculate at the statclock level.  YYY if the thread is owned by
	 * another cpu we need to forward the request to the other cpu, or
	 * have a token to interlock the information.
	 */
	crit_enter();
	up->tv_sec = td->td_uticks / 1000000;
	up->tv_usec = td->td_uticks % 1000000;
	sp->tv_sec = td->td_sticks / 1000000;
	sp->tv_usec = td->td_sticks % 1000000;
	if (ip != NULL) {
		ip->tv_sec = td->td_iticks / 1000000;
		ip->tv_usec = td->td_iticks % 1000000;
	}
	crit_exit();
}
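/*
 * Worked example (illustrative): the td_*ticks counters are denominated
 * in microseconds, as the divisions above imply, so a thread that has
 * accumulated td_uticks = 2500000 reports up->tv_sec = 2 and
 * up->tv_usec = 500000.  Because the counters advance a whole statclock
 * quantum at a time, short-lived deltas can be off by up to a tick.
 */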

/* ARGSUSED */
int
getrusage(struct getrusage_args *uap)
{
	struct proc *p = curproc;
	struct rusage *rup;

	switch (uap->who) {

	case RUSAGE_SELF:
		rup = &p->p_stats->p_ru;
		calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
		break;

	case RUSAGE_CHILDREN:
		rup = &p->p_stats->p_cru;
		break;

	default:
		return (EINVAL);
	}
	return (copyout((caddr_t)rup, (caddr_t)uap->rusage,
	    sizeof (struct rusage)));
}
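/*
 * Usage sketch (hypothetical userland caller): RUSAGE_SELF refreshes
 * ru_utime/ru_stime via calcru() before the copyout, whereas
 * RUSAGE_CHILDREN returns the totals accumulated from waited-for
 * children as-is:
 *
 *	struct rusage ru;
 *
 *	if (getrusage(RUSAGE_SELF, &ru) < 0)
 *		err(1, "getrusage");
 *	printf("user %ld.%06ld s\n",
 *	    ru.ru_utime.tv_sec, ru.ru_utime.tv_usec);
 */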

void
ruadd(struct rusage *ru, struct rusage *ru2)
{
	long *ip, *ip2;
	int i;

	timevaladd(&ru->ru_utime, &ru2->ru_utime);
	timevaladd(&ru->ru_stime, &ru2->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	/*
	 * Sum every long-sized field from ru_first through ru_last
	 * inclusive; the pointer difference counts the fields lying
	 * between the two markers.
	 */
	ip = &ru->ru_first;
	ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork,
 * and copy when a limit is changed.
 */
struct plimit *
limcopy(struct plimit *lim)
{
	struct plimit *copy;

	MALLOC(copy, struct plimit *, sizeof(struct plimit),
	    M_SUBPROC, M_WAITOK);
	/*
	 * pl_rlimit is the first member, so this copies the entire
	 * structure; p_lflags and p_refcnt are then reset below.
	 */
	bcopy(lim->pl_rlimit, copy->pl_rlimit, sizeof(struct plimit));
	copy->p_lflags = 0;
	copy->p_refcnt = 1;
	return (copy);
}
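/*
 * Sketch of the copy-on-write protocol this supports (the same pattern
 * appears in kern_setrlimit() above): fork shares the parent's plimit
 * by bumping p_refcnt, and a writer first drops its reference, then
 * replaces its pointer with a private copy:
 *
 *	if (p->p_limit->p_refcnt > 1 &&
 *	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
 *		p->p_limit->p_refcnt--;
 *		p->p_limit = limcopy(p->p_limit);
 *	}
 */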

/*
 * Find the uidinfo structure for a uid.  This structure is used to
 * track the total resource consumption (process count, socket buffer
 * size, etc.) for the uid and impose limits.
 */
void
uihashinit(void)
{
	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
}

static struct uidinfo *
uilookup(uid_t uid)
{
	struct	uihashhead *uipp;
	struct	uidinfo *uip;

	uipp = UIHASH(uid);
	LIST_FOREACH(uip, uipp, ui_hash) {
		if (uip->ui_uid == uid)
			break;
	}
	return (uip);
}

static struct uidinfo *
uicreate(uid_t uid)
{
	struct	uidinfo *uip, *norace;

	/*
	 * Allocate space and check for a race
	 */
	MALLOC(uip, struct uidinfo *, sizeof(*uip), M_UIDINFO, M_WAITOK);
	norace = uilookup(uid);
	if (norace != NULL) {
		FREE(uip, M_UIDINFO);
		return (norace);
	}

	/*
	 * Initialize structure and enter it into the hash table
	 */
	LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
	uip->ui_uid = uid;
	uip->ui_proccnt = 0;
	uip->ui_sbsize = 0;
	uip->ui_ref = 0;
	varsymset_init(&uip->ui_varsymset, NULL);
	return (uip);
}

struct uidinfo *
uifind(uid_t uid)
{
	struct	uidinfo *uip;

	uip = uilookup(uid);
	if (uip == NULL)
		uip = uicreate(uid);
	uip->ui_ref++;
	return (uip);
}

static __inline void
uifree(struct uidinfo *uip)
{
	if (uip->ui_sbsize != 0)
		/* XXX no %qd in kernel.  Truncate. */
		printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
		    uip->ui_uid, (long)uip->ui_sbsize);
	if (uip->ui_proccnt != 0)
		printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
		    uip->ui_uid, uip->ui_proccnt);
	LIST_REMOVE(uip, ui_hash);
	varsymset_clean(&uip->ui_varsymset);
	FREE(uip, M_UIDINFO);
}

void
uihold(struct uidinfo *uip)
{
	++uip->ui_ref;
	KKASSERT(uip->ui_ref > 0);
}

void
uidrop(struct uidinfo *uip)
{
	KKASSERT(uip->ui_ref > 0);
	if (--uip->ui_ref == 0)
		uifree(uip);
}

void
uireplace(struct uidinfo **puip, struct uidinfo *nuip)
{
	uidrop(*puip);
	*puip = nuip;
}
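/*
 * Reference-count lifecycle sketch (illustrative): a typical consumer,
 * e.g. credential setup, does
 *
 *	uip = uifind(uid);	takes a reference, creating on first use
 *	...charge against uip->ui_proccnt, uip->ui_sbsize...
 *	uidrop(uip);		last drop unhashes and frees the entry
 *
 * uireplace() is shorthand for swapping one held reference for another,
 * for instance when a process changes uid.
 */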

/*
 * Change the count of processes associated with a given user.
 * When 'max' is 0, don't enforce a limit.
 */
int
chgproccnt(struct uidinfo *uip, int diff, int max)
{
	/* don't allow them to exceed max, but allow subtraction */
	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0)
		return (0);
	uip->ui_proccnt += diff;
	if (uip->ui_proccnt < 0)
		printf("negative proccnt for uid = %d\n", uip->ui_uid);
	return (1);
}
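/*
 * Caller sketch (illustrative, modeled on the fork path): charge one
 * more process to the user and fail if the per-uid limit would be
 * exceeded; the charge is released on exit with a negative diff:
 *
 *	if (!chgproccnt(uip, 1, maxprocperuid))
 *		return (EAGAIN);
 *	...
 *	(void)chgproccnt(uip, -1, 0);
 */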

/*
 * Change the total socket buffer size a user has used.
 */
int
chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t max)
{
	rlim_t new;
	int s;

	s = splnet();
	new = uip->ui_sbsize + to - *hiwat;
	/* don't allow them to exceed max, but allow subtraction */
	if (to > *hiwat && new > max) {
		splx(s);
		return (0);
	}
	uip->ui_sbsize = new;
	*hiwat = to;
	if (uip->ui_sbsize < 0)
		printf("negative sbsize for uid = %d\n", uip->ui_uid);
	splx(s);
	return (1);
}
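/*
 * Caller sketch (illustrative, modeled on the socket-buffer reservation
 * path): grow a socket buffer's high-water mark to 'cc' bytes, charging
 * the delta against the owning uid and failing if the per-uid cap would
 * be exceeded:
 *
 *	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
 *	    p->p_rlimit[RLIMIT_SBSIZE].rlim_cur))
 *		return (0);
 */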
655