/*	$OpenBSD: kern_resource.c,v 1.64 2019/06/10 03:15:53 visa Exp $	*/
/*	$NetBSD: kern_resource.c,v 1.38 1996/10/23 07:19:38 matthias Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/resourcevar.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/ktrace.h>
#include <sys/sched.h>
#include <sys/signalvar.h>

#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

/* SIGXCPU interval in seconds of process runtime */
#define SIGXCPU_INTERVAL	5

void	tuagg_sub(struct tusage *, struct proc *);

/*
 * Patchable maximum data and stack limits.
 */
rlim_t maxdmap = MAXDSIZ;
rlim_t maxsmap = MAXSSIZ;

/*
 * Resource controls and accounting.
 */

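/*
 * Return the lowest (most favorable) nice value among the processes
 * selected by "which"/"who".  Nice values are stored biased by NZERO
 * in ps_nice; the bias is removed before returning.  An illustrative
 * userland call (note the errno dance, since -1 is a valid priority):
 *
 *	errno = 0;
 *	int prio = getpriority(PRIO_PROCESS, 0);
 *	if (prio == -1 && errno != 0)
 *		err(1, "getpriority");
 */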
int
sys_getpriority(struct proc *curp, void *v, register_t *retval)
{
	struct sys_getpriority_args /* {
		syscallarg(int) which;
		syscallarg(id_t) who;
	} */ *uap = v;
	struct process *pr;
	int low = NZERO + PRIO_MAX + 1;

	switch (SCARG(uap, which)) {

	case PRIO_PROCESS:
		if (SCARG(uap, who) == 0)
			pr = curp->p_p;
		else
			pr = prfind(SCARG(uap, who));
		if (pr == NULL)
			break;
		if (pr->ps_nice < low)
			low = pr->ps_nice;
		break;

	case PRIO_PGRP: {
		struct pgrp *pg;

		if (SCARG(uap, who) == 0)
			pg = curp->p_p->ps_pgrp;
		else if ((pg = pgfind(SCARG(uap, who))) == NULL)
			break;
		LIST_FOREACH(pr, &pg->pg_members, ps_pglist)
			if (pr->ps_nice < low)
				low = pr->ps_nice;
		break;
	}

	case PRIO_USER:
		if (SCARG(uap, who) == 0)
			SCARG(uap, who) = curp->p_ucred->cr_uid;
		LIST_FOREACH(pr, &allprocess, ps_list)
			if (pr->ps_ucred->cr_uid == SCARG(uap, who) &&
			    pr->ps_nice < low)
				low = pr->ps_nice;
		break;

	default:
		return (EINVAL);
	}
	if (low == NZERO + PRIO_MAX + 1)
		return (ESRCH);
	*retval = low - NZERO;
	return (0);
}

int
sys_setpriority(struct proc *curp, void *v, register_t *retval)
{
	struct sys_setpriority_args /* {
		syscallarg(int) which;
		syscallarg(id_t) who;
		syscallarg(int) prio;
	} */ *uap = v;
	struct process *pr;
	int found = 0, error = 0;

	switch (SCARG(uap, which)) {

	case PRIO_PROCESS:
		if (SCARG(uap, who) == 0)
			pr = curp->p_p;
		else
			pr = prfind(SCARG(uap, who));
		if (pr == NULL)
			break;
		error = donice(curp, pr, SCARG(uap, prio));
		found++;
		break;

	case PRIO_PGRP: {
		struct pgrp *pg;

		if (SCARG(uap, who) == 0)
			pg = curp->p_p->ps_pgrp;
		else if ((pg = pgfind(SCARG(uap, who))) == NULL)
			break;
		LIST_FOREACH(pr, &pg->pg_members, ps_pglist) {
			error = donice(curp, pr, SCARG(uap, prio));
			found++;
		}
		break;
	}

	case PRIO_USER:
		if (SCARG(uap, who) == 0)
			SCARG(uap, who) = curp->p_ucred->cr_uid;
		LIST_FOREACH(pr, &allprocess, ps_list)
			if (pr->ps_ucred->cr_uid == SCARG(uap, who)) {
				error = donice(curp, pr, SCARG(uap, prio));
				found++;
			}
		break;

	default:
		return (EINVAL);
	}
	if (found == 0)
		return (ESRCH);
	return (error);
}

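/*
 * Change the nice value of process chgpr to n.  n is clamped to
 * PRIO_MIN..PRIO_MAX and stored biased by NZERO.  The caller must own
 * the target process or be superuser (EPERM otherwise); lowering the
 * nice value (raising priority) requires root (EACCES otherwise).
 * Every thread of the process has its scheduling priority recomputed.
 */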
int
donice(struct proc *curp, struct process *chgpr, int n)
{
	struct ucred *ucred = curp->p_ucred;
	struct proc *p;
	int s;

	if (ucred->cr_uid != 0 && ucred->cr_ruid != 0 &&
	    ucred->cr_uid != chgpr->ps_ucred->cr_uid &&
	    ucred->cr_ruid != chgpr->ps_ucred->cr_uid)
		return (EPERM);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	n += NZERO;
	if (n < chgpr->ps_nice && suser(curp))
		return (EACCES);
	chgpr->ps_nice = n;
	SCHED_LOCK(s);
	TAILQ_FOREACH(p, &chgpr->ps_threads, p_thr_link)
		(void)resetpriority(p);
	SCHED_UNLOCK(s);
	return (0);
}

int
sys_setrlimit(struct proc *p, void *v, register_t *retval)
{
	struct sys_setrlimit_args /* {
		syscallarg(int) which;
		syscallarg(const struct rlimit *) rlp;
	} */ *uap = v;
	struct rlimit alim;
	int error;

	error = copyin((caddr_t)SCARG(uap, rlp), (caddr_t)&alim,
		       sizeof (struct rlimit));
	if (error)
		return (error);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_STRUCT))
		ktrrlimit(p, &alim);
#endif
	return (dosetrlimit(p, SCARG(uap, which), &alim));
}

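/*
 * Validate and install a new resource limit.  Raising the hard limit
 * requires root.  If the plimit structure is shared with other
 * processes (pl_refcnt > 1), unshare it first so the change stays
 * local.  A first transition of RLIMIT_CPU away from RLIM_INFINITY
 * arms the rucheck timeout; RLIMIT_STACK changes adjust the page
 * protections of the already-allocated stack region.
 */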
int
dosetrlimit(struct proc *p, u_int which, struct rlimit *limp)
{
	struct rlimit *alimp;
	rlim_t maxlim;
	int error;

	if (which >= RLIM_NLIMITS || limp->rlim_cur > limp->rlim_max)
		return (EINVAL);

	alimp = &p->p_rlimit[which];
	if (limp->rlim_max > alimp->rlim_max)
		if ((error = suser(p)) != 0)
			return (error);
	if (p->p_p->ps_limit->pl_refcnt > 1) {
		struct plimit *l = p->p_p->ps_limit;

		/* limcopy() can sleep, so copy before decrementing refcnt */
		p->p_p->ps_limit = limcopy(l);
		limfree(l);
		alimp = &p->p_rlimit[which];
	}

	switch (which) {
	case RLIMIT_DATA:
		maxlim = maxdmap;
		break;
	case RLIMIT_STACK:
		maxlim = maxsmap;
		break;
	case RLIMIT_NOFILE:
		maxlim = maxfiles;
		break;
	case RLIMIT_NPROC:
		maxlim = maxprocess;
		break;
	default:
		maxlim = RLIM_INFINITY;
		break;
	}

	if (limp->rlim_max > maxlim)
		limp->rlim_max = maxlim;
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;

	if (which == RLIMIT_CPU && limp->rlim_cur != RLIM_INFINITY &&
	    alimp->rlim_cur == RLIM_INFINITY)
		timeout_add_msec(&p->p_p->ps_rucheck_to, RUCHECK_INTERVAL);

	if (which == RLIMIT_STACK) {
		/*
		 * The stack is allocated to its maximum size at exec
		 * time, with only "rlim_cur" bytes accessible.  If the
		 * stack limit is going up, make more of it accessible;
		 * if it is going down, make the excess inaccessible.
		 */
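		/*
		 * Illustration (downward-growing stack, i.e. without
		 * MACHINE_STACK_GROWS_UP): lowering rlim_cur from 8MB
		 * to 4MB remaps [vm_minsaddr - 8MB, vm_minsaddr - 4MB)
		 * to PROT_NONE; raising it back re-enables
		 * PROT_READ|PROT_WRITE on the same range.
		 */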
		if (limp->rlim_cur != alimp->rlim_cur) {
			vaddr_t addr;
			vsize_t size;
			vm_prot_t prot;
			struct vmspace *vm = p->p_vmspace;

			if (limp->rlim_cur > alimp->rlim_cur) {
				prot = PROT_READ | PROT_WRITE;
				size = limp->rlim_cur - alimp->rlim_cur;
#ifdef MACHINE_STACK_GROWS_UP
				addr = (vaddr_t)vm->vm_maxsaddr +
				    alimp->rlim_cur;
#else
				addr = (vaddr_t)vm->vm_minsaddr -
				    limp->rlim_cur;
#endif
			} else {
				prot = PROT_NONE;
				size = alimp->rlim_cur - limp->rlim_cur;
#ifdef MACHINE_STACK_GROWS_UP
				addr = (vaddr_t)vm->vm_maxsaddr +
				    limp->rlim_cur;
#else
				addr = (vaddr_t)vm->vm_minsaddr -
				    alimp->rlim_cur;
#endif
			}
			addr = trunc_page(addr);
			size = round_page(size);
			(void) uvm_map_protect(&vm->vm_map,
					      addr, addr+size, prot, FALSE);
		}
	}

	*alimp = *limp;
	return (0);
}

int
sys_getrlimit(struct proc *p, void *v, register_t *retval)
{
	struct sys_getrlimit_args /* {
		syscallarg(int) which;
		syscallarg(struct rlimit *) rlp;
	} */ *uap = v;
	struct rlimit *alimp;
	int error;

	if (SCARG(uap, which) < 0 || SCARG(uap, which) >= RLIM_NLIMITS)
		return (EINVAL);
	alimp = &p->p_rlimit[SCARG(uap, which)];
	error = copyout(alimp, SCARG(uap, rlp), sizeof(struct rlimit));
#ifdef KTRACE
	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
		ktrrlimit(p, alimp);
#endif
	return (error);
}

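/*
 * Add the thread's not-yet-aggregated runtime and tick counts into *tup.
 */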
void
tuagg_sub(struct tusage *tup, struct proc *p)
{
	timespecadd(&tup->tu_runtime, &p->p_rtime, &tup->tu_runtime);
	tup->tu_uticks += p->p_uticks;
	tup->tu_sticks += p->p_sticks;
	tup->tu_iticks += p->p_iticks;
}

/*
 * Aggregate a single thread's immediate time counts into the running
 * totals for the thread and process.
 */
void
tuagg_unlocked(struct process *pr, struct proc *p)
{
	tuagg_sub(&pr->ps_tu, p);
	tuagg_sub(&p->p_tu, p);
	timespecclear(&p->p_rtime);
	p->p_uticks = 0;
	p->p_sticks = 0;
	p->p_iticks = 0;
}

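/*
 * As tuagg_unlocked(), but taking the scheduler lock around the update.
 */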
void
tuagg(struct process *pr, struct proc *p)
{
	int s;

	SCHED_LOCK(s);
	tuagg_unlocked(pr, p);
	SCHED_UNLOCK(s);
}

/*
 * Transform the running time and tick information in a struct tusage
 * into user, system, and interrupt time usage.
 */
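/*
 * Example: with freq == 100 and tup->tu_sticks == 250, st becomes
 * 250 * 1000000000 / 100 == 2500000000, yielding sp->tv_sec == 2 and
 * sp->tv_nsec == 500000000, i.e. 2.5 seconds of system time.
 */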
void
calctsru(struct tusage *tup, struct timespec *up, struct timespec *sp,
    struct timespec *ip)
{
	u_quad_t st, ut, it;
	int freq;

	st = tup->tu_sticks;
	ut = tup->tu_uticks;
	it = tup->tu_iticks;

	if (st + ut + it == 0) {
		timespecclear(up);
		timespecclear(sp);
		if (ip != NULL)
			timespecclear(ip);
		return;
	}

	freq = stathz ? stathz : hz;

	st = st * 1000000000 / freq;
	sp->tv_sec = st / 1000000000;
	sp->tv_nsec = st % 1000000000;
	ut = ut * 1000000000 / freq;
	up->tv_sec = ut / 1000000000;
	up->tv_nsec = ut % 1000000000;
	if (ip != NULL) {
		it = it * 1000000000 / freq;
		ip->tv_sec = it / 1000000000;
		ip->tv_nsec = it % 1000000000;
	}
}

void
calcru(struct tusage *tup, struct timeval *up, struct timeval *sp,
    struct timeval *ip)
{
	struct timespec u, s, i;

	calctsru(tup, &u, &s, ip != NULL ? &i : NULL);
	TIMESPEC_TO_TIMEVAL(up, &u);
	TIMESPEC_TO_TIMEVAL(sp, &s);
	if (ip != NULL)
		TIMESPEC_TO_TIMEVAL(ip, &i);
}

int
sys_getrusage(struct proc *p, void *v, register_t *retval)
{
	struct sys_getrusage_args /* {
		syscallarg(int) who;
		syscallarg(struct rusage *) rusage;
	} */ *uap = v;
	struct rusage ru;
	int error;

	error = dogetrusage(p, SCARG(uap, who), &ru);
	if (error == 0) {
		error = copyout(&ru, SCARG(uap, rusage), sizeof(ru));
#ifdef KTRACE
		if (error == 0 && KTRPOINT(p, KTR_STRUCT))
			ktrrusage(p, &ru);
#endif
	}
	return (error);
}

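/*
 * Collect resource usage for the given scope into *rup.  RUSAGE_SELF
 * sums the dead threads (ps_ru) and every living thread; RUSAGE_THREAD
 * covers only the calling thread; RUSAGE_CHILDREN returns the totals
 * of waited-for children.  An illustrative userland call:
 *
 *	struct rusage ru;
 *	if (getrusage(RUSAGE_SELF, &ru) == 0)
 *		printf("%lld.%06lds user\n",
 *		    (long long)ru.ru_utime.tv_sec, ru.ru_utime.tv_usec);
 */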
int
dogetrusage(struct proc *p, int who, struct rusage *rup)
{
	struct process *pr = p->p_p;
	struct proc *q;

	switch (who) {

	case RUSAGE_SELF:
		/* start with the sum of dead threads, if any */
		if (pr->ps_ru != NULL)
			*rup = *pr->ps_ru;
		else
			memset(rup, 0, sizeof(*rup));

		/* add on all living threads */
		TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
			ruadd(rup, &q->p_ru);
			tuagg(pr, q);
		}

		calcru(&pr->ps_tu, &rup->ru_utime, &rup->ru_stime, NULL);
		break;

	case RUSAGE_THREAD:
		*rup = p->p_ru;
		calcru(&p->p_tu, &rup->ru_utime, &rup->ru_stime, NULL);
		break;

	case RUSAGE_CHILDREN:
		*rup = pr->ps_cru;
		break;

	default:
		return (EINVAL);
	}
	return (0);
}

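/*
 * Add ru2 into ru.  ru_maxrss takes the maximum of the two; the
 * remaining counters are summed pairwise by walking the contiguous
 * block of longs bracketed by the ru_first/ru_last macros from
 * <sys/resource.h>.
 */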
void
ruadd(struct rusage *ru, struct rusage *ru2)
{
	long *ip, *ip2;
	int i;

	timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
	timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first; ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Check if the process exceeds its cpu resource allocation.
 * If over max, kill it.
 */
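/*
 * E.g., with RLIMIT_CPU { .rlim_cur = 10, .rlim_max = 30 } the process
 * gets SIGXCPU at 10 seconds of runtime and every SIGXCPU_INTERVAL
 * seconds thereafter, then SIGKILL once runtime reaches 30 seconds.
 */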
void
rucheck(void *arg)
{
	struct process *pr = arg;
	struct rlimit *rlim;
	time_t runtime;
	int s;

	KERNEL_ASSERT_LOCKED();

	SCHED_LOCK(s);
	runtime = pr->ps_tu.tu_runtime.tv_sec;
	SCHED_UNLOCK(s);

	rlim = &pr->ps_limit->pl_rlimit[RLIMIT_CPU];
	if ((rlim_t)runtime >= rlim->rlim_cur) {
		if ((rlim_t)runtime >= rlim->rlim_max) {
			prsignal(pr, SIGKILL);
		} else if (runtime >= pr->ps_nextxcpu) {
			prsignal(pr, SIGXCPU);
			pr->ps_nextxcpu = runtime + SIGXCPU_INTERVAL;
		}
	}

	timeout_add_msec(&pr->ps_rucheck_to, RUCHECK_INTERVAL);
}

struct pool plimit_pool;

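/*
 * Initialize the plimit pool and set up the initial limits: everything
 * starts at RLIM_INFINITY except open files, process count, and the
 * memory limits, which are derived from system tunables and the amount
 * of free memory at boot.
 */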
void
lim_startup(struct plimit *limit0)
{
	rlim_t lim;
	int i;

	pool_init(&plimit_pool, sizeof(struct plimit), 0, IPL_MPFLOOR,
	    PR_WAITOK, "plimitpl", NULL);

	for (i = 0; i < nitems(limit0->pl_rlimit); i++)
		limit0->pl_rlimit[i].rlim_cur =
		    limit0->pl_rlimit[i].rlim_max = RLIM_INFINITY;
	limit0->pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
	limit0->pl_rlimit[RLIMIT_NOFILE].rlim_max = MIN(NOFILE_MAX,
	    (maxfiles - NOFILE > NOFILE) ? maxfiles - NOFILE : NOFILE);
	limit0->pl_rlimit[RLIMIT_NPROC].rlim_cur = MAXUPRC;
	lim = ptoa(uvmexp.free);
	limit0->pl_rlimit[RLIMIT_RSS].rlim_max = lim;
	limit0->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
	limit0->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
	limit0->pl_refcnt = 1;
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork,
 * and copy when a limit is changed.
 */
struct plimit *
limcopy(struct plimit *lim)
{
	struct plimit *newlim;

	newlim = pool_get(&plimit_pool, PR_WAITOK);
	memcpy(newlim->pl_rlimit, lim->pl_rlimit,
	    sizeof(struct rlimit) * RLIM_NLIMITS);
	newlim->pl_refcnt = 1;
	return (newlim);
}

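/*
 * Drop a reference to a plimit structure, freeing it when the last
 * reference is released.
 */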
void
limfree(struct plimit *lim)
{
	if (--lim->pl_refcnt > 0)
		return;
	pool_put(&plimit_pool, lim);
}
583