/*	$NetBSD: kern_resource.c,v 1.195 2023/10/04 20:28:06 ad Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.8 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_resource.c,v 1.195 2023/10/04 20:28:06 ad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/resourcevar.h>
#include <sys/kmem.h>
#include <sys/namei.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/timevar.h>
#include <sys/kauth.h>
#include <sys/atomic.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

/*
 * Maximum process data and stack limits.
 * They are variables so they are patchable.
 */
rlim_t maxdmap = MAXDSIZ;
rlim_t maxsmap = MAXSSIZ;

static kauth_listener_t resource_listener;
static struct sysctllog *proc_sysctllog;

static int	donice(struct lwp *, struct proc *, int);
static void	sysctl_proc_setup(void);

static int
resource_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	struct proc *p;
	int result;

	result = KAUTH_RESULT_DEFER;
	p = arg0;

	switch (action) {
	case KAUTH_PROCESS_NICE:
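		/*
		 * Defer unless the caller's effective or real uid matches
		 * the target's effective uid.  Even then, only allow the
		 * request if the nice value is not being decreased.
		 */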
		if (kauth_cred_geteuid(cred) != kauth_cred_geteuid(p->p_cred) &&
		    kauth_cred_getuid(cred) != kauth_cred_geteuid(p->p_cred)) {
			break;
		}

		if ((u_long)arg1 >= p->p_nice)
			result = KAUTH_RESULT_ALLOW;

		break;

	case KAUTH_PROCESS_RLIMIT: {
		enum kauth_process_req req;

		req = (enum kauth_process_req)(uintptr_t)arg1;

		switch (req) {
		case KAUTH_REQ_PROCESS_RLIMIT_GET:
			result = KAUTH_RESULT_ALLOW;
			break;

		case KAUTH_REQ_PROCESS_RLIMIT_SET: {
			struct rlimit *new_rlimit;
			u_long which;

			if ((p != curlwp->l_proc) &&
			    (proc_uidmatch(cred, p->p_cred) != 0))
				break;

			new_rlimit = arg2;
			which = (u_long)arg3;

			if (new_rlimit->rlim_max <= p->p_rlimit[which].rlim_max)
				result = KAUTH_RESULT_ALLOW;

			break;
		}

		default:
			break;
		}

		break;
	}

	default:
		break;
	}

	return result;
}

void
resource_init(void)
{

	resource_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
	    resource_listener_cb, NULL);

	sysctl_proc_setup();
}

/*
 * Resource controls and accounting.
 */

int
sys_getpriority(struct lwp *l, const struct sys_getpriority_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) which;
		syscallarg(id_t) who;
	} */
	struct proc *curp = l->l_proc, *p;
	id_t who = SCARG(uap, who);
	int low = NZERO + PRIO_MAX + 1;

	mutex_enter(&proc_lock);
	switch (SCARG(uap, which)) {
	case PRIO_PROCESS:
		p = who ? proc_find(who) : curp;
		if (p != NULL)
			low = p->p_nice;
		break;

	case PRIO_PGRP: {
		struct pgrp *pg;

		if (who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgrp_find(who)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (p->p_nice < low)
				low = p->p_nice;
		}
		break;
	}

	case PRIO_USER:
		if (who == 0)
			who = (int)kauth_cred_geteuid(l->l_cred);
		PROCLIST_FOREACH(p, &allproc) {
			mutex_enter(p->p_lock);
			if (kauth_cred_geteuid(p->p_cred) ==
			    (uid_t)who && p->p_nice < low)
				low = p->p_nice;
			mutex_exit(p->p_lock);
		}
		break;

	default:
		mutex_exit(&proc_lock);
		return EINVAL;
	}
	mutex_exit(&proc_lock);

	if (low == NZERO + PRIO_MAX + 1) {
		return ESRCH;
	}
	*retval = low - NZERO;
	return 0;
}

int
sys_setpriority(struct lwp *l, const struct sys_setpriority_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) which;
		syscallarg(id_t) who;
		syscallarg(int) prio;
	} */
	struct proc *curp = l->l_proc, *p;
	id_t who = SCARG(uap, who);
	int found = 0, error = 0;

	mutex_enter(&proc_lock);
	switch (SCARG(uap, which)) {
	case PRIO_PROCESS:
		p = who ? proc_find(who) : curp;
		if (p != NULL) {
			mutex_enter(p->p_lock);
			found++;
			error = donice(l, p, SCARG(uap, prio));
			mutex_exit(p->p_lock);
		}
		break;

	case PRIO_PGRP: {
		struct pgrp *pg;

		if (who == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgrp_find(who)) == NULL)
			break;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			mutex_enter(p->p_lock);
			found++;
			error = donice(l, p, SCARG(uap, prio));
			mutex_exit(p->p_lock);
			if (error)
				break;
		}
		break;
	}

	case PRIO_USER:
		if (who == 0)
			who = (int)kauth_cred_geteuid(l->l_cred);
		PROCLIST_FOREACH(p, &allproc) {
			mutex_enter(p->p_lock);
			if (kauth_cred_geteuid(p->p_cred) ==
			    (uid_t)SCARG(uap, who)) {
				found++;
				error = donice(l, p, SCARG(uap, prio));
			}
			mutex_exit(p->p_lock);
			if (error)
				break;
		}
		break;

	default:
		mutex_exit(&proc_lock);
		return EINVAL;
	}
	mutex_exit(&proc_lock);

	return (found == 0) ? ESRCH : error;
}

/*
 * Renice a process.
 *
 * Call with the target process' credentials locked.
 */
static int
donice(struct lwp *l, struct proc *chgp, int n)
{
	kauth_cred_t cred = l->l_cred;

	KASSERT(mutex_owned(chgp->p_lock));

	if (kauth_cred_geteuid(cred) && kauth_cred_getuid(cred) &&
	    kauth_cred_geteuid(cred) != kauth_cred_geteuid(chgp->p_cred) &&
	    kauth_cred_getuid(cred) != kauth_cred_geteuid(chgp->p_cred))
		return EPERM;

	if (n > PRIO_MAX) {
		n = PRIO_MAX;
	}
	if (n < PRIO_MIN) {
		n = PRIO_MIN;
	}
	n += NZERO;

	if (kauth_authorize_process(cred, KAUTH_PROCESS_NICE, chgp,
	    KAUTH_ARG(n), NULL, NULL)) {
		return EACCES;
	}

	sched_nice(chgp, n);
	return 0;
}

int
sys_setrlimit(struct lwp *l, const struct sys_setrlimit_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) which;
		syscallarg(const struct rlimit *) rlp;
	} */
	int error, which = SCARG(uap, which);
	struct rlimit alim;

	error = copyin(SCARG(uap, rlp), &alim, sizeof(struct rlimit));
	if (error) {
		return error;
	}
	return dosetrlimit(l, l->l_proc, which, &alim);
}

int
dosetrlimit(struct lwp *l, struct proc *p, int which, struct rlimit *limp)
{
	struct rlimit *alimp;
	int error;

	if ((u_int)which >= RLIM_NLIMITS)
		return EINVAL;

	if (limp->rlim_cur > limp->rlim_max) {
		/*
		 * This is a programming error.  According to SUSv2, we
		 * should return an error in this case.
		 */
		return EINVAL;
	}

	alimp = &p->p_rlimit[which];
	/* if we don't change the value, no need to limcopy() */
	if (limp->rlim_cur == alimp->rlim_cur &&
	    limp->rlim_max == alimp->rlim_max)
		return 0;

	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT,
	    p, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_SET), limp, KAUTH_ARG(which));
	if (error)
		return error;

	lim_privatise(p);
	/* p->p_limit is now unchangeable */
	alimp = &p->p_rlimit[which];

	switch (which) {

	case RLIMIT_DATA:
		if (limp->rlim_cur > maxdmap)
			limp->rlim_cur = maxdmap;
		if (limp->rlim_max > maxdmap)
			limp->rlim_max = maxdmap;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > maxsmap)
			limp->rlim_cur = maxsmap;
		if (limp->rlim_max > maxsmap)
			limp->rlim_max = maxsmap;

		/*
		 * Return EINVAL if the new stack size limit is lower than
		 * current usage.  Otherwise, the process would get SIGSEGV
		 * the moment it tried to access anything on its current
		 * stack.  This conforms to SUSv2.
		 */
		if (btoc(limp->rlim_cur) < p->p_vmspace->vm_ssize ||
		    btoc(limp->rlim_max) < p->p_vmspace->vm_ssize) {
			return EINVAL;
		}

		/*
		 * The stack is allocated to its maximum size at exec time,
		 * with only "rlim_cur" bytes of it accessible (in other
		 * words, the stack is split into two contiguous regions at
		 * the "rlim_cur" byte boundary).
		 *
		 * Since allocation is done in terms of pages, round
		 * "rlim_cur" up to a page boundary (otherwise the two
		 * regions would overlap).  If the stack limit is going up,
		 * make more of the stack accessible; if it is going down,
		 * make the excess inaccessible.
		 */
		limp->rlim_max = round_page(limp->rlim_max);
		limp->rlim_cur = round_page(limp->rlim_cur);
		if (limp->rlim_cur != alimp->rlim_cur) {
			vaddr_t addr;
			vsize_t size;
			vm_prot_t prot;
			char *base, *tmp;

			base = p->p_vmspace->vm_minsaddr;
			if (limp->rlim_cur > alimp->rlim_cur) {
				prot = VM_PROT_READ | VM_PROT_WRITE;
				size = limp->rlim_cur - alimp->rlim_cur;
				tmp = STACK_GROW(base, alimp->rlim_cur);
			} else {
				prot = VM_PROT_NONE;
				size = alimp->rlim_cur - limp->rlim_cur;
				tmp = STACK_GROW(base, limp->rlim_cur);
			}
			addr = (vaddr_t)STACK_ALLOC(tmp, size);
			(void) uvm_map_protect(&p->p_vmspace->vm_map,
			    addr, addr + size, prot, false);
		}
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfiles)
			limp->rlim_cur = maxfiles;
		if (limp->rlim_max > maxfiles)
			limp->rlim_max = maxfiles;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxproc)
			limp->rlim_cur = maxproc;
		if (limp->rlim_max > maxproc)
			limp->rlim_max = maxproc;
		break;

	case RLIMIT_NTHR:
		if (limp->rlim_cur > maxlwp)
			limp->rlim_cur = maxlwp;
		if (limp->rlim_max > maxlwp)
			limp->rlim_max = maxlwp;
		break;
	}

	mutex_enter(&p->p_limit->pl_lock);
	*alimp = *limp;
	mutex_exit(&p->p_limit->pl_lock);
	return 0;
}

int
sys_getrlimit(struct lwp *l, const struct sys_getrlimit_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) which;
		syscallarg(struct rlimit *) rlp;
	} */
	struct proc *p = l->l_proc;
	int which = SCARG(uap, which);
	struct rlimit rl;

	if ((u_int)which >= RLIM_NLIMITS)
		return EINVAL;

	mutex_enter(p->p_lock);
	memcpy(&rl, &p->p_rlimit[which], sizeof(rl));
	mutex_exit(p->p_lock);

	return copyout(&rl, SCARG(uap, rlp), sizeof(rl));
}

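/*
 * Add the accumulated run time of one LWP to *tm.  If the LWP is
 * currently running, also account for the time spent so far in its
 * current time slice.
 */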
void
addrulwp(struct lwp *l, struct bintime *tm)
{

	lwp_lock(l);
	bintime_add(tm, &l->l_rtime);
	if ((l->l_pflag & LP_RUNNING) != 0 &&
	    (l->l_pflag & (LP_INTR | LP_TIMEINTR)) != LP_INTR) {
		struct bintime diff;
		/*
		 * Adjust for the current time slice.  This is
		 * actually fairly important since the error
		 * here is on the order of a time quantum,
		 * which is much greater than the sampling
		 * error.
		 */
		binuptime(&diff);
		membar_consumer();	/* for softint_dispatch() */
		bintime_sub(&diff, &l->l_stime);
		bintime_add(tm, &diff);
	}
	lwp_unlock(l);
}

/*
 * Transform the running time and tick information in proc p into user,
 * system, and interrupt time usage.
 *
 * Should be called with p->p_lock held unless called from exit1().
 */
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp,
    struct timeval *ip, struct timeval *rp)
{
	uint64_t u, st, ut, it, tot, dt;
	struct lwp *l;
	struct bintime tm;
	struct timeval tv;

	KASSERT(p->p_stat == SDEAD || mutex_owned(p->p_lock));

	mutex_spin_enter(&p->p_stmutex);
	st = p->p_sticks;
	ut = p->p_uticks;
	it = p->p_iticks;
	mutex_spin_exit(&p->p_stmutex);

	tm = p->p_rtime;

	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		addrulwp(l, &tm);
	}

	tot = st + ut + it;
	bintime2timeval(&tm, &tv);
	u = (uint64_t)tv.tv_sec * 1000000ul + tv.tv_usec;

	if (tot == 0) {
		/* No ticks, so we can't use them to apportion time; split 50-50 */
		st = ut = u / 2;
	} else {
		st = (u * st) / tot;
		ut = (u * ut) / tot;
	}

	/*
	 * Try to avoid lying to the users (too much).
	 *
	 * Of course, user/sys times are based on sampling (i.e. statistics),
	 * so that would be impossible, but convincing the mark that we have
	 * used less time this call than we had last time is beyond
	 * reasonable...  (the con fails!)
	 *
	 * Note that since actual used time cannot decrease, either
	 * utime or stime (or both) must be greater now than last time
	 * (or both the same) - if one seems to have decreased, hold
	 * it constant and steal the necessary bump from the other
	 * which must have increased.
	 */
	if (p->p_xutime > ut) {
		dt = p->p_xutime - ut;
		st -= uimin(dt, st);
		ut = p->p_xutime;
	} else if (p->p_xstime > st) {
		dt = p->p_xstime - st;
		ut -= uimin(dt, ut);
		st = p->p_xstime;
	}

	if (sp != NULL) {
		p->p_xstime = st;
		sp->tv_sec = st / 1000000;
		sp->tv_usec = st % 1000000;
	}
	if (up != NULL) {
		p->p_xutime = ut;
		up->tv_sec = ut / 1000000;
		up->tv_usec = ut % 1000000;
	}
	if (ip != NULL) {
		if (it != 0)	/* it != 0 --> tot != 0 */
			it = (u * it) / tot;
		ip->tv_sec = it / 1000000;
		ip->tv_usec = it % 1000000;
	}
	if (rp != NULL) {
		*rp = tv;
	}
}

int
sys___getrusage50(struct lwp *l, const struct sys___getrusage50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) who;
		syscallarg(struct rusage *) rusage;
	} */
	int error;
	struct rusage ru;
	struct proc *p = l->l_proc;

	error = getrusage1(p, SCARG(uap, who), &ru);
	if (error != 0)
		return error;

	return copyout(&ru, SCARG(uap, rusage), sizeof(ru));
}

int
getrusage1(struct proc *p, int who, struct rusage *ru)
{

	switch (who) {
	case RUSAGE_SELF:
		mutex_enter(p->p_lock);
		ruspace(p);
		memcpy(ru, &p->p_stats->p_ru, sizeof(*ru));
		calcru(p, &ru->ru_utime, &ru->ru_stime, NULL, NULL);
		rulwps(p, ru);
		mutex_exit(p->p_lock);
		break;
	case RUSAGE_CHILDREN:
		mutex_enter(p->p_lock);
		memcpy(ru, &p->p_stats->p_cru, sizeof(*ru));
		mutex_exit(p->p_lock);
		break;
	default:
		return EINVAL;
	}

	return 0;
}

void
ruspace(struct proc *p)
{
	struct vmspace *vm = p->p_vmspace;
	struct rusage *ru = &p->p_stats->p_ru;

	ru->ru_ixrss = vm->vm_tsize << (PAGE_SHIFT - 10);
	ru->ru_idrss = vm->vm_dsize << (PAGE_SHIFT - 10);
	ru->ru_isrss = vm->vm_ssize << (PAGE_SHIFT - 10);
#ifdef __HAVE_NO_PMAP_STATS
	/* We don't keep track of the max so we get the current */
	ru->ru_maxrss = vm_resident_count(vm) << (PAGE_SHIFT - 10);
#else
	ru->ru_maxrss = vm->vm_rssmax << (PAGE_SHIFT - 10);
#endif
}

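/*
 * ruadd: sum the fields of two rusage structures.  The time values and
 * counters of ru2 are added into ru; ru_maxrss keeps the larger of the
 * two values.
 */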
void
ruadd(struct rusage *ru, struct rusage *ru2)
{
	long *ip, *ip2;
	int i;

	timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
	timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first; ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

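/*
 * rulwps: add the resource usage accumulated by each of the process's
 * LWPs into *ru.  Call with p->p_lock held.
 */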
void
rulwps(proc_t *p, struct rusage *ru)
{
	lwp_t *l;

	KASSERT(mutex_owned(p->p_lock));

	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		ruadd(ru, &l->l_ru);
	}
}

/*
 * lim_copy: make a copy of the plimit structure.
 *
 * We use copy-on-write after fork, and copy when a limit is changed.
 */
struct plimit *
lim_copy(struct plimit *lim)
{
	struct plimit *newlim;
	char *corename;
	size_t alen, len;

	newlim = kmem_alloc(sizeof(*newlim), KM_SLEEP);
	mutex_init(&newlim->pl_lock, MUTEX_DEFAULT, IPL_NONE);
	newlim->pl_writeable = false;
	newlim->pl_refcnt = 1;
	newlim->pl_sv_limit = NULL;

	mutex_enter(&lim->pl_lock);
	memcpy(newlim->pl_rlimit, lim->pl_rlimit,
	    sizeof(struct rlimit) * RLIM_NLIMITS);

	/*
	 * Note: the common case is use of the default core name.
	 */
	alen = 0;
	corename = NULL;
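	/*
	 * The core name must be copied while holding pl_lock, but the copy
	 * buffer is allocated with the lock dropped (the allocation may
	 * sleep); retry if the name's length changed in the meantime.
	 */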
	for (;;) {
		if (lim->pl_corename == defcorename) {
			newlim->pl_corename = defcorename;
			newlim->pl_cnlen = 0;
			break;
		}
		len = lim->pl_cnlen;
		if (len == alen) {
			newlim->pl_corename = corename;
			newlim->pl_cnlen = len;
			memcpy(corename, lim->pl_corename, len);
			corename = NULL;
			break;
		}
		mutex_exit(&lim->pl_lock);
		if (corename) {
			kmem_free(corename, alen);
		}
		alen = len;
		corename = kmem_alloc(alen, KM_SLEEP);
		mutex_enter(&lim->pl_lock);
	}
	mutex_exit(&lim->pl_lock);

	if (corename) {
		kmem_free(corename, alen);
	}
	return newlim;
}

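/*
 * lim_addref: take an additional reference on a plimit structure.
 */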
void
lim_addref(struct plimit *lim)
{
	atomic_inc_uint(&lim->pl_refcnt);
}

/*
 * lim_privatise: give a process its own private plimit structure.
 */
void
lim_privatise(proc_t *p)
{
	struct plimit *lim = p->p_limit, *newlim;

	if (lim->pl_writeable) {
		return;
	}

	newlim = lim_copy(lim);

	mutex_enter(p->p_lock);
	if (p->p_limit->pl_writeable) {
		/* Other thread won the race. */
		mutex_exit(p->p_lock);
		lim_free(newlim);
		return;
	}

	/*
	 * Since p->p_limit can be accessed without the lock held,
	 * the old limit structure must not be deleted yet.
	 */
	newlim->pl_sv_limit = p->p_limit;
	newlim->pl_writeable = true;
	p->p_limit = newlim;
	mutex_exit(p->p_lock);
}

void
lim_setcorename(proc_t *p, char *name, size_t len)
{
	struct plimit *lim;
	char *oname;
	size_t olen;

	lim_privatise(p);
	lim = p->p_limit;

	mutex_enter(&lim->pl_lock);
	oname = lim->pl_corename;
	olen = lim->pl_cnlen;
	lim->pl_corename = name;
	lim->pl_cnlen = len;
	mutex_exit(&lim->pl_lock);

	if (oname != defcorename) {
		kmem_free(oname, olen);
	}
}

void
lim_free(struct plimit *lim)
{
	struct plimit *sv_lim;

	do {
		membar_release();
		if (atomic_dec_uint_nv(&lim->pl_refcnt) > 0) {
			return;
		}
		membar_acquire();
		if (lim->pl_corename != defcorename) {
			kmem_free(lim->pl_corename, lim->pl_cnlen);
		}
		sv_lim = lim->pl_sv_limit;
		mutex_destroy(&lim->pl_lock);
		kmem_free(lim, sizeof(*lim));
	} while ((lim = sv_lim) != NULL);
}

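/*
 * pstatscopy: make a copy of a pstats structure.  The region between
 * pstat_startzero and pstat_endzero is cleared in the new structure,
 * and the region between pstat_startcopy and pstat_endcopy is copied
 * from ps.
 */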
struct pstats *
pstatscopy(struct pstats *ps)
{
	struct pstats *nps;
	size_t len;

	nps = kmem_alloc(sizeof(*nps), KM_SLEEP);

	len = (char *)&nps->pstat_endzero - (char *)&nps->pstat_startzero;
	memset(&nps->pstat_startzero, 0, len);

	len = (char *)&nps->pstat_endcopy - (char *)&nps->pstat_startcopy;
	memcpy(&nps->pstat_startcopy, &ps->pstat_startcopy, len);

	return nps;
}

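/*
 * pstatsfree: release a pstats structure.
 */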
void
pstatsfree(struct pstats *ps)
{

	kmem_free(ps, sizeof(*ps));
}

/*
 * sysctl_proc_findproc: a routine for sysctl proc subtree helpers that
 * need to pick a valid process by PID.
 *
 * => Hold a reference on the process, on success.
 */
static int
sysctl_proc_findproc(lwp_t *l, pid_t pid, proc_t **p2)
{
	proc_t *p;
	int error;

	if (pid == PROC_CURPROC) {
		p = l->l_proc;
	} else {
		mutex_enter(&proc_lock);
		p = proc_find(pid);
		if (p == NULL) {
			mutex_exit(&proc_lock);
			return ESRCH;
		}
	}
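	/*
	 * Take a read hold on p_reflock so the process cannot exit while
	 * the caller is using it; do not wait for the lock, just fail
	 * with EBUSY if it cannot be taken immediately.
	 */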
	error = rw_tryenter(&p->p_reflock, RW_READER) ? 0 : EBUSY;
	if (pid != PROC_CURPROC) {
		mutex_exit(&proc_lock);
	}
	*p2 = p;
	return error;
}

/*
 * sysctl_proc_paxflags: helper routine to get process's paxctl flags
 */
static int
sysctl_proc_paxflags(SYSCTLFN_ARGS)
{
	struct proc *p;
	struct sysctlnode node;
	int paxflags;
	int error;

	/* First, validate the request. */
	if (namelen != 0 || name[-1] != PROC_PID_PAXFLAGS)
		return EINVAL;

	/* Find the process.  Hold a reference (p_reflock), if found. */
	error = sysctl_proc_findproc(l, (pid_t)name[-2], &p);
	if (error)
		return error;

	/* XXX-elad */
	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, p,
	    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
	if (error) {
		rw_exit(&p->p_reflock);
		return error;
	}

	/* Retrieve the PaX flags. */
	node = *rnode;
	paxflags = p->p_pax;
	node.sysctl_data = &paxflags;

	error = sysctl_lookup(SYSCTLFN_CALL(&node));

	/* Attempting to write a new value is an error. */
	if (error == 0 && newp != NULL)
		error = EACCES;

	rw_exit(&p->p_reflock);
	return error;
}

/*
 * sysctl_proc_corename: helper routine to get or set the core file name
 * for a process specified by PID.
 */
static int
sysctl_proc_corename(SYSCTLFN_ARGS)
{
	struct proc *p;
	struct plimit *lim;
	char *cnbuf, *cname;
	struct sysctlnode node;
	size_t len;
	int error;

	/* First, validate the request. */
	if (namelen != 0 || name[-1] != PROC_PID_CORENAME)
		return EINVAL;

	/* Find the process.  Hold a reference (p_reflock), if found. */
	error = sysctl_proc_findproc(l, (pid_t)name[-2], &p);
	if (error)
		return error;

	/* XXX-elad */
	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, p,
	    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
	if (error) {
		rw_exit(&p->p_reflock);
		return error;
	}

	cnbuf = PNBUF_GET();

	if (oldp) {
		/* Get case: copy the core name into the buffer. */
		error = kauth_authorize_process(l->l_cred,
		    KAUTH_PROCESS_CORENAME, p,
		    KAUTH_ARG(KAUTH_REQ_PROCESS_CORENAME_GET), NULL, NULL);
		if (error) {
			goto done;
		}
		lim = p->p_limit;
		mutex_enter(&lim->pl_lock);
		strlcpy(cnbuf, lim->pl_corename, MAXPATHLEN);
		mutex_exit(&lim->pl_lock);
	}

	node = *rnode;
	node.sysctl_data = cnbuf;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));

	/* Return if error, or if caller is only getting the core name. */
	if (error || newp == NULL) {
		goto done;
	}

	/*
	 * Set case.  Check permission and then validate new core name.
	 * It must be either "core", "/core", or end in ".core".
	 */
	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CORENAME,
	    p, KAUTH_ARG(KAUTH_REQ_PROCESS_CORENAME_SET), cnbuf, NULL);
	if (error) {
		goto done;
	}
	len = strlen(cnbuf);
	if ((len < 4 || strcmp(cnbuf + len - 4, "core") != 0) ||
	    (len > 4 && cnbuf[len - 5] != '/' && cnbuf[len - 5] != '.')) {
		error = EINVAL;
		goto done;
	}

	/* Allocate, copy and set the new core name for plimit structure. */
	cname = kmem_alloc(++len, KM_NOSLEEP);
	if (cname == NULL) {
		error = ENOMEM;
		goto done;
	}
	memcpy(cname, cnbuf, len);
	lim_setcorename(p, cname, len);
done:
	rw_exit(&p->p_reflock);
	PNBUF_PUT(cnbuf);
	return error;
}

/*
 * sysctl_proc_stop: helper routine for checking/setting the stop flags.
 */
static int
sysctl_proc_stop(SYSCTLFN_ARGS)
{
	struct proc *p;
	int isset, flag, error = 0;
	struct sysctlnode node;

	if (namelen != 0)
		return EINVAL;

	/* Find the process.  Hold a reference (p_reflock), if found. */
	error = sysctl_proc_findproc(l, (pid_t)name[-2], &p);
	if (error)
		return error;

	/* XXX-elad */
	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, p,
	    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
	if (error) {
		goto out;
	}

	/* Determine the flag. */
	switch (rnode->sysctl_num) {
	case PROC_PID_STOPFORK:
		flag = PS_STOPFORK;
		break;
	case PROC_PID_STOPEXEC:
		flag = PS_STOPEXEC;
		break;
	case PROC_PID_STOPEXIT:
		flag = PS_STOPEXIT;
		break;
	default:
		error = EINVAL;
		goto out;
	}
	isset = (p->p_flag & flag) ? 1 : 0;
	node = *rnode;
	node.sysctl_data = &isset;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));

	/* Return if error, or if the caller is only getting the flag. */
	if (error || newp == NULL) {
		goto out;
	}

	/* Check if caller can set the flags. */
	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_STOPFLAG,
	    p, KAUTH_ARG(flag), NULL, NULL);
	if (error) {
		goto out;
	}
	mutex_enter(p->p_lock);
	if (isset) {
		p->p_sflag |= flag;
	} else {
		p->p_sflag &= ~flag;
	}
	mutex_exit(p->p_lock);
out:
	rw_exit(&p->p_reflock);
	return error;
}

/*
 * sysctl_proc_plimit: helper routine to get/set rlimits of a process.
 */
static int
sysctl_proc_plimit(SYSCTLFN_ARGS)
{
	struct proc *p;
	u_int limitno;
	int which, error = 0;
	struct rlimit alim;
	struct sysctlnode node;

	if (namelen != 0)
		return EINVAL;

	which = name[-1];
	if (which != PROC_PID_LIMIT_TYPE_SOFT &&
	    which != PROC_PID_LIMIT_TYPE_HARD)
		return EINVAL;

	limitno = name[-2] - 1;
	if (limitno >= RLIM_NLIMITS)
		return EINVAL;

	if (name[-3] != PROC_PID_LIMIT)
		return EINVAL;

	/* Find the process.  Hold a reference (p_reflock), if found. */
	error = sysctl_proc_findproc(l, (pid_t)name[-4], &p);
	if (error)
		return error;

	/* XXX-elad */
	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, p,
	    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
	if (error)
		goto out;

	/* Check if caller can retrieve the limits. */
	if (newp == NULL) {
		error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT,
		    p, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_GET), &alim,
		    KAUTH_ARG(which));
		if (error)
			goto out;
	}

	/* Retrieve the limits. */
	node = *rnode;
	memcpy(&alim, &p->p_rlimit[limitno], sizeof(alim));
	if (which == PROC_PID_LIMIT_TYPE_HARD) {
		node.sysctl_data = &alim.rlim_max;
	} else {
		node.sysctl_data = &alim.rlim_cur;
	}
	error = sysctl_lookup(SYSCTLFN_CALL(&node));

	/* Return if error, or if we are only retrieving the limits. */
	if (error || newp == NULL) {
		goto out;
	}
	error = dosetrlimit(l, p, limitno, &alim);
out:
	rw_exit(&p->p_reflock);
	return error;
}

/*
 * Set up sysctl nodes.
 */
static void
sysctl_proc_setup(void)
{

	sysctl_createv(&proc_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_ANYNUMBER,
	    CTLTYPE_NODE, "curproc",
	    SYSCTL_DESCR("Per-process settings"),
	    NULL, 0, NULL, 0,
	    CTL_PROC, PROC_CURPROC, CTL_EOL);

	sysctl_createv(&proc_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READONLY,
	    CTLTYPE_INT, "paxflags",
	    SYSCTL_DESCR("Process PAX control flags"),
	    sysctl_proc_paxflags, 0, NULL, 0,
	    CTL_PROC, PROC_CURPROC, PROC_PID_PAXFLAGS, CTL_EOL);

	sysctl_createv(&proc_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
	    CTLTYPE_STRING, "corename",
	    SYSCTL_DESCR("Core file name"),
	    sysctl_proc_corename, 0, NULL, MAXPATHLEN,
	    CTL_PROC, PROC_CURPROC, PROC_PID_CORENAME, CTL_EOL);
	sysctl_createv(&proc_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "rlimit",
	    SYSCTL_DESCR("Process limits"),
	    NULL, 0, NULL, 0,
	    CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, CTL_EOL);

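/*
 * For each resource limit, create a sysctl node with "soft" and "hard"
 * leaves that read or modify the corresponding rlimit of the process
 * via sysctl_proc_plimit().
 */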
#define create_proc_plimit(s, n) do {					\
	sysctl_createv(&proc_sysctllog, 0, NULL, NULL,			\
	    CTLFLAG_PERMANENT,						\
	    CTLTYPE_NODE, s,						\
	    SYSCTL_DESCR("Process " s " limits"),			\
	    NULL, 0, NULL, 0,						\
	    CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n,			\
	    CTL_EOL);							\
	sysctl_createv(&proc_sysctllog, 0, NULL, NULL,			\
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,	\
	    CTLTYPE_QUAD, "soft",					\
	    SYSCTL_DESCR("Process soft " s " limit"),			\
	    sysctl_proc_plimit, 0, NULL, 0,				\
	    CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n,			\
	    PROC_PID_LIMIT_TYPE_SOFT, CTL_EOL);				\
	sysctl_createv(&proc_sysctllog, 0, NULL, NULL,			\
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,	\
	    CTLTYPE_QUAD, "hard",					\
	    SYSCTL_DESCR("Process hard " s " limit"),			\
	    sysctl_proc_plimit, 0, NULL, 0,				\
	    CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n,			\
	    PROC_PID_LIMIT_TYPE_HARD, CTL_EOL);				\
	} while (0/*CONSTCOND*/)

	create_proc_plimit("cputime", PROC_PID_LIMIT_CPU);
	create_proc_plimit("filesize", PROC_PID_LIMIT_FSIZE);
	create_proc_plimit("datasize", PROC_PID_LIMIT_DATA);
	create_proc_plimit("stacksize", PROC_PID_LIMIT_STACK);
	create_proc_plimit("coredumpsize", PROC_PID_LIMIT_CORE);
	create_proc_plimit("memoryuse", PROC_PID_LIMIT_RSS);
	create_proc_plimit("memorylocked", PROC_PID_LIMIT_MEMLOCK);
	create_proc_plimit("maxproc", PROC_PID_LIMIT_NPROC);
	create_proc_plimit("descriptors", PROC_PID_LIMIT_NOFILE);
	create_proc_plimit("sbsize", PROC_PID_LIMIT_SBSIZE);
	create_proc_plimit("vmemoryuse", PROC_PID_LIMIT_AS);
	create_proc_plimit("maxlwp", PROC_PID_LIMIT_NTHR);

#undef create_proc_plimit

	sysctl_createv(&proc_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
	    CTLTYPE_INT, "stopfork",
	    SYSCTL_DESCR("Stop process at fork(2)"),
	    sysctl_proc_stop, 0, NULL, 0,
	    CTL_PROC, PROC_CURPROC, PROC_PID_STOPFORK, CTL_EOL);
	sysctl_createv(&proc_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
	    CTLTYPE_INT, "stopexec",
	    SYSCTL_DESCR("Stop process at execve(2)"),
	    sysctl_proc_stop, 0, NULL, 0,
	    CTL_PROC, PROC_CURPROC, PROC_PID_STOPEXEC, CTL_EOL);
	sysctl_createv(&proc_sysctllog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
	    CTLTYPE_INT, "stopexit",
	    SYSCTL_DESCR("Stop process before completing exit"),
	    sysctl_proc_stop, 0, NULL, 0,
	    CTL_PROC, PROC_CURPROC, PROC_PID_STOPEXIT, CTL_EOL);
}