/*	$OpenBSD: kern_resource.c,v 1.91 2024/10/08 11:57:59 claudio Exp $	*/
/*	$NetBSD: kern_resource.c,v 1.38 1996/10/23 07:19:38 matthias Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/resourcevar.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/ktrace.h>
#include <sys/sched.h>
#include <sys/signalvar.h>

#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm.h>

/* Resource usage check interval in msec */
#define RUCHECK_INTERVAL	1000

/* SIGXCPU interval in seconds of process runtime */
#define SIGXCPU_INTERVAL	5

struct plimit	*lim_copy(struct plimit *);
struct plimit	*lim_write_begin(void);
void		 lim_write_commit(struct plimit *);

void	tuagg_sumup(struct tusage *, const struct tusage *);

/*
 * Patchable maximum data and stack limits.
 */
rlim_t maxdmap = MAXDSIZ;
rlim_t maxsmap = MAXSSIZ;

/*
 * Serializes resource limit updates.
 * This lock has to be held together with ps_mtx when updating
 * the process' ps_limit.
 */
struct rwlock rlimit_lock = RWLOCK_INITIALIZER("rlimitlk");
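/*
 * Typical limit-update sequence, as implemented by dosetrlimit() below:
 *
 *	rw_enter_write(&rlimit_lock);
 *	limit = lim_write_begin();
 *	limit->pl_rlimit[which] = ...;
 *	lim_write_commit(limit);	(publishes the new plimit under ps_mtx)
 *	rw_exit_write(&rlimit_lock);
 */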
/*
 * Resource controls and accounting.
 */

int
sys_getpriority(struct proc *curp, void *v, register_t *retval)
{
	struct sys_getpriority_args /* {
		syscallarg(int) which;
		syscallarg(id_t) who;
	} */ *uap = v;
	struct process *pr;
	int low = NZERO + PRIO_MAX + 1;

	switch (SCARG(uap, which)) {

	case PRIO_PROCESS:
		if (SCARG(uap, who) == 0)
			pr = curp->p_p;
		else
			pr = prfind(SCARG(uap, who));
		if (pr == NULL)
			break;
		if (pr->ps_nice < low)
			low = pr->ps_nice;
		break;

	case PRIO_PGRP: {
		struct pgrp *pg;

		if (SCARG(uap, who) == 0)
			pg = curp->p_p->ps_pgrp;
		else if ((pg = pgfind(SCARG(uap, who))) == NULL)
			break;
		LIST_FOREACH(pr, &pg->pg_members, ps_pglist)
			if (pr->ps_nice < low)
				low = pr->ps_nice;
		break;
	}

	case PRIO_USER:
		if (SCARG(uap, who) == 0)
			SCARG(uap, who) = curp->p_ucred->cr_uid;
		LIST_FOREACH(pr, &allprocess, ps_list)
			if (pr->ps_ucred->cr_uid == SCARG(uap, who) &&
			    pr->ps_nice < low)
				low = pr->ps_nice;
		break;

	default:
		return (EINVAL);
	}
	if (low == NZERO + PRIO_MAX + 1)
		return (ESRCH);
	*retval = low - NZERO;
	return (0);
}

int
sys_setpriority(struct proc *curp, void *v, register_t *retval)
{
	struct sys_setpriority_args /* {
		syscallarg(int) which;
		syscallarg(id_t) who;
		syscallarg(int) prio;
	} */ *uap = v;
	struct process *pr;
	int found = 0, error = 0;

	switch (SCARG(uap, which)) {

	case PRIO_PROCESS:
		if (SCARG(uap, who) == 0)
			pr = curp->p_p;
		else
			pr = prfind(SCARG(uap, who));
		if (pr == NULL)
			break;
		error = donice(curp, pr, SCARG(uap, prio));
		found = 1;
		break;

	case PRIO_PGRP: {
		struct pgrp *pg;

		if (SCARG(uap, who) == 0)
			pg = curp->p_p->ps_pgrp;
		else if ((pg = pgfind(SCARG(uap, who))) == NULL)
			break;
		LIST_FOREACH(pr, &pg->pg_members, ps_pglist) {
			error = donice(curp, pr, SCARG(uap, prio));
			found = 1;
		}
		break;
	}

	case PRIO_USER:
		if (SCARG(uap, who) == 0)
			SCARG(uap, who) = curp->p_ucred->cr_uid;
		LIST_FOREACH(pr, &allprocess, ps_list)
			if (pr->ps_ucred->cr_uid == SCARG(uap, who)) {
				error = donice(curp, pr, SCARG(uap, prio));
				found = 1;
			}
		break;

	default:
		return (EINVAL);
	}
	if (!found)
		return (ESRCH);
	return (error);
}

int
donice(struct proc *curp, struct process *chgpr, int n)
{
	struct ucred *ucred = curp->p_ucred;
	struct proc *p;

	if (ucred->cr_uid != 0 && ucred->cr_ruid != 0 &&
	    ucred->cr_uid != chgpr->ps_ucred->cr_uid &&
	    ucred->cr_ruid != chgpr->ps_ucred->cr_uid)
		return (EPERM);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	n += NZERO;
	if (n < chgpr->ps_nice && suser(curp))
		return (EACCES);
	chgpr->ps_nice = n;
	mtx_enter(&chgpr->ps_mtx);
	SCHED_LOCK();
	TAILQ_FOREACH(p, &chgpr->ps_threads, p_thr_link) {
		setpriority(p, p->p_estcpu, n);
	}
	SCHED_UNLOCK();
	mtx_leave(&chgpr->ps_mtx);
	return (0);
}
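/*
 * Worked example of the NZERO bias (assuming the usual values
 * NZERO == 20, PRIO_MIN == -20, PRIO_MAX == 20): setpriority(..., -5)
 * is clamped to [-20, 20] and stored as ps_nice = -5 + NZERO = 15;
 * getpriority() later returns 15 - NZERO = -5.  The kernel thus keeps
 * nice values in the non-negative range [0, 2 * NZERO].
 */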
int
sys_setrlimit(struct proc *p, void *v, register_t *retval)
{
	struct sys_setrlimit_args /* {
		syscallarg(int) which;
		syscallarg(const struct rlimit *) rlp;
	} */ *uap = v;
	struct rlimit alim;
	int error;

	error = copyin((caddr_t)SCARG(uap, rlp), (caddr_t)&alim,
	    sizeof (struct rlimit));
	if (error)
		return (error);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_STRUCT))
		ktrrlimit(p, &alim);
#endif
	return (dosetrlimit(p, SCARG(uap, which), &alim));
}

int
dosetrlimit(struct proc *p, u_int which, struct rlimit *limp)
{
	struct rlimit *alimp;
	struct plimit *limit;
	rlim_t maxlim;
	int error;

	if (which >= RLIM_NLIMITS || limp->rlim_cur > limp->rlim_max)
		return (EINVAL);

	rw_enter_write(&rlimit_lock);

	alimp = &p->p_p->ps_limit->pl_rlimit[which];
	if (limp->rlim_max > alimp->rlim_max) {
		if ((error = suser(p)) != 0) {
			rw_exit_write(&rlimit_lock);
			return (error);
		}
	}

	/* Get exclusive write access to the limit structure. */
	limit = lim_write_begin();
	alimp = &limit->pl_rlimit[which];

	switch (which) {
	case RLIMIT_DATA:
		maxlim = maxdmap;
		break;
	case RLIMIT_STACK:
		maxlim = maxsmap;
		break;
	case RLIMIT_NOFILE:
		maxlim = atomic_load_int(&maxfiles);
		break;
	case RLIMIT_NPROC:
		maxlim = atomic_load_int(&maxprocess);
		break;
	default:
		maxlim = RLIM_INFINITY;
		break;
	}

	if (limp->rlim_max > maxlim)
		limp->rlim_max = maxlim;
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;

	if (which == RLIMIT_CPU && limp->rlim_cur != RLIM_INFINITY &&
	    alimp->rlim_cur == RLIM_INFINITY)
		timeout_add_msec(&p->p_p->ps_rucheck_to, RUCHECK_INTERVAL);

	if (which == RLIMIT_STACK) {
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If stack limit is going
		 * up make more accessible, if going down make inaccessible.
		 */
		if (limp->rlim_cur != alimp->rlim_cur) {
			vaddr_t addr;
			vsize_t size;
			vm_prot_t prot;
			struct vmspace *vm = p->p_vmspace;

			if (limp->rlim_cur > alimp->rlim_cur) {
				prot = PROT_READ | PROT_WRITE;
				size = limp->rlim_cur - alimp->rlim_cur;
#ifdef MACHINE_STACK_GROWS_UP
				addr = (vaddr_t)vm->vm_maxsaddr +
				    alimp->rlim_cur;
#else
				addr = (vaddr_t)vm->vm_minsaddr -
				    limp->rlim_cur;
#endif
			} else {
				prot = PROT_NONE;
				size = alimp->rlim_cur - limp->rlim_cur;
#ifdef MACHINE_STACK_GROWS_UP
				addr = (vaddr_t)vm->vm_maxsaddr +
				    limp->rlim_cur;
#else
				addr = (vaddr_t)vm->vm_minsaddr -
				    alimp->rlim_cur;
#endif
			}
			addr = trunc_page(addr);
			size = round_page(size);
			KERNEL_LOCK();
			(void) uvm_map_protect(&vm->vm_map, addr,
			    addr+size, prot, UVM_ET_STACK, FALSE, FALSE);
			KERNEL_UNLOCK();
		}
	}

	*alimp = *limp;

	lim_write_commit(limit);
	rw_exit_write(&rlimit_lock);

	return (0);
}

int
sys_getrlimit(struct proc *p, void *v, register_t *retval)
{
	struct sys_getrlimit_args /* {
		syscallarg(int) which;
		syscallarg(struct rlimit *) rlp;
	} */ *uap = v;
	struct plimit *limit;
	struct rlimit alimp;
	int error;

	if (SCARG(uap, which) < 0 || SCARG(uap, which) >= RLIM_NLIMITS)
		return (EINVAL);
	limit = lim_read_enter();
	alimp = limit->pl_rlimit[SCARG(uap, which)];
	lim_read_leave(limit);
	error = copyout(&alimp, SCARG(uap, rlp), sizeof(struct rlimit));
#ifdef KTRACE
	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
		ktrrlimit(p, &alimp);
#endif
	return (error);
}
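/*
 * The tusage counters are read lock-free using the tu_gen generation
 * number; tuagg_sumup() below is the reader side.  The writer side,
 * tu_enter()/tu_leave() (defined in <sys/resourcevar.h> and used further
 * down), is assumed to look roughly like the following sketch: make
 * tu_gen odd before touching the counters and even again afterwards, so
 * a reader that observes the same even value before and after copying
 * knows its copy is consistent.
 *
 *	tu->tu_gen++;		odd: update in progress
 *	membar_producer();
 *	... modify tu_uticks, tu_sticks, tu_iticks, tu_runtime ...
 *	membar_producer();
 *	tu->tu_gen++;		even: update finished
 */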
/* Add the counts from *from to *tu, ensuring a consistent read of *from. */
void
tuagg_sumup(struct tusage *tu, const struct tusage *from)
{
	struct tusage tmp;
	uint64_t enter, leave;

	enter = from->tu_gen;
	for (;;) {
		/* the generation number is odd during an update */
		while (enter & 1) {
			CPU_BUSY_CYCLE();
			enter = from->tu_gen;
		}

		membar_consumer();
		tmp = *from;
		membar_consumer();
		leave = from->tu_gen;

		if (enter == leave)
			break;
		enter = leave;
	}

	tu->tu_uticks += tmp.tu_uticks;
	tu->tu_sticks += tmp.tu_sticks;
	tu->tu_iticks += tmp.tu_iticks;
	timespecadd(&tu->tu_runtime, &tmp.tu_runtime, &tu->tu_runtime);
}

void
tuagg_get_proc(struct tusage *tu, struct proc *p)
{
	memset(tu, 0, sizeof(*tu));
	tuagg_sumup(tu, &p->p_tu);
}

void
tuagg_get_process(struct tusage *tu, struct process *pr)
{
	struct proc *q;

	memset(tu, 0, sizeof(*tu));

	mtx_enter(&pr->ps_mtx);
	tuagg_sumup(tu, &pr->ps_tu);
	/* add on all living threads */
	TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link)
		tuagg_sumup(tu, &q->p_tu);
	mtx_leave(&pr->ps_mtx);
}

/*
 * Update the process ps_tu usage with the values from proc p;
 * while doing so, the times for proc p are reset.
 * This requires that p is either curproc or SDEAD and that the
 * IPL is higher than IPL_STATCLOCK.  ps_mtx uses IPL_HIGH so
 * this should always be the case.
 */
void
tuagg_add_process(struct process *pr, struct proc *p)
{
	MUTEX_ASSERT_LOCKED(&pr->ps_mtx);
	KASSERT(curproc == p || p->p_stat == SDEAD);

	tu_enter(&pr->ps_tu);
	tuagg_sumup(&pr->ps_tu, &p->p_tu);
	tu_leave(&pr->ps_tu);

	/* Now reset CPU time usage for the thread. */
	timespecclear(&p->p_tu.tu_runtime);
	p->p_tu.tu_uticks = p->p_tu.tu_sticks = p->p_tu.tu_iticks = 0;
}

void
tuagg_add_runtime(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *p = curproc;
	struct timespec ts;

	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	nanouptime(&ts);
	if (timespeccmp(&ts, &spc->spc_runtime, <)) {
#if 0
		printf("uptime is not monotonic! "
		    "ts=%lld.%09lu, runtime=%lld.%09lu\n",
		    (long long)ts.tv_sec, ts.tv_nsec,
		    (long long)spc->spc_runtime.tv_sec,
		    spc->spc_runtime.tv_nsec);
#endif
		timespecclear(&ts);
	} else {
		timespecsub(&ts, &spc->spc_runtime, &ts);
	}
	/* update spc_runtime */
	spc->spc_runtime = ts;
	tu_enter(&p->p_tu);
	timespecadd(&p->p_tu.tu_runtime, &ts, &p->p_tu.tu_runtime);
	tu_leave(&p->p_tu);
}

/*
 * Transform the running time and tick information in a struct tusage
 * into user, system, and interrupt time usage.
 */
void
calctsru(struct tusage *tup, struct timespec *up, struct timespec *sp,
    struct timespec *ip)
{
	u_quad_t st, ut, it;

	st = tup->tu_sticks;
	ut = tup->tu_uticks;
	it = tup->tu_iticks;

	if (st + ut + it == 0) {
		timespecclear(up);
		timespecclear(sp);
		if (ip != NULL)
			timespecclear(ip);
		return;
	}

	st = st * 1000000000 / stathz;
	sp->tv_sec = st / 1000000000;
	sp->tv_nsec = st % 1000000000;
	ut = ut * 1000000000 / stathz;
	up->tv_sec = ut / 1000000000;
	up->tv_nsec = ut % 1000000000;
	if (ip != NULL) {
		it = it * 1000000000 / stathz;
		ip->tv_sec = it / 1000000000;
		ip->tv_nsec = it % 1000000000;
	}
}
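/*
 * Worked example: with stathz at 100 Hz, tu_sticks == 250 scales to
 * 250 * 1000000000 / 100 = 2500000000 ns, i.e. sp->tv_sec = 2 and
 * sp->tv_nsec = 500000000, so 250 statclock ticks report as 2.5 s of
 * system time.
 */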
480 */ 481 void 482 calctsru(struct tusage *tup, struct timespec *up, struct timespec *sp, 483 struct timespec *ip) 484 { 485 u_quad_t st, ut, it; 486 487 st = tup->tu_sticks; 488 ut = tup->tu_uticks; 489 it = tup->tu_iticks; 490 491 if (st + ut + it == 0) { 492 timespecclear(up); 493 timespecclear(sp); 494 if (ip != NULL) 495 timespecclear(ip); 496 return; 497 } 498 499 st = st * 1000000000 / stathz; 500 sp->tv_sec = st / 1000000000; 501 sp->tv_nsec = st % 1000000000; 502 ut = ut * 1000000000 / stathz; 503 up->tv_sec = ut / 1000000000; 504 up->tv_nsec = ut % 1000000000; 505 if (ip != NULL) { 506 it = it * 1000000000 / stathz; 507 ip->tv_sec = it / 1000000000; 508 ip->tv_nsec = it % 1000000000; 509 } 510 } 511 512 void 513 calcru(struct tusage *tup, struct timeval *up, struct timeval *sp, 514 struct timeval *ip) 515 { 516 struct timespec u, s, i; 517 518 calctsru(tup, &u, &s, ip != NULL ? &i : NULL); 519 TIMESPEC_TO_TIMEVAL(up, &u); 520 TIMESPEC_TO_TIMEVAL(sp, &s); 521 if (ip != NULL) 522 TIMESPEC_TO_TIMEVAL(ip, &i); 523 } 524 525 int 526 sys_getrusage(struct proc *p, void *v, register_t *retval) 527 { 528 struct sys_getrusage_args /* { 529 syscallarg(int) who; 530 syscallarg(struct rusage *) rusage; 531 } */ *uap = v; 532 struct rusage ru; 533 int error; 534 535 error = dogetrusage(p, SCARG(uap, who), &ru); 536 if (error == 0) { 537 error = copyout(&ru, SCARG(uap, rusage), sizeof(ru)); 538 #ifdef KTRACE 539 if (error == 0 && KTRPOINT(p, KTR_STRUCT)) 540 ktrrusage(p, &ru); 541 #endif 542 } 543 return (error); 544 } 545 546 int 547 dogetrusage(struct proc *p, int who, struct rusage *rup) 548 { 549 struct process *pr = p->p_p; 550 struct proc *q; 551 struct tusage tu = { 0 }; 552 553 KERNEL_ASSERT_LOCKED(); 554 555 switch (who) { 556 case RUSAGE_SELF: 557 /* start with the sum of dead threads, if any */ 558 if (pr->ps_ru != NULL) 559 *rup = *pr->ps_ru; 560 else 561 memset(rup, 0, sizeof(*rup)); 562 tuagg_sumup(&tu, &pr->ps_tu); 563 564 /* add on all living threads */ 565 TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) { 566 ruadd(rup, &q->p_ru); 567 tuagg_sumup(&tu, &q->p_tu); 568 } 569 570 calcru(&tu, &rup->ru_utime, &rup->ru_stime, NULL); 571 break; 572 573 case RUSAGE_THREAD: 574 *rup = p->p_ru; 575 calcru(&p->p_tu, &rup->ru_utime, &rup->ru_stime, NULL); 576 break; 577 578 case RUSAGE_CHILDREN: 579 *rup = pr->ps_cru; 580 break; 581 582 default: 583 return (EINVAL); 584 } 585 return (0); 586 } 587 588 void 589 ruadd(struct rusage *ru, const struct rusage *ru2) 590 { 591 long *ip; 592 const long *ip2; 593 int i; 594 595 timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime); 596 timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime); 597 if (ru->ru_maxrss < ru2->ru_maxrss) 598 ru->ru_maxrss = ru2->ru_maxrss; 599 ip = &ru->ru_first; ip2 = &ru2->ru_first; 600 for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) 601 *ip++ += *ip2++; 602 } 603 604 /* 605 * Check if the process exceeds its cpu resource allocation. 606 * If over max, kill it. 
607 */ 608 void 609 rucheck(void *arg) 610 { 611 struct rlimit rlim; 612 struct tusage tu = { 0 }; 613 struct process *pr = arg; 614 struct proc *q; 615 time_t runtime; 616 617 KERNEL_ASSERT_LOCKED(); 618 619 mtx_enter(&pr->ps_mtx); 620 rlim = pr->ps_limit->pl_rlimit[RLIMIT_CPU]; 621 tuagg_sumup(&tu, &pr->ps_tu); 622 TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) 623 tuagg_sumup(&tu, &q->p_tu); 624 mtx_leave(&pr->ps_mtx); 625 626 runtime = tu.tu_runtime.tv_sec; 627 628 if ((rlim_t)runtime >= rlim.rlim_cur) { 629 if ((rlim_t)runtime >= rlim.rlim_max) { 630 prsignal(pr, SIGKILL); 631 } else if (runtime >= pr->ps_nextxcpu) { 632 prsignal(pr, SIGXCPU); 633 pr->ps_nextxcpu = runtime + SIGXCPU_INTERVAL; 634 } 635 } 636 637 timeout_add_msec(&pr->ps_rucheck_to, RUCHECK_INTERVAL); 638 } 639 640 struct pool plimit_pool; 641 642 void 643 lim_startup(struct plimit *limit0) 644 { 645 rlim_t lim; 646 int i; 647 648 pool_init(&plimit_pool, sizeof(struct plimit), 0, IPL_MPFLOOR, 649 PR_WAITOK, "plimitpl", NULL); 650 651 for (i = 0; i < nitems(limit0->pl_rlimit); i++) 652 limit0->pl_rlimit[i].rlim_cur = 653 limit0->pl_rlimit[i].rlim_max = RLIM_INFINITY; 654 limit0->pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE; 655 limit0->pl_rlimit[RLIMIT_NOFILE].rlim_max = MIN(NOFILE_MAX, 656 (maxfiles - NOFILE > NOFILE) ? maxfiles - NOFILE : NOFILE); 657 limit0->pl_rlimit[RLIMIT_NPROC].rlim_cur = MAXUPRC; 658 lim = ptoa(uvmexp.free); 659 limit0->pl_rlimit[RLIMIT_RSS].rlim_max = lim; 660 lim = ptoa(64*1024); /* Default to very low */ 661 limit0->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim; 662 limit0->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3; 663 refcnt_init(&limit0->pl_refcnt); 664 } 665 666 /* 667 * Make a copy of the plimit structure. 668 * We share these structures copy-on-write after fork, 669 * and copy when a limit is changed. 670 */ 671 struct plimit * 672 lim_copy(struct plimit *lim) 673 { 674 struct plimit *newlim; 675 676 newlim = pool_get(&plimit_pool, PR_WAITOK); 677 memcpy(newlim->pl_rlimit, lim->pl_rlimit, 678 sizeof(struct rlimit) * RLIM_NLIMITS); 679 refcnt_init(&newlim->pl_refcnt); 680 return (newlim); 681 } 682 683 void 684 lim_free(struct plimit *lim) 685 { 686 if (refcnt_rele(&lim->pl_refcnt) == 0) 687 return; 688 pool_put(&plimit_pool, lim); 689 } 690 691 void 692 lim_fork(struct process *parent, struct process *child) 693 { 694 struct plimit *limit; 695 696 mtx_enter(&parent->ps_mtx); 697 limit = parent->ps_limit; 698 refcnt_take(&limit->pl_refcnt); 699 mtx_leave(&parent->ps_mtx); 700 701 child->ps_limit = limit; 702 703 if (limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) 704 timeout_add_msec(&child->ps_rucheck_to, RUCHECK_INTERVAL); 705 } 706 707 /* 708 * Return an exclusive write reference to the process' resource limit structure. 709 * The caller has to release the structure by calling lim_write_commit(). 710 * 711 * This invalidates any plimit read reference held by the calling thread. 712 */ 713 struct plimit * 714 lim_write_begin(void) 715 { 716 struct plimit *limit; 717 struct proc *p = curproc; 718 719 rw_assert_wrlock(&rlimit_lock); 720 721 if (p->p_limit != NULL) 722 lim_free(p->p_limit); 723 p->p_limit = NULL; 724 725 /* 726 * It is safe to access ps_limit here without holding ps_mtx 727 * because rlimit_lock excludes other writers. 728 */ 729 730 limit = p->p_p->ps_limit; 731 if (P_HASSIBLING(p) || refcnt_shared(&limit->pl_refcnt)) 732 limit = lim_copy(limit); 733 734 return (limit); 735 } 736 737 /* 738 * Finish exclusive write access to the plimit structure. 
/*
 * Return an exclusive write reference to the process' resource limit structure.
 * The caller has to release the structure by calling lim_write_commit().
 *
 * This invalidates any plimit read reference held by the calling thread.
 */
struct plimit *
lim_write_begin(void)
{
	struct plimit *limit;
	struct proc *p = curproc;

	rw_assert_wrlock(&rlimit_lock);

	if (p->p_limit != NULL)
		lim_free(p->p_limit);
	p->p_limit = NULL;

	/*
	 * It is safe to access ps_limit here without holding ps_mtx
	 * because rlimit_lock excludes other writers.
	 */

	limit = p->p_p->ps_limit;
	if (P_HASSIBLING(p) || refcnt_shared(&limit->pl_refcnt))
		limit = lim_copy(limit);

	return (limit);
}

/*
 * Finish exclusive write access to the plimit structure.
 * This makes the structure visible to other threads in the process.
 */
void
lim_write_commit(struct plimit *limit)
{
	struct plimit *olimit;
	struct proc *p = curproc;

	rw_assert_wrlock(&rlimit_lock);

	if (limit != p->p_p->ps_limit) {
		mtx_enter(&p->p_p->ps_mtx);
		olimit = p->p_p->ps_limit;
		p->p_p->ps_limit = limit;
		mtx_leave(&p->p_p->ps_mtx);

		lim_free(olimit);
	}
}

/*
 * Begin read access to the process' resource limit structure.
 * The access has to be finished by calling lim_read_leave().
 *
 * Sections denoted by lim_read_enter() and lim_read_leave() cannot nest.
 */
struct plimit *
lim_read_enter(void)
{
	struct plimit *limit;
	struct proc *p = curproc;
	struct process *pr = p->p_p;

	/*
	 * This thread might not observe the latest value of ps_limit
	 * if another thread updated the limits very recently on another CPU.
	 * However, the anomaly should disappear quickly, especially if
	 * there is any synchronization activity between the threads (or
	 * the CPUs).
	 */

	limit = p->p_limit;
	if (limit != pr->ps_limit) {
		mtx_enter(&pr->ps_mtx);
		limit = pr->ps_limit;
		refcnt_take(&limit->pl_refcnt);
		mtx_leave(&pr->ps_mtx);
		if (p->p_limit != NULL)
			lim_free(p->p_limit);
		p->p_limit = limit;
	}
	KASSERT(limit != NULL);
	return (limit);
}

/*
 * Get the value of the resource limit in given process.
 */
rlim_t
lim_cur_proc(struct proc *p, int which)
{
	struct process *pr = p->p_p;
	rlim_t val;

	KASSERT(which >= 0 && which < RLIM_NLIMITS);

	mtx_enter(&pr->ps_mtx);
	val = pr->ps_limit->pl_rlimit[which].rlim_cur;
	mtx_leave(&pr->ps_mtx);
	return (val);
}
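/*
 * A minimal reader section, mirroring sys_getrlimit() above:
 *
 *	struct plimit *limit;
 *	rlim_t cur;
 *
 *	limit = lim_read_enter();
 *	cur = limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur;
 *	lim_read_leave(limit);
 *
 * For a single value, lim_cur_proc() above takes ps_mtx instead and
 * avoids touching the per-thread p_limit cache.
 */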