/*	$OpenBSD: kern_resource.c,v 1.93 2024/11/10 06:45:36 jsg Exp $	*/
/*	$NetBSD: kern_resource.c,v 1.38 1996/10/23 07:19:38 matthias Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/resourcevar.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/ktrace.h>
#include <sys/sched.h>
#include <sys/signalvar.h>

#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <uvm/uvm.h>

/* Resource usage check interval in msec */
#define RUCHECK_INTERVAL	1000

/* SIGXCPU interval in seconds of process runtime */
#define SIGXCPU_INTERVAL	5

struct plimit	*lim_copy(struct plimit *);
struct plimit	*lim_write_begin(void);
void		 lim_write_commit(struct plimit *);

void	tuagg_sumup(struct tusage *, const struct tusage *);

/*
 * Patchable maximum data and stack limits.
 */
rlim_t maxdmap = MAXDSIZ;
rlim_t maxsmap = MAXSSIZ;

/*
 * Serializes resource limit updates.
 * This lock has to be held together with ps_mtx when updating
 * the process' ps_limit.
 */
struct rwlock rlimit_lock = RWLOCK_INITIALIZER("rlimitlk");

/*
 * Resource controls and accounting.
 */

int
sys_getpriority(struct proc *curp, void *v, register_t *retval)
{
	struct sys_getpriority_args /* {
		syscallarg(int) which;
		syscallarg(id_t) who;
	} */ *uap = v;
	struct process *pr;
	int low = NZERO + PRIO_MAX + 1;

	switch (SCARG(uap, which)) {

	case PRIO_PROCESS:
		if (SCARG(uap, who) == 0)
			pr = curp->p_p;
		else
			pr = prfind(SCARG(uap, who));
		if (pr == NULL)
			break;
		if (pr->ps_nice < low)
			low = pr->ps_nice;
		break;

	case PRIO_PGRP: {
		struct pgrp *pg;

		if (SCARG(uap, who) == 0)
			pg = curp->p_p->ps_pgrp;
		else if ((pg = pgfind(SCARG(uap, who))) == NULL)
			break;
		LIST_FOREACH(pr, &pg->pg_members, ps_pglist)
			if (pr->ps_nice < low)
				low = pr->ps_nice;
		break;
	}

	case PRIO_USER:
		if (SCARG(uap, who) == 0)
			SCARG(uap, who) = curp->p_ucred->cr_uid;
		LIST_FOREACH(pr, &allprocess, ps_list)
			if (pr->ps_ucred->cr_uid == SCARG(uap, who) &&
			    pr->ps_nice < low)
				low = pr->ps_nice;
		break;

	default:
		return (EINVAL);
	}
	if (low == NZERO + PRIO_MAX + 1)
		return (ESRCH);
	*retval = low - NZERO;
	return (0);
}

int
sys_setpriority(struct proc *curp, void *v, register_t *retval)
{
	struct sys_setpriority_args /* {
		syscallarg(int) which;
		syscallarg(id_t) who;
		syscallarg(int) prio;
	} */ *uap = v;
	struct process *pr;
	int found = 0, error = 0;

	switch (SCARG(uap, which)) {

	case PRIO_PROCESS:
		if (SCARG(uap, who) == 0)
			pr = curp->p_p;
		else
			pr = prfind(SCARG(uap, who));
		if (pr == NULL)
			break;
		error = donice(curp, pr, SCARG(uap, prio));
		found = 1;
		break;

	case PRIO_PGRP: {
		struct pgrp *pg;

		if (SCARG(uap, who) == 0)
			pg = curp->p_p->ps_pgrp;
		else if ((pg = pgfind(SCARG(uap, who))) == NULL)
			break;
		LIST_FOREACH(pr, &pg->pg_members, ps_pglist) {
			error = donice(curp, pr, SCARG(uap, prio));
			found = 1;
		}
		break;
	}

	case PRIO_USER:
		if (SCARG(uap, who) == 0)
			SCARG(uap, who) = curp->p_ucred->cr_uid;
		LIST_FOREACH(pr, &allprocess, ps_list)
			if (pr->ps_ucred->cr_uid == SCARG(uap, who)) {
				error = donice(curp, pr, SCARG(uap, prio));
				found = 1;
			}
		break;

	default:
		return (EINVAL);
	}
	if (!found)
		return (ESRCH);
	return (error);
}

int
donice(struct proc *curp, struct process *chgpr, int n)
{
	struct ucred *ucred = curp->p_ucred;
	struct proc *p;

	if (ucred->cr_uid != 0 && ucred->cr_ruid != 0 &&
	    ucred->cr_uid != chgpr->ps_ucred->cr_uid &&
	    ucred->cr_ruid != chgpr->ps_ucred->cr_uid)
		return (EPERM);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	n += NZERO;
	if (n < chgpr->ps_nice && suser(curp))
		return (EACCES);
	chgpr->ps_nice = n;
	mtx_enter(&chgpr->ps_mtx);
	SCHED_LOCK();
	TAILQ_FOREACH(p, &chgpr->ps_threads, p_thr_link) {
		setpriority(p, p->p_estcpu, n);
	}
	SCHED_UNLOCK();
	mtx_leave(&chgpr->ps_mtx);
	return (0);
}
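
/*
 * Worked example for the nice mapping in donice() above, assuming
 * NZERO == 20 (its usual value): the requested value is clamped to
 * [PRIO_MIN, PRIO_MAX] and stored biased by NZERO, so the user-visible
 * range -20..20 maps onto ps_nice 0..40.  setpriority(PRIO_PROCESS, 0, 10)
 * thus stores ps_nice = 30, and a later getpriority(PRIO_PROCESS, 0)
 * returns ps_nice - NZERO = 10.  Lowering a process' nice value (raising
 * its priority) is reserved to root by the suser() check.
 */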

int
sys_setrlimit(struct proc *p, void *v, register_t *retval)
{
	struct sys_setrlimit_args /* {
		syscallarg(int) which;
		syscallarg(const struct rlimit *) rlp;
	} */ *uap = v;
	struct rlimit alim;
	int error;

	error = copyin((caddr_t)SCARG(uap, rlp), (caddr_t)&alim,
	    sizeof (struct rlimit));
	if (error)
		return (error);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_STRUCT))
		ktrrlimit(p, &alim);
#endif
	return (dosetrlimit(p, SCARG(uap, which), &alim));
}

int
dosetrlimit(struct proc *p, u_int which, struct rlimit *limp)
{
	struct rlimit *alimp;
	struct plimit *limit;
	rlim_t maxlim;
	int error;

	if (which >= RLIM_NLIMITS || limp->rlim_cur > limp->rlim_max)
		return (EINVAL);

	rw_enter_write(&rlimit_lock);

	alimp = &p->p_p->ps_limit->pl_rlimit[which];
	if (limp->rlim_max > alimp->rlim_max) {
		if ((error = suser(p)) != 0) {
			rw_exit_write(&rlimit_lock);
			return (error);
		}
	}

	/* Get exclusive write access to the limit structure. */
	limit = lim_write_begin();
	alimp = &limit->pl_rlimit[which];

	switch (which) {
	case RLIMIT_DATA:
		maxlim = maxdmap;
		break;
	case RLIMIT_STACK:
		maxlim = maxsmap;
		break;
	case RLIMIT_NOFILE:
		maxlim = atomic_load_int(&maxfiles);
		break;
	case RLIMIT_NPROC:
		maxlim = atomic_load_int(&maxprocess);
		break;
	default:
		maxlim = RLIM_INFINITY;
		break;
	}

	if (limp->rlim_max > maxlim)
		limp->rlim_max = maxlim;
	if (limp->rlim_cur > limp->rlim_max)
		limp->rlim_cur = limp->rlim_max;

	if (which == RLIMIT_CPU && limp->rlim_cur != RLIM_INFINITY &&
	    alimp->rlim_cur == RLIM_INFINITY)
		timeout_add_msec(&p->p_p->ps_rucheck_to, RUCHECK_INTERVAL);

	if (which == RLIMIT_STACK) {
		/*
		 * Stack is allocated to the max at exec time with only
		 * "rlim_cur" bytes accessible.  If stack limit is going
		 * up make more accessible, if going down make inaccessible.
		 */
		if (limp->rlim_cur != alimp->rlim_cur) {
			vaddr_t addr;
			vsize_t size;
			vm_prot_t prot;
			struct vmspace *vm = p->p_vmspace;

			if (limp->rlim_cur > alimp->rlim_cur) {
				prot = PROT_READ | PROT_WRITE;
				size = limp->rlim_cur - alimp->rlim_cur;
#ifdef MACHINE_STACK_GROWS_UP
				addr = (vaddr_t)vm->vm_maxsaddr +
				    alimp->rlim_cur;
#else
				addr = (vaddr_t)vm->vm_minsaddr -
				    limp->rlim_cur;
#endif
			} else {
				prot = PROT_NONE;
				size = alimp->rlim_cur - limp->rlim_cur;
#ifdef MACHINE_STACK_GROWS_UP
				addr = (vaddr_t)vm->vm_maxsaddr +
				    limp->rlim_cur;
#else
				addr = (vaddr_t)vm->vm_minsaddr -
				    alimp->rlim_cur;
#endif
			}
			addr = trunc_page(addr);
			size = round_page(size);
			KERNEL_LOCK();
			(void) uvm_map_protect(&vm->vm_map, addr,
			    addr+size, prot, UVM_ET_STACK, FALSE, FALSE);
			KERNEL_UNLOCK();
		}
	}

	*alimp = *limp;

	lim_write_commit(limit);
	rw_exit_write(&rlimit_lock);

	return (0);
}

int
sys_getrlimit(struct proc *p, void *v, register_t *retval)
{
	struct sys_getrlimit_args /* {
		syscallarg(int) which;
		syscallarg(struct rlimit *) rlp;
	} */ *uap = v;
	struct plimit *limit;
	struct rlimit alimp;
	int error;

	if (SCARG(uap, which) < 0 || SCARG(uap, which) >= RLIM_NLIMITS)
		return (EINVAL);
	limit = lim_read_enter();
	alimp = limit->pl_rlimit[SCARG(uap, which)];
	lim_read_leave(limit);
	error = copyout(&alimp, SCARG(uap, rlp), sizeof(struct rlimit));
#ifdef KTRACE
	if (error == 0 && KTRPOINT(p, KTR_STRUCT))
		ktrrlimit(p, &alimp);
#endif
	return (error);
}
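
/*
 * Usage sketch for the two syscalls above, which back getrlimit(2) and
 * setrlimit(2).  A caller that wants as many descriptors as its hard
 * limit allows would do roughly:
 *
 *	struct rlimit rl;
 *
 *	if (getrlimit(RLIMIT_NOFILE, &rl) == 0) {
 *		rl.rlim_cur = rl.rlim_max;
 *		(void)setrlimit(RLIMIT_NOFILE, &rl);
 *	}
 *
 * Raising rlim_max above the current hard limit requires root (suser()),
 * and dosetrlimit() additionally caps RLIMIT_NOFILE at maxfiles.
 */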

/* Add the counts from *from to *tu, ensuring a consistent read of *from. */
void
tuagg_sumup(struct tusage *tu, const struct tusage *from)
{
	struct tusage tmp;
	uint64_t enter, leave;

	enter = from->tu_gen;
	for (;;) {
		/* the generation number is odd during an update */
		while (enter & 1) {
			CPU_BUSY_CYCLE();
			enter = from->tu_gen;
		}

		membar_consumer();
		tmp = *from;
		membar_consumer();
		leave = from->tu_gen;

		if (enter == leave)
			break;
		enter = leave;
	}

	tu->tu_uticks += tmp.tu_uticks;
	tu->tu_sticks += tmp.tu_sticks;
	tu->tu_iticks += tmp.tu_iticks;
	timespecadd(&tu->tu_runtime, &tmp.tu_runtime, &tu->tu_runtime);
}

void
tuagg_get_proc(struct tusage *tu, struct proc *p)
{
	memset(tu, 0, sizeof(*tu));
	tuagg_sumup(tu, &p->p_tu);
}

void
tuagg_get_process(struct tusage *tu, struct process *pr)
{
	struct proc *q;

	memset(tu, 0, sizeof(*tu));

	mtx_enter(&pr->ps_mtx);
	tuagg_sumup(tu, &pr->ps_tu);
	/* add on all living threads */
	TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link)
		tuagg_sumup(tu, &q->p_tu);
	mtx_leave(&pr->ps_mtx);
}

/*
 * Update the process ps_tu usage with the values from proc p; while
 * doing so, the times for proc p are reset.
 * This requires that p is either curproc or SDEAD and that the
 * IPL is higher than IPL_STATCLOCK.  ps_mtx uses IPL_HIGH so
 * this should always be the case.
 */
void
tuagg_add_process(struct process *pr, struct proc *p)
{
	MUTEX_ASSERT_LOCKED(&pr->ps_mtx);
	KASSERT(curproc == p || p->p_stat == SDEAD);

	tu_enter(&pr->ps_tu);
	tuagg_sumup(&pr->ps_tu, &p->p_tu);
	tu_leave(&pr->ps_tu);

	/* Now reset CPU time usage for the thread. */
	timespecclear(&p->p_tu.tu_runtime);
	p->p_tu.tu_uticks = p->p_tu.tu_sticks = p->p_tu.tu_iticks = 0;
}

void
tuagg_add_runtime(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *p = curproc;
	struct timespec ts, delta;

	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	nanouptime(&ts);
	if (timespeccmp(&ts, &spc->spc_runtime, <)) {
#if 0
		printf("uptime is not monotonic! "
		    "ts=%lld.%09lu, runtime=%lld.%09lu\n",
		    (long long)ts.tv_sec, ts.tv_nsec,
		    (long long)spc->spc_runtime.tv_sec,
		    spc->spc_runtime.tv_nsec);
#endif
		timespecclear(&delta);
	} else {
		timespecsub(&ts, &spc->spc_runtime, &delta);
	}
	/* update spc_runtime */
	spc->spc_runtime = ts;
	tu_enter(&p->p_tu);
	timespecadd(&p->p_tu.tu_runtime, &delta, &p->p_tu.tu_runtime);
	tu_leave(&p->p_tu);
}
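
/*
 * The lock-free read in tuagg_sumup() pairs with a seqlock-style writer.
 * tu_enter() and tu_leave() (sys/resourcevar.h) bump tu_gen so that it is
 * odd for the duration of an update, roughly:
 *
 *	tu_enter(tu):   ++tu->tu_gen;        (now odd: update in progress)
 *	                membar_producer();
 *	    ... update tu_runtime and the tick counters ...
 *	tu_leave(tu):   membar_producer();
 *	                ++tu->tu_gen;        (even again: update done)
 *
 * Readers spin while the generation is odd and retry if it changed across
 * the copy, so they never observe a torn struct tusage.
 */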

/*
 * Transform the running time and tick information in a struct tusage
 * into user, system, and interrupt time usage.
 */
void
calctsru(struct tusage *tup, struct timespec *up, struct timespec *sp,
    struct timespec *ip)
{
	u_quad_t st, ut, it;

	st = tup->tu_sticks;
	ut = tup->tu_uticks;
	it = tup->tu_iticks;

	if (st + ut + it == 0) {
		timespecclear(up);
		timespecclear(sp);
		if (ip != NULL)
			timespecclear(ip);
		return;
	}

	st = st * 1000000000 / stathz;
	sp->tv_sec = st / 1000000000;
	sp->tv_nsec = st % 1000000000;
	ut = ut * 1000000000 / stathz;
	up->tv_sec = ut / 1000000000;
	up->tv_nsec = ut % 1000000000;
	if (ip != NULL) {
		it = it * 1000000000 / stathz;
		ip->tv_sec = it / 1000000000;
		ip->tv_nsec = it % 1000000000;
	}
}

void
calcru(struct tusage *tup, struct timeval *up, struct timeval *sp,
    struct timeval *ip)
{
	struct timespec u, s, i;

	calctsru(tup, &u, &s, ip != NULL ? &i : NULL);
	TIMESPEC_TO_TIMEVAL(up, &u);
	TIMESPEC_TO_TIMEVAL(sp, &s);
	if (ip != NULL)
		TIMESPEC_TO_TIMEVAL(ip, &i);
}

int
sys_getrusage(struct proc *p, void *v, register_t *retval)
{
	struct sys_getrusage_args /* {
		syscallarg(int) who;
		syscallarg(struct rusage *) rusage;
	} */ *uap = v;
	struct rusage ru;
	int error;

	error = dogetrusage(p, SCARG(uap, who), &ru);
	if (error == 0) {
		error = copyout(&ru, SCARG(uap, rusage), sizeof(ru));
#ifdef KTRACE
		if (error == 0 && KTRPOINT(p, KTR_STRUCT))
			ktrrusage(p, &ru);
#endif
	}
	return (error);
}

int
dogetrusage(struct proc *p, int who, struct rusage *rup)
{
	struct process *pr = p->p_p;
	struct proc *q;
	struct tusage tu = { 0 };

	KERNEL_ASSERT_LOCKED();

	switch (who) {
	case RUSAGE_SELF:
		/* start with the sum of dead threads, if any */
		if (pr->ps_ru != NULL)
			*rup = *pr->ps_ru;
		else
			memset(rup, 0, sizeof(*rup));
		tuagg_sumup(&tu, &pr->ps_tu);

		/* add on all living threads */
		TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
			ruadd(rup, &q->p_ru);
			tuagg_sumup(&tu, &q->p_tu);
		}

		calcru(&tu, &rup->ru_utime, &rup->ru_stime, NULL);
		break;

	case RUSAGE_THREAD:
		*rup = p->p_ru;
		calcru(&p->p_tu, &rup->ru_utime, &rup->ru_stime, NULL);
		break;

	case RUSAGE_CHILDREN:
		*rup = pr->ps_cru;
		break;

	default:
		return (EINVAL);
	}
	return (0);
}

void
ruadd(struct rusage *ru, const struct rusage *ru2)
{
	long *ip;
	const long *ip2;
	int i;

	timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
	timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first; ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}
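
/*
 * Worked example for the tick conversion in calctsru() above: with
 * stathz == 100 (only an example value; the real rate is machine
 * dependent) and tu_sticks == 250, st becomes 250 * 1000000000 / 100 =
 * 2500000000 ns, giving sp->tv_sec = 2 and sp->tv_nsec = 500000000,
 * i.e. 2.5 seconds of system time.
 */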

/*
 * Check if the process exceeds its cpu resource allocation.
 * If over max, kill it.
 */
void
rucheck(void *arg)
{
	struct rlimit rlim;
	struct tusage tu = { 0 };
	struct process *pr = arg;
	struct proc *q;
	time_t runtime;

	KERNEL_ASSERT_LOCKED();

	mtx_enter(&pr->ps_mtx);
	rlim = pr->ps_limit->pl_rlimit[RLIMIT_CPU];
	tuagg_sumup(&tu, &pr->ps_tu);
	TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link)
		tuagg_sumup(&tu, &q->p_tu);
	mtx_leave(&pr->ps_mtx);

	runtime = tu.tu_runtime.tv_sec;

	if ((rlim_t)runtime >= rlim.rlim_cur) {
		if ((rlim_t)runtime >= rlim.rlim_max) {
			prsignal(pr, SIGKILL);
		} else if (runtime >= pr->ps_nextxcpu) {
			prsignal(pr, SIGXCPU);
			pr->ps_nextxcpu = runtime + SIGXCPU_INTERVAL;
		}
	}

	timeout_add_msec(&pr->ps_rucheck_to, RUCHECK_INTERVAL);
}

struct pool plimit_pool;

void
lim_startup(struct plimit *limit0)
{
	rlim_t lim;
	int i;

	pool_init(&plimit_pool, sizeof(struct plimit), 0, IPL_MPFLOOR,
	    PR_WAITOK, "plimitpl", NULL);

	for (i = 0; i < nitems(limit0->pl_rlimit); i++)
		limit0->pl_rlimit[i].rlim_cur =
		    limit0->pl_rlimit[i].rlim_max = RLIM_INFINITY;
	limit0->pl_rlimit[RLIMIT_NOFILE].rlim_cur = NOFILE;
	limit0->pl_rlimit[RLIMIT_NOFILE].rlim_max = MIN(NOFILE_MAX,
	    (maxfiles - NOFILE > NOFILE) ? maxfiles - NOFILE : NOFILE);
	limit0->pl_rlimit[RLIMIT_NPROC].rlim_cur = MAXUPRC;
	lim = ptoa(uvmexp.free);
	limit0->pl_rlimit[RLIMIT_RSS].rlim_max = lim;
	lim = ptoa(64*1024);		/* Default to very low */
	limit0->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
	limit0->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
	refcnt_init(&limit0->pl_refcnt);
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork,
 * and copy when a limit is changed.
 */
struct plimit *
lim_copy(struct plimit *lim)
{
	struct plimit *newlim;

	newlim = pool_get(&plimit_pool, PR_WAITOK);
	memcpy(newlim->pl_rlimit, lim->pl_rlimit,
	    sizeof(struct rlimit) * RLIM_NLIMITS);
	refcnt_init(&newlim->pl_refcnt);
	return (newlim);
}

void
lim_free(struct plimit *lim)
{
	if (refcnt_rele(&lim->pl_refcnt) == 0)
		return;
	pool_put(&plimit_pool, lim);
}

void
lim_fork(struct process *parent, struct process *child)
{
	struct plimit *limit;

	mtx_enter(&parent->ps_mtx);
	limit = parent->ps_limit;
	refcnt_take(&limit->pl_refcnt);
	mtx_leave(&parent->ps_mtx);

	child->ps_limit = limit;

	if (limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY)
		timeout_add_msec(&child->ps_rucheck_to, RUCHECK_INTERVAL);
}

/*
 * Return an exclusive write reference to the process' resource limit
 * structure.  The caller has to release the structure by calling
 * lim_write_commit().
 *
 * This invalidates any plimit read reference held by the calling thread.
 */
struct plimit *
lim_write_begin(void)
{
	struct plimit *limit;
	struct proc *p = curproc;

	rw_assert_wrlock(&rlimit_lock);

	if (p->p_limit != NULL)
		lim_free(p->p_limit);
	p->p_limit = NULL;

	/*
	 * It is safe to access ps_limit here without holding ps_mtx
	 * because rlimit_lock excludes other writers.
	 */

	limit = p->p_p->ps_limit;
	if (P_HASSIBLING(p) || refcnt_shared(&limit->pl_refcnt))
		limit = lim_copy(limit);

	return (limit);
}
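
/*
 * A limit update, as in dosetrlimit() above, follows roughly this
 * sequence:
 *
 *	rw_enter_write(&rlimit_lock);
 *	limit = lim_write_begin();	(private copy if still shared)
 *	limit->pl_rlimit[which] = *limp;
 *	lim_write_commit(limit);	(publish under ps_mtx, below)
 *	rw_exit_write(&rlimit_lock);
 *
 * rlimit_lock keeps concurrent writers out while the copy is modified;
 * ps_mtx makes the pointer swap visible to lim_read_enter() readers.
 */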

/*
 * Finish exclusive write access to the plimit structure.
 * This makes the structure visible to other threads in the process.
 */
void
lim_write_commit(struct plimit *limit)
{
	struct plimit *olimit;
	struct proc *p = curproc;

	rw_assert_wrlock(&rlimit_lock);

	if (limit != p->p_p->ps_limit) {
		mtx_enter(&p->p_p->ps_mtx);
		olimit = p->p_p->ps_limit;
		p->p_p->ps_limit = limit;
		mtx_leave(&p->p_p->ps_mtx);

		lim_free(olimit);
	}
}

/*
 * Begin read access to the process' resource limit structure.
 * The access has to be finished by calling lim_read_leave().
 *
 * Sections denoted by lim_read_enter() and lim_read_leave() cannot nest.
 */
struct plimit *
lim_read_enter(void)
{
	struct plimit *limit;
	struct proc *p = curproc;
	struct process *pr = p->p_p;

	/*
	 * This thread might not observe the latest value of ps_limit
	 * if another thread updated the limits very recently on another CPU.
	 * However, the anomaly should disappear quickly, especially if
	 * there is any synchronization activity between the threads (or
	 * the CPUs).
	 */

	limit = p->p_limit;
	if (limit != pr->ps_limit) {
		mtx_enter(&pr->ps_mtx);
		limit = pr->ps_limit;
		refcnt_take(&limit->pl_refcnt);
		mtx_leave(&pr->ps_mtx);
		if (p->p_limit != NULL)
			lim_free(p->p_limit);
		p->p_limit = limit;
	}
	KASSERT(limit != NULL);
	return (limit);
}

/*
 * Get the value of the resource limit in the given process.
 */
rlim_t
lim_cur_proc(struct proc *p, int which)
{
	struct process *pr = p->p_p;
	rlim_t val;

	KASSERT(which >= 0 && which < RLIM_NLIMITS);

	mtx_enter(&pr->ps_mtx);
	val = pr->ps_limit->pl_rlimit[which].rlim_cur;
	mtx_leave(&pr->ps_mtx);
	return (val);
}