1 /* $NetBSD: kern_resource.c,v 1.139 2008/04/24 18:39:24 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 1982, 1986, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)kern_resource.c 8.8 (Berkeley) 2/14/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: kern_resource.c,v 1.139 2008/04/24 18:39:24 ad Exp $"); 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/kernel.h> 45 #include <sys/file.h> 46 #include <sys/resourcevar.h> 47 #include <sys/malloc.h> 48 #include <sys/kmem.h> 49 #include <sys/namei.h> 50 #include <sys/pool.h> 51 #include <sys/proc.h> 52 #include <sys/sysctl.h> 53 #include <sys/timevar.h> 54 #include <sys/kauth.h> 55 #include <sys/atomic.h> 56 #include <sys/mount.h> 57 #include <sys/syscallargs.h> 58 #include <sys/atomic.h> 59 60 #include <uvm/uvm_extern.h> 61 62 /* 63 * Maximum process data and stack limits. 64 * They are variables so they are patchable. 65 */ 66 rlim_t maxdmap = MAXDSIZ; 67 rlim_t maxsmap = MAXSSIZ; 68 69 static SLIST_HEAD(uihashhead, uidinfo) *uihashtbl; 70 static u_long uihash; 71 72 #define UIHASH(uid) (&uihashtbl[(uid) & uihash]) 73 74 static pool_cache_t plimit_cache; 75 static pool_cache_t pstats_cache; 76 77 void 78 resource_init(void) 79 { 80 /* 81 * In case of MP system, SLIST_FOREACH would force a cache line 82 * write-back for every modified 'uidinfo', thus we try to keep the 83 * lists short. 84 */ 85 const u_int uihash_sz = (maxproc > 1 ? 1024 : 64); 86 87 plimit_cache = pool_cache_init(sizeof(struct plimit), 0, 0, 0, 88 "plimitpl", NULL, IPL_NONE, NULL, NULL, NULL); 89 pstats_cache = pool_cache_init(sizeof(struct pstats), 0, 0, 0, 90 "pstatspl", NULL, IPL_NONE, NULL, NULL, NULL); 91 uihashtbl = hashinit(uihash_sz, HASH_SLIST, M_PROC, M_WAITOK, &uihash); 92 } 93 94 /* 95 * Resource controls and accounting. 96 */ 97 98 int 99 sys_getpriority(struct lwp *l, const struct sys_getpriority_args *uap, 100 register_t *retval) 101 { 102 /* { 103 syscallarg(int) which; 104 syscallarg(id_t) who; 105 } */ 106 struct proc *curp = l->l_proc, *p; 107 int low = NZERO + PRIO_MAX + 1; 108 int who = SCARG(uap, who); 109 110 mutex_enter(proc_lock); 111 switch (SCARG(uap, which)) { 112 case PRIO_PROCESS: 113 if (who == 0) 114 p = curp; 115 else 116 p = p_find(who, PFIND_LOCKED); 117 if (p != NULL) 118 low = p->p_nice; 119 break; 120 121 case PRIO_PGRP: { 122 struct pgrp *pg; 123 124 if (who == 0) 125 pg = curp->p_pgrp; 126 else if ((pg = pg_find(who, PFIND_LOCKED)) == NULL) 127 break; 128 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 129 if (p->p_nice < low) 130 low = p->p_nice; 131 } 132 break; 133 } 134 135 case PRIO_USER: 136 if (who == 0) 137 who = (int)kauth_cred_geteuid(l->l_cred); 138 PROCLIST_FOREACH(p, &allproc) { 139 mutex_enter(p->p_lock); 140 if (kauth_cred_geteuid(p->p_cred) == 141 (uid_t)who && p->p_nice < low) 142 low = p->p_nice; 143 mutex_exit(p->p_lock); 144 } 145 break; 146 147 default: 148 mutex_exit(proc_lock); 149 return (EINVAL); 150 } 151 mutex_exit(proc_lock); 152 153 if (low == NZERO + PRIO_MAX + 1) 154 return (ESRCH); 155 *retval = low - NZERO; 156 return (0); 157 } 158 159 /* ARGSUSED */ 160 int 161 sys_setpriority(struct lwp *l, const struct sys_setpriority_args *uap, 162 register_t *retval) 163 { 164 /* { 165 syscallarg(int) which; 166 syscallarg(id_t) who; 167 syscallarg(int) prio; 168 } */ 169 struct proc *curp = l->l_proc, *p; 170 int found = 0, error = 0; 171 int who = SCARG(uap, who); 172 173 mutex_enter(proc_lock); 174 switch (SCARG(uap, which)) { 175 case PRIO_PROCESS: 176 if (who == 0) 177 p = curp; 178 else 179 p = p_find(who, PFIND_LOCKED); 180 if (p != 0) { 181 mutex_enter(p->p_lock); 182 error = donice(l, p, SCARG(uap, prio)); 183 mutex_exit(p->p_lock); 184 } 185 found++; 186 break; 187 188 case PRIO_PGRP: { 189 struct pgrp *pg; 190 191 if (who == 0) 192 pg = curp->p_pgrp; 193 else if ((pg = pg_find(who, PFIND_LOCKED)) == NULL) 194 break; 195 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 196 mutex_enter(p->p_lock); 197 error = donice(l, p, SCARG(uap, prio)); 198 mutex_exit(p->p_lock); 199 found++; 200 } 201 break; 202 } 203 204 case PRIO_USER: 205 if (who == 0) 206 who = (int)kauth_cred_geteuid(l->l_cred); 207 PROCLIST_FOREACH(p, &allproc) { 208 mutex_enter(p->p_lock); 209 if (kauth_cred_geteuid(p->p_cred) == 210 (uid_t)SCARG(uap, who)) { 211 error = donice(l, p, SCARG(uap, prio)); 212 found++; 213 } 214 mutex_exit(p->p_lock); 215 } 216 break; 217 218 default: 219 error = EINVAL; 220 break; 221 } 222 mutex_exit(proc_lock); 223 if (found == 0) 224 return (ESRCH); 225 return (error); 226 } 227 228 /* 229 * Renice a process. 230 * 231 * Call with the target process' credentials locked. 232 */ 233 int 234 donice(struct lwp *l, struct proc *chgp, int n) 235 { 236 kauth_cred_t cred = l->l_cred; 237 238 KASSERT(mutex_owned(chgp->p_lock)); 239 240 if (n > PRIO_MAX) 241 n = PRIO_MAX; 242 if (n < PRIO_MIN) 243 n = PRIO_MIN; 244 n += NZERO; 245 if (kauth_authorize_process(cred, KAUTH_PROCESS_NICE, chgp, 246 KAUTH_ARG(n), NULL, NULL)) 247 return (EACCES); 248 sched_nice(chgp, n); 249 return (0); 250 } 251 252 /* ARGSUSED */ 253 int 254 sys_setrlimit(struct lwp *l, const struct sys_setrlimit_args *uap, 255 register_t *retval) 256 { 257 /* { 258 syscallarg(int) which; 259 syscallarg(const struct rlimit *) rlp; 260 } */ 261 int which = SCARG(uap, which); 262 struct rlimit alim; 263 int error; 264 265 error = copyin(SCARG(uap, rlp), &alim, sizeof(struct rlimit)); 266 if (error) 267 return (error); 268 return (dosetrlimit(l, l->l_proc, which, &alim)); 269 } 270 271 int 272 dosetrlimit(struct lwp *l, struct proc *p, int which, struct rlimit *limp) 273 { 274 struct rlimit *alimp; 275 int error; 276 277 if ((u_int)which >= RLIM_NLIMITS) 278 return (EINVAL); 279 280 if (limp->rlim_cur < 0 || limp->rlim_max < 0) 281 return (EINVAL); 282 283 if (limp->rlim_cur > limp->rlim_max) { 284 /* 285 * This is programming error. According to SUSv2, we should 286 * return error in this case. 287 */ 288 return (EINVAL); 289 } 290 291 alimp = &p->p_rlimit[which]; 292 /* if we don't change the value, no need to limcopy() */ 293 if (limp->rlim_cur == alimp->rlim_cur && 294 limp->rlim_max == alimp->rlim_max) 295 return 0; 296 297 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT, 298 p, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_SET), limp, KAUTH_ARG(which)); 299 if (error) 300 return (error); 301 302 lim_privatise(p, false); 303 /* p->p_limit is now unchangeable */ 304 alimp = &p->p_rlimit[which]; 305 306 switch (which) { 307 308 case RLIMIT_DATA: 309 if (limp->rlim_cur > maxdmap) 310 limp->rlim_cur = maxdmap; 311 if (limp->rlim_max > maxdmap) 312 limp->rlim_max = maxdmap; 313 break; 314 315 case RLIMIT_STACK: 316 if (limp->rlim_cur > maxsmap) 317 limp->rlim_cur = maxsmap; 318 if (limp->rlim_max > maxsmap) 319 limp->rlim_max = maxsmap; 320 321 /* 322 * Return EINVAL if the new stack size limit is lower than 323 * current usage. Otherwise, the process would get SIGSEGV the 324 * moment it would try to access anything on it's current stack. 325 * This conforms to SUSv2. 326 */ 327 if (limp->rlim_cur < p->p_vmspace->vm_ssize * PAGE_SIZE 328 || limp->rlim_max < p->p_vmspace->vm_ssize * PAGE_SIZE) { 329 return (EINVAL); 330 } 331 332 /* 333 * Stack is allocated to the max at exec time with 334 * only "rlim_cur" bytes accessible (In other words, 335 * allocates stack dividing two contiguous regions at 336 * "rlim_cur" bytes boundary). 337 * 338 * Since allocation is done in terms of page, roundup 339 * "rlim_cur" (otherwise, contiguous regions 340 * overlap). If stack limit is going up make more 341 * accessible, if going down make inaccessible. 342 */ 343 limp->rlim_cur = round_page(limp->rlim_cur); 344 if (limp->rlim_cur != alimp->rlim_cur) { 345 vaddr_t addr; 346 vsize_t size; 347 vm_prot_t prot; 348 349 if (limp->rlim_cur > alimp->rlim_cur) { 350 prot = VM_PROT_READ | VM_PROT_WRITE; 351 size = limp->rlim_cur - alimp->rlim_cur; 352 addr = (vaddr_t)p->p_vmspace->vm_minsaddr - 353 limp->rlim_cur; 354 } else { 355 prot = VM_PROT_NONE; 356 size = alimp->rlim_cur - limp->rlim_cur; 357 addr = (vaddr_t)p->p_vmspace->vm_minsaddr - 358 alimp->rlim_cur; 359 } 360 (void) uvm_map_protect(&p->p_vmspace->vm_map, 361 addr, addr+size, prot, false); 362 } 363 break; 364 365 case RLIMIT_NOFILE: 366 if (limp->rlim_cur > maxfiles) 367 limp->rlim_cur = maxfiles; 368 if (limp->rlim_max > maxfiles) 369 limp->rlim_max = maxfiles; 370 break; 371 372 case RLIMIT_NPROC: 373 if (limp->rlim_cur > maxproc) 374 limp->rlim_cur = maxproc; 375 if (limp->rlim_max > maxproc) 376 limp->rlim_max = maxproc; 377 break; 378 } 379 380 mutex_enter(&p->p_limit->pl_lock); 381 *alimp = *limp; 382 mutex_exit(&p->p_limit->pl_lock); 383 return (0); 384 } 385 386 /* ARGSUSED */ 387 int 388 sys_getrlimit(struct lwp *l, const struct sys_getrlimit_args *uap, 389 register_t *retval) 390 { 391 /* { 392 syscallarg(int) which; 393 syscallarg(struct rlimit *) rlp; 394 } */ 395 struct proc *p = l->l_proc; 396 int which = SCARG(uap, which); 397 struct rlimit rl; 398 399 if ((u_int)which >= RLIM_NLIMITS) 400 return (EINVAL); 401 402 mutex_enter(p->p_lock); 403 memcpy(&rl, &p->p_rlimit[which], sizeof(rl)); 404 mutex_exit(p->p_lock); 405 406 return copyout(&rl, SCARG(uap, rlp), sizeof(rl)); 407 } 408 409 /* 410 * Transform the running time and tick information in proc p into user, 411 * system, and interrupt time usage. 412 * 413 * Should be called with p->p_lock held unless called from exit1(). 414 */ 415 void 416 calcru(struct proc *p, struct timeval *up, struct timeval *sp, 417 struct timeval *ip, struct timeval *rp) 418 { 419 uint64_t u, st, ut, it, tot; 420 struct lwp *l; 421 struct bintime tm; 422 struct timeval tv; 423 424 mutex_spin_enter(&p->p_stmutex); 425 st = p->p_sticks; 426 ut = p->p_uticks; 427 it = p->p_iticks; 428 mutex_spin_exit(&p->p_stmutex); 429 430 tm = p->p_rtime; 431 432 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 433 lwp_lock(l); 434 bintime_add(&tm, &l->l_rtime); 435 if ((l->l_flag & LW_RUNNING) != 0) { 436 struct bintime diff; 437 /* 438 * Adjust for the current time slice. This is 439 * actually fairly important since the error 440 * here is on the order of a time quantum, 441 * which is much greater than the sampling 442 * error. 443 */ 444 binuptime(&diff); 445 bintime_sub(&diff, &l->l_stime); 446 bintime_add(&tm, &diff); 447 } 448 lwp_unlock(l); 449 } 450 451 tot = st + ut + it; 452 bintime2timeval(&tm, &tv); 453 u = (uint64_t)tv.tv_sec * 1000000ul + tv.tv_usec; 454 455 if (tot == 0) { 456 /* No ticks, so can't use to share time out, split 50-50 */ 457 st = ut = u / 2; 458 } else { 459 st = (u * st) / tot; 460 ut = (u * ut) / tot; 461 } 462 if (sp != NULL) { 463 sp->tv_sec = st / 1000000; 464 sp->tv_usec = st % 1000000; 465 } 466 if (up != NULL) { 467 up->tv_sec = ut / 1000000; 468 up->tv_usec = ut % 1000000; 469 } 470 if (ip != NULL) { 471 if (it != 0) 472 it = (u * it) / tot; 473 ip->tv_sec = it / 1000000; 474 ip->tv_usec = it % 1000000; 475 } 476 if (rp != NULL) { 477 *rp = tv; 478 } 479 } 480 481 /* ARGSUSED */ 482 int 483 sys_getrusage(struct lwp *l, const struct sys_getrusage_args *uap, 484 register_t *retval) 485 { 486 /* { 487 syscallarg(int) who; 488 syscallarg(struct rusage *) rusage; 489 } */ 490 struct rusage ru; 491 struct proc *p = l->l_proc; 492 493 switch (SCARG(uap, who)) { 494 case RUSAGE_SELF: 495 mutex_enter(p->p_lock); 496 memcpy(&ru, &p->p_stats->p_ru, sizeof(ru)); 497 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL, NULL); 498 rulwps(p, &ru); 499 mutex_exit(p->p_lock); 500 break; 501 502 case RUSAGE_CHILDREN: 503 mutex_enter(p->p_lock); 504 memcpy(&ru, &p->p_stats->p_cru, sizeof(ru)); 505 mutex_exit(p->p_lock); 506 break; 507 508 default: 509 return EINVAL; 510 } 511 512 return copyout(&ru, SCARG(uap, rusage), sizeof(ru)); 513 } 514 515 void 516 ruadd(struct rusage *ru, struct rusage *ru2) 517 { 518 long *ip, *ip2; 519 int i; 520 521 timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime); 522 timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime); 523 if (ru->ru_maxrss < ru2->ru_maxrss) 524 ru->ru_maxrss = ru2->ru_maxrss; 525 ip = &ru->ru_first; ip2 = &ru2->ru_first; 526 for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) 527 *ip++ += *ip2++; 528 } 529 530 void 531 rulwps(proc_t *p, struct rusage *ru) 532 { 533 lwp_t *l; 534 535 KASSERT(mutex_owned(p->p_lock)); 536 537 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 538 ruadd(ru, &l->l_ru); 539 ru->ru_nvcsw += (l->l_ncsw - l->l_nivcsw); 540 ru->ru_nivcsw += l->l_nivcsw; 541 } 542 } 543 544 /* 545 * Make a copy of the plimit structure. 546 * We share these structures copy-on-write after fork, 547 * and copy when a limit is changed. 548 * 549 * Unfortunately (due to PL_SHAREMOD) it is possibly for the structure 550 * we are copying to change beneath our feet! 551 */ 552 struct plimit * 553 lim_copy(struct plimit *lim) 554 { 555 struct plimit *newlim; 556 char *corename; 557 size_t alen, len; 558 559 newlim = pool_cache_get(plimit_cache, PR_WAITOK); 560 mutex_init(&newlim->pl_lock, MUTEX_DEFAULT, IPL_NONE); 561 newlim->pl_flags = 0; 562 newlim->pl_refcnt = 1; 563 newlim->pl_sv_limit = NULL; 564 565 mutex_enter(&lim->pl_lock); 566 memcpy(newlim->pl_rlimit, lim->pl_rlimit, 567 sizeof(struct rlimit) * RLIM_NLIMITS); 568 569 alen = 0; 570 corename = NULL; 571 for (;;) { 572 if (lim->pl_corename == defcorename) { 573 newlim->pl_corename = defcorename; 574 break; 575 } 576 len = strlen(lim->pl_corename) + 1; 577 if (len <= alen) { 578 newlim->pl_corename = corename; 579 memcpy(corename, lim->pl_corename, len); 580 corename = NULL; 581 break; 582 } 583 mutex_exit(&lim->pl_lock); 584 if (corename != NULL) 585 free(corename, M_TEMP); 586 alen = len; 587 corename = malloc(alen, M_TEMP, M_WAITOK); 588 mutex_enter(&lim->pl_lock); 589 } 590 mutex_exit(&lim->pl_lock); 591 if (corename != NULL) 592 free(corename, M_TEMP); 593 return newlim; 594 } 595 596 void 597 lim_addref(struct plimit *lim) 598 { 599 atomic_inc_uint(&lim->pl_refcnt); 600 } 601 602 /* 603 * Give a process it's own private plimit structure. 604 * This will only be shared (in fork) if modifications are to be shared. 605 */ 606 void 607 lim_privatise(struct proc *p, bool set_shared) 608 { 609 struct plimit *lim, *newlim; 610 611 lim = p->p_limit; 612 if (lim->pl_flags & PL_WRITEABLE) { 613 if (set_shared) 614 lim->pl_flags |= PL_SHAREMOD; 615 return; 616 } 617 618 if (set_shared && lim->pl_flags & PL_SHAREMOD) 619 return; 620 621 newlim = lim_copy(lim); 622 623 mutex_enter(p->p_lock); 624 if (p->p_limit->pl_flags & PL_WRITEABLE) { 625 /* Someone crept in while we were busy */ 626 mutex_exit(p->p_lock); 627 limfree(newlim); 628 if (set_shared) 629 p->p_limit->pl_flags |= PL_SHAREMOD; 630 return; 631 } 632 633 /* 634 * Since most accesses to p->p_limit aren't locked, we must not 635 * delete the old limit structure yet. 636 */ 637 newlim->pl_sv_limit = p->p_limit; 638 newlim->pl_flags |= PL_WRITEABLE; 639 if (set_shared) 640 newlim->pl_flags |= PL_SHAREMOD; 641 p->p_limit = newlim; 642 mutex_exit(p->p_lock); 643 } 644 645 void 646 limfree(struct plimit *lim) 647 { 648 struct plimit *sv_lim; 649 650 do { 651 if (atomic_dec_uint_nv(&lim->pl_refcnt) > 0) 652 return; 653 if (lim->pl_corename != defcorename) 654 free(lim->pl_corename, M_TEMP); 655 sv_lim = lim->pl_sv_limit; 656 mutex_destroy(&lim->pl_lock); 657 pool_cache_put(plimit_cache, lim); 658 } while ((lim = sv_lim) != NULL); 659 } 660 661 struct pstats * 662 pstatscopy(struct pstats *ps) 663 { 664 665 struct pstats *newps; 666 667 newps = pool_cache_get(pstats_cache, PR_WAITOK); 668 669 memset(&newps->pstat_startzero, 0, 670 (unsigned) ((char *)&newps->pstat_endzero - 671 (char *)&newps->pstat_startzero)); 672 memcpy(&newps->pstat_startcopy, &ps->pstat_startcopy, 673 ((char *)&newps->pstat_endcopy - 674 (char *)&newps->pstat_startcopy)); 675 676 return (newps); 677 678 } 679 680 void 681 pstatsfree(struct pstats *ps) 682 { 683 684 pool_cache_put(pstats_cache, ps); 685 } 686 687 /* 688 * sysctl interface in five parts 689 */ 690 691 /* 692 * a routine for sysctl proc subtree helpers that need to pick a valid 693 * process by pid. 694 */ 695 static int 696 sysctl_proc_findproc(struct lwp *l, struct proc **p2, pid_t pid) 697 { 698 struct proc *ptmp; 699 int error = 0; 700 701 if (pid == PROC_CURPROC) 702 ptmp = l->l_proc; 703 else if ((ptmp = pfind(pid)) == NULL) 704 error = ESRCH; 705 706 *p2 = ptmp; 707 return (error); 708 } 709 710 /* 711 * sysctl helper routine for setting a process's specific corefile 712 * name. picks the process based on the given pid and checks the 713 * correctness of the new value. 714 */ 715 static int 716 sysctl_proc_corename(SYSCTLFN_ARGS) 717 { 718 struct proc *ptmp; 719 struct plimit *lim; 720 int error = 0, len; 721 char *cname; 722 char *ocore; 723 char *tmp; 724 struct sysctlnode node; 725 726 /* 727 * is this all correct? 728 */ 729 if (namelen != 0) 730 return (EINVAL); 731 if (name[-1] != PROC_PID_CORENAME) 732 return (EINVAL); 733 734 /* 735 * whom are we tweaking? 736 */ 737 error = sysctl_proc_findproc(l, &ptmp, (pid_t)name[-2]); 738 if (error) 739 return (error); 740 741 /* XXX-elad */ 742 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, ptmp, 743 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL); 744 if (error) 745 return (error); 746 747 if (newp == NULL) { 748 error = kauth_authorize_process(l->l_cred, 749 KAUTH_PROCESS_CORENAME, ptmp, 750 KAUTH_ARG(KAUTH_REQ_PROCESS_CORENAME_GET), NULL, NULL); 751 if (error) 752 return (error); 753 } 754 755 /* 756 * let them modify a temporary copy of the core name 757 */ 758 cname = PNBUF_GET(); 759 lim = ptmp->p_limit; 760 mutex_enter(&lim->pl_lock); 761 strlcpy(cname, lim->pl_corename, MAXPATHLEN); 762 mutex_exit(&lim->pl_lock); 763 764 node = *rnode; 765 node.sysctl_data = cname; 766 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 767 768 /* 769 * if that failed, or they have nothing new to say, or we've 770 * heard it before... 771 */ 772 if (error || newp == NULL) 773 goto done; 774 lim = ptmp->p_limit; 775 mutex_enter(&lim->pl_lock); 776 error = strcmp(cname, lim->pl_corename); 777 mutex_exit(&lim->pl_lock); 778 if (error == 0) 779 /* Unchanged */ 780 goto done; 781 782 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CORENAME, 783 ptmp, KAUTH_ARG(KAUTH_REQ_PROCESS_CORENAME_SET), cname, NULL); 784 if (error) 785 return (error); 786 787 /* 788 * no error yet and cname now has the new core name in it. 789 * let's see if it looks acceptable. it must be either "core" 790 * or end in ".core" or "/core". 791 */ 792 len = strlen(cname); 793 if (len < 4) { 794 error = EINVAL; 795 } else if (strcmp(cname + len - 4, "core") != 0) { 796 error = EINVAL; 797 } else if (len > 4 && cname[len - 5] != '/' && cname[len - 5] != '.') { 798 error = EINVAL; 799 } 800 if (error != 0) { 801 goto done; 802 } 803 804 /* 805 * hmm...looks good. now...where do we put it? 806 */ 807 tmp = malloc(len + 1, M_TEMP, M_WAITOK|M_CANFAIL); 808 if (tmp == NULL) { 809 error = ENOMEM; 810 goto done; 811 } 812 memcpy(tmp, cname, len + 1); 813 814 lim_privatise(ptmp, false); 815 lim = ptmp->p_limit; 816 mutex_enter(&lim->pl_lock); 817 ocore = lim->pl_corename; 818 lim->pl_corename = tmp; 819 mutex_exit(&lim->pl_lock); 820 if (ocore != defcorename) 821 free(ocore, M_TEMP); 822 823 done: 824 PNBUF_PUT(cname); 825 return error; 826 } 827 828 /* 829 * sysctl helper routine for checking/setting a process's stop flags, 830 * one for fork and one for exec. 831 */ 832 static int 833 sysctl_proc_stop(SYSCTLFN_ARGS) 834 { 835 struct proc *ptmp; 836 int i, f, error = 0; 837 struct sysctlnode node; 838 839 if (namelen != 0) 840 return (EINVAL); 841 842 error = sysctl_proc_findproc(l, &ptmp, (pid_t)name[-2]); 843 if (error) 844 return (error); 845 846 /* XXX-elad */ 847 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, ptmp, 848 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL); 849 if (error) 850 return (error); 851 852 switch (rnode->sysctl_num) { 853 case PROC_PID_STOPFORK: 854 f = PS_STOPFORK; 855 break; 856 case PROC_PID_STOPEXEC: 857 f = PS_STOPEXEC; 858 break; 859 case PROC_PID_STOPEXIT: 860 f = PS_STOPEXIT; 861 break; 862 default: 863 return (EINVAL); 864 } 865 866 i = (ptmp->p_flag & f) ? 1 : 0; 867 node = *rnode; 868 node.sysctl_data = &i; 869 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 870 if (error || newp == NULL) 871 return (error); 872 873 mutex_enter(ptmp->p_lock); 874 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_STOPFLAG, 875 ptmp, KAUTH_ARG(f), NULL, NULL); 876 if (error) 877 return (error); 878 if (i) 879 ptmp->p_sflag |= f; 880 else 881 ptmp->p_sflag &= ~f; 882 mutex_exit(ptmp->p_lock); 883 884 return (0); 885 } 886 887 /* 888 * sysctl helper routine for a process's rlimits as exposed by sysctl. 889 */ 890 static int 891 sysctl_proc_plimit(SYSCTLFN_ARGS) 892 { 893 struct proc *ptmp; 894 u_int limitno; 895 int which, error = 0; 896 struct rlimit alim; 897 struct sysctlnode node; 898 899 if (namelen != 0) 900 return (EINVAL); 901 902 which = name[-1]; 903 if (which != PROC_PID_LIMIT_TYPE_SOFT && 904 which != PROC_PID_LIMIT_TYPE_HARD) 905 return (EINVAL); 906 907 limitno = name[-2] - 1; 908 if (limitno >= RLIM_NLIMITS) 909 return (EINVAL); 910 911 if (name[-3] != PROC_PID_LIMIT) 912 return (EINVAL); 913 914 error = sysctl_proc_findproc(l, &ptmp, (pid_t)name[-4]); 915 if (error) 916 return (error); 917 918 /* XXX-elad */ 919 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, ptmp, 920 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL); 921 if (error) 922 return (error); 923 924 /* Check if we can view limits. */ 925 if (newp == NULL) { 926 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT, 927 ptmp, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_GET), &alim, 928 KAUTH_ARG(which)); 929 if (error) 930 return (error); 931 } 932 933 node = *rnode; 934 memcpy(&alim, &ptmp->p_rlimit[limitno], sizeof(alim)); 935 if (which == PROC_PID_LIMIT_TYPE_HARD) 936 node.sysctl_data = &alim.rlim_max; 937 else 938 node.sysctl_data = &alim.rlim_cur; 939 940 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 941 if (error || newp == NULL) 942 return (error); 943 944 return (dosetrlimit(l, ptmp, limitno, &alim)); 945 } 946 947 /* 948 * and finally, the actually glue that sticks it to the tree 949 */ 950 SYSCTL_SETUP(sysctl_proc_setup, "sysctl proc subtree setup") 951 { 952 953 sysctl_createv(clog, 0, NULL, NULL, 954 CTLFLAG_PERMANENT, 955 CTLTYPE_NODE, "proc", NULL, 956 NULL, 0, NULL, 0, 957 CTL_PROC, CTL_EOL); 958 sysctl_createv(clog, 0, NULL, NULL, 959 CTLFLAG_PERMANENT|CTLFLAG_ANYNUMBER, 960 CTLTYPE_NODE, "curproc", 961 SYSCTL_DESCR("Per-process settings"), 962 NULL, 0, NULL, 0, 963 CTL_PROC, PROC_CURPROC, CTL_EOL); 964 965 sysctl_createv(clog, 0, NULL, NULL, 966 CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, 967 CTLTYPE_STRING, "corename", 968 SYSCTL_DESCR("Core file name"), 969 sysctl_proc_corename, 0, NULL, MAXPATHLEN, 970 CTL_PROC, PROC_CURPROC, PROC_PID_CORENAME, CTL_EOL); 971 sysctl_createv(clog, 0, NULL, NULL, 972 CTLFLAG_PERMANENT, 973 CTLTYPE_NODE, "rlimit", 974 SYSCTL_DESCR("Process limits"), 975 NULL, 0, NULL, 0, 976 CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, CTL_EOL); 977 978 #define create_proc_plimit(s, n) do { \ 979 sysctl_createv(clog, 0, NULL, NULL, \ 980 CTLFLAG_PERMANENT, \ 981 CTLTYPE_NODE, s, \ 982 SYSCTL_DESCR("Process " s " limits"), \ 983 NULL, 0, NULL, 0, \ 984 CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n, \ 985 CTL_EOL); \ 986 sysctl_createv(clog, 0, NULL, NULL, \ 987 CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, \ 988 CTLTYPE_QUAD, "soft", \ 989 SYSCTL_DESCR("Process soft " s " limit"), \ 990 sysctl_proc_plimit, 0, NULL, 0, \ 991 CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n, \ 992 PROC_PID_LIMIT_TYPE_SOFT, CTL_EOL); \ 993 sysctl_createv(clog, 0, NULL, NULL, \ 994 CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, \ 995 CTLTYPE_QUAD, "hard", \ 996 SYSCTL_DESCR("Process hard " s " limit"), \ 997 sysctl_proc_plimit, 0, NULL, 0, \ 998 CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n, \ 999 PROC_PID_LIMIT_TYPE_HARD, CTL_EOL); \ 1000 } while (0/*CONSTCOND*/) 1001 1002 create_proc_plimit("cputime", PROC_PID_LIMIT_CPU); 1003 create_proc_plimit("filesize", PROC_PID_LIMIT_FSIZE); 1004 create_proc_plimit("datasize", PROC_PID_LIMIT_DATA); 1005 create_proc_plimit("stacksize", PROC_PID_LIMIT_STACK); 1006 create_proc_plimit("coredumpsize", PROC_PID_LIMIT_CORE); 1007 create_proc_plimit("memoryuse", PROC_PID_LIMIT_RSS); 1008 create_proc_plimit("memorylocked", PROC_PID_LIMIT_MEMLOCK); 1009 create_proc_plimit("maxproc", PROC_PID_LIMIT_NPROC); 1010 create_proc_plimit("descriptors", PROC_PID_LIMIT_NOFILE); 1011 create_proc_plimit("sbsize", PROC_PID_LIMIT_SBSIZE); 1012 1013 #undef create_proc_plimit 1014 1015 sysctl_createv(clog, 0, NULL, NULL, 1016 CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, 1017 CTLTYPE_INT, "stopfork", 1018 SYSCTL_DESCR("Stop process at fork(2)"), 1019 sysctl_proc_stop, 0, NULL, 0, 1020 CTL_PROC, PROC_CURPROC, PROC_PID_STOPFORK, CTL_EOL); 1021 sysctl_createv(clog, 0, NULL, NULL, 1022 CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, 1023 CTLTYPE_INT, "stopexec", 1024 SYSCTL_DESCR("Stop process at execve(2)"), 1025 sysctl_proc_stop, 0, NULL, 0, 1026 CTL_PROC, PROC_CURPROC, PROC_PID_STOPEXEC, CTL_EOL); 1027 sysctl_createv(clog, 0, NULL, NULL, 1028 CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, 1029 CTLTYPE_INT, "stopexit", 1030 SYSCTL_DESCR("Stop process before completing exit"), 1031 sysctl_proc_stop, 0, NULL, 0, 1032 CTL_PROC, PROC_CURPROC, PROC_PID_STOPEXIT, CTL_EOL); 1033 } 1034 1035 void 1036 uid_init(void) 1037 { 1038 1039 /* 1040 * Ensure that uid 0 is always in the user hash table, as 1041 * sbreserve() expects it available from interrupt context. 1042 */ 1043 (void)uid_find(0); 1044 } 1045 1046 struct uidinfo * 1047 uid_find(uid_t uid) 1048 { 1049 struct uidinfo *uip, *uip_first, *newuip; 1050 struct uihashhead *uipp; 1051 1052 uipp = UIHASH(uid); 1053 newuip = NULL; 1054 1055 /* 1056 * To make insertion atomic, abstraction of SLIST will be violated. 1057 */ 1058 uip_first = uipp->slh_first; 1059 again: 1060 SLIST_FOREACH(uip, uipp, ui_hash) { 1061 if (uip->ui_uid != uid) 1062 continue; 1063 if (newuip != NULL) 1064 kmem_free(newuip, sizeof(*newuip)); 1065 return uip; 1066 } 1067 if (newuip == NULL) 1068 newuip = kmem_zalloc(sizeof(*newuip), KM_SLEEP); 1069 newuip->ui_uid = uid; 1070 1071 /* 1072 * If atomic insert is unsuccessful, another thread might be 1073 * allocated this 'uid', thus full re-check is needed. 1074 */ 1075 newuip->ui_hash.sle_next = uip_first; 1076 membar_producer(); 1077 uip = atomic_cas_ptr(&uipp->slh_first, uip_first, newuip); 1078 if (uip != uip_first) { 1079 uip_first = uip; 1080 goto again; 1081 } 1082 1083 return newuip; 1084 } 1085 1086 /* 1087 * Change the count associated with number of processes 1088 * a given user is using. 1089 */ 1090 int 1091 chgproccnt(uid_t uid, int diff) 1092 { 1093 struct uidinfo *uip; 1094 long proccnt; 1095 1096 uip = uid_find(uid); 1097 proccnt = atomic_add_long_nv(&uip->ui_proccnt, diff); 1098 KASSERT(proccnt >= 0); 1099 return proccnt; 1100 } 1101 1102 int 1103 chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t xmax) 1104 { 1105 rlim_t nsb; 1106 const long diff = to - *hiwat; 1107 1108 nsb = atomic_add_long_nv((long *)&uip->ui_sbsize, diff); 1109 if (diff > 0 && nsb > xmax) { 1110 atomic_add_long((long *)&uip->ui_sbsize, -diff); 1111 return 0; 1112 } 1113 *hiwat = to; 1114 KASSERT(nsb >= 0); 1115 return 1; 1116 } 1117