/*	$NetBSD: kern_resource.c,v 1.73 2003/08/24 17:52:47 chs Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.8 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_resource.c,v 1.73 2003/08/24 17:52:47 chs Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/resourcevar.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/proc.h>

#include <sys/mount.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

/*
 * Maximum process data and stack limits.
 * They are variables so they are patchable.
 *
 * XXXX Do we really need them to be patchable?
 */
rlim_t maxdmap = MAXDSIZ;
rlim_t maxsmap = MAXSSIZ;

/*
 * Resource controls and accounting.
 */

/*
 * getpriority(2) system call.
 *
 * Reports the most favorable (numerically lowest) nice value across the
 * set of processes selected by "which"/"who": a single process
 * (PRIO_PROCESS), a process group (PRIO_PGRP), or all processes owned by
 * a uid (PRIO_USER).  who == 0 means "the caller" (current process,
 * its process group, or its effective uid respectively).
 *
 * p_nice is stored NZERO-biased in the kernel; the biased sentinel
 * NZERO + PRIO_MAX + 1 is used to detect "no process matched", in which
 * case ESRCH is returned.  The result is un-biased before being handed
 * back to userland via *retval.
 */
int
sys_getpriority(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct sys_getpriority_args /* {
		syscallarg(int) which;
		syscallarg(int) who;
	} */ *uap = v;
	struct proc *curp = l->l_proc, *p;
	int low = NZERO + PRIO_MAX + 1;	/* sentinel: larger than any p_nice */

	switch (SCARG(uap, which)) {

	case PRIO_PROCESS:
		if (SCARG(uap, who) == 0)
			p = curp;
		else
			p = pfind(SCARG(uap, who));
		if (p == 0)
			break;			/* not found -> ESRCH below */
		low = p->p_nice;
		break;

	case PRIO_PGRP: {
		struct pgrp *pg;

		if (SCARG(uap, who) == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(SCARG(uap, who))) == NULL)
			break;			/* not found -> ESRCH below */
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			if (p->p_nice < low)
				low = p->p_nice;
		}
		break;
	}

	case PRIO_USER:
		if (SCARG(uap, who) == 0)
			SCARG(uap, who) = curp->p_ucred->cr_uid;
		/* Walk the global process list under the read lock. */
		proclist_lock_read();
		LIST_FOREACH(p, &allproc, p_list) {
			if (p->p_ucred->cr_uid == (uid_t) SCARG(uap, who) &&
			    p->p_nice < low)
				low = p->p_nice;
		}
		proclist_unlock_read();
		break;

	default:
		return (EINVAL);
	}
	if (low == NZERO + PRIO_MAX + 1)
		return (ESRCH);
	*retval = low - NZERO;			/* strip kernel bias */
	return (0);
}

/*
 * setpriority(2) system call.
 *
 * Applies donice() to every process selected by "which"/"who" (same
 * selection rules as sys_getpriority above).  Returns ESRCH if nothing
 * matched.  Note that when several processes are updated, "error" keeps
 * only the result of the last donice() call, so earlier per-process
 * failures may be masked.
 */
/* ARGSUSED */
int
sys_setpriority(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct sys_setpriority_args /* {
		syscallarg(int) which;
		syscallarg(int) who;
		syscallarg(int) prio;
	} */ *uap = v;
	struct proc *curp = l->l_proc, *p;
	int found = 0, error = 0;

	switch (SCARG(uap, which)) {

	case PRIO_PROCESS:
		if (SCARG(uap, who) == 0)
			p = curp;
		else
			p = pfind(SCARG(uap, who));
		if (p == 0)
			break;			/* not found -> ESRCH below */
		error = donice(curp, p, SCARG(uap, prio));
		found++;
		break;

	case PRIO_PGRP: {
		struct pgrp *pg;

		if (SCARG(uap, who) == 0)
			pg = curp->p_pgrp;
		else if ((pg = pgfind(SCARG(uap, who))) == NULL)
			break;			/* not found -> ESRCH below */
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			error = donice(curp, p, SCARG(uap, prio));
			found++;
		}
		break;
	}

	case PRIO_USER:
		if (SCARG(uap, who) == 0)
			SCARG(uap, who) = curp->p_ucred->cr_uid;
		proclist_lock_read();
		LIST_FOREACH(p, &allproc, p_list) {
			if (p->p_ucred->cr_uid == (uid_t) SCARG(uap, who)) {
				error = donice(curp, p, SCARG(uap, prio));
				found++;
			}
		}
		proclist_unlock_read();
		break;

	default:
		return (EINVAL);
	}
	if (found == 0)
		return (ESRCH);
	return (error);
}

/*
 * Change the nice value of process chgp to n, on behalf of curp.
 *
 * Permission: allowed if the caller's effective uid is root, the
 * caller's real uid is root, or either of the caller's uids matches the
 * target's effective uid; otherwise EPERM.  n is clamped to
 * [PRIO_MIN, PRIO_MAX], then NZERO-biased for kernel storage.  Lowering
 * the value (raising priority) additionally requires suser().
 *
 * The scheduler priority is recomputed under SCHED_LOCK so the update
 * is atomic with respect to the scheduler.
 */
int
donice(curp, chgp, n)
	struct proc *curp, *chgp;
	int n;
{
	struct pcred *pcred = curp->p_cred;
	int s;

	if (pcred->pc_ucred->cr_uid && pcred->p_ruid &&
	    pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid &&
	    pcred->p_ruid != chgp->p_ucred->cr_uid)
		return (EPERM);
	if (n > PRIO_MAX)
		n = PRIO_MAX;
	if (n < PRIO_MIN)
		n = PRIO_MIN;
	n += NZERO;				/* bias for kernel storage */
	if (n < chgp->p_nice && suser(pcred->pc_ucred, &curp->p_acflag))
		return (EACCES);
	chgp->p_nice = n;
	SCHED_LOCK(s);
	(void)resetprocpriority(chgp);
	SCHED_UNLOCK(s);
	return (0);
}

/*
 * setrlimit(2) system call: copy the new limit in from userland and
 * hand it to dosetrlimit() for validation and installation.
 */
/* ARGSUSED */
int
sys_setrlimit(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct sys_setrlimit_args /* {
		syscallarg(int) which;
		syscallarg(const struct rlimit *) rlp;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	int which = SCARG(uap, which);
	struct rlimit alim;
	int error;

	error = copyin(SCARG(uap, rlp), &alim, sizeof(struct rlimit));
	if (error)
		return (error);
	return (dosetrlimit(p, p->p_cred, which, &alim));
}

/*
 * Validate and install a new resource limit "which" for process p.
 *
 * - EINVAL for an out-of-range "which", a negative limit, or
 *   rlim_cur > rlim_max (per SUSv2).
 * - Raising rlim_max requires suser().
 * - If the plimit structure is shared (refcnt > 1) and not marked
 *   PL_SHAREMOD, it is copied first (copy-on-write semantics) so the
 *   change does not leak to other sharers; alimp must be refreshed to
 *   point into the new copy.
 * - Several limits are clamped to system-wide maxima (maxdmap, maxsmap,
 *   maxfiles, maxproc).
 * - RLIMIT_STACK additionally adjusts the page protection of the
 *   already-mapped user stack region via uvm_map_protect().
 *
 * NOTE(review): the "limp->rlim_cur < 0" checks are only meaningful if
 * rlim_t is a signed type — verify against <sys/types.h>.
 */
int
dosetrlimit(p, cred, which, limp)
	struct proc *p;
	struct pcred *cred;
	int which;
	struct rlimit *limp;
{
	struct rlimit *alimp;
	struct plimit *newplim;
	int error;

	if ((u_int)which >= RLIM_NLIMITS)
		return (EINVAL);

	if (limp->rlim_cur < 0 || limp->rlim_max < 0)
		return (EINVAL);

	alimp = &p->p_rlimit[which];
	/* if we don't change the value, no need to limcopy() */
	if (limp->rlim_cur == alimp->rlim_cur &&
	    limp->rlim_max == alimp->rlim_max)
		return 0;

	if (limp->rlim_cur > limp->rlim_max) {
		/*
		 * This is programming error. According to SUSv2, we should
		 * return error in this case.
		 */
		return (EINVAL);
	}
	if (limp->rlim_max > alimp->rlim_max
	    && (error = suser(cred->pc_ucred, &p->p_acflag)) != 0)
		return (error);

	/* Copy-on-write: unshare the limit structure before modifying it. */
	if (p->p_limit->p_refcnt > 1 &&
	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
		newplim = limcopy(p->p_limit);
		limfree(p->p_limit);
		p->p_limit = newplim;
		alimp = &p->p_rlimit[which];	/* now points into the copy */
	}

	switch (which) {

	case RLIMIT_DATA:
		if (limp->rlim_cur > maxdmap)
			limp->rlim_cur = maxdmap;
		if (limp->rlim_max > maxdmap)
			limp->rlim_max = maxdmap;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > maxsmap)
			limp->rlim_cur = maxsmap;
		if (limp->rlim_max > maxsmap)
			limp->rlim_max = maxsmap;

		/*
		 * Return EINVAL if the new stack size limit is lower than
		 * current usage. Otherwise, the process would get SIGSEGV the
		 * moment it would try to access anything on it's current stack.
		 * This conforms to SUSv2.
		 */
		if (limp->rlim_cur < p->p_vmspace->vm_ssize * PAGE_SIZE
		    || limp->rlim_max < p->p_vmspace->vm_ssize * PAGE_SIZE)
			return (EINVAL);

		/*
		 * Stack is allocated to the max at exec time with
		 * only "rlim_cur" bytes accessible (In other words,
		 * allocates stack dividing two contiguous regions at
		 * "rlim_cur" bytes boundary).
		 *
		 * Since allocation is done in terms of page, roundup
		 * "rlim_cur" (otherwise, contiguous regions
		 * overlap).  If stack limit is going up make more
		 * accessible, if going down make inaccessible.
		 */
		limp->rlim_cur = round_page(limp->rlim_cur);
		if (limp->rlim_cur != alimp->rlim_cur) {
			vaddr_t addr;
			vsize_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > alimp->rlim_cur) {
				/* Growing: make the delta read/write. */
				prot = VM_PROT_READ | VM_PROT_WRITE;
				size = limp->rlim_cur - alimp->rlim_cur;
				addr = USRSTACK - limp->rlim_cur;
			} else {
				/* Shrinking: revoke access to the delta. */
				prot = VM_PROT_NONE;
				size = alimp->rlim_cur - limp->rlim_cur;
				addr = USRSTACK - alimp->rlim_cur;
			}
			(void) uvm_map_protect(&p->p_vmspace->vm_map,
			    addr, addr+size, prot, FALSE);
		}
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfiles)
			limp->rlim_cur = maxfiles;
		if (limp->rlim_max > maxfiles)
			limp->rlim_max = maxfiles;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxproc)
			limp->rlim_cur = maxproc;
		if (limp->rlim_max > maxproc)
			limp->rlim_max = maxproc;
		break;
	}
	*alimp = *limp;
	return (0);
}

/*
 * getrlimit(2) system call: copy the requested limit out to userland.
 */
/* ARGSUSED */
int
sys_getrlimit(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct sys_getrlimit_args /* {
		syscallarg(int) which;
		syscallarg(struct rlimit *) rlp;
	} */ *uap = v;
	struct proc *p = l->l_proc;
	int which = SCARG(uap, which);

	if ((u_int)which >= RLIM_NLIMITS)
		return (EINVAL);
	return (copyout(&p->p_rlimit[which], SCARG(uap, rlp),
	    sizeof(struct rlimit)));
}

/*
 * Transform the running time and tick information in proc p into user,
 * system, and interrupt time usage.
 */

/*
 * Split the total running time of process p (p_rtime, plus the
 * still-accumulating slice of any currently running LWP) among user,
 * system and interrupt time in proportion to the statclock tick counts
 * p_uticks / p_sticks / p_iticks.
 *
 * up/sp receive user and system time; ip may be NULL if the caller does
 * not want interrupt time.  Tick counters are snapshotted at
 * splstatclock() so the three reads are consistent with each other.
 */
void
calcru(p, up, sp, ip)
	struct proc *p;
	struct timeval *up;
	struct timeval *sp;
	struct timeval *ip;
{
	u_quad_t u, st, ut, it, tot;
	unsigned long sec;
	long usec;
	int s;
	struct timeval tv;
	struct lwp *l;

	/* Snapshot the tick counters atomically w.r.t. the statclock. */
	s = splstatclock();
	st = p->p_sticks;
	ut = p->p_uticks;
	it = p->p_iticks;
	splx(s);

	sec = p->p_rtime.tv_sec;
	usec = p->p_rtime.tv_usec;
	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		if (l->l_stat == LSONPROC) {
			struct schedstate_percpu *spc;

			KDASSERT(l->l_cpu != NULL);
			spc = &l->l_cpu->ci_schedstate;

			/*
			 * Adjust for the current time slice. This is
			 * actually fairly important since the error
			 * here is on the order of a time quantum,
			 * which is much greater than the sampling
			 * error.
			 */
			microtime(&tv);
			sec += tv.tv_sec - spc->spc_runtime.tv_sec;
			usec += tv.tv_usec - spc->spc_runtime.tv_usec;
		}
	}

	tot = st + ut + it;
	u = sec * 1000000ull + usec;		/* total runtime in usec */

	if (tot == 0) {
		/* No ticks, so can't use to share time out, split 50-50 */
		st = ut = u / 2;
	} else {
		st = (u * st) / tot;
		ut = (u * ut) / tot;
	}
	sp->tv_sec = st / 1000000;
	sp->tv_usec = st % 1000000;
	up->tv_sec = ut / 1000000;
	up->tv_usec = ut % 1000000;
	if (ip != NULL) {
		/* it != 0 implies tot != 0, so the division is safe. */
		if (it != 0)
			it = (u * it) / tot;
		ip->tv_sec = it / 1000000;
		ip->tv_usec = it % 1000000;
	}
}

/*
 * getrusage(2) system call.
 *
 * RUSAGE_SELF refreshes ru_utime/ru_stime via calcru() before copying
 * the stats out; RUSAGE_CHILDREN copies the accumulated child usage
 * (p_cru) as-is.
 */
/* ARGSUSED */
int
sys_getrusage(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	struct sys_getrusage_args /* {
		syscallarg(int) who;
		syscallarg(struct rusage *) rusage;
	} */ *uap = v;
	struct rusage *rup;
	struct proc *p = l->l_proc;

	switch (SCARG(uap, who)) {

	case RUSAGE_SELF:
		rup = &p->p_stats->p_ru;
		calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
		break;

	case RUSAGE_CHILDREN:
		rup = &p->p_stats->p_cru;
		break;

	default:
		return (EINVAL);
	}
	return (copyout(rup, SCARG(uap, rusage), sizeof(struct rusage)));
}

/*
 * Accumulate rusage ru2 into ru: times are added with timeradd(),
 * ru_maxrss takes the maximum, and every remaining long field between
 * ru_first and ru_last (inclusive) is summed element-wise.
 */
void
ruadd(ru, ru2)
	struct rusage *ru, *ru2;
{
	long *ip, *ip2;
	int i;

	timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
	timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first; ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork,
 * and copy when a limit is changed.
 *
 * The core-name string is duplicated unless it is the shared
 * defcorename default, which is referenced without copying.  The new
 * structure starts with a reference count of 1 and no flags.
 */
struct plimit *
limcopy(lim)
	struct plimit *lim;
{
	struct plimit *newlim;
	size_t l;

	newlim = pool_get(&plimit_pool, PR_WAITOK);
	memcpy(newlim->pl_rlimit, lim->pl_rlimit,
	    sizeof(struct rlimit) * RLIM_NLIMITS);
	if (lim->pl_corename == defcorename) {
		newlim->pl_corename = defcorename;
	} else {
		l = strlen(lim->pl_corename) + 1;
		newlim->pl_corename = malloc(l, M_TEMP, M_WAITOK);
		strlcpy(newlim->pl_corename, lim->pl_corename, l);
	}
	newlim->p_lflags = 0;
	newlim->p_refcnt = 1;
	return (newlim);
}

/*
 * Drop a reference on a plimit structure; free it (and any private
 * core-name string) when the last reference goes away.
 */
void
limfree(lim)
	struct plimit *lim;
{

	if (--lim->p_refcnt > 0)
		return;
#ifdef DIAGNOSTIC
	if (lim->p_refcnt < 0)
		panic("limfree");
#endif
	/* defcorename is shared, never freed. */
	if (lim->pl_corename != defcorename)
		free(lim->pl_corename, M_TEMP);
	pool_put(&plimit_pool, lim);
}

/*
 * Allocate a new pstats structure for a child process: the
 * startzero..endzero region is cleared and the startcopy..endcopy
 * region is inherited from ps.
 */
struct pstats *
pstatscopy(ps)
	struct pstats *ps;
{

	struct pstats *newps;

	newps = pool_get(&pstats_pool, PR_WAITOK);

	memset(&newps->pstat_startzero, 0,
	    (unsigned) ((caddr_t)&newps->pstat_endzero -
	    (caddr_t)&newps->pstat_startzero));
	memcpy(&newps->pstat_startcopy, &ps->pstat_startcopy,
	    ((caddr_t)&newps->pstat_endcopy -
	    (caddr_t)&newps->pstat_startcopy));

	return (newps);

}

/*
 * Release a pstats structure back to its pool.
 */
void
pstatsfree(ps)
	struct pstats *ps;
{

	pool_put(&pstats_pool, ps);
}