/*	$NetBSD: kern_resource.c,v 1.68 2003/01/18 10:06:29 thorpej Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_resource.c	8.8 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_resource.c,v 1.68 2003/01/18 10:06:29 thorpej Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/resourcevar.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/proc.h>

#include <sys/mount.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

/*
 * Maximum process data and stack limits.
 * They are variables so they are patchable.
 *
 * XXXX Do we really need them to be patchable?
 */
rlim_t maxdmap = MAXDSIZ;
rlim_t maxsmap = MAXSSIZ;

/*
 * Resource controls and accounting.
 */
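
/*
 * A note on nice values (not in the original source, added for
 * clarity): userland works with priorities in the range
 * PRIO_MIN..PRIO_MAX, while p_nice stores them offset by NZERO.
 * That is why "low" below starts at the out-of-range sentinel
 * NZERO + PRIO_MAX + 1 and is converted with "low - NZERO" on return.
 */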
72 */ 73 74 int 75 sys_getpriority(l, v, retval) 76 struct lwp *l; 77 void *v; 78 register_t *retval; 79 { 80 struct sys_getpriority_args /* { 81 syscallarg(int) which; 82 syscallarg(int) who; 83 } */ *uap = v; 84 struct proc *curp = l->l_proc, *p; 85 int low = NZERO + PRIO_MAX + 1; 86 87 switch (SCARG(uap, which)) { 88 89 case PRIO_PROCESS: 90 if (SCARG(uap, who) == 0) 91 p = curp; 92 else 93 p = pfind(SCARG(uap, who)); 94 if (p == 0) 95 break; 96 low = p->p_nice; 97 break; 98 99 case PRIO_PGRP: { 100 struct pgrp *pg; 101 102 if (SCARG(uap, who) == 0) 103 pg = curp->p_pgrp; 104 else if ((pg = pgfind(SCARG(uap, who))) == NULL) 105 break; 106 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 107 if (p->p_nice < low) 108 low = p->p_nice; 109 } 110 break; 111 } 112 113 case PRIO_USER: 114 if (SCARG(uap, who) == 0) 115 SCARG(uap, who) = curp->p_ucred->cr_uid; 116 proclist_lock_read(); 117 LIST_FOREACH(p, &allproc, p_list) { 118 if (p->p_ucred->cr_uid == (uid_t) SCARG(uap, who) && 119 p->p_nice < low) 120 low = p->p_nice; 121 } 122 proclist_unlock_read(); 123 break; 124 125 default: 126 return (EINVAL); 127 } 128 if (low == NZERO + PRIO_MAX + 1) 129 return (ESRCH); 130 *retval = low - NZERO; 131 return (0); 132 } 133 134 /* ARGSUSED */ 135 int 136 sys_setpriority(l, v, retval) 137 struct lwp *l; 138 void *v; 139 register_t *retval; 140 { 141 struct sys_setpriority_args /* { 142 syscallarg(int) which; 143 syscallarg(int) who; 144 syscallarg(int) prio; 145 } */ *uap = v; 146 struct proc *curp = l->l_proc, *p; 147 int found = 0, error = 0; 148 149 switch (SCARG(uap, which)) { 150 151 case PRIO_PROCESS: 152 if (SCARG(uap, who) == 0) 153 p = curp; 154 else 155 p = pfind(SCARG(uap, who)); 156 if (p == 0) 157 break; 158 error = donice(curp, p, SCARG(uap, prio)); 159 found++; 160 break; 161 162 case PRIO_PGRP: { 163 struct pgrp *pg; 164 165 if (SCARG(uap, who) == 0) 166 pg = curp->p_pgrp; 167 else if ((pg = pgfind(SCARG(uap, who))) == NULL) 168 break; 169 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 170 error = donice(curp, p, SCARG(uap, prio)); 171 found++; 172 } 173 break; 174 } 175 176 case PRIO_USER: 177 if (SCARG(uap, who) == 0) 178 SCARG(uap, who) = curp->p_ucred->cr_uid; 179 proclist_lock_read(); 180 LIST_FOREACH(p, &allproc, p_list) { 181 if (p->p_ucred->cr_uid == (uid_t) SCARG(uap, who)) { 182 error = donice(curp, p, SCARG(uap, prio)); 183 found++; 184 } 185 } 186 proclist_unlock_read(); 187 break; 188 189 default: 190 return (EINVAL); 191 } 192 if (found == 0) 193 return (ESRCH); 194 return (error); 195 } 196 197 int 198 donice(curp, chgp, n) 199 struct proc *curp, *chgp; 200 int n; 201 { 202 struct pcred *pcred = curp->p_cred; 203 int s; 204 205 if (pcred->pc_ucred->cr_uid && pcred->p_ruid && 206 pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid && 207 pcred->p_ruid != chgp->p_ucred->cr_uid) 208 return (EPERM); 209 if (n > PRIO_MAX) 210 n = PRIO_MAX; 211 if (n < PRIO_MIN) 212 n = PRIO_MIN; 213 n += NZERO; 214 if (n < chgp->p_nice && suser(pcred->pc_ucred, &curp->p_acflag)) 215 return (EACCES); 216 chgp->p_nice = n; 217 SCHED_LOCK(s); 218 (void)resetprocpriority(chgp); 219 SCHED_UNLOCK(s); 220 return (0); 221 } 222 223 /* ARGSUSED */ 224 int 225 sys_setrlimit(l, v, retval) 226 struct lwp *l; 227 void *v; 228 register_t *retval; 229 { 230 struct sys_setrlimit_args /* { 231 syscallarg(int) which; 232 syscallarg(const struct rlimit *) rlp; 233 } */ *uap = v; 234 struct proc *p = l->l_proc; 235 int which = SCARG(uap, which); 236 struct rlimit alim; 237 int error; 238 239 error = copyin(SCARG(uap, rlp), 
int
dosetrlimit(p, cred, which, limp)
	struct proc *p;
	struct pcred *cred;
	int which;
	struct rlimit *limp;
{
	struct rlimit *alimp;
	struct plimit *newplim;
	int error;

	if ((u_int)which >= RLIM_NLIMITS)
		return (EINVAL);

	if (limp->rlim_cur < 0 || limp->rlim_max < 0)
		return (EINVAL);

	alimp = &p->p_rlimit[which];
	/* if we don't change the value, no need to limcopy() */
	if (limp->rlim_cur == alimp->rlim_cur &&
	    limp->rlim_max == alimp->rlim_max)
		return 0;

	if (limp->rlim_cur > limp->rlim_max) {
		/*
		 * This is a programming error.  According to SUSv2, we
		 * should return an error in this case.
		 */
		return (EINVAL);
	}
	if (limp->rlim_max > alimp->rlim_max
	    && (error = suser(cred->pc_ucred, &p->p_acflag)) != 0)
		return (error);

	if (p->p_limit->p_refcnt > 1 &&
	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
		newplim = limcopy(p->p_limit);
		limfree(p->p_limit);
		p->p_limit = newplim;
		alimp = &p->p_rlimit[which];
	}

	switch (which) {

	case RLIMIT_DATA:
		if (limp->rlim_cur > maxdmap)
			limp->rlim_cur = maxdmap;
		if (limp->rlim_max > maxdmap)
			limp->rlim_max = maxdmap;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > maxsmap)
			limp->rlim_cur = maxsmap;
		if (limp->rlim_max > maxsmap)
			limp->rlim_max = maxsmap;

		/*
		 * Return EINVAL if the new stack size limit is lower than
		 * current usage.  Otherwise, the process would get SIGSEGV
		 * the moment it tried to access anything on its current
		 * stack.  This conforms to SUSv2.
		 */
		if (limp->rlim_cur < p->p_vmspace->vm_ssize * PAGE_SIZE
		    || limp->rlim_max < p->p_vmspace->vm_ssize * PAGE_SIZE)
			return (EINVAL);

		/*
		 * The stack is allocated to the max at exec time, with
		 * only "rlim_cur" bytes accessible (in other words, the
		 * stack is divided into two contiguous regions at the
		 * "rlim_cur" byte boundary).
		 *
		 * Since allocation is done in terms of pages, round up
		 * "rlim_cur" (otherwise the two contiguous regions would
		 * overlap).  If the stack limit is going up, make the new
		 * space accessible; if it is going down, make it
		 * inaccessible.
		 */
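		/*
		 * Worked example (hypothetical numbers, assuming the
		 * stack tops out at USRSTACK and grows down): raising
		 * rlim_cur from 2 MB to 8 MB makes the 6 MB region
		 * [USRSTACK - 8 MB, USRSTACK - 2 MB) accessible with
		 * VM_PROT_ALL; lowering it back revokes access to the
		 * same region with VM_PROT_NONE.
		 */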
322 */ 323 limp->rlim_cur = round_page(limp->rlim_cur); 324 if (limp->rlim_cur != alimp->rlim_cur) { 325 vaddr_t addr; 326 vsize_t size; 327 vm_prot_t prot; 328 329 if (limp->rlim_cur > alimp->rlim_cur) { 330 prot = VM_PROT_ALL; 331 size = limp->rlim_cur - alimp->rlim_cur; 332 addr = USRSTACK - limp->rlim_cur; 333 } else { 334 prot = VM_PROT_NONE; 335 size = alimp->rlim_cur - limp->rlim_cur; 336 addr = USRSTACK - alimp->rlim_cur; 337 } 338 (void) uvm_map_protect(&p->p_vmspace->vm_map, 339 addr, addr+size, prot, FALSE); 340 } 341 break; 342 343 case RLIMIT_NOFILE: 344 if (limp->rlim_cur > maxfiles) 345 limp->rlim_cur = maxfiles; 346 if (limp->rlim_max > maxfiles) 347 limp->rlim_max = maxfiles; 348 break; 349 350 case RLIMIT_NPROC: 351 if (limp->rlim_cur > maxproc) 352 limp->rlim_cur = maxproc; 353 if (limp->rlim_max > maxproc) 354 limp->rlim_max = maxproc; 355 break; 356 } 357 *alimp = *limp; 358 return (0); 359 } 360 361 /* ARGSUSED */ 362 int 363 sys_getrlimit(l, v, retval) 364 struct lwp *l; 365 void *v; 366 register_t *retval; 367 { 368 struct sys_getrlimit_args /* { 369 syscallarg(int) which; 370 syscallarg(struct rlimit *) rlp; 371 } */ *uap = v; 372 struct proc *p = l->l_proc; 373 int which = SCARG(uap, which); 374 375 if ((u_int)which >= RLIM_NLIMITS) 376 return (EINVAL); 377 return (copyout(&p->p_rlimit[which], SCARG(uap, rlp), 378 sizeof(struct rlimit))); 379 } 380 381 /* 382 * Transform the running time and tick information in proc p into user, 383 * system, and interrupt time usage. 384 */ 385 void 386 calcru(p, up, sp, ip) 387 struct proc *p; 388 struct timeval *up; 389 struct timeval *sp; 390 struct timeval *ip; 391 { 392 u_quad_t u, st, ut, it, tot; 393 long sec, usec; 394 int s; 395 struct timeval tv; 396 struct lwp *l; 397 398 s = splstatclock(); 399 st = p->p_sticks; 400 ut = p->p_uticks; 401 it = p->p_iticks; 402 splx(s); 403 404 tot = st + ut + it; 405 if (tot == 0) { 406 up->tv_sec = up->tv_usec = 0; 407 sp->tv_sec = sp->tv_usec = 0; 408 if (ip != NULL) 409 ip->tv_sec = ip->tv_usec = 0; 410 return; 411 } 412 413 sec = p->p_rtime.tv_sec; 414 usec = p->p_rtime.tv_usec; 415 for (l = LIST_FIRST(&p->p_lwps); l != NULL; 416 l = LIST_NEXT(l, l_sibling)) { 417 if (l->l_stat == LSONPROC) { 418 struct schedstate_percpu *spc; 419 420 KDASSERT(l->l_cpu != NULL); 421 spc = &l->l_cpu->ci_schedstate; 422 423 /* 424 * Adjust for the current time slice. This is 425 * actually fairly important since the error 426 * here is on the order of a time quantum, 427 * which is much greater than the sampling 428 * error. 
429 */ 430 microtime(&tv); 431 sec += tv.tv_sec - spc->spc_runtime.tv_sec; 432 usec += tv.tv_usec - spc->spc_runtime.tv_usec; 433 434 break; 435 } 436 } 437 u = (u_quad_t) sec * 1000000 + usec; 438 st = (u * st) / tot; 439 sp->tv_sec = st / 1000000; 440 sp->tv_usec = st % 1000000; 441 ut = (u * ut) / tot; 442 up->tv_sec = ut / 1000000; 443 up->tv_usec = ut % 1000000; 444 if (ip != NULL) { 445 it = (u * it) / tot; 446 ip->tv_sec = it / 1000000; 447 ip->tv_usec = it % 1000000; 448 } 449 } 450 451 /* ARGSUSED */ 452 int 453 sys_getrusage(l, v, retval) 454 struct lwp *l; 455 void *v; 456 register_t *retval; 457 { 458 struct sys_getrusage_args /* { 459 syscallarg(int) who; 460 syscallarg(struct rusage *) rusage; 461 } */ *uap = v; 462 struct rusage *rup; 463 struct proc *p = l->l_proc; 464 465 switch (SCARG(uap, who)) { 466 467 case RUSAGE_SELF: 468 rup = &p->p_stats->p_ru; 469 calcru(p, &rup->ru_utime, &rup->ru_stime, NULL); 470 break; 471 472 case RUSAGE_CHILDREN: 473 rup = &p->p_stats->p_cru; 474 break; 475 476 default: 477 return (EINVAL); 478 } 479 return (copyout(rup, SCARG(uap, rusage), sizeof(struct rusage))); 480 } 481 482 void 483 ruadd(ru, ru2) 484 struct rusage *ru, *ru2; 485 { 486 long *ip, *ip2; 487 int i; 488 489 timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime); 490 timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime); 491 if (ru->ru_maxrss < ru2->ru_maxrss) 492 ru->ru_maxrss = ru2->ru_maxrss; 493 ip = &ru->ru_first; ip2 = &ru2->ru_first; 494 for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) 495 *ip++ += *ip2++; 496 } 497 498 /* 499 * Make a copy of the plimit structure. 500 * We share these structures copy-on-write after fork, 501 * and copy when a limit is changed. 502 */ 503 struct plimit * 504 limcopy(lim) 505 struct plimit *lim; 506 { 507 struct plimit *newlim; 508 509 newlim = pool_get(&plimit_pool, PR_WAITOK); 510 memcpy(newlim->pl_rlimit, lim->pl_rlimit, 511 sizeof(struct rlimit) * RLIM_NLIMITS); 512 if (lim->pl_corename == defcorename) { 513 newlim->pl_corename = defcorename; 514 } else { 515 newlim->pl_corename = malloc(strlen(lim->pl_corename)+1, 516 M_TEMP, M_WAITOK); 517 strcpy(newlim->pl_corename, lim->pl_corename); 518 } 519 newlim->p_lflags = 0; 520 newlim->p_refcnt = 1; 521 return (newlim); 522 } 523 524 void 525 limfree(lim) 526 struct plimit *lim; 527 { 528 529 if (--lim->p_refcnt > 0) 530 return; 531 #ifdef DIAGNOSTIC 532 if (lim->p_refcnt < 0) 533 panic("limfree"); 534 #endif 535 if (lim->pl_corename != defcorename) 536 free(lim->pl_corename, M_TEMP); 537 pool_put(&plimit_pool, lim); 538 } 539 540 struct pstats * 541 pstatscopy(ps) 542 struct pstats *ps; 543 { 544 545 struct pstats *newps; 546 547 newps = pool_get(&pstats_pool, PR_WAITOK); 548 549 memset(&newps->pstat_startzero, 0, 550 (unsigned) ((caddr_t)&newps->pstat_endzero - 551 (caddr_t)&newps->pstat_startzero)); 552 memcpy(&newps->pstat_startcopy, &ps->pstat_startcopy, 553 ((caddr_t)&newps->pstat_endcopy - 554 (caddr_t)&newps->pstat_startcopy)); 555 556 return (newps); 557 558 } 559 560 void 561 pstatsfree(ps) 562 struct pstats *ps; 563 { 564 565 pool_put(&pstats_pool, ps); 566 } 567