1 /* $NetBSD: kern_resource.c,v 1.71 2003/05/16 14:25:03 itojun Exp $ */ 2 3 /*- 4 * Copyright (c) 1982, 1986, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the University of 23 * California, Berkeley and its contributors. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * @(#)kern_resource.c 8.8 (Berkeley) 2/14/95 41 */ 42 43 #include <sys/cdefs.h> 44 __KERNEL_RCSID(0, "$NetBSD: kern_resource.c,v 1.71 2003/05/16 14:25:03 itojun Exp $"); 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/kernel.h> 49 #include <sys/file.h> 50 #include <sys/resourcevar.h> 51 #include <sys/malloc.h> 52 #include <sys/pool.h> 53 #include <sys/proc.h> 54 55 #include <sys/mount.h> 56 #include <sys/sa.h> 57 #include <sys/syscallargs.h> 58 59 #include <uvm/uvm_extern.h> 60 61 /* 62 * Maximum process data and stack limits. 63 * They are variables so they are patchable. 64 * 65 * XXXX Do we really need them to be patchable? 66 */ 67 rlim_t maxdmap = MAXDSIZ; 68 rlim_t maxsmap = MAXSSIZ; 69 70 /* 71 * Resource controls and accounting. 72 */ 73 74 int 75 sys_getpriority(l, v, retval) 76 struct lwp *l; 77 void *v; 78 register_t *retval; 79 { 80 struct sys_getpriority_args /* { 81 syscallarg(int) which; 82 syscallarg(int) who; 83 } */ *uap = v; 84 struct proc *curp = l->l_proc, *p; 85 int low = NZERO + PRIO_MAX + 1; 86 87 switch (SCARG(uap, which)) { 88 89 case PRIO_PROCESS: 90 if (SCARG(uap, who) == 0) 91 p = curp; 92 else 93 p = pfind(SCARG(uap, who)); 94 if (p == 0) 95 break; 96 low = p->p_nice; 97 break; 98 99 case PRIO_PGRP: { 100 struct pgrp *pg; 101 102 if (SCARG(uap, who) == 0) 103 pg = curp->p_pgrp; 104 else if ((pg = pgfind(SCARG(uap, who))) == NULL) 105 break; 106 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 107 if (p->p_nice < low) 108 low = p->p_nice; 109 } 110 break; 111 } 112 113 case PRIO_USER: 114 if (SCARG(uap, who) == 0) 115 SCARG(uap, who) = curp->p_ucred->cr_uid; 116 proclist_lock_read(); 117 LIST_FOREACH(p, &allproc, p_list) { 118 if (p->p_ucred->cr_uid == (uid_t) SCARG(uap, who) && 119 p->p_nice < low) 120 low = p->p_nice; 121 } 122 proclist_unlock_read(); 123 break; 124 125 default: 126 return (EINVAL); 127 } 128 if (low == NZERO + PRIO_MAX + 1) 129 return (ESRCH); 130 *retval = low - NZERO; 131 return (0); 132 } 133 134 /* ARGSUSED */ 135 int 136 sys_setpriority(l, v, retval) 137 struct lwp *l; 138 void *v; 139 register_t *retval; 140 { 141 struct sys_setpriority_args /* { 142 syscallarg(int) which; 143 syscallarg(int) who; 144 syscallarg(int) prio; 145 } */ *uap = v; 146 struct proc *curp = l->l_proc, *p; 147 int found = 0, error = 0; 148 149 switch (SCARG(uap, which)) { 150 151 case PRIO_PROCESS: 152 if (SCARG(uap, who) == 0) 153 p = curp; 154 else 155 p = pfind(SCARG(uap, who)); 156 if (p == 0) 157 break; 158 error = donice(curp, p, SCARG(uap, prio)); 159 found++; 160 break; 161 162 case PRIO_PGRP: { 163 struct pgrp *pg; 164 165 if (SCARG(uap, who) == 0) 166 pg = curp->p_pgrp; 167 else if ((pg = pgfind(SCARG(uap, who))) == NULL) 168 break; 169 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 170 error = donice(curp, p, SCARG(uap, prio)); 171 found++; 172 } 173 break; 174 } 175 176 case PRIO_USER: 177 if (SCARG(uap, who) == 0) 178 SCARG(uap, who) = curp->p_ucred->cr_uid; 179 proclist_lock_read(); 180 LIST_FOREACH(p, &allproc, p_list) { 181 if (p->p_ucred->cr_uid == (uid_t) SCARG(uap, who)) { 182 error = donice(curp, p, SCARG(uap, prio)); 183 found++; 184 } 185 } 186 proclist_unlock_read(); 187 break; 188 189 default: 190 return (EINVAL); 191 } 192 if (found == 0) 193 return (ESRCH); 194 return (error); 195 } 196 197 int 198 donice(curp, chgp, n) 199 struct proc *curp, *chgp; 200 int n; 201 { 202 struct pcred *pcred = curp->p_cred; 203 int s; 204 205 if (pcred->pc_ucred->cr_uid && pcred->p_ruid && 206 pcred->pc_ucred->cr_uid != chgp->p_ucred->cr_uid && 207 pcred->p_ruid != chgp->p_ucred->cr_uid) 208 return (EPERM); 209 if (n > PRIO_MAX) 210 n = PRIO_MAX; 211 if (n < PRIO_MIN) 212 n = PRIO_MIN; 213 n += NZERO; 214 if (n < chgp->p_nice && suser(pcred->pc_ucred, &curp->p_acflag)) 215 return (EACCES); 216 chgp->p_nice = n; 217 SCHED_LOCK(s); 218 (void)resetprocpriority(chgp); 219 SCHED_UNLOCK(s); 220 return (0); 221 } 222 223 /* ARGSUSED */ 224 int 225 sys_setrlimit(l, v, retval) 226 struct lwp *l; 227 void *v; 228 register_t *retval; 229 { 230 struct sys_setrlimit_args /* { 231 syscallarg(int) which; 232 syscallarg(const struct rlimit *) rlp; 233 } */ *uap = v; 234 struct proc *p = l->l_proc; 235 int which = SCARG(uap, which); 236 struct rlimit alim; 237 int error; 238 239 error = copyin(SCARG(uap, rlp), &alim, sizeof(struct rlimit)); 240 if (error) 241 return (error); 242 return (dosetrlimit(p, p->p_cred, which, &alim)); 243 } 244 245 int 246 dosetrlimit(p, cred, which, limp) 247 struct proc *p; 248 struct pcred *cred; 249 int which; 250 struct rlimit *limp; 251 { 252 struct rlimit *alimp; 253 struct plimit *newplim; 254 int error; 255 256 if ((u_int)which >= RLIM_NLIMITS) 257 return (EINVAL); 258 259 if (limp->rlim_cur < 0 || limp->rlim_max < 0) 260 return (EINVAL); 261 262 alimp = &p->p_rlimit[which]; 263 /* if we don't change the value, no need to limcopy() */ 264 if (limp->rlim_cur == alimp->rlim_cur && 265 limp->rlim_max == alimp->rlim_max) 266 return 0; 267 268 if (limp->rlim_cur > limp->rlim_max) { 269 /* 270 * This is programming error. According to SUSv2, we should 271 * return error in this case. 272 */ 273 return (EINVAL); 274 } 275 if (limp->rlim_max > alimp->rlim_max 276 && (error = suser(cred->pc_ucred, &p->p_acflag)) != 0) 277 return (error); 278 279 if (p->p_limit->p_refcnt > 1 && 280 (p->p_limit->p_lflags & PL_SHAREMOD) == 0) { 281 newplim = limcopy(p->p_limit); 282 limfree(p->p_limit); 283 p->p_limit = newplim; 284 alimp = &p->p_rlimit[which]; 285 } 286 287 switch (which) { 288 289 case RLIMIT_DATA: 290 if (limp->rlim_cur > maxdmap) 291 limp->rlim_cur = maxdmap; 292 if (limp->rlim_max > maxdmap) 293 limp->rlim_max = maxdmap; 294 break; 295 296 case RLIMIT_STACK: 297 if (limp->rlim_cur > maxsmap) 298 limp->rlim_cur = maxsmap; 299 if (limp->rlim_max > maxsmap) 300 limp->rlim_max = maxsmap; 301 302 /* 303 * Return EINVAL if the new stack size limit is lower than 304 * current usage. Otherwise, the process would get SIGSEGV the 305 * moment it would try to access anything on it's current stack. 306 * This conforms to SUSv2. 307 */ 308 if (limp->rlim_cur < p->p_vmspace->vm_ssize * PAGE_SIZE 309 || limp->rlim_max < p->p_vmspace->vm_ssize * PAGE_SIZE) 310 return (EINVAL); 311 312 /* 313 * Stack is allocated to the max at exec time with 314 * only "rlim_cur" bytes accessible (In other words, 315 * allocates stack dividing two contiguous regions at 316 * "rlim_cur" bytes boundary). 317 * 318 * Since allocation is done in terms of page, roundup 319 * "rlim_cur" (otherwise, contiguous regions 320 * overlap). If stack limit is going up make more 321 * accessible, if going down make inaccessible. 322 */ 323 limp->rlim_cur = round_page(limp->rlim_cur); 324 if (limp->rlim_cur != alimp->rlim_cur) { 325 vaddr_t addr; 326 vsize_t size; 327 vm_prot_t prot; 328 329 if (limp->rlim_cur > alimp->rlim_cur) { 330 prot = VM_PROT_ALL; 331 size = limp->rlim_cur - alimp->rlim_cur; 332 addr = USRSTACK - limp->rlim_cur; 333 } else { 334 prot = VM_PROT_NONE; 335 size = alimp->rlim_cur - limp->rlim_cur; 336 addr = USRSTACK - alimp->rlim_cur; 337 } 338 (void) uvm_map_protect(&p->p_vmspace->vm_map, 339 addr, addr+size, prot, FALSE); 340 } 341 break; 342 343 case RLIMIT_NOFILE: 344 if (limp->rlim_cur > maxfiles) 345 limp->rlim_cur = maxfiles; 346 if (limp->rlim_max > maxfiles) 347 limp->rlim_max = maxfiles; 348 break; 349 350 case RLIMIT_NPROC: 351 if (limp->rlim_cur > maxproc) 352 limp->rlim_cur = maxproc; 353 if (limp->rlim_max > maxproc) 354 limp->rlim_max = maxproc; 355 break; 356 } 357 *alimp = *limp; 358 return (0); 359 } 360 361 /* ARGSUSED */ 362 int 363 sys_getrlimit(l, v, retval) 364 struct lwp *l; 365 void *v; 366 register_t *retval; 367 { 368 struct sys_getrlimit_args /* { 369 syscallarg(int) which; 370 syscallarg(struct rlimit *) rlp; 371 } */ *uap = v; 372 struct proc *p = l->l_proc; 373 int which = SCARG(uap, which); 374 375 if ((u_int)which >= RLIM_NLIMITS) 376 return (EINVAL); 377 return (copyout(&p->p_rlimit[which], SCARG(uap, rlp), 378 sizeof(struct rlimit))); 379 } 380 381 /* 382 * Transform the running time and tick information in proc p into user, 383 * system, and interrupt time usage. 384 */ 385 void 386 calcru(p, up, sp, ip) 387 struct proc *p; 388 struct timeval *up; 389 struct timeval *sp; 390 struct timeval *ip; 391 { 392 u_quad_t u, st, ut, it, tot; 393 unsigned long sec; 394 long usec; 395 int s; 396 struct timeval tv; 397 struct lwp *l; 398 399 s = splstatclock(); 400 st = p->p_sticks; 401 ut = p->p_uticks; 402 it = p->p_iticks; 403 splx(s); 404 405 sec = p->p_rtime.tv_sec; 406 usec = p->p_rtime.tv_usec; 407 LIST_FOREACH(l, &p->p_lwps, l_sibling) { 408 if (l->l_stat == LSONPROC) { 409 struct schedstate_percpu *spc; 410 411 KDASSERT(l->l_cpu != NULL); 412 spc = &l->l_cpu->ci_schedstate; 413 414 /* 415 * Adjust for the current time slice. This is 416 * actually fairly important since the error 417 * here is on the order of a time quantum, 418 * which is much greater than the sampling 419 * error. 420 */ 421 microtime(&tv); 422 sec += tv.tv_sec - spc->spc_runtime.tv_sec; 423 usec += tv.tv_usec - spc->spc_runtime.tv_usec; 424 } 425 } 426 427 tot = st + ut + it; 428 u = sec * 1000000ull + usec; 429 430 if (tot == 0) { 431 /* No ticks, so can't use to share time out, split 50-50 */ 432 st = ut = u / 2; 433 } else { 434 st = (u * st) / tot; 435 ut = (u * ut) / tot; 436 } 437 sp->tv_sec = st / 1000000; 438 sp->tv_usec = st % 1000000; 439 up->tv_sec = ut / 1000000; 440 up->tv_usec = ut % 1000000; 441 if (ip != NULL) { 442 if (it != 0) 443 it = (u * it) / tot; 444 ip->tv_sec = it / 1000000; 445 ip->tv_usec = it % 1000000; 446 } 447 } 448 449 /* ARGSUSED */ 450 int 451 sys_getrusage(l, v, retval) 452 struct lwp *l; 453 void *v; 454 register_t *retval; 455 { 456 struct sys_getrusage_args /* { 457 syscallarg(int) who; 458 syscallarg(struct rusage *) rusage; 459 } */ *uap = v; 460 struct rusage *rup; 461 struct proc *p = l->l_proc; 462 463 switch (SCARG(uap, who)) { 464 465 case RUSAGE_SELF: 466 rup = &p->p_stats->p_ru; 467 calcru(p, &rup->ru_utime, &rup->ru_stime, NULL); 468 break; 469 470 case RUSAGE_CHILDREN: 471 rup = &p->p_stats->p_cru; 472 break; 473 474 default: 475 return (EINVAL); 476 } 477 return (copyout(rup, SCARG(uap, rusage), sizeof(struct rusage))); 478 } 479 480 void 481 ruadd(ru, ru2) 482 struct rusage *ru, *ru2; 483 { 484 long *ip, *ip2; 485 int i; 486 487 timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime); 488 timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime); 489 if (ru->ru_maxrss < ru2->ru_maxrss) 490 ru->ru_maxrss = ru2->ru_maxrss; 491 ip = &ru->ru_first; ip2 = &ru2->ru_first; 492 for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) 493 *ip++ += *ip2++; 494 } 495 496 /* 497 * Make a copy of the plimit structure. 498 * We share these structures copy-on-write after fork, 499 * and copy when a limit is changed. 500 */ 501 struct plimit * 502 limcopy(lim) 503 struct plimit *lim; 504 { 505 struct plimit *newlim; 506 size_t l; 507 508 newlim = pool_get(&plimit_pool, PR_WAITOK); 509 memcpy(newlim->pl_rlimit, lim->pl_rlimit, 510 sizeof(struct rlimit) * RLIM_NLIMITS); 511 if (lim->pl_corename == defcorename) { 512 newlim->pl_corename = defcorename; 513 } else { 514 l = strlen(lim->pl_corename) + 1; 515 newlim->pl_corename = malloc(l, M_TEMP, M_WAITOK); 516 strlcpy(newlim->pl_corename, lim->pl_corename, l); 517 } 518 newlim->p_lflags = 0; 519 newlim->p_refcnt = 1; 520 return (newlim); 521 } 522 523 void 524 limfree(lim) 525 struct plimit *lim; 526 { 527 528 if (--lim->p_refcnt > 0) 529 return; 530 #ifdef DIAGNOSTIC 531 if (lim->p_refcnt < 0) 532 panic("limfree"); 533 #endif 534 if (lim->pl_corename != defcorename) 535 free(lim->pl_corename, M_TEMP); 536 pool_put(&plimit_pool, lim); 537 } 538 539 struct pstats * 540 pstatscopy(ps) 541 struct pstats *ps; 542 { 543 544 struct pstats *newps; 545 546 newps = pool_get(&pstats_pool, PR_WAITOK); 547 548 memset(&newps->pstat_startzero, 0, 549 (unsigned) ((caddr_t)&newps->pstat_endzero - 550 (caddr_t)&newps->pstat_startzero)); 551 memcpy(&newps->pstat_startcopy, &ps->pstat_startcopy, 552 ((caddr_t)&newps->pstat_endcopy - 553 (caddr_t)&newps->pstat_startcopy)); 554 555 return (newps); 556 557 } 558 559 void 560 pstatsfree(ps) 561 struct pstats *ps; 562 { 563 564 pool_put(&pstats_pool, ps); 565 } 566