1 /* $NetBSD: sysv_sem.c,v 1.89 2012/03/13 18:40:54 elad Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Implementation of SVID semaphores 35 * 36 * Author: Daniel Boulet 37 * 38 * This software is provided ``AS IS'' without any warranties of any kind. 39 */ 40 41 #include <sys/cdefs.h> 42 __KERNEL_RCSID(0, "$NetBSD: sysv_sem.c,v 1.89 2012/03/13 18:40:54 elad Exp $"); 43 44 #define SYSVSEM 45 46 #include <sys/param.h> 47 #include <sys/kernel.h> 48 #include <sys/sem.h> 49 #include <sys/sysctl.h> 50 #include <sys/kmem.h> 51 #include <sys/mount.h> /* XXX for <sys/syscallargs.h> */ 52 #include <sys/syscallargs.h> 53 #include <sys/kauth.h> 54 55 /* 56 * Memory areas: 57 * 1st: Pool of semaphore identifiers 58 * 2nd: Semaphores 59 * 3rd: Conditional variables 60 * 4th: Undo structures 61 */ 62 struct semid_ds * sema __read_mostly; 63 static struct __sem * sem __read_mostly; 64 static kcondvar_t * semcv __read_mostly; 65 static int * semu __read_mostly; 66 67 static kmutex_t semlock __cacheline_aligned; 68 static bool sem_realloc_state __read_mostly; 69 static kcondvar_t sem_realloc_cv; 70 71 /* 72 * List of active undo structures, total number of semaphores, 73 * and total number of semop waiters. 74 */ 75 static struct sem_undo *semu_list __read_mostly; 76 static u_int semtot __cacheline_aligned; 77 static u_int sem_waiters __cacheline_aligned; 78 79 /* Macro to find a particular sem_undo vector */ 80 #define SEMU(s, ix) ((struct sem_undo *)(((long)s) + ix * seminfo.semusz)) 81 82 #ifdef SEM_DEBUG 83 #define SEM_PRINTF(a) printf a 84 #else 85 #define SEM_PRINTF(a) 86 #endif 87 88 struct sem_undo *semu_alloc(struct proc *); 89 int semundo_adjust(struct proc *, struct sem_undo **, int, int, int); 90 void semundo_clear(int, int); 91 92 void 93 seminit(void) 94 { 95 int i, sz; 96 vaddr_t v; 97 98 mutex_init(&semlock, MUTEX_DEFAULT, IPL_NONE); 99 cv_init(&sem_realloc_cv, "semrealc"); 100 sem_realloc_state = false; 101 semtot = 0; 102 sem_waiters = 0; 103 104 /* Allocate the wired memory for our structures */ 105 sz = ALIGN(seminfo.semmni * sizeof(struct semid_ds)) + 106 ALIGN(seminfo.semmns * sizeof(struct __sem)) + 107 ALIGN(seminfo.semmni * sizeof(kcondvar_t)) + 108 ALIGN(seminfo.semmnu * seminfo.semusz); 109 sz = round_page(sz); 110 v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO); 111 if (v == 0) 112 panic("sysv_sem: cannot allocate memory"); 113 sema = (void *)v; 114 sem = (void *)((uintptr_t)sema + 115 ALIGN(seminfo.semmni * sizeof(struct semid_ds))); 116 semcv = (void *)((uintptr_t)sem + 117 ALIGN(seminfo.semmns * sizeof(struct __sem))); 118 semu = (void *)((uintptr_t)semcv + 119 ALIGN(seminfo.semmni * sizeof(kcondvar_t))); 120 121 for (i = 0; i < seminfo.semmni; i++) { 122 sema[i]._sem_base = 0; 123 sema[i].sem_perm.mode = 0; 124 cv_init(&semcv[i], "semwait"); 125 } 126 for (i = 0; i < seminfo.semmnu; i++) { 127 struct sem_undo *suptr = SEMU(semu, i); 128 suptr->un_proc = NULL; 129 } 130 semu_list = NULL; 131 exithook_establish(semexit, NULL); 132 133 sysvipcinit(); 134 } 135 136 static int 137 semrealloc(int newsemmni, int newsemmns, int newsemmnu) 138 { 139 struct semid_ds *new_sema, *old_sema; 140 struct __sem *new_sem; 141 struct sem_undo *new_semu_list, *suptr, *nsuptr; 142 int *new_semu; 143 kcondvar_t *new_semcv; 144 vaddr_t v; 145 int i, j, lsemid, nmnus, sz; 146 147 if (newsemmni < 1 || newsemmns < 1 || newsemmnu < 1) 148 return EINVAL; 149 150 /* Allocate the wired memory for our structures */ 151 sz = ALIGN(newsemmni * sizeof(struct semid_ds)) + 152 ALIGN(newsemmns * sizeof(struct __sem)) + 153 ALIGN(newsemmni * sizeof(kcondvar_t)) + 154 ALIGN(newsemmnu * seminfo.semusz); 155 sz = round_page(sz); 156 v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO); 157 if (v == 0) 158 return ENOMEM; 159 160 mutex_enter(&semlock); 161 if (sem_realloc_state) { 162 mutex_exit(&semlock); 163 uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED); 164 return EBUSY; 165 } 166 sem_realloc_state = true; 167 if (sem_waiters) { 168 /* 169 * Mark reallocation state, wake-up all waiters, 170 * and wait while they will all exit. 171 */ 172 for (i = 0; i < seminfo.semmni; i++) 173 cv_broadcast(&semcv[i]); 174 while (sem_waiters) 175 cv_wait(&sem_realloc_cv, &semlock); 176 } 177 old_sema = sema; 178 179 /* Get the number of last slot */ 180 lsemid = 0; 181 for (i = 0; i < seminfo.semmni; i++) 182 if (sema[i].sem_perm.mode & SEM_ALLOC) 183 lsemid = i; 184 185 /* Get the number of currently used undo structures */ 186 nmnus = 0; 187 for (i = 0; i < seminfo.semmnu; i++) { 188 suptr = SEMU(semu, i); 189 if (suptr->un_proc == NULL) 190 continue; 191 nmnus++; 192 } 193 194 /* We cannot reallocate less memory than we use */ 195 if (lsemid >= newsemmni || semtot > newsemmns || nmnus > newsemmnu) { 196 mutex_exit(&semlock); 197 uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED); 198 return EBUSY; 199 } 200 201 new_sema = (void *)v; 202 new_sem = (void *)((uintptr_t)new_sema + 203 ALIGN(newsemmni * sizeof(struct semid_ds))); 204 new_semcv = (void *)((uintptr_t)new_sem + 205 ALIGN(newsemmns * sizeof(struct __sem))); 206 new_semu = (void *)((uintptr_t)new_semcv + 207 ALIGN(newsemmni * sizeof(kcondvar_t))); 208 209 /* Initialize all semaphore identifiers and condvars */ 210 for (i = 0; i < newsemmni; i++) { 211 new_sema[i]._sem_base = 0; 212 new_sema[i].sem_perm.mode = 0; 213 cv_init(&new_semcv[i], "semwait"); 214 } 215 for (i = 0; i < newsemmnu; i++) { 216 nsuptr = SEMU(new_semu, i); 217 nsuptr->un_proc = NULL; 218 } 219 220 /* 221 * Copy all identifiers, semaphores and list of the 222 * undo structures to the new memory allocation. 223 */ 224 j = 0; 225 for (i = 0; i <= lsemid; i++) { 226 if ((sema[i].sem_perm.mode & SEM_ALLOC) == 0) 227 continue; 228 memcpy(&new_sema[i], &sema[i], sizeof(struct semid_ds)); 229 new_sema[i]._sem_base = &new_sem[j]; 230 memcpy(new_sema[i]._sem_base, sema[i]._sem_base, 231 (sizeof(struct __sem) * sema[i].sem_nsems)); 232 j += sema[i].sem_nsems; 233 } 234 KASSERT(j == semtot); 235 236 j = 0; 237 new_semu_list = NULL; 238 for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next) { 239 KASSERT(j < newsemmnu); 240 nsuptr = SEMU(new_semu, j); 241 memcpy(nsuptr, suptr, SEMUSZ); 242 nsuptr->un_next = new_semu_list; 243 new_semu_list = nsuptr; 244 j++; 245 } 246 247 for (i = 0; i < seminfo.semmni; i++) { 248 KASSERT(cv_has_waiters(&semcv[i]) == false); 249 cv_destroy(&semcv[i]); 250 } 251 252 sz = ALIGN(seminfo.semmni * sizeof(struct semid_ds)) + 253 ALIGN(seminfo.semmns * sizeof(struct __sem)) + 254 ALIGN(seminfo.semmni * sizeof(kcondvar_t)) + 255 ALIGN(seminfo.semmnu * seminfo.semusz); 256 sz = round_page(sz); 257 258 /* Set the pointers and update the new values */ 259 sema = new_sema; 260 sem = new_sem; 261 semcv = new_semcv; 262 semu = new_semu; 263 semu_list = new_semu_list; 264 265 seminfo.semmni = newsemmni; 266 seminfo.semmns = newsemmns; 267 seminfo.semmnu = newsemmnu; 268 269 /* Reallocation completed - notify all waiters, if any */ 270 sem_realloc_state = false; 271 cv_broadcast(&sem_realloc_cv); 272 mutex_exit(&semlock); 273 274 uvm_km_free(kernel_map, (vaddr_t)old_sema, sz, UVM_KMF_WIRED); 275 return 0; 276 } 277 278 /* 279 * Placebo. 280 */ 281 282 int 283 sys_semconfig(struct lwp *l, const struct sys_semconfig_args *uap, register_t *retval) 284 { 285 286 *retval = 0; 287 return 0; 288 } 289 290 /* 291 * Allocate a new sem_undo structure for a process. 292 * => Returns NULL on failure. 293 */ 294 struct sem_undo * 295 semu_alloc(struct proc *p) 296 { 297 struct sem_undo *suptr, **supptr; 298 bool attempted = false; 299 int i; 300 301 KASSERT(mutex_owned(&semlock)); 302 again: 303 /* Look for a free structure. */ 304 for (i = 0; i < seminfo.semmnu; i++) { 305 suptr = SEMU(semu, i); 306 if (suptr->un_proc == NULL) { 307 /* Found. Fill it in and return. */ 308 suptr->un_next = semu_list; 309 semu_list = suptr; 310 suptr->un_cnt = 0; 311 suptr->un_proc = p; 312 return suptr; 313 } 314 } 315 316 /* Not found. Attempt to free some structures. */ 317 if (!attempted) { 318 bool freed = false; 319 320 attempted = true; 321 supptr = &semu_list; 322 while ((suptr = *supptr) != NULL) { 323 if (suptr->un_cnt == 0) { 324 suptr->un_proc = NULL; 325 *supptr = suptr->un_next; 326 freed = true; 327 } else { 328 supptr = &suptr->un_next; 329 } 330 } 331 if (freed) { 332 goto again; 333 } 334 } 335 return NULL; 336 } 337 338 /* 339 * Adjust a particular entry for a particular proc 340 */ 341 342 int 343 semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid, int semnum, 344 int adjval) 345 { 346 struct sem_undo *suptr; 347 struct undo *sunptr; 348 int i; 349 350 KASSERT(mutex_owned(&semlock)); 351 352 /* 353 * Look for and remember the sem_undo if the caller doesn't 354 * provide it 355 */ 356 357 suptr = *supptr; 358 if (suptr == NULL) { 359 for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next) 360 if (suptr->un_proc == p) 361 break; 362 363 if (suptr == NULL) { 364 suptr = semu_alloc(p); 365 if (suptr == NULL) 366 return (ENOSPC); 367 } 368 *supptr = suptr; 369 } 370 371 /* 372 * Look for the requested entry and adjust it (delete if 373 * adjval becomes 0). 374 */ 375 sunptr = &suptr->un_ent[0]; 376 for (i = 0; i < suptr->un_cnt; i++, sunptr++) { 377 if (sunptr->un_id != semid || sunptr->un_num != semnum) 378 continue; 379 sunptr->un_adjval += adjval; 380 if (sunptr->un_adjval == 0) { 381 suptr->un_cnt--; 382 if (i < suptr->un_cnt) 383 suptr->un_ent[i] = 384 suptr->un_ent[suptr->un_cnt]; 385 } 386 return (0); 387 } 388 389 /* Didn't find the right entry - create it */ 390 if (suptr->un_cnt == SEMUME) 391 return (EINVAL); 392 393 sunptr = &suptr->un_ent[suptr->un_cnt]; 394 suptr->un_cnt++; 395 sunptr->un_adjval = adjval; 396 sunptr->un_id = semid; 397 sunptr->un_num = semnum; 398 return (0); 399 } 400 401 void 402 semundo_clear(int semid, int semnum) 403 { 404 struct sem_undo *suptr; 405 struct undo *sunptr, *sunend; 406 407 KASSERT(mutex_owned(&semlock)); 408 409 for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next) 410 for (sunptr = &suptr->un_ent[0], 411 sunend = sunptr + suptr->un_cnt; sunptr < sunend;) { 412 if (sunptr->un_id == semid) { 413 if (semnum == -1 || sunptr->un_num == semnum) { 414 suptr->un_cnt--; 415 sunend--; 416 if (sunptr != sunend) 417 *sunptr = *sunend; 418 if (semnum != -1) 419 break; 420 else 421 continue; 422 } 423 } 424 sunptr++; 425 } 426 } 427 428 int 429 sys_____semctl50(struct lwp *l, const struct sys_____semctl50_args *uap, 430 register_t *retval) 431 { 432 /* { 433 syscallarg(int) semid; 434 syscallarg(int) semnum; 435 syscallarg(int) cmd; 436 syscallarg(union __semun *) arg; 437 } */ 438 struct semid_ds sembuf; 439 int cmd, error; 440 void *pass_arg; 441 union __semun karg; 442 443 cmd = SCARG(uap, cmd); 444 445 pass_arg = get_semctl_arg(cmd, &sembuf, &karg); 446 447 if (pass_arg) { 448 error = copyin(SCARG(uap, arg), &karg, sizeof(karg)); 449 if (error) 450 return error; 451 if (cmd == IPC_SET) { 452 error = copyin(karg.buf, &sembuf, sizeof(sembuf)); 453 if (error) 454 return (error); 455 } 456 } 457 458 error = semctl1(l, SCARG(uap, semid), SCARG(uap, semnum), cmd, 459 pass_arg, retval); 460 461 if (error == 0 && cmd == IPC_STAT) 462 error = copyout(&sembuf, karg.buf, sizeof(sembuf)); 463 464 return (error); 465 } 466 467 int 468 semctl1(struct lwp *l, int semid, int semnum, int cmd, void *v, 469 register_t *retval) 470 { 471 kauth_cred_t cred = l->l_cred; 472 union __semun *arg = v; 473 struct semid_ds *sembuf = v, *semaptr; 474 int i, error, ix; 475 476 SEM_PRINTF(("call to semctl(%d, %d, %d, %p)\n", 477 semid, semnum, cmd, v)); 478 479 mutex_enter(&semlock); 480 481 ix = IPCID_TO_IX(semid); 482 if (ix < 0 || ix >= seminfo.semmni) { 483 mutex_exit(&semlock); 484 return (EINVAL); 485 } 486 487 semaptr = &sema[ix]; 488 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 || 489 semaptr->sem_perm._seq != IPCID_TO_SEQ(semid)) { 490 mutex_exit(&semlock); 491 return (EINVAL); 492 } 493 494 switch (cmd) { 495 case IPC_RMID: 496 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M)) != 0) 497 break; 498 semaptr->sem_perm.cuid = kauth_cred_geteuid(cred); 499 semaptr->sem_perm.uid = kauth_cred_geteuid(cred); 500 semtot -= semaptr->sem_nsems; 501 for (i = semaptr->_sem_base - sem; i < semtot; i++) 502 sem[i] = sem[i + semaptr->sem_nsems]; 503 for (i = 0; i < seminfo.semmni; i++) { 504 if ((sema[i].sem_perm.mode & SEM_ALLOC) && 505 sema[i]._sem_base > semaptr->_sem_base) 506 sema[i]._sem_base -= semaptr->sem_nsems; 507 } 508 semaptr->sem_perm.mode = 0; 509 semundo_clear(ix, -1); 510 cv_broadcast(&semcv[ix]); 511 break; 512 513 case IPC_SET: 514 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_M))) 515 break; 516 KASSERT(sembuf != NULL); 517 semaptr->sem_perm.uid = sembuf->sem_perm.uid; 518 semaptr->sem_perm.gid = sembuf->sem_perm.gid; 519 semaptr->sem_perm.mode = (semaptr->sem_perm.mode & ~0777) | 520 (sembuf->sem_perm.mode & 0777); 521 semaptr->sem_ctime = time_second; 522 break; 523 524 case IPC_STAT: 525 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R))) 526 break; 527 KASSERT(sembuf != NULL); 528 memcpy(sembuf, semaptr, sizeof(struct semid_ds)); 529 sembuf->sem_perm.mode &= 0777; 530 break; 531 532 case GETNCNT: 533 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R))) 534 break; 535 if (semnum < 0 || semnum >= semaptr->sem_nsems) { 536 error = EINVAL; 537 break; 538 } 539 *retval = semaptr->_sem_base[semnum].semncnt; 540 break; 541 542 case GETPID: 543 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R))) 544 break; 545 if (semnum < 0 || semnum >= semaptr->sem_nsems) { 546 error = EINVAL; 547 break; 548 } 549 *retval = semaptr->_sem_base[semnum].sempid; 550 break; 551 552 case GETVAL: 553 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R))) 554 break; 555 if (semnum < 0 || semnum >= semaptr->sem_nsems) { 556 error = EINVAL; 557 break; 558 } 559 *retval = semaptr->_sem_base[semnum].semval; 560 break; 561 562 case GETALL: 563 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R))) 564 break; 565 KASSERT(arg != NULL); 566 for (i = 0; i < semaptr->sem_nsems; i++) { 567 error = copyout(&semaptr->_sem_base[i].semval, 568 &arg->array[i], sizeof(arg->array[i])); 569 if (error != 0) 570 break; 571 } 572 break; 573 574 case GETZCNT: 575 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_R))) 576 break; 577 if (semnum < 0 || semnum >= semaptr->sem_nsems) { 578 error = EINVAL; 579 break; 580 } 581 *retval = semaptr->_sem_base[semnum].semzcnt; 582 break; 583 584 case SETVAL: 585 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W))) 586 break; 587 if (semnum < 0 || semnum >= semaptr->sem_nsems) { 588 error = EINVAL; 589 break; 590 } 591 KASSERT(arg != NULL); 592 if ((unsigned int)arg->val > seminfo.semvmx) { 593 error = ERANGE; 594 break; 595 } 596 semaptr->_sem_base[semnum].semval = arg->val; 597 semundo_clear(ix, semnum); 598 cv_broadcast(&semcv[ix]); 599 break; 600 601 case SETALL: 602 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W))) 603 break; 604 KASSERT(arg != NULL); 605 for (i = 0; i < semaptr->sem_nsems; i++) { 606 unsigned short semval; 607 error = copyin(&arg->array[i], &semval, 608 sizeof(arg->array[i])); 609 if (error != 0) 610 break; 611 if ((unsigned int)semval > seminfo.semvmx) { 612 error = ERANGE; 613 break; 614 } 615 semaptr->_sem_base[i].semval = semval; 616 } 617 semundo_clear(ix, -1); 618 cv_broadcast(&semcv[ix]); 619 break; 620 621 default: 622 error = EINVAL; 623 break; 624 } 625 626 mutex_exit(&semlock); 627 return (error); 628 } 629 630 int 631 sys_semget(struct lwp *l, const struct sys_semget_args *uap, register_t *retval) 632 { 633 /* { 634 syscallarg(key_t) key; 635 syscallarg(int) nsems; 636 syscallarg(int) semflg; 637 } */ 638 int semid, error = 0; 639 int key = SCARG(uap, key); 640 int nsems = SCARG(uap, nsems); 641 int semflg = SCARG(uap, semflg); 642 kauth_cred_t cred = l->l_cred; 643 644 SEM_PRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg)); 645 646 mutex_enter(&semlock); 647 648 if (key != IPC_PRIVATE) { 649 for (semid = 0; semid < seminfo.semmni; semid++) { 650 if ((sema[semid].sem_perm.mode & SEM_ALLOC) && 651 sema[semid].sem_perm._key == key) 652 break; 653 } 654 if (semid < seminfo.semmni) { 655 SEM_PRINTF(("found public key\n")); 656 if ((error = ipcperm(cred, &sema[semid].sem_perm, 657 semflg & 0700))) 658 goto out; 659 if (nsems > 0 && sema[semid].sem_nsems < nsems) { 660 SEM_PRINTF(("too small\n")); 661 error = EINVAL; 662 goto out; 663 } 664 if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) { 665 SEM_PRINTF(("not exclusive\n")); 666 error = EEXIST; 667 goto out; 668 } 669 goto found; 670 } 671 } 672 673 SEM_PRINTF(("need to allocate the semid_ds\n")); 674 if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) { 675 if (nsems <= 0 || nsems > seminfo.semmsl) { 676 SEM_PRINTF(("nsems out of range (0<%d<=%d)\n", nsems, 677 seminfo.semmsl)); 678 error = EINVAL; 679 goto out; 680 } 681 if (nsems > seminfo.semmns - semtot) { 682 SEM_PRINTF(("not enough semaphores left " 683 "(need %d, got %d)\n", 684 nsems, seminfo.semmns - semtot)); 685 error = ENOSPC; 686 goto out; 687 } 688 for (semid = 0; semid < seminfo.semmni; semid++) { 689 if ((sema[semid].sem_perm.mode & SEM_ALLOC) == 0) 690 break; 691 } 692 if (semid == seminfo.semmni) { 693 SEM_PRINTF(("no more semid_ds's available\n")); 694 error = ENOSPC; 695 goto out; 696 } 697 SEM_PRINTF(("semid %d is available\n", semid)); 698 sema[semid].sem_perm._key = key; 699 sema[semid].sem_perm.cuid = kauth_cred_geteuid(cred); 700 sema[semid].sem_perm.uid = kauth_cred_geteuid(cred); 701 sema[semid].sem_perm.cgid = kauth_cred_getegid(cred); 702 sema[semid].sem_perm.gid = kauth_cred_getegid(cred); 703 sema[semid].sem_perm.mode = (semflg & 0777) | SEM_ALLOC; 704 sema[semid].sem_perm._seq = 705 (sema[semid].sem_perm._seq + 1) & 0x7fff; 706 sema[semid].sem_nsems = nsems; 707 sema[semid].sem_otime = 0; 708 sema[semid].sem_ctime = time_second; 709 sema[semid]._sem_base = &sem[semtot]; 710 semtot += nsems; 711 memset(sema[semid]._sem_base, 0, 712 sizeof(sema[semid]._sem_base[0]) * nsems); 713 SEM_PRINTF(("sembase = %p, next = %p\n", sema[semid]._sem_base, 714 &sem[semtot])); 715 } else { 716 SEM_PRINTF(("didn't find it and wasn't asked to create it\n")); 717 error = ENOENT; 718 goto out; 719 } 720 721 found: 722 *retval = IXSEQ_TO_IPCID(semid, sema[semid].sem_perm); 723 out: 724 mutex_exit(&semlock); 725 return (error); 726 } 727 728 #define SMALL_SOPS 8 729 730 int 731 sys_semop(struct lwp *l, const struct sys_semop_args *uap, register_t *retval) 732 { 733 /* { 734 syscallarg(int) semid; 735 syscallarg(struct sembuf *) sops; 736 syscallarg(size_t) nsops; 737 } */ 738 struct proc *p = l->l_proc; 739 int semid = SCARG(uap, semid), seq; 740 size_t nsops = SCARG(uap, nsops); 741 struct sembuf small_sops[SMALL_SOPS]; 742 struct sembuf *sops; 743 struct semid_ds *semaptr; 744 struct sembuf *sopptr = NULL; 745 struct __sem *semptr = NULL; 746 struct sem_undo *suptr = NULL; 747 kauth_cred_t cred = l->l_cred; 748 int i, error; 749 int do_wakeup, do_undos; 750 751 SEM_PRINTF(("call to semop(%d, %p, %zd)\n", semid, SCARG(uap,sops), nsops)); 752 753 if (__predict_false((p->p_flag & PK_SYSVSEM) == 0)) { 754 mutex_enter(p->p_lock); 755 p->p_flag |= PK_SYSVSEM; 756 mutex_exit(p->p_lock); 757 } 758 759 restart: 760 if (nsops <= SMALL_SOPS) { 761 sops = small_sops; 762 } else if (nsops <= seminfo.semopm) { 763 sops = kmem_alloc(nsops * sizeof(*sops), KM_SLEEP); 764 } else { 765 SEM_PRINTF(("too many sops (max=%d, nsops=%zd)\n", 766 seminfo.semopm, nsops)); 767 return (E2BIG); 768 } 769 770 error = copyin(SCARG(uap, sops), sops, nsops * sizeof(sops[0])); 771 if (error) { 772 SEM_PRINTF(("error = %d from copyin(%p, %p, %zd)\n", error, 773 SCARG(uap, sops), &sops, nsops * sizeof(sops[0]))); 774 if (sops != small_sops) 775 kmem_free(sops, nsops * sizeof(*sops)); 776 return error; 777 } 778 779 mutex_enter(&semlock); 780 /* In case of reallocation, we will wait for completion */ 781 while (__predict_false(sem_realloc_state)) 782 cv_wait(&sem_realloc_cv, &semlock); 783 784 semid = IPCID_TO_IX(semid); /* Convert back to zero origin */ 785 if (semid < 0 || semid >= seminfo.semmni) { 786 error = EINVAL; 787 goto out; 788 } 789 790 semaptr = &sema[semid]; 791 seq = IPCID_TO_SEQ(SCARG(uap, semid)); 792 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 || 793 semaptr->sem_perm._seq != seq) { 794 error = EINVAL; 795 goto out; 796 } 797 798 if ((error = ipcperm(cred, &semaptr->sem_perm, IPC_W))) { 799 SEM_PRINTF(("error = %d from ipaccess\n", error)); 800 goto out; 801 } 802 803 for (i = 0; i < nsops; i++) 804 if (sops[i].sem_num >= semaptr->sem_nsems) { 805 error = EFBIG; 806 goto out; 807 } 808 809 /* 810 * Loop trying to satisfy the vector of requests. 811 * If we reach a point where we must wait, any requests already 812 * performed are rolled back and we go to sleep until some other 813 * process wakes us up. At this point, we start all over again. 814 * 815 * This ensures that from the perspective of other tasks, a set 816 * of requests is atomic (never partially satisfied). 817 */ 818 do_undos = 0; 819 820 for (;;) { 821 do_wakeup = 0; 822 823 for (i = 0; i < nsops; i++) { 824 sopptr = &sops[i]; 825 semptr = &semaptr->_sem_base[sopptr->sem_num]; 826 827 SEM_PRINTF(("semop: semaptr=%p, sem_base=%p, " 828 "semptr=%p, sem[%d]=%d : op=%d, flag=%s\n", 829 semaptr, semaptr->_sem_base, semptr, 830 sopptr->sem_num, semptr->semval, sopptr->sem_op, 831 (sopptr->sem_flg & IPC_NOWAIT) ? 832 "nowait" : "wait")); 833 834 if (sopptr->sem_op < 0) { 835 if ((int)(semptr->semval + 836 sopptr->sem_op) < 0) { 837 SEM_PRINTF(("semop: " 838 "can't do it now\n")); 839 break; 840 } else { 841 semptr->semval += sopptr->sem_op; 842 if (semptr->semval == 0 && 843 semptr->semzcnt > 0) 844 do_wakeup = 1; 845 } 846 if (sopptr->sem_flg & SEM_UNDO) 847 do_undos = 1; 848 } else if (sopptr->sem_op == 0) { 849 if (semptr->semval > 0) { 850 SEM_PRINTF(("semop: not zero now\n")); 851 break; 852 } 853 } else { 854 if (semptr->semncnt > 0) 855 do_wakeup = 1; 856 semptr->semval += sopptr->sem_op; 857 if (sopptr->sem_flg & SEM_UNDO) 858 do_undos = 1; 859 } 860 } 861 862 /* 863 * Did we get through the entire vector? 864 */ 865 if (i >= nsops) 866 goto done; 867 868 /* 869 * No ... rollback anything that we've already done 870 */ 871 SEM_PRINTF(("semop: rollback 0 through %d\n", i - 1)); 872 while (i-- > 0) 873 semaptr->_sem_base[sops[i].sem_num].semval -= 874 sops[i].sem_op; 875 876 /* 877 * If the request that we couldn't satisfy has the 878 * NOWAIT flag set then return with EAGAIN. 879 */ 880 if (sopptr->sem_flg & IPC_NOWAIT) { 881 error = EAGAIN; 882 goto out; 883 } 884 885 if (sopptr->sem_op == 0) 886 semptr->semzcnt++; 887 else 888 semptr->semncnt++; 889 890 sem_waiters++; 891 SEM_PRINTF(("semop: good night!\n")); 892 error = cv_wait_sig(&semcv[semid], &semlock); 893 SEM_PRINTF(("semop: good morning (error=%d)!\n", error)); 894 sem_waiters--; 895 896 /* Notify reallocator, if it is waiting */ 897 cv_broadcast(&sem_realloc_cv); 898 899 /* 900 * Make sure that the semaphore still exists 901 */ 902 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 || 903 semaptr->sem_perm._seq != seq) { 904 error = EIDRM; 905 goto out; 906 } 907 908 /* 909 * The semaphore is still alive. Readjust the count of 910 * waiting processes. 911 */ 912 semptr = &semaptr->_sem_base[sopptr->sem_num]; 913 if (sopptr->sem_op == 0) 914 semptr->semzcnt--; 915 else 916 semptr->semncnt--; 917 918 /* In case of such state, restart the call */ 919 if (sem_realloc_state) { 920 mutex_exit(&semlock); 921 goto restart; 922 } 923 924 /* Is it really morning, or was our sleep interrupted? */ 925 if (error != 0) { 926 error = EINTR; 927 goto out; 928 } 929 SEM_PRINTF(("semop: good morning!\n")); 930 } 931 932 done: 933 /* 934 * Process any SEM_UNDO requests. 935 */ 936 if (do_undos) { 937 for (i = 0; i < nsops; i++) { 938 /* 939 * We only need to deal with SEM_UNDO's for non-zero 940 * op's. 941 */ 942 int adjval; 943 944 if ((sops[i].sem_flg & SEM_UNDO) == 0) 945 continue; 946 adjval = sops[i].sem_op; 947 if (adjval == 0) 948 continue; 949 error = semundo_adjust(p, &suptr, semid, 950 sops[i].sem_num, -adjval); 951 if (error == 0) 952 continue; 953 954 /* 955 * Oh-Oh! We ran out of either sem_undo's or undo's. 956 * Rollback the adjustments to this point and then 957 * rollback the semaphore ups and down so we can return 958 * with an error with all structures restored. We 959 * rollback the undo's in the exact reverse order that 960 * we applied them. This guarantees that we won't run 961 * out of space as we roll things back out. 962 */ 963 while (i-- > 0) { 964 if ((sops[i].sem_flg & SEM_UNDO) == 0) 965 continue; 966 adjval = sops[i].sem_op; 967 if (adjval == 0) 968 continue; 969 if (semundo_adjust(p, &suptr, semid, 970 sops[i].sem_num, adjval) != 0) 971 panic("semop - can't undo undos"); 972 } 973 974 for (i = 0; i < nsops; i++) 975 semaptr->_sem_base[sops[i].sem_num].semval -= 976 sops[i].sem_op; 977 978 SEM_PRINTF(("error = %d from semundo_adjust\n", error)); 979 goto out; 980 } /* loop through the sops */ 981 } /* if (do_undos) */ 982 983 /* We're definitely done - set the sempid's */ 984 for (i = 0; i < nsops; i++) { 985 sopptr = &sops[i]; 986 semptr = &semaptr->_sem_base[sopptr->sem_num]; 987 semptr->sempid = p->p_pid; 988 } 989 990 /* Update sem_otime */ 991 semaptr->sem_otime = time_second; 992 993 /* Do a wakeup if any semaphore was up'd. */ 994 if (do_wakeup) { 995 SEM_PRINTF(("semop: doing wakeup\n")); 996 cv_broadcast(&semcv[semid]); 997 SEM_PRINTF(("semop: back from wakeup\n")); 998 } 999 SEM_PRINTF(("semop: done\n")); 1000 *retval = 0; 1001 1002 out: 1003 mutex_exit(&semlock); 1004 if (sops != small_sops) 1005 kmem_free(sops, nsops * sizeof(*sops)); 1006 return error; 1007 } 1008 1009 /* 1010 * Go through the undo structures for this process and apply the 1011 * adjustments to semaphores. 1012 */ 1013 /*ARGSUSED*/ 1014 void 1015 semexit(struct proc *p, void *v) 1016 { 1017 struct sem_undo *suptr; 1018 struct sem_undo **supptr; 1019 1020 if ((p->p_flag & PK_SYSVSEM) == 0) 1021 return; 1022 1023 mutex_enter(&semlock); 1024 1025 /* 1026 * Go through the chain of undo vectors looking for one 1027 * associated with this process. 1028 */ 1029 1030 for (supptr = &semu_list; (suptr = *supptr) != NULL; 1031 supptr = &suptr->un_next) { 1032 if (suptr->un_proc == p) 1033 break; 1034 } 1035 1036 /* 1037 * If there is no undo vector, skip to the end. 1038 */ 1039 1040 if (suptr == NULL) { 1041 mutex_exit(&semlock); 1042 return; 1043 } 1044 1045 /* 1046 * We now have an undo vector for this process. 1047 */ 1048 1049 SEM_PRINTF(("proc @%p has undo structure with %d entries\n", p, 1050 suptr->un_cnt)); 1051 1052 /* 1053 * If there are any active undo elements then process them. 1054 */ 1055 if (suptr->un_cnt > 0) { 1056 int ix; 1057 1058 for (ix = 0; ix < suptr->un_cnt; ix++) { 1059 int semid = suptr->un_ent[ix].un_id; 1060 int semnum = suptr->un_ent[ix].un_num; 1061 int adjval = suptr->un_ent[ix].un_adjval; 1062 struct semid_ds *semaptr; 1063 1064 semaptr = &sema[semid]; 1065 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0) 1066 panic("semexit - semid not allocated"); 1067 if (semnum >= semaptr->sem_nsems) 1068 panic("semexit - semnum out of range"); 1069 1070 SEM_PRINTF(("semexit: %p id=%d num=%d(adj=%d) ; " 1071 "sem=%d\n", 1072 suptr->un_proc, suptr->un_ent[ix].un_id, 1073 suptr->un_ent[ix].un_num, 1074 suptr->un_ent[ix].un_adjval, 1075 semaptr->_sem_base[semnum].semval)); 1076 1077 if (adjval < 0 && 1078 semaptr->_sem_base[semnum].semval < -adjval) 1079 semaptr->_sem_base[semnum].semval = 0; 1080 else 1081 semaptr->_sem_base[semnum].semval += adjval; 1082 1083 cv_broadcast(&semcv[semid]); 1084 SEM_PRINTF(("semexit: back from wakeup\n")); 1085 } 1086 } 1087 1088 /* 1089 * Deallocate the undo vector. 1090 */ 1091 SEM_PRINTF(("removing vector\n")); 1092 suptr->un_proc = NULL; 1093 *supptr = suptr->un_next; 1094 mutex_exit(&semlock); 1095 } 1096 1097 /* 1098 * Sysctl initialization and nodes. 1099 */ 1100 1101 static int 1102 sysctl_ipc_semmni(SYSCTLFN_ARGS) 1103 { 1104 int newsize, error; 1105 struct sysctlnode node; 1106 node = *rnode; 1107 node.sysctl_data = &newsize; 1108 1109 newsize = seminfo.semmni; 1110 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1111 if (error || newp == NULL) 1112 return error; 1113 1114 return semrealloc(newsize, seminfo.semmns, seminfo.semmnu); 1115 } 1116 1117 static int 1118 sysctl_ipc_semmns(SYSCTLFN_ARGS) 1119 { 1120 int newsize, error; 1121 struct sysctlnode node; 1122 node = *rnode; 1123 node.sysctl_data = &newsize; 1124 1125 newsize = seminfo.semmns; 1126 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1127 if (error || newp == NULL) 1128 return error; 1129 1130 return semrealloc(seminfo.semmni, newsize, seminfo.semmnu); 1131 } 1132 1133 static int 1134 sysctl_ipc_semmnu(SYSCTLFN_ARGS) 1135 { 1136 int newsize, error; 1137 struct sysctlnode node; 1138 node = *rnode; 1139 node.sysctl_data = &newsize; 1140 1141 newsize = seminfo.semmnu; 1142 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1143 if (error || newp == NULL) 1144 return error; 1145 1146 return semrealloc(seminfo.semmni, seminfo.semmns, newsize); 1147 } 1148 1149 SYSCTL_SETUP(sysctl_ipc_sem_setup, "sysctl kern.ipc subtree setup") 1150 { 1151 const struct sysctlnode *node = NULL; 1152 1153 sysctl_createv(clog, 0, NULL, NULL, 1154 CTLFLAG_PERMANENT, 1155 CTLTYPE_NODE, "kern", NULL, 1156 NULL, 0, NULL, 0, 1157 CTL_KERN, CTL_EOL); 1158 sysctl_createv(clog, 0, NULL, &node, 1159 CTLFLAG_PERMANENT, 1160 CTLTYPE_NODE, "ipc", 1161 SYSCTL_DESCR("SysV IPC options"), 1162 NULL, 0, NULL, 0, 1163 CTL_KERN, KERN_SYSVIPC, CTL_EOL); 1164 1165 if (node == NULL) 1166 return; 1167 1168 sysctl_createv(clog, 0, &node, NULL, 1169 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1170 CTLTYPE_INT, "semmni", 1171 SYSCTL_DESCR("Max number of number of semaphore identifiers"), 1172 sysctl_ipc_semmni, 0, &seminfo.semmni, 0, 1173 CTL_CREATE, CTL_EOL); 1174 sysctl_createv(clog, 0, &node, NULL, 1175 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1176 CTLTYPE_INT, "semmns", 1177 SYSCTL_DESCR("Max number of number of semaphores in system"), 1178 sysctl_ipc_semmns, 0, &seminfo.semmns, 0, 1179 CTL_CREATE, CTL_EOL); 1180 sysctl_createv(clog, 0, &node, NULL, 1181 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1182 CTLTYPE_INT, "semmnu", 1183 SYSCTL_DESCR("Max number of undo structures in system"), 1184 sysctl_ipc_semmnu, 0, &seminfo.semmnu, 0, 1185 CTL_CREATE, CTL_EOL); 1186 } 1187