/*	$NetBSD: sysv_shm.c,v 1.131 2015/11/26 13:15:34 martin Exp $	*/

/*-
 * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1994 Adam Glass and Charles M. Hannum.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Adam Glass and Charles M.
 *	Hannum.
 * 4. The names of the authors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
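
/*
 * System V shared memory (shmget/shmat/shmdt/shmctl) implementation.
 *
 * Orientation notes, summarizing the code below:
 *  - shmsegs[] is the global segment table and shm_cv[] holds one condvar
 *    per segment; both are protected by shm_lock.
 *  - A shmid encodes a table index plus a sequence number; the pieces are
 *    split and combined with IPCID_TO_IX(), IPCID_TO_SEQ() and
 *    IXSEQ_TO_IPCID().
 *  - Operations that must drop shm_lock (e.g. while mapping) bump
 *    shm_realloc_disable so that shmrealloc() cannot swap the table out
 *    from under them; shm_realloc_state marks a resize in progress and is
 *    waited on via shm_realloc_cv.
 */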

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.131 2015/11/26 13:15:34 martin Exp $");

#ifdef _KERNEL_OPT
#include "opt_sysv.h"
#endif

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/shm.h>
#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/mount.h>		/* XXX for <sys/syscallargs.h> */
#include <sys/syscallargs.h>
#include <sys/queue.h>
#include <sys/kauth.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_object.h>

struct shmmap_entry {
	SLIST_ENTRY(shmmap_entry) next;
	vaddr_t va;
	int shmid;
};

int			shm_nused		__cacheline_aligned;
struct shmid_ds *	shmsegs			__read_mostly;

static kmutex_t		shm_lock		__cacheline_aligned;
static kcondvar_t *	shm_cv			__cacheline_aligned;
static int		shm_last_free		__cacheline_aligned;
static size_t		shm_committed		__cacheline_aligned;
static int		shm_use_phys		__read_mostly;

static kcondvar_t	shm_realloc_cv;
static bool		shm_realloc_state;
static u_int		shm_realloc_disable;

struct shmmap_state {
	unsigned int nitems;
	unsigned int nrefs;
	SLIST_HEAD(, shmmap_entry) entries;
};

extern int kern_has_sysvshm;

SYSCTL_SETUP_PROTO(sysctl_ipc_shm_setup);

#ifdef SHMDEBUG
#define SHMPRINTF(a) printf a
#else
#define SHMPRINTF(a)
#endif

static int shmrealloc(int);

/*
 * Find the shared memory segment by the identifier.
 *  => must be called with shm_lock held;
 */
static struct shmid_ds *
shm_find_segment_by_shmid(int shmid)
{
	int segnum;
	struct shmid_ds *shmseg;

	KASSERT(mutex_owned(&shm_lock));

	segnum = IPCID_TO_IX(shmid);
	if (segnum < 0 || segnum >= shminfo.shmmni)
		return NULL;
	shmseg = &shmsegs[segnum];
	if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0)
		return NULL;
	if ((shmseg->shm_perm.mode &
	    (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED)
		return NULL;
	if (shmseg->shm_perm._seq != IPCID_TO_SEQ(shmid))
		return NULL;

	return shmseg;
}

/*
 * Free memory segment.
 *  => must be called with shm_lock held;
 */
static void
shm_free_segment(int segnum)
{
	struct shmid_ds *shmseg;
	size_t size;
	bool wanted;

	KASSERT(mutex_owned(&shm_lock));

	shmseg = &shmsegs[segnum];
	SHMPRINTF(("shm freeing key 0x%lx seq 0x%x\n",
	    shmseg->shm_perm._key, shmseg->shm_perm._seq));

	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	wanted = (shmseg->shm_perm.mode & SHMSEG_WANTED);

	shmseg->_shm_internal = NULL;
	shm_committed -= btoc(size);
	shm_nused--;
	shmseg->shm_perm.mode = SHMSEG_FREE;
	shm_last_free = segnum;
	if (wanted == true)
		cv_broadcast(&shm_cv[segnum]);
}

/*
 * Delete entry from the shm map.
 *  => must be called with shm_lock held;
 */
static struct uvm_object *
shm_delete_mapping(struct shmmap_state *shmmap_s,
    struct shmmap_entry *shmmap_se)
{
	struct uvm_object *uobj = NULL;
	struct shmid_ds *shmseg;
	int segnum;

	KASSERT(mutex_owned(&shm_lock));

	segnum = IPCID_TO_IX(shmmap_se->shmid);
	shmseg = &shmsegs[segnum];
	SLIST_REMOVE(&shmmap_s->entries, shmmap_se, shmmap_entry, next);
	shmmap_s->nitems--;
	shmseg->shm_dtime = time_second;
	if ((--shmseg->shm_nattch <= 0) &&
	    (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
		uobj = shmseg->_shm_internal;
		shm_free_segment(segnum);
	}

	return uobj;
}

/*
 * Get a non-shared shm map for the given vmspace.  Note that memory
 * allocation may be performed while the lock is held.
 */
static struct shmmap_state *
shmmap_getprivate(struct proc *p)
{
	struct shmmap_state *oshmmap_s, *shmmap_s;
	struct shmmap_entry *oshmmap_se, *shmmap_se;

	KASSERT(mutex_owned(&shm_lock));

	/* 1. A shm map with refcnt = 1, used only by ourselves, thus return it */
	oshmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (oshmmap_s && oshmmap_s->nrefs == 1)
		return oshmmap_s;

	/* 2. No shm map present - create a fresh one */
	shmmap_s = kmem_zalloc(sizeof(struct shmmap_state), KM_SLEEP);
	shmmap_s->nrefs = 1;
	SLIST_INIT(&shmmap_s->entries);
	p->p_vmspace->vm_shm = (void *)shmmap_s;

	if (oshmmap_s == NULL)
		return shmmap_s;

	SHMPRINTF(("shmmap_getprivate: vm %p split (%d entries), was used by %d\n",
	    p->p_vmspace, oshmmap_s->nitems, oshmmap_s->nrefs));

	/* 3. A shared shm map, copy to a fresh one and adjust refcounts */
	SLIST_FOREACH(oshmmap_se, &oshmmap_s->entries, next) {
		shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP);
		shmmap_se->va = oshmmap_se->va;
		shmmap_se->shmid = oshmmap_se->shmid;
		SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
	}
	shmmap_s->nitems = oshmmap_s->nitems;
	oshmmap_s->nrefs--;

	return shmmap_s;
}

/*
 * Lock/unlock the memory.
 *  => must be called with shm_lock held;
 *  => called from one place, thus, inline;
 */
static inline int
shm_memlock(struct lwp *l, struct shmid_ds *shmseg, int shmid, int cmd)
{
	struct proc *p = l->l_proc;
	struct shmmap_entry *shmmap_se;
	struct shmmap_state *shmmap_s;
	size_t size;
	int error;

	KASSERT(mutex_owned(&shm_lock));
	shmmap_s = shmmap_getprivate(p);

	/* Find our shared memory address by shmid */
	SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
		if (shmmap_se->shmid != shmid)
			continue;

		size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;

		if (cmd == SHM_LOCK &&
		    (shmseg->shm_perm.mode & SHMSEG_WIRED) == 0) {
			/* Wire the object and map, then tag it */
			error = uvm_obj_wirepages(shmseg->_shm_internal,
			    0, size, NULL);
			if (error)
				return EIO;
			error = uvm_map_pageable(&p->p_vmspace->vm_map,
			    shmmap_se->va, shmmap_se->va + size, false, 0);
			if (error) {
				uvm_obj_unwirepages(shmseg->_shm_internal,
				    0, size);
				if (error == EFAULT)
					error = ENOMEM;
				return error;
			}
			shmseg->shm_perm.mode |= SHMSEG_WIRED;

		} else if (cmd == SHM_UNLOCK &&
		    (shmseg->shm_perm.mode & SHMSEG_WIRED) != 0) {
			/* Unwire the object and map, then untag it */
			uvm_obj_unwirepages(shmseg->_shm_internal, 0, size);
			error = uvm_map_pageable(&p->p_vmspace->vm_map,
			    shmmap_se->va, shmmap_se->va + size, true, 0);
			if (error)
				return EIO;
			shmseg->shm_perm.mode &= ~SHMSEG_WIRED;
		}
	}

	return 0;
}

/*
 * Unmap shared memory.
 */
int
sys_shmdt(struct lwp *l, const struct sys_shmdt_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) shmaddr;
	} */
	struct proc *p = l->l_proc;
	struct shmmap_state *shmmap_s1, *shmmap_s;
	struct shmmap_entry *shmmap_se;
	struct uvm_object *uobj;
	struct shmid_ds *shmseg;
	size_t size;

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmmap_s1 = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (shmmap_s1 == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	/* Find the map entry */
	SLIST_FOREACH(shmmap_se, &shmmap_s1->entries, next)
		if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
			break;
	if (shmmap_se == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	shmmap_s = shmmap_getprivate(p);
	if (shmmap_s != shmmap_s1) {
		/* Map has been copied, lookup entry in new map */
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
			if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
				break;
		if (shmmap_se == NULL) {
			mutex_exit(&shm_lock);
			return EINVAL;
		}
	}

	SHMPRINTF(("shmdt: vm %p: remove %d @%lx\n",
	    p->p_vmspace, shmmap_se->shmid, shmmap_se->va));

	/* Delete the entry from shm map */
	uobj = shm_delete_mapping(shmmap_s, shmmap_se);
	shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	mutex_exit(&shm_lock);

	uvm_deallocate(&p->p_vmspace->vm_map, shmmap_se->va, size);
	if (uobj != NULL) {
		uao_detach(uobj);
	}
	kmem_free(shmmap_se, sizeof(struct shmmap_entry));

	return 0;
}
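
/*
 * Illustrative userland view of the attach/detach path implemented by
 * sys_shmdt() above and sys_shmat() below (a minimal sketch, not part of
 * the kernel sources; error handling omitted):
 *
 *	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *	void *p = shmat(id, NULL, 0);	// maps the segment, bumps shm_nattch
 *	memset(p, 0, 4096);		// use the memory
 *	shmdt(p);			// unmaps it, drops shm_nattch
 *	shmctl(id, IPC_RMID, NULL);	// segment is freed once unreferenced
 */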

/*
 * Map shared memory.
 */
int
sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) shmid;
		syscallarg(const void *) shmaddr;
		syscallarg(int) shmflg;
	} */
	int error, flags = 0;
	struct proc *p = l->l_proc;
	kauth_cred_t cred = l->l_cred;
	struct shmid_ds *shmseg;
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;
	struct uvm_object *uobj;
	struct vmspace *vm;
	vaddr_t attach_va;
	vm_prot_t prot;
	vsize_t size;

	/* Allocate a new map entry and set it */
	shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP);
	shmmap_se->shmid = SCARG(uap, shmid);

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmseg = shm_find_segment_by_shmid(SCARG(uap, shmid));
	if (shmseg == NULL) {
		error = EINVAL;
		goto err;
	}
	error = ipcperm(cred, &shmseg->shm_perm,
	    (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
	if (error)
		goto err;

	vm = p->p_vmspace;
	shmmap_s = (struct shmmap_state *)vm->vm_shm;
	if (shmmap_s && shmmap_s->nitems >= shminfo.shmseg) {
		error = EMFILE;
		goto err;
	}

	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	prot = VM_PROT_READ;
	if ((SCARG(uap, shmflg) & SHM_RDONLY) == 0)
		prot |= VM_PROT_WRITE;
	if (SCARG(uap, shmaddr)) {
		flags |= UVM_FLAG_FIXED;
		if (SCARG(uap, shmflg) & SHM_RND)
			attach_va =
			    (vaddr_t)SCARG(uap, shmaddr) & ~(SHMLBA-1);
		else if (((vaddr_t)SCARG(uap, shmaddr) & (SHMLBA-1)) == 0)
			attach_va = (vaddr_t)SCARG(uap, shmaddr);
		else {
			error = EINVAL;
			goto err;
		}
	} else {
		/* This is just a hint to uvm_map() about where to put it. */
		attach_va = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)vm->vm_daddr, size,
		    p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
	}

	/*
	 * Create a map entry, add it to the list and increase the counters.
	 * The lock will be dropped before the mapping, disable reallocation.
	 */
	shmmap_s = shmmap_getprivate(p);
	SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
	shmmap_s->nitems++;
	shmseg->shm_lpid = p->p_pid;
	shmseg->shm_nattch++;
	shm_realloc_disable++;
	mutex_exit(&shm_lock);

	/*
	 * Add a reference to the memory object, map it to the
	 * address space, and lock the memory, if needed.
	 */
	uobj = shmseg->_shm_internal;
	uao_reference(uobj);
	error = uvm_map(&vm->vm_map, &attach_va, size, uobj, 0, 0,
	    UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, flags));
	if (error)
		goto err_detach;
	if (shm_use_phys || (shmseg->shm_perm.mode & SHMSEG_WIRED)) {
		error = uvm_map_pageable(&vm->vm_map, attach_va,
		    attach_va + size, false, 0);
		if (error) {
			if (error == EFAULT)
				error = ENOMEM;
			uvm_deallocate(&vm->vm_map, attach_va, size);
			goto err_detach;
		}
	}

	/* Set the new address, and update the time */
	mutex_enter(&shm_lock);
	shmmap_se->va = attach_va;
	shmseg->shm_atime = time_second;
	shm_realloc_disable--;
	retval[0] = attach_va;
	SHMPRINTF(("shmat: vm %p: add %d @%lx\n",
	    p->p_vmspace, shmmap_se->shmid, attach_va));
err:
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);
	if (error && shmmap_se) {
		kmem_free(shmmap_se, sizeof(struct shmmap_entry));
	}
	return error;

err_detach:
	uao_detach(uobj);
	mutex_enter(&shm_lock);
	uobj = shm_delete_mapping(shmmap_s, shmmap_se);
	shm_realloc_disable--;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);
	if (uobj != NULL) {
		uao_detach(uobj);
	}
	kmem_free(shmmap_se, sizeof(struct shmmap_entry));
	return error;
}
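
/*
 * Illustrative userland counterpart of the control operations handled by
 * shmctl1() below (a minimal sketch, not part of the kernel sources;
 * error handling omitted):
 *
 *	struct shmid_ds ds;
 *	shmctl(id, IPC_STAT, &ds);	// copy out the segment status
 *	ds.shm_perm.mode = 0640;
 *	shmctl(id, IPC_SET, &ds);	// update owner/permissions
 *	shmctl(id, SHM_LOCK, NULL);	// wire the segment (privileged)
 *	shmctl(id, IPC_RMID, NULL);	// mark the segment for removal
 */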

/*
 * Shared memory control operations.
 */
int
sys___shmctl50(struct lwp *l, const struct sys___shmctl50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) shmid;
		syscallarg(int) cmd;
		syscallarg(struct shmid_ds *) buf;
	} */
	struct shmid_ds shmbuf;
	int cmd, error;

	cmd = SCARG(uap, cmd);
	if (cmd == IPC_SET) {
		error = copyin(SCARG(uap, buf), &shmbuf, sizeof(shmbuf));
		if (error)
			return error;
	}

	error = shmctl1(l, SCARG(uap, shmid), cmd,
	    (cmd == IPC_SET || cmd == IPC_STAT) ? &shmbuf : NULL);

	if (error == 0 && cmd == IPC_STAT)
		error = copyout(&shmbuf, SCARG(uap, buf), sizeof(shmbuf));

	return error;
}

int
shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf)
{
	struct uvm_object *uobj = NULL;
	kauth_cred_t cred = l->l_cred;
	struct shmid_ds *shmseg;
	int error = 0;

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmseg = shm_find_segment_by_shmid(shmid);
	if (shmseg == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	switch (cmd) {
	case IPC_STAT:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_R)) != 0)
			break;
		memcpy(shmbuf, shmseg, sizeof(struct shmid_ds));
		break;
	case IPC_SET:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
			break;
		shmseg->shm_perm.uid = shmbuf->shm_perm.uid;
		shmseg->shm_perm.gid = shmbuf->shm_perm.gid;
		shmseg->shm_perm.mode =
		    (shmseg->shm_perm.mode & ~ACCESSPERMS) |
		    (shmbuf->shm_perm.mode & ACCESSPERMS);
		shmseg->shm_ctime = time_second;
		break;
	case IPC_RMID:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
			break;
		shmseg->shm_perm._key = IPC_PRIVATE;
		shmseg->shm_perm.mode |= SHMSEG_REMOVED;
		if (shmseg->shm_nattch <= 0) {
			uobj = shmseg->_shm_internal;
			shm_free_segment(IPCID_TO_IX(shmid));
		}
		break;
	case SHM_LOCK:
	case SHM_UNLOCK:
		if ((error = kauth_authorize_system(cred,
		    KAUTH_SYSTEM_SYSVIPC,
		    (cmd == SHM_LOCK) ? KAUTH_REQ_SYSTEM_SYSVIPC_SHM_LOCK :
		    KAUTH_REQ_SYSTEM_SYSVIPC_SHM_UNLOCK, NULL, NULL, NULL)) != 0)
			break;
		error = shm_memlock(l, shmseg, shmid, cmd);
		break;
	default:
		error = EINVAL;
	}

	mutex_exit(&shm_lock);
	if (uobj != NULL)
		uao_detach(uobj);
	return error;
}

/*
 * Try to take an already existing segment.
 *  => must be called with shm_lock held;
 *  => called from one place, thus, inline;
 */
static inline int
shmget_existing(struct lwp *l, const struct sys_shmget_args *uap, int mode,
    register_t *retval)
{
	struct shmid_ds *shmseg;
	kauth_cred_t cred = l->l_cred;
	int segnum, error;
again:
	KASSERT(mutex_owned(&shm_lock));

	/* Find segment by key */
	for (segnum = 0; segnum < shminfo.shmmni; segnum++)
		if ((shmsegs[segnum].shm_perm.mode & SHMSEG_ALLOCATED) &&
		    shmsegs[segnum].shm_perm._key == SCARG(uap, key))
			break;
	if (segnum == shminfo.shmmni) {
		/* Not found */
		return -1;
	}

	shmseg = &shmsegs[segnum];
	if (shmseg->shm_perm.mode & SHMSEG_REMOVED) {
		/*
		 * This segment is in the process of being allocated.  Wait
		 * until it's done, and look the key up again (in case the
		 * allocation failed or it was freed).
		 */
		shmseg->shm_perm.mode |= SHMSEG_WANTED;
		error = cv_wait_sig(&shm_cv[segnum], &shm_lock);
		if (error)
			return error;
		goto again;
	}

	/*
	 * First check the flags, to generate a useful error when a
	 * segment already exists.
	 */
	if ((SCARG(uap, shmflg) & (IPC_CREAT | IPC_EXCL)) ==
	    (IPC_CREAT | IPC_EXCL))
		return EEXIST;

	/* Check the permission and segment size. */
	error = ipcperm(cred, &shmseg->shm_perm, mode);
	if (error)
		return error;
	if (SCARG(uap, size) && SCARG(uap, size) > shmseg->shm_segsz)
		return EINVAL;

	*retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
	return 0;
}
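
/*
 * Illustrative userland creation patterns served by sys_shmget() below
 * (a minimal sketch, not part of the kernel sources; ftok() is the usual
 * way to derive a key):
 *
 *	key_t key = ftok("/some/path", 42);
 *	int id;
 *
 *	id = shmget(key, 65536, IPC_CREAT | IPC_EXCL | 0600); // create; fail if key exists
 *	id = shmget(key, 65536, 0600);                        // look up an existing key
 *	id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | 0600);    // always a fresh segment
 */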

int
sys_shmget(struct lwp *l, const struct sys_shmget_args *uap, register_t *retval)
{
	/* {
		syscallarg(key_t) key;
		syscallarg(size_t) size;
		syscallarg(int) shmflg;
	} */
	struct shmid_ds *shmseg;
	kauth_cred_t cred = l->l_cred;
	key_t key = SCARG(uap, key);
	size_t size;
	int error, mode, segnum;
	bool lockmem;

	mode = SCARG(uap, shmflg) & ACCESSPERMS;
	if (SCARG(uap, shmflg) & _SHM_RMLINGER)
		mode |= SHMSEG_RMLINGER;

	SHMPRINTF(("shmget: key 0x%lx size 0x%zx shmflg 0x%x mode 0x%x\n",
	    SCARG(uap, key), SCARG(uap, size), SCARG(uap, shmflg), mode));

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	if (key != IPC_PRIVATE) {
		error = shmget_existing(l, uap, mode, retval);
		if (error != -1) {
			mutex_exit(&shm_lock);
			return error;
		}
		if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) {
			mutex_exit(&shm_lock);
			return ENOENT;
		}
	}
	error = 0;

	/*
	 * Check the limits.
	 */
	size = SCARG(uap, size);
	if (size < shminfo.shmmin || size > shminfo.shmmax) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}
	if (shm_nused >= shminfo.shmmni) {
		mutex_exit(&shm_lock);
		return ENOSPC;
	}
	size = (size + PGOFSET) & ~PGOFSET;
	if (shm_committed + btoc(size) > shminfo.shmall) {
		mutex_exit(&shm_lock);
		return ENOMEM;
	}

	/* Find the first available segment */
	if (shm_last_free < 0) {
		for (segnum = 0; segnum < shminfo.shmmni; segnum++)
			if (shmsegs[segnum].shm_perm.mode & SHMSEG_FREE)
				break;
		KASSERT(segnum < shminfo.shmmni);
	} else {
		segnum = shm_last_free;
		shm_last_free = -1;
	}

	/*
	 * Initialize the segment.
	 * We will drop the lock while allocating the memory, thus mark the
	 * segment as present, but removed, so that no other thread can take it.
	 * Also, disable reallocation while the lock is dropped.
	 */
	shmseg = &shmsegs[segnum];
	shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
	shm_committed += btoc(size);
	shm_nused++;
	lockmem = shm_use_phys;
	shm_realloc_disable++;
	mutex_exit(&shm_lock);

	/* Allocate the memory object and lock it if needed */
	shmseg->_shm_internal = uao_create(size, 0);
	if (lockmem) {
		/* Wire the pages and tag it */
		error = uvm_obj_wirepages(shmseg->_shm_internal, 0, size, NULL);
		if (error) {
			uao_detach(shmseg->_shm_internal);
			mutex_enter(&shm_lock);
			shm_free_segment(segnum);
			shm_realloc_disable--;
			mutex_exit(&shm_lock);
			return error;
		}
	}

	/*
	 * Note: while the segment is marked, there is no need to hold the
	 * lock while setting it up (except for shm_perm.mode).
	 */
	shmseg->shm_perm._key = SCARG(uap, key);
	shmseg->shm_perm._seq = (shmseg->shm_perm._seq + 1) & 0x7fff;
	*retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);

	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_geteuid(cred);
	shmseg->shm_perm.cgid = shmseg->shm_perm.gid = kauth_cred_getegid(cred);
	shmseg->shm_segsz = SCARG(uap, size);
	shmseg->shm_cpid = l->l_proc->p_pid;
	shmseg->shm_lpid = shmseg->shm_nattch = 0;
	shmseg->shm_atime = shmseg->shm_dtime = 0;
	shmseg->shm_ctime = time_second;

	/*
	 * Segment is initialized.
	 * Take the lock, mark it as allocated, and notify waiters (if any).
	 * Also, clear the reallocation state.
	 */
	mutex_enter(&shm_lock);
	shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
	    (mode & (ACCESSPERMS | SHMSEG_RMLINGER)) |
	    SHMSEG_ALLOCATED | (lockmem ? SHMSEG_WIRED : 0);
	if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
		shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
		cv_broadcast(&shm_cv[segnum]);
	}
	shm_realloc_disable--;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);

	return error;
}

void
shmfork(struct vmspace *vm1, struct vmspace *vm2)
{
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;

	SHMPRINTF(("shmfork %p->%p\n", vm1, vm2));
	mutex_enter(&shm_lock);
	vm2->vm_shm = vm1->vm_shm;
	if (vm1->vm_shm) {
		shmmap_s = (struct shmmap_state *)vm1->vm_shm;
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
			shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch++;
		shmmap_s->nrefs++;
	}
	mutex_exit(&shm_lock);
}

void
shmexit(struct vmspace *vm)
{
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;

	mutex_enter(&shm_lock);
	shmmap_s = (struct shmmap_state *)vm->vm_shm;
	if (shmmap_s == NULL) {
		mutex_exit(&shm_lock);
		return;
	}
	vm->vm_shm = NULL;

	if (--shmmap_s->nrefs > 0) {
		SHMPRINTF(("shmexit: vm %p drop ref (%d entries), refs = %d\n",
		    vm, shmmap_s->nitems, shmmap_s->nrefs));
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
			shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch--;
		}
		mutex_exit(&shm_lock);
		return;
	}

	SHMPRINTF(("shmexit: vm %p cleanup (%d entries)\n", vm, shmmap_s->nitems));
	if (shmmap_s->nitems == 0) {
		mutex_exit(&shm_lock);
		kmem_free(shmmap_s, sizeof(struct shmmap_state));
		return;
	}

	/*
	 * Delete the entries from the shm map.
	 */
	for (;;) {
		struct shmid_ds *shmseg;
		struct uvm_object *uobj;
		size_t sz;

		shmmap_se = SLIST_FIRST(&shmmap_s->entries);
		KASSERT(shmmap_se != NULL);

		shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
		sz = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
		/* shm_delete_mapping() removes from the list. */
		uobj = shm_delete_mapping(shmmap_s, shmmap_se);
		mutex_exit(&shm_lock);

		uvm_deallocate(&vm->vm_map, shmmap_se->va, sz);
		if (uobj != NULL) {
			uao_detach(uobj);
		}
		kmem_free(shmmap_se, sizeof(struct shmmap_entry));

		if (SLIST_EMPTY(&shmmap_s->entries)) {
			break;
		}
		mutex_enter(&shm_lock);
		KASSERT(!SLIST_EMPTY(&shmmap_s->entries));
	}
	kmem_free(shmmap_s, sizeof(struct shmmap_state));
}

static int
shmrealloc(int newshmni)
{
	vaddr_t v;
	struct shmid_ds *oldshmsegs, *newshmsegs;
	kcondvar_t *newshm_cv, *oldshm_cv;
	size_t sz;
	int i, lsegid, oldshmni;

	if (newshmni < 1)
		return EINVAL;

	/* Allocate new memory area */
	sz = ALIGN(newshmni * sizeof(struct shmid_ds)) +
	    ALIGN(newshmni * sizeof(kcondvar_t));
	sz = round_page(sz);
	v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
	if (v == 0)
		return ENOMEM;

	mutex_enter(&shm_lock);
	while (shm_realloc_state || shm_realloc_disable)
		cv_wait(&shm_realloc_cv, &shm_lock);

	/*
	 * Get the index of the last used segment.  Fail if we are trying
	 * to reallocate to fewer segments than are currently in use.
	 */
	lsegid = 0;
	for (i = 0; i < shminfo.shmmni; i++)
		if ((shmsegs[i].shm_perm.mode & SHMSEG_FREE) == 0)
			lsegid = i;
	if (lsegid >= newshmni) {
		mutex_exit(&shm_lock);
		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
		return EBUSY;
	}
	shm_realloc_state = true;

	newshmsegs = (void *)v;
	newshm_cv = (void *)((uintptr_t)newshmsegs +
	    ALIGN(newshmni * sizeof(struct shmid_ds)));

	/* Copy all memory to the new area */
	for (i = 0; i < shm_nused; i++) {
		cv_init(&newshm_cv[i], "shmwait");
		(void)memcpy(&newshmsegs[i], &shmsegs[i],
		    sizeof(newshmsegs[0]));
	}

	/* Mark all new segments as free, if there are any */
	for (; i < newshmni; i++) {
		cv_init(&newshm_cv[i], "shmwait");
		newshmsegs[i].shm_perm.mode = SHMSEG_FREE;
		newshmsegs[i].shm_perm._seq = 0;
	}

	oldshmsegs = shmsegs;
	oldshmni = shminfo.shmmni;
	shminfo.shmmni = newshmni;
	shmsegs = newshmsegs;
	shm_cv = newshm_cv;

	/* Reallocation completed - notify all waiters, if any */
	shm_realloc_state = false;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);

	/* Release the now unused resources. */
	oldshm_cv = (void *)((uintptr_t)oldshmsegs +
	    ALIGN(oldshmni * sizeof(struct shmid_ds)));
	for (i = 0; i < oldshmni; i++)
		cv_destroy(&oldshm_cv[i]);

	sz = ALIGN(oldshmni * sizeof(struct shmid_ds)) +
	    ALIGN(oldshmni * sizeof(kcondvar_t));
	sz = round_page(sz);
	uvm_km_free(kernel_map, (vaddr_t)oldshmsegs, sz, UVM_KMF_WIRED);

	return 0;
}
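
/*
 * Default sizing note (illustrative arithmetic, assuming 4 KiB pages):
 * shminit() below defaults shmmax to max(physmem / 4, 1024) pages.  On a
 * machine with 2 GiB of RAM (physmem = 524288 pages) that is
 * 131072 pages * 4096 bytes = 512 MiB, and shmall is the same limit
 * expressed in pages (131072).
 */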

void
shminit(struct sysctllog **clog)
{
	vaddr_t v;
	size_t sz;
	int i;

	mutex_init(&shm_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&shm_realloc_cv, "shmrealc");

	/* Allocate the wired memory for our structures */
	sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
	    ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
	sz = round_page(sz);
	v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
	if (v == 0)
		panic("sysv_shm: cannot allocate memory");
	shmsegs = (void *)v;
	shm_cv = (void *)((uintptr_t)shmsegs +
	    ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)));

	if (shminfo.shmmax == 0)
		shminfo.shmmax = max(physmem / 4, 1024) * PAGE_SIZE;
	else
		shminfo.shmmax *= PAGE_SIZE;
	shminfo.shmall = shminfo.shmmax / PAGE_SIZE;

	for (i = 0; i < shminfo.shmmni; i++) {
		cv_init(&shm_cv[i], "shmwait");
		shmsegs[i].shm_perm.mode = SHMSEG_FREE;
		shmsegs[i].shm_perm._seq = 0;
	}
	shm_last_free = 0;
	shm_nused = 0;
	shm_committed = 0;
	shm_realloc_disable = 0;
	shm_realloc_state = false;

	kern_has_sysvshm = 1;

	/* Load the callback function pointers for the uvm subsystem */
	uvm_shmexit = shmexit;
	uvm_shmfork = shmfork;

#ifdef _MODULE
	if (clog)
		sysctl_ipc_shm_setup(clog);
#endif
}

int
shmfini(void)
{
	size_t sz;
	int i;
	vaddr_t v = (vaddr_t)shmsegs;

	mutex_enter(&shm_lock);
	if (shm_nused) {
		mutex_exit(&shm_lock);
		return 1;
	}

	/* Clear the callback function pointers for the uvm subsystem */
	uvm_shmexit = NULL;
	uvm_shmfork = NULL;

	/* Destroy all condvars */
	for (i = 0; i < shminfo.shmmni; i++)
		cv_destroy(&shm_cv[i]);
	cv_destroy(&shm_realloc_cv);

	/* Free the allocated/wired memory */
	sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
	    ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
	sz = round_page(sz);
	uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);

	/* Release and destroy our mutex */
	mutex_exit(&shm_lock);
	mutex_destroy(&shm_lock);

	kern_has_sysvshm = 0;

	return 0;
}

static int
sysctl_ipc_shmmni(SYSCTLFN_ARGS)
{
	int newsize, error;
	struct sysctlnode node;
	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = shminfo.shmmni;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	sysctl_unlock();
	error = shmrealloc(newsize);
	sysctl_relock();
	return error;
}

static int
sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS)
{
	uint32_t newsize;
	int error;
	struct sysctlnode node;
	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = shminfo.shmall;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < 1)
		return EINVAL;

	shminfo.shmall = newsize;
	shminfo.shmmax = (uint64_t)shminfo.shmall * PAGE_SIZE;

	return 0;
}

static int
sysctl_ipc_shmmax(SYSCTLFN_ARGS)
{
	uint64_t newsize;
	int error;
	struct sysctlnode node;
	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = shminfo.shmmax;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < PAGE_SIZE)
		return EINVAL;

	shminfo.shmmax = round_page(newsize);
	shminfo.shmall = shminfo.shmmax >> PAGE_SHIFT;

	return 0;
}

SYSCTL_SETUP(sysctl_ipc_shm_setup, "sysctl kern.ipc subtree setup")
{

	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "ipc",
		SYSCTL_DESCR("SysV IPC options"),
		NULL, 0, NULL, 0,
		CTL_KERN, KERN_SYSVIPC, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_QUAD, "shmmax",
		SYSCTL_DESCR("Max shared memory segment size in bytes"),
		sysctl_ipc_shmmax, 0, &shminfo.shmmax, 0,
		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAX, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "shmmni",
		SYSCTL_DESCR("Max number of shared memory identifiers"),
		sysctl_ipc_shmmni, 0, &shminfo.shmmni, 0,
		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMNI, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "shmseg",
		SYSCTL_DESCR("Max shared memory segments per process"),
		NULL, 0, &shminfo.shmseg, 0,
		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMSEG, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "shmmaxpgs",
		SYSCTL_DESCR("Max amount of shared memory in pages"),
		sysctl_ipc_shmmaxpgs, 0, &shminfo.shmall, 0,
		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAXPGS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "shm_use_phys",
		SYSCTL_DESCR("Enable/disable locking of shared memory in "
		    "physical memory"), NULL, 0, &shm_use_phys, 0,
		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMUSEPHYS, CTL_EOL);
}
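
/*
 * Illustrative tuning of the nodes registered above (a minimal userland
 * sketch, not part of the kernel sources; requires appropriate privilege
 * to write):
 *
 *	uint64_t shmmax;
 *	size_t len = sizeof(shmmax);
 *	sysctlbyname("kern.ipc.shmmax", &shmmax, &len, NULL, 0);		// read
 *	shmmax *= 2;
 *	sysctlbyname("kern.ipc.shmmax", NULL, NULL, &shmmax, sizeof(shmmax));	// write
 *
 * Writing kern.ipc.shmmni goes through sysctl_ipc_shmmni() and thus
 * shmrealloc(), which resizes the in-kernel segment table.
 */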