1 /* $NetBSD: sysv_shm.c,v 1.134 2019/04/10 10:03:50 pgoyette Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, and by Mindaugas Rasiukevicius. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1994 Adam Glass and Charles M. Hannum. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by Adam Glass and Charles M. 47 * Hannum. 48 * 4. The names of the authors may not be used to endorse or promote products 49 * derived from this software without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 52 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 53 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 54 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, 55 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 56 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 57 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 58 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 59 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 60 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
61 */ 62 63 #include <sys/cdefs.h> 64 __KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.134 2019/04/10 10:03:50 pgoyette Exp $"); 65 66 #ifdef _KERNEL_OPT 67 #include "opt_sysv.h" 68 #endif 69 70 #include <sys/param.h> 71 #include <sys/kernel.h> 72 #include <sys/kmem.h> 73 #include <sys/shm.h> 74 #include <sys/mutex.h> 75 #include <sys/mman.h> 76 #include <sys/stat.h> 77 #include <sys/sysctl.h> 78 #include <sys/mount.h> /* XXX for <sys/syscallargs.h> */ 79 #include <sys/syscallargs.h> 80 #include <sys/queue.h> 81 #include <sys/kauth.h> 82 83 #include <uvm/uvm_extern.h> 84 #include <uvm/uvm_object.h> 85 86 struct shmmap_entry { 87 SLIST_ENTRY(shmmap_entry) next; 88 vaddr_t va; 89 int shmid; 90 }; 91 92 int shm_nused __cacheline_aligned; 93 struct shmid_ds * shmsegs __read_mostly; 94 95 static kmutex_t shm_lock __cacheline_aligned; 96 static kcondvar_t * shm_cv __cacheline_aligned; 97 static int shm_last_free __cacheline_aligned; 98 static size_t shm_committed __cacheline_aligned; 99 static int shm_use_phys __read_mostly; 100 101 static kcondvar_t shm_realloc_cv; 102 static bool shm_realloc_state; 103 static u_int shm_realloc_disable; 104 105 struct shmmap_state { 106 unsigned int nitems; 107 unsigned int nrefs; 108 SLIST_HEAD(, shmmap_entry) entries; 109 }; 110 111 extern int kern_has_sysvshm; 112 113 SYSCTL_SETUP_PROTO(sysctl_ipc_shm_setup); 114 115 #ifdef SHMDEBUG 116 #define SHMPRINTF(a) printf a 117 #else 118 #define SHMPRINTF(a) 119 #endif 120 121 static int shmrealloc(int); 122 123 /* 124 * Find the shared memory segment by the identifier. 125 * => must be called with shm_lock held; 126 */ 127 static struct shmid_ds * 128 shm_find_segment_by_shmid(int shmid) 129 { 130 int segnum; 131 struct shmid_ds *shmseg; 132 133 KASSERT(mutex_owned(&shm_lock)); 134 135 segnum = IPCID_TO_IX(shmid); 136 if (segnum < 0 || segnum >= shminfo.shmmni) 137 return NULL; 138 shmseg = &shmsegs[segnum]; 139 if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0) 140 return NULL; 141 if ((shmseg->shm_perm.mode & 142 (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED) 143 return NULL; 144 if (shmseg->shm_perm._seq != IPCID_TO_SEQ(shmid)) 145 return NULL; 146 147 return shmseg; 148 } 149 150 /* 151 * Free memory segment. 152 * => must be called with shm_lock held; 153 */ 154 static void 155 shm_free_segment(int segnum) 156 { 157 struct shmid_ds *shmseg; 158 size_t size; 159 bool wanted; 160 161 KASSERT(mutex_owned(&shm_lock)); 162 163 shmseg = &shmsegs[segnum]; 164 SHMPRINTF(("shm freeing key 0x%lx seq 0x%x\n", 165 shmseg->shm_perm._key, shmseg->shm_perm._seq)); 166 167 size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; 168 wanted = (shmseg->shm_perm.mode & SHMSEG_WANTED); 169 170 shmseg->_shm_internal = NULL; 171 shm_committed -= btoc(size); 172 shm_nused--; 173 shmseg->shm_perm.mode = SHMSEG_FREE; 174 shm_last_free = segnum; 175 if (wanted == true) 176 cv_broadcast(&shm_cv[segnum]); 177 } 178 179 /* 180 * Delete entry from the shm map. 
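 * The returned uvm_object, if not NULL, must be uao_detach()ed by the
 * caller after dropping shm_lock, and the entry itself freed.  A minimal
 * sketch, mirroring what sys_shmdt() and shmexit() below do (unmapping
 * of the VA range omitted):
 *
 *	uobj = shm_delete_mapping(shmmap_s, shmmap_se);
 *	mutex_exit(&shm_lock);
 *	if (uobj != NULL)
 *		uao_detach(uobj);
 *	kmem_free(shmmap_se, sizeof(struct shmmap_entry));
 *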
 * => must be called with shm_lock held;
 */
static struct uvm_object *
shm_delete_mapping(struct shmmap_state *shmmap_s,
    struct shmmap_entry *shmmap_se)
{
	struct uvm_object *uobj = NULL;
	struct shmid_ds *shmseg;
	int segnum;

	KASSERT(mutex_owned(&shm_lock));

	segnum = IPCID_TO_IX(shmmap_se->shmid);
	shmseg = &shmsegs[segnum];
	SLIST_REMOVE(&shmmap_s->entries, shmmap_se, shmmap_entry, next);
	shmmap_s->nitems--;
	shmseg->shm_dtime = time_second;
	if ((--shmseg->shm_nattch <= 0) &&
	    (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
		uobj = shmseg->_shm_internal;
		shm_free_segment(segnum);
	}

	return uobj;
}

/*
 * Get a non-shared shm map for the given vmspace.  Note that the memory
 * allocation may be performed with the lock held.
 */
static struct shmmap_state *
shmmap_getprivate(struct proc *p)
{
	struct shmmap_state *oshmmap_s, *shmmap_s;
	struct shmmap_entry *oshmmap_se, *shmmap_se;

	KASSERT(mutex_owned(&shm_lock));

	/* 1. A shm map with refcnt = 1 is used only by ourselves - return it */
	oshmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (oshmmap_s && oshmmap_s->nrefs == 1)
		return oshmmap_s;

	/* 2. No shm map present - create a fresh one */
	shmmap_s = kmem_zalloc(sizeof(struct shmmap_state), KM_SLEEP);
	shmmap_s->nrefs = 1;
	SLIST_INIT(&shmmap_s->entries);
	p->p_vmspace->vm_shm = (void *)shmmap_s;

	if (oshmmap_s == NULL)
		return shmmap_s;

	SHMPRINTF(("shmmap_getprivate: vm %p split (%d entries), was used by %d\n",
	    p->p_vmspace, oshmmap_s->nitems, oshmmap_s->nrefs));

	/* 3. A shared shm map - copy it to a fresh one and adjust refcounts */
	SLIST_FOREACH(oshmmap_se, &oshmmap_s->entries, next) {
		shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP);
		shmmap_se->va = oshmmap_se->va;
		shmmap_se->shmid = oshmmap_se->shmid;
		SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
	}
	shmmap_s->nitems = oshmmap_s->nitems;
	oshmmap_s->nrefs--;

	return shmmap_s;
}

/*
 * Lock/unlock the memory.
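 * Reached from shmctl1() for the SHM_LOCK and SHM_UNLOCK commands; an
 * illustrative userland trigger (hypothetical shmid, privilege checks
 * apply):
 *
 *	if (shmctl(shmid, SHM_LOCK, NULL) == -1)
 *		err(EXIT_FAILURE, "SHM_LOCK");
 *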
251 * => must be called with shm_lock held; 252 * => called from one place, thus, inline; 253 */ 254 static inline int 255 shm_memlock(struct lwp *l, struct shmid_ds *shmseg, int shmid, int cmd) 256 { 257 struct proc *p = l->l_proc; 258 struct shmmap_entry *shmmap_se; 259 struct shmmap_state *shmmap_s; 260 size_t size; 261 int error; 262 263 KASSERT(mutex_owned(&shm_lock)); 264 shmmap_s = shmmap_getprivate(p); 265 266 /* Find our shared memory address by shmid */ 267 SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) { 268 if (shmmap_se->shmid != shmid) 269 continue; 270 271 size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; 272 273 if (cmd == SHM_LOCK && 274 (shmseg->shm_perm.mode & SHMSEG_WIRED) == 0) { 275 /* Wire the object and map, then tag it */ 276 error = uvm_obj_wirepages(shmseg->_shm_internal, 277 0, size, NULL); 278 if (error) 279 return EIO; 280 error = uvm_map_pageable(&p->p_vmspace->vm_map, 281 shmmap_se->va, shmmap_se->va + size, false, 0); 282 if (error) { 283 uvm_obj_unwirepages(shmseg->_shm_internal, 284 0, size); 285 if (error == EFAULT) 286 error = ENOMEM; 287 return error; 288 } 289 shmseg->shm_perm.mode |= SHMSEG_WIRED; 290 291 } else if (cmd == SHM_UNLOCK && 292 (shmseg->shm_perm.mode & SHMSEG_WIRED) != 0) { 293 /* Unwire the object and map, then untag it */ 294 uvm_obj_unwirepages(shmseg->_shm_internal, 0, size); 295 error = uvm_map_pageable(&p->p_vmspace->vm_map, 296 shmmap_se->va, shmmap_se->va + size, true, 0); 297 if (error) 298 return EIO; 299 shmseg->shm_perm.mode &= ~SHMSEG_WIRED; 300 } 301 } 302 303 return 0; 304 } 305 306 /* 307 * Unmap shared memory. 308 */ 309 int 310 sys_shmdt(struct lwp *l, const struct sys_shmdt_args *uap, register_t *retval) 311 { 312 /* { 313 syscallarg(const void *) shmaddr; 314 } */ 315 struct proc *p = l->l_proc; 316 struct shmmap_state *shmmap_s1, *shmmap_s; 317 struct shmmap_entry *shmmap_se; 318 struct uvm_object *uobj; 319 struct shmid_ds *shmseg; 320 size_t size; 321 322 mutex_enter(&shm_lock); 323 /* In case of reallocation, we will wait for completion */ 324 while (__predict_false(shm_realloc_state)) 325 cv_wait(&shm_realloc_cv, &shm_lock); 326 327 shmmap_s1 = (struct shmmap_state *)p->p_vmspace->vm_shm; 328 if (shmmap_s1 == NULL) { 329 mutex_exit(&shm_lock); 330 return EINVAL; 331 } 332 333 /* Find the map entry */ 334 SLIST_FOREACH(shmmap_se, &shmmap_s1->entries, next) 335 if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr)) 336 break; 337 if (shmmap_se == NULL) { 338 mutex_exit(&shm_lock); 339 return EINVAL; 340 } 341 342 shmmap_s = shmmap_getprivate(p); 343 if (shmmap_s != shmmap_s1) { 344 /* Map has been copied, lookup entry in new map */ 345 SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) 346 if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr)) 347 break; 348 if (shmmap_se == NULL) { 349 mutex_exit(&shm_lock); 350 return EINVAL; 351 } 352 } 353 354 SHMPRINTF(("shmdt: vm %p: remove %d @%lx\n", 355 p->p_vmspace, shmmap_se->shmid, shmmap_se->va)); 356 357 /* Delete the entry from shm map */ 358 uobj = shm_delete_mapping(shmmap_s, shmmap_se); 359 shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)]; 360 size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; 361 mutex_exit(&shm_lock); 362 363 uvm_deallocate(&p->p_vmspace->vm_map, shmmap_se->va, size); 364 if (uobj != NULL) { 365 uao_detach(uobj); 366 } 367 kmem_free(shmmap_se, sizeof(struct shmmap_entry)); 368 369 return 0; 370 } 371 372 /* 373 * Map shared memory. 
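 *
 * Illustrative userland usage (hypothetical key; error handling omitted):
 *
 *	int id = shmget(key, 4096, IPC_CREAT | 0600);
 *	char *p = shmat(id, NULL, 0);
 *	p[0] = 1;
 *	(void)shmdt(p);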
374 */ 375 int 376 sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval) 377 { 378 /* { 379 syscallarg(int) shmid; 380 syscallarg(const void *) shmaddr; 381 syscallarg(int) shmflg; 382 } */ 383 int error, flags = 0; 384 struct proc *p = l->l_proc; 385 kauth_cred_t cred = l->l_cred; 386 struct shmid_ds *shmseg; 387 struct shmmap_state *shmmap_s; 388 struct shmmap_entry *shmmap_se; 389 struct uvm_object *uobj; 390 struct vmspace *vm; 391 vaddr_t attach_va; 392 vm_prot_t prot; 393 vsize_t size; 394 395 /* Allocate a new map entry and set it */ 396 shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP); 397 shmmap_se->shmid = SCARG(uap, shmid); 398 399 mutex_enter(&shm_lock); 400 /* In case of reallocation, we will wait for completion */ 401 while (__predict_false(shm_realloc_state)) 402 cv_wait(&shm_realloc_cv, &shm_lock); 403 404 shmseg = shm_find_segment_by_shmid(SCARG(uap, shmid)); 405 if (shmseg == NULL) { 406 error = EINVAL; 407 goto err; 408 } 409 error = ipcperm(cred, &shmseg->shm_perm, 410 (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W); 411 if (error) 412 goto err; 413 414 vm = p->p_vmspace; 415 shmmap_s = (struct shmmap_state *)vm->vm_shm; 416 if (shmmap_s && shmmap_s->nitems >= shminfo.shmseg) { 417 error = EMFILE; 418 goto err; 419 } 420 421 size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; 422 prot = VM_PROT_READ; 423 if ((SCARG(uap, shmflg) & SHM_RDONLY) == 0) 424 prot |= VM_PROT_WRITE; 425 if (SCARG(uap, shmaddr)) { 426 flags |= UVM_FLAG_FIXED; 427 if (SCARG(uap, shmflg) & SHM_RND) 428 attach_va = 429 (vaddr_t)SCARG(uap, shmaddr) & ~(SHMLBA-1); 430 else if (((vaddr_t)SCARG(uap, shmaddr) & (SHMLBA-1)) == 0) 431 attach_va = (vaddr_t)SCARG(uap, shmaddr); 432 else { 433 error = EINVAL; 434 goto err; 435 } 436 } else { 437 /* This is just a hint to uvm_map() about where to put it. */ 438 attach_va = p->p_emul->e_vm_default_addr(p, 439 (vaddr_t)vm->vm_daddr, size, 440 p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN); 441 } 442 443 /* 444 * Create a map entry, add it to the list and increase the counters. 445 * The lock will be dropped before the mapping, disable reallocation. 446 */ 447 shmmap_s = shmmap_getprivate(p); 448 SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next); 449 shmmap_s->nitems++; 450 shmseg->shm_lpid = p->p_pid; 451 shmseg->shm_nattch++; 452 shm_realloc_disable++; 453 mutex_exit(&shm_lock); 454 455 /* 456 * Add a reference to the memory object, map it to the 457 * address space, and lock the memory, if needed. 
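	 *
	 * On failure, err_detach below drops the reference taken here and
	 * shm_delete_mapping() undoes the map entry and the shm_nattch
	 * bump made above.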
458 */ 459 uobj = shmseg->_shm_internal; 460 uao_reference(uobj); 461 error = uvm_map(&vm->vm_map, &attach_va, size, uobj, 0, 0, 462 UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, flags)); 463 if (error) 464 goto err_detach; 465 if (shm_use_phys || (shmseg->shm_perm.mode & SHMSEG_WIRED)) { 466 error = uvm_map_pageable(&vm->vm_map, attach_va, 467 attach_va + size, false, 0); 468 if (error) { 469 if (error == EFAULT) 470 error = ENOMEM; 471 uvm_deallocate(&vm->vm_map, attach_va, size); 472 goto err_detach; 473 } 474 } 475 476 /* Set the new address, and update the time */ 477 mutex_enter(&shm_lock); 478 shmmap_se->va = attach_va; 479 shmseg->shm_atime = time_second; 480 shm_realloc_disable--; 481 retval[0] = attach_va; 482 SHMPRINTF(("shmat: vm %p: add %d @%lx\n", 483 p->p_vmspace, shmmap_se->shmid, attach_va)); 484 err: 485 cv_broadcast(&shm_realloc_cv); 486 mutex_exit(&shm_lock); 487 if (error && shmmap_se) { 488 kmem_free(shmmap_se, sizeof(struct shmmap_entry)); 489 } 490 return error; 491 492 err_detach: 493 uao_detach(uobj); 494 mutex_enter(&shm_lock); 495 uobj = shm_delete_mapping(shmmap_s, shmmap_se); 496 shm_realloc_disable--; 497 cv_broadcast(&shm_realloc_cv); 498 mutex_exit(&shm_lock); 499 if (uobj != NULL) { 500 uao_detach(uobj); 501 } 502 kmem_free(shmmap_se, sizeof(struct shmmap_entry)); 503 return error; 504 } 505 506 /* 507 * Shared memory control operations. 508 */ 509 int 510 sys___shmctl50(struct lwp *l, const struct sys___shmctl50_args *uap, 511 register_t *retval) 512 { 513 /* { 514 syscallarg(int) shmid; 515 syscallarg(int) cmd; 516 syscallarg(struct shmid_ds *) buf; 517 } */ 518 struct shmid_ds shmbuf; 519 int cmd, error; 520 521 cmd = SCARG(uap, cmd); 522 if (cmd == IPC_SET) { 523 error = copyin(SCARG(uap, buf), &shmbuf, sizeof(shmbuf)); 524 if (error) 525 return error; 526 } 527 528 error = shmctl1(l, SCARG(uap, shmid), cmd, 529 (cmd == IPC_SET || cmd == IPC_STAT) ? 
&shmbuf : NULL); 530 531 if (error == 0 && cmd == IPC_STAT) 532 error = copyout(&shmbuf, SCARG(uap, buf), sizeof(shmbuf)); 533 534 return error; 535 } 536 537 int 538 shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf) 539 { 540 struct uvm_object *uobj = NULL; 541 kauth_cred_t cred = l->l_cred; 542 struct shmid_ds *shmseg; 543 int error = 0; 544 545 mutex_enter(&shm_lock); 546 /* In case of reallocation, we will wait for completion */ 547 while (__predict_false(shm_realloc_state)) 548 cv_wait(&shm_realloc_cv, &shm_lock); 549 550 shmseg = shm_find_segment_by_shmid(shmid); 551 if (shmseg == NULL) { 552 mutex_exit(&shm_lock); 553 return EINVAL; 554 } 555 556 switch (cmd) { 557 case IPC_STAT: 558 if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_R)) != 0) 559 break; 560 memset(shmbuf, 0, sizeof *shmbuf); 561 shmbuf->shm_perm = shmseg->shm_perm; 562 shmbuf->shm_perm.mode &= 0777; 563 shmbuf->shm_segsz = shmseg->shm_segsz; 564 shmbuf->shm_lpid = shmseg->shm_lpid; 565 shmbuf->shm_cpid = shmseg->shm_cpid; 566 shmbuf->shm_nattch = shmseg->shm_nattch; 567 shmbuf->shm_atime = shmseg->shm_atime; 568 shmbuf->shm_dtime = shmseg->shm_dtime; 569 shmbuf->shm_ctime = shmseg->shm_ctime; 570 break; 571 case IPC_SET: 572 if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0) 573 break; 574 shmseg->shm_perm.uid = shmbuf->shm_perm.uid; 575 shmseg->shm_perm.gid = shmbuf->shm_perm.gid; 576 shmseg->shm_perm.mode = 577 (shmseg->shm_perm.mode & ~ACCESSPERMS) | 578 (shmbuf->shm_perm.mode & ACCESSPERMS); 579 shmseg->shm_ctime = time_second; 580 break; 581 case IPC_RMID: 582 if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0) 583 break; 584 shmseg->shm_perm._key = IPC_PRIVATE; 585 shmseg->shm_perm.mode |= SHMSEG_REMOVED; 586 if (shmseg->shm_nattch <= 0) { 587 uobj = shmseg->_shm_internal; 588 shm_free_segment(IPCID_TO_IX(shmid)); 589 } 590 break; 591 case SHM_LOCK: 592 case SHM_UNLOCK: 593 if ((error = kauth_authorize_system(cred, 594 KAUTH_SYSTEM_SYSVIPC, 595 (cmd == SHM_LOCK) ? KAUTH_REQ_SYSTEM_SYSVIPC_SHM_LOCK : 596 KAUTH_REQ_SYSTEM_SYSVIPC_SHM_UNLOCK, NULL, NULL, NULL)) != 0) 597 break; 598 error = shm_memlock(l, shmseg, shmid, cmd); 599 break; 600 default: 601 error = EINVAL; 602 } 603 604 mutex_exit(&shm_lock); 605 if (uobj != NULL) 606 uao_detach(uobj); 607 return error; 608 } 609 610 /* 611 * Try to take an already existing segment. 612 * => must be called with shm_lock held; 613 * => called from one place, thus, inline; 614 */ 615 static inline int 616 shmget_existing(struct lwp *l, const struct sys_shmget_args *uap, int mode, 617 register_t *retval) 618 { 619 struct shmid_ds *shmseg; 620 kauth_cred_t cred = l->l_cred; 621 int segnum, error; 622 again: 623 KASSERT(mutex_owned(&shm_lock)); 624 625 /* Find segment by key */ 626 for (segnum = 0; segnum < shminfo.shmmni; segnum++) 627 if ((shmsegs[segnum].shm_perm.mode & SHMSEG_ALLOCATED) && 628 shmsegs[segnum].shm_perm._key == SCARG(uap, key)) 629 break; 630 if (segnum == shminfo.shmmni) { 631 /* Not found */ 632 return -1; 633 } 634 635 shmseg = &shmsegs[segnum]; 636 if (shmseg->shm_perm.mode & SHMSEG_REMOVED) { 637 /* 638 * This segment is in the process of being allocated. Wait 639 * until it's done, and look the key up again (in case the 640 * allocation failed or it was freed). 
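		 * cv_wait_sig() may fail (e.g. EINTR/ERESTART on a signal),
		 * in which case the error is returned to the caller as is.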
		 */
		shmseg->shm_perm.mode |= SHMSEG_WANTED;
		error = cv_wait_sig(&shm_cv[segnum], &shm_lock);
		if (error)
			return error;
		goto again;
	}

	/*
	 * First check the flags, to generate a useful error when a
	 * segment already exists.
	 */
	if ((SCARG(uap, shmflg) & (IPC_CREAT | IPC_EXCL)) ==
	    (IPC_CREAT | IPC_EXCL))
		return EEXIST;

	/* Check the permission and segment size. */
	error = ipcperm(cred, &shmseg->shm_perm, mode);
	if (error)
		return error;
	if (SCARG(uap, size) && SCARG(uap, size) > shmseg->shm_segsz)
		return EINVAL;

	*retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
	return 0;
}

int
sys_shmget(struct lwp *l, const struct sys_shmget_args *uap, register_t *retval)
{
	/* {
		syscallarg(key_t) key;
		syscallarg(size_t) size;
		syscallarg(int) shmflg;
	} */
	struct shmid_ds *shmseg;
	kauth_cred_t cred = l->l_cred;
	key_t key = SCARG(uap, key);
	size_t size;
	int error, mode, segnum;
	bool lockmem;

	mode = SCARG(uap, shmflg) & ACCESSPERMS;
	if (SCARG(uap, shmflg) & _SHM_RMLINGER)
		mode |= SHMSEG_RMLINGER;

	SHMPRINTF(("shmget: key 0x%lx size 0x%zx shmflg 0x%x mode 0x%x\n",
	    SCARG(uap, key), SCARG(uap, size), SCARG(uap, shmflg), mode));

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	if (key != IPC_PRIVATE) {
		error = shmget_existing(l, uap, mode, retval);
		if (error != -1) {
			mutex_exit(&shm_lock);
			return error;
		}
		if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) {
			mutex_exit(&shm_lock);
			return ENOENT;
		}
	}
	error = 0;

	/*
	 * Check the limits.
	 */
	size = SCARG(uap, size);
	if (size < shminfo.shmmin || size > shminfo.shmmax) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}
	if (shm_nused >= shminfo.shmmni) {
		mutex_exit(&shm_lock);
		return ENOSPC;
	}
	size = (size + PGOFSET) & ~PGOFSET;
	if (shm_committed + btoc(size) > shminfo.shmall) {
		mutex_exit(&shm_lock);
		return ENOMEM;
	}

	/* Find the first available segment */
	if (shm_last_free < 0) {
		for (segnum = 0; segnum < shminfo.shmmni; segnum++)
			if (shmsegs[segnum].shm_perm.mode & SHMSEG_FREE)
				break;
		KASSERT(segnum < shminfo.shmmni);
	} else {
		segnum = shm_last_free;
		shm_last_free = -1;
	}

	/*
	 * Initialize the segment.
	 * We will drop the lock while allocating the memory, thus mark the
	 * segment as present but removed, so that no other thread can take it.
	 * Also, disable reallocation while the lock is dropped.
	 */
	shmseg = &shmsegs[segnum];
	shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
	shm_committed += btoc(size);
	shm_nused++;
	lockmem = shm_use_phys;
	shm_realloc_disable++;
	mutex_exit(&shm_lock);

	/* Allocate the memory object and lock it if needed */
	shmseg->_shm_internal = uao_create(size, 0);
	if (lockmem) {
		/* Wire the pages and tag it */
		error = uvm_obj_wirepages(shmseg->_shm_internal, 0, size, NULL);
		if (error) {
			uao_detach(shmseg->_shm_internal);
			mutex_enter(&shm_lock);
			shm_free_segment(segnum);
			shm_realloc_disable--;
			mutex_exit(&shm_lock);
			return error;
		}
	}

	/*
	 * Note that while the segment is marked, there is no need to hold
	 * the lock while setting it up (except for shm_perm.mode).
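	 * While SHMSEG_ALLOCATED | SHMSEG_REMOVED is set, lookups via
	 * shm_find_segment_by_shmid() skip this segment and shmget_existing()
	 * waits on shm_cv, so nobody else can observe the partially
	 * initialized fields.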
769 */ 770 shmseg->shm_perm._key = SCARG(uap, key); 771 shmseg->shm_perm._seq = (shmseg->shm_perm._seq + 1) & 0x7fff; 772 *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm); 773 774 shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_geteuid(cred); 775 shmseg->shm_perm.cgid = shmseg->shm_perm.gid = kauth_cred_getegid(cred); 776 shmseg->shm_segsz = SCARG(uap, size); 777 shmseg->shm_cpid = l->l_proc->p_pid; 778 shmseg->shm_lpid = shmseg->shm_nattch = 0; 779 shmseg->shm_atime = shmseg->shm_dtime = 0; 780 shmseg->shm_ctime = time_second; 781 782 /* 783 * Segment is initialized. 784 * Enter the lock, mark as allocated, and notify waiters (if any). 785 * Also, unmark the state of reallocation. 786 */ 787 mutex_enter(&shm_lock); 788 shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) | 789 (mode & (ACCESSPERMS | SHMSEG_RMLINGER)) | 790 SHMSEG_ALLOCATED | (lockmem ? SHMSEG_WIRED : 0); 791 if (shmseg->shm_perm.mode & SHMSEG_WANTED) { 792 shmseg->shm_perm.mode &= ~SHMSEG_WANTED; 793 cv_broadcast(&shm_cv[segnum]); 794 } 795 shm_realloc_disable--; 796 cv_broadcast(&shm_realloc_cv); 797 mutex_exit(&shm_lock); 798 799 return error; 800 } 801 802 void 803 shmfork(struct vmspace *vm1, struct vmspace *vm2) 804 { 805 struct shmmap_state *shmmap_s; 806 struct shmmap_entry *shmmap_se; 807 808 SHMPRINTF(("shmfork %p->%p\n", vm1, vm2)); 809 mutex_enter(&shm_lock); 810 vm2->vm_shm = vm1->vm_shm; 811 if (vm1->vm_shm) { 812 shmmap_s = (struct shmmap_state *)vm1->vm_shm; 813 SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) 814 shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch++; 815 shmmap_s->nrefs++; 816 } 817 mutex_exit(&shm_lock); 818 } 819 820 void 821 shmexit(struct vmspace *vm) 822 { 823 struct shmmap_state *shmmap_s; 824 struct shmmap_entry *shmmap_se; 825 826 mutex_enter(&shm_lock); 827 shmmap_s = (struct shmmap_state *)vm->vm_shm; 828 if (shmmap_s == NULL) { 829 mutex_exit(&shm_lock); 830 return; 831 } 832 vm->vm_shm = NULL; 833 834 if (--shmmap_s->nrefs > 0) { 835 SHMPRINTF(("shmexit: vm %p drop ref (%d entries), refs = %d\n", 836 vm, shmmap_s->nitems, shmmap_s->nrefs)); 837 SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) { 838 shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch--; 839 } 840 mutex_exit(&shm_lock); 841 return; 842 } 843 844 SHMPRINTF(("shmexit: vm %p cleanup (%d entries)\n", vm, shmmap_s->nitems)); 845 if (shmmap_s->nitems == 0) { 846 mutex_exit(&shm_lock); 847 kmem_free(shmmap_s, sizeof(struct shmmap_state)); 848 return; 849 } 850 851 /* 852 * Delete the entry from shm map. 853 */ 854 for (;;) { 855 struct shmid_ds *shmseg; 856 struct uvm_object *uobj; 857 size_t sz; 858 859 shmmap_se = SLIST_FIRST(&shmmap_s->entries); 860 KASSERT(shmmap_se != NULL); 861 862 shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)]; 863 sz = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET; 864 /* shm_delete_mapping() removes from the list. 
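		 * The lock is dropped before uvm_deallocate(), hence the
		 * list head is re-read once the lock is re-taken below.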
		 */
		uobj = shm_delete_mapping(shmmap_s, shmmap_se);
		mutex_exit(&shm_lock);

		uvm_deallocate(&vm->vm_map, shmmap_se->va, sz);
		if (uobj != NULL) {
			uao_detach(uobj);
		}
		kmem_free(shmmap_se, sizeof(struct shmmap_entry));

		if (SLIST_EMPTY(&shmmap_s->entries)) {
			break;
		}
		mutex_enter(&shm_lock);
		KASSERT(!SLIST_EMPTY(&shmmap_s->entries));
	}
	kmem_free(shmmap_s, sizeof(struct shmmap_state));
}

static int
shmrealloc(int newshmni)
{
	vaddr_t v;
	struct shmid_ds *oldshmsegs, *newshmsegs;
	kcondvar_t *newshm_cv, *oldshm_cv;
	size_t sz;
	int i, lsegid, oldshmni;

	if (newshmni < 1)
		return EINVAL;

	/* Allocate new memory area */
	sz = ALIGN(newshmni * sizeof(struct shmid_ds)) +
	    ALIGN(newshmni * sizeof(kcondvar_t));
	sz = round_page(sz);
	v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
	if (v == 0)
		return ENOMEM;

	mutex_enter(&shm_lock);
	while (shm_realloc_state || shm_realloc_disable)
		cv_wait(&shm_realloc_cv, &shm_lock);

	/*
	 * Find the index of the last used segment.  Fail if we are trying
	 * to reallocate to fewer segments than are currently in use.
	 */
	lsegid = 0;
	for (i = 0; i < shminfo.shmmni; i++)
		if ((shmsegs[i].shm_perm.mode & SHMSEG_FREE) == 0)
			lsegid = i;
	if (lsegid >= newshmni) {
		mutex_exit(&shm_lock);
		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
		return EBUSY;
	}
	shm_realloc_state = true;

	newshmsegs = (void *)v;
	newshm_cv = (void *)((uintptr_t)newshmsegs +
	    ALIGN(newshmni * sizeof(struct shmid_ds)));

	/* Copy all memory to the new area */
	for (i = 0; i < shm_nused; i++) {
		cv_init(&newshm_cv[i], "shmwait");
		(void)memcpy(&newshmsegs[i], &shmsegs[i],
		    sizeof(newshmsegs[0]));
	}

	/* Mark all new segments, if there are any, as free */
	for (; i < newshmni; i++) {
		cv_init(&newshm_cv[i], "shmwait");
		newshmsegs[i].shm_perm.mode = SHMSEG_FREE;
		newshmsegs[i].shm_perm._seq = 0;
	}

	oldshmsegs = shmsegs;
	oldshmni = shminfo.shmmni;
	shminfo.shmmni = newshmni;
	shmsegs = newshmsegs;
	shm_cv = newshm_cv;

	/* Reallocation completed - notify all waiters, if any */
	shm_realloc_state = false;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);

	/* Release now unused resources.
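	 * The old condvars are destroyed and the old wired area is freed
	 * outside shm_lock; nothing can reference them any longer, since
	 * shmsegs and shm_cv were switched to the new area above.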
*/ 952 oldshm_cv = (void *)((uintptr_t)oldshmsegs + 953 ALIGN(oldshmni * sizeof(struct shmid_ds))); 954 for (i = 0; i < oldshmni; i++) 955 cv_destroy(&oldshm_cv[i]); 956 957 sz = ALIGN(oldshmni * sizeof(struct shmid_ds)) + 958 ALIGN(oldshmni * sizeof(kcondvar_t)); 959 sz = round_page(sz); 960 uvm_km_free(kernel_map, (vaddr_t)oldshmsegs, sz, UVM_KMF_WIRED); 961 962 return 0; 963 } 964 965 int 966 shminit(struct sysctllog **clog) 967 { 968 vaddr_t v; 969 size_t sz; 970 int i; 971 972 mutex_init(&shm_lock, MUTEX_DEFAULT, IPL_NONE); 973 cv_init(&shm_realloc_cv, "shmrealc"); 974 975 /* Allocate the wired memory for our structures */ 976 sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) + 977 ALIGN(shminfo.shmmni * sizeof(kcondvar_t)); 978 sz = round_page(sz); 979 v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO); 980 if (v == 0) { 981 printf("sysv_shm: cannot allocate memory"); 982 return ENOMEM; 983 } 984 shmsegs = (void *)v; 985 shm_cv = (void *)((uintptr_t)shmsegs + 986 ALIGN(shminfo.shmmni * sizeof(struct shmid_ds))); 987 988 if (shminfo.shmmax == 0) 989 shminfo.shmmax = uimax(physmem / 4, 1024) * PAGE_SIZE; 990 else 991 shminfo.shmmax *= PAGE_SIZE; 992 shminfo.shmall = shminfo.shmmax / PAGE_SIZE; 993 994 for (i = 0; i < shminfo.shmmni; i++) { 995 cv_init(&shm_cv[i], "shmwait"); 996 shmsegs[i].shm_perm.mode = SHMSEG_FREE; 997 shmsegs[i].shm_perm._seq = 0; 998 } 999 shm_last_free = 0; 1000 shm_nused = 0; 1001 shm_committed = 0; 1002 shm_realloc_disable = 0; 1003 shm_realloc_state = false; 1004 1005 kern_has_sysvshm = 1; 1006 1007 /* Load the callback function pointers for the uvm subsystem */ 1008 uvm_shmexit = shmexit; 1009 uvm_shmfork = shmfork; 1010 1011 #ifdef _MODULE 1012 if (clog) 1013 sysctl_ipc_shm_setup(clog); 1014 #endif 1015 return 0; 1016 } 1017 1018 int 1019 shmfini(void) 1020 { 1021 size_t sz; 1022 int i; 1023 vaddr_t v = (vaddr_t)shmsegs; 1024 1025 mutex_enter(&shm_lock); 1026 if (shm_nused) { 1027 mutex_exit(&shm_lock); 1028 return 1; 1029 } 1030 1031 /* Clear the callback function pointers for the uvm subsystem */ 1032 uvm_shmexit = NULL; 1033 uvm_shmfork = NULL; 1034 1035 /* Destroy all condvars */ 1036 for (i = 0; i < shminfo.shmmni; i++) 1037 cv_destroy(&shm_cv[i]); 1038 cv_destroy(&shm_realloc_cv); 1039 1040 /* Free the allocated/wired memory */ 1041 sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) + 1042 ALIGN(shminfo.shmmni * sizeof(kcondvar_t)); 1043 sz = round_page(sz); 1044 uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED); 1045 1046 /* Release and destroy our mutex */ 1047 mutex_exit(&shm_lock); 1048 mutex_destroy(&shm_lock); 1049 1050 kern_has_sysvshm = 0; 1051 1052 return 0; 1053 } 1054 1055 static int 1056 sysctl_ipc_shmmni(SYSCTLFN_ARGS) 1057 { 1058 int newsize, error; 1059 struct sysctlnode node; 1060 node = *rnode; 1061 node.sysctl_data = &newsize; 1062 1063 newsize = shminfo.shmmni; 1064 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1065 if (error || newp == NULL) 1066 return error; 1067 1068 sysctl_unlock(); 1069 error = shmrealloc(newsize); 1070 sysctl_relock(); 1071 return error; 1072 } 1073 1074 static int 1075 sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS) 1076 { 1077 uint32_t newsize; 1078 int error; 1079 struct sysctlnode node; 1080 node = *rnode; 1081 node.sysctl_data = &newsize; 1082 1083 newsize = shminfo.shmall; 1084 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1085 if (error || newp == NULL) 1086 return error; 1087 1088 if (newsize < 1) 1089 return EINVAL; 1090 1091 shminfo.shmall = newsize; 1092 shminfo.shmmax = (uint64_t)shminfo.shmall * 
PAGE_SIZE; 1093 1094 return 0; 1095 } 1096 1097 static int 1098 sysctl_ipc_shmmax(SYSCTLFN_ARGS) 1099 { 1100 uint64_t newsize; 1101 int error; 1102 struct sysctlnode node; 1103 node = *rnode; 1104 node.sysctl_data = &newsize; 1105 1106 newsize = shminfo.shmmax; 1107 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1108 if (error || newp == NULL) 1109 return error; 1110 1111 if (newsize < PAGE_SIZE) 1112 return EINVAL; 1113 1114 shminfo.shmmax = round_page(newsize); 1115 shminfo.shmall = shminfo.shmmax >> PAGE_SHIFT; 1116 1117 return 0; 1118 } 1119 1120 SYSCTL_SETUP(sysctl_ipc_shm_setup, "sysctl kern.ipc subtree setup") 1121 { 1122 1123 sysctl_createv(clog, 0, NULL, NULL, 1124 CTLFLAG_PERMANENT, 1125 CTLTYPE_NODE, "ipc", 1126 SYSCTL_DESCR("SysV IPC options"), 1127 NULL, 0, NULL, 0, 1128 CTL_KERN, KERN_SYSVIPC, CTL_EOL); 1129 sysctl_createv(clog, 0, NULL, NULL, 1130 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1131 CTLTYPE_QUAD, "shmmax", 1132 SYSCTL_DESCR("Max shared memory segment size in bytes"), 1133 sysctl_ipc_shmmax, 0, &shminfo.shmmax, 0, 1134 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAX, CTL_EOL); 1135 sysctl_createv(clog, 0, NULL, NULL, 1136 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1137 CTLTYPE_INT, "shmmni", 1138 SYSCTL_DESCR("Max number of shared memory identifiers"), 1139 sysctl_ipc_shmmni, 0, &shminfo.shmmni, 0, 1140 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMNI, CTL_EOL); 1141 sysctl_createv(clog, 0, NULL, NULL, 1142 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1143 CTLTYPE_INT, "shmseg", 1144 SYSCTL_DESCR("Max shared memory segments per process"), 1145 NULL, 0, &shminfo.shmseg, 0, 1146 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMSEG, CTL_EOL); 1147 sysctl_createv(clog, 0, NULL, NULL, 1148 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1149 CTLTYPE_INT, "shmmaxpgs", 1150 SYSCTL_DESCR("Max amount of shared memory in pages"), 1151 sysctl_ipc_shmmaxpgs, 0, &shminfo.shmall, 0, 1152 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAXPGS, CTL_EOL); 1153 sysctl_createv(clog, 0, NULL, NULL, 1154 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1155 CTLTYPE_INT, "shm_use_phys", 1156 SYSCTL_DESCR("Enable/disable locking of shared memory in " 1157 "physical memory"), NULL, 0, &shm_use_phys, 0, 1158 CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMUSEPHYS, CTL_EOL); 1159 } 1160
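
/*
 * The limits above are exported under kern.ipc.  An illustrative way to
 * resize the identifier table at run time (hypothetical value), handled
 * by sysctl_ipc_shmmni() and shmrealloc():
 *
 *	sysctl -w kern.ipc.shmmni=2048
 */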