/*-
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1982, 1986, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and code derived from software contributed to
 * Berkeley by William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: mem.c 1.13 89/10/08$
 * from: @(#)mem.c 7.2 (Berkeley) 5/9/91
 * $FreeBSD: src/sys/i386/i386/mem.c,v 1.79.2.9 2003/01/04 22:58:01 njl Exp $
 */

/*
 * Memory special file
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/filio.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/queue.h>
#include <sys/random.h>
#include <sys/signalvar.h>
#include <sys/uio.h>
#include <sys/vnode.h>
#include <sys/sysctl.h>

#include <sys/signal2.h>
#include <sys/spinlock2.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

static d_open_t mmopen;
static d_close_t mmclose;
static d_read_t mmread;
static d_write_t mmwrite;
static d_ioctl_t mmioctl;
#if 0
static d_mmap_t memmmap;
#endif
static d_kqfilter_t mmkqfilter;
static int memuksmap(vm_map_backing_t ba, int op, cdev_t dev, vm_page_t fake);

#define CDEV_MAJOR 2
static struct dev_ops mem_ops = {
        { "mem", 0, D_MPSAFE | D_QUICK },
        .d_open = mmopen,
        .d_close = mmclose,
        .d_read = mmread,
        .d_write = mmwrite,
        .d_ioctl = mmioctl,
        .d_kqfilter = mmkqfilter,
#if 0
        .d_mmap = memmmap,
#endif
        .d_uksmap = memuksmap
};

static struct dev_ops mem_ops_mem = {
        { "mem", 0, D_MEM | D_MPSAFE | D_QUICK },
        .d_open = mmopen,
        .d_close = mmclose,
        .d_read = mmread,
        .d_write = mmwrite,
        .d_ioctl = mmioctl,
        .d_kqfilter = mmkqfilter,
#if 0
        .d_mmap = memmmap,
#endif
        .d_uksmap = memuksmap
};

static struct dev_ops mem_ops_noq = {
        { "mem", 0, D_MPSAFE },
        .d_open = mmopen,
        .d_close = mmclose,
        .d_read = mmread,
        .d_write = mmwrite,
        .d_ioctl = mmioctl,
        .d_kqfilter = mmkqfilter,
#if 0
        .d_mmap = memmmap,
#endif
        .d_uksmap = memuksmap
};

static int rand_bolt;
static caddr_t zbuf;
static cdev_t zerodev = NULL;
static struct lock mem_lock = LOCK_INITIALIZER("memlk", 0, 0);

MALLOC_DEFINE(M_MEMDESC, "memdesc", "memory range descriptors");
static int mem_ioctl (cdev_t, u_long, caddr_t, int, struct ucred *);
static int random_ioctl (cdev_t, u_long, caddr_t, int, struct ucred *);

struct mem_range_softc mem_range_softc;

static int seedenable;
SYSCTL_INT(_kern, OID_AUTO, seedenable, CTLFLAG_RW, &seedenable, 0, "");

static int
mmopen(struct dev_open_args *ap)
{
        cdev_t dev = ap->a_head.a_dev;
        int error;

        switch (minor(dev)) {
        case 0:
        case 1:
                /*
                 * /dev/mem and /dev/kmem
                 */
                if (ap->a_oflags & FWRITE) {
                        if (securelevel > 0 || kernel_mem_readonly)
                                return (EPERM);
                }
                error = 0;
                break;
        case 6:
                /*
                 * /dev/kpmap can only be opened for reading.
                 */
                if (ap->a_oflags & FWRITE)
                        return (EPERM);
                error = 0;
                break;
        case 14:
                error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0);
                if (error != 0)
                        break;
                if (securelevel > 0 || kernel_mem_readonly) {
                        error = EPERM;
                        break;
                }
                error = cpu_set_iopl();
                break;
        default:
                error = 0;
                break;
        }
        return (error);
}

static int
mmclose(struct dev_close_args *ap)
{
        cdev_t dev = ap->a_head.a_dev;
        int error;

        switch (minor(dev)) {
        case 14:
                error = cpu_clr_iopl();
                break;
        default:
                error = 0;
                break;
        }
        return (error);
}

/*
 * Common read/write handler for all of the memory special devices.
 */
static int
mmrw(cdev_t dev, struct uio *uio, int flags)
{
        int o;
        u_int c;
        u_int poolsize;
        u_long v;
        struct iovec *iov;
        int error = 0;
        caddr_t buf = NULL;

        while (uio->uio_resid > 0 && error == 0) {
                iov = uio->uio_iov;
                if (iov->iov_len == 0) {
                        uio->uio_iov++;
                        uio->uio_iovcnt--;
                        if (uio->uio_iovcnt < 0)
                                panic("mmrw");
                        continue;
                }
                switch (minor(dev)) {
                case 0:
                        /*
                         * minor device 0 is physical memory, /dev/mem
                         */
                        v = uio->uio_offset;
                        v &= ~(long)PAGE_MASK;
                        pmap_kenter((vm_offset_t)ptvmmap, v);
                        o = (int)uio->uio_offset & PAGE_MASK;
                        c = (u_int)(PAGE_SIZE - ((uintptr_t)iov->iov_base & PAGE_MASK));
                        c = min(c, (u_int)(PAGE_SIZE - o));
                        c = min(c, (u_int)iov->iov_len);
                        error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio);
                        pmap_kremove((vm_offset_t)ptvmmap);
                        continue;

                case 1: {
                        /*
                         * minor device 1 is kernel memory, /dev/kmem
                         */
                        vm_offset_t saddr, eaddr;
                        int prot;

                        c = iov->iov_len;

                        /*
                         * Make sure that all of the pages are currently
                         * resident so that we don't create any zero-fill
                         * pages.
                         */
                        saddr = trunc_page(uio->uio_offset);
                        eaddr = round_page(uio->uio_offset + c);
                        if (saddr > eaddr)
                                return EFAULT;

                        /*
                         * Make sure the kernel addresses are mapped.
                         * platform_direct_mapped() can be used to bypass
                         * default mapping via the page table (virtual kernels
                         * contain a lot of out-of-band data).
                         */
                        prot = VM_PROT_READ;
                        if (uio->uio_rw != UIO_READ)
                                prot |= VM_PROT_WRITE;
                        error = kvm_access_check(saddr, eaddr, prot);
                        if (error)
                                return (error);
                        error = uiomove((caddr_t)(vm_offset_t)uio->uio_offset,
                                        (int)c, uio);
                        continue;
                }
                case 2:
                        /*
                         * minor device 2 (/dev/null) is EOF/RATHOLE
                         */
                        if (uio->uio_rw == UIO_READ)
                                return (0);
                        c = iov->iov_len;
                        break;
                case 3:
                        /*
                         * minor device 3 (/dev/random) is a source of filth
                         * on read, a seeder on write
                         */
                        if (buf == NULL)
                                buf = kmalloc(PAGE_SIZE, M_TEMP, M_WAITOK);
                        c = min(iov->iov_len, PAGE_SIZE);
                        if (uio->uio_rw == UIO_WRITE) {
                                error = uiomove(buf, (int)c, uio);
                                if (error == 0 &&
                                    seedenable &&
                                    securelevel <= 0) {
                                        error = add_buffer_randomness_src(buf, c, RAND_SRC_SEEDING);
                                } else if (error == 0) {
                                        error = EPERM;
                                }
                        } else {
                                poolsize = read_random(buf, c);
                                if (poolsize == 0) {
                                        if (buf)
                                                kfree(buf, M_TEMP);
                                        if ((flags & IO_NDELAY) != 0)
                                                return (EWOULDBLOCK);
                                        return (0);
                                }
                                c = min(c, poolsize);
                                error = uiomove(buf, (int)c, uio);
                        }
                        continue;
                case 4:
                        /*
                         * minor device 4 (/dev/urandom) is a source of muck
                         * on read; writes are disallowed.
                         */
                        c = min(iov->iov_len, PAGE_SIZE);
                        if (uio->uio_rw == UIO_WRITE) {
                                error = EPERM;
                                break;
                        }
                        if (CURSIG(curthread->td_lwp) != 0) {
                                /*
                                 * Use tsleep() to get the error code right.
                                 * It should return immediately.
                                 */
                                error = tsleep(&rand_bolt, PCATCH, "urand", 1);
                                if (error != 0 && error != EWOULDBLOCK)
                                        continue;
                        }
                        if (buf == NULL)
                                buf = kmalloc(PAGE_SIZE, M_TEMP, M_WAITOK);
                        poolsize = read_random_unlimited(buf, c);
                        c = min(c, poolsize);
                        error = uiomove(buf, (int)c, uio);
                        continue;
                /* case 5: read/write not supported, mmap only */
                /* case 6: read/write not supported, mmap only */
                case 12:
                        /*
                         * minor device 12 (/dev/zero) is a source of nulls
                         * on read; writes are disallowed.
                         */
                        if (uio->uio_rw == UIO_WRITE) {
                                c = iov->iov_len;
                                break;
                        }
                        if (zbuf == NULL) {
                                zbuf = (caddr_t)kmalloc(PAGE_SIZE, M_TEMP,
                                                        M_WAITOK | M_ZERO);
                        }
                        c = min(iov->iov_len, PAGE_SIZE);
                        error = uiomove(zbuf, (int)c, uio);
                        continue;
                default:
                        return (ENODEV);
                }
                if (error)
                        break;
                iov->iov_base = (char *)iov->iov_base + c;
                iov->iov_len -= c;
                uio->uio_offset += c;
                uio->uio_resid -= c;
        }
        if (buf)
                kfree(buf, M_TEMP);
        return (error);
}

static int
mmread(struct dev_read_args *ap)
{
        return(mmrw(ap->a_head.a_dev, ap->a_uio, ap->a_ioflag));
}

static int
mmwrite(struct dev_write_args *ap)
{
        return(mmrw(ap->a_head.a_dev, ap->a_uio, ap->a_ioflag));
}

/*******************************************************\
* allow user processes to MMAP some memory sections    *
* instead of going through read/write                  *
\*******************************************************/

static int user_kernel_mapping(vm_map_backing_t ba, int num,
                        vm_ooffset_t offset, vm_ooffset_t *resultp);

static int
memuksmap(vm_map_backing_t ba, int op, cdev_t dev, vm_page_t fake)
{
        vm_ooffset_t result;
        int error;
        struct proc *p;
        struct lwp *lp;

        error = 0;

        switch(op) {
        case UKSMAPOP_ADD:
                /*
                 * /dev/lpmap only (minor 7)
                 *
                 * Don't do anything until the page is faulted in.  Clear
                 * our flags on this possibly replicated ba.  vm_map_entry
                 * replication can occur before the new process/lwp is
                 * created, so there's nothing to link into.
                 */
                if (minor(dev) != 7)
                        break;
                atomic_clear_int(&ba->flags, VM_MAP_LWP_LINKED);
                break;
        case UKSMAPOP_REM:
                /*
                 * /dev/lpmap only (minor 7)
                 *
                 * The mapping is only on the lwp list after it has been
                 * faulted in.
                 */
                if (minor(dev) != 7)
                        break;
                if ((ba->flags & VM_MAP_LWP_LINKED) == 0)
                        break;

                p = curproc;
                lwkt_gettoken_shared(&p->p_token);
                lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree,
                                           (int)(intptr_t)ba->aux_info);
                if (lp) {
                        LWPHOLD(lp);
                        lwkt_reltoken(&p->p_token);
                        spin_lock(&lp->lwp_spin);
                        TAILQ_REMOVE(&lp->lwp_lpmap_backing_list, ba, entry);
                        atomic_clear_int(&ba->flags, VM_MAP_LWP_LINKED);
                        spin_unlock(&lp->lwp_spin);
                        LWPRELE(lp);
                } else {
                        lwkt_reltoken(&p->p_token);
                }
                break;
        case UKSMAPOP_FAULT:
                switch (minor(dev)) {
                case 0:
                        /*
                         * minor device 0 is physical memory
                         */
                        fake->phys_addr = ptoa(fake->pindex);
                        break;
                case 1:
                        /*
                         * minor device 1 is kernel memory
                         */
                        fake->phys_addr = vtophys(ptoa(fake->pindex));
                        break;
                case 5:
                case 6:
                case 7:
                        /*
                         * minor device 5 is /dev/upmap (see sys/upmap.h)
                         * minor device 6 is /dev/kpmap (see sys/upmap.h)
                         * minor device 7 is /dev/lpmap (see sys/upmap.h)
                         */
                        result = 0;
                        error = user_kernel_mapping(ba,
                                                    minor(dev),
                                                    ptoa(fake->pindex),
                                                    &result);
                        fake->phys_addr = result;
                        break;
                default:
                        error = EINVAL;
                        break;
                }
                break;
        default:
                error = EINVAL;
                break;
        }
        return error;
}

static int
mmioctl(struct dev_ioctl_args *ap)
{
        cdev_t dev = ap->a_head.a_dev;
        int error;

        lockmgr(&mem_lock, LK_EXCLUSIVE);

        switch (minor(dev)) {
        case 0:
                error = mem_ioctl(dev, ap->a_cmd, ap->a_data,
                                  ap->a_fflag, ap->a_cred);
                break;
        case 3:
        case 4:
                error = random_ioctl(dev, ap->a_cmd, ap->a_data,
                                     ap->a_fflag, ap->a_cred);
                break;
        default:
                error = ENODEV;
                break;
        }

        lockmgr(&mem_lock, LK_RELEASE);

        return (error);
}

/*
 * Operations for changing memory attributes.
 *
 * This is basically just an ioctl shim for mem_range_attr_get
 * and mem_range_attr_set.
 */
static int
mem_ioctl(cdev_t dev, u_long cmd, caddr_t data, int flags, struct ucred *cred)
{
        int nd, error = 0;
        struct mem_range_op *mo = (struct mem_range_op *)data;
        struct mem_range_desc *md;

        /* is this for us? */
        if ((cmd != MEMRANGE_GET) &&
            (cmd != MEMRANGE_SET))
                return (ENOTTY);

        /* any chance we can handle this? */
        if (mem_range_softc.mr_op == NULL)
                return (EOPNOTSUPP);

        /* do we have any descriptors? */
        if (mem_range_softc.mr_ndesc == 0)
                return (ENXIO);

        switch (cmd) {
        case MEMRANGE_GET:
                nd = imin(mo->mo_arg[0], mem_range_softc.mr_ndesc);
                if (nd > 0) {
                        md = (struct mem_range_desc *)
                            kmalloc(nd * sizeof(struct mem_range_desc),
                                    M_MEMDESC, M_WAITOK);
                        error = mem_range_attr_get(md, &nd);
                        if (!error)
                                error = copyout(md, mo->mo_desc,
                                                nd * sizeof(struct mem_range_desc));
                        kfree(md, M_MEMDESC);
                } else {
                        nd = mem_range_softc.mr_ndesc;
                }
                mo->mo_arg[0] = nd;
                break;

        case MEMRANGE_SET:
                md = (struct mem_range_desc *)kmalloc(sizeof(struct mem_range_desc),
                                                      M_MEMDESC, M_WAITOK);
                error = copyin(mo->mo_desc, md, sizeof(struct mem_range_desc));
                /* clamp description string */
                md->mr_owner[sizeof(md->mr_owner) - 1] = 0;
                if (error == 0)
                        error = mem_range_attr_set(md, &mo->mo_arg[0]);
                kfree(md, M_MEMDESC);
                break;
        }
        return (error);
}

/*
 * Implementation-neutral, kernel-callable functions for manipulating
 * memory range attributes.
 */
int
mem_range_attr_get(struct mem_range_desc *mrd, int *arg)
{
        /* can we handle this? */
        if (mem_range_softc.mr_op == NULL)
                return (EOPNOTSUPP);

        if (*arg == 0) {
                *arg = mem_range_softc.mr_ndesc;
        } else {
                bcopy(mem_range_softc.mr_desc, mrd,
                      (*arg) * sizeof(struct mem_range_desc));
        }
        return (0);
}

int
mem_range_attr_set(struct mem_range_desc *mrd, int *arg)
{
        /* can we handle this? */
        if (mem_range_softc.mr_op == NULL)
                return (EOPNOTSUPP);

        return (mem_range_softc.mr_op->set(&mem_range_softc, mrd, arg));
}

void
mem_range_AP_init(void)
{
        if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
                mem_range_softc.mr_op->initAP(&mem_range_softc);
}
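
/*
 * Usage note (illustrative sketch, not part of the driver): the
 * MEMRANGE_GET ioctl handled above can be issued from userland on
 * /dev/mem.  Per mem_ioctl(), passing mo_arg[0] == 0 reports the number
 * of descriptors, which can then be used to size the buffer for a second
 * call.  The function and variable names below are hypothetical.
 *
 *	#include <sys/ioctl.h>
 *	#include <sys/memrange.h>
 *	#include <fcntl.h>
 *	#include <stdlib.h>
 *	#include <unistd.h>
 *
 *	static int
 *	fetch_mem_ranges(struct mem_range_desc **mdp, int *countp)
 *	{
 *		struct mem_range_op mo;
 *		int fd;
 *
 *		fd = open("/dev/mem", O_RDONLY);
 *		if (fd < 0)
 *			return (-1);
 *		mo.mo_arg[0] = 0;	// first call: query descriptor count
 *		if (ioctl(fd, MEMRANGE_GET, &mo) < 0) {
 *			close(fd);
 *			return (-1);
 *		}
 *		*countp = mo.mo_arg[0];
 *		*mdp = calloc(*countp, sizeof(**mdp));
 *		if (*mdp == NULL) {
 *			close(fd);
 *			return (-1);
 *		}
 *		mo.mo_desc = *mdp;	// second call: fetch the descriptors
 *		if (ioctl(fd, MEMRANGE_GET, &mo) < 0) {
 *			free(*mdp);
 *			close(fd);
 *			return (-1);
 *		}
 *		close(fd);
 *		return (0);
 *	}
 */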

static int
random_ioctl(cdev_t dev, u_long cmd, caddr_t data, int flags, struct ucred *cred)
{
        int error;
        int intr;

        /*
         * Even inspecting the state is privileged, since it gives a hint
         * about how easily the randomness might be guessed.
         */
        error = 0;

        switch (cmd) {
        /* Really handled in upper layer */
        case FIOASYNC:
                break;
        case MEM_SETIRQ:
                intr = *(int16_t *)data;
                if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
                        break;
                if (intr < 0 || intr >= MAX_INTS)
                        return (EINVAL);
                register_randintr(intr);
                break;
        case MEM_CLEARIRQ:
                intr = *(int16_t *)data;
                if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
                        break;
                if (intr < 0 || intr >= MAX_INTS)
                        return (EINVAL);
                unregister_randintr(intr);
                break;
        case MEM_RETURNIRQ:
                error = ENOTSUP;
                break;
        case MEM_FINDIRQ:
                intr = *(int16_t *)data;
                if ((error = priv_check_cred(cred, PRIV_ROOT, 0)) != 0)
                        break;
                if (intr < 0 || intr >= MAX_INTS)
                        return (EINVAL);
                intr = next_registered_randintr(intr);
                if (intr == MAX_INTS)
                        return (ENOENT);
                *(u_int16_t *)data = intr;
                break;
        default:
                error = ENOTSUP;
                break;
        }
        return (error);
}

static int
mm_filter_read(struct knote *kn, long hint)
{
        return (1);
}

static int
mm_filter_write(struct knote *kn, long hint)
{
        return (1);
}

static void
dummy_filter_detach(struct knote *kn) {}

/* random_filter_read() is implemented in kern_nrandom.c */
static struct filterops random_read_filtops =
        { FILTEROP_ISFD | FILTEROP_MPSAFE, NULL, dummy_filter_detach, random_filter_read };

static struct filterops mm_read_filtops =
        { FILTEROP_ISFD | FILTEROP_MPSAFE, NULL, dummy_filter_detach, mm_filter_read };

static struct filterops mm_write_filtops =
        { FILTEROP_ISFD | FILTEROP_MPSAFE, NULL, dummy_filter_detach, mm_filter_write };

static int
mmkqfilter(struct dev_kqfilter_args *ap)
{
        struct knote *kn = ap->a_kn;
        cdev_t dev = ap->a_head.a_dev;

        ap->a_result = 0;
        switch (kn->kn_filter) {
        case EVFILT_READ:
                switch (minor(dev)) {
                case 3:
                        kn->kn_fop = &random_read_filtops;
                        break;
                default:
                        kn->kn_fop = &mm_read_filtops;
                        break;
                }
                break;
        case EVFILT_WRITE:
                kn->kn_fop = &mm_write_filtops;
                break;
        default:
                ap->a_result = EOPNOTSUPP;
                return (0);
        }

        return (0);
}

int
iszerodev(cdev_t dev)
{
        return (zerodev == dev);
}

/*
 * /dev/lpmap, /dev/upmap, /dev/kpmap.
 */
static int
user_kernel_mapping(vm_map_backing_t ba, int num, vm_ooffset_t offset,
                    vm_ooffset_t *resultp)
{
        struct proc *p;
        struct lwp *lp;
        int error;
        int invfork;

        p = curthread->td_proc;
        if (p == NULL)
                return (EINVAL);
        if (offset < 0)
                return (EINVAL);

        /*
         * If this is a child currently in vfork the pmap is shared with
         * the parent!  We need to actually set up the parent's p_upmap,
         * not the child's, and we need to set the invfork flag.  Userland
         * will probably adjust its static state so it must be consistent
         * with the parent or userland will be really badly confused.
         *
         * (This situation can happen when user code in vfork() calls
         * libc's getpid() or some other function which then decides
         * it wants the upmap.)
         */
        if (p->p_flags & P_PPWAIT) {
                p = p->p_pptr;
                if (p == NULL)
                        return (EINVAL);
                invfork = 1;
        } else {
                invfork = 0;
        }

        error = EINVAL;

        switch(num) {
        case 5:
                /*
                 * /dev/upmap - maps RW per-process shared user-kernel area.
                 */
                if (p->p_upmap == NULL)
                        proc_usermap(p, invfork);
                else if (invfork)
                        p->p_upmap->invfork = invfork;

                if (p->p_upmap &&
                    offset < roundup2(sizeof(*p->p_upmap), PAGE_SIZE)) {
                        /* only good for current process */
                        *resultp = pmap_kextract((vm_offset_t)p->p_upmap +
                                                 offset);
                        error = 0;
                }
                break;
        case 6:
                /*
                 * /dev/kpmap - maps RO shared kernel global page
                 */
                if (kpmap &&
                    offset < roundup2(sizeof(*kpmap), PAGE_SIZE)) {
                        *resultp = pmap_kextract((vm_offset_t)kpmap +
                                                 offset);
                        error = 0;
                }
                break;
        case 7:
                /*
                 * /dev/lpmap - maps RW per-thread shared user-kernel area.
                 *
                 * Link the vm_map_backing into the lwp so we can delete
                 * the mapping when the lwp exits.  Otherwise we would end
                 * up with a lingering pmap page and the associated kernel
                 * memory disclosure.
                 *
                 * We do the linking on first-fault since the process and/or
                 * lwp might not exist at the time the map is created (i.e.
                 * in the case of fork()).
                 */
                lwkt_gettoken_shared(&p->p_token);
                lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree,
                                           (int)(intptr_t)ba->aux_info);
                if (lp == NULL) {
                        lwkt_reltoken(&p->p_token);
                        break;
                }
                LWPHOLD(lp);
                lwkt_reltoken(&p->p_token);

                /*
                 * Extract address
                 */
                if (lp->lwp_lpmap == NULL)
                        lwp_usermap(lp, invfork);

                if ((ba->flags & VM_MAP_LWP_LINKED) == 0) {
                        spin_lock(&lp->lwp_spin);
                        TAILQ_INSERT_TAIL(&lp->lwp_lpmap_backing_list,
                                          ba, entry);
                        atomic_set_int(&ba->flags, VM_MAP_LWP_LINKED);
                        spin_unlock(&lp->lwp_spin);
                }

                if (lp->lwp_lpmap &&
                    offset < roundup2(sizeof(*lp->lwp_lpmap), PAGE_SIZE)) {
                        /* only good for current process */
                        *resultp = pmap_kextract((vm_offset_t)lp->lwp_lpmap +
                                                 offset);
                        error = 0;
                }
                LWPRELE(lp);
                break;
        default:
                break;
        }
        return error;
}

static void
mem_drvinit(void *unused)
{
        /* Initialise memory range handling */
        if (mem_range_softc.mr_op != NULL)
                mem_range_softc.mr_op->init(&mem_range_softc);

        make_dev(&mem_ops_mem, 0, UID_ROOT, GID_KMEM, 0640, "mem");
        make_dev(&mem_ops_mem, 1, UID_ROOT, GID_KMEM, 0640, "kmem");
        make_dev(&mem_ops, 2, UID_ROOT, GID_WHEEL, 0666, "null");
        make_dev(&mem_ops, 3, UID_ROOT, GID_WHEEL, 0644, "random");
        make_dev(&mem_ops, 4, UID_ROOT, GID_WHEEL, 0644, "urandom");
        make_dev(&mem_ops, 5, UID_ROOT, GID_WHEEL, 0666, "upmap");
        make_dev(&mem_ops, 6, UID_ROOT, GID_WHEEL, 0444, "kpmap");
        make_dev(&mem_ops, 7, UID_ROOT, GID_WHEEL, 0666, "lpmap");
        zerodev = make_dev(&mem_ops, 12, UID_ROOT, GID_WHEEL, 0666, "zero");
        make_dev(&mem_ops_noq, 14, UID_ROOT, GID_WHEEL, 0600, "io");
}

SYSINIT(memdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, mem_drvinit,
        NULL);
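
/*
 * Usage note (illustrative sketch, not part of the driver): userland
 * normally accesses the uksmap devices above with mmap(2) rather than
 * read(2)/write(2).  For example, the read-only kernel global page
 * exported by /dev/kpmap (layout declared in <sys/upmap.h>) can be
 * mapped as follows; the function name below is hypothetical.
 *
 *	#include <sys/mman.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static void *
 *	map_kpmap(void)
 *	{
 *		void *base;
 *		int fd;
 *
 *		fd = open("/dev/kpmap", O_RDONLY);	// minor 6, read-only
 *		if (fd < 0)
 *			return (NULL);
 *		base = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED,
 *			    fd, 0);
 *		close(fd);
 *		return (base == MAP_FAILED ? NULL : base);
 *	}
 */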