1 /* $NetBSD: md.c,v 1.69 2014/03/16 05:20:26 dholland Exp $ */ 2 3 /* 4 * Copyright (c) 1995 Gordon W. Ross, Leo Weppelman. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 /* 29 * This implements a general-purpose memory-disk. 30 * See md.h for notes on the config types. 31 * 32 * Note that this driver provides the same functionality 33 * as the MFS filesystem hack, but this is better because 34 * you can use this for any filesystem type you'd like! 35 * 36 * Credit for most of the kmem ramdisk code goes to: 37 * Leo Weppelman (atari) and Phil Nelson (pc532) 38 * Credit for the ideas behind the "user space memory" code goes 39 * to the authors of the MFS implementation. 40 */ 41 42 #include <sys/cdefs.h> 43 __KERNEL_RCSID(0, "$NetBSD: md.c,v 1.69 2014/03/16 05:20:26 dholland Exp $"); 44 45 #ifdef _KERNEL_OPT 46 #include "opt_md.h" 47 #else 48 #define MEMORY_DISK_SERVER 1 49 #endif 50 51 #include <sys/param.h> 52 #include <sys/kernel.h> 53 #include <sys/malloc.h> 54 #include <sys/systm.h> 55 #include <sys/buf.h> 56 #include <sys/bufq.h> 57 #include <sys/device.h> 58 #include <sys/disk.h> 59 #include <sys/stat.h> 60 #include <sys/proc.h> 61 #include <sys/conf.h> 62 #include <sys/disklabel.h> 63 64 #include <uvm/uvm_extern.h> 65 66 #include <dev/md.h> 67 68 /* 69 * The user-space functionality is included by default. 70 * Use `options MEMORY_DISK_SERVER=0' to turn it off. 71 */ 72 #ifndef MEMORY_DISK_SERVER 73 #error MEMORY_DISK_SERVER should be defined by opt_md.h 74 #endif /* MEMORY_DISK_SERVER */ 75 76 /* 77 * We should use the raw partition for ioctl. 78 */ 79 #define MD_UNIT(unit) DISKUNIT(unit) 80 81 /* autoconfig stuff... */ 82 83 struct md_softc { 84 device_t sc_dev; /* Self. */ 85 struct disk sc_dkdev; /* hook for generic disk handling */ 86 struct md_conf sc_md; 87 kmutex_t sc_lock; /* Protect self. */ 88 kcondvar_t sc_cv; /* Wait here for work. */ 89 struct bufq_state *sc_buflist; 90 }; 91 /* shorthand for fields in sc_md: */ 92 #define sc_addr sc_md.md_addr 93 #define sc_size sc_md.md_size 94 #define sc_type sc_md.md_type 95 96 void mdattach(int); 97 98 static void md_attach(device_t, device_t, void *); 99 static int md_detach(device_t, int); 100 101 static dev_type_open(mdopen); 102 static dev_type_close(mdclose); 103 static dev_type_read(mdread); 104 static dev_type_write(mdwrite); 105 static dev_type_ioctl(mdioctl); 106 static dev_type_strategy(mdstrategy); 107 static dev_type_size(mdsize); 108 109 const struct bdevsw md_bdevsw = { 110 .d_open = mdopen, 111 .d_close = mdclose, 112 .d_strategy = mdstrategy, 113 .d_ioctl = mdioctl, 114 .d_dump = nodump, 115 .d_psize = mdsize, 116 .d_flag = D_DISK | D_MPSAFE 117 }; 118 119 const struct cdevsw md_cdevsw = { 120 .d_open = mdopen, 121 .d_close = mdclose, 122 .d_read = mdread, 123 .d_write = mdwrite, 124 .d_ioctl = mdioctl, 125 .d_stop = nostop, 126 .d_tty = notty, 127 .d_poll = nopoll, 128 .d_mmap = nommap, 129 .d_kqfilter = nokqfilter, 130 .d_flag = D_DISK 131 }; 132 133 static struct dkdriver mddkdriver = { mdstrategy, NULL }; 134 135 extern struct cfdriver md_cd; 136 CFATTACH_DECL3_NEW(md, sizeof(struct md_softc), 137 0, md_attach, md_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN); 138 139 static kmutex_t md_device_lock; /* Protect unit creation / deletion. */ 140 extern size_t md_root_size; 141 142 static void md_set_disklabel(struct md_softc *); 143 144 /* 145 * This is called if we are configured as a pseudo-device 146 */ 147 void 148 mdattach(int n) 149 { 150 151 mutex_init(&md_device_lock, MUTEX_DEFAULT, IPL_NONE); 152 if (config_cfattach_attach(md_cd.cd_name, &md_ca)) { 153 aprint_error("%s: cfattach_attach failed\n", md_cd.cd_name); 154 return; 155 } 156 } 157 158 static void 159 md_attach(device_t parent, device_t self, void *aux) 160 { 161 struct md_softc *sc = device_private(self); 162 163 sc->sc_dev = self; 164 sc->sc_type = MD_UNCONFIGURED; 165 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE); 166 cv_init(&sc->sc_cv, "mdidle"); 167 bufq_alloc(&sc->sc_buflist, "fcfs", 0); 168 169 /* XXX - Could accept aux info here to set the config. */ 170 #ifdef MEMORY_DISK_HOOKS 171 /* 172 * This external function might setup a pre-loaded disk. 173 * All it would need to do is setup the md_conf struct. 174 * See sys/dev/md_root.c for an example. 175 */ 176 md_attach_hook(device_unit(self), &sc->sc_md); 177 #endif 178 179 /* 180 * Initialize and attach the disk structure. 181 */ 182 disk_init(&sc->sc_dkdev, device_xname(self), &mddkdriver); 183 disk_attach(&sc->sc_dkdev); 184 185 if (sc->sc_type != MD_UNCONFIGURED) 186 md_set_disklabel(sc); 187 188 if (!pmf_device_register(self, NULL, NULL)) 189 aprint_error_dev(self, "couldn't establish power handler\n"); 190 } 191 192 static int 193 md_detach(device_t self, int flags) 194 { 195 struct md_softc *sc = device_private(self); 196 int rc; 197 198 rc = 0; 199 mutex_enter(&sc->sc_dkdev.dk_openlock); 200 if (sc->sc_dkdev.dk_openmask == 0 && sc->sc_type == MD_UNCONFIGURED) 201 ; /* nothing to do */ 202 else if ((flags & DETACH_FORCE) == 0) 203 rc = EBUSY; 204 mutex_exit(&sc->sc_dkdev.dk_openlock); 205 206 if (rc != 0) 207 return rc; 208 209 pmf_device_deregister(self); 210 disk_detach(&sc->sc_dkdev); 211 disk_destroy(&sc->sc_dkdev); 212 bufq_free(sc->sc_buflist); 213 mutex_destroy(&sc->sc_lock); 214 cv_destroy(&sc->sc_cv); 215 return 0; 216 } 217 218 /* 219 * operational routines: 220 * open, close, read, write, strategy, 221 * ioctl, dump, size 222 */ 223 224 #if MEMORY_DISK_SERVER 225 static int md_server_loop(struct md_softc *sc); 226 static int md_ioctl_server(struct md_softc *sc, struct md_conf *umd, 227 struct lwp *l); 228 #endif /* MEMORY_DISK_SERVER */ 229 static int md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd, 230 struct lwp *l); 231 232 static int 233 mdsize(dev_t dev) 234 { 235 struct md_softc *sc; 236 int res; 237 238 sc = device_lookup_private(&md_cd, MD_UNIT(dev)); 239 if (sc == NULL) 240 return 0; 241 242 mutex_enter(&sc->sc_lock); 243 if (sc->sc_type == MD_UNCONFIGURED) 244 res = 0; 245 else 246 res = sc->sc_size >> DEV_BSHIFT; 247 mutex_exit(&sc->sc_lock); 248 249 return res; 250 } 251 252 static int 253 mdopen(dev_t dev, int flag, int fmt, struct lwp *l) 254 { 255 int unit; 256 int part = DISKPART(dev); 257 int pmask = 1 << part; 258 cfdata_t cf; 259 struct md_softc *sc; 260 struct disk *dk; 261 #ifdef MEMORY_DISK_HOOKS 262 bool configured; 263 #endif 264 265 mutex_enter(&md_device_lock); 266 unit = MD_UNIT(dev); 267 sc = device_lookup_private(&md_cd, unit); 268 if (sc == NULL) { 269 if (part != RAW_PART) { 270 mutex_exit(&md_device_lock); 271 return ENXIO; 272 } 273 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK); 274 cf->cf_name = md_cd.cd_name; 275 cf->cf_atname = md_cd.cd_name; 276 cf->cf_unit = unit; 277 cf->cf_fstate = FSTATE_STAR; 278 sc = device_private(config_attach_pseudo(cf)); 279 if (sc == NULL) { 280 mutex_exit(&md_device_lock); 281 return ENOMEM; 282 } 283 } 284 285 dk = &sc->sc_dkdev; 286 287 /* 288 * The raw partition is used for ioctl to configure. 289 */ 290 if (part == RAW_PART) 291 goto ok; 292 293 #ifdef MEMORY_DISK_HOOKS 294 /* Call the open hook to allow loading the device. */ 295 configured = (sc->sc_type != MD_UNCONFIGURED); 296 md_open_hook(unit, &sc->sc_md); 297 /* initialize disklabel if the device is configured in open hook */ 298 if (!configured && sc->sc_type != MD_UNCONFIGURED) 299 md_set_disklabel(sc); 300 #endif 301 302 /* 303 * This is a normal, "slave" device, so 304 * enforce initialized. 305 */ 306 if (sc->sc_type == MD_UNCONFIGURED) { 307 mutex_exit(&md_device_lock); 308 return ENXIO; 309 } 310 311 ok: 312 /* XXX duplicates code in dk_open(). Call dk_open(), instead? */ 313 mutex_enter(&dk->dk_openlock); 314 /* Mark our unit as open. */ 315 switch (fmt) { 316 case S_IFCHR: 317 dk->dk_copenmask |= pmask; 318 break; 319 case S_IFBLK: 320 dk->dk_bopenmask |= pmask; 321 break; 322 } 323 324 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask; 325 326 mutex_exit(&dk->dk_openlock); 327 mutex_exit(&md_device_lock); 328 return 0; 329 } 330 331 static int 332 mdclose(dev_t dev, int flag, int fmt, struct lwp *l) 333 { 334 int part = DISKPART(dev); 335 int pmask = 1 << part; 336 int error; 337 cfdata_t cf; 338 struct md_softc *sc; 339 struct disk *dk; 340 341 sc = device_lookup_private(&md_cd, MD_UNIT(dev)); 342 if (sc == NULL) 343 return ENXIO; 344 345 dk = &sc->sc_dkdev; 346 347 mutex_enter(&dk->dk_openlock); 348 349 switch (fmt) { 350 case S_IFCHR: 351 dk->dk_copenmask &= ~pmask; 352 break; 353 case S_IFBLK: 354 dk->dk_bopenmask &= ~pmask; 355 break; 356 } 357 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask; 358 if (dk->dk_openmask != 0) { 359 mutex_exit(&dk->dk_openlock); 360 return 0; 361 } 362 363 mutex_exit(&dk->dk_openlock); 364 365 mutex_enter(&md_device_lock); 366 cf = device_cfdata(sc->sc_dev); 367 error = config_detach(sc->sc_dev, DETACH_QUIET); 368 if (! error) 369 free(cf, M_DEVBUF); 370 mutex_exit(&md_device_lock); 371 return error; 372 } 373 374 static int 375 mdread(dev_t dev, struct uio *uio, int flags) 376 { 377 struct md_softc *sc; 378 379 sc = device_lookup_private(&md_cd, MD_UNIT(dev)); 380 381 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED) 382 return ENXIO; 383 384 return (physio(mdstrategy, NULL, dev, B_READ, minphys, uio)); 385 } 386 387 static int 388 mdwrite(dev_t dev, struct uio *uio, int flags) 389 { 390 struct md_softc *sc; 391 392 sc = device_lookup_private(&md_cd, MD_UNIT(dev)); 393 394 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED) 395 return ENXIO; 396 397 return (physio(mdstrategy, NULL, dev, B_WRITE, minphys, uio)); 398 } 399 400 /* 401 * Handle I/O requests, either directly, or 402 * by passing them to the server process. 403 */ 404 static void 405 mdstrategy(struct buf *bp) 406 { 407 struct md_softc *sc; 408 void * addr; 409 size_t off, xfer; 410 bool is_read; 411 412 sc = device_lookup_private(&md_cd, MD_UNIT(bp->b_dev)); 413 414 mutex_enter(&sc->sc_lock); 415 416 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED) { 417 bp->b_error = ENXIO; 418 goto done; 419 } 420 421 switch (sc->sc_type) { 422 #if MEMORY_DISK_SERVER 423 case MD_UMEM_SERVER: 424 /* Just add this job to the server's queue. */ 425 bufq_put(sc->sc_buflist, bp); 426 cv_signal(&sc->sc_cv); 427 mutex_exit(&sc->sc_lock); 428 /* see md_server_loop() */ 429 /* no biodone in this case */ 430 return; 431 #endif /* MEMORY_DISK_SERVER */ 432 433 case MD_KMEM_FIXED: 434 case MD_KMEM_ALLOCATED: 435 /* These are in kernel space. Access directly. */ 436 is_read = ((bp->b_flags & B_READ) == B_READ); 437 bp->b_resid = bp->b_bcount; 438 off = (bp->b_blkno << DEV_BSHIFT); 439 if (off >= sc->sc_size) { 440 if (is_read) 441 break; /* EOF */ 442 goto set_eio; 443 } 444 xfer = bp->b_resid; 445 if (xfer > (sc->sc_size - off)) 446 xfer = (sc->sc_size - off); 447 addr = (char *)sc->sc_addr + off; 448 disk_busy(&sc->sc_dkdev); 449 if (is_read) 450 memcpy(bp->b_data, addr, xfer); 451 else 452 memcpy(addr, bp->b_data, xfer); 453 disk_unbusy(&sc->sc_dkdev, xfer, is_read); 454 bp->b_resid -= xfer; 455 break; 456 457 default: 458 bp->b_resid = bp->b_bcount; 459 set_eio: 460 bp->b_error = EIO; 461 break; 462 } 463 464 done: 465 mutex_exit(&sc->sc_lock); 466 467 biodone(bp); 468 } 469 470 static int 471 mdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 472 { 473 struct md_softc *sc; 474 struct md_conf *umd; 475 struct disklabel *lp; 476 struct partinfo *pp; 477 int error; 478 479 if ((sc = device_lookup_private(&md_cd, MD_UNIT(dev))) == NULL) 480 return ENXIO; 481 482 mutex_enter(&sc->sc_lock); 483 if (sc->sc_type != MD_UNCONFIGURED) { 484 switch (cmd) { 485 case DIOCGDINFO: 486 lp = (struct disklabel *)data; 487 *lp = *sc->sc_dkdev.dk_label; 488 mutex_exit(&sc->sc_lock); 489 return 0; 490 491 case DIOCGPART: 492 pp = (struct partinfo *)data; 493 pp->disklab = sc->sc_dkdev.dk_label; 494 pp->part = 495 &sc->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 496 mutex_exit(&sc->sc_lock); 497 return 0; 498 } 499 } 500 501 /* If this is not the raw partition, punt! */ 502 if (DISKPART(dev) != RAW_PART) { 503 mutex_exit(&sc->sc_lock); 504 return ENOTTY; 505 } 506 507 umd = (struct md_conf *)data; 508 error = EINVAL; 509 switch (cmd) { 510 case MD_GETCONF: 511 *umd = sc->sc_md; 512 error = 0; 513 break; 514 515 case MD_SETCONF: 516 /* Can only set it once. */ 517 if (sc->sc_type != MD_UNCONFIGURED) 518 break; 519 switch (umd->md_type) { 520 case MD_KMEM_ALLOCATED: 521 error = md_ioctl_kalloc(sc, umd, l); 522 break; 523 #if MEMORY_DISK_SERVER 524 case MD_UMEM_SERVER: 525 error = md_ioctl_server(sc, umd, l); 526 break; 527 #endif /* MEMORY_DISK_SERVER */ 528 default: 529 break; 530 } 531 break; 532 } 533 mutex_exit(&sc->sc_lock); 534 return error; 535 } 536 537 static void 538 md_set_disklabel(struct md_softc *sc) 539 { 540 struct disklabel *lp = sc->sc_dkdev.dk_label; 541 struct partition *pp; 542 543 memset(lp, 0, sizeof(*lp)); 544 545 lp->d_secsize = DEV_BSIZE; 546 lp->d_secperunit = sc->sc_size / DEV_BSIZE; 547 if (lp->d_secperunit >= (32*64)) { 548 lp->d_nsectors = 32; 549 lp->d_ntracks = 64; 550 lp->d_ncylinders = lp->d_secperunit / (32*64); 551 } else { 552 lp->d_nsectors = 1; 553 lp->d_ntracks = 1; 554 lp->d_ncylinders = lp->d_secperunit; 555 } 556 lp->d_secpercyl = lp->d_ntracks*lp->d_nsectors; 557 558 strncpy(lp->d_typename, md_cd.cd_name, sizeof(lp->d_typename)); 559 lp->d_type = DTYPE_UNKNOWN; 560 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 561 lp->d_rpm = 3600; 562 lp->d_interleave = 1; 563 lp->d_flags = 0; 564 565 pp = &lp->d_partitions[0]; 566 pp->p_offset = 0; 567 pp->p_size = lp->d_secperunit; 568 pp->p_fstype = FS_BSDFFS; 569 570 pp = &lp->d_partitions[RAW_PART]; 571 pp->p_offset = 0; 572 pp->p_size = lp->d_secperunit; 573 pp->p_fstype = FS_UNUSED; 574 575 lp->d_npartitions = RAW_PART+1; 576 lp->d_magic = DISKMAGIC; 577 lp->d_magic2 = DISKMAGIC; 578 lp->d_checksum = dkcksum(lp); 579 } 580 581 /* 582 * Handle ioctl MD_SETCONF for (sc_type == MD_KMEM_ALLOCATED) 583 * Just allocate some kernel memory and return. 584 */ 585 static int 586 md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd, 587 struct lwp *l) 588 { 589 vaddr_t addr; 590 vsize_t size; 591 592 mutex_exit(&sc->sc_lock); 593 594 /* Sanity check the size. */ 595 size = umd->md_size; 596 addr = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_WIRED|UVM_KMF_ZERO); 597 598 mutex_enter(&sc->sc_lock); 599 600 if (!addr) 601 return ENOMEM; 602 603 /* If another thread beat us to configure this unit: fail. */ 604 if (sc->sc_type != MD_UNCONFIGURED) { 605 uvm_km_free(kernel_map, addr, size, UVM_KMF_WIRED); 606 return EINVAL; 607 } 608 609 /* This unit is now configured. */ 610 sc->sc_addr = (void *)addr; /* kernel space */ 611 sc->sc_size = (size_t)size; 612 sc->sc_type = MD_KMEM_ALLOCATED; 613 md_set_disklabel(sc); 614 return 0; 615 } 616 617 #if MEMORY_DISK_SERVER 618 619 /* 620 * Handle ioctl MD_SETCONF for (sc_type == MD_UMEM_SERVER) 621 * Set config, then become the I/O server for this unit. 622 */ 623 static int 624 md_ioctl_server(struct md_softc *sc, struct md_conf *umd, 625 struct lwp *l) 626 { 627 vaddr_t end; 628 int error; 629 630 KASSERT(mutex_owned(&sc->sc_lock)); 631 632 /* Sanity check addr, size. */ 633 end = (vaddr_t) ((char *)umd->md_addr + umd->md_size); 634 635 if ((end >= VM_MAXUSER_ADDRESS) || 636 (end < ((vaddr_t) umd->md_addr)) ) 637 return EINVAL; 638 639 /* This unit is now configured. */ 640 sc->sc_addr = umd->md_addr; /* user space */ 641 sc->sc_size = umd->md_size; 642 sc->sc_type = MD_UMEM_SERVER; 643 md_set_disklabel(sc); 644 645 /* Become the server daemon */ 646 error = md_server_loop(sc); 647 648 /* This server is now going away! */ 649 sc->sc_type = MD_UNCONFIGURED; 650 sc->sc_addr = 0; 651 sc->sc_size = 0; 652 653 return (error); 654 } 655 656 static int 657 md_server_loop(struct md_softc *sc) 658 { 659 struct buf *bp; 660 void *addr; /* user space address */ 661 size_t off; /* offset into "device" */ 662 size_t xfer; /* amount to transfer */ 663 int error; 664 bool is_read; 665 666 KASSERT(mutex_owned(&sc->sc_lock)); 667 668 for (;;) { 669 /* Wait for some work to arrive. */ 670 while ((bp = bufq_get(sc->sc_buflist)) == NULL) { 671 error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock); 672 if (error) 673 return error; 674 } 675 676 /* Do the transfer to/from user space. */ 677 mutex_exit(&sc->sc_lock); 678 error = 0; 679 is_read = ((bp->b_flags & B_READ) == B_READ); 680 bp->b_resid = bp->b_bcount; 681 off = (bp->b_blkno << DEV_BSHIFT); 682 if (off >= sc->sc_size) { 683 if (is_read) 684 goto done; /* EOF (not an error) */ 685 error = EIO; 686 goto done; 687 } 688 xfer = bp->b_resid; 689 if (xfer > (sc->sc_size - off)) 690 xfer = (sc->sc_size - off); 691 addr = (char *)sc->sc_addr + off; 692 disk_busy(&sc->sc_dkdev); 693 if (is_read) 694 error = copyin(addr, bp->b_data, xfer); 695 else 696 error = copyout(bp->b_data, addr, xfer); 697 disk_unbusy(&sc->sc_dkdev, (error ? 0 : xfer), is_read); 698 if (!error) 699 bp->b_resid -= xfer; 700 701 done: 702 if (error) { 703 bp->b_error = error; 704 } 705 biodone(bp); 706 mutex_enter(&sc->sc_lock); 707 } 708 } 709 #endif /* MEMORY_DISK_SERVER */ 710