/*	$NetBSD: md.c,v 1.74 2015/04/26 15:15:20 mlelstv Exp $	*/

/*
 * Copyright (c) 1995 Gordon W. Ross, Leo Weppelman.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * This implements a general-purpose memory-disk.
 * See md.h for notes on the config types.
 *
 * Note that this driver provides the same functionality
 * as the MFS filesystem hack, but this is better because
 * you can use this for any filesystem type you'd like!
 *
 * Credit for most of the kmem ramdisk code goes to:
 *	Leo Weppelman (atari) and Phil Nelson (pc532)
 * Credit for the ideas behind the "user space memory" code goes
 * to the authors of the MFS implementation.
40 */ 41 42 #include <sys/cdefs.h> 43 __KERNEL_RCSID(0, "$NetBSD: md.c,v 1.74 2015/04/26 15:15:20 mlelstv Exp $"); 44 45 #ifdef _KERNEL_OPT 46 #include "opt_md.h" 47 #else 48 #define MEMORY_DISK_SERVER 1 49 #endif 50 51 #include <sys/param.h> 52 #include <sys/kernel.h> 53 #include <sys/malloc.h> 54 #include <sys/systm.h> 55 #include <sys/buf.h> 56 #include <sys/bufq.h> 57 #include <sys/device.h> 58 #include <sys/disk.h> 59 #include <sys/stat.h> 60 #include <sys/proc.h> 61 #include <sys/conf.h> 62 #include <sys/disklabel.h> 63 64 #include <uvm/uvm_extern.h> 65 66 #include <dev/md.h> 67 68 /* 69 * The user-space functionality is included by default. 70 * Use `options MEMORY_DISK_SERVER=0' to turn it off. 71 */ 72 #ifndef MEMORY_DISK_SERVER 73 #error MEMORY_DISK_SERVER should be defined by opt_md.h 74 #endif /* MEMORY_DISK_SERVER */ 75 76 /* 77 * We should use the raw partition for ioctl. 78 */ 79 #define MD_UNIT(unit) DISKUNIT(unit) 80 81 /* autoconfig stuff... */ 82 83 struct md_softc { 84 device_t sc_dev; /* Self. */ 85 struct disk sc_dkdev; /* hook for generic disk handling */ 86 struct md_conf sc_md; 87 kmutex_t sc_lock; /* Protect self. */ 88 kcondvar_t sc_cv; /* Wait here for work. 
*/ 89 struct bufq_state *sc_buflist; 90 }; 91 /* shorthand for fields in sc_md: */ 92 #define sc_addr sc_md.md_addr 93 #define sc_size sc_md.md_size 94 #define sc_type sc_md.md_type 95 96 void mdattach(int); 97 98 static void md_attach(device_t, device_t, void *); 99 static int md_detach(device_t, int); 100 101 static dev_type_open(mdopen); 102 static dev_type_close(mdclose); 103 static dev_type_read(mdread); 104 static dev_type_write(mdwrite); 105 static dev_type_ioctl(mdioctl); 106 static dev_type_strategy(mdstrategy); 107 static dev_type_size(mdsize); 108 109 const struct bdevsw md_bdevsw = { 110 .d_open = mdopen, 111 .d_close = mdclose, 112 .d_strategy = mdstrategy, 113 .d_ioctl = mdioctl, 114 .d_dump = nodump, 115 .d_psize = mdsize, 116 .d_discard = nodiscard, 117 .d_flag = D_DISK | D_MPSAFE 118 }; 119 120 const struct cdevsw md_cdevsw = { 121 .d_open = mdopen, 122 .d_close = mdclose, 123 .d_read = mdread, 124 .d_write = mdwrite, 125 .d_ioctl = mdioctl, 126 .d_stop = nostop, 127 .d_tty = notty, 128 .d_poll = nopoll, 129 .d_mmap = nommap, 130 .d_kqfilter = nokqfilter, 131 .d_discard = nodiscard, 132 .d_flag = D_DISK 133 }; 134 135 static struct dkdriver mddkdriver = { 136 .d_strategy = mdstrategy 137 }; 138 139 extern struct cfdriver md_cd; 140 CFATTACH_DECL3_NEW(md, sizeof(struct md_softc), 141 0, md_attach, md_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN); 142 143 static kmutex_t md_device_lock; /* Protect unit creation / deletion. 
*/ 144 extern size_t md_root_size; 145 146 static void md_set_disklabel(struct md_softc *); 147 148 /* 149 * This is called if we are configured as a pseudo-device 150 */ 151 void 152 mdattach(int n) 153 { 154 155 mutex_init(&md_device_lock, MUTEX_DEFAULT, IPL_NONE); 156 if (config_cfattach_attach(md_cd.cd_name, &md_ca)) { 157 aprint_error("%s: cfattach_attach failed\n", md_cd.cd_name); 158 return; 159 } 160 } 161 162 static void 163 md_attach(device_t parent, device_t self, void *aux) 164 { 165 struct md_softc *sc = device_private(self); 166 167 sc->sc_dev = self; 168 sc->sc_type = MD_UNCONFIGURED; 169 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE); 170 cv_init(&sc->sc_cv, "mdidle"); 171 bufq_alloc(&sc->sc_buflist, "fcfs", 0); 172 173 /* XXX - Could accept aux info here to set the config. */ 174 #ifdef MEMORY_DISK_HOOKS 175 /* 176 * This external function might setup a pre-loaded disk. 177 * All it would need to do is setup the md_conf struct. 178 * See sys/dev/md_root.c for an example. 179 */ 180 md_attach_hook(device_unit(self), &sc->sc_md); 181 #endif 182 183 /* 184 * Initialize and attach the disk structure. 
185 */ 186 disk_init(&sc->sc_dkdev, device_xname(self), &mddkdriver); 187 disk_attach(&sc->sc_dkdev); 188 189 if (sc->sc_type != MD_UNCONFIGURED) 190 md_set_disklabel(sc); 191 192 if (!pmf_device_register(self, NULL, NULL)) 193 aprint_error_dev(self, "couldn't establish power handler\n"); 194 } 195 196 static int 197 md_detach(device_t self, int flags) 198 { 199 struct md_softc *sc = device_private(self); 200 int rc; 201 202 rc = 0; 203 mutex_enter(&sc->sc_dkdev.dk_openlock); 204 if (sc->sc_dkdev.dk_openmask == 0 && sc->sc_type == MD_UNCONFIGURED) 205 ; /* nothing to do */ 206 else if ((flags & DETACH_FORCE) == 0) 207 rc = EBUSY; 208 mutex_exit(&sc->sc_dkdev.dk_openlock); 209 210 if (rc != 0) 211 return rc; 212 213 pmf_device_deregister(self); 214 disk_detach(&sc->sc_dkdev); 215 disk_destroy(&sc->sc_dkdev); 216 bufq_free(sc->sc_buflist); 217 mutex_destroy(&sc->sc_lock); 218 cv_destroy(&sc->sc_cv); 219 return 0; 220 } 221 222 /* 223 * operational routines: 224 * open, close, read, write, strategy, 225 * ioctl, dump, size 226 */ 227 228 #if MEMORY_DISK_SERVER 229 static int md_server_loop(struct md_softc *sc); 230 static int md_ioctl_server(struct md_softc *sc, struct md_conf *umd, 231 struct lwp *l); 232 #endif /* MEMORY_DISK_SERVER */ 233 static int md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd, 234 struct lwp *l); 235 236 static int 237 mdsize(dev_t dev) 238 { 239 struct md_softc *sc; 240 int res; 241 242 sc = device_lookup_private(&md_cd, MD_UNIT(dev)); 243 if (sc == NULL) 244 return 0; 245 246 mutex_enter(&sc->sc_lock); 247 if (sc->sc_type == MD_UNCONFIGURED) 248 res = 0; 249 else 250 res = sc->sc_size >> DEV_BSHIFT; 251 mutex_exit(&sc->sc_lock); 252 253 return res; 254 } 255 256 static int 257 mdopen(dev_t dev, int flag, int fmt, struct lwp *l) 258 { 259 int unit; 260 int part = DISKPART(dev); 261 int pmask = 1 << part; 262 cfdata_t cf; 263 struct md_softc *sc; 264 struct disk *dk; 265 #ifdef MEMORY_DISK_HOOKS 266 bool configured; 267 #endif 268 269 
mutex_enter(&md_device_lock); 270 unit = MD_UNIT(dev); 271 sc = device_lookup_private(&md_cd, unit); 272 if (sc == NULL) { 273 if (part != RAW_PART) { 274 mutex_exit(&md_device_lock); 275 return ENXIO; 276 } 277 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK); 278 cf->cf_name = md_cd.cd_name; 279 cf->cf_atname = md_cd.cd_name; 280 cf->cf_unit = unit; 281 cf->cf_fstate = FSTATE_STAR; 282 sc = device_private(config_attach_pseudo(cf)); 283 if (sc == NULL) { 284 mutex_exit(&md_device_lock); 285 return ENOMEM; 286 } 287 } 288 289 dk = &sc->sc_dkdev; 290 291 /* 292 * The raw partition is used for ioctl to configure. 293 */ 294 if (part == RAW_PART) 295 goto ok; 296 297 #ifdef MEMORY_DISK_HOOKS 298 /* Call the open hook to allow loading the device. */ 299 configured = (sc->sc_type != MD_UNCONFIGURED); 300 md_open_hook(unit, &sc->sc_md); 301 /* initialize disklabel if the device is configured in open hook */ 302 if (!configured && sc->sc_type != MD_UNCONFIGURED) 303 md_set_disklabel(sc); 304 #endif 305 306 /* 307 * This is a normal, "slave" device, so 308 * enforce initialized. 309 */ 310 if (sc->sc_type == MD_UNCONFIGURED) { 311 mutex_exit(&md_device_lock); 312 return ENXIO; 313 } 314 315 ok: 316 /* XXX duplicates code in dk_open(). Call dk_open(), instead? */ 317 mutex_enter(&dk->dk_openlock); 318 /* Mark our unit as open. 
*/ 319 switch (fmt) { 320 case S_IFCHR: 321 dk->dk_copenmask |= pmask; 322 break; 323 case S_IFBLK: 324 dk->dk_bopenmask |= pmask; 325 break; 326 } 327 328 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask; 329 330 mutex_exit(&dk->dk_openlock); 331 mutex_exit(&md_device_lock); 332 return 0; 333 } 334 335 static int 336 mdclose(dev_t dev, int flag, int fmt, struct lwp *l) 337 { 338 int part = DISKPART(dev); 339 int pmask = 1 << part; 340 int error; 341 cfdata_t cf; 342 struct md_softc *sc; 343 struct disk *dk; 344 345 sc = device_lookup_private(&md_cd, MD_UNIT(dev)); 346 if (sc == NULL) 347 return ENXIO; 348 349 dk = &sc->sc_dkdev; 350 351 mutex_enter(&dk->dk_openlock); 352 353 switch (fmt) { 354 case S_IFCHR: 355 dk->dk_copenmask &= ~pmask; 356 break; 357 case S_IFBLK: 358 dk->dk_bopenmask &= ~pmask; 359 break; 360 } 361 dk->dk_openmask = dk->dk_copenmask | dk->dk_bopenmask; 362 if (dk->dk_openmask != 0) { 363 mutex_exit(&dk->dk_openlock); 364 return 0; 365 } 366 367 mutex_exit(&dk->dk_openlock); 368 369 mutex_enter(&md_device_lock); 370 cf = device_cfdata(sc->sc_dev); 371 error = config_detach(sc->sc_dev, DETACH_QUIET); 372 if (! error) 373 free(cf, M_DEVBUF); 374 mutex_exit(&md_device_lock); 375 return error; 376 } 377 378 static int 379 mdread(dev_t dev, struct uio *uio, int flags) 380 { 381 struct md_softc *sc; 382 383 sc = device_lookup_private(&md_cd, MD_UNIT(dev)); 384 385 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED) 386 return ENXIO; 387 388 return (physio(mdstrategy, NULL, dev, B_READ, minphys, uio)); 389 } 390 391 static int 392 mdwrite(dev_t dev, struct uio *uio, int flags) 393 { 394 struct md_softc *sc; 395 396 sc = device_lookup_private(&md_cd, MD_UNIT(dev)); 397 398 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED) 399 return ENXIO; 400 401 return (physio(mdstrategy, NULL, dev, B_WRITE, minphys, uio)); 402 } 403 404 /* 405 * Handle I/O requests, either directly, or 406 * by passing them to the server process. 
407 */ 408 static void 409 mdstrategy(struct buf *bp) 410 { 411 struct md_softc *sc; 412 void * addr; 413 size_t off, xfer; 414 bool is_read; 415 416 sc = device_lookup_private(&md_cd, MD_UNIT(bp->b_dev)); 417 418 mutex_enter(&sc->sc_lock); 419 420 if (sc == NULL || sc->sc_type == MD_UNCONFIGURED) { 421 bp->b_error = ENXIO; 422 goto done; 423 } 424 425 switch (sc->sc_type) { 426 #if MEMORY_DISK_SERVER 427 case MD_UMEM_SERVER: 428 /* Just add this job to the server's queue. */ 429 bufq_put(sc->sc_buflist, bp); 430 cv_signal(&sc->sc_cv); 431 mutex_exit(&sc->sc_lock); 432 /* see md_server_loop() */ 433 /* no biodone in this case */ 434 return; 435 #endif /* MEMORY_DISK_SERVER */ 436 437 case MD_KMEM_FIXED: 438 case MD_KMEM_ALLOCATED: 439 /* These are in kernel space. Access directly. */ 440 is_read = ((bp->b_flags & B_READ) == B_READ); 441 bp->b_resid = bp->b_bcount; 442 off = (bp->b_blkno << DEV_BSHIFT); 443 if (off >= sc->sc_size) { 444 if (is_read) 445 break; /* EOF */ 446 goto set_eio; 447 } 448 xfer = bp->b_resid; 449 if (xfer > (sc->sc_size - off)) 450 xfer = (sc->sc_size - off); 451 addr = (char *)sc->sc_addr + off; 452 disk_busy(&sc->sc_dkdev); 453 if (is_read) 454 memcpy(bp->b_data, addr, xfer); 455 else 456 memcpy(addr, bp->b_data, xfer); 457 disk_unbusy(&sc->sc_dkdev, xfer, is_read); 458 bp->b_resid -= xfer; 459 break; 460 461 default: 462 bp->b_resid = bp->b_bcount; 463 set_eio: 464 bp->b_error = EIO; 465 break; 466 } 467 468 done: 469 mutex_exit(&sc->sc_lock); 470 471 biodone(bp); 472 } 473 474 static int 475 mdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 476 { 477 struct md_softc *sc; 478 struct md_conf *umd; 479 int error; 480 481 if ((sc = device_lookup_private(&md_cd, MD_UNIT(dev))) == NULL) 482 return ENXIO; 483 484 mutex_enter(&sc->sc_lock); 485 if (sc->sc_type != MD_UNCONFIGURED) { 486 error = disk_ioctl(&sc->sc_dkdev, dev, cmd, data, flag, l); 487 if (error != EPASSTHROUGH) { 488 mutex_exit(&sc->sc_lock); 489 return 0; 490 } 
491 } 492 493 /* If this is not the raw partition, punt! */ 494 if (DISKPART(dev) != RAW_PART) { 495 mutex_exit(&sc->sc_lock); 496 return ENOTTY; 497 } 498 499 umd = (struct md_conf *)data; 500 error = EINVAL; 501 switch (cmd) { 502 case MD_GETCONF: 503 *umd = sc->sc_md; 504 error = 0; 505 break; 506 507 case MD_SETCONF: 508 /* Can only set it once. */ 509 if (sc->sc_type != MD_UNCONFIGURED) 510 break; 511 switch (umd->md_type) { 512 case MD_KMEM_ALLOCATED: 513 error = md_ioctl_kalloc(sc, umd, l); 514 break; 515 #if MEMORY_DISK_SERVER 516 case MD_UMEM_SERVER: 517 error = md_ioctl_server(sc, umd, l); 518 break; 519 #endif /* MEMORY_DISK_SERVER */ 520 default: 521 break; 522 } 523 break; 524 } 525 mutex_exit(&sc->sc_lock); 526 return error; 527 } 528 529 static void 530 md_set_disklabel(struct md_softc *sc) 531 { 532 struct disklabel *lp = sc->sc_dkdev.dk_label; 533 struct partition *pp; 534 535 memset(lp, 0, sizeof(*lp)); 536 537 lp->d_secsize = DEV_BSIZE; 538 lp->d_secperunit = sc->sc_size / DEV_BSIZE; 539 if (lp->d_secperunit >= (32*64)) { 540 lp->d_nsectors = 32; 541 lp->d_ntracks = 64; 542 lp->d_ncylinders = lp->d_secperunit / (32*64); 543 } else { 544 lp->d_nsectors = 1; 545 lp->d_ntracks = 1; 546 lp->d_ncylinders = lp->d_secperunit; 547 } 548 lp->d_secpercyl = lp->d_ntracks*lp->d_nsectors; 549 550 strncpy(lp->d_typename, md_cd.cd_name, sizeof(lp->d_typename)); 551 lp->d_type = DKTYPE_MD; 552 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 553 lp->d_rpm = 3600; 554 lp->d_interleave = 1; 555 lp->d_flags = 0; 556 557 pp = &lp->d_partitions[0]; 558 pp->p_offset = 0; 559 pp->p_size = lp->d_secperunit; 560 pp->p_fstype = FS_BSDFFS; 561 562 pp = &lp->d_partitions[RAW_PART]; 563 pp->p_offset = 0; 564 pp->p_size = lp->d_secperunit; 565 pp->p_fstype = FS_UNUSED; 566 567 lp->d_npartitions = RAW_PART+1; 568 lp->d_magic = DISKMAGIC; 569 lp->d_magic2 = DISKMAGIC; 570 lp->d_checksum = dkcksum(lp); 571 } 572 573 /* 574 * Handle ioctl MD_SETCONF for (sc_type == 
MD_KMEM_ALLOCATED) 575 * Just allocate some kernel memory and return. 576 */ 577 static int 578 md_ioctl_kalloc(struct md_softc *sc, struct md_conf *umd, 579 struct lwp *l) 580 { 581 vaddr_t addr; 582 vsize_t size; 583 584 mutex_exit(&sc->sc_lock); 585 586 /* Sanity check the size. */ 587 size = umd->md_size; 588 addr = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_WIRED|UVM_KMF_ZERO); 589 590 mutex_enter(&sc->sc_lock); 591 592 if (!addr) 593 return ENOMEM; 594 595 /* If another thread beat us to configure this unit: fail. */ 596 if (sc->sc_type != MD_UNCONFIGURED) { 597 uvm_km_free(kernel_map, addr, size, UVM_KMF_WIRED); 598 return EINVAL; 599 } 600 601 /* This unit is now configured. */ 602 sc->sc_addr = (void *)addr; /* kernel space */ 603 sc->sc_size = (size_t)size; 604 sc->sc_type = MD_KMEM_ALLOCATED; 605 md_set_disklabel(sc); 606 return 0; 607 } 608 609 #if MEMORY_DISK_SERVER 610 611 /* 612 * Handle ioctl MD_SETCONF for (sc_type == MD_UMEM_SERVER) 613 * Set config, then become the I/O server for this unit. 614 */ 615 static int 616 md_ioctl_server(struct md_softc *sc, struct md_conf *umd, 617 struct lwp *l) 618 { 619 vaddr_t end; 620 int error; 621 622 KASSERT(mutex_owned(&sc->sc_lock)); 623 624 /* Sanity check addr, size. */ 625 end = (vaddr_t) ((char *)umd->md_addr + umd->md_size); 626 627 if ((end >= VM_MAXUSER_ADDRESS) || 628 (end < ((vaddr_t) umd->md_addr)) ) 629 return EINVAL; 630 631 /* This unit is now configured. */ 632 sc->sc_addr = umd->md_addr; /* user space */ 633 sc->sc_size = umd->md_size; 634 sc->sc_type = MD_UMEM_SERVER; 635 md_set_disklabel(sc); 636 637 /* Become the server daemon */ 638 error = md_server_loop(sc); 639 640 /* This server is now going away! 
*/ 641 sc->sc_type = MD_UNCONFIGURED; 642 sc->sc_addr = 0; 643 sc->sc_size = 0; 644 645 return (error); 646 } 647 648 static int 649 md_server_loop(struct md_softc *sc) 650 { 651 struct buf *bp; 652 void *addr; /* user space address */ 653 size_t off; /* offset into "device" */ 654 size_t xfer; /* amount to transfer */ 655 int error; 656 bool is_read; 657 658 KASSERT(mutex_owned(&sc->sc_lock)); 659 660 for (;;) { 661 /* Wait for some work to arrive. */ 662 while ((bp = bufq_get(sc->sc_buflist)) == NULL) { 663 error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock); 664 if (error) 665 return error; 666 } 667 668 /* Do the transfer to/from user space. */ 669 mutex_exit(&sc->sc_lock); 670 error = 0; 671 is_read = ((bp->b_flags & B_READ) == B_READ); 672 bp->b_resid = bp->b_bcount; 673 off = (bp->b_blkno << DEV_BSHIFT); 674 if (off >= sc->sc_size) { 675 if (is_read) 676 goto done; /* EOF (not an error) */ 677 error = EIO; 678 goto done; 679 } 680 xfer = bp->b_resid; 681 if (xfer > (sc->sc_size - off)) 682 xfer = (sc->sc_size - off); 683 addr = (char *)sc->sc_addr + off; 684 disk_busy(&sc->sc_dkdev); 685 if (is_read) 686 error = copyin(addr, bp->b_data, xfer); 687 else 688 error = copyout(bp->b_data, addr, xfer); 689 disk_unbusy(&sc->sc_dkdev, (error ? 0 : xfer), is_read); 690 if (!error) 691 bp->b_resid -= xfer; 692 693 done: 694 if (error) { 695 bp->b_error = error; 696 } 697 biodone(bp); 698 mutex_enter(&sc->sc_lock); 699 } 700 } 701 #endif /* MEMORY_DISK_SERVER */ 702