1 /* $NetBSD: device-mapper.c,v 1.22 2010/03/26 15:46:04 jakllsch Exp $ */ 2 3 /* 4 * Copyright (c) 2010 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Adam Hamsik. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * I want to say thank you to all people who helped me with this project. 34 */ 35 36 #include <sys/types.h> 37 #include <sys/param.h> 38 39 #include <sys/buf.h> 40 #include <sys/conf.h> 41 #include <sys/device.h> 42 #include <sys/devfs.h> 43 #include <sys/disk.h> 44 #include <sys/disklabel.h> 45 #include <sys/dtype.h> 46 #include <sys/ioccom.h> 47 #include <sys/malloc.h> 48 #include <sys/module.h> 49 #include <sys/sysctl.h> 50 #include <dev/disk/dm/dm.h> 51 52 #include "netbsd-dm.h" 53 54 static d_ioctl_t dmioctl; 55 static d_open_t dmopen; 56 static d_close_t dmclose; 57 static d_psize_t dmsize; 58 static d_strategy_t dmstrategy; 59 static d_dump_t dmdump; 60 61 /* attach and detach routines */ 62 void dmattach(int); 63 static int dm_modcmd(module_t mod, int cmd, void *unused); 64 static int dmdestroy(void); 65 66 static void dm_doinit(void); 67 68 static int dm_cmd_to_fun(prop_dictionary_t); 69 static int disk_ioctl_switch(cdev_t, u_long, void *); 70 static int dm_ioctl_switch(u_long); 71 #if 0 72 static void dmminphys(struct buf *); 73 #endif 74 75 struct devfs_bitmap dm_minor_bitmap; 76 77 /* ***Variable-definitions*** */ 78 struct dev_ops dm_ops = { 79 { "dm", 0, D_DISK | D_MPSAFE }, 80 .d_open = dmopen, 81 .d_close = dmclose, 82 .d_read = physread, 83 .d_write = physwrite, 84 .d_ioctl = dmioctl, 85 .d_strategy = dmstrategy, 86 .d_psize = dmsize, 87 .d_dump = dmdump, 88 /* D_DISK */ 89 }; 90 91 MALLOC_DEFINE(M_DM, "dm", "Device Mapper allocations"); 92 93 int dm_debug_level = 0; 94 95 extern uint64_t dm_dev_counter; 96 97 static cdev_t dmcdev; 98 99 static moduledata_t dm_mod = { 100 "dm", 101 dm_modcmd, 102 NULL 103 }; 104 DECLARE_MODULE(dm, dm_mod, SI_SUB_RAID, SI_ORDER_ANY); 105 MODULE_VERSION(dm, 1); 106 107 /* 108 * This array is used to translate cmd to function pointer. 109 * 110 * Interface between libdevmapper and lvm2tools uses different 111 * names for one IOCTL call because libdevmapper do another thing 112 * then. When I run "info" or "mknodes" libdevmapper will send same 113 * ioctl to kernel but will do another things in userspace. 114 * 115 */ 116 static struct cmd_function cmd_fn[] = { 117 { .cmd = "version", .fn = dm_get_version_ioctl}, 118 { .cmd = "targets", .fn = dm_list_versions_ioctl}, 119 { .cmd = "create", .fn = dm_dev_create_ioctl}, 120 { .cmd = "info", .fn = dm_dev_status_ioctl}, 121 { .cmd = "mknodes", .fn = dm_dev_status_ioctl}, 122 { .cmd = "names", .fn = dm_dev_list_ioctl}, 123 { .cmd = "suspend", .fn = dm_dev_suspend_ioctl}, 124 { .cmd = "remove", .fn = dm_dev_remove_ioctl}, 125 { .cmd = "rename", .fn = dm_dev_rename_ioctl}, 126 { .cmd = "resume", .fn = dm_dev_resume_ioctl}, 127 { .cmd = "clear", .fn = dm_table_clear_ioctl}, 128 { .cmd = "deps", .fn = dm_table_deps_ioctl}, 129 { .cmd = "reload", .fn = dm_table_load_ioctl}, 130 { .cmd = "status", .fn = dm_table_status_ioctl}, 131 { .cmd = "table", .fn = dm_table_status_ioctl}, 132 {NULL, NULL} 133 }; 134 135 /* New module handle routine */ 136 static int 137 dm_modcmd(module_t mod, int cmd, void *unused) 138 { 139 int error, bmajor, cmajor; 140 141 error = 0; 142 bmajor = -1; 143 cmajor = -1; 144 145 switch (cmd) { 146 case MOD_LOAD: 147 devfs_clone_bitmap_init(&dm_minor_bitmap); 148 dm_doinit(); 149 kprintf("Device Mapper version %d.%d.%d loaded\n", 150 DM_VERSION_MAJOR, DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL); 151 break; 152 153 case MOD_UNLOAD: 154 /* 155 * Disable unloading of dm module if there are any devices 156 * defined in driver. This is probably too strong we need 157 * to disable auto-unload only if there is mounted dm device 158 * present. 159 */ 160 if (dm_dev_counter > 0) 161 return EBUSY; 162 163 error = dmdestroy(); 164 if (error) 165 break; 166 kprintf("Device Mapper unloaded\n"); 167 break; 168 169 default: 170 break; 171 } 172 173 return error; 174 } 175 176 /* 177 * dm_detach is called to completely destroy & remove a dm disk device. 178 */ 179 int 180 dm_detach(dm_dev_t *dmv) 181 { 182 int minor; 183 184 /* Remove device from list and wait for refcnt to drop to zero */ 185 dm_dev_rem(dmv, NULL, NULL, -1); 186 187 /* Destroy active table first. */ 188 dm_table_destroy(&dmv->table_head, DM_TABLE_ACTIVE); 189 190 /* Destroy inactive table if exits, too. */ 191 dm_table_destroy(&dmv->table_head, DM_TABLE_INACTIVE); 192 193 dm_table_head_destroy(&dmv->table_head); 194 195 minor = dkunit(dmv->devt); 196 disk_destroy(dmv->diskp); 197 devstat_remove_entry(&dmv->stats); 198 199 release_dev(dmv->devt); 200 devfs_clone_bitmap_put(&dm_minor_bitmap, minor); 201 202 /* Destroy device */ 203 (void)dm_dev_free(dmv); 204 205 /* Decrement device counter After removing device */ 206 --dm_dev_counter; /* XXX: was atomic 64 */ 207 208 return 0; 209 } 210 211 static void 212 dm_doinit(void) 213 { 214 dm_target_init(); 215 dm_dev_init(); 216 dm_pdev_init(); 217 dmcdev = make_dev(&dm_ops, 0, UID_ROOT, GID_OPERATOR, 0640, "mapper/control"); 218 } 219 220 /* Destroy routine */ 221 static int 222 dmdestroy(void) 223 { 224 destroy_dev(dmcdev); 225 226 dm_dev_destroy(); 227 dm_pdev_destroy(); 228 dm_target_destroy(); 229 230 return 0; 231 } 232 233 static int 234 dmopen(struct dev_open_args *ap) 235 { 236 cdev_t dev = ap->a_head.a_dev; 237 dm_dev_t *dmv; 238 239 /* Shortcut for the control device */ 240 if (minor(dev) == 0) 241 return 0; 242 243 if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) 244 return ENXIO; 245 246 dmv->is_open = 1; 247 dm_dev_unbusy(dmv); 248 249 aprint_debug("dm open routine called %" PRIu32 "\n", 250 minor(ap->a_head.a_dev)); 251 return 0; 252 } 253 254 static int 255 dmclose(struct dev_close_args *ap) 256 { 257 cdev_t dev = ap->a_head.a_dev; 258 dm_dev_t *dmv; 259 260 /* Shortcut for the control device */ 261 if (minor(dev) == 0) 262 return 0; 263 264 if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) 265 return ENXIO; 266 267 dmv->is_open = 0; 268 dm_dev_unbusy(dmv); 269 270 aprint_debug("dm close routine called %" PRIu32 "\n", 271 minor(ap->a_head.a_dev)); 272 return 0; 273 } 274 275 276 static int 277 dmioctl(struct dev_ioctl_args *ap) 278 { 279 cdev_t dev = ap->a_head.a_dev; 280 u_long cmd = ap->a_cmd; 281 void *data = ap->a_data; 282 283 int r, err; 284 prop_dictionary_t dm_dict_in; 285 286 err = r = 0; 287 288 aprint_debug("dmioctl called\n"); 289 290 KKASSERT(data != NULL); 291 292 if (( r = disk_ioctl_switch(dev, cmd, data)) == ENOTTY) { 293 struct plistref *pref = (struct plistref *) data; 294 295 /* Check if we were called with NETBSD_DM_IOCTL ioctl 296 otherwise quit. */ 297 if ((r = dm_ioctl_switch(cmd)) != 0) 298 return r; 299 300 if((r = prop_dictionary_copyin_ioctl(pref, cmd, &dm_dict_in)) != 0) 301 return r; 302 303 if ((r = dm_check_version(dm_dict_in)) != 0) 304 goto cleanup_exit; 305 306 /* run ioctl routine */ 307 if ((err = dm_cmd_to_fun(dm_dict_in)) != 0) 308 goto cleanup_exit; 309 310 cleanup_exit: 311 r = prop_dictionary_copyout_ioctl(pref, cmd, dm_dict_in); 312 prop_object_release(dm_dict_in); 313 } 314 315 /* 316 * Return the error of the actual command if one one has 317 * happened. Otherwise return 'r' which indicates errors 318 * that occurred during helper operations. 319 */ 320 return (err != 0)?err:r; 321 } 322 323 /* 324 * Translate command sent from libdevmapper to func. 325 */ 326 static int 327 dm_cmd_to_fun(prop_dictionary_t dm_dict){ 328 int i, r; 329 prop_string_t command; 330 331 r = 0; 332 333 if ((command = prop_dictionary_get(dm_dict, DM_IOCTL_COMMAND)) == NULL) 334 return EINVAL; 335 336 for(i = 0; cmd_fn[i].cmd != NULL; i++) 337 if (prop_string_equals_cstring(command, cmd_fn[i].cmd)) 338 break; 339 340 if (cmd_fn[i].cmd == NULL) 341 return EINVAL; 342 343 aprint_debug("ioctl %s called\n", cmd_fn[i].cmd); 344 r = cmd_fn[i].fn(dm_dict); 345 346 return r; 347 } 348 349 /* Call apropriate ioctl handler function. */ 350 static int 351 dm_ioctl_switch(u_long cmd) 352 { 353 354 switch(cmd) { 355 356 case NETBSD_DM_IOCTL: 357 aprint_debug("dm NetBSD_DM_IOCTL called\n"); 358 break; 359 default: 360 aprint_debug("dm unknown ioctl called\n"); 361 return ENOTTY; 362 break; /* NOT REACHED */ 363 } 364 365 return 0; 366 } 367 368 /* 369 * Check for disk specific ioctls. 370 */ 371 372 static int 373 disk_ioctl_switch(cdev_t dev, u_long cmd, void *data) 374 { 375 dm_dev_t *dmv; 376 377 /* disk ioctls make sense only on block devices */ 378 if (minor(dev) == 0) 379 return ENOTTY; 380 381 switch(cmd) { 382 case DIOCGPART: 383 { 384 struct partinfo *dpart; 385 u_int64_t size; 386 dpart = (void *)data; 387 bzero(dpart, sizeof(*dpart)); 388 389 if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) 390 return ENODEV; 391 if (dmv->diskp->d_info.d_media_blksize == 0) { 392 dm_dev_unbusy(dmv); 393 return ENOTSUP; 394 } else { 395 size = dm_table_size(&dmv->table_head); 396 dpart->media_offset = 0; 397 dpart->media_size = size * DEV_BSIZE; 398 dpart->media_blocks = size; 399 dpart->media_blksize = DEV_BSIZE; 400 dpart->fstype = FS_BSDFFS; 401 } 402 dm_dev_unbusy(dmv); 403 break; 404 } 405 406 default: 407 aprint_debug("unknown disk_ioctl called\n"); 408 return ENOTTY; 409 break; /* NOT REACHED */ 410 } 411 412 return 0; 413 } 414 415 /* 416 * Do all IO operations on dm logical devices. 417 */ 418 static int 419 dmstrategy(struct dev_strategy_args *ap) 420 { 421 cdev_t dev = ap->a_head.a_dev; 422 struct bio *bio = ap->a_bio; 423 struct buf *bp = bio->bio_buf; 424 int bypass; 425 426 dm_dev_t *dmv; 427 dm_table_t *tbl; 428 dm_table_entry_t *table_en; 429 struct buf *nestbuf; 430 431 uint32_t dev_type; 432 433 uint64_t buf_start, buf_len, issued_len; 434 uint64_t table_start, table_end; 435 uint64_t start, end; 436 437 buf_start = bio->bio_offset; 438 buf_len = bp->b_bcount; 439 440 tbl = NULL; 441 442 table_end = 0; 443 dev_type = 0; 444 issued_len = 0; 445 446 if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) { 447 bp->b_error = EIO; 448 bp->b_resid = bp->b_bcount; 449 biodone(bio); 450 return 0; 451 } 452 453 switch(bp->b_cmd) { 454 case BUF_CMD_READ: 455 case BUF_CMD_WRITE: 456 case BUF_CMD_FREEBLKS: 457 bypass = 0; 458 break; 459 case BUF_CMD_FLUSH: 460 bypass = 1; 461 KKASSERT(buf_len == 0); 462 break; 463 default: 464 dm_dev_unbusy(dmv); 465 bp->b_error = EIO; 466 bp->b_resid = bp->b_bcount; 467 biodone(bio); 468 return 0; 469 } 470 471 if (bypass == 0 && 472 bounds_check_with_mediasize(bio, DEV_BSIZE, 473 dm_table_size(&dmv->table_head)) <= 0) { 474 dm_dev_unbusy(dmv); 475 bp->b_resid = bp->b_bcount; 476 biodone(bio); 477 return 0; 478 } 479 480 /* Select active table */ 481 tbl = dm_table_get_entry(&dmv->table_head, DM_TABLE_ACTIVE); 482 483 nestiobuf_init(bio); 484 devstat_start_transaction(&dmv->stats); 485 486 /* 487 * Find out what tables I want to select. 488 */ 489 SLIST_FOREACH(table_en, tbl, next) { 490 /* 491 * I need need number of bytes not blocks. 492 */ 493 table_start = table_en->start * DEV_BSIZE; 494 table_end = table_start + (table_en->length) * DEV_BSIZE; 495 496 /* 497 * Calculate the start and end 498 */ 499 start = MAX(table_start, buf_start); 500 end = MIN(table_end, buf_start + buf_len); 501 502 aprint_debug("----------------------------------------\n"); 503 aprint_debug("table_start %010" PRIu64", table_end %010" 504 PRIu64 "\n", table_start, table_end); 505 aprint_debug("buf_start %010" PRIu64", buf_len %010" 506 PRIu64"\n", buf_start, buf_len); 507 aprint_debug("start-buf_start %010"PRIu64", end %010" 508 PRIu64"\n", start - buf_start, end); 509 aprint_debug("start %010" PRIu64" , end %010" 510 PRIu64"\n", start, end); 511 aprint_debug("\n----------------------------------------\n"); 512 513 if (bypass) { 514 nestbuf = getpbuf(NULL); 515 nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS; 516 517 nestiobuf_add(bio, nestbuf, 0, 0, &dmv->stats); 518 nestbuf->b_bio1.bio_offset = 0; 519 table_en->target->strategy(table_en, nestbuf); 520 } else if (start < end) { 521 nestbuf = getpbuf(NULL); 522 nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS; 523 524 nestiobuf_add(bio, nestbuf, 525 start - buf_start, (end - start), 526 &dmv->stats); 527 issued_len += end - start; 528 529 nestbuf->b_bio1.bio_offset = (start - table_start); 530 table_en->target->strategy(table_en, nestbuf); 531 } 532 } 533 534 if (issued_len < buf_len) 535 nestiobuf_error(bio, EINVAL); 536 nestiobuf_start(bio); 537 dm_table_release(&dmv->table_head, DM_TABLE_ACTIVE); 538 dm_dev_unbusy(dmv); 539 540 return 0; 541 } 542 543 static int 544 dmdump(struct dev_dump_args *ap) 545 { 546 cdev_t dev = ap->a_head.a_dev; 547 dm_dev_t *dmv; 548 dm_table_t *tbl; 549 dm_table_entry_t *table_en; 550 uint32_t dev_type; 551 uint64_t buf_start, buf_len, issued_len; 552 uint64_t table_start, table_end; 553 uint64_t start, end, data_offset; 554 off_t offset; 555 size_t length; 556 int error = 0; 557 558 buf_start = ap->a_offset; 559 buf_len = ap->a_length; 560 561 tbl = NULL; 562 563 table_end = 0; 564 dev_type = 0; 565 issued_len = 0; 566 567 if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) { 568 return EIO; 569 } 570 571 /* Select active table */ 572 tbl = dm_table_get_entry(&dmv->table_head, DM_TABLE_ACTIVE); 573 574 575 /* 576 * Find out what tables I want to select. 577 */ 578 SLIST_FOREACH(table_en, tbl, next) { 579 /* 580 * I need need number of bytes not blocks. 581 */ 582 table_start = table_en->start * DEV_BSIZE; 583 table_end = table_start + (table_en->length) * DEV_BSIZE; 584 585 /* 586 * Calculate the start and end 587 */ 588 start = MAX(table_start, buf_start); 589 end = MIN(table_end, buf_start + buf_len); 590 591 if (ap->a_length == 0) { 592 if (table_en->target->dump == NULL) { 593 error = ENXIO; 594 goto out; 595 } 596 597 table_en->target->dump(table_en, NULL, 0, 0); 598 } else if (start < end) { 599 data_offset = start - buf_start; 600 offset = start - table_start; 601 length = end - start; 602 603 if (table_en->target->dump == NULL) { 604 error = ENXIO; 605 goto out; 606 } 607 608 table_en->target->dump(table_en, 609 (char *)ap->a_virtual + data_offset, 610 length, offset); 611 612 issued_len += end - start; 613 } 614 } 615 616 if (issued_len < buf_len) 617 error = EINVAL; 618 619 out: 620 dm_table_release(&dmv->table_head, DM_TABLE_ACTIVE); 621 dm_dev_unbusy(dmv); 622 623 return error; 624 } 625 626 static int 627 dmsize(struct dev_psize_args *ap) 628 { 629 cdev_t dev = ap->a_head.a_dev; 630 dm_dev_t *dmv; 631 uint64_t size; 632 633 size = 0; 634 635 if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) 636 return ENOENT; 637 638 size = dm_table_size(&dmv->table_head); 639 dm_dev_unbusy(dmv); 640 641 ap->a_result = (int64_t)size; 642 643 return 0; 644 } 645 646 #if 0 647 static void 648 dmminphys(struct buf *bp) 649 { 650 651 bp->b_bcount = MIN(bp->b_bcount, MAXPHYS); 652 } 653 #endif 654 655 void 656 dmsetdiskinfo(struct disk *disk, dm_table_head_t *head) 657 { 658 struct disk_info info; 659 uint64_t dmp_size; 660 661 dmp_size = dm_table_size(head); 662 663 bzero(&info, sizeof(struct disk_info)); 664 info.d_media_blksize = DEV_BSIZE; 665 info.d_media_blocks = dmp_size; 666 #if 0 667 /* this is set by disk_setdiskinfo */ 668 info.d_media_size = dmp_size * DEV_BSIZE; 669 #endif 670 info.d_dsflags = DSO_MBRQUIET | DSO_DEVICEMAPPER; 671 672 info.d_secpertrack = 32; 673 info.d_nheads = 64; 674 info.d_secpercyl = info.d_secpertrack * info.d_nheads; 675 info.d_ncylinders = dmp_size / info.d_secpercyl; 676 677 disk_setdiskinfo(disk, &info); 678 } 679 680 void 681 dm_builtin_init(void *arg) 682 { 683 modeventhand_t evh = (modeventhand_t)arg; 684 685 KKASSERT(evh != NULL); 686 evh(NULL, MOD_LOAD, NULL); 687 } 688 689 void 690 dm_builtin_uninit(void *arg) 691 { 692 modeventhand_t evh = (modeventhand_t)arg; 693 694 KKASSERT(evh != NULL); 695 evh(NULL, MOD_UNLOAD, NULL); 696 } 697 698 TUNABLE_INT("debug.dm_debug", &dm_debug_level); 699 SYSCTL_INT(_debug, OID_AUTO, dm_debug, CTLFLAG_RW, &dm_debug_level, 700 0, "Eanble device mapper debugging"); 701 702