1 /* $NetBSD: device-mapper.c,v 1.22 2010/03/26 15:46:04 jakllsch Exp $ */ 2 3 /* 4 * Copyright (c) 2010 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Adam Hamsik. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * I want to say thank you to all people who helped me with this project. 34 */ 35 36 #include <sys/types.h> 37 #include <sys/param.h> 38 39 #include <sys/buf.h> 40 #include <sys/conf.h> 41 #include <sys/device.h> 42 #include <sys/devfs.h> 43 #include <sys/disk.h> 44 #include <sys/disklabel.h> 45 #include <sys/dtype.h> 46 #include <sys/ioccom.h> 47 #include <sys/malloc.h> 48 #include <sys/module.h> 49 #include <sys/sysctl.h> 50 51 #include "netbsd-dm.h" 52 #include "dm.h" 53 54 static d_ioctl_t dmioctl; 55 static d_open_t dmopen; 56 static d_close_t dmclose; 57 static d_psize_t dmsize; 58 static d_strategy_t dmstrategy; 59 static d_dump_t dmdump; 60 61 /* attach and detach routines */ 62 void dmattach(int); 63 static int dm_modcmd(module_t mod, int cmd, void *unused); 64 static int dmdestroy(void); 65 66 static void dm_doinit(void); 67 68 static int dm_cmd_to_fun(prop_dictionary_t); 69 static int disk_ioctl_switch(cdev_t, u_long, void *); 70 static int dm_ioctl_switch(u_long); 71 #if 0 72 static void dmminphys(struct buf *); 73 #endif 74 75 struct devfs_bitmap dm_minor_bitmap; 76 77 /* ***Variable-definitions*** */ 78 struct dev_ops dm_ops = { 79 { "dm", 0, D_DISK | D_MPSAFE }, 80 .d_open = dmopen, 81 .d_close = dmclose, 82 .d_read = physread, 83 .d_write = physwrite, 84 .d_ioctl = dmioctl, 85 .d_strategy = dmstrategy, 86 .d_psize = dmsize, 87 .d_dump = dmdump, 88 /* D_DISK */ 89 }; 90 91 MALLOC_DEFINE(M_DM, "dm", "Device Mapper allocations"); 92 93 int dm_debug_level = 0; 94 95 extern uint64_t dm_dev_counter; 96 97 static cdev_t dmcdev; 98 99 static moduledata_t dm_mod = { 100 "dm", 101 dm_modcmd, 102 NULL 103 }; 104 DECLARE_MODULE(dm, dm_mod, SI_SUB_RAID, SI_ORDER_ANY); 105 106 /* 107 * This array is used to translate cmd to function pointer. 108 * 109 * Interface between libdevmapper and lvm2tools uses different 110 * names for one IOCTL call because libdevmapper do another thing 111 * then. When I run "info" or "mknodes" libdevmapper will send same 112 * ioctl to kernel but will do another things in userspace. 113 * 114 */ 115 static struct cmd_function cmd_fn[] = { 116 { .cmd = "version", .fn = dm_get_version_ioctl}, 117 { .cmd = "targets", .fn = dm_list_versions_ioctl}, 118 { .cmd = "create", .fn = dm_dev_create_ioctl}, 119 { .cmd = "info", .fn = dm_dev_status_ioctl}, 120 { .cmd = "mknodes", .fn = dm_dev_status_ioctl}, 121 { .cmd = "names", .fn = dm_dev_list_ioctl}, 122 { .cmd = "suspend", .fn = dm_dev_suspend_ioctl}, 123 { .cmd = "remove", .fn = dm_dev_remove_ioctl}, 124 { .cmd = "rename", .fn = dm_dev_rename_ioctl}, 125 { .cmd = "resume", .fn = dm_dev_resume_ioctl}, 126 { .cmd = "clear", .fn = dm_table_clear_ioctl}, 127 { .cmd = "deps", .fn = dm_table_deps_ioctl}, 128 { .cmd = "reload", .fn = dm_table_load_ioctl}, 129 { .cmd = "status", .fn = dm_table_status_ioctl}, 130 { .cmd = "table", .fn = dm_table_status_ioctl}, 131 {NULL, NULL} 132 }; 133 134 /* New module handle routine */ 135 static int 136 dm_modcmd(module_t mod, int cmd, void *unused) 137 { 138 int error, bmajor, cmajor; 139 140 error = 0; 141 bmajor = -1; 142 cmajor = -1; 143 144 switch (cmd) { 145 case MOD_LOAD: 146 devfs_clone_bitmap_init(&dm_minor_bitmap); 147 dm_doinit(); 148 kprintf("Device Mapper version %d.%d.%d loaded\n", 149 DM_VERSION_MAJOR, DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL); 150 break; 151 152 case MOD_UNLOAD: 153 /* 154 * Disable unloading of dm module if there are any devices 155 * defined in driver. This is probably too strong we need 156 * to disable auto-unload only if there is mounted dm device 157 * present. 158 */ 159 if (dm_dev_counter > 0) 160 return EBUSY; 161 162 error = dmdestroy(); 163 if (error) 164 break; 165 kprintf("Device Mapper unloaded\n"); 166 break; 167 168 default: 169 break; 170 } 171 172 return error; 173 } 174 175 /* 176 * dm_detach: 177 * 178 * Autoconfiguration detach function for pseudo-device glue. 179 * This routine is called by dm_ioctl::dm_dev_remove_ioctl and by autoconf to 180 * remove devices created in device-mapper. 181 */ 182 int 183 dm_detach(dm_dev_t *dmv) 184 { 185 int minor; 186 187 disable_dev(dmv); 188 189 /* Destroy active table first. */ 190 dm_table_destroy(&dmv->table_head, DM_TABLE_ACTIVE); 191 192 /* Destroy inactive table if exits, too. */ 193 dm_table_destroy(&dmv->table_head, DM_TABLE_INACTIVE); 194 195 dm_table_head_destroy(&dmv->table_head); 196 197 minor = dkunit(dmv->devt); 198 disk_destroy(dmv->diskp); 199 devstat_remove_entry(&dmv->stats); 200 devfs_clone_bitmap_put(&dm_minor_bitmap, minor); 201 202 /* Destroy device */ 203 (void)dm_dev_free(dmv); 204 205 /* Decrement device counter After removing device */ 206 --dm_dev_counter; /* XXX: was atomic 64 */ 207 208 return 0; 209 } 210 211 static void 212 dm_doinit(void) 213 { 214 dm_target_init(); 215 dm_dev_init(); 216 dm_pdev_init(); 217 dmcdev = make_dev(&dm_ops, 0, UID_ROOT, GID_OPERATOR, 0640, "mapper/control"); 218 } 219 220 /* Destroy routine */ 221 static int 222 dmdestroy(void) 223 { 224 destroy_dev(dmcdev); 225 226 dm_dev_destroy(); 227 dm_pdev_destroy(); 228 dm_target_destroy(); 229 230 return 0; 231 } 232 233 static int 234 dmopen(struct dev_open_args *ap) 235 { 236 237 aprint_debug("dm open routine called %" PRIu32 "\n", 238 minor(ap->a_head.a_dev)); 239 return 0; 240 } 241 242 static int 243 dmclose(struct dev_close_args *ap) 244 { 245 246 aprint_debug("dm close routine called %" PRIu32 "\n", 247 minor(ap->a_head.a_dev)); 248 return 0; 249 } 250 251 252 static int 253 dmioctl(struct dev_ioctl_args *ap) 254 { 255 cdev_t dev = ap->a_head.a_dev; 256 u_long cmd = ap->a_cmd; 257 void *data = ap->a_data; 258 259 int r; 260 prop_dictionary_t dm_dict_in; 261 262 r = 0; 263 264 aprint_debug("dmioctl called\n"); 265 266 KKASSERT(data != NULL); 267 268 if (( r = disk_ioctl_switch(dev, cmd, data)) == ENOTTY) { 269 struct plistref *pref = (struct plistref *) data; 270 271 /* Check if we were called with NETBSD_DM_IOCTL ioctl 272 otherwise quit. */ 273 if ((r = dm_ioctl_switch(cmd)) != 0) 274 return r; 275 276 if((r = prop_dictionary_copyin_ioctl(pref, cmd, &dm_dict_in)) != 0) 277 return r; 278 279 if ((r = dm_check_version(dm_dict_in)) != 0) 280 goto cleanup_exit; 281 282 /* run ioctl routine */ 283 if ((r = dm_cmd_to_fun(dm_dict_in)) != 0) 284 goto cleanup_exit; 285 286 cleanup_exit: 287 r = prop_dictionary_copyout_ioctl(pref, cmd, dm_dict_in); 288 prop_object_release(dm_dict_in); 289 } 290 291 return r; 292 } 293 294 /* 295 * Translate command sent from libdevmapper to func. 296 */ 297 static int 298 dm_cmd_to_fun(prop_dictionary_t dm_dict){ 299 int i, r; 300 prop_string_t command; 301 302 r = 0; 303 304 if ((command = prop_dictionary_get(dm_dict, DM_IOCTL_COMMAND)) == NULL) 305 return EINVAL; 306 307 for(i = 0; cmd_fn[i].cmd != NULL; i++) 308 if (prop_string_equals_cstring(command, cmd_fn[i].cmd)) 309 break; 310 311 if (cmd_fn[i].cmd == NULL) 312 return EINVAL; 313 314 aprint_debug("ioctl %s called\n", cmd_fn[i].cmd); 315 r = cmd_fn[i].fn(dm_dict); 316 317 return r; 318 } 319 320 /* Call apropriate ioctl handler function. */ 321 static int 322 dm_ioctl_switch(u_long cmd) 323 { 324 325 switch(cmd) { 326 327 case NETBSD_DM_IOCTL: 328 aprint_debug("dm NetBSD_DM_IOCTL called\n"); 329 break; 330 default: 331 aprint_debug("dm unknown ioctl called\n"); 332 return ENOTTY; 333 break; /* NOT REACHED */ 334 } 335 336 return 0; 337 } 338 339 /* 340 * Check for disk specific ioctls. 341 */ 342 343 static int 344 disk_ioctl_switch(cdev_t dev, u_long cmd, void *data) 345 { 346 dm_dev_t *dmv; 347 348 /* disk ioctls make sense only on block devices */ 349 if (minor(dev) == 0) 350 return ENOTTY; 351 352 switch(cmd) { 353 case DIOCGPART: 354 { 355 struct partinfo *dpart; 356 u_int64_t size; 357 dpart = (void *)data; 358 bzero(dpart, sizeof(*dpart)); 359 360 if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) 361 return ENODEV; 362 if (dmv->diskp->d_info.d_media_blksize == 0) { 363 dm_dev_unbusy(dmv); 364 return ENOTSUP; 365 } else { 366 size = dm_table_size(&dmv->table_head); 367 dpart->media_offset = 0; 368 dpart->media_size = size * DEV_BSIZE; 369 dpart->media_blocks = size; 370 dpart->media_blksize = DEV_BSIZE; 371 dpart->fstype = FS_BSDFFS; 372 } 373 dm_dev_unbusy(dmv); 374 break; 375 } 376 377 default: 378 aprint_debug("unknown disk_ioctl called\n"); 379 return ENOTTY; 380 break; /* NOT REACHED */ 381 } 382 383 return 0; 384 } 385 386 /* 387 * Do all IO operations on dm logical devices. 388 */ 389 static int 390 dmstrategy(struct dev_strategy_args *ap) 391 { 392 cdev_t dev = ap->a_head.a_dev; 393 struct bio *bio = ap->a_bio; 394 struct buf *bp = bio->bio_buf; 395 int bypass; 396 397 dm_dev_t *dmv; 398 dm_table_t *tbl; 399 dm_table_entry_t *table_en; 400 struct buf *nestbuf; 401 402 uint32_t dev_type; 403 404 uint64_t buf_start, buf_len, issued_len; 405 uint64_t table_start, table_end; 406 uint64_t start, end; 407 408 buf_start = bio->bio_offset; 409 buf_len = bp->b_bcount; 410 411 tbl = NULL; 412 413 table_end = 0; 414 dev_type = 0; 415 issued_len = 0; 416 417 if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) { 418 bp->b_error = EIO; 419 bp->b_resid = bp->b_bcount; 420 biodone(bio); 421 return 0; 422 } 423 424 switch(bp->b_cmd) { 425 case BUF_CMD_READ: 426 case BUF_CMD_WRITE: 427 case BUF_CMD_FREEBLKS: 428 bypass = 0; 429 break; 430 case BUF_CMD_FLUSH: 431 bypass = 1; 432 KKASSERT(buf_len == 0); 433 break; 434 default: 435 dm_dev_unbusy(dmv); 436 bp->b_error = EIO; 437 bp->b_resid = bp->b_bcount; 438 biodone(bio); 439 return 0; 440 } 441 442 if (bypass == 0 && 443 bounds_check_with_mediasize(bio, DEV_BSIZE, 444 dm_table_size(&dmv->table_head)) <= 0) { 445 dm_dev_unbusy(dmv); 446 bp->b_resid = bp->b_bcount; 447 biodone(bio); 448 return 0; 449 } 450 451 /* Select active table */ 452 tbl = dm_table_get_entry(&dmv->table_head, DM_TABLE_ACTIVE); 453 454 nestiobuf_init(bio); 455 devstat_start_transaction(&dmv->stats); 456 457 /* 458 * Find out what tables I want to select. 459 */ 460 SLIST_FOREACH(table_en, tbl, next) { 461 /* 462 * I need need number of bytes not blocks. 463 */ 464 table_start = table_en->start * DEV_BSIZE; 465 table_end = table_start + (table_en->length) * DEV_BSIZE; 466 467 /* 468 * Calculate the start and end 469 */ 470 start = MAX(table_start, buf_start); 471 end = MIN(table_end, buf_start + buf_len); 472 473 aprint_debug("----------------------------------------\n"); 474 aprint_debug("table_start %010" PRIu64", table_end %010" 475 PRIu64 "\n", table_start, table_end); 476 aprint_debug("buf_start %010" PRIu64", buf_len %010" 477 PRIu64"\n", buf_start, buf_len); 478 aprint_debug("start-buf_start %010"PRIu64", end %010" 479 PRIu64"\n", start - buf_start, end); 480 aprint_debug("start %010" PRIu64" , end %010" 481 PRIu64"\n", start, end); 482 aprint_debug("\n----------------------------------------\n"); 483 484 if (bypass) { 485 nestbuf = getpbuf(NULL); 486 nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS; 487 488 nestiobuf_add(bio, nestbuf, 0, 0, &dmv->stats); 489 nestbuf->b_bio1.bio_offset = 0; 490 table_en->target->strategy(table_en, nestbuf); 491 } else if (start < end) { 492 nestbuf = getpbuf(NULL); 493 nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS; 494 495 nestiobuf_add(bio, nestbuf, 496 start - buf_start, (end - start), 497 &dmv->stats); 498 issued_len += end - start; 499 500 nestbuf->b_bio1.bio_offset = (start - table_start); 501 table_en->target->strategy(table_en, nestbuf); 502 } 503 } 504 505 if (issued_len < buf_len) 506 nestiobuf_error(bio, EINVAL); 507 nestiobuf_start(bio); 508 dm_table_release(&dmv->table_head, DM_TABLE_ACTIVE); 509 dm_dev_unbusy(dmv); 510 511 return 0; 512 } 513 514 static int 515 dmdump(struct dev_dump_args *ap) 516 { 517 cdev_t dev = ap->a_head.a_dev; 518 dm_dev_t *dmv; 519 dm_table_t *tbl; 520 dm_table_entry_t *table_en; 521 uint32_t dev_type; 522 uint64_t buf_start, buf_len, issued_len; 523 uint64_t table_start, table_end; 524 uint64_t start, end, data_offset; 525 off_t offset; 526 size_t length; 527 int error = 0; 528 529 buf_start = ap->a_offset; 530 buf_len = ap->a_length; 531 532 tbl = NULL; 533 534 table_end = 0; 535 dev_type = 0; 536 issued_len = 0; 537 538 if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) { 539 return EIO; 540 } 541 542 /* Select active table */ 543 tbl = dm_table_get_entry(&dmv->table_head, DM_TABLE_ACTIVE); 544 545 546 /* 547 * Find out what tables I want to select. 548 */ 549 SLIST_FOREACH(table_en, tbl, next) { 550 /* 551 * I need need number of bytes not blocks. 552 */ 553 table_start = table_en->start * DEV_BSIZE; 554 table_end = table_start + (table_en->length) * DEV_BSIZE; 555 556 /* 557 * Calculate the start and end 558 */ 559 start = MAX(table_start, buf_start); 560 end = MIN(table_end, buf_start + buf_len); 561 562 if (ap->a_length == 0) { 563 if (table_en->target->dump == NULL) { 564 error = ENXIO; 565 goto out; 566 } 567 568 table_en->target->dump(table_en, NULL, 0, 0); 569 } else if (start < end) { 570 data_offset = start - buf_start; 571 offset = start - table_start; 572 length = end - start; 573 574 if (table_en->target->dump == NULL) { 575 error = ENXIO; 576 goto out; 577 } 578 579 table_en->target->dump(table_en, 580 (char *)ap->a_virtual + data_offset, 581 length, offset); 582 583 issued_len += end - start; 584 } 585 } 586 587 if (issued_len < buf_len) 588 error = EINVAL; 589 590 out: 591 dm_table_release(&dmv->table_head, DM_TABLE_ACTIVE); 592 dm_dev_unbusy(dmv); 593 594 return error; 595 } 596 597 static int 598 dmsize(struct dev_psize_args *ap) 599 { 600 cdev_t dev = ap->a_head.a_dev; 601 dm_dev_t *dmv; 602 uint64_t size; 603 604 size = 0; 605 606 if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) 607 return ENOENT; 608 609 size = dm_table_size(&dmv->table_head); 610 dm_dev_unbusy(dmv); 611 612 ap->a_result = (int64_t)size; 613 614 return 0; 615 } 616 617 #if 0 618 static void 619 dmminphys(struct buf *bp) 620 { 621 622 bp->b_bcount = MIN(bp->b_bcount, MAXPHYS); 623 } 624 #endif 625 626 void 627 dmsetdiskinfo(struct disk *disk, dm_table_head_t *head) 628 { 629 struct disk_info info; 630 uint64_t dmp_size; 631 632 dmp_size = dm_table_size(head); 633 634 bzero(&info, sizeof(struct disk_info)); 635 info.d_media_blksize = DEV_BSIZE; 636 info.d_media_blocks = dmp_size; 637 #if 0 638 /* this is set by disk_setdiskinfo */ 639 info.d_media_size = dmp_size * DEV_BSIZE; 640 #endif 641 info.d_dsflags = DSO_MBRQUIET | DSO_DEVICEMAPPER; 642 643 info.d_secpertrack = 32; 644 info.d_nheads = 64; 645 info.d_secpercyl = info.d_secpertrack * info.d_nheads; 646 info.d_ncylinders = dmp_size / info.d_secpercyl; 647 648 disk_setdiskinfo(disk, &info); 649 } 650 651 prop_dictionary_t 652 dmgetdiskinfo(struct disk *disk) 653 { 654 prop_dictionary_t disk_info, geom; 655 struct disk_info *pinfo; 656 657 pinfo = &disk->d_info; 658 659 disk_info = prop_dictionary_create(); 660 geom = prop_dictionary_create(); 661 662 prop_dictionary_set_cstring_nocopy(disk_info, "type", "ESDI"); 663 prop_dictionary_set_uint64(geom, "sectors-per-unit", pinfo->d_media_blocks); 664 prop_dictionary_set_uint32(geom, "sector-size", 665 DEV_BSIZE /* XXX 512? */); 666 prop_dictionary_set_uint32(geom, "sectors-per-track", 32); 667 prop_dictionary_set_uint32(geom, "tracks-per-cylinder", 64); 668 prop_dictionary_set_uint32(geom, "cylinders-per-unit", 669 pinfo->d_media_blocks / 2048); 670 prop_dictionary_set(disk_info, "geometry", geom); 671 prop_object_release(geom); 672 673 return disk_info; 674 } 675 676 void 677 dmgetproperties(struct disk *disk, dm_table_head_t *head) 678 { 679 #if 0 680 prop_dictionary_t disk_info, odisk_info, geom; 681 int dmp_size; 682 683 dmp_size = dm_table_size(head); 684 disk_info = prop_dictionary_create(); 685 geom = prop_dictionary_create(); 686 687 prop_dictionary_set_cstring_nocopy(disk_info, "type", "ESDI"); 688 prop_dictionary_set_uint64(geom, "sectors-per-unit", dmp_size); 689 prop_dictionary_set_uint32(geom, "sector-size", 690 DEV_BSIZE /* XXX 512? */); 691 prop_dictionary_set_uint32(geom, "sectors-per-track", 32); 692 prop_dictionary_set_uint32(geom, "tracks-per-cylinder", 64); 693 prop_dictionary_set_uint32(geom, "cylinders-per-unit", dmp_size / 2048); 694 prop_dictionary_set(disk_info, "geometry", geom); 695 prop_object_release(geom); 696 697 odisk_info = disk->dk_info; 698 disk->dk_info = disk_info; 699 700 if (odisk_info != NULL) 701 prop_object_release(odisk_info); 702 #endif 703 } 704 705 TUNABLE_INT("debug.dm_debug", &dm_debug_level); 706 SYSCTL_INT(_debug, OID_AUTO, dm_debug, CTLFLAG_RW, &dm_debug_level, 707 0, "Eanble device mapper debugging"); 708 709