1 /* $NetBSD: device-mapper.c,v 1.22 2010/03/26 15:46:04 jakllsch Exp $ */ 2 3 /* 4 * Copyright (c) 2010 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Adam Hamsik. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * I want to say thank you to all people who helped me with this project. 34 */ 35 36 #include <sys/types.h> 37 #include <sys/param.h> 38 39 #include <sys/buf.h> 40 #include <sys/conf.h> 41 #include <sys/device.h> 42 #include <sys/disk.h> 43 #include <sys/disklabel.h> 44 #include <sys/dtype.h> 45 #include <sys/ioccom.h> 46 #include <sys/malloc.h> 47 #include <sys/module.h> 48 #include <sys/sysctl.h> 49 50 #include "netbsd-dm.h" 51 #include "dm.h" 52 53 static d_ioctl_t dmioctl; 54 static d_open_t dmopen; 55 static d_close_t dmclose; 56 static d_psize_t dmsize; 57 static d_strategy_t dmstrategy; 58 static d_dump_t dmdump; 59 60 /* attach and detach routines */ 61 void dmattach(int); 62 static int dm_modcmd(module_t mod, int cmd, void *unused); 63 static int dmdestroy(void); 64 65 static void dm_doinit(void); 66 67 static int dm_cmd_to_fun(prop_dictionary_t); 68 static int disk_ioctl_switch(cdev_t, u_long, void *); 69 static int dm_ioctl_switch(u_long); 70 #if 0 71 static void dmminphys(struct buf *); 72 #endif 73 74 /* ***Variable-definitions*** */ 75 struct dev_ops dm_ops = { 76 { "dm", 0, D_DISK | D_MPSAFE }, 77 .d_open = dmopen, 78 .d_close = dmclose, 79 .d_read = physread, 80 .d_write = physwrite, 81 .d_ioctl = dmioctl, 82 .d_strategy = dmstrategy, 83 .d_psize = dmsize, 84 .d_dump = dmdump, 85 /* D_DISK */ 86 }; 87 88 MALLOC_DEFINE(M_DM, "dm", "Device Mapper allocations"); 89 90 int dm_debug_level = 0; 91 92 extern uint64_t dm_dev_counter; 93 94 static cdev_t dmcdev; 95 96 static moduledata_t dm_mod = { 97 "dm", 98 dm_modcmd, 99 NULL 100 }; 101 DECLARE_MODULE(dm, dm_mod, SI_SUB_RAID, SI_ORDER_ANY); 102 103 /* 104 * This array is used to translate cmd to function pointer. 105 * 106 * Interface between libdevmapper and lvm2tools uses different 107 * names for one IOCTL call because libdevmapper do another thing 108 * then. When I run "info" or "mknodes" libdevmapper will send same 109 * ioctl to kernel but will do another things in userspace. 110 * 111 */ 112 static struct cmd_function cmd_fn[] = { 113 { .cmd = "version", .fn = dm_get_version_ioctl}, 114 { .cmd = "targets", .fn = dm_list_versions_ioctl}, 115 { .cmd = "create", .fn = dm_dev_create_ioctl}, 116 { .cmd = "info", .fn = dm_dev_status_ioctl}, 117 { .cmd = "mknodes", .fn = dm_dev_status_ioctl}, 118 { .cmd = "names", .fn = dm_dev_list_ioctl}, 119 { .cmd = "suspend", .fn = dm_dev_suspend_ioctl}, 120 { .cmd = "remove", .fn = dm_dev_remove_ioctl}, 121 { .cmd = "rename", .fn = dm_dev_rename_ioctl}, 122 { .cmd = "resume", .fn = dm_dev_resume_ioctl}, 123 { .cmd = "clear", .fn = dm_table_clear_ioctl}, 124 { .cmd = "deps", .fn = dm_table_deps_ioctl}, 125 { .cmd = "reload", .fn = dm_table_load_ioctl}, 126 { .cmd = "status", .fn = dm_table_status_ioctl}, 127 { .cmd = "table", .fn = dm_table_status_ioctl}, 128 {NULL, NULL} 129 }; 130 131 /* New module handle routine */ 132 static int 133 dm_modcmd(module_t mod, int cmd, void *unused) 134 { 135 int error, bmajor, cmajor; 136 137 error = 0; 138 bmajor = -1; 139 cmajor = -1; 140 141 switch (cmd) { 142 case MOD_LOAD: 143 dm_doinit(); 144 kprintf("Device Mapper version %d.%d.%d loaded\n", 145 DM_VERSION_MAJOR, DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL); 146 break; 147 148 case MOD_UNLOAD: 149 /* 150 * Disable unloading of dm module if there are any devices 151 * defined in driver. This is probably too strong we need 152 * to disable auto-unload only if there is mounted dm device 153 * present. 154 */ 155 if (dm_dev_counter > 0) 156 return EBUSY; 157 158 error = dmdestroy(); 159 if (error) 160 break; 161 kprintf("Device Mapper unloaded\n"); 162 break; 163 164 default: 165 break; 166 } 167 168 return error; 169 } 170 171 /* 172 * dm_detach: 173 * 174 * Autoconfiguration detach function for pseudo-device glue. 175 * This routine is called by dm_ioctl::dm_dev_remove_ioctl and by autoconf to 176 * remove devices created in device-mapper. 177 */ 178 int 179 dm_detach(dm_dev_t *dmv) 180 { 181 disable_dev(dmv); 182 183 /* Destroy active table first. */ 184 dm_table_destroy(&dmv->table_head, DM_TABLE_ACTIVE); 185 186 /* Destroy inactive table if exits, too. */ 187 dm_table_destroy(&dmv->table_head, DM_TABLE_INACTIVE); 188 189 dm_table_head_destroy(&dmv->table_head); 190 191 destroy_dev(dmv->devt); 192 193 /* Destroy device */ 194 (void)dm_dev_free(dmv); 195 196 /* Decrement device counter After removing device */ 197 --dm_dev_counter; /* XXX: was atomic 64 */ 198 199 return 0; 200 } 201 202 static void 203 dm_doinit(void) 204 { 205 dm_target_init(); 206 dm_dev_init(); 207 dm_pdev_init(); 208 dmcdev = make_dev(&dm_ops, 0, UID_ROOT, GID_OPERATOR, 0640, "mapper/control"); 209 } 210 211 /* Destroy routine */ 212 static int 213 dmdestroy(void) 214 { 215 destroy_dev(dmcdev); 216 217 dm_dev_destroy(); 218 dm_pdev_destroy(); 219 dm_target_destroy(); 220 221 return 0; 222 } 223 224 static int 225 dmopen(struct dev_open_args *ap) 226 { 227 228 aprint_debug("dm open routine called %" PRIu32 "\n", 229 minor(ap->a_head.a_dev)); 230 return 0; 231 } 232 233 static int 234 dmclose(struct dev_close_args *ap) 235 { 236 237 aprint_debug("dm close routine called %" PRIu32 "\n", 238 minor(ap->a_head.a_dev)); 239 return 0; 240 } 241 242 243 static int 244 dmioctl(struct dev_ioctl_args *ap) 245 { 246 cdev_t dev = ap->a_head.a_dev; 247 u_long cmd = ap->a_cmd; 248 void *data = ap->a_data; 249 250 int r; 251 prop_dictionary_t dm_dict_in; 252 253 r = 0; 254 255 aprint_debug("dmioctl called\n"); 256 257 KKASSERT(data != NULL); 258 259 if (( r = disk_ioctl_switch(dev, cmd, data)) == ENOTTY) { 260 struct plistref *pref = (struct plistref *) data; 261 262 /* Check if we were called with NETBSD_DM_IOCTL ioctl 263 otherwise quit. */ 264 if ((r = dm_ioctl_switch(cmd)) != 0) 265 return r; 266 267 if((r = prop_dictionary_copyin_ioctl(pref, cmd, &dm_dict_in)) != 0) 268 return r; 269 270 if ((r = dm_check_version(dm_dict_in)) != 0) 271 goto cleanup_exit; 272 273 /* run ioctl routine */ 274 if ((r = dm_cmd_to_fun(dm_dict_in)) != 0) 275 goto cleanup_exit; 276 277 cleanup_exit: 278 r = prop_dictionary_copyout_ioctl(pref, cmd, dm_dict_in); 279 prop_object_release(dm_dict_in); 280 } 281 282 return r; 283 } 284 285 /* 286 * Translate command sent from libdevmapper to func. 287 */ 288 static int 289 dm_cmd_to_fun(prop_dictionary_t dm_dict){ 290 int i, r; 291 prop_string_t command; 292 293 r = 0; 294 295 if ((command = prop_dictionary_get(dm_dict, DM_IOCTL_COMMAND)) == NULL) 296 return EINVAL; 297 298 for(i = 0; cmd_fn[i].cmd != NULL; i++) 299 if (prop_string_equals_cstring(command, cmd_fn[i].cmd)) 300 break; 301 302 if (cmd_fn[i].cmd == NULL) 303 return EINVAL; 304 305 aprint_debug("ioctl %s called\n", cmd_fn[i].cmd); 306 r = cmd_fn[i].fn(dm_dict); 307 308 return r; 309 } 310 311 /* Call apropriate ioctl handler function. */ 312 static int 313 dm_ioctl_switch(u_long cmd) 314 { 315 316 switch(cmd) { 317 318 case NETBSD_DM_IOCTL: 319 aprint_debug("dm NetBSD_DM_IOCTL called\n"); 320 break; 321 default: 322 aprint_debug("dm unknown ioctl called\n"); 323 return ENOTTY; 324 break; /* NOT REACHED */ 325 } 326 327 return 0; 328 } 329 330 /* 331 * Check for disk specific ioctls. 332 */ 333 334 static int 335 disk_ioctl_switch(cdev_t dev, u_long cmd, void *data) 336 { 337 dm_dev_t *dmv; 338 339 /* disk ioctls make sense only on block devices */ 340 if (minor(dev) == 0) 341 return ENOTTY; 342 343 switch(cmd) { 344 case DIOCGPART: 345 { 346 struct partinfo *dpart; 347 u_int64_t size; 348 dpart = (void *)data; 349 bzero(dpart, sizeof(*dpart)); 350 351 if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) 352 return ENODEV; 353 if (dmv->diskp->d_info.d_media_blksize == 0) { 354 dm_dev_unbusy(dmv); 355 return ENOTSUP; 356 } else { 357 size = dm_table_size(&dmv->table_head); 358 dpart->media_offset = 0; 359 dpart->media_size = size * DEV_BSIZE; 360 dpart->media_blocks = size; 361 dpart->media_blksize = DEV_BSIZE; 362 dpart->fstype = FS_BSDFFS; 363 } 364 dm_dev_unbusy(dmv); 365 break; 366 } 367 368 default: 369 aprint_debug("unknown disk_ioctl called\n"); 370 return ENOTTY; 371 break; /* NOT REACHED */ 372 } 373 374 return 0; 375 } 376 377 /* 378 * Do all IO operations on dm logical devices. 379 */ 380 static int 381 dmstrategy(struct dev_strategy_args *ap) 382 { 383 cdev_t dev = ap->a_head.a_dev; 384 struct bio *bio = ap->a_bio; 385 struct buf *bp = bio->bio_buf; 386 int bypass; 387 388 dm_dev_t *dmv; 389 dm_table_t *tbl; 390 dm_table_entry_t *table_en; 391 struct buf *nestbuf; 392 393 uint32_t dev_type; 394 395 uint64_t buf_start, buf_len, issued_len; 396 uint64_t table_start, table_end; 397 uint64_t start, end; 398 399 buf_start = bio->bio_offset; 400 buf_len = bp->b_bcount; 401 402 tbl = NULL; 403 404 table_end = 0; 405 dev_type = 0; 406 issued_len = 0; 407 408 if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) { 409 bp->b_error = EIO; 410 bp->b_resid = bp->b_bcount; 411 biodone(bio); 412 return 0; 413 } 414 415 switch(bp->b_cmd) { 416 case BUF_CMD_READ: 417 case BUF_CMD_WRITE: 418 case BUF_CMD_FREEBLKS: 419 bypass = 0; 420 break; 421 case BUF_CMD_FLUSH: 422 bypass = 1; 423 KKASSERT(buf_len == 0); 424 break; 425 default: 426 dm_dev_unbusy(dmv); 427 bp->b_error = EIO; 428 bp->b_resid = bp->b_bcount; 429 biodone(bio); 430 return 0; 431 } 432 433 if (bypass == 0 && 434 bounds_check_with_mediasize(bio, DEV_BSIZE, 435 dm_table_size(&dmv->table_head)) <= 0) { 436 dm_dev_unbusy(dmv); 437 bp->b_resid = bp->b_bcount; 438 biodone(bio); 439 return 0; 440 } 441 442 /* Select active table */ 443 tbl = dm_table_get_entry(&dmv->table_head, DM_TABLE_ACTIVE); 444 445 nestiobuf_init(bio); 446 447 /* 448 * Find out what tables I want to select. 449 */ 450 SLIST_FOREACH(table_en, tbl, next) { 451 /* 452 * I need need number of bytes not blocks. 453 */ 454 table_start = table_en->start * DEV_BSIZE; 455 table_end = table_start + (table_en->length) * DEV_BSIZE; 456 457 /* 458 * Calculate the start and end 459 */ 460 start = MAX(table_start, buf_start); 461 end = MIN(table_end, buf_start + buf_len); 462 463 aprint_debug("----------------------------------------\n"); 464 aprint_debug("table_start %010" PRIu64", table_end %010" 465 PRIu64 "\n", table_start, table_end); 466 aprint_debug("buf_start %010" PRIu64", buf_len %010" 467 PRIu64"\n", buf_start, buf_len); 468 aprint_debug("start-buf_start %010"PRIu64", end %010" 469 PRIu64"\n", start - buf_start, end); 470 aprint_debug("start %010" PRIu64" , end %010" 471 PRIu64"\n", start, end); 472 aprint_debug("\n----------------------------------------\n"); 473 474 if (bypass) { 475 nestbuf = getpbuf(NULL); 476 nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS; 477 478 nestiobuf_add(bio, nestbuf, 0, 0); 479 nestbuf->b_bio1.bio_offset = 0; 480 table_en->target->strategy(table_en, nestbuf); 481 } else if (start < end) { 482 nestbuf = getpbuf(NULL); 483 nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS; 484 485 nestiobuf_add(bio, nestbuf, 486 start - buf_start, (end - start)); 487 issued_len += end - start; 488 489 nestbuf->b_bio1.bio_offset = (start - table_start); 490 table_en->target->strategy(table_en, nestbuf); 491 } 492 } 493 494 if (issued_len < buf_len) 495 nestiobuf_error(bio, EINVAL); 496 nestiobuf_start(bio); 497 dm_table_release(&dmv->table_head, DM_TABLE_ACTIVE); 498 dm_dev_unbusy(dmv); 499 500 return 0; 501 } 502 503 static int 504 dmdump(struct dev_dump_args *ap) 505 { 506 cdev_t dev = ap->a_head.a_dev; 507 dm_dev_t *dmv; 508 dm_table_t *tbl; 509 dm_table_entry_t *table_en; 510 uint32_t dev_type; 511 uint64_t buf_start, buf_len, issued_len; 512 uint64_t table_start, table_end; 513 uint64_t start, end, data_offset; 514 off_t offset; 515 size_t length; 516 int error = 0; 517 518 buf_start = ap->a_offset; 519 buf_len = ap->a_length; 520 521 tbl = NULL; 522 523 table_end = 0; 524 dev_type = 0; 525 issued_len = 0; 526 527 if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) { 528 return EIO; 529 } 530 531 /* Select active table */ 532 tbl = dm_table_get_entry(&dmv->table_head, DM_TABLE_ACTIVE); 533 534 535 /* 536 * Find out what tables I want to select. 537 */ 538 SLIST_FOREACH(table_en, tbl, next) { 539 /* 540 * I need need number of bytes not blocks. 541 */ 542 table_start = table_en->start * DEV_BSIZE; 543 table_end = table_start + (table_en->length) * DEV_BSIZE; 544 545 /* 546 * Calculate the start and end 547 */ 548 start = MAX(table_start, buf_start); 549 end = MIN(table_end, buf_start + buf_len); 550 551 if (ap->a_length == 0) { 552 if (table_en->target->dump == NULL) { 553 error = ENXIO; 554 goto out; 555 } 556 557 table_en->target->dump(table_en, NULL, 0, 0); 558 } else if (start < end) { 559 data_offset = start - buf_start; 560 offset = start - table_start; 561 length = end - start; 562 563 if (table_en->target->dump == NULL) { 564 error = ENXIO; 565 goto out; 566 } 567 568 table_en->target->dump(table_en, 569 (char *)ap->a_virtual + data_offset, 570 length, offset); 571 572 issued_len += end - start; 573 } 574 } 575 576 if (issued_len < buf_len) 577 error = EINVAL; 578 579 out: 580 dm_table_release(&dmv->table_head, DM_TABLE_ACTIVE); 581 dm_dev_unbusy(dmv); 582 583 return error; 584 } 585 586 static int 587 dmsize(struct dev_psize_args *ap) 588 { 589 cdev_t dev = ap->a_head.a_dev; 590 dm_dev_t *dmv; 591 uint64_t size; 592 593 size = 0; 594 595 if ((dmv = dm_dev_lookup(NULL, NULL, minor(dev))) == NULL) 596 return ENOENT; 597 598 size = dm_table_size(&dmv->table_head); 599 dm_dev_unbusy(dmv); 600 601 ap->a_result = (int64_t)size; 602 603 return 0; 604 } 605 606 #if 0 607 static void 608 dmminphys(struct buf *bp) 609 { 610 611 bp->b_bcount = MIN(bp->b_bcount, MAXPHYS); 612 } 613 #endif 614 615 void 616 dmsetdiskinfo(struct disk *disk, dm_table_head_t *head) 617 { 618 struct disk_info info; 619 int dmp_size; 620 621 dmp_size = dm_table_size(head); 622 623 bzero(&info, sizeof(struct disk_info)); 624 info.d_media_blksize = DEV_BSIZE; 625 info.d_media_blocks = dmp_size; 626 info.d_media_size = dmp_size * DEV_BSIZE; 627 info.d_dsflags = DSO_MBRQUIET; /* XXX */ 628 info.d_secpertrack = 32; 629 info.d_nheads = 64; 630 info.d_secpercyl = info.d_secpertrack * info.d_nheads; 631 info.d_ncylinders = dmp_size / 2048; 632 bcopy(&info, &disk->d_info, sizeof(disk->d_info)); 633 } 634 635 prop_dictionary_t 636 dmgetdiskinfo(struct disk *disk) 637 { 638 prop_dictionary_t disk_info, geom; 639 struct disk_info *pinfo; 640 641 pinfo = &disk->d_info; 642 643 disk_info = prop_dictionary_create(); 644 geom = prop_dictionary_create(); 645 646 prop_dictionary_set_cstring_nocopy(disk_info, "type", "ESDI"); 647 prop_dictionary_set_uint64(geom, "sectors-per-unit", pinfo->d_media_blocks); 648 prop_dictionary_set_uint32(geom, "sector-size", 649 DEV_BSIZE /* XXX 512? */); 650 prop_dictionary_set_uint32(geom, "sectors-per-track", 32); 651 prop_dictionary_set_uint32(geom, "tracks-per-cylinder", 64); 652 prop_dictionary_set_uint32(geom, "cylinders-per-unit", 653 pinfo->d_media_blocks / 2048); 654 prop_dictionary_set(disk_info, "geometry", geom); 655 prop_object_release(geom); 656 657 return disk_info; 658 } 659 660 void 661 dmgetproperties(struct disk *disk, dm_table_head_t *head) 662 { 663 #if 0 664 prop_dictionary_t disk_info, odisk_info, geom; 665 int dmp_size; 666 667 dmp_size = dm_table_size(head); 668 disk_info = prop_dictionary_create(); 669 geom = prop_dictionary_create(); 670 671 prop_dictionary_set_cstring_nocopy(disk_info, "type", "ESDI"); 672 prop_dictionary_set_uint64(geom, "sectors-per-unit", dmp_size); 673 prop_dictionary_set_uint32(geom, "sector-size", 674 DEV_BSIZE /* XXX 512? */); 675 prop_dictionary_set_uint32(geom, "sectors-per-track", 32); 676 prop_dictionary_set_uint32(geom, "tracks-per-cylinder", 64); 677 prop_dictionary_set_uint32(geom, "cylinders-per-unit", dmp_size / 2048); 678 prop_dictionary_set(disk_info, "geometry", geom); 679 prop_object_release(geom); 680 681 odisk_info = disk->dk_info; 682 disk->dk_info = disk_info; 683 684 if (odisk_info != NULL) 685 prop_object_release(odisk_info); 686 #endif 687 } 688 689 TUNABLE_INT("debug.dm_debug", &dm_debug_level); 690 SYSCTL_INT(_debug, OID_AUTO, dm_debug, CTLFLAG_RW, &dm_debug_level, 691 0, "Eanble device mapper debugging"); 692 693