1 /* $NetBSD: disk-rep.c,v 1.1.1.1 2008/12/22 00:17:59 haad Exp $ */ 2 3 /* 4 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. 5 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 6 * 7 * This file is part of LVM2. 8 * 9 * This copyrighted material is made available to anyone wishing to use, 10 * modify, copy, or redistribute it subject to the terms and conditions 11 * of the GNU Lesser General Public License v.2.1. 12 * 13 * You should have received a copy of the GNU Lesser General Public License 14 * along with this program; if not, write to the Free Software Foundation, 15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 */ 17 18 #include "lib.h" 19 #include "disk-rep.h" 20 #include "xlate.h" 21 #include "filter.h" 22 #include "lvmcache.h" 23 24 #include <fcntl.h> 25 26 #define xx16(v) disk->v = xlate16(disk->v) 27 #define xx32(v) disk->v = xlate32(disk->v) 28 #define xx64(v) disk->v = xlate64(disk->v) 29 30 /* 31 * Functions to perform the endian conversion 32 * between disk and core. The same code works 33 * both ways of course. 34 */ 35 static void _xlate_pvd(struct pv_disk *disk) 36 { 37 xx16(version); 38 39 xx32(pv_on_disk.base); 40 xx32(pv_on_disk.size); 41 xx32(vg_on_disk.base); 42 xx32(vg_on_disk.size); 43 xx32(pv_uuidlist_on_disk.base); 44 xx32(pv_uuidlist_on_disk.size); 45 xx32(lv_on_disk.base); 46 xx32(lv_on_disk.size); 47 xx32(pe_on_disk.base); 48 xx32(pe_on_disk.size); 49 50 xx32(pv_major); 51 xx32(pv_number); 52 xx32(pv_status); 53 xx32(pv_allocatable); 54 xx32(pv_size); 55 xx32(lv_cur); 56 xx32(pe_size); 57 xx32(pe_total); 58 xx32(pe_allocated); 59 xx32(pe_start); 60 } 61 62 static void _xlate_lvd(struct lv_disk *disk) 63 { 64 xx32(lv_access); 65 xx32(lv_status); 66 xx32(lv_open); 67 xx32(lv_dev); 68 xx32(lv_number); 69 xx32(lv_mirror_copies); 70 xx32(lv_recovery); 71 xx32(lv_schedule); 72 xx32(lv_size); 73 xx32(lv_snapshot_minor); 74 xx16(lv_chunk_size); 75 xx16(dummy); 76 xx32(lv_allocated_le); 77 xx32(lv_stripes); 78 xx32(lv_stripesize); 79 xx32(lv_badblock); 80 xx32(lv_allocation); 81 xx32(lv_io_timeout); 82 xx32(lv_read_ahead); 83 } 84 85 static void _xlate_vgd(struct vg_disk *disk) 86 { 87 xx32(vg_number); 88 xx32(vg_access); 89 xx32(vg_status); 90 xx32(lv_max); 91 xx32(lv_cur); 92 xx32(lv_open); 93 xx32(pv_max); 94 xx32(pv_cur); 95 xx32(pv_act); 96 xx32(dummy); 97 xx32(vgda); 98 xx32(pe_size); 99 xx32(pe_total); 100 xx32(pe_allocated); 101 xx32(pvg_total); 102 } 103 104 static void _xlate_extents(struct pe_disk *extents, uint32_t count) 105 { 106 unsigned i; 107 108 for (i = 0; i < count; i++) { 109 extents[i].lv_num = xlate16(extents[i].lv_num); 110 extents[i].le_num = xlate16(extents[i].le_num); 111 } 112 } 113 114 /* 115 * Handle both minor metadata formats. 116 */ 117 static int _munge_formats(struct pv_disk *pvd) 118 { 119 uint32_t pe_start; 120 unsigned b, e; 121 122 switch (pvd->version) { 123 case 1: 124 pvd->pe_start = ((pvd->pe_on_disk.base + 125 pvd->pe_on_disk.size) >> SECTOR_SHIFT); 126 break; 127 128 case 2: 129 pvd->version = 1; 130 pe_start = pvd->pe_start << SECTOR_SHIFT; 131 pvd->pe_on_disk.size = pe_start - pvd->pe_on_disk.base; 132 break; 133 134 default: 135 return 0; 136 } 137 138 /* UUID too long? */ 139 if (pvd->pv_uuid[ID_LEN]) { 140 /* Retain ID_LEN chars from end */ 141 for (e = ID_LEN; e < sizeof(pvd->pv_uuid); e++) { 142 if (!pvd->pv_uuid[e]) { 143 e--; 144 break; 145 } 146 } 147 for (b = 0; b < ID_LEN; b++) { 148 pvd->pv_uuid[b] = pvd->pv_uuid[++e - ID_LEN]; 149 /* FIXME Remove all invalid chars */ 150 if (pvd->pv_uuid[b] == '/') 151 pvd->pv_uuid[b] = '#'; 152 } 153 memset(&pvd->pv_uuid[ID_LEN], 0, sizeof(pvd->pv_uuid) - ID_LEN); 154 } 155 156 /* If UUID is missing, create one */ 157 if (pvd->pv_uuid[0] == '\0') { 158 uuid_from_num((char *)pvd->pv_uuid, pvd->pv_number); 159 pvd->pv_uuid[ID_LEN] = '\0'; 160 } 161 162 return 1; 163 } 164 165 /* 166 * If exported, remove "PV_EXP" from end of VG name 167 */ 168 static void _munge_exported_vg(struct pv_disk *pvd) 169 { 170 int l; 171 size_t s; 172 173 /* Return if PV not in a VG */ 174 if ((!*pvd->vg_name)) 175 return; 176 /* FIXME also check vgd->status & VG_EXPORTED? */ 177 178 l = strlen((char *)pvd->vg_name); 179 s = sizeof(EXPORTED_TAG); 180 if (!strncmp((char *)pvd->vg_name + l - s + 1, EXPORTED_TAG, s)) { 181 pvd->vg_name[l - s + 1] = '\0'; 182 pvd->pv_status |= VG_EXPORTED; 183 } 184 } 185 186 int munge_pvd(struct device *dev, struct pv_disk *pvd) 187 { 188 _xlate_pvd(pvd); 189 190 if (pvd->id[0] != 'H' || pvd->id[1] != 'M') { 191 log_very_verbose("%s does not have a valid LVM1 PV identifier", 192 dev_name(dev)); 193 return 0; 194 } 195 196 if (!_munge_formats(pvd)) { 197 log_very_verbose("format1: Unknown metadata version %d " 198 "found on %s", pvd->version, dev_name(dev)); 199 return 0; 200 } 201 202 /* If VG is exported, set VG name back to the real name */ 203 _munge_exported_vg(pvd); 204 205 return 1; 206 } 207 208 static int _read_pvd(struct device *dev, struct pv_disk *pvd) 209 { 210 if (!dev_read(dev, UINT64_C(0), sizeof(*pvd), pvd)) { 211 log_very_verbose("Failed to read PV data from %s", 212 dev_name(dev)); 213 return 0; 214 } 215 216 return munge_pvd(dev, pvd); 217 } 218 219 static int _read_lvd(struct device *dev, uint64_t pos, struct lv_disk *disk) 220 { 221 if (!dev_read(dev, pos, sizeof(*disk), disk)) 222 return_0; 223 224 _xlate_lvd(disk); 225 226 return 1; 227 } 228 229 int read_vgd(struct device *dev, struct vg_disk *vgd, struct pv_disk *pvd) 230 { 231 uint64_t pos = pvd->vg_on_disk.base; 232 233 if (!dev_read(dev, pos, sizeof(*vgd), vgd)) 234 return_0; 235 236 _xlate_vgd(vgd); 237 238 if ((vgd->lv_max > MAX_LV) || (vgd->pv_max > MAX_PV)) 239 return_0; 240 241 /* If UUID is missing, create one */ 242 if (vgd->vg_uuid[0] == '\0') 243 uuid_from_num((char *)vgd->vg_uuid, vgd->vg_number); 244 245 return 1; 246 } 247 248 static int _read_uuids(struct disk_list *data) 249 { 250 unsigned num_read = 0; 251 struct uuid_list *ul; 252 char buffer[NAME_LEN] __attribute((aligned(8))); 253 uint64_t pos = data->pvd.pv_uuidlist_on_disk.base; 254 uint64_t end = pos + data->pvd.pv_uuidlist_on_disk.size; 255 256 while (pos < end && num_read < data->vgd.pv_cur) { 257 if (!dev_read(data->dev, pos, sizeof(buffer), buffer)) 258 return_0; 259 260 if (!(ul = dm_pool_alloc(data->mem, sizeof(*ul)))) 261 return_0; 262 263 memcpy(ul->uuid, buffer, NAME_LEN); 264 ul->uuid[NAME_LEN - 1] = '\0'; 265 266 dm_list_add(&data->uuids, &ul->list); 267 268 pos += NAME_LEN; 269 num_read++; 270 } 271 272 return 1; 273 } 274 275 static int _check_lvd(struct lv_disk *lvd) 276 { 277 return !(lvd->lv_name[0] == '\0'); 278 } 279 280 static int _read_lvs(struct disk_list *data) 281 { 282 unsigned int i, lvs_read = 0; 283 uint64_t pos; 284 struct lvd_list *ll; 285 struct vg_disk *vgd = &data->vgd; 286 287 for (i = 0; (i < vgd->lv_max) && (lvs_read < vgd->lv_cur); i++) { 288 pos = data->pvd.lv_on_disk.base + (i * sizeof(struct lv_disk)); 289 ll = dm_pool_alloc(data->mem, sizeof(*ll)); 290 291 if (!ll) 292 return_0; 293 294 if (!_read_lvd(data->dev, pos, &ll->lvd)) 295 return_0; 296 297 if (!_check_lvd(&ll->lvd)) 298 continue; 299 300 lvs_read++; 301 dm_list_add(&data->lvds, &ll->list); 302 } 303 304 return 1; 305 } 306 307 static int _read_extents(struct disk_list *data) 308 { 309 size_t len = sizeof(struct pe_disk) * data->pvd.pe_total; 310 struct pe_disk *extents = dm_pool_alloc(data->mem, len); 311 uint64_t pos = data->pvd.pe_on_disk.base; 312 313 if (!extents) 314 return_0; 315 316 if (!dev_read(data->dev, pos, len, extents)) 317 return_0; 318 319 _xlate_extents(extents, data->pvd.pe_total); 320 data->extents = extents; 321 322 return 1; 323 } 324 325 static void __update_lvmcache(const struct format_type *fmt, 326 struct disk_list *dl, 327 struct device *dev, const char *vgid, 328 unsigned exported) 329 { 330 struct lvmcache_info *info; 331 const char *vgname = *((char *)dl->pvd.vg_name) ? 332 (char *)dl->pvd.vg_name : fmt->orphan_vg_name; 333 334 if (!(info = lvmcache_add(fmt->labeller, (char *)dl->pvd.pv_uuid, dev, 335 vgname, vgid, exported ? EXPORTED_VG : 0))) { 336 stack; 337 return; 338 } 339 340 info->device_size = xlate32(dl->pvd.pv_size) << SECTOR_SHIFT; 341 dm_list_init(&info->mdas); 342 info->status &= ~CACHE_INVALID; 343 } 344 345 static struct disk_list *__read_disk(const struct format_type *fmt, 346 struct device *dev, struct dm_pool *mem, 347 const char *vg_name) 348 { 349 struct disk_list *dl = dm_pool_zalloc(mem, sizeof(*dl)); 350 const char *name = dev_name(dev); 351 352 if (!dl) 353 return_NULL; 354 355 dl->dev = dev; 356 dl->mem = mem; 357 dm_list_init(&dl->uuids); 358 dm_list_init(&dl->lvds); 359 360 if (!_read_pvd(dev, &dl->pvd)) 361 goto_bad; 362 363 /* 364 * is it an orphan ? 365 */ 366 if (!*dl->pvd.vg_name) { 367 log_very_verbose("%s is not a member of any format1 VG", name); 368 369 __update_lvmcache(fmt, dl, dev, fmt->orphan_vg_name, 0); 370 return (vg_name) ? NULL : dl; 371 } 372 373 if (!read_vgd(dl->dev, &dl->vgd, &dl->pvd)) { 374 log_error("Failed to read VG data from PV (%s)", name); 375 __update_lvmcache(fmt, dl, dev, fmt->orphan_vg_name, 0); 376 goto bad; 377 } 378 379 if (vg_name && strcmp(vg_name, (char *)dl->pvd.vg_name)) { 380 log_very_verbose("%s is not a member of the VG %s", 381 name, vg_name); 382 __update_lvmcache(fmt, dl, dev, fmt->orphan_vg_name, 0); 383 goto bad; 384 } 385 386 __update_lvmcache(fmt, dl, dev, (char *)dl->vgd.vg_uuid, 387 dl->vgd.vg_status & VG_EXPORTED); 388 389 if (!_read_uuids(dl)) { 390 log_error("Failed to read PV uuid list from %s", name); 391 goto bad; 392 } 393 394 if (!_read_lvs(dl)) { 395 log_error("Failed to read LV's from %s", name); 396 goto bad; 397 } 398 399 if (!_read_extents(dl)) { 400 log_error("Failed to read extents from %s", name); 401 goto bad; 402 } 403 404 log_very_verbose("Found %s in %sVG %s", name, 405 (dl->vgd.vg_status & VG_EXPORTED) ? "exported " : "", 406 dl->pvd.vg_name); 407 408 return dl; 409 410 bad: 411 dm_pool_free(dl->mem, dl); 412 return NULL; 413 } 414 415 struct disk_list *read_disk(const struct format_type *fmt, struct device *dev, 416 struct dm_pool *mem, const char *vg_name) 417 { 418 struct disk_list *dl; 419 420 if (!dev_open(dev)) 421 return_NULL; 422 423 dl = __read_disk(fmt, dev, mem, vg_name); 424 425 if (!dev_close(dev)) 426 stack; 427 428 return dl; 429 } 430 431 static void _add_pv_to_list(struct dm_list *head, struct disk_list *data) 432 { 433 struct pv_disk *pvd; 434 struct disk_list *diskl; 435 436 dm_list_iterate_items(diskl, head) { 437 pvd = &diskl->pvd; 438 if (!strncmp((char *)data->pvd.pv_uuid, (char *)pvd->pv_uuid, 439 sizeof(pvd->pv_uuid))) { 440 if (MAJOR(data->dev->dev) != md_major()) { 441 log_very_verbose("Ignoring duplicate PV %s on " 442 "%s", pvd->pv_uuid, 443 dev_name(data->dev)); 444 return; 445 } 446 log_very_verbose("Duplicate PV %s - using md %s", 447 pvd->pv_uuid, dev_name(data->dev)); 448 dm_list_del(&diskl->list); 449 break; 450 } 451 } 452 dm_list_add(head, &data->list); 453 } 454 455 /* 456 * Build a list of pv_d's structures, allocated from mem. 457 * We keep track of the first object allocated from the pool 458 * so we can free off all the memory if something goes wrong. 459 */ 460 int read_pvs_in_vg(const struct format_type *fmt, const char *vg_name, 461 struct dev_filter *filter, struct dm_pool *mem, 462 struct dm_list *head) 463 { 464 struct dev_iter *iter; 465 struct device *dev; 466 struct disk_list *data = NULL; 467 struct lvmcache_vginfo *vginfo; 468 struct lvmcache_info *info; 469 470 /* Fast path if we already saw this VG and cached the list of PVs */ 471 if (vg_name && (vginfo = vginfo_from_vgname(vg_name, NULL)) && 472 vginfo->infos.n) { 473 dm_list_iterate_items(info, &vginfo->infos) { 474 dev = info->dev; 475 if (dev && !(data = read_disk(fmt, dev, mem, vg_name))) 476 break; 477 _add_pv_to_list(head, data); 478 } 479 480 /* Did we find the whole VG? */ 481 if (!vg_name || is_orphan_vg(vg_name) || 482 (data && *data->pvd.vg_name && 483 dm_list_size(head) == data->vgd.pv_cur)) 484 return 1; 485 486 /* Failed */ 487 dm_list_init(head); 488 /* vgcache_del(vg_name); */ 489 } 490 491 if (!(iter = dev_iter_create(filter, 1))) { 492 log_error("read_pvs_in_vg: dev_iter_create failed"); 493 return 0; 494 } 495 496 /* Otherwise do a complete scan */ 497 for (dev = dev_iter_get(iter); dev; dev = dev_iter_get(iter)) { 498 if ((data = read_disk(fmt, dev, mem, vg_name))) { 499 _add_pv_to_list(head, data); 500 } 501 } 502 dev_iter_destroy(iter); 503 504 if (dm_list_empty(head)) 505 return 0; 506 507 return 1; 508 } 509 510 static int _write_vgd(struct disk_list *data) 511 { 512 struct vg_disk *vgd = &data->vgd; 513 uint64_t pos = data->pvd.vg_on_disk.base; 514 515 log_debug("Writing %s VG metadata to %s at %" PRIu64 " len %" PRIsize_t, 516 data->pvd.vg_name, dev_name(data->dev), pos, sizeof(*vgd)); 517 518 _xlate_vgd(vgd); 519 if (!dev_write(data->dev, pos, sizeof(*vgd), vgd)) 520 return_0; 521 522 _xlate_vgd(vgd); 523 524 return 1; 525 } 526 527 static int _write_uuids(struct disk_list *data) 528 { 529 struct uuid_list *ul; 530 uint64_t pos = data->pvd.pv_uuidlist_on_disk.base; 531 uint64_t end = pos + data->pvd.pv_uuidlist_on_disk.size; 532 533 dm_list_iterate_items(ul, &data->uuids) { 534 if (pos >= end) { 535 log_error("Too many uuids to fit on %s", 536 dev_name(data->dev)); 537 return 0; 538 } 539 540 log_debug("Writing %s uuidlist to %s at %" PRIu64 " len %d", 541 data->pvd.vg_name, dev_name(data->dev), 542 pos, NAME_LEN); 543 544 if (!dev_write(data->dev, pos, NAME_LEN, ul->uuid)) 545 return_0; 546 547 pos += NAME_LEN; 548 } 549 550 return 1; 551 } 552 553 static int _write_lvd(struct device *dev, uint64_t pos, struct lv_disk *disk) 554 { 555 log_debug("Writing %s LV %s metadata to %s at %" PRIu64 " len %" 556 PRIsize_t, disk->vg_name, disk->lv_name, dev_name(dev), 557 pos, sizeof(*disk)); 558 559 _xlate_lvd(disk); 560 if (!dev_write(dev, pos, sizeof(*disk), disk)) 561 return_0; 562 563 _xlate_lvd(disk); 564 565 return 1; 566 } 567 568 static int _write_lvs(struct disk_list *data) 569 { 570 struct lvd_list *ll; 571 uint64_t pos, offset; 572 573 pos = data->pvd.lv_on_disk.base; 574 575 if (!dev_set(data->dev, pos, data->pvd.lv_on_disk.size, 0)) { 576 log_error("Couldn't zero lv area on device '%s'", 577 dev_name(data->dev)); 578 return 0; 579 } 580 581 dm_list_iterate_items(ll, &data->lvds) { 582 offset = sizeof(struct lv_disk) * ll->lvd.lv_number; 583 if (offset + sizeof(struct lv_disk) > data->pvd.lv_on_disk.size) { 584 log_error("lv_number %d too large", ll->lvd.lv_number); 585 return 0; 586 } 587 588 if (!_write_lvd(data->dev, pos + offset, &ll->lvd)) 589 return_0; 590 } 591 592 return 1; 593 } 594 595 static int _write_extents(struct disk_list *data) 596 { 597 size_t len = sizeof(struct pe_disk) * data->pvd.pe_total; 598 struct pe_disk *extents = data->extents; 599 uint64_t pos = data->pvd.pe_on_disk.base; 600 601 log_debug("Writing %s extents metadata to %s at %" PRIu64 " len %" 602 PRIsize_t, data->pvd.vg_name, dev_name(data->dev), 603 pos, len); 604 605 _xlate_extents(extents, data->pvd.pe_total); 606 if (!dev_write(data->dev, pos, len, extents)) 607 return_0; 608 609 _xlate_extents(extents, data->pvd.pe_total); 610 611 return 1; 612 } 613 614 static int _write_pvd(struct disk_list *data) 615 { 616 char *buf; 617 uint64_t pos = data->pvd.pv_on_disk.base; 618 size_t size = data->pvd.pv_on_disk.size; 619 620 if (size < sizeof(struct pv_disk)) { 621 log_error("Invalid PV structure size."); 622 return 0; 623 } 624 625 /* Make sure that the gap between the PV structure and 626 the next one is zeroed in order to make non LVM tools 627 happy (idea from AED) */ 628 buf = dm_malloc(size); 629 if (!buf) { 630 log_err("Couldn't allocate temporary PV buffer."); 631 return 0; 632 } 633 634 memset(buf, 0, size); 635 memcpy(buf, &data->pvd, sizeof(struct pv_disk)); 636 637 log_debug("Writing %s PV metadata to %s at %" PRIu64 " len %" 638 PRIsize_t, data->pvd.vg_name, dev_name(data->dev), 639 pos, size); 640 641 _xlate_pvd((struct pv_disk *) buf); 642 if (!dev_write(data->dev, pos, size, buf)) { 643 dm_free(buf); 644 return_0; 645 } 646 647 dm_free(buf); 648 return 1; 649 } 650 651 /* 652 * assumes the device has been opened. 653 */ 654 static int __write_all_pvd(const struct format_type *fmt __attribute((unused)), 655 struct disk_list *data) 656 { 657 const char *pv_name = dev_name(data->dev); 658 659 if (!_write_pvd(data)) { 660 log_error("Failed to write PV structure onto %s", pv_name); 661 return 0; 662 } 663 664 /* vgcache_add(data->pvd.vg_name, data->vgd.vg_uuid, data->dev, fmt); */ 665 /* 666 * Stop here for orphan pv's. 667 */ 668 if (data->pvd.vg_name[0] == '\0') { 669 /* if (!test_mode()) 670 vgcache_add(data->pvd.vg_name, NULL, data->dev, fmt); */ 671 return 1; 672 } 673 674 /* if (!test_mode()) 675 vgcache_add(data->pvd.vg_name, data->vgd.vg_uuid, data->dev, 676 fmt); */ 677 678 if (!_write_vgd(data)) { 679 log_error("Failed to write VG data to %s", pv_name); 680 return 0; 681 } 682 683 if (!_write_uuids(data)) { 684 log_error("Failed to write PV uuid list to %s", pv_name); 685 return 0; 686 } 687 688 if (!_write_lvs(data)) { 689 log_error("Failed to write LV's to %s", pv_name); 690 return 0; 691 } 692 693 if (!_write_extents(data)) { 694 log_error("Failed to write extents to %s", pv_name); 695 return 0; 696 } 697 698 return 1; 699 } 700 701 /* 702 * opens the device and hands to the above fn. 703 */ 704 static int _write_all_pvd(const struct format_type *fmt, struct disk_list *data) 705 { 706 int r; 707 708 if (!dev_open(data->dev)) 709 return_0; 710 711 r = __write_all_pvd(fmt, data); 712 713 if (!dev_close(data->dev)) 714 stack; 715 716 return r; 717 } 718 719 /* 720 * Writes all the given pv's to disk. Does very 721 * little sanity checking, so make sure correct 722 * data is passed to here. 723 */ 724 int write_disks(const struct format_type *fmt, struct dm_list *pvs) 725 { 726 struct disk_list *dl; 727 728 dm_list_iterate_items(dl, pvs) { 729 if (!(_write_all_pvd(fmt, dl))) 730 return_0; 731 732 log_very_verbose("Successfully wrote data to %s", 733 dev_name(dl->dev)); 734 } 735 736 return 1; 737 } 738