1 /* 2 * Copyright (c) 2010 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include "hammer.h" 36 37 struct recover_dict { 38 struct recover_dict *next; 39 struct recover_dict *parent; 40 int64_t obj_id; 41 uint8_t obj_type; 42 uint8_t flags; 43 uint16_t pfs_id; 44 int64_t size; 45 char *name; 46 }; 47 48 #define DICTF_MADEDIR 0x01 49 #define DICTF_MADEFILE 0x02 50 #define DICTF_PARENT 0x04 /* parent attached for real */ 51 #define DICTF_TRAVERSED 0x80 52 53 static void recover_top(char *ptr, hammer_off_t offset); 54 static void recover_elm(hammer_btree_leaf_elm_t leaf); 55 static struct recover_dict *get_dict(int64_t obj_id, uint16_t pfs_id); 56 static char *recover_path(struct recover_dict *dict); 57 static void sanitize_string(char *str); 58 59 static const char *TargetDir; 60 static int CachedFd = -1; 61 static char *CachedPath; 62 63 /* 64 * XXX There is a hidden bug here while iterating zone-2 offset as 65 * shown in an example below. 66 * 67 * If a volume was once used as HAMMER filesystem which consists of 68 * multiple volumes whose usage has reached beyond the first volume, 69 * and then later re-formatted only using 1 volume, hammer recover is 70 * likely to hit assertion in get_buffer() due to having access to 71 * invalid volume (vol1,2,...) from old filesystem data. 72 * 73 * |-----vol0-----|-----vol1-----|-----vol2-----| old filesystem 74 * <-----------------------> used by old filesystem 75 * 76 * |-----vol0-----| new filesystem 77 * <-----> used by new filesystem 78 * <-------> unused, invalid data from old filesystem 79 * <-> B-Tree nodes likely to point to vol1 80 */ 81 82 void 83 hammer_cmd_recover(const char *target_dir) 84 { 85 struct buffer_info *data_buffer; 86 struct volume_info *volume; 87 hammer_off_t off; 88 hammer_off_t off_end; 89 char *ptr; 90 int i; 91 92 TargetDir = target_dir; 93 94 if (mkdir(TargetDir, 0777) == -1) { 95 if (errno != EEXIST) { 96 perror("mkdir"); 97 exit(1); 98 } 99 } 100 101 printf("Running raw scan of HAMMER image, recovering to %s\n", 102 TargetDir); 103 104 data_buffer = NULL; 105 for (i = 0; i < HAMMER_MAX_VOLUMES; i++) { 106 volume = get_volume(i); 107 if (volume == NULL) 108 continue; 109 printf("Scanning volume %d size %s\n", 110 volume->vol_no, sizetostr(volume->size)); 111 off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0); 112 off_end = off + HAMMER_VOL_BUF_SIZE(volume->ondisk); 113 114 /* 115 * It should somehow implement reverse mapping from 116 * zone-2 to zone-X, so the command doesn't just try 117 * every zone-2 offset assuming it's a B-Tree node. 118 * Since we know most big-blocks are not for zone-8, 119 * we don't want to spend extra I/O for other zones 120 * as well as possible misinterpretation of nodes. 121 */ 122 while (off < off_end) { 123 ptr = get_buffer_data(off, &data_buffer, 0); 124 if (ptr) 125 recover_top(ptr, off); 126 off += HAMMER_BUFSIZE; 127 } 128 } 129 rel_buffer(data_buffer); 130 131 if (CachedPath) { 132 free(CachedPath); 133 close(CachedFd); 134 CachedPath = NULL; 135 CachedFd = -1; 136 } 137 } 138 139 /* 140 * Top level recovery processor. Assume the data is a B-Tree node. 141 * If the CRC is good we attempt to process the node, building the 142 * object space and creating the dictionary as we go. 143 */ 144 static void 145 recover_top(char *ptr, hammer_off_t offset) 146 { 147 hammer_node_ondisk_t node; 148 hammer_btree_elm_t elm; 149 int maxcount; 150 int i; 151 int isnode; 152 char buf[HAMMER_BTREE_LEAF_ELMS + 1]; 153 154 for (node = (void *)ptr; (char *)node < ptr + HAMMER_BUFSIZE; ++node) { 155 isnode = hammer_crc_test_btree(node); 156 maxcount = hammer_node_max_elements(node->type); 157 158 if (DebugOpt) { 159 for (i = 0; i < node->count && i < maxcount; ++i) 160 buf[i] = hammer_elm_btype(&node->elms[i]); 161 buf[i] = '\0'; 162 if (!isnode && DebugOpt > 1) 163 printf("%016jx -\n", offset); 164 if (isnode) 165 printf("%016jx %c %d %s\n", 166 offset, node->type, node->count, buf); 167 } 168 offset += sizeof(*node); 169 170 if (isnode && node->type == HAMMER_BTREE_TYPE_LEAF) { 171 for (i = 0; i < node->count && i < maxcount; ++i) { 172 elm = &node->elms[i]; 173 if (elm->base.btype != HAMMER_BTREE_TYPE_RECORD) 174 continue; 175 recover_elm(&elm->leaf); 176 } 177 } 178 } 179 } 180 181 static void 182 recover_elm(hammer_btree_leaf_elm_t leaf) 183 { 184 struct buffer_info *data_buffer = NULL; 185 struct recover_dict *dict; 186 struct recover_dict *dict2; 187 hammer_data_ondisk_t ondisk; 188 hammer_off_t data_offset; 189 struct stat st; 190 int chunk; 191 int len; 192 int zfill; 193 int64_t file_offset; 194 uint16_t pfs_id; 195 size_t nlen; 196 int fd; 197 char *name; 198 char *path1; 199 char *path2; 200 201 /* 202 * Ignore deleted records 203 */ 204 if (leaf->delete_ts) 205 return; 206 if ((data_offset = leaf->data_offset) != 0) 207 ondisk = get_buffer_data(data_offset, &data_buffer, 0); 208 else 209 ondisk = NULL; 210 if (ondisk == NULL) 211 goto done; 212 213 len = leaf->data_len; 214 chunk = HAMMER_BUFSIZE - ((int)data_offset & HAMMER_BUFMASK); 215 if (chunk > len) 216 chunk = len; 217 218 if (len < 0 || len > HAMMER_XBUFSIZE || len > chunk) 219 goto done; 220 221 pfs_id = lo_to_pfs(leaf->base.localization); 222 223 dict = get_dict(leaf->base.obj_id, pfs_id); 224 225 switch(leaf->base.rec_type) { 226 case HAMMER_RECTYPE_INODE: 227 /* 228 * We found an inode which also tells us where the file 229 * or directory is in the directory hierarchy. 230 */ 231 if (VerboseOpt) { 232 printf("file %016jx:%05d inode found\n", 233 (uintmax_t)leaf->base.obj_id, pfs_id); 234 } 235 path1 = recover_path(dict); 236 237 /* 238 * Attach the inode to its parent. This isn't strictly 239 * necessary because the information is also in the 240 * directory entries, but if we do not find the directory 241 * entry this ensures that the files will still be 242 * reasonably well organized in their proper directories. 243 */ 244 if ((dict->flags & DICTF_PARENT) == 0 && 245 dict->obj_id != HAMMER_OBJID_ROOT && 246 ondisk->inode.parent_obj_id != 0) { 247 dict->flags |= DICTF_PARENT; 248 dict->parent = get_dict(ondisk->inode.parent_obj_id, 249 pfs_id); 250 if (dict->parent && 251 (dict->parent->flags & DICTF_MADEDIR) == 0) { 252 dict->parent->flags |= DICTF_MADEDIR; 253 path2 = recover_path(dict->parent); 254 printf("mkdir %s\n", path2); 255 mkdir(path2, 0777); 256 free(path2); 257 path2 = NULL; 258 } 259 } 260 if (dict->obj_type == 0) 261 dict->obj_type = ondisk->inode.obj_type; 262 dict->size = ondisk->inode.size; 263 path2 = recover_path(dict); 264 265 if (lstat(path1, &st) == 0) { 266 if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) { 267 truncate(path1, dict->size); 268 /* chmod(path1, 0666); */ 269 } 270 if (strcmp(path1, path2)) { 271 printf("Rename %s -> %s\n", path1, path2); 272 rename(path1, path2); 273 } 274 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) { 275 printf("mkinode (file) %s\n", path2); 276 fd = open(path2, O_RDWR|O_CREAT, 0666); 277 if (fd > 0) 278 close(fd); 279 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_DIRECTORY) { 280 printf("mkinode (dir) %s\n", path2); 281 mkdir(path2, 0777); 282 dict->flags |= DICTF_MADEDIR; 283 } 284 free(path1); 285 free(path2); 286 break; 287 case HAMMER_RECTYPE_DATA: 288 /* 289 * File record data 290 */ 291 if (leaf->base.obj_id == 0) 292 break; 293 if (VerboseOpt) { 294 printf("file %016jx:%05d data %016jx,%d\n", 295 (uintmax_t)leaf->base.obj_id, 296 pfs_id, 297 (uintmax_t)leaf->base.key - len, 298 len); 299 } 300 301 /* 302 * Update the dictionary entry 303 */ 304 if (dict->obj_type == 0) 305 dict->obj_type = HAMMER_OBJTYPE_REGFILE; 306 307 /* 308 * If the parent directory has not been created we 309 * have to create it (typically a PFS%05d) 310 */ 311 if (dict->parent && 312 (dict->parent->flags & DICTF_MADEDIR) == 0) { 313 dict->parent->flags |= DICTF_MADEDIR; 314 path2 = recover_path(dict->parent); 315 printf("mkdir %s\n", path2); 316 mkdir(path2, 0777); 317 free(path2); 318 path2 = NULL; 319 } 320 321 /* 322 * Create the file if necessary, report file creations 323 */ 324 path1 = recover_path(dict); 325 if (CachedPath && strcmp(CachedPath, path1) == 0) { 326 fd = CachedFd; 327 } else { 328 fd = open(path1, O_CREAT|O_RDWR, 0666); 329 } 330 if (fd < 0) { 331 printf("Unable to create %s: %s\n", 332 path1, strerror(errno)); 333 free(path1); 334 break; 335 } 336 if ((dict->flags & DICTF_MADEFILE) == 0) { 337 dict->flags |= DICTF_MADEFILE; 338 printf("mkfile %s\n", path1); 339 } 340 341 /* 342 * And write the record. A HAMMER data block is aligned 343 * and may contain trailing zeros after the file EOF. The 344 * inode record is required to get the actual file size. 345 * 346 * However, when the inode record is not available 347 * we can do a sparse write and that will get it right 348 * most of the time even if the inode record is never 349 * found. 350 */ 351 file_offset = (int64_t)leaf->base.key - len; 352 lseek(fd, (off_t)file_offset, SEEK_SET); 353 while (len) { 354 if (dict->size == -1) { 355 for (zfill = chunk - 1; zfill >= 0; --zfill) { 356 if (((char *)ondisk)[zfill]) 357 break; 358 } 359 ++zfill; 360 } else { 361 zfill = chunk; 362 } 363 364 if (zfill) 365 write(fd, ondisk, zfill); 366 if (zfill < chunk) 367 lseek(fd, chunk - zfill, SEEK_CUR); 368 369 len -= chunk; 370 data_offset += chunk; 371 file_offset += chunk; 372 ondisk = get_buffer_data(data_offset, &data_buffer, 0); 373 if (ondisk == NULL) 374 break; 375 chunk = HAMMER_BUFSIZE - 376 ((int)data_offset & HAMMER_BUFMASK); 377 if (chunk > len) 378 chunk = len; 379 } 380 if (dict->size >= 0 && file_offset > dict->size) { 381 ftruncate(fd, dict->size); 382 /* fchmod(fd, 0666); */ 383 } 384 385 if (fd == CachedFd) { 386 free(path1); 387 } else if (CachedPath) { 388 free(CachedPath); 389 close(CachedFd); 390 CachedPath = path1; 391 CachedFd = fd; 392 } else { 393 CachedPath = path1; 394 CachedFd = fd; 395 } 396 break; 397 case HAMMER_RECTYPE_DIRENTRY: 398 nlen = len - HAMMER_ENTRY_NAME_OFF; 399 if ((int)nlen < 0) /* illegal length */ 400 break; 401 if (ondisk->entry.obj_id == 0 || 402 ondisk->entry.obj_id == HAMMER_OBJID_ROOT) 403 break; 404 name = malloc(nlen + 1); 405 bcopy(ondisk->entry.name, name, nlen); 406 name[nlen] = 0; 407 sanitize_string(name); 408 409 /* 410 * We can't deal with hardlinks so if the object already 411 * has a name assigned to it we just keep using that name. 412 */ 413 dict2 = get_dict(ondisk->entry.obj_id, pfs_id); 414 path1 = recover_path(dict2); 415 416 if (dict2->name == NULL) 417 dict2->name = name; 418 else 419 free(name); 420 421 /* 422 * Attach dict2 to its directory (dict), create the 423 * directory (dict) if necessary. We must ensure 424 * that the directory entry exists in order to be 425 * able to properly rename() the file without creating 426 * a namespace conflict. 427 */ 428 if ((dict2->flags & DICTF_PARENT) == 0) { 429 dict2->flags |= DICTF_PARENT; 430 dict2->parent = dict; 431 if ((dict->flags & DICTF_MADEDIR) == 0) { 432 dict->flags |= DICTF_MADEDIR; 433 path2 = recover_path(dict); 434 printf("mkdir %s\n", path2); 435 mkdir(path2, 0777); 436 free(path2); 437 path2 = NULL; 438 } 439 } 440 path2 = recover_path(dict2); 441 if (strcmp(path1, path2) != 0 && lstat(path1, &st) == 0) { 442 printf("Rename %s -> %s\n", path1, path2); 443 rename(path1, path2); 444 } 445 free(path1); 446 free(path2); 447 448 printf("dir %016jx:%05d entry %016jx \"%s\"\n", 449 (uintmax_t)leaf->base.obj_id, 450 pfs_id, 451 (uintmax_t)ondisk->entry.obj_id, 452 name); 453 break; 454 default: 455 /* 456 * Ignore any other record types 457 */ 458 break; 459 } 460 done: 461 rel_buffer(data_buffer); 462 } 463 464 #define RD_HSIZE 32768 465 #define RD_HMASK (RD_HSIZE - 1) 466 467 struct recover_dict *RDHash[RD_HSIZE]; 468 469 static 470 struct recover_dict * 471 get_dict(int64_t obj_id, uint16_t pfs_id) 472 { 473 struct recover_dict *dict; 474 int i; 475 476 if (obj_id == 0) 477 return(NULL); 478 479 i = crc32(&obj_id, sizeof(obj_id)) & RD_HMASK; 480 for (dict = RDHash[i]; dict; dict = dict->next) { 481 if (dict->obj_id == obj_id && 482 dict->pfs_id == pfs_id) { 483 break; 484 } 485 } 486 if (dict == NULL) { 487 dict = malloc(sizeof(*dict)); 488 bzero(dict, sizeof(*dict)); 489 dict->obj_id = obj_id; 490 dict->pfs_id = pfs_id; 491 dict->next = RDHash[i]; 492 dict->size = -1; 493 RDHash[i] = dict; 494 495 /* 496 * Always connect dangling dictionary entries to object 1 497 * (the root of the PFS). 498 * 499 * DICTF_PARENT will not be set until we know what the 500 * real parent directory object is. 501 */ 502 if (dict->obj_id != HAMMER_OBJID_ROOT) 503 dict->parent = get_dict(1, pfs_id); 504 } 505 return(dict); 506 } 507 508 struct path_info { 509 enum { PI_FIGURE, PI_LOAD } state; 510 uint16_t pfs_id; 511 char *base; 512 char *next; 513 int len; 514 }; 515 516 static void recover_path_helper(struct recover_dict *, struct path_info *); 517 518 static 519 char * 520 recover_path(struct recover_dict *dict) 521 { 522 struct path_info info; 523 524 bzero(&info, sizeof(info)); 525 info.pfs_id = dict->pfs_id; 526 info.state = PI_FIGURE; 527 recover_path_helper(dict, &info); 528 info.base = malloc(info.len); 529 info.next = info.base; 530 info.state = PI_LOAD; 531 recover_path_helper(dict, &info); 532 533 return(info.base); 534 } 535 536 static 537 void 538 recover_path_helper(struct recover_dict *dict, struct path_info *info) 539 { 540 /* 541 * Calculate path element length 542 */ 543 dict->flags |= DICTF_TRAVERSED; 544 545 switch(info->state) { 546 case PI_FIGURE: 547 if (dict->obj_id == HAMMER_OBJID_ROOT) 548 info->len += 8; 549 else if (dict->name) 550 info->len += strlen(dict->name); 551 else 552 info->len += 6 + 16; 553 ++info->len; 554 555 if (dict->parent && 556 (dict->parent->flags & DICTF_TRAVERSED) == 0) { 557 recover_path_helper(dict->parent, info); 558 } else { 559 info->len += strlen(TargetDir) + 1; 560 } 561 break; 562 case PI_LOAD: 563 if (dict->parent && 564 (dict->parent->flags & DICTF_TRAVERSED) == 0) { 565 recover_path_helper(dict->parent, info); 566 } else { 567 strcpy(info->next, TargetDir); 568 info->next += strlen(info->next); 569 } 570 571 *info->next++ = '/'; 572 if (dict->obj_id == HAMMER_OBJID_ROOT) { 573 snprintf(info->next, 8+1, "PFS%05d", info->pfs_id); 574 } else if (dict->name) { 575 strcpy(info->next, dict->name); 576 } else { 577 snprintf(info->next, 6+16+1, "obj_0x%016jx", 578 (uintmax_t)dict->obj_id); 579 } 580 info->next += strlen(info->next); 581 break; 582 } 583 dict->flags &= ~DICTF_TRAVERSED; 584 } 585 586 static 587 void 588 sanitize_string(char *str) 589 { 590 while (*str) { 591 if (!isprint(*str)) 592 *str = 'x'; 593 ++str; 594 } 595 } 596