1 /* 2 * Copyright (c) 2010 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include "hammer.h" 36 37 struct recover_dict { 38 struct recover_dict *next; 39 struct recover_dict *parent; 40 int64_t obj_id; 41 uint8_t obj_type; 42 uint8_t flags; 43 uint16_t pfs_id; 44 int64_t size; 45 char *name; 46 }; 47 48 #define DICTF_MADEDIR 0x01 49 #define DICTF_MADEFILE 0x02 50 #define DICTF_PARENT 0x04 /* parent attached for real */ 51 #define DICTF_TRAVERSED 0x80 52 53 static void recover_top(char *ptr, hammer_off_t offset); 54 static void recover_elm(hammer_btree_leaf_elm_t leaf); 55 static struct recover_dict *get_dict(int64_t obj_id, uint16_t pfs_id); 56 static char *recover_path(struct recover_dict *dict); 57 static void sanitize_string(char *str); 58 59 static const char *TargetDir; 60 static int CachedFd = -1; 61 static char *CachedPath; 62 63 /* 64 * XXX There is a hidden bug here while iterating zone-2 offset as 65 * shown in an example below. 66 * 67 * If a volume was once used as HAMMER filesystem which consists of 68 * multiple volumes whose usage has reached beyond the first volume, 69 * and then later re-formatted only using 1 volume, hammer recover is 70 * likely to hit assertion in get_buffer() due to having access to 71 * invalid volume (vol1,2,...) from old filesystem data. 72 * 73 * |-----vol0-----|-----vol1-----|-----vol2-----| old filesystem 74 * <-----------------------> used by old filesystem 75 * 76 * |-----vol0-----| new filesystem 77 * <-----> used by new filesystem 78 * <-------> unused, invalid data from old filesystem 79 * <-> B-Tree nodes likely to point to vol1 80 */ 81 82 void 83 hammer_cmd_recover(const char *target_dir) 84 { 85 struct buffer_info *data_buffer; 86 struct volume_info *volume; 87 hammer_off_t off; 88 hammer_off_t off_end; 89 char *ptr; 90 int i; 91 92 TargetDir = target_dir; 93 94 if (mkdir(TargetDir, 0777) == -1) { 95 if (errno != EEXIST) { 96 perror("mkdir"); 97 exit(1); 98 } 99 } 100 101 printf("Running raw scan of HAMMER image, recovering to %s\n", 102 TargetDir); 103 104 data_buffer = NULL; 105 for (i = 0; i < HAMMER_MAX_VOLUMES; i++) { 106 volume = get_volume(i); 107 if (volume == NULL) 108 continue; 109 printf("Scanning volume %d size %s\n", 110 volume->vol_no, sizetostr(volume->size)); 111 off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0); 112 off_end = off + HAMMER_VOL_BUF_SIZE(volume->ondisk); 113 while (off < off_end) { 114 ptr = get_buffer_data(off, &data_buffer, 0); 115 if (ptr) 116 recover_top(ptr, off); 117 off += HAMMER_BUFSIZE; 118 } 119 } 120 rel_buffer(data_buffer); 121 122 if (CachedPath) { 123 free(CachedPath); 124 close(CachedFd); 125 CachedPath = NULL; 126 CachedFd = -1; 127 } 128 } 129 130 /* 131 * Top level recovery processor. Assume the data is a B-Tree node. 132 * If the CRC is good we attempt to process the node, building the 133 * object space and creating the dictionary as we go. 134 */ 135 static void 136 recover_top(char *ptr, hammer_off_t offset) 137 { 138 hammer_node_ondisk_t node; 139 hammer_btree_elm_t elm; 140 int maxcount; 141 int i; 142 int isnode; 143 char buf[HAMMER_BTREE_LEAF_ELMS + 1]; 144 145 for (node = (void *)ptr; (char *)node < ptr + HAMMER_BUFSIZE; ++node) { 146 isnode = hammer_crc_test_btree(node); 147 maxcount = hammer_node_max_elements(node->type); 148 149 if (DebugOpt) { 150 for (i = 0; i < node->count && i < maxcount; ++i) 151 buf[i] = hammer_elm_btype(&node->elms[i]); 152 buf[i] = '\0'; 153 if (!isnode && DebugOpt > 1) 154 printf("%016jx -\n", offset); 155 if (isnode) 156 printf("%016jx %c %d %s\n", 157 offset, node->type, node->count, buf); 158 } 159 offset += sizeof(*node); 160 161 if (isnode && node->type == HAMMER_BTREE_TYPE_LEAF) { 162 for (i = 0; i < node->count && i < maxcount; ++i) { 163 elm = &node->elms[i]; 164 if (elm->base.btype != HAMMER_BTREE_TYPE_RECORD) 165 continue; 166 recover_elm(&elm->leaf); 167 } 168 } 169 } 170 } 171 172 static void 173 recover_elm(hammer_btree_leaf_elm_t leaf) 174 { 175 struct buffer_info *data_buffer = NULL; 176 struct recover_dict *dict; 177 struct recover_dict *dict2; 178 hammer_data_ondisk_t ondisk; 179 hammer_off_t data_offset; 180 struct stat st; 181 int chunk; 182 int len; 183 int zfill; 184 int64_t file_offset; 185 uint16_t pfs_id; 186 size_t nlen; 187 int fd; 188 char *name; 189 char *path1; 190 char *path2; 191 192 /* 193 * Ignore deleted records 194 */ 195 if (leaf->delete_ts) 196 return; 197 if ((data_offset = leaf->data_offset) != 0) 198 ondisk = get_buffer_data(data_offset, &data_buffer, 0); 199 else 200 ondisk = NULL; 201 if (ondisk == NULL) 202 goto done; 203 204 len = leaf->data_len; 205 chunk = HAMMER_BUFSIZE - ((int)data_offset & HAMMER_BUFMASK); 206 if (chunk > len) 207 chunk = len; 208 209 if (len < 0 || len > HAMMER_XBUFSIZE || len > chunk) 210 goto done; 211 212 pfs_id = lo_to_pfs(leaf->base.localization); 213 214 dict = get_dict(leaf->base.obj_id, pfs_id); 215 216 switch(leaf->base.rec_type) { 217 case HAMMER_RECTYPE_INODE: 218 /* 219 * We found an inode which also tells us where the file 220 * or directory is in the directory hierarchy. 221 */ 222 if (VerboseOpt) { 223 printf("file %016jx:%05d inode found\n", 224 (uintmax_t)leaf->base.obj_id, pfs_id); 225 } 226 path1 = recover_path(dict); 227 228 /* 229 * Attach the inode to its parent. This isn't strictly 230 * necessary because the information is also in the 231 * directory entries, but if we do not find the directory 232 * entry this ensures that the files will still be 233 * reasonably well organized in their proper directories. 234 */ 235 if ((dict->flags & DICTF_PARENT) == 0 && 236 dict->obj_id != HAMMER_OBJID_ROOT && 237 ondisk->inode.parent_obj_id != 0) { 238 dict->flags |= DICTF_PARENT; 239 dict->parent = get_dict(ondisk->inode.parent_obj_id, 240 pfs_id); 241 if (dict->parent && 242 (dict->parent->flags & DICTF_MADEDIR) == 0) { 243 dict->parent->flags |= DICTF_MADEDIR; 244 path2 = recover_path(dict->parent); 245 printf("mkdir %s\n", path2); 246 mkdir(path2, 0777); 247 free(path2); 248 path2 = NULL; 249 } 250 } 251 if (dict->obj_type == 0) 252 dict->obj_type = ondisk->inode.obj_type; 253 dict->size = ondisk->inode.size; 254 path2 = recover_path(dict); 255 256 if (lstat(path1, &st) == 0) { 257 if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) { 258 truncate(path1, dict->size); 259 /* chmod(path1, 0666); */ 260 } 261 if (strcmp(path1, path2)) { 262 printf("Rename %s -> %s\n", path1, path2); 263 rename(path1, path2); 264 } 265 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) { 266 printf("mkinode (file) %s\n", path2); 267 fd = open(path2, O_RDWR|O_CREAT, 0666); 268 if (fd > 0) 269 close(fd); 270 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_DIRECTORY) { 271 printf("mkinode (dir) %s\n", path2); 272 mkdir(path2, 0777); 273 dict->flags |= DICTF_MADEDIR; 274 } 275 free(path1); 276 free(path2); 277 break; 278 case HAMMER_RECTYPE_DATA: 279 /* 280 * File record data 281 */ 282 if (leaf->base.obj_id == 0) 283 break; 284 if (VerboseOpt) { 285 printf("file %016jx:%05d data %016jx,%d\n", 286 (uintmax_t)leaf->base.obj_id, 287 pfs_id, 288 (uintmax_t)leaf->base.key - len, 289 len); 290 } 291 292 /* 293 * Update the dictionary entry 294 */ 295 if (dict->obj_type == 0) 296 dict->obj_type = HAMMER_OBJTYPE_REGFILE; 297 298 /* 299 * If the parent directory has not been created we 300 * have to create it (typically a PFS%05d) 301 */ 302 if (dict->parent && 303 (dict->parent->flags & DICTF_MADEDIR) == 0) { 304 dict->parent->flags |= DICTF_MADEDIR; 305 path2 = recover_path(dict->parent); 306 printf("mkdir %s\n", path2); 307 mkdir(path2, 0777); 308 free(path2); 309 path2 = NULL; 310 } 311 312 /* 313 * Create the file if necessary, report file creations 314 */ 315 path1 = recover_path(dict); 316 if (CachedPath && strcmp(CachedPath, path1) == 0) { 317 fd = CachedFd; 318 } else { 319 fd = open(path1, O_CREAT|O_RDWR, 0666); 320 } 321 if (fd < 0) { 322 printf("Unable to create %s: %s\n", 323 path1, strerror(errno)); 324 free(path1); 325 break; 326 } 327 if ((dict->flags & DICTF_MADEFILE) == 0) { 328 dict->flags |= DICTF_MADEFILE; 329 printf("mkfile %s\n", path1); 330 } 331 332 /* 333 * And write the record. A HAMMER data block is aligned 334 * and may contain trailing zeros after the file EOF. The 335 * inode record is required to get the actual file size. 336 * 337 * However, when the inode record is not available 338 * we can do a sparse write and that will get it right 339 * most of the time even if the inode record is never 340 * found. 341 */ 342 file_offset = (int64_t)leaf->base.key - len; 343 lseek(fd, (off_t)file_offset, SEEK_SET); 344 while (len) { 345 if (dict->size == -1) { 346 for (zfill = chunk - 1; zfill >= 0; --zfill) { 347 if (((char *)ondisk)[zfill]) 348 break; 349 } 350 ++zfill; 351 } else { 352 zfill = chunk; 353 } 354 355 if (zfill) 356 write(fd, ondisk, zfill); 357 if (zfill < chunk) 358 lseek(fd, chunk - zfill, SEEK_CUR); 359 360 len -= chunk; 361 data_offset += chunk; 362 file_offset += chunk; 363 ondisk = get_buffer_data(data_offset, &data_buffer, 0); 364 if (ondisk == NULL) 365 break; 366 chunk = HAMMER_BUFSIZE - 367 ((int)data_offset & HAMMER_BUFMASK); 368 if (chunk > len) 369 chunk = len; 370 } 371 if (dict->size >= 0 && file_offset > dict->size) { 372 ftruncate(fd, dict->size); 373 /* fchmod(fd, 0666); */ 374 } 375 376 if (fd == CachedFd) { 377 free(path1); 378 } else if (CachedPath) { 379 free(CachedPath); 380 close(CachedFd); 381 CachedPath = path1; 382 CachedFd = fd; 383 } else { 384 CachedPath = path1; 385 CachedFd = fd; 386 } 387 break; 388 case HAMMER_RECTYPE_DIRENTRY: 389 nlen = len - HAMMER_ENTRY_NAME_OFF; 390 if ((int)nlen < 0) /* illegal length */ 391 break; 392 if (ondisk->entry.obj_id == 0 || 393 ondisk->entry.obj_id == HAMMER_OBJID_ROOT) 394 break; 395 name = malloc(nlen + 1); 396 bcopy(ondisk->entry.name, name, nlen); 397 name[nlen] = 0; 398 sanitize_string(name); 399 400 /* 401 * We can't deal with hardlinks so if the object already 402 * has a name assigned to it we just keep using that name. 403 */ 404 dict2 = get_dict(ondisk->entry.obj_id, pfs_id); 405 path1 = recover_path(dict2); 406 407 if (dict2->name == NULL) 408 dict2->name = name; 409 else 410 free(name); 411 412 /* 413 * Attach dict2 to its directory (dict), create the 414 * directory (dict) if necessary. We must ensure 415 * that the directory entry exists in order to be 416 * able to properly rename() the file without creating 417 * a namespace conflict. 418 */ 419 if ((dict2->flags & DICTF_PARENT) == 0) { 420 dict2->flags |= DICTF_PARENT; 421 dict2->parent = dict; 422 if ((dict->flags & DICTF_MADEDIR) == 0) { 423 dict->flags |= DICTF_MADEDIR; 424 path2 = recover_path(dict); 425 printf("mkdir %s\n", path2); 426 mkdir(path2, 0777); 427 free(path2); 428 path2 = NULL; 429 } 430 } 431 path2 = recover_path(dict2); 432 if (strcmp(path1, path2) != 0 && lstat(path1, &st) == 0) { 433 printf("Rename %s -> %s\n", path1, path2); 434 rename(path1, path2); 435 } 436 free(path1); 437 free(path2); 438 439 printf("dir %016jx:%05d entry %016jx \"%s\"\n", 440 (uintmax_t)leaf->base.obj_id, 441 pfs_id, 442 (uintmax_t)ondisk->entry.obj_id, 443 name); 444 break; 445 default: 446 /* 447 * Ignore any other record types 448 */ 449 break; 450 } 451 done: 452 rel_buffer(data_buffer); 453 } 454 455 #define RD_HSIZE 32768 456 #define RD_HMASK (RD_HSIZE - 1) 457 458 struct recover_dict *RDHash[RD_HSIZE]; 459 460 static 461 struct recover_dict * 462 get_dict(int64_t obj_id, uint16_t pfs_id) 463 { 464 struct recover_dict *dict; 465 int i; 466 467 if (obj_id == 0) 468 return(NULL); 469 470 i = crc32(&obj_id, sizeof(obj_id)) & RD_HMASK; 471 for (dict = RDHash[i]; dict; dict = dict->next) { 472 if (dict->obj_id == obj_id && 473 dict->pfs_id == pfs_id) { 474 break; 475 } 476 } 477 if (dict == NULL) { 478 dict = malloc(sizeof(*dict)); 479 bzero(dict, sizeof(*dict)); 480 dict->obj_id = obj_id; 481 dict->pfs_id = pfs_id; 482 dict->next = RDHash[i]; 483 dict->size = -1; 484 RDHash[i] = dict; 485 486 /* 487 * Always connect dangling dictionary entries to object 1 488 * (the root of the PFS). 489 * 490 * DICTF_PARENT will not be set until we know what the 491 * real parent directory object is. 492 */ 493 if (dict->obj_id != HAMMER_OBJID_ROOT) 494 dict->parent = get_dict(1, pfs_id); 495 } 496 return(dict); 497 } 498 499 struct path_info { 500 enum { PI_FIGURE, PI_LOAD } state; 501 uint16_t pfs_id; 502 char *base; 503 char *next; 504 int len; 505 }; 506 507 static void recover_path_helper(struct recover_dict *, struct path_info *); 508 509 static 510 char * 511 recover_path(struct recover_dict *dict) 512 { 513 struct path_info info; 514 515 bzero(&info, sizeof(info)); 516 info.pfs_id = dict->pfs_id; 517 info.state = PI_FIGURE; 518 recover_path_helper(dict, &info); 519 info.base = malloc(info.len); 520 info.next = info.base; 521 info.state = PI_LOAD; 522 recover_path_helper(dict, &info); 523 524 return(info.base); 525 } 526 527 static 528 void 529 recover_path_helper(struct recover_dict *dict, struct path_info *info) 530 { 531 /* 532 * Calculate path element length 533 */ 534 dict->flags |= DICTF_TRAVERSED; 535 536 switch(info->state) { 537 case PI_FIGURE: 538 if (dict->obj_id == HAMMER_OBJID_ROOT) 539 info->len += 8; 540 else if (dict->name) 541 info->len += strlen(dict->name); 542 else 543 info->len += 6 + 16; 544 ++info->len; 545 546 if (dict->parent && 547 (dict->parent->flags & DICTF_TRAVERSED) == 0) { 548 recover_path_helper(dict->parent, info); 549 } else { 550 info->len += strlen(TargetDir) + 1; 551 } 552 break; 553 case PI_LOAD: 554 if (dict->parent && 555 (dict->parent->flags & DICTF_TRAVERSED) == 0) { 556 recover_path_helper(dict->parent, info); 557 } else { 558 strcpy(info->next, TargetDir); 559 info->next += strlen(info->next); 560 } 561 562 *info->next++ = '/'; 563 if (dict->obj_id == HAMMER_OBJID_ROOT) { 564 snprintf(info->next, 8+1, "PFS%05d", info->pfs_id); 565 } else if (dict->name) { 566 strcpy(info->next, dict->name); 567 } else { 568 snprintf(info->next, 6+16+1, "obj_0x%016jx", 569 (uintmax_t)dict->obj_id); 570 } 571 info->next += strlen(info->next); 572 break; 573 } 574 dict->flags &= ~DICTF_TRAVERSED; 575 } 576 577 static 578 void 579 sanitize_string(char *str) 580 { 581 while (*str) { 582 if (!isprint(*str)) 583 *str = 'x'; 584 ++str; 585 } 586 } 587