1 /* 2 * Copyright (c) 2010 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include "hammer.h" 36 37 struct recover_dict { 38 struct recover_dict *next; 39 struct recover_dict *parent; 40 int64_t obj_id; 41 uint8_t obj_type; 42 uint8_t flags; 43 uint16_t pfs_id; 44 int64_t size; 45 char *name; 46 }; 47 48 #define DICTF_MADEDIR 0x01 49 #define DICTF_MADEFILE 0x02 50 #define DICTF_PARENT 0x04 /* parent attached for real */ 51 #define DICTF_TRAVERSED 0x80 52 53 static void recover_top(char *ptr, hammer_off_t offset); 54 static void recover_elm(hammer_btree_leaf_elm_t leaf); 55 static struct recover_dict *get_dict(int64_t obj_id, uint16_t pfs_id); 56 static char *recover_path(struct recover_dict *dict); 57 static void sanitize_string(char *str); 58 59 static const char *TargetDir; 60 static int CachedFd = -1; 61 static char *CachedPath; 62 63 void 64 hammer_cmd_recover(const char *target_dir) 65 { 66 struct buffer_info *data_buffer; 67 struct volume_info *volume; 68 hammer_off_t off; 69 hammer_off_t off_end; 70 char *ptr; 71 72 TargetDir = target_dir; 73 74 if (mkdir(TargetDir, 0777) == -1) { 75 if (errno != EEXIST) { 76 perror("mkdir"); 77 exit(1); 78 } 79 } 80 81 printf("Running raw scan of HAMMER image, recovering to %s\n", 82 TargetDir); 83 84 data_buffer = NULL; 85 TAILQ_FOREACH(volume, &VolList, entry) { 86 check_volume(volume); 87 printf("Scanning volume %d size %s\n", 88 volume->vol_no, sizetostr(volume->size)); 89 off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0); 90 off_end = off + HAMMER_VOL_BUF_SIZE(volume->ondisk); 91 while (off < off_end) { 92 ptr = get_buffer_data(off, &data_buffer, 0); 93 if (ptr) 94 recover_top(ptr, off); 95 off += HAMMER_BUFSIZE; 96 } 97 } 98 rel_buffer(data_buffer); 99 100 if (CachedPath) { 101 free(CachedPath); 102 close(CachedFd); 103 CachedPath = NULL; 104 CachedFd = -1; 105 } 106 } 107 108 /* 109 * Top level recovery processor. Assume the data is a B-Tree node. 110 * If the CRC is good we attempt to process the node, building the 111 * object space and creating the dictionary as we go. 112 */ 113 static void 114 recover_top(char *ptr, hammer_off_t offset) 115 { 116 struct hammer_node_ondisk *node; 117 hammer_btree_elm_t elm; 118 int maxcount; 119 int i; 120 int isnode; 121 char buf[HAMMER_BTREE_LEAF_ELMS + 1]; 122 123 for (node = (void *)ptr; (char *)node < ptr + HAMMER_BUFSIZE; ++node) { 124 isnode = (crc32(&node->crc + 1, HAMMER_BTREE_CRCSIZE) == node->crc); 125 maxcount = hammer_node_max_elements(node->type); 126 127 if (DebugOpt) { 128 for (i = 0; i < node->count && i < maxcount; ++i) 129 buf[i] = hammer_elm_btype(&node->elms[i]); 130 buf[i] = '\0'; 131 if (!isnode && DebugOpt > 1) 132 printf("%016jx -\n", offset); 133 if (isnode) 134 printf("%016jx %c %d %s\n", 135 offset, node->type, node->count, buf); 136 } 137 offset += sizeof(*node); 138 139 if (isnode && node->type == HAMMER_BTREE_TYPE_LEAF) { 140 for (i = 0; i < node->count && i < maxcount; ++i) { 141 elm = &node->elms[i]; 142 if (elm->base.btype != HAMMER_BTREE_TYPE_RECORD) 143 continue; 144 recover_elm(&elm->leaf); 145 } 146 } 147 } 148 } 149 150 static void 151 recover_elm(hammer_btree_leaf_elm_t leaf) 152 { 153 struct buffer_info *data_buffer = NULL; 154 struct recover_dict *dict; 155 struct recover_dict *dict2; 156 hammer_data_ondisk_t ondisk; 157 hammer_off_t data_offset; 158 struct stat st; 159 int chunk; 160 int len; 161 int zfill; 162 int64_t file_offset; 163 uint16_t pfs_id; 164 size_t nlen; 165 int fd; 166 char *name; 167 char *path1; 168 char *path2; 169 170 /* 171 * Ignore deleted records 172 */ 173 if (leaf->delete_ts) 174 return; 175 if ((data_offset = leaf->data_offset) != 0) 176 ondisk = get_buffer_data(data_offset, &data_buffer, 0); 177 else 178 ondisk = NULL; 179 if (ondisk == NULL) 180 goto done; 181 182 len = leaf->data_len; 183 chunk = HAMMER_BUFSIZE - ((int)data_offset & HAMMER_BUFMASK); 184 if (chunk > len) 185 chunk = len; 186 187 if (len < 0 || len > HAMMER_XBUFSIZE || len > chunk) 188 goto done; 189 190 pfs_id = lo_to_pfs(leaf->base.localization); 191 192 dict = get_dict(leaf->base.obj_id, pfs_id); 193 194 switch(leaf->base.rec_type) { 195 case HAMMER_RECTYPE_INODE: 196 /* 197 * We found an inode which also tells us where the file 198 * or directory is in the directory hierarchy. 199 */ 200 if (VerboseOpt) { 201 printf("file %016jx:%05d inode found\n", 202 (uintmax_t)leaf->base.obj_id, pfs_id); 203 } 204 path1 = recover_path(dict); 205 206 /* 207 * Attach the inode to its parent. This isn't strictly 208 * necessary because the information is also in the 209 * directory entries, but if we do not find the directory 210 * entry this ensures that the files will still be 211 * reasonably well organized in their proper directories. 212 */ 213 if ((dict->flags & DICTF_PARENT) == 0 && 214 dict->obj_id != HAMMER_OBJID_ROOT && 215 ondisk->inode.parent_obj_id != 0) { 216 dict->flags |= DICTF_PARENT; 217 dict->parent = get_dict(ondisk->inode.parent_obj_id, 218 pfs_id); 219 if (dict->parent && 220 (dict->parent->flags & DICTF_MADEDIR) == 0) { 221 dict->parent->flags |= DICTF_MADEDIR; 222 path2 = recover_path(dict->parent); 223 printf("mkdir %s\n", path2); 224 mkdir(path2, 0777); 225 free(path2); 226 path2 = NULL; 227 } 228 } 229 if (dict->obj_type == 0) 230 dict->obj_type = ondisk->inode.obj_type; 231 dict->size = ondisk->inode.size; 232 path2 = recover_path(dict); 233 234 if (lstat(path1, &st) == 0) { 235 if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) { 236 truncate(path1, dict->size); 237 /* chmod(path1, 0666); */ 238 } 239 if (strcmp(path1, path2)) { 240 printf("Rename %s -> %s\n", path1, path2); 241 rename(path1, path2); 242 } 243 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) { 244 printf("mkinode (file) %s\n", path2); 245 fd = open(path2, O_RDWR|O_CREAT, 0666); 246 if (fd > 0) 247 close(fd); 248 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_DIRECTORY) { 249 printf("mkinode (dir) %s\n", path2); 250 mkdir(path2, 0777); 251 dict->flags |= DICTF_MADEDIR; 252 } 253 free(path1); 254 free(path2); 255 break; 256 case HAMMER_RECTYPE_DATA: 257 /* 258 * File record data 259 */ 260 if (leaf->base.obj_id == 0) 261 break; 262 if (VerboseOpt) { 263 printf("file %016jx:%05d data %016jx,%d\n", 264 (uintmax_t)leaf->base.obj_id, 265 pfs_id, 266 (uintmax_t)leaf->base.key - len, 267 len); 268 } 269 270 /* 271 * Update the dictionary entry 272 */ 273 if (dict->obj_type == 0) 274 dict->obj_type = HAMMER_OBJTYPE_REGFILE; 275 276 /* 277 * If the parent directory has not been created we 278 * have to create it (typically a PFS%05d) 279 */ 280 if (dict->parent && 281 (dict->parent->flags & DICTF_MADEDIR) == 0) { 282 dict->parent->flags |= DICTF_MADEDIR; 283 path2 = recover_path(dict->parent); 284 printf("mkdir %s\n", path2); 285 mkdir(path2, 0777); 286 free(path2); 287 path2 = NULL; 288 } 289 290 /* 291 * Create the file if necessary, report file creations 292 */ 293 path1 = recover_path(dict); 294 if (CachedPath && strcmp(CachedPath, path1) == 0) { 295 fd = CachedFd; 296 } else { 297 fd = open(path1, O_CREAT|O_RDWR, 0666); 298 } 299 if (fd < 0) { 300 printf("Unable to create %s: %s\n", 301 path1, strerror(errno)); 302 free(path1); 303 break; 304 } 305 if ((dict->flags & DICTF_MADEFILE) == 0) { 306 dict->flags |= DICTF_MADEFILE; 307 printf("mkfile %s\n", path1); 308 } 309 310 /* 311 * And write the record. A HAMMER data block is aligned 312 * and may contain trailing zeros after the file EOF. The 313 * inode record is required to get the actual file size. 314 * 315 * However, when the inode record is not available 316 * we can do a sparse write and that will get it right 317 * most of the time even if the inode record is never 318 * found. 319 */ 320 file_offset = (int64_t)leaf->base.key - len; 321 lseek(fd, (off_t)file_offset, SEEK_SET); 322 while (len) { 323 if (dict->size == -1) { 324 for (zfill = chunk - 1; zfill >= 0; --zfill) { 325 if (((char *)ondisk)[zfill]) 326 break; 327 } 328 ++zfill; 329 } else { 330 zfill = chunk; 331 } 332 333 if (zfill) 334 write(fd, ondisk, zfill); 335 if (zfill < chunk) 336 lseek(fd, chunk - zfill, SEEK_CUR); 337 338 len -= chunk; 339 data_offset += chunk; 340 file_offset += chunk; 341 ondisk = get_buffer_data(data_offset, &data_buffer, 0); 342 if (ondisk == NULL) 343 break; 344 chunk = HAMMER_BUFSIZE - 345 ((int)data_offset & HAMMER_BUFMASK); 346 if (chunk > len) 347 chunk = len; 348 } 349 if (dict->size >= 0 && file_offset > dict->size) { 350 ftruncate(fd, dict->size); 351 /* fchmod(fd, 0666); */ 352 } 353 354 if (fd == CachedFd) { 355 free(path1); 356 } else if (CachedPath) { 357 free(CachedPath); 358 close(CachedFd); 359 CachedPath = path1; 360 CachedFd = fd; 361 } else { 362 CachedPath = path1; 363 CachedFd = fd; 364 } 365 break; 366 case HAMMER_RECTYPE_DIRENTRY: 367 nlen = len - offsetof(struct hammer_direntry_data, name[0]); 368 if ((int)nlen < 0) /* illegal length */ 369 break; 370 if (ondisk->entry.obj_id == 0 || 371 ondisk->entry.obj_id == HAMMER_OBJID_ROOT) 372 break; 373 name = malloc(nlen + 1); 374 bcopy(ondisk->entry.name, name, nlen); 375 name[nlen] = 0; 376 sanitize_string(name); 377 378 /* 379 * We can't deal with hardlinks so if the object already 380 * has a name assigned to it we just keep using that name. 381 */ 382 dict2 = get_dict(ondisk->entry.obj_id, pfs_id); 383 path1 = recover_path(dict2); 384 385 if (dict2->name == NULL) 386 dict2->name = name; 387 else 388 free(name); 389 390 /* 391 * Attach dict2 to its directory (dict), create the 392 * directory (dict) if necessary. We must ensure 393 * that the directory entry exists in order to be 394 * able to properly rename() the file without creating 395 * a namespace conflict. 396 */ 397 if ((dict2->flags & DICTF_PARENT) == 0) { 398 dict2->flags |= DICTF_PARENT; 399 dict2->parent = dict; 400 if ((dict->flags & DICTF_MADEDIR) == 0) { 401 dict->flags |= DICTF_MADEDIR; 402 path2 = recover_path(dict); 403 printf("mkdir %s\n", path2); 404 mkdir(path2, 0777); 405 free(path2); 406 path2 = NULL; 407 } 408 } 409 path2 = recover_path(dict2); 410 if (strcmp(path1, path2) != 0 && lstat(path1, &st) == 0) { 411 printf("Rename %s -> %s\n", path1, path2); 412 rename(path1, path2); 413 } 414 free(path1); 415 free(path2); 416 417 printf("dir %016jx:%05d entry %016jx \"%s\"\n", 418 (uintmax_t)leaf->base.obj_id, 419 pfs_id, 420 (uintmax_t)ondisk->entry.obj_id, 421 name); 422 break; 423 default: 424 /* 425 * Ignore any other record types 426 */ 427 break; 428 } 429 done: 430 rel_buffer(data_buffer); 431 } 432 433 #define RD_HSIZE 32768 434 #define RD_HMASK (RD_HSIZE - 1) 435 436 struct recover_dict *RDHash[RD_HSIZE]; 437 438 static 439 struct recover_dict * 440 get_dict(int64_t obj_id, uint16_t pfs_id) 441 { 442 struct recover_dict *dict; 443 int i; 444 445 if (obj_id == 0) 446 return(NULL); 447 448 i = crc32(&obj_id, sizeof(obj_id)) & RD_HMASK; 449 for (dict = RDHash[i]; dict; dict = dict->next) { 450 if (dict->obj_id == obj_id && 451 dict->pfs_id == pfs_id) { 452 break; 453 } 454 } 455 if (dict == NULL) { 456 dict = malloc(sizeof(*dict)); 457 bzero(dict, sizeof(*dict)); 458 dict->obj_id = obj_id; 459 dict->pfs_id = pfs_id; 460 dict->next = RDHash[i]; 461 dict->size = -1; 462 RDHash[i] = dict; 463 464 /* 465 * Always connect dangling dictionary entries to object 1 466 * (the root of the PFS). 467 * 468 * DICTF_PARENT will not be set until we know what the 469 * real parent directory object is. 470 */ 471 if (dict->obj_id != HAMMER_OBJID_ROOT) 472 dict->parent = get_dict(1, pfs_id); 473 } 474 return(dict); 475 } 476 477 struct path_info { 478 enum { PI_FIGURE, PI_LOAD } state; 479 uint16_t pfs_id; 480 char *base; 481 char *next; 482 int len; 483 }; 484 485 static void recover_path_helper(struct recover_dict *, struct path_info *); 486 487 static 488 char * 489 recover_path(struct recover_dict *dict) 490 { 491 struct path_info info; 492 493 bzero(&info, sizeof(info)); 494 info.pfs_id = dict->pfs_id; 495 info.state = PI_FIGURE; 496 recover_path_helper(dict, &info); 497 info.base = malloc(info.len); 498 info.next = info.base; 499 info.state = PI_LOAD; 500 recover_path_helper(dict, &info); 501 502 return(info.base); 503 } 504 505 static 506 void 507 recover_path_helper(struct recover_dict *dict, struct path_info *info) 508 { 509 /* 510 * Calculate path element length 511 */ 512 dict->flags |= DICTF_TRAVERSED; 513 514 switch(info->state) { 515 case PI_FIGURE: 516 if (dict->obj_id == HAMMER_OBJID_ROOT) 517 info->len += 8; 518 else if (dict->name) 519 info->len += strlen(dict->name); 520 else 521 info->len += 6 + 16; 522 ++info->len; 523 524 if (dict->parent && 525 (dict->parent->flags & DICTF_TRAVERSED) == 0) { 526 recover_path_helper(dict->parent, info); 527 } else { 528 info->len += strlen(TargetDir) + 1; 529 } 530 break; 531 case PI_LOAD: 532 if (dict->parent && 533 (dict->parent->flags & DICTF_TRAVERSED) == 0) { 534 recover_path_helper(dict->parent, info); 535 } else { 536 strcpy(info->next, TargetDir); 537 info->next += strlen(info->next); 538 } 539 540 *info->next++ = '/'; 541 if (dict->obj_id == HAMMER_OBJID_ROOT) { 542 snprintf(info->next, 8+1, "PFS%05d", info->pfs_id); 543 } else if (dict->name) { 544 strcpy(info->next, dict->name); 545 } else { 546 snprintf(info->next, 6+16+1, "obj_0x%016jx", 547 (uintmax_t)dict->obj_id); 548 } 549 info->next += strlen(info->next); 550 break; 551 } 552 dict->flags &= ~DICTF_TRAVERSED; 553 } 554 555 static 556 void 557 sanitize_string(char *str) 558 { 559 while (*str) { 560 if (!isprint(*str)) 561 *str = 'x'; 562 ++str; 563 } 564 } 565