1 /* 2 * Copyright (c) 2010 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include "hammer.h" 36 37 struct recover_dict { 38 struct recover_dict *next; 39 struct recover_dict *parent; 40 int64_t obj_id; 41 uint8_t obj_type; 42 uint8_t flags; 43 uint16_t llid; 44 int64_t size; 45 char *name; 46 }; 47 48 #define DICTF_MADEDIR 0x01 49 #define DICTF_MADEFILE 0x02 50 #define DICTF_PARENT 0x04 /* parent attached for real */ 51 #define DICTF_TRAVERSED 0x80 52 53 static void recover_top(char *ptr); 54 static void recover_elm(hammer_btree_leaf_elm_t leaf); 55 static struct recover_dict *get_dict(int64_t obj_id, uint16_t llid); 56 static char *recover_path(struct recover_dict *dict); 57 static void sanitize_string(char *str); 58 59 static const char *TargetDir; 60 static int CachedFd = -1; 61 static char *CachedPath; 62 63 void 64 hammer_cmd_recover(const char *target_dir) 65 { 66 struct buffer_info *data_buffer; 67 struct volume_info *scan; 68 struct volume_info *volume; 69 hammer_off_t off; 70 hammer_off_t off_end; 71 char *ptr; 72 73 AssertOnFailure = 0; 74 TargetDir = target_dir; 75 76 printf("Running raw scan of HAMMER image, recovering to %s\n", 77 TargetDir); 78 mkdir(TargetDir, 0777); 79 80 data_buffer = NULL; 81 TAILQ_FOREACH(scan, &VolList, entry) { 82 volume = get_volume(scan->vol_no); 83 84 off = HAMMER_ZONE_RAW_BUFFER + 0; 85 off |= HAMMER_VOL_ENCODE(volume->vol_no); 86 off_end = off + (volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg); 87 while (off < off_end) { 88 ptr = get_buffer_data(off, &data_buffer, 0); 89 if (ptr) { 90 recover_top(ptr); 91 off += HAMMER_BUFSIZE; 92 } 93 } 94 } 95 rel_buffer(data_buffer); 96 97 if (CachedPath) { 98 free(CachedPath); 99 close(CachedFd); 100 CachedPath = NULL; 101 CachedFd = -1; 102 } 103 104 AssertOnFailure = 1; 105 } 106 107 /* 108 * Top level recovery processor. Assume the data is a B-Tree node. 109 * If the CRC is good we attempt to process the node, building the 110 * object space and creating the dictionary as we go. 111 */ 112 static void 113 recover_top(char *ptr) 114 { 115 struct hammer_node_ondisk *node; 116 hammer_btree_elm_t elm; 117 int maxcount; 118 int i; 119 120 for (node = (void *)ptr; (char *)node < ptr + HAMMER_BUFSIZE; ++node) { 121 if (crc32(&node->crc + 1, HAMMER_BTREE_CRCSIZE) == 122 node->crc && 123 node->type == HAMMER_BTREE_TYPE_LEAF) { 124 /* 125 * Scan elements 126 */ 127 maxcount = HAMMER_BTREE_LEAF_ELMS; 128 for (i = 0; i < node->count && i < maxcount; ++i) { 129 elm = &node->elms[i]; 130 if (elm->base.btype != 'R') 131 continue; 132 recover_elm(&elm->leaf); 133 } 134 } 135 } 136 } 137 138 static void 139 recover_elm(hammer_btree_leaf_elm_t leaf) 140 { 141 struct buffer_info *data_buffer = NULL; 142 struct recover_dict *dict; 143 struct recover_dict *dict2; 144 hammer_data_ondisk_t ondisk; 145 hammer_off_t data_offset; 146 struct stat st; 147 int chunk; 148 int len; 149 int zfill; 150 int64_t file_offset; 151 uint16_t llid; 152 size_t nlen; 153 int fd; 154 char *name; 155 char *path1; 156 char *path2; 157 158 /* 159 * Ignore deleted records 160 */ 161 if (leaf->delete_ts) 162 return; 163 if ((data_offset = leaf->data_offset) != 0) 164 ondisk = get_buffer_data(data_offset, &data_buffer, 0); 165 else 166 ondisk = NULL; 167 if (ondisk == NULL) 168 goto done; 169 170 len = leaf->data_len; 171 chunk = HAMMER_BUFSIZE - ((int)data_offset & HAMMER_BUFMASK); 172 if (chunk > len) 173 chunk = len; 174 175 if (len < 0 || len > HAMMER_XBUFSIZE || len > chunk) 176 goto done; 177 178 llid = leaf->base.localization >> 16; 179 180 dict = get_dict(leaf->base.obj_id, llid); 181 182 switch(leaf->base.rec_type) { 183 case HAMMER_RECTYPE_INODE: 184 /* 185 * We found an inode which also tells us where the file 186 * or directory is in the directory hierarchy. 187 */ 188 if (VerboseOpt) { 189 printf("file %016jx:%05d inode found\n", 190 (uintmax_t)leaf->base.obj_id, llid); 191 } 192 path1 = recover_path(dict); 193 194 /* 195 * Attach the inode to its parent. This isn't strictly 196 * necessary because the information is also in the 197 * directory entries, but if we do not find the directory 198 * entry this ensures that the files will still be 199 * reasonably well organized in their proper directories. 200 */ 201 if ((dict->flags & DICTF_PARENT) == 0 && 202 dict->obj_id != 1 && ondisk->inode.parent_obj_id != 0) { 203 dict->flags |= DICTF_PARENT; 204 dict->parent = get_dict(ondisk->inode.parent_obj_id, 205 llid); 206 if (dict->parent && 207 (dict->parent->flags & DICTF_MADEDIR) == 0) { 208 dict->parent->flags |= DICTF_MADEDIR; 209 path2 = recover_path(dict->parent); 210 printf("mkdir %s\n", path2); 211 mkdir(path2, 0777); 212 free(path2); 213 path2 = NULL; 214 } 215 } 216 if (dict->obj_type == 0) 217 dict->obj_type = ondisk->inode.obj_type; 218 dict->size = ondisk->inode.size; 219 path2 = recover_path(dict); 220 221 if (lstat(path1, &st) == 0) { 222 if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) { 223 truncate(path1, dict->size); 224 /* chmod(path1, 0666); */ 225 } 226 if (strcmp(path1, path2)) { 227 printf("Rename %s -> %s\n", path1, path2); 228 rename(path1, path2); 229 } 230 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) { 231 printf("mkinode (file) %s\n", path2); 232 fd = open(path2, O_RDWR|O_CREAT, 0666); 233 if (fd > 0) 234 close(fd); 235 } else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_DIRECTORY) { 236 printf("mkinode (dir) %s\n", path2); 237 mkdir(path2, 0777); 238 dict->flags |= DICTF_MADEDIR; 239 } 240 free(path1); 241 free(path2); 242 break; 243 case HAMMER_RECTYPE_DATA: 244 /* 245 * File record data 246 */ 247 if (leaf->base.obj_id == 0) 248 break; 249 if (VerboseOpt) { 250 printf("file %016jx:%05d data %016jx,%d\n", 251 (uintmax_t)leaf->base.obj_id, 252 llid, 253 (uintmax_t)leaf->base.key - len, 254 len); 255 } 256 257 /* 258 * Update the dictionary entry 259 */ 260 if (dict->obj_type == 0) 261 dict->obj_type = HAMMER_OBJTYPE_REGFILE; 262 263 /* 264 * If the parent directory has not been created we 265 * have to create it (typically a PFS%05d) 266 */ 267 if (dict->parent && 268 (dict->parent->flags & DICTF_MADEDIR) == 0) { 269 dict->parent->flags |= DICTF_MADEDIR; 270 path2 = recover_path(dict->parent); 271 printf("mkdir %s\n", path2); 272 mkdir(path2, 0777); 273 free(path2); 274 path2 = NULL; 275 } 276 277 /* 278 * Create the file if necessary, report file creations 279 */ 280 path1 = recover_path(dict); 281 if (CachedPath && strcmp(CachedPath, path1) == 0) { 282 fd = CachedFd; 283 } else { 284 fd = open(path1, O_CREAT|O_RDWR, 0666); 285 } 286 if (fd < 0) { 287 printf("Unable to create %s: %s\n", 288 path1, strerror(errno)); 289 free(path1); 290 break; 291 } 292 if ((dict->flags & DICTF_MADEFILE) == 0) { 293 dict->flags |= DICTF_MADEFILE; 294 printf("mkfile %s\n", path1); 295 } 296 297 /* 298 * And write the record. A HAMMER data block is aligned 299 * and may contain trailing zeros after the file EOF. The 300 * inode record is required to get the actual file size. 301 * 302 * However, when the inode record is not available 303 * we can do a sparse write and that will get it right 304 * most of the time even if the inode record is never 305 * found. 306 */ 307 file_offset = (int64_t)leaf->base.key - len; 308 lseek(fd, (off_t)file_offset, SEEK_SET); 309 while (len) { 310 if (dict->size == -1) { 311 for (zfill = chunk - 1; zfill >= 0; --zfill) { 312 if (((char *)ondisk)[zfill]) 313 break; 314 } 315 ++zfill; 316 } else { 317 zfill = chunk; 318 } 319 320 if (zfill) 321 write(fd, ondisk, zfill); 322 if (zfill < chunk) 323 lseek(fd, chunk - zfill, SEEK_CUR); 324 325 len -= chunk; 326 data_offset += chunk; 327 file_offset += chunk; 328 ondisk = get_buffer_data(data_offset, &data_buffer, 0); 329 if (ondisk == NULL) 330 break; 331 chunk = HAMMER_BUFSIZE - 332 ((int)data_offset & HAMMER_BUFMASK); 333 if (chunk > len) 334 chunk = len; 335 } 336 if (dict->size >= 0 && file_offset > dict->size) { 337 ftruncate(fd, dict->size); 338 /* fchmod(fd, 0666); */ 339 } 340 341 if (fd == CachedFd) { 342 free(path1); 343 } else if (CachedPath) { 344 free(CachedPath); 345 close(CachedFd); 346 CachedPath = path1; 347 CachedFd = fd; 348 } else { 349 CachedPath = path1; 350 CachedFd = fd; 351 } 352 break; 353 case HAMMER_RECTYPE_DIRENTRY: 354 nlen = len - offsetof(struct hammer_entry_data, name[0]); 355 if ((int)nlen < 0) /* illegal length */ 356 break; 357 if (ondisk->entry.obj_id == 0 || ondisk->entry.obj_id == 1) 358 break; 359 name = malloc(nlen + 1); 360 bcopy(ondisk->entry.name, name, nlen); 361 name[nlen] = 0; 362 sanitize_string(name); 363 364 /* 365 * We can't deal with hardlinks so if the object already 366 * has a name assigned to it we just keep using that name. 367 */ 368 dict2 = get_dict(ondisk->entry.obj_id, llid); 369 path1 = recover_path(dict2); 370 371 if (dict2->name == NULL) 372 dict2->name = name; 373 else 374 free(name); 375 376 /* 377 * Attach dict2 to its directory (dict), create the 378 * directory (dict) if necessary. We must ensure 379 * that the directory entry exists in order to be 380 * able to properly rename() the file without creating 381 * a namespace conflict. 382 */ 383 if ((dict2->flags & DICTF_PARENT) == 0) { 384 dict2->flags |= DICTF_PARENT; 385 dict2->parent = dict; 386 if ((dict->flags & DICTF_MADEDIR) == 0) { 387 dict->flags |= DICTF_MADEDIR; 388 path2 = recover_path(dict); 389 printf("mkdir %s\n", path2); 390 mkdir(path2, 0777); 391 free(path2); 392 path2 = NULL; 393 } 394 } 395 path2 = recover_path(dict2); 396 if (strcmp(path1, path2) != 0 && lstat(path1, &st) == 0) { 397 printf("Rename %s -> %s\n", path1, path2); 398 rename(path1, path2); 399 } 400 free(path1); 401 free(path2); 402 403 printf("dir %016jx:%05d entry %016jx \"%s\"\n", 404 (uintmax_t)leaf->base.obj_id, 405 llid, 406 (uintmax_t)ondisk->entry.obj_id, 407 name); 408 break; 409 default: 410 /* 411 * Ignore any other record types 412 */ 413 break; 414 } 415 done: 416 rel_buffer(data_buffer); 417 } 418 419 #define RD_HSIZE 32768 420 #define RD_HMASK (RD_HSIZE - 1) 421 422 struct recover_dict *RDHash[RD_HSIZE]; 423 424 static 425 struct recover_dict * 426 get_dict(int64_t obj_id, uint16_t llid) 427 { 428 struct recover_dict *dict; 429 int i; 430 431 if (obj_id == 0) 432 return(NULL); 433 434 i = crc32(&obj_id, sizeof(obj_id)) & RD_HMASK; 435 for (dict = RDHash[i]; dict; dict = dict->next) { 436 if (dict->obj_id == obj_id && 437 dict->llid == llid) { 438 break; 439 } 440 } 441 if (dict == NULL) { 442 dict = malloc(sizeof(*dict)); 443 bzero(dict, sizeof(*dict)); 444 dict->obj_id = obj_id; 445 dict->llid = llid; 446 dict->next = RDHash[i]; 447 dict->size = -1; 448 RDHash[i] = dict; 449 450 /* 451 * Always connect dangling dictionary entries to object 1 452 * (the root of the PFS). 453 * 454 * DICTF_PARENT will not be set until we know what the 455 * real parent directory object is. 456 */ 457 if (dict->obj_id != 1) 458 dict->parent = get_dict(1, llid); 459 } 460 return(dict); 461 } 462 463 struct path_info { 464 enum { PI_FIGURE, PI_LOAD } state; 465 uint16_t llid; 466 char *base; 467 char *next; 468 int len; 469 }; 470 471 static void recover_path_helper(struct recover_dict *, struct path_info *); 472 473 static 474 char * 475 recover_path(struct recover_dict *dict) 476 { 477 struct path_info info; 478 479 bzero(&info, sizeof(info)); 480 info.llid = dict->llid; 481 info.state = PI_FIGURE; 482 recover_path_helper(dict, &info); 483 info.base = malloc(info.len); 484 info.next = info.base; 485 info.state = PI_LOAD; 486 recover_path_helper(dict, &info); 487 488 return(info.base); 489 } 490 491 static 492 void 493 recover_path_helper(struct recover_dict *dict, struct path_info *info) 494 { 495 /* 496 * Calculate path element length 497 */ 498 dict->flags |= DICTF_TRAVERSED; 499 500 switch(info->state) { 501 case PI_FIGURE: 502 if (dict->obj_id == 1) 503 info->len += 8; 504 else if (dict->name) 505 info->len += strlen(dict->name); 506 else 507 info->len += 6 + 16; 508 ++info->len; 509 510 if (dict->parent && 511 (dict->parent->flags & DICTF_TRAVERSED) == 0) { 512 recover_path_helper(dict->parent, info); 513 } else { 514 info->len += strlen(TargetDir) + 1; 515 } 516 break; 517 case PI_LOAD: 518 if (dict->parent && 519 (dict->parent->flags & DICTF_TRAVERSED) == 0) { 520 recover_path_helper(dict->parent, info); 521 } else { 522 strcpy(info->next, TargetDir); 523 info->next += strlen(info->next); 524 } 525 526 *info->next++ = '/'; 527 if (dict->obj_id == 1) { 528 snprintf(info->next, 8+1, "PFS%05d", info->llid); 529 } else if (dict->name) { 530 strcpy(info->next, dict->name); 531 } else { 532 snprintf(info->next, 6+16+1, "obj_0x%016jx", 533 (uintmax_t)dict->obj_id); 534 } 535 info->next += strlen(info->next); 536 break; 537 } 538 dict->flags &= ~DICTF_TRAVERSED; 539 } 540 541 static 542 void 543 sanitize_string(char *str) 544 { 545 while (*str) { 546 if (!isprint(*str)) 547 *str = 'x'; 548 ++str; 549 } 550 } 551