xref: /dflybsd-src/sbin/hammer/cmd_recover.c (revision 8d378610e3b5687c707bc8aad4e11a3a96bea2fc)
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include "hammer.h"
36 
/*
 * Dictionary entry describing one object found during the raw scan.
 * Entries are keyed by (obj_id, pfs_id) and are created on demand by
 * get_dict().
 */
struct recover_dict {
	struct recover_dict *next;	/* next entry on the same hash chain */
	struct recover_dict *parent;	/* directory this object lives in */
	int64_t	obj_id;			/* HAMMER object id */
	uint8_t obj_type;		/* HAMMER_OBJTYPE_*, 0 while unknown */
	uint8_t flags;			/* DICTF_* bits */
	uint16_t pfs_id;		/* PFS the object belongs to */
	int64_t	size;			/* file size, -1 until an inode is seen */
	char	*name;			/* malloc'd name from a directory entry */
};
47 
/*
 * Bits stored in recover_dict.flags.
 */
#define DICTF_MADEDIR	0x01	/* mkdir() already issued for this object */
#define DICTF_MADEFILE	0x02	/* "mkfile" already reported for this object */
#define DICTF_PARENT	0x04	/* parent attached for real */
#define DICTF_TRAVERSED	0x80	/* transient: set while a path walk visits us */
52 
53 static void recover_top(char *ptr, hammer_off_t offset);
54 static void recover_elm(hammer_btree_leaf_elm_t leaf);
55 static struct recover_dict *get_dict(int64_t obj_id, uint16_t pfs_id);
56 static char *recover_path(struct recover_dict *dict);
57 static void sanitize_string(char *str);
58 
59 static const char *TargetDir;
60 static int CachedFd = -1;
61 static char *CachedPath;
62 
/*
 * XXX There is a latent bug here when iterating over zone-2 offsets,
 * as shown in the example below.
 *
 * If a volume was once part of a multi-volume HAMMER filesystem whose
 * usage had grown beyond the first volume, and was later re-formatted
 * as a single-volume filesystem, hammer recover is likely to hit an
 * assertion in get_buffer() because stale data left over from the old
 * filesystem refers to volumes (vol1, vol2, ...) that no longer exist.
 *
 * |-----vol0-----|-----vol1-----|-----vol2-----| old filesystem
 * <-----------------------> used by old filesystem
 *
 * |-----vol0-----| new filesystem
 * <-----> used by new filesystem
 *        <-------> unused, invalid data from old filesystem
 *              <-> B-Tree nodes likely to point to vol1
 */
81 
82 void
83 hammer_cmd_recover(const char *target_dir)
84 {
85 	struct buffer_info *data_buffer;
86 	struct volume_info *volume;
87 	hammer_off_t off;
88 	hammer_off_t off_end;
89 	char *ptr;
90 	int i;
91 
92 	TargetDir = target_dir;
93 
94 	if (mkdir(TargetDir, 0777) == -1) {
95 		if (errno != EEXIST) {
96 			perror("mkdir");
97 			exit(1);
98 		}
99 	}
100 
101 	printf("Running raw scan of HAMMER image, recovering to %s\n",
102 		TargetDir);
103 
104 	data_buffer = NULL;
105 	for (i = 0; i < HAMMER_MAX_VOLUMES; i++) {
106 		volume = get_volume(i);
107 		if (volume == NULL)
108 			continue;
109 		printf("Scanning volume %d size %s\n",
110 			volume->vol_no, sizetostr(volume->size));
111 		off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
112 		off_end = off + HAMMER_VOL_BUF_SIZE(volume->ondisk);
113 		while (off < off_end) {
114 			ptr = get_buffer_data(off, &data_buffer, 0);
115 			if (ptr)
116 				recover_top(ptr, off);
117 			off += HAMMER_BUFSIZE;
118 		}
119 	}
120 	rel_buffer(data_buffer);
121 
122 	if (CachedPath) {
123 		free(CachedPath);
124 		close(CachedFd);
125 		CachedPath = NULL;
126 		CachedFd = -1;
127 	}
128 }
129 
130 /*
131  * Top level recovery processor.  Assume the data is a B-Tree node.
132  * If the CRC is good we attempt to process the node, building the
133  * object space and creating the dictionary as we go.
134  */
135 static void
136 recover_top(char *ptr, hammer_off_t offset)
137 {
138 	hammer_node_ondisk_t node;
139 	hammer_btree_elm_t elm;
140 	int maxcount;
141 	int i;
142 	int isnode;
143 	char buf[HAMMER_BTREE_LEAF_ELMS + 1];
144 
145 	for (node = (void *)ptr; (char *)node < ptr + HAMMER_BUFSIZE; ++node) {
146 		isnode = hammer_crc_test_btree(node);
147 		maxcount = hammer_node_max_elements(node->type);
148 
149 		if (DebugOpt) {
150 			for (i = 0; i < node->count && i < maxcount; ++i)
151 				buf[i] = hammer_elm_btype(&node->elms[i]);
152 			buf[i] = '\0';
153 			if (!isnode && DebugOpt > 1)
154 				printf("%016jx -\n", offset);
155 			if (isnode)
156 				printf("%016jx %c %d %s\n",
157 					offset, node->type, node->count, buf);
158 		}
159 		offset += sizeof(*node);
160 
161 		if (isnode && node->type == HAMMER_BTREE_TYPE_LEAF) {
162 			for (i = 0; i < node->count && i < maxcount; ++i) {
163 				elm = &node->elms[i];
164 				if (elm->base.btype != HAMMER_BTREE_TYPE_RECORD)
165 					continue;
166 				recover_elm(&elm->leaf);
167 			}
168 		}
169 	}
170 }
171 
172 static void
173 recover_elm(hammer_btree_leaf_elm_t leaf)
174 {
175 	struct buffer_info *data_buffer = NULL;
176 	struct recover_dict *dict;
177 	struct recover_dict *dict2;
178 	hammer_data_ondisk_t ondisk;
179 	hammer_off_t data_offset;
180 	struct stat st;
181 	int chunk;
182 	int len;
183 	int zfill;
184 	int64_t file_offset;
185 	uint16_t pfs_id;
186 	size_t nlen;
187 	int fd;
188 	char *name;
189 	char *path1;
190 	char *path2;
191 
192 	/*
193 	 * Ignore deleted records
194 	 */
195 	if (leaf->delete_ts)
196 		return;
197 	if ((data_offset = leaf->data_offset) != 0)
198 		ondisk = get_buffer_data(data_offset, &data_buffer, 0);
199 	else
200 		ondisk = NULL;
201 	if (ondisk == NULL)
202 		goto done;
203 
204 	len = leaf->data_len;
205 	chunk = HAMMER_BUFSIZE - ((int)data_offset & HAMMER_BUFMASK);
206 	if (chunk > len)
207 		chunk = len;
208 
209 	if (len < 0 || len > HAMMER_XBUFSIZE || len > chunk)
210 		goto done;
211 
212 	pfs_id = lo_to_pfs(leaf->base.localization);
213 
214 	dict = get_dict(leaf->base.obj_id, pfs_id);
215 
216 	switch(leaf->base.rec_type) {
217 	case HAMMER_RECTYPE_INODE:
218 		/*
219 		 * We found an inode which also tells us where the file
220 		 * or directory is in the directory hierarchy.
221 		 */
222 		if (VerboseOpt) {
223 			printf("file %016jx:%05d inode found\n",
224 				(uintmax_t)leaf->base.obj_id, pfs_id);
225 		}
226 		path1 = recover_path(dict);
227 
228 		/*
229 		 * Attach the inode to its parent.  This isn't strictly
230 		 * necessary because the information is also in the
231 		 * directory entries, but if we do not find the directory
232 		 * entry this ensures that the files will still be
233 		 * reasonably well organized in their proper directories.
234 		 */
235 		if ((dict->flags & DICTF_PARENT) == 0 &&
236 		    dict->obj_id != HAMMER_OBJID_ROOT &&
237 		    ondisk->inode.parent_obj_id != 0) {
238 			dict->flags |= DICTF_PARENT;
239 			dict->parent = get_dict(ondisk->inode.parent_obj_id,
240 						pfs_id);
241 			if (dict->parent &&
242 			    (dict->parent->flags & DICTF_MADEDIR) == 0) {
243 				dict->parent->flags |= DICTF_MADEDIR;
244 				path2 = recover_path(dict->parent);
245 				printf("mkdir %s\n", path2);
246 				mkdir(path2, 0777);
247 				free(path2);
248 				path2 = NULL;
249 			}
250 		}
251 		if (dict->obj_type == 0)
252 			dict->obj_type = ondisk->inode.obj_type;
253 		dict->size = ondisk->inode.size;
254 		path2 = recover_path(dict);
255 
256 		if (lstat(path1, &st) == 0) {
257 			if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
258 				truncate(path1, dict->size);
259 				/* chmod(path1, 0666); */
260 			}
261 			if (strcmp(path1, path2)) {
262 				printf("Rename %s -> %s\n", path1, path2);
263 				rename(path1, path2);
264 			}
265 		} else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
266 			printf("mkinode (file) %s\n", path2);
267 			fd = open(path2, O_RDWR|O_CREAT, 0666);
268 			if (fd > 0)
269 				close(fd);
270 		} else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_DIRECTORY) {
271 			printf("mkinode (dir) %s\n", path2);
272 			mkdir(path2, 0777);
273 			dict->flags |= DICTF_MADEDIR;
274 		}
275 		free(path1);
276 		free(path2);
277 		break;
278 	case HAMMER_RECTYPE_DATA:
279 		/*
280 		 * File record data
281 		 */
282 		if (leaf->base.obj_id == 0)
283 			break;
284 		if (VerboseOpt) {
285 			printf("file %016jx:%05d data %016jx,%d\n",
286 				(uintmax_t)leaf->base.obj_id,
287 				pfs_id,
288 				(uintmax_t)leaf->base.key - len,
289 				len);
290 		}
291 
292 		/*
293 		 * Update the dictionary entry
294 		 */
295 		if (dict->obj_type == 0)
296 			dict->obj_type = HAMMER_OBJTYPE_REGFILE;
297 
298 		/*
299 		 * If the parent directory has not been created we
300 		 * have to create it (typically a PFS%05d)
301 		 */
302 		if (dict->parent &&
303 		    (dict->parent->flags & DICTF_MADEDIR) == 0) {
304 			dict->parent->flags |= DICTF_MADEDIR;
305 			path2 = recover_path(dict->parent);
306 			printf("mkdir %s\n", path2);
307 			mkdir(path2, 0777);
308 			free(path2);
309 			path2 = NULL;
310 		}
311 
312 		/*
313 		 * Create the file if necessary, report file creations
314 		 */
315 		path1 = recover_path(dict);
316 		if (CachedPath && strcmp(CachedPath, path1) == 0) {
317 			fd = CachedFd;
318 		} else {
319 			fd = open(path1, O_CREAT|O_RDWR, 0666);
320 		}
321 		if (fd < 0) {
322 			printf("Unable to create %s: %s\n",
323 				path1, strerror(errno));
324 			free(path1);
325 			break;
326 		}
327 		if ((dict->flags & DICTF_MADEFILE) == 0) {
328 			dict->flags |= DICTF_MADEFILE;
329 			printf("mkfile %s\n", path1);
330 		}
331 
332 		/*
333 		 * And write the record.  A HAMMER data block is aligned
334 		 * and may contain trailing zeros after the file EOF.  The
335 		 * inode record is required to get the actual file size.
336 		 *
337 		 * However, when the inode record is not available
338 		 * we can do a sparse write and that will get it right
339 		 * most of the time even if the inode record is never
340 		 * found.
341 		 */
342 		file_offset = (int64_t)leaf->base.key - len;
343 		lseek(fd, (off_t)file_offset, SEEK_SET);
344 		while (len) {
345 			if (dict->size == -1) {
346 				for (zfill = chunk - 1; zfill >= 0; --zfill) {
347 					if (((char *)ondisk)[zfill])
348 						break;
349 				}
350 				++zfill;
351 			} else {
352 				zfill = chunk;
353 			}
354 
355 			if (zfill)
356 				write(fd, ondisk, zfill);
357 			if (zfill < chunk)
358 				lseek(fd, chunk - zfill, SEEK_CUR);
359 
360 			len -= chunk;
361 			data_offset += chunk;
362 			file_offset += chunk;
363 			ondisk = get_buffer_data(data_offset, &data_buffer, 0);
364 			if (ondisk == NULL)
365 				break;
366 			chunk = HAMMER_BUFSIZE -
367 				((int)data_offset & HAMMER_BUFMASK);
368 			if (chunk > len)
369 				chunk = len;
370 		}
371 		if (dict->size >= 0 && file_offset > dict->size) {
372 			ftruncate(fd, dict->size);
373 			/* fchmod(fd, 0666); */
374 		}
375 
376 		if (fd == CachedFd) {
377 			free(path1);
378 		} else if (CachedPath) {
379 			free(CachedPath);
380 			close(CachedFd);
381 			CachedPath = path1;
382 			CachedFd = fd;
383 		} else {
384 			CachedPath = path1;
385 			CachedFd = fd;
386 		}
387 		break;
388 	case HAMMER_RECTYPE_DIRENTRY:
389 		nlen = len - HAMMER_ENTRY_NAME_OFF;
390 		if ((int)nlen < 0)	/* illegal length */
391 			break;
392 		if (ondisk->entry.obj_id == 0 ||
393 		    ondisk->entry.obj_id == HAMMER_OBJID_ROOT)
394 			break;
395 		name = malloc(nlen + 1);
396 		bcopy(ondisk->entry.name, name, nlen);
397 		name[nlen] = 0;
398 		sanitize_string(name);
399 
400 		/*
401 		 * We can't deal with hardlinks so if the object already
402 		 * has a name assigned to it we just keep using that name.
403 		 */
404 		dict2 = get_dict(ondisk->entry.obj_id, pfs_id);
405 		path1 = recover_path(dict2);
406 
407 		if (dict2->name == NULL)
408 			dict2->name = name;
409 		else
410 			free(name);
411 
412 		/*
413 		 * Attach dict2 to its directory (dict), create the
414 		 * directory (dict) if necessary.  We must ensure
415 		 * that the directory entry exists in order to be
416 		 * able to properly rename() the file without creating
417 		 * a namespace conflict.
418 		 */
419 		if ((dict2->flags & DICTF_PARENT) == 0) {
420 			dict2->flags |= DICTF_PARENT;
421 			dict2->parent = dict;
422 			if ((dict->flags & DICTF_MADEDIR) == 0) {
423 				dict->flags |= DICTF_MADEDIR;
424 				path2 = recover_path(dict);
425 				printf("mkdir %s\n", path2);
426 				mkdir(path2, 0777);
427 				free(path2);
428 				path2 = NULL;
429 			}
430 		}
431 		path2 = recover_path(dict2);
432 		if (strcmp(path1, path2) != 0 && lstat(path1, &st) == 0) {
433 			printf("Rename %s -> %s\n", path1, path2);
434 			rename(path1, path2);
435 		}
436 		free(path1);
437 		free(path2);
438 
439 		printf("dir  %016jx:%05d entry %016jx \"%s\"\n",
440 			(uintmax_t)leaf->base.obj_id,
441 			pfs_id,
442 			(uintmax_t)ondisk->entry.obj_id,
443 			name);
444 		break;
445 	default:
446 		/*
447 		 * Ignore any other record types
448 		 */
449 		break;
450 	}
451 done:
452 	rel_buffer(data_buffer);
453 }
454 
/*
 * Hash table of dictionary entries.  RD_HSIZE is a power of two so a
 * hash value can be reduced to a bucket index by masking with RD_HMASK.
 */
#define RD_HSIZE	32768
#define RD_HMASK	(RD_HSIZE - 1)

struct recover_dict *RDHash[RD_HSIZE];	/* bucket heads, chained via ->next */
459 
460 static
461 struct recover_dict *
462 get_dict(int64_t obj_id, uint16_t pfs_id)
463 {
464 	struct recover_dict *dict;
465 	int i;
466 
467 	if (obj_id == 0)
468 		return(NULL);
469 
470 	i = crc32(&obj_id, sizeof(obj_id)) & RD_HMASK;
471 	for (dict = RDHash[i]; dict; dict = dict->next) {
472 		if (dict->obj_id == obj_id &&
473 		    dict->pfs_id == pfs_id) {
474 			break;
475 		}
476 	}
477 	if (dict == NULL) {
478 		dict = malloc(sizeof(*dict));
479 		bzero(dict, sizeof(*dict));
480 		dict->obj_id = obj_id;
481 		dict->pfs_id = pfs_id;
482 		dict->next = RDHash[i];
483 		dict->size = -1;
484 		RDHash[i] = dict;
485 
486 		/*
487 		 * Always connect dangling dictionary entries to object 1
488 		 * (the root of the PFS).
489 		 *
490 		 * DICTF_PARENT will not be set until we know what the
491 		 * real parent directory object is.
492 		 */
493 		if (dict->obj_id != HAMMER_OBJID_ROOT)
494 			dict->parent = get_dict(1, pfs_id);
495 	}
496 	return(dict);
497 }
498 
/*
 * Working state for the two passes recover_path() makes over a
 * dictionary chain: PI_FIGURE adds up the required length in len,
 * PI_LOAD writes the path into the buffer.
 */
struct path_info {
	enum { PI_FIGURE, PI_LOAD } state;	/* which pass is running */
	uint16_t pfs_id;			/* used for the PFS%05d element */
	char *base;				/* start of the path buffer */
	char *next;				/* current write position */
	int len;				/* accumulated buffer size */
};
506 
507 static void recover_path_helper(struct recover_dict *, struct path_info *);
508 
509 static
510 char *
511 recover_path(struct recover_dict *dict)
512 {
513 	struct path_info info;
514 
515 	bzero(&info, sizeof(info));
516 	info.pfs_id = dict->pfs_id;
517 	info.state = PI_FIGURE;
518 	recover_path_helper(dict, &info);
519 	info.base = malloc(info.len);
520 	info.next = info.base;
521 	info.state = PI_LOAD;
522 	recover_path_helper(dict, &info);
523 
524 	return(info.base);
525 }
526 
527 static
528 void
529 recover_path_helper(struct recover_dict *dict, struct path_info *info)
530 {
531 	/*
532 	 * Calculate path element length
533 	 */
534 	dict->flags |= DICTF_TRAVERSED;
535 
536 	switch(info->state) {
537 	case PI_FIGURE:
538 		if (dict->obj_id == HAMMER_OBJID_ROOT)
539 			info->len += 8;
540 		else if (dict->name)
541 			info->len += strlen(dict->name);
542 		else
543 			info->len += 6 + 16;
544 		++info->len;
545 
546 		if (dict->parent &&
547 		    (dict->parent->flags & DICTF_TRAVERSED) == 0) {
548 			recover_path_helper(dict->parent, info);
549 		} else {
550 			info->len += strlen(TargetDir) + 1;
551 		}
552 		break;
553 	case PI_LOAD:
554 		if (dict->parent &&
555 		    (dict->parent->flags & DICTF_TRAVERSED) == 0) {
556 			recover_path_helper(dict->parent, info);
557 		} else {
558 			strcpy(info->next, TargetDir);
559 			info->next += strlen(info->next);
560 		}
561 
562 		*info->next++ = '/';
563 		if (dict->obj_id == HAMMER_OBJID_ROOT) {
564 			snprintf(info->next, 8+1, "PFS%05d", info->pfs_id);
565 		} else if (dict->name) {
566 			strcpy(info->next, dict->name);
567 		} else {
568 			snprintf(info->next, 6+16+1, "obj_0x%016jx",
569 				(uintmax_t)dict->obj_id);
570 		}
571 		info->next += strlen(info->next);
572 		break;
573 	}
574 	dict->flags &= ~DICTF_TRAVERSED;
575 }
576 
/*
 * Replace every non-printable character of the NUL terminated string
 * str with 'x', in place.
 */
static
void
sanitize_string(char *str)
{
	for (; *str != '\0'; ++str) {
		/*
		 * The <ctype.h> functions take an int holding an
		 * unsigned char value (or EOF).  Passing a plain char
		 * that is negative is undefined behavior.
		 */
		if (!isprint((unsigned char)*str))
			*str = 'x';
	}
}
587