xref: /dflybsd-src/sbin/hammer/cmd_recover.c (revision f603807b2c8b9b8ca8a7a99e36eeb70cd39b460d)
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include "hammer.h"
36 
37 struct recover_dict {
38 	struct recover_dict *next;
39 	struct recover_dict *parent;
40 	int64_t	obj_id;
41 	uint8_t obj_type;
42 	uint8_t flags;
43 	uint16_t pfs_id;
44 	int64_t	size;
45 	char	*name;
46 };
47 
48 #define DICTF_MADEDIR	0x01
49 #define DICTF_MADEFILE	0x02
50 #define DICTF_PARENT	0x04	/* parent attached for real */
51 #define DICTF_TRAVERSED	0x80
52 
53 static void recover_top(char *ptr, hammer_off_t offset);
54 static void recover_elm(hammer_btree_leaf_elm_t leaf);
55 static struct recover_dict *get_dict(int64_t obj_id, uint16_t pfs_id);
56 static char *recover_path(struct recover_dict *dict);
57 static void sanitize_string(char *str);
58 
59 static const char *TargetDir;
60 static int CachedFd = -1;
61 static char *CachedPath;
62 
63 /*
64  * XXX There is a hidden bug here while iterating zone-2 offset as
65  * shown in an example below.
66  *
67  * If a volume was once used as HAMMER filesystem which consists of
68  * multiple volumes whose usage has reached beyond the first volume,
69  * and then later re-formatted only using 1 volume, hammer recover is
70  * likely to hit assertion in get_buffer() due to having access to
71  * invalid volume (vol1,2,...) from old filesystem data.
72  *
73  * |-----vol0-----|-----vol1-----|-----vol2-----| old filesystem
74  * <-----------------------> used by old filesystem
75  *
76  * |-----vol0-----| new filesystem
77  * <-----> used by new filesystem
78  *        <-------> unused, invalid data from old filesystem
79  *              <-> B-Tree nodes likely to point to vol1
80  */
81 
82 void
83 hammer_cmd_recover(const char *target_dir)
84 {
85 	struct buffer_info *data_buffer;
86 	struct volume_info *volume;
87 	hammer_off_t off;
88 	hammer_off_t off_end;
89 	char *ptr;
90 	int i;
91 
92 	TargetDir = target_dir;
93 
94 	if (mkdir(TargetDir, 0777) == -1) {
95 		if (errno != EEXIST) {
96 			perror("mkdir");
97 			exit(1);
98 		}
99 	}
100 
101 	printf("Running raw scan of HAMMER image, recovering to %s\n",
102 		TargetDir);
103 
104 	data_buffer = NULL;
105 	for (i = 0; i < HAMMER_MAX_VOLUMES; i++) {
106 		volume = get_volume(i);
107 		if (volume == NULL)
108 			continue;
109 		printf("Scanning volume %d size %s\n",
110 			volume->vol_no, sizetostr(volume->size));
111 		off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
112 		off_end = off + HAMMER_VOL_BUF_SIZE(volume->ondisk);
113 
114 		/*
115 		 * It should somehow implement reverse mapping from
116 		 * zone-2 to zone-X, so the command doesn't just try
117 		 * every zone-2 offset assuming it's a B-Tree node.
118 		 * Since we know most big-blocks are not for zone-8,
119 		 * we don't want to spend extra I/O for other zones
120 		 * as well as possible misinterpretation of nodes.
121 		 */
122 		while (off < off_end) {
123 			ptr = get_buffer_data(off, &data_buffer, 0);
124 			if (ptr)
125 				recover_top(ptr, off);
126 			off += HAMMER_BUFSIZE;
127 		}
128 	}
129 	rel_buffer(data_buffer);
130 
131 	if (CachedPath) {
132 		free(CachedPath);
133 		close(CachedFd);
134 		CachedPath = NULL;
135 		CachedFd = -1;
136 	}
137 }
138 
139 /*
140  * Top level recovery processor.  Assume the data is a B-Tree node.
141  * If the CRC is good we attempt to process the node, building the
142  * object space and creating the dictionary as we go.
143  */
144 static void
145 recover_top(char *ptr, hammer_off_t offset)
146 {
147 	hammer_node_ondisk_t node;
148 	hammer_btree_elm_t elm;
149 	int maxcount;
150 	int i;
151 	int isnode;
152 	char buf[HAMMER_BTREE_LEAF_ELMS + 1];
153 
154 	for (node = (void *)ptr; (char *)node < ptr + HAMMER_BUFSIZE; ++node) {
155 		isnode = hammer_crc_test_btree(node);
156 		maxcount = hammer_node_max_elements(node->type);
157 
158 		if (DebugOpt) {
159 			for (i = 0; i < node->count && i < maxcount; ++i)
160 				buf[i] = hammer_elm_btype(&node->elms[i]);
161 			buf[i] = '\0';
162 			if (!isnode && DebugOpt > 1)
163 				printf("%016jx -\n", offset);
164 			if (isnode)
165 				printf("%016jx %c %d %s\n",
166 					offset, node->type, node->count, buf);
167 		}
168 		offset += sizeof(*node);
169 
170 		if (isnode && node->type == HAMMER_BTREE_TYPE_LEAF) {
171 			for (i = 0; i < node->count && i < maxcount; ++i) {
172 				elm = &node->elms[i];
173 				if (elm->base.btype != HAMMER_BTREE_TYPE_RECORD)
174 					continue;
175 				recover_elm(&elm->leaf);
176 			}
177 		}
178 	}
179 }
180 
181 static void
182 recover_elm(hammer_btree_leaf_elm_t leaf)
183 {
184 	struct buffer_info *data_buffer = NULL;
185 	struct recover_dict *dict;
186 	struct recover_dict *dict2;
187 	hammer_data_ondisk_t ondisk;
188 	hammer_off_t data_offset;
189 	struct stat st;
190 	int chunk;
191 	int len;
192 	int zfill;
193 	int64_t file_offset;
194 	uint16_t pfs_id;
195 	size_t nlen;
196 	int fd;
197 	char *name;
198 	char *path1;
199 	char *path2;
200 
201 	/*
202 	 * Ignore deleted records
203 	 */
204 	if (leaf->delete_ts)
205 		return;
206 	if ((data_offset = leaf->data_offset) != 0)
207 		ondisk = get_buffer_data(data_offset, &data_buffer, 0);
208 	else
209 		ondisk = NULL;
210 	if (ondisk == NULL)
211 		goto done;
212 
213 	len = leaf->data_len;
214 	chunk = HAMMER_BUFSIZE - ((int)data_offset & HAMMER_BUFMASK);
215 	if (chunk > len)
216 		chunk = len;
217 
218 	if (len < 0 || len > HAMMER_XBUFSIZE || len > chunk)
219 		goto done;
220 
221 	pfs_id = lo_to_pfs(leaf->base.localization);
222 
223 	dict = get_dict(leaf->base.obj_id, pfs_id);
224 
225 	switch(leaf->base.rec_type) {
226 	case HAMMER_RECTYPE_INODE:
227 		/*
228 		 * We found an inode which also tells us where the file
229 		 * or directory is in the directory hierarchy.
230 		 */
231 		if (VerboseOpt) {
232 			printf("file %016jx:%05d inode found\n",
233 				(uintmax_t)leaf->base.obj_id, pfs_id);
234 		}
235 		path1 = recover_path(dict);
236 
237 		/*
238 		 * Attach the inode to its parent.  This isn't strictly
239 		 * necessary because the information is also in the
240 		 * directory entries, but if we do not find the directory
241 		 * entry this ensures that the files will still be
242 		 * reasonably well organized in their proper directories.
243 		 */
244 		if ((dict->flags & DICTF_PARENT) == 0 &&
245 		    dict->obj_id != HAMMER_OBJID_ROOT &&
246 		    ondisk->inode.parent_obj_id != 0) {
247 			dict->flags |= DICTF_PARENT;
248 			dict->parent = get_dict(ondisk->inode.parent_obj_id,
249 						pfs_id);
250 			if (dict->parent &&
251 			    (dict->parent->flags & DICTF_MADEDIR) == 0) {
252 				dict->parent->flags |= DICTF_MADEDIR;
253 				path2 = recover_path(dict->parent);
254 				printf("mkdir %s\n", path2);
255 				mkdir(path2, 0777);
256 				free(path2);
257 				path2 = NULL;
258 			}
259 		}
260 		if (dict->obj_type == 0)
261 			dict->obj_type = ondisk->inode.obj_type;
262 		dict->size = ondisk->inode.size;
263 		path2 = recover_path(dict);
264 
265 		if (lstat(path1, &st) == 0) {
266 			if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
267 				truncate(path1, dict->size);
268 				/* chmod(path1, 0666); */
269 			}
270 			if (strcmp(path1, path2)) {
271 				printf("Rename %s -> %s\n", path1, path2);
272 				rename(path1, path2);
273 			}
274 		} else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
275 			printf("mkinode (file) %s\n", path2);
276 			fd = open(path2, O_RDWR|O_CREAT, 0666);
277 			if (fd > 0)
278 				close(fd);
279 		} else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_DIRECTORY) {
280 			printf("mkinode (dir) %s\n", path2);
281 			mkdir(path2, 0777);
282 			dict->flags |= DICTF_MADEDIR;
283 		}
284 		free(path1);
285 		free(path2);
286 		break;
287 	case HAMMER_RECTYPE_DATA:
288 		/*
289 		 * File record data
290 		 */
291 		if (leaf->base.obj_id == 0)
292 			break;
293 		if (VerboseOpt) {
294 			printf("file %016jx:%05d data %016jx,%d\n",
295 				(uintmax_t)leaf->base.obj_id,
296 				pfs_id,
297 				(uintmax_t)leaf->base.key - len,
298 				len);
299 		}
300 
301 		/*
302 		 * Update the dictionary entry
303 		 */
304 		if (dict->obj_type == 0)
305 			dict->obj_type = HAMMER_OBJTYPE_REGFILE;
306 
307 		/*
308 		 * If the parent directory has not been created we
309 		 * have to create it (typically a PFS%05d)
310 		 */
311 		if (dict->parent &&
312 		    (dict->parent->flags & DICTF_MADEDIR) == 0) {
313 			dict->parent->flags |= DICTF_MADEDIR;
314 			path2 = recover_path(dict->parent);
315 			printf("mkdir %s\n", path2);
316 			mkdir(path2, 0777);
317 			free(path2);
318 			path2 = NULL;
319 		}
320 
321 		/*
322 		 * Create the file if necessary, report file creations
323 		 */
324 		path1 = recover_path(dict);
325 		if (CachedPath && strcmp(CachedPath, path1) == 0) {
326 			fd = CachedFd;
327 		} else {
328 			fd = open(path1, O_CREAT|O_RDWR, 0666);
329 		}
330 		if (fd < 0) {
331 			printf("Unable to create %s: %s\n",
332 				path1, strerror(errno));
333 			free(path1);
334 			break;
335 		}
336 		if ((dict->flags & DICTF_MADEFILE) == 0) {
337 			dict->flags |= DICTF_MADEFILE;
338 			printf("mkfile %s\n", path1);
339 		}
340 
341 		/*
342 		 * And write the record.  A HAMMER data block is aligned
343 		 * and may contain trailing zeros after the file EOF.  The
344 		 * inode record is required to get the actual file size.
345 		 *
346 		 * However, when the inode record is not available
347 		 * we can do a sparse write and that will get it right
348 		 * most of the time even if the inode record is never
349 		 * found.
350 		 */
351 		file_offset = (int64_t)leaf->base.key - len;
352 		lseek(fd, (off_t)file_offset, SEEK_SET);
353 		while (len) {
354 			if (dict->size == -1) {
355 				for (zfill = chunk - 1; zfill >= 0; --zfill) {
356 					if (((char *)ondisk)[zfill])
357 						break;
358 				}
359 				++zfill;
360 			} else {
361 				zfill = chunk;
362 			}
363 
364 			if (zfill)
365 				write(fd, ondisk, zfill);
366 			if (zfill < chunk)
367 				lseek(fd, chunk - zfill, SEEK_CUR);
368 
369 			len -= chunk;
370 			data_offset += chunk;
371 			file_offset += chunk;
372 			ondisk = get_buffer_data(data_offset, &data_buffer, 0);
373 			if (ondisk == NULL)
374 				break;
375 			chunk = HAMMER_BUFSIZE -
376 				((int)data_offset & HAMMER_BUFMASK);
377 			if (chunk > len)
378 				chunk = len;
379 		}
380 		if (dict->size >= 0 && file_offset > dict->size) {
381 			ftruncate(fd, dict->size);
382 			/* fchmod(fd, 0666); */
383 		}
384 
385 		if (fd == CachedFd) {
386 			free(path1);
387 		} else if (CachedPath) {
388 			free(CachedPath);
389 			close(CachedFd);
390 			CachedPath = path1;
391 			CachedFd = fd;
392 		} else {
393 			CachedPath = path1;
394 			CachedFd = fd;
395 		}
396 		break;
397 	case HAMMER_RECTYPE_DIRENTRY:
398 		nlen = len - HAMMER_ENTRY_NAME_OFF;
399 		if ((int)nlen < 0)	/* illegal length */
400 			break;
401 		if (ondisk->entry.obj_id == 0 ||
402 		    ondisk->entry.obj_id == HAMMER_OBJID_ROOT)
403 			break;
404 		name = malloc(nlen + 1);
405 		bcopy(ondisk->entry.name, name, nlen);
406 		name[nlen] = 0;
407 		sanitize_string(name);
408 
409 		/*
410 		 * We can't deal with hardlinks so if the object already
411 		 * has a name assigned to it we just keep using that name.
412 		 */
413 		dict2 = get_dict(ondisk->entry.obj_id, pfs_id);
414 		path1 = recover_path(dict2);
415 
416 		if (dict2->name == NULL)
417 			dict2->name = name;
418 		else
419 			free(name);
420 
421 		/*
422 		 * Attach dict2 to its directory (dict), create the
423 		 * directory (dict) if necessary.  We must ensure
424 		 * that the directory entry exists in order to be
425 		 * able to properly rename() the file without creating
426 		 * a namespace conflict.
427 		 */
428 		if ((dict2->flags & DICTF_PARENT) == 0) {
429 			dict2->flags |= DICTF_PARENT;
430 			dict2->parent = dict;
431 			if ((dict->flags & DICTF_MADEDIR) == 0) {
432 				dict->flags |= DICTF_MADEDIR;
433 				path2 = recover_path(dict);
434 				printf("mkdir %s\n", path2);
435 				mkdir(path2, 0777);
436 				free(path2);
437 				path2 = NULL;
438 			}
439 		}
440 		path2 = recover_path(dict2);
441 		if (strcmp(path1, path2) != 0 && lstat(path1, &st) == 0) {
442 			printf("Rename %s -> %s\n", path1, path2);
443 			rename(path1, path2);
444 		}
445 		free(path1);
446 		free(path2);
447 
448 		printf("dir  %016jx:%05d entry %016jx \"%s\"\n",
449 			(uintmax_t)leaf->base.obj_id,
450 			pfs_id,
451 			(uintmax_t)ondisk->entry.obj_id,
452 			name);
453 		break;
454 	default:
455 		/*
456 		 * Ignore any other record types
457 		 */
458 		break;
459 	}
460 done:
461 	rel_buffer(data_buffer);
462 }
463 
464 #define RD_HSIZE	32768
465 #define RD_HMASK	(RD_HSIZE - 1)
466 
467 struct recover_dict *RDHash[RD_HSIZE];
468 
469 static
470 struct recover_dict *
471 get_dict(int64_t obj_id, uint16_t pfs_id)
472 {
473 	struct recover_dict *dict;
474 	int i;
475 
476 	if (obj_id == 0)
477 		return(NULL);
478 
479 	i = crc32(&obj_id, sizeof(obj_id)) & RD_HMASK;
480 	for (dict = RDHash[i]; dict; dict = dict->next) {
481 		if (dict->obj_id == obj_id &&
482 		    dict->pfs_id == pfs_id) {
483 			break;
484 		}
485 	}
486 	if (dict == NULL) {
487 		dict = malloc(sizeof(*dict));
488 		bzero(dict, sizeof(*dict));
489 		dict->obj_id = obj_id;
490 		dict->pfs_id = pfs_id;
491 		dict->next = RDHash[i];
492 		dict->size = -1;
493 		RDHash[i] = dict;
494 
495 		/*
496 		 * Always connect dangling dictionary entries to object 1
497 		 * (the root of the PFS).
498 		 *
499 		 * DICTF_PARENT will not be set until we know what the
500 		 * real parent directory object is.
501 		 */
502 		if (dict->obj_id != HAMMER_OBJID_ROOT)
503 			dict->parent = get_dict(1, pfs_id);
504 	}
505 	return(dict);
506 }
507 
508 struct path_info {
509 	enum { PI_FIGURE, PI_LOAD } state;
510 	uint16_t pfs_id;
511 	char *base;
512 	char *next;
513 	int len;
514 };
515 
516 static void recover_path_helper(struct recover_dict *, struct path_info *);
517 
518 static
519 char *
520 recover_path(struct recover_dict *dict)
521 {
522 	struct path_info info;
523 
524 	bzero(&info, sizeof(info));
525 	info.pfs_id = dict->pfs_id;
526 	info.state = PI_FIGURE;
527 	recover_path_helper(dict, &info);
528 	info.base = malloc(info.len);
529 	info.next = info.base;
530 	info.state = PI_LOAD;
531 	recover_path_helper(dict, &info);
532 
533 	return(info.base);
534 }
535 
536 static
537 void
538 recover_path_helper(struct recover_dict *dict, struct path_info *info)
539 {
540 	/*
541 	 * Calculate path element length
542 	 */
543 	dict->flags |= DICTF_TRAVERSED;
544 
545 	switch(info->state) {
546 	case PI_FIGURE:
547 		if (dict->obj_id == HAMMER_OBJID_ROOT)
548 			info->len += 8;
549 		else if (dict->name)
550 			info->len += strlen(dict->name);
551 		else
552 			info->len += 6 + 16;
553 		++info->len;
554 
555 		if (dict->parent &&
556 		    (dict->parent->flags & DICTF_TRAVERSED) == 0) {
557 			recover_path_helper(dict->parent, info);
558 		} else {
559 			info->len += strlen(TargetDir) + 1;
560 		}
561 		break;
562 	case PI_LOAD:
563 		if (dict->parent &&
564 		    (dict->parent->flags & DICTF_TRAVERSED) == 0) {
565 			recover_path_helper(dict->parent, info);
566 		} else {
567 			strcpy(info->next, TargetDir);
568 			info->next += strlen(info->next);
569 		}
570 
571 		*info->next++ = '/';
572 		if (dict->obj_id == HAMMER_OBJID_ROOT) {
573 			snprintf(info->next, 8+1, "PFS%05d", info->pfs_id);
574 		} else if (dict->name) {
575 			strcpy(info->next, dict->name);
576 		} else {
577 			snprintf(info->next, 6+16+1, "obj_0x%016jx",
578 				(uintmax_t)dict->obj_id);
579 		}
580 		info->next += strlen(info->next);
581 		break;
582 	}
583 	dict->flags &= ~DICTF_TRAVERSED;
584 }
585 
586 static
587 void
588 sanitize_string(char *str)
589 {
590 	while (*str) {
591 		if (!isprint(*str))
592 			*str = 'x';
593 		++str;
594 	}
595 }
596