xref: /dflybsd-src/sbin/hammer/cmd_recover.c (revision d0ce80dc6b5a41da926e902cb8b6a31c874b12bc)
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include "hammer.h"
36 
/*
 * One dictionary entry per (obj_id, pfs_id) pair encountered during the
 * raw scan.  Entries are created by get_dict() and chained off RDHash[].
 */
struct recover_dict {
	struct recover_dict *next;	/* next entry in the same hash chain */
	struct recover_dict *parent;	/* directory this object lives in */
	int64_t	obj_id;			/* object id, part of the lookup key */
	uint8_t obj_type;		/* HAMMER_OBJTYPE_*, 0 until known */
	uint8_t flags;			/* DICTF_* bits */
	uint16_t pfs_id;		/* PFS id, part of the lookup key */
	int64_t	size;			/* size from the inode, -1 until known */
	char	*name;			/* name from a direntry, NULL until known */
};
47 
48 #define DICTF_MADEDIR	0x01
49 #define DICTF_MADEFILE	0x02
50 #define DICTF_PARENT	0x04	/* parent attached for real */
51 #define DICTF_TRAVERSED	0x80
52 
53 static void recover_top(char *ptr, hammer_off_t offset);
54 static void recover_elm(hammer_btree_leaf_elm_t leaf);
55 static struct recover_dict *get_dict(int64_t obj_id, uint16_t pfs_id);
56 static char *recover_path(struct recover_dict *dict);
57 static void sanitize_string(char *str);
58 
59 static const char *TargetDir;
60 static int CachedFd = -1;
61 static char *CachedPath;
62 
63 void
64 hammer_cmd_recover(const char *target_dir)
65 {
66 	struct buffer_info *data_buffer;
67 	struct volume_info *volume;
68 	hammer_off_t off;
69 	hammer_off_t off_end;
70 	char *ptr;
71 
72 	TargetDir = target_dir;
73 
74 	if (mkdir(TargetDir, 0777) == -1) {
75 		if (errno != EEXIST) {
76 			perror("mkdir");
77 			exit(1);
78 		}
79 	}
80 
81 	printf("Running raw scan of HAMMER image, recovering to %s\n",
82 		TargetDir);
83 
84 	data_buffer = NULL;
85 	TAILQ_FOREACH(volume, &VolList, entry) {
86 		check_volume(volume);
87 		printf("Scanning volume %d size %s\n",
88 			volume->vol_no, sizetostr(volume->size));
89 		off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
90 		off_end = off + HAMMER_VOL_BUF_SIZE(volume->ondisk);
91 		while (off < off_end) {
92 			ptr = get_buffer_data(off, &data_buffer, 0);
93 			if (ptr)
94 				recover_top(ptr, off);
95 			off += HAMMER_BUFSIZE;
96 		}
97 	}
98 	rel_buffer(data_buffer);
99 
100 	if (CachedPath) {
101 		free(CachedPath);
102 		close(CachedFd);
103 		CachedPath = NULL;
104 		CachedFd = -1;
105 	}
106 }
107 
108 /*
109  * Top level recovery processor.  Assume the data is a B-Tree node.
110  * If the CRC is good we attempt to process the node, building the
111  * object space and creating the dictionary as we go.
112  */
113 static void
114 recover_top(char *ptr, hammer_off_t offset)
115 {
116 	struct hammer_node_ondisk *node;
117 	hammer_btree_elm_t elm;
118 	int maxcount;
119 	int i;
120 	int isnode;
121 	char buf[HAMMER_BTREE_LEAF_ELMS + 1];
122 
123 	for (node = (void *)ptr; (char *)node < ptr + HAMMER_BUFSIZE; ++node) {
124 		isnode = (crc32(&node->crc + 1, HAMMER_BTREE_CRCSIZE) == node->crc);
125 		maxcount = hammer_node_max_elements(node->type);
126 
127 		if (DebugOpt) {
128 			for (i = 0; i < node->count && i < maxcount; ++i)
129 				buf[i] = hammer_elm_btype(&node->elms[i]);
130 			buf[i] = '\0';
131 			if (!isnode && DebugOpt > 1)
132 				printf("%016jx -\n", offset);
133 			if (isnode)
134 				printf("%016jx %c %d %s\n",
135 					offset, node->type, node->count, buf);
136 		}
137 		offset += sizeof(*node);
138 
139 		if (isnode && node->type == HAMMER_BTREE_TYPE_LEAF) {
140 			for (i = 0; i < node->count && i < maxcount; ++i) {
141 				elm = &node->elms[i];
142 				if (elm->base.btype != HAMMER_BTREE_TYPE_RECORD)
143 					continue;
144 				recover_elm(&elm->leaf);
145 			}
146 		}
147 	}
148 }
149 
150 static void
151 recover_elm(hammer_btree_leaf_elm_t leaf)
152 {
153 	struct buffer_info *data_buffer = NULL;
154 	struct recover_dict *dict;
155 	struct recover_dict *dict2;
156 	hammer_data_ondisk_t ondisk;
157 	hammer_off_t data_offset;
158 	struct stat st;
159 	int chunk;
160 	int len;
161 	int zfill;
162 	int64_t file_offset;
163 	uint16_t pfs_id;
164 	size_t nlen;
165 	int fd;
166 	char *name;
167 	char *path1;
168 	char *path2;
169 
170 	/*
171 	 * Ignore deleted records
172 	 */
173 	if (leaf->delete_ts)
174 		return;
175 	if ((data_offset = leaf->data_offset) != 0)
176 		ondisk = get_buffer_data(data_offset, &data_buffer, 0);
177 	else
178 		ondisk = NULL;
179 	if (ondisk == NULL)
180 		goto done;
181 
182 	len = leaf->data_len;
183 	chunk = HAMMER_BUFSIZE - ((int)data_offset & HAMMER_BUFMASK);
184 	if (chunk > len)
185 		chunk = len;
186 
187 	if (len < 0 || len > HAMMER_XBUFSIZE || len > chunk)
188 		goto done;
189 
190 	pfs_id = lo_to_pfs(leaf->base.localization);
191 
192 	dict = get_dict(leaf->base.obj_id, pfs_id);
193 
194 	switch(leaf->base.rec_type) {
195 	case HAMMER_RECTYPE_INODE:
196 		/*
197 		 * We found an inode which also tells us where the file
198 		 * or directory is in the directory hierarchy.
199 		 */
200 		if (VerboseOpt) {
201 			printf("file %016jx:%05d inode found\n",
202 				(uintmax_t)leaf->base.obj_id, pfs_id);
203 		}
204 		path1 = recover_path(dict);
205 
206 		/*
207 		 * Attach the inode to its parent.  This isn't strictly
208 		 * necessary because the information is also in the
209 		 * directory entries, but if we do not find the directory
210 		 * entry this ensures that the files will still be
211 		 * reasonably well organized in their proper directories.
212 		 */
213 		if ((dict->flags & DICTF_PARENT) == 0 &&
214 		    dict->obj_id != HAMMER_OBJID_ROOT &&
215 		    ondisk->inode.parent_obj_id != 0) {
216 			dict->flags |= DICTF_PARENT;
217 			dict->parent = get_dict(ondisk->inode.parent_obj_id,
218 						pfs_id);
219 			if (dict->parent &&
220 			    (dict->parent->flags & DICTF_MADEDIR) == 0) {
221 				dict->parent->flags |= DICTF_MADEDIR;
222 				path2 = recover_path(dict->parent);
223 				printf("mkdir %s\n", path2);
224 				mkdir(path2, 0777);
225 				free(path2);
226 				path2 = NULL;
227 			}
228 		}
229 		if (dict->obj_type == 0)
230 			dict->obj_type = ondisk->inode.obj_type;
231 		dict->size = ondisk->inode.size;
232 		path2 = recover_path(dict);
233 
234 		if (lstat(path1, &st) == 0) {
235 			if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
236 				truncate(path1, dict->size);
237 				/* chmod(path1, 0666); */
238 			}
239 			if (strcmp(path1, path2)) {
240 				printf("Rename %s -> %s\n", path1, path2);
241 				rename(path1, path2);
242 			}
243 		} else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_REGFILE) {
244 			printf("mkinode (file) %s\n", path2);
245 			fd = open(path2, O_RDWR|O_CREAT, 0666);
246 			if (fd > 0)
247 				close(fd);
248 		} else if (ondisk->inode.obj_type == HAMMER_OBJTYPE_DIRECTORY) {
249 			printf("mkinode (dir) %s\n", path2);
250 			mkdir(path2, 0777);
251 			dict->flags |= DICTF_MADEDIR;
252 		}
253 		free(path1);
254 		free(path2);
255 		break;
256 	case HAMMER_RECTYPE_DATA:
257 		/*
258 		 * File record data
259 		 */
260 		if (leaf->base.obj_id == 0)
261 			break;
262 		if (VerboseOpt) {
263 			printf("file %016jx:%05d data %016jx,%d\n",
264 				(uintmax_t)leaf->base.obj_id,
265 				pfs_id,
266 				(uintmax_t)leaf->base.key - len,
267 				len);
268 		}
269 
270 		/*
271 		 * Update the dictionary entry
272 		 */
273 		if (dict->obj_type == 0)
274 			dict->obj_type = HAMMER_OBJTYPE_REGFILE;
275 
276 		/*
277 		 * If the parent directory has not been created we
278 		 * have to create it (typically a PFS%05d)
279 		 */
280 		if (dict->parent &&
281 		    (dict->parent->flags & DICTF_MADEDIR) == 0) {
282 			dict->parent->flags |= DICTF_MADEDIR;
283 			path2 = recover_path(dict->parent);
284 			printf("mkdir %s\n", path2);
285 			mkdir(path2, 0777);
286 			free(path2);
287 			path2 = NULL;
288 		}
289 
290 		/*
291 		 * Create the file if necessary, report file creations
292 		 */
293 		path1 = recover_path(dict);
294 		if (CachedPath && strcmp(CachedPath, path1) == 0) {
295 			fd = CachedFd;
296 		} else {
297 			fd = open(path1, O_CREAT|O_RDWR, 0666);
298 		}
299 		if (fd < 0) {
300 			printf("Unable to create %s: %s\n",
301 				path1, strerror(errno));
302 			free(path1);
303 			break;
304 		}
305 		if ((dict->flags & DICTF_MADEFILE) == 0) {
306 			dict->flags |= DICTF_MADEFILE;
307 			printf("mkfile %s\n", path1);
308 		}
309 
310 		/*
311 		 * And write the record.  A HAMMER data block is aligned
312 		 * and may contain trailing zeros after the file EOF.  The
313 		 * inode record is required to get the actual file size.
314 		 *
315 		 * However, when the inode record is not available
316 		 * we can do a sparse write and that will get it right
317 		 * most of the time even if the inode record is never
318 		 * found.
319 		 */
320 		file_offset = (int64_t)leaf->base.key - len;
321 		lseek(fd, (off_t)file_offset, SEEK_SET);
322 		while (len) {
323 			if (dict->size == -1) {
324 				for (zfill = chunk - 1; zfill >= 0; --zfill) {
325 					if (((char *)ondisk)[zfill])
326 						break;
327 				}
328 				++zfill;
329 			} else {
330 				zfill = chunk;
331 			}
332 
333 			if (zfill)
334 				write(fd, ondisk, zfill);
335 			if (zfill < chunk)
336 				lseek(fd, chunk - zfill, SEEK_CUR);
337 
338 			len -= chunk;
339 			data_offset += chunk;
340 			file_offset += chunk;
341 			ondisk = get_buffer_data(data_offset, &data_buffer, 0);
342 			if (ondisk == NULL)
343 				break;
344 			chunk = HAMMER_BUFSIZE -
345 				((int)data_offset & HAMMER_BUFMASK);
346 			if (chunk > len)
347 				chunk = len;
348 		}
349 		if (dict->size >= 0 && file_offset > dict->size) {
350 			ftruncate(fd, dict->size);
351 			/* fchmod(fd, 0666); */
352 		}
353 
354 		if (fd == CachedFd) {
355 			free(path1);
356 		} else if (CachedPath) {
357 			free(CachedPath);
358 			close(CachedFd);
359 			CachedPath = path1;
360 			CachedFd = fd;
361 		} else {
362 			CachedPath = path1;
363 			CachedFd = fd;
364 		}
365 		break;
366 	case HAMMER_RECTYPE_DIRENTRY:
367 		nlen = len - offsetof(struct hammer_direntry_data, name[0]);
368 		if ((int)nlen < 0)	/* illegal length */
369 			break;
370 		if (ondisk->entry.obj_id == 0 ||
371 		    ondisk->entry.obj_id == HAMMER_OBJID_ROOT)
372 			break;
373 		name = malloc(nlen + 1);
374 		bcopy(ondisk->entry.name, name, nlen);
375 		name[nlen] = 0;
376 		sanitize_string(name);
377 
378 		/*
379 		 * We can't deal with hardlinks so if the object already
380 		 * has a name assigned to it we just keep using that name.
381 		 */
382 		dict2 = get_dict(ondisk->entry.obj_id, pfs_id);
383 		path1 = recover_path(dict2);
384 
385 		if (dict2->name == NULL)
386 			dict2->name = name;
387 		else
388 			free(name);
389 
390 		/*
391 		 * Attach dict2 to its directory (dict), create the
392 		 * directory (dict) if necessary.  We must ensure
393 		 * that the directory entry exists in order to be
394 		 * able to properly rename() the file without creating
395 		 * a namespace conflict.
396 		 */
397 		if ((dict2->flags & DICTF_PARENT) == 0) {
398 			dict2->flags |= DICTF_PARENT;
399 			dict2->parent = dict;
400 			if ((dict->flags & DICTF_MADEDIR) == 0) {
401 				dict->flags |= DICTF_MADEDIR;
402 				path2 = recover_path(dict);
403 				printf("mkdir %s\n", path2);
404 				mkdir(path2, 0777);
405 				free(path2);
406 				path2 = NULL;
407 			}
408 		}
409 		path2 = recover_path(dict2);
410 		if (strcmp(path1, path2) != 0 && lstat(path1, &st) == 0) {
411 			printf("Rename %s -> %s\n", path1, path2);
412 			rename(path1, path2);
413 		}
414 		free(path1);
415 		free(path2);
416 
417 		printf("dir  %016jx:%05d entry %016jx \"%s\"\n",
418 			(uintmax_t)leaf->base.obj_id,
419 			pfs_id,
420 			(uintmax_t)ondisk->entry.obj_id,
421 			name);
422 		break;
423 	default:
424 		/*
425 		 * Ignore any other record types
426 		 */
427 		break;
428 	}
429 done:
430 	rel_buffer(data_buffer);
431 }
432 
433 #define RD_HSIZE	32768
434 #define RD_HMASK	(RD_HSIZE - 1)
435 
436 struct recover_dict *RDHash[RD_HSIZE];
437 
438 static
439 struct recover_dict *
440 get_dict(int64_t obj_id, uint16_t pfs_id)
441 {
442 	struct recover_dict *dict;
443 	int i;
444 
445 	if (obj_id == 0)
446 		return(NULL);
447 
448 	i = crc32(&obj_id, sizeof(obj_id)) & RD_HMASK;
449 	for (dict = RDHash[i]; dict; dict = dict->next) {
450 		if (dict->obj_id == obj_id &&
451 		    dict->pfs_id == pfs_id) {
452 			break;
453 		}
454 	}
455 	if (dict == NULL) {
456 		dict = malloc(sizeof(*dict));
457 		bzero(dict, sizeof(*dict));
458 		dict->obj_id = obj_id;
459 		dict->pfs_id = pfs_id;
460 		dict->next = RDHash[i];
461 		dict->size = -1;
462 		RDHash[i] = dict;
463 
464 		/*
465 		 * Always connect dangling dictionary entries to object 1
466 		 * (the root of the PFS).
467 		 *
468 		 * DICTF_PARENT will not be set until we know what the
469 		 * real parent directory object is.
470 		 */
471 		if (dict->obj_id != HAMMER_OBJID_ROOT)
472 			dict->parent = get_dict(1, pfs_id);
473 	}
474 	return(dict);
475 }
476 
/*
 * Working state for recover_path().  The helper is run twice over the
 * same dictionary chain: once in PI_FIGURE state to add up the length,
 * then in PI_LOAD state to write the path into the allocated buffer.
 */
struct path_info {
	enum { PI_FIGURE, PI_LOAD } state;	/* pass being executed */
	uint16_t pfs_id;	/* PFS id printed for the root element */
	char *base;		/* start of the path buffer */
	char *next;		/* where the next element is written */
	int len;		/* byte count accumulated by PI_FIGURE */
};

static void recover_path_helper(struct recover_dict *dict,
				struct path_info *info);
486 
487 static
488 char *
489 recover_path(struct recover_dict *dict)
490 {
491 	struct path_info info;
492 
493 	bzero(&info, sizeof(info));
494 	info.pfs_id = dict->pfs_id;
495 	info.state = PI_FIGURE;
496 	recover_path_helper(dict, &info);
497 	info.base = malloc(info.len);
498 	info.next = info.base;
499 	info.state = PI_LOAD;
500 	recover_path_helper(dict, &info);
501 
502 	return(info.base);
503 }
504 
505 static
506 void
507 recover_path_helper(struct recover_dict *dict, struct path_info *info)
508 {
509 	/*
510 	 * Calculate path element length
511 	 */
512 	dict->flags |= DICTF_TRAVERSED;
513 
514 	switch(info->state) {
515 	case PI_FIGURE:
516 		if (dict->obj_id == HAMMER_OBJID_ROOT)
517 			info->len += 8;
518 		else if (dict->name)
519 			info->len += strlen(dict->name);
520 		else
521 			info->len += 6 + 16;
522 		++info->len;
523 
524 		if (dict->parent &&
525 		    (dict->parent->flags & DICTF_TRAVERSED) == 0) {
526 			recover_path_helper(dict->parent, info);
527 		} else {
528 			info->len += strlen(TargetDir) + 1;
529 		}
530 		break;
531 	case PI_LOAD:
532 		if (dict->parent &&
533 		    (dict->parent->flags & DICTF_TRAVERSED) == 0) {
534 			recover_path_helper(dict->parent, info);
535 		} else {
536 			strcpy(info->next, TargetDir);
537 			info->next += strlen(info->next);
538 		}
539 
540 		*info->next++ = '/';
541 		if (dict->obj_id == HAMMER_OBJID_ROOT) {
542 			snprintf(info->next, 8+1, "PFS%05d", info->pfs_id);
543 		} else if (dict->name) {
544 			strcpy(info->next, dict->name);
545 		} else {
546 			snprintf(info->next, 6+16+1, "obj_0x%016jx",
547 				(uintmax_t)dict->obj_id);
548 		}
549 		info->next += strlen(info->next);
550 		break;
551 	}
552 	dict->flags &= ~DICTF_TRAVERSED;
553 }
554 
/*
 * Replace every character of the NUL terminated string (str) that is
 * not printable according to isprint() with 'x'.  The string is
 * modified in place and keeps its length.
 */
static
void
sanitize_string(char *str)
{
	while (*str) {
		/*
		 * isprint() is only defined for values representable as
		 * unsigned char (and EOF); plain char may be negative.
		 */
		if (!isprint((unsigned char)*str))
			*str = 'x';
		++str;
	}
}
565