xref: /dflybsd-src/sys/vfs/hammer/hammer_recover.c (revision 4c09d9c4fd910651904ede280ad90a4abf3fc5d7)
/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * UNDO ALGORITHM:
 *
 *	The UNDO algorithm is trivial.  The nominal UNDO range in the
 *	FIFO is determined by taking the first/next offset stored in
 *	the volume header.  The next offset may not be correct since
 *	UNDO flushes are not required to flush the volume header, so
 *	the code also scans forward until it finds a discontinuous
 *	sequence number.
 *
 *	The UNDOs are then scanned and executed in reverse order.  These
 *	UNDOs are effectively just data restorations based on HAMMER offsets.
 *
 * REDO ALGORITHM:
 *
 *	REDO records are laid down in the UNDO/REDO FIFO for nominal
 *	writes, truncations, and file extension ops.  On a per-inode
 *	basis two types of REDO records are generated, REDO_WRITE
 *	and REDO_TRUNC.
 *
 *	Essentially the recovery block will contain UNDO records backing
 *	out partial operations and REDO records to regenerate those partial
 *	operations guaranteed by the filesystem during recovery.
 *
 *	REDO generation is optional, and can also be started and then
 *	later stopped due to excessive write()s in between fsyncs, or not
 *	started at all.  Because of this the recovery code must determine
 *	when REDOs are valid and when they are not.  Additional records are
 *	generated to help figure it out.
 *
 *	The REDO_TERM_WRITE and REDO_TERM_TRUNC records are generated
 *	during a flush cycle indicating which records the flush cycle
 *	has synched meta-data for, and HAMMER_REDO_SYNC is generated in
 *	each flush cycle to indicate how far back in the UNDO/REDO FIFO
 *	the recovery code must go to find the earliest applicable REDO
 *	record.  Applicable REDO records can be far outside the nominal
 *	UNDO recovery range, for example if a write() lays down a REDO but
 *	the related file is not flushed for several cycles.
 *
 *	The SYNC reference is to a point prior to the nominal UNDO FIFO
 *	range, creating an extended REDO range which must be scanned.
 *
 *	Any REDO_WRITE/REDO_TRUNC records encountered within the extended
 *	range which have no matching REDO_TERM_WRITE/REDO_TERM_TRUNC records
 *	prior to the start of the nominal UNDO range are applicable.
 *	That is, any REDO_TERM_* records in the extended range but not in
 *	the nominal undo range will mask any redo operations for prior REDO
 *	records.  This is necessary because once the TERM is laid down
 *	followup operations may make additional changes to the related
 *	records but not necessarily record them as REDOs (because REDOs are
 *	optional).
 *
 *	REDO_TERM_WRITE/REDO_TERM_TRUNC records in the nominal UNDO range
 *	must be ignored since they represent meta-data flushes which are
 *	undone by the UNDOs in that nominal UNDO range by the recovery
 *	code.  Only REDO_TERM_* records in the extended range but not
 *	in the nominal undo range are applicable.
 *
 *	The REDO_SYNC record itself always exists in the nominal UNDO range
 *	(this is how the extended range is determined).  For recovery
 *	purposes the most recent REDO_SYNC record is always used if several
 *	are found.
 *
 * CRASHES DURING UNDO/REDO
 *
 *	A crash during the UNDO phase requires no additional effort.  The
 *	UNDOs will simply be re-run.  The state of the UNDO/REDO FIFO
 *	remains unchanged and has no re-crash issues.
 *
 *	A crash during the REDO phase is more complex because the REDOs
 *	run normal filesystem ops and generate additional UNDO/REDO records.
 *	REDO is disabled during REDO recovery and any SYNC records generated
 *	by flushes during REDO recovery must continue to reference the
 *	original extended range.
 *
 *	If multiple crashes occur and the UNDO/REDO FIFO wraps, REDO recovery
 *	may become impossible.  This is detected when the start of the
 *	extended range fails to have monotonically increasing sequence
 *	numbers leading into the nominal undo range.
 */
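
/*
 * Illustrative example of the above (hypothetical offsets and sequence
 * numbers, not taken from any real volume):
 *
 *	seq 100  REDO_WRITE inode I		<-- extended range start
 *	seq 101  REDO_TERM_WRITE inode I	    (masks the seq 100 REDO)
 *	seq 102  REDO_SYNC (references seq 100)	<-- nominal UNDO range start
 *	seq 103  UNDO record
 *	seq 104  REDO_WRITE inode I		    (no TERM, applicable)
 *						<-- end of live FIFO
 *
 * Stage1 executes the UNDOs in the nominal range backwards and notes the
 * REDO_SYNC.  Stage2 then collects TERMs from the extended range
 * (seq 100-101) and replays REDOs forward from seq 100: the seq 100 REDO
 * is masked by the seq 101 TERM, while the seq 104 REDO is re-executed.
 */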

#include "hammer.h"

/*
 * Specify the way we want to handle stage2 errors.
 *
 * The following values are accepted:
 *
 * 0 - Run redo recovery normally and fail to mount if
 *     the operation fails (default).
 * 1 - Run redo recovery, but don't fail to mount if the
 *     operation fails.
 * 2 - Completely skip redo recovery (only for severe error
 *     conditions and/or debugging).
 */
static int hammer_skip_redo = 0;
TUNABLE_INT("vfs.hammer.skip_redo", &hammer_skip_redo);

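/*
 * Example (illustrative, not prescriptive): since this is a loader
 * tunable it can be set at boot time, e.g. in /boot/loader.conf:
 *
 *	vfs.hammer.skip_redo="1"	# tolerate stage2 redo failures
 */
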
/*
 * Each rterm entry has a list of fifo offsets indicating termination
 * points.  These are stripped as the scan progresses.
 */
typedef struct hammer_rterm_entry {
	struct hammer_rterm_entry *next;
	hammer_off_t		fifo_offset;
} *hammer_rterm_entry_t;

/*
 * rterm entries sorted in the RB tree are indexed by objid, localization,
 * flags, and offset.  TRUNC entries ignore the offset.
 */
typedef struct hammer_rterm {
	RB_ENTRY(hammer_rterm)	rb_node;
	int64_t			redo_objid;
	uint32_t		redo_localization;
	uint32_t		redo_flags;
	hammer_off_t		redo_offset;
	hammer_rterm_entry_t	term_list;
} *hammer_rterm_t;
154c58123daSMatthew Dillon 
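/*
 * Worked example (hypothetical values, a sketch of the mechanism): a
 * REDO_TERM_WRITE for (objid 0x10001, offset 8192) seen at FIFO offset F
 * creates or extends the rterm node keyed by objid/localization/flags/
 * offset and records F on its term_list.  When the stage2 forward scan
 * later reaches a REDO_WRITE with the same key laid down before F, the
 * entry masks it; recorded offsets are stripped as the scan progresses.
 */
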
static int hammer_rterm_rb_cmp(hammer_rterm_t rt1, hammer_rterm_t rt2);
struct hammer_rterm_rb_tree;
RB_HEAD(hammer_rterm_rb_tree, hammer_rterm);
RB_PROTOTYPE(hammer_rterm_rb_tree, hammer_rterm, rb_node, hammer_rterm_rb_cmp);

static int hammer_check_tail_signature(hammer_mount_t hmp,
			hammer_fifo_tail_t tail, hammer_off_t end_off);
static int hammer_check_head_signature(hammer_mount_t hmp,
			hammer_fifo_head_t head, hammer_off_t beg_off);
static void hammer_recover_copy_undo(hammer_off_t undo_offset,
			char *src, char *dst, int bytes);
static hammer_fifo_any_t hammer_recover_scan_fwd(hammer_mount_t hmp,
			hammer_volume_t root_volume,
			hammer_off_t *scan_offsetp,
			int *errorp, hammer_buffer_t *bufferp);
static hammer_fifo_any_t hammer_recover_scan_rev(hammer_mount_t hmp,
			hammer_volume_t root_volume,
			hammer_off_t *scan_offsetp,
			int *errorp, hammer_buffer_t *bufferp);
#if 0
static void hammer_recover_debug_dump(int w, char *buf, int bytes);
#endif
static int hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
			hammer_fifo_undo_t undo);
static int hammer_recover_redo_rec(hammer_mount_t hmp,
			struct hammer_rterm_rb_tree *root,
			hammer_off_t redo_fifo_offset, hammer_fifo_redo_t redo);
static int hammer_recover_redo_run(hammer_mount_t hmp,
			struct hammer_rterm_rb_tree *root,
			hammer_off_t redo_fifo_offset, hammer_fifo_redo_t redo);
static void hammer_recover_redo_exec(hammer_mount_t hmp,
			hammer_fifo_redo_t redo);

RB_GENERATE(hammer_rterm_rb_tree, hammer_rterm, rb_node, hammer_rterm_rb_cmp);

/*
 * Recover filesystem meta-data on mount.  This procedure figures out the
 * UNDO FIFO range and runs the UNDOs backwards.  The FIFO pointers are not
 * resynchronized by this procedure.
 *
 * This procedure is run near the beginning of the mount sequence, before
 * any B-Tree or high-level accesses are enabled, and is responsible for
 * restoring the meta-data to a consistent state.  High level HAMMER data
 * structures (such as the B-Tree) cannot be accessed here.
 *
 * NOTE: No information from the root volume has been cached in the
 *	 hammer_mount structure yet, so we need to access the root volume's
 *	 buffer directly.
 */
int
hammer_recover_stage1(hammer_mount_t hmp, hammer_volume_t root_volume)
{
	hammer_blockmap_t rootmap;
	hammer_buffer_t buffer;
	hammer_off_t scan_offset;
	hammer_off_t scan_offset_save;
	hammer_off_t bytes;
	hammer_fifo_any_t head;
	hammer_off_t first_offset;
	hammer_off_t last_offset;
	uint32_t seqno;
	int error;
	int degenerate_case = 0;

	/*
	 * Examine the UNDO FIFO indices in the volume header.
	 */
	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	first_offset = rootmap->first_offset;
	last_offset  = rootmap->next_offset;
	buffer = NULL;
	error = 0;

	hmp->recover_stage2_offset = 0;

	if (first_offset > rootmap->alloc_offset ||
	    last_offset > rootmap->alloc_offset) {
		hvkprintf(root_volume,
			"Illegal UNDO FIFO index range "
			"%016jx, %016jx limit %016jx\n",
			(intmax_t)first_offset,
			(intmax_t)last_offset,
			(intmax_t)rootmap->alloc_offset);
		error = EIO;
		goto done;
	}
24302428fb6SMatthew Dillon 
	/*
	 * In HAMMER version 4+ filesystems the volume header does NOT
	 * contain definitive UNDO FIFO state.  In particular, the
	 * rootmap->next_offset may not be indexed completely to the
	 * end of the active UNDO FIFO.
	 */
	if (hmp->version >= HAMMER_VOL_VERSION_FOUR) {
		/*
		 * To find the definitive range we must first scan backwards
		 * from first_offset to locate the first real record and
		 * extract the sequence number from it.  This record is not
		 * part of the active undo space.
		 */
		scan_offset = first_offset;
		seqno = 0;

		for (;;) {
			head = hammer_recover_scan_rev(hmp, root_volume,
						       &scan_offset,
						       &error, &buffer);
			if (error)
				break;
			if (head->head.hdr_type != HAMMER_HEAD_TYPE_PAD) {
				seqno = head->head.hdr_seq;
				break;
			}
		}
		if (error) {
			hvkprintf(root_volume,
				"recovery failure during seqno backscan\n");
			goto done;
		}

		/*
		 * Scan forwards from first_offset and (seqno+1) looking
		 * for a sequence space discontinuity.  This denotes the
		 * end of the active FIFO area.
		 *
		 * NOTE: For the case where the FIFO is empty the very first
		 *	 record we find will be discontinuous.
		 *
		 * NOTE: Do not include trailing PADs in the scan range,
		 *	 and remember that the returned scan_offset after a
		 *	 fwd iteration points to the end of the returned
		 *	 record.
		 */
		hvkprintf(root_volume, "recovery check seqno=%08x\n", seqno);

		scan_offset = first_offset;
		scan_offset_save = scan_offset;
		++seqno;
		hmp->recover_stage2_seqno = seqno;

		for (;;) {
			head = hammer_recover_scan_fwd(hmp, root_volume,
						       &scan_offset,
						       &error, &buffer);
			if (error)
				break;
			if (head->head.hdr_type != HAMMER_HEAD_TYPE_PAD) {
				if (seqno != head->head.hdr_seq) {
					scan_offset = scan_offset_save;
					break;
				}
				scan_offset_save = scan_offset;
				++seqno;
			}

#if 0
			/*
			 * If the forward scan is grossly ahead of last_offset
			 * then something is wrong.  last_offset is supposed
			 * to have been flushed out already.
			 */
			if (last_offset >= scan_offset) {
				bytes = last_offset - scan_offset;
			} else {
				bytes = rootmap->alloc_offset - scan_offset +
					HAMMER_OFF_LONG_ENCODE(last_offset);
			}
			if (bytes >
			    HAMMER_OFF_LONG_ENCODE(rootmap->alloc_offset) *
			    4 / 5) {
				hvkprintf(root_volume,
					"recovery forward scan is "
					"grossly beyond the last_offset in "
					"the volume header, this can't be "
					"right.\n");
				error = EIO;
				break;
			}
#endif
		}

		/*
		 * Store the seqno.  This will be the next seqno we lay down
		 * when generating new UNDOs.
		 */
		hmp->undo_seqno = seqno;
		if (error) {
			hvkprintf(root_volume,
				"recovery failure during seqno fwdscan\n");
			goto done;
		}
		last_offset = scan_offset;
		hvkprintf(root_volume,
			"recovery range %016jx-%016jx\n",
			(intmax_t)first_offset,
			(intmax_t)last_offset);
		hvkprintf(root_volume,
			"recovery nexto %016jx endseqno=%08x\n",
			(intmax_t)rootmap->next_offset,
			seqno);
	}

	/*
	 * Calculate the size of the active portion of the FIFO.  If the
	 * FIFO is empty the filesystem is clean and no further action is
	 * needed.
	 */
	if (last_offset >= first_offset) {
		bytes = last_offset - first_offset;
	} else {
		bytes = rootmap->alloc_offset - first_offset +
			HAMMER_OFF_LONG_ENCODE(last_offset);
	}
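	/*
	 * Worked example of the wrap case above (illustrative numbers
	 * only): with a 128MB UNDO zone (alloc_offset low bits
	 * 0x8000000), first_offset at zone offset 0x7f00000 and
	 * last_offset at zone offset 0x0200000, the FIFO has wrapped
	 * and the active size is
	 * (0x8000000 - 0x7f00000) + 0x0200000 = 0x0300000 bytes.
	 */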
	if (bytes == 0) {
		degenerate_case = 1;
		error = 0;
		goto done;
	}

	hvkprintf(root_volume,
		"recovery undo  %016jx-%016jx (%jd bytes)%s\n",
		(intmax_t)first_offset,
		(intmax_t)last_offset,
		(intmax_t)bytes,
		(hmp->ronly ? " (RO)" : " (RW)"));
	if (bytes > HAMMER_OFF_LONG_ENCODE(rootmap->alloc_offset)) {
		hkprintf("Undo size is absurd, unable to mount\n");
		error = EIO;
		goto done;
	}

	/*
	 * Scan the UNDOs backwards.
	 */
	scan_offset = last_offset;

	while ((int64_t)bytes > 0) {
		KKASSERT(scan_offset != first_offset);
		head = hammer_recover_scan_rev(hmp, root_volume,
					       &scan_offset, &error, &buffer);
		if (error)
			break;

		/*
		 * Normal UNDO
		 */
		error = hammer_recover_undo(hmp, root_volume, &head->undo);
		if (error) {
			hvkprintf(root_volume,
				"UNDO record at %016jx failed\n",
				(intmax_t)scan_offset - head->head.hdr_size);
			break;
		}

		/*
		 * The first REDO_SYNC record encountered (scanning backwards)
		 * enables REDO processing.
		 */
		if (head->head.hdr_type == HAMMER_HEAD_TYPE_REDO &&
		    head->redo.redo_flags == HAMMER_REDO_SYNC) {
			if (hmp->flags & HAMMER_MOUNT_REDO_RECOVERY_REQ) {
				hvkprintf(root_volume,
					"Ignoring extra REDO_SYNC "
					"records in UNDO/REDO FIFO.\n");
			} else {
				hmp->flags |= HAMMER_MOUNT_REDO_RECOVERY_REQ;
				hmp->recover_stage2_offset =
					head->redo.redo_offset;
				hvkprintf(root_volume,
					"Found REDO_SYNC %016jx\n",
					(intmax_t)head->redo.redo_offset);
			}
		}

		bytes -= head->head.hdr_size;

		/*
		 * If too many dirty buffers have built up we have to flush
		 * them out.  As long as we do not flush out the volume
		 * header a crash here should not cause any problems.
		 *
		 * The buffer must be released so the flush can assert that
		 * all buffers are idle.
		 */
		if (hammer_flusher_meta_limit(hmp)) {
			if (buffer) {
				hammer_rel_buffer(buffer, 0);
				buffer = NULL;
			}
			if (hmp->ronly == 0) {
				hammer_recover_flush_buffers(hmp, root_volume,
							     0);
				hvkprintf(root_volume, "Continuing recovery\n");
			} else {
				hvkprintf(root_volume,
					"Recovery failure: "
					"Insufficient buffer cache to hold "
					"dirty buffers on read-only mount!\n");
				error = EIO;
				break;
			}
		}
	}
	KKASSERT(error || bytes == 0);
done:
	if (buffer) {
		hammer_rel_buffer(buffer, 0);
		buffer = NULL;
	}

	/*
	 * After completely flushing all the recovered buffers the volume
	 * header will also be flushed.
	 */
	if (root_volume->io.recovered == 0) {
		hammer_ref_volume(root_volume);
		root_volume->io.recovered = 1;
	}

	/*
	 * Finish up flushing (or discarding) recovered buffers.  FIFO
	 * indices in the volume header are updated to the actual undo
	 * range but will not be collapsed until stage 2.
	 */
	if (error == 0) {
		hammer_modify_volume_noundo(NULL, root_volume);
		rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
		rootmap->first_offset = first_offset;
		rootmap->next_offset = last_offset;
		hammer_modify_volume_done(root_volume);
		if (hmp->ronly == 0)
			hammer_recover_flush_buffers(hmp, root_volume, 1);
	} else {
		hammer_recover_flush_buffers(hmp, root_volume, -1);
	}
	if (degenerate_case == 0) {
		hvkprintf(root_volume, "recovery complete\n");
	} else {
		hvkprintf(root_volume, "mounted clean, no recovery needed\n");
	}
	return (error);
}

/*
 * Execute redo operations.
 *
 * This procedure is run at the end of the mount sequence, after the hammer
 * mount structure has been completely initialized but before the filesystem
 * goes live.  It can access standard cursors, the B-Tree, flush the
 * filesystem, and so forth.
 *
 * This code may only be called for read-write mounts or when a mount
 * switches from read-only to read-write.  vnodes may or may not be present.
 *
 * The stage1 code will have already calculated the correct FIFO range
 * for the nominal UNDO FIFO and stored it in the rootmap.  The extended
 * range for REDO is stored in hmp->recover_stage2_offset.
 */
int
hammer_recover_stage2(hammer_mount_t hmp, hammer_volume_t root_volume)
{
	hammer_blockmap_t rootmap;
	hammer_buffer_t buffer;
	hammer_off_t scan_offset;
	hammer_off_t oscan_offset;
	hammer_off_t bytes;
	hammer_off_t ext_bytes;
	hammer_fifo_any_t head;
	hammer_off_t first_offset;
	hammer_off_t last_offset;
	hammer_off_t ext_offset;
	struct hammer_rterm_rb_tree rterm_root;
	uint32_t seqno;
	int error;
	int verbose = 0;
	int dorscan;

	/*
	 * Stage 2 can only be run on a RW mount, or when the mount is
	 * switched from RO to RW.
	 */
	KKASSERT(hmp->ronly == 0);
	RB_INIT(&rterm_root);

	if (hammer_skip_redo == 1)
		hvkprintf(root_volume, "recovery redo marked as optional\n");

	if (hammer_skip_redo == 2) {
		hvkprintf(root_volume, "recovery redo skipped.\n");
		return (0);
	}

	/*
	 * Examine the UNDO FIFO.  If it is empty the filesystem is clean
	 * and no action need be taken.
	 */
	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	first_offset = rootmap->first_offset;
	last_offset  = rootmap->next_offset;
	if (first_offset == last_offset) {
		KKASSERT((hmp->flags & HAMMER_MOUNT_REDO_RECOVERY_REQ) == 0);
		return(0);
	}

	/*
	 * Stage2 must only be run once, and will not be run at all
	 * if Stage1 did not find a REDO_SYNC record.
	 */
	error = 0;
	buffer = NULL;

	if ((hmp->flags & HAMMER_MOUNT_REDO_RECOVERY_REQ) == 0)
		goto done;
	hmp->flags &= ~HAMMER_MOUNT_REDO_RECOVERY_REQ;
	hmp->flags |= HAMMER_MOUNT_REDO_RECOVERY_RUN;
	ext_offset = hmp->recover_stage2_offset;
	if (ext_offset == 0) {
		hvkprintf(root_volume,
			"REDO stage specified but no REDO_SYNC "
			"offset, ignoring\n");
		goto done;
	}

	/*
	 * Calculate the nominal UNDO range (this is not yet the extended
	 * range).
	 */
	if (last_offset >= first_offset) {
		bytes = last_offset - first_offset;
	} else {
		bytes = rootmap->alloc_offset - first_offset +
			HAMMER_OFF_LONG_ENCODE(last_offset);
	}
	hvkprintf(root_volume,
		"recovery redo  %016jx-%016jx (%jd bytes)%s\n",
		(intmax_t)first_offset,
		(intmax_t)last_offset,
		(intmax_t)bytes,
		(hmp->ronly ? " (RO)" : " (RW)"));
	verbose = 1;
	if (bytes > HAMMER_OFF_LONG_ENCODE(rootmap->alloc_offset)) {
		hkprintf("Undo size is absurd, unable to mount\n");
		error = EIO;
		goto fatal;
	}

	/*
	 * Scan the REDOs backwards collecting REDO_TERM_* information.
	 * This information is only collected for the extended range,
	 * non-inclusive of any TERMs in the nominal UNDO range.
	 *
	 * If the stage2 extended range is inside the nominal undo range
	 * we have nothing to scan.
	 *
	 * This must fit in memory!
	 */
	if (first_offset < last_offset) {
		/*
		 * [      first_offset........last_offset      ]
		 */
		if (ext_offset < first_offset) {
			dorscan = 1;
			ext_bytes = first_offset - ext_offset;
		} else if (ext_offset > last_offset) {
			dorscan = 1;
			ext_bytes = (rootmap->alloc_offset - ext_offset) +
				    HAMMER_OFF_LONG_ENCODE(first_offset);
		} else {
			ext_bytes = -(ext_offset - first_offset);
			dorscan = 0;
		}
	} else {
		/*
		 * [......last_offset         first_offset.....]
		 */
		if (ext_offset < last_offset) {
			ext_bytes = -((rootmap->alloc_offset - first_offset) +
				    HAMMER_OFF_LONG_ENCODE(ext_offset));
			dorscan = 0;
		} else if (ext_offset > first_offset) {
			ext_bytes = -(ext_offset - first_offset);
			dorscan = 0;
		} else {
			ext_bytes = first_offset - ext_offset;
			dorscan = 1;
		}
	}
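
	/*
	 * Example of the geometry above (hypothetical offsets): with a
	 * non-wrapped nominal range [first_offset..last_offset] and a
	 * REDO_SYNC pointing at an ext_offset below first_offset,
	 * dorscan is set and ext_bytes = first_offset - ext_offset is
	 * the gap that must be reverse-scanned for REDO_TERM_* records.
	 * If ext_offset instead falls inside the nominal range,
	 * ext_bytes comes out negative and no reverse scan is needed;
	 * the forward REDO pass below simply starts inside the nominal
	 * range.
	 */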
64402428fb6SMatthew Dillon 
	if (dorscan) {
		scan_offset = first_offset;
		hvkprintf(root_volume,
			"Find extended redo  %016jx, %jd extbytes\n",
			(intmax_t)ext_offset,
			(intmax_t)ext_bytes);
		seqno = hmp->recover_stage2_seqno - 1;
		for (;;) {
			head = hammer_recover_scan_rev(hmp, root_volume,
						       &scan_offset,
						       &error, &buffer);
			if (error)
				break;
			if (head->head.hdr_type != HAMMER_HEAD_TYPE_PAD) {
				if (head->head.hdr_seq != seqno) {
					error = ERANGE;
					break;
				}
				error = hammer_recover_redo_rec(
						hmp, &rterm_root,
						scan_offset, &head->redo);
				--seqno;
			}
			if (scan_offset == ext_offset)
				break;
		}
		if (error) {
			hvkprintf(root_volume,
				"Find extended redo failed %d, "
				"unable to run REDO\n",
				error);
			goto done;
		}
	} else {
		hvkprintf(root_volume,
			"Embedded extended redo %016jx, %jd extbytes\n",
			(intmax_t)ext_offset,
			(intmax_t)ext_bytes);
	}

	/*
	 * Scan the REDOs forwards through the entire extended range.
	 * Anything with a previously recorded matching TERM is discarded.
	 */
	scan_offset = ext_offset;
	bytes += ext_bytes;

	/*
	 * NOTE: when doing a forward scan the returned scan_offset is
	 *	 for the record following the returned record, so we
	 *	 have to play a bit.
	 */
	while ((int64_t)bytes > 0) {
		KKASSERT(scan_offset != last_offset);

		oscan_offset = scan_offset;
		head = hammer_recover_scan_fwd(hmp, root_volume,
					       &scan_offset, &error, &buffer);
		if (error)
			break;

		error = hammer_recover_redo_run(hmp, &rterm_root,
						oscan_offset, &head->redo);
		if (error) {
			hvkprintf(root_volume,
				"REDO record at %016jx failed\n",
				(intmax_t)scan_offset - head->head.hdr_size);
			break;
		}
		bytes -= head->head.hdr_size;
	}
	KKASSERT(error || bytes == 0);

done:
	if (buffer) {
		hammer_rel_buffer(buffer, 0);
		buffer = NULL;
	}

	/*
	 * Clean up the rterm tree.
	 */
	{
		hammer_rterm_t rterm;
		hammer_rterm_entry_t rte;

		while ((rterm = RB_ROOT(&rterm_root)) != NULL) {
			RB_REMOVE(hammer_rterm_rb_tree, &rterm_root, rterm);
			while ((rte = rterm->term_list) != NULL) {
				rterm->term_list = rte->next;
				kfree(rte, hmp->m_misc);
			}
			kfree(rterm, hmp->m_misc);
		}
	}

	/*
	 * Finish up flushing (or discarding) recovered buffers by executing
	 * a normal flush cycle.  Setting HMNT_UNDO_DIRTY bypasses degenerate
	 * case tests and forces the flush in order to update the FIFO indices.
	 *
	 * If a crash occurs during the flush the entire undo/redo will be
	 * re-run during recovery on the next mount.
	 */
	if (error == 0) {
		if (rootmap->first_offset != rootmap->next_offset)
			hmp->hflags |= HMNT_UNDO_DIRTY;
		hammer_flusher_sync(hmp);
	}
fatal:
	hmp->flags &= ~HAMMER_MOUNT_REDO_RECOVERY_RUN;
	if (verbose) {
		hvkprintf(root_volume, "End redo recovery\n");
	}

	if (error && hammer_skip_redo == 1)
		hvkprintf(root_volume,
			"recovery redo error %d, skipping.\n",
			error);

	return (hammer_skip_redo ? 0 : error);
}

/*
 * Scan backwards from *scan_offsetp, return the FIFO record prior to the
 * record at *scan_offsetp, or NULL if an error occurred.
 *
 * On return *scan_offsetp will be the offset of the returned record.
 */
hammer_fifo_any_t
hammer_recover_scan_rev(hammer_mount_t hmp, hammer_volume_t root_volume,
			hammer_off_t *scan_offsetp,
			int *errorp, hammer_buffer_t *bufferp)
{
	hammer_off_t scan_offset;
	hammer_blockmap_t rootmap;
	hammer_fifo_any_t head;
	hammer_fifo_tail_t tail;

	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	scan_offset = *scan_offsetp;

	if (hammer_debug_general & 0x0080)
		hdkprintf("rev scan_offset %016jx\n", (intmax_t)scan_offset);
	if (scan_offset == HAMMER_ENCODE_UNDO(0))
		scan_offset = rootmap->alloc_offset;
	if (scan_offset - sizeof(*tail) < HAMMER_ENCODE_UNDO(0)) {
		hvkprintf(root_volume,
			"UNDO record at %016jx FIFO underflow\n",
			(intmax_t)scan_offset);
		*errorp = EIO;
		return (NULL);
	}
	tail = hammer_bread(hmp, scan_offset - sizeof(*tail),
			    errorp, bufferp);
	if (*errorp) {
		hvkprintf(root_volume,
			"Unable to read UNDO TAIL at %016jx\n",
			(intmax_t)scan_offset - sizeof(*tail));
		return (NULL);
	}

	if (hammer_check_tail_signature(hmp, tail, scan_offset) != 0) {
		hvkprintf(root_volume,
			"Illegal UNDO TAIL signature at %016jx\n",
			(intmax_t)scan_offset - sizeof(*tail));
		*errorp = EIO;
		return (NULL);
	}
	head = (void *)((char *)tail + sizeof(*tail) - tail->tail_size);
	*scan_offsetp = scan_offset - head->head.hdr_size;

	return (head);
}

/*
 * Scan forwards from *scan_offsetp, return the FIFO record or NULL if
 * an error occurred.
 *
 * On return *scan_offsetp will be the offset of the record following
 * the returned record.
 */
hammer_fifo_any_t
hammer_recover_scan_fwd(hammer_mount_t hmp, hammer_volume_t root_volume,
			hammer_off_t *scan_offsetp,
			int *errorp, hammer_buffer_t *bufferp)
{
	hammer_off_t scan_offset;
	hammer_blockmap_t rootmap;
	hammer_fifo_any_t head;

	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	scan_offset = *scan_offsetp;

	if (hammer_debug_general & 0x0080)
		hdkprintf("fwd scan_offset %016jx\n", (intmax_t)scan_offset);
	if (scan_offset == rootmap->alloc_offset)
		scan_offset = HAMMER_ENCODE_UNDO(0);

	head = hammer_bread(hmp, scan_offset, errorp, bufferp);
	if (*errorp) {
		hvkprintf(root_volume,
			"Unable to read UNDO HEAD at %016jx\n",
			(intmax_t)scan_offset);
		return (NULL);
	}

	if (hammer_check_head_signature(hmp, &head->head, scan_offset) != 0) {
		hvkprintf(root_volume,
			"Illegal UNDO HEAD signature at %016jx\n",
			(intmax_t)scan_offset);
		*errorp = EIO;
		return (NULL);
	}
	scan_offset += head->head.hdr_size;
	if (scan_offset == rootmap->alloc_offset)
		scan_offset = HAMMER_ENCODE_UNDO(0);
	*scan_offsetp = scan_offset;

	return (head);
}
86602428fb6SMatthew Dillon 
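/*
 * Illustrative on-media layout of a FIFO record, as assumed by the
 * signature checks below (field names from this file; not to scale):
 *
 *	beg_off --> +---------------------+
 *		    | hammer_fifo_head    |  hdr_signature, hdr_type,
 *		    +---------------------+  hdr_size, hdr_seq, hdr_crc
 *		    | payload ...         |
 *		    +---------------------+
 *		    | hammer_fifo_tail    |  tail_signature, tail_type,
 *	end_off --> +---------------------+  tail_size
 *
 * Because tail_size must equal hdr_size, the head can be located from
 * the tail (and vice versa) without knowing the record type.
 */
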
/*
 * Helper function for hammer_check_{head,tail}_signature().  Check stuff
 * once the head and tail have been established.
 *
 * This function validates the entire FIFO record wrapper.
 */
static __inline
int
_hammer_check_signature(hammer_mount_t hmp,
			hammer_fifo_head_t head, hammer_fifo_tail_t tail,
			hammer_off_t beg_off)
{
	hammer_off_t end_off;
	int bytes;

	/*
	 * Check signatures.  The tail signature is allowed to be the
	 * head signature only for 8-byte PADs.
	 */
	if (head->hdr_signature != HAMMER_HEAD_SIGNATURE) {
		hkprintf("FIFO record bad head signature %04x at %016jx\n",
			head->hdr_signature,
			(intmax_t)beg_off);
		return(2);
	}
	if (head->hdr_size < HAMMER_HEAD_ALIGN ||
	    (head->hdr_size & HAMMER_HEAD_ALIGN_MASK)) {
		hkprintf("FIFO record unaligned or bad size %04x at %016jx\n",
			head->hdr_size,
			(intmax_t)beg_off);
		return(2);
	}
	end_off = beg_off + head->hdr_size;

	if (head->hdr_type != HAMMER_HEAD_TYPE_PAD ||
	    (size_t)(end_off - beg_off) != sizeof(*tail)) {
		if (head->hdr_type != tail->tail_type) {
			hkprintf("FIFO record head/tail type mismatch "
				"%04x %04x at %016jx\n",
				head->hdr_type, tail->tail_type,
				(intmax_t)beg_off);
			return(2);
		}
		if (head->hdr_size != tail->tail_size) {
			hkprintf("FIFO record head/tail size mismatch "
				"%04x %04x at %016jx\n",
				head->hdr_size, tail->tail_size,
				(intmax_t)beg_off);
			return(2);
		}
		if (tail->tail_signature != HAMMER_TAIL_SIGNATURE) {
			hkprintf("FIFO record bad tail signature "
				"%04x at %016jx\n",
				tail->tail_signature,
				(intmax_t)beg_off);
			return(3);
		}
	}

	/*
	 * Non-PAD records must have a CRC and must be sized at
	 * least large enough to fit the head and tail.
	 */
	if (head->hdr_type != HAMMER_HEAD_TYPE_PAD) {
		if (hammer_crc_test_fifo_head(hmp->version,
					      head, head->hdr_size) == 0) {
			hkprintf("FIFO record CRC failed %08x at %016jx\n",
				head->hdr_crc, (intmax_t)beg_off);
			return(EIO);
		}
		if (head->hdr_size < sizeof(*head) + sizeof(*tail)) {
			hkprintf("FIFO record too small %04x at %016jx\n",
				head->hdr_size,
				(intmax_t)beg_off);
			return(EIO);
		}
	}

	/*
	 * Check the tail.
	 */
	bytes = head->hdr_size;
	tail = (void *)((char *)head + bytes - sizeof(*tail));
	if (tail->tail_size != head->hdr_size) {
		hkprintf("Bad tail size %04x vs %04x at %016jx\n",
			tail->tail_size, head->hdr_size,
			(intmax_t)beg_off);
		return(EIO);
	}
	if (tail->tail_type != head->hdr_type) {
		hkprintf("Bad tail type %04x vs %04x at %016jx\n",
			tail->tail_type, head->hdr_type,
			(intmax_t)beg_off);
		return(EIO);
	}

	return(0);
}
96502428fb6SMatthew Dillon 
96602428fb6SMatthew Dillon /*
96702428fb6SMatthew Dillon  * Check that the FIFO record is in-bounds given the head and the
96802428fb6SMatthew Dillon  * hammer offset.
96902428fb6SMatthew Dillon  *
97002428fb6SMatthew Dillon  * Also checks that the head and tail structures agree with each other,
97102428fb6SMatthew Dillon  * but does not check beyond the signature, type, and size.
97202428fb6SMatthew Dillon  */
97302428fb6SMatthew Dillon static int
hammer_check_head_signature(hammer_mount_t hmp,hammer_fifo_head_t head,hammer_off_t beg_off)974*4c09d9c4SMatthew Dillon hammer_check_head_signature(hammer_mount_t hmp, hammer_fifo_head_t head,
975*4c09d9c4SMatthew Dillon 			    hammer_off_t beg_off)
97602428fb6SMatthew Dillon {
97702428fb6SMatthew Dillon 	hammer_fifo_tail_t tail;
97802428fb6SMatthew Dillon 	hammer_off_t end_off;
97902428fb6SMatthew Dillon 
98002428fb6SMatthew Dillon 	/*
98102428fb6SMatthew Dillon 	 * head overlaps buffer boundary.  This could be a PAD so only
98202428fb6SMatthew Dillon 	 * check the minimum PAD size here.
98302428fb6SMatthew Dillon 	 */
98402428fb6SMatthew Dillon 	if (((beg_off + sizeof(*tail) - 1) ^ (beg_off)) & ~HAMMER_BUFMASK64)
98502428fb6SMatthew Dillon 		return(1);
98602428fb6SMatthew Dillon 
98702428fb6SMatthew Dillon 	/*
98802428fb6SMatthew Dillon 	 * Calculate the ending offset and make sure the record does
98902428fb6SMatthew Dillon 	 * not cross a buffer boundary.
99002428fb6SMatthew Dillon 	 */
99102428fb6SMatthew Dillon 	end_off = beg_off + head->hdr_size;
99202428fb6SMatthew Dillon 	if ((beg_off ^ (end_off - 1)) & ~HAMMER_BUFMASK64)
99302428fb6SMatthew Dillon 		return(1);
99402428fb6SMatthew Dillon 	tail = (void *)((char *)head + head->hdr_size - sizeof(*tail));
995*4c09d9c4SMatthew Dillon 	return (_hammer_check_signature(hmp, head, tail, beg_off));
99602428fb6SMatthew Dillon }
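
#if 0
/*
 * Illustrative sketch (hypothetical helper): the XOR test used above.
 * Two byte offsets fall within the same large buffer exactly when all
 * of their bits above HAMMER_BUFMASK64 agree, so XORing the first and
 * last byte offset of a record and masking off the intra-buffer bits
 * yields non-zero only when the record crosses a buffer boundary.
 */
static int
record_crosses_buffer(hammer_off_t beg_off, hammer_off_t end_off)
{
	return (((beg_off ^ (end_off - 1)) & ~HAMMER_BUFMASK64) != 0);
}
#endif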
99702428fb6SMatthew Dillon 
99802428fb6SMatthew Dillon /*
99902428fb6SMatthew Dillon  * Check that the FIFO record is in-bounds given the tail and the
100002428fb6SMatthew Dillon  * hammer offset.  The offset is pointing at the ending boundary of the
100102428fb6SMatthew Dillon  * record.
100202428fb6SMatthew Dillon  *
100302428fb6SMatthew Dillon  * Also checks that the head and tail structures agree with each other,
100402428fb6SMatthew Dillon  * but does not check beyond the signature, type, and size.
100502428fb6SMatthew Dillon  */
1006f90dde4cSMatthew Dillon static int
1007*4c09d9c4SMatthew Dillon hammer_check_tail_signature(hammer_mount_t hmp, hammer_fifo_tail_t tail,
1008*4c09d9c4SMatthew Dillon 			    hammer_off_t end_off)
1009f90dde4cSMatthew Dillon {
101002428fb6SMatthew Dillon 	hammer_fifo_head_t head;
101102428fb6SMatthew Dillon 	hammer_off_t beg_off;
1012f90dde4cSMatthew Dillon 
1013f90dde4cSMatthew Dillon 	/*
1014f90dde4cSMatthew Dillon 	 * tail overlaps buffer boundary
1015f90dde4cSMatthew Dillon 	 */
101602428fb6SMatthew Dillon 	if (((end_off - sizeof(*tail)) ^ (end_off - 1)) & ~HAMMER_BUFMASK64)
1017f90dde4cSMatthew Dillon 		return(1);
1018f90dde4cSMatthew Dillon 
1019f90dde4cSMatthew Dillon 	/*
102002428fb6SMatthew Dillon 	 * Calculate the beginning offset and make sure the record does
102102428fb6SMatthew Dillon 	 * not cross a buffer boundary.
1022f90dde4cSMatthew Dillon 	 */
102302428fb6SMatthew Dillon 	beg_off = end_off - tail->tail_size;
102402428fb6SMatthew Dillon 	if ((beg_off ^ (end_off - 1)) & ~HAMMER_BUFMASK64)
102502428fb6SMatthew Dillon 		return(1);
102602428fb6SMatthew Dillon 	head = (void *)((char *)tail + sizeof(*tail) - tail->tail_size);
1027*4c09d9c4SMatthew Dillon 	return (_hammer_check_signature(hmp, head, tail, beg_off));
1028f90dde4cSMatthew Dillon }
1029f90dde4cSMatthew Dillon 
1030f90dde4cSMatthew Dillon static int
103151c35492SMatthew Dillon hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
103202428fb6SMatthew Dillon 		    hammer_fifo_undo_t undo)
1033f90dde4cSMatthew Dillon {
1034f90dde4cSMatthew Dillon 	hammer_volume_t volume;
1035f90dde4cSMatthew Dillon 	hammer_buffer_t buffer;
10362f85fa4dSMatthew Dillon 	hammer_off_t buf_offset;
1037f90dde4cSMatthew Dillon 	int zone;
1038f90dde4cSMatthew Dillon 	int error;
1039f90dde4cSMatthew Dillon 	int vol_no;
104002428fb6SMatthew Dillon 	int bytes;
104146137e17STomohiro Kusumi 	uint32_t offset;
1042f90dde4cSMatthew Dillon 
1043f90dde4cSMatthew Dillon 	/*
104402428fb6SMatthew Dillon 	 * Only process UNDO records.  Flag if we find other records to
104502428fb6SMatthew Dillon 	 * optimize stage2 recovery.
1046f90dde4cSMatthew Dillon 	 */
1047c58123daSMatthew Dillon 	if (undo->head.hdr_type != HAMMER_HEAD_TYPE_UNDO)
1048f90dde4cSMatthew Dillon 		return(0);
104909ac686bSMatthew Dillon 
1050f90dde4cSMatthew Dillon 	/*
1051f90dde4cSMatthew Dillon 	 * Validate the UNDO record.
1052f90dde4cSMatthew Dillon 	 */
105302428fb6SMatthew Dillon 	bytes = undo->head.hdr_size - sizeof(*undo) -
105402428fb6SMatthew Dillon 		sizeof(struct hammer_fifo_tail);
105502428fb6SMatthew Dillon 	if (bytes < 0 || undo->undo_data_bytes < 0 ||
105602428fb6SMatthew Dillon 	    undo->undo_data_bytes > bytes) {
1057d053aa8aSTomohiro Kusumi 		hkprintf("Corrupt UNDO record, undo_data_bytes %d/%d\n",
105802428fb6SMatthew Dillon 			undo->undo_data_bytes, bytes);
1059f90dde4cSMatthew Dillon 		return(EIO);
1060f90dde4cSMatthew Dillon 	}
1061f90dde4cSMatthew Dillon 
106202428fb6SMatthew Dillon 	bytes = undo->undo_data_bytes;
106302428fb6SMatthew Dillon 
1064f90dde4cSMatthew Dillon 	/*
1065f90dde4cSMatthew Dillon 	 * The undo offset may only be a zone-1 or zone-2 offset.
1066f90dde4cSMatthew Dillon 	 *
1067f90dde4cSMatthew Dillon 	 * Currently we only support a zone-1 offset representing the
1068f90dde4cSMatthew Dillon 	 * volume header.
1069f90dde4cSMatthew Dillon 	 */
1070f90dde4cSMatthew Dillon 	zone = HAMMER_ZONE_DECODE(undo->undo_offset);
1071f90dde4cSMatthew Dillon 	offset = undo->undo_offset & HAMMER_BUFMASK;
1072f90dde4cSMatthew Dillon 
107302428fb6SMatthew Dillon 	if (offset + bytes > HAMMER_BUFSIZE) {
1074d053aa8aSTomohiro Kusumi 		hkprintf("Corrupt UNDO record, bad offset\n");
1075f90dde4cSMatthew Dillon 		return (EIO);
1076f90dde4cSMatthew Dillon 	}
1077f90dde4cSMatthew Dillon 
1078f90dde4cSMatthew Dillon 	switch(zone) {
1079f90dde4cSMatthew Dillon 	case HAMMER_ZONE_RAW_VOLUME_INDEX:
1080f90dde4cSMatthew Dillon 		vol_no = HAMMER_VOL_DECODE(undo->undo_offset);
1081f90dde4cSMatthew Dillon 		volume = hammer_get_volume(hmp, vol_no, &error);
1082f90dde4cSMatthew Dillon 		if (volume == NULL) {
1083d053aa8aSTomohiro Kusumi 			hkprintf("UNDO record, cannot access volume %d\n",
1084d053aa8aSTomohiro Kusumi 				vol_no);
1085f90dde4cSMatthew Dillon 			break;
1086f90dde4cSMatthew Dillon 		}
1087f1c0ae53STomohiro Kusumi 		hammer_modify_volume_noundo(NULL, volume);
1088f90dde4cSMatthew Dillon 		hammer_recover_copy_undo(undo->undo_offset,
1089f90dde4cSMatthew Dillon 					 (char *)(undo + 1),
1090f90dde4cSMatthew Dillon 					 (char *)volume->ondisk + offset,
109102428fb6SMatthew Dillon 					 bytes);
1092f90dde4cSMatthew Dillon 		hammer_modify_volume_done(volume);
109351c35492SMatthew Dillon 
109451c35492SMatthew Dillon 		/*
10959f5097dcSMatthew Dillon 		 * Multiple modifications may be made to the same buffer,
10969f5097dcSMatthew Dillon 		 * and the volume header cannot be written out until
10979f5097dcSMatthew Dillon 		 * everything else has been flushed.  Keeping the buffer
109851c35492SMatthew Dillon 		 * in the recovered state also covers the read-only case
109951c35492SMatthew Dillon 		 * by preventing the kernel from flushing it.
110051c35492SMatthew Dillon 		 */
110151c35492SMatthew Dillon 		if (volume->io.recovered == 0)
110251c35492SMatthew Dillon 			volume->io.recovered = 1;
110351c35492SMatthew Dillon 		else
1104f90dde4cSMatthew Dillon 			hammer_rel_volume(volume, 0);
1105f90dde4cSMatthew Dillon 		break;
1106f90dde4cSMatthew Dillon 	case HAMMER_ZONE_RAW_BUFFER_INDEX:
11072f85fa4dSMatthew Dillon 		buf_offset = undo->undo_offset & ~HAMMER_BUFMASK64;
11084a2796f3SMatthew Dillon 		buffer = hammer_get_buffer(hmp, buf_offset, HAMMER_BUFSIZE,
11094a2796f3SMatthew Dillon 					   0, &error);
1110f90dde4cSMatthew Dillon 		if (buffer == NULL) {
1111d053aa8aSTomohiro Kusumi 			hkprintf("UNDO record, cannot access buffer %016jx\n",
111202428fb6SMatthew Dillon 				(intmax_t)undo->undo_offset);
1113f90dde4cSMatthew Dillon 			break;
1114f90dde4cSMatthew Dillon 		}
1115f1c0ae53STomohiro Kusumi 		hammer_modify_buffer_noundo(NULL, buffer);
1116f90dde4cSMatthew Dillon 		hammer_recover_copy_undo(undo->undo_offset,
1117f90dde4cSMatthew Dillon 					 (char *)(undo + 1),
1118f90dde4cSMatthew Dillon 					 (char *)buffer->ondisk + offset,
111902428fb6SMatthew Dillon 					 bytes);
1120f90dde4cSMatthew Dillon 		hammer_modify_buffer_done(buffer);
112151c35492SMatthew Dillon 
112251c35492SMatthew Dillon 		/*
112351c35492SMatthew Dillon 		 * Multiple modifications may be made to the same buffer,
112451c35492SMatthew Dillon 		 * improve performance by delaying the flush.  This also
112551c35492SMatthew Dillon 		 * covers the read-only case by preventing the kernel from
112651c35492SMatthew Dillon 		 * flushing the buffer.
112751c35492SMatthew Dillon 		 */
112851c35492SMatthew Dillon 		if (buffer->io.recovered == 0)
112951c35492SMatthew Dillon 			buffer->io.recovered = 1;
113051c35492SMatthew Dillon 		else
1131f90dde4cSMatthew Dillon 			hammer_rel_buffer(buffer, 0);
1132f90dde4cSMatthew Dillon 		break;
1133f90dde4cSMatthew Dillon 	default:
1134d053aa8aSTomohiro Kusumi 		hkprintf("Corrupt UNDO record\n");
1135f90dde4cSMatthew Dillon 		error = EIO;
1136f90dde4cSMatthew Dillon 	}
1137f90dde4cSMatthew Dillon 	return (error);
1138f90dde4cSMatthew Dillon }
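
#if 0
/*
 * Illustrative sketch (hypothetical helper): the net effect of
 * executing one UNDO record.  The saved pre-image immediately follows
 * the hammer_fifo_undo header and is copied back over the media at
 * the recorded offset, exactly as the volume and raw-buffer cases
 * above do via hammer_recover_copy_undo().
 */
static void
undo_restore(hammer_fifo_undo_t undo, char *ondisk_base)
{
	uint32_t offset = undo->undo_offset & HAMMER_BUFMASK;

	bcopy((char *)(undo + 1),	/* saved pre-image */
	      ondisk_base + offset,	/* live media image */
	      undo->undo_data_bytes);
}
#endif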
1139f90dde4cSMatthew Dillon 
1140f90dde4cSMatthew Dillon static void
1141f90dde4cSMatthew Dillon hammer_recover_copy_undo(hammer_off_t undo_offset,
1142f90dde4cSMatthew Dillon 			 char *src, char *dst, int bytes)
1143f90dde4cSMatthew Dillon {
1144973c11b9SMatthew Dillon 	if (hammer_debug_general & 0x0080) {
114533234d14STomohiro Kusumi 		hdkprintf("UNDO %016jx: %d\n",
114602428fb6SMatthew Dillon 			(intmax_t)undo_offset, bytes);
1147973c11b9SMatthew Dillon 	}
1148ec4e8497SMatthew Dillon #if 0
114933234d14STomohiro Kusumi 	hkprintf("UNDO %016jx:", (intmax_t)undo_offset);
1150f90dde4cSMatthew Dillon 	hammer_recover_debug_dump(22, dst, bytes);
1151f90dde4cSMatthew Dillon 	kprintf("%22s", "to:");
1152f90dde4cSMatthew Dillon 	hammer_recover_debug_dump(22, src, bytes);
1153ec4e8497SMatthew Dillon #endif
1154f90dde4cSMatthew Dillon 	bcopy(src, dst, bytes);
1155f90dde4cSMatthew Dillon }
1156f90dde4cSMatthew Dillon 
1157c58123daSMatthew Dillon /*
1158c58123daSMatthew Dillon  * Record HAMMER_REDO_TERM_WRITE and HAMMER_REDO_TERM_TRUNC operations
1159c58123daSMatthew Dillon  * during the backwards scan of the extended UNDO/REDO FIFO.  This scan
1160c58123daSMatthew Dillon  * does not include the nominal UNDO range, just the extended range.
1161c58123daSMatthew Dillon  */
1162c58123daSMatthew Dillon int
1163c58123daSMatthew Dillon hammer_recover_redo_rec(hammer_mount_t hmp, struct hammer_rterm_rb_tree *root,
1164c58123daSMatthew Dillon 			hammer_off_t scan_offset, hammer_fifo_redo_t redo)
1165c58123daSMatthew Dillon {
1166c58123daSMatthew Dillon 	hammer_rterm_t rterm;
1167c58123daSMatthew Dillon 	hammer_rterm_t nrterm;
1168c58123daSMatthew Dillon 	hammer_rterm_entry_t rte;
1169c58123daSMatthew Dillon 
1170c58123daSMatthew Dillon 	if (redo->head.hdr_type != HAMMER_HEAD_TYPE_REDO)
1171c58123daSMatthew Dillon 		return(0);
1172c58123daSMatthew Dillon 	if (redo->redo_flags != HAMMER_REDO_TERM_WRITE &&
1173c58123daSMatthew Dillon 	    redo->redo_flags != HAMMER_REDO_TERM_TRUNC) {
1174c58123daSMatthew Dillon 		return(0);
1175c58123daSMatthew Dillon 	}
1176c58123daSMatthew Dillon 
1177c58123daSMatthew Dillon 	nrterm = kmalloc(sizeof(*nrterm), hmp->m_misc, M_WAITOK|M_ZERO);
1178c58123daSMatthew Dillon 	nrterm->redo_objid = redo->redo_objid;
1179c58123daSMatthew Dillon 	nrterm->redo_localization = redo->redo_localization;
1180c58123daSMatthew Dillon 	nrterm->redo_flags = redo->redo_flags;
1181c58123daSMatthew Dillon 	nrterm->redo_offset = redo->redo_offset;
1182c58123daSMatthew Dillon 
1183c58123daSMatthew Dillon 	rterm = RB_INSERT(hammer_rterm_rb_tree, root, nrterm);
1184c58123daSMatthew Dillon 	if (rterm)
1185c58123daSMatthew Dillon 		kfree(nrterm, hmp->m_misc);
1186c58123daSMatthew Dillon 	else
1187c58123daSMatthew Dillon 		rterm = nrterm;
1188c58123daSMatthew Dillon 
1189fad4297bSMatthew Dillon 	if (bootverbose) {
119033234d14STomohiro Kusumi 		hkprintf("record record %016jx objid %016jx "
1191fad4297bSMatthew Dillon 			"offset %016jx flags %08x\n",
1192c58123daSMatthew Dillon 			(intmax_t)scan_offset,
1193c58123daSMatthew Dillon 			(intmax_t)redo->redo_objid,
1194c58123daSMatthew Dillon 			(intmax_t)redo->redo_offset,
1195c58123daSMatthew Dillon 			(int)redo->redo_flags);
1196fad4297bSMatthew Dillon 	}
1197c58123daSMatthew Dillon 
1198c58123daSMatthew Dillon 	/*
1199c58123daSMatthew Dillon 	 * Scan in reverse order, rte prepended, so the rte list will be
1200c58123daSMatthew Dillon 	 * in forward order.
1201c58123daSMatthew Dillon 	 */
1202c58123daSMatthew Dillon 	rte = kmalloc(sizeof(*rte), hmp->m_misc, M_WAITOK|M_ZERO);
1203c58123daSMatthew Dillon 	rte->fifo_offset = scan_offset;
1204c58123daSMatthew Dillon 	rte->next = rterm->term_list;
1205c58123daSMatthew Dillon 	rterm->term_list = rte;
1206c58123daSMatthew Dillon 
1207c58123daSMatthew Dillon 	return(0);
1208c58123daSMatthew Dillon }
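
#if 0
/*
 * Illustrative sketch (hypothetical helper and offsets): the prepend
 * used above in isolation.  Stage1 scans the FIFO backwards, visiting
 * TERMs newest-first, so prepending reverses the reversal: visiting
 * fifo offsets 0x300, 0x200, 0x100 leaves term_list ordered
 * 0x100 -> 0x200 -> 0x300 for the forward stage2 scan.
 */
static void
rterm_prepend(hammer_mount_t hmp, hammer_rterm_t rterm,
	      hammer_off_t scan_offset)
{
	hammer_rterm_entry_t rte;

	rte = kmalloc(sizeof(*rte), hmp->m_misc, M_WAITOK|M_ZERO);
	rte->fifo_offset = scan_offset;
	rte->next = rterm->term_list;	/* push front */
	rterm->term_list = rte;
}
#endif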
1209c58123daSMatthew Dillon 
1210c58123daSMatthew Dillon /*
1211c58123daSMatthew Dillon  * Execute HAMMER_REDO_WRITE and HAMMER_REDO_TRUNC operations during
1212c58123daSMatthew Dillon  * the forwards scan of the entire extended UNDO/REDO FIFO range.
1213c58123daSMatthew Dillon  *
1214c58123daSMatthew Dillon  * Records matching previously recorded TERMs have already been committed
1215c58123daSMatthew Dillon  * and are ignored.
1216c58123daSMatthew Dillon  */
1217c58123daSMatthew Dillon int
1218c58123daSMatthew Dillon hammer_recover_redo_run(hammer_mount_t hmp, struct hammer_rterm_rb_tree *root,
1219c58123daSMatthew Dillon 			hammer_off_t scan_offset, hammer_fifo_redo_t redo)
1220c58123daSMatthew Dillon {
1221c58123daSMatthew Dillon 	struct hammer_rterm rtval;
1222c58123daSMatthew Dillon 	hammer_rterm_t rterm;
1223c58123daSMatthew Dillon 	hammer_rterm_entry_t rte;
1224c58123daSMatthew Dillon 
1225c58123daSMatthew Dillon 	if (redo->head.hdr_type != HAMMER_HEAD_TYPE_REDO)
1226c58123daSMatthew Dillon 		return(0);
1227c58123daSMatthew Dillon 
1228c58123daSMatthew Dillon 	switch(redo->redo_flags) {
1229c58123daSMatthew Dillon 	case HAMMER_REDO_WRITE:
1230c58123daSMatthew Dillon 	case HAMMER_REDO_TRUNC:
1231c58123daSMatthew Dillon 		/*
1232c58123daSMatthew Dillon 		 * We hit a REDO request.  The REDO request is only executed
1233c58123daSMatthew Dillon 		 * if there is no matching TERM.
1234c58123daSMatthew Dillon 		 */
1235c58123daSMatthew Dillon 		bzero(&rtval, sizeof(rtval));
1236c58123daSMatthew Dillon 		rtval.redo_objid = redo->redo_objid;
1237c58123daSMatthew Dillon 		rtval.redo_localization = redo->redo_localization;
1238c58123daSMatthew Dillon 		rtval.redo_offset = redo->redo_offset;
1239c58123daSMatthew Dillon 		rtval.redo_flags = (redo->redo_flags == HAMMER_REDO_WRITE) ?
1240c58123daSMatthew Dillon 				   HAMMER_REDO_TERM_WRITE :
1241c58123daSMatthew Dillon 				   HAMMER_REDO_TERM_TRUNC;
1242c58123daSMatthew Dillon 
1243c58123daSMatthew Dillon 		rterm = RB_FIND(hammer_rterm_rb_tree, root, &rtval);
1244c58123daSMatthew Dillon 		if (rterm) {
1245418cb5e5SMatthew Dillon 			if (bootverbose) {
124633234d14STomohiro Kusumi 				hkprintf("ignore record %016jx objid %016jx "
1247c58123daSMatthew Dillon 					"offset %016jx flags %08x\n",
1248c58123daSMatthew Dillon 					(intmax_t)scan_offset,
1249c58123daSMatthew Dillon 					(intmax_t)redo->redo_objid,
1250c58123daSMatthew Dillon 					(intmax_t)redo->redo_offset,
1251c58123daSMatthew Dillon 					(int)redo->redo_flags);
1252418cb5e5SMatthew Dillon 			}
1253c58123daSMatthew Dillon 			break;
1254c58123daSMatthew Dillon 		}
1255418cb5e5SMatthew Dillon 		if (bootverbose) {
125633234d14STomohiro Kusumi 			hkprintf("run    record %016jx objid %016jx "
1257c58123daSMatthew Dillon 				"offset %016jx flags %08x\n",
1258c58123daSMatthew Dillon 				(intmax_t)scan_offset,
1259c58123daSMatthew Dillon 				(intmax_t)redo->redo_objid,
1260c58123daSMatthew Dillon 				(intmax_t)redo->redo_offset,
1261c58123daSMatthew Dillon 				(int)redo->redo_flags);
1262418cb5e5SMatthew Dillon 		}
1263c58123daSMatthew Dillon 
1264c58123daSMatthew Dillon 		/*
1265c58123daSMatthew Dillon 		 * Redo stage2 can access a live filesystem; acquire the
1266c58123daSMatthew Dillon 		 * vnode and replay the operation through it.
1267c58123daSMatthew Dillon 		 */
1268c58123daSMatthew Dillon 		hammer_recover_redo_exec(hmp, redo);
1269c58123daSMatthew Dillon 		break;
1270c58123daSMatthew Dillon 	case HAMMER_REDO_TERM_WRITE:
1271c58123daSMatthew Dillon 	case HAMMER_REDO_TERM_TRUNC:
1272c58123daSMatthew Dillon 		/*
1273c58123daSMatthew Dillon 		 * As we encounter TERMs in the forward scan we remove
1274c58123daSMatthew Dillon 		 * them.  Once the forward scan hits the nominal undo range
1275c58123daSMatthew Dillon 		 * there will be no more recorded TERMs.
1276c58123daSMatthew Dillon 		 */
1277c58123daSMatthew Dillon 		bzero(&rtval, sizeof(rtval));
1278c58123daSMatthew Dillon 		rtval.redo_objid = redo->redo_objid;
1279c58123daSMatthew Dillon 		rtval.redo_localization = redo->redo_localization;
1280c58123daSMatthew Dillon 		rtval.redo_flags = redo->redo_flags;
1281c58123daSMatthew Dillon 		rtval.redo_offset = redo->redo_offset;
1282c58123daSMatthew Dillon 
1283c58123daSMatthew Dillon 		rterm = RB_FIND(hammer_rterm_rb_tree, root, &rtval);
1284c58123daSMatthew Dillon 		if (rterm) {
1285c58123daSMatthew Dillon 			if ((rte = rterm->term_list) != NULL) {
1286c58123daSMatthew Dillon 				KKASSERT(rte->fifo_offset == scan_offset);
1287c58123daSMatthew Dillon 				rterm->term_list = rte->next;
1288c58123daSMatthew Dillon 				kfree(rte, hmp->m_misc);
1289c58123daSMatthew Dillon 			}
1290c58123daSMatthew Dillon 		}
1291c58123daSMatthew Dillon 		break;
1292c58123daSMatthew Dillon 	}
1293c58123daSMatthew Dillon 	return(0);
1294c58123daSMatthew Dillon }
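
#if 0
/*
 * Illustrative sketch (hypothetical helper): the lookup key stage2
 * builds for a REDO record, isolated from the switch above.  A
 * WRITE/TRUNC REDO is suppressed when the backwards stage1 scan
 * recorded a matching TERM key: same objid and localization, with the
 * offset also significant for writes (see hammer_rterm_rb_cmp()).
 */
static hammer_rterm_t
redo_find_term(struct hammer_rterm_rb_tree *root, hammer_fifo_redo_t redo)
{
	struct hammer_rterm rtval;

	bzero(&rtval, sizeof(rtval));
	rtval.redo_objid = redo->redo_objid;
	rtval.redo_localization = redo->redo_localization;
	rtval.redo_offset = redo->redo_offset;
	rtval.redo_flags = (redo->redo_flags == HAMMER_REDO_WRITE) ?
			   HAMMER_REDO_TERM_WRITE : HAMMER_REDO_TERM_TRUNC;
	return (RB_FIND(hammer_rterm_rb_tree, root, &rtval));
}
#endif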
1295c58123daSMatthew Dillon 
1296c58123daSMatthew Dillon static void
1297c58123daSMatthew Dillon hammer_recover_redo_exec(hammer_mount_t hmp, hammer_fifo_redo_t redo)
1298c58123daSMatthew Dillon {
1299c58123daSMatthew Dillon 	struct hammer_transaction trans;
1300c58123daSMatthew Dillon 	struct vattr va;
1301e1067862STomohiro Kusumi 	hammer_inode_t ip;
1302c58123daSMatthew Dillon 	struct vnode *vp = NULL;
1303c58123daSMatthew Dillon 	int error;
1304c58123daSMatthew Dillon 
1305c58123daSMatthew Dillon 	hammer_start_transaction(&trans, hmp);
1306c58123daSMatthew Dillon 
1307c58123daSMatthew Dillon 	ip = hammer_get_inode(&trans, NULL, redo->redo_objid,
1308c58123daSMatthew Dillon 			      HAMMER_MAX_TID, redo->redo_localization,
1309c58123daSMatthew Dillon 			      0, &error);
1310c58123daSMatthew Dillon 	if (ip == NULL) {
131133234d14STomohiro Kusumi 		hkprintf("unable to find objid %016jx:%08x\n",
1312c58123daSMatthew Dillon 			(intmax_t)redo->redo_objid, redo->redo_localization);
1313c58123daSMatthew Dillon 		goto done2;
1314c58123daSMatthew Dillon 	}
1315c58123daSMatthew Dillon 	error = hammer_get_vnode(ip, &vp);
1316c58123daSMatthew Dillon 	if (error) {
131733234d14STomohiro Kusumi 		hkprintf("unable to acquire vnode for %016jx:%08x\n",
1318c58123daSMatthew Dillon 			(intmax_t)redo->redo_objid, redo->redo_localization);
1319c58123daSMatthew Dillon 		goto done1;
1320c58123daSMatthew Dillon 	}
1321c58123daSMatthew Dillon 
1322c58123daSMatthew Dillon 	switch(redo->redo_flags) {
1323c58123daSMatthew Dillon 	case HAMMER_REDO_WRITE:
1324c58123daSMatthew Dillon 		error = VOP_OPEN(vp, FREAD|FWRITE, proc0.p_ucred, NULL);
1325c58123daSMatthew Dillon 		if (error) {
132633234d14STomohiro Kusumi 			hkprintf("vn_rdwr open %016jx:%08x returned %d\n",
1327418cb5e5SMatthew Dillon 				(intmax_t)redo->redo_objid,
1328418cb5e5SMatthew Dillon 				redo->redo_localization, error);
1329c58123daSMatthew Dillon 			break;
1330c58123daSMatthew Dillon 		}
1331c58123daSMatthew Dillon 		vn_unlock(vp);
1332c58123daSMatthew Dillon 		error = vn_rdwr(UIO_WRITE, vp, (void *)(redo + 1),
1333c58123daSMatthew Dillon 				redo->redo_data_bytes,
1334c58123daSMatthew Dillon 				redo->redo_offset, UIO_SYSSPACE,
1335c58123daSMatthew Dillon 				0, proc0.p_ucred, NULL);
1336c58123daSMatthew Dillon 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1337418cb5e5SMatthew Dillon 		if (error) {
133833234d14STomohiro Kusumi 			hkprintf("write %016jx:%08x returned %d\n",
1339418cb5e5SMatthew Dillon 				(intmax_t)redo->redo_objid,
1340418cb5e5SMatthew Dillon 				redo->redo_localization, error);
1341418cb5e5SMatthew Dillon 		}
13423596743eSMarkus Pfeiffer 		VOP_CLOSE(vp, FREAD|FWRITE, NULL);
1343c58123daSMatthew Dillon 		break;
1344c58123daSMatthew Dillon 	case HAMMER_REDO_TRUNC:
1345c58123daSMatthew Dillon 		VATTR_NULL(&va);
1346c58123daSMatthew Dillon 		va.va_size = redo->redo_offset;
1347c58123daSMatthew Dillon 		error = VOP_SETATTR(vp, &va, proc0.p_ucred);
1348418cb5e5SMatthew Dillon 		if (error) {
134933234d14STomohiro Kusumi 			hkprintf("setattr offset %016jx error %d\n",
1350418cb5e5SMatthew Dillon 				(intmax_t)redo->redo_offset, error);
1351418cb5e5SMatthew Dillon 		}
1352c58123daSMatthew Dillon 		break;
1353c58123daSMatthew Dillon 	}
1354c58123daSMatthew Dillon 	vput(vp);
1355c58123daSMatthew Dillon done1:
1356c58123daSMatthew Dillon 	hammer_rel_inode(ip, 0);
1357c58123daSMatthew Dillon done2:
1358c58123daSMatthew Dillon 	hammer_done_transaction(&trans);
1359c58123daSMatthew Dillon }
1360c58123daSMatthew Dillon 
1361c58123daSMatthew Dillon /*
1362c58123daSMatthew Dillon  * RB tree compare function.  Note that REDO_TERM_TRUNC ops ignore
1363c58123daSMatthew Dillon  * the offset.
1364c58123daSMatthew Dillon  *
1365c58123daSMatthew Dillon  * WRITE@0 TERM@0 WRITE@0 .... (no TERM@0) etc.
1366c58123daSMatthew Dillon  */
1367c58123daSMatthew Dillon static int
1368c58123daSMatthew Dillon hammer_rterm_rb_cmp(hammer_rterm_t rt1, hammer_rterm_t rt2)
1369c58123daSMatthew Dillon {
1370c58123daSMatthew Dillon 	if (rt1->redo_objid < rt2->redo_objid)
1371c58123daSMatthew Dillon 		return(-1);
1372c58123daSMatthew Dillon 	if (rt1->redo_objid > rt2->redo_objid)
1373c58123daSMatthew Dillon 		return(1);
1374c58123daSMatthew Dillon 	if (rt1->redo_localization < rt2->redo_localization)
1375c58123daSMatthew Dillon 		return(-1);
1376c58123daSMatthew Dillon 	if (rt1->redo_localization > rt2->redo_localization)
1377c58123daSMatthew Dillon 		return(1);
1378c58123daSMatthew Dillon 	if (rt1->redo_flags < rt2->redo_flags)
1379c58123daSMatthew Dillon 		return(-1);
1380c58123daSMatthew Dillon 	if (rt1->redo_flags > rt2->redo_flags)
1381c58123daSMatthew Dillon 		return(1);
1382c58123daSMatthew Dillon 	if (rt1->redo_flags != HAMMER_REDO_TERM_TRUNC) {
1383c58123daSMatthew Dillon 		if (rt1->redo_offset < rt2->redo_offset)
1384c58123daSMatthew Dillon 			return(-1);
1385c58123daSMatthew Dillon 		if (rt1->redo_offset > rt2->redo_offset)
1386c58123daSMatthew Dillon 			return(1);
1387c58123daSMatthew Dillon 	}
1388c58123daSMatthew Dillon 	return(0);
1389c58123daSMatthew Dillon }
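
#if 0
/*
 * Illustrative sketch (hypothetical helper and offsets): two
 * REDO_TERM_TRUNC keys differing only in redo_offset compare equal
 * under the function above, so one recorded truncation TERM matches
 * any TRUNC REDO on the same inode, while WRITE TERMs must also match
 * the exact file offset.
 */
static int
rterm_trunc_cmp_demo(void)
{
	struct hammer_rterm a, b;

	bzero(&a, sizeof(a));
	bzero(&b, sizeof(b));
	a.redo_flags = b.redo_flags = HAMMER_REDO_TERM_TRUNC;
	a.redo_offset = 0x1000;
	b.redo_offset = 0x8000;

	return (hammer_rterm_rb_cmp(&a, &b));	/* 0: keys are equal */
}
#endif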
1390c58123daSMatthew Dillon 
1391d36ec43bSMatthew Dillon #if 0
1392d36ec43bSMatthew Dillon 
1393f90dde4cSMatthew Dillon static void
1394f90dde4cSMatthew Dillon hammer_recover_debug_dump(int w, char *buf, int bytes)
1395f90dde4cSMatthew Dillon {
1396f90dde4cSMatthew Dillon 	int i;
1397f90dde4cSMatthew Dillon 
1398f90dde4cSMatthew Dillon 	for (i = 0; i < bytes; ++i) {
1399f90dde4cSMatthew Dillon 		if (i && (i & 15) == 0)
1400f90dde4cSMatthew Dillon 			kprintf("\n%*.*s", w, w, "");
1401f90dde4cSMatthew Dillon 		kprintf(" %02x", (unsigned char)buf[i]);
1402f90dde4cSMatthew Dillon 	}
1403f90dde4cSMatthew Dillon 	kprintf("\n");
1404f90dde4cSMatthew Dillon }
1405f90dde4cSMatthew Dillon 
1406d36ec43bSMatthew Dillon #endif
140751c35492SMatthew Dillon 
140851c35492SMatthew Dillon /*
14099f5097dcSMatthew Dillon  * Flush recovered buffers from recovery operations.  The call to this
14109f5097dcSMatthew Dillon  * routine may be delayed if a read-only mount was made and then later
14112faf0737SMatthew Dillon  * upgraded to read-write.  This routine is also called when unmounting
14122faf0737SMatthew Dillon  * a read-only mount to clean out recovered (dirty) buffers which we
14132faf0737SMatthew Dillon  * couldn't flush (because the mount is read-only).
14149f5097dcSMatthew Dillon  *
14159f5097dcSMatthew Dillon  * The volume header is always written last.  The UNDO FIFO will be forced
14169f5097dcSMatthew Dillon  * to zero-length by setting next_offset to first_offset.  This leaves the
14179f5097dcSMatthew Dillon  * (now stale) UNDO information used to recover the disk available for
14189f5097dcSMatthew Dillon  * forensic analysis.
141900f16fadSMatthew Dillon  *
142000f16fadSMatthew Dillon  * final is typically 0 or 1.  The volume header is only written if final
142100f16fadSMatthew Dillon  * is 1.  If final is -1 the recovered buffers are discarded instead of
142200f16fadSMatthew Dillon  * written and root_volume can also be passed as NULL in that case.
142351c35492SMatthew Dillon  */
142451c35492SMatthew Dillon static int hammer_recover_flush_volume_callback(hammer_volume_t, void *);
142551c35492SMatthew Dillon static int hammer_recover_flush_buffer_callback(hammer_buffer_t, void *);
142651c35492SMatthew Dillon 
142751c35492SMatthew Dillon void
142806ad81ffSMatthew Dillon hammer_recover_flush_buffers(hammer_mount_t hmp, hammer_volume_t root_volume,
142906ad81ffSMatthew Dillon 			     int final)
143051c35492SMatthew Dillon {
1431af209b0fSMatthew Dillon 	/*
1432af209b0fSMatthew Dillon 	 * Flush the buffers out asynchronously, wait for all the I/O to
1433af209b0fSMatthew Dillon 	 * complete, then do it again to destroy the buffer cache buffer
1434af209b0fSMatthew Dillon 	 * so it doesn't alias something later on.
1435af209b0fSMatthew Dillon 	 */
1436af209b0fSMatthew Dillon 	RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
143700f16fadSMatthew Dillon 		hammer_recover_flush_buffer_callback, &final);
1438eddadaeeSMatthew Dillon 	hammer_io_wait_all(hmp, "hmrrcw", 1);
14390832c9bbSMatthew Dillon 	RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
144000f16fadSMatthew Dillon 		hammer_recover_flush_buffer_callback, &final);
14419f5097dcSMatthew Dillon 
1442af209b0fSMatthew Dillon 	/*
144300f16fadSMatthew Dillon 	 * Flush all volume headers except the root volume.  If final < 0
144400f16fadSMatthew Dillon 	 * we discard all volume headers including the root volume.
1445af209b0fSMatthew Dillon 	 */
144600f16fadSMatthew Dillon 	if (final >= 0) {
144700f16fadSMatthew Dillon 		RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
144800f16fadSMatthew Dillon 			hammer_recover_flush_volume_callback, root_volume);
144900f16fadSMatthew Dillon 	} else {
145000f16fadSMatthew Dillon 		RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
145100f16fadSMatthew Dillon 			hammer_recover_flush_volume_callback, NULL);
145200f16fadSMatthew Dillon 	}
145300f16fadSMatthew Dillon 
145400f16fadSMatthew Dillon 	/*
145500f16fadSMatthew Dillon 	 * Finalize the root volume header.
145677912481SMatthew Dillon 	 *
145777912481SMatthew Dillon 	 * No interlock is needed, volume buffers are not
145877912481SMatthew Dillon 	 * messed with by bioops.
145900f16fadSMatthew Dillon 	 */
146000f16fadSMatthew Dillon 	if (root_volume && root_volume->io.recovered && final > 0) {
1461eddadaeeSMatthew Dillon 		hammer_io_wait_all(hmp, "hmrflx", 1);
146251c35492SMatthew Dillon 		root_volume->io.recovered = 0;
1463710733a6SMatthew Dillon 		hammer_io_flush(&root_volume->io, 0);
146451c35492SMatthew Dillon 		hammer_rel_volume(root_volume, 0);
1465eddadaeeSMatthew Dillon 		hammer_io_wait_all(hmp, "hmrfly", 1);
146651c35492SMatthew Dillon 	}
146751c35492SMatthew Dillon }
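
#if 0
/*
 * Illustrative sketch (hypothetical helper, assuming the
 * hammer_blockmap layout from hammer_disk.h): forcing the UNDO FIFO
 * to zero length as described above.  Making next_offset equal
 * first_offset leaves the FIFO logically empty while the stale UNDO
 * data behind it stays on-media for forensic analysis.
 */
static void
undo_fifo_force_empty(hammer_volume_t root_volume)
{
	hammer_blockmap_t rootmap;

	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	rootmap->next_offset = rootmap->first_offset;
}
#endif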
146851c35492SMatthew Dillon 
146900f16fadSMatthew Dillon /*
147000f16fadSMatthew Dillon  * Callback to flush volume headers.  If discarding data will be NULL and
147100f16fadSMatthew Dillon  * Callback to flush volume headers.  If discarding, data will be NULL and
147200f16fadSMatthew Dillon  * Otherwise data is the root_volume and we flush all volume headers
147300f16fadSMatthew Dillon  * EXCEPT the root_volume.
14742faf0737SMatthew Dillon  *
14752faf0737SMatthew Dillon  * Clear any I/O error or modified condition when discarding buffers to
14762faf0737SMatthew Dillon  * clean up the reference count, otherwise the buffer may have extra refs
14772faf0737SMatthew Dillon  * on it.
147800f16fadSMatthew Dillon  */
147951c35492SMatthew Dillon static
148051c35492SMatthew Dillon int
148151c35492SMatthew Dillon hammer_recover_flush_volume_callback(hammer_volume_t volume, void *data)
148251c35492SMatthew Dillon {
148351c35492SMatthew Dillon 	hammer_volume_t root_volume = data;
148451c35492SMatthew Dillon 
148551c35492SMatthew Dillon 	if (volume->io.recovered && volume != root_volume) {
148651c35492SMatthew Dillon 		volume->io.recovered = 0;
14872faf0737SMatthew Dillon 		if (root_volume != NULL) {
148877912481SMatthew Dillon 			/*
148977912481SMatthew Dillon 			 * No interlock is needed, volume buffers are not
149077912481SMatthew Dillon 			 * messed with by bioops.
149177912481SMatthew Dillon 			 */
1492710733a6SMatthew Dillon 			hammer_io_flush(&volume->io, 0);
14932faf0737SMatthew Dillon 		} else {
14942faf0737SMatthew Dillon 			hammer_io_clear_error(&volume->io);
149500f16fadSMatthew Dillon 			hammer_io_clear_modify(&volume->io, 1);
14962faf0737SMatthew Dillon 		}
149751c35492SMatthew Dillon 		hammer_rel_volume(volume, 0);
149851c35492SMatthew Dillon 	}
149951c35492SMatthew Dillon 	return(0);
150051c35492SMatthew Dillon }
150151c35492SMatthew Dillon 
15022faf0737SMatthew Dillon /*
15032faf0737SMatthew Dillon  * Flush or discard recovered I/O buffers.
15042faf0737SMatthew Dillon  *
15052faf0737SMatthew Dillon  * Clear any I/O error or modified condition when discarding buffers to
15062faf0737SMatthew Dillon  * clean up the reference count, otherwise the buffer may have extra refs
15072faf0737SMatthew Dillon  * on it.
15082faf0737SMatthew Dillon  */
150951c35492SMatthew Dillon static
151051c35492SMatthew Dillon int
151151c35492SMatthew Dillon hammer_recover_flush_buffer_callback(hammer_buffer_t buffer, void *data)
151251c35492SMatthew Dillon {
151300f16fadSMatthew Dillon 	int final = *(int *)data;
1514250aec18SMatthew Dillon 	int flush;
151500f16fadSMatthew Dillon 
151651c35492SMatthew Dillon 	if (buffer->io.recovered) {
151751c35492SMatthew Dillon 		buffer->io.recovered = 0;
1518af209b0fSMatthew Dillon 		buffer->io.reclaim = 1;
15192faf0737SMatthew Dillon 		if (final < 0) {
15202faf0737SMatthew Dillon 			hammer_io_clear_error(&buffer->io);
152100f16fadSMatthew Dillon 			hammer_io_clear_modify(&buffer->io, 1);
15222faf0737SMatthew Dillon 		} else {
152377912481SMatthew Dillon 			hammer_io_write_interlock(&buffer->io);
1524710733a6SMatthew Dillon 			hammer_io_flush(&buffer->io, 0);
152577912481SMatthew Dillon 			hammer_io_done_interlock(&buffer->io);
15262faf0737SMatthew Dillon 		}
1527af209b0fSMatthew Dillon 		hammer_rel_buffer(buffer, 0);
1528af209b0fSMatthew Dillon 	} else {
1529250aec18SMatthew Dillon 		flush = hammer_ref_interlock(&buffer->io.lock);
1530250aec18SMatthew Dillon 		if (flush)
1531c1745db9SMatthew Dillon 			atomic_add_int(&hammer_count_refedbufs, 1);
1532250aec18SMatthew Dillon 
15332faf0737SMatthew Dillon 		if (final < 0) {
15342faf0737SMatthew Dillon 			hammer_io_clear_error(&buffer->io);
15352faf0737SMatthew Dillon 			hammer_io_clear_modify(&buffer->io, 1);
15362faf0737SMatthew Dillon 		}
1537250aec18SMatthew Dillon 		KKASSERT(hammer_oneref(&buffer->io.lock));
1538af209b0fSMatthew Dillon 		buffer->io.reclaim = 1;
1539250aec18SMatthew Dillon 		hammer_rel_buffer(buffer, flush);
154051c35492SMatthew Dillon 	}
154151c35492SMatthew Dillon 	return(0);
154251c35492SMatthew Dillon }
154351c35492SMatthew Dillon 
1544