/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * UNDO ALGORITHM:
 *
 *	The UNDO algorithm is trivial.  The nominal UNDO range in the
 *	FIFO is determined by taking the first/next offset stored in
 *	the volume header.  The next offset may not be correct since
 *	UNDO flushes are not required to flush the volume header, so
 *	the code also scans forward until it finds a discontinuous
 *	sequence number.
 *
 *	The UNDOs are then scanned and executed in reverse order.  These
 *	UNDOs are effectively just data restorations based on HAMMER offsets.
 *
 * REDO ALGORITHM:
 *
 *	REDO records are laid down in the UNDO/REDO FIFO for nominal
 *	writes, truncations, and file extension ops.  On a per-inode
 *	basis two types of REDO records are generated, REDO_WRITE
 *	and REDO_TRUNC.
 *
 *	Essentially the recovery block will contain UNDO records backing
 *	out partial operations and REDO records to regenerate those partial
 *	operations guaranteed by the filesystem during recovery.
 *
 *	REDO generation is optional, and can also be started and then
 *	later stopped due to excessive write()s in between fsyncs, or not
 *	started at all.  Because of this the recovery code must determine
 *	when REDOs are valid and when they are not.  Additional records are
 *	generated to help figure it out.
 *
 *	The REDO_TERM_WRITE and REDO_TERM_TRUNC records are generated
 *	during a flush cycle indicating which records the flush cycle
 *	has synched meta-data for, and HAMMER_REDO_SYNC is generated in
 *	each flush cycle to indicate how far back in the UNDO/REDO FIFO
 *	the recovery code must go to find the earliest applicable REDO
 *	record.  Applicable REDO records can be far outside the nominal
 *	UNDO recovery range, for example if a write() lays down a REDO but
 *	the related file is not flushed for several cycles.
 *
 *	The SYNC reference is to a point prior to the nominal UNDO FIFO
 *	range, creating an extended REDO range which must be scanned.
 *
 *	Any REDO_WRITE/REDO_TRUNC encountered within the extended range
 *	which have no matching REDO_TERM_WRITE/REDO_TERM_TRUNC records
 *	prior to the start of the nominal UNDO range are applicable.
 *	That is, any REDO_TERM_* records in the extended range but not in
 *	the nominal undo range will mask any redo operations for prior REDO
 *	records.  This is necessary because once the TERM is laid down
 *	followup operations may make additional changes to the related
 *	records but not necessarily record them as REDOs (because REDOs are
 *	optional).
 *
 *	REDO_TERM_WRITE/REDO_TERM_TRUNC records in the nominal UNDO range
 *	must be ignored since they represent meta-data flushes which are
 *	undone by the UNDOs in that nominal UNDO range by the recovery
 *	code.  Only REDO_TERM_* records in the extended range but not
 *	in the nominal undo range are applicable.
 *
 *	The REDO_SYNC record itself always exists in the nominal UNDO range
 *	(this is how the extended range is determined).  For recovery
 *	purposes the most recent REDO_SYNC record is always used if several
 *	are found.
 *
 * CRASHES DURING UNDO/REDO:
 *
 *	A crash during the UNDO phase requires no additional effort.  The
 *	UNDOs will simply be re-run.  The state of the UNDO/REDO fifo
 *	remains unchanged and has no re-crash issues.
 *
 *	A crash during the REDO phase is more complex because the REDOs
 *	run normal filesystem ops and generate additional UNDO/REDO records.
 *	REDO is disabled during REDO recovery and any SYNC records generated
 *	by flushes during REDO recovery must continue to reference the
 *	original extended range.
 *
 *	If multiple crashes occur and the UNDO/REDO FIFO wraps, REDO recovery
 *	may become impossible.  This is detected when the start of the
 *	extended range fails to have monotonically increasing sequence
 *	numbers leading into the nominal undo range.
 */
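
/*
 * Illustration of the ranges described above, ignoring FIFO wrap for
 * clarity (offsets are hypothetical):
 *
 *	ext_offset           first_offset             last_offset
 *	     |____________________|________________________|
 *	       extended REDO range     nominal UNDO range
 *
 * The sketch below is illustrative only (not compiled, names are
 * hypothetical); it shows the masking rule a REDO record must pass
 * to be executed in stage 2.
 */
#if 0
static __inline int
redo_record_applicable(hammer_off_t term_fifo_offset,
		       hammer_off_t ext_offset,
		       hammer_off_t first_offset)
{
	/*
	 * term_fifo_offset is the FIFO offset of a matching
	 * REDO_TERM_* record, or 0 if none was found.  A TERM laid
	 * down in the extended range, prior to the start of the
	 * nominal UNDO range, masks the earlier REDO_WRITE/REDO_TRUNC.
	 */
	if (term_fifo_offset >= ext_offset &&
	    term_fifo_offset < first_offset)
		return(0);	/* masked, do not run */
	return(1);		/* applicable, run the REDO */
}
#endif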

#include "hammer.h"

/*
 * Specify the way we want to handle stage2 errors.
 *
 * The following values are accepted:
 *
 * 0 - Run redo recovery normally and fail to mount if
 *     the operation fails (default).
 * 1 - Run redo recovery, but don't fail to mount if the
 *     operation fails.
 * 2 - Completely skip redo recovery (only for severe error
 *     conditions and/or debugging).
 */
static int hammer_skip_redo = 0;
TUNABLE_INT("vfs.hammer.skip_redo", &hammer_skip_redo);
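
/*
 * For example, on a DragonFly system the tunable can be set from the
 * boot loader before the filesystem is mounted (value illustrative):
 *
 *	# /boot/loader.conf
 *	vfs.hammer.skip_redo="1"
 */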

/*
 * Each rterm entry has a list of fifo offsets indicating termination
 * points.  These are stripped as the scan progresses.
 */
typedef struct hammer_rterm_entry {
	struct hammer_rterm_entry *next;
	hammer_off_t		fifo_offset;
} *hammer_rterm_entry_t;

/*
 * rterm entries sorted in RB tree are indexed by objid, flags, and offset.
 * TRUNC entries ignore the offset.
 */
typedef struct hammer_rterm {
	RB_ENTRY(hammer_rterm)	rb_node;
	int64_t			redo_objid;
	uint32_t		redo_localization;
	uint32_t		redo_flags;
	hammer_off_t		redo_offset;
	hammer_rterm_entry_t	term_list;
} *hammer_rterm_t;
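
/*
 * Illustrative sketch (not compiled, helper name hypothetical): how a
 * termination point would be pushed onto an rterm's list while the
 * backwards scan collects REDO_TERM_* records; the real collection is
 * done by hammer_recover_redo_rec() below.
 */
#if 0
static void
example_rterm_push(hammer_mount_t hmp, hammer_rterm_t rterm,
		   hammer_off_t fifo_offset)
{
	hammer_rterm_entry_t rte;

	rte = kmalloc(sizeof(*rte), hmp->m_misc, M_WAITOK | M_ZERO);
	rte->fifo_offset = fifo_offset;
	rte->next = rterm->term_list;	/* push onto the per-key list */
	rterm->term_list = rte;
}
#endif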

static int hammer_rterm_rb_cmp(hammer_rterm_t rt1, hammer_rterm_t rt2);
struct hammer_rterm_rb_tree;
RB_HEAD(hammer_rterm_rb_tree, hammer_rterm);
RB_PROTOTYPE(hammer_rterm_rb_tree, hammer_rterm, rb_node, hammer_rterm_rb_cmp);

static int hammer_check_tail_signature(hammer_mount_t hmp,
			hammer_fifo_tail_t tail, hammer_off_t end_off);
static int hammer_check_head_signature(hammer_mount_t hmp,
			hammer_fifo_head_t head, hammer_off_t beg_off);
static void hammer_recover_copy_undo(hammer_off_t undo_offset,
			char *src, char *dst, int bytes);
static hammer_fifo_any_t hammer_recover_scan_fwd(hammer_mount_t hmp,
			hammer_volume_t root_volume,
			hammer_off_t *scan_offsetp,
			int *errorp, hammer_buffer_t *bufferp);
static hammer_fifo_any_t hammer_recover_scan_rev(hammer_mount_t hmp,
			hammer_volume_t root_volume,
			hammer_off_t *scan_offsetp,
			int *errorp, hammer_buffer_t *bufferp);
#if 0
static void hammer_recover_debug_dump(int w, char *buf, int bytes);
#endif
static int hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
			hammer_fifo_undo_t undo);
static int hammer_recover_redo_rec(hammer_mount_t hmp,
			struct hammer_rterm_rb_tree *root,
			hammer_off_t redo_fifo_offset, hammer_fifo_redo_t redo);
static int hammer_recover_redo_run(hammer_mount_t hmp,
			struct hammer_rterm_rb_tree *root,
			hammer_off_t redo_fifo_offset, hammer_fifo_redo_t redo);
static void hammer_recover_redo_exec(hammer_mount_t hmp,
			hammer_fifo_redo_t redo);

RB_GENERATE(hammer_rterm_rb_tree, hammer_rterm, rb_node, hammer_rterm_rb_cmp);

/*
 * Recover filesystem meta-data on mount.  This procedure figures out the
 * UNDO FIFO range and runs the UNDOs backwards.  The FIFO pointers are not
 * resynchronized by this procedure.
 *
 * This procedure is run near the beginning of the mount sequence, before
 * any B-Tree or high-level accesses are enabled, and is responsible for
 * restoring the meta-data to a consistent state.  High level HAMMER data
 * structures (such as the B-Tree) cannot be accessed here.
 *
 * NOTE: No information from the root volume has been cached in the
 *	 hammer_mount structure yet, so we need to access the root volume's
 *	 buffer directly.
 */
int
hammer_recover_stage1(hammer_mount_t hmp, hammer_volume_t root_volume)
{
	hammer_blockmap_t rootmap;
	hammer_buffer_t buffer;
	hammer_off_t scan_offset;
	hammer_off_t scan_offset_save;
	hammer_off_t bytes;
	hammer_fifo_any_t head;
	hammer_off_t first_offset;
	hammer_off_t last_offset;
	uint32_t seqno;
	int error;
	int degenerate_case = 0;

	/*
	 * Examine the UNDO FIFO indices in the volume header.
	 */
	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	first_offset = rootmap->first_offset;
	last_offset = rootmap->next_offset;
	buffer = NULL;
	error = 0;

	hmp->recover_stage2_offset = 0;

	if (first_offset > rootmap->alloc_offset ||
	    last_offset > rootmap->alloc_offset) {
		hvkprintf(root_volume,
			"Illegal UNDO FIFO index range "
			"%016jx, %016jx limit %016jx\n",
			(intmax_t)first_offset,
			(intmax_t)last_offset,
			(intmax_t)rootmap->alloc_offset);
		error = EIO;
		goto done;
	}

	/*
	 * In HAMMER version 4+ filesystems the volume header does NOT
	 * contain definitive UNDO FIFO state.  In particular, the
	 * rootmap->next_offset may not be indexed completely to the
	 * end of the active UNDO FIFO.
	 */
	if (hmp->version >= HAMMER_VOL_VERSION_FOUR) {
		/*
		 * To find the definitive range we must first scan backwards
		 * from first_offset to locate the first real record and
		 * extract the sequence number from it.  This record is not
		 * part of the active undo space.
		 */
		scan_offset = first_offset;
		seqno = 0;

		for (;;) {
			head = hammer_recover_scan_rev(hmp, root_volume,
						       &scan_offset,
						       &error, &buffer);
			if (error)
				break;
			if (head->head.hdr_type != HAMMER_HEAD_TYPE_PAD) {
				seqno = head->head.hdr_seq;
				break;
			}
		}
		if (error) {
			hvkprintf(root_volume,
				"recovery failure during seqno backscan\n");
			goto done;
		}

		/*
		 * Scan forwards from first_offset and (seqno+1) looking
		 * for a sequence space discontinuity.  This denotes the
		 * end of the active FIFO area.
		 *
		 * NOTE: For the case where the FIFO is empty the very first
		 *	 record we find will be discontinuous.
		 *
		 * NOTE: Do not include trailing PADs in the scan range,
		 *	 and remember the returned scan_offset after a
		 *	 fwd iteration points to the end of the returned
		 *	 record.
		 */
		hvkprintf(root_volume, "recovery check seqno=%08x\n", seqno);

		scan_offset = first_offset;
		scan_offset_save = scan_offset;
		++seqno;
		hmp->recover_stage2_seqno = seqno;

		for (;;) {
			head = hammer_recover_scan_fwd(hmp, root_volume,
						       &scan_offset,
						       &error, &buffer);
			if (error)
				break;
			if (head->head.hdr_type != HAMMER_HEAD_TYPE_PAD) {
				if (seqno != head->head.hdr_seq) {
					scan_offset = scan_offset_save;
					break;
				}
				scan_offset_save = scan_offset;
				++seqno;
			}

#if 0
			/*
			 * If the forward scan is grossly ahead of last_offset
			 * then something is wrong.  last_offset is supposed
			 * to have been flushed out already.
			 */
			if (last_offset >= scan_offset) {
				bytes = last_offset - scan_offset;
			} else {
				bytes = rootmap->alloc_offset - scan_offset +
					HAMMER_OFF_LONG_ENCODE(last_offset);
			}
			if (bytes >
			    HAMMER_OFF_LONG_ENCODE(rootmap->alloc_offset) *
			    4 / 5) {
				hvkprintf(root_volume,
					"recovery forward scan is "
					"grossly beyond the last_offset in "
					"the volume header, this can't be "
					"right.\n");
				error = EIO;
				break;
			}
#endif
		}

		/*
		 * Store the seqno.  This will be the next seqno we lay down
		 * when generating new UNDOs.
		 */
		hmp->undo_seqno = seqno;
		if (error) {
			hvkprintf(root_volume,
				"recovery failure during seqno fwdscan\n");
			goto done;
		}
		last_offset = scan_offset;
		hvkprintf(root_volume,
			"recovery range %016jx-%016jx\n",
			(intmax_t)first_offset,
			(intmax_t)last_offset);
		hvkprintf(root_volume,
			"recovery nexto %016jx endseqno=%08x\n",
			(intmax_t)rootmap->next_offset,
			seqno);
	}

	/*
	 * Calculate the size of the active portion of the FIFO.  If the
	 * FIFO is empty the filesystem is clean and no further action is
	 * needed.
	 */
	if (last_offset >= first_offset) {
		bytes = last_offset - first_offset;
	} else {
		bytes = rootmap->alloc_offset - first_offset +
			HAMMER_OFF_LONG_ENCODE(last_offset);
	}
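
	/*
	 * Worked example of the wrapped case above (hypothetical zone
	 * offsets): if first_offset sits near the end of the zone and
	 * last_offset has wrapped back past the start, the active size
	 * is the tail of the zone (alloc_offset - first_offset) plus
	 * the wrapped head, i.e. the offset portion of last_offset.
	 */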
	if (bytes == 0) {
		degenerate_case = 1;
		error = 0;
		goto done;
	}

	hvkprintf(root_volume,
		"recovery undo %016jx-%016jx (%jd bytes)%s\n",
		(intmax_t)first_offset,
		(intmax_t)last_offset,
		(intmax_t)bytes,
		(hmp->ronly ? " (RO)" : " (RW)"));
	if (bytes > HAMMER_OFF_LONG_ENCODE(rootmap->alloc_offset)) {
		hkprintf("Undo size is absurd, unable to mount\n");
		error = EIO;
		goto done;
	}

	/*
	 * Scan the UNDOs backwards.
	 */
	scan_offset = last_offset;

	while ((int64_t)bytes > 0) {
		KKASSERT(scan_offset != first_offset);
		head = hammer_recover_scan_rev(hmp, root_volume,
					       &scan_offset, &error, &buffer);
		if (error)
			break;

		/*
		 * Normal UNDO
		 */
		error = hammer_recover_undo(hmp, root_volume, &head->undo);
		if (error) {
			hvkprintf(root_volume,
				"UNDO record at %016jx failed\n",
				(intmax_t)scan_offset - head->head.hdr_size);
			break;
		}

		/*
		 * The first REDO_SYNC record encountered (scanning backwards)
		 * enables REDO processing.
		 */
		if (head->head.hdr_type == HAMMER_HEAD_TYPE_REDO &&
		    head->redo.redo_flags == HAMMER_REDO_SYNC) {
			if (hmp->flags & HAMMER_MOUNT_REDO_RECOVERY_REQ) {
				hvkprintf(root_volume,
					"Ignoring extra REDO_SYNC "
					"records in UNDO/REDO FIFO.\n");
			} else {
				hmp->flags |= HAMMER_MOUNT_REDO_RECOVERY_REQ;
				hmp->recover_stage2_offset =
					head->redo.redo_offset;
				hvkprintf(root_volume,
					"Found REDO_SYNC %016jx\n",
					(intmax_t)head->redo.redo_offset);
			}
		}

		bytes -= head->head.hdr_size;

		/*
		 * If too many dirty buffers have built up we have to flush'm
		 * out.  As long as we do not flush out the volume header
		 * a crash here should not cause any problems.
		 *
		 * buffer must be released so the flush can assert that
		 * all buffers are idle.
		 */
		if (hammer_flusher_meta_limit(hmp)) {
			if (buffer) {
				hammer_rel_buffer(buffer, 0);
				buffer = NULL;
			}
			if (hmp->ronly == 0) {
				hammer_recover_flush_buffers(hmp, root_volume,
							     0);
				hvkprintf(root_volume, "Continuing recovery\n");
			} else {
				hvkprintf(root_volume,
					"Recovery failure: "
					"Insufficient buffer cache to hold "
					"dirty buffers on read-only mount!\n");
				error = EIO;
				break;
			}
		}
	}
	KKASSERT(error || bytes == 0);
done:
	if (buffer) {
		hammer_rel_buffer(buffer, 0);
		buffer = NULL;
	}

	/*
	 * After completely flushing all the recovered buffers the volume
	 * header will also be flushed.
	 */
	if (root_volume->io.recovered == 0) {
		hammer_ref_volume(root_volume);
		root_volume->io.recovered = 1;
	}

	/*
	 * Finish up flushing (or discarding) recovered buffers.  FIFO
	 * indices in the volume header are updated to the actual undo
	 * range but will not be collapsed until stage 2.
	 */
	if (error == 0) {
		hammer_modify_volume_noundo(NULL, root_volume);
		rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
		rootmap->first_offset = first_offset;
		rootmap->next_offset = last_offset;
		hammer_modify_volume_done(root_volume);
		if (hmp->ronly == 0)
			hammer_recover_flush_buffers(hmp, root_volume, 1);
	} else {
		hammer_recover_flush_buffers(hmp, root_volume, -1);
	}
	if (degenerate_case == 0) {
		hvkprintf(root_volume, "recovery complete\n");
	} else {
		hvkprintf(root_volume, "mounted clean, no recovery needed\n");
	}
	return (error);
}
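
#if 0
/*
 * Hypothetical sketch (not compiled): the relative order in which the
 * two recovery stages run during a mount, per the comments above and
 * below.  Stage 1 runs before any B-Tree access; stage 2 runs only on
 * (or upon transition to) a read-write mount.
 */
static int
example_mount_recovery_order(hammer_mount_t hmp, hammer_volume_t root_volume)
{
	int error;

	error = hammer_recover_stage1(hmp, root_volume);
	if (error == 0 && hmp->ronly == 0)
		error = hammer_recover_stage2(hmp, root_volume);
	return (error);
}
#endif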

/*
 * Execute redo operations
 *
 * This procedure is run at the end of the mount sequence, after the hammer
 * mount structure has been completely initialized but before the filesystem
 * goes live.  It can access standard cursors, the B-Tree, flush the
 * filesystem, and so forth.
 *
 * This code may only be called for read-write mounts or when a mount
 * switches from read-only to read-write.  vnodes may or may not be present.
 *
 * The stage1 code will have already calculated the correct FIFO range
 * for the nominal UNDO FIFO and stored it in the rootmap.  The extended
 * range for REDO is stored in hmp->recover_stage2_offset.
 */
int
hammer_recover_stage2(hammer_mount_t hmp, hammer_volume_t root_volume)
{
	hammer_blockmap_t rootmap;
	hammer_buffer_t buffer;
	hammer_off_t scan_offset;
	hammer_off_t oscan_offset;
	hammer_off_t bytes;
	hammer_off_t ext_bytes;
	hammer_fifo_any_t head;
	hammer_off_t first_offset;
	hammer_off_t last_offset;
	hammer_off_t ext_offset;
	struct hammer_rterm_rb_tree rterm_root;
	uint32_t seqno;
	int error;
	int verbose = 0;
	int dorscan;

	/*
	 * Stage 2 can only be run on a RW mount, or when the mount is
	 * switched from RO to RW.
	 */
	KKASSERT(hmp->ronly == 0);
	RB_INIT(&rterm_root);

	if (hammer_skip_redo == 1)
		hvkprintf(root_volume, "recovery redo marked as optional\n");

	if (hammer_skip_redo == 2) {
		hvkprintf(root_volume, "recovery redo skipped.\n");
		return (0);
	}

	/*
	 * Examine the UNDO FIFO.  If it is empty the filesystem is clean
	 * and no action need be taken.
	 */
	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	first_offset = rootmap->first_offset;
	last_offset = rootmap->next_offset;
	if (first_offset == last_offset) {
		KKASSERT((hmp->flags & HAMMER_MOUNT_REDO_RECOVERY_REQ) == 0);
		return(0);
	}

	/*
	 * Stage2 must only be run once, and will not be run at all
	 * if Stage1 did not find a REDO_SYNC record.
	 */
	error = 0;
	buffer = NULL;

	if ((hmp->flags & HAMMER_MOUNT_REDO_RECOVERY_REQ) == 0)
		goto done;
	hmp->flags &= ~HAMMER_MOUNT_REDO_RECOVERY_REQ;
	hmp->flags |= HAMMER_MOUNT_REDO_RECOVERY_RUN;
	ext_offset = hmp->recover_stage2_offset;
	if (ext_offset == 0) {
		hvkprintf(root_volume,
			"REDO stage specified but no REDO_SYNC "
			"offset, ignoring\n");
		goto done;
	}

	/*
	 * Calculate nominal UNDO range (this is not yet the extended
	 * range).
	 */
	if (last_offset >= first_offset) {
		bytes = last_offset - first_offset;
	} else {
		bytes = rootmap->alloc_offset - first_offset +
			HAMMER_OFF_LONG_ENCODE(last_offset);
	}
	hvkprintf(root_volume,
		"recovery redo %016jx-%016jx (%jd bytes)%s\n",
		(intmax_t)first_offset,
		(intmax_t)last_offset,
		(intmax_t)bytes,
		(hmp->ronly ? " (RO)" : " (RW)"));
	verbose = 1;
	if (bytes > HAMMER_OFF_LONG_ENCODE(rootmap->alloc_offset)) {
		hkprintf("Undo size is absurd, unable to mount\n");
		error = EIO;
		goto fatal;
	}

	/*
	 * Scan the REDOs backwards collecting REDO_TERM_* information.
	 * This information is only collected for the extended range,
	 * non-inclusive of any TERMs in the nominal UNDO range.
	 *
	 * If the stage2 extended range is inside the nominal undo range
	 * we have nothing to scan.
	 *
	 * This must fit in memory!
	 */
	if (first_offset < last_offset) {
		/*
		 * [ first_offset........last_offset ]
		 */
		if (ext_offset < first_offset) {
			dorscan = 1;
			ext_bytes = first_offset - ext_offset;
		} else if (ext_offset > last_offset) {
			dorscan = 1;
			ext_bytes = (rootmap->alloc_offset - ext_offset) +
				    HAMMER_OFF_LONG_ENCODE(first_offset);
		} else {
			ext_bytes = -(ext_offset - first_offset);
			dorscan = 0;
		}
	} else {
		/*
		 * [......last_offset  first_offset.....]
		 */
		if (ext_offset < last_offset) {
			ext_bytes = -((rootmap->alloc_offset - first_offset) +
				      HAMMER_OFF_LONG_ENCODE(ext_offset));
			dorscan = 0;
		} else if (ext_offset > first_offset) {
			ext_bytes = -(ext_offset - first_offset);
			dorscan = 0;
		} else {
			ext_bytes = first_offset - ext_offset;
			dorscan = 1;
		}
	}
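
	/*
	 * For example (hypothetical offsets, non-wrapped case): with
	 * first_offset 0x8000 and last_offset 0xa000, an ext_offset of
	 * 0x6000 lies before the nominal range, so dorscan=1 and
	 * ext_bytes=0x2000 of extended REDO must be reverse-scanned;
	 * an ext_offset of 0x9000 lies inside the nominal range, so
	 * ext_bytes is negative and no extra reverse scan is needed.
	 */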

	if (dorscan) {
		scan_offset = first_offset;
		hvkprintf(root_volume,
			"Find extended redo %016jx, %jd extbytes\n",
			(intmax_t)ext_offset,
			(intmax_t)ext_bytes);
		seqno = hmp->recover_stage2_seqno - 1;
		for (;;) {
			head = hammer_recover_scan_rev(hmp, root_volume,
						       &scan_offset,
						       &error, &buffer);
			if (error)
				break;
			if (head->head.hdr_type != HAMMER_HEAD_TYPE_PAD) {
				if (head->head.hdr_seq != seqno) {
					error = ERANGE;
					break;
				}
				error = hammer_recover_redo_rec(
						hmp, &rterm_root,
						scan_offset, &head->redo);
				--seqno;
			}
			if (scan_offset == ext_offset)
				break;
		}
		if (error) {
			hvkprintf(root_volume,
				"Find extended redo failed %d, "
				"unable to run REDO\n",
				error);
			goto done;
		}
	} else {
		hvkprintf(root_volume,
			"Embedded extended redo %016jx, %jd extbytes\n",
			(intmax_t)ext_offset,
			(intmax_t)ext_bytes);
	}

	/*
	 * Scan the REDO forwards through the entire extended range.
	 * Anything with a previously recorded matching TERM is discarded.
	 */
	scan_offset = ext_offset;
	bytes += ext_bytes;

	/*
	 * NOTE: when doing a forward scan the returned scan_offset is
	 *	 for the record following the returned record, so we
	 *	 have to play a bit.
	 */
	while ((int64_t)bytes > 0) {
		KKASSERT(scan_offset != last_offset);

		oscan_offset = scan_offset;
		head = hammer_recover_scan_fwd(hmp, root_volume,
					       &scan_offset, &error, &buffer);
		if (error)
			break;

		error = hammer_recover_redo_run(hmp, &rterm_root,
						oscan_offset, &head->redo);
		if (error) {
			hvkprintf(root_volume,
				"REDO record at %016jx failed\n",
				(intmax_t)scan_offset - head->head.hdr_size);
			break;
		}
		bytes -= head->head.hdr_size;
	}
	KKASSERT(error || bytes == 0);

done:
	if (buffer) {
		hammer_rel_buffer(buffer, 0);
		buffer = NULL;
	}

	/*
	 * Cleanup rterm tree
	 */
	{
		hammer_rterm_t rterm;
		hammer_rterm_entry_t rte;

		while ((rterm = RB_ROOT(&rterm_root)) != NULL) {
			RB_REMOVE(hammer_rterm_rb_tree, &rterm_root, rterm);
			while ((rte = rterm->term_list) != NULL) {
				rterm->term_list = rte->next;
				kfree(rte, hmp->m_misc);
			}
			kfree(rterm, hmp->m_misc);
		}
	}

	/*
	 * Finish up flushing (or discarding) recovered buffers by executing
	 * a normal flush cycle.  Setting HMNT_UNDO_DIRTY bypasses degenerate
	 * case tests and forces the flush in order to update the FIFO indices.
	 *
	 * If a crash occurs during the flush the entire undo/redo will be
	 * re-run during recovery on the next mount.
	 */
	if (error == 0) {
		if (rootmap->first_offset != rootmap->next_offset)
			hmp->hflags |= HMNT_UNDO_DIRTY;
		hammer_flusher_sync(hmp);
	}
fatal:
	hmp->flags &= ~HAMMER_MOUNT_REDO_RECOVERY_RUN;
	if (verbose) {
		hvkprintf(root_volume, "End redo recovery\n");
	}

	if (error && hammer_skip_redo == 1)
		hvkprintf(root_volume,
			"recovery redo error %d, skipping.\n",
			error);

	return (hammer_skip_redo ? 0 : error);
}

/*
 * Scan backwards from *scan_offsetp, return the FIFO record prior to the
 * record at *scan_offsetp or NULL if an error occurred.
 *
 * On return *scan_offsetp will be the offset of the returned record.
 */
hammer_fifo_any_t
hammer_recover_scan_rev(hammer_mount_t hmp, hammer_volume_t root_volume,
			hammer_off_t *scan_offsetp,
			int *errorp, hammer_buffer_t *bufferp)
{
	hammer_off_t scan_offset;
	hammer_blockmap_t rootmap;
	hammer_fifo_any_t head;
	hammer_fifo_tail_t tail;

	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	scan_offset = *scan_offsetp;

	if (hammer_debug_general & 0x0080)
		hdkprintf("rev scan_offset %016jx\n", (intmax_t)scan_offset);
	if (scan_offset == HAMMER_ENCODE_UNDO(0))
		scan_offset = rootmap->alloc_offset;
	if (scan_offset - sizeof(*tail) < HAMMER_ENCODE_UNDO(0)) {
		hvkprintf(root_volume,
			"UNDO record at %016jx FIFO underflow\n",
			(intmax_t)scan_offset);
		*errorp = EIO;
		return (NULL);
	}
	tail = hammer_bread(hmp, scan_offset - sizeof(*tail),
			    errorp, bufferp);
	if (*errorp) {
		hvkprintf(root_volume,
			"Unable to read UNDO TAIL at %016jx\n",
			(intmax_t)scan_offset - sizeof(*tail));
		return (NULL);
	}

	if (hammer_check_tail_signature(hmp, tail, scan_offset) != 0) {
		hvkprintf(root_volume,
			"Illegal UNDO TAIL signature at %016jx\n",
			(intmax_t)scan_offset - sizeof(*tail));
		*errorp = EIO;
		return (NULL);
	}
	head = (void *)((char *)tail + sizeof(*tail) - tail->tail_size);
	*scan_offsetp = scan_offset - head->head.hdr_size;

	return (head);
}

/*
 * Scan forwards from *scan_offsetp, return the FIFO record or NULL if
 * an error occurred.
 *
 * On return *scan_offsetp will be the offset of the record following
 * the returned record.
 */
hammer_fifo_any_t
hammer_recover_scan_fwd(hammer_mount_t hmp, hammer_volume_t root_volume,
			hammer_off_t *scan_offsetp,
			int *errorp, hammer_buffer_t *bufferp)
{
	hammer_off_t scan_offset;
	hammer_blockmap_t rootmap;
	hammer_fifo_any_t head;

	rootmap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	scan_offset = *scan_offsetp;

	if (hammer_debug_general & 0x0080)
		hdkprintf("fwd scan_offset %016jx\n", (intmax_t)scan_offset);
	if (scan_offset == rootmap->alloc_offset)
		scan_offset = HAMMER_ENCODE_UNDO(0);

	head = hammer_bread(hmp, scan_offset, errorp, bufferp);
	if (*errorp) {
		hvkprintf(root_volume,
			"Unable to read UNDO HEAD at %016jx\n",
			(intmax_t)scan_offset);
		return (NULL);
	}

	if (hammer_check_head_signature(hmp, &head->head, scan_offset) != 0) {
		hvkprintf(root_volume,
			"Illegal UNDO HEAD signature at %016jx\n",
			(intmax_t)scan_offset);
		*errorp = EIO;
		return (NULL);
	}
	scan_offset += head->head.hdr_size;
	if (scan_offset == rootmap->alloc_offset)
		scan_offset = HAMMER_ENCODE_UNDO(0);
	*scan_offsetp = scan_offset;

	return (head);
}

/*
 * Helper function for hammer_check_{head,tail}_signature().  Check stuff
 * once the head and tail have been established.
 *
 * This function validates the entire FIFO record wrapper.
 */
static __inline
int
_hammer_check_signature(hammer_mount_t hmp,
			hammer_fifo_head_t head, hammer_fifo_tail_t tail,
			hammer_off_t beg_off)
{
	hammer_off_t end_off;
	int bytes;

	/*
	 * Check signatures.  The tail signature is allowed to be the
	 * head signature only for 8-byte PADs.
	 */
	if (head->hdr_signature != HAMMER_HEAD_SIGNATURE) {
		hkprintf("FIFO record bad head signature %04x at %016jx\n",
			head->hdr_signature,
			(intmax_t)beg_off);
		return(2);
	}
	if (head->hdr_size < HAMMER_HEAD_ALIGN ||
	    (head->hdr_size & HAMMER_HEAD_ALIGN_MASK)) {
		hkprintf("FIFO record unaligned or bad size %04x at %016jx\n",
			head->hdr_size,
			(intmax_t)beg_off);
		return(2);
	}
	end_off = beg_off + head->hdr_size;

	if (head->hdr_type != HAMMER_HEAD_TYPE_PAD ||
	    (size_t)(end_off - beg_off) != sizeof(*tail)) {
		if (head->hdr_type != tail->tail_type) {
			hkprintf("FIFO record head/tail type mismatch "
				"%04x %04x at %016jx\n",
				head->hdr_type, tail->tail_type,
				(intmax_t)beg_off);
			return(2);
		}
		if (head->hdr_size != tail->tail_size) {
			hkprintf("FIFO record head/tail size mismatch "
				"%04x %04x at %016jx\n",
				head->hdr_size, tail->tail_size,
				(intmax_t)beg_off);
			return(2);
		}
		if (tail->tail_signature != HAMMER_TAIL_SIGNATURE) {
			hkprintf("FIFO record bad tail signature "
				"%04x at %016jx\n",
				tail->tail_signature,
				(intmax_t)beg_off);
			return(3);
		}
	}

	/*
	 * Non-PAD records must have a CRC and must be sized at
	 * least large enough to fit the head and tail.
	 */
	if (head->hdr_type != HAMMER_HEAD_TYPE_PAD) {
		if (hammer_crc_test_fifo_head(hmp->version,
					      head, head->hdr_size) == 0) {
			hkprintf("FIFO record CRC failed %08x at %016jx\n",
				head->hdr_crc, (intmax_t)beg_off);
			return(EIO);
		}
		if (head->hdr_size < sizeof(*head) + sizeof(*tail)) {
			hkprintf("FIFO record too small %04x at %016jx\n",
				head->hdr_size,
				(intmax_t)beg_off);
			return(EIO);
		}
	}

	/*
	 * Check the tail
	 */
	bytes = head->hdr_size;
	tail = (void *)((char *)head + bytes - sizeof(*tail));
	if (tail->tail_size != head->hdr_size) {
		hkprintf("Bad tail size %04x vs %04x at %016jx\n",
			tail->tail_size, head->hdr_size,
			(intmax_t)beg_off);
		return(EIO);
	}
	if (tail->tail_type != head->hdr_type) {
		hkprintf("Bad tail type %04x vs %04x at %016jx\n",
			tail->tail_type, head->hdr_type,
			(intmax_t)beg_off);
		return(EIO);
	}

	return(0);
}

/*
 * Check that the FIFO record is in-bounds given the head and the
 * hammer offset.
 *
 * Also checks that the head and tail structures agree with each other,
 * but does not check beyond the signature, type, and size.
 */
static int
hammer_check_head_signature(hammer_mount_t hmp, hammer_fifo_head_t head,
			    hammer_off_t beg_off)
{
	hammer_fifo_tail_t tail;
	hammer_off_t end_off;

	/*
	 * head overlaps buffer boundary.  This could be a PAD so only
	 * check the minimum PAD size here.
	 */
	if (((beg_off + sizeof(*tail) - 1) ^ (beg_off)) & ~HAMMER_BUFMASK64)
		return(1);

	/*
	 * Calculate the ending offset and make sure the record does
	 * not cross a buffer boundary.
	 */
	end_off = beg_off + head->hdr_size;
	if ((beg_off ^ (end_off - 1)) & ~HAMMER_BUFMASK64)
		return(1);
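	/*
	 * (Two offsets lie within the same buffer exactly when they
	 * agree in every bit above HAMMER_BUFMASK64, so the XOR tests
	 * above are non-zero precisely when the record would cross a
	 * buffer boundary.)
	 */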
	tail = (void *)((char *)head + head->hdr_size - sizeof(*tail));
	return (_hammer_check_signature(hmp, head, tail, beg_off));
}

/*
 * Check that the FIFO record is in-bounds given the tail and the
 * hammer offset.  The offset points at the ending boundary of the
 * record.
 *
 * Also checks that the head and tail structures agree with each other,
 * but does not check beyond the signature, type, and size.
 */
static int
hammer_check_tail_signature(hammer_mount_t hmp, hammer_fifo_tail_t tail,
			    hammer_off_t end_off)
{
	hammer_fifo_head_t head;
	hammer_off_t beg_off;

	/*
	 * tail overlaps buffer boundary
	 */
	if (((end_off - sizeof(*tail)) ^ (end_off - 1)) & ~HAMMER_BUFMASK64)
		return(1);

	/*
	 * Calculate the beginning offset and make sure the record does
	 * not cross a buffer boundary.
	 */
	beg_off = end_off - tail->tail_size;
	if ((beg_off ^ (end_off - 1)) & ~HAMMER_BUFMASK64)
		return(1);
	head = (void *)((char *)tail + sizeof(*tail) - tail->tail_size);
	return (_hammer_check_signature(hmp, head, tail, beg_off));
}
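
/*
 * The pointer math above assumes the on-media record layout
 *
 *	[hammer_fifo_head][payload][hammer_fifo_tail]
 *
 * with head->hdr_size == tail->tail_size spanning the whole record
 * (the signature check enforces the equality), so the head sits
 * tail_size bytes before the end of the tail.  A sketch of that
 * recovery step with hypothetical stand-in types:
 */
#if 0
#include <stdint.h>

struct sk_head { uint16_t hdr_size; };
struct sk_tail { uint16_t tail_size; };

static struct sk_head *
sk_head_from_tail(struct sk_tail *tail)
{
	/* step back over the full record from the end of the tail */
	return ((struct sk_head *)((char *)tail + sizeof(*tail) -
				   tail->tail_size));
}
#endif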

static int
hammer_recover_undo(hammer_mount_t hmp, hammer_volume_t root_volume,
		    hammer_fifo_undo_t undo)
{
	hammer_volume_t volume;
	hammer_buffer_t buffer;
	hammer_off_t buf_offset;
	int zone;
	int error;
	int vol_no;
	int bytes;
	uint32_t offset;

	/*
	 * Only process UNDO records.  Flag if we find other records to
	 * optimize stage2 recovery.
	 */
	if (undo->head.hdr_type != HAMMER_HEAD_TYPE_UNDO)
		return(0);

	/*
	 * Validate the UNDO record.
	 */
	bytes = undo->head.hdr_size - sizeof(*undo) -
		sizeof(struct hammer_fifo_tail);
	if (bytes < 0 || undo->undo_data_bytes < 0 ||
	    undo->undo_data_bytes > bytes) {
		hkprintf("Corrupt UNDO record, undo_data_bytes %d/%d\n",
			undo->undo_data_bytes, bytes);
		return(EIO);
	}

	bytes = undo->undo_data_bytes;

	/*
	 * The undo offset may only be a zone-1 or zone-2 offset.
	 *
	 * Currently we only support a zone-1 offset representing the
	 * volume header.
	 */
	zone = HAMMER_ZONE_DECODE(undo->undo_offset);
	offset = undo->undo_offset & HAMMER_BUFMASK;

	if (offset + bytes > HAMMER_BUFSIZE) {
		hkprintf("Corrupt UNDO record, bad offset\n");
		return (EIO);
	}

	switch(zone) {
	case HAMMER_ZONE_RAW_VOLUME_INDEX:
		vol_no = HAMMER_VOL_DECODE(undo->undo_offset);
		volume = hammer_get_volume(hmp, vol_no, &error);
		if (volume == NULL) {
			hkprintf("UNDO record, cannot access volume %d\n",
				vol_no);
			break;
		}
		hammer_modify_volume_noundo(NULL, volume);
		hammer_recover_copy_undo(undo->undo_offset,
			(char *)(undo + 1),
			(char *)volume->ondisk + offset,
			bytes);
		hammer_modify_volume_done(volume);

		/*
		 * Multiple modifications may be made to the same buffer.
		 * Also, the volume header cannot be written out until
		 * everything else has been flushed.  This also covers
		 * the read-only case by preventing the kernel from
		 * flushing the buffer.
		 */
		if (volume->io.recovered == 0)
			volume->io.recovered = 1;
		else
			hammer_rel_volume(volume, 0);
		break;
	case HAMMER_ZONE_RAW_BUFFER_INDEX:
		buf_offset = undo->undo_offset & ~HAMMER_BUFMASK64;
		buffer = hammer_get_buffer(hmp, buf_offset, HAMMER_BUFSIZE,
			0, &error);
		if (buffer == NULL) {
			hkprintf("UNDO record, cannot access buffer %016jx\n",
				(intmax_t)undo->undo_offset);
			break;
		}
		hammer_modify_buffer_noundo(NULL, buffer);
		hammer_recover_copy_undo(undo->undo_offset,
			(char *)(undo + 1),
			(char *)buffer->ondisk + offset,
			bytes);
		hammer_modify_buffer_done(buffer);

		/*
		 * Multiple modifications may be made to the same buffer,
		 * so improve performance by delaying the flush.  This also
		 * covers the read-only case by preventing the kernel from
		 * flushing the buffer.
		 */
		if (buffer->io.recovered == 0)
			buffer->io.recovered = 1;
		else
			hammer_rel_buffer(buffer, 0);
		break;
	default:
		hkprintf("Corrupt UNDO record\n");
		error = EIO;
	}
	return (error);
}
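
/*
 * HAMMER_ZONE_DECODE() and HAMMER_VOL_DECODE() above pull their
 * fields out of the 64-bit HAMMER offset, which reserves the top
 * 4 bits for the zone and the next 8 bits for the volume number.
 * A minimal sketch of the decode, assuming that bit layout (the
 * sk_* names are stand-ins, not the real macros):
 */
#if 0
#include <stdint.h>

static int
sk_zone_decode(uint64_t ham_off)
{
	return ((int)(ham_off >> 60));		/* zone: bits 60-63 */
}

static int
sk_vol_decode(uint64_t ham_off)
{
	return ((int)((ham_off >> 52) & 255));	/* volume: bits 52-59 */
}
#endif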

static void
hammer_recover_copy_undo(hammer_off_t undo_offset,
			 char *src, char *dst, int bytes)
{
	if (hammer_debug_general & 0x0080) {
		hdkprintf("UNDO %016jx: %d\n",
			(intmax_t)undo_offset, bytes);
	}
#if 0
	hkprintf("UNDO %016jx:", (intmax_t)undo_offset);
	hammer_recover_debug_dump(22, dst, bytes);
	kprintf("%22s", "to:");
	hammer_recover_debug_dump(22, src, bytes);
#endif
	bcopy(src, dst, bytes);
}
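
/*
 * The 0x0080 test above keys off the hammer_debug_general tunable.
 * On a running system that is normally toggled through sysctl; the
 * knob name below is inferred from the variable name and should be
 * verified before relying on it:
 *
 *	sysctl vfs.hammer.debug_general=0x0080
 */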

/*
 * Record HAMMER_REDO_TERM_WRITE and HAMMER_REDO_TERM_TRUNC operations
 * during the backwards scan of the extended UNDO/REDO FIFO.  This scan
 * does not include the nominal UNDO range, just the extended range.
 */
int
hammer_recover_redo_rec(hammer_mount_t hmp, struct hammer_rterm_rb_tree *root,
			hammer_off_t scan_offset, hammer_fifo_redo_t redo)
{
	hammer_rterm_t rterm;
	hammer_rterm_t nrterm;
	hammer_rterm_entry_t rte;

	if (redo->head.hdr_type != HAMMER_HEAD_TYPE_REDO)
		return(0);
	if (redo->redo_flags != HAMMER_REDO_TERM_WRITE &&
	    redo->redo_flags != HAMMER_REDO_TERM_TRUNC) {
		return(0);
	}

	nrterm = kmalloc(sizeof(*nrterm), hmp->m_misc, M_WAITOK|M_ZERO);
	nrterm->redo_objid = redo->redo_objid;
	nrterm->redo_localization = redo->redo_localization;
	nrterm->redo_flags = redo->redo_flags;
	nrterm->redo_offset = redo->redo_offset;

	rterm = RB_INSERT(hammer_rterm_rb_tree, root, nrterm);
	if (rterm)
		kfree(nrterm, hmp->m_misc);
	else
		rterm = nrterm;

	if (bootverbose) {
		hkprintf("record record %016jx objid %016jx "
			"offset %016jx flags %08x\n",
			(intmax_t)scan_offset,
			(intmax_t)redo->redo_objid,
			(intmax_t)redo->redo_offset,
			(int)redo->redo_flags);
	}

	/*
	 * The scan runs in reverse order and each rte is prepended, so
	 * the rte list ends up in forward order.
	 */
	rte = kmalloc(sizeof(*rte), hmp->m_misc, M_WAITOK|M_ZERO);
	rte->fifo_offset = scan_offset;
	rte->next = rterm->term_list;
	rterm->term_list = rte;

	return(0);
}
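
/*
 * Because the stage1 scan moves backwards through the FIFO while each
 * rte is pushed onto the front of term_list, the finished list comes
 * out sorted by ascending fifo_offset, which is the order the forward
 * stage2 scan consumes it in.  A small sketch of the inversion
 * (stand-in types, userland allocation, error handling omitted):
 */
#if 0
#include <stdint.h>
#include <stdlib.h>

struct sk_rte {
	uint64_t	fifo_offset;
	struct sk_rte	*next;
};

static struct sk_rte *
sk_record_backwards(const uint64_t *offsets_desc, int n)
{
	struct sk_rte *list = NULL;
	struct sk_rte *rte;
	int i;

	/* offsets_desc[] arrives in descending order (backwards scan) */
	for (i = 0; i < n; ++i) {
		rte = calloc(1, sizeof(*rte));
		rte->fifo_offset = offsets_desc[i];
		rte->next = list;		/* prepend */
		list = rte;
	}
	return (list);				/* now ascending */
}
#endif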

/*
 * Execute HAMMER_REDO_WRITE and HAMMER_REDO_TRUNC operations during
 * the forwards scan of the entire extended UNDO/REDO FIFO range.
 *
 * Records matching previously recorded TERMs have already been committed
 * and are ignored.
 */
int
hammer_recover_redo_run(hammer_mount_t hmp, struct hammer_rterm_rb_tree *root,
			hammer_off_t scan_offset, hammer_fifo_redo_t redo)
{
	struct hammer_rterm rtval;
	hammer_rterm_t rterm;
	hammer_rterm_entry_t rte;

	if (redo->head.hdr_type != HAMMER_HEAD_TYPE_REDO)
		return(0);

	switch(redo->redo_flags) {
	case HAMMER_REDO_WRITE:
	case HAMMER_REDO_TRUNC:
		/*
		 * We hit a REDO request.  The REDO request is only executed
		 * if there is no matching TERM.
		 */
		bzero(&rtval, sizeof(rtval));
		rtval.redo_objid = redo->redo_objid;
		rtval.redo_localization = redo->redo_localization;
		rtval.redo_offset = redo->redo_offset;
		rtval.redo_flags = (redo->redo_flags == HAMMER_REDO_WRITE) ?
				   HAMMER_REDO_TERM_WRITE :
				   HAMMER_REDO_TERM_TRUNC;

		rterm = RB_FIND(hammer_rterm_rb_tree, root, &rtval);
		if (rterm) {
			if (bootverbose) {
				hkprintf("ignore record %016jx objid %016jx "
					"offset %016jx flags %08x\n",
					(intmax_t)scan_offset,
					(intmax_t)redo->redo_objid,
					(intmax_t)redo->redo_offset,
					(int)redo->redo_flags);
			}
			break;
		}
		if (bootverbose) {
			hkprintf("run record %016jx objid %016jx "
				"offset %016jx flags %08x\n",
				(intmax_t)scan_offset,
				(intmax_t)redo->redo_objid,
				(intmax_t)redo->redo_offset,
				(int)redo->redo_flags);
		}

		/*
		 * Redo stage2 can access a live filesystem, so acquire
		 * the vnode.
		 */
		hammer_recover_redo_exec(hmp, redo);
		break;
	case HAMMER_REDO_TERM_WRITE:
	case HAMMER_REDO_TERM_TRUNC:
		/*
		 * As we encounter TERMs in the forward scan we remove
		 * them.  Once the forward scan hits the nominal undo range
		 * there will be no more recorded TERMs.
		 */
		bzero(&rtval, sizeof(rtval));
		rtval.redo_objid = redo->redo_objid;
		rtval.redo_localization = redo->redo_localization;
		rtval.redo_flags = redo->redo_flags;
		rtval.redo_offset = redo->redo_offset;

		rterm = RB_FIND(hammer_rterm_rb_tree, root, &rtval);
		if (rterm) {
			if ((rte = rterm->term_list) != NULL) {
				KKASSERT(rte->fifo_offset == scan_offset);
				rterm->term_list = rte->next;
				kfree(rte, hmp->m_misc);
			}
		}
		break;
	}
	return(0);
}
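
/*
 * Net effect of the two passes:
 *
 *	stage1 (backwards, extended range only):
 *		TERM_WRITE/TERM_TRUNC -> RB_INSERT rterm, prepend rte
 *
 *	stage2 (forwards, entire extended range):
 *		WRITE/TRUNC           -> RB_FIND; execute only on a miss
 *		TERM_WRITE/TERM_TRUNC -> pop the matching rte entry
 *
 * so a WRITE or TRUNC is replayed only when no TERM confirming its
 * commit was ever flushed to the media.
 */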

static void
hammer_recover_redo_exec(hammer_mount_t hmp, hammer_fifo_redo_t redo)
{
	struct hammer_transaction trans;
	struct vattr va;
	hammer_inode_t ip;
	struct vnode *vp = NULL;
	int error;

	hammer_start_transaction(&trans, hmp);

	ip = hammer_get_inode(&trans, NULL, redo->redo_objid,
			      HAMMER_MAX_TID, redo->redo_localization,
			      0, &error);
	if (ip == NULL) {
		hkprintf("unable to find objid %016jx:%08x\n",
			(intmax_t)redo->redo_objid, redo->redo_localization);
		goto done2;
	}
	error = hammer_get_vnode(ip, &vp);
	if (error) {
		hkprintf("unable to acquire vnode for %016jx:%08x\n",
			(intmax_t)redo->redo_objid, redo->redo_localization);
		goto done1;
	}

	switch(redo->redo_flags) {
	case HAMMER_REDO_WRITE:
		error = VOP_OPEN(vp, FREAD|FWRITE, proc0.p_ucred, NULL);
		if (error) {
			hkprintf("vn_rdwr open %016jx:%08x returned %d\n",
				(intmax_t)redo->redo_objid,
				redo->redo_localization, error);
			break;
		}
		vn_unlock(vp);
		error = vn_rdwr(UIO_WRITE, vp, (void *)(redo + 1),
				redo->redo_data_bytes,
				redo->redo_offset, UIO_SYSSPACE,
				0, proc0.p_ucred, NULL);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (error) {
			hkprintf("write %016jx:%08x returned %d\n",
				(intmax_t)redo->redo_objid,
				redo->redo_localization, error);
		}
		VOP_CLOSE(vp, FREAD|FWRITE, NULL);
		break;
	case HAMMER_REDO_TRUNC:
		VATTR_NULL(&va);
		va.va_size = redo->redo_offset;
		error = VOP_SETATTR(vp, &va, proc0.p_ucred);
		if (error) {
			hkprintf("setattr offset %016jx error %d\n",
				(intmax_t)redo->redo_offset, error);
		}
		break;
	}
	vput(vp);
done1:
	hammer_rel_inode(ip, 0);
done2:
	hammer_done_transaction(&trans);
}
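
/*
 * The write replay above pulls its data from directly behind the
 * REDO header: a REDO_WRITE record is laid out as
 *
 *	[hammer_fifo_redo][redo_data_bytes of file data][pad][tail]
 *
 * and is applied at the file offset stored in redo_offset, while a
 * REDO_TRUNC record carries no payload and reuses redo_offset as the
 * truncation size passed to VOP_SETATTR().
 */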

/*
 * RB tree compare function.  Note that REDO_TERM_TRUNC ops ignore
 * the offset.
 *
 * WRITE@0 TERM@0 WRITE@0 .... (no TERM@0) etc.
 */
static int
hammer_rterm_rb_cmp(hammer_rterm_t rt1, hammer_rterm_t rt2)
{
	if (rt1->redo_objid < rt2->redo_objid)
		return(-1);
	if (rt1->redo_objid > rt2->redo_objid)
		return(1);
	if (rt1->redo_localization < rt2->redo_localization)
		return(-1);
	if (rt1->redo_localization > rt2->redo_localization)
		return(1);
	if (rt1->redo_flags < rt2->redo_flags)
		return(-1);
	if (rt1->redo_flags > rt2->redo_flags)
		return(1);
	if (rt1->redo_flags != HAMMER_REDO_TERM_TRUNC) {
		if (rt1->redo_offset < rt2->redo_offset)
			return(-1);
		if (rt1->redo_offset > rt2->redo_offset)
			return(1);
	}
	return(0);
}
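
/*
 * The comparator keys on (objid, localization, flags, offset), with
 * the offset dropped for TERM_TRUNC so any truncation TERM matches
 * any truncation REDO on the same inode.  A sketch of how such a
 * compare function is typically wired into a <sys/tree.h> red-black
 * tree; the declarations live elsewhere and the rb_node entry field
 * name is an assumption:
 */
#if 0
RB_HEAD(hammer_rterm_rb_tree, hammer_rterm);
RB_PROTOTYPE(hammer_rterm_rb_tree, hammer_rterm, rb_node,
	hammer_rterm_rb_cmp);
RB_GENERATE(hammer_rterm_rb_tree, hammer_rterm, rb_node,
	hammer_rterm_rb_cmp);
#endif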

#if 0

static void
hammer_recover_debug_dump(int w, char *buf, int bytes)
{
	int i;

	for (i = 0; i < bytes; ++i) {
		if (i && (i & 15) == 0)
			kprintf("\n%*.*s", w, w, "");
		kprintf(" %02x", (unsigned char)buf[i]);
	}
	kprintf("\n");
}

#endif

/*
 * Flush recovered buffers from recovery operations.  The call to this
 * routine may be delayed if a read-only mount was made and then later
 * upgraded to read-write.  This routine is also called when unmounting
 * a read-only mount to clean out recovered (dirty) buffers which we
 * couldn't flush (because the mount is read-only).
 *
 * The volume header is always written last.  The UNDO FIFO will be forced
 * to zero-length by setting next_offset to first_offset.  This leaves the
 * (now stale) UNDO information used to recover the disk available for
 * forensic analysis.
 *
 * final is typically 0 or 1.  The volume header is only written if final
 * is 1.  If final is -1 the recovered buffers are discarded instead of
 * written and root_volume can also be passed as NULL in that case.
 */
static int hammer_recover_flush_volume_callback(hammer_volume_t, void *);
static int hammer_recover_flush_buffer_callback(hammer_buffer_t, void *);

void
hammer_recover_flush_buffers(hammer_mount_t hmp, hammer_volume_t root_volume,
			     int final)
{
	/*
	 * Flush the buffers out asynchronously, wait for all the I/O to
	 * complete, then do it again to destroy the buffer cache buffer
	 * so it doesn't alias something later on.
	 */
	RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
		hammer_recover_flush_buffer_callback, &final);
	hammer_io_wait_all(hmp, "hmrrcw", 1);
	RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
		hammer_recover_flush_buffer_callback, &final);

	/*
	 * Flush all volume headers except the root volume.  If final < 0
	 * we discard all volume headers including the root volume.
	 */
	if (final >= 0) {
		RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
			hammer_recover_flush_volume_callback, root_volume);
	} else {
		RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
			hammer_recover_flush_volume_callback, NULL);
	}

	/*
	 * Finalize the root volume header.
	 *
	 * No interlock is needed; volume buffers are not messed with
	 * by bioops.
	 */
	if (root_volume && root_volume->io.recovered && final > 0) {
		hammer_io_wait_all(hmp, "hmrflx", 1);
		root_volume->io.recovered = 0;
		hammer_io_flush(&root_volume->io, 0);
		hammer_rel_volume(root_volume, 0);
		hammer_io_wait_all(hmp, "hmrfly", 1);
	}
}
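
/*
 * Call patterns implied by the final semantics above (illustrative;
 * not copied from any call site):
 *
 *	hammer_recover_flush_buffers(hmp, root_volume, 0);  intermediate
 *	hammer_recover_flush_buffers(hmp, root_volume, 1);  final, writes
 *							    the volume header
 *	hammer_recover_flush_buffers(hmp, NULL, -1);	    discard, e.g. on
 *							    read-only unmount
 */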

/*
 * Callback to flush volume headers.  If discarding, data will be NULL
 * and all volume headers (including the root volume) will be discarded.
 * Otherwise data is the root_volume and we flush all volume headers
 * EXCEPT the root_volume.
 *
 * Clear any I/O error or modified condition when discarding buffers to
 * clean up the reference count; otherwise the buffer may have extra refs
 * on it.
 */
static
int
hammer_recover_flush_volume_callback(hammer_volume_t volume, void *data)
{
	hammer_volume_t root_volume = data;

	if (volume->io.recovered && volume != root_volume) {
		volume->io.recovered = 0;
		if (root_volume != NULL) {
			/*
			 * No interlock is needed; volume buffers are not
			 * messed with by bioops.
			 */
			hammer_io_flush(&volume->io, 0);
		} else {
			hammer_io_clear_error(&volume->io);
			hammer_io_clear_modify(&volume->io, 1);
		}
		hammer_rel_volume(volume, 0);
	}
	return(0);
}

/*
 * Flush or discard recovered I/O buffers.
 *
 * Clear any I/O error or modified condition when discarding buffers to
 * clean up the reference count; otherwise the buffer may have extra refs
 * on it.
 */
static
int
hammer_recover_flush_buffer_callback(hammer_buffer_t buffer, void *data)
{
	int final = *(int *)data;
	int flush;

	if (buffer->io.recovered) {
		buffer->io.recovered = 0;
		buffer->io.reclaim = 1;
		if (final < 0) {
			hammer_io_clear_error(&buffer->io);
			hammer_io_clear_modify(&buffer->io, 1);
		} else {
			hammer_io_write_interlock(&buffer->io);
			hammer_io_flush(&buffer->io, 0);
			hammer_io_done_interlock(&buffer->io);
		}
		hammer_rel_buffer(buffer, 0);
	} else {
		flush = hammer_ref_interlock(&buffer->io.lock);
		if (flush)
			atomic_add_int(&hammer_count_refedbufs, 1);

		if (final < 0) {
			hammer_io_clear_error(&buffer->io);
			hammer_io_clear_modify(&buffer->io, 1);
		}
		KKASSERT(hammer_oneref(&buffer->io.lock));
		buffer->io.reclaim = 1;
		hammer_rel_buffer(buffer, flush);
	}
	return(0);
}
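
/*
 * In both paths a final < 0 discard clears the I/O error and the
 * modified state before the buffer is released; as the header comment
 * notes, a dirty or errored buffer carries extra references that would
 * otherwise leave the reference count unbalanced when a read-only
 * mount is torn down.
 */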