xref: /dflybsd-src/sys/vfs/hammer/hammer_mirror.c (revision f3a4893b0e844e1ab8c2334d304ebff61aa71ace)
/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.17 2008/07/31 22:30:33 dillon Exp $
 */
/*
 * HAMMER mirroring ioctls - serialize and deserialize modifications made
 *			     to a filesystem.
 */
40dd94f1b1SMatthew Dillon 
41dd94f1b1SMatthew Dillon #include "hammer.h"
42dd94f1b1SMatthew Dillon 
43c82af904SMatthew Dillon static int hammer_mirror_check(hammer_cursor_t cursor,
444c038e17SMatthew Dillon 				struct hammer_ioc_mrecord_rec *mrec);
45c82af904SMatthew Dillon static int hammer_mirror_update(hammer_cursor_t cursor,
464c038e17SMatthew Dillon 				struct hammer_ioc_mrecord_rec *mrec);
474c038e17SMatthew Dillon static int hammer_ioc_mirror_write_rec(hammer_cursor_t cursor,
484c038e17SMatthew Dillon 				struct hammer_ioc_mrecord_rec *mrec,
494c038e17SMatthew Dillon 				struct hammer_ioc_mirror_rw *mirror,
504c038e17SMatthew Dillon 				u_int32_t localization,
514c038e17SMatthew Dillon 				char *uptr);
524c038e17SMatthew Dillon static int hammer_ioc_mirror_write_pass(hammer_cursor_t cursor,
534c038e17SMatthew Dillon 				struct hammer_ioc_mrecord_rec *mrec,
544c038e17SMatthew Dillon 				struct hammer_ioc_mirror_rw *mirror,
554c038e17SMatthew Dillon 				u_int32_t localization);
564c038e17SMatthew Dillon static int hammer_ioc_mirror_write_skip(hammer_cursor_t cursor,
574c038e17SMatthew Dillon 				struct hammer_ioc_mrecord_skip *mrec,
584c038e17SMatthew Dillon 				struct hammer_ioc_mirror_rw *mirror,
594c038e17SMatthew Dillon 				u_int32_t localization);
60842e7a70SMatthew Dillon static int hammer_mirror_delete_to(hammer_cursor_t cursor,
614c038e17SMatthew Dillon 			        struct hammer_ioc_mirror_rw *mirror);
6283f2a3aaSMatthew Dillon static int hammer_mirror_nomirror(struct hammer_base_elm *base);
63c82af904SMatthew Dillon 
64c82af904SMatthew Dillon /*
65c82af904SMatthew Dillon  * All B-Tree records within the specified key range which also conform
66c82af904SMatthew Dillon  * to the transaction id range are returned.  Mirroring code keeps track
67c82af904SMatthew Dillon  * of the last transaction id fully scanned and can efficiently pick up
68c82af904SMatthew Dillon  * where it left off if interrupted.
69ea434b6fSMatthew Dillon  *
70ea434b6fSMatthew Dillon  * The PFS is identified in the mirror structure.  The passed ip is just
71ea434b6fSMatthew Dillon  * some directory in the overall HAMMER filesystem and has nothing to
72ea434b6fSMatthew Dillon  * do with the PFS.
73c82af904SMatthew Dillon  */
74dd94f1b1SMatthew Dillon int
75dd94f1b1SMatthew Dillon hammer_ioc_mirror_read(hammer_transaction_t trans, hammer_inode_t ip,
76dd94f1b1SMatthew Dillon 		       struct hammer_ioc_mirror_rw *mirror)
77dd94f1b1SMatthew Dillon {
784c038e17SMatthew Dillon 	struct hammer_cmirror cmirror;
79dd94f1b1SMatthew Dillon 	struct hammer_cursor cursor;
804c038e17SMatthew Dillon 	union hammer_ioc_mrecord_any mrec;
81c82af904SMatthew Dillon 	hammer_btree_leaf_elm_t elm;
82c82af904SMatthew Dillon 	const int crc_start = HAMMER_MREC_CRCOFF;
83c82af904SMatthew Dillon 	char *uptr;
84dd94f1b1SMatthew Dillon 	int error;
85c82af904SMatthew Dillon 	int data_len;
86c82af904SMatthew Dillon 	int bytes;
874c038e17SMatthew Dillon 	int eatdisk;
884c286c36SMatthew Dillon 	int mrec_flags;
89ea434b6fSMatthew Dillon 	u_int32_t localization;
904c038e17SMatthew Dillon 	u_int32_t rec_crc;
91ea434b6fSMatthew Dillon 
92ea434b6fSMatthew Dillon 	localization = (u_int32_t)mirror->pfs_id << 16;
93dd94f1b1SMatthew Dillon 
94dd94f1b1SMatthew Dillon 	if ((mirror->key_beg.localization | mirror->key_end.localization) &
95dd94f1b1SMatthew Dillon 	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
96dd94f1b1SMatthew Dillon 		return(EINVAL);
97dd94f1b1SMatthew Dillon 	}
98dd94f1b1SMatthew Dillon 	if (hammer_btree_cmp(&mirror->key_beg, &mirror->key_end) > 0)
99dd94f1b1SMatthew Dillon 		return(EINVAL);
100dd94f1b1SMatthew Dillon 
101dd94f1b1SMatthew Dillon 	mirror->key_cur = mirror->key_beg;
1024c038e17SMatthew Dillon 	mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK;
103ea434b6fSMatthew Dillon 	mirror->key_cur.localization += localization;
104c82af904SMatthew Dillon 	bzero(&mrec, sizeof(mrec));
1054c038e17SMatthew Dillon 	bzero(&cmirror, sizeof(cmirror));
106dd94f1b1SMatthew Dillon 
1074c286c36SMatthew Dillon 	/*
1084c286c36SMatthew Dillon 	 * Make CRC errors non-fatal (at least on data), causing an EDOM
1094c286c36SMatthew Dillon 	 * error instead of EIO.
1104c286c36SMatthew Dillon 	 */
1114c286c36SMatthew Dillon 	trans->flags |= HAMMER_TRANSF_CRCDOM;
1124c286c36SMatthew Dillon 
113dd94f1b1SMatthew Dillon retry:
114dd94f1b1SMatthew Dillon 	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
115dd94f1b1SMatthew Dillon 	if (error) {
116dd94f1b1SMatthew Dillon 		hammer_done_cursor(&cursor);
117dd94f1b1SMatthew Dillon 		goto failed;
118dd94f1b1SMatthew Dillon 	}
119dd94f1b1SMatthew Dillon 	cursor.key_beg = mirror->key_cur;
120dd94f1b1SMatthew Dillon 	cursor.key_end = mirror->key_end;
1214c038e17SMatthew Dillon 	cursor.key_end.localization &= HAMMER_LOCALIZE_MASK;
122ea434b6fSMatthew Dillon 	cursor.key_end.localization += localization;
123dd94f1b1SMatthew Dillon 
124dd94f1b1SMatthew Dillon 	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
125dd94f1b1SMatthew Dillon 	cursor.flags |= HAMMER_CURSOR_BACKEND;
126dd94f1b1SMatthew Dillon 
127dd94f1b1SMatthew Dillon 	/*
128c82af904SMatthew Dillon 	 * This flag filters the search to only return elements whos create
129c82af904SMatthew Dillon 	 * or delete TID is >= mirror_tid.  The B-Tree uses the mirror_tid
130c82af904SMatthew Dillon 	 * field stored with internal and leaf nodes to shortcut the scan.
131dd94f1b1SMatthew Dillon 	 */
132c82af904SMatthew Dillon 	cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
1334c038e17SMatthew Dillon 	cursor.cmirror = &cmirror;
1344c038e17SMatthew Dillon 	cmirror.mirror_tid = mirror->tid_beg;
135dd94f1b1SMatthew Dillon 
136dd94f1b1SMatthew Dillon 	error = hammer_btree_first(&cursor);
137dd94f1b1SMatthew Dillon 	while (error == 0) {
138dd94f1b1SMatthew Dillon 		/*
13993291532SMatthew Dillon 		 * Yield to more important tasks
14093291532SMatthew Dillon 		 */
14193291532SMatthew Dillon 		if (error == 0) {
14293291532SMatthew Dillon 			error = hammer_signal_check(trans->hmp);
14393291532SMatthew Dillon 			if (error)
14493291532SMatthew Dillon 				break;
14593291532SMatthew Dillon 		}
14693291532SMatthew Dillon 
14793291532SMatthew Dillon 		/*
1484c038e17SMatthew Dillon 		 * An internal node can be returned in mirror-filtered
1494c038e17SMatthew Dillon 		 * mode and indicates that the scan is returning a skip
1504c038e17SMatthew Dillon 		 * range in the cursor->cmirror structure.
1514c038e17SMatthew Dillon 		 */
1524c038e17SMatthew Dillon 		uptr = (char *)mirror->ubuf + mirror->count;
1534c038e17SMatthew Dillon 		if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL) {
1544c038e17SMatthew Dillon 			/*
1554c038e17SMatthew Dillon 			 * Check space
1564c038e17SMatthew Dillon 			 */
1574c038e17SMatthew Dillon 			mirror->key_cur = cmirror.skip_beg;
1584c038e17SMatthew Dillon 			bytes = sizeof(mrec.skip);
1594c038e17SMatthew Dillon 			if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) >
1604c038e17SMatthew Dillon 			    mirror->size) {
1614c038e17SMatthew Dillon 				break;
1624c038e17SMatthew Dillon 			}
1634c038e17SMatthew Dillon 
1644c038e17SMatthew Dillon 			/*
1654c038e17SMatthew Dillon 			 * Fill mrec
1664c038e17SMatthew Dillon 			 */
1674c038e17SMatthew Dillon 			mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE;
1684c038e17SMatthew Dillon 			mrec.head.type = HAMMER_MREC_TYPE_SKIP;
1694c038e17SMatthew Dillon 			mrec.head.rec_size = bytes;
1704c038e17SMatthew Dillon 			mrec.skip.skip_beg = cmirror.skip_beg;
1714c038e17SMatthew Dillon 			mrec.skip.skip_end = cmirror.skip_end;
1724c038e17SMatthew Dillon 			mrec.head.rec_crc = crc32(&mrec.head.rec_size,
1734c038e17SMatthew Dillon 						 bytes - crc_start);
1744c038e17SMatthew Dillon 			error = copyout(&mrec, uptr, bytes);
1754c038e17SMatthew Dillon 			eatdisk = 0;
1764c038e17SMatthew Dillon 			goto didwrite;
1774c038e17SMatthew Dillon 		}
1784c038e17SMatthew Dillon 
1794c038e17SMatthew Dillon 		/*
1804c038e17SMatthew Dillon 		 * Leaf node.  In full-history mode we could filter out
1814c038e17SMatthew Dillon 		 * elements modified outside the user-requested TID range.
1824c038e17SMatthew Dillon 		 *
1834c038e17SMatthew Dillon 		 * However, such elements must be returned so the writer
184f96881ffSMatthew Dillon 		 * can compare them against the target to determine what
1854c038e17SMatthew Dillon 		 * needs to be deleted on the target, particular for
1864c038e17SMatthew Dillon 		 * no-history mirrors.
187dd94f1b1SMatthew Dillon 		 */
188c82af904SMatthew Dillon 		KKASSERT(cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
189c82af904SMatthew Dillon 		elm = &cursor.node->ondisk->elms[cursor.index].leaf;
190c82af904SMatthew Dillon 		mirror->key_cur = elm->base;
191dd94f1b1SMatthew Dillon 
192e469566bSMatthew Dillon 		/*
1933324b8cdSMatthew Dillon 		 * If the record was created after our end point we just
1943324b8cdSMatthew Dillon 		 * ignore it.
1953324b8cdSMatthew Dillon 		 */
1963324b8cdSMatthew Dillon 		if (elm->base.create_tid > mirror->tid_end) {
1973324b8cdSMatthew Dillon 			error = 0;
1983324b8cdSMatthew Dillon 			bytes = 0;
1993324b8cdSMatthew Dillon 			eatdisk = 1;
2003324b8cdSMatthew Dillon 			goto didwrite;
2013324b8cdSMatthew Dillon 		}
2023324b8cdSMatthew Dillon 
2033324b8cdSMatthew Dillon 		/*
204e469566bSMatthew Dillon 		 * Determine if we should generate a PASS or a REC.  PASS
205e469566bSMatthew Dillon 		 * records are records without any data payload.  Such
206e469566bSMatthew Dillon 		 * records will be generated if the target is already expected
207e469566bSMatthew Dillon 		 * to have the record, allowing it to delete the gaps.
208e469566bSMatthew Dillon 		 *
209e469566bSMatthew Dillon 		 * A PASS record is also used to perform deletions on the
210e469566bSMatthew Dillon 		 * target.
211e469566bSMatthew Dillon 		 *
212e469566bSMatthew Dillon 		 * Such deletions are needed if the master or files on the
213e469566bSMatthew Dillon 		 * master are no-history, or if the slave is so far behind
214e469566bSMatthew Dillon 		 * the master has already been pruned.
215e469566bSMatthew Dillon 		 */
2163324b8cdSMatthew Dillon 		if (elm->base.create_tid < mirror->tid_beg) {
2174c038e17SMatthew Dillon 			bytes = sizeof(mrec.rec);
2184c038e17SMatthew Dillon 			if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) >
2194c038e17SMatthew Dillon 			    mirror->size) {
2204c038e17SMatthew Dillon 				break;
2214c038e17SMatthew Dillon 			}
2224c038e17SMatthew Dillon 
2234c038e17SMatthew Dillon 			/*
224e469566bSMatthew Dillon 			 * Fill mrec.
2254c038e17SMatthew Dillon 			 */
2264c038e17SMatthew Dillon 			mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE;
2274c038e17SMatthew Dillon 			mrec.head.type = HAMMER_MREC_TYPE_PASS;
2284c038e17SMatthew Dillon 			mrec.head.rec_size = bytes;
2294c038e17SMatthew Dillon 			mrec.rec.leaf = *elm;
2304c038e17SMatthew Dillon 			mrec.head.rec_crc = crc32(&mrec.head.rec_size,
2314c038e17SMatthew Dillon 						 bytes - crc_start);
2324c038e17SMatthew Dillon 			error = copyout(&mrec, uptr, bytes);
2334c038e17SMatthew Dillon 			eatdisk = 1;
2344c038e17SMatthew Dillon 			goto didwrite;
2354c038e17SMatthew Dillon 
2364c038e17SMatthew Dillon 		}
2374c038e17SMatthew Dillon 
238dd94f1b1SMatthew Dillon 		/*
239c82af904SMatthew Dillon 		 * The core code exports the data to userland.
2404c286c36SMatthew Dillon 		 *
2414c286c36SMatthew Dillon 		 * CRC errors on data are reported but passed through,
2424c286c36SMatthew Dillon 		 * but the data must be washed by the user program.
243dd94f1b1SMatthew Dillon 		 */
2444c286c36SMatthew Dillon 		mrec_flags = 0;
245c82af904SMatthew Dillon 		data_len = (elm->data_offset) ? elm->data_len : 0;
246c82af904SMatthew Dillon 		if (data_len) {
247c82af904SMatthew Dillon 			error = hammer_btree_extract(&cursor,
248c82af904SMatthew Dillon 						     HAMMER_CURSOR_GET_DATA);
2494c286c36SMatthew Dillon 			if (error) {
2504c286c36SMatthew Dillon 				if (error != EDOM)
251c82af904SMatthew Dillon 					break;
2524c286c36SMatthew Dillon 				mrec_flags |= HAMMER_MRECF_CRC_ERROR |
2534c286c36SMatthew Dillon 					      HAMMER_MRECF_DATA_CRC_BAD;
2544c286c36SMatthew Dillon 			}
255c82af904SMatthew Dillon 		}
2564c038e17SMatthew Dillon 
2574c038e17SMatthew Dillon 		bytes = sizeof(mrec.rec) + data_len;
2584c038e17SMatthew Dillon 		if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) > mirror->size)
259c82af904SMatthew Dillon 			break;
260c82af904SMatthew Dillon 
261c82af904SMatthew Dillon 		/*
262c82af904SMatthew Dillon 		 * Construct the record for userland and copyout.
263c82af904SMatthew Dillon 		 *
264c82af904SMatthew Dillon 		 * The user is asking for a snapshot, if the record was
265c82af904SMatthew Dillon 		 * deleted beyond the user-requested ending tid, the record
266c82af904SMatthew Dillon 		 * is not considered deleted from the point of view of
267c82af904SMatthew Dillon 		 * userland and delete_tid is cleared.
268c82af904SMatthew Dillon 		 */
2694c038e17SMatthew Dillon 		mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE;
2704c286c36SMatthew Dillon 		mrec.head.type = HAMMER_MREC_TYPE_REC | mrec_flags;
2714c038e17SMatthew Dillon 		mrec.head.rec_size = bytes;
2724c038e17SMatthew Dillon 		mrec.rec.leaf = *elm;
2734c286c36SMatthew Dillon 
2744889cbd4SMatthew Dillon 		if (elm->base.delete_tid > mirror->tid_end)
2754c038e17SMatthew Dillon 			mrec.rec.leaf.base.delete_tid = 0;
2764c038e17SMatthew Dillon 		rec_crc = crc32(&mrec.head.rec_size,
2774c038e17SMatthew Dillon 				sizeof(mrec.rec) - crc_start);
2784c038e17SMatthew Dillon 		if (data_len)
2794c038e17SMatthew Dillon 			rec_crc = crc32_ext(cursor.data, data_len, rec_crc);
2804c038e17SMatthew Dillon 		mrec.head.rec_crc = rec_crc;
2814c038e17SMatthew Dillon 		error = copyout(&mrec, uptr, sizeof(mrec.rec));
282c82af904SMatthew Dillon 		if (data_len && error == 0) {
2834c038e17SMatthew Dillon 			error = copyout(cursor.data, uptr + sizeof(mrec.rec),
284c82af904SMatthew Dillon 					data_len);
285c82af904SMatthew Dillon 		}
2864c038e17SMatthew Dillon 		eatdisk = 1;
2874c038e17SMatthew Dillon 
2884c038e17SMatthew Dillon 		/*
2894c038e17SMatthew Dillon 		 * eatdisk controls whether we skip the current cursor
2904c038e17SMatthew Dillon 		 * position on the next scan or not.  If doing a SKIP
2914c038e17SMatthew Dillon 		 * the cursor is already positioned properly for the next
2924c038e17SMatthew Dillon 		 * scan and eatdisk will be 0.
2934c038e17SMatthew Dillon 		 */
2944c038e17SMatthew Dillon didwrite:
295dd94f1b1SMatthew Dillon 		if (error == 0) {
2964c038e17SMatthew Dillon 			mirror->count += HAMMER_HEAD_DOALIGN(bytes);
2974c038e17SMatthew Dillon 			if (eatdisk)
298dd94f1b1SMatthew Dillon 				cursor.flags |= HAMMER_CURSOR_ATEDISK;
2994c038e17SMatthew Dillon 			else
3004c038e17SMatthew Dillon 				cursor.flags &= ~HAMMER_CURSOR_ATEDISK;
301dd94f1b1SMatthew Dillon 			error = hammer_btree_iterate(&cursor);
302dd94f1b1SMatthew Dillon 		}
303dd94f1b1SMatthew Dillon 	}
304c82af904SMatthew Dillon 	if (error == ENOENT) {
305c82af904SMatthew Dillon 		mirror->key_cur = mirror->key_end;
306dd94f1b1SMatthew Dillon 		error = 0;
307c82af904SMatthew Dillon 	}
308dd94f1b1SMatthew Dillon 	hammer_done_cursor(&cursor);
309dd94f1b1SMatthew Dillon 	if (error == EDEADLK)
310dd94f1b1SMatthew Dillon 		goto retry;
311dd94f1b1SMatthew Dillon 	if (error == EINTR) {
312c82af904SMatthew Dillon 		mirror->head.flags |= HAMMER_IOC_HEAD_INTR;
313dd94f1b1SMatthew Dillon 		error = 0;
314dd94f1b1SMatthew Dillon 	}
315dd94f1b1SMatthew Dillon failed:
316dd94f1b1SMatthew Dillon 	mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK;
317dd94f1b1SMatthew Dillon 	return(error);
318dd94f1b1SMatthew Dillon }
319dd94f1b1SMatthew Dillon 
320c82af904SMatthew Dillon /*
3214c038e17SMatthew Dillon  * Copy records from userland to the target mirror.
322602c6cb8SMatthew Dillon  *
323ea434b6fSMatthew Dillon  * The PFS is identified in the mirror structure.  The passed ip is just
324ea434b6fSMatthew Dillon  * some directory in the overall HAMMER filesystem and has nothing to
325ea434b6fSMatthew Dillon  * do with the PFS.  In fact, there might not even be a root directory for
326ea434b6fSMatthew Dillon  * the PFS yet!
327c82af904SMatthew Dillon  */
328c82af904SMatthew Dillon int
329c82af904SMatthew Dillon hammer_ioc_mirror_write(hammer_transaction_t trans, hammer_inode_t ip,
330c82af904SMatthew Dillon 		       struct hammer_ioc_mirror_rw *mirror)
331c82af904SMatthew Dillon {
3324c038e17SMatthew Dillon 	union hammer_ioc_mrecord_any mrec;
333c82af904SMatthew Dillon 	struct hammer_cursor cursor;
334ea434b6fSMatthew Dillon 	u_int32_t localization;
33593291532SMatthew Dillon 	int checkspace_count = 0;
3364c038e17SMatthew Dillon 	int error;
3374c038e17SMatthew Dillon 	int bytes;
3384c038e17SMatthew Dillon 	char *uptr;
33993291532SMatthew Dillon 	int seq;
340ea434b6fSMatthew Dillon 
341ea434b6fSMatthew Dillon 	localization = (u_int32_t)mirror->pfs_id << 16;
34293291532SMatthew Dillon 	seq = trans->hmp->flusher.act;
343c82af904SMatthew Dillon 
3444c038e17SMatthew Dillon 	/*
3454c038e17SMatthew Dillon 	 * Validate the mirror structure and relocalize the tracking keys.
3464c038e17SMatthew Dillon 	 */
347c82af904SMatthew Dillon 	if (mirror->size < 0 || mirror->size > 0x70000000)
348c82af904SMatthew Dillon 		return(EINVAL);
3494c038e17SMatthew Dillon 	mirror->key_beg.localization &= HAMMER_LOCALIZE_MASK;
3504c038e17SMatthew Dillon 	mirror->key_beg.localization += localization;
3514c038e17SMatthew Dillon 	mirror->key_end.localization &= HAMMER_LOCALIZE_MASK;
3524c038e17SMatthew Dillon 	mirror->key_end.localization += localization;
3534c038e17SMatthew Dillon 	mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK;
3544c038e17SMatthew Dillon 	mirror->key_cur.localization += localization;
355c82af904SMatthew Dillon 
3564c038e17SMatthew Dillon 	/*
3574c038e17SMatthew Dillon 	 * Set up our tracking cursor for the loop.  The tracking cursor
3584c038e17SMatthew Dillon 	 * is used to delete records that are no longer present on the
3594c038e17SMatthew Dillon 	 * master.  The last handled record at key_cur must be skipped.
3604c038e17SMatthew Dillon 	 */
361c82af904SMatthew Dillon 	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
362c82af904SMatthew Dillon 
3634c038e17SMatthew Dillon 	cursor.key_beg = mirror->key_cur;
3644c038e17SMatthew Dillon 	cursor.key_end = mirror->key_end;
3654c038e17SMatthew Dillon 	cursor.flags |= HAMMER_CURSOR_BACKEND;
3664c038e17SMatthew Dillon 	error = hammer_btree_first(&cursor);
3674c038e17SMatthew Dillon 	if (error == 0)
3684c038e17SMatthew Dillon 		cursor.flags |= HAMMER_CURSOR_ATEDISK;
3694c038e17SMatthew Dillon 	if (error == ENOENT)
3704c038e17SMatthew Dillon 		error = 0;
3714c038e17SMatthew Dillon 
3724c038e17SMatthew Dillon 	/*
3734c038e17SMatthew Dillon 	 * Loop until our input buffer has been exhausted.
3744c038e17SMatthew Dillon 	 */
3754c038e17SMatthew Dillon 	while (error == 0 &&
3764c038e17SMatthew Dillon 		mirror->count + sizeof(mrec.head) <= mirror->size) {
3774c038e17SMatthew Dillon 
378c82af904SMatthew Dillon 	        /*
37993291532SMatthew Dillon 		 * Don't blow out the buffer cache.  Leave room for frontend
38093291532SMatthew Dillon 		 * cache as well.
381c9ce54d6SMatthew Dillon 		 *
382c9ce54d6SMatthew Dillon 		 * WARNING: See warnings in hammer_unlock_cursor() function.
38393291532SMatthew Dillon 		 */
38415e75dabSMatthew Dillon 		while (hammer_flusher_meta_halflimit(trans->hmp) ||
3857a61b85dSMatthew Dillon 		       hammer_flusher_undo_exhausted(trans, 2)) {
386982be4bfSMatthew Dillon 			hammer_unlock_cursor(&cursor);
38793291532SMatthew Dillon 			hammer_flusher_wait(trans->hmp, seq);
388982be4bfSMatthew Dillon 			hammer_lock_cursor(&cursor);
38915e75dabSMatthew Dillon 			seq = hammer_flusher_async_one(trans->hmp);
39093291532SMatthew Dillon 		}
39193291532SMatthew Dillon 
39293291532SMatthew Dillon 		/*
39393291532SMatthew Dillon 		 * If there is insufficient free space it may be due to
39493291532SMatthew Dillon 		 * reserved bigblocks, which flushing might fix.
39593291532SMatthew Dillon 		 */
39693291532SMatthew Dillon 		if (hammer_checkspace(trans->hmp, HAMMER_CHKSPC_MIRROR)) {
39793291532SMatthew Dillon 			if (++checkspace_count == 10) {
39893291532SMatthew Dillon 				error = ENOSPC;
39993291532SMatthew Dillon 				break;
40093291532SMatthew Dillon 			}
401982be4bfSMatthew Dillon 			hammer_unlock_cursor(&cursor);
40293291532SMatthew Dillon 			hammer_flusher_wait(trans->hmp, seq);
403982be4bfSMatthew Dillon 			hammer_lock_cursor(&cursor);
4047a61b85dSMatthew Dillon 			seq = hammer_flusher_async(trans->hmp, NULL);
40593291532SMatthew Dillon 		}
40693291532SMatthew Dillon 
40793291532SMatthew Dillon 
40893291532SMatthew Dillon 		/*
409c82af904SMatthew Dillon 		 * Acquire and validate header
410c82af904SMatthew Dillon 		 */
4114c038e17SMatthew Dillon 		if ((bytes = mirror->size - mirror->count) > sizeof(mrec))
4124c038e17SMatthew Dillon 			bytes = sizeof(mrec);
413c82af904SMatthew Dillon 		uptr = (char *)mirror->ubuf + mirror->count;
4144c038e17SMatthew Dillon 		error = copyin(uptr, &mrec, bytes);
415c82af904SMatthew Dillon 		if (error)
416c82af904SMatthew Dillon 			break;
4174c038e17SMatthew Dillon 		if (mrec.head.signature != HAMMER_IOC_MIRROR_SIGNATURE) {
418c82af904SMatthew Dillon 			error = EINVAL;
419c82af904SMatthew Dillon 			break;
420c82af904SMatthew Dillon 		}
4214c038e17SMatthew Dillon 		if (mrec.head.rec_size < sizeof(mrec.head) ||
4224c038e17SMatthew Dillon 		    mrec.head.rec_size > sizeof(mrec) + HAMMER_XBUFSIZE ||
4234c038e17SMatthew Dillon 		    mirror->count + mrec.head.rec_size > mirror->size) {
4245fa5c92fSMatthew Dillon 			error = EINVAL;
4255fa5c92fSMatthew Dillon 			break;
4265fa5c92fSMatthew Dillon 		}
4274c038e17SMatthew Dillon 
4284c286c36SMatthew Dillon 		switch(mrec.head.type & HAMMER_MRECF_TYPE_MASK) {
4294c038e17SMatthew Dillon 		case HAMMER_MREC_TYPE_SKIP:
4304c038e17SMatthew Dillon 			if (mrec.head.rec_size != sizeof(mrec.skip))
4314c038e17SMatthew Dillon 				error = EINVAL;
4324c038e17SMatthew Dillon 			if (error == 0)
4334c038e17SMatthew Dillon 				error = hammer_ioc_mirror_write_skip(&cursor, &mrec.skip, mirror, localization);
4344c038e17SMatthew Dillon 			break;
4354c038e17SMatthew Dillon 		case HAMMER_MREC_TYPE_REC:
4364c038e17SMatthew Dillon 			if (mrec.head.rec_size < sizeof(mrec.rec))
4374c038e17SMatthew Dillon 				error = EINVAL;
4384c038e17SMatthew Dillon 			if (error == 0)
4394c038e17SMatthew Dillon 				error = hammer_ioc_mirror_write_rec(&cursor, &mrec.rec, mirror, localization, uptr + sizeof(mrec.rec));
4404c038e17SMatthew Dillon 			break;
4414c286c36SMatthew Dillon 		case HAMMER_MREC_TYPE_REC_BADCRC:
4424c286c36SMatthew Dillon 			/*
4434c286c36SMatthew Dillon 			 * Records with bad data payloads are ignored XXX.
4444c286c36SMatthew Dillon 			 */
4454c286c36SMatthew Dillon 			if (mrec.head.rec_size < sizeof(mrec.rec))
4464c286c36SMatthew Dillon 				error = EINVAL;
4474c286c36SMatthew Dillon 			break;
4484c038e17SMatthew Dillon 		case HAMMER_MREC_TYPE_PASS:
4494c038e17SMatthew Dillon 			if (mrec.head.rec_size != sizeof(mrec.rec))
4504c038e17SMatthew Dillon 				error = EINVAL;
4514c038e17SMatthew Dillon 			if (error == 0)
4524c038e17SMatthew Dillon 				error = hammer_ioc_mirror_write_pass(&cursor, &mrec.rec, mirror, localization);
4534c038e17SMatthew Dillon 			break;
4544c038e17SMatthew Dillon 		default:
455c82af904SMatthew Dillon 			error = EINVAL;
456c82af904SMatthew Dillon 			break;
457c82af904SMatthew Dillon 		}
4584c038e17SMatthew Dillon 
4594c038e17SMatthew Dillon 		/*
4604c038e17SMatthew Dillon 		 * Retry the current record on deadlock, otherwise setup
4614c038e17SMatthew Dillon 		 * for the next loop.
4624c038e17SMatthew Dillon 		 */
4634c038e17SMatthew Dillon 		if (error == EDEADLK) {
4644c038e17SMatthew Dillon 			while (error == EDEADLK) {
465*f3a4893bSMatthew Dillon 				hammer_sync_lock_sh(trans);
4664c038e17SMatthew Dillon 				hammer_recover_cursor(&cursor);
4674c038e17SMatthew Dillon 				error = hammer_cursor_upgrade(&cursor);
468*f3a4893bSMatthew Dillon 				hammer_sync_unlock(trans);
469c82af904SMatthew Dillon 			}
4704c038e17SMatthew Dillon 		} else {
4714c038e17SMatthew Dillon 			if (error == EALREADY)
4724c038e17SMatthew Dillon 				error = 0;
4734c038e17SMatthew Dillon 			if (error == 0) {
4744c038e17SMatthew Dillon 				mirror->count +=
4754c038e17SMatthew Dillon 					HAMMER_HEAD_DOALIGN(mrec.head.rec_size);
4764c038e17SMatthew Dillon 			}
4774c038e17SMatthew Dillon 		}
4784c038e17SMatthew Dillon 	}
4794c038e17SMatthew Dillon 	hammer_done_cursor(&cursor);
4804c038e17SMatthew Dillon 
4814c038e17SMatthew Dillon 	/*
4824c038e17SMatthew Dillon 	 * cumulative error
4834c038e17SMatthew Dillon 	 */
4844c038e17SMatthew Dillon 	if (error) {
4854c038e17SMatthew Dillon 		mirror->head.flags |= HAMMER_IOC_HEAD_ERROR;
4864c038e17SMatthew Dillon 		mirror->head.error = error;
4874c038e17SMatthew Dillon 	}
4884c038e17SMatthew Dillon 
4894c038e17SMatthew Dillon 	/*
4904c038e17SMatthew Dillon 	 * ioctls don't update the RW data structure if an error is returned,
4914c038e17SMatthew Dillon 	 * always return 0.
4924c038e17SMatthew Dillon 	 */
4934c038e17SMatthew Dillon 	return(0);
4944c038e17SMatthew Dillon }
4954c038e17SMatthew Dillon 
4964c038e17SMatthew Dillon /*
4974c038e17SMatthew Dillon  * Handle skip records.
4984c038e17SMatthew Dillon  *
4994c038e17SMatthew Dillon  * We must iterate from the last resolved record position at mirror->key_cur
5003324b8cdSMatthew Dillon  * to skip_beg non-inclusive and delete any records encountered.
5014c038e17SMatthew Dillon  *
5024c038e17SMatthew Dillon  * mirror->key_cur must be carefully set when we succeed in processing
5034c038e17SMatthew Dillon  * this mrec.
5044c038e17SMatthew Dillon  */
5054c038e17SMatthew Dillon static int
5064c038e17SMatthew Dillon hammer_ioc_mirror_write_skip(hammer_cursor_t cursor,
5074c038e17SMatthew Dillon 			     struct hammer_ioc_mrecord_skip *mrec,
5084c038e17SMatthew Dillon 			     struct hammer_ioc_mirror_rw *mirror,
5094c038e17SMatthew Dillon 			     u_int32_t localization)
5104c038e17SMatthew Dillon {
5114c038e17SMatthew Dillon 	int error;
5124c038e17SMatthew Dillon 
5134c038e17SMatthew Dillon 	/*
5144c038e17SMatthew Dillon 	 * Relocalize the skip range
5154c038e17SMatthew Dillon 	 */
5164c038e17SMatthew Dillon 	mrec->skip_beg.localization &= HAMMER_LOCALIZE_MASK;
5174c038e17SMatthew Dillon 	mrec->skip_beg.localization += localization;
5184c038e17SMatthew Dillon 	mrec->skip_end.localization &= HAMMER_LOCALIZE_MASK;
5194c038e17SMatthew Dillon 	mrec->skip_end.localization += localization;
5204c038e17SMatthew Dillon 
5214c038e17SMatthew Dillon 	/*
5224c038e17SMatthew Dillon 	 * Iterate from current position to skip_beg, deleting any records
5233324b8cdSMatthew Dillon 	 * we encounter.  The record at skip_beg is not included (it is
5243324b8cdSMatthew Dillon 	 * skipped).
5254c038e17SMatthew Dillon 	 */
5264c038e17SMatthew Dillon 	cursor->key_end = mrec->skip_beg;
5273324b8cdSMatthew Dillon 	cursor->flags &= ~HAMMER_CURSOR_END_INCLUSIVE;
5284c038e17SMatthew Dillon 	cursor->flags |= HAMMER_CURSOR_BACKEND;
529842e7a70SMatthew Dillon 	error = hammer_mirror_delete_to(cursor, mirror);
5304c038e17SMatthew Dillon 
5314c038e17SMatthew Dillon 	/*
5324c038e17SMatthew Dillon 	 * Now skip past the skip (which is the whole point point of
5334c038e17SMatthew Dillon 	 * having a skip record).  The sender has not sent us any records
5344c038e17SMatthew Dillon 	 * for the skip area so we wouldn't know what to keep and what
5354c038e17SMatthew Dillon 	 * to delete anyway.
5364c038e17SMatthew Dillon 	 *
5374c038e17SMatthew Dillon 	 * Clear ATEDISK because skip_end is non-inclusive, so we can't
5384c038e17SMatthew Dillon 	 * count an exact match if we happened to get one.
5394c038e17SMatthew Dillon 	 */
5404c038e17SMatthew Dillon 	if (error == 0) {
5414c038e17SMatthew Dillon 		mirror->key_cur = mrec->skip_end;
5424c038e17SMatthew Dillon 		cursor->key_beg = mrec->skip_end;
5434c038e17SMatthew Dillon 		error = hammer_btree_lookup(cursor);
5444c038e17SMatthew Dillon 		cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
5454c038e17SMatthew Dillon 		if (error == ENOENT)
5464c038e17SMatthew Dillon 			error = 0;
5474c038e17SMatthew Dillon 	}
5484c038e17SMatthew Dillon 	return(error);
5494c038e17SMatthew Dillon }
5504c038e17SMatthew Dillon 
5514c038e17SMatthew Dillon /*
5524c038e17SMatthew Dillon  * Handle B-Tree records.
5534c038e17SMatthew Dillon  *
5544c038e17SMatthew Dillon  * We must iterate to mrec->base.key (non-inclusively), and then process
5554c038e17SMatthew Dillon  * the record.  We are allowed to write a new record or delete an existing
5564c038e17SMatthew Dillon  * record, but cannot replace an existing record.
5574c038e17SMatthew Dillon  *
5584c038e17SMatthew Dillon  * mirror->key_cur must be carefully set when we succeed in processing
5594c038e17SMatthew Dillon  * this mrec.
5604c038e17SMatthew Dillon  */
5614c038e17SMatthew Dillon static int
5624c038e17SMatthew Dillon hammer_ioc_mirror_write_rec(hammer_cursor_t cursor,
5634c038e17SMatthew Dillon 			    struct hammer_ioc_mrecord_rec *mrec,
5644c038e17SMatthew Dillon 			    struct hammer_ioc_mirror_rw *mirror,
5654c038e17SMatthew Dillon 			    u_int32_t localization,
5664c038e17SMatthew Dillon 			    char *uptr)
5674c038e17SMatthew Dillon {
5684c038e17SMatthew Dillon 	hammer_transaction_t trans;
5694c038e17SMatthew Dillon 	u_int32_t rec_crc;
5704c038e17SMatthew Dillon 	int error;
5714c038e17SMatthew Dillon 
5724c038e17SMatthew Dillon 	trans = cursor->trans;
5734c038e17SMatthew Dillon 	rec_crc = crc32(mrec, sizeof(*mrec));
5744c038e17SMatthew Dillon 
5754c038e17SMatthew Dillon 	if (mrec->leaf.data_len < 0 ||
5764c038e17SMatthew Dillon 	    mrec->leaf.data_len > HAMMER_XBUFSIZE ||
5774c038e17SMatthew Dillon 	    mrec->leaf.data_len + sizeof(*mrec) > mrec->head.rec_size) {
5784c038e17SMatthew Dillon 		return(EINVAL);
579c82af904SMatthew Dillon 	}
580c82af904SMatthew Dillon 
581c82af904SMatthew Dillon 	/*
582c82af904SMatthew Dillon 	 * Re-localize for target.  relocalization of data is handled
583c82af904SMatthew Dillon 	 * by hammer_mirror_write().
584c82af904SMatthew Dillon 	 */
5854c038e17SMatthew Dillon 	mrec->leaf.base.localization &= HAMMER_LOCALIZE_MASK;
5864c038e17SMatthew Dillon 	mrec->leaf.base.localization += localization;
5874c038e17SMatthew Dillon 
5884c038e17SMatthew Dillon 	/*
5894c038e17SMatthew Dillon 	 * Delete records through until we reach (non-inclusively) the
5904c038e17SMatthew Dillon 	 * target record.
5914c038e17SMatthew Dillon 	 */
5924c038e17SMatthew Dillon 	cursor->key_end = mrec->leaf.base;
5934c038e17SMatthew Dillon 	cursor->flags &= ~HAMMER_CURSOR_END_INCLUSIVE;
5944c038e17SMatthew Dillon 	cursor->flags |= HAMMER_CURSOR_BACKEND;
595842e7a70SMatthew Dillon 	error = hammer_mirror_delete_to(cursor, mirror);
596c82af904SMatthew Dillon 
597c82af904SMatthew Dillon 	/*
59883f2a3aaSMatthew Dillon 	 * Certain records are not part of the mirroring operation
59983f2a3aaSMatthew Dillon 	 */
60083f2a3aaSMatthew Dillon 	if (hammer_mirror_nomirror(&mrec->leaf.base))
60183f2a3aaSMatthew Dillon 		return(0);
60283f2a3aaSMatthew Dillon 
60383f2a3aaSMatthew Dillon 	/*
604c82af904SMatthew Dillon 	 * Locate the record.
605c82af904SMatthew Dillon 	 *
606c82af904SMatthew Dillon 	 * If the record exists only the delete_tid may be updated.
607c82af904SMatthew Dillon 	 *
608e469566bSMatthew Dillon 	 * If the record does not exist we can create it only if the
609e469566bSMatthew Dillon 	 * create_tid is not too old.  If the create_tid is too old
610e469566bSMatthew Dillon 	 * it may have already been destroyed on the slave from pruning.
611e469566bSMatthew Dillon 	 *
612e469566bSMatthew Dillon 	 * Note that mirror operations are effectively as-of operations
613e469566bSMatthew Dillon 	 * and delete_tid can be 0 for mirroring purposes even if it is
614c82af904SMatthew Dillon 	 * not actually 0 at the originator.
61598da6d8cSMatthew Dillon 	 *
61698da6d8cSMatthew Dillon 	 * These functions can return EDEADLK
617c82af904SMatthew Dillon 	 */
6184c038e17SMatthew Dillon 	cursor->key_beg = mrec->leaf.base;
6194c038e17SMatthew Dillon 	cursor->flags |= HAMMER_CURSOR_BACKEND;
6204c038e17SMatthew Dillon 	cursor->flags &= ~HAMMER_CURSOR_INSERT;
6214c038e17SMatthew Dillon 	error = hammer_btree_lookup(cursor);
622c82af904SMatthew Dillon 
6234c038e17SMatthew Dillon 	if (error == 0 && hammer_mirror_check(cursor, mrec)) {
6244c038e17SMatthew Dillon 		error = hammer_mirror_update(cursor, mrec);
625adf01747SMatthew Dillon 	} else if (error == ENOENT) {
62683f2a3aaSMatthew Dillon 		if (mrec->leaf.base.create_tid >= mirror->tid_beg) {
62783f2a3aaSMatthew Dillon 			error = hammer_create_at_cursor(
62883f2a3aaSMatthew Dillon 					cursor, &mrec->leaf,
62983f2a3aaSMatthew Dillon 					uptr, HAMMER_CREATE_MODE_UMIRROR);
63083f2a3aaSMatthew Dillon 		} else {
631adf01747SMatthew Dillon 			error = 0;
632c82af904SMatthew Dillon 		}
63383f2a3aaSMatthew Dillon 	}
6344c038e17SMatthew Dillon 	if (error == 0 || error == EALREADY)
6354c038e17SMatthew Dillon 		mirror->key_cur = mrec->leaf.base;
6364c038e17SMatthew Dillon 	return(error);
6374c038e17SMatthew Dillon }
638c82af904SMatthew Dillon 
639c82af904SMatthew Dillon /*
6404c038e17SMatthew Dillon  * This works like write_rec but no write or update is necessary,
6414c038e17SMatthew Dillon  * and no data payload is included so we couldn't do a write even
6424c038e17SMatthew Dillon  * if we wanted to.
6434c038e17SMatthew Dillon  *
6444c038e17SMatthew Dillon  * We must still iterate for deletions, and we can validate the
6454c038e17SMatthew Dillon  * record header which is a good way to test for corrupted mirror
6464c038e17SMatthew Dillon  * targets XXX.
6474c038e17SMatthew Dillon  *
6484c038e17SMatthew Dillon  * mirror->key_cur must be carefully set when we succeed in processing
6494c038e17SMatthew Dillon  * this mrec.
650c82af904SMatthew Dillon  */
6514c038e17SMatthew Dillon static
6524c038e17SMatthew Dillon int
6534c038e17SMatthew Dillon hammer_ioc_mirror_write_pass(hammer_cursor_t cursor,
6544c038e17SMatthew Dillon 			     struct hammer_ioc_mrecord_rec *mrec,
6554c038e17SMatthew Dillon 			     struct hammer_ioc_mirror_rw *mirror,
6564c038e17SMatthew Dillon 			     u_int32_t localization)
6574c038e17SMatthew Dillon {
6584c038e17SMatthew Dillon 	hammer_transaction_t trans;
6594c038e17SMatthew Dillon 	u_int32_t rec_crc;
6604c038e17SMatthew Dillon 	int error;
6614c038e17SMatthew Dillon 
6624c038e17SMatthew Dillon 	trans = cursor->trans;
6634c038e17SMatthew Dillon 	rec_crc = crc32(mrec, sizeof(*mrec));
6644c038e17SMatthew Dillon 
6654c038e17SMatthew Dillon 	/*
6664c038e17SMatthew Dillon 	 * Re-localize for target.  Relocalization of data is handled
6674c038e17SMatthew Dillon 	 * by hammer_mirror_write().
6684c038e17SMatthew Dillon 	 */
6694c038e17SMatthew Dillon 	mrec->leaf.base.localization &= HAMMER_LOCALIZE_MASK;
6704c038e17SMatthew Dillon 	mrec->leaf.base.localization += localization;
6714c038e17SMatthew Dillon 
6724c038e17SMatthew Dillon 	/*
6734c038e17SMatthew Dillon 	 * Delete records through until we reach (non-inclusively) the
6744c038e17SMatthew Dillon 	 * target record.
6754c038e17SMatthew Dillon 	 */
6764c038e17SMatthew Dillon 	cursor->key_end = mrec->leaf.base;
6774c038e17SMatthew Dillon 	cursor->flags &= ~HAMMER_CURSOR_END_INCLUSIVE;
6784c038e17SMatthew Dillon 	cursor->flags |= HAMMER_CURSOR_BACKEND;
679842e7a70SMatthew Dillon 	error = hammer_mirror_delete_to(cursor, mirror);
6804c038e17SMatthew Dillon 
6814c038e17SMatthew Dillon 	/*
68283f2a3aaSMatthew Dillon 	 * Certain records are not part of the mirroring operation
68383f2a3aaSMatthew Dillon 	 */
68483f2a3aaSMatthew Dillon 	if (hammer_mirror_nomirror(&mrec->leaf.base))
68583f2a3aaSMatthew Dillon 		return(0);
68683f2a3aaSMatthew Dillon 
68783f2a3aaSMatthew Dillon 	/*
688e469566bSMatthew Dillon 	 * Locate the record and get past it by setting ATEDISK.  Perform
689e469566bSMatthew Dillon 	 * any necessary deletions.  We have no data payload and cannot
690e469566bSMatthew Dillon 	 * create a new record.
6914c038e17SMatthew Dillon 	 */
6924c038e17SMatthew Dillon 	if (error == 0) {
6934c038e17SMatthew Dillon 		mirror->key_cur = mrec->leaf.base;
6944c038e17SMatthew Dillon 		cursor->key_beg = mrec->leaf.base;
6954c038e17SMatthew Dillon 		cursor->flags |= HAMMER_CURSOR_BACKEND;
6964c038e17SMatthew Dillon 		cursor->flags &= ~HAMMER_CURSOR_INSERT;
6974c038e17SMatthew Dillon 		error = hammer_btree_lookup(cursor);
698e469566bSMatthew Dillon 		if (error == 0) {
699e469566bSMatthew Dillon 			if (hammer_mirror_check(cursor, mrec))
700e469566bSMatthew Dillon 				error = hammer_mirror_update(cursor, mrec);
7014c038e17SMatthew Dillon 			cursor->flags |= HAMMER_CURSOR_ATEDISK;
702e469566bSMatthew Dillon 		} else {
7034c038e17SMatthew Dillon 			cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
704e469566bSMatthew Dillon 		}
7054c038e17SMatthew Dillon 		if (error == ENOENT)
7064c038e17SMatthew Dillon 			error = 0;
7074c038e17SMatthew Dillon 	}
7084c038e17SMatthew Dillon 	return(error);
709c82af904SMatthew Dillon }
710adf01747SMatthew Dillon 
7114c038e17SMatthew Dillon /*
7124c038e17SMatthew Dillon  * As part of the mirror write we iterate across swaths of records
7134c038e17SMatthew Dillon  * on the target which no longer exist on the source, and mark them
7144c038e17SMatthew Dillon  * deleted.
715842e7a70SMatthew Dillon  *
716842e7a70SMatthew Dillon  * The caller has indexed the cursor and set up key_end.  We iterate
717842e7a70SMatthew Dillon  * through to key_end.
718f96881ffSMatthew Dillon  *
719f96881ffSMatthew Dillon  * There is an edge case where the master has deleted a record whos
720f96881ffSMatthew Dillon  * create_tid exactly matches our end_tid.  We cannot delete this
721f96881ffSMatthew Dillon  * record on the slave yet because we cannot assign delete_tid == create_tid.
722f96881ffSMatthew Dillon  * The deletion should be picked up on the next sequence since in order
723f96881ffSMatthew Dillon  * to have been deleted on the master a transaction must have occured with
724f96881ffSMatthew Dillon  * a TID greater then the create_tid of the record.
7253324b8cdSMatthew Dillon  *
7263324b8cdSMatthew Dillon  * To support incremental re-mirroring, just for robustness, we do not
7273324b8cdSMatthew Dillon  * touch any records created beyond (or equal to) mirror->tid_end.
7284c038e17SMatthew Dillon  */
7294c038e17SMatthew Dillon static
7304c038e17SMatthew Dillon int
731842e7a70SMatthew Dillon hammer_mirror_delete_to(hammer_cursor_t cursor,
7324c038e17SMatthew Dillon 		       struct hammer_ioc_mirror_rw *mirror)
7334c038e17SMatthew Dillon {
734842e7a70SMatthew Dillon 	hammer_btree_leaf_elm_t elm;
73598da6d8cSMatthew Dillon 	int error;
73698da6d8cSMatthew Dillon 
737842e7a70SMatthew Dillon 	error = hammer_btree_iterate(cursor);
738842e7a70SMatthew Dillon 	while (error == 0) {
739842e7a70SMatthew Dillon 		elm = &cursor->node->ondisk->elms[cursor->index].leaf;
740842e7a70SMatthew Dillon 		KKASSERT(elm->base.btype == HAMMER_BTREE_TYPE_RECORD);
7414889cbd4SMatthew Dillon 		cursor->flags |= HAMMER_CURSOR_ATEDISK;
7423324b8cdSMatthew Dillon 
7433324b8cdSMatthew Dillon 		/*
74483f2a3aaSMatthew Dillon 		 * Certain records are not part of the mirroring operation
74583f2a3aaSMatthew Dillon 		 */
74683f2a3aaSMatthew Dillon 		if (hammer_mirror_nomirror(&elm->base)) {
74783f2a3aaSMatthew Dillon 			error = hammer_btree_iterate(cursor);
74883f2a3aaSMatthew Dillon 			continue;
74983f2a3aaSMatthew Dillon 		}
75083f2a3aaSMatthew Dillon 
75183f2a3aaSMatthew Dillon 		/*
7523324b8cdSMatthew Dillon 		 * Note: Must still delete records with create_tid < tid_beg,
7533324b8cdSMatthew Dillon 		 *	 as record may have been pruned-away on source.
7543324b8cdSMatthew Dillon 		 */
755f96881ffSMatthew Dillon 		if (elm->base.delete_tid == 0 &&
7563324b8cdSMatthew Dillon 		    elm->base.create_tid < mirror->tid_end) {
757842e7a70SMatthew Dillon 			error = hammer_delete_at_cursor(cursor,
758842e7a70SMatthew Dillon 							HAMMER_DELETE_ADJUST,
759842e7a70SMatthew Dillon 							mirror->tid_end,
760842e7a70SMatthew Dillon 							time_second,
761842e7a70SMatthew Dillon 							1, NULL);
762842e7a70SMatthew Dillon 		}
763842e7a70SMatthew Dillon 		if (error == 0)
764842e7a70SMatthew Dillon 			error = hammer_btree_iterate(cursor);
765842e7a70SMatthew Dillon 	}
766842e7a70SMatthew Dillon 	if (error == ENOENT)
767842e7a70SMatthew Dillon 		error = 0;
768842e7a70SMatthew Dillon 	return(error);
769c82af904SMatthew Dillon }
770c82af904SMatthew Dillon 
771c82af904SMatthew Dillon /*
772c82af904SMatthew Dillon  * Check whether an update is needed in the case where a match already
773c82af904SMatthew Dillon  * exists on the target.  The only type of update allowed in this case
774c82af904SMatthew Dillon  * is an update of the delete_tid.
775c82af904SMatthew Dillon  *
776c82af904SMatthew Dillon  * Return non-zero if the update should proceed.
777c82af904SMatthew Dillon  */
778c82af904SMatthew Dillon static
779c82af904SMatthew Dillon int
7804c038e17SMatthew Dillon hammer_mirror_check(hammer_cursor_t cursor, struct hammer_ioc_mrecord_rec *mrec)
781c82af904SMatthew Dillon {
782c82af904SMatthew Dillon 	hammer_btree_leaf_elm_t leaf = cursor->leaf;
783c82af904SMatthew Dillon 
784c82af904SMatthew Dillon 	if (leaf->base.delete_tid != mrec->leaf.base.delete_tid) {
785ea434b6fSMatthew Dillon 		if (mrec->leaf.base.delete_tid != 0)
786c82af904SMatthew Dillon 			return(1);
787c82af904SMatthew Dillon 	}
788c82af904SMatthew Dillon 	return(0);
789c82af904SMatthew Dillon }
790c82af904SMatthew Dillon 
791c82af904SMatthew Dillon /*
79283f2a3aaSMatthew Dillon  * Filter out records which are never mirrored, such as configuration space
79383f2a3aaSMatthew Dillon  * records (for hammer cleanup).
79483f2a3aaSMatthew Dillon  *
79583f2a3aaSMatthew Dillon  * NOTE: We currently allow HAMMER_RECTYPE_SNAPSHOT records to be mirrored.
79683f2a3aaSMatthew Dillon  */
79783f2a3aaSMatthew Dillon static
79883f2a3aaSMatthew Dillon int
79983f2a3aaSMatthew Dillon hammer_mirror_nomirror(struct hammer_base_elm *base)
80083f2a3aaSMatthew Dillon {
80183f2a3aaSMatthew Dillon 	/*
80283f2a3aaSMatthew Dillon 	 * Certain types of records are never updated when mirroring.
80383f2a3aaSMatthew Dillon 	 * Slaves have their own configuration space.
80483f2a3aaSMatthew Dillon 	 */
80583f2a3aaSMatthew Dillon 	if (base->rec_type == HAMMER_RECTYPE_CONFIG)
80683f2a3aaSMatthew Dillon 		return(1);
80783f2a3aaSMatthew Dillon 	return(0);
80883f2a3aaSMatthew Dillon }
80983f2a3aaSMatthew Dillon 
81083f2a3aaSMatthew Dillon 
81183f2a3aaSMatthew Dillon /*
812842e7a70SMatthew Dillon  * Update a record in-place.  Only the delete_tid can change, and
813842e7a70SMatthew Dillon  * only from zero to non-zero.
814c82af904SMatthew Dillon  */
815c82af904SMatthew Dillon static
816c82af904SMatthew Dillon int
8174c038e17SMatthew Dillon hammer_mirror_update(hammer_cursor_t cursor,
8184c038e17SMatthew Dillon 		     struct hammer_ioc_mrecord_rec *mrec)
819c82af904SMatthew Dillon {
82098da6d8cSMatthew Dillon 	int error;
82198da6d8cSMatthew Dillon 
822842e7a70SMatthew Dillon 	/*
823842e7a70SMatthew Dillon 	 * This case shouldn't occur.
824842e7a70SMatthew Dillon 	 */
825842e7a70SMatthew Dillon 	if (mrec->leaf.base.delete_tid == 0)
82606ad81ffSMatthew Dillon 		return(0);
827adf01747SMatthew Dillon 
828adf01747SMatthew Dillon 	/*
829842e7a70SMatthew Dillon 	 * Mark the record deleted on the mirror target.
8304c038e17SMatthew Dillon 	 */
831842e7a70SMatthew Dillon 	error = hammer_delete_at_cursor(cursor, HAMMER_DELETE_ADJUST,
832842e7a70SMatthew Dillon 					mrec->leaf.base.delete_tid,
833842e7a70SMatthew Dillon 					mrec->leaf.delete_ts,
834842e7a70SMatthew Dillon 					1, NULL);
8354c038e17SMatthew Dillon 	cursor->flags |= HAMMER_CURSOR_ATEDISK;
836842e7a70SMatthew Dillon 	return(error);
837c82af904SMatthew Dillon }
838c82af904SMatthew Dillon 
83983f2a3aaSMatthew Dillon #if 0
84083f2a3aaSMatthew Dillon /*
84183f2a3aaSMatthew Dillon  * MOVED TO HAMMER_OBJECT.C: hammer_create_at_cursor()
84283f2a3aaSMatthew Dillon  */
84383f2a3aaSMatthew Dillon 
84483f2a3aaSMatthew Dillon static int hammer_mirror_localize_data(hammer_data_ondisk_t data,
84583f2a3aaSMatthew Dillon 				hammer_btree_leaf_elm_t leaf);
84683f2a3aaSMatthew Dillon 
847c82af904SMatthew Dillon /*
848c82af904SMatthew Dillon  * Write out a new record.
849c82af904SMatthew Dillon  */
850c82af904SMatthew Dillon static
851c82af904SMatthew Dillon int
8524c038e17SMatthew Dillon hammer_mirror_write(hammer_cursor_t cursor,
8534c038e17SMatthew Dillon 		    struct hammer_ioc_mrecord_rec *mrec,
8544c038e17SMatthew Dillon 		    char *udata)
855c82af904SMatthew Dillon {
856adf01747SMatthew Dillon 	hammer_transaction_t trans;
857adf01747SMatthew Dillon 	hammer_buffer_t data_buffer;
858c82af904SMatthew Dillon 	hammer_off_t ndata_offset;
859a56cb012SMatthew Dillon 	hammer_tid_t high_tid;
860c82af904SMatthew Dillon 	void *ndata;
861c82af904SMatthew Dillon 	int error;
862602c6cb8SMatthew Dillon 	int doprop;
863c82af904SMatthew Dillon 
864adf01747SMatthew Dillon 	trans = cursor->trans;
865adf01747SMatthew Dillon 	data_buffer = NULL;
866adf01747SMatthew Dillon 
867adf01747SMatthew Dillon 	/*
86898da6d8cSMatthew Dillon 	 * Get the sync lock so the whole mess is atomic
86998da6d8cSMatthew Dillon 	 */
87098da6d8cSMatthew Dillon 	hammer_sync_lock_sh(trans);
87198da6d8cSMatthew Dillon 
87298da6d8cSMatthew Dillon 	/*
873adf01747SMatthew Dillon 	 * Allocate and adjust data
874adf01747SMatthew Dillon 	 */
875c82af904SMatthew Dillon 	if (mrec->leaf.data_len && mrec->leaf.data_offset) {
876adf01747SMatthew Dillon 		ndata = hammer_alloc_data(trans, mrec->leaf.data_len,
877c82af904SMatthew Dillon 					  mrec->leaf.base.rec_type,
878df2ccbacSMatthew Dillon 					  &ndata_offset, &data_buffer,
879df2ccbacSMatthew Dillon 					  0, &error);
880c82af904SMatthew Dillon 		if (ndata == NULL)
881c82af904SMatthew Dillon 			return(error);
882c82af904SMatthew Dillon 		mrec->leaf.data_offset = ndata_offset;
883adf01747SMatthew Dillon 		hammer_modify_buffer(trans, data_buffer, NULL, 0);
884c82af904SMatthew Dillon 		error = copyin(udata, ndata, mrec->leaf.data_len);
885c82af904SMatthew Dillon 		if (error == 0) {
886c82af904SMatthew Dillon 			if (hammer_crc_test_leaf(ndata, &mrec->leaf) == 0) {
887c82af904SMatthew Dillon 				kprintf("data crc mismatch on pipe\n");
888c82af904SMatthew Dillon 				error = EINVAL;
889c82af904SMatthew Dillon 			} else {
890c82af904SMatthew Dillon 				error = hammer_mirror_localize_data(
891c82af904SMatthew Dillon 							ndata, &mrec->leaf);
892c82af904SMatthew Dillon 			}
893c82af904SMatthew Dillon 		}
894c82af904SMatthew Dillon 		hammer_modify_buffer_done(data_buffer);
895c82af904SMatthew Dillon 	} else {
896c82af904SMatthew Dillon 		mrec->leaf.data_offset = 0;
897c82af904SMatthew Dillon 		error = 0;
898c82af904SMatthew Dillon 		ndata = NULL;
899c82af904SMatthew Dillon 	}
900c82af904SMatthew Dillon 	if (error)
901c82af904SMatthew Dillon 		goto failed;
902adf01747SMatthew Dillon 
903adf01747SMatthew Dillon 	/*
9044c038e17SMatthew Dillon 	 * Do the insertion.  This can fail with a EDEADLK or EALREADY
905adf01747SMatthew Dillon 	 */
906c82af904SMatthew Dillon 	cursor->flags |= HAMMER_CURSOR_INSERT;
907c82af904SMatthew Dillon 	error = hammer_btree_lookup(cursor);
908c82af904SMatthew Dillon 	if (error != ENOENT) {
909c82af904SMatthew Dillon 		if (error == 0)
910c82af904SMatthew Dillon 			error = EALREADY;
911c82af904SMatthew Dillon 		goto failed;
912c82af904SMatthew Dillon 	}
913c82af904SMatthew Dillon 
914602c6cb8SMatthew Dillon 	error = hammer_btree_insert(cursor, &mrec->leaf, &doprop);
915adf01747SMatthew Dillon 
916adf01747SMatthew Dillon 	/*
9174c038e17SMatthew Dillon 	 * Cursor is left on the current element, we want to skip it now.
9184c038e17SMatthew Dillon 	 */
9194c038e17SMatthew Dillon 	cursor->flags |= HAMMER_CURSOR_ATEDISK;
9204c038e17SMatthew Dillon 	cursor->flags &= ~HAMMER_CURSOR_INSERT;
9214c038e17SMatthew Dillon 
9224c038e17SMatthew Dillon 	/*
923adf01747SMatthew Dillon 	 * Track a count of active inodes.
924adf01747SMatthew Dillon 	 */
925842e7a70SMatthew Dillon 	if (error == 0 &&
926842e7a70SMatthew Dillon 	    mrec->leaf.base.rec_type == HAMMER_RECTYPE_INODE &&
927842e7a70SMatthew Dillon 	    mrec->leaf.base.delete_tid == 0) {
928adf01747SMatthew Dillon 		hammer_modify_volume_field(trans,
929adf01747SMatthew Dillon 					   trans->rootvol,
930adf01747SMatthew Dillon 					   vol0_stat_inodes);
931adf01747SMatthew Dillon 		++trans->hmp->rootvol->ondisk->vol0_stat_inodes;
932adf01747SMatthew Dillon 		hammer_modify_volume_done(trans->rootvol);
933adf01747SMatthew Dillon 	}
934a56cb012SMatthew Dillon 
935a56cb012SMatthew Dillon 	/*
936a56cb012SMatthew Dillon 	 * vol0_next_tid must track the highest TID stored in the filesystem.
937a56cb012SMatthew Dillon 	 * We do not need to generate undo for this update.
938a56cb012SMatthew Dillon 	 */
939a56cb012SMatthew Dillon 	high_tid = mrec->leaf.base.create_tid;
940a56cb012SMatthew Dillon 	if (high_tid < mrec->leaf.base.delete_tid)
941a56cb012SMatthew Dillon 		high_tid = mrec->leaf.base.delete_tid;
942a56cb012SMatthew Dillon 	if (trans->rootvol->ondisk->vol0_next_tid < high_tid) {
943a56cb012SMatthew Dillon 		hammer_modify_volume(trans, trans->rootvol, NULL, 0);
944a56cb012SMatthew Dillon 		trans->rootvol->ondisk->vol0_next_tid = high_tid;
945a56cb012SMatthew Dillon 		hammer_modify_volume_done(trans->rootvol);
946a56cb012SMatthew Dillon 	}
947a56cb012SMatthew Dillon 
948c9ce54d6SMatthew Dillon 	/*
949c9ce54d6SMatthew Dillon 	 * WARNING!  cursor's leaf pointer may have changed after
950c9ce54d6SMatthew Dillon 	 *	     do_propagation returns.
951c9ce54d6SMatthew Dillon 	 */
952602c6cb8SMatthew Dillon 	if (error == 0 && doprop)
9534c038e17SMatthew Dillon 		hammer_btree_do_propagation(cursor, NULL, &mrec->leaf);
954c82af904SMatthew Dillon 
955c82af904SMatthew Dillon failed:
956c82af904SMatthew Dillon 	/*
957c82af904SMatthew Dillon 	 * Cleanup
958c82af904SMatthew Dillon 	 */
959c82af904SMatthew Dillon 	if (error && mrec->leaf.data_offset) {
960c82af904SMatthew Dillon 		hammer_blockmap_free(cursor->trans,
961c82af904SMatthew Dillon 				     mrec->leaf.data_offset,
962c82af904SMatthew Dillon 				     mrec->leaf.data_len);
963c82af904SMatthew Dillon 	}
96498da6d8cSMatthew Dillon 	hammer_sync_unlock(trans);
965c82af904SMatthew Dillon 	if (data_buffer)
966c82af904SMatthew Dillon 		hammer_rel_buffer(data_buffer, 0);
967c82af904SMatthew Dillon 	return(error);
968c82af904SMatthew Dillon }
969c82af904SMatthew Dillon 
970c82af904SMatthew Dillon /*
971c82af904SMatthew Dillon  * Localize the data payload.  Directory entries may need their
972c82af904SMatthew Dillon  * localization adjusted.
973c82af904SMatthew Dillon  */
974c82af904SMatthew Dillon static
975c82af904SMatthew Dillon int
976c82af904SMatthew Dillon hammer_mirror_localize_data(hammer_data_ondisk_t data,
977c82af904SMatthew Dillon 			    hammer_btree_leaf_elm_t leaf)
978c82af904SMatthew Dillon {
979c82af904SMatthew Dillon 	u_int32_t localization;
980c82af904SMatthew Dillon 
981c82af904SMatthew Dillon 	if (leaf->base.rec_type == HAMMER_RECTYPE_DIRENTRY) {
982c82af904SMatthew Dillon 		localization = leaf->base.localization &
983c82af904SMatthew Dillon 			       HAMMER_LOCALIZE_PSEUDOFS_MASK;
984c82af904SMatthew Dillon 		if (data->entry.localization != localization) {
985c82af904SMatthew Dillon 			data->entry.localization = localization;
986c82af904SMatthew Dillon 			hammer_crc_set_leaf(data, leaf);
987adf01747SMatthew Dillon 		}
988adf01747SMatthew Dillon 	}
989adf01747SMatthew Dillon 	return(0);
990c82af904SMatthew Dillon }
991c82af904SMatthew Dillon 
99283f2a3aaSMatthew Dillon #endif
993