xref: /dflybsd-src/sys/vfs/hammer/hammer_mirror.c (revision 982be4bfef38fc8be86f896586cd899dd9994fc8)
1dd94f1b1SMatthew Dillon /*
2dd94f1b1SMatthew Dillon  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3dd94f1b1SMatthew Dillon  *
4dd94f1b1SMatthew Dillon  * This code is derived from software contributed to The DragonFly Project
5dd94f1b1SMatthew Dillon  * by Matthew Dillon <dillon@backplane.com>
6dd94f1b1SMatthew Dillon  *
7dd94f1b1SMatthew Dillon  * Redistribution and use in source and binary forms, with or without
8dd94f1b1SMatthew Dillon  * modification, are permitted provided that the following conditions
9dd94f1b1SMatthew Dillon  * are met:
10dd94f1b1SMatthew Dillon  *
11dd94f1b1SMatthew Dillon  * 1. Redistributions of source code must retain the above copyright
12dd94f1b1SMatthew Dillon  *    notice, this list of conditions and the following disclaimer.
13dd94f1b1SMatthew Dillon  * 2. Redistributions in binary form must reproduce the above copyright
14dd94f1b1SMatthew Dillon  *    notice, this list of conditions and the following disclaimer in
15dd94f1b1SMatthew Dillon  *    the documentation and/or other materials provided with the
16dd94f1b1SMatthew Dillon  *    distribution.
17dd94f1b1SMatthew Dillon  * 3. Neither the name of The DragonFly Project nor the names of its
18dd94f1b1SMatthew Dillon  *    contributors may be used to endorse or promote products derived
19dd94f1b1SMatthew Dillon  *    from this software without specific, prior written permission.
20dd94f1b1SMatthew Dillon  *
21dd94f1b1SMatthew Dillon  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22dd94f1b1SMatthew Dillon  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23dd94f1b1SMatthew Dillon  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24dd94f1b1SMatthew Dillon  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25dd94f1b1SMatthew Dillon  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26dd94f1b1SMatthew Dillon  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27dd94f1b1SMatthew Dillon  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28dd94f1b1SMatthew Dillon  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29dd94f1b1SMatthew Dillon  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30dd94f1b1SMatthew Dillon  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31dd94f1b1SMatthew Dillon  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32dd94f1b1SMatthew Dillon  * SUCH DAMAGE.
33dd94f1b1SMatthew Dillon  *
34e469566bSMatthew Dillon  * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.17 2008/07/31 22:30:33 dillon Exp $
35dd94f1b1SMatthew Dillon  */
36dd94f1b1SMatthew Dillon /*
37dd94f1b1SMatthew Dillon  * HAMMER mirroring ioctls - serialize and deserialize modifications made
38dd94f1b1SMatthew Dillon  *			     to a filesystem.
39dd94f1b1SMatthew Dillon  */
40dd94f1b1SMatthew Dillon 
41dd94f1b1SMatthew Dillon #include "hammer.h"
42dd94f1b1SMatthew Dillon 
43c82af904SMatthew Dillon static int hammer_mirror_check(hammer_cursor_t cursor,
444c038e17SMatthew Dillon 				struct hammer_ioc_mrecord_rec *mrec);
45c82af904SMatthew Dillon static int hammer_mirror_update(hammer_cursor_t cursor,
464c038e17SMatthew Dillon 				struct hammer_ioc_mrecord_rec *mrec);
47c82af904SMatthew Dillon static int hammer_mirror_write(hammer_cursor_t cursor,
484c038e17SMatthew Dillon 				struct hammer_ioc_mrecord_rec *mrec,
494c038e17SMatthew Dillon 				char *udata);
504c038e17SMatthew Dillon static int hammer_ioc_mirror_write_rec(hammer_cursor_t cursor,
514c038e17SMatthew Dillon 				struct hammer_ioc_mrecord_rec *mrec,
524c038e17SMatthew Dillon 				struct hammer_ioc_mirror_rw *mirror,
534c038e17SMatthew Dillon 				u_int32_t localization,
544c038e17SMatthew Dillon 				char *uptr);
554c038e17SMatthew Dillon static int hammer_ioc_mirror_write_pass(hammer_cursor_t cursor,
564c038e17SMatthew Dillon 				struct hammer_ioc_mrecord_rec *mrec,
574c038e17SMatthew Dillon 				struct hammer_ioc_mirror_rw *mirror,
584c038e17SMatthew Dillon 				u_int32_t localization);
594c038e17SMatthew Dillon static int hammer_ioc_mirror_write_skip(hammer_cursor_t cursor,
604c038e17SMatthew Dillon 				struct hammer_ioc_mrecord_skip *mrec,
614c038e17SMatthew Dillon 				struct hammer_ioc_mirror_rw *mirror,
624c038e17SMatthew Dillon 				u_int32_t localization);
63842e7a70SMatthew Dillon static int hammer_mirror_delete_to(hammer_cursor_t cursor,
644c038e17SMatthew Dillon 			        struct hammer_ioc_mirror_rw *mirror);
65c82af904SMatthew Dillon static int hammer_mirror_localize_data(hammer_data_ondisk_t data,
66c82af904SMatthew Dillon 				hammer_btree_leaf_elm_t leaf);
67c82af904SMatthew Dillon 
68c82af904SMatthew Dillon /*
69c82af904SMatthew Dillon  * All B-Tree records within the specified key range which also conform
70c82af904SMatthew Dillon  * to the transaction id range are returned.  Mirroring code keeps track
71c82af904SMatthew Dillon  * of the last transaction id fully scanned and can efficiently pick up
72c82af904SMatthew Dillon  * where it left off if interrupted.
73ea434b6fSMatthew Dillon  *
74ea434b6fSMatthew Dillon  * The PFS is identified in the mirror structure.  The passed ip is just
75ea434b6fSMatthew Dillon  * some directory in the overall HAMMER filesystem and has nothing to
76ea434b6fSMatthew Dillon  * do with the PFS.
77c82af904SMatthew Dillon  */
78dd94f1b1SMatthew Dillon int
79dd94f1b1SMatthew Dillon hammer_ioc_mirror_read(hammer_transaction_t trans, hammer_inode_t ip,
80dd94f1b1SMatthew Dillon 		       struct hammer_ioc_mirror_rw *mirror)
81dd94f1b1SMatthew Dillon {
824c038e17SMatthew Dillon 	struct hammer_cmirror cmirror;
83dd94f1b1SMatthew Dillon 	struct hammer_cursor cursor;
844c038e17SMatthew Dillon 	union hammer_ioc_mrecord_any mrec;
85c82af904SMatthew Dillon 	hammer_btree_leaf_elm_t elm;
86c82af904SMatthew Dillon 	const int crc_start = HAMMER_MREC_CRCOFF;
87c82af904SMatthew Dillon 	char *uptr;
88dd94f1b1SMatthew Dillon 	int error;
89c82af904SMatthew Dillon 	int data_len;
90c82af904SMatthew Dillon 	int bytes;
914c038e17SMatthew Dillon 	int eatdisk;
92ea434b6fSMatthew Dillon 	u_int32_t localization;
934c038e17SMatthew Dillon 	u_int32_t rec_crc;
94ea434b6fSMatthew Dillon 
95ea434b6fSMatthew Dillon 	localization = (u_int32_t)mirror->pfs_id << 16;
96dd94f1b1SMatthew Dillon 
97dd94f1b1SMatthew Dillon 	if ((mirror->key_beg.localization | mirror->key_end.localization) &
98dd94f1b1SMatthew Dillon 	    HAMMER_LOCALIZE_PSEUDOFS_MASK) {
99dd94f1b1SMatthew Dillon 		return(EINVAL);
100dd94f1b1SMatthew Dillon 	}
101dd94f1b1SMatthew Dillon 	if (hammer_btree_cmp(&mirror->key_beg, &mirror->key_end) > 0)
102dd94f1b1SMatthew Dillon 		return(EINVAL);
103dd94f1b1SMatthew Dillon 
104dd94f1b1SMatthew Dillon 	mirror->key_cur = mirror->key_beg;
1054c038e17SMatthew Dillon 	mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK;
106ea434b6fSMatthew Dillon 	mirror->key_cur.localization += localization;
107c82af904SMatthew Dillon 	bzero(&mrec, sizeof(mrec));
1084c038e17SMatthew Dillon 	bzero(&cmirror, sizeof(cmirror));
109dd94f1b1SMatthew Dillon 
110dd94f1b1SMatthew Dillon retry:
111dd94f1b1SMatthew Dillon 	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
112dd94f1b1SMatthew Dillon 	if (error) {
113dd94f1b1SMatthew Dillon 		hammer_done_cursor(&cursor);
114dd94f1b1SMatthew Dillon 		goto failed;
115dd94f1b1SMatthew Dillon 	}
116dd94f1b1SMatthew Dillon 	cursor.key_beg = mirror->key_cur;
117dd94f1b1SMatthew Dillon 	cursor.key_end = mirror->key_end;
1184c038e17SMatthew Dillon 	cursor.key_end.localization &= HAMMER_LOCALIZE_MASK;
119ea434b6fSMatthew Dillon 	cursor.key_end.localization += localization;
120dd94f1b1SMatthew Dillon 
121dd94f1b1SMatthew Dillon 	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
122dd94f1b1SMatthew Dillon 	cursor.flags |= HAMMER_CURSOR_BACKEND;
123dd94f1b1SMatthew Dillon 
124dd94f1b1SMatthew Dillon 	/*
125c82af904SMatthew Dillon 	 * This flag filters the search to only return elements whos create
126c82af904SMatthew Dillon 	 * or delete TID is >= mirror_tid.  The B-Tree uses the mirror_tid
127c82af904SMatthew Dillon 	 * field stored with internal and leaf nodes to shortcut the scan.
128dd94f1b1SMatthew Dillon 	 */
129c82af904SMatthew Dillon 	cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
1304c038e17SMatthew Dillon 	cursor.cmirror = &cmirror;
1314c038e17SMatthew Dillon 	cmirror.mirror_tid = mirror->tid_beg;
132dd94f1b1SMatthew Dillon 
133dd94f1b1SMatthew Dillon 	error = hammer_btree_first(&cursor);
134dd94f1b1SMatthew Dillon 	while (error == 0) {
135dd94f1b1SMatthew Dillon 		/*
13693291532SMatthew Dillon 		 * Yield to more important tasks
13793291532SMatthew Dillon 		 */
13893291532SMatthew Dillon 		if (error == 0) {
13993291532SMatthew Dillon 			error = hammer_signal_check(trans->hmp);
14093291532SMatthew Dillon 			if (error)
14193291532SMatthew Dillon 				break;
14293291532SMatthew Dillon 		}
14393291532SMatthew Dillon 
14493291532SMatthew Dillon 		/*
1454c038e17SMatthew Dillon 		 * An internal node can be returned in mirror-filtered
1464c038e17SMatthew Dillon 		 * mode and indicates that the scan is returning a skip
1474c038e17SMatthew Dillon 		 * range in the cursor->cmirror structure.
1484c038e17SMatthew Dillon 		 */
1494c038e17SMatthew Dillon 		uptr = (char *)mirror->ubuf + mirror->count;
1504c038e17SMatthew Dillon 		if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL) {
1514c038e17SMatthew Dillon 			/*
1524c038e17SMatthew Dillon 			 * Check space
1534c038e17SMatthew Dillon 			 */
1544c038e17SMatthew Dillon 			mirror->key_cur = cmirror.skip_beg;
1554c038e17SMatthew Dillon 			bytes = sizeof(mrec.skip);
1564c038e17SMatthew Dillon 			if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) >
1574c038e17SMatthew Dillon 			    mirror->size) {
1584c038e17SMatthew Dillon 				break;
1594c038e17SMatthew Dillon 			}
1604c038e17SMatthew Dillon 
1614c038e17SMatthew Dillon 			/*
1624c038e17SMatthew Dillon 			 * Fill mrec
1634c038e17SMatthew Dillon 			 */
1644c038e17SMatthew Dillon 			mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE;
1654c038e17SMatthew Dillon 			mrec.head.type = HAMMER_MREC_TYPE_SKIP;
1664c038e17SMatthew Dillon 			mrec.head.rec_size = bytes;
1674c038e17SMatthew Dillon 			mrec.skip.skip_beg = cmirror.skip_beg;
1684c038e17SMatthew Dillon 			mrec.skip.skip_end = cmirror.skip_end;
1694c038e17SMatthew Dillon 			mrec.head.rec_crc = crc32(&mrec.head.rec_size,
1704c038e17SMatthew Dillon 						 bytes - crc_start);
1714c038e17SMatthew Dillon 			error = copyout(&mrec, uptr, bytes);
1724c038e17SMatthew Dillon 			eatdisk = 0;
1734c038e17SMatthew Dillon 			goto didwrite;
1744c038e17SMatthew Dillon 		}
1754c038e17SMatthew Dillon 
1764c038e17SMatthew Dillon 		/*
1774c038e17SMatthew Dillon 		 * Leaf node.  In full-history mode we could filter out
1784c038e17SMatthew Dillon 		 * elements modified outside the user-requested TID range.
1794c038e17SMatthew Dillon 		 *
1804c038e17SMatthew Dillon 		 * However, such elements must be returned so the writer
1814c038e17SMatthew Dillon 		 * can compare them against the target to detemrine what
1824c038e17SMatthew Dillon 		 * needs to be deleted on the target, particular for
1834c038e17SMatthew Dillon 		 * no-history mirrors.
184dd94f1b1SMatthew Dillon 		 */
185c82af904SMatthew Dillon 		KKASSERT(cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
186c82af904SMatthew Dillon 		elm = &cursor.node->ondisk->elms[cursor.index].leaf;
187c82af904SMatthew Dillon 		mirror->key_cur = elm->base;
188dd94f1b1SMatthew Dillon 
189e469566bSMatthew Dillon 		/*
190e469566bSMatthew Dillon 		 * Determine if we should generate a PASS or a REC.  PASS
191e469566bSMatthew Dillon 		 * records are records without any data payload.  Such
192e469566bSMatthew Dillon 		 * records will be generated if the target is already expected
193e469566bSMatthew Dillon 		 * to have the record, allowing it to delete the gaps.
194e469566bSMatthew Dillon 		 *
195e469566bSMatthew Dillon 		 * A PASS record is also used to perform deletions on the
196e469566bSMatthew Dillon 		 * target.
197e469566bSMatthew Dillon 		 *
198e469566bSMatthew Dillon 		 * Such deletions are needed if the master or files on the
199e469566bSMatthew Dillon 		 * master are no-history, or if the slave is so far behind
200e469566bSMatthew Dillon 		 * the master has already been pruned.
201e469566bSMatthew Dillon 		 */
202e469566bSMatthew Dillon 		if (elm->base.create_tid < mirror->tid_beg ||
203e469566bSMatthew Dillon 		    elm->base.create_tid > mirror->tid_end) {
2044c038e17SMatthew Dillon 			bytes = sizeof(mrec.rec);
2054c038e17SMatthew Dillon 			if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) >
2064c038e17SMatthew Dillon 			    mirror->size) {
2074c038e17SMatthew Dillon 				break;
2084c038e17SMatthew Dillon 			}
2094c038e17SMatthew Dillon 
2104c038e17SMatthew Dillon 			/*
211e469566bSMatthew Dillon 			 * Fill mrec.
2124c038e17SMatthew Dillon 			 */
2134c038e17SMatthew Dillon 			mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE;
2144c038e17SMatthew Dillon 			mrec.head.type = HAMMER_MREC_TYPE_PASS;
2154c038e17SMatthew Dillon 			mrec.head.rec_size = bytes;
2164c038e17SMatthew Dillon 			mrec.rec.leaf = *elm;
2174c038e17SMatthew Dillon 			mrec.head.rec_crc = crc32(&mrec.head.rec_size,
2184c038e17SMatthew Dillon 						 bytes - crc_start);
2194c038e17SMatthew Dillon 			error = copyout(&mrec, uptr, bytes);
2204c038e17SMatthew Dillon 			eatdisk = 1;
2214c038e17SMatthew Dillon 			goto didwrite;
2224c038e17SMatthew Dillon 
2234c038e17SMatthew Dillon 		}
2244c038e17SMatthew Dillon 
225dd94f1b1SMatthew Dillon 		/*
226c82af904SMatthew Dillon 		 * The core code exports the data to userland.
227dd94f1b1SMatthew Dillon 		 */
228c82af904SMatthew Dillon 		data_len = (elm->data_offset) ? elm->data_len : 0;
229c82af904SMatthew Dillon 		if (data_len) {
230c82af904SMatthew Dillon 			error = hammer_btree_extract(&cursor,
231c82af904SMatthew Dillon 						     HAMMER_CURSOR_GET_DATA);
232c82af904SMatthew Dillon 			if (error)
233c82af904SMatthew Dillon 				break;
234c82af904SMatthew Dillon 		}
2354c038e17SMatthew Dillon 
2364c038e17SMatthew Dillon 		bytes = sizeof(mrec.rec) + data_len;
2374c038e17SMatthew Dillon 		if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) > mirror->size)
238c82af904SMatthew Dillon 			break;
239c82af904SMatthew Dillon 
240c82af904SMatthew Dillon 		/*
241c82af904SMatthew Dillon 		 * Construct the record for userland and copyout.
242c82af904SMatthew Dillon 		 *
243c82af904SMatthew Dillon 		 * The user is asking for a snapshot, if the record was
244c82af904SMatthew Dillon 		 * deleted beyond the user-requested ending tid, the record
245c82af904SMatthew Dillon 		 * is not considered deleted from the point of view of
246c82af904SMatthew Dillon 		 * userland and delete_tid is cleared.
247c82af904SMatthew Dillon 		 */
2484c038e17SMatthew Dillon 		mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE;
2494c038e17SMatthew Dillon 		mrec.head.type = HAMMER_MREC_TYPE_REC;
2504c038e17SMatthew Dillon 		mrec.head.rec_size = bytes;
2514c038e17SMatthew Dillon 		mrec.rec.leaf = *elm;
2524889cbd4SMatthew Dillon 		if (elm->base.delete_tid > mirror->tid_end)
2534c038e17SMatthew Dillon 			mrec.rec.leaf.base.delete_tid = 0;
2544c038e17SMatthew Dillon 		rec_crc = crc32(&mrec.head.rec_size,
2554c038e17SMatthew Dillon 				sizeof(mrec.rec) - crc_start);
2564c038e17SMatthew Dillon 		if (data_len)
2574c038e17SMatthew Dillon 			rec_crc = crc32_ext(cursor.data, data_len, rec_crc);
2584c038e17SMatthew Dillon 		mrec.head.rec_crc = rec_crc;
2594c038e17SMatthew Dillon 		error = copyout(&mrec, uptr, sizeof(mrec.rec));
260c82af904SMatthew Dillon 		if (data_len && error == 0) {
2614c038e17SMatthew Dillon 			error = copyout(cursor.data, uptr + sizeof(mrec.rec),
262c82af904SMatthew Dillon 					data_len);
263c82af904SMatthew Dillon 		}
2644c038e17SMatthew Dillon 		eatdisk = 1;
2654c038e17SMatthew Dillon 
2664c038e17SMatthew Dillon 		/*
2674c038e17SMatthew Dillon 		 * eatdisk controls whether we skip the current cursor
2684c038e17SMatthew Dillon 		 * position on the next scan or not.  If doing a SKIP
2694c038e17SMatthew Dillon 		 * the cursor is already positioned properly for the next
2704c038e17SMatthew Dillon 		 * scan and eatdisk will be 0.
2714c038e17SMatthew Dillon 		 */
2724c038e17SMatthew Dillon didwrite:
273dd94f1b1SMatthew Dillon 		if (error == 0) {
2744c038e17SMatthew Dillon 			mirror->count += HAMMER_HEAD_DOALIGN(bytes);
2754c038e17SMatthew Dillon 			if (eatdisk)
276dd94f1b1SMatthew Dillon 				cursor.flags |= HAMMER_CURSOR_ATEDISK;
2774c038e17SMatthew Dillon 			else
2784c038e17SMatthew Dillon 				cursor.flags &= ~HAMMER_CURSOR_ATEDISK;
279dd94f1b1SMatthew Dillon 			error = hammer_btree_iterate(&cursor);
280dd94f1b1SMatthew Dillon 		}
281dd94f1b1SMatthew Dillon 	}
282c82af904SMatthew Dillon 	if (error == ENOENT) {
283c82af904SMatthew Dillon 		mirror->key_cur = mirror->key_end;
284dd94f1b1SMatthew Dillon 		error = 0;
285c82af904SMatthew Dillon 	}
286dd94f1b1SMatthew Dillon 	hammer_done_cursor(&cursor);
287dd94f1b1SMatthew Dillon 	if (error == EDEADLK)
288dd94f1b1SMatthew Dillon 		goto retry;
289dd94f1b1SMatthew Dillon 	if (error == EINTR) {
290c82af904SMatthew Dillon 		mirror->head.flags |= HAMMER_IOC_HEAD_INTR;
291dd94f1b1SMatthew Dillon 		error = 0;
292dd94f1b1SMatthew Dillon 	}
293dd94f1b1SMatthew Dillon failed:
294dd94f1b1SMatthew Dillon 	mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK;
295dd94f1b1SMatthew Dillon 	return(error);
296dd94f1b1SMatthew Dillon }
297dd94f1b1SMatthew Dillon 
298c82af904SMatthew Dillon /*
2994c038e17SMatthew Dillon  * Copy records from userland to the target mirror.
300602c6cb8SMatthew Dillon  *
301ea434b6fSMatthew Dillon  * The PFS is identified in the mirror structure.  The passed ip is just
302ea434b6fSMatthew Dillon  * some directory in the overall HAMMER filesystem and has nothing to
303ea434b6fSMatthew Dillon  * do with the PFS.  In fact, there might not even be a root directory for
304ea434b6fSMatthew Dillon  * the PFS yet!
305c82af904SMatthew Dillon  */
306c82af904SMatthew Dillon int
307c82af904SMatthew Dillon hammer_ioc_mirror_write(hammer_transaction_t trans, hammer_inode_t ip,
308c82af904SMatthew Dillon 		       struct hammer_ioc_mirror_rw *mirror)
309c82af904SMatthew Dillon {
3104c038e17SMatthew Dillon 	union hammer_ioc_mrecord_any mrec;
311c82af904SMatthew Dillon 	struct hammer_cursor cursor;
312ea434b6fSMatthew Dillon 	u_int32_t localization;
31393291532SMatthew Dillon 	int checkspace_count = 0;
3144c038e17SMatthew Dillon 	int error;
3154c038e17SMatthew Dillon 	int bytes;
3164c038e17SMatthew Dillon 	char *uptr;
31793291532SMatthew Dillon 	int seq;
318ea434b6fSMatthew Dillon 
319ea434b6fSMatthew Dillon 	localization = (u_int32_t)mirror->pfs_id << 16;
32093291532SMatthew Dillon 	seq = trans->hmp->flusher.act;
321c82af904SMatthew Dillon 
3224c038e17SMatthew Dillon 	/*
3234c038e17SMatthew Dillon 	 * Validate the mirror structure and relocalize the tracking keys.
3244c038e17SMatthew Dillon 	 */
325c82af904SMatthew Dillon 	if (mirror->size < 0 || mirror->size > 0x70000000)
326c82af904SMatthew Dillon 		return(EINVAL);
3274c038e17SMatthew Dillon 	mirror->key_beg.localization &= HAMMER_LOCALIZE_MASK;
3284c038e17SMatthew Dillon 	mirror->key_beg.localization += localization;
3294c038e17SMatthew Dillon 	mirror->key_end.localization &= HAMMER_LOCALIZE_MASK;
3304c038e17SMatthew Dillon 	mirror->key_end.localization += localization;
3314c038e17SMatthew Dillon 	mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK;
3324c038e17SMatthew Dillon 	mirror->key_cur.localization += localization;
333c82af904SMatthew Dillon 
3344c038e17SMatthew Dillon 	/*
3354c038e17SMatthew Dillon 	 * Set up our tracking cursor for the loop.  The tracking cursor
3364c038e17SMatthew Dillon 	 * is used to delete records that are no longer present on the
3374c038e17SMatthew Dillon 	 * master.  The last handled record at key_cur must be skipped.
3384c038e17SMatthew Dillon 	 */
339c82af904SMatthew Dillon 	error = hammer_init_cursor(trans, &cursor, NULL, NULL);
340c82af904SMatthew Dillon 
3414c038e17SMatthew Dillon 	cursor.key_beg = mirror->key_cur;
3424c038e17SMatthew Dillon 	cursor.key_end = mirror->key_end;
3434c038e17SMatthew Dillon 	cursor.flags |= HAMMER_CURSOR_BACKEND;
3444c038e17SMatthew Dillon 	error = hammer_btree_first(&cursor);
3454c038e17SMatthew Dillon 	if (error == 0)
3464c038e17SMatthew Dillon 		cursor.flags |= HAMMER_CURSOR_ATEDISK;
3474c038e17SMatthew Dillon 	if (error == ENOENT)
3484c038e17SMatthew Dillon 		error = 0;
3494c038e17SMatthew Dillon 
3504c038e17SMatthew Dillon 	/*
3514c038e17SMatthew Dillon 	 * Loop until our input buffer has been exhausted.
3524c038e17SMatthew Dillon 	 */
3534c038e17SMatthew Dillon 	while (error == 0 &&
3544c038e17SMatthew Dillon 		mirror->count + sizeof(mrec.head) <= mirror->size) {
3554c038e17SMatthew Dillon 
356c82af904SMatthew Dillon 	        /*
35793291532SMatthew Dillon 		 * Don't blow out the buffer cache.  Leave room for frontend
35893291532SMatthew Dillon 		 * cache as well.
35993291532SMatthew Dillon 		 */
36015e75dabSMatthew Dillon 		while (hammer_flusher_meta_halflimit(trans->hmp) ||
3617a61b85dSMatthew Dillon 		       hammer_flusher_undo_exhausted(trans, 2)) {
362*982be4bfSMatthew Dillon 			hammer_unlock_cursor(&cursor);
36393291532SMatthew Dillon 			hammer_flusher_wait(trans->hmp, seq);
364*982be4bfSMatthew Dillon 			hammer_lock_cursor(&cursor);
36515e75dabSMatthew Dillon 			seq = hammer_flusher_async_one(trans->hmp);
36693291532SMatthew Dillon 		}
36793291532SMatthew Dillon 
36893291532SMatthew Dillon 		/*
36993291532SMatthew Dillon 		 * If there is insufficient free space it may be due to
37093291532SMatthew Dillon 		 * reserved bigblocks, which flushing might fix.
37193291532SMatthew Dillon 		 */
37293291532SMatthew Dillon 		if (hammer_checkspace(trans->hmp, HAMMER_CHKSPC_MIRROR)) {
37393291532SMatthew Dillon 			if (++checkspace_count == 10) {
37493291532SMatthew Dillon 				error = ENOSPC;
37593291532SMatthew Dillon 				break;
37693291532SMatthew Dillon 			}
377*982be4bfSMatthew Dillon 			hammer_unlock_cursor(&cursor);
37893291532SMatthew Dillon 			hammer_flusher_wait(trans->hmp, seq);
379*982be4bfSMatthew Dillon 			hammer_lock_cursor(&cursor);
3807a61b85dSMatthew Dillon 			seq = hammer_flusher_async(trans->hmp, NULL);
38193291532SMatthew Dillon 		}
38293291532SMatthew Dillon 
38393291532SMatthew Dillon 
38493291532SMatthew Dillon 		/*
385c82af904SMatthew Dillon 		 * Acquire and validate header
386c82af904SMatthew Dillon 		 */
3874c038e17SMatthew Dillon 		if ((bytes = mirror->size - mirror->count) > sizeof(mrec))
3884c038e17SMatthew Dillon 			bytes = sizeof(mrec);
389c82af904SMatthew Dillon 		uptr = (char *)mirror->ubuf + mirror->count;
3904c038e17SMatthew Dillon 		error = copyin(uptr, &mrec, bytes);
391c82af904SMatthew Dillon 		if (error)
392c82af904SMatthew Dillon 			break;
3934c038e17SMatthew Dillon 		if (mrec.head.signature != HAMMER_IOC_MIRROR_SIGNATURE) {
394c82af904SMatthew Dillon 			error = EINVAL;
395c82af904SMatthew Dillon 			break;
396c82af904SMatthew Dillon 		}
3974c038e17SMatthew Dillon 		if (mrec.head.rec_size < sizeof(mrec.head) ||
3984c038e17SMatthew Dillon 		    mrec.head.rec_size > sizeof(mrec) + HAMMER_XBUFSIZE ||
3994c038e17SMatthew Dillon 		    mirror->count + mrec.head.rec_size > mirror->size) {
4005fa5c92fSMatthew Dillon 			error = EINVAL;
4015fa5c92fSMatthew Dillon 			break;
4025fa5c92fSMatthew Dillon 		}
4034c038e17SMatthew Dillon 
4044c038e17SMatthew Dillon 		switch(mrec.head.type) {
4054c038e17SMatthew Dillon 		case HAMMER_MREC_TYPE_SKIP:
4064c038e17SMatthew Dillon 			if (mrec.head.rec_size != sizeof(mrec.skip))
4074c038e17SMatthew Dillon 				error = EINVAL;
4084c038e17SMatthew Dillon 			if (error == 0)
4094c038e17SMatthew Dillon 				error = hammer_ioc_mirror_write_skip(&cursor, &mrec.skip, mirror, localization);
4104c038e17SMatthew Dillon 			break;
4114c038e17SMatthew Dillon 		case HAMMER_MREC_TYPE_REC:
4124c038e17SMatthew Dillon 			if (mrec.head.rec_size < sizeof(mrec.rec))
4134c038e17SMatthew Dillon 				error = EINVAL;
4144c038e17SMatthew Dillon 			if (error == 0)
4154c038e17SMatthew Dillon 				error = hammer_ioc_mirror_write_rec(&cursor, &mrec.rec, mirror, localization, uptr + sizeof(mrec.rec));
4164c038e17SMatthew Dillon 			break;
4174c038e17SMatthew Dillon 		case HAMMER_MREC_TYPE_PASS:
4184c038e17SMatthew Dillon 			if (mrec.head.rec_size != sizeof(mrec.rec))
4194c038e17SMatthew Dillon 				error = EINVAL;
4204c038e17SMatthew Dillon 			if (error == 0)
4214c038e17SMatthew Dillon 				error = hammer_ioc_mirror_write_pass(&cursor, &mrec.rec, mirror, localization);
4224c038e17SMatthew Dillon 			break;
4234c038e17SMatthew Dillon 		default:
424c82af904SMatthew Dillon 			error = EINVAL;
425c82af904SMatthew Dillon 			break;
426c82af904SMatthew Dillon 		}
4274c038e17SMatthew Dillon 
4284c038e17SMatthew Dillon 		/*
4294c038e17SMatthew Dillon 		 * Retry the current record on deadlock, otherwise setup
4304c038e17SMatthew Dillon 		 * for the next loop.
4314c038e17SMatthew Dillon 		 */
4324c038e17SMatthew Dillon 		if (error == EDEADLK) {
4334c038e17SMatthew Dillon 			while (error == EDEADLK) {
4344c038e17SMatthew Dillon 				hammer_recover_cursor(&cursor);
4354c038e17SMatthew Dillon 				error = hammer_cursor_upgrade(&cursor);
436c82af904SMatthew Dillon 			}
4374c038e17SMatthew Dillon 		} else {
4384c038e17SMatthew Dillon 			if (error == EALREADY)
4394c038e17SMatthew Dillon 				error = 0;
4404c038e17SMatthew Dillon 			if (error == 0) {
4414c038e17SMatthew Dillon 				mirror->count +=
4424c038e17SMatthew Dillon 					HAMMER_HEAD_DOALIGN(mrec.head.rec_size);
4434c038e17SMatthew Dillon 			}
4444c038e17SMatthew Dillon 		}
4454c038e17SMatthew Dillon 	}
4464c038e17SMatthew Dillon 	hammer_done_cursor(&cursor);
4474c038e17SMatthew Dillon 
4484c038e17SMatthew Dillon 	/*
4494c038e17SMatthew Dillon 	 * cumulative error
4504c038e17SMatthew Dillon 	 */
4514c038e17SMatthew Dillon 	if (error) {
4524c038e17SMatthew Dillon 		mirror->head.flags |= HAMMER_IOC_HEAD_ERROR;
4534c038e17SMatthew Dillon 		mirror->head.error = error;
4544c038e17SMatthew Dillon 	}
4554c038e17SMatthew Dillon 
4564c038e17SMatthew Dillon 	/*
4574c038e17SMatthew Dillon 	 * ioctls don't update the RW data structure if an error is returned,
4584c038e17SMatthew Dillon 	 * always return 0.
4594c038e17SMatthew Dillon 	 */
4604c038e17SMatthew Dillon 	return(0);
4614c038e17SMatthew Dillon }
4624c038e17SMatthew Dillon 
4634c038e17SMatthew Dillon /*
4644c038e17SMatthew Dillon  * Handle skip records.
4654c038e17SMatthew Dillon  *
4664c038e17SMatthew Dillon  * We must iterate from the last resolved record position at mirror->key_cur
4674c038e17SMatthew Dillon  * to skip_beg and delete any records encountered.
4684c038e17SMatthew Dillon  *
4694c038e17SMatthew Dillon  * mirror->key_cur must be carefully set when we succeed in processing
4704c038e17SMatthew Dillon  * this mrec.
4714c038e17SMatthew Dillon  */
4724c038e17SMatthew Dillon static int
4734c038e17SMatthew Dillon hammer_ioc_mirror_write_skip(hammer_cursor_t cursor,
4744c038e17SMatthew Dillon 			     struct hammer_ioc_mrecord_skip *mrec,
4754c038e17SMatthew Dillon 			     struct hammer_ioc_mirror_rw *mirror,
4764c038e17SMatthew Dillon 			     u_int32_t localization)
4774c038e17SMatthew Dillon {
4784c038e17SMatthew Dillon 	int error;
4794c038e17SMatthew Dillon 
4804c038e17SMatthew Dillon 	/*
4814c038e17SMatthew Dillon 	 * Relocalize the skip range
4824c038e17SMatthew Dillon 	 */
4834c038e17SMatthew Dillon 	mrec->skip_beg.localization &= HAMMER_LOCALIZE_MASK;
4844c038e17SMatthew Dillon 	mrec->skip_beg.localization += localization;
4854c038e17SMatthew Dillon 	mrec->skip_end.localization &= HAMMER_LOCALIZE_MASK;
4864c038e17SMatthew Dillon 	mrec->skip_end.localization += localization;
4874c038e17SMatthew Dillon 
4884c038e17SMatthew Dillon 	/*
4894c038e17SMatthew Dillon 	 * Iterate from current position to skip_beg, deleting any records
4904c038e17SMatthew Dillon 	 * we encounter.
4914c038e17SMatthew Dillon 	 */
4924c038e17SMatthew Dillon 	cursor->key_end = mrec->skip_beg;
4934c038e17SMatthew Dillon 	cursor->flags |= HAMMER_CURSOR_BACKEND;
494842e7a70SMatthew Dillon 	error = hammer_mirror_delete_to(cursor, mirror);
4954c038e17SMatthew Dillon 
4964c038e17SMatthew Dillon 	/*
4974c038e17SMatthew Dillon 	 * Now skip past the skip (which is the whole point point of
4984c038e17SMatthew Dillon 	 * having a skip record).  The sender has not sent us any records
4994c038e17SMatthew Dillon 	 * for the skip area so we wouldn't know what to keep and what
5004c038e17SMatthew Dillon 	 * to delete anyway.
5014c038e17SMatthew Dillon 	 *
5024c038e17SMatthew Dillon 	 * Clear ATEDISK because skip_end is non-inclusive, so we can't
5034c038e17SMatthew Dillon 	 * count an exact match if we happened to get one.
5044c038e17SMatthew Dillon 	 */
5054c038e17SMatthew Dillon 	if (error == 0) {
5064c038e17SMatthew Dillon 		mirror->key_cur = mrec->skip_end;
5074c038e17SMatthew Dillon 		cursor->key_beg = mrec->skip_end;
5084c038e17SMatthew Dillon 		error = hammer_btree_lookup(cursor);
5094c038e17SMatthew Dillon 		cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
5104c038e17SMatthew Dillon 		if (error == ENOENT)
5114c038e17SMatthew Dillon 			error = 0;
5124c038e17SMatthew Dillon 	}
5134c038e17SMatthew Dillon 	return(error);
5144c038e17SMatthew Dillon }
5154c038e17SMatthew Dillon 
5164c038e17SMatthew Dillon /*
5174c038e17SMatthew Dillon  * Handle B-Tree records.
5184c038e17SMatthew Dillon  *
5194c038e17SMatthew Dillon  * We must iterate to mrec->base.key (non-inclusively), and then process
5204c038e17SMatthew Dillon  * the record.  We are allowed to write a new record or delete an existing
5214c038e17SMatthew Dillon  * record, but cannot replace an existing record.
5224c038e17SMatthew Dillon  *
5234c038e17SMatthew Dillon  * mirror->key_cur must be carefully set when we succeed in processing
5244c038e17SMatthew Dillon  * this mrec.
5254c038e17SMatthew Dillon  */
5264c038e17SMatthew Dillon static int
5274c038e17SMatthew Dillon hammer_ioc_mirror_write_rec(hammer_cursor_t cursor,
5284c038e17SMatthew Dillon 			    struct hammer_ioc_mrecord_rec *mrec,
5294c038e17SMatthew Dillon 			    struct hammer_ioc_mirror_rw *mirror,
5304c038e17SMatthew Dillon 			    u_int32_t localization,
5314c038e17SMatthew Dillon 			    char *uptr)
5324c038e17SMatthew Dillon {
5334c038e17SMatthew Dillon 	hammer_transaction_t trans;
5344c038e17SMatthew Dillon 	u_int32_t rec_crc;
5354c038e17SMatthew Dillon 	int error;
5364c038e17SMatthew Dillon 
5374c038e17SMatthew Dillon 	trans = cursor->trans;
5384c038e17SMatthew Dillon 	rec_crc = crc32(mrec, sizeof(*mrec));
5394c038e17SMatthew Dillon 
5404c038e17SMatthew Dillon 	if (mrec->leaf.data_len < 0 ||
5414c038e17SMatthew Dillon 	    mrec->leaf.data_len > HAMMER_XBUFSIZE ||
5424c038e17SMatthew Dillon 	    mrec->leaf.data_len + sizeof(*mrec) > mrec->head.rec_size) {
5434c038e17SMatthew Dillon 		return(EINVAL);
544c82af904SMatthew Dillon 	}
545c82af904SMatthew Dillon 
546c82af904SMatthew Dillon 	/*
547c82af904SMatthew Dillon 	 * Re-localize for target.  relocalization of data is handled
548c82af904SMatthew Dillon 	 * by hammer_mirror_write().
549c82af904SMatthew Dillon 	 */
5504c038e17SMatthew Dillon 	mrec->leaf.base.localization &= HAMMER_LOCALIZE_MASK;
5514c038e17SMatthew Dillon 	mrec->leaf.base.localization += localization;
5524c038e17SMatthew Dillon 
5534c038e17SMatthew Dillon 	/*
5544c038e17SMatthew Dillon 	 * Delete records through until we reach (non-inclusively) the
5554c038e17SMatthew Dillon 	 * target record.
5564c038e17SMatthew Dillon 	 */
5574c038e17SMatthew Dillon 	cursor->key_end = mrec->leaf.base;
5584c038e17SMatthew Dillon 	cursor->flags &= ~HAMMER_CURSOR_END_INCLUSIVE;
5594c038e17SMatthew Dillon 	cursor->flags |= HAMMER_CURSOR_BACKEND;
560842e7a70SMatthew Dillon 	error = hammer_mirror_delete_to(cursor, mirror);
561c82af904SMatthew Dillon 
562c82af904SMatthew Dillon 	/*
563c82af904SMatthew Dillon 	 * Locate the record.
564c82af904SMatthew Dillon 	 *
565c82af904SMatthew Dillon 	 * If the record exists only the delete_tid may be updated.
566c82af904SMatthew Dillon 	 *
567e469566bSMatthew Dillon 	 * If the record does not exist we can create it only if the
568e469566bSMatthew Dillon 	 * create_tid is not too old.  If the create_tid is too old
569e469566bSMatthew Dillon 	 * it may have already been destroyed on the slave from pruning.
570e469566bSMatthew Dillon 	 *
571e469566bSMatthew Dillon 	 * Note that mirror operations are effectively as-of operations
572e469566bSMatthew Dillon 	 * and delete_tid can be 0 for mirroring purposes even if it is
573c82af904SMatthew Dillon 	 * not actually 0 at the originator.
57498da6d8cSMatthew Dillon 	 *
57598da6d8cSMatthew Dillon 	 * These functions can return EDEADLK
576c82af904SMatthew Dillon 	 */
5774c038e17SMatthew Dillon 	cursor->key_beg = mrec->leaf.base;
5784c038e17SMatthew Dillon 	cursor->flags |= HAMMER_CURSOR_BACKEND;
5794c038e17SMatthew Dillon 	cursor->flags &= ~HAMMER_CURSOR_INSERT;
5804c038e17SMatthew Dillon 	error = hammer_btree_lookup(cursor);
581c82af904SMatthew Dillon 
5824c038e17SMatthew Dillon 	if (error == 0 && hammer_mirror_check(cursor, mrec)) {
5834c038e17SMatthew Dillon 		error = hammer_mirror_update(cursor, mrec);
584adf01747SMatthew Dillon 	} else if (error == ENOENT) {
585e469566bSMatthew Dillon 		if (mrec->leaf.base.create_tid >= mirror->tid_beg)
586e469566bSMatthew Dillon 			error = hammer_mirror_write(cursor, mrec, uptr);
587e469566bSMatthew Dillon 		else
588adf01747SMatthew Dillon 			error = 0;
589c82af904SMatthew Dillon 	}
5904c038e17SMatthew Dillon 	if (error == 0 || error == EALREADY)
5914c038e17SMatthew Dillon 		mirror->key_cur = mrec->leaf.base;
5924c038e17SMatthew Dillon 	return(error);
5934c038e17SMatthew Dillon }
594c82af904SMatthew Dillon 
595c82af904SMatthew Dillon /*
5964c038e17SMatthew Dillon  * This works like write_rec but no write or update is necessary,
5974c038e17SMatthew Dillon  * and no data payload is included so we couldn't do a write even
5984c038e17SMatthew Dillon  * if we wanted to.
5994c038e17SMatthew Dillon  *
6004c038e17SMatthew Dillon  * We must still iterate for deletions, and we can validate the
6014c038e17SMatthew Dillon  * record header which is a good way to test for corrupted mirror
6024c038e17SMatthew Dillon  * targets XXX.
6034c038e17SMatthew Dillon  *
6044c038e17SMatthew Dillon  * mirror->key_cur must be carefully set when we succeed in processing
6054c038e17SMatthew Dillon  * this mrec.
606c82af904SMatthew Dillon  */
6074c038e17SMatthew Dillon static
6084c038e17SMatthew Dillon int
6094c038e17SMatthew Dillon hammer_ioc_mirror_write_pass(hammer_cursor_t cursor,
6104c038e17SMatthew Dillon 			     struct hammer_ioc_mrecord_rec *mrec,
6114c038e17SMatthew Dillon 			     struct hammer_ioc_mirror_rw *mirror,
6124c038e17SMatthew Dillon 			     u_int32_t localization)
6134c038e17SMatthew Dillon {
6144c038e17SMatthew Dillon 	hammer_transaction_t trans;
6154c038e17SMatthew Dillon 	u_int32_t rec_crc;
6164c038e17SMatthew Dillon 	int error;
6174c038e17SMatthew Dillon 
6184c038e17SMatthew Dillon 	trans = cursor->trans;
6194c038e17SMatthew Dillon 	rec_crc = crc32(mrec, sizeof(*mrec));
6204c038e17SMatthew Dillon 
6214c038e17SMatthew Dillon 	/*
6224c038e17SMatthew Dillon 	 * Re-localize for target.  Relocalization of data is handled
6234c038e17SMatthew Dillon 	 * by hammer_mirror_write().
6244c038e17SMatthew Dillon 	 */
6254c038e17SMatthew Dillon 	mrec->leaf.base.localization &= HAMMER_LOCALIZE_MASK;
6264c038e17SMatthew Dillon 	mrec->leaf.base.localization += localization;
6274c038e17SMatthew Dillon 
6284c038e17SMatthew Dillon 	/*
6294c038e17SMatthew Dillon 	 * Delete records through until we reach (non-inclusively) the
6304c038e17SMatthew Dillon 	 * target record.
6314c038e17SMatthew Dillon 	 */
6324c038e17SMatthew Dillon 	cursor->key_end = mrec->leaf.base;
6334c038e17SMatthew Dillon 	cursor->flags &= ~HAMMER_CURSOR_END_INCLUSIVE;
6344c038e17SMatthew Dillon 	cursor->flags |= HAMMER_CURSOR_BACKEND;
6354c038e17SMatthew Dillon 
636842e7a70SMatthew Dillon 	error = hammer_mirror_delete_to(cursor, mirror);
6374c038e17SMatthew Dillon 
6384c038e17SMatthew Dillon 	/*
639e469566bSMatthew Dillon 	 * Locate the record and get past it by setting ATEDISK.  Perform
640e469566bSMatthew Dillon 	 * any necessary deletions.  We have no data payload and cannot
641e469566bSMatthew Dillon 	 * create a new record.
6424c038e17SMatthew Dillon 	 */
6434c038e17SMatthew Dillon 	if (error == 0) {
6444c038e17SMatthew Dillon 		mirror->key_cur = mrec->leaf.base;
6454c038e17SMatthew Dillon 		cursor->key_beg = mrec->leaf.base;
6464c038e17SMatthew Dillon 		cursor->flags |= HAMMER_CURSOR_BACKEND;
6474c038e17SMatthew Dillon 		cursor->flags &= ~HAMMER_CURSOR_INSERT;
6484c038e17SMatthew Dillon 		error = hammer_btree_lookup(cursor);
649e469566bSMatthew Dillon 		if (error == 0) {
650e469566bSMatthew Dillon 			if (hammer_mirror_check(cursor, mrec))
651e469566bSMatthew Dillon 				error = hammer_mirror_update(cursor, mrec);
6524c038e17SMatthew Dillon 			cursor->flags |= HAMMER_CURSOR_ATEDISK;
653e469566bSMatthew Dillon 		} else {
6544c038e17SMatthew Dillon 			cursor->flags &= ~HAMMER_CURSOR_ATEDISK;
655e469566bSMatthew Dillon 		}
6564c038e17SMatthew Dillon 		if (error == ENOENT)
6574c038e17SMatthew Dillon 			error = 0;
6584c038e17SMatthew Dillon 	}
6594c038e17SMatthew Dillon 	return(error);
660c82af904SMatthew Dillon }
661adf01747SMatthew Dillon 
6624c038e17SMatthew Dillon /*
6634c038e17SMatthew Dillon  * As part of the mirror write we iterate across swaths of records
6644c038e17SMatthew Dillon  * on the target which no longer exist on the source, and mark them
6654c038e17SMatthew Dillon  * deleted.
666842e7a70SMatthew Dillon  *
667842e7a70SMatthew Dillon  * The caller has indexed the cursor and set up key_end.  We iterate
668842e7a70SMatthew Dillon  * through to key_end.
6694c038e17SMatthew Dillon  */
6704c038e17SMatthew Dillon static
6714c038e17SMatthew Dillon int
672842e7a70SMatthew Dillon hammer_mirror_delete_to(hammer_cursor_t cursor,
6734c038e17SMatthew Dillon 		       struct hammer_ioc_mirror_rw *mirror)
6744c038e17SMatthew Dillon {
675842e7a70SMatthew Dillon 	hammer_btree_leaf_elm_t elm;
67698da6d8cSMatthew Dillon 	int error;
67798da6d8cSMatthew Dillon 
678842e7a70SMatthew Dillon 	error = hammer_btree_iterate(cursor);
679842e7a70SMatthew Dillon 	while (error == 0) {
680842e7a70SMatthew Dillon 		elm = &cursor->node->ondisk->elms[cursor->index].leaf;
681842e7a70SMatthew Dillon 		KKASSERT(elm->base.btype == HAMMER_BTREE_TYPE_RECORD);
6824889cbd4SMatthew Dillon 		cursor->flags |= HAMMER_CURSOR_ATEDISK;
683842e7a70SMatthew Dillon 		if (elm->base.delete_tid == 0) {
684842e7a70SMatthew Dillon 			error = hammer_delete_at_cursor(cursor,
685842e7a70SMatthew Dillon 							HAMMER_DELETE_ADJUST,
686842e7a70SMatthew Dillon 							mirror->tid_end,
687842e7a70SMatthew Dillon 							time_second,
688842e7a70SMatthew Dillon 							1, NULL);
689842e7a70SMatthew Dillon 		}
690842e7a70SMatthew Dillon 		if (error == 0)
691842e7a70SMatthew Dillon 			error = hammer_btree_iterate(cursor);
692842e7a70SMatthew Dillon 	}
693842e7a70SMatthew Dillon 	if (error == ENOENT)
694842e7a70SMatthew Dillon 		error = 0;
695842e7a70SMatthew Dillon 	return(error);
696c82af904SMatthew Dillon }
697c82af904SMatthew Dillon 
698c82af904SMatthew Dillon /*
699c82af904SMatthew Dillon  * Check whether an update is needed in the case where a match already
700c82af904SMatthew Dillon  * exists on the target.  The only type of update allowed in this case
701c82af904SMatthew Dillon  * is an update of the delete_tid.
702c82af904SMatthew Dillon  *
703c82af904SMatthew Dillon  * Return non-zero if the update should proceed.
704c82af904SMatthew Dillon  */
705c82af904SMatthew Dillon static
706c82af904SMatthew Dillon int
7074c038e17SMatthew Dillon hammer_mirror_check(hammer_cursor_t cursor, struct hammer_ioc_mrecord_rec *mrec)
708c82af904SMatthew Dillon {
709c82af904SMatthew Dillon 	hammer_btree_leaf_elm_t leaf = cursor->leaf;
710c82af904SMatthew Dillon 
711c82af904SMatthew Dillon 	if (leaf->base.delete_tid != mrec->leaf.base.delete_tid) {
712ea434b6fSMatthew Dillon 		if (mrec->leaf.base.delete_tid != 0)
713c82af904SMatthew Dillon 			return(1);
714c82af904SMatthew Dillon 	}
715c82af904SMatthew Dillon 	return(0);
716c82af904SMatthew Dillon }
717c82af904SMatthew Dillon 
718c82af904SMatthew Dillon /*
719842e7a70SMatthew Dillon  * Update a record in-place.  Only the delete_tid can change, and
720842e7a70SMatthew Dillon  * only from zero to non-zero.
721c82af904SMatthew Dillon  */
722c82af904SMatthew Dillon static
723c82af904SMatthew Dillon int
7244c038e17SMatthew Dillon hammer_mirror_update(hammer_cursor_t cursor,
7254c038e17SMatthew Dillon 		     struct hammer_ioc_mrecord_rec *mrec)
726c82af904SMatthew Dillon {
72798da6d8cSMatthew Dillon 	int error;
72898da6d8cSMatthew Dillon 
729842e7a70SMatthew Dillon 	/*
730842e7a70SMatthew Dillon 	 * This case shouldn't occur.
731842e7a70SMatthew Dillon 	 */
732842e7a70SMatthew Dillon 	if (mrec->leaf.base.delete_tid == 0)
73306ad81ffSMatthew Dillon 		return(0);
734adf01747SMatthew Dillon 
735adf01747SMatthew Dillon 	/*
736842e7a70SMatthew Dillon 	 * Mark the record deleted on the mirror target.
7374c038e17SMatthew Dillon 	 */
738842e7a70SMatthew Dillon 	error = hammer_delete_at_cursor(cursor, HAMMER_DELETE_ADJUST,
739842e7a70SMatthew Dillon 					mrec->leaf.base.delete_tid,
740842e7a70SMatthew Dillon 					mrec->leaf.delete_ts,
741842e7a70SMatthew Dillon 					1, NULL);
7424c038e17SMatthew Dillon 	cursor->flags |= HAMMER_CURSOR_ATEDISK;
743842e7a70SMatthew Dillon 	return(error);
744c82af904SMatthew Dillon }
745c82af904SMatthew Dillon 
746c82af904SMatthew Dillon /*
747c82af904SMatthew Dillon  * Write out a new record.
748c82af904SMatthew Dillon  */
749c82af904SMatthew Dillon static
750c82af904SMatthew Dillon int
7514c038e17SMatthew Dillon hammer_mirror_write(hammer_cursor_t cursor,
7524c038e17SMatthew Dillon 		    struct hammer_ioc_mrecord_rec *mrec,
7534c038e17SMatthew Dillon 		    char *udata)
754c82af904SMatthew Dillon {
755adf01747SMatthew Dillon 	hammer_transaction_t trans;
756adf01747SMatthew Dillon 	hammer_buffer_t data_buffer;
757c82af904SMatthew Dillon 	hammer_off_t ndata_offset;
758a56cb012SMatthew Dillon 	hammer_tid_t high_tid;
759c82af904SMatthew Dillon 	void *ndata;
760c82af904SMatthew Dillon 	int error;
761602c6cb8SMatthew Dillon 	int doprop;
762c82af904SMatthew Dillon 
763adf01747SMatthew Dillon 	trans = cursor->trans;
764adf01747SMatthew Dillon 	data_buffer = NULL;
765adf01747SMatthew Dillon 
766adf01747SMatthew Dillon 	/*
76798da6d8cSMatthew Dillon 	 * Get the sync lock so the whole mess is atomic
76898da6d8cSMatthew Dillon 	 */
76998da6d8cSMatthew Dillon 	hammer_sync_lock_sh(trans);
77098da6d8cSMatthew Dillon 
77198da6d8cSMatthew Dillon 	/*
772adf01747SMatthew Dillon 	 * Allocate and adjust data
773adf01747SMatthew Dillon 	 */
774c82af904SMatthew Dillon 	if (mrec->leaf.data_len && mrec->leaf.data_offset) {
775adf01747SMatthew Dillon 		ndata = hammer_alloc_data(trans, mrec->leaf.data_len,
776c82af904SMatthew Dillon 					  mrec->leaf.base.rec_type,
777c82af904SMatthew Dillon 					  &ndata_offset, &data_buffer, &error);
778c82af904SMatthew Dillon 		if (ndata == NULL)
779c82af904SMatthew Dillon 			return(error);
780c82af904SMatthew Dillon 		mrec->leaf.data_offset = ndata_offset;
781adf01747SMatthew Dillon 		hammer_modify_buffer(trans, data_buffer, NULL, 0);
782c82af904SMatthew Dillon 		error = copyin(udata, ndata, mrec->leaf.data_len);
783c82af904SMatthew Dillon 		if (error == 0) {
784c82af904SMatthew Dillon 			if (hammer_crc_test_leaf(ndata, &mrec->leaf) == 0) {
785c82af904SMatthew Dillon 				kprintf("data crc mismatch on pipe\n");
786c82af904SMatthew Dillon 				error = EINVAL;
787c82af904SMatthew Dillon 			} else {
788c82af904SMatthew Dillon 				error = hammer_mirror_localize_data(
789c82af904SMatthew Dillon 							ndata, &mrec->leaf);
790c82af904SMatthew Dillon 			}
791c82af904SMatthew Dillon 		}
792c82af904SMatthew Dillon 		hammer_modify_buffer_done(data_buffer);
793c82af904SMatthew Dillon 	} else {
794c82af904SMatthew Dillon 		mrec->leaf.data_offset = 0;
795c82af904SMatthew Dillon 		error = 0;
796c82af904SMatthew Dillon 		ndata = NULL;
797c82af904SMatthew Dillon 	}
798c82af904SMatthew Dillon 	if (error)
799c82af904SMatthew Dillon 		goto failed;
800adf01747SMatthew Dillon 
801adf01747SMatthew Dillon 	/*
8024c038e17SMatthew Dillon 	 * Do the insertion.  This can fail with a EDEADLK or EALREADY
803adf01747SMatthew Dillon 	 */
804c82af904SMatthew Dillon 	cursor->flags |= HAMMER_CURSOR_INSERT;
805c82af904SMatthew Dillon 	error = hammer_btree_lookup(cursor);
806c82af904SMatthew Dillon 	if (error != ENOENT) {
807c82af904SMatthew Dillon 		if (error == 0)
808c82af904SMatthew Dillon 			error = EALREADY;
809c82af904SMatthew Dillon 		goto failed;
810c82af904SMatthew Dillon 	}
811c82af904SMatthew Dillon 
812602c6cb8SMatthew Dillon 	error = hammer_btree_insert(cursor, &mrec->leaf, &doprop);
813adf01747SMatthew Dillon 
814adf01747SMatthew Dillon 	/*
8154c038e17SMatthew Dillon 	 * Cursor is left on the current element, we want to skip it now.
8164c038e17SMatthew Dillon 	 */
8174c038e17SMatthew Dillon 	cursor->flags |= HAMMER_CURSOR_ATEDISK;
8184c038e17SMatthew Dillon 	cursor->flags &= ~HAMMER_CURSOR_INSERT;
8194c038e17SMatthew Dillon 
8204c038e17SMatthew Dillon 	/*
821adf01747SMatthew Dillon 	 * Track a count of active inodes.
822adf01747SMatthew Dillon 	 */
823842e7a70SMatthew Dillon 	if (error == 0 &&
824842e7a70SMatthew Dillon 	    mrec->leaf.base.rec_type == HAMMER_RECTYPE_INODE &&
825842e7a70SMatthew Dillon 	    mrec->leaf.base.delete_tid == 0) {
826adf01747SMatthew Dillon 		hammer_modify_volume_field(trans,
827adf01747SMatthew Dillon 					   trans->rootvol,
828adf01747SMatthew Dillon 					   vol0_stat_inodes);
829adf01747SMatthew Dillon 		++trans->hmp->rootvol->ondisk->vol0_stat_inodes;
830adf01747SMatthew Dillon 		hammer_modify_volume_done(trans->rootvol);
831adf01747SMatthew Dillon 	}
832a56cb012SMatthew Dillon 
833a56cb012SMatthew Dillon 	/*
834a56cb012SMatthew Dillon 	 * vol0_next_tid must track the highest TID stored in the filesystem.
835a56cb012SMatthew Dillon 	 * We do not need to generate undo for this update.
836a56cb012SMatthew Dillon 	 */
837a56cb012SMatthew Dillon 	high_tid = mrec->leaf.base.create_tid;
838a56cb012SMatthew Dillon 	if (high_tid < mrec->leaf.base.delete_tid)
839a56cb012SMatthew Dillon 		high_tid = mrec->leaf.base.delete_tid;
840a56cb012SMatthew Dillon 	if (trans->rootvol->ondisk->vol0_next_tid < high_tid) {
841a56cb012SMatthew Dillon 		hammer_modify_volume(trans, trans->rootvol, NULL, 0);
842a56cb012SMatthew Dillon 		trans->rootvol->ondisk->vol0_next_tid = high_tid;
843a56cb012SMatthew Dillon 		hammer_modify_volume_done(trans->rootvol);
844a56cb012SMatthew Dillon 	}
845a56cb012SMatthew Dillon 
846602c6cb8SMatthew Dillon 	if (error == 0 && doprop)
8474c038e17SMatthew Dillon 		hammer_btree_do_propagation(cursor, NULL, &mrec->leaf);
848c82af904SMatthew Dillon 
849c82af904SMatthew Dillon failed:
850c82af904SMatthew Dillon 	/*
851c82af904SMatthew Dillon 	 * Cleanup
852c82af904SMatthew Dillon 	 */
853c82af904SMatthew Dillon 	if (error && mrec->leaf.data_offset) {
854c82af904SMatthew Dillon 		hammer_blockmap_free(cursor->trans,
855c82af904SMatthew Dillon 				     mrec->leaf.data_offset,
856c82af904SMatthew Dillon 				     mrec->leaf.data_len);
857c82af904SMatthew Dillon 	}
85898da6d8cSMatthew Dillon 	hammer_sync_unlock(trans);
859c82af904SMatthew Dillon 	if (data_buffer)
860c82af904SMatthew Dillon 		hammer_rel_buffer(data_buffer, 0);
861c82af904SMatthew Dillon 	return(error);
862c82af904SMatthew Dillon }
863c82af904SMatthew Dillon 
864c82af904SMatthew Dillon /*
865c82af904SMatthew Dillon  * Localize the data payload.  Directory entries may need their
866c82af904SMatthew Dillon  * localization adjusted.
867c82af904SMatthew Dillon  *
868adf01747SMatthew Dillon  * PFS directory entries must be skipped entirely (return EALREADY).
869c82af904SMatthew Dillon  */
870c82af904SMatthew Dillon static
871c82af904SMatthew Dillon int
872c82af904SMatthew Dillon hammer_mirror_localize_data(hammer_data_ondisk_t data,
873c82af904SMatthew Dillon 			    hammer_btree_leaf_elm_t leaf)
874c82af904SMatthew Dillon {
875c82af904SMatthew Dillon 	u_int32_t localization;
876c82af904SMatthew Dillon 
877c82af904SMatthew Dillon 	if (leaf->base.rec_type == HAMMER_RECTYPE_DIRENTRY) {
878adf01747SMatthew Dillon 		if (data->entry.obj_id == HAMMER_OBJID_ROOT)
879adf01747SMatthew Dillon 			return(EALREADY);
880c82af904SMatthew Dillon 		localization = leaf->base.localization &
881c82af904SMatthew Dillon 			       HAMMER_LOCALIZE_PSEUDOFS_MASK;
882c82af904SMatthew Dillon 		if (data->entry.localization != localization) {
883c82af904SMatthew Dillon 			data->entry.localization = localization;
884c82af904SMatthew Dillon 			hammer_crc_set_leaf(data, leaf);
885adf01747SMatthew Dillon 		}
886adf01747SMatthew Dillon 	}
887adf01747SMatthew Dillon 	return(0);
888c82af904SMatthew Dillon }
889c82af904SMatthew Dillon 
890