/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.17 2008/07/31 22:30:33 dillon Exp $
 */
/*
 * HAMMER mirroring ioctls - serialize and deserialize modifications made
 * to a filesystem.
39dd94f1b1SMatthew Dillon */ 40dd94f1b1SMatthew Dillon 41dd94f1b1SMatthew Dillon #include "hammer.h" 42dd94f1b1SMatthew Dillon 43c82af904SMatthew Dillon static int hammer_mirror_check(hammer_cursor_t cursor, 444c038e17SMatthew Dillon struct hammer_ioc_mrecord_rec *mrec); 45c82af904SMatthew Dillon static int hammer_mirror_update(hammer_cursor_t cursor, 464c038e17SMatthew Dillon struct hammer_ioc_mrecord_rec *mrec); 474c038e17SMatthew Dillon static int hammer_ioc_mirror_write_rec(hammer_cursor_t cursor, 484c038e17SMatthew Dillon struct hammer_ioc_mrecord_rec *mrec, 494c038e17SMatthew Dillon struct hammer_ioc_mirror_rw *mirror, 504c038e17SMatthew Dillon u_int32_t localization, 514c038e17SMatthew Dillon char *uptr); 524c038e17SMatthew Dillon static int hammer_ioc_mirror_write_pass(hammer_cursor_t cursor, 534c038e17SMatthew Dillon struct hammer_ioc_mrecord_rec *mrec, 544c038e17SMatthew Dillon struct hammer_ioc_mirror_rw *mirror, 554c038e17SMatthew Dillon u_int32_t localization); 564c038e17SMatthew Dillon static int hammer_ioc_mirror_write_skip(hammer_cursor_t cursor, 574c038e17SMatthew Dillon struct hammer_ioc_mrecord_skip *mrec, 584c038e17SMatthew Dillon struct hammer_ioc_mirror_rw *mirror, 594c038e17SMatthew Dillon u_int32_t localization); 60842e7a70SMatthew Dillon static int hammer_mirror_delete_to(hammer_cursor_t cursor, 614c038e17SMatthew Dillon struct hammer_ioc_mirror_rw *mirror); 6283f2a3aaSMatthew Dillon static int hammer_mirror_nomirror(struct hammer_base_elm *base); 63c82af904SMatthew Dillon 64c82af904SMatthew Dillon /* 65c82af904SMatthew Dillon * All B-Tree records within the specified key range which also conform 66c82af904SMatthew Dillon * to the transaction id range are returned. Mirroring code keeps track 67c82af904SMatthew Dillon * of the last transaction id fully scanned and can efficiently pick up 68c82af904SMatthew Dillon * where it left off if interrupted. 
69ea434b6fSMatthew Dillon * 70ea434b6fSMatthew Dillon * The PFS is identified in the mirror structure. The passed ip is just 71ea434b6fSMatthew Dillon * some directory in the overall HAMMER filesystem and has nothing to 72ea434b6fSMatthew Dillon * do with the PFS. 73c82af904SMatthew Dillon */ 74dd94f1b1SMatthew Dillon int 75dd94f1b1SMatthew Dillon hammer_ioc_mirror_read(hammer_transaction_t trans, hammer_inode_t ip, 76dd94f1b1SMatthew Dillon struct hammer_ioc_mirror_rw *mirror) 77dd94f1b1SMatthew Dillon { 784c038e17SMatthew Dillon struct hammer_cmirror cmirror; 79dd94f1b1SMatthew Dillon struct hammer_cursor cursor; 804c038e17SMatthew Dillon union hammer_ioc_mrecord_any mrec; 81c82af904SMatthew Dillon hammer_btree_leaf_elm_t elm; 82c82af904SMatthew Dillon const int crc_start = HAMMER_MREC_CRCOFF; 83c82af904SMatthew Dillon char *uptr; 84dd94f1b1SMatthew Dillon int error; 85c82af904SMatthew Dillon int data_len; 86c82af904SMatthew Dillon int bytes; 874c038e17SMatthew Dillon int eatdisk; 884c286c36SMatthew Dillon int mrec_flags; 89ea434b6fSMatthew Dillon u_int32_t localization; 904c038e17SMatthew Dillon u_int32_t rec_crc; 91ea434b6fSMatthew Dillon 92ea434b6fSMatthew Dillon localization = (u_int32_t)mirror->pfs_id << 16; 93dd94f1b1SMatthew Dillon 94dd94f1b1SMatthew Dillon if ((mirror->key_beg.localization | mirror->key_end.localization) & 95dd94f1b1SMatthew Dillon HAMMER_LOCALIZE_PSEUDOFS_MASK) { 96dd94f1b1SMatthew Dillon return(EINVAL); 97dd94f1b1SMatthew Dillon } 98dd94f1b1SMatthew Dillon if (hammer_btree_cmp(&mirror->key_beg, &mirror->key_end) > 0) 99dd94f1b1SMatthew Dillon return(EINVAL); 100dd94f1b1SMatthew Dillon 101dd94f1b1SMatthew Dillon mirror->key_cur = mirror->key_beg; 1024c038e17SMatthew Dillon mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK; 103ea434b6fSMatthew Dillon mirror->key_cur.localization += localization; 104c82af904SMatthew Dillon bzero(&mrec, sizeof(mrec)); 1054c038e17SMatthew Dillon bzero(&cmirror, sizeof(cmirror)); 106dd94f1b1SMatthew Dillon 
1074c286c36SMatthew Dillon /* 1084c286c36SMatthew Dillon * Make CRC errors non-fatal (at least on data), causing an EDOM 1094c286c36SMatthew Dillon * error instead of EIO. 1104c286c36SMatthew Dillon */ 1114c286c36SMatthew Dillon trans->flags |= HAMMER_TRANSF_CRCDOM; 1124c286c36SMatthew Dillon 113dd94f1b1SMatthew Dillon retry: 114dd94f1b1SMatthew Dillon error = hammer_init_cursor(trans, &cursor, NULL, NULL); 115dd94f1b1SMatthew Dillon if (error) { 116dd94f1b1SMatthew Dillon hammer_done_cursor(&cursor); 117dd94f1b1SMatthew Dillon goto failed; 118dd94f1b1SMatthew Dillon } 119dd94f1b1SMatthew Dillon cursor.key_beg = mirror->key_cur; 120dd94f1b1SMatthew Dillon cursor.key_end = mirror->key_end; 1214c038e17SMatthew Dillon cursor.key_end.localization &= HAMMER_LOCALIZE_MASK; 122ea434b6fSMatthew Dillon cursor.key_end.localization += localization; 123dd94f1b1SMatthew Dillon 124dd94f1b1SMatthew Dillon cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 125dd94f1b1SMatthew Dillon cursor.flags |= HAMMER_CURSOR_BACKEND; 126dd94f1b1SMatthew Dillon 127dd94f1b1SMatthew Dillon /* 128c82af904SMatthew Dillon * This flag filters the search to only return elements whos create 129c82af904SMatthew Dillon * or delete TID is >= mirror_tid. The B-Tree uses the mirror_tid 130c82af904SMatthew Dillon * field stored with internal and leaf nodes to shortcut the scan. 
131dd94f1b1SMatthew Dillon */ 132c82af904SMatthew Dillon cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED; 1334c038e17SMatthew Dillon cursor.cmirror = &cmirror; 1344c038e17SMatthew Dillon cmirror.mirror_tid = mirror->tid_beg; 135dd94f1b1SMatthew Dillon 136dd94f1b1SMatthew Dillon error = hammer_btree_first(&cursor); 137dd94f1b1SMatthew Dillon while (error == 0) { 138dd94f1b1SMatthew Dillon /* 13993291532SMatthew Dillon * Yield to more important tasks 14093291532SMatthew Dillon */ 14193291532SMatthew Dillon if (error == 0) { 14293291532SMatthew Dillon error = hammer_signal_check(trans->hmp); 14393291532SMatthew Dillon if (error) 14493291532SMatthew Dillon break; 14593291532SMatthew Dillon } 14693291532SMatthew Dillon 14793291532SMatthew Dillon /* 1484c038e17SMatthew Dillon * An internal node can be returned in mirror-filtered 1494c038e17SMatthew Dillon * mode and indicates that the scan is returning a skip 1504c038e17SMatthew Dillon * range in the cursor->cmirror structure. 1514c038e17SMatthew Dillon */ 1524c038e17SMatthew Dillon uptr = (char *)mirror->ubuf + mirror->count; 1534c038e17SMatthew Dillon if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL) { 1544c038e17SMatthew Dillon /* 1554c038e17SMatthew Dillon * Check space 1564c038e17SMatthew Dillon */ 1574c038e17SMatthew Dillon mirror->key_cur = cmirror.skip_beg; 1584c038e17SMatthew Dillon bytes = sizeof(mrec.skip); 1594c038e17SMatthew Dillon if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) > 1604c038e17SMatthew Dillon mirror->size) { 1614c038e17SMatthew Dillon break; 1624c038e17SMatthew Dillon } 1634c038e17SMatthew Dillon 1644c038e17SMatthew Dillon /* 1654c038e17SMatthew Dillon * Fill mrec 1664c038e17SMatthew Dillon */ 1674c038e17SMatthew Dillon mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE; 1684c038e17SMatthew Dillon mrec.head.type = HAMMER_MREC_TYPE_SKIP; 1694c038e17SMatthew Dillon mrec.head.rec_size = bytes; 1704c038e17SMatthew Dillon mrec.skip.skip_beg = cmirror.skip_beg; 1714c038e17SMatthew Dillon 
mrec.skip.skip_end = cmirror.skip_end; 1724c038e17SMatthew Dillon mrec.head.rec_crc = crc32(&mrec.head.rec_size, 1734c038e17SMatthew Dillon bytes - crc_start); 1744c038e17SMatthew Dillon error = copyout(&mrec, uptr, bytes); 1754c038e17SMatthew Dillon eatdisk = 0; 1764c038e17SMatthew Dillon goto didwrite; 1774c038e17SMatthew Dillon } 1784c038e17SMatthew Dillon 1794c038e17SMatthew Dillon /* 1804c038e17SMatthew Dillon * Leaf node. In full-history mode we could filter out 1814c038e17SMatthew Dillon * elements modified outside the user-requested TID range. 1824c038e17SMatthew Dillon * 1834c038e17SMatthew Dillon * However, such elements must be returned so the writer 184f96881ffSMatthew Dillon * can compare them against the target to determine what 1854c038e17SMatthew Dillon * needs to be deleted on the target, particular for 1864c038e17SMatthew Dillon * no-history mirrors. 187dd94f1b1SMatthew Dillon */ 188c82af904SMatthew Dillon KKASSERT(cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF); 189c82af904SMatthew Dillon elm = &cursor.node->ondisk->elms[cursor.index].leaf; 190c82af904SMatthew Dillon mirror->key_cur = elm->base; 191dd94f1b1SMatthew Dillon 192e469566bSMatthew Dillon /* 1933324b8cdSMatthew Dillon * If the record was created after our end point we just 1943324b8cdSMatthew Dillon * ignore it. 1953324b8cdSMatthew Dillon */ 1963324b8cdSMatthew Dillon if (elm->base.create_tid > mirror->tid_end) { 1973324b8cdSMatthew Dillon error = 0; 1983324b8cdSMatthew Dillon bytes = 0; 1993324b8cdSMatthew Dillon eatdisk = 1; 2003324b8cdSMatthew Dillon goto didwrite; 2013324b8cdSMatthew Dillon } 2023324b8cdSMatthew Dillon 2033324b8cdSMatthew Dillon /* 204e469566bSMatthew Dillon * Determine if we should generate a PASS or a REC. PASS 205e469566bSMatthew Dillon * records are records without any data payload. Such 206e469566bSMatthew Dillon * records will be generated if the target is already expected 207e469566bSMatthew Dillon * to have the record, allowing it to delete the gaps. 
208e469566bSMatthew Dillon * 209e469566bSMatthew Dillon * A PASS record is also used to perform deletions on the 210e469566bSMatthew Dillon * target. 211e469566bSMatthew Dillon * 212e469566bSMatthew Dillon * Such deletions are needed if the master or files on the 213e469566bSMatthew Dillon * master are no-history, or if the slave is so far behind 214e469566bSMatthew Dillon * the master has already been pruned. 215e469566bSMatthew Dillon */ 2163324b8cdSMatthew Dillon if (elm->base.create_tid < mirror->tid_beg) { 2174c038e17SMatthew Dillon bytes = sizeof(mrec.rec); 2184c038e17SMatthew Dillon if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) > 2194c038e17SMatthew Dillon mirror->size) { 2204c038e17SMatthew Dillon break; 2214c038e17SMatthew Dillon } 2224c038e17SMatthew Dillon 2234c038e17SMatthew Dillon /* 224e469566bSMatthew Dillon * Fill mrec. 2254c038e17SMatthew Dillon */ 2264c038e17SMatthew Dillon mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE; 2274c038e17SMatthew Dillon mrec.head.type = HAMMER_MREC_TYPE_PASS; 2284c038e17SMatthew Dillon mrec.head.rec_size = bytes; 2294c038e17SMatthew Dillon mrec.rec.leaf = *elm; 2304c038e17SMatthew Dillon mrec.head.rec_crc = crc32(&mrec.head.rec_size, 2314c038e17SMatthew Dillon bytes - crc_start); 2324c038e17SMatthew Dillon error = copyout(&mrec, uptr, bytes); 2334c038e17SMatthew Dillon eatdisk = 1; 2344c038e17SMatthew Dillon goto didwrite; 2354c038e17SMatthew Dillon 2364c038e17SMatthew Dillon } 2374c038e17SMatthew Dillon 238dd94f1b1SMatthew Dillon /* 239c82af904SMatthew Dillon * The core code exports the data to userland. 2404c286c36SMatthew Dillon * 2414c286c36SMatthew Dillon * CRC errors on data are reported but passed through, 2424c286c36SMatthew Dillon * but the data must be washed by the user program. 243dd94f1b1SMatthew Dillon */ 2444c286c36SMatthew Dillon mrec_flags = 0; 245c82af904SMatthew Dillon data_len = (elm->data_offset) ? 
elm->data_len : 0; 246c82af904SMatthew Dillon if (data_len) { 247c82af904SMatthew Dillon error = hammer_btree_extract(&cursor, 248c82af904SMatthew Dillon HAMMER_CURSOR_GET_DATA); 2494c286c36SMatthew Dillon if (error) { 2504c286c36SMatthew Dillon if (error != EDOM) 251c82af904SMatthew Dillon break; 2524c286c36SMatthew Dillon mrec_flags |= HAMMER_MRECF_CRC_ERROR | 2534c286c36SMatthew Dillon HAMMER_MRECF_DATA_CRC_BAD; 2544c286c36SMatthew Dillon } 255c82af904SMatthew Dillon } 2564c038e17SMatthew Dillon 2574c038e17SMatthew Dillon bytes = sizeof(mrec.rec) + data_len; 2584c038e17SMatthew Dillon if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) > mirror->size) 259c82af904SMatthew Dillon break; 260c82af904SMatthew Dillon 261c82af904SMatthew Dillon /* 262c82af904SMatthew Dillon * Construct the record for userland and copyout. 263c82af904SMatthew Dillon * 264c82af904SMatthew Dillon * The user is asking for a snapshot, if the record was 265c82af904SMatthew Dillon * deleted beyond the user-requested ending tid, the record 266c82af904SMatthew Dillon * is not considered deleted from the point of view of 267c82af904SMatthew Dillon * userland and delete_tid is cleared. 
268c82af904SMatthew Dillon */ 2694c038e17SMatthew Dillon mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE; 2704c286c36SMatthew Dillon mrec.head.type = HAMMER_MREC_TYPE_REC | mrec_flags; 2714c038e17SMatthew Dillon mrec.head.rec_size = bytes; 2724c038e17SMatthew Dillon mrec.rec.leaf = *elm; 2734c286c36SMatthew Dillon 2744889cbd4SMatthew Dillon if (elm->base.delete_tid > mirror->tid_end) 2754c038e17SMatthew Dillon mrec.rec.leaf.base.delete_tid = 0; 2764c038e17SMatthew Dillon rec_crc = crc32(&mrec.head.rec_size, 2774c038e17SMatthew Dillon sizeof(mrec.rec) - crc_start); 2784c038e17SMatthew Dillon if (data_len) 2794c038e17SMatthew Dillon rec_crc = crc32_ext(cursor.data, data_len, rec_crc); 2804c038e17SMatthew Dillon mrec.head.rec_crc = rec_crc; 2814c038e17SMatthew Dillon error = copyout(&mrec, uptr, sizeof(mrec.rec)); 282c82af904SMatthew Dillon if (data_len && error == 0) { 2834c038e17SMatthew Dillon error = copyout(cursor.data, uptr + sizeof(mrec.rec), 284c82af904SMatthew Dillon data_len); 285c82af904SMatthew Dillon } 2864c038e17SMatthew Dillon eatdisk = 1; 2874c038e17SMatthew Dillon 2884c038e17SMatthew Dillon /* 2894c038e17SMatthew Dillon * eatdisk controls whether we skip the current cursor 2904c038e17SMatthew Dillon * position on the next scan or not. If doing a SKIP 2914c038e17SMatthew Dillon * the cursor is already positioned properly for the next 2924c038e17SMatthew Dillon * scan and eatdisk will be 0. 
2934c038e17SMatthew Dillon */ 2944c038e17SMatthew Dillon didwrite: 295dd94f1b1SMatthew Dillon if (error == 0) { 2964c038e17SMatthew Dillon mirror->count += HAMMER_HEAD_DOALIGN(bytes); 2974c038e17SMatthew Dillon if (eatdisk) 298dd94f1b1SMatthew Dillon cursor.flags |= HAMMER_CURSOR_ATEDISK; 2994c038e17SMatthew Dillon else 3004c038e17SMatthew Dillon cursor.flags &= ~HAMMER_CURSOR_ATEDISK; 301dd94f1b1SMatthew Dillon error = hammer_btree_iterate(&cursor); 302dd94f1b1SMatthew Dillon } 303dd94f1b1SMatthew Dillon } 304c82af904SMatthew Dillon if (error == ENOENT) { 305c82af904SMatthew Dillon mirror->key_cur = mirror->key_end; 306dd94f1b1SMatthew Dillon error = 0; 307c82af904SMatthew Dillon } 308dd94f1b1SMatthew Dillon hammer_done_cursor(&cursor); 309dd94f1b1SMatthew Dillon if (error == EDEADLK) 310dd94f1b1SMatthew Dillon goto retry; 311dd94f1b1SMatthew Dillon if (error == EINTR) { 312c82af904SMatthew Dillon mirror->head.flags |= HAMMER_IOC_HEAD_INTR; 313dd94f1b1SMatthew Dillon error = 0; 314dd94f1b1SMatthew Dillon } 315dd94f1b1SMatthew Dillon failed: 316dd94f1b1SMatthew Dillon mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK; 317dd94f1b1SMatthew Dillon return(error); 318dd94f1b1SMatthew Dillon } 319dd94f1b1SMatthew Dillon 320c82af904SMatthew Dillon /* 3214c038e17SMatthew Dillon * Copy records from userland to the target mirror. 322602c6cb8SMatthew Dillon * 323ea434b6fSMatthew Dillon * The PFS is identified in the mirror structure. The passed ip is just 324ea434b6fSMatthew Dillon * some directory in the overall HAMMER filesystem and has nothing to 325ea434b6fSMatthew Dillon * do with the PFS. In fact, there might not even be a root directory for 326ea434b6fSMatthew Dillon * the PFS yet! 
327c82af904SMatthew Dillon */ 328c82af904SMatthew Dillon int 329c82af904SMatthew Dillon hammer_ioc_mirror_write(hammer_transaction_t trans, hammer_inode_t ip, 330c82af904SMatthew Dillon struct hammer_ioc_mirror_rw *mirror) 331c82af904SMatthew Dillon { 3324c038e17SMatthew Dillon union hammer_ioc_mrecord_any mrec; 333c82af904SMatthew Dillon struct hammer_cursor cursor; 334ea434b6fSMatthew Dillon u_int32_t localization; 33593291532SMatthew Dillon int checkspace_count = 0; 3364c038e17SMatthew Dillon int error; 3374c038e17SMatthew Dillon int bytes; 3384c038e17SMatthew Dillon char *uptr; 33993291532SMatthew Dillon int seq; 340ea434b6fSMatthew Dillon 341ea434b6fSMatthew Dillon localization = (u_int32_t)mirror->pfs_id << 16; 34293291532SMatthew Dillon seq = trans->hmp->flusher.act; 343c82af904SMatthew Dillon 3444c038e17SMatthew Dillon /* 3454c038e17SMatthew Dillon * Validate the mirror structure and relocalize the tracking keys. 3464c038e17SMatthew Dillon */ 347c82af904SMatthew Dillon if (mirror->size < 0 || mirror->size > 0x70000000) 348c82af904SMatthew Dillon return(EINVAL); 3494c038e17SMatthew Dillon mirror->key_beg.localization &= HAMMER_LOCALIZE_MASK; 3504c038e17SMatthew Dillon mirror->key_beg.localization += localization; 3514c038e17SMatthew Dillon mirror->key_end.localization &= HAMMER_LOCALIZE_MASK; 3524c038e17SMatthew Dillon mirror->key_end.localization += localization; 3534c038e17SMatthew Dillon mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK; 3544c038e17SMatthew Dillon mirror->key_cur.localization += localization; 355c82af904SMatthew Dillon 3564c038e17SMatthew Dillon /* 3574c038e17SMatthew Dillon * Set up our tracking cursor for the loop. The tracking cursor 3584c038e17SMatthew Dillon * is used to delete records that are no longer present on the 3594c038e17SMatthew Dillon * master. The last handled record at key_cur must be skipped. 
3604c038e17SMatthew Dillon */ 361c82af904SMatthew Dillon error = hammer_init_cursor(trans, &cursor, NULL, NULL); 362c82af904SMatthew Dillon 3634c038e17SMatthew Dillon cursor.key_beg = mirror->key_cur; 3644c038e17SMatthew Dillon cursor.key_end = mirror->key_end; 3654c038e17SMatthew Dillon cursor.flags |= HAMMER_CURSOR_BACKEND; 3664c038e17SMatthew Dillon error = hammer_btree_first(&cursor); 3674c038e17SMatthew Dillon if (error == 0) 3684c038e17SMatthew Dillon cursor.flags |= HAMMER_CURSOR_ATEDISK; 3694c038e17SMatthew Dillon if (error == ENOENT) 3704c038e17SMatthew Dillon error = 0; 3714c038e17SMatthew Dillon 3724c038e17SMatthew Dillon /* 3734c038e17SMatthew Dillon * Loop until our input buffer has been exhausted. 3744c038e17SMatthew Dillon */ 3754c038e17SMatthew Dillon while (error == 0 && 3764c038e17SMatthew Dillon mirror->count + sizeof(mrec.head) <= mirror->size) { 3774c038e17SMatthew Dillon 378c82af904SMatthew Dillon /* 37993291532SMatthew Dillon * Don't blow out the buffer cache. Leave room for frontend 38093291532SMatthew Dillon * cache as well. 381c9ce54d6SMatthew Dillon * 382c9ce54d6SMatthew Dillon * WARNING: See warnings in hammer_unlock_cursor() function. 38393291532SMatthew Dillon */ 38415e75dabSMatthew Dillon while (hammer_flusher_meta_halflimit(trans->hmp) || 3857a61b85dSMatthew Dillon hammer_flusher_undo_exhausted(trans, 2)) { 386982be4bfSMatthew Dillon hammer_unlock_cursor(&cursor); 38793291532SMatthew Dillon hammer_flusher_wait(trans->hmp, seq); 388982be4bfSMatthew Dillon hammer_lock_cursor(&cursor); 38915e75dabSMatthew Dillon seq = hammer_flusher_async_one(trans->hmp); 39093291532SMatthew Dillon } 39193291532SMatthew Dillon 39293291532SMatthew Dillon /* 39393291532SMatthew Dillon * If there is insufficient free space it may be due to 39493291532SMatthew Dillon * reserved bigblocks, which flushing might fix. 
39593291532SMatthew Dillon */ 39693291532SMatthew Dillon if (hammer_checkspace(trans->hmp, HAMMER_CHKSPC_MIRROR)) { 39793291532SMatthew Dillon if (++checkspace_count == 10) { 39893291532SMatthew Dillon error = ENOSPC; 39993291532SMatthew Dillon break; 40093291532SMatthew Dillon } 401982be4bfSMatthew Dillon hammer_unlock_cursor(&cursor); 40293291532SMatthew Dillon hammer_flusher_wait(trans->hmp, seq); 403982be4bfSMatthew Dillon hammer_lock_cursor(&cursor); 4047a61b85dSMatthew Dillon seq = hammer_flusher_async(trans->hmp, NULL); 40593291532SMatthew Dillon } 40693291532SMatthew Dillon 40793291532SMatthew Dillon 40893291532SMatthew Dillon /* 409c82af904SMatthew Dillon * Acquire and validate header 410c82af904SMatthew Dillon */ 4114c038e17SMatthew Dillon if ((bytes = mirror->size - mirror->count) > sizeof(mrec)) 4124c038e17SMatthew Dillon bytes = sizeof(mrec); 413c82af904SMatthew Dillon uptr = (char *)mirror->ubuf + mirror->count; 4144c038e17SMatthew Dillon error = copyin(uptr, &mrec, bytes); 415c82af904SMatthew Dillon if (error) 416c82af904SMatthew Dillon break; 4174c038e17SMatthew Dillon if (mrec.head.signature != HAMMER_IOC_MIRROR_SIGNATURE) { 418c82af904SMatthew Dillon error = EINVAL; 419c82af904SMatthew Dillon break; 420c82af904SMatthew Dillon } 4214c038e17SMatthew Dillon if (mrec.head.rec_size < sizeof(mrec.head) || 4224c038e17SMatthew Dillon mrec.head.rec_size > sizeof(mrec) + HAMMER_XBUFSIZE || 4234c038e17SMatthew Dillon mirror->count + mrec.head.rec_size > mirror->size) { 4245fa5c92fSMatthew Dillon error = EINVAL; 4255fa5c92fSMatthew Dillon break; 4265fa5c92fSMatthew Dillon } 4274c038e17SMatthew Dillon 4284c286c36SMatthew Dillon switch(mrec.head.type & HAMMER_MRECF_TYPE_MASK) { 4294c038e17SMatthew Dillon case HAMMER_MREC_TYPE_SKIP: 4304c038e17SMatthew Dillon if (mrec.head.rec_size != sizeof(mrec.skip)) 4314c038e17SMatthew Dillon error = EINVAL; 4324c038e17SMatthew Dillon if (error == 0) 4334c038e17SMatthew Dillon error = hammer_ioc_mirror_write_skip(&cursor, 
&mrec.skip, mirror, localization); 4344c038e17SMatthew Dillon break; 4354c038e17SMatthew Dillon case HAMMER_MREC_TYPE_REC: 4364c038e17SMatthew Dillon if (mrec.head.rec_size < sizeof(mrec.rec)) 4374c038e17SMatthew Dillon error = EINVAL; 4384c038e17SMatthew Dillon if (error == 0) 4394c038e17SMatthew Dillon error = hammer_ioc_mirror_write_rec(&cursor, &mrec.rec, mirror, localization, uptr + sizeof(mrec.rec)); 4404c038e17SMatthew Dillon break; 4414c286c36SMatthew Dillon case HAMMER_MREC_TYPE_REC_BADCRC: 4424c286c36SMatthew Dillon /* 4434c286c36SMatthew Dillon * Records with bad data payloads are ignored XXX. 4444c286c36SMatthew Dillon */ 4454c286c36SMatthew Dillon if (mrec.head.rec_size < sizeof(mrec.rec)) 4464c286c36SMatthew Dillon error = EINVAL; 4474c286c36SMatthew Dillon break; 4484c038e17SMatthew Dillon case HAMMER_MREC_TYPE_PASS: 4494c038e17SMatthew Dillon if (mrec.head.rec_size != sizeof(mrec.rec)) 4504c038e17SMatthew Dillon error = EINVAL; 4514c038e17SMatthew Dillon if (error == 0) 4524c038e17SMatthew Dillon error = hammer_ioc_mirror_write_pass(&cursor, &mrec.rec, mirror, localization); 4534c038e17SMatthew Dillon break; 4544c038e17SMatthew Dillon default: 455c82af904SMatthew Dillon error = EINVAL; 456c82af904SMatthew Dillon break; 457c82af904SMatthew Dillon } 4584c038e17SMatthew Dillon 4594c038e17SMatthew Dillon /* 4604c038e17SMatthew Dillon * Retry the current record on deadlock, otherwise setup 4614c038e17SMatthew Dillon * for the next loop. 
4624c038e17SMatthew Dillon */ 4634c038e17SMatthew Dillon if (error == EDEADLK) { 4644c038e17SMatthew Dillon while (error == EDEADLK) { 465*f3a4893bSMatthew Dillon hammer_sync_lock_sh(trans); 4664c038e17SMatthew Dillon hammer_recover_cursor(&cursor); 4674c038e17SMatthew Dillon error = hammer_cursor_upgrade(&cursor); 468*f3a4893bSMatthew Dillon hammer_sync_unlock(trans); 469c82af904SMatthew Dillon } 4704c038e17SMatthew Dillon } else { 4714c038e17SMatthew Dillon if (error == EALREADY) 4724c038e17SMatthew Dillon error = 0; 4734c038e17SMatthew Dillon if (error == 0) { 4744c038e17SMatthew Dillon mirror->count += 4754c038e17SMatthew Dillon HAMMER_HEAD_DOALIGN(mrec.head.rec_size); 4764c038e17SMatthew Dillon } 4774c038e17SMatthew Dillon } 4784c038e17SMatthew Dillon } 4794c038e17SMatthew Dillon hammer_done_cursor(&cursor); 4804c038e17SMatthew Dillon 4814c038e17SMatthew Dillon /* 4824c038e17SMatthew Dillon * cumulative error 4834c038e17SMatthew Dillon */ 4844c038e17SMatthew Dillon if (error) { 4854c038e17SMatthew Dillon mirror->head.flags |= HAMMER_IOC_HEAD_ERROR; 4864c038e17SMatthew Dillon mirror->head.error = error; 4874c038e17SMatthew Dillon } 4884c038e17SMatthew Dillon 4894c038e17SMatthew Dillon /* 4904c038e17SMatthew Dillon * ioctls don't update the RW data structure if an error is returned, 4914c038e17SMatthew Dillon * always return 0. 4924c038e17SMatthew Dillon */ 4934c038e17SMatthew Dillon return(0); 4944c038e17SMatthew Dillon } 4954c038e17SMatthew Dillon 4964c038e17SMatthew Dillon /* 4974c038e17SMatthew Dillon * Handle skip records. 4984c038e17SMatthew Dillon * 4994c038e17SMatthew Dillon * We must iterate from the last resolved record position at mirror->key_cur 5003324b8cdSMatthew Dillon * to skip_beg non-inclusive and delete any records encountered. 5014c038e17SMatthew Dillon * 5024c038e17SMatthew Dillon * mirror->key_cur must be carefully set when we succeed in processing 5034c038e17SMatthew Dillon * this mrec. 
5044c038e17SMatthew Dillon */ 5054c038e17SMatthew Dillon static int 5064c038e17SMatthew Dillon hammer_ioc_mirror_write_skip(hammer_cursor_t cursor, 5074c038e17SMatthew Dillon struct hammer_ioc_mrecord_skip *mrec, 5084c038e17SMatthew Dillon struct hammer_ioc_mirror_rw *mirror, 5094c038e17SMatthew Dillon u_int32_t localization) 5104c038e17SMatthew Dillon { 5114c038e17SMatthew Dillon int error; 5124c038e17SMatthew Dillon 5134c038e17SMatthew Dillon /* 5144c038e17SMatthew Dillon * Relocalize the skip range 5154c038e17SMatthew Dillon */ 5164c038e17SMatthew Dillon mrec->skip_beg.localization &= HAMMER_LOCALIZE_MASK; 5174c038e17SMatthew Dillon mrec->skip_beg.localization += localization; 5184c038e17SMatthew Dillon mrec->skip_end.localization &= HAMMER_LOCALIZE_MASK; 5194c038e17SMatthew Dillon mrec->skip_end.localization += localization; 5204c038e17SMatthew Dillon 5214c038e17SMatthew Dillon /* 5224c038e17SMatthew Dillon * Iterate from current position to skip_beg, deleting any records 5233324b8cdSMatthew Dillon * we encounter. The record at skip_beg is not included (it is 5243324b8cdSMatthew Dillon * skipped). 5254c038e17SMatthew Dillon */ 5264c038e17SMatthew Dillon cursor->key_end = mrec->skip_beg; 5273324b8cdSMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_END_INCLUSIVE; 5284c038e17SMatthew Dillon cursor->flags |= HAMMER_CURSOR_BACKEND; 529842e7a70SMatthew Dillon error = hammer_mirror_delete_to(cursor, mirror); 5304c038e17SMatthew Dillon 5314c038e17SMatthew Dillon /* 5324c038e17SMatthew Dillon * Now skip past the skip (which is the whole point point of 5334c038e17SMatthew Dillon * having a skip record). The sender has not sent us any records 5344c038e17SMatthew Dillon * for the skip area so we wouldn't know what to keep and what 5354c038e17SMatthew Dillon * to delete anyway. 5364c038e17SMatthew Dillon * 5374c038e17SMatthew Dillon * Clear ATEDISK because skip_end is non-inclusive, so we can't 5384c038e17SMatthew Dillon * count an exact match if we happened to get one. 
5394c038e17SMatthew Dillon */ 5404c038e17SMatthew Dillon if (error == 0) { 5414c038e17SMatthew Dillon mirror->key_cur = mrec->skip_end; 5424c038e17SMatthew Dillon cursor->key_beg = mrec->skip_end; 5434c038e17SMatthew Dillon error = hammer_btree_lookup(cursor); 5444c038e17SMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_ATEDISK; 5454c038e17SMatthew Dillon if (error == ENOENT) 5464c038e17SMatthew Dillon error = 0; 5474c038e17SMatthew Dillon } 5484c038e17SMatthew Dillon return(error); 5494c038e17SMatthew Dillon } 5504c038e17SMatthew Dillon 5514c038e17SMatthew Dillon /* 5524c038e17SMatthew Dillon * Handle B-Tree records. 5534c038e17SMatthew Dillon * 5544c038e17SMatthew Dillon * We must iterate to mrec->base.key (non-inclusively), and then process 5554c038e17SMatthew Dillon * the record. We are allowed to write a new record or delete an existing 5564c038e17SMatthew Dillon * record, but cannot replace an existing record. 5574c038e17SMatthew Dillon * 5584c038e17SMatthew Dillon * mirror->key_cur must be carefully set when we succeed in processing 5594c038e17SMatthew Dillon * this mrec. 
5604c038e17SMatthew Dillon */ 5614c038e17SMatthew Dillon static int 5624c038e17SMatthew Dillon hammer_ioc_mirror_write_rec(hammer_cursor_t cursor, 5634c038e17SMatthew Dillon struct hammer_ioc_mrecord_rec *mrec, 5644c038e17SMatthew Dillon struct hammer_ioc_mirror_rw *mirror, 5654c038e17SMatthew Dillon u_int32_t localization, 5664c038e17SMatthew Dillon char *uptr) 5674c038e17SMatthew Dillon { 5684c038e17SMatthew Dillon hammer_transaction_t trans; 5694c038e17SMatthew Dillon u_int32_t rec_crc; 5704c038e17SMatthew Dillon int error; 5714c038e17SMatthew Dillon 5724c038e17SMatthew Dillon trans = cursor->trans; 5734c038e17SMatthew Dillon rec_crc = crc32(mrec, sizeof(*mrec)); 5744c038e17SMatthew Dillon 5754c038e17SMatthew Dillon if (mrec->leaf.data_len < 0 || 5764c038e17SMatthew Dillon mrec->leaf.data_len > HAMMER_XBUFSIZE || 5774c038e17SMatthew Dillon mrec->leaf.data_len + sizeof(*mrec) > mrec->head.rec_size) { 5784c038e17SMatthew Dillon return(EINVAL); 579c82af904SMatthew Dillon } 580c82af904SMatthew Dillon 581c82af904SMatthew Dillon /* 582c82af904SMatthew Dillon * Re-localize for target. relocalization of data is handled 583c82af904SMatthew Dillon * by hammer_mirror_write(). 584c82af904SMatthew Dillon */ 5854c038e17SMatthew Dillon mrec->leaf.base.localization &= HAMMER_LOCALIZE_MASK; 5864c038e17SMatthew Dillon mrec->leaf.base.localization += localization; 5874c038e17SMatthew Dillon 5884c038e17SMatthew Dillon /* 5894c038e17SMatthew Dillon * Delete records through until we reach (non-inclusively) the 5904c038e17SMatthew Dillon * target record. 
5914c038e17SMatthew Dillon */ 5924c038e17SMatthew Dillon cursor->key_end = mrec->leaf.base; 5934c038e17SMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_END_INCLUSIVE; 5944c038e17SMatthew Dillon cursor->flags |= HAMMER_CURSOR_BACKEND; 595842e7a70SMatthew Dillon error = hammer_mirror_delete_to(cursor, mirror); 596c82af904SMatthew Dillon 597c82af904SMatthew Dillon /* 59883f2a3aaSMatthew Dillon * Certain records are not part of the mirroring operation 59983f2a3aaSMatthew Dillon */ 60083f2a3aaSMatthew Dillon if (hammer_mirror_nomirror(&mrec->leaf.base)) 60183f2a3aaSMatthew Dillon return(0); 60283f2a3aaSMatthew Dillon 60383f2a3aaSMatthew Dillon /* 604c82af904SMatthew Dillon * Locate the record. 605c82af904SMatthew Dillon * 606c82af904SMatthew Dillon * If the record exists only the delete_tid may be updated. 607c82af904SMatthew Dillon * 608e469566bSMatthew Dillon * If the record does not exist we can create it only if the 609e469566bSMatthew Dillon * create_tid is not too old. If the create_tid is too old 610e469566bSMatthew Dillon * it may have already been destroyed on the slave from pruning. 611e469566bSMatthew Dillon * 612e469566bSMatthew Dillon * Note that mirror operations are effectively as-of operations 613e469566bSMatthew Dillon * and delete_tid can be 0 for mirroring purposes even if it is 614c82af904SMatthew Dillon * not actually 0 at the originator. 
61598da6d8cSMatthew Dillon * 61698da6d8cSMatthew Dillon * These functions can return EDEADLK 617c82af904SMatthew Dillon */ 6184c038e17SMatthew Dillon cursor->key_beg = mrec->leaf.base; 6194c038e17SMatthew Dillon cursor->flags |= HAMMER_CURSOR_BACKEND; 6204c038e17SMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_INSERT; 6214c038e17SMatthew Dillon error = hammer_btree_lookup(cursor); 622c82af904SMatthew Dillon 6234c038e17SMatthew Dillon if (error == 0 && hammer_mirror_check(cursor, mrec)) { 6244c038e17SMatthew Dillon error = hammer_mirror_update(cursor, mrec); 625adf01747SMatthew Dillon } else if (error == ENOENT) { 62683f2a3aaSMatthew Dillon if (mrec->leaf.base.create_tid >= mirror->tid_beg) { 62783f2a3aaSMatthew Dillon error = hammer_create_at_cursor( 62883f2a3aaSMatthew Dillon cursor, &mrec->leaf, 62983f2a3aaSMatthew Dillon uptr, HAMMER_CREATE_MODE_UMIRROR); 63083f2a3aaSMatthew Dillon } else { 631adf01747SMatthew Dillon error = 0; 632c82af904SMatthew Dillon } 63383f2a3aaSMatthew Dillon } 6344c038e17SMatthew Dillon if (error == 0 || error == EALREADY) 6354c038e17SMatthew Dillon mirror->key_cur = mrec->leaf.base; 6364c038e17SMatthew Dillon return(error); 6374c038e17SMatthew Dillon } 638c82af904SMatthew Dillon 639c82af904SMatthew Dillon /* 6404c038e17SMatthew Dillon * This works like write_rec but no write or update is necessary, 6414c038e17SMatthew Dillon * and no data payload is included so we couldn't do a write even 6424c038e17SMatthew Dillon * if we wanted to. 6434c038e17SMatthew Dillon * 6444c038e17SMatthew Dillon * We must still iterate for deletions, and we can validate the 6454c038e17SMatthew Dillon * record header which is a good way to test for corrupted mirror 6464c038e17SMatthew Dillon * targets XXX. 6474c038e17SMatthew Dillon * 6484c038e17SMatthew Dillon * mirror->key_cur must be carefully set when we succeed in processing 6494c038e17SMatthew Dillon * this mrec. 
650c82af904SMatthew Dillon */ 6514c038e17SMatthew Dillon static 6524c038e17SMatthew Dillon int 6534c038e17SMatthew Dillon hammer_ioc_mirror_write_pass(hammer_cursor_t cursor, 6544c038e17SMatthew Dillon struct hammer_ioc_mrecord_rec *mrec, 6554c038e17SMatthew Dillon struct hammer_ioc_mirror_rw *mirror, 6564c038e17SMatthew Dillon u_int32_t localization) 6574c038e17SMatthew Dillon { 6584c038e17SMatthew Dillon hammer_transaction_t trans; 6594c038e17SMatthew Dillon u_int32_t rec_crc; 6604c038e17SMatthew Dillon int error; 6614c038e17SMatthew Dillon 6624c038e17SMatthew Dillon trans = cursor->trans; 6634c038e17SMatthew Dillon rec_crc = crc32(mrec, sizeof(*mrec)); 6644c038e17SMatthew Dillon 6654c038e17SMatthew Dillon /* 6664c038e17SMatthew Dillon * Re-localize for target. Relocalization of data is handled 6674c038e17SMatthew Dillon * by hammer_mirror_write(). 6684c038e17SMatthew Dillon */ 6694c038e17SMatthew Dillon mrec->leaf.base.localization &= HAMMER_LOCALIZE_MASK; 6704c038e17SMatthew Dillon mrec->leaf.base.localization += localization; 6714c038e17SMatthew Dillon 6724c038e17SMatthew Dillon /* 6734c038e17SMatthew Dillon * Delete records through until we reach (non-inclusively) the 6744c038e17SMatthew Dillon * target record. 6754c038e17SMatthew Dillon */ 6764c038e17SMatthew Dillon cursor->key_end = mrec->leaf.base; 6774c038e17SMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_END_INCLUSIVE; 6784c038e17SMatthew Dillon cursor->flags |= HAMMER_CURSOR_BACKEND; 679842e7a70SMatthew Dillon error = hammer_mirror_delete_to(cursor, mirror); 6804c038e17SMatthew Dillon 6814c038e17SMatthew Dillon /* 68283f2a3aaSMatthew Dillon * Certain records are not part of the mirroring operation 68383f2a3aaSMatthew Dillon */ 68483f2a3aaSMatthew Dillon if (hammer_mirror_nomirror(&mrec->leaf.base)) 68583f2a3aaSMatthew Dillon return(0); 68683f2a3aaSMatthew Dillon 68783f2a3aaSMatthew Dillon /* 688e469566bSMatthew Dillon * Locate the record and get past it by setting ATEDISK. 
Perform 689e469566bSMatthew Dillon * any necessary deletions. We have no data payload and cannot 690e469566bSMatthew Dillon * create a new record. 6914c038e17SMatthew Dillon */ 6924c038e17SMatthew Dillon if (error == 0) { 6934c038e17SMatthew Dillon mirror->key_cur = mrec->leaf.base; 6944c038e17SMatthew Dillon cursor->key_beg = mrec->leaf.base; 6954c038e17SMatthew Dillon cursor->flags |= HAMMER_CURSOR_BACKEND; 6964c038e17SMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_INSERT; 6974c038e17SMatthew Dillon error = hammer_btree_lookup(cursor); 698e469566bSMatthew Dillon if (error == 0) { 699e469566bSMatthew Dillon if (hammer_mirror_check(cursor, mrec)) 700e469566bSMatthew Dillon error = hammer_mirror_update(cursor, mrec); 7014c038e17SMatthew Dillon cursor->flags |= HAMMER_CURSOR_ATEDISK; 702e469566bSMatthew Dillon } else { 7034c038e17SMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_ATEDISK; 704e469566bSMatthew Dillon } 7054c038e17SMatthew Dillon if (error == ENOENT) 7064c038e17SMatthew Dillon error = 0; 7074c038e17SMatthew Dillon } 7084c038e17SMatthew Dillon return(error); 709c82af904SMatthew Dillon } 710adf01747SMatthew Dillon 7114c038e17SMatthew Dillon /* 7124c038e17SMatthew Dillon * As part of the mirror write we iterate across swaths of records 7134c038e17SMatthew Dillon * on the target which no longer exist on the source, and mark them 7144c038e17SMatthew Dillon * deleted. 715842e7a70SMatthew Dillon * 716842e7a70SMatthew Dillon * The caller has indexed the cursor and set up key_end. We iterate 717842e7a70SMatthew Dillon * through to key_end. 718f96881ffSMatthew Dillon * 719f96881ffSMatthew Dillon * There is an edge case where the master has deleted a record whos 720f96881ffSMatthew Dillon * create_tid exactly matches our end_tid. We cannot delete this 721f96881ffSMatthew Dillon * record on the slave yet because we cannot assign delete_tid == create_tid. 
722f96881ffSMatthew Dillon * The deletion should be picked up on the next sequence since in order 723f96881ffSMatthew Dillon * to have been deleted on the master a transaction must have occured with 724f96881ffSMatthew Dillon * a TID greater then the create_tid of the record. 7253324b8cdSMatthew Dillon * 7263324b8cdSMatthew Dillon * To support incremental re-mirroring, just for robustness, we do not 7273324b8cdSMatthew Dillon * touch any records created beyond (or equal to) mirror->tid_end. 7284c038e17SMatthew Dillon */ 7294c038e17SMatthew Dillon static 7304c038e17SMatthew Dillon int 731842e7a70SMatthew Dillon hammer_mirror_delete_to(hammer_cursor_t cursor, 7324c038e17SMatthew Dillon struct hammer_ioc_mirror_rw *mirror) 7334c038e17SMatthew Dillon { 734842e7a70SMatthew Dillon hammer_btree_leaf_elm_t elm; 73598da6d8cSMatthew Dillon int error; 73698da6d8cSMatthew Dillon 737842e7a70SMatthew Dillon error = hammer_btree_iterate(cursor); 738842e7a70SMatthew Dillon while (error == 0) { 739842e7a70SMatthew Dillon elm = &cursor->node->ondisk->elms[cursor->index].leaf; 740842e7a70SMatthew Dillon KKASSERT(elm->base.btype == HAMMER_BTREE_TYPE_RECORD); 7414889cbd4SMatthew Dillon cursor->flags |= HAMMER_CURSOR_ATEDISK; 7423324b8cdSMatthew Dillon 7433324b8cdSMatthew Dillon /* 74483f2a3aaSMatthew Dillon * Certain records are not part of the mirroring operation 74583f2a3aaSMatthew Dillon */ 74683f2a3aaSMatthew Dillon if (hammer_mirror_nomirror(&elm->base)) { 74783f2a3aaSMatthew Dillon error = hammer_btree_iterate(cursor); 74883f2a3aaSMatthew Dillon continue; 74983f2a3aaSMatthew Dillon } 75083f2a3aaSMatthew Dillon 75183f2a3aaSMatthew Dillon /* 7523324b8cdSMatthew Dillon * Note: Must still delete records with create_tid < tid_beg, 7533324b8cdSMatthew Dillon * as record may have been pruned-away on source. 
7543324b8cdSMatthew Dillon */ 755f96881ffSMatthew Dillon if (elm->base.delete_tid == 0 && 7563324b8cdSMatthew Dillon elm->base.create_tid < mirror->tid_end) { 757842e7a70SMatthew Dillon error = hammer_delete_at_cursor(cursor, 758842e7a70SMatthew Dillon HAMMER_DELETE_ADJUST, 759842e7a70SMatthew Dillon mirror->tid_end, 760842e7a70SMatthew Dillon time_second, 761842e7a70SMatthew Dillon 1, NULL); 762842e7a70SMatthew Dillon } 763842e7a70SMatthew Dillon if (error == 0) 764842e7a70SMatthew Dillon error = hammer_btree_iterate(cursor); 765842e7a70SMatthew Dillon } 766842e7a70SMatthew Dillon if (error == ENOENT) 767842e7a70SMatthew Dillon error = 0; 768842e7a70SMatthew Dillon return(error); 769c82af904SMatthew Dillon } 770c82af904SMatthew Dillon 771c82af904SMatthew Dillon /* 772c82af904SMatthew Dillon * Check whether an update is needed in the case where a match already 773c82af904SMatthew Dillon * exists on the target. The only type of update allowed in this case 774c82af904SMatthew Dillon * is an update of the delete_tid. 775c82af904SMatthew Dillon * 776c82af904SMatthew Dillon * Return non-zero if the update should proceed. 777c82af904SMatthew Dillon */ 778c82af904SMatthew Dillon static 779c82af904SMatthew Dillon int 7804c038e17SMatthew Dillon hammer_mirror_check(hammer_cursor_t cursor, struct hammer_ioc_mrecord_rec *mrec) 781c82af904SMatthew Dillon { 782c82af904SMatthew Dillon hammer_btree_leaf_elm_t leaf = cursor->leaf; 783c82af904SMatthew Dillon 784c82af904SMatthew Dillon if (leaf->base.delete_tid != mrec->leaf.base.delete_tid) { 785ea434b6fSMatthew Dillon if (mrec->leaf.base.delete_tid != 0) 786c82af904SMatthew Dillon return(1); 787c82af904SMatthew Dillon } 788c82af904SMatthew Dillon return(0); 789c82af904SMatthew Dillon } 790c82af904SMatthew Dillon 791c82af904SMatthew Dillon /* 79283f2a3aaSMatthew Dillon * Filter out records which are never mirrored, such as configuration space 79383f2a3aaSMatthew Dillon * records (for hammer cleanup). 
79483f2a3aaSMatthew Dillon * 79583f2a3aaSMatthew Dillon * NOTE: We currently allow HAMMER_RECTYPE_SNAPSHOT records to be mirrored. 79683f2a3aaSMatthew Dillon */ 79783f2a3aaSMatthew Dillon static 79883f2a3aaSMatthew Dillon int 79983f2a3aaSMatthew Dillon hammer_mirror_nomirror(struct hammer_base_elm *base) 80083f2a3aaSMatthew Dillon { 80183f2a3aaSMatthew Dillon /* 80283f2a3aaSMatthew Dillon * Certain types of records are never updated when mirroring. 80383f2a3aaSMatthew Dillon * Slaves have their own configuration space. 80483f2a3aaSMatthew Dillon */ 80583f2a3aaSMatthew Dillon if (base->rec_type == HAMMER_RECTYPE_CONFIG) 80683f2a3aaSMatthew Dillon return(1); 80783f2a3aaSMatthew Dillon return(0); 80883f2a3aaSMatthew Dillon } 80983f2a3aaSMatthew Dillon 81083f2a3aaSMatthew Dillon 81183f2a3aaSMatthew Dillon /* 812842e7a70SMatthew Dillon * Update a record in-place. Only the delete_tid can change, and 813842e7a70SMatthew Dillon * only from zero to non-zero. 814c82af904SMatthew Dillon */ 815c82af904SMatthew Dillon static 816c82af904SMatthew Dillon int 8174c038e17SMatthew Dillon hammer_mirror_update(hammer_cursor_t cursor, 8184c038e17SMatthew Dillon struct hammer_ioc_mrecord_rec *mrec) 819c82af904SMatthew Dillon { 82098da6d8cSMatthew Dillon int error; 82198da6d8cSMatthew Dillon 822842e7a70SMatthew Dillon /* 823842e7a70SMatthew Dillon * This case shouldn't occur. 824842e7a70SMatthew Dillon */ 825842e7a70SMatthew Dillon if (mrec->leaf.base.delete_tid == 0) 82606ad81ffSMatthew Dillon return(0); 827adf01747SMatthew Dillon 828adf01747SMatthew Dillon /* 829842e7a70SMatthew Dillon * Mark the record deleted on the mirror target. 
8304c038e17SMatthew Dillon */ 831842e7a70SMatthew Dillon error = hammer_delete_at_cursor(cursor, HAMMER_DELETE_ADJUST, 832842e7a70SMatthew Dillon mrec->leaf.base.delete_tid, 833842e7a70SMatthew Dillon mrec->leaf.delete_ts, 834842e7a70SMatthew Dillon 1, NULL); 8354c038e17SMatthew Dillon cursor->flags |= HAMMER_CURSOR_ATEDISK; 836842e7a70SMatthew Dillon return(error); 837c82af904SMatthew Dillon } 838c82af904SMatthew Dillon 83983f2a3aaSMatthew Dillon #if 0 84083f2a3aaSMatthew Dillon /* 84183f2a3aaSMatthew Dillon * MOVED TO HAMMER_OBJECT.C: hammer_create_at_cursor() 84283f2a3aaSMatthew Dillon */ 84383f2a3aaSMatthew Dillon 84483f2a3aaSMatthew Dillon static int hammer_mirror_localize_data(hammer_data_ondisk_t data, 84583f2a3aaSMatthew Dillon hammer_btree_leaf_elm_t leaf); 84683f2a3aaSMatthew Dillon 847c82af904SMatthew Dillon /* 848c82af904SMatthew Dillon * Write out a new record. 849c82af904SMatthew Dillon */ 850c82af904SMatthew Dillon static 851c82af904SMatthew Dillon int 8524c038e17SMatthew Dillon hammer_mirror_write(hammer_cursor_t cursor, 8534c038e17SMatthew Dillon struct hammer_ioc_mrecord_rec *mrec, 8544c038e17SMatthew Dillon char *udata) 855c82af904SMatthew Dillon { 856adf01747SMatthew Dillon hammer_transaction_t trans; 857adf01747SMatthew Dillon hammer_buffer_t data_buffer; 858c82af904SMatthew Dillon hammer_off_t ndata_offset; 859a56cb012SMatthew Dillon hammer_tid_t high_tid; 860c82af904SMatthew Dillon void *ndata; 861c82af904SMatthew Dillon int error; 862602c6cb8SMatthew Dillon int doprop; 863c82af904SMatthew Dillon 864adf01747SMatthew Dillon trans = cursor->trans; 865adf01747SMatthew Dillon data_buffer = NULL; 866adf01747SMatthew Dillon 867adf01747SMatthew Dillon /* 86898da6d8cSMatthew Dillon * Get the sync lock so the whole mess is atomic 86998da6d8cSMatthew Dillon */ 87098da6d8cSMatthew Dillon hammer_sync_lock_sh(trans); 87198da6d8cSMatthew Dillon 87298da6d8cSMatthew Dillon /* 873adf01747SMatthew Dillon * Allocate and adjust data 874adf01747SMatthew Dillon */ 
875c82af904SMatthew Dillon if (mrec->leaf.data_len && mrec->leaf.data_offset) { 876adf01747SMatthew Dillon ndata = hammer_alloc_data(trans, mrec->leaf.data_len, 877c82af904SMatthew Dillon mrec->leaf.base.rec_type, 878df2ccbacSMatthew Dillon &ndata_offset, &data_buffer, 879df2ccbacSMatthew Dillon 0, &error); 880c82af904SMatthew Dillon if (ndata == NULL) 881c82af904SMatthew Dillon return(error); 882c82af904SMatthew Dillon mrec->leaf.data_offset = ndata_offset; 883adf01747SMatthew Dillon hammer_modify_buffer(trans, data_buffer, NULL, 0); 884c82af904SMatthew Dillon error = copyin(udata, ndata, mrec->leaf.data_len); 885c82af904SMatthew Dillon if (error == 0) { 886c82af904SMatthew Dillon if (hammer_crc_test_leaf(ndata, &mrec->leaf) == 0) { 887c82af904SMatthew Dillon kprintf("data crc mismatch on pipe\n"); 888c82af904SMatthew Dillon error = EINVAL; 889c82af904SMatthew Dillon } else { 890c82af904SMatthew Dillon error = hammer_mirror_localize_data( 891c82af904SMatthew Dillon ndata, &mrec->leaf); 892c82af904SMatthew Dillon } 893c82af904SMatthew Dillon } 894c82af904SMatthew Dillon hammer_modify_buffer_done(data_buffer); 895c82af904SMatthew Dillon } else { 896c82af904SMatthew Dillon mrec->leaf.data_offset = 0; 897c82af904SMatthew Dillon error = 0; 898c82af904SMatthew Dillon ndata = NULL; 899c82af904SMatthew Dillon } 900c82af904SMatthew Dillon if (error) 901c82af904SMatthew Dillon goto failed; 902adf01747SMatthew Dillon 903adf01747SMatthew Dillon /* 9044c038e17SMatthew Dillon * Do the insertion. 
This can fail with a EDEADLK or EALREADY 905adf01747SMatthew Dillon */ 906c82af904SMatthew Dillon cursor->flags |= HAMMER_CURSOR_INSERT; 907c82af904SMatthew Dillon error = hammer_btree_lookup(cursor); 908c82af904SMatthew Dillon if (error != ENOENT) { 909c82af904SMatthew Dillon if (error == 0) 910c82af904SMatthew Dillon error = EALREADY; 911c82af904SMatthew Dillon goto failed; 912c82af904SMatthew Dillon } 913c82af904SMatthew Dillon 914602c6cb8SMatthew Dillon error = hammer_btree_insert(cursor, &mrec->leaf, &doprop); 915adf01747SMatthew Dillon 916adf01747SMatthew Dillon /* 9174c038e17SMatthew Dillon * Cursor is left on the current element, we want to skip it now. 9184c038e17SMatthew Dillon */ 9194c038e17SMatthew Dillon cursor->flags |= HAMMER_CURSOR_ATEDISK; 9204c038e17SMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_INSERT; 9214c038e17SMatthew Dillon 9224c038e17SMatthew Dillon /* 923adf01747SMatthew Dillon * Track a count of active inodes. 924adf01747SMatthew Dillon */ 925842e7a70SMatthew Dillon if (error == 0 && 926842e7a70SMatthew Dillon mrec->leaf.base.rec_type == HAMMER_RECTYPE_INODE && 927842e7a70SMatthew Dillon mrec->leaf.base.delete_tid == 0) { 928adf01747SMatthew Dillon hammer_modify_volume_field(trans, 929adf01747SMatthew Dillon trans->rootvol, 930adf01747SMatthew Dillon vol0_stat_inodes); 931adf01747SMatthew Dillon ++trans->hmp->rootvol->ondisk->vol0_stat_inodes; 932adf01747SMatthew Dillon hammer_modify_volume_done(trans->rootvol); 933adf01747SMatthew Dillon } 934a56cb012SMatthew Dillon 935a56cb012SMatthew Dillon /* 936a56cb012SMatthew Dillon * vol0_next_tid must track the highest TID stored in the filesystem. 937a56cb012SMatthew Dillon * We do not need to generate undo for this update. 
938a56cb012SMatthew Dillon */ 939a56cb012SMatthew Dillon high_tid = mrec->leaf.base.create_tid; 940a56cb012SMatthew Dillon if (high_tid < mrec->leaf.base.delete_tid) 941a56cb012SMatthew Dillon high_tid = mrec->leaf.base.delete_tid; 942a56cb012SMatthew Dillon if (trans->rootvol->ondisk->vol0_next_tid < high_tid) { 943a56cb012SMatthew Dillon hammer_modify_volume(trans, trans->rootvol, NULL, 0); 944a56cb012SMatthew Dillon trans->rootvol->ondisk->vol0_next_tid = high_tid; 945a56cb012SMatthew Dillon hammer_modify_volume_done(trans->rootvol); 946a56cb012SMatthew Dillon } 947a56cb012SMatthew Dillon 948c9ce54d6SMatthew Dillon /* 949c9ce54d6SMatthew Dillon * WARNING! cursor's leaf pointer may have changed after 950c9ce54d6SMatthew Dillon * do_propagation returns. 951c9ce54d6SMatthew Dillon */ 952602c6cb8SMatthew Dillon if (error == 0 && doprop) 9534c038e17SMatthew Dillon hammer_btree_do_propagation(cursor, NULL, &mrec->leaf); 954c82af904SMatthew Dillon 955c82af904SMatthew Dillon failed: 956c82af904SMatthew Dillon /* 957c82af904SMatthew Dillon * Cleanup 958c82af904SMatthew Dillon */ 959c82af904SMatthew Dillon if (error && mrec->leaf.data_offset) { 960c82af904SMatthew Dillon hammer_blockmap_free(cursor->trans, 961c82af904SMatthew Dillon mrec->leaf.data_offset, 962c82af904SMatthew Dillon mrec->leaf.data_len); 963c82af904SMatthew Dillon } 96498da6d8cSMatthew Dillon hammer_sync_unlock(trans); 965c82af904SMatthew Dillon if (data_buffer) 966c82af904SMatthew Dillon hammer_rel_buffer(data_buffer, 0); 967c82af904SMatthew Dillon return(error); 968c82af904SMatthew Dillon } 969c82af904SMatthew Dillon 970c82af904SMatthew Dillon /* 971c82af904SMatthew Dillon * Localize the data payload. Directory entries may need their 972c82af904SMatthew Dillon * localization adjusted. 
973c82af904SMatthew Dillon */ 974c82af904SMatthew Dillon static 975c82af904SMatthew Dillon int 976c82af904SMatthew Dillon hammer_mirror_localize_data(hammer_data_ondisk_t data, 977c82af904SMatthew Dillon hammer_btree_leaf_elm_t leaf) 978c82af904SMatthew Dillon { 979c82af904SMatthew Dillon u_int32_t localization; 980c82af904SMatthew Dillon 981c82af904SMatthew Dillon if (leaf->base.rec_type == HAMMER_RECTYPE_DIRENTRY) { 982c82af904SMatthew Dillon localization = leaf->base.localization & 983c82af904SMatthew Dillon HAMMER_LOCALIZE_PSEUDOFS_MASK; 984c82af904SMatthew Dillon if (data->entry.localization != localization) { 985c82af904SMatthew Dillon data->entry.localization = localization; 986c82af904SMatthew Dillon hammer_crc_set_leaf(data, leaf); 987adf01747SMatthew Dillon } 988adf01747SMatthew Dillon } 989adf01747SMatthew Dillon return(0); 990c82af904SMatthew Dillon } 991c82af904SMatthew Dillon 99283f2a3aaSMatthew Dillon #endif 993