/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.17 2008/07/31 22:30:33 dillon Exp $
 */
/*
 * HAMMER mirroring ioctls - serialize and deserialize modifications made
 * to a filesystem.
39dd94f1b1SMatthew Dillon */ 40dd94f1b1SMatthew Dillon 41dd94f1b1SMatthew Dillon #include "hammer.h" 42dd94f1b1SMatthew Dillon 43c82af904SMatthew Dillon static int hammer_mirror_check(hammer_cursor_t cursor, 444c038e17SMatthew Dillon struct hammer_ioc_mrecord_rec *mrec); 45c82af904SMatthew Dillon static int hammer_mirror_update(hammer_cursor_t cursor, 464c038e17SMatthew Dillon struct hammer_ioc_mrecord_rec *mrec); 47c82af904SMatthew Dillon static int hammer_mirror_write(hammer_cursor_t cursor, 484c038e17SMatthew Dillon struct hammer_ioc_mrecord_rec *mrec, 494c038e17SMatthew Dillon char *udata); 504c038e17SMatthew Dillon static int hammer_ioc_mirror_write_rec(hammer_cursor_t cursor, 514c038e17SMatthew Dillon struct hammer_ioc_mrecord_rec *mrec, 524c038e17SMatthew Dillon struct hammer_ioc_mirror_rw *mirror, 534c038e17SMatthew Dillon u_int32_t localization, 544c038e17SMatthew Dillon char *uptr); 554c038e17SMatthew Dillon static int hammer_ioc_mirror_write_pass(hammer_cursor_t cursor, 564c038e17SMatthew Dillon struct hammer_ioc_mrecord_rec *mrec, 574c038e17SMatthew Dillon struct hammer_ioc_mirror_rw *mirror, 584c038e17SMatthew Dillon u_int32_t localization); 594c038e17SMatthew Dillon static int hammer_ioc_mirror_write_skip(hammer_cursor_t cursor, 604c038e17SMatthew Dillon struct hammer_ioc_mrecord_skip *mrec, 614c038e17SMatthew Dillon struct hammer_ioc_mirror_rw *mirror, 624c038e17SMatthew Dillon u_int32_t localization); 63842e7a70SMatthew Dillon static int hammer_mirror_delete_to(hammer_cursor_t cursor, 644c038e17SMatthew Dillon struct hammer_ioc_mirror_rw *mirror); 65c82af904SMatthew Dillon static int hammer_mirror_localize_data(hammer_data_ondisk_t data, 66c82af904SMatthew Dillon hammer_btree_leaf_elm_t leaf); 67c82af904SMatthew Dillon 68c82af904SMatthew Dillon /* 69c82af904SMatthew Dillon * All B-Tree records within the specified key range which also conform 70c82af904SMatthew Dillon * to the transaction id range are returned. 
Mirroring code keeps track 71c82af904SMatthew Dillon * of the last transaction id fully scanned and can efficiently pick up 72c82af904SMatthew Dillon * where it left off if interrupted. 73ea434b6fSMatthew Dillon * 74ea434b6fSMatthew Dillon * The PFS is identified in the mirror structure. The passed ip is just 75ea434b6fSMatthew Dillon * some directory in the overall HAMMER filesystem and has nothing to 76ea434b6fSMatthew Dillon * do with the PFS. 77c82af904SMatthew Dillon */ 78dd94f1b1SMatthew Dillon int 79dd94f1b1SMatthew Dillon hammer_ioc_mirror_read(hammer_transaction_t trans, hammer_inode_t ip, 80dd94f1b1SMatthew Dillon struct hammer_ioc_mirror_rw *mirror) 81dd94f1b1SMatthew Dillon { 824c038e17SMatthew Dillon struct hammer_cmirror cmirror; 83dd94f1b1SMatthew Dillon struct hammer_cursor cursor; 844c038e17SMatthew Dillon union hammer_ioc_mrecord_any mrec; 85c82af904SMatthew Dillon hammer_btree_leaf_elm_t elm; 86c82af904SMatthew Dillon const int crc_start = HAMMER_MREC_CRCOFF; 87c82af904SMatthew Dillon char *uptr; 88dd94f1b1SMatthew Dillon int error; 89c82af904SMatthew Dillon int data_len; 90c82af904SMatthew Dillon int bytes; 914c038e17SMatthew Dillon int eatdisk; 92ea434b6fSMatthew Dillon u_int32_t localization; 934c038e17SMatthew Dillon u_int32_t rec_crc; 94ea434b6fSMatthew Dillon 95ea434b6fSMatthew Dillon localization = (u_int32_t)mirror->pfs_id << 16; 96dd94f1b1SMatthew Dillon 97dd94f1b1SMatthew Dillon if ((mirror->key_beg.localization | mirror->key_end.localization) & 98dd94f1b1SMatthew Dillon HAMMER_LOCALIZE_PSEUDOFS_MASK) { 99dd94f1b1SMatthew Dillon return(EINVAL); 100dd94f1b1SMatthew Dillon } 101dd94f1b1SMatthew Dillon if (hammer_btree_cmp(&mirror->key_beg, &mirror->key_end) > 0) 102dd94f1b1SMatthew Dillon return(EINVAL); 103dd94f1b1SMatthew Dillon 104dd94f1b1SMatthew Dillon mirror->key_cur = mirror->key_beg; 1054c038e17SMatthew Dillon mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK; 106ea434b6fSMatthew Dillon mirror->key_cur.localization += 
localization; 107c82af904SMatthew Dillon bzero(&mrec, sizeof(mrec)); 1084c038e17SMatthew Dillon bzero(&cmirror, sizeof(cmirror)); 109dd94f1b1SMatthew Dillon 110dd94f1b1SMatthew Dillon retry: 111dd94f1b1SMatthew Dillon error = hammer_init_cursor(trans, &cursor, NULL, NULL); 112dd94f1b1SMatthew Dillon if (error) { 113dd94f1b1SMatthew Dillon hammer_done_cursor(&cursor); 114dd94f1b1SMatthew Dillon goto failed; 115dd94f1b1SMatthew Dillon } 116dd94f1b1SMatthew Dillon cursor.key_beg = mirror->key_cur; 117dd94f1b1SMatthew Dillon cursor.key_end = mirror->key_end; 1184c038e17SMatthew Dillon cursor.key_end.localization &= HAMMER_LOCALIZE_MASK; 119ea434b6fSMatthew Dillon cursor.key_end.localization += localization; 120dd94f1b1SMatthew Dillon 121dd94f1b1SMatthew Dillon cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 122dd94f1b1SMatthew Dillon cursor.flags |= HAMMER_CURSOR_BACKEND; 123dd94f1b1SMatthew Dillon 124dd94f1b1SMatthew Dillon /* 125c82af904SMatthew Dillon * This flag filters the search to only return elements whos create 126c82af904SMatthew Dillon * or delete TID is >= mirror_tid. The B-Tree uses the mirror_tid 127c82af904SMatthew Dillon * field stored with internal and leaf nodes to shortcut the scan. 
128dd94f1b1SMatthew Dillon */ 129c82af904SMatthew Dillon cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED; 1304c038e17SMatthew Dillon cursor.cmirror = &cmirror; 1314c038e17SMatthew Dillon cmirror.mirror_tid = mirror->tid_beg; 132dd94f1b1SMatthew Dillon 133dd94f1b1SMatthew Dillon error = hammer_btree_first(&cursor); 134dd94f1b1SMatthew Dillon while (error == 0) { 135dd94f1b1SMatthew Dillon /* 13693291532SMatthew Dillon * Yield to more important tasks 13793291532SMatthew Dillon */ 13893291532SMatthew Dillon if (error == 0) { 13993291532SMatthew Dillon error = hammer_signal_check(trans->hmp); 14093291532SMatthew Dillon if (error) 14193291532SMatthew Dillon break; 14293291532SMatthew Dillon } 14393291532SMatthew Dillon 14493291532SMatthew Dillon /* 1454c038e17SMatthew Dillon * An internal node can be returned in mirror-filtered 1464c038e17SMatthew Dillon * mode and indicates that the scan is returning a skip 1474c038e17SMatthew Dillon * range in the cursor->cmirror structure. 1484c038e17SMatthew Dillon */ 1494c038e17SMatthew Dillon uptr = (char *)mirror->ubuf + mirror->count; 1504c038e17SMatthew Dillon if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL) { 1514c038e17SMatthew Dillon /* 1524c038e17SMatthew Dillon * Check space 1534c038e17SMatthew Dillon */ 1544c038e17SMatthew Dillon mirror->key_cur = cmirror.skip_beg; 1554c038e17SMatthew Dillon bytes = sizeof(mrec.skip); 1564c038e17SMatthew Dillon if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) > 1574c038e17SMatthew Dillon mirror->size) { 1584c038e17SMatthew Dillon break; 1594c038e17SMatthew Dillon } 1604c038e17SMatthew Dillon 1614c038e17SMatthew Dillon /* 1624c038e17SMatthew Dillon * Fill mrec 1634c038e17SMatthew Dillon */ 1644c038e17SMatthew Dillon mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE; 1654c038e17SMatthew Dillon mrec.head.type = HAMMER_MREC_TYPE_SKIP; 1664c038e17SMatthew Dillon mrec.head.rec_size = bytes; 1674c038e17SMatthew Dillon mrec.skip.skip_beg = cmirror.skip_beg; 1684c038e17SMatthew Dillon 
mrec.skip.skip_end = cmirror.skip_end; 1694c038e17SMatthew Dillon mrec.head.rec_crc = crc32(&mrec.head.rec_size, 1704c038e17SMatthew Dillon bytes - crc_start); 1714c038e17SMatthew Dillon error = copyout(&mrec, uptr, bytes); 1724c038e17SMatthew Dillon eatdisk = 0; 1734c038e17SMatthew Dillon goto didwrite; 1744c038e17SMatthew Dillon } 1754c038e17SMatthew Dillon 1764c038e17SMatthew Dillon /* 1774c038e17SMatthew Dillon * Leaf node. In full-history mode we could filter out 1784c038e17SMatthew Dillon * elements modified outside the user-requested TID range. 1794c038e17SMatthew Dillon * 1804c038e17SMatthew Dillon * However, such elements must be returned so the writer 1814c038e17SMatthew Dillon * can compare them against the target to detemrine what 1824c038e17SMatthew Dillon * needs to be deleted on the target, particular for 1834c038e17SMatthew Dillon * no-history mirrors. 184dd94f1b1SMatthew Dillon */ 185c82af904SMatthew Dillon KKASSERT(cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF); 186c82af904SMatthew Dillon elm = &cursor.node->ondisk->elms[cursor.index].leaf; 187c82af904SMatthew Dillon mirror->key_cur = elm->base; 188dd94f1b1SMatthew Dillon 189e469566bSMatthew Dillon /* 190e469566bSMatthew Dillon * Determine if we should generate a PASS or a REC. PASS 191e469566bSMatthew Dillon * records are records without any data payload. Such 192e469566bSMatthew Dillon * records will be generated if the target is already expected 193e469566bSMatthew Dillon * to have the record, allowing it to delete the gaps. 194e469566bSMatthew Dillon * 195e469566bSMatthew Dillon * A PASS record is also used to perform deletions on the 196e469566bSMatthew Dillon * target. 197e469566bSMatthew Dillon * 198e469566bSMatthew Dillon * Such deletions are needed if the master or files on the 199e469566bSMatthew Dillon * master are no-history, or if the slave is so far behind 200e469566bSMatthew Dillon * the master has already been pruned. 
201e469566bSMatthew Dillon */ 202e469566bSMatthew Dillon if (elm->base.create_tid < mirror->tid_beg || 203e469566bSMatthew Dillon elm->base.create_tid > mirror->tid_end) { 2044c038e17SMatthew Dillon bytes = sizeof(mrec.rec); 2054c038e17SMatthew Dillon if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) > 2064c038e17SMatthew Dillon mirror->size) { 2074c038e17SMatthew Dillon break; 2084c038e17SMatthew Dillon } 2094c038e17SMatthew Dillon 2104c038e17SMatthew Dillon /* 211e469566bSMatthew Dillon * Fill mrec. 2124c038e17SMatthew Dillon */ 2134c038e17SMatthew Dillon mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE; 2144c038e17SMatthew Dillon mrec.head.type = HAMMER_MREC_TYPE_PASS; 2154c038e17SMatthew Dillon mrec.head.rec_size = bytes; 2164c038e17SMatthew Dillon mrec.rec.leaf = *elm; 2174c038e17SMatthew Dillon mrec.head.rec_crc = crc32(&mrec.head.rec_size, 2184c038e17SMatthew Dillon bytes - crc_start); 2194c038e17SMatthew Dillon error = copyout(&mrec, uptr, bytes); 2204c038e17SMatthew Dillon eatdisk = 1; 2214c038e17SMatthew Dillon goto didwrite; 2224c038e17SMatthew Dillon 2234c038e17SMatthew Dillon } 2244c038e17SMatthew Dillon 225dd94f1b1SMatthew Dillon /* 226c82af904SMatthew Dillon * The core code exports the data to userland. 227dd94f1b1SMatthew Dillon */ 228c82af904SMatthew Dillon data_len = (elm->data_offset) ? elm->data_len : 0; 229c82af904SMatthew Dillon if (data_len) { 230c82af904SMatthew Dillon error = hammer_btree_extract(&cursor, 231c82af904SMatthew Dillon HAMMER_CURSOR_GET_DATA); 232c82af904SMatthew Dillon if (error) 233c82af904SMatthew Dillon break; 234c82af904SMatthew Dillon } 2354c038e17SMatthew Dillon 2364c038e17SMatthew Dillon bytes = sizeof(mrec.rec) + data_len; 2374c038e17SMatthew Dillon if (mirror->count + HAMMER_HEAD_DOALIGN(bytes) > mirror->size) 238c82af904SMatthew Dillon break; 239c82af904SMatthew Dillon 240c82af904SMatthew Dillon /* 241c82af904SMatthew Dillon * Construct the record for userland and copyout. 
242c82af904SMatthew Dillon * 243c82af904SMatthew Dillon * The user is asking for a snapshot, if the record was 244c82af904SMatthew Dillon * deleted beyond the user-requested ending tid, the record 245c82af904SMatthew Dillon * is not considered deleted from the point of view of 246c82af904SMatthew Dillon * userland and delete_tid is cleared. 247c82af904SMatthew Dillon */ 2484c038e17SMatthew Dillon mrec.head.signature = HAMMER_IOC_MIRROR_SIGNATURE; 2494c038e17SMatthew Dillon mrec.head.type = HAMMER_MREC_TYPE_REC; 2504c038e17SMatthew Dillon mrec.head.rec_size = bytes; 2514c038e17SMatthew Dillon mrec.rec.leaf = *elm; 2524889cbd4SMatthew Dillon if (elm->base.delete_tid > mirror->tid_end) 2534c038e17SMatthew Dillon mrec.rec.leaf.base.delete_tid = 0; 2544c038e17SMatthew Dillon rec_crc = crc32(&mrec.head.rec_size, 2554c038e17SMatthew Dillon sizeof(mrec.rec) - crc_start); 2564c038e17SMatthew Dillon if (data_len) 2574c038e17SMatthew Dillon rec_crc = crc32_ext(cursor.data, data_len, rec_crc); 2584c038e17SMatthew Dillon mrec.head.rec_crc = rec_crc; 2594c038e17SMatthew Dillon error = copyout(&mrec, uptr, sizeof(mrec.rec)); 260c82af904SMatthew Dillon if (data_len && error == 0) { 2614c038e17SMatthew Dillon error = copyout(cursor.data, uptr + sizeof(mrec.rec), 262c82af904SMatthew Dillon data_len); 263c82af904SMatthew Dillon } 2644c038e17SMatthew Dillon eatdisk = 1; 2654c038e17SMatthew Dillon 2664c038e17SMatthew Dillon /* 2674c038e17SMatthew Dillon * eatdisk controls whether we skip the current cursor 2684c038e17SMatthew Dillon * position on the next scan or not. If doing a SKIP 2694c038e17SMatthew Dillon * the cursor is already positioned properly for the next 2704c038e17SMatthew Dillon * scan and eatdisk will be 0. 
2714c038e17SMatthew Dillon */ 2724c038e17SMatthew Dillon didwrite: 273dd94f1b1SMatthew Dillon if (error == 0) { 2744c038e17SMatthew Dillon mirror->count += HAMMER_HEAD_DOALIGN(bytes); 2754c038e17SMatthew Dillon if (eatdisk) 276dd94f1b1SMatthew Dillon cursor.flags |= HAMMER_CURSOR_ATEDISK; 2774c038e17SMatthew Dillon else 2784c038e17SMatthew Dillon cursor.flags &= ~HAMMER_CURSOR_ATEDISK; 279dd94f1b1SMatthew Dillon error = hammer_btree_iterate(&cursor); 280dd94f1b1SMatthew Dillon } 281dd94f1b1SMatthew Dillon } 282c82af904SMatthew Dillon if (error == ENOENT) { 283c82af904SMatthew Dillon mirror->key_cur = mirror->key_end; 284dd94f1b1SMatthew Dillon error = 0; 285c82af904SMatthew Dillon } 286dd94f1b1SMatthew Dillon hammer_done_cursor(&cursor); 287dd94f1b1SMatthew Dillon if (error == EDEADLK) 288dd94f1b1SMatthew Dillon goto retry; 289dd94f1b1SMatthew Dillon if (error == EINTR) { 290c82af904SMatthew Dillon mirror->head.flags |= HAMMER_IOC_HEAD_INTR; 291dd94f1b1SMatthew Dillon error = 0; 292dd94f1b1SMatthew Dillon } 293dd94f1b1SMatthew Dillon failed: 294dd94f1b1SMatthew Dillon mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK; 295dd94f1b1SMatthew Dillon return(error); 296dd94f1b1SMatthew Dillon } 297dd94f1b1SMatthew Dillon 298c82af904SMatthew Dillon /* 2994c038e17SMatthew Dillon * Copy records from userland to the target mirror. 300602c6cb8SMatthew Dillon * 301ea434b6fSMatthew Dillon * The PFS is identified in the mirror structure. The passed ip is just 302ea434b6fSMatthew Dillon * some directory in the overall HAMMER filesystem and has nothing to 303ea434b6fSMatthew Dillon * do with the PFS. In fact, there might not even be a root directory for 304ea434b6fSMatthew Dillon * the PFS yet! 
305c82af904SMatthew Dillon */ 306c82af904SMatthew Dillon int 307c82af904SMatthew Dillon hammer_ioc_mirror_write(hammer_transaction_t trans, hammer_inode_t ip, 308c82af904SMatthew Dillon struct hammer_ioc_mirror_rw *mirror) 309c82af904SMatthew Dillon { 3104c038e17SMatthew Dillon union hammer_ioc_mrecord_any mrec; 311c82af904SMatthew Dillon struct hammer_cursor cursor; 312ea434b6fSMatthew Dillon u_int32_t localization; 31393291532SMatthew Dillon int checkspace_count = 0; 3144c038e17SMatthew Dillon int error; 3154c038e17SMatthew Dillon int bytes; 3164c038e17SMatthew Dillon char *uptr; 31793291532SMatthew Dillon int seq; 318ea434b6fSMatthew Dillon 319ea434b6fSMatthew Dillon localization = (u_int32_t)mirror->pfs_id << 16; 32093291532SMatthew Dillon seq = trans->hmp->flusher.act; 321c82af904SMatthew Dillon 3224c038e17SMatthew Dillon /* 3234c038e17SMatthew Dillon * Validate the mirror structure and relocalize the tracking keys. 3244c038e17SMatthew Dillon */ 325c82af904SMatthew Dillon if (mirror->size < 0 || mirror->size > 0x70000000) 326c82af904SMatthew Dillon return(EINVAL); 3274c038e17SMatthew Dillon mirror->key_beg.localization &= HAMMER_LOCALIZE_MASK; 3284c038e17SMatthew Dillon mirror->key_beg.localization += localization; 3294c038e17SMatthew Dillon mirror->key_end.localization &= HAMMER_LOCALIZE_MASK; 3304c038e17SMatthew Dillon mirror->key_end.localization += localization; 3314c038e17SMatthew Dillon mirror->key_cur.localization &= HAMMER_LOCALIZE_MASK; 3324c038e17SMatthew Dillon mirror->key_cur.localization += localization; 333c82af904SMatthew Dillon 3344c038e17SMatthew Dillon /* 3354c038e17SMatthew Dillon * Set up our tracking cursor for the loop. The tracking cursor 3364c038e17SMatthew Dillon * is used to delete records that are no longer present on the 3374c038e17SMatthew Dillon * master. The last handled record at key_cur must be skipped. 
3384c038e17SMatthew Dillon */ 339c82af904SMatthew Dillon error = hammer_init_cursor(trans, &cursor, NULL, NULL); 340c82af904SMatthew Dillon 3414c038e17SMatthew Dillon cursor.key_beg = mirror->key_cur; 3424c038e17SMatthew Dillon cursor.key_end = mirror->key_end; 3434c038e17SMatthew Dillon cursor.flags |= HAMMER_CURSOR_BACKEND; 3444c038e17SMatthew Dillon error = hammer_btree_first(&cursor); 3454c038e17SMatthew Dillon if (error == 0) 3464c038e17SMatthew Dillon cursor.flags |= HAMMER_CURSOR_ATEDISK; 3474c038e17SMatthew Dillon if (error == ENOENT) 3484c038e17SMatthew Dillon error = 0; 3494c038e17SMatthew Dillon 3504c038e17SMatthew Dillon /* 3514c038e17SMatthew Dillon * Loop until our input buffer has been exhausted. 3524c038e17SMatthew Dillon */ 3534c038e17SMatthew Dillon while (error == 0 && 3544c038e17SMatthew Dillon mirror->count + sizeof(mrec.head) <= mirror->size) { 3554c038e17SMatthew Dillon 356c82af904SMatthew Dillon /* 35793291532SMatthew Dillon * Don't blow out the buffer cache. Leave room for frontend 35893291532SMatthew Dillon * cache as well. 35993291532SMatthew Dillon */ 36015e75dabSMatthew Dillon while (hammer_flusher_meta_halflimit(trans->hmp) || 3617a61b85dSMatthew Dillon hammer_flusher_undo_exhausted(trans, 2)) { 362*982be4bfSMatthew Dillon hammer_unlock_cursor(&cursor); 36393291532SMatthew Dillon hammer_flusher_wait(trans->hmp, seq); 364*982be4bfSMatthew Dillon hammer_lock_cursor(&cursor); 36515e75dabSMatthew Dillon seq = hammer_flusher_async_one(trans->hmp); 36693291532SMatthew Dillon } 36793291532SMatthew Dillon 36893291532SMatthew Dillon /* 36993291532SMatthew Dillon * If there is insufficient free space it may be due to 37093291532SMatthew Dillon * reserved bigblocks, which flushing might fix. 
37193291532SMatthew Dillon */ 37293291532SMatthew Dillon if (hammer_checkspace(trans->hmp, HAMMER_CHKSPC_MIRROR)) { 37393291532SMatthew Dillon if (++checkspace_count == 10) { 37493291532SMatthew Dillon error = ENOSPC; 37593291532SMatthew Dillon break; 37693291532SMatthew Dillon } 377*982be4bfSMatthew Dillon hammer_unlock_cursor(&cursor); 37893291532SMatthew Dillon hammer_flusher_wait(trans->hmp, seq); 379*982be4bfSMatthew Dillon hammer_lock_cursor(&cursor); 3807a61b85dSMatthew Dillon seq = hammer_flusher_async(trans->hmp, NULL); 38193291532SMatthew Dillon } 38293291532SMatthew Dillon 38393291532SMatthew Dillon 38493291532SMatthew Dillon /* 385c82af904SMatthew Dillon * Acquire and validate header 386c82af904SMatthew Dillon */ 3874c038e17SMatthew Dillon if ((bytes = mirror->size - mirror->count) > sizeof(mrec)) 3884c038e17SMatthew Dillon bytes = sizeof(mrec); 389c82af904SMatthew Dillon uptr = (char *)mirror->ubuf + mirror->count; 3904c038e17SMatthew Dillon error = copyin(uptr, &mrec, bytes); 391c82af904SMatthew Dillon if (error) 392c82af904SMatthew Dillon break; 3934c038e17SMatthew Dillon if (mrec.head.signature != HAMMER_IOC_MIRROR_SIGNATURE) { 394c82af904SMatthew Dillon error = EINVAL; 395c82af904SMatthew Dillon break; 396c82af904SMatthew Dillon } 3974c038e17SMatthew Dillon if (mrec.head.rec_size < sizeof(mrec.head) || 3984c038e17SMatthew Dillon mrec.head.rec_size > sizeof(mrec) + HAMMER_XBUFSIZE || 3994c038e17SMatthew Dillon mirror->count + mrec.head.rec_size > mirror->size) { 4005fa5c92fSMatthew Dillon error = EINVAL; 4015fa5c92fSMatthew Dillon break; 4025fa5c92fSMatthew Dillon } 4034c038e17SMatthew Dillon 4044c038e17SMatthew Dillon switch(mrec.head.type) { 4054c038e17SMatthew Dillon case HAMMER_MREC_TYPE_SKIP: 4064c038e17SMatthew Dillon if (mrec.head.rec_size != sizeof(mrec.skip)) 4074c038e17SMatthew Dillon error = EINVAL; 4084c038e17SMatthew Dillon if (error == 0) 4094c038e17SMatthew Dillon error = hammer_ioc_mirror_write_skip(&cursor, &mrec.skip, mirror, 
localization); 4104c038e17SMatthew Dillon break; 4114c038e17SMatthew Dillon case HAMMER_MREC_TYPE_REC: 4124c038e17SMatthew Dillon if (mrec.head.rec_size < sizeof(mrec.rec)) 4134c038e17SMatthew Dillon error = EINVAL; 4144c038e17SMatthew Dillon if (error == 0) 4154c038e17SMatthew Dillon error = hammer_ioc_mirror_write_rec(&cursor, &mrec.rec, mirror, localization, uptr + sizeof(mrec.rec)); 4164c038e17SMatthew Dillon break; 4174c038e17SMatthew Dillon case HAMMER_MREC_TYPE_PASS: 4184c038e17SMatthew Dillon if (mrec.head.rec_size != sizeof(mrec.rec)) 4194c038e17SMatthew Dillon error = EINVAL; 4204c038e17SMatthew Dillon if (error == 0) 4214c038e17SMatthew Dillon error = hammer_ioc_mirror_write_pass(&cursor, &mrec.rec, mirror, localization); 4224c038e17SMatthew Dillon break; 4234c038e17SMatthew Dillon default: 424c82af904SMatthew Dillon error = EINVAL; 425c82af904SMatthew Dillon break; 426c82af904SMatthew Dillon } 4274c038e17SMatthew Dillon 4284c038e17SMatthew Dillon /* 4294c038e17SMatthew Dillon * Retry the current record on deadlock, otherwise setup 4304c038e17SMatthew Dillon * for the next loop. 
4314c038e17SMatthew Dillon */ 4324c038e17SMatthew Dillon if (error == EDEADLK) { 4334c038e17SMatthew Dillon while (error == EDEADLK) { 4344c038e17SMatthew Dillon hammer_recover_cursor(&cursor); 4354c038e17SMatthew Dillon error = hammer_cursor_upgrade(&cursor); 436c82af904SMatthew Dillon } 4374c038e17SMatthew Dillon } else { 4384c038e17SMatthew Dillon if (error == EALREADY) 4394c038e17SMatthew Dillon error = 0; 4404c038e17SMatthew Dillon if (error == 0) { 4414c038e17SMatthew Dillon mirror->count += 4424c038e17SMatthew Dillon HAMMER_HEAD_DOALIGN(mrec.head.rec_size); 4434c038e17SMatthew Dillon } 4444c038e17SMatthew Dillon } 4454c038e17SMatthew Dillon } 4464c038e17SMatthew Dillon hammer_done_cursor(&cursor); 4474c038e17SMatthew Dillon 4484c038e17SMatthew Dillon /* 4494c038e17SMatthew Dillon * cumulative error 4504c038e17SMatthew Dillon */ 4514c038e17SMatthew Dillon if (error) { 4524c038e17SMatthew Dillon mirror->head.flags |= HAMMER_IOC_HEAD_ERROR; 4534c038e17SMatthew Dillon mirror->head.error = error; 4544c038e17SMatthew Dillon } 4554c038e17SMatthew Dillon 4564c038e17SMatthew Dillon /* 4574c038e17SMatthew Dillon * ioctls don't update the RW data structure if an error is returned, 4584c038e17SMatthew Dillon * always return 0. 4594c038e17SMatthew Dillon */ 4604c038e17SMatthew Dillon return(0); 4614c038e17SMatthew Dillon } 4624c038e17SMatthew Dillon 4634c038e17SMatthew Dillon /* 4644c038e17SMatthew Dillon * Handle skip records. 4654c038e17SMatthew Dillon * 4664c038e17SMatthew Dillon * We must iterate from the last resolved record position at mirror->key_cur 4674c038e17SMatthew Dillon * to skip_beg and delete any records encountered. 4684c038e17SMatthew Dillon * 4694c038e17SMatthew Dillon * mirror->key_cur must be carefully set when we succeed in processing 4704c038e17SMatthew Dillon * this mrec. 
4714c038e17SMatthew Dillon */ 4724c038e17SMatthew Dillon static int 4734c038e17SMatthew Dillon hammer_ioc_mirror_write_skip(hammer_cursor_t cursor, 4744c038e17SMatthew Dillon struct hammer_ioc_mrecord_skip *mrec, 4754c038e17SMatthew Dillon struct hammer_ioc_mirror_rw *mirror, 4764c038e17SMatthew Dillon u_int32_t localization) 4774c038e17SMatthew Dillon { 4784c038e17SMatthew Dillon int error; 4794c038e17SMatthew Dillon 4804c038e17SMatthew Dillon /* 4814c038e17SMatthew Dillon * Relocalize the skip range 4824c038e17SMatthew Dillon */ 4834c038e17SMatthew Dillon mrec->skip_beg.localization &= HAMMER_LOCALIZE_MASK; 4844c038e17SMatthew Dillon mrec->skip_beg.localization += localization; 4854c038e17SMatthew Dillon mrec->skip_end.localization &= HAMMER_LOCALIZE_MASK; 4864c038e17SMatthew Dillon mrec->skip_end.localization += localization; 4874c038e17SMatthew Dillon 4884c038e17SMatthew Dillon /* 4894c038e17SMatthew Dillon * Iterate from current position to skip_beg, deleting any records 4904c038e17SMatthew Dillon * we encounter. 4914c038e17SMatthew Dillon */ 4924c038e17SMatthew Dillon cursor->key_end = mrec->skip_beg; 4934c038e17SMatthew Dillon cursor->flags |= HAMMER_CURSOR_BACKEND; 494842e7a70SMatthew Dillon error = hammer_mirror_delete_to(cursor, mirror); 4954c038e17SMatthew Dillon 4964c038e17SMatthew Dillon /* 4974c038e17SMatthew Dillon * Now skip past the skip (which is the whole point point of 4984c038e17SMatthew Dillon * having a skip record). The sender has not sent us any records 4994c038e17SMatthew Dillon * for the skip area so we wouldn't know what to keep and what 5004c038e17SMatthew Dillon * to delete anyway. 5014c038e17SMatthew Dillon * 5024c038e17SMatthew Dillon * Clear ATEDISK because skip_end is non-inclusive, so we can't 5034c038e17SMatthew Dillon * count an exact match if we happened to get one. 
5044c038e17SMatthew Dillon */ 5054c038e17SMatthew Dillon if (error == 0) { 5064c038e17SMatthew Dillon mirror->key_cur = mrec->skip_end; 5074c038e17SMatthew Dillon cursor->key_beg = mrec->skip_end; 5084c038e17SMatthew Dillon error = hammer_btree_lookup(cursor); 5094c038e17SMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_ATEDISK; 5104c038e17SMatthew Dillon if (error == ENOENT) 5114c038e17SMatthew Dillon error = 0; 5124c038e17SMatthew Dillon } 5134c038e17SMatthew Dillon return(error); 5144c038e17SMatthew Dillon } 5154c038e17SMatthew Dillon 5164c038e17SMatthew Dillon /* 5174c038e17SMatthew Dillon * Handle B-Tree records. 5184c038e17SMatthew Dillon * 5194c038e17SMatthew Dillon * We must iterate to mrec->base.key (non-inclusively), and then process 5204c038e17SMatthew Dillon * the record. We are allowed to write a new record or delete an existing 5214c038e17SMatthew Dillon * record, but cannot replace an existing record. 5224c038e17SMatthew Dillon * 5234c038e17SMatthew Dillon * mirror->key_cur must be carefully set when we succeed in processing 5244c038e17SMatthew Dillon * this mrec. 
5254c038e17SMatthew Dillon */ 5264c038e17SMatthew Dillon static int 5274c038e17SMatthew Dillon hammer_ioc_mirror_write_rec(hammer_cursor_t cursor, 5284c038e17SMatthew Dillon struct hammer_ioc_mrecord_rec *mrec, 5294c038e17SMatthew Dillon struct hammer_ioc_mirror_rw *mirror, 5304c038e17SMatthew Dillon u_int32_t localization, 5314c038e17SMatthew Dillon char *uptr) 5324c038e17SMatthew Dillon { 5334c038e17SMatthew Dillon hammer_transaction_t trans; 5344c038e17SMatthew Dillon u_int32_t rec_crc; 5354c038e17SMatthew Dillon int error; 5364c038e17SMatthew Dillon 5374c038e17SMatthew Dillon trans = cursor->trans; 5384c038e17SMatthew Dillon rec_crc = crc32(mrec, sizeof(*mrec)); 5394c038e17SMatthew Dillon 5404c038e17SMatthew Dillon if (mrec->leaf.data_len < 0 || 5414c038e17SMatthew Dillon mrec->leaf.data_len > HAMMER_XBUFSIZE || 5424c038e17SMatthew Dillon mrec->leaf.data_len + sizeof(*mrec) > mrec->head.rec_size) { 5434c038e17SMatthew Dillon return(EINVAL); 544c82af904SMatthew Dillon } 545c82af904SMatthew Dillon 546c82af904SMatthew Dillon /* 547c82af904SMatthew Dillon * Re-localize for target. relocalization of data is handled 548c82af904SMatthew Dillon * by hammer_mirror_write(). 549c82af904SMatthew Dillon */ 5504c038e17SMatthew Dillon mrec->leaf.base.localization &= HAMMER_LOCALIZE_MASK; 5514c038e17SMatthew Dillon mrec->leaf.base.localization += localization; 5524c038e17SMatthew Dillon 5534c038e17SMatthew Dillon /* 5544c038e17SMatthew Dillon * Delete records through until we reach (non-inclusively) the 5554c038e17SMatthew Dillon * target record. 5564c038e17SMatthew Dillon */ 5574c038e17SMatthew Dillon cursor->key_end = mrec->leaf.base; 5584c038e17SMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_END_INCLUSIVE; 5594c038e17SMatthew Dillon cursor->flags |= HAMMER_CURSOR_BACKEND; 560842e7a70SMatthew Dillon error = hammer_mirror_delete_to(cursor, mirror); 561c82af904SMatthew Dillon 562c82af904SMatthew Dillon /* 563c82af904SMatthew Dillon * Locate the record. 
564c82af904SMatthew Dillon * 565c82af904SMatthew Dillon * If the record exists only the delete_tid may be updated. 566c82af904SMatthew Dillon * 567e469566bSMatthew Dillon * If the record does not exist we can create it only if the 568e469566bSMatthew Dillon * create_tid is not too old. If the create_tid is too old 569e469566bSMatthew Dillon * it may have already been destroyed on the slave from pruning. 570e469566bSMatthew Dillon * 571e469566bSMatthew Dillon * Note that mirror operations are effectively as-of operations 572e469566bSMatthew Dillon * and delete_tid can be 0 for mirroring purposes even if it is 573c82af904SMatthew Dillon * not actually 0 at the originator. 57498da6d8cSMatthew Dillon * 57598da6d8cSMatthew Dillon * These functions can return EDEADLK 576c82af904SMatthew Dillon */ 5774c038e17SMatthew Dillon cursor->key_beg = mrec->leaf.base; 5784c038e17SMatthew Dillon cursor->flags |= HAMMER_CURSOR_BACKEND; 5794c038e17SMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_INSERT; 5804c038e17SMatthew Dillon error = hammer_btree_lookup(cursor); 581c82af904SMatthew Dillon 5824c038e17SMatthew Dillon if (error == 0 && hammer_mirror_check(cursor, mrec)) { 5834c038e17SMatthew Dillon error = hammer_mirror_update(cursor, mrec); 584adf01747SMatthew Dillon } else if (error == ENOENT) { 585e469566bSMatthew Dillon if (mrec->leaf.base.create_tid >= mirror->tid_beg) 586e469566bSMatthew Dillon error = hammer_mirror_write(cursor, mrec, uptr); 587e469566bSMatthew Dillon else 588adf01747SMatthew Dillon error = 0; 589c82af904SMatthew Dillon } 5904c038e17SMatthew Dillon if (error == 0 || error == EALREADY) 5914c038e17SMatthew Dillon mirror->key_cur = mrec->leaf.base; 5924c038e17SMatthew Dillon return(error); 5934c038e17SMatthew Dillon } 594c82af904SMatthew Dillon 595c82af904SMatthew Dillon /* 5964c038e17SMatthew Dillon * This works like write_rec but no write or update is necessary, 5974c038e17SMatthew Dillon * and no data payload is included so we couldn't do a write even 
5984c038e17SMatthew Dillon * if we wanted to. 5994c038e17SMatthew Dillon * 6004c038e17SMatthew Dillon * We must still iterate for deletions, and we can validate the 6014c038e17SMatthew Dillon * record header which is a good way to test for corrupted mirror 6024c038e17SMatthew Dillon * targets XXX. 6034c038e17SMatthew Dillon * 6044c038e17SMatthew Dillon * mirror->key_cur must be carefully set when we succeed in processing 6054c038e17SMatthew Dillon * this mrec. 606c82af904SMatthew Dillon */ 6074c038e17SMatthew Dillon static 6084c038e17SMatthew Dillon int 6094c038e17SMatthew Dillon hammer_ioc_mirror_write_pass(hammer_cursor_t cursor, 6104c038e17SMatthew Dillon struct hammer_ioc_mrecord_rec *mrec, 6114c038e17SMatthew Dillon struct hammer_ioc_mirror_rw *mirror, 6124c038e17SMatthew Dillon u_int32_t localization) 6134c038e17SMatthew Dillon { 6144c038e17SMatthew Dillon hammer_transaction_t trans; 6154c038e17SMatthew Dillon u_int32_t rec_crc; 6164c038e17SMatthew Dillon int error; 6174c038e17SMatthew Dillon 6184c038e17SMatthew Dillon trans = cursor->trans; 6194c038e17SMatthew Dillon rec_crc = crc32(mrec, sizeof(*mrec)); 6204c038e17SMatthew Dillon 6214c038e17SMatthew Dillon /* 6224c038e17SMatthew Dillon * Re-localize for target. Relocalization of data is handled 6234c038e17SMatthew Dillon * by hammer_mirror_write(). 6244c038e17SMatthew Dillon */ 6254c038e17SMatthew Dillon mrec->leaf.base.localization &= HAMMER_LOCALIZE_MASK; 6264c038e17SMatthew Dillon mrec->leaf.base.localization += localization; 6274c038e17SMatthew Dillon 6284c038e17SMatthew Dillon /* 6294c038e17SMatthew Dillon * Delete records through until we reach (non-inclusively) the 6304c038e17SMatthew Dillon * target record. 
6314c038e17SMatthew Dillon */ 6324c038e17SMatthew Dillon cursor->key_end = mrec->leaf.base; 6334c038e17SMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_END_INCLUSIVE; 6344c038e17SMatthew Dillon cursor->flags |= HAMMER_CURSOR_BACKEND; 6354c038e17SMatthew Dillon 636842e7a70SMatthew Dillon error = hammer_mirror_delete_to(cursor, mirror); 6374c038e17SMatthew Dillon 6384c038e17SMatthew Dillon /* 639e469566bSMatthew Dillon * Locate the record and get past it by setting ATEDISK. Perform 640e469566bSMatthew Dillon * any necessary deletions. We have no data payload and cannot 641e469566bSMatthew Dillon * create a new record. 6424c038e17SMatthew Dillon */ 6434c038e17SMatthew Dillon if (error == 0) { 6444c038e17SMatthew Dillon mirror->key_cur = mrec->leaf.base; 6454c038e17SMatthew Dillon cursor->key_beg = mrec->leaf.base; 6464c038e17SMatthew Dillon cursor->flags |= HAMMER_CURSOR_BACKEND; 6474c038e17SMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_INSERT; 6484c038e17SMatthew Dillon error = hammer_btree_lookup(cursor); 649e469566bSMatthew Dillon if (error == 0) { 650e469566bSMatthew Dillon if (hammer_mirror_check(cursor, mrec)) 651e469566bSMatthew Dillon error = hammer_mirror_update(cursor, mrec); 6524c038e17SMatthew Dillon cursor->flags |= HAMMER_CURSOR_ATEDISK; 653e469566bSMatthew Dillon } else { 6544c038e17SMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_ATEDISK; 655e469566bSMatthew Dillon } 6564c038e17SMatthew Dillon if (error == ENOENT) 6574c038e17SMatthew Dillon error = 0; 6584c038e17SMatthew Dillon } 6594c038e17SMatthew Dillon return(error); 660c82af904SMatthew Dillon } 661adf01747SMatthew Dillon 6624c038e17SMatthew Dillon /* 6634c038e17SMatthew Dillon * As part of the mirror write we iterate across swaths of records 6644c038e17SMatthew Dillon * on the target which no longer exist on the source, and mark them 6654c038e17SMatthew Dillon * deleted. 666842e7a70SMatthew Dillon * 667842e7a70SMatthew Dillon * The caller has indexed the cursor and set up key_end. 
We iterate 668842e7a70SMatthew Dillon * through to key_end. 6694c038e17SMatthew Dillon */ 6704c038e17SMatthew Dillon static 6714c038e17SMatthew Dillon int 672842e7a70SMatthew Dillon hammer_mirror_delete_to(hammer_cursor_t cursor, 6734c038e17SMatthew Dillon struct hammer_ioc_mirror_rw *mirror) 6744c038e17SMatthew Dillon { 675842e7a70SMatthew Dillon hammer_btree_leaf_elm_t elm; 67698da6d8cSMatthew Dillon int error; 67798da6d8cSMatthew Dillon 678842e7a70SMatthew Dillon error = hammer_btree_iterate(cursor); 679842e7a70SMatthew Dillon while (error == 0) { 680842e7a70SMatthew Dillon elm = &cursor->node->ondisk->elms[cursor->index].leaf; 681842e7a70SMatthew Dillon KKASSERT(elm->base.btype == HAMMER_BTREE_TYPE_RECORD); 6824889cbd4SMatthew Dillon cursor->flags |= HAMMER_CURSOR_ATEDISK; 683842e7a70SMatthew Dillon if (elm->base.delete_tid == 0) { 684842e7a70SMatthew Dillon error = hammer_delete_at_cursor(cursor, 685842e7a70SMatthew Dillon HAMMER_DELETE_ADJUST, 686842e7a70SMatthew Dillon mirror->tid_end, 687842e7a70SMatthew Dillon time_second, 688842e7a70SMatthew Dillon 1, NULL); 689842e7a70SMatthew Dillon } 690842e7a70SMatthew Dillon if (error == 0) 691842e7a70SMatthew Dillon error = hammer_btree_iterate(cursor); 692842e7a70SMatthew Dillon } 693842e7a70SMatthew Dillon if (error == ENOENT) 694842e7a70SMatthew Dillon error = 0; 695842e7a70SMatthew Dillon return(error); 696c82af904SMatthew Dillon } 697c82af904SMatthew Dillon 698c82af904SMatthew Dillon /* 699c82af904SMatthew Dillon * Check whether an update is needed in the case where a match already 700c82af904SMatthew Dillon * exists on the target. The only type of update allowed in this case 701c82af904SMatthew Dillon * is an update of the delete_tid. 702c82af904SMatthew Dillon * 703c82af904SMatthew Dillon * Return non-zero if the update should proceed. 
704c82af904SMatthew Dillon */ 705c82af904SMatthew Dillon static 706c82af904SMatthew Dillon int 7074c038e17SMatthew Dillon hammer_mirror_check(hammer_cursor_t cursor, struct hammer_ioc_mrecord_rec *mrec) 708c82af904SMatthew Dillon { 709c82af904SMatthew Dillon hammer_btree_leaf_elm_t leaf = cursor->leaf; 710c82af904SMatthew Dillon 711c82af904SMatthew Dillon if (leaf->base.delete_tid != mrec->leaf.base.delete_tid) { 712ea434b6fSMatthew Dillon if (mrec->leaf.base.delete_tid != 0) 713c82af904SMatthew Dillon return(1); 714c82af904SMatthew Dillon } 715c82af904SMatthew Dillon return(0); 716c82af904SMatthew Dillon } 717c82af904SMatthew Dillon 718c82af904SMatthew Dillon /* 719842e7a70SMatthew Dillon * Update a record in-place. Only the delete_tid can change, and 720842e7a70SMatthew Dillon * only from zero to non-zero. 721c82af904SMatthew Dillon */ 722c82af904SMatthew Dillon static 723c82af904SMatthew Dillon int 7244c038e17SMatthew Dillon hammer_mirror_update(hammer_cursor_t cursor, 7254c038e17SMatthew Dillon struct hammer_ioc_mrecord_rec *mrec) 726c82af904SMatthew Dillon { 72798da6d8cSMatthew Dillon int error; 72898da6d8cSMatthew Dillon 729842e7a70SMatthew Dillon /* 730842e7a70SMatthew Dillon * This case shouldn't occur. 731842e7a70SMatthew Dillon */ 732842e7a70SMatthew Dillon if (mrec->leaf.base.delete_tid == 0) 73306ad81ffSMatthew Dillon return(0); 734adf01747SMatthew Dillon 735adf01747SMatthew Dillon /* 736842e7a70SMatthew Dillon * Mark the record deleted on the mirror target. 
7374c038e17SMatthew Dillon */ 738842e7a70SMatthew Dillon error = hammer_delete_at_cursor(cursor, HAMMER_DELETE_ADJUST, 739842e7a70SMatthew Dillon mrec->leaf.base.delete_tid, 740842e7a70SMatthew Dillon mrec->leaf.delete_ts, 741842e7a70SMatthew Dillon 1, NULL); 7424c038e17SMatthew Dillon cursor->flags |= HAMMER_CURSOR_ATEDISK; 743842e7a70SMatthew Dillon return(error); 744c82af904SMatthew Dillon } 745c82af904SMatthew Dillon 746c82af904SMatthew Dillon /* 747c82af904SMatthew Dillon * Write out a new record. 748c82af904SMatthew Dillon */ 749c82af904SMatthew Dillon static 750c82af904SMatthew Dillon int 7514c038e17SMatthew Dillon hammer_mirror_write(hammer_cursor_t cursor, 7524c038e17SMatthew Dillon struct hammer_ioc_mrecord_rec *mrec, 7534c038e17SMatthew Dillon char *udata) 754c82af904SMatthew Dillon { 755adf01747SMatthew Dillon hammer_transaction_t trans; 756adf01747SMatthew Dillon hammer_buffer_t data_buffer; 757c82af904SMatthew Dillon hammer_off_t ndata_offset; 758a56cb012SMatthew Dillon hammer_tid_t high_tid; 759c82af904SMatthew Dillon void *ndata; 760c82af904SMatthew Dillon int error; 761602c6cb8SMatthew Dillon int doprop; 762c82af904SMatthew Dillon 763adf01747SMatthew Dillon trans = cursor->trans; 764adf01747SMatthew Dillon data_buffer = NULL; 765adf01747SMatthew Dillon 766adf01747SMatthew Dillon /* 76798da6d8cSMatthew Dillon * Get the sync lock so the whole mess is atomic 76898da6d8cSMatthew Dillon */ 76998da6d8cSMatthew Dillon hammer_sync_lock_sh(trans); 77098da6d8cSMatthew Dillon 77198da6d8cSMatthew Dillon /* 772adf01747SMatthew Dillon * Allocate and adjust data 773adf01747SMatthew Dillon */ 774c82af904SMatthew Dillon if (mrec->leaf.data_len && mrec->leaf.data_offset) { 775adf01747SMatthew Dillon ndata = hammer_alloc_data(trans, mrec->leaf.data_len, 776c82af904SMatthew Dillon mrec->leaf.base.rec_type, 777c82af904SMatthew Dillon &ndata_offset, &data_buffer, &error); 778c82af904SMatthew Dillon if (ndata == NULL) 779c82af904SMatthew Dillon return(error); 
780c82af904SMatthew Dillon mrec->leaf.data_offset = ndata_offset; 781adf01747SMatthew Dillon hammer_modify_buffer(trans, data_buffer, NULL, 0); 782c82af904SMatthew Dillon error = copyin(udata, ndata, mrec->leaf.data_len); 783c82af904SMatthew Dillon if (error == 0) { 784c82af904SMatthew Dillon if (hammer_crc_test_leaf(ndata, &mrec->leaf) == 0) { 785c82af904SMatthew Dillon kprintf("data crc mismatch on pipe\n"); 786c82af904SMatthew Dillon error = EINVAL; 787c82af904SMatthew Dillon } else { 788c82af904SMatthew Dillon error = hammer_mirror_localize_data( 789c82af904SMatthew Dillon ndata, &mrec->leaf); 790c82af904SMatthew Dillon } 791c82af904SMatthew Dillon } 792c82af904SMatthew Dillon hammer_modify_buffer_done(data_buffer); 793c82af904SMatthew Dillon } else { 794c82af904SMatthew Dillon mrec->leaf.data_offset = 0; 795c82af904SMatthew Dillon error = 0; 796c82af904SMatthew Dillon ndata = NULL; 797c82af904SMatthew Dillon } 798c82af904SMatthew Dillon if (error) 799c82af904SMatthew Dillon goto failed; 800adf01747SMatthew Dillon 801adf01747SMatthew Dillon /* 8024c038e17SMatthew Dillon * Do the insertion. This can fail with a EDEADLK or EALREADY 803adf01747SMatthew Dillon */ 804c82af904SMatthew Dillon cursor->flags |= HAMMER_CURSOR_INSERT; 805c82af904SMatthew Dillon error = hammer_btree_lookup(cursor); 806c82af904SMatthew Dillon if (error != ENOENT) { 807c82af904SMatthew Dillon if (error == 0) 808c82af904SMatthew Dillon error = EALREADY; 809c82af904SMatthew Dillon goto failed; 810c82af904SMatthew Dillon } 811c82af904SMatthew Dillon 812602c6cb8SMatthew Dillon error = hammer_btree_insert(cursor, &mrec->leaf, &doprop); 813adf01747SMatthew Dillon 814adf01747SMatthew Dillon /* 8154c038e17SMatthew Dillon * Cursor is left on the current element, we want to skip it now. 
8164c038e17SMatthew Dillon */ 8174c038e17SMatthew Dillon cursor->flags |= HAMMER_CURSOR_ATEDISK; 8184c038e17SMatthew Dillon cursor->flags &= ~HAMMER_CURSOR_INSERT; 8194c038e17SMatthew Dillon 8204c038e17SMatthew Dillon /* 821adf01747SMatthew Dillon * Track a count of active inodes. 822adf01747SMatthew Dillon */ 823842e7a70SMatthew Dillon if (error == 0 && 824842e7a70SMatthew Dillon mrec->leaf.base.rec_type == HAMMER_RECTYPE_INODE && 825842e7a70SMatthew Dillon mrec->leaf.base.delete_tid == 0) { 826adf01747SMatthew Dillon hammer_modify_volume_field(trans, 827adf01747SMatthew Dillon trans->rootvol, 828adf01747SMatthew Dillon vol0_stat_inodes); 829adf01747SMatthew Dillon ++trans->hmp->rootvol->ondisk->vol0_stat_inodes; 830adf01747SMatthew Dillon hammer_modify_volume_done(trans->rootvol); 831adf01747SMatthew Dillon } 832a56cb012SMatthew Dillon 833a56cb012SMatthew Dillon /* 834a56cb012SMatthew Dillon * vol0_next_tid must track the highest TID stored in the filesystem. 835a56cb012SMatthew Dillon * We do not need to generate undo for this update. 
836a56cb012SMatthew Dillon */ 837a56cb012SMatthew Dillon high_tid = mrec->leaf.base.create_tid; 838a56cb012SMatthew Dillon if (high_tid < mrec->leaf.base.delete_tid) 839a56cb012SMatthew Dillon high_tid = mrec->leaf.base.delete_tid; 840a56cb012SMatthew Dillon if (trans->rootvol->ondisk->vol0_next_tid < high_tid) { 841a56cb012SMatthew Dillon hammer_modify_volume(trans, trans->rootvol, NULL, 0); 842a56cb012SMatthew Dillon trans->rootvol->ondisk->vol0_next_tid = high_tid; 843a56cb012SMatthew Dillon hammer_modify_volume_done(trans->rootvol); 844a56cb012SMatthew Dillon } 845a56cb012SMatthew Dillon 846602c6cb8SMatthew Dillon if (error == 0 && doprop) 8474c038e17SMatthew Dillon hammer_btree_do_propagation(cursor, NULL, &mrec->leaf); 848c82af904SMatthew Dillon 849c82af904SMatthew Dillon failed: 850c82af904SMatthew Dillon /* 851c82af904SMatthew Dillon * Cleanup 852c82af904SMatthew Dillon */ 853c82af904SMatthew Dillon if (error && mrec->leaf.data_offset) { 854c82af904SMatthew Dillon hammer_blockmap_free(cursor->trans, 855c82af904SMatthew Dillon mrec->leaf.data_offset, 856c82af904SMatthew Dillon mrec->leaf.data_len); 857c82af904SMatthew Dillon } 85898da6d8cSMatthew Dillon hammer_sync_unlock(trans); 859c82af904SMatthew Dillon if (data_buffer) 860c82af904SMatthew Dillon hammer_rel_buffer(data_buffer, 0); 861c82af904SMatthew Dillon return(error); 862c82af904SMatthew Dillon } 863c82af904SMatthew Dillon 864c82af904SMatthew Dillon /* 865c82af904SMatthew Dillon * Localize the data payload. Directory entries may need their 866c82af904SMatthew Dillon * localization adjusted. 867c82af904SMatthew Dillon * 868adf01747SMatthew Dillon * PFS directory entries must be skipped entirely (return EALREADY). 
869c82af904SMatthew Dillon */ 870c82af904SMatthew Dillon static 871c82af904SMatthew Dillon int 872c82af904SMatthew Dillon hammer_mirror_localize_data(hammer_data_ondisk_t data, 873c82af904SMatthew Dillon hammer_btree_leaf_elm_t leaf) 874c82af904SMatthew Dillon { 875c82af904SMatthew Dillon u_int32_t localization; 876c82af904SMatthew Dillon 877c82af904SMatthew Dillon if (leaf->base.rec_type == HAMMER_RECTYPE_DIRENTRY) { 878adf01747SMatthew Dillon if (data->entry.obj_id == HAMMER_OBJID_ROOT) 879adf01747SMatthew Dillon return(EALREADY); 880c82af904SMatthew Dillon localization = leaf->base.localization & 881c82af904SMatthew Dillon HAMMER_LOCALIZE_PSEUDOFS_MASK; 882c82af904SMatthew Dillon if (data->entry.localization != localization) { 883c82af904SMatthew Dillon data->entry.localization = localization; 884c82af904SMatthew Dillon hammer_crc_set_leaf(data, leaf); 885adf01747SMatthew Dillon } 886adf01747SMatthew Dillon } 887adf01747SMatthew Dillon return(0); 888c82af904SMatthew Dillon } 889c82af904SMatthew Dillon 890