/*
 * Copyright (c) 2015-2018 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE 255f59596dSMatthew Dillon * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 265f59596dSMatthew Dillon * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 275f59596dSMatthew Dillon * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 285f59596dSMatthew Dillon * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 295f59596dSMatthew Dillon * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 305f59596dSMatthew Dillon * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 315f59596dSMatthew Dillon * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 325f59596dSMatthew Dillon * SUCH DAMAGE. 335f59596dSMatthew Dillon */ 345f59596dSMatthew Dillon /* 355f59596dSMatthew Dillon * This module implements the hammer2 helper thread API, including 365f59596dSMatthew Dillon * the frontend/backend XOP API. 375f59596dSMatthew Dillon */ 385f59596dSMatthew Dillon #include "hammer2.h" 395f59596dSMatthew Dillon 40c4421f07SMatthew Dillon #define H2XOPDESCRIPTOR(label) \ 41ab8c8a18SMatthew Dillon hammer2_xop_desc_t hammer2_##label##_desc = { \ 42c4421f07SMatthew Dillon .storage_func = hammer2_xop_##label, \ 43c4421f07SMatthew Dillon .id = #label \ 447a6ccfa3STomohiro Kusumi } 45c4421f07SMatthew Dillon 46c4421f07SMatthew Dillon H2XOPDESCRIPTOR(ipcluster); 47c4421f07SMatthew Dillon H2XOPDESCRIPTOR(readdir); 48c4421f07SMatthew Dillon H2XOPDESCRIPTOR(nresolve); 49c4421f07SMatthew Dillon H2XOPDESCRIPTOR(unlink); 50c4421f07SMatthew Dillon H2XOPDESCRIPTOR(nrename); 51c4421f07SMatthew Dillon H2XOPDESCRIPTOR(scanlhc); 52c4421f07SMatthew Dillon H2XOPDESCRIPTOR(scanall); 53c4421f07SMatthew Dillon H2XOPDESCRIPTOR(lookup); 54c4421f07SMatthew Dillon H2XOPDESCRIPTOR(delete); 55c4421f07SMatthew Dillon H2XOPDESCRIPTOR(inode_mkdirent); 56c4421f07SMatthew Dillon H2XOPDESCRIPTOR(inode_create); 57ecfe89b8SMatthew Dillon H2XOPDESCRIPTOR(inode_create_det); 
58ecfe89b8SMatthew Dillon H2XOPDESCRIPTOR(inode_create_ins); 59c4421f07SMatthew Dillon H2XOPDESCRIPTOR(inode_destroy); 60c4421f07SMatthew Dillon H2XOPDESCRIPTOR(inode_chain_sync); 61c4421f07SMatthew Dillon H2XOPDESCRIPTOR(inode_unlinkall); 62c4421f07SMatthew Dillon H2XOPDESCRIPTOR(inode_connect); 63c4421f07SMatthew Dillon H2XOPDESCRIPTOR(inode_flush); 64c4421f07SMatthew Dillon H2XOPDESCRIPTOR(strategy_read); 65c4421f07SMatthew Dillon H2XOPDESCRIPTOR(strategy_write); 66c4421f07SMatthew Dillon 675f59596dSMatthew Dillon /* 689dca9515SMatthew Dillon * Set flags and wakeup any waiters. 690d66a712SMatthew Dillon * 700d66a712SMatthew Dillon * WARNING! During teardown (thr) can disappear the instant our cmpset 710d66a712SMatthew Dillon * succeeds. 725f59596dSMatthew Dillon */ 735f59596dSMatthew Dillon void 749dca9515SMatthew Dillon hammer2_thr_signal(hammer2_thread_t *thr, uint32_t flags) 755f59596dSMatthew Dillon { 765f59596dSMatthew Dillon uint32_t oflags; 775f59596dSMatthew Dillon uint32_t nflags; 785f59596dSMatthew Dillon 795f59596dSMatthew Dillon for (;;) { 805f59596dSMatthew Dillon oflags = thr->flags; 815f59596dSMatthew Dillon cpu_ccfence(); 829dca9515SMatthew Dillon nflags = (oflags | flags) & ~HAMMER2_THREAD_WAITING; 835f59596dSMatthew Dillon 849dca9515SMatthew Dillon if (oflags & HAMMER2_THREAD_WAITING) { 855f59596dSMatthew Dillon if (atomic_cmpset_int(&thr->flags, oflags, nflags)) { 8605a3c4ecSMatthew Dillon wakeup(&thr->flags); 875f59596dSMatthew Dillon break; 885f59596dSMatthew Dillon } 895f59596dSMatthew Dillon } else { 905f59596dSMatthew Dillon if (atomic_cmpset_int(&thr->flags, oflags, nflags)) 915f59596dSMatthew Dillon break; 925f59596dSMatthew Dillon } 935f59596dSMatthew Dillon } 945f59596dSMatthew Dillon } 955f59596dSMatthew Dillon 965f59596dSMatthew Dillon /* 979dca9515SMatthew Dillon * Set and clear flags and wakeup any waiters. 989dca9515SMatthew Dillon * 999dca9515SMatthew Dillon * WARNING! 
During teardown (thr) can disappear the instant our cmpset 1009dca9515SMatthew Dillon * succeeds. 1019dca9515SMatthew Dillon */ 1029dca9515SMatthew Dillon void 1039dca9515SMatthew Dillon hammer2_thr_signal2(hammer2_thread_t *thr, uint32_t posflags, uint32_t negflags) 1049dca9515SMatthew Dillon { 1059dca9515SMatthew Dillon uint32_t oflags; 1069dca9515SMatthew Dillon uint32_t nflags; 1079dca9515SMatthew Dillon 1089dca9515SMatthew Dillon for (;;) { 1099dca9515SMatthew Dillon oflags = thr->flags; 1109dca9515SMatthew Dillon cpu_ccfence(); 1119dca9515SMatthew Dillon nflags = (oflags | posflags) & 1129dca9515SMatthew Dillon ~(negflags | HAMMER2_THREAD_WAITING); 1139dca9515SMatthew Dillon if (oflags & HAMMER2_THREAD_WAITING) { 1149dca9515SMatthew Dillon if (atomic_cmpset_int(&thr->flags, oflags, nflags)) { 1159dca9515SMatthew Dillon wakeup(&thr->flags); 1169dca9515SMatthew Dillon break; 1179dca9515SMatthew Dillon } 1189dca9515SMatthew Dillon } else { 1199dca9515SMatthew Dillon if (atomic_cmpset_int(&thr->flags, oflags, nflags)) 1209dca9515SMatthew Dillon break; 1219dca9515SMatthew Dillon } 1229dca9515SMatthew Dillon } 1239dca9515SMatthew Dillon } 1249dca9515SMatthew Dillon 1259dca9515SMatthew Dillon /* 1269dca9515SMatthew Dillon * Wait until all the bits in flags are set. 1270d66a712SMatthew Dillon * 1280d66a712SMatthew Dillon * WARNING! During teardown (thr) can disappear the instant our cmpset 1290d66a712SMatthew Dillon * succeeds. 
1305f59596dSMatthew Dillon */ 1315f59596dSMatthew Dillon void 1325f59596dSMatthew Dillon hammer2_thr_wait(hammer2_thread_t *thr, uint32_t flags) 1335f59596dSMatthew Dillon { 1345f59596dSMatthew Dillon uint32_t oflags; 1355f59596dSMatthew Dillon uint32_t nflags; 1365f59596dSMatthew Dillon 1375f59596dSMatthew Dillon for (;;) { 1385f59596dSMatthew Dillon oflags = thr->flags; 1395f59596dSMatthew Dillon cpu_ccfence(); 1405f59596dSMatthew Dillon if ((oflags & flags) == flags) 1415f59596dSMatthew Dillon break; 1429dca9515SMatthew Dillon nflags = oflags | HAMMER2_THREAD_WAITING; 14305a3c4ecSMatthew Dillon tsleep_interlock(&thr->flags, 0); 1445f59596dSMatthew Dillon if (atomic_cmpset_int(&thr->flags, oflags, nflags)) { 14505a3c4ecSMatthew Dillon tsleep(&thr->flags, PINTERLOCKED, "h2twait", hz*60); 1465f59596dSMatthew Dillon } 1475f59596dSMatthew Dillon } 1485f59596dSMatthew Dillon } 1495f59596dSMatthew Dillon 1505f59596dSMatthew Dillon /* 1519dca9515SMatthew Dillon * Wait until any of the bits in flags are set, with timeout. 1529dca9515SMatthew Dillon * 1539dca9515SMatthew Dillon * WARNING! During teardown (thr) can disappear the instant our cmpset 1549dca9515SMatthew Dillon * succeeds. 
1559dca9515SMatthew Dillon */ 1569dca9515SMatthew Dillon int 1579dca9515SMatthew Dillon hammer2_thr_wait_any(hammer2_thread_t *thr, uint32_t flags, int timo) 1589dca9515SMatthew Dillon { 1599dca9515SMatthew Dillon uint32_t oflags; 1609dca9515SMatthew Dillon uint32_t nflags; 1619dca9515SMatthew Dillon int error; 1629dca9515SMatthew Dillon 1639dca9515SMatthew Dillon error = 0; 1649dca9515SMatthew Dillon for (;;) { 1659dca9515SMatthew Dillon oflags = thr->flags; 1669dca9515SMatthew Dillon cpu_ccfence(); 1679dca9515SMatthew Dillon if (oflags & flags) 1689dca9515SMatthew Dillon break; 1699dca9515SMatthew Dillon nflags = oflags | HAMMER2_THREAD_WAITING; 1709dca9515SMatthew Dillon tsleep_interlock(&thr->flags, 0); 1719dca9515SMatthew Dillon if (atomic_cmpset_int(&thr->flags, oflags, nflags)) { 1729dca9515SMatthew Dillon error = tsleep(&thr->flags, PINTERLOCKED, 1739dca9515SMatthew Dillon "h2twait", timo); 1749dca9515SMatthew Dillon } 17565cacacfSMatthew Dillon if (error == ETIMEDOUT) { 17665cacacfSMatthew Dillon error = HAMMER2_ERROR_ETIMEDOUT; 1779dca9515SMatthew Dillon break; 1789dca9515SMatthew Dillon } 17965cacacfSMatthew Dillon } 1809dca9515SMatthew Dillon return error; 1819dca9515SMatthew Dillon } 1829dca9515SMatthew Dillon 1839dca9515SMatthew Dillon /* 1845f59596dSMatthew Dillon * Wait until the bits in flags are clear. 1850d66a712SMatthew Dillon * 1860d66a712SMatthew Dillon * WARNING! During teardown (thr) can disappear the instant our cmpset 1870d66a712SMatthew Dillon * succeeds. 
1885f59596dSMatthew Dillon */ 1895f59596dSMatthew Dillon void 1905f59596dSMatthew Dillon hammer2_thr_wait_neg(hammer2_thread_t *thr, uint32_t flags) 1915f59596dSMatthew Dillon { 1925f59596dSMatthew Dillon uint32_t oflags; 1935f59596dSMatthew Dillon uint32_t nflags; 1945f59596dSMatthew Dillon 1955f59596dSMatthew Dillon for (;;) { 1965f59596dSMatthew Dillon oflags = thr->flags; 1975f59596dSMatthew Dillon cpu_ccfence(); 1985f59596dSMatthew Dillon if ((oflags & flags) == 0) 1995f59596dSMatthew Dillon break; 2009dca9515SMatthew Dillon nflags = oflags | HAMMER2_THREAD_WAITING; 20105a3c4ecSMatthew Dillon tsleep_interlock(&thr->flags, 0); 2025f59596dSMatthew Dillon if (atomic_cmpset_int(&thr->flags, oflags, nflags)) { 20305a3c4ecSMatthew Dillon tsleep(&thr->flags, PINTERLOCKED, "h2twait", hz*60); 2045f59596dSMatthew Dillon } 2055f59596dSMatthew Dillon } 2065f59596dSMatthew Dillon } 2075f59596dSMatthew Dillon 2085f59596dSMatthew Dillon /* 2095f59596dSMatthew Dillon * Initialize the supplied thread structure, starting the specified 2105f59596dSMatthew Dillon * thread. 21105a3c4ecSMatthew Dillon * 21205a3c4ecSMatthew Dillon * NOTE: thr structure can be retained across mounts and unmounts for this 21305a3c4ecSMatthew Dillon * pmp, so make sure the flags are in a sane state. 
2145f59596dSMatthew Dillon */ 2155f59596dSMatthew Dillon void 2165f59596dSMatthew Dillon hammer2_thr_create(hammer2_thread_t *thr, hammer2_pfs_t *pmp, 2179dca9515SMatthew Dillon hammer2_dev_t *hmp, 2185f59596dSMatthew Dillon const char *id, int clindex, int repidx, 2195f59596dSMatthew Dillon void (*func)(void *arg)) 2205f59596dSMatthew Dillon { 2219dca9515SMatthew Dillon thr->pmp = pmp; /* xop helpers */ 2229dca9515SMatthew Dillon thr->hmp = hmp; /* bulkfree */ 2235f59596dSMatthew Dillon thr->clindex = clindex; 2245f59596dSMatthew Dillon thr->repidx = repidx; 22520852157SMatthew Dillon TAILQ_INIT(&thr->xopq); 22605a3c4ecSMatthew Dillon atomic_clear_int(&thr->flags, HAMMER2_THREAD_STOP | 22705a3c4ecSMatthew Dillon HAMMER2_THREAD_STOPPED | 22805a3c4ecSMatthew Dillon HAMMER2_THREAD_FREEZE | 22905a3c4ecSMatthew Dillon HAMMER2_THREAD_FROZEN); 23005a3c4ecSMatthew Dillon if (thr->scratch == NULL) 23105a3c4ecSMatthew Dillon thr->scratch = kmalloc(MAXPHYS, M_HAMMER2, M_WAITOK | M_ZERO); 2325f59596dSMatthew Dillon if (repidx >= 0) { 2335f59596dSMatthew Dillon lwkt_create(func, thr, &thr->td, NULL, 0, repidx % ncpus, 2345f59596dSMatthew Dillon "%s-%s.%02d", id, pmp->pfs_names[clindex], repidx); 2359dca9515SMatthew Dillon } else if (pmp) { 2365f59596dSMatthew Dillon lwkt_create(func, thr, &thr->td, NULL, 0, -1, 2375f59596dSMatthew Dillon "%s-%s", id, pmp->pfs_names[clindex]); 2389dca9515SMatthew Dillon } else { 2399dca9515SMatthew Dillon lwkt_create(func, thr, &thr->td, NULL, 0, -1, "%s", id); 2405f59596dSMatthew Dillon } 2415f59596dSMatthew Dillon } 2425f59596dSMatthew Dillon 2435f59596dSMatthew Dillon /* 2445f59596dSMatthew Dillon * Terminate a thread. This function will silently return if the thread 2455f59596dSMatthew Dillon * was never initialized or has already been deleted. 2465f59596dSMatthew Dillon * 2475f59596dSMatthew Dillon * This is accomplished by setting the STOP flag and waiting for the td 2485f59596dSMatthew Dillon * structure to become NULL. 
2495f59596dSMatthew Dillon */ 2505f59596dSMatthew Dillon void 2515f59596dSMatthew Dillon hammer2_thr_delete(hammer2_thread_t *thr) 2525f59596dSMatthew Dillon { 2535f59596dSMatthew Dillon if (thr->td == NULL) 2545f59596dSMatthew Dillon return; 2555f59596dSMatthew Dillon hammer2_thr_signal(thr, HAMMER2_THREAD_STOP); 2565f59596dSMatthew Dillon hammer2_thr_wait(thr, HAMMER2_THREAD_STOPPED); 2575f59596dSMatthew Dillon thr->pmp = NULL; 25805a3c4ecSMatthew Dillon if (thr->scratch) { 25905a3c4ecSMatthew Dillon kfree(thr->scratch, M_HAMMER2); 26005a3c4ecSMatthew Dillon thr->scratch = NULL; 26105a3c4ecSMatthew Dillon } 26220852157SMatthew Dillon KKASSERT(TAILQ_EMPTY(&thr->xopq)); 2635f59596dSMatthew Dillon } 2645f59596dSMatthew Dillon 2655f59596dSMatthew Dillon /* 2665f59596dSMatthew Dillon * Asynchronous remaster request. Ask the synchronization thread to 2675f59596dSMatthew Dillon * start over soon (as if it were frozen and unfrozen, but without waiting). 2685f59596dSMatthew Dillon * The thread always recalculates mastership relationships when restarting. 
2695f59596dSMatthew Dillon */ 2705f59596dSMatthew Dillon void 2715f59596dSMatthew Dillon hammer2_thr_remaster(hammer2_thread_t *thr) 2725f59596dSMatthew Dillon { 2735f59596dSMatthew Dillon if (thr->td == NULL) 2745f59596dSMatthew Dillon return; 2755f59596dSMatthew Dillon hammer2_thr_signal(thr, HAMMER2_THREAD_REMASTER); 2765f59596dSMatthew Dillon } 2775f59596dSMatthew Dillon 2785f59596dSMatthew Dillon void 2795f59596dSMatthew Dillon hammer2_thr_freeze_async(hammer2_thread_t *thr) 2805f59596dSMatthew Dillon { 2815f59596dSMatthew Dillon hammer2_thr_signal(thr, HAMMER2_THREAD_FREEZE); 2825f59596dSMatthew Dillon } 2835f59596dSMatthew Dillon 2845f59596dSMatthew Dillon void 2855f59596dSMatthew Dillon hammer2_thr_freeze(hammer2_thread_t *thr) 2865f59596dSMatthew Dillon { 2875f59596dSMatthew Dillon if (thr->td == NULL) 2885f59596dSMatthew Dillon return; 2895f59596dSMatthew Dillon hammer2_thr_signal(thr, HAMMER2_THREAD_FREEZE); 2905f59596dSMatthew Dillon hammer2_thr_wait(thr, HAMMER2_THREAD_FROZEN); 2915f59596dSMatthew Dillon } 2925f59596dSMatthew Dillon 2935f59596dSMatthew Dillon void 2945f59596dSMatthew Dillon hammer2_thr_unfreeze(hammer2_thread_t *thr) 2955f59596dSMatthew Dillon { 2965f59596dSMatthew Dillon if (thr->td == NULL) 2975f59596dSMatthew Dillon return; 2985f59596dSMatthew Dillon hammer2_thr_signal(thr, HAMMER2_THREAD_UNFREEZE); 2995f59596dSMatthew Dillon hammer2_thr_wait_neg(thr, HAMMER2_THREAD_FROZEN); 3005f59596dSMatthew Dillon } 3015f59596dSMatthew Dillon 3025f59596dSMatthew Dillon int 3035f59596dSMatthew Dillon hammer2_thr_break(hammer2_thread_t *thr) 3045f59596dSMatthew Dillon { 3055f59596dSMatthew Dillon if (thr->flags & (HAMMER2_THREAD_STOP | 3065f59596dSMatthew Dillon HAMMER2_THREAD_REMASTER | 3075f59596dSMatthew Dillon HAMMER2_THREAD_FREEZE)) { 3085f59596dSMatthew Dillon return 1; 3095f59596dSMatthew Dillon } 3105f59596dSMatthew Dillon return 0; 3115f59596dSMatthew Dillon } 3125f59596dSMatthew Dillon 3135f59596dSMatthew Dillon 
/**************************************************************************** 3145f59596dSMatthew Dillon * HAMMER2 XOPS API * 3155f59596dSMatthew Dillon ****************************************************************************/ 3165f59596dSMatthew Dillon 3175f59596dSMatthew Dillon /* 3185f59596dSMatthew Dillon * Allocate a XOP request. 3195f59596dSMatthew Dillon * 3205f59596dSMatthew Dillon * Once allocated a XOP request can be started, collected, and retired, 3215f59596dSMatthew Dillon * and can be retired early if desired. 3225f59596dSMatthew Dillon * 3235f59596dSMatthew Dillon * NOTE: Fifo indices might not be zero but ri == wi on objcache_get(). 3245f59596dSMatthew Dillon */ 3255f59596dSMatthew Dillon void * 3265f59596dSMatthew Dillon hammer2_xop_alloc(hammer2_inode_t *ip, int flags) 3275f59596dSMatthew Dillon { 3285f59596dSMatthew Dillon hammer2_xop_t *xop; 3295f59596dSMatthew Dillon 3305f59596dSMatthew Dillon xop = objcache_get(cache_xops, M_WAITOK); 3315f59596dSMatthew Dillon KKASSERT(xop->head.cluster.array[0].chain == NULL); 3325f59596dSMatthew Dillon 3335f59596dSMatthew Dillon xop->head.ip1 = ip; 334c4421f07SMatthew Dillon xop->head.desc = NULL; 3355f59596dSMatthew Dillon xop->head.flags = flags; 3365f59596dSMatthew Dillon xop->head.state = 0; 3375f59596dSMatthew Dillon xop->head.error = 0; 3385f59596dSMatthew Dillon xop->head.collect_key = 0; 339fda30e02SMatthew Dillon xop->head.focus_dio = NULL; 340fda30e02SMatthew Dillon 3415f59596dSMatthew Dillon if (flags & HAMMER2_XOP_MODIFYING) 3425f59596dSMatthew Dillon xop->head.mtid = hammer2_trans_sub(ip->pmp); 3435f59596dSMatthew Dillon else 3445f59596dSMatthew Dillon xop->head.mtid = 0; 3455f59596dSMatthew Dillon 3465f59596dSMatthew Dillon xop->head.cluster.nchains = ip->cluster.nchains; 3475f59596dSMatthew Dillon xop->head.cluster.pmp = ip->pmp; 3485f59596dSMatthew Dillon xop->head.cluster.flags = HAMMER2_CLUSTER_LOCKED; 3495f59596dSMatthew Dillon 3505f59596dSMatthew Dillon /* 3515f59596dSMatthew Dillon * 
run_mask - Active thread (or frontend) associated with XOP 3525f59596dSMatthew Dillon */ 3535f59596dSMatthew Dillon xop->head.run_mask = HAMMER2_XOPMASK_VOP; 3545f59596dSMatthew Dillon 3555f59596dSMatthew Dillon hammer2_inode_ref(ip); 3565f59596dSMatthew Dillon 3575f59596dSMatthew Dillon return xop; 3585f59596dSMatthew Dillon } 3595f59596dSMatthew Dillon 3605f59596dSMatthew Dillon void 3615f59596dSMatthew Dillon hammer2_xop_setname(hammer2_xop_head_t *xop, const char *name, size_t name_len) 3625f59596dSMatthew Dillon { 3635f59596dSMatthew Dillon xop->name1 = kmalloc(name_len + 1, M_HAMMER2, M_WAITOK | M_ZERO); 3645f59596dSMatthew Dillon xop->name1_len = name_len; 3655f59596dSMatthew Dillon bcopy(name, xop->name1, name_len); 3665f59596dSMatthew Dillon } 3675f59596dSMatthew Dillon 3685f59596dSMatthew Dillon void 3695f59596dSMatthew Dillon hammer2_xop_setname2(hammer2_xop_head_t *xop, const char *name, size_t name_len) 3705f59596dSMatthew Dillon { 3715f59596dSMatthew Dillon xop->name2 = kmalloc(name_len + 1, M_HAMMER2, M_WAITOK | M_ZERO); 3725f59596dSMatthew Dillon xop->name2_len = name_len; 3735f59596dSMatthew Dillon bcopy(name, xop->name2, name_len); 3745f59596dSMatthew Dillon } 3755f59596dSMatthew Dillon 376cf1b3fafSMatthew Dillon size_t 377cf1b3fafSMatthew Dillon hammer2_xop_setname_inum(hammer2_xop_head_t *xop, hammer2_key_t inum) 378cf1b3fafSMatthew Dillon { 379cf1b3fafSMatthew Dillon const size_t name_len = 18; 380cf1b3fafSMatthew Dillon 381cf1b3fafSMatthew Dillon xop->name1 = kmalloc(name_len + 1, M_HAMMER2, M_WAITOK | M_ZERO); 382cf1b3fafSMatthew Dillon xop->name1_len = name_len; 383cf1b3fafSMatthew Dillon ksnprintf(xop->name1, name_len + 1, "0x%016jx", (intmax_t)inum); 384cf1b3fafSMatthew Dillon 385cf1b3fafSMatthew Dillon return name_len; 386cf1b3fafSMatthew Dillon } 387cf1b3fafSMatthew Dillon 3885f59596dSMatthew Dillon 3895f59596dSMatthew Dillon void 3905f59596dSMatthew Dillon hammer2_xop_setip2(hammer2_xop_head_t *xop, hammer2_inode_t *ip2) 
3915f59596dSMatthew Dillon { 3925f59596dSMatthew Dillon xop->ip2 = ip2; 3935f59596dSMatthew Dillon hammer2_inode_ref(ip2); 3945f59596dSMatthew Dillon } 3955f59596dSMatthew Dillon 3965f59596dSMatthew Dillon void 3975f59596dSMatthew Dillon hammer2_xop_setip3(hammer2_xop_head_t *xop, hammer2_inode_t *ip3) 3985f59596dSMatthew Dillon { 3995f59596dSMatthew Dillon xop->ip3 = ip3; 4005f59596dSMatthew Dillon hammer2_inode_ref(ip3); 4015f59596dSMatthew Dillon } 4025f59596dSMatthew Dillon 4035f59596dSMatthew Dillon void 404*38f19b3eSMatthew Dillon hammer2_xop_setip4(hammer2_xop_head_t *xop, hammer2_inode_t *ip4) 405*38f19b3eSMatthew Dillon { 406*38f19b3eSMatthew Dillon xop->ip4 = ip4; 407*38f19b3eSMatthew Dillon hammer2_inode_ref(ip4); 408*38f19b3eSMatthew Dillon } 409*38f19b3eSMatthew Dillon 410*38f19b3eSMatthew Dillon void 4115f59596dSMatthew Dillon hammer2_xop_reinit(hammer2_xop_head_t *xop) 4125f59596dSMatthew Dillon { 4135f59596dSMatthew Dillon xop->state = 0; 4145f59596dSMatthew Dillon xop->error = 0; 4155f59596dSMatthew Dillon xop->collect_key = 0; 4165f59596dSMatthew Dillon xop->run_mask = HAMMER2_XOPMASK_VOP; 4175f59596dSMatthew Dillon } 4185f59596dSMatthew Dillon 4195f59596dSMatthew Dillon /* 4205f59596dSMatthew Dillon * A mounted PFS needs Xops threads to support frontend operations. 
4215f59596dSMatthew Dillon */ 4225f59596dSMatthew Dillon void 4235f59596dSMatthew Dillon hammer2_xop_helper_create(hammer2_pfs_t *pmp) 4245f59596dSMatthew Dillon { 4255f59596dSMatthew Dillon int i; 4265f59596dSMatthew Dillon int j; 4275f59596dSMatthew Dillon 4285f59596dSMatthew Dillon lockmgr(&pmp->lock, LK_EXCLUSIVE); 4295f59596dSMatthew Dillon pmp->has_xop_threads = 1; 4305f59596dSMatthew Dillon 4316b039a3dSMatthew Dillon pmp->xop_groups = kmalloc(hammer2_xopgroups * 4326b039a3dSMatthew Dillon sizeof(hammer2_xop_group_t), 4336b039a3dSMatthew Dillon M_HAMMER2, M_WAITOK | M_ZERO); 4345f59596dSMatthew Dillon for (i = 0; i < pmp->iroot->cluster.nchains; ++i) { 4356b039a3dSMatthew Dillon for (j = 0; j < hammer2_xopgroups; ++j) { 4365f59596dSMatthew Dillon if (pmp->xop_groups[j].thrs[i].td) 4375f59596dSMatthew Dillon continue; 4389dca9515SMatthew Dillon hammer2_thr_create(&pmp->xop_groups[j].thrs[i], 4399dca9515SMatthew Dillon pmp, NULL, 4405f59596dSMatthew Dillon "h2xop", i, j, 4415f59596dSMatthew Dillon hammer2_primary_xops_thread); 4425f59596dSMatthew Dillon } 4435f59596dSMatthew Dillon } 4445f59596dSMatthew Dillon lockmgr(&pmp->lock, LK_RELEASE); 4455f59596dSMatthew Dillon } 4465f59596dSMatthew Dillon 4475f59596dSMatthew Dillon void 4485f59596dSMatthew Dillon hammer2_xop_helper_cleanup(hammer2_pfs_t *pmp) 4495f59596dSMatthew Dillon { 4505f59596dSMatthew Dillon int i; 4515f59596dSMatthew Dillon int j; 4525f59596dSMatthew Dillon 4536b039a3dSMatthew Dillon if (pmp->xop_groups == NULL) { 4546b039a3dSMatthew Dillon KKASSERT(pmp->has_xop_threads == 0); 4556b039a3dSMatthew Dillon return; 4566b039a3dSMatthew Dillon } 4576b039a3dSMatthew Dillon 4585f59596dSMatthew Dillon for (i = 0; i < pmp->pfs_nmasters; ++i) { 4596b039a3dSMatthew Dillon for (j = 0; j < hammer2_xopgroups; ++j) { 4605f59596dSMatthew Dillon if (pmp->xop_groups[j].thrs[i].td) 4615f59596dSMatthew Dillon hammer2_thr_delete(&pmp->xop_groups[j].thrs[i]); 4625f59596dSMatthew Dillon } 4635f59596dSMatthew Dillon } 
46405a3c4ecSMatthew Dillon pmp->has_xop_threads = 0; 4656b039a3dSMatthew Dillon kfree(pmp->xop_groups, M_HAMMER2); 4666b039a3dSMatthew Dillon pmp->xop_groups = NULL; 4675f59596dSMatthew Dillon } 4685f59596dSMatthew Dillon 4695f59596dSMatthew Dillon /* 4705f59596dSMatthew Dillon * Start a XOP request, queueing it to all nodes in the cluster to 4715f59596dSMatthew Dillon * execute the cluster op. 4725f59596dSMatthew Dillon * 4735f59596dSMatthew Dillon * XXX optimize single-target case. 4745f59596dSMatthew Dillon */ 4755f59596dSMatthew Dillon void 476c4421f07SMatthew Dillon hammer2_xop_start_except(hammer2_xop_head_t *xop, hammer2_xop_desc_t *desc, 4775f59596dSMatthew Dillon int notidx) 4785f59596dSMatthew Dillon { 4795f59596dSMatthew Dillon hammer2_inode_t *ip1; 4805f59596dSMatthew Dillon hammer2_pfs_t *pmp; 48120852157SMatthew Dillon hammer2_thread_t *thr; 4825f59596dSMatthew Dillon int i; 4835f59596dSMatthew Dillon int ng; 4845f59596dSMatthew Dillon int nchains; 4855f59596dSMatthew Dillon 4865f59596dSMatthew Dillon ip1 = xop->ip1; 4875f59596dSMatthew Dillon pmp = ip1->pmp; 4885f59596dSMatthew Dillon if (pmp->has_xop_threads == 0) 4895f59596dSMatthew Dillon hammer2_xop_helper_create(pmp); 4905f59596dSMatthew Dillon 49120852157SMatthew Dillon /* 49205a3c4ecSMatthew Dillon * The intent of the XOP sequencer is to ensure that ops on the same 49305a3c4ecSMatthew Dillon * inode execute in the same order. This is necessary when issuing 49405a3c4ecSMatthew Dillon * modifying operations to multiple targets because some targets might 49505a3c4ecSMatthew Dillon * get behind and the frontend is allowed to complete the moment a 49605a3c4ecSMatthew Dillon * quorum of targets succeed. 49720852157SMatthew Dillon * 4981159c75cSMatthew Dillon * Strategy operations: 4991159c75cSMatthew Dillon * 5001159c75cSMatthew Dillon * (1) Must be segregated from non-strategy operations to 5011159c75cSMatthew Dillon * avoid a deadlock. 
A vfsync and a bread/bwrite can 5021159c75cSMatthew Dillon * deadlock the vfsync's buffer list scan. 5031159c75cSMatthew Dillon * 5041159c75cSMatthew Dillon * (2) Reads are separated from writes to avoid write stalls 5051159c75cSMatthew Dillon * from excessively intefering with reads. Reads are allowed 5061159c75cSMatthew Dillon * to wander across multiple worker threads for potential 5071159c75cSMatthew Dillon * single-file concurrency improvements. 5081159c75cSMatthew Dillon * 5091159c75cSMatthew Dillon * (3) Writes are serialized to a single worker thread (for any 5101159c75cSMatthew Dillon * given inode) in order to try to improve block allocation 5111159c75cSMatthew Dillon * sequentiality and to reduce lock contention. 51220852157SMatthew Dillon * 51305a3c4ecSMatthew Dillon * TODO - RENAME fails here because it is potentially modifying 5141159c75cSMatthew Dillon * three different inodes, but we triple-lock the inodes 5151159c75cSMatthew Dillon * involved so it shouldn't create a sequencing schism. 
51620852157SMatthew Dillon */ 51720852157SMatthew Dillon if (xop->flags & HAMMER2_XOP_STRATEGY) { 51820852157SMatthew Dillon hammer2_xop_strategy_t *xopst; 5196b039a3dSMatthew Dillon 5206b039a3dSMatthew Dillon xopst = &((hammer2_xop_t *)xop)->xop_strategy; 5216b039a3dSMatthew Dillon ng = mycpu->gd_cpuid % (hammer2_xopgroups >> 1); 5226b039a3dSMatthew Dillon #if 0 5231159c75cSMatthew Dillon hammer2_off_t off; 5241159c75cSMatthew Dillon int cdr; 52520852157SMatthew Dillon 5261159c75cSMatthew Dillon ng = (int)(hammer2_icrc32(&xop->ip1, sizeof(xop->ip1))); 5271159c75cSMatthew Dillon if (desc == &hammer2_strategy_read_desc) { 5281159c75cSMatthew Dillon off = xopst->lbase / HAMMER2_PBUFSIZE; 5291159c75cSMatthew Dillon cdr = hammer2_cluster_data_read; 5301159c75cSMatthew Dillon /* sysctl race, load into var */ 5311159c75cSMatthew Dillon cpu_ccfence(); 5321159c75cSMatthew Dillon if (cdr) 5331159c75cSMatthew Dillon off /= cdr; 5341159c75cSMatthew Dillon ng ^= hammer2_icrc32(&off, sizeof(off)) & 5351159c75cSMatthew Dillon (hammer2_worker_rmask << 1); 5361159c75cSMatthew Dillon ng |= 1; 5371159c75cSMatthew Dillon } else { 5381159c75cSMatthew Dillon #if 0 5391159c75cSMatthew Dillon off = xopst->lbase >> 21; 5401159c75cSMatthew Dillon ng ^= hammer2_icrc32(&off, sizeof(off)) & 3; 5411159c75cSMatthew Dillon #endif 5421159c75cSMatthew Dillon ng &= ~1; 5431159c75cSMatthew Dillon } 5446b039a3dSMatthew Dillon ng = ng % (hammer2_xopgroups >> 1); 5456b039a3dSMatthew Dillon ng += (hammer2_xopgroups >> 1); 5466b039a3dSMatthew Dillon #endif 5475f59596dSMatthew Dillon } else { 54820852157SMatthew Dillon ng = (int)(hammer2_icrc32(&xop->ip1, sizeof(xop->ip1))); 5496b039a3dSMatthew Dillon ng = (unsigned int)ng % (hammer2_xopgroups >> 1); 5505f59596dSMatthew Dillon } 551c4421f07SMatthew Dillon xop->desc = desc; 5525f59596dSMatthew Dillon 5535f59596dSMatthew Dillon /* 5545f59596dSMatthew Dillon * The instant xop is queued another thread can pick it off. 
In the 5555f59596dSMatthew Dillon * case of asynchronous ops, another thread might even finish and 5565f59596dSMatthew Dillon * deallocate it. 5575f59596dSMatthew Dillon */ 5585f59596dSMatthew Dillon hammer2_spin_ex(&pmp->xop_spin); 5595f59596dSMatthew Dillon nchains = ip1->cluster.nchains; 5605f59596dSMatthew Dillon for (i = 0; i < nchains; ++i) { 5615f59596dSMatthew Dillon /* 5625f59596dSMatthew Dillon * XXX ip1->cluster.array* not stable here. This temporary 5635f59596dSMatthew Dillon * hack fixes basic issues in target XOPs which need to 5645f59596dSMatthew Dillon * obtain a starting chain from the inode but does not 5655f59596dSMatthew Dillon * address possible races against inode updates which 5665f59596dSMatthew Dillon * might NULL-out a chain. 5675f59596dSMatthew Dillon */ 5685f59596dSMatthew Dillon if (i != notidx && ip1->cluster.array[i].chain) { 56920852157SMatthew Dillon thr = &pmp->xop_groups[ng].thrs[i]; 57019808ac9SMatthew Dillon atomic_set_64(&xop->run_mask, 1LLU << i); 57119808ac9SMatthew Dillon atomic_set_64(&xop->chk_mask, 1LLU << i); 57205a3c4ecSMatthew Dillon xop->collect[i].thr = thr; 57320852157SMatthew Dillon TAILQ_INSERT_TAIL(&thr->xopq, xop, collect[i].entry); 5745f59596dSMatthew Dillon } 5755f59596dSMatthew Dillon } 5765f59596dSMatthew Dillon hammer2_spin_unex(&pmp->xop_spin); 5775f59596dSMatthew Dillon /* xop can become invalid at this point */ 5785f59596dSMatthew Dillon 5795f59596dSMatthew Dillon /* 58020852157SMatthew Dillon * Each thread has its own xopq 5815f59596dSMatthew Dillon */ 5825f59596dSMatthew Dillon for (i = 0; i < nchains; ++i) { 5835f59596dSMatthew Dillon if (i != notidx) { 58420852157SMatthew Dillon thr = &pmp->xop_groups[ng].thrs[i]; 58520852157SMatthew Dillon hammer2_thr_signal(thr, HAMMER2_THREAD_XOPQ); 5865f59596dSMatthew Dillon } 5875f59596dSMatthew Dillon } 5885f59596dSMatthew Dillon } 5895f59596dSMatthew Dillon 5905f59596dSMatthew Dillon void 591c4421f07SMatthew Dillon hammer2_xop_start(hammer2_xop_head_t *xop, 
hammer2_xop_desc_t *desc) 5925f59596dSMatthew Dillon { 593c4421f07SMatthew Dillon hammer2_xop_start_except(xop, desc, -1); 5945f59596dSMatthew Dillon } 5955f59596dSMatthew Dillon 5965f59596dSMatthew Dillon /* 5975f59596dSMatthew Dillon * Retire a XOP. Used by both the VOP frontend and by the XOP backend. 5985f59596dSMatthew Dillon */ 5995f59596dSMatthew Dillon void 60019808ac9SMatthew Dillon hammer2_xop_retire(hammer2_xop_head_t *xop, uint64_t mask) 6015f59596dSMatthew Dillon { 6025f59596dSMatthew Dillon hammer2_chain_t *chain; 60319808ac9SMatthew Dillon uint64_t nmask; 6045f59596dSMatthew Dillon int i; 6055f59596dSMatthew Dillon 6065f59596dSMatthew Dillon /* 6075f59596dSMatthew Dillon * Remove the frontend collector or remove a backend feeder. 60819808ac9SMatthew Dillon * 6095f59596dSMatthew Dillon * When removing the frontend we must wakeup any backend feeders 6105f59596dSMatthew Dillon * who are waiting for FIFO space. 6115f59596dSMatthew Dillon * 61219808ac9SMatthew Dillon * When removing the last backend feeder we must wakeup any waiting 61319808ac9SMatthew Dillon * frontend. 6145f59596dSMatthew Dillon */ 6155f59596dSMatthew Dillon KKASSERT(xop->run_mask & mask); 61619808ac9SMatthew Dillon nmask = atomic_fetchadd_64(&xop->run_mask, 61719808ac9SMatthew Dillon -mask + HAMMER2_XOPMASK_FEED); 61819808ac9SMatthew Dillon 61919808ac9SMatthew Dillon /* 62019808ac9SMatthew Dillon * More than one entity left 62119808ac9SMatthew Dillon */ 62219808ac9SMatthew Dillon if ((nmask & HAMMER2_XOPMASK_ALLDONE) != mask) { 62319808ac9SMatthew Dillon /* 62419808ac9SMatthew Dillon * Frontend terminating, wakeup any backends waiting on 62519808ac9SMatthew Dillon * fifo full. 62619808ac9SMatthew Dillon * 62719808ac9SMatthew Dillon * NOTE!!! The xop can get ripped out from under us at 62819808ac9SMatthew Dillon * this point, so do not reference it again. 62919808ac9SMatthew Dillon * The wakeup(xop) doesn't touch the xop and 63019808ac9SMatthew Dillon * is ok. 
63119808ac9SMatthew Dillon */ 6325f59596dSMatthew Dillon if (mask == HAMMER2_XOPMASK_VOP) { 6335f59596dSMatthew Dillon if (nmask & HAMMER2_XOPMASK_FIFOW) 6345f59596dSMatthew Dillon wakeup(xop); 6355f59596dSMatthew Dillon } 63619808ac9SMatthew Dillon 63719808ac9SMatthew Dillon /* 63819808ac9SMatthew Dillon * Wakeup frontend if the last backend is terminating. 63919808ac9SMatthew Dillon */ 64019808ac9SMatthew Dillon nmask -= mask; 64119808ac9SMatthew Dillon if ((nmask & HAMMER2_XOPMASK_ALLDONE) == HAMMER2_XOPMASK_VOP) { 64219808ac9SMatthew Dillon if (nmask & HAMMER2_XOPMASK_WAIT) 64319808ac9SMatthew Dillon wakeup(xop); 64419808ac9SMatthew Dillon } 64519808ac9SMatthew Dillon 6465f59596dSMatthew Dillon return; 6475f59596dSMatthew Dillon } 6485f59596dSMatthew Dillon /* else nobody else left, we can ignore FIFOW */ 6495f59596dSMatthew Dillon 6505f59596dSMatthew Dillon /* 6515f59596dSMatthew Dillon * All collectors are gone, we can cleanup and dispose of the XOP. 6525f59596dSMatthew Dillon * Note that this can wind up being a frontend OR a backend. 6535f59596dSMatthew Dillon * Pending chains are locked shared and not owned by any thread. 654b5795b6bSMatthew Dillon * 6555f59596dSMatthew Dillon * Cleanup the collection cluster. 6565f59596dSMatthew Dillon */ 6575f59596dSMatthew Dillon for (i = 0; i < xop->cluster.nchains; ++i) { 6585f59596dSMatthew Dillon xop->cluster.array[i].flags = 0; 6595f59596dSMatthew Dillon chain = xop->cluster.array[i].chain; 6605f59596dSMatthew Dillon if (chain) { 6615f59596dSMatthew Dillon xop->cluster.array[i].chain = NULL; 6626d51e13aSMatthew Dillon hammer2_chain_drop_unhold(chain); 6635f59596dSMatthew Dillon } 6645f59596dSMatthew Dillon } 6655f59596dSMatthew Dillon 6665f59596dSMatthew Dillon /* 66719808ac9SMatthew Dillon * Cleanup the fifos. Since we are the only entity left on this 66819808ac9SMatthew Dillon * xop we don't have to worry about fifo flow control, and one 66919808ac9SMatthew Dillon * lfence() will do the job. 
6705f59596dSMatthew Dillon */ 6715f59596dSMatthew Dillon cpu_lfence(); 6725f59596dSMatthew Dillon mask = xop->chk_mask; 6735f59596dSMatthew Dillon for (i = 0; mask && i < HAMMER2_MAXCLUSTER; ++i) { 6745f59596dSMatthew Dillon hammer2_xop_fifo_t *fifo = &xop->collect[i]; 6755f59596dSMatthew Dillon while (fifo->ri != fifo->wi) { 6765f59596dSMatthew Dillon chain = fifo->array[fifo->ri & HAMMER2_XOPFIFO_MASK]; 6776d51e13aSMatthew Dillon if (chain) 6786d51e13aSMatthew Dillon hammer2_chain_drop_unhold(chain); 6795f59596dSMatthew Dillon ++fifo->ri; 6805f59596dSMatthew Dillon } 6815f59596dSMatthew Dillon mask &= ~(1U << i); 6825f59596dSMatthew Dillon } 6835f59596dSMatthew Dillon 6845f59596dSMatthew Dillon /* 6855f59596dSMatthew Dillon * The inode is only held at this point, simply drop it. 6865f59596dSMatthew Dillon */ 6875f59596dSMatthew Dillon if (xop->ip1) { 6885f59596dSMatthew Dillon hammer2_inode_drop(xop->ip1); 6895f59596dSMatthew Dillon xop->ip1 = NULL; 6905f59596dSMatthew Dillon } 6915f59596dSMatthew Dillon if (xop->ip2) { 6925f59596dSMatthew Dillon hammer2_inode_drop(xop->ip2); 6935f59596dSMatthew Dillon xop->ip2 = NULL; 6945f59596dSMatthew Dillon } 6955f59596dSMatthew Dillon if (xop->ip3) { 6965f59596dSMatthew Dillon hammer2_inode_drop(xop->ip3); 6975f59596dSMatthew Dillon xop->ip3 = NULL; 6985f59596dSMatthew Dillon } 699*38f19b3eSMatthew Dillon if (xop->ip4) { 700*38f19b3eSMatthew Dillon hammer2_inode_drop(xop->ip4); 701*38f19b3eSMatthew Dillon xop->ip4 = NULL; 702*38f19b3eSMatthew Dillon } 7035f59596dSMatthew Dillon if (xop->name1) { 7045f59596dSMatthew Dillon kfree(xop->name1, M_HAMMER2); 7055f59596dSMatthew Dillon xop->name1 = NULL; 7065f59596dSMatthew Dillon xop->name1_len = 0; 7075f59596dSMatthew Dillon } 7085f59596dSMatthew Dillon if (xop->name2) { 7095f59596dSMatthew Dillon kfree(xop->name2, M_HAMMER2); 7105f59596dSMatthew Dillon xop->name2 = NULL; 7115f59596dSMatthew Dillon xop->name2_len = 0; 7125f59596dSMatthew Dillon } 7135f59596dSMatthew Dillon 
7145f59596dSMatthew Dillon objcache_put(cache_xops, xop); 7155f59596dSMatthew Dillon } 7165f59596dSMatthew Dillon 7175f59596dSMatthew Dillon /* 7185f59596dSMatthew Dillon * (Backend) Returns non-zero if the frontend is still attached. 7195f59596dSMatthew Dillon */ 7205f59596dSMatthew Dillon int 7215f59596dSMatthew Dillon hammer2_xop_active(hammer2_xop_head_t *xop) 7225f59596dSMatthew Dillon { 7235f59596dSMatthew Dillon if (xop->run_mask & HAMMER2_XOPMASK_VOP) 7245f59596dSMatthew Dillon return 1; 7255f59596dSMatthew Dillon else 7265f59596dSMatthew Dillon return 0; 7275f59596dSMatthew Dillon } 7285f59596dSMatthew Dillon 7295f59596dSMatthew Dillon /* 7305f59596dSMatthew Dillon * (Backend) Feed chain data through the cluster validator and back to 7315f59596dSMatthew Dillon * the frontend. Chains are fed from multiple nodes concurrently 7325f59596dSMatthew Dillon * and pipelined via per-node FIFOs in the XOP. 7335f59596dSMatthew Dillon * 7346d51e13aSMatthew Dillon * The chain must be locked (either shared or exclusive). The caller may 7356d51e13aSMatthew Dillon * unlock and drop the chain on return. This function will add an extra 7366d51e13aSMatthew Dillon * ref and hold the chain's data for the pass-back. 7375f59596dSMatthew Dillon * 7385f59596dSMatthew Dillon * No xop lock is needed because we are only manipulating fields under 7395f59596dSMatthew Dillon * our direct control. 7405f59596dSMatthew Dillon * 7412e3f71c3SSascha Wildner * Returns 0 on success and a hammer2 error code if sync is permanently 7425f59596dSMatthew Dillon * lost. The caller retains a ref on the chain but by convention 7435f59596dSMatthew Dillon * the lock is typically inherited by the xop (caller loses lock). 7445f59596dSMatthew Dillon * 7455f59596dSMatthew Dillon * Returns non-zero on error. In this situation the caller retains a 7465f59596dSMatthew Dillon * ref on the chain but loses the lock (we unlock here). 
 */
int
hammer2_xop_feed(hammer2_xop_head_t *xop, hammer2_chain_t *chain,
		 int clindex, int error)
{
	hammer2_xop_fifo_t *fifo;
	uint64_t mask;

	/*
	 * Early termination (typically of xop_readdir).  If the frontend
	 * has detached there is nobody left to consume the feed.
	 */
	if (hammer2_xop_active(xop) == 0) {
		error = HAMMER2_ERROR_ABORTED;
		goto done;
	}

	/*
	 * Multi-threaded entry into the XOP collector.  We own the
	 * fifo->wi for our clindex.
	 */
	fifo = &xop->collect[clindex];

	/* FIFO full: yield once, then block below until the reader drains */
	if (fifo->ri == fifo->wi - HAMMER2_XOPFIFO)
		lwkt_yield();
	while (fifo->ri == fifo->wi - HAMMER2_XOPFIFO) {
		atomic_set_int(&fifo->flags, HAMMER2_XOP_FIFO_STALL);
		mask = xop->run_mask;
		if ((mask & HAMMER2_XOPMASK_VOP) == 0) {
			/* frontend went away while we were stalled */
			error = HAMMER2_ERROR_ABORTED;
			goto done;
		}
		/*
		 * Interlocked sleep: set FIFOW atomically against run_mask
		 * so a concurrent retire/collect sees the waiter bit before
		 * we actually sleep; re-check fullness after winning the
		 * cmpset to avoid a lost wakeup.
		 */
		tsleep_interlock(xop, 0);
		if (atomic_cmpset_64(&xop->run_mask, mask,
				     mask | HAMMER2_XOPMASK_FIFOW)) {
			if (fifo->ri == fifo->wi - HAMMER2_XOPFIFO) {
				tsleep(xop, PINTERLOCKED, "h2feed", hz*60);
			}
		}
		/* retry */
	}
	atomic_clear_int(&fifo->flags, HAMMER2_XOP_FIFO_STALL);
	if (chain)
		hammer2_chain_ref_hold(chain);
	if (error == 0 && chain)
		error = chain->error;
	/*
	 * Publish the element before advancing wi: the sfence orders the
	 * array/errors stores ahead of the index store for the consumer.
	 */
	fifo->errors[fifo->wi & HAMMER2_XOPFIFO_MASK] = error;
	fifo->array[fifo->wi & HAMMER2_XOPFIFO_MASK] = chain;
	cpu_sfence();
	++fifo->wi;

	/* bump FEED count and wake a waiting frontend collector, if any */
	mask = atomic_fetchadd_64(&xop->run_mask, HAMMER2_XOPMASK_FEED);
	if (mask & HAMMER2_XOPMASK_WAIT) {
		atomic_clear_64(&xop->run_mask, HAMMER2_XOPMASK_WAIT);
		wakeup(xop);
	}
	error = 0;

	/*
	 * Cleanup.  If an error occurred we eat the lock.  If no error
	 * occurred the fifo inherits the lock and gains an additional ref.
	 *
	 * The caller's ref remains in both cases.
	 */
done:
	return error;
}

/*
 * (Frontend) collect a response from a running cluster op.
 *
 * Responses are fed from all appropriate nodes concurrently
 * and collected into a cohesive response >= collect_key.
 *
 * The collector will return the instant quorum or other requirements
 * are met, even if some nodes get behind or become non-responsive.
 *
 * HAMMER2_XOP_COLLECT_NOWAIT	- Used to 'poll' a completed collection,
 *				  usually called synchronously from the
 *				  node XOPs for the strategy code to
 *				  fake the frontend collection and complete
 *				  the BIO as soon as possible.
 *
 * HAMMER2_XOP_SYNCHRONIZER	- Request synchronization with a particular
 *				  cluster index, prevents looping when that
 *				  index is out of sync so caller can act on
 *				  the out of sync element.  ESRCH and EDEADLK
 *				  can be returned if this flag is specified.
 *
 * Returns 0 on success plus a filled out xop->cluster structure.
 * Return ENOENT on normal termination.
 * Otherwise return an error.
 *
 * WARNING! If the xop returns a cluster with a non-NULL focus, note that
 *	    none of the chains in the cluster (or the focus) are either
 *	    locked or I/O synchronized with the cpu.  hammer2_xop_gdata()
 *	    and hammer2_xop_pdata() must be used to safely access the focus
 *	    chain's content.
 *
 *	    The frontend can make certain assumptions based on higher-level
 *	    locking done by the frontend, but data integrity absolutely
 *	    requires using the gdata/pdata API.
 */
int
hammer2_xop_collect(hammer2_xop_head_t *xop, int flags)
{
	hammer2_xop_fifo_t *fifo;
	hammer2_chain_t *chain;
	hammer2_key_t lokey;
	uint64_t mask;
	int error;
	int keynull;
	int adv;		/* advance the element */
	int i;

loop:
	/*
	 * First loop tries to advance pieces of the cluster which
	 * are out of sync.
	 *
	 * run_mask is sampled before the lfence so the FEED counter read
	 * here is not newer than the fifo contents scanned below.
	 */
	lokey = HAMMER2_KEY_MAX;
	keynull = HAMMER2_CHECK_NULL;
	mask = xop->run_mask;
	cpu_lfence();

	for (i = 0; i < xop->cluster.nchains; ++i) {
		chain = xop->cluster.array[i].chain;
		if (chain == NULL) {
			adv = 1;
		} else if (chain->bref.key < xop->collect_key) {
			/* element is behind the collection point, advance */
			adv = 1;
		} else {
			/* element is a candidate; track the lowest key */
			keynull &= ~HAMMER2_CHECK_NULL;
			if (lokey > chain->bref.key)
				lokey = chain->bref.key;
			adv = 0;
		}
		if (adv == 0)
			continue;

		/*
		 * Advance element if possible, advanced element may be NULL.
		 */
		if (chain)
			hammer2_chain_drop_unhold(chain);

		fifo = &xop->collect[i];
		if (fifo->ri != fifo->wi) {
			cpu_lfence();
			chain = fifo->array[fifo->ri & HAMMER2_XOPFIFO_MASK];
			error = fifo->errors[fifo->ri & HAMMER2_XOPFIFO_MASK];
			++fifo->ri;
			xop->cluster.array[i].chain = chain;
			xop->cluster.array[i].error = error;
			if (chain == NULL) {
				/* XXX */
				xop->cluster.array[i].flags |=
							HAMMER2_CITEM_NULL;
			}
			/*
			 * Unstall the feeder once the FIFO has drained to
			 * half capacity.
			 */
			if (fifo->wi - fifo->ri <= HAMMER2_XOPFIFO / 2) {
				if (fifo->flags & HAMMER2_XOP_FIFO_STALL) {
					atomic_clear_int(&fifo->flags,
						    HAMMER2_XOP_FIFO_STALL);
					wakeup(xop);
					lwkt_yield();
				}
			}
			--i;		/* loop on same index */
		} else {
			/*
			 * Retain CITEM_NULL flag.  If set just repeat EOF.
			 * If not, the NULL,0 combination indicates an
			 * operation in-progress.
			 */
			xop->cluster.array[i].chain = NULL;
			/* retain any CITEM_NULL setting */
		}
	}

	/*
	 * Determine whether the lowest collected key meets clustering
	 * requirements.  Returns:
	 *
	 *	0	 - key valid, cluster can be returned.
	 *
	 *	ENOENT	 - normal end of scan, return ENOENT.
	 *
	 *	ESRCH	 - sufficient elements collected, quorum agreement
	 *		   that lokey is not a valid element and should be
	 *		   skipped.
	 *
	 *	EDEADLK  - sufficient elements collected, no quorum agreement
	 *		   (and no agreement possible).  In this situation a
	 *		   repair is needed, for now we loop.
	 *
	 *	EINPROGRESS - insufficient elements collected to resolve,
	 *		   wait for event and loop.
	 */
	if ((flags & HAMMER2_XOP_COLLECT_WAITALL) &&
	    (mask & HAMMER2_XOPMASK_ALLDONE) != HAMMER2_XOPMASK_VOP) {
		error = HAMMER2_ERROR_EINPROGRESS;
	} else {
		error = hammer2_cluster_check(&xop->cluster, lokey, keynull);
	}
	if (error == HAMMER2_ERROR_EINPROGRESS) {
		if (flags & HAMMER2_XOP_COLLECT_NOWAIT)
			goto done;
		/*
		 * Interlocked sleep: set WAIT atomically against the mask
		 * sampled above; if a feeder changed run_mask in the
		 * meantime the cmpset fails and we simply re-loop.
		 */
		tsleep_interlock(xop, 0);
		if (atomic_cmpset_64(&xop->run_mask,
				     mask, mask | HAMMER2_XOPMASK_WAIT)) {
			tsleep(xop, PINTERLOCKED, "h2coll", hz*60);
		}
		goto loop;
	}
	if (error == HAMMER2_ERROR_ESRCH) {
		if (lokey != HAMMER2_KEY_MAX) {
			xop->collect_key = lokey + 1;
			goto loop;
		}
		error = HAMMER2_ERROR_ENOENT;
	}
	if (error == HAMMER2_ERROR_EDEADLK) {
		kprintf("hammer2: no quorum possible lokey %016jx\n",
			lokey);
		if (lokey != HAMMER2_KEY_MAX) {
			xop->collect_key = lokey + 1;
			goto loop;
		}
		error = HAMMER2_ERROR_ENOENT;
	}
	if (lokey == HAMMER2_KEY_MAX)
		xop->collect_key = lokey;
	else
		xop->collect_key = lokey + 1;
done:
	return error;
}

/*
 * N x M processing threads are available to handle XOPs, N per cluster
 * index x M cluster nodes.
 *
 * Locate and return the next runnable xop, or NULL if no xops are
 * present or none of the xops are currently runnable (for various reasons).
 * The xop is left on the queue and serves to block other dependent xops
 * from being run.
 *
 * Dependent xops will not be returned.
 *
 * Sets HAMMER2_XOP_FIFO_RUN on the returned xop or returns NULL.
 *
 * NOTE! Xops run concurrently for each cluster index.
9985f59596dSMatthew Dillon */ 9995f59596dSMatthew Dillon #define XOP_HASH_SIZE 16 10005f59596dSMatthew Dillon #define XOP_HASH_MASK (XOP_HASH_SIZE - 1) 10015f59596dSMatthew Dillon 10025f59596dSMatthew Dillon static __inline 10035f59596dSMatthew Dillon int 10045f59596dSMatthew Dillon xop_testhash(hammer2_thread_t *thr, hammer2_inode_t *ip, uint32_t *hash) 10055f59596dSMatthew Dillon { 10065f59596dSMatthew Dillon uint32_t mask; 10075f59596dSMatthew Dillon int hv; 10085f59596dSMatthew Dillon 10095f59596dSMatthew Dillon hv = (int)((uintptr_t)ip + (uintptr_t)thr) / sizeof(hammer2_inode_t); 10105f59596dSMatthew Dillon mask = 1U << (hv & 31); 10115f59596dSMatthew Dillon hv >>= 5; 10125f59596dSMatthew Dillon 10135f59596dSMatthew Dillon return ((int)(hash[hv & XOP_HASH_MASK] & mask)); 10145f59596dSMatthew Dillon } 10155f59596dSMatthew Dillon 10165f59596dSMatthew Dillon static __inline 10175f59596dSMatthew Dillon void 10185f59596dSMatthew Dillon xop_sethash(hammer2_thread_t *thr, hammer2_inode_t *ip, uint32_t *hash) 10195f59596dSMatthew Dillon { 10205f59596dSMatthew Dillon uint32_t mask; 10215f59596dSMatthew Dillon int hv; 10225f59596dSMatthew Dillon 10235f59596dSMatthew Dillon hv = (int)((uintptr_t)ip + (uintptr_t)thr) / sizeof(hammer2_inode_t); 10245f59596dSMatthew Dillon mask = 1U << (hv & 31); 10255f59596dSMatthew Dillon hv >>= 5; 10265f59596dSMatthew Dillon 10275f59596dSMatthew Dillon hash[hv & XOP_HASH_MASK] |= mask; 10285f59596dSMatthew Dillon } 10295f59596dSMatthew Dillon 10305f59596dSMatthew Dillon static 10315f59596dSMatthew Dillon hammer2_xop_head_t * 10325f59596dSMatthew Dillon hammer2_xop_next(hammer2_thread_t *thr) 10335f59596dSMatthew Dillon { 10345f59596dSMatthew Dillon hammer2_pfs_t *pmp = thr->pmp; 10355f59596dSMatthew Dillon int clindex = thr->clindex; 10365f59596dSMatthew Dillon uint32_t hash[XOP_HASH_SIZE] = { 0 }; 10375f59596dSMatthew Dillon hammer2_xop_head_t *xop; 10385f59596dSMatthew Dillon 10395f59596dSMatthew Dillon hammer2_spin_ex(&pmp->xop_spin); 
104020852157SMatthew Dillon TAILQ_FOREACH(xop, &thr->xopq, collect[clindex].entry) { 10415f59596dSMatthew Dillon /* 10425f59596dSMatthew Dillon * Check dependency 10435f59596dSMatthew Dillon */ 10445f59596dSMatthew Dillon if (xop_testhash(thr, xop->ip1, hash) || 10455f59596dSMatthew Dillon (xop->ip2 && xop_testhash(thr, xop->ip2, hash)) || 1046*38f19b3eSMatthew Dillon (xop->ip3 && xop_testhash(thr, xop->ip3, hash)) || 1047*38f19b3eSMatthew Dillon (xop->ip4 && xop_testhash(thr, xop->ip4, hash))) 1048*38f19b3eSMatthew Dillon { 10495f59596dSMatthew Dillon continue; 10505f59596dSMatthew Dillon } 10515f59596dSMatthew Dillon xop_sethash(thr, xop->ip1, hash); 10525f59596dSMatthew Dillon if (xop->ip2) 10535f59596dSMatthew Dillon xop_sethash(thr, xop->ip2, hash); 10545f59596dSMatthew Dillon if (xop->ip3) 10555f59596dSMatthew Dillon xop_sethash(thr, xop->ip3, hash); 1056*38f19b3eSMatthew Dillon if (xop->ip4) 1057*38f19b3eSMatthew Dillon xop_sethash(thr, xop->ip4, hash); 10585f59596dSMatthew Dillon 10595f59596dSMatthew Dillon /* 10605f59596dSMatthew Dillon * Check already running 10615f59596dSMatthew Dillon */ 10625f59596dSMatthew Dillon if (xop->collect[clindex].flags & HAMMER2_XOP_FIFO_RUN) 10635f59596dSMatthew Dillon continue; 10645f59596dSMatthew Dillon 10655f59596dSMatthew Dillon /* 10665f59596dSMatthew Dillon * Found a good one, return it. 10675f59596dSMatthew Dillon */ 10685f59596dSMatthew Dillon atomic_set_int(&xop->collect[clindex].flags, 10695f59596dSMatthew Dillon HAMMER2_XOP_FIFO_RUN); 10705f59596dSMatthew Dillon break; 10715f59596dSMatthew Dillon } 10725f59596dSMatthew Dillon hammer2_spin_unex(&pmp->xop_spin); 10735f59596dSMatthew Dillon 10745f59596dSMatthew Dillon return xop; 10755f59596dSMatthew Dillon } 10765f59596dSMatthew Dillon 10775f59596dSMatthew Dillon /* 10785f59596dSMatthew Dillon * Remove the completed XOP from the queue, clear HAMMER2_XOP_FIFO_RUN. 10795f59596dSMatthew Dillon * 10805f59596dSMatthew Dillon * NOTE! 
 *	 Xops run concurrently for each cluster index.
 */
static
void
hammer2_xop_dequeue(hammer2_thread_t *thr, hammer2_xop_head_t *xop)
{
	hammer2_pfs_t *pmp = thr->pmp;
	int clindex = thr->clindex;

	hammer2_spin_ex(&pmp->xop_spin);
	TAILQ_REMOVE(&thr->xopq, xop, collect[clindex].entry);
	atomic_clear_int(&xop->collect[clindex].flags,
			 HAMMER2_XOP_FIFO_RUN);
	hammer2_spin_unex(&pmp->xop_spin);
	/* re-signal ourselves if more work remains queued */
	if (TAILQ_FIRST(&thr->xopq))
		hammer2_thr_signal(thr, HAMMER2_THREAD_XOPQ);
}

/*
 * Primary management thread for xops support.  Each node has several such
 * threads which replicate front-end operations on cluster nodes.
 *
 * XOPS thread node operations, allowing the function to focus on a single
 * node in the cluster after validating the operation with the cluster.
 * This is primarily what prevents dead or stalled nodes from stalling
 * the front-end.
 */
void
hammer2_primary_xops_thread(void *arg)
{
	hammer2_thread_t *thr = arg;
	hammer2_pfs_t *pmp;
	hammer2_xop_head_t *xop;
	uint64_t mask;
	uint32_t flags;
	uint32_t nflags;
	/*
	 * last_desc is written but never read here; presumably retained
	 * for post-mortem/debugger inspection -- TODO confirm.
	 */
	hammer2_xop_desc_t *last_desc = NULL;

	pmp = thr->pmp;
	/*xgrp = &pmp->xop_groups[thr->repidx]; not needed */
	mask = 1LLU << thr->clindex;	/* our run_mask bit for retire */

	for (;;) {
		flags = thr->flags;

		/*
		 * Handle stop request
		 */
		if (flags & HAMMER2_THREAD_STOP)
			break;

		/*
		 * Handle freeze request
		 */
		if (flags & HAMMER2_THREAD_FREEZE) {
			hammer2_thr_signal2(thr, HAMMER2_THREAD_FROZEN,
						 HAMMER2_THREAD_FREEZE);
			continue;
		}

		if (flags & HAMMER2_THREAD_UNFREEZE) {
			hammer2_thr_signal2(thr, 0,
						 HAMMER2_THREAD_FROZEN |
						 HAMMER2_THREAD_UNFREEZE);
			continue;
		}

		/*
		 * Force idle if frozen until unfrozen or stopped.
		 */
		if (flags & HAMMER2_THREAD_FROZEN) {
			hammer2_thr_wait_any(thr,
					     HAMMER2_THREAD_UNFREEZE |
					     HAMMER2_THREAD_STOP,
					     0);
			continue;
		}

		/*
		 * Reset state on REMASTER request
		 */
		if (flags & HAMMER2_THREAD_REMASTER) {
			hammer2_thr_signal2(thr, 0, HAMMER2_THREAD_REMASTER);
			/* reset state here */
			continue;
		}

		/*
		 * Process requests.  Each request can be multi-queued.
		 *
		 * If we get behind and the frontend VOP is no longer active,
		 * we retire the request without processing it.  The callback
		 * may also abort processing if the frontend VOP becomes
		 * inactive.
		 */
		if (flags & HAMMER2_THREAD_XOPQ) {
			/* clear XOPQ atomically; retry loop on cmpset race */
			nflags = flags & ~HAMMER2_THREAD_XOPQ;
			if (!atomic_cmpset_int(&thr->flags, flags, nflags))
				continue;
			flags = nflags;
			/* fall through */
		}
		while ((xop = hammer2_xop_next(thr)) != NULL) {
			if (hammer2_xop_active(xop)) {
				/* run the backend op for our cluster index */
				last_desc = xop->desc;
				xop->desc->storage_func((hammer2_xop_t *)xop,
							thr->scratch,
							thr->clindex);
				hammer2_xop_dequeue(thr, xop);
				hammer2_xop_retire(xop, mask);
			} else {
				/* frontend detached; feed an abort instead */
				last_desc = xop->desc;
				hammer2_xop_feed(xop, NULL, thr->clindex,
						 ECONNABORTED);
				hammer2_xop_dequeue(thr, xop);
				hammer2_xop_retire(xop, mask);
			}
		}

		/*
		 * Wait for event, interlock using THREAD_WAITING and
		 * THREAD_SIGNAL.
		 *
		 * For robustness poll on a 30-second interval, but nominally
		 * expect to be woken up.
		 */
		nflags = flags | HAMMER2_THREAD_WAITING;

		tsleep_interlock(&thr->flags, 0);
		if (atomic_cmpset_int(&thr->flags, flags, nflags)) {
			tsleep(&thr->flags, PINTERLOCKED, "h2idle", hz*30);
		}
	}

#if 0
	/*
	 * Cleanup / termination
	 */
	while ((xop = TAILQ_FIRST(&thr->xopq)) != NULL) {
		kprintf("hammer2_thread: aborting xop %s\n", xop->desc->id);
		TAILQ_REMOVE(&thr->xopq, xop,
			     collect[thr->clindex].entry);
		hammer2_xop_retire(xop, mask);
	}
#endif
	thr->td = NULL;
	hammer2_thr_signal(thr, HAMMER2_THREAD_STOPPED);
	/* thr structure can go invalid after this point */
}