/*
 * Copyright (c) 2015 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This module implements the hammer2 helper thread API, including
 * the frontend/backend XOP API.
 */
#include "hammer2.h"

/*
 * Signal that the thread has work.
 */
void
hammer2_thr_signal(hammer2_thread_t *thr, uint32_t flags)
{
        uint32_t oflags;

        for (;;) {
                oflags = thr->flags;
                cpu_ccfence();
                if (oflags & HAMMER2_THREAD_WAITING) {
                        if (atomic_cmpset_int(&thr->flags, oflags,
                                  (oflags | flags) & ~HAMMER2_THREAD_WAITING)) {
                                wakeup(&thr->flags);
                                break;
                        }
                } else {
                        if (atomic_cmpset_int(&thr->flags, oflags,
                                              oflags | flags)) {
                                break;
                        }
                }
        }
}

/*
 * Return status to waiting client(s).
 */
void
hammer2_thr_return(hammer2_thread_t *thr, uint32_t flags)
{
        uint32_t oflags;
        uint32_t nflags;

        for (;;) {
                oflags = thr->flags;
                cpu_ccfence();
                nflags = (oflags | flags) & ~HAMMER2_THREAD_CLIENTWAIT;

                if (oflags & HAMMER2_THREAD_CLIENTWAIT) {
                        if (atomic_cmpset_int(&thr->flags, oflags, nflags)) {
                                wakeup(thr);
                                break;
                        }
                } else {
                        if (atomic_cmpset_int(&thr->flags, oflags, nflags))
                                break;
                }
        }
}

/*
 * Wait until the bits in flags are set.
 */
void
hammer2_thr_wait(hammer2_thread_t *thr, uint32_t flags)
{
        uint32_t oflags;
        uint32_t nflags;

        for (;;) {
                oflags = thr->flags;
                cpu_ccfence();
                if ((oflags & flags) == flags)
                        break;
                nflags = oflags | HAMMER2_THREAD_CLIENTWAIT;
                tsleep_interlock(thr, 0);
                if (atomic_cmpset_int(&thr->flags, oflags, nflags)) {
                        tsleep(thr, PINTERLOCKED, "h2twait", hz*60);
                }
        }
}
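/*
 * Illustrative sketch (not compiled): a client-side command round-trip
 * built from the primitives above.  The client posts a request bit with
 * hammer2_thr_signal() and blocks in hammer2_thr_wait() until the helper
 * thread acknowledges via hammer2_thr_return().  The FREEZE/FROZEN pair
 * used by hammer2_thr_freeze() below follows exactly this pattern; the
 * flag names used here are hypothetical.
 */
#if 0
static void
example_thr_command(hammer2_thread_t *thr)
{
        /* post the (hypothetical) request bit, waking the thread if idle */
        hammer2_thr_signal(thr, HAMMER2_THREAD_EXAMPLE_REQ);

        /* block until the helper posts the (hypothetical) ack bit */
        hammer2_thr_wait(thr, HAMMER2_THREAD_EXAMPLE_ACK);
}
#endif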
/*
 * Wait until the bits in flags are clear.
 */
void
hammer2_thr_wait_neg(hammer2_thread_t *thr, uint32_t flags)
{
        uint32_t oflags;
        uint32_t nflags;

        for (;;) {
                oflags = thr->flags;
                cpu_ccfence();
                if ((oflags & flags) == 0)
                        break;
                nflags = oflags | HAMMER2_THREAD_CLIENTWAIT;
                tsleep_interlock(thr, 0);
                if (atomic_cmpset_int(&thr->flags, oflags, nflags)) {
                        tsleep(thr, PINTERLOCKED, "h2twait", hz*60);
                }
        }
}

/*
 * Initialize the supplied thread structure, starting the specified
 * thread.
 */
void
hammer2_thr_create(hammer2_thread_t *thr, hammer2_pfs_t *pmp,
                   const char *id, int clindex, int repidx,
                   void (*func)(void *arg))
{
        thr->pmp = pmp;
        thr->clindex = clindex;
        thr->repidx = repidx;
        if (repidx >= 0) {
                thr->xopq = &pmp->xopq[clindex][repidx];
                lwkt_create(func, thr, &thr->td, NULL, 0, repidx % ncpus,
                            "%s-%s.%02d", id, pmp->pfs_names[clindex], repidx);
        } else {
                thr->xopq = &pmp->xopq[clindex][HAMMER2_XOPGROUPS-repidx];
                lwkt_create(func, thr, &thr->td, NULL, 0, -1,
                            "%s-%s", id, pmp->pfs_names[clindex]);
        }
}

/*
 * Terminate a thread.  This function will silently return if the thread
 * was never initialized or has already been deleted.
 *
 * This is accomplished by setting the STOP flag and waiting for the td
 * structure to become NULL.
 */
void
hammer2_thr_delete(hammer2_thread_t *thr)
{
        if (thr->td == NULL)
                return;
        hammer2_thr_signal(thr, HAMMER2_THREAD_STOP);
        hammer2_thr_wait(thr, HAMMER2_THREAD_STOPPED);
        thr->pmp = NULL;
        thr->xopq = NULL;
}
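/*
 * Illustrative sketch (not compiled): the minimal shape of a thread
 * function honoring the create/delete contract above.  On STOP the
 * function must clear thr->td and post STOPPED via hammer2_thr_return()
 * so hammer2_thr_delete() can make progress; compare with
 * hammer2_primary_xops_thread() at the end of this file.
 */
#if 0
static void
example_thread(void *arg)
{
        hammer2_thread_t *thr = arg;

        while ((thr->flags & HAMMER2_THREAD_STOP) == 0) {
                /* ... do one unit of work or sleep for a signal ... */
        }
        thr->td = NULL;
        hammer2_thr_return(thr, HAMMER2_THREAD_STOPPED);
        /* thr may be reused or freed by the waiter at this point */
}
#endif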
/*
 * Asynchronous remaster request.  Ask the synchronization thread to
 * start over soon (as if it were frozen and unfrozen, but without waiting).
 * The thread always recalculates mastership relationships when restarting.
 */
void
hammer2_thr_remaster(hammer2_thread_t *thr)
{
        if (thr->td == NULL)
                return;
        hammer2_thr_signal(thr, HAMMER2_THREAD_REMASTER);
}

void
hammer2_thr_freeze_async(hammer2_thread_t *thr)
{
        hammer2_thr_signal(thr, HAMMER2_THREAD_FREEZE);
}

void
hammer2_thr_freeze(hammer2_thread_t *thr)
{
        if (thr->td == NULL)
                return;
        hammer2_thr_signal(thr, HAMMER2_THREAD_FREEZE);
        hammer2_thr_wait(thr, HAMMER2_THREAD_FROZEN);
}

void
hammer2_thr_unfreeze(hammer2_thread_t *thr)
{
        if (thr->td == NULL)
                return;
        hammer2_thr_signal(thr, HAMMER2_THREAD_UNFREEZE);
        hammer2_thr_wait_neg(thr, HAMMER2_THREAD_FROZEN);
}

int
hammer2_thr_break(hammer2_thread_t *thr)
{
        if (thr->flags & (HAMMER2_THREAD_STOP |
                          HAMMER2_THREAD_REMASTER |
                          HAMMER2_THREAD_FREEZE)) {
                return 1;
        }
        return 0;
}
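/*
 * Illustrative sketch (not compiled): hammer2_thr_break() lets a
 * long-running helper poll for pending STOP/REMASTER/FREEZE requests
 * and unwind a scan early instead of blocking the requester.  The loop
 * body here is hypothetical.
 */
#if 0
static void
example_long_scan(hammer2_thread_t *thr)
{
        for (;;) {
                if (hammer2_thr_break(thr))
                        return;         /* unwind; caller rechecks thr->flags */
                /* ... process the next bounded chunk of the scan ... */
        }
}
#endif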
/****************************************************************************
 *                             HAMMER2 XOPS API                             *
 ****************************************************************************/

void
hammer2_xop_group_init(hammer2_pfs_t *pmp, hammer2_xop_group_t *xgrp)
{
        /* no extra fields in structure at the moment */
}

/*
 * Allocate a XOP request.
 *
 * Once allocated, a XOP request can be started, collected, and retired,
 * and can be retired early if desired.
 *
 * NOTE: Fifo indices might not be zero but ri == wi on objcache_get().
 */
void *
hammer2_xop_alloc(hammer2_inode_t *ip, int flags)
{
        hammer2_xop_t *xop;

        xop = objcache_get(cache_xops, M_WAITOK);
        KKASSERT(xop->head.cluster.array[0].chain == NULL);

        xop->head.ip1 = ip;
        xop->head.func = NULL;
        xop->head.flags = flags;
        xop->head.state = 0;
        xop->head.error = 0;
        xop->head.collect_key = 0;
        if (flags & HAMMER2_XOP_MODIFYING)
                xop->head.mtid = hammer2_trans_sub(ip->pmp);
        else
                xop->head.mtid = 0;

        xop->head.cluster.nchains = ip->cluster.nchains;
        xop->head.cluster.pmp = ip->pmp;
        xop->head.cluster.flags = HAMMER2_CLUSTER_LOCKED;

        /*
         * run_mask - Active thread (or frontend) associated with XOP
         */
        xop->head.run_mask = HAMMER2_XOPMASK_VOP;

        hammer2_inode_ref(ip);

        return xop;
}

void
hammer2_xop_setname(hammer2_xop_head_t *xop, const char *name, size_t name_len)
{
        xop->name1 = kmalloc(name_len + 1, M_HAMMER2, M_WAITOK | M_ZERO);
        xop->name1_len = name_len;
        bcopy(name, xop->name1, name_len);
}

void
hammer2_xop_setname2(hammer2_xop_head_t *xop, const char *name, size_t name_len)
{
        xop->name2 = kmalloc(name_len + 1, M_HAMMER2, M_WAITOK | M_ZERO);
        xop->name2_len = name_len;
        bcopy(name, xop->name2, name_len);
}

size_t
hammer2_xop_setname_inum(hammer2_xop_head_t *xop, hammer2_key_t inum)
{
        const size_t name_len = 18;

        xop->name1 = kmalloc(name_len + 1, M_HAMMER2, M_WAITOK | M_ZERO);
        xop->name1_len = name_len;
        ksnprintf(xop->name1, name_len + 1, "0x%016jx", (intmax_t)inum);

        return name_len;
}

void
hammer2_xop_setip2(hammer2_xop_head_t *xop, hammer2_inode_t *ip2)
{
        xop->ip2 = ip2;
        hammer2_inode_ref(ip2);
}
void
hammer2_xop_setip3(hammer2_xop_head_t *xop, hammer2_inode_t *ip3)
{
        xop->ip3 = ip3;
        hammer2_inode_ref(ip3);
}

void
hammer2_xop_reinit(hammer2_xop_head_t *xop)
{
        xop->state = 0;
        xop->error = 0;
        xop->collect_key = 0;
        xop->run_mask = HAMMER2_XOPMASK_VOP;
}

/*
 * A mounted PFS needs Xops threads to support frontend operations.
 */
void
hammer2_xop_helper_create(hammer2_pfs_t *pmp)
{
        int i;
        int j;

        lockmgr(&pmp->lock, LK_EXCLUSIVE);
        pmp->has_xop_threads = 1;

        for (i = 0; i < pmp->iroot->cluster.nchains; ++i) {
                for (j = 0; j < HAMMER2_XOPGROUPS; ++j) {
                        if (pmp->xop_groups[j].thrs[i].td)
                                continue;
                        hammer2_thr_create(&pmp->xop_groups[j].thrs[i], pmp,
                                           "h2xop", i, j,
                                           hammer2_primary_xops_thread);
                }
        }
        lockmgr(&pmp->lock, LK_RELEASE);
}

void
hammer2_xop_helper_cleanup(hammer2_pfs_t *pmp)
{
        int i;
        int j;

        for (i = 0; i < pmp->pfs_nmasters; ++i) {
                for (j = 0; j < HAMMER2_XOPGROUPS; ++j) {
                        if (pmp->xop_groups[j].thrs[i].td)
                                hammer2_thr_delete(&pmp->xop_groups[j].thrs[i]);
                }
        }
}
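/*
 * Illustrative sketch (not compiled): the canonical frontend lifecycle
 * built from the functions above and below: allocate, parameterize,
 * start the backend, collect until done, retire.  The backend function
 * name hammer2_xop_example is hypothetical, and real callers hold the
 * relevant inode lock(s) across the operation.
 */
#if 0
static int
example_frontend_op(hammer2_inode_t *ip, const char *name, size_t name_len)
{
        hammer2_xop_head_t *xop;
        int error;

        xop = hammer2_xop_alloc(ip, 0);
        hammer2_xop_setname(xop, name, name_len);
        hammer2_xop_start(xop, hammer2_xop_example);

        while ((error = hammer2_xop_collect(xop, 0)) == 0) {
                /* ... consume xop->cluster, one element per iteration ... */
        }
        if (error == ENOENT)            /* normal termination */
                error = 0;
        hammer2_xop_retire(xop, HAMMER2_XOPMASK_VOP);
        return error;
}
#endif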
/*
 * Start a XOP request, queueing it to all nodes in the cluster to
 * execute the cluster op.
 *
 * XXX optimize single-target case.
 */
void
hammer2_xop_start_except(hammer2_xop_head_t *xop, hammer2_xop_func_t func,
                         int notidx)
{
        hammer2_inode_t *ip1;
#if 0
        hammer2_xop_group_t *xgrp;
        hammer2_thread_t *thr;
#endif
        hammer2_pfs_t *pmp;
        int i;
        int ng;
        int nchains;

        ip1 = xop->ip1;
        pmp = ip1->pmp;
        if (pmp->has_xop_threads == 0)
                hammer2_xop_helper_create(pmp);

        if (xop->flags & HAMMER2_XOP_ITERATOR) {
                ng = (int)(hammer2_icrc32(&xop->ip1, sizeof(xop->ip1)) ^
                           pmp->xop_iterator++);
        } else {
                ng = (int)(hammer2_icrc32(&xop->ip1, sizeof(xop->ip1)) ^
                           hammer2_icrc32(&func, sizeof(func)));
        }
        ng = ng & HAMMER2_XOPGROUPS_MASK;
#if 0
        g = pmp->xop_iterator++;
        g = g & HAMMER2_XOPGROUPS_MASK;
        xgrp = &pmp->xop_groups[g];
        xop->xgrp = xgrp;
#endif
        xop->func = func;

        /*
         * The XOP sequencer is based on ip1, ip2, and ip3.  Because ops can
         * finish early and unlock the related inodes, some targets may get
         * behind.  The sequencer ensures that ops on the same inode execute
         * in the same order.
         *
         * The instant xop is queued another thread can pick it off.  In the
         * case of asynchronous ops, another thread might even finish and
         * deallocate it.
         */
        hammer2_spin_ex(&pmp->xop_spin);
        nchains = ip1->cluster.nchains;
        for (i = 0; i < nchains; ++i) {
                /*
                 * XXX ip1->cluster.array* not stable here.  This temporary
                 * hack fixes basic issues in target XOPs which need to
                 * obtain a starting chain from the inode but does not
                 * address possible races against inode updates which
                 * might NULL-out a chain.
                 */
                if (i != notidx && ip1->cluster.array[i].chain) {
                        atomic_set_int(&xop->run_mask, 1U << i);
                        atomic_set_int(&xop->chk_mask, 1U << i);
                        TAILQ_INSERT_TAIL(&pmp->xopq[i][ng], xop,
                                          collect[i].entry);
                }
        }
        hammer2_spin_unex(&pmp->xop_spin);
        /* xop can become invalid at this point */

        /*
         * Try to wakeup just one xop thread for each cluster node.
         */
        for (i = 0; i < nchains; ++i) {
                if (i != notidx) {
                        hammer2_thr_signal(&pmp->xop_groups[ng].thrs[i],
                                           HAMMER2_THREAD_XOPQ);
                }
        }
}

void
hammer2_xop_start(hammer2_xop_head_t *xop, hammer2_xop_func_t func)
{
        hammer2_xop_start_except(xop, func, -1);
}
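/*
 * Illustrative sketch (not compiled): the shape of a backend XOP
 * function as dispatched by hammer2_primary_xops_thread() below.  Each
 * node thread runs the function with its own clindex and feeds results
 * to the frontend via hammer2_xop_feed(), terminating the feed with an
 * error code (ENOENT indicating normal EOF).  The lookup helper
 * example_next_chain() is hypothetical.
 */
#if 0
static void
hammer2_xop_example(hammer2_xop_t *xop, int clindex)
{
        hammer2_chain_t *chain;
        int error = 0;

        /*
         * Hypothetical lookup loop: fetch the next shared-locked chain
         * for this cluster node and feed it to the frontend.
         */
        while ((chain = example_next_chain(xop, clindex)) != NULL) {
                error = hammer2_xop_feed(&xop->head, chain, clindex, 0);
                hammer2_chain_unlock(chain);    /* feed took its own lock/ref */
                hammer2_chain_drop(chain);
                if (error)
                        break;
        }

        /* always terminate this node's feed */
        hammer2_xop_feed(&xop->head, NULL, clindex, error ? error : ENOENT);
}
#endif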
/*
 * Retire a XOP.  Used by both the VOP frontend and by the XOP backend.
 */
void
hammer2_xop_retire(hammer2_xop_head_t *xop, uint32_t mask)
{
        hammer2_chain_t *chain;
        uint32_t nmask;
        int i;

        /*
         * Remove the frontend collector or remove a backend feeder.
         * When removing the frontend we must wakeup any backend feeders
         * who are waiting for FIFO space.
         *
         * XXX optimize wakeup.
         */
        KKASSERT(xop->run_mask & mask);
        nmask = atomic_fetchadd_int(&xop->run_mask, -mask);
        if ((nmask & ~HAMMER2_XOPMASK_FIFOW) != mask) {
                if (mask == HAMMER2_XOPMASK_VOP) {
                        if (nmask & HAMMER2_XOPMASK_FIFOW)
                                wakeup(xop);
                }
                return;
        }
        /* else nobody else left, we can ignore FIFOW */

        /*
         * All collectors are gone, we can cleanup and dispose of the XOP.
         * Note that this can wind up being a frontend OR a backend.
         * Pending chains are locked shared and not owned by any thread.
         */
#if 0
        /*
         * Cache the terminating cluster.
         */
        hammer2_inode_t *ip;
        if ((ip = xop->ip1) != NULL) {
                hammer2_cluster_t *tmpclu;

                tmpclu = hammer2_cluster_copy(&xop->cluster);
                hammer2_spin_ex(&ip->cluster_spin);
                tmpclu = atomic_swap_ptr((volatile void **)&ip->cluster_cache,
                                         tmpclu);
                hammer2_spin_unex(&ip->cluster_spin);
                if (tmpclu)
                        hammer2_cluster_drop(tmpclu);
        }
#endif

        /*
         * Cleanup the collection cluster.
         */
        for (i = 0; i < xop->cluster.nchains; ++i) {
                xop->cluster.array[i].flags = 0;
                chain = xop->cluster.array[i].chain;
                if (chain) {
                        xop->cluster.array[i].chain = NULL;
                        hammer2_chain_pull_shared_lock(chain);
                        hammer2_chain_unlock(chain);
                        hammer2_chain_drop(chain);
                }
        }

        /*
         * Cleanup the fifos, use chk_mask to optimize the loop.
         * Since we are the only entity left on this xop we don't have
         * to worry about fifo flow control, and one lfence() will do the
         * job.
         */
        cpu_lfence();
        mask = xop->chk_mask;
        for (i = 0; mask && i < HAMMER2_MAXCLUSTER; ++i) {
                hammer2_xop_fifo_t *fifo = &xop->collect[i];
                while (fifo->ri != fifo->wi) {
                        chain = fifo->array[fifo->ri & HAMMER2_XOPFIFO_MASK];
                        if (chain) {
                                hammer2_chain_pull_shared_lock(chain);
                                hammer2_chain_unlock(chain);
                                hammer2_chain_drop(chain);
                        }
                        ++fifo->ri;
                }
                mask &= ~(1U << i);
        }

        /*
         * The inode is only held at this point, simply drop it.
         */
        if (xop->ip1) {
                hammer2_inode_drop(xop->ip1);
                xop->ip1 = NULL;
        }
        if (xop->ip2) {
                hammer2_inode_drop(xop->ip2);
                xop->ip2 = NULL;
        }
        if (xop->ip3) {
                hammer2_inode_drop(xop->ip3);
                xop->ip3 = NULL;
        }
        if (xop->name1) {
                kfree(xop->name1, M_HAMMER2);
                xop->name1 = NULL;
                xop->name1_len = 0;
        }
        if (xop->name2) {
                kfree(xop->name2, M_HAMMER2);
                xop->name2 = NULL;
                xop->name2_len = 0;
        }

        objcache_put(cache_xops, xop);
}

/*
 * (Backend) Returns non-zero if the frontend is still attached.
 */
int
hammer2_xop_active(hammer2_xop_head_t *xop)
{
        if (xop->run_mask & HAMMER2_XOPMASK_VOP)
                return 1;
        else
                return 0;
}

/*
 * (Backend) Feed chain data through the cluster validator and back to
 * the frontend.  Chains are fed from multiple nodes concurrently
 * and pipelined via per-node FIFOs in the XOP.
 *
 * The chain must be locked shared.  This function adds an additional
 * shared-lock and ref to the chain for the frontend to collect.  The
 * caller must still unlock/drop its own lock and ref.
 *
 * No xop lock is needed because we are only manipulating fields under
 * our direct control.
 *
 * Returns 0 on success, in which case the fifo inherits the extra
 * shared lock and ref for the frontend to consume.
 *
 * Returns non-zero (EINTR) if the frontend is no longer attached and
 * sync is permanently lost.  In this situation the chain is not queued
 * and the caller retains its own lock and ref.
 *
 * WARNING!  The chain is moving between two different threads, it must
 *           be locked SHARED to retain its data mapping, not exclusive.
 *           When multiple operations are in progress at once, chains fed
 *           back to the frontend for collection can wind up being locked
 *           in different orders, only a shared lock can prevent a deadlock.
 *
 *           Exclusive locks may only be used by a XOP backend node thread
 *           temporarily, with no direct or indirect dependencies (aka
 *           blocking/waiting) on other nodes.
 */
int
hammer2_xop_feed(hammer2_xop_head_t *xop, hammer2_chain_t *chain,
                 int clindex, int error)
{
        hammer2_xop_fifo_t *fifo;
        uint32_t mask;

        /*
         * Early termination (typically of xop_readdir)
         */
        if (hammer2_xop_active(xop) == 0) {
                error = EINTR;
                goto done;
        }

        /*
         * Multi-threaded entry into the XOP collector.  We own the
         * fifo->wi for our clindex.
         */
        fifo = &xop->collect[clindex];

        if (fifo->ri == fifo->wi - HAMMER2_XOPFIFO)
                lwkt_yield();
        while (fifo->ri == fifo->wi - HAMMER2_XOPFIFO) {
                atomic_set_int(&fifo->flags, HAMMER2_XOP_FIFO_STALL);
                mask = xop->run_mask;
                if ((mask & HAMMER2_XOPMASK_VOP) == 0) {
                        error = EINTR;
                        goto done;
                }
                tsleep_interlock(xop, 0);
                if (atomic_cmpset_int(&xop->run_mask, mask,
                                      mask | HAMMER2_XOPMASK_FIFOW)) {
                        if (fifo->ri == fifo->wi - HAMMER2_XOPFIFO) {
                                tsleep(xop, PINTERLOCKED, "h2feed", hz*60);
                        }
                }
                /* retry */
        }
        atomic_clear_int(&fifo->flags, HAMMER2_XOP_FIFO_STALL);
        if (chain) {
                hammer2_chain_ref(chain);
                hammer2_chain_push_shared_lock(chain);
        }
        if (error == 0 && chain)
                error = chain->error;
        fifo->errors[fifo->wi & HAMMER2_XOPFIFO_MASK] = error;
        fifo->array[fifo->wi & HAMMER2_XOPFIFO_MASK] = chain;
        cpu_sfence();
        ++fifo->wi;
        atomic_add_int(&xop->check_counter, 1);
        wakeup(&xop->check_counter);    /* XXX optimize */
        error = 0;

        /*
         * Cleanup.  On error the chain was never queued and the caller
         * retains its own lock and ref.  On success the fifo gained an
         * additional shared lock and ref; the caller's own lock and ref
         * remain in both cases.
         */
done:
        return error;
}
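/*
 * Illustrative sketch (not compiled): the per-node FIFO above uses
 * free-running indices rather than wrapping ones.  ri == wi means
 * empty, wi - ri == HAMMER2_XOPFIFO means full, and slots are addressed
 * modulo the power-of-2 size via HAMMER2_XOPFIFO_MASK, which is why the
 * flow-control tests above compare raw index differences.
 */
#if 0
static int
example_fifo_full(hammer2_xop_fifo_t *fifo)
{
        /* indices are free-running; empty: ri == wi, full: difference == size */
        return ((fifo->wi - fifo->ri) == HAMMER2_XOPFIFO);
}
#endif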
/*
 * (Frontend) collect a response from a running cluster op.
 *
 * Responses are fed from all appropriate nodes concurrently
 * and collected into a cohesive response >= collect_key.
 *
 * The collector will return the instant quorum or other requirements
 * are met, even if some nodes get behind or become non-responsive.
 *
 * HAMMER2_XOP_COLLECT_NOWAIT   - Used to 'poll' a completed collection,
 *                                usually called synchronously from the
 *                                node XOPs for the strategy code to
 *                                fake the frontend collection and complete
 *                                the BIO as soon as possible.
 *
 * HAMMER2_XOP_SYNCHRONIZER     - Request synchronization with a particular
 *                                cluster index, prevents looping when that
 *                                index is out of sync so caller can act on
 *                                the out of sync element.  ESRCH and EDEADLK
 *                                can be returned if this flag is specified.
 *
 * Returns 0 on success plus a filled out xop->cluster structure.
 * Return ENOENT on normal termination.
 * Otherwise return an error.
 */
int
hammer2_xop_collect(hammer2_xop_head_t *xop, int flags)
{
        hammer2_xop_fifo_t *fifo;
        hammer2_chain_t *chain;
        hammer2_key_t lokey;
        int error;
        int keynull;
        int adv;                /* advance the element */
        int i;
        uint32_t check_counter;

loop:
        /*
         * First loop tries to advance pieces of the cluster which
         * are out of sync.
         */
        lokey = HAMMER2_KEY_MAX;
        keynull = HAMMER2_CHECK_NULL;
        check_counter = xop->check_counter;
        cpu_lfence();

        for (i = 0; i < xop->cluster.nchains; ++i) {
                chain = xop->cluster.array[i].chain;
                if (chain == NULL) {
                        adv = 1;
                } else if (chain->bref.key < xop->collect_key) {
                        adv = 1;
                } else {
                        keynull &= ~HAMMER2_CHECK_NULL;
                        if (lokey > chain->bref.key)
                                lokey = chain->bref.key;
                        adv = 0;
                }
                if (adv == 0)
                        continue;

                /*
                 * Advance element if possible, advanced element may be NULL.
                 */
                if (chain) {
                        hammer2_chain_unlock(chain);
                        hammer2_chain_drop(chain);
                }
                fifo = &xop->collect[i];
                if (fifo->ri != fifo->wi) {
                        cpu_lfence();
                        chain = fifo->array[fifo->ri & HAMMER2_XOPFIFO_MASK];
                        error = fifo->errors[fifo->ri & HAMMER2_XOPFIFO_MASK];
                        ++fifo->ri;
                        xop->cluster.array[i].chain = chain;
                        xop->cluster.array[i].error = error;
                        if (chain == NULL) {
                                /* XXX */
                                xop->cluster.array[i].flags |=
                                                        HAMMER2_CITEM_NULL;
                        }
                        if (fifo->wi - fifo->ri <= HAMMER2_XOPFIFO / 2) {
                                if (fifo->flags & HAMMER2_XOP_FIFO_STALL) {
                                        atomic_clear_int(&fifo->flags,
                                                    HAMMER2_XOP_FIFO_STALL);
                                        wakeup(xop);
                                        lwkt_yield();
                                }
                        }
                        --i;            /* loop on same index */
                } else {
                        /*
                         * Retain CITEM_NULL flag.  If set just repeat EOF.
                         * If not, the NULL,0 combination indicates an
                         * operation in-progress.
                         */
                        xop->cluster.array[i].chain = NULL;
                        /* retain any CITEM_NULL setting */
                }
        }

        /*
         * Determine whether the lowest collected key meets clustering
         * requirements.  Returns:
         *
         * 0          - key valid, cluster can be returned.
         *
         * ENOENT     - normal end of scan, return ENOENT.
         *
         * ESRCH      - sufficient elements collected, quorum agreement
         *              that lokey is not a valid element and should be
         *              skipped.
         *
         * EDEADLK    - sufficient elements collected, no quorum agreement
         *              (and no agreement possible).  In this situation a
         *              repair is needed, for now we loop.
         *
         * EINPROGRESS - insufficient elements collected to resolve, wait
         *               for event and loop.
         */
        if ((flags & HAMMER2_XOP_COLLECT_WAITALL) &&
            xop->run_mask != HAMMER2_XOPMASK_VOP) {
                error = EINPROGRESS;
        } else {
                error = hammer2_cluster_check(&xop->cluster, lokey, keynull);
        }
        if (error == EINPROGRESS) {
                if (xop->check_counter == check_counter) {
                        if (flags & HAMMER2_XOP_COLLECT_NOWAIT)
                                goto done;
                        tsleep_interlock(&xop->check_counter, 0);
                        cpu_lfence();
                        if (xop->check_counter == check_counter) {
                                tsleep(&xop->check_counter, PINTERLOCKED,
                                        "h2coll", hz*60);
                        }
                }
                goto loop;
        }
        if (error == ESRCH) {
                if (lokey != HAMMER2_KEY_MAX) {
                        xop->collect_key = lokey + 1;
                        goto loop;
                }
                error = ENOENT;
        }
        if (error == EDEADLK) {
                kprintf("hammer2: no quorum possible lokey %016jx\n",
                        lokey);
                if (lokey != HAMMER2_KEY_MAX) {
                        xop->collect_key = lokey + 1;
                        goto loop;
                }
                error = ENOENT;
        }
        if (lokey == HAMMER2_KEY_MAX)
                xop->collect_key = lokey;
        else
                xop->collect_key = lokey + 1;
done:
        return error;
}
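/*
 * Illustrative sketch (not compiled): polling a collection with
 * HAMMER2_XOP_COLLECT_NOWAIT, as described for the strategy path in the
 * comment above.  EINPROGRESS means elements are still outstanding and
 * the caller should poll again later; the completion helper
 * example_complete_bio() is hypothetical.
 */
#if 0
static void
example_strategy_poll(hammer2_xop_head_t *xop)
{
        int error;

        error = hammer2_xop_collect(xop, HAMMER2_XOP_COLLECT_NOWAIT);
        if (error == EINPROGRESS)
                return;                 /* not ready, poll again later */
        /* 0 = data available in xop->cluster, ENOENT = EOF, else error */
        example_complete_bio(xop, error);
}
#endif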
/*
 * N x M processing threads are available to handle XOPs, N per cluster
 * index x M cluster nodes.  All the threads for any given cluster index
 * share and pull from the same xopq.
 *
 * Locate and return the next runnable xop, or NULL if no xops are
 * present or none of the xops are currently runnable (for various reasons).
 * The xop is left on the queue and serves to block other dependent xops
 * from being run.
 *
 * Dependent xops will not be returned.
 *
 * Sets HAMMER2_XOP_FIFO_RUN on the returned xop or returns NULL.
 *
 * NOTE! Xops run concurrently for each cluster index.
 */
#define XOP_HASH_SIZE   16
#define XOP_HASH_MASK   (XOP_HASH_SIZE - 1)

static __inline
int
xop_testhash(hammer2_thread_t *thr, hammer2_inode_t *ip, uint32_t *hash)
{
        uint32_t mask;
        int hv;

        hv = (int)((uintptr_t)ip + (uintptr_t)thr) / sizeof(hammer2_inode_t);
        mask = 1U << (hv & 31);
        hv >>= 5;

        return ((int)(hash[hv & XOP_HASH_MASK] & mask));
}

static __inline
void
xop_sethash(hammer2_thread_t *thr, hammer2_inode_t *ip, uint32_t *hash)
{
        uint32_t mask;
        int hv;

        hv = (int)((uintptr_t)ip + (uintptr_t)thr) / sizeof(hammer2_inode_t);
        mask = 1U << (hv & 31);
        hv >>= 5;

        hash[hv & XOP_HASH_MASK] |= mask;
}

static
hammer2_xop_head_t *
hammer2_xop_next(hammer2_thread_t *thr)
{
        hammer2_pfs_t *pmp = thr->pmp;
        int clindex = thr->clindex;
        uint32_t hash[XOP_HASH_SIZE] = { 0 };
        hammer2_xop_head_t *xop;

        hammer2_spin_ex(&pmp->xop_spin);
        TAILQ_FOREACH(xop, thr->xopq, collect[clindex].entry) {
                /*
                 * Check dependency
                 */
                if (xop_testhash(thr, xop->ip1, hash) ||
                    (xop->ip2 && xop_testhash(thr, xop->ip2, hash)) ||
                    (xop->ip3 && xop_testhash(thr, xop->ip3, hash))) {
                        continue;
                }
                xop_sethash(thr, xop->ip1, hash);
                if (xop->ip2)
                        xop_sethash(thr, xop->ip2, hash);
                if (xop->ip3)
                        xop_sethash(thr, xop->ip3, hash);

                /*
                 * Check already running
                 */
                if (xop->collect[clindex].flags & HAMMER2_XOP_FIFO_RUN)
                        continue;

                /*
                 * Found a good one, return it.
                 */
                atomic_set_int(&xop->collect[clindex].flags,
                               HAMMER2_XOP_FIFO_RUN);
                break;
        }
        hammer2_spin_unex(&pmp->xop_spin);

        return xop;
}

/*
 * Remove the completed XOP from the queue, clear HAMMER2_XOP_FIFO_RUN.
 *
 * NOTE! Xops run concurrently for each cluster index.
 */
static
void
hammer2_xop_dequeue(hammer2_thread_t *thr, hammer2_xop_head_t *xop)
{
        hammer2_pfs_t *pmp = thr->pmp;
        int clindex = thr->clindex;

        hammer2_spin_ex(&pmp->xop_spin);
        TAILQ_REMOVE(thr->xopq, xop, collect[clindex].entry);
        atomic_clear_int(&xop->collect[clindex].flags,
                         HAMMER2_XOP_FIFO_RUN);
        hammer2_spin_unex(&pmp->xop_spin);
}

/*
 * Primary management thread for xops support.  Each node has several such
 * threads which replicate front-end operations on cluster nodes.
 *
 * XOPS threads localize node operations, allowing each function to focus
 * on a single node in the cluster after the operation has been validated
 * against the cluster.  This is primarily what prevents dead or stalled
 * nodes from stalling the front-end.
 */
void
hammer2_primary_xops_thread(void *arg)
{
        hammer2_thread_t *thr = arg;
        hammer2_pfs_t *pmp;
        hammer2_xop_head_t *xop;
        uint32_t mask;
        uint32_t flags;
        uint32_t nflags;
        hammer2_xop_func_t last_func = NULL;

        pmp = thr->pmp;
        /*xgrp = &pmp->xop_groups[thr->repidx]; not needed */
        mask = 1U << thr->clindex;

        for (;;) {
                flags = thr->flags;

                /*
                 * Handle stop request
                 */
                if (flags & HAMMER2_THREAD_STOP)
                        break;

                /*
                 * Handle freeze request
                 */
                if (flags & HAMMER2_THREAD_FREEZE) {
                        nflags = (flags & ~(HAMMER2_THREAD_FREEZE |
                                            HAMMER2_THREAD_CLIENTWAIT)) |
                                 HAMMER2_THREAD_FROZEN;
                        if (!atomic_cmpset_int(&thr->flags, flags, nflags))
                                continue;
                        if (flags & HAMMER2_THREAD_CLIENTWAIT)
                                wakeup(&thr->flags);
                        flags = nflags;
                        /* fall through */
                }

                if (flags & HAMMER2_THREAD_UNFREEZE) {
                        nflags = flags & ~(HAMMER2_THREAD_UNFREEZE |
                                           HAMMER2_THREAD_FROZEN |
                                           HAMMER2_THREAD_CLIENTWAIT);
                        if (!atomic_cmpset_int(&thr->flags, flags, nflags))
                                continue;
                        if (flags & HAMMER2_THREAD_CLIENTWAIT)
                                wakeup(&thr->flags);
                        flags = nflags;
                        /* fall through */
                }

                /*
                 * Force idle if frozen until unfrozen or stopped.
                 */
                if (flags & HAMMER2_THREAD_FROZEN) {
                        nflags = flags | HAMMER2_THREAD_WAITING;
                        tsleep_interlock(&thr->flags, 0);
                        if (atomic_cmpset_int(&thr->flags, flags, nflags)) {
                                tsleep(&thr->flags, PINTERLOCKED, "frozen", 0);
                                atomic_clear_int(&thr->flags,
                                                 HAMMER2_THREAD_WAITING);
                        }
                        continue;
                }

                /*
                 * Reset state on REMASTER request
                 */
                if (flags & HAMMER2_THREAD_REMASTER) {
                        nflags = flags & ~HAMMER2_THREAD_REMASTER;
                        if (atomic_cmpset_int(&thr->flags, flags, nflags)) {
                                /* reset state here */
                        }
                        continue;
                }

                /*
                 * Process requests.  Each request can be multi-queued.
                 *
                 * If we get behind and the frontend VOP is no longer active,
                 * we retire the request without processing it.  The callback
                 * may also abort processing if the frontend VOP becomes
                 * inactive.
                 */
                if (flags & HAMMER2_THREAD_XOPQ) {
                        nflags = flags & ~HAMMER2_THREAD_XOPQ;
                        if (!atomic_cmpset_int(&thr->flags, flags, nflags))
                                continue;
                        flags = nflags;
                        /* fall through */
                }
                while ((xop = hammer2_xop_next(thr)) != NULL) {
                        if (hammer2_xop_active(xop)) {
                                last_func = xop->func;
                                xop->func((hammer2_xop_t *)xop, thr->clindex);
                                hammer2_xop_dequeue(thr, xop);
                                hammer2_xop_retire(xop, mask);
                        } else {
                                last_func = xop->func;
                                hammer2_xop_feed(xop, NULL, thr->clindex,
                                                 ECONNABORTED);
                                hammer2_xop_dequeue(thr, xop);
                                hammer2_xop_retire(xop, mask);
                        }
                }

                /*
                 * Wait for event, interlock using THREAD_WAITING and
                 * THREAD_SIGNAL.
                 *
                 * For robustness poll on a 30-second interval, but nominally
                 * expect to be woken up.
                 */
                nflags = flags | HAMMER2_THREAD_WAITING;

                tsleep_interlock(&thr->flags, 0);
                if (atomic_cmpset_int(&thr->flags, flags, nflags)) {
                        tsleep(&thr->flags, PINTERLOCKED, "h2idle", hz*30);
                        atomic_clear_int(&thr->flags, HAMMER2_THREAD_WAITING);
                }
        }

#if 0
        /*
         * Cleanup / termination
         */
        while ((xop = TAILQ_FIRST(&thr->xopq)) != NULL) {
                kprintf("hammer2_thread: aborting xop %p\n", xop->func);
                TAILQ_REMOVE(&thr->xopq, xop,
                             collect[thr->clindex].entry);
                hammer2_xop_retire(xop, mask);
        }
#endif
        thr->td = NULL;
        hammer2_thr_return(thr, HAMMER2_THREAD_STOPPED);
        /* thr structure can go invalid after this point */
        wakeup(thr);
}
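/*
 * Worked example (illustrative): on a 3-node cluster a single XOP started
 * via hammer2_xop_start() computes one group index ng, sets node bits 0-2
 * in run_mask/chk_mask (in addition to the frontend's own
 * HAMMER2_XOPMASK_VOP bit from allocation), and is inserted on
 * pmp->xopq[0][ng], xopq[1][ng], and xopq[2][ng].  Three node threads,
 * one per cluster index, all in group ng, each run xop->func(xop, clindex)
 * and feed results into their per-node FIFO; the frontend consumes the
 * three FIFOs in hammer2_xop_collect(), and the XOP is finally freed by
 * whichever participant drops the last run_mask bit in
 * hammer2_xop_retire().
 */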