xref: /dflybsd-src/sys/vfs/hammer2/hammer2_admin.c (revision cf1b3fafd1f08bd3a18fc9af3c431c48f600eb32)
/*
 * Copyright (c) 2015 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This module implements the hammer2 helper thread API, including
 * the frontend/backend XOP API.
 */
#include "hammer2.h"

/*
 * Signal that the thread has work.
 */
void
hammer2_thr_signal(hammer2_thread_t *thr, uint32_t flags)
{
	uint32_t oflags;

	for (;;) {
		oflags = thr->flags;
		cpu_ccfence();
		if (oflags & HAMMER2_THREAD_WAITING) {
			if (atomic_cmpset_int(&thr->flags, oflags,
				  (oflags | flags) & ~HAMMER2_THREAD_WAITING)) {
				wakeup(&thr->flags);
				break;
			}
		} else {
			if (atomic_cmpset_int(&thr->flags, oflags,
					      oflags | flags)) {
				break;
			}
		}
	}
}

/*
 * Return status to waiting client(s).
 */
void
hammer2_thr_return(hammer2_thread_t *thr, uint32_t flags)
{
	uint32_t oflags;
	uint32_t nflags;

	for (;;) {
		oflags = thr->flags;
		cpu_ccfence();
		nflags = (oflags | flags) & ~HAMMER2_THREAD_CLIENTWAIT;

		if (oflags & HAMMER2_THREAD_CLIENTWAIT) {
			if (atomic_cmpset_int(&thr->flags, oflags, nflags)) {
				wakeup(thr);
				break;
			}
		} else {
			if (atomic_cmpset_int(&thr->flags, oflags, nflags))
				break;
		}
	}
}

/*
 * Wait until the bits in flags are set.
 */
void
hammer2_thr_wait(hammer2_thread_t *thr, uint32_t flags)
{
	uint32_t oflags;
	uint32_t nflags;

	for (;;) {
		oflags = thr->flags;
		cpu_ccfence();
		if ((oflags & flags) == flags)
			break;
		nflags = oflags | HAMMER2_THREAD_CLIENTWAIT;
		tsleep_interlock(thr, 0);
		if (atomic_cmpset_int(&thr->flags, oflags, nflags)) {
			tsleep(thr, PINTERLOCKED, "h2twait", hz*60);
		}
	}
}

/*
 * Wait until the bits in flags are clear.
 */
void
hammer2_thr_wait_neg(hammer2_thread_t *thr, uint32_t flags)
{
	uint32_t oflags;
	uint32_t nflags;

	for (;;) {
		oflags = thr->flags;
		cpu_ccfence();
		if ((oflags & flags) == 0)
			break;
		nflags = oflags | HAMMER2_THREAD_CLIENTWAIT;
		tsleep_interlock(thr, 0);
		if (atomic_cmpset_int(&thr->flags, oflags, nflags)) {
			tsleep(thr, PINTERLOCKED, "h2twait", hz*60);
		}
	}
}

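/*
 * NOTE: The wait functions above use DragonFly's tsleep_interlock()/
 *	 PINTERLOCKED pattern: the sleep address is registered before
 *	 CLIENTWAIT is set with a CAS, so a hammer2_thr_return() that
 *	 fires between the CAS and the tsleep() still wakes us up.  The
 *	 60-second timeout is only a robustness poll; an explicit wakeup
 *	 is the expected path.
 */
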
/*
 * Initialize the supplied thread structure, starting the specified
 * thread.
 */
void
hammer2_thr_create(hammer2_thread_t *thr, hammer2_pfs_t *pmp,
		   const char *id, int clindex, int repidx,
		   void (*func)(void *arg))
{
	thr->pmp = pmp;
	thr->clindex = clindex;
	thr->repidx = repidx;
	if (repidx >= 0) {
		thr->xopq = &pmp->xopq[clindex][repidx];
		lwkt_create(func, thr, &thr->td, NULL, 0, repidx % ncpus,
			    "%s-%s.%02d", id, pmp->pfs_names[clindex], repidx);
	} else {
		thr->xopq = &pmp->xopq[clindex][HAMMER2_XOPGROUPS-repidx];
		lwkt_create(func, thr, &thr->td, NULL, 0, -1,
			    "%s-%s", id, pmp->pfs_names[clindex]);
	}
}

/*
 * Terminate a thread.  This function will silently return if the thread
 * was never initialized or has already been deleted.
 *
 * This is accomplished by setting the STOP flag and waiting for the td
 * structure to become NULL.
 */
void
hammer2_thr_delete(hammer2_thread_t *thr)
{
	if (thr->td == NULL)
		return;
	hammer2_thr_signal(thr, HAMMER2_THREAD_STOP);
	hammer2_thr_wait(thr, HAMMER2_THREAD_STOPPED);
	thr->pmp = NULL;
	thr->xopq = NULL;
}

/*
 * Asynchronous remaster request.  Ask the synchronization thread to
 * start over soon (as if it were frozen and unfrozen, but without waiting).
 * The thread always recalculates mastership relationships when restarting.
 */
void
hammer2_thr_remaster(hammer2_thread_t *thr)
{
	if (thr->td == NULL)
		return;
	hammer2_thr_signal(thr, HAMMER2_THREAD_REMASTER);
}

void
hammer2_thr_freeze_async(hammer2_thread_t *thr)
{
	hammer2_thr_signal(thr, HAMMER2_THREAD_FREEZE);
}

void
hammer2_thr_freeze(hammer2_thread_t *thr)
{
	if (thr->td == NULL)
		return;
	hammer2_thr_signal(thr, HAMMER2_THREAD_FREEZE);
	hammer2_thr_wait(thr, HAMMER2_THREAD_FROZEN);
}

void
hammer2_thr_unfreeze(hammer2_thread_t *thr)
{
	if (thr->td == NULL)
		return;
	hammer2_thr_signal(thr, HAMMER2_THREAD_UNFREEZE);
	hammer2_thr_wait_neg(thr, HAMMER2_THREAD_FROZEN);
}

int
hammer2_thr_break(hammer2_thread_t *thr)
{
	if (thr->flags & (HAMMER2_THREAD_STOP |
			  HAMMER2_THREAD_REMASTER |
			  HAMMER2_THREAD_FREEZE)) {
		return 1;
	}
	return 0;
}

/****************************************************************************
 *			    HAMMER2 XOPS API	 			    *
 ****************************************************************************/

void
hammer2_xop_group_init(hammer2_pfs_t *pmp, hammer2_xop_group_t *xgrp)
{
	/* no extra fields in structure at the moment */
}

/*
 * Allocate a XOP request.
 *
 * Once allocated a XOP request can be started, collected, and retired,
 * and can be retired early if desired.
 *
 * NOTE: Fifo indices might not be zero but ri == wi on objcache_get().
 */
void *
hammer2_xop_alloc(hammer2_inode_t *ip, int flags)
{
	hammer2_xop_t *xop;

	xop = objcache_get(cache_xops, M_WAITOK);
	KKASSERT(xop->head.cluster.array[0].chain == NULL);

	xop->head.ip1 = ip;
	xop->head.func = NULL;
	xop->head.flags = flags;
	xop->head.state = 0;
	xop->head.error = 0;
	xop->head.collect_key = 0;
	if (flags & HAMMER2_XOP_MODIFYING)
		xop->head.mtid = hammer2_trans_sub(ip->pmp);
	else
		xop->head.mtid = 0;

	xop->head.cluster.nchains = ip->cluster.nchains;
	xop->head.cluster.pmp = ip->pmp;
	xop->head.cluster.flags = HAMMER2_CLUSTER_LOCKED;

	/*
	 * run_mask - Active thread (or frontend) associated with XOP
	 */
	xop->head.run_mask = HAMMER2_XOPMASK_VOP;

	hammer2_inode_ref(ip);

	return xop;
}

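/*
 * A minimal sketch of the frontend lifecycle implied above (illustrative
 * only; real call sites live in the VOP frontends and use the concrete
 * per-operation xop types, and hammer2_xop_somefunc is a hypothetical
 * stand-in for a real backend function):
 *
 *	xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
 *	hammer2_xop_start(&xop->head, hammer2_xop_somefunc);
 *	error = hammer2_xop_collect(&xop->head, 0);
 *	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
 */
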
void
hammer2_xop_setname(hammer2_xop_head_t *xop, const char *name, size_t name_len)
{
	xop->name1 = kmalloc(name_len + 1, M_HAMMER2, M_WAITOK | M_ZERO);
	xop->name1_len = name_len;
	bcopy(name, xop->name1, name_len);
}

void
hammer2_xop_setname2(hammer2_xop_head_t *xop, const char *name, size_t name_len)
{
	xop->name2 = kmalloc(name_len + 1, M_HAMMER2, M_WAITOK | M_ZERO);
	xop->name2_len = name_len;
	bcopy(name, xop->name2, name_len);
}

size_t
hammer2_xop_setname_inum(hammer2_xop_head_t *xop, hammer2_key_t inum)
{
	const size_t name_len = 18;

	xop->name1 = kmalloc(name_len + 1, M_HAMMER2, M_WAITOK | M_ZERO);
	xop->name1_len = name_len;
	ksnprintf(xop->name1, name_len + 1, "0x%016jx", (intmax_t)inum);

	return name_len;
}

void
hammer2_xop_setip2(hammer2_xop_head_t *xop, hammer2_inode_t *ip2)
{
	xop->ip2 = ip2;
	hammer2_inode_ref(ip2);
}

void
hammer2_xop_setip3(hammer2_xop_head_t *xop, hammer2_inode_t *ip3)
{
	xop->ip3 = ip3;
	hammer2_inode_ref(ip3);
}

void
hammer2_xop_reinit(hammer2_xop_head_t *xop)
{
	xop->state = 0;
	xop->error = 0;
	xop->collect_key = 0;
	xop->run_mask = HAMMER2_XOPMASK_VOP;
}

/*
 * A mounted PFS needs Xops threads to support frontend operations.
 */
void
hammer2_xop_helper_create(hammer2_pfs_t *pmp)
{
	int i;
	int j;

	lockmgr(&pmp->lock, LK_EXCLUSIVE);
	pmp->has_xop_threads = 1;

	for (i = 0; i < pmp->iroot->cluster.nchains; ++i) {
		for (j = 0; j < HAMMER2_XOPGROUPS; ++j) {
			if (pmp->xop_groups[j].thrs[i].td)
				continue;
			hammer2_thr_create(&pmp->xop_groups[j].thrs[i], pmp,
					   "h2xop", i, j,
					   hammer2_primary_xops_thread);
		}
	}
	lockmgr(&pmp->lock, LK_RELEASE);
}

void
hammer2_xop_helper_cleanup(hammer2_pfs_t *pmp)
{
	int i;
	int j;

	for (i = 0; i < pmp->pfs_nmasters; ++i) {
		for (j = 0; j < HAMMER2_XOPGROUPS; ++j) {
			if (pmp->xop_groups[j].thrs[i].td)
				hammer2_thr_delete(&pmp->xop_groups[j].thrs[i]);
		}
	}
}

/*
 * Start a XOP request, queueing it to all nodes in the cluster to
 * execute the cluster op.
 *
 * XXX optimize single-target case.
 */
void
hammer2_xop_start_except(hammer2_xop_head_t *xop, hammer2_xop_func_t func,
			 int notidx)
{
	hammer2_inode_t *ip1;
#if 0
	hammer2_xop_group_t *xgrp;
	hammer2_thread_t *thr;
#endif
	hammer2_pfs_t *pmp;
	int i;
	int ng;
	int nchains;

	ip1 = xop->ip1;
	pmp = ip1->pmp;
	if (pmp->has_xop_threads == 0)
		hammer2_xop_helper_create(pmp);

	if (xop->flags & HAMMER2_XOP_ITERATOR) {
		ng = (int)(hammer2_icrc32(&xop->ip1, sizeof(xop->ip1)) ^
			   pmp->xop_iterator++);
	} else {
		ng = (int)(hammer2_icrc32(&xop->ip1, sizeof(xop->ip1)) ^
			   hammer2_icrc32(&func, sizeof(func)));
	}
	ng = ng & HAMMER2_XOPGROUPS_MASK;
#if 0
	g = pmp->xop_iterator++;
	g = g & HAMMER2_XOPGROUPS_MASK;
	xgrp = &pmp->xop_groups[g];
	xop->xgrp = xgrp;
#endif
	xop->func = func;

	/*
	 * The XOP sequencer is based on ip1, ip2, and ip3.  Because ops can
	 * finish early and unlock the related inodes, some targets may get
	 * behind.  The sequencer ensures that ops on the same inode execute
	 * in the same order.
	 *
	 * The instant xop is queued another thread can pick it off.  In the
	 * case of asynchronous ops, another thread might even finish and
	 * deallocate it.
	 */
	hammer2_spin_ex(&pmp->xop_spin);
	nchains = ip1->cluster.nchains;
	for (i = 0; i < nchains; ++i) {
		/*
		 * XXX ip1->cluster.array* not stable here.  This temporary
		 *     hack fixes basic issues in target XOPs which need to
		 *     obtain a starting chain from the inode but does not
		 *     address possible races against inode updates which
		 *     might NULL-out a chain.
		 */
		if (i != notidx && ip1->cluster.array[i].chain) {
			atomic_set_int(&xop->run_mask, 1U << i);
			atomic_set_int(&xop->chk_mask, 1U << i);
			TAILQ_INSERT_TAIL(&pmp->xopq[i][ng], xop, collect[i].entry);
		}
	}
	hammer2_spin_unex(&pmp->xop_spin);
	/* xop can become invalid at this point */

	/*
	 * Try to wakeup just one xop thread for each cluster node.
	 */
	for (i = 0; i < nchains; ++i) {
		if (i != notidx) {
			hammer2_thr_signal(&pmp->xop_groups[ng].thrs[i],
					   HAMMER2_THREAD_XOPQ);
		}
	}
}

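/*
 * NOTE: The 'ng' computation above selects one of the HAMMER2_XOPGROUPS
 *	 queues for each cluster node.  CRCing the ip1 pointer (xor'd
 *	 with the backend function pointer for normal ops) keeps ops on
 *	 the same inode-and-function on the same queue, so per-queue FIFO
 *	 order backs up the inode ordering the sequencer comment above
 *	 describes, while ITERATOR ops deliberately rotate through the
 *	 groups via xop_iterator.
 */
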
void
hammer2_xop_start(hammer2_xop_head_t *xop, hammer2_xop_func_t func)
{
	hammer2_xop_start_except(xop, func, -1);
}

/*
 * Retire a XOP.  Used by both the VOP frontend and by the XOP backend.
 */
void
hammer2_xop_retire(hammer2_xop_head_t *xop, uint32_t mask)
{
	hammer2_chain_t *chain;
	uint32_t nmask;
	int i;

	/*
	 * Remove the frontend collector or remove a backend feeder.
	 * When removing the frontend we must wakeup any backend feeders
	 * who are waiting for FIFO space.
	 *
	 * XXX optimize wakeup.
	 */
	KKASSERT(xop->run_mask & mask);
	nmask = atomic_fetchadd_int(&xop->run_mask, -mask);
	if ((nmask & ~HAMMER2_XOPMASK_FIFOW) != mask) {
		if (mask == HAMMER2_XOPMASK_VOP) {
			if (nmask & HAMMER2_XOPMASK_FIFOW)
				wakeup(xop);
		}
		return;
	}
	/* else nobody else left, we can ignore FIFOW */

	/*
	 * All collectors are gone, we can cleanup and dispose of the XOP.
	 * Note that this can wind up being a frontend OR a backend.
	 * Pending chains are locked shared and not owned by any thread.
	 */
#if 0
	/*
	 * Cache the terminating cluster.
	 */
	hammer2_inode_t *ip;
	if ((ip = xop->ip1) != NULL) {
		hammer2_cluster_t *tmpclu;

		tmpclu = hammer2_cluster_copy(&xop->cluster);
		hammer2_spin_ex(&ip->cluster_spin);
		tmpclu = atomic_swap_ptr((volatile void **)&ip->cluster_cache,
					 tmpclu);
		hammer2_spin_unex(&ip->cluster_spin);
		if (tmpclu)
			hammer2_cluster_drop(tmpclu);
	}
#endif

	/*
	 * Cleanup the collection cluster.
	 */
	for (i = 0; i < xop->cluster.nchains; ++i) {
		xop->cluster.array[i].flags = 0;
		chain = xop->cluster.array[i].chain;
		if (chain) {
			xop->cluster.array[i].chain = NULL;
			hammer2_chain_pull_shared_lock(chain);
			hammer2_chain_unlock(chain);
			hammer2_chain_drop(chain);
		}
	}

	/*
	 * Cleanup the fifos, use chk_mask to optimize the loop.
	 * Since we are the only entity left on this xop we don't have
	 * to worry about fifo flow control, and one lfence() will do the
	 * job.
	 */
	cpu_lfence();
	mask = xop->chk_mask;
	for (i = 0; mask && i < HAMMER2_MAXCLUSTER; ++i) {
		hammer2_xop_fifo_t *fifo = &xop->collect[i];
		while (fifo->ri != fifo->wi) {
			chain = fifo->array[fifo->ri & HAMMER2_XOPFIFO_MASK];
			if (chain) {
				hammer2_chain_pull_shared_lock(chain);
				hammer2_chain_unlock(chain);
				hammer2_chain_drop(chain);
			}
			++fifo->ri;
		}
		mask &= ~(1U << i);
	}

	/*
	 * The inode is only held at this point, simply drop it.
	 */
	if (xop->ip1) {
		hammer2_inode_drop(xop->ip1);
		xop->ip1 = NULL;
	}
	if (xop->ip2) {
		hammer2_inode_drop(xop->ip2);
		xop->ip2 = NULL;
	}
	if (xop->ip3) {
		hammer2_inode_drop(xop->ip3);
		xop->ip3 = NULL;
	}
	if (xop->name1) {
		kfree(xop->name1, M_HAMMER2);
		xop->name1 = NULL;
		xop->name1_len = 0;
	}
	if (xop->name2) {
		kfree(xop->name2, M_HAMMER2);
		xop->name2 = NULL;
		xop->name2_len = 0;
	}

	objcache_put(cache_xops, xop);
}

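/*
 * NOTE: run_mask is a set of single-bit references, one per participant
 *	 (HAMMER2_XOPMASK_VOP for the frontend, 1U << i per backend), so
 *	 the atomic_fetchadd_int(&xop->run_mask, -mask) above atomically
 *	 clears our own bit.  fetchadd returns the value prior to the
 *	 subtraction; if that value (ignoring the FIFOW wait bit) equals
 *	 our own mask we were the last participant out and must dispose
 *	 of the XOP.
 */
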
/*
 * (Backend) Returns non-zero if the frontend is still attached.
 */
int
hammer2_xop_active(hammer2_xop_head_t *xop)
{
	if (xop->run_mask & HAMMER2_XOPMASK_VOP)
		return 1;
	else
		return 0;
}

/*
 * (Backend) Feed chain data through the cluster validator and back to
 * the frontend.  Chains are fed from multiple nodes concurrently
 * and pipelined via per-node FIFOs in the XOP.
 *
 * The chain must be locked shared.  This function adds an additional
 * shared-lock and ref to the chain for the frontend to collect.  Caller
 * must still unlock/drop the chain.
 *
 * No xop lock is needed because we are only manipulating fields under
 * our direct control.
 *
 * Returns 0 on success: the chain (which may be NULL to feed an EOF or
 * pure error entry) has been queued to the per-node FIFO with its own
 * additional ref and shared lock for the frontend to collect.
 *
 * Returns non-zero (typically EINTR) if the frontend has detached and
 * the feed was aborted; nothing is queued.  In both cases the caller
 * retains its own ref and lock on the chain.
 *
 * WARNING!  The chain is moving between two different threads, it must
 *	     be locked SHARED to retain its data mapping, not exclusive.
 *	     When multiple operations are in progress at once, chains fed
 *	     back to the frontend for collection can wind up being locked
 *	     in different orders, only a shared lock can prevent a deadlock.
 *
 *	     Exclusive locks may only be used by a XOP backend node thread
 *	     temporarily, with no direct or indirect dependencies (aka
 *	     blocking/waiting) on other nodes.
 */
int
hammer2_xop_feed(hammer2_xop_head_t *xop, hammer2_chain_t *chain,
		 int clindex, int error)
{
	hammer2_xop_fifo_t *fifo;
	uint32_t mask;

	/*
	 * Early termination (typically of xop_readdir)
	 */
	if (hammer2_xop_active(xop) == 0) {
		error = EINTR;
		goto done;
	}

	/*
	 * Multi-threaded entry into the XOP collector.  We own the
	 * fifo->wi for our clindex.
	 */
	fifo = &xop->collect[clindex];

	if (fifo->ri == fifo->wi - HAMMER2_XOPFIFO)
		lwkt_yield();
	while (fifo->ri == fifo->wi - HAMMER2_XOPFIFO) {
		atomic_set_int(&fifo->flags, HAMMER2_XOP_FIFO_STALL);
		mask = xop->run_mask;
		if ((mask & HAMMER2_XOPMASK_VOP) == 0) {
			error = EINTR;
			goto done;
		}
		tsleep_interlock(xop, 0);
		if (atomic_cmpset_int(&xop->run_mask, mask,
				      mask | HAMMER2_XOPMASK_FIFOW)) {
			if (fifo->ri == fifo->wi - HAMMER2_XOPFIFO) {
				tsleep(xop, PINTERLOCKED, "h2feed", hz*60);
			}
		}
		/* retry */
	}
	atomic_clear_int(&fifo->flags, HAMMER2_XOP_FIFO_STALL);
	if (chain) {
		hammer2_chain_ref(chain);
		hammer2_chain_push_shared_lock(chain);
	}
	if (error == 0 && chain)
		error = chain->error;
	fifo->errors[fifo->wi & HAMMER2_XOPFIFO_MASK] = error;
	fifo->array[fifo->wi & HAMMER2_XOPFIFO_MASK] = chain;
	cpu_sfence();
	++fifo->wi;
	atomic_add_int(&xop->check_counter, 1);
	wakeup(&xop->check_counter);	/* XXX optimize */
	error = 0;

	/*
	 * Cleanup.  On success the fifo now holds its own additional ref
	 * and shared lock on the chain.  On error (EINTR above) nothing
	 * was queued.  In both cases the caller's own ref and lock remain
	 * and the caller must still unlock/drop the chain.
	 */
done:
	return error;
}

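/*
 * NOTE: The FIFO indices ri/wi above are free-running counters; only
 *	 (index & HAMMER2_XOPFIFO_MASK) is used to address the array, and
 *	 the FIFO is full when wi has advanced a full HAMMER2_XOPFIFO
 *	 ahead of ri.  The cpu_sfence() orders the array/error stores
 *	 before the wi increment that publishes them to the frontend.
 */
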
/*
 * (Frontend) collect a response from a running cluster op.
 *
 * Responses are fed from all appropriate nodes concurrently
 * and collected into a cohesive response >= collect_key.
 *
 * The collector will return the instant quorum or other requirements
 * are met, even if some nodes get behind or become non-responsive.
 *
 * HAMMER2_XOP_COLLECT_NOWAIT	- Used to 'poll' a completed collection,
 *				  usually called synchronously from the
 *				  node XOPs for the strategy code to
 *				  fake the frontend collection and complete
 *				  the BIO as soon as possible.
 *
 * HAMMER2_XOP_SYNCHRONIZER	- Request synchronization with a particular
 *				  cluster index, prevents looping when that
 *				  index is out of sync so caller can act on
 *				  the out of sync element.  ESRCH and EDEADLK
 *				  can be returned if this flag is specified.
 *
 * Returns 0 on success plus a filled out xop->cluster structure.
 * Return ENOENT on normal termination.
 * Otherwise return an error.
 */
int
hammer2_xop_collect(hammer2_xop_head_t *xop, int flags)
{
	hammer2_xop_fifo_t *fifo;
	hammer2_chain_t *chain;
	hammer2_key_t lokey;
	int error;
	int keynull;
	int adv;		/* advance the element */
	int i;
	uint32_t check_counter;

loop:
	/*
	 * First loop tries to advance pieces of the cluster which
	 * are out of sync.
	 */
	lokey = HAMMER2_KEY_MAX;
	keynull = HAMMER2_CHECK_NULL;
	check_counter = xop->check_counter;
	cpu_lfence();

	for (i = 0; i < xop->cluster.nchains; ++i) {
		chain = xop->cluster.array[i].chain;
		if (chain == NULL) {
			adv = 1;
		} else if (chain->bref.key < xop->collect_key) {
			adv = 1;
		} else {
			keynull &= ~HAMMER2_CHECK_NULL;
			if (lokey > chain->bref.key)
				lokey = chain->bref.key;
			adv = 0;
		}
		if (adv == 0)
			continue;

		/*
		 * Advance element if possible, advanced element may be NULL.
		 */
		if (chain) {
			hammer2_chain_unlock(chain);
			hammer2_chain_drop(chain);
		}
		fifo = &xop->collect[i];
		if (fifo->ri != fifo->wi) {
			cpu_lfence();
			chain = fifo->array[fifo->ri & HAMMER2_XOPFIFO_MASK];
			error = fifo->errors[fifo->ri & HAMMER2_XOPFIFO_MASK];
			++fifo->ri;
			xop->cluster.array[i].chain = chain;
			xop->cluster.array[i].error = error;
			if (chain == NULL) {
				/* XXX */
				xop->cluster.array[i].flags |=
							HAMMER2_CITEM_NULL;
			}
			if (fifo->wi - fifo->ri <= HAMMER2_XOPFIFO / 2) {
				if (fifo->flags & HAMMER2_XOP_FIFO_STALL) {
					atomic_clear_int(&fifo->flags,
						    HAMMER2_XOP_FIFO_STALL);
					wakeup(xop);
					lwkt_yield();
				}
			}
			--i;		/* loop on same index */
		} else {
			/*
			 * Retain CITEM_NULL flag.  If set just repeat EOF.
			 * If not, the NULL,0 combination indicates an
			 * operation in-progress.
			 */
			xop->cluster.array[i].chain = NULL;
			/* retain any CITEM_NULL setting */
		}
	}

	/*
	 * Determine whether the lowest collected key meets clustering
	 * requirements.  Returns:
	 *
	 * 0		 - key valid, cluster can be returned.
	 *
	 * ENOENT	 - normal end of scan, return ENOENT.
	 *
	 * ESRCH	 - sufficient elements collected, quorum agreement
	 *		   that lokey is not a valid element and should be
	 *		   skipped.
	 *
	 * EDEADLK	 - sufficient elements collected, no quorum agreement
	 *		   (and no agreement possible).  In this situation a
	 *		   repair is needed, for now we loop.
	 *
	 * EINPROGRESS	 - insufficient elements collected to resolve, wait
	 *		   for event and loop.
	 */
	if ((flags & HAMMER2_XOP_COLLECT_WAITALL) &&
	    xop->run_mask != HAMMER2_XOPMASK_VOP) {
		error = EINPROGRESS;
	} else {
		error = hammer2_cluster_check(&xop->cluster, lokey, keynull);
	}
	if (error == EINPROGRESS) {
		if (xop->check_counter == check_counter) {
			if (flags & HAMMER2_XOP_COLLECT_NOWAIT)
				goto done;
			tsleep_interlock(&xop->check_counter, 0);
			cpu_lfence();
			if (xop->check_counter == check_counter) {
				tsleep(&xop->check_counter, PINTERLOCKED,
					"h2coll", hz*60);
			}
		}
		goto loop;
	}
	if (error == ESRCH) {
		if (lokey != HAMMER2_KEY_MAX) {
			xop->collect_key = lokey + 1;
			goto loop;
		}
		error = ENOENT;
	}
	if (error == EDEADLK) {
		kprintf("hammer2: no quorum possible lokey %016jx\n",
			(intmax_t)lokey);
		if (lokey != HAMMER2_KEY_MAX) {
			xop->collect_key = lokey + 1;
			goto loop;
		}
		error = ENOENT;
	}
	if (lokey == HAMMER2_KEY_MAX)
		xop->collect_key = lokey;
	else
		xop->collect_key = lokey + 1;
done:
	return error;
}

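/*
 * A minimal sketch of a frontend collection loop implied by the return
 * conventions above (illustrative only):
 *
 *	for (;;) {
 *		error = hammer2_xop_collect(&xop->head, 0);
 *		if (error)
 *			break;		(ENOENT is normal termination)
 *		(consume the collected elements in xop->cluster)
 *	}
 *	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
 */
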
/*
 * N x M processing threads are available to handle XOPs, N per cluster
 * index x M cluster nodes.  All the threads for any given cluster index
 * share and pull from the same xopq.
 *
 * Locate and return the next runnable xop, or NULL if no xops are
 * present or none of the xops are currently runnable (for various reasons).
 * The xop is left on the queue and serves to block other dependent xops
 * from being run.
 *
 * Dependent xops will not be returned.
 *
 * Sets HAMMER2_XOP_FIFO_RUN on the returned xop or returns NULL.
 *
 * NOTE! Xops run concurrently for each cluster index.
 */
#define XOP_HASH_SIZE	16
#define XOP_HASH_MASK	(XOP_HASH_SIZE - 1)

static __inline
int
xop_testhash(hammer2_thread_t *thr, hammer2_inode_t *ip, uint32_t *hash)
{
	uint32_t mask;
	int hv;

	hv = (int)((uintptr_t)ip + (uintptr_t)thr) / sizeof(hammer2_inode_t);
	mask = 1U << (hv & 31);
	hv >>= 5;

	return ((int)(hash[hv & XOP_HASH_MASK] & mask));
}

static __inline
void
xop_sethash(hammer2_thread_t *thr, hammer2_inode_t *ip, uint32_t *hash)
{
	uint32_t mask;
	int hv;

	hv = (int)((uintptr_t)ip + (uintptr_t)thr) / sizeof(hammer2_inode_t);
	mask = 1U << (hv & 31);
	hv >>= 5;

	hash[hv & XOP_HASH_MASK] |= mask;
}

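/*
 * NOTE: xop_testhash()/xop_sethash() implement, in effect, a tiny
 *	 per-scan Bloom-style filter: 16 x 32 = 512 bits, one bit per
 *	 hashed inode.  hammer2_xop_next() below marks the inodes of each
 *	 candidate xop it examines; a later xop hitting an already-marked
 *	 inode may depend on an earlier queued xop and is skipped.  A
 *	 false positive merely delays an xop, it never reorders one.
 */
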
static
hammer2_xop_head_t *
hammer2_xop_next(hammer2_thread_t *thr)
{
	hammer2_pfs_t *pmp = thr->pmp;
	int clindex = thr->clindex;
	uint32_t hash[XOP_HASH_SIZE] = { 0 };
	hammer2_xop_head_t *xop;

	hammer2_spin_ex(&pmp->xop_spin);
	TAILQ_FOREACH(xop, thr->xopq, collect[clindex].entry) {
		/*
		 * Check dependency
		 */
		if (xop_testhash(thr, xop->ip1, hash) ||
		    (xop->ip2 && xop_testhash(thr, xop->ip2, hash)) ||
		    (xop->ip3 && xop_testhash(thr, xop->ip3, hash))) {
			continue;
		}
		xop_sethash(thr, xop->ip1, hash);
		if (xop->ip2)
			xop_sethash(thr, xop->ip2, hash);
		if (xop->ip3)
			xop_sethash(thr, xop->ip3, hash);

		/*
		 * Check already running
		 */
		if (xop->collect[clindex].flags & HAMMER2_XOP_FIFO_RUN)
			continue;

		/*
		 * Found a good one, return it.
		 */
		atomic_set_int(&xop->collect[clindex].flags,
			       HAMMER2_XOP_FIFO_RUN);
		break;
	}
	hammer2_spin_unex(&pmp->xop_spin);

	return xop;
}

/*
 * Remove the completed XOP from the queue, clear HAMMER2_XOP_FIFO_RUN.
 *
 * NOTE! Xops run concurrently for each cluster index.
 */
static
void
hammer2_xop_dequeue(hammer2_thread_t *thr, hammer2_xop_head_t *xop)
{
	hammer2_pfs_t *pmp = thr->pmp;
	int clindex = thr->clindex;

	hammer2_spin_ex(&pmp->xop_spin);
	TAILQ_REMOVE(thr->xopq, xop, collect[clindex].entry);
	atomic_clear_int(&xop->collect[clindex].flags,
			 HAMMER2_XOP_FIFO_RUN);
	hammer2_spin_unex(&pmp->xop_spin);
}

/*
 * Primary management thread for xops support.  Each cluster node has
 * several such threads which replicate front-end operations on that node.
 *
 * Each thread executes operations against a single node in the cluster;
 * the frontend collector validates the results against the cluster as a
 * whole.  This is primarily what prevents dead or stalled nodes from
 * stalling the front-end.
 */
void
hammer2_primary_xops_thread(void *arg)
{
	hammer2_thread_t *thr = arg;
	hammer2_pfs_t *pmp;
	hammer2_xop_head_t *xop;
	uint32_t mask;
	uint32_t flags;
	uint32_t nflags;
	hammer2_xop_func_t last_func = NULL;

	pmp = thr->pmp;
	/*xgrp = &pmp->xop_groups[thr->repidx]; not needed */
	mask = 1U << thr->clindex;

	for (;;) {
		flags = thr->flags;

		/*
		 * Handle stop request
		 */
		if (flags & HAMMER2_THREAD_STOP)
			break;

		/*
		 * Handle freeze request
		 */
		if (flags & HAMMER2_THREAD_FREEZE) {
			nflags = (flags & ~(HAMMER2_THREAD_FREEZE |
					    HAMMER2_THREAD_CLIENTWAIT)) |
				 HAMMER2_THREAD_FROZEN;
			if (!atomic_cmpset_int(&thr->flags, flags, nflags))
				continue;
			if (flags & HAMMER2_THREAD_CLIENTWAIT)
				wakeup(&thr->flags);
			flags = nflags;
			/* fall through */
		}

		if (flags & HAMMER2_THREAD_UNFREEZE) {
			nflags = flags & ~(HAMMER2_THREAD_UNFREEZE |
					   HAMMER2_THREAD_FROZEN |
					   HAMMER2_THREAD_CLIENTWAIT);
			if (!atomic_cmpset_int(&thr->flags, flags, nflags))
				continue;
			if (flags & HAMMER2_THREAD_CLIENTWAIT)
				wakeup(&thr->flags);
			flags = nflags;
			/* fall through */
		}

		/*
		 * Force idle if frozen until unfrozen or stopped.
		 */
		if (flags & HAMMER2_THREAD_FROZEN) {
			nflags = flags | HAMMER2_THREAD_WAITING;
			tsleep_interlock(&thr->flags, 0);
			if (atomic_cmpset_int(&thr->flags, flags, nflags)) {
				tsleep(&thr->flags, PINTERLOCKED, "frozen", 0);
				atomic_clear_int(&thr->flags,
						 HAMMER2_THREAD_WAITING);
			}
			continue;
		}

		/*
		 * Reset state on REMASTER request
		 */
		if (flags & HAMMER2_THREAD_REMASTER) {
			nflags = flags & ~HAMMER2_THREAD_REMASTER;
			if (atomic_cmpset_int(&thr->flags, flags, nflags)) {
				/* reset state here */
			}
			continue;
		}

		/*
		 * Process requests.  Each request can be multi-queued.
		 *
		 * If we get behind and the frontend VOP is no longer active,
		 * we retire the request without processing it.  The callback
		 * may also abort processing if the frontend VOP becomes
		 * inactive.
		 */
		if (flags & HAMMER2_THREAD_XOPQ) {
			nflags = flags & ~HAMMER2_THREAD_XOPQ;
			if (!atomic_cmpset_int(&thr->flags, flags, nflags))
				continue;
			flags = nflags;
			/* fall through */
		}
		while ((xop = hammer2_xop_next(thr)) != NULL) {
			if (hammer2_xop_active(xop)) {
				last_func = xop->func;
				xop->func((hammer2_xop_t *)xop, thr->clindex);
				hammer2_xop_dequeue(thr, xop);
				hammer2_xop_retire(xop, mask);
			} else {
				last_func = xop->func;
				hammer2_xop_feed(xop, NULL, thr->clindex,
						 ECONNABORTED);
				hammer2_xop_dequeue(thr, xop);
				hammer2_xop_retire(xop, mask);
			}
		}

		/*
		 * Wait for event, interlock using THREAD_WAITING and
		 * THREAD_SIGNAL.
		 *
		 * For robustness poll on a 30-second interval, but nominally
		 * expect to be woken up.
		 */
		nflags = flags | HAMMER2_THREAD_WAITING;

		tsleep_interlock(&thr->flags, 0);
		if (atomic_cmpset_int(&thr->flags, flags, nflags)) {
			tsleep(&thr->flags, PINTERLOCKED, "h2idle", hz*30);
			atomic_clear_int(&thr->flags, HAMMER2_THREAD_WAITING);
		}
	}

#if 0
	/*
	 * Cleanup / termination
	 */
	while ((xop = TAILQ_FIRST(&thr->xopq)) != NULL) {
		kprintf("hammer2_thread: aborting xop %p\n", xop->func);
		TAILQ_REMOVE(&thr->xopq, xop,
			     collect[thr->clindex].entry);
		hammer2_xop_retire(xop, mask);
	}
#endif
	thr->td = NULL;
	hammer2_thr_return(thr, HAMMER2_THREAD_STOPPED);
	/* thr structure can go invalid after this point */
	wakeup(thr);
}