xref: /dflybsd-src/sys/vfs/hammer2/hammer2_cluster.c (revision b9777b339da12cff5379fd6e5feb596f55ac7b37)
1278ab2b2SMatthew Dillon /*
268b321c1SMatthew Dillon  * Copyright (c) 2013-2018 The DragonFly Project.  All rights reserved.
3278ab2b2SMatthew Dillon  *
4278ab2b2SMatthew Dillon  * This code is derived from software contributed to The DragonFly Project
5278ab2b2SMatthew Dillon  * by Matthew Dillon <dillon@dragonflybsd.org>
6278ab2b2SMatthew Dillon  *
7278ab2b2SMatthew Dillon  * Redistribution and use in source and binary forms, with or without
8278ab2b2SMatthew Dillon  * modification, are permitted provided that the following conditions
9278ab2b2SMatthew Dillon  * are met:
10278ab2b2SMatthew Dillon  *
11278ab2b2SMatthew Dillon  * 1. Redistributions of source code must retain the above copyright
12278ab2b2SMatthew Dillon  *    notice, this list of conditions and the following disclaimer.
13278ab2b2SMatthew Dillon  * 2. Redistributions in binary form must reproduce the above copyright
14278ab2b2SMatthew Dillon  *    notice, this list of conditions and the following disclaimer in
15278ab2b2SMatthew Dillon  *    the documentation and/or other materials provided with the
16278ab2b2SMatthew Dillon  *    distribution.
17278ab2b2SMatthew Dillon  * 3. Neither the name of The DragonFly Project nor the names of its
18278ab2b2SMatthew Dillon  *    contributors may be used to endorse or promote products derived
19278ab2b2SMatthew Dillon  *    from this software without specific, prior written permission.
20278ab2b2SMatthew Dillon  *
21278ab2b2SMatthew Dillon  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22278ab2b2SMatthew Dillon  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23278ab2b2SMatthew Dillon  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24278ab2b2SMatthew Dillon  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25278ab2b2SMatthew Dillon  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26278ab2b2SMatthew Dillon  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27278ab2b2SMatthew Dillon  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28278ab2b2SMatthew Dillon  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29278ab2b2SMatthew Dillon  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30278ab2b2SMatthew Dillon  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31278ab2b2SMatthew Dillon  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32278ab2b2SMatthew Dillon  * SUCH DAMAGE.
33278ab2b2SMatthew Dillon  */
34278ab2b2SMatthew Dillon /*
35278ab2b2SMatthew Dillon  * The cluster module collects multiple chains representing the same
36fe73aa5dSMatthew Dillon  * information from different nodes into a single entity.  It allows direct
37fe73aa5dSMatthew Dillon  * access to media data as long as it is not blockref array data (which
38fe73aa5dSMatthew Dillon  * will obviously have to be different at each node).
39278ab2b2SMatthew Dillon  *
40278ab2b2SMatthew Dillon  * This module also handles I/O dispatch, status rollup, and various
41278ab2b2SMatthew Dillon  * mastership arrangements including quorum operations.  It effectively
42278ab2b2SMatthew Dillon  * presents one topology to the vnops layer.
43278ab2b2SMatthew Dillon  *
44278ab2b2SMatthew Dillon  * Many of the API calls mimic chain API calls but operate on clusters
45278ab2b2SMatthew Dillon  * instead of chains.  Please see hammer2_chain.c for more complete code
46278ab2b2SMatthew Dillon  * documentation of the API functions.
47fe73aa5dSMatthew Dillon  *
48fe73aa5dSMatthew Dillon  * WARNING! This module is *extremely* complex.  It must issue asynchronous
49fe73aa5dSMatthew Dillon  *	    locks and I/O, do quorum and/or master-slave processing, and
50fe73aa5dSMatthew Dillon  *	    it must operate properly even if some nodes are broken (which
51fe73aa5dSMatthew Dillon  *	    can also mean indefinite locks).
527750fd72SMatthew Dillon  *
537750fd72SMatthew Dillon  *				CLUSTER OPERATIONS
547750fd72SMatthew Dillon  *
557750fd72SMatthew Dillon  * Cluster operations can be broken down into three pieces:
567750fd72SMatthew Dillon  *
577750fd72SMatthew Dillon  * (1) Chain locking and data retrieval.
587750fd72SMatthew Dillon  *
597750fd72SMatthew Dillon  *	- Most complex functions, quorum management on transaction ids.
607750fd72SMatthew Dillon  *
617750fd72SMatthew Dillon  *	- Locking and data accesses must be internally asynchronous.
627750fd72SMatthew Dillon  *
637750fd72SMatthew Dillon  *	- Validate and manage cache coherency primitives (cache state
647750fd72SMatthew Dillon  *	  is stored in chain topologies but must be validated by these
657750fd72SMatthew Dillon  *	  functions).
667750fd72SMatthew Dillon  *
677750fd72SMatthew Dillon  * (2) Lookups and Scans
687750fd72SMatthew Dillon  *		hammer2_cluster_lookup()
697750fd72SMatthew Dillon  *		hammer2_cluster_next()
707750fd72SMatthew Dillon  *
717750fd72SMatthew Dillon  *	- Depend on locking & data retrieval functions, but still complex.
727750fd72SMatthew Dillon  *
737750fd72SMatthew Dillon  *	- Must do quorum management on transaction ids.
747750fd72SMatthew Dillon  *
757750fd72SMatthew Dillon  *	- Lookup and Iteration ops Must be internally asynchronous.
767750fd72SMatthew Dillon  *
777750fd72SMatthew Dillon  * (3) Modifying Operations
787750fd72SMatthew Dillon  *		hammer2_cluster_create()
797750fd72SMatthew Dillon  *
807750fd72SMatthew Dillon  *	- Can usually punt on failures, operation continues unless quorum
817750fd72SMatthew Dillon  *	  is lost.  If quorum is lost, must wait for resynchronization
827750fd72SMatthew Dillon  *	  (depending on the management mode).
837750fd72SMatthew Dillon  *
847750fd72SMatthew Dillon  *	- Must disconnect node on failures (also not flush), remount, and
857750fd72SMatthew Dillon  *	  resynchronize.
867750fd72SMatthew Dillon  *
877750fd72SMatthew Dillon  *	- Network links (via kdmsg) are relatively easy to issue as the
 *	  complex underworkings of hammer2_chain.c don't have to be messed
 *	  with (the protocol is at a higher level than block-level).
907750fd72SMatthew Dillon  *
917750fd72SMatthew Dillon  *	- Multiple local disk nodes (i.e. block devices) are another matter.
927750fd72SMatthew Dillon  *	  Chain operations have to be dispatched to per-node threads (xN)
937750fd72SMatthew Dillon  *	  because we can't asynchronize potentially very complex chain
947750fd72SMatthew Dillon  *	  operations in hammer2_chain.c (it would be a huge mess).
957750fd72SMatthew Dillon  *
967750fd72SMatthew Dillon  *	  (these threads are also used to terminate incoming kdmsg ops from
977750fd72SMatthew Dillon  *	  other machines).
987750fd72SMatthew Dillon  *
997750fd72SMatthew Dillon  *	- Single-node filesystems do not use threads and will simply call
1007750fd72SMatthew Dillon  *	  hammer2_chain.c functions directly.  This short-cut is handled
1017750fd72SMatthew Dillon  *	  at the base of each cluster function.
102278ab2b2SMatthew Dillon  */
103278ab2b2SMatthew Dillon #include <sys/cdefs.h>
104278ab2b2SMatthew Dillon #include <sys/param.h>
105278ab2b2SMatthew Dillon #include <sys/systm.h>
106278ab2b2SMatthew Dillon #include <sys/types.h>
107278ab2b2SMatthew Dillon 
108278ab2b2SMatthew Dillon #include "hammer2.h"
109278ab2b2SMatthew Dillon 
11005dd26e4SMatthew Dillon /*
111b93cc2e0SMatthew Dillon  * Returns the bref type of the cluster's foucs.
112b93cc2e0SMatthew Dillon  *
113b93cc2e0SMatthew Dillon  * If the cluster is errored, returns HAMMER2_BREF_TYPE_EMPTY (0).
114b93cc2e0SMatthew Dillon  * The cluster must be locked.
115b93cc2e0SMatthew Dillon  */
116278ab2b2SMatthew Dillon uint8_t
hammer2_cluster_type(hammer2_cluster_t * cluster)117278ab2b2SMatthew Dillon hammer2_cluster_type(hammer2_cluster_t *cluster)
118278ab2b2SMatthew Dillon {
119c847e838SMatthew Dillon 	if (cluster->error == 0) {
120c847e838SMatthew Dillon 		KKASSERT(cluster->focus != NULL);
121278ab2b2SMatthew Dillon 		return(cluster->focus->bref.type);
122c847e838SMatthew Dillon 	}
123b93cc2e0SMatthew Dillon 	return 0;
124278ab2b2SMatthew Dillon }
125278ab2b2SMatthew Dillon 
126b93cc2e0SMatthew Dillon /*
127b93cc2e0SMatthew Dillon  * Returns the bref of the cluster's focus, sans any data-offset information
128b93cc2e0SMatthew Dillon  * (since offset information is per-node and wouldn't be useful).
129b93cc2e0SMatthew Dillon  *
130e513e77eSMatthew Dillon  * Callers use this function to access modify_tid, mirror_tid, type,
131e513e77eSMatthew Dillon  * key, and keybits.
132b93cc2e0SMatthew Dillon  *
133b93cc2e0SMatthew Dillon  * If the cluster is errored, returns an empty bref.
134b93cc2e0SMatthew Dillon  * The cluster must be locked.
13584e47819SMatthew Dillon  */
136278ab2b2SMatthew Dillon void
hammer2_cluster_bref(hammer2_cluster_t * cluster,hammer2_blockref_t * bref)137278ab2b2SMatthew Dillon hammer2_cluster_bref(hammer2_cluster_t *cluster, hammer2_blockref_t *bref)
138278ab2b2SMatthew Dillon {
139b93cc2e0SMatthew Dillon 	if (cluster->error == 0) {
140c847e838SMatthew Dillon 		KKASSERT(cluster->focus != NULL);
141278ab2b2SMatthew Dillon 		*bref = cluster->focus->bref;
142b93cc2e0SMatthew Dillon 		bref->data_off = 0;
143b93cc2e0SMatthew Dillon 	} else {
144b93cc2e0SMatthew Dillon 		bzero(bref, sizeof(*bref));
145b93cc2e0SMatthew Dillon 	}
146278ab2b2SMatthew Dillon }
147278ab2b2SMatthew Dillon 
14871008001SMatthew Dillon /*
149b93cc2e0SMatthew Dillon  * Create a degenerate cluster with one ref from a single locked chain.
150b93cc2e0SMatthew Dillon  * The returned cluster will be focused on the chain and inherit its
151b93cc2e0SMatthew Dillon  * error state.
152fe73aa5dSMatthew Dillon  *
153b93cc2e0SMatthew Dillon  * The chain's lock and reference are transfered to the new cluster, so
154b93cc2e0SMatthew Dillon  * the caller should not try to unlock the chain separately.
15522211834SMatthew Dillon  *
15622211834SMatthew Dillon  * We fake the flags.
15784e47819SMatthew Dillon  */
158fda30e02SMatthew Dillon void
hammer2_dummy_xop_from_chain(hammer2_xop_head_t * xop,hammer2_chain_t * chain)159fda30e02SMatthew Dillon hammer2_dummy_xop_from_chain(hammer2_xop_head_t *xop, hammer2_chain_t *chain)
16084e47819SMatthew Dillon {
16184e47819SMatthew Dillon 	hammer2_cluster_t *cluster;
16284e47819SMatthew Dillon 
163fda30e02SMatthew Dillon 	bzero(xop, sizeof(*xop));
164fda30e02SMatthew Dillon 
165fda30e02SMatthew Dillon 	cluster = &xop->cluster;
1664b7e61e0SMatthew Dillon 	cluster->array[0].chain = chain;
167e513e77eSMatthew Dillon 	cluster->array[0].flags = HAMMER2_CITEM_FEMOD;
16884e47819SMatthew Dillon 	cluster->nchains = 1;
16984e47819SMatthew Dillon 	cluster->focus = chain;
1708db69c9fSMatthew Dillon 	cluster->focus_index = 0;
17150456506SMatthew Dillon 	cluster->pmp = chain->pmp;
17284e47819SMatthew Dillon 	cluster->refs = 1;
173b93cc2e0SMatthew Dillon 	cluster->error = chain->error;
17422211834SMatthew Dillon 	cluster->flags = HAMMER2_CLUSTER_LOCKED |
17522211834SMatthew Dillon 			 HAMMER2_CLUSTER_WRHARD |
17622211834SMatthew Dillon 			 HAMMER2_CLUSTER_RDHARD |
17722211834SMatthew Dillon 			 HAMMER2_CLUSTER_MSYNCED |
17822211834SMatthew Dillon 			 HAMMER2_CLUSTER_SSYNCED;
179278ab2b2SMatthew Dillon }
180278ab2b2SMatthew Dillon 
181278ab2b2SMatthew Dillon /*
182b93cc2e0SMatthew Dillon  * Add a reference to a cluster and its underlying chains.
183278ab2b2SMatthew Dillon  *
184278ab2b2SMatthew Dillon  * We must also ref the underlying chains in order to allow ref/unlock
185278ab2b2SMatthew Dillon  * sequences to later re-lock.
186278ab2b2SMatthew Dillon  */
187278ab2b2SMatthew Dillon void
hammer2_cluster_ref(hammer2_cluster_t * cluster)188278ab2b2SMatthew Dillon hammer2_cluster_ref(hammer2_cluster_t *cluster)
189278ab2b2SMatthew Dillon {
190f7712c43SMatthew Dillon 	atomic_add_int(&cluster->refs, 1);
191278ab2b2SMatthew Dillon }
192278ab2b2SMatthew Dillon 
193278ab2b2SMatthew Dillon /*
194278ab2b2SMatthew Dillon  * Drop the caller's reference to the cluster.  When the ref count drops to
195278ab2b2SMatthew Dillon  * zero this function frees the cluster and drops all underlying chains.
196bca9f8e6SMatthew Dillon  *
197bca9f8e6SMatthew Dillon  * In-progress read I/Os are typically detached from the cluster once the
198bca9f8e6SMatthew Dillon  * first one returns (the remaining stay attached to the DIOs but are then
199bca9f8e6SMatthew Dillon  * ignored and drop naturally).
200278ab2b2SMatthew Dillon  */
201278ab2b2SMatthew Dillon void
hammer2_cluster_drop(hammer2_cluster_t * cluster)202278ab2b2SMatthew Dillon hammer2_cluster_drop(hammer2_cluster_t *cluster)
203278ab2b2SMatthew Dillon {
20484e47819SMatthew Dillon 	hammer2_chain_t *chain;
205278ab2b2SMatthew Dillon 	int i;
206278ab2b2SMatthew Dillon 
20784e47819SMatthew Dillon 	KKASSERT(cluster->refs > 0);
208f7712c43SMatthew Dillon 	if (atomic_fetchadd_int(&cluster->refs, -1) == 1) {
209f7712c43SMatthew Dillon 		cluster->focus = NULL;		/* safety XXX chg to assert */
210f7712c43SMatthew Dillon 		cluster->focus_index = 0;
211f7712c43SMatthew Dillon 
212278ab2b2SMatthew Dillon 		for (i = 0; i < cluster->nchains; ++i) {
2134b7e61e0SMatthew Dillon 			chain = cluster->array[i].chain;
21484e47819SMatthew Dillon 			if (chain) {
21584e47819SMatthew Dillon 				hammer2_chain_drop(chain);
216f7712c43SMatthew Dillon 				cluster->array[i].chain = NULL; /* safety */
217278ab2b2SMatthew Dillon 			}
218278ab2b2SMatthew Dillon 		}
219f7712c43SMatthew Dillon 		cluster->nchains = 0;				/* safety */
220f7712c43SMatthew Dillon 
221278ab2b2SMatthew Dillon 		kfree(cluster, M_HAMMER2);
222fe73aa5dSMatthew Dillon 		/* cluster is invalid */
22384e47819SMatthew Dillon 	}
224278ab2b2SMatthew Dillon }
225278ab2b2SMatthew Dillon 
226278ab2b2SMatthew Dillon /*
227a6cf1052SMatthew Dillon  * Lock a cluster.  Cluster must already be referenced.  Focus is maintained.
228fe73aa5dSMatthew Dillon  *
229a6cf1052SMatthew Dillon  * WARNING! This function expects the caller to handle resolution of the
230a6cf1052SMatthew Dillon  *	    cluster.  We never re-resolve the cluster in this function,
231a6cf1052SMatthew Dillon  *	    because it might be used to temporarily unlock/relock a cparent
232a6cf1052SMatthew Dillon  *	    in an iteration or recursrion, and the cparents elements do not
233a6cf1052SMatthew Dillon  *	    necessarily match.
234a6cf1052SMatthew Dillon  */
235a6cf1052SMatthew Dillon void
hammer2_cluster_lock(hammer2_cluster_t * cluster,int how)23601d71aa5SMatthew Dillon hammer2_cluster_lock(hammer2_cluster_t *cluster, int how)
237a6cf1052SMatthew Dillon {
238a6cf1052SMatthew Dillon 	hammer2_chain_t *chain;
239a6cf1052SMatthew Dillon 	int i;
240a6cf1052SMatthew Dillon 
241a6cf1052SMatthew Dillon 	/* cannot be on inode-embedded cluster template, must be on copy */
242a6cf1052SMatthew Dillon 	KKASSERT(cluster->refs > 0);
243a6cf1052SMatthew Dillon 	KKASSERT((cluster->flags & HAMMER2_CLUSTER_INODE) == 0);
244a6cf1052SMatthew Dillon 	if (cluster->flags & HAMMER2_CLUSTER_LOCKED) {
245a6cf1052SMatthew Dillon 		panic("hammer2_cluster_lock: cluster %p already locked!\n",
246a6cf1052SMatthew Dillon 			cluster);
247a6cf1052SMatthew Dillon 	}
248a6cf1052SMatthew Dillon 	atomic_set_int(&cluster->flags, HAMMER2_CLUSTER_LOCKED);
249a6cf1052SMatthew Dillon 
250a6cf1052SMatthew Dillon 	/*
251a6cf1052SMatthew Dillon 	 * Lock chains and resolve state.
252a6cf1052SMatthew Dillon 	 */
253a6cf1052SMatthew Dillon 	for (i = 0; i < cluster->nchains; ++i) {
254a6cf1052SMatthew Dillon 		chain = cluster->array[i].chain;
255a6cf1052SMatthew Dillon 		if (chain == NULL)
256a6cf1052SMatthew Dillon 			continue;
257a6cf1052SMatthew Dillon 		hammer2_chain_lock(chain, how);
258a6cf1052SMatthew Dillon 	}
259a6cf1052SMatthew Dillon }
260a6cf1052SMatthew Dillon 
2613ad7c35fSMatthew Dillon void
hammer2_cluster_unhold(hammer2_cluster_t * cluster)2623ad7c35fSMatthew Dillon hammer2_cluster_unhold(hammer2_cluster_t *cluster)
2633ad7c35fSMatthew Dillon {
2643ad7c35fSMatthew Dillon 	hammer2_chain_t *chain;
2653ad7c35fSMatthew Dillon 	int i;
2663ad7c35fSMatthew Dillon 
2673ad7c35fSMatthew Dillon 	for (i = 0; i < cluster->nchains; ++i) {
2683ad7c35fSMatthew Dillon 		chain = cluster->array[i].chain;
2693ad7c35fSMatthew Dillon 		if (chain == NULL)
2703ad7c35fSMatthew Dillon 			continue;
2713ad7c35fSMatthew Dillon 		hammer2_chain_unhold(chain);
2723ad7c35fSMatthew Dillon 	}
2733ad7c35fSMatthew Dillon }
2743ad7c35fSMatthew Dillon 
2753ad7c35fSMatthew Dillon void
hammer2_cluster_rehold(hammer2_cluster_t * cluster)2763ad7c35fSMatthew Dillon hammer2_cluster_rehold(hammer2_cluster_t *cluster)
2773ad7c35fSMatthew Dillon {
2783ad7c35fSMatthew Dillon 	hammer2_chain_t *chain;
2793ad7c35fSMatthew Dillon 	int i;
2803ad7c35fSMatthew Dillon 
2813ad7c35fSMatthew Dillon 	for (i = 0; i < cluster->nchains; ++i) {
2823ad7c35fSMatthew Dillon 		chain = cluster->array[i].chain;
2833ad7c35fSMatthew Dillon 		if (chain == NULL)
2843ad7c35fSMatthew Dillon 			continue;
2853ad7c35fSMatthew Dillon 		hammer2_chain_rehold(chain);
2863ad7c35fSMatthew Dillon 	}
2873ad7c35fSMatthew Dillon }
2883ad7c35fSMatthew Dillon 
289a6cf1052SMatthew Dillon /*
290c847e838SMatthew Dillon  * This is used by the XOPS subsystem to calculate the state of
291c847e838SMatthew Dillon  * the collection and tell hammer2_xop_collect() what to do with it.
292c847e838SMatthew Dillon  * The collection can be in various states of desynchronization, the
293c847e838SMatthew Dillon  * caller specifically wants to resolve the passed-in key.
294c847e838SMatthew Dillon  *
295*f3bfcc3fSMatthew Dillon  * Return values (HAMMER2_ERROR_*):
296*f3bfcc3fSMatthew Dillon  *
297c847e838SMatthew Dillon  *	0		- Quorum agreement, key is valid
298c847e838SMatthew Dillon  *
299c847e838SMatthew Dillon  *	ENOENT		- Quorum agreement, end of scan
300c847e838SMatthew Dillon  *
301c847e838SMatthew Dillon  *	ESRCH		- Quorum agreement, key is INVALID (caller should
302c847e838SMatthew Dillon  *			  skip key).
303c847e838SMatthew Dillon  *
304c847e838SMatthew Dillon  *	EIO		- Quorum agreement but all elements had errors.
305c847e838SMatthew Dillon  *
306c847e838SMatthew Dillon  *	EDEADLK		- No quorum agreement possible for key, a repair
307c847e838SMatthew Dillon  *			  may be needed.  Caller has to decide what to do,
308c847e838SMatthew Dillon  *			  possibly iterating the key or generating an EIO.
309c847e838SMatthew Dillon  *
310c847e838SMatthew Dillon  *	EINPROGRESS	- No quorum agreement yet, but agreement is still
311c847e838SMatthew Dillon  *			  possible if caller waits for more responses.  Caller
312c847e838SMatthew Dillon  *			  should not iterate key.
313c847e838SMatthew Dillon  *
314*f3bfcc3fSMatthew Dillon  *	CHECK		- CRC check error
315*f3bfcc3fSMatthew Dillon  *
316b02c0ae6SMatthew Dillon  * NOTE! If the pmp is in HMNT2_LOCAL mode, the cluster check always succeeds.
317b02c0ae6SMatthew Dillon  *
318c847e838SMatthew Dillon  * XXX needs to handle SOFT_MASTER and SOFT_SLAVE
319c847e838SMatthew Dillon  */
320c847e838SMatthew Dillon int
hammer2_cluster_check(hammer2_cluster_t * cluster,hammer2_key_t key,int flags)321c847e838SMatthew Dillon hammer2_cluster_check(hammer2_cluster_t *cluster, hammer2_key_t key, int flags)
322c847e838SMatthew Dillon {
323c847e838SMatthew Dillon 	hammer2_chain_t *chain;
324c847e838SMatthew Dillon 	hammer2_chain_t *focus;
325c847e838SMatthew Dillon 	hammer2_pfs_t *pmp;
326c847e838SMatthew Dillon 	hammer2_tid_t quorum_tid;
327c847e838SMatthew Dillon 	hammer2_tid_t last_best_quorum_tid;
328c847e838SMatthew Dillon 	uint32_t nflags;
329c847e838SMatthew Dillon 	int ttlmasters;
330c847e838SMatthew Dillon 	int ttlslaves;
331c847e838SMatthew Dillon 	int nmasters;
332c847e838SMatthew Dillon 	int nmasters_keymatch;
333c847e838SMatthew Dillon 	int nslaves;
334c847e838SMatthew Dillon 	int nquorum;
335c847e838SMatthew Dillon 	int umasters;	/* unknown masters (still in progress) */
3363f4ec3cfSMatthew Dillon 	int error;
337c847e838SMatthew Dillon 	int i;
338c847e838SMatthew Dillon 
339c847e838SMatthew Dillon 	cluster->error = 0;
340c847e838SMatthew Dillon 	cluster->focus = NULL;
341c847e838SMatthew Dillon 
342b02c0ae6SMatthew Dillon 	pmp = cluster->pmp;
343b02c0ae6SMatthew Dillon 	KKASSERT(pmp != NULL || cluster->nchains == 0);
344b02c0ae6SMatthew Dillon 
345b02c0ae6SMatthew Dillon 	/*
346b02c0ae6SMatthew Dillon 	 * Calculate quorum
347b02c0ae6SMatthew Dillon 	 */
348b02c0ae6SMatthew Dillon 	nquorum = pmp ? pmp->pfs_nmasters / 2 + 1 : 0;
349c847e838SMatthew Dillon 	nflags = 0;
350c847e838SMatthew Dillon 	ttlmasters = 0;
351c847e838SMatthew Dillon 	ttlslaves = 0;
352c847e838SMatthew Dillon 
353c847e838SMatthew Dillon 	/*
354c847e838SMatthew Dillon 	 * Pass 1
355c847e838SMatthew Dillon 	 *
356c847e838SMatthew Dillon 	 * NOTE: A NULL chain is not necessarily an error, it could be
357c847e838SMatthew Dillon 	 *	 e.g. a lookup failure or the end of an iteration.
358c847e838SMatthew Dillon 	 *	 Process normally.
359c847e838SMatthew Dillon 	 */
360c847e838SMatthew Dillon 	for (i = 0; i < cluster->nchains; ++i) {
361c847e838SMatthew Dillon 		cluster->array[i].flags &= ~HAMMER2_CITEM_FEMOD;
362c847e838SMatthew Dillon 		cluster->array[i].flags |= HAMMER2_CITEM_INVALID;
363c847e838SMatthew Dillon 
364c847e838SMatthew Dillon 		chain = cluster->array[i].chain;
3653f4ec3cfSMatthew Dillon 		error = cluster->array[i].error;
3663f4ec3cfSMatthew Dillon 		if (chain && error) {
367c847e838SMatthew Dillon 			if (cluster->focus == NULL || cluster->focus == chain) {
368c847e838SMatthew Dillon 				/* error will be overridden by valid focus */
3693f4ec3cfSMatthew Dillon 				/* XXX */
370c847e838SMatthew Dillon 			}
371c847e838SMatthew Dillon 
372c847e838SMatthew Dillon 			/*
373c847e838SMatthew Dillon 			 * Must count total masters and slaves whether the
374c847e838SMatthew Dillon 			 * chain is errored or not.
375c847e838SMatthew Dillon 			 */
376c847e838SMatthew Dillon 			switch (cluster->pmp->pfs_types[i]) {
3778cd26e36SMatthew Dillon 			case HAMMER2_PFSTYPE_SUPROOT:
378c847e838SMatthew Dillon 			case HAMMER2_PFSTYPE_MASTER:
379c847e838SMatthew Dillon 				++ttlmasters;
380c847e838SMatthew Dillon 				break;
381c847e838SMatthew Dillon 			case HAMMER2_PFSTYPE_SLAVE:
382c847e838SMatthew Dillon 				++ttlslaves;
383c847e838SMatthew Dillon 				break;
384c847e838SMatthew Dillon 			}
385c847e838SMatthew Dillon 			continue;
386c847e838SMatthew Dillon 		}
387c847e838SMatthew Dillon 		switch (cluster->pmp->pfs_types[i]) {
388c847e838SMatthew Dillon 		case HAMMER2_PFSTYPE_MASTER:
389c847e838SMatthew Dillon 			++ttlmasters;
390c847e838SMatthew Dillon 			break;
391c847e838SMatthew Dillon 		case HAMMER2_PFSTYPE_SLAVE:
392c847e838SMatthew Dillon 			++ttlslaves;
393c847e838SMatthew Dillon 			break;
394c847e838SMatthew Dillon 		case HAMMER2_PFSTYPE_SOFT_MASTER:
395c847e838SMatthew Dillon 			nflags |= HAMMER2_CLUSTER_WRSOFT;
396c847e838SMatthew Dillon 			nflags |= HAMMER2_CLUSTER_RDSOFT;
397c847e838SMatthew Dillon 			break;
398c847e838SMatthew Dillon 		case HAMMER2_PFSTYPE_SOFT_SLAVE:
399c847e838SMatthew Dillon 			nflags |= HAMMER2_CLUSTER_RDSOFT;
400c847e838SMatthew Dillon 			break;
401c847e838SMatthew Dillon 		case HAMMER2_PFSTYPE_SUPROOT:
402c847e838SMatthew Dillon 			/*
403c847e838SMatthew Dillon 			 * Degenerate cluster representing the super-root
404c847e838SMatthew Dillon 			 * topology on a single device.  Fake stuff so
405c847e838SMatthew Dillon 			 * cluster ops work as expected.
406c847e838SMatthew Dillon 			 */
4078cd26e36SMatthew Dillon 			++ttlmasters;
408c847e838SMatthew Dillon 			nflags |= HAMMER2_CLUSTER_WRHARD;
409c847e838SMatthew Dillon 			nflags |= HAMMER2_CLUSTER_RDHARD;
410c847e838SMatthew Dillon 			cluster->focus_index = i;
411c847e838SMatthew Dillon 			cluster->focus = chain;
4123f4ec3cfSMatthew Dillon 			cluster->error = error;
413c847e838SMatthew Dillon 			break;
414c847e838SMatthew Dillon 		default:
415c847e838SMatthew Dillon 			break;
416c847e838SMatthew Dillon 		}
417c847e838SMatthew Dillon 	}
418c847e838SMatthew Dillon 
419c847e838SMatthew Dillon 	/*
420c847e838SMatthew Dillon 	 * Pass 2
421c847e838SMatthew Dillon 	 *
422c847e838SMatthew Dillon 	 * Resolve nmasters		- master nodes fully match
423c847e838SMatthew Dillon 	 *
424c847e838SMatthew Dillon 	 * Resolve umasters		- master nodes operation still
425c847e838SMatthew Dillon 	 *				  in progress
426c847e838SMatthew Dillon 	 *
427c847e838SMatthew Dillon 	 * Resolve nmasters_keymatch	- master nodes match the passed-in
428c847e838SMatthew Dillon 	 *				  key and may or may not match
429c847e838SMatthew Dillon 	 *				  the quorum-agreed tid.
430c847e838SMatthew Dillon 	 *
431c847e838SMatthew Dillon 	 * The quorum-agreed TID is the highest matching TID.
432c847e838SMatthew Dillon 	 */
433c847e838SMatthew Dillon 	last_best_quorum_tid = HAMMER2_TID_MAX;
4340d66a712SMatthew Dillon 	umasters = 0;
4350d66a712SMatthew Dillon 	nmasters = 0;
4360d66a712SMatthew Dillon 	nmasters_keymatch = 0;
437c847e838SMatthew Dillon 	quorum_tid = 0;		/* fix gcc warning */
438c847e838SMatthew Dillon 
439c847e838SMatthew Dillon 	while (nmasters < nquorum && last_best_quorum_tid != 0) {
4400d66a712SMatthew Dillon 		umasters = 0;
441c847e838SMatthew Dillon 		nmasters = 0;
4420d66a712SMatthew Dillon 		nmasters_keymatch = 0;
443c847e838SMatthew Dillon 		quorum_tid = 0;
444c847e838SMatthew Dillon 
445c847e838SMatthew Dillon 		for (i = 0; i < cluster->nchains; ++i) {
446c847e838SMatthew Dillon 			/* XXX SOFT smpresent handling */
4478cd26e36SMatthew Dillon 			switch(cluster->pmp->pfs_types[i]) {
4488cd26e36SMatthew Dillon 			case HAMMER2_PFSTYPE_MASTER:
4498cd26e36SMatthew Dillon 			case HAMMER2_PFSTYPE_SUPROOT:
4508cd26e36SMatthew Dillon 				break;
4518cd26e36SMatthew Dillon 			default:
452c847e838SMatthew Dillon 				continue;
453c847e838SMatthew Dillon 			}
454c847e838SMatthew Dillon 
455c847e838SMatthew Dillon 			chain = cluster->array[i].chain;
4563f4ec3cfSMatthew Dillon 			error = cluster->array[i].error;
457c847e838SMatthew Dillon 
458c847e838SMatthew Dillon 			/*
459c847e838SMatthew Dillon 			 * Skip elements still in progress.  umasters keeps
460c847e838SMatthew Dillon 			 * track of masters that might still be in-progress.
461c847e838SMatthew Dillon 			 */
462c847e838SMatthew Dillon 			if (chain == NULL && (cluster->array[i].flags &
463c847e838SMatthew Dillon 					      HAMMER2_CITEM_NULL) == 0) {
464c847e838SMatthew Dillon 				++umasters;
465c847e838SMatthew Dillon 				continue;
466c847e838SMatthew Dillon 			}
467c847e838SMatthew Dillon 
468c847e838SMatthew Dillon 			/*
469c847e838SMatthew Dillon 			 * Key match?
470c847e838SMatthew Dillon 			 */
471c847e838SMatthew Dillon 			if (flags & HAMMER2_CHECK_NULL) {
472c847e838SMatthew Dillon 				if (chain == NULL) {
473c847e838SMatthew Dillon 					++nmasters;
474c847e838SMatthew Dillon 					++nmasters_keymatch;
4753f4ec3cfSMatthew Dillon 					if (cluster->error == 0)
4763f4ec3cfSMatthew Dillon 						cluster->error = error;
477c847e838SMatthew Dillon 				}
478b7add675SMatthew Dillon 			} else if (chain &&
479b7add675SMatthew Dillon 				   (key == (hammer2_key_t)-1 ||
480b7add675SMatthew Dillon 				    chain->bref.key == key)) {
481c847e838SMatthew Dillon 				++nmasters_keymatch;
4820d66a712SMatthew Dillon 
4830d66a712SMatthew Dillon 				if (chain->bref.modify_tid <
4840d66a712SMatthew Dillon 				     last_best_quorum_tid &&
4850d66a712SMatthew Dillon 				    quorum_tid < chain->bref.modify_tid) {
486c847e838SMatthew Dillon 					/*
4870d66a712SMatthew Dillon 					 * Select new TID as master if better
4880d66a712SMatthew Dillon 					 * than any found so far in this loop,
4890d66a712SMatthew Dillon 					 * as long as it does not reach the
4900d66a712SMatthew Dillon 					 * best tid found in the previous loop.
491c847e838SMatthew Dillon 					 */
492c847e838SMatthew Dillon 					nmasters = 0;
493c847e838SMatthew Dillon 					quorum_tid = chain->bref.modify_tid;
494c847e838SMatthew Dillon 				}
495c847e838SMatthew Dillon 				if (quorum_tid == chain->bref.modify_tid) {
496c847e838SMatthew Dillon 					/*
497c847e838SMatthew Dillon 					 * TID matches current collection.
4983f4ec3cfSMatthew Dillon 					 *
4993f4ec3cfSMatthew Dillon 					 * (error handled in next pass)
500c847e838SMatthew Dillon 					 */
501c847e838SMatthew Dillon 					++nmasters;
502c847e838SMatthew Dillon 					if (chain->error == 0) {
503c847e838SMatthew Dillon 						cluster->focus = chain;
504c847e838SMatthew Dillon 						cluster->focus_index = i;
505c847e838SMatthew Dillon 					}
506c847e838SMatthew Dillon 				}
507c847e838SMatthew Dillon 			}
508c847e838SMatthew Dillon 		}
509c847e838SMatthew Dillon 		if (nmasters >= nquorum)
510c847e838SMatthew Dillon 			break;
511c847e838SMatthew Dillon 		last_best_quorum_tid = quorum_tid;
512c847e838SMatthew Dillon 	}
513c847e838SMatthew Dillon 
514c847e838SMatthew Dillon 	/*
515c847e838SMatthew Dillon 	kprintf("nmasters %d/%d nmaster_keymatch=%d umasters=%d\n",
516c847e838SMatthew Dillon 		nmasters, nquorum, nmasters_keymatch, umasters);
517c847e838SMatthew Dillon 	*/
518c847e838SMatthew Dillon 
519c847e838SMatthew Dillon 	/*
520c847e838SMatthew Dillon 	 * Early return if we do not have enough masters.
521c847e838SMatthew Dillon 	 */
522c847e838SMatthew Dillon 	if (nmasters < nquorum) {
523c847e838SMatthew Dillon 		if (nmasters + umasters >= nquorum)
52465cacacfSMatthew Dillon 			return HAMMER2_ERROR_EINPROGRESS;
525c847e838SMatthew Dillon 		if (nmasters_keymatch < nquorum)
52665cacacfSMatthew Dillon 			return HAMMER2_ERROR_ESRCH;
52765cacacfSMatthew Dillon 		return HAMMER2_ERROR_EDEADLK;
528c847e838SMatthew Dillon 	}
529c847e838SMatthew Dillon 
530c847e838SMatthew Dillon 	/*
531c847e838SMatthew Dillon 	 * Validated end of scan.
532c847e838SMatthew Dillon 	 */
5333f4ec3cfSMatthew Dillon 	if (flags & HAMMER2_CHECK_NULL) {
5343f4ec3cfSMatthew Dillon 		if (cluster->error == 0)
53565cacacfSMatthew Dillon 			cluster->error = HAMMER2_ERROR_ENOENT;
5363f4ec3cfSMatthew Dillon 		return cluster->error;
5373f4ec3cfSMatthew Dillon 	}
538c847e838SMatthew Dillon 
539c847e838SMatthew Dillon 	/*
540c847e838SMatthew Dillon 	 * If we have a NULL focus at this point the agreeing quorum all
541c847e838SMatthew Dillon 	 * had chain errors.
542c847e838SMatthew Dillon 	 */
543c847e838SMatthew Dillon 	if (cluster->focus == NULL)
54465cacacfSMatthew Dillon 		return HAMMER2_ERROR_EIO;
545c847e838SMatthew Dillon 
546c847e838SMatthew Dillon 	/*
547c847e838SMatthew Dillon 	 * Pass 3
548c847e838SMatthew Dillon 	 *
549c847e838SMatthew Dillon 	 * We have quorum agreement, validate elements, not end of scan.
550c847e838SMatthew Dillon 	 */
5510d66a712SMatthew Dillon 	nslaves = 0;
5523f4ec3cfSMatthew Dillon 	cluster->error = 0;
5530d66a712SMatthew Dillon 
554c847e838SMatthew Dillon 	for (i = 0; i < cluster->nchains; ++i) {
555c847e838SMatthew Dillon 		chain = cluster->array[i].chain;
5563f4ec3cfSMatthew Dillon 		error = cluster->array[i].error;
557c847e838SMatthew Dillon 		if (chain == NULL ||
558c847e838SMatthew Dillon 		    chain->bref.key != key ||
559c847e838SMatthew Dillon 		    chain->bref.modify_tid != quorum_tid) {
560c847e838SMatthew Dillon 			continue;
561c847e838SMatthew Dillon 		}
562c847e838SMatthew Dillon 
5633f4ec3cfSMatthew Dillon 		/*
5643f4ec3cfSMatthew Dillon 		 * Quorum Match
5653f4ec3cfSMatthew Dillon 		 *
5663f4ec3cfSMatthew Dillon 		 * XXX for now, cumulative error.
5673f4ec3cfSMatthew Dillon 		 */
5683f4ec3cfSMatthew Dillon 		if (cluster->error == 0)
5693f4ec3cfSMatthew Dillon 			cluster->error = error;
5703f4ec3cfSMatthew Dillon 
571c847e838SMatthew Dillon 		switch (cluster->pmp->pfs_types[i]) {
572c847e838SMatthew Dillon 		case HAMMER2_PFSTYPE_MASTER:
573c847e838SMatthew Dillon 			cluster->array[i].flags |= HAMMER2_CITEM_FEMOD;
574c847e838SMatthew Dillon 			cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
575c847e838SMatthew Dillon 			nflags |= HAMMER2_CLUSTER_WRHARD;
576c847e838SMatthew Dillon 			nflags |= HAMMER2_CLUSTER_RDHARD;
577c847e838SMatthew Dillon 			break;
578c847e838SMatthew Dillon 		case HAMMER2_PFSTYPE_SLAVE:
579c847e838SMatthew Dillon 			/*
580c847e838SMatthew Dillon 			 * We must have enough up-to-date masters to reach
581c847e838SMatthew Dillon 			 * a quorum and the slave modify_tid must match the
582c847e838SMatthew Dillon 			 * quorum's modify_tid.
583c847e838SMatthew Dillon 			 *
584c847e838SMatthew Dillon 			 * Do not select an errored slave.
585c847e838SMatthew Dillon 			 */
586c847e838SMatthew Dillon 			cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
587c847e838SMatthew Dillon 			nflags |= HAMMER2_CLUSTER_RDHARD;
588c847e838SMatthew Dillon 			++nslaves;
589c847e838SMatthew Dillon 			break;
590c847e838SMatthew Dillon 		case HAMMER2_PFSTYPE_SOFT_MASTER:
591c847e838SMatthew Dillon 			/*
592c847e838SMatthew Dillon 			 * Directly mounted soft master always wins.  There
593c847e838SMatthew Dillon 			 * should be only one.
594c847e838SMatthew Dillon 			 */
595c847e838SMatthew Dillon 			cluster->array[i].flags |= HAMMER2_CITEM_FEMOD;
596c847e838SMatthew Dillon 			cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
597c847e838SMatthew Dillon 			break;
598c847e838SMatthew Dillon 		case HAMMER2_PFSTYPE_SOFT_SLAVE:
599c847e838SMatthew Dillon 			/*
600c847e838SMatthew Dillon 			 * Directly mounted soft slave always wins.  There
601c847e838SMatthew Dillon 			 * should be only one.
602c847e838SMatthew Dillon 			 *
603c847e838SMatthew Dillon 			 * XXX
604c847e838SMatthew Dillon 			 */
605c847e838SMatthew Dillon 			cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
606c847e838SMatthew Dillon 			break;
607c847e838SMatthew Dillon 		case HAMMER2_PFSTYPE_SUPROOT:
608c847e838SMatthew Dillon 			/*
609c847e838SMatthew Dillon 			 * spmp (degenerate case)
610c847e838SMatthew Dillon 			 */
611c847e838SMatthew Dillon 			cluster->array[i].flags |= HAMMER2_CITEM_FEMOD;
612c847e838SMatthew Dillon 			cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
6138cd26e36SMatthew Dillon 			nflags |= HAMMER2_CLUSTER_WRHARD;
6148cd26e36SMatthew Dillon 			nflags |= HAMMER2_CLUSTER_RDHARD;
615c847e838SMatthew Dillon 			break;
616c847e838SMatthew Dillon 		default:
617c847e838SMatthew Dillon 			break;
618c847e838SMatthew Dillon 		}
619c847e838SMatthew Dillon 	}
620c847e838SMatthew Dillon 
621c847e838SMatthew Dillon 	/*
622c847e838SMatthew Dillon 	 * Focus now set, adjust ddflag.  Skip this pass if the focus
623c847e838SMatthew Dillon 	 * is bad or if we are at the PFS root (the bref won't match at
624c847e838SMatthew Dillon 	 * the PFS root, obviously).
6256f5aaa27SMatthew Dillon 	 *
626fda30e02SMatthew Dillon 	 * focus is probably not locked and it isn't safe to test its
627fda30e02SMatthew Dillon 	 * content (e.g. focus->data, focus->dio, other content).  We
628fda30e02SMatthew Dillon 	 * do not synchronize the dio to the cpu here.  In fact, in numerous
629fda30e02SMatthew Dillon 	 * situations the frontend doesn't even need to access its dio/data,
630fda30e02SMatthew Dillon 	 * so synchronizing it here would be wasteful.
631c847e838SMatthew Dillon 	 */
632c847e838SMatthew Dillon 	focus = cluster->focus;
633c847e838SMatthew Dillon 	if (focus) {
634c847e838SMatthew Dillon 		cluster->ddflag =
635c847e838SMatthew Dillon 			(cluster->focus->bref.type == HAMMER2_BREF_TYPE_INODE);
636c847e838SMatthew Dillon 	} else {
637c847e838SMatthew Dillon 		cluster->ddflag = 0;
638c847e838SMatthew Dillon 		goto skip4;
639c847e838SMatthew Dillon 	}
640c847e838SMatthew Dillon 	if (cluster->focus->flags & HAMMER2_CHAIN_PFSBOUNDARY)
641c847e838SMatthew Dillon 		goto skip4;
642c847e838SMatthew Dillon 
643c847e838SMatthew Dillon 	/*
644c847e838SMatthew Dillon 	 * Pass 4
645c847e838SMatthew Dillon 	 *
646c847e838SMatthew Dillon 	 * Validate the elements that were not marked invalid.  They should
647c847e838SMatthew Dillon 	 * match.
648c847e838SMatthew Dillon 	 */
649c847e838SMatthew Dillon 	for (i = 0; i < cluster->nchains; ++i) {
650c847e838SMatthew Dillon 		int ddflag;
651c847e838SMatthew Dillon 
652c847e838SMatthew Dillon 		chain = cluster->array[i].chain;
653c847e838SMatthew Dillon 
654c847e838SMatthew Dillon 		if (chain == NULL)
655c847e838SMatthew Dillon 			continue;
656c847e838SMatthew Dillon 		if (chain == focus)
657c847e838SMatthew Dillon 			continue;
658c847e838SMatthew Dillon 		if (cluster->array[i].flags & HAMMER2_CITEM_INVALID)
659c847e838SMatthew Dillon 			continue;
660c847e838SMatthew Dillon 
661c847e838SMatthew Dillon 		ddflag = (chain->bref.type == HAMMER2_BREF_TYPE_INODE);
662c847e838SMatthew Dillon 		if (chain->bref.type != focus->bref.type ||
663c847e838SMatthew Dillon 		    chain->bref.key != focus->bref.key ||
664c847e838SMatthew Dillon 		    chain->bref.keybits != focus->bref.keybits ||
665c847e838SMatthew Dillon 		    chain->bref.modify_tid != focus->bref.modify_tid ||
666c847e838SMatthew Dillon 		    chain->bytes != focus->bytes ||
667c847e838SMatthew Dillon 		    ddflag != cluster->ddflag) {
668c847e838SMatthew Dillon 			cluster->array[i].flags |= HAMMER2_CITEM_INVALID;
669c847e838SMatthew Dillon 			if (hammer2_debug & 1)
670*f3bfcc3fSMatthew Dillon 			kprintf("cluster_check: matching modify_tid failed "
671c847e838SMatthew Dillon 				"bref test: idx=%d type=%02x/%02x "
672c847e838SMatthew Dillon 				"key=%016jx/%d-%016jx/%d "
673c847e838SMatthew Dillon 				"mod=%016jx/%016jx bytes=%u/%u\n",
674c847e838SMatthew Dillon 				i,
675c847e838SMatthew Dillon 				chain->bref.type, focus->bref.type,
676c847e838SMatthew Dillon 				chain->bref.key, chain->bref.keybits,
677c847e838SMatthew Dillon 				focus->bref.key, focus->bref.keybits,
678c847e838SMatthew Dillon 				chain->bref.modify_tid, focus->bref.modify_tid,
679c847e838SMatthew Dillon 				chain->bytes, focus->bytes);
680c847e838SMatthew Dillon 			if (hammer2_debug & 0x4000)
681*f3bfcc3fSMatthew Dillon 				panic("cluster_check");
682c847e838SMatthew Dillon 			/* flag issue and force resync? */
683c847e838SMatthew Dillon 		}
684c847e838SMatthew Dillon 	}
685c847e838SMatthew Dillon skip4:
686c847e838SMatthew Dillon 
687c847e838SMatthew Dillon 	if (ttlslaves == 0)
688c847e838SMatthew Dillon 		nflags |= HAMMER2_CLUSTER_NOSOFT;
689c847e838SMatthew Dillon 	if (ttlmasters == 0)
690c847e838SMatthew Dillon 		nflags |= HAMMER2_CLUSTER_NOHARD;
691c847e838SMatthew Dillon 
692c847e838SMatthew Dillon 	/*
693c847e838SMatthew Dillon 	 * Set SSYNCED or MSYNCED for slaves and masters respectively if
694c847e838SMatthew Dillon 	 * all available nodes (even if 0 are available) are fully
695c847e838SMatthew Dillon 	 * synchronized.  This is used by the synchronization thread to
696c847e838SMatthew Dillon 	 * determine if there is work it could potentially accomplish.
697c847e838SMatthew Dillon 	 */
698c847e838SMatthew Dillon 	if (nslaves == ttlslaves)
699c847e838SMatthew Dillon 		nflags |= HAMMER2_CLUSTER_SSYNCED;
700c847e838SMatthew Dillon 	if (nmasters == ttlmasters)
701c847e838SMatthew Dillon 		nflags |= HAMMER2_CLUSTER_MSYNCED;
702c847e838SMatthew Dillon 
703c847e838SMatthew Dillon 	/*
704c847e838SMatthew Dillon 	 * Determine if the cluster was successfully locked for the
705c847e838SMatthew Dillon 	 * requested operation and generate an error code.  The cluster
706c847e838SMatthew Dillon 	 * will not be locked (or ref'd) if an error is returned.
707c847e838SMatthew Dillon 	 */
708c847e838SMatthew Dillon 	atomic_set_int(&cluster->flags, nflags);
709c847e838SMatthew Dillon 	atomic_clear_int(&cluster->flags, HAMMER2_CLUSTER_ZFLAGS & ~nflags);
710c847e838SMatthew Dillon 
7113f4ec3cfSMatthew Dillon 	return cluster->error;
712c847e838SMatthew Dillon }
713c847e838SMatthew Dillon 
714c847e838SMatthew Dillon /*
715a6cf1052SMatthew Dillon  * Unlock a cluster.  Refcount and focus is maintained.
716278ab2b2SMatthew Dillon  */
717278ab2b2SMatthew Dillon void
hammer2_cluster_unlock(hammer2_cluster_t * cluster)71801d71aa5SMatthew Dillon hammer2_cluster_unlock(hammer2_cluster_t *cluster)
719278ab2b2SMatthew Dillon {
72084e47819SMatthew Dillon 	hammer2_chain_t *chain;
721278ab2b2SMatthew Dillon 	int i;
722278ab2b2SMatthew Dillon 
723b8ba9690SMatthew Dillon 	if ((cluster->flags & HAMMER2_CLUSTER_LOCKED) == 0) {
724b8ba9690SMatthew Dillon 		kprintf("hammer2_cluster_unlock: cluster %p not locked\n",
725b8ba9690SMatthew Dillon 			cluster);
726b8ba9690SMatthew Dillon 	}
727e513e77eSMatthew Dillon 	KKASSERT(cluster->flags & HAMMER2_CLUSTER_LOCKED);
728278ab2b2SMatthew Dillon 	KKASSERT(cluster->refs > 0);
729b8ba9690SMatthew Dillon 	atomic_clear_int(&cluster->flags, HAMMER2_CLUSTER_LOCKED);
730b8ba9690SMatthew Dillon 
73184e47819SMatthew Dillon 	for (i = 0; i < cluster->nchains; ++i) {
7324b7e61e0SMatthew Dillon 		chain = cluster->array[i].chain;
733e513e77eSMatthew Dillon 		if (chain)
73484e47819SMatthew Dillon 			hammer2_chain_unlock(chain);
73584e47819SMatthew Dillon 	}
736278ab2b2SMatthew Dillon }
737