/*
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
 * Copyright (c) 2011-2022 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * The cluster module collects multiple chains representing the same
 * information from different nodes into a single entity.  It allows direct
 * access to media data as long as it is not blockref array data (which
 * will obviously have to be different at each node).
 *
 * This module also handles I/O dispatch, status rollup, and various
 * mastership arrangements including quorum operations.  It effectively
 * presents one topology to the vnops layer.
 *
 * Many of the API calls mimic chain API calls but operate on clusters
 * instead of chains.  Please see hammer2_chain.c for more complete code
 * documentation of the API functions.
 *
 * WARNING! This module is *extremely* complex.  It must issue asynchronous
 *	    locks and I/O, do quorum and/or master-slave processing, and
 *	    it must operate properly even if some nodes are broken (which
 *	    can also mean indefinite locks).
 *
 *				CLUSTER OPERATIONS
 *
 * Cluster operations can be broken down into three pieces:
 *
 * (1) Chain locking and data retrieval.
 *
 *	- Most complex functions, quorum management on transaction ids.
 *
 *	- Locking and data accesses must be internally asynchronous.
 *
 *	- Validate and manage cache coherency primitives (cache state
 *	  is stored in chain topologies but must be validated by these
 *	  functions).
 *
 * (2) Lookups and Scans
 *		hammer2_cluster_lookup()
 *		hammer2_cluster_next()
 *
 *	- Depend on locking & data retrieval functions, but still complex.
 *
 *	- Must do quorum management on transaction ids.
 *
 *	- Lookup and iteration ops must be internally asynchronous.
 *
 * (3) Modifying Operations
 *		hammer2_cluster_create()
 *
 *	- Can usually punt on failures; the operation continues unless
 *	  quorum is lost.  If quorum is lost, must wait for
 *	  resynchronization (depending on the management mode).
 *
 *	- Must disconnect the node on failures (and also not flush it),
 *	  remount, and resynchronize.
 *
 *	- Network links (via kdmsg) are relatively easy to issue as the
 *	  complex underworkings of hammer2_chain.c don't have to be messed
 *	  with (the protocol is at a higher level than block-level).
 *
 *	- Multiple local disk nodes (i.e. block devices) are another matter.
 *	  Chain operations have to be dispatched to per-node threads (xN)
 *	  because we can't asynchronize potentially very complex chain
 *	  operations in hammer2_chain.c (it would be a huge mess).
 *
 *	  (these threads are also used to terminate incoming kdmsg ops from
 *	  other machines)
 *
 *	- Single-node filesystems do not use threads and will simply call
 *	  hammer2_chain.c functions directly.  This short-cut is handled
 *	  at the base of each cluster function.
 */
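/*
 * Illustrative note (editorial, not from the original sources): the quorum
 * referred to above is a strict majority of configured masters, computed
 * in hammer2_cluster_check() below as pfs_nmasters / 2 + 1.  With three
 * masters the quorum is two, so the cluster keeps operating with one
 * master down or lagging; any two masters agreeing on the highest
 * modify_tid define the authoritative copy of a block.
 */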
/*
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
*/

#include "hammer2.h"

/*
 * Returns the bref type of the cluster's focus.
 *
 * If the cluster is errored, returns HAMMER2_BREF_TYPE_EMPTY (0).
 * The cluster must be locked.
 */
uint8_t
hammer2_cluster_type(hammer2_cluster_t *cluster)
{
	if (cluster->error == 0) {
		KKASSERT(cluster->focus != NULL);
		return(cluster->focus->bref.type);
	}
	return 0;
}
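
/*
 * Illustrative sketch (editorial, not compiled): a typical caller checks
 * the focus type of a locked cluster before interpreting its data.  The
 * surrounding context (how 'cluster' was obtained) is assumed.
 */
#if 0
	if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
		/* focus is an inode; data can be treated as inode meta */
	} else if (hammer2_cluster_type(cluster) == 0) {
		/* cluster is errored; cluster->error holds the reason */
	}
#endif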

/*
 * Returns the bref of the cluster's focus, sans any data-offset information
 * (since offset information is per-node and wouldn't be useful).
 *
 * Callers use this function to access modify_tid, mirror_tid, type,
 * key, and keybits.
 *
 * If the cluster is errored, returns an empty bref.
 * The cluster must be locked.
 */
void
hammer2_cluster_bref(hammer2_cluster_t *cluster, hammer2_blockref_t *bref)
{
	if (cluster->error == 0) {
		KKASSERT(cluster->focus != NULL);
		*bref = cluster->focus->bref;
		bref->data_off = 0;
	} else {
		bzero(bref, sizeof(*bref));
	}
}
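
/*
 * Illustrative sketch (editorial, not compiled): reading the focus bref
 * to obtain the quorum-relevant fields mentioned above.
 */
#if 0
	hammer2_blockref_t bref;

	hammer2_cluster_bref(cluster, &bref);
	/* bref.modify_tid, bref.key, bref.keybits are now usable;   */
	/* bref.data_off has been zeroed because it is per-node only */
#endif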

/*
 * Create a degenerate cluster with one ref from a single locked chain.
 * The returned cluster will be focused on the chain and inherit its
 * error state.
 *
 * The chain's lock and reference are transferred to the new cluster, so
 * the caller should not try to unlock the chain separately.
 *
 * We fake the flags.
 */
void
hammer2_dummy_xop_from_chain(hammer2_xop_head_t *xop, hammer2_chain_t *chain)
{
	hammer2_cluster_t *cluster;

	bzero(xop, sizeof(*xop));

	cluster = &xop->cluster;
	cluster->array[0].chain = chain;
	cluster->array[0].flags = HAMMER2_CITEM_FEMOD;
	cluster->nchains = 1;
	cluster->focus = chain;
	cluster->focus_index = 0;
	cluster->pmp = chain->pmp;
	cluster->refs = 1;
	cluster->error = chain->error;
	cluster->flags = HAMMER2_CLUSTER_LOCKED |
			 HAMMER2_CLUSTER_WRHARD |
			 HAMMER2_CLUSTER_RDHARD |
			 HAMMER2_CLUSTER_MSYNCED |
			 HAMMER2_CLUSTER_SSYNCED;
}
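
/*
 * Illustrative sketch (editorial, not compiled): wrapping an already
 * locked chain in a stack-allocated dummy xop, the single-node shortcut
 * described at the top of this file.
 */
#if 0
	hammer2_xop_head_t xop;

	hammer2_dummy_xop_from_chain(&xop, chain);
	/* xop.cluster is now a locked, single-chain cluster focused */
	/* on 'chain'; do not unlock 'chain' separately.             */
#endif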

/*
 * Add a reference to a cluster and its underlying chains.
 *
 * We must also ref the underlying chains in order to allow ref/unlock
 * sequences to later re-lock.
 */
void
hammer2_cluster_ref(hammer2_cluster_t *cluster)
{
	atomic_add_int(&cluster->refs, 1);
}

/*
 * Drop the caller's reference to the cluster.  When the ref count drops to
 * zero this function frees the cluster and drops all underlying chains.
 *
 * In-progress read I/Os are typically detached from the cluster once the
 * first one returns (the remaining stay attached to the DIOs but are then
 * ignored and drop naturally).
 */
void
hammer2_cluster_drop(hammer2_cluster_t *cluster)
{
	hammer2_chain_t *chain;
	int i;

	KKASSERT(cluster->refs > 0);
	if (atomic_fetchadd_int(&cluster->refs, -1) == 1) {
		cluster->focus = NULL;		/* safety XXX chg to assert */
		cluster->focus_index = 0;

		for (i = 0; i < cluster->nchains; ++i) {
			chain = cluster->array[i].chain;
			if (chain) {
				hammer2_chain_drop(chain);
				cluster->array[i].chain = NULL; /* safety */
			}
		}
		cluster->nchains = 0;	/* safety */

		kfree(cluster, M_HAMMER2);
		/* cluster is invalid */
	}
}
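
/*
 * Illustrative sketch (editorial, not compiled): every
 * hammer2_cluster_ref() must be balanced by a hammer2_cluster_drop();
 * the final drop frees the cluster and drops all chains.
 */
#if 0
	hammer2_cluster_ref(cluster);		/* refs: n -> n+1 */
	/* ... cluster may be unlocked and later re-locked here ... */
	hammer2_cluster_drop(cluster);		/* refs: n+1 -> n */
#endif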

/*
 * Lock a cluster.  The cluster must already be referenced.  Focus is
 * maintained.
 *
 * WARNING! This function expects the caller to handle resolution of the
 *	    cluster.  We never re-resolve the cluster in this function,
 *	    because it might be used to temporarily unlock/relock a cparent
 *	    in an iteration or recursion, and the cparent's elements do not
 *	    necessarily match.
 */
void
hammer2_cluster_lock(hammer2_cluster_t *cluster, int how)
{
	hammer2_chain_t *chain;
	int i;

	/* cannot be on inode-embedded cluster template, must be on copy */
	KKASSERT(cluster->refs > 0);
	KKASSERT((cluster->flags & HAMMER2_CLUSTER_INODE) == 0);
	if (cluster->flags & HAMMER2_CLUSTER_LOCKED) {
		panic("hammer2_cluster_lock: cluster %p already locked!\n",
		      cluster);
	}
	atomic_set_int(&cluster->flags, HAMMER2_CLUSTER_LOCKED);

	/*
	 * Lock chains and resolve state.
	 */
	for (i = 0; i < cluster->nchains; ++i) {
		chain = cluster->array[i].chain;
		if (chain == NULL)
			continue;
		hammer2_chain_lock(chain, how);
	}
}
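
/*
 * Illustrative sketch (editorial, not compiled): lock/unlock pairing on a
 * referenced cluster.  HAMMER2_RESOLVE_ALWAYS is the usual 'how' flag for
 * chain locks elsewhere in hammer2 and is assumed applicable here.
 */
#if 0
	hammer2_cluster_lock(cluster, HAMMER2_RESOLVE_ALWAYS);
	/* ... access cluster->focus and its bref ... */
	hammer2_cluster_unlock(cluster);	/* ref is retained */
#endif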

/*
 * Unhold all chains in the cluster (see hammer2_chain_unhold()), used to
 * temporarily release per-chain state across blocking operations while
 * the cluster itself remains referenced.
 */
void
hammer2_cluster_unhold(hammer2_cluster_t *cluster)
{
	hammer2_chain_t *chain;
	int i;

	for (i = 0; i < cluster->nchains; ++i) {
		chain = cluster->array[i].chain;
		if (chain == NULL)
			continue;
		hammer2_chain_unhold(chain);
	}
}

/*
 * Re-acquire the per-chain state released by hammer2_cluster_unhold()
 * (see hammer2_chain_rehold()).
 */
void
hammer2_cluster_rehold(hammer2_cluster_t *cluster)
{
	hammer2_chain_t *chain;
	int i;

	for (i = 0; i < cluster->nchains; ++i) {
		chain = cluster->array[i].chain;
		if (chain == NULL)
			continue;
		hammer2_chain_rehold(chain);
	}
}

/*
 * This is used by the XOPS subsystem to calculate the state of
 * the collection and tell hammer2_xop_collect() what to do with it.
 * The collection can be in various states of desynchronization; the
 * caller specifically wants to resolve the passed-in key.
 *
 * Return values (HAMMER2_ERROR_*):
 *
 *	0		- Quorum agreement, key is valid
 *
 *	ENOENT		- Quorum agreement, end of scan
 *
 *	ESRCH		- Quorum agreement, key is INVALID (caller should
 *			  skip key).
 *
 *	EIO		- Quorum agreement but all elements had errors.
 *
 *	EDEADLK		- No quorum agreement possible for key, a repair
 *			  may be needed.  Caller has to decide what to do,
 *			  possibly iterating the key or generating an EIO.
 *
 *	EINPROGRESS	- No quorum agreement yet, but agreement is still
 *			  possible if caller waits for more responses.
 *			  Caller should not iterate key.
 *
 *	CHECK		- CRC check error
 *
 * NOTE! If the pmp is in HMNT2_LOCAL mode, the cluster check always succeeds.
 *
 * XXX needs to handle SOFT_MASTER and SOFT_SLAVE
 */
int
hammer2_cluster_check(hammer2_cluster_t *cluster, hammer2_key_t key, int flags)
{
	hammer2_chain_t *chain;
	hammer2_chain_t *focus;
	hammer2_pfs_t *pmp;
	hammer2_tid_t quorum_tid;
	hammer2_tid_t last_best_quorum_tid;
	uint32_t nflags;
	int ttlmasters;
	int ttlslaves;
	int nmasters;
	int nmasters_keymatch;
	int nslaves;
	int nquorum;
	int umasters;	/* unknown masters (still in progress) */
	int error;
	int i;

	cluster->error = 0;
	cluster->focus = NULL;

	pmp = cluster->pmp;
	KKASSERT(pmp != NULL || cluster->nchains == 0);

	/*
	 * Calculate quorum
	 */
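	/*
	 * (Editorial note: integer division makes this a strict majority:
	 * 1 master -> quorum 1, 2 -> 2, 3 -> 2, 4 -> 3, 5 -> 3.)
	 */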
	nquorum = pmp ? pmp->pfs_nmasters / 2 + 1 : 0;
	nflags = 0;
	ttlmasters = 0;
	ttlslaves = 0;

	/*
	 * Pass 1
	 *
	 * NOTE: A NULL chain is not necessarily an error, it could be
	 *	 e.g. a lookup failure or the end of an iteration.
	 *	 Process normally.
	 */
	for (i = 0; i < cluster->nchains; ++i) {
		cluster->array[i].flags &= ~HAMMER2_CITEM_FEMOD;
		cluster->array[i].flags |= HAMMER2_CITEM_INVALID;

		chain = cluster->array[i].chain;
		error = cluster->array[i].error;
		if (chain && error) {
			if (cluster->focus == NULL || cluster->focus == chain) {
				/* error will be overridden by valid focus */
				/* XXX */
			}

			/*
			 * Must count total masters and slaves whether the
			 * chain is errored or not.
			 */
			switch (cluster->pmp->pfs_types[i]) {
			case HAMMER2_PFSTYPE_SUPROOT:
			case HAMMER2_PFSTYPE_MASTER:
				++ttlmasters;
				break;
			case HAMMER2_PFSTYPE_SLAVE:
				++ttlslaves;
				break;
			}
			continue;
		}
		switch (cluster->pmp->pfs_types[i]) {
		case HAMMER2_PFSTYPE_MASTER:
			++ttlmasters;
			break;
		case HAMMER2_PFSTYPE_SLAVE:
			++ttlslaves;
			break;
		case HAMMER2_PFSTYPE_SOFT_MASTER:
			nflags |= HAMMER2_CLUSTER_WRSOFT;
			nflags |= HAMMER2_CLUSTER_RDSOFT;
			break;
		case HAMMER2_PFSTYPE_SOFT_SLAVE:
			nflags |= HAMMER2_CLUSTER_RDSOFT;
			break;
		case HAMMER2_PFSTYPE_SUPROOT:
			/*
			 * Degenerate cluster representing the super-root
			 * topology on a single device.  Fake stuff so
			 * cluster ops work as expected.
			 */
			++ttlmasters;
			nflags |= HAMMER2_CLUSTER_WRHARD;
			nflags |= HAMMER2_CLUSTER_RDHARD;
			cluster->focus_index = i;
			cluster->focus = chain;
			cluster->error = error;
			break;
		default:
			break;
		}
	}

	/*
	 * Pass 2
	 *
	 * Resolve nmasters		- master nodes fully match
	 *
	 * Resolve umasters		- master nodes operation still
	 *				  in progress
	 *
	 * Resolve nmasters_keymatch	- master nodes match the passed-in
	 *				  key and may or may not match
	 *				  the quorum-agreed tid.
	 *
	 * The quorum-agreed TID is the highest matching TID.
	 */
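	/*
	 * (Editorial worked example: with three masters, nquorum == 2, and
	 * matching keys whose modify_tids are {10, 8, 8}, the first loop
	 * iteration selects quorum_tid 10 but finds only one agreeing
	 * master; the loop then retries below last_best_quorum_tid 10 and
	 * reaches agreement on quorum_tid 8 with two masters.)
	 */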
	last_best_quorum_tid = HAMMER2_TID_MAX;
	umasters = 0;
	nmasters = 0;
	nmasters_keymatch = 0;
	quorum_tid = 0;		/* fix gcc warning */

	while (nmasters < nquorum && last_best_quorum_tid != 0) {
		umasters = 0;
		nmasters = 0;
		nmasters_keymatch = 0;
		quorum_tid = 0;

		for (i = 0; i < cluster->nchains; ++i) {
			/* XXX SOFT smpresent handling */
			switch(cluster->pmp->pfs_types[i]) {
			case HAMMER2_PFSTYPE_MASTER:
			case HAMMER2_PFSTYPE_SUPROOT:
				break;
			default:
				continue;
			}

			chain = cluster->array[i].chain;
			error = cluster->array[i].error;

			/*
			 * Skip elements still in progress.  umasters keeps
			 * track of masters that might still be in-progress.
			 */
			if (chain == NULL && (cluster->array[i].flags &
					      HAMMER2_CITEM_NULL) == 0) {
				++umasters;
				continue;
			}

			/*
			 * Key match?
			 */
			if (flags & HAMMER2_CHECK_NULL) {
				if (chain == NULL) {
					++nmasters;
					++nmasters_keymatch;
					if (cluster->error == 0)
						cluster->error = error;
				}
			} else if (chain &&
				   (key == (hammer2_key_t)-1 ||
				    chain->bref.key == key)) {
				++nmasters_keymatch;

				if (chain->bref.modify_tid <
				     last_best_quorum_tid &&
				    quorum_tid < chain->bref.modify_tid) {
					/*
					 * Select new TID as master if better
					 * than any found so far in this loop,
					 * as long as it does not reach the
					 * best tid found in the previous loop.
					 */
					nmasters = 0;
					quorum_tid = chain->bref.modify_tid;
				}
				if (quorum_tid == chain->bref.modify_tid) {
					/*
					 * TID matches current collection.
					 *
					 * (error handled in next pass)
					 */
					++nmasters;
					if (chain->error == 0) {
						cluster->focus = chain;
						cluster->focus_index = i;
					}
				}
			}
		}
		if (nmasters >= nquorum)
			break;
		last_best_quorum_tid = quorum_tid;
	}

	/*
	kprintf("nmasters %d/%d nmaster_keymatch=%d umasters=%d\n",
		nmasters, nquorum, nmasters_keymatch, umasters);
	*/

	/*
	 * Early return if we do not have enough masters.
	 */
	if (nmasters < nquorum) {
		if (nmasters + umasters >= nquorum)
			return HAMMER2_ERROR_EINPROGRESS;
		if (nmasters_keymatch < nquorum)
			return HAMMER2_ERROR_ESRCH;
		return HAMMER2_ERROR_EDEADLK;
	}

	/*
	 * Validated end of scan.
	 */
	if (flags & HAMMER2_CHECK_NULL) {
		if (cluster->error == 0)
			cluster->error = HAMMER2_ERROR_ENOENT;
		return cluster->error;
	}

	/*
	 * If we have a NULL focus at this point the agreeing quorum all
	 * had chain errors.
	 */
	if (cluster->focus == NULL)
		return HAMMER2_ERROR_EIO;

	/*
	 * Pass 3
	 *
	 * We have quorum agreement, validate elements, not end of scan.
	 */
	nslaves = 0;
	cluster->error = 0;

	for (i = 0; i < cluster->nchains; ++i) {
		chain = cluster->array[i].chain;
		error = cluster->array[i].error;
		if (chain == NULL ||
		    chain->bref.key != key ||
		    chain->bref.modify_tid != quorum_tid) {
			continue;
		}

		/*
		 * Quorum Match
		 *
		 * XXX for now, cumulative error.
		 */
		if (cluster->error == 0)
			cluster->error = error;

		switch (cluster->pmp->pfs_types[i]) {
		case HAMMER2_PFSTYPE_MASTER:
			cluster->array[i].flags |= HAMMER2_CITEM_FEMOD;
			cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
			nflags |= HAMMER2_CLUSTER_WRHARD;
			nflags |= HAMMER2_CLUSTER_RDHARD;
			break;
		case HAMMER2_PFSTYPE_SLAVE:
			/*
			 * We must have enough up-to-date masters to reach
			 * a quorum and the slave modify_tid must match the
			 * quorum's modify_tid.
			 *
			 * Do not select an errored slave.
			 */
			cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
			nflags |= HAMMER2_CLUSTER_RDHARD;
			++nslaves;
			break;
		case HAMMER2_PFSTYPE_SOFT_MASTER:
			/*
			 * Directly mounted soft master always wins.  There
			 * should be only one.
			 */
			cluster->array[i].flags |= HAMMER2_CITEM_FEMOD;
			cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
			break;
		case HAMMER2_PFSTYPE_SOFT_SLAVE:
			/*
			 * Directly mounted soft slave always wins.  There
			 * should be only one.
			 *
			 * XXX
			 */
			cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
			break;
		case HAMMER2_PFSTYPE_SUPROOT:
			/*
			 * spmp (degenerate case)
			 */
			cluster->array[i].flags |= HAMMER2_CITEM_FEMOD;
			cluster->array[i].flags &= ~HAMMER2_CITEM_INVALID;
			nflags |= HAMMER2_CLUSTER_WRHARD;
			nflags |= HAMMER2_CLUSTER_RDHARD;
			break;
		default:
			break;
		}
	}

	/*
	 * Focus now set, adjust ddflag.  Skip this pass if the focus
	 * is bad or if we are at the PFS root (the bref won't match at
	 * the PFS root, obviously).
	 *
	 * focus is probably not locked and it isn't safe to test its
	 * content (e.g. focus->data, focus->dio, other content).  We
	 * do not synchronize the dio to the cpu here.  In fact, in numerous
	 * situations the frontend doesn't even need to access its dio/data,
	 * so synchronizing it here would be wasteful.
	 */
	focus = cluster->focus;
	if (focus) {
		cluster->ddflag =
			(cluster->focus->bref.type == HAMMER2_BREF_TYPE_INODE);
	} else {
		cluster->ddflag = 0;
		goto skip4;
	}
	if (cluster->focus->flags & HAMMER2_CHAIN_PFSBOUNDARY)
		goto skip4;

	/*
	 * Pass 4
	 *
	 * Validate the elements that were not marked invalid.  They should
	 * match.
	 */
	for (i = 0; i < cluster->nchains; ++i) {
		int ddflag;

		chain = cluster->array[i].chain;

		if (chain == NULL)
			continue;
		if (chain == focus)
			continue;
		if (cluster->array[i].flags & HAMMER2_CITEM_INVALID)
			continue;

		ddflag = (chain->bref.type == HAMMER2_BREF_TYPE_INODE);
		if (chain->bref.type != focus->bref.type ||
		    chain->bref.key != focus->bref.key ||
		    chain->bref.keybits != focus->bref.keybits ||
		    chain->bref.modify_tid != focus->bref.modify_tid ||
		    chain->bytes != focus->bytes ||
		    ddflag != cluster->ddflag) {
			cluster->array[i].flags |= HAMMER2_CITEM_INVALID;
			if (hammer2_debug & 1)
				kprintf("cluster_check: matching modify_tid "
					"failed bref test: idx=%d "
					"type=%02x/%02x "
					"key=%016jx/%d-%016jx/%d "
					"mod=%016jx/%016jx bytes=%u/%u\n",
					i,
					chain->bref.type, focus->bref.type,
					chain->bref.key, chain->bref.keybits,
					focus->bref.key, focus->bref.keybits,
					chain->bref.modify_tid,
					focus->bref.modify_tid,
					chain->bytes, focus->bytes);
			if (hammer2_debug & 0x4000)
				panic("cluster_check");
			/* flag issue and force resync? */
		}
	}
skip4:

	if (ttlslaves == 0)
		nflags |= HAMMER2_CLUSTER_NOSOFT;
	if (ttlmasters == 0)
		nflags |= HAMMER2_CLUSTER_NOHARD;

	/*
	 * Set SSYNCED or MSYNCED for slaves and masters respectively if
	 * all available nodes (even if 0 are available) are fully
	 * synchronized.  This is used by the synchronization thread to
	 * determine if there is work it could potentially accomplish.
	 */
	if (nslaves == ttlslaves)
		nflags |= HAMMER2_CLUSTER_SSYNCED;
	if (nmasters == ttlmasters)
		nflags |= HAMMER2_CLUSTER_MSYNCED;

	/*
	 * Determine if the cluster was successfully locked for the
	 * requested operation and generate an error code.  The cluster
	 * will not be locked (or ref'd) if an error is returned.
	 */
	atomic_set_int(&cluster->flags, nflags);
	atomic_clear_int(&cluster->flags, HAMMER2_CLUSTER_ZFLAGS & ~nflags);

	return cluster->error;
}
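
/*
 * Illustrative sketch (editorial, not compiled): how a collector such as
 * hammer2_xop_collect() might dispatch on the return codes documented
 * above.  The surrounding loop and variables are assumptions, not the
 * actual xop implementation.
 */
#if 0
	error = hammer2_cluster_check(&xop->cluster, key, 0);
	switch (error) {
	case 0:				/* quorum agreement, key valid */
		break;
	case HAMMER2_ERROR_ESRCH:	/* agreement, but skip this key */
	case HAMMER2_ERROR_EDEADLK:	/* no agreement; iterate or EIO */
		/* advance key and retry */
		break;
	case HAMMER2_ERROR_EINPROGRESS:	/* wait for more node responses */
		break;
	default:			/* ENOENT, EIO, CHECK, ... */
		break;
	}
#endif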

/*
 * Unlock a cluster.  The refcount and focus are maintained.
 */
void
hammer2_cluster_unlock(hammer2_cluster_t *cluster)
{
	hammer2_chain_t *chain;
	int i;

	if ((cluster->flags & HAMMER2_CLUSTER_LOCKED) == 0) {
		kprintf("hammer2_cluster_unlock: cluster %p not locked\n",
			cluster);
	}
	KKASSERT(cluster->flags & HAMMER2_CLUSTER_LOCKED);
	KKASSERT(cluster->refs > 0);
	atomic_clear_int(&cluster->flags, HAMMER2_CLUSTER_LOCKED);

	for (i = 0; i < cluster->nchains; ++i) {
		chain = cluster->array[i].chain;
		if (chain)
			hammer2_chain_unlock(chain);
	}
}