xref: /netbsd-src/sys/kern/vfs_trans.c (revision f3cfa6f6ce31685c6c4a758bc430e69eb99f50a4)
1 /*	$NetBSD: vfs_trans.c,v 1.60 2019/05/13 08:16:56 hannken Exp $	*/
2 
3 /*-
4  * Copyright (c) 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Juergen Hannken-Illjes.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.60 2019/05/13 08:16:56 hannken Exp $");
34 
35 /*
36  * File system transaction operations.
37  */
38 
39 #ifdef _KERNEL_OPT
40 #include "opt_ddb.h"
41 #endif
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/atomic.h>
46 #include <sys/buf.h>
47 #include <sys/kmem.h>
48 #include <sys/mount.h>
49 #include <sys/pserialize.h>
50 #include <sys/vnode.h>
51 #include <sys/fstrans.h>
52 #include <sys/proc.h>
53 
54 #include <miscfs/specfs/specdev.h>
55 
/*
 * Kind of transaction lock an lwp holds on a mount.
 * grant_lock() decides which kind is admitted in which mount state.
 */
enum fstrans_lock_type {
	FSTRANS_LAZY,	/* Granted while not suspended */
	FSTRANS_SHARED,	/* Granted while not suspending */
	FSTRANS_EXCL	/* Internal: exclusive lock */
};
61 
62 struct fscow_handler {
63 	LIST_ENTRY(fscow_handler) ch_list;
64 	int (*ch_func)(void *, struct buf *, bool);
65 	void *ch_arg;
66 };
67 struct fstrans_lwp_info {
68 	struct fstrans_lwp_info *fli_succ;
69 	struct lwp *fli_self;
70 	struct mount *fli_mount;
71 	struct fstrans_lwp_info *fli_alias;
72 	struct fstrans_mount_info *fli_mountinfo;
73 	int fli_trans_cnt;
74 	int fli_alias_cnt;
75 	int fli_cow_cnt;
76 	enum fstrans_lock_type fli_lock_type;
77 	LIST_ENTRY(fstrans_lwp_info) fli_list;
78 };
79 struct fstrans_mount_info {
80 	enum fstrans_state fmi_state;
81 	unsigned int fmi_ref_cnt;
82 	bool fmi_gone;
83 	bool fmi_cow_change;
84 	LIST_HEAD(, fscow_handler) fmi_cow_handler;
85 	struct mount *fmi_mount;
86 };
87 
88 static kmutex_t vfs_suspend_lock;	/* Serialize suspensions. */
89 static kmutex_t fstrans_lock;		/* Fstrans big lock. */
90 static kmutex_t fstrans_mount_lock;	/* Fstrans mount big lock. */
91 static kcondvar_t fstrans_state_cv;	/* Fstrans or cow state changed. */
92 static kcondvar_t fstrans_count_cv;	/* Fstrans or cow count changed. */
93 static pserialize_t fstrans_psz;	/* Pserialize state. */
94 static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head;
95 					/* List of all fstrans_lwp_info. */
96 static int fstrans_gone_count;		/* Number of fstrans_mount_info gone. */
97 
98 static void fstrans_mount_dtor(struct fstrans_mount_info *);
99 static void fstrans_clear_lwp_info(void);
100 static inline struct fstrans_lwp_info *
101     fstrans_get_lwp_info(struct mount *, bool);
102 static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *);
103 static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int);
104 static bool grant_lock(const enum fstrans_state, const enum fstrans_lock_type);
105 static bool state_change_done(const struct fstrans_mount_info *);
106 static bool cow_state_change_done(const struct fstrans_mount_info *);
107 static void cow_change_enter(struct fstrans_mount_info *);
108 static void cow_change_done(struct fstrans_mount_info *);
109 
110 extern struct mount *dead_rootmount;
111 
112 #if defined(DIAGNOSTIC)
113 
114 struct fstrans_debug_mount {
115 	struct mount *fdm_mount;
116 	SLIST_ENTRY(fstrans_debug_mount) fdm_list;
117 };
118 
119 static SLIST_HEAD(, fstrans_debug_mount) fstrans_debug_mount_head =
120     SLIST_HEAD_INITIALIZER(fstrans_debug_mount_head);
121 
122 static void
123 fstrans_debug_mount(struct mount *mp)
124 {
125 	struct fstrans_debug_mount *fdm, *new;
126 
127 	KASSERT(mutex_owned(&fstrans_mount_lock));
128 
129 	mutex_exit(&fstrans_mount_lock);
130 	new = kmem_alloc(sizeof(*new), KM_SLEEP);
131 	new->fdm_mount = mp;
132 	mutex_enter(&fstrans_mount_lock);
133 
134 	SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
135 		KASSERT(fdm->fdm_mount != mp);
136 	SLIST_INSERT_HEAD(&fstrans_debug_mount_head, new, fdm_list);
137 }
138 
139 static void
140 fstrans_debug_unmount(struct mount *mp)
141 {
142 	struct fstrans_debug_mount *fdm;
143 
144 	KASSERT(mutex_owned(&fstrans_mount_lock));
145 
146 	SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
147 		if (fdm->fdm_mount == mp)
148 			break;
149 	KASSERT(fdm != NULL);
150 	SLIST_REMOVE(&fstrans_debug_mount_head, fdm,
151 	    fstrans_debug_mount, fdm_list);
152 	kmem_free(fdm, sizeof(*fdm));
153 }
154 
155 static void
156 fstrans_debug_validate_mount(struct mount *mp)
157 {
158 	struct fstrans_debug_mount *fdm;
159 
160 	KASSERT(mutex_owned(&fstrans_mount_lock));
161 
162 	SLIST_FOREACH(fdm, &fstrans_debug_mount_head, fdm_list)
163 		if (fdm->fdm_mount == mp)
164 			break;
165 	KASSERTMSG(fdm != NULL, "mount %p invalid", mp);
166 }
167 
168 #else /* defined(DIAGNOSTIC) */
169 
/* Without DIAGNOSTIC the mount bookkeeping hooks do nothing. */
#define fstrans_debug_mount(mp)			((void)0)
#define fstrans_debug_unmount(mp)		((void)0)
#define fstrans_debug_validate_mount(mp)	((void)0)
173 
174 #endif  /* defined(DIAGNOSTIC) */
175 
176 /*
177  * Initialize.
178  */
179 void
180 fstrans_init(void)
181 {
182 
183 	mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
184 	mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE);
185 	mutex_init(&fstrans_mount_lock, MUTEX_DEFAULT, IPL_NONE);
186 	cv_init(&fstrans_state_cv, "fstchg");
187 	cv_init(&fstrans_count_cv, "fstcnt");
188 	fstrans_psz = pserialize_create();
189 	LIST_INIT(&fstrans_fli_head);
190 }
191 
192 /*
193  * Deallocate lwp state.
194  */
195 void
196 fstrans_lwp_dtor(lwp_t *l)
197 {
198 	struct fstrans_lwp_info *fli, *fli_next;
199 
200 	for (fli = l->l_fstrans; fli; fli = fli_next) {
201 		KASSERT(fli->fli_trans_cnt == 0);
202 		KASSERT(fli->fli_cow_cnt == 0);
203 		KASSERT(fli->fli_self == l);
204 		if (fli->fli_mount != NULL)
205 			fstrans_mount_dtor(fli->fli_mountinfo);
206 		fli_next = fli->fli_succ;
207 		fli->fli_alias_cnt = 0;
208 		fli->fli_mount = NULL;
209 		fli->fli_alias = NULL;
210 		fli->fli_mountinfo = NULL;
211 		membar_sync();
212 		fli->fli_self = NULL;
213 	}
214 
215 	l->l_fstrans = NULL;
216 }
217 
218 /*
219  * Dereference mount state.
220  */
221 static void
222 fstrans_mount_dtor(struct fstrans_mount_info *fmi)
223 {
224 
225 	mutex_enter(&fstrans_mount_lock);
226 
227 	KASSERT(fmi != NULL);
228 	fmi->fmi_ref_cnt -= 1;
229 	if (fmi->fmi_ref_cnt > 0) {
230 		mutex_exit(&fstrans_mount_lock);
231 		return;
232 	}
233 
234 	KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
235 	KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL);
236 
237 	KASSERT(fstrans_gone_count > 0);
238 	fstrans_gone_count -= 1;
239 
240 	mutex_exit(&fstrans_mount_lock);
241 
242 	kmem_free(fmi->fmi_mount, sizeof(*fmi->fmi_mount));
243 	kmem_free(fmi, sizeof(*fmi));
244 }
245 
246 /*
247  * Allocate mount state.
248  */
249 int
250 fstrans_mount(struct mount *mp)
251 {
252 	struct fstrans_mount_info *newfmi;
253 
254 	newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP);
255 	newfmi->fmi_state = FSTRANS_NORMAL;
256 	newfmi->fmi_ref_cnt = 1;
257 	newfmi->fmi_gone = false;
258 	LIST_INIT(&newfmi->fmi_cow_handler);
259 	newfmi->fmi_cow_change = false;
260 	newfmi->fmi_mount = mp;
261 
262 	mutex_enter(&fstrans_mount_lock);
263 	mp->mnt_transinfo = newfmi;
264 	fstrans_debug_mount(mp);
265 	mutex_exit(&fstrans_mount_lock);
266 
267 	return 0;
268 }
269 
270 /*
271  * Deallocate mount state.
272  */
273 void
274 fstrans_unmount(struct mount *mp)
275 {
276 	struct fstrans_mount_info *fmi = mp->mnt_transinfo;
277 
278 	KASSERT(fmi != NULL);
279 
280 	mutex_enter(&fstrans_mount_lock);
281 	fstrans_debug_unmount(mp);
282 	fmi->fmi_gone = true;
283 	mp->mnt_transinfo = NULL;
284 	fstrans_gone_count += 1;
285 	mutex_exit(&fstrans_mount_lock);
286 
287 	fstrans_mount_dtor(fmi);
288 }
289 
290 /*
291  * Clear mount entries whose mount is gone.
292  */
293 static void
294 fstrans_clear_lwp_info(void)
295 {
296 	struct fstrans_lwp_info **p, *fli;
297 
298 	/*
299 	 * Scan our list clearing entries whose mount is gone.
300 	 */
301 	for (p = &curlwp->l_fstrans; *p; ) {
302 		fli = *p;
303 		if (fli->fli_mount != NULL &&
304 		    fli->fli_mountinfo->fmi_gone &&
305 		    fli->fli_trans_cnt == 0 &&
306 		    fli->fli_cow_cnt == 0 &&
307 		    fli->fli_alias_cnt == 0) {
308 			*p = (*p)->fli_succ;
309 			fstrans_mount_dtor(fli->fli_mountinfo);
310 			if (fli->fli_alias) {
311 				KASSERT(fli->fli_alias->fli_alias_cnt > 0);
312 				fli->fli_alias->fli_alias_cnt--;
313 			}
314 			fli->fli_mount = NULL;
315 			fli->fli_alias = NULL;
316 			fli->fli_mountinfo = NULL;
317 			membar_sync();
318 			fli->fli_self = NULL;
319 			p = &curlwp->l_fstrans;
320 		} else {
321 			p = &(*p)->fli_succ;
322 		}
323 	}
324 #ifdef DIAGNOSTIC
325 	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ)
326 		if (fli->fli_alias != NULL)
327 			KASSERT(fli->fli_alias->fli_self == curlwp);
328 #endif /* DIAGNOSTIC */
329 }
330 
331 /*
332  * Allocate and return per lwp info for this mount.
333  */
334 static struct fstrans_lwp_info *
335 fstrans_alloc_lwp_info(struct mount *mp)
336 {
337 	struct fstrans_lwp_info *fli;
338 	struct fstrans_mount_info *fmi;
339 
340 	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
341 		if (fli->fli_mount == mp)
342 			return fli;
343 	}
344 
345 	/*
346 	 * Try to reuse a cleared entry or allocate a new one.
347 	 */
348 	mutex_enter(&fstrans_lock);
349 	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
350 		membar_sync();
351 		if (fli->fli_self == NULL) {
352 			KASSERT(fli->fli_mount == NULL);
353 			KASSERT(fli->fli_trans_cnt == 0);
354 			KASSERT(fli->fli_cow_cnt == 0);
355 			KASSERT(fli->fli_alias_cnt == 0);
356 			fli->fli_self = curlwp;
357 			fli->fli_succ = curlwp->l_fstrans;
358 			curlwp->l_fstrans = fli;
359 			break;
360 		}
361 	}
362 	mutex_exit(&fstrans_lock);
363 
364 	if (fli == NULL) {
365 		fli = kmem_alloc(sizeof(*fli), KM_SLEEP);
366 		mutex_enter(&fstrans_lock);
367 		memset(fli, 0, sizeof(*fli));
368 		fli->fli_self = curlwp;
369 		LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list);
370 		mutex_exit(&fstrans_lock);
371 		fli->fli_succ = curlwp->l_fstrans;
372 		curlwp->l_fstrans = fli;
373 	}
374 
375 	/*
376 	 * Attach the entry to the mount if its mnt_transinfo is valid.
377 	 */
378 
379 	mutex_enter(&fstrans_mount_lock);
380 	fstrans_debug_validate_mount(mp);
381 	fmi = mp->mnt_transinfo;
382 	KASSERT(fmi != NULL);
383 	fli->fli_mount = mp;
384 	fli->fli_mountinfo = fmi;
385 	fmi->fmi_ref_cnt += 1;
386 	do {
387 		mp = mp->mnt_lower;
388 	} while (mp && mp->mnt_lower);
389 	mutex_exit(&fstrans_mount_lock);
390 
391 	if (mp) {
392 		fli->fli_alias = fstrans_alloc_lwp_info(mp);
393 		fli->fli_alias->fli_alias_cnt++;
394 		fli = fli->fli_alias;
395 	}
396 
397 	return fli;
398 }
399 
400 /*
401  * Retrieve the per lwp info for this mount allocating if necessary.
402  */
403 static inline struct fstrans_lwp_info *
404 fstrans_get_lwp_info(struct mount *mp, bool do_alloc)
405 {
406 	struct fstrans_lwp_info *fli;
407 
408 	/*
409 	 * Scan our list for a match.
410 	 */
411 	for (fli = curlwp->l_fstrans; fli; fli = fli->fli_succ) {
412 		if (fli->fli_mount == mp) {
413 			KASSERT((mp->mnt_lower == NULL) ==
414 			    (fli->fli_alias == NULL));
415 			if (fli->fli_alias != NULL)
416 				fli = fli->fli_alias;
417 			break;
418 		}
419 	}
420 
421 	if (do_alloc) {
422 		if (__predict_false(fli == NULL))
423 			fli = fstrans_alloc_lwp_info(mp);
424 		KASSERT(fli != NULL && !fli->fli_mountinfo->fmi_gone);
425 	} else {
426 		KASSERT(fli != NULL);
427 	}
428 
429 	return fli;
430 }
431 
432 /*
433  * Check if this lock type is granted at this state.
434  */
435 static bool
436 grant_lock(const enum fstrans_state state, const enum fstrans_lock_type type)
437 {
438 
439 	if (__predict_true(state == FSTRANS_NORMAL))
440 		return true;
441 	if (type == FSTRANS_EXCL)
442 		return true;
443 	if  (state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY)
444 		return true;
445 
446 	return false;
447 }
448 
449 /*
450  * Start a transaction.  If this thread already has a transaction on this
451  * file system increment the reference counter.
452  */
453 static inline int
454 _fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
455 {
456 	int s;
457 	struct fstrans_lwp_info *fli;
458 	struct fstrans_mount_info *fmi;
459 
460 #ifndef FSTRANS_DEAD_ENABLED
461 	if (mp == dead_rootmount)
462 		return 0;
463 #endif
464 
465 	ASSERT_SLEEPABLE();
466 
467 	fli = fstrans_get_lwp_info(mp, true);
468 	fmi = fli->fli_mountinfo;
469 
470 	if (fli->fli_trans_cnt > 0) {
471 		KASSERT(lock_type != FSTRANS_EXCL);
472 		fli->fli_trans_cnt += 1;
473 
474 		return 0;
475 	}
476 
477 	s = pserialize_read_enter();
478 	if (__predict_true(grant_lock(fmi->fmi_state, lock_type))) {
479 		fli->fli_trans_cnt = 1;
480 		fli->fli_lock_type = lock_type;
481 		pserialize_read_exit(s);
482 
483 		return 0;
484 	}
485 	pserialize_read_exit(s);
486 
487 	if (! wait)
488 		return EBUSY;
489 
490 	mutex_enter(&fstrans_lock);
491 	while (! grant_lock(fmi->fmi_state, lock_type))
492 		cv_wait(&fstrans_state_cv, &fstrans_lock);
493 	fli->fli_trans_cnt = 1;
494 	fli->fli_lock_type = lock_type;
495 	mutex_exit(&fstrans_lock);
496 
497 	return 0;
498 }
499 
500 void
501 fstrans_start(struct mount *mp)
502 {
503 	int error __diagused;
504 
505 	error = _fstrans_start(mp, FSTRANS_SHARED, 1);
506 	KASSERT(error == 0);
507 }
508 
509 int
510 fstrans_start_nowait(struct mount *mp)
511 {
512 
513 	return _fstrans_start(mp, FSTRANS_SHARED, 0);
514 }
515 
516 void
517 fstrans_start_lazy(struct mount *mp)
518 {
519 	int error __diagused;
520 
521 	error = _fstrans_start(mp, FSTRANS_LAZY, 1);
522 	KASSERT(error == 0);
523 }
524 
525 /*
526  * Finish a transaction.
527  */
528 void
529 fstrans_done(struct mount *mp)
530 {
531 	int s;
532 	struct fstrans_lwp_info *fli;
533 	struct fstrans_mount_info *fmi;
534 
535 #ifndef FSTRANS_DEAD_ENABLED
536 	if (mp == dead_rootmount)
537 		return;
538 #endif
539 
540 	fli = fstrans_get_lwp_info(mp, false);
541 	fmi = fli->fli_mountinfo;
542 	KASSERT(fli->fli_trans_cnt > 0);
543 
544 	if (fli->fli_trans_cnt > 1) {
545 		fli->fli_trans_cnt -= 1;
546 
547 		return;
548 	}
549 
550 	if (__predict_false(fstrans_gone_count > 0))
551 		fstrans_clear_lwp_info();
552 
553 	s = pserialize_read_enter();
554 	if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) {
555 		fli->fli_trans_cnt = 0;
556 		pserialize_read_exit(s);
557 
558 		return;
559 	}
560 	pserialize_read_exit(s);
561 
562 	mutex_enter(&fstrans_lock);
563 	fli->fli_trans_cnt = 0;
564 	cv_signal(&fstrans_count_cv);
565 	mutex_exit(&fstrans_lock);
566 }
567 
568 /*
569  * Check if this thread has an exclusive lock.
570  */
571 int
572 fstrans_is_owner(struct mount *mp)
573 {
574 	struct fstrans_lwp_info *fli;
575 
576 	KASSERT(mp != dead_rootmount);
577 
578 	fli = fstrans_get_lwp_info(mp, true);
579 
580 	if (fli->fli_trans_cnt == 0)
581 		return 0;
582 
583 	return (fli->fli_lock_type == FSTRANS_EXCL);
584 }
585 
586 /*
587  * True, if no thread is in a transaction not granted at the current state.
588  */
589 static bool
590 state_change_done(const struct fstrans_mount_info *fmi)
591 {
592 	struct fstrans_lwp_info *fli;
593 
594 	KASSERT(mutex_owned(&fstrans_lock));
595 
596 	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
597 		if (fli->fli_mountinfo != fmi)
598 			continue;
599 		if (fli->fli_trans_cnt == 0)
600 			continue;
601 		if (grant_lock(fmi->fmi_state, fli->fli_lock_type))
602 			continue;
603 
604 		return false;
605 	}
606 
607 	return true;
608 }
609 
610 /*
611  * Set new file system state.
612  */
613 int
614 fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
615 {
616 	int error;
617 	enum fstrans_state old_state;
618 	struct fstrans_lwp_info *fli;
619 	struct fstrans_mount_info *fmi;
620 
621 	KASSERT(mp != dead_rootmount);
622 
623 	fli = fstrans_get_lwp_info(mp, true);
624 	fmi = fli->fli_mountinfo;
625 	old_state = fmi->fmi_state;
626 	if (old_state == new_state)
627 		return 0;
628 
629 	mutex_enter(&fstrans_lock);
630 	fmi->fmi_state = new_state;
631 	pserialize_perform(fstrans_psz);
632 
633 	/*
634 	 * All threads see the new state now.
635 	 * Wait for transactions invalid at this state to leave.
636 	 */
637 	error = 0;
638 	while (! state_change_done(fmi)) {
639 		error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock);
640 		if (error) {
641 			new_state = fmi->fmi_state = FSTRANS_NORMAL;
642 			break;
643 		}
644 	}
645 	cv_broadcast(&fstrans_state_cv);
646 	mutex_exit(&fstrans_lock);
647 
648 	if (old_state != new_state) {
649 		if (old_state == FSTRANS_NORMAL)
650 			_fstrans_start(mp, FSTRANS_EXCL, 1);
651 		if (new_state == FSTRANS_NORMAL)
652 			fstrans_done(mp);
653 	}
654 
655 	return error;
656 }
657 
658 /*
659  * Get current file system state.
660  */
661 enum fstrans_state
662 fstrans_getstate(struct mount *mp)
663 {
664 	struct fstrans_lwp_info *fli;
665 	struct fstrans_mount_info *fmi;
666 
667 	KASSERT(mp != dead_rootmount);
668 
669 	fli = fstrans_get_lwp_info(mp, true);
670 	fmi = fli->fli_mountinfo;
671 
672 	return fmi->fmi_state;
673 }
674 
675 /*
676  * Request a filesystem to suspend all operations.
677  */
678 int
679 vfs_suspend(struct mount *mp, int nowait)
680 {
681 	struct fstrans_lwp_info *fli;
682 	int error;
683 
684 	if (mp == dead_rootmount)
685 		return EOPNOTSUPP;
686 
687 	fli = fstrans_get_lwp_info(mp, true);
688 	mp = fli->fli_mount;
689 
690 	if (nowait) {
691 		if (!mutex_tryenter(&vfs_suspend_lock))
692 			return EWOULDBLOCK;
693 	} else
694 		mutex_enter(&vfs_suspend_lock);
695 
696 	if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0)
697 		mutex_exit(&vfs_suspend_lock);
698 
699 	return error;
700 }
701 
702 /*
703  * Request a filesystem to resume all operations.
704  */
705 void
706 vfs_resume(struct mount *mp)
707 {
708 	struct fstrans_lwp_info *fli;
709 
710 	KASSERT(mp != dead_rootmount);
711 
712 	fli = fstrans_get_lwp_info(mp, false);
713 	mp = fli->fli_mount;
714 
715 	VFS_SUSPENDCTL(mp, SUSPEND_RESUME);
716 	mutex_exit(&vfs_suspend_lock);
717 }
718 
719 
720 /*
721  * True, if no thread is running a cow handler.
722  */
723 static bool
724 cow_state_change_done(const struct fstrans_mount_info *fmi)
725 {
726 	struct fstrans_lwp_info *fli;
727 
728 	KASSERT(mutex_owned(&fstrans_lock));
729 	KASSERT(fmi->fmi_cow_change);
730 
731 	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
732 		if (fli->fli_mount != fmi->fmi_mount)
733 			continue;
734 		if (fli->fli_cow_cnt == 0)
735 			continue;
736 
737 		return false;
738 	}
739 
740 	return true;
741 }
742 
743 /*
744  * Prepare for changing this mounts cow list.
745  * Returns with fstrans_lock locked.
746  */
747 static void
748 cow_change_enter(struct fstrans_mount_info *fmi)
749 {
750 
751 	mutex_enter(&fstrans_lock);
752 
753 	/*
754 	 * Wait for other threads changing the list.
755 	 */
756 	while (fmi->fmi_cow_change)
757 		cv_wait(&fstrans_state_cv, &fstrans_lock);
758 
759 	/*
760 	 * Wait until all threads are aware of a state change.
761 	 */
762 	fmi->fmi_cow_change = true;
763 	pserialize_perform(fstrans_psz);
764 
765 	while (! cow_state_change_done(fmi))
766 		cv_wait(&fstrans_count_cv, &fstrans_lock);
767 }
768 
769 /*
770  * Done changing this mounts cow list.
771  */
772 static void
773 cow_change_done(struct fstrans_mount_info *fmi)
774 {
775 
776 	KASSERT(mutex_owned(&fstrans_lock));
777 
778 	fmi->fmi_cow_change = false;
779 	pserialize_perform(fstrans_psz);
780 
781 	cv_broadcast(&fstrans_state_cv);
782 
783 	mutex_exit(&fstrans_lock);
784 }
785 
786 /*
787  * Add a handler to this mount.
788  */
789 int
790 fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
791     void *arg)
792 {
793 	struct fstrans_mount_info *fmi;
794 	struct fscow_handler *newch;
795 
796 	KASSERT(mp != dead_rootmount);
797 
798 	mutex_enter(&fstrans_mount_lock);
799 	fmi = mp->mnt_transinfo;
800 	KASSERT(fmi != NULL);
801 	fmi->fmi_ref_cnt += 1;
802 	mutex_exit(&fstrans_mount_lock);
803 
804 	newch = kmem_alloc(sizeof(*newch), KM_SLEEP);
805 	newch->ch_func = func;
806 	newch->ch_arg = arg;
807 
808 	cow_change_enter(fmi);
809 	LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list);
810 	cow_change_done(fmi);
811 
812 	return 0;
813 }
814 
815 /*
816  * Remove a handler from this mount.
817  */
818 int
819 fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
820     void *arg)
821 {
822 	struct fstrans_mount_info *fmi;
823 	struct fscow_handler *hp = NULL;
824 
825 	KASSERT(mp != dead_rootmount);
826 
827 	fmi = mp->mnt_transinfo;
828 	KASSERT(fmi != NULL);
829 
830 	cow_change_enter(fmi);
831 	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
832 		if (hp->ch_func == func && hp->ch_arg == arg)
833 			break;
834 	if (hp != NULL) {
835 		LIST_REMOVE(hp, ch_list);
836 		kmem_free(hp, sizeof(*hp));
837 	}
838 	cow_change_done(fmi);
839 
840 	fstrans_mount_dtor(fmi);
841 
842 	return hp ? 0 : EINVAL;
843 }
844 
845 /*
846  * Check for need to copy block that is about to be written.
847  */
848 int
849 fscow_run(struct buf *bp, bool data_valid)
850 {
851 	int error, s;
852 	struct mount *mp;
853 	struct fstrans_lwp_info *fli;
854 	struct fstrans_mount_info *fmi;
855 	struct fscow_handler *hp;
856 
857 	/*
858 	 * First check if we need run the copy-on-write handler.
859 	 */
860 	if ((bp->b_flags & B_COWDONE))
861 		return 0;
862 	if (bp->b_vp == NULL) {
863 		bp->b_flags |= B_COWDONE;
864 		return 0;
865 	}
866 	if (bp->b_vp->v_type == VBLK)
867 		mp = spec_node_getmountedfs(bp->b_vp);
868 	else
869 		mp = bp->b_vp->v_mount;
870 	if (mp == NULL || mp == dead_rootmount) {
871 		bp->b_flags |= B_COWDONE;
872 		return 0;
873 	}
874 
875 	fli = fstrans_get_lwp_info(mp, true);
876 	fmi = fli->fli_mountinfo;
877 
878 	/*
879 	 * On non-recursed run check if other threads
880 	 * want to change the list.
881 	 */
882 	if (fli->fli_cow_cnt == 0) {
883 		s = pserialize_read_enter();
884 		if (__predict_false(fmi->fmi_cow_change)) {
885 			pserialize_read_exit(s);
886 			mutex_enter(&fstrans_lock);
887 			while (fmi->fmi_cow_change)
888 				cv_wait(&fstrans_state_cv, &fstrans_lock);
889 			fli->fli_cow_cnt = 1;
890 			mutex_exit(&fstrans_lock);
891 		} else {
892 			fli->fli_cow_cnt = 1;
893 			pserialize_read_exit(s);
894 		}
895 	} else
896 		fli->fli_cow_cnt += 1;
897 
898 	/*
899 	 * Run all copy-on-write handlers, stop on error.
900 	 */
901 	error = 0;
902 	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
903 		if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
904 			break;
905  	if (error == 0)
906  		bp->b_flags |= B_COWDONE;
907 
908 	/*
909 	 * Check if other threads want to change the list.
910 	 */
911 	if (fli->fli_cow_cnt > 1) {
912 		fli->fli_cow_cnt -= 1;
913 	} else {
914 		s = pserialize_read_enter();
915 		if (__predict_false(fmi->fmi_cow_change)) {
916 			pserialize_read_exit(s);
917 			mutex_enter(&fstrans_lock);
918 			fli->fli_cow_cnt = 0;
919 			cv_signal(&fstrans_count_cv);
920 			mutex_exit(&fstrans_lock);
921 		} else {
922 			fli->fli_cow_cnt = 0;
923 			pserialize_read_exit(s);
924 		}
925 	}
926 
927 	return error;
928 }
929 
930 #if defined(DDB)
931 void fstrans_dump(int);
932 
933 static void
934 fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
935 {
936 	char prefix[9];
937 	struct fstrans_lwp_info *fli;
938 
939 	snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
940 	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
941 		if (fli->fli_self != l)
942 			continue;
943 		if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
944 			if (! verbose)
945 				continue;
946 		}
947 		printf("%-8s", prefix);
948 		if (verbose)
949 			printf(" @%p", fli);
950 		if (fli->fli_mount == dead_rootmount)
951 			printf(" <dead>");
952 		else if (fli->fli_mount != NULL)
953 			printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
954 		else
955 			printf(" NULL");
956 		if (fli->fli_alias != NULL) {
957 			struct mount *amp = fli->fli_alias->fli_mount;
958 
959 			printf(" alias");
960 			if (verbose)
961 				printf(" @%p", fli->fli_alias);
962 			if (amp == NULL)
963 				printf(" NULL");
964 			else
965 				printf(" (%s)", amp->mnt_stat.f_mntonname);
966 		}
967 		if (fli->fli_mountinfo && fli->fli_mountinfo->fmi_gone)
968 			printf(" gone");
969 		if (fli->fli_trans_cnt == 0) {
970 			printf(" -");
971 		} else {
972 			switch (fli->fli_lock_type) {
973 			case FSTRANS_LAZY:
974 				printf(" lazy");
975 				break;
976 			case FSTRANS_SHARED:
977 				printf(" shared");
978 				break;
979 			case FSTRANS_EXCL:
980 				printf(" excl");
981 				break;
982 			default:
983 				printf(" %#x", fli->fli_lock_type);
984 				break;
985 			}
986 		}
987 		printf(" %d cow %d alias %d\n",
988 		    fli->fli_trans_cnt, fli->fli_cow_cnt, fli->fli_alias_cnt);
989 		prefix[0] = '\0';
990 	}
991 }
992 
993 static void
994 fstrans_print_mount(struct mount *mp, int verbose)
995 {
996 	struct fstrans_mount_info *fmi;
997 
998 	fmi = mp->mnt_transinfo;
999 	if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
1000 		return;
1001 
1002 	printf("%-16s ", mp->mnt_stat.f_mntonname);
1003 	if (fmi == NULL) {
1004 		printf("(null)\n");
1005 		return;
1006 	}
1007 	switch (fmi->fmi_state) {
1008 	case FSTRANS_NORMAL:
1009 		printf("state normal\n");
1010 		break;
1011 	case FSTRANS_SUSPENDING:
1012 		printf("state suspending\n");
1013 		break;
1014 	case FSTRANS_SUSPENDED:
1015 		printf("state suspended\n");
1016 		break;
1017 	default:
1018 		printf("state %#x\n", fmi->fmi_state);
1019 		break;
1020 	}
1021 }
1022 
1023 void
1024 fstrans_dump(int full)
1025 {
1026 	const struct proclist_desc *pd;
1027 	struct proc *p;
1028 	struct lwp *l;
1029 	struct mount *mp;
1030 
1031 	printf("Fstrans locks by lwp:\n");
1032 	for (pd = proclists; pd->pd_list != NULL; pd++)
1033 		PROCLIST_FOREACH(p, pd->pd_list)
1034 			LIST_FOREACH(l, &p->p_lwps, l_sibling)
1035 				fstrans_print_lwp(p, l, full == 1);
1036 
1037 	printf("Fstrans state by mount:\n");
1038 	for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp))
1039 		fstrans_print_mount(mp, full == 1);
1040 }
1041 #endif /* defined(DDB) */
1042