xref: /netbsd-src/sys/kern/vfs_trans.c (revision 7e30e94394d0994ab9534f68a8f91665045c91ce)
1 /*	$NetBSD: vfs_trans.c,v 1.39 2017/03/06 10:11:21 hannken Exp $	*/
2 
3 /*-
4  * Copyright (c) 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Juergen Hannken-Illjes.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.39 2017/03/06 10:11:21 hannken Exp $");
34 
35 /*
36  * File system transaction operations.
37  */
38 
39 #ifdef _KERNEL_OPT
40 #include "opt_ddb.h"
41 #endif
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/atomic.h>
46 #include <sys/buf.h>
47 #include <sys/kmem.h>
48 #include <sys/mount.h>
49 #include <sys/pserialize.h>
50 #include <sys/vnode.h>
51 #define _FSTRANS_API_PRIVATE
52 #include <sys/fstrans.h>
53 #include <sys/proc.h>
54 
55 #include <miscfs/specfs/specdev.h>
56 
/*
 * One registered copy-on-write handler.  Entries are linked on the
 * fmi_cow_handler list of the mount they were established on and are
 * invoked from fscow_run().
 */
struct fscow_handler {
	/* Linkage on fstrans_mount_info::fmi_cow_handler. */
	LIST_ENTRY(fscow_handler) ch_list;
	/* Callback; a non-zero return stops fscow_run() and is returned. */
	int (*ch_func)(void *arg, struct buf *bp, bool data_valid);
	/* Opaque cookie handed back as the first argument of ch_func. */
	void *ch_arg;
};
62 struct fstrans_lwp_info {
63 	struct fstrans_lwp_info *fli_succ;
64 	struct lwp *fli_self;
65 	struct mount *fli_mount;
66 	int fli_trans_cnt;
67 	int fli_cow_cnt;
68 	enum fstrans_lock_type fli_lock_type;
69 	LIST_ENTRY(fstrans_lwp_info) fli_list;
70 };
71 struct fstrans_mount_info {
72 	enum fstrans_state fmi_state;
73 	unsigned int fmi_ref_cnt;
74 	bool fmi_cow_change;
75 	LIST_HEAD(, fscow_handler) fmi_cow_handler;
76 };
77 
78 static specificdata_key_t lwp_data_key;	/* Our specific data key. */
79 static kmutex_t vfs_suspend_lock;	/* Serialize suspensions. */
80 static kmutex_t fstrans_lock;		/* Fstrans big lock. */
81 static kmutex_t fstrans_mount_lock;	/* Fstrans mount big lock. */
82 static kcondvar_t fstrans_state_cv;	/* Fstrans or cow state changed. */
83 static kcondvar_t fstrans_count_cv;	/* Fstrans or cow count changed. */
84 static pserialize_t fstrans_psz;	/* Pserialize state. */
85 static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head;
86 					/* List of all fstrans_lwp_info. */
87 
/* Forward declarations of the file-local helpers. */
static inline struct mount *fstrans_normalize_mount(struct mount *mp);
static void fstrans_lwp_dtor(void *arg);
static void fstrans_mount_dtor(struct mount *mp);
static struct fstrans_lwp_info *fstrans_get_lwp_info(struct mount *mp,
    bool do_alloc);
static bool grant_lock(const enum fstrans_state state,
    const enum fstrans_lock_type type);
static bool state_change_done(const struct mount *mp);
static bool cow_state_change_done(const struct mount *mp);
static void cow_change_enter(const struct mount *mp);
static void cow_change_done(const struct mount *mp);
97 
98 /*
99  * Initialize.
100  */
101 void
102 fstrans_init(void)
103 {
104 	int error __diagused;
105 
106 	error = lwp_specific_key_create(&lwp_data_key, fstrans_lwp_dtor);
107 	KASSERT(error == 0);
108 
109 	mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
110 	mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE);
111 	mutex_init(&fstrans_mount_lock, MUTEX_DEFAULT, IPL_NONE);
112 	cv_init(&fstrans_state_cv, "fstchg");
113 	cv_init(&fstrans_count_cv, "fstcnt");
114 	fstrans_psz = pserialize_create();
115 	LIST_INIT(&fstrans_fli_head);
116 }
117 
118 /*
119  * Normalize mount.
120  * Return mount if file system supports fstrans, NULL otherwise.
121  */
122 static inline struct mount *
123 fstrans_normalize_mount(struct mount *mp)
124 {
125 
126 	while (mp && mp->mnt_lower)
127 		mp = mp->mnt_lower;
128 	if (mp == NULL)
129 		return NULL;
130 	if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
131 		return NULL;
132 	return mp;
133 }
134 
135 /*
136  * Deallocate lwp state.
137  */
138 static void
139 fstrans_lwp_dtor(void *arg)
140 {
141 	struct fstrans_lwp_info *fli, *fli_next;
142 
143 	for (fli = arg; fli; fli = fli_next) {
144 		KASSERT(fli->fli_trans_cnt == 0);
145 		KASSERT(fli->fli_cow_cnt == 0);
146 		if (fli->fli_mount != NULL)
147 			fstrans_mount_dtor(fli->fli_mount);
148 		fli_next = fli->fli_succ;
149 		fli->fli_mount = NULL;
150 		membar_sync();
151 		fli->fli_self = NULL;
152 	}
153 }
154 
155 /*
156  * Dereference mount state.
157  */
158 static void
159 fstrans_mount_dtor(struct mount *mp)
160 {
161 	struct fstrans_mount_info *fmi;
162 
163 	mutex_enter(&fstrans_mount_lock);
164 
165 	fmi = mp->mnt_transinfo;
166 	KASSERT(fmi != NULL);
167 	fmi->fmi_ref_cnt -= 1;
168 	if (fmi->fmi_ref_cnt > 0) {
169 		mutex_exit(&fstrans_mount_lock);
170 		return;
171 	}
172 
173 	KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
174 	KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL);
175 
176 	mp->mnt_iflag &= ~IMNT_HAS_TRANS;
177 	mp->mnt_transinfo = NULL;
178 
179 	mutex_exit(&fstrans_mount_lock);
180 
181 	kmem_free(fmi, sizeof(*fmi));
182 	vfs_destroy(mp);
183 }
184 
185 /*
186  * Allocate mount state.
187  */
188 int
189 fstrans_mount(struct mount *mp)
190 {
191 	int error;
192 	struct fstrans_mount_info *newfmi;
193 
194 	error = vfs_busy(mp, NULL);
195 	if (error)
196 		return error;
197 	newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP);
198 	newfmi->fmi_state = FSTRANS_NORMAL;
199 	newfmi->fmi_ref_cnt = 1;
200 	LIST_INIT(&newfmi->fmi_cow_handler);
201 	newfmi->fmi_cow_change = false;
202 
203 	mutex_enter(&fstrans_mount_lock);
204 	mp->mnt_transinfo = newfmi;
205 	mp->mnt_iflag |= IMNT_HAS_TRANS;
206 	mutex_exit(&fstrans_mount_lock);
207 
208 	vfs_unbusy(mp, true, NULL);
209 
210 	return 0;
211 }
212 
213 /*
214  * Deallocate mount state.
215  */
216 void
217 fstrans_unmount(struct mount *mp)
218 {
219 
220 	if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
221 		return;
222 
223 	KASSERT(mp->mnt_transinfo != NULL);
224 
225 	fstrans_mount_dtor(mp);
226 }
227 
228 /*
229  * Retrieve the per lwp info for this mount allocating if necessary.
230  */
231 static struct fstrans_lwp_info *
232 fstrans_get_lwp_info(struct mount *mp, bool do_alloc)
233 {
234 	struct fstrans_lwp_info *fli, *res;
235 	struct fstrans_mount_info *fmi;
236 
237 	/*
238 	 * Scan our list for a match clearing entries whose mount is gone.
239 	 */
240 	res = NULL;
241 	for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
242 		if (fli->fli_mount == mp) {
243 			KASSERT(res == NULL);
244 			res = fli;
245 		} else if (fli->fli_mount != NULL &&
246 		    (fli->fli_mount->mnt_iflag & IMNT_GONE) != 0 &&
247 		    fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
248 			fstrans_mount_dtor(fli->fli_mount);
249 			fli->fli_mount = NULL;
250 		}
251 	}
252 	if (__predict_true(res != NULL))
253 		return res;
254 
255 	if (! do_alloc)
256 		return NULL;
257 
258 	/*
259 	 * Try to reuse a cleared entry or allocate a new one.
260 	 */
261 	for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
262 		if (fli->fli_mount == NULL) {
263 			KASSERT(fli->fli_trans_cnt == 0);
264 			KASSERT(fli->fli_cow_cnt == 0);
265 			break;
266 		}
267 	}
268 	if (fli == NULL) {
269 		mutex_enter(&fstrans_lock);
270 		LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
271 			if (fli->fli_self == NULL) {
272 				KASSERT(fli->fli_mount == NULL);
273 				KASSERT(fli->fli_trans_cnt == 0);
274 				KASSERT(fli->fli_cow_cnt == 0);
275 				fli->fli_self = curlwp;
276 				fli->fli_succ = lwp_getspecific(lwp_data_key);
277 				lwp_setspecific(lwp_data_key, fli);
278 				break;
279 			}
280 		}
281 		mutex_exit(&fstrans_lock);
282 	}
283 	if (fli == NULL) {
284 		fli = kmem_alloc(sizeof(*fli), KM_SLEEP);
285 		mutex_enter(&fstrans_lock);
286 		memset(fli, 0, sizeof(*fli));
287 		fli->fli_self = curlwp;
288 		LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list);
289 		mutex_exit(&fstrans_lock);
290 		fli->fli_succ = lwp_getspecific(lwp_data_key);
291 		lwp_setspecific(lwp_data_key, fli);
292 	}
293 
294 	/*
295 	 * Attach the entry to the mount.
296 	 */
297 	mutex_enter(&fstrans_mount_lock);
298 	fmi = mp->mnt_transinfo;
299 	KASSERT(fmi != NULL);
300 	fli->fli_mount = mp;
301 	fmi->fmi_ref_cnt += 1;
302 	mutex_exit(&fstrans_mount_lock);
303 
304 	return fli;
305 }
306 
307 /*
308  * Check if this lock type is granted at this state.
309  */
310 static bool
311 grant_lock(const enum fstrans_state state, const enum fstrans_lock_type type)
312 {
313 
314 	if (__predict_true(state == FSTRANS_NORMAL))
315 		return true;
316 	if (type == FSTRANS_EXCL)
317 		return true;
318 	if  (state == FSTRANS_SUSPENDING && type == FSTRANS_LAZY)
319 		return true;
320 
321 	return false;
322 }
323 
324 /*
325  * Start a transaction.  If this thread already has a transaction on this
326  * file system increment the reference counter.
327  */
328 int
329 _fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
330 {
331 	int s;
332 	struct fstrans_lwp_info *fli;
333 	struct fstrans_mount_info *fmi;
334 
335 	if ((mp = fstrans_normalize_mount(mp)) == NULL)
336 		return 0;
337 
338 	ASSERT_SLEEPABLE();
339 
340 	if ((fli = fstrans_get_lwp_info(mp, true)) == NULL)
341 		return 0;
342 
343 	if (fli->fli_trans_cnt > 0) {
344 		KASSERT(lock_type != FSTRANS_EXCL);
345 		fli->fli_trans_cnt += 1;
346 
347 		return 0;
348 	}
349 
350 	s = pserialize_read_enter();
351 	fmi = mp->mnt_transinfo;
352 	if (__predict_true(grant_lock(fmi->fmi_state, lock_type))) {
353 		fli->fli_trans_cnt = 1;
354 		fli->fli_lock_type = lock_type;
355 		pserialize_read_exit(s);
356 
357 		return 0;
358 	}
359 	pserialize_read_exit(s);
360 
361 	if (! wait)
362 		return EBUSY;
363 
364 	mutex_enter(&fstrans_lock);
365 	while (! grant_lock(fmi->fmi_state, lock_type))
366 		cv_wait(&fstrans_state_cv, &fstrans_lock);
367 	fli->fli_trans_cnt = 1;
368 	fli->fli_lock_type = lock_type;
369 	mutex_exit(&fstrans_lock);
370 
371 	return 0;
372 }
373 
374 /*
375  * Finish a transaction.
376  */
377 void
378 fstrans_done(struct mount *mp)
379 {
380 	int s;
381 	struct fstrans_lwp_info *fli;
382 	struct fstrans_mount_info *fmi;
383 
384 	if ((mp = fstrans_normalize_mount(mp)) == NULL)
385 		return;
386 	if ((fli = fstrans_get_lwp_info(mp, true)) == NULL)
387 		return;
388 
389 	KASSERT(fli->fli_trans_cnt > 0);
390 
391 	if (fli->fli_trans_cnt > 1) {
392 		fli->fli_trans_cnt -= 1;
393 
394 		return;
395 	}
396 
397 	s = pserialize_read_enter();
398 	fmi = mp->mnt_transinfo;
399 	if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) {
400 		fli->fli_trans_cnt = 0;
401 		pserialize_read_exit(s);
402 
403 		return;
404 	}
405 	pserialize_read_exit(s);
406 
407 	mutex_enter(&fstrans_lock);
408 	fli->fli_trans_cnt = 0;
409 	cv_signal(&fstrans_count_cv);
410 	mutex_exit(&fstrans_lock);
411 }
412 
413 /*
414  * Check if this thread has an exclusive lock.
415  */
416 int
417 fstrans_is_owner(struct mount *mp)
418 {
419 	struct fstrans_lwp_info *fli;
420 
421 	if ((mp = fstrans_normalize_mount(mp)) == NULL)
422 		return 0;
423 	if ((fli = fstrans_get_lwp_info(mp, false)) == NULL)
424 		return 0;
425 
426 	if (fli->fli_trans_cnt == 0)
427 		return 0;
428 
429 	KASSERT(fli->fli_mount == mp);
430 	KASSERT(fli->fli_trans_cnt > 0);
431 
432 	return (fli->fli_lock_type == FSTRANS_EXCL);
433 }
434 
435 /*
436  * True, if no thread is in a transaction not granted at the current state.
437  */
438 static bool
439 state_change_done(const struct mount *mp)
440 {
441 	struct fstrans_lwp_info *fli;
442 	struct fstrans_mount_info *fmi;
443 
444 	KASSERT(mutex_owned(&fstrans_lock));
445 
446 	fmi = mp->mnt_transinfo;
447 	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
448 		if (fli->fli_mount != mp)
449 			continue;
450 		if (fli->fli_trans_cnt == 0)
451 			continue;
452 		if (grant_lock(fmi->fmi_state, fli->fli_lock_type))
453 			continue;
454 
455 		return false;
456 	}
457 
458 	return true;
459 }
460 
461 /*
462  * Set new file system state.
463  */
464 int
465 fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
466 {
467 	int error;
468 	enum fstrans_state old_state;
469 	struct fstrans_mount_info *fmi;
470 
471 	fmi = mp->mnt_transinfo;
472 	old_state = fmi->fmi_state;
473 	if (old_state == new_state)
474 		return 0;
475 
476 	mutex_enter(&fstrans_lock);
477 	fmi->fmi_state = new_state;
478 	pserialize_perform(fstrans_psz);
479 
480 	/*
481 	 * All threads see the new state now.
482 	 * Wait for transactions invalid at this state to leave.
483 	 */
484 	error = 0;
485 	while (! state_change_done(mp)) {
486 		error = cv_wait_sig(&fstrans_count_cv, &fstrans_lock);
487 		if (error) {
488 			new_state = fmi->fmi_state = FSTRANS_NORMAL;
489 			break;
490 		}
491 	}
492 	cv_broadcast(&fstrans_state_cv);
493 	mutex_exit(&fstrans_lock);
494 
495 	if (old_state != new_state) {
496 		if (old_state == FSTRANS_NORMAL)
497 			fstrans_start(mp, FSTRANS_EXCL);
498 		if (new_state == FSTRANS_NORMAL)
499 			fstrans_done(mp);
500 	}
501 
502 	return error;
503 }
504 
505 /*
506  * Get current file system state.
507  */
508 enum fstrans_state
509 fstrans_getstate(struct mount *mp)
510 {
511 	struct fstrans_mount_info *fmi;
512 
513 	fmi = mp->mnt_transinfo;
514 	KASSERT(fmi != NULL);
515 
516 	return fmi->fmi_state;
517 }
518 
519 /*
520  * Request a filesystem to suspend all operations.
521  */
522 int
523 vfs_suspend(struct mount *mp, int nowait)
524 {
525 	int error;
526 
527 	if ((mp = fstrans_normalize_mount(mp)) == NULL)
528 		return EOPNOTSUPP;
529 	if (nowait) {
530 		if (!mutex_tryenter(&vfs_suspend_lock))
531 			return EWOULDBLOCK;
532 	} else
533 		mutex_enter(&vfs_suspend_lock);
534 
535 	if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0)
536 		mutex_exit(&vfs_suspend_lock);
537 
538 	return error;
539 }
540 
541 /*
542  * Request a filesystem to resume all operations.
543  */
544 void
545 vfs_resume(struct mount *mp)
546 {
547 
548 	mp = fstrans_normalize_mount(mp);
549 	KASSERT(mp != NULL);
550 
551 	VFS_SUSPENDCTL(mp, SUSPEND_RESUME);
552 	mutex_exit(&vfs_suspend_lock);
553 }
554 
555 
556 /*
557  * True, if no thread is running a cow handler.
558  */
559 static bool
560 cow_state_change_done(const struct mount *mp)
561 {
562 	struct fstrans_lwp_info *fli;
563 	struct fstrans_mount_info *fmi __diagused;
564 
565 	fmi = mp->mnt_transinfo;
566 
567 	KASSERT(mutex_owned(&fstrans_lock));
568 	KASSERT(fmi->fmi_cow_change);
569 
570 	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
571 		if (fli->fli_mount != mp)
572 			continue;
573 		if (fli->fli_cow_cnt == 0)
574 			continue;
575 
576 		return false;
577 	}
578 
579 	return true;
580 }
581 
582 /*
583  * Prepare for changing this mounts cow list.
584  * Returns with fstrans_lock locked.
585  */
586 static void
587 cow_change_enter(const struct mount *mp)
588 {
589 	struct fstrans_mount_info *fmi;
590 
591 	fmi = mp->mnt_transinfo;
592 
593 	mutex_enter(&fstrans_lock);
594 
595 	/*
596 	 * Wait for other threads changing the list.
597 	 */
598 	while (fmi->fmi_cow_change)
599 		cv_wait(&fstrans_state_cv, &fstrans_lock);
600 
601 	/*
602 	 * Wait until all threads are aware of a state change.
603 	 */
604 	fmi->fmi_cow_change = true;
605 	pserialize_perform(fstrans_psz);
606 
607 	while (! cow_state_change_done(mp))
608 		cv_wait(&fstrans_count_cv, &fstrans_lock);
609 }
610 
611 /*
612  * Done changing this mounts cow list.
613  */
614 static void
615 cow_change_done(const struct mount *mp)
616 {
617 	struct fstrans_mount_info *fmi;
618 
619 	KASSERT(mutex_owned(&fstrans_lock));
620 
621 	fmi = mp->mnt_transinfo;
622 
623 	fmi->fmi_cow_change = false;
624 	pserialize_perform(fstrans_psz);
625 
626 	cv_broadcast(&fstrans_state_cv);
627 
628 	mutex_exit(&fstrans_lock);
629 }
630 
631 /*
632  * Add a handler to this mount.
633  */
634 int
635 fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
636     void *arg)
637 {
638 	struct fstrans_mount_info *fmi;
639 	struct fscow_handler *newch;
640 
641 	if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
642 		return EINVAL;
643 
644 	fmi = mp->mnt_transinfo;
645 	KASSERT(fmi != NULL);
646 
647 	newch = kmem_alloc(sizeof(*newch), KM_SLEEP);
648 	newch->ch_func = func;
649 	newch->ch_arg = arg;
650 
651 	cow_change_enter(mp);
652 	LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list);
653 	cow_change_done(mp);
654 
655 	return 0;
656 }
657 
658 /*
659  * Remove a handler from this mount.
660  */
661 int
662 fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
663     void *arg)
664 {
665 	struct fstrans_mount_info *fmi;
666 	struct fscow_handler *hp = NULL;
667 
668 	if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
669 		return EINVAL;
670 
671 	fmi = mp->mnt_transinfo;
672 	KASSERT(fmi != NULL);
673 
674 	cow_change_enter(mp);
675 	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
676 		if (hp->ch_func == func && hp->ch_arg == arg)
677 			break;
678 	if (hp != NULL) {
679 		LIST_REMOVE(hp, ch_list);
680 		kmem_free(hp, sizeof(*hp));
681 	}
682 	cow_change_done(mp);
683 
684 	return hp ? 0 : EINVAL;
685 }
686 
687 /*
688  * Check for need to copy block that is about to be written.
689  */
690 int
691 fscow_run(struct buf *bp, bool data_valid)
692 {
693 	int error, s;
694 	struct mount *mp;
695 	struct fstrans_lwp_info *fli;
696 	struct fstrans_mount_info *fmi;
697 	struct fscow_handler *hp;
698 
699 	/*
700 	 * First check if we need run the copy-on-write handler.
701 	 */
702 	if ((bp->b_flags & B_COWDONE))
703 		return 0;
704 	if (bp->b_vp == NULL) {
705 		bp->b_flags |= B_COWDONE;
706 		return 0;
707 	}
708 	if (bp->b_vp->v_type == VBLK)
709 		mp = spec_node_getmountedfs(bp->b_vp);
710 	else
711 		mp = bp->b_vp->v_mount;
712 	if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0) {
713 		bp->b_flags |= B_COWDONE;
714 		return 0;
715 	}
716 
717 	fli = fstrans_get_lwp_info(mp, true);
718 	fmi = mp->mnt_transinfo;
719 
720 	/*
721 	 * On non-recursed run check if other threads
722 	 * want to change the list.
723 	 */
724 	if (fli->fli_cow_cnt == 0) {
725 		s = pserialize_read_enter();
726 		if (__predict_false(fmi->fmi_cow_change)) {
727 			pserialize_read_exit(s);
728 			mutex_enter(&fstrans_lock);
729 			while (fmi->fmi_cow_change)
730 				cv_wait(&fstrans_state_cv, &fstrans_lock);
731 			fli->fli_cow_cnt = 1;
732 			mutex_exit(&fstrans_lock);
733 		} else {
734 			fli->fli_cow_cnt = 1;
735 			pserialize_read_exit(s);
736 		}
737 	} else
738 		fli->fli_cow_cnt += 1;
739 
740 	/*
741 	 * Run all copy-on-write handlers, stop on error.
742 	 */
743 	error = 0;
744 	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
745 		if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
746 			break;
747  	if (error == 0)
748  		bp->b_flags |= B_COWDONE;
749 
750 	/*
751 	 * Check if other threads want to change the list.
752 	 */
753 	if (fli->fli_cow_cnt > 1) {
754 		fli->fli_cow_cnt -= 1;
755 	} else {
756 		s = pserialize_read_enter();
757 		if (__predict_false(fmi->fmi_cow_change)) {
758 			pserialize_read_exit(s);
759 			mutex_enter(&fstrans_lock);
760 			fli->fli_cow_cnt = 0;
761 			cv_signal(&fstrans_count_cv);
762 			mutex_exit(&fstrans_lock);
763 		} else {
764 			fli->fli_cow_cnt = 0;
765 			pserialize_read_exit(s);
766 		}
767 	}
768 
769 	return error;
770 }
771 
772 #if defined(DDB)
/* Debugger entry point; defined at the end of this file. */
void fstrans_dump(int full);
774 
775 static void
776 fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
777 {
778 	char prefix[9];
779 	struct fstrans_lwp_info *fli;
780 
781 	snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
782 	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
783 		if (fli->fli_self != l)
784 			continue;
785 		if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
786 			if (! verbose)
787 				continue;
788 		}
789 		printf("%-8s", prefix);
790 		if (verbose)
791 			printf(" @%p", fli);
792 		if (fli->fli_mount != NULL)
793 			printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
794 		else
795 			printf(" NULL");
796 		if (fli->fli_trans_cnt == 0) {
797 			printf(" -");
798 		} else {
799 			switch (fli->fli_lock_type) {
800 			case FSTRANS_LAZY:
801 				printf(" lazy");
802 				break;
803 			case FSTRANS_SHARED:
804 				printf(" shared");
805 				break;
806 			case FSTRANS_EXCL:
807 				printf(" excl");
808 				break;
809 			default:
810 				printf(" %#x", fli->fli_lock_type);
811 				break;
812 			}
813 		}
814 		printf(" %d cow %d\n", fli->fli_trans_cnt, fli->fli_cow_cnt);
815 		prefix[0] = '\0';
816 	}
817 }
818 
819 static void
820 fstrans_print_mount(struct mount *mp, int verbose)
821 {
822 	struct fstrans_mount_info *fmi;
823 
824 	fmi = mp->mnt_transinfo;
825 	if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
826 		return;
827 
828 	printf("%-16s ", mp->mnt_stat.f_mntonname);
829 	if (fmi == NULL) {
830 		printf("(null)\n");
831 		return;
832 	}
833 	switch (fmi->fmi_state) {
834 	case FSTRANS_NORMAL:
835 		printf("state normal\n");
836 		break;
837 	case FSTRANS_SUSPENDING:
838 		printf("state suspending\n");
839 		break;
840 	case FSTRANS_SUSPENDED:
841 		printf("state suspended\n");
842 		break;
843 	default:
844 		printf("state %#x\n", fmi->fmi_state);
845 		break;
846 	}
847 }
848 
849 void
850 fstrans_dump(int full)
851 {
852 	const struct proclist_desc *pd;
853 	struct proc *p;
854 	struct lwp *l;
855 	struct mount *mp;
856 
857 	printf("Fstrans locks by lwp:\n");
858 	for (pd = proclists; pd->pd_list != NULL; pd++)
859 		PROCLIST_FOREACH(p, pd->pd_list)
860 			LIST_FOREACH(l, &p->p_lwps, l_sibling)
861 				fstrans_print_lwp(p, l, full == 1);
862 
863 	printf("Fstrans state by mount:\n");
864 	TAILQ_FOREACH(mp, &mountlist, mnt_list)
865 		fstrans_print_mount(mp, full == 1);
866 }
867 #endif /* defined(DDB) */
868