1 /*	$NetBSD: genfs_vnops.c,v 1.104 2005/07/26 08:06:29 yamt Exp $	*/
2 
3 /*
4  * Copyright (c) 1982, 1986, 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  */
32 
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.104 2005/07/26 08:06:29 yamt Exp $");
35 
36 #if defined(_KERNEL_OPT)
37 #include "opt_nfsserver.h"
38 #endif
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/proc.h>
43 #include <sys/kernel.h>
44 #include <sys/mount.h>
45 #include <sys/namei.h>
46 #include <sys/vnode.h>
47 #include <sys/fcntl.h>
48 #include <sys/malloc.h>
49 #include <sys/poll.h>
50 #include <sys/mman.h>
51 #include <sys/file.h>
52 
53 #include <miscfs/genfs/genfs.h>
54 #include <miscfs/genfs/genfs_node.h>
55 #include <miscfs/specfs/specdev.h>
56 
57 #include <uvm/uvm.h>
58 #include <uvm/uvm_pager.h>
59 
60 #ifdef NFSSERVER
61 #include <nfs/rpcv2.h>
62 #include <nfs/nfsproto.h>
63 #include <nfs/nfs.h>
64 #include <nfs/nqnfs.h>
65 #include <nfs/nfs_var.h>
66 #endif
67 
68 static __inline void genfs_rel_pages(struct vm_page **, int);
69 static void filt_genfsdetach(struct knote *);
70 static int filt_genfsread(struct knote *, long);
71 static int filt_genfsvnode(struct knote *, long);
72 
73 
74 #define MAX_READ_AHEAD	16 	/* XXXUBC 16 */
75 int genfs_rapages = MAX_READ_AHEAD; /* # of pages in each chunk of readahead */
76 int genfs_racount = 2;		/* # of page chunks to readahead */
77 int genfs_raskip = 2;		/* # of busy page chunks allowed to skip */
78 
79 int
80 genfs_poll(void *v)
81 {
82 	struct vop_poll_args /* {
83 		struct vnode *a_vp;
84 		int a_events;
85 		struct proc *a_p;
86 	} */ *ap = v;
87 
88 	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
89 }
90 
91 int
92 genfs_fsync(void *v)
93 {
94 	struct vop_fsync_args /* {
95 		struct vnode *a_vp;
96 		struct ucred *a_cred;
97 		int a_flags;
98 		off_t a_offlo;
99 		off_t a_offhi;
100 		struct proc *a_p;
101 	} */ *ap = v;
102 	struct vnode *vp = ap->a_vp, *dvp;
103 	int wait;
104 	int error;
105 
106 	wait = (ap->a_flags & FSYNC_WAIT) != 0;
107 	vflushbuf(vp, wait);
108 	if ((ap->a_flags & FSYNC_DATAONLY) != 0)
109 		error = 0;
110 	else
111 		error = VOP_UPDATE(vp, NULL, NULL, wait ? UPDATE_WAIT : 0);
112 
113 	if (error == 0 && ap->a_flags & FSYNC_CACHE) {
114 		int l = 0;
115 		if (VOP_BMAP(vp, 0, &dvp, NULL, NULL))
116 			error = ENXIO;
117 		else
118 			error = VOP_IOCTL(dvp, DIOCCACHESYNC, &l, FWRITE,
119 					  ap->a_p->p_ucred, ap->a_p);
120 	}
121 
122 	return (error);
123 }
124 
125 int
126 genfs_seek(void *v)
127 {
128 	struct vop_seek_args /* {
129 		struct vnode *a_vp;
130 		off_t a_oldoff;
131 		off_t a_newoff;
132 		struct ucred *a_ucred;
133 	} */ *ap = v;
134 
135 	if (ap->a_newoff < 0)
136 		return (EINVAL);
137 
138 	return (0);
139 }
140 
141 int
142 genfs_abortop(void *v)
143 {
144 	struct vop_abortop_args /* {
145 		struct vnode *a_dvp;
146 		struct componentname *a_cnp;
147 	} */ *ap = v;
148 
149 	if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
150 		PNBUF_PUT(ap->a_cnp->cn_pnbuf);
151 	return (0);
152 }
153 
154 int
155 genfs_fcntl(void *v)
156 {
157 	struct vop_fcntl_args /* {
158 		struct vnode *a_vp;
159 		u_int a_command;
160 		caddr_t a_data;
161 		int a_fflag;
162 		struct ucred *a_cred;
163 		struct proc *a_p;
164 	} */ *ap = v;
165 
166 	if (ap->a_command == F_SETFL)
167 		return (0);
168 	else
169 		return (EOPNOTSUPP);
170 }
171 
172 /*ARGSUSED*/
173 int
174 genfs_badop(void *v)
175 {
176 
177 	panic("genfs: bad op");
178 }
179 
180 /*ARGSUSED*/
181 int
182 genfs_nullop(void *v)
183 {
184 
185 	return (0);
186 }
187 
188 /*ARGSUSED*/
189 int
190 genfs_einval(void *v)
191 {
192 
193 	return (EINVAL);
194 }
195 
196 /*
197  * Called when an fs doesn't support a particular vop.
198  * This takes care to vrele, vput, or vunlock any passed-in vnodes.
199  */
200 int
201 genfs_eopnotsupp(void *v)
202 {
203 	struct vop_generic_args /* {
204 		struct vnodeop_desc *a_desc;
205 		/ * other random data follows, presumably * /
206 	} */ *ap = v;
207 	struct vnodeop_desc *desc = ap->a_desc;
208 	struct vnode *vp, *vp_last = NULL;
209 	int flags, i, j, offset;
210 
211 	flags = desc->vdesc_flags;
212 	for (i = 0; i < VDESC_MAX_VPS; flags >>=1, i++) {
213 		if ((offset = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET)
214 			break;	/* stop at end of list */
215 		if ((j = flags & VDESC_VP0_WILLPUT)) {
216 			vp = *VOPARG_OFFSETTO(struct vnode **, offset, ap);
217 
218 			/* Skip if NULL */
219 			if (!vp)
220 				continue;
221 
222 			switch (j) {
223 			case VDESC_VP0_WILLPUT:
224 				/* Check for dvp == vp cases */
225 				if (vp == vp_last)
226 					vrele(vp);
227 				else {
228 					vput(vp);
229 					vp_last = vp;
230 				}
231 				break;
232 			case VDESC_VP0_WILLUNLOCK:
233 				VOP_UNLOCK(vp, 0);
234 				break;
235 			case VDESC_VP0_WILLRELE:
236 				vrele(vp);
237 				break;
238 			}
239 		}
240 	}
241 
242 	return (EOPNOTSUPP);
243 }
244 
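/*
 * Illustrative sketch only (not part of genfs): a filesystem that lacks a
 * particular vop can simply point the corresponding slot of its vnodeop
 * table at genfs_eopnotsupp and let the routine above clean up any vnodes
 * passed in.  "examplefs_vnodeop_entries" is a hypothetical name.
 */
#if 0
const struct vnodeopv_entry_desc examplefs_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_rename_desc, genfs_eopnotsupp },	/* op not supported */
	{ &vop_poll_desc, genfs_poll },		/* use the generic version */
	{ &vop_seek_desc, genfs_seek },
	{ &vop_fcntl_desc, genfs_fcntl },
	{ NULL, NULL }
};
#endif
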
245 /*ARGSUSED*/
246 int
247 genfs_ebadf(void *v)
248 {
249 
250 	return (EBADF);
251 }
252 
253 /* ARGSUSED */
254 int
255 genfs_enoioctl(void *v)
256 {
257 
258 	return (EPASSTHROUGH);
259 }
260 
261 
262 /*
263  * Eliminate all activity associated with the requested vnode
264  * and with all vnodes aliased to the requested vnode.
265  */
266 int
267 genfs_revoke(void *v)
268 {
269 	struct vop_revoke_args /* {
270 		struct vnode *a_vp;
271 		int a_flags;
272 	} */ *ap = v;
273 	struct vnode *vp, *vq;
274 	struct proc *p = curproc;	/* XXX */
275 
276 #ifdef DIAGNOSTIC
277 	if ((ap->a_flags & REVOKEALL) == 0)
278 		panic("genfs_revoke: not revokeall");
279 #endif
280 
281 	vp = ap->a_vp;
282 	simple_lock(&vp->v_interlock);
283 
284 	if (vp->v_flag & VALIASED) {
285 		/*
286 		 * If a vgone (or vclean) is already in progress,
287 		 * wait until it is done and return.
288 		 */
289 		if (vp->v_flag & VXLOCK) {
290 			vp->v_flag |= VXWANT;
291 			ltsleep(vp, PINOD|PNORELOCK, "vop_revokeall", 0,
292 				&vp->v_interlock);
293 			return (0);
294 		}
295 		/*
296 		 * Ensure that vp will not be vgone'd while we
297 		 * are eliminating its aliases.
298 		 */
299 		vp->v_flag |= VXLOCK;
300 		simple_unlock(&vp->v_interlock);
301 		while (vp->v_flag & VALIASED) {
302 			simple_lock(&spechash_slock);
303 			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
304 				if (vq->v_rdev != vp->v_rdev ||
305 				    vq->v_type != vp->v_type || vp == vq)
306 					continue;
307 				simple_unlock(&spechash_slock);
308 				vgone(vq);
309 				break;
310 			}
311 			if (vq == NULLVP)
312 				simple_unlock(&spechash_slock);
313 		}
314 		/*
315 		 * Remove the lock so that vgone below will
316 		 * really eliminate the vnode after which time
317 		 * vgone will awaken any sleepers.
318 		 */
319 		simple_lock(&vp->v_interlock);
320 		vp->v_flag &= ~VXLOCK;
321 	}
322 	vgonel(vp, p);
323 	return (0);
324 }
325 
326 /*
327  * Lock the node.
328  */
329 int
330 genfs_lock(void *v)
331 {
332 	struct vop_lock_args /* {
333 		struct vnode *a_vp;
334 		int a_flags;
335 	} */ *ap = v;
336 	struct vnode *vp = ap->a_vp;
337 
338 	return (lockmgr(vp->v_vnlock, ap->a_flags, &vp->v_interlock));
339 }
340 
341 /*
342  * Unlock the node.
343  */
344 int
345 genfs_unlock(void *v)
346 {
347 	struct vop_unlock_args /* {
348 		struct vnode *a_vp;
349 		int a_flags;
350 	} */ *ap = v;
351 	struct vnode *vp = ap->a_vp;
352 
353 	return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE,
354 	    &vp->v_interlock));
355 }
356 
357 /*
358  * Return whether or not the node is locked.
359  */
360 int
361 genfs_islocked(void *v)
362 {
363 	struct vop_islocked_args /* {
364 		struct vnode *a_vp;
365 	} */ *ap = v;
366 	struct vnode *vp = ap->a_vp;
367 
368 	return (lockstatus(vp->v_vnlock));
369 }
370 
371 /*
372  * Stubs to use when there is no locking to be done on the underlying object.
373  */
374 int
375 genfs_nolock(void *v)
376 {
377 	struct vop_lock_args /* {
378 		struct vnode *a_vp;
379 		int a_flags;
380 		struct proc *a_p;
381 	} */ *ap = v;
382 
383 	/*
384 	 * Since we are not using the lock manager, we must clear
385 	 * the interlock here.
386 	 */
387 	if (ap->a_flags & LK_INTERLOCK)
388 		simple_unlock(&ap->a_vp->v_interlock);
389 	return (0);
390 }
391 
392 int
393 genfs_nounlock(void *v)
394 {
395 
396 	return (0);
397 }
398 
399 int
400 genfs_noislocked(void *v)
401 {
402 
403 	return (0);
404 }
405 
406 /*
407  * Local lease check for NFS servers.  Just set up args and let
408  * nqsrv_getlease() do the rest.  If NFSSERVER is not in the kernel,
409  * this is a null operation.
410  */
411 int
412 genfs_lease_check(void *v)
413 {
414 #ifdef NFSSERVER
415 	struct vop_lease_args /* {
416 		struct vnode *a_vp;
417 		struct proc *a_p;
418 		struct ucred *a_cred;
419 		int a_flag;
420 	} */ *ap = v;
421 	u_int32_t duration = 0;
422 	int cache;
423 	u_quad_t frev;
424 
425 	(void) nqsrv_getlease(ap->a_vp, &duration, ND_CHECK | ap->a_flag,
426 	    NQLOCALSLP, ap->a_p, (struct mbuf *)0, &cache, &frev, ap->a_cred);
427 	return (0);
428 #else
429 	return (0);
430 #endif /* NFSSERVER */
431 }
432 
433 int
434 genfs_mmap(void *v)
435 {
436 
437 	return (0);
438 }
439 
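/*
 * Release pages we have busied but will not return: freshly allocated
 * (PG_FAKE) pages are marked PG_RELEASED so that uvm_page_unbusy()
 * frees them instead of handing back uninitialized data.
 */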
440 static __inline void
441 genfs_rel_pages(struct vm_page **pgs, int npages)
442 {
443 	int i;
444 
445 	for (i = 0; i < npages; i++) {
446 		struct vm_page *pg = pgs[i];
447 
448 		if (pg == NULL)
449 			continue;
450 		if (pg->flags & PG_FAKE) {
451 			pg->flags |= PG_RELEASED;
452 		}
453 	}
454 	uvm_lock_pageq();
455 	uvm_page_unbusy(pgs, npages);
456 	uvm_unlock_pageq();
457 }
458 
459 /*
460  * generic VM getpages routine.
461  * Return PG_BUSY pages for the given range,
462  * reading from backing store if necessary.
463  */
464 
465 int
466 genfs_getpages(void *v)
467 {
468 	struct vop_getpages_args /* {
469 		struct vnode *a_vp;
470 		voff_t a_offset;
471 		struct vm_page **a_m;
472 		int *a_count;
473 		int a_centeridx;
474 		vm_prot_t a_access_type;
475 		int a_advice;
476 		int a_flags;
477 	} */ *ap = v;
478 
479 	off_t newsize, diskeof, memeof;
480 	off_t offset, origoffset, startoffset, endoffset, raoffset;
481 	daddr_t lbn, blkno;
482 	int s, i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
483 	int fs_bshift, fs_bsize, dev_bshift;
484 	int flags = ap->a_flags;
485 	size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
486 	vaddr_t kva;
487 	struct buf *bp, *mbp;
488 	struct vnode *vp = ap->a_vp;
489 	struct vnode *devvp;
490 	struct genfs_node *gp = VTOG(vp);
491 	struct uvm_object *uobj = &vp->v_uobj;
492 	struct vm_page *pg, **pgs, *pgs_onstack[MAX_READ_AHEAD];
493 	int pgs_size;
494 	struct ucred *cred = curproc->p_ucred;		/* XXXUBC curlwp */
495 	boolean_t async = (flags & PGO_SYNCIO) == 0;
496 	boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
497 	boolean_t sawhole = FALSE;
498 	boolean_t overwrite = (flags & PGO_OVERWRITE) != 0;
499 	boolean_t blockalloc = write && (flags & PGO_NOBLOCKALLOC) == 0;
500 	UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);
501 
502 	UVMHIST_LOG(ubchist, "vp %p off 0x%x/%x count %d",
503 	    vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count);
504 
505 	/* XXXUBC temp limit */
506 	if (*ap->a_count > MAX_READ_AHEAD) {
507 		panic("genfs_getpages: too many pages");
508 	}
509 
510 	error = 0;
511 	origoffset = ap->a_offset;
512 	orignpages = *ap->a_count;
513 	GOP_SIZE(vp, vp->v_size, &diskeof, GOP_SIZE_READ);
514 	if (flags & PGO_PASTEOF) {
515 		newsize = MAX(vp->v_size,
516 		    origoffset + (orignpages << PAGE_SHIFT));
517 		GOP_SIZE(vp, newsize, &memeof, GOP_SIZE_READ|GOP_SIZE_MEM);
518 	} else {
519 		GOP_SIZE(vp, vp->v_size, &memeof, GOP_SIZE_READ|GOP_SIZE_MEM);
520 	}
521 	KASSERT(ap->a_centeridx >= 0 && ap->a_centeridx <= orignpages);
522 	KASSERT((origoffset & (PAGE_SIZE - 1)) == 0 && origoffset >= 0);
523 	KASSERT(orignpages > 0);
524 
525 	/*
526 	 * Bounds-check the request.
527 	 */
528 
529 	if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) {
530 		if ((flags & PGO_LOCKED) == 0) {
531 			simple_unlock(&uobj->vmobjlock);
532 		}
533 		UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
534 		    origoffset, *ap->a_count, memeof,0);
535 		return (EINVAL);
536 	}
537 
538 	/* uobj is locked */
539 
540 	if ((flags & PGO_NOTIMESTAMP) == 0 &&
541 	    (vp->v_type == VREG ||
542 	    (vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)) {
543 		int updflags = 0;
544 
545 		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0) {
546 			updflags = GOP_UPDATE_ACCESSED;
547 		}
548 		if (write) {
549 			updflags |= GOP_UPDATE_MODIFIED;
550 		}
551 		if (updflags != 0) {
552 			GOP_MARKUPDATE(vp, updflags);
553 		}
554 	}
555 
556 	if (write) {
557 		gp->g_dirtygen++;
558 		if ((vp->v_flag & VONWORKLST) == 0) {
559 			vn_syncer_add_to_worklist(vp, filedelay);
560 		}
561 		if ((vp->v_flag & (VWRITEMAP|VWRITEMAPDIRTY)) == VWRITEMAP) {
562 			vp->v_flag |= VWRITEMAPDIRTY;
563 		}
564 	}
565 
566 	/*
567 	 * For PGO_LOCKED requests, just return whatever's in memory.
568 	 */
569 
570 	if (flags & PGO_LOCKED) {
571 		uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
572 		    UFP_NOWAIT|UFP_NOALLOC| (write ? UFP_NORDONLY : 0));
573 
574 		return (ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0);
575 	}
576 
577 	/*
578 	 * find the requested pages and make some simple checks.
579 	 * leave space in the page array for a whole block.
580 	 */
581 
582 	if (vp->v_type == VREG) {
583 		fs_bshift = vp->v_mount->mnt_fs_bshift;
584 		dev_bshift = vp->v_mount->mnt_dev_bshift;
585 	} else {
586 		fs_bshift = DEV_BSHIFT;
587 		dev_bshift = DEV_BSHIFT;
588 	}
589 	fs_bsize = 1 << fs_bshift;
590 
591 	orignpages = MIN(orignpages,
592 	    round_page(memeof - origoffset) >> PAGE_SHIFT);
593 	npages = orignpages;
594 	startoffset = origoffset & ~(fs_bsize - 1);
595 	endoffset = round_page((origoffset + (npages << PAGE_SHIFT) +
596 	    fs_bsize - 1) & ~(fs_bsize - 1));
597 	endoffset = MIN(endoffset, round_page(memeof));
598 	ridx = (origoffset - startoffset) >> PAGE_SHIFT;
599 
600 	pgs_size = sizeof(struct vm_page *) *
601 	    ((endoffset - startoffset) >> PAGE_SHIFT);
602 	if (pgs_size > sizeof(pgs_onstack)) {
603 		pgs = malloc(pgs_size, M_DEVBUF, M_NOWAIT | M_ZERO);
604 		if (pgs == NULL) {
605 			simple_unlock(&uobj->vmobjlock);
606 			return (ENOMEM);
607 		}
608 	} else {
609 		pgs = pgs_onstack;
610 		memset(pgs, 0, pgs_size);
611 	}
612 	UVMHIST_LOG(ubchist, "ridx %d npages %d startoff %ld endoff %ld",
613 	    ridx, npages, startoffset, endoffset);
614 	if (uvn_findpages(uobj, origoffset, &npages, &pgs[ridx],
615 	    async ? UFP_NOWAIT : UFP_ALL) != orignpages) {
616 		KASSERT(async != 0);
617 		genfs_rel_pages(&pgs[ridx], orignpages);
618 		simple_unlock(&uobj->vmobjlock);
619 		if (pgs != pgs_onstack)
620 			free(pgs, M_DEVBUF);
621 		return (EBUSY);
622 	}
623 
624 	/*
625 	 * if the pages are already resident, just return them.
626 	 */
627 
628 	for (i = 0; i < npages; i++) {
629 		struct vm_page *pg1 = pgs[ridx + i];
630 
631 		if ((pg1->flags & PG_FAKE) ||
632 		    (blockalloc && (pg1->flags & PG_RDONLY))) {
633 			break;
634 		}
635 	}
636 	if (i == npages) {
637 		UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
638 		raoffset = origoffset + (orignpages << PAGE_SHIFT);
639 		npages += ridx;
640 		goto raout;
641 	}
642 
643 	/*
644 	 * if PGO_OVERWRITE is set, don't bother reading the pages.
645 	 */
646 
647 	if (flags & PGO_OVERWRITE) {
648 		UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);
649 
650 		for (i = 0; i < npages; i++) {
651 			struct vm_page *pg1 = pgs[ridx + i];
652 
653 			pg1->flags &= ~(PG_RDONLY|PG_CLEAN);
654 		}
655 		npages += ridx;
656 		goto out;
657 	}
658 
659 	/*
660 	 * the page wasn't resident and we're not overwriting,
661 	 * so we're going to have to do some i/o.
662 	 * find any additional pages needed to cover the expanded range.
663 	 */
664 
665 	npages = (endoffset - startoffset) >> PAGE_SHIFT;
666 	if (startoffset != origoffset || npages != orignpages) {
667 
668 		/*
669 		 * we need to avoid deadlocks caused by locking
670 		 * additional pages at lower offsets than pages we
671 		 * already have locked.  unlock them all and start over.
672 		 */
673 
674 		genfs_rel_pages(&pgs[ridx], orignpages);
675 		memset(pgs, 0, pgs_size);
676 
677 		UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
678 		    startoffset, endoffset, 0,0);
679 		npgs = npages;
680 		if (uvn_findpages(uobj, startoffset, &npgs, pgs,
681 		    async ? UFP_NOWAIT : UFP_ALL) != npages) {
682 			KASSERT(async != 0);
683 			genfs_rel_pages(pgs, npages);
684 			simple_unlock(&uobj->vmobjlock);
685 			if (pgs != pgs_onstack)
686 				free(pgs, M_DEVBUF);
687 			return (EBUSY);
688 		}
689 	}
690 	simple_unlock(&uobj->vmobjlock);
691 
692 	/*
693 	 * read the desired page(s).
694 	 */
695 
696 	totalbytes = npages << PAGE_SHIFT;
697 	bytes = MIN(totalbytes, MAX(diskeof - startoffset, 0));
698 	tailbytes = totalbytes - bytes;
699 	skipbytes = 0;
700 
701 	kva = uvm_pagermapin(pgs, npages,
702 	    UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
703 
704 	s = splbio();
705 	mbp = pool_get(&bufpool, PR_WAITOK);
706 	splx(s);
707 	BUF_INIT(mbp);
708 	mbp->b_bufsize = totalbytes;
709 	mbp->b_data = (void *)kva;
710 	mbp->b_resid = mbp->b_bcount = bytes;
711 	mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL|B_ASYNC : 0);
712 	mbp->b_iodone = (async ? uvm_aio_biodone : 0);
713 	mbp->b_vp = vp;
714 
715 	/*
716 	 * if EOF is in the middle of the range, zero the part past EOF.
717 	 * if the page including EOF is not PG_FAKE, skip over it since
718 	 * in that case it has valid data that we need to preserve.
719 	 */
720 
721 	if (tailbytes > 0) {
722 		size_t tailstart = bytes;
723 
724 		if ((pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE) == 0) {
725 			tailstart = round_page(tailstart);
726 			tailbytes -= tailstart - bytes;
727 		}
728 		UVMHIST_LOG(ubchist, "tailbytes %p 0x%x 0x%x",
729 		    kva, tailstart, tailbytes,0);
730 		memset((void *)(kva + tailstart), 0, tailbytes);
731 	}
732 
733 	/*
734 	 * now loop over the pages, reading as needed.
735 	 */
736 
737 	if (blockalloc) {
738 		lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);
739 	} else {
740 		lockmgr(&gp->g_glock, LK_SHARED, NULL);
741 	}
742 
743 	bp = NULL;
744 	for (offset = startoffset;
745 	    bytes > 0;
746 	    offset += iobytes, bytes -= iobytes) {
747 
748 		/*
749 		 * skip pages which don't need to be read.
750 		 */
751 
752 		pidx = (offset - startoffset) >> PAGE_SHIFT;
753 		while ((pgs[pidx]->flags & PG_FAKE) == 0) {
754 			size_t b;
755 
756 			KASSERT((offset & (PAGE_SIZE - 1)) == 0);
757 			if ((pgs[pidx]->flags & PG_RDONLY)) {
758 				sawhole = TRUE;
759 			}
760 			b = MIN(PAGE_SIZE, bytes);
761 			offset += b;
762 			bytes -= b;
763 			skipbytes += b;
764 			pidx++;
765 			UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
766 			    offset, 0,0,0);
767 			if (bytes == 0) {
768 				goto loopdone;
769 			}
770 		}
771 
772 		/*
773 		 * bmap the file to find out the blkno to read from and
774 		 * how much we can read in one i/o.  if bmap returns an error,
775 		 * skip the rest of the top-level i/o.
776 		 */
777 
778 		lbn = offset >> fs_bshift;
779 		error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
780 		if (error) {
781 			UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
782 			    lbn, error,0,0);
783 			skipbytes += bytes;
784 			goto loopdone;
785 		}
786 
787 		/*
788 		 * see how many pages can be read with this i/o.
789 		 * reduce the i/o size if necessary to avoid
790 		 * overwriting pages with valid data.
791 		 */
792 
793 		iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
794 		    bytes);
795 		if (offset + iobytes > round_page(offset)) {
796 			pcount = 1;
797 			while (pidx + pcount < npages &&
798 			    pgs[pidx + pcount]->flags & PG_FAKE) {
799 				pcount++;
800 			}
801 			iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
802 			    (offset - trunc_page(offset)));
803 		}
804 
805 		/*
806 		 * if this block isn't allocated, zero it instead of
807 		 * reading it.  unless we are going to allocate blocks,
808 		 * mark the pages we zeroed PG_RDONLY.
809 		 */
810 
811 		if (blkno < 0) {
812 			int holepages = (round_page(offset + iobytes) -
813 			    trunc_page(offset)) >> PAGE_SHIFT;
814 			UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);
815 
816 			sawhole = TRUE;
817 			memset((char *)kva + (offset - startoffset), 0,
818 			    iobytes);
819 			skipbytes += iobytes;
820 
821 			for (i = 0; i < holepages; i++) {
822 				if (write) {
823 					pgs[pidx + i]->flags &= ~PG_CLEAN;
824 				}
825 				if (!blockalloc) {
826 					pgs[pidx + i]->flags |= PG_RDONLY;
827 				}
828 			}
829 			continue;
830 		}
831 
832 		/*
833 		 * allocate a sub-buf for this piece of the i/o
834 		 * (or just use mbp if there's only 1 piece),
835 		 * and start it going.
836 		 */
837 
838 		if (offset == startoffset && iobytes == bytes) {
839 			bp = mbp;
840 		} else {
841 			s = splbio();
842 			bp = pool_get(&bufpool, PR_WAITOK);
843 			splx(s);
844 			BUF_INIT(bp);
845 			bp->b_data = (char *)kva + offset - startoffset;
846 			bp->b_resid = bp->b_bcount = iobytes;
847 			bp->b_flags = B_BUSY|B_READ|B_CALL|B_ASYNC;
848 			bp->b_iodone = uvm_aio_biodone1;
849 			bp->b_vp = vp;
850 			bp->b_proc = NULL;
851 		}
852 		bp->b_lblkno = 0;
853 		bp->b_private = mbp;
854 		if (devvp->v_type == VBLK) {
855 			bp->b_dev = devvp->v_rdev;
856 		}
857 
858 		/* adjust physical blkno for partial blocks */
859 		bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
860 		    dev_bshift);
861 
862 		UVMHIST_LOG(ubchist,
863 		    "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
864 		    bp, offset, iobytes, bp->b_blkno);
865 
866 		if (async)
867 			BIO_SETPRIO(bp, BPRIO_TIMELIMITED);
868 		else
869 			BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
870 		VOP_STRATEGY(bp->b_vp, bp);
871 	}
872 
873 loopdone:
874 	if (skipbytes) {
875 		s = splbio();
876 		if (error) {
877 			mbp->b_flags |= B_ERROR;
878 			mbp->b_error = error;
879 		}
880 		mbp->b_resid -= skipbytes;
881 		if (mbp->b_resid == 0) {
882 			biodone(mbp);
883 		}
884 		splx(s);
885 	}
886 
887 	if (async) {
888 		UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0);
889 		lockmgr(&gp->g_glock, LK_RELEASE, NULL);
890 		if (pgs != pgs_onstack)
891 			free(pgs, M_DEVBUF);
892 		return (0);
893 	}
894 	if (bp != NULL) {
895 		error = biowait(mbp);
896 	}
897 	s = splbio();
898 	pool_put(&bufpool, mbp);
899 	splx(s);
900 	uvm_pagermapout(kva, npages);
901 	raoffset = startoffset + totalbytes;
902 
903 	/*
904  * if we encountered a hole then we have to do a little more work.
905 	 * for read faults, we marked the page PG_RDONLY so that future
906 	 * write accesses to the page will fault again.
907 	 * for write faults, we must make sure that the backing store for
908 	 * the page is completely allocated while the pages are locked.
909 	 */
910 
911 	if (!error && sawhole && blockalloc) {
912 		error = GOP_ALLOC(vp, startoffset, npages << PAGE_SHIFT, 0,
913 		    cred);
914 		UVMHIST_LOG(ubchist, "gop_alloc off 0x%x/0x%x -> %d",
915 		    startoffset, npages << PAGE_SHIFT, error,0);
916 		if (!error) {
917 			for (i = 0; i < npages; i++) {
918 				if (pgs[i] == NULL) {
919 					continue;
920 				}
921 				pgs[i]->flags &= ~(PG_CLEAN|PG_RDONLY);
922 				UVMHIST_LOG(ubchist, "mark dirty pg %p",
923 				    pgs[i],0,0,0);
924 			}
925 		}
926 	}
927 	lockmgr(&gp->g_glock, LK_RELEASE, NULL);
928 	simple_lock(&uobj->vmobjlock);
929 
930 	/*
931 	 * see if we want to start any readahead.
932 	 * XXXUBC for now, just read the next 128k on 64k boundaries.
933 	 * this is pretty nonsensical, but it is 50% faster than reading
934 	 * just the next 64k.
935 	 */
936 
937 raout:
938 	if (!error && !async && !write && ((int)raoffset & 0xffff) == 0 &&
939 	    PAGE_SHIFT <= 16) {
940 		off_t rasize;
941 		int rapages, err, j, skipped;
942 
943 		/* XXXUBC temp limit, from above */
944 		rapages = MIN(MIN(1 << (16 - PAGE_SHIFT), MAX_READ_AHEAD),
945 		    genfs_rapages);
946 		rasize = rapages << PAGE_SHIFT;
947 		for (j = skipped = 0; j < genfs_racount; j++) {
948 
949 			if (raoffset >= memeof)
950 				break;
951 
952 			err = VOP_GETPAGES(vp, raoffset, NULL, &rapages, 0,
953 			    VM_PROT_READ, 0, PGO_NOTIMESTAMP);
954 			simple_lock(&uobj->vmobjlock);
955 			if (err) {
956 				if (err != EBUSY ||
957 				    skipped++ == genfs_raskip)
958 					break;
959 			}
960 			raoffset += rasize;
961 			rapages = rasize >> PAGE_SHIFT;
962 		}
963 	}
964 
965 	/*
966 	 * we're almost done!  release the pages...
967 	 * for errors, we free the pages.
968 	 * otherwise we activate them and mark them as valid and clean.
969 	 * also, unbusy pages that were not actually requested.
970 	 */
971 
972 	if (error) {
973 		for (i = 0; i < npages; i++) {
974 			if (pgs[i] == NULL) {
975 				continue;
976 			}
977 			UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
978 			    pgs[i], pgs[i]->flags, 0,0);
979 			if (pgs[i]->flags & PG_FAKE) {
980 				pgs[i]->flags |= PG_RELEASED;
981 			}
982 		}
983 		uvm_lock_pageq();
984 		uvm_page_unbusy(pgs, npages);
985 		uvm_unlock_pageq();
986 		simple_unlock(&uobj->vmobjlock);
987 		UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
988 		if (pgs != pgs_onstack)
989 			free(pgs, M_DEVBUF);
990 		return (error);
991 	}
992 
993 out:
994 	UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
995 	uvm_lock_pageq();
996 	for (i = 0; i < npages; i++) {
997 		pg = pgs[i];
998 		if (pg == NULL) {
999 			continue;
1000 		}
1001 		UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
1002 		    pg, pg->flags, 0,0);
1003 		if (pg->flags & PG_FAKE && !overwrite) {
1004 			pg->flags &= ~(PG_FAKE);
1005 			pmap_clear_modify(pgs[i]);
1006 		}
1007 		KASSERT(!write || !blockalloc || (pg->flags & PG_RDONLY) == 0);
1008 		if (i < ridx || i >= ridx + orignpages || async) {
1009 			UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
1010 			    pg, pg->offset,0,0);
1011 			if (pg->flags & PG_WANTED) {
1012 				wakeup(pg);
1013 			}
1014 			if (pg->flags & PG_FAKE) {
1015 				KASSERT(overwrite);
1016 				uvm_pagezero(pg);
1017 			}
1018 			if (pg->flags & PG_RELEASED) {
1019 				uvm_pagefree(pg);
1020 				continue;
1021 			}
1022 			uvm_pageactivate(pg);
1023 			pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
1024 			UVM_PAGE_OWN(pg, NULL);
1025 		}
1026 	}
1027 	uvm_unlock_pageq();
1028 	simple_unlock(&uobj->vmobjlock);
1029 	if (ap->a_m != NULL) {
1030 		memcpy(ap->a_m, &pgs[ridx],
1031 		    orignpages * sizeof(struct vm_page *));
1032 	}
1033 	if (pgs != pgs_onstack)
1034 		free(pgs, M_DEVBUF);
1035 	return (0);
1036 }
1037 
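/*
 * Illustrative sketch only: a typical synchronous VOP_GETPAGES() call,
 * modelled on the readahead loop above.  The caller locks the object;
 * for non-PGO_LOCKED requests genfs_getpages() drops that lock before
 * returning.  "example_read_one_page" is a hypothetical helper.
 */
#if 0
static int
example_read_one_page(struct vnode *vp, voff_t off, struct vm_page **pgp)
{
	struct vm_page *pgs[1] = { NULL };
	int npages = 1;
	int error;

	simple_lock(&vp->v_uobj.vmobjlock);
	error = VOP_GETPAGES(vp, trunc_page(off), pgs, &npages, 0,
	    VM_PROT_READ, 0, PGO_SYNCIO);
	if (error == 0)
		*pgp = pgs[0];		/* returned to us PG_BUSY */
	return (error);
}
#endif
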
1038 /*
1039  * generic VM putpages routine.
1040  * Write the given range of pages to backing store.
1041  *
1042  * => "offhi == 0" means flush all pages at or after "offlo".
1043  * => object should be locked by caller.   we may temporarily _unlock_
1044  *	the object if we need to clean a page (PGO_CLEANIT), or if
1045  *	PGO_SYNCIO is set and there are pages busy.
1046  *	we return with the object unlocked.
1047  * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O).
1048  *	thus, a caller might want to unlock higher level resources
1049  *	(e.g. vm_map) before calling flush.
1050  * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, then we will neither
1051  *	drop the lock in mid-scan nor block.
1052  * => if PGO_ALLPAGES is set, then all pages in the object will be processed.
1053  * => NOTE: we rely on the fact that the object's memq is a TAILQ and
1054  *	that new pages are inserted on the tail end of the list.   thus,
1055  *	we can make a complete pass through the object in one go by starting
1056  *	at the head and working towards the tail (new pages are put in
1057  *	front of us).
1058  * => NOTE: we are allowed to lock the page queues, so the caller
1059  *	must not be holding the page queue lock.
1060  *
1061  * note on "cleaning" object and PG_BUSY pages:
1062  *	this routine is holding the lock on the object.   the only time
1063  *	that it can run into a PG_BUSY page that it does not own is if
1064  *	some other process has started I/O on the page (e.g. either
1065  *	a pagein, or a pageout).    if the PG_BUSY page is being paged
1066  *	in, then it can not be dirty (!PG_CLEAN) because no one has
1067  *	had a chance to modify it yet.    if the PG_BUSY page is being
1068  *	paged out then it means that someone else has already started
1069  *	cleaning the page for us (how nice!).    in this case, if we
1070  *	have syncio specified, then after we make our pass through the
1071  *	object we need to wait for the other PG_BUSY pages to clear
1072  *	off (i.e. we need to do an iosync).   also note that once a
1073  *	page is PG_BUSY it must stay in its object until it is un-busyed.
1074  *
1075  * note on page traversal:
1076  *	we can traverse the pages in an object either by going down the
1077  *	linked list in "uobj->memq", or we can go over the address range
1078  *	by page doing hash table lookups for each address.    depending
1079  *	on how many pages are in the object it may be cheaper to do one
1080  *	or the other.   we set "by_list" to true if we are using memq.
1081  *	if the cost of a hash lookup was equal to the cost of the list
1082  *	traversal we could compare the number of pages in the start->stop
1083  *	range to the total number of pages in the object.   however, it
1084  *	seems that a hash table lookup is more expensive than the linked
1085  *	list traversal, so we multiply the number of pages in the
1086  *	range by an estimate of the relatively higher cost of the hash lookup.
1087  */
1088 
1089 int
1090 genfs_putpages(void *v)
1091 {
1092 	struct vop_putpages_args /* {
1093 		struct vnode *a_vp;
1094 		voff_t a_offlo;
1095 		voff_t a_offhi;
1096 		int a_flags;
1097 	} */ *ap = v;
1098 	struct vnode *vp = ap->a_vp;
1099 	struct uvm_object *uobj = &vp->v_uobj;
1100 	struct simplelock *slock = &uobj->vmobjlock;
1101 	off_t startoff = ap->a_offlo;
1102 	off_t endoff = ap->a_offhi;
1103 	off_t off;
1104 	int flags = ap->a_flags;
1105 	/* Even for strange MAXPHYS, the shift rounds down to a page */
1106 	const int maxpages = MAXPHYS >> PAGE_SHIFT;
1107 	int i, s, error, npages, nback;
1108 	int freeflag;
1109 	struct vm_page *pgs[maxpages], *pg, *nextpg, *tpg, curmp, endmp;
1110 	boolean_t wasclean, by_list, needs_clean, yld;
1111 	boolean_t async = (flags & PGO_SYNCIO) == 0;
1112 	boolean_t pagedaemon = curproc == uvm.pagedaemon_proc;
1113 	struct lwp *l = curlwp ? curlwp : &lwp0;
1114 	struct genfs_node *gp = VTOG(vp);
1115 	int dirtygen;
1116 	boolean_t modified = FALSE;
1117 	boolean_t cleanall;
1118 
1119 	UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);
1120 
1121 	KASSERT(flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE));
1122 	KASSERT((startoff & PAGE_MASK) == 0 && (endoff & PAGE_MASK) == 0);
1123 	KASSERT(startoff < endoff || endoff == 0);
1124 
1125 	UVMHIST_LOG(ubchist, "vp %p pages %d off 0x%x len 0x%x",
1126 	    vp, uobj->uo_npages, startoff, endoff - startoff);
1127 
1128 	KASSERT((vp->v_flag & VONWORKLST) != 0 ||
1129 	    (vp->v_flag & VWRITEMAPDIRTY) == 0);
1130 	if (uobj->uo_npages == 0) {
1131 		s = splbio();
1132 		if (vp->v_flag & VONWORKLST) {
1133 			vp->v_flag &= ~VWRITEMAPDIRTY;
1134 			if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
1135 				vp->v_flag &= ~VONWORKLST;
1136 				LIST_REMOVE(vp, v_synclist);
1137 			}
1138 		}
1139 		splx(s);
1140 		simple_unlock(slock);
1141 		return (0);
1142 	}
1143 
1144 	/*
1145 	 * the vnode has pages, set up to process the request.
1146 	 */
1147 
1148 	error = 0;
1149 	s = splbio();
1150 	simple_lock(&global_v_numoutput_slock);
1151 	wasclean = (vp->v_numoutput == 0);
1152 	simple_unlock(&global_v_numoutput_slock);
1153 	splx(s);
1154 	off = startoff;
1155 	if (endoff == 0 || flags & PGO_ALLPAGES) {
1156 		endoff = trunc_page(LLONG_MAX);
1157 	}
1158 	by_list = (uobj->uo_npages <=
1159 	    ((endoff - startoff) >> PAGE_SHIFT) * UVM_PAGE_HASH_PENALTY);
1160 
1161 #if !defined(DEBUG)
1162 	/*
1163 	 * if this vnode is known not to have dirty pages,
1164 	 * don't bother to clean it out.
1165 	 */
1166 
1167 	if ((vp->v_flag & VONWORKLST) == 0) {
1168 		if ((flags & (PGO_FREE|PGO_DEACTIVATE)) == 0) {
1169 			goto skip_scan;
1170 		}
1171 		flags &= ~PGO_CLEANIT;
1172 	}
1173 #endif /* !defined(DEBUG) */
1174 
1175 	/*
1176 	 * start the loop.  when scanning by list, hold the last page
1177 	 * in the list before we start.  pages allocated after we start
1178 	 * will be added to the end of the list, so we can stop at the
1179 	 * current last page.
1180 	 */
1181 
1182 	cleanall = (flags & PGO_CLEANIT) != 0 && wasclean &&
1183 	    startoff == 0 && endoff == trunc_page(LLONG_MAX) &&
1184 	    (vp->v_flag & VONWORKLST) != 0;
1185 	dirtygen = gp->g_dirtygen;
1186 	freeflag = pagedaemon ? PG_PAGEOUT : PG_RELEASED;
1187 	curmp.uobject = uobj;
1188 	curmp.offset = (voff_t)-1;
1189 	curmp.flags = PG_BUSY;
1190 	endmp.uobject = uobj;
1191 	endmp.offset = (voff_t)-1;
1192 	endmp.flags = PG_BUSY;
1193 	if (by_list) {
1194 		pg = TAILQ_FIRST(&uobj->memq);
1195 		TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq);
1196 		PHOLD(l);
1197 	} else {
1198 		pg = uvm_pagelookup(uobj, off);
1199 	}
1200 	nextpg = NULL;
1201 	while (by_list || off < endoff) {
1202 
1203 		/*
1204 		 * if the current page is not interesting, move on to the next.
1205 		 */
1206 
1207 		KASSERT(pg == NULL || pg->uobject == uobj);
1208 		KASSERT(pg == NULL ||
1209 		    (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
1210 		    (pg->flags & PG_BUSY) != 0);
1211 		if (by_list) {
1212 			if (pg == &endmp) {
1213 				break;
1214 			}
1215 			if (pg->offset < startoff || pg->offset >= endoff ||
1216 			    pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
1217 				if (pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
1218 					wasclean = FALSE;
1219 				}
1220 				pg = TAILQ_NEXT(pg, listq);
1221 				continue;
1222 			}
1223 			off = pg->offset;
1224 		} else if (pg == NULL || pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
1225 			if (pg != NULL) {
1226 				wasclean = FALSE;
1227 			}
1228 			off += PAGE_SIZE;
1229 			if (off < endoff) {
1230 				pg = uvm_pagelookup(uobj, off);
1231 			}
1232 			continue;
1233 		}
1234 
1235 		/*
1236 		 * if the current page needs to be cleaned and it's busy,
1237 		 * wait for it to become unbusy.
1238 		 */
1239 
1240 		yld = (l->l_cpu->ci_schedstate.spc_flags &
1241 		    SPCF_SHOULDYIELD) && !pagedaemon;
1242 		if (pg->flags & PG_BUSY || yld) {
1243 			UVMHIST_LOG(ubchist, "busy %p", pg,0,0,0);
1244 			if (flags & PGO_BUSYFAIL && pg->flags & PG_BUSY) {
1245 				UVMHIST_LOG(ubchist, "busyfail %p", pg, 0,0,0);
1246 				error = EDEADLK;
1247 				break;
1248 			}
1249 			KASSERT(!pagedaemon);
1250 			if (by_list) {
1251 				TAILQ_INSERT_BEFORE(pg, &curmp, listq);
1252 				UVMHIST_LOG(ubchist, "curmp next %p",
1253 				    TAILQ_NEXT(&curmp, listq), 0,0,0);
1254 			}
1255 			if (yld) {
1256 				simple_unlock(slock);
1257 				preempt(1);
1258 				simple_lock(slock);
1259 			} else {
1260 				pg->flags |= PG_WANTED;
1261 				UVM_UNLOCK_AND_WAIT(pg, slock, 0, "genput", 0);
1262 				simple_lock(slock);
1263 			}
1264 			if (by_list) {
1265 				UVMHIST_LOG(ubchist, "after next %p",
1266 				    TAILQ_NEXT(&curmp, listq), 0,0,0);
1267 				pg = TAILQ_NEXT(&curmp, listq);
1268 				TAILQ_REMOVE(&uobj->memq, &curmp, listq);
1269 			} else {
1270 				pg = uvm_pagelookup(uobj, off);
1271 			}
1272 			continue;
1273 		}
1274 
1275 		/*
1276 		 * if we're freeing, remove all mappings of the page now.
1277 		 * if we're cleaning, check if the page needs to be cleaned.
1278 		 */
1279 
1280 		if (flags & PGO_FREE) {
1281 			pmap_page_protect(pg, VM_PROT_NONE);
1282 		} else if (flags & PGO_CLEANIT) {
1283 
1284 			/*
1285 			 * if we still have some hope of pulling this vnode
1286 			 * off the syncer queue, write-protect the page.
1287 			 */
1288 
1289 			if (cleanall && wasclean &&
1290 			    gp->g_dirtygen == dirtygen) {
1291 
1292 				/*
1293 				 * uobj pages get wired only by uvm_fault
1294 				 * where uobj is locked.
1295 				 */
1296 
1297 				if (pg->wire_count == 0) {
1298 					pmap_page_protect(pg,
1299 					    VM_PROT_READ|VM_PROT_EXECUTE);
1300 				} else {
1301 					cleanall = FALSE;
1302 				}
1303 			}
1304 		}
1305 
1306 		if (flags & PGO_CLEANIT) {
1307 			needs_clean = pmap_clear_modify(pg) ||
1308 			    (pg->flags & PG_CLEAN) == 0;
1309 			pg->flags |= PG_CLEAN;
1310 		} else {
1311 			needs_clean = FALSE;
1312 		}
1313 
1314 		/*
1315 		 * if we're cleaning, build a cluster.
1316 		 * the cluster will consist of pages which are currently dirty,
1317 		 * but they will be returned to us marked clean.
1318 		 * if not cleaning, just operate on the one page.
1319 		 */
1320 
1321 		if (needs_clean) {
1322 			KDASSERT((vp->v_flag & VONWORKLST));
1323 			wasclean = FALSE;
1324 			memset(pgs, 0, sizeof(pgs));
1325 			pg->flags |= PG_BUSY;
1326 			UVM_PAGE_OWN(pg, "genfs_putpages");
1327 
1328 			/*
1329 			 * first look backward.
1330 			 */
1331 
1332 			npages = MIN(maxpages >> 1, off >> PAGE_SHIFT);
1333 			nback = npages;
1334 			uvn_findpages(uobj, off - PAGE_SIZE, &nback, &pgs[0],
1335 			    UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY|UFP_BACKWARD);
1336 			if (nback) {
1337 				memmove(&pgs[0], &pgs[npages - nback],
1338 				    nback * sizeof(pgs[0]));
1339 				if (npages - nback < nback)
1340 					memset(&pgs[nback], 0,
1341 					    (npages - nback) * sizeof(pgs[0]));
1342 				else
1343 					memset(&pgs[npages - nback], 0,
1344 					    nback * sizeof(pgs[0]));
1345 			}
1346 
1347 			/*
1348 			 * then plug in our page of interest.
1349 			 */
1350 
1351 			pgs[nback] = pg;
1352 
1353 			/*
1354 			 * then look forward to fill in the remaining space in
1355 			 * the array of pages.
1356 			 */
1357 
1358 			npages = maxpages - nback - 1;
1359 			uvn_findpages(uobj, off + PAGE_SIZE, &npages,
1360 			    &pgs[nback + 1],
1361 			    UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY);
1362 			npages += nback + 1;
1363 		} else {
1364 			pgs[0] = pg;
1365 			npages = 1;
1366 			nback = 0;
1367 		}
1368 
1369 		/*
1370 		 * apply FREE or DEACTIVATE options if requested.
1371 		 */
1372 
1373 		if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
1374 			uvm_lock_pageq();
1375 		}
1376 		for (i = 0; i < npages; i++) {
1377 			tpg = pgs[i];
1378 			KASSERT(tpg->uobject == uobj);
1379 			if (by_list && tpg == TAILQ_NEXT(pg, listq))
1380 				pg = tpg;
1381 			if (tpg->offset < startoff || tpg->offset >= endoff)
1382 				continue;
1383 			if (flags & PGO_DEACTIVATE &&
1384 			    (tpg->pqflags & PQ_INACTIVE) == 0 &&
1385 			    tpg->wire_count == 0) {
1386 				(void) pmap_clear_reference(tpg);
1387 				uvm_pagedeactivate(tpg);
1388 			} else if (flags & PGO_FREE) {
1389 				pmap_page_protect(tpg, VM_PROT_NONE);
1390 				if (tpg->flags & PG_BUSY) {
1391 					tpg->flags |= freeflag;
1392 					if (pagedaemon) {
1393 						uvmexp.paging++;
1394 						uvm_pagedequeue(tpg);
1395 					}
1396 				} else {
1397 
1398 					/*
1399 					 * ``page is not busy''
1400 					 * implies that npages is 1
1401 					 * and needs_clean is false.
1402 					 */
1403 
1404 					nextpg = TAILQ_NEXT(tpg, listq);
1405 					uvm_pagefree(tpg);
1406 					if (pagedaemon)
1407 						uvmexp.pdfreed++;
1408 				}
1409 			}
1410 		}
1411 		if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
1412 			uvm_unlock_pageq();
1413 		}
1414 		if (needs_clean) {
1415 			modified = TRUE;
1416 
1417 			/*
1418 			 * start the i/o.  if we're traversing by list,
1419 			 * keep our place in the list with a marker page.
1420 			 */
1421 
1422 			if (by_list) {
1423 				TAILQ_INSERT_AFTER(&uobj->memq, pg, &curmp,
1424 				    listq);
1425 			}
1426 			simple_unlock(slock);
1427 			error = GOP_WRITE(vp, pgs, npages, flags);
1428 			simple_lock(slock);
1429 			if (by_list) {
1430 				pg = TAILQ_NEXT(&curmp, listq);
1431 				TAILQ_REMOVE(&uobj->memq, &curmp, listq);
1432 			}
1433 			if (error) {
1434 				break;
1435 			}
1436 			if (by_list) {
1437 				continue;
1438 			}
1439 		}
1440 
1441 		/*
1442 		 * find the next page and continue if there was no error.
1443 		 */
1444 
1445 		if (by_list) {
1446 			if (nextpg) {
1447 				pg = nextpg;
1448 				nextpg = NULL;
1449 			} else {
1450 				pg = TAILQ_NEXT(pg, listq);
1451 			}
1452 		} else {
1453 			off += (npages - nback) << PAGE_SHIFT;
1454 			if (off < endoff) {
1455 				pg = uvm_pagelookup(uobj, off);
1456 			}
1457 		}
1458 	}
1459 	if (by_list) {
1460 		TAILQ_REMOVE(&uobj->memq, &endmp, listq);
1461 		PRELE(l);
1462 	}
1463 
1464 	if (modified && (vp->v_flag & VWRITEMAPDIRTY) != 0 &&
1465 	    (vp->v_type == VREG ||
1466 	    (vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)) {
1467 		GOP_MARKUPDATE(vp, GOP_UPDATE_MODIFIED);
1468 	}
1469 
1470 	/*
1471 	 * if we're cleaning and there was nothing to clean,
1472 	 * take us off the syncer list.  if we started any i/o
1473 	 * and we're doing sync i/o, wait for all writes to finish.
1474 	 */
1475 
1476 	s = splbio();
1477 	if (cleanall && wasclean && gp->g_dirtygen == dirtygen &&
1478 	    (vp->v_flag & VONWORKLST) != 0) {
1479 		vp->v_flag &= ~VWRITEMAPDIRTY;
1480 		if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
1481 			vp->v_flag &= ~VONWORKLST;
1482 			LIST_REMOVE(vp, v_synclist);
1483 		}
1484 	}
1485 	splx(s);
1486 
1487 #if !defined(DEBUG)
1488 skip_scan:
1489 #endif /* !defined(DEBUG) */
1490 	if (!wasclean && !async) {
1491 		s = splbio();
1492 		/*
1493 		 * XXX - we want simple_unlock(&global_v_numoutput_slock);
1494 		 *	 but the slot in ltsleep() is taken!
1495 		 * XXX - try to recover from missed wakeups with a timeout..
1496 		 *	 must think of something better.
1497 		 */
1498 		while (vp->v_numoutput != 0) {
1499 			vp->v_flag |= VBWAIT;
1500 			UVM_UNLOCK_AND_WAIT(&vp->v_numoutput, slock, FALSE,
1501 			    "genput2", hz);
1502 			simple_lock(slock);
1503 		}
1504 		splx(s);
1505 	}
1506 	simple_unlock(&uobj->vmobjlock);
1507 	return (error);
1508 }
1509 
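/*
 * Illustrative sketch only: flush and free every page of a vnode and wait
 * for the writes to complete, in the style of existing VOP_PUTPAGES()
 * callers.  The caller takes the interlock; genfs_putpages() releases it
 * before returning.  "example_flush_vnode" is a hypothetical helper.
 */
#if 0
static int
example_flush_vnode(struct vnode *vp)
{

	simple_lock(&vp->v_interlock);
	return (VOP_PUTPAGES(vp, 0, 0,
	    PGO_ALLPAGES | PGO_CLEANIT | PGO_FREE | PGO_SYNCIO));
}
#endif
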
1510 int
1511 genfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
1512 {
1513 	int s, error, run;
1514 	int fs_bshift, dev_bshift;
1515 	vaddr_t kva;
1516 	off_t eof, offset, startoffset;
1517 	size_t bytes, iobytes, skipbytes;
1518 	daddr_t lbn, blkno;
1519 	struct vm_page *pg;
1520 	struct buf *mbp, *bp;
1521 	struct vnode *devvp;
1522 	boolean_t async = (flags & PGO_SYNCIO) == 0;
1523 	UVMHIST_FUNC("genfs_gop_write"); UVMHIST_CALLED(ubchist);
1524 
1525 	UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
1526 	    vp, pgs, npages, flags);
1527 
1528 	GOP_SIZE(vp, vp->v_size, &eof, GOP_SIZE_WRITE);
1529 	if (vp->v_type == VREG) {
1530 		fs_bshift = vp->v_mount->mnt_fs_bshift;
1531 		dev_bshift = vp->v_mount->mnt_dev_bshift;
1532 	} else {
1533 		fs_bshift = DEV_BSHIFT;
1534 		dev_bshift = DEV_BSHIFT;
1535 	}
1536 	error = 0;
1537 	pg = pgs[0];
1538 	startoffset = pg->offset;
1539 	bytes = MIN(npages << PAGE_SHIFT, eof - startoffset);
1540 	skipbytes = 0;
1541 	KASSERT(bytes != 0);
1542 
1543 	kva = uvm_pagermapin(pgs, npages,
1544 	    UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK);
1545 
1546 	s = splbio();
1547 	simple_lock(&global_v_numoutput_slock);
1548 	vp->v_numoutput += 2;
1549 	simple_unlock(&global_v_numoutput_slock);
1550 	mbp = pool_get(&bufpool, PR_WAITOK);
1551 	BUF_INIT(mbp);
1552 	UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
1553 	    vp, mbp, vp->v_numoutput, bytes);
1554 	splx(s);
1555 	mbp->b_bufsize = npages << PAGE_SHIFT;
1556 	mbp->b_data = (void *)kva;
1557 	mbp->b_resid = mbp->b_bcount = bytes;
1558 	mbp->b_flags = B_BUSY|B_WRITE|B_AGE| (async ? (B_CALL|B_ASYNC) : 0);
1559 	mbp->b_iodone = uvm_aio_biodone;
1560 	mbp->b_vp = vp;
1561 
1562 	bp = NULL;
1563 	for (offset = startoffset;
1564 	    bytes > 0;
1565 	    offset += iobytes, bytes -= iobytes) {
1566 		lbn = offset >> fs_bshift;
1567 		error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
1568 		if (error) {
1569 			UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0);
1570 			skipbytes += bytes;
1571 			bytes = 0;
1572 			break;
1573 		}
1574 
1575 		iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
1576 		    bytes);
1577 		if (blkno == (daddr_t)-1) {
1578 			skipbytes += iobytes;
1579 			continue;
1580 		}
1581 
1582 		/* if it's really one i/o, don't make a second buf */
1583 		if (offset == startoffset && iobytes == bytes) {
1584 			bp = mbp;
1585 		} else {
1586 			s = splbio();
1587 			V_INCR_NUMOUTPUT(vp);
1588 			bp = pool_get(&bufpool, PR_WAITOK);
1589 			UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
1590 			    vp, bp, vp->v_numoutput, 0);
1591 			splx(s);
1592 			BUF_INIT(bp);
1593 			bp->b_data = (char *)kva +
1594 			    (vaddr_t)(offset - pg->offset);
1595 			bp->b_resid = bp->b_bcount = iobytes;
1596 			bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC;
1597 			bp->b_iodone = uvm_aio_biodone1;
1598 			bp->b_vp = vp;
1599 		}
1600 		bp->b_lblkno = 0;
1601 		bp->b_private = mbp;
1602 		if (devvp->v_type == VBLK) {
1603 			bp->b_dev = devvp->v_rdev;
1604 		}
1605 
1606 		/* adjust physical blkno for partial blocks */
1607 		bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
1608 		    dev_bshift);
1609 		UVMHIST_LOG(ubchist,
1610 		    "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
1611 		    vp, offset, bp->b_bcount, bp->b_blkno);
1612 		if (curproc == uvm.pagedaemon_proc)
1613 			BIO_SETPRIO(bp, BPRIO_TIMELIMITED);
1614 		else if (async)
1615 			BIO_SETPRIO(bp, BPRIO_TIMENONCRITICAL);
1616 		else
1617 			BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
1618 		VOP_STRATEGY(bp->b_vp, bp);
1619 	}
1620 	if (skipbytes) {
1621 		UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
1622 		s = splbio();
1623 		if (error) {
1624 			mbp->b_flags |= B_ERROR;
1625 			mbp->b_error = error;
1626 		}
1627 		mbp->b_resid -= skipbytes;
1628 		if (mbp->b_resid == 0) {
1629 			biodone(mbp);
1630 		}
1631 		splx(s);
1632 	}
1633 	if (async) {
1634 		UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0);
1635 		return (0);
1636 	}
1637 	UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
1638 	error = biowait(mbp);
1639 	uvm_aio_aiodone(mbp);
1640 	UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
1641 	return (error);
1642 }
1643 
1644 /*
1645  * VOP_PUTPAGES() for vnodes which never have pages.
1646  */
1647 
1648 int
1649 genfs_null_putpages(void *v)
1650 {
1651 	struct vop_putpages_args /* {
1652 		struct vnode *a_vp;
1653 		voff_t a_offlo;
1654 		voff_t a_offhi;
1655 		int a_flags;
1656 	} */ *ap = v;
1657 	struct vnode *vp = ap->a_vp;
1658 
1659 	KASSERT(vp->v_uobj.uo_npages == 0);
1660 	simple_unlock(&vp->v_interlock);
1661 	return (0);
1662 }
1663 
1664 void
1665 genfs_node_init(struct vnode *vp, const struct genfs_ops *ops)
1666 {
1667 	struct genfs_node *gp = VTOG(vp);
1668 
1669 	lockinit(&gp->g_glock, PINOD, "glock", 0, 0);
1670 	gp->g_op = ops;
1671 }
1672 
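/*
 * Illustrative sketch only: how a filesystem hooks into genfs.  Its
 * per-vnode private data is expected to begin with a struct genfs_node
 * (see VTOG()); it fills in a struct genfs_ops and runs each new vnode
 * through genfs_node_init().  Slots with no special needs can point at
 * the generic routines in this file, e.g. genfs_size() and
 * genfs_gop_write().  The "examplefs_*" names are hypothetical.
 */
#if 0
static const struct genfs_ops examplefs_genfsops = {
	.gop_size = genfs_size,
	.gop_alloc = examplefs_gop_alloc,	/* fs-specific block allocation */
	.gop_write = genfs_gop_write,
	.gop_markupdate = examplefs_gop_markupdate,
};

static void
examplefs_init_vnode(struct vnode *vp)
{

	genfs_node_init(vp, &examplefs_genfsops);
}
#endif
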
1673 void
1674 genfs_size(struct vnode *vp, off_t size, off_t *eobp, int flags)
1675 {
1676 	int bsize;
1677 
1678 	bsize = 1 << vp->v_mount->mnt_fs_bshift;
1679 	*eobp = (size + bsize - 1) & ~(bsize - 1);
1680 }
1681 
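/*
 * Compatibility getpages: fill the requested pages by calling VOP_READ()
 * on the vnode instead of reading the backing store directly, for
 * filesystems that cannot use genfs_getpages().  genfs_compat_gop_write()
 * below is the write-side counterpart, built on VOP_WRITE().
 */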
1682 int
1683 genfs_compat_getpages(void *v)
1684 {
1685 	struct vop_getpages_args /* {
1686 		struct vnode *a_vp;
1687 		voff_t a_offset;
1688 		struct vm_page **a_m;
1689 		int *a_count;
1690 		int a_centeridx;
1691 		vm_prot_t a_access_type;
1692 		int a_advice;
1693 		int a_flags;
1694 	} */ *ap = v;
1695 
1696 	off_t origoffset;
1697 	struct vnode *vp = ap->a_vp;
1698 	struct uvm_object *uobj = &vp->v_uobj;
1699 	struct vm_page *pg, **pgs;
1700 	vaddr_t kva;
1701 	int i, error, orignpages, npages;
1702 	struct iovec iov;
1703 	struct uio uio;
1704 	struct ucred *cred = curproc->p_ucred;
1705 	boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
1706 
1707 	error = 0;
1708 	origoffset = ap->a_offset;
1709 	orignpages = *ap->a_count;
1710 	pgs = ap->a_m;
1711 
1712 	if (write && (vp->v_flag & VONWORKLST) == 0) {
1713 		vn_syncer_add_to_worklist(vp, filedelay);
1714 	}
1715 	if (ap->a_flags & PGO_LOCKED) {
1716 		uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
1717 		    UFP_NOWAIT|UFP_NOALLOC| (write ? UFP_NORDONLY : 0));
1718 
1719 		return (ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0);
1720 	}
1721 	if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= vp->v_size) {
1722 		simple_unlock(&uobj->vmobjlock);
1723 		return (EINVAL);
1724 	}
1725 	npages = orignpages;
1726 	uvn_findpages(uobj, origoffset, &npages, pgs, UFP_ALL);
1727 	simple_unlock(&uobj->vmobjlock);
1728 	kva = uvm_pagermapin(pgs, npages,
1729 	    UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
1730 	for (i = 0; i < npages; i++) {
1731 		pg = pgs[i];
1732 		if ((pg->flags & PG_FAKE) == 0) {
1733 			continue;
1734 		}
1735 		iov.iov_base = (char *)kva + (i << PAGE_SHIFT);
1736 		iov.iov_len = PAGE_SIZE;
1737 		uio.uio_iov = &iov;
1738 		uio.uio_iovcnt = 1;
1739 		uio.uio_offset = origoffset + (i << PAGE_SHIFT);
1740 		uio.uio_segflg = UIO_SYSSPACE;
1741 		uio.uio_rw = UIO_READ;
1742 		uio.uio_resid = PAGE_SIZE;
1743 		uio.uio_procp = NULL;
1744 		/* XXX vn_lock */
1745 		error = VOP_READ(vp, &uio, 0, cred);
1746 		if (error) {
1747 			break;
1748 		}
1749 		if (uio.uio_resid) {
1750 			memset(iov.iov_base, 0, uio.uio_resid);
1751 		}
1752 	}
1753 	uvm_pagermapout(kva, npages);
1754 	simple_lock(&uobj->vmobjlock);
1755 	uvm_lock_pageq();
1756 	for (i = 0; i < npages; i++) {
1757 		pg = pgs[i];
1758 		if (error && (pg->flags & PG_FAKE) != 0) {
1759 			pg->flags |= PG_RELEASED;
1760 		} else {
1761 			pmap_clear_modify(pg);
1762 			uvm_pageactivate(pg);
1763 		}
1764 	}
1765 	if (error) {
1766 		uvm_page_unbusy(pgs, npages);
1767 	}
1768 	uvm_unlock_pageq();
1769 	simple_unlock(&uobj->vmobjlock);
1770 	return (error);
1771 }
1772 
1773 int
1774 genfs_compat_gop_write(struct vnode *vp, struct vm_page **pgs, int npages,
1775     int flags)
1776 {
1777 	off_t offset;
1778 	struct iovec iov;
1779 	struct uio uio;
1780 	struct ucred *cred = curproc->p_ucred;
1781 	struct buf *bp;
1782 	vaddr_t kva;
1783 	int s, error;
1784 
1785 	offset = pgs[0]->offset;
1786 	kva = uvm_pagermapin(pgs, npages,
1787 	    UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK);
1788 
1789 	iov.iov_base = (void *)kva;
1790 	iov.iov_len = npages << PAGE_SHIFT;
1791 	uio.uio_iov = &iov;
1792 	uio.uio_iovcnt = 1;
1793 	uio.uio_offset = offset;
1794 	uio.uio_segflg = UIO_SYSSPACE;
1795 	uio.uio_rw = UIO_WRITE;
1796 	uio.uio_resid = npages << PAGE_SHIFT;
1797 	uio.uio_procp = NULL;
1798 	/* XXX vn_lock */
1799 	error = VOP_WRITE(vp, &uio, 0, cred);
1800 
1801 	s = splbio();
1802 	V_INCR_NUMOUTPUT(vp);
1803 	bp = pool_get(&bufpool, PR_WAITOK);
1804 	splx(s);
1805 
1806 	BUF_INIT(bp);
1807 	bp->b_flags = B_BUSY | B_WRITE | B_AGE;
1808 	bp->b_vp = vp;
1809 	bp->b_lblkno = offset >> vp->v_mount->mnt_fs_bshift;
1810 	bp->b_data = (char *)kva;
1811 	bp->b_bcount = npages << PAGE_SHIFT;
1812 	bp->b_bufsize = npages << PAGE_SHIFT;
1813 	bp->b_resid = 0;
1814 	if (error) {
1815 		bp->b_flags |= B_ERROR;
1816 		bp->b_error = error;
1817 	}
1818 	uvm_aio_aiodone(bp);
1819 	return (error);
1820 }
1821 
1822 static void
1823 filt_genfsdetach(struct knote *kn)
1824 {
1825 	struct vnode *vp = (struct vnode *)kn->kn_hook;
1826 
1827 	/* XXXLUKEM lock the struct? */
1828 	SLIST_REMOVE(&vp->v_klist, kn, knote, kn_selnext);
1829 }
1830 
1831 static int
1832 filt_genfsread(struct knote *kn, long hint)
1833 {
1834 	struct vnode *vp = (struct vnode *)kn->kn_hook;
1835 
1836 	/*
1837 	 * filesystem is gone, so set the EOF flag and schedule
1838 	 * the knote for deletion.
1839 	 */
1840 	if (hint == NOTE_REVOKE) {
1841 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
1842 		return (1);
1843 	}
1844 
1845 	/* XXXLUKEM lock the struct? */
1846 	kn->kn_data = vp->v_size - kn->kn_fp->f_offset;
1847 	return (kn->kn_data != 0);
1848 }
1849 
1850 static int
1851 filt_genfsvnode(struct knote *kn, long hint)
1852 {
1853 
1854 	if (kn->kn_sfflags & hint)
1855 		kn->kn_fflags |= hint;
1856 	if (hint == NOTE_REVOKE) {
1857 		kn->kn_flags |= EV_EOF;
1858 		return (1);
1859 	}
1860 	return (kn->kn_fflags != 0);
1861 }
1862 
1863 static const struct filterops genfsread_filtops =
1864 	{ 1, NULL, filt_genfsdetach, filt_genfsread };
1865 static const struct filterops genfsvnode_filtops =
1866 	{ 1, NULL, filt_genfsdetach, filt_genfsvnode };
1867 
1868 int
1869 genfs_kqfilter(void *v)
1870 {
1871 	struct vop_kqfilter_args /* {
1872 		struct vnode	*a_vp;
1873 		struct knote	*a_kn;
1874 	} */ *ap = v;
1875 	struct vnode *vp;
1876 	struct knote *kn;
1877 
1878 	vp = ap->a_vp;
1879 	kn = ap->a_kn;
1880 	switch (kn->kn_filter) {
1881 	case EVFILT_READ:
1882 		kn->kn_fop = &genfsread_filtops;
1883 		break;
1884 	case EVFILT_VNODE:
1885 		kn->kn_fop = &genfsvnode_filtops;
1886 		break;
1887 	default:
1888 		return (1);
1889 	}
1890 
1891 	kn->kn_hook = vp;
1892 
1893 	/* XXXLUKEM lock the struct? */
1894 	SLIST_INSERT_HEAD(&vp->v_klist, kn, kn_selnext);
1895 
1896 	return (0);
1897 }
1898