xref: /dflybsd-src/sys/vfs/devfs/devfs_vnops.c (revision cd29885abfb8f68adb0c082e313b891156d66964)
1 /*
2  * Copyright (c) 2009 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Alex Hornung <ahornung@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/time.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/fcntl.h>
40 #include <sys/proc.h>
41 #include <sys/priv.h>
42 #include <sys/signalvar.h>
43 #include <sys/vnode.h>
44 #include <sys/uio.h>
45 #include <sys/mount.h>
46 #include <sys/file.h>
48 #include <sys/namei.h>
49 #include <sys/dirent.h>
50 #include <sys/malloc.h>
51 #include <sys/stat.h>
52 #include <sys/reg.h>
53 #include <sys/buf2.h>
54 #include <vm/vm_pager.h>
55 #include <vm/vm_zone.h>
56 #include <vm/vm_object.h>
57 #include <sys/filio.h>
58 #include <sys/ttycom.h>
59 #include <sys/sysref2.h>
60 #include <sys/tty.h>
61 #include <vfs/devfs/devfs.h>
62 #include <sys/pioctl.h>
63 
64 #include <machine/limits.h>
65 
66 MALLOC_DECLARE(M_DEVFS);
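/*
 * Every vop argument structure begins with the common vop_generic_args
 * header, so a single generic handler cast to (void *) can be plugged
 * into any slot of a vop_ops table to reject an unsupported operation.
 */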
67 #define DEVFS_BADOP	(void *)devfs_badop
68 
69 static int devfs_badop(struct vop_generic_args *);
70 static int devfs_access(struct vop_access_args *);
71 static int devfs_inactive(struct vop_inactive_args *);
72 static int devfs_reclaim(struct vop_reclaim_args *);
73 static int devfs_readdir(struct vop_readdir_args *);
74 static int devfs_getattr(struct vop_getattr_args *);
75 static int devfs_setattr(struct vop_setattr_args *);
76 static int devfs_readlink(struct vop_readlink_args *);
77 static int devfs_print(struct vop_print_args *);
78 
79 static int devfs_nresolve(struct vop_nresolve_args *);
80 static int devfs_nlookupdotdot(struct vop_nlookupdotdot_args *);
81 static int devfs_nsymlink(struct vop_nsymlink_args *);
82 static int devfs_nremove(struct vop_nremove_args *);
83 
84 static int devfs_spec_open(struct vop_open_args *);
85 static int devfs_spec_close(struct vop_close_args *);
86 static int devfs_spec_fsync(struct vop_fsync_args *);
87 
88 static int devfs_spec_read(struct vop_read_args *);
89 static int devfs_spec_write(struct vop_write_args *);
90 static int devfs_spec_ioctl(struct vop_ioctl_args *);
91 static int devfs_spec_poll(struct vop_poll_args *);
92 static int devfs_spec_kqfilter(struct vop_kqfilter_args *);
93 static int devfs_spec_strategy(struct vop_strategy_args *);
94 static void devfs_spec_strategy_done(struct bio *);
95 static int devfs_spec_freeblks(struct vop_freeblks_args *);
96 static int devfs_spec_bmap(struct vop_bmap_args *);
97 static int devfs_spec_advlock(struct vop_advlock_args *);
98 static void devfs_spec_getpages_iodone(struct bio *);
99 static int devfs_spec_getpages(struct vop_getpages_args *);
100 
101 
102 static int devfs_specf_close(struct file *);
103 static int devfs_specf_read(struct file *, struct uio *, struct ucred *, int);
104 static int devfs_specf_write(struct file *, struct uio *, struct ucred *, int);
105 static int devfs_specf_stat(struct file *, struct stat *, struct ucred *);
106 static int devfs_specf_kqfilter(struct file *, struct knote *);
107 static int devfs_specf_poll(struct file *, int, struct ucred *);
108 static int devfs_specf_ioctl(struct file *, u_long, caddr_t, struct ucred *);
109 
110 
111 static __inline int sequential_heuristic(struct uio *, struct file *);
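/*
 * devfs_lock serializes changes to the devfs node topology; the vnode
 * operations below take it LK_EXCLUSIVE around any walk or modification
 * of the node lists.
 */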
112 extern struct lock devfs_lock;
113 
114 /*
115  * devfs vnode operations for regular files
116  */
117 struct vop_ops devfs_vnode_norm_vops = {
118 	.vop_default =		vop_defaultop,
119 	.vop_access =		devfs_access,
120 	.vop_advlock =		DEVFS_BADOP,
121 	.vop_bmap =			DEVFS_BADOP,
122 	.vop_close =		vop_stdclose,
123 	.vop_getattr =		devfs_getattr,
124 	.vop_inactive =		devfs_inactive,
125 	.vop_ncreate =		DEVFS_BADOP,
126 	.vop_nresolve =		devfs_nresolve,
127 	.vop_nlookupdotdot =	devfs_nlookupdotdot,
128 	.vop_nlink =		DEVFS_BADOP,
129 	.vop_nmkdir =		DEVFS_BADOP,
130 	.vop_nmknod =		DEVFS_BADOP,
131 	.vop_nremove =		devfs_nremove,
132 	.vop_nrename =		DEVFS_BADOP,
133 	.vop_nrmdir =		DEVFS_BADOP,
134 	.vop_nsymlink =		devfs_nsymlink,
135 	.vop_open =			vop_stdopen,
136 	.vop_pathconf =		vop_stdpathconf,
137 	.vop_print =		devfs_print,
138 	.vop_read =			DEVFS_BADOP,
139 	.vop_readdir =		devfs_readdir,
140 	.vop_readlink =		devfs_readlink,
141 	.vop_reclaim =		devfs_reclaim,
142 	.vop_setattr =		devfs_setattr,
143 	.vop_write =		DEVFS_BADOP,
144 	.vop_ioctl =		DEVFS_BADOP
145 };
146 
147 /*
148  * devfs vnode operations for character devices
149  */
150 struct vop_ops devfs_vnode_dev_vops = {
151 	.vop_default =		vop_defaultop,
152 	.vop_access =		devfs_access,
153 	.vop_advlock =		devfs_spec_advlock,
154 	.vop_bmap =			devfs_spec_bmap,
155 	.vop_close =		devfs_spec_close,
156 	.vop_freeblks =		devfs_spec_freeblks,
157 	.vop_fsync =		devfs_spec_fsync,
158 	.vop_getattr =		devfs_getattr,
159 	.vop_getpages =		devfs_spec_getpages,
160 	.vop_inactive =		devfs_inactive,
161 	.vop_open =			devfs_spec_open,
162 	.vop_pathconf =		vop_stdpathconf,
163 	.vop_print =		devfs_print,
164 	.vop_poll =			devfs_spec_poll,
165 	.vop_kqfilter =		devfs_spec_kqfilter,
166 	.vop_read =			devfs_spec_read,
167 	.vop_readdir =		DEVFS_BADOP,
168 	.vop_readlink =		DEVFS_BADOP,
169 	.vop_reclaim =		devfs_reclaim,
170 	.vop_setattr =		devfs_setattr,
171 	.vop_strategy =		devfs_spec_strategy,
172 	.vop_write =		devfs_spec_write,
173 	.vop_ioctl =		devfs_spec_ioctl
174 };
175 
176 struct vop_ops *devfs_vnode_dev_vops_p = &devfs_vnode_dev_vops;
177 
178 struct fileops devfs_dev_fileops = {
179 	.fo_read = devfs_specf_read,
180 	.fo_write = devfs_specf_write,
181 	.fo_ioctl = devfs_specf_ioctl,
182 	.fo_poll = devfs_specf_poll,
183 	.fo_kqfilter = devfs_specf_kqfilter,
184 	.fo_stat = devfs_specf_stat,
185 	.fo_close = devfs_specf_close,
186 	.fo_shutdown = nofo_shutdown
187 };
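/*
 * devfs_spec_open() points the open file's f_ops at this table, so
 * subsequent read/write/ioctl/poll/kqfilter calls on the descriptor go
 * straight to the device through the devfs_specf_* wrappers instead of
 * through the generic vnode fileops.
 */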
188 
189 
190 /*
191  * generic entry point for unsupported operations
192  */
193 static int
194 devfs_badop(struct vop_generic_args *ap)
195 {
196 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs: specified vnode operation is not implemented (yet)\n");
197 	return (EIO);
198 }
199 
200 
201 static int
202 devfs_access(struct vop_access_args *ap)
203 {
204 	struct devfs_node *node = DEVFS_NODE(ap->a_vp);
205 	int error = 0;
206 
207 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_access() called!\n");
208 
209 	error = vop_helper_access(ap, node->uid, node->gid,
210 				node->mode, node->flags);
211 
212 	//devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_access ruled over %s: %d\n", "UNKNOWN", error);
213 
214 	/* XXX: consider possible special cases? terminal, ...? */
215 	return error;
216 }
217 
218 
219 static int
220 devfs_inactive(struct vop_inactive_args *ap)
221 {
222 	//devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_inactive() called!\n");
223 
224 	/* If the devfs_node is no longer linked into the topology,
225 	 * suggest that the vnode be recycled. */
226 	if (DEVFS_NODE(ap->a_vp)) {
227 		if ((DEVFS_NODE(ap->a_vp)->flags & DEVFS_NODE_LINKED) == 0) {
228 			vrecycle(ap->a_vp);
229 		}
230 	}
231 
232 	return 0;
233 }
234 
235 
236 static int
237 devfs_reclaim(struct vop_reclaim_args *ap)
238 {
239 	int locked = 0;
240 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_reclaim() called!\n");
241 
242 	/* Check if it is locked already. If not, we acquire the devfs lock */
243 	if (lockstatus(&devfs_lock, curthread) != LK_EXCLUSIVE) {
244 		lockmgr(&devfs_lock, LK_EXCLUSIVE);
245 		locked = 1;
246 	}
247 
248 	/* If the devfs_node is no longer linked into the topology,
249 	 * get rid of the devfs_node. */
250 	if (DEVFS_NODE(ap->a_vp)) {
251 		if ((DEVFS_NODE(ap->a_vp)->flags & DEVFS_NODE_LINKED) == 0) {
252 			devfs_freep(DEVFS_NODE(ap->a_vp));
253 			//devfs_tracer_del_orphan(DEVFS_NODE(ap->a_vp));
254 		}
255 
256 		/* unlink vnode <--> devfs_node */
257 		DEVFS_NODE(ap->a_vp)->v_node = NULL;
258 	}
259 
260 	/* If we acquired the lock, we also get rid of it */
261 	if (locked)
262 		lockmgr(&devfs_lock, LK_RELEASE);
263 
264 	ap->a_vp->v_data = NULL;
265 	/* avoid a panic on release because the vnode was not added with v_associate_rdev */
266 	ap->a_vp->v_rdev = NULL;
267 
268 	return 0;
269 }
270 
271 
272 static int
273 devfs_readdir(struct vop_readdir_args *ap)
274 {
275 	struct devfs_node *node;
276 	int error2 = 0, r, error = 0;
277 
278 	int cookie_index;
279 	int ncookies;
280 	off_t *cookies;
281 	off_t saveoff;
282 
283 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_readdir() called!\n");
284 
285 	if (ap->a_uio->uio_offset < 0 || ap->a_uio->uio_offset > INT_MAX)
286 		return (EINVAL);
287 	if ((error = vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY)) != 0)
288 		return (error);
289 
290 	saveoff = ap->a_uio->uio_offset;
291 
292 	if (ap->a_ncookies) {
293 		ncookies = ap->a_uio->uio_resid / 16 + 1; /* rough guess: one entry per 16 bytes */
294 		if (ncookies > 256)
295 			ncookies = 256;
296 		cookies = kmalloc(256 * sizeof(off_t), M_TEMP, M_WAITOK);
297 		cookie_index = 0;
298 	} else {
299 		ncookies = -1;
300 		cookies = NULL;
301 		cookie_index = 0;
302 	}
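	/*
	 * The cookie array lets readdir consumers (NFS in particular)
	 * restart a scan at an arbitrary entry: each cookie records the
	 * directory offset at which the corresponding entry was emitted.
	 */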
303 
304 	nanotime(&DEVFS_NODE(ap->a_vp)->atime);
305 
306 	if (saveoff == 0) {
307 		r = vop_write_dirent(&error, ap->a_uio, DEVFS_NODE(ap->a_vp)->d_dir.d_ino, DT_DIR, 1, ".");
308 		if (r)
309 			goto done;
310 		if (cookies)
311 			cookies[cookie_index] = saveoff;
312 		saveoff++;
313 		cookie_index++;
314 		if (cookie_index == ncookies)
315 			goto done;
316 	}
317 
318 	if (saveoff == 1) {
319 		if (DEVFS_NODE(ap->a_vp)->parent) {
320 			r = vop_write_dirent(&error, ap->a_uio,
321 					     DEVFS_NODE(ap->a_vp)->parent->d_dir.d_ino,
322 					     DT_DIR, 2, "..");
323 		} else {
324 			r = vop_write_dirent(&error, ap->a_uio,
325 					     DEVFS_NODE(ap->a_vp)->d_dir.d_ino, DT_DIR, 2, "..");
326 		}
327 		if (r)
328 			goto done;
329 		if (cookies)
330 			cookies[cookie_index] = saveoff;
331 		saveoff++;
332 		cookie_index++;
333 		if (cookie_index == ncookies)
334 			goto done;
335 	}
336 
337 	TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(DEVFS_NODE(ap->a_vp)), link) {
338 		if ((node->flags & DEVFS_HIDDEN) || (node->flags & DEVFS_INVISIBLE))
339 			continue;
340 
341 		if (node->cookie < saveoff)
342 			continue;
343 /*
344 		if (skip > 0) {
345 			skip--;
346 			continue;
347 		}
348 */
349 		saveoff = node->cookie;
350 
351 		error2 = vop_write_dirent(&error, ap->a_uio,
352 			node->d_dir.d_ino, node->d_dir.d_type,
353 			node->d_dir.d_namlen, node->d_dir.d_name);
354 
355 		if (error2)
356 			break;
357 
358 		saveoff++;
359 
360 		if (cookies)
361 			cookies[cookie_index] = node->cookie;
362 		++cookie_index;
363 		if (cookie_index == ncookies)
364 			break;
365 
366 		//count++;
367 	}
368 
369 done:
370 	vn_unlock(ap->a_vp);
371 
372 	ap->a_uio->uio_offset = saveoff;
373 	if (error && cookie_index == 0) {
374 		if (cookies) {
375 			kfree(cookies, M_TEMP);
376 			*ap->a_ncookies = 0;
377 			*ap->a_cookies = NULL;
378 		}
379 	} else {
380 		if (cookies) {
381 			*ap->a_ncookies = cookie_index;
382 			*ap->a_cookies = cookies;
383 		}
384 	}
385 	return (error);
386 }
387 
388 
389 static int
390 devfs_nresolve(struct vop_nresolve_args *ap)
391 {
392 	struct devfs_node *node, *found = NULL;
393 	struct namecache *ncp;
394 	struct vnode *vp = NULL;
395 	//void *ident;
396 	int error = 0;
397 	size_t len;
398 	int hidden = 0;
399 
400 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve() called!\n");
401 
402 	ncp = ap->a_nch->ncp;
403 	len = ncp->nc_nlen;
404 
405 	lockmgr(&devfs_lock, LK_EXCLUSIVE);
406 
407 	if ((DEVFS_NODE(ap->a_dvp)->node_type != Proot) &&
408 		(DEVFS_NODE(ap->a_dvp)->node_type != Pdir)) {
409 		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve: ap->a_dvp is not a dir!!!\n");
410 		cache_setvp(ap->a_nch, NULL);
411 		goto out;
412 	}
413 
414 search:
415 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -search- \n");
416 	TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(DEVFS_NODE(ap->a_dvp)), link) {
417 		if (len == node->d_dir.d_namlen) {
418 			if (!memcmp(ncp->nc_name, node->d_dir.d_name, len)) {
419 				devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve: found: %s\n", ncp->nc_name);
420 				found = node;
421 				break;
422 			}
423 		}
424 	}
425 
426 	if (found) {
427 		if ((found->node_type == Plink) && (found->link_target))
428 			found = found->link_target;
429 
430 		if (!(found->flags & DEVFS_HIDDEN))
431 			devfs_allocv(/*ap->a_dvp->v_mount, */ &vp, found);
432 		else
433 			hidden = 1;
434 		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -2- \n");
435 	}
436 
437 	//devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -3- %c%c%c\n", ncp->nc_name[0], ncp->nc_name[1], ncp->nc_name[2]);
438 	if (vp == NULL) {
439 		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve vp==NULL \n");
440 		/* devfs_clone() may create the node on the fly; retry the search */
441 		if ((!hidden) && (!devfs_clone(ncp->nc_name, &len, NULL, 0, ap->a_cred))) {
442 			goto search;
443 		}
444 		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -4- \n");
445 		error = ENOENT;
446 		cache_setvp(ap->a_nch, NULL);
447 		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -5- \n");
448 		goto out;
449 
450 	}
451 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -6- \n");
452 	KKASSERT(vp);
453 	vn_unlock(vp);
454 	cache_setvp(ap->a_nch, vp);
455 	vrele(vp);
456 
457 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -9- \n");
458 out:
459 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nresolve -end:10- failed? %s \n", (error)?"FAILED!":"OK!");
460 	lockmgr(&devfs_lock, LK_RELEASE);
461 	return error;
462 }
463 
464 
465 static int
466 devfs_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
467 {
468 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nlookupdotdot() called!\n");
469 	*ap->a_vpp = NULL;
470 
471 	lockmgr(&devfs_lock, LK_EXCLUSIVE);
472 	if (DEVFS_NODE(ap->a_dvp)->parent != NULL) {
473 		devfs_allocv(/*ap->a_dvp->v_mount, */ap->a_vpp, DEVFS_NODE(ap->a_dvp)->parent);
474 		vn_unlock(*ap->a_vpp);
475 	}
476 	lockmgr(&devfs_lock, LK_RELEASE);
477 
478 	return ((*ap->a_vpp == NULL) ? ENOENT : 0);
479 }
480 
481 
482 static int
483 devfs_getattr(struct vop_getattr_args *ap)
484 {
485 	struct vattr *vap = ap->a_vap;
486 	struct devfs_node *node = DEVFS_NODE(ap->a_vp);
487 	int error = 0;
488 
489 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_getattr() called for %s!\n", DEVFS_NODE(ap->a_vp)->d_dir.d_name);
490 
491 	/* start by zeroing out the attributes */
492 	VATTR_NULL(vap);
493 
494 	/* next do all the common fields */
495 	vap->va_type = ap->a_vp->v_type;
496 	vap->va_mode = node->mode;
497 	vap->va_fileid = node->d_dir.d_ino;
498 	vap->va_flags = 0;	/* XXX: what should this be? */
499 	vap->va_blocksize = DEV_BSIZE;
500 	vap->va_bytes = vap->va_size = sizeof(struct devfs_node);
501 
502 	//devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_getattr() check dev %s!\n", (DEVFS_NODE(ap->a_vp)->d_dev)?(DEVFS_NODE(ap->a_vp)->d_dev->si_name):"Not a device");
503 
504 	vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
505 
506 
507 	vap->va_atime = node->atime;
508 	vap->va_mtime = node->mtime;
509 	vap->va_ctime = node->ctime;
510 
511 	vap->va_nlink = 1; /* number of references to file */
512 
513 	vap->va_uid = node->uid;
514 	vap->va_gid = node->gid;
515 
516 	vap->va_rmajor = 0;
517 	vap->va_rminor = 0;
518 
519 	if ((DEVFS_NODE(ap->a_vp)->node_type == Pdev) &&
520 		(DEVFS_NODE(ap->a_vp)->d_dev))  {
521 		devfs_debug(DEVFS_DEBUG_DEBUG, "getattr: dev is: %p\n", DEVFS_NODE(ap->a_vp)->d_dev);
522 		reference_dev(DEVFS_NODE(ap->a_vp)->d_dev);
523 		vap->va_fsid = dev2udev(DEVFS_NODE(ap->a_vp)->d_dev);
524 		vap->va_rminor = DEVFS_NODE(ap->a_vp)->d_dev->si_uminor;
525 		release_dev(DEVFS_NODE(ap->a_vp)->d_dev);
526 	}
527 
528 	/* For a softlink the va_size is the length of the softlink */
529 	if (node->symlink_name != NULL) {
530 		vap->va_size = node->symlink_namelen;
531 	}
532 	nanotime(&node->atime);
533 	return (error);	/* XXX: set error usefully */
534 }
535 
536 
537 static int
538 devfs_setattr(struct vop_setattr_args *ap)
539 {
540 	struct devfs_node *node;
541 	struct vattr *vap;
542 	int error = 0;
543 
544 
545 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_setattr() called!\n");
546 	lockmgr(&devfs_lock, LK_EXCLUSIVE);
547 
548 	vap = ap->a_vap;
549 	node = DEVFS_NODE(ap->a_vp);
550 
551 	if (vap->va_uid != (uid_t)VNOVAL) {
552 		if ((ap->a_cred->cr_uid != node->uid) &&
553 			(!groupmember(node->gid, ap->a_cred))) {
554 			error = priv_check(curthread, PRIV_VFS_CHOWN);
555 			if (error) {
556 				devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_setattr, erroring out -1-\n");
557 				goto out;
558 			}
559 		}
560 		node->uid = vap->va_uid;
561 	}
562 
563 	if (vap->va_gid != (gid_t)VNOVAL) {
564 		if ((ap->a_cred->cr_uid != node->uid) &&
565 			(!groupmember(node->gid, ap->a_cred))) {
566 			error = priv_check(curthread, PRIV_VFS_CHOWN);
567 			if (error) {
568 				devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_setattr, erroring out -2-\n");
569 				goto out;
570 			}
571 		}
572 		node->gid = vap->va_gid;
573 	}
574 
575 	if (vap->va_mode != (mode_t)VNOVAL) {
576 		if (ap->a_cred->cr_uid != node->uid) {
577 			error = priv_check(curthread, PRIV_VFS_ADMIN);
578 			if (error) {
579 				devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_setattr, erroring out -3-\n");
580 				goto out;
581 			}
582 		}
583 		node->mode = vap->va_mode;
584 	}
585 
586 out:
587 	nanotime(&node->mtime);
588 	lockmgr(&devfs_lock, LK_RELEASE);
589 	return error;
590 }
591 
592 
593 static int
594 devfs_readlink(struct vop_readlink_args *ap)
595 {
596 	struct devfs_node *node = DEVFS_NODE(ap->a_vp);
597 
598 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_readlink() called!\n");
599 
600 	return (uiomove(node->symlink_name, node->symlink_namelen, ap->a_uio));
601 }
602 
603 
604 static int
605 devfs_print(struct vop_print_args *ap)
606 {
607 	//struct devfs_node *node = DEVFS_NODE(ap->a_vp);
608 
609 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_print() called!\n");
610 
611 	//XXX: print some useful debugging about node.
612 	return (0);
613 }
614 
615 
616 static int
617 devfs_nsymlink(struct vop_nsymlink_args *ap)
618 {
619 	size_t targetlen = strlen(ap->a_target);
620 
621 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nsymlink() called!\n");
622 
623 	ap->a_vap->va_type = VLNK;
624 	*ap->a_vpp = NULL;
625 	if ((DEVFS_NODE(ap->a_dvp)->node_type != Proot) &&
626 		(DEVFS_NODE(ap->a_dvp)->node_type != Pdir)) {
627 		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nsymlink: ap->a_dvp is not a dir!!!\n");
628 		goto out;
629 	}
630 	lockmgr(&devfs_lock, LK_EXCLUSIVE);
631 	devfs_allocvp(ap->a_dvp->v_mount, ap->a_vpp, Plink,
632 				ap->a_nch->ncp->nc_name, DEVFS_NODE(ap->a_dvp), NULL);
633 
634 	if (*ap->a_vpp) {
635 		DEVFS_NODE(*ap->a_vpp)->flags |= DEVFS_USER_CREATED;
636 
637 		DEVFS_NODE(*ap->a_vpp)->symlink_namelen = targetlen;
638 		DEVFS_NODE(*ap->a_vpp)->symlink_name = kmalloc(targetlen + 1, M_DEVFS, M_WAITOK);
639 		memcpy(DEVFS_NODE(*ap->a_vpp)->symlink_name, ap->a_target, targetlen);
640 		DEVFS_NODE(*ap->a_vpp)->symlink_name[targetlen] = '\0';
641 		cache_setunresolved(ap->a_nch);
642 		/* XXX: problematic to use cache_* while holding devfs_lock? Probably not. */
643 		cache_setvp(ap->a_nch, *ap->a_vpp);
644 	}
645 	lockmgr(&devfs_lock, LK_RELEASE);
646 out:
647 	return ((*ap->a_vpp == NULL) ? ENOTDIR : 0);
648 
649 }
650 
651 
652 static int
653 devfs_nremove(struct vop_nremove_args *ap)
654 {
655 	struct devfs_node *node;
656 	struct namecache *ncp;
657 	//struct vnode *vp = NULL;
658 	int error = ENOENT;
659 
660 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nremove() called!\n");
661 
662 	ncp = ap->a_nch->ncp;
663 
664 	lockmgr(&devfs_lock, LK_EXCLUSIVE);
665 
666 	if ((DEVFS_NODE(ap->a_dvp)->node_type != Proot) &&
667 		(DEVFS_NODE(ap->a_dvp)->node_type != Pdir)) {
668 		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_nremove: ap->a_dvp is not a dir!!!\n");
669 		goto out;
670 	}
671 
672 	TAILQ_FOREACH(node, DEVFS_DENODE_HEAD(DEVFS_NODE(ap->a_dvp)), link) {
673 		if (ncp->nc_nlen == node->d_dir.d_namlen) {
674 			if (!memcmp(ncp->nc_name, node->d_dir.d_name, ncp->nc_nlen)) {
675 				/* only allow removal of user-created entries (e.g. symlinks) */
676 				if ((node->flags & DEVFS_USER_CREATED) == 0) {
677 					error = EPERM;
678 					goto out;
679 				} else {
680 					if (node->v_node)
681 						cache_inval_vp(node->v_node, CINV_DESTROY);
682 
683 					devfs_unlinkp(node);
684 					error = 0;
685 					break;
686 				}
687 			}
688 		}
689 	}
690 
691 	cache_setunresolved(ap->a_nch);
692 	cache_setvp(ap->a_nch, NULL);
693 	//cache_inval_vp(node->v_node, CINV_DESTROY);
694 
695 out:
696 	lockmgr(&devfs_lock, LK_RELEASE);
697 	//vrele(ap->a_dvp);
698 	//vput(ap->a_dvp);
699 	return error;
700 }
701 
702 
703 static int
704 devfs_spec_open(struct vop_open_args *ap)
705 {
706 	struct vnode *vp = ap->a_vp;
707 	cdev_t dev, ndev = NULL;
708 	struct devfs_node *node = NULL;
709 	int error = 0;
710 	size_t len;
711 
712 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open() called\n");
713 
714 	if (DEVFS_NODE(vp)) {
715 		if (DEVFS_NODE(vp)->d_dev == NULL)
716 			return ENXIO;
717 	}
718 
719 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open: -1-\n");
720 
721 	if ((dev = vp->v_rdev) == NULL)
722 		return ENXIO;
723 
724 	if (DEVFS_NODE(vp) && ap->a_fp) {
725 		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open: -1.1-\n");
726 		lockmgr(&devfs_lock, LK_EXCLUSIVE);
727 		len = DEVFS_NODE(vp)->d_dir.d_namlen;
728 		if (!(devfs_clone(DEVFS_NODE(vp)->d_dir.d_name, &len, &ndev, 1, ap->a_cred))) {
729 			devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open: -1.2- |%s|\n", ndev->si_name);
730 
731 			dev = ndev;
732 			reference_dev(dev);
733 			devfs_link_dev(dev);
734 			node = devfs_create_device_node(DEVFS_MNTDATA(vp->v_mount)->root_node, dev, NULL, NULL);
735 			//node = devfs_allocp(Pdev, ndev->si_name, DEVFS_NODE(vp)->parent, vp->v_mount, dev);
736 
737 			devfs_debug(DEVFS_DEBUG_DEBUG, "parent here is: %s, node is: |%s|\n", (DEVFS_NODE(vp)->parent->node_type == Proot)?"ROOT!":DEVFS_NODE(vp)->parent->d_dir.d_name, node->d_dir.d_name);
738 			devfs_debug(DEVFS_DEBUG_DEBUG, "test: %s\n", ((struct devfs_node *)(TAILQ_LAST(DEVFS_DENODE_HEAD(DEVFS_NODE(vp)->parent), devfs_node_head)))->d_dir.d_name);
739 
740 			node->flags |= DEVFS_CLONED;
741 			devfs_allocv(&vp, node);
742 
743 			ap->a_vp = vp;
744 
745 			//XXX: propagate to other devfs mounts?
746 		}
747 		lockmgr(&devfs_lock, LK_RELEASE);
748 	}
749 
750 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open() called on %s! \n", dev->si_name);
751 	/*
752 	 * Make this field valid before any I/O in ->d_open
753 	 */
754 	if (!dev->si_iosize_max)
755 		dev->si_iosize_max = DFLTPHYS;
756 
757 	if (dev_dflags(dev) & D_TTY)
758 		vp->v_flag |= VISTTY;
759 
760 	vn_unlock(vp);
761 	error = dev_dopen(dev, ap->a_mode, S_IFCHR, ap->a_cred);
762 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
763 
764 	if (error) {
765 		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open() error out: %x\n", error);
766 		if (DEVFS_NODE(vp) && ((DEVFS_NODE(vp)->flags & DEVFS_CLONED) == DEVFS_CLONED))
767 			vput(vp);
768 		return error;
769 	}
770 
771 
772 	if (dev_dflags(dev) & D_TTY) {
773 		if (dev->si_tty) {
774 			struct tty *tp;
775 			tp = dev->si_tty;
776 			if (!tp->t_stop) {
777 				devfs_debug(DEVFS_DEBUG_DEBUG, "devfs: no t_stop\n");
778 				tp->t_stop = nottystop;
779 			}
780 		}
781 	}
782 
783 
784 	if (vn_isdisk(vp, NULL)) {
785 		if (!dev->si_bsize_phys)
786 			dev->si_bsize_phys = DEV_BSIZE;
787 		vinitvmio(vp, IDX_TO_OFF(INT_MAX));
788 	}
789 
790 	vop_stdopen(ap);
791 	if (DEVFS_NODE(vp))
792 		nanotime(&DEVFS_NODE(vp)->atime);
793 
794 	if (DEVFS_NODE(vp) && ((DEVFS_NODE(vp)->flags & DEVFS_CLONED) == DEVFS_CLONED))
795 		vn_unlock(vp);
796 
797 	/* Ugly pty magic, to make pty devices appear once they are opened */
798 	if (DEVFS_NODE(vp) && ((DEVFS_NODE(vp)->flags & DEVFS_PTY) == DEVFS_PTY))
799 		DEVFS_NODE(vp)->flags &= ~DEVFS_INVISIBLE;
800 
801 	if (ap->a_fp) {
802 		ap->a_fp->f_type = DTYPE_VNODE;
803 		ap->a_fp->f_flag = ap->a_mode & FMASK;
804 		ap->a_fp->f_ops = &devfs_dev_fileops;
805 		ap->a_fp->f_data = vp;
806 	}
807 
808 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open: -end:3-\n");
809 
810 	return 0;
811 }
812 
813 
814 static int
815 devfs_spec_close(struct vop_close_args *ap)
816 {
817 	struct proc *p = curproc;
818 	struct vnode *vp = ap->a_vp;
819 	cdev_t dev = vp->v_rdev;
820 	int error = 0;
821 	int needrelock;
822 
823 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() called on %s! \n", dev->si_name);
824 
825 	/*
826 	 * A couple of hacks for devices and tty devices.  The
827 	 * vnode ref count cannot be used to figure out the
828 	 * last close, but we can use v_opencount now that
829 	 * revoke works properly.
830 	 *
831 	 * Detect the last close on a controlling terminal and clear
832 	 * the session (half-close).
833 	 */
834 	if (dev)
835 		reference_dev(dev);
836 
837 	if (p && vp->v_opencount <= 1 && vp == p->p_session->s_ttyvp) {
838 		p->p_session->s_ttyvp = NULL;
839 		vrele(vp);
840 	}
841 
842 	/*
843 	 * Vnodes can be opened and closed multiple times.  Do not really
844 	 * close the device unless (1) it is being closed forcibly,
845 	 * (2) the device wants to track closes, or (3) this is the last
846 	 * vnode doing its last close on the device.
847 	 *
848 	 * XXX the VXLOCK (force close) case can leave vnodes referencing
849 	 * a closed device.  This might not occur now that our revoke is
850 	 * fixed.
851 	 */
852 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -1- \n");
853 	if (dev && ((vp->v_flag & VRECLAIMED) ||
854 	    (dev_dflags(dev) & D_TRACKCLOSE) ||
855 	    (vp->v_opencount == 1))) {
856 		needrelock = 0;
857 		if (vn_islocked(vp)) {
858 			needrelock = 1;
859 			vn_unlock(vp);
860 		}
861 		error = dev_dclose(dev, ap->a_fflag, S_IFCHR);
862 		if (DEVFS_NODE(vp) && (DEVFS_NODE(vp)->flags & DEVFS_CLONED) == DEVFS_CLONED) {
863 			devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close: last of the cloned ones, so delete node %s\n", dev->si_name);
864 			devfs_unlinkp(DEVFS_NODE(vp));
865 			devfs_freep(DEVFS_NODE(vp));
866 			devfs_unlink_dev(dev);
867 			release_dev(dev);
868 			devfs_destroy_cdev(dev);
869 		}
870 		/* Ugly pty magic, to make pty devices disappear again once they are closed */
871 		if (DEVFS_NODE(vp) && ((DEVFS_NODE(vp)->flags & DEVFS_PTY) == DEVFS_PTY))
872 			DEVFS_NODE(vp)->flags |= DEVFS_INVISIBLE;
873 
874 		if (needrelock)
875 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
876 	} else {
877 		error = 0;
878 	}
879 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -2- \n");
880 	/*
881 	 * Track the actual opens and closes on the vnode.  The last close
882 	 * disassociates the rdev.  If the rdev is already disassociated or the
883 	 * opencount is already 0, the vnode might have been revoked and no
884 	 * further opencount tracking occurs.
885 	 */
886 	if (dev) {
887 		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -3- \n");
888 		if (vp->v_opencount == 1) {
889 			//vp->v_rdev = 0;
890 			devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -3.5- \n");
891 		}
892 		release_dev(dev);
893 	}
894 	if (vp->v_opencount > 0) {
895 		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -4- \n");
896 		vop_stdclose(ap);
897 		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -5- \n");
898 	}
899 
900 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -end:6- \n");
901 	return (error);
902 
903 }
904 
905 
906 static int
907 devfs_specf_close(struct file *fp)
908 {
909 	int error;
910 	struct vnode *vp = (struct vnode *)fp->f_data;
911 
912 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_close() called! \n");
913 	get_mplock();
914 	fp->f_ops = &badfileops;
915 
916 	error = vn_close(vp, fp->f_flag);
917 	rel_mplock();
918 
919 	return (error);
920 }
921 
922 
923 /*
924  * Device-optimized file table vnode read routine.
925  *
926  * This bypasses the VOP table and talks directly to the device.  Most
927  * filesystems just route to specfs and can make this optimization.
928  *
929  * MPALMOSTSAFE - acquires mplock
930  */
931 static int
932 devfs_specf_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
933 {
934 	struct vnode *vp;
935 	int ioflag;
936 	int error;
937 	cdev_t dev;
938 
939 	get_mplock();
940 	//devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_read() called! \n");
941 	KASSERT(uio->uio_td == curthread,
942 		("uio_td %p is not td %p", uio->uio_td, curthread));
943 
944 	vp = (struct vnode *)fp->f_data;
945 	if (vp == NULL || vp->v_type == VBAD) {
946 		error = EBADF;
947 		goto done;
948 	}
949 
950 	if ((dev = vp->v_rdev) == NULL) {
951 		error = EBADF;
952 		goto done;
953 	}
954 	//devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_read() called! for dev %s\n", dev->si_name);
955 
956 	reference_dev(dev);
957 
958 	if (uio->uio_resid == 0) {
959 		error = 0;
960 		goto done;
961 	}
962 	if ((flags & O_FOFFSET) == 0)
963 		uio->uio_offset = fp->f_offset;
964 
965 	ioflag = 0;
966 	if (flags & O_FBLOCKING) {
967 		/* ioflag &= ~IO_NDELAY; */
968 	} else if (flags & O_FNONBLOCKING) {
969 		ioflag |= IO_NDELAY;
970 	} else if (fp->f_flag & FNONBLOCK) {
971 		ioflag |= IO_NDELAY;
972 	}
973 	if (flags & O_FBUFFERED) {
974 		/* ioflag &= ~IO_DIRECT; */
975 	} else if (flags & O_FUNBUFFERED) {
976 		ioflag |= IO_DIRECT;
977 	} else if (fp->f_flag & O_DIRECT) {
978 		ioflag |= IO_DIRECT;
979 	}
980 	ioflag |= sequential_heuristic(uio, fp);
981 
982 	error = dev_dread(dev, uio, ioflag);
983 
984 	release_dev(dev);
985 	if (DEVFS_NODE(vp))
986 		nanotime(&DEVFS_NODE(vp)->atime);
987 	if ((flags & O_FOFFSET) == 0)
988 		fp->f_offset = uio->uio_offset;
989 	fp->f_nextoff = uio->uio_offset;
990 done:
991 	rel_mplock();
992 	//devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_read finished\n");
993 	return (error);
994 }
995 
996 
997 static int
998 devfs_specf_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
999 {
1000 	struct vnode *vp;
1001 	int ioflag;
1002 	int error;
1003 	cdev_t dev;
1004 
1005 	//devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_write() called! \n");
1006 	get_mplock();
1007 	KASSERT(uio->uio_td == curthread,
1008 		("uio_td %p is not td %p", uio->uio_td, curthread));
1009 
1010 	vp = (struct vnode *)fp->f_data;
1011 	if (vp == NULL || vp->v_type == VBAD) {
1012 		error = EBADF;
1013 		goto done;
1014 	}
1015 	if (vp->v_type == VREG)
1016 		bwillwrite(uio->uio_resid);
1017 
1018 
1019 	if ((dev = vp->v_rdev) == NULL) {
1020 		error = EBADF;
1021 		goto done;
1022 	}
1023 	//devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_write() called! for dev %s\n", dev->si_name);
1024 	reference_dev(dev);
1025 
1026 	if ((flags & O_FOFFSET) == 0)
1027 		uio->uio_offset = fp->f_offset;
1028 
1029 	ioflag = IO_UNIT;
1030 	if (vp->v_type == VREG &&
1031 	   ((fp->f_flag & O_APPEND) || (flags & O_FAPPEND))) {
1032 		ioflag |= IO_APPEND;
1033 	}
1034 
1035 	if (flags & O_FBLOCKING) {
1036 		/* ioflag &= ~IO_NDELAY; */
1037 	} else if (flags & O_FNONBLOCKING) {
1038 		ioflag |= IO_NDELAY;
1039 	} else if (fp->f_flag & FNONBLOCK) {
1040 		ioflag |= IO_NDELAY;
1041 	}
1042 	if (flags & O_FBUFFERED) {
1043 		/* ioflag &= ~IO_DIRECT; */
1044 	} else if (flags & O_FUNBUFFERED) {
1045 		ioflag |= IO_DIRECT;
1046 	} else if (fp->f_flag & O_DIRECT) {
1047 		ioflag |= IO_DIRECT;
1048 	}
1049 	if (flags & O_FASYNCWRITE) {
1050 		/* ioflag &= ~IO_SYNC; */
1051 	} else if (flags & O_FSYNCWRITE) {
1052 		ioflag |= IO_SYNC;
1053 	} else if (fp->f_flag & O_FSYNC) {
1054 		ioflag |= IO_SYNC;
1055 	}
1056 
1057 	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))
1058 		ioflag |= IO_SYNC;
1059 	ioflag |= sequential_heuristic(uio, fp);
1060 
1061 	error = dev_dwrite(dev, uio, ioflag);
1062 
1063 	release_dev(dev);
1064 	if (DEVFS_NODE(vp))
1065 		nanotime(&DEVFS_NODE(vp)->mtime);
1066 
1067 	if ((flags & O_FOFFSET) == 0)
1068 		fp->f_offset = uio->uio_offset;
1069 	fp->f_nextoff = uio->uio_offset;
1070 done:
1071 	rel_mplock();
1072 	//devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_write done\n");
1073 	return (error);
1074 }
1075 
1076 
1077 static int
1078 devfs_specf_stat(struct file *fp, struct stat *sb, struct ucred *cred)
1079 {
1080 	struct vnode *vp;
1081 	struct vattr vattr;
1082 	struct vattr *vap;
1083 	u_short mode;
1084 	cdev_t dev;
1085 	int error;
1086 
1087 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_stat() called\n");
1088 
1089 	get_mplock();
1090 	vp = (struct vnode *)fp->f_data;
1091 	error = vn_stat(vp, sb, cred);
1092 	if (error) {
1093 		rel_mplock();
1094 		return (error);
1095 	}
1096 
1097 	vap = &vattr;
1098 
1099 	error = VOP_GETATTR(vp, vap);
1100 	if (error) {
1101 		rel_mplock();
1102 		return (error);
1103 	}
1104 
1105 	/*
1106 	 * Zero the spare stat fields
1107 	 */
1108 	sb->st_lspare = 0;
1109 	sb->st_qspare = 0;
1110 
1111 	/*
1112 	 * Copy from vattr table ... or not in case it's a cloned device
1113 	 */
1114 	if (vap->va_fsid != VNOVAL)
1115 		sb->st_dev = vap->va_fsid;
1116 	else
1117 		sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
1118 
1119 	sb->st_ino = vap->va_fileid;
1120 
1121 	mode = vap->va_mode;
1122 	mode |= S_IFCHR;
1123 	sb->st_mode = mode;
1124 
1125 	if (vap->va_nlink > (nlink_t)-1)
1126 		sb->st_nlink = (nlink_t)-1;
1127 	else
1128 		sb->st_nlink = vap->va_nlink;
1129 	sb->st_uid = vap->va_uid;
1130 	sb->st_gid = vap->va_gid;
1131 	sb->st_rdev = 0;
1132 	sb->st_size = vap->va_size;
1133 	sb->st_atimespec = vap->va_atime;
1134 	sb->st_mtimespec = vap->va_mtime;
1135 	sb->st_ctimespec = vap->va_ctime;
1136 
1137 	/*
1138 	 * A VCHR and VBLK device may track the last access and last modified
1139  * time independently of the filesystem.  This is particularly true
1140 	 * because device read and write calls may bypass the filesystem.
1141 	 */
1142 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
1143 		dev = vp->v_rdev;
1144 		if (dev != NULL) {
1145 			if (dev->si_lastread) {
1146 				sb->st_atimespec.tv_sec = dev->si_lastread;
1147 				sb->st_atimespec.tv_nsec = 0;
1148 			}
1149 			if (dev->si_lastwrite) {
1150 				sb->st_mtimespec.tv_sec = dev->si_lastwrite;
1151 				sb->st_mtimespec.tv_nsec = 0;
1152 			}
1153 		}
1154 	}
1155 
1156 	/*
1157 	 * According to www.opengroup.org, the meaning of st_blksize is
1158 	 *   "a filesystem-specific preferred I/O block size for this
1159 	 *    object.  In some filesystem types, this may vary from file
1160 	 *    to file"
1161 	 * Default to PAGE_SIZE after much discussion.
1162 	 */
1163 
1164 	sb->st_blksize = PAGE_SIZE;
1165 
1166 	sb->st_flags = vap->va_flags;
1167 
1168 	error = priv_check_cred(cred, PRIV_VFS_GENERATION, 0);
1169 	if (error)
1170 		sb->st_gen = 0;
1171 	else
1172 		sb->st_gen = (u_int32_t)vap->va_gen;
1173 
1174 	sb->st_blocks = vap->va_bytes / S_BLKSIZE;
1175 	sb->st_fsmid = vap->va_fsmid;
1176 
1177 	rel_mplock();
1178 	return (0);
1179 }
1180 
1181 
1182 static int
1183 devfs_specf_kqfilter(struct file *fp, struct knote *kn)
1184 {
1185 	struct vnode *vp;
1186 	//int ioflag;
1187 	int error;
1188 	cdev_t dev;
1189 
1190 	//devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_kqfilter() called! \n");
1191 
1192 	get_mplock();
1193 
1194 	vp = (struct vnode *)fp->f_data;
1195 	if (vp == NULL || vp->v_type == VBAD) {
1196 		error = EBADF;
1197 		goto done;
1198 	}
1199 
1200 	if ((dev = vp->v_rdev) == NULL) {
1201 		error = EBADF;
1202 		goto done;
1203 	}
1204 	reference_dev(dev);
1205 
1206 	error = dev_dkqfilter(dev, kn);
1207 
1208 	release_dev(dev);
1209 
1210 	if (DEVFS_NODE(vp))
1211 		nanotime(&DEVFS_NODE(vp)->atime);
1212 done:
1213 	rel_mplock();
1214 	return (error);
1215 }
1216 
1217 
1218 static int
1219 devfs_specf_poll(struct file *fp, int events, struct ucred *cred)
1220 {
1221 	struct vnode *vp;
1222 	//int ioflag;
1223 	int error;
1224 	cdev_t dev;
1225 
1226 	//devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_poll() called! \n");
1227 
1228 	get_mplock();
1229 
1230 	vp = (struct vnode *)fp->f_data;
1231 	if (vp == NULL || vp->v_type == VBAD) {
1232 		error = EBADF;
1233 		goto done;
1234 	}
1235 
1236 	if ((dev = vp->v_rdev) == NULL) {
1237 		error = EBADF;
1238 		goto done;
1239 	}
1240 	reference_dev(dev);
1241 	error = dev_dpoll(dev, events);
1242 
1243 	release_dev(dev);
1244 
1245 	if (DEVFS_NODE(vp))
1246 		nanotime(&DEVFS_NODE(vp)->atime);
1247 done:
1248 	rel_mplock();
1249 	return (error);
1250 }
1251 
1252 
1253 /*
1254  * MPALMOSTSAFE - acquires mplock
1255  */
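/*
 * FIODTYPE and FIODNAME are answered here without calling into the
 * driver.  A userland sketch for FIODNAME (illustrative only, using the
 * fiodname_args fields referenced below):
 *
 *	struct fiodname_args fa;
 *	char buf[64];			(any reasonably large buffer)
 *
 *	fa.name = buf;
 *	fa.len = sizeof(buf);
 *	if (ioctl(fd, FIODNAME, &fa) == 0)
 *		printf("device name: %s\n", buf);
 */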
1256 static int
1257 devfs_specf_ioctl(struct file *fp, u_long com, caddr_t data, struct ucred *ucred)
1258 {
1259 	struct vnode *vp = ((struct vnode *)fp->f_data);
1260 	struct vnode *ovp;
1261 	//struct vattr vattr;
1262 	cdev_t	dev;
1263 	int error;
1264 	struct fiodname_args *name_args;
1265 	size_t namlen;
1266 	const char *name;
1267 
1268 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl() called! \n");
1269 
1270 	get_mplock();
1271 
1272 	if ((dev = vp->v_rdev) == NULL) {
1273 		error = EBADF;		/* device was revoked */
1274 		goto out;
1275 	}
1276 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl() called! for dev %s\n", dev->si_name);
1277 
1278 	if (!(dev_dflags(dev) & D_TTY))
1279 		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl() called on %s! com is: %x\n", dev->si_name, com);
1280 
1281 	if (com == FIODTYPE) {
1282 		*(int *)data = dev_dflags(dev) & D_TYPEMASK;
1283 		error = 0;
1284 		goto out;
1285 	} else if (com == FIODNAME) {
1286 		name_args = (struct fiodname_args *)data;
1287 		name = dev->si_name;
1288 		namlen = strlen(name) + 1;
1289 
1290 		devfs_debug(DEVFS_DEBUG_DEBUG, "ioctl, got: FIODNAME for %s\n", name);
1291 
1292 		if (namlen <= name_args->len)
1293 			error = copyout(dev->si_name, name_args->name, namlen);
1294 		else
1295 			error = EINVAL;
1296 
1297 		//name_args->len = namlen; //need _IOWR to enable this
1298 		devfs_debug(DEVFS_DEBUG_DEBUG, "ioctl stuff: error: %d\n", error);
1299 		goto out;
1300 	}
1301 	reference_dev(dev);
1302 	error = dev_dioctl(dev, com, data, fp->f_flag, ucred);
1303 	release_dev(dev);
1304 	if (DEVFS_NODE(vp)) {
1305 		nanotime(&DEVFS_NODE(vp)->atime);
1306 		nanotime(&DEVFS_NODE(vp)->mtime);
1307 	}
1308 
1309 	if (com == TIOCSCTTY)
1310 		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl: got TIOCSCTTY on %s\n", dev->si_name);
1311 	if (error == 0 && com == TIOCSCTTY) {
1312 		struct proc *p = curthread->td_proc;
1313 		struct session *sess;
1314 		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl: dealing with TIOCSCTTY on %s\n", dev->si_name);
1315 		if (p == NULL) {
1316 			error = ENOTTY;
1317 			goto out;
1318 		}
1319 		sess = p->p_session;
1320 		/* Do nothing if reassigning same control tty */
1321 		if (sess->s_ttyvp == vp) {
1322 			error = 0;
1323 			goto out;
1324 		}
1325 		/* Get rid of reference to old control tty */
1326 		ovp = sess->s_ttyvp;
1327 		vref(vp);
1328 		sess->s_ttyvp = vp;
1329 		if (ovp)
1330 			vrele(ovp);
1331 	}
1332 
1333 out:
1334 	rel_mplock();
1335 	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_specf_ioctl() finished! \n");
1336 	return (error);
1337 }
1338 
1339 
1340 static int
1341 devfs_spec_fsync(struct vop_fsync_args *ap)
1342 {
1343 	struct vnode *vp = ap->a_vp;
1344 	int error;
1345 
1346 	if (!vn_isdisk(vp, NULL))
1347 		return (0);
1348 
1349 	/*
1350 	 * Flush all dirty buffers associated with a block device.
1351 	 */
1352 	error = vfsync(vp, ap->a_waitfor, 10000, NULL, NULL);
1353 	return (error);
1354 }
1355 
1356 
1357 
1358 
1359 
1360 
1361 
1362 
1363 
1364 
1365 
1366 
1367 
1368 
1369 
1370 
1371 
1372 
1373 
1374 
1375 static int
1376 devfs_spec_read(struct vop_read_args *ap)
1377 {
1378 	struct vnode *vp;
1379 	struct uio *uio;
1380 	cdev_t dev;
1381 	int error;
1382 
1383 	vp = ap->a_vp;
1384 	dev = vp->v_rdev;
1385 	uio = ap->a_uio;
1386 
1387 	if (dev == NULL)		/* device was revoked */
1388 		return (EBADF);
1389 	if (uio->uio_resid == 0)
1390 		return (0);
1391 
1392 	vn_unlock(vp);
1393 	error = dev_dread(dev, uio, ap->a_ioflag);
1394 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1395 
1396 	if (DEVFS_NODE(vp))
1397 		nanotime(&DEVFS_NODE(vp)->atime);
1398 
1399 	return (error);
1400 }
1401 
1402 /*
1403  * Vnode op for write
1404  *
1405  * spec_write(struct vnode *a_vp, struct uio *a_uio, int a_ioflag,
1406  *	      struct ucred *a_cred)
1407  */
1408 /* ARGSUSED */
1409 static int
1410 devfs_spec_write(struct vop_write_args *ap)
1411 {
1412 	struct vnode *vp;
1413 	struct uio *uio;
1414 	cdev_t dev;
1415 	int error;
1416 
1417 	vp = ap->a_vp;
1418 	dev = vp->v_rdev;
1419 	uio = ap->a_uio;
1420 
1421 	KKASSERT(uio->uio_segflg != UIO_NOCOPY);
1422 
1423 	if (dev == NULL)		/* device was revoked */
1424 		return (EBADF);
1425 
1426 	vn_unlock(vp);
1427 	error = dev_dwrite(dev, uio, ap->a_ioflag);
1428 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1429 
1430 	if (DEVFS_NODE(vp))
1431 		nanotime(&DEVFS_NODE(vp)->mtime);
1432 
1433 	return (error);
1434 }
1435 
1436 /*
1437  * Device ioctl operation.
1438  *
1439  * spec_ioctl(struct vnode *a_vp, int a_command, caddr_t a_data,
1440  *	      int a_fflag, struct ucred *a_cred)
1441  */
1442 /* ARGSUSED */
1443 static int
1444 devfs_spec_ioctl(struct vop_ioctl_args *ap)
1445 {
1446 	cdev_t dev;
1447 	struct vnode *vp = ap->a_vp;
1448 
1449 	if ((dev = vp->v_rdev) == NULL)
1450 		return (EBADF);		/* device was revoked */
1451 	if (ap->a_command == TIOCSCTTY)
1452 		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_*SPEC*_ioctl: got TIOCSCTTY\n");
1453 
1454 	if (DEVFS_NODE(vp)) {
1455 		nanotime(&DEVFS_NODE(vp)->atime);
1456 		nanotime(&DEVFS_NODE(vp)->mtime);
1457 	}
1458 
1459 	return (dev_dioctl(dev, ap->a_command, ap->a_data,
1460 		    ap->a_fflag, ap->a_cred));
1461 }
1462 
1463 /*
1464  * spec_poll(struct vnode *a_vp, int a_events, struct ucred *a_cred)
1465  */
1466 /* ARGSUSED */
1467 static int
1468 devfs_spec_poll(struct vop_poll_args *ap)
1469 {
1470 	cdev_t dev;
1471 	struct vnode *vp = ap->a_vp;
1472 
1473 	if ((dev = vp->v_rdev) == NULL)
1474 		return (EBADF);		/* device was revoked */
1475 
1476 	if (DEVFS_NODE(vp))
1477 		nanotime(&DEVFS_NODE(vp)->atime);
1478 
1479 	return (dev_dpoll(dev, ap->a_events));
1480 }
1481 
1482 /*
1483  * spec_kqfilter(struct vnode *a_vp, struct knote *a_kn)
1484  */
1485 /* ARGSUSED */
1486 static int
1487 devfs_spec_kqfilter(struct vop_kqfilter_args *ap)
1488 {
1489 	cdev_t dev;
1490 	struct vnode *vp = ap->a_vp;
1491 
1492 	if ((dev = vp->v_rdev) == NULL)
1493 		return (EBADF);		/* device was revoked */
1494 
1495 	if (DEVFS_NODE(vp))
1496 		nanotime(&DEVFS_NODE(vp)->atime);
1497 
1498 	return (dev_dkqfilter(dev, ap->a_kn));
1499 }
1500 
1501 
1502 
1503 
1504 
1505 
1506 
1507 
1508 
1509 
1510 
1511 
1512 
1513 
1514 
1515 
1516 
1517 
1518 
1519 
1520 
1521 
1522 
1523 
1524 
1525 
1526 
1527 
1528 
1529 
1530 
1531 
1532 
1533 
1534 
1535 
1536 
1537 
1538 
1539 
1540 
1541 
1542 /*
1543  * Convert a vnode strategy call into a device strategy call.  Vnode strategy
1544  * calls are not limited to device DMA limits, so we have to deal with
1545  * that case here.
1546  *
1547  * spec_strategy(struct vnode *a_vp, struct bio *a_bio)
1548  */
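/*
 * Worked example (illustrative): with si_iosize_max = 128 KiB on a disk
 * with si_bsize_phys = 512, chunksize below works out to 128 KiB, so a
 * 1 MiB request is issued as 8 chained transfers, each one kicked off
 * from the completion routine that follows.
 */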
1549 static int
1550 devfs_spec_strategy(struct vop_strategy_args *ap)
1551 {
1552 	struct bio *bio = ap->a_bio;
1553 	struct buf *bp = bio->bio_buf;
1554 	struct buf *nbp;
1555 	struct vnode *vp;
1556 	struct mount *mp;
1557 	int chunksize;
1558 	int maxiosize;
1559 
1560 	if (bp->b_cmd != BUF_CMD_READ && LIST_FIRST(&bp->b_dep) != NULL)
1561 		buf_start(bp);
1562 
1563 	/*
1564 	 * Collect statistics on synchronous and asynchronous read
1565 	 * and write counts for disks that have associated filesystems.
1566 	 */
1567 	vp = ap->a_vp;
1568 	KKASSERT(vp->v_rdev != NULL);	/* XXX */
1569 	if (vn_isdisk(vp, NULL) && (mp = vp->v_rdev->si_mountpoint) != NULL) {
1570 		if (bp->b_cmd == BUF_CMD_READ) {
1571 			/* XXX: no idea what has changed here... */
1572 			if (bp->b_flags & BIO_SYNC)
1573 				mp->mnt_stat.f_syncreads++;
1574 			else
1575 				mp->mnt_stat.f_asyncreads++;
1576 		} else {
1577 			if (bp->b_flags & BIO_SYNC)
1578 				mp->mnt_stat.f_syncwrites++;
1579 			else
1580 				mp->mnt_stat.f_asyncwrites++;
1581 		}
1582 	}
1583 
1584 	/*
1585 	 * Device iosize limitations only apply to read and write.  Shortcut
1586 	 * the I/O if it fits.
1587 	 */
1588 	if ((maxiosize = vp->v_rdev->si_iosize_max) == 0) {
1589 		devfs_debug(DEVFS_DEBUG_DEBUG, "%s: si_iosize_max not set!\n", dev_dname(vp->v_rdev));
1590 		maxiosize = MAXPHYS;
1591 	}
1592 #if SPEC_CHAIN_DEBUG & 2
1593 	maxiosize = 4096;
1594 #endif
1595 	if (bp->b_bcount <= maxiosize ||
1596 	    (bp->b_cmd != BUF_CMD_READ && bp->b_cmd != BUF_CMD_WRITE)) {
1597 		dev_dstrategy_chain(vp->v_rdev, bio);
1598 		return (0);
1599 	}
1600 
1601 	/*
1602 	 * Clone the buffer and set up an I/O chain to chunk up the I/O.
1603 	 */
1604 	nbp = kmalloc(sizeof(*bp), M_DEVBUF, M_INTWAIT|M_ZERO);
1605 	initbufbio(nbp);
1606 	buf_dep_init(nbp);
1607 	BUF_LOCKINIT(nbp);
1608 	BUF_LOCK(nbp, LK_EXCLUSIVE);
1609 	BUF_KERNPROC(nbp);
1610 	nbp->b_vp = vp;
1611 	nbp->b_flags = B_PAGING | (bp->b_flags & B_BNOCLIP);
1612 	nbp->b_data = bp->b_data;
1613 	nbp->b_bio1.bio_done = devfs_spec_strategy_done;
1614 	nbp->b_bio1.bio_offset = bio->bio_offset;
1615 	nbp->b_bio1.bio_caller_info1.ptr = bio;
1616 
1617 	/*
1618 	 * Start the first transfer
1619 	 */
1620 	if (vn_isdisk(vp, NULL))
1621 		chunksize = vp->v_rdev->si_bsize_phys;
1622 	else
1623 		chunksize = DEV_BSIZE;
1624 	chunksize = maxiosize / chunksize * chunksize;
1625 #if SPEC_CHAIN_DEBUG & 1
1626 	devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy chained I/O chunksize=%d\n", chunksize);
1627 #endif
1628 	nbp->b_cmd = bp->b_cmd;
1629 	nbp->b_bcount = chunksize;
1630 	nbp->b_bufsize = chunksize;	/* used to detect a short I/O */
1631 	nbp->b_bio1.bio_caller_info2.index = chunksize;
1632 
1633 #if SPEC_CHAIN_DEBUG & 1
1634 	devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p offset %d/%d bcount %d\n",
1635 		bp, 0, bp->b_bcount, nbp->b_bcount);
1636 #endif
1637 
1638 	dev_dstrategy(vp->v_rdev, &nbp->b_bio1);
1639 
1640 	if (DEVFS_NODE(vp)) {
1641 		nanotime(&DEVFS_NODE(vp)->atime);
1642 		nanotime(&DEVFS_NODE(vp)->mtime);
1643 	}
1644 
1645 	return (0);
1646 }
1647 
1648 /*
1649  * Chunked up transfer completion routine - chain transfers until done
1650  */
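/*
 * The chain terminates on a device error (B_ERROR), a short transfer
 * (non-zero b_resid or a truncated b_bcount), or completion of the last
 * chunk (boffset + b_bcount == original b_bcount); otherwise the same
 * nbp is re-aimed at the next chunk and re-issued.
 */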
1651 static
1652 void
1653 devfs_spec_strategy_done(struct bio *nbio)
1654 {
1655 	struct buf *nbp = nbio->bio_buf;
1656 	struct bio *bio = nbio->bio_caller_info1.ptr;	/* original bio */
1657 	struct buf *bp = bio->bio_buf;			/* original bp */
1658 	int chunksize = nbio->bio_caller_info2.index;	/* chunking */
1659 	int boffset = nbp->b_data - bp->b_data;
1660 
1661 	if (nbp->b_flags & B_ERROR) {
1662 		/*
1663 		 * An error terminates the chain, propagate the error back
1664 		 * to the original bp
1665 		 */
1666 		bp->b_flags |= B_ERROR;
1667 		bp->b_error = nbp->b_error;
1668 		bp->b_resid = bp->b_bcount - boffset +
1669 			      (nbp->b_bcount - nbp->b_resid);
1670 #if SPEC_CHAIN_DEBUG & 1
1671 		devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p error %d bcount %d/%d\n",
1672 			bp, bp->b_error, bp->b_bcount,
1673 			bp->b_bcount - bp->b_resid);
1674 #endif
1675 		kfree(nbp, M_DEVBUF);
1676 		biodone(bio);
1677 	} else if (nbp->b_resid) {
1678 		/*
1679 		 * A short read or write terminates the chain
1680 		 */
1681 		bp->b_error = nbp->b_error;
1682 		bp->b_resid = bp->b_bcount - boffset +
1683 			      (nbp->b_bcount - nbp->b_resid);
1684 #if SPEC_CHAIN_DEBUG & 1
1685 		devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p short read(1) bcount %d/%d\n",
1686 			bp, bp->b_bcount - bp->b_resid, bp->b_bcount);
1687 #endif
1688 		kfree(nbp, M_DEVBUF);
1689 		biodone(bio);
1690 	} else if (nbp->b_bcount != nbp->b_bufsize) {
1691 		/*
1692 		 * A short read or write can also occur by truncating b_bcount
1693 		 */
1694 #if SPEC_CHAIN_DEBUG & 1
1695 		devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p short read(2) bcount %d/%d\n",
1696 			bp, nbp->b_bcount + boffset, bp->b_bcount);
1697 #endif
1698 		bp->b_error = 0;
1699 		bp->b_bcount = nbp->b_bcount + boffset;
1700 		bp->b_resid = nbp->b_resid;
1701 		kfree(nbp, M_DEVBUF);
1702 		biodone(bio);
1703 	} else if (nbp->b_bcount + boffset == bp->b_bcount) {
1704 		/*
1705 		 * No more data terminates the chain
1706 		 */
1707 #if SPEC_CHAIN_DEBUG & 1
1708 		devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p finished bcount %d\n",
1709 			bp, bp->b_bcount);
1710 #endif
1711 		bp->b_error = 0;
1712 		bp->b_resid = 0;
1713 		kfree(nbp, M_DEVBUF);
1714 		biodone(bio);
1715 	} else {
1716 		/*
1717 		 * Continue the chain
1718 		 */
1719 		boffset += nbp->b_bcount;
1720 		nbp->b_data = bp->b_data + boffset;
1721 		nbp->b_bcount = bp->b_bcount - boffset;
1722 		if (nbp->b_bcount > chunksize)
1723 			nbp->b_bcount = chunksize;
1724 		nbp->b_bio1.bio_done = devfs_spec_strategy_done;
1725 		nbp->b_bio1.bio_offset = bio->bio_offset + boffset;
1726 
1727 #if SPEC_CHAIN_DEBUG & 1
1728 		devfs_debug(DEVFS_DEBUG_DEBUG, "spec_strategy: chain %p offset %d/%d bcount %d\n",
1729 			bp, boffset, bp->b_bcount, nbp->b_bcount);
1730 #endif
1731 
1732 		dev_dstrategy(nbp->b_vp->v_rdev, &nbp->b_bio1);
1733 	}
1734 }
1735 
1736 /*
1737  * spec_freeblks(struct vnode *a_vp, daddr_t a_addr, daddr_t a_length)
1738  */
1739 static int
1740 devfs_spec_freeblks(struct vop_freeblks_args *ap)
1741 {
1742 	struct buf *bp;
1743 
1744 	/*
1745 	 * XXX: This assumes that strategy does the deed right away.
1746 	 * XXX: this may not be the right thing to do (TRTTD).
1747 	 */
1748 	KKASSERT(ap->a_vp->v_rdev != NULL);
1749 	if ((dev_dflags(ap->a_vp->v_rdev) & D_CANFREE) == 0)
1750 		return (0);
1751 	bp = geteblk(ap->a_length);
1752 	bp->b_cmd = BUF_CMD_FREEBLKS;
1753 	bp->b_bio1.bio_offset = ap->a_offset;
1754 	bp->b_bcount = ap->a_length;
1755 	dev_dstrategy(ap->a_vp->v_rdev, &bp->b_bio1);
1756 	return (0);
1757 }
1758 
1759 /*
1760  * Implement degenerate case where the block requested is the block
1761  * returned, and assume that the entire device is contiguous with regard
1762  * to the contiguous block range (runp and runb).
1763  *
1764  * spec_bmap(struct vnode *a_vp, off_t a_loffset,
1765  *	     off_t *a_doffsetp, int *a_runp, int *a_runb)
1766  */
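/*
 * E.g. (illustrative): a request at loffset 8192 maps to doffset 8192
 * with a full MAXBSIZE run ahead and an 8192-byte run behind, since the
 * device is treated as a single contiguous extent.
 */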
1767 static int
1768 devfs_spec_bmap(struct vop_bmap_args *ap)
1769 {
1770 	if (ap->a_doffsetp != NULL)
1771 		*ap->a_doffsetp = ap->a_loffset;
1772 	if (ap->a_runp != NULL)
1773 		*ap->a_runp = MAXBSIZE;
1774 	if (ap->a_runb != NULL) {
1775 		if (ap->a_loffset < MAXBSIZE)
1776 			*ap->a_runb = (int)ap->a_loffset;
1777 		else
1778 			*ap->a_runb = MAXBSIZE;
1779 	}
1780 	return (0);
1781 }
1782 
1783 
1784 /*
1785  * Special device advisory byte-level locks.
1786  *
1787  * spec_advlock(struct vnode *a_vp, caddr_t a_id, int a_op,
1788  *		struct flock *a_fl, int a_flags)
1789  */
1790 /* ARGSUSED */
1791 static int
1792 devfs_spec_advlock(struct vop_advlock_args *ap)
1793 {
1794 	return ((ap->a_flags & F_POSIX) ? EINVAL : EOPNOTSUPP);
1795 }
1796 
1797 static void
1798 devfs_spec_getpages_iodone(struct bio *bio)
1799 {
1800 	bio->bio_buf->b_cmd = BUF_CMD_DONE;
1801 	wakeup(bio->bio_buf);
1802 }
1803 
1804 /*
1805  * spec_getpages() - get pages associated with device vnode.
1806  *
1807  * Note that spec_read and spec_write do not use the buffer cache, so we
1808  * must fully implement getpages here.
1809  */
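/*
 * Rough flow of the implementation below: map the supplied pages into a
 * pbuf's KVA, build a minimal BUF_CMD_READ buffer header, push it down
 * via vn_strategy(), sleep until the iodone callback flips b_cmd to
 * BUF_CMD_DONE, then zero any EOF gap and fix up page validity before
 * disposing of the pages that were not requested.
 */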
1810 static int
1811 devfs_spec_getpages(struct vop_getpages_args *ap)
1812 {
1813 	vm_offset_t kva;
1814 	int error;
1815 	int i, pcount, size;
1816 	struct buf *bp;
1817 	vm_page_t m;
1818 	vm_ooffset_t offset;
1819 	int toff, nextoff, nread;
1820 	struct vnode *vp = ap->a_vp;
1821 	int blksiz;
1822 	int gotreqpage;
1823 
1824 	error = 0;
1825 	pcount = round_page(ap->a_count) / PAGE_SIZE;
1826 
1827 	/*
1828 	 * Calculate the offset of the transfer and do sanity check.
1829 	 */
1830 	offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset;
1831 
1832 	/*
1833 	 * Round up physical size for real devices.  We cannot round using
1834 	 * v_mount's block size data because v_mount has nothing to do with
1835 	 * the device.  i.e. it's usually '/dev'.  We need the physical block
1836 	 * size for the device itself.
1837 	 *
1838 	 * We can't use v_rdev->si_mountpoint because it only exists when the
1839 	 * block device is mounted.  However, we can use v_rdev.
1840 	 */
1841 
1842 	if (vn_isdisk(vp, NULL))
1843 		blksiz = vp->v_rdev->si_bsize_phys;
1844 	else
1845 		blksiz = DEV_BSIZE;
1846 
1847 	size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);
1848 
1849 	bp = getpbuf(NULL);
1850 	kva = (vm_offset_t)bp->b_data;
1851 
1852 	/*
1853 	 * Map the pages to be read into the kva.
1854 	 */
1855 	pmap_qenter(kva, ap->a_m, pcount);
1856 
1857 	/* Build a minimal buffer header. */
1858 	bp->b_cmd = BUF_CMD_READ;
1859 	bp->b_bcount = size;
1860 	bp->b_resid = 0;
1861 	bp->b_runningbufspace = size;
1862 	if (size) {
1863 		runningbufspace += bp->b_runningbufspace;
1864 		++runningbufcount;
1865 	}
1866 
1867 	bp->b_bio1.bio_offset = offset;
1868 	bp->b_bio1.bio_done = devfs_spec_getpages_iodone;
1869 
1870 	mycpu->gd_cnt.v_vnodein++;
1871 	mycpu->gd_cnt.v_vnodepgsin += pcount;
1872 
1873 	/* Do the input. */
1874 	vn_strategy(ap->a_vp, &bp->b_bio1);
1875 
1876 	crit_enter();
1877 
1878 	/* We definitely need to be at splbio here. */
1879 	while (bp->b_cmd != BUF_CMD_DONE)
1880 		tsleep(bp, 0, "spread", 0);
1881 
1882 	crit_exit();
1883 
1884 	if (bp->b_flags & B_ERROR) {
1885 		if (bp->b_error)
1886 			error = bp->b_error;
1887 		else
1888 			error = EIO;
1889 	}
1890 
1891 	/*
1892 	 * If EOF is encountered we must zero-extend the result in order
1893 	 * to ensure that the page does not contain garbage.  When no
1894 	 * error occurs, an early EOF is indicated if b_bcount got truncated.
1895 	 * b_resid is relative to b_bcount and should be 0, but some devices
1896 	 * might indicate an EOF with b_resid instead of truncating b_bcount.
1897 	 */
1898 	nread = bp->b_bcount - bp->b_resid;
1899 	if (nread < ap->a_count)
1900 		bzero((caddr_t)kva + nread, ap->a_count - nread);
1901 	pmap_qremove(kva, pcount);
1902 
1903 	gotreqpage = 0;
1904 	for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) {
1905 		nextoff = toff + PAGE_SIZE;
1906 		m = ap->a_m[i];
1907 
1908 		m->flags &= ~PG_ZERO;
1909 
1910 		if (nextoff <= nread) {
1911 			m->valid = VM_PAGE_BITS_ALL;
1912 			vm_page_undirty(m);
1913 		} else if (toff < nread) {
1914 			/*
1915 			 * Since this is a VM request, we have to supply the
1916 			 * unaligned offset to allow vm_page_set_validclean()
1917 			 * to zero sub-DEV_BSIZE'd portions of the page.
1918 			 */
1919 			vm_page_set_validclean(m, 0, nread - toff);
1920 		} else {
1921 			m->valid = 0;
1922 			vm_page_undirty(m);
1923 		}
1924 
1925 		if (i != ap->a_reqpage) {
1926 			/*
1927 			 * Just in case someone was asking for this page we
1928 			 * now tell them that it is ok to use.
1929 			 */
1930 			if (!error || (m->valid == VM_PAGE_BITS_ALL)) {
1931 				if (m->valid) {
1932 					if (m->flags & PG_WANTED) {
1933 						vm_page_activate(m);
1934 					} else {
1935 						vm_page_deactivate(m);
1936 					}
1937 					vm_page_wakeup(m);
1938 				} else {
1939 					vm_page_free(m);
1940 				}
1941 			} else {
1942 				vm_page_free(m);
1943 			}
1944 		} else if (m->valid) {
1945 			gotreqpage = 1;
1946 			/*
1947 			 * Since this is a VM request, we need to make the
1948 			 * entire page presentable by zeroing invalid sections.
1949 			 */
1950 			if (m->valid != VM_PAGE_BITS_ALL)
1951 			    vm_page_zero_invalid(m, FALSE);
1952 		}
1953 	}
1954 	if (!gotreqpage) {
1955 		m = ap->a_m[ap->a_reqpage];
1956 		devfs_debug(DEVFS_DEBUG_WARNING,
1957 	    "spec_getpages:(%s) I/O read failure: (error=%d) bp %p vp %p\n",
1958 			devtoname(vp->v_rdev), error, bp, bp->b_vp);
1959 		devfs_debug(DEVFS_DEBUG_WARNING,
1960 	    "               size: %d, resid: %d, a_count: %d, valid: 0x%x\n",
1961 		    size, bp->b_resid, ap->a_count, m->valid);
1962 		devfs_debug(DEVFS_DEBUG_WARNING,
1963 	    "               nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n",
1964 		    nread, ap->a_reqpage, (u_long)m->pindex, pcount);
1965 		/*
1966 		 * Free the buffer header back to the swap buffer pool.
1967 		 */
1968 		relpbuf(bp, NULL);
1969 		return VM_PAGER_ERROR;
1970 	}
1971 	/*
1972 	 * Free the buffer header back to the swap buffer pool.
1973 	 */
1974 	relpbuf(bp, NULL);
1975 	return VM_PAGER_OK;
1976 }
1977 
1978 
1979 
1980 
1981 
1982 
1983 
1984 
1985 
1986 
1987 
1988 
1989 
1990 
1991 
1992 
1993 
1994 
1995 
1996 
1997 
1998 
1999 
2000 
2001 
2002 
2003 
2004 
2005 
2006 
2007 
2008 
2009 
2010 
2011 
2012 
2013 
2014 
2015 
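/*
 * Worked example (illustrative, assuming the common 16 KiB BKVASIZE):
 * back-to-back 64 KiB reads at ascending offsets bump f_seqcount by 4
 * per call until it saturates at IO_SEQMAX; the result is returned
 * shifted by IO_SEQSHIFT for inclusion in the ioflag word.
 */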
2016 static __inline
2017 int
2018 sequential_heuristic(struct uio *uio, struct file *fp)
2019 {
2020 	/*
2021 	 * Sequential heuristic - detect sequential operation
2022 	 */
2023 	if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
2024 	    uio->uio_offset == fp->f_nextoff) {
2025 		int tmpseq = fp->f_seqcount;
2026 		/*
2027 		 * XXX we assume that the filesystem block size is
2028 		 * the default.  Not true, but still gives us a pretty
2029 		 * good indicator of how sequential the read operations
2030 		 * are.
2031 		 */
2032 		tmpseq += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
2033 		if (tmpseq > IO_SEQMAX)
2034 			tmpseq = IO_SEQMAX;
2035 		fp->f_seqcount = tmpseq;
2036 		return(fp->f_seqcount << IO_SEQSHIFT);
2037 	}
2038 
2039 	/*
2040 	 * Not sequential, quick draw-down of seqcount
2041 	 */
2042 	if (fp->f_seqcount > 1)
2043 		fp->f_seqcount = 1;
2044 	else
2045 		fp->f_seqcount = 0;
2046 	return(0);
2047 }
2048