xref: /openbsd-src/sys/kern/vfs_vnops.c (revision 8500990981f885cbe5e6a4958549cacc238b5ae6)
1 /*	$OpenBSD: vfs_vnops.c,v 1.44 2003/09/23 16:51:12 millert Exp $	*/
2 /*	$NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vfs_vnops.c	8.5 (Berkeley) 12/8/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/kernel.h>
43 #include <sys/file.h>
44 #include <sys/stat.h>
45 #include <sys/buf.h>
46 #include <sys/proc.h>
47 #include <sys/mount.h>
48 #include <sys/namei.h>
49 #include <sys/vnode.h>
50 #include <sys/ioctl.h>
51 #include <sys/tty.h>
52 #include <sys/cdio.h>
53 #include <sys/poll.h>
54 
55 #include <uvm/uvm_extern.h>
56 
57 int	vn_read(struct file *fp, off_t *off, struct uio *uio,
58 	    struct ucred *cred);
59 int	vn_write(struct file *fp, off_t *off, struct uio *uio,
60 	    struct ucred *cred);
61 int	vn_poll(struct file *fp, int events, struct proc *p);
62 int	vn_kqfilter(struct file *fp, struct knote *kn);
63 int 	vn_closefile(struct file *fp, struct proc *p);
64 int	vn_ioctl(struct file *fp, u_long com, caddr_t data,
65 	    struct proc *p);
66 
67 struct 	fileops vnops =
68 	{ vn_read, vn_write, vn_ioctl, vn_poll, vn_kqfilter, vn_statfile,
69 	  vn_closefile };
70 
71 /*
72  * Common code for vnode open operations.
73  * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
74  */
75 int
76 vn_open(ndp, fmode, cmode)
77 	register struct nameidata *ndp;
78 	int fmode, cmode;
79 {
80 	register struct vnode *vp;
81 	register struct proc *p = ndp->ni_cnd.cn_proc;
82 	register struct ucred *cred = p->p_ucred;
83 	struct vattr va;
84 	int error;
85 
86 	if ((fmode & (FREAD|FWRITE)) == 0)
87 		return (EINVAL);
88 	if ((fmode & (O_TRUNC | FWRITE)) == O_TRUNC)
89 		return (EINVAL);
90 	if (fmode & O_CREAT) {
91 		ndp->ni_cnd.cn_nameiop = CREATE;
92 		ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
93 		if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
94 			ndp->ni_cnd.cn_flags |= FOLLOW;
95 		if ((error = namei(ndp)) != 0)
96 			return (error);
97 
98 		if (ndp->ni_vp == NULL) {
99 			VATTR_NULL(&va);
100 			va.va_type = VREG;
101 			va.va_mode = cmode;
102 			VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE);
103 			error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
104 					   &ndp->ni_cnd, &va);
105 			if (error)
106 				return (error);
107 			fmode &= ~O_TRUNC;
108 			vp = ndp->ni_vp;
109 		} else {
110 			VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd);
111 			if (ndp->ni_dvp == ndp->ni_vp)
112 				vrele(ndp->ni_dvp);
113 			else
114 				vput(ndp->ni_dvp);
115 			ndp->ni_dvp = NULL;
116 			vp = ndp->ni_vp;
117 			if (fmode & O_EXCL) {
118 				error = EEXIST;
119 				goto bad;
120 			}
121 			fmode &= ~O_CREAT;
122 		}
123 	} else {
124 		ndp->ni_cnd.cn_nameiop = LOOKUP;
125 		ndp->ni_cnd.cn_flags =
126 		    ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
127 		if ((error = namei(ndp)) != 0)
128 			return (error);
129 		vp = ndp->ni_vp;
130 	}
131 	if (vp->v_type == VSOCK) {
132 		error = EOPNOTSUPP;
133 		goto bad;
134 	}
135 	if (vp->v_type == VLNK) {
136 		error = EMLINK;
137 		goto bad;
138 	}
139 	if ((fmode & O_CREAT) == 0) {
140 		if (fmode & FREAD) {
141 			if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
142 				goto bad;
143 		}
144 		if (fmode & FWRITE) {
145 			if (vp->v_type == VDIR) {
146 				error = EISDIR;
147 				goto bad;
148 			}
149 			if ((error = vn_writechk(vp)) != 0 ||
150 			    (error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0)
151 				goto bad;
152 		}
153 	}
154 	if ((fmode & O_TRUNC) && vp->v_type == VREG) {
155 		VOP_UNLOCK(vp, 0, p);				/* XXX */
156 		VOP_LEASE(vp, p, cred, LEASE_WRITE);
157 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
158 		VATTR_NULL(&va);
159 		va.va_size = 0;
160 		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
161 			goto bad;
162 	}
163 	if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0)
164 		goto bad;
165 	if (fmode & FWRITE)
166 		vp->v_writecount++;
167 	return (0);
168 bad:
169 	vput(vp);
170 	return (error);
171 }
172 
173 /*
174  * Check for write permissions on the specified vnode.
175  * Prototype text segments cannot be written.
176  */
177 int
178 vn_writechk(vp)
179 	register struct vnode *vp;
180 {
181 
182 	/*
183 	 * Disallow write attempts on read-only file systems;
184 	 * unless the file is a socket or a block or character
185 	 * device resident on the file system.
186 	 */
187 	if (vp->v_mount->mnt_flag & MNT_RDONLY) {
188 		switch (vp->v_type) {
189 		case VREG: case VDIR: case VLNK:
190 			return (EROFS);
191 		case VNON: case VCHR: case VSOCK:
192 		case VFIFO: case VBAD: case VBLK:
193 			break;
194 		}
195 	}
196 	/*
197 	 * If there's shared text associated with
198 	 * the vnode, try to free it up once.  If
199 	 * we fail, we can't allow writing.
200 	 */
201 	if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp))
202 		return (ETXTBSY);
203 
204 	return (0);
205 }
206 
207 /*
208  * Mark a vnode as being the text image of a running process.
209  */
210 void
211 vn_marktext(vp)
212 	struct vnode *vp;
213 {
214 	vp->v_flag |= VTEXT;
215 }
216 
217 /*
218  * Vnode close call
219  */
220 int
221 vn_close(vp, flags, cred, p)
222 	register struct vnode *vp;
223 	int flags;
224 	struct ucred *cred;
225 	struct proc *p;
226 {
227 	int error;
228 
229 	if (flags & FWRITE)
230 		vp->v_writecount--;
231 	error = VOP_CLOSE(vp, flags, cred, p);
232 	vrele(vp);
233 	return (error);
234 }
235 
236 /*
237  * Package up an I/O request on a vnode into a uio and do it.
238  */
239 int
240 vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
241 	enum uio_rw rw;
242 	struct vnode *vp;
243 	caddr_t base;
244 	int len;
245 	off_t offset;
246 	enum uio_seg segflg;
247 	int ioflg;
248 	struct ucred *cred;
249 	size_t *aresid;
250 	struct proc *p;
251 {
252 	struct uio auio;
253 	struct iovec aiov;
254 	int error;
255 
256 	if ((ioflg & IO_NODELOCKED) == 0)
257 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
258 	auio.uio_iov = &aiov;
259 	auio.uio_iovcnt = 1;
260 	aiov.iov_base = base;
261 	aiov.iov_len = len;
262 	auio.uio_resid = len;
263 	auio.uio_offset = offset;
264 	auio.uio_segflg = segflg;
265 	auio.uio_rw = rw;
266 	auio.uio_procp = p;
267 	if (rw == UIO_READ) {
268 		error = VOP_READ(vp, &auio, ioflg, cred);
269 	} else {
270 		error = VOP_WRITE(vp, &auio, ioflg, cred);
271 	}
272 	if (aresid)
273 		*aresid = auio.uio_resid;
274 	else
275 		if (auio.uio_resid && error == 0)
276 			error = EIO;
277 	if ((ioflg & IO_NODELOCKED) == 0)
278 		VOP_UNLOCK(vp, 0, p);
279 	return (error);
280 }
281 
282 /*
283  * File table vnode read routine.
284  */
285 int
286 vn_read(fp, poff, uio, cred)
287 	struct file *fp;
288 	off_t *poff;
289 	struct uio *uio;
290 	struct ucred *cred;
291 {
292 	register struct vnode *vp = (struct vnode *)fp->f_data;
293 	int error = 0;
294 	size_t count;
295 	struct proc *p = uio->uio_procp;
296 
297 	VOP_LEASE(vp, uio->uio_procp, cred, LEASE_READ);
298 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
299 	uio->uio_offset = *poff;
300 	count = uio->uio_resid;
301 	if (vp->v_type != VDIR)
302 		error = VOP_READ(vp, uio,
303 		    (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0, cred);
304 	*poff += count - uio->uio_resid;
305 	VOP_UNLOCK(vp, 0, p);
306 	return (error);
307 }
308 
309 /*
310  * File table vnode write routine.
311  */
312 int
313 vn_write(fp, poff, uio, cred)
314 	struct file *fp;
315 	off_t *poff;
316 	struct uio *uio;
317 	struct ucred *cred;
318 {
319 	register struct vnode *vp = (struct vnode *)fp->f_data;
320 	struct proc *p = uio->uio_procp;
321 	int error, ioflag = IO_UNIT;
322 	size_t count;
323 
324 	if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
325 		ioflag |= IO_APPEND;
326 	if (fp->f_flag & FNONBLOCK)
327 		ioflag |= IO_NDELAY;
328 	if ((fp->f_flag & FFSYNC) ||
329 	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
330 		ioflag |= IO_SYNC;
331 	VOP_LEASE(vp, uio->uio_procp, cred, LEASE_WRITE);
332 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
333 	uio->uio_offset = *poff;
334 	count = uio->uio_resid;
335 	error = VOP_WRITE(vp, uio, ioflag, cred);
336 	if (ioflag & IO_APPEND)
337 		*poff = uio->uio_offset;
338 	else
339 		*poff += count - uio->uio_resid;
340 	VOP_UNLOCK(vp, 0, p);
341 	return (error);
342 }
343 
344 /*
345  * File table wrapper for vn_stat
346  */
347 int
348 vn_statfile(fp, sb, p)
349 	struct file *fp;
350 	struct stat *sb;
351 	struct proc *p;
352 {
353 	struct vnode *vp = (struct vnode *)fp->f_data;
354 
355 	return vn_stat(vp, sb, p);
356 }
357 
358 /*
359  * vnode stat routine.
360  */
361 int
362 vn_stat(vp, sb, p)
363 	struct vnode *vp;
364 	register struct stat *sb;
365 	struct proc *p;
366 {
367 	struct vattr va;
368 	int error;
369 	u_short mode;
370 
371 	error = VOP_GETATTR(vp, &va, p->p_ucred, p);
372 	if (error)
373 		return (error);
374 	/*
375 	 * Copy from vattr table
376 	 */
377 	sb->st_dev = va.va_fsid;
378 	sb->st_ino = va.va_fileid;
379 	mode = va.va_mode;
380 	switch (vp->v_type) {
381 	case VREG:
382 		mode |= S_IFREG;
383 		break;
384 	case VDIR:
385 		mode |= S_IFDIR;
386 		break;
387 	case VBLK:
388 		mode |= S_IFBLK;
389 		break;
390 	case VCHR:
391 		mode |= S_IFCHR;
392 		break;
393 	case VLNK:
394 		mode |= S_IFLNK;
395 		break;
396 	case VSOCK:
397 		mode |= S_IFSOCK;
398 		break;
399 	case VFIFO:
400 		mode |= S_IFIFO;
401 		break;
402 	default:
403 		return (EBADF);
404 	}
405 	sb->st_mode = mode;
406 	sb->st_nlink = va.va_nlink;
407 	sb->st_uid = va.va_uid;
408 	sb->st_gid = va.va_gid;
409 	sb->st_rdev = va.va_rdev;
410 	sb->st_size = va.va_size;
411 	sb->st_atimespec = va.va_atime;
412 	sb->st_mtimespec = va.va_mtime;
413 	sb->st_ctimespec = va.va_ctime;
414 	sb->st_blksize = va.va_blocksize;
415 	sb->st_flags = va.va_flags;
416 	sb->st_gen = va.va_gen;
417 	sb->st_blocks = va.va_bytes / S_BLKSIZE;
418 	return (0);
419 }
420 
421 /*
422  * File table vnode ioctl routine.
423  */
424 int
425 vn_ioctl(fp, com, data, p)
426 	struct file *fp;
427 	u_long com;
428 	caddr_t data;
429 	struct proc *p;
430 {
431 	register struct vnode *vp = ((struct vnode *)fp->f_data);
432 	struct vattr vattr;
433 	int error;
434 
435 	switch (vp->v_type) {
436 
437 	case VREG:
438 	case VDIR:
439 		if (com == FIONREAD) {
440 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
441 			if (error)
442 				return (error);
443 			*(int *)data = vattr.va_size - fp->f_offset;
444 			return (0);
445 		}
446 		if (com == FIBMAP)
447 			return VOP_IOCTL(vp, com, data, fp->f_flag,
448 					 p->p_ucred, p);
449 		if (com == FIONBIO || com == FIOASYNC)  /* XXX */
450 			return (0);			/* XXX */
451 		/* fall into... */
452 
453 	default:
454 		return (ENOTTY);
455 
456 	case VFIFO:
457 	case VCHR:
458 	case VBLK:
459 		error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
460 		if (error == 0 && com == TIOCSCTTY) {
461 			if (p->p_session->s_ttyvp)
462 				vrele(p->p_session->s_ttyvp);
463 			p->p_session->s_ttyvp = vp;
464 			VREF(vp);
465 		}
466 		return (error);
467 	}
468 }
469 
470 /*
471  * File table vnode poll routine.
472  */
473 int
474 vn_poll(fp, events, p)
475 	struct file *fp;
476 	int events;
477 	struct proc *p;
478 {
479 
480 	return (VOP_POLL(((struct vnode *)fp->f_data), events, p));
481 }
482 
483 /*
484  * Check that the vnode is still valid, and if so
485  * acquire requested lock.
486  */
487 int
488 vn_lock(struct vnode *vp, int flags, struct proc *p)
489 {
490 	int error;
491 
492 	if ((flags & LK_RECURSEFAIL) == 0)
493 		flags |= LK_CANRECURSE;
494 
495 	do {
496 		if ((flags & LK_INTERLOCK) == 0)
497 			simple_lock(&vp->v_interlock);
498 		if (vp->v_flag & VXLOCK) {
499 			vp->v_flag |= VXWANT;
500 			simple_unlock(&vp->v_interlock);
501 			tsleep(vp, PINOD, "vn_lock", 0);
502 			error = ENOENT;
503 		} else {
504 			error = VOP_LOCK(vp, flags | LK_INTERLOCK, p);
505 			if (error == 0)
506 				return (error);
507 		}
508 		flags &= ~LK_INTERLOCK;
509 	} while (flags & LK_RETRY);
510 	return (error);
511 }
512 
513 /*
514  * File table vnode close routine.
515  */
516 int
517 vn_closefile(fp, p)
518 	struct file *fp;
519 	struct proc *p;
520 {
521 
522 	return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
523 		fp->f_cred, p));
524 }
525 
526 /*ARGSUSED*/
527 int
528 vn_kqfilter(struct file *fp, struct knote *kn)
529 {
530 	return (VOP_KQFILTER(((struct vnode *)fp->f_data), kn));
531 }
532