xref: /openbsd-src/sys/kern/vfs_vnops.c (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1 /*	$OpenBSD: vfs_vnops.c,v 1.68 2011/07/09 01:28:48 matthew Exp $	*/
2 /*	$NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vfs_vnops.c	8.5 (Berkeley) 12/8/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/kernel.h>
43 #include <sys/malloc.h>
44 #include <sys/file.h>
45 #include <sys/stat.h>
46 #include <sys/buf.h>
47 #include <sys/proc.h>
48 #include <sys/mount.h>
49 #include <sys/namei.h>
50 #include <sys/vnode.h>
51 #include <sys/ioctl.h>
52 #include <sys/tty.h>
53 #include <sys/cdio.h>
54 #include <sys/poll.h>
55 #include <sys/filedesc.h>
56 #include <sys/specdev.h>
57 
58 #include <uvm/uvm_extern.h>
59 
60 int vn_read(struct file *, off_t *, struct uio *, struct ucred *);
61 int vn_write(struct file *, off_t *, struct uio *, struct ucred *);
62 int vn_poll(struct file *, int, struct proc *);
63 int vn_kqfilter(struct file *, struct knote *);
64 int vn_closefile(struct file *, struct proc *);
65 
66 struct 	fileops vnops =
67 	{ vn_read, vn_write, vn_ioctl, vn_poll, vn_kqfilter, vn_statfile,
68 	  vn_closefile };
69 
70 /*
71  * Common code for vnode open operations.
72  * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
73  */
74 int
75 vn_open(struct nameidata *ndp, int fmode, int cmode)
76 {
77 	struct vnode *vp;
78 	struct proc *p = ndp->ni_cnd.cn_proc;
79 	struct ucred *cred = p->p_ucred;
80 	struct vattr va;
81 	struct cloneinfo *cip;
82 	int error;
83 
84 	if ((fmode & (FREAD|FWRITE)) == 0)
85 		return (EINVAL);
86 	if ((fmode & (O_TRUNC | FWRITE)) == O_TRUNC)
87 		return (EINVAL);
88 	if (fmode & O_CREAT) {
89 		ndp->ni_cnd.cn_nameiop = CREATE;
90 		ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
91 		if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
92 			ndp->ni_cnd.cn_flags |= FOLLOW;
93 		if ((error = namei(ndp)) != 0)
94 			return (error);
95 
96 		if (ndp->ni_vp == NULL) {
97 			VATTR_NULL(&va);
98 			va.va_type = VREG;
99 			va.va_mode = cmode;
100 			if (fmode & O_EXCL)
101 				va.va_vaflags |= VA_EXCLUSIVE;
102 			error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
103 					   &ndp->ni_cnd, &va);
104 			if (error)
105 				return (error);
106 			fmode &= ~O_TRUNC;
107 			vp = ndp->ni_vp;
108 		} else {
109 			VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd);
110 			if (ndp->ni_dvp == ndp->ni_vp)
111 				vrele(ndp->ni_dvp);
112 			else
113 				vput(ndp->ni_dvp);
114 			ndp->ni_dvp = NULL;
115 			vp = ndp->ni_vp;
116 			if (fmode & O_EXCL) {
117 				error = EEXIST;
118 				goto bad;
119 			}
120 			fmode &= ~O_CREAT;
121 		}
122 	} else {
123 		ndp->ni_cnd.cn_nameiop = LOOKUP;
124 		ndp->ni_cnd.cn_flags =
125 		    ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
126 		if ((error = namei(ndp)) != 0)
127 			return (error);
128 		vp = ndp->ni_vp;
129 	}
130 	if (vp->v_type == VSOCK) {
131 		error = EOPNOTSUPP;
132 		goto bad;
133 	}
134 	if (vp->v_type == VLNK) {
135 		error = ELOOP;
136 		goto bad;
137 	}
138 	if ((fmode & O_DIRECTORY) && vp->v_type != VDIR) {
139 		error = ENOTDIR;
140 		goto bad;
141 	}
142 	if ((fmode & O_CREAT) == 0) {
143 		if (fmode & FREAD) {
144 			if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
145 				goto bad;
146 		}
147 		if (fmode & FWRITE) {
148 			if (vp->v_type == VDIR) {
149 				error = EISDIR;
150 				goto bad;
151 			}
152 			if ((error = vn_writechk(vp)) != 0 ||
153 			    (error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0)
154 				goto bad;
155 		}
156 	}
157 	if ((fmode & O_TRUNC) && vp->v_type == VREG) {
158 		VATTR_NULL(&va);
159 		va.va_size = 0;
160 		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
161 			goto bad;
162 	}
163 	if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0)
164 		goto bad;
165 
166 	if (vp->v_flag & VCLONED) {
167 		cip = (struct cloneinfo *)vp->v_data;
168 
169 		vp->v_flag &= ~VCLONED;
170 
171 		ndp->ni_vp = cip->ci_vp;	/* return cloned vnode */
172 		vp->v_data = cip->ci_data;	/* restore v_data */
173 		VOP_UNLOCK(vp, 0, p);		/* keep a reference */
174 		vp = ndp->ni_vp;		/* for the increment below */
175 
176 		free(cip, M_TEMP);
177 	}
178 
179 	if (fmode & FWRITE)
180 		vp->v_writecount++;
181 	return (0);
182 bad:
183 	vput(vp);
184 	return (error);
185 }
186 
187 /*
188  * Check for write permissions on the specified vnode.
189  * Prototype text segments cannot be written.
190  */
191 int
192 vn_writechk(struct vnode *vp)
193 {
194 	/*
195 	 * Disallow write attempts on read-only file systems;
196 	 * unless the file is a socket or a block or character
197 	 * device resident on the file system.
198 	 */
199 	if (vp->v_mount->mnt_flag & MNT_RDONLY) {
200 		switch (vp->v_type) {
201 		case VREG:
202 		case VDIR:
203 		case VLNK:
204 			return (EROFS);
205 		case VNON:
206 		case VCHR:
207 		case VSOCK:
208 		case VFIFO:
209 		case VBAD:
210 		case VBLK:
211 			break;
212 		}
213 	}
214 	/*
215 	 * If there's shared text associated with
216 	 * the vnode, try to free it up once.  If
217 	 * we fail, we can't allow writing.
218 	 */
219 	if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp))
220 		return (ETXTBSY);
221 
222 	return (0);
223 }
224 
225 /*
226  * Mark a vnode as being the text image of a running process.
227  */
228 void
229 vn_marktext(struct vnode *vp)
230 {
231 	vp->v_flag |= VTEXT;
232 }
233 
234 /*
235  * Vnode close call
236  */
237 int
238 vn_close(struct vnode *vp, int flags, struct ucred *cred, struct proc *p)
239 {
240 	int error;
241 
242 	if (flags & FWRITE)
243 		vp->v_writecount--;
244 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
245 	error = VOP_CLOSE(vp, flags, cred, p);
246 	vput(vp);
247 	return (error);
248 }
249 
250 /*
251  * Package up an I/O request on a vnode into a uio and do it.
252  */
253 int
254 vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, int len, off_t offset,
255     enum uio_seg segflg, int ioflg, struct ucred *cred, size_t *aresid,
256     struct proc *p)
257 {
258 	struct uio auio;
259 	struct iovec aiov;
260 	int error;
261 
262 	auio.uio_iov = &aiov;
263 	auio.uio_iovcnt = 1;
264 	aiov.iov_base = base;
265 	aiov.iov_len = len;
266 	auio.uio_resid = len;
267 	auio.uio_offset = offset;
268 	auio.uio_segflg = segflg;
269 	auio.uio_rw = rw;
270 	auio.uio_procp = p;
271 
272 	if ((ioflg & IO_NODELOCKED) == 0)
273 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
274 	if (rw == UIO_READ) {
275 		error = VOP_READ(vp, &auio, ioflg, cred);
276 	} else {
277 		error = VOP_WRITE(vp, &auio, ioflg, cred);
278 	}
279 	if ((ioflg & IO_NODELOCKED) == 0)
280 		VOP_UNLOCK(vp, 0, p);
281 
282 	if (aresid)
283 		*aresid = auio.uio_resid;
284 	else
285 		if (auio.uio_resid && error == 0)
286 			error = EIO;
287 	return (error);
288 }
289 
290 /*
291  * File table vnode read routine.
292  */
293 int
294 vn_read(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred)
295 {
296 	struct vnode *vp = (struct vnode *)fp->f_data;
297 	int error = 0;
298 	size_t count;
299 	struct proc *p = uio->uio_procp;
300 
301 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
302 	uio->uio_offset = *poff;
303 	count = uio->uio_resid;
304 	if (vp->v_type != VDIR)
305 		error = VOP_READ(vp, uio,
306 		    (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0, cred);
307 	*poff += count - uio->uio_resid;
308 	VOP_UNLOCK(vp, 0, p);
309 	return (error);
310 }
311 
312 /*
313  * File table vnode write routine.
314  */
315 int
316 vn_write(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred)
317 {
318 	struct vnode *vp = (struct vnode *)fp->f_data;
319 	struct proc *p = uio->uio_procp;
320 	int error, ioflag = IO_UNIT;
321 	size_t count;
322 
323 	if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
324 		ioflag |= IO_APPEND;
325 	if (fp->f_flag & FNONBLOCK)
326 		ioflag |= IO_NDELAY;
327 	if ((fp->f_flag & FFSYNC) ||
328 	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
329 		ioflag |= IO_SYNC;
330 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
331 	uio->uio_offset = *poff;
332 	count = uio->uio_resid;
333 	error = VOP_WRITE(vp, uio, ioflag, cred);
334 	if (ioflag & IO_APPEND)
335 		*poff = uio->uio_offset;
336 	else
337 		*poff += count - uio->uio_resid;
338 	VOP_UNLOCK(vp, 0, p);
339 	return (error);
340 }
341 
342 /*
343  * File table wrapper for vn_stat
344  */
345 int
346 vn_statfile(struct file *fp, struct stat *sb, struct proc *p)
347 {
348 	struct vnode *vp = (struct vnode *)fp->f_data;
349 	return vn_stat(vp, sb, p);
350 }
351 
352 /*
353  * vnode stat routine.
354  */
355 int
356 vn_stat(struct vnode *vp, struct stat *sb, struct proc *p)
357 {
358 	struct vattr va;
359 	int error;
360 	mode_t mode;
361 
362 	error = VOP_GETATTR(vp, &va, p->p_ucred, p);
363 	if (error)
364 		return (error);
365 	/*
366 	 * Copy from vattr table
367 	 */
368 	sb->st_dev = va.va_fsid;
369 	sb->st_ino = va.va_fileid;
370 	mode = va.va_mode;
371 	switch (vp->v_type) {
372 	case VREG:
373 		mode |= S_IFREG;
374 		break;
375 	case VDIR:
376 		mode |= S_IFDIR;
377 		break;
378 	case VBLK:
379 		mode |= S_IFBLK;
380 		break;
381 	case VCHR:
382 		mode |= S_IFCHR;
383 		break;
384 	case VLNK:
385 		mode |= S_IFLNK;
386 		break;
387 	case VSOCK:
388 		mode |= S_IFSOCK;
389 		break;
390 	case VFIFO:
391 		mode |= S_IFIFO;
392 		break;
393 	default:
394 		return (EBADF);
395 	}
396 	sb->st_mode = mode;
397 	sb->st_nlink = va.va_nlink;
398 	sb->st_uid = va.va_uid;
399 	sb->st_gid = va.va_gid;
400 	sb->st_rdev = va.va_rdev;
401 	sb->st_size = va.va_size;
402 	sb->st_atim = va.va_atime;
403 	sb->st_mtim = va.va_mtime;
404 	sb->st_ctim = va.va_ctime;
405 	sb->st_blksize = va.va_blocksize;
406 	sb->st_flags = va.va_flags;
407 	sb->st_gen = va.va_gen;
408 	sb->st_blocks = va.va_bytes / S_BLKSIZE;
409 	return (0);
410 }
411 
412 /*
413  * File table vnode ioctl routine.
414  */
415 int
416 vn_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p)
417 {
418 	struct vnode *vp = ((struct vnode *)fp->f_data);
419 	struct vattr vattr;
420 	int error;
421 
422 	switch (vp->v_type) {
423 
424 	case VREG:
425 	case VDIR:
426 		if (com == FIONREAD) {
427 			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
428 			if (error)
429 				return (error);
430 			*(int *)data = vattr.va_size - fp->f_offset;
431 			return (0);
432 		}
433 		if (com == FIONBIO || com == FIOASYNC)  /* XXX */
434 			return (0);			/* XXX */
435 		/* FALLTHROUGH */
436 	default:
437 		return (ENOTTY);
438 
439 	case VFIFO:
440 	case VCHR:
441 	case VBLK:
442 		error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
443 		if (error == 0 && com == TIOCSCTTY) {
444 			struct session *s = p->p_p->ps_session;
445 			if (s->s_ttyvp)
446 				vrele(s->s_ttyvp);
447 			s->s_ttyvp = vp;
448 			vref(vp);
449 		}
450 		return (error);
451 	}
452 }
453 
454 /*
455  * File table vnode poll routine.
456  */
457 int
458 vn_poll(struct file *fp, int events, struct proc *p)
459 {
460 	return (VOP_POLL(((struct vnode *)fp->f_data), events, p));
461 }
462 
463 /*
464  * Check that the vnode is still valid, and if so
465  * acquire requested lock.
466  */
467 int
468 vn_lock(struct vnode *vp, int flags, struct proc *p)
469 {
470 	int error;
471 
472 	if ((flags & LK_RECURSEFAIL) == 0)
473 		flags |= LK_CANRECURSE;
474 
475 	do {
476 		if (vp->v_flag & VXLOCK) {
477 			vp->v_flag |= VXWANT;
478 			tsleep(vp, PINOD, "vn_lock", 0);
479 			error = ENOENT;
480 		} else {
481 			error = VOP_LOCK(vp, flags, p);
482 			if (error == 0)
483 				return (error);
484 		}
485 	} while (flags & LK_RETRY);
486 	return (error);
487 }
488 
489 /*
490  * File table vnode close routine.
491  */
492 int
493 vn_closefile(struct file *fp, struct proc *p)
494 {
495 	struct vnode *vp = fp->f_data;
496 	struct flock lf;
497 
498 	if ((fp->f_flag & FHASLOCK)) {
499 		lf.l_whence = SEEK_SET;
500 		lf.l_start = 0;
501 		lf.l_len = 0;
502 		lf.l_type = F_UNLCK;
503 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
504 	}
505 
506 	return (vn_close(vp, fp->f_flag, fp->f_cred, p));
507 }
508 
509 int
510 vn_kqfilter(struct file *fp, struct knote *kn)
511 {
512 	return (VOP_KQFILTER(((struct vnode *)fp->f_data), kn));
513 }
514 
515 /*
516  * Common code for vnode access operations.
517  */
518 
519 /* Check if a directory can be found inside another in the hierarchy */
520 int
521 vn_isunder(struct vnode *lvp, struct vnode *rvp, struct proc *p)
522 {
523 	int error;
524 
525 	error = vfs_getcwd_common(lvp, rvp, NULL, NULL, MAXPATHLEN/2, 0, p);
526 
527 	if (!error)
528 		return (1);
529 
530 	return (0);
531 }
532