xref: /openbsd-src/sys/kern/vfs_vnops.c (revision 50b7afb2c2c0993b0894d4e34bf857cb13ed9c80)
1 /*	$OpenBSD: vfs_vnops.c,v 1.78 2014/07/13 15:00:40 tedu Exp $	*/
2 /*	$NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *	@(#)vfs_vnops.c	8.5 (Berkeley) 12/8/94
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/kernel.h>
43 #include <sys/malloc.h>
44 #include <sys/file.h>
45 #include <sys/stat.h>
46 #include <sys/buf.h>
47 #include <sys/proc.h>
48 #include <sys/resourcevar.h>
49 #include <sys/signalvar.h>
50 #include <sys/mount.h>
51 #include <sys/namei.h>
52 #include <sys/vnode.h>
53 #include <sys/ioctl.h>
54 #include <sys/tty.h>
55 #include <sys/cdio.h>
56 #include <sys/poll.h>
57 #include <sys/filedesc.h>
58 #include <sys/specdev.h>
59 
/* Forward declarations for the vnode file-table operations defined below. */
int vn_read(struct file *, off_t *, struct uio *, struct ucred *);
int vn_write(struct file *, off_t *, struct uio *, struct ucred *);
int vn_poll(struct file *, int, struct proc *);
int vn_kqfilter(struct file *, struct knote *);
int vn_closefile(struct file *, struct proc *);

/* fileops vector installed in struct file for vnode-backed descriptors. */
struct 	fileops vnops =
	{ vn_read, vn_write, vn_ioctl, vn_poll, vn_kqfilter, vn_statfile,
	  vn_closefile };
69 
70 /*
71  * Common code for vnode open operations.
72  * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
73  */
int
vn_open(struct nameidata *ndp, int fmode, int cmode)
{
	struct vnode *vp;
	struct proc *p = ndp->ni_cnd.cn_proc;
	struct ucred *cred = p->p_ucred;
	struct vattr va;
	struct cloneinfo *cip;
	int error;

	/* Must request at least one of read or write. */
	if ((fmode & (FREAD|FWRITE)) == 0)
		return (EINVAL);
	/* Truncating without write permission makes no sense. */
	if ((fmode & (O_TRUNC | FWRITE)) == O_TRUNC)
		return (EINVAL);
	if (fmode & O_CREAT) {
		ndp->ni_cnd.cn_nameiop = CREATE;
		/* Keep the parent locked in case we must create in it. */
		ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
		if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
			ndp->ni_cnd.cn_flags |= FOLLOW;
		if ((error = namei(ndp)) != 0)
			return (error);

		if (ndp->ni_vp == NULL) {
			/* Target does not exist yet: create it. */
			VATTR_NULL(&va);
			va.va_type = VREG;
			va.va_mode = cmode;
			if (fmode & O_EXCL)
				va.va_vaflags |= VA_EXCLUSIVE;
			error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
					   &ndp->ni_cnd, &va);
			if (error)
				return (error);
			/* A freshly created file is already empty. */
			fmode &= ~O_TRUNC;
			vp = ndp->ni_vp;
		} else {
			/* Target already exists: drop the parent vnode. */
			VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd);
			if (ndp->ni_dvp == ndp->ni_vp)
				vrele(ndp->ni_dvp);
			else
				vput(ndp->ni_dvp);
			ndp->ni_dvp = NULL;
			vp = ndp->ni_vp;
			if (fmode & O_EXCL) {
				error = EEXIST;
				goto bad;
			}
			fmode &= ~O_CREAT;
		}
	} else {
		/* Plain open: just look the name up. */
		ndp->ni_cnd.cn_nameiop = LOOKUP;
		ndp->ni_cnd.cn_flags =
		    ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
		if ((error = namei(ndp)) != 0)
			return (error);
		vp = ndp->ni_vp;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/* A VLNK leaf here means symlink traversal was refused. */
	if (vp->v_type == VLNK) {
		error = ELOOP;
		goto bad;
	}
	if ((fmode & O_DIRECTORY) && vp->v_type != VDIR) {
		error = ENOTDIR;
		goto bad;
	}
	/* Creation implies access; otherwise check permissions now. */
	if ((fmode & O_CREAT) == 0) {
		if (fmode & FREAD) {
			if ((error = VOP_ACCESS(vp, VREAD, cred, p)) != 0)
				goto bad;
		}
		if (fmode & FWRITE) {
			if (vp->v_type == VDIR) {
				error = EISDIR;
				goto bad;
			}
			if ((error = vn_writechk(vp)) != 0 ||
			    (error = VOP_ACCESS(vp, VWRITE, cred, p)) != 0)
				goto bad;
		}
	}
	/* Truncate regular files on request by setting size to zero. */
	if ((fmode & O_TRUNC) && vp->v_type == VREG) {
		VATTR_NULL(&va);
		va.va_size = 0;
		if ((error = VOP_SETATTR(vp, &va, cred, p)) != 0)
			goto bad;
	}
	if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0)
		goto bad;

	/*
	 * VOP_OPEN on a cloning device replaced v_data with cloneinfo;
	 * hand the cloned vnode back to the caller and restore v_data.
	 */
	if (vp->v_flag & VCLONED) {
		cip = (struct cloneinfo *)vp->v_data;

		vp->v_flag &= ~VCLONED;

		ndp->ni_vp = cip->ci_vp;	/* return cloned vnode */
		vp->v_data = cip->ci_data;	/* restore v_data */
		VOP_UNLOCK(vp, 0, p);		/* keep a reference */
		vp = ndp->ni_vp;		/* for the increment below */

		free(cip, M_TEMP, sizeof(*cip));
	}

	/* Balanced by the decrement in vn_close(). */
	if (fmode & FWRITE)
		vp->v_writecount++;
	return (0);
bad:
	vput(vp);
	return (error);
}
186 
187 /*
188  * Check for write permissions on the specified vnode.
189  * Prototype text segments cannot be written.
190  */
191 int
192 vn_writechk(struct vnode *vp)
193 {
194 	/*
195 	 * Disallow write attempts on read-only file systems;
196 	 * unless the file is a socket or a block or character
197 	 * device resident on the file system.
198 	 */
199 	if (vp->v_mount->mnt_flag & MNT_RDONLY) {
200 		switch (vp->v_type) {
201 		case VREG:
202 		case VDIR:
203 		case VLNK:
204 			return (EROFS);
205 		case VNON:
206 		case VCHR:
207 		case VSOCK:
208 		case VFIFO:
209 		case VBAD:
210 		case VBLK:
211 			break;
212 		}
213 	}
214 	/*
215 	 * If there's shared text associated with
216 	 * the vnode, try to free it up once.  If
217 	 * we fail, we can't allow writing.
218 	 */
219 	if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp))
220 		return (ETXTBSY);
221 
222 	return (0);
223 }
224 
225 /*
226  * Check whether a write operation would exceed the file size rlimit
227  * for the process, if one should be applied for this operation.
228  * If a partial write should take place, the uio is adjusted and the
229  * amount by which the request would have exceeded the limit is returned
230  * via the 'overrun' argument.
231  */
232 int
233 vn_fsizechk(struct vnode *vp, struct uio *uio, int ioflag, ssize_t *overrun)
234 {
235 	struct proc *p = uio->uio_procp;
236 
237 	*overrun = 0;
238 	if (vp->v_type == VREG && p != NULL && !(ioflag & IO_NOLIMIT)) {
239 		rlim_t limit = p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
240 
241 		/* if already at or over the limit, send the signal and fail */
242 		if (uio->uio_offset >= limit) {
243 			psignal(p, SIGXFSZ);
244 			return (EFBIG);
245 		}
246 
247 		/* otherwise, clamp the write to stay under the limit */
248 		if (uio->uio_resid > limit - uio->uio_offset) {
249 			*overrun = uio->uio_resid - (limit - uio->uio_offset);
250 			uio->uio_resid = limit - uio->uio_offset;
251 		}
252 	}
253 
254 	return (0);
255 }
256 
257 
258 /*
259  * Mark a vnode as being the text image of a running process.
260  */
void
vn_marktext(struct vnode *vp)
{
	/* VTEXT makes subsequent vn_writechk() calls fail with ETXTBSY. */
	vp->v_flag |= VTEXT;
}
266 
267 /*
268  * Vnode close call
269  */
int
vn_close(struct vnode *vp, int flags, struct ucred *cred, struct proc *p)
{
	int error;

	/* Undo the writecount increment taken by vn_open(). */
	if (flags & FWRITE)
		vp->v_writecount--;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	error = VOP_CLOSE(vp, flags, cred, p);
	/* vput() drops both the lock and our reference. */
	vput(vp);
	return (error);
}
282 
283 /*
284  * Package up an I/O request on a vnode into a uio and do it.
285  */
286 int
287 vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, int len, off_t offset,
288     enum uio_seg segflg, int ioflg, struct ucred *cred, size_t *aresid,
289     struct proc *p)
290 {
291 	struct uio auio;
292 	struct iovec aiov;
293 	int error;
294 
295 	auio.uio_iov = &aiov;
296 	auio.uio_iovcnt = 1;
297 	aiov.iov_base = base;
298 	aiov.iov_len = len;
299 	auio.uio_resid = len;
300 	auio.uio_offset = offset;
301 	auio.uio_segflg = segflg;
302 	auio.uio_rw = rw;
303 	auio.uio_procp = p;
304 
305 	if ((ioflg & IO_NODELOCKED) == 0)
306 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
307 	if (rw == UIO_READ) {
308 		error = VOP_READ(vp, &auio, ioflg, cred);
309 	} else {
310 		error = VOP_WRITE(vp, &auio, ioflg, cred);
311 	}
312 	if ((ioflg & IO_NODELOCKED) == 0)
313 		VOP_UNLOCK(vp, 0, p);
314 
315 	if (aresid)
316 		*aresid = auio.uio_resid;
317 	else
318 		if (auio.uio_resid && error == 0)
319 			error = EIO;
320 	return (error);
321 }
322 
323 /*
324  * File table vnode read routine.
325  */
326 int
327 vn_read(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred)
328 {
329 	struct vnode *vp = (struct vnode *)fp->f_data;
330 	int error = 0;
331 	size_t count = uio->uio_resid;
332 	struct proc *p = uio->uio_procp;
333 
334 	/* no wrap around of offsets except on character devices */
335 	if (vp->v_type != VCHR && count > LLONG_MAX - *poff)
336 		return (EINVAL);
337 
338 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
339 	uio->uio_offset = *poff;
340 	if (vp->v_type != VDIR)
341 		error = VOP_READ(vp, uio,
342 		    (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0, cred);
343 	*poff += count - uio->uio_resid;
344 	VOP_UNLOCK(vp, 0, p);
345 	return (error);
346 }
347 
348 /*
349  * File table vnode write routine.
350  */
351 int
352 vn_write(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred)
353 {
354 	struct vnode *vp = (struct vnode *)fp->f_data;
355 	struct proc *p = uio->uio_procp;
356 	int error, ioflag = IO_UNIT;
357 	size_t count;
358 
359 	/* note: pwrite/pwritev are unaffected by O_APPEND */
360 	if (vp->v_type == VREG && (fp->f_flag & O_APPEND) &&
361 	    poff == &fp->f_offset)
362 		ioflag |= IO_APPEND;
363 	if (fp->f_flag & FNONBLOCK)
364 		ioflag |= IO_NDELAY;
365 	if ((fp->f_flag & FFSYNC) ||
366 	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
367 		ioflag |= IO_SYNC;
368 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
369 	uio->uio_offset = *poff;
370 	count = uio->uio_resid;
371 	error = VOP_WRITE(vp, uio, ioflag, cred);
372 	if (ioflag & IO_APPEND)
373 		*poff = uio->uio_offset;
374 	else
375 		*poff += count - uio->uio_resid;
376 	VOP_UNLOCK(vp, 0, p);
377 	return (error);
378 }
379 
380 /*
381  * File table wrapper for vn_stat
382  */
383 int
384 vn_statfile(struct file *fp, struct stat *sb, struct proc *p)
385 {
386 	struct vnode *vp = (struct vnode *)fp->f_data;
387 	return vn_stat(vp, sb, p);
388 }
389 
390 /*
391  * vnode stat routine.
392  */
393 int
394 vn_stat(struct vnode *vp, struct stat *sb, struct proc *p)
395 {
396 	struct vattr va;
397 	int error;
398 	mode_t mode;
399 
400 	error = VOP_GETATTR(vp, &va, p->p_ucred, p);
401 	if (error)
402 		return (error);
403 	/*
404 	 * Copy from vattr table
405 	 */
406 	memset(sb, 0, sizeof(*sb));
407 	sb->st_dev = va.va_fsid;
408 	sb->st_ino = va.va_fileid;
409 	mode = va.va_mode;
410 	switch (vp->v_type) {
411 	case VREG:
412 		mode |= S_IFREG;
413 		break;
414 	case VDIR:
415 		mode |= S_IFDIR;
416 		break;
417 	case VBLK:
418 		mode |= S_IFBLK;
419 		break;
420 	case VCHR:
421 		mode |= S_IFCHR;
422 		break;
423 	case VLNK:
424 		mode |= S_IFLNK;
425 		break;
426 	case VSOCK:
427 		mode |= S_IFSOCK;
428 		break;
429 	case VFIFO:
430 		mode |= S_IFIFO;
431 		break;
432 	default:
433 		return (EBADF);
434 	}
435 	sb->st_mode = mode;
436 	sb->st_nlink = va.va_nlink;
437 	sb->st_uid = va.va_uid;
438 	sb->st_gid = va.va_gid;
439 	sb->st_rdev = va.va_rdev;
440 	sb->st_size = va.va_size;
441 	sb->st_atim.tv_sec  = va.va_atime.tv_sec;
442 	sb->st_atim.tv_nsec = va.va_atime.tv_nsec;
443 	sb->st_mtim.tv_sec  = va.va_mtime.tv_sec;
444 	sb->st_mtim.tv_nsec = va.va_mtime.tv_nsec;
445 	sb->st_ctim.tv_sec  = va.va_ctime.tv_sec;
446 	sb->st_ctim.tv_nsec = va.va_ctime.tv_nsec;
447 	sb->st_blksize = va.va_blocksize;
448 	sb->st_flags = va.va_flags;
449 	sb->st_gen = va.va_gen;
450 	sb->st_blocks = va.va_bytes / S_BLKSIZE;
451 	return (0);
452 }
453 
454 /*
455  * File table vnode ioctl routine.
456  */
int
vn_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p)
{
	struct vnode *vp = ((struct vnode *)fp->f_data);
	struct vattr vattr;
	int error;

	switch (vp->v_type) {

	case VREG:
	case VDIR:
		if (com == FIONREAD) {
			/* Bytes remaining between the offset and EOF. */
			error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
			if (error)
				return (error);
			*(int *)data = vattr.va_size - fp->f_offset;
			return (0);
		}
		if (com == FIONBIO || com == FIOASYNC)  /* XXX */
			return (0);			/* XXX */
		/* FALLTHROUGH */
	default:
		return (ENOTTY);

	case VFIFO:
	case VCHR:
	case VBLK:
		error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
		/*
		 * A successful TIOCSCTTY made this the controlling tty;
		 * record its vnode in the session, swapping references.
		 */
		if (error == 0 && com == TIOCSCTTY) {
			struct session *s = p->p_p->ps_session;
			struct vnode *ovp = s->s_ttyvp;
			s->s_ttyvp = vp;
			vref(vp);
			if (ovp)
				vrele(ovp);
		}
		return (error);
	}
}
496 
497 /*
498  * File table vnode poll routine.
499  */
500 int
501 vn_poll(struct file *fp, int events, struct proc *p)
502 {
503 	return (VOP_POLL(((struct vnode *)fp->f_data), events, p));
504 }
505 
506 /*
507  * Check that the vnode is still valid, and if so
508  * acquire requested lock.
509  */
int
vn_lock(struct vnode *vp, int flags, struct proc *p)
{
	int error;

	/* Allow recursive locking unless the caller forbids it. */
	if ((flags & LK_RECURSEFAIL) == 0)
		flags |= LK_CANRECURSE;

	do {
		if (vp->v_flag & VXLOCK) {
			/*
			 * The vnode is being cleaned out; wait for that
			 * to finish and report the vnode as gone.
			 */
			vp->v_flag |= VXWANT;
			tsleep(vp, PINOD, "vn_lock", 0);
			error = ENOENT;
		} else {
			error = VOP_LOCK(vp, flags, p);
			if (error == 0)
				return (error);
		}
		/* With LK_RETRY, keep trying until the lock is taken. */
	} while (flags & LK_RETRY);
	return (error);
}
531 
532 /*
533  * File table vnode close routine.
534  */
535 int
536 vn_closefile(struct file *fp, struct proc *p)
537 {
538 	struct vnode *vp = fp->f_data;
539 	struct flock lf;
540 
541 	if ((fp->f_iflags & FIF_HASLOCK)) {
542 		lf.l_whence = SEEK_SET;
543 		lf.l_start = 0;
544 		lf.l_len = 0;
545 		lf.l_type = F_UNLCK;
546 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
547 	}
548 
549 	return (vn_close(vp, fp->f_flag, fp->f_cred, p));
550 }
551 
552 int
553 vn_kqfilter(struct file *fp, struct knote *kn)
554 {
555 	return (VOP_KQFILTER(((struct vnode *)fp->f_data), kn));
556 }
557 
558 /*
559  * Common code for vnode access operations.
560  */
561 
562 /* Check if a directory can be found inside another in the hierarchy */
563 int
564 vn_isunder(struct vnode *lvp, struct vnode *rvp, struct proc *p)
565 {
566 	int error;
567 
568 	error = vfs_getcwd_common(lvp, rvp, NULL, NULL, MAXPATHLEN/2, 0, p);
569 
570 	if (!error)
571 		return (1);
572 
573 	return (0);
574 }
575