xref: /netbsd-src/sys/kern/sys_descrip.c (revision 404fbe5fb94ca1e054339640cabb2801ce52dd30)
1 /*	$NetBSD: sys_descrip.c,v 1.9 2009/01/11 02:45:52 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright (c) 1982, 1986, 1989, 1991, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  * (c) UNIX System Laboratories, Inc.
33  * All or some portions of this file are derived from material licensed
34  * to the University of California by American Telephone and Telegraph
35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36  * the permission of UNIX System Laboratories, Inc.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. Neither the name of the University nor the names of its contributors
47  *    may be used to endorse or promote products derived from this software
48  *    without specific prior written permission.
49  *
50  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  *
62  *	@(#)kern_descrip.c	8.8 (Berkeley) 2/14/95
63  */
64 
65 /*
66  * System calls on descriptors.
67  */
68 
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.9 2009/01/11 02:45:52 christos Exp $");
71 
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/kernel.h>
76 #include <sys/vnode.h>
77 #include <sys/proc.h>
78 #include <sys/file.h>
79 #include <sys/namei.h>
80 #include <sys/socket.h>
81 #include <sys/socketvar.h>
82 #include <sys/stat.h>
83 #include <sys/ioctl.h>
84 #include <sys/fcntl.h>
85 #include <sys/malloc.h>
86 #include <sys/pool.h>
87 #include <sys/syslog.h>
88 #include <sys/unistd.h>
89 #include <sys/resourcevar.h>
90 #include <sys/conf.h>
91 #include <sys/event.h>
92 #include <sys/kauth.h>
93 #include <sys/atomic.h>
94 #include <sys/mount.h>
95 #include <sys/syscallargs.h>
96 
97 /*
98  * Duplicate a file descriptor.
99  */
100 int
101 sys_dup(struct lwp *l, const struct sys_dup_args *uap, register_t *retval)
102 {
103 	/* {
104 		syscallarg(int)	fd;
105 	} */
106 	int new, error, old;
107 	file_t *fp;
108 
109 	old = SCARG(uap, fd);
110 
111 	if ((fp = fd_getfile(old)) == NULL) {
112 		return EBADF;
113 	}
114 	error = fd_dup(fp, 0, &new, false);
115 	fd_putfile(old);
116 	*retval = new;
117 	return error;
118 }
119 
120 /*
121  * Duplicate a file descriptor to a particular value.
122  */
123 int
124 sys_dup2(struct lwp *l, const struct sys_dup2_args *uap, register_t *retval)
125 {
126 	/* {
127 		syscallarg(int)	from;
128 		syscallarg(int)	to;
129 	} */
130 	int old, new, error;
131 	file_t *fp;
132 
133 	old = SCARG(uap, from);
134 	new = SCARG(uap, to);
135 
136 	if ((fp = fd_getfile(old)) == NULL) {
137 		return EBADF;
138 	}
139 	mutex_enter(&fp->f_lock);
140 	fp->f_count++;
141 	mutex_exit(&fp->f_lock);
142 	fd_putfile(old);
143 
144 	if ((u_int)new >= curproc->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
145 	    (u_int)new >= maxfiles) {
146 		error = EBADF;
147 	} else if (old == new) {
148 		error = 0;
149 	} else {
150 		error = fd_dup2(fp, new);
151 	}
152 	closef(fp);
153 	*retval = new;
154 
155 	return error;
156 }
157 
158 /*
159  * fcntl call which is being passed to the file's fs.
160  */
161 static int
162 fcntl_forfs(int fd, file_t *fp, int cmd, void *arg)
163 {
164 	int		error;
165 	u_int		size;
166 	void		*data, *memp;
167 #define STK_PARAMS	128
168 	char		stkbuf[STK_PARAMS];
169 
170 	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
171 		return (EBADF);
172 
173 	/*
174 	 * Interpret high order word to find amount of data to be
175 	 * copied to/from the user's address space.
176 	 */
177 	size = (size_t)F_PARAM_LEN(cmd);
178 	if (size > F_PARAM_MAX)
179 		return (EINVAL);
180 	memp = NULL;
181 	if (size > sizeof(stkbuf)) {
182 		memp = kmem_alloc(size, KM_SLEEP);
183 		data = memp;
184 	} else
185 		data = stkbuf;
186 	if (cmd & F_FSIN) {
187 		if (size) {
188 			error = copyin(arg, data, size);
189 			if (error) {
190 				if (memp)
191 					kmem_free(memp, size);
192 				return (error);
193 			}
194 		} else
195 			*(void **)data = arg;
196 	} else if ((cmd & F_FSOUT) != 0 && size != 0) {
197 		/*
198 		 * Zero the buffer so the user always
199 		 * gets back something deterministic.
200 		 */
201 		memset(data, 0, size);
202 	} else if (cmd & F_FSVOID)
203 		*(void **)data = arg;
204 
205 
206 	error = (*fp->f_ops->fo_fcntl)(fp, cmd, data);
207 
208 	/*
209 	 * Copy any data to user, size was
210 	 * already set and checked above.
211 	 */
212 	if (error == 0 && (cmd & F_FSOUT) && size)
213 		error = copyout(data, arg, size);
214 	if (memp)
215 		kmem_free(memp, size);
216 	return (error);
217 }
218 
219 int
220 do_fcntl_lock(int fd, int cmd, struct flock *fl)
221 {
222 	file_t *fp;
223 	vnode_t *vp;
224 	proc_t *p;
225 	int error, flg;
226 
227 	if ((fp = fd_getfile(fd)) == NULL)
228 		return EBADF;
229 	if (fp->f_type != DTYPE_VNODE) {
230 		fd_putfile(fd);
231 		return EINVAL;
232 	}
233 	vp = fp->f_data;
234 	if (fl->l_whence == SEEK_CUR)
235 		fl->l_start += fp->f_offset;
236 
237 	flg = F_POSIX;
238 	p = curproc;
239 
240 	switch (cmd) {
241 	case F_SETLKW:
242 		flg |= F_WAIT;
243 		/* Fall into F_SETLK */
244 
245 	case F_SETLK:
246 		switch (fl->l_type) {
247 		case F_RDLCK:
248 			if ((fp->f_flag & FREAD) == 0) {
249 				error = EBADF;
250 				break;
251 			}
252 			if ((p->p_flag & PK_ADVLOCK) == 0) {
253 				mutex_enter(p->p_lock);
254 				p->p_flag |= PK_ADVLOCK;
255 				mutex_exit(p->p_lock);
256 			}
257 			error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg);
258 			break;
259 
260 		case F_WRLCK:
261 			if ((fp->f_flag & FWRITE) == 0) {
262 				error = EBADF;
263 				break;
264 			}
265 			if ((p->p_flag & PK_ADVLOCK) == 0) {
266 				mutex_enter(p->p_lock);
267 				p->p_flag |= PK_ADVLOCK;
268 				mutex_exit(p->p_lock);
269 			}
270 			error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg);
271 			break;
272 
273 		case F_UNLCK:
274 			error = VOP_ADVLOCK(vp, p, F_UNLCK, fl, F_POSIX);
275 			break;
276 
277 		default:
278 			error = EINVAL;
279 			break;
280 		}
281 		break;
282 
283 	case F_GETLK:
284 		if (fl->l_type != F_RDLCK &&
285 		    fl->l_type != F_WRLCK &&
286 		    fl->l_type != F_UNLCK) {
287 			error = EINVAL;
288 			break;
289 		}
290 		error = VOP_ADVLOCK(vp, p, F_GETLK, fl, F_POSIX);
291 		break;
292 
293 	default:
294 		error = EINVAL;
295 		break;
296 	}
297 
298 	fd_putfile(fd);
299 	return error;
300 }
301 
302 /*
303  * The file control system call.
304  */
305 int
306 sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval)
307 {
308 	/* {
309 		syscallarg(int)		fd;
310 		syscallarg(int)		cmd;
311 		syscallarg(void *)	arg;
312 	} */
313 	int fd, i, tmp, error, cmd, newmin;
314 	filedesc_t *fdp;
315 	file_t *fp;
316 	fdfile_t *ff;
317 	struct flock fl;
318 
319 	fd = SCARG(uap, fd);
320 	cmd = SCARG(uap, cmd);
321 	fdp = l->l_fd;
322 	error = 0;
323 
324 	switch (cmd) {
325 	case F_CLOSEM:
326 		if (fd < 0)
327 			return EBADF;
328 		while ((i = fdp->fd_lastfile) >= fd) {
329 			if (fd_getfile(i) == NULL) {
330 				/* Another thread has updated. */
331 				continue;
332 			}
333 			fd_close(i);
334 		}
335 		return 0;
336 
337 	case F_MAXFD:
338 		*retval = fdp->fd_lastfile;
339 		return 0;
340 
341 	case F_SETLKW:
342 	case F_SETLK:
343 	case F_GETLK:
344 		error = copyin(SCARG(uap, arg), &fl, sizeof(fl));
345 		if (error)
346 			return error;
347 		error = do_fcntl_lock(fd, cmd, &fl);
348 		if (cmd == F_GETLK && error == 0)
349 			error = copyout(&fl, SCARG(uap, arg), sizeof(fl));
350 		return error;
351 
352 	default:
353 		/* Handled below */
354 		break;
355 	}
356 
357 	if ((fp = fd_getfile(fd)) == NULL)
358 		return (EBADF);
359 	ff = fdp->fd_ofiles[fd];
360 
361 	if ((cmd & F_FSCTL)) {
362 		error = fcntl_forfs(fd, fp, cmd, SCARG(uap, arg));
363 		fd_putfile(fd);
364 		return error;
365 	}
366 
367 	switch (cmd) {
368 	case F_DUPFD:
369 		newmin = (long)SCARG(uap, arg);
370 		if ((u_int)newmin >=
371 		    l->l_proc->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
372 		    (u_int)newmin >= maxfiles) {
373 			fd_putfile(fd);
374 			return EINVAL;
375 		}
376 		error = fd_dup(fp, newmin, &i, false);
377 		*retval = i;
378 		break;
379 
380 	case F_GETFD:
381 		*retval = ff->ff_exclose;
382 		break;
383 
384 	case F_SETFD:
385 		if ((long)SCARG(uap, arg) & 1) {
386 			ff->ff_exclose = true;
387 			fdp->fd_exclose = true;
388 		} else {
389 			ff->ff_exclose = false;
390 		}
391 		break;
392 
393 	case F_GETFL:
394 		*retval = OFLAGS(fp->f_flag);
395 		break;
396 
397 	case F_SETFL:
398 		/* XXX not guaranteed to be atomic. */
399 		tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS;
400 		error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp);
401 		if (error)
402 			break;
403 		i = tmp ^ fp->f_flag;
404 		if (i & FNONBLOCK) {
405 			int flgs = tmp & FNONBLOCK;
406 			error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs);
407 			if (error) {
408 				(*fp->f_ops->fo_fcntl)(fp, F_SETFL,
409 				    &fp->f_flag);
410 				break;
411 			}
412 		}
413 		if (i & FASYNC) {
414 			int flgs = tmp & FASYNC;
415 			error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs);
416 			if (error) {
417 				if (i & FNONBLOCK) {
418 					tmp = fp->f_flag & FNONBLOCK;
419 					(void)(*fp->f_ops->fo_ioctl)(fp,
420 						FIONBIO, &tmp);
421 				}
422 				(*fp->f_ops->fo_fcntl)(fp, F_SETFL,
423 				    &fp->f_flag);
424 				break;
425 			}
426 		}
427 		fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp;
428 		break;
429 
430 	case F_GETOWN:
431 		error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp);
432 		*retval = tmp;
433 		break;
434 
435 	case F_SETOWN:
436 		tmp = (int)(uintptr_t) SCARG(uap, arg);
437 		error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp);
438 		break;
439 
440 	default:
441 		error = EINVAL;
442 	}
443 
444 	fd_putfile(fd);
445 	return (error);
446 }
447 
448 /*
449  * Close a file descriptor.
450  */
451 int
452 sys_close(struct lwp *l, const struct sys_close_args *uap, register_t *retval)
453 {
454 	/* {
455 		syscallarg(int)	fd;
456 	} */
457 
458 	if (fd_getfile(SCARG(uap, fd)) == NULL) {
459 		return EBADF;
460 	}
461 	return fd_close(SCARG(uap, fd));
462 }
463 
464 /*
465  * Return status information about a file descriptor.
466  * Common function for compat code.
467  */
468 int
469 do_sys_fstat(int fd, struct stat *sb)
470 {
471 	file_t *fp;
472 	int error;
473 
474 	if ((fp = fd_getfile(fd)) == NULL) {
475 		return EBADF;
476 	}
477 	error = (*fp->f_ops->fo_stat)(fp, sb);
478 	fd_putfile(fd);
479 
480 	return error;
481 }
482 
483 /*
484  * Return status information about a file descriptor.
485  */
486 int
487 sys___fstat50(struct lwp *l, const struct sys___fstat50_args *uap,
488 	      register_t *retval)
489 {
490 	/* {
491 		syscallarg(int)			fd;
492 		syscallarg(struct stat *)	sb;
493 	} */
494 	struct stat sb;
495 	int error;
496 
497 	error = do_sys_fstat(SCARG(uap, fd), &sb);
498 	if (error == 0) {
499 		error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
500 	}
501 	return error;
502 }
503 
504 /*
505  * Return pathconf information about a file descriptor.
506  */
507 int
508 sys_fpathconf(struct lwp *l, const struct sys_fpathconf_args *uap,
509 	      register_t *retval)
510 {
511 	/* {
512 		syscallarg(int)	fd;
513 		syscallarg(int)	name;
514 	} */
515 	int fd, error;
516 	file_t *fp;
517 
518 	fd = SCARG(uap, fd);
519 	error = 0;
520 
521 	if ((fp = fd_getfile(fd)) == NULL) {
522 		return (EBADF);
523 	}
524 	switch (fp->f_type) {
525 	case DTYPE_SOCKET:
526 	case DTYPE_PIPE:
527 		if (SCARG(uap, name) != _PC_PIPE_BUF)
528 			error = EINVAL;
529 		else
530 			*retval = PIPE_BUF;
531 		break;
532 
533 	case DTYPE_VNODE:
534 		error = VOP_PATHCONF(fp->f_data, SCARG(uap, name), retval);
535 		break;
536 
537 	case DTYPE_KQUEUE:
538 		error = EINVAL;
539 		break;
540 
541 	default:
542 		error = EOPNOTSUPP;
543 		break;
544 	}
545 
546 	fd_putfile(fd);
547 	return (error);
548 }
549 
550 /*
551  * Apply an advisory lock on a file descriptor.
552  *
553  * Just attempt to get a record lock of the requested type on
554  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
555  */
556 /* ARGSUSED */
557 int
558 sys_flock(struct lwp *l, const struct sys_flock_args *uap, register_t *retval)
559 {
560 	/* {
561 		syscallarg(int)	fd;
562 		syscallarg(int)	how;
563 	} */
564 	int fd, how, error;
565 	file_t *fp;
566 	vnode_t	*vp;
567 	struct flock lf;
568 	proc_t *p;
569 
570 	fd = SCARG(uap, fd);
571 	how = SCARG(uap, how);
572 	error = 0;
573 
574 	if ((fp = fd_getfile(fd)) == NULL) {
575 		return EBADF;
576 	}
577 	if (fp->f_type != DTYPE_VNODE) {
578 		fd_putfile(fd);
579 		return EOPNOTSUPP;
580 	}
581 
582 	vp = fp->f_data;
583 	lf.l_whence = SEEK_SET;
584 	lf.l_start = 0;
585 	lf.l_len = 0;
586 	if (how & LOCK_UN) {
587 		lf.l_type = F_UNLCK;
588 		atomic_and_uint(&fp->f_flag, ~FHASLOCK);
589 		error = VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK);
590 		fd_putfile(fd);
591 		return error;
592 	}
593 	if (how & LOCK_EX) {
594 		lf.l_type = F_WRLCK;
595 	} else if (how & LOCK_SH) {
596 		lf.l_type = F_RDLCK;
597 	} else {
598 		fd_putfile(fd);
599 		return EINVAL;
600 	}
601 	atomic_or_uint(&fp->f_flag, FHASLOCK);
602 	p = curproc;
603 	if (how & LOCK_NB) {
604 		error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK);
605 	} else {
606 		error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK|F_WAIT);
607 	}
608 	fd_putfile(fd);
609 	return error;
610 }
611 
612 int
613 do_posix_fadvise(int fd, off_t offset, off_t len, int advice)
614 {
615 	file_t *fp;
616 	int error;
617 
618 	if ((fp = fd_getfile(fd)) == NULL) {
619 		return EBADF;
620 	}
621 	if (fp->f_type != DTYPE_VNODE) {
622 		if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
623 			error = ESPIPE;
624 		} else {
625 			error = EOPNOTSUPP;
626 		}
627 		fd_putfile(fd);
628 		return error;
629 	}
630 
631 	switch (advice) {
632 	case POSIX_FADV_NORMAL:
633 	case POSIX_FADV_RANDOM:
634 	case POSIX_FADV_SEQUENTIAL:
635 		KASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL);
636 		KASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM);
637 		KASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL);
638 
639 		/*
640 		 * We ignore offset and size.  must lock the file to
641 		 * do this, as f_advice is sub-word sized.
642 		 */
643 		mutex_enter(&fp->f_lock);
644 		fp->f_advice = (u_char)advice;
645 		mutex_exit(&fp->f_lock);
646 		error = 0;
647 		break;
648 
649 	case POSIX_FADV_WILLNEED:
650 	case POSIX_FADV_DONTNEED:
651 	case POSIX_FADV_NOREUSE:
652 		/* Not implemented yet. */
653 		error = 0;
654 		break;
655 	default:
656 		error = EINVAL;
657 		break;
658 	}
659 
660 	fd_putfile(fd);
661 	return error;
662 }
663 
664 int
665 sys___posix_fadvise50(struct lwp *l,
666 		      const struct sys___posix_fadvise50_args *uap,
667 		      register_t *retval)
668 {
669 	/* {
670 		syscallarg(int) fd;
671 		syscallarg(int) pad;
672 		syscallarg(off_t) offset;
673 		syscallarg(off_t) len;
674 		syscallarg(int) advice;
675 	} */
676 
677 	return do_posix_fadvise(SCARG(uap, fd), SCARG(uap, offset),
678 	    SCARG(uap, len), SCARG(uap, advice));
679 }
680