xref: /netbsd-src/sys/kern/sys_descrip.c (revision 8ac07aec990b9d2e483062509d0a9fa5b4f57cf2)
1 /*	$NetBSD: sys_descrip.c,v 1.2 2008/04/24 18:39:24 ad Exp $	*/
2 
3 /*-
4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the NetBSD
18  *	Foundation, Inc. and its contributors.
19  * 4. Neither the name of The NetBSD Foundation nor the names of its
20  *    contributors may be used to endorse or promote products derived
21  *    from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /*
37  * Copyright (c) 1982, 1986, 1989, 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  * (c) UNIX System Laboratories, Inc.
40  * All or some portions of this file are derived from material licensed
41  * to the University of California by American Telephone and Telegraph
42  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
43  * the permission of UNIX System Laboratories, Inc.
44  *
45  * Redistribution and use in source and binary forms, with or without
46  * modification, are permitted provided that the following conditions
47  * are met:
48  * 1. Redistributions of source code must retain the above copyright
49  *    notice, this list of conditions and the following disclaimer.
50  * 2. Redistributions in binary form must reproduce the above copyright
51  *    notice, this list of conditions and the following disclaimer in the
52  *    documentation and/or other materials provided with the distribution.
53  * 3. Neither the name of the University nor the names of its contributors
54  *    may be used to endorse or promote products derived from this software
55  *    without specific prior written permission.
56  *
57  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
58  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
59  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
60  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
61  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
62  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
63  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
64  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
65  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67  * SUCH DAMAGE.
68  *
69  *	@(#)kern_descrip.c	8.8 (Berkeley) 2/14/95
70  */
71 
72 /*
73  * System calls on descriptors.
74  */
75 
76 #include <sys/cdefs.h>
77 __KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.2 2008/04/24 18:39:24 ad Exp $");
78 
79 #include <sys/param.h>
80 #include <sys/systm.h>
81 #include <sys/filedesc.h>
82 #include <sys/kernel.h>
83 #include <sys/vnode.h>
84 #include <sys/proc.h>
85 #include <sys/file.h>
86 #include <sys/namei.h>
87 #include <sys/socket.h>
88 #include <sys/socketvar.h>
89 #include <sys/stat.h>
90 #include <sys/ioctl.h>
91 #include <sys/fcntl.h>
92 #include <sys/malloc.h>
93 #include <sys/pool.h>
94 #include <sys/syslog.h>
95 #include <sys/unistd.h>
96 #include <sys/resourcevar.h>
97 #include <sys/conf.h>
98 #include <sys/event.h>
99 #include <sys/kauth.h>
100 #include <sys/atomic.h>
101 #include <sys/mount.h>
102 #include <sys/syscallargs.h>
103 
104 /*
105  * Duplicate a file descriptor.
106  */
107 int
108 sys_dup(struct lwp *l, const struct sys_dup_args *uap, register_t *retval)
109 {
110 	/* {
111 		syscallarg(int)	fd;
112 	} */
113 	int new, error, old;
114 	file_t *fp;
115 
116 	old = SCARG(uap, fd);
117 
118 	if ((fp = fd_getfile(old)) == NULL) {
119 		return EBADF;
120 	}
121 	error = fd_dup(fp, 0, &new, 0);
122 	fd_putfile(old);
123 	*retval = new;
124 	return error;
125 }
126 
127 /*
128  * Duplicate a file descriptor to a particular value.
129  */
130 int
131 sys_dup2(struct lwp *l, const struct sys_dup2_args *uap, register_t *retval)
132 {
133 	/* {
134 		syscallarg(int)	from;
135 		syscallarg(int)	to;
136 	} */
137 	int old, new, error;
138 	file_t *fp;
139 
140 	old = SCARG(uap, from);
141 	new = SCARG(uap, to);
142 
143 	if ((fp = fd_getfile(old)) == NULL) {
144 		return EBADF;
145 	}
146 	if ((u_int)new >= curproc->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
147 	    (u_int)new >= maxfiles) {
148 		error = EBADF;
149 	} else if (old == new) {
150 		error = 0;
151 	} else {
152 		error = fd_dup2(fp, new);
153 	}
154 	fd_putfile(old);
155 	*retval = new;
156 
157 	return 0;
158 }
159 
160 /*
161  * fcntl call which is being passed to the file's fs.
162  */
163 static int
164 fcntl_forfs(int fd, file_t *fp, int cmd, void *arg)
165 {
166 	int		error;
167 	u_int		size;
168 	void		*data, *memp;
169 #define STK_PARAMS	128
170 	char		stkbuf[STK_PARAMS];
171 
172 	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
173 		return (EBADF);
174 
175 	/*
176 	 * Interpret high order word to find amount of data to be
177 	 * copied to/from the user's address space.
178 	 */
179 	size = (size_t)F_PARAM_LEN(cmd);
180 	if (size > F_PARAM_MAX)
181 		return (EINVAL);
182 	memp = NULL;
183 	if (size > sizeof(stkbuf)) {
184 		memp = kmem_alloc(size, KM_SLEEP);
185 		data = memp;
186 	} else
187 		data = stkbuf;
188 	if (cmd & F_FSIN) {
189 		if (size) {
190 			error = copyin(arg, data, size);
191 			if (error) {
192 				if (memp)
193 					kmem_free(memp, size);
194 				return (error);
195 			}
196 		} else
197 			*(void **)data = arg;
198 	} else if ((cmd & F_FSOUT) != 0 && size != 0) {
199 		/*
200 		 * Zero the buffer so the user always
201 		 * gets back something deterministic.
202 		 */
203 		memset(data, 0, size);
204 	} else if (cmd & F_FSVOID)
205 		*(void **)data = arg;
206 
207 
208 	error = (*fp->f_ops->fo_fcntl)(fp, cmd, data);
209 
210 	/*
211 	 * Copy any data to user, size was
212 	 * already set and checked above.
213 	 */
214 	if (error == 0 && (cmd & F_FSOUT) && size)
215 		error = copyout(data, arg, size);
216 	if (memp)
217 		kmem_free(memp, size);
218 	return (error);
219 }
220 
221 int
222 do_fcntl_lock(int fd, int cmd, struct flock *fl)
223 {
224 	file_t *fp;
225 	vnode_t *vp;
226 	proc_t *p;
227 	int error, flg;
228 
229 	if ((fp = fd_getfile(fd)) == NULL)
230 		return EBADF;
231 	if (fp->f_type != DTYPE_VNODE) {
232 		fd_putfile(fd);
233 		return EINVAL;
234 	}
235 	vp = fp->f_data;
236 	if (fl->l_whence == SEEK_CUR)
237 		fl->l_start += fp->f_offset;
238 
239 	flg = F_POSIX;
240 	p = curproc;
241 
242 	switch (cmd) {
243 	case F_SETLKW:
244 		flg |= F_WAIT;
245 		/* Fall into F_SETLK */
246 
247 	case F_SETLK:
248 		switch (fl->l_type) {
249 		case F_RDLCK:
250 			if ((fp->f_flag & FREAD) == 0) {
251 				error = EBADF;
252 				break;
253 			}
254 			if ((p->p_flag & PK_ADVLOCK) == 0) {
255 				mutex_enter(p->p_lock);
256 				p->p_flag |= PK_ADVLOCK;
257 				mutex_exit(p->p_lock);
258 			}
259 			error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg);
260 			break;
261 
262 		case F_WRLCK:
263 			if ((fp->f_flag & FWRITE) == 0) {
264 				error = EBADF;
265 				break;
266 			}
267 			if ((p->p_flag & PK_ADVLOCK) == 0) {
268 				mutex_enter(p->p_lock);
269 				p->p_flag |= PK_ADVLOCK;
270 				mutex_exit(p->p_lock);
271 			}
272 			error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg);
273 			break;
274 
275 		case F_UNLCK:
276 			error = VOP_ADVLOCK(vp, p, F_UNLCK, fl, F_POSIX);
277 			break;
278 
279 		default:
280 			error = EINVAL;
281 			break;
282 		}
283 		break;
284 
285 	case F_GETLK:
286 		if (fl->l_type != F_RDLCK &&
287 		    fl->l_type != F_WRLCK &&
288 		    fl->l_type != F_UNLCK) {
289 			error = EINVAL;
290 			break;
291 		}
292 		error = VOP_ADVLOCK(vp, p, F_GETLK, fl, F_POSIX);
293 		break;
294 
295 	default:
296 		error = EINVAL;
297 		break;
298 	}
299 
300 	fd_putfile(fd);
301 	return error;
302 }
303 
304 /*
305  * The file control system call.
306  */
307 int
308 sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval)
309 {
310 	/* {
311 		syscallarg(int)		fd;
312 		syscallarg(int)		cmd;
313 		syscallarg(void *)	arg;
314 	} */
315 	int fd, i, tmp, error, cmd, newmin;
316 	filedesc_t *fdp;
317 	file_t *fp;
318 	fdfile_t *ff;
319 	proc_t *p;
320 	struct flock fl;
321 
322 	p = l->l_proc;
323 	fd = SCARG(uap, fd);
324 	cmd = SCARG(uap, cmd);
325 	fdp = p->p_fd;
326 	error = 0;
327 
328 	switch (cmd) {
329 	case F_CLOSEM:
330 		if (fd < 0)
331 			return EBADF;
332 		while ((i = fdp->fd_lastfile) >= fd) {
333 			if (fd_getfile(i) == NULL) {
334 				/* Another thread has updated. */
335 				continue;
336 			}
337 			fd_close(i);
338 		}
339 		return 0;
340 
341 	case F_MAXFD:
342 		*retval = fdp->fd_lastfile;
343 		return 0;
344 
345 	case F_SETLKW:
346 	case F_SETLK:
347 	case F_GETLK:
348 		error = copyin(SCARG(uap, arg), &fl, sizeof(fl));
349 		if (error)
350 			return error;
351 		error = do_fcntl_lock(fd, cmd, &fl);
352 		if (cmd == F_GETLK && error == 0)
353 			error = copyout(&fl, SCARG(uap, arg), sizeof(fl));
354 		return error;
355 
356 	default:
357 		/* Handled below */
358 		break;
359 	}
360 
361 	if ((fp = fd_getfile(fd)) == NULL)
362 		return (EBADF);
363 	ff = fdp->fd_ofiles[fd];
364 
365 	if ((cmd & F_FSCTL)) {
366 		error = fcntl_forfs(fd, fp, cmd, SCARG(uap, arg));
367 		fd_putfile(fd);
368 		return error;
369 	}
370 
371 	switch (cmd) {
372 	case F_DUPFD:
373 		newmin = (long)SCARG(uap, arg);
374 		if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
375 		    (u_int)newmin >= maxfiles) {
376 			fd_putfile(fd);
377 			return EINVAL;
378 		}
379 		error = fd_dup(fp, newmin, &i, 0);
380 		*retval = i;
381 		break;
382 
383 	case F_GETFD:
384 		*retval = ff->ff_exclose;
385 		break;
386 
387 	case F_SETFD:
388 		if ((long)SCARG(uap, arg) & 1) {
389 			ff->ff_exclose = 1;
390 			fdp->fd_exclose = 1;
391 		} else {
392 			ff->ff_exclose = 0;
393 		}
394 		break;
395 
396 	case F_GETFL:
397 		*retval = OFLAGS(fp->f_flag);
398 		break;
399 
400 	case F_SETFL:
401 		/* XXX not guaranteed to be atomic. */
402 		tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS;
403 		error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp);
404 		if (error)
405 			break;
406 		i = tmp ^ fp->f_flag;
407 		if (i & FNONBLOCK) {
408 			int flgs = tmp & FNONBLOCK;
409 			error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs);
410 			if (error) {
411 				(*fp->f_ops->fo_fcntl)(fp, F_SETFL,
412 				    &fp->f_flag);
413 				break;
414 			}
415 		}
416 		if (i & FASYNC) {
417 			int flgs = tmp & FASYNC;
418 			error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs);
419 			if (error) {
420 				if (i & FNONBLOCK) {
421 					tmp = fp->f_flag & FNONBLOCK;
422 					(void)(*fp->f_ops->fo_ioctl)(fp,
423 						FIONBIO, &tmp);
424 				}
425 				(*fp->f_ops->fo_fcntl)(fp, F_SETFL,
426 				    &fp->f_flag);
427 				break;
428 			}
429 		}
430 		fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp;
431 		break;
432 
433 	case F_GETOWN:
434 		error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp);
435 		*retval = tmp;
436 		break;
437 
438 	case F_SETOWN:
439 		tmp = (int)(intptr_t) SCARG(uap, arg);
440 		error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp);
441 		break;
442 
443 	default:
444 		error = EINVAL;
445 	}
446 
447 	fd_putfile(fd);
448 	return (error);
449 }
450 
451 /*
452  * Close a file descriptor.
453  */
454 int
455 sys_close(struct lwp *l, const struct sys_close_args *uap, register_t *retval)
456 {
457 	/* {
458 		syscallarg(int)	fd;
459 	} */
460 
461 	if (fd_getfile(SCARG(uap, fd)) == NULL) {
462 		return EBADF;
463 	}
464 	return fd_close(SCARG(uap, fd));
465 }
466 
467 /*
468  * Return status information about a file descriptor.
469  * Common function for compat code.
470  */
471 int
472 do_sys_fstat(int fd, struct stat *sb)
473 {
474 	file_t *fp;
475 	int error;
476 
477 	if ((fp = fd_getfile(fd)) == NULL) {
478 		return EBADF;
479 	}
480 	error = (*fp->f_ops->fo_stat)(fp, sb);
481 	fd_putfile(fd);
482 
483 	return error;
484 }
485 
486 /*
487  * Return status information about a file descriptor.
488  */
489 int
490 sys___fstat30(struct lwp *l, const struct sys___fstat30_args *uap,
491 	      register_t *retval)
492 {
493 	/* {
494 		syscallarg(int)			fd;
495 		syscallarg(struct stat *)	sb;
496 	} */
497 	struct stat sb;
498 	int error;
499 
500 	error = do_sys_fstat(SCARG(uap, fd), &sb);
501 	if (error == 0) {
502 		error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
503 	}
504 	return error;
505 }
506 
507 /*
508  * Return pathconf information about a file descriptor.
509  */
510 int
511 sys_fpathconf(struct lwp *l, const struct sys_fpathconf_args *uap,
512 	      register_t *retval)
513 {
514 	/* {
515 		syscallarg(int)	fd;
516 		syscallarg(int)	name;
517 	} */
518 	int fd, error;
519 	file_t *fp;
520 
521 	fd = SCARG(uap, fd);
522 	error = 0;
523 
524 	if ((fp = fd_getfile(fd)) == NULL) {
525 		return (EBADF);
526 	}
527 	switch (fp->f_type) {
528 	case DTYPE_SOCKET:
529 	case DTYPE_PIPE:
530 		if (SCARG(uap, name) != _PC_PIPE_BUF)
531 			error = EINVAL;
532 		else
533 			*retval = PIPE_BUF;
534 		break;
535 
536 	case DTYPE_VNODE:
537 		error = VOP_PATHCONF(fp->f_data, SCARG(uap, name), retval);
538 		break;
539 
540 	case DTYPE_KQUEUE:
541 		error = EINVAL;
542 		break;
543 
544 	default:
545 		error = EOPNOTSUPP;
546 		break;
547 	}
548 
549 	fd_putfile(fd);
550 	return (error);
551 }
552 
553 /*
554  * Apply an advisory lock on a file descriptor.
555  *
556  * Just attempt to get a record lock of the requested type on
557  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
558  */
559 /* ARGSUSED */
560 int
561 sys_flock(struct lwp *l, const struct sys_flock_args *uap, register_t *retval)
562 {
563 	/* {
564 		syscallarg(int)	fd;
565 		syscallarg(int)	how;
566 	} */
567 	int fd, how, error;
568 	file_t *fp;
569 	vnode_t	*vp;
570 	struct flock lf;
571 	proc_t *p;
572 
573 	fd = SCARG(uap, fd);
574 	how = SCARG(uap, how);
575 	error = 0;
576 
577 	if ((fp = fd_getfile(fd)) == NULL) {
578 		return EBADF;
579 	}
580 	if (fp->f_type != DTYPE_VNODE) {
581 		fd_putfile(fd);
582 		return EOPNOTSUPP;
583 	}
584 
585 	vp = fp->f_data;
586 	lf.l_whence = SEEK_SET;
587 	lf.l_start = 0;
588 	lf.l_len = 0;
589 	if (how & LOCK_UN) {
590 		lf.l_type = F_UNLCK;
591 		atomic_and_uint(&fp->f_flag, ~FHASLOCK);
592 		error = VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK);
593 		fd_putfile(fd);
594 		return error;
595 	}
596 	if (how & LOCK_EX) {
597 		lf.l_type = F_WRLCK;
598 	} else if (how & LOCK_SH) {
599 		lf.l_type = F_RDLCK;
600 	} else {
601 		fd_putfile(fd);
602 		return EINVAL;
603 	}
604 	atomic_or_uint(&fp->f_flag, FHASLOCK);
605 	p = curproc;
606 	if (how & LOCK_NB) {
607 		error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK);
608 	} else {
609 		error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK|F_WAIT);
610 	}
611 	fd_putfile(fd);
612 	return error;
613 }
614 
615 int
616 do_posix_fadvise(int fd, off_t offset, off_t len, int advice)
617 {
618 	file_t *fp;
619 	int error;
620 
621 	if ((fp = fd_getfile(fd)) == NULL) {
622 		return EBADF;
623 	}
624 	if (fp->f_type != DTYPE_VNODE) {
625 		if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
626 			error = ESPIPE;
627 		} else {
628 			error = EOPNOTSUPP;
629 		}
630 		fd_putfile(fd);
631 		return error;
632 	}
633 
634 	switch (advice) {
635 	case POSIX_FADV_NORMAL:
636 	case POSIX_FADV_RANDOM:
637 	case POSIX_FADV_SEQUENTIAL:
638 		KASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL);
639 		KASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM);
640 		KASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL);
641 
642 		/*
643 		 * We ignore offset and size.  must lock the file to
644 		 * do this, as f_advice is sub-word sized.
645 		 */
646 		mutex_enter(&fp->f_lock);
647 		fp->f_advice = (u_char)advice;
648 		mutex_exit(&fp->f_lock);
649 		error = 0;
650 		break;
651 
652 	case POSIX_FADV_WILLNEED:
653 	case POSIX_FADV_DONTNEED:
654 	case POSIX_FADV_NOREUSE:
655 		/* Not implemented yet. */
656 		error = 0;
657 		break;
658 	default:
659 		error = EINVAL;
660 		break;
661 	}
662 
663 	fd_putfile(fd);
664 	return error;
665 }
666 
/*
 * posix_fadvise system call: unpack the syscall arguments and hand
 * them to do_posix_fadvise(), returning its result.  The "pad"
 * argument is not used.
 */
int
sys___posix_fadvise50(struct lwp *l,
		      const struct sys___posix_fadvise50_args *uap,
		      register_t *retval)
{
	/* {
		syscallarg(int) fd;
		syscallarg(int) pad;
		syscallarg(off_t) offset;
		syscallarg(off_t) len;
		syscallarg(int) advice;
	} */
	const int fd = SCARG(uap, fd);
	const off_t offset = SCARG(uap, offset);
	const off_t len = SCARG(uap, len);
	const int advice = SCARG(uap, advice);

	return do_posix_fadvise(fd, offset, len, advice);
}
683