xref: /netbsd-src/sys/kern/sys_descrip.c (revision cd22f25e6f6d1cc1f197fe8c5468a80f51d1c4e1)
1 /*	$NetBSD: sys_descrip.c,v 1.3 2008/04/28 20:24:04 martin Exp $	*/
2 
3 /*-
4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright (c) 1982, 1986, 1989, 1991, 1993
31  *	The Regents of the University of California.  All rights reserved.
32  * (c) UNIX System Laboratories, Inc.
33  * All or some portions of this file are derived from material licensed
34  * to the University of California by American Telephone and Telegraph
35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36  * the permission of UNIX System Laboratories, Inc.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. Neither the name of the University nor the names of its contributors
47  *    may be used to endorse or promote products derived from this software
48  *    without specific prior written permission.
49  *
50  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60  * SUCH DAMAGE.
61  *
62  *	@(#)kern_descrip.c	8.8 (Berkeley) 2/14/95
63  */
64 
65 /*
66  * System calls on descriptors.
67  */
68 
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.3 2008/04/28 20:24:04 martin Exp $");
71 
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/kernel.h>
76 #include <sys/vnode.h>
77 #include <sys/proc.h>
78 #include <sys/file.h>
79 #include <sys/namei.h>
80 #include <sys/socket.h>
81 #include <sys/socketvar.h>
82 #include <sys/stat.h>
83 #include <sys/ioctl.h>
84 #include <sys/fcntl.h>
85 #include <sys/malloc.h>
86 #include <sys/pool.h>
87 #include <sys/syslog.h>
88 #include <sys/unistd.h>
89 #include <sys/resourcevar.h>
90 #include <sys/conf.h>
91 #include <sys/event.h>
92 #include <sys/kauth.h>
93 #include <sys/atomic.h>
94 #include <sys/mount.h>
95 #include <sys/syscallargs.h>
96 
97 /*
98  * Duplicate a file descriptor.
99  */
100 int
101 sys_dup(struct lwp *l, const struct sys_dup_args *uap, register_t *retval)
102 {
103 	/* {
104 		syscallarg(int)	fd;
105 	} */
106 	int new, error, old;
107 	file_t *fp;
108 
109 	old = SCARG(uap, fd);
110 
111 	if ((fp = fd_getfile(old)) == NULL) {
112 		return EBADF;
113 	}
114 	error = fd_dup(fp, 0, &new, 0);
115 	fd_putfile(old);
116 	*retval = new;
117 	return error;
118 }
119 
120 /*
121  * Duplicate a file descriptor to a particular value.
122  */
123 int
124 sys_dup2(struct lwp *l, const struct sys_dup2_args *uap, register_t *retval)
125 {
126 	/* {
127 		syscallarg(int)	from;
128 		syscallarg(int)	to;
129 	} */
130 	int old, new, error;
131 	file_t *fp;
132 
133 	old = SCARG(uap, from);
134 	new = SCARG(uap, to);
135 
136 	if ((fp = fd_getfile(old)) == NULL) {
137 		return EBADF;
138 	}
139 	if ((u_int)new >= curproc->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
140 	    (u_int)new >= maxfiles) {
141 		error = EBADF;
142 	} else if (old == new) {
143 		error = 0;
144 	} else {
145 		error = fd_dup2(fp, new);
146 	}
147 	fd_putfile(old);
148 	*retval = new;
149 
150 	return 0;
151 }
152 
153 /*
154  * fcntl call which is being passed to the file's fs.
155  */
156 static int
157 fcntl_forfs(int fd, file_t *fp, int cmd, void *arg)
158 {
159 	int		error;
160 	u_int		size;
161 	void		*data, *memp;
162 #define STK_PARAMS	128
163 	char		stkbuf[STK_PARAMS];
164 
165 	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
166 		return (EBADF);
167 
168 	/*
169 	 * Interpret high order word to find amount of data to be
170 	 * copied to/from the user's address space.
171 	 */
172 	size = (size_t)F_PARAM_LEN(cmd);
173 	if (size > F_PARAM_MAX)
174 		return (EINVAL);
175 	memp = NULL;
176 	if (size > sizeof(stkbuf)) {
177 		memp = kmem_alloc(size, KM_SLEEP);
178 		data = memp;
179 	} else
180 		data = stkbuf;
181 	if (cmd & F_FSIN) {
182 		if (size) {
183 			error = copyin(arg, data, size);
184 			if (error) {
185 				if (memp)
186 					kmem_free(memp, size);
187 				return (error);
188 			}
189 		} else
190 			*(void **)data = arg;
191 	} else if ((cmd & F_FSOUT) != 0 && size != 0) {
192 		/*
193 		 * Zero the buffer so the user always
194 		 * gets back something deterministic.
195 		 */
196 		memset(data, 0, size);
197 	} else if (cmd & F_FSVOID)
198 		*(void **)data = arg;
199 
200 
201 	error = (*fp->f_ops->fo_fcntl)(fp, cmd, data);
202 
203 	/*
204 	 * Copy any data to user, size was
205 	 * already set and checked above.
206 	 */
207 	if (error == 0 && (cmd & F_FSOUT) && size)
208 		error = copyout(data, arg, size);
209 	if (memp)
210 		kmem_free(memp, size);
211 	return (error);
212 }
213 
214 int
215 do_fcntl_lock(int fd, int cmd, struct flock *fl)
216 {
217 	file_t *fp;
218 	vnode_t *vp;
219 	proc_t *p;
220 	int error, flg;
221 
222 	if ((fp = fd_getfile(fd)) == NULL)
223 		return EBADF;
224 	if (fp->f_type != DTYPE_VNODE) {
225 		fd_putfile(fd);
226 		return EINVAL;
227 	}
228 	vp = fp->f_data;
229 	if (fl->l_whence == SEEK_CUR)
230 		fl->l_start += fp->f_offset;
231 
232 	flg = F_POSIX;
233 	p = curproc;
234 
235 	switch (cmd) {
236 	case F_SETLKW:
237 		flg |= F_WAIT;
238 		/* Fall into F_SETLK */
239 
240 	case F_SETLK:
241 		switch (fl->l_type) {
242 		case F_RDLCK:
243 			if ((fp->f_flag & FREAD) == 0) {
244 				error = EBADF;
245 				break;
246 			}
247 			if ((p->p_flag & PK_ADVLOCK) == 0) {
248 				mutex_enter(p->p_lock);
249 				p->p_flag |= PK_ADVLOCK;
250 				mutex_exit(p->p_lock);
251 			}
252 			error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg);
253 			break;
254 
255 		case F_WRLCK:
256 			if ((fp->f_flag & FWRITE) == 0) {
257 				error = EBADF;
258 				break;
259 			}
260 			if ((p->p_flag & PK_ADVLOCK) == 0) {
261 				mutex_enter(p->p_lock);
262 				p->p_flag |= PK_ADVLOCK;
263 				mutex_exit(p->p_lock);
264 			}
265 			error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg);
266 			break;
267 
268 		case F_UNLCK:
269 			error = VOP_ADVLOCK(vp, p, F_UNLCK, fl, F_POSIX);
270 			break;
271 
272 		default:
273 			error = EINVAL;
274 			break;
275 		}
276 		break;
277 
278 	case F_GETLK:
279 		if (fl->l_type != F_RDLCK &&
280 		    fl->l_type != F_WRLCK &&
281 		    fl->l_type != F_UNLCK) {
282 			error = EINVAL;
283 			break;
284 		}
285 		error = VOP_ADVLOCK(vp, p, F_GETLK, fl, F_POSIX);
286 		break;
287 
288 	default:
289 		error = EINVAL;
290 		break;
291 	}
292 
293 	fd_putfile(fd);
294 	return error;
295 }
296 
297 /*
298  * The file control system call.
299  */
300 int
301 sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval)
302 {
303 	/* {
304 		syscallarg(int)		fd;
305 		syscallarg(int)		cmd;
306 		syscallarg(void *)	arg;
307 	} */
308 	int fd, i, tmp, error, cmd, newmin;
309 	filedesc_t *fdp;
310 	file_t *fp;
311 	fdfile_t *ff;
312 	proc_t *p;
313 	struct flock fl;
314 
315 	p = l->l_proc;
316 	fd = SCARG(uap, fd);
317 	cmd = SCARG(uap, cmd);
318 	fdp = p->p_fd;
319 	error = 0;
320 
321 	switch (cmd) {
322 	case F_CLOSEM:
323 		if (fd < 0)
324 			return EBADF;
325 		while ((i = fdp->fd_lastfile) >= fd) {
326 			if (fd_getfile(i) == NULL) {
327 				/* Another thread has updated. */
328 				continue;
329 			}
330 			fd_close(i);
331 		}
332 		return 0;
333 
334 	case F_MAXFD:
335 		*retval = fdp->fd_lastfile;
336 		return 0;
337 
338 	case F_SETLKW:
339 	case F_SETLK:
340 	case F_GETLK:
341 		error = copyin(SCARG(uap, arg), &fl, sizeof(fl));
342 		if (error)
343 			return error;
344 		error = do_fcntl_lock(fd, cmd, &fl);
345 		if (cmd == F_GETLK && error == 0)
346 			error = copyout(&fl, SCARG(uap, arg), sizeof(fl));
347 		return error;
348 
349 	default:
350 		/* Handled below */
351 		break;
352 	}
353 
354 	if ((fp = fd_getfile(fd)) == NULL)
355 		return (EBADF);
356 	ff = fdp->fd_ofiles[fd];
357 
358 	if ((cmd & F_FSCTL)) {
359 		error = fcntl_forfs(fd, fp, cmd, SCARG(uap, arg));
360 		fd_putfile(fd);
361 		return error;
362 	}
363 
364 	switch (cmd) {
365 	case F_DUPFD:
366 		newmin = (long)SCARG(uap, arg);
367 		if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
368 		    (u_int)newmin >= maxfiles) {
369 			fd_putfile(fd);
370 			return EINVAL;
371 		}
372 		error = fd_dup(fp, newmin, &i, 0);
373 		*retval = i;
374 		break;
375 
376 	case F_GETFD:
377 		*retval = ff->ff_exclose;
378 		break;
379 
380 	case F_SETFD:
381 		if ((long)SCARG(uap, arg) & 1) {
382 			ff->ff_exclose = 1;
383 			fdp->fd_exclose = 1;
384 		} else {
385 			ff->ff_exclose = 0;
386 		}
387 		break;
388 
389 	case F_GETFL:
390 		*retval = OFLAGS(fp->f_flag);
391 		break;
392 
393 	case F_SETFL:
394 		/* XXX not guaranteed to be atomic. */
395 		tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS;
396 		error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp);
397 		if (error)
398 			break;
399 		i = tmp ^ fp->f_flag;
400 		if (i & FNONBLOCK) {
401 			int flgs = tmp & FNONBLOCK;
402 			error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs);
403 			if (error) {
404 				(*fp->f_ops->fo_fcntl)(fp, F_SETFL,
405 				    &fp->f_flag);
406 				break;
407 			}
408 		}
409 		if (i & FASYNC) {
410 			int flgs = tmp & FASYNC;
411 			error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs);
412 			if (error) {
413 				if (i & FNONBLOCK) {
414 					tmp = fp->f_flag & FNONBLOCK;
415 					(void)(*fp->f_ops->fo_ioctl)(fp,
416 						FIONBIO, &tmp);
417 				}
418 				(*fp->f_ops->fo_fcntl)(fp, F_SETFL,
419 				    &fp->f_flag);
420 				break;
421 			}
422 		}
423 		fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp;
424 		break;
425 
426 	case F_GETOWN:
427 		error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp);
428 		*retval = tmp;
429 		break;
430 
431 	case F_SETOWN:
432 		tmp = (int)(intptr_t) SCARG(uap, arg);
433 		error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp);
434 		break;
435 
436 	default:
437 		error = EINVAL;
438 	}
439 
440 	fd_putfile(fd);
441 	return (error);
442 }
443 
444 /*
445  * Close a file descriptor.
446  */
447 int
448 sys_close(struct lwp *l, const struct sys_close_args *uap, register_t *retval)
449 {
450 	/* {
451 		syscallarg(int)	fd;
452 	} */
453 
454 	if (fd_getfile(SCARG(uap, fd)) == NULL) {
455 		return EBADF;
456 	}
457 	return fd_close(SCARG(uap, fd));
458 }
459 
460 /*
461  * Return status information about a file descriptor.
462  * Common function for compat code.
463  */
464 int
465 do_sys_fstat(int fd, struct stat *sb)
466 {
467 	file_t *fp;
468 	int error;
469 
470 	if ((fp = fd_getfile(fd)) == NULL) {
471 		return EBADF;
472 	}
473 	error = (*fp->f_ops->fo_stat)(fp, sb);
474 	fd_putfile(fd);
475 
476 	return error;
477 }
478 
479 /*
480  * Return status information about a file descriptor.
481  */
482 int
483 sys___fstat30(struct lwp *l, const struct sys___fstat30_args *uap,
484 	      register_t *retval)
485 {
486 	/* {
487 		syscallarg(int)			fd;
488 		syscallarg(struct stat *)	sb;
489 	} */
490 	struct stat sb;
491 	int error;
492 
493 	error = do_sys_fstat(SCARG(uap, fd), &sb);
494 	if (error == 0) {
495 		error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
496 	}
497 	return error;
498 }
499 
500 /*
501  * Return pathconf information about a file descriptor.
502  */
503 int
504 sys_fpathconf(struct lwp *l, const struct sys_fpathconf_args *uap,
505 	      register_t *retval)
506 {
507 	/* {
508 		syscallarg(int)	fd;
509 		syscallarg(int)	name;
510 	} */
511 	int fd, error;
512 	file_t *fp;
513 
514 	fd = SCARG(uap, fd);
515 	error = 0;
516 
517 	if ((fp = fd_getfile(fd)) == NULL) {
518 		return (EBADF);
519 	}
520 	switch (fp->f_type) {
521 	case DTYPE_SOCKET:
522 	case DTYPE_PIPE:
523 		if (SCARG(uap, name) != _PC_PIPE_BUF)
524 			error = EINVAL;
525 		else
526 			*retval = PIPE_BUF;
527 		break;
528 
529 	case DTYPE_VNODE:
530 		error = VOP_PATHCONF(fp->f_data, SCARG(uap, name), retval);
531 		break;
532 
533 	case DTYPE_KQUEUE:
534 		error = EINVAL;
535 		break;
536 
537 	default:
538 		error = EOPNOTSUPP;
539 		break;
540 	}
541 
542 	fd_putfile(fd);
543 	return (error);
544 }
545 
546 /*
547  * Apply an advisory lock on a file descriptor.
548  *
549  * Just attempt to get a record lock of the requested type on
550  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
551  */
552 /* ARGSUSED */
553 int
554 sys_flock(struct lwp *l, const struct sys_flock_args *uap, register_t *retval)
555 {
556 	/* {
557 		syscallarg(int)	fd;
558 		syscallarg(int)	how;
559 	} */
560 	int fd, how, error;
561 	file_t *fp;
562 	vnode_t	*vp;
563 	struct flock lf;
564 	proc_t *p;
565 
566 	fd = SCARG(uap, fd);
567 	how = SCARG(uap, how);
568 	error = 0;
569 
570 	if ((fp = fd_getfile(fd)) == NULL) {
571 		return EBADF;
572 	}
573 	if (fp->f_type != DTYPE_VNODE) {
574 		fd_putfile(fd);
575 		return EOPNOTSUPP;
576 	}
577 
578 	vp = fp->f_data;
579 	lf.l_whence = SEEK_SET;
580 	lf.l_start = 0;
581 	lf.l_len = 0;
582 	if (how & LOCK_UN) {
583 		lf.l_type = F_UNLCK;
584 		atomic_and_uint(&fp->f_flag, ~FHASLOCK);
585 		error = VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK);
586 		fd_putfile(fd);
587 		return error;
588 	}
589 	if (how & LOCK_EX) {
590 		lf.l_type = F_WRLCK;
591 	} else if (how & LOCK_SH) {
592 		lf.l_type = F_RDLCK;
593 	} else {
594 		fd_putfile(fd);
595 		return EINVAL;
596 	}
597 	atomic_or_uint(&fp->f_flag, FHASLOCK);
598 	p = curproc;
599 	if (how & LOCK_NB) {
600 		error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK);
601 	} else {
602 		error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK|F_WAIT);
603 	}
604 	fd_putfile(fd);
605 	return error;
606 }
607 
608 int
609 do_posix_fadvise(int fd, off_t offset, off_t len, int advice)
610 {
611 	file_t *fp;
612 	int error;
613 
614 	if ((fp = fd_getfile(fd)) == NULL) {
615 		return EBADF;
616 	}
617 	if (fp->f_type != DTYPE_VNODE) {
618 		if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
619 			error = ESPIPE;
620 		} else {
621 			error = EOPNOTSUPP;
622 		}
623 		fd_putfile(fd);
624 		return error;
625 	}
626 
627 	switch (advice) {
628 	case POSIX_FADV_NORMAL:
629 	case POSIX_FADV_RANDOM:
630 	case POSIX_FADV_SEQUENTIAL:
631 		KASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL);
632 		KASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM);
633 		KASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL);
634 
635 		/*
636 		 * We ignore offset and size.  must lock the file to
637 		 * do this, as f_advice is sub-word sized.
638 		 */
639 		mutex_enter(&fp->f_lock);
640 		fp->f_advice = (u_char)advice;
641 		mutex_exit(&fp->f_lock);
642 		error = 0;
643 		break;
644 
645 	case POSIX_FADV_WILLNEED:
646 	case POSIX_FADV_DONTNEED:
647 	case POSIX_FADV_NOREUSE:
648 		/* Not implemented yet. */
649 		error = 0;
650 		break;
651 	default:
652 		error = EINVAL;
653 		break;
654 	}
655 
656 	fd_putfile(fd);
657 	return error;
658 }
659 
660 int
661 sys___posix_fadvise50(struct lwp *l,
662 		      const struct sys___posix_fadvise50_args *uap,
663 		      register_t *retval)
664 {
665 	/* {
666 		syscallarg(int) fd;
667 		syscallarg(int) pad;
668 		syscallarg(off_t) offset;
669 		syscallarg(off_t) len;
670 		syscallarg(int) advice;
671 	} */
672 
673 	return do_posix_fadvise(SCARG(uap, fd), SCARG(uap, offset),
674 	    SCARG(uap, len), SCARG(uap, advice));
675 }
676