1 /* $NetBSD: linux_file.c,v 1.99 2010/07/01 02:38:28 rmind Exp $ */ 2 3 /*- 4 * Copyright (c) 1995, 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Frank van der Linden and Eric Haszlakiewicz. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 
30 */ 31 32 /* 33 * Functions in multiarch: 34 * linux_sys_llseek : linux_llseek.c 35 */ 36 37 #include <sys/cdefs.h> 38 __KERNEL_RCSID(0, "$NetBSD: linux_file.c,v 1.99 2010/07/01 02:38:28 rmind Exp $"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/namei.h> 43 #include <sys/proc.h> 44 #include <sys/file.h> 45 #include <sys/stat.h> 46 #include <sys/filedesc.h> 47 #include <sys/ioctl.h> 48 #include <sys/kernel.h> 49 #include <sys/mount.h> 50 #include <sys/malloc.h> 51 #include <sys/namei.h> 52 #include <sys/vnode.h> 53 #include <sys/tty.h> 54 #include <sys/socketvar.h> 55 #include <sys/conf.h> 56 #include <sys/pipe.h> 57 58 #include <sys/syscallargs.h> 59 #include <sys/vfs_syscalls.h> 60 61 #include <compat/linux/common/linux_types.h> 62 #include <compat/linux/common/linux_signal.h> 63 #include <compat/linux/common/linux_fcntl.h> 64 #include <compat/linux/common/linux_util.h> 65 #include <compat/linux/common/linux_machdep.h> 66 #include <compat/linux/common/linux_ipc.h> 67 #include <compat/linux/common/linux_sem.h> 68 69 #include <compat/linux/linux_syscallargs.h> 70 71 static int linux_to_bsd_ioflags(int); 72 static int bsd_to_linux_ioflags(int); 73 #ifndef __amd64__ 74 static void bsd_to_linux_stat(struct stat *, struct linux_stat *); 75 #endif 76 77 conv_linux_flock(linux, flock) 78 79 /* 80 * Some file-related calls are handled here. The usual flag conversion 81 * an structure conversion is done, and alternate emul path searching. 82 */ 83 84 /* 85 * The next two functions convert between the Linux and NetBSD values 86 * of the flags used in open(2) and fcntl(2). 
87 */ 88 static int 89 linux_to_bsd_ioflags(int lflags) 90 { 91 int res = 0; 92 93 res |= cvtto_bsd_mask(lflags, LINUX_O_WRONLY, O_WRONLY); 94 res |= cvtto_bsd_mask(lflags, LINUX_O_RDONLY, O_RDONLY); 95 res |= cvtto_bsd_mask(lflags, LINUX_O_RDWR, O_RDWR); 96 res |= cvtto_bsd_mask(lflags, LINUX_O_CREAT, O_CREAT); 97 res |= cvtto_bsd_mask(lflags, LINUX_O_EXCL, O_EXCL); 98 res |= cvtto_bsd_mask(lflags, LINUX_O_NOCTTY, O_NOCTTY); 99 res |= cvtto_bsd_mask(lflags, LINUX_O_TRUNC, O_TRUNC); 100 res |= cvtto_bsd_mask(lflags, LINUX_O_NDELAY, O_NDELAY); 101 res |= cvtto_bsd_mask(lflags, LINUX_O_SYNC, O_FSYNC); 102 res |= cvtto_bsd_mask(lflags, LINUX_FASYNC, O_ASYNC); 103 res |= cvtto_bsd_mask(lflags, LINUX_O_APPEND, O_APPEND); 104 105 return res; 106 } 107 108 static int 109 bsd_to_linux_ioflags(int bflags) 110 { 111 int res = 0; 112 113 res |= cvtto_linux_mask(bflags, O_WRONLY, LINUX_O_WRONLY); 114 res |= cvtto_linux_mask(bflags, O_RDONLY, LINUX_O_RDONLY); 115 res |= cvtto_linux_mask(bflags, O_RDWR, LINUX_O_RDWR); 116 res |= cvtto_linux_mask(bflags, O_CREAT, LINUX_O_CREAT); 117 res |= cvtto_linux_mask(bflags, O_EXCL, LINUX_O_EXCL); 118 res |= cvtto_linux_mask(bflags, O_NOCTTY, LINUX_O_NOCTTY); 119 res |= cvtto_linux_mask(bflags, O_TRUNC, LINUX_O_TRUNC); 120 res |= cvtto_linux_mask(bflags, O_NDELAY, LINUX_O_NDELAY); 121 res |= cvtto_linux_mask(bflags, O_FSYNC, LINUX_O_SYNC); 122 res |= cvtto_linux_mask(bflags, O_ASYNC, LINUX_FASYNC); 123 res |= cvtto_linux_mask(bflags, O_APPEND, LINUX_O_APPEND); 124 125 return res; 126 } 127 128 /* 129 * creat(2) is an obsolete function, but it's present as a Linux 130 * system call, so let's deal with it. 131 * 132 * Note: On the Alpha this doesn't really exist in Linux, but it's defined 133 * in syscalls.master anyway so this doesn't have to be special cased. 134 * 135 * Just call open(2) with the TRUNC, CREAT and WRONLY flags. 
136 */ 137 int 138 linux_sys_creat(struct lwp *l, const struct linux_sys_creat_args *uap, register_t *retval) 139 { 140 /* { 141 syscallarg(const char *) path; 142 syscallarg(int) mode; 143 } */ 144 struct sys_open_args oa; 145 146 SCARG(&oa, path) = SCARG(uap, path); 147 SCARG(&oa, flags) = O_CREAT | O_TRUNC | O_WRONLY; 148 SCARG(&oa, mode) = SCARG(uap, mode); 149 150 return sys_open(l, &oa, retval); 151 } 152 153 /* 154 * open(2). Take care of the different flag values, and let the 155 * NetBSD syscall do the real work. See if this operation 156 * gives the current process a controlling terminal. 157 * (XXX is this necessary?) 158 */ 159 int 160 linux_sys_open(struct lwp *l, const struct linux_sys_open_args *uap, register_t *retval) 161 { 162 /* { 163 syscallarg(const char *) path; 164 syscallarg(int) flags; 165 syscallarg(int) mode; 166 } */ 167 struct proc *p = l->l_proc; 168 int error, fl; 169 struct sys_open_args boa; 170 171 fl = linux_to_bsd_ioflags(SCARG(uap, flags)); 172 173 SCARG(&boa, path) = SCARG(uap, path); 174 SCARG(&boa, flags) = fl; 175 SCARG(&boa, mode) = SCARG(uap, mode); 176 177 if ((error = sys_open(l, &boa, retval))) 178 return error; 179 180 /* 181 * this bit from sunos_misc.c (and svr4_fcntl.c). 182 * If we are a session leader, and we don't have a controlling 183 * terminal yet, and the O_NOCTTY flag is not set, try to make 184 * this the controlling terminal. 185 */ 186 if (!(fl & O_NOCTTY) && SESS_LEADER(p) && !(p->p_lflag & PL_CONTROLT)) { 187 file_t *fp; 188 189 fp = fd_getfile(*retval); 190 191 /* ignore any error, just give it a try */ 192 if (fp != NULL) { 193 if (fp->f_type == DTYPE_VNODE) { 194 (fp->f_ops->fo_ioctl) (fp, TIOCSCTTY, NULL); 195 } 196 fd_putfile(*retval); 197 } 198 } 199 return 0; 200 } 201 202 /* 203 * Most actions in the fcntl() call are straightforward; simply 204 * pass control to the NetBSD system call. 
A few commands need 205 * conversions after the actual system call has done its work, 206 * because the flag values and lock structure are different. 207 */ 208 int 209 linux_sys_fcntl(struct lwp *l, const struct linux_sys_fcntl_args *uap, register_t *retval) 210 { 211 /* { 212 syscallarg(int) fd; 213 syscallarg(int) cmd; 214 syscallarg(void *) arg; 215 } */ 216 struct proc *p = l->l_proc; 217 int fd, cmd, error; 218 u_long val; 219 void *arg; 220 struct sys_fcntl_args fca; 221 file_t *fp; 222 struct vnode *vp; 223 struct vattr va; 224 long pgid; 225 struct pgrp *pgrp; 226 struct tty *tp; 227 228 fd = SCARG(uap, fd); 229 cmd = SCARG(uap, cmd); 230 arg = SCARG(uap, arg); 231 232 switch (cmd) { 233 234 case LINUX_F_DUPFD: 235 cmd = F_DUPFD; 236 break; 237 238 case LINUX_F_GETFD: 239 cmd = F_GETFD; 240 break; 241 242 case LINUX_F_SETFD: 243 cmd = F_SETFD; 244 break; 245 246 case LINUX_F_GETFL: 247 SCARG(&fca, fd) = fd; 248 SCARG(&fca, cmd) = F_GETFL; 249 SCARG(&fca, arg) = arg; 250 if ((error = sys_fcntl(l, &fca, retval))) 251 return error; 252 retval[0] = bsd_to_linux_ioflags(retval[0]); 253 return 0; 254 255 case LINUX_F_SETFL: { 256 file_t *fp1 = NULL; 257 258 val = linux_to_bsd_ioflags((unsigned long)SCARG(uap, arg)); 259 /* 260 * Linux seems to have same semantics for sending SIGIO to the 261 * read side of socket, but slightly different semantics 262 * for SIGIO to the write side. Rather than sending the SIGIO 263 * every time it's possible to write (directly) more data, it 264 * only sends SIGIO if last write(2) failed due to insufficient 265 * memory to hold the data. This is compatible enough 266 * with NetBSD semantics to not do anything about the 267 * difference. 268 * 269 * Linux does NOT send SIGIO for pipes. Deal with socketpair 270 * ones and DTYPE_PIPE ones. 
For these, we don't set 271 * the underlying flags (we don't pass O_ASYNC flag down 272 * to sys_fcntl()), but set the FASYNC flag for file descriptor, 273 * so that F_GETFL would report the ASYNC i/o is on. 274 */ 275 if (val & O_ASYNC) { 276 if (((fp1 = fd_getfile(fd)) == NULL)) 277 return (EBADF); 278 if (((fp1->f_type == DTYPE_SOCKET) && fp1->f_data 279 && ((struct socket *)fp1->f_data)->so_state & SS_ISAPIPE) 280 || (fp1->f_type == DTYPE_PIPE)) 281 val &= ~O_ASYNC; 282 else { 283 /* not a pipe, do not modify anything */ 284 fd_putfile(fd); 285 fp1 = NULL; 286 } 287 } 288 289 SCARG(&fca, fd) = fd; 290 SCARG(&fca, cmd) = F_SETFL; 291 SCARG(&fca, arg) = (void *) val; 292 293 error = sys_fcntl(l, &fca, retval); 294 295 /* Now set the FASYNC flag for pipes */ 296 if (fp1) { 297 if (!error) { 298 mutex_enter(&fp1->f_lock); 299 fp1->f_flag |= FASYNC; 300 mutex_exit(&fp1->f_lock); 301 } 302 fd_putfile(fd); 303 } 304 305 return (error); 306 } 307 308 case LINUX_F_GETLK: 309 do_linux_getlk(fd, cmd, arg, linux, flock); 310 311 case LINUX_F_SETLK: 312 case LINUX_F_SETLKW: 313 do_linux_setlk(fd, cmd, arg, linux, flock, LINUX_F_SETLK); 314 315 case LINUX_F_SETOWN: 316 case LINUX_F_GETOWN: 317 /* 318 * We need to route fcntl() for tty descriptors around normal 319 * fcntl(), since NetBSD tty TIOC{G,S}PGRP semantics is too 320 * restrictive for Linux F_{G,S}ETOWN. For non-tty descriptors, 321 * this is not a problem. 322 */ 323 if ((fp = fd_getfile(fd)) == NULL) 324 return EBADF; 325 326 /* Check it's a character device vnode */ 327 if (fp->f_type != DTYPE_VNODE 328 || (vp = (struct vnode *)fp->f_data) == NULL 329 || vp->v_type != VCHR) { 330 fd_putfile(fd); 331 332 not_tty: 333 /* Not a tty, proceed with common fcntl() */ 334 cmd = cmd == LINUX_F_SETOWN ? 
F_SETOWN : F_GETOWN; 335 break; 336 } 337 338 error = VOP_GETATTR(vp, &va, l->l_cred); 339 340 fd_putfile(fd); 341 342 if (error) 343 return error; 344 345 if ((tp = cdev_tty(va.va_rdev)) == NULL) 346 goto not_tty; 347 348 /* set tty pg_id appropriately */ 349 mutex_enter(proc_lock); 350 if (cmd == LINUX_F_GETOWN) { 351 retval[0] = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID; 352 mutex_exit(proc_lock); 353 return 0; 354 } 355 if ((long)arg <= 0) { 356 pgid = -(long)arg; 357 } else { 358 struct proc *p1 = proc_find((long)arg); 359 if (p1 == NULL) { 360 mutex_exit(proc_lock); 361 return (ESRCH); 362 } 363 pgid = (long)p1->p_pgrp->pg_id; 364 } 365 pgrp = pgrp_find(pgid); 366 if (pgrp == NULL || pgrp->pg_session != p->p_session) { 367 mutex_exit(proc_lock); 368 return EPERM; 369 } 370 tp->t_pgrp = pgrp; 371 mutex_exit(proc_lock); 372 return 0; 373 374 default: 375 return EOPNOTSUPP; 376 } 377 378 SCARG(&fca, fd) = fd; 379 SCARG(&fca, cmd) = cmd; 380 SCARG(&fca, arg) = arg; 381 382 return sys_fcntl(l, &fca, retval); 383 } 384 385 #if !defined(__amd64__) 386 /* 387 * Convert a NetBSD stat structure to a Linux stat structure. 388 * Only the order of the fields and the padding in the structure 389 * is different. linux_fakedev is a machine-dependent function 390 * which optionally converts device driver major/minor numbers 391 * (XXX horrible, but what can you do against code that compares 392 * things against constant major device numbers? 
* sigh)
 */

/*
 * bsp is the NetBSD stat data to read, lsp the Linux structure to fill.
 *
 * The destination is zeroed before any field is assigned.  The callers
 * in this file hand in an uninitialised on-stack structure and then
 * copyout() all of it, so any padding or member that is not assigned
 * here (e.g. the nanosecond fields when LINUX_STAT_HAS_NSEC is not
 * defined) would otherwise expose stale kernel stack contents.
 */
static void
bsd_to_linux_stat(struct stat *bsp, struct linux_stat *lsp)
{

	memset(lsp, 0, sizeof(*lsp));

	lsp->lst_dev     = linux_fakedev(bsp->st_dev, 0);
	lsp->lst_ino     = bsp->st_ino;
	lsp->lst_mode    = (linux_mode_t)bsp->st_mode;
	/* Link counts of 1 << 15 and above are clamped to (1 << 15) - 1. */
	if (bsp->st_nlink >= (1 << 15))
		lsp->lst_nlink = (1 << 15) - 1;
	else
		lsp->lst_nlink = (linux_nlink_t)bsp->st_nlink;
	lsp->lst_uid     = bsp->st_uid;
	lsp->lst_gid     = bsp->st_gid;
	lsp->lst_rdev    = linux_fakedev(bsp->st_rdev, 1);
	lsp->lst_size    = bsp->st_size;
	lsp->lst_blksize = bsp->st_blksize;
	lsp->lst_blocks  = bsp->st_blocks;
	lsp->lst_atime   = bsp->st_atime;
	lsp->lst_mtime   = bsp->st_mtime;
	lsp->lst_ctime   = bsp->st_ctime;
#ifdef LINUX_STAT_HAS_NSEC
	lsp->lst_atime_nsec   = bsp->st_atimensec;
	lsp->lst_mtime_nsec   = bsp->st_mtimensec;
	lsp->lst_ctime_nsec   = bsp->st_ctimensec;
#endif
}

/*
 * The stat functions below are plain sailing. stat and lstat are handled
 * by one function to avoid code duplication.
424 */ 425 int 426 linux_sys_fstat(struct lwp *l, const struct linux_sys_fstat_args *uap, register_t *retval) 427 { 428 /* { 429 syscallarg(int) fd; 430 syscallarg(linux_stat *) sp; 431 } */ 432 struct linux_stat tmplst; 433 struct stat tmpst; 434 int error; 435 436 error = do_sys_fstat(SCARG(uap, fd), &tmpst); 437 if (error != 0) 438 return error; 439 bsd_to_linux_stat(&tmpst, &tmplst); 440 441 return copyout(&tmplst, SCARG(uap, sp), sizeof tmplst); 442 } 443 444 static int 445 linux_stat1(const struct linux_sys_stat_args *uap, register_t *retval, int flags) 446 { 447 struct linux_stat tmplst; 448 struct stat tmpst; 449 int error; 450 451 error = do_sys_stat(SCARG(uap, path), flags, &tmpst); 452 if (error != 0) 453 return error; 454 455 bsd_to_linux_stat(&tmpst, &tmplst); 456 457 return copyout(&tmplst, SCARG(uap, sp), sizeof tmplst); 458 } 459 460 int 461 linux_sys_stat(struct lwp *l, const struct linux_sys_stat_args *uap, register_t *retval) 462 { 463 /* { 464 syscallarg(const char *) path; 465 syscallarg(struct linux_stat *) sp; 466 } */ 467 468 return linux_stat1(uap, retval, FOLLOW); 469 } 470 471 /* Note: this is "newlstat" in the Linux sources */ 472 /* (we don't bother with the old lstat currently) */ 473 int 474 linux_sys_lstat(struct lwp *l, const struct linux_sys_lstat_args *uap, register_t *retval) 475 { 476 /* { 477 syscallarg(const char *) path; 478 syscallarg(struct linux_stat *) sp; 479 } */ 480 481 return linux_stat1((const void *)uap, retval, NOFOLLOW); 482 } 483 #endif /* !__amd64__ */ 484 485 /* 486 * The following syscalls are mostly here because of the alternate path check. 
487 */ 488 int 489 linux_sys_unlink(struct lwp *l, const struct linux_sys_unlink_args *uap, register_t *retval) 490 { 491 /* { 492 syscallarg(const char *) path; 493 } */ 494 int error; 495 struct nameidata nd; 496 497 error = sys_unlink(l, (const void *)uap, retval); 498 if (error != EPERM) 499 return (error); 500 501 /* 502 * Linux returns EISDIR if unlink(2) is called on a directory. 503 * We return EPERM in such cases. To emulate correct behaviour, 504 * check if the path points to directory and return EISDIR if this 505 * is the case. 506 */ 507 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_USERSPACE, 508 SCARG(uap, path)); 509 if (namei(&nd) == 0) { 510 struct stat sb; 511 512 if (vn_stat(nd.ni_vp, &sb) == 0 513 && S_ISDIR(sb.st_mode)) 514 error = EISDIR; 515 516 vput(nd.ni_vp); 517 } 518 519 return (error); 520 } 521 522 int 523 linux_sys_mknod(struct lwp *l, const struct linux_sys_mknod_args *uap, register_t *retval) 524 { 525 /* { 526 syscallarg(const char *) path; 527 syscallarg(int) mode; 528 syscallarg(int) dev; 529 } */ 530 531 /* 532 * BSD handles FIFOs separately 533 */ 534 if (S_ISFIFO(SCARG(uap, mode))) { 535 struct sys_mkfifo_args bma; 536 537 SCARG(&bma, path) = SCARG(uap, path); 538 SCARG(&bma, mode) = SCARG(uap, mode); 539 return sys_mkfifo(l, &bma, retval); 540 } else { 541 542 /* 543 * Linux device numbers uses 8 bits for minor and 8 bits 544 * for major. Due to how we map our major and minor, 545 * this just fits into our dev_t. Just mask off the 546 * upper 16bit to remove any random junk. 547 */ 548 return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode), 549 SCARG(uap, dev) & 0xffff, retval, UIO_USERSPACE); 550 } 551 } 552 553 /* 554 * This is just fsync() for now (just as it is in the Linux kernel) 555 * Note: this is not implemented under Linux on Alpha and Arm 556 * but should still be defined in our syscalls.master. 
557 * (syscall #148 on the arm) 558 */ 559 int 560 linux_sys_fdatasync(struct lwp *l, const struct linux_sys_fdatasync_args *uap, register_t *retval) 561 { 562 /* { 563 syscallarg(int) fd; 564 } */ 565 566 return sys_fsync(l, (const void *)uap, retval); 567 } 568 569 /* 570 * pread(2). 571 */ 572 int 573 linux_sys_pread(struct lwp *l, const struct linux_sys_pread_args *uap, register_t *retval) 574 { 575 /* { 576 syscallarg(int) fd; 577 syscallarg(void *) buf; 578 syscallarg(size_t) nbyte; 579 syscallarg(linux_off_t) offset; 580 } */ 581 struct sys_pread_args pra; 582 583 SCARG(&pra, fd) = SCARG(uap, fd); 584 SCARG(&pra, buf) = SCARG(uap, buf); 585 SCARG(&pra, nbyte) = SCARG(uap, nbyte); 586 SCARG(&pra, offset) = SCARG(uap, offset); 587 588 return sys_pread(l, &pra, retval); 589 } 590 591 /* 592 * pwrite(2). 593 */ 594 int 595 linux_sys_pwrite(struct lwp *l, const struct linux_sys_pwrite_args *uap, register_t *retval) 596 { 597 /* { 598 syscallarg(int) fd; 599 syscallarg(void *) buf; 600 syscallarg(size_t) nbyte; 601 syscallarg(linux_off_t) offset; 602 } */ 603 struct sys_pwrite_args pra; 604 605 SCARG(&pra, fd) = SCARG(uap, fd); 606 SCARG(&pra, buf) = SCARG(uap, buf); 607 SCARG(&pra, nbyte) = SCARG(uap, nbyte); 608 SCARG(&pra, offset) = SCARG(uap, offset); 609 610 return sys_pwrite(l, &pra, retval); 611 } 612 613 #define LINUX_NOT_SUPPORTED(fun) \ 614 int \ 615 fun(struct lwp *l, const struct fun##_args *uap, register_t *retval) \ 616 { \ 617 return EOPNOTSUPP; \ 618 } 619 620 LINUX_NOT_SUPPORTED(linux_sys_setxattr) 621 LINUX_NOT_SUPPORTED(linux_sys_lsetxattr) 622 LINUX_NOT_SUPPORTED(linux_sys_fsetxattr) 623 624 LINUX_NOT_SUPPORTED(linux_sys_getxattr) 625 LINUX_NOT_SUPPORTED(linux_sys_lgetxattr) 626 LINUX_NOT_SUPPORTED(linux_sys_fgetxattr) 627 628 LINUX_NOT_SUPPORTED(linux_sys_listxattr) 629 LINUX_NOT_SUPPORTED(linux_sys_llistxattr) 630 LINUX_NOT_SUPPORTED(linux_sys_flistxattr) 631 632 LINUX_NOT_SUPPORTED(linux_sys_removexattr) 633 
LINUX_NOT_SUPPORTED(linux_sys_lremovexattr) 634 LINUX_NOT_SUPPORTED(linux_sys_fremovexattr) 635 636