1 /* $NetBSD: linux_file.c,v 1.122 2021/11/25 03:08:04 ryo Exp $ */ 2 3 /*- 4 * Copyright (c) 1995, 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Frank van der Linden and Eric Haszlakiewicz. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Functions in multiarch: 34 * linux_sys_llseek : linux_llseek.c 35 */ 36 37 #include <sys/cdefs.h> 38 __KERNEL_RCSID(0, "$NetBSD: linux_file.c,v 1.122 2021/11/25 03:08:04 ryo Exp $"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/namei.h> 43 #include <sys/proc.h> 44 #include <sys/file.h> 45 #include <sys/fcntl.h> 46 #include <sys/stat.h> 47 #include <sys/filedesc.h> 48 #include <sys/ioctl.h> 49 #include <sys/kernel.h> 50 #include <sys/mount.h> 51 #include <sys/namei.h> 52 #include <sys/vnode.h> 53 #include <sys/tty.h> 54 #include <sys/socketvar.h> 55 #include <sys/conf.h> 56 #include <sys/pipe.h> 57 58 #include <sys/syscallargs.h> 59 #include <sys/vfs_syscalls.h> 60 61 #include <compat/linux/common/linux_types.h> 62 #include <compat/linux/common/linux_signal.h> 63 #include <compat/linux/common/linux_fcntl.h> 64 #include <compat/linux/common/linux_util.h> 65 #include <compat/linux/common/linux_machdep.h> 66 #include <compat/linux/common/linux_ipc.h> 67 #include <compat/linux/common/linux_sem.h> 68 69 #include <compat/linux/linux_syscallargs.h> 70 71 static int bsd_to_linux_ioflags(int); 72 #if !defined(__aarch64__) && !defined(__amd64__) 73 static void bsd_to_linux_stat(struct stat *, struct linux_stat *); 74 #endif 75 76 conv_linux_flock(linux, flock) 77 78 /* 79 * Some file-related calls are handled here. The usual flag conversion 80 * an structure conversion is done, and alternate emul path searching. 81 */ 82 83 /* 84 * The next two functions convert between the Linux and NetBSD values 85 * of the flags used in open(2) and fcntl(2). 86 */ 87 int 88 linux_to_bsd_ioflags(int lflags) 89 { 90 int res = 0; 91 92 res |= cvtto_bsd_mask(lflags, LINUX_O_WRONLY, O_WRONLY); 93 res |= cvtto_bsd_mask(lflags, LINUX_O_RDONLY, O_RDONLY); 94 res |= cvtto_bsd_mask(lflags, LINUX_O_RDWR, O_RDWR); 95 96 res |= cvtto_bsd_mask(lflags, LINUX_O_CREAT, O_CREAT); 97 res |= cvtto_bsd_mask(lflags, LINUX_O_EXCL, O_EXCL); 98 res |= cvtto_bsd_mask(lflags, LINUX_O_NOCTTY, O_NOCTTY); 99 res |= cvtto_bsd_mask(lflags, LINUX_O_TRUNC, O_TRUNC); 100 res |= cvtto_bsd_mask(lflags, LINUX_O_APPEND, O_APPEND); 101 res |= cvtto_bsd_mask(lflags, LINUX_O_NONBLOCK, O_NONBLOCK); 102 res |= cvtto_bsd_mask(lflags, LINUX_O_NDELAY, O_NDELAY); 103 res |= cvtto_bsd_mask(lflags, LINUX_O_SYNC, O_FSYNC); 104 res |= cvtto_bsd_mask(lflags, LINUX_FASYNC, O_ASYNC); 105 res |= cvtto_bsd_mask(lflags, LINUX_O_DIRECT, O_DIRECT); 106 res |= cvtto_bsd_mask(lflags, LINUX_O_DIRECTORY, O_DIRECTORY); 107 res |= cvtto_bsd_mask(lflags, LINUX_O_NOFOLLOW, O_NOFOLLOW); 108 res |= cvtto_bsd_mask(lflags, LINUX_O_CLOEXEC, O_CLOEXEC); 109 110 return res; 111 } 112 113 static int 114 bsd_to_linux_ioflags(int bflags) 115 { 116 int res = 0; 117 118 res |= cvtto_linux_mask(bflags, O_WRONLY, LINUX_O_WRONLY); 119 res |= cvtto_linux_mask(bflags, O_RDONLY, LINUX_O_RDONLY); 120 res |= cvtto_linux_mask(bflags, O_RDWR, LINUX_O_RDWR); 121 122 res |= cvtto_linux_mask(bflags, O_CREAT, LINUX_O_CREAT); 123 res |= cvtto_linux_mask(bflags, O_EXCL, LINUX_O_EXCL); 124 res |= cvtto_linux_mask(bflags, O_NOCTTY, LINUX_O_NOCTTY); 125 res |= cvtto_linux_mask(bflags, O_TRUNC, LINUX_O_TRUNC); 126 res |= cvtto_linux_mask(bflags, O_APPEND, LINUX_O_APPEND); 127 res |= cvtto_linux_mask(bflags, O_NONBLOCK, LINUX_O_NONBLOCK); 128 res |= cvtto_linux_mask(bflags, O_NDELAY, LINUX_O_NDELAY); 129 res |= cvtto_linux_mask(bflags, O_FSYNC, LINUX_O_SYNC); 130 res |= cvtto_linux_mask(bflags, O_ASYNC, LINUX_FASYNC); 131 res |= cvtto_linux_mask(bflags, O_DIRECT, LINUX_O_DIRECT); 132 res |= cvtto_linux_mask(bflags, O_DIRECTORY, LINUX_O_DIRECTORY); 133 res |= cvtto_linux_mask(bflags, O_NOFOLLOW, LINUX_O_NOFOLLOW); 134 res |= cvtto_linux_mask(bflags, O_CLOEXEC, LINUX_O_CLOEXEC); 135 136 return res; 137 } 138 139 static inline off_t 140 linux_hilo_to_off_t(unsigned long hi, unsigned long lo) 141 { 142 #ifdef _LP64 143 /* 144 * Linux discards the "hi" portion on LP64 platforms; even though 145 * glibc puts of the upper 32-bits of the offset into the "hi" 146 * argument regardless, the "lo" argument has all the bits in 147 * this case. 148 */ 149 (void) hi; 150 return (off_t)lo; 151 #else 152 return (((off_t)hi) << 32) | lo; 153 #endif /* _LP64 */ 154 } 155 156 #if !defined(__aarch64__) 157 /* 158 * creat(2) is an obsolete function, but it's present as a Linux 159 * system call, so let's deal with it. 160 * 161 * Note: On the Alpha this doesn't really exist in Linux, but it's defined 162 * in syscalls.master anyway so this doesn't have to be special cased. 163 * 164 * Just call open(2) with the TRUNC, CREAT and WRONLY flags. 165 */ 166 int 167 linux_sys_creat(struct lwp *l, const struct linux_sys_creat_args *uap, register_t *retval) 168 { 169 /* { 170 syscallarg(const char *) path; 171 syscallarg(linux_umode_t) mode; 172 } */ 173 struct sys_open_args oa; 174 175 SCARG(&oa, path) = SCARG(uap, path); 176 SCARG(&oa, flags) = O_CREAT | O_TRUNC | O_WRONLY; 177 SCARG(&oa, mode) = SCARG(uap, mode); 178 179 return sys_open(l, &oa, retval); 180 } 181 #endif 182 183 static void 184 linux_open_ctty(struct lwp *l, int flags, int fd) 185 { 186 struct proc *p = l->l_proc; 187 188 /* 189 * this bit from sunos_misc.c (and svr4_fcntl.c). 190 * If we are a session leader, and we don't have a controlling 191 * terminal yet, and the O_NOCTTY flag is not set, try to make 192 * this the controlling terminal. 193 */ 194 if (!(flags & O_NOCTTY) && SESS_LEADER(p) && !(p->p_lflag & PL_CONTROLT)) { 195 file_t *fp; 196 197 fp = fd_getfile(fd); 198 199 /* ignore any error, just give it a try */ 200 if (fp != NULL) { 201 if (fp->f_type == DTYPE_VNODE) { 202 (fp->f_ops->fo_ioctl) (fp, TIOCSCTTY, NULL); 203 } 204 fd_putfile(fd); 205 } 206 } 207 } 208 209 /* 210 * open(2). Take care of the different flag values, and let the 211 * NetBSD syscall do the real work. See if this operation 212 * gives the current process a controlling terminal. 213 * (XXX is this necessary?) 214 */ 215 int 216 linux_sys_open(struct lwp *l, const struct linux_sys_open_args *uap, register_t *retval) 217 { 218 /* { 219 syscallarg(const char *) path; 220 syscallarg(int) flags; 221 syscallarg(linux_umode_t) mode; 222 } */ 223 int error, fl; 224 struct sys_open_args boa; 225 226 fl = linux_to_bsd_ioflags(SCARG(uap, flags)); 227 228 SCARG(&boa, path) = SCARG(uap, path); 229 SCARG(&boa, flags) = fl; 230 SCARG(&boa, mode) = SCARG(uap, mode); 231 232 if ((error = sys_open(l, &boa, retval))) 233 return (error == EFTYPE) ? ELOOP : error; 234 235 linux_open_ctty(l, fl, *retval); 236 return 0; 237 } 238 239 int 240 linux_sys_openat(struct lwp *l, const struct linux_sys_openat_args *uap, register_t *retval) 241 { 242 /* { 243 syscallarg(int) fd; 244 syscallarg(const char *) path; 245 syscallarg(int) flags; 246 syscallarg(linux_umode_t) mode; 247 } */ 248 int error, fl; 249 struct sys_openat_args boa; 250 251 fl = linux_to_bsd_ioflags(SCARG(uap, flags)); 252 253 SCARG(&boa, fd) = SCARG(uap, fd); 254 SCARG(&boa, path) = SCARG(uap, path); 255 SCARG(&boa, oflags) = fl; 256 SCARG(&boa, mode) = SCARG(uap, mode); 257 258 if ((error = sys_openat(l, &boa, retval))) 259 return (error == EFTYPE) ? ELOOP : error; 260 261 linux_open_ctty(l, fl, *retval); 262 return 0; 263 } 264 265 /* 266 * Most actions in the fcntl() call are straightforward; simply 267 * pass control to the NetBSD system call. A few commands need 268 * conversions after the actual system call has done its work, 269 * because the flag values and lock structure are different. 270 */ 271 int 272 linux_sys_fcntl(struct lwp *l, const struct linux_sys_fcntl_args *uap, register_t *retval) 273 { 274 /* { 275 syscallarg(int) fd; 276 syscallarg(int) cmd; 277 syscallarg(void *) arg; 278 } */ 279 struct proc *p = l->l_proc; 280 int fd, cmd, error; 281 u_long val; 282 void *arg; 283 struct sys_fcntl_args fca; 284 file_t *fp; 285 struct vnode *vp; 286 struct vattr va; 287 long pgid; 288 struct pgrp *pgrp; 289 struct tty *tp; 290 291 fd = SCARG(uap, fd); 292 cmd = SCARG(uap, cmd); 293 arg = SCARG(uap, arg); 294 295 switch (cmd) { 296 297 case LINUX_F_DUPFD: 298 cmd = F_DUPFD; 299 break; 300 301 case LINUX_F_GETFD: 302 cmd = F_GETFD; 303 break; 304 305 case LINUX_F_SETFD: 306 cmd = F_SETFD; 307 break; 308 309 case LINUX_F_GETFL: 310 SCARG(&fca, fd) = fd; 311 SCARG(&fca, cmd) = F_GETFL; 312 SCARG(&fca, arg) = arg; 313 if ((error = sys_fcntl(l, &fca, retval))) 314 return error; 315 retval[0] = bsd_to_linux_ioflags(retval[0]); 316 return 0; 317 318 case LINUX_F_SETFL: { 319 file_t *fp1 = NULL; 320 321 val = linux_to_bsd_ioflags((unsigned long)SCARG(uap, arg)); 322 /* 323 * Linux seems to have same semantics for sending SIGIO to the 324 * read side of socket, but slightly different semantics 325 * for SIGIO to the write side. Rather than sending the SIGIO 326 * every time it's possible to write (directly) more data, it 327 * only sends SIGIO if last write(2) failed due to insufficient 328 * memory to hold the data. This is compatible enough 329 * with NetBSD semantics to not do anything about the 330 * difference. 331 * 332 * Linux does NOT send SIGIO for pipes. Deal with socketpair 333 * ones and DTYPE_PIPE ones. For these, we don't set 334 * the underlying flags (we don't pass O_ASYNC flag down 335 * to sys_fcntl()), but set the FASYNC flag for file descriptor, 336 * so that F_GETFL would report the ASYNC i/o is on. 337 */ 338 if (val & O_ASYNC) { 339 if (((fp1 = fd_getfile(fd)) == NULL)) 340 return (EBADF); 341 if (((fp1->f_type == DTYPE_SOCKET) && fp1->f_data 342 && ((struct socket *)fp1->f_data)->so_state & SS_ISAPIPE) 343 || (fp1->f_type == DTYPE_PIPE)) 344 val &= ~O_ASYNC; 345 else { 346 /* not a pipe, do not modify anything */ 347 fd_putfile(fd); 348 fp1 = NULL; 349 } 350 } 351 352 SCARG(&fca, fd) = fd; 353 SCARG(&fca, cmd) = F_SETFL; 354 SCARG(&fca, arg) = (void *) val; 355 356 error = sys_fcntl(l, &fca, retval); 357 358 /* Now set the FASYNC flag for pipes */ 359 if (fp1) { 360 if (!error) { 361 mutex_enter(&fp1->f_lock); 362 fp1->f_flag |= FASYNC; 363 mutex_exit(&fp1->f_lock); 364 } 365 fd_putfile(fd); 366 } 367 368 return (error); 369 } 370 371 case LINUX_F_GETLK: 372 do_linux_getlk(fd, cmd, arg, linux, flock); 373 374 case LINUX_F_SETLK: 375 case LINUX_F_SETLKW: 376 do_linux_setlk(fd, cmd, arg, linux, flock, LINUX_F_SETLK); 377 378 case LINUX_F_SETOWN: 379 case LINUX_F_GETOWN: 380 /* 381 * We need to route fcntl() for tty descriptors around normal 382 * fcntl(), since NetBSD tty TIOC{G,S}PGRP semantics is too 383 * restrictive for Linux F_{G,S}ETOWN. For non-tty descriptors, 384 * this is not a problem. 385 */ 386 if ((fp = fd_getfile(fd)) == NULL) 387 return EBADF; 388 389 /* Check it's a character device vnode */ 390 if (fp->f_type != DTYPE_VNODE 391 || (vp = (struct vnode *)fp->f_data) == NULL 392 || vp->v_type != VCHR) { 393 fd_putfile(fd); 394 395 not_tty: 396 /* Not a tty, proceed with common fcntl() */ 397 cmd = cmd == LINUX_F_SETOWN ? F_SETOWN : F_GETOWN; 398 break; 399 } 400 401 vn_lock(vp, LK_SHARED | LK_RETRY); 402 error = VOP_GETATTR(vp, &va, l->l_cred); 403 VOP_UNLOCK(vp); 404 405 fd_putfile(fd); 406 407 if (error) 408 return error; 409 410 if ((tp = cdev_tty(va.va_rdev)) == NULL) 411 goto not_tty; 412 413 /* set tty pg_id appropriately */ 414 mutex_enter(&proc_lock); 415 if (cmd == LINUX_F_GETOWN) { 416 retval[0] = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID; 417 mutex_exit(&proc_lock); 418 return 0; 419 } 420 if ((long)arg <= 0) { 421 pgid = -(long)arg; 422 } else { 423 struct proc *p1 = proc_find((long)arg); 424 if (p1 == NULL) { 425 mutex_exit(&proc_lock); 426 return (ESRCH); 427 } 428 pgid = (long)p1->p_pgrp->pg_id; 429 } 430 pgrp = pgrp_find(pgid); 431 if (pgrp == NULL || pgrp->pg_session != p->p_session) { 432 mutex_exit(&proc_lock); 433 return EPERM; 434 } 435 tp->t_pgrp = pgrp; 436 mutex_exit(&proc_lock); 437 return 0; 438 439 case LINUX_F_DUPFD_CLOEXEC: 440 cmd = F_DUPFD_CLOEXEC; 441 break; 442 443 default: 444 return EOPNOTSUPP; 445 } 446 447 SCARG(&fca, fd) = fd; 448 SCARG(&fca, cmd) = cmd; 449 SCARG(&fca, arg) = arg; 450 451 return sys_fcntl(l, &fca, retval); 452 } 453 454 #if !defined(__aarch64__) && !defined(__amd64__) 455 /* 456 * Convert a NetBSD stat structure to a Linux stat structure. 457 * Only the order of the fields and the padding in the structure 458 * is different. linux_fakedev is a machine-dependent function 459 * which optionally converts device driver major/minor numbers 460 * (XXX horrible, but what can you do against code that compares 461 * things against constant major device numbers? sigh) 462 */ 463 static void 464 bsd_to_linux_stat(struct stat *bsp, struct linux_stat *lsp) 465 { 466 467 memset(lsp, 0, sizeof(*lsp)); 468 lsp->lst_dev = linux_fakedev(bsp->st_dev, 0); 469 lsp->lst_ino = bsp->st_ino; 470 lsp->lst_mode = (linux_mode_t)bsp->st_mode; 471 if (bsp->st_nlink >= (1 << 15)) 472 lsp->lst_nlink = (1 << 15) - 1; 473 else 474 lsp->lst_nlink = (linux_nlink_t)bsp->st_nlink; 475 lsp->lst_uid = bsp->st_uid; 476 lsp->lst_gid = bsp->st_gid; 477 lsp->lst_rdev = linux_fakedev(bsp->st_rdev, 1); 478 lsp->lst_size = bsp->st_size; 479 lsp->lst_blksize = bsp->st_blksize; 480 lsp->lst_blocks = bsp->st_blocks; 481 lsp->lst_atime = bsp->st_atime; 482 lsp->lst_mtime = bsp->st_mtime; 483 lsp->lst_ctime = bsp->st_ctime; 484 #ifdef LINUX_STAT_HAS_NSEC 485 lsp->lst_atime_nsec = bsp->st_atimensec; 486 lsp->lst_mtime_nsec = bsp->st_mtimensec; 487 lsp->lst_ctime_nsec = bsp->st_ctimensec; 488 #endif 489 } 490 491 /* 492 * The stat functions below are plain sailing. stat and lstat are handled 493 * by one function to avoid code duplication. 494 */ 495 int 496 linux_sys_fstat(struct lwp *l, const struct linux_sys_fstat_args *uap, register_t *retval) 497 { 498 /* { 499 syscallarg(int) fd; 500 syscallarg(linux_stat *) sp; 501 } */ 502 struct linux_stat tmplst; 503 struct stat tmpst; 504 int error; 505 506 error = do_sys_fstat(SCARG(uap, fd), &tmpst); 507 if (error != 0) 508 return error; 509 bsd_to_linux_stat(&tmpst, &tmplst); 510 511 return copyout(&tmplst, SCARG(uap, sp), sizeof tmplst); 512 } 513 514 static int 515 linux_stat1(const struct linux_sys_stat_args *uap, register_t *retval, int flags) 516 { 517 struct linux_stat tmplst; 518 struct stat tmpst; 519 int error; 520 521 error = do_sys_stat(SCARG(uap, path), flags, &tmpst); 522 if (error != 0) 523 return error; 524 525 bsd_to_linux_stat(&tmpst, &tmplst); 526 527 return copyout(&tmplst, SCARG(uap, sp), sizeof tmplst); 528 } 529 530 int 531 linux_sys_stat(struct lwp *l, const struct linux_sys_stat_args *uap, register_t *retval) 532 { 533 /* { 534 syscallarg(const char *) path; 535 syscallarg(struct linux_stat *) sp; 536 } */ 537 538 return linux_stat1(uap, retval, FOLLOW); 539 } 540 541 /* Note: this is "newlstat" in the Linux sources */ 542 /* (we don't bother with the old lstat currently) */ 543 int 544 linux_sys_lstat(struct lwp *l, const struct linux_sys_lstat_args *uap, register_t *retval) 545 { 546 /* { 547 syscallarg(const char *) path; 548 syscallarg(struct linux_stat *) sp; 549 } */ 550 551 return linux_stat1((const void *)uap, retval, NOFOLLOW); 552 } 553 #endif /* !__aarch64__ && !__amd64__ */ 554 555 /* 556 * The following syscalls are mostly here because of the alternate path check. 557 */ 558 559 int 560 linux_sys_linkat(struct lwp *l, const struct linux_sys_linkat_args *uap, register_t *retval) 561 { 562 /* { 563 syscallarg(int) fd1; 564 syscallarg(const char *) name1; 565 syscallarg(int) fd2; 566 syscallarg(const char *) name2; 567 syscallarg(int) flags; 568 } */ 569 int fd1 = SCARG(uap, fd1); 570 const char *name1 = SCARG(uap, name1); 571 int fd2 = SCARG(uap, fd2); 572 const char *name2 = SCARG(uap, name2); 573 int follow; 574 575 follow = SCARG(uap, flags) & LINUX_AT_SYMLINK_FOLLOW; 576 577 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 578 } 579 580 static int 581 linux_unlink_dircheck(const char *path) 582 { 583 struct nameidata nd; 584 struct pathbuf *pb; 585 int error; 586 587 /* 588 * Linux returns EISDIR if unlink(2) is called on a directory. 589 * We return EPERM in such cases. To emulate correct behaviour, 590 * check if the path points to directory and return EISDIR if this 591 * is the case. 592 * 593 * XXX this should really not copy in the path buffer twice... 594 */ 595 error = pathbuf_copyin(path, &pb); 596 if (error) { 597 return error; 598 } 599 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 600 if (namei(&nd) == 0) { 601 struct stat sb; 602 603 if (vn_stat(nd.ni_vp, &sb) == 0 604 && S_ISDIR(sb.st_mode)) 605 error = EISDIR; 606 607 vput(nd.ni_vp); 608 } 609 pathbuf_destroy(pb); 610 return error ? error : EPERM; 611 } 612 613 int 614 linux_sys_unlink(struct lwp *l, const struct linux_sys_unlink_args *uap, register_t *retval) 615 { 616 /* { 617 syscallarg(const char *) path; 618 } */ 619 int error; 620 621 error = sys_unlink(l, (const void *)uap, retval); 622 if (error == EPERM) 623 error = linux_unlink_dircheck(SCARG(uap, path)); 624 625 return error; 626 } 627 628 int 629 linux_sys_unlinkat(struct lwp *l, const struct linux_sys_unlinkat_args *uap, register_t *retval) 630 { 631 /* { 632 syscallarg(int) fd; 633 syscallarg(const char *) path; 634 syscallarg(int) flag; 635 } */ 636 struct sys_unlinkat_args ua; 637 int error; 638 639 SCARG(&ua, fd) = SCARG(uap, fd); 640 SCARG(&ua, path) = SCARG(uap, path); 641 SCARG(&ua, flag) = linux_to_bsd_atflags(SCARG(uap, flag)); 642 643 error = sys_unlinkat(l, &ua, retval); 644 if (error == EPERM) 645 error = linux_unlink_dircheck(SCARG(uap, path)); 646 647 return error; 648 } 649 650 int 651 linux_sys_mknod(struct lwp *l, const struct linux_sys_mknod_args *uap, register_t *retval) 652 { 653 /* { 654 syscallarg(const char *) path; 655 syscallarg(linux_umode_t) mode; 656 syscallarg(unsigned) dev; 657 } */ 658 struct linux_sys_mknodat_args ua; 659 660 SCARG(&ua, fd) = LINUX_AT_FDCWD; 661 SCARG(&ua, path) = SCARG(uap, path); 662 SCARG(&ua, mode) = SCARG(uap, mode); 663 SCARG(&ua, dev) = SCARG(uap, dev); 664 665 return linux_sys_mknodat(l, &ua, retval); 666 } 667 668 int 669 linux_sys_mknodat(struct lwp *l, const struct linux_sys_mknodat_args *uap, register_t *retval) 670 { 671 /* { 672 syscallarg(int) fd; 673 syscallarg(const char *) path; 674 syscallarg(linux_umode_t) mode; 675 syscallarg(unsigned) dev; 676 } */ 677 678 /* 679 * BSD handles FIFOs separately 680 */ 681 if (S_ISFIFO(SCARG(uap, mode))) { 682 struct sys_mkfifoat_args bma; 683 684 SCARG(&bma, fd) = SCARG(uap, fd); 685 SCARG(&bma, path) = SCARG(uap, path); 686 SCARG(&bma, mode) = SCARG(uap, mode); 687 return sys_mkfifoat(l, &bma, retval); 688 } else { 689 690 /* 691 * Linux device numbers uses 8 bits for minor and 8 bits 692 * for major. Due to how we map our major and minor, 693 * this just fits into our dev_t. Just mask off the 694 * upper 16bit to remove any random junk. 695 */ 696 697 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 698 SCARG(uap, mode), SCARG(uap, dev) & 0xffff, UIO_USERSPACE); 699 } 700 } 701 702 int 703 linux_sys_fchmodat(struct lwp *l, const struct linux_sys_fchmodat_args *uap, register_t *retval) 704 { 705 /* { 706 syscallarg(int) fd; 707 syscallarg(const char *) path; 708 syscallarg(linux_umode_t) mode; 709 } */ 710 711 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 712 SCARG(uap, mode), AT_SYMLINK_FOLLOW); 713 } 714 715 int 716 linux_sys_fchownat(struct lwp *l, const struct linux_sys_fchownat_args *uap, register_t *retval) 717 { 718 /* { 719 syscallarg(int) fd; 720 syscallarg(const char *) path; 721 syscallarg(uid_t) owner; 722 syscallarg(gid_t) group; 723 syscallarg(int) flag; 724 } */ 725 int flag; 726 727 flag = linux_to_bsd_atflags(SCARG(uap, flag)); 728 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 729 SCARG(uap, owner), SCARG(uap, group), flag); 730 } 731 732 int 733 linux_sys_faccessat(struct lwp *l, const struct linux_sys_faccessat_args *uap, register_t *retval) 734 { 735 /* { 736 syscallarg(int) fd; 737 syscallarg(const char *) path; 738 syscallarg(int) amode; 739 } */ 740 741 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 742 SCARG(uap, amode), AT_SYMLINK_FOLLOW); 743 } 744 745 /* 746 * This is just fsync() for now (just as it is in the Linux kernel) 747 * Note: this is not implemented under Linux on Alpha and Arm 748 * but should still be defined in our syscalls.master. 749 * (syscall #148 on the arm) 750 */ 751 int 752 linux_sys_fdatasync(struct lwp *l, const struct linux_sys_fdatasync_args *uap, register_t *retval) 753 { 754 /* { 755 syscallarg(int) fd; 756 } */ 757 758 return sys_fsync(l, (const void *)uap, retval); 759 } 760 761 /* 762 * pread(2). 763 */ 764 int 765 linux_sys_pread(struct lwp *l, const struct linux_sys_pread_args *uap, register_t *retval) 766 { 767 /* { 768 syscallarg(int) fd; 769 syscallarg(void *) buf; 770 syscallarg(size_t) nbyte; 771 syscallarg(off_t) offset; 772 } */ 773 struct sys_pread_args pra; 774 775 SCARG(&pra, fd) = SCARG(uap, fd); 776 SCARG(&pra, buf) = SCARG(uap, buf); 777 SCARG(&pra, nbyte) = SCARG(uap, nbyte); 778 SCARG(&pra, PAD) = 0; 779 SCARG(&pra, offset) = SCARG(uap, offset); 780 781 return sys_pread(l, &pra, retval); 782 } 783 784 /* 785 * pwrite(2). 786 */ 787 int 788 linux_sys_pwrite(struct lwp *l, const struct linux_sys_pwrite_args *uap, register_t *retval) 789 { 790 /* { 791 syscallarg(int) fd; 792 syscallarg(void *) buf; 793 syscallarg(size_t) nbyte; 794 syscallarg(off_t) offset; 795 } */ 796 struct sys_pwrite_args pra; 797 798 SCARG(&pra, fd) = SCARG(uap, fd); 799 SCARG(&pra, buf) = SCARG(uap, buf); 800 SCARG(&pra, nbyte) = SCARG(uap, nbyte); 801 SCARG(&pra, PAD) = 0; 802 SCARG(&pra, offset) = SCARG(uap, offset); 803 804 return sys_pwrite(l, &pra, retval); 805 } 806 807 /* 808 * preadv(2) 809 */ 810 int 811 linux_sys_preadv(struct lwp *l, const struct linux_sys_preadv_args *uap, 812 register_t *retval) 813 { 814 /* { 815 syscallarg(int) fd; 816 syscallarg(const struct iovec *) iovp; 817 syscallarg(int) iovcnt; 818 syscallarg(unsigned long) off_lo; 819 syscallarg(unsigned long) off_hi; 820 } */ 821 struct sys_preadv_args ua; 822 823 SCARG(&ua, fd) = SCARG(uap, fd); 824 SCARG(&ua, iovp) = SCARG(uap, iovp); 825 SCARG(&ua, iovcnt) = SCARG(uap, iovcnt); 826 SCARG(&ua, PAD) = 0; 827 SCARG(&ua, offset) = linux_hilo_to_off_t(SCARG(uap, off_hi), 828 SCARG(uap, off_lo)); 829 return sys_preadv(l, &ua, retval); 830 } 831 832 /* 833 * pwritev(2) 834 */ 835 int 836 linux_sys_pwritev(struct lwp *l, const struct linux_sys_pwritev_args *uap, 837 register_t *retval) 838 { 839 /* { 840 syscallarg(int) fd; 841 syscallarg(const struct iovec *) iovp; 842 syscallarg(int) iovcnt; 843 syscallarg(unsigned long) off_lo; 844 syscallarg(unsigned long) off_hi; 845 } */ 846 struct sys_pwritev_args ua; 847 848 SCARG(&ua, fd) = SCARG(uap, fd); 849 SCARG(&ua, iovp) = (const void *)SCARG(uap, iovp); 850 SCARG(&ua, iovcnt) = SCARG(uap, iovcnt); 851 SCARG(&ua, PAD) = 0; 852 SCARG(&ua, offset) = linux_hilo_to_off_t(SCARG(uap, off_hi), 853 SCARG(uap, off_lo)); 854 return sys_pwritev(l, &ua, retval); 855 } 856 857 int 858 linux_sys_dup3(struct lwp *l, const struct linux_sys_dup3_args *uap, 859 register_t *retval) 860 { 861 /* { 862 syscallarg(int) from; 863 syscallarg(int) to; 864 syscallarg(int) flags; 865 } */ 866 int flags; 867 868 flags = linux_to_bsd_ioflags(SCARG(uap, flags)); 869 if ((flags & ~O_CLOEXEC) != 0) 870 return EINVAL; 871 872 if (SCARG(uap, from) == SCARG(uap, to)) 873 return EINVAL; 874 875 return dodup(l, SCARG(uap, from), SCARG(uap, to), flags, retval); 876 } 877 878 879 int 880 linux_to_bsd_atflags(int lflags) 881 { 882 int bflags = 0; 883 884 if (lflags & LINUX_AT_SYMLINK_NOFOLLOW) 885 bflags |= AT_SYMLINK_NOFOLLOW; 886 if (lflags & LINUX_AT_REMOVEDIR) 887 bflags |= AT_REMOVEDIR; 888 if (lflags & LINUX_AT_SYMLINK_FOLLOW) 889 bflags |= AT_SYMLINK_FOLLOW; 890 891 return bflags; 892 } 893 894 895 #define LINUX_NOT_SUPPORTED(fun) \ 896 int \ 897 fun(struct lwp *l, const struct fun##_args *uap, register_t *retval) \ 898 { \ 899 return EOPNOTSUPP; \ 900 } 901 902 LINUX_NOT_SUPPORTED(linux_sys_setxattr) 903 LINUX_NOT_SUPPORTED(linux_sys_lsetxattr) 904 LINUX_NOT_SUPPORTED(linux_sys_fsetxattr) 905 906 LINUX_NOT_SUPPORTED(linux_sys_getxattr) 907 LINUX_NOT_SUPPORTED(linux_sys_lgetxattr) 908 LINUX_NOT_SUPPORTED(linux_sys_fgetxattr) 909 910 LINUX_NOT_SUPPORTED(linux_sys_listxattr) 911 LINUX_NOT_SUPPORTED(linux_sys_llistxattr) 912 LINUX_NOT_SUPPORTED(linux_sys_flistxattr) 913 914 LINUX_NOT_SUPPORTED(linux_sys_removexattr) 915 LINUX_NOT_SUPPORTED(linux_sys_lremovexattr) 916 LINUX_NOT_SUPPORTED(linux_sys_fremovexattr) 917 918 /* 919 * For now just return EOPNOTSUPP, this makes glibc posix_fallocate() 920 * to fallback to emulation. 921 * XXX Right now no filesystem actually implements fallocate support, 922 * so no need for mapping. 923 */ 924 LINUX_NOT_SUPPORTED(linux_sys_fallocate) 925