1 /* $NetBSD: linux_file.c,v 1.121 2021/09/23 06:56:27 ryo Exp $ */ 2 3 /*- 4 * Copyright (c) 1995, 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Frank van der Linden and Eric Haszlakiewicz. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Functions in multiarch: 34 * linux_sys_llseek : linux_llseek.c 35 */ 36 37 #include <sys/cdefs.h> 38 __KERNEL_RCSID(0, "$NetBSD: linux_file.c,v 1.121 2021/09/23 06:56:27 ryo Exp $"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/namei.h> 43 #include <sys/proc.h> 44 #include <sys/file.h> 45 #include <sys/fcntl.h> 46 #include <sys/stat.h> 47 #include <sys/filedesc.h> 48 #include <sys/ioctl.h> 49 #include <sys/kernel.h> 50 #include <sys/mount.h> 51 #include <sys/namei.h> 52 #include <sys/vnode.h> 53 #include <sys/tty.h> 54 #include <sys/socketvar.h> 55 #include <sys/conf.h> 56 #include <sys/pipe.h> 57 58 #include <sys/syscallargs.h> 59 #include <sys/vfs_syscalls.h> 60 61 #include <compat/linux/common/linux_types.h> 62 #include <compat/linux/common/linux_signal.h> 63 #include <compat/linux/common/linux_fcntl.h> 64 #include <compat/linux/common/linux_util.h> 65 #include <compat/linux/common/linux_machdep.h> 66 #include <compat/linux/common/linux_ipc.h> 67 #include <compat/linux/common/linux_sem.h> 68 69 #include <compat/linux/linux_syscallargs.h> 70 71 static int bsd_to_linux_ioflags(int); 72 #if !defined(__aarch64__) && !defined(__amd64__) 73 static void bsd_to_linux_stat(struct stat *, struct linux_stat *); 74 #endif 75 76 conv_linux_flock(linux, flock) 77 78 /* 79 * Some file-related calls are handled here. The usual flag conversion 80 * an structure conversion is done, and alternate emul path searching. 81 */ 82 83 /* 84 * The next two functions convert between the Linux and NetBSD values 85 * of the flags used in open(2) and fcntl(2). 86 */ 87 int 88 linux_to_bsd_ioflags(int lflags) 89 { 90 int res = 0; 91 92 res |= cvtto_bsd_mask(lflags, LINUX_O_WRONLY, O_WRONLY); 93 res |= cvtto_bsd_mask(lflags, LINUX_O_RDONLY, O_RDONLY); 94 res |= cvtto_bsd_mask(lflags, LINUX_O_RDWR, O_RDWR); 95 96 res |= cvtto_bsd_mask(lflags, LINUX_O_CREAT, O_CREAT); 97 res |= cvtto_bsd_mask(lflags, LINUX_O_EXCL, O_EXCL); 98 res |= cvtto_bsd_mask(lflags, LINUX_O_NOCTTY, O_NOCTTY); 99 res |= cvtto_bsd_mask(lflags, LINUX_O_TRUNC, O_TRUNC); 100 res |= cvtto_bsd_mask(lflags, LINUX_O_APPEND, O_APPEND); 101 res |= cvtto_bsd_mask(lflags, LINUX_O_NONBLOCK, O_NONBLOCK); 102 res |= cvtto_bsd_mask(lflags, LINUX_O_NDELAY, O_NDELAY); 103 res |= cvtto_bsd_mask(lflags, LINUX_O_SYNC, O_FSYNC); 104 res |= cvtto_bsd_mask(lflags, LINUX_FASYNC, O_ASYNC); 105 res |= cvtto_bsd_mask(lflags, LINUX_O_DIRECT, O_DIRECT); 106 res |= cvtto_bsd_mask(lflags, LINUX_O_DIRECTORY, O_DIRECTORY); 107 res |= cvtto_bsd_mask(lflags, LINUX_O_NOFOLLOW, O_NOFOLLOW); 108 res |= cvtto_bsd_mask(lflags, LINUX_O_CLOEXEC, O_CLOEXEC); 109 110 return res; 111 } 112 113 static int 114 bsd_to_linux_ioflags(int bflags) 115 { 116 int res = 0; 117 118 res |= cvtto_linux_mask(bflags, O_WRONLY, LINUX_O_WRONLY); 119 res |= cvtto_linux_mask(bflags, O_RDONLY, LINUX_O_RDONLY); 120 res |= cvtto_linux_mask(bflags, O_RDWR, LINUX_O_RDWR); 121 122 res |= cvtto_linux_mask(bflags, O_CREAT, LINUX_O_CREAT); 123 res |= cvtto_linux_mask(bflags, O_EXCL, LINUX_O_EXCL); 124 res |= cvtto_linux_mask(bflags, O_NOCTTY, LINUX_O_NOCTTY); 125 res |= cvtto_linux_mask(bflags, O_TRUNC, LINUX_O_TRUNC); 126 res |= cvtto_linux_mask(bflags, O_APPEND, LINUX_O_APPEND); 127 res |= cvtto_linux_mask(bflags, O_NONBLOCK, LINUX_O_NONBLOCK); 128 res |= cvtto_linux_mask(bflags, O_NDELAY, LINUX_O_NDELAY); 129 res |= cvtto_linux_mask(bflags, O_FSYNC, LINUX_O_SYNC); 130 res |= cvtto_linux_mask(bflags, O_ASYNC, LINUX_FASYNC); 131 res |= cvtto_linux_mask(bflags, O_DIRECT, LINUX_O_DIRECT); 132 res |= cvtto_linux_mask(bflags, O_DIRECTORY, LINUX_O_DIRECTORY); 133 res |= cvtto_linux_mask(bflags, O_NOFOLLOW, LINUX_O_NOFOLLOW); 134 res |= cvtto_linux_mask(bflags, O_CLOEXEC, LINUX_O_CLOEXEC); 135 136 return res; 137 } 138 139 static inline off_t 140 linux_hilo_to_off_t(unsigned long hi, unsigned long lo) 141 { 142 #ifdef _LP64 143 /* 144 * Linux discards the "hi" portion on LP64 platforms; even though 145 * glibc puts of the upper 32-bits of the offset into the "hi" 146 * argument regardless, the "lo" argument has all the bits in 147 * this case. 148 */ 149 (void) hi; 150 return (off_t)lo; 151 #else 152 return (((off_t)hi) << 32) | lo; 153 #endif /* _LP64 */ 154 } 155 156 #if !defined(__aarch64__) 157 /* 158 * creat(2) is an obsolete function, but it's present as a Linux 159 * system call, so let's deal with it. 160 * 161 * Note: On the Alpha this doesn't really exist in Linux, but it's defined 162 * in syscalls.master anyway so this doesn't have to be special cased. 163 * 164 * Just call open(2) with the TRUNC, CREAT and WRONLY flags. 165 */ 166 int 167 linux_sys_creat(struct lwp *l, const struct linux_sys_creat_args *uap, register_t *retval) 168 { 169 /* { 170 syscallarg(const char *) path; 171 syscallarg(linux_umode_t) mode; 172 } */ 173 struct sys_open_args oa; 174 175 SCARG(&oa, path) = SCARG(uap, path); 176 SCARG(&oa, flags) = O_CREAT | O_TRUNC | O_WRONLY; 177 SCARG(&oa, mode) = SCARG(uap, mode); 178 179 return sys_open(l, &oa, retval); 180 } 181 #endif 182 183 static void 184 linux_open_ctty(struct lwp *l, int flags, int fd) 185 { 186 struct proc *p = l->l_proc; 187 188 /* 189 * this bit from sunos_misc.c (and svr4_fcntl.c). 190 * If we are a session leader, and we don't have a controlling 191 * terminal yet, and the O_NOCTTY flag is not set, try to make 192 * this the controlling terminal. 193 */ 194 if (!(flags & O_NOCTTY) && SESS_LEADER(p) && !(p->p_lflag & PL_CONTROLT)) { 195 file_t *fp; 196 197 fp = fd_getfile(fd); 198 199 /* ignore any error, just give it a try */ 200 if (fp != NULL) { 201 if (fp->f_type == DTYPE_VNODE) { 202 (fp->f_ops->fo_ioctl) (fp, TIOCSCTTY, NULL); 203 } 204 fd_putfile(fd); 205 } 206 } 207 } 208 209 #if !defined(__aarch64__) 210 /* 211 * open(2). Take care of the different flag values, and let the 212 * NetBSD syscall do the real work. See if this operation 213 * gives the current process a controlling terminal. 214 * (XXX is this necessary?) 215 */ 216 int 217 linux_sys_open(struct lwp *l, const struct linux_sys_open_args *uap, register_t *retval) 218 { 219 /* { 220 syscallarg(const char *) path; 221 syscallarg(int) flags; 222 syscallarg(linux_umode_t) mode; 223 } */ 224 int error, fl; 225 struct sys_open_args boa; 226 227 fl = linux_to_bsd_ioflags(SCARG(uap, flags)); 228 229 SCARG(&boa, path) = SCARG(uap, path); 230 SCARG(&boa, flags) = fl; 231 SCARG(&boa, mode) = SCARG(uap, mode); 232 233 if ((error = sys_open(l, &boa, retval))) 234 return (error == EFTYPE) ? ELOOP : error; 235 236 linux_open_ctty(l, fl, *retval); 237 return 0; 238 } 239 #endif 240 241 int 242 linux_sys_openat(struct lwp *l, const struct linux_sys_openat_args *uap, register_t *retval) 243 { 244 /* { 245 syscallarg(int) fd; 246 syscallarg(const char *) path; 247 syscallarg(int) flags; 248 syscallarg(linux_umode_t) mode; 249 } */ 250 int error, fl; 251 struct sys_openat_args boa; 252 253 fl = linux_to_bsd_ioflags(SCARG(uap, flags)); 254 255 SCARG(&boa, fd) = SCARG(uap, fd); 256 SCARG(&boa, path) = SCARG(uap, path); 257 SCARG(&boa, oflags) = fl; 258 SCARG(&boa, mode) = SCARG(uap, mode); 259 260 if ((error = sys_openat(l, &boa, retval))) 261 return (error == EFTYPE) ? ELOOP : error; 262 263 linux_open_ctty(l, fl, *retval); 264 return 0; 265 } 266 267 /* 268 * Most actions in the fcntl() call are straightforward; simply 269 * pass control to the NetBSD system call. A few commands need 270 * conversions after the actual system call has done its work, 271 * because the flag values and lock structure are different. 272 */ 273 int 274 linux_sys_fcntl(struct lwp *l, const struct linux_sys_fcntl_args *uap, register_t *retval) 275 { 276 /* { 277 syscallarg(int) fd; 278 syscallarg(int) cmd; 279 syscallarg(void *) arg; 280 } */ 281 struct proc *p = l->l_proc; 282 int fd, cmd, error; 283 u_long val; 284 void *arg; 285 struct sys_fcntl_args fca; 286 file_t *fp; 287 struct vnode *vp; 288 struct vattr va; 289 long pgid; 290 struct pgrp *pgrp; 291 struct tty *tp; 292 293 fd = SCARG(uap, fd); 294 cmd = SCARG(uap, cmd); 295 arg = SCARG(uap, arg); 296 297 switch (cmd) { 298 299 case LINUX_F_DUPFD: 300 cmd = F_DUPFD; 301 break; 302 303 case LINUX_F_GETFD: 304 cmd = F_GETFD; 305 break; 306 307 case LINUX_F_SETFD: 308 cmd = F_SETFD; 309 break; 310 311 case LINUX_F_GETFL: 312 SCARG(&fca, fd) = fd; 313 SCARG(&fca, cmd) = F_GETFL; 314 SCARG(&fca, arg) = arg; 315 if ((error = sys_fcntl(l, &fca, retval))) 316 return error; 317 retval[0] = bsd_to_linux_ioflags(retval[0]); 318 return 0; 319 320 case LINUX_F_SETFL: { 321 file_t *fp1 = NULL; 322 323 val = linux_to_bsd_ioflags((unsigned long)SCARG(uap, arg)); 324 /* 325 * Linux seems to have same semantics for sending SIGIO to the 326 * read side of socket, but slightly different semantics 327 * for SIGIO to the write side. Rather than sending the SIGIO 328 * every time it's possible to write (directly) more data, it 329 * only sends SIGIO if last write(2) failed due to insufficient 330 * memory to hold the data. This is compatible enough 331 * with NetBSD semantics to not do anything about the 332 * difference. 333 * 334 * Linux does NOT send SIGIO for pipes. Deal with socketpair 335 * ones and DTYPE_PIPE ones. For these, we don't set 336 * the underlying flags (we don't pass O_ASYNC flag down 337 * to sys_fcntl()), but set the FASYNC flag for file descriptor, 338 * so that F_GETFL would report the ASYNC i/o is on. 339 */ 340 if (val & O_ASYNC) { 341 if (((fp1 = fd_getfile(fd)) == NULL)) 342 return (EBADF); 343 if (((fp1->f_type == DTYPE_SOCKET) && fp1->f_data 344 && ((struct socket *)fp1->f_data)->so_state & SS_ISAPIPE) 345 || (fp1->f_type == DTYPE_PIPE)) 346 val &= ~O_ASYNC; 347 else { 348 /* not a pipe, do not modify anything */ 349 fd_putfile(fd); 350 fp1 = NULL; 351 } 352 } 353 354 SCARG(&fca, fd) = fd; 355 SCARG(&fca, cmd) = F_SETFL; 356 SCARG(&fca, arg) = (void *) val; 357 358 error = sys_fcntl(l, &fca, retval); 359 360 /* Now set the FASYNC flag for pipes */ 361 if (fp1) { 362 if (!error) { 363 mutex_enter(&fp1->f_lock); 364 fp1->f_flag |= FASYNC; 365 mutex_exit(&fp1->f_lock); 366 } 367 fd_putfile(fd); 368 } 369 370 return (error); 371 } 372 373 case LINUX_F_GETLK: 374 do_linux_getlk(fd, cmd, arg, linux, flock); 375 376 case LINUX_F_SETLK: 377 case LINUX_F_SETLKW: 378 do_linux_setlk(fd, cmd, arg, linux, flock, LINUX_F_SETLK); 379 380 case LINUX_F_SETOWN: 381 case LINUX_F_GETOWN: 382 /* 383 * We need to route fcntl() for tty descriptors around normal 384 * fcntl(), since NetBSD tty TIOC{G,S}PGRP semantics is too 385 * restrictive for Linux F_{G,S}ETOWN. For non-tty descriptors, 386 * this is not a problem. 387 */ 388 if ((fp = fd_getfile(fd)) == NULL) 389 return EBADF; 390 391 /* Check it's a character device vnode */ 392 if (fp->f_type != DTYPE_VNODE 393 || (vp = (struct vnode *)fp->f_data) == NULL 394 || vp->v_type != VCHR) { 395 fd_putfile(fd); 396 397 not_tty: 398 /* Not a tty, proceed with common fcntl() */ 399 cmd = cmd == LINUX_F_SETOWN ? F_SETOWN : F_GETOWN; 400 break; 401 } 402 403 vn_lock(vp, LK_SHARED | LK_RETRY); 404 error = VOP_GETATTR(vp, &va, l->l_cred); 405 VOP_UNLOCK(vp); 406 407 fd_putfile(fd); 408 409 if (error) 410 return error; 411 412 if ((tp = cdev_tty(va.va_rdev)) == NULL) 413 goto not_tty; 414 415 /* set tty pg_id appropriately */ 416 mutex_enter(&proc_lock); 417 if (cmd == LINUX_F_GETOWN) { 418 retval[0] = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID; 419 mutex_exit(&proc_lock); 420 return 0; 421 } 422 if ((long)arg <= 0) { 423 pgid = -(long)arg; 424 } else { 425 struct proc *p1 = proc_find((long)arg); 426 if (p1 == NULL) { 427 mutex_exit(&proc_lock); 428 return (ESRCH); 429 } 430 pgid = (long)p1->p_pgrp->pg_id; 431 } 432 pgrp = pgrp_find(pgid); 433 if (pgrp == NULL || pgrp->pg_session != p->p_session) { 434 mutex_exit(&proc_lock); 435 return EPERM; 436 } 437 tp->t_pgrp = pgrp; 438 mutex_exit(&proc_lock); 439 return 0; 440 441 case LINUX_F_DUPFD_CLOEXEC: 442 cmd = F_DUPFD_CLOEXEC; 443 break; 444 445 default: 446 return EOPNOTSUPP; 447 } 448 449 SCARG(&fca, fd) = fd; 450 SCARG(&fca, cmd) = cmd; 451 SCARG(&fca, arg) = arg; 452 453 return sys_fcntl(l, &fca, retval); 454 } 455 456 #if !defined(__aarch64__) && !defined(__amd64__) 457 /* 458 * Convert a NetBSD stat structure to a Linux stat structure. 459 * Only the order of the fields and the padding in the structure 460 * is different. linux_fakedev is a machine-dependent function 461 * which optionally converts device driver major/minor numbers 462 * (XXX horrible, but what can you do against code that compares 463 * things against constant major device numbers? sigh) 464 */ 465 static void 466 bsd_to_linux_stat(struct stat *bsp, struct linux_stat *lsp) 467 { 468 469 memset(lsp, 0, sizeof(*lsp)); 470 lsp->lst_dev = linux_fakedev(bsp->st_dev, 0); 471 lsp->lst_ino = bsp->st_ino; 472 lsp->lst_mode = (linux_mode_t)bsp->st_mode; 473 if (bsp->st_nlink >= (1 << 15)) 474 lsp->lst_nlink = (1 << 15) - 1; 475 else 476 lsp->lst_nlink = (linux_nlink_t)bsp->st_nlink; 477 lsp->lst_uid = bsp->st_uid; 478 lsp->lst_gid = bsp->st_gid; 479 lsp->lst_rdev = linux_fakedev(bsp->st_rdev, 1); 480 lsp->lst_size = bsp->st_size; 481 lsp->lst_blksize = bsp->st_blksize; 482 lsp->lst_blocks = bsp->st_blocks; 483 lsp->lst_atime = bsp->st_atime; 484 lsp->lst_mtime = bsp->st_mtime; 485 lsp->lst_ctime = bsp->st_ctime; 486 #ifdef LINUX_STAT_HAS_NSEC 487 lsp->lst_atime_nsec = bsp->st_atimensec; 488 lsp->lst_mtime_nsec = bsp->st_mtimensec; 489 lsp->lst_ctime_nsec = bsp->st_ctimensec; 490 #endif 491 } 492 493 /* 494 * The stat functions below are plain sailing. stat and lstat are handled 495 * by one function to avoid code duplication. 496 */ 497 int 498 linux_sys_fstat(struct lwp *l, const struct linux_sys_fstat_args *uap, register_t *retval) 499 { 500 /* { 501 syscallarg(int) fd; 502 syscallarg(linux_stat *) sp; 503 } */ 504 struct linux_stat tmplst; 505 struct stat tmpst; 506 int error; 507 508 error = do_sys_fstat(SCARG(uap, fd), &tmpst); 509 if (error != 0) 510 return error; 511 bsd_to_linux_stat(&tmpst, &tmplst); 512 513 return copyout(&tmplst, SCARG(uap, sp), sizeof tmplst); 514 } 515 516 static int 517 linux_stat1(const struct linux_sys_stat_args *uap, register_t *retval, int flags) 518 { 519 struct linux_stat tmplst; 520 struct stat tmpst; 521 int error; 522 523 error = do_sys_stat(SCARG(uap, path), flags, &tmpst); 524 if (error != 0) 525 return error; 526 527 bsd_to_linux_stat(&tmpst, &tmplst); 528 529 return copyout(&tmplst, SCARG(uap, sp), sizeof tmplst); 530 } 531 532 int 533 linux_sys_stat(struct lwp *l, const struct linux_sys_stat_args *uap, register_t *retval) 534 { 535 /* { 536 syscallarg(const char *) path; 537 syscallarg(struct linux_stat *) sp; 538 } */ 539 540 return linux_stat1(uap, retval, FOLLOW); 541 } 542 543 /* Note: this is "newlstat" in the Linux sources */ 544 /* (we don't bother with the old lstat currently) */ 545 int 546 linux_sys_lstat(struct lwp *l, const struct linux_sys_lstat_args *uap, register_t *retval) 547 { 548 /* { 549 syscallarg(const char *) path; 550 syscallarg(struct linux_stat *) sp; 551 } */ 552 553 return linux_stat1((const void *)uap, retval, NOFOLLOW); 554 } 555 #endif /* !__aarch64__ && !__amd64__ */ 556 557 /* 558 * The following syscalls are mostly here because of the alternate path check. 559 */ 560 561 int 562 linux_sys_linkat(struct lwp *l, const struct linux_sys_linkat_args *uap, register_t *retval) 563 { 564 /* { 565 syscallarg(int) fd1; 566 syscallarg(const char *) name1; 567 syscallarg(int) fd2; 568 syscallarg(const char *) name2; 569 syscallarg(int) flags; 570 } */ 571 int fd1 = SCARG(uap, fd1); 572 const char *name1 = SCARG(uap, name1); 573 int fd2 = SCARG(uap, fd2); 574 const char *name2 = SCARG(uap, name2); 575 int follow; 576 577 follow = SCARG(uap, flags) & LINUX_AT_SYMLINK_FOLLOW; 578 579 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 580 } 581 582 static int 583 linux_unlink_dircheck(const char *path) 584 { 585 struct nameidata nd; 586 struct pathbuf *pb; 587 int error; 588 589 /* 590 * Linux returns EISDIR if unlink(2) is called on a directory. 591 * We return EPERM in such cases. To emulate correct behaviour, 592 * check if the path points to directory and return EISDIR if this 593 * is the case. 594 * 595 * XXX this should really not copy in the path buffer twice... 596 */ 597 error = pathbuf_copyin(path, &pb); 598 if (error) { 599 return error; 600 } 601 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 602 if (namei(&nd) == 0) { 603 struct stat sb; 604 605 if (vn_stat(nd.ni_vp, &sb) == 0 606 && S_ISDIR(sb.st_mode)) 607 error = EISDIR; 608 609 vput(nd.ni_vp); 610 } 611 pathbuf_destroy(pb); 612 return error ? error : EPERM; 613 } 614 615 #if !defined(__aarch64__) 616 int 617 linux_sys_unlink(struct lwp *l, const struct linux_sys_unlink_args *uap, register_t *retval) 618 { 619 /* { 620 syscallarg(const char *) path; 621 } */ 622 int error; 623 624 error = sys_unlink(l, (const void *)uap, retval); 625 if (error == EPERM) 626 error = linux_unlink_dircheck(SCARG(uap, path)); 627 628 return error; 629 } 630 #endif 631 632 int 633 linux_sys_unlinkat(struct lwp *l, const struct linux_sys_unlinkat_args *uap, register_t *retval) 634 { 635 /* { 636 syscallarg(int) fd; 637 syscallarg(const char *) path; 638 syscallarg(int) flag; 639 } */ 640 struct sys_unlinkat_args ua; 641 int error; 642 643 SCARG(&ua, fd) = SCARG(uap, fd); 644 SCARG(&ua, path) = SCARG(uap, path); 645 SCARG(&ua, flag) = linux_to_bsd_atflags(SCARG(uap, flag)); 646 647 error = sys_unlinkat(l, &ua, retval); 648 if (error == EPERM) 649 error = linux_unlink_dircheck(SCARG(uap, path)); 650 651 return error; 652 } 653 654 #if !defined(__aarch64__) 655 int 656 linux_sys_mknod(struct lwp *l, const struct linux_sys_mknod_args *uap, register_t *retval) 657 { 658 /* { 659 syscallarg(const char *) path; 660 syscallarg(linux_umode_t) mode; 661 syscallarg(unsigned) dev; 662 } */ 663 struct linux_sys_mknodat_args ua; 664 665 SCARG(&ua, fd) = LINUX_AT_FDCWD; 666 SCARG(&ua, path) = SCARG(uap, path); 667 SCARG(&ua, mode) = SCARG(uap, mode); 668 SCARG(&ua, dev) = SCARG(uap, dev); 669 670 return linux_sys_mknodat(l, &ua, retval); 671 } 672 #endif 673 674 int 675 linux_sys_mknodat(struct lwp *l, const struct linux_sys_mknodat_args *uap, register_t *retval) 676 { 677 /* { 678 syscallarg(int) fd; 679 syscallarg(const char *) path; 680 syscallarg(linux_umode_t) mode; 681 syscallarg(unsigned) dev; 682 } */ 683 684 /* 685 * BSD handles FIFOs separately 686 */ 687 if (S_ISFIFO(SCARG(uap, mode))) { 688 struct sys_mkfifoat_args bma; 689 690 SCARG(&bma, fd) = SCARG(uap, fd); 691 SCARG(&bma, path) = SCARG(uap, path); 692 SCARG(&bma, mode) = SCARG(uap, mode); 693 return sys_mkfifoat(l, &bma, retval); 694 } else { 695 696 /* 697 * Linux device numbers uses 8 bits for minor and 8 bits 698 * for major. Due to how we map our major and minor, 699 * this just fits into our dev_t. Just mask off the 700 * upper 16bit to remove any random junk. 701 */ 702 703 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 704 SCARG(uap, mode), SCARG(uap, dev) & 0xffff, UIO_USERSPACE); 705 } 706 } 707 708 int 709 linux_sys_fchmodat(struct lwp *l, const struct linux_sys_fchmodat_args *uap, register_t *retval) 710 { 711 /* { 712 syscallarg(int) fd; 713 syscallarg(const char *) path; 714 syscallarg(linux_umode_t) mode; 715 } */ 716 717 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 718 SCARG(uap, mode), AT_SYMLINK_FOLLOW); 719 } 720 721 int 722 linux_sys_fchownat(struct lwp *l, const struct linux_sys_fchownat_args *uap, register_t *retval) 723 { 724 /* { 725 syscallarg(int) fd; 726 syscallarg(const char *) path; 727 syscallarg(uid_t) owner; 728 syscallarg(gid_t) group; 729 syscallarg(int) flag; 730 } */ 731 int flag; 732 733 flag = linux_to_bsd_atflags(SCARG(uap, flag)); 734 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 735 SCARG(uap, owner), SCARG(uap, group), flag); 736 } 737 738 int 739 linux_sys_faccessat(struct lwp *l, const struct linux_sys_faccessat_args *uap, register_t *retval) 740 { 741 /* { 742 syscallarg(int) fd; 743 syscallarg(const char *) path; 744 syscallarg(int) amode; 745 } */ 746 747 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 748 SCARG(uap, amode), AT_SYMLINK_FOLLOW); 749 } 750 751 /* 752 * This is just fsync() for now (just as it is in the Linux kernel) 753 * Note: this is not implemented under Linux on Alpha and Arm 754 * but should still be defined in our syscalls.master. 755 * (syscall #148 on the arm) 756 */ 757 int 758 linux_sys_fdatasync(struct lwp *l, const struct linux_sys_fdatasync_args *uap, register_t *retval) 759 { 760 /* { 761 syscallarg(int) fd; 762 } */ 763 764 return sys_fsync(l, (const void *)uap, retval); 765 } 766 767 /* 768 * pread(2). 769 */ 770 int 771 linux_sys_pread(struct lwp *l, const struct linux_sys_pread_args *uap, register_t *retval) 772 { 773 /* { 774 syscallarg(int) fd; 775 syscallarg(void *) buf; 776 syscallarg(size_t) nbyte; 777 syscallarg(off_t) offset; 778 } */ 779 struct sys_pread_args pra; 780 781 SCARG(&pra, fd) = SCARG(uap, fd); 782 SCARG(&pra, buf) = SCARG(uap, buf); 783 SCARG(&pra, nbyte) = SCARG(uap, nbyte); 784 SCARG(&pra, PAD) = 0; 785 SCARG(&pra, offset) = SCARG(uap, offset); 786 787 return sys_pread(l, &pra, retval); 788 } 789 790 /* 791 * pwrite(2). 792 */ 793 int 794 linux_sys_pwrite(struct lwp *l, const struct linux_sys_pwrite_args *uap, register_t *retval) 795 { 796 /* { 797 syscallarg(int) fd; 798 syscallarg(void *) buf; 799 syscallarg(size_t) nbyte; 800 syscallarg(off_t) offset; 801 } */ 802 struct sys_pwrite_args pra; 803 804 SCARG(&pra, fd) = SCARG(uap, fd); 805 SCARG(&pra, buf) = SCARG(uap, buf); 806 SCARG(&pra, nbyte) = SCARG(uap, nbyte); 807 SCARG(&pra, PAD) = 0; 808 SCARG(&pra, offset) = SCARG(uap, offset); 809 810 return sys_pwrite(l, &pra, retval); 811 } 812 813 /* 814 * preadv(2) 815 */ 816 int 817 linux_sys_preadv(struct lwp *l, const struct linux_sys_preadv_args *uap, 818 register_t *retval) 819 { 820 /* { 821 syscallarg(int) fd; 822 syscallarg(const struct iovec *) iovp; 823 syscallarg(int) iovcnt; 824 syscallarg(unsigned long) off_lo; 825 syscallarg(unsigned long) off_hi; 826 } */ 827 struct sys_preadv_args ua; 828 829 SCARG(&ua, fd) = SCARG(uap, fd); 830 SCARG(&ua, iovp) = SCARG(uap, iovp); 831 SCARG(&ua, iovcnt) = SCARG(uap, iovcnt); 832 SCARG(&ua, PAD) = 0; 833 SCARG(&ua, offset) = linux_hilo_to_off_t(SCARG(uap, off_hi), 834 SCARG(uap, off_lo)); 835 return sys_preadv(l, &ua, retval); 836 } 837 838 /* 839 * pwritev(2) 840 */ 841 int 842 linux_sys_pwritev(struct lwp *l, const struct linux_sys_pwritev_args *uap, 843 register_t *retval) 844 { 845 /* { 846 syscallarg(int) fd; 847 syscallarg(const struct iovec *) iovp; 848 syscallarg(int) iovcnt; 849 syscallarg(unsigned long) off_lo; 850 syscallarg(unsigned long) off_hi; 851 } */ 852 struct sys_pwritev_args ua; 853 854 SCARG(&ua, fd) = SCARG(uap, fd); 855 SCARG(&ua, iovp) = (const void *)SCARG(uap, iovp); 856 SCARG(&ua, iovcnt) = SCARG(uap, iovcnt); 857 SCARG(&ua, PAD) = 0; 858 SCARG(&ua, offset) = linux_hilo_to_off_t(SCARG(uap, off_hi), 859 SCARG(uap, off_lo)); 860 return sys_pwritev(l, &ua, retval); 861 } 862 863 int 864 linux_sys_dup3(struct lwp *l, const struct linux_sys_dup3_args *uap, 865 register_t *retval) 866 { 867 /* { 868 syscallarg(int) from; 869 syscallarg(int) to; 870 syscallarg(int) flags; 871 } */ 872 int flags; 873 874 flags = linux_to_bsd_ioflags(SCARG(uap, flags)); 875 if ((flags & ~O_CLOEXEC) != 0) 876 return EINVAL; 877 878 if (SCARG(uap, from) == SCARG(uap, to)) 879 return EINVAL; 880 881 return dodup(l, SCARG(uap, from), SCARG(uap, to), flags, retval); 882 } 883 884 885 int 886 linux_to_bsd_atflags(int lflags) 887 { 888 int bflags = 0; 889 890 if (lflags & LINUX_AT_SYMLINK_NOFOLLOW) 891 bflags |= AT_SYMLINK_NOFOLLOW; 892 if (lflags & LINUX_AT_REMOVEDIR) 893 bflags |= AT_REMOVEDIR; 894 if (lflags & LINUX_AT_SYMLINK_FOLLOW) 895 bflags |= AT_SYMLINK_FOLLOW; 896 897 return bflags; 898 } 899 900 901 #define LINUX_NOT_SUPPORTED(fun) \ 902 int \ 903 fun(struct lwp *l, const struct fun##_args *uap, register_t *retval) \ 904 { \ 905 return EOPNOTSUPP; \ 906 } 907 908 LINUX_NOT_SUPPORTED(linux_sys_setxattr) 909 LINUX_NOT_SUPPORTED(linux_sys_lsetxattr) 910 LINUX_NOT_SUPPORTED(linux_sys_fsetxattr) 911 912 LINUX_NOT_SUPPORTED(linux_sys_getxattr) 913 LINUX_NOT_SUPPORTED(linux_sys_lgetxattr) 914 LINUX_NOT_SUPPORTED(linux_sys_fgetxattr) 915 916 LINUX_NOT_SUPPORTED(linux_sys_listxattr) 917 LINUX_NOT_SUPPORTED(linux_sys_llistxattr) 918 LINUX_NOT_SUPPORTED(linux_sys_flistxattr) 919 920 LINUX_NOT_SUPPORTED(linux_sys_removexattr) 921 LINUX_NOT_SUPPORTED(linux_sys_lremovexattr) 922 LINUX_NOT_SUPPORTED(linux_sys_fremovexattr) 923 924 /* 925 * For now just return EOPNOTSUPP, this makes glibc posix_fallocate() 926 * to fallback to emulation. 927 * XXX Right now no filesystem actually implements fallocate support, 928 * so no need for mapping. 929 */ 930 LINUX_NOT_SUPPORTED(linux_sys_fallocate) 931