1 /* $NetBSD: linux_file.c,v 1.124 2024/06/29 13:46:10 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1995, 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Frank van der Linden and Eric Haszlakiewicz. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 
30 */ 31 32 /* 33 * Functions in multiarch: 34 * linux_sys_llseek : linux_llseek.c 35 */ 36 37 #include <sys/cdefs.h> 38 __KERNEL_RCSID(0, "$NetBSD: linux_file.c,v 1.124 2024/06/29 13:46:10 christos Exp $"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/namei.h> 43 #include <sys/proc.h> 44 #include <sys/file.h> 45 #include <sys/fcntl.h> 46 #include <sys/stat.h> 47 #include <sys/filedesc.h> 48 #include <sys/ioctl.h> 49 #include <sys/kernel.h> 50 #include <sys/mount.h> 51 #include <sys/namei.h> 52 #include <sys/vnode.h> 53 #include <sys/tty.h> 54 #include <sys/socketvar.h> 55 #include <sys/conf.h> 56 #include <sys/pipe.h> 57 58 #include <sys/syscallargs.h> 59 #include <sys/vfs_syscalls.h> 60 61 #include <compat/linux/common/linux_types.h> 62 #include <compat/linux/common/linux_signal.h> 63 #include <compat/linux/common/linux_fcntl.h> 64 #include <compat/linux/common/linux_util.h> 65 #include <compat/linux/common/linux_machdep.h> 66 #include <compat/linux/common/linux_ipc.h> 67 #include <compat/linux/common/linux_sem.h> 68 69 #include <compat/linux/linux_syscallargs.h> 70 71 static int bsd_to_linux_ioflags(int); 72 #if !defined(__aarch64__) && !defined(__amd64__) 73 static void bsd_to_linux_stat(struct stat *, struct linux_stat *); 74 #endif 75 76 conv_linux_flock(linux, flock) 77 78 /* 79 * Some file-related calls are handled here. The usual flag conversion 80 * an structure conversion is done, and alternate emul path searching. 81 */ 82 83 /* 84 * The next two functions convert between the Linux and NetBSD values 85 * of the flags used in open(2) and fcntl(2). 
86 */ 87 int 88 linux_to_bsd_ioflags(int lflags) 89 { 90 int res = 0; 91 92 res |= cvtto_bsd_mask(lflags, LINUX_O_WRONLY, O_WRONLY); 93 res |= cvtto_bsd_mask(lflags, LINUX_O_RDONLY, O_RDONLY); 94 res |= cvtto_bsd_mask(lflags, LINUX_O_RDWR, O_RDWR); 95 96 res |= cvtto_bsd_mask(lflags, LINUX_O_CREAT, O_CREAT); 97 res |= cvtto_bsd_mask(lflags, LINUX_O_EXCL, O_EXCL); 98 res |= cvtto_bsd_mask(lflags, LINUX_O_NOCTTY, O_NOCTTY); 99 res |= cvtto_bsd_mask(lflags, LINUX_O_TRUNC, O_TRUNC); 100 res |= cvtto_bsd_mask(lflags, LINUX_O_APPEND, O_APPEND); 101 res |= cvtto_bsd_mask(lflags, LINUX_O_NONBLOCK, O_NONBLOCK); 102 res |= cvtto_bsd_mask(lflags, LINUX_O_NDELAY, O_NDELAY); 103 res |= cvtto_bsd_mask(lflags, LINUX_O_SYNC, O_FSYNC); 104 res |= cvtto_bsd_mask(lflags, LINUX_FASYNC, O_ASYNC); 105 res |= cvtto_bsd_mask(lflags, LINUX_O_DIRECT, O_DIRECT); 106 res |= cvtto_bsd_mask(lflags, LINUX_O_DIRECTORY, O_DIRECTORY); 107 res |= cvtto_bsd_mask(lflags, LINUX_O_NOFOLLOW, O_NOFOLLOW); 108 res |= cvtto_bsd_mask(lflags, LINUX_O_CLOEXEC, O_CLOEXEC); 109 110 return res; 111 } 112 113 static int 114 bsd_to_linux_ioflags(int bflags) 115 { 116 int res = 0; 117 118 res |= cvtto_linux_mask(bflags, O_WRONLY, LINUX_O_WRONLY); 119 res |= cvtto_linux_mask(bflags, O_RDONLY, LINUX_O_RDONLY); 120 res |= cvtto_linux_mask(bflags, O_RDWR, LINUX_O_RDWR); 121 122 res |= cvtto_linux_mask(bflags, O_CREAT, LINUX_O_CREAT); 123 res |= cvtto_linux_mask(bflags, O_EXCL, LINUX_O_EXCL); 124 res |= cvtto_linux_mask(bflags, O_NOCTTY, LINUX_O_NOCTTY); 125 res |= cvtto_linux_mask(bflags, O_TRUNC, LINUX_O_TRUNC); 126 res |= cvtto_linux_mask(bflags, O_APPEND, LINUX_O_APPEND); 127 res |= cvtto_linux_mask(bflags, O_NONBLOCK, LINUX_O_NONBLOCK); 128 res |= cvtto_linux_mask(bflags, O_NDELAY, LINUX_O_NDELAY); 129 res |= cvtto_linux_mask(bflags, O_FSYNC, LINUX_O_SYNC); 130 res |= cvtto_linux_mask(bflags, O_ASYNC, LINUX_FASYNC); 131 res |= cvtto_linux_mask(bflags, O_DIRECT, LINUX_O_DIRECT); 132 res |= cvtto_linux_mask(bflags, 
O_DIRECTORY, LINUX_O_DIRECTORY); 133 res |= cvtto_linux_mask(bflags, O_NOFOLLOW, LINUX_O_NOFOLLOW); 134 res |= cvtto_linux_mask(bflags, O_CLOEXEC, LINUX_O_CLOEXEC); 135 136 return res; 137 } 138 139 static inline off_t 140 linux_hilo_to_off_t(unsigned long hi, unsigned long lo) 141 { 142 #ifdef _LP64 143 /* 144 * Linux discards the "hi" portion on LP64 platforms; even though 145 * glibc puts of the upper 32-bits of the offset into the "hi" 146 * argument regardless, the "lo" argument has all the bits in 147 * this case. 148 */ 149 (void) hi; 150 return (off_t)lo; 151 #else 152 return (((off_t)hi) << 32) | lo; 153 #endif /* _LP64 */ 154 } 155 156 #if !defined(__aarch64__) 157 /* 158 * creat(2) is an obsolete function, but it's present as a Linux 159 * system call, so let's deal with it. 160 * 161 * Note: On the Alpha this doesn't really exist in Linux, but it's defined 162 * in syscalls.master anyway so this doesn't have to be special cased. 163 * 164 * Just call open(2) with the TRUNC, CREAT and WRONLY flags. 165 */ 166 int 167 linux_sys_creat(struct lwp *l, const struct linux_sys_creat_args *uap, register_t *retval) 168 { 169 /* { 170 syscallarg(const char *) path; 171 syscallarg(linux_umode_t) mode; 172 } */ 173 struct sys_open_args oa; 174 175 SCARG(&oa, path) = SCARG(uap, path); 176 SCARG(&oa, flags) = O_CREAT | O_TRUNC | O_WRONLY; 177 SCARG(&oa, mode) = SCARG(uap, mode); 178 179 return sys_open(l, &oa, retval); 180 } 181 #endif 182 183 static void 184 linux_open_ctty(struct lwp *l, int flags, int fd) 185 { 186 struct proc *p = l->l_proc; 187 188 /* 189 * this bit from sunos_misc.c (and svr4_fcntl.c). 190 * If we are a session leader, and we don't have a controlling 191 * terminal yet, and the O_NOCTTY flag is not set, try to make 192 * this the controlling terminal. 
193 */ 194 if (!(flags & O_NOCTTY) && SESS_LEADER(p) && !(p->p_lflag & PL_CONTROLT)) { 195 file_t *fp; 196 197 fp = fd_getfile(fd); 198 199 /* ignore any error, just give it a try */ 200 if (fp != NULL) { 201 if (fp->f_type == DTYPE_VNODE) { 202 (fp->f_ops->fo_ioctl) (fp, TIOCSCTTY, NULL); 203 } 204 fd_putfile(fd); 205 } 206 } 207 } 208 209 /* 210 * open(2). Take care of the different flag values, and let the 211 * NetBSD syscall do the real work. See if this operation 212 * gives the current process a controlling terminal. 213 * (XXX is this necessary?) 214 */ 215 int 216 linux_sys_open(struct lwp *l, const struct linux_sys_open_args *uap, register_t *retval) 217 { 218 /* { 219 syscallarg(const char *) path; 220 syscallarg(int) flags; 221 syscallarg(linux_umode_t) mode; 222 } */ 223 int error, fl; 224 struct sys_open_args boa; 225 226 fl = linux_to_bsd_ioflags(SCARG(uap, flags)); 227 228 SCARG(&boa, path) = SCARG(uap, path); 229 SCARG(&boa, flags) = fl; 230 SCARG(&boa, mode) = SCARG(uap, mode); 231 232 if ((error = sys_open(l, &boa, retval))) 233 return (error == EFTYPE) ? ELOOP : error; 234 235 linux_open_ctty(l, fl, *retval); 236 return 0; 237 } 238 239 int 240 linux_sys_openat(struct lwp *l, const struct linux_sys_openat_args *uap, register_t *retval) 241 { 242 /* { 243 syscallarg(int) fd; 244 syscallarg(const char *) path; 245 syscallarg(int) flags; 246 syscallarg(linux_umode_t) mode; 247 } */ 248 int error, fl; 249 struct sys_openat_args boa; 250 251 fl = linux_to_bsd_ioflags(SCARG(uap, flags)); 252 253 SCARG(&boa, fd) = SCARG(uap, fd); 254 SCARG(&boa, path) = SCARG(uap, path); 255 SCARG(&boa, oflags) = fl; 256 SCARG(&boa, mode) = SCARG(uap, mode); 257 258 if ((error = sys_openat(l, &boa, retval))) 259 return (error == EFTYPE) ? ELOOP : error; 260 261 linux_open_ctty(l, fl, *retval); 262 return 0; 263 } 264 265 /* 266 * Most actions in the fcntl() call are straightforward; simply 267 * pass control to the NetBSD system call. 
 * A few commands need
 * conversions after the actual system call has done its work,
 * because the flag values and lock structure are different.
 *
 * Commands are handled in one of two ways: either "cmd" is rewritten to
 * the NetBSD command and control falls out of the switch to the common
 * sys_fcntl() call at the bottom, or the case does all of its work and
 * returns directly.  Unknown commands yield EOPNOTSUPP.
 */
int
linux_sys_fcntl(struct lwp *l, const struct linux_sys_fcntl_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
		syscallarg(int) cmd;
		syscallarg(void *) arg;
	} */
	struct proc *p = l->l_proc;
	int fd, cmd, error;
	u_long val;
	void *arg;
	struct sys_fcntl_args fca;
	file_t *fp;
	struct vnode *vp;
	struct vattr va;
	long pgid;
	struct pgrp *pgrp;
	struct tty *tp;

	fd = SCARG(uap, fd);
	cmd = SCARG(uap, cmd);
	arg = SCARG(uap, arg);

	switch (cmd) {

	/* Simple renames: only the command number differs. */
	case LINUX_F_DUPFD:
		cmd = F_DUPFD;
		break;

	case LINUX_F_GETFD:
		cmd = F_GETFD;
		break;

	case LINUX_F_SETFD:
		cmd = F_SETFD;
		break;

	case LINUX_F_GETFL:
		/* Run the native F_GETFL, then translate the flags it returned. */
		SCARG(&fca, fd) = fd;
		SCARG(&fca, cmd) = F_GETFL;
		SCARG(&fca, arg) = arg;
		if ((error = sys_fcntl(l, &fca, retval)))
			return error;
		retval[0] = bsd_to_linux_ioflags(retval[0]);
		return 0;

	case LINUX_F_SETFL: {
		/* Non-NULL only while a pipe-like file is held for the FASYNC fixup. */
		file_t *fp1 = NULL;

		val = linux_to_bsd_ioflags((unsigned long)SCARG(uap, arg));
		/*
		 * Linux seems to have same semantics for sending SIGIO to the
		 * read side of socket, but slightly different semantics
		 * for SIGIO to the write side.  Rather than sending the SIGIO
		 * every time it's possible to write (directly) more data, it
		 * only sends SIGIO if last write(2) failed due to insufficient
		 * memory to hold the data. This is compatible enough
		 * with NetBSD semantics to not do anything about the
		 * difference.
		 *
		 * Linux does NOT send SIGIO for pipes. Deal with socketpair
		 * ones and DTYPE_PIPE ones. For these, we don't set
		 * the underlying flags (we don't pass O_ASYNC flag down
		 * to sys_fcntl()), but set the FASYNC flag for file descriptor,
		 * so that F_GETFL would report the ASYNC i/o is on.
		 */
		if (val & O_ASYNC) {
			if (((fp1 = fd_getfile(fd)) == NULL))
				return (EBADF);
			if (((fp1->f_type == DTYPE_SOCKET) && fp1->f_data
			    && ((struct socket *)fp1->f_data)->so_state & SS_ISAPIPE)
			    || (fp1->f_type == DTYPE_PIPE))
				val &= ~O_ASYNC;
			else {
				/* not a pipe, do not modify anything */
				fd_putfile(fd);
				fp1 = NULL;
			}
		}

		SCARG(&fca, fd) = fd;
		SCARG(&fca, cmd) = F_SETFL;
		SCARG(&fca, arg) = (void *) val;

		error = sys_fcntl(l, &fca, retval);

		/* Now set the FASYNC flag for pipes */
		if (fp1) {
			if (!error) {
				mutex_enter(&fp1->f_lock);
				fp1->f_flag |= FASYNC;
				mutex_exit(&fp1->f_lock);
			}
			/* Drop the hold taken above, whether or not F_SETFL succeeded. */
			fd_putfile(fd);
		}

		return (error);
	}

	/*
	 * NOTE(review): no break follows the two macro invocations below;
	 * presumably do_linux_getlk()/do_linux_setlk() return from this
	 * function themselves -- confirm against their definitions.
	 */
	case LINUX_F_GETLK:
		do_linux_getlk(fd, cmd, arg, linux, flock);

	case LINUX_F_SETLK:
	case LINUX_F_SETLKW:
		do_linux_setlk(fd, cmd, arg, linux, flock, LINUX_F_SETLK);

	case LINUX_F_SETOWN:
	case LINUX_F_GETOWN:
		/*
		 * We need to route fcntl() for tty descriptors around normal
		 * fcntl(), since NetBSD tty TIOC{G,S}PGRP semantics is too
		 * restrictive for Linux F_{G,S}ETOWN. For non-tty descriptors,
		 * this is not a problem.
		 */
		if ((fp = fd_getfile(fd)) == NULL)
			return EBADF;

		/* Check it's a character device vnode */
		if (fp->f_type != DTYPE_VNODE
		    || (vp = (struct vnode *)fp->f_data) == NULL
		    || vp->v_type != VCHR) {
			fd_putfile(fd);

	/*
	 * The label sits after the fd_putfile() above: the goto further
	 * down has already released the file, so jumping here does not
	 * release it a second time.
	 */
	not_tty:
			/* Not a tty, proceed with common fcntl() */
			cmd = cmd == LINUX_F_SETOWN ? F_SETOWN : F_GETOWN;
			break;
		}

		/* Fetch the vnode attributes; va_rdev is used for the tty lookup. */
		vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, &va, l->l_cred);
		VOP_UNLOCK(vp);

		fd_putfile(fd);

		if (error)
			return error;

		/* A character device without a tty gets the common treatment. */
		if ((tp = cdev_tty(va.va_rdev)) == NULL)
			goto not_tty;

		/* set tty pg_id appropriately */
		mutex_enter(&proc_lock);
		if (cmd == LINUX_F_GETOWN) {
			retval[0] = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
			mutex_exit(&proc_lock);
			return 0;
		}
		/*
		 * F_SETOWN: a zero or negative argument is taken as a negated
		 * process group id; a positive one is a pid, and that
		 * process's group is used.
		 */
		if ((long)arg <= 0) {
			pgid = -(long)arg;
		} else {
			struct proc *p1 = proc_find((long)arg);
			if (p1 == NULL) {
				mutex_exit(&proc_lock);
				return (ESRCH);
			}
			pgid = (long)p1->p_pgrp->pg_id;
		}
		/* The target group must exist and be in the caller's session. */
		pgrp = pgrp_find(pgid);
		if (pgrp == NULL || pgrp->pg_session != p->p_session) {
			mutex_exit(&proc_lock);
			return EPERM;
		}
		tp->t_pgrp = pgrp;
		mutex_exit(&proc_lock);
		return 0;

	case LINUX_F_DUPFD_CLOEXEC:
		cmd = F_DUPFD_CLOEXEC;
		break;

	case LINUX_F_ADD_SEALS:
		cmd = F_ADD_SEALS;
		break;

	case LINUX_F_GET_SEALS:
		cmd = F_GET_SEALS;
		break;

	default:
		return EOPNOTSUPP;
	}

	/* Common tail: hand the (renamed) command to the native fcntl. */
	SCARG(&fca, fd) = fd;
	SCARG(&fca, cmd) = cmd;
	SCARG(&fca, arg) = arg;

	return sys_fcntl(l, &fca, retval);
}

#if !defined(__aarch64__) && !defined(__amd64__)
/*
 * Convert a NetBSD stat structure to a Linux stat structure.
 * Only the order of the fields and the padding in the structure
 * is different. linux_fakedev is a machine-dependent function
 * which optionally converts device driver major/minor numbers
 * (XXX horrible, but what can you do against code that compares
 * things against constant major device numbers?
sigh) 470 */ 471 static void 472 bsd_to_linux_stat(struct stat *bsp, struct linux_stat *lsp) 473 { 474 475 memset(lsp, 0, sizeof(*lsp)); 476 lsp->lst_dev = linux_fakedev(bsp->st_dev, 0); 477 lsp->lst_ino = bsp->st_ino; 478 lsp->lst_mode = (linux_mode_t)bsp->st_mode; 479 if (bsp->st_nlink >= (1 << 15)) 480 lsp->lst_nlink = (1 << 15) - 1; 481 else 482 lsp->lst_nlink = (linux_nlink_t)bsp->st_nlink; 483 lsp->lst_uid = bsp->st_uid; 484 lsp->lst_gid = bsp->st_gid; 485 lsp->lst_rdev = linux_fakedev(bsp->st_rdev, 1); 486 lsp->lst_size = bsp->st_size; 487 lsp->lst_blksize = bsp->st_blksize; 488 lsp->lst_blocks = bsp->st_blocks; 489 lsp->lst_atime = bsp->st_atime; 490 lsp->lst_mtime = bsp->st_mtime; 491 lsp->lst_ctime = bsp->st_ctime; 492 #ifdef LINUX_STAT_HAS_NSEC 493 lsp->lst_atime_nsec = bsp->st_atimensec; 494 lsp->lst_mtime_nsec = bsp->st_mtimensec; 495 lsp->lst_ctime_nsec = bsp->st_ctimensec; 496 #endif 497 } 498 499 /* 500 * The stat functions below are plain sailing. stat and lstat are handled 501 * by one function to avoid code duplication. 
502 */ 503 int 504 linux_sys_fstat(struct lwp *l, const struct linux_sys_fstat_args *uap, register_t *retval) 505 { 506 /* { 507 syscallarg(int) fd; 508 syscallarg(linux_stat *) sp; 509 } */ 510 struct linux_stat tmplst; 511 struct stat tmpst; 512 int error; 513 514 error = do_sys_fstat(SCARG(uap, fd), &tmpst); 515 if (error != 0) 516 return error; 517 bsd_to_linux_stat(&tmpst, &tmplst); 518 519 return copyout(&tmplst, SCARG(uap, sp), sizeof tmplst); 520 } 521 522 static int 523 linux_stat1(const struct linux_sys_stat_args *uap, register_t *retval, int flags) 524 { 525 struct linux_stat tmplst; 526 struct stat tmpst; 527 int error; 528 529 error = do_sys_stat(SCARG(uap, path), flags, &tmpst); 530 if (error != 0) 531 return error; 532 533 bsd_to_linux_stat(&tmpst, &tmplst); 534 535 return copyout(&tmplst, SCARG(uap, sp), sizeof tmplst); 536 } 537 538 int 539 linux_sys_stat(struct lwp *l, const struct linux_sys_stat_args *uap, register_t *retval) 540 { 541 /* { 542 syscallarg(const char *) path; 543 syscallarg(struct linux_stat *) sp; 544 } */ 545 546 return linux_stat1(uap, retval, FOLLOW); 547 } 548 549 /* Note: this is "newlstat" in the Linux sources */ 550 /* (we don't bother with the old lstat currently) */ 551 int 552 linux_sys_lstat(struct lwp *l, const struct linux_sys_lstat_args *uap, register_t *retval) 553 { 554 /* { 555 syscallarg(const char *) path; 556 syscallarg(struct linux_stat *) sp; 557 } */ 558 559 return linux_stat1((const void *)uap, retval, NOFOLLOW); 560 } 561 #endif /* !__aarch64__ && !__amd64__ */ 562 563 /* 564 * The following syscalls are mostly here because of the alternate path check. 
565 */ 566 567 int 568 linux_sys_linkat(struct lwp *l, const struct linux_sys_linkat_args *uap, register_t *retval) 569 { 570 /* { 571 syscallarg(int) fd1; 572 syscallarg(const char *) name1; 573 syscallarg(int) fd2; 574 syscallarg(const char *) name2; 575 syscallarg(int) flags; 576 } */ 577 int fd1 = SCARG(uap, fd1); 578 const char *name1 = SCARG(uap, name1); 579 int fd2 = SCARG(uap, fd2); 580 const char *name2 = SCARG(uap, name2); 581 int follow; 582 583 follow = SCARG(uap, flags) & LINUX_AT_SYMLINK_FOLLOW; 584 585 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 586 } 587 588 static int 589 linux_unlink_dircheck(const char *path) 590 { 591 struct nameidata nd; 592 struct pathbuf *pb; 593 int error; 594 595 /* 596 * Linux returns EISDIR if unlink(2) is called on a directory. 597 * We return EPERM in such cases. To emulate correct behaviour, 598 * check if the path points to directory and return EISDIR if this 599 * is the case. 600 * 601 * XXX this should really not copy in the path buffer twice... 602 */ 603 error = pathbuf_copyin(path, &pb); 604 if (error) { 605 return error; 606 } 607 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 608 if (namei(&nd) == 0) { 609 struct stat sb; 610 611 if (vn_stat(nd.ni_vp, &sb) == 0 612 && S_ISDIR(sb.st_mode)) 613 error = EISDIR; 614 615 vput(nd.ni_vp); 616 } 617 pathbuf_destroy(pb); 618 return error ? 
error : EPERM; 619 } 620 621 int 622 linux_sys_unlink(struct lwp *l, const struct linux_sys_unlink_args *uap, register_t *retval) 623 { 624 /* { 625 syscallarg(const char *) path; 626 } */ 627 int error; 628 629 error = sys_unlink(l, (const void *)uap, retval); 630 if (error == EPERM) 631 error = linux_unlink_dircheck(SCARG(uap, path)); 632 633 return error; 634 } 635 636 int 637 linux_sys_unlinkat(struct lwp *l, const struct linux_sys_unlinkat_args *uap, register_t *retval) 638 { 639 /* { 640 syscallarg(int) fd; 641 syscallarg(const char *) path; 642 syscallarg(int) flag; 643 } */ 644 struct sys_unlinkat_args ua; 645 int error; 646 647 SCARG(&ua, fd) = SCARG(uap, fd); 648 SCARG(&ua, path) = SCARG(uap, path); 649 SCARG(&ua, flag) = linux_to_bsd_atflags(SCARG(uap, flag)); 650 651 error = sys_unlinkat(l, &ua, retval); 652 if (error == EPERM) 653 error = linux_unlink_dircheck(SCARG(uap, path)); 654 655 return error; 656 } 657 658 int 659 linux_sys_mknod(struct lwp *l, const struct linux_sys_mknod_args *uap, register_t *retval) 660 { 661 /* { 662 syscallarg(const char *) path; 663 syscallarg(linux_umode_t) mode; 664 syscallarg(unsigned) dev; 665 } */ 666 struct linux_sys_mknodat_args ua; 667 668 SCARG(&ua, fd) = LINUX_AT_FDCWD; 669 SCARG(&ua, path) = SCARG(uap, path); 670 SCARG(&ua, mode) = SCARG(uap, mode); 671 SCARG(&ua, dev) = SCARG(uap, dev); 672 673 return linux_sys_mknodat(l, &ua, retval); 674 } 675 676 int 677 linux_sys_mknodat(struct lwp *l, const struct linux_sys_mknodat_args *uap, register_t *retval) 678 { 679 /* { 680 syscallarg(int) fd; 681 syscallarg(const char *) path; 682 syscallarg(linux_umode_t) mode; 683 syscallarg(unsigned) dev; 684 } */ 685 686 /* 687 * BSD handles FIFOs separately 688 */ 689 if (S_ISFIFO(SCARG(uap, mode))) { 690 struct sys_mkfifoat_args bma; 691 692 SCARG(&bma, fd) = SCARG(uap, fd); 693 SCARG(&bma, path) = SCARG(uap, path); 694 SCARG(&bma, mode) = SCARG(uap, mode); 695 return sys_mkfifoat(l, &bma, retval); 696 } else { 697 698 /* 
699 * Linux device numbers uses 8 bits for minor and 8 bits 700 * for major. Due to how we map our major and minor, 701 * this just fits into our dev_t. Just mask off the 702 * upper 16bit to remove any random junk. 703 */ 704 705 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 706 SCARG(uap, mode), SCARG(uap, dev) & 0xffff, UIO_USERSPACE); 707 } 708 } 709 710 int 711 linux_sys_fchmodat(struct lwp *l, const struct linux_sys_fchmodat_args *uap, register_t *retval) 712 { 713 /* { 714 syscallarg(int) fd; 715 syscallarg(const char *) path; 716 syscallarg(linux_umode_t) mode; 717 } */ 718 719 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 720 SCARG(uap, mode), AT_SYMLINK_FOLLOW); 721 } 722 723 int 724 linux_sys_fchownat(struct lwp *l, const struct linux_sys_fchownat_args *uap, register_t *retval) 725 { 726 /* { 727 syscallarg(int) fd; 728 syscallarg(const char *) path; 729 syscallarg(uid_t) owner; 730 syscallarg(gid_t) group; 731 syscallarg(int) flag; 732 } */ 733 int flag; 734 735 flag = linux_to_bsd_atflags(SCARG(uap, flag)); 736 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 737 SCARG(uap, owner), SCARG(uap, group), flag); 738 } 739 740 int 741 linux_sys_faccessat(struct lwp *l, const struct linux_sys_faccessat_args *uap, register_t *retval) 742 { 743 /* { 744 syscallarg(int) fd; 745 syscallarg(const char *) path; 746 syscallarg(int) amode; 747 } */ 748 749 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 750 SCARG(uap, amode), AT_SYMLINK_FOLLOW); 751 } 752 753 /* 754 * This is just fsync() for now (just as it is in the Linux kernel) 755 * Note: this is not implemented under Linux on Alpha and Arm 756 * but should still be defined in our syscalls.master. 
757 * (syscall #148 on the arm) 758 */ 759 int 760 linux_sys_fdatasync(struct lwp *l, const struct linux_sys_fdatasync_args *uap, register_t *retval) 761 { 762 /* { 763 syscallarg(int) fd; 764 } */ 765 766 return sys_fsync(l, (const void *)uap, retval); 767 } 768 769 /* 770 * pread(2). 771 */ 772 int 773 linux_sys_pread(struct lwp *l, const struct linux_sys_pread_args *uap, register_t *retval) 774 { 775 /* { 776 syscallarg(int) fd; 777 syscallarg(void *) buf; 778 syscallarg(size_t) nbyte; 779 syscallarg(off_t) offset; 780 } */ 781 struct sys_pread_args pra; 782 783 SCARG(&pra, fd) = SCARG(uap, fd); 784 SCARG(&pra, buf) = SCARG(uap, buf); 785 SCARG(&pra, nbyte) = SCARG(uap, nbyte); 786 SCARG(&pra, PAD) = 0; 787 SCARG(&pra, offset) = SCARG(uap, offset); 788 789 return sys_pread(l, &pra, retval); 790 } 791 792 /* 793 * pwrite(2). 794 */ 795 int 796 linux_sys_pwrite(struct lwp *l, const struct linux_sys_pwrite_args *uap, register_t *retval) 797 { 798 /* { 799 syscallarg(int) fd; 800 syscallarg(void *) buf; 801 syscallarg(size_t) nbyte; 802 syscallarg(off_t) offset; 803 } */ 804 struct sys_pwrite_args pra; 805 806 SCARG(&pra, fd) = SCARG(uap, fd); 807 SCARG(&pra, buf) = SCARG(uap, buf); 808 SCARG(&pra, nbyte) = SCARG(uap, nbyte); 809 SCARG(&pra, PAD) = 0; 810 SCARG(&pra, offset) = SCARG(uap, offset); 811 812 return sys_pwrite(l, &pra, retval); 813 } 814 815 /* 816 * preadv(2) 817 */ 818 int 819 linux_sys_preadv(struct lwp *l, const struct linux_sys_preadv_args *uap, 820 register_t *retval) 821 { 822 /* { 823 syscallarg(int) fd; 824 syscallarg(const struct iovec *) iovp; 825 syscallarg(int) iovcnt; 826 syscallarg(unsigned long) off_lo; 827 syscallarg(unsigned long) off_hi; 828 } */ 829 struct sys_preadv_args ua; 830 831 SCARG(&ua, fd) = SCARG(uap, fd); 832 SCARG(&ua, iovp) = SCARG(uap, iovp); 833 SCARG(&ua, iovcnt) = SCARG(uap, iovcnt); 834 SCARG(&ua, PAD) = 0; 835 SCARG(&ua, offset) = linux_hilo_to_off_t(SCARG(uap, off_hi), 836 SCARG(uap, off_lo)); 837 return 
sys_preadv(l, &ua, retval); 838 } 839 840 /* 841 * pwritev(2) 842 */ 843 int 844 linux_sys_pwritev(struct lwp *l, const struct linux_sys_pwritev_args *uap, 845 register_t *retval) 846 { 847 /* { 848 syscallarg(int) fd; 849 syscallarg(const struct iovec *) iovp; 850 syscallarg(int) iovcnt; 851 syscallarg(unsigned long) off_lo; 852 syscallarg(unsigned long) off_hi; 853 } */ 854 struct sys_pwritev_args ua; 855 856 SCARG(&ua, fd) = SCARG(uap, fd); 857 SCARG(&ua, iovp) = (const void *)SCARG(uap, iovp); 858 SCARG(&ua, iovcnt) = SCARG(uap, iovcnt); 859 SCARG(&ua, PAD) = 0; 860 SCARG(&ua, offset) = linux_hilo_to_off_t(SCARG(uap, off_hi), 861 SCARG(uap, off_lo)); 862 return sys_pwritev(l, &ua, retval); 863 } 864 865 int 866 linux_sys_dup3(struct lwp *l, const struct linux_sys_dup3_args *uap, 867 register_t *retval) 868 { 869 /* { 870 syscallarg(int) from; 871 syscallarg(int) to; 872 syscallarg(int) flags; 873 } */ 874 int flags; 875 876 flags = linux_to_bsd_ioflags(SCARG(uap, flags)); 877 if ((flags & ~O_CLOEXEC) != 0) 878 return EINVAL; 879 880 if (SCARG(uap, from) == SCARG(uap, to)) 881 return EINVAL; 882 883 return dodup(l, SCARG(uap, from), SCARG(uap, to), flags, retval); 884 } 885 886 887 int 888 linux_to_bsd_atflags(int lflags) 889 { 890 int bflags = 0; 891 892 if (lflags & LINUX_AT_SYMLINK_NOFOLLOW) 893 bflags |= AT_SYMLINK_NOFOLLOW; 894 if (lflags & LINUX_AT_REMOVEDIR) 895 bflags |= AT_REMOVEDIR; 896 if (lflags & LINUX_AT_SYMLINK_FOLLOW) 897 bflags |= AT_SYMLINK_FOLLOW; 898 899 return bflags; 900 } 901 902 int 903 linux_sys_faccessat2(lwp_t *l, const struct linux_sys_faccessat2_args *uap, 904 register_t *retval) 905 { 906 /* { 907 syscallarg(int) fd; 908 syscallarg(const char *) path; 909 syscallarg(int) amode; 910 syscallarg(int) flags; 911 }*/ 912 int flag = linux_to_bsd_atflags(SCARG(uap, flags)); 913 int mode = SCARG(uap, amode); 914 int fd = SCARG(uap, fd); 915 const char *path = SCARG(uap, path); 916 917 return do_sys_accessat(l, fd, path, mode, flag); 918 } 
919 920 921 #define LINUX_NOT_SUPPORTED(fun) \ 922 int \ 923 fun(struct lwp *l, const struct fun##_args *uap, register_t *retval) \ 924 { \ 925 return EOPNOTSUPP; \ 926 } 927 928 LINUX_NOT_SUPPORTED(linux_sys_setxattr) 929 LINUX_NOT_SUPPORTED(linux_sys_lsetxattr) 930 LINUX_NOT_SUPPORTED(linux_sys_fsetxattr) 931 932 LINUX_NOT_SUPPORTED(linux_sys_getxattr) 933 LINUX_NOT_SUPPORTED(linux_sys_lgetxattr) 934 LINUX_NOT_SUPPORTED(linux_sys_fgetxattr) 935 936 LINUX_NOT_SUPPORTED(linux_sys_listxattr) 937 LINUX_NOT_SUPPORTED(linux_sys_llistxattr) 938 LINUX_NOT_SUPPORTED(linux_sys_flistxattr) 939 940 LINUX_NOT_SUPPORTED(linux_sys_removexattr) 941 LINUX_NOT_SUPPORTED(linux_sys_lremovexattr) 942 LINUX_NOT_SUPPORTED(linux_sys_fremovexattr) 943 944 /* 945 * For now just return EOPNOTSUPP, this makes glibc posix_fallocate() 946 * to fallback to emulation. 947 * XXX Right now no filesystem actually implements fallocate support, 948 * so no need for mapping. 949 */ 950 LINUX_NOT_SUPPORTED(linux_sys_fallocate) 951