1 /* $NetBSD: linux_file64.c,v 1.68 2023/07/29 15:04:29 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1995, 1998, 2000, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Frank van der Linden and Eric Haszlakiewicz. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Linux 64bit filesystem calls. Used on 32bit archs, not used on 64bit ones. 34 */ 35 36 #include <sys/cdefs.h> 37 __KERNEL_RCSID(0, "$NetBSD: linux_file64.c,v 1.68 2023/07/29 15:04:29 christos Exp $"); 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/namei.h> 42 #include <sys/proc.h> 43 #include <sys/dirent.h> 44 #include <sys/file.h> 45 #include <sys/stat.h> 46 #include <sys/filedesc.h> 47 #include <sys/ioctl.h> 48 #include <sys/kernel.h> 49 #include <sys/mount.h> 50 #include <sys/malloc.h> 51 #include <sys/namei.h> 52 #include <sys/vfs_syscalls.h> 53 #include <sys/vnode.h> 54 #include <sys/tty.h> 55 #include <sys/conf.h> 56 57 #include <sys/syscallargs.h> 58 59 #include <compat/linux/common/linux_types.h> 60 #include <compat/linux/common/linux_signal.h> 61 #include <compat/linux/common/linux_fcntl.h> 62 #include <compat/linux/common/linux_util.h> 63 #include <compat/linux/common/linux_machdep.h> 64 #include <compat/linux/common/linux_dirent.h> 65 #include <compat/linux/common/linux_ipc.h> 66 #include <compat/linux/common/linux_sem.h> 67 68 #include <compat/linux/linux_syscall.h> 69 #include <compat/linux/linux_syscallargs.h> 70 71 static void bsd_to_linux_stat64(struct stat *, struct linux_stat64 *); 72 73 /* 74 * Convert a NetBSD stat structure to a Linux stat structure. 75 * Only the order of the fields and the padding in the structure 76 * is different. linux_fakedev is a machine-dependent function 77 * which optionally converts device driver major/minor numbers 78 * (XXX horrible, but what can you do against code that compares 79 * things against constant major device numbers? sigh) 80 */ 81 static void 82 bsd_to_linux_stat64(struct stat *bsp, struct linux_stat64 *lsp) 83 { 84 memset(lsp, 0, sizeof(*lsp)); 85 lsp->lst_dev = linux_fakedev(bsp->st_dev, 0); 86 lsp->lst_ino = bsp->st_ino; 87 lsp->lst_mode = (linux_mode_t)bsp->st_mode; 88 if (bsp->st_nlink >= (1 << 15)) 89 lsp->lst_nlink = (1 << 15) - 1; 90 else 91 lsp->lst_nlink = (linux_nlink_t)bsp->st_nlink; 92 lsp->lst_uid = bsp->st_uid; 93 lsp->lst_gid = bsp->st_gid; 94 lsp->lst_rdev = linux_fakedev(bsp->st_rdev, 1); 95 lsp->lst_size = bsp->st_size; 96 lsp->lst_blksize = bsp->st_blksize; 97 lsp->lst_blocks = bsp->st_blocks; 98 lsp->lst_atime = bsp->st_atime; 99 lsp->lst_mtime = bsp->st_mtime; 100 lsp->lst_ctime = bsp->st_ctime; 101 # ifdef LINUX_STAT64_HAS_NSEC 102 lsp->lst_atime_nsec = bsp->st_atimensec; 103 lsp->lst_mtime_nsec = bsp->st_mtimensec; 104 lsp->lst_ctime_nsec = bsp->st_ctimensec; 105 # endif 106 # if LINUX_STAT64_HAS_BROKEN_ST_INO 107 lsp->__lst_ino = (linux_ino_t) bsp->st_ino; 108 # endif 109 } 110 111 int 112 bsd_to_linux_statx(struct stat *st, struct linux_statx *stx, 113 unsigned int mask) 114 { 115 if (mask & STATX__RESERVED) 116 return EINVAL; 117 118 /* XXX: STATX_MNT_ID is not supported */ 119 unsigned int rmask = STATX_TYPE | STATX_MODE | STATX_NLINK | 120 STATX_UID | STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME | 121 STATX_INO | STATX_SIZE | STATX_BLOCKS | STATX_BTIME; 122 123 memset(stx, 0, sizeof(*stx)); 124 125 if ((st->st_flags & UF_NODUMP) != 0) 126 stx->stx_attributes |= STATX_ATTR_NODUMP; 127 if ((st->st_flags & (UF_IMMUTABLE|SF_IMMUTABLE)) != 0) 128 stx->stx_attributes |= STATX_ATTR_IMMUTABLE; 129 if ((st->st_flags & (UF_APPEND|SF_APPEND)) != 0) 130 stx->stx_attributes |= STATX_ATTR_APPEND; 131 132 stx->stx_attributes_mask = 133 STATX_ATTR_NODUMP | STATX_ATTR_IMMUTABLE | STATX_ATTR_APPEND; 134 135 stx->stx_blksize = st->st_blksize; 136 137 stx->stx_nlink = st->st_nlink; 138 stx->stx_uid = st->st_uid; 139 stx->stx_gid = st->st_gid; 140 stx->stx_mode |= st->st_mode & S_IFMT; 141 stx->stx_mode |= st->st_mode & ~S_IFMT; 142 stx->stx_ino = st->st_ino; 143 stx->stx_size = st->st_size; 144 stx->stx_blocks = st->st_blocks; 145 146 stx->stx_atime.tv_sec = st->st_atime; 147 stx->stx_atime.tv_nsec = st->st_atimensec; 148 149 /* some filesystem has no birthtime returns 0 or -1 */ 150 if ((st->st_birthtime == 0 && st->st_birthtimensec == 0) || 151 (st->st_birthtime == (time_t)-1 && 152 st->st_birthtimensec == (long)-1)) { 153 rmask &= ~STATX_BTIME; 154 } else { 155 stx->stx_btime.tv_sec = st->st_birthtime; 156 stx->stx_btime.tv_nsec = st->st_birthtimensec; 157 } 158 159 stx->stx_ctime.tv_sec = st->st_ctime; 160 stx->stx_ctime.tv_nsec = st->st_ctimensec; 161 162 stx->stx_mtime.tv_sec = st->st_mtime; 163 stx->stx_mtime.tv_nsec = st->st_mtimensec; 164 165 if (S_ISCHR(st->st_mode) || S_ISBLK(st->st_mode)) { 166 stx->stx_rdev_major = major(st->st_rdev); 167 stx->stx_rdev_minor = minor(st->st_rdev); 168 } else { 169 stx->stx_dev_major = major(st->st_rdev); 170 stx->stx_dev_minor = minor(st->st_rdev); 171 } 172 173 stx->stx_mask = rmask; 174 175 return 0; 176 } 177 178 /* 179 * The stat functions below are plain sailing. stat and lstat are handled 180 * by one function to avoid code duplication. 181 */ 182 int 183 linux_sys_fstat64(struct lwp *l, const struct linux_sys_fstat64_args *uap, register_t *retval) 184 { 185 /* { 186 syscallarg(int) fd; 187 syscallarg(struct linux_stat64 *) sp; 188 } */ 189 struct linux_stat64 tmplst; 190 struct stat tmpst; 191 int error; 192 193 error = do_sys_fstat(SCARG(uap, fd), &tmpst); 194 if (error != 0) 195 return error; 196 197 bsd_to_linux_stat64(&tmpst, &tmplst); 198 199 return copyout(&tmplst, SCARG(uap, sp), sizeof tmplst); 200 } 201 202 #if !defined(__aarch64__) 203 static int 204 linux_do_stat64(struct lwp *l, const struct linux_sys_stat64_args *uap, register_t *retval, int flags) 205 { 206 struct linux_stat64 tmplst; 207 struct stat tmpst; 208 int error; 209 210 error = do_sys_stat(SCARG(uap, path), flags, &tmpst); 211 if (error != 0) 212 return error; 213 214 bsd_to_linux_stat64(&tmpst, &tmplst); 215 216 return copyout(&tmplst, SCARG(uap, sp), sizeof tmplst); 217 } 218 219 int 220 linux_sys_stat64(struct lwp *l, const struct linux_sys_stat64_args *uap, register_t *retval) 221 { 222 /* { 223 syscallarg(const char *) path; 224 syscallarg(struct linux_stat64 *) sp; 225 } */ 226 227 return linux_do_stat64(l, uap, retval, FOLLOW); 228 } 229 230 int 231 linux_sys_lstat64(struct lwp *l, const struct linux_sys_lstat64_args *uap, register_t *retval) 232 { 233 /* { 234 syscallarg(const char *) path; 235 syscallarg(struct linux_stat64 *) sp; 236 } */ 237 238 return linux_do_stat64(l, (const void *)uap, retval, NOFOLLOW); 239 } 240 #endif 241 242 /* 243 * This is an internal function for the *statat() variant of linux, 244 * which returns struct stat, but flags and other handling are 245 * the same as in linux. 246 */ 247 int 248 linux_statat(struct lwp *l, int fd, const char *path, int lflag, 249 struct stat *st) 250 { 251 struct vnode *vp; 252 int error, nd_flag; 253 uint8_t c; 254 255 if (lflag & ~(LINUX_AT_EMPTY_PATH|LINUX_AT_NO_AUTOMOUNT 256 |LINUX_AT_SYMLINK_NOFOLLOW)) 257 return EINVAL; 258 259 if (lflag & LINUX_AT_EMPTY_PATH) { 260 /* 261 * If path is null string: 262 */ 263 error = ufetch_8(path, &c); 264 if (error != 0) 265 return error; 266 if (c == '\0') { 267 if (fd == LINUX_AT_FDCWD) { 268 /* 269 * operate on current directory 270 */ 271 vp = l->l_proc->p_cwdi->cwdi_cdir; 272 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 273 error = vn_stat(vp, st); 274 VOP_UNLOCK(vp); 275 } else { 276 /* 277 * operate on fd 278 */ 279 error = do_sys_fstat(fd, st); 280 } 281 return error; 282 } 283 } 284 285 if (lflag & LINUX_AT_SYMLINK_NOFOLLOW) 286 nd_flag = NOFOLLOW; 287 else 288 nd_flag = FOLLOW; 289 290 return do_sys_statat(l, fd, path, nd_flag, st); 291 } 292 293 int 294 linux_sys_fstatat64(struct lwp *l, const struct linux_sys_fstatat64_args *uap, register_t *retval) 295 { 296 /* { 297 syscallarg(int) fd; 298 syscallarg(const char *) path; 299 syscallarg(struct linux_stat64 *) sp; 300 syscallarg(int) flag; 301 } */ 302 struct linux_stat64 tmplst; 303 struct stat tmpst; 304 int error; 305 306 error = linux_statat(l, SCARG(uap, fd), SCARG(uap, path), 307 SCARG(uap, flag), &tmpst); 308 if (error != 0) 309 return error; 310 311 bsd_to_linux_stat64(&tmpst, &tmplst); 312 313 return copyout(&tmplst, SCARG(uap, sp), sizeof tmplst); 314 } 315 316 #ifdef LINUX_SYS_statx 317 int 318 linux_sys_statx(struct lwp *l, const struct linux_sys_statx_args *uap, 319 register_t *retval) 320 { 321 /* { 322 syscallarg(int) fd; 323 syscallarg(const char *) path; 324 syscallarg(int) flag; 325 syscallarg(unsigned int) mask; 326 syscallarg(struct linux_statx *) sp; 327 } */ 328 struct linux_statx stx; 329 struct stat st; 330 int error; 331 332 error = linux_statat(l, SCARG(uap, fd), SCARG(uap, path), 333 SCARG(uap, flag), &st); 334 if (error != 0) 335 return error; 336 337 error = bsd_to_linux_statx(&st, &stx, SCARG(uap, mask)); 338 if (error != 0) 339 return error; 340 341 return copyout(&stx, SCARG(uap, sp), sizeof stx); 342 } 343 #endif /* LINUX_SYS_statx */ 344 345 #ifndef __alpha__ 346 int 347 linux_sys_truncate64(struct lwp *l, const struct linux_sys_truncate64_args *uap, register_t *retval) 348 { 349 /* { 350 syscallarg(const char *) path; 351 syscallarg(off_t) length; 352 } */ 353 struct sys_truncate_args ta; 354 355 /* Linux doesn't have the 'pad' pseudo-parameter */ 356 SCARG(&ta, path) = SCARG(uap, path); 357 SCARG(&ta, PAD) = 0; 358 SCARG(&ta, length) = SCARG(uap, length); 359 360 return sys_truncate(l, &ta, retval); 361 } 362 363 int 364 linux_sys_ftruncate64(struct lwp *l, const struct linux_sys_ftruncate64_args *uap, register_t *retval) 365 { 366 /* { 367 syscallarg(unsigned int) fd; 368 syscallarg(off_t) length; 369 } */ 370 struct sys_ftruncate_args ta; 371 372 /* Linux doesn't have the 'pad' pseudo-parameter */ 373 SCARG(&ta, fd) = SCARG(uap, fd); 374 SCARG(&ta, PAD) = 0; 375 SCARG(&ta, length) = SCARG(uap, length); 376 377 return sys_ftruncate(l, &ta, retval); 378 } 379 #endif /* __alpha__ */ 380 381 /* 382 * Linux 'readdir' call. This code is mostly taken from the 383 * SunOS getdents call (see compat/sunos/sunos_misc.c), though 384 * an attempt has been made to keep it a little cleaner. 385 * 386 * The d_off field contains the offset of the next valid entry, 387 * unless the older Linux getdents(2), which used to have it set 388 * to the offset of the entry itself. This function also doesn't 389 * need to deal with the old count == 1 glibc problem. 390 * 391 * Read in BSD-style entries, convert them, and copy them out. 392 * 393 * Note that this doesn't handle union-mounted filesystems. 394 */ 395 int 396 linux_sys_getdents64(struct lwp *l, const struct linux_sys_getdents64_args *uap, register_t *retval) 397 { 398 /* { 399 syscallarg(int) fd; 400 syscallarg(struct linux_dirent64 *) dent; 401 syscallarg(unsigned int) count; 402 } */ 403 struct dirent *bdp; 404 struct vnode *vp; 405 char *inp, *tbuf; /* BSD-format */ 406 int len, reclen; /* BSD-format */ 407 char *outp; /* Linux-format */ 408 int resid, linux_reclen = 0; /* Linux-format */ 409 file_t *fp; 410 struct uio auio; 411 struct iovec aiov; 412 struct linux_dirent64 idb; 413 off_t off; /* true file offset */ 414 int buflen, error, eofflag, nbytes; 415 struct vattr va; 416 off_t *cookiebuf = NULL, *cookie; 417 int ncookies; 418 419 /* fd_getvnode() will use the descriptor for us */ 420 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 421 return (error); 422 423 if ((fp->f_flag & FREAD) == 0) { 424 error = EBADF; 425 goto out1; 426 } 427 428 vp = (struct vnode *)fp->f_data; 429 if (vp->v_type != VDIR) { 430 error = ENOTDIR; 431 goto out1; 432 } 433 434 vn_lock(vp, LK_SHARED | LK_RETRY); 435 error = VOP_GETATTR(vp, &va, l->l_cred); 436 VOP_UNLOCK(vp); 437 if (error) 438 goto out1; 439 440 nbytes = SCARG(uap, count); 441 buflen = uimin(MAXBSIZE, nbytes); 442 if (buflen < va.va_blocksize) 443 buflen = va.va_blocksize; 444 tbuf = malloc(buflen, M_TEMP, M_WAITOK); 445 446 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 447 off = fp->f_offset; 448 again: 449 aiov.iov_base = tbuf; 450 aiov.iov_len = buflen; 451 auio.uio_iov = &aiov; 452 auio.uio_iovcnt = 1; 453 auio.uio_rw = UIO_READ; 454 auio.uio_resid = buflen; 455 auio.uio_offset = off; 456 UIO_SETUP_SYSSPACE(&auio); 457 /* 458 * First we read into the malloc'ed buffer, then 459 * we massage it into user space, one record at a time. 460 */ 461 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &cookiebuf, 462 &ncookies); 463 if (error) 464 goto out; 465 466 inp = tbuf; 467 outp = (void *)SCARG(uap, dent); 468 resid = nbytes; 469 if ((len = buflen - auio.uio_resid) == 0) 470 goto eof; 471 472 for (cookie = cookiebuf; len > 0; len -= reclen) { 473 bdp = (struct dirent *)inp; 474 reclen = bdp->d_reclen; 475 if (reclen & 3) { 476 error = EIO; 477 goto out; 478 } 479 if (bdp->d_fileno == 0) { 480 inp += reclen; /* it is a hole; squish it out */ 481 if (cookie) 482 off = *cookie++; 483 else 484 off += reclen; 485 continue; 486 } 487 linux_reclen = LINUX_RECLEN(&idb, bdp->d_namlen); 488 if (reclen > len || resid < linux_reclen) { 489 /* entry too big for buffer, so just stop */ 490 outp++; 491 break; 492 } 493 if (cookie) 494 off = *cookie++; /* each entry points to next */ 495 else 496 off += reclen; 497 /* 498 * Massage in place to make a Linux-shaped dirent (otherwise 499 * we have to worry about touching user memory outside of 500 * the copyout() call). 501 */ 502 memset(&idb, 0, sizeof(idb)); 503 idb.d_ino = bdp->d_fileno; 504 idb.d_type = bdp->d_type; 505 idb.d_off = off; 506 idb.d_reclen = (u_short)linux_reclen; 507 memcpy(idb.d_name, bdp->d_name, MIN(sizeof(idb.d_name), 508 bdp->d_namlen + 1)); 509 if ((error = copyout((void *)&idb, outp, linux_reclen))) 510 goto out; 511 /* advance past this real entry */ 512 inp += reclen; 513 /* advance output past Linux-shaped entry */ 514 outp += linux_reclen; 515 resid -= linux_reclen; 516 } 517 518 /* if we squished out the whole block, try again */ 519 if (outp == (void *)SCARG(uap, dent)) { 520 if (cookiebuf) 521 free(cookiebuf, M_TEMP); 522 cookiebuf = NULL; 523 goto again; 524 } 525 fp->f_offset = off; /* update the vnode offset */ 526 527 eof: 528 *retval = nbytes - resid; 529 out: 530 VOP_UNLOCK(vp); 531 if (cookiebuf) 532 free(cookiebuf, M_TEMP); 533 free(tbuf, M_TEMP); 534 out1: 535 fd_putfile(SCARG(uap, fd)); 536 return error; 537 } 538