/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 1994-1995 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/blist.h>
#include <sys/fcntl.h>
#if defined(__i386__)
#include <sys/imgact_aout.h>
#endif
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/reboot.h>
#include <sys/racct.h>
#include <sys/random.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/wait.h>
#include <sys/cpuset.h>
#include <sys/uio.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/swap_pager.h>

#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
#include <machine/../linux32/linux32_proto.h>
#else
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#endif

#include <compat/linux/linux_dtrace.h>
#include <compat/linux/linux_file.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_timer.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_sysproto.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_misc.h>

int stclohz;				/* Statistics clock frequency */
static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, RLIMIT_AS
};

struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
#define LINUX_SYSINFO_LOADS_SCALE 65536
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ushort	pads;
	l_ulong		totalhigh;
	l_ulong		freehigh;
	l_uint		mem_unit;
	char		_f[20-2*sizeof(l_long)-sizeof(l_int)];	/* padding */
};

struct l_pselect6arg {
	l_uintptr_t	ss;
	l_size_t	ss_len;
};

static int	linux_utimensat_lts_to_ts(struct l_timespec *,
			struct timespec *);
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
static int	linux_utimensat_lts64_to_ts(struct l_timespec64 *,
			struct timespec *);
#endif
static int	linux_common_utimensat(struct thread *, int,
			const char *, struct timespec *, int);
static int	linux_common_pselect6(struct thread *, l_int,
			l_fd_set *, l_fd_set *, l_fd_set *,
			struct timespec *, l_uintptr_t *);
static int	linux_common_ppoll(struct thread *, struct pollfd *,
			uint32_t, struct timespec *, l_sigset_t *,
			l_size_t);

int
linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
{
	struct l_sysinfo sysinfo;
	int i, j;
	struct timespec ts;

	bzero(&sysinfo, sizeof(sysinfo));
	getnanouptime(&ts);
	if (ts.tv_nsec != 0)
		ts.tv_sec++;
	sysinfo.uptime = ts.tv_sec;

	/* Use the information from the mib to get our load averages */
	for (i = 0; i < 3; i++)
		sysinfo.loads[i] = averunnable.ldavg[i] *
		    LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;

	sysinfo.totalram = physmem * PAGE_SIZE;
	sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE;

	/*
	 * sharedram counts pages allocated to named, swap-backed objects
	 * such as shared memory segments and tmpfs files.  There is no cheap
	 * way to compute this, so just leave the field unpopulated.  Linux
	 * itself only started setting this field in the 3.x timeframe.
	 */
	sysinfo.sharedram = 0;
	sysinfo.bufferram = 0;

	swap_pager_status(&i, &j);
	sysinfo.totalswap = i * PAGE_SIZE;
	sysinfo.freeswap = (i - j) * PAGE_SIZE;

	sysinfo.procs = nprocs;

	/*
	 * Platforms supported by the emulation layer do not have a notion of
	 * high memory.
	 */
	sysinfo.totalhigh = 0;
	sysinfo.freehigh = 0;

	sysinfo.mem_unit = 1;

	return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
}
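/*
 * Worked example of the load-average conversion above (informational; it
 * assumes the stock FreeBSD fixed-point scale of fscale == 2048): a raw
 * ldavg[] value of 1024 encodes a load of 0.50 and is exported as
 * 1024 * 65536 / 2048 == 32768; Linux userspace divides loads[] by
 * LINUX_SYSINFO_LOADS_SCALE (65536) and recovers 0.50.
 */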
#ifdef LINUX_LEGACY_SYSCALLS
int
linux_alarm(struct thread *td, struct linux_alarm_args *args)
{
	struct itimerval it, old_it;
	u_int secs;
	int error;

	secs = args->secs;
	/*
	 * Linux alarm() is always successful.  Limit secs to INT32_MAX / 2
	 * to match kern_setitimer()'s limit and so avoid an error from it.
	 *
	 * XXX. Linux limits secs to INT_MAX on 32-bit platforms and does
	 * not limit it at all on 64-bit platforms.
	 */
	if (secs > INT32_MAX / 2)
		secs = INT32_MAX / 2;

	it.it_value.tv_sec = secs;
	it.it_value.tv_usec = 0;
	timevalclear(&it.it_interval);
	error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
	KASSERT(error == 0, ("kern_setitimer returns %d", error));

	if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) ||
	    old_it.it_value.tv_usec >= 500000)
		old_it.it_value.tv_sec++;
	td->td_retval[0] = old_it.it_value.tv_sec;
	return (0);
}
#endif

int
linux_brk(struct thread *td, struct linux_brk_args *args)
{
	struct vmspace *vm = td->td_proc->p_vmspace;
	uintptr_t new, old;

	old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize);
	new = (uintptr_t)args->dsend;
	if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new))
		td->td_retval[0] = (register_t)new;
	else
		td->td_retval[0] = (register_t)old;

	return (0);
}
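/*
 * Note on the convention above: at the syscall level Linux brk(2) never
 * returns -1; on failure it simply returns the unchanged break, and the
 * caller compares the result against the requested address.  That is why
 * linux_brk() always "succeeds" and reports either the new or the old
 * break in td_retval[0].
 */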
#if defined(__i386__)
/* XXX: what about amd64/linux32? */

int
linux_uselib(struct thread *td, struct linux_uselib_args *args)
{
	struct nameidata ni;
	struct vnode *vp;
	struct exec *a_out;
	vm_map_t map;
	vm_map_entry_t entry;
	struct vattr attr;
	vm_offset_t vmaddr;
	unsigned long file_offset;
	unsigned long bss_size;
	char *library;
	ssize_t aresid;
	int error;
	bool locked, opened, textset;

	a_out = NULL;
	vp = NULL;
	locked = false;
	textset = false;
	opened = false;

	if (!LUSECONVPATH(td)) {
		NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
		    UIO_USERSPACE, args->library, td);
		error = namei(&ni);
	} else {
		LCONVPATHEXIST(td, args->library, &library);
		NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
		    UIO_SYSSPACE, library, td);
		error = namei(&ni);
		LFREEPATH(library);
	}
	if (error)
		goto cleanup;

	vp = ni.ni_vp;
	NDFREE(&ni, NDF_ONLY_PNBUF);

	/*
	 * From here on down, we have a locked vnode that must be unlocked.
	 * XXX: The code below largely duplicates exec_check_permissions().
	 */
	locked = true;

	/* Executable? */
	error = VOP_GETATTR(vp, &attr, td->td_ucred);
	if (error)
		goto cleanup;

	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
		/* EACCES is what exec(2) returns. */
		error = ENOEXEC;
		goto cleanup;
	}

	/* Sensible size? */
	if (attr.va_size == 0) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Can we access it? */
	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	if (error)
		goto cleanup;

	/*
	 * XXX: This should use vn_open() so that it is properly authorized,
	 * and to reduce code redundancy all over the place here.
	 * XXX: Not really, it duplicates far more of exec_check_permissions()
	 * than vn_open().
	 */
#ifdef MAC
	error = mac_vnode_check_open(td->td_ucred, vp, VREAD);
	if (error)
		goto cleanup;
#endif
	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
	if (error)
		goto cleanup;
	opened = true;

	/* Pull the executable header into the exec_map. */
	error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE,
	    VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0);
	if (error)
		goto cleanup;

	/* Is it a Linux binary? */
	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * While we are here, we should REALLY do some more checks
	 */

	/* Set file/virtual offset based on a.out variant. */
	switch ((int)(a_out->a_magic & 0xffff)) {
	case 0413:	/* ZMAGIC */
		file_offset = 1024;
		break;
	case 0314:	/* QMAGIC */
		file_offset = 0;
		break;
	default:
		error = ENOEXEC;
		goto cleanup;
	}

	bss_size = round_page(a_out->a_bss);

	/* Check various fields in header for validity/bounds. */
	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* text + data can't exceed file size */
	if (a_out->a_data + a_out->a_text > attr.va_size) {
		error = EFAULT;
		goto cleanup;
	}

	/*
	 * text/data/bss must not exceed limits
	 * XXX - this is not complete. it should check current usage PLUS
	 * the resources needed by this library.
	 */
	PROC_LOCK(td->td_proc);
	if (a_out->a_text > maxtsiz ||
	    a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) ||
	    racct_set(td->td_proc, RACCT_DATA, a_out->a_data +
	    bss_size) != 0) {
		PROC_UNLOCK(td->td_proc);
		error = ENOMEM;
		goto cleanup;
	}
	PROC_UNLOCK(td->td_proc);

	/*
	 * Prevent more writers.
	 */
	error = VOP_SET_TEXT(vp);
	if (error != 0)
		goto cleanup;
	textset = true;

	/*
	 * Lock no longer needed
	 */
	locked = false;
	VOP_UNLOCK(vp);

	/*
	 * Check if file_offset is page aligned.  Currently we cannot handle
	 * misaligned file offsets, and so we read in the entire image
	 * (what a waste).
	 */
	if (file_offset & PAGE_MASK) {
		/* Map text+data read/write/execute */

		/* a_entry is the load address and is page aligned */
		vmaddr = trunc_page(a_out->a_entry);

		/* get anon user mapping, read+write+execute */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE,
		    VM_PROT_ALL, VM_PROT_ALL, 0);
		if (error)
			goto cleanup;

		error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset,
		    a_out->a_text + a_out->a_data, UIO_USERSPACE, 0,
		    td->td_ucred, NOCRED, &aresid, td);
		if (error != 0)
			goto cleanup;
		if (aresid != 0) {
			error = ENOEXEC;
			goto cleanup;
		}
	} else {
		/*
		 * for QMAGIC, a_entry is 20 bytes beyond the load address
		 * to skip the executable header
		 */
		vmaddr = trunc_page(a_out->a_entry);

		/*
		 * Map it all into the process's space as a single
		 * copy-on-write "data" segment.
		 */
		map = &td->td_proc->p_vmspace->vm_map;
		error = vm_mmap(map, &vmaddr,
		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
		    MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
		if (error)
			goto cleanup;
		vm_map_lock(map);
		if (!vm_map_lookup_entry(map, vmaddr, &entry)) {
			vm_map_unlock(map);
			error = EDOOFUS;
			goto cleanup;
		}
		entry->eflags |= MAP_ENTRY_VN_EXEC;
		vm_map_unlock(map);
		textset = false;
	}
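	/*
	 * Note on the branch above (informational): once the vnode-backed
	 * mapping carries MAP_ENTRY_VN_EXEC, the VM system owns the text
	 * reference taken by VOP_SET_TEXT() and releases it when the entry
	 * is deleted, so textset is cleared to keep the cleanup path from
	 * dropping that reference prematurely.
	 */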
	if (bss_size != 0) {
		/* Calculate BSS start address */
		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
		    a_out->a_data;

		/* allocate some 'anon' space */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL,
		    VM_PROT_ALL, 0);
		if (error)
			goto cleanup;
	}

cleanup:
	if (opened) {
		if (locked)
			VOP_UNLOCK(vp);
		locked = false;
		VOP_CLOSE(vp, FREAD, td->td_ucred, td);
	}
	if (textset) {
		if (!locked) {
			locked = true;
			VOP_LOCK(vp, LK_SHARED | LK_RETRY);
		}
		VOP_UNSET_TEXT_CHECKED(vp);
	}
	if (locked)
		VOP_UNLOCK(vp);

	/* Release the temporary mapping. */
	if (a_out)
		kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE);

	return (error);
}

#endif	/* __i386__ */

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_select(struct thread *td, struct linux_select_args *args)
{
	l_timeval ltv;
	struct timeval tv0, tv1, utv, *tvp;
	int error;

	/*
	 * Store current time for computation of the amount of
	 * time left.
	 */
	if (args->timeout) {
		if ((error = copyin(args->timeout, &ltv, sizeof(ltv))))
			goto select_out;
		utv.tv_sec = ltv.tv_sec;
		utv.tv_usec = ltv.tv_usec;

		if (itimerfix(&utv)) {
			/*
			 * The timeval was invalid.  Convert it to something
			 * valid that will act as it does under Linux.
			 */
			utv.tv_sec += utv.tv_usec / 1000000;
			utv.tv_usec %= 1000000;
			if (utv.tv_usec < 0) {
				utv.tv_sec -= 1;
				utv.tv_usec += 1000000;
			}
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		}
		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_select(td, args->nfds, args->readfds, args->writefds,
	    args->exceptfds, tvp, LINUX_NFDBITS);
	if (error)
		goto select_out;

	if (args->timeout) {
		if (td->td_retval[0]) {
			/*
			 * Compute how much time was left of the timeout,
			 * by subtracting the current time and the time
			 * before we started the call, and subtracting
			 * that result from the user-supplied value.
			 */
			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			timevalclear(&utv);
		ltv.tv_sec = utv.tv_sec;
		ltv.tv_usec = utv.tv_usec;
		if ((error = copyout(&ltv, args->timeout, sizeof(ltv))))
			goto select_out;
	}

select_out:
	return (error);
}
#endif

int
linux_mremap(struct thread *td, struct linux_mremap_args *args)
{
	uintptr_t addr;
	size_t len;
	int error = 0;

	if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}
	/*
	 * Check for page alignment.
	 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK.
	 */
	if (args->addr & PAGE_MASK) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	args->new_len = round_page(args->new_len);
	args->old_len = round_page(args->old_len);

	if (args->new_len > args->old_len) {
		td->td_retval[0] = 0;
		return (ENOMEM);
	}

	if (args->new_len < args->old_len) {
		addr = args->addr + args->new_len;
		len = args->old_len - args->new_len;
		error = kern_munmap(td, addr, len);
	}

	td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
	return (error);
}

#define LINUX_MS_ASYNC		0x0001
#define LINUX_MS_INVALIDATE	0x0002
#define LINUX_MS_SYNC		0x0004

int
linux_msync(struct thread *td, struct linux_msync_args *args)
{

	return (kern_msync(td, args->addr, args->len,
	    args->fl & ~LINUX_MS_SYNC));
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_time(struct thread *td, struct linux_time_args *args)
{
	struct timeval tv;
	l_time_t tm;
	int error;

	microtime(&tv);
	tm = tv.tv_sec;
	if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
		return (error);
	td->td_retval[0] = tm;
	return (0);
}
#endif

struct l_times_argv {
	l_clock_t	tms_utime;
	l_clock_t	tms_stime;
	l_clock_t	tms_cutime;
	l_clock_t	tms_cstime;
};

/*
 * Glibc versions prior to 2.2.1 always use the hard-coded CLK_TCK value.
 * Since 2.2.1 Glibc uses the value exported from the kernel via the
 * AT_CLKTCK auxiliary vector entry.
 */
#define CLK_TCK 100

#define CONVOTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
#define CONVNTCK(r)	(r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz))

#define CONVTCK(r)	(linux_kernver(td) >= LINUX_KERNVER_2004000 ?	\
			CONVNTCK(r) : CONVOTCK(r))

int
linux_times(struct thread *td, struct linux_times_args *args)
{
	struct timeval tv, utime, stime, cutime, cstime;
	struct l_times_argv tms;
	struct proc *p;
	int error;

	if (args->buf != NULL) {
		p = td->td_proc;
		PROC_LOCK(p);
		PROC_STATLOCK(p);
		calcru(p, &utime, &stime);
		PROC_STATUNLOCK(p);
		calccru(p, &cutime, &cstime);
		PROC_UNLOCK(p);

		tms.tms_utime = CONVTCK(utime);
		tms.tms_stime = CONVTCK(stime);

		tms.tms_cutime = CONVTCK(cutime);
		tms.tms_cstime = CONVTCK(cstime);

		if ((error = copyout(&tms, args->buf, sizeof(tms))))
			return (error);
	}

	microuptime(&tv);
	td->td_retval[0] = (int)CONVTCK(tv);
	return (0);
}

int
linux_newuname(struct thread *td, struct linux_newuname_args *args)
{
	struct l_new_utsname utsname;
	char osname[LINUX_MAX_UTSNAME];
	char osrelease[LINUX_MAX_UTSNAME];
	char *p;

	linux_get_osname(td, osname);
	linux_get_osrelease(td, osrelease);

	bzero(&utsname, sizeof(utsname));
	strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
	getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME);
	strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
	strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
	for (p = utsname.version; *p != '\0'; ++p)
		if (*p == '\n') {
			*p = '\0';
			break;
		}
#if defined(__amd64__)
	/*
	 * On amd64, Linux uname(2) needs to return "x86_64"
	 * for both 64-bit and 32-bit applications.  On 32-bit,
	 * the string returned by getauxval(AT_PLATFORM) needs
	 * to remain "i686", though.
	 */
	strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME);
#else
	strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME);
#endif

	return (copyout(&utsname, args->buf, sizeof(utsname)));
}

struct l_utimbuf {
	l_time_t l_actime;
	l_time_t l_modtime;
};

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_utime(struct thread *td, struct linux_utime_args *args)
{
	struct timeval tv[2], *tvp;
	struct l_utimbuf lut;
	char *fname;
	int error;

	if (args->times) {
		if ((error = copyin(args->times, &lut, sizeof lut)) != 0)
			return (error);
		tv[0].tv_sec = lut.l_actime;
		tv[0].tv_usec = 0;
		tv[1].tv_sec = lut.l_modtime;
		tv[1].tv_usec = 0;
		tvp = tv;
	} else
		tvp = NULL;

	if (!LUSECONVPATH(td)) {
		error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
		    tvp, UIO_SYSSPACE);
	} else {
		LCONVPATHEXIST(td, args->fname, &fname);
		error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp,
		    UIO_SYSSPACE);
		LFREEPATH(fname);
	}
	return (error);
}
#endif

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_utimes(struct thread *td, struct linux_utimes_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	char *fname;
	int error;

	if (args->tptr != NULL) {
		if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0)
			return (error);
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	if (!LUSECONVPATH(td)) {
		error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
		    tvp, UIO_SYSSPACE);
	} else {
		LCONVPATHEXIST(td, args->fname, &fname);
		error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE,
		    tvp, UIO_SYSSPACE);
		LFREEPATH(fname);
	}
	return (error);
}
#endif

static int
linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times)
{

	if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
	    l_times->tv_nsec != LINUX_UTIME_NOW &&
	    (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
		return (EINVAL);

	times->tv_sec = l_times->tv_sec;
	switch (l_times->tv_nsec)
	{
	case LINUX_UTIME_OMIT:
		times->tv_nsec = UTIME_OMIT;
		break;
	case LINUX_UTIME_NOW:
		times->tv_nsec = UTIME_NOW;
		break;
	default:
		times->tv_nsec = l_times->tv_nsec;
	}

	return (0);
}
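/*
 * Background on the LUSECONVPATH/LCONVPATHEXIST pattern used above and in
 * linux_common_utimensat() below (informational): when path translation is
 * enabled, LCONVPATHEXIST rewrites the user-supplied path against the
 * emulation root (typically /compat/linux) into a kernel-space buffer,
 * which is why the translated branches pass UIO_SYSSPACE and must release
 * the buffer with LFREEPATH.
 */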
static int
linux_common_utimensat(struct thread *td, int ldfd, const char *pathname,
    struct timespec *timesp, int lflags)
{
	char *path = NULL;
	int error, dfd, flags = 0;

	dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd;

	if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH))
		return (EINVAL);

	if (timesp != NULL) {
		/*
		 * This breaks POSIX, but is what the Linux kernel does
		 * _on purpose_ (documented in the man page for utimensat(2)),
		 * so we must follow that behaviour.
		 */
		if (timesp[0].tv_nsec == UTIME_OMIT &&
		    timesp[1].tv_nsec == UTIME_OMIT)
			return (0);
	}

	if (lflags & LINUX_AT_SYMLINK_NOFOLLOW)
		flags |= AT_SYMLINK_NOFOLLOW;
	if (lflags & LINUX_AT_EMPTY_PATH)
		flags |= AT_EMPTY_PATH;

	if (!LUSECONVPATH(td)) {
		if (pathname != NULL) {
			return (kern_utimensat(td, dfd, pathname,
			    UIO_USERSPACE, timesp, UIO_SYSSPACE, flags));
		}
	}

	if (pathname != NULL)
		LCONVPATHEXIST_AT(td, pathname, &path, dfd);
	else if (lflags != 0)
		return (EINVAL);

	if (path == NULL)
		error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE);
	else {
		error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp,
		    UIO_SYSSPACE, flags);
		LFREEPATH(path);
	}

	return (error);
}

int
linux_utimensat(struct thread *td, struct linux_utimensat_args *args)
{
	struct l_timespec l_times[2];
	struct timespec times[2], *timesp;
	int error;

	if (args->times != NULL) {
		error = copyin(args->times, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		error = linux_utimensat_lts_to_ts(&l_times[0], &times[0]);
		if (error != 0)
			return (error);
		error = linux_utimensat_lts_to_ts(&l_times[1], &times[1]);
		if (error != 0)
			return (error);
		timesp = times;
	} else
		timesp = NULL;

	return (linux_common_utimensat(td, args->dfd, args->pathname,
	    timesp, args->flags));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
static int
linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times)
{

	if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
	    l_times->tv_nsec != LINUX_UTIME_NOW &&
	    (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
		return (EINVAL);

	times->tv_sec = l_times->tv_sec;
	switch (l_times->tv_nsec)
	{
	case LINUX_UTIME_OMIT:
		times->tv_nsec = UTIME_OMIT;
		break;
	case LINUX_UTIME_NOW:
		times->tv_nsec = UTIME_NOW;
		break;
	default:
		times->tv_nsec = l_times->tv_nsec;
	}

	return (0);
}

int
linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args)
{
	struct l_timespec64 l_times[2];
	struct timespec times[2], *timesp;
	int error;

	if (args->times64 != NULL) {
		error = copyin(args->times64, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		error = linux_utimensat_lts64_to_ts(&l_times[0], &times[0]);
		if (error != 0)
			return (error);
		error = linux_utimensat_lts64_to_ts(&l_times[1], &times[1]);
		if (error != 0)
			return (error);
		timesp = times;
	} else
		timesp = NULL;

	return (linux_common_utimensat(td, args->dfd, args->pathname,
	    timesp, args->flags));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
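/*
 * linux_futimesat() below implements the legacy futimesat(2) call, which
 * Linux has long since superseded by utimensat(2); it is therefore only
 * compiled in under LINUX_LEGACY_SYSCALLS.
 */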
#ifdef LINUX_LEGACY_SYSCALLS
int
linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	char *fname;
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

	if (args->utimes != NULL) {
		if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0)
			return (error);
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	if (!LUSECONVPATH(td)) {
		error = kern_utimesat(td, dfd, args->filename, UIO_USERSPACE,
		    tvp, UIO_SYSSPACE);
	} else {
		LCONVPATHEXIST_AT(td, args->filename, &fname, dfd);
		error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE,
		    tvp, UIO_SYSSPACE);
		LFREEPATH(fname);
	}
	return (error);
}
#endif

static int
linux_common_wait(struct thread *td, int pid, int *statusp,
    int options, struct __wrusage *wrup)
{
	siginfo_t siginfo;
	idtype_t idtype;
	id_t id;
	int error, status, tmpstat;

	if (pid == WAIT_ANY) {
		idtype = P_ALL;
		id = 0;
	} else if (pid < 0) {
		idtype = P_PGID;
		id = (id_t)-pid;
	} else {
		idtype = P_PID;
		id = (id_t)pid;
	}

	/*
	 * For backward compatibility we implicitly add flags WEXITED
	 * and WTRAPPED here.
	 */
	options |= WEXITED | WTRAPPED;
	error = kern_wait6(td, idtype, id, &status, options, wrup, &siginfo);
	if (error)
		return (error);

	if (statusp) {
		tmpstat = status & 0xffff;
		if (WIFSIGNALED(tmpstat)) {
			tmpstat = (tmpstat & 0xffffff80) |
			    bsd_to_linux_signal(WTERMSIG(tmpstat));
		} else if (WIFSTOPPED(tmpstat)) {
			tmpstat = (tmpstat & 0xffff00ff) |
			    (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
#if defined(__amd64__) && !defined(COMPAT_LINUX32)
			if (WSTOPSIG(status) == SIGTRAP) {
				tmpstat = linux_ptrace_status(td,
				    siginfo.si_pid, tmpstat);
			}
#endif
		} else if (WIFCONTINUED(tmpstat)) {
			tmpstat = 0xffff;
		}
		error = copyout(&tmpstat, statusp, sizeof(int));
	}

	return (error);
}
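/*
 * Layout note for the status mangling above (informational): both BSD and
 * Linux use the traditional wait status encoding, with the termination
 * signal in the low 7 bits and the exit code or stop signal in bits 8-15.
 * The mask 0xffffff80 therefore preserves everything but the termination
 * signal before substituting the Linux signal number, and 0xffff00ff does
 * the same for the stop signal byte.
 */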
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
{
	struct linux_wait4_args wait4_args;

	wait4_args.pid = args->pid;
	wait4_args.status = args->status;
	wait4_args.options = args->options;
	wait4_args.rusage = NULL;

	return (linux_wait4(td, &wait4_args));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_wait4(struct thread *td, struct linux_wait4_args *args)
{
	int error, options;
	struct __wrusage wru, *wrup;

	if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
	    LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
		return (EINVAL);

	options = WEXITED;
	linux_to_bsd_waitopts(args->options, &options);

	if (args->rusage != NULL)
		wrup = &wru;
	else
		wrup = NULL;
	error = linux_common_wait(td, args->pid, args->status, options, wrup);
	if (error != 0)
		return (error);
	if (args->rusage != NULL)
		error = linux_copyout_rusage(&wru.wru_self, args->rusage);
	return (error);
}

int
linux_waitid(struct thread *td, struct linux_waitid_args *args)
{
	int status, options, sig;
	struct __wrusage wru;
	siginfo_t siginfo;
	l_siginfo_t lsi;
	idtype_t idtype;
	struct proc *p;
	int error;

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED))
		return (EINVAL);
	if (!(options & (WEXITED | WUNTRACED | WCONTINUED)))
		return (EINVAL);

	switch (args->idtype) {
	case LINUX_P_ALL:
		idtype = P_ALL;
		break;
	case LINUX_P_PID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PID;
		break;
	case LINUX_P_PGID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PGID;
		break;
	default:
		return (EINVAL);
	}

	error = kern_wait6(td, idtype, args->id, &status, options,
	    &wru, &siginfo);
	if (error != 0)
		return (error);
	if (args->rusage != NULL) {
		error = linux_copyout_rusage(&wru.wru_children,
		    args->rusage);
		if (error != 0)
			return (error);
	}
	if (args->info != NULL) {
		p = td->td_proc;
		bzero(&lsi, sizeof(lsi));
		if (td->td_retval[0] != 0) {
			sig = bsd_to_linux_signal(siginfo.si_signo);
			siginfo_to_lsiginfo(&siginfo, &lsi, sig);
		}
		error = copyout(&lsi, args->info, sizeof(lsi));
	}
	td->td_retval[0] = 0;

	return (error);
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_mknod(struct thread *td, struct linux_mknod_args *args)
{
	char *path;
	int error;
	enum uio_seg seg;
	bool convpath;

	convpath = LUSECONVPATH(td);
	if (!convpath) {
		path = args->path;
		seg = UIO_USERSPACE;
	} else {
		LCONVPATHCREAT(td, args->path, &path);
		seg = UIO_SYSSPACE;
	}

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, AT_FDCWD, path, seg,
		    args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, AT_FDCWD, path, seg,
		    args->mode, args->dev);
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, AT_FDCWD, path, seg,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (convpath)
		LFREEPATH(path);
	return (error);
}
#endif

int
linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
{
	char *path;
	int error, dfd;
	enum uio_seg seg;
	bool convpath;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

	convpath = LUSECONVPATH(td);
	if (!convpath) {
		path = __DECONST(char *, args->filename);
		seg = UIO_USERSPACE;
	} else {
		LCONVPATHCREAT_AT(td, args->filename, &path, dfd);
		seg = UIO_SYSSPACE;
	}

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, dfd, path, seg, args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, dfd, path, seg, args->mode,
		    args->dev);
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, dfd, path, seg,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (convpath)
		LFREEPATH(path);
	return (error);
}
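/*
 * Note on the mknod handlers above (informational): Linux mknod(2) treats
 * a mode with no file type bits as S_IFREG, so the 0 case falls through to
 * the regular-file path, which is emulated by an openat(O_CREAT|O_TRUNC)
 * followed by an immediate close; S_IFSOCK is approximated with a FIFO.
 */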
/*
 * UGH! This is just about the dumbest idea I've ever heard!!
 */
int
linux_personality(struct thread *td, struct linux_personality_args *args)
{
	struct linux_pemuldata *pem;
	struct proc *p = td->td_proc;
	uint32_t old;

	PROC_LOCK(p);
	pem = pem_find(p);
	old = pem->persona;
	if (args->per != 0xffffffff)
		pem->persona = args->per;
	PROC_UNLOCK(p);

	td->td_retval[0] = old;
	return (0);
}

struct l_itimerval {
	l_timeval it_interval;
	l_timeval it_value;
};

#define	B2L_ITIMERVAL(bip, lip)						\
	(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;		\
	(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec;	\
	(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;		\
	(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;

int
linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv, oitv;

	if (uap->itv == NULL) {
		uap->itv = uap->oitv;
		return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
	}

	error = copyin(uap->itv, &ls, sizeof(ls));
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&aitv, &ls);
	error = kern_setitimer(td, uap->which, &aitv, &oitv);
	if (error != 0 || uap->oitv == NULL)
		return (error);
	B2L_ITIMERVAL(&ls, &oitv);

	return (copyout(&ls, uap->oitv, sizeof(ls)));
}

int
linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv;

	error = kern_getitimer(td, uap->which, &aitv);
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&ls, &aitv);
	return (copyout(&ls, uap->itv, sizeof(ls)));
}
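/*
 * B2L_ITIMERVAL above copies field by field between two structurally
 * identical itimerval layouts, so despite its name the same macro is used
 * in both directions: linux_setitimer() converts the Linux struct to the
 * native one on the way in and back again for the old value on the way
 * out.
 */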
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_nice(struct thread *td, struct linux_nice_args *args)
{

	return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
{
	struct ucred *newcred, *oldcred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int ngrp, error;
	struct proc *p;

	ngrp = args->gidsetsize;
	if (ngrp < 0 || ngrp >= ngroups_max + 1)
		return (EINVAL);
	linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
	error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
	if (error)
		goto out;
	newcred = crget();
	crextend(newcred, ngrp + 1);
	p = td->td_proc;
	PROC_LOCK(p);
	oldcred = p->p_ucred;
	crcopy(newcred, oldcred);

	/*
	 * cr_groups[0] holds egid.  Setting the whole set from
	 * the supplied set will cause egid to be changed too.
	 * Keep cr_groups[0] unchanged to prevent that.
	 */

	if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) {
		PROC_UNLOCK(p);
		crfree(newcred);
		goto out;
	}

	if (ngrp > 0) {
		newcred->cr_ngroups = ngrp + 1;

		bsd_gidset = newcred->cr_groups;
		ngrp--;
		while (ngrp >= 0) {
			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
			ngrp--;
		}
	} else
		newcred->cr_ngroups = 1;

	setsugid(p);
	proc_set_cred(p, newcred);
	PROC_UNLOCK(p);
	crfree(oldcred);
	error = 0;
out:
	free(linux_gidset, M_LINUX);
	return (error);
}

int
linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
{
	struct ucred *cred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int bsd_gidsetsz, ngrp, error;

	cred = td->td_ucred;
	bsd_gidset = cred->cr_groups;
	bsd_gidsetsz = cred->cr_ngroups - 1;

	/*
	 * cr_groups[0] holds egid.  Returning the whole set
	 * here will cause a duplicate.  Exclude cr_groups[0]
	 * to prevent that.
	 */

	if ((ngrp = args->gidsetsize) == 0) {
		td->td_retval[0] = bsd_gidsetsz;
		return (0);
	}

	if (ngrp < bsd_gidsetsz)
		return (EINVAL);

	ngrp = 0;
	linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
	    M_LINUX, M_WAITOK);
	while (ngrp < bsd_gidsetsz) {
		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
		ngrp++;
	}

	error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t));
	free(linux_gidset, M_LINUX);
	if (error)
		return (error);

	td->td_retval[0] = ngrp;
	return (0);
}

static bool
linux_get_dummy_limit(l_uint resource, struct rlimit *rlim)
{

	if (linux_dummy_rlimits == 0)
		return (false);

	switch (resource) {
	case LINUX_RLIMIT_LOCKS:
	case LINUX_RLIMIT_SIGPENDING:
	case LINUX_RLIMIT_MSGQUEUE:
	case LINUX_RLIMIT_RTTIME:
		rlim->rlim_cur = LINUX_RLIM_INFINITY;
		rlim->rlim_max = LINUX_RLIM_INFINITY;
		return (true);
	case LINUX_RLIMIT_NICE:
	case LINUX_RLIMIT_RTPRIO:
		rlim->rlim_cur = 0;
		rlim->rlim_max = 0;
		return (true);
	default:
		return (false);
	}
}

int
linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
{
	struct rlimit bsd_rlim;
	struct l_rlimit rlim;
	u_int which;
	int error;

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	error = copyin(args->rlim, &rlim, sizeof(rlim));
	if (error)
		return (error);

	bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
	bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
	return (kern_setrlimit(td, which, &bsd_rlim));
}
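/*
 * In the legacy getrlimit below the 64-bit kernel values are truncated to
 * the old ABI width, and a truncated RLIM_INFINITY (UINT_MAX under
 * COMPAT_LINUX32, ULONG_MAX otherwise) is clamped to the corresponding
 * signed maximum, plausibly so that old binaries that treat the limit as a
 * signed quantity still see a positive value; the clamping itself is
 * exactly what the code below does, the rationale is an inference.
 */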
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (linux_get_dummy_limit(args->resource, &bsd_rlim)) {
		rlim.rlim_cur = bsd_rlim.rlim_cur;
		rlim.rlim_max = bsd_rlim.rlim_max;
		return (copyout(&rlim, args->rlim, sizeof(rlim)));
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

#ifdef COMPAT_LINUX32
	rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == UINT_MAX)
		rlim.rlim_cur = INT_MAX;
	rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max;
	if (rlim.rlim_max == UINT_MAX)
		rlim.rlim_max = INT_MAX;
#else
	rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == ULONG_MAX)
		rlim.rlim_cur = LONG_MAX;
	rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max;
	if (rlim.rlim_max == ULONG_MAX)
		rlim.rlim_max = LONG_MAX;
#endif
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (linux_get_dummy_limit(args->resource, &bsd_rlim)) {
		rlim.rlim_cur = bsd_rlim.rlim_cur;
		rlim.rlim_max = bsd_rlim.rlim_max;
		return (copyout(&rlim, args->rlim, sizeof(rlim)));
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

	rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
	rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}
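/*
 * Worked example for the priority mapping done below when
 * linux_map_sched_prio is set (informational; it assumes the stock
 * realtime range RTP_PRIO_MIN == 0 and RTP_PRIO_MAX == 31): the Linux RT
 * range [1, 99] scales down to [0, 31], so a Linux priority of 1 maps to
 * 0 and 99 maps to 98 * 32 / 99 == 31 under integer division.
 */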
int
linux_sched_setscheduler(struct thread *td,
    struct linux_sched_setscheduler_args *args)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}

	error = copyin(args->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	if (linux_map_sched_prio) {
		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0)
				return (EINVAL);

			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO)
				return (EINVAL);

			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_getscheduler(struct thread *td,
    struct linux_sched_getscheduler_args *args)
{
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getscheduler(td, tdt, &policy);
	PROC_UNLOCK(tdt->td_proc);

	switch (policy) {
	case SCHED_OTHER:
		td->td_retval[0] = LINUX_SCHED_OTHER;
		break;
	case SCHED_FIFO:
		td->td_retval[0] = LINUX_SCHED_FIFO;
		break;
	case SCHED_RR:
		td->td_retval[0] = LINUX_SCHED_RR;
		break;
	}
	return (error);
}

int
linux_sched_get_priority_max(struct thread *td,
    struct linux_sched_get_priority_max_args *args)
{
	struct sched_get_priority_max_args bsd;

	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = LINUX_MAX_RT_PRIO - 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_max(td, &bsd));
}

int
linux_sched_get_priority_min(struct thread *td,
    struct linux_sched_get_priority_min_args *args)
{
	struct sched_get_priority_min_args bsd;

	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_min(td, &bsd));
}

#define REBOOT_CAD_ON	0x89abcdef
#define REBOOT_CAD_OFF	0
#define REBOOT_HALT	0xcdef0123
#define REBOOT_RESTART	0x01234567
#define REBOOT_RESTART2	0xA1B2C3D4
#define REBOOT_POWEROFF	0x4321FEDC
#define REBOOT_MAGIC1	0xfee1dead
#define REBOOT_MAGIC2	0x28121969
#define REBOOT_MAGIC2A	0x05121996
#define REBOOT_MAGIC2B	0x16041998
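/*
 * The MAGIC2 variants above are the historical, date-encoded constants the
 * Linux kernel accepts for reboot(2)'s second magic argument; any of them
 * is treated as valid below, matching Linux.
 */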
int
linux_reboot(struct thread *td, struct linux_reboot_args *args)
{
	struct reboot_args bsd_args;

	if (args->magic1 != REBOOT_MAGIC1)
		return (EINVAL);

	switch (args->magic2) {
	case REBOOT_MAGIC2:
	case REBOOT_MAGIC2A:
	case REBOOT_MAGIC2B:
		break;
	default:
		return (EINVAL);
	}

	switch (args->cmd) {
	case REBOOT_CAD_ON:
	case REBOOT_CAD_OFF:
		return (priv_check(td, PRIV_REBOOT));
	case REBOOT_HALT:
		bsd_args.opt = RB_HALT;
		break;
	case REBOOT_RESTART:
	case REBOOT_RESTART2:
		bsd_args.opt = 0;
		break;
	case REBOOT_POWEROFF:
		bsd_args.opt = RB_POWEROFF;
		break;
	default:
		return (EINVAL);
	}
	return (sys_reboot(td, &bsd_args));
}

int
linux_getpid(struct thread *td, struct linux_getpid_args *args)
{

	td->td_retval[0] = td->td_proc->p_pid;

	return (0);
}

int
linux_gettid(struct thread *td, struct linux_gettid_args *args)
{
	struct linux_emuldata *em;

	em = em_find(td);
	KASSERT(em != NULL, ("gettid: emuldata not found.\n"));

	td->td_retval[0] = em->em_tid;

	return (0);
}

int
linux_getppid(struct thread *td, struct linux_getppid_args *args)
{

	td->td_retval[0] = kern_getppid(td);
	return (0);
}

int
linux_getgid(struct thread *td, struct linux_getgid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_rgid;
	return (0);
}

int
linux_getuid(struct thread *td, struct linux_getuid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_ruid;
	return (0);
}

int
linux_getsid(struct thread *td, struct linux_getsid_args *args)
{

	return (kern_getsid(td, args->pid));
}

int
linux_nosys(struct thread *td, struct nosys_args *ignore)
{

	return (ENOSYS);
}

int
linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
{
	int error;

	error = kern_getpriority(td, args->which, args->who);
	td->td_retval[0] = 20 - td->td_retval[0];
	return (error);
}
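/*
 * The "20 - priority" translation above mirrors the raw Linux syscall ABI:
 * the kernel returns 20 - nice (a value in [1, 40]) so that errors remain
 * distinguishable as negative returns, and the libc wrapper converts the
 * result back to the familiar [-20, 19] range.
 */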
int
linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_HOSTNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
	    args->len, 0, 0));
}

int
linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_NISDOMAINNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
	    args->len, 0, 0));
}

int
linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
{

	LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
	    args->error_code);

	/*
	 * XXX: we should send a signal to the parent if
	 * SIGNAL_EXIT_GROUP is set.  We ignore that (temporarily?)
	 * as it doesn't occur often.
	 */
	exit1(td, args->error_code, 0);
	/* NOTREACHED */
}

#define _LINUX_CAPABILITY_VERSION_1	0x19980330
#define _LINUX_CAPABILITY_VERSION_2	0x20071026
#define _LINUX_CAPABILITY_VERSION_3	0x20080522

struct l_user_cap_header {
	l_int	version;
	l_int	pid;
};

struct l_user_cap_data {
	l_int	effective;
	l_int	permitted;
	l_int	inheritable;
};

int
linux_capget(struct thread *td, struct linux_capget_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, u32s;

	if (uap->hdrp == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	if (luch.pid)
		return (EPERM);

	if (uap->datap) {
		/*
		 * The current implementation doesn't support setting
		 * a capability (it's essentially a stub) so indicate
		 * that no capabilities are currently set or available
		 * to request.
		 */
		memset(&lucd, 0, u32s * sizeof(lucd[0]));
		error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0]));
	}

	return (error);
}

int
linux_capset(struct thread *td, struct linux_capset_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, i, u32s;

	if (uap->hdrp == NULL || uap->datap == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	if (luch.pid)
		return (EPERM);

	error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0]));
	if (error != 0)
		return (error);

	/* We currently don't support setting any capabilities. */
	for (i = 0; i < u32s; i++) {
		if (lucd[i].effective || lucd[i].permitted ||
		    lucd[i].inheritable) {
			linux_msg(td,
			    "capset[%d] effective=0x%x, permitted=0x%x, "
			    "inheritable=0x%x is not implemented", i,
			    (int)lucd[i].effective, (int)lucd[i].permitted,
			    (int)lucd[i].inheritable);
			return (EPERM);
		}
	}

	return (0);
}
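/*
 * The version handling in capget/capset above follows the Linux probing
 * protocol: when user space passes an unknown header version, the kernel
 * writes a supported version back into the header and fails with EINVAL,
 * which is how callers discover what the kernel speaks.
 */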
int
linux_prctl(struct thread *td, struct linux_prctl_args *args)
{
	int error = 0, max_size;
	struct proc *p = td->td_proc;
	char comm[LINUX_MAX_COMM_LEN];
	int pdeath_signal, trace_state;

	switch (args->option) {
	case LINUX_PR_SET_PDEATHSIG:
		if (!LINUX_SIG_VALID(args->arg2))
			return (EINVAL);
		pdeath_signal = linux_to_bsd_signal(args->arg2);
		return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL,
		    &pdeath_signal));
	case LINUX_PR_GET_PDEATHSIG:
		error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS,
		    &pdeath_signal);
		if (error != 0)
			return (error);
		pdeath_signal = bsd_to_linux_signal(pdeath_signal);
		return (copyout(&pdeath_signal,
		    (void *)(register_t)args->arg2,
		    sizeof(pdeath_signal)));
	/*
	 * In Linux, this flag controls if set[gu]id processes can coredump.
	 * There are additional semantics imposed on processes that cannot
	 * coredump:
	 * - Such processes can not be ptraced.
	 * - There are some semantics around ownership of process-related
	 *   files in the /proc namespace.
	 *
	 * In FreeBSD, we can (and by default, do) disable setuid coredump
	 * system-wide with 'sugid_coredump.'  We control traceability on a
	 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE
	 * flag).  By happy coincidence, P2_NOTRACE also prevents
	 * coredumping.  So the procctl is roughly analogous to Linux's
	 * DUMPABLE.
	 *
	 * So, proxy these knobs to the corresponding PROC_TRACE setting.
	 */
	case LINUX_PR_GET_DUMPABLE:
		error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS,
		    &trace_state);
		if (error != 0)
			return (error);
		td->td_retval[0] = (trace_state != -1);
		return (0);
	case LINUX_PR_SET_DUMPABLE:
		/*
		 * It is only valid for userspace to set one of these two
		 * flags, and only one at a time.
		 */
		switch (args->arg2) {
		case LINUX_SUID_DUMP_DISABLE:
			trace_state = PROC_TRACE_CTL_DISABLE_EXEC;
			break;
		case LINUX_SUID_DUMP_USER:
			trace_state = PROC_TRACE_CTL_ENABLE;
			break;
		default:
			return (EINVAL);
		}
		return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL,
		    &trace_state));
	case LINUX_PR_GET_KEEPCAPS:
		/*
		 * Indicate that we always clear the effective and
		 * permitted capability sets when the user id becomes
		 * non-zero (actually the capability sets are simply
		 * always zero in the current implementation).
		 */
		td->td_retval[0] = 0;
		break;
	case LINUX_PR_SET_KEEPCAPS:
		/*
		 * Ignore requests to keep the effective and permitted
		 * capability sets when the user id becomes non-zero.
		 */
		break;
	case LINUX_PR_SET_NAME:
		/*
		 * To be on the safe side we need to make sure not to
		 * overflow the size a Linux program expects.  We already
		 * do this here in the copyin, so that we don't need to
		 * check on copyout.
		 */
		max_size = MIN(sizeof(comm), sizeof(p->p_comm));
		error = copyinstr((void *)(register_t)args->arg2, comm,
		    max_size, NULL);

		/* Linux silently truncates the name if it is too long. */
		if (error == ENAMETOOLONG) {
			/*
			 * XXX: copyinstr() isn't documented to populate the
			 * array completely, so do a copyin() to be on the
			 * safe side.  This should be changed in case
			 * copyinstr() is changed to guarantee this.
			 */
			error = copyin((void *)(register_t)args->arg2, comm,
			    max_size - 1);
			comm[max_size - 1] = '\0';
		}
		if (error)
			return (error);

		PROC_LOCK(p);
		strlcpy(p->p_comm, comm, sizeof(p->p_comm));
		PROC_UNLOCK(p);
		break;
	case LINUX_PR_GET_NAME:
		PROC_LOCK(p);
		strlcpy(comm, p->p_comm, sizeof(comm));
		PROC_UNLOCK(p);
		error = copyout(comm, (void *)(register_t)args->arg2,
		    strlen(comm) + 1);
		break;
	case LINUX_PR_GET_SECCOMP:
	case LINUX_PR_SET_SECCOMP:
		/*
		 * Same as returned by Linux without CONFIG_SECCOMP enabled.
		 */
		error = EINVAL;
		break;
	case LINUX_PR_CAPBSET_READ:
#if 0
		/*
		 * This makes too much noise with Ubuntu Focal.
		 */
		linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d",
		    (int)args->arg2);
#endif
		error = EINVAL;
		break;
	case LINUX_PR_SET_NO_NEW_PRIVS:
		linux_msg(td, "unsupported prctl PR_SET_NO_NEW_PRIVS");
		error = EINVAL;
		break;
	case LINUX_PR_SET_PTRACER:
		linux_msg(td, "unsupported prctl PR_SET_PTRACER");
		error = EINVAL;
		break;
	default:
		linux_msg(td, "unsupported prctl option %d", args->option);
		error = EINVAL;
		break;
	}

	return (error);
}
int
linux_sched_setparam(struct thread *td,
    struct linux_sched_setparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	error = copyin(uap->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		if (error)
			goto out;

		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0) {
				error = EINVAL;
				goto out;
			}
			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO) {
				error = EINVAL;
				goto out;
			}
			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	error = kern_sched_setparam(td, tdt, &sched_param);
out:	PROC_UNLOCK(tdt->td_proc);
	return (error);
}
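/*
 * linux_sched_getparam() below applies the inverse mapping, rounding up.
 * Round-trip example with the stock constants (RTP_PRIO_MIN == 0,
 * RTP_PRIO_MAX == 31): Linux priority 49 maps down to 48 * 32 / 99 == 15,
 * and 15 maps back up to (15 * 99 + 30) / 31 + 1 == 49.
 */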
int
linux_sched_getparam(struct thread *td,
    struct linux_sched_getparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getparam(td, tdt, &sched_param);
	if (error) {
		PROC_UNLOCK(tdt->td_proc);
		return (error);
	}

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		PROC_UNLOCK(tdt->td_proc);
		if (error)
			return (error);

		switch (policy) {
		case SCHED_OTHER:
			sched_param.sched_priority = 0;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			/*
			 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to
			 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority *
			    (LINUX_MAX_RT_PRIO - 1) +
			    (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) /
			    (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1;
			break;
		}
	} else
		PROC_UNLOCK(tdt->td_proc);

	error = copyout(&sched_param, uap->param, sizeof(sched_param));
	return (error);
}
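/*
 * Worked example of the up-rounding map above (illustrative comment,
 * not from the original source), with the same stock constants assumed
 * in the linux_sched_setparam() example:
 *
 *	native priority  0 -> ( 0 * 99 + 30) / 31 + 1 =  1
 *	native priority 15 -> (15 * 99 + 30) / 31 + 1 = 49
 *
 * i.e. this inverse map rounds up, mirroring the downward rounding
 * applied in linux_sched_setparam().
 */
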
/*
 * Get affinity of a process.
 */
int
linux_sched_getaffinity(struct thread *td,
    struct linux_sched_getaffinity_args *args)
{
	int error;
	struct thread *tdt;

	if (args->len < sizeof(cpuset_t))
		return (EINVAL);

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	PROC_UNLOCK(tdt->td_proc);

	error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
	    tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr);
	if (error == 0)
		td->td_retval[0] = sizeof(cpuset_t);

	return (error);
}

/*
 * Set affinity of a process.
 */
int
linux_sched_setaffinity(struct thread *td,
    struct linux_sched_setaffinity_args *args)
{
	struct thread *tdt;

	if (args->len < sizeof(cpuset_t))
		return (EINVAL);

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	PROC_UNLOCK(tdt->td_proc);

	return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
	    tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr));
}

struct linux_rlimit64 {
	uint64_t	rlim_cur;
	uint64_t	rlim_max;
};

int
linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
{
	struct rlimit rlim, nrlim;
	struct linux_rlimit64 lrlim;
	struct proc *p;
	u_int which;
	int flags;
	int error = 0;

	if (args->new == NULL && args->old != NULL) {
		if (linux_get_dummy_limit(args->resource, &rlim)) {
			lrlim.rlim_cur = rlim.rlim_cur;
			lrlim.rlim_max = rlim.rlim_max;
			return (copyout(&lrlim, args->old, sizeof(lrlim)));
		}
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	if (args->new != NULL) {
		/*
		 * Note: unlike FreeBSD, where rlim is a signed 64-bit
		 * quantity, the Linux rlim is unsigned 64-bit.  FreeBSD
		 * treats negative limits as INFINITY, so no conversion
		 * is even needed.
		 */
		error = copyin(args->new, &nrlim, sizeof(nrlim));
		if (error != 0)
			return (error);
	}

	flags = PGET_HOLD | PGET_NOTWEXIT;
	if (args->new != NULL)
		flags |= PGET_CANDEBUG;
	else
		flags |= PGET_CANSEE;
	if (args->pid == 0) {
		p = td->td_proc;
		PHOLD(p);
	} else {
		error = pget(args->pid, flags, &p);
		if (error != 0)
			return (error);
	}
	if (args->old != NULL) {
		PROC_LOCK(p);
		lim_rlimit_proc(p, which, &rlim);
		PROC_UNLOCK(p);
		if (rlim.rlim_cur == RLIM_INFINITY)
			lrlim.rlim_cur = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_cur = rlim.rlim_cur;
		if (rlim.rlim_max == RLIM_INFINITY)
			lrlim.rlim_max = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_max = rlim.rlim_max;
		error = copyout(&lrlim, args->old, sizeof(lrlim));
		if (error != 0)
			goto out;
	}

	if (args->new != NULL)
		error = kern_proc_setrlimit(td, p, which, &nrlim);

out:
	PRELE(p);
	return (error);
}
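/*
 * Hedged usage sketch (illustrative, not part of the original source):
 * on the Linux side the syscall above services glibc calls such as
 *
 *	struct rlimit64 lim = {
 *		.rlim_cur = RLIM64_INFINITY,
 *		.rlim_max = RLIM64_INFINITY,
 *	};
 *	prlimit64(0, RLIMIT_CORE, &lim, NULL);
 *
 * where pid 0 means the calling process.  RLIM64_INFINITY (all bits
 * set) is accepted directly on the set path, since FreeBSD treats it
 * as a negative, i.e. infinite, limit, and is reproduced on the get
 * path by the LINUX_RLIM_INFINITY assignments above.
 */
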
int
linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
{
	struct l_timespec lts;
	struct timespec ts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = copyin(args->tsp, &lts, sizeof(lts));
		if (error != 0)
			return (error);
		error = linux_to_native_timespec(&ts, &lts);
		if (error != 0)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

	error = linux_common_pselect6(td, args->nfds, args->readfds,
	    args->writefds, args->exceptfds, tsp, args->sig);
	if (error != 0)
		return (error);

	if (args->tsp != NULL) {
		error = native_to_linux_timespec(&lts, tsp);
		if (error == 0)
			error = copyout(&lts, args->tsp, sizeof(lts));
	}
	return (error);
}

static int
linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds,
    l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp,
    l_uintptr_t *sig)
{
	struct timeval utv, tv0, tv1, *tvp;
	struct l_pselect6arg lpse6;
	l_sigset_t l_ss;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	ssp = NULL;
	if (sig != NULL) {
		error = copyin(sig, &lpse6, sizeof(lpse6));
		if (error != 0)
			return (error);
		if (lpse6.ss_len != sizeof(l_ss))
			return (EINVAL);
		if (lpse6.ss != 0) {
			error = copyin(PTRIN(lpse6.ss), &l_ss,
			    sizeof(l_ss));
			if (error != 0)
				return (error);
			linux_to_bsd_sigset(&l_ss, &ss);
			ssp = &ss;
		}
	}

	/*
	 * Currently glibc converts the nanosecond count to microseconds.
	 * This means losing precision, but so far it is hardly noticeable.
	 */
	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&utv, tsp);
		if (itimerfix(&utv))
			return (EINVAL);

		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_pselect(td, nfds, readfds, writefds,
	    exceptfds, tvp, ssp, LINUX_NFDBITS);

	if (error == 0 && tsp != NULL) {
		if (td->td_retval[0] != 0) {
			/*
			 * Compute how much time was left of the timeout,
			 * by subtracting the current time and the time
			 * before we started the call, and subtracting
			 * that result from the user-supplied value.
			 */
			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			timevalclear(&utv);
		TIMEVAL_TO_TIMESPEC(&utv, tsp);
	}
	return (error);
}
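/*
 * Worked example of the leftover-timeout computation above (illustrative
 * comment, not from the original source): with a user-supplied timeout
 * of 5.0 s and tv1 - tv0 == 1.25 s spent blocked before a descriptor
 * became ready, utv ends up as 3.75 s and is copied back to the caller;
 * had the elapsed time exceeded the timeout, utv would have been clamped
 * to zero instead of going negative.
 */
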
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_pselect6_time64(struct thread *td,
    struct linux_pselect6_time64_args *args)
{
	struct l_timespec64 lts;
	struct timespec ts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = copyin(args->tsp, &lts, sizeof(lts));
		if (error != 0)
			return (error);
		error = linux_to_native_timespec64(&ts, &lts);
		if (error != 0)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

	error = linux_common_pselect6(td, args->nfds, args->readfds,
	    args->writefds, args->exceptfds, tsp, args->sig);
	if (error != 0)
		return (error);

	if (args->tsp != NULL) {
		error = native_to_linux_timespec64(&lts, tsp);
		if (error == 0)
			error = copyout(&lts, args->tsp, sizeof(lts));
	}
	return (error);
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_ppoll(struct thread *td, struct linux_ppoll_args *args)
{
	struct timespec uts, *tsp;
	struct l_timespec lts;
	int error;

	if (args->tsp != NULL) {
		error = copyin(args->tsp, &lts, sizeof(lts));
		if (error)
			return (error);
		error = linux_to_native_timespec(&uts, &lts);
		if (error != 0)
			return (error);
		tsp = &uts;
	} else
		tsp = NULL;

	error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
	    args->sset, args->ssize);
	if (error != 0)
		return (error);
	if (tsp != NULL) {
		error = native_to_linux_timespec(&lts, tsp);
		if (error == 0)
			error = copyout(&lts, args->tsp, sizeof(lts));
	}
	return (error);
}

static int
linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds,
    struct timespec *tsp, l_sigset_t *sset, l_size_t ssize)
{
	struct timespec ts0, ts1;
	l_sigset_t l_ss;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	if (sset != NULL) {
		if (ssize != sizeof(l_ss))
			return (EINVAL);
		error = copyin(sset, &l_ss, sizeof(l_ss));
		if (error)
			return (error);
		linux_to_bsd_sigset(&l_ss, &ss);
		ssp = &ss;
	} else
		ssp = NULL;
	if (tsp != NULL)
		nanotime(&ts0);

	error = kern_poll(td, fds, nfds, tsp, ssp);

	if (error == 0 && tsp != NULL) {
		if (td->td_retval[0]) {
			nanotime(&ts1);
			timespecsub(&ts1, &ts0, &ts1);
			timespecsub(tsp, &ts1, tsp);
			if (tsp->tv_sec < 0)
				timespecclear(tsp);
		} else
			timespecclear(tsp);
	}
	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args)
{
	struct timespec uts, *tsp;
	struct l_timespec64 lts;
	int error;

	if (args->tsp != NULL) {
		error = copyin(args->tsp, &lts, sizeof(lts));
		if (error != 0)
			return (error);
		error = linux_to_native_timespec64(&uts, &lts);
		if (error != 0)
			return (error);
		tsp = &uts;
	} else
		tsp = NULL;
	error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
	    args->sset, args->ssize);
	if (error != 0)
		return (error);
	if (tsp != NULL) {
		error = native_to_linux_timespec64(&lts, tsp);
		if (error == 0)
			error = copyout(&lts, args->tsp, sizeof(lts));
	}
	return (error);
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_sched_rr_get_interval(struct thread *td,
    struct linux_sched_rr_get_interval_args *uap)
{
	struct timespec ts;
	struct l_timespec lts;
	struct thread *tdt;
	int error;

	/*
	 * According to the manual page, EINVAL should be returned
	 * in case an invalid pid is specified.
	 */
	if (uap->pid < 0)
		return (EINVAL);

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_rr_get_interval_td(td, tdt, &ts);
	PROC_UNLOCK(tdt->td_proc);
	if (error != 0)
		return (error);
	error = native_to_linux_timespec(&lts, &ts);
	if (error != 0)
		return (error);
	return (copyout(&lts, uap->interval, sizeof(lts)));
}

/*
 * When the Linux thread is the initial thread in the thread group,
 * the thread id is equal to the process id.  Glibc depends on this
 * magic (assert in pthread_getattr_np.c).
 */
struct thread *
linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
{
	struct linux_emuldata *em;
	struct thread *tdt;
	struct proc *p;

	tdt = NULL;
	if (tid == 0 || tid == td->td_tid) {
		tdt = td;
		PROC_LOCK(tdt->td_proc);
	} else if (tid > PID_MAX)
		tdt = tdfind(tid, pid);
	else {
		/*
		 * Initial thread, where the tid is equal to the pid.
		 */
		p = pfind(tid);
		if (p != NULL) {
			if (SV_PROC_ABI(p) != SV_ABI_LINUX) {
				/*
				 * p is not a Linuxulator process.
				 */
				PROC_UNLOCK(p);
				return (NULL);
			}
			FOREACH_THREAD_IN_PROC(p, tdt) {
				em = em_find(tdt);
				if (tid == em->em_tid)
					return (tdt);
			}
			PROC_UNLOCK(p);
		}
		return (NULL);
	}

	return (tdt);
}

void
linux_to_bsd_waitopts(int options, int *bsdopts)
{

	if (options & LINUX_WNOHANG)
		*bsdopts |= WNOHANG;
	if (options & LINUX_WUNTRACED)
		*bsdopts |= WUNTRACED;
	if (options & LINUX_WEXITED)
		*bsdopts |= WEXITED;
	if (options & LINUX_WCONTINUED)
		*bsdopts |= WCONTINUED;
	if (options & LINUX_WNOWAIT)
		*bsdopts |= WNOWAIT;

	if (options & __WCLONE)
		*bsdopts |= WLINUXCLONE;
}

int
linux_getrandom(struct thread *td, struct linux_getrandom_args *args)
{
	struct uio uio;
	struct iovec iov;
	int error;

	if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM))
		return (EINVAL);
	if (args->count > INT_MAX)
		args->count = INT_MAX;

	iov.iov_base = args->buf;
	iov.iov_len = args->count;

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_resid = iov.iov_len;
	uio.uio_segflg = UIO_USERSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK);
	if (error == 0)
		td->td_retval[0] = args->count - uio.uio_resid;
	return (error);
}

int
linux_mincore(struct thread *td, struct linux_mincore_args *args)
{

	/* Needs to be page-aligned. */
	if (args->start & PAGE_MASK)
		return (EINVAL);
	return (kern_mincore(td, args->start, args->len, args->vec));
}
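/*
 * Linux prefixes each kernel log line with a "<priority>" tag;
 * SYSLOG_TAG below is the KERN_INFO ("<6>") variant.  Illustrative
 * example (not from the original source): a message buffer line such
 * as "em0: link state changed to UP" is returned to the Linux caller
 * as "<6>em0: link state changed to UP", the format Linux dmesg(1)
 * expects to parse.
 */
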
2721 { 2722 char buf[128], *src, *dst; 2723 u_int seq; 2724 int buflen, error; 2725 2726 if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { 2727 linux_msg(td, "syslog unsupported type 0x%x", args->type); 2728 return (EINVAL); 2729 } 2730 2731 if (args->len < 6) { 2732 td->td_retval[0] = 0; 2733 return (0); 2734 } 2735 2736 error = priv_check(td, PRIV_MSGBUF); 2737 if (error) 2738 return (error); 2739 2740 mtx_lock(&msgbuf_lock); 2741 msgbuf_peekbytes(msgbufp, NULL, 0, &seq); 2742 mtx_unlock(&msgbuf_lock); 2743 2744 dst = args->buf; 2745 error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); 2746 /* The -1 is to skip the trailing '\0'. */ 2747 dst += sizeof(SYSLOG_TAG) - 1; 2748 2749 while (error == 0) { 2750 mtx_lock(&msgbuf_lock); 2751 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); 2752 mtx_unlock(&msgbuf_lock); 2753 2754 if (buflen == 0) 2755 break; 2756 2757 for (src = buf; src < buf + buflen && error == 0; src++) { 2758 if (*src == '\0') 2759 continue; 2760 2761 if (dst >= args->buf + args->len) 2762 goto out; 2763 2764 error = copyout(src, dst, 1); 2765 dst++; 2766 2767 if (*src == '\n' && *(src + 1) != '<' && 2768 dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { 2769 error = copyout(&SYSLOG_TAG, 2770 dst, sizeof(SYSLOG_TAG)); 2771 dst += sizeof(SYSLOG_TAG) - 1; 2772 } 2773 } 2774 } 2775 out: 2776 td->td_retval[0] = dst - args->buf; 2777 return (error); 2778 } 2779 2780 int 2781 linux_getcpu(struct thread *td, struct linux_getcpu_args *args) 2782 { 2783 int cpu, error, node; 2784 2785 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ 2786 error = 0; 2787 node = cpuid_to_pcpu[cpu]->pc_domain; 2788 2789 if (args->cpu != NULL) 2790 error = copyout(&cpu, args->cpu, sizeof(l_int)); 2791 if (args->node != NULL) 2792 error = copyout(&node, args->node, sizeof(l_int)); 2793 return (error); 2794 } 2795