//===--- Implementation of a Linux thread class -----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/threads/thread.h"
#include "config/app.h"
#include "src/__support/CPP/atomic.h"
#include "src/__support/CPP/string_view.h"
#include "src/__support/CPP/stringstream.h"
#include "src/__support/OSUtil/syscall.h" // For syscall functions.
#include "src/__support/common.h"
#include "src/__support/error_or.h"
#include "src/__support/macros/config.h"
#include "src/__support/threads/linux/futex_utils.h" // For FutexWordType
#include "src/errno/libc_errno.h"                     // For error macros

#ifdef LIBC_TARGET_ARCH_IS_AARCH64
#include <arm_acle.h>
#endif

#include "hdr/fcntl_macros.h"
#include <linux/param.h> // For EXEC_PAGESIZE.
#include <linux/prctl.h> // For PR_SET_NAME
#include <linux/sched.h> // For CLONE_* flags.
#include <stdint.h>
#include <sys/mman.h>    // For PROT_* and MAP_* definitions.
#include <sys/syscall.h> // For syscall numbers.

namespace LIBC_NAMESPACE_DECL {

#ifdef SYS_mmap2
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
#elif defined(SYS_mmap)
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
#else
#error "mmap or mmap2 syscalls not available."
#endif

static constexpr size_t NAME_SIZE_MAX = 16; // Includes the null terminator.
static constexpr uint32_t CLEAR_TID_VALUE = 0xABCD1234;
static constexpr unsigned CLONE_SYSCALL_FLAGS =
    CLONE_VM        // Share the memory space with the parent.
    | CLONE_FS      // Share the file system with the parent.
    | CLONE_FILES   // Share the files with the parent.
    | CLONE_SIGHAND // Share the signal handlers with the parent.
    | CLONE_THREAD  // Same thread group as the parent.
    | CLONE_SYSVSEM // Share a single list of System V semaphore adjustment
                    // values.
    | CLONE_PARENT_SETTID  // Set the child thread ID in |ptid| of the parent.
    | CLONE_CHILD_CLEARTID // Let the kernel clear the tid address and wake the
                           // joining thread.
    | CLONE_SETTLS;        // Set up the thread pointer of the new thread.

#ifdef LIBC_TARGET_ARCH_IS_AARCH64
#define CLONE_RESULT_REGISTER "x0"
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
#define CLONE_RESULT_REGISTER "t0"
#elif defined(LIBC_TARGET_ARCH_IS_X86_64)
#define CLONE_RESULT_REGISTER "rax"
#else
#error "CLONE_RESULT_REGISTER not defined for your target architecture"
#endif

static constexpr ErrorOr<size_t> add_no_overflow(size_t lhs, size_t rhs) {
  if (lhs > SIZE_MAX - rhs)
    return Error{EINVAL};
  if (rhs > SIZE_MAX - lhs)
    return Error{EINVAL};
  return lhs + rhs;
}

static constexpr ErrorOr<size_t> round_to_page(size_t v) {
  auto vp_or_err = add_no_overflow(v, EXEC_PAGESIZE - 1);
  if (!vp_or_err)
    return vp_or_err;

  return vp_or_err.value() & -EXEC_PAGESIZE;
}
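
// For illustration, a worked example of the helpers above (a sketch assuming
// the common Linux value EXEC_PAGESIZE == 4096): round_to_page(5000) computes
// (5000 + 4095) & -4096 == 8192, i.e. two full pages, and round_to_page(8192)
// stays 8192. A value close to SIZE_MAX fails the add_no_overflow check and
// yields Error{EINVAL} instead of silently wrapping around.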
LIBC_INLINE ErrorOr<void *> alloc_stack(size_t stacksize, size_t guardsize) {

  // If a guard region is requested, the whole mapping is initially created
  // with PROT_NONE; the usable stack portion gets read/write permissions via
  // mprotect below.
  int prot = guardsize ? PROT_NONE : PROT_READ | PROT_WRITE;
  auto size_or_err = add_no_overflow(stacksize, guardsize);
  if (!size_or_err)
    return Error{int(size_or_err.error())};
  size_t size = size_or_err.value();

  // TODO: Maybe add MAP_STACK? Currently unimplemented on Linux but helps
  // future-proof.
  long mmap_result = LIBC_NAMESPACE::syscall_impl<long>(
      MMAP_SYSCALL_NUMBER,
      0, // No special address.
      size, prot,
      MAP_ANONYMOUS | MAP_PRIVATE, // Process private.
      -1,                          // Not backed by any file.
      0                            // No offset.
  );
  if (mmap_result < 0 && (uintptr_t(mmap_result) >= UINTPTR_MAX - size))
    return Error{int(-mmap_result)};

  if (guardsize) {
    // Give read/write permissions to the actual stack.
    // TODO: We are assuming the stack grows down here.
    long result = LIBC_NAMESPACE::syscall_impl<long>(
        SYS_mprotect, mmap_result + guardsize, stacksize,
        PROT_READ | PROT_WRITE);

    if (result != 0)
      return Error{int(-result)};
  }
  mmap_result += guardsize;
  return reinterpret_cast<void *>(mmap_result);
}

// This must always be inlined as we may be freeing the calling thread's stack,
// in which case a normal return from the top of the stack would cause an
// invalid memory read.
[[gnu::always_inline]] LIBC_INLINE void
free_stack(void *stack, size_t stacksize, size_t guardsize) {
  uintptr_t stackaddr = reinterpret_cast<uintptr_t>(stack);
  stackaddr -= guardsize;
  stack = reinterpret_cast<void *>(stackaddr);
  LIBC_NAMESPACE::syscall_impl<long>(SYS_munmap, stack, stacksize + guardsize);
}
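
// When a guard region is requested, the resulting mapping looks like this
// (an illustrative sketch; addresses increase upward and the stack is assumed
// to grow downward):
//
//   mmap_result + guardsize + stacksize  <- top of the usable stack
//   mmap_result + guardsize              <- pointer returned by alloc_stack
//                                           (read/write pages)
//   mmap_result                          <- start of the guard region
//                                           (PROT_NONE)
//
// free_stack undoes the guardsize adjustment before unmapping so that the
// whole region, guard included, is released with a single munmap call.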
struct Thread;

// We align the start args to a 16-byte boundary as we adjust the allocated
// stack memory with its size. We want the adjusted address to be at a
// 16-byte boundary to satisfy the x86_64 and aarch64 ABI requirements.
// If a different architecture requires higher alignment in the future, we
// can add a platform specific alignment spec.
struct alignas(STACK_ALIGNMENT) StartArgs {
  ThreadAttributes *thread_attrib;
  ThreadRunner runner;
  void *arg;
};

// This must always be inlined as we may be freeing the calling thread's stack,
// in which case a normal return from the top of the stack would cause an
// invalid memory read.
[[gnu::always_inline]] LIBC_INLINE void
cleanup_thread_resources(ThreadAttributes *attrib) {
  // Clean up the TLS before the stack as the TLS information is stored on
  // the stack.
  cleanup_tls(attrib->tls, attrib->tls_size);
  if (attrib->owned_stack)
    free_stack(attrib->stack, attrib->stacksize, attrib->guardsize);
}

[[gnu::always_inline]] LIBC_INLINE uintptr_t get_start_args_addr() {
  // NOTE: For __builtin_frame_address to work reliably across compilers,
  // architectures and various optimization levels, the TU including this file
  // should be compiled with -fno-omit-frame-pointer.
#ifdef LIBC_TARGET_ARCH_IS_X86_64
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(0))
         // The x86_64 call instruction pushes the resume address on to the
         // stack. Next, the x86_64 SysV ABI requires that the frame pointer be
         // pushed on to the stack. So, we have to step past two 64-bit values
         // to get to the start args.
         + sizeof(uintptr_t) * 2;
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
  // The frame pointer after cloning the new thread in the Thread::run method
  // is set to the stack pointer where the start args are stored. So, we fetch
  // them from there.
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(1));
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
  // The current frame pointer is the previous stack pointer where the start
  // args are stored.
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
#endif
}

[[gnu::noinline]] void start_thread() {
  auto *start_args = reinterpret_cast<StartArgs *>(get_start_args_addr());
  auto *attrib = start_args->thread_attrib;
  self.attrib = attrib;
  self.attrib->atexit_callback_mgr = internal::get_thread_atexit_callback_mgr();

  if (attrib->style == ThreadStyle::POSIX) {
    attrib->retval.posix_retval =
        start_args->runner.posix_runner(start_args->arg);
    thread_exit(ThreadReturnValue(attrib->retval.posix_retval),
                ThreadStyle::POSIX);
  } else {
    attrib->retval.stdc_retval =
        start_args->runner.stdc_runner(start_args->arg);
    thread_exit(ThreadReturnValue(attrib->retval.stdc_retval),
                ThreadStyle::STDC);
  }
}

int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack,
                size_t stacksize, size_t guardsize, bool detached) {
  bool owned_stack = false;
  if (stack == nullptr) {
    // TODO: Should we return EINVAL here? Should we have a generic concept of
    // a minimum stack size (like 16384 for pthread)?
    if (stacksize == 0)
      stacksize = DEFAULT_STACKSIZE;
    // Round up stacksize/guardsize to the page size.
    // TODO: Should we also add sizeof(ThreadAttributes) and other internal
    // metadata?
    auto round_or_err = round_to_page(guardsize);
    if (!round_or_err)
      return round_or_err.error();
    guardsize = round_or_err.value();

    round_or_err = round_to_page(stacksize);
    if (!round_or_err)
      return round_or_err.error();

    stacksize = round_or_err.value();
    auto alloc = alloc_stack(stacksize, guardsize);
    if (!alloc)
      return alloc.error();
    else
      stack = alloc.value();
    owned_stack = true;
  }

  // Validate that stack/stacksize are validly aligned.
  uintptr_t stackaddr = reinterpret_cast<uintptr_t>(stack);
  if ((stackaddr % STACK_ALIGNMENT != 0) ||
      ((stackaddr + stacksize) % STACK_ALIGNMENT != 0)) {
    if (owned_stack)
      free_stack(stack, stacksize, guardsize);
    return EINVAL;
  }

  TLSDescriptor tls;
  init_tls(tls);

  // When the new thread is spawned by the kernel, the new thread gets the
  // stack we pass to the clone syscall. However, this stack is empty and does
  // not have any local vars present in this function. Hence, one cannot
  // pass arguments to the thread start function, or use any local vars from
  // here. So, we pack them into the new stack from where the thread can sniff
  // them out.
  //
  // Likewise, the actual thread state information is also stored on the
  // stack memory.

  static constexpr size_t INTERNAL_STACK_DATA_SIZE =
      sizeof(StartArgs) + sizeof(ThreadAttributes) + sizeof(Futex);

  // This is pretty arbitrary, but at the moment we don't adjust the user
  // provided stacksize (or the default) to account for this data as it's
  // assumed to be minimal. If this assert starts failing, we probably should.
  // Likewise, if we can't bound this, we may overflow when we subtract it from
  // the top of the stack.
  static_assert(INTERNAL_STACK_DATA_SIZE < EXEC_PAGESIZE);

  // TODO: We are assuming the stack grows down here.
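  // For reference, the data carved out of the top of the stack is laid out
  // roughly as follows (an illustrative sketch; higher addresses are at the
  // top and the new thread's stack grows downward from adjusted_stack):
  //
  //   stack + stacksize   <- top of the mapping
  //     ... alignment padding ...
  //   clear_tid futex
  //   ThreadAttributes
  //   StartArgs           <- adjusted_stack, the initial stack pointer handed
  //                          to the clone syscall
  //     ... usable thread stack, growing down ...
  //   stack               <- bottom of the usable stack (guard region below,
  //                          if the stack is owned)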
  auto adjusted_stack_or_err =
      add_no_overflow(reinterpret_cast<uintptr_t>(stack), stacksize);
  if (!adjusted_stack_or_err) {
    cleanup_tls(tls.addr, tls.size);
    if (owned_stack)
      free_stack(stack, stacksize, guardsize);
    return adjusted_stack_or_err.error();
  }

  uintptr_t adjusted_stack =
      adjusted_stack_or_err.value() - INTERNAL_STACK_DATA_SIZE;
  adjusted_stack &= ~(uintptr_t(STACK_ALIGNMENT) - 1);

  auto *start_args = reinterpret_cast<StartArgs *>(adjusted_stack);

  attrib =
      reinterpret_cast<ThreadAttributes *>(adjusted_stack + sizeof(StartArgs));
  attrib->style = style;
  attrib->detach_state =
      uint32_t(detached ? DetachState::DETACHED : DetachState::JOINABLE);
  attrib->stack = stack;
  attrib->stacksize = stacksize;
  attrib->guardsize = guardsize;
  attrib->owned_stack = owned_stack;
  attrib->tls = tls.addr;
  attrib->tls_size = tls.size;

  start_args->thread_attrib = attrib;
  start_args->runner = runner;
  start_args->arg = arg;

  auto clear_tid = reinterpret_cast<Futex *>(
      adjusted_stack + sizeof(StartArgs) + sizeof(ThreadAttributes));
  clear_tid->set(CLEAR_TID_VALUE);
  attrib->platform_data = clear_tid;

  // The clone syscall takes arguments in an architecture specific order.
  // Also, we want the result of the syscall to be in a register as the child
  // thread gets a completely different stack after it is created. The stack
  // variables from this function will not be available to the child thread.
#if defined(LIBC_TARGET_ARCH_IS_X86_64)
  long register clone_result asm(CLONE_RESULT_REGISTER);
  clone_result = LIBC_NAMESPACE::syscall_impl<long>(
      SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
      &attrib->tid,    // The address where the child tid is written.
      &clear_tid->val, // The futex where the child thread status is signalled.
      tls.tp           // The thread pointer value for the new thread.
  );
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) ||                                  \
    defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
  long register clone_result asm(CLONE_RESULT_REGISTER);
  clone_result = LIBC_NAMESPACE::syscall_impl<long>(
      SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
      &attrib->tid,   // The address where the child tid is written.
      tls.tp,         // The thread pointer value for the new thread.
      &clear_tid->val // The futex where the child thread status is signalled.
  );
#else
#error "Unsupported architecture for the clone syscall."
#endif
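
  // Like fork, clone returns twice: the new thread resumes below with a
  // return value of 0 and adjusted_stack as its stack pointer, while the
  // parent receives the child's tid on success or a negative errno value on
  // failure.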
  if (clone_result == 0) {
#ifdef LIBC_TARGET_ARCH_IS_AARCH64
    // We set the frame pointer to be the same as the "sp" so that start args
    // can be sniffed out from start_thread.
#ifdef __clang__
    // GCC does not currently implement __arm_wsr64/__arm_rsr64.
    __arm_wsr64("x29", __arm_rsr64("sp"));
#else
    asm volatile("mov x29, sp");
#endif
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
    asm volatile("mv fp, sp");
#endif
    start_thread();
  } else if (clone_result < 0) {
    cleanup_thread_resources(attrib);
    return static_cast<int>(-clone_result);
  }

  return 0;
}

int Thread::join(ThreadReturnValue &retval) {
  wait();

  if (attrib->style == ThreadStyle::POSIX)
    retval.posix_retval = attrib->retval.posix_retval;
  else
    retval.stdc_retval = attrib->retval.stdc_retval;

  cleanup_thread_resources(attrib);

  return 0;
}

int Thread::detach() {
  uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
  if (attrib->detach_state.compare_exchange_strong(
          joinable_state, uint32_t(DetachState::DETACHED))) {
    return int(DetachType::SIMPLE);
  }

  // If the thread was already detached, then this method should not have been
  // called at all. If the thread is exiting, then we wait for it to exit and
  // free up its resources.
  wait();

  cleanup_thread_resources(attrib);

  return int(DetachType::CLEANUP);
}

void Thread::wait() {
  // The kernel should set the value at the clear tid address to zero.
  // If not, it is a spurious wake and we should continue to wait on
  // the futex.
  auto *clear_tid = reinterpret_cast<Futex *>(attrib->platform_data);
  // We cannot do a FUTEX_WAIT_PRIVATE here as the kernel does a
  // FUTEX_WAKE and not a FUTEX_WAKE_PRIVATE.
  while (clear_tid->load() != 0)
    clear_tid->wait(CLEAR_TID_VALUE, cpp::nullopt, true);
}

bool Thread::operator==(const Thread &thread) const {
  return attrib->tid == thread.attrib->tid;
}

static constexpr cpp::string_view THREAD_NAME_PATH_PREFIX("/proc/self/task/");
static constexpr size_t THREAD_NAME_PATH_SIZE =
    THREAD_NAME_PATH_PREFIX.size() +
    IntegerToString<int>::buffer_size() + // Size of the tid.
    1 +                                   // For the '/' character.
    5; // For the file name "comm" and the null terminator.

static void construct_thread_name_file_path(cpp::StringStream &stream,
                                            int tid) {
  stream << THREAD_NAME_PATH_PREFIX << tid << '/' << cpp::string_view("comm")
         << cpp::StringStream::ENDS;
}
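
// For example, for a thread with tid 1234, construct_thread_name_file_path
// produces the null terminated string "/proc/self/task/1234/comm".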
int Thread::set_name(const cpp::string_view &name) {
  if (name.size() >= NAME_SIZE_MAX)
    return ERANGE;

  if (*this == self) {
    // If we are setting the name of the current thread, then we can
    // use the prctl syscall to set the name directly.
    int retval =
        LIBC_NAMESPACE::syscall_impl<int>(SYS_prctl, PR_SET_NAME, name.data());
    if (retval < 0)
      return -retval;
    else
      return 0;
  }

  char path_name_buffer[THREAD_NAME_PATH_SIZE];
  cpp::StringStream path_stream(path_name_buffer);
  construct_thread_name_file_path(path_stream, attrib->tid);
#ifdef SYS_open
  int fd =
      LIBC_NAMESPACE::syscall_impl<int>(SYS_open, path_name_buffer, O_RDWR);
#else
  int fd = LIBC_NAMESPACE::syscall_impl<int>(SYS_openat, AT_FDCWD,
                                             path_name_buffer, O_RDWR);
#endif
  if (fd < 0)
    return -fd;

  int retval = LIBC_NAMESPACE::syscall_impl<int>(SYS_write, fd, name.data(),
                                                 name.size());
  LIBC_NAMESPACE::syscall_impl<long>(SYS_close, fd);

  if (retval < 0)
    return -retval;
  else if (retval != int(name.size()))
    return EIO;
  else
    return 0;
}

int Thread::get_name(cpp::StringStream &name) const {
  if (name.bufsize() < NAME_SIZE_MAX)
    return ERANGE;

  char name_buffer[NAME_SIZE_MAX];

  if (*this == self) {
    // If we are getting the name of the current thread, then we can
    // use the prctl syscall to get the name directly.
    int retval =
        LIBC_NAMESPACE::syscall_impl<int>(SYS_prctl, PR_GET_NAME, name_buffer);
    if (retval < 0)
      return -retval;
    name << name_buffer << cpp::StringStream::ENDS;
    return 0;
  }

  char path_name_buffer[THREAD_NAME_PATH_SIZE];
  cpp::StringStream path_stream(path_name_buffer);
  construct_thread_name_file_path(path_stream, attrib->tid);
#ifdef SYS_open
  int fd =
      LIBC_NAMESPACE::syscall_impl<int>(SYS_open, path_name_buffer, O_RDONLY);
#else
  int fd = LIBC_NAMESPACE::syscall_impl<int>(SYS_openat, AT_FDCWD,
                                             path_name_buffer, O_RDONLY);
#endif
  if (fd < 0)
    return -fd;

  int retval = LIBC_NAMESPACE::syscall_impl<int>(SYS_read, fd, name_buffer,
                                                 NAME_SIZE_MAX);
  LIBC_NAMESPACE::syscall_impl<long>(SYS_close, fd);
  if (retval < 0)
    return -retval;
  if (retval == NAME_SIZE_MAX)
    return ERANGE;
  // The comm value read from procfs ends with a newline; strip it and null
  // terminate the result.
  if (name_buffer[retval - 1] == '\n')
    name_buffer[retval - 1] = '\0';
  else
    name_buffer[retval] = '\0';
  name << name_buffer << cpp::StringStream::ENDS;
  return 0;
}
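
// A minimal usage sketch (hypothetical caller, not part of this file): a
// pthread_setname_np style wrapper for the current thread would forward to
// set_name and surface the returned errno value directly, e.g.:
//
//   int set_current_thread_name(const char *name) {
//     return self.set_name(cpp::string_view(name));
//   }
//
// Both set_name and get_name cap names at NAME_SIZE_MAX (16) bytes including
// the null terminator, matching the kernel's comm name limit.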
void thread_exit(ThreadReturnValue retval, ThreadStyle style) {
  auto attrib = self.attrib;

  // The very first thing we do is to call the thread's atexit callbacks.
  // These callbacks could be the ones registered by the language runtimes,
  // for example, the destructors of thread local objects. They can also
  // be the destructors of the TSS objects set using APIs like
  // pthread_setspecific.
  // NOTE: We cannot call the atexit callbacks as part of the
  // cleanup_thread_resources function as that function can be called from a
  // different thread. The destructors of thread local and TSS objects should
  // be called by the thread which owns them.
  internal::call_atexit_callbacks(attrib);

  uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
  if (!attrib->detach_state.compare_exchange_strong(
          joinable_state, uint32_t(DetachState::EXITING))) {
    // The thread is detached, so clean up its resources here.
    cleanup_thread_resources(attrib);

    // Set the CLEAR_TID address to nullptr to prevent the kernel
    // from signalling at a non-existent futex location.
    LIBC_NAMESPACE::syscall_impl<long>(SYS_set_tid_address, 0);
    // The return value of a detached thread is unused. We need to avoid
    // referencing `style` or `retval.*` here because they may be stored on the
    // stack and we have just deallocated our stack!
    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 0);
    __builtin_unreachable();
  }

  if (style == ThreadStyle::POSIX)
    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, retval.posix_retval);
  else
    LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, retval.stdc_retval);
  __builtin_unreachable();
}

} // namespace LIBC_NAMESPACE_DECL