1ca10bc4fSJoseph Huber //===-- GPU implementation of the nanosleep function ----------------------===// 2ca10bc4fSJoseph Huber // 3ca10bc4fSJoseph Huber // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4ca10bc4fSJoseph Huber // See https://llvm.org/LICENSE.txt for license information. 5ca10bc4fSJoseph Huber // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6ca10bc4fSJoseph Huber // 7ca10bc4fSJoseph Huber //===----------------------------------------------------------------------===// 8ca10bc4fSJoseph Huber 9ca10bc4fSJoseph Huber #include "src/time/nanosleep.h" 10ca10bc4fSJoseph Huber 11*ac604b2fSJoseph Huber #include "src/__support/common.h" 125ff3ff33SPetr Hosek #include "src/__support/macros/config.h" 13e6cf5d28SSchrodinger ZHU Yifan #include "src/__support/time/gpu/time_utils.h" 14ca10bc4fSJoseph Huber 155ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL { 16ca10bc4fSJoseph Huber 17ca10bc4fSJoseph Huber LLVM_LIBC_FUNCTION(int, nanosleep, 18ca10bc4fSJoseph Huber (const struct timespec *req, struct timespec *rem)) { 19ca10bc4fSJoseph Huber if (!GPU_CLOCKS_PER_SEC || !req) 20ca10bc4fSJoseph Huber return -1; 21ca10bc4fSJoseph Huber 221dacfd11SJoseph Huber uint64_t nsecs = req->tv_nsec + req->tv_sec * TICKS_PER_SEC; 231dacfd11SJoseph Huber uint64_t tick_rate = TICKS_PER_SEC / GPU_CLOCKS_PER_SEC; 24ca10bc4fSJoseph Huber 25ca10bc4fSJoseph Huber uint64_t start = gpu::fixed_frequency_clock(); 2663198e06SJoseph Huber #if defined(LIBC_TARGET_ARCH_IS_NVPTX) 271dacfd11SJoseph Huber uint64_t end = start + (nsecs + tick_rate - 1) / tick_rate; 28ca10bc4fSJoseph Huber uint64_t cur = gpu::fixed_frequency_clock(); 29ca10bc4fSJoseph Huber // The NVPTX architecture supports sleeping and guaruntees the actual time 30ca10bc4fSJoseph Huber // slept will be somewhere between zero and twice the requested amount. Here 31ca10bc4fSJoseph Huber // we will sleep again if we undershot the time. 32ca10bc4fSJoseph Huber while (cur < end) { 3363198e06SJoseph Huber if (__nvvm_reflect("__CUDA_ARCH") >= 700) 3463198e06SJoseph Huber LIBC_INLINE_ASM("nanosleep.u32 %0;" ::"r"(nsecs)); 35ca10bc4fSJoseph Huber cur = gpu::fixed_frequency_clock(); 36ca10bc4fSJoseph Huber nsecs -= nsecs > cur - start ? cur - start : 0; 37ca10bc4fSJoseph Huber } 38ca10bc4fSJoseph Huber #elif defined(LIBC_TARGET_ARCH_IS_AMDGPU) 391dacfd11SJoseph Huber uint64_t end = start + (nsecs + tick_rate - 1) / tick_rate; 40ca10bc4fSJoseph Huber uint64_t cur = gpu::fixed_frequency_clock(); 41ca10bc4fSJoseph Huber // The AMDGPU architecture does not provide a sleep implementation with a 42ca10bc4fSJoseph Huber // known delay so we simply repeatedly sleep with a large value of ~960 clock 43ca10bc4fSJoseph Huber // cycles and check until we've passed the time using the known frequency. 44ca10bc4fSJoseph Huber __builtin_amdgcn_s_sleep(2); 45ca10bc4fSJoseph Huber while (cur < end) { 46ca10bc4fSJoseph Huber __builtin_amdgcn_s_sleep(15); 47ca10bc4fSJoseph Huber cur = gpu::fixed_frequency_clock(); 48ca10bc4fSJoseph Huber } 49ca10bc4fSJoseph Huber #else 50ca10bc4fSJoseph Huber // Sleeping is not supported. 51ca10bc4fSJoseph Huber if (rem) { 52ca10bc4fSJoseph Huber rem->tv_sec = req->tv_sec; 53ca10bc4fSJoseph Huber rem->tv_nsec = req->tv_nsec; 54ca10bc4fSJoseph Huber } 55ca10bc4fSJoseph Huber return -1; 56ca10bc4fSJoseph Huber #endif 57ca10bc4fSJoseph Huber uint64_t stop = gpu::fixed_frequency_clock(); 58ca10bc4fSJoseph Huber 59ca10bc4fSJoseph Huber // Check to make sure we slept for at least the desired duration and set the 60ca10bc4fSJoseph Huber // remaining time if not. 611dacfd11SJoseph Huber uint64_t elapsed = (stop - start) * tick_rate; 62ca10bc4fSJoseph Huber if (elapsed < nsecs) { 63ca10bc4fSJoseph Huber if (rem) { 641dacfd11SJoseph Huber rem->tv_sec = (nsecs - elapsed) / TICKS_PER_SEC; 651dacfd11SJoseph Huber rem->tv_nsec = (nsecs - elapsed) % TICKS_PER_SEC; 66ca10bc4fSJoseph Huber } 67ca10bc4fSJoseph Huber return -1; 68ca10bc4fSJoseph Huber } 69ca10bc4fSJoseph Huber 70ca10bc4fSJoseph Huber return 0; 71ca10bc4fSJoseph Huber } 72ca10bc4fSJoseph Huber 735ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL 74