xref: /llvm-project/libc/src/time/gpu/nanosleep.cpp (revision ac604b2fa6ff0344a555954069721c0db7b874f9)
1ca10bc4fSJoseph Huber //===-- GPU implementation of the nanosleep function ----------------------===//
2ca10bc4fSJoseph Huber //
3ca10bc4fSJoseph Huber // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4ca10bc4fSJoseph Huber // See https://llvm.org/LICENSE.txt for license information.
5ca10bc4fSJoseph Huber // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6ca10bc4fSJoseph Huber //
7ca10bc4fSJoseph Huber //===----------------------------------------------------------------------===//
8ca10bc4fSJoseph Huber 
9ca10bc4fSJoseph Huber #include "src/time/nanosleep.h"
10ca10bc4fSJoseph Huber 
11*ac604b2fSJoseph Huber #include "src/__support/common.h"
125ff3ff33SPetr Hosek #include "src/__support/macros/config.h"
13e6cf5d28SSchrodinger ZHU Yifan #include "src/__support/time/gpu/time_utils.h"
14ca10bc4fSJoseph Huber 
155ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL {
16ca10bc4fSJoseph Huber 
17ca10bc4fSJoseph Huber LLVM_LIBC_FUNCTION(int, nanosleep,
18ca10bc4fSJoseph Huber                    (const struct timespec *req, struct timespec *rem)) {
19ca10bc4fSJoseph Huber   if (!GPU_CLOCKS_PER_SEC || !req)
20ca10bc4fSJoseph Huber     return -1;
21ca10bc4fSJoseph Huber 
221dacfd11SJoseph Huber   uint64_t nsecs = req->tv_nsec + req->tv_sec * TICKS_PER_SEC;
231dacfd11SJoseph Huber   uint64_t tick_rate = TICKS_PER_SEC / GPU_CLOCKS_PER_SEC;
24ca10bc4fSJoseph Huber 
25ca10bc4fSJoseph Huber   uint64_t start = gpu::fixed_frequency_clock();
2663198e06SJoseph Huber #if defined(LIBC_TARGET_ARCH_IS_NVPTX)
271dacfd11SJoseph Huber   uint64_t end = start + (nsecs + tick_rate - 1) / tick_rate;
28ca10bc4fSJoseph Huber   uint64_t cur = gpu::fixed_frequency_clock();
29ca10bc4fSJoseph Huber   // The NVPTX architecture supports sleeping and guaruntees the actual time
30ca10bc4fSJoseph Huber   // slept will be somewhere between zero and twice the requested amount. Here
31ca10bc4fSJoseph Huber   // we will sleep again if we undershot the time.
32ca10bc4fSJoseph Huber   while (cur < end) {
3363198e06SJoseph Huber     if (__nvvm_reflect("__CUDA_ARCH") >= 700)
3463198e06SJoseph Huber       LIBC_INLINE_ASM("nanosleep.u32 %0;" ::"r"(nsecs));
35ca10bc4fSJoseph Huber     cur = gpu::fixed_frequency_clock();
36ca10bc4fSJoseph Huber     nsecs -= nsecs > cur - start ? cur - start : 0;
37ca10bc4fSJoseph Huber   }
38ca10bc4fSJoseph Huber #elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
391dacfd11SJoseph Huber   uint64_t end = start + (nsecs + tick_rate - 1) / tick_rate;
40ca10bc4fSJoseph Huber   uint64_t cur = gpu::fixed_frequency_clock();
41ca10bc4fSJoseph Huber   // The AMDGPU architecture does not provide a sleep implementation with a
42ca10bc4fSJoseph Huber   // known delay so we simply repeatedly sleep with a large value of ~960 clock
43ca10bc4fSJoseph Huber   // cycles and check until we've passed the time using the known frequency.
44ca10bc4fSJoseph Huber   __builtin_amdgcn_s_sleep(2);
45ca10bc4fSJoseph Huber   while (cur < end) {
46ca10bc4fSJoseph Huber     __builtin_amdgcn_s_sleep(15);
47ca10bc4fSJoseph Huber     cur = gpu::fixed_frequency_clock();
48ca10bc4fSJoseph Huber   }
49ca10bc4fSJoseph Huber #else
50ca10bc4fSJoseph Huber   // Sleeping is not supported.
51ca10bc4fSJoseph Huber   if (rem) {
52ca10bc4fSJoseph Huber     rem->tv_sec = req->tv_sec;
53ca10bc4fSJoseph Huber     rem->tv_nsec = req->tv_nsec;
54ca10bc4fSJoseph Huber   }
55ca10bc4fSJoseph Huber   return -1;
56ca10bc4fSJoseph Huber #endif
57ca10bc4fSJoseph Huber   uint64_t stop = gpu::fixed_frequency_clock();
58ca10bc4fSJoseph Huber 
59ca10bc4fSJoseph Huber   // Check to make sure we slept for at least the desired duration and set the
60ca10bc4fSJoseph Huber   // remaining time if not.
611dacfd11SJoseph Huber   uint64_t elapsed = (stop - start) * tick_rate;
62ca10bc4fSJoseph Huber   if (elapsed < nsecs) {
63ca10bc4fSJoseph Huber     if (rem) {
641dacfd11SJoseph Huber       rem->tv_sec = (nsecs - elapsed) / TICKS_PER_SEC;
651dacfd11SJoseph Huber       rem->tv_nsec = (nsecs - elapsed) % TICKS_PER_SEC;
66ca10bc4fSJoseph Huber     }
67ca10bc4fSJoseph Huber     return -1;
68ca10bc4fSJoseph Huber   }
69ca10bc4fSJoseph Huber 
70ca10bc4fSJoseph Huber   return 0;
71ca10bc4fSJoseph Huber }
72ca10bc4fSJoseph Huber 
735ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL
74