xref: /llvm-project/libc/startup/gpu/amdgpu/start.cpp (revision 666a3f4ed4f62a9b1b732dae6a34a66d31217563)
1fa34b9e0SJoseph Huber //===-- Implementation of crt for amdgpu ----------------------------------===//
2fa34b9e0SJoseph Huber //
3fa34b9e0SJoseph Huber // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fa34b9e0SJoseph Huber // See https://llvm.org/LICENSE.txt for license information.
5fa34b9e0SJoseph Huber // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fa34b9e0SJoseph Huber //
7fa34b9e0SJoseph Huber //===----------------------------------------------------------------------===//
8fa34b9e0SJoseph Huber 
91a92cc5aSJoseph Huber #include "config/gpu/app.h"
1050445dffSJoseph Huber #include "src/__support/GPU/utils.h"
118e4f9b1fSJoseph Huber #include "src/__support/RPC/rpc_client.h"
125ff3ff33SPetr Hosek #include "src/__support/macros/config.h"
131b823abeSJoseph Huber #include "src/stdlib/atexit.h"
141b823abeSJoseph Huber #include "src/stdlib/exit.h"
158e4f9b1fSJoseph Huber 
16b7c7dbd4SSchrodinger ZHU Yifan extern "C" int main(int argc, char **argv, char **envp);
17fa34b9e0SJoseph Huber 
185ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL {
1950445dffSJoseph Huber 
// FIXME: Move this into shared startup logic so that this file no longer has
// to provide a stub for it.
void teardown_main_tls() {
  // Intentionally empty: this definition is only a stub.
}
22*666a3f4eSJoseph Huber 
231a92cc5aSJoseph Huber DataEnvironment app;
241a92cc5aSJoseph Huber 
251b823abeSJoseph Huber extern "C" uintptr_t __init_array_start[];
261b823abeSJoseph Huber extern "C" uintptr_t __init_array_end[];
271b823abeSJoseph Huber extern "C" uintptr_t __fini_array_start[];
281b823abeSJoseph Huber extern "C" uintptr_t __fini_array_end[];
291b823abeSJoseph Huber 
301b823abeSJoseph Huber using InitCallback = void(int, char **, char **);
311b823abeSJoseph Huber using FiniCallback = void(void);
321b823abeSJoseph Huber 
331b823abeSJoseph Huber static void call_init_array_callbacks(int argc, char **argv, char **env) {
341b823abeSJoseph Huber   size_t init_array_size = __init_array_end - __init_array_start;
351b823abeSJoseph Huber   for (size_t i = 0; i < init_array_size; ++i)
361b823abeSJoseph Huber     reinterpret_cast<InitCallback *>(__init_array_start[i])(argc, argv, env);
371b823abeSJoseph Huber }
3850445dffSJoseph Huber 
391b823abeSJoseph Huber static void call_fini_array_callbacks() {
401b823abeSJoseph Huber   size_t fini_array_size = __fini_array_end - __fini_array_start;
41dc30fa6aSJoseph Huber   for (size_t i = fini_array_size; i > 0; --i)
42dc30fa6aSJoseph Huber     reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
431b823abeSJoseph Huber }
441b823abeSJoseph Huber 
455ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL
46901266daSJoseph Huber 
475c13f9aeSJoseph Huber extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel,
485c13f9aeSJoseph Huber              clang::amdgpu_flat_work_group_size(1, 1),
495c13f9aeSJoseph Huber              clang::amdgpu_max_num_work_groups(1)]] void
5059896c16SJoseph Huber _begin(int argc, char **argv, char **env) {
511a92cc5aSJoseph Huber   __atomic_store_n(&LIBC_NAMESPACE::app.env_ptr,
521a92cc5aSJoseph Huber                    reinterpret_cast<uintptr_t *>(env), __ATOMIC_RELAXED);
531b823abeSJoseph Huber   // We want the fini array callbacks to be run after other atexit
541b823abeSJoseph Huber   // callbacks are run. So, we register them before running the init
551b823abeSJoseph Huber   // array callbacks as they can potentially register their own atexit
561b823abeSJoseph Huber   // callbacks.
57b6bc9d72SGuillaume Chatelet   LIBC_NAMESPACE::atexit(&LIBC_NAMESPACE::call_fini_array_callbacks);
58b6bc9d72SGuillaume Chatelet   LIBC_NAMESPACE::call_init_array_callbacks(argc, argv, env);
591b823abeSJoseph Huber }
601b823abeSJoseph Huber 
61901266daSJoseph Huber extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void
62901266daSJoseph Huber _start(int argc, char **argv, char **envp, int *ret) {
63901266daSJoseph Huber   // Invoke the 'main' function with every active thread that the user launched
64901266daSJoseph Huber   // the _start kernel with.
65901266daSJoseph Huber   __atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);
6650445dffSJoseph Huber }
6750445dffSJoseph Huber 
685c13f9aeSJoseph Huber extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel,
695c13f9aeSJoseph Huber              clang::amdgpu_flat_work_group_size(1, 1),
705c13f9aeSJoseph Huber              clang::amdgpu_max_num_work_groups(1)]] void
71901266daSJoseph Huber _end(int retval) {
721b823abeSJoseph Huber   // Only a single thread should call `exit` here, the rest should gracefully
731b823abeSJoseph Huber   // return from the kernel. This is so only one thread calls the destructors
741b823abeSJoseph Huber   // registred with 'atexit' above.
75b6bc9d72SGuillaume Chatelet   LIBC_NAMESPACE::exit(retval);
761b823abeSJoseph Huber }
77