xref: /llvm-project/libc/utils/gpu/loader/Loader.h (revision e85a9f5540f5399b20a32c8d87474e6fc906ad33)
167d78e3cSJoseph Huber //===-- Generic device loader interface -----------------------------------===//
267d78e3cSJoseph Huber //
367d78e3cSJoseph Huber // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
467d78e3cSJoseph Huber // See https://llvm.org/LICENSE.txt for license information.
567d78e3cSJoseph Huber // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
667d78e3cSJoseph Huber //
767d78e3cSJoseph Huber //===----------------------------------------------------------------------===//
867d78e3cSJoseph Huber 
92bef46d2SJoseph Huber #ifndef LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H
102bef46d2SJoseph Huber #define LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H
112bef46d2SJoseph Huber 
1273aab2f6Slntue #include "include/llvm-libc-types/test_rpc_opcodes_t.h"
13d7c20a6fSJoseph Huber 
14b4d49fb5SJoseph Huber #include "shared/rpc.h"
15d7c20a6fSJoseph Huber #include "shared/rpc_opcodes.h"
16c381a947SJoseph Huber 
17719d77edSJoseph Huber #include <cstddef>
18cd4e8884SJoseph Huber #include <cstdint>
19719d77edSJoseph Huber #include <cstdio>
20719d77edSJoseph Huber #include <cstdlib>
212bef46d2SJoseph Huber #include <cstring>
2267d78e3cSJoseph Huber 
/// Generic launch parameters for configuring the number of blocks and threads
/// used when launching a kernel. Each quantity is given per dimension.
struct LaunchParameters {
  // Thread counts in the x, y and z dimensions.
  uint32_t num_threads_x;
  uint32_t num_threads_y;
  uint32_t num_threads_z;
  // Block counts in the x, y and z dimensions.
  uint32_t num_blocks_x;
  uint32_t num_blocks_y;
  uint32_t num_blocks_z;
};
32bc11bb3eSJoseph Huber 
/// Argument block passed to the '_begin' kernel.
/// NOTE(review): the field order presumably has to match what the device-side
/// kernel expects -- confirm before reordering or adding members.
struct begin_args_t {
  int argc;   // Argument count.
  void *argv; // Argument vector (presumably built by copy_argument_vector).
  void *envp; // Environment vector (presumably built by copy_environment).
};
39901266daSJoseph Huber 
/// Argument block passed to the '_start' kernel.
/// NOTE(review): the field order presumably has to match what the device-side
/// kernel expects -- confirm before reordering or adding members.
struct start_args_t {
  int argc;   // Argument count.
  void *argv; // Argument vector (presumably built by copy_argument_vector).
  void *envp; // Environment vector (presumably built by copy_environment).
  void *ret;  // Presumably where the device stores the return value of
              // `main` -- TODO confirm against the loader implementations.
};
47901266daSJoseph Huber 
/// Argument block passed to the '_end' kernel.
struct end_args_t {
  // NOTE(review): what the '_end' kernel does with this value is not evident
  // from this header -- confirm against the device-side code.
  int argc;
};
52901266daSJoseph Huber 
5367d78e3cSJoseph Huber /// Generic interface to load the \p image and launch execution of the _start
5467d78e3cSJoseph Huber /// kernel on the target device. Copies \p argc and \p argv to the device.
5567d78e3cSJoseph Huber /// Returns the final value of the `main` function on the device.
565e326983SJoseph Huber int load(int argc, const char **argv, const char **evnp, void *image,
575e326983SJoseph Huber          size_t size, const LaunchParameters &params,
585e326983SJoseph Huber          bool print_resource_usage);
592bef46d2SJoseph Huber 
/// Round \p val "upwards" to a multiple of \p align.
///
/// \p align is converted to the value type \p V before any arithmetic is
/// performed. For non-negative integers that do not overflow, a \p val that is
/// already a multiple of \p align is returned unchanged. \p align must not be
/// zero, since the result is computed with a division by it.
template <typename V, typename A> inline V align_up(V val, A align) {
  const V step = V(align);
  return ((val + step - 1) / step) * step;
}
64507edb52SJoseph Huber 
/// Copy the system's argument vector into memory obtained from \p alloc.
///
/// A single buffer is requested from \p alloc. It holds a table of
/// `argc + 1` pointers followed directly by the argument strings, stored back
/// to back. The last table entry is null. The buffer is written with
/// `std::memcpy` and plain stores, so whatever \p alloc returns must be
/// writable from the host.
///
/// \param argc  Number of strings in \p argv.
/// \param argv  The strings to copy; each of the first \p argc entries must be
///              a valid null-terminated string.
/// \param alloc Callable taking a size in bytes and returning a pointer to the
///              new buffer, or null on failure.
/// \returns The start of the buffer (the pointer table), or nullptr if
///          \p alloc returned null.
template <typename Allocator>
void *copy_argument_vector(int argc, const char **argv, Allocator alloc) {
  // Space for the pointer table, including its terminating null entry.
  const size_t table_bytes = sizeof(char *) * (argc + 1);

  // Space for every string together with its null terminator.
  size_t string_bytes = 0;
  for (int idx = 0; idx < argc; ++idx)
    string_bytes += std::strlen(argv[idx]) + 1;

  void *buffer = alloc(table_bytes + string_bytes);
  if (buffer == nullptr)
    return nullptr;

  // The strings are stored linearly in the same buffer, right after the table.
  void **table = static_cast<void **>(buffer);
  uint8_t *cursor = reinterpret_cast<uint8_t *>(buffer) + table_bytes;
  for (int idx = 0; idx < argc; ++idx) {
    const size_t length = std::strlen(argv[idx]) + 1;
    std::memcpy(cursor, argv[idx], length);
    table[idx] = cursor;
    cursor += length;
  }

  // Ensure the vector is null terminated.
  table[argc] = nullptr;
  return buffer;
}
912bef46d2SJoseph Huber 
/// Copy the system's environment into memory obtained from \p alloc.
///
/// Counts the entries of \p envp up to (not including) its terminating null
/// entry and hands them to copy_argument_vector, whose result is returned
/// unchanged. \p envp itself must be non-null and null terminated.
template <typename Allocator>
void *copy_environment(const char **envp, Allocator alloc) {
  int count = 0;
  while (envp[count] != nullptr)
    ++count;

  return copy_argument_vector(count, envp, alloc);
}
1012bef46d2SJoseph Huber 
/// Print a fatal error to stderr in the form `<file>:<line>:0: Error: <msg>`
/// and terminate the process with EXIT_FAILURE. This function does not return.
inline void handle_error_impl(const char *file, int32_t line, const char *msg) {
  std::fprintf(stderr, "%s:%d:0: Error: %s\n", file, line, msg);
  std::exit(EXIT_FAILURE);
}
/// Report a fatal error, tagging it with the file and line of the call site.
#define handle_error(MSG) handle_error_impl(__FILE__, __LINE__, MSG)
107719d77edSJoseph Huber 
108b4d49fb5SJoseph Huber template <uint32_t num_lanes, typename Alloc, typename Free>
109b4d49fb5SJoseph Huber inline uint32_t handle_server(rpc::Server &server, uint32_t index,
110b4d49fb5SJoseph Huber                               Alloc &&alloc, Free &&free) {
111b4d49fb5SJoseph Huber   auto port = server.try_open(num_lanes, index);
112b4d49fb5SJoseph Huber   if (!port)
113b4d49fb5SJoseph Huber     return 0;
114b4d49fb5SJoseph Huber   index = port->get_index() + 1;
115b4d49fb5SJoseph Huber 
116*e85a9f55SJinsong Ji   int status = rpc::RPC_SUCCESS;
117b4d49fb5SJoseph Huber   switch (port->get_opcode()) {
118b4d49fb5SJoseph Huber   case RPC_TEST_INCREMENT: {
119b4d49fb5SJoseph Huber     port->recv_and_send([](rpc::Buffer *buffer, uint32_t) {
120c381a947SJoseph Huber       reinterpret_cast<uint64_t *>(buffer->data)[0] += 1;
121b4d49fb5SJoseph Huber     });
122b4d49fb5SJoseph Huber     break;
123b4d49fb5SJoseph Huber   }
124b4d49fb5SJoseph Huber   case RPC_TEST_INTERFACE: {
125c381a947SJoseph Huber     bool end_with_recv;
126b4d49fb5SJoseph Huber     uint64_t cnt;
127b4d49fb5SJoseph Huber     port->recv([&](rpc::Buffer *buffer, uint32_t) {
128b4d49fb5SJoseph Huber       end_with_recv = buffer->data[0];
129b4d49fb5SJoseph Huber     });
130b4d49fb5SJoseph Huber     port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; });
131b4d49fb5SJoseph Huber     port->send([&](rpc::Buffer *buffer, uint32_t) {
132c381a947SJoseph Huber       buffer->data[0] = cnt = cnt + 1;
133b4d49fb5SJoseph Huber     });
134b4d49fb5SJoseph Huber     port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; });
135b4d49fb5SJoseph Huber     port->send([&](rpc::Buffer *buffer, uint32_t) {
136c381a947SJoseph Huber       buffer->data[0] = cnt = cnt + 1;
137b4d49fb5SJoseph Huber     });
138b4d49fb5SJoseph Huber     port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; });
139b4d49fb5SJoseph Huber     port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; });
140b4d49fb5SJoseph Huber     port->send([&](rpc::Buffer *buffer, uint32_t) {
141c381a947SJoseph Huber       buffer->data[0] = cnt = cnt + 1;
142b4d49fb5SJoseph Huber     });
143b4d49fb5SJoseph Huber     port->send([&](rpc::Buffer *buffer, uint32_t) {
144c381a947SJoseph Huber       buffer->data[0] = cnt = cnt + 1;
145b4d49fb5SJoseph Huber     });
146c381a947SJoseph Huber     if (end_with_recv)
147b4d49fb5SJoseph Huber       port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; });
148c381a947SJoseph Huber     else
149b4d49fb5SJoseph Huber       port->send([&](rpc::Buffer *buffer, uint32_t) {
150c381a947SJoseph Huber         buffer->data[0] = cnt = cnt + 1;
151b4d49fb5SJoseph Huber       });
152c381a947SJoseph Huber 
153b4d49fb5SJoseph Huber     break;
154b4d49fb5SJoseph Huber   }
155b4d49fb5SJoseph Huber   case RPC_TEST_STREAM: {
156b4d49fb5SJoseph Huber     uint64_t sizes[num_lanes] = {0};
157b4d49fb5SJoseph Huber     void *dst[num_lanes] = {nullptr};
158b4d49fb5SJoseph Huber     port->recv_n(dst, sizes,
159b4d49fb5SJoseph Huber                  [](uint64_t size) -> void * { return new char[size]; });
160b4d49fb5SJoseph Huber     port->send_n(dst, sizes);
161b4d49fb5SJoseph Huber     for (uint64_t i = 0; i < num_lanes; ++i) {
162c381a947SJoseph Huber       if (dst[i])
163c381a947SJoseph Huber         delete[] reinterpret_cast<uint8_t *>(dst[i]);
164c381a947SJoseph Huber     }
165b4d49fb5SJoseph Huber     break;
166b4d49fb5SJoseph Huber   }
167b4d49fb5SJoseph Huber   case RPC_TEST_NOOP: {
168b4d49fb5SJoseph Huber     port->recv([&](rpc::Buffer *, uint32_t) {});
169b4d49fb5SJoseph Huber     break;
170b4d49fb5SJoseph Huber   }
171a6ef0debSJoseph Huber   case LIBC_MALLOC: {
172b4d49fb5SJoseph Huber     port->recv_and_send([&](rpc::Buffer *buffer, uint32_t) {
173b4d49fb5SJoseph Huber       buffer->data[0] = reinterpret_cast<uintptr_t>(alloc(buffer->data[0]));
174b4d49fb5SJoseph Huber     });
175b4d49fb5SJoseph Huber     break;
176b4d49fb5SJoseph Huber   }
177a6ef0debSJoseph Huber   case LIBC_FREE: {
178b4d49fb5SJoseph Huber     port->recv([&](rpc::Buffer *buffer, uint32_t) {
179b4d49fb5SJoseph Huber       free(reinterpret_cast<void *>(buffer->data[0]));
180b4d49fb5SJoseph Huber     });
181b4d49fb5SJoseph Huber     break;
182b4d49fb5SJoseph Huber   }
183b4d49fb5SJoseph Huber   default:
1841d810eceSJoseph Huber     status = handle_libc_opcodes(*port, num_lanes);
185b4d49fb5SJoseph Huber     break;
186b4d49fb5SJoseph Huber   }
187b4d49fb5SJoseph Huber 
188b4d49fb5SJoseph Huber   // Handle all of the `libc` specific opcodes.
189*e85a9f55SJinsong Ji   if (status != rpc::RPC_SUCCESS)
190b4d49fb5SJoseph Huber     handle_error("Error handling RPC server");
191b4d49fb5SJoseph Huber 
192b4d49fb5SJoseph Huber   port->close();
193b4d49fb5SJoseph Huber 
194b4d49fb5SJoseph Huber   return index;
195c381a947SJoseph Huber }
196c381a947SJoseph Huber 
1972bef46d2SJoseph Huber #endif
198