167d78e3cSJoseph Huber //===-- Generic device loader interface -----------------------------------===// 267d78e3cSJoseph Huber // 367d78e3cSJoseph Huber // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 467d78e3cSJoseph Huber // See https://llvm.org/LICENSE.txt for license information. 567d78e3cSJoseph Huber // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 667d78e3cSJoseph Huber // 767d78e3cSJoseph Huber //===----------------------------------------------------------------------===// 867d78e3cSJoseph Huber 92bef46d2SJoseph Huber #ifndef LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H 102bef46d2SJoseph Huber #define LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H 112bef46d2SJoseph Huber 1273aab2f6Slntue #include "include/llvm-libc-types/test_rpc_opcodes_t.h" 13d7c20a6fSJoseph Huber 14b4d49fb5SJoseph Huber #include "shared/rpc.h" 15d7c20a6fSJoseph Huber #include "shared/rpc_opcodes.h" 16c381a947SJoseph Huber 17719d77edSJoseph Huber #include <cstddef> 18cd4e8884SJoseph Huber #include <cstdint> 19719d77edSJoseph Huber #include <cstdio> 20719d77edSJoseph Huber #include <cstdlib> 212bef46d2SJoseph Huber #include <cstring> 2267d78e3cSJoseph Huber 23bc11bb3eSJoseph Huber /// Generic launch parameters for configuration the number of blocks / threads. 24bc11bb3eSJoseph Huber struct LaunchParameters { 25bc11bb3eSJoseph Huber uint32_t num_threads_x; 26bc11bb3eSJoseph Huber uint32_t num_threads_y; 27bc11bb3eSJoseph Huber uint32_t num_threads_z; 28bc11bb3eSJoseph Huber uint32_t num_blocks_x; 29bc11bb3eSJoseph Huber uint32_t num_blocks_y; 30bc11bb3eSJoseph Huber uint32_t num_blocks_z; 31bc11bb3eSJoseph Huber }; 32bc11bb3eSJoseph Huber 33901266daSJoseph Huber /// The arguments to the '_begin' kernel. 34901266daSJoseph Huber struct begin_args_t { 35901266daSJoseph Huber int argc; 36901266daSJoseph Huber void *argv; 37901266daSJoseph Huber void *envp; 38901266daSJoseph Huber }; 39901266daSJoseph Huber 40901266daSJoseph Huber /// The arguments to the '_start' kernel. 
struct start_args_t {
  int argc;
  void *argv;
  void *envp;
  void *ret;
};

/// The arguments to the '_end' kernel.
struct end_args_t {
  int argc;
};

// `load` takes the launch parameters by reference only, so naming the type is
// all this prototype requires.
struct LaunchParameters;

/// Generic interface to load the \p image and launch execution of the _start
/// kernel on the target device. Copies \p argc and \p argv to the device.
/// Returns the final value of the `main` function on the device.
int load(int argc, const char **argv, const char **envp, void *image,
         size_t size, const LaunchParameters &params,
         bool print_resource_usage);

/// Return \p val aligned "upwards" according to \p align, i.e. rounded up to
/// the next multiple of \p align. \p align is converted to the type of \p val
/// before the arithmetic is performed.
template <typename V, typename A> inline V align_up(V val, A align) {
  return ((val + V(align) - 1) / V(align)) * V(align);
}

/// Copy the system's argument vector to GPU memory allocated using \p alloc.
///
/// A single buffer is requested from \p alloc. It starts with a table of
/// \p argc string pointers followed by a null terminator, and the string data
/// is packed directly behind that table. The pointers written into the table
/// are addresses inside the returned buffer.
///
/// \returns the buffer, or nullptr if \p argc is negative or \p alloc returns
/// a null pointer.
template <typename Allocator>
void *copy_argument_vector(int argc, const char **argv, Allocator alloc) {
  // A negative count would wrap the size computation below and place the
  // terminator in front of the buffer, so report it as a failure instead.
  if (argc < 0)
    return nullptr;

  const size_t count = static_cast<size_t>(argc);
  const size_t argv_size = sizeof(char *) * (count + 1);
  size_t str_size = 0;
  for (size_t i = 0; i < count; ++i)
    str_size += std::strlen(argv[i]) + 1;

  // We allocate enough space for a null terminated array and all the strings.
  void *dev_argv = alloc(argv_size + str_size);
  if (!dev_argv)
    return nullptr;

  // Store the strings linearly in the same memory buffer.
  char **table = static_cast<char **>(dev_argv);
  char *cursor = static_cast<char *>(dev_argv) + argv_size;
  for (size_t i = 0; i < count; ++i) {
    const size_t size = std::strlen(argv[i]) + 1;
    std::memcpy(cursor, argv[i], size);
    table[i] = cursor;
    cursor += size;
  }

  // Ensure the vector is null terminated.
  table[count] = nullptr;
  return dev_argv;
}

/// Copy the system's environment to GPU memory allocated using \p alloc.
932bef46d2SJoseph Huber template <typename Allocator> 945e326983SJoseph Huber void *copy_environment(const char **envp, Allocator alloc) { 952bef46d2SJoseph Huber int envc = 0; 965e326983SJoseph Huber for (const char **env = envp; *env != 0; ++env) 972bef46d2SJoseph Huber ++envc; 982bef46d2SJoseph Huber 992bef46d2SJoseph Huber return copy_argument_vector(envc, envp, alloc); 100033dbbe4SJoseph Huber } 1012bef46d2SJoseph Huber 1025849cbadSJoseph Huber inline void handle_error_impl(const char *file, int32_t line, const char *msg) { 1035849cbadSJoseph Huber fprintf(stderr, "%s:%d:0: Error: %s\n", file, line, msg); 104719d77edSJoseph Huber exit(EXIT_FAILURE); 105719d77edSJoseph Huber } 1065849cbadSJoseph Huber #define handle_error(X) handle_error_impl(__FILE__, __LINE__, X) 107719d77edSJoseph Huber 108b4d49fb5SJoseph Huber template <uint32_t num_lanes, typename Alloc, typename Free> 109b4d49fb5SJoseph Huber inline uint32_t handle_server(rpc::Server &server, uint32_t index, 110b4d49fb5SJoseph Huber Alloc &&alloc, Free &&free) { 111b4d49fb5SJoseph Huber auto port = server.try_open(num_lanes, index); 112b4d49fb5SJoseph Huber if (!port) 113b4d49fb5SJoseph Huber return 0; 114b4d49fb5SJoseph Huber index = port->get_index() + 1; 115b4d49fb5SJoseph Huber 116*e85a9f55SJinsong Ji int status = rpc::RPC_SUCCESS; 117b4d49fb5SJoseph Huber switch (port->get_opcode()) { 118b4d49fb5SJoseph Huber case RPC_TEST_INCREMENT: { 119b4d49fb5SJoseph Huber port->recv_and_send([](rpc::Buffer *buffer, uint32_t) { 120c381a947SJoseph Huber reinterpret_cast<uint64_t *>(buffer->data)[0] += 1; 121b4d49fb5SJoseph Huber }); 122b4d49fb5SJoseph Huber break; 123b4d49fb5SJoseph Huber } 124b4d49fb5SJoseph Huber case RPC_TEST_INTERFACE: { 125c381a947SJoseph Huber bool end_with_recv; 126b4d49fb5SJoseph Huber uint64_t cnt; 127b4d49fb5SJoseph Huber port->recv([&](rpc::Buffer *buffer, uint32_t) { 128b4d49fb5SJoseph Huber end_with_recv = buffer->data[0]; 129b4d49fb5SJoseph Huber }); 130b4d49fb5SJoseph Huber 
port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); 131b4d49fb5SJoseph Huber port->send([&](rpc::Buffer *buffer, uint32_t) { 132c381a947SJoseph Huber buffer->data[0] = cnt = cnt + 1; 133b4d49fb5SJoseph Huber }); 134b4d49fb5SJoseph Huber port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); 135b4d49fb5SJoseph Huber port->send([&](rpc::Buffer *buffer, uint32_t) { 136c381a947SJoseph Huber buffer->data[0] = cnt = cnt + 1; 137b4d49fb5SJoseph Huber }); 138b4d49fb5SJoseph Huber port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); 139b4d49fb5SJoseph Huber port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); 140b4d49fb5SJoseph Huber port->send([&](rpc::Buffer *buffer, uint32_t) { 141c381a947SJoseph Huber buffer->data[0] = cnt = cnt + 1; 142b4d49fb5SJoseph Huber }); 143b4d49fb5SJoseph Huber port->send([&](rpc::Buffer *buffer, uint32_t) { 144c381a947SJoseph Huber buffer->data[0] = cnt = cnt + 1; 145b4d49fb5SJoseph Huber }); 146c381a947SJoseph Huber if (end_with_recv) 147b4d49fb5SJoseph Huber port->recv([&](rpc::Buffer *buffer, uint32_t) { cnt = buffer->data[0]; }); 148c381a947SJoseph Huber else 149b4d49fb5SJoseph Huber port->send([&](rpc::Buffer *buffer, uint32_t) { 150c381a947SJoseph Huber buffer->data[0] = cnt = cnt + 1; 151b4d49fb5SJoseph Huber }); 152c381a947SJoseph Huber 153b4d49fb5SJoseph Huber break; 154b4d49fb5SJoseph Huber } 155b4d49fb5SJoseph Huber case RPC_TEST_STREAM: { 156b4d49fb5SJoseph Huber uint64_t sizes[num_lanes] = {0}; 157b4d49fb5SJoseph Huber void *dst[num_lanes] = {nullptr}; 158b4d49fb5SJoseph Huber port->recv_n(dst, sizes, 159b4d49fb5SJoseph Huber [](uint64_t size) -> void * { return new char[size]; }); 160b4d49fb5SJoseph Huber port->send_n(dst, sizes); 161b4d49fb5SJoseph Huber for (uint64_t i = 0; i < num_lanes; ++i) { 162c381a947SJoseph Huber if (dst[i]) 163c381a947SJoseph Huber delete[] reinterpret_cast<uint8_t *>(dst[i]); 164c381a947SJoseph Huber } 
165b4d49fb5SJoseph Huber break; 166b4d49fb5SJoseph Huber } 167b4d49fb5SJoseph Huber case RPC_TEST_NOOP: { 168b4d49fb5SJoseph Huber port->recv([&](rpc::Buffer *, uint32_t) {}); 169b4d49fb5SJoseph Huber break; 170b4d49fb5SJoseph Huber } 171a6ef0debSJoseph Huber case LIBC_MALLOC: { 172b4d49fb5SJoseph Huber port->recv_and_send([&](rpc::Buffer *buffer, uint32_t) { 173b4d49fb5SJoseph Huber buffer->data[0] = reinterpret_cast<uintptr_t>(alloc(buffer->data[0])); 174b4d49fb5SJoseph Huber }); 175b4d49fb5SJoseph Huber break; 176b4d49fb5SJoseph Huber } 177a6ef0debSJoseph Huber case LIBC_FREE: { 178b4d49fb5SJoseph Huber port->recv([&](rpc::Buffer *buffer, uint32_t) { 179b4d49fb5SJoseph Huber free(reinterpret_cast<void *>(buffer->data[0])); 180b4d49fb5SJoseph Huber }); 181b4d49fb5SJoseph Huber break; 182b4d49fb5SJoseph Huber } 183b4d49fb5SJoseph Huber default: 1841d810eceSJoseph Huber status = handle_libc_opcodes(*port, num_lanes); 185b4d49fb5SJoseph Huber break; 186b4d49fb5SJoseph Huber } 187b4d49fb5SJoseph Huber 188b4d49fb5SJoseph Huber // Handle all of the `libc` specific opcodes. 189*e85a9f55SJinsong Ji if (status != rpc::RPC_SUCCESS) 190b4d49fb5SJoseph Huber handle_error("Error handling RPC server"); 191b4d49fb5SJoseph Huber 192b4d49fb5SJoseph Huber port->close(); 193b4d49fb5SJoseph Huber 194b4d49fb5SJoseph Huber return index; 195c381a947SJoseph Huber } 196c381a947SJoseph Huber 1972bef46d2SJoseph Huber #endif 198