xref: /llvm-project/offload/src/KernelLanguage/API.cpp (revision 80525dfcde5bf8aae6ab6b0810124ba502de6096)
1 //===------ API.cpp - Kernel Language (CUDA/HIP) entry points ----- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 
11 #include "Shared/APITypes.h"
12 
13 #include <cstdio>
14 
15 struct dim3 {
16   unsigned x = 0, y = 0, z = 0;
17 };
18 
19 struct __omp_kernel_t {
20   dim3 __grid_size;
21   dim3 __block_size;
22   size_t __shared_memory;
23 
24   void *__stream;
25 };
26 
27 static __omp_kernel_t __current_kernel = {};
28 #pragma omp threadprivate(__current_kernel);
29 
30 extern "C" {
31 
32 // TODO: There is little reason we need to keep these names or the way calls are
33 // issued. For now we do to avoid modifying Clang's CUDA codegen. Unclear when
34 // we actually need to push/pop configurations.
35 unsigned __llvmPushCallConfiguration(dim3 __grid_size, dim3 __block_size,
36                                      size_t __shared_memory, void *__stream) {
37   __omp_kernel_t &__kernel = __current_kernel;
38   __kernel.__grid_size = __grid_size;
39   __kernel.__block_size = __block_size;
40   __kernel.__shared_memory = __shared_memory;
41   __kernel.__stream = __stream;
42   return 0;
43 }
44 
45 unsigned __llvmPopCallConfiguration(dim3 *__grid_size, dim3 *__block_size,
46                                     size_t *__shared_memory, void *__stream) {
47   __omp_kernel_t &__kernel = __current_kernel;
48   *__grid_size = __kernel.__grid_size;
49   *__block_size = __kernel.__block_size;
50   *__shared_memory = __kernel.__shared_memory;
51   *((void **)__stream) = __kernel.__stream;
52   return 0;
53 }
54 
55 int __tgt_target_kernel(void *Loc, int64_t DeviceId, int32_t NumTeams,
56                         int32_t ThreadLimit, const void *HostPtr,
57                         KernelArgsTy *Args);
58 
59 unsigned llvmLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
60                           void *args, size_t sharedMem, void *stream) {
61   KernelArgsTy Args = {};
62   Args.DynCGroupMem = sharedMem;
63   Args.NumTeams[0] = gridDim.x;
64   Args.NumTeams[1] = gridDim.y;
65   Args.NumTeams[2] = gridDim.z;
66   Args.ThreadLimit[0] = blockDim.x;
67   Args.ThreadLimit[1] = blockDim.y;
68   Args.ThreadLimit[2] = blockDim.z;
69   Args.ArgPtrs = reinterpret_cast<void **>(args);
70   Args.Flags.IsCUDA = true;
71   return __tgt_target_kernel(nullptr, 0, gridDim.x, blockDim.x, func, &Args);
72 }
73 }
74