xref: /openbsd-src/gnu/llvm/clang/lib/Headers/__clang_cuda_builtin_vars.h (revision a9ac8606c53d55cee9c3a39778b249c51df111ef)
1e5dd7070Spatrick /*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------===
2e5dd7070Spatrick  *
3e5dd7070Spatrick  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e5dd7070Spatrick  * See https://llvm.org/LICENSE.txt for license information.
5e5dd7070Spatrick  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e5dd7070Spatrick  *
7e5dd7070Spatrick  *===-----------------------------------------------------------------------===
8e5dd7070Spatrick  */
9e5dd7070Spatrick 
10e5dd7070Spatrick #ifndef __CUDA_BUILTIN_VARS_H
11e5dd7070Spatrick #define __CUDA_BUILTIN_VARS_H
12e5dd7070Spatrick 
13e5dd7070Spatrick // Forward declares from vector_types.h.
14e5dd7070Spatrick struct uint3;
15e5dd7070Spatrick struct dim3;
16e5dd7070Spatrick 
17e5dd7070Spatrick // The file implements built-in CUDA variables using __declspec(property).
18e5dd7070Spatrick // https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx
19e5dd7070Spatrick // All read accesses of built-in variable fields get converted into calls to a
20e5dd7070Spatrick // getter function which in turn calls the appropriate builtin to fetch the
21e5dd7070Spatrick // value.
22e5dd7070Spatrick //
23e5dd7070Spatrick // Example:
24e5dd7070Spatrick //    int x = threadIdx.x;
25e5dd7070Spatrick // IR output:
26e5dd7070Spatrick //  %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3
27e5dd7070Spatrick // PTX output:
28e5dd7070Spatrick //  mov.u32     %r2, %tid.x;
29e5dd7070Spatrick 
30e5dd7070Spatrick #define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC)                                \
31e5dd7070Spatrick   __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD;      \
32e5dd7070Spatrick   static inline __attribute__((always_inline))                                 \
33e5dd7070Spatrick       __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) {     \
34e5dd7070Spatrick     return INTRINSIC;                                                          \
35e5dd7070Spatrick   }
36e5dd7070Spatrick 
37e5dd7070Spatrick #if __cplusplus >= 201103L
38e5dd7070Spatrick #define __DELETE =delete
39e5dd7070Spatrick #else
40e5dd7070Spatrick #define __DELETE
41e5dd7070Spatrick #endif
42e5dd7070Spatrick 
43e5dd7070Spatrick // Make sure nobody can create instances of the special variable types.  nvcc
44e5dd7070Spatrick // also disallows taking address of special variables, so we disable address-of
45e5dd7070Spatrick // operator as well.
46e5dd7070Spatrick #define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName)                            \
47e5dd7070Spatrick   __attribute__((device)) TypeName() __DELETE;                                 \
48e5dd7070Spatrick   __attribute__((device)) TypeName(const TypeName &) __DELETE;                 \
49e5dd7070Spatrick   __attribute__((device)) void operator=(const TypeName &) const __DELETE;     \
50e5dd7070Spatrick   __attribute__((device)) TypeName *operator&() const __DELETE
51e5dd7070Spatrick 
52e5dd7070Spatrick struct __cuda_builtin_threadIdx_t {
53e5dd7070Spatrick   __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x());
54e5dd7070Spatrick   __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y());
55e5dd7070Spatrick   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());
56e5dd7070Spatrick   // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a
57e5dd7070Spatrick   // uint3).  This function is defined after we pull in vector_types.h.
58*a9ac8606Spatrick   __attribute__((device)) operator dim3() const;
59e5dd7070Spatrick   __attribute__((device)) operator uint3() const;
60*a9ac8606Spatrick 
61e5dd7070Spatrick private:
62e5dd7070Spatrick   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);
63e5dd7070Spatrick };
64e5dd7070Spatrick 
65e5dd7070Spatrick struct __cuda_builtin_blockIdx_t {
66e5dd7070Spatrick   __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x());
67e5dd7070Spatrick   __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y());
68e5dd7070Spatrick   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());
69e5dd7070Spatrick   // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a
70e5dd7070Spatrick   // uint3).  This function is defined after we pull in vector_types.h.
71*a9ac8606Spatrick   __attribute__((device)) operator dim3() const;
72e5dd7070Spatrick   __attribute__((device)) operator uint3() const;
73*a9ac8606Spatrick 
74e5dd7070Spatrick private:
75e5dd7070Spatrick   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);
76e5dd7070Spatrick };
77e5dd7070Spatrick 
78e5dd7070Spatrick struct __cuda_builtin_blockDim_t {
79e5dd7070Spatrick   __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x());
80e5dd7070Spatrick   __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y());
81e5dd7070Spatrick   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z());
82e5dd7070Spatrick   // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a
83e5dd7070Spatrick   // dim3).  This function is defined after we pull in vector_types.h.
84e5dd7070Spatrick   __attribute__((device)) operator dim3() const;
85*a9ac8606Spatrick   __attribute__((device)) operator uint3() const;
86*a9ac8606Spatrick 
87e5dd7070Spatrick private:
88e5dd7070Spatrick   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);
89e5dd7070Spatrick };
90e5dd7070Spatrick 
91e5dd7070Spatrick struct __cuda_builtin_gridDim_t {
92e5dd7070Spatrick   __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x());
93e5dd7070Spatrick   __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y());
94e5dd7070Spatrick   __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z());
95e5dd7070Spatrick   // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a
96e5dd7070Spatrick   // dim3).  This function is defined after we pull in vector_types.h.
97e5dd7070Spatrick   __attribute__((device)) operator dim3() const;
98*a9ac8606Spatrick   __attribute__((device)) operator uint3() const;
99*a9ac8606Spatrick 
100e5dd7070Spatrick private:
101e5dd7070Spatrick   __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);
102e5dd7070Spatrick };
103e5dd7070Spatrick 
104e5dd7070Spatrick #define __CUDA_BUILTIN_VAR                                                     \
105e5dd7070Spatrick   extern const __attribute__((device)) __attribute__((weak))
106e5dd7070Spatrick __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;
107e5dd7070Spatrick __CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx;
108e5dd7070Spatrick __CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim;
109e5dd7070Spatrick __CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim;
110e5dd7070Spatrick 
111e5dd7070Spatrick // warpSize should translate to read of %WARP_SZ but there's currently no
112e5dd7070Spatrick // builtin to do so. According to PTX v4.2 docs 'to date, all target
113e5dd7070Spatrick // architectures have a WARP_SZ value of 32'.
114e5dd7070Spatrick __attribute__((device)) const int warpSize = 32;
115e5dd7070Spatrick 
116e5dd7070Spatrick #undef __CUDA_DEVICE_BUILTIN
117e5dd7070Spatrick #undef __CUDA_BUILTIN_VAR
118e5dd7070Spatrick #undef __CUDA_DISALLOW_BUILTINVAR_ACCESS
119*a9ac8606Spatrick #undef __DELETE
120e5dd7070Spatrick 
121e5dd7070Spatrick #endif /* __CUDA_BUILTIN_VARS_H */
122