1e5dd7070Spatrick /*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------=== 2e5dd7070Spatrick * 3e5dd7070Spatrick * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e5dd7070Spatrick * See https://llvm.org/LICENSE.txt for license information. 5e5dd7070Spatrick * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e5dd7070Spatrick * 7e5dd7070Spatrick *===-----------------------------------------------------------------------=== 8e5dd7070Spatrick */ 9e5dd7070Spatrick 10e5dd7070Spatrick #ifndef __CUDA_BUILTIN_VARS_H 11e5dd7070Spatrick #define __CUDA_BUILTIN_VARS_H 12e5dd7070Spatrick 13e5dd7070Spatrick // Forward declares from vector_types.h. 14e5dd7070Spatrick struct uint3; 15e5dd7070Spatrick struct dim3; 16e5dd7070Spatrick 17e5dd7070Spatrick // The file implements built-in CUDA variables using __declspec(property). 18e5dd7070Spatrick // https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx 19e5dd7070Spatrick // All read accesses of built-in variable fields get converted into calls to a 20e5dd7070Spatrick // getter function which in turn calls the appropriate builtin to fetch the 21e5dd7070Spatrick // value. 22e5dd7070Spatrick // 23e5dd7070Spatrick // Example: 24e5dd7070Spatrick // int x = threadIdx.x; 25e5dd7070Spatrick // IR output: 26e5dd7070Spatrick // %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 27e5dd7070Spatrick // PTX output: 28e5dd7070Spatrick // mov.u32 %r2, %tid.x; 29e5dd7070Spatrick 30e5dd7070Spatrick #define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \ 31e5dd7070Spatrick __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \ 32e5dd7070Spatrick static inline __attribute__((always_inline)) \ 33e5dd7070Spatrick __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \ 34e5dd7070Spatrick return INTRINSIC; \ 35e5dd7070Spatrick } 36e5dd7070Spatrick 37e5dd7070Spatrick #if __cplusplus >= 201103L 38e5dd7070Spatrick #define __DELETE =delete 39e5dd7070Spatrick #else 40e5dd7070Spatrick #define __DELETE 41e5dd7070Spatrick #endif 42e5dd7070Spatrick 43e5dd7070Spatrick // Make sure nobody can create instances of the special variable types. nvcc 44e5dd7070Spatrick // also disallows taking address of special variables, so we disable address-of 45e5dd7070Spatrick // operator as well. 46e5dd7070Spatrick #define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \ 47e5dd7070Spatrick __attribute__((device)) TypeName() __DELETE; \ 48e5dd7070Spatrick __attribute__((device)) TypeName(const TypeName &) __DELETE; \ 49e5dd7070Spatrick __attribute__((device)) void operator=(const TypeName &) const __DELETE; \ 50e5dd7070Spatrick __attribute__((device)) TypeName *operator&() const __DELETE 51e5dd7070Spatrick 52e5dd7070Spatrick struct __cuda_builtin_threadIdx_t { 53e5dd7070Spatrick __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x()); 54e5dd7070Spatrick __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y()); 55e5dd7070Spatrick __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z()); 56e5dd7070Spatrick // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a 57e5dd7070Spatrick // uint3). This function is defined after we pull in vector_types.h. 58*a9ac8606Spatrick __attribute__((device)) operator dim3() const; 59e5dd7070Spatrick __attribute__((device)) operator uint3() const; 60*a9ac8606Spatrick 61e5dd7070Spatrick private: 62e5dd7070Spatrick __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t); 63e5dd7070Spatrick }; 64e5dd7070Spatrick 65e5dd7070Spatrick struct __cuda_builtin_blockIdx_t { 66e5dd7070Spatrick __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x()); 67e5dd7070Spatrick __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y()); 68e5dd7070Spatrick __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z()); 69e5dd7070Spatrick // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a 70e5dd7070Spatrick // uint3). This function is defined after we pull in vector_types.h. 71*a9ac8606Spatrick __attribute__((device)) operator dim3() const; 72e5dd7070Spatrick __attribute__((device)) operator uint3() const; 73*a9ac8606Spatrick 74e5dd7070Spatrick private: 75e5dd7070Spatrick __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t); 76e5dd7070Spatrick }; 77e5dd7070Spatrick 78e5dd7070Spatrick struct __cuda_builtin_blockDim_t { 79e5dd7070Spatrick __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x()); 80e5dd7070Spatrick __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y()); 81e5dd7070Spatrick __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z()); 82e5dd7070Spatrick // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a 83e5dd7070Spatrick // dim3). This function is defined after we pull in vector_types.h. 84e5dd7070Spatrick __attribute__((device)) operator dim3() const; 85*a9ac8606Spatrick __attribute__((device)) operator uint3() const; 86*a9ac8606Spatrick 87e5dd7070Spatrick private: 88e5dd7070Spatrick __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t); 89e5dd7070Spatrick }; 90e5dd7070Spatrick 91e5dd7070Spatrick struct __cuda_builtin_gridDim_t { 92e5dd7070Spatrick __CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x()); 93e5dd7070Spatrick __CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y()); 94e5dd7070Spatrick __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z()); 95e5dd7070Spatrick // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a 96e5dd7070Spatrick // dim3). This function is defined after we pull in vector_types.h. 97e5dd7070Spatrick __attribute__((device)) operator dim3() const; 98*a9ac8606Spatrick __attribute__((device)) operator uint3() const; 99*a9ac8606Spatrick 100e5dd7070Spatrick private: 101e5dd7070Spatrick __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t); 102e5dd7070Spatrick }; 103e5dd7070Spatrick 104e5dd7070Spatrick #define __CUDA_BUILTIN_VAR \ 105e5dd7070Spatrick extern const __attribute__((device)) __attribute__((weak)) 106e5dd7070Spatrick __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx; 107e5dd7070Spatrick __CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx; 108e5dd7070Spatrick __CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim; 109e5dd7070Spatrick __CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim; 110e5dd7070Spatrick 111e5dd7070Spatrick // warpSize should translate to read of %WARP_SZ but there's currently no 112e5dd7070Spatrick // builtin to do so. According to PTX v4.2 docs 'to date, all target 113e5dd7070Spatrick // architectures have a WARP_SZ value of 32'. 114e5dd7070Spatrick __attribute__((device)) const int warpSize = 32; 115e5dd7070Spatrick 116e5dd7070Spatrick #undef __CUDA_DEVICE_BUILTIN 117e5dd7070Spatrick #undef __CUDA_BUILTIN_VAR 118e5dd7070Spatrick #undef __CUDA_DISALLOW_BUILTINVAR_ACCESS 119*a9ac8606Spatrick #undef __DELETE 120e5dd7070Spatrick 121e5dd7070Spatrick #endif /* __CUDA_BUILTIN_VARS_H */ 122