1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*" "managed.*" 2 // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \ 3 // RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \ 4 // RUN: --check-prefix=CUDA %s 5 // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \ 6 // RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \ 7 // RUN: --check-prefix=HIP %s 8 // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-windows-gnu -fgpu-rdc \ 9 // RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \ 10 // RUN: --check-prefix=CUDA-COFF %s 11 // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-windows-gnu -fgpu-rdc \ 12 // RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \ 13 // RUN: --check-prefix=HIP-COFF %s 14 15 #include "Inputs/cuda.h" 16 17 #define __managed__ __attribute__((managed)) 18 19 //. 20 // CUDA: @managed = global i32 undef, align 4 21 // CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1 22 // CUDA: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "cuda_offloading_entries", align 1 23 // CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1 24 // CUDA: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "cuda_offloading_entries", align 1 25 // CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1 26 // CUDA: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "cuda_offloading_entries", align 1 27 // CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1 28 // CUDA: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @managed, ptr @.offloading.entry_name.3, i64 4, i64 0, ptr null }, section "cuda_offloading_entries", align 1 29 // CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1 30 // CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "cuda_offloading_entries", align 1 31 // CUDA: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1 32 // CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "cuda_offloading_entries", align 1 33 //. 34 // HIP: @managed.managed = global i32 0, align 4 35 // HIP: @managed = externally_initialized global ptr null 36 // HIP: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1 37 // HIP: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "hip_offloading_entries", align 1 38 // HIP: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1 39 // HIP: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "hip_offloading_entries", align 1 40 // HIP: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1 41 // HIP: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "hip_offloading_entries", align 1 42 // HIP: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1 43 // HIP: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 1, ptr @managed.managed, ptr @.offloading.entry_name.3, i64 4, i64 4, ptr @managed }, section "hip_offloading_entries", align 1 44 // HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1 45 // HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "hip_offloading_entries", align 1 46 // HIP: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1 47 // HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "hip_offloading_entries", align 1 48 //. 49 // CUDA-COFF: @managed = dso_local global i32 undef, align 4 50 // CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1 51 // CUDA-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "cuda_offloading_entries$OE", align 1 52 // CUDA-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1 53 // CUDA-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "cuda_offloading_entries$OE", align 1 54 // CUDA-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1 55 // CUDA-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "cuda_offloading_entries$OE", align 1 56 // CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1 57 // CUDA-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 0, ptr @managed, ptr @.offloading.entry_name.3, i64 4, i64 0, ptr null }, section "cuda_offloading_entries$OE", align 1 58 // CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1 59 // CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "cuda_offloading_entries$OE", align 1 60 // CUDA-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1 61 // CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 2, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "cuda_offloading_entries$OE", align 1 62 //. 63 // HIP-COFF: @managed.managed = dso_local global i32 0, align 4 64 // HIP-COFF: @managed = dso_local externally_initialized global ptr null 65 // HIP-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1 66 // HIP-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i64 0, ptr null }, section "hip_offloading_entries$OE", align 1 67 // HIP-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1 68 // HIP-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "hip_offloading_entries$OE", align 1 69 // HIP-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1 70 // HIP-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 0, ptr @var, ptr @.offloading.entry_name.2, i64 4, i64 0, ptr null }, section "hip_offloading_entries$OE", align 1 71 // HIP-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [8 x i8] c"managed\00", section ".llvm.rodata.offloading", align 1 72 // HIP-COFF: @.offloading.entry.managed = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 1, ptr @managed.managed, ptr @.offloading.entry_name.3, i64 4, i64 4, ptr @managed }, section "hip_offloading_entries$OE", align 1 73 // HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1 74 // HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 2, ptr @surf, ptr @.offloading.entry_name.4, i64 4, i64 1, ptr null }, section "hip_offloading_entries$OE", align 1 75 // HIP-COFF: @.offloading.entry_name.5 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1 76 // HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 3, i32 3, ptr @tex, ptr @.offloading.entry_name.5, i64 4, i64 1, ptr null }, section "hip_offloading_entries$OE", align 1 77 //. 78 // CUDA-LABEL: @_Z18__device_stub__foov( 79 // CUDA-NEXT: entry: 80 // CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov) 81 // CUDA-NEXT: br label [[SETUP_END:%.*]] 82 // CUDA: setup.end: 83 // CUDA-NEXT: ret void 84 // 85 // HIP-LABEL: @_Z18__device_stub__foov( 86 // HIP-NEXT: entry: 87 // HIP-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3foov) 88 // HIP-NEXT: br label [[SETUP_END:%.*]] 89 // HIP: setup.end: 90 // HIP-NEXT: ret void 91 // 92 // CUDA-COFF-LABEL: @_Z18__device_stub__foov( 93 // CUDA-COFF-NEXT: entry: 94 // CUDA-COFF-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov) 95 // CUDA-COFF-NEXT: br label [[SETUP_END:%.*]] 96 // CUDA-COFF: setup.end: 97 // CUDA-COFF-NEXT: ret void 98 // 99 // HIP-COFF-LABEL: @_Z18__device_stub__foov( 100 // HIP-COFF-NEXT: entry: 101 // HIP-COFF-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3foov) 102 // HIP-COFF-NEXT: br label [[SETUP_END:%.*]] 103 // HIP-COFF: setup.end: 104 // HIP-COFF-NEXT: ret void 105 // 106 __global__ void foo() {} 107 __device__ int var = 1; 108 const __device__ int constant = 1; 109 extern __device__ int external; 110 __device__ __managed__ int managed = 0; 111 112 // CUDA-LABEL: @_Z21__device_stub__kernelv( 113 // CUDA-NEXT: entry: 114 // CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z21__device_stub__kernelv) 115 // CUDA-NEXT: br label [[SETUP_END:%.*]] 116 // CUDA: setup.end: 117 // CUDA-NEXT: ret void 118 // 119 // HIP-LABEL: @_Z21__device_stub__kernelv( 120 // HIP-NEXT: entry: 121 // HIP-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z6kernelv) 122 // HIP-NEXT: br label [[SETUP_END:%.*]] 123 // HIP: setup.end: 124 // HIP-NEXT: ret void 125 // 126 // CUDA-COFF-LABEL: @_Z21__device_stub__kernelv( 127 // CUDA-COFF-NEXT: entry: 128 // CUDA-COFF-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z21__device_stub__kernelv) 129 // CUDA-COFF-NEXT: br label [[SETUP_END:%.*]] 130 // CUDA-COFF: setup.end: 131 // CUDA-COFF-NEXT: ret void 132 // 133 // HIP-COFF-LABEL: @_Z21__device_stub__kernelv( 134 // HIP-COFF-NEXT: entry: 135 // HIP-COFF-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z6kernelv) 136 // HIP-COFF-NEXT: br label [[SETUP_END:%.*]] 137 // HIP-COFF: setup.end: 138 // HIP-COFF-NEXT: ret void 139 // 140 __global__ void kernel() { external = 1; } 141 142 struct surfaceReference { int desc; }; 143 144 template <typename T, int dim = 1> 145 struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {}; 146 147 surface<void> surf; 148 149 struct textureReference { 150 int desc; 151 }; 152 153 template <typename T, int dim = 1, int mode = 0> 154 struct __attribute__((device_builtin_texture_type)) texture : public textureReference {}; 155 156 texture<void> tex; 157