// REQUIRES: x86-registered-target
// REQUIRES: nvptx-registered-target
// REQUIRES: amdgpu-registered-target

// Checks the registration code that clang-linker-wrapper synthesizes around an
// embedded offloading image (--print-wrapped-module) for the OpenMP, CUDA, and
// HIP offloading kinds, on both ELF and COFF hosts.

// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o

// RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
// RUN:   -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run -r --host-triple=x86_64-unknown-linux-gnu \
// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP-ELF,OPENMP-REL
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \
// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-COFF

// OPENMP-ELF: @__start_omp_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
// OPENMP-ELF-NEXT: @__stop_omp_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
// OPENMP-ELF-NEXT: @__dummy.omp_offloading_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries"

// OPENMP-COFF: @__start_omp_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries$OA"
// OPENMP-COFF-NEXT: @__stop_omp_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries$OZ"

// OPENMP-REL: @.omp_offloading.device_image = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"\10\FF\10\AD{{.*}}", section ".llvm.offloading.relocatable", align 8

// OPENMP: @.omp_offloading.device_image = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"\10\FF\10\AD{{.*}}", section ".llvm.offloading", align 8
// OPENMP-NEXT: @.omp_offloading.device_images = internal unnamed_addr constant [1 x %__tgt_device_image] [%__tgt_device_image { ptr getelementptr ([[[BEGIN:[0-9]+]] x i8], ptr @.omp_offloading.device_image, i64 0, i64 144), ptr getelementptr ([[[END:[0-9]+]] x i8], ptr @.omp_offloading.device_image, i64 0, i64 144), ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries }]
// OPENMP-NEXT: @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 1, ptr @.omp_offloading.device_images, ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries }
// OPENMP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.omp_offloading.descriptor_reg, ptr null }]

// OPENMP: define internal void @.omp_offloading.descriptor_reg() section ".text.startup" {
// OPENMP-NEXT: entry:
// OPENMP-NEXT: call void @__tgt_register_lib(ptr @.omp_offloading.descriptor)
// OPENMP-NEXT: %0 = call i32 @atexit(ptr @.omp_offloading.descriptor_unreg)
// OPENMP-NEXT: ret void
// OPENMP-NEXT: }

// OPENMP: define internal void @.omp_offloading.descriptor_unreg() section ".text.startup" {
// OPENMP-NEXT: entry:
// OPENMP-NEXT: call void @__tgt_unregister_lib(ptr @.omp_offloading.descriptor)
// OPENMP-NEXT: ret void
// OPENMP-NEXT: }

// RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_70
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
// RUN:   -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run -r --host-triple=x86_64-unknown-linux-gnu \
// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \
// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-COFF

// CUDA-ELF: @__start_cuda_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
// CUDA-ELF-NEXT: @__stop_cuda_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
// CUDA-ELF-NEXT: @__dummy.cuda_offloading_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "cuda_offloading_entries"

// CUDA-COFF: @__start_cuda_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "cuda_offloading_entries$OA"
// CUDA-COFF-NEXT: @__stop_cuda_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "cuda_offloading_entries$OZ"

// CUDA: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".nv_fatbin"
// CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8
// CUDA-NEXT: @.cuda.binary_handle = internal global ptr null

// CUDA: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.cuda.fatbin_reg, ptr null }]

// CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" {
// CUDA-NEXT: entry:
// CUDA-NEXT: %0 = call ptr @__cudaRegisterFatBinary(ptr @.fatbin_wrapper)
// CUDA-NEXT: store ptr %0, ptr @.cuda.binary_handle, align 8
// CUDA-NEXT: call void @.cuda.globals_reg(ptr %0)
// CUDA-NEXT: call void @__cudaRegisterFatBinaryEnd(ptr %0)
// CUDA-NEXT: %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg)
// CUDA-NEXT: ret void
// CUDA-NEXT: }
//
// CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" {
// CUDA-NEXT: entry:
// CUDA-NEXT: %0 = load ptr, ptr @.cuda.binary_handle, align 8
// CUDA-NEXT: call void @__cudaUnregisterFatBinary(ptr %0)
// CUDA-NEXT: ret void
// CUDA-NEXT: }
//
// CUDA: define internal void @.cuda.globals_reg(ptr %0) section ".text.startup" {
// CUDA-NEXT: entry:
// CUDA-NEXT: %1 = icmp ne ptr @__start_cuda_offloading_entries, @__stop_cuda_offloading_entries
// CUDA-NEXT: br i1 %1, label %while.entry, label %while.end
//
// CUDA: while.entry:
// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %16, %if.end ]
// CUDA-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 4
// CUDA-NEXT: %addr = load ptr, ptr %2, align 8
// CUDA-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 8
// CUDA-NEXT: %aux_addr = load ptr, ptr %3, align 8
// CUDA-NEXT: %4 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 2
// CUDA-NEXT: %kind = load i16, ptr %4, align 2
// CUDA-NEXT: %5 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 5
// CUDA-NEXT: %name = load ptr, ptr %5, align 8
// CUDA-NEXT: %6 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 6
// CUDA-NEXT: %size = load i64, ptr %6, align 4
// CUDA-NEXT: %7 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 3
// CUDA-NEXT: %flags = load i32, ptr %7, align 4
// CUDA-NEXT: %8 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 7
// CUDA-NEXT: %data = load i64, ptr %8, align 4
// CUDA-NEXT: %9 = trunc i64 %data to i32
// CUDA-NEXT: %type = and i32 %flags, 7
// CUDA-NEXT: %10 = and i32 %flags, 8
// CUDA-NEXT: %extern = lshr i32 %10, 3
// CUDA-NEXT: %11 = and i32 %flags, 16
// CUDA-NEXT: %constant = lshr i32 %11, 4
// CUDA-NEXT: %12 = and i32 %flags, 32
// CUDA-NEXT: %normalized = lshr i32 %12, 5
// CUDA-NEXT: %13 = icmp eq i16 %kind, 2
// CUDA-NEXT: br i1 %13, label %if.kind, label %if.end
//
// CUDA: if.kind:
// CUDA-NEXT: %14 = icmp eq i64 %size, 0
// CUDA-NEXT: br i1 %14, label %if.then, label %if.else
//
// CUDA: if.then:
// CUDA-NEXT: %15 = call i32 @__cudaRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null)
// CUDA-NEXT: br label %if.end
//
// CUDA: if.else:
// CUDA-NEXT: switch i32 %type, label %if.end [
// CUDA-NEXT: i32 0, label %sw.global
// CUDA-NEXT: i32 1, label %sw.managed
// CUDA-NEXT: i32 2, label %sw.surface
// CUDA-NEXT: i32 3, label %sw.texture
// CUDA-NEXT: ]
//
// CUDA: sw.global:
// CUDA-NEXT: call void @__cudaRegisterVar(ptr %0, ptr %addr, ptr %name, ptr %name, i32 %extern, i64 %size, i32 %constant, i32 0)
// CUDA-NEXT: br label %if.end
//
// CUDA: sw.managed:
// CUDA-NEXT: call void @__cudaRegisterManagedVar(ptr %0, ptr %aux_addr, ptr %addr, ptr %name, i64 %size, i32 %9)
// CUDA-NEXT: br label %if.end
//
// CUDA: sw.surface:
// CUDA-NEXT: br label %if.end
//
// CUDA: sw.texture:
// CUDA-NEXT: br label %if.end
//
// CUDA: if.end:
// CUDA-NEXT: %16 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
// CUDA-NEXT: %17 = icmp eq ptr %16, @__stop_cuda_offloading_entries
// CUDA-NEXT: br i1 %17, label %while.end, label %while.entry
//
// CUDA: while.end:
// CUDA-NEXT: ret void
// CUDA-NEXT: }

// RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908
// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
// RUN:   -fembed-offload-object=%t.out
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu -r \
// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF
// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \
// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-COFF

// HIP-ELF: @__start_hip_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
// HIP-ELF-NEXT: @__stop_hip_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
// HIP-ELF-NEXT: @__dummy.hip_offloading_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries"

// HIP-COFF: @__start_hip_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries$OA"
// HIP-COFF-NEXT: @__stop_hip_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries$OZ"

// HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
// HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
// HIP-NEXT: @.hip.binary_handle = internal global ptr null

// HIP: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]

// HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
// HIP-NEXT: entry:
// HIP-NEXT: %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
// HIP-NEXT: store ptr %0, ptr @.hip.binary_handle, align 8
// HIP-NEXT: call void @.hip.globals_reg(ptr %0)
// HIP-NEXT: %1 = call i32 @atexit(ptr @.hip.fatbin_unreg)
// HIP-NEXT: ret void
// HIP-NEXT: }
//
// HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" {
// HIP-NEXT: entry:
// HIP-NEXT: %0 = load ptr, ptr @.hip.binary_handle, align 8
// HIP-NEXT: call void @__hipUnregisterFatBinary(ptr %0)
// HIP-NEXT: ret void
// HIP-NEXT: }
//
// HIP: define internal void @.hip.globals_reg(ptr %0) section ".text.startup" {
// HIP-NEXT: entry:
// HIP-NEXT: %1 = icmp ne ptr @__start_hip_offloading_entries, @__stop_hip_offloading_entries
// HIP-NEXT: br i1 %1, label %while.entry, label %while.end
//
// HIP: while.entry:
// HIP-NEXT: %entry1 = phi ptr [ @__start_hip_offloading_entries, %entry ], [ %16, %if.end ]
// HIP-NEXT: %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 4
// HIP-NEXT: %addr = load ptr, ptr %2, align 8
// HIP-NEXT: %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 8
// HIP-NEXT: %aux_addr = load ptr, ptr %3, align 8
// HIP-NEXT: %4 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 2
// HIP-NEXT: %kind = load i16, ptr %4, align 2
// HIP-NEXT: %5 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 5
// HIP-NEXT: %name = load ptr, ptr %5, align 8
// HIP-NEXT: %6 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 6
// HIP-NEXT: %size = load i64, ptr %6, align 4
// HIP-NEXT: %7 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 3
// HIP-NEXT: %flags = load i32, ptr %7, align 4
// HIP-NEXT: %8 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 7
// HIP-NEXT: %data = load i64, ptr %8, align 4
// HIP-NEXT: %9 = trunc i64 %data to i32
// HIP-NEXT: %type = and i32 %flags, 7
// HIP-NEXT: %10 = and i32 %flags, 8
// HIP-NEXT: %extern = lshr i32 %10, 3
// HIP-NEXT: %11 = and i32 %flags, 16
// HIP-NEXT: %constant = lshr i32 %11, 4
// HIP-NEXT: %12 = and i32 %flags, 32
// HIP-NEXT: %normalized = lshr i32 %12, 5
// HIP-NEXT: %13 = icmp eq i16 %kind, 3
// HIP-NEXT: br i1 %13, label %if.kind, label %if.end
//
// HIP: if.kind:
// HIP-NEXT: %14 = icmp eq i64 %size, 0
// HIP-NEXT: br i1 %14, label %if.then, label %if.else
//
// HIP: if.then:
// HIP-NEXT: %15 = call i32 @__hipRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null)
// HIP-NEXT: br label %if.end
//
// HIP: if.else:
// HIP-NEXT: switch i32 %type, label %if.end [
// HIP-NEXT: i32 0, label %sw.global
// HIP-NEXT: i32 1, label %sw.managed
// HIP-NEXT: i32 2, label %sw.surface
// HIP-NEXT: i32 3, label %sw.texture
// HIP-NEXT: ]
//
// HIP: sw.global:
// HIP-NEXT: call void @__hipRegisterVar(ptr %0, ptr %addr, ptr %name, ptr %name, i32 %extern, i64 %size, i32 %constant, i32 0)
// HIP-NEXT: br label %if.end
//
// HIP: sw.managed:
// HIP-NEXT: call void @__hipRegisterManagedVar(ptr %0, ptr %aux_addr, ptr %addr, ptr %name, i64 %size, i32 %9)
// HIP-NEXT: br label %if.end
//
// HIP: sw.surface:
// HIP-NEXT: call void @__hipRegisterSurface(ptr %0, ptr %addr, ptr %name, ptr %name, i32 %9, i32 %extern)
// HIP-NEXT: br label %if.end
//
// HIP: sw.texture:
// HIP-NEXT: call void @__hipRegisterTexture(ptr %0, ptr %addr, ptr %name, ptr %name, i32 %9, i32 %normalized, i32 %extern)
// HIP-NEXT: br label %if.end
//
// HIP: if.end:
// HIP-NEXT: %16 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
// HIP-NEXT: %17 = icmp eq ptr %16, @__stop_hip_offloading_entries
// HIP-NEXT: br i1 %17, label %while.end, label %while.entry
//
// HIP: while.end:
// HIP-NEXT: ret void
// HIP-NEXT: }