xref: /llvm-project/clang/test/Driver/linker-wrapper-image.c (revision 13dcc95dcd4999ff99f2de89d881f1aed5b21709)
1 // REQUIRES: x86-registered-target
2 // REQUIRES: nvptx-registered-target
3 // REQUIRES: amdgpu-registered-target
4 
// Wrapping of an OpenMP image. This file is compiled to a host x86_64 ELF
// object, that object is packaged as an OpenMP device image (triple
// nvptx64-nvidia-cuda, arch sm_70), and the package is embedded into a second
// host object. clang-linker-wrapper is then run with --dry-run and
// --print-wrapped-module for three configurations: a Linux host, a Linux host
// with -r, and a windows-gnu host. Each output is verified with its own set of
// check prefixes.
5 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o
6 
7 // RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
8 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
9 // RUN:   -fembed-offload-object=%t.out
10 // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
11 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-ELF
12 // RUN: clang-linker-wrapper --print-wrapped-module --dry-run -r --host-triple=x86_64-unknown-linux-gnu \
13 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP-ELF,OPENMP-REL
14 // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \
15 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-COFF
16 
// Expected globals for the OpenMP wrapper. On the ELF host the __start/__stop
// symbols of the "omp_offloading_entries" section are external hidden
// declarations, accompanied by an internal zero-length dummy array placed in
// that section. On the COFF host they are weak_odr hidden definitions placed
// in the "$OA" and "$OZ" subsections. The embedded image must begin with the
// bytes \10\FF\10\AD and sit in ".llvm.offloading" (".llvm.offloading.relocatable"
// for the -r invocation). The device-image table, the binary descriptor and a
// single global constructor entry with priority 101 must follow directly.
17 //      OPENMP-ELF: @__start_omp_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
18 // OPENMP-ELF-NEXT: @__stop_omp_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
19 // OPENMP-ELF-NEXT: @__dummy.omp_offloading_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries"
20 
21 //      OPENMP-COFF: @__start_omp_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries$OA"
22 // OPENMP-COFF-NEXT: @__stop_omp_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries$OZ"
23 
24 // OPENMP-REL: @.omp_offloading.device_image = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"\10\FF\10\AD{{.*}}", section ".llvm.offloading.relocatable", align 8
25 
26 //      OPENMP: @.omp_offloading.device_image = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"\10\FF\10\AD{{.*}}", section ".llvm.offloading", align 8
27 // OPENMP-NEXT: @.omp_offloading.device_images = internal unnamed_addr constant [1 x %__tgt_device_image] [%__tgt_device_image { ptr getelementptr ([[[BEGIN:[0-9]+]] x i8], ptr @.omp_offloading.device_image, i64 0, i64 144), ptr getelementptr ([[[END:[0-9]+]] x i8], ptr @.omp_offloading.device_image, i64 0, i64 144), ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries }]
28 // OPENMP-NEXT: @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 1, ptr @.omp_offloading.device_images, ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries }
29 // OPENMP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.omp_offloading.descriptor_reg, ptr null }]
30 
// Expected OpenMP registration code. The constructor hands the descriptor to
// __tgt_register_lib and schedules the unregister function through atexit; the
// unregister function hands the same descriptor to __tgt_unregister_lib. Both
// functions are expected in section ".text.startup".
31 //      OPENMP: define internal void @.omp_offloading.descriptor_reg() section ".text.startup" {
32 // OPENMP-NEXT: entry:
33 // OPENMP-NEXT:   call void @__tgt_register_lib(ptr @.omp_offloading.descriptor)
34 // OPENMP-NEXT:   %0 = call i32 @atexit(ptr @.omp_offloading.descriptor_unreg)
35 // OPENMP-NEXT:   ret void
36 // OPENMP-NEXT: }
37 
38 //      OPENMP: define internal void @.omp_offloading.descriptor_unreg() section ".text.startup" {
39 // OPENMP-NEXT: entry:
40 // OPENMP-NEXT:   call void @__tgt_unregister_lib(ptr @.omp_offloading.descriptor)
41 // OPENMP-NEXT:   ret void
42 // OPENMP-NEXT: }
43 
// Wrapping of a CUDA image. The host object built earlier (%t.elf.o) is
// repackaged with kind=cuda for nvptx64-nvidia-cuda / sm_70 and embedded into
// a fresh host object. The wrapped module is printed for a Linux host, a Linux
// host with -r, and a windows-gnu host. The -r invocation is verified with the
// same prefixes as the plain Linux one.
44 // RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_70
45 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
46 // RUN:   -fembed-offload-object=%t.out
47 // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
48 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF
49 // RUN: clang-linker-wrapper --print-wrapped-module --dry-run -r --host-triple=x86_64-unknown-linux-gnu \
50 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF
51 // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \
52 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-COFF
53 
// Expected globals for the CUDA wrapper. On the ELF host the __start/__stop
// symbols of "cuda_offloading_entries" are external hidden declarations plus
// an internal dummy array in that section; on the COFF host they are weak_odr
// hidden definitions in the "$OA"/"$OZ" subsections. The fat binary image is
// expected as a [0 x i8] zeroinitializer in ".nv_fatbin", referenced by
// @.fatbin_wrapper in ".nvFatBinSegment". @.cuda.binary_handle starts out
// null, and @.cuda.fatbin_reg is the only global constructor (priority 101).
54 //      CUDA-ELF: @__start_cuda_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
55 // CUDA-ELF-NEXT: @__stop_cuda_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
56 // CUDA-ELF-NEXT: @__dummy.cuda_offloading_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "cuda_offloading_entries"
57 
58 //      CUDA-COFF: @__start_cuda_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "cuda_offloading_entries$OA"
59 // CUDA-COFF-NEXT: @__stop_cuda_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "cuda_offloading_entries$OZ"
60 
61 //      CUDA: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".nv_fatbin"
62 // CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8
63 // CUDA-NEXT: @.cuda.binary_handle = internal global ptr null
64 
65 // CUDA: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.cuda.fatbin_reg, ptr null }]
66 
// Expected CUDA constructor/destructor pair. @.cuda.fatbin_reg registers the
// fat binary wrapper, stores the returned handle in @.cuda.binary_handle,
// calls @.cuda.globals_reg with that handle, calls __cudaRegisterFatBinaryEnd
// and schedules @.cuda.fatbin_unreg through atexit. @.cuda.fatbin_unreg
// reloads the stored handle and passes it to __cudaUnregisterFatBinary.
67 //      CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" {
68 // CUDA-NEXT: entry:
69 // CUDA-NEXT:   %0 = call ptr @__cudaRegisterFatBinary(ptr @.fatbin_wrapper)
70 // CUDA-NEXT:   store ptr %0, ptr @.cuda.binary_handle, align 8
71 // CUDA-NEXT:   call void @.cuda.globals_reg(ptr %0)
72 // CUDA-NEXT:   call void @__cudaRegisterFatBinaryEnd(ptr %0)
73 // CUDA-NEXT:   %1 = call i32 @atexit(ptr @.cuda.fatbin_unreg)
74 // CUDA-NEXT:   ret void
75 // CUDA-NEXT: }
76 //
77 //      CUDA: define internal void @.cuda.fatbin_unreg() section ".text.startup" {
78 // CUDA-NEXT: entry:
79 // CUDA-NEXT:   %0 = load ptr, ptr @.cuda.binary_handle, align 8
80 // CUDA-NEXT:   call void @__cudaUnregisterFatBinary(ptr %0)
81 // CUDA-NEXT:   ret void
82 // CUDA-NEXT: }
83 //
// Expected body of @.cuda.globals_reg: a loop over the entries between
// @__start_cuda_offloading_entries and @__stop_cuda_offloading_entries, which
// is skipped entirely when the two symbols compare equal. Each iteration loads
// fields of the current entry and decodes the flags word: the low three bits
// give the type, bit 3 is "extern", bit 4 is "constant" and bit 5 is
// "normalized". Only entries whose kind field equals 2 are handled. An entry
// with size 0 is registered through __cudaRegisterFunction; otherwise the type
// selects the global (0), managed (1), surface (2) or texture (3) case. The
// surface and texture cases are expected to branch straight to if.end without
// emitting a registration call.
84 //      CUDA: define internal void @.cuda.globals_reg(ptr %0) section ".text.startup" {
85 // CUDA-NEXT: entry:
86 // CUDA-NEXT:   %1 = icmp ne ptr @__start_cuda_offloading_entries, @__stop_cuda_offloading_entries
87 // CUDA-NEXT:   br i1 %1, label %while.entry, label %while.end
88 //
89 //      CUDA: while.entry:
90 // CUDA-NEXT:   %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %16, %if.end ]
91 // CUDA-NEXT:   %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 4
92 // CUDA-NEXT:   %addr = load ptr, ptr %2, align 8
93 // CUDA-NEXT:   %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 8
94 // CUDA-NEXT:   %aux_addr = load ptr, ptr %3, align 8
95 // CUDA-NEXT:   %4 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 2
96 // CUDA-NEXT:   %kind = load i16, ptr %4, align 2
97 // CUDA-NEXT:   %5 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 5
98 // CUDA-NEXT:   %name = load ptr, ptr %5, align 8
99 // CUDA-NEXT:   %6 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 6
100 // CUDA-NEXT:   %size = load i64, ptr %6, align 4
101 // CUDA-NEXT:   %7 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 3
102 // CUDA-NEXT:   %flags = load i32, ptr %7, align 4
103 // CUDA-NEXT:   %8 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 7
104 // CUDA-NEXT:   %data = load i64, ptr %8, align 4
105 // CUDA-NEXT:   %9 = trunc i64 %data to i32
106 // CUDA-NEXT:   %type = and i32 %flags, 7
107 // CUDA-NEXT:   %10 = and i32 %flags, 8
108 // CUDA-NEXT:   %extern = lshr i32 %10, 3
109 // CUDA-NEXT:   %11 = and i32 %flags, 16
110 // CUDA-NEXT:   %constant = lshr i32 %11, 4
111 // CUDA-NEXT:   %12 = and i32 %flags, 32
112 // CUDA-NEXT:   %normalized = lshr i32 %12, 5
113 // CUDA-NEXT:   %13 = icmp eq i16 %kind, 2
114 // CUDA-NEXT:   br i1 %13, label %if.kind, label %if.end
115 //
116 //      CUDA: if.kind:
117 // CUDA-NEXT:   %14 = icmp eq i64 %size, 0
118 // CUDA-NEXT:   br i1 %14, label %if.then, label %if.else
119 //
120 //      CUDA: if.then:
121 // CUDA-NEXT:   %15 = call i32 @__cudaRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null)
122 // CUDA-NEXT:   br label %if.end
123 //
124 //      CUDA: if.else:
125 // CUDA-NEXT:   switch i32 %type, label %if.end [
126 // CUDA-NEXT:     i32 0, label %sw.global
127 // CUDA-NEXT:     i32 1, label %sw.managed
128 // CUDA-NEXT:     i32 2, label %sw.surface
129 // CUDA-NEXT:     i32 3, label %sw.texture
130 // CUDA-NEXT:   ]
131 //
132 //      CUDA: sw.global:
133 // CUDA-NEXT:   call void @__cudaRegisterVar(ptr %0, ptr %addr, ptr %name, ptr %name, i32 %extern, i64 %size, i32 %constant, i32 0)
134 // CUDA-NEXT:   br label %if.end
135 //
136 //      CUDA: sw.managed:
137 // CUDA-NEXT:   call void @__cudaRegisterManagedVar(ptr %0, ptr %aux_addr, ptr %addr, ptr %name, i64 %size, i32 %9)
138 // CUDA-NEXT:   br label %if.end
139 //
140 //      CUDA: sw.surface:
141 // CUDA-NEXT:   br label %if.end
142 //
143 //      CUDA: sw.texture:
144 // CUDA-NEXT:   br label %if.end
145 //
146 //      CUDA: if.end:
147 // CUDA-NEXT:   %16 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
148 // CUDA-NEXT:   %17 = icmp eq ptr %16, @__stop_cuda_offloading_entries
149 // CUDA-NEXT:   br i1 %17, label %while.end, label %while.entry
150 //
151 //      CUDA: while.end:
152 // CUDA-NEXT:   ret void
153 // CUDA-NEXT: }
154 
// Wrapping of a HIP image. The host object built earlier (%t.elf.o) is
// repackaged with kind=hip for amdgcn-amd-amdhsa / gfx908 and embedded into a
// fresh host object. The wrapped module is printed for a Linux host, a Linux
// host with -r, and a windows-gnu host. The -r invocation is verified with the
// same prefixes as the plain Linux one.
155 // RUN: clang-offload-packager -o %t.out --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908
156 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \
157 // RUN:   -fembed-offload-object=%t.out
158 // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \
159 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF
160 // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu -r \
161 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF
162 // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \
163 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-COFF
164 
// Expected globals for the HIP wrapper. On the ELF host the __start/__stop
// symbols of "hip_offloading_entries" are external hidden declarations plus an
// internal dummy array in that section; on the COFF host they are weak_odr
// hidden definitions in the "$OA"/"$OZ" subsections. The fat binary image is
// expected as a [0 x i8] zeroinitializer in ".hip_fatbin", referenced by
// @.fatbin_wrapper in ".hipFatBinSegment". @.hip.binary_handle starts out
// null, and @.hip.fatbin_reg is the only global constructor (priority 101).
165 //      HIP-ELF: @__start_hip_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
166 // HIP-ELF-NEXT: @__stop_hip_offloading_entries = external hidden constant [0 x %struct.__tgt_offload_entry]
167 // HIP-ELF-NEXT: @__dummy.hip_offloading_entries = internal constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries"
168 
169 //      HIP-COFF: @__start_hip_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries$OA"
170 // HIP-COFF-NEXT: @__stop_hip_offloading_entries = weak_odr hidden constant [0 x %struct.__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries$OZ"
171 
172 //      HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin"
173 // HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
174 // HIP-NEXT: @.hip.binary_handle = internal global ptr null
175 
176 // HIP: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]
177 
// Expected HIP constructor/destructor pair. @.hip.fatbin_reg registers the fat
// binary wrapper, stores the returned handle in @.hip.binary_handle, calls
// @.hip.globals_reg with that handle and schedules @.hip.fatbin_unreg through
// atexit; the -NEXT chain leaves no room for any other call in between.
// @.hip.fatbin_unreg reloads the stored handle and passes it to
// __hipUnregisterFatBinary.
178 //      HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
179 // HIP-NEXT: entry:
180 // HIP-NEXT:   %0 = call ptr @__hipRegisterFatBinary(ptr @.fatbin_wrapper)
181 // HIP-NEXT:   store ptr %0, ptr @.hip.binary_handle, align 8
182 // HIP-NEXT:   call void @.hip.globals_reg(ptr %0)
183 // HIP-NEXT:   %1 = call i32 @atexit(ptr @.hip.fatbin_unreg)
184 // HIP-NEXT:   ret void
185 // HIP-NEXT: }
186 //
187 //      HIP: define internal void @.hip.fatbin_unreg() section ".text.startup" {
188 // HIP-NEXT: entry:
189 // HIP-NEXT:   %0 = load ptr, ptr @.hip.binary_handle, align 8
190 // HIP-NEXT:   call void @__hipUnregisterFatBinary(ptr %0)
191 // HIP-NEXT:   ret void
192 // HIP-NEXT: }
193 //
// Expected body of @.hip.globals_reg: a loop over the entries between
// @__start_hip_offloading_entries and @__stop_hip_offloading_entries, which is
// skipped entirely when the two symbols compare equal. Each iteration loads
// fields of the current entry and decodes the flags word: the low three bits
// give the type, bit 3 is "extern", bit 4 is "constant" and bit 5 is
// "normalized". Only entries whose kind field equals 3 are handled. An entry
// with size 0 is registered through __hipRegisterFunction; otherwise the type
// selects the global (0), managed (1), surface (2) or texture (3) case, each
// of which is expected to emit its own __hipRegister* call before branching
// to if.end.
194 //      HIP: define internal void @.hip.globals_reg(ptr %0) section ".text.startup" {
195 // HIP-NEXT: entry:
196 // HIP-NEXT:   %1 = icmp ne ptr @__start_hip_offloading_entries, @__stop_hip_offloading_entries
197 // HIP-NEXT:   br i1 %1, label %while.entry, label %while.end
198 //
199 //      HIP: while.entry:
200 // HIP-NEXT:   %entry1 = phi ptr [ @__start_hip_offloading_entries, %entry ], [ %16, %if.end ]
201 // HIP-NEXT:   %2 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 4
202 // HIP-NEXT:   %addr = load ptr, ptr %2, align 8
203 // HIP-NEXT:   %3 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 8
204 // HIP-NEXT:   %aux_addr = load ptr, ptr %3, align 8
205 // HIP-NEXT:   %4 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 2
206 // HIP-NEXT:   %kind = load i16, ptr %4, align 2
207 // HIP-NEXT:   %5 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 5
208 // HIP-NEXT:   %name = load ptr, ptr %5, align 8
209 // HIP-NEXT:   %6 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 6
210 // HIP-NEXT:   %size = load i64, ptr %6, align 4
211 // HIP-NEXT:   %7 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 3
212 // HIP-NEXT:   %flags = load i32, ptr %7, align 4
213 // HIP-NEXT:   %8 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i32 0, i32 7
214 // HIP-NEXT:   %data = load i64, ptr %8, align 4
215 // HIP-NEXT:   %9 = trunc i64 %data to i32
216 // HIP-NEXT:   %type = and i32 %flags, 7
217 // HIP-NEXT:   %10 = and i32 %flags, 8
218 // HIP-NEXT:   %extern = lshr i32 %10, 3
219 // HIP-NEXT:   %11 = and i32 %flags, 16
220 // HIP-NEXT:   %constant = lshr i32 %11, 4
221 // HIP-NEXT:   %12 = and i32 %flags, 32
222 // HIP-NEXT:   %normalized = lshr i32 %12, 5
223 // HIP-NEXT:   %13 = icmp eq i16 %kind, 3
224 // HIP-NEXT:   br i1 %13, label %if.kind, label %if.end
225 //
226 //      HIP: if.kind:
227 // HIP-NEXT:   %14 = icmp eq i64 %size, 0
228 // HIP-NEXT:   br i1 %14, label %if.then, label %if.else
229 //
230 //      HIP: if.then:
231 // HIP-NEXT:   %15 = call i32 @__hipRegisterFunction(ptr %0, ptr %addr, ptr %name, ptr %name, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null)
232 // HIP-NEXT:   br label %if.end
233 //
234 //      HIP: if.else:
235 // HIP-NEXT:   switch i32 %type, label %if.end [
236 // HIP-NEXT:     i32 0, label %sw.global
237 // HIP-NEXT:     i32 1, label %sw.managed
238 // HIP-NEXT:     i32 2, label %sw.surface
239 // HIP-NEXT:     i32 3, label %sw.texture
240 // HIP-NEXT:   ]
241 //
242 //      HIP: sw.global:
243 // HIP-NEXT:   call void @__hipRegisterVar(ptr %0, ptr %addr, ptr %name, ptr %name, i32 %extern, i64 %size, i32 %constant, i32 0)
244 // HIP-NEXT:   br label %if.end
245 //
246 //      HIP: sw.managed:
247 // HIP-NEXT:   call void @__hipRegisterManagedVar(ptr %0, ptr %aux_addr, ptr %addr, ptr %name, i64 %size, i32 %9)
248 // HIP-NEXT:   br label %if.end
249 //
250 //      HIP: sw.surface:
251 // HIP-NEXT:   call void @__hipRegisterSurface(ptr %0, ptr %addr, ptr %name, ptr %name, i32 %9, i32 %extern)
252 // HIP-NEXT:   br label %if.end
253 //
254 //      HIP: sw.texture:
255 // HIP-NEXT:   call void @__hipRegisterTexture(ptr %0, ptr %addr, ptr %name, ptr %name, i32 %9, i32 %normalized, i32 %extern)
256 // HIP-NEXT:   br label %if.end
257 //
258 //      HIP: if.end:
259 // HIP-NEXT:   %16 = getelementptr inbounds %struct.__tgt_offload_entry, ptr %entry1, i64 1
260 // HIP-NEXT:   %17 = icmp eq ptr %16, @__stop_hip_offloading_entries
261 // HIP-NEXT:   br i1 %17, label %while.end, label %while.entry
262 //
263 //      HIP: while.end:
264 // HIP-NEXT:   ret void
265 // HIP-NEXT: }
266