xref: /llvm-project/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll (revision 07ed8187acc31ac3f4779da452864a29d48799ac)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
2; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
3; RUN: opt -aa-pipeline=basic-aa -passes=openmp-opt-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
4
5target triple = "amdgcn-amd-amdhsa"
6
7%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
8%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
9
; Internal addrspace(3) i32 globals with undef initializers. The test kernels
; below store to and load from these.
10@G = internal addrspace(3) global i32 undef, align 4
11@H = internal addrspace(3) global i32 undef, align 4
12@X = internal addrspace(3) global i32 undef, align 4
13@QA1 = internal addrspace(3) global i32 undef, align 4
14@QB1 = internal addrspace(3) global i32 undef, align 4
15@QC1 = internal addrspace(3) global i32 undef, align 4
16@QD1 = internal addrspace(3) global i32 undef, align 4
17@QA2 = internal addrspace(3) global i32 undef, align 4
18@QB2 = internal addrspace(3) global i32 undef, align 4
19@QC2 = internal addrspace(3) global i32 undef, align 4
20@QD2 = internal addrspace(3) global i32 undef, align 4
21@QA3 = internal addrspace(3) global i32 undef, align 4
22@QB3 = internal addrspace(3) global i32 undef, align 4
23@QC3 = internal addrspace(3) global i32 undef, align 4
24@QD3 = internal addrspace(3) global i32 undef, align 4
25@UAA1 = internal addrspace(3) global i32 undef, align 4
26@UAA2 = internal addrspace(3) global i32 undef, align 4
27@UAA3 = internal addrspace(3) global i32 undef, align 4
28@UANA1 = internal addrspace(3) global i32 undef, align 4
; One-byte addrspace(4) constant whose address is compared against null in
; @test_assume.
29@str = private unnamed_addr addrspace(4) constant [1 x i8] c"\00", align 1
; Kernel environment object passed to @__kmpc_target_init by @kernel.
30@kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
31
32; Make sure we do not delete the stores to @G without also replacing the load with `1`.
33;.
34; CHECK: @G = internal addrspace(3) global i32 undef, align 4
35; CHECK: @H = internal addrspace(3) global i32 undef, align 4
36; CHECK: @X = internal addrspace(3) global i32 undef, align 4
37; CHECK: @QA1 = internal addrspace(3) global i32 undef, align 4
38; CHECK: @QB1 = internal addrspace(3) global i32 undef, align 4
39; CHECK: @QC1 = internal addrspace(3) global i32 undef, align 4
40; CHECK: @QD1 = internal addrspace(3) global i32 undef, align 4
41; CHECK: @QA2 = internal addrspace(3) global i32 undef, align 4
42; CHECK: @QB2 = internal addrspace(3) global i32 undef, align 4
43; CHECK: @QC2 = internal addrspace(3) global i32 undef, align 4
44; CHECK: @QD2 = internal addrspace(3) global i32 undef, align 4
45; CHECK: @QA3 = internal addrspace(3) global i32 undef, align 4
46; CHECK: @QB3 = internal addrspace(3) global i32 undef, align 4
47; CHECK: @QC3 = internal addrspace(3) global i32 undef, align 4
48; CHECK: @QD3 = internal addrspace(3) global i32 undef, align 4
49; CHECK: @UAA1 = internal addrspace(3) global i32 undef, align 4
50; CHECK: @UAA2 = internal addrspace(3) global i32 undef, align 4
51; CHECK: @UAA3 = internal addrspace(3) global i32 undef, align 4
52; CHECK: @UANA1 = internal addrspace(3) global i32 undef, align 4
53; CHECK: @str = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
54; CHECK: @kernel_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
55;.
56define amdgpu_kernel void @kernel(ptr %dyn) "kernel" {
57;
58; TUNIT: Function Attrs: norecurse
59; TUNIT-LABEL: define {{[^@]+}}@kernel
60; TUNIT-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
61; TUNIT-NEXT:    [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr [[DYN]])
62; TUNIT-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
63; TUNIT-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
64; TUNIT:       if.then:
65; TUNIT-NEXT:    br label [[IF_MERGE:%.*]]
66; TUNIT:       if.else:
67; TUNIT-NEXT:    call void @barrier() #[[ATTR6:[0-9]+]]
68; TUNIT-NEXT:    call void @use1(i32 1) #[[ATTR7:[0-9]+]]
69; TUNIT-NEXT:    call void @llvm.assume(i1 true)
70; TUNIT-NEXT:    call void @barrier() #[[ATTR6]]
71; TUNIT-NEXT:    br label [[IF_MERGE]]
72; TUNIT:       if.merge:
73; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
74; TUNIT-NEXT:    br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
75; TUNIT:       if.then2:
76; TUNIT-NEXT:    call void @barrier() #[[ATTR6]]
77; TUNIT-NEXT:    br label [[IF_END]]
78; TUNIT:       if.end:
79; TUNIT-NEXT:    call void @__kmpc_target_deinit()
80; TUNIT-NEXT:    ret void
81;
82; CGSCC: Function Attrs: norecurse
83; CGSCC-LABEL: define {{[^@]+}}@kernel
84; CGSCC-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
85; CGSCC-NEXT:    [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr [[DYN]])
86; CGSCC-NEXT:    [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
87; CGSCC-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
88; CGSCC:       if.then:
89; CGSCC-NEXT:    br label [[IF_MERGE:%.*]]
90; CGSCC:       if.else:
91; CGSCC-NEXT:    call void @barrier() #[[ATTR6:[0-9]+]]
92; CGSCC-NEXT:    call void @use1(i32 1) #[[ATTR6]]
93; CGSCC-NEXT:    call void @llvm.assume(i1 true)
94; CGSCC-NEXT:    call void @barrier() #[[ATTR6]]
95; CGSCC-NEXT:    br label [[IF_MERGE]]
96; CGSCC:       if.merge:
97; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
98; CGSCC-NEXT:    br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
99; CGSCC:       if.then2:
100; CGSCC-NEXT:    call void @barrier() #[[ATTR6]]
101; CGSCC-NEXT:    br label [[IF_END]]
102; CGSCC:       if.end:
103; CGSCC-NEXT:    call void @__kmpc_target_deinit()
104; CGSCC-NEXT:    ret void
105;
; Test input: branch on whether @__kmpc_target_init returned -1. The -1 path
; stores 1 to @G and 2 to @H; the other path reads both globals between two
; @barrier calls. In the expected output above the loads are folded to the
; stored constants (use1(i32 1), assume(i1 true), use1(i32 2)) and all three
; stores are gone.
106  %call = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr %dyn)
107  %cmp = icmp eq i32 %call, -1
108  br i1 %cmp, label %if.then, label %if.else
109if.then:
110  store i32 1, ptr addrspace(3) @G
111  store i32 2, ptr addrspace(3) @H
112  br label %if.merge
113if.else:
114  call void @barrier();
115  %l = load i32, ptr addrspace(3) @G
116  call void @use1(i32 %l)
117  %hv = load i32, ptr addrspace(3) @H
118  %hc = icmp eq i32 %hv, 2
119  call void @llvm.assume(i1 %hc)
120  call void @barrier();
121  br label %if.merge
122if.merge:
123  %hreload = load i32, ptr addrspace(3) @H
124  call void @use1(i32 %hreload)
125  br i1 %cmp, label %if.then2, label %if.end
126if.then2:
127  store i32 2, ptr addrspace(3) @G
128  call void @barrier();
129  br label %if.end
130if.end:
131  call void @__kmpc_target_deinit()
132  ret void
133}
134
135define void @test_assume() {
136; CHECK-LABEL: define {{[^@]+}}@test_assume() {
137; CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr addrspacecast (ptr addrspace(4) @str to ptr), null
138; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
139; CHECK-NEXT:    ret void
140;
; Test input: compare the addrspace(4)-to-ptr cast of @str against null and
; pass the result to llvm.assume. The expected output above is the same IR,
; i.e. the comparison is not folded.
141  %cmp = icmp ne ptr addrspacecast (ptr addrspace(4) @str to ptr), null
142  call void @llvm.assume(i1 %cmp)
143  ret void
144}
145
146; We can't ignore the sync, hence this might store 2 into %p
147define amdgpu_kernel void @kernel2(ptr %p) "kernel" {
148; CHECK-LABEL: define {{[^@]+}}@kernel2
149; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR1:[0-9]+]] {
150; CHECK-NEXT:    store i32 1, ptr addrspace(3) @X, align 4
151; CHECK-NEXT:    call void @sync()
152; CHECK-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
153; CHECK-NEXT:    store i32 2, ptr addrspace(3) @X, align 4
154; CHECK-NEXT:    store i32 [[V]], ptr [[P]], align 4
155; CHECK-NEXT:    ret void
156;
; Test input: store 1 to @X, call the body-less @sync, reload @X, overwrite @X
; with 2, and write the reloaded value to %p. The expected output above keeps
; every store and the load.
157  store i32 1, ptr addrspace(3) @X
158  call void @sync()
159  %v = load i32, ptr addrspace(3) @X
160  store i32 2, ptr addrspace(3) @X
161  store i32 %v, ptr %p
162  ret void
163}
164
165; We can't ignore the sync, hence this might store 2 into %p
166define amdgpu_kernel void @kernel3(ptr %p) "kernel" {
167; TUNIT-LABEL: define {{[^@]+}}@kernel3
168; TUNIT-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
169; TUNIT-NEXT:    store i32 1, ptr addrspace(3) @X, align 4
170; TUNIT-NEXT:    call void @sync_def.internalized()
171; TUNIT-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
172; TUNIT-NEXT:    store i32 2, ptr addrspace(3) @X, align 4
173; TUNIT-NEXT:    store i32 [[V]], ptr [[P]], align 4
174; TUNIT-NEXT:    ret void
175;
176; CGSCC-LABEL: define {{[^@]+}}@kernel3
177; CGSCC-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
178; CGSCC-NEXT:    store i32 1, ptr addrspace(3) @X, align 4
179; CGSCC-NEXT:    call void @sync_def()
180; CGSCC-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
181; CGSCC-NEXT:    store i32 2, ptr addrspace(3) @X, align 4
182; CGSCC-NEXT:    store i32 [[V]], ptr [[P]], align 4
183; CGSCC-NEXT:    ret void
184;
; Test input: same sequence as @kernel2, but the synchronizing call goes
; through the defined wrapper @sync_def. Both expected outputs above keep the
; stores and the load; they differ only in the callee name
; (@sync_def.internalized for the openmp-opt run, @sync_def for the
; openmp-opt-cgscc run).
185  store i32 1, ptr addrspace(3) @X
186  call void @sync_def()
187  %v = load i32, ptr addrspace(3) @X
188  store i32 2, ptr addrspace(3) @X
189  store i32 %v, ptr %p
190  ret void
191}
192
193define void @sync_def() {
194; CHECK-LABEL: define {{[^@]+}}@sync_def() {
195; CHECK-NEXT:    call void @sync()
196; CHECK-NEXT:    ret void
197;
; Wrapper whose only action is to call @sync; it is the callee used by
; @kernel3. The expected output above is the same body.
198  call void @sync()
199  ret void
200}
201
202define amdgpu_kernel void @kernel4a1(i1 %c) "kernel" {
203; TUNIT-LABEL: define {{[^@]+}}@kernel4a1
204; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
205; TUNIT-NEXT:    store i32 0, ptr addrspace(3) @QA1, align 4
206; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
207; TUNIT:       L:
208; TUNIT-NEXT:    call void @sync()
209; TUNIT-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QA1, align 4
210; TUNIT-NEXT:    call void @use1(i32 [[V]]) #[[ATTR7]]
211; TUNIT-NEXT:    ret void
212; TUNIT:       S:
213; TUNIT-NEXT:    store i32 2, ptr addrspace(3) @QA1, align 4
214; TUNIT-NEXT:    call void @sync()
215; TUNIT-NEXT:    ret void
216;
217; CGSCC-LABEL: define {{[^@]+}}@kernel4a1
218; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
219; CGSCC-NEXT:    store i32 0, ptr addrspace(3) @QA1, align 4
220; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
221; CGSCC:       L:
222; CGSCC-NEXT:    call void @sync()
223; CGSCC-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QA1, align 4
224; CGSCC-NEXT:    call void @use1(i32 [[V]]) #[[ATTR6]]
225; CGSCC-NEXT:    ret void
226; CGSCC:       S:
227; CGSCC-NEXT:    store i32 2, ptr addrspace(3) @QA1, align 4
228; CGSCC-NEXT:    call void @sync()
229; CGSCC-NEXT:    ret void
230;
; Test input: store 0 to @QA1, then branch on %c. Block L calls @sync and
; loads @QA1; block S stores 2 and calls @sync. The expected output above
; keeps both stores and the load.
231  store i32 0, ptr addrspace(3) @QA1
232  br i1 %c, label %S, label %L
233L:
234  call void @sync();
235  %v = load i32, ptr addrspace(3) @QA1
236  call void @use1(i32 %v)
237  ret void
238S:
239  store i32 2, ptr addrspace(3) @QA1
240  call void @sync();
241  ret void
242}
243
244; We should not replace the load or delete the second store.
245define amdgpu_kernel void @kernel4b1(i1 %c) "kernel" {
246; TUNIT-LABEL: define {{[^@]+}}@kernel4b1
247; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
248; TUNIT-NEXT:    store i32 0, ptr addrspace(3) @QB1, align 4
249; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
250; TUNIT:       L:
251; TUNIT-NEXT:    call void @sync()
252; TUNIT-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QB1, align 4
253; TUNIT-NEXT:    call void @use1(i32 [[V]]) #[[ATTR7]]
254; TUNIT-NEXT:    ret void
255; TUNIT:       S:
256; TUNIT-NEXT:    store i32 2, ptr addrspace(3) @QB1, align 4
257; TUNIT-NEXT:    ret void
258;
259; CGSCC-LABEL: define {{[^@]+}}@kernel4b1
260; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
261; CGSCC-NEXT:    store i32 0, ptr addrspace(3) @QB1, align 4
262; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
263; CGSCC:       L:
264; CGSCC-NEXT:    call void @sync()
265; CGSCC-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QB1, align 4
266; CGSCC-NEXT:    call void @use1(i32 [[V]]) #[[ATTR6]]
267; CGSCC-NEXT:    ret void
268; CGSCC:       S:
269; CGSCC-NEXT:    store i32 2, ptr addrspace(3) @QB1, align 4
270; CGSCC-NEXT:    ret void
271;
; Test input: same shape as @kernel4a1 on @QB1, except that block S stores 2
; without a following @sync call. The expected output above still keeps both
; stores and the load.
272  store i32 0, ptr addrspace(3) @QB1
273  br i1 %c, label %S, label %L
274L:
275  call void @sync();
276  %v = load i32, ptr addrspace(3) @QB1
277  call void @use1(i32 %v)
278  ret void
279S:
280  store i32 2, ptr addrspace(3) @QB1
281  ret void
282}
283
284define amdgpu_kernel void @kernel4a2(i1 %c) "kernel" {
285; TUNIT-LABEL: define {{[^@]+}}@kernel4a2
286; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
287; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
288; TUNIT:       L:
289; TUNIT-NEXT:    call void @sync()
290; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
291; TUNIT-NEXT:    ret void
292; TUNIT:       S:
293; TUNIT-NEXT:    call void @sync()
294; TUNIT-NEXT:    ret void
295;
296; CGSCC-LABEL: define {{[^@]+}}@kernel4a2
297; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
298; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
299; CGSCC:       L:
300; CGSCC-NEXT:    call void @sync()
301; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
302; CGSCC-NEXT:    ret void
303; CGSCC:       S:
304; CGSCC-NEXT:    call void @sync()
305; CGSCC-NEXT:    ret void
306;
; Test input: no store precedes the branch, so @QA2 holds its undef
; initializer unless block S runs. Block L calls @sync and loads @QA2; block S
; stores 2 and calls @sync. In the expected output above the load is replaced
; by the constant 2 and the store is removed; both @sync calls remain.
307  br i1 %c, label %S, label %L
308L:
309  call void @sync();
310  %v = load i32, ptr addrspace(3) @QA2
311  call void @use1(i32 %v)
312  ret void
313S:
314  store i32 2, ptr addrspace(3) @QA2
315  call void @sync();
316  ret void
317}
318
319; We should not replace the load with undef.
320define amdgpu_kernel void @kernel4b2(i1 %c) "kernel" {
321; TUNIT-LABEL: define {{[^@]+}}@kernel4b2
322; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
323; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
324; TUNIT:       L:
325; TUNIT-NEXT:    call void @sync()
326; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
327; TUNIT-NEXT:    ret void
328; TUNIT:       S:
329; TUNIT-NEXT:    ret void
330;
331; CGSCC-LABEL: define {{[^@]+}}@kernel4b2
332; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
333; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
334; CGSCC:       L:
335; CGSCC-NEXT:    call void @sync()
336; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
337; CGSCC-NEXT:    ret void
338; CGSCC:       S:
339; CGSCC-NEXT:    ret void
340;
; Test input: same shape as @kernel4a2 on @QB2, except that block S stores 2
; without a following @sync call. In the expected output above the load is
; replaced by the constant 2 (not undef) and the store is removed.
341  br i1 %c, label %S, label %L
342L:
343  call void @sync();
344  %v = load i32, ptr addrspace(3) @QB2
345  call void @use1(i32 %v)
346  ret void
347S:
348  store i32 2, ptr addrspace(3) @QB2
349  ret void
350}
351
352define amdgpu_kernel void @kernel4a3(i1 %c) "kernel" {
353; TUNIT-LABEL: define {{[^@]+}}@kernel4a3
354; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
355; TUNIT-NEXT:    store i32 0, ptr addrspace(3) @QA3, align 4
356; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
357; TUNIT:       L:
358; TUNIT-NEXT:    call void @sync()
359; TUNIT-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QA3, align 4
360; TUNIT-NEXT:    call void @use1(i32 [[V]]) #[[ATTR7]]
361; TUNIT-NEXT:    ret void
362; TUNIT:       S:
363; TUNIT-NEXT:    store i32 2, ptr addrspace(3) @QA3, align 4
364; TUNIT-NEXT:    call void @sync()
365; TUNIT-NEXT:    call void @sync()
366; TUNIT-NEXT:    call void @sync()
367; TUNIT-NEXT:    call void @sync()
368; TUNIT-NEXT:    ret void
369;
370; CGSCC-LABEL: define {{[^@]+}}@kernel4a3
371; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
372; CGSCC-NEXT:    store i32 0, ptr addrspace(3) @QA3, align 4
373; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
374; CGSCC:       L:
375; CGSCC-NEXT:    call void @sync()
376; CGSCC-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QA3, align 4
377; CGSCC-NEXT:    call void @use1(i32 [[V]]) #[[ATTR6]]
378; CGSCC-NEXT:    ret void
379; CGSCC:       S:
380; CGSCC-NEXT:    store i32 2, ptr addrspace(3) @QA3, align 4
381; CGSCC-NEXT:    call void @sync()
382; CGSCC-NEXT:    call void @sync()
383; CGSCC-NEXT:    call void @sync()
384; CGSCC-NEXT:    call void @sync()
385; CGSCC-NEXT:    ret void
386;
; Test input: same shape as @kernel4a1 on @QA3, but block S follows its store
; with four @sync calls instead of one. The expected output above keeps both
; stores, the load, and all @sync calls.
387  store i32 0, ptr addrspace(3) @QA3
388  br i1 %c, label %S, label %L
389L:
390  call void @sync();
391  %v = load i32, ptr addrspace(3) @QA3
392  call void @use1(i32 %v)
393  ret void
394S:
395  store i32 2, ptr addrspace(3) @QA3
396  call void @sync();
397  call void @sync();
398  call void @sync();
399  call void @sync();
400  ret void
401}
402
403; The load of QB3 should not be simplified to 0.
404define amdgpu_kernel void @kernel4b3(i1 %c) "kernel" {
405; TUNIT-LABEL: define {{[^@]+}}@kernel4b3
406; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
407; TUNIT-NEXT:    store i32 0, ptr addrspace(3) @QB3, align 4
408; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
409; TUNIT:       L:
410; TUNIT-NEXT:    call void @sync()
411; TUNIT-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QB3, align 4
412; TUNIT-NEXT:    call void @use1(i32 [[V]]) #[[ATTR7]]
413; TUNIT-NEXT:    ret void
414; TUNIT:       S:
415; TUNIT-NEXT:    store i32 2, ptr addrspace(3) @QB3, align 4
416; TUNIT-NEXT:    call void @use1(i32 0) #[[ATTR7]]
417; TUNIT-NEXT:    call void @use1(i32 1) #[[ATTR7]]
418; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
419; TUNIT-NEXT:    call void @use1(i32 3) #[[ATTR7]]
420; TUNIT-NEXT:    ret void
421;
422; CGSCC-LABEL: define {{[^@]+}}@kernel4b3
423; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
424; CGSCC-NEXT:    store i32 0, ptr addrspace(3) @QB3, align 4
425; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
426; CGSCC:       L:
427; CGSCC-NEXT:    call void @sync()
428; CGSCC-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QB3, align 4
429; CGSCC-NEXT:    call void @use1(i32 [[V]]) #[[ATTR6]]
430; CGSCC-NEXT:    ret void
431; CGSCC:       S:
432; CGSCC-NEXT:    store i32 2, ptr addrspace(3) @QB3, align 4
433; CGSCC-NEXT:    call void @use1(i32 0) #[[ATTR6]]
434; CGSCC-NEXT:    call void @use1(i32 1) #[[ATTR6]]
435; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
436; CGSCC-NEXT:    call void @use1(i32 3) #[[ATTR6]]
437; CGSCC-NEXT:    ret void
438;
; Test input: store 0 to @QB3, then branch on %c. Block L calls @sync and
; loads @QB3; block S stores 2 and then makes four @use1 calls (declared
; nosync) rather than any @sync call. The expected output above keeps both
; stores and the load.
439  store i32 0, ptr addrspace(3) @QB3
440  br i1 %c, label %S, label %L
441L:
442  call void @sync();
443  %v = load i32, ptr addrspace(3) @QB3
444  call void @use1(i32 %v)
445  ret void
446S:
447  store i32 2, ptr addrspace(3) @QB3
448  call void @use1(i32 0)
449  call void @use1(i32 1)
450  call void @use1(i32 2)
451  call void @use1(i32 3)
452  ret void
453}
454
455
456define amdgpu_kernel void @kernel4c1(i1 %c) "kernel" {
457; TUNIT: Function Attrs: norecurse
458; TUNIT-LABEL: define {{[^@]+}}@kernel4c1
459; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
460; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
461; TUNIT:       L:
462; TUNIT-NEXT:    call void @use1(i32 0) #[[ATTR7]]
463; TUNIT-NEXT:    ret void
464; TUNIT:       S:
465; TUNIT-NEXT:    ret void
466;
467; CGSCC: Function Attrs: norecurse
468; CGSCC-LABEL: define {{[^@]+}}@kernel4c1
469; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
470; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
471; CGSCC:       L:
472; CGSCC-NEXT:    call void @use1(i32 0) #[[ATTR6]]
473; CGSCC-NEXT:    ret void
474; CGSCC:       S:
475; CGSCC-NEXT:    ret void
476;
; Test input: counterpart of @kernel4a1 that uses @barrier (declared with the
; ompx_aligned_barrier assumption) instead of @sync, on @QC1. In the expected
; output above the load is replaced by the constant 0 and both stores and both
; @barrier calls are removed.
477  store i32 0, ptr addrspace(3) @QC1
478  br i1 %c, label %S, label %L
479L:
480  call void @barrier();
481  %v = load i32, ptr addrspace(3) @QC1
482  call void @use1(i32 %v)
483  ret void
484S:
485  store i32 2, ptr addrspace(3) @QC1
486  call void @barrier();
487  ret void
488}
489
490; We should not replace the load or delete the second store.
491define amdgpu_kernel void @kernel4d1(i1 %c) "kernel" {
492; TUNIT: Function Attrs: norecurse
493; TUNIT-LABEL: define {{[^@]+}}@kernel4d1
494; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
495; TUNIT-NEXT:    store i32 0, ptr addrspace(3) @QD1, align 4
496; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
497; TUNIT:       L:
498; TUNIT-NEXT:    call void @barrier() #[[ATTR7]]
499; TUNIT-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QD1, align 4
500; TUNIT-NEXT:    call void @use1(i32 [[V]]) #[[ATTR7]]
501; TUNIT-NEXT:    ret void
502; TUNIT:       S:
503; TUNIT-NEXT:    store i32 2, ptr addrspace(3) @QD1, align 4
504; TUNIT-NEXT:    ret void
505;
506; CGSCC: Function Attrs: norecurse
507; CGSCC-LABEL: define {{[^@]+}}@kernel4d1
508; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
509; CGSCC-NEXT:    store i32 0, ptr addrspace(3) @QD1, align 4
510; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
511; CGSCC:       L:
512; CGSCC-NEXT:    call void @barrier() #[[ATTR6]]
513; CGSCC-NEXT:    [[V:%.*]] = load i32, ptr addrspace(3) @QD1, align 4
514; CGSCC-NEXT:    call void @use1(i32 [[V]]) #[[ATTR6]]
515; CGSCC-NEXT:    ret void
516; CGSCC:       S:
517; CGSCC-NEXT:    store i32 2, ptr addrspace(3) @QD1, align 4
518; CGSCC-NEXT:    ret void
519;
; Test input: same shape as @kernel4c1 on @QD1, except that block S stores 2
; without a following @barrier call. The expected output above keeps both
; stores, the load, and the @barrier call in block L.
520  store i32 0, ptr addrspace(3) @QD1
521  br i1 %c, label %S, label %L
522L:
523  call void @barrier();
524  %v = load i32, ptr addrspace(3) @QD1
525  call void @use1(i32 %v)
526  ret void
527S:
528  store i32 2, ptr addrspace(3) @QD1
529  ret void
530}
531
532define amdgpu_kernel void @kernel4c2(i1 %c) "kernel" {
533; TUNIT: Function Attrs: norecurse
534; TUNIT-LABEL: define {{[^@]+}}@kernel4c2
535; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
536; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
537; TUNIT:       L:
538; TUNIT-NEXT:    call void @use1(i32 undef) #[[ATTR7]]
539; TUNIT-NEXT:    ret void
540; TUNIT:       S:
541; TUNIT-NEXT:    ret void
542;
543; CGSCC: Function Attrs: norecurse
544; CGSCC-LABEL: define {{[^@]+}}@kernel4c2
545; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
546; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
547; CGSCC:       L:
548; CGSCC-NEXT:    call void @use1(i32 undef) #[[ATTR6]]
549; CGSCC-NEXT:    ret void
550; CGSCC:       S:
551; CGSCC-NEXT:    ret void
552;
; Test input: no store precedes the branch. Block L calls @barrier and loads
; @QC2; block S stores 2 and calls @barrier. In the expected output above the
; load is replaced by undef (the initializer of @QC2), and the store and both
; @barrier calls are removed.
553  br i1 %c, label %S, label %L
554L:
555  call void @barrier();
556  %v = load i32, ptr addrspace(3) @QC2
557  call void @use1(i32 %v)
558  ret void
559S:
560  store i32 2, ptr addrspace(3) @QC2
561  call void @barrier();
562  ret void
563}
564
565; We should not replace the load with undef.
566define amdgpu_kernel void @kernel4d2(i1 %c) "kernel" {
567; TUNIT: Function Attrs: norecurse
568; TUNIT-LABEL: define {{[^@]+}}@kernel4d2
569; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
570; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
571; TUNIT:       L:
572; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
573; TUNIT-NEXT:    ret void
574; TUNIT:       S:
575; TUNIT-NEXT:    ret void
576;
577; CGSCC: Function Attrs: norecurse
578; CGSCC-LABEL: define {{[^@]+}}@kernel4d2
579; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
580; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
581; CGSCC:       L:
582; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
583; CGSCC-NEXT:    ret void
584; CGSCC:       S:
585; CGSCC-NEXT:    ret void
586;
; Test input: same shape as @kernel4c2 on @QD2, except that block S stores 2
; without a following @barrier call. In the expected output above the load is
; replaced by the constant 2, and the store and the @barrier call are removed.
587  br i1 %c, label %S, label %L
588L:
589  call void @barrier();
590  %v = load i32, ptr addrspace(3) @QD2
591  call void @use1(i32 %v)
592  ret void
593S:
594  store i32 2, ptr addrspace(3) @QD2
595  ret void
596}
597
598define amdgpu_kernel void @kernel4c3(i1 %c) "kernel" {
599; TUNIT: Function Attrs: norecurse
600; TUNIT-LABEL: define {{[^@]+}}@kernel4c3
601; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
602; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
603; TUNIT:       L:
604; TUNIT-NEXT:    call void @use1(i32 undef) #[[ATTR7]]
605; TUNIT-NEXT:    ret void
606; TUNIT:       S:
607; TUNIT-NEXT:    ret void
608;
609; CGSCC: Function Attrs: norecurse
610; CGSCC-LABEL: define {{[^@]+}}@kernel4c3
611; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
612; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
613; CGSCC:       L:
614; CGSCC-NEXT:    call void @use1(i32 undef) #[[ATTR6]]
615; CGSCC-NEXT:    ret void
616; CGSCC:       S:
617; CGSCC-NEXT:    ret void
618;
; Test input: the body has the same instructions as @kernel4c2 but operates on
; @QC3. The expected output above is likewise use1(i32 undef) with the store
; and both @barrier calls removed.
619  br i1 %c, label %S, label %L
620L:
621  call void @barrier();
622  %v = load i32, ptr addrspace(3) @QC3
623  call void @use1(i32 %v)
624  ret void
625S:
626  store i32 2, ptr addrspace(3) @QC3
627  call void @barrier();
628  ret void
629}
630
631; We should not replace the load with undef.
632define amdgpu_kernel void @kernel4d3(i1 %c) "kernel" {
633; TUNIT: Function Attrs: norecurse
634; TUNIT-LABEL: define {{[^@]+}}@kernel4d3
635; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
636; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
637; TUNIT:       L:
638; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
639; TUNIT-NEXT:    ret void
640; TUNIT:       S:
641; TUNIT-NEXT:    ret void
642;
643; CGSCC: Function Attrs: norecurse
644; CGSCC-LABEL: define {{[^@]+}}@kernel4d3
645; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
646; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
647; CGSCC:       L:
648; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
649; CGSCC-NEXT:    ret void
650; CGSCC:       S:
651; CGSCC-NEXT:    ret void
652;
; Test input: the body has the same instructions as @kernel4d2 but operates on
; @QD3. The expected output above is likewise use1(i32 2) with the store and
; the @barrier call removed.
653  br i1 %c, label %S, label %L
654L:
655  call void @barrier();
656  %v = load i32, ptr addrspace(3) @QD3
657  call void @use1(i32 %v)
658  ret void
659S:
660  store i32 2, ptr addrspace(3) @QD3
661  ret void
662}
663
664define amdgpu_kernel void @kernel_unknown_and_aligned1(i1 %c) "kernel" {
665; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned1
666; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
667; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
668; TUNIT:       L:
669; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
670; TUNIT-NEXT:    ret void
671; TUNIT:       S:
672; TUNIT-NEXT:    call void @sync()
673; TUNIT-NEXT:    call void @barrier() #[[ATTR7]]
674; TUNIT-NEXT:    call void @sync()
675; TUNIT-NEXT:    ret void
676;
677; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned1
678; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
679; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
680; CGSCC:       L:
681; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
682; CGSCC-NEXT:    ret void
683; CGSCC:       S:
684; CGSCC-NEXT:    call void @sync()
685; CGSCC-NEXT:    call void @barrier() #[[ATTR6]]
686; CGSCC-NEXT:    call void @sync()
687; CGSCC-NEXT:    ret void
688;
; Test input: mixes @sync and @barrier. Block L calls @barrier and loads
; @UAA1; block S stores 2 between two @sync calls, with a @barrier right after
; the store. In the expected output above the load is replaced by the constant
; 2, the store and block L's @barrier are removed, and block S keeps its
; @sync, @barrier, @sync sequence.
689  br i1 %c, label %S, label %L
690L:
691  call void @barrier();
692  %v = load i32, ptr addrspace(3) @UAA1
693  call void @use1(i32 %v)
694  ret void
695S:
696  call void @sync();
697  store i32 2, ptr addrspace(3) @UAA1
698  call void @barrier();
699  call void @sync();
700  ret void
701}
702
703define amdgpu_kernel void @kernel_unknown_and_aligned2(i1 %c) "kernel" {
704; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned2
705; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
706; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
707; TUNIT:       L:
708; TUNIT-NEXT:    call void @sync()
709; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
710; TUNIT-NEXT:    ret void
711; TUNIT:       S:
712; TUNIT-NEXT:    call void @sync()
713; TUNIT-NEXT:    call void @barrier() #[[ATTR7]]
714; TUNIT-NEXT:    call void @sync()
715; TUNIT-NEXT:    ret void
716;
717; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned2
718; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
719; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
720; CGSCC:       L:
721; CGSCC-NEXT:    call void @sync()
722; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
723; CGSCC-NEXT:    ret void
724; CGSCC:       S:
725; CGSCC-NEXT:    call void @sync()
726; CGSCC-NEXT:    call void @barrier() #[[ATTR6]]
727; CGSCC-NEXT:    call void @sync()
728; CGSCC-NEXT:    ret void
729;
; Test input: same block S as @kernel_unknown_and_aligned1 (on @UAA2), but
; block L calls @sync rather than @barrier before the load. In the expected
; output above the load is replaced by the constant 2 and the store is
; removed; every call is kept.
730  br i1 %c, label %S, label %L
731L:
732  call void @sync();
733  %v = load i32, ptr addrspace(3) @UAA2
734  call void @use1(i32 %v)
735  ret void
736S:
737  call void @sync();
738  store i32 2, ptr addrspace(3) @UAA2
739  call void @barrier();
740  call void @sync();
741  ret void
742}
743
744define amdgpu_kernel void @kernel_unknown_and_aligned3(i1 %c) "kernel" {
745; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned3
746; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
747; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
748; TUNIT:       L:
749; TUNIT-NEXT:    call void @sync()
750; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
751; TUNIT-NEXT:    call void @barrier() #[[ATTR7]]
752; TUNIT-NEXT:    ret void
753; TUNIT:       S:
754; TUNIT-NEXT:    call void @sync()
755; TUNIT-NEXT:    call void @sync()
756; TUNIT-NEXT:    ret void
757;
758; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_aligned3
759; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
760; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
761; CGSCC:       L:
762; CGSCC-NEXT:    call void @sync()
763; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
764; CGSCC-NEXT:    call void @barrier() #[[ATTR6]]
765; CGSCC-NEXT:    ret void
766; CGSCC:       S:
767; CGSCC-NEXT:    call void @sync()
768; CGSCC-NEXT:    call void @sync()
769; CGSCC-NEXT:    ret void
770;
; Test input: block L calls @sync, loads @UAA3, and calls @barrier after the
; use; block S stores 2 between two @sync calls with no @barrier. In the
; expected output above the load is replaced by the constant 2 and the store
; is removed; every call is kept.
771  br i1 %c, label %S, label %L
772L:
773  call void @sync();
774  %v = load i32, ptr addrspace(3) @UAA3
775  call void @use1(i32 %v)
776  call void @barrier();
777  ret void
778S:
779  call void @sync();
780  store i32 2, ptr addrspace(3) @UAA3
781  call void @sync();
782  ret void
783}
784
785define amdgpu_kernel void @kernel_unknown_and_not_aligned1(i1 %c) "kernel" {
786; TUNIT-LABEL: define {{[^@]+}}@kernel_unknown_and_not_aligned1
787; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
788; TUNIT-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
789; TUNIT:       L:
790; TUNIT-NEXT:    call void @sync()
791; TUNIT-NEXT:    call void @use1(i32 2) #[[ATTR7]]
792; TUNIT-NEXT:    ret void
793; TUNIT:       S:
794; TUNIT-NEXT:    call void @sync()
795; TUNIT-NEXT:    call void @sync()
796; TUNIT-NEXT:    ret void
797;
798; CGSCC-LABEL: define {{[^@]+}}@kernel_unknown_and_not_aligned1
799; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
800; CGSCC-NEXT:    br i1 [[C]], label [[S:%.*]], label [[L:%.*]]
801; CGSCC:       L:
802; CGSCC-NEXT:    call void @sync()
803; CGSCC-NEXT:    call void @use1(i32 2) #[[ATTR6]]
804; CGSCC-NEXT:    ret void
805; CGSCC:       S:
806; CGSCC-NEXT:    call void @sync()
807; CGSCC-NEXT:    call void @sync()
808; CGSCC-NEXT:    ret void
809;
; Test input: only @sync is used, with no @barrier anywhere. Block L calls
; @sync and loads @UANA1; block S stores 2 between two @sync calls. In the
; expected output above the load is replaced by the constant 2 and the store
; is removed; all @sync calls are kept.
810  br i1 %c, label %S, label %L
811L:
812  call void @sync();
813  %v = load i32, ptr addrspace(3) @UANA1
814  call void @use1(i32 %v)
815  ret void
816S:
817  call void @sync();
818  store i32 2, ptr addrspace(3) @UANA1
819  call void @sync();
820  ret void
821}
822
; External functions called by the tests above.
; @sync is declared with no attributes at all.
823declare void @sync()
; @barrier carries the "llvm.assume"="ompx_aligned_barrier" assumption string.
824declare void @barrier() norecurse nounwind nocallback "llvm.assume"="ompx_aligned_barrier"
; @use1 consumes a loaded or folded value; it is declared nosync.
825declare void @use1(i32) nosync norecurse nounwind nocallback
; Runtime entry points called at the start and end of @kernel.
826declare i32 @__kmpc_target_init(ptr, ptr) nocallback
827declare void @__kmpc_target_deinit() nocallback
828declare void @llvm.assume(i1)
829
830!llvm.module.flags = !{!0, !1}
831
832!0 = !{i32 7, !"openmp", i32 50}
833!1 = !{i32 7, !"openmp-device", i32 50}
834
835;.
836; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" }
837; TUNIT: attributes #[[ATTR1]] = { "kernel" }
838; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
839; TUNIT: attributes #[[ATTR3:[0-9]+]] = { nocallback norecurse nosync nounwind }
840; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback }
841; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
842; TUNIT: attributes #[[ATTR6]] = { nounwind "llvm.assume"="ompx_aligned_barrier" }
843; TUNIT: attributes #[[ATTR7]] = { nounwind }
844;.
845; CGSCC: attributes #[[ATTR0]] = { norecurse "kernel" }
846; CGSCC: attributes #[[ATTR1]] = { "kernel" }
847; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
848; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback norecurse nosync nounwind }
849; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback }
850; CGSCC: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
851; CGSCC: attributes #[[ATTR6]] = { nounwind }
852;.
853; TUNIT: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
854; TUNIT: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
855;.
856; CGSCC: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
857; CGSCC: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
858;.
859