xref: /llvm-project/llvm/test/Transforms/OpenMP/barrier_removal.ll (revision 07ed8187acc31ac3f4779da452864a29d48799ac)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
2; RUN: opt < %s -S -passes=openmp-opt | FileCheck %s --check-prefixes=CHECK,MODULE
3; RUN: opt < %s -S -passes=openmp-opt-cgscc | FileCheck %s --check-prefixes=CHECK,CGSCC
4; REQUIRES: amdgpu-registered-target
5
6target triple = "amdgcn-amd-amdhsa"
7
; Opaque external callees used by the test functions in this file.
8declare void @useI32(i32)
9declare void @unknown()
; The "ompx_aligned_barrier" assumption is attached to the declaration itself,
; so every call to @aligned_barrier carries it.
10declare void @aligned_barrier() "llvm.assume"="ompx_aligned_barrier"
; Barrier intrinsics; the and/or/popc variants take and return an i32.
11declare void @llvm.nvvm.barrier0()
12declare i32 @llvm.nvvm.barrier0.and(i32)
13declare i32 @llvm.nvvm.barrier0.or(i32)
14declare i32 @llvm.nvvm.barrier0.popc(i32)
15declare void @llvm.amdgcn.s.barrier()
; Only called by @pos_empty_1 in the visible part of the file.
16declare void @llvm.assume(i1)
17
18;.
19; CHECK: @GC1 = constant i32 42
20; CHECK: @GC2 = addrspace(4) global i32 0
21; CHECK: @GPtr4 = addrspace(4) global ptr addrspace(4) null
22; CHECK: @G = global i32 42
23; CHECK: @GS = addrspace(3) global i32 0
24; CHECK: @GPtr = global ptr null
25; CHECK: @PG1 = thread_local global i32 42
26; CHECK: @PG2 = addrspace(5) global i32 0
27; CHECK: @GPtr5 = global ptr addrspace(5) null
28; CHECK: @G1 = global i32 42
29; CHECK: @G2 = addrspace(1) global i32 0
30;.
; Kernel whose only non-assume call is an @unknown call site carrying the
; "ompx_aligned_barrier" assumption. The module-pass expectations show an empty
; body; the CGSCC expectations keep all three calls.
31define amdgpu_kernel void @pos_empty_1(i1 %c) "kernel" {
32; MODULE-LABEL: define {{[^@]+}}@pos_empty_1
33; MODULE-SAME: (i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] {
34; MODULE-NEXT:    ret void
35;
36; CGSCC-LABEL: define {{[^@]+}}@pos_empty_1
37; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] {
38; CGSCC-NEXT:    call void @llvm.assume(i1 [[C]])
39; CGSCC-NEXT:    call void @unknown() #[[ATTR0:[0-9]+]]
40; CGSCC-NEXT:    call void @llvm.assume(i1 [[C]])
41; CGSCC-NEXT:    ret void
42;
43  call void @llvm.assume(i1 %c)
44  call void @unknown() "llvm.assume"="ompx_aligned_barrier"
45  call void @llvm.assume(i1 %c)
46  ret void
47}
; Kernel containing a single @aligned_barrier call and nothing else. Both runs
; expect the call to be gone, leaving only `ret void`.
48define amdgpu_kernel void @pos_empty_2() "kernel" {
49; CHECK-LABEL: define {{[^@]+}}@pos_empty_2
50; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
51; CHECK-NEXT:    ret void
52;
53  call void @aligned_barrier()
54  ret void
55}
; Same shape as @pos_empty_2 but using the @llvm.nvvm.barrier0 intrinsic; the
; expected output contains only `ret void`.
56define amdgpu_kernel void @pos_empty_3() "kernel" {
57; CHECK-LABEL: define {{[^@]+}}@pos_empty_3
58; CHECK-SAME: () #[[ATTR4]] {
59; CHECK-NEXT:    ret void
60;
61  call void @llvm.nvvm.barrier0()
62  ret void
63}
; Lone @llvm.nvvm.barrier0.and call whose i32 result is unused; the expected
; output contains only `ret void`.
64define amdgpu_kernel void @pos_empty_4() "kernel" {
65; CHECK-LABEL: define {{[^@]+}}@pos_empty_4
66; CHECK-SAME: () #[[ATTR4]] {
67; CHECK-NEXT:    ret void
68;
69  call i32 @llvm.nvvm.barrier0.and(i32 0)
70  ret void
71}
; Lone @llvm.nvvm.barrier0.or call whose i32 result is unused; the expected
; output contains only `ret void`.
72define amdgpu_kernel void @pos_empty_5() "kernel" {
73; CHECK-LABEL: define {{[^@]+}}@pos_empty_5
74; CHECK-SAME: () #[[ATTR4]] {
75; CHECK-NEXT:    ret void
76;
77  call i32 @llvm.nvvm.barrier0.or(i32 0)
78  ret void
79}
; Lone @llvm.nvvm.barrier0.popc call whose i32 result is unused; the expected
; output contains only `ret void`.
80define amdgpu_kernel void @pos_empty_6() "kernel" {
81; CHECK-LABEL: define {{[^@]+}}@pos_empty_6
82; CHECK-SAME: () #[[ATTR4]] {
83; CHECK-NEXT:    ret void
84;
85  call i32 @llvm.nvvm.barrier0.popc(i32 0)
86  ret void
87}
; @llvm.amdgcn.s.barrier as the first instruction of the kernel, followed by an
; opaque call. The expected output keeps the @unknown call and drops the barrier.
88define amdgpu_kernel void @pos_empty_7a() "kernel" {
89; CHECK-LABEL: define {{[^@]+}}@pos_empty_7a
90; CHECK-SAME: () #[[ATTR4]] {
91; CHECK-NEXT:    call void @unknown()
92; CHECK-NEXT:    ret void
93;
94  call void @llvm.amdgcn.s.barrier()
95  call void @unknown()
96  ret void
97}
98; FIXME: We should remove the barrier.
; Variant of @pos_empty_7a with a `nosync readnone` call to @unknown placed in
; front of the barrier. The recorded expectations currently keep the barrier
; and both calls.
99define amdgpu_kernel void @pos_empty_7b() "kernel" {
100; CHECK-LABEL: define {{[^@]+}}@pos_empty_7b
101; CHECK-SAME: () #[[ATTR4]] {
102; CHECK-NEXT:    call void @unknown() #[[ATTR5:[0-9]+]]
103; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
104; CHECK-NEXT:    call void @unknown()
105; CHECK-NEXT:    ret void
106;
107  call void @unknown() nosync readnone
108  call void @llvm.amdgcn.s.barrier()
109  call void @unknown()
110  ret void
111}
; Barrier inside a conditionally executed block. The s.barrier call site is
; annotated with the "ompx_aligned_barrier" assumption and is preceded by a
; release fence. The expected output leaves block `t` with only its branch,
; i.e. both the fence and the barrier are gone.
112define amdgpu_kernel void @pos_empty_8(i1 %c) "kernel" {
113; CHECK-LABEL: define {{[^@]+}}@pos_empty_8
114; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR4]] {
115; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
116; CHECK:       t:
117; CHECK-NEXT:    br label [[F]]
118; CHECK:       f:
119; CHECK-NEXT:    ret void
120;
121  br i1 %c, label %t, label %f
122t:
123  fence release
124  call void @llvm.amdgcn.s.barrier() "llvm.assume"="ompx_aligned_barrier"
125  br label %f
126f:
127  ret void
128}
; Opaque call followed by an unannotated @llvm.amdgcn.s.barrier at the end of
; the kernel. The expected output is identical to the input (barrier kept).
129define amdgpu_kernel void @neg_empty_8() "kernel" {
130; CHECK-LABEL: define {{[^@]+}}@neg_empty_8
131; CHECK-SAME: () #[[ATTR4]] {
132; CHECK-NEXT:    call void @unknown()
133; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
134; CHECK-NEXT:    ret void
135;
136  call void @unknown()
137  call void @llvm.amdgcn.s.barrier()
138  ret void
139}
; Unannotated s.barrier calls in both arms of a branch and in the merge block,
; each surrounded by release fences. All three barriers are expected to stay.
; In `t` and `f` the expected output no longer has the fence that precedes the
; barrier; in `m` both fences remain.
140define amdgpu_kernel void @neg_empty_9(i1 %c) "kernel" {
141; CHECK-LABEL: define {{[^@]+}}@neg_empty_9
142; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR4]] {
143; CHECK-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
144; CHECK:       t:
145; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
146; CHECK-NEXT:    fence release
147; CHECK-NEXT:    br label [[M:%.*]]
148; CHECK:       f:
149; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
150; CHECK-NEXT:    fence release
151; CHECK-NEXT:    br label [[M]]
152; CHECK:       m:
153; CHECK-NEXT:    fence release
154; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
155; CHECK-NEXT:    fence release
156; CHECK-NEXT:    ret void
157;
158  br i1 %c, label %t, label %f
159t:
160  fence release
161  call void @llvm.amdgcn.s.barrier()
162  fence release
163  br label %m
164f:
165  fence release
166  call void @llvm.amdgcn.s.barrier()
167  fence release
168  br label %m
169m:
170  fence release
171  call void @llvm.amdgcn.s.barrier()
172  fence release
173  ret void
174}
175; FIXME: We should remove the barrier in block `m`.
; Entry block branches unconditionally to `m`, which holds a single unannotated
; s.barrier. The recorded expectations still contain the barrier.
176define amdgpu_kernel void @pos_empty_10() "kernel" {
177; CHECK-LABEL: define {{[^@]+}}@pos_empty_10
178; CHECK-SAME: () #[[ATTR4]] {
179; CHECK-NEXT:    br label [[M:%.*]]
180; CHECK:       m:
181; CHECK-NEXT:    call void @llvm.amdgcn.s.barrier()
182; CHECK-NEXT:    ret void
183;
184  br label %m
185m:
186  call void @llvm.amdgcn.s.barrier()
187  ret void
188}
; Like @pos_empty_10, but an @aligned_barrier call precedes the s.barrier in
; `m`. Here the expected output drops both calls.
189define amdgpu_kernel void @pos_empty_11() "kernel" {
190; CHECK-LABEL: define {{[^@]+}}@pos_empty_11
191; CHECK-SAME: () #[[ATTR4]] {
192; CHECK-NEXT:    br label [[M:%.*]]
193; CHECK:       m:
194; CHECK-NEXT:    ret void
195;
196  br label %m
197m:
198  call void @aligned_barrier()
199  call void @llvm.amdgcn.s.barrier()
200  ret void
201}
; Function with an empty body; called from both branch arms of @neg_empty_12.
202define void @empty() {
203; CHECK-LABEL: define {{[^@]+}}@empty() {
204; CHECK-NEXT:    ret void
205;
206  ret void
207}
208; FIXME: We should remove the barrier at the end but not the first one.
; Both branch arms call @empty; only `t` has a barrier afterwards, and the merge
; block `m` has another one. The module-pass expectations drop the @empty calls
; and keep both barriers; the CGSCC expectations keep every call.
209define amdgpu_kernel void @neg_empty_12(i1 %c) "kernel" {
210; MODULE-LABEL: define {{[^@]+}}@neg_empty_12
211; MODULE-SAME: (i1 [[C:%.*]]) #[[ATTR4]] {
212; MODULE-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
213; MODULE:       t:
214; MODULE-NEXT:    call void @llvm.amdgcn.s.barrier()
215; MODULE-NEXT:    br label [[M:%.*]]
216; MODULE:       f:
217; MODULE-NEXT:    br label [[M]]
218; MODULE:       m:
219; MODULE-NEXT:    call void @llvm.amdgcn.s.barrier()
220; MODULE-NEXT:    ret void
221;
222; CGSCC-LABEL: define {{[^@]+}}@neg_empty_12
223; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR4]] {
224; CGSCC-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
225; CGSCC:       t:
226; CGSCC-NEXT:    call void @empty()
227; CGSCC-NEXT:    call void @llvm.amdgcn.s.barrier()
228; CGSCC-NEXT:    br label [[M:%.*]]
229; CGSCC:       f:
230; CGSCC-NEXT:    call void @empty()
231; CGSCC-NEXT:    br label [[M]]
232; CGSCC:       m:
233; CGSCC-NEXT:    call void @llvm.amdgcn.s.barrier()
234; CGSCC-NEXT:    ret void
235;
236  br i1 %c, label %t, label %f
237t:
238  call void @empty()
239  call void @llvm.amdgcn.s.barrier()
240  br label %m
241f:
242  call void @empty()
243  br label %m
244m:
245  call void @llvm.amdgcn.s.barrier()
246  ret void
247}
; Defined without the amdgpu_kernel calling convention but with the "kernel"
; string attribute. The body is a single opaque call and contains no barrier;
; the expected output is identical to the input.
248define void @neg_empty_1() "kernel" {
249; CHECK-LABEL: define {{[^@]+}}@neg_empty_1
250; CHECK-SAME: () #[[ATTR4]] {
251; CHECK-NEXT:    call void @unknown()
252; CHECK-NEXT:    ret void
253;
254  call void @unknown()
255  ret void
256}
; Defined without the amdgpu_kernel calling convention but with the "kernel"
; string attribute; the body is a single @aligned_barrier call.
; NOTE(review): the name says "neg", yet the recorded expectations show the
; barrier removed (only `ret void` remains) - confirm this is intended.
257define void @neg_empty_2() "kernel" {
258; CHECK-LABEL: define {{[^@]+}}@neg_empty_2
259; CHECK-SAME: () #[[ATTR4]] {
260; CHECK-NEXT:    ret void
261;
262  call void @aligned_barrier()
263  ret void
264}
265
; Globals read by @pos_constant_loads: one `constant`, and two in addrspace(4)
; (presumably the AMDGPU constant address space - confirm against the AMDGPU
; usage guide).
266@GC1 = constant i32 42
267@GC2 = addrspace(4) global i32 0
268@GPtr4 = addrspace(4) global ptr addrspace(4) null
; Loads from the globals above are separated by three @aligned_barrier calls.
; The expected output has no barrier left, performs the loads directly in
; addrspace(4) (the addrspacecasts are gone), and uses the literal 42 in place
; of the load from @GC1.
269define amdgpu_kernel void @pos_constant_loads() "kernel" {
270; CHECK-LABEL: define {{[^@]+}}@pos_constant_loads
271; CHECK-SAME: () #[[ATTR4]] {
272; CHECK-NEXT:    [[ARG:%.*]] = load ptr addrspace(4), ptr addrspace(4) @GPtr4, align 8
273; CHECK-NEXT:    [[B:%.*]] = load i32, ptr addrspace(4) @GC2, align 4
274; CHECK-NEXT:    [[C:%.*]] = load i32, ptr addrspace(4) [[ARG]], align 4
275; CHECK-NEXT:    [[D:%.*]] = add i32 42, [[B]]
276; CHECK-NEXT:    [[E:%.*]] = add i32 [[D]], [[C]]
277; CHECK-NEXT:    call void @useI32(i32 [[E]])
278; CHECK-NEXT:    ret void
279;
280  %GPtr4c = addrspacecast ptr addrspace(4) @GPtr4 to ptr
281  %arg = load ptr addrspace(4), ptr %GPtr4c
282  %a = load i32, ptr @GC1
283  call void @aligned_barrier()
284  %GC2c = addrspacecast ptr addrspace(4) @GC2 to ptr
285  %b = load i32, ptr %GC2c
286  call void @aligned_barrier()
287  %argc = addrspacecast ptr addrspace(4) %arg to ptr
288  %c = load i32, ptr %argc
289  call void @aligned_barrier()
290  %d = add i32 %a, %b
291  %e = add i32 %d, %c
292  call void @useI32(i32 %e)
293  ret void
294}
; Non-constant globals read by @neg_loads; @GS lives in addrspace(3)
; (presumably AMDGPU LDS/shared memory - confirm against the AMDGPU usage
; guide). @GPtr is also read by @neg_mem.
295@G = global i32 42
296@GS = addrspace(3) global i32 0
297@GPtr = global ptr null
298; TODO: We could remove some of the barriers due to the lack of write effects.
; Same load/barrier interleaving as @pos_constant_loads, but on the mutable
; globals above. The expected output keeps all three barriers; only the
; addrspacecast of @GS is folded into a direct addrspace(3) load.
299define amdgpu_kernel void @neg_loads() "kernel" {
300; CHECK-LABEL: define {{[^@]+}}@neg_loads
301; CHECK-SAME: () #[[ATTR4]] {
302; CHECK-NEXT:    [[ARG:%.*]] = load ptr, ptr @GPtr, align 8
303; CHECK-NEXT:    [[A:%.*]] = load i32, ptr @G, align 4
304; CHECK-NEXT:    call void @aligned_barrier()
305; CHECK-NEXT:    [[B:%.*]] = load i32, ptr addrspace(3) @GS, align 4
306; CHECK-NEXT:    call void @aligned_barrier()
307; CHECK-NEXT:    [[C:%.*]] = load i32, ptr [[ARG]], align 4
308; CHECK-NEXT:    call void @aligned_barrier()
309; CHECK-NEXT:    [[D:%.*]] = add i32 [[A]], [[B]]
310; CHECK-NEXT:    [[E:%.*]] = add i32 [[D]], [[C]]
311; CHECK-NEXT:    call void @useI32(i32 [[E]])
312; CHECK-NEXT:    ret void
313;
314  %arg = load ptr, ptr @GPtr
315  %a = load i32, ptr @G
316  call void @aligned_barrier()
317  %GSc = addrspacecast ptr addrspace(3) @GS to ptr
318  %b = load i32, ptr %GSc
319  call void @aligned_barrier()
320  %c = load i32, ptr %arg
321  call void @aligned_barrier()
322  %d = add i32 %a, %b
323  %e = add i32 %d, %c
324  call void @useI32(i32 %e)
325  ret void
326}
; Globals used by @pos_priv_mem: a thread_local global, a global in
; addrspace(5), and a pointer-to-addrspace(5) global (addrspace(5) is
; presumably AMDGPU private memory - confirm against the AMDGPU usage guide).
327@PG1 = thread_local global i32 42
328@PG2 = addrspace(5) global i32 0
329@GPtr5 = global ptr addrspace(5) null
; Loads and stores touching the globals above and an addrspace(5) alloca,
; separated by four @aligned_barrier calls. The expected output contains no
; barrier, and the addrspacecast-based accesses become direct addrspace(5)
; accesses.
330define amdgpu_kernel void @pos_priv_mem() "kernel" {
331; CHECK-LABEL: define {{[^@]+}}@pos_priv_mem
332; CHECK-SAME: () #[[ATTR4]] {
333; CHECK-NEXT:    [[ARG:%.*]] = load ptr addrspace(5), ptr @GPtr5, align 4
334; CHECK-NEXT:    [[LOC:%.*]] = alloca i32, align 4, addrspace(5)
335; CHECK-NEXT:    [[A:%.*]] = load i32, ptr @PG1, align 4
336; CHECK-NEXT:    store i32 [[A]], ptr addrspace(5) [[LOC]], align 4
337; CHECK-NEXT:    [[B:%.*]] = load i32, ptr addrspace(5) @PG2, align 4
338; CHECK-NEXT:    store i32 [[B]], ptr addrspace(5) [[ARG]], align 4
339; CHECK-NEXT:    [[V:%.*]] = load i32, ptr addrspace(5) [[LOC]], align 4
340; CHECK-NEXT:    store i32 [[V]], ptr @PG1, align 4
341; CHECK-NEXT:    ret void
342;
343  %arg = load ptr addrspace(5), ptr @GPtr5
344  %loc = alloca i32, addrspace(5)
345  %a = load i32, ptr @PG1
346  call void @aligned_barrier()
347  store i32 %a, ptr addrspace(5) %loc
348  %PG2c = addrspacecast ptr addrspace(5) @PG2 to ptr
349  %b = load i32, ptr %PG2c
350  call void @aligned_barrier()
351  %argc = addrspacecast ptr addrspace(5) %arg to ptr
352  store i32 %b, ptr %argc
353  call void @aligned_barrier()
354  %v = load i32, ptr addrspace(5) %loc
355  store i32 %v, ptr @PG1
356  call void @aligned_barrier()
357  ret void
358}
; Mutable globals used by @neg_mem; @G2 is in addrspace(1) (presumably AMDGPU
; global memory - confirm against the AMDGPU usage guide).
359@G1 = global i32 42
360@G2 = addrspace(1) global i32 0
; Loads, stores and fences on the globals above and on the pointer loaded from
; @GPtr, separated by three @aligned_barrier calls. The expected output keeps
; the first two barriers and all three fences; only the barrier directly in
; front of `ret void` is gone.
361define amdgpu_kernel void @neg_mem() "kernel" {
362; CHECK-LABEL: define {{[^@]+}}@neg_mem
363; CHECK-SAME: () #[[ATTR4]] {
364; CHECK-NEXT:    [[ARG:%.*]] = load ptr, ptr @GPtr, align 8
365; CHECK-NEXT:    [[A:%.*]] = load i32, ptr @G1, align 4
366; CHECK-NEXT:    fence seq_cst
367; CHECK-NEXT:    call void @aligned_barrier()
368; CHECK-NEXT:    store i32 [[A]], ptr [[ARG]], align 4
369; CHECK-NEXT:    fence release
370; CHECK-NEXT:    call void @aligned_barrier()
371; CHECK-NEXT:    [[B:%.*]] = load i32, ptr addrspace(1) @G2, align 4
372; CHECK-NEXT:    store i32 [[B]], ptr @G1, align 4
373; CHECK-NEXT:    fence acquire
374; CHECK-NEXT:    ret void
375;
376  %arg = load ptr, ptr @GPtr
377  %a = load i32, ptr @G1
378  fence seq_cst
379  call void @aligned_barrier()
380  store i32 %a, ptr %arg
381  fence release
382  call void @aligned_barrier()
383  %G2c = addrspacecast ptr addrspace(1) @G2 to ptr
384  %b = load i32, ptr %G2c
385  store i32 %b, ptr @G1
386  fence acquire
387  call void @aligned_barrier()
388  ret void
389}
390
; Eight back-to-back barrier calls of mixed kinds (nvvm.barrier0,
; @aligned_barrier, amdgcn.s.barrier) with nothing in between. The expected
; output contains only `ret void`.
391define amdgpu_kernel void @pos_multiple() "kernel" {
392; CHECK-LABEL: define {{[^@]+}}@pos_multiple
393; CHECK-SAME: () #[[ATTR4]] {
394; CHECK-NEXT:    ret void
395;
396  call void @llvm.nvvm.barrier0()
397  call void @aligned_barrier()
398  call void @aligned_barrier()
399  call void @llvm.amdgcn.s.barrier()
400  call void @aligned_barrier()
401  call void @llvm.nvvm.barrier0()
402  call void @aligned_barrier()
403  call void @aligned_barrier()
404  ret void
405}
406
; Multi-block kernel whose blocks contain only fences and barrier calls (no
; loads, stores or other calls). The expected output keeps just the control
; flow: every fence and every barrier is gone.
407define amdgpu_kernel void @multiple_blocks_kernel_1(i1 %c0, i1 %c1) "kernel" {
408; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_kernel_1
409; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] {
410; CHECK-NEXT:    br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
411; CHECK:       t0:
412; CHECK-NEXT:    br label [[T0B:%.*]]
413; CHECK:       t0b:
414; CHECK-NEXT:    br label [[M:%.*]]
415; CHECK:       f0:
416; CHECK-NEXT:    br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
417; CHECK:       t1:
418; CHECK-NEXT:    br label [[M]]
419; CHECK:       f1:
420; CHECK-NEXT:    br label [[M]]
421; CHECK:       m:
422; CHECK-NEXT:    ret void
423;
424  fence acquire
425  call void @llvm.nvvm.barrier0()
426  fence release
427  call void @aligned_barrier()
428  fence seq_cst
429  br i1 %c0, label %t0, label %f0
430t0:
431  fence seq_cst
432  call void @aligned_barrier()
433  fence seq_cst
434  br label %t0b
435t0b:
436  fence seq_cst
437  call void @aligned_barrier()
438  fence seq_cst
439  br label %m
440f0:
441  fence release
442  call void @aligned_barrier()
443  fence acquire
444  call void @llvm.nvvm.barrier0()
445  fence acquire
446  br i1 %c1, label %t1, label %f1
447t1:
448  fence acquire
449  call void @aligned_barrier()
450  fence seq_cst
451  br label %m
452f1:
453  fence seq_cst
454  call void @aligned_barrier()
455  fence acquire
456  br label %m
457m:
458  fence seq_cst
459  call void @aligned_barrier()
460  fence seq_cst
461  ret void
462}
463
; Same control flow as @multiple_blocks_kernel_1, but with stores to %p in the
; entry block, `f0` and `m` instead of fences. In the expected output the only
; surviving barriers are the one following the store in the entry block and
; the nvvm.barrier0 following the store in `f0`; every other barrier is gone.
464define amdgpu_kernel void @multiple_blocks_kernel_2(i1 %c0, i1 %c1, ptr %p) "kernel" {
465; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_kernel_2
466; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
467; CHECK-NEXT:    store i32 4, ptr [[P]], align 4
468; CHECK-NEXT:    call void @aligned_barrier()
469; CHECK-NEXT:    br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
470; CHECK:       t0:
471; CHECK-NEXT:    br label [[T0B:%.*]]
472; CHECK:       t0b:
473; CHECK-NEXT:    br label [[M:%.*]]
474; CHECK:       f0:
475; CHECK-NEXT:    store i32 4, ptr [[P]], align 4
476; CHECK-NEXT:    call void @llvm.nvvm.barrier0()
477; CHECK-NEXT:    br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
478; CHECK:       t1:
479; CHECK-NEXT:    br label [[M]]
480; CHECK:       f1:
481; CHECK-NEXT:    br label [[M]]
482; CHECK:       m:
483; CHECK-NEXT:    store i32 4, ptr [[P]], align 4
484; CHECK-NEXT:    ret void
485;
486  call void @llvm.nvvm.barrier0()
487  store i32 4, ptr %p
488  call void @aligned_barrier()
489  br i1 %c0, label %t0, label %f0
490t0:
491  call void @aligned_barrier()
492  br label %t0b
493t0b:
494  call void @aligned_barrier()
495  br label %m
496f0:
497  call void @aligned_barrier()
498  store i32 4, ptr %p
499  call void @llvm.nvvm.barrier0()
500  br i1 %c1, label %t1, label %f1
501t1:
502  call void @aligned_barrier()
503  br label %m
504f1:
505  call void @aligned_barrier()
506  br label %m
507m:
508  store i32 4, ptr %p
509  call void @aligned_barrier()
510  ret void
511}
512
; Defined without the amdgpu_kernel calling convention (but with the "kernel"
; string attribute). Every block, including the entry block, contains only
; barrier calls; the expected output keeps just the branches.
513define void @multiple_blocks_non_kernel_1(i1 %c0, i1 %c1) "kernel" {
514; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_1
515; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] {
516; CHECK-NEXT:    br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
517; CHECK:       t0:
518; CHECK-NEXT:    br label [[T0B:%.*]]
519; CHECK:       t0b:
520; CHECK-NEXT:    br label [[M:%.*]]
521; CHECK:       f0:
522; CHECK-NEXT:    br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
523; CHECK:       t1:
524; CHECK-NEXT:    br label [[M]]
525; CHECK:       f1:
526; CHECK-NEXT:    br label [[M]]
527; CHECK:       m:
528; CHECK-NEXT:    ret void
529;
530  call void @llvm.nvvm.barrier0()
531  call void @aligned_barrier()
532  br i1 %c0, label %t0, label %f0
533t0:
534  call void @aligned_barrier()
535  br label %t0b
536t0b:
537  call void @aligned_barrier()
538  br label %m
539f0:
540  call void @aligned_barrier()
541  call void @llvm.nvvm.barrier0()
542  br i1 %c1, label %t1, label %f1
543t1:
544  call void @aligned_barrier()
545  br label %m
546f1:
547  call void @aligned_barrier()
548  br label %m
549m:
550  call void @aligned_barrier()
551  ret void
552}
553
; Variant of @multiple_blocks_non_kernel_1 in which the entry block has no
; barrier (it only branches). The expected output again keeps just the
; branches.
554define void @multiple_blocks_non_kernel_2(i1 %c0, i1 %c1) "kernel" {
555; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_2
556; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] {
557; CHECK-NEXT:    br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
558; CHECK:       t0:
559; CHECK-NEXT:    br label [[T0B:%.*]]
560; CHECK:       t0b:
561; CHECK-NEXT:    br label [[M:%.*]]
562; CHECK:       f0:
563; CHECK-NEXT:    br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
564; CHECK:       t1:
565; CHECK-NEXT:    br label [[M]]
566; CHECK:       f1:
567; CHECK-NEXT:    br label [[M]]
568; CHECK:       m:
569; CHECK-NEXT:    ret void
570;
571  br i1 %c0, label %t0, label %f0
572t0:
573  call void @aligned_barrier()
574  br label %t0b
575t0b:
576  call void @aligned_barrier()
577  br label %m
578f0:
579  call void @aligned_barrier()
580  call void @llvm.nvvm.barrier0()
581  br i1 %c1, label %t1, label %f1
582t1:
583  call void @aligned_barrier()
584  br label %m
585f1:
586  call void @aligned_barrier()
587  br label %m
588m:
589  call void @aligned_barrier()
590  ret void
591}
592
; Variant of @multiple_blocks_non_kernel_2 in which the `t0`/`t0b` path has no
; barriers at all, so only the `f0` path and `m` contain them. The expected
; output keeps just the branches.
593define void @multiple_blocks_non_kernel_3(i1 %c0, i1 %c1) "kernel" {
594; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_3
595; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR4]] {
596; CHECK-NEXT:    br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
597; CHECK:       t0:
598; CHECK-NEXT:    br label [[T0B:%.*]]
599; CHECK:       t0b:
600; CHECK-NEXT:    br label [[M:%.*]]
601; CHECK:       f0:
602; CHECK-NEXT:    br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
603; CHECK:       t1:
604; CHECK-NEXT:    br label [[M]]
605; CHECK:       f1:
606; CHECK-NEXT:    br label [[M]]
607; CHECK:       m:
608; CHECK-NEXT:    ret void
609;
610  br i1 %c0, label %t0, label %f0
611t0:
612  br label %t0b
613t0b:
614  br label %m
615f0:
616  call void @aligned_barrier()
617  call void @llvm.nvvm.barrier0()
618  br i1 %c1, label %t1, label %f1
619t1:
620  call void @aligned_barrier()
621  br label %m
622f1:
623  call void @aligned_barrier()
624  br label %m
625m:
626  call void @aligned_barrier()
627  ret void
628}
629
; Multi-block function with a distinct store (values 0..3) to %p in the entry
; block, `t0`, `f0` and `m`. In the expected output, every barrier that
; precedes a store within its block is gone, as is the barrier after the store
; in `m`. The barriers that remain are the one after the store in the entry
; block and those in `t0b`, `t1` and `f1`.
630define void @multiple_blocks_non_kernel_effects_1(i1 %c0, i1 %c1, ptr %p) "kernel" {
631; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_effects_1
632; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
633; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
634; CHECK-NEXT:    call void @aligned_barrier()
635; CHECK-NEXT:    br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
636; CHECK:       t0:
637; CHECK-NEXT:    store i32 1, ptr [[P]], align 4
638; CHECK-NEXT:    br label [[T0B:%.*]]
639; CHECK:       t0b:
640; CHECK-NEXT:    call void @aligned_barrier()
641; CHECK-NEXT:    br label [[M:%.*]]
642; CHECK:       f0:
643; CHECK-NEXT:    store i32 2, ptr [[P]], align 4
644; CHECK-NEXT:    br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
645; CHECK:       t1:
646; CHECK-NEXT:    call void @aligned_barrier()
647; CHECK-NEXT:    br label [[M]]
648; CHECK:       f1:
649; CHECK-NEXT:    call void @aligned_barrier()
650; CHECK-NEXT:    br label [[M]]
651; CHECK:       m:
652; CHECK-NEXT:    store i32 3, ptr [[P]], align 4
653; CHECK-NEXT:    ret void
654;
655  call void @aligned_barrier()
656  store i32 0, ptr %p
657  call void @aligned_barrier()
658  br i1 %c0, label %t0, label %f0
659t0:
660  call void @aligned_barrier()
661  store i32 1, ptr %p
662  br label %t0b
663t0b:
664  call void @aligned_barrier()
665  br label %m
666f0:
667  call void @aligned_barrier()
668  call void @llvm.nvvm.barrier0()
669  store i32 2, ptr %p
670  br i1 %c1, label %t1, label %f1
671t1:
672  call void @aligned_barrier()
673  br label %m
674f1:
675  call void @aligned_barrier()
676  br label %m
677m:
678  call void @aligned_barrier()
679  store i32 3, ptr %p
680  call void @aligned_barrier()
681  ret void
682}
683
; Internal helper: stores to %p, then hits an aligned barrier. Called from
; block `m3` of @multiple_blocks_functions_kernel_effects_0. The expected
; output is identical to the input in both runs.
684define internal void @write_then_barrier0(ptr %p) {
685; CHECK-LABEL: define {{[^@]+}}@write_then_barrier0
686; CHECK-SAME: (ptr [[P:%.*]]) {
687; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
688; CHECK-NEXT:    call void @aligned_barrier()
689; CHECK-NEXT:    ret void
690;
691  store i32 0, ptr %p
692  call void @aligned_barrier()
693  ret void
694}
; Internal helper: aligned barrier first, then a store to %p. Called from
; @multiple_blocks_functions_kernel_effects_0. The module-pass expectations
; drop the barrier; the CGSCC expectations keep it.
695define internal void @barrier_then_write0(ptr %p) {
696; MODULE-LABEL: define {{[^@]+}}@barrier_then_write0
697; MODULE-SAME: (ptr [[P:%.*]]) {
698; MODULE-NEXT:    store i32 0, ptr [[P]], align 4
699; MODULE-NEXT:    ret void
700;
701; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write0
702; CGSCC-SAME: (ptr [[P:%.*]]) {
703; CGSCC-NEXT:    call void @aligned_barrier()
704; CGSCC-NEXT:    store i32 0, ptr [[P]], align 4
705; CGSCC-NEXT:    ret void
706;
707  call void @aligned_barrier()
708  store i32 0, ptr %p
709  ret void
710}
; Internal helper: aligned barrier, store to %p, aligned barrier. Called at the
; start of @multiple_blocks_functions_kernel_effects_0. The module-pass
; expectations drop only the leading barrier; the CGSCC expectations keep both.
711define internal void @barrier_then_write_then_barrier0(ptr %p) {
712; MODULE-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier0
713; MODULE-SAME: (ptr [[P:%.*]]) {
714; MODULE-NEXT:    store i32 0, ptr [[P]], align 4
715; MODULE-NEXT:    call void @aligned_barrier()
716; MODULE-NEXT:    ret void
717;
718; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier0
719; CGSCC-SAME: (ptr [[P:%.*]]) {
720; CGSCC-NEXT:    call void @aligned_barrier()
721; CGSCC-NEXT:    store i32 0, ptr [[P]], align 4
722; CGSCC-NEXT:    call void @aligned_barrier()
723; CGSCC-NEXT:    ret void
724;
725  call void @aligned_barrier()
726  store i32 0, ptr %p
727  call void @aligned_barrier()
728  ret void
729}
; amdgpu_kernel that mixes direct @aligned_barrier calls with calls to the
; internal *0 helpers, which themselves store and synchronize.
; Module-pass expectations: every direct @aligned_barrier call in this kernel
; is gone; only the helper calls remain.
; CGSCC expectations: the barriers in the entry block, `t0b3`, `t13` and `f13`
; remain; the barriers directly in front of a helper call (in `f03` and `m3`)
; are gone.
730define amdgpu_kernel void @multiple_blocks_functions_kernel_effects_0(i1 %c0, i1 %c1, ptr %p) "kernel" {
731; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_kernel_effects_0
732; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
733; MODULE-NEXT:    call void @barrier_then_write_then_barrier0(ptr [[P]])
734; MODULE-NEXT:    br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
735; MODULE:       t03:
736; MODULE-NEXT:    call void @barrier_then_write0(ptr [[P]])
737; MODULE-NEXT:    br label [[T0B3:%.*]]
738; MODULE:       t0b3:
739; MODULE-NEXT:    br label [[M3:%.*]]
740; MODULE:       f03:
741; MODULE-NEXT:    call void @barrier_then_write0(ptr [[P]])
742; MODULE-NEXT:    br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
743; MODULE:       t13:
744; MODULE-NEXT:    br label [[M3]]
745; MODULE:       f13:
746; MODULE-NEXT:    br label [[M3]]
747; MODULE:       m3:
748; MODULE-NEXT:    call void @write_then_barrier0(ptr [[P]])
749; MODULE-NEXT:    ret void
750;
751; CGSCC-LABEL: define {{[^@]+}}@multiple_blocks_functions_kernel_effects_0
752; CGSCC-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
753; CGSCC-NEXT:    call void @barrier_then_write_then_barrier0(ptr [[P]])
754; CGSCC-NEXT:    call void @aligned_barrier()
755; CGSCC-NEXT:    br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
756; CGSCC:       t03:
757; CGSCC-NEXT:    call void @barrier_then_write0(ptr [[P]])
758; CGSCC-NEXT:    br label [[T0B3:%.*]]
759; CGSCC:       t0b3:
760; CGSCC-NEXT:    call void @aligned_barrier()
761; CGSCC-NEXT:    br label [[M3:%.*]]
762; CGSCC:       f03:
763; CGSCC-NEXT:    call void @barrier_then_write0(ptr [[P]])
764; CGSCC-NEXT:    br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
765; CGSCC:       t13:
766; CGSCC-NEXT:    call void @aligned_barrier()
767; CGSCC-NEXT:    br label [[M3]]
768; CGSCC:       f13:
769; CGSCC-NEXT:    call void @aligned_barrier()
770; CGSCC-NEXT:    br label [[M3]]
771; CGSCC:       m3:
772; CGSCC-NEXT:    call void @write_then_barrier0(ptr [[P]])
773; CGSCC-NEXT:    ret void
774;
775  call void @barrier_then_write_then_barrier0(ptr %p)
776  call void @aligned_barrier()
777  br i1 %c0, label %t03, label %f03
778t03:
779  call void @barrier_then_write0(ptr %p)
780  br label %t0b3
781t0b3:
782  call void @aligned_barrier()
783  br label %m3
784f03:
785  call void @aligned_barrier()
786  call void @barrier_then_write0(ptr %p)
787  br i1 %c1, label %t13, label %f13
788t13:
789  call void @aligned_barrier()
790  br label %m3
791f13:
792  call void @aligned_barrier()
793  br label %m3
794m3:
795  call void @aligned_barrier()
796  call void @write_then_barrier0(ptr %p)
797  ret void
798}
; Internal helper: stores to %p, then hits an aligned barrier. Called from
; block `m3` of @multiple_blocks_functions_non_kernel_effects_1. The expected
; output is identical to the input in both runs.
799define internal void @write_then_barrier1(ptr %p) {
800; CHECK-LABEL: define {{[^@]+}}@write_then_barrier1
801; CHECK-SAME: (ptr [[P:%.*]]) {
802; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
803; CHECK-NEXT:    call void @aligned_barrier()
804; CHECK-NEXT:    ret void
805;
806  store i32 0, ptr %p
807  call void @aligned_barrier()
808  ret void
809}
; Internal helper: aligned barrier first, then a store to %p. Called from
; @multiple_blocks_functions_non_kernel_effects_1. The module-pass
; expectations drop the barrier; the CGSCC expectations keep it.
810define internal void @barrier_then_write1(ptr %p) {
811; MODULE-LABEL: define {{[^@]+}}@barrier_then_write1
812; MODULE-SAME: (ptr [[P:%.*]]) {
813; MODULE-NEXT:    store i32 0, ptr [[P]], align 4
814; MODULE-NEXT:    ret void
815;
816; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write1
817; CGSCC-SAME: (ptr [[P:%.*]]) {
818; CGSCC-NEXT:    call void @aligned_barrier()
819; CGSCC-NEXT:    store i32 0, ptr [[P]], align 4
820; CGSCC-NEXT:    ret void
821;
822  call void @aligned_barrier()
823  store i32 0, ptr %p
824  ret void
825}
; Internal helper: aligned barrier, store to %p, aligned barrier. Called at the
; start of @multiple_blocks_functions_non_kernel_effects_1. Unlike the *0 and
; *2 siblings, the expected output keeps both barriers in both runs.
826define internal void @barrier_then_write_then_barrier1(ptr %p) {
827; CHECK-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier1
828; CHECK-SAME: (ptr [[P:%.*]]) {
829; CHECK-NEXT:    call void @aligned_barrier()
830; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
831; CHECK-NEXT:    call void @aligned_barrier()
832; CHECK-NEXT:    ret void
833;
834  call void @aligned_barrier()
835  store i32 0, ptr %p
836  call void @aligned_barrier()
837  ret void
838}
; Same body shape as @multiple_blocks_functions_kernel_effects_0, but defined
; as a plain function with neither the amdgpu_kernel calling convention nor the
; "kernel" attribute, and calling the *1 helpers.
; Module-pass expectations: the barrier after the first helper call and the
; barriers directly in front of a helper call (`f03`, `m3`) are gone; the
; barriers in `t0b3`, `t13` and `f13` remain.
; CGSCC expectations: as above, except the barrier after the first helper call
; also remains.
839define void @multiple_blocks_functions_non_kernel_effects_1(i1 %c0, i1 %c1, ptr %p) {
840; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_1
841; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) {
842; MODULE-NEXT:    call void @barrier_then_write_then_barrier1(ptr [[P]])
843; MODULE-NEXT:    br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
844; MODULE:       t03:
845; MODULE-NEXT:    call void @barrier_then_write1(ptr [[P]])
846; MODULE-NEXT:    br label [[T0B3:%.*]]
847; MODULE:       t0b3:
848; MODULE-NEXT:    call void @aligned_barrier()
849; MODULE-NEXT:    br label [[M3:%.*]]
850; MODULE:       f03:
851; MODULE-NEXT:    call void @barrier_then_write1(ptr [[P]])
852; MODULE-NEXT:    br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
853; MODULE:       t13:
854; MODULE-NEXT:    call void @aligned_barrier()
855; MODULE-NEXT:    br label [[M3]]
856; MODULE:       f13:
857; MODULE-NEXT:    call void @aligned_barrier()
858; MODULE-NEXT:    br label [[M3]]
859; MODULE:       m3:
860; MODULE-NEXT:    call void @write_then_barrier1(ptr [[P]])
861; MODULE-NEXT:    ret void
862;
863; CGSCC-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_1
864; CGSCC-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) {
865; CGSCC-NEXT:    call void @barrier_then_write_then_barrier1(ptr [[P]])
866; CGSCC-NEXT:    call void @aligned_barrier()
867; CGSCC-NEXT:    br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
868; CGSCC:       t03:
869; CGSCC-NEXT:    call void @barrier_then_write1(ptr [[P]])
870; CGSCC-NEXT:    br label [[T0B3:%.*]]
871; CGSCC:       t0b3:
872; CGSCC-NEXT:    call void @aligned_barrier()
873; CGSCC-NEXT:    br label [[M3:%.*]]
874; CGSCC:       f03:
875; CGSCC-NEXT:    call void @barrier_then_write1(ptr [[P]])
876; CGSCC-NEXT:    br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
877; CGSCC:       t13:
878; CGSCC-NEXT:    call void @aligned_barrier()
879; CGSCC-NEXT:    br label [[M3]]
880; CGSCC:       f13:
881; CGSCC-NEXT:    call void @aligned_barrier()
882; CGSCC-NEXT:    br label [[M3]]
883; CGSCC:       m3:
884; CGSCC-NEXT:    call void @write_then_barrier1(ptr [[P]])
885; CGSCC-NEXT:    ret void
886;
887  call void @barrier_then_write_then_barrier1(ptr %p)
888  call void @aligned_barrier()
889  br i1 %c0, label %t03, label %f03
890t03:
891  call void @barrier_then_write1(ptr %p)
892  br label %t0b3
893t0b3:
894  call void @aligned_barrier()
895  br label %m3
896f03:
897  call void @aligned_barrier()
898  call void @barrier_then_write1(ptr %p)
899  br i1 %c1, label %t13, label %f13
900t13:
901  call void @aligned_barrier()
902  br label %m3
903f13:
904  call void @aligned_barrier()
905  br label %m3
906m3:
907  call void @aligned_barrier()
908  call void @write_then_barrier1(ptr %p)
909  ret void
910}
911
; Internal helper: stores to %p, then hits an aligned barrier. Referenced by
; the expectations of @multiple_blocks_functions_non_kernel_effects_2. The
; expected output is identical to the input in both runs.
912define internal void @write_then_barrier2(ptr %p) {
913; CHECK-LABEL: define {{[^@]+}}@write_then_barrier2
914; CHECK-SAME: (ptr [[P:%.*]]) {
915; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
916; CHECK-NEXT:    call void @aligned_barrier()
917; CHECK-NEXT:    ret void
918;
919  store i32 0, ptr %p
920  call void @aligned_barrier()
921  ret void
922}
; Internal helper: aligned barrier first, then a store to %p. Unlike the *0 and
; *1 siblings, the expected output keeps the barrier in both runs.
923define internal void @barrier_then_write2(ptr %p) {
924; CHECK-LABEL: define {{[^@]+}}@barrier_then_write2
925; CHECK-SAME: (ptr [[P:%.*]]) {
926; CHECK-NEXT:    call void @aligned_barrier()
927; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
928; CHECK-NEXT:    ret void
929;
930  call void @aligned_barrier()
931  store i32 0, ptr %p
932  ret void
933}
; Internal helper: aligned barrier, store to %p, aligned barrier. The
; module-pass expectations drop only the leading barrier; the CGSCC
; expectations keep both.
934define internal void @barrier_then_write_then_barrier2(ptr %p) {
935; MODULE-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier2
936; MODULE-SAME: (ptr [[P:%.*]]) {
937; MODULE-NEXT:    store i32 0, ptr [[P]], align 4
938; MODULE-NEXT:    call void @aligned_barrier()
939; MODULE-NEXT:    ret void
940;
941; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier2
942; CGSCC-SAME: (ptr [[P:%.*]]) {
943; CGSCC-NEXT:    call void @aligned_barrier()
944; CGSCC-NEXT:    store i32 0, ptr [[P]], align 4
945; CGSCC-NEXT:    call void @aligned_barrier()
946; CGSCC-NEXT:    ret void
947;
948  call void @aligned_barrier()
949  store i32 0, ptr %p
950  call void @aligned_barrier()
951  ret void
952}
; Multi-block function carrying the "kernel" attribute that mixes direct
; @aligned_barrier calls, stores through %p, and calls to the internal helpers
; @barrier_then_write_then_barrier2, @barrier_then_write2 and
; @write_then_barrier2.
;
; Input barriers, by block:
;   entry: one barrier between the first helper call and the store
;   t0b3 : one barrier
;   f03  : one barrier ahead of the @barrier_then_write2 call
;   t13  : one barrier
;   f13  : one barrier
;   m3   : one barrier ahead of the @write_then_barrier2 call
;
; Expected output:
;   - module-pass run: the entry barrier and the m3 barrier are gone; all
;     other barriers stay.
;   - cgscc run: only the m3 barrier is gone; the entry barrier stays.
953define void @multiple_blocks_functions_non_kernel_effects_2(i1 %c0, i1 %c1, ptr %p) "kernel" {
954; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_2
955; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
956; MODULE-NEXT:    call void @barrier_then_write_then_barrier2(ptr [[P]])
957; MODULE-NEXT:    store i32 0, ptr [[P]], align 4
958; MODULE-NEXT:    br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
959; MODULE:       t03:
960; MODULE-NEXT:    call void @barrier_then_write2(ptr [[P]])
961; MODULE-NEXT:    br label [[T0B3:%.*]]
962; MODULE:       t0b3:
963; MODULE-NEXT:    call void @aligned_barrier()
964; MODULE-NEXT:    br label [[M3:%.*]]
965; MODULE:       f03:
966; MODULE-NEXT:    call void @aligned_barrier()
967; MODULE-NEXT:    call void @barrier_then_write2(ptr [[P]])
968; MODULE-NEXT:    br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
969; MODULE:       t13:
970; MODULE-NEXT:    call void @aligned_barrier()
971; MODULE-NEXT:    br label [[M3]]
972; MODULE:       f13:
973; MODULE-NEXT:    call void @aligned_barrier()
974; MODULE-NEXT:    br label [[M3]]
975; MODULE:       m3:
976; MODULE-NEXT:    call void @write_then_barrier2(ptr [[P]])
977; MODULE-NEXT:    store i32 0, ptr [[P]], align 4
978; MODULE-NEXT:    ret void
979;
980; CGSCC-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_2
981; CGSCC-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) #[[ATTR4]] {
982; CGSCC-NEXT:    call void @barrier_then_write_then_barrier2(ptr [[P]])
983; CGSCC-NEXT:    call void @aligned_barrier()
984; CGSCC-NEXT:    store i32 0, ptr [[P]], align 4
985; CGSCC-NEXT:    br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
986; CGSCC:       t03:
987; CGSCC-NEXT:    call void @barrier_then_write2(ptr [[P]])
988; CGSCC-NEXT:    br label [[T0B3:%.*]]
989; CGSCC:       t0b3:
990; CGSCC-NEXT:    call void @aligned_barrier()
991; CGSCC-NEXT:    br label [[M3:%.*]]
992; CGSCC:       f03:
993; CGSCC-NEXT:    call void @aligned_barrier()
994; CGSCC-NEXT:    call void @barrier_then_write2(ptr [[P]])
995; CGSCC-NEXT:    br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
996; CGSCC:       t13:
997; CGSCC-NEXT:    call void @aligned_barrier()
998; CGSCC-NEXT:    br label [[M3]]
999; CGSCC:       f13:
1000; CGSCC-NEXT:    call void @aligned_barrier()
1001; CGSCC-NEXT:    br label [[M3]]
1002; CGSCC:       m3:
1003; CGSCC-NEXT:    call void @write_then_barrier2(ptr [[P]])
1004; CGSCC-NEXT:    store i32 0, ptr [[P]], align 4
1005; CGSCC-NEXT:    ret void
1006;
1007  call void @barrier_then_write_then_barrier2(ptr %p)
1008  call void @aligned_barrier()
1009  store i32 0, ptr %p
1010  br i1 %c0, label %t03, label %f03
1011t03:
1012  call void @barrier_then_write2(ptr %p)
1013  br label %t0b3
1014t0b3:
1015  call void @aligned_barrier()
1016  br label %m3
1017f03:
1018  call void @aligned_barrier()
1019  call void @barrier_then_write2(ptr %p)
1020  br i1 %c1, label %t13, label %f13
1021t13:
1022  call void @aligned_barrier()
1023  br label %m3
1024f13:
1025  call void @aligned_barrier()
1026  br label %m3
1027m3:
1028  call void @aligned_barrier()
1029  call void @write_then_barrier2(ptr %p)
1030  store i32 0, ptr %p
1031  ret void
1032}
1033
1034; Verify we do not remove the barrier in the callee.
; Internal function whose whole body is one @aligned_barrier call. The
; expected output under both run lines still contains that call.
1035define internal void @callee_barrier() {
1036; CHECK-LABEL: define {{[^@]+}}@callee_barrier() {
1037; CHECK-NEXT:    call void @aligned_barrier()
1038; CHECK-NEXT:    ret void
1039;
1040  call void @aligned_barrier()
1041  ret void
1042}
; Kernel that wraps a call to @callee_barrier between two direct
; @aligned_barrier calls. The expected output under both run lines drops both
; direct barriers and keeps only the call to @callee_barrier.
1043define amdgpu_kernel void @caller_barrier1() "kernel" {
1044; CHECK-LABEL: define {{[^@]+}}@caller_barrier1
1045; CHECK-SAME: () #[[ATTR4]] {
1046; CHECK-NEXT:    call void @callee_barrier()
1047; CHECK-NEXT:    ret void
1048;
1049  call void @aligned_barrier()
1050  call void @callee_barrier()
1051  call void @aligned_barrier()
1052  ret void
1053}
; Kernel that wraps a call to @callee_barrier between two calls to the
; externally declared @unknown. It contains no direct barrier, and the
; expected output under both run lines is the same three calls, unchanged.
1054define amdgpu_kernel void @caller_barrier2() "kernel" {
1055; CHECK-LABEL: define {{[^@]+}}@caller_barrier2
1056; CHECK-SAME: () #[[ATTR4]] {
1057; CHECK-NEXT:    call void @unknown()
1058; CHECK-NEXT:    call void @callee_barrier()
1059; CHECK-NEXT:    call void @unknown()
1060; CHECK-NEXT:    ret void
1061;
1062  call void @unknown()
1063  call void @callee_barrier()
1064  call void @unknown()
1065  ret void
1066}
1067
; Kernel with a single-block loop: %i starts at 0, %i.next is %i + 1, and the
; loop exits once %i.next equals 128. Each iteration calls @unknown and then
; @aligned_barrier. The expected output under both run lines keeps the loop
; body, including the barrier, unchanged.
1068define amdgpu_kernel void @loop_barrier() "kernel" {
1069; CHECK-LABEL: define {{[^@]+}}@loop_barrier
1070; CHECK-SAME: () #[[ATTR4]] {
1071; CHECK-NEXT:  entry:
1072; CHECK-NEXT:    br label [[LOOP:%.*]]
1073; CHECK:       loop:
1074; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
1075; CHECK-NEXT:    call void @unknown()
1076; CHECK-NEXT:    call void @aligned_barrier()
1077; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
1078; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
1079; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
1080; CHECK:       exit:
1081; CHECK-NEXT:    ret void
1082;
1083entry:
1084  br label %loop
1085
1086loop:
1087  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
1088  call void @unknown()
1089  call void @aligned_barrier()
1090  %i.next = add nuw nsw i32 %i, 1
1091  %cond = icmp ne i32 %i.next, 128
1092  br i1 %cond, label %loop, label %exit
1093
1094exit:
1095  ret void
1096}
1097
; Same loop as @loop_barrier (call to @unknown followed by @aligned_barrier,
; exiting once %i.next equals 128), but the exit block additionally holds four
; consecutive @aligned_barrier calls ahead of the return. The expected output
; under both run lines keeps the barrier inside the loop and drops all four
; barriers from the exit block.
1098define amdgpu_kernel void @loop_barrier_end_barriers() "kernel" {
1099; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers
1100; CHECK-SAME: () #[[ATTR4]] {
1101; CHECK-NEXT:  entry:
1102; CHECK-NEXT:    br label [[LOOP:%.*]]
1103; CHECK:       loop:
1104; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
1105; CHECK-NEXT:    call void @unknown()
1106; CHECK-NEXT:    call void @aligned_barrier()
1107; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
1108; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
1109; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
1110; CHECK:       exit:
1111; CHECK-NEXT:    ret void
1112;
1113entry:
1114  br label %loop
1115
1116loop:
1117  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
1118  call void @unknown()
1119  call void @aligned_barrier()
1120  %i.next = add nuw nsw i32 %i, 1
1121  %cond = icmp ne i32 %i.next, 128
1122  br i1 %cond, label %loop, label %exit
1123
1124exit:
1125  call void @aligned_barrier()
1126  call void @aligned_barrier()
1127  call void @aligned_barrier()
1128  call void @aligned_barrier()
1129  ret void
1130}
1131
; Same loop as @loop_barrier, but the exit block holds two @aligned_barrier
; calls, a call to @unknown, and two more @aligned_barrier calls ahead of the
; return. The expected output under both run lines keeps the barrier inside
; the loop; in the exit block only the call to @unknown is left, i.e. the
; barriers on both sides of it are dropped.
1132define amdgpu_kernel void @loop_barrier_end_barriers_unknown() "kernel" {
1133; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_unknown
1134; CHECK-SAME: () #[[ATTR4]] {
1135; CHECK-NEXT:  entry:
1136; CHECK-NEXT:    br label [[LOOP:%.*]]
1137; CHECK:       loop:
1138; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
1139; CHECK-NEXT:    call void @unknown()
1140; CHECK-NEXT:    call void @aligned_barrier()
1141; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
1142; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
1143; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
1144; CHECK:       exit:
1145; CHECK-NEXT:    call void @unknown()
1146; CHECK-NEXT:    ret void
1147;
1148entry:
1149  br label %loop
1150
1151loop:
1152  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
1153  call void @unknown()
1154  call void @aligned_barrier()
1155  %i.next = add nuw nsw i32 %i, 1
1156  %cond = icmp ne i32 %i.next, 128
1157  br i1 %cond, label %loop, label %exit
1158
1159exit:
1160  call void @aligned_barrier()
1161  call void @aligned_barrier()
1162  call void @unknown()
1163  call void @aligned_barrier()
1164  call void @aligned_barrier()
1165  ret void
1166}
1167
; Variant of @loop_barrier where the loop body stores %i to the global @G1
; (instead of calling @unknown) ahead of the @aligned_barrier call. The loop
; exits once %i.next equals 128. The expected output under both run lines
; keeps the store and the barrier in the loop unchanged.
1168define amdgpu_kernel void @loop_barrier_store() "kernel" {
1169; CHECK-LABEL: define {{[^@]+}}@loop_barrier_store
1170; CHECK-SAME: () #[[ATTR4]] {
1171; CHECK-NEXT:  entry:
1172; CHECK-NEXT:    br label [[LOOP:%.*]]
1173; CHECK:       loop:
1174; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
1175; CHECK-NEXT:    store i32 [[I]], ptr @G1, align 4
1176; CHECK-NEXT:    call void @aligned_barrier()
1177; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
1178; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
1179; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
1180; CHECK:       exit:
1181; CHECK-NEXT:    ret void
1182;
1183entry:
1184  br label %loop
1185
1186loop:
1187  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
1188  store i32 %i, ptr @G1
1189  call void @aligned_barrier()
1190  %i.next = add nuw nsw i32 %i, 1
1191  %cond = icmp ne i32 %i.next, 128
1192  br i1 %cond, label %loop, label %exit
1193
1194exit:
1195  ret void
1196}
1197
; Same loop as @loop_barrier_store (store of %i to @G1 followed by
; @aligned_barrier), but the exit block holds two @aligned_barrier calls, a
; store of %i.next to @G1, and two more @aligned_barrier calls ahead of the
; return. The expected output under both run lines keeps the barrier inside
; the loop; in the exit block only the store is left, i.e. the barriers on
; both sides of it are dropped.
1198define amdgpu_kernel void @loop_barrier_end_barriers_store() "kernel" {
1199; CHECK-LABEL: define {{[^@]+}}@loop_barrier_end_barriers_store
1200; CHECK-SAME: () #[[ATTR4]] {
1201; CHECK-NEXT:  entry:
1202; CHECK-NEXT:    br label [[LOOP:%.*]]
1203; CHECK:       loop:
1204; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
1205; CHECK-NEXT:    store i32 [[I]], ptr @G1, align 4
1206; CHECK-NEXT:    call void @aligned_barrier()
1207; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
1208; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[I_NEXT]], 128
1209; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
1210; CHECK:       exit:
1211; CHECK-NEXT:    store i32 [[I_NEXT]], ptr @G1, align 4
1212; CHECK-NEXT:    ret void
1213;
1214entry:
1215  br label %loop
1216
1217loop:
1218  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
1219  store i32 %i, ptr @G1
1220  call void @aligned_barrier()
1221  %i.next = add nuw nsw i32 %i, 1
1222  %cond = icmp ne i32 %i.next, 128
1223  br i1 %cond, label %loop, label %exit
1224
1225exit:
1226  call void @aligned_barrier()
1227  call void @aligned_barrier()
1228  store i32 %i.next, ptr @G1
1229  call void @aligned_barrier()
1230  call void @aligned_barrier()
1231  ret void
1232}
1233
; Module flags for this test: "openmp-device" (!16, listed first) and
; "openmp" (!15), each with behavior 7 and value 50.
; NOTE(review): the value 50 presumably encodes OpenMP version 5.0 -- confirm.
1234!llvm.module.flags = !{!16,!15}
1235
1236!15 = !{i32 7, !"openmp", i32 50}
1237!16 = !{i32 7, !"openmp-device", i32 50}
1238;.
1239; MODULE: attributes #[[ATTR0:[0-9]+]] = { "llvm.assume"="ompx_aligned_barrier" }
1240; MODULE: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind }
1241; MODULE: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
1242; MODULE: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
1243; MODULE: attributes #[[ATTR4]] = { "kernel" }
1244; MODULE: attributes #[[ATTR5]] = { nosync memory(none) }
1245;.
1246; CGSCC: attributes #[[ATTR0]] = { "llvm.assume"="ompx_aligned_barrier" }
1247; CGSCC: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind }
1248; CGSCC: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
1249; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
1250; CGSCC: attributes #[[ATTR4]] = { "kernel" }
1251; CGSCC: attributes #[[ATTR5]] = { nosync memory(none) }
1252;.
1253; MODULE: [[META0:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
1254; MODULE: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 50}
1255;.
1256; CGSCC: [[META0:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
1257; CGSCC: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 50}
1258;.
1259