xref: /llvm-project/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll (revision 76f722f10c7ac54792821c0a16e47c7d462e53d0)
1; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=OPT %s
2; RUN: opt -mtriple=amdgcn-- -S -passes=structurizecfg,si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=OPT %s
3; RUN: llc -mtriple=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s
4
5
6; OPT-LABEL: @annotate_unreachable_noloop(
7; OPT-NOT: call i1 @llvm.amdgcn.loop
8
9; GCN-LABEL: {{^}}annotate_unreachable_noloop:
10; GCN: s_cbranch_scc1
11; GCN-NOT: s_endpgm
12; GCN: .Lfunc_end0
; Kernel whose control flow only moves forward (no block branches back to an
; earlier block) and whose two exit blocks, %bb4 and %bb5, both end in
; `unreachable`.
;   %bb  : reads the workitem id intrinsic and falls through to %bb1.
;   %bb1 : loads the <4 x float> at that index from %arg, then branches on an
;          undef condition to %bb5 or %bb3.
;   %bb3 : branches to %bb4 when element 2 of the loaded vector is
;          ordered-less-than 0.0, otherwise to %bb5.
13define amdgpu_kernel void @annotate_unreachable_noloop(ptr addrspace(1) noalias nocapture readonly %arg) #0 {
14bb:
15  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
16  br label %bb1
17
18bb1:                                              ; preds = %bb
  ; Sign-extend the id to i64 so it can index the <4 x float> array at %arg.
19  %tmp2 = sext i32 %tmp to i64
20  %tmp3 = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i64 %tmp2
21  %tmp4 = load <4 x float>, ptr addrspace(1) %tmp3, align 16
  ; The condition is undef, so this test does not pin which successor is taken.
22  br i1 undef, label %bb5, label %bb3
23
24bb3:                                              ; preds = %bb1
25  %tmp6 = extractelement <4 x float> %tmp4, i32 2
26  %tmp7 = fcmp olt float %tmp6, 0.000000e+00
  ; NOTE(review): the successor order below is load-bearing per the original
  ; comment; do not reorder the two labels.
27  br i1 %tmp7, label %bb4, label %bb5 ; crash goes away if these are swapped
28
29bb4:                                              ; preds = %bb3
30  unreachable
31
32bb5:                                              ; preds = %bb3, %bb1
33  unreachable
34}
35
36
37; OPT-LABEL: @annotate_ret_noloop(
38; OPT-NOT: call i1 @llvm.amdgcn.loop
39
40; GCN-LABEL: {{^}}annotate_ret_noloop:
41; GCN: load_dwordx4
42; GCN: v_cmp_nlt_f32
43; GCN: s_and_saveexec_b64
44; GCN-NEXT: s_endpgm
45; GCN: .Lfunc_end
; Kernel with forward-only control flow (no block branches back to an earlier
; block) whose two exit blocks, %bb4 and %bb5, both `ret void`.
;   %bb  : reads the workitem id intrinsic and falls through to %bb1.
;   %bb1 : loads the <4 x float> at that index from %arg, stores it (volatile)
;          to an undef address, then branches to %bb5 when element 1 is
;          ordered-greater-than 1.0, otherwise to %bb3.
;   %bb3 : branches to %bb4 when element 2 is ordered-less-than 0.0,
;          otherwise to %bb5.
46define amdgpu_kernel void @annotate_ret_noloop(ptr addrspace(1) noalias nocapture readonly %arg) #0 {
47bb:
48  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
49  br label %bb1
50
51bb1:                                              ; preds = %bb
  ; Sign-extend the id to i64 so it can index the <4 x float> array at %arg.
52  %tmp2 = sext i32 %tmp to i64
53  %tmp3 = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i64 %tmp2
54  %tmp4 = load <4 x float>, ptr addrspace(1) %tmp3, align 16
55  %tmp5 = extractelement <4 x float> %tmp4, i32 1
  ; Volatile store of the whole loaded vector; the destination pointer is undef.
  ; NOTE(review): presumably present only to keep the load observable - confirm.
56  store volatile <4 x float> %tmp4, ptr addrspace(1) undef
57  %cmp = fcmp ogt float %tmp5, 1.0
58  br i1 %cmp, label %bb5, label %bb3
59
60bb3:                                              ; preds = %bb1
61  %tmp6 = extractelement <4 x float> %tmp4, i32 2
62  %tmp7 = fcmp olt float %tmp6, 0.000000e+00
  ; NOTE(review): the trailing comment on the branch matches the one in
  ; @annotate_unreachable_noloop; whether swapping matters here too is not
  ; verifiable from this file.
63  br i1 %tmp7, label %bb4, label %bb5 ; crash goes away if these are swapped
64
65bb4:                                              ; preds = %bb3
66  ret void
67
68bb5:                                              ; preds = %bb3, %bb1
69  ret void
70}
71
72; OPT-LABEL: @uniform_annotate_ret_noloop(
73; OPT-NOT: call i1 @llvm.amdgcn.loop
74
75; GCN-LABEL: {{^}}uniform_annotate_ret_noloop:
76; GCN: s_cbranch_scc1
77; GCN: s_endpgm
78; GCN: .Lfunc_end
; Variant of the forward-only, two-return kernel in which the array index
; %tmp is a kernel argument (i32) rather than the result of the workitem id
; intrinsic.
; NOTE(review): presumably this makes the index the same for every work-item,
; as the "uniform" in the name suggests - confirm.
;   %bb  : falls straight through to %bb1.
;   %bb1 : loads the <4 x float> at index %tmp from %arg, then branches on an
;          undef condition to %bb5 or %bb3.
;   %bb3 : branches to %bb4 when element 2 is ordered-less-than 0.0,
;          otherwise to %bb5.
79define amdgpu_kernel void @uniform_annotate_ret_noloop(ptr addrspace(1) noalias nocapture readonly %arg, i32 %tmp) #0 {
80bb:
81  br label %bb1
82
83bb1:                                              ; preds = %bb
  ; Sign-extend the argument to i64 so it can index the <4 x float> array.
84  %tmp2 = sext i32 %tmp to i64
85  %tmp3 = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i64 %tmp2
86  %tmp4 = load <4 x float>, ptr addrspace(1) %tmp3, align 16
  ; The condition is undef, so this test does not pin which successor is taken.
87  br i1 undef, label %bb5, label %bb3
88
89bb3:                                              ; preds = %bb1
90  %tmp6 = extractelement <4 x float> %tmp4, i32 2
91  %tmp7 = fcmp olt float %tmp6, 0.000000e+00
  ; NOTE(review): the trailing comment on the branch matches the one in
  ; @annotate_unreachable_noloop; whether swapping matters here too is not
  ; verifiable from this file.
92  br i1 %tmp7, label %bb4, label %bb5 ; crash goes away if these are swapped
93
94bb4:                                              ; preds = %bb3
95  ret void
96
97bb5:                                              ; preds = %bb3, %bb1
98  ret void
99}
100
101
; Intrinsic called in the entry block of @annotate_unreachable_noloop and
; @annotate_ret_noloop; its i32 result is used there as the array index.
102declare i32 @llvm.amdgcn.workitem.id.x() #1
103
; Attribute group #0 is attached to the three kernels defined in this file.
104attributes #0 = { nounwind }
; Attribute group #1 is attached to the intrinsic declaration.
105attributes #1 = { nounwind readnone }
106