xref: /llvm-project/llvm/test/CodeGen/AMDGPU/set-wave-priority.ll (revision faa2c678aa1963147af35c3700e6b44c264af99f)
1; RUN: llc -mtriple=amdgcn -amdgpu-set-wave-priority=true -o - %s | \
2; RUN:   FileCheck %s
3
; no_setprio: the function body is a single fadd and contains no buffer load.
; The directives below require that no s_setprio instruction appears anywhere
; between the function label and the shader-part epilog marker.
4; CHECK-LABEL: no_setprio:
5; CHECK-NOT:       s_setprio
6; CHECK:           ; return to shader part epilog
7define amdgpu_ps <2 x float> @no_setprio(<2 x float> %a, <2 x float> %b) "amdgpu-wave-priority-threshold"="1" {
8  %s = fadd <2 x float> %a, %b
9  ret <2 x float> %s
10}
11
; vmem_in_exit_block: a single-block shader that performs one buffer load and
; then an fadd on the loaded value before returning.
; Expected output: s_setprio 3 somewhere before the buffer_load_dwordx2, and
; s_setprio 0 as the instruction immediately following the load.
12; CHECK-LABEL: vmem_in_exit_block:
13; CHECK:           s_setprio 3
14; CHECK:           buffer_load_dwordx2
15; CHECK-NEXT:      s_setprio 0
16; CHECK:           ; return to shader part epilog
17define amdgpu_ps <2 x float> @vmem_in_exit_block(ptr addrspace(8) inreg %p, <2 x float> %x) "amdgpu-wave-priority-threshold"="2" {
18  %v = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
19  %s = fadd <2 x float> %v, %x
20  ret <2 x float> %s
21}
22
; branch: the entry block branches either to %a (returns a constant, no load)
; or to %b (buffer load followed by an fadd).
; Expected output: s_setprio 3 is emitted before the conditional branch. In %b
; the priority is dropped (s_setprio 0) immediately after the load; in %a,
; which has no load, s_setprio 0 is the first instruction after the label.
; Both paths then branch to a common exit label.
23; CHECK-LABEL: branch:
24; CHECK:           s_setprio 3
25; CHECK:           s_cbranch_scc0 [[A:.*]]
26; CHECK:       {{.*}}:  ; %b
27; CHECK:           buffer_load_dwordx2
28; CHECK-NEXT:      s_setprio 0
29; CHECK:           s_branch [[EXIT:.*]]
30; CHECK:       [[A]]:  ; %a
31; CHECK-NEXT:      s_setprio 0
32; CHECK:           s_branch [[EXIT]]
33; CHECK-NEXT:  [[EXIT]]:
34define amdgpu_ps <2 x float> @branch(ptr addrspace(8) inreg %p, i32 inreg %i, <2 x float> %x) "amdgpu-wave-priority-threshold"="2" {
35  %cond = icmp eq i32 %i, 0
36  br i1 %cond, label %a, label %b
37
38a:
39  ret <2 x float> <float 0.0, float 0.0>
40
41b:
42  %v = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
43  %s = fadd <2 x float> %v, %x
44  ret <2 x float> %s
45}
46
; setprio_follows_setprio: %entry loads and branches to %a or %c; %a loads
; again and branches to %b or %c. Block %c is therefore reachable both
; directly from %entry and from %a.
; Expected output: s_setprio 3 before the first load; s_setprio 0 immediately
; after the load in %a; no s_setprio at all in %b; and s_setprio 0 as the first
; instruction of %c. On the %a -> %c path the s_setprio 0 in %c thus follows
; the one already emitted in %a, which is what the test name refers to.
47; CHECK-LABEL: setprio_follows_setprio:
48; CHECK:           s_setprio 3
49; CHECK:           buffer_load_dwordx2
50; CHECK:           s_cbranch_scc1 [[C:.*]]
51; CHECK:       {{.*}}:  ; %a
52; CHECK:           buffer_load_dwordx2
53; CHECK-NEXT:      s_setprio 0
54; CHECK:           s_cbranch_vccnz [[C]]
55; CHECK:       {{.*}}:  ; %b
56; CHECK-NOT:       s_setprio
57; CHECK:           s_branch [[EXIT:.*]]
58; CHECK:       [[C]]:  ; %c
59; CHECK-NEXT:      s_setprio 0
60; CHECK:           s_branch [[EXIT]]
61; CHECK:       [[EXIT]]:
62define amdgpu_ps <2 x float> @setprio_follows_setprio(ptr addrspace(8) inreg %p, i32 inreg %i) "amdgpu-wave-priority-threshold"="3" {
63entry:
64  %v1 = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
65  %cond1 = icmp ne i32 %i, 0
66  br i1 %cond1, label %a, label %c
67
68a:
69  %v2 = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 1, i32 0)
70  %v20 = extractelement <2 x float> %v2, i32 0
71  %v21 = extractelement <2 x float> %v2, i32 1
72  %cond2 = fcmp ult float %v20, %v21
73  br i1 %cond2, label %b, label %c
74
75b:
76  ret <2 x float> %v2
77
78c:
79  %v4 = fadd <2 x float> %v1, %v1
80  ret <2 x float> %v4
81}
82
; loop: a self-looping block that issues one buffer load per iteration (indexed
; by the induction variable %i) and accumulates the results; the loop runs
; while %i2 < 5 and %exit returns the accumulated sum.
; Expected output: s_setprio 3 is emitted once in %entry, no s_setprio appears
; anywhere inside the loop body (before or after the load), and s_setprio 0 is
; the first instruction of %exit.
83; CHECK-LABEL: loop:
84; CHECK:       {{.*}}:  ; %entry
85; CHECK:           s_setprio 3
86; CHECK-NOT:       s_setprio
87; CHECK:       [[LOOP:.*]]:  ; %loop
88; CHECK-NOT:       s_setprio
89; CHECK:           buffer_load_dwordx2
90; CHECK-NOT:       s_setprio
91; CHECK:           s_cbranch_scc1 [[LOOP]]
92; CHECK-NEXT:  {{.*}}:  ; %exit
93; CHECK-NEXT:      s_setprio 0
94define amdgpu_ps <2 x float> @loop(ptr addrspace(8) inreg %p) "amdgpu-wave-priority-threshold"="2" {
95entry:
96  br label %loop
97
98loop:
99  %i = phi i32 [0, %entry], [%i2, %loop]
100  %sum = phi <2 x float> [<float 0.0, float 0.0>, %entry], [%sum2, %loop]
101
102  %i2 = add i32 %i, 1
103
104  %v = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 %i, i32 0, i32 0, i32 0)
105  %sum2 = fadd <2 x float> %sum, %v
106
107  %cond = icmp ult i32 %i2, 5
108  br i1 %cond, label %loop, label %exit
109
110exit:
111  ret <2 x float> %sum2
112}
113
; edge_split: %entry loads and then branches either into a load-free loop
; (%loop, which only multiplies) or to %another_load, which issues a second
; buffer load.
; Expected output: s_setprio 3 before the first load and no further s_setprio
; before the conditional branch. The priority is lowered at the top of a block
; labelled %loop.preheader, which does not exist in the input IR and so must be
; created by the compiler on the %entry -> %loop edge. Neither %loop nor %exit
; may contain an s_setprio. In %another_load the priority is lowered right
; after the second load.
; The %exit label directive previously lacked the colon after its prefix, so
; FileCheck silently skipped it; it is now a real directive.
114; CHECK-LABEL: edge_split:
115; CHECK:           s_setprio 3
116; CHECK:           buffer_load_dwordx2
117; CHECK-NOT:       s_setprio
118; CHECK:           s_cbranch_scc1 [[ANOTHER_LOAD:.*]]
119; CHECK:       {{.*}}:  ; %loop.preheader
120; CHECK-NEXT:      s_setprio 0
121; CHECK:       [[LOOP:.*]]:  ; %loop
122; CHECK-NOT:       s_setprio
123; CHECK:           s_cbranch_scc1 [[LOOP]]
124; CHECK:       {{.*}}:  ; %exit
125; CHECK-NOT:       s_setprio
126; CHECK:           s_branch [[RET:.*]]
127; CHECK:       [[ANOTHER_LOAD]]:  ; %another_load
128; CHECK:           buffer_load_dwordx2
129; CHECK-NEXT:      s_setprio 0
130; CHECK:           s_branch [[RET]]
131; CHECK:       [[RET]]:
132define amdgpu_ps <2 x float> @edge_split(ptr addrspace(8) inreg %p, i32 inreg %x) "amdgpu-wave-priority-threshold"="2" {
133entry:
134  %v = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
135  %cond = icmp ne i32 %x, 0
136  br i1 %cond, label %loop, label %another_load
137
138loop:
139  %i = phi i32 [0, %entry], [%i2, %loop]
140  %mul = phi <2 x float> [%v, %entry], [%mul2, %loop]
141
142  %i2 = add i32 %i, 1
143  %mul2 = fmul <2 x float> %mul, %v
144
145  %cond2 = icmp ult i32 %i2, 5
146  br i1 %cond2, label %loop, label %exit
147
148exit:
149  ret <2 x float> %mul2
150
151another_load:
152  %v2 = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 1, i32 0)
153  %sum = fadd <2 x float> %v, %v2
154  ret <2 x float> %sum
155}
156
; valu_insts_threshold: the entry block loads once and then performs two
; <2 x float> fadds (expected as exactly four v_add_f32_e32) before branching;
; %b issues a second load, %a does not.
; Expected output with the threshold attribute set to "4": s_setprio 3 before
; the first load and s_setprio 0 immediately after it, ahead of the four adds.
; The second load must be the first instruction of %b, i.e. nothing (including
; an s_setprio) is emitted in front of it.
; NOTE(review): presumably the attribute is a minimum count of VALU
; instructions needed for the priority to be raised; confirm against the pass.
; @valu_insts_threshold2 uses the same IR with the attribute set to "5".
157; CHECK-LABEL: valu_insts_threshold:
158; CHECK:           s_setprio 3
159; CHECK:           buffer_load_dwordx2
160; CHECK-NEXT:      s_setprio 0
161; CHECK-COUNT-4:   v_add_f32_e32
162; CHECK:           s_cbranch_scc0 [[A:.*]]
163; CHECK:       {{.*}}:  ; %b
164; CHECK-NEXT:      buffer_load_dwordx2
165; CHECK:           s_branch [[END:.*]]
166; CHECK:       [[A]]:  ; %a
167; CHECK:           s_branch [[END]]
168; CHECK:       [[END]]:
169define amdgpu_ps <2 x float> @valu_insts_threshold(ptr addrspace(8) inreg %p, i32 inreg %i) "amdgpu-wave-priority-threshold"="4" {
170  %v = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
171  %add = fadd <2 x float> %v, %v
172  %add2 = fadd <2 x float> %add, %add
173
174  %cond = icmp eq i32 %i, 0
175  br i1 %cond, label %a, label %b
176
177a:
178  ret <2 x float> %add2
179
180b:
181  %v2 = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 1, i32 0, i32 0)
182  %sub = fsub <2 x float> %add2, %v2
183  ret <2 x float> %sub
184}
185
; valu_insts_threshold2: the function body is identical to
; @valu_insts_threshold; only the threshold attribute differs ("5" instead of
; "4"). With this value the directives require that no s_setprio is emitted
; anywhere in the function, up to the end-of-function marker.
186; CHECK-LABEL: valu_insts_threshold2:
187; CHECK-NOT: s_setprio
188; CHECK: ; -- End function
189define amdgpu_ps <2 x float> @valu_insts_threshold2(ptr addrspace(8) inreg %p, i32 inreg %i) "amdgpu-wave-priority-threshold"="5" {
190  %v = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
191  %add = fadd <2 x float> %v, %v
192  %add2 = fadd <2 x float> %add, %add
193
194  %cond = icmp eq i32 %i, 0
195  br i1 %cond, label %a, label %b
196
197a:
198  ret <2 x float> %add2
199
200b:
201  %v2 = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 1, i32 0, i32 0)
202  %sub = fsub <2 x float> %add2, %v2
203  ret <2 x float> %sub
204}
205
; Declaration of the buffer-load intrinsic called by the test functions above:
; it takes a ptr addrspace(8) resource plus four i32 operands and returns a
; <2 x float>.
206declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32) nounwind
207