xref: /llvm-project/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
3; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
5
; Conditional branch on an ordered less-than compare of two half values.
; Both operands come from volatile loads of %a and %b. The %one block stores
; %a.val to %r and the %two block stores %b.val to %r.
; In the expected output below, the tahiti checks convert both halves to f32
; and compare with v_cmp_nlt_f32, while the fiji and gfx1100 checks compare
; the halves directly with v_cmp_nlt_f16. For all three targets the
; s_cbranch_vccnz that follows the compare jumps to the block labelled %two.
6define amdgpu_kernel void @br_cc_f16(
7; SI-LABEL: br_cc_f16:
8; SI:       ; %bb.0: ; %entry
9; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
10; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
11; SI-NEXT:    s_mov_b32 s7, 0xf000
12; SI-NEXT:    s_mov_b32 s6, -1
13; SI-NEXT:    s_mov_b32 s10, s6
14; SI-NEXT:    s_waitcnt lgkmcnt(0)
15; SI-NEXT:    s_mov_b32 s4, s2
16; SI-NEXT:    s_mov_b32 s5, s3
17; SI-NEXT:    s_mov_b32 s11, s7
18; SI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0 glc
19; SI-NEXT:    s_waitcnt vmcnt(0)
20; SI-NEXT:    buffer_load_ushort v1, off, s[8:11], 0 glc
21; SI-NEXT:    s_waitcnt vmcnt(0)
22; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
23; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
24; SI-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v1
25; SI-NEXT:    s_cbranch_vccnz .LBB0_2
26; SI-NEXT:  ; %bb.1: ; %one
27; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
28; SI-NEXT:    s_branch .LBB0_3
29; SI-NEXT:  .LBB0_2: ; %two
30; SI-NEXT:    v_cvt_f16_f32_e32 v0, v1
31; SI-NEXT:  .LBB0_3: ; %one
32; SI-NEXT:    s_mov_b32 s2, s6
33; SI-NEXT:    s_mov_b32 s3, s7
34; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
35; SI-NEXT:    s_endpgm
36;
37; VI-LABEL: br_cc_f16:
38; VI:       ; %bb.0: ; %entry
39; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
40; VI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x34
41; VI-NEXT:    s_mov_b32 s7, 0xf000
42; VI-NEXT:    s_mov_b32 s6, -1
43; VI-NEXT:    s_mov_b32 s10, s6
44; VI-NEXT:    s_waitcnt lgkmcnt(0)
45; VI-NEXT:    s_mov_b32 s4, s2
46; VI-NEXT:    s_mov_b32 s5, s3
47; VI-NEXT:    s_mov_b32 s11, s7
48; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0 glc
49; VI-NEXT:    s_waitcnt vmcnt(0)
50; VI-NEXT:    buffer_load_ushort v1, off, s[8:11], 0 glc
51; VI-NEXT:    s_waitcnt vmcnt(0)
52; VI-NEXT:    s_mov_b32 s2, s6
53; VI-NEXT:    s_mov_b32 s3, s7
54; VI-NEXT:    v_cmp_nlt_f16_e32 vcc, v0, v1
55; VI-NEXT:    s_cbranch_vccnz .LBB0_2
56; VI-NEXT:  ; %bb.1: ; %one
57; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
58; VI-NEXT:    s_endpgm
59; VI-NEXT:  .LBB0_2: ; %two
60; VI-NEXT:    buffer_store_short v1, off, s[0:3], 0
61; VI-NEXT:    s_endpgm
62;
63; GFX11-LABEL: br_cc_f16:
64; GFX11:       ; %bb.0: ; %entry
65; GFX11-NEXT:    s_clause 0x1
66; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
67; GFX11-NEXT:    s_load_b64 s[8:9], s[4:5], 0x34
68; GFX11-NEXT:    s_mov_b32 s6, -1
69; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
70; GFX11-NEXT:    s_mov_b32 s10, s6
71; GFX11-NEXT:    s_mov_b32 s11, s7
72; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
73; GFX11-NEXT:    s_mov_b32 s4, s2
74; GFX11-NEXT:    s_mov_b32 s5, s3
75; GFX11-NEXT:    buffer_load_u16 v0, off, s[4:7], 0 glc dlc
76; GFX11-NEXT:    s_waitcnt vmcnt(0)
77; GFX11-NEXT:    buffer_load_u16 v1, off, s[8:11], 0 glc dlc
78; GFX11-NEXT:    s_waitcnt vmcnt(0)
79; GFX11-NEXT:    s_mov_b32 s2, s6
80; GFX11-NEXT:    s_mov_b32 s3, s7
81; GFX11-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v0, v1
82; GFX11-NEXT:    s_cbranch_vccnz .LBB0_2
83; GFX11-NEXT:  ; %bb.1: ; %one
84; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
85; GFX11-NEXT:    s_endpgm
86; GFX11-NEXT:  .LBB0_2: ; %two
87; GFX11-NEXT:    buffer_store_b16 v1, off, s[0:3], 0
88; GFX11-NEXT:    s_endpgm
89    ptr addrspace(1) %r,
90    ptr addrspace(1) %a,
91    ptr addrspace(1) %b) {
92entry:
  ; Both loads are volatile; the checks above show each one as its own glc
  ; buffer load immediately followed by s_waitcnt vmcnt(0).
93  %a.val = load volatile half, ptr addrspace(1) %a
94  %b.val = load volatile half, ptr addrspace(1) %b
  ; Ordered less-than on the two loaded halves selects which value is stored.
95  %fcmp = fcmp olt half %a.val, %b.val
96  br i1 %fcmp, label %one, label %two
97
98one:
99  store half %a.val, ptr addrspace(1) %r
100  ret void
101
102two:
103  store half %b.val, ptr addrspace(1) %r
104  ret void
105}
106
; Conditional branch on fcmp olt where the left-hand operand is the half
; constant 0xH3800 and the right-hand operand is a (non-volatile) load of %b.
; The %one block stores the constant to %r and the %two block stores %b.val.
; In the expected output below the compare takes the literal 0.5 as its first
; operand: v_cmp_nlt_f32 after an f32 conversion for tahiti, v_cmp_nlt_f16 for
; fiji and gfx1100. The constant stored by %one is materialised with
; v_mov_b32 v0, 0x3800. The fiji and gfx1100 checks let %one fall through into
; the store shared with %two, while the tahiti checks keep two separate stores.
107define amdgpu_kernel void @br_cc_f16_imm_a(
108; SI-LABEL: br_cc_f16_imm_a:
109; SI:       ; %bb.0: ; %entry
110; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
111; SI-NEXT:    s_mov_b32 s7, 0xf000
112; SI-NEXT:    s_mov_b32 s6, -1
113; SI-NEXT:    s_waitcnt lgkmcnt(0)
114; SI-NEXT:    s_mov_b32 s4, s2
115; SI-NEXT:    s_mov_b32 s5, s3
116; SI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
117; SI-NEXT:    s_waitcnt vmcnt(0)
118; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
119; SI-NEXT:    v_cmp_nlt_f32_e32 vcc, 0.5, v0
120; SI-NEXT:    s_cbranch_vccnz .LBB1_2
121; SI-NEXT:  ; %bb.1: ; %one
122; SI-NEXT:    s_mov_b32 s2, s6
123; SI-NEXT:    s_mov_b32 s3, s7
124; SI-NEXT:    v_mov_b32_e32 v0, 0x3800
125; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
126; SI-NEXT:    s_endpgm
127; SI-NEXT:  .LBB1_2: ; %two
128; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
129; SI-NEXT:    s_mov_b32 s2, s6
130; SI-NEXT:    s_mov_b32 s3, s7
131; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
132; SI-NEXT:    s_endpgm
133;
134; VI-LABEL: br_cc_f16_imm_a:
135; VI:       ; %bb.0: ; %entry
136; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
137; VI-NEXT:    s_mov_b32 s7, 0xf000
138; VI-NEXT:    s_mov_b32 s6, -1
139; VI-NEXT:    s_waitcnt lgkmcnt(0)
140; VI-NEXT:    s_mov_b32 s4, s2
141; VI-NEXT:    s_mov_b32 s5, s3
142; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
143; VI-NEXT:    s_mov_b32 s2, s6
144; VI-NEXT:    s_mov_b32 s3, s7
145; VI-NEXT:    s_waitcnt vmcnt(0)
146; VI-NEXT:    v_cmp_nlt_f16_e32 vcc, 0.5, v0
147; VI-NEXT:    s_cbranch_vccnz .LBB1_2
148; VI-NEXT:  ; %bb.1: ; %one
149; VI-NEXT:    v_mov_b32_e32 v0, 0x3800
150; VI-NEXT:  .LBB1_2: ; %two
151; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
152; VI-NEXT:    s_endpgm
153;
154; GFX11-LABEL: br_cc_f16_imm_a:
155; GFX11:       ; %bb.0: ; %entry
156; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
157; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
158; GFX11-NEXT:    s_mov_b32 s6, -1
159; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
160; GFX11-NEXT:    s_mov_b32 s4, s2
161; GFX11-NEXT:    s_mov_b32 s5, s3
162; GFX11-NEXT:    buffer_load_u16 v0, off, s[4:7], 0
163; GFX11-NEXT:    s_waitcnt vmcnt(0)
164; GFX11-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, 0.5, v0
165; GFX11-NEXT:    s_cbranch_vccnz .LBB1_2
166; GFX11-NEXT:  ; %bb.1: ; %one
167; GFX11-NEXT:    v_mov_b32_e32 v0, 0x3800
168; GFX11-NEXT:  .LBB1_2: ; %two
169; GFX11-NEXT:    s_mov_b32 s2, s6
170; GFX11-NEXT:    s_mov_b32 s3, s7
171; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
172; GFX11-NEXT:    s_endpgm
173    ptr addrspace(1) %r,
174    ptr addrspace(1) %b) {
175entry:
176  %b.val = load half, ptr addrspace(1) %b
  ; Constant is the first (left-hand) fcmp operand in this variant.
177  %fcmp = fcmp olt half 0xH3800, %b.val
178  br i1 %fcmp, label %one, label %two
179
180one:
181  store half 0xH3800, ptr addrspace(1) %r
182  ret void
183
184two:
185  store half %b.val, ptr addrspace(1) %r
186  ret void
187}
188
; Conditional branch on fcmp olt where the left-hand operand is a
; (non-volatile) load of %a and the right-hand operand is the half constant
; 0xH3800. The %one block stores %a.val to %r and the %two block stores the
; constant.
; In the expected output below the literal 0.5 is still the first operand of
; the compare, and the compare used is the "ngt" form: v_cmp_ngt_f32 after an
; f32 conversion for tahiti, v_cmp_ngt_f16 for fiji and gfx1100.
; The tahiti and fiji checks follow it with s_cbranch_vccnz to the block
; labelled %two. The gfx1100 checks instead use s_cbranch_vccz to the block
; labelled %one, with %two as the fall-through that loads 0x3800 into v0 before
; the shared store.
189define amdgpu_kernel void @br_cc_f16_imm_b(
190; SI-LABEL: br_cc_f16_imm_b:
191; SI:       ; %bb.0: ; %entry
192; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
193; SI-NEXT:    s_mov_b32 s7, 0xf000
194; SI-NEXT:    s_mov_b32 s6, -1
195; SI-NEXT:    s_waitcnt lgkmcnt(0)
196; SI-NEXT:    s_mov_b32 s4, s2
197; SI-NEXT:    s_mov_b32 s5, s3
198; SI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
199; SI-NEXT:    s_waitcnt vmcnt(0)
200; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
201; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0.5, v0
202; SI-NEXT:    s_cbranch_vccnz .LBB2_2
203; SI-NEXT:  ; %bb.1: ; %one
204; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
205; SI-NEXT:    s_mov_b32 s2, s6
206; SI-NEXT:    s_mov_b32 s3, s7
207; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
208; SI-NEXT:    s_endpgm
209; SI-NEXT:  .LBB2_2: ; %two
210; SI-NEXT:    s_mov_b32 s2, s6
211; SI-NEXT:    s_mov_b32 s3, s7
212; SI-NEXT:    v_mov_b32_e32 v0, 0x3800
213; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
214; SI-NEXT:    s_endpgm
215;
216; VI-LABEL: br_cc_f16_imm_b:
217; VI:       ; %bb.0: ; %entry
218; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
219; VI-NEXT:    s_mov_b32 s7, 0xf000
220; VI-NEXT:    s_mov_b32 s6, -1
221; VI-NEXT:    s_waitcnt lgkmcnt(0)
222; VI-NEXT:    s_mov_b32 s4, s2
223; VI-NEXT:    s_mov_b32 s5, s3
224; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
225; VI-NEXT:    s_mov_b32 s2, s6
226; VI-NEXT:    s_mov_b32 s3, s7
227; VI-NEXT:    s_waitcnt vmcnt(0)
228; VI-NEXT:    v_cmp_ngt_f16_e32 vcc, 0.5, v0
229; VI-NEXT:    s_cbranch_vccnz .LBB2_2
230; VI-NEXT:  ; %bb.1: ; %one
231; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
232; VI-NEXT:    s_endpgm
233; VI-NEXT:  .LBB2_2: ; %two
234; VI-NEXT:    v_mov_b32_e32 v0, 0x3800
235; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
236; VI-NEXT:    s_endpgm
237;
238; GFX11-LABEL: br_cc_f16_imm_b:
239; GFX11:       ; %bb.0: ; %entry
240; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
241; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
242; GFX11-NEXT:    s_mov_b32 s6, -1
243; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
244; GFX11-NEXT:    s_mov_b32 s4, s2
245; GFX11-NEXT:    s_mov_b32 s5, s3
246; GFX11-NEXT:    buffer_load_u16 v0, off, s[4:7], 0
247; GFX11-NEXT:    s_waitcnt vmcnt(0)
248; GFX11-NEXT:    v_cmp_ngt_f16_e32 vcc_lo, 0.5, v0
249; GFX11-NEXT:    s_cbranch_vccz .LBB2_2
250; GFX11-NEXT:  ; %bb.1: ; %two
251; GFX11-NEXT:    v_mov_b32_e32 v0, 0x3800
252; GFX11-NEXT:  .LBB2_2: ; %one
253; GFX11-NEXT:    s_mov_b32 s2, s6
254; GFX11-NEXT:    s_mov_b32 s3, s7
255; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
256; GFX11-NEXT:    s_endpgm
257    ptr addrspace(1) %r,
258    ptr addrspace(1) %a) {
259entry:
260  %a.val = load half, ptr addrspace(1) %a
  ; Constant is the second (right-hand) fcmp operand in this variant.
261  %fcmp = fcmp olt half %a.val, 0xH3800
262  br i1 %fcmp, label %one, label %two
263
264one:
265  store half %a.val, ptr addrspace(1) %r
266  ret void
267
268two:
269  store half 0xH3800, ptr addrspace(1) %r
270  ret void
271}
272