xref: /llvm-project/llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll (revision 2d6d723a85c2d007b0359c206d66cd2e5a9f00e1)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,GFX1150 %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -global-isel -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,GFX1150 %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,GFX12 %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -global-isel -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,GFX12 %s
6
; f32 fadd of two inreg arguments. The shared CHECK prefix (used by every RUN
; line) expects a single s_add_f32 on s0/s1, with the result copied to v0.
7define amdgpu_vs float @fadd_f32(float inreg %a, float inreg %b) {
8; CHECK-LABEL: fadd_f32:
9; CHECK:       ; %bb.0:
10; CHECK-NEXT:    s_add_f32 s0, s0, s1
11; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
12; CHECK-NEXT:    v_mov_b32_e32 v0, s0
13; CHECK-NEXT:    ; return to shader part epilog
14   %add = fadd float %a, %b
15   ret float %add
16}
17
; f32 fsub of two inreg arguments. The shared CHECK prefix expects a single
; s_sub_f32 on s0/s1, with the result copied to v0.
18define amdgpu_vs float @fsub_f32(float inreg %a, float inreg %b) {
19; CHECK-LABEL: fsub_f32:
20; CHECK:       ; %bb.0:
21; CHECK-NEXT:    s_sub_f32 s0, s0, s1
22; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
23; CHECK-NEXT:    v_mov_b32_e32 v0, s0
24; CHECK-NEXT:    ; return to shader part epilog
25   %sub = fsub float %a, %b
26   ret float %sub
27}
28
; f32 fmul of two inreg arguments. The shared CHECK prefix expects a single
; s_mul_f32 on s0/s1, with the result copied to v0.
29define amdgpu_vs float @fmul_f32(float inreg %a, float inreg %b) {
30; CHECK-LABEL: fmul_f32:
31; CHECK:       ; %bb.0:
32; CHECK-NEXT:    s_mul_f32 s0, s0, s1
33; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
34; CHECK-NEXT:    v_mov_b32_e32 v0, s0
35; CHECK-NEXT:    ; return to shader part epilog
36   %mul = fmul float %a, %b
37   ret float %mul
38}
39
; llvm.minnum.f32 on two inreg arguments. The expected mnemonic differs per
; target, so the checks are split by prefix: GFX1150 expects s_min_f32 and
; GFX12 expects s_min_num_f32. The rest of the sequence is the same.
40define amdgpu_vs float @fmin_f32(float inreg %a, float inreg %b) {
41; GFX1150-LABEL: fmin_f32:
42; GFX1150:       ; %bb.0:
43; GFX1150-NEXT:    s_min_f32 s0, s0, s1
44; GFX1150-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
45; GFX1150-NEXT:    v_mov_b32_e32 v0, s0
46; GFX1150-NEXT:    ; return to shader part epilog
47;
48; GFX12-LABEL: fmin_f32:
49; GFX12:       ; %bb.0:
50; GFX12-NEXT:    s_min_num_f32 s0, s0, s1
51; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
52; GFX12-NEXT:    v_mov_b32_e32 v0, s0
53; GFX12-NEXT:    ; return to shader part epilog
54   %min = call float @llvm.minnum.f32(float %a, float %b)
55   ret float %min
56}
57
; llvm.maxnum.f32 on two inreg arguments. The expected mnemonic differs per
; target, so the checks are split by prefix: GFX1150 expects s_max_f32 and
; GFX12 expects s_max_num_f32. The rest of the sequence is the same.
58define amdgpu_vs float @fmax_f32(float inreg %a, float inreg %b) {
59; GFX1150-LABEL: fmax_f32:
60; GFX1150:       ; %bb.0:
61; GFX1150-NEXT:    s_max_f32 s0, s0, s1
62; GFX1150-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
63; GFX1150-NEXT:    v_mov_b32_e32 v0, s0
64; GFX1150-NEXT:    ; return to shader part epilog
65;
66; GFX12-LABEL: fmax_f32:
67; GFX12:       ; %bb.0:
68; GFX12-NEXT:    s_max_num_f32 s0, s0, s1
69; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
70; GFX12-NEXT:    v_mov_b32_e32 v0, s0
71; GFX12-NEXT:    ; return to shader part epilog
72   %max = call float @llvm.maxnum.f32(float %a, float %b)
73   ret float %max
74}
75
; f16 (half) fadd of two inreg arguments. The shared CHECK prefix expects a
; single s_add_f16 on s0/s1, with the result copied to v0.
76define amdgpu_vs half @fadd_f16(half inreg %a, half inreg %b) {
77; CHECK-LABEL: fadd_f16:
78; CHECK:       ; %bb.0:
79; CHECK-NEXT:    s_add_f16 s0, s0, s1
80; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
81; CHECK-NEXT:    v_mov_b32_e32 v0, s0
82; CHECK-NEXT:    ; return to shader part epilog
83   %add = fadd half %a, %b
84   ret half %add
85}
86
; f16 (half) fsub of two inreg arguments. The shared CHECK prefix expects a
; single s_sub_f16 on s0/s1, with the result copied to v0.
87define amdgpu_vs half @fsub_f16(half inreg %a, half inreg %b) {
88; CHECK-LABEL: fsub_f16:
89; CHECK:       ; %bb.0:
90; CHECK-NEXT:    s_sub_f16 s0, s0, s1
91; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
92; CHECK-NEXT:    v_mov_b32_e32 v0, s0
93; CHECK-NEXT:    ; return to shader part epilog
94   %sub = fsub half %a, %b
95   ret half %sub
96}
97
; f16 (half) fmul of two inreg arguments. The shared CHECK prefix expects a
; single s_mul_f16 on s0/s1, with the result copied to v0.
98define amdgpu_vs half @fmul_f16(half inreg %a, half inreg %b) {
99; CHECK-LABEL: fmul_f16:
100; CHECK:       ; %bb.0:
101; CHECK-NEXT:    s_mul_f16 s0, s0, s1
102; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
103; CHECK-NEXT:    v_mov_b32_e32 v0, s0
104; CHECK-NEXT:    ; return to shader part epilog
105   %mul = fmul half %a, %b
106   ret half %mul
107}
108
; llvm.minnum.f16 on two inreg arguments. The checks are split by target
; prefix: GFX1150 expects s_min_f16 and GFX12 expects s_min_num_f16. The rest
; of the sequence is the same.
109define amdgpu_vs half @fmin_f16(half inreg %a, half inreg %b) {
110; GFX1150-LABEL: fmin_f16:
111; GFX1150:       ; %bb.0:
112; GFX1150-NEXT:    s_min_f16 s0, s0, s1
113; GFX1150-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
114; GFX1150-NEXT:    v_mov_b32_e32 v0, s0
115; GFX1150-NEXT:    ; return to shader part epilog
116;
117; GFX12-LABEL: fmin_f16:
118; GFX12:       ; %bb.0:
119; GFX12-NEXT:    s_min_num_f16 s0, s0, s1
120; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
121; GFX12-NEXT:    v_mov_b32_e32 v0, s0
122; GFX12-NEXT:    ; return to shader part epilog
123   %min = call half @llvm.minnum.f16(half %a, half %b)
124   ret half %min
125}
126
; llvm.maxnum.f16 on two inreg arguments. The checks are split by target
; prefix: GFX1150 expects s_max_f16 and GFX12 expects s_max_num_f16. The rest
; of the sequence is the same.
127define amdgpu_vs half @fmax_f16(half inreg %a, half inreg %b) {
128; GFX1150-LABEL: fmax_f16:
129; GFX1150:       ; %bb.0:
130; GFX1150-NEXT:    s_max_f16 s0, s0, s1
131; GFX1150-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
132; GFX1150-NEXT:    v_mov_b32_e32 v0, s0
133; GFX1150-NEXT:    ; return to shader part epilog
134;
135; GFX12-LABEL: fmax_f16:
136; GFX12:       ; %bb.0:
137; GFX12-NEXT:    s_max_num_f16 s0, s0, s1
138; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
139; GFX12-NEXT:    v_mov_b32_e32 v0, s0
140; GFX12-NEXT:    ; return to shader part epilog
141   %max = call half @llvm.maxnum.f16(half %a, half %b)
142   ret half %max
143}
144
; llvm.amdgcn.cvt.pkrtz on two inreg f32 arguments, returning <2 x half>. The
; shared CHECK prefix expects a single s_cvt_pk_rtz_f16_f32 on s0/s1, with the
; result copied to v0.
145define amdgpu_vs <2 x half> @s_cvt_pkrtz_v2f16_f32(float inreg %x, float inreg %y) {
146; CHECK-LABEL: s_cvt_pkrtz_v2f16_f32:
147; CHECK:       ; %bb.0:
148; CHECK-NEXT:    s_cvt_pk_rtz_f16_f32 s0, s0, s1
149; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
150; CHECK-NEXT:    v_mov_b32_e32 v0, s0
151; CHECK-NEXT:    ; return to shader part epilog
152  %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
153  ret <2 x half> %result
154}
155
; llvm.fma.f32 with the first argument (%a) as the addend and no other use of
; it. The shared CHECK prefix expects one s_fmac_f32 writing s0 with sources
; s1/s2 and no preceding s_mov, followed by the copy of the result to v0.
156define amdgpu_vs float @fmac_f32(float inreg %a, float inreg %b, float inreg %c) {
157; CHECK-LABEL: fmac_f32:
158; CHECK:       ; %bb.0:
159; CHECK-NEXT:    s_fmac_f32 s0, s1, s2
160; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
161; CHECK-NEXT:    v_mov_b32_e32 v0, s0
162; CHECK-NEXT:    ; return to shader part epilog
163  %res = call float @llvm.fma.f32(float %b, float %c, float %a)
164  ret float %res
165}
166
167; Check selection of mov + fmac if src2 of fmac has a use later on
; Here the fma addend %c is also an operand of the following fadd. The checks
; expect s2 to be copied to s3 first, s_fmac_f32 to accumulate into the copy
; (s3), and the final s_add_f32 to read the still-intact s2.
168define amdgpu_vs float @fmac_f32_with_mov(float inreg %a, float inreg %b, float inreg %c) {
169; CHECK-LABEL: fmac_f32_with_mov:
170; CHECK:       ; %bb.0:
171; CHECK-NEXT:    s_mov_b32 s3, s2
172; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
173; CHECK-NEXT:    s_fmac_f32 s3, s0, s1
174; CHECK-NEXT:    s_add_f32 s0, s3, s2
175; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
176; CHECK-NEXT:    v_mov_b32_e32 v0, s0
177; CHECK-NEXT:    ; return to shader part epilog
178  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
179  %res = fadd float %fma, %c
180  ret float %res
181}
182
; llvm.fma.f16 with the first argument (%a) as the addend and no other use of
; it. The shared CHECK prefix expects one s_fmac_f16 writing s0 with sources
; s1/s2 and no preceding s_mov, followed by the copy of the result to v0.
183define amdgpu_vs half @fmac_f16(half inreg %a, half inreg %b, half inreg %c) {
184; CHECK-LABEL: fmac_f16:
185; CHECK:       ; %bb.0:
186; CHECK-NEXT:    s_fmac_f16 s0, s1, s2
187; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
188; CHECK-NEXT:    v_mov_b32_e32 v0, s0
189; CHECK-NEXT:    ; return to shader part epilog
190  %res = call half @llvm.fma.f16(half %b, half %c, half %a)
191  ret half %res
192}
193
194; Check selection of mov + fmac if src2 of fmac has a use later
; Here the fma addend %c is also an operand of the following fadd. The checks
; expect s2 to be copied to s3 first, s_fmac_f16 to accumulate into the copy
; (s3), and the final s_add_f16 to read the still-intact s2.
195define amdgpu_vs half @fmac_f16_with_mov(half inreg %a, half inreg %b, half inreg %c) {
196; CHECK-LABEL: fmac_f16_with_mov:
197; CHECK:       ; %bb.0:
198; CHECK-NEXT:    s_mov_b32 s3, s2
199; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
200; CHECK-NEXT:    s_fmac_f16 s3, s0, s1
201; CHECK-NEXT:    s_add_f16 s0, s3, s2
202; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
203; CHECK-NEXT:    v_mov_b32_e32 v0, s0
204; CHECK-NEXT:    ; return to shader part epilog
205  %fma = call half @llvm.fma.f16(half %a, half %b, half %c)
206  %res = fadd half %fma, %c
207  ret half %res
208}
209
210; Regression test for crash in SIFoldOperands
; Two i32 s.buffer.load calls (offsets 0 and 4, zeroinitializer descriptor) are
; bitcast to float and combined as (load@4 * 4.0) + load@0, with `contract` on
; both the fmul and the fadd. The checks expect one s_buffer_load_b64 and one
; s_fmamk_f32 whose literal 0x40800000 is the f32 bit pattern of 4.0.
; The GFX1150 and GFX12 sequences differ only in the wait instruction after the
; load: s_waitcnt lgkmcnt(0) vs. s_wait_kmcnt 0x0.
211define amdgpu_ps float @_amdgpu_ps_main() {
212; GFX1150-LABEL: _amdgpu_ps_main:
213; GFX1150:       ; %bb.0: ; %bb
214; GFX1150-NEXT:    s_mov_b32 s0, 0
215; GFX1150-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
216; GFX1150-NEXT:    s_mov_b32 s1, s0
217; GFX1150-NEXT:    s_mov_b32 s2, s0
218; GFX1150-NEXT:    s_mov_b32 s3, s0
219; GFX1150-NEXT:    s_buffer_load_b64 s[0:1], s[0:3], 0x0
220; GFX1150-NEXT:    s_waitcnt lgkmcnt(0)
221; GFX1150-NEXT:    s_fmamk_f32 s0, s1, 0x40800000, s0
222; GFX1150-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
223; GFX1150-NEXT:    v_mov_b32_e32 v0, s0
224; GFX1150-NEXT:    ; return to shader part epilog
225;
226; GFX12-LABEL: _amdgpu_ps_main:
227; GFX12:       ; %bb.0: ; %bb
228; GFX12-NEXT:    s_mov_b32 s0, 0
229; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
230; GFX12-NEXT:    s_mov_b32 s1, s0
231; GFX12-NEXT:    s_mov_b32 s2, s0
232; GFX12-NEXT:    s_mov_b32 s3, s0
233; GFX12-NEXT:    s_buffer_load_b64 s[0:1], s[0:3], 0x0
234; GFX12-NEXT:    s_wait_kmcnt 0x0
235; GFX12-NEXT:    s_fmamk_f32 s0, s1, 0x40800000, s0
236; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
237; GFX12-NEXT:    v_mov_b32_e32 v0, s0
238; GFX12-NEXT:    ; return to shader part epilog
239bb:
240  %i = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> zeroinitializer, i32 0, i32 0)
241  %i1 = bitcast i32 %i to float
242  %i2 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> zeroinitializer, i32 4, i32 0)
243  %i3 = bitcast i32 %i2 to float
244  %i4 = fmul contract float %i3, 4.0
245  %i5 = fadd contract float %i4, %i1
246  ret float %i5
247}
248
; Declarations of the intrinsics called by the test functions in this file.
; Only the two llvm.fma declarations carry explicit attributes
; (nounwind readnone); the others are declared without any.
249declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
250declare float @llvm.minnum.f32(float, float)
251declare float @llvm.maxnum.f32(float, float)
252declare half @llvm.minnum.f16(half, half)
253declare half @llvm.maxnum.f16(half, half)
254declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float)
255declare float @llvm.fma.f32(float, float, float) nounwind readnone
256declare half @llvm.fma.f16(half, half, half) nounwind readnone
257
257