xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,VI,VI-SAFE %s
3; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,VI,VI-NSZ %s
4
5; --------------------------------------------------------------------------------
6; rcp tests
7; --------------------------------------------------------------------------------
8
; Negates the result of llvm.amdgcn.rcp.f16 applied directly to the argument.
; The expected output is a single v_rcp_f16_e64 whose source operand carries
; the negation (-v0); no separate sign-flipping instruction is expected.
9define half @v_fneg_rcp_f16(half %a) #0 {
10; GCN-LABEL: v_fneg_rcp_f16:
11; GCN:       ; %bb.0:
12; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13; GCN-NEXT:    v_rcp_f16_e64 v0, -v0
14; GCN-NEXT:    s_setpc_b64 s[30:31]
15  %rcp = call half @llvm.amdgcn.rcp.f16(half %a)
16  %fneg = fneg half %rcp
17  ret half %fneg
18}
19
; Negates both the input to llvm.amdgcn.rcp.f16 and its result.
; The expected output is a plain v_rcp_f16_e32 on v0 with no negation on the
; operand and no extra instruction, i.e. neither fneg survives in the output.
20define half @v_fneg_rcp_fneg_f16(half %a) #0 {
21; GCN-LABEL: v_fneg_rcp_fneg_f16:
22; GCN:       ; %bb.0:
23; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24; GCN-NEXT:    v_rcp_f16_e32 v0, v0
25; GCN-NEXT:    s_setpc_b64 s[30:31]
26  %fneg.a = fneg half %a
27  %rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
28  %fneg = fneg half %rcp
29  ret half %fneg
30}
31
; Same fneg(rcp(fneg a)) pattern as v_fneg_rcp_fneg_f16, but the negated input
; %fneg.a has a second use: it is returned as element 1 of the result struct.
; (Despite "store" in the name, the extra use in this body is a returned value,
; not a store instruction.)
; The expected output keeps v_rcp_f16_e32 on the un-negated v0 and produces the
; negated input separately with v_xor_b32 against 0x8000; the final v_mov copies
; the rcp result from v2 into v0.
32define { half, half } @v_fneg_rcp_store_use_fneg_f16(half %a) #0 {
33; GCN-LABEL: v_fneg_rcp_store_use_fneg_f16:
34; GCN:       ; %bb.0:
35; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36; GCN-NEXT:    v_rcp_f16_e32 v2, v0
37; GCN-NEXT:    v_xor_b32_e32 v1, 0x8000, v0
38; GCN-NEXT:    v_mov_b32_e32 v0, v2
39; GCN-NEXT:    s_setpc_b64 s[30:31]
40  %fneg.a = fneg half %a
41  %rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
42  %fneg = fneg half %rcp
43  %insert.0 = insertvalue { half, half } poison, half %fneg, 0
44  %insert.1 = insertvalue { half, half } %insert.0, half %fneg.a, 1
45  ret { half, half } %insert.1
46}
47
; Same fneg(rcp(fneg a)) pattern, but the negated input %fneg.a is additionally
; multiplied by %c and that product is returned as element 1 of the struct.
; The expected output keeps v_rcp_f16_e32 on the un-negated v0, and the multiply
; is a v_mul_f16_e64 that carries the negation on its first source (-v0); no
; standalone sign-flipping instruction is expected.
48define { half, half } @v_fneg_rcp_multi_use_fneg_f16(half %a, half %c) #0 {
49; GCN-LABEL: v_fneg_rcp_multi_use_fneg_f16:
50; GCN:       ; %bb.0:
51; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52; GCN-NEXT:    v_rcp_f16_e32 v2, v0
53; GCN-NEXT:    v_mul_f16_e64 v1, -v0, v1
54; GCN-NEXT:    v_mov_b32_e32 v0, v2
55; GCN-NEXT:    s_setpc_b64 s[30:31]
56  %fneg.a = fneg half %a
57  %rcp = call half @llvm.amdgcn.rcp.f16(half %fneg.a)
58  %fneg = fneg half %rcp
59  %use1 = fmul half %fneg.a, %c
60  %insert.0 = insertvalue { half, half } poison, half %fneg, 0
61  %insert.1 = insertvalue { half, half } %insert.0, half %use1, 1
62  ret { half, half } %insert.1
63}
64
65; --------------------------------------------------------------------------------
66; sin tests
67; --------------------------------------------------------------------------------
68
; Negates the result of llvm.amdgcn.sin.f16 applied directly to the argument.
; The expected output is a single v_sin_f16_e64 whose source operand carries
; the negation (-v0), mirroring the shape of v_fneg_rcp_f16.
69define half @v_fneg_amdgcn_sin_f16(half %a) #0 {
70; GCN-LABEL: v_fneg_amdgcn_sin_f16:
71; GCN:       ; %bb.0:
72; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73; GCN-NEXT:    v_sin_f16_e64 v0, -v0
74; GCN-NEXT:    s_setpc_b64 s[30:31]
75  %sin = call half @llvm.amdgcn.sin.f16(half %a)
76  %fneg = fneg half %sin
77  ret half %fneg
78}
79
80; --------------------------------------------------------------------------------
81; vintrp tests
82; --------------------------------------------------------------------------------
83
; Feeds a negated f32 product into two llvm.amdgcn.interp.p1.f16 calls that
; differ only in their first i32 argument (0 and 1); the expected output shows
; them as attr0.x and attr0.y respectively.
; The expected output applies the negation inside the multiply (v_mul_f32_e64
; with -v1 as its second source), and both v_interp_p1ll_f16 instructions read
; v1 without any modifier. The checks also expect m0 to be set to 0 and an
; s_setreg_imm32_b32 write to HW_REG_MODE ahead of the interp instructions.
84define { float, float } @v_fneg_interp_p1_f16(float %a, float %b) #0 {
85; GCN-LABEL: v_fneg_interp_p1_f16:
86; GCN:       ; %bb.0:
87; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88; GCN-NEXT:    v_mul_f32_e64 v1, v0, -v1
89; GCN-NEXT:    s_mov_b32 m0, 0
90; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
91; GCN-NEXT:    v_interp_p1ll_f16 v0, v1, attr0.x
92; GCN-NEXT:    v_interp_p1ll_f16 v1, v1, attr0.y
93; GCN-NEXT:    s_setpc_b64 s[30:31]
94  %mul = fmul float %a, %b
95  %fneg = fneg float %mul
96  %intrp0 = call float @llvm.amdgcn.interp.p1.f16(float %fneg, i32 0, i32 0, i1 false, i32 0)
97  %intrp1 = call float @llvm.amdgcn.interp.p1.f16(float %fneg, i32 1, i32 0, i1 false, i32 0)
98  %insert.0 = insertvalue { float, float } poison, float %intrp0, 0
99  %insert.1 = insertvalue { float, float } %insert.0, float %intrp1, 1
100  ret { float, float } %insert.1
101}
102
; Feeds a negated f32 product as the second float operand of two
; llvm.amdgcn.interp.p2.f16 calls; the first float operand is the constant 4.0
; and the calls differ only in their first i32 argument (0 and 1).
; The expected output applies the negation inside the multiply (v_mul_f32_e64
; with -v1 as its second source), materializes 4.0 into v2 with v_mov_b32, and
; both v_interp_p2_f16 instructions read v1 without any modifier. As in the p1
; test, m0 is set to 0 and HW_REG_MODE is written before the interp instructions.
103define { half, half } @v_fneg_interp_p2_f16(float %a, float %b) #0 {
104; GCN-LABEL: v_fneg_interp_p2_f16:
105; GCN:       ; %bb.0:
106; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
107; GCN-NEXT:    v_mul_f32_e64 v1, v0, -v1
108; GCN-NEXT:    v_mov_b32_e32 v2, 4.0
109; GCN-NEXT:    s_mov_b32 m0, 0
110; GCN-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
111; GCN-NEXT:    v_interp_p2_f16 v0, v1, attr0.x, v2
112; GCN-NEXT:    v_interp_p2_f16 v1, v1, attr0.y, v2
113; GCN-NEXT:    s_setpc_b64 s[30:31]
114  %mul = fmul float %a, %b
115  %fneg = fneg float %mul
116  %intrp0 = call half @llvm.amdgcn.interp.p2.f16(float 4.0, float %fneg, i32 0, i32 0, i1 false, i32 0)
117  %intrp1 = call half @llvm.amdgcn.interp.p2.f16(float 4.0, float %fneg, i32 1, i32 0, i1 false, i32 0)
118  %insert.0 = insertvalue { half, half } poison, half %intrp0, 0
119  %insert.1 = insertvalue { half, half } %insert.0, half %intrp1, 1
120  ret { half, half } %insert.1
121}
122
123; --------------------------------------------------------------------------------
124; arithmetic.fence tests
125; --------------------------------------------------------------------------------
126
127; FIXME: Legalization/promote is broken
; Negates the result of llvm.arithmetic.fence.f16 applied directly to the
; argument. The expected output contains the ";ARITH_FENCE" marker (emitted
; ahead of the s_waitcnt) followed by a v_xor_b32 against 0x8000, i.e. the
; negation remains a separate instruction after the fence.
128define half @v_fneg_arithmetic_fence_f16(half %a) #0 {
129; GCN-LABEL: v_fneg_arithmetic_fence_f16:
130; GCN:       ; %bb.0:
131; GCN-NEXT:    ;ARITH_FENCE
132; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133; GCN-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
134; GCN-NEXT:    s_setpc_b64 s[30:31]
135  %fence = call half @llvm.arithmetic.fence.f16(half %a)
136  %fneg = fneg half %fence
137  ret half %fneg
138}
139
; Negates the result of llvm.arithmetic.fence.f16 whose operand is an f16
; multiply. The expected output is an unmodified v_mul_f16_e32, then the
; ";ARITH_FENCE" marker, then a v_xor_b32 against 0x8000: the negation is not
; moved across the fence into the multiply's source operands.
140define half @v_fneg_arithmetic_fence_fmul_f16(half %a, half %b) #0 {
141; GCN-LABEL: v_fneg_arithmetic_fence_fmul_f16:
142; GCN:       ; %bb.0:
143; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
144; GCN-NEXT:    v_mul_f16_e32 v0, v0, v1
145; GCN-NEXT:    ;ARITH_FENCE
146; GCN-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
147; GCN-NEXT:    s_setpc_b64 s[30:31]
148  %mul = fmul half %a, %b
149  %fence = call half @llvm.arithmetic.fence.f16(half %mul)
150  %fneg = fneg half %fence
151  ret half %fneg
152}
153
; Intrinsics called by the functions in this file. The rcp, sin and
; arithmetic.fence declarations use attribute group #1; the two interp
; declarations use #0, the same group attached to every function defined here.
154declare half @llvm.amdgcn.rcp.f16(half) #1
155declare half @llvm.amdgcn.sin.f16(half) #1
156declare half @llvm.arithmetic.fence.f16(half) #1
157declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32) #0
158declare half @llvm.amdgcn.interp.p2.f16(float, float, i32, i32, i1, i32) #0
159
; Attribute groups. Only #0 and #1 are referenced by the definitions and
; declarations visible in this file; #2, #3 and #4 have no visible users here.
; NOTE(review): the unreferenced groups may be leftovers from a larger test
; this file was derived from - confirm before removing them.
160attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
161attributes #1 = { nounwind readnone }
162attributes #2 = { nounwind "unsafe-fp-math"="true" }
163attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
164attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
165;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
166; GCN-NSZ: {{.*}}
167; GCN-SAFE: {{.*}}
168; VI: {{.*}}
169; VI-NSZ: {{.*}}
170; VI-SAFE: {{.*}}
171