xref: /llvm-project/llvm/test/CodeGen/AMDGPU/strict_fpext.ll (revision 11bf02e0192aea0ddef9a81098c2162cde82dc7e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI %s
3; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX8 %s
4; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX9 %s
5; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
6; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
7
; Scalar half -> float constrained fpext ("fpexcept.strict") on a by-value
; argument. SI additionally emits v_cvt_f16_f32 + v_and before the extension.
8define float @v_constrained_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 {
9; SI-LABEL: v_constrained_fpext_f16_to_f32_fpexcept_strict:
10; SI:       ; %bb.0:
11; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
13; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
14; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
15; SI-NEXT:    s_setpc_b64 s[30:31]
16;
17; GFX89-LABEL: v_constrained_fpext_f16_to_f32_fpexcept_strict:
18; GFX89:       ; %bb.0:
19; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20; GFX89-NEXT:    v_cvt_f32_f16_e32 v0, v0
21; GFX89-NEXT:    s_setpc_b64 s[30:31]
22;
23; GFX1011-LABEL: v_constrained_fpext_f16_to_f32_fpexcept_strict:
24; GFX1011:       ; %bb.0:
25; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26; GFX1011-NEXT:    v_cvt_f32_f16_e32 v0, v0
27; GFX1011-NEXT:    s_setpc_b64 s[30:31]
28  %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict")
29  ret float %result
30}
31
; <2 x half> -> <2 x float> constrained fpext on a by-value argument.
; GFX8/9/10 checks read the high half with an SDWA WORD_1 source select;
; GFX11 checks use an explicit 16-bit right shift instead.
32define <2 x float> @v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict(<2 x half> %arg) #0 {
33; SI-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
34; SI:       ; %bb.0:
35; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
37; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
38; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
39; SI-NEXT:    v_and_b32_e32 v1, 0xffff, v1
40; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
41; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
42; SI-NEXT:    s_setpc_b64 s[30:31]
43;
44; GFX89-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
45; GFX89:       ; %bb.0:
46; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, v0
48; GFX89-NEXT:    v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
49; GFX89-NEXT:    v_mov_b32_e32 v0, v2
50; GFX89-NEXT:    s_setpc_b64 s[30:31]
51;
52; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
53; GFX10:       ; %bb.0:
54; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, v0
56; GFX10-NEXT:    v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
57; GFX10-NEXT:    v_mov_b32_e32 v0, v2
58; GFX10-NEXT:    s_setpc_b64 s[30:31]
59;
60; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
61; GFX11:       ; %bb.0:
62; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
64; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
65; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v1
66; GFX11-NEXT:    s_setpc_b64 s[30:31]
67  %result = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %arg, metadata !"fpexcept.strict")
68  ret <2 x float>   %result
69}
70
; <3 x half> -> <3 x float> constrained fpext on a by-value argument.
; Outside SI the checks take the third element from v1 while the first two
; are unpacked from v0.
71define <3 x float> @v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict(<3 x half> %arg) #0 {
72; SI-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
73; SI:       ; %bb.0:
74; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
76; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
77; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
78; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
79; SI-NEXT:    v_and_b32_e32 v1, 0xffff, v1
80; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
81; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
82; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
83; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
84; SI-NEXT:    s_setpc_b64 s[30:31]
85;
86; GFX89-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
87; GFX89:       ; %bb.0:
88; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89; GFX89-NEXT:    v_cvt_f32_f16_e32 v4, v0
90; GFX89-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
91; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, v1
92; GFX89-NEXT:    v_mov_b32_e32 v0, v4
93; GFX89-NEXT:    v_mov_b32_e32 v1, v3
94; GFX89-NEXT:    s_setpc_b64 s[30:31]
95;
96; GFX10-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
97; GFX10:       ; %bb.0:
98; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99; GFX10-NEXT:    v_cvt_f32_f16_e32 v4, v0
100; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
101; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, v1
102; GFX10-NEXT:    v_mov_b32_e32 v0, v4
103; GFX10-NEXT:    v_mov_b32_e32 v1, v3
104; GFX10-NEXT:    s_setpc_b64 s[30:31]
105;
106; GFX11-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
107; GFX11:       ; %bb.0:
108; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
110; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
111; GFX11-NEXT:    v_cvt_f32_f16_e32 v3, v2
112; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, v1
113; GFX11-NEXT:    v_mov_b32_e32 v1, v3
114; GFX11-NEXT:    s_setpc_b64 s[30:31]
115  %result = call <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half> %arg, metadata !"fpexcept.strict")
116  ret <3 x float>   %result
117}
118
; Scalar float -> double constrained fpext; one shared GCN check body covers
; every RUN line (a single v_cvt_f64_f32).
119define double @v_constrained_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 {
120; GCN-LABEL: v_constrained_fpext_f32_to_f64_fpexcept_strict:
121; GCN:       ; %bb.0:
122; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123; GCN-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
124; GCN-NEXT:    s_setpc_b64 s[30:31]
125  %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %arg, metadata !"fpexcept.strict")
126  ret double %result
127}
128
; <2 x float> -> <2 x double> constrained fpext. The checks first copy v1 to
; v2 because the first result pair v[0:1] overwrites v1.
129define <2 x double> @v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict(<2 x float> %arg) #0 {
130; GCN-LABEL: v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict:
131; GCN:       ; %bb.0:
132; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133; GCN-NEXT:    v_mov_b32_e32 v2, v1
134; GCN-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
135; GCN-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
136; GCN-NEXT:    s_setpc_b64 s[30:31]
137  %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float> %arg, metadata !"fpexcept.strict")
138  ret <2 x double>   %result
139}
140
; <3 x float> -> <3 x double> constrained fpext. The GFX1011 checks emit the
; v[4:5] conversion before the v[2:3] one; SI and GFX89 emit them in
; register order.
141define <3 x double> @v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict(<3 x float> %arg) #0 {
142; SI-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict:
143; SI:       ; %bb.0:
144; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145; SI-NEXT:    v_mov_b32_e32 v4, v2
146; SI-NEXT:    v_mov_b32_e32 v2, v1
147; SI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
148; SI-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
149; SI-NEXT:    v_cvt_f64_f32_e32 v[4:5], v4
150; SI-NEXT:    s_setpc_b64 s[30:31]
151;
152; GFX89-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict:
153; GFX89:       ; %bb.0:
154; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155; GFX89-NEXT:    v_mov_b32_e32 v4, v2
156; GFX89-NEXT:    v_mov_b32_e32 v2, v1
157; GFX89-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
158; GFX89-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
159; GFX89-NEXT:    v_cvt_f64_f32_e32 v[4:5], v4
160; GFX89-NEXT:    s_setpc_b64 s[30:31]
161;
162; GFX1011-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict:
163; GFX1011:       ; %bb.0:
164; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165; GFX1011-NEXT:    v_mov_b32_e32 v4, v2
166; GFX1011-NEXT:    v_mov_b32_e32 v2, v1
167; GFX1011-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
168; GFX1011-NEXT:    v_cvt_f64_f32_e32 v[4:5], v4
169; GFX1011-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
170; GFX1011-NEXT:    s_setpc_b64 s[30:31]
171  %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float> %arg, metadata !"fpexcept.strict")
172  ret <3 x double>   %result
173}
174
; Scalar half -> double constrained fpext. Every check body goes through an
; intermediate f32: v_cvt_f32_f16 followed by v_cvt_f64_f32.
175define double @v_constrained_fpext_f16_to_f64_fpexcept_strict(half %arg) #0 {
176; SI-LABEL: v_constrained_fpext_f16_to_f64_fpexcept_strict:
177; SI:       ; %bb.0:
178; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
179; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
180; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
181; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
182; SI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
183; SI-NEXT:    s_setpc_b64 s[30:31]
184;
185; GFX89-LABEL: v_constrained_fpext_f16_to_f64_fpexcept_strict:
186; GFX89:       ; %bb.0:
187; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
188; GFX89-NEXT:    v_cvt_f32_f16_e32 v0, v0
189; GFX89-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
190; GFX89-NEXT:    s_setpc_b64 s[30:31]
191;
192; GFX1011-LABEL: v_constrained_fpext_f16_to_f64_fpexcept_strict:
193; GFX1011:       ; %bb.0:
194; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
195; GFX1011-NEXT:    v_cvt_f32_f16_e32 v0, v0
196; GFX1011-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
197; GFX1011-NEXT:    s_setpc_b64 s[30:31]
198  %result = call double @llvm.experimental.constrained.fpext.f64.f16(half %arg, metadata !"fpexcept.strict")
199  ret double %result
200}
201
; <2 x half> -> <2 x double> constrained fpext. Each element is converted
; f16 -> f32 -> f64 in the checks.
202define <2 x double> @v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict(<2 x half> %arg) #0 {
203; SI-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
204; SI:       ; %bb.0:
205; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
207; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
208; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
209; SI-NEXT:    v_and_b32_e32 v1, 0xffff, v1
210; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
211; SI-NEXT:    v_cvt_f32_f16_e32 v2, v1
212; SI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
213; SI-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
214; SI-NEXT:    s_setpc_b64 s[30:31]
215;
216; GFX89-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
217; GFX89:       ; %bb.0:
218; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
219; GFX89-NEXT:    v_cvt_f32_f16_e32 v1, v0
220; GFX89-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
221; GFX89-NEXT:    v_cvt_f64_f32_e32 v[0:1], v1
222; GFX89-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
223; GFX89-NEXT:    s_setpc_b64 s[30:31]
224;
225; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
226; GFX10:       ; %bb.0:
227; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v0
229; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
230; GFX10-NEXT:    v_cvt_f64_f32_e32 v[0:1], v1
231; GFX10-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
232; GFX10-NEXT:    s_setpc_b64 s[30:31]
233;
234; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
235; GFX11:       ; %bb.0:
236; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
238; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
239; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, v1
240; GFX11-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
241; GFX11-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
242; GFX11-NEXT:    s_setpc_b64 s[30:31]
243  %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half> %arg, metadata !"fpexcept.strict")
244  ret <2 x double>   %result
245}
246
; <3 x half> -> <3 x double> constrained fpext. Each element is converted
; f16 -> f32 -> f64 in the checks.
; NOTE(review): the function name says "v2f64" but the signature and the
; intrinsic are <3 x double> (v3f64). Renaming would also require updating
; every -LABEL line below, so it is left as-is here.
247define <3 x double> @v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict(<3 x half> %arg) #0 {
248; SI-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
249; SI:       ; %bb.0:
250; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
251; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
252; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
253; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
254; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
255; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
256; SI-NEXT:    v_and_b32_e32 v1, 0xffff, v1
257; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
258; SI-NEXT:    v_cvt_f32_f16_e32 v3, v1
259; SI-NEXT:    v_cvt_f32_f16_e32 v4, v2
260; SI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
261; SI-NEXT:    v_cvt_f64_f32_e32 v[2:3], v3
262; SI-NEXT:    v_cvt_f64_f32_e32 v[4:5], v4
263; SI-NEXT:    s_setpc_b64 s[30:31]
264;
265; GFX89-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
266; GFX89:       ; %bb.0:
267; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
268; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, v0
269; GFX89-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
270; GFX89-NEXT:    v_cvt_f32_f16_e32 v4, v1
271; GFX89-NEXT:    v_cvt_f64_f32_e32 v[0:1], v2
272; GFX89-NEXT:    v_cvt_f64_f32_e32 v[2:3], v3
273; GFX89-NEXT:    v_cvt_f64_f32_e32 v[4:5], v4
274; GFX89-NEXT:    s_setpc_b64 s[30:31]
275;
276; GFX10-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
277; GFX10:       ; %bb.0:
278; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
279; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, v0
280; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
281; GFX10-NEXT:    v_cvt_f32_f16_e32 v4, v1
282; GFX10-NEXT:    v_cvt_f64_f32_e32 v[0:1], v2
283; GFX10-NEXT:    v_cvt_f64_f32_e32 v[2:3], v3
284; GFX10-NEXT:    v_cvt_f64_f32_e32 v[4:5], v4
285; GFX10-NEXT:    s_setpc_b64 s[30:31]
286;
287; GFX11-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
288; GFX11:       ; %bb.0:
289; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
291; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
292; GFX11-NEXT:    v_cvt_f32_f16_e32 v3, v1
293; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, v2
294; GFX11-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
295; GFX11-NEXT:    v_cvt_f64_f32_e32 v[4:5], v3
296; GFX11-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
297; GFX11-NEXT:    s_setpc_b64 s[30:31]
298  %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f16(<3 x half> %arg, metadata !"fpexcept.strict")
299  ret <3 x double>   %result
300}
301
; fneg applied to the RESULT of a strict half -> float fpext. In every check
; body the negation stays a separate v_xor of the sign bit after the
; conversion.
302define float @v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 {
303; SI-LABEL: v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict:
304; SI:       ; %bb.0:
305; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
306; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
307; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
308; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
309; SI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
310; SI-NEXT:    s_setpc_b64 s[30:31]
311;
312; GFX89-LABEL: v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict:
313; GFX89:       ; %bb.0:
314; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
315; GFX89-NEXT:    v_cvt_f32_f16_e32 v0, v0
316; GFX89-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
317; GFX89-NEXT:    s_setpc_b64 s[30:31]
318;
319; GFX1011-LABEL: v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict:
320; GFX1011:       ; %bb.0:
321; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322; GFX1011-NEXT:    v_cvt_f32_f16_e32 v0, v0
323; GFX1011-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
324; GFX1011-NEXT:    s_setpc_b64 s[30:31]
325  %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict")
326  %neg.result = fneg float %result
327  ret float %neg.result
328}
329
; fneg applied to the OPERAND of a strict half -> float fpext. The GFX89 and
; GFX1011 checks show the negation as a source modifier (-v0) on the e64
; conversion; the SI checks keep a separate v_xor after the conversion.
330define float @v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict(half %arg) #0 {
331; SI-LABEL: v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict:
332; SI:       ; %bb.0:
333; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
334; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
335; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
336; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
337; SI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
338; SI-NEXT:    s_setpc_b64 s[30:31]
339;
340; GFX89-LABEL: v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict:
341; GFX89:       ; %bb.0:
342; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343; GFX89-NEXT:    v_cvt_f32_f16_e64 v0, -v0
344; GFX89-NEXT:    s_setpc_b64 s[30:31]
345;
346; GFX1011-LABEL: v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict:
347; GFX1011:       ; %bb.0:
348; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
349; GFX1011-NEXT:    v_cvt_f32_f16_e64 v0, -v0
350; GFX1011-NEXT:    s_setpc_b64 s[30:31]
351  %neg.arg = fneg half %arg
352  %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %neg.arg, metadata !"fpexcept.strict")
353  ret float %result
354}
355
; fneg applied to the OPERAND of a strict float -> double fpext. The negated
; value must be the one passed to the intrinsic; otherwise the fneg is dead
; and this test only repeats the plain f32 -> f64 case.
; NOTE(review): the GCN check below was updated by hand to the source-modifier
; form seen in the f16 variant of this test; regenerate with
; utils/update_llc_test_checks.py to confirm the exact output.
356define double @v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict(float %arg) #0 {
357; GCN-LABEL: v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict:
358; GCN:       ; %bb.0:
359; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
360; GCN-NEXT:    v_cvt_f64_f32_e64 v[0:1], -v0
361; GCN-NEXT:    s_setpc_b64 s[30:31]
362  %neg.arg = fneg float %arg
363  %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %neg.arg, metadata !"fpexcept.strict")
364  ret double %result
365}
366
; fneg applied to the RESULT of a strict float -> double fpext. The checks
; flip the sign bit with a v_xor on v1, the upper register of the v[0:1]
; result pair.
367define double @v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 {
368; GCN-LABEL: v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict:
369; GCN:       ; %bb.0:
370; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371; GCN-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
372; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
373; GCN-NEXT:    s_setpc_b64 s[30:31]
374  %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %arg, metadata !"fpexcept.strict")
375  %neg.result = fneg double %result
376  ret double %neg.result
377}
378
; Strict half -> float fpext where the half operand is loaded through an
; addrspace(1) pointer instead of being passed by value. Unlike the by-value
; variant, the SI checks here contain no leading v_cvt_f16_f32 / v_and.
379define float @v_constrained_fpext_f16_to_f32_noabi(ptr addrspace(1) %ptr) #0 {
380; SI-LABEL: v_constrained_fpext_f16_to_f32_noabi:
381; SI:       ; %bb.0:
382; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
383; SI-NEXT:    s_mov_b32 s6, 0
384; SI-NEXT:    s_mov_b32 s7, 0xf000
385; SI-NEXT:    s_mov_b32 s4, s6
386; SI-NEXT:    s_mov_b32 s5, s6
387; SI-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
388; SI-NEXT:    s_waitcnt vmcnt(0)
389; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
390; SI-NEXT:    s_setpc_b64 s[30:31]
391;
392; GFX8-LABEL: v_constrained_fpext_f16_to_f32_noabi:
393; GFX8:       ; %bb.0:
394; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
396; GFX8-NEXT:    s_waitcnt vmcnt(0)
397; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
398; GFX8-NEXT:    s_setpc_b64 s[30:31]
399;
400; GFX9-LABEL: v_constrained_fpext_f16_to_f32_noabi:
401; GFX9:       ; %bb.0:
402; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
403; GFX9-NEXT:    global_load_ushort v0, v[0:1], off
404; GFX9-NEXT:    s_waitcnt vmcnt(0)
405; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
406; GFX9-NEXT:    s_setpc_b64 s[30:31]
407;
408; GFX10-LABEL: v_constrained_fpext_f16_to_f32_noabi:
409; GFX10:       ; %bb.0:
410; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
411; GFX10-NEXT:    global_load_ushort v0, v[0:1], off
412; GFX10-NEXT:    s_waitcnt vmcnt(0)
413; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
414; GFX10-NEXT:    s_setpc_b64 s[30:31]
415;
416; GFX11-LABEL: v_constrained_fpext_f16_to_f32_noabi:
417; GFX11:       ; %bb.0:
418; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
419; GFX11-NEXT:    global_load_u16 v0, v[0:1], off
420; GFX11-NEXT:    s_waitcnt vmcnt(0)
421; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
422; GFX11-NEXT:    s_setpc_b64 s[30:31]
423  %val = load half, ptr addrspace(1) %ptr
424  %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %val, metadata !"fpexcept.strict")
425  ret float %result
426}
427
; Strict <2 x half> -> <2 x float> fpext where the vector operand is loaded
; through an addrspace(1) pointer as a single dword. The high half is read
; via a 16-bit shift (SI, GFX11) or an SDWA WORD_1 select (GFX8/9/10).
428define <2 x float> @v_constrained_fpext_v2f16_to_v2f32_noabi(ptr addrspace(1) %ptr) #0 {
429; SI-LABEL: v_constrained_fpext_v2f16_to_v2f32_noabi:
430; SI:       ; %bb.0:
431; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
432; SI-NEXT:    s_mov_b32 s6, 0
433; SI-NEXT:    s_mov_b32 s7, 0xf000
434; SI-NEXT:    s_mov_b32 s4, s6
435; SI-NEXT:    s_mov_b32 s5, s6
436; SI-NEXT:    buffer_load_dword v1, v[0:1], s[4:7], 0 addr64
437; SI-NEXT:    s_waitcnt vmcnt(0)
438; SI-NEXT:    v_cvt_f32_f16_e32 v0, v1
439; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
440; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
441; SI-NEXT:    s_setpc_b64 s[30:31]
442;
443; GFX8-LABEL: v_constrained_fpext_v2f16_to_v2f32_noabi:
444; GFX8:       ; %bb.0:
445; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
446; GFX8-NEXT:    flat_load_dword v1, v[0:1]
447; GFX8-NEXT:    s_waitcnt vmcnt(0)
448; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v1
449; GFX8-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
450; GFX8-NEXT:    s_setpc_b64 s[30:31]
451;
452; GFX9-LABEL: v_constrained_fpext_v2f16_to_v2f32_noabi:
453; GFX9:       ; %bb.0:
454; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
455; GFX9-NEXT:    global_load_dword v1, v[0:1], off
456; GFX9-NEXT:    s_waitcnt vmcnt(0)
457; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v1
458; GFX9-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
459; GFX9-NEXT:    s_setpc_b64 s[30:31]
460;
461; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f32_noabi:
462; GFX10:       ; %bb.0:
463; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
464; GFX10-NEXT:    global_load_dword v1, v[0:1], off
465; GFX10-NEXT:    s_waitcnt vmcnt(0)
466; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v1
467; GFX10-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
468; GFX10-NEXT:    s_setpc_b64 s[30:31]
469;
470; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f32_noabi:
471; GFX11:       ; %bb.0:
472; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
473; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
474; GFX11-NEXT:    s_waitcnt vmcnt(0)
475; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
476; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
477; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v1
478; GFX11-NEXT:    s_setpc_b64 s[30:31]
479  %val = load <2 x half>, ptr addrspace(1) %ptr
480  %result = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %val, metadata !"fpexcept.strict")
481  ret <2 x float> %result
482}
483
; Constrained fpext intrinsics called by the tests above, grouped by
; source/destination element type. Each takes the value to extend plus one
; metadata operand (the tests pass !"fpexcept.strict").
; f32 -> f64
484declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) #1
485declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) #1
486declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata) #1
487
; f16 -> f64
488declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata) #1
489declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half>, metadata) #1
490declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f16(<3 x half>, metadata) #1
491
; f16 -> f32
492declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #1
493declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #1
494declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #1
495
; #0 is attached to every test function in this file (strictfp).
; #1 is attached to the constrained fpext intrinsic declarations.
496attributes #0 = { strictfp }
497attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
498