xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll (revision 15676ec552c7028ce3c902cec19efe9c99d7f303)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=0 -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=SI-SDAG %s
3; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=1 -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=SI-GISEL %s
4; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=VI-SDAG %s
5; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s
6; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SDAG %s
7; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-GISEL %s
8; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-SDAG %s
9; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-GISEL %s
10; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG %s
11; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL %s
12
; Kernel under test - scalar float -> half truncation.
; Loads one float through %a, converts it with fptrunc, and stores the half
; result through %r.
;
; The assertion lines that sit between the opening parenthesis of the signature
; and the parameter list are autogenerated (see the NOTE at the top of this
; file). Regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
;
; Every checked configuration below expects exactly one v_cvt_f16_f32_e32 for
; the conversion. The two selectors differ only in how the input is fetched:
; the -global-isel=0 runs expect a buffer load into v0, while the
; -global-isel=1 runs expect a scalar load and feed the SGPR straight into the
; convert.
13define amdgpu_kernel void @fptrunc_f32_to_f16(
14; SI-SDAG-LABEL: fptrunc_f32_to_f16:
15; SI-SDAG:       ; %bb.0: ; %entry
16; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
17; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
18; SI-SDAG-NEXT:    s_mov_b32 s6, -1
19; SI-SDAG-NEXT:    s_mov_b32 s10, s6
20; SI-SDAG-NEXT:    s_mov_b32 s11, s7
21; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
22; SI-SDAG-NEXT:    s_mov_b32 s8, s2
23; SI-SDAG-NEXT:    s_mov_b32 s9, s3
24; SI-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
25; SI-SDAG-NEXT:    s_mov_b32 s4, s0
26; SI-SDAG-NEXT:    s_mov_b32 s5, s1
27; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
28; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
29; SI-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
30; SI-SDAG-NEXT:    s_endpgm
31;
32; SI-GISEL-LABEL: fptrunc_f32_to_f16:
33; SI-GISEL:       ; %bb.0: ; %entry
34; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
35; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
36; SI-GISEL-NEXT:    s_load_dword s3, s[2:3], 0x0
37; SI-GISEL-NEXT:    s_mov_b32 s2, -1
38; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
39; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s3
40; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
41; SI-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
42; SI-GISEL-NEXT:    s_endpgm
43;
44; VI-SDAG-LABEL: fptrunc_f32_to_f16:
45; VI-SDAG:       ; %bb.0: ; %entry
46; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
47; VI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
48; VI-SDAG-NEXT:    s_mov_b32 s6, -1
49; VI-SDAG-NEXT:    s_mov_b32 s10, s6
50; VI-SDAG-NEXT:    s_mov_b32 s11, s7
51; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
52; VI-SDAG-NEXT:    s_mov_b32 s8, s2
53; VI-SDAG-NEXT:    s_mov_b32 s9, s3
54; VI-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
55; VI-SDAG-NEXT:    s_mov_b32 s4, s0
56; VI-SDAG-NEXT:    s_mov_b32 s5, s1
57; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
58; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
59; VI-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
60; VI-SDAG-NEXT:    s_endpgm
61;
62; VI-GISEL-LABEL: fptrunc_f32_to_f16:
63; VI-GISEL:       ; %bb.0: ; %entry
64; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
65; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
66; VI-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
67; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
68; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
69; VI-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
70; VI-GISEL-NEXT:    s_mov_b32 s2, -1
71; VI-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
72; VI-GISEL-NEXT:    s_endpgm
73;
74; GFX9-SDAG-LABEL: fptrunc_f32_to_f16:
75; GFX9-SDAG:       ; %bb.0: ; %entry
76; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
77; GFX9-SDAG-NEXT:    s_mov_b32 s7, 0xf000
78; GFX9-SDAG-NEXT:    s_mov_b32 s6, -1
79; GFX9-SDAG-NEXT:    s_mov_b32 s10, s6
80; GFX9-SDAG-NEXT:    s_mov_b32 s11, s7
81; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
82; GFX9-SDAG-NEXT:    s_mov_b32 s8, s2
83; GFX9-SDAG-NEXT:    s_mov_b32 s9, s3
84; GFX9-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
85; GFX9-SDAG-NEXT:    s_mov_b32 s4, s0
86; GFX9-SDAG-NEXT:    s_mov_b32 s5, s1
87; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
88; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
89; GFX9-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
90; GFX9-SDAG-NEXT:    s_endpgm
91;
92; GFX9-GISEL-LABEL: fptrunc_f32_to_f16:
93; GFX9-GISEL:       ; %bb.0: ; %entry
94; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
95; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
96; GFX9-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
97; GFX9-GISEL-NEXT:    s_mov_b32 s3, 0xf000
98; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
99; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
100; GFX9-GISEL-NEXT:    s_mov_b32 s2, -1
101; GFX9-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
102; GFX9-GISEL-NEXT:    s_endpgm
103;
104; GFX950-SDAG-LABEL: fptrunc_f32_to_f16:
105; GFX950-SDAG:       ; %bb.0: ; %entry
106; GFX950-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
107; GFX950-SDAG-NEXT:    s_mov_b32 s7, 0xf000
108; GFX950-SDAG-NEXT:    s_mov_b32 s6, -1
109; GFX950-SDAG-NEXT:    s_mov_b32 s10, s6
110; GFX950-SDAG-NEXT:    s_mov_b32 s11, s7
111; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
112; GFX950-SDAG-NEXT:    s_mov_b32 s8, s2
113; GFX950-SDAG-NEXT:    s_mov_b32 s9, s3
114; GFX950-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
115; GFX950-SDAG-NEXT:    s_mov_b32 s4, s0
116; GFX950-SDAG-NEXT:    s_mov_b32 s5, s1
117; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
118; GFX950-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
119; GFX950-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
120; GFX950-SDAG-NEXT:    s_endpgm
121;
122; GFX950-GISEL-LABEL: fptrunc_f32_to_f16:
123; GFX950-GISEL:       ; %bb.0: ; %entry
124; GFX950-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
125; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
126; GFX950-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
127; GFX950-GISEL-NEXT:    s_mov_b32 s3, 0xf000
128; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
129; GFX950-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
130; GFX950-GISEL-NEXT:    s_mov_b32 s2, -1
131; GFX950-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
132; GFX950-GISEL-NEXT:    s_endpgm
133;
134; GFX11-SDAG-LABEL: fptrunc_f32_to_f16:
135; GFX11-SDAG:       ; %bb.0: ; %entry
136; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
137; GFX11-SDAG-NEXT:    s_mov_b32 s6, -1
138; GFX11-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
139; GFX11-SDAG-NEXT:    s_mov_b32 s10, s6
140; GFX11-SDAG-NEXT:    s_mov_b32 s11, s7
141; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
142; GFX11-SDAG-NEXT:    s_mov_b32 s8, s2
143; GFX11-SDAG-NEXT:    s_mov_b32 s9, s3
144; GFX11-SDAG-NEXT:    s_mov_b32 s4, s0
145; GFX11-SDAG-NEXT:    buffer_load_b32 v0, off, s[8:11], 0
146; GFX11-SDAG-NEXT:    s_mov_b32 s5, s1
147; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
148; GFX11-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
149; GFX11-SDAG-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
150; GFX11-SDAG-NEXT:    s_endpgm
151;
152; GFX11-GISEL-LABEL: fptrunc_f32_to_f16:
153; GFX11-GISEL:       ; %bb.0: ; %entry
154; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
155; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
156; GFX11-GISEL-NEXT:    s_load_b32 s2, s[2:3], 0x0
157; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
158; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
159; GFX11-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
160; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
161; GFX11-GISEL-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
162; GFX11-GISEL-NEXT:    s_endpgm
163    ptr addrspace(1) %r,
164    ptr addrspace(1) %a) {
; The block label below is echoed in the "%bb.0" assertion lines above, so it
; must not be renamed without regenerating the checks.
165entry:
  ; Read the f32 input through the addrspace(1) pointer %a.
166  %a.val = load float, ptr addrspace(1) %a
  ; The operation under test: narrow f32 to f16.
167  %r.val = fptrunc float %a.val to half
  ; Write the f16 result through the addrspace(1) pointer %r.
168  store half %r.val, ptr addrspace(1) %r
169  ret void
170}
171
; Kernel under test - scalar double -> half truncation.
; Loads one double through %a, converts it with a single fptrunc to half, and
; stores the result through %r.
;
; The assertion lines that sit between the opening parenthesis of the signature
; and the parameter list are autogenerated (see the NOTE at the top of this
; file). Regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
;
; Every checked configuration below expects a two-instruction lowering:
; v_cvt_f32_f64_e32 followed by v_cvt_f16_f32_e32 (the GFX11 runs additionally
; expect an s_delay_alu between or before the dependent convert).
; NOTE(review): converting through f32 rounds twice; presumably this is only
; acceptable because the llc invocations at the top of the file pass
; -enable-unsafe-fp-math - confirm before reusing these checks without it.
172define amdgpu_kernel void @fptrunc_f64_to_f16(
173; SI-SDAG-LABEL: fptrunc_f64_to_f16:
174; SI-SDAG:       ; %bb.0: ; %entry
175; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
176; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
177; SI-SDAG-NEXT:    s_mov_b32 s6, -1
178; SI-SDAG-NEXT:    s_mov_b32 s10, s6
179; SI-SDAG-NEXT:    s_mov_b32 s11, s7
180; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
181; SI-SDAG-NEXT:    s_mov_b32 s8, s2
182; SI-SDAG-NEXT:    s_mov_b32 s9, s3
183; SI-SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
184; SI-SDAG-NEXT:    s_mov_b32 s4, s0
185; SI-SDAG-NEXT:    s_mov_b32 s5, s1
186; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
187; SI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
188; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
189; SI-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
190; SI-SDAG-NEXT:    s_endpgm
191;
192; SI-GISEL-LABEL: fptrunc_f64_to_f16:
193; SI-GISEL:       ; %bb.0: ; %entry
194; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
195; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
196; SI-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
197; SI-GISEL-NEXT:    s_mov_b32 s2, -1
198; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
199; SI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
200; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
201; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
202; SI-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
203; SI-GISEL-NEXT:    s_endpgm
204;
205; VI-SDAG-LABEL: fptrunc_f64_to_f16:
206; VI-SDAG:       ; %bb.0: ; %entry
207; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
208; VI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
209; VI-SDAG-NEXT:    s_mov_b32 s6, -1
210; VI-SDAG-NEXT:    s_mov_b32 s10, s6
211; VI-SDAG-NEXT:    s_mov_b32 s11, s7
212; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
213; VI-SDAG-NEXT:    s_mov_b32 s8, s2
214; VI-SDAG-NEXT:    s_mov_b32 s9, s3
215; VI-SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
216; VI-SDAG-NEXT:    s_mov_b32 s4, s0
217; VI-SDAG-NEXT:    s_mov_b32 s5, s1
218; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
219; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
220; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
221; VI-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
222; VI-SDAG-NEXT:    s_endpgm
223;
224; VI-GISEL-LABEL: fptrunc_f64_to_f16:
225; VI-GISEL:       ; %bb.0: ; %entry
226; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
227; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
228; VI-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
229; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
230; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
231; VI-GISEL-NEXT:    s_mov_b32 s2, -1
232; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
233; VI-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
234; VI-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
235; VI-GISEL-NEXT:    s_endpgm
236;
237; GFX9-SDAG-LABEL: fptrunc_f64_to_f16:
238; GFX9-SDAG:       ; %bb.0: ; %entry
239; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
240; GFX9-SDAG-NEXT:    s_mov_b32 s7, 0xf000
241; GFX9-SDAG-NEXT:    s_mov_b32 s6, -1
242; GFX9-SDAG-NEXT:    s_mov_b32 s10, s6
243; GFX9-SDAG-NEXT:    s_mov_b32 s11, s7
244; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
245; GFX9-SDAG-NEXT:    s_mov_b32 s8, s2
246; GFX9-SDAG-NEXT:    s_mov_b32 s9, s3
247; GFX9-SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
248; GFX9-SDAG-NEXT:    s_mov_b32 s4, s0
249; GFX9-SDAG-NEXT:    s_mov_b32 s5, s1
250; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
251; GFX9-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
252; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
253; GFX9-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
254; GFX9-SDAG-NEXT:    s_endpgm
255;
256; GFX9-GISEL-LABEL: fptrunc_f64_to_f16:
257; GFX9-GISEL:       ; %bb.0: ; %entry
258; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
259; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
260; GFX9-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
261; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
262; GFX9-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
263; GFX9-GISEL-NEXT:    s_mov_b32 s2, -1
264; GFX9-GISEL-NEXT:    s_mov_b32 s3, 0xf000
265; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
266; GFX9-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
267; GFX9-GISEL-NEXT:    s_endpgm
268;
269; GFX950-SDAG-LABEL: fptrunc_f64_to_f16:
270; GFX950-SDAG:       ; %bb.0: ; %entry
271; GFX950-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
272; GFX950-SDAG-NEXT:    s_mov_b32 s7, 0xf000
273; GFX950-SDAG-NEXT:    s_mov_b32 s6, -1
274; GFX950-SDAG-NEXT:    s_mov_b32 s10, s6
275; GFX950-SDAG-NEXT:    s_mov_b32 s11, s7
276; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
277; GFX950-SDAG-NEXT:    s_mov_b32 s8, s2
278; GFX950-SDAG-NEXT:    s_mov_b32 s9, s3
279; GFX950-SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
280; GFX950-SDAG-NEXT:    s_mov_b32 s4, s0
281; GFX950-SDAG-NEXT:    s_mov_b32 s5, s1
282; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
283; GFX950-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
284; GFX950-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
285; GFX950-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
286; GFX950-SDAG-NEXT:    s_endpgm
287;
288; GFX950-GISEL-LABEL: fptrunc_f64_to_f16:
289; GFX950-GISEL:       ; %bb.0: ; %entry
290; GFX950-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
291; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
292; GFX950-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
293; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
294; GFX950-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
295; GFX950-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
296; GFX950-GISEL-NEXT:    s_mov_b32 s2, -1
297; GFX950-GISEL-NEXT:    s_mov_b32 s3, 0xf000
298; GFX950-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
299; GFX950-GISEL-NEXT:    s_endpgm
300;
301; GFX11-SDAG-LABEL: fptrunc_f64_to_f16:
302; GFX11-SDAG:       ; %bb.0: ; %entry
303; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
304; GFX11-SDAG-NEXT:    s_mov_b32 s6, -1
305; GFX11-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
306; GFX11-SDAG-NEXT:    s_mov_b32 s10, s6
307; GFX11-SDAG-NEXT:    s_mov_b32 s11, s7
308; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
309; GFX11-SDAG-NEXT:    s_mov_b32 s8, s2
310; GFX11-SDAG-NEXT:    s_mov_b32 s9, s3
311; GFX11-SDAG-NEXT:    s_mov_b32 s4, s0
312; GFX11-SDAG-NEXT:    buffer_load_b64 v[0:1], off, s[8:11], 0
313; GFX11-SDAG-NEXT:    s_mov_b32 s5, s1
314; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
315; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
316; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
317; GFX11-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
318; GFX11-SDAG-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
319; GFX11-SDAG-NEXT:    s_endpgm
320;
321; GFX11-GISEL-LABEL: fptrunc_f64_to_f16:
322; GFX11-GISEL:       ; %bb.0: ; %entry
323; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
324; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
325; GFX11-GISEL-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
326; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
327; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
328; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
329; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
330; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
331; GFX11-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
332; GFX11-GISEL-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
333; GFX11-GISEL-NEXT:    s_endpgm
334    ptr addrspace(1) %r,
335    ptr addrspace(1) %a) {
; The block label below is echoed in the "%bb.0" assertion lines above, so it
; must not be renamed without regenerating the checks.
336entry:
  ; Read the f64 input through the addrspace(1) pointer %a.
337  %a.val = load double, ptr addrspace(1) %a
  ; The operation under test: narrow f64 directly to f16 in one IR instruction.
338  %r.val = fptrunc double %a.val to half
  ; Write the f16 result through the addrspace(1) pointer %r.
339  store half %r.val, ptr addrspace(1) %r
340  ret void
341}
342
; Kernel under test - <2 x float> -> <2 x half> vector truncation.
; Loads a two-element float vector through %a, truncates both lanes with one
; vector fptrunc, and stores the packed <2 x half> through %r.
;
; The assertion lines that sit between the opening parenthesis of the signature
; and the parameter list are autogenerated (see the NOTE at the top of this
; file). Regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
;
; What the checks below expect for packing the two halves into one dword:
;   - SI runs       - two v_cvt_f16_f32_e32, then v_lshlrev_b32 by 16 and v_or_b32.
;   - VI runs       - one v_cvt_f16_f32_sdwa writing WORD_1 plus one plain
;                     v_cvt_f16_f32_e32, merged with v_or_b32.
;   - GFX9 / GFX11  - two v_cvt_f16_f32_e32 followed by v_pack_b32_f16.
;   - GFX950 runs   - a single v_cvt_pk_f16_f32 that converts both lanes.
343define amdgpu_kernel void @fptrunc_v2f32_to_v2f16(
344; SI-SDAG-LABEL: fptrunc_v2f32_to_v2f16:
345; SI-SDAG:       ; %bb.0: ; %entry
346; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
347; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
348; SI-SDAG-NEXT:    s_mov_b32 s6, -1
349; SI-SDAG-NEXT:    s_mov_b32 s10, s6
350; SI-SDAG-NEXT:    s_mov_b32 s11, s7
351; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
352; SI-SDAG-NEXT:    s_mov_b32 s8, s2
353; SI-SDAG-NEXT:    s_mov_b32 s9, s3
354; SI-SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
355; SI-SDAG-NEXT:    s_mov_b32 s4, s0
356; SI-SDAG-NEXT:    s_mov_b32 s5, s1
357; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
358; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
359; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
360; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
361; SI-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
362; SI-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
363; SI-SDAG-NEXT:    s_endpgm
364;
365; SI-GISEL-LABEL: fptrunc_v2f32_to_v2f16:
366; SI-GISEL:       ; %bb.0: ; %entry
367; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
368; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
369; SI-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
370; SI-GISEL-NEXT:    s_mov_b32 s2, -1
371; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
372; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s4
373; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v1, s5
374; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
375; SI-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
376; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
377; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
378; SI-GISEL-NEXT:    s_endpgm
379;
380; VI-SDAG-LABEL: fptrunc_v2f32_to_v2f16:
381; VI-SDAG:       ; %bb.0: ; %entry
382; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
383; VI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
384; VI-SDAG-NEXT:    s_mov_b32 s6, -1
385; VI-SDAG-NEXT:    s_mov_b32 s10, s6
386; VI-SDAG-NEXT:    s_mov_b32 s11, s7
387; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
388; VI-SDAG-NEXT:    s_mov_b32 s8, s2
389; VI-SDAG-NEXT:    s_mov_b32 s9, s3
390; VI-SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
391; VI-SDAG-NEXT:    s_mov_b32 s4, s0
392; VI-SDAG-NEXT:    s_mov_b32 s5, s1
393; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
394; VI-SDAG-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
395; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
396; VI-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
397; VI-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
398; VI-SDAG-NEXT:    s_endpgm
399;
400; VI-GISEL-LABEL: fptrunc_v2f32_to_v2f16:
401; VI-GISEL:       ; %bb.0: ; %entry
402; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
403; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
404; VI-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
405; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
406; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
407; VI-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
408; VI-GISEL-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
409; VI-GISEL-NEXT:    s_mov_b32 s2, -1
410; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
411; VI-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
412; VI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
413; VI-GISEL-NEXT:    s_endpgm
414;
415; GFX9-SDAG-LABEL: fptrunc_v2f32_to_v2f16:
416; GFX9-SDAG:       ; %bb.0: ; %entry
417; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
418; GFX9-SDAG-NEXT:    s_mov_b32 s7, 0xf000
419; GFX9-SDAG-NEXT:    s_mov_b32 s6, -1
420; GFX9-SDAG-NEXT:    s_mov_b32 s10, s6
421; GFX9-SDAG-NEXT:    s_mov_b32 s11, s7
422; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
423; GFX9-SDAG-NEXT:    s_mov_b32 s8, s2
424; GFX9-SDAG-NEXT:    s_mov_b32 s9, s3
425; GFX9-SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
426; GFX9-SDAG-NEXT:    s_mov_b32 s4, s0
427; GFX9-SDAG-NEXT:    s_mov_b32 s5, s1
428; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
429; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
430; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
431; GFX9-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v1
432; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
433; GFX9-SDAG-NEXT:    s_endpgm
434;
435; GFX9-GISEL-LABEL: fptrunc_v2f32_to_v2f16:
436; GFX9-GISEL:       ; %bb.0: ; %entry
437; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
438; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
439; GFX9-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
440; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
441; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
442; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v1, s3
443; GFX9-GISEL-NEXT:    s_mov_b32 s2, -1
444; GFX9-GISEL-NEXT:    s_mov_b32 s3, 0xf000
445; GFX9-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
446; GFX9-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
447; GFX9-GISEL-NEXT:    s_endpgm
448;
449; GFX950-SDAG-LABEL: fptrunc_v2f32_to_v2f16:
450; GFX950-SDAG:       ; %bb.0: ; %entry
451; GFX950-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
452; GFX950-SDAG-NEXT:    s_mov_b32 s7, 0xf000
453; GFX950-SDAG-NEXT:    s_mov_b32 s6, -1
454; GFX950-SDAG-NEXT:    s_mov_b32 s10, s6
455; GFX950-SDAG-NEXT:    s_mov_b32 s11, s7
456; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
457; GFX950-SDAG-NEXT:    s_mov_b32 s8, s2
458; GFX950-SDAG-NEXT:    s_mov_b32 s9, s3
459; GFX950-SDAG-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
460; GFX950-SDAG-NEXT:    s_mov_b32 s4, s0
461; GFX950-SDAG-NEXT:    s_mov_b32 s5, s1
462; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
463; GFX950-SDAG-NEXT:    v_cvt_pk_f16_f32 v0, v0, v1
464; GFX950-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
465; GFX950-SDAG-NEXT:    s_endpgm
466;
467; GFX950-GISEL-LABEL: fptrunc_v2f32_to_v2f16:
468; GFX950-GISEL:       ; %bb.0: ; %entry
469; GFX950-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
470; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
471; GFX950-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
472; GFX950-GISEL-NEXT:    s_mov_b32 s2, -1
473; GFX950-GISEL-NEXT:    s_mov_b32 s3, 0xf000
474; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
475; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[0:1], s[4:5]
476; GFX950-GISEL-NEXT:    v_cvt_pk_f16_f32 v0, v0, v1
477; GFX950-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
478; GFX950-GISEL-NEXT:    s_endpgm
479;
480; GFX11-SDAG-LABEL: fptrunc_v2f32_to_v2f16:
481; GFX11-SDAG:       ; %bb.0: ; %entry
482; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
483; GFX11-SDAG-NEXT:    s_mov_b32 s6, -1
484; GFX11-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
485; GFX11-SDAG-NEXT:    s_mov_b32 s10, s6
486; GFX11-SDAG-NEXT:    s_mov_b32 s11, s7
487; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
488; GFX11-SDAG-NEXT:    s_mov_b32 s8, s2
489; GFX11-SDAG-NEXT:    s_mov_b32 s9, s3
490; GFX11-SDAG-NEXT:    s_mov_b32 s4, s0
491; GFX11-SDAG-NEXT:    buffer_load_b64 v[0:1], off, s[8:11], 0
492; GFX11-SDAG-NEXT:    s_mov_b32 s5, s1
493; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
494; GFX11-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
495; GFX11-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
496; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
497; GFX11-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v1
498; GFX11-SDAG-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
499; GFX11-SDAG-NEXT:    s_endpgm
500;
501; GFX11-GISEL-LABEL: fptrunc_v2f32_to_v2f16:
502; GFX11-GISEL:       ; %bb.0: ; %entry
503; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
504; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
505; GFX11-GISEL-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
506; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
507; GFX11-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
508; GFX11-GISEL-NEXT:    v_cvt_f16_f32_e32 v1, s3
509; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
510; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
511; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
512; GFX11-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
513; GFX11-GISEL-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
514; GFX11-GISEL-NEXT:    s_endpgm
515    ptr addrspace(1) %r,
516    ptr addrspace(1) %a) {
; The block label below is echoed in the "%bb.0" assertion lines above, so it
; must not be renamed without regenerating the checks.
517entry:
  ; Read both f32 lanes through the addrspace(1) pointer %a.
518  %a.val = load <2 x float>, ptr addrspace(1) %a
  ; The operation under test: narrow each lane from f32 to f16.
519  %r.val = fptrunc <2 x float> %a.val to <2 x half>
  ; Write the packed <2 x half> through the addrspace(1) pointer %r.
520  store <2 x half> %r.val, ptr addrspace(1) %r
521  ret void
522}
523
524define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
525; SI-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
526; SI-SDAG:       ; %bb.0: ; %entry
527; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
528; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
529; SI-SDAG-NEXT:    s_mov_b32 s6, -1
530; SI-SDAG-NEXT:    s_mov_b32 s10, s6
531; SI-SDAG-NEXT:    s_mov_b32 s11, s7
532; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
533; SI-SDAG-NEXT:    s_mov_b32 s8, s2
534; SI-SDAG-NEXT:    s_mov_b32 s9, s3
535; SI-SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
536; SI-SDAG-NEXT:    s_mov_b32 s4, s0
537; SI-SDAG-NEXT:    s_mov_b32 s5, s1
538; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
539; SI-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, v[2:3]
540; SI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
541; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v2
542; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
543; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
544; SI-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
545; SI-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
546; SI-SDAG-NEXT:    s_endpgm
547;
548; SI-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
549; SI-GISEL:       ; %bb.0: ; %entry
550; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
551; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
552; SI-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
553; SI-GISEL-NEXT:    s_mov_b32 s2, -1
554; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
555; SI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
556; SI-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
557; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
558; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
559; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
560; SI-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
561; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
562; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
563; SI-GISEL-NEXT:    s_endpgm
564;
565; VI-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
566; VI-SDAG:       ; %bb.0: ; %entry
567; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
568; VI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
569; VI-SDAG-NEXT:    s_mov_b32 s6, -1
570; VI-SDAG-NEXT:    s_mov_b32 s10, s6
571; VI-SDAG-NEXT:    s_mov_b32 s11, s7
572; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
573; VI-SDAG-NEXT:    s_mov_b32 s8, s2
574; VI-SDAG-NEXT:    s_mov_b32 s9, s3
575; VI-SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
576; VI-SDAG-NEXT:    s_mov_b32 s4, s0
577; VI-SDAG-NEXT:    s_mov_b32 s5, s1
578; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
579; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, v[2:3]
580; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
581; VI-SDAG-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
582; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
583; VI-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
584; VI-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
585; VI-SDAG-NEXT:    s_endpgm
586;
587; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
588; VI-GISEL:       ; %bb.0: ; %entry
589; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
590; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
591; VI-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
592; VI-GISEL-NEXT:    s_mov_b32 s2, -1
593; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
594; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
595; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
596; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
597; VI-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
598; VI-GISEL-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
599; VI-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
600; VI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
601; VI-GISEL-NEXT:    s_endpgm
602;
603; GFX9-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
604; GFX9-SDAG:       ; %bb.0: ; %entry
605; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
606; GFX9-SDAG-NEXT:    s_mov_b32 s7, 0xf000
607; GFX9-SDAG-NEXT:    s_mov_b32 s6, -1
608; GFX9-SDAG-NEXT:    s_mov_b32 s10, s6
609; GFX9-SDAG-NEXT:    s_mov_b32 s11, s7
610; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
611; GFX9-SDAG-NEXT:    s_mov_b32 s8, s2
612; GFX9-SDAG-NEXT:    s_mov_b32 s9, s3
613; GFX9-SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
614; GFX9-SDAG-NEXT:    s_mov_b32 s4, s0
615; GFX9-SDAG-NEXT:    s_mov_b32 s5, s1
616; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
617; GFX9-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, v[2:3]
618; GFX9-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
619; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v2
620; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
621; GFX9-SDAG-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
622; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
623; GFX9-SDAG-NEXT:    s_endpgm
624;
625; GFX9-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
626; GFX9-GISEL:       ; %bb.0: ; %entry
627; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
628; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
629; GFX9-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
630; GFX9-GISEL-NEXT:    s_mov_b32 s2, -1
631; GFX9-GISEL-NEXT:    s_mov_b32 s3, 0xf000
632; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
633; GFX9-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
634; GFX9-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
635; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
636; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
637; GFX9-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
638; GFX9-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
639; GFX9-GISEL-NEXT:    s_endpgm
640;
641; GFX950-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
642; GFX950-SDAG:       ; %bb.0: ; %entry
643; GFX950-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
644; GFX950-SDAG-NEXT:    s_mov_b32 s7, 0xf000
645; GFX950-SDAG-NEXT:    s_mov_b32 s6, -1
646; GFX950-SDAG-NEXT:    s_mov_b32 s10, s6
647; GFX950-SDAG-NEXT:    s_mov_b32 s11, s7
648; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
649; GFX950-SDAG-NEXT:    s_mov_b32 s8, s2
650; GFX950-SDAG-NEXT:    s_mov_b32 s9, s3
651; GFX950-SDAG-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
652; GFX950-SDAG-NEXT:    s_mov_b32 s4, s0
653; GFX950-SDAG-NEXT:    s_mov_b32 s5, s1
654; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
655; GFX950-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, v[2:3]
656; GFX950-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
657; GFX950-SDAG-NEXT:    v_cvt_pk_f16_f32 v0, v0, v2
658; GFX950-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
659; GFX950-SDAG-NEXT:    s_endpgm
660;
661; GFX950-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
662; GFX950-GISEL:       ; %bb.0: ; %entry
663; GFX950-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
664; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
665; GFX950-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
666; GFX950-GISEL-NEXT:    s_mov_b32 s2, -1
667; GFX950-GISEL-NEXT:    s_mov_b32 s3, 0xf000
668; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
669; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[0:1], s[4:5]
670; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[2:3], s[6:7]
671; GFX950-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, v[2:3]
672; GFX950-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
673; GFX950-GISEL-NEXT:    v_cvt_pk_f16_f32 v0, v0, v2
674; GFX950-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
675; GFX950-GISEL-NEXT:    s_endpgm
676;
677; GFX11-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
678; GFX11-SDAG:       ; %bb.0: ; %entry
679; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
680; GFX11-SDAG-NEXT:    s_mov_b32 s6, -1
681; GFX11-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
682; GFX11-SDAG-NEXT:    s_mov_b32 s10, s6
683; GFX11-SDAG-NEXT:    s_mov_b32 s11, s7
684; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
685; GFX11-SDAG-NEXT:    s_mov_b32 s8, s2
686; GFX11-SDAG-NEXT:    s_mov_b32 s9, s3
687; GFX11-SDAG-NEXT:    s_mov_b32 s4, s0
688; GFX11-SDAG-NEXT:    buffer_load_b128 v[0:3], off, s[8:11], 0
689; GFX11-SDAG-NEXT:    s_mov_b32 s5, s1
690; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
691; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
692; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, v[2:3]
693; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
694; GFX11-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
695; GFX11-SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
696; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
697; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
698; GFX11-SDAG-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
699; GFX11-SDAG-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
700; GFX11-SDAG-NEXT:    s_endpgm
701;
702; GFX11-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
703; GFX11-GISEL:       ; %bb.0: ; %entry
704; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
705; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
706; GFX11-GISEL-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
707; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
708; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
709; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
710; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[4:5]
711; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[6:7]
712; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
713; GFX11-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
714; GFX11-GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
715; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
716; GFX11-GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
717; GFX11-GISEL-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
718; GFX11-GISEL-NEXT:    s_endpgm
719    ptr addrspace(1) %r,
720    ptr addrspace(1) %a) {
721entry:
722  %a.val = load <2 x double>, ptr addrspace(1) %a
723  %r.val = fptrunc <2 x double> %a.val to <2 x half>
724  store <2 x half> %r.val, ptr addrspace(1) %r
725  ret void
726}
727
; Kernel test: load a float from %a, negate it, truncate it to half and store
; the half to %r. In every check block below the negation appears only as a
; negated source operand on v_cvt_f16_f32_e64 (-v0 / -s2 / -s3); no separate
; negate instruction is expected.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script rather than editing them by hand.
728define amdgpu_kernel void @fneg_fptrunc_f32_to_f16(
729; SI-SDAG-LABEL: fneg_fptrunc_f32_to_f16:
730; SI-SDAG:       ; %bb.0: ; %entry
731; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
732; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
733; SI-SDAG-NEXT:    s_mov_b32 s6, -1
734; SI-SDAG-NEXT:    s_mov_b32 s10, s6
735; SI-SDAG-NEXT:    s_mov_b32 s11, s7
736; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
737; SI-SDAG-NEXT:    s_mov_b32 s8, s2
738; SI-SDAG-NEXT:    s_mov_b32 s9, s3
739; SI-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
740; SI-SDAG-NEXT:    s_mov_b32 s4, s0
741; SI-SDAG-NEXT:    s_mov_b32 s5, s1
742; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
743; SI-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, -v0
744; SI-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
745; SI-SDAG-NEXT:    s_endpgm
746;
747; SI-GISEL-LABEL: fneg_fptrunc_f32_to_f16:
748; SI-GISEL:       ; %bb.0: ; %entry
749; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
750; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
751; SI-GISEL-NEXT:    s_load_dword s3, s[2:3], 0x0
752; SI-GISEL-NEXT:    s_mov_b32 s2, -1
753; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
754; SI-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, -s3
755; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
756; SI-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
757; SI-GISEL-NEXT:    s_endpgm
758;
759; VI-SDAG-LABEL: fneg_fptrunc_f32_to_f16:
760; VI-SDAG:       ; %bb.0: ; %entry
761; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
762; VI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
763; VI-SDAG-NEXT:    s_mov_b32 s6, -1
764; VI-SDAG-NEXT:    s_mov_b32 s10, s6
765; VI-SDAG-NEXT:    s_mov_b32 s11, s7
766; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
767; VI-SDAG-NEXT:    s_mov_b32 s8, s2
768; VI-SDAG-NEXT:    s_mov_b32 s9, s3
769; VI-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
770; VI-SDAG-NEXT:    s_mov_b32 s4, s0
771; VI-SDAG-NEXT:    s_mov_b32 s5, s1
772; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
773; VI-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, -v0
774; VI-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
775; VI-SDAG-NEXT:    s_endpgm
776;
777; VI-GISEL-LABEL: fneg_fptrunc_f32_to_f16:
778; VI-GISEL:       ; %bb.0: ; %entry
779; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
780; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
781; VI-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
782; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
783; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
784; VI-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, -s2
785; VI-GISEL-NEXT:    s_mov_b32 s2, -1
786; VI-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
787; VI-GISEL-NEXT:    s_endpgm
788;
789; GFX9-SDAG-LABEL: fneg_fptrunc_f32_to_f16:
790; GFX9-SDAG:       ; %bb.0: ; %entry
791; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
792; GFX9-SDAG-NEXT:    s_mov_b32 s7, 0xf000
793; GFX9-SDAG-NEXT:    s_mov_b32 s6, -1
794; GFX9-SDAG-NEXT:    s_mov_b32 s10, s6
795; GFX9-SDAG-NEXT:    s_mov_b32 s11, s7
796; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
797; GFX9-SDAG-NEXT:    s_mov_b32 s8, s2
798; GFX9-SDAG-NEXT:    s_mov_b32 s9, s3
799; GFX9-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
800; GFX9-SDAG-NEXT:    s_mov_b32 s4, s0
801; GFX9-SDAG-NEXT:    s_mov_b32 s5, s1
802; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
803; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, -v0
804; GFX9-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
805; GFX9-SDAG-NEXT:    s_endpgm
806;
807; GFX9-GISEL-LABEL: fneg_fptrunc_f32_to_f16:
808; GFX9-GISEL:       ; %bb.0: ; %entry
809; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
810; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
811; GFX9-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
812; GFX9-GISEL-NEXT:    s_mov_b32 s3, 0xf000
813; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
814; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, -s2
815; GFX9-GISEL-NEXT:    s_mov_b32 s2, -1
816; GFX9-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
817; GFX9-GISEL-NEXT:    s_endpgm
818;
819; GFX950-SDAG-LABEL: fneg_fptrunc_f32_to_f16:
820; GFX950-SDAG:       ; %bb.0: ; %entry
821; GFX950-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
822; GFX950-SDAG-NEXT:    s_mov_b32 s7, 0xf000
823; GFX950-SDAG-NEXT:    s_mov_b32 s6, -1
824; GFX950-SDAG-NEXT:    s_mov_b32 s10, s6
825; GFX950-SDAG-NEXT:    s_mov_b32 s11, s7
826; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
827; GFX950-SDAG-NEXT:    s_mov_b32 s8, s2
828; GFX950-SDAG-NEXT:    s_mov_b32 s9, s3
829; GFX950-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
830; GFX950-SDAG-NEXT:    s_mov_b32 s4, s0
831; GFX950-SDAG-NEXT:    s_mov_b32 s5, s1
832; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
833; GFX950-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, -v0
834; GFX950-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
835; GFX950-SDAG-NEXT:    s_endpgm
836;
837; GFX950-GISEL-LABEL: fneg_fptrunc_f32_to_f16:
838; GFX950-GISEL:       ; %bb.0: ; %entry
839; GFX950-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
840; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
841; GFX950-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
842; GFX950-GISEL-NEXT:    s_mov_b32 s3, 0xf000
843; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
844; GFX950-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, -s2
845; GFX950-GISEL-NEXT:    s_mov_b32 s2, -1
846; GFX950-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
847; GFX950-GISEL-NEXT:    s_endpgm
848;
849; GFX11-SDAG-LABEL: fneg_fptrunc_f32_to_f16:
850; GFX11-SDAG:       ; %bb.0: ; %entry
851; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
852; GFX11-SDAG-NEXT:    s_mov_b32 s6, -1
853; GFX11-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
854; GFX11-SDAG-NEXT:    s_mov_b32 s10, s6
855; GFX11-SDAG-NEXT:    s_mov_b32 s11, s7
856; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
857; GFX11-SDAG-NEXT:    s_mov_b32 s8, s2
858; GFX11-SDAG-NEXT:    s_mov_b32 s9, s3
859; GFX11-SDAG-NEXT:    s_mov_b32 s4, s0
860; GFX11-SDAG-NEXT:    buffer_load_b32 v0, off, s[8:11], 0
861; GFX11-SDAG-NEXT:    s_mov_b32 s5, s1
862; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
863; GFX11-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, -v0
864; GFX11-SDAG-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
865; GFX11-SDAG-NEXT:    s_endpgm
866;
867; GFX11-GISEL-LABEL: fneg_fptrunc_f32_to_f16:
868; GFX11-GISEL:       ; %bb.0: ; %entry
869; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
870; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
871; GFX11-GISEL-NEXT:    s_load_b32 s2, s[2:3], 0x0
872; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
873; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
874; GFX11-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, -s2
875; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
876; GFX11-GISEL-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
877; GFX11-GISEL-NEXT:    s_endpgm
878    ptr addrspace(1) %r,
879    ptr addrspace(1) %a) {
880entry:
881  %a.val = load float, ptr addrspace(1) %a
  ; The fneg feeding the fptrunc is the pattern under test.
882  %a.fneg = fneg float %a.val
883  %r.val = fptrunc float %a.fneg to half
884  store half %r.val, ptr addrspace(1) %r
885  ret void
886}
887
; Kernel test: load a float from %a, take its absolute value with
; llvm.fabs.f32, truncate it to half and store the half to %r. In every check
; block below the fabs appears only as an absolute-value source operand on
; v_cvt_f16_f32_e64 (|v0| / |s2| / |s3|); no separate instruction is expected.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script rather than editing them by hand.
888define amdgpu_kernel void @fabs_fptrunc_f32_to_f16(
889; SI-SDAG-LABEL: fabs_fptrunc_f32_to_f16:
890; SI-SDAG:       ; %bb.0: ; %entry
891; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
892; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
893; SI-SDAG-NEXT:    s_mov_b32 s6, -1
894; SI-SDAG-NEXT:    s_mov_b32 s10, s6
895; SI-SDAG-NEXT:    s_mov_b32 s11, s7
896; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
897; SI-SDAG-NEXT:    s_mov_b32 s8, s2
898; SI-SDAG-NEXT:    s_mov_b32 s9, s3
899; SI-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
900; SI-SDAG-NEXT:    s_mov_b32 s4, s0
901; SI-SDAG-NEXT:    s_mov_b32 s5, s1
902; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
903; SI-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, |v0|
904; SI-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
905; SI-SDAG-NEXT:    s_endpgm
906;
907; SI-GISEL-LABEL: fabs_fptrunc_f32_to_f16:
908; SI-GISEL:       ; %bb.0: ; %entry
909; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
910; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
911; SI-GISEL-NEXT:    s_load_dword s3, s[2:3], 0x0
912; SI-GISEL-NEXT:    s_mov_b32 s2, -1
913; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
914; SI-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, |s3|
915; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
916; SI-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
917; SI-GISEL-NEXT:    s_endpgm
918;
919; VI-SDAG-LABEL: fabs_fptrunc_f32_to_f16:
920; VI-SDAG:       ; %bb.0: ; %entry
921; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
922; VI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
923; VI-SDAG-NEXT:    s_mov_b32 s6, -1
924; VI-SDAG-NEXT:    s_mov_b32 s10, s6
925; VI-SDAG-NEXT:    s_mov_b32 s11, s7
926; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
927; VI-SDAG-NEXT:    s_mov_b32 s8, s2
928; VI-SDAG-NEXT:    s_mov_b32 s9, s3
929; VI-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
930; VI-SDAG-NEXT:    s_mov_b32 s4, s0
931; VI-SDAG-NEXT:    s_mov_b32 s5, s1
932; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
933; VI-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, |v0|
934; VI-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
935; VI-SDAG-NEXT:    s_endpgm
936;
937; VI-GISEL-LABEL: fabs_fptrunc_f32_to_f16:
938; VI-GISEL:       ; %bb.0: ; %entry
939; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
940; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
941; VI-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
942; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
943; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
944; VI-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, |s2|
945; VI-GISEL-NEXT:    s_mov_b32 s2, -1
946; VI-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
947; VI-GISEL-NEXT:    s_endpgm
948;
949; GFX9-SDAG-LABEL: fabs_fptrunc_f32_to_f16:
950; GFX9-SDAG:       ; %bb.0: ; %entry
951; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
952; GFX9-SDAG-NEXT:    s_mov_b32 s7, 0xf000
953; GFX9-SDAG-NEXT:    s_mov_b32 s6, -1
954; GFX9-SDAG-NEXT:    s_mov_b32 s10, s6
955; GFX9-SDAG-NEXT:    s_mov_b32 s11, s7
956; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
957; GFX9-SDAG-NEXT:    s_mov_b32 s8, s2
958; GFX9-SDAG-NEXT:    s_mov_b32 s9, s3
959; GFX9-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
960; GFX9-SDAG-NEXT:    s_mov_b32 s4, s0
961; GFX9-SDAG-NEXT:    s_mov_b32 s5, s1
962; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
963; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, |v0|
964; GFX9-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
965; GFX9-SDAG-NEXT:    s_endpgm
966;
967; GFX9-GISEL-LABEL: fabs_fptrunc_f32_to_f16:
968; GFX9-GISEL:       ; %bb.0: ; %entry
969; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
970; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
971; GFX9-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
972; GFX9-GISEL-NEXT:    s_mov_b32 s3, 0xf000
973; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
974; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, |s2|
975; GFX9-GISEL-NEXT:    s_mov_b32 s2, -1
976; GFX9-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
977; GFX9-GISEL-NEXT:    s_endpgm
978;
979; GFX950-SDAG-LABEL: fabs_fptrunc_f32_to_f16:
980; GFX950-SDAG:       ; %bb.0: ; %entry
981; GFX950-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
982; GFX950-SDAG-NEXT:    s_mov_b32 s7, 0xf000
983; GFX950-SDAG-NEXT:    s_mov_b32 s6, -1
984; GFX950-SDAG-NEXT:    s_mov_b32 s10, s6
985; GFX950-SDAG-NEXT:    s_mov_b32 s11, s7
986; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
987; GFX950-SDAG-NEXT:    s_mov_b32 s8, s2
988; GFX950-SDAG-NEXT:    s_mov_b32 s9, s3
989; GFX950-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
990; GFX950-SDAG-NEXT:    s_mov_b32 s4, s0
991; GFX950-SDAG-NEXT:    s_mov_b32 s5, s1
992; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
993; GFX950-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, |v0|
994; GFX950-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
995; GFX950-SDAG-NEXT:    s_endpgm
996;
997; GFX950-GISEL-LABEL: fabs_fptrunc_f32_to_f16:
998; GFX950-GISEL:       ; %bb.0: ; %entry
999; GFX950-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1000; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1001; GFX950-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
1002; GFX950-GISEL-NEXT:    s_mov_b32 s3, 0xf000
1003; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1004; GFX950-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, |s2|
1005; GFX950-GISEL-NEXT:    s_mov_b32 s2, -1
1006; GFX950-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
1007; GFX950-GISEL-NEXT:    s_endpgm
1008;
1009; GFX11-SDAG-LABEL: fabs_fptrunc_f32_to_f16:
1010; GFX11-SDAG:       ; %bb.0: ; %entry
1011; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1012; GFX11-SDAG-NEXT:    s_mov_b32 s6, -1
1013; GFX11-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
1014; GFX11-SDAG-NEXT:    s_mov_b32 s10, s6
1015; GFX11-SDAG-NEXT:    s_mov_b32 s11, s7
1016; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1017; GFX11-SDAG-NEXT:    s_mov_b32 s8, s2
1018; GFX11-SDAG-NEXT:    s_mov_b32 s9, s3
1019; GFX11-SDAG-NEXT:    s_mov_b32 s4, s0
1020; GFX11-SDAG-NEXT:    buffer_load_b32 v0, off, s[8:11], 0
1021; GFX11-SDAG-NEXT:    s_mov_b32 s5, s1
1022; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1023; GFX11-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, |v0|
1024; GFX11-SDAG-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
1025; GFX11-SDAG-NEXT:    s_endpgm
1026;
1027; GFX11-GISEL-LABEL: fabs_fptrunc_f32_to_f16:
1028; GFX11-GISEL:       ; %bb.0: ; %entry
1029; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1030; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1031; GFX11-GISEL-NEXT:    s_load_b32 s2, s[2:3], 0x0
1032; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
1033; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1034; GFX11-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, |s2|
1035; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
1036; GFX11-GISEL-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
1037; GFX11-GISEL-NEXT:    s_endpgm
1038    ptr addrspace(1) %r,
1039    ptr addrspace(1) %a) {
1040entry:
1041  %a.val = load float, ptr addrspace(1) %a
  ; The fabs feeding the fptrunc is the pattern under test.
1042  %a.fabs = call float @llvm.fabs.f32(float %a.val)
1043  %r.val = fptrunc float %a.fabs to half
1044  store half %r.val, ptr addrspace(1) %r
1045  ret void
1046}
1047
; Kernel test: load a float from %a, compute -fabs(x), truncate it to half and
; store the half to %r. In every check block below both operations appear only
; as a combined source operand on v_cvt_f16_f32_e64 (-|v0| / -|s2| / -|s3|);
; no separate instruction is expected for either.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script rather than editing them by hand.
; NOTE(review): this kernel carries attribute group #0, which is defined
; outside this part of the file -- confirm what it sets before relying on it.
1048define amdgpu_kernel void @fneg_fabs_fptrunc_f32_to_f16(
1049; SI-SDAG-LABEL: fneg_fabs_fptrunc_f32_to_f16:
1050; SI-SDAG:       ; %bb.0: ; %entry
1051; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1052; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
1053; SI-SDAG-NEXT:    s_mov_b32 s6, -1
1054; SI-SDAG-NEXT:    s_mov_b32 s10, s6
1055; SI-SDAG-NEXT:    s_mov_b32 s11, s7
1056; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1057; SI-SDAG-NEXT:    s_mov_b32 s8, s2
1058; SI-SDAG-NEXT:    s_mov_b32 s9, s3
1059; SI-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1060; SI-SDAG-NEXT:    s_mov_b32 s4, s0
1061; SI-SDAG-NEXT:    s_mov_b32 s5, s1
1062; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1063; SI-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, -|v0|
1064; SI-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
1065; SI-SDAG-NEXT:    s_endpgm
1066;
1067; SI-GISEL-LABEL: fneg_fabs_fptrunc_f32_to_f16:
1068; SI-GISEL:       ; %bb.0: ; %entry
1069; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1070; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1071; SI-GISEL-NEXT:    s_load_dword s3, s[2:3], 0x0
1072; SI-GISEL-NEXT:    s_mov_b32 s2, -1
1073; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1074; SI-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, -|s3|
1075; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
1076; SI-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
1077; SI-GISEL-NEXT:    s_endpgm
1078;
1079; VI-SDAG-LABEL: fneg_fabs_fptrunc_f32_to_f16:
1080; VI-SDAG:       ; %bb.0: ; %entry
1081; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1082; VI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
1083; VI-SDAG-NEXT:    s_mov_b32 s6, -1
1084; VI-SDAG-NEXT:    s_mov_b32 s10, s6
1085; VI-SDAG-NEXT:    s_mov_b32 s11, s7
1086; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1087; VI-SDAG-NEXT:    s_mov_b32 s8, s2
1088; VI-SDAG-NEXT:    s_mov_b32 s9, s3
1089; VI-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1090; VI-SDAG-NEXT:    s_mov_b32 s4, s0
1091; VI-SDAG-NEXT:    s_mov_b32 s5, s1
1092; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1093; VI-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, -|v0|
1094; VI-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
1095; VI-SDAG-NEXT:    s_endpgm
1096;
1097; VI-GISEL-LABEL: fneg_fabs_fptrunc_f32_to_f16:
1098; VI-GISEL:       ; %bb.0: ; %entry
1099; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1100; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1101; VI-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
1102; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
1103; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1104; VI-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, -|s2|
1105; VI-GISEL-NEXT:    s_mov_b32 s2, -1
1106; VI-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
1107; VI-GISEL-NEXT:    s_endpgm
1108;
1109; GFX9-SDAG-LABEL: fneg_fabs_fptrunc_f32_to_f16:
1110; GFX9-SDAG:       ; %bb.0: ; %entry
1111; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1112; GFX9-SDAG-NEXT:    s_mov_b32 s7, 0xf000
1113; GFX9-SDAG-NEXT:    s_mov_b32 s6, -1
1114; GFX9-SDAG-NEXT:    s_mov_b32 s10, s6
1115; GFX9-SDAG-NEXT:    s_mov_b32 s11, s7
1116; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1117; GFX9-SDAG-NEXT:    s_mov_b32 s8, s2
1118; GFX9-SDAG-NEXT:    s_mov_b32 s9, s3
1119; GFX9-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1120; GFX9-SDAG-NEXT:    s_mov_b32 s4, s0
1121; GFX9-SDAG-NEXT:    s_mov_b32 s5, s1
1122; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1123; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, -|v0|
1124; GFX9-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
1125; GFX9-SDAG-NEXT:    s_endpgm
1126;
1127; GFX9-GISEL-LABEL: fneg_fabs_fptrunc_f32_to_f16:
1128; GFX9-GISEL:       ; %bb.0: ; %entry
1129; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1130; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1131; GFX9-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
1132; GFX9-GISEL-NEXT:    s_mov_b32 s3, 0xf000
1133; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1134; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, -|s2|
1135; GFX9-GISEL-NEXT:    s_mov_b32 s2, -1
1136; GFX9-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
1137; GFX9-GISEL-NEXT:    s_endpgm
1138;
1139; GFX950-SDAG-LABEL: fneg_fabs_fptrunc_f32_to_f16:
1140; GFX950-SDAG:       ; %bb.0: ; %entry
1141; GFX950-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1142; GFX950-SDAG-NEXT:    s_mov_b32 s7, 0xf000
1143; GFX950-SDAG-NEXT:    s_mov_b32 s6, -1
1144; GFX950-SDAG-NEXT:    s_mov_b32 s10, s6
1145; GFX950-SDAG-NEXT:    s_mov_b32 s11, s7
1146; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1147; GFX950-SDAG-NEXT:    s_mov_b32 s8, s2
1148; GFX950-SDAG-NEXT:    s_mov_b32 s9, s3
1149; GFX950-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1150; GFX950-SDAG-NEXT:    s_mov_b32 s4, s0
1151; GFX950-SDAG-NEXT:    s_mov_b32 s5, s1
1152; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
1153; GFX950-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, -|v0|
1154; GFX950-SDAG-NEXT:    buffer_store_short v0, off, s[4:7], 0
1155; GFX950-SDAG-NEXT:    s_endpgm
1156;
1157; GFX950-GISEL-LABEL: fneg_fabs_fptrunc_f32_to_f16:
1158; GFX950-GISEL:       ; %bb.0: ; %entry
1159; GFX950-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1160; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1161; GFX950-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
1162; GFX950-GISEL-NEXT:    s_mov_b32 s3, 0xf000
1163; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1164; GFX950-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, -|s2|
1165; GFX950-GISEL-NEXT:    s_mov_b32 s2, -1
1166; GFX950-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
1167; GFX950-GISEL-NEXT:    s_endpgm
1168;
1169; GFX11-SDAG-LABEL: fneg_fabs_fptrunc_f32_to_f16:
1170; GFX11-SDAG:       ; %bb.0: ; %entry
1171; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1172; GFX11-SDAG-NEXT:    s_mov_b32 s6, -1
1173; GFX11-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
1174; GFX11-SDAG-NEXT:    s_mov_b32 s10, s6
1175; GFX11-SDAG-NEXT:    s_mov_b32 s11, s7
1176; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1177; GFX11-SDAG-NEXT:    s_mov_b32 s8, s2
1178; GFX11-SDAG-NEXT:    s_mov_b32 s9, s3
1179; GFX11-SDAG-NEXT:    s_mov_b32 s4, s0
1180; GFX11-SDAG-NEXT:    buffer_load_b32 v0, off, s[8:11], 0
1181; GFX11-SDAG-NEXT:    s_mov_b32 s5, s1
1182; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1183; GFX11-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, -|v0|
1184; GFX11-SDAG-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
1185; GFX11-SDAG-NEXT:    s_endpgm
1186;
1187; GFX11-GISEL-LABEL: fneg_fabs_fptrunc_f32_to_f16:
1188; GFX11-GISEL:       ; %bb.0: ; %entry
1189; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1190; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1191; GFX11-GISEL-NEXT:    s_load_b32 s2, s[2:3], 0x0
1192; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
1193; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1194; GFX11-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, -|s2|
1195; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
1196; GFX11-GISEL-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
1197; GFX11-GISEL-NEXT:    s_endpgm
1198    ptr addrspace(1) %r,
1199    ptr addrspace(1) %a) #0 {
1200entry:
1201  %a.val = load float, ptr addrspace(1) %a
  ; fabs followed by fneg, both feeding the fptrunc, is the pattern under test.
1202  %a.fabs = call float @llvm.fabs.f32(float %a.val)
1203  %a.fneg.fabs = fneg float %a.fabs
1204  %r.val = fptrunc float %a.fneg.fabs to half
1205  store half %r.val, ptr addrspace(1) %r
1206  ret void
1207}
1208
; Kernel test: load a float from %a, truncate it to half, reinterpret the half
; as i16, zero-extend that to i32 and store the 32-bit value to %r.
; In the SI, VI, GFX9 and GFX950 check blocks the v_cvt_f16_f32 result is
; stored directly as a dword with no masking instruction. Both GFX11 check
; blocks additionally expect v_and_b32 with 0xffff before the store.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script rather than editing them by hand.
; NOTE(review): this kernel carries attribute group #0, which is defined
; outside this part of the file -- confirm what it sets before relying on it.
1209define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32(
1210; SI-SDAG-LABEL: fptrunc_f32_to_f16_zext_i32:
1211; SI-SDAG:       ; %bb.0: ; %entry
1212; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1213; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
1214; SI-SDAG-NEXT:    s_mov_b32 s6, -1
1215; SI-SDAG-NEXT:    s_mov_b32 s10, s6
1216; SI-SDAG-NEXT:    s_mov_b32 s11, s7
1217; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1218; SI-SDAG-NEXT:    s_mov_b32 s8, s2
1219; SI-SDAG-NEXT:    s_mov_b32 s9, s3
1220; SI-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1221; SI-SDAG-NEXT:    s_mov_b32 s4, s0
1222; SI-SDAG-NEXT:    s_mov_b32 s5, s1
1223; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1224; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
1225; SI-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1226; SI-SDAG-NEXT:    s_endpgm
1227;
1228; SI-GISEL-LABEL: fptrunc_f32_to_f16_zext_i32:
1229; SI-GISEL:       ; %bb.0: ; %entry
1230; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1231; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1232; SI-GISEL-NEXT:    s_load_dword s3, s[2:3], 0x0
1233; SI-GISEL-NEXT:    s_mov_b32 s2, -1
1234; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1235; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s3
1236; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
1237; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1238; SI-GISEL-NEXT:    s_endpgm
1239;
1240; VI-SDAG-LABEL: fptrunc_f32_to_f16_zext_i32:
1241; VI-SDAG:       ; %bb.0: ; %entry
1242; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1243; VI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
1244; VI-SDAG-NEXT:    s_mov_b32 s6, -1
1245; VI-SDAG-NEXT:    s_mov_b32 s10, s6
1246; VI-SDAG-NEXT:    s_mov_b32 s11, s7
1247; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1248; VI-SDAG-NEXT:    s_mov_b32 s8, s2
1249; VI-SDAG-NEXT:    s_mov_b32 s9, s3
1250; VI-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1251; VI-SDAG-NEXT:    s_mov_b32 s4, s0
1252; VI-SDAG-NEXT:    s_mov_b32 s5, s1
1253; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1254; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
1255; VI-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1256; VI-SDAG-NEXT:    s_endpgm
1257;
1258; VI-GISEL-LABEL: fptrunc_f32_to_f16_zext_i32:
1259; VI-GISEL:       ; %bb.0: ; %entry
1260; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1261; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1262; VI-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
1263; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
1264; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1265; VI-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
1266; VI-GISEL-NEXT:    s_mov_b32 s2, -1
1267; VI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1268; VI-GISEL-NEXT:    s_endpgm
1269;
1270; GFX9-SDAG-LABEL: fptrunc_f32_to_f16_zext_i32:
1271; GFX9-SDAG:       ; %bb.0: ; %entry
1272; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1273; GFX9-SDAG-NEXT:    s_mov_b32 s7, 0xf000
1274; GFX9-SDAG-NEXT:    s_mov_b32 s6, -1
1275; GFX9-SDAG-NEXT:    s_mov_b32 s10, s6
1276; GFX9-SDAG-NEXT:    s_mov_b32 s11, s7
1277; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1278; GFX9-SDAG-NEXT:    s_mov_b32 s8, s2
1279; GFX9-SDAG-NEXT:    s_mov_b32 s9, s3
1280; GFX9-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1281; GFX9-SDAG-NEXT:    s_mov_b32 s4, s0
1282; GFX9-SDAG-NEXT:    s_mov_b32 s5, s1
1283; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1284; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
1285; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1286; GFX9-SDAG-NEXT:    s_endpgm
1287;
1288; GFX9-GISEL-LABEL: fptrunc_f32_to_f16_zext_i32:
1289; GFX9-GISEL:       ; %bb.0: ; %entry
1290; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1291; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1292; GFX9-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
1293; GFX9-GISEL-NEXT:    s_mov_b32 s3, 0xf000
1294; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1295; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
1296; GFX9-GISEL-NEXT:    s_mov_b32 s2, -1
1297; GFX9-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1298; GFX9-GISEL-NEXT:    s_endpgm
1299;
1300; GFX950-SDAG-LABEL: fptrunc_f32_to_f16_zext_i32:
1301; GFX950-SDAG:       ; %bb.0: ; %entry
1302; GFX950-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1303; GFX950-SDAG-NEXT:    s_mov_b32 s7, 0xf000
1304; GFX950-SDAG-NEXT:    s_mov_b32 s6, -1
1305; GFX950-SDAG-NEXT:    s_mov_b32 s10, s6
1306; GFX950-SDAG-NEXT:    s_mov_b32 s11, s7
1307; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1308; GFX950-SDAG-NEXT:    s_mov_b32 s8, s2
1309; GFX950-SDAG-NEXT:    s_mov_b32 s9, s3
1310; GFX950-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1311; GFX950-SDAG-NEXT:    s_mov_b32 s4, s0
1312; GFX950-SDAG-NEXT:    s_mov_b32 s5, s1
1313; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
1314; GFX950-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
1315; GFX950-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1316; GFX950-SDAG-NEXT:    s_endpgm
1317;
1318; GFX950-GISEL-LABEL: fptrunc_f32_to_f16_zext_i32:
1319; GFX950-GISEL:       ; %bb.0: ; %entry
1320; GFX950-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1321; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1322; GFX950-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
1323; GFX950-GISEL-NEXT:    s_mov_b32 s3, 0xf000
1324; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1325; GFX950-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
1326; GFX950-GISEL-NEXT:    s_mov_b32 s2, -1
1327; GFX950-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1328; GFX950-GISEL-NEXT:    s_endpgm
1329;
1330; GFX11-SDAG-LABEL: fptrunc_f32_to_f16_zext_i32:
1331; GFX11-SDAG:       ; %bb.0: ; %entry
1332; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1333; GFX11-SDAG-NEXT:    s_mov_b32 s6, -1
1334; GFX11-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
1335; GFX11-SDAG-NEXT:    s_mov_b32 s10, s6
1336; GFX11-SDAG-NEXT:    s_mov_b32 s11, s7
1337; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1338; GFX11-SDAG-NEXT:    s_mov_b32 s8, s2
1339; GFX11-SDAG-NEXT:    s_mov_b32 s9, s3
1340; GFX11-SDAG-NEXT:    s_mov_b32 s4, s0
1341; GFX11-SDAG-NEXT:    buffer_load_b32 v0, off, s[8:11], 0
1342; GFX11-SDAG-NEXT:    s_mov_b32 s5, s1
1343; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1344; GFX11-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
1345; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1346; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1347; GFX11-SDAG-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
1348; GFX11-SDAG-NEXT:    s_endpgm
1349;
1350; GFX11-GISEL-LABEL: fptrunc_f32_to_f16_zext_i32:
1351; GFX11-GISEL:       ; %bb.0: ; %entry
1352; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1353; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1354; GFX11-GISEL-NEXT:    s_load_b32 s2, s[2:3], 0x0
1355; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
1356; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1357; GFX11-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
1358; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
1359; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1360; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1361; GFX11-GISEL-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
1362; GFX11-GISEL-NEXT:    s_endpgm
1363    ptr addrspace(1) %r,
1364    ptr addrspace(1) %a) #0 {
1365entry:
1366  %a.val = load float, ptr addrspace(1) %a
1367  %r.val = fptrunc float %a.val to half
  ; The bitcast + zext of the truncated half is the pattern under test.
1368  %r.i16 = bitcast half %r.val to i16
1369  %zext = zext i16 %r.i16 to i32
1370  store i32 %zext, ptr addrspace(1) %r
1371  ret void
1372}
1373
; Kernel under test: loads a float from %a, applies llvm.fabs.f32, truncates the
; result to half, bitcasts it to i16, zero-extends to i32 and stores it to %r.
; The check lines below are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing them by hand.
; They expect the fabs to appear as the |x| source modifier on
; v_cvt_f16_f32_e64. Only the GFX11 checks contain an explicit
; v_and_b32 0xffff before the store; the other prefixes store the conversion
; result directly.
1374define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32(
1375; SI-SDAG-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1376; SI-SDAG:       ; %bb.0: ; %entry
1377; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1378; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
1379; SI-SDAG-NEXT:    s_mov_b32 s6, -1
1380; SI-SDAG-NEXT:    s_mov_b32 s10, s6
1381; SI-SDAG-NEXT:    s_mov_b32 s11, s7
1382; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1383; SI-SDAG-NEXT:    s_mov_b32 s8, s2
1384; SI-SDAG-NEXT:    s_mov_b32 s9, s3
1385; SI-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1386; SI-SDAG-NEXT:    s_mov_b32 s4, s0
1387; SI-SDAG-NEXT:    s_mov_b32 s5, s1
1388; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1389; SI-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, |v0|
1390; SI-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1391; SI-SDAG-NEXT:    s_endpgm
1392;
1393; SI-GISEL-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1394; SI-GISEL:       ; %bb.0: ; %entry
1395; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1396; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1397; SI-GISEL-NEXT:    s_load_dword s3, s[2:3], 0x0
1398; SI-GISEL-NEXT:    s_mov_b32 s2, -1
1399; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1400; SI-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, |s3|
1401; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
1402; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1403; SI-GISEL-NEXT:    s_endpgm
1404;
1405; VI-SDAG-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1406; VI-SDAG:       ; %bb.0: ; %entry
1407; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1408; VI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
1409; VI-SDAG-NEXT:    s_mov_b32 s6, -1
1410; VI-SDAG-NEXT:    s_mov_b32 s10, s6
1411; VI-SDAG-NEXT:    s_mov_b32 s11, s7
1412; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1413; VI-SDAG-NEXT:    s_mov_b32 s8, s2
1414; VI-SDAG-NEXT:    s_mov_b32 s9, s3
1415; VI-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1416; VI-SDAG-NEXT:    s_mov_b32 s4, s0
1417; VI-SDAG-NEXT:    s_mov_b32 s5, s1
1418; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1419; VI-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, |v0|
1420; VI-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1421; VI-SDAG-NEXT:    s_endpgm
1422;
1423; VI-GISEL-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1424; VI-GISEL:       ; %bb.0: ; %entry
1425; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1426; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1427; VI-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
1428; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
1429; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1430; VI-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, |s2|
1431; VI-GISEL-NEXT:    s_mov_b32 s2, -1
1432; VI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1433; VI-GISEL-NEXT:    s_endpgm
1434;
1435; GFX9-SDAG-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1436; GFX9-SDAG:       ; %bb.0: ; %entry
1437; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1438; GFX9-SDAG-NEXT:    s_mov_b32 s7, 0xf000
1439; GFX9-SDAG-NEXT:    s_mov_b32 s6, -1
1440; GFX9-SDAG-NEXT:    s_mov_b32 s10, s6
1441; GFX9-SDAG-NEXT:    s_mov_b32 s11, s7
1442; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1443; GFX9-SDAG-NEXT:    s_mov_b32 s8, s2
1444; GFX9-SDAG-NEXT:    s_mov_b32 s9, s3
1445; GFX9-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1446; GFX9-SDAG-NEXT:    s_mov_b32 s4, s0
1447; GFX9-SDAG-NEXT:    s_mov_b32 s5, s1
1448; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1449; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, |v0|
1450; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1451; GFX9-SDAG-NEXT:    s_endpgm
1452;
1453; GFX9-GISEL-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1454; GFX9-GISEL:       ; %bb.0: ; %entry
1455; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1456; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1457; GFX9-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
1458; GFX9-GISEL-NEXT:    s_mov_b32 s3, 0xf000
1459; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1460; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, |s2|
1461; GFX9-GISEL-NEXT:    s_mov_b32 s2, -1
1462; GFX9-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1463; GFX9-GISEL-NEXT:    s_endpgm
1464;
1465; GFX950-SDAG-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1466; GFX950-SDAG:       ; %bb.0: ; %entry
1467; GFX950-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1468; GFX950-SDAG-NEXT:    s_mov_b32 s7, 0xf000
1469; GFX950-SDAG-NEXT:    s_mov_b32 s6, -1
1470; GFX950-SDAG-NEXT:    s_mov_b32 s10, s6
1471; GFX950-SDAG-NEXT:    s_mov_b32 s11, s7
1472; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1473; GFX950-SDAG-NEXT:    s_mov_b32 s8, s2
1474; GFX950-SDAG-NEXT:    s_mov_b32 s9, s3
1475; GFX950-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1476; GFX950-SDAG-NEXT:    s_mov_b32 s4, s0
1477; GFX950-SDAG-NEXT:    s_mov_b32 s5, s1
1478; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
1479; GFX950-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, |v0|
1480; GFX950-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1481; GFX950-SDAG-NEXT:    s_endpgm
1482;
1483; GFX950-GISEL-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1484; GFX950-GISEL:       ; %bb.0: ; %entry
1485; GFX950-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1486; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1487; GFX950-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
1488; GFX950-GISEL-NEXT:    s_mov_b32 s3, 0xf000
1489; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1490; GFX950-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, |s2|
1491; GFX950-GISEL-NEXT:    s_mov_b32 s2, -1
1492; GFX950-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1493; GFX950-GISEL-NEXT:    s_endpgm
1494;
1495; GFX11-SDAG-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1496; GFX11-SDAG:       ; %bb.0: ; %entry
1497; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1498; GFX11-SDAG-NEXT:    s_mov_b32 s6, -1
1499; GFX11-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
1500; GFX11-SDAG-NEXT:    s_mov_b32 s10, s6
1501; GFX11-SDAG-NEXT:    s_mov_b32 s11, s7
1502; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1503; GFX11-SDAG-NEXT:    s_mov_b32 s8, s2
1504; GFX11-SDAG-NEXT:    s_mov_b32 s9, s3
1505; GFX11-SDAG-NEXT:    s_mov_b32 s4, s0
1506; GFX11-SDAG-NEXT:    buffer_load_b32 v0, off, s[8:11], 0
1507; GFX11-SDAG-NEXT:    s_mov_b32 s5, s1
1508; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1509; GFX11-SDAG-NEXT:    v_cvt_f16_f32_e64 v0, |v0|
1510; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1511; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1512; GFX11-SDAG-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
1513; GFX11-SDAG-NEXT:    s_endpgm
1514;
1515; GFX11-GISEL-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
1516; GFX11-GISEL:       ; %bb.0: ; %entry
1517; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1518; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1519; GFX11-GISEL-NEXT:    s_load_b32 s2, s[2:3], 0x0
1520; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
1521; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1522; GFX11-GISEL-NEXT:    v_cvt_f16_f32_e64 v0, |s2|
1523; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
1524; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1525; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1526; GFX11-GISEL-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
1527; GFX11-GISEL-NEXT:    s_endpgm
1528    ptr addrspace(1) %r,
1529    ptr addrspace(1) %a) #0 {
1530entry:
1531  %a.val = load float, ptr addrspace(1) %a
1532  %a.fabs = call float @llvm.fabs.f32(float %a.val)
1533  %r.val = fptrunc float %a.fabs to half
1534  %r.i16 = bitcast half %r.val to i16
1535  %zext = zext i16 %r.i16 to i32
1536  store i32 %zext, ptr addrspace(1) %r
1537  ret void
1538}
1539
; Kernel under test: loads a float from %a, truncates it to half, bitcasts the
; half to i16, sign-extends to i32 and stores it to %r.
; The check lines below are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing them by hand.
; Every check prefix expects v_cvt_f16_f32_e32 and then a
; v_bfe_i32 v0, v0, 0, 16 before the store.
1540define amdgpu_kernel void @fptrunc_f32_to_f16_sext_i32(
1541; SI-SDAG-LABEL: fptrunc_f32_to_f16_sext_i32:
1542; SI-SDAG:       ; %bb.0: ; %entry
1543; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1544; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
1545; SI-SDAG-NEXT:    s_mov_b32 s6, -1
1546; SI-SDAG-NEXT:    s_mov_b32 s10, s6
1547; SI-SDAG-NEXT:    s_mov_b32 s11, s7
1548; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1549; SI-SDAG-NEXT:    s_mov_b32 s8, s2
1550; SI-SDAG-NEXT:    s_mov_b32 s9, s3
1551; SI-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1552; SI-SDAG-NEXT:    s_mov_b32 s4, s0
1553; SI-SDAG-NEXT:    s_mov_b32 s5, s1
1554; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1555; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
1556; SI-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
1557; SI-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1558; SI-SDAG-NEXT:    s_endpgm
1559;
1560; SI-GISEL-LABEL: fptrunc_f32_to_f16_sext_i32:
1561; SI-GISEL:       ; %bb.0: ; %entry
1562; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
1563; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1564; SI-GISEL-NEXT:    s_load_dword s3, s[2:3], 0x0
1565; SI-GISEL-NEXT:    s_mov_b32 s2, -1
1566; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1567; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s3
1568; SI-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
1569; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
1570; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1571; SI-GISEL-NEXT:    s_endpgm
1572;
1573; VI-SDAG-LABEL: fptrunc_f32_to_f16_sext_i32:
1574; VI-SDAG:       ; %bb.0: ; %entry
1575; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1576; VI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
1577; VI-SDAG-NEXT:    s_mov_b32 s6, -1
1578; VI-SDAG-NEXT:    s_mov_b32 s10, s6
1579; VI-SDAG-NEXT:    s_mov_b32 s11, s7
1580; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1581; VI-SDAG-NEXT:    s_mov_b32 s8, s2
1582; VI-SDAG-NEXT:    s_mov_b32 s9, s3
1583; VI-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1584; VI-SDAG-NEXT:    s_mov_b32 s4, s0
1585; VI-SDAG-NEXT:    s_mov_b32 s5, s1
1586; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
1587; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
1588; VI-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
1589; VI-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1590; VI-SDAG-NEXT:    s_endpgm
1591;
1592; VI-GISEL-LABEL: fptrunc_f32_to_f16_sext_i32:
1593; VI-GISEL:       ; %bb.0: ; %entry
1594; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1595; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1596; VI-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
1597; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
1598; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1599; VI-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
1600; VI-GISEL-NEXT:    s_mov_b32 s2, -1
1601; VI-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
1602; VI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1603; VI-GISEL-NEXT:    s_endpgm
1604;
1605; GFX9-SDAG-LABEL: fptrunc_f32_to_f16_sext_i32:
1606; GFX9-SDAG:       ; %bb.0: ; %entry
1607; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1608; GFX9-SDAG-NEXT:    s_mov_b32 s7, 0xf000
1609; GFX9-SDAG-NEXT:    s_mov_b32 s6, -1
1610; GFX9-SDAG-NEXT:    s_mov_b32 s10, s6
1611; GFX9-SDAG-NEXT:    s_mov_b32 s11, s7
1612; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1613; GFX9-SDAG-NEXT:    s_mov_b32 s8, s2
1614; GFX9-SDAG-NEXT:    s_mov_b32 s9, s3
1615; GFX9-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1616; GFX9-SDAG-NEXT:    s_mov_b32 s4, s0
1617; GFX9-SDAG-NEXT:    s_mov_b32 s5, s1
1618; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1619; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
1620; GFX9-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
1621; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1622; GFX9-SDAG-NEXT:    s_endpgm
1623;
1624; GFX9-GISEL-LABEL: fptrunc_f32_to_f16_sext_i32:
1625; GFX9-GISEL:       ; %bb.0: ; %entry
1626; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1627; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1628; GFX9-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
1629; GFX9-GISEL-NEXT:    s_mov_b32 s3, 0xf000
1630; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1631; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
1632; GFX9-GISEL-NEXT:    s_mov_b32 s2, -1
1633; GFX9-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
1634; GFX9-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1635; GFX9-GISEL-NEXT:    s_endpgm
1636;
1637; GFX950-SDAG-LABEL: fptrunc_f32_to_f16_sext_i32:
1638; GFX950-SDAG:       ; %bb.0: ; %entry
1639; GFX950-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1640; GFX950-SDAG-NEXT:    s_mov_b32 s7, 0xf000
1641; GFX950-SDAG-NEXT:    s_mov_b32 s6, -1
1642; GFX950-SDAG-NEXT:    s_mov_b32 s10, s6
1643; GFX950-SDAG-NEXT:    s_mov_b32 s11, s7
1644; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1645; GFX950-SDAG-NEXT:    s_mov_b32 s8, s2
1646; GFX950-SDAG-NEXT:    s_mov_b32 s9, s3
1647; GFX950-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
1648; GFX950-SDAG-NEXT:    s_mov_b32 s4, s0
1649; GFX950-SDAG-NEXT:    s_mov_b32 s5, s1
1650; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
1651; GFX950-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
1652; GFX950-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
1653; GFX950-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1654; GFX950-SDAG-NEXT:    s_endpgm
1655;
1656; GFX950-GISEL-LABEL: fptrunc_f32_to_f16_sext_i32:
1657; GFX950-GISEL:       ; %bb.0: ; %entry
1658; GFX950-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1659; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1660; GFX950-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
1661; GFX950-GISEL-NEXT:    s_mov_b32 s3, 0xf000
1662; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1663; GFX950-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
1664; GFX950-GISEL-NEXT:    s_mov_b32 s2, -1
1665; GFX950-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
1666; GFX950-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1667; GFX950-GISEL-NEXT:    s_endpgm
1668;
1669; GFX11-SDAG-LABEL: fptrunc_f32_to_f16_sext_i32:
1670; GFX11-SDAG:       ; %bb.0: ; %entry
1671; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1672; GFX11-SDAG-NEXT:    s_mov_b32 s6, -1
1673; GFX11-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
1674; GFX11-SDAG-NEXT:    s_mov_b32 s10, s6
1675; GFX11-SDAG-NEXT:    s_mov_b32 s11, s7
1676; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
1677; GFX11-SDAG-NEXT:    s_mov_b32 s8, s2
1678; GFX11-SDAG-NEXT:    s_mov_b32 s9, s3
1679; GFX11-SDAG-NEXT:    s_mov_b32 s4, s0
1680; GFX11-SDAG-NEXT:    buffer_load_b32 v0, off, s[8:11], 0
1681; GFX11-SDAG-NEXT:    s_mov_b32 s5, s1
1682; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
1683; GFX11-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
1684; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1685; GFX11-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
1686; GFX11-SDAG-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
1687; GFX11-SDAG-NEXT:    s_endpgm
1688;
1689; GFX11-GISEL-LABEL: fptrunc_f32_to_f16_sext_i32:
1690; GFX11-GISEL:       ; %bb.0: ; %entry
1691; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
1692; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1693; GFX11-GISEL-NEXT:    s_load_b32 s2, s[2:3], 0x0
1694; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
1695; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1696; GFX11-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
1697; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
1698; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1699; GFX11-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
1700; GFX11-GISEL-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
1701; GFX11-GISEL-NEXT:    s_endpgm
1702    ptr addrspace(1) %r,
1703    ptr addrspace(1) %a) #0 {
1704entry:
1705  %a.val = load float, ptr addrspace(1) %a
1706  %r.val = fptrunc float %a.val to half
1707  %r.i16 = bitcast half %r.val to i16
1708  %sext = sext i16 %r.i16 to i32
1709  store i32 %sext, ptr addrspace(1) %r
1710  ret void
1711}
1712
; Intrinsic called by @fptrunc_fabs_f32_to_f16_zext_i32 to take the absolute
; value of its loaded float.
1713declare float @llvm.fabs.f32(float) #1
1714
; #0 is the attribute group attached to the kernel definitions above.
1715attributes #0 = { nounwind }
; #1 is the attribute group attached to the llvm.fabs.f32 declaration.
1716attributes #1 = { nounwind readnone }
1717