xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fptrunc.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=SI %s
3; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-SDAG,VI-SAFE-SDAG %s
4; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-GISEL,VI-SAFE-GISEL %s
5; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-SDAG,VI-UNSAFE-SDAG %s
6; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-GISEL,VI-UNSAFE-GISEL %s
7; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-SAFE-SDAG %s
8; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-GISEL,GFX10-SAFE-GISEL %s
9; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-UNSAFE-SDAG %s
10; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-GISEL,GFX10-UNSAFE-GISEL %s
11; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-SAFE-SDAG %s
12; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-SAFE-GISEL %s
13; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-SDAG %s
14; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL %s
15
; Kernel test: truncate a scalar double kernel argument to float and store the
; result to the addrspace(1) pointer %out.
; For every check prefix the expected code contains a single
; v_cvt_f32_f64_e32 reading the argument from s[2:3], followed by one 32-bit
; buffer store. The remaining lines only differ in how the buffer resource
; descriptor is set up.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
16define amdgpu_kernel void @fptrunc_f64_to_f32(ptr addrspace(1) %out, double %in) {
17; SI-LABEL: fptrunc_f64_to_f32:
18; SI:       ; %bb.0:
19; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
20; SI-NEXT:    s_mov_b32 s7, 0xf000
21; SI-NEXT:    s_mov_b32 s6, -1
22; SI-NEXT:    s_waitcnt lgkmcnt(0)
23; SI-NEXT:    s_mov_b32 s4, s0
24; SI-NEXT:    s_mov_b32 s5, s1
25; SI-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
26; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
27; SI-NEXT:    s_endpgm
28;
29; VI-SDAG-LABEL: fptrunc_f64_to_f32:
30; VI-SDAG:       ; %bb.0:
31; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
32; VI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
33; VI-SDAG-NEXT:    s_mov_b32 s6, -1
34; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
35; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
36; VI-SDAG-NEXT:    s_mov_b32 s4, s0
37; VI-SDAG-NEXT:    s_mov_b32 s5, s1
38; VI-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
39; VI-SDAG-NEXT:    s_endpgm
40;
41; VI-GISEL-LABEL: fptrunc_f64_to_f32:
42; VI-GISEL:       ; %bb.0:
43; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
44; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
45; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
46; VI-GISEL-NEXT:    s_mov_b32 s2, -1
47; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
48; VI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
49; VI-GISEL-NEXT:    s_endpgm
50;
51; GFX10-SDAG-LABEL: fptrunc_f64_to_f32:
52; GFX10-SDAG:       ; %bb.0:
53; GFX10-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
54; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
55; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
56; GFX10-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
57; GFX10-SDAG-NEXT:    s_mov_b32 s2, -1
58; GFX10-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], 0
59; GFX10-SDAG-NEXT:    s_endpgm
60;
61; GFX10-GISEL-LABEL: fptrunc_f64_to_f32:
62; GFX10-GISEL:       ; %bb.0:
63; GFX10-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
64; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
65; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
66; GFX10-GISEL-NEXT:    s_mov_b32 s2, -1
67; GFX10-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
68; GFX10-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
69; GFX10-GISEL-NEXT:    s_endpgm
70;
71; GFX11-SDAG-LABEL: fptrunc_f64_to_f32:
72; GFX11-SDAG:       ; %bb.0:
73; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
74; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
75; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
76; GFX11-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
77; GFX11-SDAG-NEXT:    s_mov_b32 s2, -1
78; GFX11-SDAG-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
79; GFX11-SDAG-NEXT:    s_endpgm
80;
81; GFX11-GISEL-LABEL: fptrunc_f64_to_f32:
82; GFX11-GISEL:       ; %bb.0:
83; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
84; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
85; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
86; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
87; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
88; GFX11-GISEL-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
89; GFX11-GISEL-NEXT:    s_endpgm
90  %result = fptrunc double %in to float
91  store float %result, ptr addrspace(1) %out
92  ret void
93}
94
; Kernel test: truncate a scalar double kernel argument to half and store the
; resulting 16 bits to the addrspace(1) pointer %out.
; The expected code depends on the RUN configuration:
;   * SI and the *-SAFE-* prefixes (runs without -enable-unsafe-fp-math)
;     expect a long sequence of integer mask/shift/compare/select
;     instructions and no v_cvt instruction at all.
;   * The *-UNSAFE-* prefixes (runs with -enable-unsafe-fp-math) expect the
;     two-step conversion v_cvt_f32_f64_e32 followed by v_cvt_f16_f32_e32.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
95define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) {
96; SI-LABEL: fptrunc_f64_to_f16:
97; SI:       ; %bb.0:
98; SI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
99; SI-NEXT:    s_mov_b32 s3, 0xf000
100; SI-NEXT:    s_mov_b32 s2, -1
101; SI-NEXT:    s_waitcnt lgkmcnt(0)
102; SI-NEXT:    s_mov_b32 s0, s4
103; SI-NEXT:    s_mov_b32 s1, s5
104; SI-NEXT:    s_lshr_b32 s4, s7, 8
105; SI-NEXT:    s_and_b32 s5, s7, 0x1ff
106; SI-NEXT:    s_and_b32 s8, s4, 0xffe
107; SI-NEXT:    s_or_b32 s4, s5, s6
108; SI-NEXT:    s_cmp_lg_u32 s4, 0
109; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
110; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
111; SI-NEXT:    s_bfe_u32 s4, s7, 0xb0014
112; SI-NEXT:    v_readfirstlane_b32 s5, v0
113; SI-NEXT:    s_sub_i32 s6, 0x3f1, s4
114; SI-NEXT:    s_add_i32 s10, s4, 0xfffffc10
115; SI-NEXT:    s_or_b32 s11, s8, s5
116; SI-NEXT:    v_med3_i32 v0, s6, 0, 13
117; SI-NEXT:    s_lshl_b32 s4, s10, 12
118; SI-NEXT:    s_or_b32 s5, s11, 0x1000
119; SI-NEXT:    v_readfirstlane_b32 s6, v0
120; SI-NEXT:    s_or_b32 s4, s11, s4
121; SI-NEXT:    s_lshr_b32 s6, s5, s6
122; SI-NEXT:    v_lshl_b32_e32 v0, s6, v0
123; SI-NEXT:    v_cmp_ne_u32_e32 vcc, s5, v0
124; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
125; SI-NEXT:    v_readfirstlane_b32 s5, v0
126; SI-NEXT:    s_or_b32 s5, s6, s5
127; SI-NEXT:    s_cmp_lt_i32 s10, 1
128; SI-NEXT:    s_cselect_b32 s6, s5, s4
129; SI-NEXT:    s_and_b32 s8, s6, 7
130; SI-NEXT:    s_cmp_gt_i32 s8, 5
131; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
132; SI-NEXT:    s_cmp_eq_u32 s8, 3
133; SI-NEXT:    s_cselect_b64 s[8:9], -1, 0
134; SI-NEXT:    s_lshr_b32 s6, s6, 2
135; SI-NEXT:    s_or_b64 s[4:5], s[8:9], s[4:5]
136; SI-NEXT:    s_or_b32 s4, s4, s5
137; SI-NEXT:    s_cmp_lg_u32 s4, 0
138; SI-NEXT:    s_addc_u32 s4, s6, 0
139; SI-NEXT:    s_cmp_lt_i32 s10, 31
140; SI-NEXT:    s_cselect_b32 s6, s4, 0x7c00
141; SI-NEXT:    s_cmp_lg_u32 s11, 0
142; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
143; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
144; SI-NEXT:    s_cmpk_eq_i32 s10, 0x40f
145; SI-NEXT:    v_mov_b32_e32 v1, s6
146; SI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
147; SI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
148; SI-NEXT:    s_cselect_b64 vcc, -1, 0
149; SI-NEXT:    s_lshr_b32 s4, s7, 16
150; SI-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
151; SI-NEXT:    s_and_b32 s4, s4, 0x8000
152; SI-NEXT:    v_or_b32_e32 v0, s4, v0
153; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
154; SI-NEXT:    s_endpgm
155;
156; VI-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:
157; VI-SAFE-SDAG:       ; %bb.0:
158; VI-SAFE-SDAG-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x24
159; VI-SAFE-SDAG-NEXT:    s_mov_b32 s3, 0xf000
160; VI-SAFE-SDAG-NEXT:    s_mov_b32 s2, -1
161; VI-SAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
162; VI-SAFE-SDAG-NEXT:    s_mov_b32 s0, s4
163; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s4, s7, 8
164; VI-SAFE-SDAG-NEXT:    s_and_b32 s8, s4, 0xffe
165; VI-SAFE-SDAG-NEXT:    s_and_b32 s4, s7, 0x1ff
166; VI-SAFE-SDAG-NEXT:    s_or_b32 s4, s4, s6
167; VI-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
168; VI-SAFE-SDAG-NEXT:    s_mov_b32 s1, s5
169; VI-SAFE-SDAG-NEXT:    s_cselect_b64 s[4:5], -1, 0
170; VI-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
171; VI-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s4, v0
172; VI-SAFE-SDAG-NEXT:    s_bfe_u32 s5, s7, 0xb0014
173; VI-SAFE-SDAG-NEXT:    s_or_b32 s6, s8, s4
174; VI-SAFE-SDAG-NEXT:    s_sub_i32 s8, 0x3f1, s5
175; VI-SAFE-SDAG-NEXT:    v_med3_i32 v0, s8, 0, 13
176; VI-SAFE-SDAG-NEXT:    s_or_b32 s4, s6, 0x1000
177; VI-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s8, v0
178; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s8, s4, s8
179; VI-SAFE-SDAG-NEXT:    v_lshlrev_b32_e64 v0, v0, s8
180; VI-SAFE-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, s4, v0
181; VI-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
182; VI-SAFE-SDAG-NEXT:    s_add_i32 s10, s5, 0xfffffc10
183; VI-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s4, v0
184; VI-SAFE-SDAG-NEXT:    s_lshl_b32 s5, s10, 12
185; VI-SAFE-SDAG-NEXT:    s_or_b32 s4, s8, s4
186; VI-SAFE-SDAG-NEXT:    s_or_b32 s5, s6, s5
187; VI-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s10, 1
188; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s11, s4, s5
189; VI-SAFE-SDAG-NEXT:    s_and_b32 s8, s11, 7
190; VI-SAFE-SDAG-NEXT:    s_cmp_gt_i32 s8, 5
191; VI-SAFE-SDAG-NEXT:    s_cselect_b64 s[4:5], -1, 0
192; VI-SAFE-SDAG-NEXT:    s_cmp_eq_u32 s8, 3
193; VI-SAFE-SDAG-NEXT:    s_cselect_b64 s[8:9], -1, 0
194; VI-SAFE-SDAG-NEXT:    s_or_b64 s[4:5], s[8:9], s[4:5]
195; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s8, s11, 2
196; VI-SAFE-SDAG-NEXT:    s_cmp_lg_u64 s[4:5], 0
197; VI-SAFE-SDAG-NEXT:    s_addc_u32 s4, s8, 0
198; VI-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s10, 31
199; VI-SAFE-SDAG-NEXT:    s_cselect_b32 s8, s4, 0x7c00
200; VI-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s6, 0
201; VI-SAFE-SDAG-NEXT:    s_cselect_b64 s[4:5], -1, 0
202; VI-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
203; VI-SAFE-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
204; VI-SAFE-SDAG-NEXT:    s_cmpk_eq_i32 s10, 0x40f
205; VI-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
206; VI-SAFE-SDAG-NEXT:    v_mov_b32_e32 v1, s8
207; VI-SAFE-SDAG-NEXT:    s_cselect_b64 vcc, -1, 0
208; VI-SAFE-SDAG-NEXT:    s_lshr_b32 s4, s7, 16
209; VI-SAFE-SDAG-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
210; VI-SAFE-SDAG-NEXT:    s_and_b32 s4, s4, 0x8000
211; VI-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, s4, v0
212; VI-SAFE-SDAG-NEXT:    buffer_store_short v0, off, s[0:3], 0
213; VI-SAFE-SDAG-NEXT:    s_endpgm
214;
215; VI-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
216; VI-SAFE-GISEL:       ; %bb.0:
217; VI-SAFE-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
218; VI-SAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
219; VI-SAFE-GISEL-NEXT:    s_bfe_u32 s4, s3, 0xb0014
220; VI-SAFE-GISEL-NEXT:    s_lshr_b32 s5, s3, 8
221; VI-SAFE-GISEL-NEXT:    s_and_b32 s6, s3, 0x1ff
222; VI-SAFE-GISEL-NEXT:    s_addk_i32 s4, 0xfc10
223; VI-SAFE-GISEL-NEXT:    s_and_b32 s5, s5, 0xffe
224; VI-SAFE-GISEL-NEXT:    s_or_b32 s2, s6, s2
225; VI-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
226; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
227; VI-SAFE-GISEL-NEXT:    s_or_b32 s2, s5, s2
228; VI-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
229; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s5, 1, 0
230; VI-SAFE-GISEL-NEXT:    s_sub_i32 s7, 1, s4
231; VI-SAFE-GISEL-NEXT:    s_lshl_b32 s6, s4, 12
232; VI-SAFE-GISEL-NEXT:    s_max_i32 s7, s7, 0
233; VI-SAFE-GISEL-NEXT:    s_or_b32 s6, s2, s6
234; VI-SAFE-GISEL-NEXT:    s_min_i32 s7, s7, 13
235; VI-SAFE-GISEL-NEXT:    s_bitset1_b32 s2, 12
236; VI-SAFE-GISEL-NEXT:    s_lshl_b32 s5, s5, 9
237; VI-SAFE-GISEL-NEXT:    s_lshr_b32 s8, s2, s7
238; VI-SAFE-GISEL-NEXT:    s_or_b32 s5, s5, 0x7c00
239; VI-SAFE-GISEL-NEXT:    s_lshl_b32 s7, s8, s7
240; VI-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s7, s2
241; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
242; VI-SAFE-GISEL-NEXT:    s_or_b32 s2, s8, s2
243; VI-SAFE-GISEL-NEXT:    s_cmp_lt_i32 s4, 1
244; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s2, s6
245; VI-SAFE-GISEL-NEXT:    s_and_b32 s6, s2, 7
246; VI-SAFE-GISEL-NEXT:    s_lshr_b32 s2, s2, 2
247; VI-SAFE-GISEL-NEXT:    s_cmp_eq_u32 s6, 3
248; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s7, 1, 0
249; VI-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s6, 5
250; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s6, 1, 0
251; VI-SAFE-GISEL-NEXT:    s_or_b32 s6, s7, s6
252; VI-SAFE-GISEL-NEXT:    s_and_b32 s6, s6, 1
253; VI-SAFE-GISEL-NEXT:    s_add_i32 s2, s2, s6
254; VI-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s4, 30
255; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 0x7c00, s2
256; VI-SAFE-GISEL-NEXT:    s_cmpk_eq_i32 s4, 0x40f
257; VI-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s5, s2
258; VI-SAFE-GISEL-NEXT:    s_lshr_b32 s3, s3, 16
259; VI-SAFE-GISEL-NEXT:    s_and_b32 s3, s3, 0x8000
260; VI-SAFE-GISEL-NEXT:    s_or_b32 s2, s3, s2
261; VI-SAFE-GISEL-NEXT:    v_mov_b32_e32 v0, s2
262; VI-SAFE-GISEL-NEXT:    s_mov_b32 s2, -1
263; VI-SAFE-GISEL-NEXT:    s_mov_b32 s3, 0xf000
264; VI-SAFE-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
265; VI-SAFE-GISEL-NEXT:    s_endpgm
266;
267; VI-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
268; VI-UNSAFE-SDAG:       ; %bb.0:
269; VI-UNSAFE-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
270; VI-UNSAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
271; VI-UNSAFE-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
272; VI-UNSAFE-SDAG-NEXT:    s_mov_b32 s3, 0xf000
273; VI-UNSAFE-SDAG-NEXT:    s_mov_b32 s2, -1
274; VI-UNSAFE-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
275; VI-UNSAFE-SDAG-NEXT:    buffer_store_short v0, off, s[0:3], 0
276; VI-UNSAFE-SDAG-NEXT:    s_endpgm
277;
278; VI-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
279; VI-UNSAFE-GISEL:       ; %bb.0:
280; VI-UNSAFE-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
281; VI-UNSAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
282; VI-UNSAFE-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
283; VI-UNSAFE-GISEL-NEXT:    s_mov_b32 s2, -1
284; VI-UNSAFE-GISEL-NEXT:    s_mov_b32 s3, 0xf000
285; VI-UNSAFE-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
286; VI-UNSAFE-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
287; VI-UNSAFE-GISEL-NEXT:    s_endpgm
288;
289; GFX10-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:
290; GFX10-SAFE-SDAG:       ; %bb.0:
291; GFX10-SAFE-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
292; GFX10-SAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
293; GFX10-SAFE-SDAG-NEXT:    s_and_b32 s4, s3, 0x1ff
294; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s3, 8
295; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s2, s4, s2
296; GFX10-SAFE-SDAG-NEXT:    s_and_b32 s4, s5, 0xffe
297; GFX10-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s2, 0
298; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s2, -1, 0
299; GFX10-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s2
300; GFX10-SAFE-SDAG-NEXT:    s_bfe_u32 s2, s3, 0xb0014
301; GFX10-SAFE-SDAG-NEXT:    s_sub_i32 s5, 0x3f1, s2
302; GFX10-SAFE-SDAG-NEXT:    s_addk_i32 s2, 0xfc10
303; GFX10-SAFE-SDAG-NEXT:    v_med3_i32 v1, s5, 0, 13
304; GFX10-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s5, v0
305; GFX10-SAFE-SDAG-NEXT:    s_lshl_b32 s7, s2, 12
306; GFX10-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s6, v1
307; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s4, s4, s5
308; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s5, s4, 0x1000
309; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s7, s4, s7
310; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s6, s5, s6
311; GFX10-SAFE-SDAG-NEXT:    v_lshlrev_b32_e64 v0, v1, s6
312; GFX10-SAFE-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc_lo, s5, v0
313; GFX10-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
314; GFX10-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s5, v0
315; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s5, s6, s5
316; GFX10-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 1
317; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, s7
318; GFX10-SAFE-SDAG-NEXT:    s_and_b32 s6, s5, 7
319; GFX10-SAFE-SDAG-NEXT:    s_cmp_gt_i32 s6, 5
320; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s7, -1, 0
321; GFX10-SAFE-SDAG-NEXT:    s_cmp_eq_u32 s6, 3
322; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s6, -1, 0
323; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s5, 2
324; GFX10-SAFE-SDAG-NEXT:    s_or_b32 s6, s6, s7
325; GFX10-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s6, 0
326; GFX10-SAFE-SDAG-NEXT:    s_addc_u32 s5, s5, 0
327; GFX10-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 31
328; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, 0x7c00
329; GFX10-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
330; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 s4, -1, 0
331; GFX10-SAFE-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0x40f
332; GFX10-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
333; GFX10-SAFE-SDAG-NEXT:    s_cselect_b32 vcc_lo, -1, 0
334; GFX10-SAFE-SDAG-NEXT:    s_lshr_b32 s2, s3, 16
335; GFX10-SAFE-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
336; GFX10-SAFE-SDAG-NEXT:    s_and_b32 s2, s2, 0x8000
337; GFX10-SAFE-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
338; GFX10-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
339; GFX10-SAFE-SDAG-NEXT:    v_cndmask_b32_e32 v0, s5, v0, vcc_lo
340; GFX10-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, s2, v0
341; GFX10-SAFE-SDAG-NEXT:    s_mov_b32 s2, -1
342; GFX10-SAFE-SDAG-NEXT:    buffer_store_short v0, off, s[0:3], 0
343; GFX10-SAFE-SDAG-NEXT:    s_endpgm
344;
345; GFX10-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
346; GFX10-SAFE-GISEL:       ; %bb.0:
347; GFX10-SAFE-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
348; GFX10-SAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
349; GFX10-SAFE-GISEL-NEXT:    s_and_b32 s6, s3, 0x1ff
350; GFX10-SAFE-GISEL-NEXT:    s_bfe_u32 s4, s3, 0xb0014
351; GFX10-SAFE-GISEL-NEXT:    s_lshr_b32 s5, s3, 8
352; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s2, s6, s2
353; GFX10-SAFE-GISEL-NEXT:    s_addk_i32 s4, 0xfc10
354; GFX10-SAFE-GISEL-NEXT:    s_and_b32 s5, s5, 0xffe
355; GFX10-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
356; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
357; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s2, s5, s2
358; GFX10-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
359; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s5, 1, 0
360; GFX10-SAFE-GISEL-NEXT:    s_sub_i32 s6, 1, s4
361; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s8, s2, 0x1000
362; GFX10-SAFE-GISEL-NEXT:    s_max_i32 s6, s6, 0
363; GFX10-SAFE-GISEL-NEXT:    s_lshl_b32 s7, s4, 12
364; GFX10-SAFE-GISEL-NEXT:    s_min_i32 s6, s6, 13
365; GFX10-SAFE-GISEL-NEXT:    s_lshl_b32 s5, s5, 9
366; GFX10-SAFE-GISEL-NEXT:    s_lshr_b32 s9, s8, s6
367; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s2, s2, s7
368; GFX10-SAFE-GISEL-NEXT:    s_lshl_b32 s6, s9, s6
369; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s5, s5, 0x7c00
370; GFX10-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s6, s8
371; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s6, 1, 0
372; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s6, s9, s6
373; GFX10-SAFE-GISEL-NEXT:    s_cmp_lt_i32 s4, 1
374; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s6, s2
375; GFX10-SAFE-GISEL-NEXT:    s_and_b32 s6, s2, 7
376; GFX10-SAFE-GISEL-NEXT:    s_lshr_b32 s2, s2, 2
377; GFX10-SAFE-GISEL-NEXT:    s_cmp_eq_u32 s6, 3
378; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s7, 1, 0
379; GFX10-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s6, 5
380; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s6, 1, 0
381; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s6, s7, s6
382; GFX10-SAFE-GISEL-NEXT:    s_and_b32 s6, s6, 1
383; GFX10-SAFE-GISEL-NEXT:    s_add_i32 s2, s2, s6
384; GFX10-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s4, 30
385; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 0x7c00, s2
386; GFX10-SAFE-GISEL-NEXT:    s_cmpk_eq_i32 s4, 0x40f
387; GFX10-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s5, s2
388; GFX10-SAFE-GISEL-NEXT:    s_lshr_b32 s3, s3, 16
389; GFX10-SAFE-GISEL-NEXT:    s_and_b32 s3, s3, 0x8000
390; GFX10-SAFE-GISEL-NEXT:    s_or_b32 s2, s3, s2
391; GFX10-SAFE-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
392; GFX10-SAFE-GISEL-NEXT:    v_mov_b32_e32 v0, s2
393; GFX10-SAFE-GISEL-NEXT:    s_mov_b32 s2, -1
394; GFX10-SAFE-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
395; GFX10-SAFE-GISEL-NEXT:    s_endpgm
396;
397; GFX10-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
398; GFX10-UNSAFE-SDAG:       ; %bb.0:
399; GFX10-UNSAFE-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
400; GFX10-UNSAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
401; GFX10-UNSAFE-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
402; GFX10-UNSAFE-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
403; GFX10-UNSAFE-SDAG-NEXT:    s_mov_b32 s2, -1
404; GFX10-UNSAFE-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
405; GFX10-UNSAFE-SDAG-NEXT:    buffer_store_short v0, off, s[0:3], 0
406; GFX10-UNSAFE-SDAG-NEXT:    s_endpgm
407;
408; GFX10-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
409; GFX10-UNSAFE-GISEL:       ; %bb.0:
410; GFX10-UNSAFE-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
411; GFX10-UNSAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
412; GFX10-UNSAFE-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
413; GFX10-UNSAFE-GISEL-NEXT:    s_mov_b32 s2, -1
414; GFX10-UNSAFE-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
415; GFX10-UNSAFE-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
416; GFX10-UNSAFE-GISEL-NEXT:    buffer_store_short v0, off, s[0:3], 0
417; GFX10-UNSAFE-GISEL-NEXT:    s_endpgm
418;
419; GFX11-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:
420; GFX11-SAFE-SDAG:       ; %bb.0:
421; GFX11-SAFE-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
422; GFX11-SAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
423; GFX11-SAFE-SDAG-NEXT:    s_and_b32 s4, s3, 0x1ff
424; GFX11-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s3, 8
425; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s2, s4, s2
426; GFX11-SAFE-SDAG-NEXT:    s_and_b32 s4, s5, 0xffe
427; GFX11-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s2, 0
428; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s2, -1, 0
429; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
430; GFX11-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s2
431; GFX11-SAFE-SDAG-NEXT:    s_bfe_u32 s2, s3, 0xb0014
432; GFX11-SAFE-SDAG-NEXT:    s_sub_i32 s5, 0x3f1, s2
433; GFX11-SAFE-SDAG-NEXT:    s_addk_i32 s2, 0xfc10
434; GFX11-SAFE-SDAG-NEXT:    v_med3_i32 v1, s5, 0, 13
435; GFX11-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s5, v0
436; GFX11-SAFE-SDAG-NEXT:    s_lshl_b32 s7, s2, 12
437; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
438; GFX11-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s6, v1
439; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s4, s4, s5
440; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
441; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s5, s4, 0x1000
442; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s7, s4, s7
443; GFX11-SAFE-SDAG-NEXT:    s_lshr_b32 s6, s5, s6
444; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
445; GFX11-SAFE-SDAG-NEXT:    v_lshlrev_b32_e64 v0, v1, s6
446; GFX11-SAFE-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc_lo, s5, v0
447; GFX11-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
448; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
449; GFX11-SAFE-SDAG-NEXT:    v_readfirstlane_b32 s5, v0
450; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s5, s6, s5
451; GFX11-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 1
452; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, s7
453; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
454; GFX11-SAFE-SDAG-NEXT:    s_and_b32 s6, s5, 7
455; GFX11-SAFE-SDAG-NEXT:    s_cmp_gt_i32 s6, 5
456; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s7, -1, 0
457; GFX11-SAFE-SDAG-NEXT:    s_cmp_eq_u32 s6, 3
458; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s6, -1, 0
459; GFX11-SAFE-SDAG-NEXT:    s_lshr_b32 s5, s5, 2
460; GFX11-SAFE-SDAG-NEXT:    s_or_b32 s6, s6, s7
461; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
462; GFX11-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s6, 0
463; GFX11-SAFE-SDAG-NEXT:    s_addc_u32 s5, s5, 0
464; GFX11-SAFE-SDAG-NEXT:    s_cmp_lt_i32 s2, 31
465; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s5, s5, 0x7c00
466; GFX11-SAFE-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
467; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 s4, -1, 0
468; GFX11-SAFE-SDAG-NEXT:    s_cmpk_eq_i32 s2, 0x40f
469; GFX11-SAFE-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
470; GFX11-SAFE-SDAG-NEXT:    s_cselect_b32 vcc_lo, -1, 0
471; GFX11-SAFE-SDAG-NEXT:    s_lshr_b32 s2, s3, 16
472; GFX11-SAFE-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
473; GFX11-SAFE-SDAG-NEXT:    s_and_b32 s2, s2, 0x8000
474; GFX11-SAFE-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
475; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
476; GFX11-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
477; GFX11-SAFE-SDAG-NEXT:    v_cndmask_b32_e32 v0, s5, v0, vcc_lo
478; GFX11-SAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
479; GFX11-SAFE-SDAG-NEXT:    v_or_b32_e32 v0, s2, v0
480; GFX11-SAFE-SDAG-NEXT:    s_mov_b32 s2, -1
481; GFX11-SAFE-SDAG-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
482; GFX11-SAFE-SDAG-NEXT:    s_endpgm
483;
484; GFX11-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
485; GFX11-SAFE-GISEL:       ; %bb.0:
486; GFX11-SAFE-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
487; GFX11-SAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
488; GFX11-SAFE-GISEL-NEXT:    s_and_b32 s6, s3, 0x1ff
489; GFX11-SAFE-GISEL-NEXT:    s_bfe_u32 s4, s3, 0xb0014
490; GFX11-SAFE-GISEL-NEXT:    s_lshr_b32 s5, s3, 8
491; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s2, s6, s2
492; GFX11-SAFE-GISEL-NEXT:    s_addk_i32 s4, 0xfc10
493; GFX11-SAFE-GISEL-NEXT:    s_and_b32 s5, s5, 0xffe
494; GFX11-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
495; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
496; GFX11-SAFE-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
497; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s2, s5, s2
498; GFX11-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
499; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s5, 1, 0
500; GFX11-SAFE-GISEL-NEXT:    s_sub_i32 s6, 1, s4
501; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s8, s2, 0x1000
502; GFX11-SAFE-GISEL-NEXT:    s_max_i32 s6, s6, 0
503; GFX11-SAFE-GISEL-NEXT:    s_lshl_b32 s7, s4, 12
504; GFX11-SAFE-GISEL-NEXT:    s_min_i32 s6, s6, 13
505; GFX11-SAFE-GISEL-NEXT:    s_lshl_b32 s5, s5, 9
506; GFX11-SAFE-GISEL-NEXT:    s_lshr_b32 s9, s8, s6
507; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s2, s2, s7
508; GFX11-SAFE-GISEL-NEXT:    s_lshl_b32 s6, s9, s6
509; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s5, s5, 0x7c00
510; GFX11-SAFE-GISEL-NEXT:    s_cmp_lg_u32 s6, s8
511; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s6, 1, 0
512; GFX11-SAFE-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
513; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s6, s9, s6
514; GFX11-SAFE-GISEL-NEXT:    s_cmp_lt_i32 s4, 1
515; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s6, s2
516; GFX11-SAFE-GISEL-NEXT:    s_and_b32 s6, s2, 7
517; GFX11-SAFE-GISEL-NEXT:    s_lshr_b32 s2, s2, 2
518; GFX11-SAFE-GISEL-NEXT:    s_cmp_eq_u32 s6, 3
519; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s7, 1, 0
520; GFX11-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s6, 5
521; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s6, 1, 0
522; GFX11-SAFE-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
523; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s6, s7, s6
524; GFX11-SAFE-GISEL-NEXT:    s_and_b32 s6, s6, 1
525; GFX11-SAFE-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
526; GFX11-SAFE-GISEL-NEXT:    s_add_i32 s2, s2, s6
527; GFX11-SAFE-GISEL-NEXT:    s_cmp_gt_i32 s4, 30
528; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s2, 0x7c00, s2
529; GFX11-SAFE-GISEL-NEXT:    s_cmpk_eq_i32 s4, 0x40f
530; GFX11-SAFE-GISEL-NEXT:    s_cselect_b32 s2, s5, s2
531; GFX11-SAFE-GISEL-NEXT:    s_lshr_b32 s3, s3, 16
532; GFX11-SAFE-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
533; GFX11-SAFE-GISEL-NEXT:    s_and_b32 s3, s3, 0x8000
534; GFX11-SAFE-GISEL-NEXT:    s_or_b32 s2, s3, s2
535; GFX11-SAFE-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
536; GFX11-SAFE-GISEL-NEXT:    v_mov_b32_e32 v0, s2
537; GFX11-SAFE-GISEL-NEXT:    s_mov_b32 s2, -1
538; GFX11-SAFE-GISEL-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
539; GFX11-SAFE-GISEL-NEXT:    s_endpgm
540;
541; GFX11-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
542; GFX11-UNSAFE-SDAG:       ; %bb.0:
543; GFX11-UNSAFE-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
544; GFX11-UNSAFE-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
545; GFX11-UNSAFE-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
546; GFX11-UNSAFE-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
547; GFX11-UNSAFE-SDAG-NEXT:    s_mov_b32 s2, -1
548; GFX11-UNSAFE-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
549; GFX11-UNSAFE-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
550; GFX11-UNSAFE-SDAG-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
551; GFX11-UNSAFE-SDAG-NEXT:    s_endpgm
552;
553; GFX11-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
554; GFX11-UNSAFE-GISEL:       ; %bb.0:
555; GFX11-UNSAFE-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
556; GFX11-UNSAFE-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
557; GFX11-UNSAFE-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[2:3]
558; GFX11-UNSAFE-GISEL-NEXT:    s_mov_b32 s2, -1
559; GFX11-UNSAFE-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
560; GFX11-UNSAFE-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
561; GFX11-UNSAFE-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
562; GFX11-UNSAFE-GISEL-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
563; GFX11-UNSAFE-GISEL-NEXT:    s_endpgm
564  %result = fptrunc double %in to half
; The half result is reinterpreted as i16 and stored as an integer.
565  %result_i16 = bitcast half %result to i16
566  store i16 %result_i16, ptr addrspace(1) %out
567  ret void
568}
569
; Kernel test: truncate a <2 x double> kernel argument to <2 x float> and
; store the vector to the addrspace(1) pointer %out.
; For every check prefix the expected code contains two v_cvt_f32_f64_e32
; instructions (one per element, writing v0 and v1) followed by a single
; 64-bit buffer store of v[0:1]. The SI and SDAG checks list the conversion
; into v1 first, while the GISEL checks list the conversion into v0 first.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
570define amdgpu_kernel void @fptrunc_v2f64_to_v2f32(ptr addrspace(1) %out, <2 x double> %in) {
571; SI-LABEL: fptrunc_v2f64_to_v2f32:
572; SI:       ; %bb.0:
573; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0xd
574; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
575; SI-NEXT:    s_mov_b32 s7, 0xf000
576; SI-NEXT:    s_mov_b32 s6, -1
577; SI-NEXT:    s_waitcnt lgkmcnt(0)
578; SI-NEXT:    v_cvt_f32_f64_e32 v1, s[2:3]
579; SI-NEXT:    v_cvt_f32_f64_e32 v0, s[0:1]
580; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
581; SI-NEXT:    s_endpgm
582;
583; VI-SDAG-LABEL: fptrunc_v2f64_to_v2f32:
584; VI-SDAG:       ; %bb.0:
585; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
586; VI-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
587; VI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
588; VI-SDAG-NEXT:    s_mov_b32 s6, -1
589; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
590; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[2:3]
591; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[0:1]
592; VI-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
593; VI-SDAG-NEXT:    s_endpgm
594;
595; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f32:
596; VI-GISEL:       ; %bb.0:
597; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
598; VI-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
599; VI-GISEL-NEXT:    s_mov_b32 s6, -1
600; VI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
601; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
602; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[0:1]
603; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[2:3]
604; VI-GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
605; VI-GISEL-NEXT:    s_endpgm
606;
607; GFX10-SDAG-LABEL: fptrunc_v2f64_to_v2f32:
608; GFX10-SDAG:       ; %bb.0:
609; GFX10-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
610; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
611; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[2:3]
612; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[0:1]
613; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
614; GFX10-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
615; GFX10-SDAG-NEXT:    s_mov_b32 s2, -1
616; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
617; GFX10-SDAG-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
618; GFX10-SDAG-NEXT:    s_endpgm
619;
620; GFX10-GISEL-LABEL: fptrunc_v2f64_to_v2f32:
621; GFX10-GISEL:       ; %bb.0:
622; GFX10-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
623; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
624; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[0:1]
625; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[2:3]
626; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
627; GFX10-GISEL-NEXT:    s_mov_b32 s2, -1
628; GFX10-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
629; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
630; GFX10-GISEL-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
631; GFX10-GISEL-NEXT:    s_endpgm
632;
633; GFX11-SDAG-LABEL: fptrunc_v2f64_to_v2f32:
634; GFX11-SDAG:       ; %bb.0:
635; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
636; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
637; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[2:3]
638; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[0:1]
639; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
640; GFX11-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
641; GFX11-SDAG-NEXT:    s_mov_b32 s2, -1
642; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
643; GFX11-SDAG-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
644; GFX11-SDAG-NEXT:    s_endpgm
645;
646; GFX11-GISEL-LABEL: fptrunc_v2f64_to_v2f32:
647; GFX11-GISEL:       ; %bb.0:
648; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x34
649; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
650; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[0:1]
651; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[2:3]
652; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
653; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
654; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
655; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
656; GFX11-GISEL-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
657; GFX11-GISEL-NEXT:    s_endpgm
658  %result = fptrunc <2 x double> %in to <2 x float>
659  store <2 x float> %result, ptr addrspace(1) %out
660  ret void
661}
662
; Kernel test: truncates the <3 x double> kernel argument %in to <3 x float>
; with fptrunc and stores the result through the addrspace(1) pointer %out.
; Per the autogenerated checks below, every configuration emits three
; v_cvt_f32_f64 conversions. The SI output writes the result as one dword at
; offset 8 plus one dwordx2, while the VI, GFX10 and GFX11 outputs use a
; single three-dword store (dwordx3 / b96). The GISEL-prefixed outputs fetch
; the input with one 8-dword load; the other outputs use separate 2- and
; 4-dword loads.
; NOTE(review): the check lines are tool-generated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
663define amdgpu_kernel void @fptrunc_v3f64_to_v3f32(ptr addrspace(1) %out, <3 x double> %in) {
664; SI-LABEL: fptrunc_v3f64_to_v3f32:
665; SI:       ; %bb.0:
666; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
667; SI-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x11
668; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x15
669; SI-NEXT:    s_mov_b32 s3, 0xf000
670; SI-NEXT:    s_mov_b32 s2, -1
671; SI-NEXT:    s_waitcnt lgkmcnt(0)
672; SI-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
673; SI-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
674; SI-NEXT:    v_cvt_f32_f64_e32 v2, s[4:5]
675; SI-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:8
676; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
677; SI-NEXT:    s_endpgm
678;
679; VI-SDAG-LABEL: fptrunc_v3f64_to_v3f32:
680; VI-SDAG:       ; %bb.0:
681; VI-SDAG-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x54
682; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x44
683; VI-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
684; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
685; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[6:7]
686; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[2:3]
687; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[0:1]
688; VI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
689; VI-SDAG-NEXT:    s_mov_b32 s6, -1
690; VI-SDAG-NEXT:    buffer_store_dwordx3 v[0:2], off, s[4:7], 0
691; VI-SDAG-NEXT:    s_endpgm
692;
693; VI-GISEL-LABEL: fptrunc_v3f64_to_v3f32:
694; VI-GISEL:       ; %bb.0:
695; VI-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x44
696; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
697; VI-GISEL-NEXT:    s_mov_b32 s2, -1
698; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
699; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
700; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
701; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
702; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[12:13]
703; VI-GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
704; VI-GISEL-NEXT:    s_endpgm
705;
706; GFX10-SDAG-LABEL: fptrunc_v3f64_to_v3f32:
707; GFX10-SDAG:       ; %bb.0:
708; GFX10-SDAG-NEXT:    s_clause 0x1
709; GFX10-SDAG-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x54
710; GFX10-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x44
711; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
712; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[6:7]
713; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[2:3]
714; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[0:1]
715; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
716; GFX10-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
717; GFX10-SDAG-NEXT:    s_mov_b32 s2, -1
718; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
719; GFX10-SDAG-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
720; GFX10-SDAG-NEXT:    s_endpgm
721;
722; GFX10-GISEL-LABEL: fptrunc_v3f64_to_v3f32:
723; GFX10-GISEL:       ; %bb.0:
724; GFX10-GISEL-NEXT:    s_clause 0x1
725; GFX10-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x44
726; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
727; GFX10-GISEL-NEXT:    s_mov_b32 s2, -1
728; GFX10-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
729; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
730; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
731; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
732; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[12:13]
733; GFX10-GISEL-NEXT:    buffer_store_dwordx3 v[0:2], off, s[0:3], 0
734; GFX10-GISEL-NEXT:    s_endpgm
735;
736; GFX11-SDAG-LABEL: fptrunc_v3f64_to_v3f32:
737; GFX11-SDAG:       ; %bb.0:
738; GFX11-SDAG-NEXT:    s_clause 0x1
739; GFX11-SDAG-NEXT:    s_load_b64 s[6:7], s[4:5], 0x54
740; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x44
741; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
742; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[6:7]
743; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[2:3]
744; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[0:1]
745; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
746; GFX11-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
747; GFX11-SDAG-NEXT:    s_mov_b32 s2, -1
748; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
749; GFX11-SDAG-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], 0
750; GFX11-SDAG-NEXT:    s_endpgm
751;
752; GFX11-GISEL-LABEL: fptrunc_v3f64_to_v3f32:
753; GFX11-GISEL:       ; %bb.0:
754; GFX11-GISEL-NEXT:    s_clause 0x1
755; GFX11-GISEL-NEXT:    s_load_b256 s[8:15], s[4:5], 0x44
756; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
757; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
758; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
759; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
760; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
761; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
762; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[12:13]
763; GFX11-GISEL-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], 0
764; GFX11-GISEL-NEXT:    s_endpgm
765  %result = fptrunc <3 x double> %in to <3 x float>
766  store <3 x float> %result, ptr addrspace(1) %out
767  ret void
768}
769
; Kernel test: truncates the <4 x double> kernel argument %in to <4 x float>
; with fptrunc and stores the result through the addrspace(1) pointer %out.
; Per the autogenerated checks below, every configuration fetches the input
; with one 8-dword load (dwordx8 / b256), emits four v_cvt_f32_f64
; conversions, and writes the result with a single four-dword store
; (dwordx4 / b128). The GISEL-prefixed outputs list the conversions in
; ascending destination-register order (v0..v3); the other outputs list them
; in descending order (v3..v0).
; NOTE(review): the check lines are tool-generated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
770define amdgpu_kernel void @fptrunc_v4f64_to_v4f32(ptr addrspace(1) %out, <4 x double> %in) {
771; SI-LABEL: fptrunc_v4f64_to_v4f32:
772; SI:       ; %bb.0:
773; SI-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x11
774; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
775; SI-NEXT:    s_mov_b32 s3, 0xf000
776; SI-NEXT:    s_mov_b32 s2, -1
777; SI-NEXT:    s_waitcnt lgkmcnt(0)
778; SI-NEXT:    v_cvt_f32_f64_e32 v3, s[14:15]
779; SI-NEXT:    v_cvt_f32_f64_e32 v2, s[12:13]
780; SI-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
781; SI-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
782; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
783; SI-NEXT:    s_endpgm
784;
785; VI-SDAG-LABEL: fptrunc_v4f64_to_v4f32:
786; VI-SDAG:       ; %bb.0:
787; VI-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x44
788; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
789; VI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
790; VI-SDAG-NEXT:    s_mov_b32 s2, -1
791; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
792; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[14:15]
793; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[12:13]
794; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
795; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
796; VI-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
797; VI-SDAG-NEXT:    s_endpgm
798;
799; VI-GISEL-LABEL: fptrunc_v4f64_to_v4f32:
800; VI-GISEL:       ; %bb.0:
801; VI-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x44
802; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
803; VI-GISEL-NEXT:    s_mov_b32 s2, -1
804; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
805; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
806; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
807; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
808; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[12:13]
809; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[14:15]
810; VI-GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
811; VI-GISEL-NEXT:    s_endpgm
812;
813; GFX10-SDAG-LABEL: fptrunc_v4f64_to_v4f32:
814; GFX10-SDAG:       ; %bb.0:
815; GFX10-SDAG-NEXT:    s_clause 0x1
816; GFX10-SDAG-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x44
817; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
818; GFX10-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
819; GFX10-SDAG-NEXT:    s_mov_b32 s2, -1
820; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
821; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[14:15]
822; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[12:13]
823; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
824; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
825; GFX10-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
826; GFX10-SDAG-NEXT:    s_endpgm
827;
828; GFX10-GISEL-LABEL: fptrunc_v4f64_to_v4f32:
829; GFX10-GISEL:       ; %bb.0:
830; GFX10-GISEL-NEXT:    s_clause 0x1
831; GFX10-GISEL-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x44
832; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
833; GFX10-GISEL-NEXT:    s_mov_b32 s2, -1
834; GFX10-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
835; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
836; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
837; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
838; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[12:13]
839; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[14:15]
840; GFX10-GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
841; GFX10-GISEL-NEXT:    s_endpgm
842;
843; GFX11-SDAG-LABEL: fptrunc_v4f64_to_v4f32:
844; GFX11-SDAG:       ; %bb.0:
845; GFX11-SDAG-NEXT:    s_clause 0x1
846; GFX11-SDAG-NEXT:    s_load_b256 s[8:15], s[4:5], 0x44
847; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
848; GFX11-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
849; GFX11-SDAG-NEXT:    s_mov_b32 s2, -1
850; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
851; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[14:15]
852; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[12:13]
853; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
854; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
855; GFX11-SDAG-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
856; GFX11-SDAG-NEXT:    s_endpgm
857;
858; GFX11-GISEL-LABEL: fptrunc_v4f64_to_v4f32:
859; GFX11-GISEL:       ; %bb.0:
860; GFX11-GISEL-NEXT:    s_clause 0x1
861; GFX11-GISEL-NEXT:    s_load_b256 s[8:15], s[4:5], 0x44
862; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
863; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
864; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
865; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
866; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
867; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
868; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[12:13]
869; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[14:15]
870; GFX11-GISEL-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
871; GFX11-GISEL-NEXT:    s_endpgm
872  %result = fptrunc <4 x double> %in to <4 x float>
873  store <4 x float> %result, ptr addrspace(1) %out
874  ret void
875}
876
; Kernel test: truncates the <8 x double> kernel argument %in to <8 x float>
; with fptrunc and stores the result through the addrspace(1) pointer %out.
; Per the autogenerated checks below, every configuration fetches the input
; with one 16-dword load (dwordx16 / b512), emits eight v_cvt_f32_f64
; conversions, and writes the result as two four-dword stores at offsets 0
; and 16. The GISEL-prefixed outputs store v[0:3] first and then v[4:7]; the
; other outputs store v[4:7] (offset 16) first. The GFX11 outputs
; additionally place an s_clause 0x1 before the pair of stores.
; NOTE(review): the check lines are tool-generated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
877define amdgpu_kernel void @fptrunc_v8f64_to_v8f32(ptr addrspace(1) %out, <8 x double> %in) {
878; SI-LABEL: fptrunc_v8f64_to_v8f32:
879; SI:       ; %bb.0:
880; SI-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x19
881; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
882; SI-NEXT:    s_mov_b32 s3, 0xf000
883; SI-NEXT:    s_mov_b32 s2, -1
884; SI-NEXT:    s_waitcnt lgkmcnt(0)
885; SI-NEXT:    v_cvt_f32_f64_e32 v3, s[14:15]
886; SI-NEXT:    v_cvt_f32_f64_e32 v2, s[12:13]
887; SI-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
888; SI-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
889; SI-NEXT:    v_cvt_f32_f64_e32 v7, s[22:23]
890; SI-NEXT:    v_cvt_f32_f64_e32 v6, s[20:21]
891; SI-NEXT:    v_cvt_f32_f64_e32 v5, s[18:19]
892; SI-NEXT:    v_cvt_f32_f64_e32 v4, s[16:17]
893; SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
894; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
895; SI-NEXT:    s_endpgm
896;
897; VI-SDAG-LABEL: fptrunc_v8f64_to_v8f32:
898; VI-SDAG:       ; %bb.0:
899; VI-SDAG-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
900; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
901; VI-SDAG-NEXT:    s_mov_b32 s3, 0xf000
902; VI-SDAG-NEXT:    s_mov_b32 s2, -1
903; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
904; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v7, s[22:23]
905; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v6, s[20:21]
906; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v5, s[18:19]
907; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v4, s[16:17]
908; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[14:15]
909; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[12:13]
910; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
911; VI-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
912; VI-SDAG-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
913; VI-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
914; VI-SDAG-NEXT:    s_endpgm
915;
916; VI-GISEL-LABEL: fptrunc_v8f64_to_v8f32:
917; VI-GISEL:       ; %bb.0:
918; VI-GISEL-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
919; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
920; VI-GISEL-NEXT:    s_mov_b32 s2, -1
921; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
922; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
923; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
924; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
925; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[12:13]
926; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[14:15]
927; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v4, s[16:17]
928; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v5, s[18:19]
929; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v6, s[20:21]
930; VI-GISEL-NEXT:    v_cvt_f32_f64_e32 v7, s[22:23]
931; VI-GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
932; VI-GISEL-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
933; VI-GISEL-NEXT:    s_endpgm
934;
935; GFX10-SDAG-LABEL: fptrunc_v8f64_to_v8f32:
936; GFX10-SDAG:       ; %bb.0:
937; GFX10-SDAG-NEXT:    s_clause 0x1
938; GFX10-SDAG-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
939; GFX10-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
940; GFX10-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
941; GFX10-SDAG-NEXT:    s_mov_b32 s2, -1
942; GFX10-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
943; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v7, s[22:23]
944; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v6, s[20:21]
945; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v5, s[18:19]
946; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v4, s[16:17]
947; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[14:15]
948; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[12:13]
949; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
950; GFX10-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
951; GFX10-SDAG-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
952; GFX10-SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
953; GFX10-SDAG-NEXT:    s_endpgm
954;
955; GFX10-GISEL-LABEL: fptrunc_v8f64_to_v8f32:
956; GFX10-GISEL:       ; %bb.0:
957; GFX10-GISEL-NEXT:    s_clause 0x1
958; GFX10-GISEL-NEXT:    s_load_dwordx16 s[8:23], s[4:5], 0x64
959; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
960; GFX10-GISEL-NEXT:    s_mov_b32 s2, -1
961; GFX10-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
962; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
963; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
964; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
965; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[12:13]
966; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[14:15]
967; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v4, s[16:17]
968; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v5, s[18:19]
969; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v6, s[20:21]
970; GFX10-GISEL-NEXT:    v_cvt_f32_f64_e32 v7, s[22:23]
971; GFX10-GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
972; GFX10-GISEL-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
973; GFX10-GISEL-NEXT:    s_endpgm
974;
975; GFX11-SDAG-LABEL: fptrunc_v8f64_to_v8f32:
976; GFX11-SDAG:       ; %bb.0:
977; GFX11-SDAG-NEXT:    s_clause 0x1
978; GFX11-SDAG-NEXT:    s_load_b512 s[8:23], s[4:5], 0x64
979; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
980; GFX11-SDAG-NEXT:    s_mov_b32 s3, 0x31016000
981; GFX11-SDAG-NEXT:    s_mov_b32 s2, -1
982; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
983; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v7, s[22:23]
984; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v6, s[20:21]
985; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v5, s[18:19]
986; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v4, s[16:17]
987; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v3, s[14:15]
988; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v2, s[12:13]
989; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
990; GFX11-SDAG-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
991; GFX11-SDAG-NEXT:    s_clause 0x1
992; GFX11-SDAG-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0 offset:16
993; GFX11-SDAG-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
994; GFX11-SDAG-NEXT:    s_endpgm
995;
996; GFX11-GISEL-LABEL: fptrunc_v8f64_to_v8f32:
997; GFX11-GISEL:       ; %bb.0:
998; GFX11-GISEL-NEXT:    s_clause 0x1
999; GFX11-GISEL-NEXT:    s_load_b512 s[8:23], s[4:5], 0x64
1000; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
1001; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
1002; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
1003; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1004; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v0, s[8:9]
1005; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v1, s[10:11]
1006; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v2, s[12:13]
1007; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v3, s[14:15]
1008; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v4, s[16:17]
1009; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v5, s[18:19]
1010; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v6, s[20:21]
1011; GFX11-GISEL-NEXT:    v_cvt_f32_f64_e32 v7, s[22:23]
1012; GFX11-GISEL-NEXT:    s_clause 0x1
1013; GFX11-GISEL-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
1014; GFX11-GISEL-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0 offset:16
1015; GFX11-GISEL-NEXT:    s_endpgm
1016  %result = fptrunc <8 x double> %in to <8 x float>
1017  store <8 x float> %result, ptr addrspace(1) %out
1018  ret void
1019}
1020