xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk.ll (revision 5a81a559d69fb84e1e8ef623ac4b642081c14c51)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX950-SDAG %s
3; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX950-GISEL %s
4
; Declarations of the four pk32 scaled-conversion intrinsics called by the
; tests in this file. Each one takes a 32-element source vector (bfloat or
; half) plus a float scale and returns <6 x i32>.
; NOTE(review): the bf6/fp6 suffixes presumably name the 6-bit destination
; format (32 x 6 bits = 192 bits = 6 dwords) - confirm against the intrinsic
; definition.
5declare <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.bf16(<32 x bfloat> %src, float %scale)
6declare <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.f16(<32 x half> %src, float %scale)
7declare <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.bf16(<32 x bfloat> %src, float %scale)
8declare <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.f16(<32 x half> %src, float %scale)
9
; pk32.bf6.bf16 with the source vector and the scale both passed as ordinary
; (non-inreg) arguments. The expected code reads the source from v[0:15] and
; the scale from v16, copies v17/v18 into v[24:25] for use as the store
; address, and writes the conversion result to v[18:23]. The <6 x i32> store is
; split into a dwordx2 at offset 16 and a dwordx4 at offset 0.
; Both check prefixes expect the same instruction sequence for this function.
; NOTE(review): presumably the -global-isel=1 run falls back to SelectionDAG
; for bfloat (the run line passes -global-isel-abort=2) - confirm.
10define amdgpu_ps void @test_scalef32_pk32_bf6_bf16_vv(<32 x bfloat> %src, float %scale, ptr addrspace(1) %out) {
11; GFX950-SDAG-LABEL: test_scalef32_pk32_bf6_bf16_vv:
12; GFX950-SDAG:       ; %bb.0:
13; GFX950-SDAG-NEXT:    v_mov_b32_e32 v25, v18
14; GFX950-SDAG-NEXT:    v_mov_b32_e32 v24, v17
15; GFX950-SDAG-NEXT:    v_cvt_scalef32_pk32_bf6_bf16 v[18:23], v[0:15], v16
16; GFX950-SDAG-NEXT:    global_store_dwordx2 v[24:25], v[22:23], off offset:16
17; GFX950-SDAG-NEXT:    global_store_dwordx4 v[24:25], v[18:21], off
18; GFX950-SDAG-NEXT:    s_endpgm
19;
20; GFX950-GISEL-LABEL: test_scalef32_pk32_bf6_bf16_vv:
21; GFX950-GISEL:       ; %bb.0:
22; GFX950-GISEL-NEXT:    v_mov_b32_e32 v25, v18
23; GFX950-GISEL-NEXT:    v_mov_b32_e32 v24, v17
24; GFX950-GISEL-NEXT:    v_cvt_scalef32_pk32_bf6_bf16 v[18:23], v[0:15], v16
25; GFX950-GISEL-NEXT:    global_store_dwordx2 v[24:25], v[22:23], off offset:16
26; GFX950-GISEL-NEXT:    global_store_dwordx4 v[24:25], v[18:21], off
27; GFX950-GISEL-NEXT:    s_endpgm
28  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.bf16(<32 x bfloat> %src, float %scale)
29  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
30  ret void
31}
32
; pk32.bf6.bf16 with an inreg source vector and a constant scale of 100.0.
; The expected code copies s0-s15 into v[2:17] one dword at a time, loads the
; float bit pattern 0x42c80000 (100.0) into s0, and passes s0 as the scale
; operand. The result in v[18:23] is stored through v[0:1] as a dwordx2 at
; offset 16 and a dwordx4 at offset 0.
; Both check prefixes expect the same instruction sequence for this function.
; NOTE(review): "sl" presumably stands for scalar source / literal scale, and
; the identical output presumably comes from a GlobalISel fallback for bfloat
; (the run line passes -global-isel-abort=2) - confirm both.
33define amdgpu_ps void @test_scalef32_pk32_bf6_bf16_sl(<32 x bfloat> inreg %src, ptr addrspace(1) %out) {
34; GFX950-SDAG-LABEL: test_scalef32_pk32_bf6_bf16_sl:
35; GFX950-SDAG:       ; %bb.0:
36; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s0
37; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, s1
38; GFX950-SDAG-NEXT:    v_mov_b32_e32 v4, s2
39; GFX950-SDAG-NEXT:    v_mov_b32_e32 v5, s3
40; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, s4
41; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, s5
42; GFX950-SDAG-NEXT:    v_mov_b32_e32 v8, s6
43; GFX950-SDAG-NEXT:    v_mov_b32_e32 v9, s7
44; GFX950-SDAG-NEXT:    v_mov_b32_e32 v10, s8
45; GFX950-SDAG-NEXT:    v_mov_b32_e32 v11, s9
46; GFX950-SDAG-NEXT:    v_mov_b32_e32 v12, s10
47; GFX950-SDAG-NEXT:    v_mov_b32_e32 v13, s11
48; GFX950-SDAG-NEXT:    v_mov_b32_e32 v14, s12
49; GFX950-SDAG-NEXT:    v_mov_b32_e32 v15, s13
50; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, s14
51; GFX950-SDAG-NEXT:    v_mov_b32_e32 v17, s15
52; GFX950-SDAG-NEXT:    s_mov_b32 s0, 0x42c80000
53; GFX950-SDAG-NEXT:    v_cvt_scalef32_pk32_bf6_bf16 v[18:23], v[2:17], s0
54; GFX950-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[22:23], off offset:16
55; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[18:21], off
56; GFX950-SDAG-NEXT:    s_endpgm
57;
58; GFX950-GISEL-LABEL: test_scalef32_pk32_bf6_bf16_sl:
59; GFX950-GISEL:       ; %bb.0:
60; GFX950-GISEL-NEXT:    v_mov_b32_e32 v2, s0
61; GFX950-GISEL-NEXT:    v_mov_b32_e32 v3, s1
62; GFX950-GISEL-NEXT:    v_mov_b32_e32 v4, s2
63; GFX950-GISEL-NEXT:    v_mov_b32_e32 v5, s3
64; GFX950-GISEL-NEXT:    v_mov_b32_e32 v6, s4
65; GFX950-GISEL-NEXT:    v_mov_b32_e32 v7, s5
66; GFX950-GISEL-NEXT:    v_mov_b32_e32 v8, s6
67; GFX950-GISEL-NEXT:    v_mov_b32_e32 v9, s7
68; GFX950-GISEL-NEXT:    v_mov_b32_e32 v10, s8
69; GFX950-GISEL-NEXT:    v_mov_b32_e32 v11, s9
70; GFX950-GISEL-NEXT:    v_mov_b32_e32 v12, s10
71; GFX950-GISEL-NEXT:    v_mov_b32_e32 v13, s11
72; GFX950-GISEL-NEXT:    v_mov_b32_e32 v14, s12
73; GFX950-GISEL-NEXT:    v_mov_b32_e32 v15, s13
74; GFX950-GISEL-NEXT:    v_mov_b32_e32 v16, s14
75; GFX950-GISEL-NEXT:    v_mov_b32_e32 v17, s15
76; GFX950-GISEL-NEXT:    s_mov_b32 s0, 0x42c80000
77; GFX950-GISEL-NEXT:    v_cvt_scalef32_pk32_bf6_bf16 v[18:23], v[2:17], s0
78; GFX950-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[22:23], off offset:16
79; GFX950-GISEL-NEXT:    global_store_dwordx4 v[0:1], v[18:21], off
80; GFX950-GISEL-NEXT:    s_endpgm
81  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.bf16(<32 x bfloat> %src, float 100.0)
82  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
83  ret void
84}
85
; pk32.bf6.f16 with the source vector and the scale both passed as ordinary
; (non-inreg) arguments. The expected code reads the source from v[0:15] and
; the scale from v16, copies v17/v18 into v[24:25] for use as the store
; address, and writes the conversion result to v[18:23]. The <6 x i32> store is
; split into a dwordx4 at offset 0 and a dwordx2 at offset 16.
; The two check prefixes expect the same instructions but in a different
; order: the -global-isel=1 run copies v17 before v18 and emits the dwordx4
; store before the dwordx2 store.
86define amdgpu_ps void @test_scalef32_pk32_bf6_f16_vv(<32 x half> %src, float %scale, ptr addrspace(1) %out) {
87; GFX950-SDAG-LABEL: test_scalef32_pk32_bf6_f16_vv:
88; GFX950-SDAG:       ; %bb.0:
89; GFX950-SDAG-NEXT:    v_mov_b32_e32 v25, v18
90; GFX950-SDAG-NEXT:    v_mov_b32_e32 v24, v17
91; GFX950-SDAG-NEXT:    v_cvt_scalef32_pk32_bf6_f16 v[18:23], v[0:15], v16
92; GFX950-SDAG-NEXT:    global_store_dwordx2 v[24:25], v[22:23], off offset:16
93; GFX950-SDAG-NEXT:    global_store_dwordx4 v[24:25], v[18:21], off
94; GFX950-SDAG-NEXT:    s_endpgm
95;
96; GFX950-GISEL-LABEL: test_scalef32_pk32_bf6_f16_vv:
97; GFX950-GISEL:       ; %bb.0:
98; GFX950-GISEL-NEXT:    v_mov_b32_e32 v24, v17
99; GFX950-GISEL-NEXT:    v_mov_b32_e32 v25, v18
100; GFX950-GISEL-NEXT:    v_cvt_scalef32_pk32_bf6_f16 v[18:23], v[0:15], v16
101; GFX950-GISEL-NEXT:    global_store_dwordx4 v[24:25], v[18:21], off
102; GFX950-GISEL-NEXT:    global_store_dwordx2 v[24:25], v[22:23], off offset:16
103; GFX950-GISEL-NEXT:    s_endpgm
104  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.f16(<32 x half> %src, float %scale)
105  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
106  ret void
107}
108
; pk32.bf6.f16 with an inreg source vector and a constant scale of 100.0
; (float bit pattern 0x42c80000). In both runs the source ends up in v[2:17],
; the result in v[18:23], and the store goes through v[0:1] as a dwordx4 at
; offset 0 plus a dwordx2 at offset 16.
; The expected sequences differ between the two runs:
;   - -global-isel=0 copies s0-s15 with sixteen 32-bit moves, keeps the scale
;     in s0, and emits the dwordx2 store first.
;   - -global-isel=1 copies the SGPRs with eight 64-bit moves (highest pair
;     first), materializes the scale in v24, and emits the dwordx4 store first.
; NOTE(review): "sl" presumably stands for scalar source / literal scale -
; confirm.
109define amdgpu_ps void @test_scalef32_pk32_bf6_f16_sl(<32 x half> inreg %src, ptr addrspace(1) %out) {
110; GFX950-SDAG-LABEL: test_scalef32_pk32_bf6_f16_sl:
111; GFX950-SDAG:       ; %bb.0:
112; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s0
113; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, s1
114; GFX950-SDAG-NEXT:    v_mov_b32_e32 v4, s2
115; GFX950-SDAG-NEXT:    v_mov_b32_e32 v5, s3
116; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, s4
117; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, s5
118; GFX950-SDAG-NEXT:    v_mov_b32_e32 v8, s6
119; GFX950-SDAG-NEXT:    v_mov_b32_e32 v9, s7
120; GFX950-SDAG-NEXT:    v_mov_b32_e32 v10, s8
121; GFX950-SDAG-NEXT:    v_mov_b32_e32 v11, s9
122; GFX950-SDAG-NEXT:    v_mov_b32_e32 v12, s10
123; GFX950-SDAG-NEXT:    v_mov_b32_e32 v13, s11
124; GFX950-SDAG-NEXT:    v_mov_b32_e32 v14, s12
125; GFX950-SDAG-NEXT:    v_mov_b32_e32 v15, s13
126; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, s14
127; GFX950-SDAG-NEXT:    v_mov_b32_e32 v17, s15
128; GFX950-SDAG-NEXT:    s_mov_b32 s0, 0x42c80000
129; GFX950-SDAG-NEXT:    v_cvt_scalef32_pk32_bf6_f16 v[18:23], v[2:17], s0
130; GFX950-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[22:23], off offset:16
131; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[18:21], off
132; GFX950-SDAG-NEXT:    s_endpgm
133;
134; GFX950-GISEL-LABEL: test_scalef32_pk32_bf6_f16_sl:
135; GFX950-GISEL:       ; %bb.0:
136; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[16:17], s[14:15]
137; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[14:15], s[12:13]
138; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[12:13], s[10:11]
139; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[10:11], s[8:9]
140; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[8:9], s[6:7]
141; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[6:7], s[4:5]
142; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[4:5], s[2:3]
143; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
144; GFX950-GISEL-NEXT:    v_mov_b32_e32 v24, 0x42c80000
145; GFX950-GISEL-NEXT:    v_cvt_scalef32_pk32_bf6_f16 v[18:23], v[2:17], v24
146; GFX950-GISEL-NEXT:    global_store_dwordx4 v[0:1], v[18:21], off
147; GFX950-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[22:23], off offset:16
148; GFX950-GISEL-NEXT:    s_endpgm
149  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.f16(<32 x half> %src, float 100.0)
150  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
151  ret void
152}
153
; pk32.fp6.bf16 with the source vector and the scale both passed as ordinary
; (non-inreg) arguments. The expected code reads the source from v[0:15] and
; the scale from v16, copies v17/v18 into v[24:25] for use as the store
; address, and writes the conversion result to v[18:23]. The <6 x i32> store is
; split into a dwordx2 at offset 16 and a dwordx4 at offset 0.
; Both check prefixes expect the same instruction sequence for this function.
; NOTE(review): presumably the -global-isel=1 run falls back to SelectionDAG
; for bfloat (the run line passes -global-isel-abort=2) - confirm.
154define amdgpu_ps void @test_scalef32_pk32_fp6_bf16_vv(<32 x bfloat> %src, float %scale, ptr addrspace(1) %out) {
155; GFX950-SDAG-LABEL: test_scalef32_pk32_fp6_bf16_vv:
156; GFX950-SDAG:       ; %bb.0:
157; GFX950-SDAG-NEXT:    v_mov_b32_e32 v25, v18
158; GFX950-SDAG-NEXT:    v_mov_b32_e32 v24, v17
159; GFX950-SDAG-NEXT:    v_cvt_scalef32_pk32_fp6_bf16 v[18:23], v[0:15], v16
160; GFX950-SDAG-NEXT:    global_store_dwordx2 v[24:25], v[22:23], off offset:16
161; GFX950-SDAG-NEXT:    global_store_dwordx4 v[24:25], v[18:21], off
162; GFX950-SDAG-NEXT:    s_endpgm
163;
164; GFX950-GISEL-LABEL: test_scalef32_pk32_fp6_bf16_vv:
165; GFX950-GISEL:       ; %bb.0:
166; GFX950-GISEL-NEXT:    v_mov_b32_e32 v25, v18
167; GFX950-GISEL-NEXT:    v_mov_b32_e32 v24, v17
168; GFX950-GISEL-NEXT:    v_cvt_scalef32_pk32_fp6_bf16 v[18:23], v[0:15], v16
169; GFX950-GISEL-NEXT:    global_store_dwordx2 v[24:25], v[22:23], off offset:16
170; GFX950-GISEL-NEXT:    global_store_dwordx4 v[24:25], v[18:21], off
171; GFX950-GISEL-NEXT:    s_endpgm
172  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.bf16(<32 x bfloat> %src, float %scale)
173  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
174  ret void
175}
176
; pk32.fp6.bf16 with an inreg source vector and a constant scale of 100.0.
; The expected code copies s0-s15 into v[2:17] one dword at a time, loads the
; float bit pattern 0x42c80000 (100.0) into s0, and passes s0 as the scale
; operand. The result in v[18:23] is stored through v[0:1] as a dwordx2 at
; offset 16 and a dwordx4 at offset 0.
; Both check prefixes expect the same instruction sequence for this function.
; NOTE(review): "sl" presumably stands for scalar source / literal scale, and
; the identical output presumably comes from a GlobalISel fallback for bfloat
; (the run line passes -global-isel-abort=2) - confirm both.
177define amdgpu_ps void @test_scalef32_pk32_fp6_bf16_sl(<32 x bfloat> inreg %src, ptr addrspace(1) %out) {
178; GFX950-SDAG-LABEL: test_scalef32_pk32_fp6_bf16_sl:
179; GFX950-SDAG:       ; %bb.0:
180; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s0
181; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, s1
182; GFX950-SDAG-NEXT:    v_mov_b32_e32 v4, s2
183; GFX950-SDAG-NEXT:    v_mov_b32_e32 v5, s3
184; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, s4
185; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, s5
186; GFX950-SDAG-NEXT:    v_mov_b32_e32 v8, s6
187; GFX950-SDAG-NEXT:    v_mov_b32_e32 v9, s7
188; GFX950-SDAG-NEXT:    v_mov_b32_e32 v10, s8
189; GFX950-SDAG-NEXT:    v_mov_b32_e32 v11, s9
190; GFX950-SDAG-NEXT:    v_mov_b32_e32 v12, s10
191; GFX950-SDAG-NEXT:    v_mov_b32_e32 v13, s11
192; GFX950-SDAG-NEXT:    v_mov_b32_e32 v14, s12
193; GFX950-SDAG-NEXT:    v_mov_b32_e32 v15, s13
194; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, s14
195; GFX950-SDAG-NEXT:    v_mov_b32_e32 v17, s15
196; GFX950-SDAG-NEXT:    s_mov_b32 s0, 0x42c80000
197; GFX950-SDAG-NEXT:    v_cvt_scalef32_pk32_fp6_bf16 v[18:23], v[2:17], s0
198; GFX950-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[22:23], off offset:16
199; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[18:21], off
200; GFX950-SDAG-NEXT:    s_endpgm
201;
202; GFX950-GISEL-LABEL: test_scalef32_pk32_fp6_bf16_sl:
203; GFX950-GISEL:       ; %bb.0:
204; GFX950-GISEL-NEXT:    v_mov_b32_e32 v2, s0
205; GFX950-GISEL-NEXT:    v_mov_b32_e32 v3, s1
206; GFX950-GISEL-NEXT:    v_mov_b32_e32 v4, s2
207; GFX950-GISEL-NEXT:    v_mov_b32_e32 v5, s3
208; GFX950-GISEL-NEXT:    v_mov_b32_e32 v6, s4
209; GFX950-GISEL-NEXT:    v_mov_b32_e32 v7, s5
210; GFX950-GISEL-NEXT:    v_mov_b32_e32 v8, s6
211; GFX950-GISEL-NEXT:    v_mov_b32_e32 v9, s7
212; GFX950-GISEL-NEXT:    v_mov_b32_e32 v10, s8
213; GFX950-GISEL-NEXT:    v_mov_b32_e32 v11, s9
214; GFX950-GISEL-NEXT:    v_mov_b32_e32 v12, s10
215; GFX950-GISEL-NEXT:    v_mov_b32_e32 v13, s11
216; GFX950-GISEL-NEXT:    v_mov_b32_e32 v14, s12
217; GFX950-GISEL-NEXT:    v_mov_b32_e32 v15, s13
218; GFX950-GISEL-NEXT:    v_mov_b32_e32 v16, s14
219; GFX950-GISEL-NEXT:    v_mov_b32_e32 v17, s15
220; GFX950-GISEL-NEXT:    s_mov_b32 s0, 0x42c80000
221; GFX950-GISEL-NEXT:    v_cvt_scalef32_pk32_fp6_bf16 v[18:23], v[2:17], s0
222; GFX950-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[22:23], off offset:16
223; GFX950-GISEL-NEXT:    global_store_dwordx4 v[0:1], v[18:21], off
224; GFX950-GISEL-NEXT:    s_endpgm
225  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.bf16(<32 x bfloat> %src, float 100.0)
226  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
227  ret void
228}
229
; pk32.fp6.f16 with the source vector and the scale both passed as ordinary
; (non-inreg) arguments. The expected code reads the source from v[0:15] and
; the scale from v16, copies v17/v18 into v[24:25] for use as the store
; address, and writes the conversion result to v[18:23]. The <6 x i32> store is
; split into a dwordx4 at offset 0 and a dwordx2 at offset 16.
; The two check prefixes expect the same instructions but in a different
; order: the -global-isel=1 run copies v17 before v18 and emits the dwordx4
; store before the dwordx2 store.
230define amdgpu_ps void @test_scalef32_pk32_fp6_f16_vv(<32 x half> %src, float %scale, ptr addrspace(1) %out) {
231; GFX950-SDAG-LABEL: test_scalef32_pk32_fp6_f16_vv:
232; GFX950-SDAG:       ; %bb.0:
233; GFX950-SDAG-NEXT:    v_mov_b32_e32 v25, v18
234; GFX950-SDAG-NEXT:    v_mov_b32_e32 v24, v17
235; GFX950-SDAG-NEXT:    v_cvt_scalef32_pk32_fp6_f16 v[18:23], v[0:15], v16
236; GFX950-SDAG-NEXT:    global_store_dwordx2 v[24:25], v[22:23], off offset:16
237; GFX950-SDAG-NEXT:    global_store_dwordx4 v[24:25], v[18:21], off
238; GFX950-SDAG-NEXT:    s_endpgm
239;
240; GFX950-GISEL-LABEL: test_scalef32_pk32_fp6_f16_vv:
241; GFX950-GISEL:       ; %bb.0:
242; GFX950-GISEL-NEXT:    v_mov_b32_e32 v24, v17
243; GFX950-GISEL-NEXT:    v_mov_b32_e32 v25, v18
244; GFX950-GISEL-NEXT:    v_cvt_scalef32_pk32_fp6_f16 v[18:23], v[0:15], v16
245; GFX950-GISEL-NEXT:    global_store_dwordx4 v[24:25], v[18:21], off
246; GFX950-GISEL-NEXT:    global_store_dwordx2 v[24:25], v[22:23], off offset:16
247; GFX950-GISEL-NEXT:    s_endpgm
248  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.f16(<32 x half> %src, float %scale)
249  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
250  ret void
251}
252
; pk32.fp6.f16 with an inreg source vector and a constant scale of 100.0
; (float bit pattern 0x42c80000). In both runs the source ends up in v[2:17],
; the result in v[18:23], and the store goes through v[0:1] as a dwordx4 at
; offset 0 plus a dwordx2 at offset 16.
; The expected sequences differ between the two runs:
;   - -global-isel=0 copies s0-s15 with sixteen 32-bit moves, keeps the scale
;     in s0, and emits the dwordx2 store first.
;   - -global-isel=1 copies the SGPRs with eight 64-bit moves (highest pair
;     first), materializes the scale in v24, and emits the dwordx4 store first.
; NOTE(review): "sl" presumably stands for scalar source / literal scale -
; confirm.
253define amdgpu_ps void @test_scalef32_pk32_fp6_f16_sl(<32 x half> inreg %src, ptr addrspace(1) %out) {
254; GFX950-SDAG-LABEL: test_scalef32_pk32_fp6_f16_sl:
255; GFX950-SDAG:       ; %bb.0:
256; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s0
257; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, s1
258; GFX950-SDAG-NEXT:    v_mov_b32_e32 v4, s2
259; GFX950-SDAG-NEXT:    v_mov_b32_e32 v5, s3
260; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, s4
261; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, s5
262; GFX950-SDAG-NEXT:    v_mov_b32_e32 v8, s6
263; GFX950-SDAG-NEXT:    v_mov_b32_e32 v9, s7
264; GFX950-SDAG-NEXT:    v_mov_b32_e32 v10, s8
265; GFX950-SDAG-NEXT:    v_mov_b32_e32 v11, s9
266; GFX950-SDAG-NEXT:    v_mov_b32_e32 v12, s10
267; GFX950-SDAG-NEXT:    v_mov_b32_e32 v13, s11
268; GFX950-SDAG-NEXT:    v_mov_b32_e32 v14, s12
269; GFX950-SDAG-NEXT:    v_mov_b32_e32 v15, s13
270; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, s14
271; GFX950-SDAG-NEXT:    v_mov_b32_e32 v17, s15
272; GFX950-SDAG-NEXT:    s_mov_b32 s0, 0x42c80000
273; GFX950-SDAG-NEXT:    v_cvt_scalef32_pk32_fp6_f16 v[18:23], v[2:17], s0
274; GFX950-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[22:23], off offset:16
275; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[18:21], off
276; GFX950-SDAG-NEXT:    s_endpgm
277;
278; GFX950-GISEL-LABEL: test_scalef32_pk32_fp6_f16_sl:
279; GFX950-GISEL:       ; %bb.0:
280; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[16:17], s[14:15]
281; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[14:15], s[12:13]
282; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[12:13], s[10:11]
283; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[10:11], s[8:9]
284; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[8:9], s[6:7]
285; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[6:7], s[4:5]
286; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[4:5], s[2:3]
287; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
288; GFX950-GISEL-NEXT:    v_mov_b32_e32 v24, 0x42c80000
289; GFX950-GISEL-NEXT:    v_cvt_scalef32_pk32_fp6_f16 v[18:23], v[2:17], v24
290; GFX950-GISEL-NEXT:    global_store_dwordx4 v[0:1], v[18:21], off
291; GFX950-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[22:23], off offset:16
292; GFX950-GISEL-NEXT:    s_endpgm
293  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.f16(<32 x half> %src, float 100.0)
294  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
295  ret void
296}
297