xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.sr.ll (revision 5a81a559d69fb84e1e8ef623ac4b642081c14c51)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX950 %s
3; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX950 %s
4
; Declarations of the six llvm.amdgcn.cvt.scalef32.sr.* intrinsics called by the
; tests in this file: name suffixes bf8/fp8 crossed with bf16/f16/f32.
; All share the shape (i32 old, <src>, i32 seed, float scale, i32 dst_sel) -> i32;
; only the %src type differs (bfloat for bf16, half for f16, float for f32).
; NOTE(review): "sr" presumably stands for stochastic rounding (driven by %seed)
; - confirm against the AMDGPU intrinsic documentation.
5declare i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 %dst_sel)
6declare i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 %dst_sel)
7declare i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 %dst_sel)
8declare i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 %dst_sel)
9declare i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 %dst_sel)
10declare i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 %dst_sel)
11
; llvm.amdgcn.cvt.scalef32.sr.bf8.bf16 with immediate dst_sel = 0.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (bfloat), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_bf8_bf16 with no op_sel modifier, writing
; the same register (v5) that received the load.
12define amdgpu_ps void @test_cvt_scalef32_sr_bf8_bf16_dst_sel_0(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
13; GFX950-LABEL: test_cvt_scalef32_sr_bf8_bf16_dst_sel_0:
14; GFX950:       ; %bb.0:
15; GFX950-NEXT:    global_load_dword v5, v[0:1], off
16; GFX950-NEXT:    s_waitcnt vmcnt(0)
17; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_bf16 v5, v2, v3, v4
18; GFX950-NEXT:    global_store_dword v[0:1], v5, off
19; GFX950-NEXT:    s_endpgm
20  %old = load i32, ptr addrspace(1) %out, align 4
21  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 0)
22  store i32 %cvt, ptr addrspace(1) %out, align 4
23  ret void
24}
25
; llvm.amdgcn.cvt.scalef32.sr.bf8.bf16 with immediate dst_sel = 1.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (bfloat), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_bf8_bf16 carrying op_sel:[0,0,1,0], writing
; the same register (v5) that received the load.
26define amdgpu_ps void @test_cvt_scalef32_sr_bf8_bf16_dst_sel_1(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
27; GFX950-LABEL: test_cvt_scalef32_sr_bf8_bf16_dst_sel_1:
28; GFX950:       ; %bb.0:
29; GFX950-NEXT:    global_load_dword v5, v[0:1], off
30; GFX950-NEXT:    s_waitcnt vmcnt(0)
31; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_bf16 v5, v2, v3, v4 op_sel:[0,0,1,0]
32; GFX950-NEXT:    global_store_dword v[0:1], v5, off
33; GFX950-NEXT:    s_endpgm
34  %old = load i32, ptr addrspace(1) %out, align 4
35  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 1)
36  store i32 %cvt, ptr addrspace(1) %out, align 4
37  ret void
38}
39
; llvm.amdgcn.cvt.scalef32.sr.bf8.bf16 with immediate dst_sel = 2.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (bfloat), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_bf8_bf16 carrying op_sel:[0,0,0,1], writing
; the same register (v5) that received the load.
40define amdgpu_ps void @test_cvt_scalef32_sr_bf8_bf16_dst_sel_2(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
41; GFX950-LABEL: test_cvt_scalef32_sr_bf8_bf16_dst_sel_2:
42; GFX950:       ; %bb.0:
43; GFX950-NEXT:    global_load_dword v5, v[0:1], off
44; GFX950-NEXT:    s_waitcnt vmcnt(0)
45; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_bf16 v5, v2, v3, v4 op_sel:[0,0,0,1]
46; GFX950-NEXT:    global_store_dword v[0:1], v5, off
47; GFX950-NEXT:    s_endpgm
48  %old = load i32, ptr addrspace(1) %out, align 4
49  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 2)
50  store i32 %cvt, ptr addrspace(1) %out, align 4
51  ret void
52}
53
; llvm.amdgcn.cvt.scalef32.sr.bf8.bf16 with immediate dst_sel = 3.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (bfloat), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_bf8_bf16 carrying op_sel:[0,0,1,1], writing
; the same register (v5) that received the load.
54define amdgpu_ps void @test_cvt_scalef32_sr_bf8_bf16_dst_sel_3(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
55; GFX950-LABEL: test_cvt_scalef32_sr_bf8_bf16_dst_sel_3:
56; GFX950:       ; %bb.0:
57; GFX950-NEXT:    global_load_dword v5, v[0:1], off
58; GFX950-NEXT:    s_waitcnt vmcnt(0)
59; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_bf16 v5, v2, v3, v4 op_sel:[0,0,1,1]
60; GFX950-NEXT:    global_store_dword v[0:1], v5, off
61; GFX950-NEXT:    s_endpgm
62  %old = load i32, ptr addrspace(1) %out, align 4
63  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 3)
64  store i32 %cvt, ptr addrspace(1) %out, align 4
65  ret void
66}
67
; llvm.amdgcn.cvt.scalef32.sr.bf8.f16 with immediate dst_sel = 0.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (half), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_bf8_f16 with no op_sel modifier, writing
; the same register (v5) that received the load.
68define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f16_dst_sel_0(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
69; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f16_dst_sel_0:
70; GFX950:       ; %bb.0:
71; GFX950-NEXT:    global_load_dword v5, v[0:1], off
72; GFX950-NEXT:    s_waitcnt vmcnt(0)
73; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_f16 v5, v2, v3, v4
74; GFX950-NEXT:    global_store_dword v[0:1], v5, off
75; GFX950-NEXT:    s_endpgm
76  %old = load i32, ptr addrspace(1) %out, align 4
77  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 0)
78  store i32 %cvt, ptr addrspace(1) %out, align 4
79  ret void
80}
81
; llvm.amdgcn.cvt.scalef32.sr.bf8.f16 with immediate dst_sel = 1.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (half), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_bf8_f16 carrying op_sel:[0,0,1,0], writing
; the same register (v5) that received the load.
82define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f16_dst_sel_1(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
83; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f16_dst_sel_1:
84; GFX950:       ; %bb.0:
85; GFX950-NEXT:    global_load_dword v5, v[0:1], off
86; GFX950-NEXT:    s_waitcnt vmcnt(0)
87; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_f16 v5, v2, v3, v4 op_sel:[0,0,1,0]
88; GFX950-NEXT:    global_store_dword v[0:1], v5, off
89; GFX950-NEXT:    s_endpgm
90  %old = load i32, ptr addrspace(1) %out, align 4
91  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 1)
92  store i32 %cvt, ptr addrspace(1) %out, align 4
93  ret void
94}
95
; llvm.amdgcn.cvt.scalef32.sr.bf8.f16 with immediate dst_sel = 2.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (half), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_bf8_f16 carrying op_sel:[0,0,0,1], writing
; the same register (v5) that received the load.
96define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f16_dst_sel_2(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
97; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f16_dst_sel_2:
98; GFX950:       ; %bb.0:
99; GFX950-NEXT:    global_load_dword v5, v[0:1], off
100; GFX950-NEXT:    s_waitcnt vmcnt(0)
101; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_f16 v5, v2, v3, v4 op_sel:[0,0,0,1]
102; GFX950-NEXT:    global_store_dword v[0:1], v5, off
103; GFX950-NEXT:    s_endpgm
104  %old = load i32, ptr addrspace(1) %out, align 4
105  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 2)
106  store i32 %cvt, ptr addrspace(1) %out, align 4
107  ret void
108}
109
; llvm.amdgcn.cvt.scalef32.sr.bf8.f16 with immediate dst_sel = 3.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (half), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_bf8_f16 carrying op_sel:[0,0,1,1], writing
; the same register (v5) that received the load.
110define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f16_dst_sel_3(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
111; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f16_dst_sel_3:
112; GFX950:       ; %bb.0:
113; GFX950-NEXT:    global_load_dword v5, v[0:1], off
114; GFX950-NEXT:    s_waitcnt vmcnt(0)
115; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_f16 v5, v2, v3, v4 op_sel:[0,0,1,1]
116; GFX950-NEXT:    global_store_dword v[0:1], v5, off
117; GFX950-NEXT:    s_endpgm
118  %old = load i32, ptr addrspace(1) %out, align 4
119  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 3)
120  store i32 %cvt, ptr addrspace(1) %out, align 4
121  ret void
122}
123
; llvm.amdgcn.cvt.scalef32.sr.bf8.f32 with immediate dst_sel = 0.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (float), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_bf8_f32 with no op_sel modifier, writing
; the same register (v5) that received the load.
124define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f32_dst_sel_0(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
125; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f32_dst_sel_0:
126; GFX950:       ; %bb.0:
127; GFX950-NEXT:    global_load_dword v5, v[0:1], off
128; GFX950-NEXT:    s_waitcnt vmcnt(0)
129; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_f32 v5, v2, v3, v4
130; GFX950-NEXT:    global_store_dword v[0:1], v5, off
131; GFX950-NEXT:    s_endpgm
132  %old = load i32, ptr addrspace(1) %out, align 4
133  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 0)
134  store i32 %cvt, ptr addrspace(1) %out, align 4
135  ret void
136}
137
; llvm.amdgcn.cvt.scalef32.sr.bf8.f32 with immediate dst_sel = 1.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (float), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_bf8_f32 carrying op_sel:[0,0,1,0], writing
; the same register (v5) that received the load.
138define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f32_dst_sel_1(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
139; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f32_dst_sel_1:
140; GFX950:       ; %bb.0:
141; GFX950-NEXT:    global_load_dword v5, v[0:1], off
142; GFX950-NEXT:    s_waitcnt vmcnt(0)
143; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_f32 v5, v2, v3, v4 op_sel:[0,0,1,0]
144; GFX950-NEXT:    global_store_dword v[0:1], v5, off
145; GFX950-NEXT:    s_endpgm
146  %old = load i32, ptr addrspace(1) %out, align 4
147  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 1)
148  store i32 %cvt, ptr addrspace(1) %out, align 4
149  ret void
150}
151
; llvm.amdgcn.cvt.scalef32.sr.bf8.f32 with immediate dst_sel = 2.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (float), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_bf8_f32 carrying op_sel:[0,0,0,1], writing
; the same register (v5) that received the load.
152define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f32_dst_sel_2(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
153; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f32_dst_sel_2:
154; GFX950:       ; %bb.0:
155; GFX950-NEXT:    global_load_dword v5, v[0:1], off
156; GFX950-NEXT:    s_waitcnt vmcnt(0)
157; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_f32 v5, v2, v3, v4 op_sel:[0,0,0,1]
158; GFX950-NEXT:    global_store_dword v[0:1], v5, off
159; GFX950-NEXT:    s_endpgm
160  %old = load i32, ptr addrspace(1) %out, align 4
161  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 2)
162  store i32 %cvt, ptr addrspace(1) %out, align 4
163  ret void
164}
165
; llvm.amdgcn.cvt.scalef32.sr.bf8.f32 with immediate dst_sel = 3.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (float), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_bf8_f32 carrying op_sel:[0,0,1,1], writing
; the same register (v5) that received the load.
166define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f32_dst_sel_3(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
167; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f32_dst_sel_3:
168; GFX950:       ; %bb.0:
169; GFX950-NEXT:    global_load_dword v5, v[0:1], off
170; GFX950-NEXT:    s_waitcnt vmcnt(0)
171; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_f32 v5, v2, v3, v4 op_sel:[0,0,1,1]
172; GFX950-NEXT:    global_store_dword v[0:1], v5, off
173; GFX950-NEXT:    s_endpgm
174  %old = load i32, ptr addrspace(1) %out, align 4
175  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 3)
176  store i32 %cvt, ptr addrspace(1) %out, align 4
177  ret void
178}
179
; llvm.amdgcn.cvt.scalef32.sr.fp8.bf16 with immediate dst_sel = 0.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (bfloat), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_fp8_bf16 with no op_sel modifier, writing
; the same register (v5) that received the load.
180define amdgpu_ps void @test_cvt_scalef32_sr_fp8_bf16_dst_sel_0(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
181; GFX950-LABEL: test_cvt_scalef32_sr_fp8_bf16_dst_sel_0:
182; GFX950:       ; %bb.0:
183; GFX950-NEXT:    global_load_dword v5, v[0:1], off
184; GFX950-NEXT:    s_waitcnt vmcnt(0)
185; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_bf16 v5, v2, v3, v4
186; GFX950-NEXT:    global_store_dword v[0:1], v5, off
187; GFX950-NEXT:    s_endpgm
188  %old = load i32, ptr addrspace(1) %out, align 4
189  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 0)
190  store i32 %cvt, ptr addrspace(1) %out, align 4
191  ret void
192}
193
; llvm.amdgcn.cvt.scalef32.sr.fp8.bf16 with immediate dst_sel = 1.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (bfloat), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_fp8_bf16 carrying op_sel:[0,0,1,0], writing
; the same register (v5) that received the load.
194define amdgpu_ps void @test_cvt_scalef32_sr_fp8_bf16_dst_sel_1(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
195; GFX950-LABEL: test_cvt_scalef32_sr_fp8_bf16_dst_sel_1:
196; GFX950:       ; %bb.0:
197; GFX950-NEXT:    global_load_dword v5, v[0:1], off
198; GFX950-NEXT:    s_waitcnt vmcnt(0)
199; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_bf16 v5, v2, v3, v4 op_sel:[0,0,1,0]
200; GFX950-NEXT:    global_store_dword v[0:1], v5, off
201; GFX950-NEXT:    s_endpgm
202  %old = load i32, ptr addrspace(1) %out, align 4
203  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 1)
204  store i32 %cvt, ptr addrspace(1) %out, align 4
205  ret void
206}
207
; llvm.amdgcn.cvt.scalef32.sr.fp8.bf16 with immediate dst_sel = 2.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (bfloat), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_fp8_bf16 carrying op_sel:[0,0,0,1], writing
; the same register (v5) that received the load.
208define amdgpu_ps void @test_cvt_scalef32_sr_fp8_bf16_dst_sel_2(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
209; GFX950-LABEL: test_cvt_scalef32_sr_fp8_bf16_dst_sel_2:
210; GFX950:       ; %bb.0:
211; GFX950-NEXT:    global_load_dword v5, v[0:1], off
212; GFX950-NEXT:    s_waitcnt vmcnt(0)
213; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_bf16 v5, v2, v3, v4 op_sel:[0,0,0,1]
214; GFX950-NEXT:    global_store_dword v[0:1], v5, off
215; GFX950-NEXT:    s_endpgm
216  %old = load i32, ptr addrspace(1) %out, align 4
217  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 2)
218  store i32 %cvt, ptr addrspace(1) %out, align 4
219  ret void
220}
221
; llvm.amdgcn.cvt.scalef32.sr.fp8.bf16 with immediate dst_sel = 3.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (bfloat), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_fp8_bf16 carrying op_sel:[0,0,1,1], writing
; the same register (v5) that received the load.
222define amdgpu_ps void @test_cvt_scalef32_sr_fp8_bf16_dst_sel_3(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
223; GFX950-LABEL: test_cvt_scalef32_sr_fp8_bf16_dst_sel_3:
224; GFX950:       ; %bb.0:
225; GFX950-NEXT:    global_load_dword v5, v[0:1], off
226; GFX950-NEXT:    s_waitcnt vmcnt(0)
227; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_bf16 v5, v2, v3, v4 op_sel:[0,0,1,1]
228; GFX950-NEXT:    global_store_dword v[0:1], v5, off
229; GFX950-NEXT:    s_endpgm
230  %old = load i32, ptr addrspace(1) %out, align 4
231  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.bf16(i32 %old, bfloat %src, i32 %seed, float %scale, i32 3)
232  store i32 %cvt, ptr addrspace(1) %out, align 4
233  ret void
234}
235
; llvm.amdgcn.cvt.scalef32.sr.fp8.f16 with immediate dst_sel = 0.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (half), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_fp8_f16 with no op_sel modifier, writing
; the same register (v5) that received the load.
236define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f16_dst_sel_0(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
237; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f16_dst_sel_0:
238; GFX950:       ; %bb.0:
239; GFX950-NEXT:    global_load_dword v5, v[0:1], off
240; GFX950-NEXT:    s_waitcnt vmcnt(0)
241; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_f16 v5, v2, v3, v4
242; GFX950-NEXT:    global_store_dword v[0:1], v5, off
243; GFX950-NEXT:    s_endpgm
244  %old = load i32, ptr addrspace(1) %out, align 4
245  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 0)
246  store i32 %cvt, ptr addrspace(1) %out, align 4
247  ret void
248}
249
; llvm.amdgcn.cvt.scalef32.sr.fp8.f16 with immediate dst_sel = 1.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (half), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_fp8_f16 carrying op_sel:[0,0,1,0], writing
; the same register (v5) that received the load.
250define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f16_dst_sel_1(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
251; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f16_dst_sel_1:
252; GFX950:       ; %bb.0:
253; GFX950-NEXT:    global_load_dword v5, v[0:1], off
254; GFX950-NEXT:    s_waitcnt vmcnt(0)
255; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_f16 v5, v2, v3, v4 op_sel:[0,0,1,0]
256; GFX950-NEXT:    global_store_dword v[0:1], v5, off
257; GFX950-NEXT:    s_endpgm
258  %old = load i32, ptr addrspace(1) %out, align 4
259  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 1)
260  store i32 %cvt, ptr addrspace(1) %out, align 4
261  ret void
262}
263
; llvm.amdgcn.cvt.scalef32.sr.fp8.f16 with immediate dst_sel = 2.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (half), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_fp8_f16 carrying op_sel:[0,0,0,1], writing
; the same register (v5) that received the load.
264define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f16_dst_sel_2(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
265; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f16_dst_sel_2:
266; GFX950:       ; %bb.0:
267; GFX950-NEXT:    global_load_dword v5, v[0:1], off
268; GFX950-NEXT:    s_waitcnt vmcnt(0)
269; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_f16 v5, v2, v3, v4 op_sel:[0,0,0,1]
270; GFX950-NEXT:    global_store_dword v[0:1], v5, off
271; GFX950-NEXT:    s_endpgm
272  %old = load i32, ptr addrspace(1) %out, align 4
273  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 2)
274  store i32 %cvt, ptr addrspace(1) %out, align 4
275  ret void
276}
277
; llvm.amdgcn.cvt.scalef32.sr.fp8.f16 with immediate dst_sel = 3.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (half), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_fp8_f16 carrying op_sel:[0,0,1,1], writing
; the same register (v5) that received the load.
278define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f16_dst_sel_3(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
279; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f16_dst_sel_3:
280; GFX950:       ; %bb.0:
281; GFX950-NEXT:    global_load_dword v5, v[0:1], off
282; GFX950-NEXT:    s_waitcnt vmcnt(0)
283; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_f16 v5, v2, v3, v4 op_sel:[0,0,1,1]
284; GFX950-NEXT:    global_store_dword v[0:1], v5, off
285; GFX950-NEXT:    s_endpgm
286  %old = load i32, ptr addrspace(1) %out, align 4
287  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f16(i32 %old, half %src, i32 %seed, float %scale, i32 3)
288  store i32 %cvt, ptr addrspace(1) %out, align 4
289  ret void
290}
291
; llvm.amdgcn.cvt.scalef32.sr.fp8.f32 with immediate dst_sel = 0.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (float), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_fp8_f32 with no op_sel modifier, writing
; the same register (v5) that received the load.
292define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f32_dst_sel_0(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
293; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f32_dst_sel_0:
294; GFX950:       ; %bb.0:
295; GFX950-NEXT:    global_load_dword v5, v[0:1], off
296; GFX950-NEXT:    s_waitcnt vmcnt(0)
297; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_f32 v5, v2, v3, v4
298; GFX950-NEXT:    global_store_dword v[0:1], v5, off
299; GFX950-NEXT:    s_endpgm
300  %old = load i32, ptr addrspace(1) %out, align 4
301  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 0)
302  store i32 %cvt, ptr addrspace(1) %out, align 4
303  ret void
304}
305
; llvm.amdgcn.cvt.scalef32.sr.fp8.f32 with immediate dst_sel = 1.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (float), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_fp8_f32 carrying op_sel:[0,0,1,0], writing
; the same register (v5) that received the load.
306define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f32_dst_sel_1(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
307; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f32_dst_sel_1:
308; GFX950:       ; %bb.0:
309; GFX950-NEXT:    global_load_dword v5, v[0:1], off
310; GFX950-NEXT:    s_waitcnt vmcnt(0)
311; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_f32 v5, v2, v3, v4 op_sel:[0,0,1,0]
312; GFX950-NEXT:    global_store_dword v[0:1], v5, off
313; GFX950-NEXT:    s_endpgm
314  %old = load i32, ptr addrspace(1) %out, align 4
315  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 1)
316  store i32 %cvt, ptr addrspace(1) %out, align 4
317  ret void
318}
319
; llvm.amdgcn.cvt.scalef32.sr.fp8.f32 with immediate dst_sel = 2.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (float), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_fp8_f32 carrying op_sel:[0,0,0,1], writing
; the same register (v5) that received the load.
320define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f32_dst_sel_2(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
321; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f32_dst_sel_2:
322; GFX950:       ; %bb.0:
323; GFX950-NEXT:    global_load_dword v5, v[0:1], off
324; GFX950-NEXT:    s_waitcnt vmcnt(0)
325; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_f32 v5, v2, v3, v4 op_sel:[0,0,0,1]
326; GFX950-NEXT:    global_store_dword v[0:1], v5, off
327; GFX950-NEXT:    s_endpgm
328  %old = load i32, ptr addrspace(1) %out, align 4
329  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 2)
330  store i32 %cvt, ptr addrspace(1) %out, align 4
331  ret void
332}
333
; llvm.amdgcn.cvt.scalef32.sr.fp8.f32 with immediate dst_sel = 3.
; Loads the i32 at %out, passes it as the intrinsic's first (old) operand together
; with %src (float), %seed and %scale, and stores the i32 result back to %out.
; Expected assembly: v_cvt_scalef32_sr_fp8_f32 carrying op_sel:[0,0,1,1], writing
; the same register (v5) that received the load.
334define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f32_dst_sel_3(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
335; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f32_dst_sel_3:
336; GFX950:       ; %bb.0:
337; GFX950-NEXT:    global_load_dword v5, v[0:1], off
338; GFX950-NEXT:    s_waitcnt vmcnt(0)
339; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_f32 v5, v2, v3, v4 op_sel:[0,0,1,1]
340; GFX950-NEXT:    global_store_dword v[0:1], v5, off
341; GFX950-NEXT:    s_endpgm
342  %old = load i32, ptr addrspace(1) %out, align 4
343  %cvt = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f32(i32 %old, float %src, i32 %seed, float %scale, i32 3)
344  store i32 %cvt, ptr addrspace(1) %out, align 4
345  ret void
346}
347