; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX950 %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX950 %s

; Intrinsics under test. Operand order for every one of them is
;   (i32 old, <src type> src, i32 seed, float scale, i32 dst_sel)
; where the source type is bfloat, half or float depending on the suffix.
declare i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.bf16(i32, bfloat, i32, float, i32)
declare i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f16(i32, half, i32, float, i32)
declare i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f32(i32, float, i32, float, i32)
declare i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.bf16(i32, bfloat, i32, float, i32)
declare i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f16(i32, half, i32, float, i32)
declare i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f32(i32, float, i32, float, i32)

; Every test below follows the same shape: load the previous i32 from %out,
; feed it to the intrinsic as the "old" operand together with a constant
; dst_sel, and store the result back to %out. The checks expect dst_sel values
; 1, 2 and 3 to be printed as op_sel:[0,0,1,0], op_sel:[0,0,0,1] and
; op_sel:[0,0,1,1]; dst_sel 0 is expected to print no op_sel at all.

; ---- bf8 result, bfloat source ----

; dst_sel = 0
define amdgpu_ps void @test_cvt_scalef32_sr_bf8_bf16_dst_sel_0(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_bf8_bf16_dst_sel_0:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_bf16 v5, v2, v3, v4
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.bf16(i32 %prev, bfloat %src, i32 %seed, float %scale, i32 0)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 1
define amdgpu_ps void @test_cvt_scalef32_sr_bf8_bf16_dst_sel_1(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_bf8_bf16_dst_sel_1:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_bf16 v5, v2, v3, v4 op_sel:[0,0,1,0]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.bf16(i32 %prev, bfloat %src, i32 %seed, float %scale, i32 1)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 2
define amdgpu_ps void @test_cvt_scalef32_sr_bf8_bf16_dst_sel_2(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_bf8_bf16_dst_sel_2:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_bf16 v5, v2, v3, v4 op_sel:[0,0,0,1]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.bf16(i32 %prev, bfloat %src, i32 %seed, float %scale, i32 2)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 3
define amdgpu_ps void @test_cvt_scalef32_sr_bf8_bf16_dst_sel_3(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_bf8_bf16_dst_sel_3:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_bf16 v5, v2, v3, v4 op_sel:[0,0,1,1]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.bf16(i32 %prev, bfloat %src, i32 %seed, float %scale, i32 3)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; ---- bf8 result, half source ----

; dst_sel = 0
define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f16_dst_sel_0(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f16_dst_sel_0:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_f16 v5, v2, v3, v4
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f16(i32 %prev, half %src, i32 %seed, float %scale, i32 0)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 1
define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f16_dst_sel_1(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f16_dst_sel_1:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_f16 v5, v2, v3, v4 op_sel:[0,0,1,0]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f16(i32 %prev, half %src, i32 %seed, float %scale, i32 1)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 2
define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f16_dst_sel_2(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f16_dst_sel_2:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_f16 v5, v2, v3, v4 op_sel:[0,0,0,1]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f16(i32 %prev, half %src, i32 %seed, float %scale, i32 2)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 3
define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f16_dst_sel_3(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f16_dst_sel_3:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_f16 v5, v2, v3, v4 op_sel:[0,0,1,1]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f16(i32 %prev, half %src, i32 %seed, float %scale, i32 3)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; ---- bf8 result, float source ----

; dst_sel = 0
define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f32_dst_sel_0(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f32_dst_sel_0:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_f32 v5, v2, v3, v4
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f32(i32 %prev, float %src, i32 %seed, float %scale, i32 0)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 1
define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f32_dst_sel_1(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f32_dst_sel_1:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_f32 v5, v2, v3, v4 op_sel:[0,0,1,0]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f32(i32 %prev, float %src, i32 %seed, float %scale, i32 1)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 2
define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f32_dst_sel_2(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f32_dst_sel_2:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_f32 v5, v2, v3, v4 op_sel:[0,0,0,1]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f32(i32 %prev, float %src, i32 %seed, float %scale, i32 2)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 3
define amdgpu_ps void @test_cvt_scalef32_sr_bf8_f32_dst_sel_3(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_bf8_f32_dst_sel_3:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_bf8_f32 v5, v2, v3, v4 op_sel:[0,0,1,1]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.bf8.f32(i32 %prev, float %src, i32 %seed, float %scale, i32 3)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; ---- fp8 result, bfloat source ----

; dst_sel = 0
define amdgpu_ps void @test_cvt_scalef32_sr_fp8_bf16_dst_sel_0(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_fp8_bf16_dst_sel_0:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_bf16 v5, v2, v3, v4
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.bf16(i32 %prev, bfloat %src, i32 %seed, float %scale, i32 0)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 1
define amdgpu_ps void @test_cvt_scalef32_sr_fp8_bf16_dst_sel_1(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_fp8_bf16_dst_sel_1:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_bf16 v5, v2, v3, v4 op_sel:[0,0,1,0]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.bf16(i32 %prev, bfloat %src, i32 %seed, float %scale, i32 1)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 2
define amdgpu_ps void @test_cvt_scalef32_sr_fp8_bf16_dst_sel_2(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_fp8_bf16_dst_sel_2:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_bf16 v5, v2, v3, v4 op_sel:[0,0,0,1]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.bf16(i32 %prev, bfloat %src, i32 %seed, float %scale, i32 2)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 3
define amdgpu_ps void @test_cvt_scalef32_sr_fp8_bf16_dst_sel_3(ptr addrspace(1) %out, bfloat %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_fp8_bf16_dst_sel_3:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_bf16 v5, v2, v3, v4 op_sel:[0,0,1,1]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.bf16(i32 %prev, bfloat %src, i32 %seed, float %scale, i32 3)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; The remaining tests cover the fp8 result with half and float sources. Each
; one loads the previous i32 from %out, passes it to the intrinsic as the first
; operand together with a constant dst_sel as the last operand, and stores the
; result back to %out. The checks expect dst_sel values 1, 2 and 3 to be
; printed as op_sel:[0,0,1,0], op_sel:[0,0,0,1] and op_sel:[0,0,1,1]; dst_sel 0
; is expected to print no op_sel at all.

; ---- fp8 result, half source ----

; dst_sel = 0
define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f16_dst_sel_0(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f16_dst_sel_0:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_f16 v5, v2, v3, v4
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f16(i32 %prev, half %src, i32 %seed, float %scale, i32 0)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 1
define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f16_dst_sel_1(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f16_dst_sel_1:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_f16 v5, v2, v3, v4 op_sel:[0,0,1,0]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f16(i32 %prev, half %src, i32 %seed, float %scale, i32 1)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 2
define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f16_dst_sel_2(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f16_dst_sel_2:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_f16 v5, v2, v3, v4 op_sel:[0,0,0,1]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f16(i32 %prev, half %src, i32 %seed, float %scale, i32 2)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 3
define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f16_dst_sel_3(ptr addrspace(1) %out, half %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f16_dst_sel_3:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_f16 v5, v2, v3, v4 op_sel:[0,0,1,1]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f16(i32 %prev, half %src, i32 %seed, float %scale, i32 3)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; ---- fp8 result, float source ----

; dst_sel = 0
define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f32_dst_sel_0(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f32_dst_sel_0:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_f32 v5, v2, v3, v4
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f32(i32 %prev, float %src, i32 %seed, float %scale, i32 0)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 1
define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f32_dst_sel_1(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f32_dst_sel_1:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_f32 v5, v2, v3, v4 op_sel:[0,0,1,0]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f32(i32 %prev, float %src, i32 %seed, float %scale, i32 1)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 2
define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f32_dst_sel_2(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f32_dst_sel_2:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_f32 v5, v2, v3, v4 op_sel:[0,0,0,1]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f32(i32 %prev, float %src, i32 %seed, float %scale, i32 2)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}

; dst_sel = 3
define amdgpu_ps void @test_cvt_scalef32_sr_fp8_f32_dst_sel_3(ptr addrspace(1) %out, float %src, i32 %seed, float %scale) {
; GFX950-LABEL: test_cvt_scalef32_sr_fp8_f32_dst_sel_3:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    global_load_dword v5, v[0:1], off
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    v_cvt_scalef32_sr_fp8_f32 v5, v2, v3, v4 op_sel:[0,0,1,1]
; GFX950-NEXT:    global_store_dword v[0:1], v5, off
; GFX950-NEXT:    s_endpgm
  %prev = load i32, ptr addrspace(1) %out, align 4
  %res = call i32 @llvm.amdgcn.cvt.scalef32.sr.fp8.f32(i32 %prev, float %src, i32 %seed, float %scale, i32 3)
  store i32 %res, ptr addrspace(1) %out, align 4
  ret void
}