; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX950-SDAG %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX950-GISEL %s

; Codegen test for the four llvm.amdgcn.cvt.scalef32.pk32.{bf6,fp6}.{bf16,f16}
; intrinsics on gfx950. Each intrinsic takes a 32-element bfloat or half vector
; plus a float scale and returns <6 x i32>. The same IR is compiled twice, once
; with -global-isel=0 and once with -global-isel=1, and each output is matched
; against its own check prefix.
;
; Test name suffixes:
;   _vv - the source vector and the scale are ordinary (non-inreg) arguments.
;   _sl - the source vector is an inreg argument and the scale is the literal
;         100.0, which shows up as 0x42c80000 in the expected output.
;
; Every test stores the <6 x i32> result to %out with align 8; the expected
; output performs that as one dwordx4 store plus one dwordx2 store at offset 16.
;
; The expectations below are autogenerated (see the first line of this file).
; Regenerate them with utils/update_llc_test_checks.py rather than editing them
; by hand.
;
; Review note: for the bfloat tests the -global-isel=1 expectations are identical
; to the -global-isel=0 ones. Presumably GlobalISel falls back to SelectionDAG
; for bfloat (the second run line passes -global-isel-abort=2) - TODO confirm.

declare <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.bf16(<32 x bfloat> %src, float %scale)
declare <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.f16(<32 x half> %src, float %scale)
declare <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.bf16(<32 x bfloat> %src, float %scale)
declare <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.f16(<32 x half> %src, float %scale)

; pk32.bf6.bf16 with the source vector and the scale as ordinary arguments.
; Expected output: source in v[0:15], scale in v16, result in v[18:23]; the
; %out pointer is first copied from v[17:18] to v[24:25].
define amdgpu_ps void @test_scalef32_pk32_bf6_bf16_vv(<32 x bfloat> %src, float %scale, ptr addrspace(1) %out) {
; GFX950-SDAG-LABEL: test_scalef32_pk32_bf6_bf16_vv:
; GFX950-SDAG:       ; %bb.0:
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v25, v18
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v24, v17
; GFX950-SDAG-NEXT:    v_cvt_scalef32_pk32_bf6_bf16 v[18:23], v[0:15], v16
; GFX950-SDAG-NEXT:    global_store_dwordx2 v[24:25], v[22:23], off offset:16
; GFX950-SDAG-NEXT:    global_store_dwordx4 v[24:25], v[18:21], off
; GFX950-SDAG-NEXT:    s_endpgm
;
; GFX950-GISEL-LABEL: test_scalef32_pk32_bf6_bf16_vv:
; GFX950-GISEL:       ; %bb.0:
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v25, v18
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v24, v17
; GFX950-GISEL-NEXT:    v_cvt_scalef32_pk32_bf6_bf16 v[18:23], v[0:15], v16
; GFX950-GISEL-NEXT:    global_store_dwordx2 v[24:25], v[22:23], off offset:16
; GFX950-GISEL-NEXT:    global_store_dwordx4 v[24:25], v[18:21], off
; GFX950-GISEL-NEXT:    s_endpgm
  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.bf16(<32 x bfloat> %src, float %scale)
  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
  ret void
}

; pk32.bf6.bf16 with an inreg source vector and the literal scale 100.0.
; Expected output: s0-s15 are copied into v[2:17] for the source operand and the
; scale is materialized into s0 with s_mov_b32.
define amdgpu_ps void @test_scalef32_pk32_bf6_bf16_sl(<32 x bfloat> inreg %src, ptr addrspace(1) %out) {
; GFX950-SDAG-LABEL: test_scalef32_pk32_bf6_bf16_sl:
; GFX950-SDAG:       ; %bb.0:
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s0
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, s1
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v4, s2
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v5, s3
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, s4
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, s5
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v8, s6
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v9, s7
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v10, s8
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v11, s9
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v12, s10
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v13, s11
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v14, s12
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v15, s13
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, s14
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v17, s15
; GFX950-SDAG-NEXT:    s_mov_b32 s0, 0x42c80000
; GFX950-SDAG-NEXT:    v_cvt_scalef32_pk32_bf6_bf16 v[18:23], v[2:17], s0
; GFX950-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[22:23], off offset:16
; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[18:21], off
; GFX950-SDAG-NEXT:    s_endpgm
;
; GFX950-GISEL-LABEL: test_scalef32_pk32_bf6_bf16_sl:
; GFX950-GISEL:       ; %bb.0:
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v3, s1
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v4, s2
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v5, s3
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v6, s4
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v7, s5
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v8, s6
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v9, s7
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v10, s8
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v11, s9
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v12, s10
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v13, s11
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v14, s12
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v15, s13
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v16, s14
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v17, s15
; GFX950-GISEL-NEXT:    s_mov_b32 s0, 0x42c80000
; GFX950-GISEL-NEXT:    v_cvt_scalef32_pk32_bf6_bf16 v[18:23], v[2:17], s0
; GFX950-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[22:23], off offset:16
; GFX950-GISEL-NEXT:    global_store_dwordx4 v[0:1], v[18:21], off
; GFX950-GISEL-NEXT:    s_endpgm
  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.bf16(<32 x bfloat> %src, float 100.0)
  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
  ret void
}

; pk32.bf6.f16 with the source vector and the scale as ordinary arguments.
; The two expected outputs use the same instructions but differ in the order of
; the two pointer copies and of the two stores.
define amdgpu_ps void @test_scalef32_pk32_bf6_f16_vv(<32 x half> %src, float %scale, ptr addrspace(1) %out) {
; GFX950-SDAG-LABEL: test_scalef32_pk32_bf6_f16_vv:
; GFX950-SDAG:       ; %bb.0:
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v25, v18
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v24, v17
; GFX950-SDAG-NEXT:    v_cvt_scalef32_pk32_bf6_f16 v[18:23], v[0:15], v16
; GFX950-SDAG-NEXT:    global_store_dwordx2 v[24:25], v[22:23], off offset:16
; GFX950-SDAG-NEXT:    global_store_dwordx4 v[24:25], v[18:21], off
; GFX950-SDAG-NEXT:    s_endpgm
;
; GFX950-GISEL-LABEL: test_scalef32_pk32_bf6_f16_vv:
; GFX950-GISEL:       ; %bb.0:
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v24, v17
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v25, v18
; GFX950-GISEL-NEXT:    v_cvt_scalef32_pk32_bf6_f16 v[18:23], v[0:15], v16
; GFX950-GISEL-NEXT:    global_store_dwordx4 v[24:25], v[18:21], off
; GFX950-GISEL-NEXT:    global_store_dwordx2 v[24:25], v[22:23], off offset:16
; GFX950-GISEL-NEXT:    s_endpgm
  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.f16(<32 x half> %src, float %scale)
  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
  ret void
}

; pk32.bf6.f16 with an inreg source vector and the literal scale 100.0.
; The expected outputs differ: the -global-isel=0 output copies the source with
; sixteen v_mov_b32 and keeps the scale in s0, while the -global-isel=1 output
; copies it with eight v_mov_b64 and places the scale in v24.
define amdgpu_ps void @test_scalef32_pk32_bf6_f16_sl(<32 x half> inreg %src, ptr addrspace(1) %out) {
; GFX950-SDAG-LABEL: test_scalef32_pk32_bf6_f16_sl:
; GFX950-SDAG:       ; %bb.0:
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s0
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, s1
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v4, s2
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v5, s3
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, s4
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, s5
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v8, s6
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v9, s7
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v10, s8
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v11, s9
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v12, s10
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v13, s11
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v14, s12
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v15, s13
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, s14
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v17, s15
; GFX950-SDAG-NEXT:    s_mov_b32 s0, 0x42c80000
; GFX950-SDAG-NEXT:    v_cvt_scalef32_pk32_bf6_f16 v[18:23], v[2:17], s0
; GFX950-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[22:23], off offset:16
; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[18:21], off
; GFX950-SDAG-NEXT:    s_endpgm
;
; GFX950-GISEL-LABEL: test_scalef32_pk32_bf6_f16_sl:
; GFX950-GISEL:       ; %bb.0:
; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[16:17], s[14:15]
; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[14:15], s[12:13]
; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[12:13], s[10:11]
; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[10:11], s[8:9]
; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[8:9], s[6:7]
; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[6:7], s[4:5]
; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[4:5], s[2:3]
; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v24, 0x42c80000
; GFX950-GISEL-NEXT:    v_cvt_scalef32_pk32_bf6_f16 v[18:23], v[2:17], v24
; GFX950-GISEL-NEXT:    global_store_dwordx4 v[0:1], v[18:21], off
; GFX950-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[22:23], off offset:16
; GFX950-GISEL-NEXT:    s_endpgm
  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.bf6.f16(<32 x half> %src, float 100.0)
  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
  ret void
}

; pk32.fp6.bf16 with the source vector and the scale as ordinary arguments.
; Expected output: source in v[0:15], scale in v16, result in v[18:23]; the
; %out pointer is first copied from v[17:18] to v[24:25].
define amdgpu_ps void @test_scalef32_pk32_fp6_bf16_vv(<32 x bfloat> %src, float %scale, ptr addrspace(1) %out) {
; GFX950-SDAG-LABEL: test_scalef32_pk32_fp6_bf16_vv:
; GFX950-SDAG:       ; %bb.0:
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v25, v18
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v24, v17
; GFX950-SDAG-NEXT:    v_cvt_scalef32_pk32_fp6_bf16 v[18:23], v[0:15], v16
; GFX950-SDAG-NEXT:    global_store_dwordx2 v[24:25], v[22:23], off offset:16
; GFX950-SDAG-NEXT:    global_store_dwordx4 v[24:25], v[18:21], off
; GFX950-SDAG-NEXT:    s_endpgm
;
; GFX950-GISEL-LABEL: test_scalef32_pk32_fp6_bf16_vv:
; GFX950-GISEL:       ; %bb.0:
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v25, v18
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v24, v17
; GFX950-GISEL-NEXT:    v_cvt_scalef32_pk32_fp6_bf16 v[18:23], v[0:15], v16
; GFX950-GISEL-NEXT:    global_store_dwordx2 v[24:25], v[22:23], off offset:16
; GFX950-GISEL-NEXT:    global_store_dwordx4 v[24:25], v[18:21], off
; GFX950-GISEL-NEXT:    s_endpgm
  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.bf16(<32 x bfloat> %src, float %scale)
  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
  ret void
}

; pk32.fp6.bf16 with an inreg source vector and the literal scale 100.0.
; Expected output: s0-s15 are copied into v[2:17] for the source operand and the
; scale is materialized into s0 with s_mov_b32.
define amdgpu_ps void @test_scalef32_pk32_fp6_bf16_sl(<32 x bfloat> inreg %src, ptr addrspace(1) %out) {
; GFX950-SDAG-LABEL: test_scalef32_pk32_fp6_bf16_sl:
; GFX950-SDAG:       ; %bb.0:
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s0
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, s1
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v4, s2
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v5, s3
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, s4
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, s5
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v8, s6
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v9, s7
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v10, s8
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v11, s9
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v12, s10
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v13, s11
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v14, s12
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v15, s13
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, s14
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v17, s15
; GFX950-SDAG-NEXT:    s_mov_b32 s0, 0x42c80000
; GFX950-SDAG-NEXT:    v_cvt_scalef32_pk32_fp6_bf16 v[18:23], v[2:17], s0
; GFX950-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[22:23], off offset:16
; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[18:21], off
; GFX950-SDAG-NEXT:    s_endpgm
;
; GFX950-GISEL-LABEL: test_scalef32_pk32_fp6_bf16_sl:
; GFX950-GISEL:       ; %bb.0:
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v3, s1
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v4, s2
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v5, s3
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v6, s4
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v7, s5
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v8, s6
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v9, s7
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v10, s8
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v11, s9
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v12, s10
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v13, s11
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v14, s12
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v15, s13
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v16, s14
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v17, s15
; GFX950-GISEL-NEXT:    s_mov_b32 s0, 0x42c80000
; GFX950-GISEL-NEXT:    v_cvt_scalef32_pk32_fp6_bf16 v[18:23], v[2:17], s0
; GFX950-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[22:23], off offset:16
; GFX950-GISEL-NEXT:    global_store_dwordx4 v[0:1], v[18:21], off
; GFX950-GISEL-NEXT:    s_endpgm
  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.bf16(<32 x bfloat> %src, float 100.0)
  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
  ret void
}

; pk32.fp6.f16 with the source vector and the scale as ordinary arguments.
; The two expected outputs use the same instructions but differ in the order of
; the two pointer copies and of the two stores.
define amdgpu_ps void @test_scalef32_pk32_fp6_f16_vv(<32 x half> %src, float %scale, ptr addrspace(1) %out) {
; GFX950-SDAG-LABEL: test_scalef32_pk32_fp6_f16_vv:
; GFX950-SDAG:       ; %bb.0:
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v25, v18
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v24, v17
; GFX950-SDAG-NEXT:    v_cvt_scalef32_pk32_fp6_f16 v[18:23], v[0:15], v16
; GFX950-SDAG-NEXT:    global_store_dwordx2 v[24:25], v[22:23], off offset:16
; GFX950-SDAG-NEXT:    global_store_dwordx4 v[24:25], v[18:21], off
; GFX950-SDAG-NEXT:    s_endpgm
;
; GFX950-GISEL-LABEL: test_scalef32_pk32_fp6_f16_vv:
; GFX950-GISEL:       ; %bb.0:
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v24, v17
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v25, v18
; GFX950-GISEL-NEXT:    v_cvt_scalef32_pk32_fp6_f16 v[18:23], v[0:15], v16
; GFX950-GISEL-NEXT:    global_store_dwordx4 v[24:25], v[18:21], off
; GFX950-GISEL-NEXT:    global_store_dwordx2 v[24:25], v[22:23], off offset:16
; GFX950-GISEL-NEXT:    s_endpgm
  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.f16(<32 x half> %src, float %scale)
  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
  ret void
}

; pk32.fp6.f16 with an inreg source vector and the literal scale 100.0.
; The expected outputs differ: the -global-isel=0 output copies the source with
; sixteen v_mov_b32 and keeps the scale in s0, while the -global-isel=1 output
; copies it with eight v_mov_b64 and places the scale in v24.
define amdgpu_ps void @test_scalef32_pk32_fp6_f16_sl(<32 x half> inreg %src, ptr addrspace(1) %out) {
; GFX950-SDAG-LABEL: test_scalef32_pk32_fp6_f16_sl:
; GFX950-SDAG:       ; %bb.0:
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v2, s0
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v3, s1
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v4, s2
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v5, s3
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v6, s4
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v7, s5
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v8, s6
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v9, s7
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v10, s8
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v11, s9
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v12, s10
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v13, s11
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v14, s12
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v15, s13
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v16, s14
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v17, s15
; GFX950-SDAG-NEXT:    s_mov_b32 s0, 0x42c80000
; GFX950-SDAG-NEXT:    v_cvt_scalef32_pk32_fp6_f16 v[18:23], v[2:17], s0
; GFX950-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[22:23], off offset:16
; GFX950-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[18:21], off
; GFX950-SDAG-NEXT:    s_endpgm
;
; GFX950-GISEL-LABEL: test_scalef32_pk32_fp6_f16_sl:
; GFX950-GISEL:       ; %bb.0:
; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[16:17], s[14:15]
; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[14:15], s[12:13]
; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[12:13], s[10:11]
; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[10:11], s[8:9]
; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[8:9], s[6:7]
; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[6:7], s[4:5]
; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[4:5], s[2:3]
; GFX950-GISEL-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v24, 0x42c80000
; GFX950-GISEL-NEXT:    v_cvt_scalef32_pk32_fp6_f16 v[18:23], v[2:17], v24
; GFX950-GISEL-NEXT:    global_store_dwordx4 v[0:1], v[18:21], off
; GFX950-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[22:23], off offset:16
; GFX950-GISEL-NEXT:    s_endpgm
  %cvt = tail call <6 x i32> @llvm.amdgcn.cvt.scalef32.pk32.fp6.f16(<32 x half> %src, float 100.0)
  store <6 x i32> %cvt, ptr addrspace(1) %out, align 8
  ret void
}