; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v,+m,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v,+m,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v,+m,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v,+m,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s

declare <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v9, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x half> %v
}

define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f32_unmasked(<vscale x 2 x float> %a, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v9, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  ret <vscale x 2 x half> %v
}

declare <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x half> %v
}

define <vscale x 2 x half> @vfptrunc_nxv2f16_nxv2f64_unmasked(<vscale x 2 x double> %a, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f16_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  ret <vscale x 2 x half> %v
}

declare <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f64.nxv2f32(<vscale x 2 x double>, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @vfptrunc_nxv2f32_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f32_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v10, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f64.nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfptrunc_nxv2f32_nxv2f64_unmasked(<vscale x 2 x double> %a, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2f32_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v10, v8
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f64.nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  ret <vscale x 2 x float> %v
}

declare <vscale x 7 x float> @llvm.vp.fptrunc.nxv7f64.nxv7f32(<vscale x 7 x double>, <vscale x 7 x i1>, i32)

define <vscale x 7 x float> @vfptrunc_nxv7f32_nxv7f64(<vscale x 7 x double> %a, <vscale x 7 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv7f32_nxv7f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v16, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %v = call <vscale x 7 x float> @llvm.vp.fptrunc.nxv7f64.nxv7f32(<vscale x 7 x double> %a, <vscale x 7 x i1> %m, i32 %vl)
  ret <vscale x 7 x float> %v
}

declare <vscale x 16 x float> @llvm.vp.fptrunc.nxv16f64.nxv16f32(<vscale x 16 x double>, <vscale x 16 x i1>, i32)

define <vscale x 16 x float> @vfptrunc_nxv16f32_nxv16f64(<vscale x 16 x double> %a, <vscale x 16 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv16f32_nxv16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmv1r.v v7, v0
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    srli a2, a1, 3
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    vslidedown.vx v0, v0, a2
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    addi a3, sp, 16
; CHECK-NEXT:    vl8r.v v24, (a3) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a2, e32, m4, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v20, v24, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB7_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB7_2:
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v16, v8, v0.t
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x float> @llvm.vp.fptrunc.nxv16f64.nxv16f32(<vscale x 16 x double> %a, <vscale x 16 x i1> %m, i32 %vl)
  ret <vscale x 16 x float> %v
}

declare <vscale x 32 x float> @llvm.vp.fptrunc.nxv32f64.nxv32f32(<vscale x 32 x double>, <vscale x 32 x i1>, i32)

define <vscale x 32 x float> @vfptrunc_nxv32f32_nxv32f64(<vscale x 32 x double> %a, <vscale x 32 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv32f32_nxv32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmv1r.v v7, v0
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    srli a3, a1, 3
; CHECK-NEXT:    srli a5, a1, 2
; CHECK-NEXT:    slli a6, a1, 3
; CHECK-NEXT:    slli a4, a1, 1
; CHECK-NEXT:    vslidedown.vx v16, v0, a5
; CHECK-NEXT:    add a6, a0, a6
; CHECK-NEXT:    sub a5, a2, a4
; CHECK-NEXT:    vl8re64.v v24, (a6)
; CHECK-NEXT:    sltu a6, a2, a5
; CHECK-NEXT:    addi a6, a6, -1
; CHECK-NEXT:    and a5, a6, a5
; CHECK-NEXT:    sub a6, a5, a1
; CHECK-NEXT:    sltu a7, a5, a6
; CHECK-NEXT:    addi a7, a7, -1
; CHECK-NEXT:    vl8re64.v v8, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vx v0, v16, a3
; CHECK-NEXT:    and a0, a7, a6
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v20, v24, v0.t
; CHECK-NEXT:    bltu a5, a1, .LBB8_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a5, a1
; CHECK-NEXT:  .LBB8_2:
; CHECK-NEXT:    vmv1r.v v0, v16
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vx v6, v7, a3
; CHECK-NEXT:    vsetvli zero, a5, e32, m4, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v16, v8, v0.t
; CHECK-NEXT:    bltu a2, a4, .LBB8_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    mv a2, a4
; CHECK-NEXT:  .LBB8_4:
; CHECK-NEXT:    sub a0, a2, a1
; CHECK-NEXT:    sltu a3, a2, a0
; CHECK-NEXT:    addi a3, a3, -1
; CHECK-NEXT:    and a0, a3, a0
; CHECK-NEXT:    vmv1r.v v0, v6
; CHECK-NEXT:    addi a3, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v28, v8, v0.t
; CHECK-NEXT:    bltu a2, a1, .LBB8_6
; CHECK-NEXT:  # %bb.5:
; CHECK-NEXT:    mv a2, a1
; CHECK-NEXT:  .LBB8_6:
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a2, e32, m4, ta, ma
; CHECK-NEXT:    vfncvt.f.f.w v24, v8, v0.t
; CHECK-NEXT:    vmv8r.v v8, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x float> @llvm.vp.fptrunc.nxv32f64.nxv32f32(<vscale x 32 x double> %a, <vscale x 32 x i1> %m, i32 %vl)
  ret <vscale x 32 x float> %v
}

declare <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)

define <vscale x 2 x bfloat> @vfptrunc_nxv2bf16_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x bfloat> %v
}

define <vscale x 2 x bfloat> @vfptrunc_nxv2bf16_nxv2f32_unmasked(<vscale x 2 x float> %a, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v9, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  ret <vscale x 2 x bfloat> %v
}

declare <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)

define <vscale x 2 x bfloat> @vfptrunc_nxv2bf16_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x bfloat> %v
}

define <vscale x 2 x bfloat> @vfptrunc_nxv2bf16_nxv2f64_unmasked(<vscale x 2 x double> %a, i32 zeroext %vl) {
; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  ret <vscale x 2 x bfloat> %v
}