; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN:   -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:   --check-prefixes=CHECK,ZVFHMIN

declare <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)

define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v9, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

define <vscale x 1 x bfloat> @vfadd_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv1bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v9, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v10, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
  %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16_commute(<vscale x 1 x bfloat> %va, bfloat %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv1bf16_commute:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
  %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv1bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
  %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

define <vscale x 1 x bfloat> @vfadd_vf_nxv1bf16_unmasked_commute(<vscale x 1 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv1bf16_unmasked_commute:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 1 x bfloat> %elt.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
  %v = call <vscale x 1 x bfloat> @llvm.vp.fadd.nxv1bf16(<vscale x 1 x bfloat> %vb, <vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

declare <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)

define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v9, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

define <vscale x 2 x bfloat> @vfadd_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv2bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v9, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v10, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
  %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

define <vscale x 2 x bfloat> @vfadd_vf_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv2bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfadd.vv v9, v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 2 x bfloat> %elt.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
  %v = call <vscale x 2 x bfloat> @llvm.vp.fadd.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

declare <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)

define <vscale x 4 x bfloat> @vfadd_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v10, v12, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

define <vscale x 4 x bfloat> @vfadd_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv4bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v9
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v10, v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v10, v10, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

define <vscale x 4 x bfloat> @vfadd_vf_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv4bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v10, v10, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 4 x bfloat> %elt.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x bfloat> @llvm.vp.fadd.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

declare <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)

define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfadd.vv v12, v16, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

define <vscale x 8 x bfloat> @vfadd_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv8bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v10
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfadd.vv v12, v16, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v10, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfadd.vv v12, v12, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
  %v = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

define <vscale x 8 x bfloat> @vfadd_vf_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv8bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v10, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfadd.vv v12, v12, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 8 x bfloat> %elt.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
  %v = call <vscale x 8 x bfloat> @llvm.vp.fadd.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

declare <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)

define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v24, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x bfloat> %v
}

define <vscale x 16 x bfloat> @vfadd_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv16bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v24, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x bfloat> %v
}

define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vmv.v.x v12, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v16, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
  %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x bfloat> %v
}

define <vscale x 16 x bfloat> @vfadd_vf_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv16bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vmv.v.x v12, a1
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v16, v24
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 16 x bfloat> %elt.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
  %v = call <vscale x 16 x bfloat> @llvm.vp.fadd.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x bfloat> %v
}

declare <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)

define <vscale x 32 x bfloat> @vfadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    mv a2, a1
; CHECK-NEXT:    slli a1, a1, 1
; CHECK-NEXT:    add a1, a1, a2
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmv1r.v v7, v0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a1, a2, 1
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    vslidedown.vx v0, v0, a2
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    vmv4r.v v8, v16
; CHECK-NEXT:    csrr a3, vlenb
; CHECK-NEXT:    slli a3, a3, 3
; CHECK-NEXT:    add a3, sp, a3
; CHECK-NEXT:    addi a3, a3, 16
; CHECK-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20, v0.t
; CHECK-NEXT:    addi a2, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    add a2, sp, a2
; CHECK-NEXT:    addi a2, a2, 16
; CHECK-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20, v0.t
; CHECK-NEXT:    addi a2, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v8, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB22_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB22_2:
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16, v0.t
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v16, v0.t
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v24, v16, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x bfloat> %v
}

define <vscale x 32 x bfloat> @vfadd_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vv_nxv32bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT:    vmset.m v24
; CHECK-NEXT:    slli a1, a2, 1
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vx v0, v24, a2
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    addi a3, sp, 16
; CHECK-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v16, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB23_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB23_2:
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v24, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %b, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x bfloat> %v
}

define <vscale x 32 x bfloat> @vfadd_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    mv a2, a1
; CHECK-NEXT:    slli a1, a1, 1
; CHECK-NEXT:    add a1, a1, a2
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
; CHECK-NEXT:    vmv1r.v v7, v0
; CHECK-NEXT:    vmv8r.v v16, v8
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    vmv.v.x v8, a1
; CHECK-NEXT:    slli a1, a2, 1
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vx v0, v0, a2
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    csrr a3, vlenb
; CHECK-NEXT:    slli a3, a3, 3
; CHECK-NEXT:    add a3, sp, a3
; CHECK-NEXT:    addi a3, a3, 16
; CHECK-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v12, v0.t
; CHECK-NEXT:    vmv4r.v v8, v16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    add a2, sp, a2
; CHECK-NEXT:    addi a2, a2, 16
; CHECK-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v20, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v24, v8, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v24, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB24_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB24_2:
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24, v0.t
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v24, v0.t
; CHECK-NEXT:    vmv8r.v v24, v16
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v24, v16, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v24, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    slli a0, a0, 1
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
  %v = call <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x bfloat> %v
}

define <vscale x 32 x bfloat> @vfadd_vf_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, bfloat %b, i32 zeroext %evl) {
; CHECK-LABEL: vfadd_vf_nxv32bf16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT:    fmv.x.h a1, fa0
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    vsetvli a3, zero, e16, m8, ta, ma
; CHECK-NEXT:    vmset.m v24
; CHECK-NEXT:    vmv.v.x v16, a1
; CHECK-NEXT:    slli a1, a2, 1
; CHECK-NEXT:    srli a2, a2, 2
; CHECK-NEXT:    sub a3, a0, a1
; CHECK-NEXT:    vsetvli a4, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vx v0, v24, a2
; CHECK-NEXT:    sltu a2, a0, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    addi a3, sp, 16
; CHECK-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v20, v0.t
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v12, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v16, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v12, v16, v0.t
; CHECK-NEXT:    bltu a0, a1, .LBB25_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:  .LBB25_2:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwcvtbf16.f.f.v v24, v0
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfadd.vv v16, v16, v24
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v8, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
  %vb = shufflevector <vscale x 32 x bfloat> %elt.head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
  %v = call <vscale x 32 x bfloat> @llvm.vp.fadd.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 32 x bfloat> %v
}
declare <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)

define <vscale x 1 x half> @vfadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfadd_vv_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfadd.vv v8, v8, v9, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfadd_vv_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %v
}

define <vscale x 1 x half> @vfadd_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, <vscale x 1 x half> %b, i32 zeroext %evl) {
; ZVFH-LABEL: vfadd_vv_nxv1f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfadd.vv v8, v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfadd_vv_nxv1f16_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v9, v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %v = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x half> %v
}

define <vscale x 1 x half> @vfadd_vf_nxv1f16(<vscale x 1 x half> %va, half %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfadd_vf_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfadd.vf v8, v8, fa0, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfadd_vf_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v9, a1
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v9, v10, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %v = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %v
}

define <vscale x 1 x half> @vfadd_vf_nxv1f16_commute(<vscale x 1 x half> %va, half %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfadd_vf_nxv1f16_commute:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfadd.vf v8, v8, fa0, v0.t
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfadd_vf_nxv1f16_commute:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v9, a1
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v9, v8, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %v = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %v
}

define <vscale x 1 x half> @vfadd_vf_nxv1f16_unmasked(<vscale x 1 x half> %va, half %b, i32 zeroext %evl) {
; ZVFH-LABEL: vfadd_vf_nxv1f16_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
;
ZVFH-NEXT: vfadd.vf v8, v8, fa0 789; ZVFH-NEXT: ret 790; 791; ZVFHMIN-LABEL: vfadd_vf_nxv1f16_unmasked: 792; ZVFHMIN: # %bb.0: 793; ZVFHMIN-NEXT: fmv.x.h a1, fa0 794; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 795; ZVFHMIN-NEXT: vmv.v.x v9, a1 796; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 797; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 798; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 799; ZVFHMIN-NEXT: vfadd.vv v9, v10, v8 800; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 801; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 802; ZVFHMIN-NEXT: ret 803 %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0 804 %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer 805 %v = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) 806 ret <vscale x 1 x half> %v 807} 808 809define <vscale x 1 x half> @vfadd_vf_nxv1f16_unmasked_commute(<vscale x 1 x half> %va, half %b, i32 zeroext %evl) { 810; ZVFH-LABEL: vfadd_vf_nxv1f16_unmasked_commute: 811; ZVFH: # %bb.0: 812; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 813; ZVFH-NEXT: vfadd.vf v8, v8, fa0 814; ZVFH-NEXT: ret 815; 816; ZVFHMIN-LABEL: vfadd_vf_nxv1f16_unmasked_commute: 817; ZVFHMIN: # %bb.0: 818; ZVFHMIN-NEXT: fmv.x.h a1, fa0 819; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 820; ZVFHMIN-NEXT: vmv.v.x v9, a1 821; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 822; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 823; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 824; ZVFHMIN-NEXT: vfadd.vv v9, v8, v10 825; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 826; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 827; ZVFHMIN-NEXT: ret 828 %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0 829 %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer 830 %v = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> %vb, 
<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 831 ret <vscale x 1 x half> %v 832} 833 834declare <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x i1>, i32) 835 836define <vscale x 2 x half> @vfadd_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { 837; ZVFH-LABEL: vfadd_vv_nxv2f16: 838; ZVFH: # %bb.0: 839; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 840; ZVFH-NEXT: vfadd.vv v8, v8, v9, v0.t 841; ZVFH-NEXT: ret 842; 843; ZVFHMIN-LABEL: vfadd_vv_nxv2f16: 844; ZVFHMIN: # %bb.0: 845; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 846; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t 847; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t 848; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma 849; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10, v0.t 850; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 851; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t 852; ZVFHMIN-NEXT: ret 853 %v = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl) 854 ret <vscale x 2 x half> %v 855} 856 857define <vscale x 2 x half> @vfadd_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, <vscale x 2 x half> %b, i32 zeroext %evl) { 858; ZVFH-LABEL: vfadd_vv_nxv2f16_unmasked: 859; ZVFH: # %bb.0: 860; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 861; ZVFH-NEXT: vfadd.vv v8, v8, v9 862; ZVFH-NEXT: ret 863; 864; ZVFHMIN-LABEL: vfadd_vv_nxv2f16_unmasked: 865; ZVFHMIN: # %bb.0: 866; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 867; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 868; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 869; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma 870; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10 871; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 872; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 873; ZVFHMIN-NEXT: ret 874 %v = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> splat 
(i1 true), i32 %evl) 875 ret <vscale x 2 x half> %v 876} 877 878define <vscale x 2 x half> @vfadd_vf_nxv2f16(<vscale x 2 x half> %va, half %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { 879; ZVFH-LABEL: vfadd_vf_nxv2f16: 880; ZVFH: # %bb.0: 881; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 882; ZVFH-NEXT: vfadd.vf v8, v8, fa0, v0.t 883; ZVFH-NEXT: ret 884; 885; ZVFHMIN-LABEL: vfadd_vf_nxv2f16: 886; ZVFHMIN: # %bb.0: 887; ZVFHMIN-NEXT: fmv.x.h a1, fa0 888; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 889; ZVFHMIN-NEXT: vmv.v.x v9, a1 890; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t 891; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t 892; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma 893; ZVFHMIN-NEXT: vfadd.vv v9, v10, v8, v0.t 894; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 895; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9, v0.t 896; ZVFHMIN-NEXT: ret 897 %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0 898 %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer 899 %v = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl) 900 ret <vscale x 2 x half> %v 901} 902 903define <vscale x 2 x half> @vfadd_vf_nxv2f16_unmasked(<vscale x 2 x half> %va, half %b, i32 zeroext %evl) { 904; ZVFH-LABEL: vfadd_vf_nxv2f16_unmasked: 905; ZVFH: # %bb.0: 906; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 907; ZVFH-NEXT: vfadd.vf v8, v8, fa0 908; ZVFH-NEXT: ret 909; 910; ZVFHMIN-LABEL: vfadd_vf_nxv2f16_unmasked: 911; ZVFHMIN: # %bb.0: 912; ZVFHMIN-NEXT: fmv.x.h a1, fa0 913; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 914; ZVFHMIN-NEXT: vmv.v.x v9, a1 915; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 916; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 917; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma 918; ZVFHMIN-NEXT: vfadd.vv v9, v10, v8 919; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 920; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 921; ZVFHMIN-NEXT: 
ret 922 %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0 923 %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer 924 %v = call <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) 925 ret <vscale x 2 x half> %v 926} 927 928declare <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x i1>, i32) 929 930define <vscale x 4 x half> @vfadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { 931; ZVFH-LABEL: vfadd_vv_nxv4f16: 932; ZVFH: # %bb.0: 933; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma 934; ZVFH-NEXT: vfadd.vv v8, v8, v9, v0.t 935; ZVFH-NEXT: ret 936; 937; ZVFHMIN-LABEL: vfadd_vv_nxv4f16: 938; ZVFHMIN: # %bb.0: 939; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma 940; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t 941; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t 942; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 943; ZVFHMIN-NEXT: vfadd.vv v10, v12, v10, v0.t 944; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 945; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t 946; ZVFHMIN-NEXT: ret 947 %v = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl) 948 ret <vscale x 4 x half> %v 949} 950 951define <vscale x 4 x half> @vfadd_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, <vscale x 4 x half> %b, i32 zeroext %evl) { 952; ZVFH-LABEL: vfadd_vv_nxv4f16_unmasked: 953; ZVFH: # %bb.0: 954; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma 955; ZVFH-NEXT: vfadd.vv v8, v8, v9 956; ZVFH-NEXT: ret 957; 958; ZVFHMIN-LABEL: vfadd_vv_nxv4f16_unmasked: 959; ZVFHMIN: # %bb.0: 960; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma 961; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 962; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 963; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 964; 
ZVFHMIN-NEXT: vfadd.vv v10, v12, v10 965; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 966; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 967; ZVFHMIN-NEXT: ret 968 %v = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl) 969 ret <vscale x 4 x half> %v 970} 971 972define <vscale x 4 x half> @vfadd_vf_nxv4f16(<vscale x 4 x half> %va, half %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { 973; ZVFH-LABEL: vfadd_vf_nxv4f16: 974; ZVFH: # %bb.0: 975; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma 976; ZVFH-NEXT: vfadd.vf v8, v8, fa0, v0.t 977; ZVFH-NEXT: ret 978; 979; ZVFHMIN-LABEL: vfadd_vf_nxv4f16: 980; ZVFHMIN: # %bb.0: 981; ZVFHMIN-NEXT: fmv.x.h a1, fa0 982; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma 983; ZVFHMIN-NEXT: vmv.v.x v9, a1 984; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t 985; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t 986; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 987; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12, v0.t 988; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 989; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10, v0.t 990; ZVFHMIN-NEXT: ret 991 %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0 992 %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer 993 %v = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl) 994 ret <vscale x 4 x half> %v 995} 996 997define <vscale x 4 x half> @vfadd_vf_nxv4f16_unmasked(<vscale x 4 x half> %va, half %b, i32 zeroext %evl) { 998; ZVFH-LABEL: vfadd_vf_nxv4f16_unmasked: 999; ZVFH: # %bb.0: 1000; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma 1001; ZVFH-NEXT: vfadd.vf v8, v8, fa0 1002; ZVFH-NEXT: ret 1003; 1004; ZVFHMIN-LABEL: vfadd_vf_nxv4f16_unmasked: 1005; ZVFHMIN: # %bb.0: 1006; ZVFHMIN-NEXT: fmv.x.h a1, fa0 1007; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma 1008; ZVFHMIN-NEXT: vmv.v.x v9, 
a1 1009; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 1010; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 1011; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1012; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12 1013; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 1014; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 1015; ZVFHMIN-NEXT: ret 1016 %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0 1017 %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer 1018 %v = call <vscale x 4 x half> @llvm.vp.fadd.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) 1019 ret <vscale x 4 x half> %v 1020} 1021 1022declare <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x i1>, i32) 1023 1024define <vscale x 8 x half> @vfadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1025; ZVFH-LABEL: vfadd_vv_nxv8f16: 1026; ZVFH: # %bb.0: 1027; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma 1028; ZVFH-NEXT: vfadd.vv v8, v8, v10, v0.t 1029; ZVFH-NEXT: ret 1030; 1031; ZVFHMIN-LABEL: vfadd_vv_nxv8f16: 1032; ZVFHMIN: # %bb.0: 1033; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma 1034; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t 1035; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t 1036; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1037; ZVFHMIN-NEXT: vfadd.vv v12, v16, v12, v0.t 1038; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1039; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t 1040; ZVFHMIN-NEXT: ret 1041 %v = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl) 1042 ret <vscale x 8 x half> %v 1043} 1044 1045define <vscale x 8 x half> @vfadd_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, <vscale x 8 x half> %b, i32 zeroext %evl) { 1046; ZVFH-LABEL: vfadd_vv_nxv8f16_unmasked: 1047; ZVFH: # %bb.0: 1048; ZVFH-NEXT: vsetvli zero, a0, e16, m2, 
ta, ma 1049; ZVFH-NEXT: vfadd.vv v8, v8, v10 1050; ZVFH-NEXT: ret 1051; 1052; ZVFHMIN-LABEL: vfadd_vv_nxv8f16_unmasked: 1053; ZVFHMIN: # %bb.0: 1054; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma 1055; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 1056; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 1057; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1058; ZVFHMIN-NEXT: vfadd.vv v12, v16, v12 1059; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1060; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 1061; ZVFHMIN-NEXT: ret 1062 %v = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl) 1063 ret <vscale x 8 x half> %v 1064} 1065 1066define <vscale x 8 x half> @vfadd_vf_nxv8f16(<vscale x 8 x half> %va, half %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1067; ZVFH-LABEL: vfadd_vf_nxv8f16: 1068; ZVFH: # %bb.0: 1069; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma 1070; ZVFH-NEXT: vfadd.vf v8, v8, fa0, v0.t 1071; ZVFH-NEXT: ret 1072; 1073; ZVFHMIN-LABEL: vfadd_vf_nxv8f16: 1074; ZVFHMIN: # %bb.0: 1075; ZVFHMIN-NEXT: fmv.x.h a1, fa0 1076; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma 1077; ZVFHMIN-NEXT: vmv.v.x v10, a1 1078; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t 1079; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t 1080; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1081; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16, v0.t 1082; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1083; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t 1084; ZVFHMIN-NEXT: ret 1085 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 1086 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer 1087 %v = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl) 1088 ret <vscale x 8 x half> %v 1089} 1090 1091define <vscale x 8 x half> @vfadd_vf_nxv8f16_unmasked(<vscale x 8 x half> %va, half %b, i32 zeroext %evl) 
{ 1092; ZVFH-LABEL: vfadd_vf_nxv8f16_unmasked: 1093; ZVFH: # %bb.0: 1094; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma 1095; ZVFH-NEXT: vfadd.vf v8, v8, fa0 1096; ZVFH-NEXT: ret 1097; 1098; ZVFHMIN-LABEL: vfadd_vf_nxv8f16_unmasked: 1099; ZVFHMIN: # %bb.0: 1100; ZVFHMIN-NEXT: fmv.x.h a1, fa0 1101; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma 1102; ZVFHMIN-NEXT: vmv.v.x v10, a1 1103; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 1104; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 1105; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1106; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16 1107; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1108; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 1109; ZVFHMIN-NEXT: ret 1110 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 1111 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer 1112 %v = call <vscale x 8 x half> @llvm.vp.fadd.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) 1113 ret <vscale x 8 x half> %v 1114} 1115 1116declare <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x i1>, i32) 1117 1118define <vscale x 16 x half> @vfadd_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1119; ZVFH-LABEL: vfadd_vv_nxv16f16: 1120; ZVFH: # %bb.0: 1121; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma 1122; ZVFH-NEXT: vfadd.vv v8, v8, v12, v0.t 1123; ZVFH-NEXT: ret 1124; 1125; ZVFHMIN-LABEL: vfadd_vv_nxv16f16: 1126; ZVFHMIN: # %bb.0: 1127; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 1128; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t 1129; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t 1130; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1131; ZVFHMIN-NEXT: vfadd.vv v16, v24, v16, v0.t 1132; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1133; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t 1134; ZVFHMIN-NEXT: ret 1135 %v = call <vscale x 
16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl) 1136 ret <vscale x 16 x half> %v 1137} 1138 1139define <vscale x 16 x half> @vfadd_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, <vscale x 16 x half> %b, i32 zeroext %evl) { 1140; ZVFH-LABEL: vfadd_vv_nxv16f16_unmasked: 1141; ZVFH: # %bb.0: 1142; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma 1143; ZVFH-NEXT: vfadd.vv v8, v8, v12 1144; ZVFH-NEXT: ret 1145; 1146; ZVFHMIN-LABEL: vfadd_vv_nxv16f16_unmasked: 1147; ZVFHMIN: # %bb.0: 1148; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 1149; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 1150; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 1151; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1152; ZVFHMIN-NEXT: vfadd.vv v16, v24, v16 1153; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1154; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 1155; ZVFHMIN-NEXT: ret 1156 %v = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl) 1157 ret <vscale x 16 x half> %v 1158} 1159 1160define <vscale x 16 x half> @vfadd_vf_nxv16f16(<vscale x 16 x half> %va, half %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1161; ZVFH-LABEL: vfadd_vf_nxv16f16: 1162; ZVFH: # %bb.0: 1163; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma 1164; ZVFH-NEXT: vfadd.vf v8, v8, fa0, v0.t 1165; ZVFH-NEXT: ret 1166; 1167; ZVFHMIN-LABEL: vfadd_vf_nxv16f16: 1168; ZVFHMIN: # %bb.0: 1169; ZVFHMIN-NEXT: fmv.x.h a1, fa0 1170; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 1171; ZVFHMIN-NEXT: vmv.v.x v12, a1 1172; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t 1173; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t 1174; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1175; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t 1176; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1177; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t 1178; ZVFHMIN-NEXT: ret 1179 %elt.head = insertelement <vscale x 16 x half> poison, half 
%b, i32 0 1180 %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer 1181 %v = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl) 1182 ret <vscale x 16 x half> %v 1183} 1184 1185define <vscale x 16 x half> @vfadd_vf_nxv16f16_unmasked(<vscale x 16 x half> %va, half %b, i32 zeroext %evl) { 1186; ZVFH-LABEL: vfadd_vf_nxv16f16_unmasked: 1187; ZVFH: # %bb.0: 1188; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma 1189; ZVFH-NEXT: vfadd.vf v8, v8, fa0 1190; ZVFH-NEXT: ret 1191; 1192; ZVFHMIN-LABEL: vfadd_vf_nxv16f16_unmasked: 1193; ZVFHMIN: # %bb.0: 1194; ZVFHMIN-NEXT: fmv.x.h a1, fa0 1195; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 1196; ZVFHMIN-NEXT: vmv.v.x v12, a1 1197; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 1198; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 1199; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1200; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24 1201; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1202; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 1203; ZVFHMIN-NEXT: ret 1204 %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0 1205 %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer 1206 %v = call <vscale x 16 x half> @llvm.vp.fadd.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl) 1207 ret <vscale x 16 x half> %v 1208} 1209 1210declare <vscale x 32 x half> @llvm.vp.fadd.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>, <vscale x 32 x i1>, i32) 1211 1212define <vscale x 32 x half> @vfadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { 1213; ZVFH-LABEL: vfadd_vv_nxv32f16: 1214; ZVFH: # %bb.0: 1215; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma 1216; ZVFH-NEXT: vfadd.vv v8, v8, v16, v0.t 1217; ZVFH-NEXT: ret 1218; 1219; ZVFHMIN-LABEL: 
vfadd_vv_nxv32f16: 1220; ZVFHMIN: # %bb.0: 1221; ZVFHMIN-NEXT: addi sp, sp, -16 1222; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 1223; ZVFHMIN-NEXT: csrr a1, vlenb 1224; ZVFHMIN-NEXT: slli a1, a1, 3 1225; ZVFHMIN-NEXT: mv a2, a1 1226; ZVFHMIN-NEXT: slli a1, a1, 1 1227; ZVFHMIN-NEXT: add a1, a1, a2 1228; ZVFHMIN-NEXT: sub sp, sp, a1 1229; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb 1230; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma 1231; ZVFHMIN-NEXT: vmv1r.v v7, v0 1232; ZVFHMIN-NEXT: csrr a1, vlenb 1233; ZVFHMIN-NEXT: slli a1, a1, 4 1234; ZVFHMIN-NEXT: add a1, sp, a1 1235; ZVFHMIN-NEXT: addi a1, a1, 16 1236; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 1237; ZVFHMIN-NEXT: csrr a2, vlenb 1238; ZVFHMIN-NEXT: slli a1, a2, 1 1239; ZVFHMIN-NEXT: srli a2, a2, 2 1240; ZVFHMIN-NEXT: sub a3, a0, a1 1241; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 1242; ZVFHMIN-NEXT: sltu a2, a0, a3 1243; ZVFHMIN-NEXT: addi a2, a2, -1 1244; ZVFHMIN-NEXT: and a2, a2, a3 1245; ZVFHMIN-NEXT: vmv4r.v v8, v16 1246; ZVFHMIN-NEXT: csrr a3, vlenb 1247; ZVFHMIN-NEXT: slli a3, a3, 3 1248; ZVFHMIN-NEXT: add a3, sp, a3 1249; ZVFHMIN-NEXT: addi a3, a3, 16 1250; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 1251; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma 1252; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t 1253; ZVFHMIN-NEXT: addi a2, sp, 16 1254; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill 1255; ZVFHMIN-NEXT: csrr a2, vlenb 1256; ZVFHMIN-NEXT: slli a2, a2, 4 1257; ZVFHMIN-NEXT: add a2, sp, a2 1258; ZVFHMIN-NEXT: addi a2, a2, 16 1259; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload 1260; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t 1261; ZVFHMIN-NEXT: addi a2, sp, 16 1262; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload 1263; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1264; ZVFHMIN-NEXT: vfadd.vv v16, v8, v16, v0.t 1265; ZVFHMIN-NEXT: vsetvli zero, zero, 
e16, m4, ta, ma 1266; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t 1267; ZVFHMIN-NEXT: bltu a0, a1, .LBB48_2 1268; ZVFHMIN-NEXT: # %bb.1: 1269; ZVFHMIN-NEXT: mv a0, a1 1270; ZVFHMIN-NEXT: .LBB48_2: 1271; ZVFHMIN-NEXT: vmv1r.v v0, v7 1272; ZVFHMIN-NEXT: csrr a1, vlenb 1273; ZVFHMIN-NEXT: slli a1, a1, 3 1274; ZVFHMIN-NEXT: add a1, sp, a1 1275; ZVFHMIN-NEXT: addi a1, a1, 16 1276; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload 1277; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 1278; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t 1279; ZVFHMIN-NEXT: addi a0, sp, 16 1280; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill 1281; ZVFHMIN-NEXT: csrr a0, vlenb 1282; ZVFHMIN-NEXT: slli a0, a0, 4 1283; ZVFHMIN-NEXT: add a0, sp, a0 1284; ZVFHMIN-NEXT: addi a0, a0, 16 1285; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1286; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t 1287; ZVFHMIN-NEXT: addi a0, sp, 16 1288; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1289; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1290; ZVFHMIN-NEXT: vfadd.vv v16, v24, v16, v0.t 1291; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1292; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16, v0.t 1293; ZVFHMIN-NEXT: csrr a0, vlenb 1294; ZVFHMIN-NEXT: slli a0, a0, 3 1295; ZVFHMIN-NEXT: mv a1, a0 1296; ZVFHMIN-NEXT: slli a0, a0, 1 1297; ZVFHMIN-NEXT: add a0, a0, a1 1298; ZVFHMIN-NEXT: add sp, sp, a0 1299; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 1300; ZVFHMIN-NEXT: addi sp, sp, 16 1301; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 1302; ZVFHMIN-NEXT: ret 1303 %v = call <vscale x 32 x half> @llvm.vp.fadd.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 %evl) 1304 ret <vscale x 32 x half> %v 1305} 1306 1307define <vscale x 32 x half> @vfadd_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, <vscale x 32 x half> %b, i32 zeroext %evl) { 1308; ZVFH-LABEL: vfadd_vv_nxv32f16_unmasked: 1309; ZVFH: # %bb.0: 1310; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma 1311; 
ZVFH-NEXT: vfadd.vv v8, v8, v16 1312; ZVFH-NEXT: ret 1313; 1314; ZVFHMIN-LABEL: vfadd_vv_nxv32f16_unmasked: 1315; ZVFHMIN: # %bb.0: 1316; ZVFHMIN-NEXT: addi sp, sp, -16 1317; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 1318; ZVFHMIN-NEXT: csrr a1, vlenb 1319; ZVFHMIN-NEXT: slli a1, a1, 3 1320; ZVFHMIN-NEXT: sub sp, sp, a1 1321; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 1322; ZVFHMIN-NEXT: csrr a2, vlenb 1323; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma 1324; ZVFHMIN-NEXT: vmset.m v24 1325; ZVFHMIN-NEXT: slli a1, a2, 1 1326; ZVFHMIN-NEXT: srli a2, a2, 2 1327; ZVFHMIN-NEXT: sub a3, a0, a1 1328; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma 1329; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 1330; ZVFHMIN-NEXT: sltu a2, a0, a3 1331; ZVFHMIN-NEXT: addi a2, a2, -1 1332; ZVFHMIN-NEXT: and a2, a2, a3 1333; ZVFHMIN-NEXT: addi a3, sp, 16 1334; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill 1335; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma 1336; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t 1337; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t 1338; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1339; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t 1340; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1341; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t 1342; ZVFHMIN-NEXT: bltu a0, a1, .LBB49_2 1343; ZVFHMIN-NEXT: # %bb.1: 1344; ZVFHMIN-NEXT: mv a0, a1 1345; ZVFHMIN-NEXT: .LBB49_2: 1346; ZVFHMIN-NEXT: addi a1, sp, 16 1347; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 1348; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 1349; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 1350; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 1351; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1352; ZVFHMIN-NEXT: vfadd.vv v16, v24, v16 1353; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1354; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 1355; ZVFHMIN-NEXT: csrr a0, vlenb 1356; ZVFHMIN-NEXT: slli a0, a0, 3 1357; 
ZVFHMIN-NEXT: add sp, sp, a0 1358; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 1359; ZVFHMIN-NEXT: addi sp, sp, 16 1360; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 1361; ZVFHMIN-NEXT: ret 1362 %v = call <vscale x 32 x half> @llvm.vp.fadd.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x i1> splat (i1 true), i32 %evl) 1363 ret <vscale x 32 x half> %v 1364} 1365 1366define <vscale x 32 x half> @vfadd_vf_nxv32f16(<vscale x 32 x half> %va, half %b, <vscale x 32 x i1> %m, i32 zeroext %evl) { 1367; ZVFH-LABEL: vfadd_vf_nxv32f16: 1368; ZVFH: # %bb.0: 1369; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma 1370; ZVFH-NEXT: vfadd.vf v8, v8, fa0, v0.t 1371; ZVFH-NEXT: ret 1372; 1373; ZVFHMIN-LABEL: vfadd_vf_nxv32f16: 1374; ZVFHMIN: # %bb.0: 1375; ZVFHMIN-NEXT: addi sp, sp, -16 1376; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 1377; ZVFHMIN-NEXT: csrr a1, vlenb 1378; ZVFHMIN-NEXT: slli a1, a1, 3 1379; ZVFHMIN-NEXT: mv a2, a1 1380; ZVFHMIN-NEXT: slli a1, a1, 1 1381; ZVFHMIN-NEXT: add a1, a1, a2 1382; ZVFHMIN-NEXT: sub sp, sp, a1 1383; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb 1384; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma 1385; ZVFHMIN-NEXT: vmv1r.v v7, v0 1386; ZVFHMIN-NEXT: vmv8r.v v16, v8 1387; ZVFHMIN-NEXT: fmv.x.h a1, fa0 1388; ZVFHMIN-NEXT: csrr a2, vlenb 1389; ZVFHMIN-NEXT: vmv.v.x v8, a1 1390; ZVFHMIN-NEXT: slli a1, a2, 1 1391; ZVFHMIN-NEXT: srli a2, a2, 2 1392; ZVFHMIN-NEXT: sub a3, a0, a1 1393; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma 1394; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 1395; ZVFHMIN-NEXT: sltu a2, a0, a3 1396; ZVFHMIN-NEXT: addi a2, a2, -1 1397; ZVFHMIN-NEXT: and a2, a2, a3 1398; ZVFHMIN-NEXT: csrr a3, vlenb 1399; ZVFHMIN-NEXT: slli a3, a3, 3 1400; ZVFHMIN-NEXT: add a3, sp, a3 1401; ZVFHMIN-NEXT: addi a3, a3, 16 1402; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 1403; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma 1404; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v24, v12, v0.t 1405; ZVFHMIN-NEXT: vmv4r.v v8, v16 1406; ZVFHMIN-NEXT: csrr a2, vlenb 1407; ZVFHMIN-NEXT: slli a2, a2, 4 1408; ZVFHMIN-NEXT: add a2, sp, a2 1409; ZVFHMIN-NEXT: addi a2, a2, 16 1410; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill 1411; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t 1412; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1413; ZVFHMIN-NEXT: vfadd.vv v24, v8, v24, v0.t 1414; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1415; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24, v0.t 1416; ZVFHMIN-NEXT: bltu a0, a1, .LBB50_2 1417; ZVFHMIN-NEXT: # %bb.1: 1418; ZVFHMIN-NEXT: mv a0, a1 1419; ZVFHMIN-NEXT: .LBB50_2: 1420; ZVFHMIN-NEXT: vmv1r.v v0, v7 1421; ZVFHMIN-NEXT: csrr a1, vlenb 1422; ZVFHMIN-NEXT: slli a1, a1, 4 1423; ZVFHMIN-NEXT: add a1, sp, a1 1424; ZVFHMIN-NEXT: addi a1, a1, 16 1425; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 1426; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 1427; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t 1428; ZVFHMIN-NEXT: addi a0, sp, 16 1429; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill 1430; ZVFHMIN-NEXT: csrr a0, vlenb 1431; ZVFHMIN-NEXT: slli a0, a0, 3 1432; ZVFHMIN-NEXT: add a0, sp, a0 1433; ZVFHMIN-NEXT: addi a0, a0, 16 1434; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 1435; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24, v0.t 1436; ZVFHMIN-NEXT: vmv8r.v v24, v16 1437; ZVFHMIN-NEXT: addi a0, sp, 16 1438; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1439; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1440; ZVFHMIN-NEXT: vfadd.vv v24, v16, v24, v0.t 1441; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1442; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24, v0.t 1443; ZVFHMIN-NEXT: csrr a0, vlenb 1444; ZVFHMIN-NEXT: slli a0, a0, 3 1445; ZVFHMIN-NEXT: mv a1, a0 1446; ZVFHMIN-NEXT: slli a0, a0, 1 1447; ZVFHMIN-NEXT: add a0, a0, a1 1448; ZVFHMIN-NEXT: add sp, sp, a0 1449; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 1450; ZVFHMIN-NEXT: addi sp, sp, 16 1451; 
ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 1452; ZVFHMIN-NEXT: ret 1453 %elt.head = insertelement <vscale x 32 x half> poison, half %b, i32 0 1454 %vb = shufflevector <vscale x 32 x half> %elt.head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer 1455 %v = call <vscale x 32 x half> @llvm.vp.fadd.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 %evl) 1456 ret <vscale x 32 x half> %v 1457} 1458 1459define <vscale x 32 x half> @vfadd_vf_nxv32f16_unmasked(<vscale x 32 x half> %va, half %b, i32 zeroext %evl) { 1460; ZVFH-LABEL: vfadd_vf_nxv32f16_unmasked: 1461; ZVFH: # %bb.0: 1462; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma 1463; ZVFH-NEXT: vfadd.vf v8, v8, fa0 1464; ZVFH-NEXT: ret 1465; 1466; ZVFHMIN-LABEL: vfadd_vf_nxv32f16_unmasked: 1467; ZVFHMIN: # %bb.0: 1468; ZVFHMIN-NEXT: addi sp, sp, -16 1469; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 1470; ZVFHMIN-NEXT: csrr a1, vlenb 1471; ZVFHMIN-NEXT: slli a1, a1, 3 1472; ZVFHMIN-NEXT: sub sp, sp, a1 1473; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb 1474; ZVFHMIN-NEXT: fmv.x.h a1, fa0 1475; ZVFHMIN-NEXT: csrr a2, vlenb 1476; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m8, ta, ma 1477; ZVFHMIN-NEXT: vmset.m v24 1478; ZVFHMIN-NEXT: vmv.v.x v16, a1 1479; ZVFHMIN-NEXT: slli a1, a2, 1 1480; ZVFHMIN-NEXT: srli a2, a2, 2 1481; ZVFHMIN-NEXT: sub a3, a0, a1 1482; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma 1483; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 1484; ZVFHMIN-NEXT: sltu a2, a0, a3 1485; ZVFHMIN-NEXT: addi a2, a2, -1 1486; ZVFHMIN-NEXT: and a2, a2, a3 1487; ZVFHMIN-NEXT: addi a3, sp, 16 1488; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill 1489; ZVFHMIN-NEXT: vsetvli zero, a2, e16, m4, ta, ma 1490; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20, v0.t 1491; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t 1492; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1493; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, 
v0.t 1494; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1495; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16, v0.t 1496; ZVFHMIN-NEXT: bltu a0, a1, .LBB51_2 1497; ZVFHMIN-NEXT: # %bb.1: 1498; ZVFHMIN-NEXT: mv a0, a1 1499; ZVFHMIN-NEXT: .LBB51_2: 1500; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma 1501; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 1502; ZVFHMIN-NEXT: addi a0, sp, 16 1503; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload 1504; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 1505; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma 1506; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24 1507; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma 1508; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 1509; ZVFHMIN-NEXT: csrr a0, vlenb 1510; ZVFHMIN-NEXT: slli a0, a0, 3 1511; ZVFHMIN-NEXT: add sp, sp, a0 1512; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16 1513; ZVFHMIN-NEXT: addi sp, sp, 16 1514; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0 1515; ZVFHMIN-NEXT: ret 1516 %elt.head = insertelement <vscale x 32 x half> poison, half %b, i32 0 1517 %vb = shufflevector <vscale x 32 x half> %elt.head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer 1518 %v = call <vscale x 32 x half> @llvm.vp.fadd.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl) 1519 ret <vscale x 32 x half> %v 1520} 1521 1522declare <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x i1>, i32) 1523 1524define <vscale x 1 x float> @vfadd_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1525; CHECK-LABEL: vfadd_vv_nxv1f32: 1526; CHECK: # %bb.0: 1527; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 1528; CHECK-NEXT: vfadd.vv v8, v8, v9, v0.t 1529; CHECK-NEXT: ret 1530 %v = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %b, <vscale x 1 x i1> %m, i32 %evl) 1531 ret <vscale x 1 x float> %v 1532} 1533 1534define <vscale x 1 x float> 
@vfadd_vv_nxv1f32_unmasked(<vscale x 1 x float> %va, <vscale x 1 x float> %b, i32 zeroext %evl) { 1535; CHECK-LABEL: vfadd_vv_nxv1f32_unmasked: 1536; CHECK: # %bb.0: 1537; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 1538; CHECK-NEXT: vfadd.vv v8, v8, v9 1539; CHECK-NEXT: ret 1540 %v = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl) 1541 ret <vscale x 1 x float> %v 1542} 1543 1544define <vscale x 1 x float> @vfadd_vf_nxv1f32(<vscale x 1 x float> %va, float %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1545; CHECK-LABEL: vfadd_vf_nxv1f32: 1546; CHECK: # %bb.0: 1547; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 1548; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t 1549; CHECK-NEXT: ret 1550 %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0 1551 %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer 1552 %v = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 %evl) 1553 ret <vscale x 1 x float> %v 1554} 1555 1556define <vscale x 1 x float> @vfadd_vf_nxv1f32_unmasked(<vscale x 1 x float> %va, float %b, i32 zeroext %evl) { 1557; CHECK-LABEL: vfadd_vf_nxv1f32_unmasked: 1558; CHECK: # %bb.0: 1559; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 1560; CHECK-NEXT: vfadd.vf v8, v8, fa0 1561; CHECK-NEXT: ret 1562 %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0 1563 %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer 1564 %v = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) 1565 ret <vscale x 1 x float> %v 1566} 1567 1568declare <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32) 1569 1570define 
<vscale x 2 x float> @vfadd_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1571; CHECK-LABEL: vfadd_vv_nxv2f32: 1572; CHECK: # %bb.0: 1573; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1574; CHECK-NEXT: vfadd.vv v8, v8, v9, v0.t 1575; CHECK-NEXT: ret 1576 %v = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %b, <vscale x 2 x i1> %m, i32 %evl) 1577 ret <vscale x 2 x float> %v 1578} 1579 1580define <vscale x 2 x float> @vfadd_vv_nxv2f32_unmasked(<vscale x 2 x float> %va, <vscale x 2 x float> %b, i32 zeroext %evl) { 1581; CHECK-LABEL: vfadd_vv_nxv2f32_unmasked: 1582; CHECK: # %bb.0: 1583; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1584; CHECK-NEXT: vfadd.vv v8, v8, v9 1585; CHECK-NEXT: ret 1586 %v = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl) 1587 ret <vscale x 2 x float> %v 1588} 1589 1590define <vscale x 2 x float> @vfadd_vf_nxv2f32(<vscale x 2 x float> %va, float %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1591; CHECK-LABEL: vfadd_vf_nxv2f32: 1592; CHECK: # %bb.0: 1593; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1594; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t 1595; CHECK-NEXT: ret 1596 %elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0 1597 %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer 1598 %v = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 %evl) 1599 ret <vscale x 2 x float> %v 1600} 1601 1602define <vscale x 2 x float> @vfadd_vf_nxv2f32_unmasked(<vscale x 2 x float> %va, float %b, i32 zeroext %evl) { 1603; CHECK-LABEL: vfadd_vf_nxv2f32_unmasked: 1604; CHECK: # %bb.0: 1605; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1606; CHECK-NEXT: vfadd.vf v8, v8, fa0 1607; CHECK-NEXT: ret 1608 
%elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0 1609 %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer 1610 %v = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) 1611 ret <vscale x 2 x float> %v 1612} 1613 1614declare <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, i32) 1615 1616define <vscale x 4 x float> @vfadd_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1617; CHECK-LABEL: vfadd_vv_nxv4f32: 1618; CHECK: # %bb.0: 1619; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1620; CHECK-NEXT: vfadd.vv v8, v8, v10, v0.t 1621; CHECK-NEXT: ret 1622 %v = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %b, <vscale x 4 x i1> %m, i32 %evl) 1623 ret <vscale x 4 x float> %v 1624} 1625 1626define <vscale x 4 x float> @vfadd_vv_nxv4f32_unmasked(<vscale x 4 x float> %va, <vscale x 4 x float> %b, i32 zeroext %evl) { 1627; CHECK-LABEL: vfadd_vv_nxv4f32_unmasked: 1628; CHECK: # %bb.0: 1629; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1630; CHECK-NEXT: vfadd.vv v8, v8, v10 1631; CHECK-NEXT: ret 1632 %v = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl) 1633 ret <vscale x 4 x float> %v 1634} 1635 1636define <vscale x 4 x float> @vfadd_vf_nxv4f32(<vscale x 4 x float> %va, float %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1637; CHECK-LABEL: vfadd_vf_nxv4f32: 1638; CHECK: # %bb.0: 1639; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1640; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t 1641; CHECK-NEXT: ret 1642 %elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0 1643 %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> 
poison, <vscale x 4 x i32> zeroinitializer 1644 %v = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 %evl) 1645 ret <vscale x 4 x float> %v 1646} 1647 1648define <vscale x 4 x float> @vfadd_vf_nxv4f32_unmasked(<vscale x 4 x float> %va, float %b, i32 zeroext %evl) { 1649; CHECK-LABEL: vfadd_vf_nxv4f32_unmasked: 1650; CHECK: # %bb.0: 1651; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1652; CHECK-NEXT: vfadd.vf v8, v8, fa0 1653; CHECK-NEXT: ret 1654 %elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0 1655 %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer 1656 %v = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) 1657 ret <vscale x 4 x float> %v 1658} 1659 1660declare <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x i1>, i32) 1661 1662define <vscale x 8 x float> @vfadd_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1663; CHECK-LABEL: vfadd_vv_nxv8f32: 1664; CHECK: # %bb.0: 1665; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1666; CHECK-NEXT: vfadd.vv v8, v8, v12, v0.t 1667; CHECK-NEXT: ret 1668 %v = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %b, <vscale x 8 x i1> %m, i32 %evl) 1669 ret <vscale x 8 x float> %v 1670} 1671 1672define <vscale x 8 x float> @vfadd_vv_nxv8f32_unmasked(<vscale x 8 x float> %va, <vscale x 8 x float> %b, i32 zeroext %evl) { 1673; CHECK-LABEL: vfadd_vv_nxv8f32_unmasked: 1674; CHECK: # %bb.0: 1675; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1676; CHECK-NEXT: vfadd.vv v8, v8, v12 1677; CHECK-NEXT: ret 1678 %v = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %b, <vscale x 8 x i1> splat 
(i1 true), i32 %evl) 1679 ret <vscale x 8 x float> %v 1680} 1681 1682define <vscale x 8 x float> @vfadd_vf_nxv8f32(<vscale x 8 x float> %va, float %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1683; CHECK-LABEL: vfadd_vf_nxv8f32: 1684; CHECK: # %bb.0: 1685; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1686; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t 1687; CHECK-NEXT: ret 1688 %elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0 1689 %vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer 1690 %v = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 %evl) 1691 ret <vscale x 8 x float> %v 1692} 1693 1694define <vscale x 8 x float> @vfadd_vf_nxv8f32_unmasked(<vscale x 8 x float> %va, float %b, i32 zeroext %evl) { 1695; CHECK-LABEL: vfadd_vf_nxv8f32_unmasked: 1696; CHECK: # %bb.0: 1697; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1698; CHECK-NEXT: vfadd.vf v8, v8, fa0 1699; CHECK-NEXT: ret 1700 %elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0 1701 %vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer 1702 %v = call <vscale x 8 x float> @llvm.vp.fadd.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) 1703 ret <vscale x 8 x float> %v 1704} 1705 1706declare <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x i1>, i32) 1707 1708define <vscale x 16 x float> @vfadd_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1709; CHECK-LABEL: vfadd_vv_nxv16f32: 1710; CHECK: # %bb.0: 1711; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1712; CHECK-NEXT: vfadd.vv v8, v8, v16, v0.t 1713; CHECK-NEXT: ret 1714 %v = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> %va, 
<vscale x 16 x float> %b, <vscale x 16 x i1> %m, i32 %evl) 1715 ret <vscale x 16 x float> %v 1716} 1717 1718define <vscale x 16 x float> @vfadd_vv_nxv16f32_unmasked(<vscale x 16 x float> %va, <vscale x 16 x float> %b, i32 zeroext %evl) { 1719; CHECK-LABEL: vfadd_vv_nxv16f32_unmasked: 1720; CHECK: # %bb.0: 1721; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1722; CHECK-NEXT: vfadd.vv v8, v8, v16 1723; CHECK-NEXT: ret 1724 %v = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl) 1725 ret <vscale x 16 x float> %v 1726} 1727 1728define <vscale x 16 x float> @vfadd_vf_nxv16f32(<vscale x 16 x float> %va, float %b, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1729; CHECK-LABEL: vfadd_vf_nxv16f32: 1730; CHECK: # %bb.0: 1731; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1732; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t 1733; CHECK-NEXT: ret 1734 %elt.head = insertelement <vscale x 16 x float> poison, float %b, i32 0 1735 %vb = shufflevector <vscale x 16 x float> %elt.head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer 1736 %v = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, <vscale x 16 x i1> %m, i32 %evl) 1737 ret <vscale x 16 x float> %v 1738} 1739 1740define <vscale x 16 x float> @vfadd_vf_nxv16f32_unmasked(<vscale x 16 x float> %va, float %b, i32 zeroext %evl) { 1741; CHECK-LABEL: vfadd_vf_nxv16f32_unmasked: 1742; CHECK: # %bb.0: 1743; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1744; CHECK-NEXT: vfadd.vf v8, v8, fa0 1745; CHECK-NEXT: ret 1746 %elt.head = insertelement <vscale x 16 x float> poison, float %b, i32 0 1747 %vb = shufflevector <vscale x 16 x float> %elt.head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer 1748 %v = call <vscale x 16 x float> @llvm.vp.fadd.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl) 1749 ret 
<vscale x 16 x float> %v 1750} 1751 1752declare <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x i1>, i32) 1753 1754define <vscale x 1 x double> @vfadd_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1755; CHECK-LABEL: vfadd_vv_nxv1f64: 1756; CHECK: # %bb.0: 1757; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1758; CHECK-NEXT: vfadd.vv v8, v8, v9, v0.t 1759; CHECK-NEXT: ret 1760 %v = call <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %b, <vscale x 1 x i1> %m, i32 %evl) 1761 ret <vscale x 1 x double> %v 1762} 1763 1764define <vscale x 1 x double> @vfadd_vv_nxv1f64_unmasked(<vscale x 1 x double> %va, <vscale x 1 x double> %b, i32 zeroext %evl) { 1765; CHECK-LABEL: vfadd_vv_nxv1f64_unmasked: 1766; CHECK: # %bb.0: 1767; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1768; CHECK-NEXT: vfadd.vv v8, v8, v9 1769; CHECK-NEXT: ret 1770 %v = call <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl) 1771 ret <vscale x 1 x double> %v 1772} 1773 1774define <vscale x 1 x double> @vfadd_vf_nxv1f64(<vscale x 1 x double> %va, double %b, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1775; CHECK-LABEL: vfadd_vf_nxv1f64: 1776; CHECK: # %bb.0: 1777; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1778; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t 1779; CHECK-NEXT: ret 1780 %elt.head = insertelement <vscale x 1 x double> poison, double %b, i32 0 1781 %vb = shufflevector <vscale x 1 x double> %elt.head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer 1782 %v = call <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 %evl) 1783 ret <vscale x 1 x double> %v 1784} 1785 1786define <vscale x 1 x double> @vfadd_vf_nxv1f64_unmasked(<vscale x 1 x double> %va, double %b, i32 
zeroext %evl) { 1787; CHECK-LABEL: vfadd_vf_nxv1f64_unmasked: 1788; CHECK: # %bb.0: 1789; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1790; CHECK-NEXT: vfadd.vf v8, v8, fa0 1791; CHECK-NEXT: ret 1792 %elt.head = insertelement <vscale x 1 x double> poison, double %b, i32 0 1793 %vb = shufflevector <vscale x 1 x double> %elt.head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer 1794 %v = call <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) 1795 ret <vscale x 1 x double> %v 1796} 1797 1798declare <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, i32) 1799 1800define <vscale x 2 x double> @vfadd_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1801; CHECK-LABEL: vfadd_vv_nxv2f64: 1802; CHECK: # %bb.0: 1803; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1804; CHECK-NEXT: vfadd.vv v8, v8, v10, v0.t 1805; CHECK-NEXT: ret 1806 %v = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %b, <vscale x 2 x i1> %m, i32 %evl) 1807 ret <vscale x 2 x double> %v 1808} 1809 1810define <vscale x 2 x double> @vfadd_vv_nxv2f64_unmasked(<vscale x 2 x double> %va, <vscale x 2 x double> %b, i32 zeroext %evl) { 1811; CHECK-LABEL: vfadd_vv_nxv2f64_unmasked: 1812; CHECK: # %bb.0: 1813; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1814; CHECK-NEXT: vfadd.vv v8, v8, v10 1815; CHECK-NEXT: ret 1816 %v = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl) 1817 ret <vscale x 2 x double> %v 1818} 1819 1820define <vscale x 2 x double> @vfadd_vf_nxv2f64(<vscale x 2 x double> %va, double %b, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1821; CHECK-LABEL: vfadd_vf_nxv2f64: 1822; CHECK: # %bb.0: 1823; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, 
ma 1824; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t 1825; CHECK-NEXT: ret 1826 %elt.head = insertelement <vscale x 2 x double> poison, double %b, i32 0 1827 %vb = shufflevector <vscale x 2 x double> %elt.head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer 1828 %v = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 %evl) 1829 ret <vscale x 2 x double> %v 1830} 1831 1832define <vscale x 2 x double> @vfadd_vf_nxv2f64_unmasked(<vscale x 2 x double> %va, double %b, i32 zeroext %evl) { 1833; CHECK-LABEL: vfadd_vf_nxv2f64_unmasked: 1834; CHECK: # %bb.0: 1835; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1836; CHECK-NEXT: vfadd.vf v8, v8, fa0 1837; CHECK-NEXT: ret 1838 %elt.head = insertelement <vscale x 2 x double> poison, double %b, i32 0 1839 %vb = shufflevector <vscale x 2 x double> %elt.head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer 1840 %v = call <vscale x 2 x double> @llvm.vp.fadd.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) 1841 ret <vscale x 2 x double> %v 1842} 1843 1844declare <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x i1>, i32) 1845 1846define <vscale x 4 x double> @vfadd_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1847; CHECK-LABEL: vfadd_vv_nxv4f64: 1848; CHECK: # %bb.0: 1849; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1850; CHECK-NEXT: vfadd.vv v8, v8, v12, v0.t 1851; CHECK-NEXT: ret 1852 %v = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %b, <vscale x 4 x i1> %m, i32 %evl) 1853 ret <vscale x 4 x double> %v 1854} 1855 1856define <vscale x 4 x double> @vfadd_vv_nxv4f64_unmasked(<vscale x 4 x double> %va, <vscale x 4 x double> %b, i32 zeroext %evl) { 1857; CHECK-LABEL: vfadd_vv_nxv4f64_unmasked: 1858; CHECK: 
# %bb.0: 1859; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1860; CHECK-NEXT: vfadd.vv v8, v8, v12 1861; CHECK-NEXT: ret 1862 %v = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl) 1863 ret <vscale x 4 x double> %v 1864} 1865 1866define <vscale x 4 x double> @vfadd_vf_nxv4f64(<vscale x 4 x double> %va, double %b, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1867; CHECK-LABEL: vfadd_vf_nxv4f64: 1868; CHECK: # %bb.0: 1869; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1870; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t 1871; CHECK-NEXT: ret 1872 %elt.head = insertelement <vscale x 4 x double> poison, double %b, i32 0 1873 %vb = shufflevector <vscale x 4 x double> %elt.head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer 1874 %v = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 %evl) 1875 ret <vscale x 4 x double> %v 1876} 1877 1878define <vscale x 4 x double> @vfadd_vf_nxv4f64_unmasked(<vscale x 4 x double> %va, double %b, i32 zeroext %evl) { 1879; CHECK-LABEL: vfadd_vf_nxv4f64_unmasked: 1880; CHECK: # %bb.0: 1881; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1882; CHECK-NEXT: vfadd.vf v8, v8, fa0 1883; CHECK-NEXT: ret 1884 %elt.head = insertelement <vscale x 4 x double> poison, double %b, i32 0 1885 %vb = shufflevector <vscale x 4 x double> %elt.head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer 1886 %v = call <vscale x 4 x double> @llvm.vp.fadd.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) 1887 ret <vscale x 4 x double> %v 1888} 1889 1890declare <vscale x 7 x double> @llvm.vp.fadd.nxv7f64(<vscale x 7 x double>, <vscale x 7 x double>, <vscale x 7 x i1>, i32) 1891 1892define <vscale x 7 x double> @vfadd_vv_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x double> %b, <vscale x 7 x i1> %m, i32 zeroext 
%evl) { 1893; CHECK-LABEL: vfadd_vv_nxv7f64: 1894; CHECK: # %bb.0: 1895; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1896; CHECK-NEXT: vfadd.vv v8, v8, v16, v0.t 1897; CHECK-NEXT: ret 1898 %v = call <vscale x 7 x double> @llvm.vp.fadd.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x double> %b, <vscale x 7 x i1> %m, i32 %evl) 1899 ret <vscale x 7 x double> %v 1900} 1901 1902declare <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x i1>, i32) 1903 1904define <vscale x 8 x double> @vfadd_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1905; CHECK-LABEL: vfadd_vv_nxv8f64: 1906; CHECK: # %bb.0: 1907; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1908; CHECK-NEXT: vfadd.vv v8, v8, v16, v0.t 1909; CHECK-NEXT: ret 1910 %v = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %b, <vscale x 8 x i1> %m, i32 %evl) 1911 ret <vscale x 8 x double> %v 1912} 1913 1914define <vscale x 8 x double> @vfadd_vv_nxv8f64_unmasked(<vscale x 8 x double> %va, <vscale x 8 x double> %b, i32 zeroext %evl) { 1915; CHECK-LABEL: vfadd_vv_nxv8f64_unmasked: 1916; CHECK: # %bb.0: 1917; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1918; CHECK-NEXT: vfadd.vv v8, v8, v16 1919; CHECK-NEXT: ret 1920 %v = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl) 1921 ret <vscale x 8 x double> %v 1922} 1923 1924define <vscale x 8 x double> @vfadd_vf_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1925; CHECK-LABEL: vfadd_vf_nxv8f64: 1926; CHECK: # %bb.0: 1927; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1928; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t 1929; CHECK-NEXT: ret 1930 %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 1931 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x 
double> poison, <vscale x 8 x i32> zeroinitializer 1932 %v = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 %evl) 1933 ret <vscale x 8 x double> %v 1934} 1935 1936define <vscale x 8 x double> @vfadd_vf_nxv8f64_unmasked(<vscale x 8 x double> %va, double %b, i32 zeroext %evl) { 1937; CHECK-LABEL: vfadd_vf_nxv8f64_unmasked: 1938; CHECK: # %bb.0: 1939; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1940; CHECK-NEXT: vfadd.vf v8, v8, fa0 1941; CHECK-NEXT: ret 1942 %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 1943 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer 1944 %v = call <vscale x 8 x double> @llvm.vp.fadd.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) 1945 ret <vscale x 8 x double> %v 1946} 1947