1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ 3; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 4; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ 5; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 6 7declare <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32) 8declare <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32) 9declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32) 10declare <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32) 11 12define <vscale x 1 x i8> @vmacc_vv_nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 13; CHECK-LABEL: vmacc_vv_nxv1i8: 14; CHECK: # %bb.0: 15; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu 16; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 17; CHECK-NEXT: vmv1r.v v8, v10 18; CHECK-NEXT: ret 19 %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 20 %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 21 %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl) 22 ret <vscale x 1 x i8> %u 23} 24 25define <vscale x 1 x i8> @vmacc_vv_nxv1i8_unmasked(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 26; CHECK-LABEL: vmacc_vv_nxv1i8_unmasked: 27; CHECK: # %bb.0: 28; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma 29; CHECK-NEXT: vmacc.vv v10, v8, v9 30; CHECK-NEXT: vmv1r.v v8, v10 31; CHECK-NEXT: ret 32 %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 33 %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 34 %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl) 35 ret <vscale x 1 x i8> %u 36} 37 38define <vscale x 1 x i8> @vmacc_vx_nxv1i8(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 39; CHECK-LABEL: vmacc_vx_nxv1i8: 40; CHECK: # %bb.0: 41; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu 42; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 43; CHECK-NEXT: vmv1r.v v8, v9 44; CHECK-NEXT: ret 45 %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 46 %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer 47 %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 48 %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 49 %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl) 50 ret <vscale x 1 x i8> %u 51} 52 53define <vscale x 1 x i8> @vmacc_vx_nxv1i8_unmasked(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 54; CHECK-LABEL: vmacc_vx_nxv1i8_unmasked: 55; CHECK: # %bb.0: 56; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma 57; CHECK-NEXT: vmacc.vx v9, a0, v8 58; CHECK-NEXT: vmv1r.v v8, v9 59; CHECK-NEXT: ret 60 %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 61 %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer 62 %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 63 %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 64 %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl) 65 ret <vscale x 1 x i8> %u 66} 67 68define <vscale x 1 x i8> @vmacc_vv_nxv1i8_ta(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 69; CHECK-LABEL: vmacc_vv_nxv1i8_ta: 70; CHECK: # %bb.0: 71; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu 72; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 73; CHECK-NEXT: vmv1r.v v8, v10 74; CHECK-NEXT: ret 75 %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 76 %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 77 %u = call <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl) 78 ret <vscale x 1 x i8> %u 79} 80 81define <vscale x 1 x i8> @vmacc_vx_nxv1i8_ta(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 82; CHECK-LABEL: vmacc_vx_nxv1i8_ta: 83; CHECK: # %bb.0: 84; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu 85; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 86; CHECK-NEXT: vmv1r.v v8, v9 87; CHECK-NEXT: ret 88 %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0 89 %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer 90 %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 91 %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 92 %u = call <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl) 93 ret <vscale x 1 x i8> %u 94} 95 96declare <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32) 97declare <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32) 98declare <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32) 99declare <vscale x 2 x i8> @llvm.vp.select.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32) 100 101define <vscale x 2 x i8> @vmacc_vv_nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i8> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 102; CHECK-LABEL: vmacc_vv_nxv2i8: 103; CHECK: # %bb.0: 104; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu 105; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 106; CHECK-NEXT: vmv1r.v v8, v10 107; CHECK-NEXT: ret 108 %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 109 %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 110 %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl) 111 ret <vscale x 2 x i8> %u 112} 113 114define <vscale x 2 x i8> @vmacc_vv_nxv2i8_unmasked(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i8> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 115; CHECK-LABEL: vmacc_vv_nxv2i8_unmasked: 116; CHECK: # %bb.0: 117; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma 118; CHECK-NEXT: vmacc.vv v10, v8, v9 119; CHECK-NEXT: vmv1r.v v8, v10 120; CHECK-NEXT: ret 121 %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 122 %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 123 %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl) 124 ret <vscale x 2 x i8> %u 125} 126 127define <vscale x 2 x i8> @vmacc_vx_nxv2i8(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 128; CHECK-LABEL: vmacc_vx_nxv2i8: 129; CHECK: # %bb.0: 130; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, mu 131; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 132; CHECK-NEXT: vmv1r.v v8, v9 133; CHECK-NEXT: ret 134 %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0 135 %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer 136 %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 137 %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 138 %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl) 139 ret <vscale x 2 x i8> %u 140} 141 142define <vscale x 2 x i8> @vmacc_vx_nxv2i8_unmasked(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 143; CHECK-LABEL: vmacc_vx_nxv2i8_unmasked: 144; CHECK: # %bb.0: 145; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma 146; CHECK-NEXT: vmacc.vx v9, a0, v8 147; CHECK-NEXT: vmv1r.v v8, v9 148; CHECK-NEXT: ret 149 %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0 150 %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer 151 %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 152 %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 153 %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl) 154 ret <vscale x 2 x i8> %u 155} 156 157define <vscale x 2 x i8> @vmacc_vv_nxv2i8_ta(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i8> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 158; CHECK-LABEL: vmacc_vv_nxv2i8_ta: 159; CHECK: # %bb.0: 160; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu 161; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 162; CHECK-NEXT: vmv1r.v v8, v10 163; CHECK-NEXT: ret 164 %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 165 %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 166 %u = call <vscale x 2 x i8> @llvm.vp.select.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl) 167 ret <vscale x 2 x i8> %u 168} 169 170define <vscale x 2 x i8> @vmacc_vx_nxv2i8_ta(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 171; CHECK-LABEL: vmacc_vx_nxv2i8_ta: 172; CHECK: # %bb.0: 173; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu 174; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 175; CHECK-NEXT: vmv1r.v v8, v9 176; CHECK-NEXT: ret 177 %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0 178 %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer 179 %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 180 %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 181 %u = call <vscale x 2 x i8> @llvm.vp.select.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl) 182 ret <vscale x 2 x i8> %u 183} 184 185declare <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32) 186declare <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32) 187declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32) 188declare <vscale x 4 x i8> @llvm.vp.select.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32) 189 190define <vscale x 4 x i8> @vmacc_vv_nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i8> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 191; CHECK-LABEL: vmacc_vv_nxv4i8: 192; CHECK: # %bb.0: 193; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu 194; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 195; CHECK-NEXT: vmv1r.v v8, v10 196; CHECK-NEXT: ret 197 %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 198 %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 199 %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl) 200 ret <vscale x 4 x i8> %u 201} 202 203define <vscale x 4 x i8> @vmacc_vv_nxv4i8_unmasked(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i8> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 204; CHECK-LABEL: vmacc_vv_nxv4i8_unmasked: 205; CHECK: # %bb.0: 206; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma 207; CHECK-NEXT: vmacc.vv v10, v8, v9 208; CHECK-NEXT: vmv1r.v v8, v10 209; CHECK-NEXT: ret 210 %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 211 %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 212 %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl) 213 ret <vscale x 4 x i8> %u 214} 215 216define <vscale x 4 x i8> @vmacc_vx_nxv4i8(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 217; CHECK-LABEL: vmacc_vx_nxv4i8: 218; CHECK: # %bb.0: 219; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu 220; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 221; CHECK-NEXT: vmv1r.v v8, v9 222; CHECK-NEXT: ret 223 %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0 224 %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer 225 %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 226 %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 227 %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl) 228 ret <vscale x 4 x i8> %u 229} 230 231define <vscale x 4 x i8> @vmacc_vx_nxv4i8_unmasked(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 232; CHECK-LABEL: vmacc_vx_nxv4i8_unmasked: 233; CHECK: # %bb.0: 234; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma 235; CHECK-NEXT: vmacc.vx v9, a0, v8 236; CHECK-NEXT: vmv1r.v v8, v9 237; CHECK-NEXT: ret 238 %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0 239 %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer 240 %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 241 %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 242 %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl) 243 ret <vscale x 4 x i8> %u 244} 245 246define <vscale x 4 x i8> @vmacc_vv_nxv4i8_ta(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i8> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 247; CHECK-LABEL: vmacc_vv_nxv4i8_ta: 248; CHECK: # %bb.0: 249; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu 250; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 251; CHECK-NEXT: vmv1r.v v8, v10 252; CHECK-NEXT: ret 253 %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 254 %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 255 %u = call <vscale x 4 x i8> @llvm.vp.select.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl) 256 ret <vscale x 4 x i8> %u 257} 258 259define <vscale x 4 x i8> @vmacc_vx_nxv4i8_ta(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 260; CHECK-LABEL: vmacc_vx_nxv4i8_ta: 261; CHECK: # %bb.0: 262; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu 263; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 264; CHECK-NEXT: vmv1r.v v8, v9 265; CHECK-NEXT: ret 266 %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0 267 %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer 268 %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 269 %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 270 %u = call <vscale x 4 x i8> @llvm.vp.select.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl) 271 ret <vscale x 4 x i8> %u 272} 273 274declare <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32) 275declare <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32) 276declare <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32) 277declare <vscale x 8 x i8> @llvm.vp.select.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32) 278 279define <vscale x 8 x i8> @vmacc_vv_nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 280; CHECK-LABEL: vmacc_vv_nxv8i8: 281; CHECK: # %bb.0: 282; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu 283; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 284; CHECK-NEXT: vmv1r.v v8, v10 285; CHECK-NEXT: ret 286 %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 287 %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 288 %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl) 289 ret <vscale x 8 x i8> %u 290} 291 292define <vscale x 8 x i8> @vmacc_vv_nxv8i8_unmasked(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 293; CHECK-LABEL: vmacc_vv_nxv8i8_unmasked: 294; CHECK: # %bb.0: 295; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma 296; CHECK-NEXT: vmacc.vv v10, v8, v9 297; CHECK-NEXT: vmv1r.v v8, v10 298; CHECK-NEXT: ret 299 %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 300 %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 301 %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl) 302 ret <vscale x 8 x i8> %u 303} 304 305define <vscale x 8 x i8> @vmacc_vx_nxv8i8(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 306; CHECK-LABEL: vmacc_vx_nxv8i8: 307; CHECK: # %bb.0: 308; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, mu 309; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 310; CHECK-NEXT: vmv1r.v v8, v9 311; CHECK-NEXT: ret 312 %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0 313 %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer 314 %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 315 %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 316 %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl) 317 ret <vscale x 8 x i8> %u 318} 319 320define <vscale x 8 x i8> @vmacc_vx_nxv8i8_unmasked(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 321; CHECK-LABEL: vmacc_vx_nxv8i8_unmasked: 322; CHECK: # %bb.0: 323; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma 324; CHECK-NEXT: vmacc.vx v9, a0, v8 325; CHECK-NEXT: vmv1r.v v8, v9 326; CHECK-NEXT: ret 327 %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0 328 %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer 329 %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 330 %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 331 %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl) 332 ret <vscale x 8 x i8> %u 333} 334 335define <vscale x 8 x i8> @vmacc_vv_nxv8i8_ta(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 336; CHECK-LABEL: vmacc_vv_nxv8i8_ta: 337; CHECK: # %bb.0: 338; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu 339; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 340; CHECK-NEXT: vmv.v.v v8, v10 341; CHECK-NEXT: ret 342 %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 343 %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 344 %u = call <vscale x 8 x i8> @llvm.vp.select.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl) 345 ret <vscale x 8 x i8> %u 346} 347 348define <vscale x 8 x i8> @vmacc_vx_nxv8i8_ta(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 349; CHECK-LABEL: vmacc_vx_nxv8i8_ta: 350; CHECK: # %bb.0: 351; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu 352; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 353; CHECK-NEXT: vmv.v.v v8, v9 354; CHECK-NEXT: ret 355 %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0 356 %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer 357 %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 358 %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 359 %u = call <vscale x 8 x i8> @llvm.vp.select.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl) 360 ret <vscale x 8 x i8> %u 361} 362 363declare <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32) 364declare <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32) 365declare <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32) 366declare <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32) 367 368define <vscale x 16 x i8> @vmacc_vv_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 369; CHECK-LABEL: vmacc_vv_nxv16i8: 370; CHECK: # %bb.0: 371; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu 372; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t 373; CHECK-NEXT: vmv2r.v v8, v12 374; CHECK-NEXT: ret 375 %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 376 %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 377 %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl) 378 ret <vscale x 16 x i8> %u 379} 380 381define <vscale x 16 x i8> @vmacc_vv_nxv16i8_unmasked(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 382; CHECK-LABEL: vmacc_vv_nxv16i8_unmasked: 383; CHECK: # %bb.0: 384; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, ma 385; CHECK-NEXT: vmacc.vv v12, v8, v10 386; CHECK-NEXT: vmv2r.v v8, v12 387; CHECK-NEXT: ret 388 %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 389 %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 390 %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl) 391 ret <vscale x 16 x i8> %u 392} 393 394define <vscale x 16 x i8> @vmacc_vx_nxv16i8(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 395; CHECK-LABEL: vmacc_vx_nxv16i8: 396; CHECK: # %bb.0: 397; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, mu 398; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t 399; CHECK-NEXT: vmv2r.v v8, v10 400; CHECK-NEXT: ret 401 %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0 402 %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer 403 %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 404 %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 405 %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl) 406 ret <vscale x 16 x i8> %u 407} 408 409define <vscale x 16 x i8> @vmacc_vx_nxv16i8_unmasked(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 410; CHECK-LABEL: vmacc_vx_nxv16i8_unmasked: 411; CHECK: # %bb.0: 412; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, ma 413; CHECK-NEXT: vmacc.vx v10, a0, v8 414; CHECK-NEXT: vmv2r.v v8, v10 415; CHECK-NEXT: ret 416 %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0 417 %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer 418 %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 419 %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 420 %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl) 421 ret <vscale x 16 x i8> %u 422} 423 424define <vscale x 16 x i8> @vmacc_vv_nxv16i8_ta(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 425; CHECK-LABEL: vmacc_vv_nxv16i8_ta: 426; CHECK: # %bb.0: 427; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu 428; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t 429; CHECK-NEXT: vmv.v.v v8, v12 430; CHECK-NEXT: ret 431 %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 432 %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 433 %u = call <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl) 434 ret <vscale x 16 x i8> %u 435} 436 437define <vscale x 16 x i8> @vmacc_vx_nxv16i8_ta(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 438; CHECK-LABEL: vmacc_vx_nxv16i8_ta: 439; CHECK: # %bb.0: 440; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu 441; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t 442; CHECK-NEXT: vmv.v.v v8, v10 443; CHECK-NEXT: ret 444 %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0 445 %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer 446 %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 447 %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 448 %u = call <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl) 449 ret <vscale x 16 x i8> %u 450} 451 452declare <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32) 453declare <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32) 454declare <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1>, <vscale x 32 x i8>, <vscale x 32 x i8>, i32) 455declare <vscale x 32 x i8> @llvm.vp.select.nxv32i8(<vscale x 32 x i1>, <vscale x 32 x i8>, <vscale x 32 x i8>, i32) 456 457define <vscale x 32 x i8> @vmacc_vv_nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) { 458; CHECK-LABEL: vmacc_vv_nxv32i8: 459; CHECK: # %bb.0: 460; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu 461; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t 462; CHECK-NEXT: vmv4r.v v8, v16 463; CHECK-NEXT: ret 464 %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 465 %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 466 %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl) 467 ret <vscale x 32 x i8> %u 468} 469 470define <vscale x 32 x i8> @vmacc_vv_nxv32i8_unmasked(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) { 471; CHECK-LABEL: vmacc_vv_nxv32i8_unmasked: 472; CHECK: # %bb.0: 473; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma 474; CHECK-NEXT: vmacc.vv v16, v8, v12 475; CHECK-NEXT: vmv4r.v v8, v16 476; CHECK-NEXT: ret 477 %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 478 %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 479 %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> splat (i1 -1), <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl) 480 ret <vscale x 32 x i8> %u 481} 482 483define <vscale x 32 x i8> @vmacc_vx_nxv32i8(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) { 484; CHECK-LABEL: vmacc_vx_nxv32i8: 485; CHECK: # %bb.0: 486; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, mu 487; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t 488; CHECK-NEXT: vmv4r.v v8, v12 489; CHECK-NEXT: ret 490 %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0 491 %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer 492 %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 493 %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 494 %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl) 495 ret <vscale x 32 x i8> %u 496} 497 498define <vscale x 32 x i8> @vmacc_vx_nxv32i8_unmasked(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) { 499; CHECK-LABEL: vmacc_vx_nxv32i8_unmasked: 500; CHECK: # %bb.0: 501; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, ma 502; CHECK-NEXT: vmacc.vx v12, a0, v8 503; CHECK-NEXT: vmv4r.v v8, v12 504; CHECK-NEXT: ret 505 %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0 506 %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer 507 %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 508 %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 509 %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> splat (i1 -1), <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl) 510 ret <vscale x 32 x i8> %u 511} 512 513define <vscale x 32 x i8> @vmacc_vv_nxv32i8_ta(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) { 514; CHECK-LABEL: vmacc_vv_nxv32i8_ta: 515; CHECK: # %bb.0: 516; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu 517; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t 518; CHECK-NEXT: vmv.v.v v8, v16 519; CHECK-NEXT: ret 520 %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 521 %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 522 %u = call <vscale x 32 x i8> @llvm.vp.select.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl) 523 ret <vscale x 32 x i8> %u 524} 525 526define <vscale x 32 x i8> @vmacc_vx_nxv32i8_ta(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) { 527; CHECK-LABEL: vmacc_vx_nxv32i8_ta: 528; CHECK: # %bb.0: 529; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu 530; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t 531; CHECK-NEXT: vmv.v.v v8, v12 532; CHECK-NEXT: ret 533 %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0 534 %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer 535 %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 536 %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 537 %u = call <vscale x 32 x i8> @llvm.vp.select.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl) 538 ret <vscale x 32 x i8> %u 539} 540 541declare <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32) 542declare <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32) 543declare <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1>, <vscale x 64 x i8>, <vscale x 64 x i8>, i32) 544declare <vscale x 64 x i8> @llvm.vp.select.nxv64i8(<vscale x 64 x i1>, <vscale x 64 x i8>, <vscale x 64 x i8>, i32) 545 546define <vscale x 64 x i8> @vmacc_vv_nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) { 547; CHECK-LABEL: vmacc_vv_nxv64i8: 548; CHECK: # %bb.0: 549; CHECK-NEXT: vl8r.v v24, (a0) 550; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu 551; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t 552; CHECK-NEXT: vmv8r.v v8, v24 553; CHECK-NEXT: ret 554 %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> splat (i1 -1), i32 %evl) 555 %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl) 556 %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl) 557 ret <vscale x 64 x i8> %u 558} 559 560define <vscale x 64 x i8> @vmacc_vv_nxv64i8_unmasked(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) { 561; CHECK-LABEL: vmacc_vv_nxv64i8_unmasked: 562; CHECK: # %bb.0: 563; CHECK-NEXT: vl8r.v v24, (a0) 564; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma 565; CHECK-NEXT: vmacc.vv v24, v8, v16 566; CHECK-NEXT: vmv8r.v v8, v24 567; CHECK-NEXT: ret 568 %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> splat (i1 -1), i32 %evl) 569 %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl) 570 %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> splat (i1 -1), <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl) 571 ret <vscale x 64 x i8> %u 572} 573 574define <vscale x 64 x i8> @vmacc_vx_nxv64i8(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) { 575; CHECK-LABEL: vmacc_vx_nxv64i8: 576; CHECK: # %bb.0: 577; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu 578; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t 579; CHECK-NEXT: vmv8r.v v8, v16 580; CHECK-NEXT: ret 581 %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0 582 %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer 583 %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> splat (i1 -1), i32 %evl) 584 %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl) 585 %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl) 586 ret <vscale x 64 x i8> %u 587} 588 589define <vscale x 64 x i8> @vmacc_vx_nxv64i8_unmasked(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) { 590; CHECK-LABEL: vmacc_vx_nxv64i8_unmasked: 591; CHECK: # %bb.0: 592; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma 593; CHECK-NEXT: vmacc.vx v16, a0, v8 594; CHECK-NEXT: vmv8r.v v8, v16 595; CHECK-NEXT: ret 596 %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0 597 %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer 598 %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> splat (i1 -1), i32 %evl) 599 %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl) 600 %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> splat (i1 -1), <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl) 601 ret <vscale x 64 x i8> %u 602} 603 604define <vscale x 64 x i8> @vmacc_vv_nxv64i8_ta(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) { 605; CHECK-LABEL: vmacc_vv_nxv64i8_ta: 606; CHECK: # %bb.0: 607; CHECK-NEXT: vl8r.v v24, (a0) 608; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu 609; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t 610; CHECK-NEXT: vmv.v.v v8, v24 611; CHECK-NEXT: ret 612 %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> splat (i1 -1), i32 %evl) 613 %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl) 614 %u = call <vscale x 64 x i8> @llvm.vp.select.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl) 615 ret <vscale x 64 x i8> %u 616} 617 618define <vscale x 64 x i8> @vmacc_vx_nxv64i8_ta(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) { 619; CHECK-LABEL: vmacc_vx_nxv64i8_ta: 620; CHECK: # %bb.0: 621; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu 622; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t 623; CHECK-NEXT: vmv.v.v v8, v16 624; CHECK-NEXT: ret 625 %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0 626 %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer 627 %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> splat (i1 -1), i32 %evl) 628 %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl) 629 %u = call <vscale x 64 x i8> @llvm.vp.select.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl) 630 ret <vscale x 64 x i8> %u 631} 632 633declare <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32) 634declare <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32) 635declare <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, i32) 636declare <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, i32) 637 638define <vscale x 1 x i16> @vmacc_vv_nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 639; CHECK-LABEL: vmacc_vv_nxv1i16: 640; CHECK: # %bb.0: 641; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu 642; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 643; CHECK-NEXT: vmv1r.v v8, v10 644; CHECK-NEXT: ret 645 %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 646 %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 647 %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl) 648 ret <vscale x 1 x i16> %u 649} 650 651define <vscale x 1 x i16> @vmacc_vv_nxv1i16_unmasked(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 652; CHECK-LABEL: vmacc_vv_nxv1i16_unmasked: 653; CHECK: # %bb.0: 654; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma 655; CHECK-NEXT: vmacc.vv v10, v8, v9 656; CHECK-NEXT: vmv1r.v v8, v10 657; CHECK-NEXT: ret 658 %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 659 %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 660 %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl) 661 ret <vscale x 1 x i16> %u 662} 663 664define <vscale x 1 x i16> @vmacc_vx_nxv1i16(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 665; CHECK-LABEL: vmacc_vx_nxv1i16: 666; CHECK: # %bb.0: 667; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu 668; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 669; CHECK-NEXT: vmv1r.v v8, v9 670; CHECK-NEXT: ret 671 %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0 672 %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer 673 %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 674 %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 675 %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl) 676 ret <vscale x 1 x i16> %u 677} 678 679define <vscale x 1 x i16> @vmacc_vx_nxv1i16_unmasked(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 680; CHECK-LABEL: vmacc_vx_nxv1i16_unmasked: 681; CHECK: # %bb.0: 682; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma 683; CHECK-NEXT: vmacc.vx v9, a0, v8 684; CHECK-NEXT: vmv1r.v v8, v9 685; CHECK-NEXT: ret 686 %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0 687 %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer 688 %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 689 %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 690 %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl) 691 ret <vscale x 1 x i16> %u 692} 693 694define <vscale x 1 x i16> @vmacc_vv_nxv1i16_ta(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 695; CHECK-LABEL: vmacc_vv_nxv1i16_ta: 696; CHECK: # %bb.0: 697; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu 698; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 699; CHECK-NEXT: vmv1r.v v8, v10 700; CHECK-NEXT: ret 701 %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 702 %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 703 %u = call <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl) 704 ret <vscale x 1 x i16> %u 705} 706 707define <vscale x 1 x i16> @vmacc_vx_nxv1i16_ta(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 708; CHECK-LABEL: vmacc_vx_nxv1i16_ta: 709; CHECK: # %bb.0: 710; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu 711; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 712; CHECK-NEXT: vmv1r.v v8, v9 713; CHECK-NEXT: ret 714 %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0 715 %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer 716 %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 717 %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 718 %u = call <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl) 719 ret <vscale x 1 x i16> %u 720} 721 722declare <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32) 723declare <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32) 724declare <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32) 725declare <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32) 726 727define <vscale x 2 x i16> @vmacc_vv_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 728; CHECK-LABEL: vmacc_vv_nxv2i16: 729; CHECK: # %bb.0: 730; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu 731; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 732; CHECK-NEXT: vmv1r.v v8, v10 733; CHECK-NEXT: ret 734 %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 735 %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 736 %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl) 737 ret <vscale x 2 x i16> %u 738} 739 740define <vscale x 2 x i16> @vmacc_vv_nxv2i16_unmasked(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 741; CHECK-LABEL: vmacc_vv_nxv2i16_unmasked: 742; CHECK: # %bb.0: 743; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma 744; CHECK-NEXT: vmacc.vv v10, v8, v9 745; CHECK-NEXT: vmv1r.v v8, v10 746; CHECK-NEXT: ret 747 %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 748 %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 749 %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl) 750 ret <vscale x 2 x i16> %u 751} 752 753define <vscale x 2 x i16> @vmacc_vx_nxv2i16(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 754; CHECK-LABEL: vmacc_vx_nxv2i16: 755; CHECK: # %bb.0: 756; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu 757; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 758; CHECK-NEXT: vmv1r.v v8, v9 759; CHECK-NEXT: ret 760 %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 761 %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer 762 %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 763 %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 764 %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl) 765 ret <vscale x 2 x i16> %u 766} 767 768define <vscale x 2 x i16> @vmacc_vx_nxv2i16_unmasked(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 769; CHECK-LABEL: vmacc_vx_nxv2i16_unmasked: 770; CHECK: # %bb.0: 771; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma 772; CHECK-NEXT: vmacc.vx v9, a0, v8 773; CHECK-NEXT: vmv1r.v v8, v9 774; CHECK-NEXT: ret 775 %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 776 %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer 777 %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 778 %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 779 %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl) 780 ret <vscale x 2 x i16> %u 781} 782 783define <vscale x 2 x i16> @vmacc_vv_nxv2i16_ta(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 784; CHECK-LABEL: vmacc_vv_nxv2i16_ta: 785; CHECK: # %bb.0: 786; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu 787; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 788; CHECK-NEXT: vmv1r.v v8, v10 789; CHECK-NEXT: ret 790 %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 791 %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 792 %u = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl) 793 ret <vscale x 2 x i16> %u 794} 795 796define <vscale x 2 x i16> @vmacc_vx_nxv2i16_ta(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 797; CHECK-LABEL: vmacc_vx_nxv2i16_ta: 798; CHECK: # %bb.0: 799; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu 800; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 801; CHECK-NEXT: vmv1r.v v8, v9 802; CHECK-NEXT: ret 803 %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 804 %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer 805 %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 806 %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 807 %u = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl) 808 ret <vscale x 2 x i16> %u 809} 810 811declare <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32) 812declare <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32) 813declare <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1>, <vscale x 4 x i16>, <vscale x 4 x i16>, i32) 814declare <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1>, <vscale x 4 x i16>, <vscale x 4 x i16>, i32) 815 816define <vscale x 4 x i16> @vmacc_vv_nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 817; CHECK-LABEL: vmacc_vv_nxv4i16: 818; CHECK: # %bb.0: 819; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu 820; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 821; CHECK-NEXT: vmv1r.v v8, v10 822; CHECK-NEXT: ret 823 %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 824 %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 825 %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl) 826 ret <vscale x 4 x i16> %u 827} 828 829define <vscale x 4 x i16> @vmacc_vv_nxv4i16_unmasked(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 830; CHECK-LABEL: vmacc_vv_nxv4i16_unmasked: 831; CHECK: # %bb.0: 832; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma 833; CHECK-NEXT: vmacc.vv v10, v8, v9 834; CHECK-NEXT: vmv1r.v v8, v10 835; CHECK-NEXT: ret 836 %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 837 %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 838 %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl) 839 ret <vscale x 4 x i16> %u 840} 841 842define <vscale x 4 x i16> @vmacc_vx_nxv4i16(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 843; CHECK-LABEL: vmacc_vx_nxv4i16: 844; CHECK: # %bb.0: 845; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu 846; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 847; CHECK-NEXT: vmv1r.v v8, v9 848; CHECK-NEXT: ret 849 %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0 850 %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer 851 %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 852 %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 853 %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl) 854 ret <vscale x 4 x i16> %u 855} 856 857define <vscale x 4 x i16> @vmacc_vx_nxv4i16_unmasked(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 858; CHECK-LABEL: vmacc_vx_nxv4i16_unmasked: 859; CHECK: # %bb.0: 860; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma 861; CHECK-NEXT: vmacc.vx v9, a0, v8 862; CHECK-NEXT: vmv1r.v v8, v9 863; CHECK-NEXT: ret 864 %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0 865 %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer 866 %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 867 %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 868 %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl) 869 ret <vscale x 4 x i16> %u 870} 871 872define <vscale x 4 x i16> @vmacc_vv_nxv4i16_ta(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 873; CHECK-LABEL: vmacc_vv_nxv4i16_ta: 874; CHECK: # %bb.0: 875; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu 876; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 877; CHECK-NEXT: vmv.v.v v8, v10 878; CHECK-NEXT: ret 879 %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 880 %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 881 %u = call <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl) 882 ret <vscale x 4 x i16> %u 883} 884 885define <vscale x 4 x i16> @vmacc_vx_nxv4i16_ta(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 886; CHECK-LABEL: vmacc_vx_nxv4i16_ta: 887; CHECK: # %bb.0: 888; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu 889; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 890; CHECK-NEXT: vmv.v.v v8, v9 891; CHECK-NEXT: ret 892 %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0 893 %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer 894 %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 895 %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 896 %u = call <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl) 897 ret <vscale x 4 x i16> %u 898} 899 900declare <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32) 901declare <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32) 902declare <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32) 903declare <vscale x 8 x i16> @llvm.vp.select.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32) 904 905define <vscale x 8 x i16> @vmacc_vv_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 906; CHECK-LABEL: vmacc_vv_nxv8i16: 907; CHECK: # %bb.0: 908; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu 909; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t 910; CHECK-NEXT: vmv2r.v v8, v12 911; CHECK-NEXT: ret 912 %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 913 %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 914 %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl) 915 ret <vscale x 8 x i16> %u 916} 917 918define <vscale x 8 x i16> @vmacc_vv_nxv8i16_unmasked(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 919; CHECK-LABEL: vmacc_vv_nxv8i16_unmasked: 920; CHECK: # %bb.0: 921; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma 922; CHECK-NEXT: vmacc.vv v12, v8, v10 923; CHECK-NEXT: vmv2r.v v8, v12 924; CHECK-NEXT: ret 925 %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 926 %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 927 %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl) 928 ret <vscale x 8 x i16> %u 929} 930 931define <vscale x 8 x i16> @vmacc_vx_nxv8i16(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 932; CHECK-LABEL: vmacc_vx_nxv8i16: 933; CHECK: # %bb.0: 934; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu 935; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t 936; CHECK-NEXT: vmv2r.v v8, v10 937; CHECK-NEXT: ret 938 %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0 939 %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer 940 %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 941 %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 942 %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl) 943 ret <vscale x 8 x i16> %u 944} 945 946define <vscale x 8 x i16> @vmacc_vx_nxv8i16_unmasked(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 947; CHECK-LABEL: vmacc_vx_nxv8i16_unmasked: 948; CHECK: # %bb.0: 949; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, ma 950; CHECK-NEXT: vmacc.vx v10, a0, v8 951; CHECK-NEXT: vmv2r.v v8, v10 952; CHECK-NEXT: ret 953 %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0 954 %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer 955 %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 956 %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 957 %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl) 958 ret <vscale x 8 x i16> %u 959} 960 961define <vscale x 8 x i16> @vmacc_vv_nxv8i16_ta(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 962; CHECK-LABEL: vmacc_vv_nxv8i16_ta: 963; CHECK: # %bb.0: 964; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu 965; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t 966; CHECK-NEXT: vmv.v.v v8, v12 967; CHECK-NEXT: ret 968 %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 969 %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 970 %u = call <vscale x 8 x i16> @llvm.vp.select.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl) 971 ret <vscale x 8 x i16> %u 972} 973 974define <vscale x 8 x i16> @vmacc_vx_nxv8i16_ta(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 975; CHECK-LABEL: vmacc_vx_nxv8i16_ta: 976; CHECK: # %bb.0: 977; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu 978; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t 979; CHECK-NEXT: vmv.v.v v8, v10 980; CHECK-NEXT: ret 981 %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0 982 %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer 983 %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 984 %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 985 %u = call <vscale x 8 x i16> @llvm.vp.select.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl) 986 ret <vscale x 8 x i16> %u 987} 988 989declare <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32) 990declare <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32) 991declare <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1>, <vscale x 16 x i16>, <vscale x 16 x i16>, i32) 992declare <vscale x 16 x i16> @llvm.vp.select.nxv16i16(<vscale x 16 x i1>, <vscale x 16 x i16>, <vscale x 16 x i16>, i32) 993 994define <vscale x 16 x i16> @vmacc_vv_nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 995; CHECK-LABEL: vmacc_vv_nxv16i16: 996; CHECK: # %bb.0: 997; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu 998; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t 999; CHECK-NEXT: vmv4r.v v8, v16 1000; CHECK-NEXT: ret 1001 %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1002 %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1003 %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl) 1004 ret <vscale x 16 x i16> %u 1005} 1006 1007define <vscale x 16 x i16> @vmacc_vv_nxv16i16_unmasked(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1008; CHECK-LABEL: vmacc_vv_nxv16i16_unmasked: 1009; CHECK: # %bb.0: 1010; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma 1011; CHECK-NEXT: vmacc.vv v16, v8, v12 1012; CHECK-NEXT: vmv4r.v v8, v16 1013; CHECK-NEXT: ret 1014 %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1015 %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1016 %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl) 1017 ret <vscale x 16 x i16> %u 1018} 1019 1020define <vscale x 16 x i16> @vmacc_vx_nxv16i16(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1021; CHECK-LABEL: vmacc_vx_nxv16i16: 1022; CHECK: # %bb.0: 1023; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu 1024; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t 1025; CHECK-NEXT: vmv4r.v v8, v12 1026; CHECK-NEXT: ret 1027 %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0 1028 %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer 1029 %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1030 %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1031 %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl) 1032 ret <vscale x 16 x i16> %u 1033} 1034 1035define <vscale x 16 x i16> @vmacc_vx_nxv16i16_unmasked(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1036; CHECK-LABEL: vmacc_vx_nxv16i16_unmasked: 1037; CHECK: # %bb.0: 1038; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, ma 1039; CHECK-NEXT: vmacc.vx v12, a0, v8 1040; CHECK-NEXT: vmv4r.v v8, v12 1041; CHECK-NEXT: ret 1042 %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0 1043 %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer 1044 %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1045 %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1046 %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl) 1047 ret <vscale x 16 x i16> %u 1048} 1049 1050define <vscale x 16 x i16> @vmacc_vv_nxv16i16_ta(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1051; CHECK-LABEL: vmacc_vv_nxv16i16_ta: 1052; CHECK: # %bb.0: 1053; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu 1054; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t 1055; CHECK-NEXT: vmv.v.v v8, v16 1056; CHECK-NEXT: ret 1057 %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1058 %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1059 %u = call <vscale x 16 x i16> @llvm.vp.select.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl) 1060 ret <vscale x 16 x i16> %u 1061} 1062 1063define <vscale x 16 x i16> @vmacc_vx_nxv16i16_ta(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1064; CHECK-LABEL: vmacc_vx_nxv16i16_ta: 1065; CHECK: # %bb.0: 1066; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu 1067; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t 1068; CHECK-NEXT: vmv.v.v v8, v12 1069; CHECK-NEXT: ret 1070 %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0 1071 %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer 1072 %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1073 %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1074 %u = call <vscale x 16 x i16> @llvm.vp.select.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl) 1075 ret <vscale x 16 x i16> %u 1076} 1077 1078declare <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32) 1079declare <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32) 1080declare <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1>, <vscale x 32 x i16>, <vscale x 32 x i16>, i32) 1081declare <vscale x 32 x i16> @llvm.vp.select.nxv32i16(<vscale x 32 x i1>, <vscale x 32 x i16>, <vscale x 32 x i16>, i32) 1082 1083define <vscale x 32 x i16> @vmacc_vv_nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) { 1084; CHECK-LABEL: vmacc_vv_nxv32i16: 1085; CHECK: # %bb.0: 1086; CHECK-NEXT: vl8re16.v v24, (a0) 1087; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu 1088; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t 1089; CHECK-NEXT: vmv8r.v v8, v24 1090; CHECK-NEXT: ret 1091 %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 1092 %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 1093 %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl) 1094 ret <vscale x 32 x i16> %u 1095} 1096 1097define <vscale x 32 x i16> @vmacc_vv_nxv32i16_unmasked(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) { 1098; CHECK-LABEL: vmacc_vv_nxv32i16_unmasked: 1099; CHECK: # %bb.0: 1100; CHECK-NEXT: vl8re16.v v24, (a0) 1101; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma 1102; CHECK-NEXT: vmacc.vv v24, v8, v16 1103; CHECK-NEXT: vmv8r.v v8, v24 1104; CHECK-NEXT: ret 1105 %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 1106 %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 1107 %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> splat (i1 -1), <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl) 1108 ret <vscale x 32 x i16> %u 1109} 1110 1111define <vscale x 32 x i16> @vmacc_vx_nxv32i16(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) { 1112; CHECK-LABEL: vmacc_vx_nxv32i16: 1113; CHECK: # %bb.0: 1114; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu 1115; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t 1116; CHECK-NEXT: vmv8r.v v8, v16 1117; CHECK-NEXT: ret 1118 %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0 1119 %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer 1120 %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 1121 %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 1122 %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl) 1123 ret <vscale x 32 x i16> %u 1124} 1125 1126define <vscale x 32 x i16> @vmacc_vx_nxv32i16_unmasked(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) { 1127; CHECK-LABEL: vmacc_vx_nxv32i16_unmasked: 1128; CHECK: # %bb.0: 1129; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma 1130; CHECK-NEXT: vmacc.vx v16, a0, v8 1131; CHECK-NEXT: vmv8r.v v8, v16 1132; CHECK-NEXT: ret 1133 %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0 1134 %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer 1135 %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 1136 %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 1137 %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> splat (i1 -1), <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl) 1138 ret <vscale x 32 x i16> %u 1139} 1140 1141define <vscale x 32 x i16> @vmacc_vv_nxv32i16_ta(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) { 1142; CHECK-LABEL: vmacc_vv_nxv32i16_ta: 1143; CHECK: # %bb.0: 1144; CHECK-NEXT: vl8re16.v v24, (a0) 1145; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu 1146; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t 1147; CHECK-NEXT: vmv.v.v v8, v24 1148; CHECK-NEXT: ret 1149 %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 1150 %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 1151 %u = call <vscale x 32 x i16> @llvm.vp.select.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl) 1152 ret <vscale x 32 x i16> %u 1153} 1154 1155define <vscale x 32 x i16> @vmacc_vx_nxv32i16_ta(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) { 1156; CHECK-LABEL: vmacc_vx_nxv32i16_ta: 1157; CHECK: # %bb.0: 1158; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu 1159; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t 1160; CHECK-NEXT: vmv.v.v v8, v16 1161; CHECK-NEXT: ret 1162 %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0 1163 %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer 1164 %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 1165 %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl) 1166 %u = call <vscale x 32 x i16> @llvm.vp.select.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl) 1167 ret <vscale x 32 x i16> %u 1168} 1169 1170declare <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32) 1171declare <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32) 1172declare <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1>, <vscale x 1 x i32>, <vscale x 1 x i32>, i32) 1173declare <vscale x 1 x i32> @llvm.vp.select.nxv1i32(<vscale x 1 x i1>, <vscale x 1 x i32>, <vscale x 1 x i32>, i32) 1174 1175define <vscale x 1 x i32> @vmacc_vv_nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1176; CHECK-LABEL: vmacc_vv_nxv1i32: 1177; CHECK: # %bb.0: 1178; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu 1179; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 1180; CHECK-NEXT: vmv1r.v v8, v10 1181; CHECK-NEXT: ret 1182 %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1183 %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1184 %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl) 1185 ret <vscale x 1 x i32> %u 1186} 1187 1188define <vscale x 1 x i32> @vmacc_vv_nxv1i32_unmasked(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1189; CHECK-LABEL: vmacc_vv_nxv1i32_unmasked: 1190; CHECK: # %bb.0: 1191; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma 1192; CHECK-NEXT: vmacc.vv v10, v8, v9 1193; CHECK-NEXT: vmv1r.v v8, v10 1194; CHECK-NEXT: ret 1195 %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1196 %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1197 %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl) 1198 ret <vscale x 1 x i32> %u 1199} 1200 1201define <vscale x 1 x i32> @vmacc_vx_nxv1i32(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1202; CHECK-LABEL: vmacc_vx_nxv1i32: 1203; CHECK: # %bb.0: 1204; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu 1205; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 1206; CHECK-NEXT: vmv1r.v v8, v9 1207; CHECK-NEXT: ret 1208 %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0 1209 %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer 1210 %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1211 %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1212 %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl) 1213 ret <vscale x 1 x i32> %u 1214} 1215 1216define <vscale x 1 x i32> @vmacc_vx_nxv1i32_unmasked(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1217; CHECK-LABEL: vmacc_vx_nxv1i32_unmasked: 1218; CHECK: # %bb.0: 1219; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma 1220; CHECK-NEXT: vmacc.vx v9, a0, v8 1221; CHECK-NEXT: vmv1r.v v8, v9 1222; CHECK-NEXT: ret 1223 %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0 1224 %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer 1225 %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1226 %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1227 %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl) 1228 ret <vscale x 1 x i32> %u 1229} 1230 1231define <vscale x 1 x i32> @vmacc_vv_nxv1i32_ta(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1232; CHECK-LABEL: vmacc_vv_nxv1i32_ta: 1233; CHECK: # %bb.0: 1234; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu 1235; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 1236; CHECK-NEXT: vmv1r.v v8, v10 1237; CHECK-NEXT: ret 1238 %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1239 %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1240 %u = call <vscale x 1 x i32> @llvm.vp.select.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl) 1241 ret <vscale x 1 x i32> %u 1242} 1243 1244define <vscale x 1 x i32> @vmacc_vx_nxv1i32_ta(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1245; CHECK-LABEL: vmacc_vx_nxv1i32_ta: 1246; CHECK: # %bb.0: 1247; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu 1248; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 1249; CHECK-NEXT: vmv1r.v v8, v9 1250; CHECK-NEXT: ret 1251 %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0 1252 %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer 1253 %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1254 %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1255 %u = call <vscale x 1 x i32> @llvm.vp.select.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl) 1256 ret <vscale x 1 x i32> %u 1257} 1258 1259declare <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32) 1260declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32) 1261declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32) 1262declare <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32) 1263 1264define <vscale x 2 x i32> @vmacc_vv_nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1265; CHECK-LABEL: vmacc_vv_nxv2i32: 1266; CHECK: # %bb.0: 1267; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu 1268; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 1269; CHECK-NEXT: vmv1r.v v8, v10 1270; CHECK-NEXT: ret 1271 %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1272 %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1273 %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl) 1274 ret <vscale x 2 x i32> %u 1275} 1276 1277define <vscale x 2 x i32> @vmacc_vv_nxv2i32_unmasked(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1278; CHECK-LABEL: vmacc_vv_nxv2i32_unmasked: 1279; CHECK: # %bb.0: 1280; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma 1281; CHECK-NEXT: vmacc.vv v10, v8, v9 1282; CHECK-NEXT: vmv1r.v v8, v10 1283; CHECK-NEXT: ret 1284 %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1285 %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1286 %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl) 1287 ret <vscale x 2 x i32> %u 1288} 1289 1290define <vscale x 2 x i32> @vmacc_vx_nxv2i32(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1291; CHECK-LABEL: vmacc_vx_nxv2i32: 1292; CHECK: # %bb.0: 1293; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu 1294; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 1295; CHECK-NEXT: vmv1r.v v8, v9 1296; CHECK-NEXT: ret 1297 %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0 1298 %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer 1299 %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1300 %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1301 %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl) 1302 ret <vscale x 2 x i32> %u 1303} 1304 1305define <vscale x 2 x i32> @vmacc_vx_nxv2i32_unmasked(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1306; CHECK-LABEL: vmacc_vx_nxv2i32_unmasked: 1307; CHECK: # %bb.0: 1308; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma 1309; CHECK-NEXT: vmacc.vx v9, a0, v8 1310; CHECK-NEXT: vmv1r.v v8, v9 1311; CHECK-NEXT: ret 1312 %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0 1313 %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer 1314 %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1315 %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1316 %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl) 1317 ret <vscale x 2 x i32> %u 1318} 1319 1320define <vscale x 2 x i32> @vmacc_vv_nxv2i32_ta(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1321; CHECK-LABEL: vmacc_vv_nxv2i32_ta: 1322; CHECK: # %bb.0: 1323; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 1324; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 1325; CHECK-NEXT: vmv.v.v v8, v10 1326; CHECK-NEXT: ret 1327 %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1328 %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1329 %u = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl) 1330 ret <vscale x 2 x i32> %u 1331} 1332 1333define <vscale x 2 x i32> @vmacc_vx_nxv2i32_ta(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1334; CHECK-LABEL: vmacc_vx_nxv2i32_ta: 1335; CHECK: # %bb.0: 1336; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu 1337; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t 1338; CHECK-NEXT: vmv.v.v v8, v9 1339; CHECK-NEXT: ret 1340 %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0 1341 %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer 1342 %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1343 %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1344 %u = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl) 1345 ret <vscale x 2 x i32> %u 1346} 1347 1348declare <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32) 1349declare <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32) 1350declare <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32) 1351declare <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32) 1352 1353define <vscale x 4 x i32> @vmacc_vv_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1354; CHECK-LABEL: vmacc_vv_nxv4i32: 1355; CHECK: # %bb.0: 1356; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu 1357; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t 1358; CHECK-NEXT: vmv2r.v v8, v12 1359; CHECK-NEXT: ret 1360 %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1361 %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1362 %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl) 1363 ret <vscale x 4 x i32> %u 1364} 1365 1366define <vscale x 4 x i32> @vmacc_vv_nxv4i32_unmasked(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1367; CHECK-LABEL: vmacc_vv_nxv4i32_unmasked: 1368; CHECK: # %bb.0: 1369; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma 1370; CHECK-NEXT: vmacc.vv v12, v8, v10 1371; CHECK-NEXT: vmv2r.v v8, v12 1372; CHECK-NEXT: ret 1373 %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1374 %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1375 %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl) 1376 ret <vscale x 4 x i32> %u 1377} 1378 1379define <vscale x 4 x i32> @vmacc_vx_nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1380; CHECK-LABEL: vmacc_vx_nxv4i32: 1381; CHECK: # %bb.0: 1382; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu 1383; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t 1384; CHECK-NEXT: vmv2r.v v8, v10 1385; CHECK-NEXT: ret 1386 %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0 1387 %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 1388 %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1389 %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1390 %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl) 1391 ret <vscale x 4 x i32> %u 1392} 1393 1394define <vscale x 4 x i32> @vmacc_vx_nxv4i32_unmasked(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1395; CHECK-LABEL: vmacc_vx_nxv4i32_unmasked: 1396; CHECK: # %bb.0: 1397; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma 1398; CHECK-NEXT: vmacc.vx v10, a0, v8 1399; CHECK-NEXT: vmv2r.v v8, v10 1400; CHECK-NEXT: ret 1401 %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0 1402 %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 1403 %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1404 %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1405 %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl) 1406 ret <vscale x 4 x i32> %u 1407} 1408 1409define <vscale x 4 x i32> @vmacc_vv_nxv4i32_ta(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1410; CHECK-LABEL: vmacc_vv_nxv4i32_ta: 1411; CHECK: # %bb.0: 1412; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu 1413; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t 1414; CHECK-NEXT: vmv.v.v v8, v12 1415; CHECK-NEXT: ret 1416 %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1417 %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1418 %u = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl) 1419 ret <vscale x 4 x i32> %u 1420} 1421 1422define <vscale x 4 x i32> @vmacc_vx_nxv4i32_ta(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1423; CHECK-LABEL: vmacc_vx_nxv4i32_ta: 1424; CHECK: # %bb.0: 1425; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu 1426; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t 1427; CHECK-NEXT: vmv.v.v v8, v10 1428; CHECK-NEXT: ret 1429 %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0 1430 %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer 1431 %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1432 %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1433 %u = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl) 1434 ret <vscale x 4 x i32> %u 1435} 1436 1437declare <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32) 1438declare <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32) 1439declare <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32) 1440declare <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32) 1441 1442define <vscale x 8 x i32> @vmacc_vv_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1443; CHECK-LABEL: vmacc_vv_nxv8i32: 1444; CHECK: # %bb.0: 1445; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu 1446; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t 1447; CHECK-NEXT: vmv4r.v v8, v16 1448; CHECK-NEXT: ret 1449 %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 1450 %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 1451 %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl) 1452 ret <vscale x 8 x i32> %u 1453} 1454 1455define <vscale x 8 x i32> @vmacc_vv_nxv8i32_unmasked(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1456; CHECK-LABEL: vmacc_vv_nxv8i32_unmasked: 1457; CHECK: # %bb.0: 1458; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma 1459; CHECK-NEXT: vmacc.vv v16, v8, v12 1460; CHECK-NEXT: vmv4r.v v8, v16 1461; CHECK-NEXT: ret 1462 %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 1463 %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 1464 %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl) 1465 ret <vscale x 8 x i32> %u 1466} 1467 1468define <vscale x 8 x i32> @vmacc_vx_nxv8i32(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1469; CHECK-LABEL: vmacc_vx_nxv8i32: 1470; CHECK: # %bb.0: 1471; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu 1472; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t 1473; CHECK-NEXT: vmv4r.v v8, v12 1474; CHECK-NEXT: ret 1475 %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 1476 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer 1477 %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 1478 %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 1479 %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl) 1480 ret <vscale x 8 x i32> %u 1481} 1482 1483define <vscale x 8 x i32> @vmacc_vx_nxv8i32_unmasked(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1484; CHECK-LABEL: vmacc_vx_nxv8i32_unmasked: 1485; CHECK: # %bb.0: 1486; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, ma 1487; CHECK-NEXT: vmacc.vx v12, a0, v8 1488; CHECK-NEXT: vmv4r.v v8, v12 1489; CHECK-NEXT: ret 1490 %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 1491 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer 1492 %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 1493 %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 1494 %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl) 1495 ret <vscale x 8 x i32> %u 1496} 1497 1498define <vscale x 8 x i32> @vmacc_vv_nxv8i32_ta(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1499; CHECK-LABEL: vmacc_vv_nxv8i32_ta: 1500; CHECK: # %bb.0: 1501; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu 1502; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t 1503; CHECK-NEXT: vmv.v.v v8, v16 1504; CHECK-NEXT: ret 1505 %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 1506 %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 1507 %u = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl) 1508 ret <vscale x 8 x i32> %u 1509} 1510 1511define <vscale x 8 x i32> @vmacc_vx_nxv8i32_ta(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1512; CHECK-LABEL: vmacc_vx_nxv8i32_ta: 1513; CHECK: # %bb.0: 1514; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu 1515; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t 1516; CHECK-NEXT: vmv.v.v v8, v12 1517; CHECK-NEXT: ret 1518 %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 1519 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer 1520 %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 1521 %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 1522 %u = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl) 1523 ret <vscale x 8 x i32> %u 1524} 1525 1526declare <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32) 1527declare <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32) 1528declare <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1>, <vscale x 16 x i32>, <vscale x 16 x i32>, i32) 1529declare <vscale x 16 x i32> @llvm.vp.select.nxv16i32(<vscale x 16 x i1>, <vscale x 16 x i32>, <vscale x 16 x i32>, i32) 1530 1531define <vscale x 16 x i32> @vmacc_vv_nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i32> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1532; CHECK-LABEL: vmacc_vv_nxv16i32: 1533; CHECK: # %bb.0: 1534; CHECK-NEXT: vl8re32.v v24, (a0) 1535; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu 1536; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t 1537; CHECK-NEXT: vmv8r.v v8, v24 1538; CHECK-NEXT: ret 1539 %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1540 %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1541 %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl) 1542 ret <vscale x 16 x i32> %u 1543} 1544 1545define <vscale x 16 x i32> @vmacc_vv_nxv16i32_unmasked(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i32> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1546; CHECK-LABEL: vmacc_vv_nxv16i32_unmasked: 1547; CHECK: # %bb.0: 1548; CHECK-NEXT: vl8re32.v v24, (a0) 1549; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma 1550; CHECK-NEXT: vmacc.vv v24, v8, v16 1551; CHECK-NEXT: vmv8r.v v8, v24 1552; CHECK-NEXT: ret 1553 %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1554 %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1555 %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl) 1556 ret <vscale x 16 x i32> %u 1557} 1558 1559define <vscale x 16 x i32> @vmacc_vx_nxv16i32(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1560; CHECK-LABEL: vmacc_vx_nxv16i32: 1561; CHECK: # %bb.0: 1562; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu 1563; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t 1564; CHECK-NEXT: vmv8r.v v8, v16 1565; CHECK-NEXT: ret 1566 %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0 1567 %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer 1568 %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1569 %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1570 %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl) 1571 ret <vscale x 16 x i32> %u 1572} 1573 1574define <vscale x 16 x i32> @vmacc_vx_nxv16i32_unmasked(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1575; CHECK-LABEL: vmacc_vx_nxv16i32_unmasked: 1576; CHECK: # %bb.0: 1577; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma 1578; CHECK-NEXT: vmacc.vx v16, a0, v8 1579; CHECK-NEXT: vmv8r.v v8, v16 1580; CHECK-NEXT: ret 1581 %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0 1582 %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer 1583 %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1584 %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1585 %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl) 1586 ret <vscale x 16 x i32> %u 1587} 1588 1589define <vscale x 16 x i32> @vmacc_vv_nxv16i32_ta(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i32> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1590; CHECK-LABEL: vmacc_vv_nxv16i32_ta: 1591; CHECK: # %bb.0: 1592; CHECK-NEXT: vl8re32.v v24, (a0) 1593; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu 1594; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t 1595; CHECK-NEXT: vmv.v.v v8, v24 1596; CHECK-NEXT: ret 1597 %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1598 %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1599 %u = call <vscale x 16 x i32> @llvm.vp.select.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl) 1600 ret <vscale x 16 x i32> %u 1601} 1602 1603define <vscale x 16 x i32> @vmacc_vx_nxv16i32_ta(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1604; CHECK-LABEL: vmacc_vx_nxv16i32_ta: 1605; CHECK: # %bb.0: 1606; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu 1607; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t 1608; CHECK-NEXT: vmv.v.v v8, v16 1609; CHECK-NEXT: ret 1610 %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0 1611 %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer 1612 %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1613 %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl) 1614 %u = call <vscale x 16 x i32> @llvm.vp.select.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl) 1615 ret <vscale x 16 x i32> %u 1616} 1617 1618declare <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32) 1619declare <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32) 1620declare <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1>, <vscale x 1 x i64>, <vscale x 1 x i64>, i32) 1621declare <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1>, <vscale x 1 x i64>, <vscale x 1 x i64>, i32) 1622 1623define <vscale x 1 x i64> @vmacc_vv_nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i64> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1624; CHECK-LABEL: vmacc_vv_nxv1i64: 1625; CHECK: # %bb.0: 1626; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu 1627; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 1628; CHECK-NEXT: vmv1r.v v8, v10 1629; CHECK-NEXT: ret 1630 %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1631 %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1632 %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl) 1633 ret <vscale x 1 x i64> %u 1634} 1635 1636define <vscale x 1 x i64> @vmacc_vv_nxv1i64_unmasked(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i64> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1637; CHECK-LABEL: vmacc_vv_nxv1i64_unmasked: 1638; CHECK: # %bb.0: 1639; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma 1640; CHECK-NEXT: vmacc.vv v10, v8, v9 1641; CHECK-NEXT: vmv1r.v v8, v10 1642; CHECK-NEXT: ret 1643 %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1644 %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1645 %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl) 1646 ret <vscale x 1 x i64> %u 1647} 1648 1649define <vscale x 1 x i64> @vmacc_vx_nxv1i64(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i64> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1650; RV32-LABEL: vmacc_vx_nxv1i64: 1651; RV32: # %bb.0: 1652; RV32-NEXT: addi sp, sp, -16 1653; RV32-NEXT: .cfi_def_cfa_offset 16 1654; RV32-NEXT: sw a0, 8(sp) 1655; RV32-NEXT: sw a1, 12(sp) 1656; RV32-NEXT: addi a0, sp, 8 1657; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma 1658; RV32-NEXT: vlse64.v v10, (a0), zero 1659; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, mu 1660; RV32-NEXT: vmacc.vv v9, v8, v10, v0.t 1661; RV32-NEXT: vmv1r.v v8, v9 1662; RV32-NEXT: addi sp, sp, 16 1663; RV32-NEXT: .cfi_def_cfa_offset 0 1664; RV32-NEXT: ret 1665; 1666; RV64-LABEL: vmacc_vx_nxv1i64: 1667; RV64: # %bb.0: 1668; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu 1669; RV64-NEXT: vmacc.vx v9, a0, v8, v0.t 1670; RV64-NEXT: vmv1r.v v8, v9 1671; RV64-NEXT: ret 1672 %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0 1673 %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer 1674 %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1675 %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1676 %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl) 1677 ret <vscale x 1 x i64> %u 1678} 1679 1680define <vscale x 1 x i64> @vmacc_vx_nxv1i64_unmasked(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i64> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1681; RV32-LABEL: vmacc_vx_nxv1i64_unmasked: 1682; RV32: # %bb.0: 1683; RV32-NEXT: addi sp, sp, -16 1684; RV32-NEXT: .cfi_def_cfa_offset 16 1685; RV32-NEXT: sw a0, 8(sp) 1686; RV32-NEXT: sw a1, 12(sp) 1687; RV32-NEXT: addi a0, sp, 8 1688; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma 1689; RV32-NEXT: vlse64.v v10, (a0), zero 1690; RV32-NEXT: vsetvli zero, zero, e64, m1, tu, ma 1691; RV32-NEXT: vmacc.vv v9, v8, v10 1692; RV32-NEXT: vmv1r.v v8, v9 1693; RV32-NEXT: addi sp, sp, 16 1694; RV32-NEXT: .cfi_def_cfa_offset 0 1695; RV32-NEXT: ret 1696; 1697; RV64-LABEL: vmacc_vx_nxv1i64_unmasked: 1698; RV64: # %bb.0: 1699; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma 1700; RV64-NEXT: vmacc.vx v9, a0, v8 1701; RV64-NEXT: vmv1r.v v8, v9 1702; RV64-NEXT: ret 1703 %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0 1704 %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer 1705 %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1706 %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1707 %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl) 1708 ret <vscale x 1 x i64> %u 1709} 1710 1711define <vscale x 1 x i64> @vmacc_vv_nxv1i64_ta(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i64> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1712; CHECK-LABEL: vmacc_vv_nxv1i64_ta: 1713; CHECK: # %bb.0: 1714; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu 1715; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t 1716; CHECK-NEXT: vmv.v.v v8, v10 1717; CHECK-NEXT: ret 1718 %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1719 %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1720 %u = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl) 1721 ret <vscale x 1 x i64> %u 1722} 1723 1724define <vscale x 1 x i64> @vmacc_vx_nxv1i64_ta(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i64> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1725; RV32-LABEL: vmacc_vx_nxv1i64_ta: 1726; RV32: # %bb.0: 1727; RV32-NEXT: addi sp, sp, -16 1728; RV32-NEXT: .cfi_def_cfa_offset 16 1729; RV32-NEXT: sw a0, 8(sp) 1730; RV32-NEXT: sw a1, 12(sp) 1731; RV32-NEXT: addi a0, sp, 8 1732; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu 1733; RV32-NEXT: vlse64.v v10, (a0), zero 1734; RV32-NEXT: vmacc.vv v9, v8, v10, v0.t 1735; RV32-NEXT: vmv.v.v v8, v9 1736; RV32-NEXT: addi sp, sp, 16 1737; RV32-NEXT: .cfi_def_cfa_offset 0 1738; RV32-NEXT: ret 1739; 1740; RV64-LABEL: vmacc_vx_nxv1i64_ta: 1741; RV64: # %bb.0: 1742; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu 1743; RV64-NEXT: vmacc.vx v9, a0, v8, v0.t 1744; RV64-NEXT: vmv.v.v v8, v9 1745; RV64-NEXT: ret 1746 %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0 1747 %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer 1748 %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1749 %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl) 1750 %u = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl) 1751 ret <vscale x 1 x i64> %u 1752} 1753 1754declare <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32) 1755declare <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32) 1756declare <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32) 1757declare <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32) 1758 1759define <vscale x 2 x i64> @vmacc_vv_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1760; CHECK-LABEL: vmacc_vv_nxv2i64: 1761; CHECK: # %bb.0: 1762; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu 1763; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t 1764; CHECK-NEXT: vmv2r.v v8, v12 1765; CHECK-NEXT: ret 1766 %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1767 %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1768 %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl) 1769 ret <vscale x 2 x i64> %u 1770} 1771 1772define <vscale x 2 x i64> @vmacc_vv_nxv2i64_unmasked(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1773; CHECK-LABEL: vmacc_vv_nxv2i64_unmasked: 1774; CHECK: # %bb.0: 1775; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma 1776; CHECK-NEXT: vmacc.vv v12, v8, v10 1777; CHECK-NEXT: vmv2r.v v8, v12 1778; CHECK-NEXT: ret 1779 %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1780 %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1781 %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl) 1782 ret <vscale x 2 x i64> %u 1783} 1784 1785define <vscale x 2 x i64> @vmacc_vx_nxv2i64(<vscale x 2 x i64> %a, i64 %b, <vscale x 2 x i64> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1786; RV32-LABEL: vmacc_vx_nxv2i64: 1787; RV32: # %bb.0: 1788; RV32-NEXT: addi sp, sp, -16 1789; RV32-NEXT: .cfi_def_cfa_offset 16 1790; RV32-NEXT: sw a0, 8(sp) 1791; RV32-NEXT: sw a1, 12(sp) 1792; RV32-NEXT: addi a0, sp, 8 1793; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma 1794; RV32-NEXT: vlse64.v v12, (a0), zero 1795; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu 1796; RV32-NEXT: vmacc.vv v10, v8, v12, v0.t 1797; RV32-NEXT: vmv2r.v v8, v10 1798; RV32-NEXT: addi sp, sp, 16 1799; RV32-NEXT: .cfi_def_cfa_offset 0 1800; RV32-NEXT: ret 1801; 1802; RV64-LABEL: vmacc_vx_nxv2i64: 1803; RV64: # %bb.0: 1804; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, mu 1805; RV64-NEXT: vmacc.vx v10, a0, v8, v0.t 1806; RV64-NEXT: vmv2r.v v8, v10 1807; RV64-NEXT: ret 1808 %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0 1809 %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer 1810 %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1811 %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1812 %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl) 1813 ret <vscale x 2 x i64> %u 1814} 1815 1816define <vscale x 2 x i64> @vmacc_vx_nxv2i64_unmasked(<vscale x 2 x i64> %a, i64 %b, <vscale x 2 x i64> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1817; RV32-LABEL: vmacc_vx_nxv2i64_unmasked: 1818; RV32: # %bb.0: 1819; RV32-NEXT: addi sp, sp, -16 1820; RV32-NEXT: .cfi_def_cfa_offset 16 1821; RV32-NEXT: sw a0, 8(sp) 1822; RV32-NEXT: sw a1, 12(sp) 1823; RV32-NEXT: addi a0, sp, 8 1824; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma 1825; RV32-NEXT: vlse64.v v12, (a0), zero 1826; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma 1827; RV32-NEXT: vmacc.vv v10, v8, v12 1828; RV32-NEXT: vmv2r.v v8, v10 1829; RV32-NEXT: addi sp, sp, 16 1830; RV32-NEXT: .cfi_def_cfa_offset 0 1831; RV32-NEXT: ret 1832; 1833; RV64-LABEL: vmacc_vx_nxv2i64_unmasked: 1834; RV64: # %bb.0: 1835; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma 1836; RV64-NEXT: vmacc.vx v10, a0, v8 1837; RV64-NEXT: vmv2r.v v8, v10 1838; RV64-NEXT: ret 1839 %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0 1840 %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer 1841 %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1842 %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1843 %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl) 1844 ret <vscale x 2 x i64> %u 1845} 1846 1847define <vscale x 2 x i64> @vmacc_vv_nxv2i64_ta(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1848; CHECK-LABEL: vmacc_vv_nxv2i64_ta: 1849; CHECK: # %bb.0: 1850; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu 1851; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t 1852; CHECK-NEXT: vmv.v.v v8, v12 1853; CHECK-NEXT: ret 1854 %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1855 %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1856 %u = call <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl) 1857 ret <vscale x 2 x i64> %u 1858} 1859 1860define <vscale x 2 x i64> @vmacc_vx_nxv2i64_ta(<vscale x 2 x i64> %a, i64 %b, <vscale x 2 x i64> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1861; RV32-LABEL: vmacc_vx_nxv2i64_ta: 1862; RV32: # %bb.0: 1863; RV32-NEXT: addi sp, sp, -16 1864; RV32-NEXT: .cfi_def_cfa_offset 16 1865; RV32-NEXT: sw a0, 8(sp) 1866; RV32-NEXT: sw a1, 12(sp) 1867; RV32-NEXT: addi a0, sp, 8 1868; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu 1869; RV32-NEXT: vlse64.v v12, (a0), zero 1870; RV32-NEXT: vmacc.vv v10, v8, v12, v0.t 1871; RV32-NEXT: vmv.v.v v8, v10 1872; RV32-NEXT: addi sp, sp, 16 1873; RV32-NEXT: .cfi_def_cfa_offset 0 1874; RV32-NEXT: ret 1875; 1876; RV64-LABEL: vmacc_vx_nxv2i64_ta: 1877; RV64: # %bb.0: 1878; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu 1879; RV64-NEXT: vmacc.vx v10, a0, v8, v0.t 1880; RV64-NEXT: vmv.v.v v8, v10 1881; RV64-NEXT: ret 1882 %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0 1883 %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer 1884 %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1885 %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl) 1886 %u = call <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl) 1887 ret <vscale x 2 x i64> %u 1888} 1889 1890declare <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32) 1891declare <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32) 1892declare <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1>, <vscale x 4 x i64>, <vscale x 4 x i64>, i32) 1893declare <vscale x 4 x i64> @llvm.vp.select.nxv4i64(<vscale x 4 x i1>, <vscale x 4 x i64>, <vscale x 4 x i64>, i32) 1894 1895define <vscale x 4 x i64> @vmacc_vv_nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1896; CHECK-LABEL: vmacc_vv_nxv4i64: 1897; CHECK: # %bb.0: 1898; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu 1899; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t 1900; CHECK-NEXT: vmv4r.v v8, v16 1901; CHECK-NEXT: ret 1902 %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1903 %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1904 %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl) 1905 ret <vscale x 4 x i64> %u 1906} 1907 1908define <vscale x 4 x i64> @vmacc_vv_nxv4i64_unmasked(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1909; CHECK-LABEL: vmacc_vv_nxv4i64_unmasked: 1910; CHECK: # %bb.0: 1911; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma 1912; CHECK-NEXT: vmacc.vv v16, v8, v12 1913; CHECK-NEXT: vmv4r.v v8, v16 1914; CHECK-NEXT: ret 1915 %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1916 %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1917 %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl) 1918 ret <vscale x 4 x i64> %u 1919} 1920 1921define <vscale x 4 x i64> @vmacc_vx_nxv4i64(<vscale x 4 x i64> %a, i64 %b, <vscale x 4 x i64> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1922; RV32-LABEL: vmacc_vx_nxv4i64: 1923; RV32: # %bb.0: 1924; RV32-NEXT: addi sp, sp, -16 1925; RV32-NEXT: .cfi_def_cfa_offset 16 1926; RV32-NEXT: sw a0, 8(sp) 1927; RV32-NEXT: sw a1, 12(sp) 1928; RV32-NEXT: addi a0, sp, 8 1929; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma 1930; RV32-NEXT: vlse64.v v16, (a0), zero 1931; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, mu 1932; RV32-NEXT: vmacc.vv v12, v8, v16, v0.t 1933; RV32-NEXT: vmv4r.v v8, v12 1934; RV32-NEXT: addi sp, sp, 16 1935; RV32-NEXT: .cfi_def_cfa_offset 0 1936; RV32-NEXT: ret 1937; 1938; RV64-LABEL: vmacc_vx_nxv4i64: 1939; RV64: # %bb.0: 1940; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, mu 1941; RV64-NEXT: vmacc.vx v12, a0, v8, v0.t 1942; RV64-NEXT: vmv4r.v v8, v12 1943; RV64-NEXT: ret 1944 %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0 1945 %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer 1946 %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1947 %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1948 %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl) 1949 ret <vscale x 4 x i64> %u 1950} 1951 1952define <vscale x 4 x i64> @vmacc_vx_nxv4i64_unmasked(<vscale x 4 x i64> %a, i64 %b, <vscale x 4 x i64> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1953; RV32-LABEL: vmacc_vx_nxv4i64_unmasked: 1954; RV32: # %bb.0: 1955; RV32-NEXT: addi sp, sp, -16 1956; RV32-NEXT: .cfi_def_cfa_offset 16 1957; RV32-NEXT: sw a0, 8(sp) 1958; RV32-NEXT: sw a1, 12(sp) 1959; RV32-NEXT: addi a0, sp, 8 1960; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma 1961; RV32-NEXT: vlse64.v v16, (a0), zero 1962; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, ma 1963; RV32-NEXT: vmacc.vv v12, v8, v16 1964; RV32-NEXT: vmv4r.v v8, v12 1965; RV32-NEXT: addi sp, sp, 16 1966; RV32-NEXT: .cfi_def_cfa_offset 0 1967; RV32-NEXT: ret 1968; 1969; RV64-LABEL: vmacc_vx_nxv4i64_unmasked: 1970; RV64: # %bb.0: 1971; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma 1972; RV64-NEXT: vmacc.vx v12, a0, v8 1973; RV64-NEXT: vmv4r.v v8, v12 1974; RV64-NEXT: ret 1975 %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0 1976 %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer 1977 %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1978 %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1979 %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl) 1980 ret <vscale x 4 x i64> %u 1981} 1982 1983define <vscale x 4 x i64> @vmacc_vv_nxv4i64_ta(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1984; CHECK-LABEL: vmacc_vv_nxv4i64_ta: 1985; CHECK: # %bb.0: 1986; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu 1987; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t 1988; CHECK-NEXT: vmv.v.v v8, v16 1989; CHECK-NEXT: ret 1990 %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1991 %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 1992 %u = call <vscale x 4 x i64> @llvm.vp.select.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl) 1993 ret <vscale x 4 x i64> %u 1994} 1995 1996define <vscale x 4 x i64> @vmacc_vx_nxv4i64_ta(<vscale x 4 x i64> %a, i64 %b, <vscale x 4 x i64> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1997; RV32-LABEL: vmacc_vx_nxv4i64_ta: 1998; RV32: # %bb.0: 1999; RV32-NEXT: addi sp, sp, -16 2000; RV32-NEXT: .cfi_def_cfa_offset 16 2001; RV32-NEXT: sw a0, 8(sp) 2002; RV32-NEXT: sw a1, 12(sp) 2003; RV32-NEXT: addi a0, sp, 8 2004; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu 2005; RV32-NEXT: vlse64.v v16, (a0), zero 2006; RV32-NEXT: vmacc.vv v12, v8, v16, v0.t 2007; RV32-NEXT: vmv.v.v v8, v12 2008; RV32-NEXT: addi sp, sp, 16 2009; RV32-NEXT: .cfi_def_cfa_offset 0 2010; RV32-NEXT: ret 2011; 2012; RV64-LABEL: vmacc_vx_nxv4i64_ta: 2013; RV64: # %bb.0: 2014; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu 2015; RV64-NEXT: vmacc.vx v12, a0, v8, v0.t 2016; RV64-NEXT: vmv.v.v v8, v12 2017; RV64-NEXT: ret 2018 %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0 2019 %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer 2020 %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 2021 %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl) 2022 %u = call <vscale x 4 x i64> @llvm.vp.select.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl) 2023 ret <vscale x 4 x i64> %u 2024} 2025 2026declare <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32) 2027declare <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32) 2028declare <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32) 2029declare <vscale x 8 x i64> @llvm.vp.select.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32) 2030 2031define <vscale x 8 x i64> @vmacc_vv_nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i64> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2032; CHECK-LABEL: vmacc_vv_nxv8i64: 2033; CHECK: # %bb.0: 2034; CHECK-NEXT: vl8re64.v v24, (a0) 2035; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu 2036; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t 2037; CHECK-NEXT: vmv8r.v v8, v24 2038; CHECK-NEXT: ret 2039 %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 2040 %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 2041 %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl) 2042 ret <vscale x 8 x i64> %u 2043} 2044 2045define <vscale x 8 x i64> @vmacc_vv_nxv8i64_unmasked(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i64> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2046; CHECK-LABEL: vmacc_vv_nxv8i64_unmasked: 2047; CHECK: # %bb.0: 2048; CHECK-NEXT: vl8re64.v v24, (a0) 2049; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma 2050; CHECK-NEXT: vmacc.vv v24, v8, v16 2051; CHECK-NEXT: vmv8r.v v8, v24 2052; CHECK-NEXT: ret 2053 %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 2054 %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 2055 %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl) 2056 ret <vscale x 8 x i64> %u 2057} 2058 2059define <vscale x 8 x i64> @vmacc_vx_nxv8i64(<vscale x 8 x i64> %a, i64 %b, <vscale x 8 x i64> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2060; RV32-LABEL: vmacc_vx_nxv8i64: 2061; RV32: # %bb.0: 2062; RV32-NEXT: addi sp, sp, -16 2063; RV32-NEXT: .cfi_def_cfa_offset 16 2064; RV32-NEXT: sw a0, 8(sp) 2065; RV32-NEXT: sw a1, 12(sp) 2066; RV32-NEXT: addi a0, sp, 8 2067; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2068; RV32-NEXT: vlse64.v v24, (a0), zero 2069; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, mu 2070; RV32-NEXT: vmacc.vv v16, v8, v24, v0.t 2071; RV32-NEXT: vmv8r.v v8, v16 2072; RV32-NEXT: addi sp, sp, 16 2073; RV32-NEXT: .cfi_def_cfa_offset 0 2074; RV32-NEXT: ret 2075; 2076; RV64-LABEL: vmacc_vx_nxv8i64: 2077; RV64: # %bb.0: 2078; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, mu 2079; RV64-NEXT: vmacc.vx v16, a0, v8, v0.t 2080; RV64-NEXT: vmv8r.v v8, v16 2081; RV64-NEXT: ret 2082 %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 2083 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer 2084 %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 2085 %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 2086 %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl) 2087 ret <vscale x 8 x i64> %u 2088} 2089 2090define <vscale x 8 x i64> @vmacc_vx_nxv8i64_unmasked(<vscale x 8 x i64> %a, i64 %b, <vscale x 8 x i64> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2091; RV32-LABEL: vmacc_vx_nxv8i64_unmasked: 2092; RV32: # %bb.0: 2093; RV32-NEXT: addi sp, sp, -16 2094; RV32-NEXT: .cfi_def_cfa_offset 16 2095; RV32-NEXT: sw a0, 8(sp) 2096; RV32-NEXT: sw a1, 12(sp) 2097; RV32-NEXT: addi a0, sp, 8 2098; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2099; RV32-NEXT: vlse64.v v24, (a0), zero 2100; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma 2101; RV32-NEXT: vmacc.vv v16, v8, v24 2102; RV32-NEXT: vmv8r.v v8, v16 2103; RV32-NEXT: addi sp, sp, 16 2104; RV32-NEXT: .cfi_def_cfa_offset 0 2105; RV32-NEXT: ret 2106; 2107; RV64-LABEL: vmacc_vx_nxv8i64_unmasked: 2108; RV64: # %bb.0: 2109; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, ma 2110; RV64-NEXT: vmacc.vx v16, a0, v8 2111; RV64-NEXT: vmv8r.v v8, v16 2112; RV64-NEXT: ret 2113 %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 2114 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer 2115 %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 2116 %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 2117 %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl) 2118 ret <vscale x 8 x i64> %u 2119} 2120 2121define <vscale x 8 x i64> @vmacc_vv_nxv8i64_ta(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i64> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2122; CHECK-LABEL: vmacc_vv_nxv8i64_ta: 2123; CHECK: # %bb.0: 2124; CHECK-NEXT: vl8re64.v v24, (a0) 2125; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2126; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t 2127; CHECK-NEXT: vmv.v.v v8, v24 2128; CHECK-NEXT: ret 2129 %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 2130 %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 2131 %u = call <vscale x 8 x i64> @llvm.vp.select.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl) 2132 ret <vscale x 8 x i64> %u 2133} 2134 2135define <vscale x 8 x i64> @vmacc_vx_nxv8i64_ta(<vscale x 8 x i64> %a, i64 %b, <vscale x 8 x i64> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2136; RV32-LABEL: vmacc_vx_nxv8i64_ta: 2137; RV32: # %bb.0: 2138; RV32-NEXT: addi sp, sp, -16 2139; RV32-NEXT: .cfi_def_cfa_offset 16 2140; RV32-NEXT: sw a0, 8(sp) 2141; RV32-NEXT: sw a1, 12(sp) 2142; RV32-NEXT: addi a0, sp, 8 2143; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu 2144; RV32-NEXT: vlse64.v v24, (a0), zero 2145; RV32-NEXT: vmacc.vv v16, v8, v24, v0.t 2146; RV32-NEXT: vmv.v.v v8, v16 2147; RV32-NEXT: addi sp, sp, 16 2148; RV32-NEXT: .cfi_def_cfa_offset 0 2149; RV32-NEXT: ret 2150; 2151; RV64-LABEL: vmacc_vx_nxv8i64_ta: 2152; RV64: # %bb.0: 2153; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu 2154; RV64-NEXT: vmacc.vx v16, a0, v8, v0.t 2155; RV64-NEXT: vmv.v.v v8, v16 2156; RV64-NEXT: ret 2157 %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 2158 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer 2159 %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 2160 %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl) 2161 %u = call <vscale x 8 x i64> @llvm.vp.select.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl) 2162 ret <vscale x 8 x i64> %u 2163} 2164