; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s

declare half @llvm.vp.reduce.fadd.v2f16(half, <2 x half>, <2 x i1>, i32)

define half @vpreduce_fadd_v2f16(half %s, <2 x half> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfredusum.vs v9, v8, v9, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v9
; CHECK-NEXT:    ret
  %r = call reassoc half @llvm.vp.reduce.fadd.v2f16(half %s, <2 x half> %v, <2 x i1> %m, i32 %evl)
  ret half %r
}

define half @vpreduce_ord_fadd_v2f16(half %s, <2 x half> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfredosum.vs v9, v8, v9, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v9
; CHECK-NEXT:    ret
  %r = call half @llvm.vp.reduce.fadd.v2f16(half %s, <2 x half> %v, <2 x i1> %m, i32 %evl)
  ret half %r
}

declare half @llvm.vp.reduce.fadd.v4f16(half, <4 x half>, <4 x i1>, i32)

define half @vpreduce_fadd_v4f16(half %s, <4 x half> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfredusum.vs v9, v8, v9, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v9
; CHECK-NEXT:    ret
  %r = call reassoc half @llvm.vp.reduce.fadd.v4f16(half %s, <4 x half> %v, <4 x i1> %m, i32 %evl)
  ret half %r
}

define half @vpreduce_ord_fadd_v4f16(half %s, <4 x half> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfredosum.vs v9, v8, v9, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v9
; CHECK-NEXT:    ret
  %r = call half @llvm.vp.reduce.fadd.v4f16(half %s, <4 x half> %v, <4 x i1> %m, i32 %evl)
  ret half %r
}

declare float @llvm.vp.reduce.fadd.v2f32(float, <2 x float>, <2 x i1>, i32)

define float @vpreduce_fadd_v2f32(float %s, <2 x float> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfredusum.vs v9, v8, v9, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v9
; CHECK-NEXT:    ret
  %r = call reassoc float @llvm.vp.reduce.fadd.v2f32(float %s, <2 x float> %v, <2 x i1> %m, i32 %evl)
  ret float %r
}

define float @vpreduce_ord_fadd_v2f32(float %s, <2 x float> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfredosum.vs v9, v8, v9, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v9
; CHECK-NEXT:    ret
  %r = call float @llvm.vp.reduce.fadd.v2f32(float %s, <2 x float> %v, <2 x i1> %m, i32 %evl)
  ret float %r
}

declare float @llvm.vp.reduce.fadd.v4f32(float, <4 x float>, <4 x i1>, i32)

define float @vpreduce_fadd_v4f32(float %s, <4 x float> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfredusum.vs v9, v8, v9, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v9
; CHECK-NEXT:    ret
  %r = call reassoc float @llvm.vp.reduce.fadd.v4f32(float %s, <4 x float> %v, <4 x i1> %m, i32 %evl)
  ret float %r
}

define float @vpreduce_ord_fadd_v4f32(float %s, <4 x float> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfredosum.vs v9, v8, v9, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v9
; CHECK-NEXT:    ret
  %r = call float @llvm.vp.reduce.fadd.v4f32(float %s, <4 x float> %v, <4 x i1> %m, i32 %evl)
  ret float %r
}

declare float @llvm.vp.reduce.fadd.v64f32(float, <64 x float>, <64 x i1>, i32)

define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_v64f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v24, v0, 4
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    bltu a0, a2, .LBB8_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:  .LBB8_2:
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.s.f v25, fa0
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vfredusum.vs v25, v8, v25, v0.t
; CHECK-NEXT:    addi a1, a0, -32
; CHECK-NEXT:    sltu a0, a0, a1
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    and a0, a0, a1
; CHECK-NEXT:    vmv1r.v v0, v24
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vfredusum.vs v25, v16, v25, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v25
; CHECK-NEXT:    ret
  %r = call reassoc float @llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl)
  ret float %r
}

define float @vpreduce_ord_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_v64f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v24, v0, 4
; CHECK-NEXT:    mv a1, a0
; CHECK-NEXT:    bltu a0, a2, .LBB9_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:  .LBB9_2:
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.s.f v25, fa0
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vfredosum.vs v25, v8, v25, v0.t
; CHECK-NEXT:    addi a1, a0, -32
; CHECK-NEXT:    sltu a0, a0, a1
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    and a0, a0, a1
; CHECK-NEXT:    vmv1r.v v0, v24
; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT:    vfredosum.vs v25, v16, v25, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v25
; CHECK-NEXT:    ret
  %r = call float @llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl)
  ret float %r
}

declare double @llvm.vp.reduce.fadd.v2f64(double, <2 x double>, <2 x i1>, i32)

define double @vpreduce_fadd_v2f64(double %s, <2 x double> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfredusum.vs v9, v8, v9, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v9
; CHECK-NEXT:    ret
  %r = call reassoc double @llvm.vp.reduce.fadd.v2f64(double %s, <2 x double> %v, <2 x i1> %m, i32 %evl)
  ret double %r
}

define double @vpreduce_ord_fadd_v2f64(double %s, <2 x double> %v, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v9, fa0
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfredosum.vs v9, v8, v9, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v9
; CHECK-NEXT:    ret
  %r = call double @llvm.vp.reduce.fadd.v2f64(double %s, <2 x double> %v, <2 x i1> %m, i32 %evl)
  ret double %r
}

declare double @llvm.vp.reduce.fadd.v3f64(double, <3 x double>, <3 x i1>, i32)

define double @vpreduce_fadd_v3f64(double %s, <3 x double> %v, <3 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_v3f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfredusum.vs v10, v8, v10, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v10
; CHECK-NEXT:    ret
  %r = call reassoc double @llvm.vp.reduce.fadd.v3f64(double %s, <3 x double> %v, <3 x i1> %m, i32 %evl)
  ret double %r
}

define double @vpreduce_ord_fadd_v3f64(double %s, <3 x double> %v, <3 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_v3f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfredosum.vs v10, v8, v10, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v10
; CHECK-NEXT:    ret
  %r = call double @llvm.vp.reduce.fadd.v3f64(double %s, <3 x double> %v, <3 x i1> %m, i32 %evl)
  ret double %r
}

declare double @llvm.vp.reduce.fadd.v4f64(double, <4 x double>, <4 x i1>, i32)

define double @vpreduce_fadd_v4f64(double %s, <4 x double> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fadd_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfredusum.vs v10, v8, v10, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v10
; CHECK-NEXT:    ret
  %r = call reassoc double @llvm.vp.reduce.fadd.v4f64(double %s, <4 x double> %v, <4 x i1> %m, i32 %evl)
  ret double %r
}

define double @vpreduce_ord_fadd_v4f64(double %s, <4 x double> %v, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_ord_fadd_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vfmv.s.f v10, fa0
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfredosum.vs v10, v8, v10, v0.t
; CHECK-NEXT:    vfmv.f.s fa0, v10
; CHECK-NEXT:    ret
  %r = call double @llvm.vp.reduce.fadd.v4f64(double %s, <4 x double> %v, <4 x i1> %m, i32 %evl)
  ret double %r
}