; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s

declare i64 @llvm.vscale.i64()
declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)

; Sums floats starting at %f with a scalable-vector loop. Each iteration loads
; one <vscale x 4 x float> and folds it into a scalar accumulator through
; llvm.vector.reduce.fadd. The call carries no fast-math flags, and the
; expected assembly below uses the ordered reduction vfredosum.vs.
;
; The induction variable advances by vscale * 4 elements per iteration and the
; loop exits once it equals 1024. The accumulator starts at 0.0 and is carried
; around the loop as a scalar float phi.
define float @reduce_fadd(ptr %f) {
; CHECK-LABEL: reduce_fadd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v8, zero
; CHECK-NEXT:    srli a1, a2, 1
; CHECK-NEXT:    slli a2, a2, 1
; CHECK-NEXT:    li a3, 1024
; CHECK-NEXT:  .LBB0_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vl2re32.v v10, (a0)
; CHECK-NEXT:    sub a3, a3, a1
; CHECK-NEXT:    vsetvli a4, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfredosum.vs v8, v10, v8
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    bnez a3, .LBB0_1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
entry:
  %vscale = tail call i64 @llvm.vscale.i64()
  %vecsize = shl nuw nsw i64 %vscale, 2
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %vec.phi = phi float [ 0.000000e+00, %entry ], [ %acc, %vector.body ]
  %gep = getelementptr inbounds float, ptr %f, i64 %index
  %wide.load = load <vscale x 4 x float>, ptr %gep, align 4
  %acc = tail call float @llvm.vector.reduce.fadd.nxv4f32(float %vec.phi, <vscale x 4 x float> %wide.load)
  %index.next = add nuw i64 %index, %vecsize
  %done = icmp eq i64 %index.next, 1024
  br i1 %done, label %exit, label %vector.body

exit:
  ret float %acc
}