xref: /llvm-project/llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll (revision cd28da390f8b8dedd00f9a2a383ec81e90436841)
1; RUN: opt < %s -passes=loop-vectorize -S | FileCheck %s
2
3; CHECK: fadd
4; CHECK-NEXT: fadd
5; CHECK-NEXT: fadd
6; CHECK-NEXT: fadd
7; CHECK-NEXT: fadd
8; CHECK-NEXT: fadd
9; CHECK-NEXT: fadd
10; CHECK-NEXT: fadd
11; CHECK-NEXT: =
12; CHECK-NOT: fadd
13; CHECK-SAME: >
14
; Module-level target description: little-endian data layout ("e") and a
; powerpc64le Linux triple.
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-ibm-linux-gnu"

; @QLA_F3_r_veq_norm2_V(%r, %a, %n)
;
; Stores a single float to %r:
;   - 0.0 when %n <= 0 (signed compare in `entry`);
;   - otherwise the sum, over rows i = 0 .. n-1 of %a and over the three
;     { float, float } pairs in each row, of real*real + imag*imag.
; Each per-pair value is computed in float, extended to double, and added to
; a double accumulator; the final total is truncated back to float.
; %a is addressed as an array of [3 x { float, float }] rows. Every
; floating-point multiply and add carries the `fast` flag.
define void @QLA_F3_r_veq_norm2_V(ptr noalias nocapture %r, ptr noalias nocapture readonly %a, i32 signext %n) #0 {
entry:
  ; Skip the loop entirely when there are no rows to process.
  %cmp24 = icmp sgt i32 %n, 0
  br i1 %cmp24, label %for.cond1.preheader.preheader, label %for.end13

for.cond1.preheader.preheader:                    ; preds = %entry
  br label %for.cond1.preheader

; Single loop over rows. The body holds three copies of the same computation
; (suffixes none / .1 / .2), one per pair in the row, chained through the
; double accumulator.
for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.cond1.preheader
  ; %indvars.iv is the row index; %sum.026 is the running double total.
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.cond1.preheader ], [ 0, %for.cond1.preheader.preheader ]
  %sum.026 = phi double [ %add10.2, %for.cond1.preheader ], [ 0.000000e+00, %for.cond1.preheader.preheader ]
  ; Pair 0 of the row: load both fields, square and add them, accumulate.
  %arrayidx5.realp = getelementptr inbounds [3 x { float, float }], ptr %a, i64 %indvars.iv, i64 0, i32 0
  %arrayidx5.real = load float, ptr %arrayidx5.realp, align 8
  %arrayidx5.imagp = getelementptr inbounds [3 x { float, float }], ptr %a, i64 %indvars.iv, i64 0, i32 1
  %arrayidx5.imag = load float, ptr %arrayidx5.imagp, align 8
  %mul = fmul fast float %arrayidx5.real, %arrayidx5.real
  %mul9 = fmul fast float %arrayidx5.imag, %arrayidx5.imag
  %add = fadd fast float %mul9, %mul
  %conv = fpext float %add to double
  %add10 = fadd fast double %conv, %sum.026
  ; Pair 1 of the row: same computation, accumulated onto %add10.
  %arrayidx5.realp.1 = getelementptr inbounds [3 x { float, float }], ptr %a, i64 %indvars.iv, i64 1, i32 0
  %arrayidx5.real.1 = load float, ptr %arrayidx5.realp.1, align 8
  %arrayidx5.imagp.1 = getelementptr inbounds [3 x { float, float }], ptr %a, i64 %indvars.iv, i64 1, i32 1
  %arrayidx5.imag.1 = load float, ptr %arrayidx5.imagp.1, align 8
  %mul.1 = fmul fast float %arrayidx5.real.1, %arrayidx5.real.1
  %mul9.1 = fmul fast float %arrayidx5.imag.1, %arrayidx5.imag.1
  %add.1 = fadd fast float %mul9.1, %mul.1
  %conv.1 = fpext float %add.1 to double
  %add10.1 = fadd fast double %conv.1, %add10
  ; Pair 2 of the row: same computation, accumulated onto %add10.1.
  %arrayidx5.realp.2 = getelementptr inbounds [3 x { float, float }], ptr %a, i64 %indvars.iv, i64 2, i32 0
  %arrayidx5.real.2 = load float, ptr %arrayidx5.realp.2, align 8
  %arrayidx5.imagp.2 = getelementptr inbounds [3 x { float, float }], ptr %a, i64 %indvars.iv, i64 2, i32 1
  %arrayidx5.imag.2 = load float, ptr %arrayidx5.imagp.2, align 8
  %mul.2 = fmul fast float %arrayidx5.real.2, %arrayidx5.real.2
  %mul9.2 = fmul fast float %arrayidx5.imag.2, %arrayidx5.imag.2
  %add.2 = fadd fast float %mul9.2, %mul.2
  %conv.2 = fpext float %add.2 to double
  %add10.2 = fadd fast double %conv.2, %add10.1
  ; Advance the row index; exit once its low 32 bits equal %n.
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.cond.for.end13_crit_edge, label %for.cond1.preheader

for.cond.for.end13_crit_edge:                     ; preds = %for.cond1.preheader
  ; Narrow the double total to the float that gets stored.
  %add10.2.lcssa = phi double [ %add10.2, %for.cond1.preheader ]
  %phitmp = fptrunc double %add10.2.lcssa to float
  br label %for.end13

for.end13:                                        ; preds = %for.cond.for.end13_crit_edge, %entry
  ; Result is the loop total, or 0.0 when the loop was skipped.
  %sum.0.lcssa = phi float [ %phitmp, %for.cond.for.end13_crit_edge ], [ 0.000000e+00, %entry ]
  store float %sum.0.lcssa, ptr %r, align 4
  ret void
}

