xref: /llvm-project/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll (revision b51153792b1fdfe93d3a20a226466b44c8f23eac)
1; RUN: opt < %s -loop-reduce -mtriple=x86_64-- -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
2; RUN: opt < %s -loop-reduce -mtriple=x86_64-- -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
3; RUN: llc < %s -O2 -mtriple=x86_64-- -lsr-insns-cost -asm-verbose=0 | FileCheck %s
4
5; OPT checks that LSR prefers fewer instructions to fewer registers.
6; For x86, LSR should prefer a complicated address to new LSR induction
7; variables.
8
9; BOTH: for.body:
10; INSN:   [[OFFSET1:%.+]] = shl nuw nsw i64 %indvars.iv, 2
11; INSN:   getelementptr i8, ptr %x, i64 [[OFFSET1]]
12; INSN:   [[OFFSET2:%.+]] = shl nuw nsw i64 %indvars.iv, 2
13; INSN:   getelementptr i8, ptr %y, i64 [[OFFSET2]]
14; INSN:   [[OFFSET3:%.+]] = shl nuw nsw i64 %indvars.iv, 2
15; INSN:   getelementptr i8, ptr %q, i64 [[OFFSET3]]
16; REGS:   %lsr.iv4 = phi
17; REGS:   %lsr.iv2 = phi
18; REGS:   %lsr.iv1 = phi
19; REGS:   getelementptr i8, ptr %lsr.iv1, i64 4
20; REGS:   getelementptr i8, ptr %lsr.iv2, i64 4
21; REGS:   getelementptr i8, ptr %lsr.iv4, i64 4
22
23; LLC checks that LSR prefers fewer instructions to fewer registers.
24; LSR should prefer a complicated address to additional add instructions.
25
26; CHECK:      LBB0_2:
27; CHECK-NEXT:   movl (%r{{.+}},{{.*}}), [[REG:%[a-z0-9]+]]
28; CHECK-NEXT:   addl (%r{{.+}},{{.*}}), [[REG]]
29; CHECK-NEXT:   movl [[REG]], (%{{.*}})
30
31target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
32
33; Function Attrs: norecurse nounwind uwtable
; foo(x, y, q, n): when n > 0, stores x[i] + y[i] into q[i] for each i from 0
; up to (but not including) n zero-extended to i64. All three i32 accesses in
; the loop are indexed by the same induction variable, %indvars.iv; the check
; lines at the top of this file inspect how that loop body is rewritten.
34define void @foo(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocapture %q, i32 %n) {
35entry:
36  %cmp10 = icmp sgt i32 %n, 0 ; signed test: n <= 0 bypasses the loop
37  br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup
38
39for.body.preheader:                               ; preds = %entry
40  %wide.trip.count = zext i32 %n to i64 ; trip count widened to the i64 index type
41  br label %for.body
42
43for.cond.cleanup.loopexit:                        ; preds = %for.body
44  br label %for.cond.cleanup
45
46for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
47  ret void
48
49for.body:                                         ; preds = %for.body, %for.body.preheader
50  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] ; i, starts at 0
51  %arrayidx = getelementptr inbounds i32, ptr %x, i64 %indvars.iv ; &x[i]
52  %tmp = load i32, ptr %arrayidx, align 4
53  %arrayidx2 = getelementptr inbounds i32, ptr %y, i64 %indvars.iv ; &y[i]
54  %tmp1 = load i32, ptr %arrayidx2, align 4
55  %add = add nsw i32 %tmp1, %tmp ; y[i] + x[i]
56  %arrayidx4 = getelementptr inbounds i32, ptr %q, i64 %indvars.iv ; &q[i]
57  store i32 %add, ptr %arrayidx4, align 4
58  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; i + 1
59  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count ; leave once i + 1 reaches the trip count
60  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
61}
62