xref: /llvm-project/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-memset.ll (revision 055fb7795aa219a3d274d280ec9129784f169f56)
1; RUN: llc < %s -O3 -mtriple=arm64-unknown-unknown -mcpu=cyclone -pre-RA-sched=list-hybrid | FileCheck %s
2; <rdar://problem/11635990> [arm64] [lsr] Inefficient EA/loop-exit calc in bzero_phys
3;
4; LSR on loop %while.cond should reassociate non-address mode
5; expressions at use %cmp16 to avoid sinking computation into %while.body18.
6;
7; Remove the -pre-RA-sched=list-hybrid option after fixing:
8; <rdar://problem/12702735> [ARM64][coalescer] need better register
9; coalescing for simple unit tests.
10
11; CHECK: @memset
12; CHECK: %while.body18{{$}}
13; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #8
14; First set the IVREG variable, then use it
15; CHECK-NEXT: sub [[IVREG:x[0-9]+]],
16; CHECK: [[IVREG]], #8
17; CHECK-NEXT: cmp  [[IVREG]], #7
18; CHECK-NEXT: b.hi
; @memset(dest, val, len): stores the low 8 bits of %val into %len consecutive
; bytes starting at %dest and returns %dest unchanged.
;
; Control-flow outline:
;   entry               - return immediately when %len is zero.
;   while.cond/.body    - store single bytes until the pointer's low three bits
;                         are zero or the remaining length reaches zero.
;   if.end9             - build a 64-bit value holding the byte in all eight
;                         byte positions.
;   while.body18        - store that 64-bit value, eight bytes per iteration,
;                         while more than 7 bytes remain. This is the loop the
;                         CHECK lines at the top of the file match against.
;   while.body29        - store the remaining bytes one at a time.
19define ptr @memset(ptr %dest, i32 %val, i64 %len) nounwind ssp noimplicitfloat {
20entry:
; Nothing to store for a zero length.
21  %cmp = icmp eq i64 %len, 0
22  br i1 %cmp, label %done, label %while.cond.preheader
23
24while.cond.preheader:                             ; preds = %entry
; %conv is the byte written by both byte-granular loops.
25  %conv = trunc i32 %val to i8
26  br label %while.cond
27
; Head of the leading byte loop: %ptr.0 is the current destination and
; %len.addr.0 the number of bytes still to write.
28while.cond:                                       ; preds = %while.body, %while.cond.preheader
29  %ptr.0 = phi ptr [ %incdec.ptr, %while.body ], [ %dest, %while.cond.preheader ]
30  %len.addr.0 = phi i64 [ %dec, %while.body ], [ %len, %while.cond.preheader ]
31  %cond = icmp eq i64 %len.addr.0, 0
32  br i1 %cond, label %done, label %land.rhs
33
; Leave the byte loop once the low three address bits are clear
; (the address is a multiple of 8).
34land.rhs:                                         ; preds = %while.cond
35  %0 = ptrtoint ptr %ptr.0 to i64
36  %and = and i64 %0, 7
37  %cmp5 = icmp eq i64 %and, 0
38  br i1 %cmp5, label %if.end9, label %while.body
39
; Store one byte, advance the pointer by one, decrement the remaining length.
40while.body:                                       ; preds = %land.rhs
41  %incdec.ptr = getelementptr inbounds i8, ptr %ptr.0, i64 1
42  store i8 %conv, ptr %ptr.0, align 1, !tbaa !0
43  %dec = add i64 %len.addr.0, -1
44  br label %while.cond
45
; Replicate the low byte of %val into every byte of the i64 %ins by OR-ing
; copies shifted left by 8, 16, ..., 56. %len.addr.0 is non-zero here because
; %cond was false on the way in.
46if.end9:                                          ; preds = %land.rhs
47  %conv.mask = and i32 %val, 255
48  %1 = zext i32 %conv.mask to i64
49  %2 = shl nuw nsw i64 %1, 8
50  %ins18 = or i64 %2, %1
51  %3 = shl nuw nsw i64 %1, 16
52  %ins15 = or i64 %ins18, %3
53  %4 = shl nuw nsw i64 %1, 24
54  %5 = shl nuw nsw i64 %1, 32
55  %mask8 = or i64 %ins15, %4
56  %6 = shl nuw nsw i64 %1, 40
57  %mask5 = or i64 %mask8, %5
58  %7 = shl nuw nsw i64 %1, 48
59  %8 = shl nuw i64 %1, 56
60  %mask2.masked = or i64 %mask5, %6
61  %mask = or i64 %mask2.masked, %7
62  %ins = or i64 %mask, %8
; Skip the 8-byte loop entirely when fewer than 8 bytes remain.
63  %cmp1636 = icmp ugt i64 %len.addr.0, 7
64  br i1 %cmp1636, label %while.body18, label %while.body29.lr.ph
65
; 8-byte store loop (the loop under test). Each iteration stores %ins, advances
; the pointer by one i64, and subtracts 8 from the remaining length; it repeats
; while the updated length %sub is still greater than 7 (unsigned compare).
66while.body18:                                     ; preds = %if.end9, %while.body18
67  %wideptr.038 = phi ptr [ %incdec.ptr19, %while.body18 ], [ %ptr.0, %if.end9 ]
68  %len.addr.137 = phi i64 [ %sub, %while.body18 ], [ %len.addr.0, %if.end9 ]
69  %incdec.ptr19 = getelementptr inbounds i64, ptr %wideptr.038, i64 1
70  store i64 %ins, ptr %wideptr.038, align 8, !tbaa !2
71  %sub = add i64 %len.addr.137, -8
72  %cmp16 = icmp ugt i64 %sub, 7
73  br i1 %cmp16, label %while.body18, label %while.end20
74
; Finished if the 8-byte loop consumed the length exactly; otherwise 1..7 bytes
; remain for the trailing byte loop.
75while.end20:                                      ; preds = %while.body18
76  %cmp21 = icmp eq i64 %sub, 0
77  br i1 %cmp21, label %done, label %while.body29.lr.ph
78
; Merge the remaining length and current pointer from the two ways of reaching
; the trailing loop. Both incoming lengths are non-zero (%cond and %cmp21 were
; false on the respective paths).
79while.body29.lr.ph:                               ; preds = %while.end20, %if.end9
80  %len.addr.1.lcssa49 = phi i64 [ %sub, %while.end20 ], [ %len.addr.0, %if.end9 ]
81  %wideptr.0.lcssa48 = phi ptr [ %incdec.ptr19, %while.end20 ], [ %ptr.0, %if.end9 ]
82  br label %while.body29
83
; Trailing byte loop: store one byte per iteration and exit when the
; decremented count reaches zero (the test is at the bottom of the loop).
84while.body29:                                     ; preds = %while.body29, %while.body29.lr.ph
85  %len.addr.235 = phi i64 [ %len.addr.1.lcssa49, %while.body29.lr.ph ], [ %dec26, %while.body29 ]
86  %ptr.134 = phi ptr [ %wideptr.0.lcssa48, %while.body29.lr.ph ], [ %incdec.ptr31, %while.body29 ]
87  %dec26 = add i64 %len.addr.235, -1
88  %incdec.ptr31 = getelementptr inbounds i8, ptr %ptr.134, i64 1
89  store i8 %conv, ptr %ptr.134, align 1, !tbaa !0
90  %cmp27 = icmp eq i64 %dec26, 0
91  br i1 %cmp27, label %done, label %while.body29
92
; Common exit: the original destination pointer is the return value.
93done:                                             ; preds = %while.cond, %while.body29, %while.end20, %entry
94  ret ptr %dest
95}
96
97!0 = !{!"omnipotent char", !1}
98!1 = !{!"Simple C/C++ TBAA"}
99!2 = !{!"long long", !0}
100