xref: /llvm-project/llvm/test/CodeGen/PowerPC/loop-align.ll (revision 427fb35192f1f7bb694a5910b05abc5925a798b2)
1; Test the loop alignment.
2; RUN: llc -verify-machineinstrs -mcpu=a2 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,GENERIC
3; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR
4; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR
5; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR
6; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR
7
8; Test the loop alignment and the option -disable-ppc-innermost-loop-align32.
9; RUN: llc -verify-machineinstrs -mcpu=a2 -disable-ppc-innermost-loop-align32 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,GENERIC-DISABLE-PPC-INNERMOST-LOOP-ALIGN32
10; RUN: llc -verify-machineinstrs -mcpu=pwr8 -disable-ppc-innermost-loop-align32 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32
11; RUN: llc -verify-machineinstrs -mcpu=pwr9 -disable-ppc-innermost-loop-align32 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32
12; RUN: llc -verify-machineinstrs -mcpu=pwr8 -disable-ppc-innermost-loop-align32 -mtriple powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32
13; RUN: llc -verify-machineinstrs -mcpu=pwr9 -disable-ppc-innermost-loop-align32 -mtriple powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32
14
15
; Argument record read by @big_loop and @big_loop_cold_innerloop:
;   field 0 (ptr) - base pointer of the i32 array the loops store into
;   field 1 (i32) - initial value of the outer loop counter
;   field 2 (i32) - sign-extended to i64 and used as the inner loop's start index
16%struct.parm = type { ptr, i32, i32 }
17
18; Test the loop alignment when the innermost hot loop has more than 8 instructions.
; @big_loop - two nested do-while loops driven by the %struct.parm pointed to
; by %arg. The inner loop (do.body3) issues four i32 stores per iteration and
; counts its index down; the outer loop (do.body .. do.end) re-runs the inner
; loop while counting %m.0 down. Both exit tests compare the value from
; *before* the decrement, so the iteration for value 0 still executes.
19define void @big_loop(ptr %arg) {
20entry:
  ; Load the three fields of %struct.parm: array base, outer count, inner start.
21  %localArg.sroa.0.0.copyload = load ptr, ptr %arg, align 8
22  %localArg.sroa.4.0..sroa_idx56 = getelementptr inbounds %struct.parm, ptr %arg, i64 0, i32 1
23  %localArg.sroa.4.0.copyload = load i32, ptr %localArg.sroa.4.0..sroa_idx56, align 8
24  %localArg.sroa.5.0..sroa_idx58 = getelementptr inbounds %struct.parm, ptr %arg, i64 0, i32 2
25  %localArg.sroa.5.0.copyload = load i32, ptr %localArg.sroa.5.0..sroa_idx58, align 4
  ; %0 is the inner loop's starting index (field 2 widened to i64).
26  %0 = sext i32 %localArg.sroa.5.0.copyload to i64
27  br label %do.body
28
  ; Outer loop header: %m.0 is the remaining outer count.
29do.body:                                          ; preds = %do.end, %entry
30  %m.0 = phi i32 [ %localArg.sroa.4.0.copyload, %entry ], [ %dec24, %do.end ]
31  br label %do.body3
32
  ; Inner loop: for index iv, store
  ;   base[iv+3] = iv+3, base[iv+4] = iv+4, base[iv+2] = iv+2, base[iv] = iv+1
  ; (values truncated to i32), then decrement iv.
33do.body3:                                         ; preds = %do.body3, %do.body
34  %indvars.iv = phi i64 [ %indvars.iv.next, %do.body3 ], [ %0, %do.body ]
35  %1 = add nsw i64 %indvars.iv, 2
36  %arrayidx = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %1
37  %2 = add nsw i64 %indvars.iv, 3
38  %3 = trunc i64 %1 to i32
39  %4 = add nsw i64 %indvars.iv, 4
40  %arrayidx10 = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %2
41  %5 = trunc i64 %2 to i32
42  store i32 %5, ptr %arrayidx10, align 4
43  %arrayidx12 = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %4
44  %6 = trunc i64 %4 to i32
45  store i32 %6, ptr %arrayidx12, align 4
46  store i32 %3, ptr %arrayidx, align 4
47  %arrayidx21 = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %indvars.iv
48  %7 = trunc i64 %indvars.iv to i32
49  %8 = add i32 %7, 1
50  store i32 %8, ptr %arrayidx21, align 4
51  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  ; Exit test uses the pre-decrement index: leave after processing index 0.
52  %9 = icmp eq i64 %indvars.iv, 0
53  br i1 %9, label %do.end, label %do.body3
54
  ; Outer loop latch: decrement the outer counter, exit once the old value was 0.
55do.end:                                           ; preds = %do.body3
56  %dec24 = add nsw i32 %m.0, -1
57  %tobool25 = icmp eq i32 %m.0, 0
58  br i1 %tobool25, label %do.end26, label %do.body
59
  ; After both loops: clear the element at the inner loop's starting index.
60do.end26:                                         ; preds = %do.end
61  %arrayidx28 = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %0
62  store i32 0, ptr %arrayidx28, align 4
63  ret void
64
65
; Expected output, in order: mtctr, one .p2align, then bdnz.
; The .p2align operand is 4 for the a2 runs and 5 for the pwr8/pwr9 runs;
; with -disable-ppc-innermost-loop-align32 both the a2 and the pwr8/pwr9 runs
; expect 4.
66; CHECK-LABEL: @big_loop
67; CHECK: mtctr
68; GENERIC: .p2align  4
69; PWR: .p2align  5
70; GENERIC-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align  4
71; PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align  4
72; CHECK: bdnz
73}
74
75; Test the loop alignment when the innermost hot loop has 5-8 instructions.
; @general_loop - nested while loops over the i32 array %s, controlled by %m.
; Returns immediately when %m is 0. Each outer pass sets %dec = m - 1 and runs
; an inner loop that performs three i32 stores per iteration while counting n
; down from m to 1; the outer loop then continues with m = %dec until %dec
; reaches 0.
76define void @general_loop(ptr %s, i64 %m) {
77entry:
  ; Guard: nothing to do for m == 0.
78  %tobool40 = icmp eq i64 %m, 0
79  br i1 %tobool40, label %while.end18, label %while.body3.lr.ph
80
  ; Outer loop latch: stop once the decremented outer count is 0.
81while.cond.loopexit:                              ; preds = %while.body3
82  %tobool = icmp eq i64 %dec, 0
83  br i1 %tobool, label %while.end18, label %while.body3.lr.ph
84
  ; Inner-loop preheader: compute the values that are invariant in the inner
  ; loop (m - 1 and the i32 truncations of m and m - 1).
85while.body3.lr.ph:                                ; preds = %entry, %while.cond.loopexit
86  %m.addr.041 = phi i64 [ %dec, %while.cond.loopexit ], [ %m, %entry ]
87  %dec = add nsw i64 %m.addr.041, -1
88  %conv = trunc i64 %m.addr.041 to i32
89  %conv11 = trunc i64 %dec to i32
90  br label %while.body3
91
  ; Inner loop: for index n, store
  ;   s[n+2] = trunc(dec - (n+2)), s[n+1] = trunc(dec), s[n] = trunc(m)
  ; then decrement n and exit once the decremented value is 0.
92while.body3:                                      ; preds = %while.body3.lr.ph, %while.body3
93  %n.039 = phi i64 [ %m.addr.041, %while.body3.lr.ph ], [ %dec16, %while.body3 ]
94  %inc = add nsw i64 %n.039, 1
95  %arrayidx = getelementptr inbounds i32, ptr %s, i64 %n.039
96  %inc5 = add nsw i64 %n.039, 2
97  %arrayidx6 = getelementptr inbounds i32, ptr %s, i64 %inc
98  %sub = sub nsw i64 %dec, %inc5
99  %conv7 = trunc i64 %sub to i32
100  %arrayidx9 = getelementptr inbounds i32, ptr %s, i64 %inc5
101  store i32 %conv7, ptr %arrayidx9, align 4
102  store i32 %conv11, ptr %arrayidx6, align 4
103  store i32 %conv, ptr %arrayidx, align 4
104  %dec16 = add nsw i64 %n.039, -1
105  %tobool2 = icmp eq i64 %dec16, 0
106  br i1 %tobool2, label %while.cond.loopexit, label %while.body3
107
108while.end18:                                      ; preds = %while.cond.loopexit, %entry
109  ret void
110
111
; Expected output, in order: mtctr, one .p2align, then bdnz.
; The .p2align operand is 4 for the a2 runs and 5 for the pwr8/pwr9 runs.
; With -disable-ppc-innermost-loop-align32 the a2 runs still expect 4 and the
; pwr8/pwr9 runs still expect 5, i.e. the option does not change the
; expectation for this loop.
112; CHECK-LABEL: @general_loop
113; CHECK: mtctr
114; GENERIC: .p2align  4
115; PWR: .p2align  5
116; GENERIC-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align  4
117; PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align  5
118; CHECK: bdnz
119}
120
121; Test the small loop alignment when the innermost hot loop has fewer than 4 instructions.
; @small_loop - two nested do-while loops whose counters are decremented by
; inline assembly. The inner loop body consists only of the inline-asm "subi",
; the compare against 0 and the back-edge branch.
122define void @small_loop(i64 %m) {
123entry:
124  br label %do.body
125
  ; Outer loop header: %m.addr.0 is the current outer counter.
126do.body:                                          ; preds = %do.end, %entry
127  %m.addr.0 = phi i64 [ %m, %entry ], [ %1, %do.end ]
128  br label %do.body1
129
  ; Inner loop: decrement %n.0 via inline asm until the result is 0.
  ; The "0" input constraint ties the input to output operand 0, so the asm
  ; updates the value in place.
130do.body1:                                         ; preds = %do.body1, %do.body
131  %n.0 = phi i64 [ %m.addr.0, %do.body ], [ %0, %do.body1 ]
132  %0 = tail call i64 asm "subi     $0,$0,1", "=r,0"(i64 %n.0)
133  %tobool = icmp eq i64 %0, 0
134  br i1 %tobool, label %do.end, label %do.body1
135
  ; Outer loop latch: decrement the outer counter the same way. The template
  ; names operand $1, which is the input tied to output operand 0.
136do.end:                                           ; preds = %do.body1
137  %1 = tail call i64 asm "subi     $1,$1,1", "=r,0"(i64 %m.addr.0)
138  %tobool3 = icmp eq i64 %1, 0
139  br i1 %tobool3, label %do.end4, label %do.body
140
141do.end4:                                          ; preds = %do.end
142  ret void
143
144
; Expected output, in order: mr, one .p2align, then bne (this function is
; checked for a compare-and-branch loop, not mtctr/bdnz).
; The .p2align operand is 4 for the a2 runs and 5 for the pwr8/pwr9 runs;
; with -disable-ppc-innermost-loop-align32 both expect 4.
145; CHECK-LABEL: @small_loop
146; CHECK: mr
147; GENERIC: .p2align  4
148; PWR: .p2align  5
149; GENERIC-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align  4
150; PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align  4
151; CHECK: bne
152}
153
154; Test the loop alignment when the innermost cold loop has more than 8 instructions.
; @big_loop_cold_innerloop - same nested do-while structure as @big_loop: the
; inner loop (do.body3) issues four i32 stores per iteration and counts its
; index down; the outer loop counts %m.0 down.
; NOTE(review): the IR visible here matches @big_loop instruction for
; instruction and no branch-weight or other profile metadata is attached to
; any branch, so nothing shown marks the inner loop as cold. Confirm whether
; metadata was dropped from this copy or the "cold" wording is stale.
155define void @big_loop_cold_innerloop(ptr %arg) {
156entry:
  ; Load the three fields of %struct.parm: array base, outer count, inner start.
157  %localArg.sroa.0.0.copyload = load ptr, ptr %arg, align 8
158  %localArg.sroa.4.0..sroa_idx56 = getelementptr inbounds %struct.parm, ptr %arg, i64 0, i32 1
159  %localArg.sroa.4.0.copyload = load i32, ptr %localArg.sroa.4.0..sroa_idx56, align 8
160  %localArg.sroa.5.0..sroa_idx58 = getelementptr inbounds %struct.parm, ptr %arg, i64 0, i32 2
161  %localArg.sroa.5.0.copyload = load i32, ptr %localArg.sroa.5.0..sroa_idx58, align 4
  ; %0 is the inner loop's starting index (field 2 widened to i64).
162  %0 = sext i32 %localArg.sroa.5.0.copyload to i64
163  br label %do.body
164
  ; Outer loop header: %m.0 is the remaining outer count.
165do.body:                                          ; preds = %do.end, %entry
166  %m.0 = phi i32 [ %localArg.sroa.4.0.copyload, %entry ], [ %dec24, %do.end ]
167  br label %do.body3
168
  ; Inner loop: for index iv, store
  ;   base[iv+3] = iv+3, base[iv+4] = iv+4, base[iv+2] = iv+2, base[iv] = iv+1
  ; (values truncated to i32), then decrement iv.
169do.body3:                                         ; preds = %do.body3, %do.body
170  %indvars.iv = phi i64 [ %indvars.iv.next, %do.body3 ], [ %0, %do.body ]
171  %1 = add nsw i64 %indvars.iv, 2
172  %arrayidx = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %1
173  %2 = add nsw i64 %indvars.iv, 3
174  %3 = trunc i64 %1 to i32
175  %4 = add nsw i64 %indvars.iv, 4
176  %arrayidx10 = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %2
177  %5 = trunc i64 %2 to i32
178  store i32 %5, ptr %arrayidx10, align 4
179  %arrayidx12 = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %4
180  %6 = trunc i64 %4 to i32
181  store i32 %6, ptr %arrayidx12, align 4
182  store i32 %3, ptr %arrayidx, align 4
183  %arrayidx21 = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %indvars.iv
184  %7 = trunc i64 %indvars.iv to i32
185  %8 = add i32 %7, 1
186  store i32 %8, ptr %arrayidx21, align 4
187  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  ; Exit test uses the pre-decrement index: leave after processing index 0.
188  %9 = icmp eq i64 %indvars.iv, 0
189  br i1 %9, label %do.end, label %do.body3
190
  ; Outer loop latch: decrement the outer counter, exit once the old value was 0.
191do.end:                                           ; preds = %do.body3
192  %dec24 = add nsw i32 %m.0, -1
193  %tobool25 = icmp eq i32 %m.0, 0
194  br i1 %tobool25, label %do.end26, label %do.body
195
  ; After both loops: clear the element at the inner loop's starting index.
196do.end26:                                         ; preds = %do.end
197  %arrayidx28 = getelementptr inbounds i32, ptr %localArg.sroa.0.0.copyload, i64 %0
198  store i32 0, ptr %arrayidx28, align 4
199  ret void
200
201
; Expected output, in order: mtctr ... bdnz.
; The pwr8/pwr9 runs without -disable-ppc-innermost-loop-align32 must first
; see one ".p2align 5" after mtctr; every run then requires that no (further)
; ".p2align 5" appears before bdnz. The patterns use a single space where the
; other functions use two; FileCheck canonicalizes horizontal whitespace by
; default, so both spellings match the same output.
202; CHECK-LABEL: @big_loop_cold_innerloop
203; CHECK: mtctr
204; PWR: .p2align 5
205; CHECK-NOT: .p2align 5
206; CHECK: bdnz
207}
208