xref: /llvm-project/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll (revision bed1c7f061aa12417aa081e334afdba45767b938)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=armv6t2-eabi %s -o - | FileCheck %s --check-prefix=CHECK
3
4; Checks that SSAT is still generated when the input loop has already been unrolled
5
; ssat_unroll: element-wise saturating multiply over i16 arrays, written as
; already-unrolled IR. For each element the function computes
;   trunc_to_i16(clamp(ashr(sext(b) * sext(a), 14), -32768, 32767))
; where a and b are loaded through %pSrcA and %pSrcB and the result is stored
; through %pDst. In this function the clamp is spelled as two icmp+select pairs.
;   %blockSize - element count; zero branches straight to %while.end.
; Layout: an optional single-element prologue (taken when %blockSize is odd),
; followed by a loop that handles two elements per trip and counts down by 2.
; The autogenerated assertions below expect each of the three clamps to be
; selected as "ssat rX, #16, rX, asr #14".
6define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) {
7; CHECK-LABEL: ssat_unroll:
8; CHECK:       @ %bb.0: @ %entry
9; CHECK-NEXT:    .save {r11, lr}
10; CHECK-NEXT:    push {r11, lr}
11; CHECK-NEXT:    cmp r3, #0
12; CHECK-NEXT:    beq .LBB0_5
13; CHECK-NEXT:  @ %bb.1: @ %while.body.preheader
14; CHECK-NEXT:    sub r12, r3, #1
15; CHECK-NEXT:    tst r3, #1
16; CHECK-NEXT:    beq .LBB0_3
17; CHECK-NEXT:  @ %bb.2: @ %while.body.prol.preheader
18; CHECK-NEXT:    ldrsh lr, [r0], #2
19; CHECK-NEXT:    ldrsh r3, [r1], #2
20; CHECK-NEXT:    smulbb r3, r3, lr
21; CHECK-NEXT:    ssat r3, #16, r3, asr #14
22; CHECK-NEXT:    strh r3, [r2], #2
23; CHECK-NEXT:    mov r3, r12
24; CHECK-NEXT:  .LBB0_3: @ %while.body.prol.loopexit
25; CHECK-NEXT:    cmp r12, #0
26; CHECK-NEXT:    popeq {r11, pc}
27; CHECK-NEXT:  .LBB0_4: @ %while.body
28; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
29; CHECK-NEXT:    ldrsh r12, [r0]
30; CHECK-NEXT:    subs r3, r3, #2
31; CHECK-NEXT:    ldrsh lr, [r1]
32; CHECK-NEXT:    smulbb r12, lr, r12
33; CHECK-NEXT:    ssat r12, #16, r12, asr #14
34; CHECK-NEXT:    strh r12, [r2]
35; CHECK-NEXT:    ldrsh r12, [r0, #2]
36; CHECK-NEXT:    add r0, r0, #4
37; CHECK-NEXT:    ldrsh lr, [r1, #2]
38; CHECK-NEXT:    add r1, r1, #4
39; CHECK-NEXT:    smulbb r12, lr, r12
40; CHECK-NEXT:    ssat r12, #16, r12, asr #14
41; CHECK-NEXT:    strh r12, [r2, #2]
42; CHECK-NEXT:    add r2, r2, #4
43; CHECK-NEXT:    bne .LBB0_4
44; CHECK-NEXT:  .LBB0_5: @ %while.end
45; CHECK-NEXT:    pop {r11, pc}
46entry:
  ; Nothing to do for an empty block.
47  %cmp.not7 = icmp eq i32 %blockSize, 0
48  br i1 %cmp.not7, label %while.end, label %while.body.preheader
49
50while.body.preheader:                             ; preds = %entry
  ; %0 is the element count left once one prologue element has been handled;
  ; %xtraiter is the low bit of %blockSize, so the prologue is skipped when the
  ; count is even.
51  %0 = add i32 %blockSize, -1
52  %xtraiter = and i32 %blockSize, 1
53  %lcmp.mod.not = icmp eq i32 %xtraiter, 0
54  br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol.preheader
55
56while.body.prol.preheader:                        ; preds = %while.body.preheader
  ; Prologue: handle a single element, reached only when %blockSize is odd.
57  %incdec.ptr.prol = getelementptr inbounds i16, ptr %pSrcA, i32 1
58  %1 = load i16, ptr %pSrcA
59  %conv.prol = sext i16 %1 to i32
60  %incdec.ptr1.prol = getelementptr inbounds i16, ptr %pSrcB, i32 1
61  %2 = load i16, ptr %pSrcB
62  %conv2.prol = sext i16 %2 to i32
63  %mul.prol = mul nsw i32 %conv2.prol, %conv.prol
64  %shr.prol = ashr i32 %mul.prol, 14
  ; Clamp to [-32768, 32767]: pick the larger of the value and the lower bound,
  ; then the smaller of that and the upper bound.
65  %3 = icmp sgt i32 %shr.prol, -32768
66  %4 = select i1 %3, i32 %shr.prol, i32 -32768
67  %5 = icmp slt i32 %4, 32767
68  %spec.select.i.prol = select i1 %5, i32 %4, i32 32767
69  %conv3.prol = trunc i32 %spec.select.i.prol to i16
70  %incdec.ptr4.prol = getelementptr inbounds i16, ptr %pDst, i32 1
71  store i16 %conv3.prol, ptr %pDst
72  br label %while.body.prol.loopexit
73
74while.body.prol.loopexit:                         ; preds = %while.body.prol.preheader, %while.body.preheader
  ; Merge the "prologue skipped" and "prologue taken" values of the counter and
  ; of the three pointers, then leave if %0 (%blockSize - 1) is zero.
75  %blkCnt.011.unr = phi i32 [ %blockSize, %while.body.preheader ], [ %0, %while.body.prol.preheader ]
76  %pSrcA.addr.010.unr = phi ptr [ %pSrcA, %while.body.preheader ], [ %incdec.ptr.prol, %while.body.prol.preheader ]
77  %pDst.addr.09.unr = phi ptr [ %pDst, %while.body.preheader ], [ %incdec.ptr4.prol, %while.body.prol.preheader ]
78  %pSrcB.addr.08.unr = phi ptr [ %pSrcB, %while.body.preheader ], [ %incdec.ptr1.prol, %while.body.prol.preheader ]
79  %6 = icmp eq i32 %0, 0
80  br i1 %6, label %while.end, label %while.body
81
82while.body:                                       ; preds = %while.body.prol.loopexit, %while.body
  ; Loop-carried counter and pointers; each trip advances the pointers by two
  ; i16 elements and the counter by -2.
83  %blkCnt.011 = phi i32 [ %dec.1, %while.body ], [ %blkCnt.011.unr, %while.body.prol.loopexit ]
84  %pSrcA.addr.010 = phi ptr [ %incdec.ptr.1, %while.body ], [ %pSrcA.addr.010.unr, %while.body.prol.loopexit ]
85  %pDst.addr.09 = phi ptr [ %incdec.ptr4.1, %while.body ], [ %pDst.addr.09.unr, %while.body.prol.loopexit ]
86  %pSrcB.addr.08 = phi ptr [ %incdec.ptr1.1, %while.body ], [ %pSrcB.addr.08.unr, %while.body.prol.loopexit ]
  ; First element of the pair, addressed directly through the carried pointers.
87  %incdec.ptr = getelementptr inbounds i16, ptr %pSrcA.addr.010, i32 1
88  %7 = load i16, ptr %pSrcA.addr.010
89  %conv = sext i16 %7 to i32
90  %incdec.ptr1 = getelementptr inbounds i16, ptr %pSrcB.addr.08, i32 1
91  %8 = load i16, ptr %pSrcB.addr.08
92  %conv2 = sext i16 %8 to i32
93  %mul = mul nsw i32 %conv2, %conv
94  %shr = ashr i32 %mul, 14
95  %9 = icmp sgt i32 %shr, -32768
96  %10 = select i1 %9, i32 %shr, i32 -32768
97  %11 = icmp slt i32 %10, 32767
98  %spec.select.i = select i1 %11, i32 %10, i32 32767
99  %conv3 = trunc i32 %spec.select.i to i16
100  %incdec.ptr4 = getelementptr inbounds i16, ptr %pDst.addr.09, i32 1
101  store i16 %conv3, ptr %pDst.addr.09
  ; Second element of the pair, addressed one i16 past the first.
102  %incdec.ptr.1 = getelementptr inbounds i16, ptr %pSrcA.addr.010, i32 2
103  %12 = load i16, ptr %incdec.ptr
104  %conv.1 = sext i16 %12 to i32
105  %incdec.ptr1.1 = getelementptr inbounds i16, ptr %pSrcB.addr.08, i32 2
106  %13 = load i16, ptr %incdec.ptr1
107  %conv2.1 = sext i16 %13 to i32
108  %mul.1 = mul nsw i32 %conv2.1, %conv.1
109  %shr.1 = ashr i32 %mul.1, 14
110  %14 = icmp sgt i32 %shr.1, -32768
111  %15 = select i1 %14, i32 %shr.1, i32 -32768
112  %16 = icmp slt i32 %15, 32767
113  %spec.select.i.1 = select i1 %16, i32 %15, i32 32767
114  %conv3.1 = trunc i32 %spec.select.i.1 to i16
115  %incdec.ptr4.1 = getelementptr inbounds i16, ptr %pDst.addr.09, i32 2
116  store i16 %conv3.1, ptr %incdec.ptr4
  ; Two elements were consumed; keep looping until the counter reaches zero.
117  %dec.1 = add i32 %blkCnt.011, -2
118  %cmp.not.1 = icmp eq i32 %dec.1, 0
119  br i1 %cmp.not.1, label %while.end, label %while.body
120
121while.end:                                        ; preds = %while.body, %while.body.prol.loopexit, %entry
122  ret void
123}
124
; ssat_unroll_minmax: element-wise saturating multiply over i16 arrays, written
; as already-unrolled IR. For each element the function computes
;   trunc_to_i16(smin(smax(ashr(sext(b) * sext(a), 14), -32768), 32767))
; where a and b are loaded through %pSrcA and %pSrcB and the result is stored
; through %pDst. In this function the clamp is spelled with the llvm.smax.i32
; and llvm.smin.i32 intrinsics rather than icmp+select.
;   %blockSize - element count; zero branches straight to %while.end.
; Layout: an optional single-element prologue (taken when %blockSize is odd),
; followed by a loop that handles two elements per trip and counts down by 2.
; The autogenerated assertions below expect each of the three clamps to be
; selected as "ssat rX, #16, rX, asr #14".
125define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, ptr nocapture writeonly %pDst, i32 %blockSize) {
126; CHECK-LABEL: ssat_unroll_minmax:
127; CHECK:       @ %bb.0: @ %entry
128; CHECK-NEXT:    .save {r11, lr}
129; CHECK-NEXT:    push {r11, lr}
130; CHECK-NEXT:    cmp r3, #0
131; CHECK-NEXT:    beq .LBB1_5
132; CHECK-NEXT:  @ %bb.1: @ %while.body.preheader
133; CHECK-NEXT:    sub r12, r3, #1
134; CHECK-NEXT:    tst r3, #1
135; CHECK-NEXT:    beq .LBB1_3
136; CHECK-NEXT:  @ %bb.2: @ %while.body.prol.preheader
137; CHECK-NEXT:    ldrsh lr, [r0], #2
138; CHECK-NEXT:    ldrsh r3, [r1], #2
139; CHECK-NEXT:    smulbb r3, r3, lr
140; CHECK-NEXT:    ssat r3, #16, r3, asr #14
141; CHECK-NEXT:    strh r3, [r2], #2
142; CHECK-NEXT:    mov r3, r12
143; CHECK-NEXT:  .LBB1_3: @ %while.body.prol.loopexit
144; CHECK-NEXT:    cmp r12, #0
145; CHECK-NEXT:    popeq {r11, pc}
146; CHECK-NEXT:  .LBB1_4: @ %while.body
147; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
148; CHECK-NEXT:    ldrsh r12, [r0]
149; CHECK-NEXT:    subs r3, r3, #2
150; CHECK-NEXT:    ldrsh lr, [r1]
151; CHECK-NEXT:    smulbb r12, lr, r12
152; CHECK-NEXT:    ssat r12, #16, r12, asr #14
153; CHECK-NEXT:    strh r12, [r2]
154; CHECK-NEXT:    ldrsh r12, [r0, #2]
155; CHECK-NEXT:    add r0, r0, #4
156; CHECK-NEXT:    ldrsh lr, [r1, #2]
157; CHECK-NEXT:    add r1, r1, #4
158; CHECK-NEXT:    smulbb r12, lr, r12
159; CHECK-NEXT:    ssat r12, #16, r12, asr #14
160; CHECK-NEXT:    strh r12, [r2, #2]
161; CHECK-NEXT:    add r2, r2, #4
162; CHECK-NEXT:    bne .LBB1_4
163; CHECK-NEXT:  .LBB1_5: @ %while.end
164; CHECK-NEXT:    pop {r11, pc}
165entry:
  ; Nothing to do for an empty block.
166  %cmp.not7 = icmp eq i32 %blockSize, 0
167  br i1 %cmp.not7, label %while.end, label %while.body.preheader
168
169while.body.preheader:                             ; preds = %entry
  ; %0 is the element count left once one prologue element has been handled;
  ; %xtraiter is the low bit of %blockSize, so the prologue is skipped when the
  ; count is even.
170  %0 = add i32 %blockSize, -1
171  %xtraiter = and i32 %blockSize, 1
172  %lcmp.mod.not = icmp eq i32 %xtraiter, 0
173  br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol.preheader
174
175while.body.prol.preheader:                        ; preds = %while.body.preheader
  ; Prologue: handle a single element, reached only when %blockSize is odd.
176  %incdec.ptr.prol = getelementptr inbounds i16, ptr %pSrcA, i64 1
177  %1 = load i16, ptr %pSrcA, align 2
178  %conv.prol = sext i16 %1 to i32
179  %incdec.ptr1.prol = getelementptr inbounds i16, ptr %pSrcB, i64 1
180  %2 = load i16, ptr %pSrcB, align 2
181  %conv2.prol = sext i16 %2 to i32
182  %mul.prol = mul nsw i32 %conv2.prol, %conv.prol
183  %shr.prol = ashr i32 %mul.prol, 14
  ; Clamp to [-32768, 32767]: smax against the lower bound, then smin against
  ; the upper bound.
184  %3 = call i32 @llvm.smax.i32(i32 %shr.prol, i32 -32768)
185  %4 = call i32 @llvm.smin.i32(i32 %3, i32 32767)
186  %conv3.prol = trunc i32 %4 to i16
187  %incdec.ptr4.prol = getelementptr inbounds i16, ptr %pDst, i64 1
188  store i16 %conv3.prol, ptr %pDst, align 2
189  br label %while.body.prol.loopexit
190
191while.body.prol.loopexit:                         ; preds = %while.body.prol.preheader, %while.body.preheader
  ; Merge the "prologue skipped" and "prologue taken" values of the counter and
  ; of the three pointers, then leave if %0 (%blockSize - 1) is zero.
192  %blkCnt.011.unr = phi i32 [ %blockSize, %while.body.preheader ], [ %0, %while.body.prol.preheader ]
193  %pSrcA.addr.010.unr = phi ptr [ %pSrcA, %while.body.preheader ], [ %incdec.ptr.prol, %while.body.prol.preheader ]
194  %pDst.addr.09.unr = phi ptr [ %pDst, %while.body.preheader ], [ %incdec.ptr4.prol, %while.body.prol.preheader ]
195  %pSrcB.addr.08.unr = phi ptr [ %pSrcB, %while.body.preheader ], [ %incdec.ptr1.prol, %while.body.prol.preheader ]
196  %5 = icmp eq i32 %0, 0
197  br i1 %5, label %while.end, label %while.body
198
199while.body:                                       ; preds = %while.body.prol.loopexit, %while.body
  ; Loop-carried counter and pointers; each trip advances the pointers by two
  ; i16 elements and the counter by -2.
200  %blkCnt.011 = phi i32 [ %dec.1, %while.body ], [ %blkCnt.011.unr, %while.body.prol.loopexit ]
201  %pSrcA.addr.010 = phi ptr [ %incdec.ptr.1, %while.body ], [ %pSrcA.addr.010.unr, %while.body.prol.loopexit ]
202  %pDst.addr.09 = phi ptr [ %incdec.ptr4.1, %while.body ], [ %pDst.addr.09.unr, %while.body.prol.loopexit ]
203  %pSrcB.addr.08 = phi ptr [ %incdec.ptr1.1, %while.body ], [ %pSrcB.addr.08.unr, %while.body.prol.loopexit ]
  ; First element of the pair, addressed directly through the carried pointers.
204  %incdec.ptr = getelementptr inbounds i16, ptr %pSrcA.addr.010, i64 1
205  %6 = load i16, ptr %pSrcA.addr.010, align 2
206  %conv = sext i16 %6 to i32
207  %incdec.ptr1 = getelementptr inbounds i16, ptr %pSrcB.addr.08, i64 1
208  %7 = load i16, ptr %pSrcB.addr.08, align 2
209  %conv2 = sext i16 %7 to i32
210  %mul = mul nsw i32 %conv2, %conv
211  %shr = ashr i32 %mul, 14
212  %8 = call i32 @llvm.smax.i32(i32 %shr, i32 -32768)
213  %9 = call i32 @llvm.smin.i32(i32 %8, i32 32767)
214  %conv3 = trunc i32 %9 to i16
215  %incdec.ptr4 = getelementptr inbounds i16, ptr %pDst.addr.09, i64 1
216  store i16 %conv3, ptr %pDst.addr.09, align 2
  ; Second element of the pair, addressed one i16 past the first.
217  %incdec.ptr.1 = getelementptr inbounds i16, ptr %pSrcA.addr.010, i64 2
218  %10 = load i16, ptr %incdec.ptr, align 2
219  %conv.1 = sext i16 %10 to i32
220  %incdec.ptr1.1 = getelementptr inbounds i16, ptr %pSrcB.addr.08, i64 2
221  %11 = load i16, ptr %incdec.ptr1, align 2
222  %conv2.1 = sext i16 %11 to i32
223  %mul.1 = mul nsw i32 %conv2.1, %conv.1
224  %shr.1 = ashr i32 %mul.1, 14
225  %12 = call i32 @llvm.smax.i32(i32 %shr.1, i32 -32768)
226  %13 = call i32 @llvm.smin.i32(i32 %12, i32 32767)
227  %conv3.1 = trunc i32 %13 to i16
228  %incdec.ptr4.1 = getelementptr inbounds i16, ptr %pDst.addr.09, i64 2
229  store i16 %conv3.1, ptr %incdec.ptr4, align 2
  ; Two elements were consumed; keep looping until the counter reaches zero.
230  %dec.1 = add i32 %blkCnt.011, -2
231  %cmp.not.1 = icmp eq i32 %dec.1, 0
232  br i1 %cmp.not.1, label %while.end, label %while.body
233
234while.end:                                        ; preds = %while.body, %while.body.prol.loopexit, %entry
235  ret void
236}
237
; Signed max/min intrinsics called by @ssat_unroll_minmax to express the clamp
; to [-32768, 32767].
; NOTE(review): both declarations reference attribute group #1, but no
; "attributes #1" definition is visible in this part of the file - confirm
; whether the reference is intentional or left over from test reduction.
238declare i32 @llvm.smax.i32(i32, i32) #1
239declare i32 @llvm.smin.i32(i32, i32) #1
240