; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=armv6t2-eabi %s -o - | FileCheck %s --check-prefix=CHECK

; Checks SSAT is still generated when loop unrolling is on.
;
; Both functions below contain the same loop over 16-bit elements: load one
; i16 from %pSrcA and one from %pSrcB, sign-extend, multiply, arithmetic shift
; right by 14, clamp to [-32768, 32767], truncate to i16 and store to %pDst.
; The IR is written in already-unrolled form:
;   * a prologue block processes a single element when %blockSize is odd;
;   * the main loop body processes two elements and decrements the counter
;     by 2 per iteration.
; The two functions differ only in how the clamp is spelled (icmp+select pairs
; versus llvm.smax/llvm.smin calls) and in minor IR details. The expected
; output for every element is one "smulbb" followed by one
; "ssat rX, #16, rX, asr #14".

; Variant 1: the clamp is written as two compare+select pairs.
define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) {
; CHECK-LABEL: ssat_unroll:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    cmp r3, #0
; CHECK-NEXT:    bxeq lr
; CHECK-NEXT:  .LBB0_1: @ %while.body.preheader
; CHECK-NEXT:    .save {r11, lr}
; CHECK-NEXT:    push {r11, lr}
; CHECK-NEXT:    sub r12, r3, #1
; CHECK-NEXT:    tst r3, #1
; CHECK-NEXT:    beq .LBB0_3
; CHECK-NEXT:  @ %bb.2: @ %while.body.prol.preheader
; CHECK-NEXT:    ldrsh lr, [r0], #2
; CHECK-NEXT:    ldrsh r3, [r1], #2
; CHECK-NEXT:    smulbb r3, r3, lr
; CHECK-NEXT:    ssat r3, #16, r3, asr #14
; CHECK-NEXT:    strh r3, [r2], #2
; CHECK-NEXT:    mov r3, r12
; CHECK-NEXT:  .LBB0_3: @ %while.body.prol.loopexit
; CHECK-NEXT:    cmp r12, #0
; CHECK-NEXT:    beq .LBB0_5
; CHECK-NEXT:  .LBB0_4: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldrsh r12, [r0]
; CHECK-NEXT:    subs r3, r3, #2
; CHECK-NEXT:    ldrsh lr, [r1]
; CHECK-NEXT:    smulbb r12, lr, r12
; CHECK-NEXT:    ssat r12, #16, r12, asr #14
; CHECK-NEXT:    strh r12, [r2]
; CHECK-NEXT:    ldrsh r12, [r0, #2]
; CHECK-NEXT:    add r0, r0, #4
; CHECK-NEXT:    ldrsh lr, [r1, #2]
; CHECK-NEXT:    add r1, r1, #4
; CHECK-NEXT:    smulbb r12, lr, r12
; CHECK-NEXT:    ssat r12, #16, r12, asr #14
; CHECK-NEXT:    strh r12, [r2, #2]
; CHECK-NEXT:    add r2, r2, #4
; CHECK-NEXT:    bne .LBB0_4
; CHECK-NEXT:  .LBB0_5:
; CHECK-NEXT:    pop {r11, lr}
; CHECK-NEXT:    bx lr
entry:
  ; Nothing to do for an empty block.
  %cmp.not7 = icmp eq i32 %blockSize, 0
  br i1 %cmp.not7, label %while.end, label %while.body.preheader

while.body.preheader:                             ; preds = %entry
  ; %0 = blockSize - 1; the low bit of blockSize selects whether the
  ; single-element prologue runs.
  %0 = add i32 %blockSize, -1
  %xtraiter = and i32 %blockSize, 1
  %lcmp.mod.not = icmp eq i32 %xtraiter, 0
  br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol.preheader

while.body.prol.preheader:                        ; preds = %while.body.preheader
  ; Prologue: one element, taken only when blockSize is odd.
  %incdec.ptr.prol = getelementptr inbounds i16, ptr %pSrcA, i32 1
  %1 = load i16, ptr %pSrcA
  %conv.prol = sext i16 %1 to i32
  %incdec.ptr1.prol = getelementptr inbounds i16, ptr %pSrcB, i32 1
  %2 = load i16, ptr %pSrcB
  %conv2.prol = sext i16 %2 to i32
  %mul.prol = mul nsw i32 %conv2.prol, %conv.prol
  %shr.prol = ashr i32 %mul.prol, 14
  ; Clamp to [-32768, 32767]: lower bound first, then upper bound.
  %3 = icmp sgt i32 %shr.prol, -32768
  %4 = select i1 %3, i32 %shr.prol, i32 -32768
  %5 = icmp slt i32 %4, 32767
  %spec.select.i.prol = select i1 %5, i32 %4, i32 32767
  %conv3.prol = trunc i32 %spec.select.i.prol to i16
  %incdec.ptr4.prol = getelementptr inbounds i16, ptr %pDst, i32 1
  store i16 %conv3.prol, ptr %pDst
  br label %while.body.prol.loopexit

while.body.prol.loopexit:                         ; preds = %while.body.prol.preheader, %while.body.preheader
  ; Merge the counter and the three pointers from the "prologue skipped" and
  ; "prologue executed" paths.
  %blkCnt.011.unr = phi i32 [ %blockSize, %while.body.preheader ], [ %0, %while.body.prol.preheader ]
  %pSrcA.addr.010.unr = phi ptr [ %pSrcA, %while.body.preheader ], [ %incdec.ptr.prol, %while.body.prol.preheader ]
  %pDst.addr.09.unr = phi ptr [ %pDst, %while.body.preheader ], [ %incdec.ptr4.prol, %while.body.prol.preheader ]
  %pSrcB.addr.08.unr = phi ptr [ %pSrcB, %while.body.preheader ], [ %incdec.ptr1.prol, %while.body.prol.preheader ]
  ; The main loop is skipped when blockSize - 1 == 0.
  %6 = icmp eq i32 %0, 0
  br i1 %6, label %while.end, label %while.body

while.body:                                       ; preds = %while.body.prol.loopexit, %while.body
  %blkCnt.011 = phi i32 [ %dec.1, %while.body ], [ %blkCnt.011.unr, %while.body.prol.loopexit ]
  %pSrcA.addr.010 = phi ptr [ %incdec.ptr.1, %while.body ], [ %pSrcA.addr.010.unr, %while.body.prol.loopexit ]
  %pDst.addr.09 = phi ptr [ %incdec.ptr4.1, %while.body ], [ %pDst.addr.09.unr, %while.body.prol.loopexit ]
  %pSrcB.addr.08 = phi ptr [ %incdec.ptr1.1, %while.body ], [ %pSrcB.addr.08.unr, %while.body.prol.loopexit ]
  ; First element of this iteration.
  %incdec.ptr = getelementptr inbounds i16, ptr %pSrcA.addr.010, i32 1
  %7 = load i16, ptr %pSrcA.addr.010
  %conv = sext i16 %7 to i32
  %incdec.ptr1 = getelementptr inbounds i16, ptr %pSrcB.addr.08, i32 1
  %8 = load i16, ptr %pSrcB.addr.08
  %conv2 = sext i16 %8 to i32
  %mul = mul nsw i32 %conv2, %conv
  %shr = ashr i32 %mul, 14
  %9 = icmp sgt i32 %shr, -32768
  %10 = select i1 %9, i32 %shr, i32 -32768
  %11 = icmp slt i32 %10, 32767
  %spec.select.i = select i1 %11, i32 %10, i32 32767
  %conv3 = trunc i32 %spec.select.i to i16
  %incdec.ptr4 = getelementptr inbounds i16, ptr %pDst.addr.09, i32 1
  store i16 %conv3, ptr %pDst.addr.09
  ; Second element of this iteration (the unrolled copy).
  %incdec.ptr.1 = getelementptr inbounds i16, ptr %pSrcA.addr.010, i32 2
  %12 = load i16, ptr %incdec.ptr
  %conv.1 = sext i16 %12 to i32
  %incdec.ptr1.1 = getelementptr inbounds i16, ptr %pSrcB.addr.08, i32 2
  %13 = load i16, ptr %incdec.ptr1
  %conv2.1 = sext i16 %13 to i32
  %mul.1 = mul nsw i32 %conv2.1, %conv.1
  %shr.1 = ashr i32 %mul.1, 14
  %14 = icmp sgt i32 %shr.1, -32768
  %15 = select i1 %14, i32 %shr.1, i32 -32768
  %16 = icmp slt i32 %15, 32767
  %spec.select.i.1 = select i1 %16, i32 %15, i32 32767
  %conv3.1 = trunc i32 %spec.select.i.1 to i16
  %incdec.ptr4.1 = getelementptr inbounds i16, ptr %pDst.addr.09, i32 2
  store i16 %conv3.1, ptr %incdec.ptr4
  ; Two elements consumed per trip.
  %dec.1 = add i32 %blkCnt.011, -2
  %cmp.not.1 = icmp eq i32 %dec.1, 0
  br i1 %cmp.not.1, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %while.body.prol.loopexit, %entry
  ret void
}

; Variant 2: same loop structure as @ssat_unroll, but the clamp is written as
; llvm.smax(x, -32768) followed by llvm.smin(y, 32767). This variant also uses
; i64 GEP indices, explicit "align 2" on the loads/stores, and
; nocapture/readonly/writeonly attributes on the pointer parameters.
define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, ptr nocapture writeonly %pDst, i32 %blockSize) {
; CHECK-LABEL: ssat_unroll_minmax:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    cmp r3, #0
; CHECK-NEXT:    bxeq lr
; CHECK-NEXT:  .LBB1_1: @ %while.body.preheader
; CHECK-NEXT:    .save {r11, lr}
; CHECK-NEXT:    push {r11, lr}
; CHECK-NEXT:    sub r12, r3, #1
; CHECK-NEXT:    tst r3, #1
; CHECK-NEXT:    beq .LBB1_3
; CHECK-NEXT:  @ %bb.2: @ %while.body.prol.preheader
; CHECK-NEXT:    ldrsh lr, [r0], #2
; CHECK-NEXT:    ldrsh r3, [r1], #2
; CHECK-NEXT:    smulbb r3, r3, lr
; CHECK-NEXT:    ssat r3, #16, r3, asr #14
; CHECK-NEXT:    strh r3, [r2], #2
; CHECK-NEXT:    mov r3, r12
; CHECK-NEXT:  .LBB1_3: @ %while.body.prol.loopexit
; CHECK-NEXT:    cmp r12, #0
; CHECK-NEXT:    beq .LBB1_5
; CHECK-NEXT:  .LBB1_4: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldrsh r12, [r0]
; CHECK-NEXT:    subs r3, r3, #2
; CHECK-NEXT:    ldrsh lr, [r1]
; CHECK-NEXT:    smulbb r12, lr, r12
; CHECK-NEXT:    ssat r12, #16, r12, asr #14
; CHECK-NEXT:    strh r12, [r2]
; CHECK-NEXT:    ldrsh r12, [r0, #2]
; CHECK-NEXT:    add r0, r0, #4
; CHECK-NEXT:    ldrsh lr, [r1, #2]
; CHECK-NEXT:    add r1, r1, #4
; CHECK-NEXT:    smulbb r12, lr, r12
; CHECK-NEXT:    ssat r12, #16, r12, asr #14
; CHECK-NEXT:    strh r12, [r2, #2]
; CHECK-NEXT:    add r2, r2, #4
; CHECK-NEXT:    bne .LBB1_4
; CHECK-NEXT:  .LBB1_5:
; CHECK-NEXT:    pop {r11, lr}
; CHECK-NEXT:    bx lr
entry:
  ; Nothing to do for an empty block.
  %cmp.not7 = icmp eq i32 %blockSize, 0
  br i1 %cmp.not7, label %while.end, label %while.body.preheader

while.body.preheader:                             ; preds = %entry
  ; %0 = blockSize - 1; the low bit of blockSize selects whether the
  ; single-element prologue runs.
  %0 = add i32 %blockSize, -1
  %xtraiter = and i32 %blockSize, 1
  %lcmp.mod.not = icmp eq i32 %xtraiter, 0
  br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol.preheader

while.body.prol.preheader:                        ; preds = %while.body.preheader
  ; Prologue: one element, taken only when blockSize is odd.
  %incdec.ptr.prol = getelementptr inbounds i16, ptr %pSrcA, i64 1
  %1 = load i16, ptr %pSrcA, align 2
  %conv.prol = sext i16 %1 to i32
  %incdec.ptr1.prol = getelementptr inbounds i16, ptr %pSrcB, i64 1
  %2 = load i16, ptr %pSrcB, align 2
  %conv2.prol = sext i16 %2 to i32
  %mul.prol = mul nsw i32 %conv2.prol, %conv.prol
  %shr.prol = ashr i32 %mul.prol, 14
  ; Clamp to [-32768, 32767] via signed max then signed min.
  %3 = call i32 @llvm.smax.i32(i32 %shr.prol, i32 -32768)
  %4 = call i32 @llvm.smin.i32(i32 %3, i32 32767)
  %conv3.prol = trunc i32 %4 to i16
  %incdec.ptr4.prol = getelementptr inbounds i16, ptr %pDst, i64 1
  store i16 %conv3.prol, ptr %pDst, align 2
  br label %while.body.prol.loopexit

while.body.prol.loopexit:                         ; preds = %while.body.prol.preheader, %while.body.preheader
  ; Merge the counter and the three pointers from the "prologue skipped" and
  ; "prologue executed" paths.
  %blkCnt.011.unr = phi i32 [ %blockSize, %while.body.preheader ], [ %0, %while.body.prol.preheader ]
  %pSrcA.addr.010.unr = phi ptr [ %pSrcA, %while.body.preheader ], [ %incdec.ptr.prol, %while.body.prol.preheader ]
  %pDst.addr.09.unr = phi ptr [ %pDst, %while.body.preheader ], [ %incdec.ptr4.prol, %while.body.prol.preheader ]
  %pSrcB.addr.08.unr = phi ptr [ %pSrcB, %while.body.preheader ], [ %incdec.ptr1.prol, %while.body.prol.preheader ]
  ; The main loop is skipped when blockSize - 1 == 0.
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %while.end, label %while.body

while.body:                                       ; preds = %while.body.prol.loopexit, %while.body
  %blkCnt.011 = phi i32 [ %dec.1, %while.body ], [ %blkCnt.011.unr, %while.body.prol.loopexit ]
  %pSrcA.addr.010 = phi ptr [ %incdec.ptr.1, %while.body ], [ %pSrcA.addr.010.unr, %while.body.prol.loopexit ]
  %pDst.addr.09 = phi ptr [ %incdec.ptr4.1, %while.body ], [ %pDst.addr.09.unr, %while.body.prol.loopexit ]
  %pSrcB.addr.08 = phi ptr [ %incdec.ptr1.1, %while.body ], [ %pSrcB.addr.08.unr, %while.body.prol.loopexit ]
  ; First element of this iteration.
  %incdec.ptr = getelementptr inbounds i16, ptr %pSrcA.addr.010, i64 1
  %6 = load i16, ptr %pSrcA.addr.010, align 2
  %conv = sext i16 %6 to i32
  %incdec.ptr1 = getelementptr inbounds i16, ptr %pSrcB.addr.08, i64 1
  %7 = load i16, ptr %pSrcB.addr.08, align 2
  %conv2 = sext i16 %7 to i32
  %mul = mul nsw i32 %conv2, %conv
  %shr = ashr i32 %mul, 14
  %8 = call i32 @llvm.smax.i32(i32 %shr, i32 -32768)
  %9 = call i32 @llvm.smin.i32(i32 %8, i32 32767)
  %conv3 = trunc i32 %9 to i16
  %incdec.ptr4 = getelementptr inbounds i16, ptr %pDst.addr.09, i64 1
  store i16 %conv3, ptr %pDst.addr.09, align 2
  ; Second element of this iteration (the unrolled copy).
  %incdec.ptr.1 = getelementptr inbounds i16, ptr %pSrcA.addr.010, i64 2
  %10 = load i16, ptr %incdec.ptr, align 2
  %conv.1 = sext i16 %10 to i32
  %incdec.ptr1.1 = getelementptr inbounds i16, ptr %pSrcB.addr.08, i64 2
  %11 = load i16, ptr %incdec.ptr1, align 2
  %conv2.1 = sext i16 %11 to i32
  %mul.1 = mul nsw i32 %conv2.1, %conv.1
  %shr.1 = ashr i32 %mul.1, 14
  %12 = call i32 @llvm.smax.i32(i32 %shr.1, i32 -32768)
  %13 = call i32 @llvm.smin.i32(i32 %12, i32 32767)
  %conv3.1 = trunc i32 %13 to i16
  %incdec.ptr4.1 = getelementptr inbounds i16, ptr %pDst.addr.09, i64 2
  store i16 %conv3.1, ptr %incdec.ptr4, align 2
  ; Two elements consumed per trip.
  %dec.1 = add i32 %blkCnt.011, -2
  %cmp.not.1 = icmp eq i32 %dec.1, 0
  br i1 %cmp.not.1, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %while.body.prol.loopexit, %entry
  ret void
}

; Signed max/min intrinsics used by @ssat_unroll_minmax.
; NOTE(review): no "attributes #1 = { ... }" group is visible in this file;
; confirm whether the #1 reference on these declarations is intentional.
declare i32 @llvm.smax.i32(i32, i32) #1
declare i32 @llvm.smin.i32(i32, i32) #1