; REQUIRES: asserts
; RUN: opt -mcpu=thunderx2t99 -passes=loop-unroll --debug-only=loop-unroll --debug-only=basicblock-utils -S -unroll-allow-partial < %s 2>&1 | FileCheck %s

; Partial loop unrolling test for AArch64 (-mcpu=thunderx2t99).
;
; @foo contains two counted loops. Each one starts an i32 counter at 0, adds 2
; per iteration, and leaves the loop once the incremented counter is >= 1023.
; The second loop performs one more store per iteration than the first.
;
; stderr is redirected into stdout, so the expectations below cover both the
; debug log of the pass and the IR printed by -S. They require, for each loop,
; the reported loop size and trip count, an unroll factor of 4, and a block
; merge message. In the printed IR they additionally require the adds of the
; first loop to carry the nuw and nsw flags.

target triple = "aarch64-unknown-linux-gnu"

; CHECK: Loop Unroll: F[foo] Loop %loop.header
; CHECK: Loop Size = 18
; CHECK: Exiting block %loop.inc: TripCount=512, TripMultiple=0, BreakoutTrip=0
; CHECK: UNROLLING loop %loop.header by 4
; CHECK: Merging:
; CHECK: Loop Unroll: F[foo] Loop %loop.2.header
; CHECK: Loop Size = 19
; CHECK: Exiting block %loop.2.inc: TripCount=512, TripMultiple=0, BreakoutTrip=0
; CHECK: UNROLLING loop %loop.2.header by 4
; CHECK: Merging:
; CHECK: %counter = phi i32 [ 0, %entry ], [ %inc.3, %loop.inc.3 ]
; CHECK: %val = add nuw nsw i32 %counter, 5
; CHECK: %val1 = add nuw nsw i32 %counter, 6
; CHECK: %val2 = add nuw nsw i32 %counter, 7
; CHECK: %val3 = add nuw nsw i32 %counter, 8
; CHECK: %val4 = add nuw nsw i32 %counter, 9
; CHECK: %val5 = add nuw nsw i32 %counter, 10
; CHECK-NOT: %val = add i32 %counter, 5
; CHECK-NOT: %val = add i32 %counter, 6
; CHECK-NOT: %val = add i32 %counter, 7
; CHECK-NOT: %val = add i32 %counter, 8
; CHECK-NOT: %val = add i32 %counter, 9
; CHECK-NOT: %val = add i32 %counter, 10
; CHECK: %counter.2 = phi i32 [ 0, %exit.0 ], [ %inc.2.3, %loop.2.inc.3 ]

; Fills eight stack arrays from two loops and copies two of the stored
; elements to the caller.
;   %out - destination pointer; one i32 is written at index 0 and one at
;          index 1.
define void @foo(ptr %out) {
entry:
  ; Eight [1024 x i32] stack arrays. %x06 is only written by the second loop.
  %0 = alloca [1024 x i32]
  %x0 = alloca [1024 x i32]
  %x01 = alloca [1024 x i32]
  %x02 = alloca [1024 x i32]
  %x03 = alloca [1024 x i32]
  %x04 = alloca [1024 x i32]
  %x05 = alloca [1024 x i32]
  %x06 = alloca [1024 x i32]
  br label %loop.header

; ---- First loop: header / body / latch kept as separate blocks. ----
loop.header:
  %counter = phi i32 [0, %entry], [%inc, %loop.inc]
  br label %loop.body

loop.body:
  ; %0[counter] = counter
  %ptr = getelementptr [1024 x i32], ptr %0, i32 0, i32 %counter
  store i32 %counter, ptr %ptr
  ; %x0[counter] = counter + 5, ..., %x05[counter] = counter + 10
  %val = add i32 %counter, 5
  %xptr = getelementptr [1024 x i32], ptr %x0, i32 0, i32 %counter
  store i32 %val, ptr %xptr
  %val1 = add i32 %counter, 6
  %xptr1 = getelementptr [1024 x i32], ptr %x01, i32 0, i32 %counter
  store i32 %val1, ptr %xptr1
  %val2 = add i32 %counter, 7
  %xptr2 = getelementptr [1024 x i32], ptr %x02, i32 0, i32 %counter
  store i32 %val2, ptr %xptr2
  %val3 = add i32 %counter, 8
  %xptr3 = getelementptr [1024 x i32], ptr %x03, i32 0, i32 %counter
  store i32 %val3, ptr %xptr3
  %val4 = add i32 %counter, 9
  %xptr4 = getelementptr [1024 x i32], ptr %x04, i32 0, i32 %counter
  store i32 %val4, ptr %xptr4
  %val5 = add i32 %counter, 10
  %xptr5 = getelementptr [1024 x i32], ptr %x05, i32 0, i32 %counter
  store i32 %val5, ptr %xptr5
  br label %loop.inc

loop.inc:
  ; Step by 2; exit once the incremented counter is >= 1023 (signed compare).
  %inc = add i32 %counter, 2
  %1 = icmp sge i32 %inc, 1023
  br i1 %1, label %exit.0, label %loop.header

exit.0:
  ; out[0] = %0[5]
  %2 = getelementptr [1024 x i32], ptr %0, i32 0, i32 5
  %3 = load i32, ptr %2
  store i32 %3, ptr %out
  br label %loop.2.header

; ---- Second loop: same shape as the first plus one extra store to %x06. ----
loop.2.header:
  %counter.2 = phi i32 [0, %exit.0], [%inc.2, %loop.2.inc]
  br label %loop.2.body

loop.2.body:
  ; %0[counter.2] = counter.2
  %ptr.2 = getelementptr [1024 x i32], ptr %0, i32 0, i32 %counter.2
  store i32 %counter.2, ptr %ptr.2
  ; Every store below uses the add computed in this loop (the .2-suffixed
  ; value), so nothing defined in the first loop is used here.
  %val.2 = add i32 %counter.2, 5
  %xptr.2 = getelementptr [1024 x i32], ptr %x0, i32 0, i32 %counter.2
  store i32 %val.2, ptr %xptr.2
  %val1.2 = add i32 %counter.2, 6
  %xptr1.2 = getelementptr [1024 x i32], ptr %x01, i32 0, i32 %counter.2
  store i32 %val1.2, ptr %xptr1.2
  %val2.2 = add i32 %counter.2, 7
  %xptr2.2 = getelementptr [1024 x i32], ptr %x02, i32 0, i32 %counter.2
  store i32 %val2.2, ptr %xptr2.2
  %val3.2 = add i32 %counter.2, 8
  %xptr3.2 = getelementptr [1024 x i32], ptr %x03, i32 0, i32 %counter.2
  store i32 %val3.2, ptr %xptr3.2
  %val4.2 = add i32 %counter.2, 9
  %xptr4.2 = getelementptr [1024 x i32], ptr %x04, i32 0, i32 %counter.2
  store i32 %val4.2, ptr %xptr4.2
  %val5.2 = add i32 %counter.2, 10
  %xptr5.2 = getelementptr [1024 x i32], ptr %x05, i32 0, i32 %counter.2
  store i32 %val5.2, ptr %xptr5.2
  ; Extra store: %x06[counter.2] = counter.2 + 10 (reuses %val5.2).
  %xptr6.2 = getelementptr [1024 x i32], ptr %x06, i32 0, i32 %counter.2
  store i32 %val5.2, ptr %xptr6.2
  br label %loop.2.inc

loop.2.inc:
  ; Same step and exit condition as the first loop.
  %inc.2 = add i32 %counter.2, 2
  %4 = icmp sge i32 %inc.2, 1023
  br i1 %4, label %exit.2, label %loop.2.header

exit.2:
  ; out[1] = %0[6]
  %x2 = getelementptr [1024 x i32], ptr %0, i32 0, i32 6
  %x3 = load i32, ptr %x2
  %out2 = getelementptr i32, ptr %out, i32 1
  store i32 %x3, ptr %out2
  ret void
}