1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -passes=loop-unroll -mtriple=thumbv7a-unknown-linux-gnueabihf -S %s | FileCheck %s 3 4; Check we unroll even with optsize, if the result is smaller, either because 5; we have single iteration loops or bodies with constant folding opportunities 6; after fully unrolling. 7 8; TODO: Looks like we should enable some unrolling for M-class, even when 9; optimising for size. 10 11declare i32 @get() 12 13define void @fully_unrolled_single_iteration(ptr %src) #0 { 14; CHECK-LABEL: @fully_unrolled_single_iteration( 15; CHECK-NEXT: entry: 16; CHECK-NEXT: [[ARR:%.*]] = alloca [4 x i32], align 4 17; CHECK-NEXT: br label [[FOR_BODY:%.*]] 18; CHECK: for.body: 19; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[SRC:%.*]] 20; CHECK-NEXT: store i32 [[V]], ptr [[ARR]], align 4 21; CHECK-NEXT: call void @use(ptr nonnull [[ARR]]) 22; CHECK-NEXT: ret void 23; 24entry: 25 %arr = alloca [4 x i32], align 4 26 br label %for.body 27 28for.body: ; preds = %for.body, %entry 29 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 30 %src.idx = getelementptr inbounds i32, ptr %src, i64 %indvars.iv 31 %v = load i32, ptr %src.idx 32 %arrayidx = getelementptr inbounds [4 x i32], ptr %arr, i64 0, i64 %indvars.iv 33 store i32 %v, ptr %arrayidx, align 4 34 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 35 %exitcond = icmp eq i64 %indvars.iv.next, 1 36 br i1 %exitcond, label %for.cond.cleanup, label %for.body 37 38for.cond.cleanup: ; preds = %for.cond 39 call void @use(ptr nonnull %arr) #4 40 ret void 41} 42 43 44define void @fully_unrolled_smaller() #0 { 45; CHECK-LABEL: @fully_unrolled_smaller( 46; CHECK-NEXT: entry: 47; CHECK-NEXT: [[ARR:%.*]] = alloca [4 x i32], align 4 48; CHECK-NEXT: br label [[FOR_BODY:%.*]] 49; CHECK: for.body: 50; CHECK-NEXT: store i32 16, ptr [[ARR]], align 4 51; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [4 x i32], ptr [[ARR]], i64 0, i64 1 52; CHECK-NEXT: store i32 4104, ptr [[ARRAYIDX_1]], align 4 53; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds [4 x i32], ptr [[ARR]], i64 0, i64 2 54; CHECK-NEXT: store i32 1048592, ptr [[ARRAYIDX_2]], align 4 55; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds [4 x i32], ptr [[ARR]], i64 0, i64 3 56; CHECK-NEXT: store i32 268435480, ptr [[ARRAYIDX_3]], align 4 57; CHECK-NEXT: call void @use(ptr nonnull [[ARR]]) 58; CHECK-NEXT: ret void 59; 60entry: 61 %arr = alloca [4 x i32], align 4 62 br label %for.body 63 64for.body: ; preds = %for.body, %entry 65 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 66 %indvars.iv.tr = trunc i64 %indvars.iv to i32 67 %shl.0 = shl i32 %indvars.iv.tr, 3 68 %shl.1 = shl i32 16, %shl.0 69 %or = or i32 %shl.1, %shl.0 70 %arrayidx = getelementptr inbounds [4 x i32], ptr %arr, i64 0, i64 %indvars.iv 71 store i32 %or, ptr %arrayidx, align 4 72 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 73 %exitcond = icmp eq i64 %indvars.iv, 3 74 br i1 %exitcond, label %for.cond.cleanup, label %for.body 75 76for.cond.cleanup: ; preds = %for.cond 77 call void @use(ptr nonnull %arr) #4 78 ret void 79} 80 81define void @fully_unrolled_smaller_Oz() #1 { 82; CHECK-LABEL: @fully_unrolled_smaller_Oz( 83; CHECK-NEXT: entry: 84; CHECK-NEXT: [[ARR:%.*]] = alloca [4 x i32], align 4 85; CHECK-NEXT: br label [[FOR_BODY:%.*]] 86; CHECK: for.body: 87; CHECK-NEXT: store i32 16, ptr [[ARR]], align 4 88; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [4 x i32], ptr [[ARR]], i64 0, i64 1 89; CHECK-NEXT: store i32 4104, ptr [[ARRAYIDX_1]], align 4 90; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds [4 x i32], ptr [[ARR]], i64 0, i64 2 91; CHECK-NEXT: store i32 1048592, ptr [[ARRAYIDX_2]], align 4 92; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds [4 x i32], ptr [[ARR]], i64 0, i64 3 93; CHECK-NEXT: store i32 268435480, ptr [[ARRAYIDX_3]], align 4 94; CHECK-NEXT: call void @use(ptr nonnull [[ARR]]) 95; CHECK-NEXT: ret void 96; 97entry: 98 %arr = alloca [4 x i32], align 4 99 br label %for.body 100 101for.body: ; preds = %for.body, %entry 102 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 103 %indvars.iv.tr = trunc i64 %indvars.iv to i32 104 %shl.0 = shl i32 %indvars.iv.tr, 3 105 %shl.1 = shl i32 16, %shl.0 106 %or = or i32 %shl.1, %shl.0 107 %arrayidx = getelementptr inbounds [4 x i32], ptr %arr, i64 0, i64 %indvars.iv 108 store i32 %or, ptr %arrayidx, align 4 109 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 110 %exitcond = icmp eq i64 %indvars.iv, 3 111 br i1 %exitcond, label %for.cond.cleanup, label %for.body 112 113for.cond.cleanup: ; preds = %for.cond 114 call void @use(ptr nonnull %arr) #4 115 ret void 116} 117 118 119define void @fully_unrolled_bigger() #0 { 120; CHECK-LABEL: @fully_unrolled_bigger( 121; CHECK-NEXT: entry: 122; CHECK-NEXT: [[ARR:%.*]] = alloca [4 x i32], align 4 123; CHECK-NEXT: br label [[FOR_BODY:%.*]] 124; CHECK: for.body: 125; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] 126; CHECK-NEXT: [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32 127; CHECK-NEXT: [[SHL_0:%.*]] = shl i32 [[INDVARS_IV_TR]], 3 128; CHECK-NEXT: [[SHL_1:%.*]] = shl i32 16, [[SHL_0]] 129; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL_1]], [[SHL_0]] 130; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[ARR]], i64 0, i64 [[INDVARS_IV]] 131; CHECK-NEXT: store i32 [[OR]], ptr [[ARRAYIDX]], align 4 132; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 133; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV]], 7 134; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] 135; CHECK: for.cond.cleanup: 136; CHECK-NEXT: call void @use(ptr nonnull [[ARR]]) 137; CHECK-NEXT: ret void 138; 139entry: 140 %arr = alloca [4 x i32], align 4 141 br label %for.body 142 143for.body: ; preds = %for.body, %entry 144 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 145 %indvars.iv.tr = trunc i64 %indvars.iv to i32 146 %shl.0 = shl i32 %indvars.iv.tr, 3 147 %shl.1 = shl i32 16, %shl.0 148 %or = or i32 %shl.1, %shl.0 149 %arrayidx = getelementptr inbounds [4 x i32], ptr %arr, i64 0, i64 %indvars.iv 150 store i32 %or, ptr %arrayidx, align 4 151 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 152 %exitcond = icmp eq i64 %indvars.iv, 7 153 br i1 %exitcond, label %for.cond.cleanup, label %for.body 154 155for.cond.cleanup: ; preds = %for.cond 156 call void @use(ptr nonnull %arr) #4 157 ret void 158} 159 160declare void @use(ptr) 161 162attributes #0 = { optsize } 163attributes #1 = { minsize optsize } 164