; RUN: opt -S -passes=loop-fusion < %s | FileCheck %s
;
;    int A[1024][1024];
;    int B[1024][1024];
;
;    #define EXPENSIVE_PURE_COMPUTATION(i) ((i - 3) * (i + 3) % i)
;
;    void dep_free() {
;
;      for (int i = 0; i < 100; i++)
;        for (int j = 0; j < 100; j++)
;          A[i][j] = EXPENSIVE_PURE_COMPUTATION(i);
;
;      for (int i = 0; i < 100; i++)
;        for (int j = 0; j < 100; j++)
;          B[i][j] = EXPENSIVE_PURE_COMPUTATION(i);
;    }
;
; Destination arrays: @dep_free stores into @A in its first loop nest and into
; @B in its second one.
@A = common global [1024 x [1024 x i32]] zeroinitializer, align 16
@B = common global [1024 x [1024 x i32]] zeroinitializer, align 16

; CHECK: void @dep_free
; CHECK-NEXT: bb:
; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]+]]
; CHECK: [[LOOP1HEADER]]
; CHECK: br label %[[LOOP3HEADER:bb[0-9]+]]
; CHECK: [[LOOP3HEADER]]
; CHECK: br label %[[LOOP2HEADER:bb[0-9]+]]
; CHECK: [[LOOP2HEADER]]
; CHECK: br label %[[LOOP4HEADER:bb[0-9]+]]
; CHECK: [[LOOP4HEADER]]
; CHECK: br i1 %{{.*}}, label %[[LOOP3HEADER]], label %[[LOOP1LATCH:bb[0-9]+]]
; CHECK: [[LOOP1LATCH]]
; CHECK-NEXT: %inc.outer.fc0 = add nuw nsw i64 %indvars.iv105, 1
; CHECK-NEXT: %add.outer.fc0 = add nuw nsw i32 %.06, 1
; CHECK-NEXT: %cmp.outer.fc0 = icmp ne i64 %inc.outer.fc0, 100
; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOP1EXIT:bb[0-9]*]]
; CHECK: ret void

; TODO: The current version of loop fusion does not allow the inner loops to be
; fused because they are not control flow equivalent and adjacent. These are
; limitations that can be addressed in future improvements to fusion.
; Test input: two sibling loop nests, each with an outer and an inner loop that
; run their induction variable from 0 until it reaches 100. The first nest only
; stores into @A and the second only stores into @B; the function contains no
; loads.
;
; Several value names below (%.06, %indvars.iv105, %inc.outer.fc0,
; %add.outer.fc0, %cmp.outer.fc0) and the bbN label shape are matched literally
; by this file's expected-output patterns, so they must not be renamed.
define void @dep_free() {
bb:
  br label %bb16

; First nest, outer-loop header. Carries the outer counter twice: as i32 (%.06)
; and as i64 (%indvars.iv105); both start at 0 and are advanced in %bb27.
bb16:                                             ; preds = %bb, %bb27
  %.06 = phi i32 [ 0, %bb ], [ %add.outer.fc0, %bb27 ]
  %indvars.iv105 = phi i64 [ 0, %bb ], [ %inc.outer.fc0, %bb27 ]
  br label %bb18

; Exit of the first nest; falls through to the header of the second nest.
bb30:                                             ; preds = %bb27
  br label %bb33

; First nest, inner-loop body: computes ((i - 3) * (i + 3)) srem i from the
; outer counter and stores it to @A[i][j], where j is %indvars.iv74.
; NOTE(review): on the first outer iteration %tmp22 is 0, so the srem has a
; zero divisor. This mirrors `% i` with i == 0 in the C sketch; the RUN line
; only transforms and pattern-matches the IR, it never executes it.
bb18:                                             ; preds = %bb16, %bb25
  %indvars.iv74 = phi i64 [ 0, %bb16 ], [ %indvars.iv.next8, %bb25 ]
  %tmp = add nsw i32 %.06, -3
  %tmp19 = add nuw nsw i64 %indvars.iv105, 3
  %tmp20 = trunc i64 %tmp19 to i32
  %tmp21 = mul nsw i32 %tmp, %tmp20
  %tmp22 = trunc i64 %indvars.iv105 to i32
  %tmp23 = srem i32 %tmp21, %tmp22
  %tmp24 = getelementptr inbounds [1024 x [1024 x i32]], ptr @A, i64 0, i64 %indvars.iv105, i64 %indvars.iv74
  store i32 %tmp23, ptr %tmp24, align 4
  br label %bb25

; First nest, inner-loop latch: advance j and repeat until it reaches 100.
bb25:                                             ; preds = %bb18
  %indvars.iv.next8 = add nuw nsw i64 %indvars.iv74, 1
  %exitcond9 = icmp ne i64 %indvars.iv.next8, 100
  br i1 %exitcond9, label %bb18, label %bb27

; First nest, outer-loop latch: advance both copies of the outer counter and
; repeat until the i64 copy reaches 100.
bb27:                                             ; preds = %bb25
  %inc.outer.fc0 = add nuw nsw i64 %indvars.iv105, 1
  %add.outer.fc0 = add nuw nsw i32 %.06, 1
  %cmp.outer.fc0 = icmp ne i64 %inc.outer.fc0, 100
  br i1 %cmp.outer.fc0, label %bb16, label %bb30

; Second nest, outer-loop header. Same structure as %bb16: an i32 counter
; (%.023) and an i64 counter (%indvars.iv42), both starting at 0 and advanced
; in %bb45.
bb33:                                             ; preds = %bb30, %bb45
  %.023 = phi i32 [ 0, %bb30 ], [ %tmp46, %bb45 ]
  %indvars.iv42 = phi i64 [ 0, %bb30 ], [ %indvars.iv.next5, %bb45 ]
  br label %bb35

; Exit of the second nest; leads to the return block.
bb31:                                             ; preds = %bb45
  br label %bb47

; Second nest, inner-loop body: same arithmetic as %bb18, but the result is
; stored to @B[i][j], where j is %indvars.iv1. The srem divisor %tmp40 is
; likewise 0 on the first outer iteration.
bb35:                                             ; preds = %bb33, %bb43
  %indvars.iv1 = phi i64 [ 0, %bb33 ], [ %indvars.iv.next, %bb43 ]
  %tmp36 = add nsw i32 %.023, -3
  %tmp37 = add nuw nsw i64 %indvars.iv42, 3
  %tmp38 = trunc i64 %tmp37 to i32
  %tmp39 = mul nsw i32 %tmp36, %tmp38
  %tmp40 = trunc i64 %indvars.iv42 to i32
  %tmp41 = srem i32 %tmp39, %tmp40
  %tmp42 = getelementptr inbounds [1024 x [1024 x i32]], ptr @B, i64 0, i64 %indvars.iv42, i64 %indvars.iv1
  store i32 %tmp41, ptr %tmp42, align 4
  br label %bb43

; Second nest, inner-loop latch: advance j and repeat until it reaches 100.
bb43:                                             ; preds = %bb35
  %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
  %exitcond = icmp ne i64 %indvars.iv.next, 100
  br i1 %exitcond, label %bb35, label %bb45

; Second nest, outer-loop latch: advance both copies of the outer counter and
; repeat until the i64 copy reaches 100.
bb45:                                             ; preds = %bb43
  %indvars.iv.next5 = add nuw nsw i64 %indvars.iv42, 1
  %tmp46 = add nuw nsw i32 %.023, 1
  %exitcond6 = icmp ne i64 %indvars.iv.next5, 100
  br i1 %exitcond6, label %bb33, label %bb31

bb47:                                             ; preds = %bb31
  ret void
}