; RUN: opt -S -passes=loop-fusion < %s 2>&1 | FileCheck %s

; Verify that LoopFusion can fuse two double-loop nests with guarded inner
; loops. Loops are in canonical form.
;
; Both nests run the outer loop %m times and the inner loop %n times, and both
; inner loops are guarded by the same condition (%n > 0):
;   nest 1:  b[i][j] = a[i][j] + 2
;   nest 2:  c[i][j] = a[i][j] << 1
; The patterns below expect a single nest that keeps the entry-side block names
; of nest 1 (outer1.ph, outer1.body.inner.guard, inner1.ph, inner1.body) and
; the exit-side block names of nest 2 (inner2.exit, outer2.latch, outer2.exit).

@a = common global [10 x [10 x i32]] zeroinitializer
@b = common global [10 x [10 x i32]] zeroinitializer
@c = common global [10 x [10 x i32]] zeroinitializer

; CHECK-LABEL: @double_loop_nest_inner_guard
; CHECK: br i1 %{{.*}}, label %[[OUTER_PH:outer1.ph]], label %[[FUNC_EXIT:func_exit]]

; CHECK: [[OUTER_PH]]:
; CHECK: br label %[[OUTER_BODY_INNER_GUARD:outer1.body.inner.guard]]

; CHECK: [[OUTER_BODY_INNER_GUARD]]:
; CHECK: br i1 %{{.*}}, label %[[INNER_PH:inner1.ph]], label %[[OUTER_LATCH:outer2.latch]]

; CHECK: [[INNER_PH]]:
; CHECK-NEXT: br label %[[INNER_BODY:inner1.body]]

; CHECK: [[INNER_BODY]]:
; First loop body.
; CHECK: load
; CHECK: add
; CHECK: store
; Second loop body. The doubling is emitted as a shift, so match the opcode
; itself rather than relying on the result value's name.
; CHECK: load
; CHECK: shl
; CHECK: store
; CHECK: br i1 %{{.*}}, label %[[INNER_EXIT:inner2.exit]], label %[[INNER_BODY]]

; CHECK: [[INNER_EXIT]]:
; CHECK-NEXT: br label %[[OUTER_LATCH]]

; CHECK: [[OUTER_LATCH]]:
; CHECK: br i1 %{{.*}}, label %[[OUTER_EXIT:outer2.exit]], label %[[OUTER_BODY_INNER_GUARD]]

; CHECK: [[OUTER_EXIT]]:
; CHECK-NEXT: br label %[[FUNC_EXIT]]

; CHECK: [[FUNC_EXIT]]:
; CHECK-NEXT: ret

; %m is the outer trip count and %n the inner trip count for both nests.
; %M and %N are not referenced by the body.
define i32 @double_loop_nest_inner_guard(i32 %m, i32 %n, i32 %M, i32 %N) {
entry:
  ; Function-level guard: skip both nests when the outer trip count is <= 0.
  %cmp63 = icmp sgt i32 %m, 0
  br i1 %cmp63, label %outer1.ph, label %func_exit

outer1.ph:
  ; Inner-loop guard condition and widened trip counts, computed once and
  ; reused by both nests.
  %cmp261 = icmp sgt i32 %n, 0
  %wide.trip.count76 = zext i32 %m to i64
  %wide.trip.count72 = zext i32 %n to i64
  br label %outer1.body.inner.guard

; ---- First nest: b[i][j] = a[i][j] + 2 ----

outer1.body.inner.guard:
  %iv74 = phi i64 [ 0, %outer1.ph ], [ %iv.next75, %outer1.latch ]
  br i1 %cmp261, label %inner1.ph, label %outer1.latch

inner1.ph:
  br label %inner1.body

inner1.body:
  %iv70 = phi i64 [ %iv.next71, %inner1.body ], [ 0, %inner1.ph ]
  %idx6 = getelementptr inbounds [10 x [10 x i32]], ptr @a, i64 0, i64 %iv74, i64 %iv70
  %0 = load i32, ptr %idx6
  %add = add nsw i32 %0, 2
  %idx10 = getelementptr inbounds [10 x [10 x i32]], ptr @b, i64 0, i64 %iv74, i64 %iv70
  store i32 %add, ptr %idx10
  %iv.next71 = add nuw nsw i64 %iv70, 1
  %exitcond73 = icmp eq i64 %iv.next71, %wide.trip.count72
  br i1 %exitcond73, label %inner1.exit, label %inner1.body

inner1.exit:
  br label %outer1.latch

outer1.latch:
  %iv.next75 = add nuw nsw i64 %iv74, 1
  %exitcond77 = icmp eq i64 %iv.next75, %wide.trip.count76
  br i1 %exitcond77, label %outer2.ph, label %outer1.body.inner.guard

; ---- Second nest: c[i][j] = a[i][j] << 1 ----

outer2.ph:
  br label %outer2.body.inner.guard

outer2.body.inner.guard:
  %iv66 = phi i64 [ %iv.next67, %outer2.latch ], [ 0, %outer2.ph ]
  ; Same guard value as the first nest's inner loop.
  br i1 %cmp261, label %inner2.ph, label %outer2.latch

inner2.ph:
  br label %inner2.body

inner2.body:
  %iv = phi i64 [ %iv.next, %inner2.body ], [ 0, %inner2.ph ]
  %idx27 = getelementptr inbounds [10 x [10 x i32]], ptr @a, i64 0, i64 %iv66, i64 %iv
  %1 = load i32, ptr %idx27
  %mul = shl nsw i32 %1, 1
  %idx31 = getelementptr inbounds [10 x [10 x i32]], ptr @c, i64 0, i64 %iv66, i64 %iv
  store i32 %mul, ptr %idx31
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %wide.trip.count72
  br i1 %exitcond, label %inner2.exit, label %inner2.body

inner2.exit:
  br label %outer2.latch

outer2.latch:
  %iv.next67 = add nuw nsw i64 %iv66, 1
  %exitcond69 = icmp eq i64 %iv.next67, %wide.trip.count76
  br i1 %exitcond69, label %outer2.exit, label %outer2.body.inner.guard

outer2.exit:
  br label %func_exit

func_exit:
  ; The return value is irrelevant to the test; use a defined constant instead
  ; of the deprecated undef.
  ret i32 0
}