; RUN: opt -S -passes=loop-simplify,loop-fusion -debug-only=loop-fusion -disable-output < %s 2>&1 | FileCheck %s
; REQUIRES: asserts

; Negative tests for the loop-fusion pass. Every function below contains two
; loops that must not be fused; the FileCheck directives match the pass's
; debug output, which reports the candidate sets and the reason for rejection.

; Global array written by the second loop of most tests.
@B = common global [1024 x i32] zeroinitializer, align 16

; Check that the two candidates for fusion are placed into separate candidate
; sets because they are not control flow equivalent.

; CHECK: Performing Loop Fusion on function non_cfe
; CHECK: Fusion Candidates:
; CHECK: *** Fusion Candidate Set ***
; CHECK: bb
; CHECK: ****************************
; CHECK: *** Fusion Candidate Set ***
; CHECK: bb20.preheader
; CHECK: ****************************
; CHECK: Loop Fusion complete
define void @non_cfe(ptr noalias %arg, i32 %N) {
bb:
  br label %bb7

; First loop: entered unconditionally, stores into %arg, exits after the
; induction variable reaches 100.
bb7:                                              ; preds = %bb, %bb14
  %.014 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ]
  %indvars.iv23 = phi i64 [ 0, %bb ], [ %indvars.iv.next3, %bb14 ]
  %tmp = add nsw i32 %.014, -3
  %tmp8 = add nuw nsw i64 %indvars.iv23, 3
  %tmp9 = trunc i64 %tmp8 to i32
  %tmp10 = mul nsw i32 %tmp, %tmp9
  %tmp11 = trunc i64 %indvars.iv23 to i32
  %tmp12 = srem i32 %tmp10, %tmp11
  %tmp13 = getelementptr inbounds i32, ptr %arg, i64 %indvars.iv23
  store i32 %tmp12, ptr %tmp13, align 4
  br label %bb14

bb14:                                             ; preds = %bb7
  %indvars.iv.next3 = add nuw nsw i64 %indvars.iv23, 1
  %tmp15 = add nuw nsw i32 %.014, 1
  %exitcond4 = icmp ne i64 %indvars.iv.next3, 100
  br i1 %exitcond4, label %bb7, label %bb34

; Guards in front of the second loop: it is reached only when %N < 50 and the
; first element of %arg is negative, so the two loops do not always execute
; together.
bb34:
  %cmp = icmp slt i32 %N, 50
  br i1 %cmp, label %bb16, label %bb33

bb16:                                             ; preds = %bb34
  %tmp17 = load i32, ptr %arg, align 4
  %tmp18 = icmp slt i32 %tmp17, 0
  br i1 %tmp18, label %bb20.preheader, label %bb33

bb20.preheader:                                   ; preds = %bb16
  br label %bb22

; Second loop: same computation as the first, but stores into @B.
bb22:                                             ; preds = %bb20.preheader, %bb30
  %.02 = phi i32 [ 0, %bb20.preheader ], [ %tmp31, %bb30 ]
  %indvars.iv1 = phi i64 [ 0, %bb20.preheader ], [ %indvars.iv.next, %bb30 ]
  %tmp23 = add nsw i32 %.02, -3
  %tmp24 = add nuw nsw i64 %indvars.iv1, 3
  %tmp25 = trunc i64 %tmp24 to i32
  %tmp26 = mul nsw i32 %tmp23, %tmp25
  %tmp27 = trunc i64 %indvars.iv1 to i32
  %tmp28 = srem i32 %tmp26, %tmp27
  %tmp29 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %indvars.iv1
  store i32 %tmp28, ptr %tmp29, align 4
  br label %bb30

bb30:                                             ; preds = %bb22
  %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
  %tmp31 = add nuw nsw i32 %.02, 1
  %exitcond = icmp ne i64 %indvars.iv.next, 100
  br i1 %exitcond, label %bb22, label %bb33.loopexit

bb33.loopexit:                                    ; preds = %bb30
  br label %bb33

bb33:                                             ; preds = %bb33.loopexit, %bb16, %bb34
  ret void
}

; Check that fusion detects the two candidates are not adjacent (the exit block
; of the first candidate is not the preheader of the second candidate).

; CHECK: Performing Loop Fusion on function non_adjacent
; CHECK: Fusion Candidates:
; CHECK: *** Fusion Candidate Set ***
; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
; CHECK-NEXT: ****************************
; CHECK: Attempting fusion on Candidate Set:
; CHECK-NEXT: [[LOOP1PREHEADER]]
; CHECK-NEXT: [[LOOP2PREHEADER]]
; CHECK: Fusion candidates are not adjacent. Not fusing.
; CHECK: Loop Fusion complete
define void @non_adjacent(ptr noalias %arg) {
bb:
  br label %bb5

; Exit block of the first loop; it branches to %bb13, which in turn branches
; into the second loop's header.
bb4:                                              ; preds = %bb11
  br label %bb13

; First loop: stores into %arg, 100 iterations.
bb5:                                              ; preds = %bb, %bb11
  %.013 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
  %tmp = add nsw i64 %.013, -3
  %tmp6 = add nuw nsw i64 %.013, 3
  %tmp7 = mul nsw i64 %tmp, %tmp6
  %tmp8 = srem i64 %tmp7, %.013
  %tmp9 = trunc i64 %tmp8 to i32
  %tmp10 = getelementptr inbounds i32, ptr %arg, i64 %.013
  store i32 %tmp9, ptr %tmp10, align 4
  br label %bb11

bb11:                                             ; preds = %bb5
  %tmp12 = add nuw nsw i64 %.013, 1
  %exitcond2 = icmp ne i64 %tmp12, 100
  br i1 %exitcond2, label %bb5, label %bb4

bb13:                                             ; preds = %bb4
  br label %bb16

bb15:                                             ; preds = %bb23
  br label %bb25

; Second loop: stores into @B, 100 iterations.
bb16:                                             ; preds = %bb13, %bb23
  %.02 = phi i64 [ 0, %bb13 ], [ %tmp24, %bb23 ]
  %tmp17 = add nsw i64 %.02, -3
  %tmp18 = add nuw nsw i64 %.02, 3
  %tmp19 = mul nsw i64 %tmp17, %tmp18
  %tmp20 = srem i64 %tmp19, %.02
  %tmp21 = trunc i64 %tmp20 to i32
  %tmp22 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %.02
  store i32 %tmp21, ptr %tmp22, align 4
  br label %bb23

bb23:                                             ; preds = %bb16
  %tmp24 = add nuw nsw i64 %.02, 1
  %exitcond = icmp ne i64 %tmp24, 100
  br i1 %exitcond, label %bb16, label %bb15

bb25:                                             ; preds = %bb15
  ret void
}

; Check that the different bounds are detected and prevent fusion.

; CHECK: Performing Loop Fusion on function different_bounds
; CHECK: Fusion Candidates:
; CHECK: *** Fusion Candidate Set ***
; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
; CHECK-NEXT: ****************************
; CHECK: Attempting fusion on Candidate Set:
; CHECK-NEXT: [[LOOP1PREHEADER]]
; CHECK-NEXT: [[LOOP2PREHEADER]]
; CHECK: Fusion candidates do not have identical trip counts. Not fusing.
; CHECK: Loop Fusion complete
define void @different_bounds(ptr noalias %arg) {
bb:
  br label %bb5

bb4:                                              ; preds = %bb11
  br label %bb13

; First loop: exits when its induction variable reaches 100.
bb5:                                              ; preds = %bb, %bb11
  %.013 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
  %tmp = add nsw i64 %.013, -3
  %tmp6 = add nuw nsw i64 %.013, 3
  %tmp7 = mul nsw i64 %tmp, %tmp6
  %tmp8 = srem i64 %tmp7, %.013
  %tmp9 = trunc i64 %tmp8 to i32
  %tmp10 = getelementptr inbounds i32, ptr %arg, i64 %.013
  store i32 %tmp9, ptr %tmp10, align 4
  br label %bb11

bb11:                                             ; preds = %bb5
  %tmp12 = add nuw nsw i64 %.013, 1
  %exitcond2 = icmp ne i64 %tmp12, 100
  br i1 %exitcond2, label %bb5, label %bb4

bb13:                                             ; preds = %bb4
  br label %bb16

bb15:                                             ; preds = %bb23
  br label %bb25

; Second loop: exits when its induction variable reaches 200, so its bound
; differs from the first loop's.
bb16:                                             ; preds = %bb13, %bb23
  %.02 = phi i64 [ 0, %bb13 ], [ %tmp24, %bb23 ]
  %tmp17 = add nsw i64 %.02, -3
  %tmp18 = add nuw nsw i64 %.02, 3
  %tmp19 = mul nsw i64 %tmp17, %tmp18
  %tmp20 = srem i64 %tmp19, %.02
  %tmp21 = trunc i64 %tmp20 to i32
  %tmp22 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %.02
  store i32 %tmp21, ptr %tmp22, align 4
  br label %bb23

bb23:                                             ; preds = %bb16
  %tmp24 = add nuw nsw i64 %.02, 1
  %exitcond = icmp ne i64 %tmp24, 200
  br i1 %exitcond, label %bb16, label %bb15

bb25:                                             ; preds = %bb15
  ret void
}

; Check that the negative dependence between the two candidates is identified
; and prevents fusion.

; CHECK: Performing Loop Fusion on function negative_dependence
; CHECK: Fusion Candidates:
; CHECK: *** Fusion Candidate Set ***
; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
; CHECK-NEXT: ****************************
; CHECK: Attempting fusion on Candidate Set:
; CHECK-NEXT: [[LOOP1PREHEADER]]
; CHECK-NEXT: [[LOOP2PREHEADER]]
; CHECK: Memory dependencies do not allow fusion!
; CHECK: Loop Fusion complete
define void @negative_dependence(ptr noalias %arg) {
bb:
  br label %bb7

bb11.preheader:                                   ; preds = %bb9
  br label %bb13

; First loop: stores the (truncated) induction variable to %arg[i].
bb7:                                              ; preds = %bb, %bb9
  %indvars.iv22 = phi i64 [ 0, %bb ], [ %indvars.iv.next3, %bb9 ]
  %tmp = getelementptr inbounds i32, ptr %arg, i64 %indvars.iv22
  %tmp8 = trunc i64 %indvars.iv22 to i32
  store i32 %tmp8, ptr %tmp, align 4
  br label %bb9

bb9:                                              ; preds = %bb7
  %indvars.iv.next3 = add nuw nsw i64 %indvars.iv22, 1
  %exitcond4 = icmp ne i64 %indvars.iv.next3, 100
  br i1 %exitcond4, label %bb7, label %bb11.preheader

; Second loop: loads %arg[i + 1] (the address is computed from the incremented
; induction variable) and stores the doubled value to @B[i].
bb13:                                             ; preds = %bb11.preheader, %bb18
  %indvars.iv1 = phi i64 [ 0, %bb11.preheader ], [ %indvars.iv.next, %bb18 ]
  %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
  %tmp14 = getelementptr inbounds i32, ptr %arg, i64 %indvars.iv.next
  %tmp15 = load i32, ptr %tmp14, align 4
  %tmp16 = shl nsw i32 %tmp15, 1
  %tmp17 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %indvars.iv1
  store i32 %tmp16, ptr %tmp17, align 4
  br label %bb18

bb18:                                             ; preds = %bb13
  %exitcond = icmp ne i64 %indvars.iv.next, 100
  br i1 %exitcond, label %bb13, label %bb19

bb19:                                             ; preds = %bb18
  ret void
}

; Check for values defined in Loop 0 and used in Loop 1.
; It is not safe to fuse in this case, because the second loop has
; a use of %.01.lcssa which is defined in the body of loop 0. The
; first loop must execute completely in order to compute the correct
; value of %.01.lcssa to be used in the second loop.

; CHECK: Performing Loop Fusion on function sumTest
; CHECK: Fusion Candidates:
; CHECK: *** Fusion Candidate Set ***
; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
; CHECK-NEXT: ****************************
; CHECK: Attempting fusion on Candidate Set:
; CHECK-NEXT: [[LOOP1PREHEADER]]
; CHECK-NEXT: [[LOOP2PREHEADER]]
; CHECK: Memory dependencies do not allow fusion!
; CHECK: Loop Fusion complete
define i32 @sumTest(ptr noalias %arg) {
bb:
  br label %bb9

bb13.preheader:                                   ; preds = %bb9
  br label %bb15

; First loop: single-block loop that accumulates the elements of %arg.
bb9:                                              ; preds = %bb, %bb9
  %.01.lcssa = phi i32 [ 0, %bb ], [ %tmp11, %bb9 ]
  %.013 = phi i32 [ 0, %bb ], [ %tmp11, %bb9 ]
  %indvars.iv32 = phi i64 [ 0, %bb ], [ %indvars.iv.next4, %bb9 ]
  %tmp = getelementptr inbounds i32, ptr %arg, i64 %indvars.iv32
  %tmp10 = load i32, ptr %tmp, align 4
  %tmp11 = add nsw i32 %.013, %tmp10
  %indvars.iv.next4 = add nuw nsw i64 %indvars.iv32, 1
  %exitcond5 = icmp ne i64 %indvars.iv.next4, 100
  br i1 %exitcond5, label %bb9, label %bb13.preheader

bb14:                                             ; preds = %bb20
  br label %bb21

; Second loop: divides each element of %arg by %.01.lcssa and stores the
; quotient to @B.
bb15:                                             ; preds = %bb13.preheader, %bb20
  %indvars.iv1 = phi i64 [ 0, %bb13.preheader ], [ %indvars.iv.next, %bb20 ]
  %tmp16 = getelementptr inbounds i32, ptr %arg, i64 %indvars.iv1
  %tmp17 = load i32, ptr %tmp16, align 4
  %tmp18 = sdiv i32 %tmp17, %.01.lcssa
  %tmp19 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %indvars.iv1
  store i32 %tmp18, ptr %tmp19, align 4
  br label %bb20

bb20:                                             ; preds = %bb15
  %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
  %exitcond = icmp ne i64 %indvars.iv.next, 100
  br i1 %exitcond, label %bb15, label %bb14

bb21:                                             ; preds = %bb14
  ret i32 %.01.lcssa
}

; Similar to sumTest above. The first loop computes %add and must
; complete before it is used in the second loop. Thus, these two loops
; also cannot be fused.

; CHECK: Performing Loop Fusion on function test
; CHECK: Fusion Candidates:
; CHECK: *** Fusion Candidate Set ***
; CHECK-NEXT: [[LOOP1PREHEADER:for.body[0-9]*.preheader]]
; CHECK-NEXT: [[LOOP2PREHEADER:for.body[0-9]*.preheader]]
; CHECK-NEXT: ****************************
; CHECK: Attempting fusion on Candidate Set:
; CHECK-NEXT: [[LOOP1PREHEADER]]
; CHECK-NEXT: [[LOOP2PREHEADER]]
; CHECK: Memory dependencies do not allow fusion!
; CHECK: Loop Fusion complete
define float @test(ptr nocapture %a, i32 %n) {
entry:
  %conv = zext i32 %n to i64
  %cmp32 = icmp eq i32 %n, 0
  br i1 %cmp32, label %for.cond.cleanup7, label %for.body

; First loop: floating-point sum of %a[0 .. %n).
for.body:                                         ; preds = %for.body, %entry
  %i.034 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %sum1.033 = phi float [ %add, %for.body ], [ 0.000000e+00, %entry ]
  %idxprom = trunc i64 %i.034 to i32
  %arrayidx = getelementptr inbounds float, ptr %a, i32 %idxprom
  %0 = load float, ptr %arrayidx, align 4
  %add = fadd float %sum1.033, %0
  %inc = add nuw nsw i64 %i.034, 1
  %cmp = icmp ult i64 %inc, %conv
  br i1 %cmp, label %for.body, label %for.body8

; Second loop: divides every element of %a in place by the sum %add.
for.body8:                                        ; preds = %for.body, %for.body8
  %i2.031 = phi i64 [ %inc14, %for.body8 ], [ 0, %for.body ]
  %idxprom9 = trunc i64 %i2.031 to i32
  %arrayidx10 = getelementptr inbounds float, ptr %a, i32 %idxprom9
  %1 = load float, ptr %arrayidx10, align 4
  %div = fdiv float %1, %add
  store float %div, ptr %arrayidx10, align 4
  %inc14 = add nuw nsw i64 %i2.031, 1
  %cmp5 = icmp ult i64 %inc14, %conv
  br i1 %cmp5, label %for.body8, label %for.cond.cleanup7

for.cond.cleanup7:                                ; preds = %for.body8, %entry
  %sum1.0.lcssa36 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body8 ]
  ret float %sum1.0.lcssa36
}

; Check that non-rotated loops are not considered for fusion.
; CHECK: Performing Loop Fusion on function notRotated
; CHECK: Loop bb{{.*}} is not rotated!
; CHECK: Loop bb{{.*}} is not rotated!
define void @notRotated(ptr noalias %arg) {
bb:
  br label %bb5

; First loop: the exit test sits in the header (%bb5) and the latch (%bb14)
; branches back unconditionally.
bb5:                                              ; preds = %bb14, %bb
  %indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb14 ], [ 0, %bb ]
  %.01 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ]
  %exitcond4 = icmp ne i64 %indvars.iv2, 100
  br i1 %exitcond4, label %bb7, label %bb17

bb7:                                              ; preds = %bb5
  %tmp = add nsw i32 %.01, -3
  %tmp8 = add nuw nsw i64 %indvars.iv2, 3
  %tmp9 = trunc i64 %tmp8 to i32
  %tmp10 = mul nsw i32 %tmp, %tmp9
  %tmp11 = trunc i64 %indvars.iv2 to i32
  %tmp12 = srem i32 %tmp10, %tmp11
  %tmp13 = getelementptr inbounds i32, ptr %arg, i64 %indvars.iv2
  store i32 %tmp12, ptr %tmp13, align 4
  br label %bb14

bb14:                                             ; preds = %bb7
  %indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1
  %tmp15 = add nuw nsw i32 %.01, 1
  br label %bb5

; Second loop: same shape, with the exit test in the header (%bb17) and an
; unconditional back branch from %bb27.
bb17:                                             ; preds = %bb27, %bb5
  %indvars.iv = phi i64 [ %indvars.iv.next, %bb27 ], [ 0, %bb5 ]
  %.0 = phi i32 [ 0, %bb5 ], [ %tmp28, %bb27 ]
  %exitcond = icmp ne i64 %indvars.iv, 100
  br i1 %exitcond, label %bb19, label %bb18

bb18:                                             ; preds = %bb17
  br label %bb29

bb19:                                             ; preds = %bb17
  %tmp20 = add nsw i32 %.0, -3
  %tmp21 = add nuw nsw i64 %indvars.iv, 3
  %tmp22 = trunc i64 %tmp21 to i32
  %tmp23 = mul nsw i32 %tmp20, %tmp22
  %tmp24 = trunc i64 %indvars.iv to i32
  %tmp25 = srem i32 %tmp23, %tmp24
  %tmp26 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %indvars.iv
  store i32 %tmp25, ptr %tmp26, align 4
  br label %bb27

bb27:                                             ; preds = %bb19
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %tmp28 = add nuw nsw i32 %.0, 1
  br label %bb17

bb29:                                             ; preds = %bb18
  ret void
}