; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=loop-fusion < %s | FileCheck %s

; Global array written by the second loop of @dep_free_parametric.
@B = common global [1024 x i32] zeroinitializer, align 16

; Two adjacent loops, each wrapped in its own guard branch. Both guards compare
; 0 against %N and both loops count from 0 while the incremented index is less
; than %N. The first loop stores into the noalias argument %A, the second into
; the global @B. The expected output contains a single loop (bb5) holding both
; loop bodies, with both guard compares in the entry block.
define void @dep_free_parametric(ptr noalias %A, i64 %N) {
; CHECK-LABEL: @dep_free_parametric(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP4:%.*]] = icmp slt i64 0, [[N:%.*]]
; CHECK-NEXT:    [[CMP31:%.*]] = icmp slt i64 0, [[N]]
; CHECK-NEXT:    br i1 [[CMP4]], label [[BB3:%.*]], label [[BB12:%.*]]
; CHECK:       bb3:
; CHECK-NEXT:    br label [[BB5:%.*]]
; CHECK:       bb5:
; CHECK-NEXT:    [[I_05:%.*]] = phi i64 [ [[INC:%.*]], [[BB5]] ], [ 0, [[BB3]] ]
; CHECK-NEXT:    [[I1_02:%.*]] = phi i64 [ [[INC14:%.*]], [[BB5]] ], [ 0, [[BB3]] ]
; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i64 [[I_05]], 3
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[I_05]], 3
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[SUB]], [[ADD]]
; CHECK-NEXT:    [[REM:%.*]] = srem i64 [[MUL]], [[I_05]]
; CHECK-NEXT:    [[CONV:%.*]] = trunc i64 [[REM]] to i32
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I_05]]
; CHECK-NEXT:    store i32 [[CONV]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_05]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]]
; CHECK-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[I1_02]], 3
; CHECK-NEXT:    [[ADD8:%.*]] = add nsw i64 [[I1_02]], 3
; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i64 [[SUB7]], [[ADD8]]
; CHECK-NEXT:    [[REM10:%.*]] = srem i64 [[MUL9]], [[I1_02]]
; CHECK-NEXT:    [[CONV11:%.*]] = trunc i64 [[REM10]] to i32
; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 [[I1_02]]
; CHECK-NEXT:    store i32 [[CONV11]], ptr [[ARRAYIDX12]], align 4
; CHECK-NEXT:    [[INC14]] = add nsw i64 [[I1_02]], 1
; CHECK-NEXT:    [[CMP3:%.*]] = icmp slt i64 [[INC14]], [[N]]
; CHECK-NEXT:    br i1 [[CMP3]], label [[BB5]], label [[BB15:%.*]]
; CHECK:       bb15:
; CHECK-NEXT:    br label [[BB12]]
; CHECK:       bb12:
; CHECK-NEXT:    ret void
;
entry:
  ; Guard of the first loop: skip it entirely unless 0 < %N.
  %cmp4 = icmp slt i64 0, %N
  br i1 %cmp4, label %bb3, label %bb14

bb3:                                              ; preds = %entry
  ; Preheader of the first loop.
  br label %bb5

bb5:                                              ; preds = %bb3, %bb5
  ; First loop body: A[i] = trunc(((i - 3) * (i + 3)) srem i).
  %i.05 = phi i64 [ %inc, %bb5 ], [ 0, %bb3 ]
  %sub = sub nsw i64 %i.05, 3
  %add = add nsw i64 %i.05, 3
  %mul = mul nsw i64 %sub, %add
  ; NOTE(review): %i.05 is 0 on the first iteration, so this srem has a zero
  ; divisor (the same pattern appears in bb9). Presumably harmless because the
  ; test only inspects the transformed IR - confirm.
  %rem = srem i64 %mul, %i.05
  %conv = trunc i64 %rem to i32
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.05
  store i32 %conv, ptr %arrayidx, align 4
  %inc = add nsw i64 %i.05, 1
  %cmp = icmp slt i64 %inc, %N
  br i1 %cmp, label %bb5, label %bb10

bb10:                                             ; preds = %bb5
  ; Exit block of the first loop.
  br label %bb14

bb14:                                             ; preds = %bb10, %entry
  ; Guard of the second loop: the same comparison as %cmp4, recomputed.
  %cmp31 = icmp slt i64 0, %N
  br i1 %cmp31, label %bb8, label %bb12

bb8:                                              ; preds = %bb14
  ; Preheader of the second loop.
  br label %bb9

bb9:                                              ; preds = %bb8, %bb9
  ; Second loop body: same computation as the first loop, stored into @B.
  %i1.02 = phi i64 [ %inc14, %bb9 ], [ 0, %bb8 ]
  %sub7 = sub nsw i64 %i1.02, 3
  %add8 = add nsw i64 %i1.02, 3
  %mul9 = mul nsw i64 %sub7, %add8
  %rem10 = srem i64 %mul9, %i1.02
  %conv11 = trunc i64 %rem10 to i32
  %arrayidx12 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %i1.02
  store i32 %conv11, ptr %arrayidx12, align 4
  %inc14 = add nsw i64 %i1.02, 1
  %cmp3 = icmp slt i64 %inc14, %N
  br i1 %cmp3, label %bb9, label %bb15

bb15:                                             ; preds = %bb9
  ; Exit block of the second loop.
  br label %bb12

bb12:                                             ; preds = %bb15, %bb14
  ret void
}

; Test that `%add` is moved into for.first.preheader, and that the two loops
; for.first and for.second are fused.
define void @moveinsts_preheader(ptr noalias %A, ptr noalias %B, i64 %N, i32 %x) {
; CHECK-LABEL: @moveinsts_preheader(
; CHECK-NEXT:  for.first.guard:
; CHECK-NEXT:    [[CMP_GUARD:%.*]] = icmp slt i64 0, [[N:%.*]]
; CHECK-NEXT:    br i1 [[CMP_GUARD]], label [[FOR_FIRST_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK:       for.first.preheader:
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[X:%.*]], 1
; CHECK-NEXT:    br label [[FOR_FIRST:%.*]]
; CHECK:       for.first:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[INC_I:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[INC_J:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
; CHECK-NEXT:    [[AI:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I]]
; CHECK-NEXT:    store i32 0, ptr [[AI]], align 4
; CHECK-NEXT:    [[INC_I]] = add nsw i64 [[I]], 1
; CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt i64 [[INC_I]], [[N]]
; CHECK-NEXT:    [[BJ:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[J]]
; CHECK-NEXT:    store i32 0, ptr [[BJ]], align 4
; CHECK-NEXT:    [[INC_J]] = add nsw i64 [[J]], 1
; CHECK-NEXT:    [[CMP_J:%.*]] = icmp slt i64 [[INC_J]], [[N]]
; CHECK-NEXT:    br i1 [[CMP_J]], label [[FOR_FIRST]], label [[FOR_SECOND_EXIT:%.*]]
; CHECK:       for.second.exit:
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
for.first.guard:
  ; Guard of the first loop; the same %cmp.guard value also guards the second.
  %cmp.guard = icmp slt i64 0, %N
  br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard

for.first.preheader:
  br label %for.first

for.first:
  ; First loop: A[i] = 0 for i counting up from 0 while i + 1 < %N.
  %i = phi i64 [ %inc.i, %for.first ], [ 0, %for.first.preheader ]
  %Ai = getelementptr inbounds i32, ptr %A, i64 %i
  store i32 0, ptr %Ai, align 4
  %inc.i = add nsw i64 %i, 1
  %cmp.i = icmp slt i64 %inc.i, %N
  br i1 %cmp.i, label %for.first, label %for.first.exit

for.first.exit:
  br label %for.second.guard

for.second.guard:
  ; Guard of the second loop, reusing %cmp.guard from for.first.guard.
  br i1 %cmp.guard, label %for.second.preheader, label %for.end

for.second.preheader:
  ; Instruction under test: it sits in the second loop's preheader, and the
  ; expected output has it in for.first.preheader.
  %add = add nsw i32 %x, 1
  br label %for.second

for.second:
  ; Second loop: B[j] = 0 over the same iteration range as the first loop.
  %j = phi i64 [ %inc.j, %for.second ], [ 0, %for.second.preheader ]
  %Bj = getelementptr inbounds i32, ptr %B, i64 %j
  store i32 0, ptr %Bj, align 4
  %inc.j = add nsw i64 %j, 1
  %cmp.j = icmp slt i64 %inc.j, %N
  br i1 %cmp.j, label %for.second, label %for.second.exit

for.second.exit:
  br label %for.end

for.end:
  ret void
}

; Test that `%add` is moved into for.second.exit, and that the two loops
; for.first and for.second are fused.

define void @moveinsts_exitblock(ptr noalias %A, ptr noalias %B, i64 %N, i32 %x) {
; CHECK-LABEL: @moveinsts_exitblock(
; CHECK-NEXT:  for.first.guard:
; CHECK-NEXT:    [[CMP_GUARD:%.*]] = icmp slt i64 0, [[N:%.*]]
; CHECK-NEXT:    br i1 [[CMP_GUARD]], label [[FOR_FIRST_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK:       for.first.preheader:
; CHECK-NEXT:    br label [[FOR_FIRST:%.*]]
; CHECK:       for.first:
; CHECK-NEXT:    [[I_04:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
; CHECK-NEXT:    [[J_02:%.*]] = phi i64 [ [[INC6:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I_04]]
; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_04]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]]
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[J_02]]
; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT:    [[INC6]] = add nsw i64 [[J_02]], 1
; CHECK-NEXT:    [[CMP_J:%.*]] = icmp slt i64 [[INC6]], [[N]]
; CHECK-NEXT:    br i1 [[CMP_J]], label [[FOR_FIRST]], label [[FOR_SECOND_EXIT:%.*]]
; CHECK:       for.second.exit:
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[X:%.*]], 1
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
for.first.guard:
  ; Guard of the first loop; the same %cmp.guard value also guards the second.
  %cmp.guard = icmp slt i64 0, %N
  br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard

for.first.preheader:
  br label %for.first

for.first:
  ; First loop: A[i] = 0 for i counting up from 0 while i + 1 < %N.
  %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.04
  store i32 0, ptr %arrayidx, align 4
  %inc = add nsw i64 %i.04, 1
  %cmp = icmp slt i64 %inc, %N
  br i1 %cmp, label %for.first, label %for.first.exit

for.first.exit:
  ; Instruction under test: it sits in the first loop's exit block, and the
  ; expected output has it in for.second.exit.
  %add = add nsw i32 %x, 1
  br label %for.second.guard

for.second.guard:
  ; Guard of the second loop, reusing %cmp.guard from for.first.guard.
  br i1 %cmp.guard, label %for.second.preheader, label %for.end

for.second.preheader:
  br label %for.second

for.second:
  ; Second loop: B[j] = 0 over the same iteration range as the first loop.
  %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
  %arrayidx4 = getelementptr inbounds i32, ptr %B, i64 %j.02
  store i32 0, ptr %arrayidx4, align 4
  %inc6 = add nsw i64 %j.02, 1
  %cmp.j = icmp slt i64 %inc6, %N
  br i1 %cmp.j, label %for.second, label %for.second.exit

for.second.exit:
  br label %for.end

for.end:
  ret void
}

; Test that `%add` is moved into for.first.guard, and that the two loops
; for.first and for.second are fused.
define void @moveinsts_guardblock(ptr noalias %A, ptr noalias %B, i64 %N, i32 %x) {
; CHECK-LABEL: @moveinsts_guardblock(
; CHECK-NEXT:  for.first.guard:
; CHECK-NEXT:    [[CMP_GUARD:%.*]] = icmp slt i64 0, [[N:%.*]]
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[X:%.*]], 1
; CHECK-NEXT:    br i1 [[CMP_GUARD]], label [[FOR_FIRST_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK:       for.first.preheader:
; CHECK-NEXT:    br label [[FOR_FIRST:%.*]]
; CHECK:       for.first:
; CHECK-NEXT:    [[I_04:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
; CHECK-NEXT:    [[J_02:%.*]] = phi i64 [ [[INC6:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I_04]]
; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_04]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]]
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[J_02]]
; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT:    [[INC6]] = add nsw i64 [[J_02]], 1
; CHECK-NEXT:    [[CMP_J:%.*]] = icmp slt i64 [[INC6]], [[N]]
; CHECK-NEXT:    br i1 [[CMP_J]], label [[FOR_FIRST]], label [[FOR_SECOND_EXIT:%.*]]
; CHECK:       for.second.exit:
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
for.first.guard:
  ; Guard of the first loop; the same %cmp.guard value also guards the second.
  %cmp.guard = icmp slt i64 0, %N
  br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard

for.first.preheader:
  br label %for.first

for.first:
  ; First loop: A[i] = 0 for i counting up from 0 while i + 1 < %N.
  %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.04
  store i32 0, ptr %arrayidx, align 4
  %inc = add nsw i64 %i.04, 1
  %cmp = icmp slt i64 %inc, %N
  br i1 %cmp, label %for.first, label %for.first.exit

for.first.exit:
  br label %for.second.guard

for.second.guard:
  ; Instruction under test: it sits in the second loop's guard block, and the
  ; expected output has it in for.first.guard.
  %add = add nsw i32 %x, 1
  br i1 %cmp.guard, label %for.second.preheader, label %for.end

for.second.preheader:
  br label %for.second

for.second:
  ; Second loop: B[j] = 0 over the same iteration range as the first loop.
  %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
  %arrayidx4 = getelementptr inbounds i32, ptr %B, i64 %j.02
  store i32 0, ptr %arrayidx4, align 4
  %inc6 = add nsw i64 %j.02, 1
  %cmp.j = icmp slt i64 %inc6, %N
  br i1 %cmp.j, label %for.second, label %for.second.exit

for.second.exit:
  br label %for.end

for.end:
  ret void
}

; Test that the incoming block of `%j.lcssa` is updated correctly
; from for.second.guard to for.first.guard, and that the two loops for.first
; and for.second are fused.

define i64 @updatephi_guardnonloopblock(ptr noalias %A, ptr noalias %B, i64 %N, i32 %x) {
; CHECK-LABEL: @updatephi_guardnonloopblock(
; CHECK-NEXT:  for.first.guard:
; CHECK-NEXT:    [[CMP_GUARD:%.*]] = icmp slt i64 0, [[N:%.*]]
; CHECK-NEXT:    br i1 [[CMP_GUARD]], label [[FOR_FIRST_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK:       for.first.preheader:
; CHECK-NEXT:    br label [[FOR_FIRST:%.*]]
; CHECK:       for.first:
; CHECK-NEXT:    [[I_04:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
; CHECK-NEXT:    [[J_02:%.*]] = phi i64 [ [[INC6:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I_04]]
; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_04]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]]
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[J_02]]
; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX4]], align 4
; CHECK-NEXT:    [[INC6]] = add nsw i64 [[J_02]], 1
; CHECK-NEXT:    [[CMP_J:%.*]] = icmp slt i64 [[INC6]], [[N]]
; CHECK-NEXT:    br i1 [[CMP_J]], label [[FOR_FIRST]], label [[FOR_SECOND_EXIT:%.*]]
; CHECK:       for.second.exit:
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    [[J_LCSSA:%.*]] = phi i64 [ 0, [[FOR_FIRST_GUARD:%.*]] ], [ [[J_02]], [[FOR_SECOND_EXIT]] ]
; CHECK-NEXT:    ret i64 [[J_LCSSA]]
;
for.first.guard:
  ; Guard of the first loop; the same %cmp.guard value also guards the second.
  %cmp.guard = icmp slt i64 0, %N
  br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard

for.first.preheader:
  br label %for.first

for.first:
  ; First loop: A[i] = 0 for i counting up from 0 while i + 1 < %N.
  %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.04
  store i32 0, ptr %arrayidx, align 4
  %inc = add nsw i64 %i.04, 1
  %cmp = icmp slt i64 %inc, %N
  br i1 %cmp, label %for.first, label %for.first.exit

for.first.exit:
  br label %for.second.guard

for.second.guard:
  ; Guard of the second loop; its false edge feeds the phi in for.end.
  br i1 %cmp.guard, label %for.second.preheader, label %for.end

for.second.preheader:
  br label %for.second

for.second:
  ; Second loop: B[j] = 0 over the same iteration range as the first loop.
  %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
  %arrayidx4 = getelementptr inbounds i32, ptr %B, i64 %j.02
  store i32 0, ptr %arrayidx4, align 4
  %inc6 = add nsw i64 %j.02, 1
  %cmp.j = icmp slt i64 %inc6, %N
  br i1 %cmp.j, label %for.second, label %for.second.exit

for.second.exit:
  br label %for.end

for.end:
  ; Phi under test: its constant-0 operand arrives from for.second.guard here;
  ; the expected output names for.first.guard as that incoming block instead.
  %j.lcssa = phi i64 [ 0, %for.second.guard ], [ %j.02, %for.second.exit ]
  ret i64 %j.lcssa
}

; Two self-looping blocks with constant branch conditions, where the second
; loop (for.2) is reachable both from entry and from the first loop (for.1).
; The expected output keeps for.1 and for.2 as separate loops and only adds
; preheader / loop-exit blocks around them.
; NOTE(review): presumably a regression test for issue 59024, going by the
; function name - confirm against the issue tracker.
define void @pr59024() {
; CHECK-LABEL: @pr59024(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[FOR_2_PREHEADER:%.*]], label [[FOR_1_PREHEADER:%.*]]
; CHECK:       for.1.preheader:
; CHECK-NEXT:    br label [[FOR_1:%.*]]
; CHECK:       for.1:
; CHECK-NEXT:    br i1 true, label [[FOR_2_PREHEADER_LOOPEXIT:%.*]], label [[FOR_1]]
; CHECK:       for.2.preheader.loopexit:
; CHECK-NEXT:    br label [[FOR_2_PREHEADER]]
; CHECK:       for.2.preheader:
; CHECK-NEXT:    br label [[FOR_2:%.*]]
; CHECK:       for.2:
; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[FOR_2]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br i1 false, label %for.2, label %for.1

for.1:                                            ; preds = %for.1, %entry
  br i1 true, label %for.2, label %for.1

for.2:                                            ; preds = %for.2, %for.1, %entry
  br i1 true, label %exit, label %for.2

exit:                                             ; preds = %for.2
  ret void
}