; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -loop-reduce < %s | FileCheck %s

; These are regression tests for PR43768.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

; Test checks that LSR does not hoist increment of %val9 while expanding the other pieces of formula
; to original place in backedge causing incorrect SSA form.
define void @test1() {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(3) undef, align 4
; CHECK-NEXT:    [[VAL1:%.*]] = add i32 undef, 12
; CHECK-NEXT:    [[VAL2:%.*]] = trunc i64 undef to i32
; CHECK-NEXT:    [[VAL3:%.*]] = mul i32 [[VAL1]], [[VAL2]]
; CHECK-NEXT:    [[VAL4:%.*]] = sub i32 [[VAL]], [[VAL3]]
; CHECK-NEXT:    [[VAL5:%.*]] = ashr i32 undef, undef
; CHECK-NEXT:    [[VAL6:%.*]] = sub i32 [[VAL4]], [[VAL5]]
; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[VAL]], 7
; CHECK-NEXT:    [[TMP1:%.*]] = mul i32 [[VAL3]], 7
; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = mul i32 [[VAL5]], 7
; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = shl i32 [[VAL6]], 3
; CHECK-NEXT:    br label [[BB7:%.*]]
; CHECK:       bb7:
; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[BB32:%.*]] ], [ 0, [[BB:%.*]] ]
; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BB32]] ], [ -8, [[BB]] ]
; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 8
; CHECK-NEXT:    [[LSR_IV_NEXT2]] = add nuw nsw i32 [[LSR_IV1]], [[TMP5]]
; CHECK-NEXT:    [[VAL10:%.*]] = icmp ult i64 [[LSR_IV_NEXT]], 65536
; CHECK-NEXT:    br i1 [[VAL10]], label [[BB12:%.*]], label [[BB11:%.*]]
; CHECK:       bb11:
; CHECK-NEXT:    unreachable
; CHECK:       bb12:
; CHECK-NEXT:    [[VAL14:%.*]] = icmp slt i32 undef, undef
; CHECK-NEXT:    br i1 [[VAL14]], label [[BB17:%.*]], label [[BB12_BB15SPLITSPLITSPLITSPLITSPLIT_CRIT_EDGE:%.*]]
; CHECK:       bb15splitsplitsplitsplitsplitsplit:
; CHECK-NEXT:    br label [[BB15SPLITSPLITSPLITSPLITSPLIT:%.*]]
; CHECK:       bb12.bb15splitsplitsplitsplitsplit_crit_edge:
; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[VAL6]], [[LSR_IV1]]
; CHECK-NEXT:    br label [[BB15SPLITSPLITSPLITSPLITSPLIT]]
; CHECK:       bb15splitsplitsplitsplitsplit:
; CHECK-NEXT:    [[VAL16_PH_PH_PH_PH_PH:%.*]] = phi i32 [ [[TMP6]], [[BB12_BB15SPLITSPLITSPLITSPLITSPLIT_CRIT_EDGE]] ], [ [[VAL35:%.*]], [[BB15SPLITSPLITSPLITSPLITSPLITSPLIT:%.*]] ]
; CHECK-NEXT:    br label [[BB15SPLITSPLITSPLITSPLIT:%.*]]
; CHECK:       bb17.bb15splitsplitsplitsplit_crit_edge:
; CHECK-NEXT:    [[TMP7:%.*]] = shl i32 [[VAL]], 1
; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[VAL1]], [[VAL2]]
; CHECK-NEXT:    [[TMP9:%.*]] = shl i32 [[TMP8]], 1
; CHECK-NEXT:    [[TMP10:%.*]] = sub i32 [[TMP7]], [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = shl i32 [[VAL5]], 1
; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[TMP12]], [[LSR_IV1]]
; CHECK-NEXT:    br label [[BB15SPLITSPLITSPLITSPLIT]]
; CHECK:       bb15splitsplitsplitsplit:
; CHECK-NEXT:    [[VAL16_PH_PH_PH_PH:%.*]] = phi i32 [ [[TMP13]], [[BB17_BB15SPLITSPLITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH_PH_PH_PH]], [[BB15SPLITSPLITSPLITSPLITSPLIT]] ]
; CHECK-NEXT:    br label [[BB15SPLITSPLITSPLIT:%.*]]
; CHECK:       bb20.bb15splitsplitsplit_crit_edge:
; CHECK-NEXT:    [[TMP14:%.*]] = mul i32 [[VAL]], 3
; CHECK-NEXT:    [[TMP15:%.*]] = mul i32 [[VAL1]], [[VAL2]]
; CHECK-NEXT:    [[TMP16:%.*]] = mul i32 [[TMP15]], 3
; CHECK-NEXT:    [[TMP17:%.*]] = sub i32 [[TMP14]], [[TMP16]]
; CHECK-NEXT:    [[TMP18:%.*]] = mul i32 [[VAL5]], 3
; CHECK-NEXT:    [[TMP19:%.*]] = sub i32 [[TMP17]], [[TMP18]]
; CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[TMP19]], [[LSR_IV1]]
; CHECK-NEXT:    br label [[BB15SPLITSPLITSPLIT]]
; CHECK:       bb15splitsplitsplit:
; CHECK-NEXT:    [[VAL16_PH_PH_PH:%.*]] = phi i32 [ [[TMP20]], [[BB20_BB15SPLITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH_PH_PH]], [[BB15SPLITSPLITSPLITSPLIT]] ]
; CHECK-NEXT:    br label [[BB15SPLITSPLIT:%.*]]
; CHECK:       bb23.bb15splitsplit_crit_edge:
; CHECK-NEXT:    [[TMP21:%.*]] = shl i32 [[VAL]], 2
; CHECK-NEXT:    [[TMP22:%.*]] = mul i32 [[VAL1]], [[VAL2]]
; CHECK-NEXT:    [[TMP23:%.*]] = shl i32 [[TMP22]], 2
; CHECK-NEXT:    [[TMP24:%.*]] = sub i32 [[TMP21]], [[TMP23]]
; CHECK-NEXT:    [[TMP25:%.*]] = shl i32 [[VAL5]], 2
; CHECK-NEXT:    [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP25]]
; CHECK-NEXT:    [[TMP27:%.*]] = add i32 [[TMP26]], [[LSR_IV1]]
; CHECK-NEXT:    br label [[BB15SPLITSPLIT]]
; CHECK:       bb15splitsplit:
; CHECK-NEXT:    [[VAL16_PH_PH:%.*]] = phi i32 [ [[TMP27]], [[BB23_BB15SPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH_PH]], [[BB15SPLITSPLITSPLIT]] ]
; CHECK-NEXT:    br label [[BB15SPLIT:%.*]]
; CHECK:       bb26.bb15split_crit_edge:
; CHECK-NEXT:    [[TMP28:%.*]] = mul i32 [[VAL]], 5
; CHECK-NEXT:    [[TMP29:%.*]] = mul i32 [[VAL1]], [[VAL2]]
; CHECK-NEXT:    [[TMP30:%.*]] = mul i32 [[TMP29]], 5
; CHECK-NEXT:    [[TMP31:%.*]] = sub i32 [[TMP28]], [[TMP30]]
; CHECK-NEXT:    [[TMP32:%.*]] = mul i32 [[VAL5]], 5
; CHECK-NEXT:    [[TMP33:%.*]] = sub i32 [[TMP31]], [[TMP32]]
; CHECK-NEXT:    [[TMP34:%.*]] = add i32 [[TMP33]], [[LSR_IV1]]
; CHECK-NEXT:    br label [[BB15SPLIT]]
; CHECK:       bb15split:
; CHECK-NEXT:    [[VAL16_PH:%.*]] = phi i32 [ [[TMP34]], [[BB26_BB15SPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH]], [[BB15SPLITSPLIT]] ]
; CHECK-NEXT:    br label [[BB15:%.*]]
; CHECK:       bb29.bb15_crit_edge:
; CHECK-NEXT:    [[TMP35:%.*]] = mul i32 [[VAL]], 6
; CHECK-NEXT:    [[TMP36:%.*]] = mul i32 [[VAL1]], [[VAL2]]
; CHECK-NEXT:    [[TMP37:%.*]] = mul i32 [[TMP36]], 6
; CHECK-NEXT:    [[TMP38:%.*]] = sub i32 [[TMP35]], [[TMP37]]
; CHECK-NEXT:    [[TMP39:%.*]] = mul i32 [[VAL5]], 6
; CHECK-NEXT:    [[TMP40:%.*]] = sub i32 [[TMP38]], [[TMP39]]
; CHECK-NEXT:    [[TMP41:%.*]] = add i32 [[TMP40]], [[LSR_IV1]]
; CHECK-NEXT:    br label [[BB15]]
; CHECK:       bb15:
; CHECK-NEXT:    [[VAL16:%.*]] = phi i32 [ [[TMP41]], [[BB29_BB15_CRIT_EDGE:%.*]] ], [ [[VAL16_PH]], [[BB15SPLIT]] ]
; CHECK-NEXT:    call void @widget() [ "deopt"(i32 [[VAL16]], i32 3, i32 [[VAL]]) ]
; CHECK-NEXT:    unreachable
; CHECK:       bb17:
; CHECK-NEXT:    [[VAL19:%.*]] = icmp slt i32 undef, undef
; CHECK-NEXT:    br i1 [[VAL19]], label [[BB20:%.*]], label [[BB17_BB15SPLITSPLITSPLITSPLIT_CRIT_EDGE]]
; CHECK:       bb20:
; CHECK-NEXT:    [[VAL22:%.*]] = icmp slt i32 undef, undef
; CHECK-NEXT:    br i1 [[VAL22]], label [[BB23:%.*]], label [[BB20_BB15SPLITSPLITSPLIT_CRIT_EDGE]]
; CHECK:       bb23:
; CHECK-NEXT:    [[VAL25:%.*]] = icmp slt i32 undef, undef
; CHECK-NEXT:    br i1 [[VAL25]], label [[BB26:%.*]], label [[BB23_BB15SPLITSPLIT_CRIT_EDGE]]
; CHECK:       bb26:
; CHECK-NEXT:    [[VAL28:%.*]] = icmp slt i32 undef, undef
; CHECK-NEXT:    br i1 [[VAL28]], label [[BB29:%.*]], label [[BB26_BB15SPLIT_CRIT_EDGE]]
; CHECK:       bb29:
; CHECK-NEXT:    [[VAL31:%.*]] = icmp slt i32 undef, undef
; CHECK-NEXT:    br i1 [[VAL31]], label [[BB32]], label [[BB29_BB15_CRIT_EDGE]]
; CHECK:       bb32:
; CHECK-NEXT:    [[TMP42:%.*]] = add i32 [[TMP4]], [[LSR_IV1]]
; CHECK-NEXT:    [[VAL35]] = add i32 [[TMP42]], [[VAL6]]
; CHECK-NEXT:    br i1 false, label [[BB7]], label [[BB15SPLITSPLITSPLITSPLITSPLITSPLIT]]
;
; Entry block: computes %val6, the value that every later block adds to the
; running sum.
bb:
  %val = load i32, ptr addrspace(3) undef, align 4
  %val1 = add i32 undef, 12
  %val2 = trunc i64 undef to i32
  %val3 = mul i32 %val1, %val2
  %val4 = sub i32 %val, %val3
  %val5 = ashr i32 undef, undef
  %val6 = sub i32 %val4, %val5
  br label %bb7

; Loop header: %val8 is the i64 counter (advanced by 8 in %bb32) and %val9 is
; the i32 running value fed back from %val35.
bb7:                                              ; preds = %bb32, %bb
  %val8 = phi i64 [ 0, %bb ], [ %val34, %bb32 ]
  %val9 = phi i32 [ 0, %bb ], [ %val35, %bb32 ]
  %val10 = icmp ult i64 %val8, 65536
  br i1 %val10, label %bb12, label %bb11

bb11:                                             ; preds = %bb7
  unreachable

; %bb12 through %bb32 form a chain; each block adds %val6 to the previous sum
; and may leave the loop to %bb15.
bb12:                                             ; preds = %bb7
  %val13 = add i32 %val9, %val6
  %val14 = icmp slt i32 undef, undef
  br i1 %val14, label %bb17, label %bb15

; Common exit: %val16 merges the running sum from every exiting block and is
; passed to @widget inside a "deopt" operand bundle.
bb15:                                             ; preds = %bb32, %bb29, %bb26, %bb23, %bb20, %bb17, %bb12
  %val16 = phi i32 [ %val35, %bb32 ], [ %val30, %bb29 ], [ %val27, %bb26 ], [ %val24, %bb23 ], [ %val21, %bb20 ], [ %val18, %bb17 ], [ %val13, %bb12 ]
  call void @widget() [ "deopt"(i32 %val16, i32 3, i32 %val) ]
  unreachable

bb17:                                             ; preds = %bb12
  %val18 = add i32 %val13, %val6
  %val19 = icmp slt i32 undef, undef
  br i1 %val19, label %bb20, label %bb15

bb20:                                             ; preds = %bb17
  %val21 = add i32 %val18, %val6
  %val22 = icmp slt i32 undef, undef
  br i1 %val22, label %bb23, label %bb15

bb23:                                             ; preds = %bb20
  %val24 = add i32 %val21, %val6
  %val25 = icmp slt i32 undef, undef
  br i1 %val25, label %bb26, label %bb15

bb26:                                             ; preds = %bb23
  %val27 = add i32 %val24, %val6
  %val28 = icmp slt i32 undef, undef
  br i1 %val28, label %bb29, label %bb15

bb29:                                             ; preds = %bb26
  %val30 = add i32 %val27, %val6
  %val31 = icmp slt i32 undef, undef
  br i1 %val31, label %bb32, label %bb15

; Latch: holds both increments (%val34 for %val8, %val35 for %val9). The branch
; condition is the constant false, so the edge back to %bb7 is never taken.
bb32:                                             ; preds = %bb29
  %val33 = add i32 %val30, %val6
  %val34 = add nuw nsw i64 %val8, 8
  %val35 = add i32 %val33, %val6
  br i1 false, label %bb7, label %bb15
}

; Test checks that LSR does not hoist increment of %val8 while expanding the other pieces of formula
; to original place in backedge causing incorrect SSA form.
define void @test2() {
; CHECK-LABEL: @test2(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[VAL1:%.*]] = load i32, ptr null, align 4
; CHECK-NEXT:    [[VAL3:%.*]] = load i32, ptr null, align 4
; CHECK-NEXT:    br label [[BB6:%.*]]
; CHECK:       bb4:
; CHECK-NEXT:    [[VAL5:%.*]] = sext i32 [[VAL16:%.*]] to i64
; CHECK-NEXT:    unreachable
; CHECK:       bb6:
; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BB12:%.*]] ], [ -1, [[BB:%.*]] ]
; CHECK-NEXT:    [[VAL8:%.*]] = phi i32 [ [[VAL16]], [[BB12]] ], [ [[VAL3]], [[BB]] ]
; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 1
; CHECK-NEXT:    [[VAL10:%.*]] = icmp ult i64 [[LSR_IV_NEXT]], 1048576
; CHECK-NEXT:    br i1 [[VAL10]], label [[BB12]], label [[BB11:%.*]]
; CHECK:       bb11:
; CHECK-NEXT:    unreachable
; CHECK:       bb12:
; CHECK-NEXT:    [[VAL14:%.*]] = add i32 [[VAL8]], [[VAL1]]
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[VAL1]], [[VAL8]]
; CHECK-NEXT:    [[VAL15:%.*]] = select i1 false, i32 [[VAL14]], i32 [[VAL8]]
; CHECK-NEXT:    [[VAL16]] = add i32 [[TMP0]], 1
; CHECK-NEXT:    [[VAL17:%.*]] = fcmp olt double 0.000000e+00, 2.270000e+02
; CHECK-NEXT:    br i1 [[VAL17]], label [[BB6]], label [[BB4:%.*]]
;
bb:
  %val1 = load i32, ptr null, align 4
  %val3 = load i32, ptr null, align 4
  br label %bb6

; Loop exit reached from %bb12; its only use of a loop value is %val16.
bb4:                                              ; preds = %bb12
  %val5 = sext i32 %val16 to i64
  unreachable

; Loop header: %val7 is the i64 counter, incremented here by %val9 (before the
; exit test, which compares the pre-increment %val7); %val8 is the i32 value
; carried around the loop from %val16.
bb6:                                              ; preds = %bb12, %bb
  %val7 = phi i64 [ %val9, %bb12 ], [ 0, %bb ]
  %val8 = phi i32 [ %val16, %bb12 ], [ %val3, %bb ]
  %val9 = add nuw nsw i64 %val7, 1
  %val10 = icmp ult i64 %val7, 1048576
  br i1 %val10, label %bb12, label %bb11

bb11:                                             ; preds = %bb6
  unreachable

; Latch: both selects have a constant false condition, so each yields its
; second operand. %val16 is the value fed back into %val8.
bb12:                                             ; preds = %bb6
  %val13 = select i1 false, i32 0, i32 %val8
  %val14 = add i32 %val8, %val1
  %val15 = select i1 false, i32 %val14, i32 %val13
  %val16 = add i32 %val14, 1
  %val17 = fcmp olt double 0.000000e+00, 2.270000e+02
  br i1 %val17, label %bb6, label %bb4
}

; External function; @test1 calls it with a "deopt" operand bundle.
declare void @widget()