; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1 | FileCheck %s
; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s


target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; test1: an i64 add reduction carried across both loops. The outer-loop PHI
; %sum.outer seeds the inner-loop PHI %sum.inner, and the inner result flows
; back through the LCSSA PHI %sum.inc.lcssa. The inner induction variable
; indexes the outer array dimension of %Arr. The remark expectations below
; require this nest to be interchanged, and the FileCheck lines inside the
; function pin the resulting IR.
; REMARKS: --- !Passed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: Interchanged
; REMARKS-NEXT: Function: test1

define i64 @test1(ptr %Arr) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR2_PREHEADER:%.*]]
; CHECK: for1.header.preheader:
; CHECK-NEXT: br label [[FOR1_HEADER:%.*]]
; CHECK: for1.header:
; CHECK-NEXT: [[INDVARS_IV23:%.*]] = phi i64 [ [[INDVARS_IV_NEXT24:%.*]], [[FOR1_INC:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
; CHECK-NEXT: [[SUM_INNER:%.*]] = phi i64 [ [[SUM_INC:%.*]], [[FOR1_INC]] ], [ [[SUM_OUTER:%.*]], [[FOR1_HEADER_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR2_SPLIT1:%.*]]
; CHECK: for2.preheader:
; CHECK-NEXT: br label [[FOR2:%.*]]
; CHECK: for2:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR2_SPLIT:%.*]] ], [ 0, [[FOR2_PREHEADER]] ]
; CHECK-NEXT: [[SUM_OUTER]] = phi i64 [ [[SUM_INC_LCSSA:%.*]], [[FOR2_SPLIT]] ], [ 0, [[FOR2_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR1_HEADER_PREHEADER]]
; CHECK: for2.split1:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x [100 x i64]], ptr [[ARR:%.*]], i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV23]]
; CHECK-NEXT: [[LV:%.*]] = load i64, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[SUM_INC]] = add i64 [[SUM_INNER]], [[LV]]
; CHECK-NEXT: [[IV_ORIGINAL:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXIT1_ORIGINAL:%.*]] = icmp eq i64 [[IV_ORIGINAL]], 100
; CHECK-NEXT: br label [[FOR1_INC]]
; CHECK: for2.split:
; CHECK-NEXT: [[SUM_INC_LCSSA]] = phi i64 [ [[SUM_INC]], %for1.inc ]
; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXIT1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_3]], 100
; CHECK-NEXT: br i1 [[EXIT1]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR2]]
; CHECK: for1.inc:
; CHECK-NEXT: [[INDVARS_IV_NEXT24]] = add nuw nsw i64 [[INDVARS_IV23]], 1
; CHECK-NEXT: [[EXIT2:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT24]], 100
; CHECK-NEXT: br i1 [[EXIT2]], label [[FOR2_SPLIT]], label [[FOR1_HEADER]]
; CHECK: for1.loopexit:
; CHECK-NEXT: [[SUM_INC_LCSSA2:%.*]] = phi i64 [ [[SUM_INC_LCSSA]], [[FOR2_SPLIT]] ]
; CHECK-NEXT: ret i64 [[SUM_INC_LCSSA2]]
;
entry:
  br label %for1.header

; Outer loop header: outer induction variable plus the outer reduction PHI.
for1.header:                                      ; preds = %for1.inc, %entry
  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
  br label %for2

; Inner loop: single block acting as header, body and latch.
for2:                                             ; preds = %for2, %for1.header
  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
  ; The inner reduction PHI starts directly from the outer reduction PHI.
  %sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
  ; Inner IV selects the outer array dimension, outer IV the inner dimension.
  %arrayidx = getelementptr inbounds [100 x [100 x i64]], ptr %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
  %lv = load i64, ptr %arrayidx, align 4
  %sum.inc = add i64 %sum.inner, %lv
  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
  br i1 %exit1, label %for1.inc, label %for2

; Outer loop latch: the LCSSA PHI carries the inner result back to %sum.outer.
for1.inc:                                         ; preds = %for2
  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
  %exit2 = icmp eq i64 %indvars.iv.next24, 100
  br i1 %exit2, label %for1.loopexit, label %for1.header

for1.loopexit:                                    ; preds = %for1.inc
  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
  ret i64 %sum.inc.lcssa2
}

; In this test case, the inner reduction PHI %inner does not involve the outer
; reduction PHI %sum.outer, so the loops must not be interchanged.
; REMARKS: --- !Missed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: UnsupportedPHIOuter
; REMARKS-NEXT: Function: test2

define i64 @test2(ptr %Arr) {
entry:
  br label %for1.header

; Outer loop header: %sum.outer is fed from the latch but is not used by the
; inner loop's PHI in this function.
for1.header:                                      ; preds = %for1.inc, %entry
  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
  br label %for2

for2:                                             ; preds = %for2, %for1.header
  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
  ; The inner PHI is seeded from the outer induction variable, not from
  ; %sum.outer, so the two PHIs do not form one reduction chain.
  %inner = phi i64 [ %indvars.iv23, %for1.header ], [ %sum.inc, %for2 ]
  %arrayidx = getelementptr inbounds [100 x [100 x i64]], ptr %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
  %lv = load i64, ptr %arrayidx, align 4
  %sum.inc = add i64 %inner, %lv
  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
  br i1 %exit1, label %for1.inc, label %for2

for1.inc:                                         ; preds = %for2
  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
  %exit2 = icmp eq i64 %indvars.iv.next24, 100
  br i1 %exit2, label %for1.loopexit, label %for1.header

for1.loopexit:                                    ; preds = %for1.inc
  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
  ret i64 %sum.inc.lcssa2
}

; Check that we do not interchange if there is an additional instruction
; between the outer and inner reduction PHIs.
; REMARKS: --- !Missed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: UnsupportedPHIOuter
; REMARKS-NEXT: Function: test3

define i64 @test3(ptr %Arr) {
entry:
  br label %for1.header

for1.header:                                      ; preds = %for1.inc, %entry
  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
  ; Extra instruction between the outer PHI and the inner PHI: the inner
  ; reduction starts from %so instead of directly from %sum.outer.
  %so = add i64 %sum.outer, 10
  br label %for2

for2:                                             ; preds = %for2, %for1.header
  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
  %sum.inner = phi i64 [ %so, %for1.header ], [ %sum.inc, %for2 ]
  %arrayidx = getelementptr inbounds [100 x [100 x i64]], ptr %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
  %lv = load i64, ptr %arrayidx, align 4
  %sum.inc = add i64 %sum.inner, %lv
  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
  br i1 %exit1, label %for1.inc, label %for2

for1.inc:                                         ; preds = %for2
  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
  %exit2 = icmp eq i64 %indvars.iv.next24, 100
  br i1 %exit2, label %for1.loopexit, label %for1.header

for1.loopexit:                                    ; preds = %for1.inc
  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
  ret i64 %sum.inc.lcssa2
}

; Check that we do not interchange if the reduction is stored to an invariant
; address inside the inner loop.
; REMARKS: --- !Missed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: Dependence
; REMARKS-NEXT: Function: test4

define i64 @test4(ptr %Arr, ptr %dst) {
entry:
  ; Destination address is computed once, outside both loops.
  %gep.dst = getelementptr inbounds i64, ptr %dst, i64 42
  br label %for1.header

for1.header:                                      ; preds = %for1.inc, %entry
  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
  br label %for2

for2:                                             ; preds = %for2, %for1.header
  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
  %sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
  %arrayidx = getelementptr inbounds [100 x [100 x i64]], ptr %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
  %lv = load i64, ptr %arrayidx, align 4
  %sum.inc = add i64 %sum.inner, %lv
  ; Every inner iteration writes the running sum to the same address.
  store i64 %sum.inc, ptr %gep.dst, align 4
  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
  br i1 %exit1, label %for1.inc, label %for2

for1.inc:                                         ; preds = %for2
  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
  %exit2 = icmp eq i64 %indvars.iv.next24, 100
  br i1 %exit2, label %for1.loopexit, label %for1.header

for1.loopexit:                                    ; preds = %for1.inc
  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
  ret i64 %sum.inc.lcssa2
}

; Check that we do not interchange or crash if the PHI in the outer loop gets a
; constant from the inner loop.
; REMARKS: --- !Missed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: UnsupportedPHIOuter
; REMARKS-NEXT: Function: test_constant_inner_loop_res

define i64 @test_constant_inner_loop_res(ptr %Arr) {
entry:
  br label %for1.header

for1.header:                                      ; preds = %for1.inc, %entry
  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
  ; The outer PHI is fed by %sum.inc.amend, not directly by an LCSSA PHI of
  ; the inner reduction.
  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.amend, %for1.inc ]
  br label %for2

for2:                                             ; preds = %for2, %for1.header
  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
  %sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
  %arrayidx = getelementptr inbounds [100 x [100 x i64]], ptr %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
  %lv = load i64, ptr %arrayidx, align 4
  %sum.inc = add i64 %sum.inner, %lv
  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
  br i1 %exit1, label %for1.inc, label %for2

for1.inc:                                         ; preds = %for2
  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
  ; Single-entry PHI whose incoming value from the inner loop is a constant.
  %const.lcssa = phi i64 [ 0, %for2 ]
  %sum.inc.amend = add i64 %const.lcssa, %sum.inc.lcssa
  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
  %exit2 = icmp eq i64 %indvars.iv.next24, 100
  br i1 %exit2, label %for1.loopexit, label %for1.header

for1.loopexit:                                    ; preds = %for1.inc
  %il.res.lcssa2 = phi i64 [ %sum.inc.amend, %for1.inc ]
  ret i64 %il.res.lcssa2
}

; Floating point reductions are interchanged if all the fp instructions
; involved allow reassociation.
; REMARKS: --- !Passed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: Interchanged
; REMARKS-NEXT: Function: test5

; test5: a float reduction carried across both loops in which every fadd on
; the reduction chain has the `fast` flag. Both induction variables start at 1
; and both loops exit when the incremented value reaches 100.
define float @test5(ptr %Arr, ptr %Arr2) {
entry:
  br label %outer.header

; Outer loop header: outer induction variable plus the outer reduction PHI.
outer.header:                                     ; preds = %outer.inc, %entry
  %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
  %float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ]
  br label %for.body3

; Inner loop: single block acting as header, body and latch.
for.body3:                                        ; preds = %for.body3, %outer.header
  ; The inner reduction PHI starts directly from the outer reduction PHI.
  %float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc.inc, %for.body3 ]
  %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
  %arrayidx5 = getelementptr inbounds [100 x [100 x float]], ptr %Arr, i64 0, i64 %iv.inner, i64 %iv.outer
  %vA = load float, ptr %arrayidx5
  %float.inner.inc = fadd fast float %float.inner, %vA
  %arrayidx6 = getelementptr inbounds [100 x [100 x float]], ptr %Arr2, i64 0, i64 %iv.inner, i64 %iv.outer
  %vB = load float, ptr %arrayidx6
  %float.inner.inc.inc = fadd fast float %float.inner.inc, %vB
  %iv.inner.next = add nuw nsw i64 %iv.inner, 1
  %exitcond = icmp eq i64 %iv.inner.next, 100
  br i1 %exitcond, label %outer.inc, label %for.body3

; Outer loop latch.
outer.inc:                                        ; preds = %for.body3
  %float.inner.lcssa = phi float [ %float.inner.inc.inc, %for.body3 ]
  %iv.outer.next = add nsw i64 %iv.outer, 1
  %cmp = icmp eq i64 %iv.outer.next, 100
  ; Leave the outer loop once the incremented IV reaches 100, otherwise take
  ; the backedge. With the targets the other way round the backedge would be
  ; taken only when the IV equals 100, so the outer loop would run just once.
  br i1 %cmp, label %for.exit, label %outer.header

for.exit:                                         ; preds = %outer.inc
  %float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ]
  ret float %float.outer.lcssa
}

; Floating point reductions are not interchanged if not all the fp instructions
; involved allow reassociation.
; REMARKS: --- !Missed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: UnsupportedPHIOuter
; REMARKS-NEXT: Function: test6

; test6: same loop nest shape as a float reduction across both loops, but the
; first fadd on the reduction chain carries no fast-math flags, so not every
; fp instruction involved allows reassociation.
define float @test6(ptr %Arr, ptr %Arr2) {
entry:
  br label %outer.header

; Outer loop header: outer induction variable plus the outer reduction PHI.
outer.header:                                     ; preds = %outer.inc, %entry
  %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
  %float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ]
  br label %for.body3

; Inner loop: single block acting as header, body and latch.
for.body3:                                        ; preds = %for.body3, %outer.header
  %float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc.inc, %for.body3 ]
  %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
  %arrayidx5 = getelementptr inbounds [100 x [100 x float]], ptr %Arr, i64 0, i64 %iv.inner, i64 %iv.outer
  %vA = load float, ptr %arrayidx5
  %float.inner.inc = fadd float %float.inner, %vA ; do not allow reassociation
  %arrayidx6 = getelementptr inbounds [100 x [100 x float]], ptr %Arr2, i64 0, i64 %iv.inner, i64 %iv.outer
  %vB = load float, ptr %arrayidx6
  %float.inner.inc.inc = fadd fast float %float.inner.inc, %vB
  %iv.inner.next = add nuw nsw i64 %iv.inner, 1
  %exitcond = icmp eq i64 %iv.inner.next, 100
  br i1 %exitcond, label %outer.inc, label %for.body3

; Outer loop latch.
outer.inc:                                        ; preds = %for.body3
  %float.inner.lcssa = phi float [ %float.inner.inc.inc, %for.body3 ]
  %iv.outer.next = add nsw i64 %iv.outer, 1
  %cmp = icmp eq i64 %iv.outer.next, 100
  ; Leave the outer loop once the incremented IV reaches 100, otherwise take
  ; the backedge. With the targets the other way round the backedge would be
  ; taken only when the IV equals 100, so the outer loop would run just once.
  br i1 %cmp, label %for.exit, label %outer.header

for.exit:                                         ; preds = %outer.inc
  %float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ]
  ret float %float.outer.lcssa
}