; RUN: opt -S -verify-memoryssa -passes=loop-sink < %s | FileCheck %s
; RUN: opt -S -verify-memoryssa -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s

; Each function below places a load in the block that enters a loop and
; carries branch-weight profile metadata. The checks verify whether the
; loop-sink pass moves that load into the loop body and, if so, into which
; block(s). Every function is anchored on its "@name(" definition so that a
; check can never match an SSA value name (e.g. %t4, %t6) belonging to a
; neighbouring function.

@g = global i32 0, align 4

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 1000
; b2: 15
; b3: 7
; b4: 7
; Sink load to b2
; CHECK-LABEL: @t1(
; CHECK: .b2:
; CHECK: load i32, ptr @g
; CHECK: .b3:
; CHECK-NOT: load i32, ptr @g
define i32 @t1(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, ptr @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !1

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 %invariant, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  %t6 = add nsw i32 %iv, 100
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 500
; b1: 16016
; b3: 8
; b6: 8
; Sink load to b3 and b6
; CHECK-LABEL: @t2(
; CHECK: .preheader:
; CHECK-NOT: load i32, ptr @g
; CHECK: .b3:
; CHECK: load i32, ptr @g
; CHECK: .b4:
; CHECK: .b6:
; CHECK: load i32, ptr @g
; CHECK: .b7:
define i32 @t2(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, ptr @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !2

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4, !prof !1

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 5, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  %t6 = add nsw i32 %iv, %invariant
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 500
; b3: 8
; b5: 16008
; Do not sink load from preheader.
; CHECK-LABEL: @t3(
; CHECK: .preheader:
; CHECK: load i32, ptr @g
; CHECK: .b1:
; CHECK-NOT: load i32, ptr @g
define i32 @t3(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, ptr @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !2

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4, !prof !1

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 5, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, %invariant
  br label %.b7

.b6:
  %t6 = add nsw i32 %iv, 5
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

; For single-BB loop with <=1 avg trip count, sink load to body
; CHECK-LABEL: @t4(
; CHECK: .header:
; CHECK-NOT: load i32, ptr @g
; CHECK: .body:
; CHECK: load i32, ptr @g
; CHECK: .exit:
define i32 @t4(i32, i32) #0 !prof !0 {
.entry:
  %invariant = load i32, ptr @g
  br label %.header

.header:
  %iv = phi i32 [ %t1, %.body ], [ 0, %.entry ]
  %c0 = icmp sgt i32 %iv, %0
  br i1 %c0, label %.body, label %.exit, !prof !1

.body:
  %t1 = add nsw i32 %invariant, %iv
  %c1 = icmp sgt i32 %iv, %0
  br label %.header

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 1000
; b2: 15
; b3: 7
; b4: 7
; The loop calls @foo (in b6), which is declared without any memory
; attributes and so may write to @g. Because of this potentially aliasing
; write inside the loop, do not sink the load.
; CHECK-LABEL: @t5(
; CHECK: .preheader:
; CHECK: load i32, ptr @g
; CHECK: .b1:
; CHECK-NOT: load i32, ptr @g
define i32 @t5(i32, ptr) #0 !prof !0 {
  %3 = icmp eq i32 %0, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, ptr @g
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !1

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 %invariant, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  %t6 = call i32 @foo()
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b6
;  /  \  |
; b3  b4 |
;  \  /  |
;   b5   |
;    \  /
;     b7
; preheader: 1000
; b2: 15
; b3: 7
; b4: 7
; Regardless of aliasing store in loop this load from constant memory can be sunk.
; CHECK-LABEL: @t5_const_memory(
; CHECK: .preheader:
; CHECK-NOT: load i32, ptr @g_const
; CHECK: .b2:
; CHECK: load i32, ptr @g_const
; CHECK: br i1 %c2, label %.b3, label %.b4
define i32 @t5_const_memory(i32, ptr) #0 !prof !0 {
  %3 = icmp eq i32 %0, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load i32, ptr @g_const
  br label %.b1

.b1:
  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b6, !prof !1

.b2:
  %c2 = icmp sgt i32 %iv, 1
  br i1 %c2, label %.b3, label %.b4

.b3:
  %t3 = sub nsw i32 %invariant, %iv
  br label %.b5

.b4:
  %t4 = add nsw i32 %invariant, %iv
  br label %.b5

.b5:
  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  %t5 = mul nsw i32 %p5, 5
  br label %.b7

.b6:
  %t6 = call i32 @foo()
  br label %.b7

.b7:
  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
  %t7 = add nuw nsw i32 %iv, 1
  %c7 = icmp eq i32 %t7, %p7
  br i1 %c7, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

;     b1
;    /  \
;   b2  b3
;    \  /
;     b4
; preheader: 1000
; b2: 15
; b3: 7
; Do not sink unordered atomic load to b2
; CHECK-LABEL: @t6(
; CHECK: .preheader:
; CHECK: load atomic i32, ptr @g unordered, align 4
; CHECK: .b2:
; CHECK-NOT: load atomic i32, ptr @g unordered, align 4
define i32 @t6(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load atomic i32, ptr @g unordered, align 4
  br label %.b1

.b1:
  %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b3, !prof !1

.b2:
  %t1 = add nsw i32 %invariant, %iv
  br label %.b4

.b3:
  %t2 = add nsw i32 %iv, 100
  br label %.b4

.b4:
  %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
  %t3 = add nuw nsw i32 %iv, 1
  %c2 = icmp eq i32 %t3, %p1
  br i1 %c2, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

@g_const = constant i32 0, align 4

;     b1
;    /  \
;   b2  b3
;    \  /
;     b4
; preheader: 1000
; b2: 0.5
; b3: 999.5
; Sink unordered atomic load to b2. Sinking an unordered atomic load into the
; loop is allowed when it reads from constant memory.
; CHECK-LABEL: @t7(
; CHECK: .preheader:
; CHECK-NOT: load atomic i32, ptr @g_const unordered, align 4
; CHECK: .b2:
; CHECK: load atomic i32, ptr @g_const unordered, align 4
define i32 @t7(i32, i32) #0 !prof !0 {
  %3 = icmp eq i32 %1, 0
  br i1 %3, label %.exit, label %.preheader

.preheader:
  %invariant = load atomic i32, ptr @g_const unordered, align 4
  br label %.b1

.b1:
  %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
  %c1 = icmp sgt i32 %iv, %0
  br i1 %c1, label %.b2, label %.b3, !prof !1

.b2:
  %t1 = add nsw i32 %invariant, %iv
  br label %.b4

.b3:
  %t2 = add nsw i32 %iv, 100
  br label %.b4

.b4:
  %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
  %t3 = add nuw nsw i32 %iv, 1
  %c2 = icmp eq i32 %t3, %p1
  br i1 %c2, label %.b1, label %.exit, !prof !3

.exit:
  ret i32 10
}

declare i32 @foo()

!0 = !{!"function_entry_count", i64 1}
!1 = !{!"branch_weights", i32 1, i32 2000}
!2 = !{!"branch_weights", i32 2000, i32 1}
!3 = !{!"branch_weights", i32 100, i32 1}