; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -aa-pipeline=basic-aa -passes='loop-mssa(licm)' -S | FileCheck %s

@X = global i32 0                                 ; <ptr> [#uses=1]

declare void @foo()

declare i32 @llvm.bitreverse.i32(i32)

; This testcase tests for a problem where LICM hoists
; potentially trapping instructions when they are not guaranteed to execute.
; The sdiv below lives in the conditionally executed IfUnEqual block, and the
; CHECK lines expect it to stay there.
define i32 @test1(i1 %c) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:    [[A:%.*]] = load i32, ptr @X, align 4
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       Loop:
; CHECK-NEXT:    call void @foo()
; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOPTAIL:%.*]], label [[IFUNEQUAL:%.*]]
; CHECK:       IfUnEqual:
; CHECK-NEXT:    [[B1:%.*]] = sdiv i32 4, [[A]]
; CHECK-NEXT:    br label [[LOOPTAIL]]
; CHECK:       LoopTail:
; CHECK-NEXT:    [[B:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[B1]], [[IFUNEQUAL]] ]
; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[OUT:%.*]]
; CHECK:       Out:
; CHECK-NEXT:    [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[LOOPTAIL]] ]
; CHECK-NEXT:    [[C:%.*]] = sub i32 [[A]], [[B_LCSSA]]
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = load i32, ptr @X                           ; <i32> [#uses=2]
  br label %Loop
Loop:                                             ; preds = %LoopTail, %0
  call void @foo( )
  br i1 %c, label %LoopTail, label %IfUnEqual

IfUnEqual:                                        ; preds = %Loop
  %B1 = sdiv i32 4, %A                            ; <i32> [#uses=1]
  br label %LoopTail

LoopTail:                                         ; preds = %IfUnEqual, %Loop
  %B = phi i32 [ 0, %Loop ], [ %B1, %IfUnEqual ]  ; <i32> [#uses=1]
  br i1 %c, label %Loop, label %Out
Out:                                              ; preds = %LoopTail
  %C = sub i32 %A, %B                             ; <i32> [#uses=1]
  ret i32 %C
}


declare void @foo2(i32) nounwind


;; It is ok and desirable to hoist this potentially trapping instruction.
define i32 @test2(i1 %c) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:    [[A:%.*]] = load i32, ptr @X, align 4
; CHECK-NEXT:    [[B:%.*]] = sdiv i32 4, [[A]]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       Loop:
; CHECK-NEXT:    br label [[LOOP2:%.*]]
; CHECK:       loop2:
; CHECK-NEXT:    call void @foo2(i32 [[B]])
; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOP]], label [[OUT:%.*]]
; CHECK:       Out:
; CHECK-NEXT:    [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[LOOP2]] ]
; CHECK-NEXT:    [[C:%.*]] = sub i32 [[A]], [[B_LCSSA]]
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = load i32, ptr @X
  br label %Loop

Loop:
  ;; Should have hoisted this div!
  %B = sdiv i32 4, %A
  br label %loop2

loop2:
  call void @foo2( i32 %B )
  br i1 %c, label %Loop, label %Out

Out:
  %C = sub i32 %A, %B
  ret i32 %C
}


; Don't bother constant folding the add, just hoist it.
define i32 @test3(i1 %c) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:    [[A:%.*]] = load i32, ptr @X, align 4
; CHECK-NEXT:    [[B:%.*]] = add i32 4, 2
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       Loop:
; CHECK-NEXT:    call void @foo2(i32 [[B]])
; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOP]], label [[OUT:%.*]]
; CHECK:       Out:
; CHECK-NEXT:    [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[LOOP]] ]
; CHECK-NEXT:    [[C:%.*]] = sub i32 [[A]], [[B_LCSSA]]
; CHECK-NEXT:    ret i32 [[C]]
;
  %A = load i32, ptr @X                           ; <i32> [#uses=2]
  br label %Loop
Loop:
  %B = add i32 4, 2                               ; <i32> [#uses=2]
  call void @foo2( i32 %B )
  br i1 %c, label %Loop, label %Out
Out:                                              ; preds = %Loop
  %C = sub i32 %A, %B                             ; <i32> [#uses=1]
  ret i32 %C
}

; The sdiv follows a call to @foo_may_call_exit (declared below without any
; attributes); the CHECK lines expect the sdiv to remain inside the loop.
define i32 @test4(i32 %x, i32 %y) nounwind uwtable ssp {
; CHECK-LABEL: @test4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[N_01:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    call void @foo_may_call_exit(i32 0)
; CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[ADD]] = add nsw i32 [[N_01]], [[DIV]]
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 10000
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK:       for.end:
; CHECK-NEXT:    [[N_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[N_0_LCSSA]]
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %n.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  call void @foo_may_call_exit(i32 0)
  %div = sdiv i32 %x, %y
  %add = add nsw i32 %n.01, %div
  %inc = add nsw i32 %i.02, 1
  %cmp = icmp slt i32 %inc, 10000
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  %n.0.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %n.0.lcssa
}

declare void @foo_may_call_exit(i32)

; PR14854
; The CHECK lines expect the extractvalue to be hoisted into entry and the
; insertvalue (only used by the ret in ifend) to be sunk into ifend.
define { ptr, i32 } @test5(i32 %i, { ptr, i32 } %e) {
; CHECK-LABEL: @test5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[OUT:%.*]] = extractvalue { ptr, i32 } [[E:%.*]], 1
; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
; CHECK:       tailrecurse:
; CHECK-NEXT:    [[I_TR:%.*]] = phi i32 [ [[I:%.*]], [[ENTRY:%.*]] ], [ [[CMP2:%.*]], [[THEN:%.*]] ]
; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[OUT]], [[I_TR]]
; CHECK-NEXT:    br i1 [[CMP1]], label [[THEN]], label [[IFEND:%.*]]
; CHECK:       then:
; CHECK-NEXT:    call void @foo()
; CHECK-NEXT:    [[CMP2]] = add i32 [[I_TR]], 1
; CHECK-NEXT:    br label [[TAILRECURSE]]
; CHECK:       ifend:
; CHECK-NEXT:    [[D_LE:%.*]] = insertvalue { ptr, i32 } [[E]], ptr null, 0
; CHECK-NEXT:    ret { ptr, i32 } [[D_LE]]
;
entry:
  br label %tailrecurse

tailrecurse:                                      ; preds = %then, %entry
  %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ]
  %out = extractvalue { ptr, i32 } %e, 1
  %d = insertvalue { ptr, i32 } %e, ptr null, 0
  %cmp1 = icmp sgt i32 %out, %i.tr
  br i1 %cmp1, label %then, label %ifend

then:                                             ; preds = %tailrecurse
  call void @foo()
  %cmp2 = add i32 %i.tr, 1
  br label %tailrecurse

ifend:                                            ; preds = %tailrecurse
  ret { ptr, i32 } %d
}

; Although the fneg follows a call to @foo_may_call_exit, the CHECK lines
; expect it to be hoisted into entry.
; NOTE(review): attribute group #2 is referenced here but no `attributes #2`
; definition is visible in this file -- confirm whether that is intentional.
define void @test6(float %f) #2 {
; CHECK-LABEL: @test6(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[NEG:%.*]] = fneg float [[F:%.*]]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    call void @foo_may_call_exit(i32 0)
; CHECK-NEXT:    call void @use(float [[NEG]])
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 10000
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  call void @foo_may_call_exit(i32 0)
  %neg = fneg float %f
  call void @use(float %neg)
  %inc = add nsw i32 %i, 1
  %cmp = icmp slt i32 %inc, 10000
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret void
}

declare void @use(float)

; The llvm.bitreverse call sits in the conditionally executed body block; the
; CHECK lines expect it to be hoisted into the (unnamed) entry block.
define i32 @hoist_bitreverse(i32 %0)  {
; CHECK-LABEL: @hoist_bitreverse(
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP0:%.*]])
; CHECK-NEXT:    br label [[HEADER:%.*]]
; CHECK:       header:
; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[TMP1:%.*]] ], [ [[TMP5:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ 0, [[TMP1]] ], [ [[TMP6:%.*]], [[LATCH]] ]
; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1024
; CHECK-NEXT:    br i1 [[TMP4]], label [[BODY:%.*]], label [[RETURN:%.*]]
; CHECK:       body:
; CHECK-NEXT:    [[TMP5]] = add i32 [[SUM]], [[TMP2]]
; CHECK-NEXT:    br label [[LATCH]]
; CHECK:       latch:
; CHECK-NEXT:    [[TMP6]] = add nsw i32 [[TMP3]], 1
; CHECK-NEXT:    br label [[HEADER]]
; CHECK:       return:
; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[HEADER]] ]
; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
;
  br label %header

header:
  %sum = phi i32 [ 0, %1 ], [ %5, %latch ]
  %2 = phi i32 [ 0, %1 ], [ %6, %latch ]
  %3 = icmp slt i32 %2, 1024
  br i1 %3, label %body, label %return

body:
  %4 = call i32 @llvm.bitreverse.i32(i32 %0)
  %5 = add i32 %sum, %4
  br label %latch

latch:
  %6 = add nsw i32 %2, 1
  br label %header

return:
  ret i32 %sum
}

; Can neither sink nor hoist
define i32 @test_volatile(i1 %c) {
; CHECK-LABEL: @test_volatile(
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       Loop:
; CHECK-NEXT:    [[A:%.*]] = load volatile i32, ptr @X, align 4
; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOP]], label [[OUT:%.*]]
; CHECK:       Out:
; CHECK-NEXT:    [[A_LCSSA:%.*]] = phi i32 [ [[A]], [[LOOP]] ]
; CHECK-NEXT:    ret i32 [[A_LCSSA]]
;
  br label %Loop

Loop:
  %A = load volatile i32, ptr @X
  br i1 %c, label %Loop, label %Out

Out:
  ret i32 %A
}


declare ptr @llvm.invariant.start.p0(i64, ptr nocapture) nounwind readonly
declare void @llvm.invariant.end.p0(ptr, i64, ptr nocapture) nounwind
declare void @escaping.invariant.start(ptr) nounwind
; invariant.start dominates the load, and in this scope, the
; load is invariant. So, we can hoist the `addrld` load out of the loop.
define i32 @test_fence(ptr %addr, i32 %n, ptr %volatile) {
; CHECK-LABEL: @test_fence(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ADDR:%.*]], i64 8
; CHECK-NEXT:    store atomic i32 5, ptr [[GEP]] unordered, align 8
; CHECK-NEXT:    fence release
; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[GEP]])
; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[GEP]] unordered, align 8
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
; CHECK-NEXT:    fence acquire
; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
; CHECK:       loopexit:
; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
;
entry:
  %gep = getelementptr inbounds i8, ptr %addr, i64 8
  store atomic i32 5, ptr %gep unordered, align 8
  fence release
  %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %gep)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, ptr %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, ptr %gep unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}



; Same as test above, but the load is no longer invariant (presence of
; invariant.end). We cannot hoist the addrld out of loop.
define i32 @test_fence1(ptr %addr, i32 %n, ptr %volatile) {
; CHECK-LABEL: @test_fence1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ADDR:%.*]], i64 8
; CHECK-NEXT:    store atomic i32 5, ptr [[GEP]] unordered, align 8
; CHECK-NEXT:    fence release
; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[GEP]])
; CHECK-NEXT:    call void @llvm.invariant.end.p0(ptr [[INVST]], i64 4, ptr [[GEP]])
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
; CHECK-NEXT:    fence acquire
; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[GEP]] unordered, align 8
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
; CHECK:       loopexit:
; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
;
entry:
  %gep = getelementptr inbounds i8, ptr %addr, i64 8
  store atomic i32 5, ptr %gep unordered, align 8
  fence release
  %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %gep)
  call void @llvm.invariant.end.p0(ptr %invst, i64 4, ptr %gep)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, ptr %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, ptr %gep unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; same as test above, but instead of invariant.end, we have the result of
; invariant.start escaping through a call. We cannot hoist the load.
define i32 @test_fence2(ptr %addr, i32 %n, ptr %volatile) {
; CHECK-LABEL: @test_fence2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ADDR:%.*]], i64 8
; CHECK-NEXT:    store atomic i32 5, ptr [[GEP]] unordered, align 8
; CHECK-NEXT:    fence release
; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[GEP]])
; CHECK-NEXT:    call void @escaping.invariant.start(ptr [[INVST]])
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
; CHECK-NEXT:    fence acquire
; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[GEP]] unordered, align 8
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
; CHECK:       loopexit:
; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
;
entry:
  %gep = getelementptr inbounds i8, ptr %addr, i64 8
  store atomic i32 5, ptr %gep unordered, align 8
  fence release
  %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %gep)
  call void @escaping.invariant.start(ptr %invst)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, ptr %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, ptr %gep unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; Consider the load operand addr.i, computed here with an i32-typed
; getelementptr, being passed to invariant.start. The CHECK lines expect the
; addrld load to be hoisted into entry.
; NOTE(review): this comment used to describe addr.i as "bitcasted", but no
; bitcast appears in the IR below -- presumably a leftover from before opaque
; pointers; confirm.
define i32 @test_fence3(ptr %addr, i32 %n, ptr %volatile) {
; CHECK-LABEL: @test_fence3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_I:%.*]] = getelementptr inbounds i32, ptr [[ADDR:%.*]], i64 8
; CHECK-NEXT:    store atomic i32 5, ptr [[ADDR_I]] unordered, align 8
; CHECK-NEXT:    fence release
; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[ADDR_I]])
; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[ADDR_I]] unordered, align 8
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
; CHECK-NEXT:    fence acquire
; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
; CHECK:       loopexit:
; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
;
entry:
  %addr.i = getelementptr inbounds i32, ptr %addr, i64 8
  store atomic i32 5, ptr %addr.i unordered, align 8
  fence release
  %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %addr.i)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, ptr %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, ptr %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; We should not hoist the addrld out of the loop.
; In @test_fence4 the atomic store and the llvm.invariant.start call are
; themselves inside the loop body (unlike the tests that place them in entry),
; and the CHECK lines expect the addrld load to stay in the loop.
define i32 @test_fence4(ptr %addr, i32 %n, ptr %volatile) {
; CHECK-LABEL: @test_fence4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADDR_I:%.*]] = getelementptr inbounds i32, ptr [[ADDR:%.*]], i64 8
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    store atomic i32 5, ptr [[ADDR_I]] unordered, align 8
; CHECK-NEXT:    fence release
; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[ADDR_I]])
; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
; CHECK-NEXT:    fence acquire
; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[ADDR_I]] unordered, align 8
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
; CHECK:       loopexit:
; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
;
entry:
  %addr.i = getelementptr inbounds i32, ptr %addr, i64 8
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  store atomic i32 5, ptr %addr.i unordered, align 8
  fence release
  %invst = call ptr @llvm.invariant.start.p0(i64 4, ptr %addr.i)
  %volload = load atomic i8, ptr %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, ptr %addr.i unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

; We can't hoist the invariant load out of the loop because
; the marker is given a variable size (-1).
define i32 @test_fence5(ptr %addr, i32 %n, ptr %volatile) {
; CHECK-LABEL: @test_fence5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[ADDR:%.*]], i64 8
; CHECK-NEXT:    store atomic i32 5, ptr [[GEP]] unordered, align 8
; CHECK-NEXT:    fence release
; CHECK-NEXT:    [[INVST:%.*]] = call ptr @llvm.invariant.start.p0(i64 -1, ptr [[GEP]])
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[VOLLOAD:%.*]] = load atomic i8, ptr [[VOLATILE:%.*]] unordered, align 8
; CHECK-NEXT:    fence acquire
; CHECK-NEXT:    [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0
; CHECK-NEXT:    [[ADDRLD:%.*]] = load atomic i32, ptr [[GEP]] unordered, align 8
; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]]
; CHECK-NEXT:    [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]]
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
; CHECK:       loopexit:
; CHECK-NEXT:    [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ]
; CHECK-NEXT:    ret i32 [[SUM_LCSSA]]
;
entry:
  %gep = getelementptr inbounds i8, ptr %addr, i64 8
  store atomic i32 5, ptr %gep unordered, align 8
  fence release
  %invst = call ptr @llvm.invariant.start.p0(i64 -1, ptr %gep)
  br label %loop

loop:
  %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ]
  %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ]
  %volload = load atomic i8, ptr %volatile unordered, align 8
  fence acquire
  %volchk = icmp eq i8 %volload, 0
  %addrld = load atomic i32, ptr %gep unordered, align 8
  %sel = select i1 %volchk, i32 0, i32 %addrld
  %sum.next = add i32 %sel, %sum
  %indvar.next = add i32 %indvar, 1
  %cond = icmp slt i32 %indvar.next, %n
  br i1 %cond, label %loop, label %loopexit

loopexit:
  ret i32 %sum
}

declare void @g(i1)

@a = external global i8

; FIXME: Support hoisting invariant loads of globals.
; The CHECK lines currently expect the load of @a to remain in the loop block
; f even though invariant.start on @a is called in entry.
define void @test_fence6() {
; CHECK-LABEL: @test_fence6(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[I:%.*]] = call ptr @llvm.invariant.start.p0(i64 1, ptr @a)
; CHECK-NEXT:    br label [[F:%.*]]
; CHECK:       f:
; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr @a, align 1
; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[TMP0]], 0
; CHECK-NEXT:    [[T:%.*]] = icmp eq i8 [[TMP1]], 0
; CHECK-NEXT:    tail call void @g(i1 [[T]])
; CHECK-NEXT:    br label [[F]]
;
entry:
  %i = call ptr @llvm.invariant.start.p0(i64 1, ptr @a)
  br label %f

f:
  %0 = load i8, ptr @a
  %1 = and i8 %0, 0
  %t = icmp eq i8 %1, 0
  tail call void @g(i1 %t)
  br label %f
}