//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs various transformations related to eliminating memcpy
// calls, or transforming sets of stores into memset's.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "memcpyopt"

static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
    "enable-memcpyopt-without-libcalls", cl::Hidden,
    cl::desc("Enable memcpyopt even when libcalls are disabled"));

STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemMoveInstr, "Number of memmove instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
STATISTIC(NumStackMove, "Number of stack-move optimizations performed");

namespace {

/// Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
///   store 0 -> P+1
///   store 0 -> P+0
///   store 0 -> P+3
///   store 0 -> P+2
/// which sometimes happens with stores to arrays of structs etc. When we see
/// the first store, we make a range [1, 2). The second store extends the range
/// to [0, 2). The third makes a new range [3, 4).
/// The fourth store joins the two ranges into [0, 4), which is memset'able.
struct MemsetRange {
  // Start/End - A half-open range that describes the span this range covers.
  // The range is closed at the start and open at the end: [Start, End).
  int64_t Start, End;

  /// StartPtr - The getelementptr instruction that points to the start of the
  /// range.
  Value *StartPtr;

  /// Alignment - The known alignment of the first store.
  MaybeAlign Alignment;

  /// TheStores - The actual stores that make up this range.
  SmallVector<Instruction *, 16> TheStores;

  bool isProfitableToUseMemset(const DataLayout &DL) const;
};

} // end anonymous namespace

bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
  // If we found 4 or more stores to merge, or at least 16 bytes, use memset.
  if (TheStores.size() >= 4 || End - Start >= 16)
    return true;

  // If there is nothing to merge, don't do anything.
  if (TheStores.size() < 2)
    return false;

  // If any of the stores are a memset, then it is always good to extend the
  // memset.
  for (Instruction *SI : TheStores)
    if (!isa<StoreInst>(SI))
      return true;

  // Assume that the code generator is capable of merging pairs of stores
  // together if it wants to.
  if (TheStores.size() == 2)
    return false;

  // If we have fewer than 8 stores, it can still be worthwhile to do this.
  // For example, merging 4 i8 stores into an i32 store is useful almost always.
  // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
  // memset will be split into 2 32-bit stores anyway) and doing so can
  // pessimize the llvm optimizer.
  //
  // Since we don't have perfect knowledge here, make some assumptions: assume
  // the maximum GPR width is the same size as the largest legal integer
  // size. If so, check to see whether we will end up actually reducing the
  // number of stores used.
  unsigned Bytes = unsigned(End - Start);
  unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
  if (MaxIntSize == 0)
    MaxIntSize = 1;
  unsigned NumPointerStores = Bytes / MaxIntSize;

  // Assume the remaining bytes, if any, are stored a byte at a time.
  unsigned NumByteStores = Bytes % MaxIntSize;

  // If we will reduce the # stores (according to this heuristic), do the
  // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
  // etc.
  return TheStores.size() > NumPointerStores + NumByteStores;
}

namespace {

class MemsetRanges {
  using range_iterator = SmallVectorImpl<MemsetRange>::iterator;

  /// A sorted list of the memset ranges.
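  /// The ranges are kept sorted and non-overlapping; addRange relies on this
  /// invariant when it uses partition_point to find merge candidates.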
  SmallVector<MemsetRange, 8> Ranges;

  const DataLayout &DL;

public:
  MemsetRanges(const DataLayout &DL) : DL(DL) {}

  using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;

  const_iterator begin() const { return Ranges.begin(); }
  const_iterator end() const { return Ranges.end(); }
  bool empty() const { return Ranges.empty(); }

  void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
    if (auto *SI = dyn_cast<StoreInst>(Inst))
      addStore(OffsetFromFirst, SI);
    else
      addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
  }

  void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
    TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
    assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
    addRange(OffsetFromFirst, StoreSize.getFixedValue(),
             SI->getPointerOperand(), SI->getAlign(), SI);
  }

  void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
    int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
  }

  void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
                Instruction *Inst);
};

} // end anonymous namespace

/// Add a new store to the MemsetRanges data structure. This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
                            MaybeAlign Alignment, Instruction *Inst) {
  int64_t End = Start + Size;

  range_iterator I = partition_point(
      Ranges, [=](const MemsetRange &O) { return O.End < Start; });

  // We now know that I == E, in which case we didn't find anything to merge
  // with, or that Start <= I->End. If End < I->Start or I == E, then we need
  // to insert a new range. Handle this now.
  if (I == Ranges.end() || End < I->Start) {
    MemsetRange &R = *Ranges.insert(I, MemsetRange());
    R.Start = Start;
    R.End = End;
    R.StartPtr = Ptr;
    R.Alignment = Alignment;
    R.TheStores.push_back(Inst);
    return;
  }

  // This store overlaps with I, add it.
  I->TheStores.push_back(Inst);

  // At this point, we may have an interval that completely contains our store.
  // If so, just add it to the interval and return.
  if (I->Start <= Start && I->End >= End)
    return;

  // Now we know that Start <= I->End and End >= I->Start so the range overlaps
  // but is not entirely contained within the range.

  // See if this store extends the start of the range. In this case, it couldn't
  // possibly cause it to join the prior range, because otherwise we would have
  // stopped on *it*.
  if (Start < I->Start) {
    I->Start = Start;
    I->StartPtr = Ptr;
    I->Alignment = Alignment;
  }

  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
  // is in or right at the end of I), and that End >= I->Start. Extend I out to
  // End.
  if (End > I->End) {
    I->End = End;
    range_iterator NextI = I;
    while (++NextI != Ranges.end() && End >= NextI->Start) {
      // Merge the range in.
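      // Fold NextI's stores and extent into I, then erase NextI. Resetting
      // NextI to I keeps the ++NextI in the loop header valid after erase().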
      I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
      if (NextI->End > I->End)
        I->End = NextI->End;
      Ranges.erase(NextI);
      NextI = I;
    }
  }
}

//===----------------------------------------------------------------------===//
//                         MemCpyOptLegacyPass Pass
//===----------------------------------------------------------------------===//

// Check that V is either not accessible by the caller, or unwinding cannot
// occur between Start and End.
static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
                                         Instruction *End) {
  assert(Start->getParent() == End->getParent() && "Must be in same block");
  // Function can't unwind, so it also can't be visible through unwinding.
  if (Start->getFunction()->doesNotThrow())
    return false;

  // Object is not visible on unwind.
  // TODO: Support RequiresNoCaptureBeforeUnwind case.
  bool RequiresNoCaptureBeforeUnwind;
  if (isNotVisibleOnUnwind(getUnderlyingObject(V),
                           RequiresNoCaptureBeforeUnwind) &&
      !RequiresNoCaptureBeforeUnwind)
    return false;

  // Check whether there are any unwinding instructions in the range.
  return any_of(make_range(Start->getIterator(), End->getIterator()),
                [](const Instruction &I) { return I.mayThrow(); });
}

void MemCpyOptPass::eraseInstruction(Instruction *I) {
  MSSAU->removeMemoryAccess(I);
  EEA->removeInstruction(I);
  I->eraseFromParent();
}

// Check for mod or ref of Loc between Start and End, excluding both
// boundaries. Start and End must be in the same block.
// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
// intrinsic and store it inside SkippedLifetimeStart.
static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
                            const MemoryUseOrDef *Start,
                            const MemoryUseOrDef *End,
                            Instruction **SkippedLifetimeStart = nullptr) {
  assert(Start->getBlock() == End->getBlock() && "Only local supported");
  for (const MemoryAccess &MA :
       make_range(++Start->getIterator(), End->getIterator())) {
    Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
    if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
      auto *II = dyn_cast<IntrinsicInst>(I);
      if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
          SkippedLifetimeStart && !*SkippedLifetimeStart) {
        *SkippedLifetimeStart = I;
        continue;
      }

      return true;
    }
  }
  return false;
}

// Check for mod of Loc between Start and End, excluding both boundaries.
// Start and End can be in different blocks.
static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
                           MemoryLocation Loc, const MemoryUseOrDef *Start,
                           const MemoryUseOrDef *End) {
  if (isa<MemoryUse>(End)) {
    // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering
    // writes. Manually check read accesses between Start and End, if they are
    // in the same block, for clobbers. Otherwise assume Loc is clobbered.
    return Start->getBlock() != End->getBlock() ||
           any_of(
               make_range(std::next(Start->getIterator()), End->getIterator()),
               [&AA, Loc](const MemoryAccess &Acc) {
                 if (isa<MemoryUse>(&Acc))
                   return false;
                 Instruction *AccInst =
                     cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
                 return isModSet(AA.getModRefInfo(AccInst, Loc));
               });
  }

  // TODO: Only walk until we hit Start.
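  // If the nearest clobber of Loc at or above End's defining access already
  // dominates Start, then no write to Loc can lie strictly between Start and
  // End.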
  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
      End->getDefiningAccess(), Loc, AA);
  return !MSSA->dominates(Clobber, Start);
}

/// When scanning forward over instructions, we look for some other patterns to
/// fold away. In particular, this looks for stores to neighboring locations of
/// memory. If it sees enough consecutive ones, it attempts to merge them
/// together into a memcpy/memset.
Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
                                                 Value *StartPtr,
                                                 Value *ByteVal) {
  const DataLayout &DL = StartInst->getDataLayout();

  // We can't track scalable types.
  if (auto *SI = dyn_cast<StoreInst>(StartInst))
    if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
      return nullptr;

  // Okay, so we now have a single store that can be splattable. Scan to find
  // all subsequent stores of the same value to offsets from the same pointer.
  // Join these together into ranges, so we can decide whether contiguous
  // blocks are stored.
  MemsetRanges Ranges(DL);

  BasicBlock::iterator BI(StartInst);

  // Keeps track of the last memory use or def before the insertion point for
  // the new memset. The new MemoryDef for the inserted memsets will be
  // inserted after MemInsertPoint.
  MemoryUseOrDef *MemInsertPoint = nullptr;
  for (++BI; !BI->isTerminator(); ++BI) {
    auto *CurrentAcc =
        cast_or_null<MemoryUseOrDef>(MSSA->getMemoryAccess(&*BI));
    if (CurrentAcc)
      MemInsertPoint = CurrentAcc;

    // Calls that only access inaccessible memory do not block merging
    // accessible stores.
    if (auto *CB = dyn_cast<CallBase>(BI)) {
      if (CB->onlyAccessesInaccessibleMemory())
        continue;
    }

    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it, otherwise bail out. We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
      // If this is a store, see if we can merge it in.
      if (!NextStore->isSimple())
        break;

      Value *StoredVal = NextStore->getValueOperand();

      // Don't convert stores of non-integral pointer types to memsets (which
      // stores integers).
      if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
        break;

      // We can't track ranges involving scalable types.
      if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
        break;

      // Check to see if this stored value is the same byte-splattable value.
      Value *StoredByte = isBytewiseValue(StoredVal, DL);
      if (isa<UndefValue>(ByteVal) && StoredByte)
        ByteVal = StoredByte;
      if (ByteVal != StoredByte)
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
      if (!Offset)
        break;

      Ranges.addStore(*Offset, NextStore);
    } else {
      auto *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
          !isa<ConstantInt>(MSI->getLength()))
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
      if (!Offset)
        break;

      Ranges.addMemSet(*Offset, MSI);
    }
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in. This is a very common case of course.
  if (Ranges.empty())
    return nullptr;

  // If we had at least one store that could be merged in, add the starting
  // store as well. We try to avoid this unless there is at least something
  // interesting as a small compile-time optimization.
  Ranges.addInst(0, StartInst);

  // If we create any memsets, we put them right before the first instruction
  // that isn't part of the memset block. This ensures that the memset is
  // dominated by any addressing instruction needed by the start of the block.
  IRBuilder<> Builder(&*BI);

  // Now that we have full information about ranges, loop over the ranges and
  // emit memset's for anything big enough to be worthwhile.
  Instruction *AMemSet = nullptr;
  for (const MemsetRange &Range : Ranges) {
    if (Range.TheStores.size() == 1)
      continue;

    // If it is profitable to lower this range to memset, do so now.
    if (!Range.isProfitableToUseMemset(DL))
      continue;

    // Otherwise, we do want to transform this! Create a new memset.
    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;

    AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
                                   Range.Alignment);
    AMemSet->mergeDIAssignID(Range.TheStores);

    LLVM_DEBUG(dbgs() << "Replace stores:\n";
               for (Instruction *SI : Range.TheStores)
                 dbgs() << *SI << '\n';
               dbgs() << "With: " << *AMemSet << '\n');
    if (!Range.TheStores.empty())
      AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());

    auto *NewDef = cast<MemoryDef>(
        MemInsertPoint->getMemoryInst() == &*BI
            ? MSSAU->createMemoryAccessBefore(AMemSet, nullptr, MemInsertPoint)
            : MSSAU->createMemoryAccessAfter(AMemSet, nullptr, MemInsertPoint));
    MSSAU->insertDef(NewDef, /*RenameUses=*/true);
    MemInsertPoint = NewDef;

    // Zap all the stores.
    for (Instruction *SI : Range.TheStores)
      eraseInstruction(SI);

    ++NumMemSetInfer;
  }

  return AMemSet;
}

// This method tries to lift a store instruction before position P.
// It will lift the store and its argument, plus anything that may alias with
// them. The method returns true if it was successful.
bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
  // If the store aliases this position, bail out early.
  MemoryLocation StoreLoc = MemoryLocation::get(SI);
  if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
    return false;

  // Keep track of the arguments of all instructions we plan to lift
  // so we can make sure to lift them as well if appropriate.
  DenseSet<Instruction *> Args;
  auto AddArg = [&](Value *Arg) {
    auto *I = dyn_cast<Instruction>(Arg);
    if (I && I->getParent() == SI->getParent()) {
      // Cannot hoist user of P above P.
      if (I == P)
        return false;
      Args.insert(I);
    }
    return true;
  };
  if (!AddArg(SI->getPointerOperand()))
    return false;

  // Instructions to lift before P.
  SmallVector<Instruction *, 8> ToLift{SI};

  // Memory locations of lifted instructions.
  SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};

  // Lifted calls.
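  // (Calls already scheduled to be lifted; any earlier instruction that may
  //  mod/ref one of them must be lifted as well to preserve their ordering.)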
  SmallVector<const CallBase *, 8> Calls;

  const MemoryLocation LoadLoc = MemoryLocation::get(LI);

  for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
    auto *C = &*I;

    // Make sure hoisting does not perform a store that was not guaranteed to
    // happen.
    if (!isGuaranteedToTransferExecutionToSuccessor(C))
      return false;

    bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));

    bool NeedLift = false;
    if (Args.erase(C))
      NeedLift = true;
    else if (MayAlias) {
      NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
        return isModOrRefSet(AA->getModRefInfo(C, ML));
      });

      if (!NeedLift)
        NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
          return isModOrRefSet(AA->getModRefInfo(C, Call));
        });
    }

    if (!NeedLift)
      continue;

    if (MayAlias) {
      // Since LI is implicitly moved downwards past the lifted instructions,
      // none of them may modify its source.
      if (isModSet(AA->getModRefInfo(C, LoadLoc)))
        return false;
      else if (const auto *Call = dyn_cast<CallBase>(C)) {
        // If we can't lift this before P, it's game over.
        if (isModOrRefSet(AA->getModRefInfo(P, Call)))
          return false;

        Calls.push_back(Call);
      } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
        // If we can't lift this before P, it's game over.
        auto ML = MemoryLocation::get(C);
        if (isModOrRefSet(AA->getModRefInfo(P, ML)))
          return false;

        MemLocs.push_back(ML);
      } else
        // We don't know how to lift this instruction.
        return false;
    }

    ToLift.push_back(C);
    for (Value *Op : C->operands())
      if (!AddArg(Op))
        return false;
  }

  // Find MSSA insertion point. Normally P will always have a corresponding
  // memory access before which we can insert. However, with non-standard AA
  // pipelines, there may be a mismatch between AA and MSSA, in which case we
  // will scan for a memory access before P. In either case, we know for sure
  // that at least the load will have a memory access.
  // TODO: Simplify this once P is determined by MSSA, in which case the
  // discrepancy can no longer occur.
  MemoryUseOrDef *MemInsertPoint = nullptr;
  if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(P)) {
    MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
  } else {
    const Instruction *ConstP = P;
    for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
                                           ++LI->getReverseIterator())) {
      if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(&I)) {
        MemInsertPoint = MA;
        break;
      }
    }
  }

  // We made it, we need to lift.
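  // ToLift was collected walking backwards from SI, so iterate it in reverse
  // to move the instructions in their original program order.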
  for (auto *I : llvm::reverse(ToLift)) {
    LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
    I->moveBefore(P->getIterator());
    assert(MemInsertPoint && "Must have found insert point");
    if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(I)) {
      MSSAU->moveAfter(MA, MemInsertPoint);
      MemInsertPoint = MA;
    }
  }

  return true;
}

bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
                                       const DataLayout &DL,
                                       BasicBlock::iterator &BBI) {
  if (!LI->isSimple() || !LI->hasOneUse() || LI->getParent() != SI->getParent())
    return false;

  BatchAAResults BAA(*AA, EEA);
  auto *T = LI->getType();
  // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
  // the corresponding libcalls are not available.
  // TODO: We should really distinguish between libcall availability and
  // our ability to introduce intrinsics.
  if (T->isAggregateType() &&
      (EnableMemCpyOptWithoutLibcalls ||
       (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
    MemoryLocation LoadLoc = MemoryLocation::get(LI);
    MemoryUseOrDef *LoadAccess = MSSA->getMemoryAccess(LI),
                   *StoreAccess = MSSA->getMemoryAccess(SI);

    // We use MSSA to check if an instruction may store to the memory we load
    // from in between the load and the store. If such an instruction is
    // found, we try to promote there instead of at the store position.
    auto *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
        StoreAccess->getDefiningAccess(), LoadLoc, BAA);
    Instruction *P = MSSA->dominates(LoadAccess, Clobber)
                         ? cast<MemoryUseOrDef>(Clobber)->getMemoryInst()
                         : SI;

    // If we found an instruction that may write to the loaded memory,
    // we can try to promote at this position instead of the store
    // position if nothing aliases the store memory after this and the store
    // destination is not in the range.
    if (P == SI || moveUp(SI, P, LI)) {
      // If we load from memory that may alias the memory we store to,
      // memmove must be used to preserve semantics. If not, memcpy can
      // be used. Also, if we load from constant memory, memcpy can be used
      // as the constant memory won't be modified.
      bool UseMemMove = false;
      if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
        UseMemMove = true;

      IRBuilder<> Builder(P);
      Value *Size =
          Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
      Instruction *M;
      if (UseMemMove)
        M = Builder.CreateMemMove(SI->getPointerOperand(), SI->getAlign(),
                                  LI->getPointerOperand(), LI->getAlign(),
                                  Size);
      else
        M = Builder.CreateMemCpy(SI->getPointerOperand(), SI->getAlign(),
                                 LI->getPointerOperand(), LI->getAlign(), Size);
      M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);

      LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M
                        << "\n");

      auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
      auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
      MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

      eraseInstruction(SI);
      eraseInstruction(LI);
      ++NumMemCpyInstr;

      // Make sure we do not invalidate the iterator.
      BBI = M->getIterator();
      return true;
    }
  }

  // Detect cases where we're performing call slot forwarding, but
  // happen to be using a load-store pair to implement it, rather than
  // a memcpy.
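  // For example (illustrative IR, names are placeholders):
  //   call void @init(ptr %tmp)
  //   %v = load %T, ptr %tmp
  //   store %T %v, ptr %dst
  // may become
  //   call void @init(ptr %dst)
  // if the usual call-slot conditions hold.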
  auto GetCall = [&]() -> CallInst * {
    // We defer this expensive clobber walk until the cheap checks
    // have been done on the source inside performCallSlotOptzn.
    if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
            MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
      return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
    return nullptr;
  };

  bool Changed = performCallSlotOptzn(
      LI, SI, SI->getPointerOperand()->stripPointerCasts(),
      LI->getPointerOperand()->stripPointerCasts(),
      DL.getTypeStoreSize(SI->getOperand(0)->getType()),
      std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
  if (Changed) {
    eraseInstruction(SI);
    eraseInstruction(LI);
    ++NumMemCpyInstr;
    return true;
  }

  // If this is a load-store pair from a stack slot to a stack slot, we
  // might be able to perform the stack-move optimization just as we do for
  // memcpys from an alloca to an alloca.
  if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
    if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
      if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
                                DL.getTypeStoreSize(T), BAA)) {
        // Avoid invalidating the iterator.
        BBI = SI->getNextNonDebugInstruction()->getIterator();
        eraseInstruction(SI);
        eraseInstruction(LI);
        ++NumMemCpyInstr;
        return true;
      }
    }
  }

  return false;
}

bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  if (!SI->isSimple())
    return false;

  // Avoid merging nontemporal stores since the resulting
  // memcpy/memset would not be able to preserve the nontemporal hint.
  // In theory we could propagate the !nontemporal metadata to memset
  // calls. However, that change would force the backend to
  // conservatively expand !nontemporal memset calls back to sequences of
  // store instructions (effectively undoing the merging).
  if (SI->getMetadata(LLVMContext::MD_nontemporal))
    return false;

  const DataLayout &DL = SI->getDataLayout();

  Value *StoredVal = SI->getValueOperand();

  // Not all the transforms below are correct for non-integral pointers, bail
  // until we've audited the individual pieces.
  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
    return false;

  // Load-to-store forwarding can be interpreted as a memcpy.
  if (auto *LI = dyn_cast<LoadInst>(StoredVal))
    return processStoreOfLoad(SI, LI, DL, BBI);

  // The following code creates memset intrinsics out of thin air. Don't do
  // this if the corresponding libfunc is not available.
  // TODO: We should really distinguish between libcall availability and
  // our ability to introduce intrinsics.
  if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
    return false;

  // There are two cases that are interesting for this code to handle: memcpy
  // and memset. Right now we only handle memset.

  // Ensure that the value being stored is something that can be memset a byte
  // at a time, like "0" or "-1" of any width, as well as things like
  // 0xA0A0A0A0 and 0.0.
  Value *V = SI->getOperand(0);
  Value *ByteVal = isBytewiseValue(V, DL);
  if (!ByteVal)
    return false;

  if (Instruction *I =
          tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
    BBI = I->getIterator(); // Don't invalidate iterator.
    return true;
  }

  // If we have an aggregate, we try to promote it to memset regardless
  // of opportunity for merging as it can expose optimization opportunities
  // in subsequent passes.
  auto *T = V->getType();
  if (!T->isAggregateType())
    return false;

  TypeSize Size = DL.getTypeStoreSize(T);
  if (Size.isScalable())
    return false;

  IRBuilder<> Builder(SI);
  auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
                                 SI->getAlign());
  M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);

  LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");

  // The newly inserted memset is immediately overwritten by the original
  // store, so we do not need to rename uses.
  auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
  auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);

  eraseInstruction(SI);
  NumMemSetInfer++;

  // Make sure we do not invalidate the iterator.
  BBI = M->getIterator();
  return true;
}

bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
  // See if there is another memset or store neighboring this memset which
  // allows us to widen out the memset to do a single larger store.
  if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
    if (Instruction *I =
            tryMergingIntoMemset(MSI, MSI->getDest(), MSI->getValue())) {
      BBI = I->getIterator(); // Don't invalidate iterator.
      return true;
    }
  return false;
}

/// Takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
                                         Instruction *cpyStore, Value *cpyDest,
                                         Value *cpySrc, TypeSize cpySize,
                                         Align cpyDestAlign,
                                         BatchAAResults &BAA,
                                         std::function<CallInst *()> GetC) {
  // The general transformation to keep in mind is
  //
  //   call @func(..., src, ...)
  //   memcpy(dest, src, ...)
  //
  // ->
  //
  //   memcpy(dest, src, ...)
  //   call @func(..., dest, ...)
  //
  // Since moving the memcpy is technically awkward, we additionally check that
  // src only holds uninitialized values at the moment of the call, meaning
  // that the memcpy can be discarded rather than moved.

  // We can't optimize scalable types.
  if (cpySize.isScalable())
    return false;

  // Require that src be an alloca. This simplifies the reasoning considerably.
  auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
  if (!srcAlloca)
    return false;

  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
  if (!srcArraySize)
    return false;

  const DataLayout &DL = cpyLoad->getDataLayout();
  TypeSize SrcAllocaSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType());
  // We can't optimize scalable types.
  if (SrcAllocaSize.isScalable())
    return false;
  uint64_t srcSize = SrcAllocaSize * srcArraySize->getZExtValue();

  if (cpySize < srcSize)
    return false;

  CallInst *C = GetC();
  if (!C)
    return false;

  // Lifetime marks shouldn't be operated on.
  if (Function *F = C->getCalledFunction())
    if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
      return false;

  if (C->getParent() != cpyStore->getParent()) {
    LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
    return false;
  }

  MemoryLocation DestLoc =
      isa<StoreInst>(cpyStore)
          ? MemoryLocation::get(cpyStore)
          : MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));

  // Check that nothing touches the dest of the copy between
  // the call and the store/memcpy.
  Instruction *SkippedLifetimeStart = nullptr;
  if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
                      MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
    LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
    return false;
  }

  // If we need to move a lifetime.start above the call, make sure that we can
  // actually do so. If the argument is bitcasted for example, we would have to
  // move the bitcast as well, which we don't handle.
  if (SkippedLifetimeStart) {
    auto *LifetimeArg =
        dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
    if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
        C->comesBefore(LifetimeArg))
      return false;
  }

  // Check that storing to the first srcSize bytes of dest will not cause a
  // trap or data race.
  bool ExplicitlyDereferenceableOnly;
  if (!isWritableObject(getUnderlyingObject(cpyDest),
                        ExplicitlyDereferenceableOnly) ||
      !isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
                                          DL, C, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
    return false;
  }

  // Make sure that nothing can observe cpyDest being written early. There are
  // a number of cases to consider:
  //  1. cpyDest cannot be accessed between C and cpyStore as a precondition of
  //     the transform.
  //  2. C itself may not access cpyDest (prior to the transform). This is
  //     checked further below.
  //  3. If cpyDest is accessible to the caller of this function (potentially
  //     captured and not based on an alloca), we need to ensure that we cannot
  //     unwind between C and cpyStore. This is checked here.
  //  4. If cpyDest is potentially captured, there may be accesses to it from
  //     another thread. In this case, we need to check that cpyStore is
  //     guaranteed to be executed if C is. As it is a non-atomic access, it
  //     renders accesses from other threads undefined.
  //     TODO: This is currently not checked.
  if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
    LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
    return false;
  }

  // Check that dest points to memory that is at least as aligned as src.
  Align srcAlign = srcAlloca->getAlign();
  bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
  // If dest is not aligned enough and we can't increase its alignment then
  // bail out.
  if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
    LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
    return false;
  }

  // Check that src is not accessed except via the call and the memcpy. This
  // guarantees that it holds only undefined values when passed in (so the
  // final memcpy can be dropped), that it is not read or written between the
  // call and the memcpy, and that writing beyond the end of it is undefined.
  SmallVector<User *, 8> srcUseList(srcAlloca->users());
  while (!srcUseList.empty()) {
    User *U = srcUseList.pop_back_val();

    if (isa<AddrSpaceCastInst>(U)) {
      append_range(srcUseList, U->users());
      continue;
    }
    if (const auto *IT = dyn_cast<IntrinsicInst>(U))
      if (IT->isLifetimeStartOrEnd())
        continue;

    if (U != C && U != cpyLoad) {
      LLVM_DEBUG(dbgs() << "Call slot: Source accessed by " << *U << "\n");
      return false;
    }
  }

  // Check whether src is captured by the called function, in which case there
  // may be further indirect uses of src.
  bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
    return U->stripPointerCasts() == cpySrc &&
           !C->doesNotCapture(C->getArgOperandNo(&U));
  });

  // If src is captured, then check whether there are any potential uses of
  // src through the captured pointer before the lifetime of src ends, either
  // due to a lifetime.end or a return from the function.
  if (SrcIsCaptured) {
    // Check that dest is not captured before/at the call. We have already
    // checked that src is not captured before it. If either had been captured,
    // then the call might be comparing the argument against the captured dest
    // or src pointer.
    Value *DestObj = getUnderlyingObject(cpyDest);
    if (!isIdentifiedFunctionLocal(DestObj) ||
        PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
                                   /* StoreCaptures */ true, C, DT,
                                   /* IncludeI */ true))
      return false;

    MemoryLocation SrcLoc =
        MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
    for (Instruction &I :
         make_range(++C->getIterator(), C->getParent()->end())) {
      // Lifetime of srcAlloca ends at lifetime.end.
      if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
        if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
            II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
            cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
          break;
      }

      // Lifetime of srcAlloca ends at return.
      if (isa<ReturnInst>(&I))
        break;

      // Ignore the direct read of src in the load.
      if (&I == cpyLoad)
        continue;

      // Check whether this instruction may mod/ref src through the captured
      // pointer (we have already checked for any direct mod/refs in the loop
      // above). Also bail if we hit a terminator, as we don't want to scan
      // into other blocks.
      if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
        return false;
    }
  }

  // Since we're changing the parameter to the callsite, we need to make sure
  // that what would be the new parameter dominates the callsite.
  bool NeedMoveGEP = false;
  if (!DT->dominates(cpyDest, C)) {
    // Support moving a constant index GEP before the call.
    auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
    if (GEP && GEP->hasAllConstantIndices() &&
        DT->dominates(GEP->getPointerOperand(), C))
      NeedMoveGEP = true;
    else
      return false;
  }

  // In addition to knowing that the call does not access src in some
  // unexpected manner, for example via a global, which we deduce from
  // the use analysis, we also need to know that it does not sneakily
  // access dest. We rely on AA to figure this out for us.
  MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
  ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
  // If necessary, perform additional analysis.
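  // callCapturesBefore can refine the result using capture information: if
  // cpyDest's underlying object is not captured before the call, the call can
  // only access it through its own arguments.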
  if (isModOrRefSet(MR))
    MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
  if (isModOrRefSet(MR))
    return false;

  // We can't create address space casts here because we don't know if they're
  // safe for the target.
  if (cpySrc->getType() != cpyDest->getType())
    return false;
  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
    if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
        cpySrc->getType() != C->getArgOperand(ArgI)->getType())
      return false;

  // All the checks have passed, so do the transformation.
  bool changedArgument = false;
  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
    if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
      changedArgument = true;
      C->setArgOperand(ArgI, cpyDest);
    }

  if (!changedArgument)
    return false;

  // If the destination wasn't sufficiently aligned then increase its alignment.
  if (!isDestSufficientlyAligned) {
    assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
    cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
  }

  if (NeedMoveGEP) {
    auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
    GEP->moveBefore(C->getIterator());
  }

  if (SkippedLifetimeStart) {
    SkippedLifetimeStart->moveBefore(C->getIterator());
    MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
                      MSSA->getMemoryAccess(C));
  }

  combineAAMetadata(C, cpyLoad);
  if (cpyLoad != cpyStore)
    combineAAMetadata(C, cpyStore);

  ++NumCallSlot;
  return true;
}

/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
                                                  MemCpyInst *MDep,
                                                  BatchAAResults &BAA) {
  // If dep instruction is reading from our current input, then it is a noop
  // transfer and substituting the input won't change this instruction. Just
  // ignore the input and let someone else zap MDep. This handles cases like:
  //   memcpy(a <- a)
  //   memcpy(b <- a)
  if (M->getSource() == MDep->getSource())
    return false;

  // We can only optimize non-volatile memcpy's.
  if (MDep->isVolatile())
    return false;

  int64_t MForwardOffset = 0;
  const DataLayout &DL = M->getModule()->getDataLayout();
  // We can only transform memcpy's where the dest of one is the source of the
  // other, or the source of the second is at a known non-negative offset from
  // the dest of the first.
  if (M->getSource() != MDep->getDest()) {
    std::optional<int64_t> Offset =
        M->getSource()->getPointerOffsetFrom(MDep->getDest(), DL);
    if (!Offset || *Offset < 0)
      return false;
    MForwardOffset = *Offset;
  }

  // The lengths of the memcpy's must be the same, or the preceding one
  // must be larger than the following one (accounting for the forwarding
  // offset).
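  // For example (illustrative): forwarding
  //   memcpy(b <- a, 64)
  //   memcpy(c <- b+16, 32)
  // requires the first copy to cover the forwarded range, i.e. 64 >= 32 + 16.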
  if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) {
    auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
    auto *MLen = dyn_cast<ConstantInt>(M->getLength());
    if (!MDepLen || !MLen ||
        MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
      return false;
  }

  IRBuilder<> Builder(M);
  auto *CopySource = MDep->getSource();
  Instruction *NewCopySource = nullptr;
  auto CleanupOnRet = llvm::make_scope_exit([&] {
    if (NewCopySource && NewCopySource->use_empty())
      // Safety: It's safe here because we will only allocate more instructions
      // after finishing all BatchAA queries, but we have to be careful if we
      // want to do something like this in another place. Then we'd probably
      // have to delay instruction removal until all transforms on an
      // instruction are finished.
      eraseInstruction(NewCopySource);
  });
  MaybeAlign CopySourceAlign = MDep->getSourceAlign();
  // We just need to calculate the actual size of the copy.
  auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
      MemoryLocation::getForSource(M).Size);

  // When the forwarding offset is greater than 0, we transform
  //   memcpy(d1 <- s1)
  //   memcpy(d2 <- d1+o)
  // to
  //   memcpy(d2 <- s1+o)
  if (MForwardOffset > 0) {
    // The copy destination of `M` may be able to serve as the source of the
    // copy.
    std::optional<int64_t> MDestOffset =
        M->getRawDest()->getPointerOffsetFrom(MDep->getRawSource(), DL);
    if (MDestOffset == MForwardOffset)
      CopySource = M->getDest();
    else {
      CopySource = Builder.CreateInBoundsPtrAdd(
          CopySource, Builder.getInt64(MForwardOffset));
      NewCopySource = dyn_cast<Instruction>(CopySource);
    }
    // We need to update `MCopyLoc` if an offset exists.
    MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
    if (CopySourceAlign)
      CopySourceAlign = commonAlignment(*CopySourceAlign, MForwardOffset);
  }

  // Avoid infinite loops.
  if (BAA.isMustAlias(M->getSource(), CopySource))
    return false;

  // Verify that the copied-from memory doesn't change in between the two
  // transfers. For example, in:
  //   memcpy(a <- b)
  //   *b = 42;
  //   memcpy(c <- a)
  // It would be invalid to transform the second memcpy into memcpy(c <- b).
  //
  // TODO: If the code between M and MDep is transparent to the destination
  // "c", then we could still perform the xform by moving M up to the first
  // memcpy.
  if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
                     MSSA->getMemoryAccess(M)))
    return false;

  // No need to create `memcpy(a <- a)`.
  if (BAA.isMustAlias(M->getDest(), CopySource)) {
    // Remove the instruction we're replacing.
    eraseInstruction(M);
    ++NumMemCpyInstr;
    return true;
  }

  // If the dest of the second might alias the source of the first, then the
  // source and dest might overlap. However, if the source of the first points
  // to constant memory, they won't overlap by definition. Otherwise, we still
  // want to eliminate the intermediate value, but we have to generate a
  // memmove instead of memcpy.
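  // For example (illustrative): with
  //   memcpy(b <- a)
  //   memcpy(a+1 <- b)
  // the forwarded copy would read from 'a' while writing into 'a+1', so the
  // replacement has to be a memmove.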
  bool UseMemMove = false;
  if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep)))) {
    // Don't convert llvm.memcpy.inline into memmove because memmove can be
    // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
    // there is no inline version of llvm.memmove).
    if (isa<MemCpyInlineInst>(M))
      return false;
    UseMemMove = true;
  }

  // If all checks passed, then we can transform M.
  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
                    << *MDep << '\n'
                    << *M << '\n');

  // TODO: Is this worth it if we're creating a less aligned memcpy? For
  // example we could be moving from movaps -> movq on x86.
  Instruction *NewM;
  if (UseMemMove)
    NewM =
        Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
                              CopySourceAlign, M->getLength(), M->isVolatile());
  else if (isa<MemCpyInlineInst>(M)) {
    // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
    // never allowed since that would allow the latter to be lowered as a call
    // to an external function.
    NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
                                      CopySource, CopySourceAlign,
                                      M->getLength(), M->isVolatile());
  } else
    NewM =
        Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
                             CopySourceAlign, M->getLength(), M->isVolatile());
  NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);

  assert(isa<MemoryDef>(MSSA->getMemoryAccess(M)));
  auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(M));
  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

  // Remove the instruction we're replacing.
  eraseInstruction(M);
  ++NumMemCpyInstr;
  return true;
}

/// We've found that the (upward scanning) memory dependence of \p MemCpy is
/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
/// weren't copied over by \p MemCpy.
///
/// In other words, transform:
/// \code
///   memset(dst, c, dst_size);
///   ...
///   memcpy(dst, src, src_size);
/// \endcode
/// into:
/// \code
///   ...
///   memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
///   memcpy(dst, src, src_size);
/// \endcode
///
/// The memset is sunk to just before the memcpy to ensure that src_size is
/// present when emitting the simplified memset.
bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
                                                  MemSetInst *MemSet,
                                                  BatchAAResults &BAA) {
  // We can only transform memset/memcpy with the same destination.
  if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
    return false;

  // Don't perform the transform if src_size may be zero. In that case, the
  // transform is essentially a complex no-op and may lead to an infinite
  // loop if BasicAA is smart enough to understand that dst and dst + src_size
  // are still MustAlias after the transform.
  Value *SrcSize = MemCpy->getLength();
  if (!isKnownNonZero(SrcSize,
                      SimplifyQuery(MemCpy->getDataLayout(), DT, AC, MemCpy)))
    return false;

  // Check that src and dst of the memcpy aren't the same. While memcpy
  // operands cannot partially overlap, exact equality is allowed.
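  // If dst == src, the original sequence still leaves dst[0, dst_size) filled
  // with c, but after shrinking the memset the self-copy would leave
  // dst[0, src_size) unfilled; bail out in that case.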
  if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
    return false;

  // We know that dst up to src_size is not written. We now need to make sure
  // that dst up to dst_size is not accessed. (If we did not move the memset,
  // checking for reads would be sufficient.)
  if (accessedBetween(BAA, MemoryLocation::getForDest(MemSet),
                      MSSA->getMemoryAccess(MemSet),
                      MSSA->getMemoryAccess(MemCpy)))
    return false;

  // Use the same i8* dest as the memcpy, killing the memset dest if different.
  Value *Dest = MemCpy->getRawDest();
  Value *DestSize = MemSet->getLength();

  if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
    return false;

  // If the sizes are the same, simply drop the memset instead of generating
  // a replacement with zero size.
  if (DestSize == SrcSize) {
    eraseInstruction(MemSet);
    return true;
  }

  // By default, create an unaligned memset.
  Align Alignment = Align(1);
  // If Dest is aligned, and SrcSize is constant, use the minimum alignment
  // of the sum.
  const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
                                   MemCpy->getDestAlign().valueOrOne());
  if (DestAlign > 1)
    if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
      Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());

  IRBuilder<> Builder(MemCpy);

  // Preserve the debug location of the old memset for the code emitted here
  // related to the new memset. This is correct according to the rules in
  // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an
  // instruction location", given that we move the memset within the basic
  // block.
  assert(MemSet->getParent() == MemCpy->getParent() &&
         "Preserving debug location based on moving memset within BB.");
  Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());

  // If the sizes have different types, zext the smaller one.
  if (DestSize->getType() != SrcSize->getType()) {
    if (DestSize->getType()->getIntegerBitWidth() >
        SrcSize->getType()->getIntegerBitWidth())
      SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
    else
      DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
  }

  Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
  Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
  Value *MemsetLen = Builder.CreateSelect(
      Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
  Instruction *NewMemSet =
      Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
                           MemSet->getOperand(1), MemsetLen, Alignment);

  assert(isa<MemoryDef>(MSSA->getMemoryAccess(MemCpy)) &&
         "MemCpy must be a MemoryDef");
  // The new memset is inserted before the memcpy, and it is known that the
  // memcpy's defining access is the memset about to be removed.
  auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
  auto *NewAccess =
      MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef);
  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

  eraseInstruction(MemSet);
  return true;
}

/// Determine whether the memory pointed to by V has undefined content for the
/// given Size, either because it was freshly alloca'd or its lifetime just
/// started.
static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
                             MemoryDef *Def, Value *Size) {
  if (MSSA->isLiveOnEntryDef(Def))
    return isa<AllocaInst>(getUnderlyingObject(V));

  if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
      auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));

      if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
        if (AA.isMustAlias(V, II->getArgOperand(1)) &&
            LTSize->getZExtValue() >= CSize->getZExtValue())
          return true;
      }

      // If the lifetime.start covers a whole alloca (as it almost always
      // does) and we're querying a pointer based on that alloca, then we know
      // the memory is definitely undef, regardless of how exactly we alias.
      // The size also doesn't matter, as an out-of-bounds access would be UB.
      if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
        if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
          const DataLayout &DL = Alloca->getDataLayout();
          if (std::optional<TypeSize> AllocaSize =
                  Alloca->getAllocationSize(DL))
            if (*AllocaSize == LTSize->getValue())
              return true;
        }
      }
    }
  }

  return false;
}

/// Transform memcpy to memset when its source was just memset.
/// In other words, turn:
/// \code
///   memset(dst1, c, dst1_size);
///   memcpy(dst2, dst1, dst2_size);
/// \endcode
/// into:
/// \code
///   memset(dst1, c, dst1_size);
///   memset(dst2, c, dst2_size);
/// \endcode
/// When dst2_size <= dst1_size.
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
                                               MemSetInst *MemSet,
                                               BatchAAResults &BAA) {
  // Make sure we have memcpy(..., memset(...), ...); that is, that we are
  // memsetting and then memcpying from the same address. Otherwise it is hard
  // to reason about.
  if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
    return false;

  Value *MemSetSize = MemSet->getLength();
  Value *CopySize = MemCpy->getLength();

  if (MemSetSize != CopySize) {
    // Make sure the memcpy doesn't read any more than what the memset wrote.
    // Don't worry about sizes larger than i64.

    // A known memset size is required.
    auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
    if (!CMemSetSize)
      return false;

    // A known memcpy size is also required.
    auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
    if (!CCopySize)
      return false;
    if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
      // If the memcpy is larger than the memset, but the memory was undef
      // prior to the memset, we can just ignore the tail. Technically we're
      // only interested in the bytes from MemSetSize..CopySize here, but as
      // we can't easily represent this location, we use the full 0..CopySize
      // range.
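      // For example (illustrative):
      //   %a = alloca [16 x i8]
      //   memset(%a, 0, 8)
      //   memcpy(%d, %a, 16)   ; bytes 8..15 of %a are undef
      // can be rewritten as memset(%d, 0, 8).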
      MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
      bool CanReduceSize = false;
      MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
      MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
          MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
      if (auto *MD = dyn_cast<MemoryDef>(Clobber))
        if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
          CanReduceSize = true;

      if (!CanReduceSize)
        return false;
      CopySize = MemSetSize;
    }
  }

  IRBuilder<> Builder(MemCpy);
  Instruction *NewM =
      Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
                           CopySize, MemCpy->getDestAlign());
  auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

  return true;
}

// Attempts to optimize the pattern whereby memory is copied from an alloca to
// another alloca, where the two allocas don't have conflicting mod/ref. If
// successful, the two allocas can be merged into one and the transfer can be
// deleted. This pattern is generated frequently in Rust, due to the ubiquity
// of move operations in that language.
//
// Once we determine that the optimization is safe to perform, we replace all
// uses of the destination alloca with the source alloca. We also "shrink
// wrap" the lifetime markers of the single merged alloca to before the first
// use and after the last use. Note that the "shrink wrapping" procedure is a
// safe transformation only because we restrict the scope of this optimization
// to allocas that aren't captured.
bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
                                          AllocaInst *DestAlloca,
                                          AllocaInst *SrcAlloca, TypeSize Size,
                                          BatchAAResults &BAA) {
  LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
                    << *Store << "\n");

  // Make sure the two allocas are in the same address space.
  if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
    LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
    return false;
  }

  // Check that the copy is full-sized with a statically known size.
  const DataLayout &DL = DestAlloca->getDataLayout();
  std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
  if (!SrcSize || Size != *SrcSize) {
    LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
    return false;
  }
  std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
  if (!DestSize || Size != *DestSize) {
    LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
    return false;
  }

  if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
    return false;

  // Check that src and dest are never captured, unescaped allocas. Also
  // find the nearest common dominator and postdominator for all users in
  // order to shrink wrap the lifetimes, and instructions with noalias metadata
  // to remove them.

  SmallVector<Instruction *, 4> LifetimeMarkers;
  SmallSet<Instruction *, 4> NoAliasInstrs;
  bool SrcNotDom = false;

  // Recursively track the users and check for captures and mod/ref of the
  // allocas.
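  // The dereferenceable-or-null callback lets DetermineUseCaptureKind treat
  // comparisons of the alloca against null as non-capturing.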
  auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
    bool CanBeNull, CanBeFreed;
    return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
  };

  auto CaptureTrackingWithModRef =
      [&](Instruction *AI,
          function_ref<bool(Instruction *)> ModRefCallback) -> bool {
    SmallVector<Instruction *, 8> Worklist;
    Worklist.push_back(AI);
    unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
    Worklist.reserve(MaxUsesToExplore);
    SmallSet<const Use *, 20> Visited;
    while (!Worklist.empty()) {
      Instruction *I = Worklist.back();
      Worklist.pop_back();
      for (const Use &U : I->uses()) {
        auto *UI = cast<Instruction>(U.getUser());
        // If any use exists that isn't dominated by SrcAlloca, we move the
        // src alloca to the entry block before the transformation.
        if (!DT->dominates(SrcAlloca, UI))
          SrcNotDom = true;

        if (Visited.size() >= MaxUsesToExplore) {
          LLVM_DEBUG(
              dbgs()
              << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
          return false;
        }
        if (!Visited.insert(&U).second)
          continue;
        switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
        case UseCaptureKind::MAY_CAPTURE:
          return false;
        case UseCaptureKind::PASSTHROUGH:
          // Instructions cannot have non-instruction users.
          Worklist.push_back(UI);
          continue;
        case UseCaptureKind::NO_CAPTURE: {
          if (UI->isLifetimeStartOrEnd()) {
            // We note the locations of these intrinsic calls so that we can
            // delete them later if the optimization succeeds. This is safe
            // because both llvm.lifetime.start and llvm.lifetime.end
            // effectively fill all the bytes of the alloca with an undefined
            // value, even though they conceptually just mark it alive/dead.
            int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
            if (Size < 0 || Size == DestSize) {
              LifetimeMarkers.push_back(UI);
              continue;
            }
          }
          if (UI->hasMetadata(LLVMContext::MD_noalias))
            NoAliasInstrs.insert(UI);
          if (!ModRefCallback(UI))
            return false;
        }
        }
      }
    }
    return true;
  };

  // Check that dest has no Mod/Ref, from the alloca to the Store, other than
  // full-size lifetime intrinsics, and collect the mod/ref instructions for
  // the reachability check below.
  ModRefInfo DestModRef = ModRefInfo::NoModRef;
  MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
  SmallVector<BasicBlock *, 8> ReachabilityWorklist;
  auto DestModRefCallback = [&](Instruction *UI) -> bool {
    // We don't care about the store itself.
    if (UI == Store)
      return true;
    ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
    DestModRef |= Res;
    if (isModOrRefSet(Res)) {
      // Instruction-level reachability checks.
      // FIXME: An instruction-level version of isPotentiallyReachableFromMany
      // in lib/Analysis/CFG.cpp (currently only available for BasicBlocks)
      // might be helpful here.
      if (UI->getParent() == Store->getParent()) {
        // The same-block case is special because it's the only time we're
        // looking within a single block to see which instruction comes first.
        // Once we start looking at multiple blocks, the first instruction of
        // the block is reachable, so we only need to determine reachability
        // between whole blocks.
        BasicBlock *BB = UI->getParent();

        // If A comes before B, then B is definitively reachable from A.
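        // For example, in a same-block sequence like (illustrative only):
        //   %v = load i32, ptr %dest   ; UI: mod/refs the dest alloca
        //   ...                        ; Store: the full-size copy into dest
        // the mod/ref of dest happens before the copy, so merging the two
        // allocas could change what %v observes; the check below bails out.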
        if (UI->comesBefore(Store))
          return false;

        // If the user's parent block is the entry block, no predecessors
        // exist.
        if (BB->isEntryBlock())
          return true;

        // Otherwise, continue doing the normal per-BB CFG walk.
        ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
      } else {
        ReachabilityWorklist.push_back(UI->getParent());
      }
    }
    return true;
  };

  if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
    return false;
  // Bail out if Dest may have any ModRef before Store.
  if (!ReachabilityWorklist.empty() &&
      isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
                                     nullptr, DT, nullptr))
    return false;

  // Check that, from after the Load to the end of the BB,
  //   - if the dest has any Mod, src has no Ref, and
  //   - if the dest has any Ref, src has no Mod except full-sized lifetimes.
  MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));

  auto SrcModRefCallback = [&](Instruction *UI) -> bool {
    // Any ModRef post-dominated by Load doesn't matter; Load and Store
    // themselves can also be ignored.
    if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
      return true;
    ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
    if ((isModSet(DestModRef) && isRefSet(Res)) ||
        (isRefSet(DestModRef) && isModSet(Res)))
      return false;

    return true;
  };

  if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
    return false;

  // We can do the transformation. First, move the SrcAlloca to the start of
  // the BB.
  if (SrcNotDom)
    SrcAlloca->moveBefore(*SrcAlloca->getParent(),
                          SrcAlloca->getParent()->getFirstInsertionPt());
  // Align the allocas appropriately.
  SrcAlloca->setAlignment(
      std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));

  // Merge the two allocas.
  DestAlloca->replaceAllUsesWith(SrcAlloca);
  eraseInstruction(DestAlloca);

  // Drop metadata on the source alloca.
  SrcAlloca->dropUnknownNonDebugMetadata();

  // TODO: Reconstruct merged lifetime markers.
  // Remove all other lifetime markers, if the original lifetime intrinsics
  // exist.
  if (!LifetimeMarkers.empty()) {
    for (Instruction *I : LifetimeMarkers)
      eraseInstruction(I);
  }

  // As this transformation can cause memory accesses that didn't previously
  // alias to begin to alias one another, we remove !noalias metadata from any
  // uses of either alloca. This is conservative, but more precision doesn't
  // seem worthwhile right now.
  for (Instruction *I : NoAliasInstrs)
    I->setMetadata(LLVMContext::MD_noalias, nullptr);

  LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
  NumStackMove++;
  return true;
}

static bool isZeroSize(Value *Size) {
  if (auto *I = dyn_cast<Instruction>(Size))
    if (auto *Res = simplifyInstruction(I, I->getDataLayout()))
      Size = Res;
  // Treat undef/poison size like zero.
  if (auto *C = dyn_cast<Constant>(Size))
    return isa<UndefValue>(C) || C->isNullValue();
  return false;
}

/// Perform simplification of memcpy's. If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances). This allows later passes to remove the first memcpy
/// altogether.
bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
  // We can only optimize non-volatile memcpy's.
  if (M->isVolatile())
    return false;

  // If the source and destination of the memcpy are the same, then zap it.
  if (M->getSource() == M->getDest()) {
    ++BBI;
    eraseInstruction(M);
    return true;
  }

  // If the size is zero, remove the memcpy.
  if (isZeroSize(M->getLength())) {
    ++BBI;
    eraseInstruction(M);
    return true;
  }

  MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
  if (!MA)
    // Degenerate case: memcpy marked as not accessing memory.
    return false;

  // If copying from a constant, try to turn the memcpy into a memset.
  if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
    if (GV->isConstant() && GV->hasDefinitiveInitializer())
      if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
                                           M->getDataLayout())) {
        IRBuilder<> Builder(M);
        Instruction *NewM = Builder.CreateMemSet(
            M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
        auto *LastDef = cast<MemoryDef>(MA);
        auto *NewAccess =
            MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
        MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

        eraseInstruction(M);
        ++NumCpyToSet;
        return true;
      }

  BatchAAResults BAA(*AA, EEA);
  // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
  MemoryAccess *AnyClobber = MA->getDefiningAccess();
  MemoryLocation DestLoc = MemoryLocation::getForDest(M);
  const MemoryAccess *DestClobber =
      MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);

  // Try to turn a partially redundant memset + memcpy into
  // a smaller memset + memcpy. We don't need the memcpy size for this.
  // The memcpy must post-dominate the memset, so limit this to the same basic
  // block. A non-local generalization is likely not worthwhile.
  if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
    if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
      if (DestClobber->getBlock() == M->getParent())
        if (processMemSetMemCpyDependence(M, MDep, BAA))
          return true;

  MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
      AnyClobber, MemoryLocation::getForSource(M), BAA);

  // There are five possible optimizations we can do for memcpy:
  //   a) memcpy-memcpy xform which exposes redundancy for DSE.
  //   b) call-memcpy xform for return slot optimization.
  //   c) memcpy from freshly alloca'd space or space that has just started
  //      its lifetime copies undefined data, and we can therefore eliminate
  //      the memcpy in favor of the data that was already at the destination.
  //   d) memcpy from a just-memset'd source can be turned into memset.
  //   e) elimination of memcpy via stack-move optimization.
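  // As a small sketch of (a), with purely illustrative operands:
  //   memcpy(Y, X, N);
  //   memcpy(Z, Y, N);   // can be rewritten as memcpy(Z, X, N), so the first
  //                      // memcpy becomes a candidate for dead store
  //                      // elimination if Y has no other readers.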
  if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
    if (Instruction *MI = MD->getMemoryInst()) {
      if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
        if (auto *C = dyn_cast<CallInst>(MI)) {
          if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
                                   TypeSize::getFixed(CopySize->getZExtValue()),
                                   M->getDestAlign().valueOrOne(), BAA,
                                   [C]() -> CallInst * { return C; })) {
            LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
                              << " call: " << *C << "\n"
                              << " memcpy: " << *M << "\n");
            eraseInstruction(M);
            ++NumMemCpyInstr;
            return true;
          }
        }
      }
      if (auto *MDep = dyn_cast<MemCpyInst>(MI))
        if (processMemCpyMemCpyDependence(M, MDep, BAA))
          return true;
      if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
        if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
          LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
          eraseInstruction(M);
          ++NumCpyToSet;
          return true;
        }
      }
    }

    if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
      LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
      eraseInstruction(M);
      ++NumMemCpyInstr;
      return true;
    }
  }

  // If the transfer is from a stack slot to a stack slot, then we may be able
  // to perform the stack-move optimization. See the comments in
  // performStackMoveOptzn() for more details.
  auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
  if (!DestAlloca)
    return false;
  auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
  if (!SrcAlloca)
    return false;
  ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
  if (Len == nullptr)
    return false;
  if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
                            TypeSize::getFixed(Len->getZExtValue()), BAA)) {
    // Avoid invalidating the iterator.
    BBI = M->getNextNonDebugInstruction()->getIterator();
    eraseInstruction(M);
    ++NumMemCpyInstr;
    return true;
  }

  return false;
}

/// Memmove calls with overlapping src/dest buffers that come after a memset
/// may be removed.
bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
  const auto &DL = M->getDataLayout();
  MemoryUseOrDef *MemMoveAccess = MSSA->getMemoryAccess(M);
  if (!MemMoveAccess)
    return false;

  // The memmove is of the form memmove(x, x + A, B).
  MemoryLocation SourceLoc = MemoryLocation::getForSource(M);
  auto *MemMoveSourceOp = M->getSource();
  auto *Source = dyn_cast<GEPOperator>(MemMoveSourceOp);
  if (!Source)
    return false;

  APInt Offset(DL.getIndexTypeSizeInBits(Source->getType()), 0);
  LocationSize MemMoveLocSize = SourceLoc.Size;
  if (Source->getPointerOperand() != M->getDest() ||
      !MemMoveLocSize.hasValue() ||
      !Source->accumulateConstantOffset(DL, Offset) || Offset.isNegative()) {
    return false;
  }

  uint64_t MemMoveSize = MemMoveLocSize.getValue();
  LocationSize TotalSize =
      LocationSize::precise(Offset.getZExtValue() + MemMoveSize);
  MemoryLocation CombinedLoc(M->getDest(), TotalSize);

  // The first dominating clobbering MemoryAccess for the combined location
  // needs to be a memset.
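  // Illustrative shape of the pattern (offsets and sizes are hypothetical):
  //   memset(x, c, N);         // covers both [x, x+B) and [x+A, x+A+B)
  //   memmove(x, x + A, B);    // only moves bytes that already equal c
  // in which case the memmove cannot change memory and is redundant.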
  BatchAAResults BAA(*AA);
  MemoryAccess *FirstDef = MemMoveAccess->getDefiningAccess();
  auto *DestClobber = dyn_cast<MemoryDef>(
      MSSA->getWalker()->getClobberingMemoryAccess(FirstDef, CombinedLoc, BAA));
  if (!DestClobber)
    return false;

  auto *MS = dyn_cast_or_null<MemSetInst>(DestClobber->getMemoryInst());
  if (!MS)
    return false;

  // Memset length must be sufficiently large.
  auto *MemSetLength = dyn_cast<ConstantInt>(MS->getLength());
  if (!MemSetLength || MemSetLength->getZExtValue() < MemMoveSize)
    return false;

  // The destination buffer must have been memset'd.
  if (!BAA.isMustAlias(MS->getDest(), M->getDest()))
    return false;

  return true;
}

/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
/// not to alias.
bool MemCpyOptPass::processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI) {
  // See if the source could potentially be modified by this memmove.
  if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M)))) {
    // On the off-chance the memmove clobbers src with previously memset'd
    // bytes, the memmove may be redundant.
    if (!M->isVolatile() && isMemMoveMemSetDependency(M)) {
      LLVM_DEBUG(dbgs() << "Removed redundant memmove.\n");
      ++BBI;
      eraseInstruction(M);
      ++NumMemMoveInstr;
      return true;
    }
    return false;
  }

  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
                    << "\n");

  // If not, then we know we can transform this.
  Type *ArgTys[3] = {M->getRawDest()->getType(), M->getRawSource()->getType(),
                     M->getLength()->getType()};
  M->setCalledFunction(Intrinsic::getOrInsertDeclaration(
      M->getModule(), Intrinsic::memcpy, ArgTys));

  // For MemorySSA nothing really changes (except that memcpy may imply
  // stricter aliasing guarantees).

  ++NumMoveToCpy;
  return true;
}

/// This is called on every byval argument in call sites.
bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
  const DataLayout &DL = CB.getDataLayout();
  // Find out what feeds this byval argument.
  Value *ByValArg = CB.getArgOperand(ArgNo);
  Type *ByValTy = CB.getParamByValType(ArgNo);
  TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
  MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
  if (!CallAccess)
    return false;
  MemCpyInst *MDep = nullptr;
  BatchAAResults BAA(*AA, EEA);
  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
      CallAccess->getDefiningAccess(), Loc, BAA);
  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
    MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());

  // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
  // a memcpy, see if we can pass the source of the memcpy as the byval
  // argument instead of its result.
  if (!MDep || MDep->isVolatile() ||
      ByValArg->stripPointerCasts() != MDep->getDest())
    return false;

  // The length of the memcpy must be larger than or equal to the size of the
  // byval.
  auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
  if (!C1 || !TypeSize::isKnownGE(
                 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
    return false;

  // Get the alignment of the byval. If the call doesn't specify the alignment,
  // then it is some target-specific value that we can't know.
  MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
  if (!ByValAlign)
    return false;

  // If it is greater than the memcpy's, then we check to see if we can force
  // the source of the memcpy to the alignment we need. If we fail, we bail
  // out.
  MaybeAlign MemDepAlign = MDep->getSourceAlign();
  if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
      getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
                                 DT) < *ByValAlign)
    return false;

  // The type of the memcpy source must match the byval argument.
  if (MDep->getSource()->getType() != ByValArg->getType())
    return false;

  // Verify that the copied-from memory doesn't change in between the memcpy
  // and the byval call.
  //   memcpy(a <- b)
  //   *b = 42;
  //   foo(*a)
  // It would be invalid to transform foo(*a) into foo(*b) here.
  if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
                     MSSA->getMemoryAccess(MDep), CallAccess))
    return false;

  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
                    << " " << *MDep << "\n"
                    << " " << CB << "\n");

  // Otherwise we're good! Update the byval argument.
  combineAAMetadata(&CB, MDep);
  CB.setArgOperand(ArgNo, MDep->getSource());
  ++NumMemCpyInstr;
  return true;
}

/// This is called on memcpy dest pointer arguments attributed as immutable
/// during the call. Try to use the memcpy source directly if all of the
/// following conditions are satisfied.
/// 1. The memcpy dst is neither modified during the call nor captured by the
///    call.
/// 2. The memcpy dst is an alloca with known alignment & size.
///    2-1. The memcpy length == the alloca size, which ensures that the new
///         pointer is dereferenceable for the required range.
///    2-2. The src pointer has alignment >= the alloca alignment or can be
///         enforced so.
/// 3. The memcpy dst and src are not modified between the memcpy and the call
///    (established via the MSSA clobber check).
/// 4. The memcpy src is not modified during the call (the ModRef check shows
///    no Mod).
bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
  BatchAAResults BAA(*AA, EEA);
  Value *ImmutArg = CB.getArgOperand(ArgNo);

  // 1. Ensure passed argument is immutable during call.
  if (!CB.doesNotCapture(ArgNo))
    return false;

  // We know that the argument is readonly at this point, but the function
  // might still modify the same memory through a different pointer. Exclude
  // this either via noalias, or alias analysis.
  if (!CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
      isModSet(
          BAA.getModRefInfo(&CB, MemoryLocation::getBeforeOrAfter(ImmutArg))))
    return false;

  const DataLayout &DL = CB.getDataLayout();

  // 2. Check that the arg is an alloca.
  // TODO: Even if the arg gets back to branches, we can remove the memcpy if
  // all the alloca alignments can be enforced to the source alignment.
  auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
  if (!AI)
    return false;

  std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
  // Can't handle unknown-size allocas
  // (e.g. variable-length arrays, scalable vectors).
  if (!AllocaSize || AllocaSize->isScalable())
    return false;
  MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
  if (!CallAccess)
    return false;

  MemCpyInst *MDep = nullptr;
  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
      CallAccess->getDefiningAccess(), Loc, BAA);
  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
    MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());

  // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
  // a memcpy, check that the arg equals the memcpy dest.
  if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
    return false;

  // The type of the memcpy source must match the immut argument.
  if (MDep->getSource()->getType() != ImmutArg->getType())
    return false;

  // 2-1. The length of the memcpy must be equal to the size of the alloca.
  auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
  if (!MDepLen || AllocaSize != MDepLen->getValue())
    return false;

  // 2-2. The memcpy source alignment must be larger than or equal to the
  // alloca's alignment. If not, we check to see if we can force the source of
  // the memcpy to the alignment we need. If we fail, we bail out.
  Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
  Align AllocaAlign = AI->getAlign();
  if (MemDepAlign < AllocaAlign &&
      getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
                                 DT) < AllocaAlign)
    return false;

  // 3. Verify that the source doesn't change in between the memcpy and
  // the call.
  //   memcpy(a <- b)
  //   *b = 42;
  //   foo(*a)
  // It would be invalid to transform foo(*a) into foo(*b) here.
  if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
                     MSSA->getMemoryAccess(MDep), CallAccess))
    return false;

  // 4. The memcpy src must not be modified during the call.
  if (isModSet(BAA.getModRefInfo(&CB, MemoryLocation::getForSource(MDep))))
    return false;

  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
                    << " " << *MDep << "\n"
                    << " " << CB << "\n");

  // Otherwise we're good! Update the immut argument.
  combineAAMetadata(&CB, MDep);
  CB.setArgOperand(ArgNo, MDep->getSource());
  ++NumMemCpyInstr;
  return true;
}

/// Executes one iteration of MemCpyOptPass.
bool MemCpyOptPass::iterateOnFunction(Function &F) {
  bool MadeChange = false;

  // Walk all instructions in the function.
  for (BasicBlock &BB : F) {
    // Skip unreachable blocks. For example, processStore assumes that an
    // instruction in a BB can't be dominated by a later instruction in the
    // same BB (which is a scenario that can happen for an unreachable BB that
    // has itself as a predecessor).
    if (!DT->isReachableFromEntry(&BB))
      continue;

    for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
      // Avoid invalidating the iterator.
      Instruction *I = &*BI++;

      bool RepeatInstruction = false;

      if (auto *SI = dyn_cast<StoreInst>(I))
        MadeChange |= processStore(SI, BI);
      else if (auto *M = dyn_cast<MemSetInst>(I))
        RepeatInstruction = processMemSet(M, BI);
      else if (auto *M = dyn_cast<MemCpyInst>(I))
        RepeatInstruction = processMemCpy(M, BI);
      else if (auto *M = dyn_cast<MemMoveInst>(I))
        RepeatInstruction = processMemMove(M, BI);
      else if (auto *CB = dyn_cast<CallBase>(I)) {
        for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
          if (CB->isByValArgument(i))
            MadeChange |= processByValArgument(*CB, i);
          else if (CB->onlyReadsMemory(i))
            MadeChange |= processImmutArgument(*CB, i);
        }
      }

      // Reprocess the instruction if desired.
      if (RepeatInstruction) {
        if (BI != BB.begin())
          --BI;
        MadeChange = true;
      }
    }
  }

  return MadeChange;
}

PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto *AA = &AM.getResult<AAManager>(F);
  auto *AC = &AM.getResult<AssumptionAnalysis>(F);
  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
  auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);

  bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
  if (!MadeChange)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<MemorySSAAnalysis>();
  return PA;
}

bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
                            AliasAnalysis *AA_, AssumptionCache *AC_,
                            DominatorTree *DT_, PostDominatorTree *PDT_,
                            MemorySSA *MSSA_) {
  bool MadeChange = false;
  TLI = TLI_;
  AA = AA_;
  AC = AC_;
  DT = DT_;
  PDT = PDT_;
  MSSA = MSSA_;
  MemorySSAUpdater MSSAU_(MSSA_);
  MSSAU = &MSSAU_;
  EarliestEscapeAnalysis EEA_(*DT);
  EEA = &EEA_;

  while (true) {
    if (!iterateOnFunction(F))
      break;
    MadeChange = true;
  }

  if (VerifyMemorySSA)
    MSSA_->verifyMemorySSA();

  return MadeChange;
}