//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <optional>

#define DEBUG_TYPE "lower-mem-intrinsics"

using namespace llvm;

void llvm::createMemCpyLoopKnownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
    ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  // No need to expand zero length copies.
  if (CopyLen->isZero())
    return;

  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB = nullptr;
  Function *ParentFunc = PreLoopBB->getParent();
  LLVMContext &Ctx = PreLoopBB->getContext();
  const DataLayout &DL = ParentFunc->getDataLayout();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *TypeOfCopyLen = CopyLen->getType();
  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");

  Type *Int8Type = Type::getInt8Ty(Ctx);
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  uint64_t LoopEndCount = alignDown(CopyLen->getZExtValue(), LoopOpSize);

  if (LoopEndCount != 0) {
    // Split
    PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split");
    BasicBlock *LoopBB =
        BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB);
    PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);

    IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

    Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
    Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));

    IRBuilder<> LoopBuilder(LoopBB);
    PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index");
    LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);
    // Loop Body

    // If we used LoopOpType as GEP element type, we would iterate over the
    // buffers in TypeStoreSize strides while copying TypeAllocSize bytes,
    // i.e., we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore,
    // use byte offsets computed from the TypeStoreSize.
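    // The loop body emitted below thus has roughly this shape (illustrative
    // sketch only, assuming an i64 length and a 16-byte i128 LoopOpType;
    // value names, alignment, and metadata are omitted or approximated):
    //   %loop-index = phi i64 [ 0, %preloop ], [ %new-index, %load-store-loop ]
    //   %src.gep = getelementptr inbounds i8, ptr %src, i64 %loop-index
    //   %v = load i128, ptr %src.gep
    //   %dst.gep = getelementptr inbounds i8, ptr %dst, i64 %loop-index
    //   store i128 %v, ptr %dst.gep
    //   %new-index = add i64 %loop-index, 16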
    Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, LoopIndex);
    LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                   PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, LoopIndex);
    StoreInst *Store = LoopBuilder.CreateAlignedStore(
        Load, DstGEP, PartDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *NewIndex = LoopBuilder.CreateAdd(
        LoopIndex, ConstantInt::get(TypeOfCopyLen, LoopOpSize));
    LoopIndex->addIncoming(NewIndex, LoopBB);

    // Create the loop branch condition.
    Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
                             LoopBB, PostLoopBB);
  }

  uint64_t BytesCopied = LoopEndCount;
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
  if (RemainingBytes) {
    BasicBlock::iterator InsertIt = PostLoopBB ? PostLoopBB->getFirstNonPHIIt()
                                               : InsertBefore->getIterator();
    IRBuilder<> RBuilder(InsertIt->getParent(), InsertIt);

    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, SrcAlign, DstAlign,
                                          AtomicElementSize);

    for (auto *OpTy : RemainingOps) {
      Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
      Align PartDstAlign(commonAlignment(DstAlign, BytesCopied));

      unsigned OperandSize = DL.getTypeStoreSize(OpTy);
      assert(
          (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
          "Atomic memcpy lowering is not supported for selected operand size");

      Value *SrcGEP = RBuilder.CreateInBoundsGEP(
          Int8Type, SrcAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
      LoadInst *Load =
          RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
      if (!CanOverlap) {
        // Set alias scope for loads.
        Load->setMetadata(LLVMContext::MD_alias_scope,
                          MDNode::get(Ctx, NewScope));
      }
      Value *DstGEP = RBuilder.CreateInBoundsGEP(
          Int8Type, DstAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
      StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                     DstIsVolatile);
      if (!CanOverlap) {
        // Indicate that stores don't overlap loads.
        Store->setMetadata(LLVMContext::MD_noalias,
                           MDNode::get(Ctx, NewScope));
      }
      if (AtomicElementSize) {
        Load->setAtomic(AtomicOrdering::Unordered);
        Store->setAtomic(AtomicOrdering::Unordered);
      }
      BytesCopied += OperandSize;
    }
  }
  assert(BytesCopied == CopyLen->getZExtValue() &&
         "Bytes copied should match size in the call!");
}

// \returns \p Len urem \p OpSize, checking for optimization opportunities.
static Value *getRuntimeLoopRemainder(const DataLayout &DL, IRBuilderBase &B,
                                      Value *Len, Value *OpSize,
                                      unsigned OpSizeVal) {
  // For powers of 2, we can and by (OpSizeVal - 1) instead of using urem.
  if (isPowerOf2_32(OpSizeVal))
    return B.CreateAnd(Len, OpSizeVal - 1);
  return B.CreateURem(Len, OpSize);
}

// \returns (\p Len udiv \p OpSize) mul \p OpSize, checking for optimization
// opportunities.
// If RTLoopRemainder is provided, it must be the result of
// getRuntimeLoopRemainder() with the same arguments.
static Value *getRuntimeLoopBytes(const DataLayout &DL, IRBuilderBase &B,
                                  Value *Len, Value *OpSize, unsigned OpSizeVal,
                                  Value *RTLoopRemainder = nullptr) {
  if (!RTLoopRemainder)
    RTLoopRemainder = getRuntimeLoopRemainder(DL, B, Len, OpSize, OpSizeVal);
  return B.CreateSub(Len, RTLoopRemainder);
}

void llvm::createMemCpyLoopUnknownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
    Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
    bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB =
      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");

  Function *ParentFunc = PreLoopBB->getParent();
  const DataLayout &DL = ParentFunc->getDataLayout();
  LLVMContext &Ctx = PreLoopBB->getContext();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

  // Calculate the loop trip count, and remaining bytes to copy after the loop.
  Type *CopyLenType = CopyLen->getType();
  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
  assert(ILengthType &&
         "expected size argument to memcpy to be an integer type!");
  Type *Int8Type = Type::getInt8Ty(Ctx);
  bool LoopOpIsInt8 = LoopOpType == Int8Type;
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);

  Value *RuntimeLoopBytes = CopyLen;
  Value *RuntimeResidualBytes = nullptr;
  if (!LoopOpIsInt8) {
    RuntimeResidualBytes = getRuntimeLoopRemainder(DL, PLBuilder, CopyLen,
                                                   CILoopOpSize, LoopOpSize);
    RuntimeLoopBytes = getRuntimeLoopBytes(DL, PLBuilder, CopyLen, CILoopOpSize,
                                           LoopOpSize, RuntimeResidualBytes);
  }

  BasicBlock *LoopBB =
      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
  IRBuilder<> LoopBuilder(LoopBB);

  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);

  // If we used LoopOpType as GEP element type, we would iterate over the
  // buffers in TypeStoreSize strides while copying TypeAllocSize bytes, i.e.,
  // we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore, use byte
  // offsets computed from the TypeStoreSize.
  Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, LoopIndex);
  LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                 PartSrcAlign, SrcIsVolatile);
  if (!CanOverlap) {
    // Set alias scope for loads.
    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
  }
  Value *DstGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, LoopIndex);
  StoreInst *Store =
      LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign, DstIsVolatile);
  if (!CanOverlap) {
    // Indicate that stores don't overlap loads.
    Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
  }
  if (AtomicElementSize) {
    Load->setAtomic(AtomicOrdering::Unordered);
    Store->setAtomic(AtomicOrdering::Unordered);
  }
  Value *NewIndex = LoopBuilder.CreateAdd(
      LoopIndex, ConstantInt::get(CopyLenType, LoopOpSize));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  bool RequiresResidual =
      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
  if (RequiresResidual) {
    Type *ResLoopOpType = AtomicElementSize
                              ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
                              : Int8Type;
    unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
    assert(ResLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
           "Store size is expected to match type size");

    Align ResSrcAlign(commonAlignment(PartSrcAlign, ResLoopOpSize));
    Align ResDstAlign(commonAlignment(PartDstAlign, ResLoopOpSize));

    // Loop body for the residual copy.
    BasicBlock *ResLoopBB = BasicBlock::Create(
        Ctx, "loop-memcpy-residual", PreLoopBB->getParent(), PostLoopBB);
    // Residual loop header.
    BasicBlock *ResHeaderBB = BasicBlock::Create(
        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);

    // Need to update the pre-loop basic block to branch to the correct place.
    // Branch to the main loop if the count is non-zero, branch to the residual
    // loop if the copy size is smaller than one iteration of the main loop but
    // non-zero, and finally branch to after the residual loop if the memcpy
    // size is zero.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopBytes, Zero),
                           LoopBB, ResHeaderBB);
    PreLoopBB->getTerminator()->eraseFromParent();

    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopBytes), LoopBB,
        ResHeaderBB);

    // Determine if we need to branch to the residual loop or bypass it.
    IRBuilder<> RHBuilder(ResHeaderBB);
    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidualBytes, Zero),
                           ResLoopBB, PostLoopBB);

    // Copy the residual with single byte load/store loop.
    IRBuilder<> ResBuilder(ResLoopBB);
    PHINode *ResidualIndex =
        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
    ResidualIndex->addIncoming(Zero, ResHeaderBB);

    Value *FullOffset = ResBuilder.CreateAdd(RuntimeLoopBytes, ResidualIndex);
    Value *SrcGEP = ResBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, FullOffset);
    LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
                                                  ResSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP = ResBuilder.CreateInBoundsGEP(Int8Type, DstAddr, FullOffset);
    StoreInst *Store =
        ResBuilder.CreateAlignedStore(Load, DstGEP, ResDstAlign, DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *ResNewIndex = ResBuilder.CreateAdd(
        ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);

    // Create the loop branch condition.
    ResBuilder.CreateCondBr(
        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidualBytes), ResLoopBB,
        PostLoopBB);
  } else {
    // In this case the loop operand type was a byte, and there is no need for
    // a residual loop to copy the remaining memory after the main loop.
    // We do, however, need to patch up the control flow by creating the
    // terminators for the preloop block and the memcpy loop.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopBytes, Zero),
                           LoopBB, PostLoopBB);
    PreLoopBB->getTerminator()->eraseFromParent();
    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopBytes), LoopBB,
        PostLoopBB);
  }
}

// If \p Addr1 and \p Addr2 are pointers to different address spaces, create an
// addrspacecast to obtain a pair of pointers in the same address space. The
// caller needs to ensure that addrspacecasting is possible.
// No-op if the pointers are in the same address space.
static std::pair<Value *, Value *>
tryInsertCastToCommonAddrSpace(IRBuilderBase &B, Value *Addr1, Value *Addr2,
                               const TargetTransformInfo &TTI) {
  Value *ResAddr1 = Addr1;
  Value *ResAddr2 = Addr2;

  unsigned AS1 = cast<PointerType>(Addr1->getType())->getAddressSpace();
  unsigned AS2 = cast<PointerType>(Addr2->getType())->getAddressSpace();
  if (AS1 != AS2) {
    if (TTI.isValidAddrSpaceCast(AS2, AS1))
      ResAddr2 = B.CreateAddrSpaceCast(Addr2, Addr1->getType());
    else if (TTI.isValidAddrSpaceCast(AS1, AS2))
      ResAddr1 = B.CreateAddrSpaceCast(Addr1, Addr2->getType());
    else
      llvm_unreachable("Can only lower memmove between address spaces if they "
                       "support addrspacecast");
  }
  return {ResAddr1, ResAddr2};
}

// Lower memmove to IR. memmove is required to correctly copy overlapping
// memory regions; therefore, it has to check the relative positions of the
// source and destination pointers and choose the copy direction accordingly.
//
// The code below is an IR rendition of this C function:
//
// void* memmove(void* dst, const void* src, size_t n) {
//   unsigned char* d = dst;
//   const unsigned char* s = src;
//   if (s < d) {
//     // copy backwards
//     while (n--) {
//       d[n] = s[n];
//     }
//   } else {
//     // copy forward
//     for (size_t i = 0; i < n; ++i) {
//       d[i] = s[i];
//     }
//   }
//   return dst;
// }
//
// If the TargetTransformInfo specifies a wider MemcpyLoopLoweringType, it is
// used for the memory accesses in the loops. Then, additional loops with
// byte-wise accesses are added for the remaining bytes.
static void createMemMoveLoopUnknownSize(Instruction *InsertBefore,
                                         Value *SrcAddr, Value *DstAddr,
                                         Value *CopyLen, Align SrcAlign,
                                         Align DstAlign, bool SrcIsVolatile,
                                         bool DstIsVolatile,
                                         const TargetTransformInfo &TTI) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getDataLayout();
  LLVMContext &Ctx = OrigBB->getContext();
  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
                                                   SrcAlign, DstAlign);
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  Type *Int8Type = Type::getInt8Ty(Ctx);
  bool LoopOpIsInt8 = LoopOpType == Int8Type;

  // If the memory accesses are wider than one byte, residual loops with
  // i8-accesses are required to move the remaining bytes.
  bool RequiresResidual = !LoopOpIsInt8;

  Type *ResidualLoopOpType = Int8Type;
  unsigned ResidualLoopOpSize = DL.getTypeStoreSize(ResidualLoopOpType);

  // Calculate the loop trip count and remaining bytes to copy after the loop.
  IntegerType *ILengthType = cast<IntegerType>(TypeOfCopyLen);
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
  ConstantInt *CIResidualLoopOpSize =
      ConstantInt::get(ILengthType, ResidualLoopOpSize);
  ConstantInt *Zero = ConstantInt::get(ILengthType, 0);

  IRBuilder<> PLBuilder(InsertBefore);

  Value *RuntimeLoopBytes = CopyLen;
  Value *RuntimeLoopRemainder = nullptr;
  Value *SkipResidualCondition = nullptr;
  if (RequiresResidual) {
    RuntimeLoopRemainder = getRuntimeLoopRemainder(DL, PLBuilder, CopyLen,
                                                   CILoopOpSize, LoopOpSize);
    RuntimeLoopBytes = getRuntimeLoopBytes(DL, PLBuilder, CopyLen, CILoopOpSize,
                                           LoopOpSize, RuntimeLoopRemainder);
    SkipResidualCondition =
        PLBuilder.CreateICmpEQ(RuntimeLoopRemainder, Zero, "skip_residual");
  }
  Value *SkipMainCondition =
      PLBuilder.CreateICmpEQ(RuntimeLoopBytes, Zero, "skip_main");

  // Create a comparison of src and dst, based on which we jump to either
  // the forward-copy part of the function (if src >= dst) or the
  // backwards-copy part (if src < dst).
  // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
  // structure. Its block terminators (unconditional branches) are replaced by
  // the appropriate conditional branches when the loop is built.
  // If the pointers are in different address spaces, they need to be converted
  // to a compatible one. Cases where memory ranges in the different address
  // spaces cannot overlap are lowered as memcpy and not handled here.
  auto [CmpSrcAddr, CmpDstAddr] =
      tryInsertCastToCommonAddrSpace(PLBuilder, SrcAddr, DstAddr, TTI);
  Value *PtrCompare =
      PLBuilder.CreateICmpULT(CmpSrcAddr, CmpDstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore->getIterator(),
                                &ThenTerm, &ElseTerm);

  // If the LoopOpSize is greater than 1, each part of the function consists of
  // four blocks:
  //   memmove_copy_backwards:
  //       skip the residual loop when 0 iterations are required
  //   memmove_bwd_residual_loop:
  //       copy the last few bytes individually so that the remaining length is
  //       a multiple of the LoopOpSize
  //   memmove_bwd_middle: skip the main loop when 0 iterations are required
  //   memmove_bwd_main_loop: the actual backwards loop BB with wide accesses
  //   memmove_copy_forward: skip the main loop when 0 iterations are required
  //   memmove_fwd_main_loop: the actual forward loop BB with wide accesses
  //   memmove_fwd_middle: skip the residual loop when 0 iterations are required
  //   memmove_fwd_residual_loop: copy the last few bytes individually
  //
  // The main and residual loops are switched between copying forward and
  // backward so that the residual loop always operates on the end of the moved
  // range. This is based on the assumption that buffers whose start is aligned
  // with the LoopOpSize are more common than buffers whose end is.
  //
  // If the LoopOpSize is 1, each part of the function consists of two blocks:
  //   memmove_copy_backwards: skip the loop when 0 iterations are required
  //   memmove_bwd_main_loop: the actual backwards loop BB
  //   memmove_copy_forward: skip the loop when 0 iterations are required
  //   memmove_fwd_main_loop: the actual forward loop BB
  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  CopyBackwardsBB->setName("memmove_copy_backwards");
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  CopyForwardBB->setName("memmove_copy_forward");
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  // Accesses in the residual loops need not share the same alignment as those
  // in the main loops.
  Align ResidualSrcAlign(commonAlignment(PartSrcAlign, ResidualLoopOpSize));
  Align ResidualDstAlign(commonAlignment(PartDstAlign, ResidualLoopOpSize));

  // Copying backwards.
  {
    BasicBlock *MainLoopBB = BasicBlock::Create(
        F->getContext(), "memmove_bwd_main_loop", F, CopyForwardBB);

    // The predecessor of the memmove_bwd_main_loop. Updated below if a
    // residual loop is emitted first.
    BasicBlock *PredBB = CopyBackwardsBB;

    if (RequiresResidual) {
      // backwards residual loop
      BasicBlock *ResidualLoopBB = BasicBlock::Create(
          F->getContext(), "memmove_bwd_residual_loop", F, MainLoopBB);
      IRBuilder<> ResidualLoopBuilder(ResidualLoopBB);
      PHINode *ResidualLoopPhi = ResidualLoopBuilder.CreatePHI(ILengthType, 0);
      Value *ResidualIndex = ResidualLoopBuilder.CreateSub(
          ResidualLoopPhi, CIResidualLoopOpSize, "bwd_residual_index");
      // If we used LoopOpType as GEP element type, we would iterate over the
      // buffers in TypeStoreSize strides while copying TypeAllocSize bytes,
      // i.e., we would miss bytes if TypeStoreSize != TypeAllocSize.
      // Therefore, use byte offsets computed from the TypeStoreSize.
      Value *LoadGEP = ResidualLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr,
                                                             ResidualIndex);
      Value *Element = ResidualLoopBuilder.CreateAlignedLoad(
          ResidualLoopOpType, LoadGEP, ResidualSrcAlign, SrcIsVolatile,
          "element");
      Value *StoreGEP = ResidualLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr,
                                                              ResidualIndex);
      ResidualLoopBuilder.CreateAlignedStore(Element, StoreGEP,
                                             ResidualDstAlign, DstIsVolatile);

      // After the residual loop, go to an intermediate block.
      BasicBlock *IntermediateBB = BasicBlock::Create(
          F->getContext(), "memmove_bwd_middle", F, MainLoopBB);
      // Later code expects a terminator in the PredBB.
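      // The unreachable created below is only a placeholder terminator; it is
      // replaced by the conditional branch on SkipMainCondition once the main
      // loop has been emitted (see "How to get to the main loop" below).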
      IRBuilder<> IntermediateBuilder(IntermediateBB);
      IntermediateBuilder.CreateUnreachable();
      ResidualLoopBuilder.CreateCondBr(
          ResidualLoopBuilder.CreateICmpEQ(ResidualIndex, RuntimeLoopBytes),
          IntermediateBB, ResidualLoopBB);

      ResidualLoopPhi->addIncoming(ResidualIndex, ResidualLoopBB);
      ResidualLoopPhi->addIncoming(CopyLen, CopyBackwardsBB);

      // How to get to the residual:
      BranchInst::Create(IntermediateBB, ResidualLoopBB, SkipResidualCondition,
                         ThenTerm->getIterator());
      ThenTerm->eraseFromParent();

      PredBB = IntermediateBB;
    }

    // main loop
    IRBuilder<> MainLoopBuilder(MainLoopBB);
    PHINode *MainLoopPhi = MainLoopBuilder.CreatePHI(ILengthType, 0);
    Value *MainIndex =
        MainLoopBuilder.CreateSub(MainLoopPhi, CILoopOpSize, "bwd_main_index");
    Value *LoadGEP =
        MainLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, MainIndex);
    Value *Element = MainLoopBuilder.CreateAlignedLoad(
        LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile, "element");
    Value *StoreGEP =
        MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, MainIndex);
    MainLoopBuilder.CreateAlignedStore(Element, StoreGEP, PartDstAlign,
                                       DstIsVolatile);
    MainLoopBuilder.CreateCondBr(MainLoopBuilder.CreateICmpEQ(MainIndex, Zero),
                                 ExitBB, MainLoopBB);
    MainLoopPhi->addIncoming(MainIndex, MainLoopBB);
    MainLoopPhi->addIncoming(RuntimeLoopBytes, PredBB);

    // How to get to the main loop:
    Instruction *PredBBTerm = PredBB->getTerminator();
    BranchInst::Create(ExitBB, MainLoopBB, SkipMainCondition,
                       PredBBTerm->getIterator());
    PredBBTerm->eraseFromParent();
  }

  // Copying forward.
  // main loop
  {
    BasicBlock *MainLoopBB =
        BasicBlock::Create(F->getContext(), "memmove_fwd_main_loop", F, ExitBB);
    IRBuilder<> MainLoopBuilder(MainLoopBB);
    PHINode *MainLoopPhi =
        MainLoopBuilder.CreatePHI(ILengthType, 0, "fwd_main_index");
    Value *LoadGEP =
        MainLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, MainLoopPhi);
    Value *Element = MainLoopBuilder.CreateAlignedLoad(
        LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile, "element");
    Value *StoreGEP =
        MainLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, MainLoopPhi);
    MainLoopBuilder.CreateAlignedStore(Element, StoreGEP, PartDstAlign,
                                       DstIsVolatile);
    Value *MainIndex = MainLoopBuilder.CreateAdd(MainLoopPhi, CILoopOpSize);
    MainLoopPhi->addIncoming(MainIndex, MainLoopBB);
    MainLoopPhi->addIncoming(Zero, CopyForwardBB);

    Instruction *CopyFwdBBTerm = CopyForwardBB->getTerminator();
    BasicBlock *SuccessorBB = ExitBB;
    if (RequiresResidual)
      SuccessorBB =
          BasicBlock::Create(F->getContext(), "memmove_fwd_middle", F, ExitBB);

    // leaving or staying in the main loop
    MainLoopBuilder.CreateCondBr(
        MainLoopBuilder.CreateICmpEQ(MainIndex, RuntimeLoopBytes), SuccessorBB,
        MainLoopBB);

    // getting in or skipping the main loop
    BranchInst::Create(SuccessorBB, MainLoopBB, SkipMainCondition,
                       CopyFwdBBTerm->getIterator());
    CopyFwdBBTerm->eraseFromParent();

    if (RequiresResidual) {
      BasicBlock *IntermediateBB = SuccessorBB;
      IRBuilder<> IntermediateBuilder(IntermediateBB);
      BasicBlock *ResidualLoopBB = BasicBlock::Create(
          F->getContext(), "memmove_fwd_residual_loop", F, ExitBB);
      IntermediateBuilder.CreateCondBr(SkipResidualCondition, ExitBB,
                                       ResidualLoopBB);

      // Residual loop
      IRBuilder<> ResidualLoopBuilder(ResidualLoopBB);
      PHINode *ResidualLoopPhi =
          ResidualLoopBuilder.CreatePHI(ILengthType, 0, "fwd_residual_index");
      Value *LoadGEP = ResidualLoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr,
                                                             ResidualLoopPhi);
      Value *Element = ResidualLoopBuilder.CreateAlignedLoad(
          ResidualLoopOpType, LoadGEP, ResidualSrcAlign, SrcIsVolatile,
          "element");
      Value *StoreGEP = ResidualLoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr,
                                                              ResidualLoopPhi);
      ResidualLoopBuilder.CreateAlignedStore(Element, StoreGEP,
                                             ResidualDstAlign, DstIsVolatile);
      Value *ResidualIndex =
          ResidualLoopBuilder.CreateAdd(ResidualLoopPhi, CIResidualLoopOpSize);
      ResidualLoopBuilder.CreateCondBr(
          ResidualLoopBuilder.CreateICmpEQ(ResidualIndex, CopyLen), ExitBB,
          ResidualLoopBB);
      ResidualLoopPhi->addIncoming(ResidualIndex, ResidualLoopBB);
      ResidualLoopPhi->addIncoming(RuntimeLoopBytes, IntermediateBB);
    }
  }
}

// Similar to createMemMoveLoopUnknownSize, except that the trip counts are
// computed at compile time, obsolete loops and branches are omitted, and the
// residual code is straight-line code instead of a loop.
static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
                                       Value *SrcAddr, Value *DstAddr,
                                       ConstantInt *CopyLen, Align SrcAlign,
                                       Align DstAlign, bool SrcIsVolatile,
                                       bool DstIsVolatile,
                                       const TargetTransformInfo &TTI) {
  // No need to expand zero length moves.
  if (CopyLen->isZero())
    return;

  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getDataLayout();
  LLVMContext &Ctx = OrigBB->getContext();
  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
                                                   SrcAlign, DstAlign);
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  Type *Int8Type = Type::getInt8Ty(Ctx);

  // Calculate the loop trip count and remaining bytes to copy after the loop.
  uint64_t BytesCopiedInLoop = alignDown(CopyLen->getZExtValue(), LoopOpSize);
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopiedInLoop;

  IntegerType *ILengthType = cast<IntegerType>(TypeOfCopyLen);
  ConstantInt *Zero = ConstantInt::get(ILengthType, 0);
  ConstantInt *LoopBound = ConstantInt::get(ILengthType, BytesCopiedInLoop);
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);

  IRBuilder<> PLBuilder(InsertBefore);

  auto [CmpSrcAddr, CmpDstAddr] =
      tryInsertCastToCommonAddrSpace(PLBuilder, SrcAddr, DstAddr, TTI);
  Value *PtrCompare =
      PLBuilder.CreateICmpULT(CmpSrcAddr, CmpDstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore->getIterator(),
                                &ThenTerm, &ElseTerm);

  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  // Helper function to generate a load/store pair of a given type in the
  // residual. Used in the forward and backward branches.
  auto GenerateResidualLdStPair = [&](Type *OpTy, IRBuilderBase &Builder,
                                      uint64_t &BytesCopied) {
    Align ResSrcAlign(commonAlignment(SrcAlign, BytesCopied));
    Align ResDstAlign(commonAlignment(DstAlign, BytesCopied));

    unsigned OperandSize = DL.getTypeStoreSize(OpTy);

    // If we used LoopOpType as GEP element type, we would iterate over the
    // buffers in TypeStoreSize strides while copying TypeAllocSize bytes,
    // i.e., we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore,
    // use byte offsets computed from the TypeStoreSize.
    Value *SrcGEP = Builder.CreateInBoundsGEP(
        Int8Type, SrcAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
    LoadInst *Load =
        Builder.CreateAlignedLoad(OpTy, SrcGEP, ResSrcAlign, SrcIsVolatile);
    Value *DstGEP = Builder.CreateInBoundsGEP(
        Int8Type, DstAddr, ConstantInt::get(TypeOfCopyLen, BytesCopied));
    Builder.CreateAlignedStore(Load, DstGEP, ResDstAlign, DstIsVolatile);
    BytesCopied += OperandSize;
  };

  // Copying backwards.
  if (RemainingBytes != 0) {
    CopyBackwardsBB->setName("memmove_bwd_residual");
    uint64_t BytesCopied = BytesCopiedInLoop;

    // Residual code is required to move the remaining bytes. We need the same
    // instructions as in the forward case, only in reverse. So we generate
    // code the same way, except that we change the IRBuilder insert point for
    // each load/store pair so that each one is inserted before the previous
    // one instead of after it.
    IRBuilder<> BwdResBuilder(CopyBackwardsBB,
                              CopyBackwardsBB->getFirstNonPHIIt());
    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, PartSrcAlign,
                                          PartDstAlign);
    for (auto *OpTy : RemainingOps) {
      // reverse the order of the emitted operations
      BwdResBuilder.SetInsertPoint(CopyBackwardsBB,
                                   CopyBackwardsBB->getFirstNonPHIIt());
      GenerateResidualLdStPair(OpTy, BwdResBuilder, BytesCopied);
    }
  }
  if (BytesCopiedInLoop != 0) {
    BasicBlock *LoopBB = CopyBackwardsBB;
    BasicBlock *PredBB = OrigBB;
    if (RemainingBytes != 0) {
      // if we introduce residual code, it needs its separate BB
      LoopBB = CopyBackwardsBB->splitBasicBlock(
          CopyBackwardsBB->getTerminator(), "memmove_bwd_loop");
      PredBB = CopyBackwardsBB;
    } else {
      CopyBackwardsBB->setName("memmove_bwd_loop");
    }
    IRBuilder<> LoopBuilder(LoopBB->getTerminator());
    PHINode *LoopPhi = LoopBuilder.CreatePHI(ILengthType, 0);
    Value *Index = LoopBuilder.CreateSub(LoopPhi, CILoopOpSize, "bwd_index");
    Value *LoadGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, Index);
    Value *Element = LoopBuilder.CreateAlignedLoad(
        LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile, "element");
    Value *StoreGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, Index);
    LoopBuilder.CreateAlignedStore(Element, StoreGEP, PartDstAlign,
                                   DstIsVolatile);

    // Replace the unconditional branch introduced by
    // SplitBlockAndInsertIfThenElse to turn LoopBB into a loop.
    Instruction *UncondTerm = LoopBB->getTerminator();
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpEQ(Index, Zero), ExitBB,
                             LoopBB);
    UncondTerm->eraseFromParent();

    LoopPhi->addIncoming(Index, LoopBB);
    LoopPhi->addIncoming(LoopBound, PredBB);
  }

  // Copying forward.
  BasicBlock *FwdResidualBB = CopyForwardBB;
  if (BytesCopiedInLoop != 0) {
    CopyForwardBB->setName("memmove_fwd_loop");
    BasicBlock *LoopBB = CopyForwardBB;
    BasicBlock *SuccBB = ExitBB;
    if (RemainingBytes != 0) {
      // if we introduce residual code, it needs its separate BB
      SuccBB = CopyForwardBB->splitBasicBlock(CopyForwardBB->getTerminator(),
                                              "memmove_fwd_residual");
      FwdResidualBB = SuccBB;
    }
    IRBuilder<> LoopBuilder(LoopBB->getTerminator());
    PHINode *LoopPhi = LoopBuilder.CreatePHI(ILengthType, 0, "fwd_index");
    Value *LoadGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, SrcAddr, LoopPhi);
    Value *Element = LoopBuilder.CreateAlignedLoad(
        LoopOpType, LoadGEP, PartSrcAlign, SrcIsVolatile, "element");
    Value *StoreGEP = LoopBuilder.CreateInBoundsGEP(Int8Type, DstAddr, LoopPhi);
    LoopBuilder.CreateAlignedStore(Element, StoreGEP, PartDstAlign,
                                   DstIsVolatile);
    Value *Index = LoopBuilder.CreateAdd(LoopPhi, CILoopOpSize);
    LoopPhi->addIncoming(Index, LoopBB);
    LoopPhi->addIncoming(Zero, OrigBB);

    // Replace the unconditional branch to turn LoopBB into a loop.
    Instruction *UncondTerm = LoopBB->getTerminator();
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpEQ(Index, LoopBound), SuccBB,
                             LoopBB);
    UncondTerm->eraseFromParent();
  }

  if (RemainingBytes != 0) {
    uint64_t BytesCopied = BytesCopiedInLoop;

    // Residual code is required to move the remaining bytes. In the forward
    // case, we emit it in the normal order.
    IRBuilder<> FwdResBuilder(FwdResidualBB->getTerminator());
    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, PartSrcAlign,
                                          PartDstAlign);
    for (auto *OpTy : RemainingOps)
      GenerateResidualLdStPair(OpTy, FwdResBuilder, BytesCopied);
  }
}

static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                             Value *CopyLen, Value *SetValue, Align DstAlign,
                             bool IsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getDataLayout();
  BasicBlock *NewBB = OrigBB->splitBasicBlock(InsertBefore, "split");
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);

  IRBuilder<> Builder(OrigBB->getTerminator());

  Builder.CreateCondBr(
      Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
      LoopBB);
  OrigBB->getTerminator()->eraseFromParent();

  unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
  Align PartAlign(commonAlignment(DstAlign, PartSize));

  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);

  LoopBuilder.CreateAlignedStore(
      SetValue,
      LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
      PartAlign, IsVolatile);

  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
                           NewBB);
}

template <typename T>
static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
  if (SE) {
    const SCEV *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
    const SCEV *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
      return false;
  }
  return true;
}

void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
                              const TargetTransformInfo &TTI,
                              ScalarEvolution *SE) {
  bool CanOverlap = canOverlap(Memcpy, SE);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ Memcpy->getLength(),
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  }
}

bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
                               const TargetTransformInfo &TTI) {
  Value *CopyLen = Memmove->getLength();
  Value *SrcAddr = Memmove->getRawSource();
  Value *DstAddr = Memmove->getRawDest();
  Align SrcAlign = Memmove->getSourceAlign().valueOrOne();
  Align DstAlign = Memmove->getDestAlign().valueOrOne();
  bool SrcIsVolatile = Memmove->isVolatile();
  bool DstIsVolatile = SrcIsVolatile;
  IRBuilder<> CastBuilder(Memmove);

  unsigned SrcAS = SrcAddr->getType()->getPointerAddressSpace();
  unsigned DstAS = DstAddr->getType()->getPointerAddressSpace();
  if (SrcAS != DstAS) {
    if (!TTI.addrspacesMayAlias(SrcAS, DstAS)) {
      // We may not be able to emit a pointer comparison, but we don't have
      // to. Expand as memcpy.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
        createMemCpyLoopKnownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                  CI, SrcAlign, DstAlign, SrcIsVolatile,
                                  DstIsVolatile,
                                  /*CanOverlap=*/false, TTI);
      } else {
        createMemCpyLoopUnknownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                    CopyLen, SrcAlign, DstAlign, SrcIsVolatile,
                                    DstIsVolatile,
                                    /*CanOverlap=*/false, TTI);
      }

      return true;
    }

    if (!(TTI.isValidAddrSpaceCast(DstAS, SrcAS) ||
          TTI.isValidAddrSpaceCast(SrcAS, DstAS))) {
      // We don't know generically if it's legal to introduce an
      // addrspacecast. We need to know either if it's legal to insert an
      // addrspacecast, or if the address spaces cannot alias.
      LLVM_DEBUG(
          dbgs() << "Do not know how to expand memmove between different "
                    "address spaces\n");
      return false;
    }
  }

  if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
    createMemMoveLoopKnownSize(
        /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CI, SrcAlign, DstAlign,
        SrcIsVolatile, DstIsVolatile, TTI);
  } else {
    createMemMoveLoopUnknownSize(
        /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign,
        SrcIsVolatile, DstIsVolatile, TTI);
  }
  return true;
}

void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
  createMemSetLoop(/* InsertBefore */ Memset,
                   /* DstAddr */ Memset->getRawDest(),
                   /* CopyLen */ Memset->getLength(),
                   /* SetValue */ Memset->getValue(),
                   /* Alignment */ Memset->getDestAlign().valueOrOne(),
                   Memset->isVolatile());
}

void llvm::expandMemSetPatternAsLoop(MemSetPatternInst *Memset) {
  createMemSetLoop(/* InsertBefore=*/Memset,
                   /* DstAddr=*/Memset->getRawDest(),
                   /* CopyLen=*/Memset->getLength(),
                   /* SetValue=*/Memset->getValue(),
                   /* Alignment=*/Memset->getDestAlign().valueOrOne(),
                   Memset->isVolatile());
}

void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
                                    const TargetTransformInfo &TTI,
                                    ScalarEvolution *SE) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ AtomicMemcpy->getLength(),
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  }
}