1 #include "llvm/Transforms/Utils/VNCoercion.h" 2 #include "llvm/Analysis/ConstantFolding.h" 3 #include "llvm/Analysis/ValueTracking.h" 4 #include "llvm/IR/IRBuilder.h" 5 #include "llvm/IR/IntrinsicInst.h" 6 7 #define DEBUG_TYPE "vncoerce" 8 9 namespace llvm { 10 namespace VNCoercion { 11 12 static bool isFirstClassAggregateOrScalableType(Type *Ty) { 13 return Ty->isStructTy() || Ty->isArrayTy() || isa<ScalableVectorType>(Ty); 14 } 15 16 /// Return true if coerceAvailableValueToLoadType will succeed. 17 bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, 18 const DataLayout &DL) { 19 Type *StoredTy = StoredVal->getType(); 20 21 if (StoredTy == LoadTy) 22 return true; 23 24 if (isa<ScalableVectorType>(StoredTy) && isa<ScalableVectorType>(LoadTy) && 25 DL.getTypeSizeInBits(StoredTy) == DL.getTypeSizeInBits(LoadTy)) 26 return true; 27 28 // If the loaded/stored value is a first class array/struct, or scalable type, 29 // don't try to transform them. We need to be able to bitcast to integer. 30 if (isFirstClassAggregateOrScalableType(LoadTy) || 31 isFirstClassAggregateOrScalableType(StoredTy)) 32 return false; 33 34 uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy).getFixedValue(); 35 36 // The store size must be byte-aligned to support future type casts. 37 if (llvm::alignTo(StoreSize, 8) != StoreSize) 38 return false; 39 40 // The store has to be at least as big as the load. 41 if (StoreSize < DL.getTypeSizeInBits(LoadTy).getFixedValue()) 42 return false; 43 44 bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType()); 45 bool LoadNI = DL.isNonIntegralPointerType(LoadTy->getScalarType()); 46 // Don't coerce non-integral pointers to integers or vice versa. 47 if (StoredNI != LoadNI) { 48 // As a special case, allow coercion of memset used to initialize 49 // an array w/null. Despite non-integral pointers not generally having a 50 // specific bit pattern, we do assume null is zero. 51 if (auto *CI = dyn_cast<Constant>(StoredVal)) 52 return CI->isNullValue(); 53 return false; 54 } else if (StoredNI && LoadNI && 55 StoredTy->getPointerAddressSpace() != 56 LoadTy->getPointerAddressSpace()) { 57 return false; 58 } 59 60 61 // The implementation below uses inttoptr for vectors of unequal size; we 62 // can't allow this for non integral pointers. We could teach it to extract 63 // exact subvectors if desired. 64 if (StoredNI && StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedValue()) 65 return false; 66 67 if (StoredTy->isTargetExtTy() || LoadTy->isTargetExtTy()) 68 return false; 69 70 return true; 71 } 72 73 /// If we saw a store of a value to memory, and 74 /// then a load from a must-aliased pointer of a different type, try to coerce 75 /// the stored value. LoadedTy is the type of the load we want to replace. 76 /// IRB is IRBuilder used to insert new instructions. 77 /// 78 /// If we can't do it, return null. 79 Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy, 80 IRBuilderBase &Helper, 81 const DataLayout &DL) { 82 assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) && 83 "precondition violation - materialization can't fail"); 84 if (auto *C = dyn_cast<Constant>(StoredVal)) 85 StoredVal = ConstantFoldConstant(C, DL); 86 87 // If this is already the right type, just return it. 88 Type *StoredValTy = StoredVal->getType(); 89 90 TypeSize StoredValSize = DL.getTypeSizeInBits(StoredValTy); 91 TypeSize LoadedValSize = DL.getTypeSizeInBits(LoadedTy); 92 93 // If the store and reload are the same size, we can always reuse it. 
  if (StoredValSize == LoadedValSize) {
    // Pointer to Pointer -> use bitcast.
    if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) {
      StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
    } else {
      // Convert source pointers to integers, which can be bitcast.
      if (StoredValTy->isPtrOrPtrVectorTy()) {
        StoredValTy = DL.getIntPtrType(StoredValTy);
        StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
      }

      Type *TypeToCastTo = LoadedTy;
      if (TypeToCastTo->isPtrOrPtrVectorTy())
        TypeToCastTo = DL.getIntPtrType(TypeToCastTo);

      if (StoredValTy != TypeToCastTo)
        StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo);

      // Cast to pointer if the load needs a pointer type.
      if (LoadedTy->isPtrOrPtrVectorTy())
        StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
    }

    if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
      StoredVal = ConstantFoldConstant(C, DL);

    return StoredVal;
  }
  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it. If the available value is too small, then we
  // can't do anything.
  assert(!StoredValSize.isScalable() &&
         TypeSize::isKnownGE(StoredValSize, LoadedValSize) &&
         "canCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (StoredValTy->isPtrOrPtrVectorTy()) {
    StoredValTy = DL.getIntPtrType(StoredValTy);
    StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!StoredValTy->isIntegerTy()) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
    StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
  if (DL.isBigEndian()) {
    uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy).getFixedValue() -
                        DL.getTypeStoreSizeInBits(LoadedTy).getFixedValue();
    StoredVal = Helper.CreateLShr(
        StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt));
  }

  // Truncate the integer to the right size now.
  Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
  StoredVal = Helper.CreateTruncOrBitCast(StoredVal, NewIntTy);

  if (LoadedTy != NewIntTy) {
    // If the result is a pointer, inttoptr.
    if (LoadedTy->isPtrOrPtrVectorTy())
      StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
    else
      // Otherwise, bitcast.
      StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
  }

  if (auto *C = dyn_cast<Constant>(StoredVal))
    StoredVal = ConstantFoldConstant(C, DL);

  return StoredVal;
}

/// This function is called when we have a memdep query of a load that ends up
/// being a clobbering memory write (store, memset, memcpy, memmove). This
/// means that the write *may* provide bits used by the load but we can't be
/// sure because the pointers don't must-alias.
///
/// Check this case to see if there is anything more we can do before we give
/// up. This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
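///
/// For example, an 8-byte write at offset 0 from a common base that is
/// reloaded as a 4-byte load at offset 4 yields 4; a load that extends past
/// the written bytes yields -1.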
static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
                                          Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const DataLayout &DL) {
  // If the loaded/stored value is a first-class array/struct or scalable type,
  // don't try to transform it. We need to be able to bitcast to integer.
  if (isFirstClassAggregateOrScalableType(LoadTy))
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase =
      GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
  if (StoreBase != LoadBase)
    return -1;

  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue();

  // Both sizes must be a whole number of bytes.
  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;
  uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes.
  LoadSize /= 8;

  // If the load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it. We could do something crazy in the future
  // (issue a smaller load and then merge the bits in), but this seems unlikely
  // to be valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset + int64_t(StoreSize) < LoadOffset + int64_t(LoadSize))
    return -1;

  // Okay, we can do this transformation. Return the number of bytes into the
  // store that the load is.
  return LoadOffset - StoreOffset;
}

/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
                                   StoreInst *DepSI, const DataLayout &DL) {
  auto *StoredVal = DepSI->getValueOperand();

  // Cannot handle reading from a store of a first-class aggregate or scalable
  // type.
  if (isFirstClassAggregateOrScalableType(StoredVal->getType()))
    return -1;

  if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL))
    return -1;

  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize =
      DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()).getFixedValue();
  return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize,
                                        DL);
}

/// This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load. See if
/// the other load can feed into the second load.
int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
                                  const DataLayout &DL) {
  // Cannot handle reading from a load of a first-class aggregate yet.
  if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
    return -1;

  if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL))
    return -1;

  Value *DepPtr = DepLI->getPointerOperand();
  uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()).getFixedValue();
  return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
}

int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
                                     MemIntrinsic *MI, const DataLayout &DL) {
  // If the mem operation is a non-constant size, we can't handle it.
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (!SizeCst)
    return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8;

  // If this is a memset, we just need to see if the offset is valid within the
  // size of the memset.
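  // A load of a non-integral pointer can only be fed by a memset of zero,
  // since null is the one non-integral pointer value assumed to have a known
  // (all-zero) bit pattern; see canCoerceMustAliasedValueToLoad.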
  if (const auto *memset_inst = dyn_cast<MemSetInst>(MI)) {
    if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
      auto *CI = dyn_cast<ConstantInt>(memset_inst->getValue());
      if (!CI || !CI->isZero())
        return -1;
    }
    return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                          MemSizeInBits, DL);
  }

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory. In that case, we can read directly from the
  // constant memory.
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (!Src)
    return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(Src));
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
    return -1;

  // See if the access is within the bounds of the transfer.
  int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                              MemSizeInBits, DL);
  if (Offset == -1)
    return Offset;

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
  if (ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset), DL))
    return Offset;
  return -1;
}

static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
                                         Type *LoadTy, IRBuilderBase &Builder,
                                         const DataLayout &DL) {
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  // If two pointers are in the same address space, they have the same size,
  // so we don't need to do any truncation, etc. This avoids introducing
  // ptrtoint instructions for pointers that may be non-integral.
  if (SrcVal->getType()->isPointerTy() && LoadTy->isPointerTy() &&
      cast<PointerType>(SrcVal->getType())->getAddressSpace() ==
          cast<PointerType>(LoadTy)->getAddressSpace()) {
    return SrcVal;
  }

  // Return scalable values directly to avoid needing to bitcast to integer
  // types, as we do not support non-zero Offsets.
  if (isa<ScalableVectorType>(LoadTy)) {
    assert(Offset == 0 && "Expected a zero offset for scalable types");
    return SrcVal;
  }

  uint64_t StoreSize =
      (DL.getTypeSizeInBits(SrcVal->getType()).getFixedValue() + 7) / 8;
  uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedValue() + 7) / 8;
  // Compute which bits of the stored value are being used by the load. Convert
  // to an integer type to start with.
  if (SrcVal->getType()->isPtrOrPtrVectorTy())
    SrcVal =
        Builder.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));
  if (!SrcVal->getType()->isIntegerTy())
    SrcVal =
        Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));

  // Shift the wanted bits down to the least significant position, depending on
  // endianness.
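  // E.g. extracting the 2-byte piece at Offset 2 of an 8-byte store shifts by
  // 16 bits on a little-endian target and by (8 - 2 - 2) * 8 = 32 bits on a
  // big-endian one.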
  unsigned ShiftAmt;
  if (DL.isLittleEndian())
    ShiftAmt = Offset * 8;
  else
    ShiftAmt = (StoreSize - LoadSize - Offset) * 8;
  if (ShiftAmt)
    SrcVal = Builder.CreateLShr(SrcVal,
                                ConstantInt::get(SrcVal->getType(), ShiftAmt));

  if (LoadSize != StoreSize)
    SrcVal = Builder.CreateTruncOrBitCast(SrcVal,
                                          IntegerType::get(Ctx, LoadSize * 8));
  return SrcVal;
}

Value *getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
                       Instruction *InsertPt, const DataLayout &DL) {
#ifndef NDEBUG
  TypeSize SrcValSize = DL.getTypeStoreSize(SrcVal->getType());
  TypeSize LoadSize = DL.getTypeStoreSize(LoadTy);
  assert(SrcValSize.isScalable() == LoadSize.isScalable());
  assert((SrcValSize.isScalable() || Offset + LoadSize <= SrcValSize) &&
         "Expected Offset + LoadSize <= SrcValSize");
  assert(
      (!SrcValSize.isScalable() || (Offset == 0 && LoadSize == SrcValSize)) &&
      "Expected scalable type sizes to match");
#endif
  IRBuilder<> Builder(InsertPt);
  SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
  return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL);
}

Constant *getConstantValueForLoad(Constant *SrcVal, unsigned Offset,
                                  Type *LoadTy, const DataLayout &DL) {
#ifndef NDEBUG
  unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType()).getFixedValue();
  unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue();
  assert(Offset + LoadSize <= SrcValSize);
#endif
  return ConstantFoldLoadFromConst(SrcVal, LoadTy, APInt(32, Offset), DL);
}

/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                              Type *LoadTy, Instruction *InsertPt,
                              const DataLayout &DL) {
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue() / 8;
  IRBuilder<> Builder(InsertPt);

  // We know that this method is only called when the mem intrinsic fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
    Value *Val = MSI->getValue();
    if (LoadSize != 1)
      Val =
          Builder.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8));
    Value *OneElt = Val;

    // Splat the value out to the right number of bits.
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet * 2 <= LoadSize) {
        Value *ShVal = Builder.CreateShl(
            Val, ConstantInt::get(Val->getType(), NumBytesSet * 8));
        Val = Builder.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise, insert one byte at a time.
      Value *ShVal =
          Builder.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8));
      Val = Builder.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return coerceAvailableValueToLoadType(Val, LoadTy, Builder, DL);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
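  // The analysis step (analyzeLoadFromClobberingMemInst) only accepts a
  // memcpy/memmove whose source is a constant global with a definitive
  // initializer, so the load folds directly out of that initializer at the
  // given offset.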
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());
  unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
  return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset),
                                      DL);
}

Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                         Type *LoadTy, const DataLayout &DL) {
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedValue() / 8;

  // We know that this method is only called when the mem intrinsic fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    auto *Val = dyn_cast<ConstantInt>(MSI->getValue());
    if (!Val)
      return nullptr;

    Val = ConstantInt::get(Ctx, APInt::getSplat(LoadSize * 8, Val->getValue()));
    return ConstantFoldLoadFromConst(Val, LoadTy, DL);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());
  unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
  return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset),
                                      DL);
}

} // namespace VNCoercion
} // namespace llvm