1 //===------ PPCLoopInstrFormPrep.cpp - Loop Instr Form Prep Pass ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a pass to prepare loops for ppc preferred addressing 10 // modes, leveraging different instruction form. (eg: DS/DQ form, D/DS form with 11 // update) 12 // Additional PHIs are created for loop induction variables used by load/store 13 // instructions so that preferred addressing modes can be used. 14 // 15 // 1: DS/DQ form preparation, prepare the load/store instructions so that they 16 // can satisfy the DS/DQ form displacement requirements. 17 // Generically, this means transforming loops like this: 18 // for (int i = 0; i < n; ++i) { 19 // unsigned long x1 = *(unsigned long *)(p + i + 5); 20 // unsigned long x2 = *(unsigned long *)(p + i + 9); 21 // } 22 // 23 // to look like this: 24 // 25 // unsigned NewP = p + 5; 26 // for (int i = 0; i < n; ++i) { 27 // unsigned long x1 = *(unsigned long *)(i + NewP); 28 // unsigned long x2 = *(unsigned long *)(i + NewP + 4); 29 // } 30 // 31 // 2: D/DS form with update preparation, prepare the load/store instructions so 32 // that we can use update form to do pre-increment. 33 // Generically, this means transforming loops like this: 34 // for (int i = 0; i < n; ++i) 35 // array[i] = c; 36 // 37 // to look like this: 38 // 39 // T *p = array[-1]; 40 // for (int i = 0; i < n; ++i) 41 // *++p = c; 42 //===----------------------------------------------------------------------===// 43 44 #include "PPC.h" 45 #include "PPCSubtarget.h" 46 #include "PPCTargetMachine.h" 47 #include "llvm/ADT/DepthFirstIterator.h" 48 #include "llvm/ADT/SmallPtrSet.h" 49 #include "llvm/ADT/SmallSet.h" 50 #include "llvm/ADT/SmallVector.h" 51 #include "llvm/ADT/Statistic.h" 52 #include "llvm/Analysis/LoopInfo.h" 53 #include "llvm/Analysis/ScalarEvolution.h" 54 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 55 #include "llvm/IR/BasicBlock.h" 56 #include "llvm/IR/CFG.h" 57 #include "llvm/IR/Dominators.h" 58 #include "llvm/IR/Instruction.h" 59 #include "llvm/IR/Instructions.h" 60 #include "llvm/IR/IntrinsicInst.h" 61 #include "llvm/IR/IntrinsicsPowerPC.h" 62 #include "llvm/IR/Module.h" 63 #include "llvm/IR/Type.h" 64 #include "llvm/IR/Value.h" 65 #include "llvm/InitializePasses.h" 66 #include "llvm/Pass.h" 67 #include "llvm/Support/Casting.h" 68 #include "llvm/Support/CommandLine.h" 69 #include "llvm/Support/Debug.h" 70 #include "llvm/Transforms/Scalar.h" 71 #include "llvm/Transforms/Utils.h" 72 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 73 #include "llvm/Transforms/Utils/Local.h" 74 #include "llvm/Transforms/Utils/LoopUtils.h" 75 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 76 #include <cassert> 77 #include <iterator> 78 #include <utility> 79 80 #define DEBUG_TYPE "ppc-loop-instr-form-prep" 81 82 using namespace llvm; 83 84 static cl::opt<unsigned> MaxVarsPrep("ppc-formprep-max-vars", 85 cl::Hidden, cl::init(24), 86 cl::desc("Potential common base number threshold per function for PPC loop " 87 "prep")); 88 89 static cl::opt<bool> PreferUpdateForm("ppc-formprep-prefer-update", 90 cl::init(true), cl::Hidden, 91 cl::desc("prefer update form when ds form is also a update form")); 92 93 // Sum of following 3 per loop thresholds for all loops can not be larger 94 // than MaxVarsPrep. 95 // now the thresholds for each kind prep are exterimental values on Power9. 96 static cl::opt<unsigned> MaxVarsUpdateForm("ppc-preinc-prep-max-vars", 97 cl::Hidden, cl::init(3), 98 cl::desc("Potential PHI threshold per loop for PPC loop prep of update " 99 "form")); 100 101 static cl::opt<unsigned> MaxVarsDSForm("ppc-dsprep-max-vars", 102 cl::Hidden, cl::init(3), 103 cl::desc("Potential PHI threshold per loop for PPC loop prep of DS form")); 104 105 static cl::opt<unsigned> MaxVarsDQForm("ppc-dqprep-max-vars", 106 cl::Hidden, cl::init(8), 107 cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form")); 108 109 110 // If would not be profitable if the common base has only one load/store, ISEL 111 // should already be able to choose best load/store form based on offset for 112 // single load/store. Set minimal profitable value default to 2 and make it as 113 // an option. 114 static cl::opt<unsigned> DispFormPrepMinThreshold("ppc-dispprep-min-threshold", 115 cl::Hidden, cl::init(2), 116 cl::desc("Minimal common base load/store instructions triggering DS/DQ form " 117 "preparation")); 118 119 STATISTIC(PHINodeAlreadyExistsUpdate, "PHI node already in pre-increment form"); 120 STATISTIC(PHINodeAlreadyExistsDS, "PHI node already in DS form"); 121 STATISTIC(PHINodeAlreadyExistsDQ, "PHI node already in DQ form"); 122 STATISTIC(DSFormChainRewritten, "Num of DS form chain rewritten"); 123 STATISTIC(DQFormChainRewritten, "Num of DQ form chain rewritten"); 124 STATISTIC(UpdFormChainRewritten, "Num of update form chain rewritten"); 125 126 namespace { 127 struct BucketElement { 128 BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {} 129 BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {} 130 131 const SCEVConstant *Offset; 132 Instruction *Instr; 133 }; 134 135 struct Bucket { 136 Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B), 137 Elements(1, BucketElement(I)) {} 138 139 const SCEV *BaseSCEV; 140 SmallVector<BucketElement, 16> Elements; 141 }; 142 143 // "UpdateForm" is not a real PPC instruction form, it stands for dform 144 // load/store with update like ldu/stdu, or Prefetch intrinsic. 145 // For DS form instructions, their displacements must be multiple of 4. 146 // For DQ form instructions, their displacements must be multiple of 16. 147 enum InstrForm { UpdateForm = 1, DSForm = 4, DQForm = 16 }; 148 149 class PPCLoopInstrFormPrep : public FunctionPass { 150 public: 151 static char ID; // Pass ID, replacement for typeid 152 153 PPCLoopInstrFormPrep() : FunctionPass(ID) { 154 initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry()); 155 } 156 157 PPCLoopInstrFormPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { 158 initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry()); 159 } 160 161 void getAnalysisUsage(AnalysisUsage &AU) const override { 162 AU.addPreserved<DominatorTreeWrapperPass>(); 163 AU.addRequired<LoopInfoWrapperPass>(); 164 AU.addPreserved<LoopInfoWrapperPass>(); 165 AU.addRequired<ScalarEvolutionWrapperPass>(); 166 } 167 168 bool runOnFunction(Function &F) override; 169 170 private: 171 PPCTargetMachine *TM = nullptr; 172 const PPCSubtarget *ST; 173 DominatorTree *DT; 174 LoopInfo *LI; 175 ScalarEvolution *SE; 176 bool PreserveLCSSA; 177 178 /// Successful preparation number for Update/DS/DQ form in all inner most 179 /// loops. One successful preparation will put one common base out of loop, 180 /// this may leads to register presure like LICM does. 181 /// Make sure total preparation number can be controlled by option. 182 unsigned SuccPrepCount; 183 184 bool runOnLoop(Loop *L); 185 186 /// Check if required PHI node is already exist in Loop \p L. 187 bool alreadyPrepared(Loop *L, Instruction *MemI, 188 const SCEV *BasePtrStartSCEV, 189 const SCEV *BasePtrIncSCEV, InstrForm Form); 190 191 /// Get the value which defines the increment SCEV \p BasePtrIncSCEV. 192 Value *getNodeForInc(Loop *L, Instruction *MemI, 193 const SCEV *BasePtrIncSCEV); 194 195 /// Collect condition matched(\p isValidCandidate() returns true) 196 /// candidates in Loop \p L. 197 SmallVector<Bucket, 16> collectCandidates( 198 Loop *L, 199 std::function<bool(const Instruction *, const Value *, const Type *)> 200 isValidCandidate, 201 unsigned MaxCandidateNum); 202 203 /// Add a candidate to candidates \p Buckets. 204 void addOneCandidate(Instruction *MemI, const SCEV *LSCEV, 205 SmallVector<Bucket, 16> &Buckets, 206 unsigned MaxCandidateNum); 207 208 /// Prepare all candidates in \p Buckets for update form. 209 bool updateFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets); 210 211 /// Prepare all candidates in \p Buckets for displacement form, now for 212 /// ds/dq. 213 bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets, 214 InstrForm Form); 215 216 /// Prepare for one chain \p BucketChain, find the best base element and 217 /// update all other elements in \p BucketChain accordingly. 218 /// \p Form is used to find the best base element. 219 /// If success, best base element must be stored as the first element of 220 /// \p BucketChain. 221 /// Return false if no base element found, otherwise return true. 222 bool prepareBaseForDispFormChain(Bucket &BucketChain, 223 InstrForm Form); 224 225 /// Prepare for one chain \p BucketChain, find the best base element and 226 /// update all other elements in \p BucketChain accordingly. 227 /// If success, best base element must be stored as the first element of 228 /// \p BucketChain. 229 /// Return false if no base element found, otherwise return true. 230 bool prepareBaseForUpdateFormChain(Bucket &BucketChain); 231 232 /// Rewrite load/store instructions in \p BucketChain according to 233 /// preparation. 234 bool rewriteLoadStores(Loop *L, Bucket &BucketChain, 235 SmallSet<BasicBlock *, 16> &BBChanged, 236 InstrForm Form); 237 }; 238 239 } // end anonymous namespace 240 241 char PPCLoopInstrFormPrep::ID = 0; 242 static const char *name = "Prepare loop for ppc preferred instruction forms"; 243 INITIALIZE_PASS_BEGIN(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false) 244 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 245 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 246 INITIALIZE_PASS_END(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false) 247 248 static constexpr StringRef PHINodeNameSuffix = ".phi"; 249 static constexpr StringRef CastNodeNameSuffix = ".cast"; 250 static constexpr StringRef GEPNodeIncNameSuffix = ".inc"; 251 static constexpr StringRef GEPNodeOffNameSuffix = ".off"; 252 253 FunctionPass *llvm::createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM) { 254 return new PPCLoopInstrFormPrep(TM); 255 } 256 257 static bool IsPtrInBounds(Value *BasePtr) { 258 Value *StrippedBasePtr = BasePtr; 259 while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBasePtr)) 260 StrippedBasePtr = BC->getOperand(0); 261 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(StrippedBasePtr)) 262 return GEP->isInBounds(); 263 264 return false; 265 } 266 267 static std::string getInstrName(const Value *I, StringRef Suffix) { 268 assert(I && "Invalid paramater!"); 269 if (I->hasName()) 270 return (I->getName() + Suffix).str(); 271 else 272 return ""; 273 } 274 275 static Value *getPointerOperandAndType(Value *MemI, 276 Type **PtrElementType = nullptr) { 277 278 Value *PtrValue = nullptr; 279 Type *PointerElementType = nullptr; 280 281 if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) { 282 PtrValue = LMemI->getPointerOperand(); 283 PointerElementType = LMemI->getType(); 284 } else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) { 285 PtrValue = SMemI->getPointerOperand(); 286 PointerElementType = SMemI->getValueOperand()->getType(); 287 } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) { 288 PointerElementType = Type::getInt8Ty(MemI->getContext()); 289 if (IMemI->getIntrinsicID() == Intrinsic::prefetch || 290 IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) { 291 PtrValue = IMemI->getArgOperand(0); 292 } else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) { 293 PtrValue = IMemI->getArgOperand(1); 294 } 295 } 296 /*Get ElementType if PtrElementType is not null.*/ 297 if (PtrElementType) 298 *PtrElementType = PointerElementType; 299 300 return PtrValue; 301 } 302 303 bool PPCLoopInstrFormPrep::runOnFunction(Function &F) { 304 if (skipFunction(F)) 305 return false; 306 307 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 308 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 309 auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); 310 DT = DTWP ? &DTWP->getDomTree() : nullptr; 311 PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); 312 ST = TM ? TM->getSubtargetImpl(F) : nullptr; 313 SuccPrepCount = 0; 314 315 bool MadeChange = false; 316 317 for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) 318 for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) 319 MadeChange |= runOnLoop(*L); 320 321 return MadeChange; 322 } 323 324 void PPCLoopInstrFormPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV, 325 SmallVector<Bucket, 16> &Buckets, 326 unsigned MaxCandidateNum) { 327 assert((MemI && getPointerOperandAndType(MemI)) && 328 "Candidate should be a memory instruction."); 329 assert(LSCEV && "Invalid SCEV for Ptr value."); 330 bool FoundBucket = false; 331 for (auto &B : Buckets) { 332 const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV); 333 if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) { 334 B.Elements.push_back(BucketElement(CDiff, MemI)); 335 FoundBucket = true; 336 break; 337 } 338 } 339 340 if (!FoundBucket) { 341 if (Buckets.size() == MaxCandidateNum) 342 return; 343 Buckets.push_back(Bucket(LSCEV, MemI)); 344 } 345 } 346 347 SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates( 348 Loop *L, 349 std::function<bool(const Instruction *, const Value *, const Type *)> 350 isValidCandidate, 351 unsigned MaxCandidateNum) { 352 SmallVector<Bucket, 16> Buckets; 353 for (const auto &BB : L->blocks()) 354 for (auto &J : *BB) { 355 Value *PtrValue = nullptr; 356 Type *PointerElementType = nullptr; 357 PtrValue = getPointerOperandAndType(&J, &PointerElementType); 358 359 if (!PtrValue) 360 continue; 361 362 if (PtrValue->getType()->getPointerAddressSpace()) 363 continue; 364 365 if (L->isLoopInvariant(PtrValue)) 366 continue; 367 368 const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L); 369 const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV); 370 if (!LARSCEV || LARSCEV->getLoop() != L) 371 continue; 372 373 if (isValidCandidate(&J, PtrValue, PointerElementType)) 374 addOneCandidate(&J, LSCEV, Buckets, MaxCandidateNum); 375 } 376 return Buckets; 377 } 378 379 bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain, 380 InstrForm Form) { 381 // RemainderOffsetInfo details: 382 // key: value of (Offset urem DispConstraint). For DSForm, it can 383 // be [0, 4). 384 // first of pair: the index of first BucketElement whose remainder is equal 385 // to key. For key 0, this value must be 0. 386 // second of pair: number of load/stores with the same remainder. 387 DenseMap<unsigned, std::pair<unsigned, unsigned>> RemainderOffsetInfo; 388 389 for (unsigned j = 0, je = BucketChain.Elements.size(); j != je; ++j) { 390 if (!BucketChain.Elements[j].Offset) 391 RemainderOffsetInfo[0] = std::make_pair(0, 1); 392 else { 393 unsigned Remainder = 394 BucketChain.Elements[j].Offset->getAPInt().urem(Form); 395 if (RemainderOffsetInfo.find(Remainder) == RemainderOffsetInfo.end()) 396 RemainderOffsetInfo[Remainder] = std::make_pair(j, 1); 397 else 398 RemainderOffsetInfo[Remainder].second++; 399 } 400 } 401 // Currently we choose the most profitable base as the one which has the max 402 // number of load/store with same remainder. 403 // FIXME: adjust the base selection strategy according to load/store offset 404 // distribution. 405 // For example, if we have one candidate chain for DS form preparation, which 406 // contains following load/stores with different remainders: 407 // 1: 10 load/store whose remainder is 1; 408 // 2: 9 load/store whose remainder is 2; 409 // 3: 1 for remainder 3 and 0 for remainder 0; 410 // Now we will choose the first load/store whose remainder is 1 as base and 411 // adjust all other load/stores according to new base, so we will get 10 DS 412 // form and 10 X form. 413 // But we should be more clever, for this case we could use two bases, one for 414 // remainder 1 and the other for remainder 2, thus we could get 19 DS form and 415 // 1 X form. 416 unsigned MaxCountRemainder = 0; 417 for (unsigned j = 0; j < (unsigned)Form; j++) 418 if ((RemainderOffsetInfo.find(j) != RemainderOffsetInfo.end()) && 419 RemainderOffsetInfo[j].second > 420 RemainderOffsetInfo[MaxCountRemainder].second) 421 MaxCountRemainder = j; 422 423 // Abort when there are too few insts with common base. 424 if (RemainderOffsetInfo[MaxCountRemainder].second < DispFormPrepMinThreshold) 425 return false; 426 427 // If the first value is most profitable, no needed to adjust BucketChain 428 // elements as they are substracted the first value when collecting. 429 if (MaxCountRemainder == 0) 430 return true; 431 432 // Adjust load/store to the new chosen base. 433 const SCEV *Offset = 434 BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first].Offset; 435 BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset); 436 for (auto &E : BucketChain.Elements) { 437 if (E.Offset) 438 E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset)); 439 else 440 E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset)); 441 } 442 443 std::swap(BucketChain.Elements[RemainderOffsetInfo[MaxCountRemainder].first], 444 BucketChain.Elements[0]); 445 return true; 446 } 447 448 // FIXME: implement a more clever base choosing policy. 449 // Currently we always choose an exist load/store offset. This maybe lead to 450 // suboptimal code sequences. For example, for one DS chain with offsets 451 // {-32769, 2003, 2007, 2011}, we choose -32769 as base offset, and left disp 452 // for load/stores are {0, 34772, 34776, 34780}. Though each offset now is a 453 // multipler of 4, it cannot be represented by sint16. 454 bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) { 455 // We have a choice now of which instruction's memory operand we use as the 456 // base for the generated PHI. Always picking the first instruction in each 457 // bucket does not work well, specifically because that instruction might 458 // be a prefetch (and there are no pre-increment dcbt variants). Otherwise, 459 // the choice is somewhat arbitrary, because the backend will happily 460 // generate direct offsets from both the pre-incremented and 461 // post-incremented pointer values. Thus, we'll pick the first non-prefetch 462 // instruction in each bucket, and adjust the recurrence and other offsets 463 // accordingly. 464 for (int j = 0, je = BucketChain.Elements.size(); j != je; ++j) { 465 if (auto *II = dyn_cast<IntrinsicInst>(BucketChain.Elements[j].Instr)) 466 if (II->getIntrinsicID() == Intrinsic::prefetch) 467 continue; 468 469 // If we'd otherwise pick the first element anyway, there's nothing to do. 470 if (j == 0) 471 break; 472 473 // If our chosen element has no offset from the base pointer, there's 474 // nothing to do. 475 if (!BucketChain.Elements[j].Offset || 476 BucketChain.Elements[j].Offset->isZero()) 477 break; 478 479 const SCEV *Offset = BucketChain.Elements[j].Offset; 480 BucketChain.BaseSCEV = SE->getAddExpr(BucketChain.BaseSCEV, Offset); 481 for (auto &E : BucketChain.Elements) { 482 if (E.Offset) 483 E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset)); 484 else 485 E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset)); 486 } 487 488 std::swap(BucketChain.Elements[j], BucketChain.Elements[0]); 489 break; 490 } 491 return true; 492 } 493 494 bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain, 495 SmallSet<BasicBlock *, 16> &BBChanged, 496 InstrForm Form) { 497 bool MadeChange = false; 498 const SCEVAddRecExpr *BasePtrSCEV = 499 cast<SCEVAddRecExpr>(BucketChain.BaseSCEV); 500 if (!BasePtrSCEV->isAffine()) 501 return MadeChange; 502 503 LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n"); 504 505 assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?"); 506 507 // The instruction corresponding to the Bucket's BaseSCEV must be the first 508 // in the vector of elements. 509 Instruction *MemI = BucketChain.Elements.begin()->Instr; 510 Value *BasePtr = getPointerOperandAndType(MemI); 511 assert(BasePtr && "No pointer operand"); 512 513 Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext()); 514 Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(), 515 BasePtr->getType()->getPointerAddressSpace()); 516 517 if (!SE->isLoopInvariant(BasePtrSCEV->getStart(), L)) 518 return MadeChange; 519 520 bool IsConstantInc = false; 521 const SCEV *BasePtrIncSCEV = BasePtrSCEV->getStepRecurrence(*SE); 522 Value *IncNode = getNodeForInc(L, MemI, BasePtrIncSCEV); 523 524 const SCEVConstant *BasePtrIncConstantSCEV = 525 dyn_cast<SCEVConstant>(BasePtrIncSCEV); 526 if (BasePtrIncConstantSCEV) 527 IsConstantInc = true; 528 529 // No valid representation for the increment. 530 if (!IncNode) { 531 LLVM_DEBUG(dbgs() << "Loop Increasement can not be represented!\n"); 532 return MadeChange; 533 } 534 535 // Now we only handle update form for constant increment. 536 // FIXME: add support for non-constant increment UpdateForm. 537 if (!IsConstantInc && Form == UpdateForm) { 538 LLVM_DEBUG(dbgs() << "not a constant increment for update form!\n"); 539 return MadeChange; 540 } 541 542 // For some DS form load/store instructions, it can also be an update form, 543 // if the stride is constant and is a multipler of 4. Use update form if 544 // prefer it. 545 bool CanPreInc = 546 (Form == UpdateForm || 547 ((Form == DSForm) && IsConstantInc && 548 !BasePtrIncConstantSCEV->getAPInt().urem(4) && PreferUpdateForm)); 549 const SCEV *BasePtrStartSCEV = nullptr; 550 if (CanPreInc) 551 BasePtrStartSCEV = 552 SE->getMinusSCEV(BasePtrSCEV->getStart(), BasePtrIncConstantSCEV); 553 else 554 BasePtrStartSCEV = BasePtrSCEV->getStart(); 555 556 if (!isSafeToExpand(BasePtrStartSCEV, *SE)) 557 return MadeChange; 558 559 if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV, Form)) { 560 LLVM_DEBUG(dbgs() << "Instruction form is already prepared!\n"); 561 return MadeChange; 562 } 563 564 LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n"); 565 566 BasicBlock *Header = L->getHeader(); 567 unsigned HeaderLoopPredCount = pred_size(Header); 568 BasicBlock *LoopPredecessor = L->getLoopPredecessor(); 569 570 PHINode *NewPHI = 571 PHINode::Create(I8PtrTy, HeaderLoopPredCount, 572 getInstrName(MemI, PHINodeNameSuffix), 573 Header->getFirstNonPHI()); 574 575 SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart"); 576 Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy, 577 LoopPredecessor->getTerminator()); 578 579 // Note that LoopPredecessor might occur in the predecessor list multiple 580 // times, and we need to add it the right number of times. 581 for (auto PI : predecessors(Header)) { 582 if (PI != LoopPredecessor) 583 continue; 584 585 NewPHI->addIncoming(BasePtrStart, LoopPredecessor); 586 } 587 588 Instruction *PtrInc = nullptr; 589 Instruction *NewBasePtr = nullptr; 590 if (CanPreInc) { 591 assert(BasePtrIncConstantSCEV && 592 "update form now only supports constant increment."); 593 Instruction *InsPoint = &*Header->getFirstInsertionPt(); 594 PtrInc = GetElementPtrInst::Create( 595 I8Ty, NewPHI, BasePtrIncConstantSCEV->getValue(), 596 getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint); 597 cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr)); 598 for (auto PI : predecessors(Header)) { 599 if (PI == LoopPredecessor) 600 continue; 601 602 NewPHI->addIncoming(PtrInc, PI); 603 } 604 if (PtrInc->getType() != BasePtr->getType()) 605 NewBasePtr = new BitCastInst( 606 PtrInc, BasePtr->getType(), 607 getInstrName(PtrInc, CastNodeNameSuffix), InsPoint); 608 else 609 NewBasePtr = PtrInc; 610 } else { 611 // Note that LoopPredecessor might occur in the predecessor list multiple 612 // times, and we need to make sure no more incoming value for them in PHI. 613 for (auto PI : predecessors(Header)) { 614 if (PI == LoopPredecessor) 615 continue; 616 617 // For the latch predecessor, we need to insert a GEP just before the 618 // terminator to increase the address. 619 BasicBlock *BB = PI; 620 Instruction *InsPoint = BB->getTerminator(); 621 PtrInc = GetElementPtrInst::Create( 622 I8Ty, NewPHI, IncNode, getInstrName(MemI, GEPNodeIncNameSuffix), 623 InsPoint); 624 cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr)); 625 626 NewPHI->addIncoming(PtrInc, PI); 627 } 628 PtrInc = NewPHI; 629 if (NewPHI->getType() != BasePtr->getType()) 630 NewBasePtr = 631 new BitCastInst(NewPHI, BasePtr->getType(), 632 getInstrName(NewPHI, CastNodeNameSuffix), 633 &*Header->getFirstInsertionPt()); 634 else 635 NewBasePtr = NewPHI; 636 } 637 638 // Clear the rewriter cache, because values that are in the rewriter's cache 639 // can be deleted below, causing the AssertingVH in the cache to trigger. 640 SCEVE.clear(); 641 642 if (Instruction *IDel = dyn_cast<Instruction>(BasePtr)) 643 BBChanged.insert(IDel->getParent()); 644 BasePtr->replaceAllUsesWith(NewBasePtr); 645 RecursivelyDeleteTriviallyDeadInstructions(BasePtr); 646 647 // Keep track of the replacement pointer values we've inserted so that we 648 // don't generate more pointer values than necessary. 649 SmallPtrSet<Value *, 16> NewPtrs; 650 NewPtrs.insert(NewBasePtr); 651 652 for (auto I = std::next(BucketChain.Elements.begin()), 653 IE = BucketChain.Elements.end(); I != IE; ++I) { 654 Value *Ptr = getPointerOperandAndType(I->Instr); 655 assert(Ptr && "No pointer operand"); 656 if (NewPtrs.count(Ptr)) 657 continue; 658 659 Instruction *RealNewPtr; 660 if (!I->Offset || I->Offset->getValue()->isZero()) { 661 RealNewPtr = NewBasePtr; 662 } else { 663 Instruction *PtrIP = dyn_cast<Instruction>(Ptr); 664 if (PtrIP && isa<Instruction>(NewBasePtr) && 665 cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent()) 666 PtrIP = nullptr; 667 else if (PtrIP && isa<PHINode>(PtrIP)) 668 PtrIP = &*PtrIP->getParent()->getFirstInsertionPt(); 669 else if (!PtrIP) 670 PtrIP = I->Instr; 671 672 GetElementPtrInst *NewPtr = GetElementPtrInst::Create( 673 I8Ty, PtrInc, I->Offset->getValue(), 674 getInstrName(I->Instr, GEPNodeOffNameSuffix), PtrIP); 675 if (!PtrIP) 676 NewPtr->insertAfter(cast<Instruction>(PtrInc)); 677 NewPtr->setIsInBounds(IsPtrInBounds(Ptr)); 678 RealNewPtr = NewPtr; 679 } 680 681 if (Instruction *IDel = dyn_cast<Instruction>(Ptr)) 682 BBChanged.insert(IDel->getParent()); 683 684 Instruction *ReplNewPtr; 685 if (Ptr->getType() != RealNewPtr->getType()) { 686 ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(), 687 getInstrName(Ptr, CastNodeNameSuffix)); 688 ReplNewPtr->insertAfter(RealNewPtr); 689 } else 690 ReplNewPtr = RealNewPtr; 691 692 Ptr->replaceAllUsesWith(ReplNewPtr); 693 RecursivelyDeleteTriviallyDeadInstructions(Ptr); 694 695 NewPtrs.insert(RealNewPtr); 696 } 697 698 MadeChange = true; 699 700 SuccPrepCount++; 701 702 if (Form == DSForm && !CanPreInc) 703 DSFormChainRewritten++; 704 else if (Form == DQForm) 705 DQFormChainRewritten++; 706 else if (Form == UpdateForm || (Form == DSForm && CanPreInc)) 707 UpdFormChainRewritten++; 708 709 return MadeChange; 710 } 711 712 bool PPCLoopInstrFormPrep::updateFormPrep(Loop *L, 713 SmallVector<Bucket, 16> &Buckets) { 714 bool MadeChange = false; 715 if (Buckets.empty()) 716 return MadeChange; 717 SmallSet<BasicBlock *, 16> BBChanged; 718 for (auto &Bucket : Buckets) 719 // The base address of each bucket is transformed into a phi and the others 720 // are rewritten based on new base. 721 if (prepareBaseForUpdateFormChain(Bucket)) 722 MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, UpdateForm); 723 724 if (MadeChange) 725 for (auto &BB : L->blocks()) 726 if (BBChanged.count(BB)) 727 DeleteDeadPHIs(BB); 728 return MadeChange; 729 } 730 731 bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets, 732 InstrForm Form) { 733 bool MadeChange = false; 734 735 if (Buckets.empty()) 736 return MadeChange; 737 738 SmallSet<BasicBlock *, 16> BBChanged; 739 for (auto &Bucket : Buckets) { 740 if (Bucket.Elements.size() < DispFormPrepMinThreshold) 741 continue; 742 if (prepareBaseForDispFormChain(Bucket, Form)) 743 MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, Form); 744 } 745 746 if (MadeChange) 747 for (auto &BB : L->blocks()) 748 if (BBChanged.count(BB)) 749 DeleteDeadPHIs(BB); 750 return MadeChange; 751 } 752 753 // Find the loop invariant increment node for SCEV BasePtrIncSCEV. 754 // bb.loop.preheader: 755 // %start = ... 756 // bb.loop.body: 757 // %phinode = phi [ %start, %bb.loop.preheader ], [ %add, %bb.loop.body ] 758 // ... 759 // %add = add %phinode, %inc ; %inc is what we want to get. 760 // 761 Value *PPCLoopInstrFormPrep::getNodeForInc(Loop *L, Instruction *MemI, 762 const SCEV *BasePtrIncSCEV) { 763 // If the increment is a constant, no definition is needed. 764 // Return the value directly. 765 if (isa<SCEVConstant>(BasePtrIncSCEV)) 766 return cast<SCEVConstant>(BasePtrIncSCEV)->getValue(); 767 768 if (!SE->isLoopInvariant(BasePtrIncSCEV, L)) 769 return nullptr; 770 771 BasicBlock *BB = MemI->getParent(); 772 if (!BB) 773 return nullptr; 774 775 BasicBlock *LatchBB = L->getLoopLatch(); 776 777 if (!LatchBB) 778 return nullptr; 779 780 // Run through the PHIs and check their operands to find valid representation 781 // for the increment SCEV. 782 iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis(); 783 for (auto &CurrentPHI : PHIIter) { 784 PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI); 785 if (!CurrentPHINode) 786 continue; 787 788 if (!SE->isSCEVable(CurrentPHINode->getType())) 789 continue; 790 791 const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L); 792 793 const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV); 794 if (!PHIBasePtrSCEV) 795 continue; 796 797 const SCEV *PHIBasePtrIncSCEV = PHIBasePtrSCEV->getStepRecurrence(*SE); 798 799 if (!PHIBasePtrIncSCEV || (PHIBasePtrIncSCEV != BasePtrIncSCEV)) 800 continue; 801 802 // Get the incoming value from the loop latch and check if the value has 803 // the add form with the required increment. 804 if (Instruction *I = dyn_cast<Instruction>( 805 CurrentPHINode->getIncomingValueForBlock(LatchBB))) { 806 Value *StrippedBaseI = I; 807 while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBaseI)) 808 StrippedBaseI = BC->getOperand(0); 809 810 Instruction *StrippedI = dyn_cast<Instruction>(StrippedBaseI); 811 if (!StrippedI) 812 continue; 813 814 // LSR pass may add a getelementptr instruction to do the loop increment, 815 // also search in that getelementptr instruction. 816 if (StrippedI->getOpcode() == Instruction::Add || 817 (StrippedI->getOpcode() == Instruction::GetElementPtr && 818 StrippedI->getNumOperands() == 2)) { 819 if (SE->getSCEVAtScope(StrippedI->getOperand(0), L) == BasePtrIncSCEV) 820 return StrippedI->getOperand(0); 821 if (SE->getSCEVAtScope(StrippedI->getOperand(1), L) == BasePtrIncSCEV) 822 return StrippedI->getOperand(1); 823 } 824 } 825 } 826 return nullptr; 827 } 828 829 // In order to prepare for the preferred instruction form, a PHI is added. 830 // This function will check to see if that PHI already exists and will return 831 // true if it found an existing PHI with the matched start and increment as the 832 // one we wanted to create. 833 bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction *MemI, 834 const SCEV *BasePtrStartSCEV, 835 const SCEV *BasePtrIncSCEV, 836 InstrForm Form) { 837 BasicBlock *BB = MemI->getParent(); 838 if (!BB) 839 return false; 840 841 BasicBlock *PredBB = L->getLoopPredecessor(); 842 BasicBlock *LatchBB = L->getLoopLatch(); 843 844 if (!PredBB || !LatchBB) 845 return false; 846 847 // Run through the PHIs and see if we have some that looks like a preparation 848 iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis(); 849 for (auto & CurrentPHI : PHIIter) { 850 PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI); 851 if (!CurrentPHINode) 852 continue; 853 854 if (!SE->isSCEVable(CurrentPHINode->getType())) 855 continue; 856 857 const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L); 858 859 const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV); 860 if (!PHIBasePtrSCEV) 861 continue; 862 863 const SCEVConstant *PHIBasePtrIncSCEV = 864 dyn_cast<SCEVConstant>(PHIBasePtrSCEV->getStepRecurrence(*SE)); 865 if (!PHIBasePtrIncSCEV) 866 continue; 867 868 if (CurrentPHINode->getNumIncomingValues() == 2) { 869 if ((CurrentPHINode->getIncomingBlock(0) == LatchBB && 870 CurrentPHINode->getIncomingBlock(1) == PredBB) || 871 (CurrentPHINode->getIncomingBlock(1) == LatchBB && 872 CurrentPHINode->getIncomingBlock(0) == PredBB)) { 873 if (PHIBasePtrIncSCEV == BasePtrIncSCEV) { 874 // The existing PHI (CurrentPHINode) has the same start and increment 875 // as the PHI that we wanted to create. 876 if (Form == UpdateForm && 877 PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) { 878 ++PHINodeAlreadyExistsUpdate; 879 return true; 880 } 881 if (Form == DSForm || Form == DQForm) { 882 const SCEVConstant *Diff = dyn_cast<SCEVConstant>( 883 SE->getMinusSCEV(PHIBasePtrSCEV->getStart(), BasePtrStartSCEV)); 884 if (Diff && !Diff->getAPInt().urem(Form)) { 885 if (Form == DSForm) 886 ++PHINodeAlreadyExistsDS; 887 else 888 ++PHINodeAlreadyExistsDQ; 889 return true; 890 } 891 } 892 } 893 } 894 } 895 } 896 return false; 897 } 898 899 bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) { 900 bool MadeChange = false; 901 902 // Only prep. the inner-most loop 903 if (!L->isInnermost()) 904 return MadeChange; 905 906 // Return if already done enough preparation. 907 if (SuccPrepCount >= MaxVarsPrep) 908 return MadeChange; 909 910 LLVM_DEBUG(dbgs() << "PIP: Examining: " << *L << "\n"); 911 912 BasicBlock *LoopPredecessor = L->getLoopPredecessor(); 913 // If there is no loop predecessor, or the loop predecessor's terminator 914 // returns a value (which might contribute to determining the loop's 915 // iteration space), insert a new preheader for the loop. 916 if (!LoopPredecessor || 917 !LoopPredecessor->getTerminator()->getType()->isVoidTy()) { 918 LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA); 919 if (LoopPredecessor) 920 MadeChange = true; 921 } 922 if (!LoopPredecessor) { 923 LLVM_DEBUG(dbgs() << "PIP fails since no predecessor for current loop.\n"); 924 return MadeChange; 925 } 926 // Check if a load/store has update form. This lambda is used by function 927 // collectCandidates which can collect candidates for types defined by lambda. 928 auto isUpdateFormCandidate = [&](const Instruction *I, const Value *PtrValue, 929 const Type *PointerElementType) { 930 assert((PtrValue && I) && "Invalid parameter!"); 931 // There are no update forms for Altivec vector load/stores. 932 if (ST && ST->hasAltivec() && PointerElementType->isVectorTy()) 933 return false; 934 // There are no update forms for P10 lxvp/stxvp intrinsic. 935 auto *II = dyn_cast<IntrinsicInst>(I); 936 if (II && ((II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) || 937 II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp)) 938 return false; 939 // See getPreIndexedAddressParts, the displacement for LDU/STDU has to 940 // be 4's multiple (DS-form). For i64 loads/stores when the displacement 941 // fits in a 16-bit signed field but isn't a multiple of 4, it will be 942 // useless and possible to break some original well-form addressing mode 943 // to make this pre-inc prep for it. 944 if (PointerElementType->isIntegerTy(64)) { 945 const SCEV *LSCEV = SE->getSCEVAtScope(const_cast<Value *>(PtrValue), L); 946 const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV); 947 if (!LARSCEV || LARSCEV->getLoop() != L) 948 return false; 949 if (const SCEVConstant *StepConst = 950 dyn_cast<SCEVConstant>(LARSCEV->getStepRecurrence(*SE))) { 951 const APInt &ConstInt = StepConst->getValue()->getValue(); 952 if (ConstInt.isSignedIntN(16) && ConstInt.srem(4) != 0) 953 return false; 954 } 955 } 956 return true; 957 }; 958 959 // Check if a load/store has DS form. 960 auto isDSFormCandidate = [](const Instruction *I, const Value *PtrValue, 961 const Type *PointerElementType) { 962 assert((PtrValue && I) && "Invalid parameter!"); 963 if (isa<IntrinsicInst>(I)) 964 return false; 965 return (PointerElementType->isIntegerTy(64)) || 966 (PointerElementType->isFloatTy()) || 967 (PointerElementType->isDoubleTy()) || 968 (PointerElementType->isIntegerTy(32) && 969 llvm::any_of(I->users(), 970 [](const User *U) { return isa<SExtInst>(U); })); 971 }; 972 973 // Check if a load/store has DQ form. 974 auto isDQFormCandidate = [&](const Instruction *I, const Value *PtrValue, 975 const Type *PointerElementType) { 976 assert((PtrValue && I) && "Invalid parameter!"); 977 // Check if it is a P10 lxvp/stxvp intrinsic. 978 auto *II = dyn_cast<IntrinsicInst>(I); 979 if (II) 980 return II->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp || 981 II->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp; 982 // Check if it is a P9 vector load/store. 983 return ST && ST->hasP9Vector() && (PointerElementType->isVectorTy()); 984 }; 985 986 // Collect buckets of comparable addresses used by loads and stores for update 987 // form. 988 SmallVector<Bucket, 16> UpdateFormBuckets = 989 collectCandidates(L, isUpdateFormCandidate, MaxVarsUpdateForm); 990 991 // Prepare for update form. 992 if (!UpdateFormBuckets.empty()) 993 MadeChange |= updateFormPrep(L, UpdateFormBuckets); 994 995 // Collect buckets of comparable addresses used by loads and stores for DS 996 // form. 997 SmallVector<Bucket, 16> DSFormBuckets = 998 collectCandidates(L, isDSFormCandidate, MaxVarsDSForm); 999 1000 // Prepare for DS form. 1001 if (!DSFormBuckets.empty()) 1002 MadeChange |= dispFormPrep(L, DSFormBuckets, DSForm); 1003 1004 // Collect buckets of comparable addresses used by loads and stores for DQ 1005 // form. 1006 SmallVector<Bucket, 16> DQFormBuckets = 1007 collectCandidates(L, isDQFormCandidate, MaxVarsDQForm); 1008 1009 // Prepare for DQ form. 1010 if (!DQFormBuckets.empty()) 1011 MadeChange |= dispFormPrep(L, DQFormBuckets, DQForm); 1012 1013 return MadeChange; 1014 } 1015