1 //===- ScopDetection.cpp - Detect Scops -----------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Detect the maximal Scops of a function. 10 // 11 // A static control part (Scop) is a subgraph of the control flow graph (CFG) 12 // that only has statically known control flow and can therefore be described 13 // within the polyhedral model. 14 // 15 // Every Scop fulfills these restrictions: 16 // 17 // * It is a single entry single exit region 18 // 19 // * Only affine linear bounds in the loops 20 // 21 // Every natural loop in a Scop must have a number of loop iterations that can 22 // be described as an affine linear function in surrounding loop iterators or 23 // parameters. (A parameter is a scalar that does not change its value during 24 // execution of the Scop). 25 // 26 // * Only comparisons of affine linear expressions in conditions 27 // 28 // * All loops and conditions perfectly nested 29 // 30 // The control flow needs to be structured such that it could be written using 31 // just 'for' and 'if' statements, without the need for any 'goto', 'break' or 32 // 'continue'. 33 // 34 // * Side effect free functions call 35 // 36 // Function calls and intrinsics that do not have side effects (readnone) 37 // or memory intrinsics (memset, memcpy, memmove) are allowed. 38 // 39 // The Scop detection finds the largest Scops by checking if the largest 40 // region is a Scop. If this is not the case, its canonical subregions are 41 // checked until a region is a Scop. It is now tried to extend this Scop by 42 // creating a larger non canonical region. 
43 // 44 //===----------------------------------------------------------------------===// 45 46 #include "polly/ScopDetection.h" 47 #include "polly/LinkAllPasses.h" 48 #include "polly/Options.h" 49 #include "polly/ScopDetectionDiagnostic.h" 50 #include "polly/Support/SCEVValidator.h" 51 #include "polly/Support/ScopHelper.h" 52 #include "polly/Support/ScopLocation.h" 53 #include "llvm/ADT/SmallPtrSet.h" 54 #include "llvm/ADT/Statistic.h" 55 #include "llvm/Analysis/AliasAnalysis.h" 56 #include "llvm/Analysis/Delinearization.h" 57 #include "llvm/Analysis/Loads.h" 58 #include "llvm/Analysis/LoopInfo.h" 59 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 60 #include "llvm/Analysis/RegionInfo.h" 61 #include "llvm/Analysis/ScalarEvolution.h" 62 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 63 #include "llvm/IR/BasicBlock.h" 64 #include "llvm/IR/DebugLoc.h" 65 #include "llvm/IR/DerivedTypes.h" 66 #include "llvm/IR/DiagnosticInfo.h" 67 #include "llvm/IR/DiagnosticPrinter.h" 68 #include "llvm/IR/Dominators.h" 69 #include "llvm/IR/Function.h" 70 #include "llvm/IR/InstrTypes.h" 71 #include "llvm/IR/Instruction.h" 72 #include "llvm/IR/Instructions.h" 73 #include "llvm/IR/IntrinsicInst.h" 74 #include "llvm/IR/Metadata.h" 75 #include "llvm/IR/Module.h" 76 #include "llvm/IR/PassManager.h" 77 #include "llvm/IR/Value.h" 78 #include "llvm/InitializePasses.h" 79 #include "llvm/Pass.h" 80 #include "llvm/Support/Debug.h" 81 #include "llvm/Support/Regex.h" 82 #include "llvm/Support/raw_ostream.h" 83 #include <algorithm> 84 #include <cassert> 85 #include <memory> 86 #include <stack> 87 #include <string> 88 #include <utility> 89 #include <vector> 90 91 using namespace llvm; 92 using namespace polly; 93 94 #include "polly/Support/PollyDebug.h" 95 #define DEBUG_TYPE "polly-detect" 96 97 // This option is set to a very high value, as analyzing such loops increases 98 // compile time on several cases. 
For experiments that enable this option, 99 // a value of around 40 has been working to avoid run-time regressions with 100 // Polly while still exposing interesting optimization opportunities. 101 static cl::opt<int> ProfitabilityMinPerLoopInstructions( 102 "polly-detect-profitability-min-per-loop-insts", 103 cl::desc("The minimal number of per-loop instructions before a single loop " 104 "region is considered profitable"), 105 cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory)); 106 107 bool polly::PollyProcessUnprofitable; 108 109 static cl::opt<bool, true> XPollyProcessUnprofitable( 110 "polly-process-unprofitable", 111 cl::desc( 112 "Process scops that are unlikely to benefit from Polly optimizations."), 113 cl::location(PollyProcessUnprofitable), cl::cat(PollyCategory)); 114 115 static cl::list<std::string> OnlyFunctions( 116 "polly-only-func", 117 cl::desc("Only run on functions that match a regex. " 118 "Multiple regexes can be comma separated. " 119 "Scop detection will run on all functions that match " 120 "ANY of the regexes provided."), 121 cl::CommaSeparated, cl::cat(PollyCategory)); 122 123 static cl::list<std::string> IgnoredFunctions( 124 "polly-ignore-func", 125 cl::desc("Ignore functions that match a regex. " 126 "Multiple regexes can be comma separated. 
" 127 "Scop detection will ignore all functions that match " 128 "ANY of the regexes provided."), 129 cl::CommaSeparated, cl::cat(PollyCategory)); 130 131 bool polly::PollyAllowFullFunction; 132 133 static cl::opt<bool, true> 134 XAllowFullFunction("polly-detect-full-functions", 135 cl::desc("Allow the detection of full functions"), 136 cl::location(polly::PollyAllowFullFunction), 137 cl::init(false), cl::cat(PollyCategory)); 138 139 static cl::opt<std::string> OnlyRegion( 140 "polly-only-region", 141 cl::desc("Only run on certain regions (The provided identifier must " 142 "appear in the name of the region's entry block"), 143 cl::value_desc("identifier"), cl::ValueRequired, cl::init(""), 144 cl::cat(PollyCategory)); 145 146 static cl::opt<bool> 147 IgnoreAliasing("polly-ignore-aliasing", 148 cl::desc("Ignore possible aliasing of the array bases"), 149 cl::Hidden, cl::cat(PollyCategory)); 150 151 bool polly::PollyAllowUnsignedOperations; 152 153 static cl::opt<bool, true> XPollyAllowUnsignedOperations( 154 "polly-allow-unsigned-operations", 155 cl::desc("Allow unsigned operations such as comparisons or zero-extends."), 156 cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::init(true), 157 cl::cat(PollyCategory)); 158 159 bool polly::PollyUseRuntimeAliasChecks; 160 161 static cl::opt<bool, true> XPollyUseRuntimeAliasChecks( 162 "polly-use-runtime-alias-checks", 163 cl::desc("Use runtime alias checks to resolve possible aliasing."), 164 cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::init(true), 165 cl::cat(PollyCategory)); 166 167 static cl::opt<bool> 168 ReportLevel("polly-report", 169 cl::desc("Print information about the activities of Polly"), 170 cl::cat(PollyCategory)); 171 172 static cl::opt<bool> AllowDifferentTypes( 173 "polly-allow-differing-element-types", 174 cl::desc("Allow different element types for array accesses"), cl::Hidden, 175 cl::init(true), cl::cat(PollyCategory)); 176 177 static cl::opt<bool> 178 
AllowNonAffine("polly-allow-nonaffine", 179 cl::desc("Allow non affine access functions in arrays"), 180 cl::Hidden, cl::cat(PollyCategory)); 181 182 static cl::opt<bool> 183 AllowModrefCall("polly-allow-modref-calls", 184 cl::desc("Allow functions with known modref behavior"), 185 cl::Hidden, cl::cat(PollyCategory)); 186 187 static cl::opt<bool> AllowNonAffineSubRegions( 188 "polly-allow-nonaffine-branches", 189 cl::desc("Allow non affine conditions for branches"), cl::Hidden, 190 cl::init(true), cl::cat(PollyCategory)); 191 192 static cl::opt<bool> 193 AllowNonAffineSubLoops("polly-allow-nonaffine-loops", 194 cl::desc("Allow non affine conditions for loops"), 195 cl::Hidden, cl::cat(PollyCategory)); 196 197 static cl::opt<bool, true> 198 TrackFailures("polly-detect-track-failures", 199 cl::desc("Track failure strings in detecting scop regions"), 200 cl::location(PollyTrackFailures), cl::Hidden, cl::init(true), 201 cl::cat(PollyCategory)); 202 203 static cl::opt<bool> KeepGoing("polly-detect-keep-going", 204 cl::desc("Do not fail on the first error."), 205 cl::Hidden, cl::cat(PollyCategory)); 206 207 static cl::opt<bool, true> 208 PollyDelinearizeX("polly-delinearize", 209 cl::desc("Delinearize array access functions"), 210 cl::location(PollyDelinearize), cl::Hidden, 211 cl::init(true), cl::cat(PollyCategory)); 212 213 static cl::opt<bool> 214 VerifyScops("polly-detect-verify", 215 cl::desc("Verify the detected SCoPs after each transformation"), 216 cl::Hidden, cl::cat(PollyCategory)); 217 218 bool polly::PollyInvariantLoadHoisting; 219 220 static cl::opt<bool, true> 221 XPollyInvariantLoadHoisting("polly-invariant-load-hoisting", 222 cl::desc("Hoist invariant loads."), 223 cl::location(PollyInvariantLoadHoisting), 224 cl::Hidden, cl::cat(PollyCategory)); 225 226 static cl::opt<bool> PollyAllowErrorBlocks( 227 "polly-allow-error-blocks", 228 cl::desc("Allow to speculate on the execution of 'error blocks'."), 229 cl::Hidden, cl::init(true), cl::cat(PollyCategory)); 
230 231 /// The minimal trip count under which loops are considered unprofitable. 232 static const unsigned MIN_LOOP_TRIP_COUNT = 8; 233 234 bool polly::PollyTrackFailures = false; 235 bool polly::PollyDelinearize = false; 236 StringRef polly::PollySkipFnAttr = "polly.skip.fn"; 237 238 //===----------------------------------------------------------------------===// 239 // Statistics. 240 241 STATISTIC(NumScopRegions, "Number of scops"); 242 STATISTIC(NumLoopsInScop, "Number of loops in scops"); 243 STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0"); 244 STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1"); 245 STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2"); 246 STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3"); 247 STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4"); 248 STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5"); 249 STATISTIC(NumScopsDepthLarger, 250 "Number of scops with maximal loop depth 6 and larger"); 251 STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)"); 252 STATISTIC(NumLoopsInProfScop, 253 "Number of loops in scops (profitable scops only)"); 254 STATISTIC(NumLoopsOverall, "Number of total loops"); 255 STATISTIC(NumProfScopsDepthZero, 256 "Number of scops with maximal loop depth 0 (profitable scops only)"); 257 STATISTIC(NumProfScopsDepthOne, 258 "Number of scops with maximal loop depth 1 (profitable scops only)"); 259 STATISTIC(NumProfScopsDepthTwo, 260 "Number of scops with maximal loop depth 2 (profitable scops only)"); 261 STATISTIC(NumProfScopsDepthThree, 262 "Number of scops with maximal loop depth 3 (profitable scops only)"); 263 STATISTIC(NumProfScopsDepthFour, 264 "Number of scops with maximal loop depth 4 (profitable scops only)"); 265 STATISTIC(NumProfScopsDepthFive, 266 "Number of scops with maximal loop depth 5 (profitable scops only)"); 267 STATISTIC(NumProfScopsDepthLarger, 
268 "Number of scops with maximal loop depth 6 and larger " 269 "(profitable scops only)"); 270 STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops"); 271 STATISTIC(MaxNumLoopsInProfScop, 272 "Maximal number of loops in scops (profitable scops only)"); 273 274 static void updateLoopCountStatistic(ScopDetection::LoopStats Stats, 275 bool OnlyProfitable); 276 277 namespace { 278 279 class DiagnosticScopFound final : public DiagnosticInfo { 280 private: 281 static int PluginDiagnosticKind; 282 283 Function &F; 284 std::string FileName; 285 unsigned EntryLine, ExitLine; 286 287 public: 288 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine, 289 unsigned ExitLine) 290 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName), 291 EntryLine(EntryLine), ExitLine(ExitLine) {} 292 293 void print(DiagnosticPrinter &DP) const override; 294 295 static bool classof(const DiagnosticInfo *DI) { 296 return DI->getKind() == PluginDiagnosticKind; 297 } 298 }; 299 } // namespace 300 301 int DiagnosticScopFound::PluginDiagnosticKind = 302 getNextAvailablePluginDiagnosticKind(); 303 304 void DiagnosticScopFound::print(DiagnosticPrinter &DP) const { 305 DP << "Polly detected an optimizable loop region (scop) in function '" << F 306 << "'\n"; 307 308 if (FileName.empty()) { 309 DP << "Scop location is unknown. Compile with debug info " 310 "(-g) to get more precise information. "; 311 return; 312 } 313 314 DP << FileName << ":" << EntryLine << ": Start of scop\n"; 315 DP << FileName << ":" << ExitLine << ": End of scop"; 316 } 317 318 /// Check if a string matches any regex in a list of regexes. 319 /// @param Str the input string to match against. 320 /// @param RegexList a list of strings that are regular expressions. 
321 static bool doesStringMatchAnyRegex(StringRef Str, 322 const cl::list<std::string> &RegexList) { 323 for (auto RegexStr : RegexList) { 324 Regex R(RegexStr); 325 326 std::string Err; 327 if (!R.isValid(Err)) 328 report_fatal_error(Twine("invalid regex given as input to polly: ") + Err, 329 true); 330 331 if (R.match(Str)) 332 return true; 333 } 334 return false; 335 } 336 337 //===----------------------------------------------------------------------===// 338 // ScopDetection. 339 340 ScopDetection::ScopDetection(const DominatorTree &DT, ScalarEvolution &SE, 341 LoopInfo &LI, RegionInfo &RI, AAResults &AA, 342 OptimizationRemarkEmitter &ORE) 343 : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {} 344 345 void ScopDetection::detect(Function &F) { 346 assert(ValidRegions.empty() && "Detection must run only once"); 347 348 if (!PollyProcessUnprofitable && LI.empty()) 349 return; 350 351 Region *TopRegion = RI.getTopLevelRegion(); 352 353 if (!OnlyFunctions.empty() && 354 !doesStringMatchAnyRegex(F.getName(), OnlyFunctions)) 355 return; 356 357 if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions)) 358 return; 359 360 if (!isValidFunction(F)) 361 return; 362 363 findScops(*TopRegion); 364 365 NumScopRegions += ValidRegions.size(); 366 367 // Prune non-profitable regions. 368 for (auto &DIt : DetectionContextMap) { 369 DetectionContext &DC = *DIt.getSecond().get(); 370 if (DC.Log.hasErrors()) 371 continue; 372 if (!ValidRegions.count(&DC.CurRegion)) 373 continue; 374 LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0); 375 updateLoopCountStatistic(Stats, false /* OnlyProfitable */); 376 if (isProfitableRegion(DC)) { 377 updateLoopCountStatistic(Stats, true /* OnlyProfitable */); 378 continue; 379 } 380 381 ValidRegions.remove(&DC.CurRegion); 382 } 383 384 NumProfScopRegions += ValidRegions.size(); 385 NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops; 386 387 // Only makes sense when we tracked errors. 
388 if (PollyTrackFailures) 389 emitMissedRemarks(F); 390 391 if (ReportLevel) 392 printLocations(F); 393 394 assert(ValidRegions.size() <= DetectionContextMap.size() && 395 "Cached more results than valid regions"); 396 } 397 398 template <class RR, typename... Args> 399 inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert, 400 Args &&...Arguments) const { 401 if (!Context.Verifying) { 402 RejectLog &Log = Context.Log; 403 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...); 404 Context.IsInvalid = true; 405 406 // Log even if PollyTrackFailures is false, the log entries are also used in 407 // canUseISLTripCount(). 408 Log.report(RejectReason); 409 410 POLLY_DEBUG(dbgs() << RejectReason->getMessage()); 411 POLLY_DEBUG(dbgs() << "\n"); 412 } else { 413 assert(!Assert && "Verification of detected scop failed"); 414 } 415 416 return false; 417 } 418 419 bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) { 420 if (!ValidRegions.count(&R)) 421 return false; 422 423 if (Verify) { 424 BBPair P = getBBPairForRegion(&R); 425 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P]; 426 427 // Free previous DetectionContext for the region and create and verify a new 428 // one. Be sure that the DetectionContext is not still used by a ScopInfop. 429 // Due to changes but CodeGeneration of another Scop, the Region object and 430 // the BBPair might not match anymore. 431 Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA, 432 /*Verifying=*/false); 433 434 return isValidRegion(*Entry.get()); 435 } 436 437 return true; 438 } 439 440 std::string ScopDetection::regionIsInvalidBecause(const Region *R) const { 441 // Get the first error we found. Even in keep-going mode, this is the first 442 // reason that caused the candidate to be rejected. 443 auto *Log = lookupRejectionLog(R); 444 445 // This can happen when we marked a region invalid, but didn't track 446 // an error for it. 
447 if (!Log || !Log->hasErrors()) 448 return ""; 449 450 RejectReasonPtr RR = *Log->begin(); 451 return RR->getMessage(); 452 } 453 454 bool ScopDetection::addOverApproximatedRegion(Region *AR, 455 DetectionContext &Context) const { 456 // If we already know about Ar we can exit. 457 if (!Context.NonAffineSubRegionSet.insert(AR)) 458 return true; 459 460 // All loops in the region have to be overapproximated too if there 461 // are accesses that depend on the iteration count. 462 463 for (BasicBlock *BB : AR->blocks()) { 464 Loop *L = LI.getLoopFor(BB); 465 if (AR->contains(L)) 466 Context.BoxedLoopsSet.insert(L); 467 } 468 469 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()); 470 } 471 472 bool ScopDetection::onlyValidRequiredInvariantLoads( 473 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const { 474 Region &CurRegion = Context.CurRegion; 475 const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout(); 476 477 if (!PollyInvariantLoadHoisting && !RequiredILS.empty()) 478 return false; 479 480 for (LoadInst *Load : RequiredILS) { 481 // If we already know a load has been accepted as required invariant, we 482 // already run the validation below once and consequently don't need to 483 // run it again. Hence, we return early. For certain test cases (e.g., 484 // COSMO this avoids us spending 50% of scop-detection time in this 485 // very function (and its children). 
486 if (Context.RequiredILS.count(Load)) 487 continue; 488 if (!isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS)) 489 return false; 490 491 for (auto NonAffineRegion : Context.NonAffineSubRegionSet) { 492 if (isSafeToLoadUnconditionally(Load->getPointerOperand(), 493 Load->getType(), Load->getAlign(), DL, 494 nullptr)) 495 continue; 496 497 if (NonAffineRegion->contains(Load) && 498 Load->getParent() != NonAffineRegion->getEntry()) 499 return false; 500 } 501 } 502 503 Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end()); 504 505 return true; 506 } 507 508 bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1, 509 Loop *Scope) const { 510 SetVector<Value *> Values; 511 findValues(S0, SE, Values); 512 if (S1) 513 findValues(S1, SE, Values); 514 515 SmallPtrSet<Value *, 8> PtrVals; 516 for (auto *V : Values) { 517 if (auto *P2I = dyn_cast<PtrToIntInst>(V)) 518 V = P2I->getOperand(0); 519 520 if (!V->getType()->isPointerTy()) 521 continue; 522 523 const SCEV *PtrSCEV = SE.getSCEVAtScope(V, Scope); 524 if (isa<SCEVConstant>(PtrSCEV)) 525 continue; 526 527 auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV)); 528 if (!BasePtr) 529 return true; 530 531 Value *BasePtrVal = BasePtr->getValue(); 532 if (PtrVals.insert(BasePtrVal).second) { 533 for (auto *PtrVal : PtrVals) 534 if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal)) 535 return true; 536 } 537 } 538 539 return false; 540 } 541 542 bool ScopDetection::isAffine(const SCEV *S, Loop *Scope, 543 DetectionContext &Context) const { 544 InvariantLoadsSetTy AccessILS; 545 if (!isAffineExpr(&Context.CurRegion, Scope, S, SE, &AccessILS)) 546 return false; 547 548 if (!onlyValidRequiredInvariantLoads(AccessILS, Context)) 549 return false; 550 551 return true; 552 } 553 554 bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI, 555 Value *Condition, bool IsLoopBranch, 556 DetectionContext &Context) const { 557 Loop *L = LI.getLoopFor(&BB); 558 
const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L); 559 560 if (IsLoopBranch && L->isLoopLatch(&BB)) 561 return false; 562 563 // Check for invalid usage of different pointers in one expression. 564 if (involvesMultiplePtrs(ConditionSCEV, nullptr, L)) 565 return false; 566 567 if (isAffine(ConditionSCEV, L, Context)) 568 return true; 569 570 if (AllowNonAffineSubRegions && 571 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 572 return true; 573 574 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, 575 ConditionSCEV, ConditionSCEV, SI); 576 } 577 578 bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI, 579 Value *Condition, bool IsLoopBranch, 580 DetectionContext &Context) { 581 // Constant integer conditions are always affine. 582 if (isa<ConstantInt>(Condition)) 583 return true; 584 585 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) { 586 auto Opcode = BinOp->getOpcode(); 587 if (Opcode == Instruction::And || Opcode == Instruction::Or) { 588 Value *Op0 = BinOp->getOperand(0); 589 Value *Op1 = BinOp->getOperand(1); 590 return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) && 591 isValidBranch(BB, BI, Op1, IsLoopBranch, Context); 592 } 593 } 594 595 if (auto PHI = dyn_cast<PHINode>(Condition)) { 596 auto *Unique = dyn_cast_or_null<ConstantInt>( 597 getUniqueNonErrorValue(PHI, &Context.CurRegion, this)); 598 if (Unique && (Unique->isZero() || Unique->isOne())) 599 return true; 600 } 601 602 if (auto Load = dyn_cast<LoadInst>(Condition)) 603 if (!IsLoopBranch && Context.CurRegion.contains(Load)) { 604 Context.RequiredILS.insert(Load); 605 return true; 606 } 607 608 // Non constant conditions of branches need to be ICmpInst. 
609 if (!isa<ICmpInst>(Condition)) { 610 if (!IsLoopBranch && AllowNonAffineSubRegions && 611 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 612 return true; 613 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB); 614 } 615 616 ICmpInst *ICmp = cast<ICmpInst>(Condition); 617 618 // Are both operands of the ICmp affine? 619 if (isa<UndefValue>(ICmp->getOperand(0)) || 620 isa<UndefValue>(ICmp->getOperand(1))) 621 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp); 622 623 Loop *L = LI.getLoopFor(&BB); 624 const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L); 625 const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L); 626 627 LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, this); 628 RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, this); 629 630 // If unsigned operations are not allowed try to approximate the region. 631 if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations) 632 return !IsLoopBranch && AllowNonAffineSubRegions && 633 addOverApproximatedRegion(RI.getRegionFor(&BB), Context); 634 635 // Check for invalid usage of different pointers in one expression. 636 if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) && 637 involvesMultiplePtrs(RHS, nullptr, L)) 638 return false; 639 640 // Check for invalid usage of different pointers in a relational comparison. 
641 if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L)) 642 return false; 643 644 if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context)) 645 return true; 646 647 if (!IsLoopBranch && AllowNonAffineSubRegions && 648 addOverApproximatedRegion(RI.getRegionFor(&BB), Context)) 649 return true; 650 651 if (IsLoopBranch) 652 return false; 653 654 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS, 655 ICmp); 656 } 657 658 bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch, 659 bool AllowUnreachable, 660 DetectionContext &Context) { 661 Region &CurRegion = Context.CurRegion; 662 663 Instruction *TI = BB.getTerminator(); 664 665 if (AllowUnreachable && isa<UnreachableInst>(TI)) 666 return true; 667 668 // Return instructions are only valid if the region is the top level region. 669 if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion()) 670 return true; 671 672 Value *Condition = getConditionFromTerminator(TI); 673 674 if (!Condition) 675 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB); 676 677 // UndefValue is not allowed as condition. 678 if (isa<UndefValue>(Condition)) 679 return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB); 680 681 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) 682 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context); 683 684 SwitchInst *SI = dyn_cast<SwitchInst>(TI); 685 assert(SI && "Terminator was neither branch nor switch"); 686 687 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context); 688 } 689 690 bool ScopDetection::isValidCallInst(CallInst &CI, 691 DetectionContext &Context) const { 692 if (CI.doesNotReturn()) 693 return false; 694 695 if (CI.doesNotAccessMemory()) 696 return true; 697 698 if (auto *II = dyn_cast<IntrinsicInst>(&CI)) 699 if (isValidIntrinsicInst(*II, Context)) 700 return true; 701 702 Function *CalledFunction = CI.getCalledFunction(); 703 704 // Indirect calls are not supported. 
705 if (CalledFunction == nullptr) 706 return false; 707 708 if (isDebugCall(&CI)) { 709 POLLY_DEBUG(dbgs() << "Allow call to debug function: " 710 << CalledFunction->getName() << '\n'); 711 return true; 712 } 713 714 if (AllowModrefCall) { 715 MemoryEffects ME = AA.getMemoryEffects(CalledFunction); 716 if (ME.onlyAccessesArgPointees()) { 717 for (const auto &Arg : CI.args()) { 718 if (!Arg->getType()->isPointerTy()) 719 continue; 720 721 // Bail if a pointer argument has a base address not known to 722 // ScalarEvolution. Note that a zero pointer is acceptable. 723 const SCEV *ArgSCEV = 724 SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent())); 725 if (ArgSCEV->isZero()) 726 continue; 727 728 auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV)); 729 if (!BP) 730 return false; 731 732 // Implicitly disable delinearization since we have an unknown 733 // accesses with an unknown access function. 734 Context.HasUnknownAccess = true; 735 } 736 737 // Explicitly use addUnknown so we don't put a loop-variant 738 // pointer into the alias set. 739 Context.AST.addUnknown(&CI); 740 return true; 741 } 742 743 if (ME.onlyReadsMemory()) { 744 // Implicitly disable delinearization since we have an unknown 745 // accesses with an unknown access function. 746 Context.HasUnknownAccess = true; 747 // Explicitly use addUnknown so we don't put a loop-variant 748 // pointer into the alias set. 749 Context.AST.addUnknown(&CI); 750 return true; 751 } 752 return false; 753 } 754 755 return false; 756 } 757 758 bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II, 759 DetectionContext &Context) const { 760 if (isIgnoredIntrinsic(&II)) 761 return true; 762 763 // The closest loop surrounding the call instruction. 764 Loop *L = LI.getLoopFor(II.getParent()); 765 766 // The access function and base pointer for memory intrinsics. 767 const SCEV *AF; 768 const SCEVUnknown *BP; 769 770 switch (II.getIntrinsicID()) { 771 // Memory intrinsics that can be represented are supported. 
772 case Intrinsic::memmove: 773 case Intrinsic::memcpy: 774 AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L); 775 if (!AF->isZero()) { 776 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 777 // Bail if the source pointer is not valid. 778 if (!isValidAccess(&II, AF, BP, Context)) 779 return false; 780 } 781 [[fallthrough]]; 782 case Intrinsic::memset: 783 AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L); 784 if (!AF->isZero()) { 785 BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF)); 786 // Bail if the destination pointer is not valid. 787 if (!isValidAccess(&II, AF, BP, Context)) 788 return false; 789 } 790 791 // Bail if the length is not affine. 792 if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L, 793 Context)) 794 return false; 795 796 return true; 797 default: 798 break; 799 } 800 801 return false; 802 } 803 804 bool ScopDetection::isInvariant(Value &Val, const Region &Reg, 805 DetectionContext &Ctx) const { 806 // A reference to function argument or constant value is invariant. 807 if (isa<Argument>(Val) || isa<Constant>(Val)) 808 return true; 809 810 Instruction *I = dyn_cast<Instruction>(&Val); 811 if (!I) 812 return false; 813 814 if (!Reg.contains(I)) 815 return true; 816 817 // Loads within the SCoP may read arbitrary values, need to hoist them. If it 818 // is not hoistable, it will be rejected later, but here we assume it is and 819 // that makes the value invariant. 820 if (auto LI = dyn_cast<LoadInst>(I)) { 821 Ctx.RequiredILS.insert(LI); 822 return true; 823 } 824 825 return false; 826 } 827 828 namespace { 829 830 /// Remove smax of smax(0, size) expressions from a SCEV expression and 831 /// register the '...' components. 832 /// 833 /// Array access expressions as they are generated by GFortran contain smax(0, 834 /// size) expressions that confuse the 'normal' delinearization algorithm. 
/// However, if we extract such expressions before the normal delinearization
/// takes place they can actually help to identify array size expressions in
/// Fortran accesses. For the subsequently following delinearization the smax(0,
/// size) component can be replaced by just 'size'. This is correct as we will
/// always add and verify the assumption that for all subscript expressions
/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
/// that 0 <= size, which means smax(0, size) == size.
class SCEVRemoveMax final : public SCEVRewriteVisitor<SCEVRemoveMax> {
public:
  SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
      : SCEVRewriteVisitor(SE), Terms(Terms) {}

  /// Rewrite @p Scev with all smax(0, x) subexpressions replaced by x.
  ///
  /// If @p Terms is non-null, every operand that replaced an smax is also
  /// appended to it, so callers can use these as array-size candidates.
  static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
                             std::vector<const SCEV *> *Terms = nullptr) {
    SCEVRemoveMax Rewriter(SE, Terms);
    return Rewriter.visit(Scev);
  }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    // Only the binary form smax(0, x) is simplified; other smax expressions
    // are returned unchanged.
    if ((Expr->getNumOperands() == 2) && Expr->getOperand(0)->isZero()) {
      auto Res = visit(Expr->getOperand(1));
      if (Terms)
        (*Terms).push_back(Res);
      return Res;
    }

    return Expr;
  }

private:
  // Optional sink for the operands stripped out of smax(0, x); may be null.
  std::vector<const SCEV *> *Terms;
};
} // namespace

/// Collect SCEV terms for all accesses to @p BasePointer that can serve as
/// input to array-dimension recovery (delinearization).
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    // Prefer the operands of smax(0, x) subexpressions as terms; these show
    // up in Fortran-style accesses (see SCEVRemoveMax above).
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
    if (!MaxTerms.empty()) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
          collectParametricTerms(SE, AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
          SmallVector<const SCEV *, 0> Operands;

          for (const SCEV *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
              Operands.push_back(Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
                // Drop factors that are instructions defined inside the
                // region; keep everything defined outside of it.
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(MulOp);

              } else {
                Operands.push_back(MulOp);
              }
            }
          }
          if (Operands.size())
            Terms.push_back(SE.getMulExpr(Operands));
        }
      }
    }
    // Fall back to the generic parametric-term collection if nothing more
    // specific was found for this access.
    if (Terms.empty())
      collectParametricTerms(SE, Pair.second, Terms);
  }
  return Terms;
}

/// Check that the array sizes in @p Sizes, recovered by delinearization for
/// @p BasePointer, are affine and usable inside the current region.
bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
                                       SmallVectorImpl<const SCEV *> &Sizes,
                                       const SCEVUnknown *BasePointer,
                                       Loop *Scope) const {
  // If no sizes were found, all sizes are trivially valid. We allow this case
  // to make it possible to pass known-affine accesses to the delinearization to
  // try to recover some interesting multi-dimensional accesses, but to still
  // allow the already known to be affine access in case the delinearization
  // fails. In such situations, the delinearization will just return a Sizes
  // array of size zero.
  if (Sizes.size() == 0)
    return true;

  Value *BaseValue = BasePointer->getValue();
  Region &CurRegion = Context.CurRegion;
  for (const SCEV *DelinearizedSize : Sizes) {
    // Don't pass down the scope to isAffine; array dimensions must be
    // invariant across the entire scop.
    if (!isAffine(DelinearizedSize, nullptr, Context)) {
      Sizes.clear();
      break;
    }
    if (auto *Unknown = dyn_cast<SCEVUnknown>(DelinearizedSize)) {
      auto *V = dyn_cast<Value>(Unknown->getValue());
      if (auto *Load = dyn_cast<LoadInst>(V)) {
        // A size that is loaded inside the region is acceptable if the load
        // can be hoisted in front of the region; remember it as required.
        if (Context.CurRegion.contains(Load) &&
            isHoistableLoad(Load, CurRegion, LI, SE, DT, Context.RequiredILS))
          Context.RequiredILS.insert(Load);
        continue;
      }
    }
    if (hasScalarDepsInsideRegion(DelinearizedSize, &CurRegion, Scope, false,
                                  Context.RequiredILS))
      return invalid<ReportNonAffineAccess>(
          Context, /*Assert=*/true, DelinearizedSize,
          Context.Accesses[BasePointer].front().first, BaseValue);
  }

  // No array shape derived.
  if (Sizes.empty()) {
    if (AllowNonAffine)
      return true;

    // Report every non-affine access to this base pointer before giving up.
    for (const auto &Pair : Context.Accesses[BasePointer]) {
      const Instruction *Insn = Pair.first;
      const SCEV *AF = Pair.second;

      if (!isAffine(AF, Scope, Context)) {
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Insn,
                                       BaseValue);
        if (!KeepGoing)
          return false;
      }
    }
    return false;
  }
  return true;
}

// We first store the resulting memory accesses in TempMemoryAccesses. Only
// if the access functions for all memory accesses have been successfully
// delinearized we continue. Otherwise, we either report a failure or, if
// non-affine accesses are allowed, we drop the information. In case the
// information is dropped the memory accesses need to be overapproximated
// when translated to a polyhedral representation.
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    auto *AF = Pair.second;
    // Strip smax(0, x) wrappers before delinearization (see SCEVRemoveMax).
    AF = SCEVRemoveMax::rewrite(AF, SE);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(std::make_pair(Insn, MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(Insn)->second;
    auto *Scope = LI.getLoopFor(Insn->getParent());

    if (!AF) {
      // No rewritten access function; fall back to the original expression
      // and treat it as a single subscript if it is affine.
      if (isAffine(Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Pair.second);
      else
        IsNonAffine = true;
    } else {
      if (Shape->DelinearizedSizes.size() == 0) {
        // No array shape known; keep the access as one subscript.
        Acc->DelinearizedSubscripts.push_back(AF);
      } else {
        // Split the access function into per-dimension subscripts according
        // to the recovered array shape.
        llvm::computeAccessFunctions(SE, AF, Acc->DelinearizedSubscripts,
                                     Shape->DelinearizedSizes);
        if (Acc->DelinearizedSubscripts.size() == 0)
          IsNonAffine = true;
      }
      // Every resulting subscript must itself be affine.
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine) {
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Pair.second,
                                       Insn, BaseValue);
        if (!KeepGoing)
          return false;
      }
    }
  }

  // Commit the delinearized accesses only if all of them are affine.
  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(TempMemoryAccesses.begin(),
                                TempMemoryAccesses.end());

  return true;
}

/// Try to recover a multi-dimensional array shape for @p BasePointer and
/// re-express all its accesses in terms of that shape.
bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
                                          const SCEVUnknown *BasePointer,
                                          Loop *Scope) const {
  auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));

  auto Terms = getDelinearizationTerms(Context, BasePointer);

  findArrayDimensions(SE, Terms, Shape->DelinearizedSizes,
                      Context.ElementSize[BasePointer]);

  if (!hasValidArraySizes(Context, Shape->DelinearizedSizes, BasePointer,
                          Scope))
    return false;

  return computeAccessFunctions(Context, BasePointer, Shape);
}

bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
  // TODO: If we have an unknown access and other non-affine accesses we do
  // not try to delinearize them for now.
  if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
    return AllowNonAffine;

  // Attempt delinearization for every base pointer with a non-affine access.
  for (auto &Pair : Context.NonAffineAccesses) {
    auto *BasePointer = Pair.first;
    auto *Scope = Pair.second;
    if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
      Context.IsInvalid = true;
      if (!KeepGoing)
        return false;
    }
  }
  return true;
}

/// Check a single memory access with access function @p AF and base pointer
/// @p BP for validity inside the current region; records element sizes,
/// delinearization candidates and alias information on @p Context.
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
                                  const SCEVUnknown *BP,
                                  DetectionContext &Context) const {

  if (!BP)
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Inst);

  auto *BV = BP->getValue();
  if (isa<UndefValue>(BV))
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Inst);

  // FIXME: Think about allowing IntToPtrInst
  if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BV))
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst);

  // Check that the base address of the access is invariant in the current
  // region.
  if (!isInvariant(*BV, Context.CurRegion, Context))
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BV, Inst);

  // From here on, work with the offset relative to the base pointer.
  AF = SE.getMinusSCEV(AF, BP);

  const SCEV *Size;
  if (!isa<MemIntrinsic>(Inst)) {
    Size = SE.getElementSize(Inst);
  } else {
    auto *SizeTy =
        SE.getEffectiveSCEVType(PointerType::getUnqual(SE.getContext()));
    Size = SE.getConstant(SizeTy, 8);
  }

  if (Context.ElementSize[BP]) {
    if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
                                                      Inst, BV);

    // Mixed element sizes allowed: remember the smallest one seen so far.
    Context.ElementSize[BP] = SE.getSMinExpr(Size, Context.ElementSize[BP]);
  } else {
    Context.ElementSize[BP] = Size;
  }

  // An access that depends on a boxed (over-approximated non-affine) loop
  // cannot be expressed in the surrounding iteration space.
  bool IsVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(AF, Loops);
  for (const Loop *L : Loops)
    if (Context.BoxedLoopsSet.count(L))
      IsVariantInNonAffineLoop = true;

  auto *Scope = LI.getLoopFor(Inst->getParent());
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(AF, Scope, Context);
  // Do not try to delinearize memory intrinsics and force them to be affine.
  if (isa<MemIntrinsic>(Inst) && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
    Context.Accesses[BP].push_back({Inst, AF});

    if (!IsAffine)
      Context.NonAffineAccesses.insert(
          std::make_pair(BP, LI.getLoopFor(Inst->getParent())));
  } else if (!AllowNonAffine && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, AF, Inst,
                                          BV);
  }

  if (IgnoreAliasing)
    return true;

  // Check if the base pointer of the memory access does alias with
  // any other pointer. This cannot be handled at the moment.
  AAMDNodes AATags = Inst->getAAMetadata();
  AliasSet &AS = Context.AST.getAliasSetFor(
      MemoryLocation::getBeforeOrAfter(BP->getValue(), AATags));

  if (!AS.isMustAlias()) {
    if (PollyUseRuntimeAliasChecks) {
      bool CanBuildRunTimeCheck = true;
      // The run-time alias check places code that involves the base pointer at
      // the beginning of the SCoP. This breaks if the base pointer is defined
      // inside the scop. Hence, we can only create a run-time check if we are
      // sure the base pointer is not an instruction defined inside the scop.
      // However, we can ignore loads that will be hoisted.

      auto ASPointers = AS.getPointers();

      InvariantLoadsSetTy VariantLS, InvariantLS;
      // In order to detect loads which are dependent on other invariant loads
      // as invariant, we use fixed-point iteration method here i.e we iterate
      // over the alias set for arbitrary number of times until it is safe to
      // assume that all the invariant loads have been detected
      while (true) {
        const unsigned int VariantSize = VariantLS.size(),
                           InvariantSize = InvariantLS.size();

        for (const Value *Ptr : ASPointers) {
          Instruction *Inst = dyn_cast<Instruction>(const_cast<Value *>(Ptr));
          if (Inst && Context.CurRegion.contains(Inst)) {
            auto *Load = dyn_cast<LoadInst>(Inst);
            if (Load && InvariantLS.count(Load))
              continue;
            if (Load && isHoistableLoad(Load, Context.CurRegion, LI, SE, DT,
                                        InvariantLS)) {
              // A load previously classified as variant may become hoistable
              // once the loads it depends on are known to be invariant.
              if (VariantLS.count(Load))
                VariantLS.remove(Load);
              Context.RequiredILS.insert(Load);
              InvariantLS.insert(Load);
            } else {
              CanBuildRunTimeCheck = false;
              VariantLS.insert(Load);
            }
          }
        }

        // Fixed point reached: neither set changed in this round.
        if (InvariantSize == InvariantLS.size() &&
            VariantSize == VariantLS.size())
          break;
      }

      if (CanBuildRunTimeCheck)
        return true;
    }
    return invalid<ReportAlias>(Context, /*Assert=*/true, Inst, AS);
  }

  return true;
}

bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
                                        DetectionContext &Context) const {
  Value *Ptr = Inst.getPointerOperand();
  Loop *L = LI.getLoopFor(Inst->getParent());
  const SCEV *AccessFunction = SE.getSCEVAtScope(Ptr, L);
  const SCEVUnknown *BasePointer;

  BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction));

  return isValidAccess(Inst, AccessFunction, BasePointer, Context);
}

bool ScopDetection::isValidInstruction(Instruction &Inst,
                                       DetectionContext &Context) {
  // Operands defined in error blocks are generally not allowed; the only
  // exception are PHIs whose users are all terminators.
  for (auto &Op : Inst.operands()) {
    auto *OpInst = dyn_cast<Instruction>(&Op);

    if (!OpInst)
      continue;

    if (isErrorBlock(*OpInst->getParent(), Context.CurRegion)) {
      auto *PHI = dyn_cast<PHINode>(OpInst);
      if (PHI) {
        for (User *U : PHI->users()) {
          auto *UI = dyn_cast<Instruction>(U);
          if (!UI || !UI->isTerminator())
            return false;
        }
      } else {
        return false;
      }
    }
  }

  if (isa<LandingPadInst>(&Inst) || isa<ResumeInst>(&Inst))
    return false;

  // We only check the call instruction but not invoke instruction.
  if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
    if (isValidCallInst(*CI, Context))
      return true;

    return invalid<ReportFuncCall>(Context, /*Assert=*/true, &Inst);
  }

  if (!Inst.mayReadOrWriteMemory()) {
    if (!isa<AllocaInst>(Inst))
      return true;

    return invalid<ReportAlloca>(Context, /*Assert=*/true, &Inst);
  }

  // Check the access function.
  if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
    Context.hasStores |= isa<StoreInst>(MemInst);
    Context.hasLoads |= isa<LoadInst>(MemInst);
    if (!MemInst.isSimple())
      return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
                                                  &Inst);

    return isValidMemoryAccess(MemInst, Context);
  }

  // We do not know this instruction, therefore we assume it is invalid.
  return invalid<ReportUnknownInst>(Context, /*Assert=*/true, &Inst);
}

/// Check whether @p L has exiting blocks.
///
/// @param L The loop of interest
///
/// @return True if the loop has exiting blocks, false otherwise.
static bool hasExitingBlocks(Loop *L) {
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  return !ExitingBlocks.empty();
}

bool ScopDetection::canUseISLTripCount(Loop *L, DetectionContext &Context) {
  // FIXME: Yes, this is bad. isValidCFG() may call invalid<Reason>() which
  // causes the SCoP to be rejected regardless on whether non-ISL trip counts
  // could be used. We currently preserve the legacy behaviour of rejecting
  // based on Context.Log.size() added by isValidCFG() or before, regardless on
  // whether the ISL trip count can be used or can be used as a non-affine
  // region. However, we allow rejections by isValidCFG() that do not result in
  // an error log entry.
  bool OldIsInvalid = Context.IsInvalid;

  // Ensure the loop has valid exiting blocks as well as latches, otherwise we
  // need to overapproximate it as a boxed loop.
  SmallVector<BasicBlock *, 4> LoopControlBlocks;
  L->getExitingBlocks(LoopControlBlocks);
  L->getLoopLatches(LoopControlBlocks);
  for (BasicBlock *ControlBB : LoopControlBlocks) {
    if (!isValidCFG(*ControlBB, true, false, Context)) {
      Context.IsInvalid = OldIsInvalid || Context.Log.size();
      return false;
    }
  }

  // We can use ISL to compute the trip count of L.
  Context.IsInvalid = OldIsInvalid || Context.Log.size();
  return true;
}

bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  // _______________
  // | Loop Header | <-----------.
  // ---------------             |
  //        |                    |
  // _______________       ______________
  // | RegionEntry |-----> | RegionExit |----->
  // ---------------       --------------
  //        |
  // _______________
  // | EndlessLoop | <--.
  // ---------------    |
  //        |           |
  //        \-----------/
  //
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
  // neither entirely contained in the region RegionEntry->RegionExit
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
  // in the loop.
  // The block EndlessLoop is contained in the region because Region::contains
  // tests whether it is not dominated by RegionExit. This is probably to not
  // having to query the PostdominatorTree. Instead of an endless loop, a dead
  // end can also be formed by an UnreachableInst. This case is already caught
  // by isErrorBlock(). We hence only have to reject endless loops here.
  if (!hasExitingBlocks(L))
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, L);

  // The algorithm for domain construction assumes that loops has only a single
  // exit block (and hence corresponds to a subregion). Note that we cannot use
  // L->getExitBlock() because it does not check whether all exiting edges point
  // to the same BB.
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  BasicBlock *TheExitBlock = ExitBlocks[0];
  for (BasicBlock *ExitBB : ExitBlocks) {
    if (TheExitBlock != ExitBB)
      return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, L);
  }

  if (canUseISLTripCount(L, Context))
    return true;

  // The trip count is not affine; if allowed, over-approximate the smallest
  // region enclosing the loop as a non-affine (boxed) region.
  if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
    Region *R = RI.getRegionFor(L->getHeader());
    while (R != &Context.CurRegion && !R->contains(L))
      R = R->getParent();

    if (addOverApproximatedRegion(R, Context))
      return true;
  }

  const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, L, LoopCount);
}

/// Return the number of loops in @p L (incl. @p L) that have a trip
/// count that is not known to be less than @p MinProfitableTrips.
1367 ScopDetection::LoopStats 1368 ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE, 1369 unsigned MinProfitableTrips) { 1370 const SCEV *TripCount = SE.getBackedgeTakenCount(L); 1371 1372 int NumLoops = 1; 1373 int MaxLoopDepth = 1; 1374 if (MinProfitableTrips > 0) 1375 if (auto *TripCountC = dyn_cast<SCEVConstant>(TripCount)) 1376 if (TripCountC->getType()->getScalarSizeInBits() <= 64) 1377 if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips) 1378 NumLoops -= 1; 1379 1380 for (auto &SubLoop : *L) { 1381 LoopStats Stats = countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1382 NumLoops += Stats.NumLoops; 1383 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth + 1); 1384 } 1385 1386 return {NumLoops, MaxLoopDepth}; 1387 } 1388 1389 ScopDetection::LoopStats 1390 ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE, 1391 LoopInfo &LI, unsigned MinProfitableTrips) { 1392 int LoopNum = 0; 1393 int MaxLoopDepth = 0; 1394 1395 auto L = LI.getLoopFor(R->getEntry()); 1396 1397 // If L is fully contained in R, move to first loop surrounding R. Otherwise, 1398 // L is either nullptr or already surrounding R. 1399 if (L && R->contains(L)) { 1400 L = R->outermostLoopInRegion(L); 1401 L = L->getParentLoop(); 1402 } 1403 1404 auto SubLoops = 1405 L ? 
L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end()); 1406 1407 for (auto &SubLoop : SubLoops) 1408 if (R->contains(SubLoop)) { 1409 LoopStats Stats = 1410 countBeneficialSubLoops(SubLoop, SE, MinProfitableTrips); 1411 LoopNum += Stats.NumLoops; 1412 MaxLoopDepth = std::max(MaxLoopDepth, Stats.MaxDepth); 1413 } 1414 1415 return {LoopNum, MaxLoopDepth}; 1416 } 1417 1418 static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI, 1419 const DominatorTree &DT) { 1420 if (isa<UnreachableInst>(BB.getTerminator())) 1421 return true; 1422 1423 if (LI.isLoopHeader(&BB)) 1424 return false; 1425 1426 // Don't consider something outside the SCoP as error block. It will precede 1427 // the code versioning runtime check. 1428 if (!R.contains(&BB)) 1429 return false; 1430 1431 // Basic blocks that are always executed are not considered error blocks, 1432 // as their execution can not be a rare event. 1433 bool DominatesAllPredecessors = true; 1434 if (R.isTopLevelRegion()) { 1435 for (BasicBlock &I : *R.getEntry()->getParent()) { 1436 if (isa<ReturnInst>(I.getTerminator()) && !DT.dominates(&BB, &I)) { 1437 DominatesAllPredecessors = false; 1438 break; 1439 } 1440 } 1441 } else { 1442 for (auto Pred : predecessors(R.getExit())) { 1443 if (R.contains(Pred) && !DT.dominates(&BB, Pred)) { 1444 DominatesAllPredecessors = false; 1445 break; 1446 } 1447 } 1448 } 1449 1450 if (DominatesAllPredecessors) 1451 return false; 1452 1453 for (Instruction &Inst : BB) 1454 if (CallInst *CI = dyn_cast<CallInst>(&Inst)) { 1455 if (isDebugCall(CI)) 1456 continue; 1457 1458 if (isIgnoredIntrinsic(CI)) 1459 continue; 1460 1461 // memset, memcpy and memmove are modeled intrinsics. 
1462 if (isa<MemSetInst>(CI) || isa<MemTransferInst>(CI)) 1463 continue; 1464 1465 if (!CI->doesNotAccessMemory()) 1466 return true; 1467 if (CI->doesNotReturn()) 1468 return true; 1469 } 1470 1471 return false; 1472 } 1473 1474 bool ScopDetection::isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R) { 1475 if (!PollyAllowErrorBlocks) 1476 return false; 1477 1478 auto It = ErrorBlockCache.insert({std::make_pair(&BB, &R), false}); 1479 if (!It.second) 1480 return It.first->getSecond(); 1481 1482 bool Result = isErrorBlockImpl(BB, R, LI, DT); 1483 It.first->second = Result; 1484 return Result; 1485 } 1486 1487 Region *ScopDetection::expandRegion(Region &R) { 1488 // Initial no valid region was found (greater than R) 1489 std::unique_ptr<Region> LastValidRegion; 1490 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion()); 1491 1492 POLLY_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); 1493 1494 while (ExpandedRegion) { 1495 BBPair P = getBBPairForRegion(ExpandedRegion.get()); 1496 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P]; 1497 Entry = std::make_unique<DetectionContext>(*ExpandedRegion, AA, 1498 /*Verifying=*/false); 1499 DetectionContext &Context = *Entry.get(); 1500 1501 POLLY_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() 1502 << "\n"); 1503 // Only expand when we did not collect errors. 1504 1505 if (!Context.Log.hasErrors()) { 1506 // If the exit is valid check all blocks 1507 // - if true, a valid region was found => store it + keep expanding 1508 // - if false, .tbd. => stop (should this really end the loop?) 1509 if (!allBlocksValid(Context) || Context.Log.hasErrors()) { 1510 removeCachedResults(*ExpandedRegion); 1511 DetectionContextMap.erase(P); 1512 break; 1513 } 1514 1515 // Store this region, because it is the greatest valid (encountered so 1516 // far). 
1517 if (LastValidRegion) { 1518 removeCachedResults(*LastValidRegion); 1519 DetectionContextMap.erase(P); 1520 } 1521 LastValidRegion = std::move(ExpandedRegion); 1522 1523 // Create and test the next greater region (if any) 1524 ExpandedRegion = 1525 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion()); 1526 1527 } else { 1528 // Create and test the next greater region (if any) 1529 removeCachedResults(*ExpandedRegion); 1530 DetectionContextMap.erase(P); 1531 ExpandedRegion = 1532 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion()); 1533 } 1534 } 1535 1536 POLLY_DEBUG({ 1537 if (LastValidRegion) 1538 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n"; 1539 else 1540 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n"; 1541 }); 1542 1543 return LastValidRegion.release(); 1544 } 1545 1546 static bool regionWithoutLoops(Region &R, LoopInfo &LI) { 1547 for (const BasicBlock *BB : R.blocks()) 1548 if (R.contains(LI.getLoopFor(BB))) 1549 return false; 1550 1551 return true; 1552 } 1553 1554 void ScopDetection::removeCachedResultsRecursively(const Region &R) { 1555 for (auto &SubRegion : R) { 1556 if (ValidRegions.count(SubRegion.get())) { 1557 removeCachedResults(*SubRegion.get()); 1558 } else 1559 removeCachedResultsRecursively(*SubRegion); 1560 } 1561 } 1562 1563 void ScopDetection::removeCachedResults(const Region &R) { 1564 ValidRegions.remove(&R); 1565 } 1566 1567 void ScopDetection::findScops(Region &R) { 1568 std::unique_ptr<DetectionContext> &Entry = 1569 DetectionContextMap[getBBPairForRegion(&R)]; 1570 Entry = std::make_unique<DetectionContext>(R, AA, /*Verifying=*/false); 1571 DetectionContext &Context = *Entry.get(); 1572 1573 bool DidBailout = true; 1574 if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI)) 1575 invalid<ReportUnprofitable>(Context, /*Assert=*/true, &R); 1576 else 1577 DidBailout = !isValidRegion(Context); 1578 1579 (void)DidBailout; 1580 if (KeepGoing) { 1581 assert((!DidBailout || Context.IsInvalid) 
&& 1582 "With -polly-detect-keep-going, it is sufficient that if " 1583 "isValidRegion short-circuited, that SCoP is invalid"); 1584 } else { 1585 assert(DidBailout == Context.IsInvalid && 1586 "isValidRegion must short-circuit iff the ScoP is invalid"); 1587 } 1588 1589 if (Context.IsInvalid) { 1590 removeCachedResults(R); 1591 } else { 1592 ValidRegions.insert(&R); 1593 return; 1594 } 1595 1596 for (auto &SubRegion : R) 1597 findScops(*SubRegion); 1598 1599 // Try to expand regions. 1600 // 1601 // As the region tree normally only contains canonical regions, non canonical 1602 // regions that form a Scop are not found. Therefore, those non canonical 1603 // regions are checked by expanding the canonical ones. 1604 1605 std::vector<Region *> ToExpand; 1606 1607 for (auto &SubRegion : R) 1608 ToExpand.push_back(SubRegion.get()); 1609 1610 for (Region *CurrentRegion : ToExpand) { 1611 // Skip invalid regions. Regions may become invalid, if they are element of 1612 // an already expanded region. 1613 if (!ValidRegions.count(CurrentRegion)) 1614 continue; 1615 1616 // Skip regions that had errors. 
1617 bool HadErrors = lookupRejectionLog(CurrentRegion)->hasErrors(); 1618 if (HadErrors) 1619 continue; 1620 1621 Region *ExpandedR = expandRegion(*CurrentRegion); 1622 1623 if (!ExpandedR) 1624 continue; 1625 1626 R.addSubRegion(ExpandedR, true); 1627 ValidRegions.insert(ExpandedR); 1628 removeCachedResults(*CurrentRegion); 1629 removeCachedResultsRecursively(*ExpandedR); 1630 } 1631 } 1632 1633 bool ScopDetection::allBlocksValid(DetectionContext &Context) { 1634 Region &CurRegion = Context.CurRegion; 1635 1636 for (const BasicBlock *BB : CurRegion.blocks()) { 1637 Loop *L = LI.getLoopFor(BB); 1638 if (L && L->getHeader() == BB) { 1639 if (CurRegion.contains(L)) { 1640 if (!isValidLoop(L, Context)) { 1641 Context.IsInvalid = true; 1642 if (!KeepGoing) 1643 return false; 1644 } 1645 } else { 1646 SmallVector<BasicBlock *, 1> Latches; 1647 L->getLoopLatches(Latches); 1648 for (BasicBlock *Latch : Latches) 1649 if (CurRegion.contains(Latch)) 1650 return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true, 1651 L); 1652 } 1653 } 1654 } 1655 1656 for (BasicBlock *BB : CurRegion.blocks()) { 1657 bool IsErrorBlock = isErrorBlock(*BB, CurRegion); 1658 1659 // Also check exception blocks (and possibly register them as non-affine 1660 // regions). Even though exception blocks are not modeled, we use them 1661 // to forward-propagate domain constraints during ScopInfo construction. 
1662 if (!isValidCFG(*BB, false, IsErrorBlock, Context) && !KeepGoing) 1663 return false; 1664 1665 if (IsErrorBlock) 1666 continue; 1667 1668 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I) 1669 if (!isValidInstruction(*I, Context)) { 1670 Context.IsInvalid = true; 1671 if (!KeepGoing) 1672 return false; 1673 } 1674 } 1675 1676 if (!hasAffineMemoryAccesses(Context)) 1677 return false; 1678 1679 return true; 1680 } 1681 1682 bool ScopDetection::hasSufficientCompute(DetectionContext &Context, 1683 int NumLoops) const { 1684 int InstCount = 0; 1685 1686 if (NumLoops == 0) 1687 return false; 1688 1689 for (auto *BB : Context.CurRegion.blocks()) 1690 if (Context.CurRegion.contains(LI.getLoopFor(BB))) 1691 InstCount += BB->size(); 1692 1693 InstCount = InstCount / NumLoops; 1694 1695 return InstCount >= ProfitabilityMinPerLoopInstructions; 1696 } 1697 1698 bool ScopDetection::hasPossiblyDistributableLoop( 1699 DetectionContext &Context) const { 1700 for (auto *BB : Context.CurRegion.blocks()) { 1701 auto *L = LI.getLoopFor(BB); 1702 if (!L) 1703 continue; 1704 if (!Context.CurRegion.contains(L)) 1705 continue; 1706 if (Context.BoxedLoopsSet.count(L)) 1707 continue; 1708 unsigned StmtsWithStoresInLoops = 0; 1709 for (auto *LBB : L->blocks()) { 1710 bool MemStore = false; 1711 for (auto &I : *LBB) 1712 MemStore |= isa<StoreInst>(&I); 1713 StmtsWithStoresInLoops += MemStore; 1714 } 1715 return (StmtsWithStoresInLoops > 1); 1716 } 1717 return false; 1718 } 1719 1720 bool ScopDetection::isProfitableRegion(DetectionContext &Context) const { 1721 Region &CurRegion = Context.CurRegion; 1722 1723 if (PollyProcessUnprofitable) 1724 return true; 1725 1726 // We can probably not do a lot on scops that only write or only read 1727 // data. 
1728 if (!Context.hasStores || !Context.hasLoads) 1729 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1730 1731 int NumLoops = 1732 countBeneficialLoops(&CurRegion, SE, LI, MIN_LOOP_TRIP_COUNT).NumLoops; 1733 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size(); 1734 1735 // Scops with at least two loops may allow either loop fusion or tiling and 1736 // are consequently interesting to look at. 1737 if (NumAffineLoops >= 2) 1738 return true; 1739 1740 // A loop with multiple non-trivial blocks might be amendable to distribution. 1741 if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context)) 1742 return true; 1743 1744 // Scops that contain a loop with a non-trivial amount of computation per 1745 // loop-iteration are interesting as we may be able to parallelize such 1746 // loops. Individual loops that have only a small amount of computation 1747 // per-iteration are performance-wise very fragile as any change to the 1748 // loop induction variables may affect performance. To not cause spurious 1749 // performance regressions, we do not consider such loops. 
1750 if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops)) 1751 return true; 1752 1753 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, &CurRegion); 1754 } 1755 1756 bool ScopDetection::isValidRegion(DetectionContext &Context) { 1757 Region &CurRegion = Context.CurRegion; 1758 1759 POLLY_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() 1760 << "\n\t"); 1761 1762 if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) { 1763 POLLY_DEBUG(dbgs() << "Top level region is invalid\n"); 1764 Context.IsInvalid = true; 1765 return false; 1766 } 1767 1768 DebugLoc DbgLoc; 1769 if (CurRegion.getExit() && 1770 isa<UnreachableInst>(CurRegion.getExit()->getTerminator())) { 1771 POLLY_DEBUG(dbgs() << "Unreachable in exit\n"); 1772 return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true, 1773 CurRegion.getExit(), DbgLoc); 1774 } 1775 1776 if (!OnlyRegion.empty() && 1777 !CurRegion.getEntry()->getName().count(OnlyRegion)) { 1778 POLLY_DEBUG({ 1779 dbgs() << "Region entry does not match -polly-only-region"; 1780 dbgs() << "\n"; 1781 }); 1782 Context.IsInvalid = true; 1783 return false; 1784 } 1785 1786 for (BasicBlock *Pred : predecessors(CurRegion.getEntry())) { 1787 Instruction *PredTerm = Pred->getTerminator(); 1788 if (isa<IndirectBrInst>(PredTerm) || isa<CallBrInst>(PredTerm)) 1789 return invalid<ReportIndirectPredecessor>( 1790 Context, /*Assert=*/true, PredTerm, PredTerm->getDebugLoc()); 1791 } 1792 1793 // SCoP cannot contain the entry block of the function, because we need 1794 // to insert alloca instruction there when translate scalar to array. 1795 if (!PollyAllowFullFunction && 1796 CurRegion.getEntry() == 1797 &(CurRegion.getEntry()->getParent()->getEntryBlock())) 1798 return invalid<ReportEntry>(Context, /*Assert=*/true, CurRegion.getEntry()); 1799 1800 if (!allBlocksValid(Context)) { 1801 // TODO: Every failure condition within allBlocksValid should call 1802 // invalid<Reason>(). 
Otherwise we reject SCoPs without giving feedback to 1803 // the user. 1804 Context.IsInvalid = true; 1805 return false; 1806 } 1807 1808 if (!isReducibleRegion(CurRegion, DbgLoc)) 1809 return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true, 1810 &CurRegion, DbgLoc); 1811 1812 POLLY_DEBUG(dbgs() << "OK\n"); 1813 return true; 1814 } 1815 1816 void ScopDetection::markFunctionAsInvalid(Function *F) { 1817 F->addFnAttr(PollySkipFnAttr); 1818 } 1819 1820 bool ScopDetection::isValidFunction(Function &F) { 1821 return !F.hasFnAttribute(PollySkipFnAttr); 1822 } 1823 1824 void ScopDetection::printLocations(Function &F) { 1825 for (const Region *R : *this) { 1826 unsigned LineEntry, LineExit; 1827 std::string FileName; 1828 1829 getDebugLocation(R, LineEntry, LineExit, FileName); 1830 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit); 1831 F.getContext().diagnose(Diagnostic); 1832 } 1833 } 1834 1835 void ScopDetection::emitMissedRemarks(const Function &F) { 1836 for (auto &DIt : DetectionContextMap) { 1837 DetectionContext &DC = *DIt.getSecond().get(); 1838 if (DC.Log.hasErrors()) 1839 emitRejectionRemarks(DIt.getFirst(), DC.Log, ORE); 1840 } 1841 } 1842 1843 bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const { 1844 /// Enum for coloring BBs in Region. 1845 /// 1846 /// WHITE - Unvisited BB in DFS walk. 1847 /// GREY - BBs which are currently on the DFS stack for processing. 1848 /// BLACK - Visited and completely processed BB. 1849 enum Color { WHITE, GREY, BLACK }; 1850 1851 BasicBlock *REntry = R.getEntry(); 1852 BasicBlock *RExit = R.getExit(); 1853 // Map to match the color of a BasicBlock during the DFS walk. 1854 DenseMap<const BasicBlock *, Color> BBColorMap; 1855 // Stack keeping track of current BB and index of next child to be processed. 
1856 std::stack<std::pair<BasicBlock *, unsigned>> DFSStack; 1857 1858 unsigned AdjacentBlockIndex = 0; 1859 BasicBlock *CurrBB, *SuccBB; 1860 CurrBB = REntry; 1861 1862 // Initialize the map for all BB with WHITE color. 1863 for (auto *BB : R.blocks()) 1864 BBColorMap[BB] = WHITE; 1865 1866 // Process the entry block of the Region. 1867 BBColorMap[CurrBB] = GREY; 1868 DFSStack.push(std::make_pair(CurrBB, 0)); 1869 1870 while (!DFSStack.empty()) { 1871 // Get next BB on stack to be processed. 1872 CurrBB = DFSStack.top().first; 1873 AdjacentBlockIndex = DFSStack.top().second; 1874 DFSStack.pop(); 1875 1876 // Loop to iterate over the successors of current BB. 1877 const Instruction *TInst = CurrBB->getTerminator(); 1878 unsigned NSucc = TInst->getNumSuccessors(); 1879 for (unsigned I = AdjacentBlockIndex; I < NSucc; 1880 ++I, ++AdjacentBlockIndex) { 1881 SuccBB = TInst->getSuccessor(I); 1882 1883 // Checks for region exit block and self-loops in BB. 1884 if (SuccBB == RExit || SuccBB == CurrBB) 1885 continue; 1886 1887 // WHITE indicates an unvisited BB in DFS walk. 1888 if (BBColorMap[SuccBB] == WHITE) { 1889 // Push the current BB and the index of the next child to be visited. 1890 DFSStack.push(std::make_pair(CurrBB, I + 1)); 1891 // Push the next BB to be processed. 1892 DFSStack.push(std::make_pair(SuccBB, 0)); 1893 // First time the BB is being processed. 1894 BBColorMap[SuccBB] = GREY; 1895 break; 1896 } else if (BBColorMap[SuccBB] == GREY) { 1897 // GREY indicates a loop in the control flow. 1898 // If the destination dominates the source, it is a natural loop 1899 // else, an irreducible control flow in the region is detected. 1900 if (!DT.dominates(SuccBB, CurrBB)) { 1901 // Get debug info of instruction which causes irregular control flow. 1902 DbgLoc = TInst->getDebugLoc(); 1903 return false; 1904 } 1905 } 1906 } 1907 1908 // If all children of current BB have been processed, 1909 // then mark that BB as fully processed. 
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  // No GREY back edge violated dominance, so every loop entered through a
  // single header: the region's control flow is reducible.
  return true;
}

// Fold the per-SCoP loop statistics into the global STATISTIC counters,
// either for all detected SCoPs or only for the profitable ones.
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable) {
  if (!OnlyProfitable) {
    NumLoopsInScop += Stats.NumLoops;
    MaxNumLoopsInScop =
        std::max(MaxNumLoopsInScop.getValue(), (uint64_t)Stats.NumLoops);
    // Bucket the SCoP by its maximal loop depth (0..5, or larger).
    if (Stats.MaxDepth == 0)
      NumScopsDepthZero++;
    else if (Stats.MaxDepth == 1)
      NumScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumScopsDepthFive++;
    else
      NumScopsDepthLarger++;
  } else {
    NumLoopsInProfScop += Stats.NumLoops;
    MaxNumLoopsInProfScop =
        std::max(MaxNumLoopsInProfScop.getValue(), (uint64_t)Stats.NumLoops);
    // Same depth bucketing, but for the profitable-SCoP counters.
    if (Stats.MaxDepth == 0)
      NumProfScopsDepthZero++;
    else if (Stats.MaxDepth == 1)
      NumProfScopsDepthOne++;
    else if (Stats.MaxDepth == 2)
      NumProfScopsDepthTwo++;
    else if (Stats.MaxDepth == 3)
      NumProfScopsDepthThree++;
    else if (Stats.MaxDepth == 4)
      NumProfScopsDepthFour++;
    else if (Stats.MaxDepth == 5)
      NumProfScopsDepthFive++;
    else
      NumProfScopsDepthLarger++;
  }
}

// Look up the DetectionContext recorded for region R, or nullptr if no
// detection was run for (entry, exit) of R.
ScopDetection::DetectionContext *
ScopDetection::getDetectionContext(const Region *R) const {
  auto DCMIt = DetectionContextMap.find(getBBPairForRegion(R));
  if (DCMIt == DetectionContextMap.end())
    return nullptr;
  return DCMIt->second.get();
}

// Return the rejection log for R, or nullptr if R was never analyzed.
const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
  const DetectionContext *DC = getDetectionContext(R);
  return DC ?
&DC->Log : nullptr;
}

// Re-run detection on an already accepted region and assert it still
// qualifies; used by verifyAnalysis under -polly-verify.
void ScopDetection::verifyRegion(const Region &R) {
  assert(isMaxRegionInScop(R) && "Expect R is a valid region.");

  DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
  isValidRegion(Context);
}

// Verify every currently valid region; no-op unless VerifyScops is set.
void ScopDetection::verifyAnalysis() {
  if (!VerifyScops)
    return;

  for (const Region *R : ValidRegions)
    verifyRegion(*R);
}

// Legacy pass manager entry point: gather the required analyses, build a
// ScopDetection result and run detection on F. Returns false since the
// IR is never modified.
bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

  Result = std::make_unique<ScopDetection>(DT, SE, LI, RI, AA, ORE);
  Result->detect(F);
  return false;
}

void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  // We also need AA and RegionInfo when we are verifying analysis.
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<RegionInfoPass>();
  AU.setPreservesAll();
}

// Print one line per valid region of the last detection result.
void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
  for (const Region *R : Result->ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
}

ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
  // Disable runtime alias checks if we ignore aliasing all together.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}

ScopAnalysis::ScopAnalysis() {
  // Disable runtime alias checks if we ignore aliasing all together.
  if (IgnoreAliasing)
    PollyUseRuntimeAliasChecks = false;
}

void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }

char ScopDetectionWrapperPass::ID;

AnalysisKey ScopAnalysis::Key;

// New pass manager entry point: fetch the analyses from the manager,
// run detection on F and return the result by value.
ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
  auto &AA = FAM.getResult<AAManager>(F);
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);

  ScopDetection Result(DT, SE, LI, RI, AA, ORE);
  Result.detect(F);
  return Result;
}

// New-PM printer: list all valid regions found by ScopAnalysis for F.
PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
                                               FunctionAnalysisManager &FAM) {
  OS << "Detected Scops in Function " << F.getName() << "\n";
  auto &SD = FAM.getResult<ScopAnalysis>(F);
  for (const Region *R : SD.ValidRegions)
    OS << "Valid Region for Scop: " << R->getNameStr() << '\n';

  OS << "\n";
  return PreservedAnalyses::all();
}

Pass *polly::createScopDetectionWrapperPassPass() {
  return new ScopDetectionWrapperPass();
}

INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect",
                      "Polly - Detect static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect", 2073 "Polly - Detect static control parts (SCoPs)", false, false) 2074 2075 //===----------------------------------------------------------------------===// 2076 2077 namespace { 2078 /// Print result from ScopDetectionWrapperPass. 2079 class ScopDetectionPrinterLegacyPass final : public FunctionPass { 2080 public: 2081 static char ID; 2082 2083 ScopDetectionPrinterLegacyPass() : ScopDetectionPrinterLegacyPass(outs()) {} 2084 2085 explicit ScopDetectionPrinterLegacyPass(llvm::raw_ostream &OS) 2086 : FunctionPass(ID), OS(OS) {} 2087 2088 bool runOnFunction(Function &F) override { 2089 ScopDetectionWrapperPass &P = getAnalysis<ScopDetectionWrapperPass>(); 2090 2091 OS << "Printing analysis '" << P.getPassName() << "' for function '" 2092 << F.getName() << "':\n"; 2093 P.print(OS); 2094 2095 return false; 2096 } 2097 2098 void getAnalysisUsage(AnalysisUsage &AU) const override { 2099 FunctionPass::getAnalysisUsage(AU); 2100 AU.addRequired<ScopDetectionWrapperPass>(); 2101 AU.setPreservesAll(); 2102 } 2103 2104 private: 2105 llvm::raw_ostream &OS; 2106 }; 2107 2108 char ScopDetectionPrinterLegacyPass::ID = 0; 2109 } // namespace 2110 2111 Pass *polly::createScopDetectionPrinterLegacyPass(raw_ostream &OS) { 2112 return new ScopDetectionPrinterLegacyPass(OS); 2113 } 2114 2115 INITIALIZE_PASS_BEGIN(ScopDetectionPrinterLegacyPass, "polly-print-detect", 2116 "Polly - Print static control parts (SCoPs)", false, 2117 false); 2118 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass); 2119 INITIALIZE_PASS_END(ScopDetectionPrinterLegacyPass, "polly-print-detect", 2120 "Polly - Print static control parts (SCoPs)", false, false) 2121