1 //===- ScopBuilder.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Create a polyhedral description for a static control flow region. 10 // 11 // The pass creates a polyhedral description of the Scops detected by the SCoP 12 // detection derived from their LLVM-IR code. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "polly/ScopBuilder.h" 17 #include "polly/Options.h" 18 #include "polly/ScopDetection.h" 19 #include "polly/ScopInfo.h" 20 #include "polly/Support/GICHelper.h" 21 #include "polly/Support/ISLTools.h" 22 #include "polly/Support/SCEVValidator.h" 23 #include "polly/Support/ScopHelper.h" 24 #include "polly/Support/VirtualInstruction.h" 25 #include "llvm/ADT/ArrayRef.h" 26 #include "llvm/ADT/EquivalenceClasses.h" 27 #include "llvm/ADT/PostOrderIterator.h" 28 #include "llvm/ADT/Sequence.h" 29 #include "llvm/ADT/SmallSet.h" 30 #include "llvm/ADT/Statistic.h" 31 #include "llvm/Analysis/AliasAnalysis.h" 32 #include "llvm/Analysis/AssumptionCache.h" 33 #include "llvm/Analysis/Delinearization.h" 34 #include "llvm/Analysis/Loads.h" 35 #include "llvm/Analysis/LoopInfo.h" 36 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 37 #include "llvm/Analysis/RegionInfo.h" 38 #include "llvm/Analysis/RegionIterator.h" 39 #include "llvm/Analysis/ScalarEvolution.h" 40 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 41 #include "llvm/IR/BasicBlock.h" 42 #include "llvm/IR/DataLayout.h" 43 #include "llvm/IR/DebugLoc.h" 44 #include "llvm/IR/DerivedTypes.h" 45 #include "llvm/IR/Dominators.h" 46 #include "llvm/IR/Function.h" 47 #include "llvm/IR/InstrTypes.h" 48 #include "llvm/IR/Instruction.h" 49 #include "llvm/IR/Instructions.h" 50 #include "llvm/IR/Type.h" 51 #include "llvm/IR/Use.h" 52 #include "llvm/IR/Value.h" 53 #include "llvm/Support/CommandLine.h" 54 #include "llvm/Support/Compiler.h" 55 #include "llvm/Support/Debug.h" 56 #include "llvm/Support/ErrorHandling.h" 57 #include "llvm/Support/raw_ostream.h" 58 #include <cassert> 59 60 using namespace llvm; 61 using namespace polly; 62 63 #include "polly/Support/PollyDebug.h" 64 #define DEBUG_TYPE "polly-scops" 65 66 STATISTIC(ScopFound, "Number of valid Scops"); 67 STATISTIC(RichScopFound, "Number of Scops containing a loop"); 68 STATISTIC(InfeasibleScops, 69 "Number of SCoPs with statically infeasible context."); 70 71 bool polly::ModelReadOnlyScalars; 72 73 // The maximal number of dimensions we allow during invariant load construction. 74 // More complex access ranges will result in very high compile time and are also 75 // unlikely to result in good code. This value is very high and should only 76 // trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006). 
static unsigned const MaxDimensionsInAccessRange = 9;

static cl::opt<bool, true> XModelReadOnlyScalars(
    "polly-analyze-read-only-scalars",
    cl::desc("Model read-only scalar values in the scop description"),
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::init(true),
    cl::cat(PollyCategory));

static cl::opt<int>
    OptComputeOut("polly-analysis-computeout",
                  cl::desc("Bound the scop analysis by a maximal amount of "
                           "computational steps (0 means no bound)"),
                  cl::Hidden, cl::init(800000), cl::cat(PollyCategory));

static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
    "polly-allow-dereference-of-all-function-parameters",
    cl::desc(
        "Treat all pointer parameters of functions as dereferenceable."
        " This is useful for invariant load hoisting, since we can generate"
        " fewer runtime checks. This is only valid if all pointers passed to"
        " functions are always initialized, so that Polly can choose to hoist"
        " their loads."),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool>
    PollyIgnoreInbounds("polly-ignore-inbounds",
                        cl::desc("Do not take inbounds assumptions at all"),
                        cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
    "polly-rtc-max-arrays-per-group",
    cl::desc("The maximal number of arrays to compare in each alias group."),
    cl::Hidden, cl::init(20), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxAccessDisjuncts(
    "polly-rtc-max-array-disjuncts",
    cl::desc("The maximal number of disjuncts allowed in memory accesses "
             "to build RTCs."),
    cl::Hidden, cl::init(8), cl::cat(PollyCategory));

static cl::opt<unsigned> RunTimeChecksMaxParameters(
    "polly-rtc-max-parameters",
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
    cl::init(8), cl::cat(PollyCategory));

static cl::opt<bool> UnprofitableScalarAccs(
    "polly-unprofitable-scalar-accs",
    cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<std::string> UserContextStr(
    "polly-context", cl::value_desc("isl parameter set"),
    cl::desc("Provide additional constraints on the context parameters"),
    cl::init(""), cl::cat(PollyCategory));

static cl::opt<bool> DetectReductions("polly-detect-reductions",
                                      cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::init(true),
                                      cl::cat(PollyCategory));

// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations are,
// in contrast, pretty stable.
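// For example, a reduction of the form `Prod *= A[i]` can already exceed the
// range of a 64-bit integer after a few dozen iterations once the factors are
// greater than one, while `Sum += A[i]` only grows linearly and bitwise
// reductions (and, or, xor) cannot overflow at all.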
140 static cl::opt<bool> DisableMultiplicativeReductions( 141 "polly-disable-multiplicative-reductions", 142 cl::desc("Disable multiplicative reductions"), cl::Hidden, 143 cl::cat(PollyCategory)); 144 145 enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores }; 146 147 static cl::opt<GranularityChoice> StmtGranularity( 148 "polly-stmt-granularity", 149 cl::desc( 150 "Algorithm to use for splitting basic blocks into multiple statements"), 151 cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb", 152 "One statement per basic block"), 153 clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep", 154 "Scalar independence heuristic"), 155 clEnumValN(GranularityChoice::Stores, "store", 156 "Store-level granularity")), 157 cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory)); 158 159 /// Helper to treat non-affine regions and basic blocks the same. 160 /// 161 ///{ 162 163 /// Return the block that is the representing block for @p RN. 164 static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) { 165 return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry() 166 : RN->getNodeAs<BasicBlock>(); 167 } 168 169 /// Return the @p idx'th block that is executed after @p RN. 170 static inline BasicBlock * 171 getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) { 172 if (RN->isSubRegion()) { 173 assert(idx == 0); 174 return RN->getNodeAs<Region>()->getExit(); 175 } 176 return TI->getSuccessor(idx); 177 } 178 179 static bool containsErrorBlock(RegionNode *RN, const Region &R, 180 ScopDetection *SD) { 181 if (!RN->isSubRegion()) 182 return SD->isErrorBlock(*RN->getNodeAs<BasicBlock>(), R); 183 for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks()) 184 if (SD->isErrorBlock(*BB, R)) 185 return true; 186 return false; 187 } 188 189 ///} 190 191 /// Create a map to map from a given iteration to a subsequent iteration. 192 /// 193 /// This map maps from SetSpace -> SetSpace where the dimensions @p Dim 194 /// is incremented by one and all other dimensions are equal, e.g., 195 /// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3] 196 /// 197 /// if @p Dim is 2 and @p SetSpace has 4 dimensions. 198 static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) { 199 isl::space MapSpace = SetSpace.map_from_set(); 200 isl::map NextIterationMap = isl::map::universe(MapSpace); 201 for (unsigned u : rangeIslSize(0, NextIterationMap.domain_tuple_dim())) 202 if (u != Dim) 203 NextIterationMap = 204 NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u); 205 isl::constraint C = 206 isl::constraint::alloc_equality(isl::local_space(MapSpace)); 207 C = C.set_constant_si(1); 208 C = C.set_coefficient_si(isl::dim::in, Dim, 1); 209 C = C.set_coefficient_si(isl::dim::out, Dim, -1); 210 NextIterationMap = NextIterationMap.add_constraint(C); 211 return NextIterationMap; 212 } 213 214 /// Add @p BSet to set @p BoundedParts if @p BSet is bounded. 215 static isl::set collectBoundedParts(isl::set S) { 216 isl::set BoundedParts = isl::set::empty(S.get_space()); 217 for (isl::basic_set BSet : S.get_basic_set_list()) 218 if (BSet.is_bounded()) 219 BoundedParts = BoundedParts.unite(isl::set(BSet)); 220 return BoundedParts; 221 } 222 223 /// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim. 224 /// 225 /// @returns A separation of @p S into first an unbounded then a bounded subset, 226 /// both with regards to the dimension @p Dim. 
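///
/// For example, for S = { [i, j] : 0 <= i and 0 <= j <= 7 } and Dim = 0 the
/// bounded part is empty and the unbounded part is S itself, since dimension
/// 0 (i) has no upper bound; for Dim = 1 the roles are swapped because
/// dimension 1 (j) is bounded by 7.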
227 static std::pair<isl::set, isl::set> partitionSetParts(isl::set S, 228 unsigned Dim) { 229 for (unsigned u : rangeIslSize(0, S.tuple_dim())) 230 S = S.lower_bound_si(isl::dim::set, u, 0); 231 232 unsigned NumDimsS = unsignedFromIslSize(S.tuple_dim()); 233 isl::set OnlyDimS = S; 234 235 // Remove dimensions that are greater than Dim as they are not interesting. 236 assert(NumDimsS >= Dim + 1); 237 OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1); 238 239 // Create artificial parametric upper bounds for dimensions smaller than Dim 240 // as we are not interested in them. 241 OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim); 242 243 for (unsigned u = 0; u < Dim; u++) { 244 isl::constraint C = isl::constraint::alloc_inequality( 245 isl::local_space(OnlyDimS.get_space())); 246 C = C.set_coefficient_si(isl::dim::param, u, 1); 247 C = C.set_coefficient_si(isl::dim::set, u, -1); 248 OnlyDimS = OnlyDimS.add_constraint(C); 249 } 250 251 // Collect all bounded parts of OnlyDimS. 252 isl::set BoundedParts = collectBoundedParts(OnlyDimS); 253 254 // Create the dimensions greater than Dim again. 255 BoundedParts = 256 BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1); 257 258 // Remove the artificial upper bound parameters again. 259 BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim); 260 261 isl::set UnboundedParts = S.subtract(BoundedParts); 262 return std::make_pair(UnboundedParts, BoundedParts); 263 } 264 265 /// Create the conditions under which @p L @p Pred @p R is true. 266 static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L, 267 isl::pw_aff R) { 268 switch (Pred) { 269 case ICmpInst::ICMP_EQ: 270 return L.eq_set(R); 271 case ICmpInst::ICMP_NE: 272 return L.ne_set(R); 273 case ICmpInst::ICMP_SLT: 274 return L.lt_set(R); 275 case ICmpInst::ICMP_SLE: 276 return L.le_set(R); 277 case ICmpInst::ICMP_SGT: 278 return L.gt_set(R); 279 case ICmpInst::ICMP_SGE: 280 return L.ge_set(R); 281 case ICmpInst::ICMP_ULT: 282 return L.lt_set(R); 283 case ICmpInst::ICMP_UGT: 284 return L.gt_set(R); 285 case ICmpInst::ICMP_ULE: 286 return L.le_set(R); 287 case ICmpInst::ICMP_UGE: 288 return L.ge_set(R); 289 default: 290 llvm_unreachable("Non integer predicate not supported"); 291 } 292 } 293 294 isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL, 295 Loop *NewL) { 296 // If the loops are the same there is nothing to do. 297 if (NewL == OldL) 298 return Dom; 299 300 int OldDepth = scop->getRelativeLoopDepth(OldL); 301 int NewDepth = scop->getRelativeLoopDepth(NewL); 302 // If both loops are non-affine loops there is nothing to do. 303 if (OldDepth == -1 && NewDepth == -1) 304 return Dom; 305 306 // Distinguish three cases: 307 // 1) The depth is the same but the loops are not. 308 // => One loop was left one was entered. 309 // 2) The depth increased from OldL to NewL. 310 // => One loop was entered, none was left. 311 // 3) The depth decreased from OldL to NewL. 312 // => Loops were left were difference of the depths defines how many. 
313 if (OldDepth == NewDepth) { 314 assert(OldL->getParentLoop() == NewL->getParentLoop()); 315 Dom = Dom.project_out(isl::dim::set, NewDepth, 1); 316 Dom = Dom.add_dims(isl::dim::set, 1); 317 } else if (OldDepth < NewDepth) { 318 assert(OldDepth + 1 == NewDepth); 319 auto &R = scop->getRegion(); 320 (void)R; 321 assert(NewL->getParentLoop() == OldL || 322 ((!OldL || !R.contains(OldL)) && R.contains(NewL))); 323 Dom = Dom.add_dims(isl::dim::set, 1); 324 } else { 325 assert(OldDepth > NewDepth); 326 unsigned Diff = OldDepth - NewDepth; 327 unsigned NumDim = unsignedFromIslSize(Dom.tuple_dim()); 328 assert(NumDim >= Diff); 329 Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff); 330 } 331 332 return Dom; 333 } 334 335 /// Compute the isl representation for the SCEV @p E in this BB. 336 /// 337 /// @param BB The BB for which isl representation is to be 338 /// computed. 339 /// @param InvalidDomainMap A map of BB to their invalid domains. 340 /// @param E The SCEV that should be translated. 341 /// @param NonNegative Flag to indicate the @p E has to be non-negative. 342 /// 343 /// Note that this function will also adjust the invalid context accordingly. 344 345 __isl_give isl_pw_aff * 346 ScopBuilder::getPwAff(BasicBlock *BB, 347 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, 348 const SCEV *E, bool NonNegative) { 349 PWACtx PWAC = scop->getPwAff(E, BB, NonNegative, &RecordedAssumptions); 350 InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second); 351 return PWAC.first.release(); 352 } 353 354 /// Build condition sets for unsigned ICmpInst(s). 355 /// Special handling is required for unsigned operands to ensure that if 356 /// MSB (aka the Sign bit) is set for an operands in an unsigned ICmpInst 357 /// it should wrap around. 358 /// 359 /// @param IsStrictUpperBound holds information on the predicate relation 360 /// between TestVal and UpperBound, i.e, 361 /// TestVal < UpperBound OR TestVal <= UpperBound 362 __isl_give isl_set *ScopBuilder::buildUnsignedConditionSets( 363 BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain, 364 const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound, 365 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, 366 bool IsStrictUpperBound) { 367 // Do not take NonNeg assumption on TestVal 368 // as it might have MSB (Sign bit) set. 369 isl_pw_aff *TestVal = getPwAff(BB, InvalidDomainMap, SCEV_TestVal, false); 370 // Take NonNeg assumption on UpperBound. 
371 isl_pw_aff *UpperBound = 372 getPwAff(BB, InvalidDomainMap, SCEV_UpperBound, true); 373 374 // 0 <= TestVal 375 isl_set *First = 376 isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space( 377 isl_pw_aff_get_domain_space(TestVal))), 378 isl_pw_aff_copy(TestVal)); 379 380 isl_set *Second; 381 if (IsStrictUpperBound) 382 // TestVal < UpperBound 383 Second = isl_pw_aff_lt_set(TestVal, UpperBound); 384 else 385 // TestVal <= UpperBound 386 Second = isl_pw_aff_le_set(TestVal, UpperBound); 387 388 isl_set *ConsequenceCondSet = isl_set_intersect(First, Second); 389 return ConsequenceCondSet; 390 } 391 392 bool ScopBuilder::buildConditionSets( 393 BasicBlock *BB, SwitchInst *SI, Loop *L, __isl_keep isl_set *Domain, 394 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, 395 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) { 396 Value *Condition = getConditionFromTerminator(SI); 397 assert(Condition && "No condition for switch"); 398 399 isl_pw_aff *LHS, *RHS; 400 LHS = getPwAff(BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L)); 401 402 unsigned NumSuccessors = SI->getNumSuccessors(); 403 ConditionSets.resize(NumSuccessors); 404 for (auto &Case : SI->cases()) { 405 unsigned Idx = Case.getSuccessorIndex(); 406 ConstantInt *CaseValue = Case.getCaseValue(); 407 408 RHS = getPwAff(BB, InvalidDomainMap, SE.getSCEV(CaseValue)); 409 isl_set *CaseConditionSet = 410 buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS), 411 isl::manage(RHS)) 412 .release(); 413 ConditionSets[Idx] = isl_set_coalesce( 414 isl_set_intersect(CaseConditionSet, isl_set_copy(Domain))); 415 } 416 417 assert(ConditionSets[0] == nullptr && "Default condition set was set"); 418 isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]); 419 for (unsigned u = 2; u < NumSuccessors; u++) 420 ConditionSetUnion = 421 isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u])); 422 ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion); 423 424 isl_pw_aff_free(LHS); 425 426 return true; 427 } 428 429 bool ScopBuilder::buildConditionSets( 430 BasicBlock *BB, Value *Condition, Instruction *TI, Loop *L, 431 __isl_keep isl_set *Domain, 432 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, 433 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) { 434 isl_set *ConsequenceCondSet = nullptr; 435 436 if (auto Load = dyn_cast<LoadInst>(Condition)) { 437 const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L); 438 const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType()); 439 bool NonNeg = false; 440 isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, LHSSCEV, NonNeg); 441 isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, RHSSCEV, NonNeg); 442 ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS), 443 isl::manage(RHS)) 444 .release(); 445 } else if (auto *PHI = dyn_cast<PHINode>(Condition)) { 446 auto *Unique = dyn_cast<ConstantInt>( 447 getUniqueNonErrorValue(PHI, &scop->getRegion(), &SD)); 448 assert(Unique && 449 "A PHINode condition should only be accepted by ScopDetection if " 450 "getUniqueNonErrorValue returns non-NULL"); 451 452 if (Unique->isZero()) 453 ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain)); 454 else 455 ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain)); 456 } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) { 457 if (CCond->isZero()) 458 ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain)); 459 else 460 ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain)); 461 } else if (BinaryOperator *BinOp = 
dyn_cast<BinaryOperator>(Condition)) { 462 auto Opcode = BinOp->getOpcode(); 463 assert(Opcode == Instruction::And || Opcode == Instruction::Or); 464 465 bool Valid = buildConditionSets(BB, BinOp->getOperand(0), TI, L, Domain, 466 InvalidDomainMap, ConditionSets) && 467 buildConditionSets(BB, BinOp->getOperand(1), TI, L, Domain, 468 InvalidDomainMap, ConditionSets); 469 if (!Valid) { 470 while (!ConditionSets.empty()) 471 isl_set_free(ConditionSets.pop_back_val()); 472 return false; 473 } 474 475 isl_set_free(ConditionSets.pop_back_val()); 476 isl_set *ConsCondPart0 = ConditionSets.pop_back_val(); 477 isl_set_free(ConditionSets.pop_back_val()); 478 isl_set *ConsCondPart1 = ConditionSets.pop_back_val(); 479 480 if (Opcode == Instruction::And) 481 ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1); 482 else 483 ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1); 484 } else { 485 auto *ICond = dyn_cast<ICmpInst>(Condition); 486 assert(ICond && 487 "Condition of exiting branch was neither constant nor ICmp!"); 488 489 Region &R = scop->getRegion(); 490 491 isl_pw_aff *LHS, *RHS; 492 // For unsigned comparisons we assumed the signed bit of neither operand 493 // to be set. The comparison is equal to a signed comparison under this 494 // assumption. 495 bool NonNeg = ICond->isUnsigned(); 496 const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L), 497 *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L); 498 499 LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, &SD); 500 RightOperand = tryForwardThroughPHI(RightOperand, R, SE, &SD); 501 502 switch (ICond->getPredicate()) { 503 case ICmpInst::ICMP_ULT: 504 ConsequenceCondSet = 505 buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand, 506 RightOperand, InvalidDomainMap, true); 507 break; 508 case ICmpInst::ICMP_ULE: 509 ConsequenceCondSet = 510 buildUnsignedConditionSets(BB, Condition, Domain, LeftOperand, 511 RightOperand, InvalidDomainMap, false); 512 break; 513 case ICmpInst::ICMP_UGT: 514 ConsequenceCondSet = 515 buildUnsignedConditionSets(BB, Condition, Domain, RightOperand, 516 LeftOperand, InvalidDomainMap, true); 517 break; 518 case ICmpInst::ICMP_UGE: 519 ConsequenceCondSet = 520 buildUnsignedConditionSets(BB, Condition, Domain, RightOperand, 521 LeftOperand, InvalidDomainMap, false); 522 break; 523 default: 524 LHS = getPwAff(BB, InvalidDomainMap, LeftOperand, NonNeg); 525 RHS = getPwAff(BB, InvalidDomainMap, RightOperand, NonNeg); 526 ConsequenceCondSet = buildConditionSet(ICond->getPredicate(), 527 isl::manage(LHS), isl::manage(RHS)) 528 .release(); 529 break; 530 } 531 } 532 533 // If no terminator was given we are only looking for parameter constraints 534 // under which @p Condition is true/false. 535 if (!TI) 536 ConsequenceCondSet = isl_set_params(ConsequenceCondSet); 537 assert(ConsequenceCondSet); 538 ConsequenceCondSet = isl_set_coalesce( 539 isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain))); 540 541 isl_set *AlternativeCondSet = nullptr; 542 bool TooComplex = 543 isl_set_n_basic_set(ConsequenceCondSet) >= (int)MaxDisjunctsInDomain; 544 545 if (!TooComplex) { 546 AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain), 547 isl_set_copy(ConsequenceCondSet)); 548 TooComplex = 549 isl_set_n_basic_set(AlternativeCondSet) >= (int)MaxDisjunctsInDomain; 550 } 551 552 if (TooComplex) { 553 scop->invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(), 554 TI ? 
TI->getParent() : nullptr /* BasicBlock */); 555 isl_set_free(AlternativeCondSet); 556 isl_set_free(ConsequenceCondSet); 557 return false; 558 } 559 560 ConditionSets.push_back(ConsequenceCondSet); 561 ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet)); 562 563 return true; 564 } 565 566 bool ScopBuilder::buildConditionSets( 567 BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain, 568 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, 569 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) { 570 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) 571 return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap, 572 ConditionSets); 573 574 assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch."); 575 576 if (TI->getNumSuccessors() == 1) { 577 ConditionSets.push_back(isl_set_copy(Domain)); 578 return true; 579 } 580 581 Value *Condition = getConditionFromTerminator(TI); 582 assert(Condition && "No condition for Terminator"); 583 584 return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap, 585 ConditionSets); 586 } 587 588 bool ScopBuilder::propagateDomainConstraints( 589 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) { 590 // Iterate over the region R and propagate the domain constrains from the 591 // predecessors to the current node. In contrast to the 592 // buildDomainsWithBranchConstraints function, this one will pull the domain 593 // information from the predecessors instead of pushing it to the successors. 594 // Additionally, we assume the domains to be already present in the domain 595 // map here. However, we iterate again in reverse post order so we know all 596 // predecessors have been visited before a block or non-affine subregion is 597 // visited. 598 599 ReversePostOrderTraversal<Region *> RTraversal(R); 600 for (auto *RN : RTraversal) { 601 // Recurse for affine subregions but go on for basic blocks and non-affine 602 // subregions. 603 if (RN->isSubRegion()) { 604 Region *SubRegion = RN->getNodeAs<Region>(); 605 if (!scop->isNonAffineSubRegion(SubRegion)) { 606 if (!propagateDomainConstraints(SubRegion, InvalidDomainMap)) 607 return false; 608 continue; 609 } 610 } 611 612 BasicBlock *BB = getRegionNodeBasicBlock(RN); 613 isl::set &Domain = scop->getOrInitEmptyDomain(BB); 614 assert(!Domain.is_null()); 615 616 // Under the union of all predecessor conditions we can reach this block. 617 isl::set PredDom = getPredecessorDomainConstraints(BB, Domain); 618 Domain = Domain.intersect(PredDom).coalesce(); 619 Domain = Domain.align_params(scop->getParamSpace()); 620 621 Loop *BBLoop = getRegionNodeLoop(RN, LI); 622 if (BBLoop && BBLoop->getHeader() == BB && scop->contains(BBLoop)) 623 if (!addLoopBoundsToHeaderDomain(BBLoop, InvalidDomainMap)) 624 return false; 625 } 626 627 return true; 628 } 629 630 void ScopBuilder::propagateDomainConstraintsToRegionExit( 631 BasicBlock *BB, Loop *BBLoop, 632 SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks, 633 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) { 634 // Check if the block @p BB is the entry of a region. If so we propagate it's 635 // domain to the exit block of the region. Otherwise we are done. 636 auto *RI = scop->getRegion().getRegionInfo(); 637 auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr; 638 auto *ExitBB = BBReg ? 
BBReg->getExit() : nullptr; 639 if (!BBReg || BBReg->getEntry() != BB || !scop->contains(ExitBB)) 640 return; 641 642 // Do not propagate the domain if there is a loop backedge inside the region 643 // that would prevent the exit block from being executed. 644 auto *L = BBLoop; 645 while (L && scop->contains(L)) { 646 SmallVector<BasicBlock *, 4> LatchBBs; 647 BBLoop->getLoopLatches(LatchBBs); 648 for (auto *LatchBB : LatchBBs) 649 if (BB != LatchBB && BBReg->contains(LatchBB)) 650 return; 651 L = L->getParentLoop(); 652 } 653 654 isl::set Domain = scop->getOrInitEmptyDomain(BB); 655 assert(!Domain.is_null() && "Cannot propagate a nullptr"); 656 657 Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, scop->getBoxedLoops()); 658 659 // Since the dimensions of @p BB and @p ExitBB might be different we have to 660 // adjust the domain before we can propagate it. 661 isl::set AdjustedDomain = adjustDomainDimensions(Domain, BBLoop, ExitBBLoop); 662 isl::set &ExitDomain = scop->getOrInitEmptyDomain(ExitBB); 663 664 // If the exit domain is not yet created we set it otherwise we "add" the 665 // current domain. 666 ExitDomain = 667 !ExitDomain.is_null() ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain; 668 669 // Initialize the invalid domain. 670 InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space()); 671 672 FinishedExitBlocks.insert(ExitBB); 673 } 674 675 isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB, 676 isl::set Domain) { 677 // If @p BB is the ScopEntry we are done 678 if (scop->getRegion().getEntry() == BB) 679 return isl::set::universe(Domain.get_space()); 680 681 // The region info of this function. 682 auto &RI = *scop->getRegion().getRegionInfo(); 683 684 Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, scop->getBoxedLoops()); 685 686 // A domain to collect all predecessor domains, thus all conditions under 687 // which the block is executed. To this end we start with the empty domain. 688 isl::set PredDom = isl::set::empty(Domain.get_space()); 689 690 // Set of regions of which the entry block domain has been propagated to BB. 691 // all predecessors inside any of the regions can be skipped. 692 SmallSet<Region *, 8> PropagatedRegions; 693 694 for (auto *PredBB : predecessors(BB)) { 695 // Skip backedges. 696 if (DT.dominates(BB, PredBB)) 697 continue; 698 699 // If the predecessor is in a region we used for propagation we can skip it. 700 auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); }; 701 if (llvm::any_of(PropagatedRegions, PredBBInRegion)) { 702 continue; 703 } 704 705 // Check if there is a valid region we can use for propagation, thus look 706 // for a region that contains the predecessor and has @p BB as exit block. 707 // FIXME: This was an side-effect-free (and possibly infinite) loop when 708 // committed and seems not to be needed. 709 auto *PredR = RI.getRegionFor(PredBB); 710 while (PredR->getExit() != BB && !PredR->contains(BB)) 711 PredR = PredR->getParent(); 712 713 // If a valid region for propagation was found use the entry of that region 714 // for propagation, otherwise the PredBB directly. 
715 if (PredR->getExit() == BB) { 716 PredBB = PredR->getEntry(); 717 PropagatedRegions.insert(PredR); 718 } 719 720 isl::set PredBBDom = scop->getDomainConditions(PredBB); 721 Loop *PredBBLoop = 722 getFirstNonBoxedLoopFor(PredBB, LI, scop->getBoxedLoops()); 723 PredBBDom = adjustDomainDimensions(PredBBDom, PredBBLoop, BBLoop); 724 PredDom = PredDom.unite(PredBBDom); 725 } 726 727 return PredDom; 728 } 729 730 bool ScopBuilder::addLoopBoundsToHeaderDomain( 731 Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) { 732 int LoopDepth = scop->getRelativeLoopDepth(L); 733 assert(LoopDepth >= 0 && "Loop in region should have at least depth one"); 734 735 BasicBlock *HeaderBB = L->getHeader(); 736 assert(scop->isDomainDefined(HeaderBB)); 737 isl::set &HeaderBBDom = scop->getOrInitEmptyDomain(HeaderBB); 738 739 isl::map NextIterationMap = 740 createNextIterationMap(HeaderBBDom.get_space(), LoopDepth); 741 742 isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space()); 743 744 SmallVector<BasicBlock *, 4> LatchBlocks; 745 L->getLoopLatches(LatchBlocks); 746 747 for (BasicBlock *LatchBB : LatchBlocks) { 748 // If the latch is only reachable via error statements we skip it. 749 if (!scop->isDomainDefined(LatchBB)) 750 continue; 751 752 isl::set LatchBBDom = scop->getDomainConditions(LatchBB); 753 754 isl::set BackedgeCondition; 755 756 Instruction *TI = LatchBB->getTerminator(); 757 BranchInst *BI = dyn_cast<BranchInst>(TI); 758 assert(BI && "Only branch instructions allowed in loop latches"); 759 760 if (BI->isUnconditional()) 761 BackedgeCondition = LatchBBDom; 762 else { 763 SmallVector<isl_set *, 8> ConditionSets; 764 int idx = BI->getSuccessor(0) != HeaderBB; 765 if (!buildConditionSets(LatchBB, TI, L, LatchBBDom.get(), 766 InvalidDomainMap, ConditionSets)) 767 return false; 768 769 // Free the non back edge condition set as we do not need it. 770 isl_set_free(ConditionSets[1 - idx]); 771 772 BackedgeCondition = isl::manage(ConditionSets[idx]); 773 } 774 775 int LatchLoopDepth = scop->getRelativeLoopDepth(LI.getLoopFor(LatchBB)); 776 assert(LatchLoopDepth >= LoopDepth); 777 BackedgeCondition = BackedgeCondition.project_out( 778 isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth); 779 UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition); 780 } 781 782 isl::map ForwardMap = ForwardMap.lex_le(HeaderBBDom.get_space()); 783 for (int i = 0; i < LoopDepth; i++) 784 ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i); 785 786 isl::set UnionBackedgeConditionComplement = 787 UnionBackedgeCondition.complement(); 788 UnionBackedgeConditionComplement = 789 UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth, 790 0); 791 UnionBackedgeConditionComplement = 792 UnionBackedgeConditionComplement.apply(ForwardMap); 793 HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement); 794 HeaderBBDom = HeaderBBDom.apply(NextIterationMap); 795 796 auto Parts = partitionSetParts(HeaderBBDom, LoopDepth); 797 HeaderBBDom = Parts.second; 798 799 // Check if there is a <nsw> tagged AddRec for this loop and if so do not 800 // require a runtime check. The assumption is already implied by the <nsw> 801 // tag. 
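  // For example, an induction variable with the SCEV {0,+,1}<nsw> cannot wrap
  // in the signed sense, so the no-infinite-loop assumption recorded below
  // holds by construction and needs no runtime check.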
802 bool RequiresRTC = !scop->hasNSWAddRecForLoop(L); 803 804 isl::set UnboundedCtx = Parts.first.params(); 805 recordAssumption(&RecordedAssumptions, INFINITELOOP, UnboundedCtx, 806 HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION, 807 nullptr, RequiresRTC); 808 return true; 809 } 810 811 void ScopBuilder::buildInvariantEquivalenceClasses() { 812 DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses; 813 814 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads(); 815 for (LoadInst *LInst : RIL) { 816 const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand()); 817 818 Type *Ty = LInst->getType(); 819 LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)]; 820 if (ClassRep) { 821 scop->addInvariantLoadMapping(LInst, ClassRep); 822 continue; 823 } 824 825 ClassRep = LInst; 826 scop->addInvariantEquivClass( 827 InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), {}, Ty}); 828 } 829 } 830 831 bool ScopBuilder::buildDomains( 832 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) { 833 bool IsOnlyNonAffineRegion = scop->isNonAffineSubRegion(R); 834 auto *EntryBB = R->getEntry(); 835 auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB); 836 int LD = scop->getRelativeLoopDepth(L); 837 auto *S = 838 isl_set_universe(isl_space_set_alloc(scop->getIslCtx().get(), 0, LD + 1)); 839 840 InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S))); 841 isl::set Domain = isl::manage(S); 842 scop->setDomain(EntryBB, Domain); 843 844 if (IsOnlyNonAffineRegion) 845 return !containsErrorBlock(R->getNode(), *R, &SD); 846 847 if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap)) 848 return false; 849 850 if (!propagateDomainConstraints(R, InvalidDomainMap)) 851 return false; 852 853 // Error blocks and blocks dominated by them have been assumed to never be 854 // executed. Representing them in the Scop does not add any value. In fact, 855 // it is likely to cause issues during construction of the ScopStmts. The 856 // contents of error blocks have not been verified to be expressible and 857 // will cause problems when building up a ScopStmt for them. 858 // Furthermore, basic blocks dominated by error blocks may reference 859 // instructions in the error block which, if the error block is not modeled, 860 // can themselves not be constructed properly. To this end we will replace 861 // the domains of error blocks and those only reachable via error blocks 862 // with an empty set. Additionally, we will record for each block under which 863 // parameter combination it would be reached via an error block in its 864 // InvalidDomain. This information is needed during load hoisting. 865 if (!propagateInvalidStmtDomains(R, InvalidDomainMap)) 866 return false; 867 868 return true; 869 } 870 871 bool ScopBuilder::buildDomainsWithBranchConstraints( 872 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) { 873 // To create the domain for each block in R we iterate over all blocks and 874 // subregions in R and propagate the conditions under which the current region 875 // element is executed. To this end we iterate in reverse post order over R as 876 // it ensures that we first visit all predecessors of a region node (either a 877 // basic block or a subregion) before we visit the region node itself. 
878 // Initially, only the domain for the SCoP region entry block is set and from 879 // there we propagate the current domain to all successors, however we add the 880 // condition that the successor is actually executed next. 881 // As we are only interested in non-loop carried constraints here we can 882 // simply skip loop back edges. 883 884 SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks; 885 ReversePostOrderTraversal<Region *> RTraversal(R); 886 for (auto *RN : RTraversal) { 887 // Recurse for affine subregions but go on for basic blocks and non-affine 888 // subregions. 889 if (RN->isSubRegion()) { 890 Region *SubRegion = RN->getNodeAs<Region>(); 891 if (!scop->isNonAffineSubRegion(SubRegion)) { 892 if (!buildDomainsWithBranchConstraints(SubRegion, InvalidDomainMap)) 893 return false; 894 continue; 895 } 896 } 897 898 if (containsErrorBlock(RN, scop->getRegion(), &SD)) 899 scop->notifyErrorBlock(); 900 ; 901 902 BasicBlock *BB = getRegionNodeBasicBlock(RN); 903 Instruction *TI = BB->getTerminator(); 904 905 if (isa<UnreachableInst>(TI)) 906 continue; 907 908 if (!scop->isDomainDefined(BB)) 909 continue; 910 isl::set Domain = scop->getDomainConditions(BB); 911 912 scop->updateMaxLoopDepth(unsignedFromIslSize(Domain.tuple_dim())); 913 914 auto *BBLoop = getRegionNodeLoop(RN, LI); 915 // Propagate the domain from BB directly to blocks that have a superset 916 // domain, at the moment only region exit nodes of regions that start in BB. 917 propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks, 918 InvalidDomainMap); 919 920 // If all successors of BB have been set a domain through the propagation 921 // above we do not need to build condition sets but can just skip this 922 // block. However, it is important to note that this is a local property 923 // with regards to the region @p R. To this end FinishedExitBlocks is a 924 // local variable. 925 auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) { 926 return FinishedExitBlocks.count(SuccBB); 927 }; 928 if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit)) 929 continue; 930 931 // Build the condition sets for the successor nodes of the current region 932 // node. If it is a non-affine subregion we will always execute the single 933 // exit node, hence the single entry node domain is the condition set. For 934 // basic blocks we use the helper function buildConditionSets. 935 SmallVector<isl_set *, 8> ConditionSets; 936 if (RN->isSubRegion()) 937 ConditionSets.push_back(Domain.copy()); 938 else if (!buildConditionSets(BB, TI, BBLoop, Domain.get(), InvalidDomainMap, 939 ConditionSets)) 940 return false; 941 942 // Now iterate over the successors and set their initial domain based on 943 // their condition set. We skip back edges here and have to be careful when 944 // we leave a loop not to keep constraints over a dimension that doesn't 945 // exist anymore. 946 assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size()); 947 for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) { 948 isl::set CondSet = isl::manage(ConditionSets[u]); 949 BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u); 950 951 // Skip blocks outside the region. 952 if (!scop->contains(SuccBB)) 953 continue; 954 955 // If we propagate the domain of some block to "SuccBB" we do not have to 956 // adjust the domain. 957 if (FinishedExitBlocks.count(SuccBB)) 958 continue; 959 960 // Skip back edges. 
961 if (DT.dominates(SuccBB, BB)) 962 continue; 963 964 Loop *SuccBBLoop = 965 getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops()); 966 967 CondSet = adjustDomainDimensions(CondSet, BBLoop, SuccBBLoop); 968 969 // Set the domain for the successor or merge it with an existing domain in 970 // case there are multiple paths (without loop back edges) to the 971 // successor block. 972 isl::set &SuccDomain = scop->getOrInitEmptyDomain(SuccBB); 973 974 if (!SuccDomain.is_null()) { 975 SuccDomain = SuccDomain.unite(CondSet).coalesce(); 976 } else { 977 // Initialize the invalid domain. 978 InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space()); 979 SuccDomain = CondSet; 980 } 981 982 SuccDomain = SuccDomain.detect_equalities(); 983 984 // Check if the maximal number of domain disjunctions was reached. 985 // In case this happens we will clean up and bail. 986 if (unsignedFromIslSize(SuccDomain.n_basic_set()) < MaxDisjunctsInDomain) 987 continue; 988 989 scop->invalidate(COMPLEXITY, DebugLoc()); 990 while (++u < ConditionSets.size()) 991 isl_set_free(ConditionSets[u]); 992 return false; 993 } 994 } 995 996 return true; 997 } 998 999 bool ScopBuilder::propagateInvalidStmtDomains( 1000 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) { 1001 ReversePostOrderTraversal<Region *> RTraversal(R); 1002 for (auto *RN : RTraversal) { 1003 1004 // Recurse for affine subregions but go on for basic blocks and non-affine 1005 // subregions. 1006 if (RN->isSubRegion()) { 1007 Region *SubRegion = RN->getNodeAs<Region>(); 1008 if (!scop->isNonAffineSubRegion(SubRegion)) { 1009 propagateInvalidStmtDomains(SubRegion, InvalidDomainMap); 1010 continue; 1011 } 1012 } 1013 1014 bool ContainsErrorBlock = containsErrorBlock(RN, scop->getRegion(), &SD); 1015 BasicBlock *BB = getRegionNodeBasicBlock(RN); 1016 isl::set &Domain = scop->getOrInitEmptyDomain(BB); 1017 assert(!Domain.is_null() && "Cannot propagate a nullptr"); 1018 1019 isl::set InvalidDomain = InvalidDomainMap[BB]; 1020 1021 bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain); 1022 1023 if (!IsInvalidBlock) { 1024 InvalidDomain = InvalidDomain.intersect(Domain); 1025 } else { 1026 InvalidDomain = Domain; 1027 isl::set DomPar = Domain.params(); 1028 recordAssumption(&RecordedAssumptions, ERRORBLOCK, DomPar, 1029 BB->getTerminator()->getDebugLoc(), AS_RESTRICTION); 1030 Domain = isl::set::empty(Domain.get_space()); 1031 } 1032 1033 if (InvalidDomain.is_empty()) { 1034 InvalidDomainMap[BB] = InvalidDomain; 1035 continue; 1036 } 1037 1038 auto *BBLoop = getRegionNodeLoop(RN, LI); 1039 auto *TI = BB->getTerminator(); 1040 unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors(); 1041 for (unsigned u = 0; u < NumSuccs; u++) { 1042 auto *SuccBB = getRegionNodeSuccessor(RN, TI, u); 1043 1044 // Skip successors outside the SCoP. 1045 if (!scop->contains(SuccBB)) 1046 continue; 1047 1048 // Skip backedges. 1049 if (DT.dominates(SuccBB, BB)) 1050 continue; 1051 1052 Loop *SuccBBLoop = 1053 getFirstNonBoxedLoopFor(SuccBB, LI, scop->getBoxedLoops()); 1054 1055 auto AdjustedInvalidDomain = 1056 adjustDomainDimensions(InvalidDomain, BBLoop, SuccBBLoop); 1057 1058 isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB]; 1059 SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain); 1060 SuccInvalidDomain = SuccInvalidDomain.coalesce(); 1061 1062 InvalidDomainMap[SuccBB] = SuccInvalidDomain; 1063 1064 // Check if the maximal number of domain disjunctions was reached. 1065 // In case this happens we will bail. 
1066 if (unsignedFromIslSize(SuccInvalidDomain.n_basic_set()) < 1067 MaxDisjunctsInDomain) 1068 continue; 1069 1070 InvalidDomainMap.erase(BB); 1071 scop->invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent()); 1072 return false; 1073 } 1074 1075 InvalidDomainMap[BB] = InvalidDomain; 1076 } 1077 1078 return true; 1079 } 1080 1081 void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI, 1082 Region *NonAffineSubRegion, 1083 bool IsExitBlock) { 1084 // PHI nodes that are in the exit block of the region, hence if IsExitBlock is 1085 // true, are not modeled as ordinary PHI nodes as they are not part of the 1086 // region. However, we model the operands in the predecessor blocks that are 1087 // part of the region as regular scalar accesses. 1088 1089 // If we can synthesize a PHI we can skip it, however only if it is in 1090 // the region. If it is not it can only be in the exit block of the region. 1091 // In this case we model the operands but not the PHI itself. 1092 auto *Scope = LI.getLoopFor(PHI->getParent()); 1093 if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope)) 1094 return; 1095 1096 // PHI nodes are modeled as if they had been demoted prior to the SCoP 1097 // detection. Hence, the PHI is a load of a new memory location in which the 1098 // incoming value was written at the end of the incoming basic block. 1099 bool OnlyNonAffineSubRegionOperands = true; 1100 for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) { 1101 Value *Op = PHI->getIncomingValue(u); 1102 BasicBlock *OpBB = PHI->getIncomingBlock(u); 1103 ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u)); 1104 1105 // Do not build PHI dependences inside a non-affine subregion, but make 1106 // sure that the necessary scalar values are still made available. 1107 if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) { 1108 auto *OpInst = dyn_cast<Instruction>(Op); 1109 if (!OpInst || !NonAffineSubRegion->contains(OpInst)) 1110 ensureValueRead(Op, OpStmt); 1111 continue; 1112 } 1113 1114 OnlyNonAffineSubRegionOperands = false; 1115 ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock); 1116 } 1117 1118 if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) { 1119 addPHIReadAccess(PHIStmt, PHI); 1120 } 1121 } 1122 1123 void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt, 1124 Instruction *Inst) { 1125 assert(!isa<PHINode>(Inst)); 1126 1127 // Pull-in required operands. 1128 for (Use &Op : Inst->operands()) 1129 ensureValueRead(Op.get(), UserStmt); 1130 } 1131 1132 // Create a sequence of two schedules. Either argument may be null and is 1133 // interpreted as the empty schedule. Can also return null if both schedules are 1134 // empty. 1135 static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) { 1136 if (Prev.is_null()) 1137 return Succ; 1138 if (Succ.is_null()) 1139 return Prev; 1140 1141 return Prev.sequence(Succ); 1142 } 1143 1144 // Create an isl_multi_union_aff that defines an identity mapping from the 1145 // elements of USet to their N-th dimension. 1146 // 1147 // # Example: 1148 // 1149 // Domain: { A[i,j]; B[i,j,k] } 1150 // N: 1 1151 // 1152 // Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] } 1153 // 1154 // @param USet A union set describing the elements for which to generate a 1155 // mapping. 1156 // @param N The dimension to map to. 1157 // @returns A mapping from USet to its N-th dimension. 
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N) {
  assert(!USet.is_null());
  assert(!USet.is_empty());

  auto Result = isl::union_pw_multi_aff::empty(USet.get_space());

  for (isl::set S : USet.get_set_list()) {
    unsigned Dim = unsignedFromIslSize(S.tuple_dim());
    assert(Dim >= N);
    auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set,
                                                  N, Dim - N);
    if (N > 1)
      PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);

    Result = Result.add_pw_multi_aff(PMA);
  }

  return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
}

void ScopBuilder::buildSchedule() {
  Loop *L = getLoopSurroundingScop(*scop, LI);
  LoopStackTy LoopStack({LoopStackElementTy(L, {}, 0)});
  buildSchedule(scop->getRegion().getNode(), LoopStack);
  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
  scop->setScheduleTree(LoopStack[0].Schedule);
}

/// To generate a schedule for the elements in a Region we traverse the Region
/// in reverse-post-order and add the contained RegionNodes in traversal order
/// to the schedule of the loop that is currently at the top of the LoopStack.
/// For loop-free codes, this results in a correct sequential ordering.
///
/// Example:
///           bb1(0)
///          /     \.
///      bb2(1)   bb3(2)
///         \    /  \.
///          bb4(3)  bb5(4)
///             \   /
///              bb6(5)
///
/// Including loops requires additional processing. Whenever a loop header is
/// encountered, the corresponding loop is added to the @p LoopStack. Starting
/// from an empty schedule, we first process all RegionNodes that are within
/// this loop and complete the sequential schedule at this loop-level before
/// processing any other nodes. To implement this loop-nodes-first-processing,
/// the reverse post-order traversal is insufficient. Hence, we additionally
/// check if the traversal yields sub-regions or blocks that are outside the
/// last loop on the @p LoopStack. These region-nodes are then queued and only
/// traversed after all nodes within the current loop have been processed.
void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
  Loop *OuterScopLoop = getLoopSurroundingScop(*scop, LI);

  ReversePostOrderTraversal<Region *> RTraversal(R);
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
  std::deque<RegionNode *> DelayList;
  bool LastRNWaiting = false;

  // Iterate over the region @p R in reverse post-order but queue
  // sub-regions/blocks iff they are not part of the last encountered but not
  // completely traversed loop. The variable LastRNWaiting is a flag to
  // indicate that we queued the last sub-region/block from the reverse
  // post-order iterator. If it is set we have to explore the next
  // sub-region/block from the iterator (if any) to guarantee progress. If it
  // is not set we first try the next queued sub-region/blocks.
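  //
  // For example, if the reverse post-order yields a block that lies outside
  // the loop currently on top of the LoopStack while that loop still has
  // unprocessed body blocks, the block is pushed onto DelayList and is only
  // scheduled after the loop's schedule has been completed.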
  while (!WorkList.empty() || !DelayList.empty()) {
    RegionNode *RN;

    if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
      RN = WorkList.front();
      WorkList.pop_front();
      LastRNWaiting = false;
    } else {
      RN = DelayList.front();
      DelayList.pop_front();
    }

    Loop *L = getRegionNodeLoop(RN, LI);
    if (!scop->contains(L))
      L = OuterScopLoop;

    Loop *LastLoop = LoopStack.back().L;
    if (LastLoop != L) {
      if (LastLoop && !LastLoop->contains(L)) {
        LastRNWaiting = true;
        DelayList.push_back(RN);
        continue;
      }
      LoopStack.push_back({L, {}, 0});
    }
    buildSchedule(RN, LoopStack);
  }
}

void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
  if (RN->isSubRegion()) {
    auto *LocalRegion = RN->getNodeAs<Region>();
    if (!scop->isNonAffineSubRegion(LocalRegion)) {
      buildSchedule(LocalRegion, LoopStack);
      return;
    }
  }

  assert(LoopStack.rbegin() != LoopStack.rend());
  auto LoopData = LoopStack.rbegin();
  LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);

  for (auto *Stmt : scop->getStmtListFor(RN)) {
    isl::union_set UDomain{Stmt->getDomain()};
    auto StmtSchedule = isl::schedule::from_domain(UDomain);
    LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
  }

  // Check if we just processed the last node in this loop. If we did, finalize
  // the loop by:
  //
  //   - adding new schedule dimensions
  //   - folding the resulting schedule into the parent loop schedule
  //   - dropping the loop schedule from the LoopStack.
  //
  // Then continue to check surrounding loops, which might also have been
  // completed by this node.
  size_t Dimension = LoopStack.size();
  while (LoopData->L &&
         LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
    isl::schedule Schedule = LoopData->Schedule;
    auto NumBlocksProcessed = LoopData->NumBlocksProcessed;

    assert(std::next(LoopData) != LoopStack.rend());
    Loop *L = LoopData->L;
    ++LoopData;
    --Dimension;

    if (!Schedule.is_null()) {
      isl::union_set Domain = Schedule.get_domain();
      isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
      Schedule = Schedule.insert_partial_schedule(MUPA);

      if (hasDisableAllTransformsHint(L)) {
        // If any of the loops has a disable_nonforced heuristic, mark the
        // entire SCoP as such. The ISL rescheduler can only reschedule the
        // SCoP in its entirety.
        // TODO: ScopDetection could avoid including such loops or wrap them
        // as boxed loops. It still needs to pass through loops with
        // user-defined metadata.
        scop->markDisableHeuristics();
      }

      // It is easier to insert the marks here than to do it retroactively.
      isl::id IslLoopId = createIslLoopAttr(scop->getIslCtx(), L);
      if (!IslLoopId.is_null())
        Schedule =
            Schedule.get_root().child(0).insert_mark(IslLoopId).get_schedule();

      LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
    }

    LoopData->NumBlocksProcessed += NumBlocksProcessed;
  }
  // Now pop all loops processed above from the LoopStack.
  LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
}

void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
  // Check for uses of this instruction outside the scop.
Because we do not 1325 // iterate over such instructions and therefore did not "ensure" the existence 1326 // of a write, we must determine such use here. 1327 if (scop->isEscaping(Inst)) 1328 ensureValueWrite(Inst); 1329 } 1330 1331 void ScopBuilder::addRecordedAssumptions() { 1332 for (auto &AS : llvm::reverse(RecordedAssumptions)) { 1333 1334 if (!AS.BB) { 1335 scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign, 1336 nullptr /* BasicBlock */, AS.RequiresRTC); 1337 continue; 1338 } 1339 1340 // If the domain was deleted the assumptions are void. 1341 isl_set *Dom = scop->getDomainConditions(AS.BB).release(); 1342 if (!Dom) 1343 continue; 1344 1345 // If a basic block was given use its domain to simplify the assumption. 1346 // In case of restrictions we know they only have to hold on the domain, 1347 // thus we can intersect them with the domain of the block. However, for 1348 // assumptions the domain has to imply them, thus: 1349 // _ _____ 1350 // Dom => S <==> A v B <==> A - B 1351 // 1352 // To avoid the complement we will register A - B as a restriction not an 1353 // assumption. 1354 isl_set *S = AS.Set.copy(); 1355 if (AS.Sign == AS_RESTRICTION) 1356 S = isl_set_params(isl_set_intersect(S, Dom)); 1357 else /* (AS.Sign == AS_ASSUMPTION) */ 1358 S = isl_set_params(isl_set_subtract(Dom, S)); 1359 1360 scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB, 1361 AS.RequiresRTC); 1362 } 1363 } 1364 1365 void ScopBuilder::addUserAssumptions( 1366 AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) { 1367 for (auto &Assumption : AC.assumptions()) { 1368 auto *CI = dyn_cast_or_null<CallInst>(Assumption); 1369 if (!CI || CI->arg_size() != 1) 1370 continue; 1371 1372 bool InScop = scop->contains(CI); 1373 if (!InScop && !scop->isDominatedBy(DT, CI->getParent())) 1374 continue; 1375 1376 auto *L = LI.getLoopFor(CI->getParent()); 1377 auto *Val = CI->getArgOperand(0); 1378 ParameterSetTy DetectedParams; 1379 auto &R = scop->getRegion(); 1380 if (!isAffineConstraint(Val, &R, L, SE, DetectedParams)) { 1381 ORE.emit( 1382 OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI) 1383 << "Non-affine user assumption ignored."); 1384 continue; 1385 } 1386 1387 // Collect all newly introduced parameters. 1388 ParameterSetTy NewParams; 1389 for (auto *Param : DetectedParams) { 1390 Param = extractConstantFactor(Param, SE).second; 1391 Param = scop->getRepresentingInvariantLoadSCEV(Param); 1392 if (scop->isParam(Param)) 1393 continue; 1394 NewParams.insert(Param); 1395 } 1396 1397 SmallVector<isl_set *, 2> ConditionSets; 1398 auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr; 1399 BasicBlock *BB = InScop ? CI->getParent() : R.getEntry(); 1400 auto *Dom = InScop ? isl_set_copy(scop->getDomainConditions(BB).get()) 1401 : isl_set_copy(scop->getContext().get()); 1402 assert(Dom && "Cannot propagate a nullptr."); 1403 bool Valid = buildConditionSets(BB, Val, TI, L, Dom, InvalidDomainMap, 1404 ConditionSets); 1405 isl_set_free(Dom); 1406 1407 if (!Valid) 1408 continue; 1409 1410 isl_set *AssumptionCtx = nullptr; 1411 if (InScop) { 1412 AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1])); 1413 isl_set_free(ConditionSets[0]); 1414 } else { 1415 AssumptionCtx = isl_set_complement(ConditionSets[1]); 1416 AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]); 1417 } 1418 1419 // Project out newly introduced parameters as they are not otherwise useful. 
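    // For example, a parameter that only occurs in the assumed condition but
    // nowhere else in the SCoP would merely enlarge the assumption context
    // without constraining anything the SCoP actually uses.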
1420 if (!NewParams.empty()) { 1421 for (isl_size u = 0; u < isl_set_n_param(AssumptionCtx); u++) { 1422 auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u); 1423 auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id)); 1424 isl_id_free(Id); 1425 1426 if (!NewParams.count(Param)) 1427 continue; 1428 1429 AssumptionCtx = 1430 isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1); 1431 } 1432 } 1433 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI) 1434 << "Use user assumption: " 1435 << stringFromIslObj(AssumptionCtx, "null")); 1436 isl::set newContext = 1437 scop->getContext().intersect(isl::manage(AssumptionCtx)); 1438 scop->setContext(newContext); 1439 } 1440 } 1441 1442 bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) { 1443 // Memory builtins are not considered in this function. 1444 if (!Inst.isLoad() && !Inst.isStore()) 1445 return false; 1446 1447 Value *Val = Inst.getValueOperand(); 1448 Type *ElementType = Val->getType(); 1449 Value *Address = Inst.getPointerOperand(); 1450 const SCEV *AccessFunction = 1451 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent())); 1452 const SCEVUnknown *BasePointer = 1453 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); 1454 enum MemoryAccess::AccessType AccType = 1455 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE; 1456 1457 if (auto *BitCast = dyn_cast<BitCastInst>(Address)) 1458 Address = BitCast->getOperand(0); 1459 1460 auto *GEP = dyn_cast<GetElementPtrInst>(Address); 1461 if (!GEP || DL.getTypeAllocSize(GEP->getResultElementType()) != 1462 DL.getTypeAllocSize(ElementType)) 1463 return false; 1464 1465 SmallVector<const SCEV *, 4> Subscripts; 1466 SmallVector<int, 4> Sizes; 1467 getIndexExpressionsFromGEP(SE, GEP, Subscripts, Sizes); 1468 auto *BasePtr = GEP->getOperand(0); 1469 1470 if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr)) 1471 BasePtr = BasePtrCast->getOperand(0); 1472 1473 // Check for identical base pointers to ensure that we do not miss index 1474 // offsets that have been added before this GEP is applied. 1475 if (BasePtr != BasePointer->getValue()) 1476 return false; 1477 1478 std::vector<const SCEV *> SizesSCEV; 1479 1480 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); 1481 1482 Loop *SurroundingLoop = Stmt->getSurroundingLoop(); 1483 for (auto *Subscript : Subscripts) { 1484 InvariantLoadsSetTy AccessILS; 1485 if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE, 1486 &AccessILS)) 1487 return false; 1488 1489 for (LoadInst *LInst : AccessILS) 1490 if (!ScopRIL.count(LInst)) 1491 return false; 1492 } 1493 1494 if (Sizes.empty()) 1495 return false; 1496 1497 SizesSCEV.push_back(nullptr); 1498 1499 for (auto V : Sizes) 1500 SizesSCEV.push_back(SE.getSCEV( 1501 ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V))); 1502 1503 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, 1504 true, Subscripts, SizesSCEV, Val); 1505 return true; 1506 } 1507 1508 bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) { 1509 // Memory builtins are not considered by this function. 
1510 if (!Inst.isLoad() && !Inst.isStore()) 1511 return false; 1512 1513 if (!PollyDelinearize) 1514 return false; 1515 1516 Value *Address = Inst.getPointerOperand(); 1517 Value *Val = Inst.getValueOperand(); 1518 Type *ElementType = Val->getType(); 1519 unsigned ElementSize = DL.getTypeAllocSize(ElementType); 1520 enum MemoryAccess::AccessType AccType = 1521 isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE; 1522 1523 const SCEV *AccessFunction = 1524 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent())); 1525 const SCEVUnknown *BasePointer = 1526 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); 1527 1528 assert(BasePointer && "Could not find base pointer"); 1529 1530 auto &InsnToMemAcc = scop->getInsnToMemAccMap(); 1531 auto AccItr = InsnToMemAcc.find(Inst); 1532 if (AccItr == InsnToMemAcc.end()) 1533 return false; 1534 1535 std::vector<const SCEV *> Sizes = {nullptr}; 1536 1537 Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(), 1538 AccItr->second.Shape->DelinearizedSizes.end()); 1539 1540 // In case only the element size is contained in the 'Sizes' array, the 1541 // access does not access a real multi-dimensional array. Hence, we allow 1542 // the normal single-dimensional access construction to handle this. 1543 if (Sizes.size() == 1) 1544 return false; 1545 1546 // Remove the element size. This information is already provided by the 1547 // ElementSize parameter. In case the element size of this access and the 1548 // element size used for delinearization differs the delinearization is 1549 // incorrect. Hence, we invalidate the scop. 1550 // 1551 // TODO: Handle delinearization with differing element sizes. 1552 auto DelinearizedSize = 1553 cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue(); 1554 Sizes.pop_back(); 1555 if (ElementSize != DelinearizedSize) 1556 scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent()); 1557 1558 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, 1559 true, AccItr->second.DelinearizedSubscripts, Sizes, Val); 1560 return true; 1561 } 1562 1563 bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) { 1564 auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst); 1565 1566 if (MemIntr == nullptr) 1567 return false; 1568 1569 auto *L = LI.getLoopFor(Inst->getParent()); 1570 const SCEV *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L); 1571 assert(LengthVal); 1572 1573 // Check if the length val is actually affine or if we overapproximate it 1574 InvariantLoadsSetTy AccessILS; 1575 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); 1576 1577 Loop *SurroundingLoop = Stmt->getSurroundingLoop(); 1578 bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop, 1579 LengthVal, SE, &AccessILS); 1580 for (LoadInst *LInst : AccessILS) 1581 if (!ScopRIL.count(LInst)) 1582 LengthIsAffine = false; 1583 if (!LengthIsAffine) 1584 LengthVal = nullptr; 1585 1586 auto *DestPtrVal = MemIntr->getDest(); 1587 assert(DestPtrVal); 1588 1589 const SCEV *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L); 1590 assert(DestAccFunc); 1591 // Ignore accesses to "NULL". 1592 // TODO: We could use this to optimize the region further, e.g., intersect 1593 // the context with 1594 // isl_set_complement(isl_set_params(getDomain())) 1595 // as we know it would be undefined to execute this instruction anyway. 
1596 if (DestAccFunc->isZero()) 1597 return true; 1598 1599 if (auto *U = dyn_cast<SCEVUnknown>(DestAccFunc)) { 1600 if (isa<ConstantPointerNull>(U->getValue())) 1601 return true; 1602 } 1603 1604 auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc)); 1605 assert(DestPtrSCEV); 1606 DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV); 1607 addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(), 1608 IntegerType::getInt8Ty(DestPtrVal->getContext()), 1609 LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr}, 1610 Inst.getValueOperand()); 1611 1612 auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr); 1613 if (!MemTrans) 1614 return true; 1615 1616 auto *SrcPtrVal = MemTrans->getSource(); 1617 assert(SrcPtrVal); 1618 1619 const SCEV *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L); 1620 assert(SrcAccFunc); 1621 // Ignore accesses to "NULL". 1622 // TODO: See above TODO 1623 if (SrcAccFunc->isZero()) 1624 return true; 1625 1626 auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc)); 1627 assert(SrcPtrSCEV); 1628 SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV); 1629 addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(), 1630 IntegerType::getInt8Ty(SrcPtrVal->getContext()), 1631 LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr}, 1632 Inst.getValueOperand()); 1633 1634 return true; 1635 } 1636 1637 bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) { 1638 auto *CI = dyn_cast_or_null<CallInst>(Inst); 1639 1640 if (CI == nullptr) 1641 return false; 1642 1643 if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI)) 1644 return true; 1645 1646 const SCEV *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0); 1647 auto *CalledFunction = CI->getCalledFunction(); 1648 MemoryEffects ME = AA.getMemoryEffects(CalledFunction); 1649 if (ME.doesNotAccessMemory()) 1650 return true; 1651 1652 if (ME.onlyAccessesArgPointees()) { 1653 ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem); 1654 auto AccType = 1655 !isModSet(ArgMR) ? MemoryAccess::READ : MemoryAccess::MAY_WRITE; 1656 Loop *L = LI.getLoopFor(Inst->getParent()); 1657 for (const auto &Arg : CI->args()) { 1658 if (!Arg->getType()->isPointerTy()) 1659 continue; 1660 1661 const SCEV *ArgSCEV = SE.getSCEVAtScope(Arg, L); 1662 if (ArgSCEV->isZero()) 1663 continue; 1664 1665 if (auto *U = dyn_cast<SCEVUnknown>(ArgSCEV)) { 1666 if (isa<ConstantPointerNull>(U->getValue())) 1667 return true; 1668 } 1669 1670 auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV)); 1671 addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(), 1672 ArgBasePtr->getType(), false, {AF}, {nullptr}, CI); 1673 } 1674 return true; 1675 } 1676 1677 if (ME.onlyReadsMemory()) { 1678 GlobalReads.emplace_back(Stmt, CI); 1679 return true; 1680 } 1681 return false; 1682 } 1683 1684 bool ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) { 1685 // Memory builtins are not considered by this function. 1686 if (!Inst.isLoad() && !Inst.isStore()) 1687 return false; 1688 1689 Value *Address = Inst.getPointerOperand(); 1690 Value *Val = Inst.getValueOperand(); 1691 Type *ElementType = Val->getType(); 1692 enum MemoryAccess::AccessType AccType = 1693 isa<LoadInst>(Inst) ? 
MemoryAccess::READ : MemoryAccess::MUST_WRITE; 1694 1695 const SCEV *AccessFunction = 1696 SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent())); 1697 const SCEVUnknown *BasePointer = 1698 dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); 1699 1700 assert(BasePointer && "Could not find base pointer"); 1701 AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer); 1702 1703 // Check if the access depends on a loop contained in a non-affine subregion. 1704 bool isVariantInNonAffineLoop = false; 1705 SetVector<const Loop *> Loops; 1706 findLoops(AccessFunction, Loops); 1707 for (const Loop *L : Loops) 1708 if (Stmt->contains(L)) { 1709 isVariantInNonAffineLoop = true; 1710 break; 1711 } 1712 1713 InvariantLoadsSetTy AccessILS; 1714 1715 Loop *SurroundingLoop = Stmt->getSurroundingLoop(); 1716 bool IsAffine = !isVariantInNonAffineLoop && 1717 isAffineExpr(&scop->getRegion(), SurroundingLoop, 1718 AccessFunction, SE, &AccessILS); 1719 1720 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); 1721 for (LoadInst *LInst : AccessILS) 1722 if (!ScopRIL.count(LInst)) 1723 IsAffine = false; 1724 1725 if (!IsAffine && AccType == MemoryAccess::MUST_WRITE) 1726 AccType = MemoryAccess::MAY_WRITE; 1727 1728 addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, 1729 IsAffine, {AccessFunction}, {nullptr}, Val); 1730 return true; 1731 } 1732 1733 void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) { 1734 if (buildAccessMemIntrinsic(Inst, Stmt)) 1735 return; 1736 1737 if (buildAccessCallInst(Inst, Stmt)) 1738 return; 1739 1740 if (buildAccessMultiDimFixed(Inst, Stmt)) 1741 return; 1742 1743 if (buildAccessMultiDimParam(Inst, Stmt)) 1744 return; 1745 1746 if (buildAccessSingleDim(Inst, Stmt)) 1747 return; 1748 1749 llvm_unreachable( 1750 "At least one of the buildAccess functions must handled this access, or " 1751 "ScopDetection should have rejected this SCoP"); 1752 } 1753 1754 void ScopBuilder::buildAccessFunctions() { 1755 for (auto &Stmt : *scop) { 1756 if (Stmt.isBlockStmt()) { 1757 buildAccessFunctions(&Stmt, *Stmt.getBasicBlock()); 1758 continue; 1759 } 1760 1761 Region *R = Stmt.getRegion(); 1762 for (BasicBlock *BB : R->blocks()) 1763 buildAccessFunctions(&Stmt, *BB, R); 1764 } 1765 1766 // Build write accesses for values that are used after the SCoP. 1767 // The instructions defining them might be synthesizable and therefore not 1768 // contained in any statement, hence we iterate over the original instructions 1769 // to identify all escaping values. 1770 for (BasicBlock *BB : scop->getRegion().blocks()) { 1771 for (Instruction &Inst : *BB) 1772 buildEscapingDependences(&Inst); 1773 } 1774 } 1775 1776 bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) { 1777 return !Inst->isTerminator() && !isIgnoredIntrinsic(Inst) && 1778 !canSynthesize(Inst, *scop, &SE, L); 1779 } 1780 1781 /// Generate a name for a statement. 1782 /// 1783 /// @param BB The basic block the statement will represent. 1784 /// @param BBIdx The index of the @p BB relative to other BBs/regions. 1785 /// @param Count The index of the created statement in @p BB. 1786 /// @param IsMain Whether this is the main of all statement for @p BB. If true, 1787 /// no suffix will be added. 1788 /// @param IsLast Uses a special indicator for the last statement of a BB. 
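/// For illustration (the exact spelling depends on getIslCompatibleName() and
/// UseInstructionNames): a block '%for.body' may yield 'Stmt_for_body' for its
/// main statement, 'Stmt_for_body_b' for a second statement, and
/// 'Stmt_for_body_last' for the epilogue statement.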
1789 static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count, 1790 bool IsMain, bool IsLast = false) { 1791 std::string Suffix; 1792 if (!IsMain) { 1793 if (UseInstructionNames) 1794 Suffix = '_'; 1795 if (IsLast) 1796 Suffix += "last"; 1797 else if (Count < 26) 1798 Suffix += 'a' + Count; 1799 else 1800 Suffix += std::to_string(Count); 1801 } 1802 return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames); 1803 } 1804 1805 /// Generate a name for a statement that represents a non-affine subregion. 1806 /// 1807 /// @param R The region the statement will represent. 1808 /// @param RIdx The index of the @p R relative to other BBs/regions. 1809 static std::string makeStmtName(Region *R, long RIdx) { 1810 return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "", 1811 UseInstructionNames); 1812 } 1813 1814 void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) { 1815 Loop *SurroundingLoop = LI.getLoopFor(BB); 1816 1817 int Count = 0; 1818 long BBIdx = scop->getNextStmtIdx(); 1819 std::vector<Instruction *> Instructions; 1820 for (Instruction &Inst : *BB) { 1821 if (shouldModelInst(&Inst, SurroundingLoop)) 1822 Instructions.push_back(&Inst); 1823 if (Inst.getMetadata("polly_split_after") || 1824 (SplitOnStore && isa<StoreInst>(Inst))) { 1825 std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0); 1826 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions); 1827 Count++; 1828 Instructions.clear(); 1829 } 1830 } 1831 1832 std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0); 1833 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions); 1834 } 1835 1836 /// Is @p Inst an ordered instruction? 1837 /// 1838 /// An unordered instruction is an instruction, such that a sequence of 1839 /// unordered instructions can be permuted without changing semantics. Any 1840 /// instruction for which this is not always the case is ordered. 1841 static bool isOrderedInstruction(Instruction *Inst) { 1842 return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory(); 1843 } 1844 1845 /// Join instructions to the same statement if one uses the scalar result of the 1846 /// other. 1847 static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind, 1848 ArrayRef<Instruction *> ModeledInsts) { 1849 for (Instruction *Inst : ModeledInsts) { 1850 if (isa<PHINode>(Inst)) 1851 continue; 1852 1853 for (Use &Op : Inst->operands()) { 1854 Instruction *OpInst = dyn_cast<Instruction>(Op.get()); 1855 if (!OpInst) 1856 continue; 1857 1858 // Check if OpInst is in the BB and is a modeled instruction. 1859 auto OpVal = UnionFind.findValue(OpInst); 1860 if (OpVal == UnionFind.end()) 1861 continue; 1862 1863 UnionFind.unionSets(Inst, OpInst); 1864 } 1865 } 1866 } 1867 1868 /// Ensure that the order of ordered instructions does not change. 1869 /// 1870 /// If we encounter an ordered instruction enclosed in instructions belonging to 1871 /// a different statement (which might as well contain ordered instructions, but 1872 /// this is not tested here), join them. 1873 static void 1874 joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind, 1875 ArrayRef<Instruction *> ModeledInsts) { 1876 SetVector<Instruction *> SeenLeaders; 1877 for (Instruction *Inst : ModeledInsts) { 1878 if (!isOrderedInstruction(Inst)) 1879 continue; 1880 1881 Instruction *Leader = UnionFind.getLeaderValue(Inst); 1882 // Since previous iterations might have merged sets, some items in 1883 // SeenLeaders are not leaders anymore. 
However, The new leader of 1884 // previously merged instructions must be one of the former leaders of 1885 // these merged instructions. 1886 bool Inserted = SeenLeaders.insert(Leader); 1887 if (Inserted) 1888 continue; 1889 1890 // Merge statements to close holes. Say, we have already seen statements A 1891 // and B, in this order. Then we see an instruction of A again and we would 1892 // see the pattern "A B A". This function joins all statements until the 1893 // only seen occurrence of A. 1894 for (Instruction *Prev : reverse(SeenLeaders)) { 1895 // We are backtracking from the last element until we see Inst's leader 1896 // in SeenLeaders and merge all into one set. Although leaders of 1897 // instructions change during the execution of this loop, it's irrelevant 1898 // as we are just searching for the element that we already confirmed is 1899 // in the list. 1900 if (Prev == Leader) 1901 break; 1902 UnionFind.unionSets(Prev, Leader); 1903 } 1904 } 1905 } 1906 1907 /// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for 1908 /// the incoming values from this block are executed after the PHI READ. 1909 /// 1910 /// Otherwise it could overwrite the incoming value from before the BB with the 1911 /// value for the next execution. This can happen if the PHI WRITE is added to 1912 /// the statement with the instruction that defines the incoming value (instead 1913 /// of the last statement of the same BB). To ensure that the PHI READ and WRITE 1914 /// are in order, we put both into the statement. PHI WRITEs are always executed 1915 /// after PHI READs when they are in the same statement. 1916 /// 1917 /// TODO: This is an overpessimization. We only have to ensure that the PHI 1918 /// WRITE is not put into a statement containing the PHI itself. That could also 1919 /// be done by 1920 /// - having all (strongly connected) PHIs in a single statement, 1921 /// - unite only the PHIs in the operand tree of the PHI WRITE (because it only 1922 /// has a chance of being lifted before a PHI by being in a statement with a 1923 /// PHI that comes before in the basic block), or 1924 /// - when uniting statements, ensure that no (relevant) PHIs are overtaken. 1925 static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind, 1926 ArrayRef<Instruction *> ModeledInsts) { 1927 for (Instruction *Inst : ModeledInsts) { 1928 PHINode *PHI = dyn_cast<PHINode>(Inst); 1929 if (!PHI) 1930 continue; 1931 1932 int Idx = PHI->getBasicBlockIndex(PHI->getParent()); 1933 if (Idx < 0) 1934 continue; 1935 1936 Instruction *IncomingVal = 1937 dyn_cast<Instruction>(PHI->getIncomingValue(Idx)); 1938 if (!IncomingVal) 1939 continue; 1940 1941 UnionFind.unionSets(PHI, IncomingVal); 1942 } 1943 } 1944 1945 void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) { 1946 Loop *L = LI.getLoopFor(BB); 1947 1948 // Extracting out modeled instructions saves us from checking 1949 // shouldModelInst() repeatedly. 1950 SmallVector<Instruction *, 32> ModeledInsts; 1951 EquivalenceClasses<Instruction *> UnionFind; 1952 Instruction *MainInst = nullptr, *MainLeader = nullptr; 1953 for (Instruction &Inst : *BB) { 1954 if (!shouldModelInst(&Inst, L)) 1955 continue; 1956 ModeledInsts.push_back(&Inst); 1957 UnionFind.insert(&Inst); 1958 1959 // When a BB is split into multiple statements, the main statement is the 1960 // one containing the 'main' instruction. We select the first instruction 1961 // that is unlikely to be removed (because it has side-effects) as the main 1962 // one. 
It is used to ensure that at least one statement from the BB has the 1963 // same name as with -polly-stmt-granularity=bb. 1964 if (!MainInst && (isa<StoreInst>(Inst) || 1965 (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst)))) 1966 MainInst = &Inst; 1967 } 1968 1969 joinOperandTree(UnionFind, ModeledInsts); 1970 joinOrderedInstructions(UnionFind, ModeledInsts); 1971 joinOrderedPHIs(UnionFind, ModeledInsts); 1972 1973 // The list of instructions per statement (each statement is represented by its 1974 // leader instruction). 1975 MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList; 1976 1977 // The order of statements must be preserved w.r.t. their ordered 1978 // instructions. Without this explicit scan, we would also use non-ordered 1979 // instructions (whose order is arbitrary) to determine statement order. 1980 for (Instruction *Inst : ModeledInsts) { 1981 if (!isOrderedInstruction(Inst)) 1982 continue; 1983 1984 auto LeaderIt = UnionFind.findLeader(Inst); 1985 if (LeaderIt == UnionFind.member_end()) 1986 continue; 1987 1988 // Insert element for the leader instruction. 1989 (void)LeaderToInstList[*LeaderIt]; 1990 } 1991 1992 // Collect the instructions of all leaders. UnionFind's member iterators 1993 // unfortunately are not in any specific order. 1994 for (Instruction *Inst : ModeledInsts) { 1995 auto LeaderIt = UnionFind.findLeader(Inst); 1996 if (LeaderIt == UnionFind.member_end()) 1997 continue; 1998 1999 if (Inst == MainInst) 2000 MainLeader = *LeaderIt; 2001 std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt]; 2002 InstList.push_back(Inst); 2003 } 2004 2005 // Finally build the statements. 2006 int Count = 0; 2007 long BBIdx = scop->getNextStmtIdx(); 2008 for (auto &Instructions : LeaderToInstList) { 2009 std::vector<Instruction *> &InstList = Instructions.second; 2010 2011 // If there is no main instruction, make the first statement the main. 2012 bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0); 2013 2014 std::string Name = makeStmtName(BB, BBIdx, Count, IsMain); 2015 scop->addScopStmt(BB, Name, L, std::move(InstList)); 2016 Count += 1; 2017 } 2018 2019 // Unconditionally add an epilogue (last statement). It contains no 2020 // instructions, but holds the PHI write accesses for successor basic blocks, 2021 // if the incoming value is not defined in another statement of the same BB. 2022 // The epilogue becomes the main statement only if there is no other 2023 // statement that could become main. 2024 // The epilogue will be removed if no PHIWrite is added to it.
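  // As a rough illustration of the splitting above: for a block containing
  // 'x = load A[i]; y = x + 1; store y, B[i]; store 42, C[i]' the equivalence
  // classes would typically yield one statement for the load/add/store-to-B
  // chain and a second one for the store to C, followed by the epilogue
  // created below.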
2025 std::string EpilogueName = makeStmtName(BB, BBIdx, Count, Count == 0, true); 2026 scop->addScopStmt(BB, EpilogueName, L, {}); 2027 } 2028 2029 void ScopBuilder::buildStmts(Region &SR) { 2030 if (scop->isNonAffineSubRegion(&SR)) { 2031 std::vector<Instruction *> Instructions; 2032 Loop *SurroundingLoop = 2033 getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops()); 2034 for (Instruction &Inst : *SR.getEntry()) 2035 if (shouldModelInst(&Inst, SurroundingLoop)) 2036 Instructions.push_back(&Inst); 2037 long RIdx = scop->getNextStmtIdx(); 2038 std::string Name = makeStmtName(&SR, RIdx); 2039 scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions); 2040 return; 2041 } 2042 2043 for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I) 2044 if (I->isSubRegion()) 2045 buildStmts(*I->getNodeAs<Region>()); 2046 else { 2047 BasicBlock *BB = I->getNodeAs<BasicBlock>(); 2048 switch (StmtGranularity) { 2049 case GranularityChoice::BasicBlocks: 2050 buildSequentialBlockStmts(BB); 2051 break; 2052 case GranularityChoice::ScalarIndependence: 2053 buildEqivClassBlockStmts(BB); 2054 break; 2055 case GranularityChoice::Stores: 2056 buildSequentialBlockStmts(BB, true); 2057 break; 2058 } 2059 } 2060 } 2061 2062 void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB, 2063 Region *NonAffineSubRegion) { 2064 assert( 2065 Stmt && 2066 "The exit BB is the only one that cannot be represented by a statement"); 2067 assert(Stmt->represents(&BB)); 2068 2069 // We do not build access functions for error blocks, as they may contain 2070 // instructions we can not model. 2071 if (SD.isErrorBlock(BB, scop->getRegion())) 2072 return; 2073 2074 auto BuildAccessesForInst = [this, Stmt, 2075 NonAffineSubRegion](Instruction *Inst) { 2076 PHINode *PHI = dyn_cast<PHINode>(Inst); 2077 if (PHI) 2078 buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false); 2079 2080 if (auto MemInst = MemAccInst::dyn_cast(*Inst)) { 2081 assert(Stmt && "Cannot build access function in non-existing statement"); 2082 buildMemoryAccess(MemInst, Stmt); 2083 } 2084 2085 // PHI nodes have already been modeled above and terminators that are 2086 // not part of a non-affine subregion are fully modeled and regenerated 2087 // from the polyhedral domains. Hence, they do not need to be modeled as 2088 // explicit data dependences. 2089 if (!PHI) 2090 buildScalarDependences(Stmt, Inst); 2091 }; 2092 2093 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads(); 2094 bool IsEntryBlock = (Stmt->getEntryBlock() == &BB); 2095 if (IsEntryBlock) { 2096 for (Instruction *Inst : Stmt->getInstructions()) 2097 BuildAccessesForInst(Inst); 2098 if (Stmt->isRegionStmt()) 2099 BuildAccessesForInst(BB.getTerminator()); 2100 } else { 2101 for (Instruction &Inst : BB) { 2102 if (isIgnoredIntrinsic(&Inst)) 2103 continue; 2104 2105 // Invariant loads already have been processed. 2106 if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst))) 2107 continue; 2108 2109 BuildAccessesForInst(&Inst); 2110 } 2111 } 2112 } 2113 2114 MemoryAccess *ScopBuilder::addMemoryAccess( 2115 ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType, 2116 Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue, 2117 ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes, 2118 MemoryKind Kind) { 2119 bool isKnownMustAccess = false; 2120 2121 // Accesses in single-basic block statements are always executed. 
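  // A write access is kept as MUST_WRITE only if we can show it executes
  // whenever its statement executes; otherwise it is demoted to MAY_WRITE
  // further down.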
2122 if (Stmt->isBlockStmt()) 2123 isKnownMustAccess = true; 2124 2125 if (Stmt->isRegionStmt()) { 2126 // Accesses that dominate the exit block of a non-affine region are always 2127 // executed. In non-affine regions there may exist MemoryKind::Values that 2128 // do not dominate the exit. MemoryKind::Values will always dominate the 2129 // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the 2130 // non-affine region. 2131 if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit())) 2132 isKnownMustAccess = true; 2133 } 2134 2135 // Non-affine PHI writes do not "happen" at a particular instruction, but 2136 // after exiting the statement. Therefore they are guaranteed to execute and 2137 // overwrite the old value. 2138 if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI) 2139 isKnownMustAccess = true; 2140 2141 if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE) 2142 AccType = MemoryAccess::MAY_WRITE; 2143 2144 auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType, 2145 Affine, Subscripts, Sizes, AccessValue, Kind); 2146 2147 scop->addAccessFunction(Access); 2148 Stmt->addAccess(Access); 2149 return Access; 2150 } 2151 2152 void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst, 2153 MemoryAccess::AccessType AccType, 2154 Value *BaseAddress, Type *ElementType, 2155 bool IsAffine, 2156 ArrayRef<const SCEV *> Subscripts, 2157 ArrayRef<const SCEV *> Sizes, 2158 Value *AccessValue) { 2159 ArrayBasePointers.insert(BaseAddress); 2160 addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, ElementType, IsAffine, 2161 AccessValue, Subscripts, Sizes, MemoryKind::Array); 2162 } 2163 2164 /// Check if @p Expr is divisible by @p Size. 2165 static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) { 2166 assert(Size != 0); 2167 if (Size == 1) 2168 return true; 2169 2170 // Only one factor needs to be divisible. 2171 if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) { 2172 for (const SCEV *FactorExpr : MulExpr->operands()) 2173 if (isDivisible(FactorExpr, Size, SE)) 2174 return true; 2175 return false; 2176 } 2177 2178 // For other n-ary expressions (Add, AddRec, Max,...) all operands need 2179 // to be divisible. 
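  // For instance, with Size == 4: (4*i + 8*j) is divisible because each
  // operand of the add contains a factor divisible by 4, whereas (4*i + 2) is
  // not, since the constant 2 fails the udiv-based check at the end.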
2180 if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) { 2181 for (const SCEV *OpExpr : NAryExpr->operands()) 2182 if (!isDivisible(OpExpr, Size, SE)) 2183 return false; 2184 return true; 2185 } 2186 2187 const SCEV *SizeSCEV = SE.getConstant(Expr->getType(), Size); 2188 const SCEV *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV); 2189 const SCEV *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV); 2190 return MulSCEV == Expr; 2191 } 2192 2193 void ScopBuilder::foldSizeConstantsToRight() { 2194 isl::union_set Accessed = scop->getAccesses().range(); 2195 2196 for (auto Array : scop->arrays()) { 2197 if (Array->getNumberOfDimensions() <= 1) 2198 continue; 2199 2200 isl::space Space = Array->getSpace(); 2201 Space = Space.align_params(Accessed.get_space()); 2202 2203 if (!Accessed.contains(Space)) 2204 continue; 2205 2206 isl::set Elements = Accessed.extract_set(Space); 2207 isl::map Transform = isl::map::universe(Array->getSpace().map_from_set()); 2208 2209 std::vector<int> Int; 2210 unsigned Dims = unsignedFromIslSize(Elements.tuple_dim()); 2211 for (unsigned i = 0; i < Dims; i++) { 2212 isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i); 2213 DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1); 2214 DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0); 2215 2216 isl::basic_set DimHull = DimOnly.affine_hull(); 2217 2218 if (i == Dims - 1) { 2219 Int.push_back(1); 2220 Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i); 2221 continue; 2222 } 2223 2224 if (unsignedFromIslSize(DimHull.dim(isl::dim::div)) == 1) { 2225 isl::aff Diff = DimHull.get_div(0); 2226 isl::val Val = Diff.get_denominator_val(); 2227 2228 int ValInt = 1; 2229 if (Val.is_int()) { 2230 auto ValAPInt = APIntFromVal(Val); 2231 if (ValAPInt.isSignedIntN(32)) 2232 ValInt = ValAPInt.getSExtValue(); 2233 } else { 2234 } 2235 2236 Int.push_back(ValInt); 2237 isl::constraint C = isl::constraint::alloc_equality( 2238 isl::local_space(Transform.get_space())); 2239 C = C.set_coefficient_si(isl::dim::out, i, ValInt); 2240 C = C.set_coefficient_si(isl::dim::in, i, -1); 2241 Transform = Transform.add_constraint(C); 2242 continue; 2243 } 2244 2245 isl::basic_set ZeroSet = isl::basic_set(DimHull); 2246 ZeroSet = ZeroSet.fix_si(isl::dim::set, 0, 0); 2247 2248 int ValInt = 1; 2249 if (ZeroSet.is_equal(DimHull)) { 2250 ValInt = 0; 2251 } 2252 2253 Int.push_back(ValInt); 2254 Transform = Transform.equate(isl::dim::in, i, isl::dim::out, i); 2255 } 2256 2257 isl::set MappedElements = isl::map(Transform).domain(); 2258 if (!Elements.is_subset(MappedElements)) 2259 continue; 2260 2261 bool CanFold = true; 2262 if (Int[0] <= 1) 2263 CanFold = false; 2264 2265 unsigned NumDims = Array->getNumberOfDimensions(); 2266 for (unsigned i = 1; i < NumDims - 1; i++) 2267 if (Int[0] != Int[i] && Int[i]) 2268 CanFold = false; 2269 2270 if (!CanFold) 2271 continue; 2272 2273 for (auto &Access : scop->access_functions()) 2274 if (Access->getScopArrayInfo() == Array) 2275 Access->setAccessRelation( 2276 Access->getAccessRelation().apply_range(Transform)); 2277 2278 std::vector<const SCEV *> Sizes; 2279 for (unsigned i = 0; i < NumDims; i++) { 2280 auto Size = Array->getDimensionSize(i); 2281 2282 if (i == NumDims - 1) 2283 Size = SE.getMulExpr(Size, SE.getConstant(Size->getType(), Int[0])); 2284 Sizes.push_back(Size); 2285 } 2286 2287 Array->updateSizes(Sizes, false /* CheckConsistency */); 2288 } 2289 } 2290 2291 void ScopBuilder::finalizeAccesses() { 2292 updateAccessDimensionality(); 2293 foldSizeConstantsToRight(); 2294 
foldAccessRelations(); 2295 assumeNoOutOfBounds(); 2296 } 2297 2298 void ScopBuilder::updateAccessDimensionality() { 2299 // Check all array accesses for each base pointer and find a (virtual) element 2300 // size for the base pointer that divides all access functions. 2301 for (ScopStmt &Stmt : *scop) 2302 for (MemoryAccess *Access : Stmt) { 2303 if (!Access->isArrayKind()) 2304 continue; 2305 ScopArrayInfo *Array = 2306 const_cast<ScopArrayInfo *>(Access->getScopArrayInfo()); 2307 2308 if (Array->getNumberOfDimensions() != 1) 2309 continue; 2310 unsigned DivisibleSize = Array->getElemSizeInBytes(); 2311 const SCEV *Subscript = Access->getSubscript(0); 2312 while (!isDivisible(Subscript, DivisibleSize, SE)) 2313 DivisibleSize /= 2; 2314 auto *Ty = IntegerType::get(SE.getContext(), DivisibleSize * 8); 2315 Array->updateElementType(Ty); 2316 } 2317 2318 for (auto &Stmt : *scop) 2319 for (auto &Access : Stmt) 2320 Access->updateDimensionality(); 2321 } 2322 2323 void ScopBuilder::foldAccessRelations() { 2324 for (auto &Stmt : *scop) 2325 for (auto &Access : Stmt) 2326 Access->foldAccessRelation(); 2327 } 2328 2329 void ScopBuilder::assumeNoOutOfBounds() { 2330 if (PollyIgnoreInbounds) 2331 return; 2332 for (auto &Stmt : *scop) 2333 for (auto &Access : Stmt) { 2334 isl::set Outside = Access->assumeNoOutOfBound(); 2335 const auto &Loc = Access->getAccessInstruction() 2336 ? Access->getAccessInstruction()->getDebugLoc() 2337 : DebugLoc(); 2338 recordAssumption(&RecordedAssumptions, INBOUNDS, Outside, Loc, 2339 AS_ASSUMPTION); 2340 } 2341 } 2342 2343 void ScopBuilder::ensureValueWrite(Instruction *Inst) { 2344 // Find the statement that defines the value of Inst. That statement has to 2345 // write the value to make it available to those statements that read it. 2346 ScopStmt *Stmt = scop->getStmtFor(Inst); 2347 2348 // It is possible that the value is synthesizable within a loop (such that it 2349 // is not part of any statement), but not after the loop (where you need the 2350 // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will 2351 // avoid this. In case the IR has no such PHI, use the last statement (where 2352 // the value is synthesizable) to write the value. 2353 if (!Stmt) 2354 Stmt = scop->getLastStmtFor(Inst->getParent()); 2355 2356 // Inst not defined within this SCoP. 2357 if (!Stmt) 2358 return; 2359 2360 // Do not process further if the instruction is already written. 2361 if (Stmt->lookupValueWriteOf(Inst)) 2362 return; 2363 2364 addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(), 2365 true, Inst, ArrayRef<const SCEV *>(), 2366 ArrayRef<const SCEV *>(), MemoryKind::Value); 2367 } 2368 2369 void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) { 2370 // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality 2371 // to be able to replace this one. Currently, there is a split responsibility. 2372 // In a first step, the MemoryAccess is created, but without the 2373 // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the 2374 // AccessRelation is created. At least for scalar accesses, there is no new 2375 // information available at ScopStmt::buildAccessRelations(), so we could 2376 // create the AccessRelation right away. This is what 2377 // ScopStmt::ensureValueRead(Value*) does. 
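  // The VirtualUse kind computed below classifies how V reaches this
  // statement; only read-only uses (if modeled) and inter-statement uses
  // require an explicit MemoryKind::Value read access.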
2378 2379 auto *Scope = UserStmt->getSurroundingLoop(); 2380 auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false); 2381 switch (VUse.getKind()) { 2382 case VirtualUse::Constant: 2383 case VirtualUse::Block: 2384 case VirtualUse::Synthesizable: 2385 case VirtualUse::Hoisted: 2386 case VirtualUse::Intra: 2387 // Uses of these kinds do not need a MemoryAccess. 2388 break; 2389 2390 case VirtualUse::ReadOnly: 2391 // Add MemoryAccess for invariant values only if requested. 2392 if (!ModelReadOnlyScalars) 2393 break; 2394 2395 [[fallthrough]]; 2396 case VirtualUse::Inter: 2397 2398 // Do not create another MemoryAccess for reloading the value if one already 2399 // exists. 2400 if (UserStmt->lookupValueReadOf(V)) 2401 break; 2402 2403 addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(), 2404 true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(), 2405 MemoryKind::Value); 2406 2407 // Inter-statement uses need to write the value in their defining statement. 2408 if (VUse.isInter()) 2409 ensureValueWrite(cast<Instruction>(V)); 2410 break; 2411 } 2412 } 2413 2414 void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt, 2415 BasicBlock *IncomingBlock, 2416 Value *IncomingValue, bool IsExitBlock) { 2417 // As the incoming block might turn out to be an error statement ensure we 2418 // will create an exit PHI SAI object. It is needed during code generation 2419 // and would be created later anyway. 2420 if (IsExitBlock) 2421 scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {}, 2422 MemoryKind::ExitPHI); 2423 2424 // This is possible if PHI is in the SCoP's entry block. The incoming blocks 2425 // from outside the SCoP's region have no statement representation. 2426 if (!IncomingStmt) 2427 return; 2428 2429 // Take care for the incoming value being available in the incoming block. 2430 // This must be done before the check for multiple PHI writes because multiple 2431 // exiting edges from subregion each can be the effective written value of the 2432 // subregion. As such, all of them must be made available in the subregion 2433 // statement. 2434 ensureValueRead(IncomingValue, IncomingStmt); 2435 2436 // Do not add more than one MemoryAccess per PHINode and ScopStmt. 2437 if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) { 2438 assert(Acc->getAccessInstruction() == PHI); 2439 Acc->addIncoming(IncomingBlock, IncomingValue); 2440 return; 2441 } 2442 2443 MemoryAccess *Acc = addMemoryAccess( 2444 IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true, 2445 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(), 2446 IsExitBlock ? 
MemoryKind::ExitPHI : MemoryKind::PHI); 2447 assert(Acc); 2448 Acc->addIncoming(IncomingBlock, IncomingValue); 2449 } 2450 2451 void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) { 2452 addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true, 2453 PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(), 2454 MemoryKind::PHI); 2455 } 2456 2457 void ScopBuilder::buildDomain(ScopStmt &Stmt) { 2458 isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt); 2459 2460 Stmt.Domain = scop->getDomainConditions(&Stmt); 2461 Stmt.Domain = Stmt.Domain.set_tuple_id(Id); 2462 } 2463 2464 void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) { 2465 isl::set Domain = Stmt.getDomain(); 2466 BasicBlock *BB = Stmt.getEntryBlock(); 2467 2468 Loop *L = LI.getLoopFor(BB); 2469 2470 while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L)) 2471 L = L->getParentLoop(); 2472 2473 SmallVector<llvm::Loop *, 8> Loops; 2474 2475 while (L && Stmt.getParent()->getRegion().contains(L)) { 2476 Loops.push_back(L); 2477 L = L->getParentLoop(); 2478 } 2479 2480 Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend()); 2481 } 2482 2483 /// Return the reduction type for a given binary operator. 2484 static MemoryAccess::ReductionType 2485 getReductionType(const BinaryOperator *BinOp) { 2486 if (!BinOp) 2487 return MemoryAccess::RT_NONE; 2488 switch (BinOp->getOpcode()) { 2489 case Instruction::FAdd: 2490 if (!BinOp->isFast()) 2491 return MemoryAccess::RT_NONE; 2492 [[fallthrough]]; 2493 case Instruction::Add: 2494 return MemoryAccess::RT_ADD; 2495 case Instruction::Or: 2496 return MemoryAccess::RT_BOR; 2497 case Instruction::Xor: 2498 return MemoryAccess::RT_BXOR; 2499 case Instruction::And: 2500 return MemoryAccess::RT_BAND; 2501 case Instruction::FMul: 2502 if (!BinOp->isFast()) 2503 return MemoryAccess::RT_NONE; 2504 [[fallthrough]]; 2505 case Instruction::Mul: 2506 if (DisableMultiplicativeReductions) 2507 return MemoryAccess::RT_NONE; 2508 return MemoryAccess::RT_MUL; 2509 default: 2510 return MemoryAccess::RT_NONE; 2511 } 2512 } 2513 2514 /// @brief Combine two reduction types 2515 static MemoryAccess::ReductionType 2516 combineReductionType(MemoryAccess::ReductionType RT0, 2517 MemoryAccess::ReductionType RT1) { 2518 if (RT0 == MemoryAccess::RT_BOTTOM) 2519 return RT1; 2520 if (RT0 == RT1) 2521 return RT1; 2522 return MemoryAccess::RT_NONE; 2523 } 2524 2525 /// True if @p AllAccs intersects with @p MemAccs except @p LoadMA and @p 2526 /// StoreMA 2527 bool hasIntersectingAccesses(isl::set AllAccs, MemoryAccess *LoadMA, 2528 MemoryAccess *StoreMA, isl::set Domain, 2529 SmallVector<MemoryAccess *, 8> &MemAccs) { 2530 bool HasIntersectingAccs = false; 2531 auto AllAccsNoParams = AllAccs.project_out_all_params(); 2532 2533 for (MemoryAccess *MA : MemAccs) { 2534 if (MA == LoadMA || MA == StoreMA) 2535 continue; 2536 auto AccRel = MA->getAccessRelation().intersect_domain(Domain); 2537 auto Accs = AccRel.range(); 2538 auto AccsNoParams = Accs.project_out_all_params(); 2539 2540 bool CompatibleSpace = AllAccsNoParams.has_equal_space(AccsNoParams); 2541 2542 if (CompatibleSpace) { 2543 auto OverlapAccs = Accs.intersect(AllAccs); 2544 bool DoesIntersect = !OverlapAccs.is_empty(); 2545 HasIntersectingAccs |= DoesIntersect; 2546 } 2547 } 2548 return HasIntersectingAccs; 2549 } 2550 2551 /// Test if the accesses of @p LoadMA and @p StoreMA can form a reduction 2552 bool checkCandidatePairAccesses(MemoryAccess *LoadMA, MemoryAccess *StoreMA, 2553 
isl::set Domain, 2554 SmallVector<MemoryAccess *, 8> &MemAccs) { 2555 // First check if the base value is the same. 2556 isl::map LoadAccs = LoadMA->getAccessRelation(); 2557 isl::map StoreAccs = StoreMA->getAccessRelation(); 2558 bool Valid = LoadAccs.has_equal_space(StoreAccs); 2559 POLLY_DEBUG(dbgs() << " == The accessed space below is " 2560 << (Valid ? "" : "not ") << "equal!\n"); 2561 POLLY_DEBUG(LoadMA->dump(); StoreMA->dump()); 2562 2563 if (Valid) { 2564 // Then check if they actually access the same memory. 2565 isl::map R = isl::manage(LoadAccs.copy()) 2566 .intersect_domain(isl::manage(Domain.copy())); 2567 isl::map W = isl::manage(StoreAccs.copy()) 2568 .intersect_domain(isl::manage(Domain.copy())); 2569 isl::set RS = R.range(); 2570 isl::set WS = W.range(); 2571 2572 isl::set InterAccs = 2573 isl::manage(RS.copy()).intersect(isl::manage(WS.copy())); 2574 Valid = !InterAccs.is_empty(); 2575 POLLY_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "" : "not ") 2576 << "overlapping!\n"); 2577 } 2578 2579 if (Valid) { 2580 // Finally, check that no other instructions access this memory. 2581 isl::map AllAccsRel = LoadAccs.unite(StoreAccs); 2582 AllAccsRel = AllAccsRel.intersect_domain(Domain); 2583 isl::set AllAccs = AllAccsRel.range(); 2584 Valid = !hasIntersectingAccesses(AllAccs, LoadMA, StoreMA, Domain, MemAccs); 2585 POLLY_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "not " : "") 2586 << "accessed by other instructions!\n"); 2587 } 2588 2589 return Valid; 2590 } 2591 2592 void ScopBuilder::checkForReductions(ScopStmt &Stmt) { 2593 // Perform a data flow analysis on the current scop statement to propagate the 2594 // uses of loaded values. Then check and mark the memory accesses which are 2595 // part of reduction-like chains. 2596 // During the data flow analysis we use the State variable to keep track of 2597 // the used "load-instructions" for each instruction in the scop statement. 2598 // This includes the LLVM-IR of the load and the "number of uses" (or the 2599 // number of paths in the operand tree which end in this load). 2600 using StatePairTy = std::pair<unsigned, MemoryAccess::ReductionType>; 2601 using FlowInSetTy = MapVector<const LoadInst *, StatePairTy>; 2602 using StateTy = MapVector<const Instruction *, FlowInSetTy>; 2603 StateTy State; 2604 2605 // Invalid loads are loads which have uses we can't track properly in the 2606 // state map.
This includes loads which: 2607 // o do not form a reduction when they flow into a memory location: 2608 // (e.g., A[i] = B[i] * 3 and A[i] = A[i] * A[i] + A[i]) 2609 // o are used by a non binary operator or one which is not commutative 2610 // and associative (e.g., A[i] = A[i] % 3) 2611 // o might change the control flow (e.g., if (A[i])) 2612 // o are used in indirect memory accesses (e.g., A[B[i]]) 2613 // o are used outside the current scop statement 2614 SmallPtrSet<const Instruction *, 8> InvalidLoads; 2615 SmallVector<BasicBlock *, 8> ScopBlocks; 2616 BasicBlock *BB = Stmt.getBasicBlock(); 2617 if (BB) 2618 ScopBlocks.push_back(BB); 2619 else 2620 for (BasicBlock *Block : Stmt.getRegion()->blocks()) 2621 ScopBlocks.push_back(Block); 2622 // Run the data flow analysis for all values in the scop statement 2623 for (BasicBlock *Block : ScopBlocks) { 2624 for (Instruction &Inst : *Block) { 2625 if ((Stmt.getParent())->getStmtFor(&Inst) != &Stmt) 2626 continue; 2627 bool UsedOutsideStmt = any_of(Inst.users(), [&Stmt](User *U) { 2628 return (Stmt.getParent())->getStmtFor(cast<Instruction>(U)) != &Stmt; 2629 }); 2630 // Treat loads and stores special 2631 if (auto *Load = dyn_cast<LoadInst>(&Inst)) { 2632 // Invalidate all loads used which feed into the address of this load. 2633 if (auto *Ptr = dyn_cast<Instruction>(Load->getPointerOperand())) { 2634 const auto &It = State.find(Ptr); 2635 if (It != State.end()) 2636 for (const auto &FlowInSetElem : It->second) 2637 InvalidLoads.insert(FlowInSetElem.first); 2638 } 2639 2640 // If this load is used outside this stmt, invalidate it. 2641 if (UsedOutsideStmt) 2642 InvalidLoads.insert(Load); 2643 2644 // And indicate that this load uses itself once but without specifying 2645 // any reduction operator. 2646 State[Load].insert( 2647 std::make_pair(Load, std::make_pair(1, MemoryAccess::RT_BOTTOM))); 2648 continue; 2649 } 2650 2651 if (auto *Store = dyn_cast<StoreInst>(&Inst)) { 2652 // Invalidate all loads which feed into the address of this store. 2653 if (const Instruction *Ptr = 2654 dyn_cast<Instruction>(Store->getPointerOperand())) { 2655 const auto &It = State.find(Ptr); 2656 if (It != State.end()) 2657 for (const auto &FlowInSetElem : It->second) 2658 InvalidLoads.insert(FlowInSetElem.first); 2659 } 2660 2661 // Propagate the uses of the value operand to the store 2662 if (auto *ValueInst = dyn_cast<Instruction>(Store->getValueOperand())) 2663 State.insert(std::make_pair(Store, State[ValueInst])); 2664 continue; 2665 } 2666 2667 // Non load and store instructions are either binary operators or they 2668 // will invalidate all used loads. 2669 auto *BinOp = dyn_cast<BinaryOperator>(&Inst); 2670 MemoryAccess::ReductionType CurRedType = getReductionType(BinOp); 2671 POLLY_DEBUG(dbgs() << "CurInst: " << Inst << " RT: " << CurRedType 2672 << "\n"); 2673 2674 // Iterate over all operands and propagate their input loads to 2675 // instruction. 2676 FlowInSetTy &InstInFlowSet = State[&Inst]; 2677 for (Use &Op : Inst.operands()) { 2678 auto *OpInst = dyn_cast<Instruction>(Op); 2679 if (!OpInst) 2680 continue; 2681 2682 POLLY_DEBUG(dbgs().indent(4) << "Op Inst: " << *OpInst << "\n"); 2683 const StateTy::iterator &OpInFlowSetIt = State.find(OpInst); 2684 if (OpInFlowSetIt == State.end()) 2685 continue; 2686 2687 // Iterate over all the input loads of the operand and combine them 2688 // with the input loads of current instruction. 
2689 FlowInSetTy &OpInFlowSet = OpInFlowSetIt->second; 2690 for (auto &OpInFlowPair : OpInFlowSet) { 2691 unsigned OpFlowIn = OpInFlowPair.second.first; 2692 unsigned InstFlowIn = InstInFlowSet[OpInFlowPair.first].first; 2693 2694 MemoryAccess::ReductionType OpRedType = OpInFlowPair.second.second; 2695 MemoryAccess::ReductionType InstRedType = 2696 InstInFlowSet[OpInFlowPair.first].second; 2697 2698 MemoryAccess::ReductionType NewRedType = 2699 combineReductionType(OpRedType, CurRedType); 2700 if (InstFlowIn) 2701 NewRedType = combineReductionType(NewRedType, InstRedType); 2702 2703 POLLY_DEBUG(dbgs().indent(8) << "OpRedType: " << OpRedType << "\n"); 2704 POLLY_DEBUG(dbgs().indent(8) << "NewRedType: " << NewRedType << "\n"); 2705 InstInFlowSet[OpInFlowPair.first] = 2706 std::make_pair(OpFlowIn + InstFlowIn, NewRedType); 2707 } 2708 } 2709 2710 // If this operation is used outside the stmt, invalidate all the loads 2711 // which feed into it. 2712 if (UsedOutsideStmt) 2713 for (const auto &FlowInSetElem : InstInFlowSet) 2714 InvalidLoads.insert(FlowInSetElem.first); 2715 } 2716 } 2717 2718 // All used loads are propagated through the whole basic block; now try to 2719 // find valid reduction-like candidate pairs. These load-store pairs fulfill 2720 // all reduction like properties with regards to only this load-store chain. 2721 // We later have to check if the loaded value was invalidated by an 2722 // instruction not in that chain. 2723 using MemAccPair = std::pair<MemoryAccess *, MemoryAccess *>; 2724 DenseMap<MemAccPair, MemoryAccess::ReductionType> ValidCandidates; 2725 2726 // Iterate over all write memory accesses and check the loads flowing into 2727 // it for reduction candidate pairs. 2728 for (MemoryAccess *WriteMA : Stmt.MemAccs) { 2729 if (WriteMA->isRead()) 2730 continue; 2731 StoreInst *St = dyn_cast<StoreInst>(WriteMA->getAccessInstruction()); 2732 if (!St) 2733 continue; 2734 assert(!St->isVolatile()); 2735 2736 FlowInSetTy &MaInFlowSet = State[WriteMA->getAccessInstruction()]; 2737 for (auto &MaInFlowSetElem : MaInFlowSet) { 2738 MemoryAccess *ReadMA = &Stmt.getArrayAccessFor(MaInFlowSetElem.first); 2739 assert(ReadMA && "Couldn't find memory access for incoming load!"); 2740 2741 POLLY_DEBUG(dbgs() << "'" << *ReadMA->getAccessInstruction() 2742 << "'\n\tflows into\n'" 2743 << *WriteMA->getAccessInstruction() << "'\n\t #" 2744 << MaInFlowSetElem.second.first << " times & RT: " 2745 << MaInFlowSetElem.second.second << "\n"); 2746 2747 MemoryAccess::ReductionType RT = MaInFlowSetElem.second.second; 2748 unsigned NumAllowableInFlow = 1; 2749 2750 // We allow the load to flow in exactly once for binary reductions 2751 bool Valid = (MaInFlowSetElem.second.first == NumAllowableInFlow); 2752 2753 // Check if we saw a valid chain of binary operators. 2754 Valid = Valid && RT != MemoryAccess::RT_BOTTOM; 2755 Valid = Valid && RT != MemoryAccess::RT_NONE; 2756 2757 // Then check if the memory accesses allow a reduction. 2758 Valid = Valid && checkCandidatePairAccesses( 2759 ReadMA, WriteMA, Stmt.getDomain(), Stmt.MemAccs); 2760 2761 // Finally, mark the pair as a candidate or the load as a invalid one. 2762 if (Valid) 2763 ValidCandidates[std::make_pair(ReadMA, WriteMA)] = RT; 2764 else 2765 InvalidLoads.insert(ReadMA->getAccessInstruction()); 2766 } 2767 } 2768 2769 // In the last step mark the memory accesses of candidate pairs as reduction 2770 // like if the load wasn't marked invalid in the previous step. 
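  // Illustrative sketch: for a statement computing 'A[i] = A[i] + B[i]' both
  // the load of A[i] and the load of B[i] flow into the store exactly once
  // with reduction type RT_ADD. The (A-load, A-store) pair passes
  // checkCandidatePairAccesses and is marked reduction-like below, whereas the
  // B[i] load accesses a different array than the store and is rejected as a
  // candidate.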
2771 for (auto &CandidatePair : ValidCandidates) { 2772 MemoryAccess *LoadMA = CandidatePair.first.first; 2773 if (InvalidLoads.count(LoadMA->getAccessInstruction())) 2774 continue; 2775 POLLY_DEBUG( 2776 dbgs() << " Load :: " 2777 << *((CandidatePair.first.first)->getAccessInstruction()) 2778 << "\n Store :: " 2779 << *((CandidatePair.first.second)->getAccessInstruction()) 2780 << "\n are marked as reduction like\n"); 2781 MemoryAccess::ReductionType RT = CandidatePair.second; 2782 CandidatePair.first.first->markAsReductionLike(RT); 2783 CandidatePair.first.second->markAsReductionLike(RT); 2784 } 2785 } 2786 2787 void ScopBuilder::verifyInvariantLoads() { 2788 auto &RIL = scop->getRequiredInvariantLoads(); 2789 for (LoadInst *LI : RIL) { 2790 assert(LI && scop->contains(LI)); 2791 // If there exists a statement in the scop which has a memory access for 2792 // @p LI, then mark this scop as infeasible for optimization. 2793 for (ScopStmt &Stmt : *scop) 2794 if (Stmt.getArrayAccessOrNULLFor(LI)) { 2795 scop->invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent()); 2796 return; 2797 } 2798 } 2799 } 2800 2801 void ScopBuilder::hoistInvariantLoads() { 2802 if (!PollyInvariantLoadHoisting) 2803 return; 2804 2805 isl::union_map Writes = scop->getWrites(); 2806 for (ScopStmt &Stmt : *scop) { 2807 InvariantAccessesTy InvariantAccesses; 2808 2809 for (MemoryAccess *Access : Stmt) { 2810 isl::set NHCtx = getNonHoistableCtx(Access, Writes); 2811 if (!NHCtx.is_null()) 2812 InvariantAccesses.push_back({Access, NHCtx}); 2813 } 2814 2815 // Transfer the memory accesses from the statement to the SCoP. 2816 for (auto InvMA : InvariantAccesses) 2817 Stmt.removeMemoryAccess(InvMA.MA); 2818 addInvariantLoads(Stmt, InvariantAccesses); 2819 } 2820 } 2821 2822 /// Check if an access range is too complex. 2823 /// 2824 /// An access range is too complex if it contains either many disjuncts or 2825 /// very complex expressions. As a simple heuristic, we assume a set to 2826 /// be too complex if the sum of existentially quantified dimensions and 2827 /// set dimensions is larger than a threshold. This reliably detects both 2828 /// sets with many disjuncts as well as sets with many divisions as they 2829 /// arise in h264. 2830 /// 2831 /// @param AccessRange The range to check for complexity. 2832 /// 2833 /// @returns True if the access range is too complex.
2834 static bool isAccessRangeTooComplex(isl::set AccessRange) { 2835 unsigned NumTotalDims = 0; 2836 2837 for (isl::basic_set BSet : AccessRange.get_basic_set_list()) { 2838 NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::div)); 2839 NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::set)); 2840 } 2841 2842 if (NumTotalDims > MaxDimensionsInAccessRange) 2843 return true; 2844 2845 return false; 2846 } 2847 2848 bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA, 2849 isl::union_map Writes) { 2850 if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) { 2851 return getNonHoistableCtx(BasePtrMA, Writes).is_null(); 2852 } 2853 2854 Value *BaseAddr = MA->getOriginalBaseAddr(); 2855 if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr)) 2856 if (!isa<LoadInst>(BasePtrInst)) 2857 return scop->contains(BasePtrInst); 2858 2859 return false; 2860 } 2861 2862 void ScopBuilder::addUserContext() { 2863 if (UserContextStr.empty()) 2864 return; 2865 2866 isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str()); 2867 isl::space Space = scop->getParamSpace(); 2868 isl::size SpaceParams = Space.dim(isl::dim::param); 2869 if (unsignedFromIslSize(SpaceParams) != 2870 unsignedFromIslSize(UserContext.dim(isl::dim::param))) { 2871 std::string SpaceStr = stringFromIslObj(Space, "null"); 2872 errs() << "Error: the context provided in -polly-context does not have the " 2873 << "same number of dimensions as the computed context. Due to this " 2874 << "mismatch, the -polly-context option is ignored. Please provide " 2875 << "the context in the parameter space: " << SpaceStr << ".\n"; 2876 return; 2877 } 2878 2879 for (auto i : rangeIslSize(0, SpaceParams)) { 2880 std::string NameContext = 2881 scop->getContext().get_dim_name(isl::dim::param, i); 2882 std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i); 2883 2884 if (NameContext != NameUserContext) { 2885 std::string SpaceStr = stringFromIslObj(Space, "null"); 2886 errs() << "Error: the name of dimension " << i 2887 << " provided in -polly-context " 2888 << "is '" << NameUserContext << "', but the name in the computed " 2889 << "context is '" << NameContext 2890 << "'. Due to this name mismatch, " 2891 << "the -polly-context option is ignored. Please provide " 2892 << "the context in the parameter space: " << SpaceStr << ".\n"; 2893 return; 2894 } 2895 2896 UserContext = UserContext.set_dim_id(isl::dim::param, i, 2897 Space.get_dim_id(isl::dim::param, i)); 2898 } 2899 isl::set newContext = scop->getContext().intersect(UserContext); 2900 scop->setContext(newContext); 2901 } 2902 2903 isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access, 2904 isl::union_map Writes) { 2905 // TODO: Loads that are not loop carried, hence are in a statement with 2906 // zero iterators, are by construction invariant, though we 2907 // currently "hoist" them anyway. This is necessary because we allow 2908 // them to be treated as parameters (e.g., in conditions) and our code 2909 // generation would otherwise use the old value. 2910 2911 auto &Stmt = *Access->getStatement(); 2912 BasicBlock *BB = Stmt.getEntryBlock(); 2913 2914 if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() || 2915 Access->isMemoryIntrinsic()) 2916 return {}; 2917 2918 // Skip accesses that have an invariant base pointer which is defined but 2919 // not loaded inside the SCoP. This can happen, e.g., if a readnone call 2920 // returns a pointer that is used as a base address.
However, as we want 2921 // to hoist indirect pointers, we allow the base pointer to be defined in 2922 // the region if it is also a memory access. Each ScopArrayInfo object 2923 // that has a base pointer origin has a base pointer that is loaded and 2924 // that it is invariant, thus it will be hoisted too. However, if there is 2925 // no base pointer origin we check that the base pointer is defined 2926 // outside the region. 2927 auto *LI = cast<LoadInst>(Access->getAccessInstruction()); 2928 if (hasNonHoistableBasePtrInScop(Access, Writes)) 2929 return {}; 2930 2931 isl::map AccessRelation = Access->getAccessRelation(); 2932 assert(!AccessRelation.is_empty()); 2933 2934 if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators())) 2935 return {}; 2936 2937 AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain()); 2938 isl::set SafeToLoad; 2939 2940 auto &DL = scop->getFunction().getDataLayout(); 2941 if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getType(), 2942 LI->getAlign(), DL, nullptr)) { 2943 SafeToLoad = isl::set::universe(AccessRelation.get_space().range()); 2944 } else if (BB != LI->getParent()) { 2945 // Skip accesses in non-affine subregions as they might not be executed 2946 // under the same condition as the entry of the non-affine subregion. 2947 return {}; 2948 } else { 2949 SafeToLoad = AccessRelation.range(); 2950 } 2951 2952 if (isAccessRangeTooComplex(AccessRelation.range())) 2953 return {}; 2954 2955 isl::union_map Written = Writes.intersect_range(SafeToLoad); 2956 isl::set WrittenCtx = Written.params(); 2957 bool IsWritten = !WrittenCtx.is_empty(); 2958 2959 if (!IsWritten) 2960 return WrittenCtx; 2961 2962 WrittenCtx = WrittenCtx.remove_divs(); 2963 bool TooComplex = 2964 unsignedFromIslSize(WrittenCtx.n_basic_set()) >= MaxDisjunctsInDomain; 2965 if (TooComplex || !isRequiredInvariantLoad(LI)) 2966 return {}; 2967 2968 scop->addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(), 2969 AS_RESTRICTION, LI->getParent()); 2970 return WrittenCtx; 2971 } 2972 2973 static bool isAParameter(llvm::Value *maybeParam, const Function &F) { 2974 for (const llvm::Argument &Arg : F.args()) 2975 if (&Arg == maybeParam) 2976 return true; 2977 2978 return false; 2979 } 2980 2981 bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA, 2982 bool StmtInvalidCtxIsEmpty, 2983 bool MAInvalidCtxIsEmpty, 2984 bool NonHoistableCtxIsEmpty) { 2985 LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction()); 2986 const DataLayout &DL = LInst->getDataLayout(); 2987 if (PollyAllowDereferenceOfAllFunctionParams && 2988 isAParameter(LInst->getPointerOperand(), scop->getFunction())) 2989 return true; 2990 2991 // TODO: We can provide more information for better but more expensive 2992 // results. 2993 if (!isDereferenceableAndAlignedPointer( 2994 LInst->getPointerOperand(), LInst->getType(), LInst->getAlign(), DL)) 2995 return false; 2996 2997 // If the location might be overwritten we do not hoist it unconditionally. 2998 // 2999 // TODO: This is probably too conservative. 3000 if (!NonHoistableCtxIsEmpty) 3001 return false; 3002 3003 // If a dereferenceable load is in a statement that is modeled precisely we 3004 // can hoist it. 3005 if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty) 3006 return true; 3007 3008 // Even if the statement is not modeled precisely we can hoist the load if it 3009 // does not involve any parameters that might have been specialized by the 3010 // statement domain. 
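  // Requiring all subscripts to be constant below ensures the accessed cell
  // does not depend on any parameter that the statement domain might have
  // specialized.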
3011   for (const SCEV *Subscript : MA->subscripts())
3012     if (!isa<SCEVConstant>(Subscript))
3013       return false;
3014   return true;
3015 }
3016
3017 void ScopBuilder::addInvariantLoads(ScopStmt &Stmt,
3018                                     InvariantAccessesTy &InvMAs) {
3019   if (InvMAs.empty())
3020     return;
3021
3022   isl::set StmtInvalidCtx = Stmt.getInvalidContext();
3023   bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
3024
3025   // Get the context under which the statement is executed but remove the error
3026   // context under which this statement is reached.
3027   isl::set DomainCtx = Stmt.getDomain().params();
3028   DomainCtx = DomainCtx.subtract(StmtInvalidCtx);
3029
3030   if (unsignedFromIslSize(DomainCtx.n_basic_set()) >= MaxDisjunctsInDomain) {
3031     auto *AccInst = InvMAs.front().MA->getAccessInstruction();
3032     scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
3033     return;
3034   }
3035
3036   // Project out all parameters that relate to loads in the statement. Otherwise
3037   // we could have cyclic dependences on the constraints under which the
3038   // hoisted loads are executed and we could not determine an order in which to
3039   // pre-load them. This happens because not only lower bounds are part of the
3040   // domain but also upper bounds.
3041   for (auto &InvMA : InvMAs) {
3042     auto *MA = InvMA.MA;
3043     Instruction *AccInst = MA->getAccessInstruction();
3044     if (SE.isSCEVable(AccInst->getType())) {
3045       SetVector<Value *> Values;
3046       for (const SCEV *Parameter : scop->parameters()) {
3047         Values.clear();
3048         findValues(Parameter, SE, Values);
3049         if (!Values.count(AccInst))
3050           continue;
3051
3052         isl::id ParamId = scop->getIdForParam(Parameter);
3053         if (!ParamId.is_null()) {
3054           int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
3055           if (Dim >= 0)
3056             DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
3057         }
3058       }
3059     }
3060   }
3061
3062   for (auto &InvMA : InvMAs) {
3063     auto *MA = InvMA.MA;
3064     isl::set NHCtx = InvMA.NonHoistableCtx;
3065
3066     // Check for another invariant access that accesses the same location as
3067     // MA and if found consolidate them. Otherwise create a new equivalence
3068     // class at the end of InvariantEquivClasses.
3069     LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
3070     Type *Ty = LInst->getType();
3071     const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
3072
3073     isl::set MAInvalidCtx = MA->getInvalidContext();
3074     bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
3075     bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
3076
3077     isl::set MACtx;
3078     // Check if we know that this pointer can be speculatively accessed.
3079     if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
3080                            NonHoistableCtxIsEmpty)) {
3081       MACtx = isl::set::universe(DomainCtx.get_space());
3082     } else {
3083       MACtx = DomainCtx;
3084       MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
3085       MACtx = MACtx.gist_params(scop->getContext());
3086     }
3087
3088     bool Consolidated = false;
3089     for (auto &IAClass : scop->invariantEquivClasses()) {
3090       if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
3091         continue;
3092
3093       // If the pointer and the type are equal, check if the access function
3094       // w.r.t. the domain is equal too. It can happen that the domain fixes
3095       // parameter values and these can be different for distinct parts of the
3096       // SCoP. If this happens we cannot consolidate the loads but need to
3097       // create a new invariant load equivalence class.
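      // For example, if one statement's domain fixes a parameter p to 0 and
      // another statement's domain fixes p to 1, two loads of A[p] refer to
      // different elements and must stay in separate equivalence classes.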
3098       auto &MAs = IAClass.InvariantAccesses;
3099       if (!MAs.empty()) {
3100         auto *LastMA = MAs.front();
3101
3102         isl::set AR = MA->getAccessRelation().range();
3103         isl::set LastAR = LastMA->getAccessRelation().range();
3104         bool SameAR = AR.is_equal(LastAR);
3105
3106         if (!SameAR)
3107           continue;
3108       }
3109
3110       // Add MA to the list of accesses that are in this class.
3111       MAs.push_front(MA);
3112
3113       Consolidated = true;
3114
3115       // Unify the execution context of the class and this statement.
3116       isl::set IAClassDomainCtx = IAClass.ExecutionContext;
3117       if (!IAClassDomainCtx.is_null())
3118         IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
3119       else
3120         IAClassDomainCtx = MACtx;
3121       IAClass.ExecutionContext = IAClassDomainCtx;
3122       break;
3123     }
3124
3125     if (Consolidated)
3126       continue;
3127
3128     MACtx = MACtx.coalesce();
3129
3130     // If we did not consolidate MA, thus did not find an equivalence class
3131     // for it, we create a new one.
3132     scop->addInvariantEquivClass(
3133         InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
3134   }
3135 }
3136
3137 /// Find the canonical ScopArrayInfo object for a set of hoisted invariant
3138 /// loads. The canonical array is the one that corresponds to the first load
3139 /// in the list of accesses which is used as the base pointer of a scop
3140 /// array.
3141 static const ScopArrayInfo *findCanonicalArray(Scop &S,
3142                                                MemoryAccessList &Accesses) {
3143   for (MemoryAccess *Access : Accesses) {
3144     const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
3145         Access->getAccessInstruction(), MemoryKind::Array);
3146     if (CanonicalArray)
3147       return CanonicalArray;
3148   }
3149   return nullptr;
3150 }
3151
3152 /// Check if @p Array serves as the base array in an invariant load.
3153 static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) {
3154   for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses())
3155     for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
3156       if (Access2->getScopArrayInfo() == Array)
3157         return true;
3158   return false;
3159 }
3160
3161 /// Replace the base pointer arrays in all memory accesses referencing @p Old
3162 /// with a reference to @p New.
3163 static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
3164                                  const ScopArrayInfo *New) {
3165   for (ScopStmt &Stmt : S)
3166     for (MemoryAccess *Access : Stmt) {
3167       if (Access->getLatestScopArrayInfo() != Old)
3168         continue;
3169
3170       isl::id Id = New->getBasePtrId();
3171       isl::map Map = Access->getAccessRelation();
3172       Map = Map.set_tuple_id(isl::dim::out, Id);
3173       Access->setAccessRelation(Map);
3174     }
3175 }
3176
3177 void ScopBuilder::canonicalizeDynamicBasePtrs() {
3178   for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) {
3179     MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
3180
3181     const ScopArrayInfo *CanonicalBasePtrSAI =
3182         findCanonicalArray(*scop, BasePtrAccesses);
3183
3184     if (!CanonicalBasePtrSAI)
3185       continue;
3186
3187     for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
3188       const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull(
3189           BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
3190       if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
3191           !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
3192         continue;
3193
3194       // We currently do not canonicalize arrays where some accesses are
3195       // hoisted as invariant loads. If we did, we would need to update the
3196       // access function of the invariant loads as well.
However, as this is not a 3197 // very common situation, we leave this for now to avoid further 3198 // complexity increases. 3199 if (isUsedForIndirectHoistedLoad(*scop, BasePtrSAI)) 3200 continue; 3201 3202 replaceBasePtrArrays(*scop, BasePtrSAI, CanonicalBasePtrSAI); 3203 } 3204 } 3205 } 3206 3207 void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) { 3208 for (MemoryAccess *Access : Stmt.MemAccs) { 3209 Type *ElementType = Access->getElementType(); 3210 3211 MemoryKind Ty; 3212 if (Access->isPHIKind()) 3213 Ty = MemoryKind::PHI; 3214 else if (Access->isExitPHIKind()) 3215 Ty = MemoryKind::ExitPHI; 3216 else if (Access->isValueKind()) 3217 Ty = MemoryKind::Value; 3218 else 3219 Ty = MemoryKind::Array; 3220 3221 // Create isl::pw_aff for SCEVs which describe sizes. Collect all 3222 // assumptions which are taken. isl::pw_aff objects are cached internally 3223 // and they are used later by scop. 3224 for (const SCEV *Size : Access->Sizes) { 3225 if (!Size) 3226 continue; 3227 scop->getPwAff(Size, nullptr, false, &RecordedAssumptions); 3228 } 3229 auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(), 3230 ElementType, Access->Sizes, Ty); 3231 3232 // Create isl::pw_aff for SCEVs which describe subscripts. Collect all 3233 // assumptions which are taken. isl::pw_aff objects are cached internally 3234 // and they are used later by scop. 3235 for (const SCEV *Subscript : Access->subscripts()) { 3236 if (!Access->isAffine() || !Subscript) 3237 continue; 3238 scop->getPwAff(Subscript, Stmt.getEntryBlock(), false, 3239 &RecordedAssumptions); 3240 } 3241 Access->buildAccessRelation(SAI); 3242 scop->addAccessData(Access); 3243 } 3244 } 3245 3246 /// Add the minimal/maximal access in @p Set to @p User. 3247 /// 3248 /// @return True if more accesses should be added, false if we reached the 3249 /// maximal number of run-time checks to be generated. 3250 static bool buildMinMaxAccess(isl::set Set, 3251 Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) { 3252 isl::pw_multi_aff MinPMA, MaxPMA; 3253 isl::pw_aff LastDimAff; 3254 isl::aff OneAff; 3255 unsigned Pos; 3256 3257 Set = Set.remove_divs(); 3258 polly::simplify(Set); 3259 3260 if (unsignedFromIslSize(Set.n_basic_set()) > RunTimeChecksMaxAccessDisjuncts) 3261 Set = Set.simple_hull(); 3262 3263 // Restrict the number of parameters involved in the access as the lexmin/ 3264 // lexmax computation will take too long if this number is high. 3265 // 3266 // Experiments with a simple test case using an i7 4800MQ: 3267 // 3268 // #Parameters involved | Time (in sec) 3269 // 6 | 0.01 3270 // 7 | 0.04 3271 // 8 | 0.12 3272 // 9 | 0.40 3273 // 10 | 1.54 3274 // 11 | 6.78 3275 // 12 | 30.38 3276 // 3277 if (isl_set_n_param(Set.get()) > 3278 static_cast<isl_size>(RunTimeChecksMaxParameters)) { 3279 unsigned InvolvedParams = 0; 3280 for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++) 3281 if (Set.involves_dims(isl::dim::param, u, 1)) 3282 InvolvedParams++; 3283 3284 if (InvolvedParams > RunTimeChecksMaxParameters) 3285 return false; 3286 } 3287 3288 MinPMA = Set.lexmin_pw_multi_aff(); 3289 MaxPMA = Set.lexmax_pw_multi_aff(); 3290 3291 MinPMA = MinPMA.coalesce(); 3292 MaxPMA = MaxPMA.coalesce(); 3293 3294 if (MaxPMA.is_null()) 3295 return false; 3296 3297 unsigned MaxOutputSize = unsignedFromIslSize(MaxPMA.dim(isl::dim::out)); 3298 3299 // Adjust the last dimension of the maximal access by one as we want to 3300 // enclose the accessed memory region by MinPMA and MaxPMA. 
The pointer 3301 // we test during code generation might now point after the end of the 3302 // allocated array but we will never dereference it anyway. 3303 assert(MaxOutputSize >= 1 && "Assumed at least one output dimension"); 3304 3305 Pos = MaxOutputSize - 1; 3306 LastDimAff = MaxPMA.at(Pos); 3307 OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space())); 3308 OneAff = OneAff.add_constant_si(1); 3309 LastDimAff = LastDimAff.add(OneAff); 3310 MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff); 3311 3312 if (MinPMA.is_null() || MaxPMA.is_null()) 3313 return false; 3314 3315 MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA)); 3316 3317 return true; 3318 } 3319 3320 /// Wrapper function to calculate minimal/maximal accesses to each array. 3321 bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup, 3322 Scop::MinMaxVectorTy &MinMaxAccesses) { 3323 MinMaxAccesses.reserve(AliasGroup.size()); 3324 3325 isl::union_set Domains = scop->getDomains(); 3326 isl::union_map Accesses = isl::union_map::empty(scop->getIslCtx()); 3327 3328 for (MemoryAccess *MA : AliasGroup) 3329 Accesses = Accesses.unite(MA->getAccessRelation()); 3330 3331 Accesses = Accesses.intersect_domain(Domains); 3332 isl::union_set Locations = Accesses.range(); 3333 3334 bool LimitReached = false; 3335 for (isl::set Set : Locations.get_set_list()) { 3336 LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop); 3337 if (LimitReached) 3338 break; 3339 } 3340 3341 return !LimitReached; 3342 } 3343 3344 static isl::set getAccessDomain(MemoryAccess *MA) { 3345 isl::set Domain = MA->getStatement()->getDomain(); 3346 Domain = Domain.project_out(isl::dim::set, 0, 3347 unsignedFromIslSize(Domain.tuple_dim())); 3348 return Domain.reset_tuple_id(); 3349 } 3350 3351 bool ScopBuilder::buildAliasChecks() { 3352 if (!PollyUseRuntimeAliasChecks) 3353 return true; 3354 3355 if (buildAliasGroups()) { 3356 // Aliasing assumptions do not go through addAssumption but we still want to 3357 // collect statistics so we do it here explicitly. 3358 if (scop->getAliasGroups().size()) 3359 Scop::incrementNumberOfAliasingAssumptions(1); 3360 return true; 3361 } 3362 3363 // If a problem occurs while building the alias groups we need to delete 3364 // this SCoP and pretend it wasn't valid in the first place. To this end 3365 // we make the assumed context infeasible. 3366 scop->invalidate(ALIASING, DebugLoc()); 3367 3368 POLLY_DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr() 3369 << " could not be created. This SCoP has been dismissed."); 3370 return false; 3371 } 3372 3373 std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>> 3374 ScopBuilder::buildAliasGroupsForAccesses() { 3375 BatchAAResults BAA(AA); 3376 AliasSetTracker AST(BAA); 3377 3378 DenseMap<Value *, MemoryAccess *> PtrToAcc; 3379 DenseSet<const ScopArrayInfo *> HasWriteAccess; 3380 for (ScopStmt &Stmt : *scop) { 3381 3382 isl::set StmtDomain = Stmt.getDomain(); 3383 bool StmtDomainEmpty = StmtDomain.is_empty(); 3384 3385 // Statements with an empty domain will never be executed. 
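    // Skipping them here also keeps their pointers out of the alias set
    // tracker, so they do not give rise to unnecessary alias groups and
    // run-time checks.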
3386 if (StmtDomainEmpty) 3387 continue; 3388 3389 for (MemoryAccess *MA : Stmt) { 3390 if (MA->isScalarKind()) 3391 continue; 3392 if (!MA->isRead()) 3393 HasWriteAccess.insert(MA->getScopArrayInfo()); 3394 MemAccInst Acc(MA->getAccessInstruction()); 3395 if (MA->isRead() && isa<MemTransferInst>(Acc)) 3396 PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA; 3397 else 3398 PtrToAcc[Acc.getPointerOperand()] = MA; 3399 AST.add(Acc); 3400 } 3401 } 3402 3403 AliasGroupVectorTy AliasGroups; 3404 for (AliasSet &AS : AST) { 3405 if (AS.isMustAlias() || AS.isForwardingAliasSet()) 3406 continue; 3407 AliasGroupTy AG; 3408 for (const Value *Ptr : AS.getPointers()) 3409 AG.push_back(PtrToAcc[const_cast<Value *>(Ptr)]); 3410 if (AG.size() < 2) 3411 continue; 3412 AliasGroups.push_back(std::move(AG)); 3413 } 3414 3415 return std::make_tuple(AliasGroups, HasWriteAccess); 3416 } 3417 3418 bool ScopBuilder::buildAliasGroups() { 3419 // To create sound alias checks we perform the following steps: 3420 // o) We partition each group into read only and non read only accesses. 3421 // o) For each group with more than one base pointer we then compute minimal 3422 // and maximal accesses to each array of a group in read only and non 3423 // read only partitions separately. 3424 AliasGroupVectorTy AliasGroups; 3425 DenseSet<const ScopArrayInfo *> HasWriteAccess; 3426 3427 std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses(); 3428 3429 splitAliasGroupsByDomain(AliasGroups); 3430 3431 for (AliasGroupTy &AG : AliasGroups) { 3432 if (!scop->hasFeasibleRuntimeContext()) 3433 return false; 3434 3435 { 3436 IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut); 3437 bool Valid = buildAliasGroup(AG, HasWriteAccess); 3438 if (!Valid) 3439 return false; 3440 } 3441 if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) { 3442 scop->invalidate(COMPLEXITY, DebugLoc()); 3443 return false; 3444 } 3445 } 3446 3447 return true; 3448 } 3449 3450 bool ScopBuilder::buildAliasGroup( 3451 AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) { 3452 AliasGroupTy ReadOnlyAccesses; 3453 AliasGroupTy ReadWriteAccesses; 3454 SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays; 3455 SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays; 3456 3457 if (AliasGroup.size() < 2) 3458 return true; 3459 3460 for (MemoryAccess *Access : AliasGroup) { 3461 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias", 3462 Access->getAccessInstruction()) 3463 << "Possibly aliasing pointer, use restrict keyword."); 3464 const ScopArrayInfo *Array = Access->getScopArrayInfo(); 3465 if (HasWriteAccess.count(Array)) { 3466 ReadWriteArrays.insert(Array); 3467 ReadWriteAccesses.push_back(Access); 3468 } else { 3469 ReadOnlyArrays.insert(Array); 3470 ReadOnlyAccesses.push_back(Access); 3471 } 3472 } 3473 3474 // If there are no read-only pointers, and less than two read-write pointers, 3475 // no alias check is needed. 3476 if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1) 3477 return true; 3478 3479 // If there is no read-write pointer, no alias check is needed. 3480 if (ReadWriteArrays.empty()) 3481 return true; 3482 3483 // For non-affine accesses, no alias check can be generated as we cannot 3484 // compute a sufficiently tight lower and upper bound: bail out. 
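  // Note that we invalidate the whole SCoP in this case rather than silently
  // dropping the check, as a missing alias check would make the generated
  // code unsound.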
3485   for (MemoryAccess *MA : AliasGroup) {
3486     if (!MA->isAffine()) {
3487       scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
3488                        MA->getAccessInstruction()->getParent());
3489       return false;
3490     }
3491   }
3492
3493   // Ensure that for all memory accesses for which we generate alias checks,
3494   // their base pointers are available.
3495   for (MemoryAccess *MA : AliasGroup) {
3496     if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA))
3497       scop->addRequiredInvariantLoad(
3498           cast<LoadInst>(BasePtrMA->getAccessInstruction()));
3499   }
3500
3501   // scop->getAliasGroups().emplace_back();
3502   // Scop::MinMaxVectorPairTy &pair = scop->getAliasGroups().back();
3503   Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
3504   Scop::MinMaxVectorTy MinMaxAccessesReadOnly;
3505
3506   bool Valid;
3507
3508   Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite);
3509
3510   if (!Valid)
3511     return false;
3512
3513   // Bail out if the number of values we need to compare is too large.
3514   // This is important as the number of comparisons grows quadratically with
3515   // the number of values we need to compare.
3516   if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
3517       RunTimeChecksMaxArraysPerGroup)
3518     return false;
3519
3520   Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly);
3521
3522   scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
3523   if (!Valid)
3524     return false;
3525
3526   return true;
3527 }
3528
3529 void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
3530   for (unsigned u = 0; u < AliasGroups.size(); u++) {
3531     AliasGroupTy NewAG;
3532     AliasGroupTy &AG = AliasGroups[u];
3533     AliasGroupTy::iterator AGI = AG.begin();
3534     isl::set AGDomain = getAccessDomain(*AGI);
3535     while (AGI != AG.end()) {
3536       MemoryAccess *MA = *AGI;
3537       isl::set MADomain = getAccessDomain(MA);
3538       if (AGDomain.is_disjoint(MADomain)) {
3539         NewAG.push_back(MA);
3540         AGI = AG.erase(AGI);
3541       } else {
3542         AGDomain = AGDomain.unite(MADomain);
3543         AGI++;
3544       }
3545     }
3546     if (NewAG.size() > 1)
3547       AliasGroups.push_back(std::move(NewAG));
3548   }
3549 }
3550
3551 #ifndef NDEBUG
3552 static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
3553   auto PhysUse = VirtualUse::create(S, Op, &LI, false);
3554   auto VirtUse = VirtualUse::create(S, Op, &LI, true);
3555   assert(PhysUse.getKind() == VirtUse.getKind());
3556 }
3557
3558 /// Check the consistency of every statement's MemoryAccesses.
3559 ///
3560 /// The check is carried out by expecting the "physical" kind of use (derived
3561 /// from the BasicBlock an instruction resides in) to be the same as the
3562 /// "virtual" kind of use (derived from a statement's MemoryAccess).
3563 ///
3564 /// The "physical" uses are taken by ensureValueRead to determine whether to
3565 /// create MemoryAccesses. When done, the kind of scalar access should be the
3566 /// same no matter which way it was derived.
3567 ///
3568 /// The MemoryAccesses might be changed by later SCoP-modifying passes and
3569 /// hence can intentionally influence the kind of uses (which then no longer
3570 /// correspond to the "physical" ones, hence "virtual"). The CodeGenerator has
3571 /// to pick up the virtual uses. But here, during SCoP construction, this has
3572 /// not happened yet, so virtual and physical uses are still equivalent.
3573 static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
3574   for (auto *BB : S->getRegion().blocks()) {
3575     for (auto &Inst : *BB) {
3576       auto *Stmt = S->getStmtFor(&Inst);
3577       if (!Stmt)
3578         continue;
3579
3580       if (isIgnoredIntrinsic(&Inst))
3581         continue;
3582
3583       // Branch conditions are encoded in the statement domains.
3584       if (Inst.isTerminator() && Stmt->isBlockStmt())
3585         continue;
3586
3587       // Verify all uses.
3588       for (auto &Op : Inst.operands())
3589         verifyUse(S, Op, LI);
3590
3591       // Stores do not produce values used by other statements.
3592       if (isa<StoreInst>(Inst))
3593         continue;
3594
3595       // For every value defined in the block, also check that a use of that
3596       // value in the same statement would not be an inter-statement use. It can
3597       // still be synthesizable or load-hoisted, but these kinds of instructions
3598       // are not directly copied in code-generation.
3599       auto VirtDef =
3600           VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true);
3601       assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
3602              VirtDef.getKind() == VirtualUse::Intra ||
3603              VirtDef.getKind() == VirtualUse::Hoisted);
3604     }
3605   }
3606
3607   if (S->hasSingleExitEdge())
3608     return;
3609
3610   // PHINodes in the SCoP region's exit block are also uses to be checked.
3611   if (!S->getRegion().isTopLevelRegion()) {
3612     for (auto &Inst : *S->getRegion().getExit()) {
3613       if (!isa<PHINode>(Inst))
3614         break;
3615
3616       for (auto &Op : Inst.operands())
3617         verifyUse(S, Op, LI);
3618     }
3619   }
3620 }
3621 #endif
3622
3623 void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
3624   scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE,
3625                       SD.getNextID()));
3626
3627   buildStmts(R);
3628
3629   // Create all invariant load instructions first. These are categorized as
3630   // 'synthesizable', therefore are not part of any ScopStmt but need to be
3631   // created somewhere.
3632   const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
3633   for (BasicBlock *BB : scop->getRegion().blocks()) {
3634     if (SD.isErrorBlock(*BB, scop->getRegion()))
3635       continue;
3636
3637     for (Instruction &Inst : *BB) {
3638       LoadInst *Load = dyn_cast<LoadInst>(&Inst);
3639       if (!Load)
3640         continue;
3641
3642       if (!RIL.count(Load))
3643         continue;
3644
3645       // Invariant loads require a MemoryAccess to be created in some statement.
3646       // It is not important to which statement the MemoryAccess is added
3647       // because it will later be removed from the ScopStmt again. We choose the
3648       // first statement of the basic block the LoadInst is in.
3649       ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
3650       assert(!List.empty());
3651       ScopStmt *RILStmt = List.front();
3652       buildMemoryAccess(Load, RILStmt);
3653     }
3654   }
3655   buildAccessFunctions();
3656
3657   // In case the region does not have an exiting block we will later (during
3658   // code generation) split the exit block. This will move potential PHI nodes
3659   // from the current exit block into the new region exiting block. Hence, PHI
3660   // nodes that are at this point not part of the region will be afterwards.
3661   // To handle these PHI nodes later we will now model their operands as scalar
3662   // accesses. Note that we do not model anything in the exit block if we have
3663   // an exiting block in the region, as there will not be any splitting later.
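  // These operands are modeled with MemoryKind::ExitPHI accesses; the PHI
  // nodes themselves remain outside of the SCoP statements.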
3664 if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) { 3665 for (Instruction &Inst : *R.getExit()) { 3666 PHINode *PHI = dyn_cast<PHINode>(&Inst); 3667 if (!PHI) 3668 break; 3669 3670 buildPHIAccesses(nullptr, PHI, nullptr, true); 3671 } 3672 } 3673 3674 // Create memory accesses for global reads since all arrays are now known. 3675 const SCEV *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0); 3676 for (auto GlobalReadPair : GlobalReads) { 3677 ScopStmt *GlobalReadStmt = GlobalReadPair.first; 3678 Instruction *GlobalRead = GlobalReadPair.second; 3679 for (auto *BP : ArrayBasePointers) 3680 addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ, 3681 BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead); 3682 } 3683 3684 buildInvariantEquivalenceClasses(); 3685 3686 /// A map from basic blocks to their invalid domains. 3687 DenseMap<BasicBlock *, isl::set> InvalidDomainMap; 3688 3689 if (!buildDomains(&R, InvalidDomainMap)) { 3690 POLLY_DEBUG( 3691 dbgs() << "Bailing-out because buildDomains encountered problems\n"); 3692 return; 3693 } 3694 3695 addUserAssumptions(AC, InvalidDomainMap); 3696 3697 // Initialize the invalid domain. 3698 for (ScopStmt &Stmt : scop->Stmts) 3699 if (Stmt.isBlockStmt()) 3700 Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]); 3701 else 3702 Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock( 3703 Stmt.getRegion()->getNode())]); 3704 3705 // Remove empty statements. 3706 // Exit early in case there are no executable statements left in this scop. 3707 scop->removeStmtNotInDomainMap(); 3708 scop->simplifySCoP(false); 3709 if (scop->isEmpty()) { 3710 POLLY_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n"); 3711 return; 3712 } 3713 3714 // The ScopStmts now have enough information to initialize themselves. 3715 for (ScopStmt &Stmt : *scop) { 3716 collectSurroundingLoops(Stmt); 3717 3718 buildDomain(Stmt); 3719 buildAccessRelations(Stmt); 3720 3721 if (DetectReductions) 3722 checkForReductions(Stmt); 3723 } 3724 3725 // Check early for a feasible runtime context. 3726 if (!scop->hasFeasibleRuntimeContext()) { 3727 POLLY_DEBUG( 3728 dbgs() << "Bailing-out because of unfeasible context (early)\n"); 3729 return; 3730 } 3731 3732 // Check early for profitability. Afterwards it cannot change anymore, 3733 // only the runtime context could become infeasible. 3734 if (!scop->isProfitable(UnprofitableScalarAccs)) { 3735 scop->invalidate(PROFITABLE, DebugLoc()); 3736 POLLY_DEBUG( 3737 dbgs() << "Bailing-out because SCoP is not considered profitable\n"); 3738 return; 3739 } 3740 3741 buildSchedule(); 3742 3743 finalizeAccesses(); 3744 3745 scop->realignParams(); 3746 addUserContext(); 3747 3748 // After the context was fully constructed, thus all our knowledge about 3749 // the parameters is in there, we add all recorded assumptions to the 3750 // assumed/invalid context. 3751 addRecordedAssumptions(); 3752 3753 scop->simplifyContexts(); 3754 if (!buildAliasChecks()) { 3755 POLLY_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n"); 3756 return; 3757 } 3758 3759 hoistInvariantLoads(); 3760 canonicalizeDynamicBasePtrs(); 3761 verifyInvariantLoads(); 3762 scop->simplifySCoP(true); 3763 3764 // Check late for a feasible runtime context because profitability did not 3765 // change. 
3766 if (!scop->hasFeasibleRuntimeContext()) { 3767 POLLY_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n"); 3768 return; 3769 } 3770 3771 #ifndef NDEBUG 3772 verifyUses(scop.get(), LI, DT); 3773 #endif 3774 } 3775 3776 ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AAResults &AA, 3777 const DataLayout &DL, DominatorTree &DT, LoopInfo &LI, 3778 ScopDetection &SD, ScalarEvolution &SE, 3779 OptimizationRemarkEmitter &ORE) 3780 : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) { 3781 DebugLoc Beg, End; 3782 auto P = getBBPairForRegion(R); 3783 getDebugLocations(P, Beg, End); 3784 3785 std::string Msg = "SCoP begins here."; 3786 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first) 3787 << Msg); 3788 3789 buildScop(*R, AC); 3790 3791 POLLY_DEBUG(dbgs() << *scop); 3792 3793 if (!scop->hasFeasibleRuntimeContext()) { 3794 InfeasibleScops++; 3795 Msg = "SCoP ends here but was dismissed."; 3796 POLLY_DEBUG(dbgs() << "SCoP detected but dismissed\n"); 3797 RecordedAssumptions.clear(); 3798 scop.reset(); 3799 } else { 3800 Msg = "SCoP ends here."; 3801 ++ScopFound; 3802 if (scop->getMaxLoopDepth() > 0) 3803 ++RichScopFound; 3804 } 3805 3806 if (R->isTopLevelRegion()) 3807 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first) 3808 << Msg); 3809 else 3810 ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second) 3811 << Msg); 3812 } 3813