1 //===------ DeLICM.cpp -----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Undo the effect of Loop Invariant Code Motion (LICM) and 10 // GVN Partial Redundancy Elimination (PRE) on SCoP-level. 11 // 12 // Namely, remove register/scalar dependencies by mapping them back to array 13 // elements. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "polly/DeLICM.h" 18 #include "polly/LinkAllPasses.h" 19 #include "polly/Options.h" 20 #include "polly/ScopInfo.h" 21 #include "polly/ScopPass.h" 22 #include "polly/Support/GICHelper.h" 23 #include "polly/Support/ISLOStream.h" 24 #include "polly/Support/ISLTools.h" 25 #include "polly/ZoneAlgo.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/IR/Module.h" 28 #include "llvm/InitializePasses.h" 29 30 #include "polly/Support/PollyDebug.h" 31 #define DEBUG_TYPE "polly-delicm" 32 33 using namespace polly; 34 using namespace llvm; 35 36 namespace { 37 38 cl::opt<int> 39 DelicmMaxOps("polly-delicm-max-ops", 40 cl::desc("Maximum number of isl operations to invest for " 41 "lifetime analysis; 0=no limit"), 42 cl::init(1000000), cl::cat(PollyCategory)); 43 44 cl::opt<bool> DelicmOverapproximateWrites( 45 "polly-delicm-overapproximate-writes", 46 cl::desc( 47 "Do more PHI writes than necessary in order to avoid partial accesses"), 48 cl::init(false), cl::Hidden, cl::cat(PollyCategory)); 49 50 cl::opt<bool> DelicmPartialWrites("polly-delicm-partial-writes", 51 cl::desc("Allow partial writes"), 52 cl::init(true), cl::Hidden, 53 cl::cat(PollyCategory)); 54 55 cl::opt<bool> 56 DelicmComputeKnown("polly-delicm-compute-known", 57 cl::desc("Compute known content of array elements"), 58 cl::init(true), cl::Hidden, cl::cat(PollyCategory)); 59 60 STATISTIC(DeLICMAnalyzed, "Number of successfully analyzed SCoPs"); 61 STATISTIC(DeLICMOutOfQuota, 62 "Analyses aborted because max_operations was reached"); 63 STATISTIC(MappedValueScalars, "Number of mapped Value scalars"); 64 STATISTIC(MappedPHIScalars, "Number of mapped PHI scalars"); 65 STATISTIC(TargetsMapped, "Number of stores used for at least one mapping"); 66 STATISTIC(DeLICMScopsModified, "Number of SCoPs optimized"); 67 68 STATISTIC(NumValueWrites, "Number of scalar value writes after DeLICM"); 69 STATISTIC(NumValueWritesInLoops, 70 "Number of scalar value writes nested in affine loops after DeLICM"); 71 STATISTIC(NumPHIWrites, "Number of scalar phi writes after DeLICM"); 72 STATISTIC(NumPHIWritesInLoops, 73 "Number of scalar phi writes nested in affine loops after DeLICM"); 74 STATISTIC(NumSingletonWrites, "Number of singleton writes after DeLICM"); 75 STATISTIC(NumSingletonWritesInLoops, 76 "Number of singleton writes nested in affine loops after DeLICM"); 77 78 isl::union_map computeReachingOverwrite(isl::union_map Schedule, 79 isl::union_map Writes, 80 bool InclPrevWrite, 81 bool InclOverwrite) { 82 return computeReachingWrite(Schedule, Writes, true, InclPrevWrite, 83 InclOverwrite); 84 } 85 86 /// Compute the next overwrite for a scalar. 87 /// 88 /// @param Schedule { DomainWrite[] -> Scatter[] } 89 /// Schedule of (at least) all writes. Instances not in @p 90 /// Writes are ignored. 91 /// @param Writes { DomainWrite[] } 92 /// The element instances that write to the scalar. 93 /// @param InclPrevWrite Whether to extend the timepoints to include 94 /// the timepoint where the previous write happens. 95 /// @param InclOverwrite Whether the reaching overwrite includes the timepoint 96 /// of the overwrite itself. 97 /// 98 /// @return { Scatter[] -> DomainDef[] } 99 isl::union_map computeScalarReachingOverwrite(isl::union_map Schedule, 100 isl::union_set Writes, 101 bool InclPrevWrite, 102 bool InclOverwrite) { 103 104 // { DomainWrite[] } 105 auto WritesMap = isl::union_map::from_domain(Writes); 106 107 // { [Element[] -> Scatter[]] -> DomainWrite[] } 108 auto Result = computeReachingOverwrite( 109 std::move(Schedule), std::move(WritesMap), InclPrevWrite, InclOverwrite); 110 111 return Result.domain_factor_range(); 112 } 113 114 /// Overload of computeScalarReachingOverwrite, with only one writing statement. 115 /// Consequently, the result consists of only one map space. 116 /// 117 /// @param Schedule { DomainWrite[] -> Scatter[] } 118 /// @param Writes { DomainWrite[] } 119 /// @param InclPrevWrite Include the previous write to result. 120 /// @param InclOverwrite Include the overwrite to the result. 121 /// 122 /// @return { Scatter[] -> DomainWrite[] } 123 isl::map computeScalarReachingOverwrite(isl::union_map Schedule, 124 isl::set Writes, bool InclPrevWrite, 125 bool InclOverwrite) { 126 isl::space ScatterSpace = getScatterSpace(Schedule); 127 isl::space DomSpace = Writes.get_space(); 128 129 isl::union_map ReachOverwrite = computeScalarReachingOverwrite( 130 Schedule, isl::union_set(Writes), InclPrevWrite, InclOverwrite); 131 132 isl::space ResultSpace = ScatterSpace.map_from_domain_and_range(DomSpace); 133 return singleton(std::move(ReachOverwrite), ResultSpace); 134 } 135 136 /// Try to find a 'natural' extension of a mapped to elements outside its 137 /// domain. 138 /// 139 /// @param Relevant The map with mapping that may not be modified. 140 /// @param Universe The domain to which @p Relevant needs to be extended. 141 /// 142 /// @return A map with that associates the domain elements of @p Relevant to the 143 /// same elements and in addition the elements of @p Universe to some 144 /// undefined elements. The function prefers to return simple maps. 145 isl::union_map expandMapping(isl::union_map Relevant, isl::union_set Universe) { 146 Relevant = Relevant.coalesce(); 147 isl::union_set RelevantDomain = Relevant.domain(); 148 isl::union_map Simplified = Relevant.gist_domain(RelevantDomain); 149 Simplified = Simplified.coalesce(); 150 return Simplified.intersect_domain(Universe); 151 } 152 153 /// Represent the knowledge of the contents of any array elements in any zone or 154 /// the knowledge we would add when mapping a scalar to an array element. 155 /// 156 /// Every array element at every zone unit has one of two states: 157 /// 158 /// - Unused: Not occupied by any value so a transformation can change it to 159 /// other values. 160 /// 161 /// - Occupied: The element contains a value that is still needed. 162 /// 163 /// The union of Unused and Unknown zones forms the universe, the set of all 164 /// elements at every timepoint. The universe can easily be derived from the 165 /// array elements that are accessed someway. Arrays that are never accessed 166 /// also never play a role in any computation and can hence be ignored. With a 167 /// given universe, only one of the sets needs to stored implicitly. Computing 168 /// the complement is also an expensive operation, hence this class has been 169 /// designed that only one of sets is needed while the other is assumed to be 170 /// implicit. It can still be given, but is mostly ignored. 171 /// 172 /// There are two use cases for the Knowledge class: 173 /// 174 /// 1) To represent the knowledge of the current state of ScopInfo. The unused 175 /// state means that an element is currently unused: there is no read of it 176 /// before the next overwrite. Also called 'Existing'. 177 /// 178 /// 2) To represent the requirements for mapping a scalar to array elements. The 179 /// unused state means that there is no change/requirement. Also called 180 /// 'Proposed'. 181 /// 182 /// In addition to these states at unit zones, Knowledge needs to know when 183 /// values are written. This is because written values may have no lifetime (one 184 /// reason is that the value is never read). Such writes would therefore never 185 /// conflict, but overwrite values that might still be required. Another source 186 /// of problems are multiple writes to the same element at the same timepoint, 187 /// because their order is undefined. 188 class Knowledge final { 189 private: 190 /// { [Element[] -> Zone[]] } 191 /// Set of array elements and when they are alive. 192 /// Can contain a nullptr; in this case the set is implicitly defined as the 193 /// complement of #Unused. 194 /// 195 /// The set of alive array elements is represented as zone, as the set of live 196 /// values can differ depending on how the elements are interpreted. 197 /// Assuming a value X is written at timestep [0] and read at timestep [1] 198 /// without being used at any later point, then the value is alive in the 199 /// interval ]0,1[. This interval cannot be represented by an integer set, as 200 /// it does not contain any integer point. Zones allow us to represent this 201 /// interval and can be converted to sets of timepoints when needed (e.g., in 202 /// isConflicting when comparing to the write sets). 203 /// @see convertZoneToTimepoints and this file's comment for more details. 204 isl::union_set Occupied; 205 206 /// { [Element[] -> Zone[]] } 207 /// Set of array elements when they are not alive, i.e. their memory can be 208 /// used for other purposed. Can contain a nullptr; in this case the set is 209 /// implicitly defined as the complement of #Occupied. 210 isl::union_set Unused; 211 212 /// { [Element[] -> Zone[]] -> ValInst[] } 213 /// Maps to the known content for each array element at any interval. 214 /// 215 /// Any element/interval can map to multiple known elements. This is due to 216 /// multiple llvm::Value referring to the same content. Examples are 217 /// 218 /// - A value stored and loaded again. The LoadInst represents the same value 219 /// as the StoreInst's value operand. 220 /// 221 /// - A PHINode is equal to any one of the incoming values. In case of 222 /// LCSSA-form, it is always equal to its single incoming value. 223 /// 224 /// Two Knowledges are considered not conflicting if at least one of the known 225 /// values match. Not known values are not stored as an unnamed tuple (as 226 /// #Written does), but maps to nothing. 227 /// 228 /// Known values are usually just defined for #Occupied elements. Knowing 229 /// #Unused contents has no advantage as it can be overwritten. 230 isl::union_map Known; 231 232 /// { [Element[] -> Scatter[]] -> ValInst[] } 233 /// The write actions currently in the scop or that would be added when 234 /// mapping a scalar. Maps to the value that is written. 235 /// 236 /// Written values that cannot be identified are represented by an unknown 237 /// ValInst[] (an unnamed tuple of 0 dimension). It conflicts with itself. 238 isl::union_map Written; 239 240 /// Check whether this Knowledge object is well-formed. 241 void checkConsistency() const { 242 #ifndef NDEBUG 243 // Default-initialized object 244 if (Occupied.is_null() && Unused.is_null() && Known.is_null() && 245 Written.is_null()) 246 return; 247 248 assert(!Occupied.is_null() || !Unused.is_null()); 249 assert(!Known.is_null()); 250 assert(!Written.is_null()); 251 252 // If not all fields are defined, we cannot derived the universe. 253 if (Occupied.is_null() || Unused.is_null()) 254 return; 255 256 assert(Occupied.is_disjoint(Unused)); 257 auto Universe = Occupied.unite(Unused); 258 259 assert(!Known.domain().is_subset(Universe).is_false()); 260 assert(!Written.domain().is_subset(Universe).is_false()); 261 #endif 262 } 263 264 public: 265 /// Initialize a nullptr-Knowledge. This is only provided for convenience; do 266 /// not use such an object. 267 Knowledge() {} 268 269 /// Create a new object with the given members. 270 Knowledge(isl::union_set Occupied, isl::union_set Unused, 271 isl::union_map Known, isl::union_map Written) 272 : Occupied(std::move(Occupied)), Unused(std::move(Unused)), 273 Known(std::move(Known)), Written(std::move(Written)) { 274 checkConsistency(); 275 } 276 277 /// Return whether this object was not default-constructed. 278 bool isUsable() const { 279 return (Occupied.is_null() || Unused.is_null()) && !Known.is_null() && 280 !Written.is_null(); 281 } 282 283 /// Print the content of this object to @p OS. 284 void print(llvm::raw_ostream &OS, unsigned Indent = 0) const { 285 if (isUsable()) { 286 if (!Occupied.is_null()) 287 OS.indent(Indent) << "Occupied: " << Occupied << "\n"; 288 else 289 OS.indent(Indent) << "Occupied: <Everything else not in Unused>\n"; 290 if (!Unused.is_null()) 291 OS.indent(Indent) << "Unused: " << Unused << "\n"; 292 else 293 OS.indent(Indent) << "Unused: <Everything else not in Occupied>\n"; 294 OS.indent(Indent) << "Known: " << Known << "\n"; 295 OS.indent(Indent) << "Written : " << Written << '\n'; 296 } else { 297 OS.indent(Indent) << "Invalid knowledge\n"; 298 } 299 } 300 301 /// Combine two knowledges, this and @p That. 302 void learnFrom(Knowledge That) { 303 assert(!isConflicting(*this, That)); 304 assert(!Unused.is_null() && !That.Occupied.is_null()); 305 assert( 306 That.Unused.is_null() && 307 "This function is only prepared to learn occupied elements from That"); 308 assert(Occupied.is_null() && "This function does not implement " 309 "`this->Occupied = " 310 "this->Occupied.unite(That.Occupied);`"); 311 312 Unused = Unused.subtract(That.Occupied); 313 Known = Known.unite(That.Known); 314 Written = Written.unite(That.Written); 315 316 checkConsistency(); 317 } 318 319 /// Determine whether two Knowledges conflict with each other. 320 /// 321 /// In theory @p Existing and @p Proposed are symmetric, but the 322 /// implementation is constrained by the implicit interpretation. That is, @p 323 /// Existing must have #Unused defined (use case 1) and @p Proposed must have 324 /// #Occupied defined (use case 1). 325 /// 326 /// A conflict is defined as non-preserved semantics when they are merged. For 327 /// instance, when for the same array and zone they assume different 328 /// llvm::Values. 329 /// 330 /// @param Existing One of the knowledges with #Unused defined. 331 /// @param Proposed One of the knowledges with #Occupied defined. 332 /// @param OS Dump the conflict reason to this output stream; use 333 /// nullptr to not output anything. 334 /// @param Indent Indention for the conflict reason. 335 /// 336 /// @return True, iff the two knowledges are conflicting. 337 static bool isConflicting(const Knowledge &Existing, 338 const Knowledge &Proposed, 339 llvm::raw_ostream *OS = nullptr, 340 unsigned Indent = 0) { 341 assert(!Existing.Unused.is_null()); 342 assert(!Proposed.Occupied.is_null()); 343 344 #ifndef NDEBUG 345 if (!Existing.Occupied.is_null() && !Proposed.Unused.is_null()) { 346 auto ExistingUniverse = Existing.Occupied.unite(Existing.Unused); 347 auto ProposedUniverse = Proposed.Occupied.unite(Proposed.Unused); 348 assert(ExistingUniverse.is_equal(ProposedUniverse) && 349 "Both inputs' Knowledges must be over the same universe"); 350 } 351 #endif 352 353 // Do the Existing and Proposed lifetimes conflict? 354 // 355 // Lifetimes are described as the cross-product of array elements and zone 356 // intervals in which they are alive (the space { [Element[] -> Zone[]] }). 357 // In the following we call this "element/lifetime interval". 358 // 359 // In order to not conflict, one of the following conditions must apply for 360 // each element/lifetime interval: 361 // 362 // 1. If occupied in one of the knowledges, it is unused in the other. 363 // 364 // - or - 365 // 366 // 2. Both contain the same value. 367 // 368 // Instead of partitioning the element/lifetime intervals into a part that 369 // both Knowledges occupy (which requires an expensive subtraction) and for 370 // these to check whether they are known to be the same value, we check only 371 // the second condition and ensure that it also applies when then first 372 // condition is true. This is done by adding a wildcard value to 373 // Proposed.Known and Existing.Unused such that they match as a common known 374 // value. We use the "unknown ValInst" for this purpose. Every 375 // Existing.Unused may match with an unknown Proposed.Occupied because these 376 // never are in conflict with each other. 377 auto ProposedOccupiedAnyVal = makeUnknownForDomain(Proposed.Occupied); 378 auto ProposedValues = Proposed.Known.unite(ProposedOccupiedAnyVal); 379 380 auto ExistingUnusedAnyVal = makeUnknownForDomain(Existing.Unused); 381 auto ExistingValues = Existing.Known.unite(ExistingUnusedAnyVal); 382 383 auto MatchingVals = ExistingValues.intersect(ProposedValues); 384 auto Matches = MatchingVals.domain(); 385 386 // Any Proposed.Occupied must either have a match between the known values 387 // of Existing and Occupied, or be in Existing.Unused. In the latter case, 388 // the previously added "AnyVal" will match each other. 389 if (!Proposed.Occupied.is_subset(Matches)) { 390 if (OS) { 391 auto Conflicting = Proposed.Occupied.subtract(Matches); 392 auto ExistingConflictingKnown = 393 Existing.Known.intersect_domain(Conflicting); 394 auto ProposedConflictingKnown = 395 Proposed.Known.intersect_domain(Conflicting); 396 397 OS->indent(Indent) << "Proposed lifetime conflicting with Existing's\n"; 398 OS->indent(Indent) << "Conflicting occupied: " << Conflicting << "\n"; 399 if (!ExistingConflictingKnown.is_empty()) 400 OS->indent(Indent) 401 << "Existing Known: " << ExistingConflictingKnown << "\n"; 402 if (!ProposedConflictingKnown.is_empty()) 403 OS->indent(Indent) 404 << "Proposed Known: " << ProposedConflictingKnown << "\n"; 405 } 406 return true; 407 } 408 409 // Do the writes in Existing conflict with occupied values in Proposed? 410 // 411 // In order to not conflict, it must either write to unused lifetime or 412 // write the same value. To check, we remove the writes that write into 413 // Proposed.Unused (they never conflict) and then see whether the written 414 // value is already in Proposed.Known. If there are multiple known values 415 // and a written value is known under different names, it is enough when one 416 // of the written values (assuming that they are the same value under 417 // different names, e.g. a PHINode and one of the incoming values) matches 418 // one of the known names. 419 // 420 // We convert here the set of lifetimes to actual timepoints. A lifetime is 421 // in conflict with a set of write timepoints, if either a live timepoint is 422 // clearly within the lifetime or if a write happens at the beginning of the 423 // lifetime (where it would conflict with the value that actually writes the 424 // value alive). There is no conflict at the end of a lifetime, as the alive 425 // value will always be read, before it is overwritten again. The last 426 // property holds in Polly for all scalar values and we expect all users of 427 // Knowledge to check this property also for accesses to MemoryKind::Array. 428 auto ProposedFixedDefs = 429 convertZoneToTimepoints(Proposed.Occupied, true, false); 430 auto ProposedFixedKnown = 431 convertZoneToTimepoints(Proposed.Known, isl::dim::in, true, false); 432 433 auto ExistingConflictingWrites = 434 Existing.Written.intersect_domain(ProposedFixedDefs); 435 auto ExistingConflictingWritesDomain = ExistingConflictingWrites.domain(); 436 437 auto CommonWrittenVal = 438 ProposedFixedKnown.intersect(ExistingConflictingWrites); 439 auto CommonWrittenValDomain = CommonWrittenVal.domain(); 440 441 if (!ExistingConflictingWritesDomain.is_subset(CommonWrittenValDomain)) { 442 if (OS) { 443 auto ExistingConflictingWritten = 444 ExistingConflictingWrites.subtract_domain(CommonWrittenValDomain); 445 auto ProposedConflictingKnown = ProposedFixedKnown.subtract_domain( 446 ExistingConflictingWritten.domain()); 447 448 OS->indent(Indent) 449 << "Proposed a lifetime where there is an Existing write into it\n"; 450 OS->indent(Indent) << "Existing conflicting writes: " 451 << ExistingConflictingWritten << "\n"; 452 if (!ProposedConflictingKnown.is_empty()) 453 OS->indent(Indent) 454 << "Proposed conflicting known: " << ProposedConflictingKnown 455 << "\n"; 456 } 457 return true; 458 } 459 460 // Do the writes in Proposed conflict with occupied values in Existing? 461 auto ExistingAvailableDefs = 462 convertZoneToTimepoints(Existing.Unused, true, false); 463 auto ExistingKnownDefs = 464 convertZoneToTimepoints(Existing.Known, isl::dim::in, true, false); 465 466 auto ProposedWrittenDomain = Proposed.Written.domain(); 467 auto KnownIdentical = ExistingKnownDefs.intersect(Proposed.Written); 468 auto IdenticalOrUnused = 469 ExistingAvailableDefs.unite(KnownIdentical.domain()); 470 if (!ProposedWrittenDomain.is_subset(IdenticalOrUnused)) { 471 if (OS) { 472 auto Conflicting = ProposedWrittenDomain.subtract(IdenticalOrUnused); 473 auto ExistingConflictingKnown = 474 ExistingKnownDefs.intersect_domain(Conflicting); 475 auto ProposedConflictingWritten = 476 Proposed.Written.intersect_domain(Conflicting); 477 478 OS->indent(Indent) << "Proposed writes into range used by Existing\n"; 479 OS->indent(Indent) << "Proposed conflicting writes: " 480 << ProposedConflictingWritten << "\n"; 481 if (!ExistingConflictingKnown.is_empty()) 482 OS->indent(Indent) 483 << "Existing conflicting known: " << ExistingConflictingKnown 484 << "\n"; 485 } 486 return true; 487 } 488 489 // Does Proposed write at the same time as Existing already does (order of 490 // writes is undefined)? Writing the same value is permitted. 491 auto ExistingWrittenDomain = Existing.Written.domain(); 492 auto BothWritten = 493 Existing.Written.domain().intersect(Proposed.Written.domain()); 494 auto ExistingKnownWritten = filterKnownValInst(Existing.Written); 495 auto ProposedKnownWritten = filterKnownValInst(Proposed.Written); 496 auto CommonWritten = 497 ExistingKnownWritten.intersect(ProposedKnownWritten).domain(); 498 499 if (!BothWritten.is_subset(CommonWritten)) { 500 if (OS) { 501 auto Conflicting = BothWritten.subtract(CommonWritten); 502 auto ExistingConflictingWritten = 503 Existing.Written.intersect_domain(Conflicting); 504 auto ProposedConflictingWritten = 505 Proposed.Written.intersect_domain(Conflicting); 506 507 OS->indent(Indent) << "Proposed writes at the same time as an already " 508 "Existing write\n"; 509 OS->indent(Indent) << "Conflicting writes: " << Conflicting << "\n"; 510 if (!ExistingConflictingWritten.is_empty()) 511 OS->indent(Indent) 512 << "Exiting write: " << ExistingConflictingWritten << "\n"; 513 if (!ProposedConflictingWritten.is_empty()) 514 OS->indent(Indent) 515 << "Proposed write: " << ProposedConflictingWritten << "\n"; 516 } 517 return true; 518 } 519 520 return false; 521 } 522 }; 523 524 /// Implementation of the DeLICM/DePRE transformation. 525 class DeLICMImpl final : public ZoneAlgorithm { 526 private: 527 /// Knowledge before any transformation took place. 528 Knowledge OriginalZone; 529 530 /// Current knowledge of the SCoP including all already applied 531 /// transformations. 532 Knowledge Zone; 533 534 /// Number of StoreInsts something can be mapped to. 535 int NumberOfCompatibleTargets = 0; 536 537 /// The number of StoreInsts to which at least one value or PHI has been 538 /// mapped to. 539 int NumberOfTargetsMapped = 0; 540 541 /// The number of llvm::Value mapped to some array element. 542 int NumberOfMappedValueScalars = 0; 543 544 /// The number of PHIs mapped to some array element. 545 int NumberOfMappedPHIScalars = 0; 546 547 /// Determine whether two knowledges are conflicting with each other. 548 /// 549 /// @see Knowledge::isConflicting 550 bool isConflicting(const Knowledge &Proposed) { 551 raw_ostream *OS = nullptr; 552 POLLY_DEBUG(OS = &llvm::dbgs()); 553 return Knowledge::isConflicting(Zone, Proposed, OS, 4); 554 } 555 556 /// Determine whether @p SAI is a scalar that can be mapped to an array 557 /// element. 558 bool isMappable(const ScopArrayInfo *SAI) { 559 assert(SAI); 560 561 if (SAI->isValueKind()) { 562 auto *MA = S->getValueDef(SAI); 563 if (!MA) { 564 POLLY_DEBUG( 565 dbgs() 566 << " Reject because value is read-only within the scop\n"); 567 return false; 568 } 569 570 // Mapping if value is used after scop is not supported. The code 571 // generator would need to reload the scalar after the scop, but it 572 // does not have the information to where it is mapped to. Only the 573 // MemoryAccesses have that information, not the ScopArrayInfo. 574 auto Inst = MA->getAccessInstruction(); 575 for (auto User : Inst->users()) { 576 if (!isa<Instruction>(User)) 577 return false; 578 auto UserInst = cast<Instruction>(User); 579 580 if (!S->contains(UserInst)) { 581 POLLY_DEBUG(dbgs() << " Reject because value is escaping\n"); 582 return false; 583 } 584 } 585 586 return true; 587 } 588 589 if (SAI->isPHIKind()) { 590 auto *MA = S->getPHIRead(SAI); 591 assert(MA); 592 593 // Mapping of an incoming block from before the SCoP is not supported by 594 // the code generator. 595 auto PHI = cast<PHINode>(MA->getAccessInstruction()); 596 for (auto Incoming : PHI->blocks()) { 597 if (!S->contains(Incoming)) { 598 POLLY_DEBUG(dbgs() 599 << " Reject because at least one incoming block is " 600 "not in the scop region\n"); 601 return false; 602 } 603 } 604 605 return true; 606 } 607 608 POLLY_DEBUG(dbgs() << " Reject ExitPHI or other non-value\n"); 609 return false; 610 } 611 612 /// Compute the uses of a MemoryKind::Value and its lifetime (from its 613 /// definition to the last use). 614 /// 615 /// @param SAI The ScopArrayInfo representing the value's storage. 616 /// 617 /// @return { DomainDef[] -> DomainUse[] }, { DomainDef[] -> Zone[] } 618 /// First element is the set of uses for each definition. 619 /// The second is the lifetime of each definition. 620 std::tuple<isl::union_map, isl::map> 621 computeValueUses(const ScopArrayInfo *SAI) { 622 assert(SAI->isValueKind()); 623 624 // { DomainRead[] } 625 auto Reads = makeEmptyUnionSet(); 626 627 // Find all uses. 628 for (auto *MA : S->getValueUses(SAI)) 629 Reads = Reads.unite(getDomainFor(MA)); 630 631 // { DomainRead[] -> Scatter[] } 632 auto ReadSchedule = getScatterFor(Reads); 633 634 auto *DefMA = S->getValueDef(SAI); 635 assert(DefMA); 636 637 // { DomainDef[] } 638 auto Writes = getDomainFor(DefMA); 639 640 // { DomainDef[] -> Scatter[] } 641 auto WriteScatter = getScatterFor(Writes); 642 643 // { Scatter[] -> DomainDef[] } 644 auto ReachDef = getScalarReachingDefinition(DefMA->getStatement()); 645 646 // { [DomainDef[] -> Scatter[]] -> DomainUse[] } 647 auto Uses = isl::union_map(ReachDef.reverse().range_map()) 648 .apply_range(ReadSchedule.reverse()); 649 650 // { DomainDef[] -> Scatter[] } 651 auto UseScatter = 652 singleton(Uses.domain().unwrap(), 653 Writes.get_space().map_from_domain_and_range(ScatterSpace)); 654 655 // { DomainDef[] -> Zone[] } 656 auto Lifetime = betweenScatter(WriteScatter, UseScatter, false, true); 657 658 // { DomainDef[] -> DomainRead[] } 659 auto DefUses = Uses.domain_factor_domain(); 660 661 return std::make_pair(DefUses, Lifetime); 662 } 663 664 /// Try to map a MemoryKind::Value to a given array element. 665 /// 666 /// @param SAI Representation of the scalar's memory to map. 667 /// @param TargetElt { Scatter[] -> Element[] } 668 /// Suggestion where to map a scalar to when at a timepoint. 669 /// 670 /// @return true if the scalar was successfully mapped. 671 bool tryMapValue(const ScopArrayInfo *SAI, isl::map TargetElt) { 672 assert(SAI->isValueKind()); 673 674 auto *DefMA = S->getValueDef(SAI); 675 assert(DefMA->isValueKind()); 676 assert(DefMA->isMustWrite()); 677 auto *V = DefMA->getAccessValue(); 678 auto *DefInst = DefMA->getAccessInstruction(); 679 680 // Stop if the scalar has already been mapped. 681 if (!DefMA->getLatestScopArrayInfo()->isValueKind()) 682 return false; 683 684 // { DomainDef[] -> Scatter[] } 685 auto DefSched = getScatterFor(DefMA); 686 687 // Where each write is mapped to, according to the suggestion. 688 // { DomainDef[] -> Element[] } 689 auto DefTarget = TargetElt.apply_domain(DefSched.reverse()); 690 simplify(DefTarget); 691 POLLY_DEBUG(dbgs() << " Def Mapping: " << DefTarget << '\n'); 692 693 auto OrigDomain = getDomainFor(DefMA); 694 auto MappedDomain = DefTarget.domain(); 695 if (!OrigDomain.is_subset(MappedDomain)) { 696 POLLY_DEBUG( 697 dbgs() 698 << " Reject because mapping does not encompass all instances\n"); 699 return false; 700 } 701 702 // { DomainDef[] -> Zone[] } 703 isl::map Lifetime; 704 705 // { DomainDef[] -> DomainUse[] } 706 isl::union_map DefUses; 707 708 std::tie(DefUses, Lifetime) = computeValueUses(SAI); 709 POLLY_DEBUG(dbgs() << " Lifetime: " << Lifetime << '\n'); 710 711 /// { [Element[] -> Zone[]] } 712 auto EltZone = Lifetime.apply_domain(DefTarget).wrap(); 713 simplify(EltZone); 714 715 // When known knowledge is disabled, just return the unknown value. It will 716 // either get filtered out or conflict with itself. 717 // { DomainDef[] -> ValInst[] } 718 isl::map ValInst; 719 if (DelicmComputeKnown) 720 ValInst = makeValInst(V, DefMA->getStatement(), 721 LI->getLoopFor(DefInst->getParent())); 722 else 723 ValInst = makeUnknownForDomain(DefMA->getStatement()); 724 725 // { DomainDef[] -> [Element[] -> Zone[]] } 726 auto EltKnownTranslator = DefTarget.range_product(Lifetime); 727 728 // { [Element[] -> Zone[]] -> ValInst[] } 729 auto EltKnown = ValInst.apply_domain(EltKnownTranslator); 730 simplify(EltKnown); 731 732 // { DomainDef[] -> [Element[] -> Scatter[]] } 733 auto WrittenTranslator = DefTarget.range_product(DefSched); 734 735 // { [Element[] -> Scatter[]] -> ValInst[] } 736 auto DefEltSched = ValInst.apply_domain(WrittenTranslator); 737 simplify(DefEltSched); 738 739 Knowledge Proposed(EltZone, {}, filterKnownValInst(EltKnown), DefEltSched); 740 if (isConflicting(Proposed)) 741 return false; 742 743 // { DomainUse[] -> Element[] } 744 auto UseTarget = DefUses.reverse().apply_range(DefTarget); 745 746 mapValue(SAI, std::move(DefTarget), std::move(UseTarget), 747 std::move(Lifetime), std::move(Proposed)); 748 return true; 749 } 750 751 /// After a scalar has been mapped, update the global knowledge. 752 void applyLifetime(Knowledge Proposed) { 753 Zone.learnFrom(std::move(Proposed)); 754 } 755 756 /// Map a MemoryKind::Value scalar to an array element. 757 /// 758 /// Callers must have ensured that the mapping is valid and not conflicting. 759 /// 760 /// @param SAI The ScopArrayInfo representing the scalar's memory to 761 /// map. 762 /// @param DefTarget { DomainDef[] -> Element[] } 763 /// The array element to map the scalar to. 764 /// @param UseTarget { DomainUse[] -> Element[] } 765 /// The array elements the uses are mapped to. 766 /// @param Lifetime { DomainDef[] -> Zone[] } 767 /// The lifetime of each llvm::Value definition for 768 /// reporting. 769 /// @param Proposed Mapping constraints for reporting. 770 void mapValue(const ScopArrayInfo *SAI, isl::map DefTarget, 771 isl::union_map UseTarget, isl::map Lifetime, 772 Knowledge Proposed) { 773 // Redirect the read accesses. 774 for (auto *MA : S->getValueUses(SAI)) { 775 // { DomainUse[] } 776 auto Domain = getDomainFor(MA); 777 778 // { DomainUse[] -> Element[] } 779 auto NewAccRel = UseTarget.intersect_domain(Domain); 780 simplify(NewAccRel); 781 782 assert(isl_union_map_n_map(NewAccRel.get()) == 1); 783 MA->setNewAccessRelation(isl::map::from_union_map(NewAccRel)); 784 } 785 786 auto *WA = S->getValueDef(SAI); 787 WA->setNewAccessRelation(DefTarget); 788 applyLifetime(Proposed); 789 790 MappedValueScalars++; 791 NumberOfMappedValueScalars += 1; 792 } 793 794 isl::map makeValInst(Value *Val, ScopStmt *UserStmt, Loop *Scope, 795 bool IsCertain = true) { 796 // When known knowledge is disabled, just return the unknown value. It will 797 // either get filtered out or conflict with itself. 798 if (!DelicmComputeKnown) 799 return makeUnknownForDomain(UserStmt); 800 return ZoneAlgorithm::makeValInst(Val, UserStmt, Scope, IsCertain); 801 } 802 803 /// Express the incoming values of a PHI for each incoming statement in an 804 /// isl::union_map. 805 /// 806 /// @param SAI The PHI scalar represented by a ScopArrayInfo. 807 /// 808 /// @return { PHIWriteDomain[] -> ValInst[] } 809 isl::union_map determinePHIWrittenValues(const ScopArrayInfo *SAI) { 810 auto Result = makeEmptyUnionMap(); 811 812 // Collect the incoming values. 813 for (auto *MA : S->getPHIIncomings(SAI)) { 814 // { DomainWrite[] -> ValInst[] } 815 isl::union_map ValInst; 816 auto *WriteStmt = MA->getStatement(); 817 818 auto Incoming = MA->getIncoming(); 819 assert(!Incoming.empty()); 820 if (Incoming.size() == 1) { 821 ValInst = makeValInst(Incoming[0].second, WriteStmt, 822 LI->getLoopFor(Incoming[0].first)); 823 } else { 824 // If the PHI is in a subregion's exit node it can have multiple 825 // incoming values (+ maybe another incoming edge from an unrelated 826 // block). We cannot directly represent it as a single llvm::Value. 827 // We currently model it as unknown value, but modeling as the PHIInst 828 // itself could be OK, too. 829 ValInst = makeUnknownForDomain(WriteStmt); 830 } 831 832 Result = Result.unite(ValInst); 833 } 834 835 assert(Result.is_single_valued() && 836 "Cannot have multiple incoming values for same incoming statement"); 837 return Result; 838 } 839 840 /// Try to map a MemoryKind::PHI scalar to a given array element. 841 /// 842 /// @param SAI Representation of the scalar's memory to map. 843 /// @param TargetElt { Scatter[] -> Element[] } 844 /// Suggestion where to map the scalar to when at a 845 /// timepoint. 846 /// 847 /// @return true if the PHI scalar has been mapped. 848 bool tryMapPHI(const ScopArrayInfo *SAI, isl::map TargetElt) { 849 auto *PHIRead = S->getPHIRead(SAI); 850 assert(PHIRead->isPHIKind()); 851 assert(PHIRead->isRead()); 852 853 // Skip if already been mapped. 854 if (!PHIRead->getLatestScopArrayInfo()->isPHIKind()) 855 return false; 856 857 // { DomainRead[] -> Scatter[] } 858 auto PHISched = getScatterFor(PHIRead); 859 860 // { DomainRead[] -> Element[] } 861 auto PHITarget = PHISched.apply_range(TargetElt); 862 simplify(PHITarget); 863 POLLY_DEBUG(dbgs() << " Mapping: " << PHITarget << '\n'); 864 865 auto OrigDomain = getDomainFor(PHIRead); 866 auto MappedDomain = PHITarget.domain(); 867 if (!OrigDomain.is_subset(MappedDomain)) { 868 POLLY_DEBUG( 869 dbgs() 870 << " Reject because mapping does not encompass all instances\n"); 871 return false; 872 } 873 874 // { DomainRead[] -> DomainWrite[] } 875 auto PerPHIWrites = computePerPHI(SAI); 876 if (PerPHIWrites.is_null()) { 877 POLLY_DEBUG( 878 dbgs() << " Reject because cannot determine incoming values\n"); 879 return false; 880 } 881 882 // { DomainWrite[] -> Element[] } 883 auto WritesTarget = PerPHIWrites.apply_domain(PHITarget).reverse(); 884 simplify(WritesTarget); 885 886 // { DomainWrite[] } 887 auto UniverseWritesDom = isl::union_set::empty(ParamSpace.ctx()); 888 889 for (auto *MA : S->getPHIIncomings(SAI)) 890 UniverseWritesDom = UniverseWritesDom.unite(getDomainFor(MA)); 891 892 auto RelevantWritesTarget = WritesTarget; 893 if (DelicmOverapproximateWrites) 894 WritesTarget = expandMapping(WritesTarget, UniverseWritesDom); 895 896 auto ExpandedWritesDom = WritesTarget.domain(); 897 if (!DelicmPartialWrites && 898 !UniverseWritesDom.is_subset(ExpandedWritesDom)) { 899 POLLY_DEBUG( 900 dbgs() << " Reject because did not find PHI write mapping for " 901 "all instances\n"); 902 if (DelicmOverapproximateWrites) 903 POLLY_DEBUG(dbgs() << " Relevant Mapping: " 904 << RelevantWritesTarget << '\n'); 905 POLLY_DEBUG(dbgs() << " Deduced Mapping: " << WritesTarget 906 << '\n'); 907 POLLY_DEBUG(dbgs() << " Missing instances: " 908 << UniverseWritesDom.subtract(ExpandedWritesDom) 909 << '\n'); 910 return false; 911 } 912 913 // { DomainRead[] -> Scatter[] } 914 isl::union_map PerPHIWriteScatterUmap = PerPHIWrites.apply_range(Schedule); 915 isl::map PerPHIWriteScatter = 916 singleton(PerPHIWriteScatterUmap, PHISched.get_space()); 917 918 // { DomainRead[] -> Zone[] } 919 auto Lifetime = betweenScatter(PerPHIWriteScatter, PHISched, false, true); 920 simplify(Lifetime); 921 POLLY_DEBUG(dbgs() << " Lifetime: " << Lifetime << "\n"); 922 923 // { DomainWrite[] -> Zone[] } 924 auto WriteLifetime = isl::union_map(Lifetime).apply_domain(PerPHIWrites); 925 926 // { DomainWrite[] -> ValInst[] } 927 auto WrittenValue = determinePHIWrittenValues(SAI); 928 929 // { DomainWrite[] -> [Element[] -> Scatter[]] } 930 auto WrittenTranslator = WritesTarget.range_product(Schedule); 931 932 // { [Element[] -> Scatter[]] -> ValInst[] } 933 auto Written = WrittenValue.apply_domain(WrittenTranslator); 934 simplify(Written); 935 936 // { DomainWrite[] -> [Element[] -> Zone[]] } 937 auto LifetimeTranslator = WritesTarget.range_product(WriteLifetime); 938 939 // { DomainWrite[] -> ValInst[] } 940 auto WrittenKnownValue = filterKnownValInst(WrittenValue); 941 942 // { [Element[] -> Zone[]] -> ValInst[] } 943 auto EltLifetimeInst = WrittenKnownValue.apply_domain(LifetimeTranslator); 944 simplify(EltLifetimeInst); 945 946 // { [Element[] -> Zone[] } 947 auto Occupied = LifetimeTranslator.range(); 948 simplify(Occupied); 949 950 Knowledge Proposed(Occupied, {}, EltLifetimeInst, Written); 951 if (isConflicting(Proposed)) 952 return false; 953 954 mapPHI(SAI, std::move(PHITarget), std::move(WritesTarget), 955 std::move(Lifetime), std::move(Proposed)); 956 return true; 957 } 958 959 /// Map a MemoryKind::PHI scalar to an array element. 960 /// 961 /// Callers must have ensured that the mapping is valid and not conflicting 962 /// with the common knowledge. 963 /// 964 /// @param SAI The ScopArrayInfo representing the scalar's memory to 965 /// map. 966 /// @param ReadTarget { DomainRead[] -> Element[] } 967 /// The array element to map the scalar to. 968 /// @param WriteTarget { DomainWrite[] -> Element[] } 969 /// New access target for each PHI incoming write. 970 /// @param Lifetime { DomainRead[] -> Zone[] } 971 /// The lifetime of each PHI for reporting. 972 /// @param Proposed Mapping constraints for reporting. 973 void mapPHI(const ScopArrayInfo *SAI, isl::map ReadTarget, 974 isl::union_map WriteTarget, isl::map Lifetime, 975 Knowledge Proposed) { 976 // { Element[] } 977 isl::space ElementSpace = ReadTarget.get_space().range(); 978 979 // Redirect the PHI incoming writes. 980 for (auto *MA : S->getPHIIncomings(SAI)) { 981 // { DomainWrite[] } 982 auto Domain = getDomainFor(MA); 983 984 // { DomainWrite[] -> Element[] } 985 auto NewAccRel = WriteTarget.intersect_domain(Domain); 986 simplify(NewAccRel); 987 988 isl::space NewAccRelSpace = 989 Domain.get_space().map_from_domain_and_range(ElementSpace); 990 isl::map NewAccRelMap = singleton(NewAccRel, NewAccRelSpace); 991 MA->setNewAccessRelation(NewAccRelMap); 992 } 993 994 // Redirect the PHI read. 995 auto *PHIRead = S->getPHIRead(SAI); 996 PHIRead->setNewAccessRelation(ReadTarget); 997 applyLifetime(Proposed); 998 999 MappedPHIScalars++; 1000 NumberOfMappedPHIScalars++; 1001 } 1002 1003 /// Search and map scalars to memory overwritten by @p TargetStoreMA. 1004 /// 1005 /// Start trying to map scalars that are used in the same statement as the 1006 /// store. For every successful mapping, try to also map scalars of the 1007 /// statements where those are written. Repeat, until no more mapping 1008 /// opportunity is found. 1009 /// 1010 /// There is currently no preference in which order scalars are tried. 1011 /// Ideally, we would direct it towards a load instruction of the same array 1012 /// element. 1013 bool collapseScalarsToStore(MemoryAccess *TargetStoreMA) { 1014 assert(TargetStoreMA->isLatestArrayKind()); 1015 assert(TargetStoreMA->isMustWrite()); 1016 1017 auto TargetStmt = TargetStoreMA->getStatement(); 1018 1019 // { DomTarget[] } 1020 auto TargetDom = getDomainFor(TargetStmt); 1021 1022 // { DomTarget[] -> Element[] } 1023 auto TargetAccRel = getAccessRelationFor(TargetStoreMA); 1024 1025 // { Zone[] -> DomTarget[] } 1026 // For each point in time, find the next target store instance. 1027 auto Target = 1028 computeScalarReachingOverwrite(Schedule, TargetDom, false, true); 1029 1030 // { Zone[] -> Element[] } 1031 // Use the target store's write location as a suggestion to map scalars to. 1032 auto EltTarget = Target.apply_range(TargetAccRel); 1033 simplify(EltTarget); 1034 POLLY_DEBUG(dbgs() << " Target mapping is " << EltTarget << '\n'); 1035 1036 // Stack of elements not yet processed. 1037 SmallVector<MemoryAccess *, 16> Worklist; 1038 1039 // Set of scalars already tested. 1040 SmallPtrSet<const ScopArrayInfo *, 16> Closed; 1041 1042 // Lambda to add all scalar reads to the work list. 1043 auto ProcessAllIncoming = [&](ScopStmt *Stmt) { 1044 for (auto *MA : *Stmt) { 1045 if (!MA->isLatestScalarKind()) 1046 continue; 1047 if (!MA->isRead()) 1048 continue; 1049 1050 Worklist.push_back(MA); 1051 } 1052 }; 1053 1054 auto *WrittenVal = TargetStoreMA->getAccessInstruction()->getOperand(0); 1055 if (auto *WrittenValInputMA = TargetStmt->lookupInputAccessOf(WrittenVal)) 1056 Worklist.push_back(WrittenValInputMA); 1057 else 1058 ProcessAllIncoming(TargetStmt); 1059 1060 auto AnyMapped = false; 1061 auto &DL = S->getRegion().getEntry()->getModule()->getDataLayout(); 1062 auto StoreSize = 1063 DL.getTypeAllocSize(TargetStoreMA->getAccessValue()->getType()); 1064 1065 while (!Worklist.empty()) { 1066 auto *MA = Worklist.pop_back_val(); 1067 1068 auto *SAI = MA->getScopArrayInfo(); 1069 if (Closed.count(SAI)) 1070 continue; 1071 Closed.insert(SAI); 1072 POLLY_DEBUG(dbgs() << "\n Trying to map " << MA << " (SAI: " << SAI 1073 << ")\n"); 1074 1075 // Skip non-mappable scalars. 1076 if (!isMappable(SAI)) 1077 continue; 1078 1079 auto MASize = DL.getTypeAllocSize(MA->getAccessValue()->getType()); 1080 if (MASize > StoreSize) { 1081 POLLY_DEBUG( 1082 dbgs() << " Reject because storage size is insufficient\n"); 1083 continue; 1084 } 1085 1086 // Try to map MemoryKind::Value scalars. 1087 if (SAI->isValueKind()) { 1088 if (!tryMapValue(SAI, EltTarget)) 1089 continue; 1090 1091 auto *DefAcc = S->getValueDef(SAI); 1092 ProcessAllIncoming(DefAcc->getStatement()); 1093 1094 AnyMapped = true; 1095 continue; 1096 } 1097 1098 // Try to map MemoryKind::PHI scalars. 1099 if (SAI->isPHIKind()) { 1100 if (!tryMapPHI(SAI, EltTarget)) 1101 continue; 1102 // Add inputs of all incoming statements to the worklist. Prefer the 1103 // input accesses of the incoming blocks. 1104 for (auto *PHIWrite : S->getPHIIncomings(SAI)) { 1105 auto *PHIWriteStmt = PHIWrite->getStatement(); 1106 bool FoundAny = false; 1107 for (auto Incoming : PHIWrite->getIncoming()) { 1108 auto *IncomingInputMA = 1109 PHIWriteStmt->lookupInputAccessOf(Incoming.second); 1110 if (!IncomingInputMA) 1111 continue; 1112 1113 Worklist.push_back(IncomingInputMA); 1114 FoundAny = true; 1115 } 1116 1117 if (!FoundAny) 1118 ProcessAllIncoming(PHIWrite->getStatement()); 1119 } 1120 1121 AnyMapped = true; 1122 continue; 1123 } 1124 } 1125 1126 if (AnyMapped) { 1127 TargetsMapped++; 1128 NumberOfTargetsMapped++; 1129 } 1130 return AnyMapped; 1131 } 1132 1133 /// Compute when an array element is unused. 1134 /// 1135 /// @return { [Element[] -> Zone[]] } 1136 isl::union_set computeLifetime() const { 1137 // { Element[] -> Zone[] } 1138 auto ArrayUnused = computeArrayUnused(Schedule, AllMustWrites, AllReads, 1139 false, false, true); 1140 1141 auto Result = ArrayUnused.wrap(); 1142 1143 simplify(Result); 1144 return Result; 1145 } 1146 1147 /// Determine when an array element is written to, and which value instance is 1148 /// written. 1149 /// 1150 /// @return { [Element[] -> Scatter[]] -> ValInst[] } 1151 isl::union_map computeWritten() const { 1152 // { [Element[] -> Scatter[]] -> ValInst[] } 1153 auto EltWritten = applyDomainRange(AllWriteValInst, Schedule); 1154 1155 simplify(EltWritten); 1156 return EltWritten; 1157 } 1158 1159 /// Determine whether an access touches at most one element. 1160 /// 1161 /// The accessed element could be a scalar or accessing an array with constant 1162 /// subscript, such that all instances access only that element. 1163 /// 1164 /// @param MA The access to test. 1165 /// 1166 /// @return True, if zero or one elements are accessed; False if at least two 1167 /// different elements are accessed. 1168 bool isScalarAccess(MemoryAccess *MA) { 1169 auto Map = getAccessRelationFor(MA); 1170 auto Set = Map.range(); 1171 return Set.is_singleton(); 1172 } 1173 1174 /// Print mapping statistics to @p OS. 1175 void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const { 1176 OS.indent(Indent) << "Statistics {\n"; 1177 OS.indent(Indent + 4) << "Compatible overwrites: " 1178 << NumberOfCompatibleTargets << "\n"; 1179 OS.indent(Indent + 4) << "Overwrites mapped to: " << NumberOfTargetsMapped 1180 << '\n'; 1181 OS.indent(Indent + 4) << "Value scalars mapped: " 1182 << NumberOfMappedValueScalars << '\n'; 1183 OS.indent(Indent + 4) << "PHI scalars mapped: " 1184 << NumberOfMappedPHIScalars << '\n'; 1185 OS.indent(Indent) << "}\n"; 1186 } 1187 1188 public: 1189 DeLICMImpl(Scop *S, LoopInfo *LI) : ZoneAlgorithm("polly-delicm", S, LI) {} 1190 1191 /// Calculate the lifetime (definition to last use) of every array element. 1192 /// 1193 /// @return True if the computed lifetimes (#Zone) is usable. 1194 bool computeZone() { 1195 // Check that nothing strange occurs. 1196 collectCompatibleElts(); 1197 1198 isl::union_set EltUnused; 1199 isl::union_map EltKnown, EltWritten; 1200 1201 { 1202 IslMaxOperationsGuard MaxOpGuard(IslCtx.get(), DelicmMaxOps); 1203 1204 computeCommon(); 1205 1206 EltUnused = computeLifetime(); 1207 EltKnown = computeKnown(true, false); 1208 EltWritten = computeWritten(); 1209 } 1210 DeLICMAnalyzed++; 1211 1212 if (EltUnused.is_null() || EltKnown.is_null() || EltWritten.is_null()) { 1213 assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota && 1214 "The only reason that these things have not been computed should " 1215 "be if the max-operations limit hit"); 1216 DeLICMOutOfQuota++; 1217 POLLY_DEBUG(dbgs() << "DeLICM analysis exceeded max_operations\n"); 1218 DebugLoc Begin, End; 1219 getDebugLocations(getBBPairForRegion(&S->getRegion()), Begin, End); 1220 OptimizationRemarkAnalysis R(DEBUG_TYPE, "OutOfQuota", Begin, 1221 S->getEntry()); 1222 R << "maximal number of operations exceeded during zone analysis"; 1223 S->getFunction().getContext().diagnose(R); 1224 return false; 1225 } 1226 1227 Zone = OriginalZone = Knowledge({}, EltUnused, EltKnown, EltWritten); 1228 POLLY_DEBUG(dbgs() << "Computed Zone:\n"; OriginalZone.print(dbgs(), 4)); 1229 1230 assert(Zone.isUsable() && OriginalZone.isUsable()); 1231 return true; 1232 } 1233 1234 /// Try to map as many scalars to unused array elements as possible. 1235 /// 1236 /// Multiple scalars might be mappable to intersecting unused array element 1237 /// zones, but we can only chose one. This is a greedy algorithm, therefore 1238 /// the first processed element claims it. 1239 void greedyCollapse() { 1240 bool Modified = false; 1241 1242 for (auto &Stmt : *S) { 1243 for (auto *MA : Stmt) { 1244 if (!MA->isLatestArrayKind()) 1245 continue; 1246 if (!MA->isWrite()) 1247 continue; 1248 1249 if (MA->isMayWrite()) { 1250 POLLY_DEBUG(dbgs() << "Access " << MA 1251 << " pruned because it is a MAY_WRITE\n"); 1252 OptimizationRemarkMissed R(DEBUG_TYPE, "TargetMayWrite", 1253 MA->getAccessInstruction()); 1254 R << "Skipped possible mapping target because it is not an " 1255 "unconditional overwrite"; 1256 S->getFunction().getContext().diagnose(R); 1257 continue; 1258 } 1259 1260 if (Stmt.getNumIterators() == 0) { 1261 POLLY_DEBUG(dbgs() << "Access " << MA 1262 << " pruned because it is not in a loop\n"); 1263 OptimizationRemarkMissed R(DEBUG_TYPE, "WriteNotInLoop", 1264 MA->getAccessInstruction()); 1265 R << "skipped possible mapping target because it is not in a loop"; 1266 S->getFunction().getContext().diagnose(R); 1267 continue; 1268 } 1269 1270 if (isScalarAccess(MA)) { 1271 POLLY_DEBUG(dbgs() 1272 << "Access " << MA 1273 << " pruned because it writes only a single element\n"); 1274 OptimizationRemarkMissed R(DEBUG_TYPE, "ScalarWrite", 1275 MA->getAccessInstruction()); 1276 R << "skipped possible mapping target because the memory location " 1277 "written to does not depend on its outer loop"; 1278 S->getFunction().getContext().diagnose(R); 1279 continue; 1280 } 1281 1282 if (!isa<StoreInst>(MA->getAccessInstruction())) { 1283 POLLY_DEBUG(dbgs() << "Access " << MA 1284 << " pruned because it is not a StoreInst\n"); 1285 OptimizationRemarkMissed R(DEBUG_TYPE, "NotAStore", 1286 MA->getAccessInstruction()); 1287 R << "skipped possible mapping target because non-store instructions " 1288 "are not supported"; 1289 S->getFunction().getContext().diagnose(R); 1290 continue; 1291 } 1292 1293 // Check for more than one element access per statement instance. 1294 // Currently we expect write accesses to be functional, eg. disallow 1295 // 1296 // { Stmt[0] -> [i] : 0 <= i < 2 } 1297 // 1298 // This may occur when some accesses to the element write/read only 1299 // parts of the element, eg. a single byte. Polly then divides each 1300 // element into subelements of the smallest access length, normal access 1301 // then touch multiple of such subelements. It is very common when the 1302 // array is accesses with memset, memcpy or memmove which take i8* 1303 // arguments. 1304 isl::union_map AccRel = MA->getLatestAccessRelation(); 1305 if (!AccRel.is_single_valued().is_true()) { 1306 POLLY_DEBUG(dbgs() << "Access " << MA 1307 << " is incompatible because it writes multiple " 1308 "elements per instance\n"); 1309 OptimizationRemarkMissed R(DEBUG_TYPE, "NonFunctionalAccRel", 1310 MA->getAccessInstruction()); 1311 R << "skipped possible mapping target because it writes more than " 1312 "one element"; 1313 S->getFunction().getContext().diagnose(R); 1314 continue; 1315 } 1316 1317 isl::union_set TouchedElts = AccRel.range(); 1318 if (!TouchedElts.is_subset(CompatibleElts)) { 1319 POLLY_DEBUG( 1320 dbgs() 1321 << "Access " << MA 1322 << " is incompatible because it touches incompatible elements\n"); 1323 OptimizationRemarkMissed R(DEBUG_TYPE, "IncompatibleElts", 1324 MA->getAccessInstruction()); 1325 R << "skipped possible mapping target because a target location " 1326 "cannot be reliably analyzed"; 1327 S->getFunction().getContext().diagnose(R); 1328 continue; 1329 } 1330 1331 assert(isCompatibleAccess(MA)); 1332 NumberOfCompatibleTargets++; 1333 POLLY_DEBUG(dbgs() << "Analyzing target access " << MA << "\n"); 1334 if (collapseScalarsToStore(MA)) 1335 Modified = true; 1336 } 1337 } 1338 1339 if (Modified) 1340 DeLICMScopsModified++; 1341 } 1342 1343 /// Dump the internal information about a performed DeLICM to @p OS. 1344 void print(llvm::raw_ostream &OS, int Indent = 0) { 1345 if (!Zone.isUsable()) { 1346 OS.indent(Indent) << "Zone not computed\n"; 1347 return; 1348 } 1349 1350 printStatistics(OS, Indent); 1351 if (!isModified()) { 1352 OS.indent(Indent) << "No modification has been made\n"; 1353 return; 1354 } 1355 printAccesses(OS, Indent); 1356 } 1357 1358 /// Return whether at least one transformation been applied. 1359 bool isModified() const { return NumberOfTargetsMapped > 0; } 1360 }; 1361 1362 static std::unique_ptr<DeLICMImpl> collapseToUnused(Scop &S, LoopInfo &LI) { 1363 std::unique_ptr<DeLICMImpl> Impl = std::make_unique<DeLICMImpl>(&S, &LI); 1364 1365 if (!Impl->computeZone()) { 1366 POLLY_DEBUG(dbgs() << "Abort because cannot reliably compute lifetimes\n"); 1367 return Impl; 1368 } 1369 1370 POLLY_DEBUG(dbgs() << "Collapsing scalars to unused array elements...\n"); 1371 Impl->greedyCollapse(); 1372 1373 POLLY_DEBUG(dbgs() << "\nFinal Scop:\n"); 1374 POLLY_DEBUG(dbgs() << S); 1375 1376 return Impl; 1377 } 1378 1379 static std::unique_ptr<DeLICMImpl> runDeLICM(Scop &S, LoopInfo &LI) { 1380 std::unique_ptr<DeLICMImpl> Impl = collapseToUnused(S, LI); 1381 1382 Scop::ScopStatistics ScopStats = S.getStatistics(); 1383 NumValueWrites += ScopStats.NumValueWrites; 1384 NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; 1385 NumPHIWrites += ScopStats.NumPHIWrites; 1386 NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; 1387 NumSingletonWrites += ScopStats.NumSingletonWrites; 1388 NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; 1389 1390 return Impl; 1391 } 1392 1393 static PreservedAnalyses runDeLICMUsingNPM(Scop &S, ScopAnalysisManager &SAM, 1394 ScopStandardAnalysisResults &SAR, 1395 SPMUpdater &U, raw_ostream *OS) { 1396 LoopInfo &LI = SAR.LI; 1397 std::unique_ptr<DeLICMImpl> Impl = runDeLICM(S, LI); 1398 1399 if (OS) { 1400 *OS << "Printing analysis 'Polly - DeLICM/DePRE' for region: '" 1401 << S.getName() << "' in function '" << S.getFunction().getName() 1402 << "':\n"; 1403 if (Impl) { 1404 assert(Impl->getScop() == &S); 1405 1406 *OS << "DeLICM result:\n"; 1407 Impl->print(*OS); 1408 } 1409 } 1410 1411 if (!Impl->isModified()) 1412 return PreservedAnalyses::all(); 1413 1414 PreservedAnalyses PA; 1415 PA.preserveSet<AllAnalysesOn<Module>>(); 1416 PA.preserveSet<AllAnalysesOn<Function>>(); 1417 PA.preserveSet<AllAnalysesOn<Loop>>(); 1418 return PA; 1419 } 1420 1421 class DeLICMWrapperPass final : public ScopPass { 1422 private: 1423 DeLICMWrapperPass(const DeLICMWrapperPass &) = delete; 1424 const DeLICMWrapperPass &operator=(const DeLICMWrapperPass &) = delete; 1425 1426 /// The pass implementation, also holding per-scop data. 1427 std::unique_ptr<DeLICMImpl> Impl; 1428 1429 public: 1430 static char ID; 1431 explicit DeLICMWrapperPass() : ScopPass(ID) {} 1432 1433 void getAnalysisUsage(AnalysisUsage &AU) const override { 1434 AU.addRequiredTransitive<ScopInfoRegionPass>(); 1435 AU.addRequired<LoopInfoWrapperPass>(); 1436 AU.setPreservesAll(); 1437 } 1438 1439 bool runOnScop(Scop &S) override { 1440 // Free resources for previous scop's computation, if not yet done. 1441 releaseMemory(); 1442 1443 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1444 Impl = runDeLICM(S, LI); 1445 1446 return Impl->isModified(); 1447 } 1448 1449 void printScop(raw_ostream &OS, Scop &S) const override { 1450 if (!Impl) 1451 return; 1452 assert(Impl->getScop() == &S); 1453 1454 OS << "DeLICM result:\n"; 1455 Impl->print(OS); 1456 } 1457 1458 void releaseMemory() override { Impl.reset(); } 1459 }; 1460 1461 char DeLICMWrapperPass::ID; 1462 1463 /// Print result from DeLICMWrapperPass. 1464 class DeLICMPrinterLegacyPass final : public ScopPass { 1465 public: 1466 static char ID; 1467 1468 DeLICMPrinterLegacyPass() : DeLICMPrinterLegacyPass(outs()) {} 1469 explicit DeLICMPrinterLegacyPass(llvm::raw_ostream &OS) 1470 : ScopPass(ID), OS(OS) {} 1471 1472 bool runOnScop(Scop &S) override { 1473 DeLICMWrapperPass &P = getAnalysis<DeLICMWrapperPass>(); 1474 1475 OS << "Printing analysis '" << P.getPassName() << "' for region: '" 1476 << S.getRegion().getNameStr() << "' in function '" 1477 << S.getFunction().getName() << "':\n"; 1478 P.printScop(OS, S); 1479 1480 return false; 1481 } 1482 1483 void getAnalysisUsage(AnalysisUsage &AU) const override { 1484 ScopPass::getAnalysisUsage(AU); 1485 AU.addRequired<DeLICMWrapperPass>(); 1486 AU.setPreservesAll(); 1487 } 1488 1489 private: 1490 llvm::raw_ostream &OS; 1491 }; 1492 1493 char DeLICMPrinterLegacyPass::ID = 0; 1494 } // anonymous namespace 1495 1496 Pass *polly::createDeLICMWrapperPass() { return new DeLICMWrapperPass(); } 1497 1498 llvm::Pass *polly::createDeLICMPrinterLegacyPass(llvm::raw_ostream &OS) { 1499 return new DeLICMPrinterLegacyPass(OS); 1500 } 1501 1502 llvm::PreservedAnalyses polly::DeLICMPass::run(Scop &S, 1503 ScopAnalysisManager &SAM, 1504 ScopStandardAnalysisResults &SAR, 1505 SPMUpdater &U) { 1506 return runDeLICMUsingNPM(S, SAM, SAR, U, nullptr); 1507 } 1508 1509 llvm::PreservedAnalyses DeLICMPrinterPass::run(Scop &S, 1510 ScopAnalysisManager &SAM, 1511 ScopStandardAnalysisResults &SAR, 1512 SPMUpdater &U) { 1513 return runDeLICMUsingNPM(S, SAM, SAR, U, &OS); 1514 } 1515 1516 bool polly::isConflicting( 1517 isl::union_set ExistingOccupied, isl::union_set ExistingUnused, 1518 isl::union_map ExistingKnown, isl::union_map ExistingWrites, 1519 isl::union_set ProposedOccupied, isl::union_set ProposedUnused, 1520 isl::union_map ProposedKnown, isl::union_map ProposedWrites, 1521 llvm::raw_ostream *OS, unsigned Indent) { 1522 Knowledge Existing(std::move(ExistingOccupied), std::move(ExistingUnused), 1523 std::move(ExistingKnown), std::move(ExistingWrites)); 1524 Knowledge Proposed(std::move(ProposedOccupied), std::move(ProposedUnused), 1525 std::move(ProposedKnown), std::move(ProposedWrites)); 1526 1527 return Knowledge::isConflicting(Existing, Proposed, OS, Indent); 1528 } 1529 1530 INITIALIZE_PASS_BEGIN(DeLICMWrapperPass, "polly-delicm", "Polly - DeLICM/DePRE", 1531 false, false) 1532 INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass) 1533 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 1534 INITIALIZE_PASS_END(DeLICMWrapperPass, "polly-delicm", "Polly - DeLICM/DePRE", 1535 false, false) 1536 1537 INITIALIZE_PASS_BEGIN(DeLICMPrinterLegacyPass, "polly-print-delicm", 1538 "Polly - Print DeLICM/DePRE", false, false) 1539 INITIALIZE_PASS_DEPENDENCY(ScopInfoWrapperPass) 1540 INITIALIZE_PASS_END(DeLICMPrinterLegacyPass, "polly-print-delicm", 1541 "Polly - Print DeLICM/DePRE", false, false) 1542