1 //===- CodeGeneration.cpp - Code generate the Scops using ISL. ---------======// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // The CodeGeneration pass takes a Scop created by ScopInfo and translates it 10 // back to LLVM-IR using the ISL code generator. 11 // 12 // The Scop describes the high level memory behavior of a control flow region. 13 // Transformation passes can update the schedule (execution order) of statements 14 // in the Scop. ISL is used to generate an abstract syntax tree that reflects 15 // the updated execution order. This clast is used to create new LLVM-IR that is 16 // computationally equivalent to the original control flow region, but executes 17 // its code in the new execution order defined by the changed schedule. 18 // 19 //===----------------------------------------------------------------------===// 20 21 #include "polly/CodeGen/CodeGeneration.h" 22 #include "polly/CodeGen/IRBuilder.h" 23 #include "polly/CodeGen/IslAst.h" 24 #include "polly/CodeGen/IslNodeBuilder.h" 25 #include "polly/CodeGen/PerfMonitor.h" 26 #include "polly/CodeGen/Utils.h" 27 #include "polly/DependenceInfo.h" 28 #include "polly/LinkAllPasses.h" 29 #include "polly/Options.h" 30 #include "polly/ScopInfo.h" 31 #include "polly/Support/ScopHelper.h" 32 #include "llvm/ADT/Statistic.h" 33 #include "llvm/Analysis/LoopInfo.h" 34 #include "llvm/Analysis/RegionInfo.h" 35 #include "llvm/IR/BasicBlock.h" 36 #include "llvm/IR/Dominators.h" 37 #include "llvm/IR/Function.h" 38 #include "llvm/IR/PassManager.h" 39 #include "llvm/IR/Verifier.h" 40 #include "llvm/InitializePasses.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/ErrorHandling.h" 43 #include "llvm/Support/raw_ostream.h" 44 #include "llvm/Transforms/Utils/LoopUtils.h" 45 #include "isl/ast.h" 46 #include <cassert> 47 48 using namespace llvm; 49 using namespace polly; 50 51 #include "polly/Support/PollyDebug.h" 52 #define DEBUG_TYPE "polly-codegen" 53 54 static cl::opt<bool> Verify("polly-codegen-verify", 55 cl::desc("Verify the function generated by Polly"), 56 cl::Hidden, cl::cat(PollyCategory)); 57 58 bool polly::PerfMonitoring; 59 60 static cl::opt<bool, true> 61 XPerfMonitoring("polly-codegen-perf-monitoring", 62 cl::desc("Add run-time performance monitoring"), cl::Hidden, 63 cl::location(polly::PerfMonitoring), 64 cl::cat(PollyCategory)); 65 66 STATISTIC(ScopsProcessed, "Number of SCoP processed"); 67 STATISTIC(CodegenedScops, "Number of successfully generated SCoPs"); 68 STATISTIC(CodegenedAffineLoops, 69 "Number of original affine loops in SCoPs that have been generated"); 70 STATISTIC(CodegenedBoxedLoops, 71 "Number of original boxed loops in SCoPs that have been generated"); 72 73 namespace polly { 74 75 /// Mark a basic block unreachable. 76 /// 77 /// Marks the basic block @p Block unreachable by equipping it with an 78 /// UnreachableInst. 79 void markBlockUnreachable(BasicBlock &Block, PollyIRBuilder &Builder) { 80 auto *OrigTerminator = Block.getTerminator(); 81 Builder.SetInsertPoint(OrigTerminator); 82 Builder.CreateUnreachable(); 83 OrigTerminator->eraseFromParent(); 84 } 85 } // namespace polly 86 87 static void verifyGeneratedFunction(Scop &S, Function &F, IslAstInfo &AI) { 88 if (!Verify || !verifyFunction(F, &errs())) 89 return; 90 91 POLLY_DEBUG({ 92 errs() << "== ISL Codegen created an invalid function ==\n\n== The " 93 "SCoP ==\n"; 94 errs() << S; 95 errs() << "\n== The isl AST ==\n"; 96 AI.print(errs()); 97 errs() << "\n== The invalid function ==\n"; 98 F.print(errs()); 99 }); 100 101 llvm_unreachable("Polly generated function could not be verified. Add " 102 "-polly-codegen-verify=false to disable this assertion."); 103 } 104 105 // CodeGeneration adds a lot of BBs without updating the RegionInfo 106 // We make all created BBs belong to the scop's parent region without any 107 // nested structure to keep the RegionInfo verifier happy. 108 static void fixRegionInfo(Function &F, Region &ParentRegion, RegionInfo &RI) { 109 for (BasicBlock &BB : F) { 110 if (RI.getRegionFor(&BB)) 111 continue; 112 113 RI.setRegionFor(&BB, &ParentRegion); 114 } 115 } 116 117 /// Remove all lifetime markers (llvm.lifetime.start, llvm.lifetime.end) from 118 /// @R. 119 /// 120 /// CodeGeneration does not copy lifetime markers into the optimized SCoP, 121 /// which would leave the them only in the original path. This can transform 122 /// code such as 123 /// 124 /// llvm.lifetime.start(%p) 125 /// llvm.lifetime.end(%p) 126 /// 127 /// into 128 /// 129 /// if (RTC) { 130 /// // generated code 131 /// } else { 132 /// // original code 133 /// llvm.lifetime.start(%p) 134 /// } 135 /// llvm.lifetime.end(%p) 136 /// 137 /// The current StackColoring algorithm cannot handle if some, but not all, 138 /// paths from the end marker to the entry block cross the start marker. Same 139 /// for start markers that do not always cross the end markers. We avoid any 140 /// issues by removing all lifetime markers, even from the original code. 141 /// 142 /// A better solution could be to hoist all llvm.lifetime.start to the split 143 /// node and all llvm.lifetime.end to the merge node, which should be 144 /// conservatively correct. 145 static void removeLifetimeMarkers(Region *R) { 146 for (auto *BB : R->blocks()) { 147 auto InstIt = BB->begin(); 148 auto InstEnd = BB->end(); 149 150 while (InstIt != InstEnd) { 151 auto NextIt = InstIt; 152 ++NextIt; 153 154 if (auto *IT = dyn_cast<IntrinsicInst>(&*InstIt)) { 155 switch (IT->getIntrinsicID()) { 156 case Intrinsic::lifetime_start: 157 case Intrinsic::lifetime_end: 158 IT->eraseFromParent(); 159 break; 160 default: 161 break; 162 } 163 } 164 165 InstIt = NextIt; 166 } 167 } 168 } 169 170 static bool generateCode(Scop &S, IslAstInfo &AI, LoopInfo &LI, 171 DominatorTree &DT, ScalarEvolution &SE, 172 RegionInfo &RI) { 173 // Check whether IslAstInfo uses the same isl_ctx. Since -polly-codegen 174 // reports itself to preserve DependenceInfo and IslAstInfo, we might get 175 // those analysis that were computed by a different ScopInfo for a different 176 // Scop structure. When the ScopInfo/Scop object is freed, there is a high 177 // probability that the new ScopInfo/Scop object will be created at the same 178 // heap position with the same address. Comparing whether the Scop or ScopInfo 179 // address is the expected therefore is unreliable. 180 // Instead, we compare the address of the isl_ctx object. Both, DependenceInfo 181 // and IslAstInfo must hold a reference to the isl_ctx object to ensure it is 182 // not freed before the destruction of those analyses which might happen after 183 // the destruction of the Scop/ScopInfo they refer to. Hence, the isl_ctx 184 // will not be freed and its space not reused as long there is a 185 // DependenceInfo or IslAstInfo around. 186 IslAst &Ast = AI.getIslAst(); 187 if (Ast.getSharedIslCtx() != S.getSharedIslCtx()) { 188 POLLY_DEBUG(dbgs() << "Got an IstAst for a different Scop/isl_ctx\n"); 189 return false; 190 } 191 192 // Check if we created an isl_ast root node, otherwise exit. 193 isl::ast_node AstRoot = Ast.getAst(); 194 if (AstRoot.is_null()) 195 return false; 196 197 // Collect statistics. Do it before we modify the IR to avoid having it any 198 // influence on the result. 199 auto ScopStats = S.getStatistics(); 200 ScopsProcessed++; 201 202 auto &DL = S.getFunction().getDataLayout(); 203 Region *R = &S.getRegion(); 204 assert(!R->isTopLevelRegion() && "Top level regions are not supported"); 205 206 ScopAnnotator Annotator; 207 208 simplifyRegion(R, &DT, &LI, &RI); 209 assert(R->isSimple()); 210 BasicBlock *EnteringBB = S.getEnteringBlock(); 211 assert(EnteringBB); 212 PollyIRBuilder Builder(EnteringBB->getContext(), ConstantFolder(), 213 IRInserter(Annotator)); 214 Builder.SetInsertPoint(EnteringBB->getTerminator()); 215 216 // Only build the run-time condition and parameters _after_ having 217 // introduced the conditional branch. This is important as the conditional 218 // branch will guard the original scop from new induction variables that 219 // the SCEVExpander may introduce while code generating the parameters and 220 // which may introduce scalar dependences that prevent us from correctly 221 // code generating this scop. 222 BBPair StartExitBlocks = 223 std::get<0>(executeScopConditionally(S, Builder.getTrue(), DT, RI, LI)); 224 BasicBlock *StartBlock = std::get<0>(StartExitBlocks); 225 BasicBlock *ExitBlock = std::get<1>(StartExitBlocks); 226 227 removeLifetimeMarkers(R); 228 auto *SplitBlock = StartBlock->getSinglePredecessor(); 229 230 IslNodeBuilder NodeBuilder(Builder, Annotator, DL, LI, SE, DT, S, StartBlock); 231 232 // All arrays must have their base pointers known before 233 // ScopAnnotator::buildAliasScopes. 234 NodeBuilder.allocateNewArrays(StartExitBlocks); 235 Annotator.buildAliasScopes(S); 236 237 // The code below annotates the "llvm.loop.vectorize.enable" to false 238 // for the code flow taken when RTCs fail. Because we don't want the 239 // Loop Vectorizer to come in later and vectorize the original fall back 240 // loop when Polly is enabled. 241 for (Loop *L : LI.getLoopsInPreorder()) { 242 if (S.contains(L)) 243 addStringMetadataToLoop(L, "llvm.loop.vectorize.enable", 0); 244 } 245 246 if (PerfMonitoring) { 247 PerfMonitor P(S, EnteringBB->getParent()->getParent()); 248 P.initialize(); 249 P.insertRegionStart(SplitBlock->getTerminator()); 250 251 BasicBlock *MergeBlock = ExitBlock->getUniqueSuccessor(); 252 P.insertRegionEnd(MergeBlock->getTerminator()); 253 } 254 255 // First generate code for the hoisted invariant loads and transitively the 256 // parameters they reference. Afterwards, for the remaining parameters that 257 // might reference the hoisted loads. Finally, build the runtime check 258 // that might reference both hoisted loads as well as parameters. 259 // If the hoisting fails we have to bail and execute the original code. 260 Builder.SetInsertPoint(SplitBlock->getTerminator()); 261 if (!NodeBuilder.preloadInvariantLoads()) { 262 // Patch the introduced branch condition to ensure that we always execute 263 // the original SCoP. 264 auto *FalseI1 = Builder.getFalse(); 265 auto *SplitBBTerm = Builder.GetInsertBlock()->getTerminator(); 266 SplitBBTerm->setOperand(0, FalseI1); 267 268 // Since the other branch is hence ignored we mark it as unreachable and 269 // adjust the dominator tree accordingly. 270 auto *ExitingBlock = StartBlock->getUniqueSuccessor(); 271 assert(ExitingBlock); 272 auto *MergeBlock = ExitingBlock->getUniqueSuccessor(); 273 assert(MergeBlock); 274 markBlockUnreachable(*StartBlock, Builder); 275 markBlockUnreachable(*ExitingBlock, Builder); 276 auto *ExitingBB = S.getExitingBlock(); 277 assert(ExitingBB); 278 DT.changeImmediateDominator(MergeBlock, ExitingBB); 279 DT.eraseNode(ExitingBlock); 280 } else { 281 NodeBuilder.addParameters(S.getContext().release()); 282 Value *RTC = NodeBuilder.createRTC(AI.getRunCondition().release()); 283 284 Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC); 285 286 // Explicitly set the insert point to the end of the block to avoid that a 287 // split at the builder's current 288 // insert position would move the malloc calls to the wrong BasicBlock. 289 // Ideally we would just split the block during allocation of the new 290 // arrays, but this would break the assumption that there are no blocks 291 // between polly.start and polly.exiting (at this point). 292 Builder.SetInsertPoint(StartBlock->getTerminator()); 293 294 NodeBuilder.create(AstRoot.release()); 295 NodeBuilder.finalize(); 296 fixRegionInfo(*EnteringBB->getParent(), *R->getParent(), RI); 297 298 CodegenedScops++; 299 CodegenedAffineLoops += ScopStats.NumAffineLoops; 300 CodegenedBoxedLoops += ScopStats.NumBoxedLoops; 301 } 302 303 Function *F = EnteringBB->getParent(); 304 verifyGeneratedFunction(S, *F, AI); 305 for (auto *SubF : NodeBuilder.getParallelSubfunctions()) 306 verifyGeneratedFunction(S, *SubF, AI); 307 308 // Mark the function such that we run additional cleanup passes on this 309 // function (e.g. mem2reg to rediscover phi nodes). 310 F->addFnAttr("polly-optimized"); 311 return true; 312 } 313 314 namespace { 315 316 class CodeGeneration final : public ScopPass { 317 public: 318 static char ID; 319 320 /// The data layout used. 321 const DataLayout *DL; 322 323 /// @name The analysis passes we need to generate code. 324 /// 325 ///{ 326 LoopInfo *LI; 327 IslAstInfo *AI; 328 DominatorTree *DT; 329 ScalarEvolution *SE; 330 RegionInfo *RI; 331 ///} 332 333 CodeGeneration() : ScopPass(ID) {} 334 335 /// Generate LLVM-IR for the SCoP @p S. 336 bool runOnScop(Scop &S) override { 337 AI = &getAnalysis<IslAstInfoWrapperPass>().getAI(); 338 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 339 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 340 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 341 DL = &S.getFunction().getDataLayout(); 342 RI = &getAnalysis<RegionInfoPass>().getRegionInfo(); 343 return generateCode(S, *AI, *LI, *DT, *SE, *RI); 344 } 345 346 /// Register all analyses and transformation required. 347 void getAnalysisUsage(AnalysisUsage &AU) const override { 348 ScopPass::getAnalysisUsage(AU); 349 350 AU.addRequired<DominatorTreeWrapperPass>(); 351 AU.addRequired<IslAstInfoWrapperPass>(); 352 AU.addRequired<RegionInfoPass>(); 353 AU.addRequired<ScalarEvolutionWrapperPass>(); 354 AU.addRequired<ScopDetectionWrapperPass>(); 355 AU.addRequired<ScopInfoRegionPass>(); 356 AU.addRequired<LoopInfoWrapperPass>(); 357 358 AU.addPreserved<DependenceInfo>(); 359 AU.addPreserved<IslAstInfoWrapperPass>(); 360 361 // FIXME: We do not yet add regions for the newly generated code to the 362 // region tree. 363 } 364 }; 365 } // namespace 366 367 PreservedAnalyses CodeGenerationPass::run(Scop &S, ScopAnalysisManager &SAM, 368 ScopStandardAnalysisResults &AR, 369 SPMUpdater &U) { 370 auto &AI = SAM.getResult<IslAstAnalysis>(S, AR); 371 if (generateCode(S, AI, AR.LI, AR.DT, AR.SE, AR.RI)) { 372 U.invalidateScop(S); 373 return PreservedAnalyses::none(); 374 } 375 376 return PreservedAnalyses::all(); 377 } 378 379 char CodeGeneration::ID = 1; 380 381 Pass *polly::createCodeGenerationPass() { return new CodeGeneration(); } 382 383 INITIALIZE_PASS_BEGIN(CodeGeneration, "polly-codegen", 384 "Polly - Create LLVM-IR from SCoPs", false, false); 385 INITIALIZE_PASS_DEPENDENCY(DependenceInfo); 386 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 387 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 388 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 389 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 390 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass); 391 INITIALIZE_PASS_END(CodeGeneration, "polly-codegen", 392 "Polly - Create LLVM-IR from SCoPs", false, false) 393