1 //===- CodeGeneration.cpp - Code generate the Scops using ISL. ---------======// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // The CodeGeneration pass takes a Scop created by ScopInfo and translates it 11 // back to LLVM-IR using the ISL code generator. 12 // 13 // The Scop describes the high level memory behavior of a control flow region. 14 // Transformation passes can update the schedule (execution order) of statements 15 // in the Scop. ISL is used to generate an abstract syntax tree that reflects 16 // the updated execution order. This clast is used to create new LLVM-IR that is 17 // computationally equivalent to the original control flow region, but executes 18 // its code in the new execution order defined by the changed schedule. 19 // 20 //===----------------------------------------------------------------------===// 21 22 #include "polly/CodeGen/CodeGeneration.h" 23 #include "polly/CodeGen/IRBuilder.h" 24 #include "polly/CodeGen/IslAst.h" 25 #include "polly/CodeGen/IslNodeBuilder.h" 26 #include "polly/CodeGen/PerfMonitor.h" 27 #include "polly/CodeGen/Utils.h" 28 #include "polly/DependenceInfo.h" 29 #include "polly/LinkAllPasses.h" 30 #include "polly/Options.h" 31 #include "polly/ScopDetectionDiagnostic.h" 32 #include "polly/ScopInfo.h" 33 #include "polly/Support/ScopHelper.h" 34 #include "llvm/ADT/Statistic.h" 35 #include "llvm/Analysis/AliasAnalysis.h" 36 #include "llvm/Analysis/BasicAliasAnalysis.h" 37 #include "llvm/Analysis/GlobalsModRef.h" 38 #include "llvm/Analysis/LoopInfo.h" 39 #include "llvm/Analysis/RegionInfo.h" 40 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 41 #include "llvm/IR/BasicBlock.h" 42 #include "llvm/IR/Dominators.h" 43 #include "llvm/IR/Function.h" 44 #include "llvm/IR/Instruction.h" 45 #include "llvm/IR/IntrinsicInst.h" 46 #include "llvm/IR/Intrinsics.h" 47 #include "llvm/IR/Module.h" 48 #include "llvm/IR/PassManager.h" 49 #include "llvm/IR/Verifier.h" 50 #include "llvm/Pass.h" 51 #include "llvm/Support/Casting.h" 52 #include "llvm/Support/CommandLine.h" 53 #include "llvm/Support/Debug.h" 54 #include "llvm/Support/ErrorHandling.h" 55 #include "llvm/Support/raw_ostream.h" 56 #include "isl/ast.h" 57 #include <cassert> 58 #include <utility> 59 60 using namespace llvm; 61 using namespace polly; 62 63 #define DEBUG_TYPE "polly-codegen" 64 65 static cl::opt<bool> Verify("polly-codegen-verify", 66 cl::desc("Verify the function generated by Polly"), 67 cl::Hidden, cl::init(false), cl::ZeroOrMore, 68 cl::cat(PollyCategory)); 69 70 bool polly::PerfMonitoring; 71 72 static cl::opt<bool, true> 73 XPerfMonitoring("polly-codegen-perf-monitoring", 74 cl::desc("Add run-time performance monitoring"), cl::Hidden, 75 cl::location(polly::PerfMonitoring), cl::init(false), 76 cl::ZeroOrMore, cl::cat(PollyCategory)); 77 78 STATISTIC(ScopsProcessed, "Number of SCoP processed"); 79 STATISTIC(CodegenedScops, "Number of successfully generated SCoPs"); 80 STATISTIC(CodegenedAffineLoops, 81 "Number of original affine loops in SCoPs that have been generated"); 82 STATISTIC(CodegenedBoxedLoops, 83 "Number of original boxed loops in SCoPs that have been generated"); 84 85 namespace polly { 86 87 /// Mark a basic block unreachable. 88 /// 89 /// Marks the basic block @p Block unreachable by equipping it with an 90 /// UnreachableInst. 91 void markBlockUnreachable(BasicBlock &Block, PollyIRBuilder &Builder) { 92 auto *OrigTerminator = Block.getTerminator(); 93 Builder.SetInsertPoint(OrigTerminator); 94 Builder.CreateUnreachable(); 95 OrigTerminator->eraseFromParent(); 96 } 97 } // namespace polly 98 99 static void verifyGeneratedFunction(Scop &S, Function &F, IslAstInfo &AI) { 100 if (!Verify || !verifyFunction(F, &errs())) 101 return; 102 103 DEBUG({ 104 errs() << "== ISL Codegen created an invalid function ==\n\n== The " 105 "SCoP ==\n"; 106 errs() << S; 107 errs() << "\n== The isl AST ==\n"; 108 AI.print(errs()); 109 errs() << "\n== The invalid function ==\n"; 110 F.print(errs()); 111 }); 112 113 llvm_unreachable("Polly generated function could not be verified. Add " 114 "-polly-codegen-verify=false to disable this assertion."); 115 } 116 117 // CodeGeneration adds a lot of BBs without updating the RegionInfo 118 // We make all created BBs belong to the scop's parent region without any 119 // nested structure to keep the RegionInfo verifier happy. 120 static void fixRegionInfo(Function &F, Region &ParentRegion, RegionInfo &RI) { 121 for (BasicBlock &BB : F) { 122 if (RI.getRegionFor(&BB)) 123 continue; 124 125 RI.setRegionFor(&BB, &ParentRegion); 126 } 127 } 128 129 /// Remove all lifetime markers (llvm.lifetime.start, llvm.lifetime.end) from 130 /// @R. 131 /// 132 /// CodeGeneration does not copy lifetime markers into the optimized SCoP, 133 /// which would leave the them only in the original path. This can transform 134 /// code such as 135 /// 136 /// llvm.lifetime.start(%p) 137 /// llvm.lifetime.end(%p) 138 /// 139 /// into 140 /// 141 /// if (RTC) { 142 /// // generated code 143 /// } else { 144 /// // original code 145 /// llvm.lifetime.start(%p) 146 /// } 147 /// llvm.lifetime.end(%p) 148 /// 149 /// The current StackColoring algorithm cannot handle if some, but not all, 150 /// paths from the end marker to the entry block cross the start marker. Same 151 /// for start markers that do not always cross the end markers. We avoid any 152 /// issues by removing all lifetime markers, even from the original code. 153 /// 154 /// A better solution could be to hoist all llvm.lifetime.start to the split 155 /// node and all llvm.lifetime.end to the merge node, which should be 156 /// conservatively correct. 157 static void removeLifetimeMarkers(Region *R) { 158 for (auto *BB : R->blocks()) { 159 auto InstIt = BB->begin(); 160 auto InstEnd = BB->end(); 161 162 while (InstIt != InstEnd) { 163 auto NextIt = InstIt; 164 ++NextIt; 165 166 if (auto *IT = dyn_cast<IntrinsicInst>(&*InstIt)) { 167 switch (IT->getIntrinsicID()) { 168 case Intrinsic::lifetime_start: 169 case Intrinsic::lifetime_end: 170 BB->getInstList().erase(InstIt); 171 break; 172 default: 173 break; 174 } 175 } 176 177 InstIt = NextIt; 178 } 179 } 180 } 181 182 static bool CodeGen(Scop &S, IslAstInfo &AI, LoopInfo &LI, DominatorTree &DT, 183 ScalarEvolution &SE, RegionInfo &RI) { 184 // Check whether IslAstInfo uses the same isl_ctx. Since -polly-codegen 185 // reports itself to preserve DependenceInfo and IslAstInfo, we might get 186 // those analysis that were computed by a different ScopInfo for a different 187 // Scop structure. When the ScopInfo/Scop object is freed, there is a high 188 // probability that the new ScopInfo/Scop object will be created at the same 189 // heap position with the same address. Comparing whether the Scop or ScopInfo 190 // address is the expected therefore is unreliable. 191 // Instead, we compare the address of the isl_ctx object. Both, DependenceInfo 192 // and IslAstInfo must hold a reference to the isl_ctx object to ensure it is 193 // not freed before the destruction of those analyses which might happen after 194 // the destruction of the Scop/ScopInfo they refer to. Hence, the isl_ctx 195 // will not be freed and its space not reused as long there is a 196 // DependenceInfo or IslAstInfo around. 197 IslAst &Ast = AI.getIslAst(); 198 if (Ast.getSharedIslCtx() != S.getSharedIslCtx()) { 199 DEBUG(dbgs() << "Got an IstAst for a different Scop/isl_ctx\n"); 200 return false; 201 } 202 203 // Check if we created an isl_ast root node, otherwise exit. 204 isl_ast_node *AstRoot = Ast.getAst(); 205 if (!AstRoot) 206 return false; 207 208 // Collect statistics. Do it before we modify the IR to avoid having it any 209 // influence on the result. 210 auto ScopStats = S.getStatistics(); 211 ScopsProcessed++; 212 213 auto &DL = S.getFunction().getParent()->getDataLayout(); 214 Region *R = &S.getRegion(); 215 assert(!R->isTopLevelRegion() && "Top level regions are not supported"); 216 217 ScopAnnotator Annotator; 218 219 simplifyRegion(R, &DT, &LI, &RI); 220 assert(R->isSimple()); 221 BasicBlock *EnteringBB = S.getEnteringBlock(); 222 assert(EnteringBB); 223 PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator); 224 225 // Only build the run-time condition and parameters _after_ having 226 // introduced the conditional branch. This is important as the conditional 227 // branch will guard the original scop from new induction variables that 228 // the SCEVExpander may introduce while code generating the parameters and 229 // which may introduce scalar dependences that prevent us from correctly 230 // code generating this scop. 231 BBPair StartExitBlocks = 232 std::get<0>(executeScopConditionally(S, Builder.getTrue(), DT, RI, LI)); 233 BasicBlock *StartBlock = std::get<0>(StartExitBlocks); 234 BasicBlock *ExitBlock = std::get<1>(StartExitBlocks); 235 236 removeLifetimeMarkers(R); 237 auto *SplitBlock = StartBlock->getSinglePredecessor(); 238 239 IslNodeBuilder NodeBuilder(Builder, Annotator, DL, LI, SE, DT, S, StartBlock); 240 241 // All arrays must have their base pointers known before 242 // ScopAnnotator::buildAliasScopes. 243 NodeBuilder.allocateNewArrays(StartExitBlocks); 244 Annotator.buildAliasScopes(S); 245 246 if (PerfMonitoring) { 247 PerfMonitor P(S, EnteringBB->getParent()->getParent()); 248 P.initialize(); 249 P.insertRegionStart(SplitBlock->getTerminator()); 250 251 BasicBlock *MergeBlock = ExitBlock->getUniqueSuccessor(); 252 P.insertRegionEnd(MergeBlock->getTerminator()); 253 } 254 255 // First generate code for the hoisted invariant loads and transitively the 256 // parameters they reference. Afterwards, for the remaining parameters that 257 // might reference the hoisted loads. Finally, build the runtime check 258 // that might reference both hoisted loads as well as parameters. 259 // If the hoisting fails we have to bail and execute the original code. 260 Builder.SetInsertPoint(SplitBlock->getTerminator()); 261 if (!NodeBuilder.preloadInvariantLoads()) { 262 // Patch the introduced branch condition to ensure that we always execute 263 // the original SCoP. 264 auto *FalseI1 = Builder.getFalse(); 265 auto *SplitBBTerm = Builder.GetInsertBlock()->getTerminator(); 266 SplitBBTerm->setOperand(0, FalseI1); 267 268 // Since the other branch is hence ignored we mark it as unreachable and 269 // adjust the dominator tree accordingly. 270 auto *ExitingBlock = StartBlock->getUniqueSuccessor(); 271 assert(ExitingBlock); 272 auto *MergeBlock = ExitingBlock->getUniqueSuccessor(); 273 assert(MergeBlock); 274 markBlockUnreachable(*StartBlock, Builder); 275 markBlockUnreachable(*ExitingBlock, Builder); 276 auto *ExitingBB = S.getExitingBlock(); 277 assert(ExitingBB); 278 DT.changeImmediateDominator(MergeBlock, ExitingBB); 279 DT.eraseNode(ExitingBlock); 280 281 isl_ast_node_free(AstRoot); 282 } else { 283 NodeBuilder.addParameters(S.getContext().release()); 284 Value *RTC = NodeBuilder.createRTC(AI.getRunCondition()); 285 286 Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC); 287 288 // Explicitly set the insert point to the end of the block to avoid that a 289 // split at the builder's current 290 // insert position would move the malloc calls to the wrong BasicBlock. 291 // Ideally we would just split the block during allocation of the new 292 // arrays, but this would break the assumption that there are no blocks 293 // between polly.start and polly.exiting (at this point). 294 Builder.SetInsertPoint(StartBlock->getTerminator()); 295 296 NodeBuilder.create(AstRoot); 297 NodeBuilder.finalize(); 298 fixRegionInfo(*EnteringBB->getParent(), *R->getParent(), RI); 299 300 CodegenedScops++; 301 CodegenedAffineLoops += ScopStats.NumAffineLoops; 302 CodegenedBoxedLoops += ScopStats.NumBoxedLoops; 303 } 304 305 Function *F = EnteringBB->getParent(); 306 verifyGeneratedFunction(S, *F, AI); 307 for (auto *SubF : NodeBuilder.getParallelSubfunctions()) 308 verifyGeneratedFunction(S, *SubF, AI); 309 310 // Mark the function such that we run additional cleanup passes on this 311 // function (e.g. mem2reg to rediscover phi nodes). 312 F->addFnAttr("polly-optimized"); 313 return true; 314 } 315 316 namespace { 317 318 class CodeGeneration : public ScopPass { 319 public: 320 static char ID; 321 322 /// The data layout used. 323 const DataLayout *DL; 324 325 /// @name The analysis passes we need to generate code. 326 /// 327 ///{ 328 LoopInfo *LI; 329 IslAstInfo *AI; 330 DominatorTree *DT; 331 ScalarEvolution *SE; 332 RegionInfo *RI; 333 ///} 334 335 CodeGeneration() : ScopPass(ID) {} 336 337 /// Generate LLVM-IR for the SCoP @p S. 338 bool runOnScop(Scop &S) override { 339 // Skip SCoPs in case they're already code-generated by PPCGCodeGeneration. 340 if (S.isToBeSkipped()) 341 return false; 342 343 AI = &getAnalysis<IslAstInfoWrapperPass>().getAI(); 344 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 345 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 346 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 347 DL = &S.getFunction().getParent()->getDataLayout(); 348 RI = &getAnalysis<RegionInfoPass>().getRegionInfo(); 349 return CodeGen(S, *AI, *LI, *DT, *SE, *RI); 350 } 351 352 /// Register all analyses and transformation required. 353 void getAnalysisUsage(AnalysisUsage &AU) const override { 354 ScopPass::getAnalysisUsage(AU); 355 356 AU.addRequired<DominatorTreeWrapperPass>(); 357 AU.addRequired<IslAstInfoWrapperPass>(); 358 AU.addRequired<RegionInfoPass>(); 359 AU.addRequired<ScalarEvolutionWrapperPass>(); 360 AU.addRequired<ScopDetectionWrapperPass>(); 361 AU.addRequired<ScopInfoRegionPass>(); 362 AU.addRequired<LoopInfoWrapperPass>(); 363 364 AU.addPreserved<DependenceInfo>(); 365 AU.addPreserved<IslAstInfoWrapperPass>(); 366 367 // FIXME: We do not yet add regions for the newly generated code to the 368 // region tree. 369 } 370 }; 371 } // namespace 372 373 PreservedAnalyses CodeGenerationPass::run(Scop &S, ScopAnalysisManager &SAM, 374 ScopStandardAnalysisResults &AR, 375 SPMUpdater &U) { 376 auto &AI = SAM.getResult<IslAstAnalysis>(S, AR); 377 if (CodeGen(S, AI, AR.LI, AR.DT, AR.SE, AR.RI)) { 378 U.invalidateScop(S); 379 return PreservedAnalyses::none(); 380 } 381 382 return PreservedAnalyses::all(); 383 } 384 385 char CodeGeneration::ID = 1; 386 387 Pass *polly::createCodeGenerationPass() { return new CodeGeneration(); } 388 389 INITIALIZE_PASS_BEGIN(CodeGeneration, "polly-codegen", 390 "Polly - Create LLVM-IR from SCoPs", false, false); 391 INITIALIZE_PASS_DEPENDENCY(DependenceInfo); 392 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 393 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); 394 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); 395 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); 396 INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass); 397 INITIALIZE_PASS_END(CodeGeneration, "polly-codegen", 398 "Polly - Create LLVM-IR from SCoPs", false, false) 399