1 //===- IslNodeBuilder.cpp - Translate an isl AST into a LLVM-IR AST -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the IslNodeBuilder, a class to translate an isl AST into 10 // a LLVM-IR AST. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "polly/CodeGen/IslNodeBuilder.h" 15 #include "polly/CodeGen/BlockGenerators.h" 16 #include "polly/CodeGen/CodeGeneration.h" 17 #include "polly/CodeGen/IslAst.h" 18 #include "polly/CodeGen/IslExprBuilder.h" 19 #include "polly/CodeGen/LoopGeneratorsGOMP.h" 20 #include "polly/CodeGen/LoopGeneratorsKMP.h" 21 #include "polly/CodeGen/RuntimeDebugBuilder.h" 22 #include "polly/Options.h" 23 #include "polly/ScopInfo.h" 24 #include "polly/Support/ISLTools.h" 25 #include "polly/Support/SCEVValidator.h" 26 #include "polly/Support/ScopHelper.h" 27 #include "polly/Support/VirtualInstruction.h" 28 #include "llvm/ADT/APInt.h" 29 #include "llvm/ADT/PostOrderIterator.h" 30 #include "llvm/ADT/SetVector.h" 31 #include "llvm/ADT/SmallPtrSet.h" 32 #include "llvm/ADT/Statistic.h" 33 #include "llvm/Analysis/AssumptionCache.h" 34 #include "llvm/Analysis/LoopInfo.h" 35 #include "llvm/Analysis/RegionInfo.h" 36 #include "llvm/Analysis/ScalarEvolution.h" 37 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 38 #include "llvm/Analysis/TargetLibraryInfo.h" 39 #include "llvm/IR/BasicBlock.h" 40 #include "llvm/IR/Constant.h" 41 #include "llvm/IR/Constants.h" 42 #include "llvm/IR/DataLayout.h" 43 #include "llvm/IR/DerivedTypes.h" 44 #include "llvm/IR/Dominators.h" 45 #include "llvm/IR/Function.h" 46 #include "llvm/IR/InstrTypes.h" 47 #include "llvm/IR/Instruction.h" 48 #include "llvm/IR/Instructions.h" 49 #include "llvm/IR/Module.h" 50 #include "llvm/IR/Type.h" 51 #include "llvm/IR/Value.h" 52 #include "llvm/Support/Casting.h" 53 #include "llvm/Support/CommandLine.h" 54 #include "llvm/Support/ErrorHandling.h" 55 #include "llvm/TargetParser/Triple.h" 56 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 57 #include "isl/aff.h" 58 #include "isl/aff_type.h" 59 #include "isl/ast.h" 60 #include "isl/ast_build.h" 61 #include "isl/isl-noexceptions.h" 62 #include "isl/map.h" 63 #include "isl/set.h" 64 #include "isl/union_map.h" 65 #include "isl/union_set.h" 66 #include "isl/val.h" 67 #include <algorithm> 68 #include <cassert> 69 #include <cstdint> 70 #include <cstring> 71 #include <string> 72 #include <utility> 73 #include <vector> 74 75 using namespace llvm; 76 using namespace polly; 77 78 #define DEBUG_TYPE "polly-codegen" 79 80 STATISTIC(VersionedScops, "Number of SCoPs that required versioning."); 81 82 STATISTIC(SequentialLoops, "Number of generated sequential for-loops"); 83 STATISTIC(ParallelLoops, "Number of generated parallel for-loops"); 84 STATISTIC(IfConditions, "Number of generated if-conditions"); 85 86 /// OpenMP backend options 87 enum class OpenMPBackend { GNU, LLVM }; 88 89 static cl::opt<bool> PollyGenerateRTCPrint( 90 "polly-codegen-emit-rtc-print", 91 cl::desc("Emit code that prints the runtime check result dynamically."), 92 cl::Hidden, cl::cat(PollyCategory)); 93 94 // If this option is set we always use the isl AST generator to regenerate 95 // memory accesses. Without this option set we regenerate expressions using the 96 // original SCEV expressions and only generate new expressions in case the 97 // access relation has been changed and consequently must be regenerated. 98 static cl::opt<bool> PollyGenerateExpressions( 99 "polly-codegen-generate-expressions", 100 cl::desc("Generate AST expressions for unmodified and modified accesses"), 101 cl::Hidden, cl::cat(PollyCategory)); 102 103 static cl::opt<int> PollyTargetFirstLevelCacheLineSize( 104 "polly-target-first-level-cache-line-size", 105 cl::desc("The size of the first level cache line size specified in bytes."), 106 cl::Hidden, cl::init(64), cl::cat(PollyCategory)); 107 108 static cl::opt<OpenMPBackend> PollyOmpBackend( 109 "polly-omp-backend", cl::desc("Choose the OpenMP library to use:"), 110 cl::values(clEnumValN(OpenMPBackend::GNU, "GNU", "GNU OpenMP"), 111 clEnumValN(OpenMPBackend::LLVM, "LLVM", "LLVM OpenMP")), 112 cl::Hidden, cl::init(OpenMPBackend::GNU), cl::cat(PollyCategory)); 113 114 isl::ast_expr IslNodeBuilder::getUpperBound(isl::ast_node_for For, 115 ICmpInst::Predicate &Predicate) { 116 isl::ast_expr Cond = For.cond(); 117 isl::ast_expr Iterator = For.iterator(); 118 assert(isl_ast_expr_get_type(Cond.get()) == isl_ast_expr_op && 119 "conditional expression is not an atomic upper bound"); 120 121 isl_ast_op_type OpType = isl_ast_expr_get_op_type(Cond.get()); 122 123 switch (OpType) { 124 case isl_ast_op_le: 125 Predicate = ICmpInst::ICMP_SLE; 126 break; 127 case isl_ast_op_lt: 128 Predicate = ICmpInst::ICMP_SLT; 129 break; 130 default: 131 llvm_unreachable("Unexpected comparison type in loop condition"); 132 } 133 134 isl::ast_expr Arg0 = Cond.get_op_arg(0); 135 136 assert(isl_ast_expr_get_type(Arg0.get()) == isl_ast_expr_id && 137 "conditional expression is not an atomic upper bound"); 138 139 isl::id UBID = Arg0.get_id(); 140 141 assert(isl_ast_expr_get_type(Iterator.get()) == isl_ast_expr_id && 142 "Could not get the iterator"); 143 144 isl::id IteratorID = Iterator.get_id(); 145 146 assert(UBID.get() == IteratorID.get() && 147 "conditional expression is not an atomic upper bound"); 148 149 return Cond.get_op_arg(1); 150 } 151 152 int IslNodeBuilder::getNumberOfIterations(isl::ast_node_for For) { 153 assert(isl_ast_node_get_type(For.get()) == isl_ast_node_for); 154 isl::ast_node Body = For.body(); 155 156 // First, check if we can actually handle this code. 157 switch (isl_ast_node_get_type(Body.get())) { 158 case isl_ast_node_user: 159 break; 160 case isl_ast_node_block: { 161 isl::ast_node_block BodyBlock = Body.as<isl::ast_node_block>(); 162 isl::ast_node_list List = BodyBlock.children(); 163 for (isl::ast_node Node : List) { 164 isl_ast_node_type NodeType = isl_ast_node_get_type(Node.get()); 165 if (NodeType != isl_ast_node_user) 166 return -1; 167 } 168 break; 169 } 170 default: 171 return -1; 172 } 173 174 isl::ast_expr Init = For.init(); 175 if (!Init.isa<isl::ast_expr_int>() || !Init.val().is_zero()) 176 return -1; 177 isl::ast_expr Inc = For.inc(); 178 if (!Inc.isa<isl::ast_expr_int>() || !Inc.val().is_one()) 179 return -1; 180 CmpInst::Predicate Predicate; 181 isl::ast_expr UB = getUpperBound(For, Predicate); 182 if (!UB.isa<isl::ast_expr_int>()) 183 return -1; 184 isl::val UpVal = UB.get_val(); 185 int NumberIterations = UpVal.get_num_si(); 186 if (NumberIterations < 0) 187 return -1; 188 if (Predicate == CmpInst::ICMP_SLT) 189 return NumberIterations; 190 else 191 return NumberIterations + 1; 192 } 193 194 static void findReferencesByUse(Value *SrcVal, ScopStmt *UserStmt, 195 Loop *UserScope, const ValueMapT &GlobalMap, 196 SetVector<Value *> &Values, 197 SetVector<const SCEV *> &SCEVs) { 198 VirtualUse VUse = VirtualUse::create(UserStmt, UserScope, SrcVal, true); 199 switch (VUse.getKind()) { 200 case VirtualUse::Constant: 201 // When accelerator-offloading, GlobalValue is a host address whose content 202 // must still be transferred to the GPU. 203 if (isa<GlobalValue>(SrcVal)) 204 Values.insert(SrcVal); 205 break; 206 207 case VirtualUse::Synthesizable: 208 SCEVs.insert(VUse.getScevExpr()); 209 return; 210 211 case VirtualUse::Block: 212 case VirtualUse::ReadOnly: 213 case VirtualUse::Hoisted: 214 case VirtualUse::Intra: 215 case VirtualUse::Inter: 216 break; 217 } 218 219 if (Value *NewVal = GlobalMap.lookup(SrcVal)) 220 Values.insert(NewVal); 221 } 222 223 static void findReferencesInInst(Instruction *Inst, ScopStmt *UserStmt, 224 Loop *UserScope, const ValueMapT &GlobalMap, 225 SetVector<Value *> &Values, 226 SetVector<const SCEV *> &SCEVs) { 227 for (Use &U : Inst->operands()) 228 findReferencesByUse(U.get(), UserStmt, UserScope, GlobalMap, Values, SCEVs); 229 } 230 231 static void findReferencesInStmt(ScopStmt *Stmt, SetVector<Value *> &Values, 232 ValueMapT &GlobalMap, 233 SetVector<const SCEV *> &SCEVs) { 234 LoopInfo *LI = Stmt->getParent()->getLI(); 235 236 BasicBlock *BB = Stmt->getBasicBlock(); 237 Loop *Scope = LI->getLoopFor(BB); 238 for (Instruction *Inst : Stmt->getInstructions()) 239 findReferencesInInst(Inst, Stmt, Scope, GlobalMap, Values, SCEVs); 240 241 if (Stmt->isRegionStmt()) { 242 for (BasicBlock *BB : Stmt->getRegion()->blocks()) { 243 Loop *Scope = LI->getLoopFor(BB); 244 for (Instruction &Inst : *BB) 245 findReferencesInInst(&Inst, Stmt, Scope, GlobalMap, Values, SCEVs); 246 } 247 } 248 } 249 250 void polly::addReferencesFromStmt(ScopStmt *Stmt, void *UserPtr, 251 bool CreateScalarRefs) { 252 auto &References = *static_cast<SubtreeReferences *>(UserPtr); 253 254 findReferencesInStmt(Stmt, References.Values, References.GlobalMap, 255 References.SCEVs); 256 257 for (auto &Access : *Stmt) { 258 if (References.ParamSpace) { 259 isl::space ParamSpace = Access->getLatestAccessRelation().get_space(); 260 (*References.ParamSpace) = 261 References.ParamSpace->align_params(ParamSpace); 262 } 263 264 if (Access->isLatestArrayKind()) { 265 auto *BasePtr = Access->getLatestScopArrayInfo()->getBasePtr(); 266 if (Instruction *OpInst = dyn_cast<Instruction>(BasePtr)) 267 if (Stmt->getParent()->contains(OpInst)) 268 continue; 269 270 References.Values.insert(BasePtr); 271 continue; 272 } 273 274 if (CreateScalarRefs) 275 References.Values.insert(References.BlockGen.getOrCreateAlloca(*Access)); 276 } 277 } 278 279 /// Extract the out-of-scop values and SCEVs referenced from a set describing 280 /// a ScopStmt. 281 /// 282 /// This includes the SCEVUnknowns referenced by the SCEVs used in the 283 /// statement and the base pointers of the memory accesses. For scalar 284 /// statements we force the generation of alloca memory locations and list 285 /// these locations in the set of out-of-scop values as well. 286 /// 287 /// @param Set A set which references the ScopStmt we are interested in. 288 /// @param UserPtr A void pointer that can be casted to a SubtreeReferences 289 /// structure. 290 static void addReferencesFromStmtSet(isl::set Set, SubtreeReferences *UserPtr) { 291 isl::id Id = Set.get_tuple_id(); 292 auto *Stmt = static_cast<ScopStmt *>(Id.get_user()); 293 addReferencesFromStmt(Stmt, UserPtr); 294 } 295 296 /// Extract the out-of-scop values and SCEVs referenced from a union set 297 /// referencing multiple ScopStmts. 298 /// 299 /// This includes the SCEVUnknowns referenced by the SCEVs used in the 300 /// statement and the base pointers of the memory accesses. For scalar 301 /// statements we force the generation of alloca memory locations and list 302 /// these locations in the set of out-of-scop values as well. 303 /// 304 /// @param USet A union set referencing the ScopStmts we are interested 305 /// in. 306 /// @param References The SubtreeReferences data structure through which 307 /// results are returned and further information is 308 /// provided. 309 static void addReferencesFromStmtUnionSet(isl::union_set USet, 310 SubtreeReferences &References) { 311 312 for (isl::set Set : USet.get_set_list()) 313 addReferencesFromStmtSet(Set, &References); 314 } 315 316 isl::union_map 317 IslNodeBuilder::getScheduleForAstNode(const isl::ast_node &Node) { 318 return IslAstInfo::getSchedule(Node); 319 } 320 321 void IslNodeBuilder::getReferencesInSubtree(const isl::ast_node &For, 322 SetVector<Value *> &Values, 323 SetVector<const Loop *> &Loops) { 324 SetVector<const SCEV *> SCEVs; 325 SubtreeReferences References = { 326 LI, SE, S, ValueMap, Values, SCEVs, getBlockGenerator(), nullptr}; 327 328 for (const auto &I : IDToValue) 329 Values.insert(I.second); 330 331 // NOTE: this is populated in IslNodeBuilder::addParameters 332 for (const auto &I : OutsideLoopIterations) 333 Values.insert(cast<SCEVUnknown>(I.second)->getValue()); 334 335 isl::union_set Schedule = getScheduleForAstNode(For).domain(); 336 addReferencesFromStmtUnionSet(Schedule, References); 337 338 for (const SCEV *Expr : SCEVs) { 339 findValues(Expr, SE, Values); 340 findLoops(Expr, Loops); 341 } 342 343 Values.remove_if([](const Value *V) { return isa<GlobalValue>(V); }); 344 345 /// Note: Code generation of induction variables of loops outside Scops 346 /// 347 /// Remove loops that contain the scop or that are part of the scop, as they 348 /// are considered local. This leaves only loops that are before the scop, but 349 /// do not contain the scop itself. 350 /// We ignore loops perfectly contained in the Scop because these are already 351 /// generated at `IslNodeBuilder::addParameters`. These `Loops` are loops 352 /// whose induction variables are referred to by the Scop, but the Scop is not 353 /// fully contained in these Loops. Since there can be many of these, 354 /// we choose to codegen these on-demand. 355 /// @see IslNodeBuilder::materializeNonScopLoopInductionVariable. 356 Loops.remove_if([this](const Loop *L) { 357 return S.contains(L) || L->contains(S.getEntry()); 358 }); 359 360 // Contains Values that may need to be replaced with other values 361 // due to replacements from the ValueMap. We should make sure 362 // that we return correctly remapped values. 363 // NOTE: this code path is tested by: 364 // 1. test/Isl/CodeGen/OpenMP/single_loop_with_loop_invariant_baseptr.ll 365 // 2. test/Isl/CodeGen/OpenMP/loop-body-references-outer-values-3.ll 366 SetVector<Value *> ReplacedValues; 367 for (Value *V : Values) { 368 ReplacedValues.insert(getLatestValue(V)); 369 } 370 Values = ReplacedValues; 371 } 372 373 Value *IslNodeBuilder::getLatestValue(Value *Original) const { 374 auto It = ValueMap.find(Original); 375 if (It == ValueMap.end()) 376 return Original; 377 return It->second; 378 } 379 380 void IslNodeBuilder::createMark(__isl_take isl_ast_node *Node) { 381 auto *Id = isl_ast_node_mark_get_id(Node); 382 auto Child = isl_ast_node_mark_get_node(Node); 383 isl_ast_node_free(Node); 384 // If a child node of a 'SIMD mark' is a loop that has a single iteration, 385 // it will be optimized away and we should skip it. 386 if (strcmp(isl_id_get_name(Id), "SIMD") == 0 && 387 isl_ast_node_get_type(Child) == isl_ast_node_for) { 388 createForSequential(isl::manage(Child).as<isl::ast_node_for>(), true); 389 isl_id_free(Id); 390 return; 391 } 392 393 BandAttr *ChildLoopAttr = getLoopAttr(isl::manage_copy(Id)); 394 BandAttr *AncestorLoopAttr; 395 if (ChildLoopAttr) { 396 // Save current LoopAttr environment to restore again when leaving this 397 // subtree. This means there was no loop between the ancestor LoopAttr and 398 // this mark, i.e. the ancestor LoopAttr did not directly mark a loop. This 399 // can happen e.g. if the AST build peeled or unrolled the loop. 400 AncestorLoopAttr = Annotator.getStagingAttrEnv(); 401 402 Annotator.getStagingAttrEnv() = ChildLoopAttr; 403 } 404 405 create(Child); 406 407 if (ChildLoopAttr) { 408 assert(Annotator.getStagingAttrEnv() == ChildLoopAttr && 409 "Nest must not overwrite loop attr environment"); 410 Annotator.getStagingAttrEnv() = AncestorLoopAttr; 411 } 412 413 isl_id_free(Id); 414 } 415 416 /// Restore the initial ordering of dimensions of the band node 417 /// 418 /// In case the band node represents all the dimensions of the iteration 419 /// domain, recreate the band node to restore the initial ordering of the 420 /// dimensions. 421 /// 422 /// @param Node The band node to be modified. 423 /// @return The modified schedule node. 424 static bool IsLoopVectorizerDisabled(isl::ast_node_for Node) { 425 assert(isl_ast_node_get_type(Node.get()) == isl_ast_node_for); 426 isl::ast_node Body = Node.body(); 427 if (isl_ast_node_get_type(Body.get()) != isl_ast_node_mark) 428 return false; 429 430 isl::ast_node_mark BodyMark = Body.as<isl::ast_node_mark>(); 431 auto Id = BodyMark.id(); 432 if (strcmp(Id.get_name().c_str(), "Loop Vectorizer Disabled") == 0) 433 return true; 434 return false; 435 } 436 437 void IslNodeBuilder::createForSequential(isl::ast_node_for For, 438 bool MarkParallel) { 439 Value *ValueLB, *ValueUB, *ValueInc; 440 Type *MaxType; 441 BasicBlock *ExitBlock; 442 Value *IV; 443 CmpInst::Predicate Predicate; 444 445 bool LoopVectorizerDisabled = IsLoopVectorizerDisabled(For); 446 447 isl::ast_node Body = For.body(); 448 449 // isl_ast_node_for_is_degenerate(For) 450 // 451 // TODO: For degenerated loops we could generate a plain assignment. 452 // However, for now we just reuse the logic for normal loops, which will 453 // create a loop with a single iteration. 454 455 isl::ast_expr Init = For.init(); 456 isl::ast_expr Inc = For.inc(); 457 isl::ast_expr Iterator = For.iterator(); 458 isl::id IteratorID = Iterator.get_id(); 459 isl::ast_expr UB = getUpperBound(For, Predicate); 460 461 ValueLB = ExprBuilder.create(Init.release()); 462 ValueUB = ExprBuilder.create(UB.release()); 463 ValueInc = ExprBuilder.create(Inc.release()); 464 465 MaxType = ExprBuilder.getType(Iterator.get()); 466 MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); 467 MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType()); 468 MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); 469 470 if (MaxType != ValueLB->getType()) 471 ValueLB = Builder.CreateSExt(ValueLB, MaxType); 472 if (MaxType != ValueUB->getType()) 473 ValueUB = Builder.CreateSExt(ValueUB, MaxType); 474 if (MaxType != ValueInc->getType()) 475 ValueInc = Builder.CreateSExt(ValueInc, MaxType); 476 477 // If we can show that LB <Predicate> UB holds at least once, we can 478 // omit the GuardBB in front of the loop. 479 bool UseGuardBB = !GenSE->isKnownPredicate(Predicate, GenSE->getSCEV(ValueLB), 480 GenSE->getSCEV(ValueUB)); 481 IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, *GenLI, *GenDT, 482 ExitBlock, Predicate, &Annotator, MarkParallel, UseGuardBB, 483 LoopVectorizerDisabled); 484 IDToValue[IteratorID.get()] = IV; 485 486 create(Body.release()); 487 488 Annotator.popLoop(MarkParallel); 489 490 IDToValue.erase(IDToValue.find(IteratorID.get())); 491 492 Builder.SetInsertPoint(&ExitBlock->front()); 493 494 SequentialLoops++; 495 } 496 497 void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) { 498 isl_ast_node *Body; 499 isl_ast_expr *Init, *Inc, *Iterator, *UB; 500 isl_id *IteratorID; 501 Value *ValueLB, *ValueUB, *ValueInc; 502 Type *MaxType; 503 Value *IV; 504 CmpInst::Predicate Predicate; 505 506 // The preamble of parallel code interacts different than normal code with 507 // e.g., scalar initialization. Therefore, we ensure the parallel code is 508 // separated from the last basic block. 509 BasicBlock *ParBB = SplitBlock(Builder.GetInsertBlock(), 510 &*Builder.GetInsertPoint(), &DT, &LI); 511 ParBB->setName("polly.parallel.for"); 512 Builder.SetInsertPoint(&ParBB->front()); 513 514 Body = isl_ast_node_for_get_body(For); 515 Init = isl_ast_node_for_get_init(For); 516 Inc = isl_ast_node_for_get_inc(For); 517 Iterator = isl_ast_node_for_get_iterator(For); 518 IteratorID = isl_ast_expr_get_id(Iterator); 519 UB = getUpperBound(isl::manage_copy(For).as<isl::ast_node_for>(), Predicate) 520 .release(); 521 522 ValueLB = ExprBuilder.create(Init); 523 ValueUB = ExprBuilder.create(UB); 524 ValueInc = ExprBuilder.create(Inc); 525 526 // OpenMP always uses SLE. In case the isl generated AST uses a SLT 527 // expression, we need to adjust the loop bound by one. 528 if (Predicate == CmpInst::ICMP_SLT) 529 ValueUB = Builder.CreateAdd( 530 ValueUB, Builder.CreateSExt(Builder.getTrue(), ValueUB->getType())); 531 532 MaxType = ExprBuilder.getType(Iterator); 533 MaxType = ExprBuilder.getWidestType(MaxType, ValueLB->getType()); 534 MaxType = ExprBuilder.getWidestType(MaxType, ValueUB->getType()); 535 MaxType = ExprBuilder.getWidestType(MaxType, ValueInc->getType()); 536 537 if (MaxType != ValueLB->getType()) 538 ValueLB = Builder.CreateSExt(ValueLB, MaxType); 539 if (MaxType != ValueUB->getType()) 540 ValueUB = Builder.CreateSExt(ValueUB, MaxType); 541 if (MaxType != ValueInc->getType()) 542 ValueInc = Builder.CreateSExt(ValueInc, MaxType); 543 544 BasicBlock::iterator LoopBody; 545 546 SetVector<Value *> SubtreeValues; 547 SetVector<const Loop *> Loops; 548 549 getReferencesInSubtree(isl::manage_copy(For), SubtreeValues, Loops); 550 551 // Create for all loops we depend on values that contain the current loop 552 // iteration. These values are necessary to generate code for SCEVs that 553 // depend on such loops. As a result we need to pass them to the subfunction. 554 // See [Code generation of induction variables of loops outside Scops] 555 for (const Loop *L : Loops) { 556 Value *LoopInductionVar = materializeNonScopLoopInductionVariable(L); 557 SubtreeValues.insert(LoopInductionVar); 558 } 559 560 ValueMapT NewValues; 561 562 std::unique_ptr<ParallelLoopGenerator> ParallelLoopGenPtr; 563 564 switch (PollyOmpBackend) { 565 case OpenMPBackend::GNU: 566 ParallelLoopGenPtr.reset(new ParallelLoopGeneratorGOMP(Builder, DL)); 567 break; 568 case OpenMPBackend::LLVM: 569 ParallelLoopGenPtr.reset(new ParallelLoopGeneratorKMP(Builder, DL)); 570 break; 571 } 572 573 IV = ParallelLoopGenPtr->createParallelLoop( 574 ValueLB, ValueUB, ValueInc, SubtreeValues, NewValues, &LoopBody); 575 BasicBlock::iterator AfterLoop = Builder.GetInsertPoint(); 576 577 // Remember the parallel subfunction 578 Function *SubFn = LoopBody->getFunction(); 579 ParallelSubfunctions.push_back(SubFn); 580 581 // We start working on the outlined function. Since DominatorTree/LoopInfo are 582 // not an inter-procedural passes, we temporarily switch them out. Save the 583 // old ones first. 584 Function *CallerFn = Builder.GetInsertBlock()->getParent(); 585 DominatorTree *CallerDT = GenDT; 586 LoopInfo *CallerLI = GenLI; 587 ScalarEvolution *CallerSE = GenSE; 588 ValueMapT CallerGlobals = ValueMap; 589 IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue; 590 591 // Get the analyses for the subfunction. ParallelLoopGenerator already create 592 // DominatorTree and LoopInfo for us. 593 DominatorTree *SubDT = ParallelLoopGenPtr->getCalleeDominatorTree(); 594 LoopInfo *SubLI = ParallelLoopGenPtr->getCalleeLoopInfo(); 595 596 // Create TargetLibraryInfo, AssumptionCachem and ScalarEvolution ourselves. 597 // TODO: Ideally, we would use the pass manager's TargetLibraryInfoPass and 598 // AssumptionAnalysis instead of our own. They contain more target-specific 599 // information than we have available here: TargetLibraryInfoImpl can be a 600 // derived class determined by TargetMachine, AssumptionCache can be 601 // configured using a TargetTransformInfo object also derived from 602 // TargetMachine. 603 TargetLibraryInfoImpl BaselineInfoImpl( 604 Triple(SubFn->getParent()->getTargetTriple())); 605 TargetLibraryInfo CalleeTLI(BaselineInfoImpl, SubFn); 606 AssumptionCache CalleeAC(*SubFn); 607 std::unique_ptr<ScalarEvolution> SubSE = std::make_unique<ScalarEvolution>( 608 *SubFn, CalleeTLI, CalleeAC, *SubDT, *SubLI); 609 610 // Switch to the subfunction 611 GenDT = SubDT; 612 GenLI = SubLI; 613 GenSE = SubSE.get(); 614 BlockGen.switchGeneratedFunc(SubFn, GenDT, GenLI, GenSE); 615 RegionGen.switchGeneratedFunc(SubFn, GenDT, GenLI, GenSE); 616 ExprBuilder.switchGeneratedFunc(SubFn, GenDT, GenLI, GenSE); 617 Builder.SetInsertPoint(&*LoopBody); 618 619 // Update the ValueMap to use instructions in the subfunction. Note that 620 // "GlobalMap" used in BlockGenerator/IslExprBuilder is a reference to this 621 // ValueMap. 622 for (auto &[OldVal, NewVal] : ValueMap) { 623 NewVal = NewValues.lookup(NewVal); 624 625 // Clean-up any value that getReferencesInSubtree thinks we do not need. 626 // DenseMap::erase only writes a tombstone (and destroys OldVal/NewVal), so 627 // does not invalidate our iterator. 628 if (!NewVal) 629 ValueMap.erase(OldVal); 630 } 631 632 // This is for NewVals that do not appear in ValueMap (such as SCoP-invariant 633 // values whose original value can be reused as long as we are in the same 634 // function). No need to map the others. 635 for (auto &[NewVal, NewNewVal] : NewValues) { 636 if (Instruction *NewValInst = dyn_cast<Instruction>((Value *)NewVal)) { 637 if (S.contains(NewValInst)) 638 continue; 639 assert(NewValInst->getFunction() == &S.getFunction()); 640 } 641 assert(!ValueMap.contains(NewVal)); 642 ValueMap[NewVal] = NewNewVal; 643 } 644 645 // Also update the IDToValue map to use instructions from the subfunction. 646 for (auto &[OldVal, NewVal] : IDToValue) { 647 NewVal = NewValues.lookup(NewVal); 648 assert(NewVal); 649 } 650 IDToValue[IteratorID] = IV; 651 652 #ifndef NDEBUG 653 // Check whether the maps now exclusively refer to SubFn values. 654 for (auto &[OldVal, SubVal] : ValueMap) { 655 Instruction *SubInst = dyn_cast<Instruction>((Value *)SubVal); 656 assert(SubInst->getFunction() == SubFn && 657 "Instructions from outside the subfn cannot be accessed within the " 658 "subfn"); 659 } 660 for (auto &[Id, SubVal] : IDToValue) { 661 Instruction *SubInst = dyn_cast<Instruction>((Value *)SubVal); 662 assert(SubInst->getFunction() == SubFn && 663 "Instructions from outside the subfn cannot be accessed within the " 664 "subfn"); 665 } 666 #endif 667 668 ValueMapT NewValuesReverse; 669 for (auto P : NewValues) 670 NewValuesReverse[P.second] = P.first; 671 672 Annotator.addAlternativeAliasBases(NewValuesReverse); 673 674 create(Body); 675 676 Annotator.resetAlternativeAliasBases(); 677 678 // Resume working on the caller function. 679 GenDT = CallerDT; 680 GenLI = CallerLI; 681 GenSE = CallerSE; 682 IDToValue = std::move(IDToValueCopy); 683 ValueMap = std::move(CallerGlobals); 684 ExprBuilder.switchGeneratedFunc(CallerFn, CallerDT, CallerLI, CallerSE); 685 RegionGen.switchGeneratedFunc(CallerFn, CallerDT, CallerLI, CallerSE); 686 BlockGen.switchGeneratedFunc(CallerFn, CallerDT, CallerLI, CallerSE); 687 Builder.SetInsertPoint(&*AfterLoop); 688 689 for (const Loop *L : Loops) 690 OutsideLoopIterations.erase(L); 691 692 isl_ast_node_free(For); 693 isl_ast_expr_free(Iterator); 694 isl_id_free(IteratorID); 695 696 ParallelLoops++; 697 } 698 699 void IslNodeBuilder::createFor(__isl_take isl_ast_node *For) { 700 if (IslAstInfo::isExecutedInParallel(isl::manage_copy(For))) { 701 createForParallel(For); 702 return; 703 } 704 bool Parallel = (IslAstInfo::isParallel(isl::manage_copy(For)) && 705 !IslAstInfo::isReductionParallel(isl::manage_copy(For))); 706 createForSequential(isl::manage(For).as<isl::ast_node_for>(), Parallel); 707 } 708 709 void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) { 710 isl_ast_expr *Cond = isl_ast_node_if_get_cond(If); 711 712 Function *F = Builder.GetInsertBlock()->getParent(); 713 LLVMContext &Context = F->getContext(); 714 715 BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), 716 &*Builder.GetInsertPoint(), GenDT, GenLI); 717 CondBB->setName("polly.cond"); 718 BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), GenDT, GenLI); 719 MergeBB->setName("polly.merge"); 720 BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F); 721 BasicBlock *ElseBB = BasicBlock::Create(Context, "polly.else", F); 722 723 GenDT->addNewBlock(ThenBB, CondBB); 724 GenDT->addNewBlock(ElseBB, CondBB); 725 GenDT->changeImmediateDominator(MergeBB, CondBB); 726 727 Loop *L = GenLI->getLoopFor(CondBB); 728 if (L) { 729 L->addBasicBlockToLoop(ThenBB, *GenLI); 730 L->addBasicBlockToLoop(ElseBB, *GenLI); 731 } 732 733 CondBB->getTerminator()->eraseFromParent(); 734 735 Builder.SetInsertPoint(CondBB); 736 Value *Predicate = ExprBuilder.create(Cond); 737 Builder.CreateCondBr(Predicate, ThenBB, ElseBB); 738 Builder.SetInsertPoint(ThenBB); 739 Builder.CreateBr(MergeBB); 740 Builder.SetInsertPoint(ElseBB); 741 Builder.CreateBr(MergeBB); 742 Builder.SetInsertPoint(&ThenBB->front()); 743 744 create(isl_ast_node_if_get_then(If)); 745 746 Builder.SetInsertPoint(&ElseBB->front()); 747 748 if (isl_ast_node_if_has_else(If)) 749 create(isl_ast_node_if_get_else(If)); 750 751 Builder.SetInsertPoint(&MergeBB->front()); 752 753 isl_ast_node_free(If); 754 755 IfConditions++; 756 } 757 758 __isl_give isl_id_to_ast_expr * 759 IslNodeBuilder::createNewAccesses(ScopStmt *Stmt, 760 __isl_keep isl_ast_node *Node) { 761 isl::id_to_ast_expr NewAccesses = 762 isl::id_to_ast_expr::alloc(Stmt->getParent()->getIslCtx(), 0); 763 764 isl::ast_build Build = IslAstInfo::getBuild(isl::manage_copy(Node)); 765 assert(!Build.is_null() && "Could not obtain isl_ast_build from user node"); 766 Stmt->setAstBuild(Build); 767 768 for (auto *MA : *Stmt) { 769 if (!MA->hasNewAccessRelation()) { 770 if (PollyGenerateExpressions) { 771 if (!MA->isAffine()) 772 continue; 773 if (MA->getLatestScopArrayInfo()->getBasePtrOriginSAI()) 774 continue; 775 776 auto *BasePtr = 777 dyn_cast<Instruction>(MA->getLatestScopArrayInfo()->getBasePtr()); 778 if (BasePtr && Stmt->getParent()->getRegion().contains(BasePtr)) 779 continue; 780 } else { 781 continue; 782 } 783 } 784 assert(MA->isAffine() && 785 "Only affine memory accesses can be code generated"); 786 787 isl::union_map Schedule = Build.get_schedule(); 788 789 #ifndef NDEBUG 790 if (MA->isRead()) { 791 auto Dom = Stmt->getDomain().release(); 792 auto SchedDom = isl_set_from_union_set(Schedule.domain().release()); 793 auto AccDom = isl_map_domain(MA->getAccessRelation().release()); 794 Dom = isl_set_intersect_params(Dom, 795 Stmt->getParent()->getContext().release()); 796 SchedDom = isl_set_intersect_params( 797 SchedDom, Stmt->getParent()->getContext().release()); 798 assert(isl_set_is_subset(SchedDom, AccDom) && 799 "Access relation not defined on full schedule domain"); 800 assert(isl_set_is_subset(Dom, AccDom) && 801 "Access relation not defined on full domain"); 802 isl_set_free(AccDom); 803 isl_set_free(SchedDom); 804 isl_set_free(Dom); 805 } 806 #endif 807 808 isl::pw_multi_aff PWAccRel = MA->applyScheduleToAccessRelation(Schedule); 809 810 // isl cannot generate an index expression for access-nothing accesses. 811 isl::set AccDomain = PWAccRel.domain(); 812 isl::set Context = S.getContext(); 813 AccDomain = AccDomain.intersect_params(Context); 814 if (AccDomain.is_empty()) 815 continue; 816 817 isl::ast_expr AccessExpr = Build.access_from(PWAccRel); 818 NewAccesses = NewAccesses.set(MA->getId(), AccessExpr); 819 } 820 821 return NewAccesses.release(); 822 } 823 824 void IslNodeBuilder::createSubstitutions(__isl_take isl_ast_expr *Expr, 825 ScopStmt *Stmt, LoopToScevMapT <S) { 826 assert(isl_ast_expr_get_type(Expr) == isl_ast_expr_op && 827 "Expression of type 'op' expected"); 828 assert(isl_ast_expr_get_op_type(Expr) == isl_ast_op_call && 829 "Operation of type 'call' expected"); 830 for (int i = 0; i < isl_ast_expr_get_op_n_arg(Expr) - 1; ++i) { 831 isl_ast_expr *SubExpr; 832 Value *V; 833 834 SubExpr = isl_ast_expr_get_op_arg(Expr, i + 1); 835 V = ExprBuilder.create(SubExpr); 836 ScalarEvolution *SE = Stmt->getParent()->getSE(); 837 LTS[Stmt->getLoopForDimension(i)] = SE->getUnknown(V); 838 } 839 840 isl_ast_expr_free(Expr); 841 } 842 843 void IslNodeBuilder::createSubstitutionsVector( 844 __isl_take isl_ast_expr *Expr, ScopStmt *Stmt, 845 std::vector<LoopToScevMapT> &VLTS, std::vector<Value *> &IVS, 846 __isl_take isl_id *IteratorID) { 847 int i = 0; 848 849 Value *OldValue = IDToValue[IteratorID]; 850 for (Value *IV : IVS) { 851 IDToValue[IteratorID] = IV; 852 createSubstitutions(isl_ast_expr_copy(Expr), Stmt, VLTS[i]); 853 i++; 854 } 855 856 IDToValue[IteratorID] = OldValue; 857 isl_id_free(IteratorID); 858 isl_ast_expr_free(Expr); 859 } 860 861 void IslNodeBuilder::generateCopyStmt( 862 ScopStmt *Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) { 863 assert(Stmt->size() == 2); 864 auto ReadAccess = Stmt->begin(); 865 auto WriteAccess = ReadAccess++; 866 assert((*ReadAccess)->isRead() && (*WriteAccess)->isMustWrite()); 867 assert((*ReadAccess)->getElementType() == (*WriteAccess)->getElementType() && 868 "Accesses use the same data type"); 869 assert((*ReadAccess)->isArrayKind() && (*WriteAccess)->isArrayKind()); 870 auto *AccessExpr = 871 isl_id_to_ast_expr_get(NewAccesses, (*ReadAccess)->getId().release()); 872 auto *LoadValue = ExprBuilder.create(AccessExpr); 873 AccessExpr = 874 isl_id_to_ast_expr_get(NewAccesses, (*WriteAccess)->getId().release()); 875 auto *StoreAddr = ExprBuilder.createAccessAddress(AccessExpr).first; 876 Builder.CreateStore(LoadValue, StoreAddr); 877 } 878 879 Value *IslNodeBuilder::materializeNonScopLoopInductionVariable(const Loop *L) { 880 assert(!OutsideLoopIterations.contains(L) && 881 "trying to materialize loop induction variable twice"); 882 const SCEV *OuterLIV = SE.getAddRecExpr(SE.getUnknown(Builder.getInt64(0)), 883 SE.getUnknown(Builder.getInt64(1)), L, 884 SCEV::FlagAnyWrap); 885 Value *V = generateSCEV(OuterLIV); 886 OutsideLoopIterations[L] = SE.getUnknown(V); 887 return V; 888 } 889 890 void IslNodeBuilder::createUser(__isl_take isl_ast_node *User) { 891 LoopToScevMapT LTS; 892 isl_id *Id; 893 ScopStmt *Stmt; 894 895 isl_ast_expr *Expr = isl_ast_node_user_get_expr(User); 896 isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0); 897 Id = isl_ast_expr_get_id(StmtExpr); 898 isl_ast_expr_free(StmtExpr); 899 900 LTS.insert(OutsideLoopIterations.begin(), OutsideLoopIterations.end()); 901 902 Stmt = (ScopStmt *)isl_id_get_user(Id); 903 auto *NewAccesses = createNewAccesses(Stmt, User); 904 if (Stmt->isCopyStmt()) { 905 generateCopyStmt(Stmt, NewAccesses); 906 isl_ast_expr_free(Expr); 907 } else { 908 createSubstitutions(Expr, Stmt, LTS); 909 910 if (Stmt->isBlockStmt()) 911 BlockGen.copyStmt(*Stmt, LTS, NewAccesses); 912 else 913 RegionGen.copyStmt(*Stmt, LTS, NewAccesses); 914 } 915 916 isl_id_to_ast_expr_free(NewAccesses); 917 isl_ast_node_free(User); 918 isl_id_free(Id); 919 } 920 921 void IslNodeBuilder::createBlock(__isl_take isl_ast_node *Block) { 922 isl_ast_node_list *List = isl_ast_node_block_get_children(Block); 923 924 for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i) 925 create(isl_ast_node_list_get_ast_node(List, i)); 926 927 isl_ast_node_free(Block); 928 isl_ast_node_list_free(List); 929 } 930 931 void IslNodeBuilder::create(__isl_take isl_ast_node *Node) { 932 switch (isl_ast_node_get_type(Node)) { 933 case isl_ast_node_error: 934 llvm_unreachable("code generation error"); 935 case isl_ast_node_mark: 936 createMark(Node); 937 return; 938 case isl_ast_node_for: 939 createFor(Node); 940 return; 941 case isl_ast_node_if: 942 createIf(Node); 943 return; 944 case isl_ast_node_user: 945 createUser(Node); 946 return; 947 case isl_ast_node_block: 948 createBlock(Node); 949 return; 950 } 951 952 llvm_unreachable("Unknown isl_ast_node type"); 953 } 954 955 bool IslNodeBuilder::materializeValue(__isl_take isl_id *Id) { 956 // If the Id is already mapped, skip it. 957 if (!IDToValue.count(Id)) { 958 auto *ParamSCEV = (const SCEV *)isl_id_get_user(Id); 959 Value *V = nullptr; 960 961 // Parameters could refer to invariant loads that need to be 962 // preloaded before we can generate code for the parameter. Thus, 963 // check if any value referred to in ParamSCEV is an invariant load 964 // and if so make sure its equivalence class is preloaded. 965 SetVector<Value *> Values; 966 findValues(ParamSCEV, SE, Values); 967 for (auto *Val : Values) { 968 // Check if the value is an instruction in a dead block within the SCoP 969 // and if so do not code generate it. 970 if (auto *Inst = dyn_cast<Instruction>(Val)) { 971 if (S.contains(Inst)) { 972 bool IsDead = true; 973 974 // Check for "undef" loads first, then if there is a statement for 975 // the parent of Inst and lastly if the parent of Inst has an empty 976 // domain. In the first and last case the instruction is dead but if 977 // there is a statement or the domain is not empty Inst is not dead. 978 auto MemInst = MemAccInst::dyn_cast(Inst); 979 auto Address = MemInst ? MemInst.getPointerOperand() : nullptr; 980 if (Address && SE.getUnknown(UndefValue::get(Address->getType())) == 981 SE.getPointerBase(SE.getSCEV(Address))) { 982 } else if (S.getStmtFor(Inst)) { 983 IsDead = false; 984 } else { 985 auto *Domain = S.getDomainConditions(Inst->getParent()).release(); 986 IsDead = isl_set_is_empty(Domain); 987 isl_set_free(Domain); 988 } 989 990 if (IsDead) { 991 V = UndefValue::get(ParamSCEV->getType()); 992 break; 993 } 994 } 995 } 996 997 if (auto *IAClass = S.lookupInvariantEquivClass(Val)) { 998 // Check if this invariant access class is empty, hence if we never 999 // actually added a loads instruction to it. In that case it has no 1000 // (meaningful) users and we should not try to code generate it. 1001 if (IAClass->InvariantAccesses.empty()) 1002 V = UndefValue::get(ParamSCEV->getType()); 1003 1004 if (!preloadInvariantEquivClass(*IAClass)) { 1005 isl_id_free(Id); 1006 return false; 1007 } 1008 } 1009 } 1010 1011 V = V ? V : generateSCEV(ParamSCEV); 1012 IDToValue[Id] = V; 1013 } 1014 1015 isl_id_free(Id); 1016 return true; 1017 } 1018 1019 bool IslNodeBuilder::materializeParameters(__isl_take isl_set *Set) { 1020 for (unsigned i = 0, e = isl_set_dim(Set, isl_dim_param); i < e; ++i) { 1021 if (!isl_set_involves_dims(Set, isl_dim_param, i, 1)) 1022 continue; 1023 isl_id *Id = isl_set_get_dim_id(Set, isl_dim_param, i); 1024 if (!materializeValue(Id)) 1025 return false; 1026 } 1027 return true; 1028 } 1029 1030 bool IslNodeBuilder::materializeParameters() { 1031 for (const SCEV *Param : S.parameters()) { 1032 isl_id *Id = S.getIdForParam(Param).release(); 1033 if (!materializeValue(Id)) 1034 return false; 1035 } 1036 return true; 1037 } 1038 1039 Value *IslNodeBuilder::preloadUnconditionally(__isl_take isl_set *AccessRange, 1040 isl_ast_build *Build, 1041 Instruction *AccInst) { 1042 isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange); 1043 isl_ast_expr *Access = 1044 isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); 1045 auto *Address = isl_ast_expr_address_of(Access); 1046 auto *AddressValue = ExprBuilder.create(Address); 1047 Value *PreloadVal; 1048 1049 // Correct the type as the SAI might have a different type than the user 1050 // expects, especially if the base pointer is a struct. 1051 Type *Ty = AccInst->getType(); 1052 1053 auto *Ptr = AddressValue; 1054 auto Name = Ptr->getName(); 1055 PreloadVal = Builder.CreateLoad(Ty, Ptr, Name + ".load"); 1056 if (LoadInst *PreloadInst = dyn_cast<LoadInst>(PreloadVal)) 1057 PreloadInst->setAlignment(cast<LoadInst>(AccInst)->getAlign()); 1058 1059 // TODO: This is only a hot fix for SCoP sequences that use the same load 1060 // instruction contained and hoisted by one of the SCoPs. 1061 if (SE.isSCEVable(Ty)) 1062 SE.forgetValue(AccInst); 1063 1064 return PreloadVal; 1065 } 1066 1067 Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA, 1068 __isl_take isl_set *Domain) { 1069 isl_set *AccessRange = isl_map_range(MA.getAddressFunction().release()); 1070 AccessRange = isl_set_gist_params(AccessRange, S.getContext().release()); 1071 1072 if (!materializeParameters(AccessRange)) { 1073 isl_set_free(AccessRange); 1074 isl_set_free(Domain); 1075 return nullptr; 1076 } 1077 1078 auto *Build = 1079 isl_ast_build_from_context(isl_set_universe(S.getParamSpace().release())); 1080 isl_set *Universe = isl_set_universe(isl_set_get_space(Domain)); 1081 bool AlwaysExecuted = isl_set_is_equal(Domain, Universe); 1082 isl_set_free(Universe); 1083 1084 Instruction *AccInst = MA.getAccessInstruction(); 1085 Type *AccInstTy = AccInst->getType(); 1086 1087 Value *PreloadVal = nullptr; 1088 if (AlwaysExecuted) { 1089 PreloadVal = preloadUnconditionally(AccessRange, Build, AccInst); 1090 isl_ast_build_free(Build); 1091 isl_set_free(Domain); 1092 return PreloadVal; 1093 } 1094 1095 if (!materializeParameters(Domain)) { 1096 isl_ast_build_free(Build); 1097 isl_set_free(AccessRange); 1098 isl_set_free(Domain); 1099 return nullptr; 1100 } 1101 1102 isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain); 1103 Domain = nullptr; 1104 1105 ExprBuilder.setTrackOverflow(true); 1106 Value *Cond = ExprBuilder.createBool(DomainCond); 1107 Value *OverflowHappened = Builder.CreateNot(ExprBuilder.getOverflowState(), 1108 "polly.preload.cond.overflown"); 1109 Cond = Builder.CreateAnd(Cond, OverflowHappened, "polly.preload.cond.result"); 1110 ExprBuilder.setTrackOverflow(false); 1111 1112 if (!Cond->getType()->isIntegerTy(1)) 1113 Cond = Builder.CreateIsNotNull(Cond); 1114 1115 BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), 1116 &*Builder.GetInsertPoint(), GenDT, GenLI); 1117 CondBB->setName("polly.preload.cond"); 1118 1119 BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), GenDT, GenLI); 1120 MergeBB->setName("polly.preload.merge"); 1121 1122 Function *F = Builder.GetInsertBlock()->getParent(); 1123 LLVMContext &Context = F->getContext(); 1124 BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F); 1125 1126 GenDT->addNewBlock(ExecBB, CondBB); 1127 if (Loop *L = GenLI->getLoopFor(CondBB)) 1128 L->addBasicBlockToLoop(ExecBB, *GenLI); 1129 1130 auto *CondBBTerminator = CondBB->getTerminator(); 1131 Builder.SetInsertPoint(CondBBTerminator); 1132 Builder.CreateCondBr(Cond, ExecBB, MergeBB); 1133 CondBBTerminator->eraseFromParent(); 1134 1135 Builder.SetInsertPoint(ExecBB); 1136 Builder.CreateBr(MergeBB); 1137 1138 Builder.SetInsertPoint(ExecBB->getTerminator()); 1139 Value *PreAccInst = preloadUnconditionally(AccessRange, Build, AccInst); 1140 Builder.SetInsertPoint(MergeBB->getTerminator()); 1141 auto *MergePHI = Builder.CreatePHI( 1142 AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge"); 1143 PreloadVal = MergePHI; 1144 1145 if (!PreAccInst) { 1146 PreloadVal = nullptr; 1147 PreAccInst = UndefValue::get(AccInstTy); 1148 } 1149 1150 MergePHI->addIncoming(PreAccInst, ExecBB); 1151 MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB); 1152 1153 isl_ast_build_free(Build); 1154 return PreloadVal; 1155 } 1156 1157 bool IslNodeBuilder::preloadInvariantEquivClass( 1158 InvariantEquivClassTy &IAClass) { 1159 // For an equivalence class of invariant loads we pre-load the representing 1160 // element with the unified execution context. However, we have to map all 1161 // elements of the class to the one preloaded load as they are referenced 1162 // during the code generation and therefore need to be mapped. 1163 const MemoryAccessList &MAs = IAClass.InvariantAccesses; 1164 if (MAs.empty()) 1165 return true; 1166 1167 MemoryAccess *MA = MAs.front(); 1168 assert(MA->isArrayKind() && MA->isRead()); 1169 1170 // If the access function was already mapped, the preload of this equivalence 1171 // class was triggered earlier already and doesn't need to be done again. 1172 if (ValueMap.count(MA->getAccessInstruction())) 1173 return true; 1174 1175 // Check for recursion which can be caused by additional constraints, e.g., 1176 // non-finite loop constraints. In such a case we have to bail out and insert 1177 // a "false" runtime check that will cause the original code to be executed. 1178 auto PtrId = std::make_pair(IAClass.IdentifyingPointer, IAClass.AccessType); 1179 if (!PreloadedPtrs.insert(PtrId).second) 1180 return false; 1181 1182 // The execution context of the IAClass. 1183 isl::set &ExecutionCtx = IAClass.ExecutionContext; 1184 1185 // If the base pointer of this class is dependent on another one we have to 1186 // make sure it was preloaded already. 1187 auto *SAI = MA->getScopArrayInfo(); 1188 if (auto *BaseIAClass = S.lookupInvariantEquivClass(SAI->getBasePtr())) { 1189 if (!preloadInvariantEquivClass(*BaseIAClass)) 1190 return false; 1191 1192 // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx and 1193 // we need to refine the ExecutionCtx. 1194 isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext; 1195 ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx); 1196 } 1197 1198 // If the size of a dimension is dependent on another class, make sure it is 1199 // preloaded. 1200 for (unsigned i = 1, e = SAI->getNumberOfDimensions(); i < e; ++i) { 1201 const SCEV *Dim = SAI->getDimensionSize(i); 1202 SetVector<Value *> Values; 1203 findValues(Dim, SE, Values); 1204 for (auto *Val : Values) { 1205 if (auto *BaseIAClass = S.lookupInvariantEquivClass(Val)) { 1206 if (!preloadInvariantEquivClass(*BaseIAClass)) 1207 return false; 1208 1209 // After we preloaded the BaseIAClass we adjusted the BaseExecutionCtx 1210 // and we need to refine the ExecutionCtx. 1211 isl::set BaseExecutionCtx = BaseIAClass->ExecutionContext; 1212 ExecutionCtx = ExecutionCtx.intersect(BaseExecutionCtx); 1213 } 1214 } 1215 } 1216 1217 Instruction *AccInst = MA->getAccessInstruction(); 1218 Type *AccInstTy = AccInst->getType(); 1219 1220 Value *PreloadVal = preloadInvariantLoad(*MA, ExecutionCtx.copy()); 1221 if (!PreloadVal) 1222 return false; 1223 1224 for (const MemoryAccess *MA : MAs) { 1225 Instruction *MAAccInst = MA->getAccessInstruction(); 1226 assert(PreloadVal->getType() == MAAccInst->getType()); 1227 ValueMap[MAAccInst] = PreloadVal; 1228 } 1229 1230 if (SE.isSCEVable(AccInstTy)) { 1231 isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst)).release(); 1232 if (ParamId) 1233 IDToValue[ParamId] = PreloadVal; 1234 isl_id_free(ParamId); 1235 } 1236 1237 BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock(); 1238 auto *Alloca = new AllocaInst(AccInstTy, DL.getAllocaAddrSpace(), 1239 AccInst->getName() + ".preload.s2a", 1240 EntryBB->getFirstInsertionPt()); 1241 Builder.CreateStore(PreloadVal, Alloca); 1242 ValueMapT PreloadedPointer; 1243 PreloadedPointer[PreloadVal] = AccInst; 1244 Annotator.addAlternativeAliasBases(PreloadedPointer); 1245 1246 for (auto *DerivedSAI : SAI->getDerivedSAIs()) { 1247 Value *BasePtr = DerivedSAI->getBasePtr(); 1248 1249 for (const MemoryAccess *MA : MAs) { 1250 // As the derived SAI information is quite coarse, any load from the 1251 // current SAI could be the base pointer of the derived SAI, however we 1252 // should only change the base pointer of the derived SAI if we actually 1253 // preloaded it. 1254 if (BasePtr == MA->getOriginalBaseAddr()) { 1255 assert(BasePtr->getType() == PreloadVal->getType()); 1256 DerivedSAI->setBasePtr(PreloadVal); 1257 } 1258 1259 // For scalar derived SAIs we remap the alloca used for the derived value. 1260 if (BasePtr == MA->getAccessInstruction()) 1261 ScalarMap[DerivedSAI] = Alloca; 1262 } 1263 } 1264 1265 for (const MemoryAccess *MA : MAs) { 1266 Instruction *MAAccInst = MA->getAccessInstruction(); 1267 // Use the escape system to get the correct value to users outside the SCoP. 1268 BlockGenerator::EscapeUserVectorTy EscapeUsers; 1269 for (auto *U : MAAccInst->users()) 1270 if (Instruction *UI = dyn_cast<Instruction>(U)) 1271 if (!S.contains(UI)) 1272 EscapeUsers.push_back(UI); 1273 1274 if (EscapeUsers.empty()) 1275 continue; 1276 1277 EscapeMap[MA->getAccessInstruction()] = 1278 std::make_pair(Alloca, std::move(EscapeUsers)); 1279 } 1280 1281 return true; 1282 } 1283 1284 void IslNodeBuilder::allocateNewArrays(BBPair StartExitBlocks) { 1285 for (auto &SAI : S.arrays()) { 1286 if (SAI->getBasePtr()) 1287 continue; 1288 1289 assert(SAI->getNumberOfDimensions() > 0 && SAI->getDimensionSize(0) && 1290 "The size of the outermost dimension is used to declare newly " 1291 "created arrays that require memory allocation."); 1292 1293 Type *NewArrayType = nullptr; 1294 1295 // Get the size of the array = size(dim_1)*...*size(dim_n) 1296 uint64_t ArraySizeInt = 1; 1297 for (int i = SAI->getNumberOfDimensions() - 1; i >= 0; i--) { 1298 auto *DimSize = SAI->getDimensionSize(i); 1299 unsigned UnsignedDimSize = static_cast<const SCEVConstant *>(DimSize) 1300 ->getAPInt() 1301 .getLimitedValue(); 1302 1303 if (!NewArrayType) 1304 NewArrayType = SAI->getElementType(); 1305 1306 NewArrayType = ArrayType::get(NewArrayType, UnsignedDimSize); 1307 ArraySizeInt *= UnsignedDimSize; 1308 } 1309 1310 if (SAI->isOnHeap()) { 1311 LLVMContext &Ctx = NewArrayType->getContext(); 1312 1313 // Get the IntPtrTy from the Datalayout 1314 auto IntPtrTy = DL.getIntPtrType(Ctx); 1315 1316 // Get the size of the element type in bits 1317 unsigned Size = SAI->getElemSizeInBytes(); 1318 1319 // Insert the malloc call at polly.start 1320 Builder.SetInsertPoint(std::get<0>(StartExitBlocks)->getTerminator()); 1321 auto *CreatedArray = Builder.CreateMalloc( 1322 IntPtrTy, SAI->getElementType(), 1323 ConstantInt::get(Type::getInt64Ty(Ctx), Size), 1324 ConstantInt::get(Type::getInt64Ty(Ctx), ArraySizeInt), nullptr, 1325 SAI->getName()); 1326 1327 SAI->setBasePtr(CreatedArray); 1328 1329 // Insert the free call at polly.exiting 1330 Builder.SetInsertPoint(std::get<1>(StartExitBlocks)->getTerminator()); 1331 Builder.CreateFree(CreatedArray); 1332 } else { 1333 auto InstIt = Builder.GetInsertBlock() 1334 ->getParent() 1335 ->getEntryBlock() 1336 .getTerminator() 1337 ->getIterator(); 1338 1339 auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(), 1340 SAI->getName(), InstIt); 1341 if (PollyTargetFirstLevelCacheLineSize) 1342 CreatedArray->setAlignment(Align(PollyTargetFirstLevelCacheLineSize)); 1343 SAI->setBasePtr(CreatedArray); 1344 } 1345 } 1346 } 1347 1348 bool IslNodeBuilder::preloadInvariantLoads() { 1349 auto &InvariantEquivClasses = S.getInvariantAccesses(); 1350 if (InvariantEquivClasses.empty()) 1351 return true; 1352 1353 BasicBlock *PreLoadBB = SplitBlock(Builder.GetInsertBlock(), 1354 &*Builder.GetInsertPoint(), GenDT, GenLI); 1355 PreLoadBB->setName("polly.preload.begin"); 1356 Builder.SetInsertPoint(&PreLoadBB->front()); 1357 1358 for (auto &IAClass : InvariantEquivClasses) 1359 if (!preloadInvariantEquivClass(IAClass)) 1360 return false; 1361 1362 return true; 1363 } 1364 1365 void IslNodeBuilder::addParameters(__isl_take isl_set *Context) { 1366 // Materialize values for the parameters of the SCoP. 1367 materializeParameters(); 1368 1369 // Generate values for the current loop iteration for all surrounding loops. 1370 // 1371 // We may also reference loops outside of the scop which do not contain the 1372 // scop itself, but as the number of such scops may be arbitrarily large we do 1373 // not generate code for them here, but only at the point of code generation 1374 // where these values are needed. 1375 Loop *L = LI.getLoopFor(S.getEntry()); 1376 1377 while (L != nullptr && S.contains(L)) 1378 L = L->getParentLoop(); 1379 1380 while (L != nullptr) { 1381 materializeNonScopLoopInductionVariable(L); 1382 L = L->getParentLoop(); 1383 } 1384 1385 isl_set_free(Context); 1386 } 1387 1388 Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) { 1389 /// We pass the insert location of our Builder, as Polly ensures during IR 1390 /// generation that there is always a valid CFG into which instructions are 1391 /// inserted. As a result, the insertpoint is known to be always followed by a 1392 /// terminator instruction. This means the insert point may be specified by a 1393 /// terminator instruction, but it can never point to an ->end() iterator 1394 /// which does not have a corresponding instruction. Hence, dereferencing 1395 /// the insertpoint to obtain an instruction is known to be save. 1396 /// 1397 /// We also do not need to update the Builder here, as new instructions are 1398 /// always inserted _before_ the given InsertLocation. As a result, the 1399 /// insert location remains valid. 1400 assert(Builder.GetInsertBlock()->end() != Builder.GetInsertPoint() && 1401 "Insert location points after last valid instruction"); 1402 Instruction *InsertLocation = &*Builder.GetInsertPoint(); 1403 1404 return expandCodeFor(S, SE, Builder.GetInsertBlock()->getParent(), *GenSE, DL, 1405 "polly", Expr, Expr->getType(), InsertLocation, 1406 &ValueMap, /*LoopToScevMap*/ nullptr, 1407 StartBlock->getSinglePredecessor()); 1408 } 1409 1410 /// The AST expression we generate to perform the run-time check assumes 1411 /// computations on integer types of infinite size. As we only use 64-bit 1412 /// arithmetic we check for overflows, in case of which we set the result 1413 /// of this run-time check to false to be conservatively correct, 1414 Value *IslNodeBuilder::createRTC(isl_ast_expr *Condition) { 1415 auto ExprBuilder = getExprBuilder(); 1416 1417 // In case the AST expression has integers larger than 64 bit, bail out. The 1418 // resulting LLVM-IR will contain operations on types that use more than 64 1419 // bits. These are -- in case wrapping intrinsics are used -- translated to 1420 // runtime library calls that are not available on all systems (e.g., Android) 1421 // and consequently will result in linker errors. 1422 if (ExprBuilder.hasLargeInts(isl::manage_copy(Condition))) { 1423 isl_ast_expr_free(Condition); 1424 return Builder.getFalse(); 1425 } 1426 1427 ExprBuilder.setTrackOverflow(true); 1428 Value *RTC = ExprBuilder.create(Condition); 1429 if (!RTC->getType()->isIntegerTy(1)) 1430 RTC = Builder.CreateIsNotNull(RTC); 1431 Value *OverflowHappened = 1432 Builder.CreateNot(ExprBuilder.getOverflowState(), "polly.rtc.overflown"); 1433 1434 if (PollyGenerateRTCPrint) { 1435 auto *F = Builder.GetInsertBlock()->getParent(); 1436 RuntimeDebugBuilder::createCPUPrinter( 1437 Builder, 1438 "F: " + F->getName().str() + " R: " + S.getRegion().getNameStr() + 1439 "RTC: ", 1440 RTC, " Overflow: ", OverflowHappened, 1441 "\n" 1442 " (0 failed, -1 succeeded)\n" 1443 " (if one or both are 0 falling back to original code, if both are -1 " 1444 "executing Polly code)\n"); 1445 } 1446 1447 RTC = Builder.CreateAnd(RTC, OverflowHappened, "polly.rtc.result"); 1448 ExprBuilder.setTrackOverflow(false); 1449 1450 if (!isa<ConstantInt>(RTC)) 1451 VersionedScops++; 1452 1453 return RTC; 1454 } 1455