1 //===------ LoopGenerators.cpp - IR helper to create loops ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains functions to create scalar loops and orchestrate the 10 // creation of parallel loops as LLVM-IR. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "polly/CodeGen/LoopGenerators.h" 15 #include "polly/Options.h" 16 #include "polly/ScopDetection.h" 17 #include "llvm/Analysis/LoopInfo.h" 18 #include "llvm/IR/DataLayout.h" 19 #include "llvm/IR/DebugInfoMetadata.h" 20 #include "llvm/IR/Dominators.h" 21 #include "llvm/IR/Module.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 24 25 using namespace llvm; 26 using namespace polly; 27 28 int polly::PollyNumThreads; 29 OMPGeneralSchedulingType polly::PollyScheduling; 30 int polly::PollyChunkSize; 31 32 static cl::opt<int, true> 33 XPollyNumThreads("polly-num-threads", 34 cl::desc("Number of threads to use (0 = auto)"), 35 cl::Hidden, cl::location(polly::PollyNumThreads), 36 cl::init(0), cl::cat(PollyCategory)); 37 38 cl::opt<bool> PollyVectorizeMetadata( 39 "polly-annotate-metadata-vectorize", 40 cl::desc("Append vectorize enable/disable metadata from polly"), 41 cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); 42 43 static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling( 44 "polly-scheduling", 45 cl::desc("Scheduling type of parallel OpenMP for loops"), 46 cl::values(clEnumValN(OMPGeneralSchedulingType::StaticChunked, "static", 47 "Static scheduling"), 48 clEnumValN(OMPGeneralSchedulingType::Dynamic, "dynamic", 49 "Dynamic scheduling"), 50 clEnumValN(OMPGeneralSchedulingType::Guided, "guided", 51 "Guided scheduling"), 52 clEnumValN(OMPGeneralSchedulingType::Runtime, "runtime", 53 "Runtime determined (OMP_SCHEDULE)")), 54 cl::Hidden, cl::location(polly::PollyScheduling), 55 cl::init(OMPGeneralSchedulingType::Runtime), cl::Optional, 56 cl::cat(PollyCategory)); 57 58 static cl::opt<int, true> 59 XPollyChunkSize("polly-scheduling-chunksize", 60 cl::desc("Chunksize to use by the OpenMP runtime calls"), 61 cl::Hidden, cl::location(polly::PollyChunkSize), 62 cl::init(0), cl::Optional, cl::cat(PollyCategory)); 63 64 // We generate a loop of either of the following structures: 65 // 66 // BeforeBB BeforeBB 67 // | | 68 // v v 69 // GuardBB PreHeaderBB 70 // / | | _____ 71 // __ PreHeaderBB | v \/ | 72 // / \ / | HeaderBB latch 73 // latch HeaderBB | |\ | 74 // \ / \ / | \------/ 75 // < \ / | 76 // \ / v 77 // ExitBB ExitBB 78 // 79 // depending on whether or not we know that it is executed at least once. If 80 // not, GuardBB checks if the loop is executed at least once. If this is the 81 // case we branch to PreHeaderBB and subsequently to the HeaderBB, which 82 // contains the loop iv 'polly.indvar', the incremented loop iv 83 // 'polly.indvar_next' as well as the condition to check if we execute another 84 // iteration of the loop. After the loop has finished, we branch to ExitBB. 85 // We expect the type of UB, LB, UB+Stride to be large enough for values that 86 // UB may take throughout the execution of the loop, including the computation 87 // of indvar + Stride before the final abort. 88 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, 89 PollyIRBuilder &Builder, LoopInfo &LI, 90 DominatorTree &DT, BasicBlock *&ExitBB, 91 ICmpInst::Predicate Predicate, 92 ScopAnnotator *Annotator, bool Parallel, bool UseGuard, 93 bool LoopVectDisabled) { 94 Function *F = Builder.GetInsertBlock()->getParent(); 95 LLVMContext &Context = F->getContext(); 96 97 assert(LB->getType() == UB->getType() && "Types of loop bounds do not match"); 98 IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType()); 99 assert(LoopIVType && "UB is not integer?"); 100 101 BasicBlock *BeforeBB = Builder.GetInsertBlock(); 102 BasicBlock *GuardBB = 103 UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr; 104 BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F); 105 BasicBlock *PreHeaderBB = 106 BasicBlock::Create(Context, "polly.loop_preheader", F); 107 108 // Update LoopInfo 109 Loop *OuterLoop = LI.getLoopFor(BeforeBB); 110 Loop *NewLoop = LI.AllocateLoop(); 111 112 if (OuterLoop) 113 OuterLoop->addChildLoop(NewLoop); 114 else 115 LI.addTopLevelLoop(NewLoop); 116 117 if (OuterLoop) { 118 if (GuardBB) 119 OuterLoop->addBasicBlockToLoop(GuardBB, LI); 120 OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI); 121 } 122 123 NewLoop->addBasicBlockToLoop(HeaderBB, LI); 124 125 // Notify the annotator (if present) that we have a new loop, but only 126 // after the header block is set. 127 if (Annotator) 128 Annotator->pushLoop(NewLoop, Parallel); 129 130 // ExitBB 131 ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI); 132 ExitBB->setName("polly.loop_exit"); 133 134 // BeforeBB 135 if (GuardBB) { 136 BeforeBB->getTerminator()->setSuccessor(0, GuardBB); 137 DT.addNewBlock(GuardBB, BeforeBB); 138 139 // GuardBB 140 Builder.SetInsertPoint(GuardBB); 141 Value *LoopGuard; 142 LoopGuard = Builder.CreateICmp(Predicate, LB, UB); 143 LoopGuard->setName("polly.loop_guard"); 144 Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB); 145 DT.addNewBlock(PreHeaderBB, GuardBB); 146 } else { 147 BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB); 148 DT.addNewBlock(PreHeaderBB, BeforeBB); 149 } 150 151 // PreHeaderBB 152 Builder.SetInsertPoint(PreHeaderBB); 153 Builder.CreateBr(HeaderBB); 154 155 // HeaderBB 156 DT.addNewBlock(HeaderBB, PreHeaderBB); 157 Builder.SetInsertPoint(HeaderBB); 158 PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar"); 159 IV->addIncoming(LB, PreHeaderBB); 160 Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType); 161 Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next"); 162 Value *LoopCondition = 163 Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond"); 164 165 // Create the loop latch and annotate it as such. 166 BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); 167 168 // Don't annotate vectorize metadata when both LoopVectDisabled and 169 // PollyVectorizeMetadata are disabled. Annotate vectorize metadata to false 170 // when LoopVectDisabled is true. Otherwise we annotate the vectorize metadata 171 // to true. 172 if (Annotator) { 173 std::optional<bool> EnableVectorizeMetadata; 174 if (LoopVectDisabled) 175 EnableVectorizeMetadata = false; 176 else if (PollyVectorizeMetadata) 177 EnableVectorizeMetadata = true; 178 Annotator->annotateLoopLatch(B, Parallel, EnableVectorizeMetadata); 179 } 180 181 IV->addIncoming(IncrementedIV, HeaderBB); 182 if (GuardBB) 183 DT.changeImmediateDominator(ExitBB, GuardBB); 184 else 185 DT.changeImmediateDominator(ExitBB, HeaderBB); 186 187 // The loop body should be added here. 188 Builder.SetInsertPoint(HeaderBB->getFirstNonPHIIt()); 189 return IV; 190 } 191 192 Value *ParallelLoopGenerator::createParallelLoop( 193 Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues, 194 ValueMapT &Map, BasicBlock::iterator *LoopBody) { 195 196 AllocaInst *Struct = storeValuesIntoStruct(UsedValues); 197 BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint(); 198 199 Value *IV; 200 Function *SubFn; 201 std::tie(IV, SubFn) = createSubFn(Stride, Struct, UsedValues, Map); 202 *LoopBody = Builder.GetInsertPoint(); 203 Builder.SetInsertPoint(&*BeforeLoop); 204 205 // Add one as the upper bound provided by OpenMP is a < comparison 206 // whereas the codegenForSequential function creates a <= comparison. 207 UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1)); 208 209 // Execute the prepared subfunction in parallel. 210 deployParallelExecution(SubFn, Struct, LB, UB, Stride); 211 212 return IV; 213 } 214 215 Function *ParallelLoopGenerator::createSubFnDefinition() { 216 Function *F = Builder.GetInsertBlock()->getParent(); 217 Function *SubFn = prepareSubFnDefinition(F); 218 219 // Certain backends (e.g., NVPTX) do not support '.'s in function names. 220 // Hence, we ensure that all '.'s are replaced by '_'s. 221 std::string FunctionName = SubFn->getName().str(); 222 std::replace(FunctionName.begin(), FunctionName.end(), '.', '_'); 223 SubFn->setName(FunctionName); 224 225 // Do not run any polly pass on the new function. 226 SubFn->addFnAttr(PollySkipFnAttr); 227 228 return SubFn; 229 } 230 231 AllocaInst * 232 ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) { 233 SmallVector<Type *, 8> Members; 234 235 for (Value *V : Values) 236 Members.push_back(V->getType()); 237 238 const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout(); 239 240 // We do not want to allocate the alloca inside any loop, thus we allocate it 241 // in the entry block of the function and use annotations to denote the actual 242 // live span (similar to clang). 243 BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock(); 244 BasicBlock::iterator IP = EntryBB.getFirstInsertionPt(); 245 StructType *Ty = StructType::get(Builder.getContext(), Members); 246 AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr, 247 "polly.par.userContext", IP); 248 249 for (unsigned i = 0; i < Values.size(); i++) { 250 Value *Address = Builder.CreateStructGEP(Ty, Struct, i); 251 Address->setName("polly.subfn.storeaddr." + Values[i]->getName()); 252 Builder.CreateStore(Values[i], Address); 253 } 254 255 return Struct; 256 } 257 258 void ParallelLoopGenerator::extractValuesFromStruct( 259 SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) { 260 for (unsigned i = 0; i < OldValues.size(); i++) { 261 Value *Address = Builder.CreateStructGEP(Ty, Struct, i); 262 Type *ElemTy = cast<GetElementPtrInst>(Address)->getResultElementType(); 263 Value *NewValue = Builder.CreateLoad(ElemTy, Address); 264 NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName()); 265 Map[OldValues[i]] = NewValue; 266 } 267 } 268 269 DebugLoc polly::createDebugLocForGeneratedCode(Function *F) { 270 if (!F) 271 return DebugLoc(); 272 273 LLVMContext &Ctx = F->getContext(); 274 DISubprogram *DILScope = 275 dyn_cast_or_null<DISubprogram>(F->getMetadata(LLVMContext::MD_dbg)); 276 if (!DILScope) 277 return DebugLoc(); 278 return DILocation::get(Ctx, 0, 0, DILScope); 279 } 280