1 //===------ LoopGenerators.cpp - IR helper to create loops ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains functions to create scalar and parallel loops as LLVM-IR. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "polly/CodeGen/LoopGenerators.h" 14 #include "polly/ScopDetection.h" 15 #include "llvm/Analysis/LoopInfo.h" 16 #include "llvm/IR/DataLayout.h" 17 #include "llvm/IR/Dominators.h" 18 #include "llvm/IR/Module.h" 19 #include "llvm/Support/CommandLine.h" 20 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 21 22 using namespace llvm; 23 using namespace polly; 24 25 static cl::opt<int> 26 PollyNumThreads("polly-num-threads", 27 cl::desc("Number of threads to use (0 = auto)"), cl::Hidden, 28 cl::init(0)); 29 30 // We generate a loop of either of the following structures: 31 // 32 // BeforeBB BeforeBB 33 // | | 34 // v v 35 // GuardBB PreHeaderBB 36 // / | | _____ 37 // __ PreHeaderBB | v \/ | 38 // / \ / | HeaderBB latch 39 // latch HeaderBB | |\ | 40 // \ / \ / | \------/ 41 // < \ / | 42 // \ / v 43 // ExitBB ExitBB 44 // 45 // depending on whether or not we know that it is executed at least once. If 46 // not, GuardBB checks if the loop is executed at least once. If this is the 47 // case we branch to PreHeaderBB and subsequently to the HeaderBB, which 48 // contains the loop iv 'polly.indvar', the incremented loop iv 49 // 'polly.indvar_next' as well as the condition to check if we execute another 50 // iteration of the loop. After the loop has finished, we branch to ExitBB. 51 // We expect the type of UB, LB, UB+Stride to be large enough for values that 52 // UB may take throughout the execution of the loop, including the computation 53 // of indvar + Stride before the final abort. 54 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, 55 PollyIRBuilder &Builder, LoopInfo &LI, 56 DominatorTree &DT, BasicBlock *&ExitBB, 57 ICmpInst::Predicate Predicate, 58 ScopAnnotator *Annotator, bool Parallel, bool UseGuard, 59 bool LoopVectDisabled) { 60 Function *F = Builder.GetInsertBlock()->getParent(); 61 LLVMContext &Context = F->getContext(); 62 63 assert(LB->getType() == UB->getType() && "Types of loop bounds do not match"); 64 IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType()); 65 assert(LoopIVType && "UB is not integer?"); 66 67 BasicBlock *BeforeBB = Builder.GetInsertBlock(); 68 BasicBlock *GuardBB = 69 UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr; 70 BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F); 71 BasicBlock *PreHeaderBB = 72 BasicBlock::Create(Context, "polly.loop_preheader", F); 73 74 // Update LoopInfo 75 Loop *OuterLoop = LI.getLoopFor(BeforeBB); 76 Loop *NewLoop = LI.AllocateLoop(); 77 78 if (OuterLoop) 79 OuterLoop->addChildLoop(NewLoop); 80 else 81 LI.addTopLevelLoop(NewLoop); 82 83 if (OuterLoop) { 84 if (GuardBB) 85 OuterLoop->addBasicBlockToLoop(GuardBB, LI); 86 OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI); 87 } 88 89 NewLoop->addBasicBlockToLoop(HeaderBB, LI); 90 91 // Notify the annotator (if present) that we have a new loop, but only 92 // after the header block is set. 93 if (Annotator) 94 Annotator->pushLoop(NewLoop, Parallel); 95 96 // ExitBB 97 ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI); 98 ExitBB->setName("polly.loop_exit"); 99 100 // BeforeBB 101 if (GuardBB) { 102 BeforeBB->getTerminator()->setSuccessor(0, GuardBB); 103 DT.addNewBlock(GuardBB, BeforeBB); 104 105 // GuardBB 106 Builder.SetInsertPoint(GuardBB); 107 Value *LoopGuard; 108 LoopGuard = Builder.CreateICmp(Predicate, LB, UB); 109 LoopGuard->setName("polly.loop_guard"); 110 Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB); 111 DT.addNewBlock(PreHeaderBB, GuardBB); 112 } else { 113 BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB); 114 DT.addNewBlock(PreHeaderBB, BeforeBB); 115 } 116 117 // PreHeaderBB 118 Builder.SetInsertPoint(PreHeaderBB); 119 Builder.CreateBr(HeaderBB); 120 121 // HeaderBB 122 DT.addNewBlock(HeaderBB, PreHeaderBB); 123 Builder.SetInsertPoint(HeaderBB); 124 PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar"); 125 IV->addIncoming(LB, PreHeaderBB); 126 Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType); 127 Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next"); 128 Value *LoopCondition = 129 Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond"); 130 131 // Create the loop latch and annotate it as such. 132 BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); 133 if (Annotator) 134 Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled); 135 136 IV->addIncoming(IncrementedIV, HeaderBB); 137 if (GuardBB) 138 DT.changeImmediateDominator(ExitBB, GuardBB); 139 else 140 DT.changeImmediateDominator(ExitBB, HeaderBB); 141 142 // The loop body should be added here. 143 Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); 144 return IV; 145 } 146 147 Value *ParallelLoopGenerator::createParallelLoop( 148 Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues, 149 ValueMapT &Map, BasicBlock::iterator *LoopBody) { 150 Function *SubFn; 151 152 AllocaInst *Struct = storeValuesIntoStruct(UsedValues); 153 BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint(); 154 Value *IV = createSubFn(Stride, Struct, UsedValues, Map, &SubFn); 155 *LoopBody = Builder.GetInsertPoint(); 156 Builder.SetInsertPoint(&*BeforeLoop); 157 158 Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), 159 "polly.par.userContext"); 160 161 // Add one as the upper bound provided by OpenMP is a < comparison 162 // whereas the codegenForSequential function creates a <= comparison. 163 UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1)); 164 165 // Tell the runtime we start a parallel loop 166 createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride); 167 Builder.CreateCall(SubFn, SubFnParam); 168 createCallJoinThreads(); 169 170 return IV; 171 } 172 173 void ParallelLoopGenerator::createCallSpawnThreads(Value *SubFn, 174 Value *SubFnParam, Value *LB, 175 Value *UB, Value *Stride) { 176 const std::string Name = "GOMP_parallel_loop_runtime_start"; 177 178 Function *F = M->getFunction(Name); 179 180 // If F is not available, declare it. 181 if (!F) { 182 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 183 184 Type *Params[] = {PointerType::getUnqual(FunctionType::get( 185 Builder.getVoidTy(), Builder.getInt8PtrTy(), false)), 186 Builder.getInt8PtrTy(), 187 Builder.getInt32Ty(), 188 LongType, 189 LongType, 190 LongType}; 191 192 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); 193 F = Function::Create(Ty, Linkage, Name, M); 194 } 195 196 Value *NumberOfThreads = Builder.getInt32(PollyNumThreads); 197 Value *Args[] = {SubFn, SubFnParam, NumberOfThreads, LB, UB, Stride}; 198 199 Builder.CreateCall(F, Args); 200 } 201 202 Value *ParallelLoopGenerator::createCallGetWorkItem(Value *LBPtr, 203 Value *UBPtr) { 204 const std::string Name = "GOMP_loop_runtime_next"; 205 206 Function *F = M->getFunction(Name); 207 208 // If F is not available, declare it. 209 if (!F) { 210 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 211 Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()}; 212 FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false); 213 F = Function::Create(Ty, Linkage, Name, M); 214 } 215 216 Value *Args[] = {LBPtr, UBPtr}; 217 Value *Return = Builder.CreateCall(F, Args); 218 Return = Builder.CreateICmpNE( 219 Return, Builder.CreateZExt(Builder.getFalse(), Return->getType())); 220 return Return; 221 } 222 223 void ParallelLoopGenerator::createCallJoinThreads() { 224 const std::string Name = "GOMP_parallel_end"; 225 226 Function *F = M->getFunction(Name); 227 228 // If F is not available, declare it. 229 if (!F) { 230 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 231 232 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 233 F = Function::Create(Ty, Linkage, Name, M); 234 } 235 236 Builder.CreateCall(F, {}); 237 } 238 239 void ParallelLoopGenerator::createCallCleanupThread() { 240 const std::string Name = "GOMP_loop_end_nowait"; 241 242 Function *F = M->getFunction(Name); 243 244 // If F is not available, declare it. 245 if (!F) { 246 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 247 248 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 249 F = Function::Create(Ty, Linkage, Name, M); 250 } 251 252 Builder.CreateCall(F, {}); 253 } 254 255 Function *ParallelLoopGenerator::createSubFnDefinition() { 256 Function *F = Builder.GetInsertBlock()->getParent(); 257 std::vector<Type *> Arguments(1, Builder.getInt8PtrTy()); 258 FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false); 259 Function *SubFn = Function::Create(FT, Function::InternalLinkage, 260 F->getName() + "_polly_subfn", M); 261 262 // Certain backends (e.g., NVPTX) do not support '.'s in function names. 263 // Hence, we ensure that all '.'s are replaced by '_'s. 264 std::string FunctionName = SubFn->getName(); 265 std::replace(FunctionName.begin(), FunctionName.end(), '.', '_'); 266 SubFn->setName(FunctionName); 267 268 // Do not run any polly pass on the new function. 269 SubFn->addFnAttr(PollySkipFnAttr); 270 271 Function::arg_iterator AI = SubFn->arg_begin(); 272 AI->setName("polly.par.userContext"); 273 274 return SubFn; 275 } 276 277 AllocaInst * 278 ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) { 279 SmallVector<Type *, 8> Members; 280 281 for (Value *V : Values) 282 Members.push_back(V->getType()); 283 284 const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout(); 285 286 // We do not want to allocate the alloca inside any loop, thus we allocate it 287 // in the entry block of the function and use annotations to denote the actual 288 // live span (similar to clang). 289 BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock(); 290 Instruction *IP = &*EntryBB.getFirstInsertionPt(); 291 StructType *Ty = StructType::get(Builder.getContext(), Members); 292 AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr, 293 "polly.par.userContext", IP); 294 295 for (unsigned i = 0; i < Values.size(); i++) { 296 Value *Address = Builder.CreateStructGEP(Ty, Struct, i); 297 Address->setName("polly.subfn.storeaddr." + Values[i]->getName()); 298 Builder.CreateStore(Values[i], Address); 299 } 300 301 return Struct; 302 } 303 304 void ParallelLoopGenerator::extractValuesFromStruct( 305 SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) { 306 for (unsigned i = 0; i < OldValues.size(); i++) { 307 Value *Address = Builder.CreateStructGEP(Ty, Struct, i); 308 Value *NewValue = Builder.CreateLoad(Address); 309 NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName()); 310 Map[OldValues[i]] = NewValue; 311 } 312 } 313 314 Value *ParallelLoopGenerator::createSubFn(Value *Stride, AllocaInst *StructData, 315 SetVector<Value *> Data, 316 ValueMapT &Map, Function **SubFnPtr) { 317 BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *PreHeaderBB, *AfterBB; 318 Value *LBPtr, *UBPtr, *UserContext, *Ret1, *HasNextSchedule, *LB, *UB, *IV; 319 Function *SubFn = createSubFnDefinition(); 320 LLVMContext &Context = SubFn->getContext(); 321 322 // Store the previous basic block. 323 PrevBB = Builder.GetInsertBlock(); 324 325 // Create basic blocks. 326 HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn); 327 ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn); 328 CheckNextBB = BasicBlock::Create(Context, "polly.par.checkNext", SubFn); 329 PreHeaderBB = BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn); 330 331 DT.addNewBlock(HeaderBB, PrevBB); 332 DT.addNewBlock(ExitBB, HeaderBB); 333 DT.addNewBlock(CheckNextBB, HeaderBB); 334 DT.addNewBlock(PreHeaderBB, HeaderBB); 335 336 // Fill up basic block HeaderBB. 337 Builder.SetInsertPoint(HeaderBB); 338 LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr"); 339 UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr"); 340 UserContext = Builder.CreateBitCast( 341 &*SubFn->arg_begin(), StructData->getType(), "polly.par.userContext"); 342 343 extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext, 344 Map); 345 Builder.CreateBr(CheckNextBB); 346 347 // Add code to check if another set of iterations will be executed. 348 Builder.SetInsertPoint(CheckNextBB); 349 Ret1 = createCallGetWorkItem(LBPtr, UBPtr); 350 HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(), 351 "polly.par.hasNextScheduleBlock"); 352 Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB); 353 354 // Add code to load the iv bounds for this set of iterations. 355 Builder.SetInsertPoint(PreHeaderBB); 356 LB = Builder.CreateLoad(LBPtr, "polly.par.LB"); 357 UB = Builder.CreateLoad(UBPtr, "polly.par.UB"); 358 359 // Subtract one as the upper bound provided by OpenMP is a < comparison 360 // whereas the codegenForSequential function creates a <= comparison. 361 UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1), 362 "polly.par.UBAdjusted"); 363 364 Builder.CreateBr(CheckNextBB); 365 Builder.SetInsertPoint(&*--Builder.GetInsertPoint()); 366 IV = createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB, ICmpInst::ICMP_SLE, 367 nullptr, true, /* UseGuard */ false); 368 369 BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); 370 371 // Add code to terminate this subfunction. 372 Builder.SetInsertPoint(ExitBB); 373 createCallCleanupThread(); 374 Builder.CreateRetVoid(); 375 376 Builder.SetInsertPoint(&*LoopBody); 377 *SubFnPtr = SubFn; 378 379 return IV; 380 } 381