1 //===------ LoopGenerators.cpp - IR helper to create loops ---------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains functions to create scalar and parallel loops as LLVM-IR. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "polly/CodeGen/LoopGenerators.h" 15 #include "polly/ScopDetection.h" 16 #include "llvm/Analysis/LoopInfo.h" 17 #include "llvm/IR/DataLayout.h" 18 #include "llvm/IR/Dominators.h" 19 #include "llvm/IR/Module.h" 20 #include "llvm/IR/PatternMatch.h" 21 #include "llvm/Support/CommandLine.h" 22 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 23 24 using namespace llvm; 25 using namespace polly; 26 using namespace PatternMatch; 27 28 static cl::opt<int> 29 PollyNumThreads("polly-num-threads", 30 cl::desc("Number of threads to use (0 = auto)"), cl::Hidden, 31 cl::init(0)); 32 33 // We generate a loop of either of the following structures: 34 // 35 // BeforeBB BeforeBB 36 // | | 37 // v v 38 // GuardBB PreHeaderBB 39 // / | | _____ 40 // __ PreHeaderBB | v \/ | 41 // / \ / | HeaderBB latch 42 // latch HeaderBB | |\ | 43 // \ / \ / | \------/ 44 // < \ / | 45 // \ / v 46 // ExitBB ExitBB 47 // 48 // depending on whether or not we know that it is executed at least once. If 49 // not, GuardBB checks if the loop is executed at least once. If this is the 50 // case we branch to PreHeaderBB and subsequently to the HeaderBB, which 51 // contains the loop iv 'polly.indvar', the incremented loop iv 52 // 'polly.indvar_next' as well as the condition to check if we execute another 53 // iteration of the loop. After the loop has finished, we branch to ExitBB. 54 // We expect the type of UB, LB, UB+Stride to be large enough for values that 55 // UB may take throughout the execution of the loop, including the computation 56 // of indvar + Stride before the final abort. 57 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, 58 PollyIRBuilder &Builder, LoopInfo &LI, 59 DominatorTree &DT, BasicBlock *&ExitBB, 60 ICmpInst::Predicate Predicate, 61 ScopAnnotator *Annotator, bool Parallel, 62 bool UseGuard) { 63 Function *F = Builder.GetInsertBlock()->getParent(); 64 LLVMContext &Context = F->getContext(); 65 66 assert(LB->getType() == UB->getType() && "Types of loop bounds do not match"); 67 IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType()); 68 assert(LoopIVType && "UB is not integer?"); 69 70 BasicBlock *BeforeBB = Builder.GetInsertBlock(); 71 BasicBlock *GuardBB = 72 UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr; 73 BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F); 74 BasicBlock *PreHeaderBB = 75 BasicBlock::Create(Context, "polly.loop_preheader", F); 76 77 // Update LoopInfo 78 Loop *OuterLoop = LI.getLoopFor(BeforeBB); 79 Loop *NewLoop = new Loop(); 80 81 if (OuterLoop) 82 OuterLoop->addChildLoop(NewLoop); 83 else 84 LI.addTopLevelLoop(NewLoop); 85 86 if (OuterLoop) { 87 if (GuardBB) 88 OuterLoop->addBasicBlockToLoop(GuardBB, LI); 89 OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI); 90 } 91 92 NewLoop->addBasicBlockToLoop(HeaderBB, LI); 93 94 // Notify the annotator (if present) that we have a new loop, but only 95 // after the header block is set. 96 if (Annotator) 97 Annotator->pushLoop(NewLoop, Parallel); 98 99 // ExitBB 100 ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI); 101 ExitBB->setName("polly.loop_exit"); 102 103 // BeforeBB 104 if (GuardBB) { 105 BeforeBB->getTerminator()->setSuccessor(0, GuardBB); 106 DT.addNewBlock(GuardBB, BeforeBB); 107 108 // GuardBB 109 Builder.SetInsertPoint(GuardBB); 110 Value *LoopGuard; 111 LoopGuard = Builder.CreateICmp(Predicate, LB, UB); 112 LoopGuard->setName("polly.loop_guard"); 113 Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB); 114 DT.addNewBlock(PreHeaderBB, GuardBB); 115 } else { 116 BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB); 117 DT.addNewBlock(PreHeaderBB, BeforeBB); 118 } 119 120 // PreHeaderBB 121 Builder.SetInsertPoint(PreHeaderBB); 122 Builder.CreateBr(HeaderBB); 123 124 // HeaderBB 125 DT.addNewBlock(HeaderBB, PreHeaderBB); 126 Builder.SetInsertPoint(HeaderBB); 127 PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar"); 128 IV->addIncoming(LB, PreHeaderBB); 129 Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType); 130 Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next"); 131 Value *LoopCondition = 132 Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond"); 133 134 // Create the loop latch and annotate it as such. 135 BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); 136 if (Annotator) 137 Annotator->annotateLoopLatch(B, NewLoop, Parallel); 138 139 IV->addIncoming(IncrementedIV, HeaderBB); 140 if (GuardBB) 141 DT.changeImmediateDominator(ExitBB, GuardBB); 142 else 143 DT.changeImmediateDominator(ExitBB, HeaderBB); 144 145 // The loop body should be added here. 146 Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); 147 return IV; 148 } 149 150 Value *ParallelLoopGenerator::createParallelLoop( 151 Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues, 152 ValueMapT &Map, BasicBlock::iterator *LoopBody) { 153 Function *SubFn; 154 155 AllocaInst *Struct = storeValuesIntoStruct(UsedValues); 156 BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint(); 157 Value *IV = createSubFn(Stride, Struct, UsedValues, Map, &SubFn); 158 *LoopBody = Builder.GetInsertPoint(); 159 Builder.SetInsertPoint(&*BeforeLoop); 160 161 Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), 162 "polly.par.userContext"); 163 164 // Add one as the upper bound provided by openmp is a < comparison 165 // whereas the codegenForSequential function creates a <= comparison. 166 UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1)); 167 168 // Tell the runtime we start a parallel loop 169 createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride); 170 Builder.CreateCall(SubFn, SubFnParam); 171 createCallJoinThreads(); 172 173 return IV; 174 } 175 176 void ParallelLoopGenerator::createCallSpawnThreads(Value *SubFn, 177 Value *SubFnParam, Value *LB, 178 Value *UB, Value *Stride) { 179 const std::string Name = "GOMP_parallel_loop_runtime_start"; 180 181 Function *F = M->getFunction(Name); 182 183 // If F is not available, declare it. 184 if (!F) { 185 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 186 187 Type *Params[] = {PointerType::getUnqual(FunctionType::get( 188 Builder.getVoidTy(), Builder.getInt8PtrTy(), false)), 189 Builder.getInt8PtrTy(), 190 Builder.getInt32Ty(), 191 LongType, 192 LongType, 193 LongType}; 194 195 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); 196 F = Function::Create(Ty, Linkage, Name, M); 197 } 198 199 Value *NumberOfThreads = Builder.getInt32(PollyNumThreads); 200 Value *Args[] = {SubFn, SubFnParam, NumberOfThreads, LB, UB, Stride}; 201 202 Builder.CreateCall(F, Args); 203 } 204 205 Value *ParallelLoopGenerator::createCallGetWorkItem(Value *LBPtr, 206 Value *UBPtr) { 207 const std::string Name = "GOMP_loop_runtime_next"; 208 209 Function *F = M->getFunction(Name); 210 211 // If F is not available, declare it. 212 if (!F) { 213 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 214 Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()}; 215 FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false); 216 F = Function::Create(Ty, Linkage, Name, M); 217 } 218 219 Value *Args[] = {LBPtr, UBPtr}; 220 Value *Return = Builder.CreateCall(F, Args); 221 Return = Builder.CreateICmpNE( 222 Return, Builder.CreateZExt(Builder.getFalse(), Return->getType())); 223 return Return; 224 } 225 226 void ParallelLoopGenerator::createCallJoinThreads() { 227 const std::string Name = "GOMP_parallel_end"; 228 229 Function *F = M->getFunction(Name); 230 231 // If F is not available, declare it. 232 if (!F) { 233 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 234 235 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 236 F = Function::Create(Ty, Linkage, Name, M); 237 } 238 239 Builder.CreateCall(F, {}); 240 } 241 242 void ParallelLoopGenerator::createCallCleanupThread() { 243 const std::string Name = "GOMP_loop_end_nowait"; 244 245 Function *F = M->getFunction(Name); 246 247 // If F is not available, declare it. 248 if (!F) { 249 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 250 251 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 252 F = Function::Create(Ty, Linkage, Name, M); 253 } 254 255 Builder.CreateCall(F, {}); 256 } 257 258 Function *ParallelLoopGenerator::createSubFnDefinition() { 259 Function *F = Builder.GetInsertBlock()->getParent(); 260 std::vector<Type *> Arguments(1, Builder.getInt8PtrTy()); 261 FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false); 262 Function *SubFn = Function::Create(FT, Function::InternalLinkage, 263 F->getName() + "_polly_subfn", M); 264 265 // Certain backends (e.g., NVPTX) do not support '.'s in function names. 266 // Hence, we ensure that all '.'s are replaced by '_'s. 267 std::string FunctionName = SubFn->getName(); 268 std::replace(FunctionName.begin(), FunctionName.end(), '.', '_'); 269 SubFn->setName(FunctionName); 270 271 // Do not run any polly pass on the new function. 272 SubFn->addFnAttr(PollySkipFnAttr); 273 274 Function::arg_iterator AI = SubFn->arg_begin(); 275 AI->setName("polly.par.userContext"); 276 277 return SubFn; 278 } 279 280 AllocaInst * 281 ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) { 282 SmallVector<Type *, 8> Members; 283 284 for (Value *V : Values) 285 Members.push_back(V->getType()); 286 287 const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout(); 288 289 // We do not want to allocate the alloca inside any loop, thus we allocate it 290 // in the entry block of the function and use annotations to denote the actual 291 // live span (similar to clang). 292 BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock(); 293 Instruction *IP = &*EntryBB.getFirstInsertionPt(); 294 StructType *Ty = StructType::get(Builder.getContext(), Members); 295 AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr, 296 "polly.par.userContext", IP); 297 298 for (unsigned i = 0; i < Values.size(); i++) { 299 Value *Address = Builder.CreateStructGEP(Ty, Struct, i); 300 Address->setName("polly.subfn.storeaddr." + Values[i]->getName()); 301 Builder.CreateStore(Values[i], Address); 302 } 303 304 return Struct; 305 } 306 307 void ParallelLoopGenerator::extractValuesFromStruct( 308 SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) { 309 for (unsigned i = 0; i < OldValues.size(); i++) { 310 Value *Address = Builder.CreateStructGEP(Ty, Struct, i); 311 Value *NewValue = Builder.CreateLoad(Address); 312 NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName()); 313 Map[OldValues[i]] = NewValue; 314 } 315 } 316 317 Value *ParallelLoopGenerator::createSubFn(Value *Stride, AllocaInst *StructData, 318 SetVector<Value *> Data, 319 ValueMapT &Map, Function **SubFnPtr) { 320 BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *PreHeaderBB, *AfterBB; 321 Value *LBPtr, *UBPtr, *UserContext, *Ret1, *HasNextSchedule, *LB, *UB, *IV; 322 Function *SubFn = createSubFnDefinition(); 323 LLVMContext &Context = SubFn->getContext(); 324 325 // Store the previous basic block. 326 PrevBB = Builder.GetInsertBlock(); 327 328 // Create basic blocks. 329 HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn); 330 ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn); 331 CheckNextBB = BasicBlock::Create(Context, "polly.par.checkNext", SubFn); 332 PreHeaderBB = BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn); 333 334 DT.addNewBlock(HeaderBB, PrevBB); 335 DT.addNewBlock(ExitBB, HeaderBB); 336 DT.addNewBlock(CheckNextBB, HeaderBB); 337 DT.addNewBlock(PreHeaderBB, HeaderBB); 338 339 // Fill up basic block HeaderBB. 340 Builder.SetInsertPoint(HeaderBB); 341 LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr"); 342 UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr"); 343 UserContext = Builder.CreateBitCast( 344 &*SubFn->arg_begin(), StructData->getType(), "polly.par.userContext"); 345 346 extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext, 347 Map); 348 Builder.CreateBr(CheckNextBB); 349 350 // Add code to check if another set of iterations will be executed. 351 Builder.SetInsertPoint(CheckNextBB); 352 Ret1 = createCallGetWorkItem(LBPtr, UBPtr); 353 HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(), 354 "polly.par.hasNextScheduleBlock"); 355 Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB); 356 357 // Add code to load the iv bounds for this set of iterations. 358 Builder.SetInsertPoint(PreHeaderBB); 359 LB = Builder.CreateLoad(LBPtr, "polly.par.LB"); 360 UB = Builder.CreateLoad(UBPtr, "polly.par.UB"); 361 362 // Subtract one as the upper bound provided by openmp is a < comparison 363 // whereas the codegenForSequential function creates a <= comparison. 364 UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1), 365 "polly.par.UBAdjusted"); 366 367 Builder.CreateBr(CheckNextBB); 368 Builder.SetInsertPoint(&*--Builder.GetInsertPoint()); 369 IV = createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB, ICmpInst::ICMP_SLE, 370 nullptr, true, /* UseGuard */ false); 371 372 BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); 373 374 // Add code to terminate this subfunction. 375 Builder.SetInsertPoint(ExitBB); 376 createCallCleanupThread(); 377 Builder.CreateRetVoid(); 378 379 Builder.SetInsertPoint(&*LoopBody); 380 *SubFnPtr = SubFn; 381 382 return IV; 383 } 384