1 //===------ LoopGenerators.cpp - IR helper to create loops ---------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains functions to create scalar and parallel loops as LLVM-IR. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "polly/CodeGen/LoopGenerators.h" 15 #include "polly/ScopDetection.h" 16 #include "llvm/Analysis/LoopInfo.h" 17 #include "llvm/IR/DataLayout.h" 18 #include "llvm/IR/Dominators.h" 19 #include "llvm/IR/Module.h" 20 #include "llvm/Support/CommandLine.h" 21 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 22 23 using namespace llvm; 24 using namespace polly; 25 26 static cl::opt<int> 27 PollyNumThreads("polly-num-threads", 28 cl::desc("Number of threads to use (0 = auto)"), cl::Hidden, 29 cl::init(0)); 30 31 // We generate a loop of either of the following structures: 32 // 33 // BeforeBB BeforeBB 34 // | | 35 // v v 36 // GuardBB PreHeaderBB 37 // / | | _____ 38 // __ PreHeaderBB | v \/ | 39 // / \ / | HeaderBB latch 40 // latch HeaderBB | |\ | 41 // \ / \ / | \------/ 42 // < \ / | 43 // \ / v 44 // ExitBB ExitBB 45 // 46 // depending on whether or not we know that it is executed at least once. If 47 // not, GuardBB checks if the loop is executed at least once. If this is the 48 // case we branch to PreHeaderBB and subsequently to the HeaderBB, which 49 // contains the loop iv 'polly.indvar', the incremented loop iv 50 // 'polly.indvar_next' as well as the condition to check if we execute another 51 // iteration of the loop. After the loop has finished, we branch to ExitBB. 52 // We expect the type of UB, LB, UB+Stride to be large enough for values that 53 // UB may take throughout the execution of the loop, including the computation 54 // of indvar + Stride before the final abort. 55 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, 56 PollyIRBuilder &Builder, LoopInfo &LI, 57 DominatorTree &DT, BasicBlock *&ExitBB, 58 ICmpInst::Predicate Predicate, 59 ScopAnnotator *Annotator, bool Parallel, 60 bool UseGuard) { 61 Function *F = Builder.GetInsertBlock()->getParent(); 62 LLVMContext &Context = F->getContext(); 63 64 assert(LB->getType() == UB->getType() && "Types of loop bounds do not match"); 65 IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType()); 66 assert(LoopIVType && "UB is not integer?"); 67 68 BasicBlock *BeforeBB = Builder.GetInsertBlock(); 69 BasicBlock *GuardBB = 70 UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr; 71 BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F); 72 BasicBlock *PreHeaderBB = 73 BasicBlock::Create(Context, "polly.loop_preheader", F); 74 75 // Update LoopInfo 76 Loop *OuterLoop = LI.getLoopFor(BeforeBB); 77 Loop *NewLoop = new Loop(); 78 79 if (OuterLoop) 80 OuterLoop->addChildLoop(NewLoop); 81 else 82 LI.addTopLevelLoop(NewLoop); 83 84 if (OuterLoop) { 85 if (GuardBB) 86 OuterLoop->addBasicBlockToLoop(GuardBB, LI); 87 OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI); 88 } 89 90 NewLoop->addBasicBlockToLoop(HeaderBB, LI); 91 92 // Notify the annotator (if present) that we have a new loop, but only 93 // after the header block is set. 94 if (Annotator) 95 Annotator->pushLoop(NewLoop, Parallel); 96 97 // ExitBB 98 ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI); 99 ExitBB->setName("polly.loop_exit"); 100 101 // BeforeBB 102 if (GuardBB) { 103 BeforeBB->getTerminator()->setSuccessor(0, GuardBB); 104 DT.addNewBlock(GuardBB, BeforeBB); 105 106 // GuardBB 107 Builder.SetInsertPoint(GuardBB); 108 Value *LoopGuard; 109 LoopGuard = Builder.CreateICmp(Predicate, LB, UB); 110 LoopGuard->setName("polly.loop_guard"); 111 Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB); 112 DT.addNewBlock(PreHeaderBB, GuardBB); 113 } else { 114 BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB); 115 DT.addNewBlock(PreHeaderBB, BeforeBB); 116 } 117 118 // PreHeaderBB 119 Builder.SetInsertPoint(PreHeaderBB); 120 Builder.CreateBr(HeaderBB); 121 122 // HeaderBB 123 DT.addNewBlock(HeaderBB, PreHeaderBB); 124 Builder.SetInsertPoint(HeaderBB); 125 PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar"); 126 IV->addIncoming(LB, PreHeaderBB); 127 Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType); 128 Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next"); 129 Value *LoopCondition = 130 Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond"); 131 132 // Create the loop latch and annotate it as such. 133 BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); 134 if (Annotator) 135 Annotator->annotateLoopLatch(B, NewLoop, Parallel); 136 137 IV->addIncoming(IncrementedIV, HeaderBB); 138 if (GuardBB) 139 DT.changeImmediateDominator(ExitBB, GuardBB); 140 else 141 DT.changeImmediateDominator(ExitBB, HeaderBB); 142 143 // The loop body should be added here. 144 Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); 145 return IV; 146 } 147 148 Value *ParallelLoopGenerator::createParallelLoop( 149 Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues, 150 ValueMapT &Map, BasicBlock::iterator *LoopBody) { 151 Function *SubFn; 152 153 AllocaInst *Struct = storeValuesIntoStruct(UsedValues); 154 BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint(); 155 Value *IV = createSubFn(Stride, Struct, UsedValues, Map, &SubFn); 156 *LoopBody = Builder.GetInsertPoint(); 157 Builder.SetInsertPoint(&*BeforeLoop); 158 159 Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), 160 "polly.par.userContext"); 161 162 // Add one as the upper bound provided by openmp is a < comparison 163 // whereas the codegenForSequential function creates a <= comparison. 164 UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1)); 165 166 // Tell the runtime we start a parallel loop 167 createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride); 168 Builder.CreateCall(SubFn, SubFnParam); 169 createCallJoinThreads(); 170 171 return IV; 172 } 173 174 void ParallelLoopGenerator::createCallSpawnThreads(Value *SubFn, 175 Value *SubFnParam, Value *LB, 176 Value *UB, Value *Stride) { 177 const std::string Name = "GOMP_parallel_loop_runtime_start"; 178 179 Function *F = M->getFunction(Name); 180 181 // If F is not available, declare it. 182 if (!F) { 183 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 184 185 Type *Params[] = {PointerType::getUnqual(FunctionType::get( 186 Builder.getVoidTy(), Builder.getInt8PtrTy(), false)), 187 Builder.getInt8PtrTy(), 188 Builder.getInt32Ty(), 189 LongType, 190 LongType, 191 LongType}; 192 193 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); 194 F = Function::Create(Ty, Linkage, Name, M); 195 } 196 197 Value *NumberOfThreads = Builder.getInt32(PollyNumThreads); 198 Value *Args[] = {SubFn, SubFnParam, NumberOfThreads, LB, UB, Stride}; 199 200 Builder.CreateCall(F, Args); 201 } 202 203 Value *ParallelLoopGenerator::createCallGetWorkItem(Value *LBPtr, 204 Value *UBPtr) { 205 const std::string Name = "GOMP_loop_runtime_next"; 206 207 Function *F = M->getFunction(Name); 208 209 // If F is not available, declare it. 210 if (!F) { 211 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 212 Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()}; 213 FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false); 214 F = Function::Create(Ty, Linkage, Name, M); 215 } 216 217 Value *Args[] = {LBPtr, UBPtr}; 218 Value *Return = Builder.CreateCall(F, Args); 219 Return = Builder.CreateICmpNE( 220 Return, Builder.CreateZExt(Builder.getFalse(), Return->getType())); 221 return Return; 222 } 223 224 void ParallelLoopGenerator::createCallJoinThreads() { 225 const std::string Name = "GOMP_parallel_end"; 226 227 Function *F = M->getFunction(Name); 228 229 // If F is not available, declare it. 230 if (!F) { 231 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 232 233 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 234 F = Function::Create(Ty, Linkage, Name, M); 235 } 236 237 Builder.CreateCall(F, {}); 238 } 239 240 void ParallelLoopGenerator::createCallCleanupThread() { 241 const std::string Name = "GOMP_loop_end_nowait"; 242 243 Function *F = M->getFunction(Name); 244 245 // If F is not available, declare it. 246 if (!F) { 247 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 248 249 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 250 F = Function::Create(Ty, Linkage, Name, M); 251 } 252 253 Builder.CreateCall(F, {}); 254 } 255 256 Function *ParallelLoopGenerator::createSubFnDefinition() { 257 Function *F = Builder.GetInsertBlock()->getParent(); 258 std::vector<Type *> Arguments(1, Builder.getInt8PtrTy()); 259 FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false); 260 Function *SubFn = Function::Create(FT, Function::InternalLinkage, 261 F->getName() + "_polly_subfn", M); 262 263 // Certain backends (e.g., NVPTX) do not support '.'s in function names. 264 // Hence, we ensure that all '.'s are replaced by '_'s. 265 std::string FunctionName = SubFn->getName(); 266 std::replace(FunctionName.begin(), FunctionName.end(), '.', '_'); 267 SubFn->setName(FunctionName); 268 269 // Do not run any polly pass on the new function. 270 SubFn->addFnAttr(PollySkipFnAttr); 271 272 Function::arg_iterator AI = SubFn->arg_begin(); 273 AI->setName("polly.par.userContext"); 274 275 return SubFn; 276 } 277 278 AllocaInst * 279 ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) { 280 SmallVector<Type *, 8> Members; 281 282 for (Value *V : Values) 283 Members.push_back(V->getType()); 284 285 const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout(); 286 287 // We do not want to allocate the alloca inside any loop, thus we allocate it 288 // in the entry block of the function and use annotations to denote the actual 289 // live span (similar to clang). 290 BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock(); 291 Instruction *IP = &*EntryBB.getFirstInsertionPt(); 292 StructType *Ty = StructType::get(Builder.getContext(), Members); 293 AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr, 294 "polly.par.userContext", IP); 295 296 for (unsigned i = 0; i < Values.size(); i++) { 297 Value *Address = Builder.CreateStructGEP(Ty, Struct, i); 298 Address->setName("polly.subfn.storeaddr." + Values[i]->getName()); 299 Builder.CreateStore(Values[i], Address); 300 } 301 302 return Struct; 303 } 304 305 void ParallelLoopGenerator::extractValuesFromStruct( 306 SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) { 307 for (unsigned i = 0; i < OldValues.size(); i++) { 308 Value *Address = Builder.CreateStructGEP(Ty, Struct, i); 309 Value *NewValue = Builder.CreateLoad(Address); 310 NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName()); 311 Map[OldValues[i]] = NewValue; 312 } 313 } 314 315 Value *ParallelLoopGenerator::createSubFn(Value *Stride, AllocaInst *StructData, 316 SetVector<Value *> Data, 317 ValueMapT &Map, Function **SubFnPtr) { 318 BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *PreHeaderBB, *AfterBB; 319 Value *LBPtr, *UBPtr, *UserContext, *Ret1, *HasNextSchedule, *LB, *UB, *IV; 320 Function *SubFn = createSubFnDefinition(); 321 LLVMContext &Context = SubFn->getContext(); 322 323 // Store the previous basic block. 324 PrevBB = Builder.GetInsertBlock(); 325 326 // Create basic blocks. 327 HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn); 328 ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn); 329 CheckNextBB = BasicBlock::Create(Context, "polly.par.checkNext", SubFn); 330 PreHeaderBB = BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn); 331 332 DT.addNewBlock(HeaderBB, PrevBB); 333 DT.addNewBlock(ExitBB, HeaderBB); 334 DT.addNewBlock(CheckNextBB, HeaderBB); 335 DT.addNewBlock(PreHeaderBB, HeaderBB); 336 337 // Fill up basic block HeaderBB. 338 Builder.SetInsertPoint(HeaderBB); 339 LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr"); 340 UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr"); 341 UserContext = Builder.CreateBitCast( 342 &*SubFn->arg_begin(), StructData->getType(), "polly.par.userContext"); 343 344 extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext, 345 Map); 346 Builder.CreateBr(CheckNextBB); 347 348 // Add code to check if another set of iterations will be executed. 349 Builder.SetInsertPoint(CheckNextBB); 350 Ret1 = createCallGetWorkItem(LBPtr, UBPtr); 351 HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(), 352 "polly.par.hasNextScheduleBlock"); 353 Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB); 354 355 // Add code to load the iv bounds for this set of iterations. 356 Builder.SetInsertPoint(PreHeaderBB); 357 LB = Builder.CreateLoad(LBPtr, "polly.par.LB"); 358 UB = Builder.CreateLoad(UBPtr, "polly.par.UB"); 359 360 // Subtract one as the upper bound provided by openmp is a < comparison 361 // whereas the codegenForSequential function creates a <= comparison. 362 UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1), 363 "polly.par.UBAdjusted"); 364 365 Builder.CreateBr(CheckNextBB); 366 Builder.SetInsertPoint(&*--Builder.GetInsertPoint()); 367 IV = createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB, ICmpInst::ICMP_SLE, 368 nullptr, true, /* UseGuard */ false); 369 370 BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); 371 372 // Add code to terminate this subfunction. 373 Builder.SetInsertPoint(ExitBB); 374 createCallCleanupThread(); 375 Builder.CreateRetVoid(); 376 377 Builder.SetInsertPoint(&*LoopBody); 378 *SubFnPtr = SubFn; 379 380 return IV; 381 } 382