xref: /llvm-project/polly/lib/CodeGen/LoopGenerators.cpp (revision 4fe342cb7560993988dd6f978607a2442a905910)
1 //===------ LoopGenerators.cpp -  IR helper to create loops ---------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains functions to create scalar and parallel loops as LLVM-IR.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "polly/CodeGen/LoopGenerators.h"
15 #include "polly/ScopDetection.h"
16 #include "llvm/Analysis/LoopInfo.h"
17 #include "llvm/IR/DataLayout.h"
18 #include "llvm/IR/Dominators.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/IR/PatternMatch.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
23 
24 using namespace llvm;
25 using namespace polly;
26 using namespace PatternMatch;
27 
28 static cl::opt<int>
29     PollyNumThreads("polly-num-threads",
30                     cl::desc("Number of threads to use (0 = auto)"), cl::Hidden,
31                     cl::init(0));
32 
33 // We generate a loop of either of the following structures:
34 //
35 //              BeforeBB                      BeforeBB
36 //                 |                             |
37 //                 v                             v
38 //              GuardBB                      PreHeaderBB
39 //              /      |                         |   _____
40 //     __  PreHeaderBB  |                        v  \/    |
41 //    /  \    /         |                     HeaderBB  latch
42 // latch  HeaderBB      |                        |\       |
43 //    \  /    \         /                        | \------/
44 //     <       \       /                         |
45 //              \     /                          v
46 //              ExitBB                         ExitBB
47 //
48 // depending on whether or not we know that it is executed at least once. If
49 // not, GuardBB checks if the loop is executed at least once. If this is the
50 // case we branch to PreHeaderBB and subsequently to the HeaderBB, which
51 // contains the loop iv 'polly.indvar', the incremented loop iv
52 // 'polly.indvar_next' as well as the condition to check if we execute another
53 // iteration of the loop. After the loop has finished, we branch to ExitBB.
54 // We expect the type of UB, LB, UB+Stride to be large enough for values that
55 // UB may take throughout the execution of the loop, including the computation
56 // of indvar + Stride before the final abort.
57 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
58                          PollyIRBuilder &Builder, LoopInfo &LI,
59                          DominatorTree &DT, BasicBlock *&ExitBB,
60                          ICmpInst::Predicate Predicate,
61                          ScopAnnotator *Annotator, bool Parallel,
62                          bool UseGuard) {
63   Function *F = Builder.GetInsertBlock()->getParent();
64   LLVMContext &Context = F->getContext();
65 
66   assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
67   IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
68   assert(LoopIVType && "UB is not integer?");
69 
70   BasicBlock *BeforeBB = Builder.GetInsertBlock();
71   BasicBlock *GuardBB =
72       UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr;
73   BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
74   BasicBlock *PreHeaderBB =
75       BasicBlock::Create(Context, "polly.loop_preheader", F);
76 
77   // Update LoopInfo
78   Loop *OuterLoop = LI.getLoopFor(BeforeBB);
79   Loop *NewLoop = new Loop();
80 
81   if (OuterLoop)
82     OuterLoop->addChildLoop(NewLoop);
83   else
84     LI.addTopLevelLoop(NewLoop);
85 
86   if (OuterLoop) {
87     if (GuardBB)
88       OuterLoop->addBasicBlockToLoop(GuardBB, LI);
89     OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI);
90   }
91 
92   NewLoop->addBasicBlockToLoop(HeaderBB, LI);
93 
94   // Notify the annotator (if present) that we have a new loop, but only
95   // after the header block is set.
96   if (Annotator)
97     Annotator->pushLoop(NewLoop, Parallel);
98 
99   // ExitBB
100   ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI);
101   ExitBB->setName("polly.loop_exit");
102 
103   // BeforeBB
104   if (GuardBB) {
105     BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
106     DT.addNewBlock(GuardBB, BeforeBB);
107 
108     // GuardBB
109     Builder.SetInsertPoint(GuardBB);
110     Value *LoopGuard;
111     LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
112     LoopGuard->setName("polly.loop_guard");
113     Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
114     DT.addNewBlock(PreHeaderBB, GuardBB);
115   } else {
116     BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB);
117     DT.addNewBlock(PreHeaderBB, BeforeBB);
118   }
119 
120   // PreHeaderBB
121   Builder.SetInsertPoint(PreHeaderBB);
122   Builder.CreateBr(HeaderBB);
123 
124   // HeaderBB
125   DT.addNewBlock(HeaderBB, PreHeaderBB);
126   Builder.SetInsertPoint(HeaderBB);
127   PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
128   IV->addIncoming(LB, PreHeaderBB);
129   Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
130   Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
131   Value *LoopCondition =
132       Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond");
133 
134   // Create the loop latch and annotate it as such.
135   BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
136   if (Annotator)
137     Annotator->annotateLoopLatch(B, NewLoop, Parallel);
138 
139   IV->addIncoming(IncrementedIV, HeaderBB);
140   if (GuardBB)
141     DT.changeImmediateDominator(ExitBB, GuardBB);
142   else
143     DT.changeImmediateDominator(ExitBB, HeaderBB);
144 
145   // The loop body should be added here.
146   Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
147   return IV;
148 }
149 
150 Value *ParallelLoopGenerator::createParallelLoop(
151     Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
152     ValueMapT &Map, BasicBlock::iterator *LoopBody) {
153   Function *SubFn;
154 
155   AllocaInst *Struct = storeValuesIntoStruct(UsedValues);
156   BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint();
157   Value *IV = createSubFn(Stride, Struct, UsedValues, Map, &SubFn);
158   *LoopBody = Builder.GetInsertPoint();
159   Builder.SetInsertPoint(&*BeforeLoop);
160 
161   Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(),
162                                             "polly.par.userContext");
163 
164   // Add one as the upper bound provided by openmp is a < comparison
165   // whereas the codegenForSequential function creates a <= comparison.
166   UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1));
167 
168   // Tell the runtime we start a parallel loop
169   createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
170   Builder.CreateCall(SubFn, SubFnParam);
171   createCallJoinThreads();
172 
173   return IV;
174 }
175 
176 void ParallelLoopGenerator::createCallSpawnThreads(Value *SubFn,
177                                                    Value *SubFnParam, Value *LB,
178                                                    Value *UB, Value *Stride) {
179   const std::string Name = "GOMP_parallel_loop_runtime_start";
180 
181   Function *F = M->getFunction(Name);
182 
183   // If F is not available, declare it.
184   if (!F) {
185     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
186 
187     Type *Params[] = {PointerType::getUnqual(FunctionType::get(
188                           Builder.getVoidTy(), Builder.getInt8PtrTy(), false)),
189                       Builder.getInt8PtrTy(),
190                       Builder.getInt32Ty(),
191                       LongType,
192                       LongType,
193                       LongType};
194 
195     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
196     F = Function::Create(Ty, Linkage, Name, M);
197   }
198 
199   Value *NumberOfThreads = Builder.getInt32(PollyNumThreads);
200   Value *Args[] = {SubFn, SubFnParam, NumberOfThreads, LB, UB, Stride};
201 
202   Builder.CreateCall(F, Args);
203 }
204 
205 Value *ParallelLoopGenerator::createCallGetWorkItem(Value *LBPtr,
206                                                     Value *UBPtr) {
207   const std::string Name = "GOMP_loop_runtime_next";
208 
209   Function *F = M->getFunction(Name);
210 
211   // If F is not available, declare it.
212   if (!F) {
213     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
214     Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()};
215     FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
216     F = Function::Create(Ty, Linkage, Name, M);
217   }
218 
219   Value *Args[] = {LBPtr, UBPtr};
220   Value *Return = Builder.CreateCall(F, Args);
221   Return = Builder.CreateICmpNE(
222       Return, Builder.CreateZExt(Builder.getFalse(), Return->getType()));
223   return Return;
224 }
225 
226 void ParallelLoopGenerator::createCallJoinThreads() {
227   const std::string Name = "GOMP_parallel_end";
228 
229   Function *F = M->getFunction(Name);
230 
231   // If F is not available, declare it.
232   if (!F) {
233     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
234 
235     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
236     F = Function::Create(Ty, Linkage, Name, M);
237   }
238 
239   Builder.CreateCall(F, {});
240 }
241 
242 void ParallelLoopGenerator::createCallCleanupThread() {
243   const std::string Name = "GOMP_loop_end_nowait";
244 
245   Function *F = M->getFunction(Name);
246 
247   // If F is not available, declare it.
248   if (!F) {
249     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
250 
251     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
252     F = Function::Create(Ty, Linkage, Name, M);
253   }
254 
255   Builder.CreateCall(F, {});
256 }
257 
258 Function *ParallelLoopGenerator::createSubFnDefinition() {
259   Function *F = Builder.GetInsertBlock()->getParent();
260   std::vector<Type *> Arguments(1, Builder.getInt8PtrTy());
261   FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
262   Function *SubFn = Function::Create(FT, Function::InternalLinkage,
263                                      F->getName() + "_polly_subfn", M);
264 
265   // Certain backends (e.g., NVPTX) do not support '.'s in function names.
266   // Hence, we ensure that all '.'s are replaced by '_'s.
267   std::string FunctionName = SubFn->getName();
268   std::replace(FunctionName.begin(), FunctionName.end(), '.', '_');
269   SubFn->setName(FunctionName);
270 
271   // Do not run any polly pass on the new function.
272   SubFn->addFnAttr(PollySkipFnAttr);
273 
274   Function::arg_iterator AI = SubFn->arg_begin();
275   AI->setName("polly.par.userContext");
276 
277   return SubFn;
278 }
279 
280 AllocaInst *
281 ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) {
282   SmallVector<Type *, 8> Members;
283 
284   for (Value *V : Values)
285     Members.push_back(V->getType());
286 
287   const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
288 
289   // We do not want to allocate the alloca inside any loop, thus we allocate it
290   // in the entry block of the function and use annotations to denote the actual
291   // live span (similar to clang).
292   BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock();
293   Instruction *IP = &*EntryBB.getFirstInsertionPt();
294   StructType *Ty = StructType::get(Builder.getContext(), Members);
295   AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
296                                       "polly.par.userContext", IP);
297 
298   for (unsigned i = 0; i < Values.size(); i++) {
299     Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
300     Address->setName("polly.subfn.storeaddr." + Values[i]->getName());
301     Builder.CreateStore(Values[i], Address);
302   }
303 
304   return Struct;
305 }
306 
307 void ParallelLoopGenerator::extractValuesFromStruct(
308     SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) {
309   for (unsigned i = 0; i < OldValues.size(); i++) {
310     Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
311     Value *NewValue = Builder.CreateLoad(Address);
312     NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName());
313     Map[OldValues[i]] = NewValue;
314   }
315 }
316 
317 Value *ParallelLoopGenerator::createSubFn(Value *Stride, AllocaInst *StructData,
318                                           SetVector<Value *> Data,
319                                           ValueMapT &Map, Function **SubFnPtr) {
320   BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *PreHeaderBB, *AfterBB;
321   Value *LBPtr, *UBPtr, *UserContext, *Ret1, *HasNextSchedule, *LB, *UB, *IV;
322   Function *SubFn = createSubFnDefinition();
323   LLVMContext &Context = SubFn->getContext();
324 
325   // Store the previous basic block.
326   PrevBB = Builder.GetInsertBlock();
327 
328   // Create basic blocks.
329   HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
330   ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
331   CheckNextBB = BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
332   PreHeaderBB = BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
333 
334   DT.addNewBlock(HeaderBB, PrevBB);
335   DT.addNewBlock(ExitBB, HeaderBB);
336   DT.addNewBlock(CheckNextBB, HeaderBB);
337   DT.addNewBlock(PreHeaderBB, HeaderBB);
338 
339   // Fill up basic block HeaderBB.
340   Builder.SetInsertPoint(HeaderBB);
341   LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
342   UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
343   UserContext = Builder.CreateBitCast(
344       &*SubFn->arg_begin(), StructData->getType(), "polly.par.userContext");
345 
346   extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext,
347                           Map);
348   Builder.CreateBr(CheckNextBB);
349 
350   // Add code to check if another set of iterations will be executed.
351   Builder.SetInsertPoint(CheckNextBB);
352   Ret1 = createCallGetWorkItem(LBPtr, UBPtr);
353   HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(),
354                                         "polly.par.hasNextScheduleBlock");
355   Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB);
356 
357   // Add code to load the iv bounds for this set of iterations.
358   Builder.SetInsertPoint(PreHeaderBB);
359   LB = Builder.CreateLoad(LBPtr, "polly.par.LB");
360   UB = Builder.CreateLoad(UBPtr, "polly.par.UB");
361 
362   // Subtract one as the upper bound provided by openmp is a < comparison
363   // whereas the codegenForSequential function creates a <= comparison.
364   UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1),
365                          "polly.par.UBAdjusted");
366 
367   Builder.CreateBr(CheckNextBB);
368   Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
369   IV = createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB, ICmpInst::ICMP_SLE,
370                   nullptr, true, /* UseGuard */ false);
371 
372   BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
373 
374   // Add code to terminate this subfunction.
375   Builder.SetInsertPoint(ExitBB);
376   createCallCleanupThread();
377   Builder.CreateRetVoid();
378 
379   Builder.SetInsertPoint(&*LoopBody);
380   *SubFnPtr = SubFn;
381 
382   return IV;
383 }
384