xref: /llvm-project/polly/lib/CodeGen/LoopGenerators.cpp (revision 6292a808b3524d9ba6f4ce55bc5b9e547b088dd8)
1 //===------ LoopGenerators.cpp -  IR helper to create loops ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains functions to create scalar loops and orchestrate the
10 // creation of parallel loops as LLVM-IR.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "polly/CodeGen/LoopGenerators.h"
15 #include "polly/Options.h"
16 #include "polly/ScopDetection.h"
17 #include "llvm/Analysis/LoopInfo.h"
18 #include "llvm/IR/DataLayout.h"
19 #include "llvm/IR/DebugInfoMetadata.h"
20 #include "llvm/IR/Dominators.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
24 
25 using namespace llvm;
26 using namespace polly;
27 
28 int polly::PollyNumThreads;
29 OMPGeneralSchedulingType polly::PollyScheduling;
30 int polly::PollyChunkSize;
31 
32 static cl::opt<int, true>
33     XPollyNumThreads("polly-num-threads",
34                      cl::desc("Number of threads to use (0 = auto)"),
35                      cl::Hidden, cl::location(polly::PollyNumThreads),
36                      cl::init(0), cl::cat(PollyCategory));
37 
38 cl::opt<bool> PollyVectorizeMetadata(
39     "polly-annotate-metadata-vectorize",
40     cl::desc("Append vectorize enable/disable metadata from polly"),
41     cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
42 
43 static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling(
44     "polly-scheduling",
45     cl::desc("Scheduling type of parallel OpenMP for loops"),
46     cl::values(clEnumValN(OMPGeneralSchedulingType::StaticChunked, "static",
47                           "Static scheduling"),
48                clEnumValN(OMPGeneralSchedulingType::Dynamic, "dynamic",
49                           "Dynamic scheduling"),
50                clEnumValN(OMPGeneralSchedulingType::Guided, "guided",
51                           "Guided scheduling"),
52                clEnumValN(OMPGeneralSchedulingType::Runtime, "runtime",
53                           "Runtime determined (OMP_SCHEDULE)")),
54     cl::Hidden, cl::location(polly::PollyScheduling),
55     cl::init(OMPGeneralSchedulingType::Runtime), cl::Optional,
56     cl::cat(PollyCategory));
57 
58 static cl::opt<int, true>
59     XPollyChunkSize("polly-scheduling-chunksize",
60                     cl::desc("Chunksize to use by the OpenMP runtime calls"),
61                     cl::Hidden, cl::location(polly::PollyChunkSize),
62                     cl::init(0), cl::Optional, cl::cat(PollyCategory));
63 
64 // We generate a loop of either of the following structures:
65 //
66 //              BeforeBB                      BeforeBB
67 //                 |                             |
68 //                 v                             v
69 //              GuardBB                      PreHeaderBB
70 //              /      |                         |   _____
71 //     __  PreHeaderBB  |                        v  \/    |
72 //    /  \    /         |                     HeaderBB  latch
73 // latch  HeaderBB      |                        |\       |
74 //    \  /    \         /                        | \------/
75 //     <       \       /                         |
76 //              \     /                          v
77 //              ExitBB                         ExitBB
78 //
79 // depending on whether or not we know that it is executed at least once. If
80 // not, GuardBB checks if the loop is executed at least once. If this is the
81 // case we branch to PreHeaderBB and subsequently to the HeaderBB, which
82 // contains the loop iv 'polly.indvar', the incremented loop iv
83 // 'polly.indvar_next' as well as the condition to check if we execute another
84 // iteration of the loop. After the loop has finished, we branch to ExitBB.
85 // We expect the type of UB, LB, UB+Stride to be large enough for values that
86 // UB may take throughout the execution of the loop, including the computation
87 // of indvar + Stride before the final abort.
88 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
89                          PollyIRBuilder &Builder, LoopInfo &LI,
90                          DominatorTree &DT, BasicBlock *&ExitBB,
91                          ICmpInst::Predicate Predicate,
92                          ScopAnnotator *Annotator, bool Parallel, bool UseGuard,
93                          bool LoopVectDisabled) {
94   Function *F = Builder.GetInsertBlock()->getParent();
95   LLVMContext &Context = F->getContext();
96 
97   assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
98   IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
99   assert(LoopIVType && "UB is not integer?");
100 
101   BasicBlock *BeforeBB = Builder.GetInsertBlock();
102   BasicBlock *GuardBB =
103       UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr;
104   BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
105   BasicBlock *PreHeaderBB =
106       BasicBlock::Create(Context, "polly.loop_preheader", F);
107 
108   // Update LoopInfo
109   Loop *OuterLoop = LI.getLoopFor(BeforeBB);
110   Loop *NewLoop = LI.AllocateLoop();
111 
112   if (OuterLoop)
113     OuterLoop->addChildLoop(NewLoop);
114   else
115     LI.addTopLevelLoop(NewLoop);
116 
117   if (OuterLoop) {
118     if (GuardBB)
119       OuterLoop->addBasicBlockToLoop(GuardBB, LI);
120     OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI);
121   }
122 
123   NewLoop->addBasicBlockToLoop(HeaderBB, LI);
124 
125   // Notify the annotator (if present) that we have a new loop, but only
126   // after the header block is set.
127   if (Annotator)
128     Annotator->pushLoop(NewLoop, Parallel);
129 
130   // ExitBB
131   ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI);
132   ExitBB->setName("polly.loop_exit");
133 
134   // BeforeBB
135   if (GuardBB) {
136     BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
137     DT.addNewBlock(GuardBB, BeforeBB);
138 
139     // GuardBB
140     Builder.SetInsertPoint(GuardBB);
141     Value *LoopGuard;
142     LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
143     LoopGuard->setName("polly.loop_guard");
144     Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
145     DT.addNewBlock(PreHeaderBB, GuardBB);
146   } else {
147     BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB);
148     DT.addNewBlock(PreHeaderBB, BeforeBB);
149   }
150 
151   // PreHeaderBB
152   Builder.SetInsertPoint(PreHeaderBB);
153   Builder.CreateBr(HeaderBB);
154 
155   // HeaderBB
156   DT.addNewBlock(HeaderBB, PreHeaderBB);
157   Builder.SetInsertPoint(HeaderBB);
158   PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
159   IV->addIncoming(LB, PreHeaderBB);
160   Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
161   Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
162   Value *LoopCondition =
163       Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond");
164 
165   // Create the loop latch and annotate it as such.
166   BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
167 
168   // Don't annotate vectorize metadata when both LoopVectDisabled and
169   // PollyVectorizeMetadata are disabled. Annotate vectorize metadata to false
170   // when LoopVectDisabled is true. Otherwise we annotate the vectorize metadata
171   // to true.
172   if (Annotator) {
173     std::optional<bool> EnableVectorizeMetadata;
174     if (LoopVectDisabled)
175       EnableVectorizeMetadata = false;
176     else if (PollyVectorizeMetadata)
177       EnableVectorizeMetadata = true;
178     Annotator->annotateLoopLatch(B, Parallel, EnableVectorizeMetadata);
179   }
180 
181   IV->addIncoming(IncrementedIV, HeaderBB);
182   if (GuardBB)
183     DT.changeImmediateDominator(ExitBB, GuardBB);
184   else
185     DT.changeImmediateDominator(ExitBB, HeaderBB);
186 
187   // The loop body should be added here.
188   Builder.SetInsertPoint(HeaderBB->getFirstNonPHIIt());
189   return IV;
190 }
191 
192 Value *ParallelLoopGenerator::createParallelLoop(
193     Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
194     ValueMapT &Map, BasicBlock::iterator *LoopBody) {
195 
196   AllocaInst *Struct = storeValuesIntoStruct(UsedValues);
197   BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint();
198 
199   Value *IV;
200   Function *SubFn;
201   std::tie(IV, SubFn) = createSubFn(Stride, Struct, UsedValues, Map);
202   *LoopBody = Builder.GetInsertPoint();
203   Builder.SetInsertPoint(&*BeforeLoop);
204 
205   // Add one as the upper bound provided by OpenMP is a < comparison
206   // whereas the codegenForSequential function creates a <= comparison.
207   UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1));
208 
209   // Execute the prepared subfunction in parallel.
210   deployParallelExecution(SubFn, Struct, LB, UB, Stride);
211 
212   return IV;
213 }
214 
215 Function *ParallelLoopGenerator::createSubFnDefinition() {
216   Function *F = Builder.GetInsertBlock()->getParent();
217   Function *SubFn = prepareSubFnDefinition(F);
218 
219   // Certain backends (e.g., NVPTX) do not support '.'s in function names.
220   // Hence, we ensure that all '.'s are replaced by '_'s.
221   std::string FunctionName = SubFn->getName().str();
222   std::replace(FunctionName.begin(), FunctionName.end(), '.', '_');
223   SubFn->setName(FunctionName);
224 
225   // Do not run any polly pass on the new function.
226   SubFn->addFnAttr(PollySkipFnAttr);
227 
228   return SubFn;
229 }
230 
231 AllocaInst *
232 ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) {
233   SmallVector<Type *, 8> Members;
234 
235   for (Value *V : Values)
236     Members.push_back(V->getType());
237 
238   const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
239 
240   // We do not want to allocate the alloca inside any loop, thus we allocate it
241   // in the entry block of the function and use annotations to denote the actual
242   // live span (similar to clang).
243   BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock();
244   BasicBlock::iterator IP = EntryBB.getFirstInsertionPt();
245   StructType *Ty = StructType::get(Builder.getContext(), Members);
246   AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
247                                       "polly.par.userContext", IP);
248 
249   for (unsigned i = 0; i < Values.size(); i++) {
250     Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
251     Address->setName("polly.subfn.storeaddr." + Values[i]->getName());
252     Builder.CreateStore(Values[i], Address);
253   }
254 
255   return Struct;
256 }
257 
258 void ParallelLoopGenerator::extractValuesFromStruct(
259     SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) {
260   for (unsigned i = 0; i < OldValues.size(); i++) {
261     Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
262     Type *ElemTy = cast<GetElementPtrInst>(Address)->getResultElementType();
263     Value *NewValue = Builder.CreateLoad(ElemTy, Address);
264     NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName());
265     Map[OldValues[i]] = NewValue;
266   }
267 }
268 
269 DebugLoc polly::createDebugLocForGeneratedCode(Function *F) {
270   if (!F)
271     return DebugLoc();
272 
273   LLVMContext &Ctx = F->getContext();
274   DISubprogram *DILScope =
275       dyn_cast_or_null<DISubprogram>(F->getMetadata(LLVMContext::MD_dbg));
276   if (!DILScope)
277     return DebugLoc();
278   return DILocation::get(Ctx, 0, 0, DILScope);
279 }
280