xref: /llvm-project/polly/lib/CodeGen/LoopGeneratorsGOMP.cpp (revision 467a9bde06e681cecc69afa18580aadf2ed9769b)
1 //===------ LoopGeneratorsGOMP.cpp - IR helper to create loops ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains functions to create parallel loops as LLVM-IR.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "polly/CodeGen/LoopGeneratorsGOMP.h"
14 #include "llvm/Analysis/LoopInfo.h"
15 #include "llvm/IR/Dominators.h"
16 #include "llvm/IR/Module.h"
17 
18 using namespace llvm;
19 using namespace polly;
20 
21 void ParallelLoopGeneratorGOMP::createCallSpawnThreads(Value *SubFn,
22                                                        Value *SubFnParam,
23                                                        Value *LB, Value *UB,
24                                                        Value *Stride) {
25   const std::string Name = "GOMP_parallel_loop_runtime_start";
26 
27   Function *F = M->getFunction(Name);
28 
29   // If F is not available, declare it.
30   if (!F) {
31     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
32 
33     Type *Params[] = {PointerType::getUnqual(FunctionType::get(
34                           Builder.getVoidTy(), Builder.getPtrTy(), false)),
35                       Builder.getPtrTy(),
36                       Builder.getInt32Ty(),
37                       LongType,
38                       LongType,
39                       LongType};
40 
41     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
42     F = Function::Create(Ty, Linkage, Name, M);
43   }
44 
45   Value *Args[] = {SubFn, SubFnParam, Builder.getInt32(PollyNumThreads),
46                    LB,    UB,         Stride};
47 
48   CallInst *Call = Builder.CreateCall(F, Args);
49   Call->setDebugLoc(DLGenerated);
50 }
51 
52 void ParallelLoopGeneratorGOMP::deployParallelExecution(Function *SubFn,
53                                                         Value *SubFnParam,
54                                                         Value *LB, Value *UB,
55                                                         Value *Stride) {
56   // Tell the runtime we start a parallel loop
57   createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
58   CallInst *Call = Builder.CreateCall(SubFn, SubFnParam);
59   Call->setDebugLoc(DLGenerated);
60   createCallJoinThreads();
61 }
62 
63 Function *ParallelLoopGeneratorGOMP::prepareSubFnDefinition(Function *F) const {
64   FunctionType *FT =
65       FunctionType::get(Builder.getVoidTy(), {Builder.getPtrTy()}, false);
66   Function *SubFn = Function::Create(FT, Function::InternalLinkage,
67                                      F->getName() + "_polly_subfn", M);
68   // Name the function's arguments
69   SubFn->arg_begin()->setName("polly.par.userContext");
70   return SubFn;
71 }
72 
73 // Create a subfunction of the following (preliminary) structure:
74 //
75 //    PrevBB
76 //       |
77 //       v
78 //    HeaderBB
79 //       |   _____
80 //       v  v    |
81 //   CheckNextBB  PreHeaderBB
82 //       |\       |
83 //       | \______/
84 //       |
85 //       v
86 //     ExitBB
87 //
88 // HeaderBB will hold allocations and loading of variables.
89 // CheckNextBB will check for more work.
90 // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB.
91 // PreHeaderBB loads the new boundaries (& will lead to the loop body later on).
92 // ExitBB marks the end of the parallel execution.
93 std::tuple<Value *, Function *>
94 ParallelLoopGeneratorGOMP::createSubFn(Value *Stride, AllocaInst *StructData,
95                                        SetVector<Value *> Data,
96                                        ValueMapT &Map) {
97   if (PollyScheduling != OMPGeneralSchedulingType::Runtime) {
98     // User tried to influence the scheduling type (currently not supported)
99     errs() << "warning: Polly's GNU OpenMP backend solely "
100               "supports the scheduling type 'runtime'.\n";
101   }
102 
103   if (PollyChunkSize != 0) {
104     // User tried to influence the chunk size (currently not supported)
105     errs() << "warning: Polly's GNU OpenMP backend solely "
106               "supports the default chunk size.\n";
107   }
108 
109   Function *SubFn = createSubFnDefinition();
110   LLVMContext &Context = SubFn->getContext();
111 
112   // Create basic blocks.
113   BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
114   SubFnDT = std::make_unique<DominatorTree>(*SubFn);
115   SubFnLI = std::make_unique<LoopInfo>(*SubFnDT);
116 
117   BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
118   BasicBlock *CheckNextBB =
119       BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
120   BasicBlock *PreHeaderBB =
121       BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
122 
123   SubFnDT->addNewBlock(ExitBB, HeaderBB);
124   SubFnDT->addNewBlock(CheckNextBB, HeaderBB);
125   SubFnDT->addNewBlock(PreHeaderBB, HeaderBB);
126 
127   // Fill up basic block HeaderBB.
128   Builder.SetInsertPoint(HeaderBB);
129   Value *LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
130   Value *UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
131   Value *UserContext = &*SubFn->arg_begin();
132 
133   extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext,
134                           Map);
135   Builder.CreateBr(CheckNextBB);
136 
137   // Add code to check if another set of iterations will be executed.
138   Builder.SetInsertPoint(CheckNextBB);
139   Value *Next = createCallGetWorkItem(LBPtr, UBPtr);
140   Value *HasNextSchedule = Builder.CreateTrunc(
141       Next, Builder.getInt1Ty(), "polly.par.hasNextScheduleBlock");
142   Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB);
143 
144   // Add code to load the iv bounds for this set of iterations.
145   Builder.SetInsertPoint(PreHeaderBB);
146   Value *LB = Builder.CreateLoad(LongType, LBPtr, "polly.par.LB");
147   Value *UB = Builder.CreateLoad(LongType, UBPtr, "polly.par.UB");
148 
149   // Subtract one as the upper bound provided by OpenMP is a < comparison
150   // whereas the codegenForSequential function creates a <= comparison.
151   UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1),
152                          "polly.par.UBAdjusted");
153 
154   Builder.CreateBr(CheckNextBB);
155   Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
156   BasicBlock *AfterBB;
157   Value *IV =
158       createLoop(LB, UB, Stride, Builder, *SubFnLI, *SubFnDT, AfterBB,
159                  ICmpInst::ICMP_SLE, nullptr, true, /* UseGuard */ false);
160 
161   BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
162 
163   // Add code to terminate this subfunction.
164   Builder.SetInsertPoint(ExitBB);
165   createCallCleanupThread();
166   Builder.CreateRetVoid();
167 
168   Builder.SetInsertPoint(&*LoopBody);
169 
170   // FIXME: Call SubFnDT->verify() and SubFnLI->verify() to check that the
171   // DominatorTree/LoopInfo has been created correctly. Alternatively, recreate
172   // from scratch since it is not needed here directly.
173 
174   return std::make_tuple(IV, SubFn);
175 }
176 
177 Value *ParallelLoopGeneratorGOMP::createCallGetWorkItem(Value *LBPtr,
178                                                         Value *UBPtr) {
179   const std::string Name = "GOMP_loop_runtime_next";
180 
181   Function *F = M->getFunction(Name);
182 
183   // If F is not available, declare it.
184   if (!F) {
185     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
186     Type *Params[] = {Builder.getPtrTy(0), Builder.getPtrTy(0)};
187     FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
188     F = Function::Create(Ty, Linkage, Name, M);
189   }
190 
191   Value *Args[] = {LBPtr, UBPtr};
192   CallInst *Call = Builder.CreateCall(F, Args);
193   Call->setDebugLoc(DLGenerated);
194   Value *Return = Builder.CreateICmpNE(
195       Call, Builder.CreateZExt(Builder.getFalse(), Call->getType()));
196   return Return;
197 }
198 
199 void ParallelLoopGeneratorGOMP::createCallJoinThreads() {
200   const std::string Name = "GOMP_parallel_end";
201 
202   Function *F = M->getFunction(Name);
203 
204   // If F is not available, declare it.
205   if (!F) {
206     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
207 
208     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
209     F = Function::Create(Ty, Linkage, Name, M);
210   }
211 
212   CallInst *Call = Builder.CreateCall(F, {});
213   Call->setDebugLoc(DLGenerated);
214 }
215 
216 void ParallelLoopGeneratorGOMP::createCallCleanupThread() {
217   const std::string Name = "GOMP_loop_end_nowait";
218 
219   Function *F = M->getFunction(Name);
220 
221   // If F is not available, declare it.
222   if (!F) {
223     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
224 
225     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
226     F = Function::Create(Ty, Linkage, Name, M);
227   }
228 
229   CallInst *Call = Builder.CreateCall(F, {});
230   Call->setDebugLoc(DLGenerated);
231 }
232