1 //===------ LoopGeneratorsGOMP.cpp - IR helper to create loops ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains functions to create parallel loops as LLVM-IR. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "polly/CodeGen/LoopGeneratorsGOMP.h" 14 #include "llvm/Analysis/LoopInfo.h" 15 #include "llvm/IR/Dominators.h" 16 #include "llvm/IR/Module.h" 17 18 using namespace llvm; 19 using namespace polly; 20 21 void ParallelLoopGeneratorGOMP::createCallSpawnThreads(Value *SubFn, 22 Value *SubFnParam, 23 Value *LB, Value *UB, 24 Value *Stride) { 25 const std::string Name = "GOMP_parallel_loop_runtime_start"; 26 27 Function *F = M->getFunction(Name); 28 29 // If F is not available, declare it. 30 if (!F) { 31 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 32 33 Type *Params[] = {PointerType::getUnqual(FunctionType::get( 34 Builder.getVoidTy(), Builder.getPtrTy(), false)), 35 Builder.getPtrTy(), 36 Builder.getInt32Ty(), 37 LongType, 38 LongType, 39 LongType}; 40 41 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); 42 F = Function::Create(Ty, Linkage, Name, M); 43 } 44 45 Value *Args[] = {SubFn, SubFnParam, Builder.getInt32(PollyNumThreads), 46 LB, UB, Stride}; 47 48 CallInst *Call = Builder.CreateCall(F, Args); 49 Call->setDebugLoc(DLGenerated); 50 } 51 52 void ParallelLoopGeneratorGOMP::deployParallelExecution(Function *SubFn, 53 Value *SubFnParam, 54 Value *LB, Value *UB, 55 Value *Stride) { 56 // Tell the runtime we start a parallel loop 57 createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride); 58 CallInst *Call = Builder.CreateCall(SubFn, SubFnParam); 59 Call->setDebugLoc(DLGenerated); 60 createCallJoinThreads(); 61 } 62 63 Function *ParallelLoopGeneratorGOMP::prepareSubFnDefinition(Function *F) const { 64 FunctionType *FT = 65 FunctionType::get(Builder.getVoidTy(), {Builder.getPtrTy()}, false); 66 Function *SubFn = Function::Create(FT, Function::InternalLinkage, 67 F->getName() + "_polly_subfn", M); 68 // Name the function's arguments 69 SubFn->arg_begin()->setName("polly.par.userContext"); 70 return SubFn; 71 } 72 73 // Create a subfunction of the following (preliminary) structure: 74 // 75 // PrevBB 76 // | 77 // v 78 // HeaderBB 79 // | _____ 80 // v v | 81 // CheckNextBB PreHeaderBB 82 // |\ | 83 // | \______/ 84 // | 85 // v 86 // ExitBB 87 // 88 // HeaderBB will hold allocations and loading of variables. 89 // CheckNextBB will check for more work. 90 // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB. 91 // PreHeaderBB loads the new boundaries (& will lead to the loop body later on). 92 // ExitBB marks the end of the parallel execution. 93 std::tuple<Value *, Function *> 94 ParallelLoopGeneratorGOMP::createSubFn(Value *Stride, AllocaInst *StructData, 95 SetVector<Value *> Data, 96 ValueMapT &Map) { 97 if (PollyScheduling != OMPGeneralSchedulingType::Runtime) { 98 // User tried to influence the scheduling type (currently not supported) 99 errs() << "warning: Polly's GNU OpenMP backend solely " 100 "supports the scheduling type 'runtime'.\n"; 101 } 102 103 if (PollyChunkSize != 0) { 104 // User tried to influence the chunk size (currently not supported) 105 errs() << "warning: Polly's GNU OpenMP backend solely " 106 "supports the default chunk size.\n"; 107 } 108 109 Function *SubFn = createSubFnDefinition(); 110 LLVMContext &Context = SubFn->getContext(); 111 112 // Create basic blocks. 113 BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn); 114 SubFnDT = std::make_unique<DominatorTree>(*SubFn); 115 SubFnLI = std::make_unique<LoopInfo>(*SubFnDT); 116 117 BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn); 118 BasicBlock *CheckNextBB = 119 BasicBlock::Create(Context, "polly.par.checkNext", SubFn); 120 BasicBlock *PreHeaderBB = 121 BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn); 122 123 SubFnDT->addNewBlock(ExitBB, HeaderBB); 124 SubFnDT->addNewBlock(CheckNextBB, HeaderBB); 125 SubFnDT->addNewBlock(PreHeaderBB, HeaderBB); 126 127 // Fill up basic block HeaderBB. 128 Builder.SetInsertPoint(HeaderBB); 129 Value *LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr"); 130 Value *UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr"); 131 Value *UserContext = &*SubFn->arg_begin(); 132 133 extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext, 134 Map); 135 Builder.CreateBr(CheckNextBB); 136 137 // Add code to check if another set of iterations will be executed. 138 Builder.SetInsertPoint(CheckNextBB); 139 Value *Next = createCallGetWorkItem(LBPtr, UBPtr); 140 Value *HasNextSchedule = Builder.CreateTrunc( 141 Next, Builder.getInt1Ty(), "polly.par.hasNextScheduleBlock"); 142 Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB); 143 144 // Add code to load the iv bounds for this set of iterations. 145 Builder.SetInsertPoint(PreHeaderBB); 146 Value *LB = Builder.CreateLoad(LongType, LBPtr, "polly.par.LB"); 147 Value *UB = Builder.CreateLoad(LongType, UBPtr, "polly.par.UB"); 148 149 // Subtract one as the upper bound provided by OpenMP is a < comparison 150 // whereas the codegenForSequential function creates a <= comparison. 151 UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1), 152 "polly.par.UBAdjusted"); 153 154 Builder.CreateBr(CheckNextBB); 155 Builder.SetInsertPoint(&*--Builder.GetInsertPoint()); 156 BasicBlock *AfterBB; 157 Value *IV = 158 createLoop(LB, UB, Stride, Builder, *SubFnLI, *SubFnDT, AfterBB, 159 ICmpInst::ICMP_SLE, nullptr, true, /* UseGuard */ false); 160 161 BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); 162 163 // Add code to terminate this subfunction. 164 Builder.SetInsertPoint(ExitBB); 165 createCallCleanupThread(); 166 Builder.CreateRetVoid(); 167 168 Builder.SetInsertPoint(&*LoopBody); 169 170 // FIXME: Call SubFnDT->verify() and SubFnLI->verify() to check that the 171 // DominatorTree/LoopInfo has been created correctly. Alternatively, recreate 172 // from scratch since it is not needed here directly. 173 174 return std::make_tuple(IV, SubFn); 175 } 176 177 Value *ParallelLoopGeneratorGOMP::createCallGetWorkItem(Value *LBPtr, 178 Value *UBPtr) { 179 const std::string Name = "GOMP_loop_runtime_next"; 180 181 Function *F = M->getFunction(Name); 182 183 // If F is not available, declare it. 184 if (!F) { 185 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 186 Type *Params[] = {Builder.getPtrTy(0), Builder.getPtrTy(0)}; 187 FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false); 188 F = Function::Create(Ty, Linkage, Name, M); 189 } 190 191 Value *Args[] = {LBPtr, UBPtr}; 192 CallInst *Call = Builder.CreateCall(F, Args); 193 Call->setDebugLoc(DLGenerated); 194 Value *Return = Builder.CreateICmpNE( 195 Call, Builder.CreateZExt(Builder.getFalse(), Call->getType())); 196 return Return; 197 } 198 199 void ParallelLoopGeneratorGOMP::createCallJoinThreads() { 200 const std::string Name = "GOMP_parallel_end"; 201 202 Function *F = M->getFunction(Name); 203 204 // If F is not available, declare it. 205 if (!F) { 206 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 207 208 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 209 F = Function::Create(Ty, Linkage, Name, M); 210 } 211 212 CallInst *Call = Builder.CreateCall(F, {}); 213 Call->setDebugLoc(DLGenerated); 214 } 215 216 void ParallelLoopGeneratorGOMP::createCallCleanupThread() { 217 const std::string Name = "GOMP_loop_end_nowait"; 218 219 Function *F = M->getFunction(Name); 220 221 // If F is not available, declare it. 222 if (!F) { 223 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 224 225 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 226 F = Function::Create(Ty, Linkage, Name, M); 227 } 228 229 CallInst *Call = Builder.CreateCall(F, {}); 230 Call->setDebugLoc(DLGenerated); 231 } 232