189251edeSMichael Kruse //===------ LoopGeneratorsKMP.cpp - IR helper to create loops -------------===// 289251edeSMichael Kruse // 389251edeSMichael Kruse // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 489251edeSMichael Kruse // See https://llvm.org/LICENSE.txt for license information. 589251edeSMichael Kruse // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 689251edeSMichael Kruse // 789251edeSMichael Kruse //===----------------------------------------------------------------------===// 889251edeSMichael Kruse // 989251edeSMichael Kruse // This file contains functions to create parallel loops as LLVM-IR. 1089251edeSMichael Kruse // 1189251edeSMichael Kruse //===----------------------------------------------------------------------===// 1289251edeSMichael Kruse 1389251edeSMichael Kruse #include "polly/CodeGen/LoopGeneratorsKMP.h" 1422c77f23SMichael Kruse #include "llvm/Analysis/LoopInfo.h" 1589251edeSMichael Kruse #include "llvm/IR/Dominators.h" 1689251edeSMichael Kruse #include "llvm/IR/Module.h" 1789251edeSMichael Kruse 1889251edeSMichael Kruse using namespace llvm; 1989251edeSMichael Kruse using namespace polly; 2089251edeSMichael Kruse 2189251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallSpawnThreads(Value *SubFn, 2289251edeSMichael Kruse Value *SubFnParam, 2389251edeSMichael Kruse Value *LB, Value *UB, 2489251edeSMichael Kruse Value *Stride) { 2589251edeSMichael Kruse const std::string Name = "__kmpc_fork_call"; 2689251edeSMichael Kruse Function *F = M->getFunction(Name); 27fe431683SNick Lewycky Type *KMPCMicroTy = StructType::getTypeByName(M->getContext(), "kmpc_micro"); 2889251edeSMichael Kruse 2989251edeSMichael Kruse if (!KMPCMicroTy) { 3089251edeSMichael Kruse // void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...) 31*2ab2539cSYoungsuk Kim Type *MicroParams[] = {Builder.getPtrTy(0), Builder.getPtrTy(0)}; 3289251edeSMichael Kruse 3389251edeSMichael Kruse KMPCMicroTy = FunctionType::get(Builder.getVoidTy(), MicroParams, true); 3489251edeSMichael Kruse } 3589251edeSMichael Kruse 3689251edeSMichael Kruse // If F is not available, declare it. 3789251edeSMichael Kruse if (!F) { 3889251edeSMichael Kruse GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 39*2ab2539cSYoungsuk Kim Type *Params[] = {Builder.getPtrTy(0), Builder.getInt32Ty(), 40*2ab2539cSYoungsuk Kim Builder.getPtrTy(0)}; 4189251edeSMichael Kruse 4289251edeSMichael Kruse FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, true); 4389251edeSMichael Kruse F = Function::Create(Ty, Linkage, Name, M); 4489251edeSMichael Kruse } 4589251edeSMichael Kruse 46*2ab2539cSYoungsuk Kim Value *Task = 47*2ab2539cSYoungsuk Kim Builder.CreatePointerBitCastOrAddrSpaceCast(SubFn, Builder.getPtrTy(0)); 4889251edeSMichael Kruse 4989251edeSMichael Kruse Value *Args[] = {SourceLocationInfo, 5089251edeSMichael Kruse Builder.getInt32(4) /* Number of arguments (w/o Task) */, 5189251edeSMichael Kruse Task, 5289251edeSMichael Kruse LB, 5389251edeSMichael Kruse UB, 5489251edeSMichael Kruse Stride, 5589251edeSMichael Kruse SubFnParam}; 5689251edeSMichael Kruse 57fe0e5b3eSMichael Kruse CallInst *Call = Builder.CreateCall(F, Args); 58fe0e5b3eSMichael Kruse Call->setDebugLoc(DLGenerated); 5989251edeSMichael Kruse } 6089251edeSMichael Kruse 613e5d671cSEli Friedman void ParallelLoopGeneratorKMP::deployParallelExecution(Function *SubFn, 6289251edeSMichael Kruse Value *SubFnParam, 6389251edeSMichael Kruse Value *LB, Value *UB, 6489251edeSMichael Kruse Value *Stride) { 6589251edeSMichael Kruse // Inform OpenMP runtime about the number of threads if greater than zero 6689251edeSMichael Kruse if (PollyNumThreads > 0) { 6789251edeSMichael Kruse Value *GlobalThreadID = createCallGlobalThreadNum(); 6889251edeSMichael Kruse createCallPushNumThreads(GlobalThreadID, Builder.getInt32(PollyNumThreads)); 6989251edeSMichael Kruse } 7089251edeSMichael Kruse 7189251edeSMichael Kruse // Tell the runtime we start a parallel loop 7289251edeSMichael Kruse createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride); 7389251edeSMichael Kruse } 7489251edeSMichael Kruse 7589251edeSMichael Kruse Function *ParallelLoopGeneratorKMP::prepareSubFnDefinition(Function *F) const { 76*2ab2539cSYoungsuk Kim std::vector<Type *> Arguments = { 77*2ab2539cSYoungsuk Kim Builder.getPtrTy(0), Builder.getPtrTy(0), LongType, LongType, LongType, 78a3ef8589SFangrui Song Builder.getPtrTy()}; 7989251edeSMichael Kruse 8089251edeSMichael Kruse FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false); 8189251edeSMichael Kruse Function *SubFn = Function::Create(FT, Function::InternalLinkage, 8289251edeSMichael Kruse F->getName() + "_polly_subfn", M); 8389251edeSMichael Kruse // Name the function's arguments 8489251edeSMichael Kruse Function::arg_iterator AI = SubFn->arg_begin(); 8589251edeSMichael Kruse AI->setName("polly.kmpc.global_tid"); 8689251edeSMichael Kruse std::advance(AI, 1); 8789251edeSMichael Kruse AI->setName("polly.kmpc.bound_tid"); 8889251edeSMichael Kruse std::advance(AI, 1); 8989251edeSMichael Kruse AI->setName("polly.kmpc.lb"); 9089251edeSMichael Kruse std::advance(AI, 1); 9189251edeSMichael Kruse AI->setName("polly.kmpc.ub"); 9289251edeSMichael Kruse std::advance(AI, 1); 9389251edeSMichael Kruse AI->setName("polly.kmpc.inc"); 9489251edeSMichael Kruse std::advance(AI, 1); 9589251edeSMichael Kruse AI->setName("polly.kmpc.shared"); 9689251edeSMichael Kruse 9789251edeSMichael Kruse return SubFn; 9889251edeSMichael Kruse } 9989251edeSMichael Kruse 10089251edeSMichael Kruse // Create a subfunction of the following (preliminary) structure: 10189251edeSMichael Kruse // 10289251edeSMichael Kruse // PrevBB 10389251edeSMichael Kruse // | 10489251edeSMichael Kruse // v 10589251edeSMichael Kruse // HeaderBB 1061e0be76eSMichael Halkenhäuser // / | _____ 1071e0be76eSMichael Halkenhäuser // / v v | 1081e0be76eSMichael Halkenhäuser // / PreHeaderBB | 1091e0be76eSMichael Halkenhäuser // | | | 1101e0be76eSMichael Halkenhäuser // | v | 1111e0be76eSMichael Halkenhäuser // | CheckNextBB | 1121e0be76eSMichael Halkenhäuser // \ | \_____/ 1131e0be76eSMichael Halkenhäuser // \ | 1141e0be76eSMichael Halkenhäuser // v v 11589251edeSMichael Kruse // ExitBB 11689251edeSMichael Kruse // 11789251edeSMichael Kruse // HeaderBB will hold allocations, loading of variables and kmp-init calls. 1181e0be76eSMichael Halkenhäuser // CheckNextBB will check for more work (dynamic / static chunked) or will be 1191e0be76eSMichael Halkenhäuser // empty (static non chunked). 12089251edeSMichael Kruse // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB. 12189251edeSMichael Kruse // PreHeaderBB loads the new boundaries (& will lead to the loop body later on). 1221e0be76eSMichael Halkenhäuser // Just like CheckNextBB: PreHeaderBB is (preliminary) empty in the static non 1231e0be76eSMichael Halkenhäuser // chunked scheduling case. ExitBB marks the end of the parallel execution. 12489251edeSMichael Kruse // The possibly empty BasicBlocks will automatically be removed. 12589251edeSMichael Kruse std::tuple<Value *, Function *> 1261e0be76eSMichael Halkenhäuser ParallelLoopGeneratorKMP::createSubFn(Value *SequentialLoopStride, 12789251edeSMichael Kruse AllocaInst *StructData, 12889251edeSMichael Kruse SetVector<Value *> Data, ValueMapT &Map) { 12989251edeSMichael Kruse Function *SubFn = createSubFnDefinition(); 13089251edeSMichael Kruse LLVMContext &Context = SubFn->getContext(); 13189251edeSMichael Kruse 13289251edeSMichael Kruse // Create basic blocks. 13389251edeSMichael Kruse BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn); 13422c77f23SMichael Kruse SubFnDT = std::make_unique<DominatorTree>(*SubFn); 13522c77f23SMichael Kruse SubFnLI = std::make_unique<LoopInfo>(*SubFnDT); 13622c77f23SMichael Kruse 13789251edeSMichael Kruse BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn); 13889251edeSMichael Kruse BasicBlock *CheckNextBB = 13989251edeSMichael Kruse BasicBlock::Create(Context, "polly.par.checkNext", SubFn); 14089251edeSMichael Kruse BasicBlock *PreHeaderBB = 14189251edeSMichael Kruse BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn); 14289251edeSMichael Kruse 14322c77f23SMichael Kruse SubFnDT->addNewBlock(ExitBB, HeaderBB); 14422c77f23SMichael Kruse SubFnDT->addNewBlock(CheckNextBB, HeaderBB); 14522c77f23SMichael Kruse SubFnDT->addNewBlock(PreHeaderBB, HeaderBB); 14689251edeSMichael Kruse 14789251edeSMichael Kruse // Fill up basic block HeaderBB. 14889251edeSMichael Kruse Builder.SetInsertPoint(HeaderBB); 14989251edeSMichael Kruse Value *LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr"); 15089251edeSMichael Kruse Value *UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr"); 15189251edeSMichael Kruse Value *IsLastPtr = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, 15289251edeSMichael Kruse "polly.par.lastIterPtr"); 15389251edeSMichael Kruse Value *StridePtr = 15489251edeSMichael Kruse Builder.CreateAlloca(LongType, nullptr, "polly.par.StridePtr"); 15589251edeSMichael Kruse 15689251edeSMichael Kruse // Get iterator for retrieving the previously defined parameters. 15789251edeSMichael Kruse Function::arg_iterator AI = SubFn->arg_begin(); 15889251edeSMichael Kruse // First argument holds "global thread ID". 15989251edeSMichael Kruse Value *IDPtr = &*AI; 16089251edeSMichael Kruse // Skip "bound thread ID" since it is not used (but had to be defined). 16189251edeSMichael Kruse std::advance(AI, 2); 16289251edeSMichael Kruse // Move iterator to: LB, UB, Stride, Shared variable struct. 16389251edeSMichael Kruse Value *LB = &*AI; 16489251edeSMichael Kruse std::advance(AI, 1); 16589251edeSMichael Kruse Value *UB = &*AI; 16689251edeSMichael Kruse std::advance(AI, 1); 16789251edeSMichael Kruse Value *Stride = &*AI; 16889251edeSMichael Kruse std::advance(AI, 1); 16989251edeSMichael Kruse Value *Shared = &*AI; 17089251edeSMichael Kruse 17118680a36SNikita Popov extractValuesFromStruct(Data, StructData->getAllocatedType(), Shared, Map); 17289251edeSMichael Kruse 17359f95222SGuillaume Chatelet const auto Alignment = llvm::Align(is64BitArch() ? 8 : 4); 17446354bacSNikita Popov Value *ID = Builder.CreateAlignedLoad(Builder.getInt32Ty(), IDPtr, Alignment, 17546354bacSNikita Popov "polly.par.global_tid"); 17689251edeSMichael Kruse 17789251edeSMichael Kruse Builder.CreateAlignedStore(LB, LBPtr, Alignment); 17889251edeSMichael Kruse Builder.CreateAlignedStore(UB, UBPtr, Alignment); 17989251edeSMichael Kruse Builder.CreateAlignedStore(Builder.getInt32(0), IsLastPtr, Alignment); 18089251edeSMichael Kruse Builder.CreateAlignedStore(Stride, StridePtr, Alignment); 18189251edeSMichael Kruse 18289251edeSMichael Kruse // Subtract one as the upper bound provided by openmp is a < comparison 18389251edeSMichael Kruse // whereas the codegenForSequential function creates a <= comparison. 18489251edeSMichael Kruse Value *AdjustedUB = Builder.CreateAdd(UB, ConstantInt::get(LongType, -1), 18589251edeSMichael Kruse "polly.indvar.UBAdjusted"); 18689251edeSMichael Kruse 18789251edeSMichael Kruse Value *ChunkSize = 18889251edeSMichael Kruse ConstantInt::get(LongType, std::max<int>(PollyChunkSize, 1)); 18989251edeSMichael Kruse 1901e0be76eSMichael Halkenhäuser OMPGeneralSchedulingType Scheduling = 1911e0be76eSMichael Halkenhäuser getSchedType(PollyChunkSize, PollyScheduling); 1921e0be76eSMichael Halkenhäuser 1931e0be76eSMichael Halkenhäuser switch (Scheduling) { 19489251edeSMichael Kruse case OMPGeneralSchedulingType::Dynamic: 19589251edeSMichael Kruse case OMPGeneralSchedulingType::Guided: 19689251edeSMichael Kruse case OMPGeneralSchedulingType::Runtime: 19789251edeSMichael Kruse // "DYNAMIC" scheduling types are handled below (including 'runtime') 19889251edeSMichael Kruse { 19989251edeSMichael Kruse UB = AdjustedUB; 20089251edeSMichael Kruse createCallDispatchInit(ID, LB, UB, Stride, ChunkSize); 20189251edeSMichael Kruse Value *HasWork = 20289251edeSMichael Kruse createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr); 20389251edeSMichael Kruse Value *HasIteration = 20489251edeSMichael Kruse Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork, 20589251edeSMichael Kruse Builder.getInt32(1), "polly.hasIteration"); 20689251edeSMichael Kruse Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB); 20789251edeSMichael Kruse 20889251edeSMichael Kruse Builder.SetInsertPoint(CheckNextBB); 20989251edeSMichael Kruse HasWork = createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr); 21089251edeSMichael Kruse HasIteration = 21189251edeSMichael Kruse Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork, 21289251edeSMichael Kruse Builder.getInt32(1), "polly.hasWork"); 21389251edeSMichael Kruse Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB); 21489251edeSMichael Kruse 21589251edeSMichael Kruse Builder.SetInsertPoint(PreHeaderBB); 21646354bacSNikita Popov LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment, 21746354bacSNikita Popov "polly.indvar.LB"); 21846354bacSNikita Popov UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment, 21946354bacSNikita Popov "polly.indvar.UB"); 22089251edeSMichael Kruse } 22189251edeSMichael Kruse break; 22289251edeSMichael Kruse case OMPGeneralSchedulingType::StaticChunked: 22389251edeSMichael Kruse case OMPGeneralSchedulingType::StaticNonChunked: 22489251edeSMichael Kruse // "STATIC" scheduling types are handled below 22589251edeSMichael Kruse { 2261e0be76eSMichael Halkenhäuser Builder.CreateAlignedStore(AdjustedUB, UBPtr, Alignment); 22789251edeSMichael Kruse createCallStaticInit(ID, IsLastPtr, LBPtr, UBPtr, StridePtr, ChunkSize); 22889251edeSMichael Kruse 22946354bacSNikita Popov Value *ChunkedStride = Builder.CreateAlignedLoad( 23046354bacSNikita Popov LongType, StridePtr, Alignment, "polly.kmpc.stride"); 2311e0be76eSMichael Halkenhäuser 23246354bacSNikita Popov LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment, 23346354bacSNikita Popov "polly.indvar.LB"); 23446354bacSNikita Popov UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment, 23546354bacSNikita Popov "polly.indvar.UB.temp"); 23689251edeSMichael Kruse 2371e0be76eSMichael Halkenhäuser Value *UBInRange = 2381e0be76eSMichael Halkenhäuser Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLE, UB, AdjustedUB, 2391e0be76eSMichael Halkenhäuser "polly.indvar.UB.inRange"); 2401e0be76eSMichael Halkenhäuser UB = Builder.CreateSelect(UBInRange, UB, AdjustedUB, "polly.indvar.UB"); 24189251edeSMichael Kruse Builder.CreateAlignedStore(UB, UBPtr, Alignment); 24289251edeSMichael Kruse 24389251edeSMichael Kruse Value *HasIteration = Builder.CreateICmp( 24489251edeSMichael Kruse llvm::CmpInst::Predicate::ICMP_SLE, LB, UB, "polly.hasIteration"); 24589251edeSMichael Kruse Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB); 24689251edeSMichael Kruse 2471e0be76eSMichael Halkenhäuser if (Scheduling == OMPGeneralSchedulingType::StaticChunked) { 2481e0be76eSMichael Halkenhäuser Builder.SetInsertPoint(PreHeaderBB); 24946354bacSNikita Popov LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment, 2501e0be76eSMichael Halkenhäuser "polly.indvar.LB.entry"); 25146354bacSNikita Popov UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment, 2521e0be76eSMichael Halkenhäuser "polly.indvar.UB.entry"); 2531e0be76eSMichael Halkenhäuser } 2541e0be76eSMichael Halkenhäuser 25589251edeSMichael Kruse Builder.SetInsertPoint(CheckNextBB); 2561e0be76eSMichael Halkenhäuser 2571e0be76eSMichael Halkenhäuser if (Scheduling == OMPGeneralSchedulingType::StaticChunked) { 2581e0be76eSMichael Halkenhäuser Value *NextLB = 2591e0be76eSMichael Halkenhäuser Builder.CreateAdd(LB, ChunkedStride, "polly.indvar.nextLB"); 2601e0be76eSMichael Halkenhäuser Value *NextUB = Builder.CreateAdd(UB, ChunkedStride); 2611e0be76eSMichael Halkenhäuser 2621e0be76eSMichael Halkenhäuser Value *NextUBOutOfBounds = 2631e0be76eSMichael Halkenhäuser Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SGT, NextUB, 2641e0be76eSMichael Halkenhäuser AdjustedUB, "polly.indvar.nextUB.outOfBounds"); 2651e0be76eSMichael Halkenhäuser NextUB = Builder.CreateSelect(NextUBOutOfBounds, AdjustedUB, NextUB, 2661e0be76eSMichael Halkenhäuser "polly.indvar.nextUB"); 2671e0be76eSMichael Halkenhäuser 2681e0be76eSMichael Halkenhäuser Builder.CreateAlignedStore(NextLB, LBPtr, Alignment); 2691e0be76eSMichael Halkenhäuser Builder.CreateAlignedStore(NextUB, UBPtr, Alignment); 2701e0be76eSMichael Halkenhäuser 2711e0be76eSMichael Halkenhäuser Value *HasWork = 2721e0be76eSMichael Halkenhäuser Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLE, NextLB, 2731e0be76eSMichael Halkenhäuser AdjustedUB, "polly.hasWork"); 2741e0be76eSMichael Halkenhäuser Builder.CreateCondBr(HasWork, PreHeaderBB, ExitBB); 2751e0be76eSMichael Halkenhäuser } else { 27689251edeSMichael Kruse Builder.CreateBr(ExitBB); 2771e0be76eSMichael Halkenhäuser } 27889251edeSMichael Kruse 27989251edeSMichael Kruse Builder.SetInsertPoint(PreHeaderBB); 28089251edeSMichael Kruse } 28189251edeSMichael Kruse break; 28289251edeSMichael Kruse } 28389251edeSMichael Kruse 28489251edeSMichael Kruse Builder.CreateBr(CheckNextBB); 28589251edeSMichael Kruse Builder.SetInsertPoint(&*--Builder.GetInsertPoint()); 28689251edeSMichael Kruse BasicBlock *AfterBB; 28722c77f23SMichael Kruse Value *IV = createLoop(LB, UB, SequentialLoopStride, Builder, *SubFnLI, 28822c77f23SMichael Kruse *SubFnDT, AfterBB, ICmpInst::ICMP_SLE, nullptr, true, 28989251edeSMichael Kruse /* UseGuard */ false); 29089251edeSMichael Kruse 29189251edeSMichael Kruse BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); 29289251edeSMichael Kruse 29389251edeSMichael Kruse // Add code to terminate this subfunction. 29489251edeSMichael Kruse Builder.SetInsertPoint(ExitBB); 29589251edeSMichael Kruse // Static (i.e. non-dynamic) scheduling types, are terminated with a fini-call 2961e0be76eSMichael Halkenhäuser if (Scheduling == OMPGeneralSchedulingType::StaticChunked || 2971e0be76eSMichael Halkenhäuser Scheduling == OMPGeneralSchedulingType::StaticNonChunked) { 29889251edeSMichael Kruse createCallStaticFini(ID); 29989251edeSMichael Kruse } 30089251edeSMichael Kruse Builder.CreateRetVoid(); 30189251edeSMichael Kruse Builder.SetInsertPoint(&*LoopBody); 30289251edeSMichael Kruse 30322c77f23SMichael Kruse // FIXME: Call SubFnDT->verify() and SubFnLI->verify() to check that the 30422c77f23SMichael Kruse // DominatorTree/LoopInfo has been created correctly. Alternatively, recreate 30522c77f23SMichael Kruse // from scratch since it is not needed here directly. 30622c77f23SMichael Kruse 30789251edeSMichael Kruse return std::make_tuple(IV, SubFn); 30889251edeSMichael Kruse } 30989251edeSMichael Kruse 31089251edeSMichael Kruse Value *ParallelLoopGeneratorKMP::createCallGlobalThreadNum() { 31189251edeSMichael Kruse const std::string Name = "__kmpc_global_thread_num"; 31289251edeSMichael Kruse Function *F = M->getFunction(Name); 31389251edeSMichael Kruse 31489251edeSMichael Kruse // If F is not available, declare it. 31589251edeSMichael Kruse if (!F) { 31689251edeSMichael Kruse GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 317*2ab2539cSYoungsuk Kim Type *Params[] = {Builder.getPtrTy(0)}; 31889251edeSMichael Kruse 31989251edeSMichael Kruse FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), Params, false); 32089251edeSMichael Kruse F = Function::Create(Ty, Linkage, Name, M); 32189251edeSMichael Kruse } 32289251edeSMichael Kruse 323fe0e5b3eSMichael Kruse CallInst *Call = Builder.CreateCall(F, {SourceLocationInfo}); 324fe0e5b3eSMichael Kruse Call->setDebugLoc(DLGenerated); 325fe0e5b3eSMichael Kruse return Call; 32689251edeSMichael Kruse } 32789251edeSMichael Kruse 32889251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallPushNumThreads(Value *GlobalThreadID, 32989251edeSMichael Kruse Value *NumThreads) { 33089251edeSMichael Kruse const std::string Name = "__kmpc_push_num_threads"; 33189251edeSMichael Kruse Function *F = M->getFunction(Name); 33289251edeSMichael Kruse 33389251edeSMichael Kruse // If F is not available, declare it. 33489251edeSMichael Kruse if (!F) { 33589251edeSMichael Kruse GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 336*2ab2539cSYoungsuk Kim Type *Params[] = {Builder.getPtrTy(0), Builder.getInt32Ty(), 33789251edeSMichael Kruse Builder.getInt32Ty()}; 33889251edeSMichael Kruse 33989251edeSMichael Kruse FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); 34089251edeSMichael Kruse F = Function::Create(Ty, Linkage, Name, M); 34189251edeSMichael Kruse } 34289251edeSMichael Kruse 34389251edeSMichael Kruse Value *Args[] = {SourceLocationInfo, GlobalThreadID, NumThreads}; 34489251edeSMichael Kruse 345fe0e5b3eSMichael Kruse CallInst *Call = Builder.CreateCall(F, Args); 346fe0e5b3eSMichael Kruse Call->setDebugLoc(DLGenerated); 34789251edeSMichael Kruse } 34889251edeSMichael Kruse 34989251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallStaticInit(Value *GlobalThreadID, 35089251edeSMichael Kruse Value *IsLastPtr, 35189251edeSMichael Kruse Value *LBPtr, Value *UBPtr, 35289251edeSMichael Kruse Value *StridePtr, 35389251edeSMichael Kruse Value *ChunkSize) { 35489251edeSMichael Kruse const std::string Name = 35589251edeSMichael Kruse is64BitArch() ? "__kmpc_for_static_init_8" : "__kmpc_for_static_init_4"; 35689251edeSMichael Kruse Function *F = M->getFunction(Name); 35789251edeSMichael Kruse 35889251edeSMichael Kruse // If F is not available, declare it. 35989251edeSMichael Kruse if (!F) { 36089251edeSMichael Kruse GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 36189251edeSMichael Kruse 362*2ab2539cSYoungsuk Kim Type *Params[] = {Builder.getPtrTy(0), 36389251edeSMichael Kruse Builder.getInt32Ty(), 36489251edeSMichael Kruse Builder.getInt32Ty(), 365*2ab2539cSYoungsuk Kim Builder.getPtrTy(0), 366*2ab2539cSYoungsuk Kim Builder.getPtrTy(0), 367*2ab2539cSYoungsuk Kim Builder.getPtrTy(0), 368*2ab2539cSYoungsuk Kim Builder.getPtrTy(0), 36989251edeSMichael Kruse LongType, 37089251edeSMichael Kruse LongType}; 37189251edeSMichael Kruse 37289251edeSMichael Kruse FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); 37389251edeSMichael Kruse F = Function::Create(Ty, Linkage, Name, M); 37489251edeSMichael Kruse } 37589251edeSMichael Kruse 37689251edeSMichael Kruse // The parameter 'ChunkSize' will hold strictly positive integer values, 37789251edeSMichael Kruse // regardless of PollyChunkSize's value 37889251edeSMichael Kruse Value *Args[] = { 37989251edeSMichael Kruse SourceLocationInfo, 38089251edeSMichael Kruse GlobalThreadID, 38189251edeSMichael Kruse Builder.getInt32(int(getSchedType(PollyChunkSize, PollyScheduling))), 38289251edeSMichael Kruse IsLastPtr, 38389251edeSMichael Kruse LBPtr, 38489251edeSMichael Kruse UBPtr, 38589251edeSMichael Kruse StridePtr, 38689251edeSMichael Kruse ConstantInt::get(LongType, 1), 38789251edeSMichael Kruse ChunkSize}; 38889251edeSMichael Kruse 389fe0e5b3eSMichael Kruse CallInst *Call = Builder.CreateCall(F, Args); 390fe0e5b3eSMichael Kruse Call->setDebugLoc(DLGenerated); 39189251edeSMichael Kruse } 39289251edeSMichael Kruse 39389251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallStaticFini(Value *GlobalThreadID) { 39489251edeSMichael Kruse const std::string Name = "__kmpc_for_static_fini"; 39589251edeSMichael Kruse Function *F = M->getFunction(Name); 39689251edeSMichael Kruse 39789251edeSMichael Kruse // If F is not available, declare it. 39889251edeSMichael Kruse if (!F) { 39989251edeSMichael Kruse GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 400*2ab2539cSYoungsuk Kim Type *Params[] = {Builder.getPtrTy(0), Builder.getInt32Ty()}; 40189251edeSMichael Kruse FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); 40289251edeSMichael Kruse F = Function::Create(Ty, Linkage, Name, M); 40389251edeSMichael Kruse } 40489251edeSMichael Kruse 40589251edeSMichael Kruse Value *Args[] = {SourceLocationInfo, GlobalThreadID}; 40689251edeSMichael Kruse 407fe0e5b3eSMichael Kruse CallInst *Call = Builder.CreateCall(F, Args); 408fe0e5b3eSMichael Kruse Call->setDebugLoc(DLGenerated); 40989251edeSMichael Kruse } 41089251edeSMichael Kruse 41189251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallDispatchInit(Value *GlobalThreadID, 41289251edeSMichael Kruse Value *LB, Value *UB, 41389251edeSMichael Kruse Value *Inc, 41489251edeSMichael Kruse Value *ChunkSize) { 41589251edeSMichael Kruse const std::string Name = 41689251edeSMichael Kruse is64BitArch() ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_4"; 41789251edeSMichael Kruse Function *F = M->getFunction(Name); 41889251edeSMichael Kruse 41989251edeSMichael Kruse // If F is not available, declare it. 42089251edeSMichael Kruse if (!F) { 42189251edeSMichael Kruse GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 42289251edeSMichael Kruse 423*2ab2539cSYoungsuk Kim Type *Params[] = {Builder.getPtrTy(0), 42489251edeSMichael Kruse Builder.getInt32Ty(), 42589251edeSMichael Kruse Builder.getInt32Ty(), 42689251edeSMichael Kruse LongType, 42789251edeSMichael Kruse LongType, 42889251edeSMichael Kruse LongType, 42989251edeSMichael Kruse LongType}; 43089251edeSMichael Kruse 43189251edeSMichael Kruse FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); 43289251edeSMichael Kruse F = Function::Create(Ty, Linkage, Name, M); 43389251edeSMichael Kruse } 43489251edeSMichael Kruse 43589251edeSMichael Kruse // The parameter 'ChunkSize' will hold strictly positive integer values, 43689251edeSMichael Kruse // regardless of PollyChunkSize's value 43789251edeSMichael Kruse Value *Args[] = { 43889251edeSMichael Kruse SourceLocationInfo, 43989251edeSMichael Kruse GlobalThreadID, 44089251edeSMichael Kruse Builder.getInt32(int(getSchedType(PollyChunkSize, PollyScheduling))), 44189251edeSMichael Kruse LB, 44289251edeSMichael Kruse UB, 44389251edeSMichael Kruse Inc, 44489251edeSMichael Kruse ChunkSize}; 44589251edeSMichael Kruse 446fe0e5b3eSMichael Kruse CallInst *Call = Builder.CreateCall(F, Args); 447fe0e5b3eSMichael Kruse Call->setDebugLoc(DLGenerated); 44889251edeSMichael Kruse } 44989251edeSMichael Kruse 45089251edeSMichael Kruse Value *ParallelLoopGeneratorKMP::createCallDispatchNext(Value *GlobalThreadID, 45189251edeSMichael Kruse Value *IsLastPtr, 45289251edeSMichael Kruse Value *LBPtr, 45389251edeSMichael Kruse Value *UBPtr, 45489251edeSMichael Kruse Value *StridePtr) { 45589251edeSMichael Kruse const std::string Name = 45689251edeSMichael Kruse is64BitArch() ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_4"; 45789251edeSMichael Kruse Function *F = M->getFunction(Name); 45889251edeSMichael Kruse 45989251edeSMichael Kruse // If F is not available, declare it. 46089251edeSMichael Kruse if (!F) { 46189251edeSMichael Kruse GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 46289251edeSMichael Kruse 463*2ab2539cSYoungsuk Kim Type *Params[] = {Builder.getPtrTy(0), Builder.getInt32Ty(), 464*2ab2539cSYoungsuk Kim Builder.getPtrTy(0), Builder.getPtrTy(0), 465*2ab2539cSYoungsuk Kim Builder.getPtrTy(0), Builder.getPtrTy(0)}; 46689251edeSMichael Kruse 46789251edeSMichael Kruse FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), Params, false); 46889251edeSMichael Kruse F = Function::Create(Ty, Linkage, Name, M); 46989251edeSMichael Kruse } 47089251edeSMichael Kruse 47189251edeSMichael Kruse Value *Args[] = {SourceLocationInfo, GlobalThreadID, IsLastPtr, LBPtr, UBPtr, 47289251edeSMichael Kruse StridePtr}; 47389251edeSMichael Kruse 474fe0e5b3eSMichael Kruse CallInst *Call = Builder.CreateCall(F, Args); 475fe0e5b3eSMichael Kruse Call->setDebugLoc(DLGenerated); 476fe0e5b3eSMichael Kruse return Call; 47789251edeSMichael Kruse } 47889251edeSMichael Kruse 47989251edeSMichael Kruse // TODO: This function currently creates a source location dummy. It might be 48089251edeSMichael Kruse // necessary to (actually) provide information, in the future. 48189251edeSMichael Kruse GlobalVariable *ParallelLoopGeneratorKMP::createSourceLocation() { 48289251edeSMichael Kruse const std::string LocName = ".loc.dummy"; 48389251edeSMichael Kruse GlobalVariable *SourceLocDummy = M->getGlobalVariable(LocName); 48489251edeSMichael Kruse 48589251edeSMichael Kruse if (SourceLocDummy == nullptr) { 48689251edeSMichael Kruse const std::string StructName = "struct.ident_t"; 487fe431683SNick Lewycky StructType *IdentTy = 488fe431683SNick Lewycky StructType::getTypeByName(M->getContext(), StructName); 48989251edeSMichael Kruse 49089251edeSMichael Kruse // If the ident_t StructType is not available, declare it. 49189251edeSMichael Kruse // in LLVM-IR: ident_t = type { i32, i32, i32, i32, i8* } 49289251edeSMichael Kruse if (!IdentTy) { 49389251edeSMichael Kruse Type *LocMembers[] = {Builder.getInt32Ty(), Builder.getInt32Ty(), 49489251edeSMichael Kruse Builder.getInt32Ty(), Builder.getInt32Ty(), 495a3ef8589SFangrui Song Builder.getPtrTy()}; 49689251edeSMichael Kruse 49789251edeSMichael Kruse IdentTy = 49889251edeSMichael Kruse StructType::create(M->getContext(), LocMembers, StructName, false); 49989251edeSMichael Kruse } 50089251edeSMichael Kruse 50189251edeSMichael Kruse const auto ArrayType = 50289251edeSMichael Kruse llvm::ArrayType::get(Builder.getInt8Ty(), /* Length */ 23); 50389251edeSMichael Kruse 50489251edeSMichael Kruse // Global Variable Definitions 505fb7cf900SKazu Hirata GlobalVariable *StrVar = 506fb7cf900SKazu Hirata new GlobalVariable(*M, ArrayType, true, GlobalValue::PrivateLinkage, 507fb7cf900SKazu Hirata nullptr, ".str.ident"); 508805c157eSGuillaume Chatelet StrVar->setAlignment(llvm::Align(1)); 50989251edeSMichael Kruse 51089251edeSMichael Kruse SourceLocDummy = new GlobalVariable( 51189251edeSMichael Kruse *M, IdentTy, true, GlobalValue::PrivateLinkage, nullptr, LocName); 5120e62011dSGuillaume Chatelet SourceLocDummy->setAlignment(llvm::Align(8)); 51389251edeSMichael Kruse 51489251edeSMichael Kruse // Constant Definitions 51589251edeSMichael Kruse Constant *InitStr = ConstantDataArray::getString( 51689251edeSMichael Kruse M->getContext(), "Source location dummy.", true); 51789251edeSMichael Kruse 51889251edeSMichael Kruse Constant *StrPtr = static_cast<Constant *>(Builder.CreateInBoundsGEP( 51989251edeSMichael Kruse ArrayType, StrVar, {Builder.getInt32(0), Builder.getInt32(0)})); 52089251edeSMichael Kruse 52189251edeSMichael Kruse Constant *LocInitStruct = ConstantStruct::get( 52289251edeSMichael Kruse IdentTy, {Builder.getInt32(0), Builder.getInt32(0), Builder.getInt32(0), 52389251edeSMichael Kruse Builder.getInt32(0), StrPtr}); 52489251edeSMichael Kruse 52589251edeSMichael Kruse // Initialize variables 52689251edeSMichael Kruse StrVar->setInitializer(InitStr); 52789251edeSMichael Kruse SourceLocDummy->setInitializer(LocInitStruct); 52889251edeSMichael Kruse } 52989251edeSMichael Kruse 53089251edeSMichael Kruse return SourceLocDummy; 53189251edeSMichael Kruse } 53289251edeSMichael Kruse 53389251edeSMichael Kruse bool ParallelLoopGeneratorKMP::is64BitArch() { 53489251edeSMichael Kruse return (LongType->getIntegerBitWidth() == 64); 53589251edeSMichael Kruse } 53689251edeSMichael Kruse 53789251edeSMichael Kruse OMPGeneralSchedulingType ParallelLoopGeneratorKMP::getSchedType( 53889251edeSMichael Kruse int ChunkSize, OMPGeneralSchedulingType Scheduling) const { 53989251edeSMichael Kruse if (ChunkSize == 0 && Scheduling == OMPGeneralSchedulingType::StaticChunked) 54089251edeSMichael Kruse return OMPGeneralSchedulingType::StaticNonChunked; 54189251edeSMichael Kruse 54289251edeSMichael Kruse return Scheduling; 54389251edeSMichael Kruse } 544