xref: /llvm-project/polly/lib/CodeGen/LoopGeneratorsKMP.cpp (revision f42785d0c8886a65fbdd160b0ef47baa5931e582)
189251edeSMichael Kruse //===------ LoopGeneratorsKMP.cpp - IR helper to create loops -------------===//
289251edeSMichael Kruse //
389251edeSMichael Kruse // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
489251edeSMichael Kruse // See https://llvm.org/LICENSE.txt for license information.
589251edeSMichael Kruse // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
689251edeSMichael Kruse //
789251edeSMichael Kruse //===----------------------------------------------------------------------===//
889251edeSMichael Kruse //
989251edeSMichael Kruse // This file contains functions to create parallel loops as LLVM-IR.
1089251edeSMichael Kruse //
1189251edeSMichael Kruse //===----------------------------------------------------------------------===//
1289251edeSMichael Kruse 
1389251edeSMichael Kruse #include "polly/CodeGen/LoopGeneratorsKMP.h"
1422c77f23SMichael Kruse #include "llvm/Analysis/LoopInfo.h"
1589251edeSMichael Kruse #include "llvm/IR/Dominators.h"
1689251edeSMichael Kruse #include "llvm/IR/Module.h"
1789251edeSMichael Kruse 
1889251edeSMichael Kruse using namespace llvm;
1989251edeSMichael Kruse using namespace polly;
2089251edeSMichael Kruse 
2189251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallSpawnThreads(Value *SubFn,
2289251edeSMichael Kruse                                                       Value *SubFnParam,
2389251edeSMichael Kruse                                                       Value *LB, Value *UB,
2489251edeSMichael Kruse                                                       Value *Stride) {
2589251edeSMichael Kruse   const std::string Name = "__kmpc_fork_call";
2689251edeSMichael Kruse   Function *F = M->getFunction(Name);
27fe431683SNick Lewycky   Type *KMPCMicroTy = StructType::getTypeByName(M->getContext(), "kmpc_micro");
2889251edeSMichael Kruse 
2989251edeSMichael Kruse   if (!KMPCMicroTy) {
3089251edeSMichael Kruse     // void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...)
31*2ab2539cSYoungsuk Kim     Type *MicroParams[] = {Builder.getPtrTy(0), Builder.getPtrTy(0)};
3289251edeSMichael Kruse 
3389251edeSMichael Kruse     KMPCMicroTy = FunctionType::get(Builder.getVoidTy(), MicroParams, true);
3489251edeSMichael Kruse   }
3589251edeSMichael Kruse 
3689251edeSMichael Kruse   // If F is not available, declare it.
3789251edeSMichael Kruse   if (!F) {
3889251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
39*2ab2539cSYoungsuk Kim     Type *Params[] = {Builder.getPtrTy(0), Builder.getInt32Ty(),
40*2ab2539cSYoungsuk Kim                       Builder.getPtrTy(0)};
4189251edeSMichael Kruse 
4289251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, true);
4389251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
4489251edeSMichael Kruse   }
4589251edeSMichael Kruse 
46*2ab2539cSYoungsuk Kim   Value *Task =
47*2ab2539cSYoungsuk Kim       Builder.CreatePointerBitCastOrAddrSpaceCast(SubFn, Builder.getPtrTy(0));
4889251edeSMichael Kruse 
4989251edeSMichael Kruse   Value *Args[] = {SourceLocationInfo,
5089251edeSMichael Kruse                    Builder.getInt32(4) /* Number of arguments (w/o Task) */,
5189251edeSMichael Kruse                    Task,
5289251edeSMichael Kruse                    LB,
5389251edeSMichael Kruse                    UB,
5489251edeSMichael Kruse                    Stride,
5589251edeSMichael Kruse                    SubFnParam};
5689251edeSMichael Kruse 
57fe0e5b3eSMichael Kruse   CallInst *Call = Builder.CreateCall(F, Args);
58fe0e5b3eSMichael Kruse   Call->setDebugLoc(DLGenerated);
5989251edeSMichael Kruse }
6089251edeSMichael Kruse 
613e5d671cSEli Friedman void ParallelLoopGeneratorKMP::deployParallelExecution(Function *SubFn,
6289251edeSMichael Kruse                                                        Value *SubFnParam,
6389251edeSMichael Kruse                                                        Value *LB, Value *UB,
6489251edeSMichael Kruse                                                        Value *Stride) {
6589251edeSMichael Kruse   // Inform OpenMP runtime about the number of threads if greater than zero
6689251edeSMichael Kruse   if (PollyNumThreads > 0) {
6789251edeSMichael Kruse     Value *GlobalThreadID = createCallGlobalThreadNum();
6889251edeSMichael Kruse     createCallPushNumThreads(GlobalThreadID, Builder.getInt32(PollyNumThreads));
6989251edeSMichael Kruse   }
7089251edeSMichael Kruse 
7189251edeSMichael Kruse   // Tell the runtime we start a parallel loop
7289251edeSMichael Kruse   createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
7389251edeSMichael Kruse }
7489251edeSMichael Kruse 
7589251edeSMichael Kruse Function *ParallelLoopGeneratorKMP::prepareSubFnDefinition(Function *F) const {
76*2ab2539cSYoungsuk Kim   std::vector<Type *> Arguments = {
77*2ab2539cSYoungsuk Kim       Builder.getPtrTy(0), Builder.getPtrTy(0), LongType, LongType, LongType,
78a3ef8589SFangrui Song       Builder.getPtrTy()};
7989251edeSMichael Kruse 
8089251edeSMichael Kruse   FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
8189251edeSMichael Kruse   Function *SubFn = Function::Create(FT, Function::InternalLinkage,
8289251edeSMichael Kruse                                      F->getName() + "_polly_subfn", M);
8389251edeSMichael Kruse   // Name the function's arguments
8489251edeSMichael Kruse   Function::arg_iterator AI = SubFn->arg_begin();
8589251edeSMichael Kruse   AI->setName("polly.kmpc.global_tid");
8689251edeSMichael Kruse   std::advance(AI, 1);
8789251edeSMichael Kruse   AI->setName("polly.kmpc.bound_tid");
8889251edeSMichael Kruse   std::advance(AI, 1);
8989251edeSMichael Kruse   AI->setName("polly.kmpc.lb");
9089251edeSMichael Kruse   std::advance(AI, 1);
9189251edeSMichael Kruse   AI->setName("polly.kmpc.ub");
9289251edeSMichael Kruse   std::advance(AI, 1);
9389251edeSMichael Kruse   AI->setName("polly.kmpc.inc");
9489251edeSMichael Kruse   std::advance(AI, 1);
9589251edeSMichael Kruse   AI->setName("polly.kmpc.shared");
9689251edeSMichael Kruse 
9789251edeSMichael Kruse   return SubFn;
9889251edeSMichael Kruse }
9989251edeSMichael Kruse 
10089251edeSMichael Kruse // Create a subfunction of the following (preliminary) structure:
10189251edeSMichael Kruse //
10289251edeSMichael Kruse //        PrevBB
10389251edeSMichael Kruse //           |
10489251edeSMichael Kruse //           v
10589251edeSMichael Kruse //        HeaderBB
1061e0be76eSMichael Halkenhäuser //       /   |    _____
1071e0be76eSMichael Halkenhäuser //      /    v   v     |
1081e0be76eSMichael Halkenhäuser //     / PreHeaderBB   |
1091e0be76eSMichael Halkenhäuser //    |      |         |
1101e0be76eSMichael Halkenhäuser //    |      v         |
1111e0be76eSMichael Halkenhäuser //    |  CheckNextBB   |
1121e0be76eSMichael Halkenhäuser //     \     |   \_____/
1131e0be76eSMichael Halkenhäuser //      \    |
1141e0be76eSMichael Halkenhäuser //       v   v
11589251edeSMichael Kruse //       ExitBB
11689251edeSMichael Kruse //
11789251edeSMichael Kruse // HeaderBB will hold allocations, loading of variables and kmp-init calls.
1181e0be76eSMichael Halkenhäuser // CheckNextBB will check for more work (dynamic / static chunked) or will be
1191e0be76eSMichael Halkenhäuser // empty (static non chunked).
12089251edeSMichael Kruse // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB.
12189251edeSMichael Kruse // PreHeaderBB loads the new boundaries (& will lead to the loop body later on).
1221e0be76eSMichael Halkenhäuser // Just like CheckNextBB: PreHeaderBB is (preliminary) empty in the static non
1231e0be76eSMichael Halkenhäuser // chunked scheduling case. ExitBB marks the end of the parallel execution.
12489251edeSMichael Kruse // The possibly empty BasicBlocks will automatically be removed.
12589251edeSMichael Kruse std::tuple<Value *, Function *>
1261e0be76eSMichael Halkenhäuser ParallelLoopGeneratorKMP::createSubFn(Value *SequentialLoopStride,
12789251edeSMichael Kruse                                       AllocaInst *StructData,
12889251edeSMichael Kruse                                       SetVector<Value *> Data, ValueMapT &Map) {
12989251edeSMichael Kruse   Function *SubFn = createSubFnDefinition();
13089251edeSMichael Kruse   LLVMContext &Context = SubFn->getContext();
13189251edeSMichael Kruse 
13289251edeSMichael Kruse   // Create basic blocks.
13389251edeSMichael Kruse   BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
13422c77f23SMichael Kruse   SubFnDT = std::make_unique<DominatorTree>(*SubFn);
13522c77f23SMichael Kruse   SubFnLI = std::make_unique<LoopInfo>(*SubFnDT);
13622c77f23SMichael Kruse 
13789251edeSMichael Kruse   BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
13889251edeSMichael Kruse   BasicBlock *CheckNextBB =
13989251edeSMichael Kruse       BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
14089251edeSMichael Kruse   BasicBlock *PreHeaderBB =
14189251edeSMichael Kruse       BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
14289251edeSMichael Kruse 
14322c77f23SMichael Kruse   SubFnDT->addNewBlock(ExitBB, HeaderBB);
14422c77f23SMichael Kruse   SubFnDT->addNewBlock(CheckNextBB, HeaderBB);
14522c77f23SMichael Kruse   SubFnDT->addNewBlock(PreHeaderBB, HeaderBB);
14689251edeSMichael Kruse 
14789251edeSMichael Kruse   // Fill up basic block HeaderBB.
14889251edeSMichael Kruse   Builder.SetInsertPoint(HeaderBB);
14989251edeSMichael Kruse   Value *LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
15089251edeSMichael Kruse   Value *UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
15189251edeSMichael Kruse   Value *IsLastPtr = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr,
15289251edeSMichael Kruse                                           "polly.par.lastIterPtr");
15389251edeSMichael Kruse   Value *StridePtr =
15489251edeSMichael Kruse       Builder.CreateAlloca(LongType, nullptr, "polly.par.StridePtr");
15589251edeSMichael Kruse 
15689251edeSMichael Kruse   // Get iterator for retrieving the previously defined parameters.
15789251edeSMichael Kruse   Function::arg_iterator AI = SubFn->arg_begin();
15889251edeSMichael Kruse   // First argument holds "global thread ID".
15989251edeSMichael Kruse   Value *IDPtr = &*AI;
16089251edeSMichael Kruse   // Skip "bound thread ID" since it is not used (but had to be defined).
16189251edeSMichael Kruse   std::advance(AI, 2);
16289251edeSMichael Kruse   // Move iterator to: LB, UB, Stride, Shared variable struct.
16389251edeSMichael Kruse   Value *LB = &*AI;
16489251edeSMichael Kruse   std::advance(AI, 1);
16589251edeSMichael Kruse   Value *UB = &*AI;
16689251edeSMichael Kruse   std::advance(AI, 1);
16789251edeSMichael Kruse   Value *Stride = &*AI;
16889251edeSMichael Kruse   std::advance(AI, 1);
16989251edeSMichael Kruse   Value *Shared = &*AI;
17089251edeSMichael Kruse 
17118680a36SNikita Popov   extractValuesFromStruct(Data, StructData->getAllocatedType(), Shared, Map);
17289251edeSMichael Kruse 
17359f95222SGuillaume Chatelet   const auto Alignment = llvm::Align(is64BitArch() ? 8 : 4);
17446354bacSNikita Popov   Value *ID = Builder.CreateAlignedLoad(Builder.getInt32Ty(), IDPtr, Alignment,
17546354bacSNikita Popov                                         "polly.par.global_tid");
17689251edeSMichael Kruse 
17789251edeSMichael Kruse   Builder.CreateAlignedStore(LB, LBPtr, Alignment);
17889251edeSMichael Kruse   Builder.CreateAlignedStore(UB, UBPtr, Alignment);
17989251edeSMichael Kruse   Builder.CreateAlignedStore(Builder.getInt32(0), IsLastPtr, Alignment);
18089251edeSMichael Kruse   Builder.CreateAlignedStore(Stride, StridePtr, Alignment);
18189251edeSMichael Kruse 
18289251edeSMichael Kruse   // Subtract one as the upper bound provided by openmp is a < comparison
18389251edeSMichael Kruse   // whereas the codegenForSequential function creates a <= comparison.
18489251edeSMichael Kruse   Value *AdjustedUB = Builder.CreateAdd(UB, ConstantInt::get(LongType, -1),
18589251edeSMichael Kruse                                         "polly.indvar.UBAdjusted");
18689251edeSMichael Kruse 
18789251edeSMichael Kruse   Value *ChunkSize =
18889251edeSMichael Kruse       ConstantInt::get(LongType, std::max<int>(PollyChunkSize, 1));
18989251edeSMichael Kruse 
1901e0be76eSMichael Halkenhäuser   OMPGeneralSchedulingType Scheduling =
1911e0be76eSMichael Halkenhäuser       getSchedType(PollyChunkSize, PollyScheduling);
1921e0be76eSMichael Halkenhäuser 
1931e0be76eSMichael Halkenhäuser   switch (Scheduling) {
19489251edeSMichael Kruse   case OMPGeneralSchedulingType::Dynamic:
19589251edeSMichael Kruse   case OMPGeneralSchedulingType::Guided:
19689251edeSMichael Kruse   case OMPGeneralSchedulingType::Runtime:
19789251edeSMichael Kruse     // "DYNAMIC" scheduling types are handled below (including 'runtime')
19889251edeSMichael Kruse     {
19989251edeSMichael Kruse       UB = AdjustedUB;
20089251edeSMichael Kruse       createCallDispatchInit(ID, LB, UB, Stride, ChunkSize);
20189251edeSMichael Kruse       Value *HasWork =
20289251edeSMichael Kruse           createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr);
20389251edeSMichael Kruse       Value *HasIteration =
20489251edeSMichael Kruse           Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork,
20589251edeSMichael Kruse                              Builder.getInt32(1), "polly.hasIteration");
20689251edeSMichael Kruse       Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
20789251edeSMichael Kruse 
20889251edeSMichael Kruse       Builder.SetInsertPoint(CheckNextBB);
20989251edeSMichael Kruse       HasWork = createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr);
21089251edeSMichael Kruse       HasIteration =
21189251edeSMichael Kruse           Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork,
21289251edeSMichael Kruse                              Builder.getInt32(1), "polly.hasWork");
21389251edeSMichael Kruse       Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
21489251edeSMichael Kruse 
21589251edeSMichael Kruse       Builder.SetInsertPoint(PreHeaderBB);
21646354bacSNikita Popov       LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment,
21746354bacSNikita Popov                                      "polly.indvar.LB");
21846354bacSNikita Popov       UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment,
21946354bacSNikita Popov                                      "polly.indvar.UB");
22089251edeSMichael Kruse     }
22189251edeSMichael Kruse     break;
22289251edeSMichael Kruse   case OMPGeneralSchedulingType::StaticChunked:
22389251edeSMichael Kruse   case OMPGeneralSchedulingType::StaticNonChunked:
22489251edeSMichael Kruse     // "STATIC" scheduling types are handled below
22589251edeSMichael Kruse     {
2261e0be76eSMichael Halkenhäuser       Builder.CreateAlignedStore(AdjustedUB, UBPtr, Alignment);
22789251edeSMichael Kruse       createCallStaticInit(ID, IsLastPtr, LBPtr, UBPtr, StridePtr, ChunkSize);
22889251edeSMichael Kruse 
22946354bacSNikita Popov       Value *ChunkedStride = Builder.CreateAlignedLoad(
23046354bacSNikita Popov           LongType, StridePtr, Alignment, "polly.kmpc.stride");
2311e0be76eSMichael Halkenhäuser 
23246354bacSNikita Popov       LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment,
23346354bacSNikita Popov                                      "polly.indvar.LB");
23446354bacSNikita Popov       UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment,
23546354bacSNikita Popov                                      "polly.indvar.UB.temp");
23689251edeSMichael Kruse 
2371e0be76eSMichael Halkenhäuser       Value *UBInRange =
2381e0be76eSMichael Halkenhäuser           Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLE, UB, AdjustedUB,
2391e0be76eSMichael Halkenhäuser                              "polly.indvar.UB.inRange");
2401e0be76eSMichael Halkenhäuser       UB = Builder.CreateSelect(UBInRange, UB, AdjustedUB, "polly.indvar.UB");
24189251edeSMichael Kruse       Builder.CreateAlignedStore(UB, UBPtr, Alignment);
24289251edeSMichael Kruse 
24389251edeSMichael Kruse       Value *HasIteration = Builder.CreateICmp(
24489251edeSMichael Kruse           llvm::CmpInst::Predicate::ICMP_SLE, LB, UB, "polly.hasIteration");
24589251edeSMichael Kruse       Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
24689251edeSMichael Kruse 
2471e0be76eSMichael Halkenhäuser       if (Scheduling == OMPGeneralSchedulingType::StaticChunked) {
2481e0be76eSMichael Halkenhäuser         Builder.SetInsertPoint(PreHeaderBB);
24946354bacSNikita Popov         LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment,
2501e0be76eSMichael Halkenhäuser                                        "polly.indvar.LB.entry");
25146354bacSNikita Popov         UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment,
2521e0be76eSMichael Halkenhäuser                                        "polly.indvar.UB.entry");
2531e0be76eSMichael Halkenhäuser       }
2541e0be76eSMichael Halkenhäuser 
25589251edeSMichael Kruse       Builder.SetInsertPoint(CheckNextBB);
2561e0be76eSMichael Halkenhäuser 
2571e0be76eSMichael Halkenhäuser       if (Scheduling == OMPGeneralSchedulingType::StaticChunked) {
2581e0be76eSMichael Halkenhäuser         Value *NextLB =
2591e0be76eSMichael Halkenhäuser             Builder.CreateAdd(LB, ChunkedStride, "polly.indvar.nextLB");
2601e0be76eSMichael Halkenhäuser         Value *NextUB = Builder.CreateAdd(UB, ChunkedStride);
2611e0be76eSMichael Halkenhäuser 
2621e0be76eSMichael Halkenhäuser         Value *NextUBOutOfBounds =
2631e0be76eSMichael Halkenhäuser             Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SGT, NextUB,
2641e0be76eSMichael Halkenhäuser                                AdjustedUB, "polly.indvar.nextUB.outOfBounds");
2651e0be76eSMichael Halkenhäuser         NextUB = Builder.CreateSelect(NextUBOutOfBounds, AdjustedUB, NextUB,
2661e0be76eSMichael Halkenhäuser                                       "polly.indvar.nextUB");
2671e0be76eSMichael Halkenhäuser 
2681e0be76eSMichael Halkenhäuser         Builder.CreateAlignedStore(NextLB, LBPtr, Alignment);
2691e0be76eSMichael Halkenhäuser         Builder.CreateAlignedStore(NextUB, UBPtr, Alignment);
2701e0be76eSMichael Halkenhäuser 
2711e0be76eSMichael Halkenhäuser         Value *HasWork =
2721e0be76eSMichael Halkenhäuser             Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLE, NextLB,
2731e0be76eSMichael Halkenhäuser                                AdjustedUB, "polly.hasWork");
2741e0be76eSMichael Halkenhäuser         Builder.CreateCondBr(HasWork, PreHeaderBB, ExitBB);
2751e0be76eSMichael Halkenhäuser       } else {
27689251edeSMichael Kruse         Builder.CreateBr(ExitBB);
2771e0be76eSMichael Halkenhäuser       }
27889251edeSMichael Kruse 
27989251edeSMichael Kruse       Builder.SetInsertPoint(PreHeaderBB);
28089251edeSMichael Kruse     }
28189251edeSMichael Kruse     break;
28289251edeSMichael Kruse   }
28389251edeSMichael Kruse 
28489251edeSMichael Kruse   Builder.CreateBr(CheckNextBB);
28589251edeSMichael Kruse   Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
28689251edeSMichael Kruse   BasicBlock *AfterBB;
28722c77f23SMichael Kruse   Value *IV = createLoop(LB, UB, SequentialLoopStride, Builder, *SubFnLI,
28822c77f23SMichael Kruse                          *SubFnDT, AfterBB, ICmpInst::ICMP_SLE, nullptr, true,
28989251edeSMichael Kruse                          /* UseGuard */ false);
29089251edeSMichael Kruse 
29189251edeSMichael Kruse   BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
29289251edeSMichael Kruse 
29389251edeSMichael Kruse   // Add code to terminate this subfunction.
29489251edeSMichael Kruse   Builder.SetInsertPoint(ExitBB);
29589251edeSMichael Kruse   // Static (i.e. non-dynamic) scheduling types, are terminated with a fini-call
2961e0be76eSMichael Halkenhäuser   if (Scheduling == OMPGeneralSchedulingType::StaticChunked ||
2971e0be76eSMichael Halkenhäuser       Scheduling == OMPGeneralSchedulingType::StaticNonChunked) {
29889251edeSMichael Kruse     createCallStaticFini(ID);
29989251edeSMichael Kruse   }
30089251edeSMichael Kruse   Builder.CreateRetVoid();
30189251edeSMichael Kruse   Builder.SetInsertPoint(&*LoopBody);
30289251edeSMichael Kruse 
30322c77f23SMichael Kruse   // FIXME: Call SubFnDT->verify() and SubFnLI->verify() to check that the
30422c77f23SMichael Kruse   // DominatorTree/LoopInfo has been created correctly. Alternatively, recreate
30522c77f23SMichael Kruse   // from scratch since it is not needed here directly.
30622c77f23SMichael Kruse 
30789251edeSMichael Kruse   return std::make_tuple(IV, SubFn);
30889251edeSMichael Kruse }
30989251edeSMichael Kruse 
31089251edeSMichael Kruse Value *ParallelLoopGeneratorKMP::createCallGlobalThreadNum() {
31189251edeSMichael Kruse   const std::string Name = "__kmpc_global_thread_num";
31289251edeSMichael Kruse   Function *F = M->getFunction(Name);
31389251edeSMichael Kruse 
31489251edeSMichael Kruse   // If F is not available, declare it.
31589251edeSMichael Kruse   if (!F) {
31689251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
317*2ab2539cSYoungsuk Kim     Type *Params[] = {Builder.getPtrTy(0)};
31889251edeSMichael Kruse 
31989251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), Params, false);
32089251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
32189251edeSMichael Kruse   }
32289251edeSMichael Kruse 
323fe0e5b3eSMichael Kruse   CallInst *Call = Builder.CreateCall(F, {SourceLocationInfo});
324fe0e5b3eSMichael Kruse   Call->setDebugLoc(DLGenerated);
325fe0e5b3eSMichael Kruse   return Call;
32689251edeSMichael Kruse }
32789251edeSMichael Kruse 
32889251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallPushNumThreads(Value *GlobalThreadID,
32989251edeSMichael Kruse                                                         Value *NumThreads) {
33089251edeSMichael Kruse   const std::string Name = "__kmpc_push_num_threads";
33189251edeSMichael Kruse   Function *F = M->getFunction(Name);
33289251edeSMichael Kruse 
33389251edeSMichael Kruse   // If F is not available, declare it.
33489251edeSMichael Kruse   if (!F) {
33589251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
336*2ab2539cSYoungsuk Kim     Type *Params[] = {Builder.getPtrTy(0), Builder.getInt32Ty(),
33789251edeSMichael Kruse                       Builder.getInt32Ty()};
33889251edeSMichael Kruse 
33989251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
34089251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
34189251edeSMichael Kruse   }
34289251edeSMichael Kruse 
34389251edeSMichael Kruse   Value *Args[] = {SourceLocationInfo, GlobalThreadID, NumThreads};
34489251edeSMichael Kruse 
345fe0e5b3eSMichael Kruse   CallInst *Call = Builder.CreateCall(F, Args);
346fe0e5b3eSMichael Kruse   Call->setDebugLoc(DLGenerated);
34789251edeSMichael Kruse }
34889251edeSMichael Kruse 
34989251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallStaticInit(Value *GlobalThreadID,
35089251edeSMichael Kruse                                                     Value *IsLastPtr,
35189251edeSMichael Kruse                                                     Value *LBPtr, Value *UBPtr,
35289251edeSMichael Kruse                                                     Value *StridePtr,
35389251edeSMichael Kruse                                                     Value *ChunkSize) {
35489251edeSMichael Kruse   const std::string Name =
35589251edeSMichael Kruse       is64BitArch() ? "__kmpc_for_static_init_8" : "__kmpc_for_static_init_4";
35689251edeSMichael Kruse   Function *F = M->getFunction(Name);
35789251edeSMichael Kruse 
35889251edeSMichael Kruse   // If F is not available, declare it.
35989251edeSMichael Kruse   if (!F) {
36089251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
36189251edeSMichael Kruse 
362*2ab2539cSYoungsuk Kim     Type *Params[] = {Builder.getPtrTy(0),
36389251edeSMichael Kruse                       Builder.getInt32Ty(),
36489251edeSMichael Kruse                       Builder.getInt32Ty(),
365*2ab2539cSYoungsuk Kim                       Builder.getPtrTy(0),
366*2ab2539cSYoungsuk Kim                       Builder.getPtrTy(0),
367*2ab2539cSYoungsuk Kim                       Builder.getPtrTy(0),
368*2ab2539cSYoungsuk Kim                       Builder.getPtrTy(0),
36989251edeSMichael Kruse                       LongType,
37089251edeSMichael Kruse                       LongType};
37189251edeSMichael Kruse 
37289251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
37389251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
37489251edeSMichael Kruse   }
37589251edeSMichael Kruse 
37689251edeSMichael Kruse   // The parameter 'ChunkSize' will hold strictly positive integer values,
37789251edeSMichael Kruse   // regardless of PollyChunkSize's value
37889251edeSMichael Kruse   Value *Args[] = {
37989251edeSMichael Kruse       SourceLocationInfo,
38089251edeSMichael Kruse       GlobalThreadID,
38189251edeSMichael Kruse       Builder.getInt32(int(getSchedType(PollyChunkSize, PollyScheduling))),
38289251edeSMichael Kruse       IsLastPtr,
38389251edeSMichael Kruse       LBPtr,
38489251edeSMichael Kruse       UBPtr,
38589251edeSMichael Kruse       StridePtr,
38689251edeSMichael Kruse       ConstantInt::get(LongType, 1),
38789251edeSMichael Kruse       ChunkSize};
38889251edeSMichael Kruse 
389fe0e5b3eSMichael Kruse   CallInst *Call = Builder.CreateCall(F, Args);
390fe0e5b3eSMichael Kruse   Call->setDebugLoc(DLGenerated);
39189251edeSMichael Kruse }
39289251edeSMichael Kruse 
39389251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallStaticFini(Value *GlobalThreadID) {
39489251edeSMichael Kruse   const std::string Name = "__kmpc_for_static_fini";
39589251edeSMichael Kruse   Function *F = M->getFunction(Name);
39689251edeSMichael Kruse 
39789251edeSMichael Kruse   // If F is not available, declare it.
39889251edeSMichael Kruse   if (!F) {
39989251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
400*2ab2539cSYoungsuk Kim     Type *Params[] = {Builder.getPtrTy(0), Builder.getInt32Ty()};
40189251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
40289251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
40389251edeSMichael Kruse   }
40489251edeSMichael Kruse 
40589251edeSMichael Kruse   Value *Args[] = {SourceLocationInfo, GlobalThreadID};
40689251edeSMichael Kruse 
407fe0e5b3eSMichael Kruse   CallInst *Call = Builder.CreateCall(F, Args);
408fe0e5b3eSMichael Kruse   Call->setDebugLoc(DLGenerated);
40989251edeSMichael Kruse }
41089251edeSMichael Kruse 
41189251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallDispatchInit(Value *GlobalThreadID,
41289251edeSMichael Kruse                                                       Value *LB, Value *UB,
41389251edeSMichael Kruse                                                       Value *Inc,
41489251edeSMichael Kruse                                                       Value *ChunkSize) {
41589251edeSMichael Kruse   const std::string Name =
41689251edeSMichael Kruse       is64BitArch() ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_4";
41789251edeSMichael Kruse   Function *F = M->getFunction(Name);
41889251edeSMichael Kruse 
41989251edeSMichael Kruse   // If F is not available, declare it.
42089251edeSMichael Kruse   if (!F) {
42189251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
42289251edeSMichael Kruse 
423*2ab2539cSYoungsuk Kim     Type *Params[] = {Builder.getPtrTy(0),
42489251edeSMichael Kruse                       Builder.getInt32Ty(),
42589251edeSMichael Kruse                       Builder.getInt32Ty(),
42689251edeSMichael Kruse                       LongType,
42789251edeSMichael Kruse                       LongType,
42889251edeSMichael Kruse                       LongType,
42989251edeSMichael Kruse                       LongType};
43089251edeSMichael Kruse 
43189251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
43289251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
43389251edeSMichael Kruse   }
43489251edeSMichael Kruse 
43589251edeSMichael Kruse   // The parameter 'ChunkSize' will hold strictly positive integer values,
43689251edeSMichael Kruse   // regardless of PollyChunkSize's value
43789251edeSMichael Kruse   Value *Args[] = {
43889251edeSMichael Kruse       SourceLocationInfo,
43989251edeSMichael Kruse       GlobalThreadID,
44089251edeSMichael Kruse       Builder.getInt32(int(getSchedType(PollyChunkSize, PollyScheduling))),
44189251edeSMichael Kruse       LB,
44289251edeSMichael Kruse       UB,
44389251edeSMichael Kruse       Inc,
44489251edeSMichael Kruse       ChunkSize};
44589251edeSMichael Kruse 
446fe0e5b3eSMichael Kruse   CallInst *Call = Builder.CreateCall(F, Args);
447fe0e5b3eSMichael Kruse   Call->setDebugLoc(DLGenerated);
44889251edeSMichael Kruse }
44989251edeSMichael Kruse 
45089251edeSMichael Kruse Value *ParallelLoopGeneratorKMP::createCallDispatchNext(Value *GlobalThreadID,
45189251edeSMichael Kruse                                                         Value *IsLastPtr,
45289251edeSMichael Kruse                                                         Value *LBPtr,
45389251edeSMichael Kruse                                                         Value *UBPtr,
45489251edeSMichael Kruse                                                         Value *StridePtr) {
45589251edeSMichael Kruse   const std::string Name =
45689251edeSMichael Kruse       is64BitArch() ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_4";
45789251edeSMichael Kruse   Function *F = M->getFunction(Name);
45889251edeSMichael Kruse 
45989251edeSMichael Kruse   // If F is not available, declare it.
46089251edeSMichael Kruse   if (!F) {
46189251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
46289251edeSMichael Kruse 
463*2ab2539cSYoungsuk Kim     Type *Params[] = {Builder.getPtrTy(0), Builder.getInt32Ty(),
464*2ab2539cSYoungsuk Kim                       Builder.getPtrTy(0), Builder.getPtrTy(0),
465*2ab2539cSYoungsuk Kim                       Builder.getPtrTy(0), Builder.getPtrTy(0)};
46689251edeSMichael Kruse 
46789251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), Params, false);
46889251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
46989251edeSMichael Kruse   }
47089251edeSMichael Kruse 
47189251edeSMichael Kruse   Value *Args[] = {SourceLocationInfo, GlobalThreadID, IsLastPtr, LBPtr, UBPtr,
47289251edeSMichael Kruse                    StridePtr};
47389251edeSMichael Kruse 
474fe0e5b3eSMichael Kruse   CallInst *Call = Builder.CreateCall(F, Args);
475fe0e5b3eSMichael Kruse   Call->setDebugLoc(DLGenerated);
476fe0e5b3eSMichael Kruse   return Call;
47789251edeSMichael Kruse }
47889251edeSMichael Kruse 
47989251edeSMichael Kruse // TODO: This function currently creates a source location dummy. It might be
48089251edeSMichael Kruse // necessary to (actually) provide information, in the future.
48189251edeSMichael Kruse GlobalVariable *ParallelLoopGeneratorKMP::createSourceLocation() {
48289251edeSMichael Kruse   const std::string LocName = ".loc.dummy";
48389251edeSMichael Kruse   GlobalVariable *SourceLocDummy = M->getGlobalVariable(LocName);
48489251edeSMichael Kruse 
48589251edeSMichael Kruse   if (SourceLocDummy == nullptr) {
48689251edeSMichael Kruse     const std::string StructName = "struct.ident_t";
487fe431683SNick Lewycky     StructType *IdentTy =
488fe431683SNick Lewycky         StructType::getTypeByName(M->getContext(), StructName);
48989251edeSMichael Kruse 
49089251edeSMichael Kruse     // If the ident_t StructType is not available, declare it.
49189251edeSMichael Kruse     // in LLVM-IR: ident_t = type { i32, i32, i32, i32, i8* }
49289251edeSMichael Kruse     if (!IdentTy) {
49389251edeSMichael Kruse       Type *LocMembers[] = {Builder.getInt32Ty(), Builder.getInt32Ty(),
49489251edeSMichael Kruse                             Builder.getInt32Ty(), Builder.getInt32Ty(),
495a3ef8589SFangrui Song                             Builder.getPtrTy()};
49689251edeSMichael Kruse 
49789251edeSMichael Kruse       IdentTy =
49889251edeSMichael Kruse           StructType::create(M->getContext(), LocMembers, StructName, false);
49989251edeSMichael Kruse     }
50089251edeSMichael Kruse 
50189251edeSMichael Kruse     const auto ArrayType =
50289251edeSMichael Kruse         llvm::ArrayType::get(Builder.getInt8Ty(), /* Length */ 23);
50389251edeSMichael Kruse 
50489251edeSMichael Kruse     // Global Variable Definitions
505fb7cf900SKazu Hirata     GlobalVariable *StrVar =
506fb7cf900SKazu Hirata         new GlobalVariable(*M, ArrayType, true, GlobalValue::PrivateLinkage,
507fb7cf900SKazu Hirata                            nullptr, ".str.ident");
508805c157eSGuillaume Chatelet     StrVar->setAlignment(llvm::Align(1));
50989251edeSMichael Kruse 
51089251edeSMichael Kruse     SourceLocDummy = new GlobalVariable(
51189251edeSMichael Kruse         *M, IdentTy, true, GlobalValue::PrivateLinkage, nullptr, LocName);
5120e62011dSGuillaume Chatelet     SourceLocDummy->setAlignment(llvm::Align(8));
51389251edeSMichael Kruse 
51489251edeSMichael Kruse     // Constant Definitions
51589251edeSMichael Kruse     Constant *InitStr = ConstantDataArray::getString(
51689251edeSMichael Kruse         M->getContext(), "Source location dummy.", true);
51789251edeSMichael Kruse 
51889251edeSMichael Kruse     Constant *StrPtr = static_cast<Constant *>(Builder.CreateInBoundsGEP(
51989251edeSMichael Kruse         ArrayType, StrVar, {Builder.getInt32(0), Builder.getInt32(0)}));
52089251edeSMichael Kruse 
52189251edeSMichael Kruse     Constant *LocInitStruct = ConstantStruct::get(
52289251edeSMichael Kruse         IdentTy, {Builder.getInt32(0), Builder.getInt32(0), Builder.getInt32(0),
52389251edeSMichael Kruse                   Builder.getInt32(0), StrPtr});
52489251edeSMichael Kruse 
52589251edeSMichael Kruse     // Initialize variables
52689251edeSMichael Kruse     StrVar->setInitializer(InitStr);
52789251edeSMichael Kruse     SourceLocDummy->setInitializer(LocInitStruct);
52889251edeSMichael Kruse   }
52989251edeSMichael Kruse 
53089251edeSMichael Kruse   return SourceLocDummy;
53189251edeSMichael Kruse }
53289251edeSMichael Kruse 
53389251edeSMichael Kruse bool ParallelLoopGeneratorKMP::is64BitArch() {
53489251edeSMichael Kruse   return (LongType->getIntegerBitWidth() == 64);
53589251edeSMichael Kruse }
53689251edeSMichael Kruse 
53789251edeSMichael Kruse OMPGeneralSchedulingType ParallelLoopGeneratorKMP::getSchedType(
53889251edeSMichael Kruse     int ChunkSize, OMPGeneralSchedulingType Scheduling) const {
53989251edeSMichael Kruse   if (ChunkSize == 0 && Scheduling == OMPGeneralSchedulingType::StaticChunked)
54089251edeSMichael Kruse     return OMPGeneralSchedulingType::StaticNonChunked;
54189251edeSMichael Kruse 
54289251edeSMichael Kruse   return Scheduling;
54389251edeSMichael Kruse }
544