1 //===- LoopGenerators.h - IR helper to create loops -------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains functions to create scalar and OpenMP parallel loops 10 // as LLVM-IR. 11 // 12 //===----------------------------------------------------------------------===// 13 #ifndef POLLY_LOOP_GENERATORS_H 14 #define POLLY_LOOP_GENERATORS_H 15 16 #include "polly/CodeGen/IRBuilder.h" 17 #include "polly/Support/ScopHelper.h" 18 #include "llvm/ADT/SetVector.h" 19 20 namespace polly { 21 using llvm::AllocaInst; 22 using llvm::BasicBlock; 23 using llvm::DataLayout; 24 using llvm::DominatorTree; 25 using llvm::Function; 26 using llvm::ICmpInst; 27 using llvm::LoopInfo; 28 using llvm::Module; 29 using llvm::SetVector; 30 using llvm::Type; 31 using llvm::Value; 32 33 /// General scheduling types of parallel OpenMP for loops. 34 /// Initialization values taken from OpenMP's enum in kmp.h: sched_type. 35 /// Currently, only 'static' scheduling may change from chunked to non-chunked. 36 enum class OMPGeneralSchedulingType { 37 StaticChunked = 33, 38 StaticNonChunked = 34, 39 Dynamic = 35, 40 Guided = 36, 41 Runtime = 37 42 }; 43 44 extern int PollyNumThreads; 45 extern OMPGeneralSchedulingType PollyScheduling; 46 extern int PollyChunkSize; 47 48 /// Create a scalar do/for-style loop. 49 /// 50 /// @param LowerBound The starting value of the induction variable. 51 /// @param UpperBound The upper bound of the induction variable. 52 /// @param Stride The value by which the induction variable 53 /// is incremented. 54 /// 55 /// @param Builder The builder used to create the loop. 56 /// @param P A pointer to the pass that uses this function. 57 /// It is used to update analysis information. 58 /// @param LI The loop info we need to update 59 /// @param DT The dominator tree we need to update 60 /// @param ExitBlock The block the loop will exit to. 61 /// @param Predicate The predicate used to generate the upper loop 62 /// bound. 63 /// @param Annotator This function can (optionally) take 64 /// a ScopAnnotator which 65 /// annotates loops and alias information in the SCoP. 66 /// @param Parallel If this loop should be marked parallel in 67 /// the Annotator. 68 /// @param UseGuard Create a guard in front of the header to check if 69 /// the loop is executed at least once, otherwise just 70 /// assume it. 71 /// @param LoopVectDisabled If the Loop vectorizer should be disabled for this 72 /// loop. 73 /// 74 /// @return Value* The newly created induction variable for this loop. 75 Value *createLoop(Value *LowerBound, Value *UpperBound, Value *Stride, 76 PollyIRBuilder &Builder, LoopInfo &LI, DominatorTree &DT, 77 BasicBlock *&ExitBlock, ICmpInst::Predicate Predicate, 78 ScopAnnotator *Annotator = nullptr, bool Parallel = false, 79 bool UseGuard = true, bool LoopVectDisabled = false); 80 81 /// Create a DebugLoc representing generated instructions. 82 /// 83 /// The IR verifier requires !dbg metadata to be set in some situations. For 84 /// instance, if an (inlinable) function has debug info, all its call site must 85 /// have debug info as well. 86 llvm::DebugLoc createDebugLocForGeneratedCode(Function *F); 87 88 /// The ParallelLoopGenerator allows to create parallelized loops 89 /// 90 /// To parallelize a loop, we perform the following steps: 91 /// o Generate a subfunction which will hold the loop body. 92 /// o Create a struct to hold all outer values needed in the loop body. 93 /// o Create calls to a runtime library to achieve the actual parallelism. 94 /// These calls will spawn and join threads, define how the work (here the 95 /// iterations) are distributed between them and make sure each has access 96 /// to the struct holding all needed values. 97 /// 98 /// At the moment we support only one parallel runtime, OpenMP. 99 /// 100 /// If we parallelize the outer loop of the following loop nest, 101 /// 102 /// S0; 103 /// for (int i = 0; i < N; i++) 104 /// for (int j = 0; j < M; j++) 105 /// S1(i, j); 106 /// S2; 107 /// 108 /// we will generate the following code (with different runtime function names): 109 /// 110 /// S0; 111 /// auto *values = storeValuesIntoStruct(); 112 /// // Execute subfunction with multiple threads 113 /// spawn_threads(subfunction, values); 114 /// join_threads(); 115 /// S2; 116 /// 117 /// // This function is executed in parallel by different threads 118 /// void subfunction(values) { 119 /// while (auto *WorkItem = getWorkItem()) { 120 /// int LB = WorkItem.begin(); 121 /// int UB = WorkItem.end(); 122 /// for (int i = LB; i < UB; i++) 123 /// for (int j = 0; j < M; j++) 124 /// S1(i, j); 125 /// } 126 /// cleanup_thread(); 127 /// } 128 class ParallelLoopGenerator { 129 public: 130 /// Create a parallel loop generator for the current function. 131 ParallelLoopGenerator(PollyIRBuilder &Builder, const DataLayout &DL) 132 : Builder(Builder), LongType(Type::getIntNTy(Builder.getContext(), 133 DL.getPointerSizeInBits())), 134 M(Builder.GetInsertBlock()->getParent()->getParent()), 135 DLGenerated(createDebugLocForGeneratedCode( 136 Builder.GetInsertBlock()->getParent())) {} 137 138 virtual ~ParallelLoopGenerator() {} 139 140 /// Create a parallel loop. 141 /// 142 /// This function is the main function to automatically generate a parallel 143 /// loop with all its components. 144 /// 145 /// @param LB The lower bound for the loop we parallelize. 146 /// @param UB The upper bound for the loop we parallelize. 147 /// @param Stride The stride of the loop we parallelize. 148 /// @param Values A set of LLVM-IR Values that should be available in 149 /// the new loop body. 150 /// @param VMap A map to allow outside access to the new versions of 151 /// the values in @p Values. 152 /// @param LoopBody A pointer to an iterator that is set to point to the 153 /// body of the created loop. It should be used to insert 154 /// instructions that form the actual loop body. 155 /// 156 /// @return The newly created induction variable for this loop. 157 Value *createParallelLoop(Value *LB, Value *UB, Value *Stride, 158 SetVector<Value *> &Values, ValueMapT &VMap, 159 BasicBlock::iterator *LoopBody); 160 161 protected: 162 /// The IR builder we use to create instructions. 163 PollyIRBuilder &Builder; 164 165 /// The loop info for the generated subfunction. 166 std::unique_ptr<LoopInfo> SubFnLI; 167 168 /// The dominance tree for the generated subfunction. 169 std::unique_ptr<DominatorTree> SubFnDT; 170 171 /// The type of a "long" on this hardware used for backend calls. 172 Type *LongType; 173 174 /// The current module 175 Module *M; 176 177 /// Debug location for generated code without direct link to any specific 178 /// line. 179 /// 180 /// We only set the DebugLoc where the IR Verifier requires us to. Otherwise, 181 /// absent debug location for optimized code should be fine. 182 llvm::DebugLoc DLGenerated; 183 184 public: 185 /// Returns the DominatorTree for the generated subfunction. 186 DominatorTree *getCalleeDominatorTree() const { return SubFnDT.get(); } 187 188 /// Returns the LoopInfo for the generated subfunction. 189 LoopInfo *getCalleeLoopInfo() const { return SubFnLI.get(); } 190 191 /// Create a struct for all @p Values and store them in there. 192 /// 193 /// @param Values The values which should be stored in the struct. 194 /// 195 /// @return The created struct. 196 AllocaInst *storeValuesIntoStruct(SetVector<Value *> &Values); 197 198 /// Extract all values from the @p Struct and construct the mapping. 199 /// 200 /// @param Values The values which were stored in the struct. 201 /// @param Struct The struct holding all the values in @p Values. 202 /// @param VMap A map to associate every element of @p Values with the 203 /// new llvm value loaded from the @p Struct. 204 void extractValuesFromStruct(SetVector<Value *> Values, Type *Ty, 205 Value *Struct, ValueMapT &VMap); 206 207 /// Create the definition of the parallel subfunction. 208 /// 209 /// @return A pointer to the subfunction. 210 Function *createSubFnDefinition(); 211 212 /// Create the runtime library calls for spawn and join of the worker threads. 213 /// Additionally, places a call to the specified subfunction. 214 /// 215 /// @param SubFn The subfunction which holds the loop body. 216 /// @param SubFnParam The parameter for the subfunction (basically the struct 217 /// filled with the outside values). 218 /// @param LB The lower bound for the loop we parallelize. 219 /// @param UB The upper bound for the loop we parallelize. 220 /// @param Stride The stride of the loop we parallelize. 221 virtual void deployParallelExecution(Function *SubFn, Value *SubFnParam, 222 Value *LB, Value *UB, Value *Stride) = 0; 223 224 /// Prepare the definition of the parallel subfunction. 225 /// Creates the argument list and names them (as well as the subfunction). 226 /// 227 /// @param F A pointer to the (parallel) subfunction's parent function. 228 /// 229 /// @return The pointer to the (parallel) subfunction. 230 virtual Function *prepareSubFnDefinition(Function *F) const = 0; 231 232 /// Create the parallel subfunction. 233 /// 234 /// @param Stride The induction variable increment. 235 /// @param Struct A struct holding all values in @p Values. 236 /// @param Values A set of LLVM-IR Values that should be available in 237 /// the new loop body. 238 /// @param VMap A map to allow outside access to the new versions of 239 /// the values in @p Values. 240 /// @param SubFn The newly created subfunction is returned here. 241 /// 242 /// @return The newly created induction variable. 243 virtual std::tuple<Value *, Function *> 244 createSubFn(Value *Stride, AllocaInst *Struct, SetVector<Value *> UsedValues, 245 ValueMapT &VMap) = 0; 246 }; 247 } // end namespace polly 248 #endif 249