1*5ffd83dbSDimitry Andric //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===// 2*5ffd83dbSDimitry Andric // 3*5ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*5ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*5ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*5ffd83dbSDimitry Andric // 7*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 8*5ffd83dbSDimitry Andric // 9*5ffd83dbSDimitry Andric // OpenMP specific optimizations: 10*5ffd83dbSDimitry Andric // 11*5ffd83dbSDimitry Andric // - Deduplication of runtime calls, e.g., omp_get_thread_num. 12*5ffd83dbSDimitry Andric // 13*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 14*5ffd83dbSDimitry Andric 15*5ffd83dbSDimitry Andric #include "llvm/Transforms/IPO/OpenMPOpt.h" 16*5ffd83dbSDimitry Andric 17*5ffd83dbSDimitry Andric #include "llvm/ADT/EnumeratedArray.h" 18*5ffd83dbSDimitry Andric #include "llvm/ADT/Statistic.h" 19*5ffd83dbSDimitry Andric #include "llvm/Analysis/CallGraph.h" 20*5ffd83dbSDimitry Andric #include "llvm/Analysis/CallGraphSCCPass.h" 21*5ffd83dbSDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 22*5ffd83dbSDimitry Andric #include "llvm/Frontend/OpenMP/OMPConstants.h" 23*5ffd83dbSDimitry Andric #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 24*5ffd83dbSDimitry Andric #include "llvm/InitializePasses.h" 25*5ffd83dbSDimitry Andric #include "llvm/Support/CommandLine.h" 26*5ffd83dbSDimitry Andric #include "llvm/Transforms/IPO.h" 27*5ffd83dbSDimitry Andric #include "llvm/Transforms/IPO/Attributor.h" 28*5ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/CallGraphUpdater.h" 29*5ffd83dbSDimitry Andric 30*5ffd83dbSDimitry Andric using namespace llvm; 31*5ffd83dbSDimitry Andric using namespace omp; 32*5ffd83dbSDimitry Andric 33*5ffd83dbSDimitry Andric #define DEBUG_TYPE "openmp-opt" 34*5ffd83dbSDimitry Andric 35*5ffd83dbSDimitry Andric static cl::opt<bool> DisableOpenMPOptimizations( 36*5ffd83dbSDimitry Andric "openmp-opt-disable", cl::ZeroOrMore, 37*5ffd83dbSDimitry Andric cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, 38*5ffd83dbSDimitry Andric cl::init(false)); 39*5ffd83dbSDimitry Andric 40*5ffd83dbSDimitry Andric static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false), 41*5ffd83dbSDimitry Andric cl::Hidden); 42*5ffd83dbSDimitry Andric static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels", 43*5ffd83dbSDimitry Andric cl::init(false), cl::Hidden); 44*5ffd83dbSDimitry Andric 45*5ffd83dbSDimitry Andric STATISTIC(NumOpenMPRuntimeCallsDeduplicated, 46*5ffd83dbSDimitry Andric "Number of OpenMP runtime calls deduplicated"); 47*5ffd83dbSDimitry Andric STATISTIC(NumOpenMPParallelRegionsDeleted, 48*5ffd83dbSDimitry Andric "Number of OpenMP parallel regions deleted"); 49*5ffd83dbSDimitry Andric STATISTIC(NumOpenMPRuntimeFunctionsIdentified, 50*5ffd83dbSDimitry Andric "Number of OpenMP runtime functions identified"); 51*5ffd83dbSDimitry Andric STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified, 52*5ffd83dbSDimitry Andric "Number of OpenMP runtime function uses identified"); 53*5ffd83dbSDimitry Andric STATISTIC(NumOpenMPTargetRegionKernels, 54*5ffd83dbSDimitry Andric "Number of OpenMP target region entry points (=kernels) identified"); 55*5ffd83dbSDimitry Andric STATISTIC( 56*5ffd83dbSDimitry Andric NumOpenMPParallelRegionsReplacedInGPUStateMachine, 57*5ffd83dbSDimitry Andric "Number of OpenMP parallel regions replaced with ID in GPU state machines"); 58*5ffd83dbSDimitry Andric 59*5ffd83dbSDimitry Andric #if !defined(NDEBUG) 60*5ffd83dbSDimitry Andric static constexpr auto TAG = "[" DEBUG_TYPE "]"; 61*5ffd83dbSDimitry Andric #endif 62*5ffd83dbSDimitry Andric 63*5ffd83dbSDimitry Andric /// Apply \p CB to all uses of \p F. If \p LookThroughConstantExprUses is 64*5ffd83dbSDimitry Andric /// true, constant expression users are not given to \p CB but their uses are 65*5ffd83dbSDimitry Andric /// traversed transitively. 66*5ffd83dbSDimitry Andric template <typename CBTy> 67*5ffd83dbSDimitry Andric static void foreachUse(Function &F, CBTy CB, 68*5ffd83dbSDimitry Andric bool LookThroughConstantExprUses = true) { 69*5ffd83dbSDimitry Andric SmallVector<Use *, 8> Worklist(make_pointer_range(F.uses())); 70*5ffd83dbSDimitry Andric 71*5ffd83dbSDimitry Andric for (unsigned idx = 0; idx < Worklist.size(); ++idx) { 72*5ffd83dbSDimitry Andric Use &U = *Worklist[idx]; 73*5ffd83dbSDimitry Andric 74*5ffd83dbSDimitry Andric // Allow use in constant bitcasts and simply look through them. 75*5ffd83dbSDimitry Andric if (LookThroughConstantExprUses && isa<ConstantExpr>(U.getUser())) { 76*5ffd83dbSDimitry Andric for (Use &CEU : cast<ConstantExpr>(U.getUser())->uses()) 77*5ffd83dbSDimitry Andric Worklist.push_back(&CEU); 78*5ffd83dbSDimitry Andric continue; 79*5ffd83dbSDimitry Andric } 80*5ffd83dbSDimitry Andric 81*5ffd83dbSDimitry Andric CB(U); 82*5ffd83dbSDimitry Andric } 83*5ffd83dbSDimitry Andric } 84*5ffd83dbSDimitry Andric 85*5ffd83dbSDimitry Andric /// Helper struct to store tracked ICV values at specif instructions. 86*5ffd83dbSDimitry Andric struct ICVValue { 87*5ffd83dbSDimitry Andric Instruction *Inst; 88*5ffd83dbSDimitry Andric Value *TrackedValue; 89*5ffd83dbSDimitry Andric 90*5ffd83dbSDimitry Andric ICVValue(Instruction *I, Value *Val) : Inst(I), TrackedValue(Val) {} 91*5ffd83dbSDimitry Andric }; 92*5ffd83dbSDimitry Andric 93*5ffd83dbSDimitry Andric namespace llvm { 94*5ffd83dbSDimitry Andric 95*5ffd83dbSDimitry Andric // Provide DenseMapInfo for ICVValue 96*5ffd83dbSDimitry Andric template <> struct DenseMapInfo<ICVValue> { 97*5ffd83dbSDimitry Andric using InstInfo = DenseMapInfo<Instruction *>; 98*5ffd83dbSDimitry Andric using ValueInfo = DenseMapInfo<Value *>; 99*5ffd83dbSDimitry Andric 100*5ffd83dbSDimitry Andric static inline ICVValue getEmptyKey() { 101*5ffd83dbSDimitry Andric return ICVValue(InstInfo::getEmptyKey(), ValueInfo::getEmptyKey()); 102*5ffd83dbSDimitry Andric }; 103*5ffd83dbSDimitry Andric 104*5ffd83dbSDimitry Andric static inline ICVValue getTombstoneKey() { 105*5ffd83dbSDimitry Andric return ICVValue(InstInfo::getTombstoneKey(), ValueInfo::getTombstoneKey()); 106*5ffd83dbSDimitry Andric }; 107*5ffd83dbSDimitry Andric 108*5ffd83dbSDimitry Andric static unsigned getHashValue(const ICVValue &ICVVal) { 109*5ffd83dbSDimitry Andric return detail::combineHashValue( 110*5ffd83dbSDimitry Andric InstInfo::getHashValue(ICVVal.Inst), 111*5ffd83dbSDimitry Andric ValueInfo::getHashValue(ICVVal.TrackedValue)); 112*5ffd83dbSDimitry Andric } 113*5ffd83dbSDimitry Andric 114*5ffd83dbSDimitry Andric static bool isEqual(const ICVValue &LHS, const ICVValue &RHS) { 115*5ffd83dbSDimitry Andric return InstInfo::isEqual(LHS.Inst, RHS.Inst) && 116*5ffd83dbSDimitry Andric ValueInfo::isEqual(LHS.TrackedValue, RHS.TrackedValue); 117*5ffd83dbSDimitry Andric } 118*5ffd83dbSDimitry Andric }; 119*5ffd83dbSDimitry Andric 120*5ffd83dbSDimitry Andric } // end namespace llvm 121*5ffd83dbSDimitry Andric 122*5ffd83dbSDimitry Andric namespace { 123*5ffd83dbSDimitry Andric 124*5ffd83dbSDimitry Andric struct AAICVTracker; 125*5ffd83dbSDimitry Andric 126*5ffd83dbSDimitry Andric /// OpenMP specific information. For now, stores RFIs and ICVs also needed for 127*5ffd83dbSDimitry Andric /// Attributor runs. 128*5ffd83dbSDimitry Andric struct OMPInformationCache : public InformationCache { 129*5ffd83dbSDimitry Andric OMPInformationCache(Module &M, AnalysisGetter &AG, 130*5ffd83dbSDimitry Andric BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC, 131*5ffd83dbSDimitry Andric SmallPtrSetImpl<Kernel> &Kernels) 132*5ffd83dbSDimitry Andric : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M), 133*5ffd83dbSDimitry Andric Kernels(Kernels) { 134*5ffd83dbSDimitry Andric initializeModuleSlice(CGSCC); 135*5ffd83dbSDimitry Andric 136*5ffd83dbSDimitry Andric OMPBuilder.initialize(); 137*5ffd83dbSDimitry Andric initializeRuntimeFunctions(); 138*5ffd83dbSDimitry Andric initializeInternalControlVars(); 139*5ffd83dbSDimitry Andric } 140*5ffd83dbSDimitry Andric 141*5ffd83dbSDimitry Andric /// Generic information that describes an internal control variable. 142*5ffd83dbSDimitry Andric struct InternalControlVarInfo { 143*5ffd83dbSDimitry Andric /// The kind, as described by InternalControlVar enum. 144*5ffd83dbSDimitry Andric InternalControlVar Kind; 145*5ffd83dbSDimitry Andric 146*5ffd83dbSDimitry Andric /// The name of the ICV. 147*5ffd83dbSDimitry Andric StringRef Name; 148*5ffd83dbSDimitry Andric 149*5ffd83dbSDimitry Andric /// Environment variable associated with this ICV. 150*5ffd83dbSDimitry Andric StringRef EnvVarName; 151*5ffd83dbSDimitry Andric 152*5ffd83dbSDimitry Andric /// Initial value kind. 153*5ffd83dbSDimitry Andric ICVInitValue InitKind; 154*5ffd83dbSDimitry Andric 155*5ffd83dbSDimitry Andric /// Initial value. 156*5ffd83dbSDimitry Andric ConstantInt *InitValue; 157*5ffd83dbSDimitry Andric 158*5ffd83dbSDimitry Andric /// Setter RTL function associated with this ICV. 159*5ffd83dbSDimitry Andric RuntimeFunction Setter; 160*5ffd83dbSDimitry Andric 161*5ffd83dbSDimitry Andric /// Getter RTL function associated with this ICV. 162*5ffd83dbSDimitry Andric RuntimeFunction Getter; 163*5ffd83dbSDimitry Andric 164*5ffd83dbSDimitry Andric /// RTL Function corresponding to the override clause of this ICV 165*5ffd83dbSDimitry Andric RuntimeFunction Clause; 166*5ffd83dbSDimitry Andric }; 167*5ffd83dbSDimitry Andric 168*5ffd83dbSDimitry Andric /// Generic information that describes a runtime function 169*5ffd83dbSDimitry Andric struct RuntimeFunctionInfo { 170*5ffd83dbSDimitry Andric 171*5ffd83dbSDimitry Andric /// The kind, as described by the RuntimeFunction enum. 172*5ffd83dbSDimitry Andric RuntimeFunction Kind; 173*5ffd83dbSDimitry Andric 174*5ffd83dbSDimitry Andric /// The name of the function. 175*5ffd83dbSDimitry Andric StringRef Name; 176*5ffd83dbSDimitry Andric 177*5ffd83dbSDimitry Andric /// Flag to indicate a variadic function. 178*5ffd83dbSDimitry Andric bool IsVarArg; 179*5ffd83dbSDimitry Andric 180*5ffd83dbSDimitry Andric /// The return type of the function. 181*5ffd83dbSDimitry Andric Type *ReturnType; 182*5ffd83dbSDimitry Andric 183*5ffd83dbSDimitry Andric /// The argument types of the function. 184*5ffd83dbSDimitry Andric SmallVector<Type *, 8> ArgumentTypes; 185*5ffd83dbSDimitry Andric 186*5ffd83dbSDimitry Andric /// The declaration if available. 187*5ffd83dbSDimitry Andric Function *Declaration = nullptr; 188*5ffd83dbSDimitry Andric 189*5ffd83dbSDimitry Andric /// Uses of this runtime function per function containing the use. 190*5ffd83dbSDimitry Andric using UseVector = SmallVector<Use *, 16>; 191*5ffd83dbSDimitry Andric 192*5ffd83dbSDimitry Andric /// Clear UsesMap for runtime function. 193*5ffd83dbSDimitry Andric void clearUsesMap() { UsesMap.clear(); } 194*5ffd83dbSDimitry Andric 195*5ffd83dbSDimitry Andric /// Boolean conversion that is true if the runtime function was found. 196*5ffd83dbSDimitry Andric operator bool() const { return Declaration; } 197*5ffd83dbSDimitry Andric 198*5ffd83dbSDimitry Andric /// Return the vector of uses in function \p F. 199*5ffd83dbSDimitry Andric UseVector &getOrCreateUseVector(Function *F) { 200*5ffd83dbSDimitry Andric std::shared_ptr<UseVector> &UV = UsesMap[F]; 201*5ffd83dbSDimitry Andric if (!UV) 202*5ffd83dbSDimitry Andric UV = std::make_shared<UseVector>(); 203*5ffd83dbSDimitry Andric return *UV; 204*5ffd83dbSDimitry Andric } 205*5ffd83dbSDimitry Andric 206*5ffd83dbSDimitry Andric /// Return the vector of uses in function \p F or `nullptr` if there are 207*5ffd83dbSDimitry Andric /// none. 208*5ffd83dbSDimitry Andric const UseVector *getUseVector(Function &F) const { 209*5ffd83dbSDimitry Andric auto I = UsesMap.find(&F); 210*5ffd83dbSDimitry Andric if (I != UsesMap.end()) 211*5ffd83dbSDimitry Andric return I->second.get(); 212*5ffd83dbSDimitry Andric return nullptr; 213*5ffd83dbSDimitry Andric } 214*5ffd83dbSDimitry Andric 215*5ffd83dbSDimitry Andric /// Return how many functions contain uses of this runtime function. 216*5ffd83dbSDimitry Andric size_t getNumFunctionsWithUses() const { return UsesMap.size(); } 217*5ffd83dbSDimitry Andric 218*5ffd83dbSDimitry Andric /// Return the number of arguments (or the minimal number for variadic 219*5ffd83dbSDimitry Andric /// functions). 220*5ffd83dbSDimitry Andric size_t getNumArgs() const { return ArgumentTypes.size(); } 221*5ffd83dbSDimitry Andric 222*5ffd83dbSDimitry Andric /// Run the callback \p CB on each use and forget the use if the result is 223*5ffd83dbSDimitry Andric /// true. The callback will be fed the function in which the use was 224*5ffd83dbSDimitry Andric /// encountered as second argument. 225*5ffd83dbSDimitry Andric void foreachUse(SmallVectorImpl<Function *> &SCC, 226*5ffd83dbSDimitry Andric function_ref<bool(Use &, Function &)> CB) { 227*5ffd83dbSDimitry Andric for (Function *F : SCC) 228*5ffd83dbSDimitry Andric foreachUse(CB, F); 229*5ffd83dbSDimitry Andric } 230*5ffd83dbSDimitry Andric 231*5ffd83dbSDimitry Andric /// Run the callback \p CB on each use within the function \p F and forget 232*5ffd83dbSDimitry Andric /// the use if the result is true. 233*5ffd83dbSDimitry Andric void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) { 234*5ffd83dbSDimitry Andric SmallVector<unsigned, 8> ToBeDeleted; 235*5ffd83dbSDimitry Andric ToBeDeleted.clear(); 236*5ffd83dbSDimitry Andric 237*5ffd83dbSDimitry Andric unsigned Idx = 0; 238*5ffd83dbSDimitry Andric UseVector &UV = getOrCreateUseVector(F); 239*5ffd83dbSDimitry Andric 240*5ffd83dbSDimitry Andric for (Use *U : UV) { 241*5ffd83dbSDimitry Andric if (CB(*U, *F)) 242*5ffd83dbSDimitry Andric ToBeDeleted.push_back(Idx); 243*5ffd83dbSDimitry Andric ++Idx; 244*5ffd83dbSDimitry Andric } 245*5ffd83dbSDimitry Andric 246*5ffd83dbSDimitry Andric // Remove the to-be-deleted indices in reverse order as prior 247*5ffd83dbSDimitry Andric // modifications will not modify the smaller indices. 248*5ffd83dbSDimitry Andric while (!ToBeDeleted.empty()) { 249*5ffd83dbSDimitry Andric unsigned Idx = ToBeDeleted.pop_back_val(); 250*5ffd83dbSDimitry Andric UV[Idx] = UV.back(); 251*5ffd83dbSDimitry Andric UV.pop_back(); 252*5ffd83dbSDimitry Andric } 253*5ffd83dbSDimitry Andric } 254*5ffd83dbSDimitry Andric 255*5ffd83dbSDimitry Andric private: 256*5ffd83dbSDimitry Andric /// Map from functions to all uses of this runtime function contained in 257*5ffd83dbSDimitry Andric /// them. 258*5ffd83dbSDimitry Andric DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap; 259*5ffd83dbSDimitry Andric }; 260*5ffd83dbSDimitry Andric 261*5ffd83dbSDimitry Andric /// Initialize the ModuleSlice member based on \p SCC. ModuleSlices contains 262*5ffd83dbSDimitry Andric /// (a subset of) all functions that we can look at during this SCC traversal. 263*5ffd83dbSDimitry Andric /// This includes functions (transitively) called from the SCC and the 264*5ffd83dbSDimitry Andric /// (transitive) callers of SCC functions. We also can look at a function if 265*5ffd83dbSDimitry Andric /// there is a "reference edge", i.a., if the function somehow uses (!=calls) 266*5ffd83dbSDimitry Andric /// a function in the SCC or a caller of a function in the SCC. 267*5ffd83dbSDimitry Andric void initializeModuleSlice(SetVector<Function *> &SCC) { 268*5ffd83dbSDimitry Andric ModuleSlice.insert(SCC.begin(), SCC.end()); 269*5ffd83dbSDimitry Andric 270*5ffd83dbSDimitry Andric SmallPtrSet<Function *, 16> Seen; 271*5ffd83dbSDimitry Andric SmallVector<Function *, 16> Worklist(SCC.begin(), SCC.end()); 272*5ffd83dbSDimitry Andric while (!Worklist.empty()) { 273*5ffd83dbSDimitry Andric Function *F = Worklist.pop_back_val(); 274*5ffd83dbSDimitry Andric ModuleSlice.insert(F); 275*5ffd83dbSDimitry Andric 276*5ffd83dbSDimitry Andric for (Instruction &I : instructions(*F)) 277*5ffd83dbSDimitry Andric if (auto *CB = dyn_cast<CallBase>(&I)) 278*5ffd83dbSDimitry Andric if (Function *Callee = CB->getCalledFunction()) 279*5ffd83dbSDimitry Andric if (Seen.insert(Callee).second) 280*5ffd83dbSDimitry Andric Worklist.push_back(Callee); 281*5ffd83dbSDimitry Andric } 282*5ffd83dbSDimitry Andric 283*5ffd83dbSDimitry Andric Seen.clear(); 284*5ffd83dbSDimitry Andric Worklist.append(SCC.begin(), SCC.end()); 285*5ffd83dbSDimitry Andric while (!Worklist.empty()) { 286*5ffd83dbSDimitry Andric Function *F = Worklist.pop_back_val(); 287*5ffd83dbSDimitry Andric ModuleSlice.insert(F); 288*5ffd83dbSDimitry Andric 289*5ffd83dbSDimitry Andric // Traverse all transitive uses. 290*5ffd83dbSDimitry Andric foreachUse(*F, [&](Use &U) { 291*5ffd83dbSDimitry Andric if (auto *UsrI = dyn_cast<Instruction>(U.getUser())) 292*5ffd83dbSDimitry Andric if (Seen.insert(UsrI->getFunction()).second) 293*5ffd83dbSDimitry Andric Worklist.push_back(UsrI->getFunction()); 294*5ffd83dbSDimitry Andric }); 295*5ffd83dbSDimitry Andric } 296*5ffd83dbSDimitry Andric } 297*5ffd83dbSDimitry Andric 298*5ffd83dbSDimitry Andric /// The slice of the module we are allowed to look at. 299*5ffd83dbSDimitry Andric SmallPtrSet<Function *, 8> ModuleSlice; 300*5ffd83dbSDimitry Andric 301*5ffd83dbSDimitry Andric /// An OpenMP-IR-Builder instance 302*5ffd83dbSDimitry Andric OpenMPIRBuilder OMPBuilder; 303*5ffd83dbSDimitry Andric 304*5ffd83dbSDimitry Andric /// Map from runtime function kind to the runtime function description. 305*5ffd83dbSDimitry Andric EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction, 306*5ffd83dbSDimitry Andric RuntimeFunction::OMPRTL___last> 307*5ffd83dbSDimitry Andric RFIs; 308*5ffd83dbSDimitry Andric 309*5ffd83dbSDimitry Andric /// Map from ICV kind to the ICV description. 310*5ffd83dbSDimitry Andric EnumeratedArray<InternalControlVarInfo, InternalControlVar, 311*5ffd83dbSDimitry Andric InternalControlVar::ICV___last> 312*5ffd83dbSDimitry Andric ICVs; 313*5ffd83dbSDimitry Andric 314*5ffd83dbSDimitry Andric /// Helper to initialize all internal control variable information for those 315*5ffd83dbSDimitry Andric /// defined in OMPKinds.def. 316*5ffd83dbSDimitry Andric void initializeInternalControlVars() { 317*5ffd83dbSDimitry Andric #define ICV_RT_SET(_Name, RTL) \ 318*5ffd83dbSDimitry Andric { \ 319*5ffd83dbSDimitry Andric auto &ICV = ICVs[_Name]; \ 320*5ffd83dbSDimitry Andric ICV.Setter = RTL; \ 321*5ffd83dbSDimitry Andric } 322*5ffd83dbSDimitry Andric #define ICV_RT_GET(Name, RTL) \ 323*5ffd83dbSDimitry Andric { \ 324*5ffd83dbSDimitry Andric auto &ICV = ICVs[Name]; \ 325*5ffd83dbSDimitry Andric ICV.Getter = RTL; \ 326*5ffd83dbSDimitry Andric } 327*5ffd83dbSDimitry Andric #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \ 328*5ffd83dbSDimitry Andric { \ 329*5ffd83dbSDimitry Andric auto &ICV = ICVs[Enum]; \ 330*5ffd83dbSDimitry Andric ICV.Name = _Name; \ 331*5ffd83dbSDimitry Andric ICV.Kind = Enum; \ 332*5ffd83dbSDimitry Andric ICV.InitKind = Init; \ 333*5ffd83dbSDimitry Andric ICV.EnvVarName = _EnvVarName; \ 334*5ffd83dbSDimitry Andric switch (ICV.InitKind) { \ 335*5ffd83dbSDimitry Andric case ICV_IMPLEMENTATION_DEFINED: \ 336*5ffd83dbSDimitry Andric ICV.InitValue = nullptr; \ 337*5ffd83dbSDimitry Andric break; \ 338*5ffd83dbSDimitry Andric case ICV_ZERO: \ 339*5ffd83dbSDimitry Andric ICV.InitValue = ConstantInt::get( \ 340*5ffd83dbSDimitry Andric Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \ 341*5ffd83dbSDimitry Andric break; \ 342*5ffd83dbSDimitry Andric case ICV_FALSE: \ 343*5ffd83dbSDimitry Andric ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \ 344*5ffd83dbSDimitry Andric break; \ 345*5ffd83dbSDimitry Andric case ICV_LAST: \ 346*5ffd83dbSDimitry Andric break; \ 347*5ffd83dbSDimitry Andric } \ 348*5ffd83dbSDimitry Andric } 349*5ffd83dbSDimitry Andric #include "llvm/Frontend/OpenMP/OMPKinds.def" 350*5ffd83dbSDimitry Andric } 351*5ffd83dbSDimitry Andric 352*5ffd83dbSDimitry Andric /// Returns true if the function declaration \p F matches the runtime 353*5ffd83dbSDimitry Andric /// function types, that is, return type \p RTFRetType, and argument types 354*5ffd83dbSDimitry Andric /// \p RTFArgTypes. 355*5ffd83dbSDimitry Andric static bool declMatchesRTFTypes(Function *F, Type *RTFRetType, 356*5ffd83dbSDimitry Andric SmallVector<Type *, 8> &RTFArgTypes) { 357*5ffd83dbSDimitry Andric // TODO: We should output information to the user (under debug output 358*5ffd83dbSDimitry Andric // and via remarks). 359*5ffd83dbSDimitry Andric 360*5ffd83dbSDimitry Andric if (!F) 361*5ffd83dbSDimitry Andric return false; 362*5ffd83dbSDimitry Andric if (F->getReturnType() != RTFRetType) 363*5ffd83dbSDimitry Andric return false; 364*5ffd83dbSDimitry Andric if (F->arg_size() != RTFArgTypes.size()) 365*5ffd83dbSDimitry Andric return false; 366*5ffd83dbSDimitry Andric 367*5ffd83dbSDimitry Andric auto RTFTyIt = RTFArgTypes.begin(); 368*5ffd83dbSDimitry Andric for (Argument &Arg : F->args()) { 369*5ffd83dbSDimitry Andric if (Arg.getType() != *RTFTyIt) 370*5ffd83dbSDimitry Andric return false; 371*5ffd83dbSDimitry Andric 372*5ffd83dbSDimitry Andric ++RTFTyIt; 373*5ffd83dbSDimitry Andric } 374*5ffd83dbSDimitry Andric 375*5ffd83dbSDimitry Andric return true; 376*5ffd83dbSDimitry Andric } 377*5ffd83dbSDimitry Andric 378*5ffd83dbSDimitry Andric // Helper to collect all uses of the declaration in the UsesMap. 379*5ffd83dbSDimitry Andric unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) { 380*5ffd83dbSDimitry Andric unsigned NumUses = 0; 381*5ffd83dbSDimitry Andric if (!RFI.Declaration) 382*5ffd83dbSDimitry Andric return NumUses; 383*5ffd83dbSDimitry Andric OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration); 384*5ffd83dbSDimitry Andric 385*5ffd83dbSDimitry Andric if (CollectStats) { 386*5ffd83dbSDimitry Andric NumOpenMPRuntimeFunctionsIdentified += 1; 387*5ffd83dbSDimitry Andric NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses(); 388*5ffd83dbSDimitry Andric } 389*5ffd83dbSDimitry Andric 390*5ffd83dbSDimitry Andric // TODO: We directly convert uses into proper calls and unknown uses. 391*5ffd83dbSDimitry Andric for (Use &U : RFI.Declaration->uses()) { 392*5ffd83dbSDimitry Andric if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) { 393*5ffd83dbSDimitry Andric if (ModuleSlice.count(UserI->getFunction())) { 394*5ffd83dbSDimitry Andric RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U); 395*5ffd83dbSDimitry Andric ++NumUses; 396*5ffd83dbSDimitry Andric } 397*5ffd83dbSDimitry Andric } else { 398*5ffd83dbSDimitry Andric RFI.getOrCreateUseVector(nullptr).push_back(&U); 399*5ffd83dbSDimitry Andric ++NumUses; 400*5ffd83dbSDimitry Andric } 401*5ffd83dbSDimitry Andric } 402*5ffd83dbSDimitry Andric return NumUses; 403*5ffd83dbSDimitry Andric } 404*5ffd83dbSDimitry Andric 405*5ffd83dbSDimitry Andric // Helper function to recollect uses of all runtime functions. 406*5ffd83dbSDimitry Andric void recollectUses() { 407*5ffd83dbSDimitry Andric for (int Idx = 0; Idx < RFIs.size(); ++Idx) { 408*5ffd83dbSDimitry Andric auto &RFI = RFIs[static_cast<RuntimeFunction>(Idx)]; 409*5ffd83dbSDimitry Andric RFI.clearUsesMap(); 410*5ffd83dbSDimitry Andric collectUses(RFI, /*CollectStats*/ false); 411*5ffd83dbSDimitry Andric } 412*5ffd83dbSDimitry Andric } 413*5ffd83dbSDimitry Andric 414*5ffd83dbSDimitry Andric /// Helper to initialize all runtime function information for those defined 415*5ffd83dbSDimitry Andric /// in OpenMPKinds.def. 416*5ffd83dbSDimitry Andric void initializeRuntimeFunctions() { 417*5ffd83dbSDimitry Andric Module &M = *((*ModuleSlice.begin())->getParent()); 418*5ffd83dbSDimitry Andric 419*5ffd83dbSDimitry Andric // Helper macros for handling __VA_ARGS__ in OMP_RTL 420*5ffd83dbSDimitry Andric #define OMP_TYPE(VarName, ...) \ 421*5ffd83dbSDimitry Andric Type *VarName = OMPBuilder.VarName; \ 422*5ffd83dbSDimitry Andric (void)VarName; 423*5ffd83dbSDimitry Andric 424*5ffd83dbSDimitry Andric #define OMP_ARRAY_TYPE(VarName, ...) \ 425*5ffd83dbSDimitry Andric ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \ 426*5ffd83dbSDimitry Andric (void)VarName##Ty; \ 427*5ffd83dbSDimitry Andric PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \ 428*5ffd83dbSDimitry Andric (void)VarName##PtrTy; 429*5ffd83dbSDimitry Andric 430*5ffd83dbSDimitry Andric #define OMP_FUNCTION_TYPE(VarName, ...) \ 431*5ffd83dbSDimitry Andric FunctionType *VarName = OMPBuilder.VarName; \ 432*5ffd83dbSDimitry Andric (void)VarName; \ 433*5ffd83dbSDimitry Andric PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ 434*5ffd83dbSDimitry Andric (void)VarName##Ptr; 435*5ffd83dbSDimitry Andric 436*5ffd83dbSDimitry Andric #define OMP_STRUCT_TYPE(VarName, ...) \ 437*5ffd83dbSDimitry Andric StructType *VarName = OMPBuilder.VarName; \ 438*5ffd83dbSDimitry Andric (void)VarName; \ 439*5ffd83dbSDimitry Andric PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ 440*5ffd83dbSDimitry Andric (void)VarName##Ptr; 441*5ffd83dbSDimitry Andric 442*5ffd83dbSDimitry Andric #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \ 443*5ffd83dbSDimitry Andric { \ 444*5ffd83dbSDimitry Andric SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \ 445*5ffd83dbSDimitry Andric Function *F = M.getFunction(_Name); \ 446*5ffd83dbSDimitry Andric if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \ 447*5ffd83dbSDimitry Andric auto &RFI = RFIs[_Enum]; \ 448*5ffd83dbSDimitry Andric RFI.Kind = _Enum; \ 449*5ffd83dbSDimitry Andric RFI.Name = _Name; \ 450*5ffd83dbSDimitry Andric RFI.IsVarArg = _IsVarArg; \ 451*5ffd83dbSDimitry Andric RFI.ReturnType = OMPBuilder._ReturnType; \ 452*5ffd83dbSDimitry Andric RFI.ArgumentTypes = std::move(ArgsTypes); \ 453*5ffd83dbSDimitry Andric RFI.Declaration = F; \ 454*5ffd83dbSDimitry Andric unsigned NumUses = collectUses(RFI); \ 455*5ffd83dbSDimitry Andric (void)NumUses; \ 456*5ffd83dbSDimitry Andric LLVM_DEBUG({ \ 457*5ffd83dbSDimitry Andric dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \ 458*5ffd83dbSDimitry Andric << " found\n"; \ 459*5ffd83dbSDimitry Andric if (RFI.Declaration) \ 460*5ffd83dbSDimitry Andric dbgs() << TAG << "-> got " << NumUses << " uses in " \ 461*5ffd83dbSDimitry Andric << RFI.getNumFunctionsWithUses() \ 462*5ffd83dbSDimitry Andric << " different functions.\n"; \ 463*5ffd83dbSDimitry Andric }); \ 464*5ffd83dbSDimitry Andric } \ 465*5ffd83dbSDimitry Andric } 466*5ffd83dbSDimitry Andric #include "llvm/Frontend/OpenMP/OMPKinds.def" 467*5ffd83dbSDimitry Andric 468*5ffd83dbSDimitry Andric // TODO: We should attach the attributes defined in OMPKinds.def. 469*5ffd83dbSDimitry Andric } 470*5ffd83dbSDimitry Andric 471*5ffd83dbSDimitry Andric /// Collection of known kernels (\see Kernel) in the module. 472*5ffd83dbSDimitry Andric SmallPtrSetImpl<Kernel> &Kernels; 473*5ffd83dbSDimitry Andric }; 474*5ffd83dbSDimitry Andric 475*5ffd83dbSDimitry Andric struct OpenMPOpt { 476*5ffd83dbSDimitry Andric 477*5ffd83dbSDimitry Andric using OptimizationRemarkGetter = 478*5ffd83dbSDimitry Andric function_ref<OptimizationRemarkEmitter &(Function *)>; 479*5ffd83dbSDimitry Andric 480*5ffd83dbSDimitry Andric OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater, 481*5ffd83dbSDimitry Andric OptimizationRemarkGetter OREGetter, 482*5ffd83dbSDimitry Andric OMPInformationCache &OMPInfoCache, Attributor &A) 483*5ffd83dbSDimitry Andric : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater), 484*5ffd83dbSDimitry Andric OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {} 485*5ffd83dbSDimitry Andric 486*5ffd83dbSDimitry Andric /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. 487*5ffd83dbSDimitry Andric bool run() { 488*5ffd83dbSDimitry Andric if (SCC.empty()) 489*5ffd83dbSDimitry Andric return false; 490*5ffd83dbSDimitry Andric 491*5ffd83dbSDimitry Andric bool Changed = false; 492*5ffd83dbSDimitry Andric 493*5ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size() 494*5ffd83dbSDimitry Andric << " functions in a slice with " 495*5ffd83dbSDimitry Andric << OMPInfoCache.ModuleSlice.size() << " functions\n"); 496*5ffd83dbSDimitry Andric 497*5ffd83dbSDimitry Andric if (PrintICVValues) 498*5ffd83dbSDimitry Andric printICVs(); 499*5ffd83dbSDimitry Andric if (PrintOpenMPKernels) 500*5ffd83dbSDimitry Andric printKernels(); 501*5ffd83dbSDimitry Andric 502*5ffd83dbSDimitry Andric Changed |= rewriteDeviceCodeStateMachine(); 503*5ffd83dbSDimitry Andric 504*5ffd83dbSDimitry Andric Changed |= runAttributor(); 505*5ffd83dbSDimitry Andric 506*5ffd83dbSDimitry Andric // Recollect uses, in case Attributor deleted any. 507*5ffd83dbSDimitry Andric OMPInfoCache.recollectUses(); 508*5ffd83dbSDimitry Andric 509*5ffd83dbSDimitry Andric Changed |= deduplicateRuntimeCalls(); 510*5ffd83dbSDimitry Andric Changed |= deleteParallelRegions(); 511*5ffd83dbSDimitry Andric 512*5ffd83dbSDimitry Andric return Changed; 513*5ffd83dbSDimitry Andric } 514*5ffd83dbSDimitry Andric 515*5ffd83dbSDimitry Andric /// Print initial ICV values for testing. 516*5ffd83dbSDimitry Andric /// FIXME: This should be done from the Attributor once it is added. 517*5ffd83dbSDimitry Andric void printICVs() const { 518*5ffd83dbSDimitry Andric InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel}; 519*5ffd83dbSDimitry Andric 520*5ffd83dbSDimitry Andric for (Function *F : OMPInfoCache.ModuleSlice) { 521*5ffd83dbSDimitry Andric for (auto ICV : ICVs) { 522*5ffd83dbSDimitry Andric auto ICVInfo = OMPInfoCache.ICVs[ICV]; 523*5ffd83dbSDimitry Andric auto Remark = [&](OptimizationRemark OR) { 524*5ffd83dbSDimitry Andric return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) 525*5ffd83dbSDimitry Andric << " Value: " 526*5ffd83dbSDimitry Andric << (ICVInfo.InitValue 527*5ffd83dbSDimitry Andric ? ICVInfo.InitValue->getValue().toString(10, true) 528*5ffd83dbSDimitry Andric : "IMPLEMENTATION_DEFINED"); 529*5ffd83dbSDimitry Andric }; 530*5ffd83dbSDimitry Andric 531*5ffd83dbSDimitry Andric emitRemarkOnFunction(F, "OpenMPICVTracker", Remark); 532*5ffd83dbSDimitry Andric } 533*5ffd83dbSDimitry Andric } 534*5ffd83dbSDimitry Andric } 535*5ffd83dbSDimitry Andric 536*5ffd83dbSDimitry Andric /// Print OpenMP GPU kernels for testing. 537*5ffd83dbSDimitry Andric void printKernels() const { 538*5ffd83dbSDimitry Andric for (Function *F : SCC) { 539*5ffd83dbSDimitry Andric if (!OMPInfoCache.Kernels.count(F)) 540*5ffd83dbSDimitry Andric continue; 541*5ffd83dbSDimitry Andric 542*5ffd83dbSDimitry Andric auto Remark = [&](OptimizationRemark OR) { 543*5ffd83dbSDimitry Andric return OR << "OpenMP GPU kernel " 544*5ffd83dbSDimitry Andric << ore::NV("OpenMPGPUKernel", F->getName()) << "\n"; 545*5ffd83dbSDimitry Andric }; 546*5ffd83dbSDimitry Andric 547*5ffd83dbSDimitry Andric emitRemarkOnFunction(F, "OpenMPGPU", Remark); 548*5ffd83dbSDimitry Andric } 549*5ffd83dbSDimitry Andric } 550*5ffd83dbSDimitry Andric 551*5ffd83dbSDimitry Andric /// Return the call if \p U is a callee use in a regular call. If \p RFI is 552*5ffd83dbSDimitry Andric /// given it has to be the callee or a nullptr is returned. 553*5ffd83dbSDimitry Andric static CallInst *getCallIfRegularCall( 554*5ffd83dbSDimitry Andric Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { 555*5ffd83dbSDimitry Andric CallInst *CI = dyn_cast<CallInst>(U.getUser()); 556*5ffd83dbSDimitry Andric if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && 557*5ffd83dbSDimitry Andric (!RFI || CI->getCalledFunction() == RFI->Declaration)) 558*5ffd83dbSDimitry Andric return CI; 559*5ffd83dbSDimitry Andric return nullptr; 560*5ffd83dbSDimitry Andric } 561*5ffd83dbSDimitry Andric 562*5ffd83dbSDimitry Andric /// Return the call if \p V is a regular call. If \p RFI is given it has to be 563*5ffd83dbSDimitry Andric /// the callee or a nullptr is returned. 564*5ffd83dbSDimitry Andric static CallInst *getCallIfRegularCall( 565*5ffd83dbSDimitry Andric Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { 566*5ffd83dbSDimitry Andric CallInst *CI = dyn_cast<CallInst>(&V); 567*5ffd83dbSDimitry Andric if (CI && !CI->hasOperandBundles() && 568*5ffd83dbSDimitry Andric (!RFI || CI->getCalledFunction() == RFI->Declaration)) 569*5ffd83dbSDimitry Andric return CI; 570*5ffd83dbSDimitry Andric return nullptr; 571*5ffd83dbSDimitry Andric } 572*5ffd83dbSDimitry Andric 573*5ffd83dbSDimitry Andric private: 574*5ffd83dbSDimitry Andric /// Try to delete parallel regions if possible. 575*5ffd83dbSDimitry Andric bool deleteParallelRegions() { 576*5ffd83dbSDimitry Andric const unsigned CallbackCalleeOperand = 2; 577*5ffd83dbSDimitry Andric 578*5ffd83dbSDimitry Andric OMPInformationCache::RuntimeFunctionInfo &RFI = 579*5ffd83dbSDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; 580*5ffd83dbSDimitry Andric 581*5ffd83dbSDimitry Andric if (!RFI.Declaration) 582*5ffd83dbSDimitry Andric return false; 583*5ffd83dbSDimitry Andric 584*5ffd83dbSDimitry Andric bool Changed = false; 585*5ffd83dbSDimitry Andric auto DeleteCallCB = [&](Use &U, Function &) { 586*5ffd83dbSDimitry Andric CallInst *CI = getCallIfRegularCall(U); 587*5ffd83dbSDimitry Andric if (!CI) 588*5ffd83dbSDimitry Andric return false; 589*5ffd83dbSDimitry Andric auto *Fn = dyn_cast<Function>( 590*5ffd83dbSDimitry Andric CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts()); 591*5ffd83dbSDimitry Andric if (!Fn) 592*5ffd83dbSDimitry Andric return false; 593*5ffd83dbSDimitry Andric if (!Fn->onlyReadsMemory()) 594*5ffd83dbSDimitry Andric return false; 595*5ffd83dbSDimitry Andric if (!Fn->hasFnAttribute(Attribute::WillReturn)) 596*5ffd83dbSDimitry Andric return false; 597*5ffd83dbSDimitry Andric 598*5ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in " 599*5ffd83dbSDimitry Andric << CI->getCaller()->getName() << "\n"); 600*5ffd83dbSDimitry Andric 601*5ffd83dbSDimitry Andric auto Remark = [&](OptimizationRemark OR) { 602*5ffd83dbSDimitry Andric return OR << "Parallel region in " 603*5ffd83dbSDimitry Andric << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName()) 604*5ffd83dbSDimitry Andric << " deleted"; 605*5ffd83dbSDimitry Andric }; 606*5ffd83dbSDimitry Andric emitRemark<OptimizationRemark>(CI, "OpenMPParallelRegionDeletion", 607*5ffd83dbSDimitry Andric Remark); 608*5ffd83dbSDimitry Andric 609*5ffd83dbSDimitry Andric CGUpdater.removeCallSite(*CI); 610*5ffd83dbSDimitry Andric CI->eraseFromParent(); 611*5ffd83dbSDimitry Andric Changed = true; 612*5ffd83dbSDimitry Andric ++NumOpenMPParallelRegionsDeleted; 613*5ffd83dbSDimitry Andric return true; 614*5ffd83dbSDimitry Andric }; 615*5ffd83dbSDimitry Andric 616*5ffd83dbSDimitry Andric RFI.foreachUse(SCC, DeleteCallCB); 617*5ffd83dbSDimitry Andric 618*5ffd83dbSDimitry Andric return Changed; 619*5ffd83dbSDimitry Andric } 620*5ffd83dbSDimitry Andric 621*5ffd83dbSDimitry Andric /// Try to eliminate runtime calls by reusing existing ones. 622*5ffd83dbSDimitry Andric bool deduplicateRuntimeCalls() { 623*5ffd83dbSDimitry Andric bool Changed = false; 624*5ffd83dbSDimitry Andric 625*5ffd83dbSDimitry Andric RuntimeFunction DeduplicableRuntimeCallIDs[] = { 626*5ffd83dbSDimitry Andric OMPRTL_omp_get_num_threads, 627*5ffd83dbSDimitry Andric OMPRTL_omp_in_parallel, 628*5ffd83dbSDimitry Andric OMPRTL_omp_get_cancellation, 629*5ffd83dbSDimitry Andric OMPRTL_omp_get_thread_limit, 630*5ffd83dbSDimitry Andric OMPRTL_omp_get_supported_active_levels, 631*5ffd83dbSDimitry Andric OMPRTL_omp_get_level, 632*5ffd83dbSDimitry Andric OMPRTL_omp_get_ancestor_thread_num, 633*5ffd83dbSDimitry Andric OMPRTL_omp_get_team_size, 634*5ffd83dbSDimitry Andric OMPRTL_omp_get_active_level, 635*5ffd83dbSDimitry Andric OMPRTL_omp_in_final, 636*5ffd83dbSDimitry Andric OMPRTL_omp_get_proc_bind, 637*5ffd83dbSDimitry Andric OMPRTL_omp_get_num_places, 638*5ffd83dbSDimitry Andric OMPRTL_omp_get_num_procs, 639*5ffd83dbSDimitry Andric OMPRTL_omp_get_place_num, 640*5ffd83dbSDimitry Andric OMPRTL_omp_get_partition_num_places, 641*5ffd83dbSDimitry Andric OMPRTL_omp_get_partition_place_nums}; 642*5ffd83dbSDimitry Andric 643*5ffd83dbSDimitry Andric // Global-tid is handled separately. 644*5ffd83dbSDimitry Andric SmallSetVector<Value *, 16> GTIdArgs; 645*5ffd83dbSDimitry Andric collectGlobalThreadIdArguments(GTIdArgs); 646*5ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size() 647*5ffd83dbSDimitry Andric << " global thread ID arguments\n"); 648*5ffd83dbSDimitry Andric 649*5ffd83dbSDimitry Andric for (Function *F : SCC) { 650*5ffd83dbSDimitry Andric for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs) 651*5ffd83dbSDimitry Andric deduplicateRuntimeCalls(*F, 652*5ffd83dbSDimitry Andric OMPInfoCache.RFIs[DeduplicableRuntimeCallID]); 653*5ffd83dbSDimitry Andric 654*5ffd83dbSDimitry Andric // __kmpc_global_thread_num is special as we can replace it with an 655*5ffd83dbSDimitry Andric // argument in enough cases to make it worth trying. 656*5ffd83dbSDimitry Andric Value *GTIdArg = nullptr; 657*5ffd83dbSDimitry Andric for (Argument &Arg : F->args()) 658*5ffd83dbSDimitry Andric if (GTIdArgs.count(&Arg)) { 659*5ffd83dbSDimitry Andric GTIdArg = &Arg; 660*5ffd83dbSDimitry Andric break; 661*5ffd83dbSDimitry Andric } 662*5ffd83dbSDimitry Andric Changed |= deduplicateRuntimeCalls( 663*5ffd83dbSDimitry Andric *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); 664*5ffd83dbSDimitry Andric } 665*5ffd83dbSDimitry Andric 666*5ffd83dbSDimitry Andric return Changed; 667*5ffd83dbSDimitry Andric } 668*5ffd83dbSDimitry Andric 669*5ffd83dbSDimitry Andric static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent, 670*5ffd83dbSDimitry Andric bool GlobalOnly, bool &SingleChoice) { 671*5ffd83dbSDimitry Andric if (CurrentIdent == NextIdent) 672*5ffd83dbSDimitry Andric return CurrentIdent; 673*5ffd83dbSDimitry Andric 674*5ffd83dbSDimitry Andric // TODO: Figure out how to actually combine multiple debug locations. For 675*5ffd83dbSDimitry Andric // now we just keep an existing one if there is a single choice. 676*5ffd83dbSDimitry Andric if (!GlobalOnly || isa<GlobalValue>(NextIdent)) { 677*5ffd83dbSDimitry Andric SingleChoice = !CurrentIdent; 678*5ffd83dbSDimitry Andric return NextIdent; 679*5ffd83dbSDimitry Andric } 680*5ffd83dbSDimitry Andric return nullptr; 681*5ffd83dbSDimitry Andric } 682*5ffd83dbSDimitry Andric 683*5ffd83dbSDimitry Andric /// Return an `struct ident_t*` value that represents the ones used in the 684*5ffd83dbSDimitry Andric /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not 685*5ffd83dbSDimitry Andric /// return a local `struct ident_t*`. For now, if we cannot find a suitable 686*5ffd83dbSDimitry Andric /// return value we create one from scratch. We also do not yet combine 687*5ffd83dbSDimitry Andric /// information, e.g., the source locations, see combinedIdentStruct. 688*5ffd83dbSDimitry Andric Value * 689*5ffd83dbSDimitry Andric getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI, 690*5ffd83dbSDimitry Andric Function &F, bool GlobalOnly) { 691*5ffd83dbSDimitry Andric bool SingleChoice = true; 692*5ffd83dbSDimitry Andric Value *Ident = nullptr; 693*5ffd83dbSDimitry Andric auto CombineIdentStruct = [&](Use &U, Function &Caller) { 694*5ffd83dbSDimitry Andric CallInst *CI = getCallIfRegularCall(U, &RFI); 695*5ffd83dbSDimitry Andric if (!CI || &F != &Caller) 696*5ffd83dbSDimitry Andric return false; 697*5ffd83dbSDimitry Andric Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), 698*5ffd83dbSDimitry Andric /* GlobalOnly */ true, SingleChoice); 699*5ffd83dbSDimitry Andric return false; 700*5ffd83dbSDimitry Andric }; 701*5ffd83dbSDimitry Andric RFI.foreachUse(SCC, CombineIdentStruct); 702*5ffd83dbSDimitry Andric 703*5ffd83dbSDimitry Andric if (!Ident || !SingleChoice) { 704*5ffd83dbSDimitry Andric // The IRBuilder uses the insertion block to get to the module, this is 705*5ffd83dbSDimitry Andric // unfortunate but we work around it for now. 706*5ffd83dbSDimitry Andric if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock()) 707*5ffd83dbSDimitry Andric OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy( 708*5ffd83dbSDimitry Andric &F.getEntryBlock(), F.getEntryBlock().begin())); 709*5ffd83dbSDimitry Andric // Create a fallback location if non was found. 710*5ffd83dbSDimitry Andric // TODO: Use the debug locations of the calls instead. 711*5ffd83dbSDimitry Andric Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(); 712*5ffd83dbSDimitry Andric Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc); 713*5ffd83dbSDimitry Andric } 714*5ffd83dbSDimitry Andric return Ident; 715*5ffd83dbSDimitry Andric } 716*5ffd83dbSDimitry Andric 717*5ffd83dbSDimitry Andric /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or 718*5ffd83dbSDimitry Andric /// \p ReplVal if given. 719*5ffd83dbSDimitry Andric bool deduplicateRuntimeCalls(Function &F, 720*5ffd83dbSDimitry Andric OMPInformationCache::RuntimeFunctionInfo &RFI, 721*5ffd83dbSDimitry Andric Value *ReplVal = nullptr) { 722*5ffd83dbSDimitry Andric auto *UV = RFI.getUseVector(F); 723*5ffd83dbSDimitry Andric if (!UV || UV->size() + (ReplVal != nullptr) < 2) 724*5ffd83dbSDimitry Andric return false; 725*5ffd83dbSDimitry Andric 726*5ffd83dbSDimitry Andric LLVM_DEBUG( 727*5ffd83dbSDimitry Andric dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name 728*5ffd83dbSDimitry Andric << (ReplVal ? " with an existing value\n" : "\n") << "\n"); 729*5ffd83dbSDimitry Andric 730*5ffd83dbSDimitry Andric assert((!ReplVal || (isa<Argument>(ReplVal) && 731*5ffd83dbSDimitry Andric cast<Argument>(ReplVal)->getParent() == &F)) && 732*5ffd83dbSDimitry Andric "Unexpected replacement value!"); 733*5ffd83dbSDimitry Andric 734*5ffd83dbSDimitry Andric // TODO: Use dominance to find a good position instead. 735*5ffd83dbSDimitry Andric auto CanBeMoved = [this](CallBase &CB) { 736*5ffd83dbSDimitry Andric unsigned NumArgs = CB.getNumArgOperands(); 737*5ffd83dbSDimitry Andric if (NumArgs == 0) 738*5ffd83dbSDimitry Andric return true; 739*5ffd83dbSDimitry Andric if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr) 740*5ffd83dbSDimitry Andric return false; 741*5ffd83dbSDimitry Andric for (unsigned u = 1; u < NumArgs; ++u) 742*5ffd83dbSDimitry Andric if (isa<Instruction>(CB.getArgOperand(u))) 743*5ffd83dbSDimitry Andric return false; 744*5ffd83dbSDimitry Andric return true; 745*5ffd83dbSDimitry Andric }; 746*5ffd83dbSDimitry Andric 747*5ffd83dbSDimitry Andric if (!ReplVal) { 748*5ffd83dbSDimitry Andric for (Use *U : *UV) 749*5ffd83dbSDimitry Andric if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { 750*5ffd83dbSDimitry Andric if (!CanBeMoved(*CI)) 751*5ffd83dbSDimitry Andric continue; 752*5ffd83dbSDimitry Andric 753*5ffd83dbSDimitry Andric auto Remark = [&](OptimizationRemark OR) { 754*5ffd83dbSDimitry Andric auto newLoc = &*F.getEntryBlock().getFirstInsertionPt(); 755*5ffd83dbSDimitry Andric return OR << "OpenMP runtime call " 756*5ffd83dbSDimitry Andric << ore::NV("OpenMPOptRuntime", RFI.Name) << " moved to " 757*5ffd83dbSDimitry Andric << ore::NV("OpenMPRuntimeMoves", newLoc->getDebugLoc()); 758*5ffd83dbSDimitry Andric }; 759*5ffd83dbSDimitry Andric emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeCodeMotion", Remark); 760*5ffd83dbSDimitry Andric 761*5ffd83dbSDimitry Andric CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); 762*5ffd83dbSDimitry Andric ReplVal = CI; 763*5ffd83dbSDimitry Andric break; 764*5ffd83dbSDimitry Andric } 765*5ffd83dbSDimitry Andric if (!ReplVal) 766*5ffd83dbSDimitry Andric return false; 767*5ffd83dbSDimitry Andric } 768*5ffd83dbSDimitry Andric 769*5ffd83dbSDimitry Andric // If we use a call as a replacement value we need to make sure the ident is 770*5ffd83dbSDimitry Andric // valid at the new location. For now we just pick a global one, either 771*5ffd83dbSDimitry Andric // existing and used by one of the calls, or created from scratch. 772*5ffd83dbSDimitry Andric if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) { 773*5ffd83dbSDimitry Andric if (CI->getNumArgOperands() > 0 && 774*5ffd83dbSDimitry Andric CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) { 775*5ffd83dbSDimitry Andric Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F, 776*5ffd83dbSDimitry Andric /* GlobalOnly */ true); 777*5ffd83dbSDimitry Andric CI->setArgOperand(0, Ident); 778*5ffd83dbSDimitry Andric } 779*5ffd83dbSDimitry Andric } 780*5ffd83dbSDimitry Andric 781*5ffd83dbSDimitry Andric bool Changed = false; 782*5ffd83dbSDimitry Andric auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { 783*5ffd83dbSDimitry Andric CallInst *CI = getCallIfRegularCall(U, &RFI); 784*5ffd83dbSDimitry Andric if (!CI || CI == ReplVal || &F != &Caller) 785*5ffd83dbSDimitry Andric return false; 786*5ffd83dbSDimitry Andric assert(CI->getCaller() == &F && "Unexpected call!"); 787*5ffd83dbSDimitry Andric 788*5ffd83dbSDimitry Andric auto Remark = [&](OptimizationRemark OR) { 789*5ffd83dbSDimitry Andric return OR << "OpenMP runtime call " 790*5ffd83dbSDimitry Andric << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated"; 791*5ffd83dbSDimitry Andric }; 792*5ffd83dbSDimitry Andric emitRemark<OptimizationRemark>(CI, "OpenMPRuntimeDeduplicated", Remark); 793*5ffd83dbSDimitry Andric 794*5ffd83dbSDimitry Andric CGUpdater.removeCallSite(*CI); 795*5ffd83dbSDimitry Andric CI->replaceAllUsesWith(ReplVal); 796*5ffd83dbSDimitry Andric CI->eraseFromParent(); 797*5ffd83dbSDimitry Andric ++NumOpenMPRuntimeCallsDeduplicated; 798*5ffd83dbSDimitry Andric Changed = true; 799*5ffd83dbSDimitry Andric return true; 800*5ffd83dbSDimitry Andric }; 801*5ffd83dbSDimitry Andric RFI.foreachUse(SCC, ReplaceAndDeleteCB); 802*5ffd83dbSDimitry Andric 803*5ffd83dbSDimitry Andric return Changed; 804*5ffd83dbSDimitry Andric } 805*5ffd83dbSDimitry Andric 806*5ffd83dbSDimitry Andric /// Collect arguments that represent the global thread id in \p GTIdArgs. 807*5ffd83dbSDimitry Andric void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> >IdArgs) { 808*5ffd83dbSDimitry Andric // TODO: Below we basically perform a fixpoint iteration with a pessimistic 809*5ffd83dbSDimitry Andric // initialization. We could define an AbstractAttribute instead and 810*5ffd83dbSDimitry Andric // run the Attributor here once it can be run as an SCC pass. 811*5ffd83dbSDimitry Andric 812*5ffd83dbSDimitry Andric // Helper to check the argument \p ArgNo at all call sites of \p F for 813*5ffd83dbSDimitry Andric // a GTId. 814*5ffd83dbSDimitry Andric auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) { 815*5ffd83dbSDimitry Andric if (!F.hasLocalLinkage()) 816*5ffd83dbSDimitry Andric return false; 817*5ffd83dbSDimitry Andric for (Use &U : F.uses()) { 818*5ffd83dbSDimitry Andric if (CallInst *CI = getCallIfRegularCall(U)) { 819*5ffd83dbSDimitry Andric Value *ArgOp = CI->getArgOperand(ArgNo); 820*5ffd83dbSDimitry Andric if (CI == &RefCI || GTIdArgs.count(ArgOp) || 821*5ffd83dbSDimitry Andric getCallIfRegularCall( 822*5ffd83dbSDimitry Andric *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num])) 823*5ffd83dbSDimitry Andric continue; 824*5ffd83dbSDimitry Andric } 825*5ffd83dbSDimitry Andric return false; 826*5ffd83dbSDimitry Andric } 827*5ffd83dbSDimitry Andric return true; 828*5ffd83dbSDimitry Andric }; 829*5ffd83dbSDimitry Andric 830*5ffd83dbSDimitry Andric // Helper to identify uses of a GTId as GTId arguments. 831*5ffd83dbSDimitry Andric auto AddUserArgs = [&](Value >Id) { 832*5ffd83dbSDimitry Andric for (Use &U : GTId.uses()) 833*5ffd83dbSDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(U.getUser())) 834*5ffd83dbSDimitry Andric if (CI->isArgOperand(&U)) 835*5ffd83dbSDimitry Andric if (Function *Callee = CI->getCalledFunction()) 836*5ffd83dbSDimitry Andric if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI)) 837*5ffd83dbSDimitry Andric GTIdArgs.insert(Callee->getArg(U.getOperandNo())); 838*5ffd83dbSDimitry Andric }; 839*5ffd83dbSDimitry Andric 840*5ffd83dbSDimitry Andric // The argument users of __kmpc_global_thread_num calls are GTIds. 841*5ffd83dbSDimitry Andric OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = 842*5ffd83dbSDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]; 843*5ffd83dbSDimitry Andric 844*5ffd83dbSDimitry Andric GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) { 845*5ffd83dbSDimitry Andric if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI)) 846*5ffd83dbSDimitry Andric AddUserArgs(*CI); 847*5ffd83dbSDimitry Andric return false; 848*5ffd83dbSDimitry Andric }); 849*5ffd83dbSDimitry Andric 850*5ffd83dbSDimitry Andric // Transitively search for more arguments by looking at the users of the 851*5ffd83dbSDimitry Andric // ones we know already. During the search the GTIdArgs vector is extended 852*5ffd83dbSDimitry Andric // so we cannot cache the size nor can we use a range based for. 853*5ffd83dbSDimitry Andric for (unsigned u = 0; u < GTIdArgs.size(); ++u) 854*5ffd83dbSDimitry Andric AddUserArgs(*GTIdArgs[u]); 855*5ffd83dbSDimitry Andric } 856*5ffd83dbSDimitry Andric 857*5ffd83dbSDimitry Andric /// Kernel (=GPU) optimizations and utility functions 858*5ffd83dbSDimitry Andric /// 859*5ffd83dbSDimitry Andric ///{{ 860*5ffd83dbSDimitry Andric 861*5ffd83dbSDimitry Andric /// Check if \p F is a kernel, hence entry point for target offloading. 862*5ffd83dbSDimitry Andric bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); } 863*5ffd83dbSDimitry Andric 864*5ffd83dbSDimitry Andric /// Cache to remember the unique kernel for a function. 865*5ffd83dbSDimitry Andric DenseMap<Function *, Optional<Kernel>> UniqueKernelMap; 866*5ffd83dbSDimitry Andric 867*5ffd83dbSDimitry Andric /// Find the unique kernel that will execute \p F, if any. 868*5ffd83dbSDimitry Andric Kernel getUniqueKernelFor(Function &F); 869*5ffd83dbSDimitry Andric 870*5ffd83dbSDimitry Andric /// Find the unique kernel that will execute \p I, if any. 871*5ffd83dbSDimitry Andric Kernel getUniqueKernelFor(Instruction &I) { 872*5ffd83dbSDimitry Andric return getUniqueKernelFor(*I.getFunction()); 873*5ffd83dbSDimitry Andric } 874*5ffd83dbSDimitry Andric 875*5ffd83dbSDimitry Andric /// Rewrite the device (=GPU) code state machine create in non-SPMD mode in 876*5ffd83dbSDimitry Andric /// the cases we can avoid taking the address of a function. 877*5ffd83dbSDimitry Andric bool rewriteDeviceCodeStateMachine(); 878*5ffd83dbSDimitry Andric 879*5ffd83dbSDimitry Andric /// 880*5ffd83dbSDimitry Andric ///}} 881*5ffd83dbSDimitry Andric 882*5ffd83dbSDimitry Andric /// Emit a remark generically 883*5ffd83dbSDimitry Andric /// 884*5ffd83dbSDimitry Andric /// This template function can be used to generically emit a remark. The 885*5ffd83dbSDimitry Andric /// RemarkKind should be one of the following: 886*5ffd83dbSDimitry Andric /// - OptimizationRemark to indicate a successful optimization attempt 887*5ffd83dbSDimitry Andric /// - OptimizationRemarkMissed to report a failed optimization attempt 888*5ffd83dbSDimitry Andric /// - OptimizationRemarkAnalysis to provide additional information about an 889*5ffd83dbSDimitry Andric /// optimization attempt 890*5ffd83dbSDimitry Andric /// 891*5ffd83dbSDimitry Andric /// The remark is built using a callback function provided by the caller that 892*5ffd83dbSDimitry Andric /// takes a RemarkKind as input and returns a RemarkKind. 893*5ffd83dbSDimitry Andric template <typename RemarkKind, 894*5ffd83dbSDimitry Andric typename RemarkCallBack = function_ref<RemarkKind(RemarkKind &&)>> 895*5ffd83dbSDimitry Andric void emitRemark(Instruction *Inst, StringRef RemarkName, 896*5ffd83dbSDimitry Andric RemarkCallBack &&RemarkCB) const { 897*5ffd83dbSDimitry Andric Function *F = Inst->getParent()->getParent(); 898*5ffd83dbSDimitry Andric auto &ORE = OREGetter(F); 899*5ffd83dbSDimitry Andric 900*5ffd83dbSDimitry Andric ORE.emit( 901*5ffd83dbSDimitry Andric [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); }); 902*5ffd83dbSDimitry Andric } 903*5ffd83dbSDimitry Andric 904*5ffd83dbSDimitry Andric /// Emit a remark on a function. Since only OptimizationRemark is supporting 905*5ffd83dbSDimitry Andric /// this, it can't be made generic. 906*5ffd83dbSDimitry Andric void 907*5ffd83dbSDimitry Andric emitRemarkOnFunction(Function *F, StringRef RemarkName, 908*5ffd83dbSDimitry Andric function_ref<OptimizationRemark(OptimizationRemark &&)> 909*5ffd83dbSDimitry Andric &&RemarkCB) const { 910*5ffd83dbSDimitry Andric auto &ORE = OREGetter(F); 911*5ffd83dbSDimitry Andric 912*5ffd83dbSDimitry Andric ORE.emit([&]() { 913*5ffd83dbSDimitry Andric return RemarkCB(OptimizationRemark(DEBUG_TYPE, RemarkName, F)); 914*5ffd83dbSDimitry Andric }); 915*5ffd83dbSDimitry Andric } 916*5ffd83dbSDimitry Andric 917*5ffd83dbSDimitry Andric /// The underlying module. 918*5ffd83dbSDimitry Andric Module &M; 919*5ffd83dbSDimitry Andric 920*5ffd83dbSDimitry Andric /// The SCC we are operating on. 921*5ffd83dbSDimitry Andric SmallVectorImpl<Function *> &SCC; 922*5ffd83dbSDimitry Andric 923*5ffd83dbSDimitry Andric /// Callback to update the call graph, the first argument is a removed call, 924*5ffd83dbSDimitry Andric /// the second an optional replacement call. 925*5ffd83dbSDimitry Andric CallGraphUpdater &CGUpdater; 926*5ffd83dbSDimitry Andric 927*5ffd83dbSDimitry Andric /// Callback to get an OptimizationRemarkEmitter from a Function * 928*5ffd83dbSDimitry Andric OptimizationRemarkGetter OREGetter; 929*5ffd83dbSDimitry Andric 930*5ffd83dbSDimitry Andric /// OpenMP-specific information cache. Also Used for Attributor runs. 931*5ffd83dbSDimitry Andric OMPInformationCache &OMPInfoCache; 932*5ffd83dbSDimitry Andric 933*5ffd83dbSDimitry Andric /// Attributor instance. 934*5ffd83dbSDimitry Andric Attributor &A; 935*5ffd83dbSDimitry Andric 936*5ffd83dbSDimitry Andric /// Helper function to run Attributor on SCC. 937*5ffd83dbSDimitry Andric bool runAttributor() { 938*5ffd83dbSDimitry Andric if (SCC.empty()) 939*5ffd83dbSDimitry Andric return false; 940*5ffd83dbSDimitry Andric 941*5ffd83dbSDimitry Andric registerAAs(); 942*5ffd83dbSDimitry Andric 943*5ffd83dbSDimitry Andric ChangeStatus Changed = A.run(); 944*5ffd83dbSDimitry Andric 945*5ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size() 946*5ffd83dbSDimitry Andric << " functions, result: " << Changed << ".\n"); 947*5ffd83dbSDimitry Andric 948*5ffd83dbSDimitry Andric return Changed == ChangeStatus::CHANGED; 949*5ffd83dbSDimitry Andric } 950*5ffd83dbSDimitry Andric 951*5ffd83dbSDimitry Andric /// Populate the Attributor with abstract attribute opportunities in the 952*5ffd83dbSDimitry Andric /// function. 953*5ffd83dbSDimitry Andric void registerAAs() { 954*5ffd83dbSDimitry Andric for (Function *F : SCC) { 955*5ffd83dbSDimitry Andric if (F->isDeclaration()) 956*5ffd83dbSDimitry Andric continue; 957*5ffd83dbSDimitry Andric 958*5ffd83dbSDimitry Andric A.getOrCreateAAFor<AAICVTracker>(IRPosition::function(*F)); 959*5ffd83dbSDimitry Andric } 960*5ffd83dbSDimitry Andric } 961*5ffd83dbSDimitry Andric }; 962*5ffd83dbSDimitry Andric 963*5ffd83dbSDimitry Andric Kernel OpenMPOpt::getUniqueKernelFor(Function &F) { 964*5ffd83dbSDimitry Andric if (!OMPInfoCache.ModuleSlice.count(&F)) 965*5ffd83dbSDimitry Andric return nullptr; 966*5ffd83dbSDimitry Andric 967*5ffd83dbSDimitry Andric // Use a scope to keep the lifetime of the CachedKernel short. 968*5ffd83dbSDimitry Andric { 969*5ffd83dbSDimitry Andric Optional<Kernel> &CachedKernel = UniqueKernelMap[&F]; 970*5ffd83dbSDimitry Andric if (CachedKernel) 971*5ffd83dbSDimitry Andric return *CachedKernel; 972*5ffd83dbSDimitry Andric 973*5ffd83dbSDimitry Andric // TODO: We should use an AA to create an (optimistic and callback 974*5ffd83dbSDimitry Andric // call-aware) call graph. For now we stick to simple patterns that 975*5ffd83dbSDimitry Andric // are less powerful, basically the worst fixpoint. 976*5ffd83dbSDimitry Andric if (isKernel(F)) { 977*5ffd83dbSDimitry Andric CachedKernel = Kernel(&F); 978*5ffd83dbSDimitry Andric return *CachedKernel; 979*5ffd83dbSDimitry Andric } 980*5ffd83dbSDimitry Andric 981*5ffd83dbSDimitry Andric CachedKernel = nullptr; 982*5ffd83dbSDimitry Andric if (!F.hasLocalLinkage()) 983*5ffd83dbSDimitry Andric return nullptr; 984*5ffd83dbSDimitry Andric } 985*5ffd83dbSDimitry Andric 986*5ffd83dbSDimitry Andric auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel { 987*5ffd83dbSDimitry Andric if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) { 988*5ffd83dbSDimitry Andric // Allow use in equality comparisons. 989*5ffd83dbSDimitry Andric if (Cmp->isEquality()) 990*5ffd83dbSDimitry Andric return getUniqueKernelFor(*Cmp); 991*5ffd83dbSDimitry Andric return nullptr; 992*5ffd83dbSDimitry Andric } 993*5ffd83dbSDimitry Andric if (auto *CB = dyn_cast<CallBase>(U.getUser())) { 994*5ffd83dbSDimitry Andric // Allow direct calls. 995*5ffd83dbSDimitry Andric if (CB->isCallee(&U)) 996*5ffd83dbSDimitry Andric return getUniqueKernelFor(*CB); 997*5ffd83dbSDimitry Andric // Allow the use in __kmpc_kernel_prepare_parallel calls. 998*5ffd83dbSDimitry Andric if (Function *Callee = CB->getCalledFunction()) 999*5ffd83dbSDimitry Andric if (Callee->getName() == "__kmpc_kernel_prepare_parallel") 1000*5ffd83dbSDimitry Andric return getUniqueKernelFor(*CB); 1001*5ffd83dbSDimitry Andric return nullptr; 1002*5ffd83dbSDimitry Andric } 1003*5ffd83dbSDimitry Andric // Disallow every other use. 1004*5ffd83dbSDimitry Andric return nullptr; 1005*5ffd83dbSDimitry Andric }; 1006*5ffd83dbSDimitry Andric 1007*5ffd83dbSDimitry Andric // TODO: In the future we want to track more than just a unique kernel. 1008*5ffd83dbSDimitry Andric SmallPtrSet<Kernel, 2> PotentialKernels; 1009*5ffd83dbSDimitry Andric foreachUse(F, [&](const Use &U) { 1010*5ffd83dbSDimitry Andric PotentialKernels.insert(GetUniqueKernelForUse(U)); 1011*5ffd83dbSDimitry Andric }); 1012*5ffd83dbSDimitry Andric 1013*5ffd83dbSDimitry Andric Kernel K = nullptr; 1014*5ffd83dbSDimitry Andric if (PotentialKernels.size() == 1) 1015*5ffd83dbSDimitry Andric K = *PotentialKernels.begin(); 1016*5ffd83dbSDimitry Andric 1017*5ffd83dbSDimitry Andric // Cache the result. 1018*5ffd83dbSDimitry Andric UniqueKernelMap[&F] = K; 1019*5ffd83dbSDimitry Andric 1020*5ffd83dbSDimitry Andric return K; 1021*5ffd83dbSDimitry Andric } 1022*5ffd83dbSDimitry Andric 1023*5ffd83dbSDimitry Andric bool OpenMPOpt::rewriteDeviceCodeStateMachine() { 1024*5ffd83dbSDimitry Andric OMPInformationCache::RuntimeFunctionInfo &KernelPrepareParallelRFI = 1025*5ffd83dbSDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_kernel_prepare_parallel]; 1026*5ffd83dbSDimitry Andric 1027*5ffd83dbSDimitry Andric bool Changed = false; 1028*5ffd83dbSDimitry Andric if (!KernelPrepareParallelRFI) 1029*5ffd83dbSDimitry Andric return Changed; 1030*5ffd83dbSDimitry Andric 1031*5ffd83dbSDimitry Andric for (Function *F : SCC) { 1032*5ffd83dbSDimitry Andric 1033*5ffd83dbSDimitry Andric // Check if the function is uses in a __kmpc_kernel_prepare_parallel call at 1034*5ffd83dbSDimitry Andric // all. 1035*5ffd83dbSDimitry Andric bool UnknownUse = false; 1036*5ffd83dbSDimitry Andric bool KernelPrepareUse = false; 1037*5ffd83dbSDimitry Andric unsigned NumDirectCalls = 0; 1038*5ffd83dbSDimitry Andric 1039*5ffd83dbSDimitry Andric SmallVector<Use *, 2> ToBeReplacedStateMachineUses; 1040*5ffd83dbSDimitry Andric foreachUse(*F, [&](Use &U) { 1041*5ffd83dbSDimitry Andric if (auto *CB = dyn_cast<CallBase>(U.getUser())) 1042*5ffd83dbSDimitry Andric if (CB->isCallee(&U)) { 1043*5ffd83dbSDimitry Andric ++NumDirectCalls; 1044*5ffd83dbSDimitry Andric return; 1045*5ffd83dbSDimitry Andric } 1046*5ffd83dbSDimitry Andric 1047*5ffd83dbSDimitry Andric if (isa<ICmpInst>(U.getUser())) { 1048*5ffd83dbSDimitry Andric ToBeReplacedStateMachineUses.push_back(&U); 1049*5ffd83dbSDimitry Andric return; 1050*5ffd83dbSDimitry Andric } 1051*5ffd83dbSDimitry Andric if (!KernelPrepareUse && OpenMPOpt::getCallIfRegularCall( 1052*5ffd83dbSDimitry Andric *U.getUser(), &KernelPrepareParallelRFI)) { 1053*5ffd83dbSDimitry Andric KernelPrepareUse = true; 1054*5ffd83dbSDimitry Andric ToBeReplacedStateMachineUses.push_back(&U); 1055*5ffd83dbSDimitry Andric return; 1056*5ffd83dbSDimitry Andric } 1057*5ffd83dbSDimitry Andric UnknownUse = true; 1058*5ffd83dbSDimitry Andric }); 1059*5ffd83dbSDimitry Andric 1060*5ffd83dbSDimitry Andric // Do not emit a remark if we haven't seen a __kmpc_kernel_prepare_parallel 1061*5ffd83dbSDimitry Andric // use. 1062*5ffd83dbSDimitry Andric if (!KernelPrepareUse) 1063*5ffd83dbSDimitry Andric continue; 1064*5ffd83dbSDimitry Andric 1065*5ffd83dbSDimitry Andric { 1066*5ffd83dbSDimitry Andric auto Remark = [&](OptimizationRemark OR) { 1067*5ffd83dbSDimitry Andric return OR << "Found a parallel region that is called in a target " 1068*5ffd83dbSDimitry Andric "region but not part of a combined target construct nor " 1069*5ffd83dbSDimitry Andric "nesed inside a target construct without intermediate " 1070*5ffd83dbSDimitry Andric "code. This can lead to excessive register usage for " 1071*5ffd83dbSDimitry Andric "unrelated target regions in the same translation unit " 1072*5ffd83dbSDimitry Andric "due to spurious call edges assumed by ptxas."; 1073*5ffd83dbSDimitry Andric }; 1074*5ffd83dbSDimitry Andric emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark); 1075*5ffd83dbSDimitry Andric } 1076*5ffd83dbSDimitry Andric 1077*5ffd83dbSDimitry Andric // If this ever hits, we should investigate. 1078*5ffd83dbSDimitry Andric // TODO: Checking the number of uses is not a necessary restriction and 1079*5ffd83dbSDimitry Andric // should be lifted. 1080*5ffd83dbSDimitry Andric if (UnknownUse || NumDirectCalls != 1 || 1081*5ffd83dbSDimitry Andric ToBeReplacedStateMachineUses.size() != 2) { 1082*5ffd83dbSDimitry Andric { 1083*5ffd83dbSDimitry Andric auto Remark = [&](OptimizationRemark OR) { 1084*5ffd83dbSDimitry Andric return OR << "Parallel region is used in " 1085*5ffd83dbSDimitry Andric << (UnknownUse ? "unknown" : "unexpected") 1086*5ffd83dbSDimitry Andric << " ways; will not attempt to rewrite the state machine."; 1087*5ffd83dbSDimitry Andric }; 1088*5ffd83dbSDimitry Andric emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark); 1089*5ffd83dbSDimitry Andric } 1090*5ffd83dbSDimitry Andric continue; 1091*5ffd83dbSDimitry Andric } 1092*5ffd83dbSDimitry Andric 1093*5ffd83dbSDimitry Andric // Even if we have __kmpc_kernel_prepare_parallel calls, we (for now) give 1094*5ffd83dbSDimitry Andric // up if the function is not called from a unique kernel. 1095*5ffd83dbSDimitry Andric Kernel K = getUniqueKernelFor(*F); 1096*5ffd83dbSDimitry Andric if (!K) { 1097*5ffd83dbSDimitry Andric { 1098*5ffd83dbSDimitry Andric auto Remark = [&](OptimizationRemark OR) { 1099*5ffd83dbSDimitry Andric return OR << "Parallel region is not known to be called from a " 1100*5ffd83dbSDimitry Andric "unique single target region, maybe the surrounding " 1101*5ffd83dbSDimitry Andric "function has external linkage?; will not attempt to " 1102*5ffd83dbSDimitry Andric "rewrite the state machine use."; 1103*5ffd83dbSDimitry Andric }; 1104*5ffd83dbSDimitry Andric emitRemarkOnFunction(F, "OpenMPParallelRegionInMultipleKernesl", 1105*5ffd83dbSDimitry Andric Remark); 1106*5ffd83dbSDimitry Andric } 1107*5ffd83dbSDimitry Andric continue; 1108*5ffd83dbSDimitry Andric } 1109*5ffd83dbSDimitry Andric 1110*5ffd83dbSDimitry Andric // We now know F is a parallel body function called only from the kernel K. 1111*5ffd83dbSDimitry Andric // We also identified the state machine uses in which we replace the 1112*5ffd83dbSDimitry Andric // function pointer by a new global symbol for identification purposes. This 1113*5ffd83dbSDimitry Andric // ensures only direct calls to the function are left. 1114*5ffd83dbSDimitry Andric 1115*5ffd83dbSDimitry Andric { 1116*5ffd83dbSDimitry Andric auto RemarkParalleRegion = [&](OptimizationRemark OR) { 1117*5ffd83dbSDimitry Andric return OR << "Specialize parallel region that is only reached from a " 1118*5ffd83dbSDimitry Andric "single target region to avoid spurious call edges and " 1119*5ffd83dbSDimitry Andric "excessive register usage in other target regions. " 1120*5ffd83dbSDimitry Andric "(parallel region ID: " 1121*5ffd83dbSDimitry Andric << ore::NV("OpenMPParallelRegion", F->getName()) 1122*5ffd83dbSDimitry Andric << ", kernel ID: " 1123*5ffd83dbSDimitry Andric << ore::NV("OpenMPTargetRegion", K->getName()) << ")"; 1124*5ffd83dbSDimitry Andric }; 1125*5ffd83dbSDimitry Andric emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", 1126*5ffd83dbSDimitry Andric RemarkParalleRegion); 1127*5ffd83dbSDimitry Andric auto RemarkKernel = [&](OptimizationRemark OR) { 1128*5ffd83dbSDimitry Andric return OR << "Target region containing the parallel region that is " 1129*5ffd83dbSDimitry Andric "specialized. (parallel region ID: " 1130*5ffd83dbSDimitry Andric << ore::NV("OpenMPParallelRegion", F->getName()) 1131*5ffd83dbSDimitry Andric << ", kernel ID: " 1132*5ffd83dbSDimitry Andric << ore::NV("OpenMPTargetRegion", K->getName()) << ")"; 1133*5ffd83dbSDimitry Andric }; 1134*5ffd83dbSDimitry Andric emitRemarkOnFunction(K, "OpenMPParallelRegionInNonSPMD", RemarkKernel); 1135*5ffd83dbSDimitry Andric } 1136*5ffd83dbSDimitry Andric 1137*5ffd83dbSDimitry Andric Module &M = *F->getParent(); 1138*5ffd83dbSDimitry Andric Type *Int8Ty = Type::getInt8Ty(M.getContext()); 1139*5ffd83dbSDimitry Andric 1140*5ffd83dbSDimitry Andric auto *ID = new GlobalVariable( 1141*5ffd83dbSDimitry Andric M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage, 1142*5ffd83dbSDimitry Andric UndefValue::get(Int8Ty), F->getName() + ".ID"); 1143*5ffd83dbSDimitry Andric 1144*5ffd83dbSDimitry Andric for (Use *U : ToBeReplacedStateMachineUses) 1145*5ffd83dbSDimitry Andric U->set(ConstantExpr::getBitCast(ID, U->get()->getType())); 1146*5ffd83dbSDimitry Andric 1147*5ffd83dbSDimitry Andric ++NumOpenMPParallelRegionsReplacedInGPUStateMachine; 1148*5ffd83dbSDimitry Andric 1149*5ffd83dbSDimitry Andric Changed = true; 1150*5ffd83dbSDimitry Andric } 1151*5ffd83dbSDimitry Andric 1152*5ffd83dbSDimitry Andric return Changed; 1153*5ffd83dbSDimitry Andric } 1154*5ffd83dbSDimitry Andric 1155*5ffd83dbSDimitry Andric /// Abstract Attribute for tracking ICV values. 1156*5ffd83dbSDimitry Andric struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> { 1157*5ffd83dbSDimitry Andric using Base = StateWrapper<BooleanState, AbstractAttribute>; 1158*5ffd83dbSDimitry Andric AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 1159*5ffd83dbSDimitry Andric 1160*5ffd83dbSDimitry Andric /// Returns true if value is assumed to be tracked. 1161*5ffd83dbSDimitry Andric bool isAssumedTracked() const { return getAssumed(); } 1162*5ffd83dbSDimitry Andric 1163*5ffd83dbSDimitry Andric /// Returns true if value is known to be tracked. 1164*5ffd83dbSDimitry Andric bool isKnownTracked() const { return getAssumed(); } 1165*5ffd83dbSDimitry Andric 1166*5ffd83dbSDimitry Andric /// Create an abstract attribute biew for the position \p IRP. 1167*5ffd83dbSDimitry Andric static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A); 1168*5ffd83dbSDimitry Andric 1169*5ffd83dbSDimitry Andric /// Return the value with which \p I can be replaced for specific \p ICV. 1170*5ffd83dbSDimitry Andric virtual Value *getReplacementValue(InternalControlVar ICV, 1171*5ffd83dbSDimitry Andric const Instruction *I, Attributor &A) = 0; 1172*5ffd83dbSDimitry Andric 1173*5ffd83dbSDimitry Andric /// See AbstractAttribute::getName() 1174*5ffd83dbSDimitry Andric const std::string getName() const override { return "AAICVTracker"; } 1175*5ffd83dbSDimitry Andric 1176*5ffd83dbSDimitry Andric /// See AbstractAttribute::getIdAddr() 1177*5ffd83dbSDimitry Andric const char *getIdAddr() const override { return &ID; } 1178*5ffd83dbSDimitry Andric 1179*5ffd83dbSDimitry Andric /// This function should return true if the type of the \p AA is AAICVTracker 1180*5ffd83dbSDimitry Andric static bool classof(const AbstractAttribute *AA) { 1181*5ffd83dbSDimitry Andric return (AA->getIdAddr() == &ID); 1182*5ffd83dbSDimitry Andric } 1183*5ffd83dbSDimitry Andric 1184*5ffd83dbSDimitry Andric static const char ID; 1185*5ffd83dbSDimitry Andric }; 1186*5ffd83dbSDimitry Andric 1187*5ffd83dbSDimitry Andric struct AAICVTrackerFunction : public AAICVTracker { 1188*5ffd83dbSDimitry Andric AAICVTrackerFunction(const IRPosition &IRP, Attributor &A) 1189*5ffd83dbSDimitry Andric : AAICVTracker(IRP, A) {} 1190*5ffd83dbSDimitry Andric 1191*5ffd83dbSDimitry Andric // FIXME: come up with better string. 1192*5ffd83dbSDimitry Andric const std::string getAsStr() const override { return "ICVTracker"; } 1193*5ffd83dbSDimitry Andric 1194*5ffd83dbSDimitry Andric // FIXME: come up with some stats. 1195*5ffd83dbSDimitry Andric void trackStatistics() const override {} 1196*5ffd83dbSDimitry Andric 1197*5ffd83dbSDimitry Andric /// TODO: decide whether to deduplicate here, or use current 1198*5ffd83dbSDimitry Andric /// deduplicateRuntimeCalls function. 1199*5ffd83dbSDimitry Andric ChangeStatus manifest(Attributor &A) override { 1200*5ffd83dbSDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED; 1201*5ffd83dbSDimitry Andric 1202*5ffd83dbSDimitry Andric for (InternalControlVar &ICV : TrackableICVs) 1203*5ffd83dbSDimitry Andric if (deduplicateICVGetters(ICV, A)) 1204*5ffd83dbSDimitry Andric Changed = ChangeStatus::CHANGED; 1205*5ffd83dbSDimitry Andric 1206*5ffd83dbSDimitry Andric return Changed; 1207*5ffd83dbSDimitry Andric } 1208*5ffd83dbSDimitry Andric 1209*5ffd83dbSDimitry Andric bool deduplicateICVGetters(InternalControlVar &ICV, Attributor &A) { 1210*5ffd83dbSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 1211*5ffd83dbSDimitry Andric auto &ICVInfo = OMPInfoCache.ICVs[ICV]; 1212*5ffd83dbSDimitry Andric auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter]; 1213*5ffd83dbSDimitry Andric 1214*5ffd83dbSDimitry Andric bool Changed = false; 1215*5ffd83dbSDimitry Andric 1216*5ffd83dbSDimitry Andric auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { 1217*5ffd83dbSDimitry Andric CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI); 1218*5ffd83dbSDimitry Andric Instruction *UserI = cast<Instruction>(U.getUser()); 1219*5ffd83dbSDimitry Andric Value *ReplVal = getReplacementValue(ICV, UserI, A); 1220*5ffd83dbSDimitry Andric 1221*5ffd83dbSDimitry Andric if (!ReplVal || !CI) 1222*5ffd83dbSDimitry Andric return false; 1223*5ffd83dbSDimitry Andric 1224*5ffd83dbSDimitry Andric A.removeCallSite(CI); 1225*5ffd83dbSDimitry Andric CI->replaceAllUsesWith(ReplVal); 1226*5ffd83dbSDimitry Andric CI->eraseFromParent(); 1227*5ffd83dbSDimitry Andric Changed = true; 1228*5ffd83dbSDimitry Andric return true; 1229*5ffd83dbSDimitry Andric }; 1230*5ffd83dbSDimitry Andric 1231*5ffd83dbSDimitry Andric GetterRFI.foreachUse(ReplaceAndDeleteCB, getAnchorScope()); 1232*5ffd83dbSDimitry Andric return Changed; 1233*5ffd83dbSDimitry Andric } 1234*5ffd83dbSDimitry Andric 1235*5ffd83dbSDimitry Andric // Map of ICV to their values at specific program point. 1236*5ffd83dbSDimitry Andric EnumeratedArray<SmallSetVector<ICVValue, 4>, InternalControlVar, 1237*5ffd83dbSDimitry Andric InternalControlVar::ICV___last> 1238*5ffd83dbSDimitry Andric ICVValuesMap; 1239*5ffd83dbSDimitry Andric 1240*5ffd83dbSDimitry Andric // Currently only nthreads is being tracked. 1241*5ffd83dbSDimitry Andric // this array will only grow with time. 1242*5ffd83dbSDimitry Andric InternalControlVar TrackableICVs[1] = {ICV_nthreads}; 1243*5ffd83dbSDimitry Andric 1244*5ffd83dbSDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 1245*5ffd83dbSDimitry Andric ChangeStatus HasChanged = ChangeStatus::UNCHANGED; 1246*5ffd83dbSDimitry Andric 1247*5ffd83dbSDimitry Andric Function *F = getAnchorScope(); 1248*5ffd83dbSDimitry Andric 1249*5ffd83dbSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 1250*5ffd83dbSDimitry Andric 1251*5ffd83dbSDimitry Andric for (InternalControlVar ICV : TrackableICVs) { 1252*5ffd83dbSDimitry Andric auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; 1253*5ffd83dbSDimitry Andric 1254*5ffd83dbSDimitry Andric auto TrackValues = [&](Use &U, Function &) { 1255*5ffd83dbSDimitry Andric CallInst *CI = OpenMPOpt::getCallIfRegularCall(U); 1256*5ffd83dbSDimitry Andric if (!CI) 1257*5ffd83dbSDimitry Andric return false; 1258*5ffd83dbSDimitry Andric 1259*5ffd83dbSDimitry Andric // FIXME: handle setters with more that 1 arguments. 1260*5ffd83dbSDimitry Andric /// Track new value. 1261*5ffd83dbSDimitry Andric if (ICVValuesMap[ICV].insert(ICVValue(CI, CI->getArgOperand(0)))) 1262*5ffd83dbSDimitry Andric HasChanged = ChangeStatus::CHANGED; 1263*5ffd83dbSDimitry Andric 1264*5ffd83dbSDimitry Andric return false; 1265*5ffd83dbSDimitry Andric }; 1266*5ffd83dbSDimitry Andric 1267*5ffd83dbSDimitry Andric SetterRFI.foreachUse(TrackValues, F); 1268*5ffd83dbSDimitry Andric } 1269*5ffd83dbSDimitry Andric 1270*5ffd83dbSDimitry Andric return HasChanged; 1271*5ffd83dbSDimitry Andric } 1272*5ffd83dbSDimitry Andric 1273*5ffd83dbSDimitry Andric /// Return the value with which \p I can be replaced for specific \p ICV. 1274*5ffd83dbSDimitry Andric Value *getReplacementValue(InternalControlVar ICV, const Instruction *I, 1275*5ffd83dbSDimitry Andric Attributor &A) override { 1276*5ffd83dbSDimitry Andric const BasicBlock *CurrBB = I->getParent(); 1277*5ffd83dbSDimitry Andric 1278*5ffd83dbSDimitry Andric auto &ValuesSet = ICVValuesMap[ICV]; 1279*5ffd83dbSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 1280*5ffd83dbSDimitry Andric auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter]; 1281*5ffd83dbSDimitry Andric 1282*5ffd83dbSDimitry Andric for (const auto &ICVVal : ValuesSet) { 1283*5ffd83dbSDimitry Andric if (CurrBB == ICVVal.Inst->getParent()) { 1284*5ffd83dbSDimitry Andric if (!ICVVal.Inst->comesBefore(I)) 1285*5ffd83dbSDimitry Andric continue; 1286*5ffd83dbSDimitry Andric 1287*5ffd83dbSDimitry Andric // both instructions are in the same BB and at \p I we know the ICV 1288*5ffd83dbSDimitry Andric // value. 1289*5ffd83dbSDimitry Andric while (I != ICVVal.Inst) { 1290*5ffd83dbSDimitry Andric // we don't yet know if a call might update an ICV. 1291*5ffd83dbSDimitry Andric // TODO: check callsite AA for value. 1292*5ffd83dbSDimitry Andric if (const auto *CB = dyn_cast<CallBase>(I)) 1293*5ffd83dbSDimitry Andric if (CB->getCalledFunction() != GetterRFI.Declaration) 1294*5ffd83dbSDimitry Andric return nullptr; 1295*5ffd83dbSDimitry Andric 1296*5ffd83dbSDimitry Andric I = I->getPrevNode(); 1297*5ffd83dbSDimitry Andric } 1298*5ffd83dbSDimitry Andric 1299*5ffd83dbSDimitry Andric // No call in between, return the value. 1300*5ffd83dbSDimitry Andric return ICVVal.TrackedValue; 1301*5ffd83dbSDimitry Andric } 1302*5ffd83dbSDimitry Andric } 1303*5ffd83dbSDimitry Andric 1304*5ffd83dbSDimitry Andric // No value was tracked. 1305*5ffd83dbSDimitry Andric return nullptr; 1306*5ffd83dbSDimitry Andric } 1307*5ffd83dbSDimitry Andric }; 1308*5ffd83dbSDimitry Andric } // namespace 1309*5ffd83dbSDimitry Andric 1310*5ffd83dbSDimitry Andric const char AAICVTracker::ID = 0; 1311*5ffd83dbSDimitry Andric 1312*5ffd83dbSDimitry Andric AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP, 1313*5ffd83dbSDimitry Andric Attributor &A) { 1314*5ffd83dbSDimitry Andric AAICVTracker *AA = nullptr; 1315*5ffd83dbSDimitry Andric switch (IRP.getPositionKind()) { 1316*5ffd83dbSDimitry Andric case IRPosition::IRP_INVALID: 1317*5ffd83dbSDimitry Andric case IRPosition::IRP_FLOAT: 1318*5ffd83dbSDimitry Andric case IRPosition::IRP_ARGUMENT: 1319*5ffd83dbSDimitry Andric case IRPosition::IRP_RETURNED: 1320*5ffd83dbSDimitry Andric case IRPosition::IRP_CALL_SITE_RETURNED: 1321*5ffd83dbSDimitry Andric case IRPosition::IRP_CALL_SITE_ARGUMENT: 1322*5ffd83dbSDimitry Andric case IRPosition::IRP_CALL_SITE: 1323*5ffd83dbSDimitry Andric llvm_unreachable("ICVTracker can only be created for function position!"); 1324*5ffd83dbSDimitry Andric case IRPosition::IRP_FUNCTION: 1325*5ffd83dbSDimitry Andric AA = new (A.Allocator) AAICVTrackerFunction(IRP, A); 1326*5ffd83dbSDimitry Andric break; 1327*5ffd83dbSDimitry Andric } 1328*5ffd83dbSDimitry Andric 1329*5ffd83dbSDimitry Andric return *AA; 1330*5ffd83dbSDimitry Andric } 1331*5ffd83dbSDimitry Andric 1332*5ffd83dbSDimitry Andric PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C, 1333*5ffd83dbSDimitry Andric CGSCCAnalysisManager &AM, 1334*5ffd83dbSDimitry Andric LazyCallGraph &CG, CGSCCUpdateResult &UR) { 1335*5ffd83dbSDimitry Andric if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule)) 1336*5ffd83dbSDimitry Andric return PreservedAnalyses::all(); 1337*5ffd83dbSDimitry Andric 1338*5ffd83dbSDimitry Andric if (DisableOpenMPOptimizations) 1339*5ffd83dbSDimitry Andric return PreservedAnalyses::all(); 1340*5ffd83dbSDimitry Andric 1341*5ffd83dbSDimitry Andric SmallVector<Function *, 16> SCC; 1342*5ffd83dbSDimitry Andric for (LazyCallGraph::Node &N : C) 1343*5ffd83dbSDimitry Andric SCC.push_back(&N.getFunction()); 1344*5ffd83dbSDimitry Andric 1345*5ffd83dbSDimitry Andric if (SCC.empty()) 1346*5ffd83dbSDimitry Andric return PreservedAnalyses::all(); 1347*5ffd83dbSDimitry Andric 1348*5ffd83dbSDimitry Andric FunctionAnalysisManager &FAM = 1349*5ffd83dbSDimitry Andric AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); 1350*5ffd83dbSDimitry Andric 1351*5ffd83dbSDimitry Andric AnalysisGetter AG(FAM); 1352*5ffd83dbSDimitry Andric 1353*5ffd83dbSDimitry Andric auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { 1354*5ffd83dbSDimitry Andric return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); 1355*5ffd83dbSDimitry Andric }; 1356*5ffd83dbSDimitry Andric 1357*5ffd83dbSDimitry Andric CallGraphUpdater CGUpdater; 1358*5ffd83dbSDimitry Andric CGUpdater.initialize(CG, C, AM, UR); 1359*5ffd83dbSDimitry Andric 1360*5ffd83dbSDimitry Andric SetVector<Function *> Functions(SCC.begin(), SCC.end()); 1361*5ffd83dbSDimitry Andric BumpPtrAllocator Allocator; 1362*5ffd83dbSDimitry Andric OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, 1363*5ffd83dbSDimitry Andric /*CGSCC*/ Functions, OMPInModule.getKernels()); 1364*5ffd83dbSDimitry Andric 1365*5ffd83dbSDimitry Andric Attributor A(Functions, InfoCache, CGUpdater); 1366*5ffd83dbSDimitry Andric 1367*5ffd83dbSDimitry Andric // TODO: Compute the module slice we are allowed to look at. 1368*5ffd83dbSDimitry Andric OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); 1369*5ffd83dbSDimitry Andric bool Changed = OMPOpt.run(); 1370*5ffd83dbSDimitry Andric if (Changed) 1371*5ffd83dbSDimitry Andric return PreservedAnalyses::none(); 1372*5ffd83dbSDimitry Andric 1373*5ffd83dbSDimitry Andric return PreservedAnalyses::all(); 1374*5ffd83dbSDimitry Andric } 1375*5ffd83dbSDimitry Andric 1376*5ffd83dbSDimitry Andric namespace { 1377*5ffd83dbSDimitry Andric 1378*5ffd83dbSDimitry Andric struct OpenMPOptLegacyPass : public CallGraphSCCPass { 1379*5ffd83dbSDimitry Andric CallGraphUpdater CGUpdater; 1380*5ffd83dbSDimitry Andric OpenMPInModule OMPInModule; 1381*5ffd83dbSDimitry Andric static char ID; 1382*5ffd83dbSDimitry Andric 1383*5ffd83dbSDimitry Andric OpenMPOptLegacyPass() : CallGraphSCCPass(ID) { 1384*5ffd83dbSDimitry Andric initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry()); 1385*5ffd83dbSDimitry Andric } 1386*5ffd83dbSDimitry Andric 1387*5ffd83dbSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 1388*5ffd83dbSDimitry Andric CallGraphSCCPass::getAnalysisUsage(AU); 1389*5ffd83dbSDimitry Andric } 1390*5ffd83dbSDimitry Andric 1391*5ffd83dbSDimitry Andric bool doInitialization(CallGraph &CG) override { 1392*5ffd83dbSDimitry Andric // Disable the pass if there is no OpenMP (runtime call) in the module. 1393*5ffd83dbSDimitry Andric containsOpenMP(CG.getModule(), OMPInModule); 1394*5ffd83dbSDimitry Andric return false; 1395*5ffd83dbSDimitry Andric } 1396*5ffd83dbSDimitry Andric 1397*5ffd83dbSDimitry Andric bool runOnSCC(CallGraphSCC &CGSCC) override { 1398*5ffd83dbSDimitry Andric if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule)) 1399*5ffd83dbSDimitry Andric return false; 1400*5ffd83dbSDimitry Andric if (DisableOpenMPOptimizations || skipSCC(CGSCC)) 1401*5ffd83dbSDimitry Andric return false; 1402*5ffd83dbSDimitry Andric 1403*5ffd83dbSDimitry Andric SmallVector<Function *, 16> SCC; 1404*5ffd83dbSDimitry Andric for (CallGraphNode *CGN : CGSCC) 1405*5ffd83dbSDimitry Andric if (Function *Fn = CGN->getFunction()) 1406*5ffd83dbSDimitry Andric if (!Fn->isDeclaration()) 1407*5ffd83dbSDimitry Andric SCC.push_back(Fn); 1408*5ffd83dbSDimitry Andric 1409*5ffd83dbSDimitry Andric if (SCC.empty()) 1410*5ffd83dbSDimitry Andric return false; 1411*5ffd83dbSDimitry Andric 1412*5ffd83dbSDimitry Andric CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); 1413*5ffd83dbSDimitry Andric CGUpdater.initialize(CG, CGSCC); 1414*5ffd83dbSDimitry Andric 1415*5ffd83dbSDimitry Andric // Maintain a map of functions to avoid rebuilding the ORE 1416*5ffd83dbSDimitry Andric DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap; 1417*5ffd83dbSDimitry Andric auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & { 1418*5ffd83dbSDimitry Andric std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F]; 1419*5ffd83dbSDimitry Andric if (!ORE) 1420*5ffd83dbSDimitry Andric ORE = std::make_unique<OptimizationRemarkEmitter>(F); 1421*5ffd83dbSDimitry Andric return *ORE; 1422*5ffd83dbSDimitry Andric }; 1423*5ffd83dbSDimitry Andric 1424*5ffd83dbSDimitry Andric AnalysisGetter AG; 1425*5ffd83dbSDimitry Andric SetVector<Function *> Functions(SCC.begin(), SCC.end()); 1426*5ffd83dbSDimitry Andric BumpPtrAllocator Allocator; 1427*5ffd83dbSDimitry Andric OMPInformationCache InfoCache( 1428*5ffd83dbSDimitry Andric *(Functions.back()->getParent()), AG, Allocator, 1429*5ffd83dbSDimitry Andric /*CGSCC*/ Functions, OMPInModule.getKernels()); 1430*5ffd83dbSDimitry Andric 1431*5ffd83dbSDimitry Andric Attributor A(Functions, InfoCache, CGUpdater); 1432*5ffd83dbSDimitry Andric 1433*5ffd83dbSDimitry Andric // TODO: Compute the module slice we are allowed to look at. 1434*5ffd83dbSDimitry Andric OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); 1435*5ffd83dbSDimitry Andric return OMPOpt.run(); 1436*5ffd83dbSDimitry Andric } 1437*5ffd83dbSDimitry Andric 1438*5ffd83dbSDimitry Andric bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); } 1439*5ffd83dbSDimitry Andric }; 1440*5ffd83dbSDimitry Andric 1441*5ffd83dbSDimitry Andric } // end anonymous namespace 1442*5ffd83dbSDimitry Andric 1443*5ffd83dbSDimitry Andric void OpenMPInModule::identifyKernels(Module &M) { 1444*5ffd83dbSDimitry Andric 1445*5ffd83dbSDimitry Andric NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); 1446*5ffd83dbSDimitry Andric if (!MD) 1447*5ffd83dbSDimitry Andric return; 1448*5ffd83dbSDimitry Andric 1449*5ffd83dbSDimitry Andric for (auto *Op : MD->operands()) { 1450*5ffd83dbSDimitry Andric if (Op->getNumOperands() < 2) 1451*5ffd83dbSDimitry Andric continue; 1452*5ffd83dbSDimitry Andric MDString *KindID = dyn_cast<MDString>(Op->getOperand(1)); 1453*5ffd83dbSDimitry Andric if (!KindID || KindID->getString() != "kernel") 1454*5ffd83dbSDimitry Andric continue; 1455*5ffd83dbSDimitry Andric 1456*5ffd83dbSDimitry Andric Function *KernelFn = 1457*5ffd83dbSDimitry Andric mdconst::dyn_extract_or_null<Function>(Op->getOperand(0)); 1458*5ffd83dbSDimitry Andric if (!KernelFn) 1459*5ffd83dbSDimitry Andric continue; 1460*5ffd83dbSDimitry Andric 1461*5ffd83dbSDimitry Andric ++NumOpenMPTargetRegionKernels; 1462*5ffd83dbSDimitry Andric 1463*5ffd83dbSDimitry Andric Kernels.insert(KernelFn); 1464*5ffd83dbSDimitry Andric } 1465*5ffd83dbSDimitry Andric } 1466*5ffd83dbSDimitry Andric 1467*5ffd83dbSDimitry Andric bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) { 1468*5ffd83dbSDimitry Andric if (OMPInModule.isKnown()) 1469*5ffd83dbSDimitry Andric return OMPInModule; 1470*5ffd83dbSDimitry Andric 1471*5ffd83dbSDimitry Andric // MSVC doesn't like long if-else chains for some reason and instead just 1472*5ffd83dbSDimitry Andric // issues an error. Work around it.. 1473*5ffd83dbSDimitry Andric do { 1474*5ffd83dbSDimitry Andric #define OMP_RTL(_Enum, _Name, ...) \ 1475*5ffd83dbSDimitry Andric if (M.getFunction(_Name)) { \ 1476*5ffd83dbSDimitry Andric OMPInModule = true; \ 1477*5ffd83dbSDimitry Andric break; \ 1478*5ffd83dbSDimitry Andric } 1479*5ffd83dbSDimitry Andric #include "llvm/Frontend/OpenMP/OMPKinds.def" 1480*5ffd83dbSDimitry Andric } while (false); 1481*5ffd83dbSDimitry Andric 1482*5ffd83dbSDimitry Andric // Identify kernels once. TODO: We should split the OMPInformationCache into a 1483*5ffd83dbSDimitry Andric // module and an SCC part. The kernel information, among other things, could 1484*5ffd83dbSDimitry Andric // go into the module part. 1485*5ffd83dbSDimitry Andric if (OMPInModule.isKnown() && OMPInModule) { 1486*5ffd83dbSDimitry Andric OMPInModule.identifyKernels(M); 1487*5ffd83dbSDimitry Andric return true; 1488*5ffd83dbSDimitry Andric } 1489*5ffd83dbSDimitry Andric 1490*5ffd83dbSDimitry Andric return OMPInModule = false; 1491*5ffd83dbSDimitry Andric } 1492*5ffd83dbSDimitry Andric 1493*5ffd83dbSDimitry Andric char OpenMPOptLegacyPass::ID = 0; 1494*5ffd83dbSDimitry Andric 1495*5ffd83dbSDimitry Andric INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt", 1496*5ffd83dbSDimitry Andric "OpenMP specific optimizations", false, false) 1497*5ffd83dbSDimitry Andric INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) 1498*5ffd83dbSDimitry Andric INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt", 1499*5ffd83dbSDimitry Andric "OpenMP specific optimizations", false, false) 1500*5ffd83dbSDimitry Andric 1501*5ffd83dbSDimitry Andric Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); } 1502