//===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// OpenMP specific optimizations:
//
// - Deduplication of runtime calls, e.g., omp_get_thread_num.
// - Replacing globalized device memory with stack memory.
// - Replacing globalized device memory with shared memory.
// - Parallel region merging.
// - Transforming generic-mode device kernels to SPMD mode.
// - Specializing the state machine for generic-mode device kernels.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/OpenMPOpt.h"

#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"

#include <algorithm>
#include <optional>
#include <string>

using namespace llvm;
using namespace omp;

#define DEBUG_TYPE "openmp-opt"

static cl::opt<bool> DisableOpenMPOptimizations(
    "openmp-opt-disable", cl::desc("Disable OpenMP specific optimizations."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> EnableParallelRegionMerging(
    "openmp-opt-enable-merging",
    cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
    cl::init(false));

static cl::opt<bool>
    DisableInternalization("openmp-opt-disable-internalization",
                           cl::desc("Disable function internalization."),
                           cl::Hidden, cl::init(false));

static cl::opt<bool> DeduceICVValues("openmp-deduce-icv-values",
                                     cl::init(false), cl::Hidden);
static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
                                    cl::Hidden);
static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
                                        cl::init(false), cl::Hidden);

static cl::opt<bool> HideMemoryTransferLatency(
    "openmp-hide-memory-transfer-latency",
    cl::desc("[WIP] Tries to hide the latency of host to device memory"
             " transfers"),
    cl::Hidden, cl::init(false));

static cl::opt<bool> DisableOpenMPOptDeglobalization(
    "openmp-opt-disable-deglobalization",
    cl::desc("Disable OpenMP optimizations involving deglobalization."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> DisableOpenMPOptSPMDization(
    "openmp-opt-disable-spmdization",
    cl::desc("Disable OpenMP optimizations involving SPMD-ization."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> DisableOpenMPOptFolding(
    "openmp-opt-disable-folding",
    cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden,
    cl::init(false));

static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
    "openmp-opt-disable-state-machine-rewrite",
    cl::desc("Disable OpenMP optimizations that replace the state machine."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> DisableOpenMPOptBarrierElimination(
    "openmp-opt-disable-barrier-elimination",
    cl::desc("Disable OpenMP optimizations that eliminate barriers."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> PrintModuleAfterOptimizations(
    "openmp-opt-print-module-after",
    cl::desc("Print the current module after OpenMP optimizations."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> PrintModuleBeforeOptimizations(
    "openmp-opt-print-module-before",
    cl::desc("Print the current module before OpenMP optimizations."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> AlwaysInlineDeviceFunctions(
    "openmp-opt-inline-device",
    cl::desc("Inline all applicable functions on the device."), cl::Hidden,
    cl::init(false));

static cl::opt<bool>
    EnableVerboseRemarks("openmp-opt-verbose-remarks",
                         cl::desc("Enables more verbose remarks."), cl::Hidden,
                         cl::init(false));

static cl::opt<unsigned>
    SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden,
                          cl::desc("Maximal number of attributor iterations."),
                          cl::init(256));

static cl::opt<unsigned>
    SharedMemoryLimit("openmp-opt-shared-limit", cl::Hidden,
                      cl::desc("Maximum amount of shared memory to use."),
                      cl::init(std::numeric_limits<unsigned>::max()));

STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
          "Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPParallelRegionsDeleted,
          "Number of OpenMP parallel regions deleted");
STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
          "Number of OpenMP runtime functions identified");
STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
          "Number of OpenMP runtime function uses identified");
STATISTIC(NumOpenMPTargetRegionKernels,
          "Number of OpenMP target region entry points (=kernels) identified");
STATISTIC(NumOpenMPTargetRegionKernelsSPMD,
          "Number of OpenMP target region entry points (=kernels) executed in "
          "SPMD-mode instead of generic-mode");
STATISTIC(NumOpenMPTargetRegionKernelsWithoutStateMachine,
          "Number of OpenMP target region entry points (=kernels) executed in "
          "generic-mode without a state machine");
STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback,
          "Number of OpenMP target region entry points (=kernels) executed in "
          "generic-mode with customized state machines with fallback");
STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback,
          "Number of OpenMP target region entry points (=kernels) executed in "
          "generic-mode with customized state machines without fallback");
STATISTIC(
    NumOpenMPParallelRegionsReplacedInGPUStateMachine,
    "Number of OpenMP parallel regions replaced with ID in GPU state machines");
STATISTIC(NumOpenMPParallelRegionsMerged,
          "Number of OpenMP parallel regions merged");
STATISTIC(NumBytesMovedToSharedMemory,
          "Amount of memory pushed to shared memory");
STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated");

#if !defined(NDEBUG)
static constexpr auto TAG = "[" DEBUG_TYPE "]";
#endif

namespace {

struct AAHeapToShared;

struct AAICVTracker;

/// OpenMP specific information. For now, stores RFIs and ICVs also needed for
/// Attributor runs.
struct OMPInformationCache : public InformationCache {
  OMPInformationCache(Module &M, AnalysisGetter &AG,
                      BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC,
                      bool OpenMPPostLink)
      : InformationCache(M, AG, Allocator, CGSCC), OMPBuilder(M),
        OpenMPPostLink(OpenMPPostLink) {

    OMPBuilder.initialize();
    initializeRuntimeFunctions(M);
    initializeInternalControlVars();
  }

  /// Generic information that describes an internal control variable.
  struct InternalControlVarInfo {
    /// The kind, as described by InternalControlVar enum.
    InternalControlVar Kind;

    /// The name of the ICV.
    StringRef Name;

    /// Environment variable associated with this ICV.
    StringRef EnvVarName;

    /// Initial value kind.
    ICVInitValue InitKind;

    /// Initial value.
    ConstantInt *InitValue;

    /// Setter RTL function associated with this ICV.
    RuntimeFunction Setter;

    /// Getter RTL function associated with this ICV.
    RuntimeFunction Getter;

    /// RTL Function corresponding to the override clause of this ICV
    RuntimeFunction Clause;
  };

  /// Generic information that describes a runtime function
  struct RuntimeFunctionInfo {

    /// The kind, as described by the RuntimeFunction enum.
    RuntimeFunction Kind;

    /// The name of the function.
    StringRef Name;

    /// Flag to indicate a variadic function.
    bool IsVarArg;

    /// The return type of the function.
    Type *ReturnType;

    /// The argument types of the function.
    SmallVector<Type *, 8> ArgumentTypes;

    /// The declaration if available.
    Function *Declaration = nullptr;

    /// Uses of this runtime function per function containing the use.
    using UseVector = SmallVector<Use *, 16>;

    /// Clear UsesMap for runtime function.
    void clearUsesMap() { UsesMap.clear(); }

    /// Boolean conversion that is true if the runtime function was found.
    operator bool() const { return Declaration; }

    /// Return the vector of uses in function \p F.
    UseVector &getOrCreateUseVector(Function *F) {
      std::shared_ptr<UseVector> &UV = UsesMap[F];
      if (!UV)
        UV = std::make_shared<UseVector>();
      return *UV;
    }

    /// Return the vector of uses in function \p F or `nullptr` if there are
    /// none.
    const UseVector *getUseVector(Function &F) const {
      auto I = UsesMap.find(&F);
      if (I != UsesMap.end())
        return I->second.get();
      return nullptr;
    }

    /// Return how many functions contain uses of this runtime function.
    size_t getNumFunctionsWithUses() const { return UsesMap.size(); }

    /// Return the number of arguments (or the minimal number for variadic
    /// functions).
    size_t getNumArgs() const { return ArgumentTypes.size(); }

    /// Run the callback \p CB on each use and forget the use if the result is
    /// true. The callback will be fed the function in which the use was
    /// encountered as second argument.
    void foreachUse(SmallVectorImpl<Function *> &SCC,
                    function_ref<bool(Use &, Function &)> CB) {
      for (Function *F : SCC)
        foreachUse(CB, F);
    }

    /// Run the callback \p CB on each use within the function \p F and forget
    /// the use if the result is true.
    void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
      SmallVector<unsigned, 8> ToBeDeleted;
      ToBeDeleted.clear();

      unsigned Idx = 0;
      UseVector &UV = getOrCreateUseVector(F);

      for (Use *U : UV) {
        if (CB(*U, *F))
          ToBeDeleted.push_back(Idx);
        ++Idx;
      }

      // Remove the to-be-deleted indices in reverse order as prior
      // modifications will not modify the smaller indices.
      while (!ToBeDeleted.empty()) {
        unsigned Idx = ToBeDeleted.pop_back_val();
        UV[Idx] = UV.back();
        UV.pop_back();
      }
    }
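
    // Example use of foreachUse (a sketch): later transformations typically
    // visit every use of a runtime function and erase the calls they were
    // able to fold away, e.g.
    //   RFI.foreachUse(SCC, [&](Use &U, Function &) {
    //     CallInst *CI = getCallIfRegularCall(U, &RFI);
    //     if (!CI)
    //       return false;
    //     CI->eraseFromParent();
    //     return true; // Forget the erased use.
    //   });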

  private:
    /// Map from functions to all uses of this runtime function contained in
    /// them.
    DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;

  public:
    /// Iterators for the uses of this runtime function.
    decltype(UsesMap)::iterator begin() { return UsesMap.begin(); }
    decltype(UsesMap)::iterator end() { return UsesMap.end(); }
  };

  /// An OpenMP-IR-Builder instance
  OpenMPIRBuilder OMPBuilder;

  /// Map from runtime function kind to the runtime function description.
  EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
                  RuntimeFunction::OMPRTL___last>
      RFIs;

  /// Map from function declarations/definitions to their runtime enum type.
  DenseMap<Function *, RuntimeFunction> RuntimeFunctionIDMap;

  /// Map from ICV kind to the ICV description.
  EnumeratedArray<InternalControlVarInfo, InternalControlVar,
                  InternalControlVar::ICV___last>
      ICVs;

  /// Helper to initialize all internal control variable information for those
  /// defined in OMPKinds.def.
  void initializeInternalControlVars() {
#define ICV_RT_SET(_Name, RTL)                                                 \
  {                                                                            \
    auto &ICV = ICVs[_Name];                                                   \
    ICV.Setter = RTL;                                                          \
  }
#define ICV_RT_GET(Name, RTL)                                                  \
  {                                                                            \
    auto &ICV = ICVs[Name];                                                    \
    ICV.Getter = RTL;                                                          \
  }
#define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init)                           \
  {                                                                            \
    auto &ICV = ICVs[Enum];                                                    \
    ICV.Name = _Name;                                                          \
    ICV.Kind = Enum;                                                           \
    ICV.InitKind = Init;                                                       \
    ICV.EnvVarName = _EnvVarName;                                              \
    switch (ICV.InitKind) {                                                    \
    case ICV_IMPLEMENTATION_DEFINED:                                           \
      ICV.InitValue = nullptr;                                                 \
      break;                                                                   \
    case ICV_ZERO:                                                             \
      ICV.InitValue = ConstantInt::get(                                        \
          Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0);                \
      break;                                                                   \
    case ICV_FALSE:                                                            \
      ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext());    \
      break;                                                                   \
    case ICV_LAST:                                                             \
      break;                                                                   \
    }                                                                          \
  }
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  }

  /// Returns true if the function declaration \p F matches the runtime
  /// function types, that is, return type \p RTFRetType, and argument types
  /// \p RTFArgTypes.
  static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
                                  SmallVector<Type *, 8> &RTFArgTypes) {
    // TODO: We should output information to the user (under debug output
    // and via remarks).

    if (!F)
      return false;
    if (F->getReturnType() != RTFRetType)
      return false;
    if (F->arg_size() != RTFArgTypes.size())
      return false;

    auto *RTFTyIt = RTFArgTypes.begin();
    for (Argument &Arg : F->args()) {
      if (Arg.getType() != *RTFTyIt)
        return false;

      ++RTFTyIt;
    }

    return true;
  }

  // Helper to collect all uses of the declaration in the UsesMap.
  unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
    unsigned NumUses = 0;
    if (!RFI.Declaration)
      return NumUses;
    OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);

    if (CollectStats) {
      NumOpenMPRuntimeFunctionsIdentified += 1;
      NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
    }

    // TODO: We directly convert uses into proper calls and unknown uses.
    for (Use &U : RFI.Declaration->uses()) {
      if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
        if (!CGSCC || CGSCC->empty() || CGSCC->contains(UserI->getFunction())) {
          RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
          ++NumUses;
        }
      } else {
        RFI.getOrCreateUseVector(nullptr).push_back(&U);
        ++NumUses;
      }
    }
    return NumUses;
  }

  // Helper function to recollect uses of a runtime function.
  void recollectUsesForFunction(RuntimeFunction RTF) {
    auto &RFI = RFIs[RTF];
    RFI.clearUsesMap();
    collectUses(RFI, /*CollectStats*/ false);
  }

  // Helper function to recollect uses of all runtime functions.
  void recollectUses() {
    for (int Idx = 0; Idx < RFIs.size(); ++Idx)
      recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
  }

  // Helper function to inherit the calling convention of the function callee.
  void setCallingConvention(FunctionCallee Callee, CallInst *CI) {
    if (Function *Fn = dyn_cast<Function>(Callee.getCallee()))
      CI->setCallingConv(Fn->getCallingConv());
  }

  // Helper function to determine if it's legal to create a call to the runtime
  // functions.
  bool runtimeFnsAvailable(ArrayRef<RuntimeFunction> Fns) {
    // We can always emit calls if we haven't yet linked in the runtime.
    if (!OpenMPPostLink)
      return true;

    // Once the runtime has already been linked in we cannot emit calls to
    // any undefined functions.
    for (RuntimeFunction Fn : Fns) {
      RuntimeFunctionInfo &RFI = RFIs[Fn];

      if (RFI.Declaration && RFI.Declaration->isDeclaration())
        return false;
    }
    return true;
  }
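
  // Example (sketch): a transformation that wants to introduce new runtime
  // calls, e.g. to move an allocation to shared memory, might first check
  //   runtimeFnsAvailable({OMPRTL___kmpc_alloc_shared,
  //                        OMPRTL___kmpc_free_shared})
  // so that no call to a runtime function left undefined after linking is
  // emitted.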

  /// Helper to initialize all runtime function information for those defined
  /// in OpenMPKinds.def.
  void initializeRuntimeFunctions(Module &M) {

    // Helper macros for handling __VA_ARGS__ in OMP_RTL
#define OMP_TYPE(VarName, ...)                                                 \
  Type *VarName = OMPBuilder.VarName;                                          \
  (void)VarName;

#define OMP_ARRAY_TYPE(VarName, ...)                                           \
  ArrayType *VarName##Ty = OMPBuilder.VarName##Ty;                             \
  (void)VarName##Ty;                                                           \
  PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy;                     \
  (void)VarName##PtrTy;

#define OMP_FUNCTION_TYPE(VarName, ...)                                        \
  FunctionType *VarName = OMPBuilder.VarName;                                  \
  (void)VarName;                                                               \
  PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
  (void)VarName##Ptr;

#define OMP_STRUCT_TYPE(VarName, ...)                                          \
  StructType *VarName = OMPBuilder.VarName;                                    \
  (void)VarName;                                                               \
  PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
  (void)VarName##Ptr;

#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
  {                                                                            \
    SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__});                           \
    Function *F = M.getFunction(_Name);                                        \
    RTLFunctions.insert(F);                                                    \
    if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) {           \
      RuntimeFunctionIDMap[F] = _Enum;                                         \
      auto &RFI = RFIs[_Enum];                                                 \
      RFI.Kind = _Enum;                                                        \
      RFI.Name = _Name;                                                        \
      RFI.IsVarArg = _IsVarArg;                                                \
      RFI.ReturnType = OMPBuilder._ReturnType;                                 \
      RFI.ArgumentTypes = std::move(ArgsTypes);                                \
      RFI.Declaration = F;                                                     \
      unsigned NumUses = collectUses(RFI);                                     \
      (void)NumUses;                                                           \
      LLVM_DEBUG({                                                             \
        dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")           \
               << " found\n";                                                  \
        if (RFI.Declaration)                                                   \
          dbgs() << TAG << "-> got " << NumUses << " uses in "                 \
                 << RFI.getNumFunctionsWithUses()                              \
                 << " different functions.\n";                                 \
      });                                                                      \
    }                                                                          \
  }
#include "llvm/Frontend/OpenMP/OMPKinds.def"

    // Remove the `noinline` attribute from `__kmpc`, `ompx::` and `omp_`
    // functions, except if `optnone` is present.
    if (isOpenMPDevice(M)) {
      for (Function &F : M) {
        for (StringRef Prefix : {"__kmpc", "_ZN4ompx", "omp_"})
          if (F.hasFnAttribute(Attribute::NoInline) &&
              F.getName().startswith(Prefix) &&
              !F.hasFnAttribute(Attribute::OptimizeNone))
            F.removeFnAttr(Attribute::NoInline);
      }
    }

    // TODO: We should attach the attributes defined in OMPKinds.def.
  }

  /// Collection of known OpenMP runtime functions.
  DenseSet<const Function *> RTLFunctions;

  /// Indicates if we have already linked in the OpenMP device library.
  bool OpenMPPostLink = false;
};

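/// A BooleanState paired with a SetVector of elements. If \p InsertInvalidates
/// is true, inserting a new element also moves the boolean state to a
/// pessimistic fixpoint; "clamping" (^=) merges the boolean states and unions
/// the element sets.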
template <typename Ty, bool InsertInvalidates = true>
struct BooleanStateWithSetVector : public BooleanState {
  bool contains(const Ty &Elem) const { return Set.contains(Elem); }
  bool insert(const Ty &Elem) {
    if (InsertInvalidates)
      BooleanState::indicatePessimisticFixpoint();
    return Set.insert(Elem);
  }

  const Ty &operator[](int Idx) const { return Set[Idx]; }
  bool operator==(const BooleanStateWithSetVector &RHS) const {
    return BooleanState::operator==(RHS) && Set == RHS.Set;
  }
  bool operator!=(const BooleanStateWithSetVector &RHS) const {
    return !(*this == RHS);
  }

  bool empty() const { return Set.empty(); }
  size_t size() const { return Set.size(); }

  /// "Clamp" this state with \p RHS.
  BooleanStateWithSetVector &operator^=(const BooleanStateWithSetVector &RHS) {
    BooleanState::operator^=(RHS);
    Set.insert(RHS.Set.begin(), RHS.Set.end());
    return *this;
  }

private:
  /// A set to keep track of elements.
  SetVector<Ty> Set;

public:
  typename decltype(Set)::iterator begin() { return Set.begin(); }
  typename decltype(Set)::iterator end() { return Set.end(); }
  typename decltype(Set)::const_iterator begin() const { return Set.begin(); }
  typename decltype(Set)::const_iterator end() const { return Set.end(); }
};

template <typename Ty, bool InsertInvalidates = true>
using BooleanStateWithPtrSetVector =
    BooleanStateWithSetVector<Ty *, InsertInvalidates>;

struct KernelInfoState : AbstractState {
  /// Flag to track if we reached a fixpoint.
  bool IsAtFixpoint = false;

  /// The parallel regions (identified by the outlined parallel functions) that
  /// can be reached from the associated function.
  BooleanStateWithPtrSetVector<Function, /* InsertInvalidates */ false>
      ReachedKnownParallelRegions;

  /// State to track what parallel region we might reach.
  BooleanStateWithPtrSetVector<CallBase> ReachedUnknownParallelRegions;

  /// State to track if we are in SPMD-mode, assumed or known, and why we
  /// decided we cannot be. If it is assumed, then RequiresFullRuntime should
  /// also be false.
  BooleanStateWithPtrSetVector<Instruction, false> SPMDCompatibilityTracker;

  /// The __kmpc_target_init call in this kernel, if any. If we find more than
  /// one we abort as the kernel is malformed.
  CallBase *KernelInitCB = nullptr;

  /// The __kmpc_target_deinit call in this kernel, if any. If we find more than
  /// one we abort as the kernel is malformed.
  CallBase *KernelDeinitCB = nullptr;

  /// Flag to indicate if the associated function is a kernel entry.
  bool IsKernelEntry = false;

  /// State to track what kernel entries can reach the associated function.
  BooleanStateWithPtrSetVector<Function, false> ReachingKernelEntries;

  /// State to indicate if we can track parallel level of the associated
  /// function. We will give up tracking if we encounter unknown caller or the
  /// caller is __kmpc_parallel_51.
  BooleanStateWithSetVector<uint8_t> ParallelLevels;

  /// Flag that indicates if the kernel has nested parallelism.
  bool NestedParallelism = false;

  /// Abstract State interface
  ///{

  KernelInfoState() = default;
  KernelInfoState(bool BestState) {
    if (!BestState)
      indicatePessimisticFixpoint();
  }

  /// See AbstractState::isValidState(...)
  bool isValidState() const override { return true; }

  /// See AbstractState::isAtFixpoint(...)
  bool isAtFixpoint() const override { return IsAtFixpoint; }

  /// See AbstractState::indicatePessimisticFixpoint(...)
  ChangeStatus indicatePessimisticFixpoint() override {
    IsAtFixpoint = true;
    ParallelLevels.indicatePessimisticFixpoint();
    ReachingKernelEntries.indicatePessimisticFixpoint();
    SPMDCompatibilityTracker.indicatePessimisticFixpoint();
    ReachedKnownParallelRegions.indicatePessimisticFixpoint();
    ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
    return ChangeStatus::CHANGED;
  }

  /// See AbstractState::indicateOptimisticFixpoint(...)
  ChangeStatus indicateOptimisticFixpoint() override {
    IsAtFixpoint = true;
    ParallelLevels.indicateOptimisticFixpoint();
    ReachingKernelEntries.indicateOptimisticFixpoint();
    SPMDCompatibilityTracker.indicateOptimisticFixpoint();
    ReachedKnownParallelRegions.indicateOptimisticFixpoint();
    ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
    return ChangeStatus::UNCHANGED;
  }

  /// Return the assumed state
  KernelInfoState &getAssumed() { return *this; }
  const KernelInfoState &getAssumed() const { return *this; }

  bool operator==(const KernelInfoState &RHS) const {
    if (SPMDCompatibilityTracker != RHS.SPMDCompatibilityTracker)
      return false;
    if (ReachedKnownParallelRegions != RHS.ReachedKnownParallelRegions)
      return false;
    if (ReachedUnknownParallelRegions != RHS.ReachedUnknownParallelRegions)
      return false;
    if (ReachingKernelEntries != RHS.ReachingKernelEntries)
      return false;
    if (ParallelLevels != RHS.ParallelLevels)
      return false;
    return true;
  }

  /// Returns true if this kernel contains any OpenMP parallel regions.
  bool mayContainParallelRegion() {
    return !ReachedKnownParallelRegions.empty() ||
           !ReachedUnknownParallelRegions.empty();
  }

  /// Return empty set as the best state of potential values.
  static KernelInfoState getBestState() { return KernelInfoState(true); }

  static KernelInfoState getBestState(KernelInfoState &KIS) {
    return getBestState();
  }

  /// Return full set as the worst state of potential values.
  static KernelInfoState getWorstState() { return KernelInfoState(false); }

  /// "Clamp" this state with \p KIS.
  KernelInfoState operator^=(const KernelInfoState &KIS) {
    // Do not merge two different _init and _deinit call sites.
    if (KIS.KernelInitCB) {
      if (KernelInitCB && KernelInitCB != KIS.KernelInitCB)
        llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
                         "assumptions.");
      KernelInitCB = KIS.KernelInitCB;
    }
    if (KIS.KernelDeinitCB) {
      if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
        llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
                         "assumptions.");
      KernelDeinitCB = KIS.KernelDeinitCB;
    }
    SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
    ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions;
    ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions;
    NestedParallelism |= KIS.NestedParallelism;
    return *this;
  }

  KernelInfoState operator&=(const KernelInfoState &KIS) {
    return (*this ^= KIS);
  }

  ///}
};

/// Used to map the values physically (in the IR) stored in an offload
/// array, to a vector in memory.
struct OffloadArray {
  /// Physical array (in the IR).
  AllocaInst *Array = nullptr;
  /// Mapped values.
  SmallVector<Value *, 8> StoredValues;
  /// Last stores made in the offload array.
  SmallVector<StoreInst *, 8> LastAccesses;

  OffloadArray() = default;

  /// Initializes the OffloadArray with the values stored in \p Array before
  /// instruction \p Before is reached. Returns false if the initialization
  /// fails.
  /// This MUST be used immediately after the construction of the object.
  bool initialize(AllocaInst &Array, Instruction &Before) {
    if (!Array.getAllocatedType()->isArrayTy())
      return false;

    if (!getValues(Array, Before))
      return false;

    this->Array = &Array;
    return true;
  }

  static const unsigned DeviceIDArgNum = 1;
  static const unsigned BasePtrsArgNum = 3;
  static const unsigned PtrsArgNum = 4;
  static const unsigned SizesArgNum = 5;

private:
  /// Traverses the BasicBlock where \p Array is, collecting the stores made to
  /// \p Array, leaving StoredValues with the values stored before the
  /// instruction \p Before is reached.
  bool getValues(AllocaInst &Array, Instruction &Before) {
    // Initialize container.
    const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
    StoredValues.assign(NumValues, nullptr);
    LastAccesses.assign(NumValues, nullptr);

    // TODO: This assumes the instruction \p Before is in the same
    //       BasicBlock as Array. Make it general, for any control flow graph.
    BasicBlock *BB = Array.getParent();
    if (BB != Before.getParent())
      return false;

    const DataLayout &DL = Array.getModule()->getDataLayout();
    const unsigned int PointerSize = DL.getPointerSize();

    for (Instruction &I : *BB) {
      if (&I == &Before)
        break;

      if (!isa<StoreInst>(&I))
        continue;

      auto *S = cast<StoreInst>(&I);
      int64_t Offset = -1;
      auto *Dst =
          GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
      if (Dst == &Array) {
        int64_t Idx = Offset / PointerSize;
        StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
        LastAccesses[Idx] = S;
      }
    }

    return isFilled();
  }

  /// Returns true if all values in StoredValues and
  /// LastAccesses are not nullptrs.
  bool isFilled() {
    const unsigned NumValues = StoredValues.size();
    for (unsigned I = 0; I < NumValues; ++I) {
      if (!StoredValues[I] || !LastAccesses[I])
        return false;
    }

    return true;
  }
};

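// Example use of OffloadArray (sketch; `BasePtrsAlloca` and `RTCall` are
// placeholder names for the alloca feeding the base-pointers argument of an
// offloading runtime call and for that call, respectively):
//   OffloadArray BasePtrs;
//   if (BasePtrs.initialize(*BasePtrsAlloca, *RTCall))
//     for (Value *V : BasePtrs.StoredValues)
//       ; // Inspect the values that will be mapped to the device.
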
struct OpenMPOpt {

  using OptimizationRemarkGetter =
      function_ref<OptimizationRemarkEmitter &(Function *)>;

  OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
            OptimizationRemarkGetter OREGetter,
            OMPInformationCache &OMPInfoCache, Attributor &A)
      : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
        OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}

  /// Check if any remarks are enabled for openmp-opt
  bool remarksEnabled() {
    auto &Ctx = M.getContext();
    return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
  }

  /// Run all OpenMP optimizations on the underlying SCC.
  bool run(bool IsModulePass) {
    if (SCC.empty())
      return false;

    bool Changed = false;

    LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
                      << " functions\n");

    if (IsModulePass) {
      Changed |= runAttributor(IsModulePass);

      // Recollect uses, in case Attributor deleted any.
      OMPInfoCache.recollectUses();

      // TODO: This should be folded into buildCustomStateMachine.
      Changed |= rewriteDeviceCodeStateMachine();

      if (remarksEnabled())
        analysisGlobalization();
    } else {
      if (PrintICVValues)
        printICVs();
      if (PrintOpenMPKernels)
        printKernels();

      Changed |= runAttributor(IsModulePass);

      // Recollect uses, in case Attributor deleted any.
      OMPInfoCache.recollectUses();

      Changed |= deleteParallelRegions();

      if (HideMemoryTransferLatency)
        Changed |= hideMemTransfersLatency();
      Changed |= deduplicateRuntimeCalls();
      if (EnableParallelRegionMerging) {
        if (mergeParallelRegions()) {
          deduplicateRuntimeCalls();
          Changed = true;
        }
      }
    }

    return Changed;
  }

  /// Print initial ICV values for testing.
  /// FIXME: This should be done from the Attributor once it is added.
  void printICVs() const {
    InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
                                 ICV_proc_bind};

    for (Function *F : SCC) {
      for (auto ICV : ICVs) {
        auto ICVInfo = OMPInfoCache.ICVs[ICV];
        auto Remark = [&](OptimizationRemarkAnalysis ORA) {
          return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
                     << " Value: "
                     << (ICVInfo.InitValue
                             ? toString(ICVInfo.InitValue->getValue(), 10, true)
                             : "IMPLEMENTATION_DEFINED");
        };

        emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark);
      }
    }
  }

  /// Print OpenMP GPU kernels for testing.
  void printKernels() const {
    for (Function *F : SCC) {
      if (!omp::isKernel(*F))
        continue;

      auto Remark = [&](OptimizationRemarkAnalysis ORA) {
        return ORA << "OpenMP GPU kernel "
                   << ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
      };

      emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark);
    }
  }

  /// Return the call if \p U is a callee use in a regular call. If \p RFI is
  /// given it has to be the callee or a nullptr is returned.
  static CallInst *getCallIfRegularCall(
      Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
    CallInst *CI = dyn_cast<CallInst>(U.getUser());
    if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
        (!RFI ||
         (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration)))
      return CI;
    return nullptr;
  }

  /// Return the call if \p V is a regular call. If \p RFI is given it has to be
  /// the callee or a nullptr is returned.
  static CallInst *getCallIfRegularCall(
      Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
    CallInst *CI = dyn_cast<CallInst>(&V);
    if (CI && !CI->hasOperandBundles() &&
        (!RFI ||
         (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration)))
      return CI;
    return nullptr;
  }

private:
  /// Merge parallel regions when it is safe.
  bool mergeParallelRegions() {
    const unsigned CallbackCalleeOperand = 2;
    const unsigned CallbackFirstArgOperand = 3;
    using InsertPointTy = OpenMPIRBuilder::InsertPointTy;

    // Check if there are any __kmpc_fork_call calls to merge.
    OMPInformationCache::RuntimeFunctionInfo &RFI =
        OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];

    if (!RFI.Declaration)
      return false;

    // Unmergable calls that prevent merging a parallel region.
    OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = {
        OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind],
        OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads],
    };

    bool Changed = false;
    LoopInfo *LI = nullptr;
    DominatorTree *DT = nullptr;

    SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;

    BasicBlock *StartBB = nullptr, *EndBB = nullptr;
    auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
      BasicBlock *CGStartBB = CodeGenIP.getBlock();
      BasicBlock *CGEndBB =
          SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
      assert(StartBB != nullptr && "StartBB should not be null");
      CGStartBB->getTerminator()->setSuccessor(0, StartBB);
      assert(EndBB != nullptr && "EndBB should not be null");
      EndBB->getTerminator()->setSuccessor(0, CGEndBB);
    };

    auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
                      Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
      ReplacementValue = &Inner;
      return CodeGenIP;
    };

    auto FiniCB = [&](InsertPointTy CodeGenIP) {};

    /// Create a sequential execution region within a merged parallel region,
    /// encapsulated in a master construct with a barrier for synchronization.
    auto CreateSequentialRegion = [&](Function *OuterFn,
                                      BasicBlock *OuterPredBB,
                                      Instruction *SeqStartI,
                                      Instruction *SeqEndI) {
      // Isolate the instructions of the sequential region to a separate
      // block.
995e8d8bef9SDimitry Andric BasicBlock *ParentBB = SeqStartI->getParent(); 996e8d8bef9SDimitry Andric BasicBlock *SeqEndBB = 997e8d8bef9SDimitry Andric SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI); 998e8d8bef9SDimitry Andric BasicBlock *SeqAfterBB = 999e8d8bef9SDimitry Andric SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI); 1000e8d8bef9SDimitry Andric BasicBlock *SeqStartBB = 1001e8d8bef9SDimitry Andric SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged"); 1002e8d8bef9SDimitry Andric 1003e8d8bef9SDimitry Andric assert(ParentBB->getUniqueSuccessor() == SeqStartBB && 1004e8d8bef9SDimitry Andric "Expected a different CFG"); 1005e8d8bef9SDimitry Andric const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); 1006e8d8bef9SDimitry Andric ParentBB->getTerminator()->eraseFromParent(); 1007e8d8bef9SDimitry Andric 100881ad6265SDimitry Andric auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 1009e8d8bef9SDimitry Andric BasicBlock *CGStartBB = CodeGenIP.getBlock(); 1010e8d8bef9SDimitry Andric BasicBlock *CGEndBB = 1011e8d8bef9SDimitry Andric SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); 1012e8d8bef9SDimitry Andric assert(SeqStartBB != nullptr && "SeqStartBB should not be null"); 1013e8d8bef9SDimitry Andric CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB); 1014e8d8bef9SDimitry Andric assert(SeqEndBB != nullptr && "SeqEndBB should not be null"); 1015e8d8bef9SDimitry Andric SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB); 1016e8d8bef9SDimitry Andric }; 1017e8d8bef9SDimitry Andric auto FiniCB = [&](InsertPointTy CodeGenIP) {}; 1018e8d8bef9SDimitry Andric 1019e8d8bef9SDimitry Andric // Find outputs from the sequential region to outside users and 1020e8d8bef9SDimitry Andric // broadcast their values to them. 1021e8d8bef9SDimitry Andric for (Instruction &I : *SeqStartBB) { 1022e8d8bef9SDimitry Andric SmallPtrSet<Instruction *, 4> OutsideUsers; 1023e8d8bef9SDimitry Andric for (User *Usr : I.users()) { 1024e8d8bef9SDimitry Andric Instruction &UsrI = *cast<Instruction>(Usr); 1025e8d8bef9SDimitry Andric // Ignore outputs to LT intrinsics, code extraction for the merged 1026e8d8bef9SDimitry Andric // parallel region will fix them. 1027e8d8bef9SDimitry Andric if (UsrI.isLifetimeStartOrEnd()) 1028e8d8bef9SDimitry Andric continue; 1029e8d8bef9SDimitry Andric 1030e8d8bef9SDimitry Andric if (UsrI.getParent() != SeqStartBB) 1031e8d8bef9SDimitry Andric OutsideUsers.insert(&UsrI); 1032e8d8bef9SDimitry Andric } 1033e8d8bef9SDimitry Andric 1034e8d8bef9SDimitry Andric if (OutsideUsers.empty()) 1035e8d8bef9SDimitry Andric continue; 1036e8d8bef9SDimitry Andric 1037e8d8bef9SDimitry Andric // Emit an alloca in the outer region to store the broadcasted 1038e8d8bef9SDimitry Andric // value. 1039e8d8bef9SDimitry Andric const DataLayout &DL = M.getDataLayout(); 1040e8d8bef9SDimitry Andric AllocaInst *AllocaI = new AllocaInst( 1041e8d8bef9SDimitry Andric I.getType(), DL.getAllocaAddrSpace(), nullptr, 1042e8d8bef9SDimitry Andric I.getName() + ".seq.output.alloc", &OuterFn->front().front()); 1043e8d8bef9SDimitry Andric 1044e8d8bef9SDimitry Andric // Emit a store instruction in the sequential BB to update the 1045e8d8bef9SDimitry Andric // value. 1046e8d8bef9SDimitry Andric new StoreInst(&I, AllocaI, SeqStartBB->getTerminator()); 1047e8d8bef9SDimitry Andric 1048e8d8bef9SDimitry Andric // Emit a load instruction and replace the use of the output value 1049e8d8bef9SDimitry Andric // with it. 
1050e8d8bef9SDimitry Andric for (Instruction *UsrI : OutsideUsers) { 1051fe6060f1SDimitry Andric LoadInst *LoadI = new LoadInst( 1052fe6060f1SDimitry Andric I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI); 1053e8d8bef9SDimitry Andric UsrI->replaceUsesOfWith(&I, LoadI); 1054e8d8bef9SDimitry Andric } 1055e8d8bef9SDimitry Andric } 1056e8d8bef9SDimitry Andric 1057e8d8bef9SDimitry Andric OpenMPIRBuilder::LocationDescription Loc( 1058e8d8bef9SDimitry Andric InsertPointTy(ParentBB, ParentBB->end()), DL); 1059e8d8bef9SDimitry Andric InsertPointTy SeqAfterIP = 1060e8d8bef9SDimitry Andric OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB); 1061e8d8bef9SDimitry Andric 1062e8d8bef9SDimitry Andric OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel); 1063e8d8bef9SDimitry Andric 1064e8d8bef9SDimitry Andric BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock()); 1065e8d8bef9SDimitry Andric 1066e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn 1067e8d8bef9SDimitry Andric << "\n"); 1068e8d8bef9SDimitry Andric }; 1069e8d8bef9SDimitry Andric 1070e8d8bef9SDimitry Andric // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all 1071e8d8bef9SDimitry Andric // contained in BB and only separated by instructions that can be 1072e8d8bef9SDimitry Andric // redundantly executed in parallel. The block BB is split before the first 1073e8d8bef9SDimitry Andric // call (in MergableCIs) and after the last so the entire region we merge 1074e8d8bef9SDimitry Andric // into a single parallel region is contained in a single basic block 1075e8d8bef9SDimitry Andric // without any other instructions. We use the OpenMPIRBuilder to outline 1076e8d8bef9SDimitry Andric // that block and call the resulting function via __kmpc_fork_call. 107704eeddc0SDimitry Andric auto Merge = [&](const SmallVectorImpl<CallInst *> &MergableCIs, 107804eeddc0SDimitry Andric BasicBlock *BB) { 1079e8d8bef9SDimitry Andric // TODO: Change the interface to allow single CIs expanded, e.g, to 1080e8d8bef9SDimitry Andric // include an outer loop. 1081e8d8bef9SDimitry Andric assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs"); 1082e8d8bef9SDimitry Andric 1083e8d8bef9SDimitry Andric auto Remark = [&](OptimizationRemark OR) { 1084fe6060f1SDimitry Andric OR << "Parallel region merged with parallel region" 1085fe6060f1SDimitry Andric << (MergableCIs.size() > 2 ? "s" : "") << " at "; 1086e8d8bef9SDimitry Andric for (auto *CI : llvm::drop_begin(MergableCIs)) { 1087e8d8bef9SDimitry Andric OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc()); 1088e8d8bef9SDimitry Andric if (CI != MergableCIs.back()) 1089e8d8bef9SDimitry Andric OR << ", "; 1090e8d8bef9SDimitry Andric } 1091fe6060f1SDimitry Andric return OR << "."; 1092e8d8bef9SDimitry Andric }; 1093e8d8bef9SDimitry Andric 1094fe6060f1SDimitry Andric emitRemark<OptimizationRemark>(MergableCIs.front(), "OMP150", Remark); 1095e8d8bef9SDimitry Andric 1096e8d8bef9SDimitry Andric Function *OriginalFn = BB->getParent(); 1097e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size() 1098e8d8bef9SDimitry Andric << " parallel regions in " << OriginalFn->getName() 1099e8d8bef9SDimitry Andric << "\n"); 1100e8d8bef9SDimitry Andric 1101e8d8bef9SDimitry Andric // Isolate the calls to merge in a separate block. 
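// Rough sketch of the block layout created below (names as used by the
// code; illustration only): BB branches to StartBB ("omp.par.merged"),
// which eventually reaches EndBB, which branches to AfterBB. Everything
// from StartBB up to (but not including) EndBB, i.e. all mergable fork
// calls plus the in-between instructions, is what createParallel() will
// outline and invoke through a single __kmpc_fork_call.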
1102e8d8bef9SDimitry Andric EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI); 1103e8d8bef9SDimitry Andric BasicBlock *AfterBB = 1104e8d8bef9SDimitry Andric SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI); 1105e8d8bef9SDimitry Andric StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr, 1106e8d8bef9SDimitry Andric "omp.par.merged"); 1107e8d8bef9SDimitry Andric 1108e8d8bef9SDimitry Andric assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG"); 1109e8d8bef9SDimitry Andric const DebugLoc DL = BB->getTerminator()->getDebugLoc(); 1110e8d8bef9SDimitry Andric BB->getTerminator()->eraseFromParent(); 1111e8d8bef9SDimitry Andric 1112e8d8bef9SDimitry Andric // Create sequential regions for sequential instructions that are 1113e8d8bef9SDimitry Andric // in-between mergable parallel regions. 1114e8d8bef9SDimitry Andric for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1; 1115e8d8bef9SDimitry Andric It != End; ++It) { 1116e8d8bef9SDimitry Andric Instruction *ForkCI = *It; 1117e8d8bef9SDimitry Andric Instruction *NextForkCI = *(It + 1); 1118e8d8bef9SDimitry Andric 1119e8d8bef9SDimitry Andric // Continue if there are not in-between instructions. 1120e8d8bef9SDimitry Andric if (ForkCI->getNextNode() == NextForkCI) 1121e8d8bef9SDimitry Andric continue; 1122e8d8bef9SDimitry Andric 1123e8d8bef9SDimitry Andric CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(), 1124e8d8bef9SDimitry Andric NextForkCI->getPrevNode()); 1125e8d8bef9SDimitry Andric } 1126e8d8bef9SDimitry Andric 1127e8d8bef9SDimitry Andric OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()), 1128e8d8bef9SDimitry Andric DL); 1129e8d8bef9SDimitry Andric IRBuilder<>::InsertPoint AllocaIP( 1130e8d8bef9SDimitry Andric &OriginalFn->getEntryBlock(), 1131e8d8bef9SDimitry Andric OriginalFn->getEntryBlock().getFirstInsertionPt()); 1132e8d8bef9SDimitry Andric // Create the merged parallel region with default proc binding, to 1133e8d8bef9SDimitry Andric // avoid overriding binding settings, and without explicit cancellation. 1134e8d8bef9SDimitry Andric InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel( 1135e8d8bef9SDimitry Andric Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, 1136e8d8bef9SDimitry Andric OMP_PROC_BIND_default, /* IsCancellable */ false); 1137e8d8bef9SDimitry Andric BranchInst::Create(AfterBB, AfterIP.getBlock()); 1138e8d8bef9SDimitry Andric 1139e8d8bef9SDimitry Andric // Perform the actual outlining. 114004eeddc0SDimitry Andric OMPInfoCache.OMPBuilder.finalize(OriginalFn); 1141e8d8bef9SDimitry Andric 1142e8d8bef9SDimitry Andric Function *OutlinedFn = MergableCIs.front()->getCaller(); 1143e8d8bef9SDimitry Andric 1144e8d8bef9SDimitry Andric // Replace the __kmpc_fork_call calls with direct calls to the outlined 1145e8d8bef9SDimitry Andric // callbacks. 
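// Rough sketch of the rewrite done below (argument names are made up):
//   call void @__kmpc_fork_call(ptr @ident, i32 2, ptr @cb, ptr %a, ptr %b)
// inside the merged, outlined function becomes a direct call
//   call void @cb(ptr %tid.addr, ptr %zero.addr, ptr %a, ptr %b)
// where the first two operands are forwarded from the outlined function's
// own thread-id/bound-tid parameters, followed by an explicit __kmpc_barrier
// standing in for the implicit join barrier of the original region.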
1146e8d8bef9SDimitry Andric SmallVector<Value *, 8> Args; 1147e8d8bef9SDimitry Andric for (auto *CI : MergableCIs) { 114881ad6265SDimitry Andric Value *Callee = CI->getArgOperand(CallbackCalleeOperand); 114981ad6265SDimitry Andric FunctionType *FT = OMPInfoCache.OMPBuilder.ParallelTask; 1150e8d8bef9SDimitry Andric Args.clear(); 1151e8d8bef9SDimitry Andric Args.push_back(OutlinedFn->getArg(0)); 1152e8d8bef9SDimitry Andric Args.push_back(OutlinedFn->getArg(1)); 1153349cc55cSDimitry Andric for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E; 1154349cc55cSDimitry Andric ++U) 1155e8d8bef9SDimitry Andric Args.push_back(CI->getArgOperand(U)); 1156e8d8bef9SDimitry Andric 1157e8d8bef9SDimitry Andric CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI); 1158e8d8bef9SDimitry Andric if (CI->getDebugLoc()) 1159e8d8bef9SDimitry Andric NewCI->setDebugLoc(CI->getDebugLoc()); 1160e8d8bef9SDimitry Andric 1161e8d8bef9SDimitry Andric // Forward parameter attributes from the callback to the callee. 1162349cc55cSDimitry Andric for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E; 1163349cc55cSDimitry Andric ++U) 1164349cc55cSDimitry Andric for (const Attribute &A : CI->getAttributes().getParamAttrs(U)) 1165e8d8bef9SDimitry Andric NewCI->addParamAttr( 1166e8d8bef9SDimitry Andric U - (CallbackFirstArgOperand - CallbackCalleeOperand), A); 1167e8d8bef9SDimitry Andric 1168e8d8bef9SDimitry Andric // Emit an explicit barrier to replace the implicit fork-join barrier. 1169e8d8bef9SDimitry Andric if (CI != MergableCIs.back()) { 1170e8d8bef9SDimitry Andric // TODO: Remove barrier if the merged parallel region includes the 1171e8d8bef9SDimitry Andric // 'nowait' clause. 1172e8d8bef9SDimitry Andric OMPInfoCache.OMPBuilder.createBarrier( 1173e8d8bef9SDimitry Andric InsertPointTy(NewCI->getParent(), 1174e8d8bef9SDimitry Andric NewCI->getNextNode()->getIterator()), 1175e8d8bef9SDimitry Andric OMPD_parallel); 1176e8d8bef9SDimitry Andric } 1177e8d8bef9SDimitry Andric 1178e8d8bef9SDimitry Andric CI->eraseFromParent(); 1179e8d8bef9SDimitry Andric } 1180e8d8bef9SDimitry Andric 1181e8d8bef9SDimitry Andric assert(OutlinedFn != OriginalFn && "Outlining failed"); 1182e8d8bef9SDimitry Andric CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn); 1183e8d8bef9SDimitry Andric CGUpdater.reanalyzeFunction(*OriginalFn); 1184e8d8bef9SDimitry Andric 1185e8d8bef9SDimitry Andric NumOpenMPParallelRegionsMerged += MergableCIs.size(); 1186e8d8bef9SDimitry Andric 1187e8d8bef9SDimitry Andric return true; 1188e8d8bef9SDimitry Andric }; 1189e8d8bef9SDimitry Andric 1190e8d8bef9SDimitry Andric // Helper function that identifes sequences of 1191e8d8bef9SDimitry Andric // __kmpc_fork_call uses in a basic block. 1192e8d8bef9SDimitry Andric auto DetectPRsCB = [&](Use &U, Function &F) { 1193e8d8bef9SDimitry Andric CallInst *CI = getCallIfRegularCall(U, &RFI); 1194e8d8bef9SDimitry Andric BB2PRMap[CI->getParent()].insert(CI); 1195e8d8bef9SDimitry Andric 1196e8d8bef9SDimitry Andric return false; 1197e8d8bef9SDimitry Andric }; 1198e8d8bef9SDimitry Andric 1199e8d8bef9SDimitry Andric BB2PRMap.clear(); 1200e8d8bef9SDimitry Andric RFI.foreachUse(SCC, DetectPRsCB); 1201e8d8bef9SDimitry Andric SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector; 1202e8d8bef9SDimitry Andric // Find mergable parallel regions within a basic block that are 1203e8d8bef9SDimitry Andric // safe to merge, that is any in-between instructions can safely 1204e8d8bef9SDimitry Andric // execute in parallel after merging. 
1205e8d8bef9SDimitry Andric // TODO: support merging across basic-blocks. 1206e8d8bef9SDimitry Andric for (auto &It : BB2PRMap) { 1207e8d8bef9SDimitry Andric auto &CIs = It.getSecond(); 1208e8d8bef9SDimitry Andric if (CIs.size() < 2) 1209e8d8bef9SDimitry Andric continue; 1210e8d8bef9SDimitry Andric 1211e8d8bef9SDimitry Andric BasicBlock *BB = It.getFirst(); 1212e8d8bef9SDimitry Andric SmallVector<CallInst *, 4> MergableCIs; 1213e8d8bef9SDimitry Andric 1214e8d8bef9SDimitry Andric /// Returns true if the instruction is mergable, false otherwise. 1215e8d8bef9SDimitry Andric /// A terminator instruction is unmergable by definition since merging 1216e8d8bef9SDimitry Andric /// works within a BB. Instructions before the mergable region are 1217e8d8bef9SDimitry Andric /// mergable if they are not calls to OpenMP runtime functions that may 1218e8d8bef9SDimitry Andric /// set different execution parameters for subsequent parallel regions. 1219e8d8bef9SDimitry Andric /// Instructions in-between parallel regions are mergable if they are not 1220e8d8bef9SDimitry Andric /// calls to any non-intrinsic function since that may call a non-mergable 1221e8d8bef9SDimitry Andric /// OpenMP runtime function. 1222e8d8bef9SDimitry Andric auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) { 1223e8d8bef9SDimitry Andric // We do not merge across BBs, hence return false (unmergable) if the 1224e8d8bef9SDimitry Andric // instruction is a terminator. 1225e8d8bef9SDimitry Andric if (I.isTerminator()) 1226e8d8bef9SDimitry Andric return false; 1227e8d8bef9SDimitry Andric 1228e8d8bef9SDimitry Andric if (!isa<CallInst>(&I)) 1229e8d8bef9SDimitry Andric return true; 1230e8d8bef9SDimitry Andric 1231e8d8bef9SDimitry Andric CallInst *CI = cast<CallInst>(&I); 1232e8d8bef9SDimitry Andric if (IsBeforeMergableRegion) { 1233e8d8bef9SDimitry Andric Function *CalledFunction = CI->getCalledFunction(); 1234e8d8bef9SDimitry Andric if (!CalledFunction) 1235e8d8bef9SDimitry Andric return false; 1236e8d8bef9SDimitry Andric // Return false (unmergable) if the call before the parallel 1237e8d8bef9SDimitry Andric // region calls an explicit affinity (proc_bind) or number of 1238e8d8bef9SDimitry Andric // threads (num_threads) compiler-generated function. Those settings 1239e8d8bef9SDimitry Andric // may be incompatible with following parallel regions. 1240e8d8bef9SDimitry Andric // TODO: ICV tracking to detect compatibility. 1241e8d8bef9SDimitry Andric for (const auto &RFI : UnmergableCallsInfo) { 1242e8d8bef9SDimitry Andric if (CalledFunction == RFI.Declaration) 1243e8d8bef9SDimitry Andric return false; 1244e8d8bef9SDimitry Andric } 1245e8d8bef9SDimitry Andric } else { 1246e8d8bef9SDimitry Andric // Return false (unmergable) if there is a call instruction 1247e8d8bef9SDimitry Andric // in-between parallel regions when it is not an intrinsic. It 1248e8d8bef9SDimitry Andric // may call an unmergable OpenMP runtime function in its callpath. 1249e8d8bef9SDimitry Andric // TODO: Keep track of possible OpenMP calls in the callpath. 1250e8d8bef9SDimitry Andric if (!isa<IntrinsicInst>(CI)) 1251e8d8bef9SDimitry Andric return false; 1252e8d8bef9SDimitry Andric } 1253e8d8bef9SDimitry Andric 1254e8d8bef9SDimitry Andric return true; 1255e8d8bef9SDimitry Andric }; 1256e8d8bef9SDimitry Andric // Find maximal number of parallel region CIs that are safe to merge. 
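// For illustration (made-up block, not from a real module): given
//   call void @__kmpc_fork_call(...)   ; PR0
//   %x = add i32 %y, 1                 ; mergable, not a call
//   call void @__kmpc_fork_call(...)   ; PR1
//   call void @unknown()               ; unmergable, unknown callee
//   call void @__kmpc_fork_call(...)   ; PR2
// the scan below groups {PR0, PR1} and leaves PR2 alone, because the unknown
// call in between may reach OpenMP runtime functions that change the
// execution parameters of PR2.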
1257e8d8bef9SDimitry Andric for (auto It = BB->begin(), End = BB->end(); It != End;) { 1258e8d8bef9SDimitry Andric Instruction &I = *It; 1259e8d8bef9SDimitry Andric ++It; 1260e8d8bef9SDimitry Andric 1261e8d8bef9SDimitry Andric if (CIs.count(&I)) { 1262e8d8bef9SDimitry Andric MergableCIs.push_back(cast<CallInst>(&I)); 1263e8d8bef9SDimitry Andric continue; 1264e8d8bef9SDimitry Andric } 1265e8d8bef9SDimitry Andric 1266e8d8bef9SDimitry Andric // Continue expanding if the instruction is mergable. 1267e8d8bef9SDimitry Andric if (IsMergable(I, MergableCIs.empty())) 1268e8d8bef9SDimitry Andric continue; 1269e8d8bef9SDimitry Andric 1270e8d8bef9SDimitry Andric // Forward the instruction iterator to skip the next parallel region 1271e8d8bef9SDimitry Andric // since there is an unmergable instruction which can affect it. 1272e8d8bef9SDimitry Andric for (; It != End; ++It) { 1273e8d8bef9SDimitry Andric Instruction &SkipI = *It; 1274e8d8bef9SDimitry Andric if (CIs.count(&SkipI)) { 1275e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI 1276e8d8bef9SDimitry Andric << " due to " << I << "\n"); 1277e8d8bef9SDimitry Andric ++It; 1278e8d8bef9SDimitry Andric break; 1279e8d8bef9SDimitry Andric } 1280e8d8bef9SDimitry Andric } 1281e8d8bef9SDimitry Andric 1282e8d8bef9SDimitry Andric // Store mergable regions found. 1283e8d8bef9SDimitry Andric if (MergableCIs.size() > 1) { 1284e8d8bef9SDimitry Andric MergableCIsVector.push_back(MergableCIs); 1285e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size() 1286e8d8bef9SDimitry Andric << " parallel regions in block " << BB->getName() 1287e8d8bef9SDimitry Andric << " of function " << BB->getParent()->getName() 1288e8d8bef9SDimitry Andric << "\n";); 1289e8d8bef9SDimitry Andric } 1290e8d8bef9SDimitry Andric 1291e8d8bef9SDimitry Andric MergableCIs.clear(); 1292e8d8bef9SDimitry Andric } 1293e8d8bef9SDimitry Andric 1294e8d8bef9SDimitry Andric if (!MergableCIsVector.empty()) { 1295e8d8bef9SDimitry Andric Changed = true; 1296e8d8bef9SDimitry Andric 1297e8d8bef9SDimitry Andric for (auto &MergableCIs : MergableCIsVector) 1298e8d8bef9SDimitry Andric Merge(MergableCIs, BB); 1299fe6060f1SDimitry Andric MergableCIsVector.clear(); 1300e8d8bef9SDimitry Andric } 1301e8d8bef9SDimitry Andric } 1302e8d8bef9SDimitry Andric 1303e8d8bef9SDimitry Andric if (Changed) { 1304e8d8bef9SDimitry Andric /// Re-collect use for fork calls, emitted barrier calls, and 1305e8d8bef9SDimitry Andric /// any emitted master/end_master calls. 1306e8d8bef9SDimitry Andric OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call); 1307e8d8bef9SDimitry Andric OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier); 1308e8d8bef9SDimitry Andric OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master); 1309e8d8bef9SDimitry Andric OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master); 1310e8d8bef9SDimitry Andric } 1311e8d8bef9SDimitry Andric 1312e8d8bef9SDimitry Andric return Changed; 1313e8d8bef9SDimitry Andric } 1314e8d8bef9SDimitry Andric 13155ffd83dbSDimitry Andric /// Try to delete parallel regions if possible. 
13165ffd83dbSDimitry Andric bool deleteParallelRegions() { 13175ffd83dbSDimitry Andric const unsigned CallbackCalleeOperand = 2; 13185ffd83dbSDimitry Andric 13195ffd83dbSDimitry Andric OMPInformationCache::RuntimeFunctionInfo &RFI = 13205ffd83dbSDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; 13215ffd83dbSDimitry Andric 13225ffd83dbSDimitry Andric if (!RFI.Declaration) 13235ffd83dbSDimitry Andric return false; 13245ffd83dbSDimitry Andric 13255ffd83dbSDimitry Andric bool Changed = false; 13265ffd83dbSDimitry Andric auto DeleteCallCB = [&](Use &U, Function &) { 13275ffd83dbSDimitry Andric CallInst *CI = getCallIfRegularCall(U); 13285ffd83dbSDimitry Andric if (!CI) 13295ffd83dbSDimitry Andric return false; 13305ffd83dbSDimitry Andric auto *Fn = dyn_cast<Function>( 13315ffd83dbSDimitry Andric CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts()); 13325ffd83dbSDimitry Andric if (!Fn) 13335ffd83dbSDimitry Andric return false; 13345ffd83dbSDimitry Andric if (!Fn->onlyReadsMemory()) 13355ffd83dbSDimitry Andric return false; 13365ffd83dbSDimitry Andric if (!Fn->hasFnAttribute(Attribute::WillReturn)) 13375ffd83dbSDimitry Andric return false; 13385ffd83dbSDimitry Andric 13395ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in " 13405ffd83dbSDimitry Andric << CI->getCaller()->getName() << "\n"); 13415ffd83dbSDimitry Andric 13425ffd83dbSDimitry Andric auto Remark = [&](OptimizationRemark OR) { 1343fe6060f1SDimitry Andric return OR << "Removing parallel region with no side-effects."; 13445ffd83dbSDimitry Andric }; 1345fe6060f1SDimitry Andric emitRemark<OptimizationRemark>(CI, "OMP160", Remark); 13465ffd83dbSDimitry Andric 13475ffd83dbSDimitry Andric CGUpdater.removeCallSite(*CI); 13485ffd83dbSDimitry Andric CI->eraseFromParent(); 13495ffd83dbSDimitry Andric Changed = true; 13505ffd83dbSDimitry Andric ++NumOpenMPParallelRegionsDeleted; 13515ffd83dbSDimitry Andric return true; 13525ffd83dbSDimitry Andric }; 13535ffd83dbSDimitry Andric 13545ffd83dbSDimitry Andric RFI.foreachUse(SCC, DeleteCallCB); 13555ffd83dbSDimitry Andric 13565ffd83dbSDimitry Andric return Changed; 13575ffd83dbSDimitry Andric } 13585ffd83dbSDimitry Andric 13595ffd83dbSDimitry Andric /// Try to eliminate runtime calls by reusing existing ones. 13605ffd83dbSDimitry Andric bool deduplicateRuntimeCalls() { 13615ffd83dbSDimitry Andric bool Changed = false; 13625ffd83dbSDimitry Andric 13635ffd83dbSDimitry Andric RuntimeFunction DeduplicableRuntimeCallIDs[] = { 13645ffd83dbSDimitry Andric OMPRTL_omp_get_num_threads, 13655ffd83dbSDimitry Andric OMPRTL_omp_in_parallel, 13665ffd83dbSDimitry Andric OMPRTL_omp_get_cancellation, 13675ffd83dbSDimitry Andric OMPRTL_omp_get_thread_limit, 13685ffd83dbSDimitry Andric OMPRTL_omp_get_supported_active_levels, 13695ffd83dbSDimitry Andric OMPRTL_omp_get_level, 13705ffd83dbSDimitry Andric OMPRTL_omp_get_ancestor_thread_num, 13715ffd83dbSDimitry Andric OMPRTL_omp_get_team_size, 13725ffd83dbSDimitry Andric OMPRTL_omp_get_active_level, 13735ffd83dbSDimitry Andric OMPRTL_omp_in_final, 13745ffd83dbSDimitry Andric OMPRTL_omp_get_proc_bind, 13755ffd83dbSDimitry Andric OMPRTL_omp_get_num_places, 13765ffd83dbSDimitry Andric OMPRTL_omp_get_num_procs, 13775ffd83dbSDimitry Andric OMPRTL_omp_get_place_num, 13785ffd83dbSDimitry Andric OMPRTL_omp_get_partition_num_places, 13795ffd83dbSDimitry Andric OMPRTL_omp_get_partition_place_nums}; 13805ffd83dbSDimitry Andric 13815ffd83dbSDimitry Andric // Global-tid is handled separately. 
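// Rough sketch (function and argument names are made up): if a function with
// local linkage takes the thread id as an argument at every call site, e.g.
//   static void helper(int gtid) { ... __kmpc_global_thread_num(...) ... }
// and is only ever called as helper(__kmpc_global_thread_num(&loc)), then
// the call inside helper can be replaced by its gtid argument; collecting
// such arguments is what collectGlobalThreadIdArguments() below enables.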
13825ffd83dbSDimitry Andric SmallSetVector<Value *, 16> GTIdArgs;
13835ffd83dbSDimitry Andric collectGlobalThreadIdArguments(GTIdArgs);
13845ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
13855ffd83dbSDimitry Andric << " global thread ID arguments\n");
13865ffd83dbSDimitry Andric
13875ffd83dbSDimitry Andric for (Function *F : SCC) {
13885ffd83dbSDimitry Andric for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
1389e8d8bef9SDimitry Andric Changed |= deduplicateRuntimeCalls(
1390e8d8bef9SDimitry Andric *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
13915ffd83dbSDimitry Andric
13925ffd83dbSDimitry Andric // __kmpc_global_thread_num is special as we can replace it with an
13935ffd83dbSDimitry Andric // argument in enough cases to make it worth trying.
13945ffd83dbSDimitry Andric Value *GTIdArg = nullptr;
13955ffd83dbSDimitry Andric for (Argument &Arg : F->args())
13965ffd83dbSDimitry Andric if (GTIdArgs.count(&Arg)) {
13975ffd83dbSDimitry Andric GTIdArg = &Arg;
13985ffd83dbSDimitry Andric break;
13995ffd83dbSDimitry Andric }
14005ffd83dbSDimitry Andric Changed |= deduplicateRuntimeCalls(
14015ffd83dbSDimitry Andric *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
14025ffd83dbSDimitry Andric }
14035ffd83dbSDimitry Andric
14045ffd83dbSDimitry Andric return Changed;
14055ffd83dbSDimitry Andric }
14065ffd83dbSDimitry Andric
1407e8d8bef9SDimitry Andric /// Tries to hide the latency of runtime calls that involve host to
1408e8d8bef9SDimitry Andric /// device memory transfers by splitting them into their "issue" and "wait"
1409e8d8bef9SDimitry Andric /// versions. The "issue" is moved upwards as much as possible. The "wait" is
1410e8d8bef9SDimitry Andric /// moved downwards as much as possible. The "issue" issues the memory transfer
1411e8d8bef9SDimitry Andric /// asynchronously, returning a handle. The "wait" waits on the returned
1412e8d8bef9SDimitry Andric /// handle for the memory transfer to finish.
1413e8d8bef9SDimitry Andric bool hideMemTransfersLatency() {
1414e8d8bef9SDimitry Andric auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
1415e8d8bef9SDimitry Andric bool Changed = false;
1416e8d8bef9SDimitry Andric auto SplitMemTransfers = [&](Use &U, Function &Decl) {
1417e8d8bef9SDimitry Andric auto *RTCall = getCallIfRegularCall(U, &RFI);
1418e8d8bef9SDimitry Andric if (!RTCall)
1419e8d8bef9SDimitry Andric return false;
1420e8d8bef9SDimitry Andric
1421e8d8bef9SDimitry Andric OffloadArray OffloadArrays[3];
1422e8d8bef9SDimitry Andric if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
1423e8d8bef9SDimitry Andric return false;
1424e8d8bef9SDimitry Andric
1425e8d8bef9SDimitry Andric LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays));
1426e8d8bef9SDimitry Andric
1427e8d8bef9SDimitry Andric // TODO: Check if can be moved upwards.
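// Note that only the "wait" placement is computed here: canBeMovedDownwards()
// walks forward from the mapper call and returns the first instruction that
// may read or write memory, the block terminator if there is none, or nullptr
// when the sinking would not be worth it; the "wait" is then emitted right
// before the returned point so independent work can overlap with the transfer.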
1428e8d8bef9SDimitry Andric bool WasSplit = false; 1429e8d8bef9SDimitry Andric Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall); 1430e8d8bef9SDimitry Andric if (WaitMovementPoint) 1431e8d8bef9SDimitry Andric WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint); 1432e8d8bef9SDimitry Andric 1433e8d8bef9SDimitry Andric Changed |= WasSplit; 1434e8d8bef9SDimitry Andric return WasSplit; 1435e8d8bef9SDimitry Andric }; 14361ac55f4cSDimitry Andric if (OMPInfoCache.runtimeFnsAvailable( 14371ac55f4cSDimitry Andric {OMPRTL___tgt_target_data_begin_mapper_issue, 14381ac55f4cSDimitry Andric OMPRTL___tgt_target_data_begin_mapper_wait})) 1439e8d8bef9SDimitry Andric RFI.foreachUse(SCC, SplitMemTransfers); 1440e8d8bef9SDimitry Andric 1441e8d8bef9SDimitry Andric return Changed; 1442e8d8bef9SDimitry Andric } 1443e8d8bef9SDimitry Andric 1444e8d8bef9SDimitry Andric void analysisGlobalization() { 1445fe6060f1SDimitry Andric auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; 1446e8d8bef9SDimitry Andric 1447e8d8bef9SDimitry Andric auto CheckGlobalization = [&](Use &U, Function &Decl) { 1448e8d8bef9SDimitry Andric if (CallInst *CI = getCallIfRegularCall(U, &RFI)) { 1449fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemarkMissed ORM) { 1450fe6060f1SDimitry Andric return ORM 1451e8d8bef9SDimitry Andric << "Found thread data sharing on the GPU. " 1452e8d8bef9SDimitry Andric << "Expect degraded performance due to data globalization."; 1453e8d8bef9SDimitry Andric }; 1454fe6060f1SDimitry Andric emitRemark<OptimizationRemarkMissed>(CI, "OMP112", Remark); 1455e8d8bef9SDimitry Andric } 1456e8d8bef9SDimitry Andric 1457e8d8bef9SDimitry Andric return false; 1458e8d8bef9SDimitry Andric }; 1459e8d8bef9SDimitry Andric 1460e8d8bef9SDimitry Andric RFI.foreachUse(SCC, CheckGlobalization); 1461e8d8bef9SDimitry Andric } 1462e8d8bef9SDimitry Andric 1463e8d8bef9SDimitry Andric /// Maps the values stored in the offload arrays passed as arguments to 1464e8d8bef9SDimitry Andric /// \p RuntimeCall into the offload arrays in \p OAs. 1465e8d8bef9SDimitry Andric bool getValuesInOffloadArrays(CallInst &RuntimeCall, 1466e8d8bef9SDimitry Andric MutableArrayRef<OffloadArray> OAs) { 1467e8d8bef9SDimitry Andric assert(OAs.size() == 3 && "Need space for three offload arrays!"); 1468e8d8bef9SDimitry Andric 1469e8d8bef9SDimitry Andric // A runtime call that involves memory offloading looks something like: 1470e8d8bef9SDimitry Andric // call void @__tgt_target_data_begin_mapper(arg0, arg1, 1471e8d8bef9SDimitry Andric // i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes, 1472e8d8bef9SDimitry Andric // ...) 1473e8d8bef9SDimitry Andric // So, the idea is to access the allocas that allocate space for these 1474e8d8bef9SDimitry Andric // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes. 1475e8d8bef9SDimitry Andric // Therefore: 1476e8d8bef9SDimitry Andric // i8** %offload_baseptrs. 1477e8d8bef9SDimitry Andric Value *BasePtrsArg = 1478e8d8bef9SDimitry Andric RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum); 1479e8d8bef9SDimitry Andric // i8** %offload_ptrs. 1480e8d8bef9SDimitry Andric Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum); 1481e8d8bef9SDimitry Andric // i8** %offload_sizes. 1482e8d8bef9SDimitry Andric Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum); 1483e8d8bef9SDimitry Andric 1484e8d8bef9SDimitry Andric // Get values stored in **offload_baseptrs. 
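// Rough sketch (illustration only, not actual front-end output): for a
// directive such as
//   #pragma omp target data map(to: a[0:N])
// the three stack arrays typically end up holding
//   offload_baseptrs[0] = &a, offload_ptrs[0] = &a[0],
//   offload_sizes[0]    = N * sizeof(a[0])
// and the code below walks from each call argument back to the underlying
// alloca to read those stored values.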
1485e8d8bef9SDimitry Andric auto *V = getUnderlyingObject(BasePtrsArg); 1486e8d8bef9SDimitry Andric if (!isa<AllocaInst>(V)) 1487e8d8bef9SDimitry Andric return false; 1488e8d8bef9SDimitry Andric auto *BasePtrsArray = cast<AllocaInst>(V); 1489e8d8bef9SDimitry Andric if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall)) 1490e8d8bef9SDimitry Andric return false; 1491e8d8bef9SDimitry Andric 1492e8d8bef9SDimitry Andric // Get values stored in **offload_baseptrs. 1493e8d8bef9SDimitry Andric V = getUnderlyingObject(PtrsArg); 1494e8d8bef9SDimitry Andric if (!isa<AllocaInst>(V)) 1495e8d8bef9SDimitry Andric return false; 1496e8d8bef9SDimitry Andric auto *PtrsArray = cast<AllocaInst>(V); 1497e8d8bef9SDimitry Andric if (!OAs[1].initialize(*PtrsArray, RuntimeCall)) 1498e8d8bef9SDimitry Andric return false; 1499e8d8bef9SDimitry Andric 1500e8d8bef9SDimitry Andric // Get values stored in **offload_sizes. 1501e8d8bef9SDimitry Andric V = getUnderlyingObject(SizesArg); 1502e8d8bef9SDimitry Andric // If it's a [constant] global array don't analyze it. 1503e8d8bef9SDimitry Andric if (isa<GlobalValue>(V)) 1504e8d8bef9SDimitry Andric return isa<Constant>(V); 1505e8d8bef9SDimitry Andric if (!isa<AllocaInst>(V)) 1506e8d8bef9SDimitry Andric return false; 1507e8d8bef9SDimitry Andric 1508e8d8bef9SDimitry Andric auto *SizesArray = cast<AllocaInst>(V); 1509e8d8bef9SDimitry Andric if (!OAs[2].initialize(*SizesArray, RuntimeCall)) 1510e8d8bef9SDimitry Andric return false; 1511e8d8bef9SDimitry Andric 1512e8d8bef9SDimitry Andric return true; 1513e8d8bef9SDimitry Andric } 1514e8d8bef9SDimitry Andric 1515e8d8bef9SDimitry Andric /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG. 1516e8d8bef9SDimitry Andric /// For now this is a way to test that the function getValuesInOffloadArrays 1517e8d8bef9SDimitry Andric /// is working properly. 1518e8d8bef9SDimitry Andric /// TODO: Move this to a unittest when unittests are available for OpenMPOpt. 
1519e8d8bef9SDimitry Andric void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) { 1520e8d8bef9SDimitry Andric assert(OAs.size() == 3 && "There are three offload arrays to debug!"); 1521e8d8bef9SDimitry Andric 1522e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n"); 1523e8d8bef9SDimitry Andric std::string ValuesStr; 1524e8d8bef9SDimitry Andric raw_string_ostream Printer(ValuesStr); 1525e8d8bef9SDimitry Andric std::string Separator = " --- "; 1526e8d8bef9SDimitry Andric 1527e8d8bef9SDimitry Andric for (auto *BP : OAs[0].StoredValues) { 1528e8d8bef9SDimitry Andric BP->print(Printer); 1529e8d8bef9SDimitry Andric Printer << Separator; 1530e8d8bef9SDimitry Andric } 1531e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n"); 1532e8d8bef9SDimitry Andric ValuesStr.clear(); 1533e8d8bef9SDimitry Andric 1534e8d8bef9SDimitry Andric for (auto *P : OAs[1].StoredValues) { 1535e8d8bef9SDimitry Andric P->print(Printer); 1536e8d8bef9SDimitry Andric Printer << Separator; 1537e8d8bef9SDimitry Andric } 1538e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n"); 1539e8d8bef9SDimitry Andric ValuesStr.clear(); 1540e8d8bef9SDimitry Andric 1541e8d8bef9SDimitry Andric for (auto *S : OAs[2].StoredValues) { 1542e8d8bef9SDimitry Andric S->print(Printer); 1543e8d8bef9SDimitry Andric Printer << Separator; 1544e8d8bef9SDimitry Andric } 1545e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n"); 1546e8d8bef9SDimitry Andric } 1547e8d8bef9SDimitry Andric 1548e8d8bef9SDimitry Andric /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be 1549e8d8bef9SDimitry Andric /// moved. Returns nullptr if the movement is not possible, or not worth it. 1550e8d8bef9SDimitry Andric Instruction *canBeMovedDownwards(CallInst &RuntimeCall) { 1551e8d8bef9SDimitry Andric // FIXME: This traverses only the BasicBlock where RuntimeCall is. 1552e8d8bef9SDimitry Andric // Make it traverse the CFG. 1553e8d8bef9SDimitry Andric 1554e8d8bef9SDimitry Andric Instruction *CurrentI = &RuntimeCall; 1555e8d8bef9SDimitry Andric bool IsWorthIt = false; 1556e8d8bef9SDimitry Andric while ((CurrentI = CurrentI->getNextNode())) { 1557e8d8bef9SDimitry Andric 1558e8d8bef9SDimitry Andric // TODO: Once we detect the regions to be offloaded we should use the 1559e8d8bef9SDimitry Andric // alias analysis manager to check if CurrentI may modify one of 1560e8d8bef9SDimitry Andric // the offloaded regions. 1561e8d8bef9SDimitry Andric if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) { 1562e8d8bef9SDimitry Andric if (IsWorthIt) 1563e8d8bef9SDimitry Andric return CurrentI; 1564e8d8bef9SDimitry Andric 1565e8d8bef9SDimitry Andric return nullptr; 1566e8d8bef9SDimitry Andric } 1567e8d8bef9SDimitry Andric 1568e8d8bef9SDimitry Andric // FIXME: For now if we move it over anything without side effect 1569e8d8bef9SDimitry Andric // is worth it. 1570e8d8bef9SDimitry Andric IsWorthIt = true; 1571e8d8bef9SDimitry Andric } 1572e8d8bef9SDimitry Andric 1573e8d8bef9SDimitry Andric // Return end of BasicBlock. 1574e8d8bef9SDimitry Andric return RuntimeCall.getParent()->getTerminator(); 1575e8d8bef9SDimitry Andric } 1576e8d8bef9SDimitry Andric 1577e8d8bef9SDimitry Andric /// Splits \p RuntimeCall into its "issue" and "wait" counterparts. 
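/// A rough sketch of the rewrite (argument lists abbreviated, names made up):
///   call void @__tgt_target_data_begin_mapper(%ident, %dev, ...)
/// becomes
///   %handle = alloca %struct.__tgt_async_info
///   call void @__tgt_target_data_begin_mapper_issue(%ident, %dev, ..., %handle)
///   ...                                ; independent instructions
///   call void @__tgt_target_data_begin_mapper_wait(%dev, %handle)
/// with the wait emitted at the movement point computed above.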
1578e8d8bef9SDimitry Andric bool splitTargetDataBeginRTC(CallInst &RuntimeCall, 1579e8d8bef9SDimitry Andric Instruction &WaitMovementPoint) { 1580e8d8bef9SDimitry Andric // Create stack allocated handle (__tgt_async_info) at the beginning of the 1581e8d8bef9SDimitry Andric // function. Used for storing information of the async transfer, allowing to 1582e8d8bef9SDimitry Andric // wait on it later. 1583e8d8bef9SDimitry Andric auto &IRBuilder = OMPInfoCache.OMPBuilder; 1584bdd1243dSDimitry Andric Function *F = RuntimeCall.getCaller(); 1585bdd1243dSDimitry Andric BasicBlock &Entry = F->getEntryBlock(); 1586bdd1243dSDimitry Andric IRBuilder.Builder.SetInsertPoint(&Entry, 1587bdd1243dSDimitry Andric Entry.getFirstNonPHIOrDbgOrAlloca()); 1588bdd1243dSDimitry Andric Value *Handle = IRBuilder.Builder.CreateAlloca( 1589bdd1243dSDimitry Andric IRBuilder.AsyncInfo, /*ArraySize=*/nullptr, "handle"); 1590bdd1243dSDimitry Andric Handle = 1591bdd1243dSDimitry Andric IRBuilder.Builder.CreateAddrSpaceCast(Handle, IRBuilder.AsyncInfoPtr); 1592e8d8bef9SDimitry Andric 1593e8d8bef9SDimitry Andric // Add "issue" runtime call declaration: 1594e8d8bef9SDimitry Andric // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32, 1595e8d8bef9SDimitry Andric // i8**, i8**, i64*, i64*) 1596e8d8bef9SDimitry Andric FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction( 1597e8d8bef9SDimitry Andric M, OMPRTL___tgt_target_data_begin_mapper_issue); 1598e8d8bef9SDimitry Andric 1599e8d8bef9SDimitry Andric // Change RuntimeCall call site for its asynchronous version. 1600e8d8bef9SDimitry Andric SmallVector<Value *, 16> Args; 1601e8d8bef9SDimitry Andric for (auto &Arg : RuntimeCall.args()) 1602e8d8bef9SDimitry Andric Args.push_back(Arg.get()); 1603e8d8bef9SDimitry Andric Args.push_back(Handle); 1604e8d8bef9SDimitry Andric 1605e8d8bef9SDimitry Andric CallInst *IssueCallsite = 1606e8d8bef9SDimitry Andric CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall); 160704eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(IssueDecl, IssueCallsite); 1608e8d8bef9SDimitry Andric RuntimeCall.eraseFromParent(); 1609e8d8bef9SDimitry Andric 1610e8d8bef9SDimitry Andric // Add "wait" runtime call declaration: 1611e8d8bef9SDimitry Andric // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info) 1612e8d8bef9SDimitry Andric FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction( 1613e8d8bef9SDimitry Andric M, OMPRTL___tgt_target_data_begin_mapper_wait); 1614e8d8bef9SDimitry Andric 1615e8d8bef9SDimitry Andric Value *WaitParams[2] = { 1616e8d8bef9SDimitry Andric IssueCallsite->getArgOperand( 1617e8d8bef9SDimitry Andric OffloadArray::DeviceIDArgNum), // device_id. 1618e8d8bef9SDimitry Andric Handle // handle to wait on. 
1619e8d8bef9SDimitry Andric };
162004eeddc0SDimitry Andric CallInst *WaitCallsite = CallInst::Create(
162104eeddc0SDimitry Andric WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
162204eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(WaitDecl, WaitCallsite);
1623e8d8bef9SDimitry Andric
1624e8d8bef9SDimitry Andric return true;
1625e8d8bef9SDimitry Andric }
1626e8d8bef9SDimitry Andric
16275ffd83dbSDimitry Andric static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
16285ffd83dbSDimitry Andric bool GlobalOnly, bool &SingleChoice) {
16295ffd83dbSDimitry Andric if (CurrentIdent == NextIdent)
16305ffd83dbSDimitry Andric return CurrentIdent;
16315ffd83dbSDimitry Andric
16325ffd83dbSDimitry Andric // TODO: Figure out how to actually combine multiple debug locations. For
16335ffd83dbSDimitry Andric // now we just keep an existing one if there is a single choice.
16345ffd83dbSDimitry Andric if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
16355ffd83dbSDimitry Andric SingleChoice = !CurrentIdent;
16365ffd83dbSDimitry Andric return NextIdent;
16375ffd83dbSDimitry Andric }
16385ffd83dbSDimitry Andric return nullptr;
16395ffd83dbSDimitry Andric }
16405ffd83dbSDimitry Andric
16415ffd83dbSDimitry Andric /// Return a `struct ident_t*` value that represents the ones used in the
16425ffd83dbSDimitry Andric /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
16435ffd83dbSDimitry Andric /// return a local `struct ident_t*`. For now, if we cannot find a suitable
16445ffd83dbSDimitry Andric /// return value we create one from scratch. We also do not yet combine
16455ffd83dbSDimitry Andric /// information, e.g., the source locations, see combinedIdentStruct.
16465ffd83dbSDimitry Andric Value *
16475ffd83dbSDimitry Andric getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
16485ffd83dbSDimitry Andric Function &F, bool GlobalOnly) {
16495ffd83dbSDimitry Andric bool SingleChoice = true;
16505ffd83dbSDimitry Andric Value *Ident = nullptr;
16515ffd83dbSDimitry Andric auto CombineIdentStruct = [&](Use &U, Function &Caller) {
16525ffd83dbSDimitry Andric CallInst *CI = getCallIfRegularCall(U, &RFI);
16535ffd83dbSDimitry Andric if (!CI || &F != &Caller)
16545ffd83dbSDimitry Andric return false;
16555ffd83dbSDimitry Andric Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
16565ffd83dbSDimitry Andric /* GlobalOnly */ true, SingleChoice);
16575ffd83dbSDimitry Andric return false;
16585ffd83dbSDimitry Andric };
16595ffd83dbSDimitry Andric RFI.foreachUse(SCC, CombineIdentStruct);
16605ffd83dbSDimitry Andric
16615ffd83dbSDimitry Andric if (!Ident || !SingleChoice) {
16625ffd83dbSDimitry Andric // The IRBuilder uses the insertion block to get to the module, this is
16635ffd83dbSDimitry Andric // unfortunate but we work around it for now.
16645ffd83dbSDimitry Andric if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
16655ffd83dbSDimitry Andric OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
16665ffd83dbSDimitry Andric &F.getEntryBlock(), F.getEntryBlock().begin()));
16675ffd83dbSDimitry Andric // Create a fallback location if none was found.
16685ffd83dbSDimitry Andric // TODO: Use the debug locations of the calls instead.
166904eeddc0SDimitry Andric uint32_t SrcLocStrSize; 167004eeddc0SDimitry Andric Constant *Loc = 167104eeddc0SDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 167204eeddc0SDimitry Andric Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc, SrcLocStrSize); 16735ffd83dbSDimitry Andric } 16745ffd83dbSDimitry Andric return Ident; 16755ffd83dbSDimitry Andric } 16765ffd83dbSDimitry Andric 16775ffd83dbSDimitry Andric /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or 16785ffd83dbSDimitry Andric /// \p ReplVal if given. 16795ffd83dbSDimitry Andric bool deduplicateRuntimeCalls(Function &F, 16805ffd83dbSDimitry Andric OMPInformationCache::RuntimeFunctionInfo &RFI, 16815ffd83dbSDimitry Andric Value *ReplVal = nullptr) { 16825ffd83dbSDimitry Andric auto *UV = RFI.getUseVector(F); 16835ffd83dbSDimitry Andric if (!UV || UV->size() + (ReplVal != nullptr) < 2) 16845ffd83dbSDimitry Andric return false; 16855ffd83dbSDimitry Andric 16865ffd83dbSDimitry Andric LLVM_DEBUG( 16875ffd83dbSDimitry Andric dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name 16885ffd83dbSDimitry Andric << (ReplVal ? " with an existing value\n" : "\n") << "\n"); 16895ffd83dbSDimitry Andric 16905ffd83dbSDimitry Andric assert((!ReplVal || (isa<Argument>(ReplVal) && 16915ffd83dbSDimitry Andric cast<Argument>(ReplVal)->getParent() == &F)) && 16925ffd83dbSDimitry Andric "Unexpected replacement value!"); 16935ffd83dbSDimitry Andric 16945ffd83dbSDimitry Andric // TODO: Use dominance to find a good position instead. 16955ffd83dbSDimitry Andric auto CanBeMoved = [this](CallBase &CB) { 1696349cc55cSDimitry Andric unsigned NumArgs = CB.arg_size(); 16975ffd83dbSDimitry Andric if (NumArgs == 0) 16985ffd83dbSDimitry Andric return true; 16995ffd83dbSDimitry Andric if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr) 17005ffd83dbSDimitry Andric return false; 1701349cc55cSDimitry Andric for (unsigned U = 1; U < NumArgs; ++U) 1702349cc55cSDimitry Andric if (isa<Instruction>(CB.getArgOperand(U))) 17035ffd83dbSDimitry Andric return false; 17045ffd83dbSDimitry Andric return true; 17055ffd83dbSDimitry Andric }; 17065ffd83dbSDimitry Andric 17075ffd83dbSDimitry Andric if (!ReplVal) { 1708*06c3fb27SDimitry Andric auto *DT = 1709*06c3fb27SDimitry Andric OMPInfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(F); 1710*06c3fb27SDimitry Andric if (!DT) 1711*06c3fb27SDimitry Andric return false; 1712*06c3fb27SDimitry Andric Instruction *IP = nullptr; 1713*06c3fb27SDimitry Andric for (Use *U : *UV) { 17145ffd83dbSDimitry Andric if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { 1715*06c3fb27SDimitry Andric if (IP) 1716*06c3fb27SDimitry Andric IP = DT->findNearestCommonDominator(IP, CI); 1717*06c3fb27SDimitry Andric else 1718*06c3fb27SDimitry Andric IP = CI; 17195ffd83dbSDimitry Andric if (!CanBeMoved(*CI)) 17205ffd83dbSDimitry Andric continue; 1721*06c3fb27SDimitry Andric if (!ReplVal) 17225ffd83dbSDimitry Andric ReplVal = CI; 1723*06c3fb27SDimitry Andric } 17245ffd83dbSDimitry Andric } 17255ffd83dbSDimitry Andric if (!ReplVal) 17265ffd83dbSDimitry Andric return false; 1727*06c3fb27SDimitry Andric assert(IP && "Expected insertion point!"); 1728*06c3fb27SDimitry Andric cast<Instruction>(ReplVal)->moveBefore(IP); 17295ffd83dbSDimitry Andric } 17305ffd83dbSDimitry Andric 17315ffd83dbSDimitry Andric // If we use a call as a replacement value we need to make sure the ident is 17325ffd83dbSDimitry Andric // valid at the new location. 
For now we just pick a global one, either 17335ffd83dbSDimitry Andric // existing and used by one of the calls, or created from scratch. 17345ffd83dbSDimitry Andric if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) { 1735349cc55cSDimitry Andric if (!CI->arg_empty() && 17365ffd83dbSDimitry Andric CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) { 17375ffd83dbSDimitry Andric Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F, 17385ffd83dbSDimitry Andric /* GlobalOnly */ true); 17395ffd83dbSDimitry Andric CI->setArgOperand(0, Ident); 17405ffd83dbSDimitry Andric } 17415ffd83dbSDimitry Andric } 17425ffd83dbSDimitry Andric 17435ffd83dbSDimitry Andric bool Changed = false; 17445ffd83dbSDimitry Andric auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { 17455ffd83dbSDimitry Andric CallInst *CI = getCallIfRegularCall(U, &RFI); 17465ffd83dbSDimitry Andric if (!CI || CI == ReplVal || &F != &Caller) 17475ffd83dbSDimitry Andric return false; 17485ffd83dbSDimitry Andric assert(CI->getCaller() == &F && "Unexpected call!"); 17495ffd83dbSDimitry Andric 17505ffd83dbSDimitry Andric auto Remark = [&](OptimizationRemark OR) { 17515ffd83dbSDimitry Andric return OR << "OpenMP runtime call " 1752fe6060f1SDimitry Andric << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated."; 17535ffd83dbSDimitry Andric }; 1754fe6060f1SDimitry Andric if (CI->getDebugLoc()) 1755fe6060f1SDimitry Andric emitRemark<OptimizationRemark>(CI, "OMP170", Remark); 1756fe6060f1SDimitry Andric else 1757fe6060f1SDimitry Andric emitRemark<OptimizationRemark>(&F, "OMP170", Remark); 17585ffd83dbSDimitry Andric 17595ffd83dbSDimitry Andric CGUpdater.removeCallSite(*CI); 17605ffd83dbSDimitry Andric CI->replaceAllUsesWith(ReplVal); 17615ffd83dbSDimitry Andric CI->eraseFromParent(); 17625ffd83dbSDimitry Andric ++NumOpenMPRuntimeCallsDeduplicated; 17635ffd83dbSDimitry Andric Changed = true; 17645ffd83dbSDimitry Andric return true; 17655ffd83dbSDimitry Andric }; 17665ffd83dbSDimitry Andric RFI.foreachUse(SCC, ReplaceAndDeleteCB); 17675ffd83dbSDimitry Andric 17685ffd83dbSDimitry Andric return Changed; 17695ffd83dbSDimitry Andric } 17705ffd83dbSDimitry Andric 17715ffd83dbSDimitry Andric /// Collect arguments that represent the global thread id in \p GTIdArgs. 17725ffd83dbSDimitry Andric void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> >IdArgs) { 17735ffd83dbSDimitry Andric // TODO: Below we basically perform a fixpoint iteration with a pessimistic 17745ffd83dbSDimitry Andric // initialization. We could define an AbstractAttribute instead and 17755ffd83dbSDimitry Andric // run the Attributor here once it can be run as an SCC pass. 17765ffd83dbSDimitry Andric 17775ffd83dbSDimitry Andric // Helper to check the argument \p ArgNo at all call sites of \p F for 17785ffd83dbSDimitry Andric // a GTId. 
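// A rough example (names made up): for
//   static void bar(int gtid) { ... }
// where every call site passes either an already known GTId value or the
// result of a __kmpc_global_thread_num call directly, argument 0 of bar is
// itself treated as a GTId; external linkage or any other incoming value
// disqualifies it.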
17795ffd83dbSDimitry Andric auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) { 17805ffd83dbSDimitry Andric if (!F.hasLocalLinkage()) 17815ffd83dbSDimitry Andric return false; 17825ffd83dbSDimitry Andric for (Use &U : F.uses()) { 17835ffd83dbSDimitry Andric if (CallInst *CI = getCallIfRegularCall(U)) { 17845ffd83dbSDimitry Andric Value *ArgOp = CI->getArgOperand(ArgNo); 17855ffd83dbSDimitry Andric if (CI == &RefCI || GTIdArgs.count(ArgOp) || 17865ffd83dbSDimitry Andric getCallIfRegularCall( 17875ffd83dbSDimitry Andric *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num])) 17885ffd83dbSDimitry Andric continue; 17895ffd83dbSDimitry Andric } 17905ffd83dbSDimitry Andric return false; 17915ffd83dbSDimitry Andric } 17925ffd83dbSDimitry Andric return true; 17935ffd83dbSDimitry Andric }; 17945ffd83dbSDimitry Andric 17955ffd83dbSDimitry Andric // Helper to identify uses of a GTId as GTId arguments. 17965ffd83dbSDimitry Andric auto AddUserArgs = [&](Value >Id) { 17975ffd83dbSDimitry Andric for (Use &U : GTId.uses()) 17985ffd83dbSDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(U.getUser())) 17995ffd83dbSDimitry Andric if (CI->isArgOperand(&U)) 18005ffd83dbSDimitry Andric if (Function *Callee = CI->getCalledFunction()) 18015ffd83dbSDimitry Andric if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI)) 18025ffd83dbSDimitry Andric GTIdArgs.insert(Callee->getArg(U.getOperandNo())); 18035ffd83dbSDimitry Andric }; 18045ffd83dbSDimitry Andric 18055ffd83dbSDimitry Andric // The argument users of __kmpc_global_thread_num calls are GTIds. 18065ffd83dbSDimitry Andric OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = 18075ffd83dbSDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]; 18085ffd83dbSDimitry Andric 18095ffd83dbSDimitry Andric GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) { 18105ffd83dbSDimitry Andric if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI)) 18115ffd83dbSDimitry Andric AddUserArgs(*CI); 18125ffd83dbSDimitry Andric return false; 18135ffd83dbSDimitry Andric }); 18145ffd83dbSDimitry Andric 18155ffd83dbSDimitry Andric // Transitively search for more arguments by looking at the users of the 18165ffd83dbSDimitry Andric // ones we know already. During the search the GTIdArgs vector is extended 18175ffd83dbSDimitry Andric // so we cannot cache the size nor can we use a range based for. 1818349cc55cSDimitry Andric for (unsigned U = 0; U < GTIdArgs.size(); ++U) 1819349cc55cSDimitry Andric AddUserArgs(*GTIdArgs[U]); 18205ffd83dbSDimitry Andric } 18215ffd83dbSDimitry Andric 18225ffd83dbSDimitry Andric /// Kernel (=GPU) optimizations and utility functions 18235ffd83dbSDimitry Andric /// 18245ffd83dbSDimitry Andric ///{{ 18255ffd83dbSDimitry Andric 18265ffd83dbSDimitry Andric /// Cache to remember the unique kernel for a function. 1827bdd1243dSDimitry Andric DenseMap<Function *, std::optional<Kernel>> UniqueKernelMap; 18285ffd83dbSDimitry Andric 18295ffd83dbSDimitry Andric /// Find the unique kernel that will execute \p F, if any. 18305ffd83dbSDimitry Andric Kernel getUniqueKernelFor(Function &F); 18315ffd83dbSDimitry Andric 18325ffd83dbSDimitry Andric /// Find the unique kernel that will execute \p I, if any. 
18335ffd83dbSDimitry Andric Kernel getUniqueKernelFor(Instruction &I) { 18345ffd83dbSDimitry Andric return getUniqueKernelFor(*I.getFunction()); 18355ffd83dbSDimitry Andric } 18365ffd83dbSDimitry Andric 18375ffd83dbSDimitry Andric /// Rewrite the device (=GPU) code state machine create in non-SPMD mode in 18385ffd83dbSDimitry Andric /// the cases we can avoid taking the address of a function. 18395ffd83dbSDimitry Andric bool rewriteDeviceCodeStateMachine(); 18405ffd83dbSDimitry Andric 18415ffd83dbSDimitry Andric /// 18425ffd83dbSDimitry Andric ///}} 18435ffd83dbSDimitry Andric 18445ffd83dbSDimitry Andric /// Emit a remark generically 18455ffd83dbSDimitry Andric /// 18465ffd83dbSDimitry Andric /// This template function can be used to generically emit a remark. The 18475ffd83dbSDimitry Andric /// RemarkKind should be one of the following: 18485ffd83dbSDimitry Andric /// - OptimizationRemark to indicate a successful optimization attempt 18495ffd83dbSDimitry Andric /// - OptimizationRemarkMissed to report a failed optimization attempt 18505ffd83dbSDimitry Andric /// - OptimizationRemarkAnalysis to provide additional information about an 18515ffd83dbSDimitry Andric /// optimization attempt 18525ffd83dbSDimitry Andric /// 18535ffd83dbSDimitry Andric /// The remark is built using a callback function provided by the caller that 18545ffd83dbSDimitry Andric /// takes a RemarkKind as input and returns a RemarkKind. 1855fe6060f1SDimitry Andric template <typename RemarkKind, typename RemarkCallBack> 1856fe6060f1SDimitry Andric void emitRemark(Instruction *I, StringRef RemarkName, 18575ffd83dbSDimitry Andric RemarkCallBack &&RemarkCB) const { 1858fe6060f1SDimitry Andric Function *F = I->getParent()->getParent(); 18595ffd83dbSDimitry Andric auto &ORE = OREGetter(F); 18605ffd83dbSDimitry Andric 1861fe6060f1SDimitry Andric if (RemarkName.startswith("OMP")) 18625ffd83dbSDimitry Andric ORE.emit([&]() { 1863fe6060f1SDimitry Andric return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)) 1864fe6060f1SDimitry Andric << " [" << RemarkName << "]"; 18655ffd83dbSDimitry Andric }); 1866fe6060f1SDimitry Andric else 1867fe6060f1SDimitry Andric ORE.emit( 1868fe6060f1SDimitry Andric [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)); }); 18695ffd83dbSDimitry Andric } 18705ffd83dbSDimitry Andric 1871fe6060f1SDimitry Andric /// Emit a remark on a function. 1872fe6060f1SDimitry Andric template <typename RemarkKind, typename RemarkCallBack> 1873fe6060f1SDimitry Andric void emitRemark(Function *F, StringRef RemarkName, 1874fe6060f1SDimitry Andric RemarkCallBack &&RemarkCB) const { 1875fe6060f1SDimitry Andric auto &ORE = OREGetter(F); 1876fe6060f1SDimitry Andric 1877fe6060f1SDimitry Andric if (RemarkName.startswith("OMP")) 1878fe6060f1SDimitry Andric ORE.emit([&]() { 1879fe6060f1SDimitry Andric return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)) 1880fe6060f1SDimitry Andric << " [" << RemarkName << "]"; 1881fe6060f1SDimitry Andric }); 1882fe6060f1SDimitry Andric else 1883fe6060f1SDimitry Andric ORE.emit( 1884fe6060f1SDimitry Andric [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)); }); 1885fe6060f1SDimitry Andric } 1886fe6060f1SDimitry Andric 18875ffd83dbSDimitry Andric /// The underlying module. 18885ffd83dbSDimitry Andric Module &M; 18895ffd83dbSDimitry Andric 18905ffd83dbSDimitry Andric /// The SCC we are operating on. 
18915ffd83dbSDimitry Andric SmallVectorImpl<Function *> &SCC; 18925ffd83dbSDimitry Andric 18935ffd83dbSDimitry Andric /// Callback to update the call graph, the first argument is a removed call, 18945ffd83dbSDimitry Andric /// the second an optional replacement call. 18955ffd83dbSDimitry Andric CallGraphUpdater &CGUpdater; 18965ffd83dbSDimitry Andric 18975ffd83dbSDimitry Andric /// Callback to get an OptimizationRemarkEmitter from a Function * 18985ffd83dbSDimitry Andric OptimizationRemarkGetter OREGetter; 18995ffd83dbSDimitry Andric 19005ffd83dbSDimitry Andric /// OpenMP-specific information cache. Also Used for Attributor runs. 19015ffd83dbSDimitry Andric OMPInformationCache &OMPInfoCache; 19025ffd83dbSDimitry Andric 19035ffd83dbSDimitry Andric /// Attributor instance. 19045ffd83dbSDimitry Andric Attributor &A; 19055ffd83dbSDimitry Andric 19065ffd83dbSDimitry Andric /// Helper function to run Attributor on SCC. 1907fe6060f1SDimitry Andric bool runAttributor(bool IsModulePass) { 19085ffd83dbSDimitry Andric if (SCC.empty()) 19095ffd83dbSDimitry Andric return false; 19105ffd83dbSDimitry Andric 1911fe6060f1SDimitry Andric registerAAs(IsModulePass); 19125ffd83dbSDimitry Andric 19135ffd83dbSDimitry Andric ChangeStatus Changed = A.run(); 19145ffd83dbSDimitry Andric 19155ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size() 19165ffd83dbSDimitry Andric << " functions, result: " << Changed << ".\n"); 19175ffd83dbSDimitry Andric 19185ffd83dbSDimitry Andric return Changed == ChangeStatus::CHANGED; 19195ffd83dbSDimitry Andric } 19205ffd83dbSDimitry Andric 1921fe6060f1SDimitry Andric void registerFoldRuntimeCall(RuntimeFunction RF); 1922fe6060f1SDimitry Andric 19235ffd83dbSDimitry Andric /// Populate the Attributor with abstract attribute opportunities in the 1924bdd1243dSDimitry Andric /// functions. 1925fe6060f1SDimitry Andric void registerAAs(bool IsModulePass); 1926bdd1243dSDimitry Andric 1927bdd1243dSDimitry Andric public: 1928bdd1243dSDimitry Andric /// Callback to register AAs for live functions, including internal functions 1929bdd1243dSDimitry Andric /// marked live during the traversal. 1930bdd1243dSDimitry Andric static void registerAAsForFunction(Attributor &A, const Function &F); 19315ffd83dbSDimitry Andric }; 19325ffd83dbSDimitry Andric 19335ffd83dbSDimitry Andric Kernel OpenMPOpt::getUniqueKernelFor(Function &F) { 1934*06c3fb27SDimitry Andric if (OMPInfoCache.CGSCC && !OMPInfoCache.CGSCC->empty() && 1935*06c3fb27SDimitry Andric !OMPInfoCache.CGSCC->contains(&F)) 19365ffd83dbSDimitry Andric return nullptr; 19375ffd83dbSDimitry Andric 19385ffd83dbSDimitry Andric // Use a scope to keep the lifetime of the CachedKernel short. 19395ffd83dbSDimitry Andric { 1940bdd1243dSDimitry Andric std::optional<Kernel> &CachedKernel = UniqueKernelMap[&F]; 19415ffd83dbSDimitry Andric if (CachedKernel) 19425ffd83dbSDimitry Andric return *CachedKernel; 19435ffd83dbSDimitry Andric 19445ffd83dbSDimitry Andric // TODO: We should use an AA to create an (optimistic and callback 19455ffd83dbSDimitry Andric // call-aware) call graph. For now we stick to simple patterns that 19465ffd83dbSDimitry Andric // are less powerful, basically the worst fixpoint. 
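// In short (a summary of the matching below, not an authoritative statement):
// a kernel maps to itself; a function with local linkage maps to kernel K if
// every use is an equality comparison, a direct call, or an operand of a
// __kmpc_parallel_51 call, and the unique kernel of each such user is K; any
// other use, or more than one candidate kernel, yields nullptr.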
19475ffd83dbSDimitry Andric if (isKernel(F)) { 19485ffd83dbSDimitry Andric CachedKernel = Kernel(&F); 19495ffd83dbSDimitry Andric return *CachedKernel; 19505ffd83dbSDimitry Andric } 19515ffd83dbSDimitry Andric 19525ffd83dbSDimitry Andric CachedKernel = nullptr; 1953e8d8bef9SDimitry Andric if (!F.hasLocalLinkage()) { 1954e8d8bef9SDimitry Andric 1955e8d8bef9SDimitry Andric // See https://openmp.llvm.org/remarks/OptimizationRemarks.html 1956fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemarkAnalysis ORA) { 1957fe6060f1SDimitry Andric return ORA << "Potentially unknown OpenMP target region caller."; 1958e8d8bef9SDimitry Andric }; 1959fe6060f1SDimitry Andric emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark); 1960e8d8bef9SDimitry Andric 19615ffd83dbSDimitry Andric return nullptr; 19625ffd83dbSDimitry Andric } 1963e8d8bef9SDimitry Andric } 19645ffd83dbSDimitry Andric 19655ffd83dbSDimitry Andric auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel { 19665ffd83dbSDimitry Andric if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) { 19675ffd83dbSDimitry Andric // Allow use in equality comparisons. 19685ffd83dbSDimitry Andric if (Cmp->isEquality()) 19695ffd83dbSDimitry Andric return getUniqueKernelFor(*Cmp); 19705ffd83dbSDimitry Andric return nullptr; 19715ffd83dbSDimitry Andric } 19725ffd83dbSDimitry Andric if (auto *CB = dyn_cast<CallBase>(U.getUser())) { 19735ffd83dbSDimitry Andric // Allow direct calls. 19745ffd83dbSDimitry Andric if (CB->isCallee(&U)) 19755ffd83dbSDimitry Andric return getUniqueKernelFor(*CB); 1976fe6060f1SDimitry Andric 1977fe6060f1SDimitry Andric OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI = 1978fe6060f1SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; 1979fe6060f1SDimitry Andric // Allow the use in __kmpc_parallel_51 calls. 1980fe6060f1SDimitry Andric if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI)) 19815ffd83dbSDimitry Andric return getUniqueKernelFor(*CB); 19825ffd83dbSDimitry Andric return nullptr; 19835ffd83dbSDimitry Andric } 19845ffd83dbSDimitry Andric // Disallow every other use. 19855ffd83dbSDimitry Andric return nullptr; 19865ffd83dbSDimitry Andric }; 19875ffd83dbSDimitry Andric 19885ffd83dbSDimitry Andric // TODO: In the future we want to track more than just a unique kernel. 19895ffd83dbSDimitry Andric SmallPtrSet<Kernel, 2> PotentialKernels; 1990e8d8bef9SDimitry Andric OMPInformationCache::foreachUse(F, [&](const Use &U) { 19915ffd83dbSDimitry Andric PotentialKernels.insert(GetUniqueKernelForUse(U)); 19925ffd83dbSDimitry Andric }); 19935ffd83dbSDimitry Andric 19945ffd83dbSDimitry Andric Kernel K = nullptr; 19955ffd83dbSDimitry Andric if (PotentialKernels.size() == 1) 19965ffd83dbSDimitry Andric K = *PotentialKernels.begin(); 19975ffd83dbSDimitry Andric 19985ffd83dbSDimitry Andric // Cache the result. 
19995ffd83dbSDimitry Andric UniqueKernelMap[&F] = K; 20005ffd83dbSDimitry Andric 20015ffd83dbSDimitry Andric return K; 20025ffd83dbSDimitry Andric } 20035ffd83dbSDimitry Andric 20045ffd83dbSDimitry Andric bool OpenMPOpt::rewriteDeviceCodeStateMachine() { 2005fe6060f1SDimitry Andric OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI = 2006fe6060f1SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; 20075ffd83dbSDimitry Andric 20085ffd83dbSDimitry Andric bool Changed = false; 2009fe6060f1SDimitry Andric if (!KernelParallelRFI) 20105ffd83dbSDimitry Andric return Changed; 20115ffd83dbSDimitry Andric 2012349cc55cSDimitry Andric // If we have disabled state machine changes, exit 2013349cc55cSDimitry Andric if (DisableOpenMPOptStateMachineRewrite) 2014349cc55cSDimitry Andric return Changed; 2015349cc55cSDimitry Andric 20165ffd83dbSDimitry Andric for (Function *F : SCC) { 20175ffd83dbSDimitry Andric 2018fe6060f1SDimitry Andric // Check if the function is a use in a __kmpc_parallel_51 call at 20195ffd83dbSDimitry Andric // all. 20205ffd83dbSDimitry Andric bool UnknownUse = false; 2021fe6060f1SDimitry Andric bool KernelParallelUse = false; 20225ffd83dbSDimitry Andric unsigned NumDirectCalls = 0; 20235ffd83dbSDimitry Andric 20245ffd83dbSDimitry Andric SmallVector<Use *, 2> ToBeReplacedStateMachineUses; 2025e8d8bef9SDimitry Andric OMPInformationCache::foreachUse(*F, [&](Use &U) { 20265ffd83dbSDimitry Andric if (auto *CB = dyn_cast<CallBase>(U.getUser())) 20275ffd83dbSDimitry Andric if (CB->isCallee(&U)) { 20285ffd83dbSDimitry Andric ++NumDirectCalls; 20295ffd83dbSDimitry Andric return; 20305ffd83dbSDimitry Andric } 20315ffd83dbSDimitry Andric 20325ffd83dbSDimitry Andric if (isa<ICmpInst>(U.getUser())) { 20335ffd83dbSDimitry Andric ToBeReplacedStateMachineUses.push_back(&U); 20345ffd83dbSDimitry Andric return; 20355ffd83dbSDimitry Andric } 2036fe6060f1SDimitry Andric 2037fe6060f1SDimitry Andric // Find wrapper functions that represent parallel kernels. 2038fe6060f1SDimitry Andric CallInst *CI = 2039fe6060f1SDimitry Andric OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI); 2040fe6060f1SDimitry Andric const unsigned int WrapperFunctionArgNo = 6; 2041fe6060f1SDimitry Andric if (!KernelParallelUse && CI && 2042fe6060f1SDimitry Andric CI->getArgOperandNo(&U) == WrapperFunctionArgNo) { 2043fe6060f1SDimitry Andric KernelParallelUse = true; 20445ffd83dbSDimitry Andric ToBeReplacedStateMachineUses.push_back(&U); 20455ffd83dbSDimitry Andric return; 20465ffd83dbSDimitry Andric } 20475ffd83dbSDimitry Andric UnknownUse = true; 20485ffd83dbSDimitry Andric }); 20495ffd83dbSDimitry Andric 2050fe6060f1SDimitry Andric // Do not emit a remark if we haven't seen a __kmpc_parallel_51 20515ffd83dbSDimitry Andric // use. 2052fe6060f1SDimitry Andric if (!KernelParallelUse) 20535ffd83dbSDimitry Andric continue; 20545ffd83dbSDimitry Andric 20555ffd83dbSDimitry Andric // If this ever hits, we should investigate. 20565ffd83dbSDimitry Andric // TODO: Checking the number of uses is not a necessary restriction and 20575ffd83dbSDimitry Andric // should be lifted. 20585ffd83dbSDimitry Andric if (UnknownUse || NumDirectCalls != 1 || 2059fe6060f1SDimitry Andric ToBeReplacedStateMachineUses.size() > 2) { 2060fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemarkAnalysis ORA) { 2061fe6060f1SDimitry Andric return ORA << "Parallel region is used in " 20625ffd83dbSDimitry Andric << (UnknownUse ? "unknown" : "unexpected") 2063fe6060f1SDimitry Andric << " ways. 
Will not attempt to rewrite the state machine.";
20645ffd83dbSDimitry Andric };
2065fe6060f1SDimitry Andric emitRemark<OptimizationRemarkAnalysis>(F, "OMP101", Remark);
20665ffd83dbSDimitry Andric continue;
20675ffd83dbSDimitry Andric }
20685ffd83dbSDimitry Andric
2069fe6060f1SDimitry Andric // Even if we have __kmpc_parallel_51 calls, we (for now) give
20705ffd83dbSDimitry Andric // up if the function is not called from a unique kernel.
20715ffd83dbSDimitry Andric Kernel K = getUniqueKernelFor(*F);
20725ffd83dbSDimitry Andric if (!K) {
2073fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemarkAnalysis ORA) {
2074fe6060f1SDimitry Andric return ORA << "Parallel region is not called from a unique kernel. "
2075fe6060f1SDimitry Andric "Will not attempt to rewrite the state machine.";
20765ffd83dbSDimitry Andric };
2077fe6060f1SDimitry Andric emitRemark<OptimizationRemarkAnalysis>(F, "OMP102", Remark);
20785ffd83dbSDimitry Andric continue;
20795ffd83dbSDimitry Andric }
20805ffd83dbSDimitry Andric
20815ffd83dbSDimitry Andric // We now know F is a parallel body function called only from the kernel K.
20825ffd83dbSDimitry Andric // We also identified the state machine uses in which we replace the
20835ffd83dbSDimitry Andric // function pointer by a new global symbol for identification purposes. This
20845ffd83dbSDimitry Andric // ensures only direct calls to the function are left.
20855ffd83dbSDimitry Andric
20865ffd83dbSDimitry Andric Module &M = *F->getParent();
20875ffd83dbSDimitry Andric Type *Int8Ty = Type::getInt8Ty(M.getContext());
20885ffd83dbSDimitry Andric
20895ffd83dbSDimitry Andric auto *ID = new GlobalVariable(
20905ffd83dbSDimitry Andric M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
20915ffd83dbSDimitry Andric UndefValue::get(Int8Ty), F->getName() + ".ID");
20925ffd83dbSDimitry Andric
20935ffd83dbSDimitry Andric for (Use *U : ToBeReplacedStateMachineUses)
20948c6f6c0cSDimitry Andric U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast(
20958c6f6c0cSDimitry Andric ID, U->get()->getType()));
20965ffd83dbSDimitry Andric
20975ffd83dbSDimitry Andric ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
20985ffd83dbSDimitry Andric
20995ffd83dbSDimitry Andric Changed = true;
21005ffd83dbSDimitry Andric }
21015ffd83dbSDimitry Andric
21025ffd83dbSDimitry Andric return Changed;
21035ffd83dbSDimitry Andric }
21045ffd83dbSDimitry Andric
21055ffd83dbSDimitry Andric /// Abstract Attribute for tracking ICV values.
21065ffd83dbSDimitry Andric struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
21075ffd83dbSDimitry Andric using Base = StateWrapper<BooleanState, AbstractAttribute>;
21085ffd83dbSDimitry Andric AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
21095ffd83dbSDimitry Andric
21105ffd83dbSDimitry Andric /// Returns true if value is assumed to be tracked.
21115ffd83dbSDimitry Andric bool isAssumedTracked() const { return getAssumed(); }
21125ffd83dbSDimitry Andric
21135ffd83dbSDimitry Andric /// Returns true if value is known to be tracked.
21145ffd83dbSDimitry Andric bool isKnownTracked() const { return getAssumed(); }
21155ffd83dbSDimitry Andric
21165ffd83dbSDimitry Andric /// Create an abstract attribute view for the position \p IRP.
21175ffd83dbSDimitry Andric static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);
21185ffd83dbSDimitry Andric
21195ffd83dbSDimitry Andric /// Return the value with which \p I can be replaced for specific \p ICV.
2120bdd1243dSDimitry Andric virtual std::optional<Value *> getReplacementValue(InternalControlVar ICV, 2121e8d8bef9SDimitry Andric const Instruction *I, 2122e8d8bef9SDimitry Andric Attributor &A) const { 2123bdd1243dSDimitry Andric return std::nullopt; 2124e8d8bef9SDimitry Andric } 2125e8d8bef9SDimitry Andric 2126e8d8bef9SDimitry Andric /// Return an assumed unique ICV value if a single candidate is found. If 2127bdd1243dSDimitry Andric /// there cannot be one, return a nullptr. If it is not clear yet, return 2128bdd1243dSDimitry Andric /// std::nullopt. 2129bdd1243dSDimitry Andric virtual std::optional<Value *> 2130e8d8bef9SDimitry Andric getUniqueReplacementValue(InternalControlVar ICV) const = 0; 2131e8d8bef9SDimitry Andric 2132e8d8bef9SDimitry Andric // Currently only nthreads is being tracked. 2133e8d8bef9SDimitry Andric // this array will only grow with time. 2134e8d8bef9SDimitry Andric InternalControlVar TrackableICVs[1] = {ICV_nthreads}; 21355ffd83dbSDimitry Andric 21365ffd83dbSDimitry Andric /// See AbstractAttribute::getName() 21375ffd83dbSDimitry Andric const std::string getName() const override { return "AAICVTracker"; } 21385ffd83dbSDimitry Andric 21395ffd83dbSDimitry Andric /// See AbstractAttribute::getIdAddr() 21405ffd83dbSDimitry Andric const char *getIdAddr() const override { return &ID; } 21415ffd83dbSDimitry Andric 21425ffd83dbSDimitry Andric /// This function should return true if the type of the \p AA is AAICVTracker 21435ffd83dbSDimitry Andric static bool classof(const AbstractAttribute *AA) { 21445ffd83dbSDimitry Andric return (AA->getIdAddr() == &ID); 21455ffd83dbSDimitry Andric } 21465ffd83dbSDimitry Andric 21475ffd83dbSDimitry Andric static const char ID; 21485ffd83dbSDimitry Andric }; 21495ffd83dbSDimitry Andric 21505ffd83dbSDimitry Andric struct AAICVTrackerFunction : public AAICVTracker { 21515ffd83dbSDimitry Andric AAICVTrackerFunction(const IRPosition &IRP, Attributor &A) 21525ffd83dbSDimitry Andric : AAICVTracker(IRP, A) {} 21535ffd83dbSDimitry Andric 21545ffd83dbSDimitry Andric // FIXME: come up with better string. 2155*06c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override { 2156*06c3fb27SDimitry Andric return "ICVTrackerFunction"; 2157*06c3fb27SDimitry Andric } 21585ffd83dbSDimitry Andric 21595ffd83dbSDimitry Andric // FIXME: come up with some stats. 21605ffd83dbSDimitry Andric void trackStatistics() const override {} 21615ffd83dbSDimitry Andric 2162e8d8bef9SDimitry Andric /// We don't manifest anything for this AA. 21635ffd83dbSDimitry Andric ChangeStatus manifest(Attributor &A) override { 2164e8d8bef9SDimitry Andric return ChangeStatus::UNCHANGED; 21655ffd83dbSDimitry Andric } 21665ffd83dbSDimitry Andric 21675ffd83dbSDimitry Andric // Map of ICV to their values at specific program point. 
2168e8d8bef9SDimitry Andric EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
21695ffd83dbSDimitry Andric InternalControlVar::ICV___last>
2170e8d8bef9SDimitry Andric ICVReplacementValuesMap;
21715ffd83dbSDimitry Andric
21725ffd83dbSDimitry Andric ChangeStatus updateImpl(Attributor &A) override {
21735ffd83dbSDimitry Andric ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
21745ffd83dbSDimitry Andric
21755ffd83dbSDimitry Andric Function *F = getAnchorScope();
21765ffd83dbSDimitry Andric
21775ffd83dbSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
21785ffd83dbSDimitry Andric
21795ffd83dbSDimitry Andric for (InternalControlVar ICV : TrackableICVs) {
21805ffd83dbSDimitry Andric auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
21815ffd83dbSDimitry Andric
2182e8d8bef9SDimitry Andric auto &ValuesMap = ICVReplacementValuesMap[ICV];
21835ffd83dbSDimitry Andric auto TrackValues = [&](Use &U, Function &) {
21845ffd83dbSDimitry Andric CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
21855ffd83dbSDimitry Andric if (!CI)
21865ffd83dbSDimitry Andric return false;
21875ffd83dbSDimitry Andric
21885ffd83dbSDimitry Andric // FIXME: handle setters with more than one argument.
21895ffd83dbSDimitry Andric /// Track new value.
2190e8d8bef9SDimitry Andric if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
21915ffd83dbSDimitry Andric HasChanged = ChangeStatus::CHANGED;
21925ffd83dbSDimitry Andric
21935ffd83dbSDimitry Andric return false;
21945ffd83dbSDimitry Andric };
21955ffd83dbSDimitry Andric
2196e8d8bef9SDimitry Andric auto CallCheck = [&](Instruction &I) {
2197bdd1243dSDimitry Andric std::optional<Value *> ReplVal = getValueForCall(A, I, ICV);
219881ad6265SDimitry Andric if (ReplVal && ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
2199e8d8bef9SDimitry Andric HasChanged = ChangeStatus::CHANGED;
2200e8d8bef9SDimitry Andric
2201e8d8bef9SDimitry Andric return true;
2202e8d8bef9SDimitry Andric };
2203e8d8bef9SDimitry Andric
2204e8d8bef9SDimitry Andric // Track all changes of an ICV.
22055ffd83dbSDimitry Andric SetterRFI.foreachUse(TrackValues, F);
2206e8d8bef9SDimitry Andric
2207fe6060f1SDimitry Andric bool UsedAssumedInformation = false;
2208e8d8bef9SDimitry Andric A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
2209fe6060f1SDimitry Andric UsedAssumedInformation,
2210e8d8bef9SDimitry Andric /* CheckBBLivenessOnly */ true);
2211e8d8bef9SDimitry Andric
2212e8d8bef9SDimitry Andric /// TODO: Figure out a way to avoid adding entry in
2213e8d8bef9SDimitry Andric /// ICVReplacementValuesMap
2214e8d8bef9SDimitry Andric Instruction *Entry = &F->getEntryBlock().front();
2215e8d8bef9SDimitry Andric if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
2216e8d8bef9SDimitry Andric ValuesMap.insert(std::make_pair(Entry, nullptr));
22175ffd83dbSDimitry Andric }
22185ffd83dbSDimitry Andric
22195ffd83dbSDimitry Andric return HasChanged;
22205ffd83dbSDimitry Andric }
22215ffd83dbSDimitry Andric
222204eeddc0SDimitry Andric /// Helper to check if \p I is a call and get the value for it if it is
2223e8d8bef9SDimitry Andric /// unique.
2224bdd1243dSDimitry Andric std::optional<Value *> getValueForCall(Attributor &A, const Instruction &I, 2225e8d8bef9SDimitry Andric InternalControlVar &ICV) const { 22265ffd83dbSDimitry Andric 222704eeddc0SDimitry Andric const auto *CB = dyn_cast<CallBase>(&I); 2228e8d8bef9SDimitry Andric if (!CB || CB->hasFnAttr("no_openmp") || 2229e8d8bef9SDimitry Andric CB->hasFnAttr("no_openmp_routines")) 2230bdd1243dSDimitry Andric return std::nullopt; 2231e8d8bef9SDimitry Andric 22325ffd83dbSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 22335ffd83dbSDimitry Andric auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter]; 2234e8d8bef9SDimitry Andric auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; 2235e8d8bef9SDimitry Andric Function *CalledFunction = CB->getCalledFunction(); 22365ffd83dbSDimitry Andric 2237e8d8bef9SDimitry Andric // Indirect call, assume ICV changes. 2238e8d8bef9SDimitry Andric if (CalledFunction == nullptr) 2239e8d8bef9SDimitry Andric return nullptr; 2240e8d8bef9SDimitry Andric if (CalledFunction == GetterRFI.Declaration) 2241bdd1243dSDimitry Andric return std::nullopt; 2242e8d8bef9SDimitry Andric if (CalledFunction == SetterRFI.Declaration) { 224304eeddc0SDimitry Andric if (ICVReplacementValuesMap[ICV].count(&I)) 224404eeddc0SDimitry Andric return ICVReplacementValuesMap[ICV].lookup(&I); 2245e8d8bef9SDimitry Andric 2246e8d8bef9SDimitry Andric return nullptr; 2247e8d8bef9SDimitry Andric } 2248e8d8bef9SDimitry Andric 2249e8d8bef9SDimitry Andric // Since we don't know, assume it changes the ICV. 2250e8d8bef9SDimitry Andric if (CalledFunction->isDeclaration()) 2251e8d8bef9SDimitry Andric return nullptr; 2252e8d8bef9SDimitry Andric 2253*06c3fb27SDimitry Andric const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>( 2254fe6060f1SDimitry Andric *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED); 2255e8d8bef9SDimitry Andric 2256*06c3fb27SDimitry Andric if (ICVTrackingAA->isAssumedTracked()) { 2257*06c3fb27SDimitry Andric std::optional<Value *> URV = 2258*06c3fb27SDimitry Andric ICVTrackingAA->getUniqueReplacementValue(ICV); 225981ad6265SDimitry Andric if (!URV || (*URV && AA::isValidAtPosition(AA::ValueAndContext(**URV, I), 226081ad6265SDimitry Andric OMPInfoCache))) 226104eeddc0SDimitry Andric return URV; 226204eeddc0SDimitry Andric } 2263e8d8bef9SDimitry Andric 2264e8d8bef9SDimitry Andric // If we don't know, assume it changes. 2265e8d8bef9SDimitry Andric return nullptr; 2266e8d8bef9SDimitry Andric } 2267e8d8bef9SDimitry Andric 2268bdd1243dSDimitry Andric // We don't check unique value for a function, so return std::nullopt. 2269bdd1243dSDimitry Andric std::optional<Value *> 2270e8d8bef9SDimitry Andric getUniqueReplacementValue(InternalControlVar ICV) const override { 2271bdd1243dSDimitry Andric return std::nullopt; 2272e8d8bef9SDimitry Andric } 2273e8d8bef9SDimitry Andric 2274e8d8bef9SDimitry Andric /// Return the value with which \p I can be replaced for specific \p ICV. 
2275bdd1243dSDimitry Andric std::optional<Value *> getReplacementValue(InternalControlVar ICV, 2276e8d8bef9SDimitry Andric const Instruction *I, 2277e8d8bef9SDimitry Andric Attributor &A) const override { 2278e8d8bef9SDimitry Andric const auto &ValuesMap = ICVReplacementValuesMap[ICV]; 2279e8d8bef9SDimitry Andric if (ValuesMap.count(I)) 2280e8d8bef9SDimitry Andric return ValuesMap.lookup(I); 2281e8d8bef9SDimitry Andric 2282e8d8bef9SDimitry Andric SmallVector<const Instruction *, 16> Worklist; 2283e8d8bef9SDimitry Andric SmallPtrSet<const Instruction *, 16> Visited; 2284e8d8bef9SDimitry Andric Worklist.push_back(I); 2285e8d8bef9SDimitry Andric 2286bdd1243dSDimitry Andric std::optional<Value *> ReplVal; 2287e8d8bef9SDimitry Andric 2288e8d8bef9SDimitry Andric while (!Worklist.empty()) { 2289e8d8bef9SDimitry Andric const Instruction *CurrInst = Worklist.pop_back_val(); 2290e8d8bef9SDimitry Andric if (!Visited.insert(CurrInst).second) 22915ffd83dbSDimitry Andric continue; 22925ffd83dbSDimitry Andric 2293e8d8bef9SDimitry Andric const BasicBlock *CurrBB = CurrInst->getParent(); 2294e8d8bef9SDimitry Andric 2295e8d8bef9SDimitry Andric // Go up and look for all potential setters/calls that might change the 2296e8d8bef9SDimitry Andric // ICV. 2297e8d8bef9SDimitry Andric while ((CurrInst = CurrInst->getPrevNode())) { 2298e8d8bef9SDimitry Andric if (ValuesMap.count(CurrInst)) { 2299bdd1243dSDimitry Andric std::optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst); 2300e8d8bef9SDimitry Andric // Unknown value, track new. 230181ad6265SDimitry Andric if (!ReplVal) { 2302e8d8bef9SDimitry Andric ReplVal = NewReplVal; 2303e8d8bef9SDimitry Andric break; 2304e8d8bef9SDimitry Andric } 2305e8d8bef9SDimitry Andric 2306e8d8bef9SDimitry Andric // If we found a new value, we can't know the icv value anymore. 230781ad6265SDimitry Andric if (NewReplVal) 2308e8d8bef9SDimitry Andric if (ReplVal != NewReplVal) 23095ffd83dbSDimitry Andric return nullptr; 23105ffd83dbSDimitry Andric 2311e8d8bef9SDimitry Andric break; 23125ffd83dbSDimitry Andric } 23135ffd83dbSDimitry Andric 2314bdd1243dSDimitry Andric std::optional<Value *> NewReplVal = getValueForCall(A, *CurrInst, ICV); 231581ad6265SDimitry Andric if (!NewReplVal) 2316e8d8bef9SDimitry Andric continue; 2317e8d8bef9SDimitry Andric 2318e8d8bef9SDimitry Andric // Unknown value, track new. 231981ad6265SDimitry Andric if (!ReplVal) { 2320e8d8bef9SDimitry Andric ReplVal = NewReplVal; 2321e8d8bef9SDimitry Andric break; 23225ffd83dbSDimitry Andric } 23235ffd83dbSDimitry Andric 2324e8d8bef9SDimitry Andric // if (NewReplVal.hasValue()) 2325e8d8bef9SDimitry Andric // We found a new value, we can't know the icv value anymore. 2326e8d8bef9SDimitry Andric if (ReplVal != NewReplVal) 23275ffd83dbSDimitry Andric return nullptr; 23285ffd83dbSDimitry Andric } 2329e8d8bef9SDimitry Andric 2330e8d8bef9SDimitry Andric // If we are in the same BB and we have a value, we are done. 233181ad6265SDimitry Andric if (CurrBB == I->getParent() && ReplVal) 2332e8d8bef9SDimitry Andric return ReplVal; 2333e8d8bef9SDimitry Andric 2334e8d8bef9SDimitry Andric // Go through all predecessors and add terminators for analysis. 
2335e8d8bef9SDimitry Andric for (const BasicBlock *Pred : predecessors(CurrBB)) 2336e8d8bef9SDimitry Andric if (const Instruction *Terminator = Pred->getTerminator()) 2337e8d8bef9SDimitry Andric Worklist.push_back(Terminator); 2338e8d8bef9SDimitry Andric } 2339e8d8bef9SDimitry Andric 2340e8d8bef9SDimitry Andric return ReplVal; 2341e8d8bef9SDimitry Andric } 2342e8d8bef9SDimitry Andric }; 2343e8d8bef9SDimitry Andric 2344e8d8bef9SDimitry Andric struct AAICVTrackerFunctionReturned : AAICVTracker { 2345e8d8bef9SDimitry Andric AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A) 2346e8d8bef9SDimitry Andric : AAICVTracker(IRP, A) {} 2347e8d8bef9SDimitry Andric 2348e8d8bef9SDimitry Andric // FIXME: come up with better string. 2349*06c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override { 2350e8d8bef9SDimitry Andric return "ICVTrackerFunctionReturned"; 2351e8d8bef9SDimitry Andric } 2352e8d8bef9SDimitry Andric 2353e8d8bef9SDimitry Andric // FIXME: come up with some stats. 2354e8d8bef9SDimitry Andric void trackStatistics() const override {} 2355e8d8bef9SDimitry Andric 2356e8d8bef9SDimitry Andric /// We don't manifest anything for this AA. 2357e8d8bef9SDimitry Andric ChangeStatus manifest(Attributor &A) override { 2358e8d8bef9SDimitry Andric return ChangeStatus::UNCHANGED; 2359e8d8bef9SDimitry Andric } 2360e8d8bef9SDimitry Andric 2361e8d8bef9SDimitry Andric // Map of ICV to their values at specific program point. 2362bdd1243dSDimitry Andric EnumeratedArray<std::optional<Value *>, InternalControlVar, 2363e8d8bef9SDimitry Andric InternalControlVar::ICV___last> 2364e8d8bef9SDimitry Andric ICVReplacementValuesMap; 2365e8d8bef9SDimitry Andric 2366e8d8bef9SDimitry Andric /// Return the value with which \p I can be replaced for specific \p ICV. 2367bdd1243dSDimitry Andric std::optional<Value *> 2368e8d8bef9SDimitry Andric getUniqueReplacementValue(InternalControlVar ICV) const override { 2369e8d8bef9SDimitry Andric return ICVReplacementValuesMap[ICV]; 2370e8d8bef9SDimitry Andric } 2371e8d8bef9SDimitry Andric 2372e8d8bef9SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 2373e8d8bef9SDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED; 2374*06c3fb27SDimitry Andric const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>( 2375fe6060f1SDimitry Andric *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); 2376e8d8bef9SDimitry Andric 2377*06c3fb27SDimitry Andric if (!ICVTrackingAA->isAssumedTracked()) 2378e8d8bef9SDimitry Andric return indicatePessimisticFixpoint(); 2379e8d8bef9SDimitry Andric 2380e8d8bef9SDimitry Andric for (InternalControlVar ICV : TrackableICVs) { 2381bdd1243dSDimitry Andric std::optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV]; 2382bdd1243dSDimitry Andric std::optional<Value *> UniqueICVValue; 2383e8d8bef9SDimitry Andric 2384e8d8bef9SDimitry Andric auto CheckReturnInst = [&](Instruction &I) { 2385bdd1243dSDimitry Andric std::optional<Value *> NewReplVal = 2386*06c3fb27SDimitry Andric ICVTrackingAA->getReplacementValue(ICV, &I, A); 2387e8d8bef9SDimitry Andric 2388e8d8bef9SDimitry Andric // If we found a second ICV value there is no unique returned value. 
238981ad6265SDimitry Andric if (UniqueICVValue && UniqueICVValue != NewReplVal) 2390e8d8bef9SDimitry Andric return false; 2391e8d8bef9SDimitry Andric 2392e8d8bef9SDimitry Andric UniqueICVValue = NewReplVal; 2393e8d8bef9SDimitry Andric 2394e8d8bef9SDimitry Andric return true; 2395e8d8bef9SDimitry Andric }; 2396e8d8bef9SDimitry Andric 2397fe6060f1SDimitry Andric bool UsedAssumedInformation = false; 2398e8d8bef9SDimitry Andric if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret}, 2399fe6060f1SDimitry Andric UsedAssumedInformation, 2400e8d8bef9SDimitry Andric /* CheckBBLivenessOnly */ true)) 2401e8d8bef9SDimitry Andric UniqueICVValue = nullptr; 2402e8d8bef9SDimitry Andric 2403e8d8bef9SDimitry Andric if (UniqueICVValue == ReplVal) 2404e8d8bef9SDimitry Andric continue; 2405e8d8bef9SDimitry Andric 2406e8d8bef9SDimitry Andric ReplVal = UniqueICVValue; 2407e8d8bef9SDimitry Andric Changed = ChangeStatus::CHANGED; 2408e8d8bef9SDimitry Andric } 2409e8d8bef9SDimitry Andric 2410e8d8bef9SDimitry Andric return Changed; 2411e8d8bef9SDimitry Andric } 2412e8d8bef9SDimitry Andric }; 2413e8d8bef9SDimitry Andric 2414e8d8bef9SDimitry Andric struct AAICVTrackerCallSite : AAICVTracker { 2415e8d8bef9SDimitry Andric AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A) 2416e8d8bef9SDimitry Andric : AAICVTracker(IRP, A) {} 2417e8d8bef9SDimitry Andric 2418e8d8bef9SDimitry Andric void initialize(Attributor &A) override { 2419*06c3fb27SDimitry Andric assert(getAnchorScope() && "Expected anchor function"); 2420e8d8bef9SDimitry Andric 2421e8d8bef9SDimitry Andric // We only initialize this AA for getters, so we need to know which ICV it 2422e8d8bef9SDimitry Andric // gets. 2423e8d8bef9SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 2424e8d8bef9SDimitry Andric for (InternalControlVar ICV : TrackableICVs) { 2425e8d8bef9SDimitry Andric auto ICVInfo = OMPInfoCache.ICVs[ICV]; 2426e8d8bef9SDimitry Andric auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter]; 2427e8d8bef9SDimitry Andric if (Getter.Declaration == getAssociatedFunction()) { 2428e8d8bef9SDimitry Andric AssociatedICV = ICVInfo.Kind; 2429e8d8bef9SDimitry Andric return; 2430e8d8bef9SDimitry Andric } 2431e8d8bef9SDimitry Andric } 2432e8d8bef9SDimitry Andric 2433e8d8bef9SDimitry Andric /// Unknown ICV. 2434e8d8bef9SDimitry Andric indicatePessimisticFixpoint(); 2435e8d8bef9SDimitry Andric } 2436e8d8bef9SDimitry Andric 2437e8d8bef9SDimitry Andric ChangeStatus manifest(Attributor &A) override { 243881ad6265SDimitry Andric if (!ReplVal || !*ReplVal) 2439e8d8bef9SDimitry Andric return ChangeStatus::UNCHANGED; 2440e8d8bef9SDimitry Andric 244181ad6265SDimitry Andric A.changeAfterManifest(IRPosition::inst(*getCtxI()), **ReplVal); 2442e8d8bef9SDimitry Andric A.deleteAfterManifest(*getCtxI()); 2443e8d8bef9SDimitry Andric 2444e8d8bef9SDimitry Andric return ChangeStatus::CHANGED; 2445e8d8bef9SDimitry Andric } 2446e8d8bef9SDimitry Andric 2447e8d8bef9SDimitry Andric // FIXME: come up with better string. 2448*06c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override { 2449*06c3fb27SDimitry Andric return "ICVTrackerCallSite"; 2450*06c3fb27SDimitry Andric } 2451e8d8bef9SDimitry Andric 2452e8d8bef9SDimitry Andric // FIXME: come up with some stats. 
2453e8d8bef9SDimitry Andric void trackStatistics() const override {} 2454e8d8bef9SDimitry Andric 2455e8d8bef9SDimitry Andric InternalControlVar AssociatedICV; 2456bdd1243dSDimitry Andric std::optional<Value *> ReplVal; 2457e8d8bef9SDimitry Andric 2458e8d8bef9SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 2459*06c3fb27SDimitry Andric const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>( 2460fe6060f1SDimitry Andric *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); 2461e8d8bef9SDimitry Andric 2462e8d8bef9SDimitry Andric // We don't have any information, so we assume it changes the ICV. 2463*06c3fb27SDimitry Andric if (!ICVTrackingAA->isAssumedTracked()) 2464e8d8bef9SDimitry Andric return indicatePessimisticFixpoint(); 2465e8d8bef9SDimitry Andric 2466bdd1243dSDimitry Andric std::optional<Value *> NewReplVal = 2467*06c3fb27SDimitry Andric ICVTrackingAA->getReplacementValue(AssociatedICV, getCtxI(), A); 2468e8d8bef9SDimitry Andric 2469e8d8bef9SDimitry Andric if (ReplVal == NewReplVal) 2470e8d8bef9SDimitry Andric return ChangeStatus::UNCHANGED; 2471e8d8bef9SDimitry Andric 2472e8d8bef9SDimitry Andric ReplVal = NewReplVal; 2473e8d8bef9SDimitry Andric return ChangeStatus::CHANGED; 2474e8d8bef9SDimitry Andric } 2475e8d8bef9SDimitry Andric 2476e8d8bef9SDimitry Andric // Return the value with which associated value can be replaced for specific 2477e8d8bef9SDimitry Andric // \p ICV. 2478bdd1243dSDimitry Andric std::optional<Value *> 2479e8d8bef9SDimitry Andric getUniqueReplacementValue(InternalControlVar ICV) const override { 2480e8d8bef9SDimitry Andric return ReplVal; 2481e8d8bef9SDimitry Andric } 2482e8d8bef9SDimitry Andric }; 2483e8d8bef9SDimitry Andric 2484e8d8bef9SDimitry Andric struct AAICVTrackerCallSiteReturned : AAICVTracker { 2485e8d8bef9SDimitry Andric AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A) 2486e8d8bef9SDimitry Andric : AAICVTracker(IRP, A) {} 2487e8d8bef9SDimitry Andric 2488e8d8bef9SDimitry Andric // FIXME: come up with better string. 2489*06c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override { 2490e8d8bef9SDimitry Andric return "ICVTrackerCallSiteReturned"; 2491e8d8bef9SDimitry Andric } 2492e8d8bef9SDimitry Andric 2493e8d8bef9SDimitry Andric // FIXME: come up with some stats. 2494e8d8bef9SDimitry Andric void trackStatistics() const override {} 2495e8d8bef9SDimitry Andric 2496e8d8bef9SDimitry Andric /// We don't manifest anything for this AA. 2497e8d8bef9SDimitry Andric ChangeStatus manifest(Attributor &A) override { 2498e8d8bef9SDimitry Andric return ChangeStatus::UNCHANGED; 2499e8d8bef9SDimitry Andric } 2500e8d8bef9SDimitry Andric 2501e8d8bef9SDimitry Andric // Map of ICV to their values at specific program point. 2502bdd1243dSDimitry Andric EnumeratedArray<std::optional<Value *>, InternalControlVar, 2503e8d8bef9SDimitry Andric InternalControlVar::ICV___last> 2504e8d8bef9SDimitry Andric ICVReplacementValuesMap; 2505e8d8bef9SDimitry Andric 2506e8d8bef9SDimitry Andric /// Return the value with which associated value can be replaced for specific 2507e8d8bef9SDimitry Andric /// \p ICV. 
2508bdd1243dSDimitry Andric std::optional<Value *> 2509e8d8bef9SDimitry Andric getUniqueReplacementValue(InternalControlVar ICV) const override { 2510e8d8bef9SDimitry Andric return ICVReplacementValuesMap[ICV]; 2511e8d8bef9SDimitry Andric } 2512e8d8bef9SDimitry Andric 2513e8d8bef9SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 2514e8d8bef9SDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED; 2515*06c3fb27SDimitry Andric const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>( 2516fe6060f1SDimitry Andric *this, IRPosition::returned(*getAssociatedFunction()), 2517fe6060f1SDimitry Andric DepClassTy::REQUIRED); 2518e8d8bef9SDimitry Andric 2519e8d8bef9SDimitry Andric // We don't have any information, so we assume it changes the ICV. 2520*06c3fb27SDimitry Andric if (!ICVTrackingAA->isAssumedTracked()) 2521e8d8bef9SDimitry Andric return indicatePessimisticFixpoint(); 2522e8d8bef9SDimitry Andric 2523e8d8bef9SDimitry Andric for (InternalControlVar ICV : TrackableICVs) { 2524bdd1243dSDimitry Andric std::optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV]; 2525bdd1243dSDimitry Andric std::optional<Value *> NewReplVal = 2526*06c3fb27SDimitry Andric ICVTrackingAA->getUniqueReplacementValue(ICV); 2527e8d8bef9SDimitry Andric 2528e8d8bef9SDimitry Andric if (ReplVal == NewReplVal) 2529e8d8bef9SDimitry Andric continue; 2530e8d8bef9SDimitry Andric 2531e8d8bef9SDimitry Andric ReplVal = NewReplVal; 2532e8d8bef9SDimitry Andric Changed = ChangeStatus::CHANGED; 2533e8d8bef9SDimitry Andric } 2534e8d8bef9SDimitry Andric return Changed; 2535e8d8bef9SDimitry Andric } 25365ffd83dbSDimitry Andric }; 2537fe6060f1SDimitry Andric 2538fe6060f1SDimitry Andric struct AAExecutionDomainFunction : public AAExecutionDomain { 2539fe6060f1SDimitry Andric AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A) 2540fe6060f1SDimitry Andric : AAExecutionDomain(IRP, A) {} 2541fe6060f1SDimitry Andric 2542*06c3fb27SDimitry Andric ~AAExecutionDomainFunction() { delete RPOT; } 2543bdd1243dSDimitry Andric 2544bdd1243dSDimitry Andric void initialize(Attributor &A) override { 2545*06c3fb27SDimitry Andric Function *F = getAnchorScope(); 2546*06c3fb27SDimitry Andric assert(F && "Expected anchor function"); 2547*06c3fb27SDimitry Andric RPOT = new ReversePostOrderTraversal<Function *>(F); 2548bdd1243dSDimitry Andric } 2549bdd1243dSDimitry Andric 2550*06c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override { 2551*06c3fb27SDimitry Andric unsigned TotalBlocks = 0, InitialThreadBlocks = 0, AlignedBlocks = 0; 2552bdd1243dSDimitry Andric for (auto &It : BEDMap) { 2553*06c3fb27SDimitry Andric if (!It.getFirst()) 2554*06c3fb27SDimitry Andric continue; 2555bdd1243dSDimitry Andric TotalBlocks++; 2556bdd1243dSDimitry Andric InitialThreadBlocks += It.getSecond().IsExecutedByInitialThreadOnly; 2557*06c3fb27SDimitry Andric AlignedBlocks += It.getSecond().IsReachedFromAlignedBarrierOnly && 2558*06c3fb27SDimitry Andric It.getSecond().IsReachingAlignedBarrierOnly; 2559bdd1243dSDimitry Andric } 2560bdd1243dSDimitry Andric return "[AAExecutionDomain] " + std::to_string(InitialThreadBlocks) + "/" + 2561*06c3fb27SDimitry Andric std::to_string(AlignedBlocks) + " of " + 2562*06c3fb27SDimitry Andric std::to_string(TotalBlocks) + 2563*06c3fb27SDimitry Andric " executed by initial thread / aligned"; 2564fe6060f1SDimitry Andric } 2565fe6060f1SDimitry Andric 2566fe6060f1SDimitry Andric /// See AbstractAttribute::trackStatistics(). 
2567fe6060f1SDimitry Andric void trackStatistics() const override {}
2568fe6060f1SDimitry Andric
2569fe6060f1SDimitry Andric ChangeStatus manifest(Attributor &A) override {
2570fe6060f1SDimitry Andric LLVM_DEBUG({
2571bdd1243dSDimitry Andric for (const BasicBlock &BB : *getAnchorScope()) {
2572bdd1243dSDimitry Andric if (!isExecutedByInitialThreadOnly(BB))
2573bdd1243dSDimitry Andric continue;
2574fe6060f1SDimitry Andric dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " "
2575bdd1243dSDimitry Andric << BB.getName() << " is executed by a single thread.\n";
2576bdd1243dSDimitry Andric }
2577fe6060f1SDimitry Andric });
2578bdd1243dSDimitry Andric
2579bdd1243dSDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED;
2580bdd1243dSDimitry Andric
2581bdd1243dSDimitry Andric if (DisableOpenMPOptBarrierElimination)
2582bdd1243dSDimitry Andric return Changed;
2583bdd1243dSDimitry Andric
2584bdd1243dSDimitry Andric SmallPtrSet<CallBase *, 16> DeletedBarriers;
2585bdd1243dSDimitry Andric auto HandleAlignedBarrier = [&](CallBase *CB) {
2586*06c3fb27SDimitry Andric const ExecutionDomainTy &ED = CB ? CEDMap[{CB, PRE}] : BEDMap[nullptr];
2587bdd1243dSDimitry Andric if (!ED.IsReachedFromAlignedBarrierOnly ||
2588bdd1243dSDimitry Andric ED.EncounteredNonLocalSideEffect)
2589bdd1243dSDimitry Andric return;
2590bdd1243dSDimitry Andric
2591bdd1243dSDimitry Andric // We can remove this barrier, if it is one, or all aligned barriers
2592bdd1243dSDimitry Andric // reaching the kernel end. In the latter case we can transitively work
2593bdd1243dSDimitry Andric // our way back until we find a barrier that guards a side-effect if we
2594bdd1243dSDimitry Andric // are dealing with the kernel end here.
2595bdd1243dSDimitry Andric if (CB) {
2596bdd1243dSDimitry Andric DeletedBarriers.insert(CB);
2597bdd1243dSDimitry Andric A.deleteAfterManifest(*CB);
2598bdd1243dSDimitry Andric ++NumBarriersEliminated;
2599bdd1243dSDimitry Andric Changed = ChangeStatus::CHANGED;
2600bdd1243dSDimitry Andric } else if (!ED.AlignedBarriers.empty()) {
2601bdd1243dSDimitry Andric NumBarriersEliminated += ED.AlignedBarriers.size();
2602bdd1243dSDimitry Andric Changed = ChangeStatus::CHANGED;
2603bdd1243dSDimitry Andric SmallVector<CallBase *> Worklist(ED.AlignedBarriers.begin(),
2604bdd1243dSDimitry Andric ED.AlignedBarriers.end());
2605bdd1243dSDimitry Andric SmallSetVector<CallBase *, 16> Visited;
2606bdd1243dSDimitry Andric while (!Worklist.empty()) {
2607bdd1243dSDimitry Andric CallBase *LastCB = Worklist.pop_back_val();
2608bdd1243dSDimitry Andric if (!Visited.insert(LastCB))
2609bdd1243dSDimitry Andric continue;
2610*06c3fb27SDimitry Andric if (LastCB->getFunction() != getAnchorScope())
2611*06c3fb27SDimitry Andric continue;
2612bdd1243dSDimitry Andric if (!DeletedBarriers.count(LastCB)) {
2613bdd1243dSDimitry Andric A.deleteAfterManifest(*LastCB);
2614bdd1243dSDimitry Andric continue;
2615bdd1243dSDimitry Andric }
2616bdd1243dSDimitry Andric // The final aligned barrier (LastCB) reaching the kernel end was
2617bdd1243dSDimitry Andric // removed already. This means we can go one step further and remove
2618bdd1243dSDimitry Andric // the barriers encountered last before LastCB.
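          // A hedged illustration, for a hypothetical kernel, of the transitive
          // cleanup below: given
          //   barrier A ; <no guarded side-effects> ; barrier B ; <kernel end>
          // the per-barrier handling above already deleted B, so the kernel-end
          // walk finds B in DeletedBarriers and continues through B's
          // pre-execution domain to A, which is deleted as well. The walk only
          // steps further back through barriers that were themselves removed.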
2619*06c3fb27SDimitry Andric const ExecutionDomainTy &LastED = CEDMap[{LastCB, PRE}]; 2620bdd1243dSDimitry Andric Worklist.append(LastED.AlignedBarriers.begin(), 2621bdd1243dSDimitry Andric LastED.AlignedBarriers.end()); 2622bdd1243dSDimitry Andric } 2623fe6060f1SDimitry Andric } 2624fe6060f1SDimitry Andric 2625bdd1243dSDimitry Andric // If we actually eliminated a barrier we need to eliminate the associated 2626bdd1243dSDimitry Andric // llvm.assumes as well to avoid creating UB. 2627bdd1243dSDimitry Andric if (!ED.EncounteredAssumes.empty() && (CB || !ED.AlignedBarriers.empty())) 2628bdd1243dSDimitry Andric for (auto *AssumeCB : ED.EncounteredAssumes) 2629bdd1243dSDimitry Andric A.deleteAfterManifest(*AssumeCB); 2630fe6060f1SDimitry Andric }; 2631fe6060f1SDimitry Andric 2632bdd1243dSDimitry Andric for (auto *CB : AlignedBarriers) 2633bdd1243dSDimitry Andric HandleAlignedBarrier(CB); 2634fe6060f1SDimitry Andric 2635bdd1243dSDimitry Andric // Handle the "kernel end barrier" for kernels too. 2636*06c3fb27SDimitry Andric if (omp::isKernel(*getAnchorScope())) 2637bdd1243dSDimitry Andric HandleAlignedBarrier(nullptr); 2638bdd1243dSDimitry Andric 2639bdd1243dSDimitry Andric return Changed; 2640bdd1243dSDimitry Andric } 2641bdd1243dSDimitry Andric 2642*06c3fb27SDimitry Andric bool isNoOpFence(const FenceInst &FI) const override { 2643*06c3fb27SDimitry Andric return getState().isValidState() && !NonNoOpFences.count(&FI); 2644*06c3fb27SDimitry Andric } 2645*06c3fb27SDimitry Andric 2646bdd1243dSDimitry Andric /// Merge barrier and assumption information from \p PredED into the successor 2647bdd1243dSDimitry Andric /// \p ED. 2648bdd1243dSDimitry Andric void 2649bdd1243dSDimitry Andric mergeInPredecessorBarriersAndAssumptions(Attributor &A, ExecutionDomainTy &ED, 2650bdd1243dSDimitry Andric const ExecutionDomainTy &PredED); 2651bdd1243dSDimitry Andric 2652bdd1243dSDimitry Andric /// Merge all information from \p PredED into the successor \p ED. If 2653bdd1243dSDimitry Andric /// \p InitialEdgeOnly is set, only the initial edge will enter the block 2654bdd1243dSDimitry Andric /// represented by \p ED from this predecessor. 2655*06c3fb27SDimitry Andric bool mergeInPredecessor(Attributor &A, ExecutionDomainTy &ED, 2656bdd1243dSDimitry Andric const ExecutionDomainTy &PredED, 2657bdd1243dSDimitry Andric bool InitialEdgeOnly = false); 2658bdd1243dSDimitry Andric 2659bdd1243dSDimitry Andric /// Accumulate information for the entry block in \p EntryBBED. 2660*06c3fb27SDimitry Andric bool handleCallees(Attributor &A, ExecutionDomainTy &EntryBBED); 2661bdd1243dSDimitry Andric 2662bdd1243dSDimitry Andric /// See AbstractAttribute::updateImpl. 
2663bdd1243dSDimitry Andric ChangeStatus updateImpl(Attributor &A) override; 2664bdd1243dSDimitry Andric 2665bdd1243dSDimitry Andric /// Query interface, see AAExecutionDomain 2666bdd1243dSDimitry Andric ///{ 2667bdd1243dSDimitry Andric bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override { 2668bdd1243dSDimitry Andric if (!isValidState()) 2669bdd1243dSDimitry Andric return false; 2670*06c3fb27SDimitry Andric assert(BB.getParent() == getAnchorScope() && "Block is out of scope!"); 2671bdd1243dSDimitry Andric return BEDMap.lookup(&BB).IsExecutedByInitialThreadOnly; 2672bdd1243dSDimitry Andric } 2673bdd1243dSDimitry Andric 2674bdd1243dSDimitry Andric bool isExecutedInAlignedRegion(Attributor &A, 2675bdd1243dSDimitry Andric const Instruction &I) const override { 26761ac55f4cSDimitry Andric assert(I.getFunction() == getAnchorScope() && 26771ac55f4cSDimitry Andric "Instruction is out of scope!"); 26781ac55f4cSDimitry Andric if (!isValidState()) 2679bdd1243dSDimitry Andric return false; 2680bdd1243dSDimitry Andric 2681*06c3fb27SDimitry Andric bool ForwardIsOk = true; 2682bdd1243dSDimitry Andric const Instruction *CurI; 2683bdd1243dSDimitry Andric 2684bdd1243dSDimitry Andric // Check forward until a call or the block end is reached. 2685bdd1243dSDimitry Andric CurI = &I; 2686bdd1243dSDimitry Andric do { 2687bdd1243dSDimitry Andric auto *CB = dyn_cast<CallBase>(CurI); 2688bdd1243dSDimitry Andric if (!CB) 2689bdd1243dSDimitry Andric continue; 2690*06c3fb27SDimitry Andric if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) 2691*06c3fb27SDimitry Andric return true; 2692*06c3fb27SDimitry Andric const auto &It = CEDMap.find({CB, PRE}); 2693bdd1243dSDimitry Andric if (It == CEDMap.end()) 2694bdd1243dSDimitry Andric continue; 26951ac55f4cSDimitry Andric if (!It->getSecond().IsReachingAlignedBarrierOnly) 2696*06c3fb27SDimitry Andric ForwardIsOk = false; 26971ac55f4cSDimitry Andric break; 2698bdd1243dSDimitry Andric } while ((CurI = CurI->getNextNonDebugInstruction())); 2699bdd1243dSDimitry Andric 27001ac55f4cSDimitry Andric if (!CurI && !BEDMap.lookup(I.getParent()).IsReachingAlignedBarrierOnly) 2701*06c3fb27SDimitry Andric ForwardIsOk = false; 2702bdd1243dSDimitry Andric 2703bdd1243dSDimitry Andric // Check backward until a call or the block beginning is reached. 2704bdd1243dSDimitry Andric CurI = &I; 2705bdd1243dSDimitry Andric do { 2706bdd1243dSDimitry Andric auto *CB = dyn_cast<CallBase>(CurI); 2707bdd1243dSDimitry Andric if (!CB) 2708bdd1243dSDimitry Andric continue; 2709*06c3fb27SDimitry Andric if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) 2710*06c3fb27SDimitry Andric return true; 2711*06c3fb27SDimitry Andric const auto &It = CEDMap.find({CB, POST}); 2712bdd1243dSDimitry Andric if (It == CEDMap.end()) 2713bdd1243dSDimitry Andric continue; 2714*06c3fb27SDimitry Andric if (It->getSecond().IsReachedFromAlignedBarrierOnly) 2715bdd1243dSDimitry Andric break; 2716bdd1243dSDimitry Andric return false; 2717bdd1243dSDimitry Andric } while ((CurI = CurI->getPrevNonDebugInstruction())); 2718bdd1243dSDimitry Andric 2719*06c3fb27SDimitry Andric // Delayed decision on the forward pass to allow aligned barrier detection 2720*06c3fb27SDimitry Andric // in the backwards traversal. 
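    // A hedged example (hypothetical, abbreviated IR) of why the forward
    // verdict is not returned immediately:
    //   call void @__kmpc_barrier_simple_spmd(...)  ; aligned barrier
    //   %v = load i32, ptr %p                       ; the queried instruction I
    //   call void @unknown()                        ; forward walk may fail here
    // The backward walk above hits the aligned barrier and returns true before
    // the ForwardIsOk check below is ever consulted.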
2721*06c3fb27SDimitry Andric if (!ForwardIsOk)
2722*06c3fb27SDimitry Andric return false;
2723*06c3fb27SDimitry Andric
2724*06c3fb27SDimitry Andric if (!CurI) {
2725*06c3fb27SDimitry Andric const BasicBlock *BB = I.getParent();
2726*06c3fb27SDimitry Andric if (BB == &BB->getParent()->getEntryBlock())
2727*06c3fb27SDimitry Andric return BEDMap.lookup(nullptr).IsReachedFromAlignedBarrierOnly;
2728*06c3fb27SDimitry Andric if (!llvm::all_of(predecessors(BB), [&](const BasicBlock *PredBB) {
2729bdd1243dSDimitry Andric return BEDMap.lookup(PredBB).IsReachedFromAlignedBarrierOnly;
2730bdd1243dSDimitry Andric })) {
2731bdd1243dSDimitry Andric return false;
2732bdd1243dSDimitry Andric }
2733*06c3fb27SDimitry Andric }
2734bdd1243dSDimitry Andric
2735bdd1243dSDimitry Andric // Neither traversal found anything but aligned barriers.
2736bdd1243dSDimitry Andric return true;
2737bdd1243dSDimitry Andric }
2738bdd1243dSDimitry Andric
2739bdd1243dSDimitry Andric ExecutionDomainTy getExecutionDomain(const BasicBlock &BB) const override {
2740bdd1243dSDimitry Andric assert(isValidState() &&
2741bdd1243dSDimitry Andric "No request should be made against an invalid state!");
2742bdd1243dSDimitry Andric return BEDMap.lookup(&BB);
2743bdd1243dSDimitry Andric }
2744*06c3fb27SDimitry Andric std::pair<ExecutionDomainTy, ExecutionDomainTy>
2745*06c3fb27SDimitry Andric getExecutionDomain(const CallBase &CB) const override {
2746bdd1243dSDimitry Andric assert(isValidState() &&
2747bdd1243dSDimitry Andric "No request should be made against an invalid state!");
2748*06c3fb27SDimitry Andric return {CEDMap.lookup({&CB, PRE}), CEDMap.lookup({&CB, POST})};
2749bdd1243dSDimitry Andric }
2750bdd1243dSDimitry Andric ExecutionDomainTy getFunctionExecutionDomain() const override {
2751bdd1243dSDimitry Andric assert(isValidState() &&
2752bdd1243dSDimitry Andric "No request should be made against an invalid state!");
2753*06c3fb27SDimitry Andric return InterProceduralED;
2754bdd1243dSDimitry Andric }
2755bdd1243dSDimitry Andric ///}
2756fe6060f1SDimitry Andric
2757349cc55cSDimitry Andric // Check if the edge into the successor block contains a condition that only
2758349cc55cSDimitry Andric // lets the main thread execute it.
2759bdd1243dSDimitry Andric static bool isInitialThreadOnlyEdge(Attributor &A, BranchInst *Edge,
2760bdd1243dSDimitry Andric BasicBlock &SuccessorBB) {
2761fe6060f1SDimitry Andric if (!Edge || !Edge->isConditional())
2762fe6060f1SDimitry Andric return false;
2763bdd1243dSDimitry Andric if (Edge->getSuccessor(0) != &SuccessorBB)
2764fe6060f1SDimitry Andric return false;
2765fe6060f1SDimitry Andric
2766fe6060f1SDimitry Andric auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition());
2767fe6060f1SDimitry Andric if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality())
2768fe6060f1SDimitry Andric return false;
2769fe6060f1SDimitry Andric
2770fe6060f1SDimitry Andric ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
2771fe6060f1SDimitry Andric if (!C)
2772fe6060f1SDimitry Andric return false;
2773fe6060f1SDimitry Andric
2774fe6060f1SDimitry Andric // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
2775fe6060f1SDimitry Andric if (C->isAllOnesValue()) {
2776fe6060f1SDimitry Andric auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
2777bdd1243dSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2778bdd1243dSDimitry Andric auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
2779fe6060f1SDimitry Andric CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
2780fe6060f1SDimitry Andric if (!CB)
2781fe6060f1SDimitry Andric return false;
2782349cc55cSDimitry Andric const int InitModeArgNo = 1;
2783349cc55cSDimitry Andric auto *ModeCI = dyn_cast<ConstantInt>(CB->getOperand(InitModeArgNo));
2784349cc55cSDimitry Andric return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC);
2785349cc55cSDimitry Andric }
2786349cc55cSDimitry Andric
2787349cc55cSDimitry Andric if (C->isZero()) {
2788349cc55cSDimitry Andric // Match: 0 == llvm.nvvm.read.ptx.sreg.tid.x()
2789349cc55cSDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
2790349cc55cSDimitry Andric if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
2791349cc55cSDimitry Andric return true;
2792349cc55cSDimitry Andric
2793349cc55cSDimitry Andric // Match: 0 == llvm.amdgcn.workitem.id.x()
2794349cc55cSDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
2795349cc55cSDimitry Andric if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
2796349cc55cSDimitry Andric return true;
2797fe6060f1SDimitry Andric }
2798fe6060f1SDimitry Andric
2799fe6060f1SDimitry Andric return false;
2800fe6060f1SDimitry Andric };
2801fe6060f1SDimitry Andric
2802*06c3fb27SDimitry Andric /// Mapping containing information about the function for other AAs.
2803*06c3fb27SDimitry Andric ExecutionDomainTy InterProceduralED;
2804*06c3fb27SDimitry Andric
2805*06c3fb27SDimitry Andric enum Direction { PRE = 0, POST = 1 };
2806bdd1243dSDimitry Andric /// Mapping containing information per block.
2807bdd1243dSDimitry Andric DenseMap<const BasicBlock *, ExecutionDomainTy> BEDMap;
2808*06c3fb27SDimitry Andric DenseMap<PointerIntPair<const CallBase *, 1, Direction>, ExecutionDomainTy>
2809*06c3fb27SDimitry Andric CEDMap;
2810bdd1243dSDimitry Andric SmallSetVector<CallBase *, 16> AlignedBarriers;
2811fe6060f1SDimitry Andric
2812bdd1243dSDimitry Andric ReversePostOrderTraversal<Function *> *RPOT = nullptr;
2813*06c3fb27SDimitry Andric
2814*06c3fb27SDimitry Andric /// Set \p R to \p V and report true if that changed \p R.
2815*06c3fb27SDimitry Andric static bool setAndRecord(bool &R, bool V) {
2816*06c3fb27SDimitry Andric bool Eq = (R == V);
2817*06c3fb27SDimitry Andric R = V;
2818*06c3fb27SDimitry Andric return !Eq;
2819*06c3fb27SDimitry Andric }
2820*06c3fb27SDimitry Andric
2821*06c3fb27SDimitry Andric /// Collection of fences known to be non-no-op. All fences not in this set
2822*06c3fb27SDimitry Andric /// can be assumed no-op.
2823*06c3fb27SDimitry Andric SmallPtrSet<const FenceInst *, 8> NonNoOpFences; 2824fe6060f1SDimitry Andric }; 2825fe6060f1SDimitry Andric 2826bdd1243dSDimitry Andric void AAExecutionDomainFunction::mergeInPredecessorBarriersAndAssumptions( 2827bdd1243dSDimitry Andric Attributor &A, ExecutionDomainTy &ED, const ExecutionDomainTy &PredED) { 2828bdd1243dSDimitry Andric for (auto *EA : PredED.EncounteredAssumes) 2829bdd1243dSDimitry Andric ED.addAssumeInst(A, *EA); 2830bdd1243dSDimitry Andric 2831bdd1243dSDimitry Andric for (auto *AB : PredED.AlignedBarriers) 2832bdd1243dSDimitry Andric ED.addAlignedBarrier(A, *AB); 2833fe6060f1SDimitry Andric } 2834fe6060f1SDimitry Andric 2835*06c3fb27SDimitry Andric bool AAExecutionDomainFunction::mergeInPredecessor( 2836bdd1243dSDimitry Andric Attributor &A, ExecutionDomainTy &ED, const ExecutionDomainTy &PredED, 2837bdd1243dSDimitry Andric bool InitialEdgeOnly) { 2838bdd1243dSDimitry Andric 2839*06c3fb27SDimitry Andric bool Changed = false; 2840*06c3fb27SDimitry Andric Changed |= 2841*06c3fb27SDimitry Andric setAndRecord(ED.IsExecutedByInitialThreadOnly, 2842*06c3fb27SDimitry Andric InitialEdgeOnly || (PredED.IsExecutedByInitialThreadOnly && 2843*06c3fb27SDimitry Andric ED.IsExecutedByInitialThreadOnly)); 2844*06c3fb27SDimitry Andric 2845*06c3fb27SDimitry Andric Changed |= setAndRecord(ED.IsReachedFromAlignedBarrierOnly, 2846*06c3fb27SDimitry Andric ED.IsReachedFromAlignedBarrierOnly && 2847*06c3fb27SDimitry Andric PredED.IsReachedFromAlignedBarrierOnly); 2848*06c3fb27SDimitry Andric Changed |= setAndRecord(ED.EncounteredNonLocalSideEffect, 2849*06c3fb27SDimitry Andric ED.EncounteredNonLocalSideEffect | 2850*06c3fb27SDimitry Andric PredED.EncounteredNonLocalSideEffect); 2851*06c3fb27SDimitry Andric // Do not track assumptions and barriers as part of Changed. 
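  // A worked example with hypothetical flag values for the merge above:
  // folding a predecessor with IsReachedFromAlignedBarrierOnly = false and
  // EncounteredNonLocalSideEffect = true into a successor that so far had
  // true/false yields false (conjunction) and true (disjunction); each flip is
  // reported through setAndRecord so callers can propagate
  // ChangeStatus::CHANGED. The merging of assumptions and aligned barriers
  // below is deliberately not counted as a change.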
2852bdd1243dSDimitry Andric if (ED.IsReachedFromAlignedBarrierOnly) 2853bdd1243dSDimitry Andric mergeInPredecessorBarriersAndAssumptions(A, ED, PredED); 2854bdd1243dSDimitry Andric else 2855bdd1243dSDimitry Andric ED.clearAssumeInstAndAlignedBarriers(); 2856*06c3fb27SDimitry Andric return Changed; 2857bdd1243dSDimitry Andric } 2858bdd1243dSDimitry Andric 2859*06c3fb27SDimitry Andric bool AAExecutionDomainFunction::handleCallees(Attributor &A, 2860bdd1243dSDimitry Andric ExecutionDomainTy &EntryBBED) { 2861*06c3fb27SDimitry Andric SmallVector<std::pair<ExecutionDomainTy, ExecutionDomainTy>, 4> CallSiteEDs; 2862bdd1243dSDimitry Andric auto PredForCallSite = [&](AbstractCallSite ACS) { 2863*06c3fb27SDimitry Andric const auto *EDAA = A.getAAFor<AAExecutionDomain>( 2864bdd1243dSDimitry Andric *this, IRPosition::function(*ACS.getInstruction()->getFunction()), 2865bdd1243dSDimitry Andric DepClassTy::OPTIONAL); 2866*06c3fb27SDimitry Andric if (!EDAA || !EDAA->getState().isValidState()) 2867bdd1243dSDimitry Andric return false; 2868*06c3fb27SDimitry Andric CallSiteEDs.emplace_back( 2869*06c3fb27SDimitry Andric EDAA->getExecutionDomain(*cast<CallBase>(ACS.getInstruction()))); 2870bdd1243dSDimitry Andric return true; 2871bdd1243dSDimitry Andric }; 2872bdd1243dSDimitry Andric 2873*06c3fb27SDimitry Andric ExecutionDomainTy ExitED; 2874bdd1243dSDimitry Andric bool AllCallSitesKnown; 2875bdd1243dSDimitry Andric if (A.checkForAllCallSites(PredForCallSite, *this, 2876bdd1243dSDimitry Andric /* RequiresAllCallSites */ true, 2877bdd1243dSDimitry Andric AllCallSitesKnown)) { 2878*06c3fb27SDimitry Andric for (const auto &[CSInED, CSOutED] : CallSiteEDs) { 2879*06c3fb27SDimitry Andric mergeInPredecessor(A, EntryBBED, CSInED); 2880*06c3fb27SDimitry Andric ExitED.IsReachingAlignedBarrierOnly &= 2881*06c3fb27SDimitry Andric CSOutED.IsReachingAlignedBarrierOnly; 2882*06c3fb27SDimitry Andric } 2883bdd1243dSDimitry Andric 2884bdd1243dSDimitry Andric } else { 2885bdd1243dSDimitry Andric // We could not find all predecessors, so this is either a kernel or a 2886bdd1243dSDimitry Andric // function with external linkage (or with some other weird uses). 
2887*06c3fb27SDimitry Andric if (omp::isKernel(*getAnchorScope())) { 2888bdd1243dSDimitry Andric EntryBBED.IsExecutedByInitialThreadOnly = false; 2889bdd1243dSDimitry Andric EntryBBED.IsReachedFromAlignedBarrierOnly = true; 2890bdd1243dSDimitry Andric EntryBBED.EncounteredNonLocalSideEffect = false; 2891*06c3fb27SDimitry Andric ExitED.IsReachingAlignedBarrierOnly = true; 2892bdd1243dSDimitry Andric } else { 2893bdd1243dSDimitry Andric EntryBBED.IsExecutedByInitialThreadOnly = false; 2894bdd1243dSDimitry Andric EntryBBED.IsReachedFromAlignedBarrierOnly = false; 2895bdd1243dSDimitry Andric EntryBBED.EncounteredNonLocalSideEffect = true; 2896*06c3fb27SDimitry Andric ExitED.IsReachingAlignedBarrierOnly = false; 2897bdd1243dSDimitry Andric } 2898bdd1243dSDimitry Andric } 2899bdd1243dSDimitry Andric 2900*06c3fb27SDimitry Andric bool Changed = false; 2901bdd1243dSDimitry Andric auto &FnED = BEDMap[nullptr]; 2902*06c3fb27SDimitry Andric Changed |= setAndRecord(FnED.IsReachedFromAlignedBarrierOnly, 2903*06c3fb27SDimitry Andric FnED.IsReachedFromAlignedBarrierOnly & 2904*06c3fb27SDimitry Andric EntryBBED.IsReachedFromAlignedBarrierOnly); 2905*06c3fb27SDimitry Andric Changed |= setAndRecord(FnED.IsReachingAlignedBarrierOnly, 2906*06c3fb27SDimitry Andric FnED.IsReachingAlignedBarrierOnly & 2907*06c3fb27SDimitry Andric ExitED.IsReachingAlignedBarrierOnly); 2908*06c3fb27SDimitry Andric Changed |= setAndRecord(FnED.IsExecutedByInitialThreadOnly, 2909*06c3fb27SDimitry Andric EntryBBED.IsExecutedByInitialThreadOnly); 2910*06c3fb27SDimitry Andric return Changed; 2911bdd1243dSDimitry Andric } 2912bdd1243dSDimitry Andric 2913bdd1243dSDimitry Andric ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { 2914bdd1243dSDimitry Andric 2915bdd1243dSDimitry Andric bool Changed = false; 2916bdd1243dSDimitry Andric 2917bdd1243dSDimitry Andric // Helper to deal with an aligned barrier encountered during the forward 2918bdd1243dSDimitry Andric // traversal. \p CB is the aligned barrier, \p ED is the execution domain when 2919bdd1243dSDimitry Andric // it was encountered. 2920*06c3fb27SDimitry Andric auto HandleAlignedBarrier = [&](CallBase &CB, ExecutionDomainTy &ED) { 2921*06c3fb27SDimitry Andric Changed |= AlignedBarriers.insert(&CB); 2922bdd1243dSDimitry Andric // First, update the barrier ED kept in the separate CEDMap. 2923*06c3fb27SDimitry Andric auto &CallInED = CEDMap[{&CB, PRE}]; 2924*06c3fb27SDimitry Andric Changed |= mergeInPredecessor(A, CallInED, ED); 2925*06c3fb27SDimitry Andric CallInED.IsReachingAlignedBarrierOnly = true; 2926bdd1243dSDimitry Andric // Next adjust the ED we use for the traversal. 2927bdd1243dSDimitry Andric ED.EncounteredNonLocalSideEffect = false; 2928bdd1243dSDimitry Andric ED.IsReachedFromAlignedBarrierOnly = true; 2929bdd1243dSDimitry Andric // Aligned barrier collection has to come last. 
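    // A hedged sketch of the effect on the traversal state, assuming a
    // hypothetical aligned barrier call such as
    //   call void @__kmpc_barrier_simple_spmd(...)
    // Everything before it counts as published: the non-local side-effect flag
    // is cleared, the reached-from-aligned-barrier flag is set, and the barrier
    // set is reduced to just this call for the instructions that follow.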
2930bdd1243dSDimitry Andric ED.clearAssumeInstAndAlignedBarriers();
2931*06c3fb27SDimitry Andric ED.addAlignedBarrier(A, CB);
2932*06c3fb27SDimitry Andric auto &CallOutED = CEDMap[{&CB, POST}];
2933*06c3fb27SDimitry Andric Changed |= mergeInPredecessor(A, CallOutED, ED);
2934bdd1243dSDimitry Andric };
2935bdd1243dSDimitry Andric
2936*06c3fb27SDimitry Andric auto *LivenessAA =
2937bdd1243dSDimitry Andric A.getAAFor<AAIsDead>(*this, getIRPosition(), DepClassTy::OPTIONAL);
2938bdd1243dSDimitry Andric
2939bdd1243dSDimitry Andric Function *F = getAnchorScope();
2940bdd1243dSDimitry Andric BasicBlock &EntryBB = F->getEntryBlock();
2941*06c3fb27SDimitry Andric bool IsKernel = omp::isKernel(*F);
2942bdd1243dSDimitry Andric
2943bdd1243dSDimitry Andric SmallVector<Instruction *> SyncInstWorklist;
2944bdd1243dSDimitry Andric for (auto &RIt : *RPOT) {
2945bdd1243dSDimitry Andric BasicBlock &BB = *RIt;
2946bdd1243dSDimitry Andric
2947bdd1243dSDimitry Andric bool IsEntryBB = &BB == &EntryBB;
2948bdd1243dSDimitry Andric // TODO: We use local reasoning since we don't have a divergence analysis
2949bdd1243dSDimitry Andric // running as well. We could basically allow uniform branches here.
2950bdd1243dSDimitry Andric bool AlignedBarrierLastInBlock = IsEntryBB && IsKernel;
2951*06c3fb27SDimitry Andric bool IsExplicitlyAligned = IsEntryBB && IsKernel;
2952bdd1243dSDimitry Andric ExecutionDomainTy ED;
2953bdd1243dSDimitry Andric // Propagate "incoming edges" into information about this block.
2954bdd1243dSDimitry Andric if (IsEntryBB) {
2955*06c3fb27SDimitry Andric Changed |= handleCallees(A, ED);
2956bdd1243dSDimitry Andric } else {
2957bdd1243dSDimitry Andric // For live non-entry blocks we only propagate
2958bdd1243dSDimitry Andric // information via live edges.
2959*06c3fb27SDimitry Andric if (LivenessAA && LivenessAA->isAssumedDead(&BB))
2960bdd1243dSDimitry Andric continue;
2961bdd1243dSDimitry Andric
2962bdd1243dSDimitry Andric for (auto *PredBB : predecessors(&BB)) {
2963*06c3fb27SDimitry Andric if (LivenessAA && LivenessAA->isEdgeDead(PredBB, &BB))
2964bdd1243dSDimitry Andric continue;
2965bdd1243dSDimitry Andric bool InitialEdgeOnly = isInitialThreadOnlyEdge(
2966bdd1243dSDimitry Andric A, dyn_cast<BranchInst>(PredBB->getTerminator()), BB);
2967bdd1243dSDimitry Andric mergeInPredecessor(A, ED, BEDMap[PredBB], InitialEdgeOnly);
2968bdd1243dSDimitry Andric }
2969bdd1243dSDimitry Andric }
2970bdd1243dSDimitry Andric
2971bdd1243dSDimitry Andric // Now we traverse the block, accumulate effects in ED and attach
2972bdd1243dSDimitry Andric // information to calls.
2973bdd1243dSDimitry Andric for (Instruction &I : BB) {
2974bdd1243dSDimitry Andric bool UsedAssumedInformation;
2975*06c3fb27SDimitry Andric if (A.isAssumedDead(I, *this, LivenessAA, UsedAssumedInformation,
2976bdd1243dSDimitry Andric /* CheckBBLivenessOnly */ false, DepClassTy::OPTIONAL,
2977bdd1243dSDimitry Andric /* CheckForDeadStore */ true))
2978bdd1243dSDimitry Andric continue;
2979bdd1243dSDimitry Andric
2980bdd1243dSDimitry Andric // Assumes and "assume-like" (dbg, lifetime, ...) are handled first; the
2981bdd1243dSDimitry Andric // former is collected, the latter is ignored.
2982bdd1243dSDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
2983bdd1243dSDimitry Andric if (auto *AI = dyn_cast_or_null<AssumeInst>(II)) {
2984bdd1243dSDimitry Andric ED.addAssumeInst(A, *AI);
2985bdd1243dSDimitry Andric continue;
2986bdd1243dSDimitry Andric }
2987bdd1243dSDimitry Andric // TODO: Should we also collect and delete lifetime markers?
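// Other assume-like intrinsics (dbg.*, lifetime.*, ...) neither synchronize
// nor have effects visible to other threads, so they are simply skipped.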
2988bdd1243dSDimitry Andric if (II->isAssumeLikeIntrinsic()) 2989bdd1243dSDimitry Andric continue; 2990bdd1243dSDimitry Andric } 2991bdd1243dSDimitry Andric 2992*06c3fb27SDimitry Andric if (auto *FI = dyn_cast<FenceInst>(&I)) { 2993*06c3fb27SDimitry Andric if (!ED.EncounteredNonLocalSideEffect) { 2994*06c3fb27SDimitry Andric // An aligned fence without non-local side-effects is a no-op. 2995*06c3fb27SDimitry Andric if (ED.IsReachedFromAlignedBarrierOnly) 2996*06c3fb27SDimitry Andric continue; 2997*06c3fb27SDimitry Andric // A non-aligned fence without non-local side-effects is a no-op 2998*06c3fb27SDimitry Andric // if the ordering only publishes non-local side-effects (or less). 2999*06c3fb27SDimitry Andric switch (FI->getOrdering()) { 3000*06c3fb27SDimitry Andric case AtomicOrdering::NotAtomic: 3001*06c3fb27SDimitry Andric continue; 3002*06c3fb27SDimitry Andric case AtomicOrdering::Unordered: 3003*06c3fb27SDimitry Andric continue; 3004*06c3fb27SDimitry Andric case AtomicOrdering::Monotonic: 3005*06c3fb27SDimitry Andric continue; 3006*06c3fb27SDimitry Andric case AtomicOrdering::Acquire: 3007*06c3fb27SDimitry Andric break; 3008*06c3fb27SDimitry Andric case AtomicOrdering::Release: 3009*06c3fb27SDimitry Andric continue; 3010*06c3fb27SDimitry Andric case AtomicOrdering::AcquireRelease: 3011*06c3fb27SDimitry Andric break; 3012*06c3fb27SDimitry Andric case AtomicOrdering::SequentiallyConsistent: 3013*06c3fb27SDimitry Andric break; 3014*06c3fb27SDimitry Andric }; 3015*06c3fb27SDimitry Andric } 3016*06c3fb27SDimitry Andric NonNoOpFences.insert(FI); 3017*06c3fb27SDimitry Andric } 3018*06c3fb27SDimitry Andric 3019bdd1243dSDimitry Andric auto *CB = dyn_cast<CallBase>(&I); 3020bdd1243dSDimitry Andric bool IsNoSync = AA::isNoSyncInst(A, I, *this); 3021bdd1243dSDimitry Andric bool IsAlignedBarrier = 3022bdd1243dSDimitry Andric !IsNoSync && CB && 3023bdd1243dSDimitry Andric AANoSync::isAlignedBarrier(*CB, AlignedBarrierLastInBlock); 3024bdd1243dSDimitry Andric 3025bdd1243dSDimitry Andric AlignedBarrierLastInBlock &= IsNoSync; 3026*06c3fb27SDimitry Andric IsExplicitlyAligned &= IsNoSync; 3027bdd1243dSDimitry Andric 3028bdd1243dSDimitry Andric // Next we check for calls. Aligned barriers are handled 3029bdd1243dSDimitry Andric // explicitly, everything else is kept for the backward traversal and will 3030bdd1243dSDimitry Andric // also affect our state. 3031bdd1243dSDimitry Andric if (CB) { 3032bdd1243dSDimitry Andric if (IsAlignedBarrier) { 3033*06c3fb27SDimitry Andric HandleAlignedBarrier(*CB, ED); 3034bdd1243dSDimitry Andric AlignedBarrierLastInBlock = true; 3035*06c3fb27SDimitry Andric IsExplicitlyAligned = true; 3036bdd1243dSDimitry Andric continue; 3037bdd1243dSDimitry Andric } 3038bdd1243dSDimitry Andric 3039bdd1243dSDimitry Andric // Check the pointer(s) of a memory intrinsic explicitly. 
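// E.g., a memcpy into a thread-private alloca is not ordered by a barrier,
// while one into a globalized or shared buffer is a non-local side effect
// that a later aligned barrier would have to publish.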
3040bdd1243dSDimitry Andric if (isa<MemIntrinsic>(&I)) { 3041bdd1243dSDimitry Andric if (!ED.EncounteredNonLocalSideEffect && 3042bdd1243dSDimitry Andric AA::isPotentiallyAffectedByBarrier(A, I, *this)) 3043bdd1243dSDimitry Andric ED.EncounteredNonLocalSideEffect = true; 3044bdd1243dSDimitry Andric if (!IsNoSync) { 3045bdd1243dSDimitry Andric ED.IsReachedFromAlignedBarrierOnly = false; 3046bdd1243dSDimitry Andric SyncInstWorklist.push_back(&I); 3047bdd1243dSDimitry Andric } 3048bdd1243dSDimitry Andric continue; 3049bdd1243dSDimitry Andric } 3050bdd1243dSDimitry Andric 3051bdd1243dSDimitry Andric // Record how we entered the call, then accumulate the effect of the 3052bdd1243dSDimitry Andric // call in ED for potential use by the callee. 3053*06c3fb27SDimitry Andric auto &CallInED = CEDMap[{CB, PRE}]; 3054*06c3fb27SDimitry Andric Changed |= mergeInPredecessor(A, CallInED, ED); 3055bdd1243dSDimitry Andric 3056bdd1243dSDimitry Andric // If we have a sync-definition we can check if it starts/ends in an 3057bdd1243dSDimitry Andric // aligned barrier. If we are unsure we assume any sync breaks 3058bdd1243dSDimitry Andric // alignment. 3059bdd1243dSDimitry Andric Function *Callee = CB->getCalledFunction(); 3060bdd1243dSDimitry Andric if (!IsNoSync && Callee && !Callee->isDeclaration()) { 3061*06c3fb27SDimitry Andric const auto *EDAA = A.getAAFor<AAExecutionDomain>( 3062bdd1243dSDimitry Andric *this, IRPosition::function(*Callee), DepClassTy::OPTIONAL); 3063*06c3fb27SDimitry Andric if (EDAA && EDAA->getState().isValidState()) { 3064*06c3fb27SDimitry Andric const auto &CalleeED = EDAA->getFunctionExecutionDomain(); 3065bdd1243dSDimitry Andric ED.IsReachedFromAlignedBarrierOnly = 3066bdd1243dSDimitry Andric CalleeED.IsReachedFromAlignedBarrierOnly; 3067bdd1243dSDimitry Andric AlignedBarrierLastInBlock = ED.IsReachedFromAlignedBarrierOnly; 3068bdd1243dSDimitry Andric if (IsNoSync || !CalleeED.IsReachedFromAlignedBarrierOnly) 3069bdd1243dSDimitry Andric ED.EncounteredNonLocalSideEffect |= 3070bdd1243dSDimitry Andric CalleeED.EncounteredNonLocalSideEffect; 3071bdd1243dSDimitry Andric else 3072bdd1243dSDimitry Andric ED.EncounteredNonLocalSideEffect = 3073bdd1243dSDimitry Andric CalleeED.EncounteredNonLocalSideEffect; 3074*06c3fb27SDimitry Andric if (!CalleeED.IsReachingAlignedBarrierOnly) { 3075*06c3fb27SDimitry Andric Changed |= 3076*06c3fb27SDimitry Andric setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false); 3077bdd1243dSDimitry Andric SyncInstWorklist.push_back(&I); 3078*06c3fb27SDimitry Andric } 3079bdd1243dSDimitry Andric if (CalleeED.IsReachedFromAlignedBarrierOnly) 3080bdd1243dSDimitry Andric mergeInPredecessorBarriersAndAssumptions(A, ED, CalleeED); 3081*06c3fb27SDimitry Andric auto &CallOutED = CEDMap[{CB, POST}]; 3082*06c3fb27SDimitry Andric Changed |= mergeInPredecessor(A, CallOutED, ED); 3083bdd1243dSDimitry Andric continue; 3084bdd1243dSDimitry Andric } 3085bdd1243dSDimitry Andric } 3086*06c3fb27SDimitry Andric if (!IsNoSync) { 3087*06c3fb27SDimitry Andric ED.IsReachedFromAlignedBarrierOnly = false; 3088*06c3fb27SDimitry Andric Changed |= setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false); 3089*06c3fb27SDimitry Andric SyncInstWorklist.push_back(&I); 3090*06c3fb27SDimitry Andric } 3091bdd1243dSDimitry Andric AlignedBarrierLastInBlock &= ED.IsReachedFromAlignedBarrierOnly; 3092bdd1243dSDimitry Andric ED.EncounteredNonLocalSideEffect |= !CB->doesNotAccessMemory(); 3093*06c3fb27SDimitry Andric auto &CallOutED = CEDMap[{CB, POST}]; 3094*06c3fb27SDimitry Andric Changed |= 
mergeInPredecessor(A, CallOutED, ED); 3095bdd1243dSDimitry Andric } 3096bdd1243dSDimitry Andric 3097bdd1243dSDimitry Andric if (!I.mayHaveSideEffects() && !I.mayReadFromMemory()) 3098bdd1243dSDimitry Andric continue; 3099bdd1243dSDimitry Andric 3100bdd1243dSDimitry Andric // If we have a callee we try to use fine-grained information to 3101bdd1243dSDimitry Andric // determine local side-effects. 3102bdd1243dSDimitry Andric if (CB) { 3103*06c3fb27SDimitry Andric const auto *MemAA = A.getAAFor<AAMemoryLocation>( 3104bdd1243dSDimitry Andric *this, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL); 3105bdd1243dSDimitry Andric 3106bdd1243dSDimitry Andric auto AccessPred = [&](const Instruction *I, const Value *Ptr, 3107bdd1243dSDimitry Andric AAMemoryLocation::AccessKind, 3108bdd1243dSDimitry Andric AAMemoryLocation::MemoryLocationsKind) { 3109bdd1243dSDimitry Andric return !AA::isPotentiallyAffectedByBarrier(A, {Ptr}, *this, I); 3110bdd1243dSDimitry Andric }; 3111*06c3fb27SDimitry Andric if (MemAA && MemAA->getState().isValidState() && 3112*06c3fb27SDimitry Andric MemAA->checkForAllAccessesToMemoryKind( 3113bdd1243dSDimitry Andric AccessPred, AAMemoryLocation::ALL_LOCATIONS)) 3114bdd1243dSDimitry Andric continue; 3115bdd1243dSDimitry Andric } 3116bdd1243dSDimitry Andric 3117*06c3fb27SDimitry Andric auto &InfoCache = A.getInfoCache(); 3118*06c3fb27SDimitry Andric if (!I.mayHaveSideEffects() && InfoCache.isOnlyUsedByAssume(I)) 3119bdd1243dSDimitry Andric continue; 3120bdd1243dSDimitry Andric 3121bdd1243dSDimitry Andric if (auto *LI = dyn_cast<LoadInst>(&I)) 3122bdd1243dSDimitry Andric if (LI->hasMetadata(LLVMContext::MD_invariant_load)) 3123bdd1243dSDimitry Andric continue; 3124bdd1243dSDimitry Andric 3125bdd1243dSDimitry Andric if (!ED.EncounteredNonLocalSideEffect && 3126bdd1243dSDimitry Andric AA::isPotentiallyAffectedByBarrier(A, I, *this)) 3127bdd1243dSDimitry Andric ED.EncounteredNonLocalSideEffect = true; 3128bdd1243dSDimitry Andric } 3129bdd1243dSDimitry Andric 3130*06c3fb27SDimitry Andric bool IsEndAndNotReachingAlignedBarriersOnly = false; 3131bdd1243dSDimitry Andric if (!isa<UnreachableInst>(BB.getTerminator()) && 3132bdd1243dSDimitry Andric !BB.getTerminator()->getNumSuccessors()) { 3133bdd1243dSDimitry Andric 3134*06c3fb27SDimitry Andric Changed |= mergeInPredecessor(A, InterProceduralED, ED); 3135bdd1243dSDimitry Andric 3136*06c3fb27SDimitry Andric auto &FnED = BEDMap[nullptr]; 3137*06c3fb27SDimitry Andric if (IsKernel && !IsExplicitlyAligned) 3138*06c3fb27SDimitry Andric FnED.IsReachingAlignedBarrierOnly = false; 3139*06c3fb27SDimitry Andric Changed |= mergeInPredecessor(A, FnED, ED); 3140*06c3fb27SDimitry Andric 3141*06c3fb27SDimitry Andric if (!FnED.IsReachingAlignedBarrierOnly) { 3142*06c3fb27SDimitry Andric IsEndAndNotReachingAlignedBarriersOnly = true; 3143*06c3fb27SDimitry Andric SyncInstWorklist.push_back(BB.getTerminator()); 3144*06c3fb27SDimitry Andric auto &BBED = BEDMap[&BB]; 3145*06c3fb27SDimitry Andric Changed |= setAndRecord(BBED.IsReachingAlignedBarrierOnly, false); 3146*06c3fb27SDimitry Andric } 3147bdd1243dSDimitry Andric } 3148bdd1243dSDimitry Andric 3149bdd1243dSDimitry Andric ExecutionDomainTy &StoredED = BEDMap[&BB]; 3150*06c3fb27SDimitry Andric ED.IsReachingAlignedBarrierOnly = StoredED.IsReachingAlignedBarrierOnly & 3151*06c3fb27SDimitry Andric !IsEndAndNotReachingAlignedBarriersOnly; 3152bdd1243dSDimitry Andric 3153bdd1243dSDimitry Andric // Check if we computed anything different as part of the forward 3154bdd1243dSDimitry Andric // traversal. 
We do not take assumptions and aligned barriers into account 3155bdd1243dSDimitry Andric // as they do not influence the state we iterate. Backward traversal values 3156bdd1243dSDimitry Andric // are handled later on. 3157bdd1243dSDimitry Andric if (ED.IsExecutedByInitialThreadOnly != 3158bdd1243dSDimitry Andric StoredED.IsExecutedByInitialThreadOnly || 3159bdd1243dSDimitry Andric ED.IsReachedFromAlignedBarrierOnly != 3160bdd1243dSDimitry Andric StoredED.IsReachedFromAlignedBarrierOnly || 3161bdd1243dSDimitry Andric ED.EncounteredNonLocalSideEffect != 3162bdd1243dSDimitry Andric StoredED.EncounteredNonLocalSideEffect) 3163bdd1243dSDimitry Andric Changed = true; 3164bdd1243dSDimitry Andric 3165bdd1243dSDimitry Andric // Update the state with the new value. 3166bdd1243dSDimitry Andric StoredED = std::move(ED); 3167bdd1243dSDimitry Andric } 3168bdd1243dSDimitry Andric 3169bdd1243dSDimitry Andric // Propagate (non-aligned) sync instruction effects backwards until the 3170bdd1243dSDimitry Andric // entry is hit or an aligned barrier. 3171bdd1243dSDimitry Andric SmallSetVector<BasicBlock *, 16> Visited; 3172bdd1243dSDimitry Andric while (!SyncInstWorklist.empty()) { 3173bdd1243dSDimitry Andric Instruction *SyncInst = SyncInstWorklist.pop_back_val(); 3174bdd1243dSDimitry Andric Instruction *CurInst = SyncInst; 3175*06c3fb27SDimitry Andric bool HitAlignedBarrierOrKnownEnd = false; 3176bdd1243dSDimitry Andric while ((CurInst = CurInst->getPrevNode())) { 3177bdd1243dSDimitry Andric auto *CB = dyn_cast<CallBase>(CurInst); 3178bdd1243dSDimitry Andric if (!CB) 3179bdd1243dSDimitry Andric continue; 3180*06c3fb27SDimitry Andric auto &CallOutED = CEDMap[{CB, POST}]; 3181*06c3fb27SDimitry Andric Changed |= setAndRecord(CallOutED.IsReachingAlignedBarrierOnly, false); 3182*06c3fb27SDimitry Andric auto &CallInED = CEDMap[{CB, PRE}]; 3183*06c3fb27SDimitry Andric HitAlignedBarrierOrKnownEnd = 3184*06c3fb27SDimitry Andric AlignedBarriers.count(CB) || !CallInED.IsReachingAlignedBarrierOnly; 3185*06c3fb27SDimitry Andric if (HitAlignedBarrierOrKnownEnd) 3186bdd1243dSDimitry Andric break; 3187*06c3fb27SDimitry Andric Changed |= setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false); 3188bdd1243dSDimitry Andric } 3189*06c3fb27SDimitry Andric if (HitAlignedBarrierOrKnownEnd) 3190bdd1243dSDimitry Andric continue; 3191bdd1243dSDimitry Andric BasicBlock *SyncBB = SyncInst->getParent(); 3192bdd1243dSDimitry Andric for (auto *PredBB : predecessors(SyncBB)) { 3193*06c3fb27SDimitry Andric if (LivenessAA && LivenessAA->isEdgeDead(PredBB, SyncBB)) 3194bdd1243dSDimitry Andric continue; 3195bdd1243dSDimitry Andric if (!Visited.insert(PredBB)) 3196bdd1243dSDimitry Andric continue; 3197bdd1243dSDimitry Andric auto &PredED = BEDMap[PredBB]; 3198*06c3fb27SDimitry Andric if (setAndRecord(PredED.IsReachingAlignedBarrierOnly, false)) { 3199bdd1243dSDimitry Andric Changed = true; 3200*06c3fb27SDimitry Andric SyncInstWorklist.push_back(PredBB->getTerminator()); 3201*06c3fb27SDimitry Andric } 3202bdd1243dSDimitry Andric } 3203bdd1243dSDimitry Andric if (SyncBB != &EntryBB) 3204bdd1243dSDimitry Andric continue; 3205*06c3fb27SDimitry Andric Changed |= 3206*06c3fb27SDimitry Andric setAndRecord(InterProceduralED.IsReachingAlignedBarrierOnly, false); 3207bdd1243dSDimitry Andric } 3208bdd1243dSDimitry Andric 3209bdd1243dSDimitry Andric return Changed ? 
ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
3210fe6060f1SDimitry Andric }
3211fe6060f1SDimitry Andric
3212fe6060f1SDimitry Andric /// Try to replace memory allocation calls called by a single thread with a
3213fe6060f1SDimitry Andric /// static buffer of shared memory.
3214fe6060f1SDimitry Andric struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> {
3215fe6060f1SDimitry Andric using Base = StateWrapper<BooleanState, AbstractAttribute>;
3216fe6060f1SDimitry Andric AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
3217fe6060f1SDimitry Andric
3218fe6060f1SDimitry Andric /// Create an abstract attribute view for the position \p IRP.
3219fe6060f1SDimitry Andric static AAHeapToShared &createForPosition(const IRPosition &IRP,
3220fe6060f1SDimitry Andric Attributor &A);
3221fe6060f1SDimitry Andric
3222fe6060f1SDimitry Andric /// Returns true if HeapToShared conversion is assumed to be possible.
3223fe6060f1SDimitry Andric virtual bool isAssumedHeapToShared(CallBase &CB) const = 0;
3224fe6060f1SDimitry Andric
3225fe6060f1SDimitry Andric /// Returns true if HeapToShared conversion is assumed and the CB is a
3226fe6060f1SDimitry Andric /// callsite to a free operation to be removed.
3227fe6060f1SDimitry Andric virtual bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const = 0;
3228fe6060f1SDimitry Andric
3229fe6060f1SDimitry Andric /// See AbstractAttribute::getName().
3230fe6060f1SDimitry Andric const std::string getName() const override { return "AAHeapToShared"; }
3231fe6060f1SDimitry Andric
3232fe6060f1SDimitry Andric /// See AbstractAttribute::getIdAddr().
3233fe6060f1SDimitry Andric const char *getIdAddr() const override { return &ID; }
3234fe6060f1SDimitry Andric
3235fe6060f1SDimitry Andric /// This function should return true if the type of the \p AA is
3236fe6060f1SDimitry Andric /// AAHeapToShared.
3237fe6060f1SDimitry Andric static bool classof(const AbstractAttribute *AA) {
3238fe6060f1SDimitry Andric return (AA->getIdAddr() == &ID);
3239fe6060f1SDimitry Andric }
3240fe6060f1SDimitry Andric
3241fe6060f1SDimitry Andric /// Unique ID (due to the unique address)
3242fe6060f1SDimitry Andric static const char ID;
3243fe6060f1SDimitry Andric };
3244fe6060f1SDimitry Andric
3245fe6060f1SDimitry Andric struct AAHeapToSharedFunction : public AAHeapToShared {
3246fe6060f1SDimitry Andric AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A)
3247fe6060f1SDimitry Andric : AAHeapToShared(IRP, A) {}
3248fe6060f1SDimitry Andric
3249*06c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override {
3250fe6060f1SDimitry Andric return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) +
3251fe6060f1SDimitry Andric " malloc calls eligible.";
3252fe6060f1SDimitry Andric }
3253fe6060f1SDimitry Andric
3254fe6060f1SDimitry Andric /// See AbstractAttribute::trackStatistics().
3255fe6060f1SDimitry Andric void trackStatistics() const override {}
3256fe6060f1SDimitry Andric
3257fe6060f1SDimitry Andric /// This function finds free calls that will be removed by the
3258fe6060f1SDimitry Andric /// HeapToShared transformation.
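/// Only allocations with exactly one __kmpc_free_shared user are recorded;
/// that unique free call can be deleted once the allocation is replaced by a
/// static shared-memory buffer. Illustrative pattern (names made up):
///   %p = call ptr @__kmpc_alloc_shared(i64 8)
///   ...
///   call void @__kmpc_free_shared(ptr %p, i64 8)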
3259fe6060f1SDimitry Andric void findPotentialRemovedFreeCalls(Attributor &A) { 3260fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 3261fe6060f1SDimitry Andric auto &FreeRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared]; 3262fe6060f1SDimitry Andric 3263fe6060f1SDimitry Andric PotentialRemovedFreeCalls.clear(); 3264fe6060f1SDimitry Andric // Update free call users of found malloc calls. 3265fe6060f1SDimitry Andric for (CallBase *CB : MallocCalls) { 3266fe6060f1SDimitry Andric SmallVector<CallBase *, 4> FreeCalls; 3267fe6060f1SDimitry Andric for (auto *U : CB->users()) { 3268fe6060f1SDimitry Andric CallBase *C = dyn_cast<CallBase>(U); 3269fe6060f1SDimitry Andric if (C && C->getCalledFunction() == FreeRFI.Declaration) 3270fe6060f1SDimitry Andric FreeCalls.push_back(C); 3271fe6060f1SDimitry Andric } 3272fe6060f1SDimitry Andric 3273fe6060f1SDimitry Andric if (FreeCalls.size() != 1) 3274fe6060f1SDimitry Andric continue; 3275fe6060f1SDimitry Andric 3276fe6060f1SDimitry Andric PotentialRemovedFreeCalls.insert(FreeCalls.front()); 3277fe6060f1SDimitry Andric } 3278fe6060f1SDimitry Andric } 3279fe6060f1SDimitry Andric 3280fe6060f1SDimitry Andric void initialize(Attributor &A) override { 328181ad6265SDimitry Andric if (DisableOpenMPOptDeglobalization) { 328281ad6265SDimitry Andric indicatePessimisticFixpoint(); 328381ad6265SDimitry Andric return; 328481ad6265SDimitry Andric } 328581ad6265SDimitry Andric 3286fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 3287fe6060f1SDimitry Andric auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; 3288bdd1243dSDimitry Andric if (!RFI.Declaration) 3289bdd1243dSDimitry Andric return; 3290fe6060f1SDimitry Andric 329181ad6265SDimitry Andric Attributor::SimplifictionCallbackTy SCB = 329281ad6265SDimitry Andric [](const IRPosition &, const AbstractAttribute *, 3293bdd1243dSDimitry Andric bool &) -> std::optional<Value *> { return nullptr; }; 3294bdd1243dSDimitry Andric 3295bdd1243dSDimitry Andric Function *F = getAnchorScope(); 3296fe6060f1SDimitry Andric for (User *U : RFI.Declaration->users()) 329781ad6265SDimitry Andric if (CallBase *CB = dyn_cast<CallBase>(U)) { 3298bdd1243dSDimitry Andric if (CB->getFunction() != F) 3299bdd1243dSDimitry Andric continue; 3300fe6060f1SDimitry Andric MallocCalls.insert(CB); 330181ad6265SDimitry Andric A.registerSimplificationCallback(IRPosition::callsite_returned(*CB), 330281ad6265SDimitry Andric SCB); 330381ad6265SDimitry Andric } 3304fe6060f1SDimitry Andric 3305fe6060f1SDimitry Andric findPotentialRemovedFreeCalls(A); 3306fe6060f1SDimitry Andric } 3307fe6060f1SDimitry Andric 3308fe6060f1SDimitry Andric bool isAssumedHeapToShared(CallBase &CB) const override { 3309fe6060f1SDimitry Andric return isValidState() && MallocCalls.count(&CB); 3310fe6060f1SDimitry Andric } 3311fe6060f1SDimitry Andric 3312fe6060f1SDimitry Andric bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const override { 3313fe6060f1SDimitry Andric return isValidState() && PotentialRemovedFreeCalls.count(&CB); 3314fe6060f1SDimitry Andric } 3315fe6060f1SDimitry Andric 3316fe6060f1SDimitry Andric ChangeStatus manifest(Attributor &A) override { 3317fe6060f1SDimitry Andric if (MallocCalls.empty()) 3318fe6060f1SDimitry Andric return ChangeStatus::UNCHANGED; 3319fe6060f1SDimitry Andric 3320fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 3321fe6060f1SDimitry Andric auto &FreeCall = 
OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared]; 3322fe6060f1SDimitry Andric 3323fe6060f1SDimitry Andric Function *F = getAnchorScope(); 3324fe6060f1SDimitry Andric auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this, 3325fe6060f1SDimitry Andric DepClassTy::OPTIONAL); 3326fe6060f1SDimitry Andric 3327fe6060f1SDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED; 3328fe6060f1SDimitry Andric for (CallBase *CB : MallocCalls) { 3329fe6060f1SDimitry Andric // Skip replacing this if HeapToStack has already claimed it. 3330fe6060f1SDimitry Andric if (HS && HS->isAssumedHeapToStack(*CB)) 3331fe6060f1SDimitry Andric continue; 3332fe6060f1SDimitry Andric 3333fe6060f1SDimitry Andric // Find the unique free call to remove it. 3334fe6060f1SDimitry Andric SmallVector<CallBase *, 4> FreeCalls; 3335fe6060f1SDimitry Andric for (auto *U : CB->users()) { 3336fe6060f1SDimitry Andric CallBase *C = dyn_cast<CallBase>(U); 3337fe6060f1SDimitry Andric if (C && C->getCalledFunction() == FreeCall.Declaration) 3338fe6060f1SDimitry Andric FreeCalls.push_back(C); 3339fe6060f1SDimitry Andric } 3340fe6060f1SDimitry Andric if (FreeCalls.size() != 1) 3341fe6060f1SDimitry Andric continue; 3342fe6060f1SDimitry Andric 334304eeddc0SDimitry Andric auto *AllocSize = cast<ConstantInt>(CB->getArgOperand(0)); 3344fe6060f1SDimitry Andric 334581ad6265SDimitry Andric if (AllocSize->getZExtValue() + SharedMemoryUsed > SharedMemoryLimit) { 334681ad6265SDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Cannot replace call " << *CB 334781ad6265SDimitry Andric << " with shared memory." 334881ad6265SDimitry Andric << " Shared memory usage is limited to " 334981ad6265SDimitry Andric << SharedMemoryLimit << " bytes\n"); 335081ad6265SDimitry Andric continue; 335181ad6265SDimitry Andric } 335281ad6265SDimitry Andric 3353349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CB 3354349cc55cSDimitry Andric << " with " << AllocSize->getZExtValue() 3355fe6060f1SDimitry Andric << " bytes of shared memory\n"); 3356fe6060f1SDimitry Andric 3357fe6060f1SDimitry Andric // Create a new shared memory buffer of the same size as the allocation 3358fe6060f1SDimitry Andric // and replace all the uses of the original allocation with it. 3359fe6060f1SDimitry Andric Module *M = CB->getModule(); 3360fe6060f1SDimitry Andric Type *Int8Ty = Type::getInt8Ty(M->getContext()); 3361fe6060f1SDimitry Andric Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue()); 3362fe6060f1SDimitry Andric auto *SharedMem = new GlobalVariable( 3363fe6060f1SDimitry Andric *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage, 3364*06c3fb27SDimitry Andric PoisonValue::get(Int8ArrTy), CB->getName() + "_shared", nullptr, 3365fe6060f1SDimitry Andric GlobalValue::NotThreadLocal, 3366fe6060f1SDimitry Andric static_cast<unsigned>(AddressSpace::Shared)); 3367fe6060f1SDimitry Andric auto *NewBuffer = 3368fe6060f1SDimitry Andric ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo()); 3369fe6060f1SDimitry Andric 3370fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemark OR) { 3371fe6060f1SDimitry Andric return OR << "Replaced globalized variable with " 3372fe6060f1SDimitry Andric << ore::NV("SharedMemory", AllocSize->getZExtValue()) 3373*06c3fb27SDimitry Andric << (AllocSize->isOne() ? 
" byte " : " bytes ") 3374fe6060f1SDimitry Andric << "of shared memory."; 3375fe6060f1SDimitry Andric }; 3376fe6060f1SDimitry Andric A.emitRemark<OptimizationRemark>(CB, "OMP111", Remark); 3377fe6060f1SDimitry Andric 337804eeddc0SDimitry Andric MaybeAlign Alignment = CB->getRetAlign(); 337904eeddc0SDimitry Andric assert(Alignment && 338004eeddc0SDimitry Andric "HeapToShared on allocation without alignment attribute"); 3381*06c3fb27SDimitry Andric SharedMem->setAlignment(*Alignment); 3382fe6060f1SDimitry Andric 338381ad6265SDimitry Andric A.changeAfterManifest(IRPosition::callsite_returned(*CB), *NewBuffer); 3384fe6060f1SDimitry Andric A.deleteAfterManifest(*CB); 3385fe6060f1SDimitry Andric A.deleteAfterManifest(*FreeCalls.front()); 3386fe6060f1SDimitry Andric 338781ad6265SDimitry Andric SharedMemoryUsed += AllocSize->getZExtValue(); 338881ad6265SDimitry Andric NumBytesMovedToSharedMemory = SharedMemoryUsed; 3389fe6060f1SDimitry Andric Changed = ChangeStatus::CHANGED; 3390fe6060f1SDimitry Andric } 3391fe6060f1SDimitry Andric 3392fe6060f1SDimitry Andric return Changed; 3393fe6060f1SDimitry Andric } 3394fe6060f1SDimitry Andric 3395fe6060f1SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 3396bdd1243dSDimitry Andric if (MallocCalls.empty()) 3397bdd1243dSDimitry Andric return indicatePessimisticFixpoint(); 3398fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 3399fe6060f1SDimitry Andric auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; 3400bdd1243dSDimitry Andric if (!RFI.Declaration) 3401bdd1243dSDimitry Andric return ChangeStatus::UNCHANGED; 3402bdd1243dSDimitry Andric 3403fe6060f1SDimitry Andric Function *F = getAnchorScope(); 3404fe6060f1SDimitry Andric 3405fe6060f1SDimitry Andric auto NumMallocCalls = MallocCalls.size(); 3406fe6060f1SDimitry Andric 3407fe6060f1SDimitry Andric // Only consider malloc calls executed by a single thread with a constant. 3408fe6060f1SDimitry Andric for (User *U : RFI.Declaration->users()) { 3409bdd1243dSDimitry Andric if (CallBase *CB = dyn_cast<CallBase>(U)) { 3410bdd1243dSDimitry Andric if (CB->getCaller() != F) 3411bdd1243dSDimitry Andric continue; 3412bdd1243dSDimitry Andric if (!MallocCalls.count(CB)) 3413bdd1243dSDimitry Andric continue; 3414bdd1243dSDimitry Andric if (!isa<ConstantInt>(CB->getArgOperand(0))) { 3415bdd1243dSDimitry Andric MallocCalls.remove(CB); 3416bdd1243dSDimitry Andric continue; 3417bdd1243dSDimitry Andric } 3418*06c3fb27SDimitry Andric const auto *ED = A.getAAFor<AAExecutionDomain>( 3419fe6060f1SDimitry Andric *this, IRPosition::function(*F), DepClassTy::REQUIRED); 3420*06c3fb27SDimitry Andric if (!ED || !ED->isExecutedByInitialThreadOnly(*CB)) 342104eeddc0SDimitry Andric MallocCalls.remove(CB); 3422fe6060f1SDimitry Andric } 3423bdd1243dSDimitry Andric } 3424fe6060f1SDimitry Andric 3425fe6060f1SDimitry Andric findPotentialRemovedFreeCalls(A); 3426fe6060f1SDimitry Andric 3427fe6060f1SDimitry Andric if (NumMallocCalls != MallocCalls.size()) 3428fe6060f1SDimitry Andric return ChangeStatus::CHANGED; 3429fe6060f1SDimitry Andric 3430fe6060f1SDimitry Andric return ChangeStatus::UNCHANGED; 3431fe6060f1SDimitry Andric } 3432fe6060f1SDimitry Andric 3433fe6060f1SDimitry Andric /// Collection of all malloc calls in a function. 343404eeddc0SDimitry Andric SmallSetVector<CallBase *, 4> MallocCalls; 3435fe6060f1SDimitry Andric /// Collection of potentially removed free calls in a function. 
3436fe6060f1SDimitry Andric SmallPtrSet<CallBase *, 4> PotentialRemovedFreeCalls;
343781ad6265SDimitry Andric /// The total amount of shared memory that has been used for HeapToShared.
343881ad6265SDimitry Andric unsigned SharedMemoryUsed = 0;
3439fe6060f1SDimitry Andric };
3440fe6060f1SDimitry Andric
3441fe6060f1SDimitry Andric struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
3442fe6060f1SDimitry Andric using Base = StateWrapper<KernelInfoState, AbstractAttribute>;
3443fe6060f1SDimitry Andric AAKernelInfo(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
3444fe6060f1SDimitry Andric
3445fe6060f1SDimitry Andric /// Statistics are tracked as part of manifest for now.
3446fe6060f1SDimitry Andric void trackStatistics() const override {}
3447fe6060f1SDimitry Andric
3448fe6060f1SDimitry Andric /// See AbstractAttribute::getAsStr()
3449*06c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override {
3450fe6060f1SDimitry Andric if (!isValidState())
3451fe6060f1SDimitry Andric return "<invalid>";
3452fe6060f1SDimitry Andric return std::string(SPMDCompatibilityTracker.isAssumed() ? "SPMD"
3453fe6060f1SDimitry Andric : "generic") +
3454fe6060f1SDimitry Andric std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]"
3455fe6060f1SDimitry Andric : "") +
3456fe6060f1SDimitry Andric std::string(" #PRs: ") +
3457349cc55cSDimitry Andric (ReachedKnownParallelRegions.isValidState()
3458349cc55cSDimitry Andric ? std::to_string(ReachedKnownParallelRegions.size())
3459349cc55cSDimitry Andric : "<invalid>") +
3460fe6060f1SDimitry Andric ", #Unknown PRs: " +
3461349cc55cSDimitry Andric (ReachedUnknownParallelRegions.isValidState()
3462349cc55cSDimitry Andric ? std::to_string(ReachedUnknownParallelRegions.size())
3463349cc55cSDimitry Andric : "<invalid>") +
3464349cc55cSDimitry Andric ", #Reaching Kernels: " +
3465349cc55cSDimitry Andric (ReachingKernelEntries.isValidState()
3466349cc55cSDimitry Andric ? std::to_string(ReachingKernelEntries.size())
3467bdd1243dSDimitry Andric : "<invalid>") +
3468bdd1243dSDimitry Andric ", #ParLevels: " +
3469bdd1243dSDimitry Andric (ParallelLevels.isValidState()
3470bdd1243dSDimitry Andric ? std::to_string(ParallelLevels.size())
3471349cc55cSDimitry Andric : "<invalid>");
3472fe6060f1SDimitry Andric }
3473fe6060f1SDimitry Andric
3474fe6060f1SDimitry Andric /// Create an abstract attribute view for the position \p IRP.
3475fe6060f1SDimitry Andric static AAKernelInfo &createForPosition(const IRPosition &IRP, Attributor &A);
3476fe6060f1SDimitry Andric
3477fe6060f1SDimitry Andric /// See AbstractAttribute::getName()
3478fe6060f1SDimitry Andric const std::string getName() const override { return "AAKernelInfo"; }
3479fe6060f1SDimitry Andric
3480fe6060f1SDimitry Andric /// See AbstractAttribute::getIdAddr()
3481fe6060f1SDimitry Andric const char *getIdAddr() const override { return &ID; }
3482fe6060f1SDimitry Andric
3483fe6060f1SDimitry Andric /// This function should return true if the type of the \p AA is AAKernelInfo
3484fe6060f1SDimitry Andric static bool classof(const AbstractAttribute *AA) {
3485fe6060f1SDimitry Andric return (AA->getIdAddr() == &ID);
3486fe6060f1SDimitry Andric }
3487fe6060f1SDimitry Andric
3488fe6060f1SDimitry Andric static const char ID;
3489fe6060f1SDimitry Andric };
3490fe6060f1SDimitry Andric
3491fe6060f1SDimitry Andric /// The function kernel info abstract attribute, basically, what can we say
3492fe6060f1SDimitry Andric /// about a function with regard to the KernelInfoState.
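/// For a kernel this covers, among other things, the __kmpc_target_init and
/// __kmpc_target_deinit calls, the parallel regions that may be reached, and
/// whether the kernel is (or can be transformed to be) SPMD compatible.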
3493fe6060f1SDimitry Andric struct AAKernelInfoFunction : AAKernelInfo {
3494fe6060f1SDimitry Andric AAKernelInfoFunction(const IRPosition &IRP, Attributor &A)
3495fe6060f1SDimitry Andric : AAKernelInfo(IRP, A) {}
3496fe6060f1SDimitry Andric
3497349cc55cSDimitry Andric SmallPtrSet<Instruction *, 4> GuardedInstructions;
3498349cc55cSDimitry Andric
3499349cc55cSDimitry Andric SmallPtrSetImpl<Instruction *> &getGuardedInstructions() {
3500349cc55cSDimitry Andric return GuardedInstructions;
3501349cc55cSDimitry Andric }
3502349cc55cSDimitry Andric
3503fe6060f1SDimitry Andric /// See AbstractAttribute::initialize(...).
3504fe6060f1SDimitry Andric void initialize(Attributor &A) override {
3505fe6060f1SDimitry Andric // This is a high-level transform that might change the constant arguments
3506fe6060f1SDimitry Andric // of the init and deinit calls. We need to tell the Attributor about this
3507fe6060f1SDimitry Andric // to avoid other parts using the current constant value for simplification.
3508fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3509fe6060f1SDimitry Andric
3510fe6060f1SDimitry Andric Function *Fn = getAnchorScope();
3511fe6060f1SDimitry Andric
3512fe6060f1SDimitry Andric OMPInformationCache::RuntimeFunctionInfo &InitRFI =
3513fe6060f1SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
3514fe6060f1SDimitry Andric OMPInformationCache::RuntimeFunctionInfo &DeinitRFI =
3515fe6060f1SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_target_deinit];
3516fe6060f1SDimitry Andric
3517fe6060f1SDimitry Andric // For kernels we perform more initialization work; first we find the init
3518fe6060f1SDimitry Andric // and deinit calls.
3519fe6060f1SDimitry Andric auto StoreCallBase = [](Use &U,
3520fe6060f1SDimitry Andric OMPInformationCache::RuntimeFunctionInfo &RFI,
3521fe6060f1SDimitry Andric CallBase *&Storage) {
3522fe6060f1SDimitry Andric CallBase *CB = OpenMPOpt::getCallIfRegularCall(U, &RFI);
3523fe6060f1SDimitry Andric assert(CB &&
3524fe6060f1SDimitry Andric "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!");
3525fe6060f1SDimitry Andric assert(!Storage &&
3526fe6060f1SDimitry Andric "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!");
3527fe6060f1SDimitry Andric Storage = CB;
3528fe6060f1SDimitry Andric return false;
3529fe6060f1SDimitry Andric };
3530fe6060f1SDimitry Andric InitRFI.foreachUse(
3531fe6060f1SDimitry Andric [&](Use &U, Function &) {
3532fe6060f1SDimitry Andric StoreCallBase(U, InitRFI, KernelInitCB);
3533fe6060f1SDimitry Andric return false;
3534fe6060f1SDimitry Andric },
3535fe6060f1SDimitry Andric Fn);
3536fe6060f1SDimitry Andric DeinitRFI.foreachUse(
3537fe6060f1SDimitry Andric [&](Use &U, Function &) {
3538fe6060f1SDimitry Andric StoreCallBase(U, DeinitRFI, KernelDeinitCB);
3539fe6060f1SDimitry Andric return false;
3540fe6060f1SDimitry Andric },
3541fe6060f1SDimitry Andric Fn);
3542fe6060f1SDimitry Andric
3543349cc55cSDimitry Andric // Ignore kernels without initializers such as global constructors.
354481ad6265SDimitry Andric if (!KernelInitCB || !KernelDeinitCB)
3545349cc55cSDimitry Andric return;
354681ad6265SDimitry Andric
354781ad6265SDimitry Andric // Add this kernel to the reaching kernel entries and set IsKernelEntry.
354881ad6265SDimitry Andric ReachingKernelEntries.insert(Fn); 354981ad6265SDimitry Andric IsKernelEntry = true; 3550fe6060f1SDimitry Andric 3551fe6060f1SDimitry Andric // For kernels we might need to initialize/finalize the IsSPMD state and 3552fe6060f1SDimitry Andric // we need to register a simplification callback so that the Attributor 3553fe6060f1SDimitry Andric // knows the constant arguments to __kmpc_target_init and 3554fe6060f1SDimitry Andric // __kmpc_target_deinit might actually change. 3555fe6060f1SDimitry Andric 3556fe6060f1SDimitry Andric Attributor::SimplifictionCallbackTy StateMachineSimplifyCB = 3557fe6060f1SDimitry Andric [&](const IRPosition &IRP, const AbstractAttribute *AA, 3558bdd1243dSDimitry Andric bool &UsedAssumedInformation) -> std::optional<Value *> { 3559349cc55cSDimitry Andric return nullptr; 3560fe6060f1SDimitry Andric }; 3561fe6060f1SDimitry Andric 3562349cc55cSDimitry Andric Attributor::SimplifictionCallbackTy ModeSimplifyCB = 3563fe6060f1SDimitry Andric [&](const IRPosition &IRP, const AbstractAttribute *AA, 3564bdd1243dSDimitry Andric bool &UsedAssumedInformation) -> std::optional<Value *> { 3565fe6060f1SDimitry Andric // IRP represents the "SPMDCompatibilityTracker" argument of an 3566fe6060f1SDimitry Andric // __kmpc_target_init or 3567fe6060f1SDimitry Andric // __kmpc_target_deinit call. We will answer this one with the internal 3568fe6060f1SDimitry Andric // state. 3569fe6060f1SDimitry Andric if (!SPMDCompatibilityTracker.isValidState()) 3570fe6060f1SDimitry Andric return nullptr; 3571fe6060f1SDimitry Andric if (!SPMDCompatibilityTracker.isAtFixpoint()) { 3572fe6060f1SDimitry Andric if (AA) 3573fe6060f1SDimitry Andric A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); 3574fe6060f1SDimitry Andric UsedAssumedInformation = true; 3575fe6060f1SDimitry Andric } else { 3576fe6060f1SDimitry Andric UsedAssumedInformation = false; 3577fe6060f1SDimitry Andric } 3578349cc55cSDimitry Andric auto *Val = ConstantInt::getSigned( 3579349cc55cSDimitry Andric IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()), 3580349cc55cSDimitry Andric SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD 3581349cc55cSDimitry Andric : OMP_TGT_EXEC_MODE_GENERIC); 3582fe6060f1SDimitry Andric return Val; 3583fe6060f1SDimitry Andric }; 3584fe6060f1SDimitry Andric 3585349cc55cSDimitry Andric constexpr const int InitModeArgNo = 1; 3586349cc55cSDimitry Andric constexpr const int DeinitModeArgNo = 1; 3587fe6060f1SDimitry Andric constexpr const int InitUseStateMachineArgNo = 2; 3588fe6060f1SDimitry Andric A.registerSimplificationCallback( 3589fe6060f1SDimitry Andric IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo), 3590fe6060f1SDimitry Andric StateMachineSimplifyCB); 3591fe6060f1SDimitry Andric A.registerSimplificationCallback( 3592349cc55cSDimitry Andric IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo), 3593349cc55cSDimitry Andric ModeSimplifyCB); 3594fe6060f1SDimitry Andric A.registerSimplificationCallback( 3595349cc55cSDimitry Andric IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo), 3596349cc55cSDimitry Andric ModeSimplifyCB); 3597fe6060f1SDimitry Andric 3598fe6060f1SDimitry Andric // Check if we know we are in SPMD-mode already. 
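// The mode argument of __kmpc_target_init carries OMP_TGT_EXEC_MODE_* flags;
// if the frontend already emitted the SPMD flag there is nothing left to
// track for SPMDzation.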
3599349cc55cSDimitry Andric ConstantInt *ModeArg = 3600349cc55cSDimitry Andric dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo)); 3601349cc55cSDimitry Andric if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) 3602fe6060f1SDimitry Andric SPMDCompatibilityTracker.indicateOptimisticFixpoint(); 3603349cc55cSDimitry Andric // This is a generic region but SPMDization is disabled so stop tracking. 3604349cc55cSDimitry Andric else if (DisableOpenMPOptSPMDization) 3605349cc55cSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint(); 3606bdd1243dSDimitry Andric 3607bdd1243dSDimitry Andric // Register virtual uses of functions we might need to preserve. 3608bdd1243dSDimitry Andric auto RegisterVirtualUse = [&](RuntimeFunction RFKind, 3609bdd1243dSDimitry Andric Attributor::VirtualUseCallbackTy &CB) { 3610bdd1243dSDimitry Andric if (!OMPInfoCache.RFIs[RFKind].Declaration) 3611bdd1243dSDimitry Andric return; 3612bdd1243dSDimitry Andric A.registerVirtualUseCallback(*OMPInfoCache.RFIs[RFKind].Declaration, CB); 3613bdd1243dSDimitry Andric }; 3614bdd1243dSDimitry Andric 3615bdd1243dSDimitry Andric // Add a dependence to ensure updates if the state changes. 3616bdd1243dSDimitry Andric auto AddDependence = [](Attributor &A, const AAKernelInfo *KI, 3617bdd1243dSDimitry Andric const AbstractAttribute *QueryingAA) { 3618bdd1243dSDimitry Andric if (QueryingAA) { 3619bdd1243dSDimitry Andric A.recordDependence(*KI, *QueryingAA, DepClassTy::OPTIONAL); 3620bdd1243dSDimitry Andric } 3621bdd1243dSDimitry Andric return true; 3622bdd1243dSDimitry Andric }; 3623bdd1243dSDimitry Andric 3624bdd1243dSDimitry Andric Attributor::VirtualUseCallbackTy CustomStateMachineUseCB = 3625bdd1243dSDimitry Andric [&](Attributor &A, const AbstractAttribute *QueryingAA) { 3626bdd1243dSDimitry Andric // Whenever we create a custom state machine we will insert calls to 3627bdd1243dSDimitry Andric // __kmpc_get_hardware_num_threads_in_block, 3628bdd1243dSDimitry Andric // __kmpc_get_warp_size, 3629bdd1243dSDimitry Andric // __kmpc_barrier_simple_generic, 3630bdd1243dSDimitry Andric // __kmpc_kernel_parallel, and 3631bdd1243dSDimitry Andric // __kmpc_kernel_end_parallel. 3632bdd1243dSDimitry Andric // Not needed if we are on track for SPMDzation. 3633bdd1243dSDimitry Andric if (SPMDCompatibilityTracker.isValidState()) 3634bdd1243dSDimitry Andric return AddDependence(A, this, QueryingAA); 3635bdd1243dSDimitry Andric // Not needed if we can't rewrite due to an invalid state. 3636bdd1243dSDimitry Andric if (!ReachedKnownParallelRegions.isValidState()) 3637bdd1243dSDimitry Andric return AddDependence(A, this, QueryingAA); 3638bdd1243dSDimitry Andric return false; 3639bdd1243dSDimitry Andric }; 3640bdd1243dSDimitry Andric 3641bdd1243dSDimitry Andric // Not needed if we are pre-runtime merge. 
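// (Presumably "pre-runtime merge" means the device runtime bitcode has not
// been linked in yet; in that case __kmpc_target_init is only a declaration
// and the state machine helpers cannot be referenced.)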
3642bdd1243dSDimitry Andric if (!KernelInitCB->getCalledFunction()->isDeclaration()) { 3643bdd1243dSDimitry Andric RegisterVirtualUse(OMPRTL___kmpc_get_hardware_num_threads_in_block, 3644bdd1243dSDimitry Andric CustomStateMachineUseCB); 3645bdd1243dSDimitry Andric RegisterVirtualUse(OMPRTL___kmpc_get_warp_size, CustomStateMachineUseCB); 3646bdd1243dSDimitry Andric RegisterVirtualUse(OMPRTL___kmpc_barrier_simple_generic, 3647bdd1243dSDimitry Andric CustomStateMachineUseCB); 3648bdd1243dSDimitry Andric RegisterVirtualUse(OMPRTL___kmpc_kernel_parallel, 3649bdd1243dSDimitry Andric CustomStateMachineUseCB); 3650bdd1243dSDimitry Andric RegisterVirtualUse(OMPRTL___kmpc_kernel_end_parallel, 3651bdd1243dSDimitry Andric CustomStateMachineUseCB); 3652bdd1243dSDimitry Andric } 3653bdd1243dSDimitry Andric 3654bdd1243dSDimitry Andric // If we do not perform SPMDzation we do not need the virtual uses below. 3655bdd1243dSDimitry Andric if (SPMDCompatibilityTracker.isAtFixpoint()) 3656bdd1243dSDimitry Andric return; 3657bdd1243dSDimitry Andric 3658bdd1243dSDimitry Andric Attributor::VirtualUseCallbackTy HWThreadIdUseCB = 3659bdd1243dSDimitry Andric [&](Attributor &A, const AbstractAttribute *QueryingAA) { 3660bdd1243dSDimitry Andric // Whenever we perform SPMDzation we will insert 3661bdd1243dSDimitry Andric // __kmpc_get_hardware_thread_id_in_block calls. 3662bdd1243dSDimitry Andric if (!SPMDCompatibilityTracker.isValidState()) 3663bdd1243dSDimitry Andric return AddDependence(A, this, QueryingAA); 3664bdd1243dSDimitry Andric return false; 3665bdd1243dSDimitry Andric }; 3666bdd1243dSDimitry Andric RegisterVirtualUse(OMPRTL___kmpc_get_hardware_thread_id_in_block, 3667bdd1243dSDimitry Andric HWThreadIdUseCB); 3668bdd1243dSDimitry Andric 3669bdd1243dSDimitry Andric Attributor::VirtualUseCallbackTy SPMDBarrierUseCB = 3670bdd1243dSDimitry Andric [&](Attributor &A, const AbstractAttribute *QueryingAA) { 3671bdd1243dSDimitry Andric // Whenever we perform SPMDzation with guarding we will insert 3672bdd1243dSDimitry Andric // __kmpc_simple_barrier_spmd calls. If SPMDzation failed, there is 3673bdd1243dSDimitry Andric // nothing to guard, or there are no parallel regions, we don't need 3674bdd1243dSDimitry Andric // the calls. 3675bdd1243dSDimitry Andric if (!SPMDCompatibilityTracker.isValidState()) 3676bdd1243dSDimitry Andric return AddDependence(A, this, QueryingAA); 3677bdd1243dSDimitry Andric if (SPMDCompatibilityTracker.empty()) 3678bdd1243dSDimitry Andric return AddDependence(A, this, QueryingAA); 3679bdd1243dSDimitry Andric if (!mayContainParallelRegion()) 3680bdd1243dSDimitry Andric return AddDependence(A, this, QueryingAA); 3681bdd1243dSDimitry Andric return false; 3682bdd1243dSDimitry Andric }; 3683bdd1243dSDimitry Andric RegisterVirtualUse(OMPRTL___kmpc_barrier_simple_spmd, SPMDBarrierUseCB); 3684349cc55cSDimitry Andric } 3685349cc55cSDimitry Andric 3686349cc55cSDimitry Andric /// Sanitize the string \p S such that it is a suitable global symbol name. 
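/// Every character outside [A-Za-z0-9_] is replaced with '.', e.g.,
/// "x (guarded)" becomes "x..guarded.".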
3687349cc55cSDimitry Andric static std::string sanitizeForGlobalName(std::string S) { 3688349cc55cSDimitry Andric std::replace_if( 3689349cc55cSDimitry Andric S.begin(), S.end(), 3690349cc55cSDimitry Andric [](const char C) { 3691349cc55cSDimitry Andric return !((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') || 3692349cc55cSDimitry Andric (C >= '0' && C <= '9') || C == '_'); 3693349cc55cSDimitry Andric }, 3694349cc55cSDimitry Andric '.'); 3695349cc55cSDimitry Andric return S; 3696fe6060f1SDimitry Andric } 3697fe6060f1SDimitry Andric 3698fe6060f1SDimitry Andric /// Modify the IR based on the KernelInfoState as the fixpoint iteration is 3699fe6060f1SDimitry Andric /// finished now. 3700fe6060f1SDimitry Andric ChangeStatus manifest(Attributor &A) override { 3701fe6060f1SDimitry Andric // If we are not looking at a kernel with __kmpc_target_init and 3702fe6060f1SDimitry Andric // __kmpc_target_deinit call we cannot actually manifest the information. 3703fe6060f1SDimitry Andric if (!KernelInitCB || !KernelDeinitCB) 3704fe6060f1SDimitry Andric return ChangeStatus::UNCHANGED; 3705fe6060f1SDimitry Andric 3706bdd1243dSDimitry Andric /// Insert nested Parallelism global variable 3707bdd1243dSDimitry Andric Function *Kernel = getAnchorScope(); 3708bdd1243dSDimitry Andric Module &M = *Kernel->getParent(); 3709bdd1243dSDimitry Andric Type *Int8Ty = Type::getInt8Ty(M.getContext()); 3710*06c3fb27SDimitry Andric auto *GV = new GlobalVariable( 3711*06c3fb27SDimitry Andric M, Int8Ty, /* isConstant */ true, GlobalValue::WeakAnyLinkage, 3712bdd1243dSDimitry Andric ConstantInt::get(Int8Ty, NestedParallelism ? 1 : 0), 3713bdd1243dSDimitry Andric Kernel->getName() + "_nested_parallelism"); 3714*06c3fb27SDimitry Andric GV->setVisibility(GlobalValue::HiddenVisibility); 3715bdd1243dSDimitry Andric 3716fe6060f1SDimitry Andric // If we can we change the execution mode to SPMD-mode otherwise we build a 3717fe6060f1SDimitry Andric // custom state machine. 3718349cc55cSDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED; 3719bdd1243dSDimitry Andric if (!changeToSPMDMode(A, Changed)) { 3720bdd1243dSDimitry Andric if (!KernelInitCB->getCalledFunction()->isDeclaration()) 3721349cc55cSDimitry Andric return buildCustomStateMachine(A); 3722bdd1243dSDimitry Andric } 3723fe6060f1SDimitry Andric 3724349cc55cSDimitry Andric return Changed; 3725fe6060f1SDimitry Andric } 3726fe6060f1SDimitry Andric 3727bdd1243dSDimitry Andric void insertInstructionGuardsHelper(Attributor &A) { 3728fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 3729fe6060f1SDimitry Andric 3730349cc55cSDimitry Andric auto CreateGuardedRegion = [&](Instruction *RegionStartI, 3731349cc55cSDimitry Andric Instruction *RegionEndI) { 3732349cc55cSDimitry Andric LoopInfo *LI = nullptr; 3733349cc55cSDimitry Andric DominatorTree *DT = nullptr; 3734349cc55cSDimitry Andric MemorySSAUpdater *MSU = nullptr; 3735349cc55cSDimitry Andric using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3736349cc55cSDimitry Andric 3737349cc55cSDimitry Andric BasicBlock *ParentBB = RegionStartI->getParent(); 3738349cc55cSDimitry Andric Function *Fn = ParentBB->getParent(); 3739349cc55cSDimitry Andric Module &M = *Fn->getParent(); 3740349cc55cSDimitry Andric 3741349cc55cSDimitry Andric // Create all the blocks and logic. 
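// Only the main thread (tid 0) executes the guarded instructions; all other
// threads branch straight to the barrier and read any escaping values back
// from shared memory afterwards: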
3742349cc55cSDimitry Andric // ParentBB: 3743349cc55cSDimitry Andric // goto RegionCheckTidBB 3744349cc55cSDimitry Andric // RegionCheckTidBB: 3745349cc55cSDimitry Andric // Tid = __kmpc_hardware_thread_id() 3746349cc55cSDimitry Andric // if (Tid != 0) 3747349cc55cSDimitry Andric // goto RegionBarrierBB 3748349cc55cSDimitry Andric // RegionStartBB: 3749349cc55cSDimitry Andric // <execute instructions guarded> 3750349cc55cSDimitry Andric // goto RegionEndBB 3751349cc55cSDimitry Andric // RegionEndBB: 3752349cc55cSDimitry Andric // <store escaping values to shared mem> 3753349cc55cSDimitry Andric // goto RegionBarrierBB 3754349cc55cSDimitry Andric // RegionBarrierBB: 3755349cc55cSDimitry Andric // __kmpc_simple_barrier_spmd() 3756349cc55cSDimitry Andric // // second barrier is omitted if lacking escaping values. 3757349cc55cSDimitry Andric // <load escaping values from shared mem> 3758349cc55cSDimitry Andric // __kmpc_simple_barrier_spmd() 3759349cc55cSDimitry Andric // goto RegionExitBB 3760349cc55cSDimitry Andric // RegionExitBB: 3761349cc55cSDimitry Andric // <execute rest of instructions> 3762349cc55cSDimitry Andric 3763349cc55cSDimitry Andric BasicBlock *RegionEndBB = SplitBlock(ParentBB, RegionEndI->getNextNode(), 3764349cc55cSDimitry Andric DT, LI, MSU, "region.guarded.end"); 3765349cc55cSDimitry Andric BasicBlock *RegionBarrierBB = 3766349cc55cSDimitry Andric SplitBlock(RegionEndBB, &*RegionEndBB->getFirstInsertionPt(), DT, LI, 3767349cc55cSDimitry Andric MSU, "region.barrier"); 3768349cc55cSDimitry Andric BasicBlock *RegionExitBB = 3769349cc55cSDimitry Andric SplitBlock(RegionBarrierBB, &*RegionBarrierBB->getFirstInsertionPt(), 3770349cc55cSDimitry Andric DT, LI, MSU, "region.exit"); 3771349cc55cSDimitry Andric BasicBlock *RegionStartBB = 3772349cc55cSDimitry Andric SplitBlock(ParentBB, RegionStartI, DT, LI, MSU, "region.guarded"); 3773349cc55cSDimitry Andric 3774349cc55cSDimitry Andric assert(ParentBB->getUniqueSuccessor() == RegionStartBB && 3775349cc55cSDimitry Andric "Expected a different CFG"); 3776349cc55cSDimitry Andric 3777349cc55cSDimitry Andric BasicBlock *RegionCheckTidBB = SplitBlock( 3778349cc55cSDimitry Andric ParentBB, ParentBB->getTerminator(), DT, LI, MSU, "region.check.tid"); 3779349cc55cSDimitry Andric 3780349cc55cSDimitry Andric // Register basic blocks with the Attributor. 3781349cc55cSDimitry Andric A.registerManifestAddedBasicBlock(*RegionEndBB); 3782349cc55cSDimitry Andric A.registerManifestAddedBasicBlock(*RegionBarrierBB); 3783349cc55cSDimitry Andric A.registerManifestAddedBasicBlock(*RegionExitBB); 3784349cc55cSDimitry Andric A.registerManifestAddedBasicBlock(*RegionStartBB); 3785349cc55cSDimitry Andric A.registerManifestAddedBasicBlock(*RegionCheckTidBB); 3786349cc55cSDimitry Andric 3787349cc55cSDimitry Andric bool HasBroadcastValues = false; 3788349cc55cSDimitry Andric // Find escaping outputs from the guarded region to outside users and 3789349cc55cSDimitry Andric // broadcast their values to them. 
3790349cc55cSDimitry Andric for (Instruction &I : *RegionStartBB) { 3791349cc55cSDimitry Andric SmallPtrSet<Instruction *, 4> OutsideUsers; 3792349cc55cSDimitry Andric for (User *Usr : I.users()) { 3793349cc55cSDimitry Andric Instruction &UsrI = *cast<Instruction>(Usr); 3794349cc55cSDimitry Andric if (UsrI.getParent() != RegionStartBB) 3795349cc55cSDimitry Andric OutsideUsers.insert(&UsrI); 3796349cc55cSDimitry Andric } 3797349cc55cSDimitry Andric 3798349cc55cSDimitry Andric if (OutsideUsers.empty()) 3799349cc55cSDimitry Andric continue; 3800349cc55cSDimitry Andric 3801349cc55cSDimitry Andric HasBroadcastValues = true; 3802349cc55cSDimitry Andric 3803349cc55cSDimitry Andric // Emit a global variable in shared memory to store the broadcasted 3804349cc55cSDimitry Andric // value. 3805349cc55cSDimitry Andric auto *SharedMem = new GlobalVariable( 3806349cc55cSDimitry Andric M, I.getType(), /* IsConstant */ false, 3807349cc55cSDimitry Andric GlobalValue::InternalLinkage, UndefValue::get(I.getType()), 3808349cc55cSDimitry Andric sanitizeForGlobalName( 3809349cc55cSDimitry Andric (I.getName() + ".guarded.output.alloc").str()), 3810349cc55cSDimitry Andric nullptr, GlobalValue::NotThreadLocal, 3811349cc55cSDimitry Andric static_cast<unsigned>(AddressSpace::Shared)); 3812349cc55cSDimitry Andric 3813349cc55cSDimitry Andric // Emit a store instruction to update the value. 3814349cc55cSDimitry Andric new StoreInst(&I, SharedMem, RegionEndBB->getTerminator()); 3815349cc55cSDimitry Andric 3816349cc55cSDimitry Andric LoadInst *LoadI = new LoadInst(I.getType(), SharedMem, 3817349cc55cSDimitry Andric I.getName() + ".guarded.output.load", 3818349cc55cSDimitry Andric RegionBarrierBB->getTerminator()); 3819349cc55cSDimitry Andric 3820349cc55cSDimitry Andric // Emit a load instruction and replace uses of the output value. 3821349cc55cSDimitry Andric for (Instruction *UsrI : OutsideUsers) 3822349cc55cSDimitry Andric UsrI->replaceUsesOfWith(&I, LoadI); 3823349cc55cSDimitry Andric } 3824349cc55cSDimitry Andric 3825349cc55cSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 3826349cc55cSDimitry Andric 3827349cc55cSDimitry Andric // Go to tid check BB in ParentBB. 
3828349cc55cSDimitry Andric const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); 3829349cc55cSDimitry Andric ParentBB->getTerminator()->eraseFromParent(); 3830349cc55cSDimitry Andric OpenMPIRBuilder::LocationDescription Loc( 3831349cc55cSDimitry Andric InsertPointTy(ParentBB, ParentBB->end()), DL); 3832349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.updateToLocation(Loc); 383304eeddc0SDimitry Andric uint32_t SrcLocStrSize; 383404eeddc0SDimitry Andric auto *SrcLocStr = 383504eeddc0SDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc, SrcLocStrSize); 383604eeddc0SDimitry Andric Value *Ident = 383704eeddc0SDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize); 3838349cc55cSDimitry Andric BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL); 3839349cc55cSDimitry Andric 3840349cc55cSDimitry Andric // Add check for Tid in RegionCheckTidBB 3841349cc55cSDimitry Andric RegionCheckTidBB->getTerminator()->eraseFromParent(); 3842349cc55cSDimitry Andric OpenMPIRBuilder::LocationDescription LocRegionCheckTid( 3843349cc55cSDimitry Andric InsertPointTy(RegionCheckTidBB, RegionCheckTidBB->end()), DL); 3844349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.updateToLocation(LocRegionCheckTid); 3845349cc55cSDimitry Andric FunctionCallee HardwareTidFn = 3846349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( 3847349cc55cSDimitry Andric M, OMPRTL___kmpc_get_hardware_thread_id_in_block); 384804eeddc0SDimitry Andric CallInst *Tid = 3849349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {}); 385004eeddc0SDimitry Andric Tid->setDebugLoc(DL); 385104eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(HardwareTidFn, Tid); 3852349cc55cSDimitry Andric Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid); 3853349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.Builder 3854349cc55cSDimitry Andric .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB) 3855349cc55cSDimitry Andric ->setDebugLoc(DL); 3856349cc55cSDimitry Andric 3857349cc55cSDimitry Andric // First barrier for synchronization, ensures main thread has updated 3858349cc55cSDimitry Andric // values. 3859349cc55cSDimitry Andric FunctionCallee BarrierFn = 3860349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( 3861349cc55cSDimitry Andric M, OMPRTL___kmpc_barrier_simple_spmd); 3862349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy( 3863349cc55cSDimitry Andric RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt())); 386404eeddc0SDimitry Andric CallInst *Barrier = 386504eeddc0SDimitry Andric OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid}); 386604eeddc0SDimitry Andric Barrier->setDebugLoc(DL); 386704eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(BarrierFn, Barrier); 3868349cc55cSDimitry Andric 3869349cc55cSDimitry Andric // Second barrier ensures workers have read broadcast values. 
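// (This second barrier is emitted only if values escape the guarded region;
// otherwise there is nothing for the other threads to read back.)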
387004eeddc0SDimitry Andric if (HasBroadcastValues) { 387104eeddc0SDimitry Andric CallInst *Barrier = CallInst::Create(BarrierFn, {Ident, Tid}, "", 387204eeddc0SDimitry Andric RegionBarrierBB->getTerminator()); 387304eeddc0SDimitry Andric Barrier->setDebugLoc(DL); 387404eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(BarrierFn, Barrier); 387504eeddc0SDimitry Andric } 3876349cc55cSDimitry Andric }; 3877349cc55cSDimitry Andric 3878349cc55cSDimitry Andric auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; 3879349cc55cSDimitry Andric SmallPtrSet<BasicBlock *, 8> Visited; 3880349cc55cSDimitry Andric for (Instruction *GuardedI : SPMDCompatibilityTracker) { 3881349cc55cSDimitry Andric BasicBlock *BB = GuardedI->getParent(); 3882349cc55cSDimitry Andric if (!Visited.insert(BB).second) 3883349cc55cSDimitry Andric continue; 3884349cc55cSDimitry Andric 3885349cc55cSDimitry Andric SmallVector<std::pair<Instruction *, Instruction *>> Reorders; 3886349cc55cSDimitry Andric Instruction *LastEffect = nullptr; 3887349cc55cSDimitry Andric BasicBlock::reverse_iterator IP = BB->rbegin(), IPEnd = BB->rend(); 3888349cc55cSDimitry Andric while (++IP != IPEnd) { 3889349cc55cSDimitry Andric if (!IP->mayHaveSideEffects() && !IP->mayReadFromMemory()) 3890349cc55cSDimitry Andric continue; 3891349cc55cSDimitry Andric Instruction *I = &*IP; 3892349cc55cSDimitry Andric if (OpenMPOpt::getCallIfRegularCall(*I, &AllocSharedRFI)) 3893349cc55cSDimitry Andric continue; 3894349cc55cSDimitry Andric if (!I->user_empty() || !SPMDCompatibilityTracker.contains(I)) { 3895349cc55cSDimitry Andric LastEffect = nullptr; 3896349cc55cSDimitry Andric continue; 3897349cc55cSDimitry Andric } 3898349cc55cSDimitry Andric if (LastEffect) 3899349cc55cSDimitry Andric Reorders.push_back({I, LastEffect}); 3900349cc55cSDimitry Andric LastEffect = &*IP; 3901349cc55cSDimitry Andric } 3902349cc55cSDimitry Andric for (auto &Reorder : Reorders) 3903349cc55cSDimitry Andric Reorder.first->moveBefore(Reorder.second); 3904349cc55cSDimitry Andric } 3905349cc55cSDimitry Andric 3906349cc55cSDimitry Andric SmallVector<std::pair<Instruction *, Instruction *>, 4> GuardedRegions; 3907349cc55cSDimitry Andric 3908349cc55cSDimitry Andric for (Instruction *GuardedI : SPMDCompatibilityTracker) { 3909349cc55cSDimitry Andric BasicBlock *BB = GuardedI->getParent(); 3910349cc55cSDimitry Andric auto *CalleeAA = A.lookupAAFor<AAKernelInfo>( 3911349cc55cSDimitry Andric IRPosition::function(*GuardedI->getFunction()), nullptr, 3912349cc55cSDimitry Andric DepClassTy::NONE); 3913349cc55cSDimitry Andric assert(CalleeAA != nullptr && "Expected Callee AAKernelInfo"); 3914349cc55cSDimitry Andric auto &CalleeAAFunction = *cast<AAKernelInfoFunction>(CalleeAA); 3915349cc55cSDimitry Andric // Continue if instruction is already guarded. 3916349cc55cSDimitry Andric if (CalleeAAFunction.getGuardedInstructions().contains(GuardedI)) 3917349cc55cSDimitry Andric continue; 3918349cc55cSDimitry Andric 3919349cc55cSDimitry Andric Instruction *GuardedRegionStart = nullptr, *GuardedRegionEnd = nullptr; 3920349cc55cSDimitry Andric for (Instruction &I : *BB) { 3921349cc55cSDimitry Andric // If instruction I needs to be guarded update the guarded region 3922349cc55cSDimitry Andric // bounds. 
3923349cc55cSDimitry Andric if (SPMDCompatibilityTracker.contains(&I)) { 3924349cc55cSDimitry Andric CalleeAAFunction.getGuardedInstructions().insert(&I); 3925349cc55cSDimitry Andric if (GuardedRegionStart) 3926349cc55cSDimitry Andric GuardedRegionEnd = &I; 3927349cc55cSDimitry Andric else 3928349cc55cSDimitry Andric GuardedRegionStart = GuardedRegionEnd = &I; 3929349cc55cSDimitry Andric 3930349cc55cSDimitry Andric continue; 3931349cc55cSDimitry Andric } 3932349cc55cSDimitry Andric 3933349cc55cSDimitry Andric // Instruction I does not need guarding, store 3934349cc55cSDimitry Andric // any region found and reset bounds. 3935349cc55cSDimitry Andric if (GuardedRegionStart) { 3936349cc55cSDimitry Andric GuardedRegions.push_back( 3937349cc55cSDimitry Andric std::make_pair(GuardedRegionStart, GuardedRegionEnd)); 3938349cc55cSDimitry Andric GuardedRegionStart = nullptr; 3939349cc55cSDimitry Andric GuardedRegionEnd = nullptr; 3940349cc55cSDimitry Andric } 3941349cc55cSDimitry Andric } 3942349cc55cSDimitry Andric } 3943349cc55cSDimitry Andric 3944349cc55cSDimitry Andric for (auto &GR : GuardedRegions) 3945349cc55cSDimitry Andric CreateGuardedRegion(GR.first, GR.second); 3946bdd1243dSDimitry Andric } 3947bdd1243dSDimitry Andric 3948bdd1243dSDimitry Andric void forceSingleThreadPerWorkgroupHelper(Attributor &A) { 3949bdd1243dSDimitry Andric // Only allow 1 thread per workgroup to continue executing the user code. 3950bdd1243dSDimitry Andric // 3951bdd1243dSDimitry Andric // InitCB = __kmpc_target_init(...) 3952bdd1243dSDimitry Andric // ThreadIdInBlock = __kmpc_get_hardware_thread_id_in_block(); 3953bdd1243dSDimitry Andric // if (ThreadIdInBlock != 0) return; 3954bdd1243dSDimitry Andric // UserCode: 3955bdd1243dSDimitry Andric // // user code 3956bdd1243dSDimitry Andric // 3957bdd1243dSDimitry Andric auto &Ctx = getAnchorValue().getContext(); 3958bdd1243dSDimitry Andric Function *Kernel = getAssociatedFunction(); 3959bdd1243dSDimitry Andric assert(Kernel && "Expected an associated function!"); 3960bdd1243dSDimitry Andric 3961bdd1243dSDimitry Andric // Create block for user code to branch to from initial block. 3962bdd1243dSDimitry Andric BasicBlock *InitBB = KernelInitCB->getParent(); 3963bdd1243dSDimitry Andric BasicBlock *UserCodeBB = InitBB->splitBasicBlock( 3964bdd1243dSDimitry Andric KernelInitCB->getNextNode(), "main.thread.user_code"); 3965bdd1243dSDimitry Andric BasicBlock *ReturnBB = 3966bdd1243dSDimitry Andric BasicBlock::Create(Ctx, "exit.threads", Kernel, UserCodeBB); 3967bdd1243dSDimitry Andric 3968bdd1243dSDimitry Andric // Register blocks with attributor: 3969bdd1243dSDimitry Andric A.registerManifestAddedBasicBlock(*InitBB); 3970bdd1243dSDimitry Andric A.registerManifestAddedBasicBlock(*UserCodeBB); 3971bdd1243dSDimitry Andric A.registerManifestAddedBasicBlock(*ReturnBB); 3972bdd1243dSDimitry Andric 3973bdd1243dSDimitry Andric // Debug location: 3974bdd1243dSDimitry Andric const DebugLoc &DLoc = KernelInitCB->getDebugLoc(); 3975bdd1243dSDimitry Andric ReturnInst::Create(Ctx, ReturnBB)->setDebugLoc(DLoc); 3976bdd1243dSDimitry Andric InitBB->getTerminator()->eraseFromParent(); 3977bdd1243dSDimitry Andric 3978bdd1243dSDimitry Andric // Prepare call to OMPRTL___kmpc_get_hardware_thread_id_in_block. 
3979bdd1243dSDimitry Andric Module &M = *Kernel->getParent();
3980bdd1243dSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3981bdd1243dSDimitry Andric FunctionCallee ThreadIdInBlockFn =
3982bdd1243dSDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
3983bdd1243dSDimitry Andric M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
3984bdd1243dSDimitry Andric
3985bdd1243dSDimitry Andric // Get thread ID in block.
3986bdd1243dSDimitry Andric CallInst *ThreadIdInBlock =
3987bdd1243dSDimitry Andric CallInst::Create(ThreadIdInBlockFn, "thread_id.in.block", InitBB);
3988bdd1243dSDimitry Andric OMPInfoCache.setCallingConvention(ThreadIdInBlockFn, ThreadIdInBlock);
3989bdd1243dSDimitry Andric ThreadIdInBlock->setDebugLoc(DLoc);
3990bdd1243dSDimitry Andric
3991bdd1243dSDimitry Andric // Eliminate all threads in the block with ID not equal to 0:
3992bdd1243dSDimitry Andric Instruction *IsMainThread =
3993bdd1243dSDimitry Andric ICmpInst::Create(ICmpInst::ICmp, CmpInst::ICMP_NE, ThreadIdInBlock,
3994bdd1243dSDimitry Andric ConstantInt::get(ThreadIdInBlock->getType(), 0),
3995bdd1243dSDimitry Andric "thread.is_main", InitBB);
3996bdd1243dSDimitry Andric IsMainThread->setDebugLoc(DLoc);
3997bdd1243dSDimitry Andric BranchInst::Create(ReturnBB, UserCodeBB, IsMainThread, InitBB);
3998bdd1243dSDimitry Andric }
3999bdd1243dSDimitry Andric
4000bdd1243dSDimitry Andric bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
4001bdd1243dSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4002bdd1243dSDimitry Andric
40031ac55f4cSDimitry Andric // We cannot change to SPMD mode if the runtime functions aren't available.
40041ac55f4cSDimitry Andric if (!OMPInfoCache.runtimeFnsAvailable(
40051ac55f4cSDimitry Andric {OMPRTL___kmpc_get_hardware_thread_id_in_block,
40061ac55f4cSDimitry Andric OMPRTL___kmpc_barrier_simple_spmd}))
40071ac55f4cSDimitry Andric return false;
40081ac55f4cSDimitry Andric
4009bdd1243dSDimitry Andric if (!SPMDCompatibilityTracker.isAssumed()) {
4010bdd1243dSDimitry Andric for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
4011bdd1243dSDimitry Andric if (!NonCompatibleI)
4012bdd1243dSDimitry Andric continue;
4013bdd1243dSDimitry Andric
4014bdd1243dSDimitry Andric // Skip diagnostics on calls to known OpenMP runtime functions for now.
4015bdd1243dSDimitry Andric if (auto *CB = dyn_cast<CallBase>(NonCompatibleI))
4016bdd1243dSDimitry Andric if (OMPInfoCache.RTLFunctions.contains(CB->getCalledFunction()))
4017bdd1243dSDimitry Andric continue;
4018bdd1243dSDimitry Andric
4019bdd1243dSDimitry Andric auto Remark = [&](OptimizationRemarkAnalysis ORA) {
4020bdd1243dSDimitry Andric ORA << "Value has potential side effects preventing SPMD-mode "
4021bdd1243dSDimitry Andric "execution";
4022bdd1243dSDimitry Andric if (isa<CallBase>(NonCompatibleI)) {
4023bdd1243dSDimitry Andric ORA << ". 
Add `__attribute__((assume(\"ompx_spmd_amenable\")))` to " 4024bdd1243dSDimitry Andric "the called function to override"; 4025bdd1243dSDimitry Andric } 4026bdd1243dSDimitry Andric return ORA << "."; 4027bdd1243dSDimitry Andric }; 4028bdd1243dSDimitry Andric A.emitRemark<OptimizationRemarkAnalysis>(NonCompatibleI, "OMP121", 4029bdd1243dSDimitry Andric Remark); 4030bdd1243dSDimitry Andric 4031bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << TAG << "SPMD-incompatible side-effect: " 4032bdd1243dSDimitry Andric << *NonCompatibleI << "\n"); 4033bdd1243dSDimitry Andric } 4034bdd1243dSDimitry Andric 4035bdd1243dSDimitry Andric return false; 4036bdd1243dSDimitry Andric } 4037bdd1243dSDimitry Andric 4038bdd1243dSDimitry Andric // Get the actual kernel, could be the caller of the anchor scope if we have 4039bdd1243dSDimitry Andric // a debug wrapper. 4040bdd1243dSDimitry Andric Function *Kernel = getAnchorScope(); 4041bdd1243dSDimitry Andric if (Kernel->hasLocalLinkage()) { 4042bdd1243dSDimitry Andric assert(Kernel->hasOneUse() && "Unexpected use of debug kernel wrapper."); 4043bdd1243dSDimitry Andric auto *CB = cast<CallBase>(Kernel->user_back()); 4044bdd1243dSDimitry Andric Kernel = CB->getCaller(); 4045bdd1243dSDimitry Andric } 4046*06c3fb27SDimitry Andric assert(omp::isKernel(*Kernel) && "Expected kernel function!"); 4047bdd1243dSDimitry Andric 4048bdd1243dSDimitry Andric // Check if the kernel is already in SPMD mode, if so, return success. 4049bdd1243dSDimitry Andric GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable( 4050bdd1243dSDimitry Andric (Kernel->getName() + "_exec_mode").str()); 4051bdd1243dSDimitry Andric assert(ExecMode && "Kernel without exec mode?"); 4052bdd1243dSDimitry Andric assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!"); 4053bdd1243dSDimitry Andric 4054bdd1243dSDimitry Andric // Set the global exec mode flag to indicate SPMD-Generic mode. 4055bdd1243dSDimitry Andric assert(isa<ConstantInt>(ExecMode->getInitializer()) && 4056bdd1243dSDimitry Andric "ExecMode is not an integer!"); 4057bdd1243dSDimitry Andric const int8_t ExecModeVal = 4058bdd1243dSDimitry Andric cast<ConstantInt>(ExecMode->getInitializer())->getSExtValue(); 4059bdd1243dSDimitry Andric if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC) 4060bdd1243dSDimitry Andric return true; 4061bdd1243dSDimitry Andric 4062bdd1243dSDimitry Andric // We will now unconditionally modify the IR, indicate a change. 4063bdd1243dSDimitry Andric Changed = ChangeStatus::CHANGED; 4064bdd1243dSDimitry Andric 4065bdd1243dSDimitry Andric // Do not use instruction guards when no parallel is present inside 4066bdd1243dSDimitry Andric // the target region. 4067bdd1243dSDimitry Andric if (mayContainParallelRegion()) 4068bdd1243dSDimitry Andric insertInstructionGuardsHelper(A); 4069bdd1243dSDimitry Andric else 4070bdd1243dSDimitry Andric forceSingleThreadPerWorkgroupHelper(A); 4071349cc55cSDimitry Andric 4072349cc55cSDimitry Andric // Adjust the global exec mode flag that tells the runtime what mode this 4073349cc55cSDimitry Andric // kernel is executed in. 
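// Illustrative sketch (assuming a hypothetical kernel `foo`, not exact IR):
// the device image carries an i8 global named `foo_exec_mode` whose
// initializer the offload runtime reads to decide how to launch the kernel.
// The code below keeps the GENERIC bit and ORs in
// OMP_TGT_EXEC_MODE_GENERIC_SPMD, so the runtime may launch this originally
// generic kernel in SPMD fashion.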
4074349cc55cSDimitry Andric assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC && 4075349cc55cSDimitry Andric "Initially non-SPMD kernel has SPMD exec mode!"); 4076fe6060f1SDimitry Andric ExecMode->setInitializer( 4077349cc55cSDimitry Andric ConstantInt::get(ExecMode->getInitializer()->getType(), 4078349cc55cSDimitry Andric ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD)); 4079fe6060f1SDimitry Andric 4080fe6060f1SDimitry Andric // Next rewrite the init and deinit calls to indicate we use SPMD-mode now. 4081349cc55cSDimitry Andric const int InitModeArgNo = 1; 4082349cc55cSDimitry Andric const int DeinitModeArgNo = 1; 4083fe6060f1SDimitry Andric const int InitUseStateMachineArgNo = 2; 4084fe6060f1SDimitry Andric 4085fe6060f1SDimitry Andric auto &Ctx = getAnchorValue().getContext(); 4086349cc55cSDimitry Andric A.changeUseAfterManifest( 4087349cc55cSDimitry Andric KernelInitCB->getArgOperandUse(InitModeArgNo), 4088349cc55cSDimitry Andric *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx), 4089349cc55cSDimitry Andric OMP_TGT_EXEC_MODE_SPMD)); 4090fe6060f1SDimitry Andric A.changeUseAfterManifest( 4091fe6060f1SDimitry Andric KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), 409204eeddc0SDimitry Andric *ConstantInt::getBool(Ctx, false)); 4093fe6060f1SDimitry Andric A.changeUseAfterManifest( 4094349cc55cSDimitry Andric KernelDeinitCB->getArgOperandUse(DeinitModeArgNo), 4095349cc55cSDimitry Andric *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx), 4096349cc55cSDimitry Andric OMP_TGT_EXEC_MODE_SPMD)); 4097fe6060f1SDimitry Andric 4098fe6060f1SDimitry Andric ++NumOpenMPTargetRegionKernelsSPMD; 4099fe6060f1SDimitry Andric 4100fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemark OR) { 4101fe6060f1SDimitry Andric return OR << "Transformed generic-mode kernel to SPMD-mode."; 4102fe6060f1SDimitry Andric }; 4103fe6060f1SDimitry Andric A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP120", Remark); 4104fe6060f1SDimitry Andric return true; 4105fe6060f1SDimitry Andric }; 4106fe6060f1SDimitry Andric 4107fe6060f1SDimitry Andric ChangeStatus buildCustomStateMachine(Attributor &A) { 4108349cc55cSDimitry Andric // If we have disabled state machine rewrites, don't make a custom one 4109349cc55cSDimitry Andric if (DisableOpenMPOptStateMachineRewrite) 4110349cc55cSDimitry Andric return ChangeStatus::UNCHANGED; 4111fe6060f1SDimitry Andric 4112349cc55cSDimitry Andric // Don't rewrite the state machine if we are not in a valid state. 4113349cc55cSDimitry Andric if (!ReachedKnownParallelRegions.isValidState()) 4114349cc55cSDimitry Andric return ChangeStatus::UNCHANGED; 4115349cc55cSDimitry Andric 41161ac55f4cSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 41171ac55f4cSDimitry Andric if (!OMPInfoCache.runtimeFnsAvailable( 41181ac55f4cSDimitry Andric {OMPRTL___kmpc_get_hardware_num_threads_in_block, 41191ac55f4cSDimitry Andric OMPRTL___kmpc_get_warp_size, OMPRTL___kmpc_barrier_simple_generic, 41201ac55f4cSDimitry Andric OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel})) 41211ac55f4cSDimitry Andric return ChangeStatus::UNCHANGED; 41221ac55f4cSDimitry Andric 4123349cc55cSDimitry Andric const int InitModeArgNo = 1; 4124fe6060f1SDimitry Andric const int InitUseStateMachineArgNo = 2; 4125fe6060f1SDimitry Andric 4126fe6060f1SDimitry Andric // Check if the current configuration is non-SPMD and generic state machine. 
4127fe6060f1SDimitry Andric // If we already have SPMD mode or a custom state machine we do not need to 4128fe6060f1SDimitry Andric // go any further. If it is anything but a constant something is weird and 4129fe6060f1SDimitry Andric // we give up. 4130fe6060f1SDimitry Andric ConstantInt *UseStateMachine = dyn_cast<ConstantInt>( 4131fe6060f1SDimitry Andric KernelInitCB->getArgOperand(InitUseStateMachineArgNo)); 4132349cc55cSDimitry Andric ConstantInt *Mode = 4133349cc55cSDimitry Andric dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo)); 4134fe6060f1SDimitry Andric 4135fe6060f1SDimitry Andric // If we are stuck with generic mode, try to create a custom device (=GPU) 4136fe6060f1SDimitry Andric // state machine which is specialized for the parallel regions that are 4137fe6060f1SDimitry Andric // reachable by the kernel. 4138349cc55cSDimitry Andric if (!UseStateMachine || UseStateMachine->isZero() || !Mode || 4139349cc55cSDimitry Andric (Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) 4140fe6060f1SDimitry Andric return ChangeStatus::UNCHANGED; 4141fe6060f1SDimitry Andric 4142fe6060f1SDimitry Andric // If not SPMD mode, indicate we use a custom state machine now. 4143fe6060f1SDimitry Andric auto &Ctx = getAnchorValue().getContext(); 414404eeddc0SDimitry Andric auto *FalseVal = ConstantInt::getBool(Ctx, false); 4145fe6060f1SDimitry Andric A.changeUseAfterManifest( 4146fe6060f1SDimitry Andric KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal); 4147fe6060f1SDimitry Andric 4148fe6060f1SDimitry Andric // If we don't actually need a state machine we are done here. This can 4149fe6060f1SDimitry Andric // happen if there simply are no parallel regions. In the resulting kernel 4150fe6060f1SDimitry Andric // all worker threads will simply exit right away, leaving the main thread 4151fe6060f1SDimitry Andric // to do the work alone. 4152349cc55cSDimitry Andric if (!mayContainParallelRegion()) { 4153fe6060f1SDimitry Andric ++NumOpenMPTargetRegionKernelsWithoutStateMachine; 4154fe6060f1SDimitry Andric 4155fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemark OR) { 4156fe6060f1SDimitry Andric return OR << "Removing unused state machine from generic-mode kernel."; 4157fe6060f1SDimitry Andric }; 4158fe6060f1SDimitry Andric A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP130", Remark); 4159fe6060f1SDimitry Andric 4160fe6060f1SDimitry Andric return ChangeStatus::CHANGED; 4161fe6060f1SDimitry Andric } 4162fe6060f1SDimitry Andric 4163fe6060f1SDimitry Andric // Keep track in the statistics of our new shiny custom state machine. 
4164fe6060f1SDimitry Andric if (ReachedUnknownParallelRegions.empty()) { 4165fe6060f1SDimitry Andric ++NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback; 4166fe6060f1SDimitry Andric 4167fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemark OR) { 4168fe6060f1SDimitry Andric return OR << "Rewriting generic-mode kernel with a customized state " 4169fe6060f1SDimitry Andric "machine."; 4170fe6060f1SDimitry Andric }; 4171fe6060f1SDimitry Andric A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP131", Remark); 4172fe6060f1SDimitry Andric } else { 4173fe6060f1SDimitry Andric ++NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback; 4174fe6060f1SDimitry Andric 4175fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemarkAnalysis OR) { 4176fe6060f1SDimitry Andric return OR << "Generic-mode kernel is executed with a customized state " 4177fe6060f1SDimitry Andric "machine that requires a fallback."; 4178fe6060f1SDimitry Andric }; 4179fe6060f1SDimitry Andric A.emitRemark<OptimizationRemarkAnalysis>(KernelInitCB, "OMP132", Remark); 4180fe6060f1SDimitry Andric 4181fe6060f1SDimitry Andric // Tell the user why we ended up with a fallback. 4182fe6060f1SDimitry Andric for (CallBase *UnknownParallelRegionCB : ReachedUnknownParallelRegions) { 4183fe6060f1SDimitry Andric if (!UnknownParallelRegionCB) 4184fe6060f1SDimitry Andric continue; 4185fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemarkAnalysis ORA) { 4186fe6060f1SDimitry Andric return ORA << "Call may contain unknown parallel regions. Use " 4187fe6060f1SDimitry Andric << "`__attribute__((assume(\"omp_no_parallelism\")))` to " 4188fe6060f1SDimitry Andric "override."; 4189fe6060f1SDimitry Andric }; 4190fe6060f1SDimitry Andric A.emitRemark<OptimizationRemarkAnalysis>(UnknownParallelRegionCB, 4191fe6060f1SDimitry Andric "OMP133", Remark); 4192fe6060f1SDimitry Andric } 4193fe6060f1SDimitry Andric } 4194fe6060f1SDimitry Andric 4195fe6060f1SDimitry Andric // Create all the blocks: 4196fe6060f1SDimitry Andric // 4197fe6060f1SDimitry Andric // InitCB = __kmpc_target_init(...) 4198349cc55cSDimitry Andric // BlockHwSize = 4199349cc55cSDimitry Andric // __kmpc_get_hardware_num_threads_in_block(); 4200349cc55cSDimitry Andric // WarpSize = __kmpc_get_warp_size(); 4201349cc55cSDimitry Andric // BlockSize = BlockHwSize - WarpSize; 4202fb03ea46SDimitry Andric // IsWorkerCheckBB: bool IsWorker = InitCB != -1; 4203fe6060f1SDimitry Andric // if (IsWorker) { 4204fb03ea46SDimitry Andric // if (InitCB >= BlockSize) return; 4205349cc55cSDimitry Andric // SMBeginBB: __kmpc_barrier_simple_generic(...); 4206fe6060f1SDimitry Andric // void *WorkFn; 4207fe6060f1SDimitry Andric // bool Active = __kmpc_kernel_parallel(&WorkFn); 4208fe6060f1SDimitry Andric // if (!WorkFn) return; 4209fe6060f1SDimitry Andric // SMIsActiveCheckBB: if (Active) { 4210fe6060f1SDimitry Andric // SMIfCascadeCurrentBB: if (WorkFn == <ParFn0>) 4211fe6060f1SDimitry Andric // ParFn0(...); 4212fe6060f1SDimitry Andric // SMIfCascadeCurrentBB: else if (WorkFn == <ParFn1>) 4213fe6060f1SDimitry Andric // ParFn1(...); 4214fe6060f1SDimitry Andric // ... 
4215fe6060f1SDimitry Andric // SMIfCascadeCurrentBB: else 4216fe6060f1SDimitry Andric // ((WorkFnTy*)WorkFn)(...); 4217fe6060f1SDimitry Andric // SMEndParallelBB: __kmpc_kernel_end_parallel(...); 4218fe6060f1SDimitry Andric // } 4219349cc55cSDimitry Andric // SMDoneBB: __kmpc_barrier_simple_generic(...); 4220fe6060f1SDimitry Andric // goto SMBeginBB; 4221fe6060f1SDimitry Andric // } 4222fe6060f1SDimitry Andric // UserCodeEntryBB: // user code 4223fe6060f1SDimitry Andric // __kmpc_target_deinit(...) 4224fe6060f1SDimitry Andric // 4225fe6060f1SDimitry Andric Function *Kernel = getAssociatedFunction(); 4226fe6060f1SDimitry Andric assert(Kernel && "Expected an associated function!"); 4227fe6060f1SDimitry Andric 4228fe6060f1SDimitry Andric BasicBlock *InitBB = KernelInitCB->getParent(); 4229fe6060f1SDimitry Andric BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock( 4230fe6060f1SDimitry Andric KernelInitCB->getNextNode(), "thread.user_code.check"); 4231349cc55cSDimitry Andric BasicBlock *IsWorkerCheckBB = 4232349cc55cSDimitry Andric BasicBlock::Create(Ctx, "is_worker_check", Kernel, UserCodeEntryBB); 4233fe6060f1SDimitry Andric BasicBlock *StateMachineBeginBB = BasicBlock::Create( 4234fe6060f1SDimitry Andric Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB); 4235fe6060f1SDimitry Andric BasicBlock *StateMachineFinishedBB = BasicBlock::Create( 4236fe6060f1SDimitry Andric Ctx, "worker_state_machine.finished", Kernel, UserCodeEntryBB); 4237fe6060f1SDimitry Andric BasicBlock *StateMachineIsActiveCheckBB = BasicBlock::Create( 4238fe6060f1SDimitry Andric Ctx, "worker_state_machine.is_active.check", Kernel, UserCodeEntryBB); 4239fe6060f1SDimitry Andric BasicBlock *StateMachineIfCascadeCurrentBB = 4240fe6060f1SDimitry Andric BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check", 4241fe6060f1SDimitry Andric Kernel, UserCodeEntryBB); 4242fe6060f1SDimitry Andric BasicBlock *StateMachineEndParallelBB = 4243fe6060f1SDimitry Andric BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.end", 4244fe6060f1SDimitry Andric Kernel, UserCodeEntryBB); 4245fe6060f1SDimitry Andric BasicBlock *StateMachineDoneBarrierBB = BasicBlock::Create( 4246fe6060f1SDimitry Andric Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB); 4247fe6060f1SDimitry Andric A.registerManifestAddedBasicBlock(*InitBB); 4248fe6060f1SDimitry Andric A.registerManifestAddedBasicBlock(*UserCodeEntryBB); 4249349cc55cSDimitry Andric A.registerManifestAddedBasicBlock(*IsWorkerCheckBB); 4250fe6060f1SDimitry Andric A.registerManifestAddedBasicBlock(*StateMachineBeginBB); 4251fe6060f1SDimitry Andric A.registerManifestAddedBasicBlock(*StateMachineFinishedBB); 4252fe6060f1SDimitry Andric A.registerManifestAddedBasicBlock(*StateMachineIsActiveCheckBB); 4253fe6060f1SDimitry Andric A.registerManifestAddedBasicBlock(*StateMachineIfCascadeCurrentBB); 4254fe6060f1SDimitry Andric A.registerManifestAddedBasicBlock(*StateMachineEndParallelBB); 4255fe6060f1SDimitry Andric A.registerManifestAddedBasicBlock(*StateMachineDoneBarrierBB); 4256fe6060f1SDimitry Andric 4257fe6060f1SDimitry Andric const DebugLoc &DLoc = KernelInitCB->getDebugLoc(); 4258fe6060f1SDimitry Andric ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc); 4259fe6060f1SDimitry Andric InitBB->getTerminator()->eraseFromParent(); 4260349cc55cSDimitry Andric 4261fb03ea46SDimitry Andric Instruction *IsWorker = 4262fb03ea46SDimitry Andric ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB, 4263fb03ea46SDimitry Andric 
ConstantInt::get(KernelInitCB->getType(), -1), 4264fb03ea46SDimitry Andric "thread.is_worker", InitBB); 4265fb03ea46SDimitry Andric IsWorker->setDebugLoc(DLoc); 4266fb03ea46SDimitry Andric BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB); 4267fb03ea46SDimitry Andric 4268349cc55cSDimitry Andric Module &M = *Kernel->getParent(); 4269349cc55cSDimitry Andric FunctionCallee BlockHwSizeFn = 4270349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( 4271349cc55cSDimitry Andric M, OMPRTL___kmpc_get_hardware_num_threads_in_block); 4272349cc55cSDimitry Andric FunctionCallee WarpSizeFn = 4273349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( 4274349cc55cSDimitry Andric M, OMPRTL___kmpc_get_warp_size); 427504eeddc0SDimitry Andric CallInst *BlockHwSize = 4276fb03ea46SDimitry Andric CallInst::Create(BlockHwSizeFn, "block.hw_size", IsWorkerCheckBB); 427704eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize); 4278349cc55cSDimitry Andric BlockHwSize->setDebugLoc(DLoc); 4279fb03ea46SDimitry Andric CallInst *WarpSize = 4280fb03ea46SDimitry Andric CallInst::Create(WarpSizeFn, "warp.size", IsWorkerCheckBB); 428104eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize); 4282349cc55cSDimitry Andric WarpSize->setDebugLoc(DLoc); 4283fb03ea46SDimitry Andric Instruction *BlockSize = BinaryOperator::CreateSub( 4284fb03ea46SDimitry Andric BlockHwSize, WarpSize, "block.size", IsWorkerCheckBB); 4285349cc55cSDimitry Andric BlockSize->setDebugLoc(DLoc); 4286fb03ea46SDimitry Andric Instruction *IsMainOrWorker = ICmpInst::Create( 4287fb03ea46SDimitry Andric ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, BlockSize, 4288fb03ea46SDimitry Andric "thread.is_main_or_worker", IsWorkerCheckBB); 4289349cc55cSDimitry Andric IsMainOrWorker->setDebugLoc(DLoc); 4290fb03ea46SDimitry Andric BranchInst::Create(StateMachineBeginBB, StateMachineFinishedBB, 4291fb03ea46SDimitry Andric IsMainOrWorker, IsWorkerCheckBB); 42928c6f6c0cSDimitry Andric 4293fe6060f1SDimitry Andric // Create local storage for the work function pointer. 
42948c6f6c0cSDimitry Andric const DataLayout &DL = M.getDataLayout(); 4295fe6060f1SDimitry Andric Type *VoidPtrTy = Type::getInt8PtrTy(Ctx); 42968c6f6c0cSDimitry Andric Instruction *WorkFnAI = 42978c6f6c0cSDimitry Andric new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr, 42988c6f6c0cSDimitry Andric "worker.work_fn.addr", &Kernel->getEntryBlock().front()); 4299fe6060f1SDimitry Andric WorkFnAI->setDebugLoc(DLoc); 4300fe6060f1SDimitry Andric 4301fe6060f1SDimitry Andric OMPInfoCache.OMPBuilder.updateToLocation( 4302fe6060f1SDimitry Andric OpenMPIRBuilder::LocationDescription( 4303fe6060f1SDimitry Andric IRBuilder<>::InsertPoint(StateMachineBeginBB, 4304fe6060f1SDimitry Andric StateMachineBeginBB->end()), 4305fe6060f1SDimitry Andric DLoc)); 4306fe6060f1SDimitry Andric 4307fe6060f1SDimitry Andric Value *Ident = KernelInitCB->getArgOperand(0); 4308fe6060f1SDimitry Andric Value *GTid = KernelInitCB; 4309fe6060f1SDimitry Andric 4310fe6060f1SDimitry Andric FunctionCallee BarrierFn = 4311fe6060f1SDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( 4312349cc55cSDimitry Andric M, OMPRTL___kmpc_barrier_simple_generic); 431304eeddc0SDimitry Andric CallInst *Barrier = 431404eeddc0SDimitry Andric CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB); 431504eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(BarrierFn, Barrier); 431604eeddc0SDimitry Andric Barrier->setDebugLoc(DLoc); 4317fe6060f1SDimitry Andric 43188c6f6c0cSDimitry Andric if (WorkFnAI->getType()->getPointerAddressSpace() != 43198c6f6c0cSDimitry Andric (unsigned int)AddressSpace::Generic) { 43208c6f6c0cSDimitry Andric WorkFnAI = new AddrSpaceCastInst( 4321*06c3fb27SDimitry Andric WorkFnAI, PointerType::get(Ctx, (unsigned int)AddressSpace::Generic), 43228c6f6c0cSDimitry Andric WorkFnAI->getName() + ".generic", StateMachineBeginBB); 43238c6f6c0cSDimitry Andric WorkFnAI->setDebugLoc(DLoc); 43248c6f6c0cSDimitry Andric } 43258c6f6c0cSDimitry Andric 4326fe6060f1SDimitry Andric FunctionCallee KernelParallelFn = 4327fe6060f1SDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( 4328fe6060f1SDimitry Andric M, OMPRTL___kmpc_kernel_parallel); 432904eeddc0SDimitry Andric CallInst *IsActiveWorker = CallInst::Create( 4330fe6060f1SDimitry Andric KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB); 433104eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(KernelParallelFn, IsActiveWorker); 4332fe6060f1SDimitry Andric IsActiveWorker->setDebugLoc(DLoc); 4333fe6060f1SDimitry Andric Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn", 4334fe6060f1SDimitry Andric StateMachineBeginBB); 4335fe6060f1SDimitry Andric WorkFn->setDebugLoc(DLoc); 4336fe6060f1SDimitry Andric 4337fe6060f1SDimitry Andric FunctionType *ParallelRegionFnTy = FunctionType::get( 4338fe6060f1SDimitry Andric Type::getVoidTy(Ctx), {Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx)}, 4339fe6060f1SDimitry Andric false); 4340fe6060f1SDimitry Andric Value *WorkFnCast = BitCastInst::CreatePointerBitCastOrAddrSpaceCast( 4341fe6060f1SDimitry Andric WorkFn, ParallelRegionFnTy->getPointerTo(), "worker.work_fn.addr_cast", 4342fe6060f1SDimitry Andric StateMachineBeginBB); 4343fe6060f1SDimitry Andric 4344fe6060f1SDimitry Andric Instruction *IsDone = 4345fe6060f1SDimitry Andric ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn, 4346fe6060f1SDimitry Andric Constant::getNullValue(VoidPtrTy), "worker.is_done", 4347fe6060f1SDimitry Andric StateMachineBeginBB); 4348fe6060f1SDimitry Andric 
IsDone->setDebugLoc(DLoc); 4349fe6060f1SDimitry Andric BranchInst::Create(StateMachineFinishedBB, StateMachineIsActiveCheckBB, 4350fe6060f1SDimitry Andric IsDone, StateMachineBeginBB) 4351fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4352fe6060f1SDimitry Andric 4353fe6060f1SDimitry Andric BranchInst::Create(StateMachineIfCascadeCurrentBB, 4354fe6060f1SDimitry Andric StateMachineDoneBarrierBB, IsActiveWorker, 4355fe6060f1SDimitry Andric StateMachineIsActiveCheckBB) 4356fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4357fe6060f1SDimitry Andric 4358fe6060f1SDimitry Andric Value *ZeroArg = 4359fe6060f1SDimitry Andric Constant::getNullValue(ParallelRegionFnTy->getParamType(0)); 4360fe6060f1SDimitry Andric 4361fe6060f1SDimitry Andric // Now that we have most of the CFG skeleton it is time for the if-cascade 4362fe6060f1SDimitry Andric // that checks the function pointer we got from the runtime against the 4363fe6060f1SDimitry Andric // parallel regions we expect, if there are any. 4364349cc55cSDimitry Andric for (int I = 0, E = ReachedKnownParallelRegions.size(); I < E; ++I) { 4365349cc55cSDimitry Andric auto *ParallelRegion = ReachedKnownParallelRegions[I]; 4366fe6060f1SDimitry Andric BasicBlock *PRExecuteBB = BasicBlock::Create( 4367fe6060f1SDimitry Andric Ctx, "worker_state_machine.parallel_region.execute", Kernel, 4368fe6060f1SDimitry Andric StateMachineEndParallelBB); 4369fe6060f1SDimitry Andric CallInst::Create(ParallelRegion, {ZeroArg, GTid}, "", PRExecuteBB) 4370fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4371fe6060f1SDimitry Andric BranchInst::Create(StateMachineEndParallelBB, PRExecuteBB) 4372fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4373fe6060f1SDimitry Andric 4374fe6060f1SDimitry Andric BasicBlock *PRNextBB = 4375fe6060f1SDimitry Andric BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check", 4376fe6060f1SDimitry Andric Kernel, StateMachineEndParallelBB); 4377fe6060f1SDimitry Andric 4378fe6060f1SDimitry Andric // Check if we need to compare the pointer at all or if we can just 4379fe6060f1SDimitry Andric // call the parallel region function. 4380fe6060f1SDimitry Andric Value *IsPR; 4381349cc55cSDimitry Andric if (I + 1 < E || !ReachedUnknownParallelRegions.empty()) { 4382fe6060f1SDimitry Andric Instruction *CmpI = ICmpInst::Create( 4383fe6060f1SDimitry Andric ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFnCast, ParallelRegion, 4384fe6060f1SDimitry Andric "worker.check_parallel_region", StateMachineIfCascadeCurrentBB); 4385fe6060f1SDimitry Andric CmpI->setDebugLoc(DLoc); 4386fe6060f1SDimitry Andric IsPR = CmpI; 4387fe6060f1SDimitry Andric } else { 4388fe6060f1SDimitry Andric IsPR = ConstantInt::getTrue(Ctx); 4389fe6060f1SDimitry Andric } 4390fe6060f1SDimitry Andric 4391fe6060f1SDimitry Andric BranchInst::Create(PRExecuteBB, PRNextBB, IsPR, 4392fe6060f1SDimitry Andric StateMachineIfCascadeCurrentBB) 4393fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4394fe6060f1SDimitry Andric StateMachineIfCascadeCurrentBB = PRNextBB; 4395fe6060f1SDimitry Andric } 4396fe6060f1SDimitry Andric 4397fe6060f1SDimitry Andric // At the end of the if-cascade we place the indirect function pointer call 4398fe6060f1SDimitry Andric // in case we might need it, that is if there can be parallel regions we 4399fe6060f1SDimitry Andric // have not handled in the if-cascade above. 
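// Illustrative sketch (assumed shape, not exact IR) of the fallback emitted
// below when unknown parallel regions may be reached:
//   worker_state_machine.parallel_region.fallback.execute:
//     call void %worker.work_fn.addr_cast(i16 0, i32 %gtid)
// where %gtid stands for the value returned by __kmpc_target_init, i.e. the
// unknown work function is simply invoked through the casted pointer.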
4400fe6060f1SDimitry Andric if (!ReachedUnknownParallelRegions.empty()) { 4401fe6060f1SDimitry Andric StateMachineIfCascadeCurrentBB->setName( 4402fe6060f1SDimitry Andric "worker_state_machine.parallel_region.fallback.execute"); 4403fe6060f1SDimitry Andric CallInst::Create(ParallelRegionFnTy, WorkFnCast, {ZeroArg, GTid}, "", 4404fe6060f1SDimitry Andric StateMachineIfCascadeCurrentBB) 4405fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4406fe6060f1SDimitry Andric } 4407fe6060f1SDimitry Andric BranchInst::Create(StateMachineEndParallelBB, 4408fe6060f1SDimitry Andric StateMachineIfCascadeCurrentBB) 4409fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4410fe6060f1SDimitry Andric 441104eeddc0SDimitry Andric FunctionCallee EndParallelFn = 441204eeddc0SDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( 441304eeddc0SDimitry Andric M, OMPRTL___kmpc_kernel_end_parallel); 441404eeddc0SDimitry Andric CallInst *EndParallel = 441504eeddc0SDimitry Andric CallInst::Create(EndParallelFn, {}, "", StateMachineEndParallelBB); 441604eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(EndParallelFn, EndParallel); 441704eeddc0SDimitry Andric EndParallel->setDebugLoc(DLoc); 4418fe6060f1SDimitry Andric BranchInst::Create(StateMachineDoneBarrierBB, StateMachineEndParallelBB) 4419fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4420fe6060f1SDimitry Andric 4421fe6060f1SDimitry Andric CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineDoneBarrierBB) 4422fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4423fe6060f1SDimitry Andric BranchInst::Create(StateMachineBeginBB, StateMachineDoneBarrierBB) 4424fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4425fe6060f1SDimitry Andric 4426fe6060f1SDimitry Andric return ChangeStatus::CHANGED; 4427fe6060f1SDimitry Andric } 4428fe6060f1SDimitry Andric 4429fe6060f1SDimitry Andric /// Fixpoint iteration update function. Will be called every time a dependence 4430fe6060f1SDimitry Andric /// changed its state (and in the beginning). 4431fe6060f1SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 4432fe6060f1SDimitry Andric KernelInfoState StateBefore = getState(); 4433fe6060f1SDimitry Andric 4434fe6060f1SDimitry Andric // Callback to check a read/write instruction. 4435fe6060f1SDimitry Andric auto CheckRWInst = [&](Instruction &I) { 4436fe6060f1SDimitry Andric // We handle calls later. 4437fe6060f1SDimitry Andric if (isa<CallBase>(I)) 4438fe6060f1SDimitry Andric return true; 4439fe6060f1SDimitry Andric // We only care about write effects. 4440fe6060f1SDimitry Andric if (!I.mayWriteToMemory()) 4441fe6060f1SDimitry Andric return true; 4442fe6060f1SDimitry Andric if (auto *SI = dyn_cast<StoreInst>(&I)) { 4443*06c3fb27SDimitry Andric const auto *UnderlyingObjsAA = A.getAAFor<AAUnderlyingObjects>( 4444bdd1243dSDimitry Andric *this, IRPosition::value(*SI->getPointerOperand()), 4445bdd1243dSDimitry Andric DepClassTy::OPTIONAL); 4446*06c3fb27SDimitry Andric auto *HS = A.getAAFor<AAHeapToStack>( 4447349cc55cSDimitry Andric *this, IRPosition::function(*I.getFunction()), 4448349cc55cSDimitry Andric DepClassTy::OPTIONAL); 4449*06c3fb27SDimitry Andric if (UnderlyingObjsAA && 4450*06c3fb27SDimitry Andric UnderlyingObjsAA->forallUnderlyingObjects([&](Value &Obj) { 4451bdd1243dSDimitry Andric if (AA::isAssumedThreadLocalObject(A, Obj, *this)) 4452349cc55cSDimitry Andric return true; 4453bdd1243dSDimitry Andric // Check for AAHeapToStack moved objects which must not be 4454bdd1243dSDimitry Andric // guarded. 
4455bdd1243dSDimitry Andric auto *CB = dyn_cast<CallBase>(&Obj);
4456*06c3fb27SDimitry Andric return CB && HS && HS->isAssumedHeapToStack(*CB);
4457bdd1243dSDimitry Andric }))
4458bdd1243dSDimitry Andric return true;
4459349cc55cSDimitry Andric }
4460349cc55cSDimitry Andric
4461349cc55cSDimitry Andric // Insert instruction that needs guarding.
4462fe6060f1SDimitry Andric SPMDCompatibilityTracker.insert(&I);
4463fe6060f1SDimitry Andric return true;
4464fe6060f1SDimitry Andric };
4465fe6060f1SDimitry Andric
4466fe6060f1SDimitry Andric bool UsedAssumedInformationInCheckRWInst = false;
4467fe6060f1SDimitry Andric if (!SPMDCompatibilityTracker.isAtFixpoint())
4468fe6060f1SDimitry Andric if (!A.checkForAllReadWriteInstructions(
4469fe6060f1SDimitry Andric CheckRWInst, *this, UsedAssumedInformationInCheckRWInst))
4470fe6060f1SDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4471fe6060f1SDimitry Andric
44724824e7fdSDimitry Andric bool UsedAssumedInformationFromReachingKernels = false;
4473fe6060f1SDimitry Andric if (!IsKernelEntry) {
4474fe6060f1SDimitry Andric updateParallelLevels(A);
4475349cc55cSDimitry Andric
44764824e7fdSDimitry Andric bool AllReachingKernelsKnown = true;
44774824e7fdSDimitry Andric updateReachingKernelEntries(A, AllReachingKernelsKnown);
44784824e7fdSDimitry Andric UsedAssumedInformationFromReachingKernels = !AllReachingKernelsKnown;
44794824e7fdSDimitry Andric
4480bdd1243dSDimitry Andric if (!SPMDCompatibilityTracker.empty()) {
4481349cc55cSDimitry Andric if (!ParallelLevels.isValidState())
4482349cc55cSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint();
44834824e7fdSDimitry Andric else if (!ReachingKernelEntries.isValidState())
44844824e7fdSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4485bdd1243dSDimitry Andric else {
44864824e7fdSDimitry Andric // Check if all reaching kernels agree on the mode as we can otherwise
44874824e7fdSDimitry Andric // not guard instructions. We might not be sure about the mode so
44884824e7fdSDimitry Andric // we cannot fix the internal SPMD-ization state either.
44894824e7fdSDimitry Andric int SPMD = 0, Generic = 0;
44904824e7fdSDimitry Andric for (auto *Kernel : ReachingKernelEntries) {
4491*06c3fb27SDimitry Andric auto *CBAA = A.getAAFor<AAKernelInfo>(
44924824e7fdSDimitry Andric *this, IRPosition::function(*Kernel), DepClassTy::OPTIONAL);
4493*06c3fb27SDimitry Andric if (CBAA && CBAA->SPMDCompatibilityTracker.isValidState() &&
4494*06c3fb27SDimitry Andric CBAA->SPMDCompatibilityTracker.isAssumed())
44954824e7fdSDimitry Andric ++SPMD;
44964824e7fdSDimitry Andric else
44974824e7fdSDimitry Andric ++Generic;
4498*06c3fb27SDimitry Andric if (!CBAA || !CBAA->SPMDCompatibilityTracker.isAtFixpoint())
44994824e7fdSDimitry Andric UsedAssumedInformationFromReachingKernels = true;
45004824e7fdSDimitry Andric }
45014824e7fdSDimitry Andric if (SPMD != 0 && Generic != 0)
45024824e7fdSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint();
45034824e7fdSDimitry Andric }
4504fe6060f1SDimitry Andric }
4505bdd1243dSDimitry Andric }
4506fe6060f1SDimitry Andric
4507fe6060f1SDimitry Andric // Callback to check a call instruction.
4508349cc55cSDimitry Andric bool AllParallelRegionStatesWereFixed = true; 4509fe6060f1SDimitry Andric bool AllSPMDStatesWereFixed = true; 4510fe6060f1SDimitry Andric auto CheckCallInst = [&](Instruction &I) { 4511fe6060f1SDimitry Andric auto &CB = cast<CallBase>(I); 4512*06c3fb27SDimitry Andric auto *CBAA = A.getAAFor<AAKernelInfo>( 4513fe6060f1SDimitry Andric *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL); 4514*06c3fb27SDimitry Andric if (!CBAA) 4515*06c3fb27SDimitry Andric return false; 4516*06c3fb27SDimitry Andric getState() ^= CBAA->getState(); 4517*06c3fb27SDimitry Andric AllSPMDStatesWereFixed &= CBAA->SPMDCompatibilityTracker.isAtFixpoint(); 4518349cc55cSDimitry Andric AllParallelRegionStatesWereFixed &= 4519*06c3fb27SDimitry Andric CBAA->ReachedKnownParallelRegions.isAtFixpoint(); 4520349cc55cSDimitry Andric AllParallelRegionStatesWereFixed &= 4521*06c3fb27SDimitry Andric CBAA->ReachedUnknownParallelRegions.isAtFixpoint(); 4522fe6060f1SDimitry Andric return true; 4523fe6060f1SDimitry Andric }; 4524fe6060f1SDimitry Andric 4525fe6060f1SDimitry Andric bool UsedAssumedInformationInCheckCallInst = false; 4526fe6060f1SDimitry Andric if (!A.checkForAllCallLikeInstructions( 4527349cc55cSDimitry Andric CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) { 4528349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << TAG 4529349cc55cSDimitry Andric << "Failed to visit all call-like instructions!\n";); 4530fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 4531349cc55cSDimitry Andric } 4532349cc55cSDimitry Andric 4533349cc55cSDimitry Andric // If we haven't used any assumed information for the reached parallel 4534349cc55cSDimitry Andric // region states we can fix it. 4535349cc55cSDimitry Andric if (!UsedAssumedInformationInCheckCallInst && 4536349cc55cSDimitry Andric AllParallelRegionStatesWereFixed) { 4537349cc55cSDimitry Andric ReachedKnownParallelRegions.indicateOptimisticFixpoint(); 4538349cc55cSDimitry Andric ReachedUnknownParallelRegions.indicateOptimisticFixpoint(); 4539349cc55cSDimitry Andric } 4540349cc55cSDimitry Andric 4541fe6060f1SDimitry Andric // If we haven't used any assumed information for the SPMD state we can fix 4542fe6060f1SDimitry Andric // it. 4543fe6060f1SDimitry Andric if (!UsedAssumedInformationInCheckRWInst && 45444824e7fdSDimitry Andric !UsedAssumedInformationInCheckCallInst && 45454824e7fdSDimitry Andric !UsedAssumedInformationFromReachingKernels && AllSPMDStatesWereFixed) 4546fe6060f1SDimitry Andric SPMDCompatibilityTracker.indicateOptimisticFixpoint(); 4547fe6060f1SDimitry Andric 4548fe6060f1SDimitry Andric return StateBefore == getState() ? ChangeStatus::UNCHANGED 4549fe6060f1SDimitry Andric : ChangeStatus::CHANGED; 4550fe6060f1SDimitry Andric } 4551fe6060f1SDimitry Andric 4552fe6060f1SDimitry Andric private: 4553fe6060f1SDimitry Andric /// Update info regarding reaching kernels. 
45544824e7fdSDimitry Andric void updateReachingKernelEntries(Attributor &A,
45554824e7fdSDimitry Andric bool &AllReachingKernelsKnown) {
4556fe6060f1SDimitry Andric auto PredCallSite = [&](AbstractCallSite ACS) {
4557fe6060f1SDimitry Andric Function *Caller = ACS.getInstruction()->getFunction();
4558fe6060f1SDimitry Andric
4559fe6060f1SDimitry Andric assert(Caller && "Caller is nullptr");
4560fe6060f1SDimitry Andric
4561*06c3fb27SDimitry Andric auto *CAA = A.getOrCreateAAFor<AAKernelInfo>(
4562fe6060f1SDimitry Andric IRPosition::function(*Caller), this, DepClassTy::REQUIRED);
4563*06c3fb27SDimitry Andric if (CAA && CAA->ReachingKernelEntries.isValidState()) {
4564*06c3fb27SDimitry Andric ReachingKernelEntries ^= CAA->ReachingKernelEntries;
4565fe6060f1SDimitry Andric return true;
4566fe6060f1SDimitry Andric }
4567fe6060f1SDimitry Andric
4568fe6060f1SDimitry Andric // We lost track of the caller of the associated function, any kernel
4569fe6060f1SDimitry Andric // could reach now.
4570fe6060f1SDimitry Andric ReachingKernelEntries.indicatePessimisticFixpoint();
4571fe6060f1SDimitry Andric
4572fe6060f1SDimitry Andric return true;
4573fe6060f1SDimitry Andric };
4574fe6060f1SDimitry Andric
4575fe6060f1SDimitry Andric if (!A.checkForAllCallSites(PredCallSite, *this,
4576fe6060f1SDimitry Andric true /* RequireAllCallSites */,
45774824e7fdSDimitry Andric AllReachingKernelsKnown))
4578fe6060f1SDimitry Andric ReachingKernelEntries.indicatePessimisticFixpoint();
4579fe6060f1SDimitry Andric }
4580fe6060f1SDimitry Andric
4581fe6060f1SDimitry Andric /// Update info regarding parallel levels.
4582fe6060f1SDimitry Andric void updateParallelLevels(Attributor &A) {
4583fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4584fe6060f1SDimitry Andric OMPInformationCache::RuntimeFunctionInfo &Parallel51RFI =
4585fe6060f1SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
4586fe6060f1SDimitry Andric
4587fe6060f1SDimitry Andric auto PredCallSite = [&](AbstractCallSite ACS) {
4588fe6060f1SDimitry Andric Function *Caller = ACS.getInstruction()->getFunction();
4589fe6060f1SDimitry Andric
4590fe6060f1SDimitry Andric assert(Caller && "Caller is nullptr");
4591fe6060f1SDimitry Andric
4592*06c3fb27SDimitry Andric auto *CAA =
4593fe6060f1SDimitry Andric A.getOrCreateAAFor<AAKernelInfo>(IRPosition::function(*Caller));
4594*06c3fb27SDimitry Andric if (CAA && CAA->ParallelLevels.isValidState()) {
4595fe6060f1SDimitry Andric // Any function that is called by `__kmpc_parallel_51` will not be
4596fe6060f1SDimitry Andric // folded as the parallel level in the function is updated. To get it
4597fe6060f1SDimitry Andric // right, the analysis would have to depend on the implementation. That
4598fe6060f1SDimitry Andric // said, if the implementation changes in the future, the analysis
4599fe6060f1SDimitry Andric // could become wrong. As a consequence, we are just conservative here.
4600fe6060f1SDimitry Andric if (Caller == Parallel51RFI.Declaration) {
4601fe6060f1SDimitry Andric ParallelLevels.indicatePessimisticFixpoint();
4602fe6060f1SDimitry Andric return true;
4603fe6060f1SDimitry Andric }
4604fe6060f1SDimitry Andric
4605*06c3fb27SDimitry Andric ParallelLevels ^= CAA->ParallelLevels;
4606fe6060f1SDimitry Andric
4607fe6060f1SDimitry Andric return true;
4608fe6060f1SDimitry Andric }
4609fe6060f1SDimitry Andric
4610fe6060f1SDimitry Andric // We lost track of the caller of the associated function, any kernel
4611fe6060f1SDimitry Andric // could reach now.
4612fe6060f1SDimitry Andric ParallelLevels.indicatePessimisticFixpoint(); 4613fe6060f1SDimitry Andric 4614fe6060f1SDimitry Andric return true; 4615fe6060f1SDimitry Andric }; 4616fe6060f1SDimitry Andric 4617fe6060f1SDimitry Andric bool AllCallSitesKnown = true; 4618fe6060f1SDimitry Andric if (!A.checkForAllCallSites(PredCallSite, *this, 4619fe6060f1SDimitry Andric true /* RequireAllCallSites */, 4620fe6060f1SDimitry Andric AllCallSitesKnown)) 4621fe6060f1SDimitry Andric ParallelLevels.indicatePessimisticFixpoint(); 4622fe6060f1SDimitry Andric } 4623fe6060f1SDimitry Andric }; 4624fe6060f1SDimitry Andric 4625fe6060f1SDimitry Andric /// The call site kernel info abstract attribute, basically, what can we say 4626fe6060f1SDimitry Andric /// about a call site with regards to the KernelInfoState. For now this simply 4627fe6060f1SDimitry Andric /// forwards the information from the callee. 4628fe6060f1SDimitry Andric struct AAKernelInfoCallSite : AAKernelInfo { 4629fe6060f1SDimitry Andric AAKernelInfoCallSite(const IRPosition &IRP, Attributor &A) 4630fe6060f1SDimitry Andric : AAKernelInfo(IRP, A) {} 4631fe6060f1SDimitry Andric 4632fe6060f1SDimitry Andric /// See AbstractAttribute::initialize(...). 4633fe6060f1SDimitry Andric void initialize(Attributor &A) override { 4634fe6060f1SDimitry Andric AAKernelInfo::initialize(A); 4635fe6060f1SDimitry Andric 4636fe6060f1SDimitry Andric CallBase &CB = cast<CallBase>(getAssociatedValue()); 4637fe6060f1SDimitry Andric Function *Callee = getAssociatedFunction(); 4638fe6060f1SDimitry Andric 4639*06c3fb27SDimitry Andric auto *AssumptionAA = A.getAAFor<AAAssumptionInfo>( 4640349cc55cSDimitry Andric *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL); 4641fe6060f1SDimitry Andric 4642fe6060f1SDimitry Andric // Check for SPMD-mode assumptions. 4643*06c3fb27SDimitry Andric if (AssumptionAA && AssumptionAA->hasAssumption("ompx_spmd_amenable")) { 4644fe6060f1SDimitry Andric SPMDCompatibilityTracker.indicateOptimisticFixpoint(); 4645349cc55cSDimitry Andric indicateOptimisticFixpoint(); 4646349cc55cSDimitry Andric } 4647fe6060f1SDimitry Andric 4648fe6060f1SDimitry Andric // First weed out calls we do not care about, that is readonly/readnone 4649fe6060f1SDimitry Andric // calls, intrinsics, and "no_openmp" calls. Neither of these can reach a 4650fe6060f1SDimitry Andric // parallel region or anything else we are looking for. 4651fe6060f1SDimitry Andric if (!CB.mayWriteToMemory() || isa<IntrinsicInst>(CB)) { 4652fe6060f1SDimitry Andric indicateOptimisticFixpoint(); 4653fe6060f1SDimitry Andric return; 4654fe6060f1SDimitry Andric } 4655fe6060f1SDimitry Andric 4656fe6060f1SDimitry Andric // Next we check if we know the callee. If it is a known OpenMP function 4657fe6060f1SDimitry Andric // we will handle them explicitly in the switch below. If it is not, we 4658fe6060f1SDimitry Andric // will use an AAKernelInfo object on the callee to gather information and 4659fe6060f1SDimitry Andric // merge that into the current state. The latter happens in the updateImpl. 4660fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 4661fe6060f1SDimitry Andric const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee); 4662fe6060f1SDimitry Andric if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) { 4663fe6060f1SDimitry Andric // Unknown caller or declarations are not analyzable, we give up. 
4664fe6060f1SDimitry Andric if (!Callee || !A.isFunctionIPOAmendable(*Callee)) { 4665fe6060f1SDimitry Andric 4666fe6060f1SDimitry Andric // Unknown callees might contain parallel regions, except if they have 4667fe6060f1SDimitry Andric // an appropriate assumption attached. 4668*06c3fb27SDimitry Andric if (!AssumptionAA || 4669*06c3fb27SDimitry Andric !(AssumptionAA->hasAssumption("omp_no_openmp") || 4670*06c3fb27SDimitry Andric AssumptionAA->hasAssumption("omp_no_parallelism"))) 4671fe6060f1SDimitry Andric ReachedUnknownParallelRegions.insert(&CB); 4672fe6060f1SDimitry Andric 4673fe6060f1SDimitry Andric // If SPMDCompatibilityTracker is not fixed, we need to give up on the 4674fe6060f1SDimitry Andric // idea we can run something unknown in SPMD-mode. 4675349cc55cSDimitry Andric if (!SPMDCompatibilityTracker.isAtFixpoint()) { 4676349cc55cSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint(); 4677fe6060f1SDimitry Andric SPMDCompatibilityTracker.insert(&CB); 4678349cc55cSDimitry Andric } 4679fe6060f1SDimitry Andric 4680fe6060f1SDimitry Andric // We have updated the state for this unknown call properly, there won't 4681fe6060f1SDimitry Andric // be any change so we indicate a fixpoint. 4682fe6060f1SDimitry Andric indicateOptimisticFixpoint(); 4683fe6060f1SDimitry Andric } 4684fe6060f1SDimitry Andric // If the callee is known and can be used in IPO, we will update the state 4685fe6060f1SDimitry Andric // based on the callee state in updateImpl. 4686fe6060f1SDimitry Andric return; 4687fe6060f1SDimitry Andric } 4688fe6060f1SDimitry Andric 4689fe6060f1SDimitry Andric const unsigned int WrapperFunctionArgNo = 6; 4690fe6060f1SDimitry Andric RuntimeFunction RF = It->getSecond(); 4691fe6060f1SDimitry Andric switch (RF) { 4692fe6060f1SDimitry Andric // All the functions we know are compatible with SPMD mode. 4693fe6060f1SDimitry Andric case OMPRTL___kmpc_is_spmd_exec_mode: 4694349cc55cSDimitry Andric case OMPRTL___kmpc_distribute_static_fini: 4695fe6060f1SDimitry Andric case OMPRTL___kmpc_for_static_fini: 4696fe6060f1SDimitry Andric case OMPRTL___kmpc_global_thread_num: 4697fe6060f1SDimitry Andric case OMPRTL___kmpc_get_hardware_num_threads_in_block: 4698fe6060f1SDimitry Andric case OMPRTL___kmpc_get_hardware_num_blocks: 4699fe6060f1SDimitry Andric case OMPRTL___kmpc_single: 4700fe6060f1SDimitry Andric case OMPRTL___kmpc_end_single: 4701fe6060f1SDimitry Andric case OMPRTL___kmpc_master: 4702fe6060f1SDimitry Andric case OMPRTL___kmpc_end_master: 4703fe6060f1SDimitry Andric case OMPRTL___kmpc_barrier: 47040eae32dcSDimitry Andric case OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2: 47050eae32dcSDimitry Andric case OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2: 47060eae32dcSDimitry Andric case OMPRTL___kmpc_nvptx_end_reduce_nowait: 4707fe6060f1SDimitry Andric break; 4708349cc55cSDimitry Andric case OMPRTL___kmpc_distribute_static_init_4: 4709349cc55cSDimitry Andric case OMPRTL___kmpc_distribute_static_init_4u: 4710349cc55cSDimitry Andric case OMPRTL___kmpc_distribute_static_init_8: 4711349cc55cSDimitry Andric case OMPRTL___kmpc_distribute_static_init_8u: 4712fe6060f1SDimitry Andric case OMPRTL___kmpc_for_static_init_4: 4713fe6060f1SDimitry Andric case OMPRTL___kmpc_for_static_init_4u: 4714fe6060f1SDimitry Andric case OMPRTL___kmpc_for_static_init_8: 4715fe6060f1SDimitry Andric case OMPRTL___kmpc_for_static_init_8u: { 4716fe6060f1SDimitry Andric // Check the schedule and allow static schedule in SPMD mode. 
4717fe6060f1SDimitry Andric unsigned ScheduleArgOpNo = 2; 4718fe6060f1SDimitry Andric auto *ScheduleTypeCI = 4719fe6060f1SDimitry Andric dyn_cast<ConstantInt>(CB.getArgOperand(ScheduleArgOpNo)); 4720fe6060f1SDimitry Andric unsigned ScheduleTypeVal = 4721fe6060f1SDimitry Andric ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0; 4722fe6060f1SDimitry Andric switch (OMPScheduleType(ScheduleTypeVal)) { 472381ad6265SDimitry Andric case OMPScheduleType::UnorderedStatic: 472481ad6265SDimitry Andric case OMPScheduleType::UnorderedStaticChunked: 472581ad6265SDimitry Andric case OMPScheduleType::OrderedDistribute: 472681ad6265SDimitry Andric case OMPScheduleType::OrderedDistributeChunked: 4727fe6060f1SDimitry Andric break; 4728fe6060f1SDimitry Andric default: 4729349cc55cSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint(); 4730fe6060f1SDimitry Andric SPMDCompatibilityTracker.insert(&CB); 4731fe6060f1SDimitry Andric break; 4732fe6060f1SDimitry Andric }; 4733fe6060f1SDimitry Andric } break; 4734fe6060f1SDimitry Andric case OMPRTL___kmpc_target_init: 4735fe6060f1SDimitry Andric KernelInitCB = &CB; 4736fe6060f1SDimitry Andric break; 4737fe6060f1SDimitry Andric case OMPRTL___kmpc_target_deinit: 4738fe6060f1SDimitry Andric KernelDeinitCB = &CB; 4739fe6060f1SDimitry Andric break; 4740fe6060f1SDimitry Andric case OMPRTL___kmpc_parallel_51: 4741fe6060f1SDimitry Andric if (auto *ParallelRegion = dyn_cast<Function>( 4742fe6060f1SDimitry Andric CB.getArgOperand(WrapperFunctionArgNo)->stripPointerCasts())) { 4743fe6060f1SDimitry Andric ReachedKnownParallelRegions.insert(ParallelRegion); 4744bdd1243dSDimitry Andric /// Check nested parallelism 4745*06c3fb27SDimitry Andric auto *FnAA = A.getAAFor<AAKernelInfo>( 4746bdd1243dSDimitry Andric *this, IRPosition::function(*ParallelRegion), DepClassTy::OPTIONAL); 4747*06c3fb27SDimitry Andric NestedParallelism |= !FnAA || !FnAA->getState().isValidState() || 4748*06c3fb27SDimitry Andric !FnAA->ReachedKnownParallelRegions.empty() || 4749*06c3fb27SDimitry Andric !FnAA->ReachedUnknownParallelRegions.empty(); 4750fe6060f1SDimitry Andric break; 4751fe6060f1SDimitry Andric } 4752fe6060f1SDimitry Andric // The condition above should usually get the parallel region function 4753fe6060f1SDimitry Andric // pointer and record it. In the off chance it doesn't we assume the 4754fe6060f1SDimitry Andric // worst. 4755fe6060f1SDimitry Andric ReachedUnknownParallelRegions.insert(&CB); 4756fe6060f1SDimitry Andric break; 4757fe6060f1SDimitry Andric case OMPRTL___kmpc_omp_task: 4758fe6060f1SDimitry Andric // We do not look into tasks right now, just give up. 47590eae32dcSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint(); 4760fe6060f1SDimitry Andric SPMDCompatibilityTracker.insert(&CB); 4761fe6060f1SDimitry Andric ReachedUnknownParallelRegions.insert(&CB); 4762fe6060f1SDimitry Andric break; 4763fe6060f1SDimitry Andric case OMPRTL___kmpc_alloc_shared: 4764fe6060f1SDimitry Andric case OMPRTL___kmpc_free_shared: 4765fe6060f1SDimitry Andric // Return without setting a fixpoint, to be resolved in updateImpl. 4766fe6060f1SDimitry Andric return; 4767fe6060f1SDimitry Andric default: 4768fe6060f1SDimitry Andric // Unknown OpenMP runtime calls cannot be executed in SPMD-mode, 4769349cc55cSDimitry Andric // generally. However, they do not hide parallel regions. 
47700eae32dcSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint(); 4771fe6060f1SDimitry Andric SPMDCompatibilityTracker.insert(&CB); 4772fe6060f1SDimitry Andric break; 4773fe6060f1SDimitry Andric } 4774fe6060f1SDimitry Andric // All other OpenMP runtime calls will not reach parallel regions so they 4775fe6060f1SDimitry Andric // can be safely ignored for now. Since it is a known OpenMP runtime call we 4776fe6060f1SDimitry Andric // have now modeled all effects and there is no need for any update. 4777fe6060f1SDimitry Andric indicateOptimisticFixpoint(); 4778fe6060f1SDimitry Andric } 4779fe6060f1SDimitry Andric 4780fe6060f1SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 4781fe6060f1SDimitry Andric // TODO: Once we have call site specific value information we can provide 4782fe6060f1SDimitry Andric // call site specific liveness information and then it makes 4783fe6060f1SDimitry Andric // sense to specialize attributes for call sites arguments instead of 4784fe6060f1SDimitry Andric // redirecting requests to the callee argument. 4785fe6060f1SDimitry Andric Function *F = getAssociatedFunction(); 4786fe6060f1SDimitry Andric 4787fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 4788fe6060f1SDimitry Andric const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(F); 4789fe6060f1SDimitry Andric 4790fe6060f1SDimitry Andric // If F is not a runtime function, propagate the AAKernelInfo of the callee. 4791fe6060f1SDimitry Andric if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) { 4792fe6060f1SDimitry Andric const IRPosition &FnPos = IRPosition::function(*F); 4793*06c3fb27SDimitry Andric auto *FnAA = A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED); 4794*06c3fb27SDimitry Andric if (!FnAA) 4795*06c3fb27SDimitry Andric return indicatePessimisticFixpoint(); 4796*06c3fb27SDimitry Andric if (getState() == FnAA->getState()) 4797fe6060f1SDimitry Andric return ChangeStatus::UNCHANGED; 4798*06c3fb27SDimitry Andric getState() = FnAA->getState(); 4799fe6060f1SDimitry Andric return ChangeStatus::CHANGED; 4800fe6060f1SDimitry Andric } 4801fe6060f1SDimitry Andric 4802fe6060f1SDimitry Andric // F is a runtime function that allocates or frees memory, check 4803fe6060f1SDimitry Andric // AAHeapToStack and AAHeapToShared. 4804fe6060f1SDimitry Andric KernelInfoState StateBefore = getState(); 4805fe6060f1SDimitry Andric assert((It->getSecond() == OMPRTL___kmpc_alloc_shared || 4806fe6060f1SDimitry Andric It->getSecond() == OMPRTL___kmpc_free_shared) && 4807fe6060f1SDimitry Andric "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call"); 4808fe6060f1SDimitry Andric 4809fe6060f1SDimitry Andric CallBase &CB = cast<CallBase>(getAssociatedValue()); 4810fe6060f1SDimitry Andric 4811*06c3fb27SDimitry Andric auto *HeapToStackAA = A.getAAFor<AAHeapToStack>( 4812fe6060f1SDimitry Andric *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL); 4813*06c3fb27SDimitry Andric auto *HeapToSharedAA = A.getAAFor<AAHeapToShared>( 4814fe6060f1SDimitry Andric *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL); 4815fe6060f1SDimitry Andric 4816fe6060f1SDimitry Andric RuntimeFunction RF = It->getSecond(); 4817fe6060f1SDimitry Andric 4818fe6060f1SDimitry Andric switch (RF) { 4819fe6060f1SDimitry Andric // If neither HeapToStack nor HeapToShared assume the call is removed, 4820fe6060f1SDimitry Andric // assume SPMD incompatibility. 
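    // (Illustrative, hedged sketch of the pattern handled below, in IR-like
    // pseudocode:
    //   %p = call ptr @__kmpc_alloc_shared(i64 4)
    //   ...
    //   call void @__kmpc_free_shared(ptr %p, i64 4)
    // If AAHeapToStack can turn the allocation into an alloca, or
    // AAHeapToShared into a static chunk of device shared memory, both calls
    // are assumed to disappear and no longer block SPMD-ization; otherwise
    // they are recorded as SPMD-incompatible.)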
4821fe6060f1SDimitry Andric case OMPRTL___kmpc_alloc_shared: 4822*06c3fb27SDimitry Andric if ((!HeapToStackAA || !HeapToStackAA->isAssumedHeapToStack(CB)) && 4823*06c3fb27SDimitry Andric (!HeapToSharedAA || !HeapToSharedAA->isAssumedHeapToShared(CB))) 4824fe6060f1SDimitry Andric SPMDCompatibilityTracker.insert(&CB); 4825fe6060f1SDimitry Andric break; 4826fe6060f1SDimitry Andric case OMPRTL___kmpc_free_shared: 4827*06c3fb27SDimitry Andric if ((!HeapToStackAA || 4828*06c3fb27SDimitry Andric !HeapToStackAA->isAssumedHeapToStackRemovedFree(CB)) && 4829*06c3fb27SDimitry Andric (!HeapToSharedAA || 4830*06c3fb27SDimitry Andric !HeapToSharedAA->isAssumedHeapToSharedRemovedFree(CB))) 4831fe6060f1SDimitry Andric SPMDCompatibilityTracker.insert(&CB); 4832fe6060f1SDimitry Andric break; 4833fe6060f1SDimitry Andric default: 48340eae32dcSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint(); 4835fe6060f1SDimitry Andric SPMDCompatibilityTracker.insert(&CB); 4836fe6060f1SDimitry Andric } 4837fe6060f1SDimitry Andric 4838fe6060f1SDimitry Andric return StateBefore == getState() ? ChangeStatus::UNCHANGED 4839fe6060f1SDimitry Andric : ChangeStatus::CHANGED; 4840fe6060f1SDimitry Andric } 4841fe6060f1SDimitry Andric }; 4842fe6060f1SDimitry Andric 4843fe6060f1SDimitry Andric struct AAFoldRuntimeCall 4844fe6060f1SDimitry Andric : public StateWrapper<BooleanState, AbstractAttribute> { 4845fe6060f1SDimitry Andric using Base = StateWrapper<BooleanState, AbstractAttribute>; 4846fe6060f1SDimitry Andric 4847fe6060f1SDimitry Andric AAFoldRuntimeCall(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 4848fe6060f1SDimitry Andric 4849fe6060f1SDimitry Andric /// Statistics are tracked as part of manifest for now. 4850fe6060f1SDimitry Andric void trackStatistics() const override {} 4851fe6060f1SDimitry Andric 4852fe6060f1SDimitry Andric /// Create an abstract attribute view for the position \p IRP.
4853fe6060f1SDimitry Andric static AAFoldRuntimeCall &createForPosition(const IRPosition &IRP, 4854fe6060f1SDimitry Andric Attributor &A); 4855fe6060f1SDimitry Andric 4856fe6060f1SDimitry Andric /// See AbstractAttribute::getName() 4857fe6060f1SDimitry Andric const std::string getName() const override { return "AAFoldRuntimeCall"; } 4858fe6060f1SDimitry Andric 4859fe6060f1SDimitry Andric /// See AbstractAttribute::getIdAddr() 4860fe6060f1SDimitry Andric const char *getIdAddr() const override { return &ID; } 4861fe6060f1SDimitry Andric 4862fe6060f1SDimitry Andric /// This function should return true if the type of the \p AA is 4863fe6060f1SDimitry Andric /// AAFoldRuntimeCall 4864fe6060f1SDimitry Andric static bool classof(const AbstractAttribute *AA) { 4865fe6060f1SDimitry Andric return (AA->getIdAddr() == &ID); 4866fe6060f1SDimitry Andric } 4867fe6060f1SDimitry Andric 4868fe6060f1SDimitry Andric static const char ID; 4869fe6060f1SDimitry Andric }; 4870fe6060f1SDimitry Andric 4871fe6060f1SDimitry Andric struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall { 4872fe6060f1SDimitry Andric AAFoldRuntimeCallCallSiteReturned(const IRPosition &IRP, Attributor &A) 4873fe6060f1SDimitry Andric : AAFoldRuntimeCall(IRP, A) {} 4874fe6060f1SDimitry Andric 4875fe6060f1SDimitry Andric /// See AbstractAttribute::getAsStr() 4876*06c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override { 4877fe6060f1SDimitry Andric if (!isValidState()) 4878fe6060f1SDimitry Andric return "<invalid>"; 4879fe6060f1SDimitry Andric 4880fe6060f1SDimitry Andric std::string Str("simplified value: "); 4881fe6060f1SDimitry Andric 488281ad6265SDimitry Andric if (!SimplifiedValue) 4883fe6060f1SDimitry Andric return Str + std::string("none"); 4884fe6060f1SDimitry Andric 4885bdd1243dSDimitry Andric if (!*SimplifiedValue) 4886fe6060f1SDimitry Andric return Str + std::string("nullptr"); 4887fe6060f1SDimitry Andric 4888bdd1243dSDimitry Andric if (ConstantInt *CI = dyn_cast<ConstantInt>(*SimplifiedValue)) 4889fe6060f1SDimitry Andric return Str + std::to_string(CI->getSExtValue()); 4890fe6060f1SDimitry Andric 4891fe6060f1SDimitry Andric return Str + std::string("unknown"); 4892fe6060f1SDimitry Andric } 4893fe6060f1SDimitry Andric 4894fe6060f1SDimitry Andric void initialize(Attributor &A) override { 4895349cc55cSDimitry Andric if (DisableOpenMPOptFolding) 4896349cc55cSDimitry Andric indicatePessimisticFixpoint(); 4897349cc55cSDimitry Andric 4898fe6060f1SDimitry Andric Function *Callee = getAssociatedFunction(); 4899fe6060f1SDimitry Andric 4900fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 4901fe6060f1SDimitry Andric const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee); 4902fe6060f1SDimitry Andric assert(It != OMPInfoCache.RuntimeFunctionIDMap.end() && 4903fe6060f1SDimitry Andric "Expected a known OpenMP runtime function"); 4904fe6060f1SDimitry Andric 4905fe6060f1SDimitry Andric RFKind = It->getSecond(); 4906fe6060f1SDimitry Andric 4907fe6060f1SDimitry Andric CallBase &CB = cast<CallBase>(getAssociatedValue()); 4908fe6060f1SDimitry Andric A.registerSimplificationCallback( 4909fe6060f1SDimitry Andric IRPosition::callsite_returned(CB), 4910fe6060f1SDimitry Andric [&](const IRPosition &IRP, const AbstractAttribute *AA, 4911bdd1243dSDimitry Andric bool &UsedAssumedInformation) -> std::optional<Value *> { 491281ad6265SDimitry Andric assert((isValidState() || 4913bdd1243dSDimitry Andric (SimplifiedValue && *SimplifiedValue == nullptr)) && 4914fe6060f1SDimitry 
Andric "Unexpected invalid state!"); 4915fe6060f1SDimitry Andric 4916fe6060f1SDimitry Andric if (!isAtFixpoint()) { 4917fe6060f1SDimitry Andric UsedAssumedInformation = true; 4918fe6060f1SDimitry Andric if (AA) 4919fe6060f1SDimitry Andric A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); 4920fe6060f1SDimitry Andric } 4921fe6060f1SDimitry Andric return SimplifiedValue; 4922fe6060f1SDimitry Andric }); 4923fe6060f1SDimitry Andric } 4924fe6060f1SDimitry Andric 4925fe6060f1SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 4926fe6060f1SDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED; 4927fe6060f1SDimitry Andric switch (RFKind) { 4928fe6060f1SDimitry Andric case OMPRTL___kmpc_is_spmd_exec_mode: 4929fe6060f1SDimitry Andric Changed |= foldIsSPMDExecMode(A); 4930fe6060f1SDimitry Andric break; 4931fe6060f1SDimitry Andric case OMPRTL___kmpc_parallel_level: 4932fe6060f1SDimitry Andric Changed |= foldParallelLevel(A); 4933fe6060f1SDimitry Andric break; 4934fe6060f1SDimitry Andric case OMPRTL___kmpc_get_hardware_num_threads_in_block: 4935fe6060f1SDimitry Andric Changed = Changed | foldKernelFnAttribute(A, "omp_target_thread_limit"); 4936fe6060f1SDimitry Andric break; 4937fe6060f1SDimitry Andric case OMPRTL___kmpc_get_hardware_num_blocks: 4938fe6060f1SDimitry Andric Changed = Changed | foldKernelFnAttribute(A, "omp_target_num_teams"); 4939fe6060f1SDimitry Andric break; 4940fe6060f1SDimitry Andric default: 4941fe6060f1SDimitry Andric llvm_unreachable("Unhandled OpenMP runtime function!"); 4942fe6060f1SDimitry Andric } 4943fe6060f1SDimitry Andric 4944fe6060f1SDimitry Andric return Changed; 4945fe6060f1SDimitry Andric } 4946fe6060f1SDimitry Andric 4947fe6060f1SDimitry Andric ChangeStatus manifest(Attributor &A) override { 4948fe6060f1SDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED; 4949fe6060f1SDimitry Andric 495081ad6265SDimitry Andric if (SimplifiedValue && *SimplifiedValue) { 4951349cc55cSDimitry Andric Instruction &I = *getCtxI(); 495281ad6265SDimitry Andric A.changeAfterManifest(IRPosition::inst(I), **SimplifiedValue); 4953349cc55cSDimitry Andric A.deleteAfterManifest(I); 4954fe6060f1SDimitry Andric 4955349cc55cSDimitry Andric CallBase *CB = dyn_cast<CallBase>(&I); 4956349cc55cSDimitry Andric auto Remark = [&](OptimizationRemark OR) { 4957349cc55cSDimitry Andric if (auto *C = dyn_cast<ConstantInt>(*SimplifiedValue)) 4958349cc55cSDimitry Andric return OR << "Replacing OpenMP runtime call " 4959349cc55cSDimitry Andric << CB->getCalledFunction()->getName() << " with " 4960349cc55cSDimitry Andric << ore::NV("FoldedValue", C->getZExtValue()) << "."; 4961349cc55cSDimitry Andric return OR << "Replacing OpenMP runtime call " 4962349cc55cSDimitry Andric << CB->getCalledFunction()->getName() << "."; 4963349cc55cSDimitry Andric }; 4964349cc55cSDimitry Andric 4965349cc55cSDimitry Andric if (CB && EnableVerboseRemarks) 4966349cc55cSDimitry Andric A.emitRemark<OptimizationRemark>(CB, "OMP180", Remark); 4967349cc55cSDimitry Andric 4968349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Replacing runtime call: " << I << " with " 4969fe6060f1SDimitry Andric << **SimplifiedValue << "\n"); 4970fe6060f1SDimitry Andric 4971fe6060f1SDimitry Andric Changed = ChangeStatus::CHANGED; 4972fe6060f1SDimitry Andric } 4973fe6060f1SDimitry Andric 4974fe6060f1SDimitry Andric return Changed; 4975fe6060f1SDimitry Andric } 4976fe6060f1SDimitry Andric 4977fe6060f1SDimitry Andric ChangeStatus indicatePessimisticFixpoint() override { 4978fe6060f1SDimitry Andric SimplifiedValue = nullptr; 
4979fe6060f1SDimitry Andric return AAFoldRuntimeCall::indicatePessimisticFixpoint(); 4980fe6060f1SDimitry Andric } 4981fe6060f1SDimitry Andric 4982fe6060f1SDimitry Andric private: 4983fe6060f1SDimitry Andric /// Fold __kmpc_is_spmd_exec_mode into a constant if possible. 4984fe6060f1SDimitry Andric ChangeStatus foldIsSPMDExecMode(Attributor &A) { 4985bdd1243dSDimitry Andric std::optional<Value *> SimplifiedValueBefore = SimplifiedValue; 4986fe6060f1SDimitry Andric 4987fe6060f1SDimitry Andric unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0; 4988fe6060f1SDimitry Andric unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0; 4989*06c3fb27SDimitry Andric auto *CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( 4990fe6060f1SDimitry Andric *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); 4991fe6060f1SDimitry Andric 4992*06c3fb27SDimitry Andric if (!CallerKernelInfoAA || 4993*06c3fb27SDimitry Andric !CallerKernelInfoAA->ReachingKernelEntries.isValidState()) 4994fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 4995fe6060f1SDimitry Andric 4996*06c3fb27SDimitry Andric for (Kernel K : CallerKernelInfoAA->ReachingKernelEntries) { 4997*06c3fb27SDimitry Andric auto *AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K), 4998fe6060f1SDimitry Andric DepClassTy::REQUIRED); 4999fe6060f1SDimitry Andric 5000*06c3fb27SDimitry Andric if (!AA || !AA->isValidState()) { 5001fe6060f1SDimitry Andric SimplifiedValue = nullptr; 5002fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 5003fe6060f1SDimitry Andric } 5004fe6060f1SDimitry Andric 5005*06c3fb27SDimitry Andric if (AA->SPMDCompatibilityTracker.isAssumed()) { 5006*06c3fb27SDimitry Andric if (AA->SPMDCompatibilityTracker.isAtFixpoint()) 5007fe6060f1SDimitry Andric ++KnownSPMDCount; 5008fe6060f1SDimitry Andric else 5009fe6060f1SDimitry Andric ++AssumedSPMDCount; 5010fe6060f1SDimitry Andric } else { 5011*06c3fb27SDimitry Andric if (AA->SPMDCompatibilityTracker.isAtFixpoint()) 5012fe6060f1SDimitry Andric ++KnownNonSPMDCount; 5013fe6060f1SDimitry Andric else 5014fe6060f1SDimitry Andric ++AssumedNonSPMDCount; 5015fe6060f1SDimitry Andric } 5016fe6060f1SDimitry Andric } 5017fe6060f1SDimitry Andric 5018fe6060f1SDimitry Andric if ((AssumedSPMDCount + KnownSPMDCount) && 5019fe6060f1SDimitry Andric (AssumedNonSPMDCount + KnownNonSPMDCount)) 5020fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 5021fe6060f1SDimitry Andric 5022fe6060f1SDimitry Andric auto &Ctx = getAnchorValue().getContext(); 5023fe6060f1SDimitry Andric if (KnownSPMDCount || AssumedSPMDCount) { 5024fe6060f1SDimitry Andric assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && 5025fe6060f1SDimitry Andric "Expected only SPMD kernels!"); 5026fe6060f1SDimitry Andric // All reaching kernels are in SPMD mode. Update all function calls to 5027fe6060f1SDimitry Andric // __kmpc_is_spmd_exec_mode to 1. 5028fe6060f1SDimitry Andric SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true); 5029fe6060f1SDimitry Andric } else if (KnownNonSPMDCount || AssumedNonSPMDCount) { 5030fe6060f1SDimitry Andric assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 && 5031fe6060f1SDimitry Andric "Expected only non-SPMD kernels!"); 5032fe6060f1SDimitry Andric // All reaching kernels are in non-SPMD mode. Update all function 5033fe6060f1SDimitry Andric // calls to __kmpc_is_spmd_exec_mode to 0. 
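    // (Illustrative note, not part of the original source: once this fold
    // applies, a guard in the device code such as
    //   %mode    = call i8 @__kmpc_is_spmd_exec_mode()
    //   %is.spmd = icmp ne i8 %mode, 0
    // sees a compile-time constant instead of a runtime query, so the
    // mode-dependent branch it feeds can be removed by later simplification.)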
5034fe6060f1SDimitry Andric SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), false); 5035fe6060f1SDimitry Andric } else { 5036fe6060f1SDimitry Andric // We have empty reaching kernels, therefore we cannot tell if the 5037fe6060f1SDimitry Andric // associated call site can be folded. At this moment, SimplifiedValue 5038fe6060f1SDimitry Andric // must be none. 503981ad6265SDimitry Andric assert(!SimplifiedValue && "SimplifiedValue should be none"); 5040fe6060f1SDimitry Andric } 5041fe6060f1SDimitry Andric 5042fe6060f1SDimitry Andric return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED 5043fe6060f1SDimitry Andric : ChangeStatus::CHANGED; 5044fe6060f1SDimitry Andric } 5045fe6060f1SDimitry Andric 5046fe6060f1SDimitry Andric /// Fold __kmpc_parallel_level into a constant if possible. 5047fe6060f1SDimitry Andric ChangeStatus foldParallelLevel(Attributor &A) { 5048bdd1243dSDimitry Andric std::optional<Value *> SimplifiedValueBefore = SimplifiedValue; 5049fe6060f1SDimitry Andric 5050*06c3fb27SDimitry Andric auto *CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( 5051fe6060f1SDimitry Andric *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); 5052fe6060f1SDimitry Andric 5053*06c3fb27SDimitry Andric if (!CallerKernelInfoAA || 5054*06c3fb27SDimitry Andric !CallerKernelInfoAA->ParallelLevels.isValidState()) 5055fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 5056fe6060f1SDimitry Andric 5057*06c3fb27SDimitry Andric if (!CallerKernelInfoAA->ReachingKernelEntries.isValidState()) 5058fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 5059fe6060f1SDimitry Andric 5060*06c3fb27SDimitry Andric if (CallerKernelInfoAA->ReachingKernelEntries.empty()) { 506181ad6265SDimitry Andric assert(!SimplifiedValue && 5062fe6060f1SDimitry Andric "SimplifiedValue should keep none at this point"); 5063fe6060f1SDimitry Andric return ChangeStatus::UNCHANGED; 5064fe6060f1SDimitry Andric } 5065fe6060f1SDimitry Andric 5066fe6060f1SDimitry Andric unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0; 5067fe6060f1SDimitry Andric unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0; 5068*06c3fb27SDimitry Andric for (Kernel K : CallerKernelInfoAA->ReachingKernelEntries) { 5069*06c3fb27SDimitry Andric auto *AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K), 5070fe6060f1SDimitry Andric DepClassTy::REQUIRED); 5071*06c3fb27SDimitry Andric if (!AA || !AA->SPMDCompatibilityTracker.isValidState()) 5072fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 5073fe6060f1SDimitry Andric 5074*06c3fb27SDimitry Andric if (AA->SPMDCompatibilityTracker.isAssumed()) { 5075*06c3fb27SDimitry Andric if (AA->SPMDCompatibilityTracker.isAtFixpoint()) 5076fe6060f1SDimitry Andric ++KnownSPMDCount; 5077fe6060f1SDimitry Andric else 5078fe6060f1SDimitry Andric ++AssumedSPMDCount; 5079fe6060f1SDimitry Andric } else { 5080*06c3fb27SDimitry Andric if (AA->SPMDCompatibilityTracker.isAtFixpoint()) 5081fe6060f1SDimitry Andric ++KnownNonSPMDCount; 5082fe6060f1SDimitry Andric else 5083fe6060f1SDimitry Andric ++AssumedNonSPMDCount; 5084fe6060f1SDimitry Andric } 5085fe6060f1SDimitry Andric } 5086fe6060f1SDimitry Andric 5087fe6060f1SDimitry Andric if ((AssumedSPMDCount + KnownSPMDCount) && 5088fe6060f1SDimitry Andric (AssumedNonSPMDCount + KnownNonSPMDCount)) 5089fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 5090fe6060f1SDimitry Andric 5091fe6060f1SDimitry Andric auto &Ctx = getAnchorValue().getContext(); 5092fe6060f1SDimitry Andric // If the caller can only be reached by 
SPMD kernel entries, the parallel 5093fe6060f1SDimitry Andric // level is 1. Similarly, if the caller can only be reached by non-SPMD 5094fe6060f1SDimitry Andric // kernel entries, it is 0. 5095fe6060f1SDimitry Andric if (AssumedSPMDCount || KnownSPMDCount) { 5096fe6060f1SDimitry Andric assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && 5097fe6060f1SDimitry Andric "Expected only SPMD kernels!"); 5098fe6060f1SDimitry Andric SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1); 5099fe6060f1SDimitry Andric } else { 5100fe6060f1SDimitry Andric assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 && 5101fe6060f1SDimitry Andric "Expected only non-SPMD kernels!"); 5102fe6060f1SDimitry Andric SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 0); 5103fe6060f1SDimitry Andric } 5104fe6060f1SDimitry Andric return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED 5105fe6060f1SDimitry Andric : ChangeStatus::CHANGED; 5106fe6060f1SDimitry Andric } 5107fe6060f1SDimitry Andric 5108fe6060f1SDimitry Andric ChangeStatus foldKernelFnAttribute(Attributor &A, llvm::StringRef Attr) { 5109fe6060f1SDimitry Andric // Specialize only if all the calls agree with the attribute constant value 5110fe6060f1SDimitry Andric int32_t CurrentAttrValue = -1; 5111bdd1243dSDimitry Andric std::optional<Value *> SimplifiedValueBefore = SimplifiedValue; 5112fe6060f1SDimitry Andric 5113*06c3fb27SDimitry Andric auto *CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( 5114fe6060f1SDimitry Andric *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); 5115fe6060f1SDimitry Andric 5116*06c3fb27SDimitry Andric if (!CallerKernelInfoAA || 5117*06c3fb27SDimitry Andric !CallerKernelInfoAA->ReachingKernelEntries.isValidState()) 5118fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 5119fe6060f1SDimitry Andric 5120fe6060f1SDimitry Andric // Iterate over the kernels that reach this function 5121*06c3fb27SDimitry Andric for (Kernel K : CallerKernelInfoAA->ReachingKernelEntries) { 5122bdd1243dSDimitry Andric int32_t NextAttrVal = K->getFnAttributeAsParsedInteger(Attr, -1); 5123fe6060f1SDimitry Andric 5124fe6060f1SDimitry Andric if (NextAttrVal == -1 || 5125fe6060f1SDimitry Andric (CurrentAttrValue != -1 && CurrentAttrValue != NextAttrVal)) 5126fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 5127fe6060f1SDimitry Andric CurrentAttrValue = NextAttrVal; 5128fe6060f1SDimitry Andric } 5129fe6060f1SDimitry Andric 5130fe6060f1SDimitry Andric if (CurrentAttrValue != -1) { 5131fe6060f1SDimitry Andric auto &Ctx = getAnchorValue().getContext(); 5132fe6060f1SDimitry Andric SimplifiedValue = 5133fe6060f1SDimitry Andric ConstantInt::get(Type::getInt32Ty(Ctx), CurrentAttrValue); 5134fe6060f1SDimitry Andric } 5135fe6060f1SDimitry Andric return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED 5136fe6060f1SDimitry Andric : ChangeStatus::CHANGED; 5137fe6060f1SDimitry Andric } 5138fe6060f1SDimitry Andric 5139fe6060f1SDimitry Andric /// An optional value the associated value is assumed to fold to. That is, we 5140fe6060f1SDimitry Andric /// assume the associated value (which is a call) can be replaced by this 5141fe6060f1SDimitry Andric /// simplified value. 5142bdd1243dSDimitry Andric std::optional<Value *> SimplifiedValue; 5143fe6060f1SDimitry Andric 5144fe6060f1SDimitry Andric /// The runtime function kind of the callee of the associated call site. 
5145fe6060f1SDimitry Andric RuntimeFunction RFKind; 5146fe6060f1SDimitry Andric }; 5147fe6060f1SDimitry Andric 51485ffd83dbSDimitry Andric } // namespace 51495ffd83dbSDimitry Andric 5150fe6060f1SDimitry Andric /// Register folding callsite 5151fe6060f1SDimitry Andric void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) { 5152fe6060f1SDimitry Andric auto &RFI = OMPInfoCache.RFIs[RF]; 5153fe6060f1SDimitry Andric RFI.foreachUse(SCC, [&](Use &U, Function &F) { 5154fe6060f1SDimitry Andric CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &RFI); 5155fe6060f1SDimitry Andric if (!CI) 5156fe6060f1SDimitry Andric return false; 5157fe6060f1SDimitry Andric A.getOrCreateAAFor<AAFoldRuntimeCall>( 5158fe6060f1SDimitry Andric IRPosition::callsite_returned(*CI), /* QueryingAA */ nullptr, 5159fe6060f1SDimitry Andric DepClassTy::NONE, /* ForceUpdate */ false, 5160fe6060f1SDimitry Andric /* UpdateAfterInit */ false); 5161fe6060f1SDimitry Andric return false; 5162fe6060f1SDimitry Andric }); 5163fe6060f1SDimitry Andric } 5164fe6060f1SDimitry Andric 5165fe6060f1SDimitry Andric void OpenMPOpt::registerAAs(bool IsModulePass) { 5166fe6060f1SDimitry Andric if (SCC.empty()) 5167fe6060f1SDimitry Andric return; 516881ad6265SDimitry Andric 5169fe6060f1SDimitry Andric if (IsModulePass) { 5170fe6060f1SDimitry Andric // Ensure we create the AAKernelInfo AAs first and without triggering an 5171fe6060f1SDimitry Andric // update. This will make sure we register all value simplification 5172fe6060f1SDimitry Andric // callbacks before any other AA has the chance to create an AAValueSimplify 5173fe6060f1SDimitry Andric // or similar. 517481ad6265SDimitry Andric auto CreateKernelInfoCB = [&](Use &, Function &Kernel) { 5175fe6060f1SDimitry Andric A.getOrCreateAAFor<AAKernelInfo>( 517681ad6265SDimitry Andric IRPosition::function(Kernel), /* QueryingAA */ nullptr, 5177fe6060f1SDimitry Andric DepClassTy::NONE, /* ForceUpdate */ false, 5178fe6060f1SDimitry Andric /* UpdateAfterInit */ false); 517981ad6265SDimitry Andric return false; 518081ad6265SDimitry Andric }; 518181ad6265SDimitry Andric OMPInformationCache::RuntimeFunctionInfo &InitRFI = 518281ad6265SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; 518381ad6265SDimitry Andric InitRFI.foreachUse(SCC, CreateKernelInfoCB); 5184fe6060f1SDimitry Andric 5185fe6060f1SDimitry Andric registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode); 5186fe6060f1SDimitry Andric registerFoldRuntimeCall(OMPRTL___kmpc_parallel_level); 5187fe6060f1SDimitry Andric registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_threads_in_block); 5188fe6060f1SDimitry Andric registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_blocks); 5189fe6060f1SDimitry Andric } 5190fe6060f1SDimitry Andric 5191fe6060f1SDimitry Andric // Create CallSite AA for all Getters. 
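  // (Illustrative note, not part of the original source: with ICV deduction
  // enabled, an ICV getter call site such as `omp_get_max_threads()` receives
  // an AAICVTracker so that a value established by a dominating setter, e.g.
  // `omp_set_num_threads(4)`, can potentially be propagated to the call's
  // result; the concrete getter/setter pairing named here is only an assumed
  // example.)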
5192bdd1243dSDimitry Andric if (DeduceICVValues) { 5193fe6060f1SDimitry Andric for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) { 5194fe6060f1SDimitry Andric auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)]; 5195fe6060f1SDimitry Andric 5196fe6060f1SDimitry Andric auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter]; 5197fe6060f1SDimitry Andric 5198fe6060f1SDimitry Andric auto CreateAA = [&](Use &U, Function &Caller) { 5199fe6060f1SDimitry Andric CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI); 5200fe6060f1SDimitry Andric if (!CI) 5201fe6060f1SDimitry Andric return false; 5202fe6060f1SDimitry Andric 5203fe6060f1SDimitry Andric auto &CB = cast<CallBase>(*CI); 5204fe6060f1SDimitry Andric 5205fe6060f1SDimitry Andric IRPosition CBPos = IRPosition::callsite_function(CB); 5206fe6060f1SDimitry Andric A.getOrCreateAAFor<AAICVTracker>(CBPos); 5207fe6060f1SDimitry Andric return false; 5208fe6060f1SDimitry Andric }; 5209fe6060f1SDimitry Andric 5210fe6060f1SDimitry Andric GetterRFI.foreachUse(SCC, CreateAA); 5211fe6060f1SDimitry Andric } 5212bdd1243dSDimitry Andric } 5213fe6060f1SDimitry Andric 5214fe6060f1SDimitry Andric // Create an ExecutionDomain AA for every function and a HeapToStack AA for 5215fe6060f1SDimitry Andric // every function if there is a device kernel. 5216fe6060f1SDimitry Andric if (!isOpenMPDevice(M)) 5217fe6060f1SDimitry Andric return; 5218fe6060f1SDimitry Andric 5219fe6060f1SDimitry Andric for (auto *F : SCC) { 5220fe6060f1SDimitry Andric if (F->isDeclaration()) 5221fe6060f1SDimitry Andric continue; 5222fe6060f1SDimitry Andric 5223bdd1243dSDimitry Andric // We look at internal functions only on-demand but if any use is not a 5224bdd1243dSDimitry Andric // direct call or outside the current set of analyzed functions, we have 5225bdd1243dSDimitry Andric // to do it eagerly. 
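    // (Illustrative note, not part of the original source: an internal helper
    // whose only uses are direct calls from functions in the analyzed set can
    // safely be handled on demand, whereas a function whose address escapes,
    // e.g. one passed as the outlined-region argument of __kmpc_parallel_51,
    // must have its AAs registered eagerly here.)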
5226bdd1243dSDimitry Andric if (F->hasLocalLinkage()) { 5227bdd1243dSDimitry Andric if (llvm::all_of(F->uses(), [this](const Use &U) { 5228bdd1243dSDimitry Andric const auto *CB = dyn_cast<CallBase>(U.getUser()); 5229bdd1243dSDimitry Andric return CB && CB->isCallee(&U) && 5230bdd1243dSDimitry Andric A.isRunOn(const_cast<Function *>(CB->getCaller())); 5231bdd1243dSDimitry Andric })) 5232bdd1243dSDimitry Andric continue; 5233bdd1243dSDimitry Andric } 5234bdd1243dSDimitry Andric registerAAsForFunction(A, *F); 5235bdd1243dSDimitry Andric } 5236bdd1243dSDimitry Andric } 5237fe6060f1SDimitry Andric 5238bdd1243dSDimitry Andric void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) { 5239bdd1243dSDimitry Andric if (!DisableOpenMPOptDeglobalization) 5240bdd1243dSDimitry Andric A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F)); 5241bdd1243dSDimitry Andric A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(F)); 5242bdd1243dSDimitry Andric if (!DisableOpenMPOptDeglobalization) 5243bdd1243dSDimitry Andric A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(F)); 5244*06c3fb27SDimitry Andric if (F.hasFnAttribute(Attribute::Convergent)) 5245*06c3fb27SDimitry Andric A.getOrCreateAAFor<AANonConvergent>(IRPosition::function(F)); 5246bdd1243dSDimitry Andric 5247bdd1243dSDimitry Andric for (auto &I : instructions(F)) { 5248fe6060f1SDimitry Andric if (auto *LI = dyn_cast<LoadInst>(&I)) { 5249fe6060f1SDimitry Andric bool UsedAssumedInformation = false; 5250fe6060f1SDimitry Andric A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr, 5251fcaf7f86SDimitry Andric UsedAssumedInformation, AA::Interprocedural); 5252bdd1243dSDimitry Andric continue; 5253bdd1243dSDimitry Andric } 5254bdd1243dSDimitry Andric if (auto *SI = dyn_cast<StoreInst>(&I)) { 525504eeddc0SDimitry Andric A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI)); 5256bdd1243dSDimitry Andric continue; 5257bdd1243dSDimitry Andric } 5258*06c3fb27SDimitry Andric if (auto *FI = dyn_cast<FenceInst>(&I)) { 5259*06c3fb27SDimitry Andric A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*FI)); 5260*06c3fb27SDimitry Andric continue; 5261*06c3fb27SDimitry Andric } 5262bdd1243dSDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(&I)) { 5263bdd1243dSDimitry Andric if (II->getIntrinsicID() == Intrinsic::assume) { 5264bdd1243dSDimitry Andric A.getOrCreateAAFor<AAPotentialValues>( 5265bdd1243dSDimitry Andric IRPosition::value(*II->getArgOperand(0))); 5266bdd1243dSDimitry Andric continue; 5267fe6060f1SDimitry Andric } 5268fe6060f1SDimitry Andric } 5269fe6060f1SDimitry Andric } 5270fe6060f1SDimitry Andric } 5271fe6060f1SDimitry Andric 52725ffd83dbSDimitry Andric const char AAICVTracker::ID = 0; 5273fe6060f1SDimitry Andric const char AAKernelInfo::ID = 0; 5274fe6060f1SDimitry Andric const char AAExecutionDomain::ID = 0; 5275fe6060f1SDimitry Andric const char AAHeapToShared::ID = 0; 5276fe6060f1SDimitry Andric const char AAFoldRuntimeCall::ID = 0; 52775ffd83dbSDimitry Andric 52785ffd83dbSDimitry Andric AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP, 52795ffd83dbSDimitry Andric Attributor &A) { 52805ffd83dbSDimitry Andric AAICVTracker *AA = nullptr; 52815ffd83dbSDimitry Andric switch (IRP.getPositionKind()) { 52825ffd83dbSDimitry Andric case IRPosition::IRP_INVALID: 52835ffd83dbSDimitry Andric case IRPosition::IRP_FLOAT: 52845ffd83dbSDimitry Andric case IRPosition::IRP_ARGUMENT: 52855ffd83dbSDimitry Andric case IRPosition::IRP_CALL_SITE_ARGUMENT: 52865ffd83dbSDimitry Andric llvm_unreachable("ICVTracker can 
only be created for function position!"); 5287e8d8bef9SDimitry Andric case IRPosition::IRP_RETURNED: 5288e8d8bef9SDimitry Andric AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A); 5289e8d8bef9SDimitry Andric break; 5290e8d8bef9SDimitry Andric case IRPosition::IRP_CALL_SITE_RETURNED: 5291e8d8bef9SDimitry Andric AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A); 5292e8d8bef9SDimitry Andric break; 5293e8d8bef9SDimitry Andric case IRPosition::IRP_CALL_SITE: 5294e8d8bef9SDimitry Andric AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A); 5295e8d8bef9SDimitry Andric break; 52965ffd83dbSDimitry Andric case IRPosition::IRP_FUNCTION: 52975ffd83dbSDimitry Andric AA = new (A.Allocator) AAICVTrackerFunction(IRP, A); 52985ffd83dbSDimitry Andric break; 52995ffd83dbSDimitry Andric } 53005ffd83dbSDimitry Andric 53015ffd83dbSDimitry Andric return *AA; 53025ffd83dbSDimitry Andric } 53035ffd83dbSDimitry Andric 5304fe6060f1SDimitry Andric AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP, 5305fe6060f1SDimitry Andric Attributor &A) { 5306fe6060f1SDimitry Andric AAExecutionDomainFunction *AA = nullptr; 5307fe6060f1SDimitry Andric switch (IRP.getPositionKind()) { 5308fe6060f1SDimitry Andric case IRPosition::IRP_INVALID: 5309fe6060f1SDimitry Andric case IRPosition::IRP_FLOAT: 5310fe6060f1SDimitry Andric case IRPosition::IRP_ARGUMENT: 5311fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE_ARGUMENT: 5312fe6060f1SDimitry Andric case IRPosition::IRP_RETURNED: 5313fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE_RETURNED: 5314fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE: 5315fe6060f1SDimitry Andric llvm_unreachable( 5316fe6060f1SDimitry Andric "AAExecutionDomain can only be created for function position!"); 5317fe6060f1SDimitry Andric case IRPosition::IRP_FUNCTION: 5318fe6060f1SDimitry Andric AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A); 5319fe6060f1SDimitry Andric break; 5320fe6060f1SDimitry Andric } 5321fe6060f1SDimitry Andric 5322fe6060f1SDimitry Andric return *AA; 5323fe6060f1SDimitry Andric } 5324fe6060f1SDimitry Andric 5325fe6060f1SDimitry Andric AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP, 5326fe6060f1SDimitry Andric Attributor &A) { 5327fe6060f1SDimitry Andric AAHeapToSharedFunction *AA = nullptr; 5328fe6060f1SDimitry Andric switch (IRP.getPositionKind()) { 5329fe6060f1SDimitry Andric case IRPosition::IRP_INVALID: 5330fe6060f1SDimitry Andric case IRPosition::IRP_FLOAT: 5331fe6060f1SDimitry Andric case IRPosition::IRP_ARGUMENT: 5332fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE_ARGUMENT: 5333fe6060f1SDimitry Andric case IRPosition::IRP_RETURNED: 5334fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE_RETURNED: 5335fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE: 5336fe6060f1SDimitry Andric llvm_unreachable( 5337fe6060f1SDimitry Andric "AAHeapToShared can only be created for function position!"); 5338fe6060f1SDimitry Andric case IRPosition::IRP_FUNCTION: 5339fe6060f1SDimitry Andric AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A); 5340fe6060f1SDimitry Andric break; 5341fe6060f1SDimitry Andric } 5342fe6060f1SDimitry Andric 5343fe6060f1SDimitry Andric return *AA; 5344fe6060f1SDimitry Andric } 5345fe6060f1SDimitry Andric 5346fe6060f1SDimitry Andric AAKernelInfo &AAKernelInfo::createForPosition(const IRPosition &IRP, 5347fe6060f1SDimitry Andric Attributor &A) { 5348fe6060f1SDimitry Andric AAKernelInfo *AA = nullptr; 5349fe6060f1SDimitry Andric switch (IRP.getPositionKind()) { 
5350fe6060f1SDimitry Andric case IRPosition::IRP_INVALID: 5351fe6060f1SDimitry Andric case IRPosition::IRP_FLOAT: 5352fe6060f1SDimitry Andric case IRPosition::IRP_ARGUMENT: 5353fe6060f1SDimitry Andric case IRPosition::IRP_RETURNED: 5354fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE_RETURNED: 5355fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE_ARGUMENT: 5356fe6060f1SDimitry Andric llvm_unreachable("KernelInfo can only be created for function position!"); 5357fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE: 5358fe6060f1SDimitry Andric AA = new (A.Allocator) AAKernelInfoCallSite(IRP, A); 5359fe6060f1SDimitry Andric break; 5360fe6060f1SDimitry Andric case IRPosition::IRP_FUNCTION: 5361fe6060f1SDimitry Andric AA = new (A.Allocator) AAKernelInfoFunction(IRP, A); 5362fe6060f1SDimitry Andric break; 5363fe6060f1SDimitry Andric } 5364fe6060f1SDimitry Andric 5365fe6060f1SDimitry Andric return *AA; 5366fe6060f1SDimitry Andric } 5367fe6060f1SDimitry Andric 5368fe6060f1SDimitry Andric AAFoldRuntimeCall &AAFoldRuntimeCall::createForPosition(const IRPosition &IRP, 5369fe6060f1SDimitry Andric Attributor &A) { 5370fe6060f1SDimitry Andric AAFoldRuntimeCall *AA = nullptr; 5371fe6060f1SDimitry Andric switch (IRP.getPositionKind()) { 5372fe6060f1SDimitry Andric case IRPosition::IRP_INVALID: 5373fe6060f1SDimitry Andric case IRPosition::IRP_FLOAT: 5374fe6060f1SDimitry Andric case IRPosition::IRP_ARGUMENT: 5375fe6060f1SDimitry Andric case IRPosition::IRP_RETURNED: 5376fe6060f1SDimitry Andric case IRPosition::IRP_FUNCTION: 5377fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE: 5378fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE_ARGUMENT: 5379fe6060f1SDimitry Andric llvm_unreachable("KernelInfo can only be created for call site position!"); 5380fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE_RETURNED: 5381fe6060f1SDimitry Andric AA = new (A.Allocator) AAFoldRuntimeCallCallSiteReturned(IRP, A); 5382fe6060f1SDimitry Andric break; 5383fe6060f1SDimitry Andric } 5384fe6060f1SDimitry Andric 5385fe6060f1SDimitry Andric return *AA; 5386fe6060f1SDimitry Andric } 5387fe6060f1SDimitry Andric 5388fe6060f1SDimitry Andric PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { 5389fe6060f1SDimitry Andric if (!containsOpenMP(M)) 5390fe6060f1SDimitry Andric return PreservedAnalyses::all(); 5391fe6060f1SDimitry Andric if (DisableOpenMPOptimizations) 53925ffd83dbSDimitry Andric return PreservedAnalyses::all(); 53935ffd83dbSDimitry Andric 5394fe6060f1SDimitry Andric FunctionAnalysisManager &FAM = 5395fe6060f1SDimitry Andric AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 5396fe6060f1SDimitry Andric KernelSet Kernels = getDeviceKernels(M); 5397fe6060f1SDimitry Andric 539881ad6265SDimitry Andric if (PrintModuleBeforeOptimizations) 539981ad6265SDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt Module Pass:\n" << M); 540081ad6265SDimitry Andric 5401fe6060f1SDimitry Andric auto IsCalled = [&](Function &F) { 5402fe6060f1SDimitry Andric if (Kernels.contains(&F)) 5403fe6060f1SDimitry Andric return true; 5404fe6060f1SDimitry Andric for (const User *U : F.users()) 5405fe6060f1SDimitry Andric if (!isa<BlockAddress>(U)) 5406fe6060f1SDimitry Andric return true; 5407fe6060f1SDimitry Andric return false; 5408fe6060f1SDimitry Andric }; 5409fe6060f1SDimitry Andric 5410fe6060f1SDimitry Andric auto EmitRemark = [&](Function &F) { 5411fe6060f1SDimitry Andric auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 5412fe6060f1SDimitry Andric 
ORE.emit([&]() { 5413fe6060f1SDimitry Andric OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F); 5414fe6060f1SDimitry Andric return ORA << "Could not internalize function. " 54156e75b2fbSDimitry Andric << "Some optimizations may not be possible. [OMP140]"; 5416fe6060f1SDimitry Andric }); 5417fe6060f1SDimitry Andric }; 5418fe6060f1SDimitry Andric 5419*06c3fb27SDimitry Andric bool Changed = false; 5420*06c3fb27SDimitry Andric 5421fe6060f1SDimitry Andric // Create internal copies of each function if this is a kernel Module. This 5422fe6060f1SDimitry Andric // allows interprocedural passes to see every call edge. 54236e75b2fbSDimitry Andric DenseMap<Function *, Function *> InternalizedMap; 54246e75b2fbSDimitry Andric if (isOpenMPDevice(M)) { 54256e75b2fbSDimitry Andric SmallPtrSet<Function *, 16> InternalizeFns; 5426fe6060f1SDimitry Andric for (Function &F : M) 5427fe6060f1SDimitry Andric if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) && 5428fe6060f1SDimitry Andric !DisableInternalization) { 54296e75b2fbSDimitry Andric if (Attributor::isInternalizable(F)) { 54306e75b2fbSDimitry Andric InternalizeFns.insert(&F); 5431fe6060f1SDimitry Andric } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) { 5432fe6060f1SDimitry Andric EmitRemark(F); 5433fe6060f1SDimitry Andric } 5434fe6060f1SDimitry Andric } 5435fe6060f1SDimitry Andric 5436*06c3fb27SDimitry Andric Changed |= 54376e75b2fbSDimitry Andric Attributor::internalizeFunctions(InternalizeFns, InternalizedMap); 54386e75b2fbSDimitry Andric } 54396e75b2fbSDimitry Andric 5440fe6060f1SDimitry Andric // Look at every function in the Module unless it was internalized. 5441bdd1243dSDimitry Andric SetVector<Function *> Functions; 5442fe6060f1SDimitry Andric SmallVector<Function *, 16> SCC; 5443fe6060f1SDimitry Andric for (Function &F : M) 5444bdd1243dSDimitry Andric if (!F.isDeclaration() && !InternalizedMap.lookup(&F)) { 5445fe6060f1SDimitry Andric SCC.push_back(&F); 5446bdd1243dSDimitry Andric Functions.insert(&F); 5447bdd1243dSDimitry Andric } 5448fe6060f1SDimitry Andric 5449fe6060f1SDimitry Andric if (SCC.empty()) 5450*06c3fb27SDimitry Andric return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 5451fe6060f1SDimitry Andric 5452fe6060f1SDimitry Andric AnalysisGetter AG(FAM); 5453fe6060f1SDimitry Andric 5454fe6060f1SDimitry Andric auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { 5455fe6060f1SDimitry Andric return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); 5456fe6060f1SDimitry Andric }; 5457fe6060f1SDimitry Andric 5458fe6060f1SDimitry Andric BumpPtrAllocator Allocator; 5459fe6060f1SDimitry Andric CallGraphUpdater CGUpdater; 5460fe6060f1SDimitry Andric 54611ac55f4cSDimitry Andric bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink || 54621ac55f4cSDimitry Andric LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink; 5463*06c3fb27SDimitry Andric OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, PostLink); 5464fe6060f1SDimitry Andric 5465349cc55cSDimitry Andric unsigned MaxFixpointIterations = 5466349cc55cSDimitry Andric (isOpenMPDevice(M)) ?
SetFixpointIterations : 32; 546781ad6265SDimitry Andric 546881ad6265SDimitry Andric AttributorConfig AC(CGUpdater); 546981ad6265SDimitry Andric AC.DefaultInitializeLiveInternals = false; 5470bdd1243dSDimitry Andric AC.IsModulePass = true; 547181ad6265SDimitry Andric AC.RewriteSignatures = false; 547281ad6265SDimitry Andric AC.MaxFixpointIterations = MaxFixpointIterations; 547381ad6265SDimitry Andric AC.OREGetter = OREGetter; 547481ad6265SDimitry Andric AC.PassName = DEBUG_TYPE; 5475bdd1243dSDimitry Andric AC.InitializationCallback = OpenMPOpt::registerAAsForFunction; 5476*06c3fb27SDimitry Andric AC.IPOAmendableCB = [](const Function &F) { 5477*06c3fb27SDimitry Andric return F.hasFnAttribute("kernel"); 5478*06c3fb27SDimitry Andric }; 547981ad6265SDimitry Andric 548081ad6265SDimitry Andric Attributor A(Functions, InfoCache, AC); 5481fe6060f1SDimitry Andric 5482fe6060f1SDimitry Andric OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); 5483*06c3fb27SDimitry Andric Changed |= OMPOpt.run(true); 5484349cc55cSDimitry Andric 5485349cc55cSDimitry Andric // Optionally inline device functions for potentially better performance. 5486349cc55cSDimitry Andric if (AlwaysInlineDeviceFunctions && isOpenMPDevice(M)) 5487349cc55cSDimitry Andric for (Function &F : M) 5488349cc55cSDimitry Andric if (!F.isDeclaration() && !Kernels.contains(&F) && 5489349cc55cSDimitry Andric !F.hasFnAttribute(Attribute::NoInline)) 5490349cc55cSDimitry Andric F.addFnAttr(Attribute::AlwaysInline); 5491349cc55cSDimitry Andric 5492349cc55cSDimitry Andric if (PrintModuleAfterOptimizations) 5493349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n" << M); 5494349cc55cSDimitry Andric 5495fe6060f1SDimitry Andric if (Changed) 5496fe6060f1SDimitry Andric return PreservedAnalyses::none(); 5497fe6060f1SDimitry Andric 5498fe6060f1SDimitry Andric return PreservedAnalyses::all(); 5499fe6060f1SDimitry Andric } 5500fe6060f1SDimitry Andric 5501fe6060f1SDimitry Andric PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, 5502fe6060f1SDimitry Andric CGSCCAnalysisManager &AM, 5503fe6060f1SDimitry Andric LazyCallGraph &CG, 5504fe6060f1SDimitry Andric CGSCCUpdateResult &UR) { 5505fe6060f1SDimitry Andric if (!containsOpenMP(*C.begin()->getFunction().getParent())) 5506fe6060f1SDimitry Andric return PreservedAnalyses::all(); 55075ffd83dbSDimitry Andric if (DisableOpenMPOptimizations) 55085ffd83dbSDimitry Andric return PreservedAnalyses::all(); 55095ffd83dbSDimitry Andric 55105ffd83dbSDimitry Andric SmallVector<Function *, 16> SCC; 5511e8d8bef9SDimitry Andric // If there are kernels in the module, we have to run on all SCC's. 
5512e8d8bef9SDimitry Andric for (LazyCallGraph::Node &N : C) { 5513e8d8bef9SDimitry Andric Function *Fn = &N.getFunction(); 5514e8d8bef9SDimitry Andric SCC.push_back(Fn); 5515e8d8bef9SDimitry Andric } 5516e8d8bef9SDimitry Andric 5517fe6060f1SDimitry Andric if (SCC.empty()) 55185ffd83dbSDimitry Andric return PreservedAnalyses::all(); 55195ffd83dbSDimitry Andric 5520fe6060f1SDimitry Andric Module &M = *C.begin()->getFunction().getParent(); 5521fe6060f1SDimitry Andric 552281ad6265SDimitry Andric if (PrintModuleBeforeOptimizations) 552381ad6265SDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt CGSCC Pass:\n" << M); 552481ad6265SDimitry Andric 5525fe6060f1SDimitry Andric KernelSet Kernels = getDeviceKernels(M); 5526fe6060f1SDimitry Andric 55275ffd83dbSDimitry Andric FunctionAnalysisManager &FAM = 55285ffd83dbSDimitry Andric AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); 55295ffd83dbSDimitry Andric 55305ffd83dbSDimitry Andric AnalysisGetter AG(FAM); 55315ffd83dbSDimitry Andric 55325ffd83dbSDimitry Andric auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { 55335ffd83dbSDimitry Andric return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); 55345ffd83dbSDimitry Andric }; 55355ffd83dbSDimitry Andric 5536fe6060f1SDimitry Andric BumpPtrAllocator Allocator; 55375ffd83dbSDimitry Andric CallGraphUpdater CGUpdater; 55385ffd83dbSDimitry Andric CGUpdater.initialize(CG, C, AM, UR); 55395ffd83dbSDimitry Andric 55401ac55f4cSDimitry Andric bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink || 55411ac55f4cSDimitry Andric LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink; 55425ffd83dbSDimitry Andric SetVector<Function *> Functions(SCC.begin(), SCC.end()); 55435ffd83dbSDimitry Andric OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, 5544*06c3fb27SDimitry Andric /*CGSCC*/ &Functions, PostLink); 55455ffd83dbSDimitry Andric 5546349cc55cSDimitry Andric unsigned MaxFixpointIterations = 5547349cc55cSDimitry Andric (isOpenMPDevice(M)) ? 
SetFixpointIterations : 32; 554881ad6265SDimitry Andric 554981ad6265SDimitry Andric AttributorConfig AC(CGUpdater); 555081ad6265SDimitry Andric AC.DefaultInitializeLiveInternals = false; 555181ad6265SDimitry Andric AC.IsModulePass = false; 555281ad6265SDimitry Andric AC.RewriteSignatures = false; 555381ad6265SDimitry Andric AC.MaxFixpointIterations = MaxFixpointIterations; 555481ad6265SDimitry Andric AC.OREGetter = OREGetter; 555581ad6265SDimitry Andric AC.PassName = DEBUG_TYPE; 5556bdd1243dSDimitry Andric AC.InitializationCallback = OpenMPOpt::registerAAsForFunction; 555781ad6265SDimitry Andric 555881ad6265SDimitry Andric Attributor A(Functions, InfoCache, AC); 55595ffd83dbSDimitry Andric 55605ffd83dbSDimitry Andric OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); 5561fe6060f1SDimitry Andric bool Changed = OMPOpt.run(false); 5562349cc55cSDimitry Andric 5563349cc55cSDimitry Andric if (PrintModuleAfterOptimizations) 5564349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M); 5565349cc55cSDimitry Andric 55665ffd83dbSDimitry Andric if (Changed) 55675ffd83dbSDimitry Andric return PreservedAnalyses::none(); 55685ffd83dbSDimitry Andric 55695ffd83dbSDimitry Andric return PreservedAnalyses::all(); 55705ffd83dbSDimitry Andric } 55715ffd83dbSDimitry Andric 5572*06c3fb27SDimitry Andric bool llvm::omp::isKernel(Function &Fn) { return Fn.hasFnAttribute("kernel"); } 5573*06c3fb27SDimitry Andric 5574fe6060f1SDimitry Andric KernelSet llvm::omp::getDeviceKernels(Module &M) { 5575fe6060f1SDimitry Andric // TODO: Create a more cross-platform way of determining device kernels. 5576bdd1243dSDimitry Andric NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations"); 5577fe6060f1SDimitry Andric KernelSet Kernels; 5578fe6060f1SDimitry Andric 55795ffd83dbSDimitry Andric if (!MD) 5580fe6060f1SDimitry Andric return Kernels; 55815ffd83dbSDimitry Andric 55825ffd83dbSDimitry Andric for (auto *Op : MD->operands()) { 55835ffd83dbSDimitry Andric if (Op->getNumOperands() < 2) 55845ffd83dbSDimitry Andric continue; 55855ffd83dbSDimitry Andric MDString *KindID = dyn_cast<MDString>(Op->getOperand(1)); 55865ffd83dbSDimitry Andric if (!KindID || KindID->getString() != "kernel") 55875ffd83dbSDimitry Andric continue; 55885ffd83dbSDimitry Andric 55895ffd83dbSDimitry Andric Function *KernelFn = 55905ffd83dbSDimitry Andric mdconst::dyn_extract_or_null<Function>(Op->getOperand(0)); 55915ffd83dbSDimitry Andric if (!KernelFn) 55925ffd83dbSDimitry Andric continue; 55935ffd83dbSDimitry Andric 5594*06c3fb27SDimitry Andric assert(isKernel(*KernelFn) && "Inconsistent kernel function annotation"); 55955ffd83dbSDimitry Andric ++NumOpenMPTargetRegionKernels; 55965ffd83dbSDimitry Andric 55975ffd83dbSDimitry Andric Kernels.insert(KernelFn); 55985ffd83dbSDimitry Andric } 5599fe6060f1SDimitry Andric 5600fe6060f1SDimitry Andric return Kernels; 56015ffd83dbSDimitry Andric } 56025ffd83dbSDimitry Andric 5603fe6060f1SDimitry Andric bool llvm::omp::containsOpenMP(Module &M) { 5604fe6060f1SDimitry Andric Metadata *MD = M.getModuleFlag("openmp"); 5605fe6060f1SDimitry Andric if (!MD) 5606fe6060f1SDimitry Andric return false; 56075ffd83dbSDimitry Andric 56085ffd83dbSDimitry Andric return true; 56095ffd83dbSDimitry Andric } 56105ffd83dbSDimitry Andric 5611fe6060f1SDimitry Andric bool llvm::omp::isOpenMPDevice(Module &M) { 5612fe6060f1SDimitry Andric Metadata *MD = M.getModuleFlag("openmp-device"); 5613fe6060f1SDimitry Andric if (!MD) 5614fe6060f1SDimitry Andric return false; 5615fe6060f1SDimitry Andric 
5616fe6060f1SDimitry Andric return true; 56175ffd83dbSDimitry Andric } 5618
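// (Illustrative note, not part of the original source, on getDeviceKernels()
// above: on NVPTX-style offload targets a device kernel is advertised through
// module metadata of the form
//   !nvvm.annotations = !{!0}
//   !0 = !{ptr @__omp_offloading_xy_main_l1, !"kernel", i32 1}
// where the function name is a made-up example; the loop above collects
// exactly the operands whose second entry is the string "kernel".)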