//===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// OpenMP specific optimizations:
//
// - Deduplication of runtime calls, e.g., omp_get_thread_num.
// - Replacing globalized device memory with stack memory.
// - Replacing globalized device memory with shared memory.
// - Parallel region merging.
// - Transforming generic-mode device kernels to SPMD mode.
// - Specializing the state machine for generic-mode device kernels.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/OpenMPOpt.h"

#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"

#include <algorithm>
#include <optional>
#include <string>

using namespace llvm;
using namespace omp;

#define DEBUG_TYPE "openmp-opt"

static cl::opt<bool> DisableOpenMPOptimizations(
    "openmp-opt-disable", cl::desc("Disable OpenMP specific optimizations."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> EnableParallelRegionMerging(
    "openmp-opt-enable-merging",
    cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
    cl::init(false));

static cl::opt<bool>
    DisableInternalization("openmp-opt-disable-internalization",
                           cl::desc("Disable function internalization."),
                           cl::Hidden, cl::init(false));

static cl::opt<bool> DeduceICVValues("openmp-deduce-icv-values",
                                     cl::init(false), cl::Hidden);
static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
                                    cl::Hidden);
static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
                                        cl::init(false), cl::Hidden);

static cl::opt<bool> HideMemoryTransferLatency(
    "openmp-hide-memory-transfer-latency",
    cl::desc("[WIP] Tries to hide the latency of host to device memory"
             " transfers"),
    cl::Hidden, cl::init(false));

static cl::opt<bool> DisableOpenMPOptDeglobalization(
    "openmp-opt-disable-deglobalization",
    cl::desc("Disable OpenMP optimizations involving deglobalization."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> DisableOpenMPOptSPMDization(
    "openmp-opt-disable-spmdization",
    cl::desc("Disable OpenMP optimizations involving SPMD-ization."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> DisableOpenMPOptFolding(
    "openmp-opt-disable-folding",
    cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden,
    cl::init(false));

static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
    "openmp-opt-disable-state-machine-rewrite",
    cl::desc("Disable OpenMP optimizations that replace the state machine."),
    cl::Hidden, cl::init(false));
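
// Note: these are internal LLVM options. They can typically be passed to
// `opt` directly, e.g. `opt -passes=openmp-opt -openmp-opt-disable-spmdization
// in.ll`, or forwarded from clang via `-mllvm <flag>`; the invocation here is
// illustrative only.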

static cl::opt<bool> DisableOpenMPOptBarrierElimination(
    "openmp-opt-disable-barrier-elimination",
    cl::desc("Disable OpenMP optimizations that eliminate barriers."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> PrintModuleAfterOptimizations(
    "openmp-opt-print-module-after",
    cl::desc("Print the current module after OpenMP optimizations."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> PrintModuleBeforeOptimizations(
    "openmp-opt-print-module-before",
    cl::desc("Print the current module before OpenMP optimizations."),
    cl::Hidden, cl::init(false));

static cl::opt<bool> AlwaysInlineDeviceFunctions(
    "openmp-opt-inline-device",
    cl::desc("Inline all applicable functions on the device."), cl::Hidden,
    cl::init(false));

static cl::opt<bool>
    EnableVerboseRemarks("openmp-opt-verbose-remarks",
                         cl::desc("Enables more verbose remarks."), cl::Hidden,
                         cl::init(false));

static cl::opt<unsigned>
    SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden,
                          cl::desc("Maximal number of attributor iterations."),
                          cl::init(256));

static cl::opt<unsigned>
    SharedMemoryLimit("openmp-opt-shared-limit", cl::Hidden,
                      cl::desc("Maximum amount of shared memory to use."),
                      cl::init(std::numeric_limits<unsigned>::max()));

STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
          "Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPParallelRegionsDeleted,
          "Number of OpenMP parallel regions deleted");
STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
          "Number of OpenMP runtime functions identified");
STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
          "Number of OpenMP runtime function uses identified");
STATISTIC(NumOpenMPTargetRegionKernels,
          "Number of OpenMP target region entry points (=kernels) identified");
STATISTIC(NumNonOpenMPTargetRegionKernels,
          "Number of non-OpenMP target region kernels identified");
STATISTIC(NumOpenMPTargetRegionKernelsSPMD,
          "Number of OpenMP target region entry points (=kernels) executed in "
          "SPMD-mode instead of generic-mode");
STATISTIC(NumOpenMPTargetRegionKernelsWithoutStateMachine,
          "Number of OpenMP target region entry points (=kernels) executed in "
"generic-mode without a state machines"); 169fe6060f1SDimitry Andric STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback, 170fe6060f1SDimitry Andric "Number of OpenMP target region entry points (=kernels) executed in " 171fe6060f1SDimitry Andric "generic-mode with customized state machines with fallback"); 172fe6060f1SDimitry Andric STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback, 173fe6060f1SDimitry Andric "Number of OpenMP target region entry points (=kernels) executed in " 174fe6060f1SDimitry Andric "generic-mode with customized state machines without fallback"); 1755ffd83dbSDimitry Andric STATISTIC( 1765ffd83dbSDimitry Andric NumOpenMPParallelRegionsReplacedInGPUStateMachine, 1775ffd83dbSDimitry Andric "Number of OpenMP parallel regions replaced with ID in GPU state machines"); 178e8d8bef9SDimitry Andric STATISTIC(NumOpenMPParallelRegionsMerged, 179e8d8bef9SDimitry Andric "Number of OpenMP parallel regions merged"); 180fe6060f1SDimitry Andric STATISTIC(NumBytesMovedToSharedMemory, 181fe6060f1SDimitry Andric "Amount of memory pushed to shared memory"); 1821fd87a68SDimitry Andric STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated"); 1835ffd83dbSDimitry Andric 1845ffd83dbSDimitry Andric #if !defined(NDEBUG) 1855ffd83dbSDimitry Andric static constexpr auto TAG = "[" DEBUG_TYPE "]"; 1865ffd83dbSDimitry Andric #endif 1875ffd83dbSDimitry Andric 1885f757f3fSDimitry Andric namespace KernelInfo { 1895f757f3fSDimitry Andric 1905f757f3fSDimitry Andric // struct ConfigurationEnvironmentTy { 1915f757f3fSDimitry Andric // uint8_t UseGenericStateMachine; 1925f757f3fSDimitry Andric // uint8_t MayUseNestedParallelism; 1935f757f3fSDimitry Andric // llvm::omp::OMPTgtExecModeFlags ExecMode; 1945f757f3fSDimitry Andric // int32_t MinThreads; 1955f757f3fSDimitry Andric // int32_t MaxThreads; 1965f757f3fSDimitry Andric // int32_t MinTeams; 1975f757f3fSDimitry Andric // int32_t MaxTeams; 1985f757f3fSDimitry Andric // }; 1995f757f3fSDimitry Andric 2005f757f3fSDimitry Andric // struct DynamicEnvironmentTy { 2015f757f3fSDimitry Andric // uint16_t DebugIndentionLevel; 2025f757f3fSDimitry Andric // }; 2035f757f3fSDimitry Andric 2045f757f3fSDimitry Andric // struct KernelEnvironmentTy { 2055f757f3fSDimitry Andric // ConfigurationEnvironmentTy Configuration; 2065f757f3fSDimitry Andric // IdentTy *Ident; 2075f757f3fSDimitry Andric // DynamicEnvironmentTy *DynamicEnv; 2085f757f3fSDimitry Andric // }; 2095f757f3fSDimitry Andric 2105f757f3fSDimitry Andric #define KERNEL_ENVIRONMENT_IDX(MEMBER, IDX) \ 2115f757f3fSDimitry Andric constexpr const unsigned MEMBER##Idx = IDX; 2125f757f3fSDimitry Andric 2135f757f3fSDimitry Andric KERNEL_ENVIRONMENT_IDX(Configuration, 0) 2145f757f3fSDimitry Andric KERNEL_ENVIRONMENT_IDX(Ident, 1) 2155f757f3fSDimitry Andric 2165f757f3fSDimitry Andric #undef KERNEL_ENVIRONMENT_IDX 2175f757f3fSDimitry Andric 2185f757f3fSDimitry Andric #define KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MEMBER, IDX) \ 2195f757f3fSDimitry Andric constexpr const unsigned MEMBER##Idx = IDX; 2205f757f3fSDimitry Andric 2215f757f3fSDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_IDX(UseGenericStateMachine, 0) 2225f757f3fSDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MayUseNestedParallelism, 1) 2235f757f3fSDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_IDX(ExecMode, 2) 2245f757f3fSDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MinThreads, 3) 2255f757f3fSDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MaxThreads, 4) 2265f757f3fSDimitry Andric 
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MinTeams, 5)
KERNEL_ENVIRONMENT_CONFIGURATION_IDX(MaxTeams, 6)

#undef KERNEL_ENVIRONMENT_CONFIGURATION_IDX

#define KERNEL_ENVIRONMENT_GETTER(MEMBER, RETURNTYPE) \
  RETURNTYPE *get##MEMBER##FromKernelEnvironment(ConstantStruct *KernelEnvC) { \
    return cast<RETURNTYPE>(KernelEnvC->getAggregateElement(MEMBER##Idx)); \
  }

KERNEL_ENVIRONMENT_GETTER(Ident, Constant)
KERNEL_ENVIRONMENT_GETTER(Configuration, ConstantStruct)

#undef KERNEL_ENVIRONMENT_GETTER

#define KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MEMBER) \
  ConstantInt *get##MEMBER##FromKernelEnvironment( \
      ConstantStruct *KernelEnvC) { \
    ConstantStruct *ConfigC = \
        getConfigurationFromKernelEnvironment(KernelEnvC); \
    return dyn_cast<ConstantInt>(ConfigC->getAggregateElement(MEMBER##Idx)); \
  }

KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(UseGenericStateMachine)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MayUseNestedParallelism)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(ExecMode)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MinThreads)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MaxThreads)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MinTeams)
KERNEL_ENVIRONMENT_CONFIGURATION_GETTER(MaxTeams)

#undef KERNEL_ENVIRONMENT_CONFIGURATION_GETTER

GlobalVariable *
getKernelEnvironementGVFromKernelInitCB(CallBase *KernelInitCB) {
  constexpr const int InitKernelEnvironmentArgNo = 0;
  return cast<GlobalVariable>(
      KernelInitCB->getArgOperand(InitKernelEnvironmentArgNo)
          ->stripPointerCasts());
}

ConstantStruct *getKernelEnvironementFromKernelInitCB(CallBase *KernelInitCB) {
  GlobalVariable *KernelEnvGV =
      getKernelEnvironementGVFromKernelInitCB(KernelInitCB);
  return cast<ConstantStruct>(KernelEnvGV->getInitializer());
}
} // namespace KernelInfo

namespace {

struct AAHeapToShared;

struct AAICVTracker;

/// OpenMP specific information. For now, stores RFIs and ICVs also needed for
/// Attributor runs.
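/// The cache owns the OpenMPIRBuilder, the table of known OpenMP runtime
/// functions (RFIs) together with their uses per function, and the table of
/// internal control variables (ICVs); both tables are populated from
/// OMPKinds.def when the cache is constructed.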
struct OMPInformationCache : public InformationCache {
  OMPInformationCache(Module &M, AnalysisGetter &AG,
                      BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC,
                      bool OpenMPPostLink)
      : InformationCache(M, AG, Allocator, CGSCC), OMPBuilder(M),
        OpenMPPostLink(OpenMPPostLink) {

    OMPBuilder.Config.IsTargetDevice = isOpenMPDevice(OMPBuilder.M);
    OMPBuilder.initialize();
    initializeRuntimeFunctions(M);
    initializeInternalControlVars();
  }

  /// Generic information that describes an internal control variable.
  struct InternalControlVarInfo {
    /// The kind, as described by InternalControlVar enum.
    InternalControlVar Kind;

    /// The name of the ICV.
    StringRef Name;

    /// Environment variable associated with this ICV.
    StringRef EnvVarName;

    /// Initial value kind.
    ICVInitValue InitKind;

    /// Initial value.
    ConstantInt *InitValue;

    /// Setter RTL function associated with this ICV.
    RuntimeFunction Setter;

    /// Getter RTL function associated with this ICV.
    RuntimeFunction Getter;

    /// RTL Function corresponding to the override clause of this ICV
    RuntimeFunction Clause;
  };

  /// Generic information that describes a runtime function
  struct RuntimeFunctionInfo {

    /// The kind, as described by the RuntimeFunction enum.
    RuntimeFunction Kind;

    /// The name of the function.
    StringRef Name;

    /// Flag to indicate a variadic function.
    bool IsVarArg;

    /// The return type of the function.
    Type *ReturnType;

    /// The argument types of the function.
    SmallVector<Type *, 8> ArgumentTypes;

    /// The declaration if available.
    Function *Declaration = nullptr;

    /// Uses of this runtime function per function containing the use.
    using UseVector = SmallVector<Use *, 16>;

    /// Clear UsesMap for runtime function.
    void clearUsesMap() { UsesMap.clear(); }

    /// Boolean conversion that is true if the runtime function was found.
    operator bool() const { return Declaration; }

    /// Return the vector of uses in function \p F.
    UseVector &getOrCreateUseVector(Function *F) {
      std::shared_ptr<UseVector> &UV = UsesMap[F];
      if (!UV)
        UV = std::make_shared<UseVector>();
      return *UV;
    }

    /// Return the vector of uses in function \p F or `nullptr` if there are
    /// none.
    const UseVector *getUseVector(Function &F) const {
      auto I = UsesMap.find(&F);
      if (I != UsesMap.end())
        return I->second.get();
      return nullptr;
    }

    /// Return how many functions contain uses of this runtime function.
    size_t getNumFunctionsWithUses() const { return UsesMap.size(); }

    /// Return the number of arguments (or the minimal number for variadic
    /// functions).
    size_t getNumArgs() const { return ArgumentTypes.size(); }

    /// Run the callback \p CB on each use and forget the use if the result is
    /// true. The callback will be fed the function in which the use was
    /// encountered as second argument.
    void foreachUse(SmallVectorImpl<Function *> &SCC,
                    function_ref<bool(Use &, Function &)> CB) {
      for (Function *F : SCC)
        foreachUse(CB, F);
    }

    /// Run the callback \p CB on each use within the function \p F and forget
    /// the use if the result is true.
    void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
      SmallVector<unsigned, 8> ToBeDeleted;
      ToBeDeleted.clear();

      unsigned Idx = 0;
      UseVector &UV = getOrCreateUseVector(F);

      for (Use *U : UV) {
        if (CB(*U, *F))
          ToBeDeleted.push_back(Idx);
        ++Idx;
      }

      // Remove the to-be-deleted indices in reverse order as prior
      // modifications will not modify the smaller indices.
      while (!ToBeDeleted.empty()) {
        unsigned Idx = ToBeDeleted.pop_back_val();
        UV[Idx] = UV.back();
        UV.pop_back();
      }
    }

  private:
    /// Map from functions to all uses of this runtime function contained in
    /// them.
    DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;

  public:
    /// Iterators for the uses of this runtime function.
    decltype(UsesMap)::iterator begin() { return UsesMap.begin(); }
    decltype(UsesMap)::iterator end() { return UsesMap.end(); }
  };

  /// An OpenMP-IR-Builder instance
  OpenMPIRBuilder OMPBuilder;

  /// Map from runtime function kind to the runtime function description.
  EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
                  RuntimeFunction::OMPRTL___last>
      RFIs;

  /// Map from function declarations/definitions to their runtime enum type.
  DenseMap<Function *, RuntimeFunction> RuntimeFunctionIDMap;

  /// Map from ICV kind to the ICV description.
  EnumeratedArray<InternalControlVarInfo, InternalControlVar,
                  InternalControlVar::ICV___last>
      ICVs;

  /// Helper to initialize all internal control variable information for those
  /// defined in OMPKinds.def.
  void initializeInternalControlVars() {
#define ICV_RT_SET(_Name, RTL) \
  { \
    auto &ICV = ICVs[_Name]; \
    ICV.Setter = RTL; \
  }
#define ICV_RT_GET(Name, RTL) \
  { \
    auto &ICV = ICVs[Name]; \
    ICV.Getter = RTL; \
  }
#define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \
  { \
    auto &ICV = ICVs[Enum]; \
    ICV.Name = _Name; \
    ICV.Kind = Enum; \
    ICV.InitKind = Init; \
    ICV.EnvVarName = _EnvVarName; \
    switch (ICV.InitKind) { \
    case ICV_IMPLEMENTATION_DEFINED: \
      ICV.InitValue = nullptr; \
      break; \
    case ICV_ZERO: \
      ICV.InitValue = ConstantInt::get( \
          Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \
      break; \
    case ICV_FALSE: \
      ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \
      break; \
    case ICV_LAST: \
      break; \
    } \
  }
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  }

  /// Returns true if the function declaration \p F matches the runtime
  /// function types, that is, return type \p RTFRetType, and argument types
  /// \p RTFArgTypes.
  static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
                                  SmallVector<Type *, 8> &RTFArgTypes) {
    // TODO: We should output information to the user (under debug output
    // and via remarks).

    if (!F)
      return false;
    if (F->getReturnType() != RTFRetType)
      return false;
    if (F->arg_size() != RTFArgTypes.size())
      return false;

    auto *RTFTyIt = RTFArgTypes.begin();
    for (Argument &Arg : F->args()) {
      if (Arg.getType() != *RTFTyIt)
        return false;

      ++RTFTyIt;
    }

    return true;
  }

  // Helper to collect all uses of the declaration in the UsesMap.
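  // Returns the number of uses recorded. Instruction uses are recorded per
  // containing function (and skipped if a non-empty CGSCC is given that does
  // not contain that function); non-instruction uses are filed under the
  // nullptr key.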
  unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
    unsigned NumUses = 0;
    if (!RFI.Declaration)
      return NumUses;
    OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);

    if (CollectStats) {
      NumOpenMPRuntimeFunctionsIdentified += 1;
      NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
    }

    // TODO: We directly convert uses into proper calls and unknown uses.
    for (Use &U : RFI.Declaration->uses()) {
      if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
        if (!CGSCC || CGSCC->empty() || CGSCC->contains(UserI->getFunction())) {
          RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
          ++NumUses;
        }
      } else {
        RFI.getOrCreateUseVector(nullptr).push_back(&U);
        ++NumUses;
      }
    }
    return NumUses;
  }

  // Helper function to recollect uses of a runtime function.
  void recollectUsesForFunction(RuntimeFunction RTF) {
    auto &RFI = RFIs[RTF];
    RFI.clearUsesMap();
    collectUses(RFI, /*CollectStats*/ false);
  }

  // Helper function to recollect uses of all runtime functions.
  void recollectUses() {
    for (int Idx = 0; Idx < RFIs.size(); ++Idx)
      recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
  }

  // Helper function to inherit the calling convention of the function callee.
  void setCallingConvention(FunctionCallee Callee, CallInst *CI) {
    if (Function *Fn = dyn_cast<Function>(Callee.getCallee()))
      CI->setCallingConv(Fn->getCallingConv());
  }

  // Helper function to determine if it's legal to create a call to the runtime
  // functions.
  bool runtimeFnsAvailable(ArrayRef<RuntimeFunction> Fns) {
    // We can always emit calls if we haven't yet linked in the runtime.
    if (!OpenMPPostLink)
      return true;

    // Once the runtime has already been linked in, we cannot emit calls to
    // any undefined functions.
    for (RuntimeFunction Fn : Fns) {
      RuntimeFunctionInfo &RFI = RFIs[Fn];

      if (RFI.Declaration && RFI.Declaration->isDeclaration())
        return false;
    }
    return true;
  }

  /// Helper to initialize all runtime function information for those defined
  /// in OpenMPKinds.def.
  void initializeRuntimeFunctions(Module &M) {

    // Helper macros for handling __VA_ARGS__ in OMP_RTL
#define OMP_TYPE(VarName, ...) \
  Type *VarName = OMPBuilder.VarName; \
  (void)VarName;

#define OMP_ARRAY_TYPE(VarName, ...) \
  ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \
  (void)VarName##Ty; \
  PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \
  (void)VarName##PtrTy;

#define OMP_FUNCTION_TYPE(VarName, ...) \
  FunctionType *VarName = OMPBuilder.VarName; \
  (void)VarName; \
  PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \
  (void)VarName##Ptr;

#define OMP_STRUCT_TYPE(VarName, ...) \
  StructType *VarName = OMPBuilder.VarName; \
  (void)VarName; \
  PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \
  (void)VarName##Ptr;

#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \
  { \
    SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \
    Function *F = M.getFunction(_Name); \
    RTLFunctions.insert(F); \
    if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \
      RuntimeFunctionIDMap[F] = _Enum; \
      auto &RFI = RFIs[_Enum]; \
      RFI.Kind = _Enum; \
      RFI.Name = _Name; \
      RFI.IsVarArg = _IsVarArg; \
      RFI.ReturnType = OMPBuilder._ReturnType; \
      RFI.ArgumentTypes = std::move(ArgsTypes); \
      RFI.Declaration = F; \
      unsigned NumUses = collectUses(RFI); \
      (void)NumUses; \
      LLVM_DEBUG({ \
        dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \
"" : " not") \ 6095ffd83dbSDimitry Andric << " found\n"; \ 6105ffd83dbSDimitry Andric if (RFI.Declaration) \ 6115ffd83dbSDimitry Andric dbgs() << TAG << "-> got " << NumUses << " uses in " \ 6125ffd83dbSDimitry Andric << RFI.getNumFunctionsWithUses() \ 6135ffd83dbSDimitry Andric << " different functions.\n"; \ 6145ffd83dbSDimitry Andric }); \ 6155ffd83dbSDimitry Andric } \ 6165ffd83dbSDimitry Andric } 6175ffd83dbSDimitry Andric #include "llvm/Frontend/OpenMP/OMPKinds.def" 6185ffd83dbSDimitry Andric 619bdd1243dSDimitry Andric // Remove the `noinline` attribute from `__kmpc`, `ompx::` and `omp_` 62061cfbce3SDimitry Andric // functions, except if `optnone` is present. 62161cfbce3SDimitry Andric if (isOpenMPDevice(M)) { 62261cfbce3SDimitry Andric for (Function &F : M) { 623bdd1243dSDimitry Andric for (StringRef Prefix : {"__kmpc", "_ZN4ompx", "omp_"}) 62461cfbce3SDimitry Andric if (F.hasFnAttribute(Attribute::NoInline) && 6255f757f3fSDimitry Andric F.getName().starts_with(Prefix) && 62661cfbce3SDimitry Andric !F.hasFnAttribute(Attribute::OptimizeNone)) 62761cfbce3SDimitry Andric F.removeFnAttr(Attribute::NoInline); 62861cfbce3SDimitry Andric } 62961cfbce3SDimitry Andric } 63061cfbce3SDimitry Andric 6315ffd83dbSDimitry Andric // TODO: We should attach the attributes defined in OMPKinds.def. 6325ffd83dbSDimitry Andric } 6335ffd83dbSDimitry Andric 634fe6060f1SDimitry Andric /// Collection of known OpenMP runtime functions.. 635fe6060f1SDimitry Andric DenseSet<const Function *> RTLFunctions; 6361ac55f4cSDimitry Andric 6371ac55f4cSDimitry Andric /// Indicates if we have already linked in the OpenMP device library. 6381ac55f4cSDimitry Andric bool OpenMPPostLink = false; 639fe6060f1SDimitry Andric }; 640fe6060f1SDimitry Andric 641fe6060f1SDimitry Andric template <typename Ty, bool InsertInvalidates = true> 642fe6060f1SDimitry Andric struct BooleanStateWithSetVector : public BooleanState { 643fe6060f1SDimitry Andric bool contains(const Ty &Elem) const { return Set.contains(Elem); } 644fe6060f1SDimitry Andric bool insert(const Ty &Elem) { 645fe6060f1SDimitry Andric if (InsertInvalidates) 646fe6060f1SDimitry Andric BooleanState::indicatePessimisticFixpoint(); 647fe6060f1SDimitry Andric return Set.insert(Elem); 648fe6060f1SDimitry Andric } 649fe6060f1SDimitry Andric 650fe6060f1SDimitry Andric const Ty &operator[](int Idx) const { return Set[Idx]; } 651fe6060f1SDimitry Andric bool operator==(const BooleanStateWithSetVector &RHS) const { 652fe6060f1SDimitry Andric return BooleanState::operator==(RHS) && Set == RHS.Set; 653fe6060f1SDimitry Andric } 654fe6060f1SDimitry Andric bool operator!=(const BooleanStateWithSetVector &RHS) const { 655fe6060f1SDimitry Andric return !(*this == RHS); 656fe6060f1SDimitry Andric } 657fe6060f1SDimitry Andric 658fe6060f1SDimitry Andric bool empty() const { return Set.empty(); } 659fe6060f1SDimitry Andric size_t size() const { return Set.size(); } 660fe6060f1SDimitry Andric 661fe6060f1SDimitry Andric /// "Clamp" this state with \p RHS. 662fe6060f1SDimitry Andric BooleanStateWithSetVector &operator^=(const BooleanStateWithSetVector &RHS) { 663fe6060f1SDimitry Andric BooleanState::operator^=(RHS); 664fe6060f1SDimitry Andric Set.insert(RHS.Set.begin(), RHS.Set.end()); 665fe6060f1SDimitry Andric return *this; 666fe6060f1SDimitry Andric } 667fe6060f1SDimitry Andric 668fe6060f1SDimitry Andric private: 669fe6060f1SDimitry Andric /// A set to keep track of elements. 
  SetVector<Ty> Set;

public:
  typename decltype(Set)::iterator begin() { return Set.begin(); }
  typename decltype(Set)::iterator end() { return Set.end(); }
  typename decltype(Set)::const_iterator begin() const { return Set.begin(); }
  typename decltype(Set)::const_iterator end() const { return Set.end(); }
};

template <typename Ty, bool InsertInvalidates = true>
using BooleanStateWithPtrSetVector =
    BooleanStateWithSetVector<Ty *, InsertInvalidates>;

struct KernelInfoState : AbstractState {
  /// Flag to track if we reached a fixpoint.
  bool IsAtFixpoint = false;

  /// The parallel regions (identified by the outlined parallel functions) that
  /// can be reached from the associated function.
  BooleanStateWithPtrSetVector<CallBase, /* InsertInvalidates */ false>
      ReachedKnownParallelRegions;

  /// State to track what parallel region we might reach.
  BooleanStateWithPtrSetVector<CallBase> ReachedUnknownParallelRegions;

  /// State to track if we are in SPMD-mode, assumed or known, and why we
  /// decided we cannot be. If it is assumed, then RequiresFullRuntime should
  /// also be false.
  BooleanStateWithPtrSetVector<Instruction, false> SPMDCompatibilityTracker;

  /// The __kmpc_target_init call in this kernel, if any. If we find more than
  /// one we abort as the kernel is malformed.
  CallBase *KernelInitCB = nullptr;

  /// The constant kernel environment as taken from and passed to
  /// __kmpc_target_init.
  ConstantStruct *KernelEnvC = nullptr;

  /// The __kmpc_target_deinit call in this kernel, if any. If we find more than
  /// one we abort as the kernel is malformed.
  CallBase *KernelDeinitCB = nullptr;

  /// Flag to indicate if the associated function is a kernel entry.
  bool IsKernelEntry = false;

  /// State to track what kernel entries can reach the associated function.
  BooleanStateWithPtrSetVector<Function, false> ReachingKernelEntries;

  /// State to indicate if we can track the parallel level of the associated
  /// function. We will give up tracking if we encounter an unknown caller or
  /// the caller is __kmpc_parallel_51.
  BooleanStateWithSetVector<uint8_t> ParallelLevels;

  /// Flag that indicates if the kernel has nested parallelism.
  bool NestedParallelism = false;

  /// Abstract State interface
  ///{

  KernelInfoState() = default;
  KernelInfoState(bool BestState) {
    if (!BestState)
      indicatePessimisticFixpoint();
  }

  /// See AbstractState::isValidState(...)
  bool isValidState() const override { return true; }

  /// See AbstractState::isAtFixpoint(...)
  bool isAtFixpoint() const override { return IsAtFixpoint; }

  /// See AbstractState::indicatePessimisticFixpoint(...)
  ChangeStatus indicatePessimisticFixpoint() override {
    IsAtFixpoint = true;
    ParallelLevels.indicatePessimisticFixpoint();
    ReachingKernelEntries.indicatePessimisticFixpoint();
    SPMDCompatibilityTracker.indicatePessimisticFixpoint();
    ReachedKnownParallelRegions.indicatePessimisticFixpoint();
    ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
    NestedParallelism = true;
    return ChangeStatus::CHANGED;
  }

  /// See AbstractState::indicateOptimisticFixpoint(...)
  ChangeStatus indicateOptimisticFixpoint() override {
    IsAtFixpoint = true;
    ParallelLevels.indicateOptimisticFixpoint();
    ReachingKernelEntries.indicateOptimisticFixpoint();
    SPMDCompatibilityTracker.indicateOptimisticFixpoint();
    ReachedKnownParallelRegions.indicateOptimisticFixpoint();
    ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
    return ChangeStatus::UNCHANGED;
  }

  /// Return the assumed state
  KernelInfoState &getAssumed() { return *this; }
  const KernelInfoState &getAssumed() const { return *this; }

  bool operator==(const KernelInfoState &RHS) const {
    if (SPMDCompatibilityTracker != RHS.SPMDCompatibilityTracker)
      return false;
    if (ReachedKnownParallelRegions != RHS.ReachedKnownParallelRegions)
      return false;
    if (ReachedUnknownParallelRegions != RHS.ReachedUnknownParallelRegions)
      return false;
    if (ReachingKernelEntries != RHS.ReachingKernelEntries)
      return false;
    if (ParallelLevels != RHS.ParallelLevels)
      return false;
    if (NestedParallelism != RHS.NestedParallelism)
      return false;
    return true;
  }

  /// Returns true if this kernel contains any OpenMP parallel regions.
  bool mayContainParallelRegion() {
    return !ReachedKnownParallelRegions.empty() ||
           !ReachedUnknownParallelRegions.empty();
  }

  /// Return empty set as the best state of potential values.
  static KernelInfoState getBestState() { return KernelInfoState(true); }

  static KernelInfoState getBestState(KernelInfoState &KIS) {
    return getBestState();
  }

  /// Return full set as the worst state of potential values.
  static KernelInfoState getWorstState() { return KernelInfoState(false); }

  /// "Clamp" this state with \p KIS.
  KernelInfoState operator^=(const KernelInfoState &KIS) {
    // Do not merge two different _init and _deinit call sites.
    if (KIS.KernelInitCB) {
      if (KernelInitCB && KernelInitCB != KIS.KernelInitCB)
        llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
                         "assumptions.");
      KernelInitCB = KIS.KernelInitCB;
    }
    if (KIS.KernelDeinitCB) {
      if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
        llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
                         "assumptions.");
      KernelDeinitCB = KIS.KernelDeinitCB;
    }
    if (KIS.KernelEnvC) {
      if (KernelEnvC && KernelEnvC != KIS.KernelEnvC)
        llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
                         "assumptions.");
      KernelEnvC = KIS.KernelEnvC;
    }
    SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
    ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions;
    ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions;
    NestedParallelism |= KIS.NestedParallelism;
    return *this;
  }

  KernelInfoState operator&=(const KernelInfoState &KIS) {
    return (*this ^= KIS);
  }

  ///}
};

/// Used to map the values physically (in the IR) stored in an offload
/// array, to a vector in memory.
struct OffloadArray {
  /// Physical array (in the IR).
  AllocaInst *Array = nullptr;
  /// Mapped values.
  SmallVector<Value *, 8> StoredValues;
  /// Last stores made in the offload array.
  SmallVector<StoreInst *, 8> LastAccesses;

  OffloadArray() = default;

  /// Initializes the OffloadArray with the values stored in \p Array before
  /// instruction \p Before is reached. Returns false if the initialization
  /// fails.
  /// This MUST be used immediately after the construction of the object.
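  /// Typical usage (names here are illustrative):
  ///   OffloadArray OffloadArgs;
  ///   if (!OffloadArgs.initialize(*ArgsAlloca, *RuntimeCall))
  ///     return false; // The stored values could not be determined.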
  bool initialize(AllocaInst &Array, Instruction &Before) {
    if (!Array.getAllocatedType()->isArrayTy())
      return false;

    if (!getValues(Array, Before))
      return false;

    this->Array = &Array;
    return true;
  }

  static const unsigned DeviceIDArgNum = 1;
  static const unsigned BasePtrsArgNum = 3;
  static const unsigned PtrsArgNum = 4;
  static const unsigned SizesArgNum = 5;

private:
  /// Traverses the BasicBlock where \p Array is, collecting the stores made to
  /// \p Array, leaving StoredValues with the values stored before the
  /// instruction \p Before is reached.
  bool getValues(AllocaInst &Array, Instruction &Before) {
    // Initialize container.
    const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
    StoredValues.assign(NumValues, nullptr);
    LastAccesses.assign(NumValues, nullptr);

    // TODO: This assumes the instruction \p Before is in the same
    // BasicBlock as Array. Make it general, for any control flow graph.
    BasicBlock *BB = Array.getParent();
    if (BB != Before.getParent())
      return false;

    const DataLayout &DL = Array.getDataLayout();
    const unsigned int PointerSize = DL.getPointerSize();

    for (Instruction &I : *BB) {
      if (&I == &Before)
        break;

      if (!isa<StoreInst>(&I))
        continue;

      auto *S = cast<StoreInst>(&I);
      int64_t Offset = -1;
      auto *Dst =
          GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
      if (Dst == &Array) {
        int64_t Idx = Offset / PointerSize;
        StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
        LastAccesses[Idx] = S;
      }
    }

    return isFilled();
  }

  /// Returns true if all values in StoredValues and
  /// LastAccesses are not nullptrs.
  bool isFilled() {
    const unsigned NumValues = StoredValues.size();
    for (unsigned I = 0; I < NumValues; ++I) {
      if (!StoredValues[I] || !LastAccesses[I])
        return false;
    }

    return true;
  }
};

struct OpenMPOpt {

  using OptimizationRemarkGetter =
      function_ref<OptimizationRemarkEmitter &(Function *)>;

  OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
            OptimizationRemarkGetter OREGetter,
            OMPInformationCache &OMPInfoCache, Attributor &A)
      : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
        OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}

  /// Check if any remarks are enabled for openmp-opt
  bool remarksEnabled() {
    auto &Ctx = M.getContext();
    return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
  }

  /// Run all OpenMP optimizations on the underlying SCC.
  bool run(bool IsModulePass) {
    if (SCC.empty())
      return false;

    bool Changed = false;

    LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
                      << " functions\n");

    if (IsModulePass) {
      Changed |= runAttributor(IsModulePass);

      // Recollect uses, in case Attributor deleted any.
      OMPInfoCache.recollectUses();

      // TODO: This should be folded into buildCustomStateMachine.
      Changed |= rewriteDeviceCodeStateMachine();

      if (remarksEnabled())
        analysisGlobalization();
    } else {
      if (PrintICVValues)
        printICVs();
      if (PrintOpenMPKernels)
        printKernels();

      Changed |= runAttributor(IsModulePass);

      // Recollect uses, in case Attributor deleted any.
      OMPInfoCache.recollectUses();

      Changed |= deleteParallelRegions();

      if (HideMemoryTransferLatency)
        Changed |= hideMemTransfersLatency();
      Changed |= deduplicateRuntimeCalls();
      if (EnableParallelRegionMerging) {
        if (mergeParallelRegions()) {
          deduplicateRuntimeCalls();
          Changed = true;
        }
      }
    }

    if (OMPInfoCache.OpenMPPostLink)
      Changed |= removeRuntimeSymbols();

    return Changed;
  }

  /// Print initial ICV values for testing.
  /// FIXME: This should be done from the Attributor once it is added.
  void printICVs() const {
    InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
                                 ICV_proc_bind};

    for (Function *F : SCC) {
      for (auto ICV : ICVs) {
        auto ICVInfo = OMPInfoCache.ICVs[ICV];
        auto Remark = [&](OptimizationRemarkAnalysis ORA) {
          return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
                     << " Value: "
                     << (ICVInfo.InitValue
                             ? toString(ICVInfo.InitValue->getValue(), 10, true)
                             : "IMPLEMENTATION_DEFINED");
        };

        emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark);
      }
    }
  }

  /// Print OpenMP GPU kernels for testing.
  void printKernels() const {
    for (Function *F : SCC) {
      if (!omp::isOpenMPKernel(*F))
        continue;

      auto Remark = [&](OptimizationRemarkAnalysis ORA) {
        return ORA << "OpenMP GPU kernel "
                   << ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
      };

      emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark);
    }
  }

  /// Return the call if \p U is a callee use in a regular call. If \p RFI is
  /// given it has to be the callee or a nullptr is returned.
10275ffd83dbSDimitry Andric static CallInst *getCallIfRegularCall( 10285ffd83dbSDimitry Andric Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { 10295ffd83dbSDimitry Andric CallInst *CI = dyn_cast<CallInst>(U.getUser()); 10305ffd83dbSDimitry Andric if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && 1031fe6060f1SDimitry Andric (!RFI || 1032fe6060f1SDimitry Andric (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration))) 10335ffd83dbSDimitry Andric return CI; 10345ffd83dbSDimitry Andric return nullptr; 10355ffd83dbSDimitry Andric } 10365ffd83dbSDimitry Andric 10375ffd83dbSDimitry Andric /// Return the call if \p V is a regular call. If \p RFI is given it has to be 10385ffd83dbSDimitry Andric /// the callee or a nullptr is returned. 10395ffd83dbSDimitry Andric static CallInst *getCallIfRegularCall( 10405ffd83dbSDimitry Andric Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { 10415ffd83dbSDimitry Andric CallInst *CI = dyn_cast<CallInst>(&V); 10425ffd83dbSDimitry Andric if (CI && !CI->hasOperandBundles() && 1043fe6060f1SDimitry Andric (!RFI || 1044fe6060f1SDimitry Andric (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration))) 10455ffd83dbSDimitry Andric return CI; 10465ffd83dbSDimitry Andric return nullptr; 10475ffd83dbSDimitry Andric } 10485ffd83dbSDimitry Andric 10495ffd83dbSDimitry Andric private: 1050e8d8bef9SDimitry Andric /// Merge parallel regions when it is safe. 1051e8d8bef9SDimitry Andric bool mergeParallelRegions() { 1052e8d8bef9SDimitry Andric const unsigned CallbackCalleeOperand = 2; 1053e8d8bef9SDimitry Andric const unsigned CallbackFirstArgOperand = 3; 1054e8d8bef9SDimitry Andric using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 1055e8d8bef9SDimitry Andric 1056e8d8bef9SDimitry Andric // Check if there are any __kmpc_fork_call calls to merge. 1057e8d8bef9SDimitry Andric OMPInformationCache::RuntimeFunctionInfo &RFI = 1058e8d8bef9SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; 1059e8d8bef9SDimitry Andric 1060e8d8bef9SDimitry Andric if (!RFI.Declaration) 1061e8d8bef9SDimitry Andric return false; 1062e8d8bef9SDimitry Andric 1063e8d8bef9SDimitry Andric // Unmergable calls that prevent merging a parallel region. 
1064e8d8bef9SDimitry Andric OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = { 1065e8d8bef9SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind], 1066e8d8bef9SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads], 1067e8d8bef9SDimitry Andric }; 1068e8d8bef9SDimitry Andric 1069e8d8bef9SDimitry Andric bool Changed = false; 1070e8d8bef9SDimitry Andric LoopInfo *LI = nullptr; 1071e8d8bef9SDimitry Andric DominatorTree *DT = nullptr; 1072e8d8bef9SDimitry Andric 1073e8d8bef9SDimitry Andric SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap; 1074e8d8bef9SDimitry Andric 1075e8d8bef9SDimitry Andric BasicBlock *StartBB = nullptr, *EndBB = nullptr; 107681ad6265SDimitry Andric auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 1077e8d8bef9SDimitry Andric BasicBlock *CGStartBB = CodeGenIP.getBlock(); 1078e8d8bef9SDimitry Andric BasicBlock *CGEndBB = 1079e8d8bef9SDimitry Andric SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); 1080e8d8bef9SDimitry Andric assert(StartBB != nullptr && "StartBB should not be null"); 1081e8d8bef9SDimitry Andric CGStartBB->getTerminator()->setSuccessor(0, StartBB); 1082e8d8bef9SDimitry Andric assert(EndBB != nullptr && "EndBB should not be null"); 1083e8d8bef9SDimitry Andric EndBB->getTerminator()->setSuccessor(0, CGEndBB); 1084e8d8bef9SDimitry Andric }; 1085e8d8bef9SDimitry Andric 1086e8d8bef9SDimitry Andric auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &, 1087e8d8bef9SDimitry Andric Value &Inner, Value *&ReplacementValue) -> InsertPointTy { 1088e8d8bef9SDimitry Andric ReplacementValue = &Inner; 1089e8d8bef9SDimitry Andric return CodeGenIP; 1090e8d8bef9SDimitry Andric }; 1091e8d8bef9SDimitry Andric 1092e8d8bef9SDimitry Andric auto FiniCB = [&](InsertPointTy CodeGenIP) {}; 1093e8d8bef9SDimitry Andric 1094e8d8bef9SDimitry Andric /// Create a sequential execution region within a merged parallel region, 1095e8d8bef9SDimitry Andric /// encapsulated in a master construct with a barrier for synchronization. 1096e8d8bef9SDimitry Andric auto CreateSequentialRegion = [&](Function *OuterFn, 1097e8d8bef9SDimitry Andric BasicBlock *OuterPredBB, 1098e8d8bef9SDimitry Andric Instruction *SeqStartI, 1099e8d8bef9SDimitry Andric Instruction *SeqEndI) { 1100e8d8bef9SDimitry Andric // Isolate the instructions of the sequential region to a separate 1101e8d8bef9SDimitry Andric // block. 
1102e8d8bef9SDimitry Andric BasicBlock *ParentBB = SeqStartI->getParent(); 1103e8d8bef9SDimitry Andric BasicBlock *SeqEndBB = 1104e8d8bef9SDimitry Andric SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI); 1105e8d8bef9SDimitry Andric BasicBlock *SeqAfterBB = 1106e8d8bef9SDimitry Andric SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI); 1107e8d8bef9SDimitry Andric BasicBlock *SeqStartBB = 1108e8d8bef9SDimitry Andric SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged"); 1109e8d8bef9SDimitry Andric 1110e8d8bef9SDimitry Andric assert(ParentBB->getUniqueSuccessor() == SeqStartBB && 1111e8d8bef9SDimitry Andric "Expected a different CFG"); 1112e8d8bef9SDimitry Andric const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); 1113e8d8bef9SDimitry Andric ParentBB->getTerminator()->eraseFromParent(); 1114e8d8bef9SDimitry Andric 111581ad6265SDimitry Andric auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { 1116e8d8bef9SDimitry Andric BasicBlock *CGStartBB = CodeGenIP.getBlock(); 1117e8d8bef9SDimitry Andric BasicBlock *CGEndBB = 1118e8d8bef9SDimitry Andric SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); 1119e8d8bef9SDimitry Andric assert(SeqStartBB != nullptr && "SeqStartBB should not be null"); 1120e8d8bef9SDimitry Andric CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB); 1121e8d8bef9SDimitry Andric assert(SeqEndBB != nullptr && "SeqEndBB should not be null"); 1122e8d8bef9SDimitry Andric SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB); 1123e8d8bef9SDimitry Andric }; 1124e8d8bef9SDimitry Andric auto FiniCB = [&](InsertPointTy CodeGenIP) {}; 1125e8d8bef9SDimitry Andric 1126e8d8bef9SDimitry Andric // Find outputs from the sequential region to outside users and 1127e8d8bef9SDimitry Andric // broadcast their values to them. 1128e8d8bef9SDimitry Andric for (Instruction &I : *SeqStartBB) { 1129e8d8bef9SDimitry Andric SmallPtrSet<Instruction *, 4> OutsideUsers; 1130e8d8bef9SDimitry Andric for (User *Usr : I.users()) { 1131e8d8bef9SDimitry Andric Instruction &UsrI = *cast<Instruction>(Usr); 1132e8d8bef9SDimitry Andric // Ignore outputs to LT intrinsics, code extraction for the merged 1133e8d8bef9SDimitry Andric // parallel region will fix them. 1134e8d8bef9SDimitry Andric if (UsrI.isLifetimeStartOrEnd()) 1135e8d8bef9SDimitry Andric continue; 1136e8d8bef9SDimitry Andric 1137e8d8bef9SDimitry Andric if (UsrI.getParent() != SeqStartBB) 1138e8d8bef9SDimitry Andric OutsideUsers.insert(&UsrI); 1139e8d8bef9SDimitry Andric } 1140e8d8bef9SDimitry Andric 1141e8d8bef9SDimitry Andric if (OutsideUsers.empty()) 1142e8d8bef9SDimitry Andric continue; 1143e8d8bef9SDimitry Andric 1144e8d8bef9SDimitry Andric // Emit an alloca in the outer region to store the broadcasted 1145e8d8bef9SDimitry Andric // value. 1146e8d8bef9SDimitry Andric const DataLayout &DL = M.getDataLayout(); 1147e8d8bef9SDimitry Andric AllocaInst *AllocaI = new AllocaInst( 1148e8d8bef9SDimitry Andric I.getType(), DL.getAllocaAddrSpace(), nullptr, 1149*0fca6ea1SDimitry Andric I.getName() + ".seq.output.alloc", OuterFn->front().begin()); 1150e8d8bef9SDimitry Andric 1151e8d8bef9SDimitry Andric // Emit a store instruction in the sequential BB to update the 1152e8d8bef9SDimitry Andric // value. 1153*0fca6ea1SDimitry Andric new StoreInst(&I, AllocaI, SeqStartBB->getTerminator()->getIterator()); 1154e8d8bef9SDimitry Andric 1155e8d8bef9SDimitry Andric // Emit a load instruction and replace the use of the output value 1156e8d8bef9SDimitry Andric // with it. 
1157e8d8bef9SDimitry Andric for (Instruction *UsrI : OutsideUsers) { 1158*0fca6ea1SDimitry Andric LoadInst *LoadI = new LoadInst(I.getType(), AllocaI, 1159*0fca6ea1SDimitry Andric I.getName() + ".seq.output.load", 1160*0fca6ea1SDimitry Andric UsrI->getIterator()); 1161e8d8bef9SDimitry Andric UsrI->replaceUsesOfWith(&I, LoadI); 1162e8d8bef9SDimitry Andric } 1163e8d8bef9SDimitry Andric } 1164e8d8bef9SDimitry Andric 1165e8d8bef9SDimitry Andric OpenMPIRBuilder::LocationDescription Loc( 1166e8d8bef9SDimitry Andric InsertPointTy(ParentBB, ParentBB->end()), DL); 1167e8d8bef9SDimitry Andric InsertPointTy SeqAfterIP = 1168e8d8bef9SDimitry Andric OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB); 1169e8d8bef9SDimitry Andric 1170e8d8bef9SDimitry Andric OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel); 1171e8d8bef9SDimitry Andric 1172e8d8bef9SDimitry Andric BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock()); 1173e8d8bef9SDimitry Andric 1174e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn 1175e8d8bef9SDimitry Andric << "\n"); 1176e8d8bef9SDimitry Andric }; 1177e8d8bef9SDimitry Andric 1178e8d8bef9SDimitry Andric // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all 1179e8d8bef9SDimitry Andric // contained in BB and only separated by instructions that can be 1180e8d8bef9SDimitry Andric // redundantly executed in parallel. The block BB is split before the first 1181e8d8bef9SDimitry Andric // call (in MergableCIs) and after the last so the entire region we merge 1182e8d8bef9SDimitry Andric // into a single parallel region is contained in a single basic block 1183e8d8bef9SDimitry Andric // without any other instructions. We use the OpenMPIRBuilder to outline 1184e8d8bef9SDimitry Andric // that block and call the resulting function via __kmpc_fork_call. 118504eeddc0SDimitry Andric auto Merge = [&](const SmallVectorImpl<CallInst *> &MergableCIs, 118604eeddc0SDimitry Andric BasicBlock *BB) { 1187e8d8bef9SDimitry Andric // TODO: Change the interface to allow single CIs expanded, e.g, to 1188e8d8bef9SDimitry Andric // include an outer loop. 1189e8d8bef9SDimitry Andric assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs"); 1190e8d8bef9SDimitry Andric 1191e8d8bef9SDimitry Andric auto Remark = [&](OptimizationRemark OR) { 1192fe6060f1SDimitry Andric OR << "Parallel region merged with parallel region" 1193fe6060f1SDimitry Andric << (MergableCIs.size() > 2 ? "s" : "") << " at "; 1194e8d8bef9SDimitry Andric for (auto *CI : llvm::drop_begin(MergableCIs)) { 1195e8d8bef9SDimitry Andric OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc()); 1196e8d8bef9SDimitry Andric if (CI != MergableCIs.back()) 1197e8d8bef9SDimitry Andric OR << ", "; 1198e8d8bef9SDimitry Andric } 1199fe6060f1SDimitry Andric return OR << "."; 1200e8d8bef9SDimitry Andric }; 1201e8d8bef9SDimitry Andric 1202fe6060f1SDimitry Andric emitRemark<OptimizationRemark>(MergableCIs.front(), "OMP150", Remark); 1203e8d8bef9SDimitry Andric 1204e8d8bef9SDimitry Andric Function *OriginalFn = BB->getParent(); 1205e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size() 1206e8d8bef9SDimitry Andric << " parallel regions in " << OriginalFn->getName() 1207e8d8bef9SDimitry Andric << "\n"); 1208e8d8bef9SDimitry Andric 1209e8d8bef9SDimitry Andric // Isolate the calls to merge in a separate block. 
1210e8d8bef9SDimitry Andric EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI);
1211e8d8bef9SDimitry Andric BasicBlock *AfterBB =
1212e8d8bef9SDimitry Andric SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI);
1213e8d8bef9SDimitry Andric StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr,
1214e8d8bef9SDimitry Andric "omp.par.merged");
1215e8d8bef9SDimitry Andric 
1216e8d8bef9SDimitry Andric assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG");
1217e8d8bef9SDimitry Andric const DebugLoc DL = BB->getTerminator()->getDebugLoc();
1218e8d8bef9SDimitry Andric BB->getTerminator()->eraseFromParent();
1219e8d8bef9SDimitry Andric 
1220e8d8bef9SDimitry Andric // Create sequential regions for sequential instructions that are
1221e8d8bef9SDimitry Andric // in-between mergable parallel regions.
1222e8d8bef9SDimitry Andric for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1;
1223e8d8bef9SDimitry Andric It != End; ++It) {
1224e8d8bef9SDimitry Andric Instruction *ForkCI = *It;
1225e8d8bef9SDimitry Andric Instruction *NextForkCI = *(It + 1);
1226e8d8bef9SDimitry Andric 
1227e8d8bef9SDimitry Andric // Continue if there are no in-between instructions.
1228e8d8bef9SDimitry Andric if (ForkCI->getNextNode() == NextForkCI)
1229e8d8bef9SDimitry Andric continue;
1230e8d8bef9SDimitry Andric 
1231e8d8bef9SDimitry Andric CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(),
1232e8d8bef9SDimitry Andric NextForkCI->getPrevNode());
1233e8d8bef9SDimitry Andric }
1234e8d8bef9SDimitry Andric 
1235e8d8bef9SDimitry Andric OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()),
1236e8d8bef9SDimitry Andric DL);
1237e8d8bef9SDimitry Andric IRBuilder<>::InsertPoint AllocaIP(
1238e8d8bef9SDimitry Andric &OriginalFn->getEntryBlock(),
1239e8d8bef9SDimitry Andric OriginalFn->getEntryBlock().getFirstInsertionPt());
1240e8d8bef9SDimitry Andric // Create the merged parallel region with default proc binding, to
1241e8d8bef9SDimitry Andric // avoid overriding binding settings, and without explicit cancellation.
1242e8d8bef9SDimitry Andric InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
1243e8d8bef9SDimitry Andric Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
1244e8d8bef9SDimitry Andric OMP_PROC_BIND_default, /* IsCancellable */ false);
1245e8d8bef9SDimitry Andric BranchInst::Create(AfterBB, AfterIP.getBlock());
1246e8d8bef9SDimitry Andric 
1247e8d8bef9SDimitry Andric // Perform the actual outlining.
124804eeddc0SDimitry Andric OMPInfoCache.OMPBuilder.finalize(OriginalFn);
1249e8d8bef9SDimitry Andric 
1250e8d8bef9SDimitry Andric Function *OutlinedFn = MergableCIs.front()->getCaller();
1251e8d8bef9SDimitry Andric 
1252e8d8bef9SDimitry Andric // Replace the __kmpc_fork_call calls with direct calls to the outlined
1253e8d8bef9SDimitry Andric // callbacks.
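// Schematically, and with illustrative names only, each merged call site
//   call void @__kmpc_fork_call(ptr %ident, i32 <n>, ptr @.omp_outlined., <args>)
// becomes a direct call to its callback,
//   call void @.omp_outlined.(ptr %tid.addr, ptr %bound.tid.addr, <args>)
// followed by an explicit __kmpc_barrier for every call site but the last.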
1254e8d8bef9SDimitry Andric SmallVector<Value *, 8> Args;
1255e8d8bef9SDimitry Andric for (auto *CI : MergableCIs) {
125681ad6265SDimitry Andric Value *Callee = CI->getArgOperand(CallbackCalleeOperand);
125781ad6265SDimitry Andric FunctionType *FT = OMPInfoCache.OMPBuilder.ParallelTask;
1258e8d8bef9SDimitry Andric Args.clear();
1259e8d8bef9SDimitry Andric Args.push_back(OutlinedFn->getArg(0));
1260e8d8bef9SDimitry Andric Args.push_back(OutlinedFn->getArg(1));
1261349cc55cSDimitry Andric for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
1262349cc55cSDimitry Andric ++U)
1263e8d8bef9SDimitry Andric Args.push_back(CI->getArgOperand(U));
1264e8d8bef9SDimitry Andric 
1265*0fca6ea1SDimitry Andric CallInst *NewCI =
1266*0fca6ea1SDimitry Andric CallInst::Create(FT, Callee, Args, "", CI->getIterator());
1267e8d8bef9SDimitry Andric if (CI->getDebugLoc())
1268e8d8bef9SDimitry Andric NewCI->setDebugLoc(CI->getDebugLoc());
1269e8d8bef9SDimitry Andric 
1270e8d8bef9SDimitry Andric // Forward parameter attributes from the callback to the callee.
1271349cc55cSDimitry Andric for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
1272349cc55cSDimitry Andric ++U)
1273349cc55cSDimitry Andric for (const Attribute &A : CI->getAttributes().getParamAttrs(U))
1274e8d8bef9SDimitry Andric NewCI->addParamAttr(
1275e8d8bef9SDimitry Andric U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);
1276e8d8bef9SDimitry Andric 
1277e8d8bef9SDimitry Andric // Emit an explicit barrier to replace the implicit fork-join barrier.
1278e8d8bef9SDimitry Andric if (CI != MergableCIs.back()) {
1279e8d8bef9SDimitry Andric // TODO: Remove barrier if the merged parallel region includes the
1280e8d8bef9SDimitry Andric // 'nowait' clause.
1281e8d8bef9SDimitry Andric OMPInfoCache.OMPBuilder.createBarrier(
1282e8d8bef9SDimitry Andric InsertPointTy(NewCI->getParent(),
1283e8d8bef9SDimitry Andric NewCI->getNextNode()->getIterator()),
1284e8d8bef9SDimitry Andric OMPD_parallel);
1285e8d8bef9SDimitry Andric }
1286e8d8bef9SDimitry Andric 
1287e8d8bef9SDimitry Andric CI->eraseFromParent();
1288e8d8bef9SDimitry Andric }
1289e8d8bef9SDimitry Andric 
1290e8d8bef9SDimitry Andric assert(OutlinedFn != OriginalFn && "Outlining failed");
1291e8d8bef9SDimitry Andric CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn);
1292e8d8bef9SDimitry Andric CGUpdater.reanalyzeFunction(*OriginalFn);
1293e8d8bef9SDimitry Andric 
1294e8d8bef9SDimitry Andric NumOpenMPParallelRegionsMerged += MergableCIs.size();
1295e8d8bef9SDimitry Andric 
1296e8d8bef9SDimitry Andric return true;
1297e8d8bef9SDimitry Andric };
1298e8d8bef9SDimitry Andric 
1299e8d8bef9SDimitry Andric // Helper function that identifies sequences of
1300e8d8bef9SDimitry Andric // __kmpc_fork_call uses in a basic block.
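// The callback below simply buckets every regular __kmpc_fork_call call
// site by its parent basic block; the per-block sets collected in BB2PRMap
// are then scanned for runs of mergable parallel regions.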
1301e8d8bef9SDimitry Andric auto DetectPRsCB = [&](Use &U, Function &F) { 1302e8d8bef9SDimitry Andric CallInst *CI = getCallIfRegularCall(U, &RFI); 1303e8d8bef9SDimitry Andric BB2PRMap[CI->getParent()].insert(CI); 1304e8d8bef9SDimitry Andric 1305e8d8bef9SDimitry Andric return false; 1306e8d8bef9SDimitry Andric }; 1307e8d8bef9SDimitry Andric 1308e8d8bef9SDimitry Andric BB2PRMap.clear(); 1309e8d8bef9SDimitry Andric RFI.foreachUse(SCC, DetectPRsCB); 1310e8d8bef9SDimitry Andric SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector; 1311e8d8bef9SDimitry Andric // Find mergable parallel regions within a basic block that are 1312e8d8bef9SDimitry Andric // safe to merge, that is any in-between instructions can safely 1313e8d8bef9SDimitry Andric // execute in parallel after merging. 1314e8d8bef9SDimitry Andric // TODO: support merging across basic-blocks. 1315e8d8bef9SDimitry Andric for (auto &It : BB2PRMap) { 1316e8d8bef9SDimitry Andric auto &CIs = It.getSecond(); 1317e8d8bef9SDimitry Andric if (CIs.size() < 2) 1318e8d8bef9SDimitry Andric continue; 1319e8d8bef9SDimitry Andric 1320e8d8bef9SDimitry Andric BasicBlock *BB = It.getFirst(); 1321e8d8bef9SDimitry Andric SmallVector<CallInst *, 4> MergableCIs; 1322e8d8bef9SDimitry Andric 1323e8d8bef9SDimitry Andric /// Returns true if the instruction is mergable, false otherwise. 1324e8d8bef9SDimitry Andric /// A terminator instruction is unmergable by definition since merging 1325e8d8bef9SDimitry Andric /// works within a BB. Instructions before the mergable region are 1326e8d8bef9SDimitry Andric /// mergable if they are not calls to OpenMP runtime functions that may 1327e8d8bef9SDimitry Andric /// set different execution parameters for subsequent parallel regions. 1328e8d8bef9SDimitry Andric /// Instructions in-between parallel regions are mergable if they are not 1329e8d8bef9SDimitry Andric /// calls to any non-intrinsic function since that may call a non-mergable 1330e8d8bef9SDimitry Andric /// OpenMP runtime function. 1331e8d8bef9SDimitry Andric auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) { 1332e8d8bef9SDimitry Andric // We do not merge across BBs, hence return false (unmergable) if the 1333e8d8bef9SDimitry Andric // instruction is a terminator. 1334e8d8bef9SDimitry Andric if (I.isTerminator()) 1335e8d8bef9SDimitry Andric return false; 1336e8d8bef9SDimitry Andric 1337e8d8bef9SDimitry Andric if (!isa<CallInst>(&I)) 1338e8d8bef9SDimitry Andric return true; 1339e8d8bef9SDimitry Andric 1340e8d8bef9SDimitry Andric CallInst *CI = cast<CallInst>(&I); 1341e8d8bef9SDimitry Andric if (IsBeforeMergableRegion) { 1342e8d8bef9SDimitry Andric Function *CalledFunction = CI->getCalledFunction(); 1343e8d8bef9SDimitry Andric if (!CalledFunction) 1344e8d8bef9SDimitry Andric return false; 1345e8d8bef9SDimitry Andric // Return false (unmergable) if the call before the parallel 1346e8d8bef9SDimitry Andric // region calls an explicit affinity (proc_bind) or number of 1347e8d8bef9SDimitry Andric // threads (num_threads) compiler-generated function. Those settings 1348e8d8bef9SDimitry Andric // may be incompatible with following parallel regions. 1349e8d8bef9SDimitry Andric // TODO: ICV tracking to detect compatibility. 
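// For example, a (hypothetical) source fragment such as
//   #pragma omp parallel num_threads(4)
//   { ... }
//   #pragma omp parallel
//   { ... }
// emits a __kmpc_push_num_threads call before the first fork call; merging
// the two regions could wrongly extend that setting to the second one, so
// any call matching UnmergableCallsInfo blocks the merge.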
1350e8d8bef9SDimitry Andric for (const auto &RFI : UnmergableCallsInfo) { 1351e8d8bef9SDimitry Andric if (CalledFunction == RFI.Declaration) 1352e8d8bef9SDimitry Andric return false; 1353e8d8bef9SDimitry Andric } 1354e8d8bef9SDimitry Andric } else { 1355e8d8bef9SDimitry Andric // Return false (unmergable) if there is a call instruction 1356e8d8bef9SDimitry Andric // in-between parallel regions when it is not an intrinsic. It 1357e8d8bef9SDimitry Andric // may call an unmergable OpenMP runtime function in its callpath. 1358e8d8bef9SDimitry Andric // TODO: Keep track of possible OpenMP calls in the callpath. 1359e8d8bef9SDimitry Andric if (!isa<IntrinsicInst>(CI)) 1360e8d8bef9SDimitry Andric return false; 1361e8d8bef9SDimitry Andric } 1362e8d8bef9SDimitry Andric 1363e8d8bef9SDimitry Andric return true; 1364e8d8bef9SDimitry Andric }; 1365e8d8bef9SDimitry Andric // Find maximal number of parallel region CIs that are safe to merge. 1366e8d8bef9SDimitry Andric for (auto It = BB->begin(), End = BB->end(); It != End;) { 1367e8d8bef9SDimitry Andric Instruction &I = *It; 1368e8d8bef9SDimitry Andric ++It; 1369e8d8bef9SDimitry Andric 1370e8d8bef9SDimitry Andric if (CIs.count(&I)) { 1371e8d8bef9SDimitry Andric MergableCIs.push_back(cast<CallInst>(&I)); 1372e8d8bef9SDimitry Andric continue; 1373e8d8bef9SDimitry Andric } 1374e8d8bef9SDimitry Andric 1375e8d8bef9SDimitry Andric // Continue expanding if the instruction is mergable. 1376e8d8bef9SDimitry Andric if (IsMergable(I, MergableCIs.empty())) 1377e8d8bef9SDimitry Andric continue; 1378e8d8bef9SDimitry Andric 1379e8d8bef9SDimitry Andric // Forward the instruction iterator to skip the next parallel region 1380e8d8bef9SDimitry Andric // since there is an unmergable instruction which can affect it. 1381e8d8bef9SDimitry Andric for (; It != End; ++It) { 1382e8d8bef9SDimitry Andric Instruction &SkipI = *It; 1383e8d8bef9SDimitry Andric if (CIs.count(&SkipI)) { 1384e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI 1385e8d8bef9SDimitry Andric << " due to " << I << "\n"); 1386e8d8bef9SDimitry Andric ++It; 1387e8d8bef9SDimitry Andric break; 1388e8d8bef9SDimitry Andric } 1389e8d8bef9SDimitry Andric } 1390e8d8bef9SDimitry Andric 1391e8d8bef9SDimitry Andric // Store mergable regions found. 1392e8d8bef9SDimitry Andric if (MergableCIs.size() > 1) { 1393e8d8bef9SDimitry Andric MergableCIsVector.push_back(MergableCIs); 1394e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size() 1395e8d8bef9SDimitry Andric << " parallel regions in block " << BB->getName() 1396e8d8bef9SDimitry Andric << " of function " << BB->getParent()->getName() 1397e8d8bef9SDimitry Andric << "\n";); 1398e8d8bef9SDimitry Andric } 1399e8d8bef9SDimitry Andric 1400e8d8bef9SDimitry Andric MergableCIs.clear(); 1401e8d8bef9SDimitry Andric } 1402e8d8bef9SDimitry Andric 1403e8d8bef9SDimitry Andric if (!MergableCIsVector.empty()) { 1404e8d8bef9SDimitry Andric Changed = true; 1405e8d8bef9SDimitry Andric 1406e8d8bef9SDimitry Andric for (auto &MergableCIs : MergableCIsVector) 1407e8d8bef9SDimitry Andric Merge(MergableCIs, BB); 1408fe6060f1SDimitry Andric MergableCIsVector.clear(); 1409e8d8bef9SDimitry Andric } 1410e8d8bef9SDimitry Andric } 1411e8d8bef9SDimitry Andric 1412e8d8bef9SDimitry Andric if (Changed) { 1413e8d8bef9SDimitry Andric /// Re-collect use for fork calls, emitted barrier calls, and 1414e8d8bef9SDimitry Andric /// any emitted master/end_master calls. 
1415e8d8bef9SDimitry Andric OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call); 1416e8d8bef9SDimitry Andric OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier); 1417e8d8bef9SDimitry Andric OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master); 1418e8d8bef9SDimitry Andric OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master); 1419e8d8bef9SDimitry Andric } 1420e8d8bef9SDimitry Andric 1421e8d8bef9SDimitry Andric return Changed; 1422e8d8bef9SDimitry Andric } 1423e8d8bef9SDimitry Andric 14245ffd83dbSDimitry Andric /// Try to delete parallel regions if possible. 14255ffd83dbSDimitry Andric bool deleteParallelRegions() { 14265ffd83dbSDimitry Andric const unsigned CallbackCalleeOperand = 2; 14275ffd83dbSDimitry Andric 14285ffd83dbSDimitry Andric OMPInformationCache::RuntimeFunctionInfo &RFI = 14295ffd83dbSDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; 14305ffd83dbSDimitry Andric 14315ffd83dbSDimitry Andric if (!RFI.Declaration) 14325ffd83dbSDimitry Andric return false; 14335ffd83dbSDimitry Andric 14345ffd83dbSDimitry Andric bool Changed = false; 14355ffd83dbSDimitry Andric auto DeleteCallCB = [&](Use &U, Function &) { 14365ffd83dbSDimitry Andric CallInst *CI = getCallIfRegularCall(U); 14375ffd83dbSDimitry Andric if (!CI) 14385ffd83dbSDimitry Andric return false; 14395ffd83dbSDimitry Andric auto *Fn = dyn_cast<Function>( 14405ffd83dbSDimitry Andric CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts()); 14415ffd83dbSDimitry Andric if (!Fn) 14425ffd83dbSDimitry Andric return false; 14435ffd83dbSDimitry Andric if (!Fn->onlyReadsMemory()) 14445ffd83dbSDimitry Andric return false; 14455ffd83dbSDimitry Andric if (!Fn->hasFnAttribute(Attribute::WillReturn)) 14465ffd83dbSDimitry Andric return false; 14475ffd83dbSDimitry Andric 14485ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in " 14495ffd83dbSDimitry Andric << CI->getCaller()->getName() << "\n"); 14505ffd83dbSDimitry Andric 14515ffd83dbSDimitry Andric auto Remark = [&](OptimizationRemark OR) { 1452fe6060f1SDimitry Andric return OR << "Removing parallel region with no side-effects."; 14535ffd83dbSDimitry Andric }; 1454fe6060f1SDimitry Andric emitRemark<OptimizationRemark>(CI, "OMP160", Remark); 14555ffd83dbSDimitry Andric 14565ffd83dbSDimitry Andric CI->eraseFromParent(); 14575ffd83dbSDimitry Andric Changed = true; 14585ffd83dbSDimitry Andric ++NumOpenMPParallelRegionsDeleted; 14595ffd83dbSDimitry Andric return true; 14605ffd83dbSDimitry Andric }; 14615ffd83dbSDimitry Andric 14625ffd83dbSDimitry Andric RFI.foreachUse(SCC, DeleteCallCB); 14635ffd83dbSDimitry Andric 14645ffd83dbSDimitry Andric return Changed; 14655ffd83dbSDimitry Andric } 14665ffd83dbSDimitry Andric 14675ffd83dbSDimitry Andric /// Try to eliminate runtime calls by reusing existing ones. 
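///
/// As a rough sketch, repeated side-effect-free queries such as
/// \code
///   %a = call i32 @omp_get_level()
///   ...
///   %b = call i32 @omp_get_level()
/// \endcode
/// are collapsed onto a single call whose result replaces the uses of the
/// others, assuming the calls must return the same value at those points.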
14685ffd83dbSDimitry Andric bool deduplicateRuntimeCalls() { 14695ffd83dbSDimitry Andric bool Changed = false; 14705ffd83dbSDimitry Andric 14715ffd83dbSDimitry Andric RuntimeFunction DeduplicableRuntimeCallIDs[] = { 14725ffd83dbSDimitry Andric OMPRTL_omp_get_num_threads, 14735ffd83dbSDimitry Andric OMPRTL_omp_in_parallel, 14745ffd83dbSDimitry Andric OMPRTL_omp_get_cancellation, 14755ffd83dbSDimitry Andric OMPRTL_omp_get_supported_active_levels, 14765ffd83dbSDimitry Andric OMPRTL_omp_get_level, 14775ffd83dbSDimitry Andric OMPRTL_omp_get_ancestor_thread_num, 14785ffd83dbSDimitry Andric OMPRTL_omp_get_team_size, 14795ffd83dbSDimitry Andric OMPRTL_omp_get_active_level, 14805ffd83dbSDimitry Andric OMPRTL_omp_in_final, 14815ffd83dbSDimitry Andric OMPRTL_omp_get_proc_bind, 14825ffd83dbSDimitry Andric OMPRTL_omp_get_num_places, 14835ffd83dbSDimitry Andric OMPRTL_omp_get_num_procs, 14845ffd83dbSDimitry Andric OMPRTL_omp_get_place_num, 14855ffd83dbSDimitry Andric OMPRTL_omp_get_partition_num_places, 14865ffd83dbSDimitry Andric OMPRTL_omp_get_partition_place_nums}; 14875ffd83dbSDimitry Andric 14885ffd83dbSDimitry Andric // Global-tid is handled separately. 14895ffd83dbSDimitry Andric SmallSetVector<Value *, 16> GTIdArgs; 14905ffd83dbSDimitry Andric collectGlobalThreadIdArguments(GTIdArgs); 14915ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size() 14925ffd83dbSDimitry Andric << " global thread ID arguments\n"); 14935ffd83dbSDimitry Andric 14945ffd83dbSDimitry Andric for (Function *F : SCC) { 14955ffd83dbSDimitry Andric for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs) 1496e8d8bef9SDimitry Andric Changed |= deduplicateRuntimeCalls( 1497e8d8bef9SDimitry Andric *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]); 14985ffd83dbSDimitry Andric 14995ffd83dbSDimitry Andric // __kmpc_global_thread_num is special as we can replace it with an 15005ffd83dbSDimitry Andric // argument in enough cases to make it worth trying. 15015ffd83dbSDimitry Andric Value *GTIdArg = nullptr; 15025ffd83dbSDimitry Andric for (Argument &Arg : F->args()) 15035ffd83dbSDimitry Andric if (GTIdArgs.count(&Arg)) { 15045ffd83dbSDimitry Andric GTIdArg = &Arg; 15055ffd83dbSDimitry Andric break; 15065ffd83dbSDimitry Andric } 15075ffd83dbSDimitry Andric Changed |= deduplicateRuntimeCalls( 15085ffd83dbSDimitry Andric *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); 15095ffd83dbSDimitry Andric } 15105ffd83dbSDimitry Andric 15115ffd83dbSDimitry Andric return Changed; 15125ffd83dbSDimitry Andric } 15135ffd83dbSDimitry Andric 15145f757f3fSDimitry Andric /// Tries to remove known runtime symbols that are optional from the module. 15155f757f3fSDimitry Andric bool removeRuntimeSymbols() { 15165f757f3fSDimitry Andric // The RPC client symbol is defined in `libc` and indicates that something 15175f757f3fSDimitry Andric // required an RPC server. If its users were all optimized out then we can 15185f757f3fSDimitry Andric // safely remove it. 15195f757f3fSDimitry Andric // TODO: This should be somewhere more common in the future. 
15205f757f3fSDimitry Andric if (GlobalVariable *GV = M.getNamedGlobal("__llvm_libc_rpc_client")) {
15215f757f3fSDimitry Andric if (!GV->getType()->isPointerTy())
15225f757f3fSDimitry Andric return false;
15235f757f3fSDimitry Andric 
15245f757f3fSDimitry Andric Constant *C = GV->getInitializer();
15255f757f3fSDimitry Andric if (!C)
15265f757f3fSDimitry Andric return false;
15275f757f3fSDimitry Andric 
15285f757f3fSDimitry Andric // Check to see if the only user of the RPC client is the external handle.
15295f757f3fSDimitry Andric GlobalVariable *Client = dyn_cast<GlobalVariable>(C->stripPointerCasts());
15305f757f3fSDimitry Andric if (!Client || Client->getNumUses() > 1 ||
15315f757f3fSDimitry Andric Client->user_back() != GV->getInitializer())
15325f757f3fSDimitry Andric return false;
15335f757f3fSDimitry Andric 
15345f757f3fSDimitry Andric Client->replaceAllUsesWith(PoisonValue::get(Client->getType()));
15355f757f3fSDimitry Andric Client->eraseFromParent();
15365f757f3fSDimitry Andric 
15375f757f3fSDimitry Andric GV->replaceAllUsesWith(PoisonValue::get(GV->getType()));
15385f757f3fSDimitry Andric GV->eraseFromParent();
15395f757f3fSDimitry Andric 
15405f757f3fSDimitry Andric return true;
15415f757f3fSDimitry Andric }
15425f757f3fSDimitry Andric return false;
15435f757f3fSDimitry Andric }
15445f757f3fSDimitry Andric 
1545e8d8bef9SDimitry Andric /// Tries to hide the latency of runtime calls that involve host to
1546e8d8bef9SDimitry Andric /// device memory transfers by splitting them into their "issue" and "wait"
1547e8d8bef9SDimitry Andric /// versions. The "issue" is moved upwards as much as possible. The "wait" is
1548e8d8bef9SDimitry Andric /// moved downwards as much as possible. The "issue" issues the memory transfer
1549e8d8bef9SDimitry Andric /// asynchronously, returning a handle. The "wait" waits on the returned
1550e8d8bef9SDimitry Andric /// handle for the memory transfer to finish.
1551e8d8bef9SDimitry Andric bool hideMemTransfersLatency() {
1552e8d8bef9SDimitry Andric auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
1553e8d8bef9SDimitry Andric bool Changed = false;
1554e8d8bef9SDimitry Andric auto SplitMemTransfers = [&](Use &U, Function &Decl) {
1555e8d8bef9SDimitry Andric auto *RTCall = getCallIfRegularCall(U, &RFI);
1556e8d8bef9SDimitry Andric if (!RTCall)
1557e8d8bef9SDimitry Andric return false;
1558e8d8bef9SDimitry Andric 
1559e8d8bef9SDimitry Andric OffloadArray OffloadArrays[3];
1560e8d8bef9SDimitry Andric if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
1561e8d8bef9SDimitry Andric return false;
1562e8d8bef9SDimitry Andric 
1563e8d8bef9SDimitry Andric LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays));
1564e8d8bef9SDimitry Andric 
1565e8d8bef9SDimitry Andric // TODO: Check if it can be moved upwards.
1566e8d8bef9SDimitry Andric bool WasSplit = false; 1567e8d8bef9SDimitry Andric Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall); 1568e8d8bef9SDimitry Andric if (WaitMovementPoint) 1569e8d8bef9SDimitry Andric WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint); 1570e8d8bef9SDimitry Andric 1571e8d8bef9SDimitry Andric Changed |= WasSplit; 1572e8d8bef9SDimitry Andric return WasSplit; 1573e8d8bef9SDimitry Andric }; 15741ac55f4cSDimitry Andric if (OMPInfoCache.runtimeFnsAvailable( 15751ac55f4cSDimitry Andric {OMPRTL___tgt_target_data_begin_mapper_issue, 15761ac55f4cSDimitry Andric OMPRTL___tgt_target_data_begin_mapper_wait})) 1577e8d8bef9SDimitry Andric RFI.foreachUse(SCC, SplitMemTransfers); 1578e8d8bef9SDimitry Andric 1579e8d8bef9SDimitry Andric return Changed; 1580e8d8bef9SDimitry Andric } 1581e8d8bef9SDimitry Andric 1582e8d8bef9SDimitry Andric void analysisGlobalization() { 1583fe6060f1SDimitry Andric auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; 1584e8d8bef9SDimitry Andric 1585e8d8bef9SDimitry Andric auto CheckGlobalization = [&](Use &U, Function &Decl) { 1586e8d8bef9SDimitry Andric if (CallInst *CI = getCallIfRegularCall(U, &RFI)) { 1587fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemarkMissed ORM) { 1588fe6060f1SDimitry Andric return ORM 1589e8d8bef9SDimitry Andric << "Found thread data sharing on the GPU. " 1590e8d8bef9SDimitry Andric << "Expect degraded performance due to data globalization."; 1591e8d8bef9SDimitry Andric }; 1592fe6060f1SDimitry Andric emitRemark<OptimizationRemarkMissed>(CI, "OMP112", Remark); 1593e8d8bef9SDimitry Andric } 1594e8d8bef9SDimitry Andric 1595e8d8bef9SDimitry Andric return false; 1596e8d8bef9SDimitry Andric }; 1597e8d8bef9SDimitry Andric 1598e8d8bef9SDimitry Andric RFI.foreachUse(SCC, CheckGlobalization); 1599e8d8bef9SDimitry Andric } 1600e8d8bef9SDimitry Andric 1601e8d8bef9SDimitry Andric /// Maps the values stored in the offload arrays passed as arguments to 1602e8d8bef9SDimitry Andric /// \p RuntimeCall into the offload arrays in \p OAs. 1603e8d8bef9SDimitry Andric bool getValuesInOffloadArrays(CallInst &RuntimeCall, 1604e8d8bef9SDimitry Andric MutableArrayRef<OffloadArray> OAs) { 1605e8d8bef9SDimitry Andric assert(OAs.size() == 3 && "Need space for three offload arrays!"); 1606e8d8bef9SDimitry Andric 1607e8d8bef9SDimitry Andric // A runtime call that involves memory offloading looks something like: 1608e8d8bef9SDimitry Andric // call void @__tgt_target_data_begin_mapper(arg0, arg1, 1609e8d8bef9SDimitry Andric // i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes, 1610e8d8bef9SDimitry Andric // ...) 1611e8d8bef9SDimitry Andric // So, the idea is to access the allocas that allocate space for these 1612e8d8bef9SDimitry Andric // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes. 1613e8d8bef9SDimitry Andric // Therefore: 1614e8d8bef9SDimitry Andric // i8** %offload_baseptrs. 1615e8d8bef9SDimitry Andric Value *BasePtrsArg = 1616e8d8bef9SDimitry Andric RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum); 1617e8d8bef9SDimitry Andric // i8** %offload_ptrs. 1618e8d8bef9SDimitry Andric Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum); 1619e8d8bef9SDimitry Andric // i8** %offload_sizes. 1620e8d8bef9SDimitry Andric Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum); 1621e8d8bef9SDimitry Andric 1622e8d8bef9SDimitry Andric // Get values stored in **offload_baseptrs. 
1623e8d8bef9SDimitry Andric auto *V = getUnderlyingObject(BasePtrsArg);
1624e8d8bef9SDimitry Andric if (!isa<AllocaInst>(V))
1625e8d8bef9SDimitry Andric return false;
1626e8d8bef9SDimitry Andric auto *BasePtrsArray = cast<AllocaInst>(V);
1627e8d8bef9SDimitry Andric if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
1628e8d8bef9SDimitry Andric return false;
1629e8d8bef9SDimitry Andric 
1630e8d8bef9SDimitry Andric // Get values stored in **offload_ptrs.
1631e8d8bef9SDimitry Andric V = getUnderlyingObject(PtrsArg);
1632e8d8bef9SDimitry Andric if (!isa<AllocaInst>(V))
1633e8d8bef9SDimitry Andric return false;
1634e8d8bef9SDimitry Andric auto *PtrsArray = cast<AllocaInst>(V);
1635e8d8bef9SDimitry Andric if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
1636e8d8bef9SDimitry Andric return false;
1637e8d8bef9SDimitry Andric 
1638e8d8bef9SDimitry Andric // Get values stored in **offload_sizes.
1639e8d8bef9SDimitry Andric V = getUnderlyingObject(SizesArg);
1640e8d8bef9SDimitry Andric // If it's a [constant] global array don't analyze it.
1641e8d8bef9SDimitry Andric if (isa<GlobalValue>(V))
1642e8d8bef9SDimitry Andric return isa<Constant>(V);
1643e8d8bef9SDimitry Andric if (!isa<AllocaInst>(V))
1644e8d8bef9SDimitry Andric return false;
1645e8d8bef9SDimitry Andric 
1646e8d8bef9SDimitry Andric auto *SizesArray = cast<AllocaInst>(V);
1647e8d8bef9SDimitry Andric if (!OAs[2].initialize(*SizesArray, RuntimeCall))
1648e8d8bef9SDimitry Andric return false;
1649e8d8bef9SDimitry Andric 
1650e8d8bef9SDimitry Andric return true;
1651e8d8bef9SDimitry Andric }
1652e8d8bef9SDimitry Andric 
1653e8d8bef9SDimitry Andric /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG.
1654e8d8bef9SDimitry Andric /// For now this is a way to test that the function getValuesInOffloadArrays
1655e8d8bef9SDimitry Andric /// is working properly.
1656e8d8bef9SDimitry Andric /// TODO: Move this to a unittest when unittests are available for OpenMPOpt.
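///
/// The emitted debug output looks roughly like the following, with
/// illustrative values:
/// \code
///   offload_baseptrs: ptr %a --- ptr %b ---
///   offload_ptrs: ptr %a --- ptr %b ---
///   offload_sizes: i64 8 --- i64 16 ---
/// \endcode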
1657e8d8bef9SDimitry Andric void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
1658e8d8bef9SDimitry Andric assert(OAs.size() == 3 && "There are three offload arrays to debug!");
1659e8d8bef9SDimitry Andric 
1660e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n");
1661e8d8bef9SDimitry Andric std::string ValuesStr;
1662e8d8bef9SDimitry Andric raw_string_ostream Printer(ValuesStr);
1663e8d8bef9SDimitry Andric std::string Separator = " --- ";
1664e8d8bef9SDimitry Andric 
1665e8d8bef9SDimitry Andric for (auto *BP : OAs[0].StoredValues) {
1666e8d8bef9SDimitry Andric BP->print(Printer);
1667e8d8bef9SDimitry Andric Printer << Separator;
1668e8d8bef9SDimitry Andric }
1669*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << ValuesStr << "\n");
1670e8d8bef9SDimitry Andric ValuesStr.clear();
1671e8d8bef9SDimitry Andric 
1672e8d8bef9SDimitry Andric for (auto *P : OAs[1].StoredValues) {
1673e8d8bef9SDimitry Andric P->print(Printer);
1674e8d8bef9SDimitry Andric Printer << Separator;
1675e8d8bef9SDimitry Andric }
1676*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << ValuesStr << "\n");
1677e8d8bef9SDimitry Andric ValuesStr.clear();
1678e8d8bef9SDimitry Andric 
1679e8d8bef9SDimitry Andric for (auto *S : OAs[2].StoredValues) {
1680e8d8bef9SDimitry Andric S->print(Printer);
1681e8d8bef9SDimitry Andric Printer << Separator;
1682e8d8bef9SDimitry Andric }
1683*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << ValuesStr << "\n");
1684e8d8bef9SDimitry Andric }
1685e8d8bef9SDimitry Andric 
1686e8d8bef9SDimitry Andric /// Returns the instruction where the "wait" counterpart of \p RuntimeCall can
1687e8d8bef9SDimitry Andric /// be moved. Returns nullptr if the movement is not possible, or not worth it.
1688e8d8bef9SDimitry Andric Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
1689e8d8bef9SDimitry Andric // FIXME: This traverses only the BasicBlock where RuntimeCall is.
1690e8d8bef9SDimitry Andric // Make it traverse the CFG.
1691e8d8bef9SDimitry Andric 
1692e8d8bef9SDimitry Andric Instruction *CurrentI = &RuntimeCall;
1693e8d8bef9SDimitry Andric bool IsWorthIt = false;
1694e8d8bef9SDimitry Andric while ((CurrentI = CurrentI->getNextNode())) {
1695e8d8bef9SDimitry Andric 
1696e8d8bef9SDimitry Andric // TODO: Once we detect the regions to be offloaded we should use the
1697e8d8bef9SDimitry Andric // alias analysis manager to check if CurrentI may modify one of
1698e8d8bef9SDimitry Andric // the offloaded regions.
1699e8d8bef9SDimitry Andric if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
1700e8d8bef9SDimitry Andric if (IsWorthIt)
1701e8d8bef9SDimitry Andric return CurrentI;
1702e8d8bef9SDimitry Andric 
1703e8d8bef9SDimitry Andric return nullptr;
1704e8d8bef9SDimitry Andric }
1705e8d8bef9SDimitry Andric 
1706e8d8bef9SDimitry Andric // FIXME: For now, moving it over anything without side effects is
1707e8d8bef9SDimitry Andric // considered worth it.
1708e8d8bef9SDimitry Andric IsWorthIt = true;
1709e8d8bef9SDimitry Andric }
1710e8d8bef9SDimitry Andric 
1711e8d8bef9SDimitry Andric // Return the end of the BasicBlock.
1712e8d8bef9SDimitry Andric return RuntimeCall.getParent()->getTerminator();
1713e8d8bef9SDimitry Andric }
1714e8d8bef9SDimitry Andric 
1715e8d8bef9SDimitry Andric /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
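///
/// Schematically, and with illustrative operands,
/// \code
///   call void @__tgt_target_data_begin_mapper(<args>)
/// \endcode
/// becomes
/// \code
///   %handle = alloca %struct.__tgt_async_info
///   call void @__tgt_target_data_begin_mapper_issue(<args>, ptr %handle)
///   ;; ... independent code the transfer can overlap with ...
///   call void @__tgt_target_data_begin_mapper_wait(i64 <device_id>, ptr %handle)
/// \endcode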
1716e8d8bef9SDimitry Andric bool splitTargetDataBeginRTC(CallInst &RuntimeCall, 1717e8d8bef9SDimitry Andric Instruction &WaitMovementPoint) { 1718e8d8bef9SDimitry Andric // Create stack allocated handle (__tgt_async_info) at the beginning of the 1719e8d8bef9SDimitry Andric // function. Used for storing information of the async transfer, allowing to 1720e8d8bef9SDimitry Andric // wait on it later. 1721e8d8bef9SDimitry Andric auto &IRBuilder = OMPInfoCache.OMPBuilder; 1722bdd1243dSDimitry Andric Function *F = RuntimeCall.getCaller(); 1723bdd1243dSDimitry Andric BasicBlock &Entry = F->getEntryBlock(); 1724bdd1243dSDimitry Andric IRBuilder.Builder.SetInsertPoint(&Entry, 1725bdd1243dSDimitry Andric Entry.getFirstNonPHIOrDbgOrAlloca()); 1726bdd1243dSDimitry Andric Value *Handle = IRBuilder.Builder.CreateAlloca( 1727bdd1243dSDimitry Andric IRBuilder.AsyncInfo, /*ArraySize=*/nullptr, "handle"); 1728bdd1243dSDimitry Andric Handle = 1729bdd1243dSDimitry Andric IRBuilder.Builder.CreateAddrSpaceCast(Handle, IRBuilder.AsyncInfoPtr); 1730e8d8bef9SDimitry Andric 1731e8d8bef9SDimitry Andric // Add "issue" runtime call declaration: 1732e8d8bef9SDimitry Andric // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32, 1733e8d8bef9SDimitry Andric // i8**, i8**, i64*, i64*) 1734e8d8bef9SDimitry Andric FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction( 1735e8d8bef9SDimitry Andric M, OMPRTL___tgt_target_data_begin_mapper_issue); 1736e8d8bef9SDimitry Andric 1737e8d8bef9SDimitry Andric // Change RuntimeCall call site for its asynchronous version. 1738e8d8bef9SDimitry Andric SmallVector<Value *, 16> Args; 1739e8d8bef9SDimitry Andric for (auto &Arg : RuntimeCall.args()) 1740e8d8bef9SDimitry Andric Args.push_back(Arg.get()); 1741e8d8bef9SDimitry Andric Args.push_back(Handle); 1742e8d8bef9SDimitry Andric 1743*0fca6ea1SDimitry Andric CallInst *IssueCallsite = CallInst::Create(IssueDecl, Args, /*NameStr=*/"", 1744*0fca6ea1SDimitry Andric RuntimeCall.getIterator()); 174504eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(IssueDecl, IssueCallsite); 1746e8d8bef9SDimitry Andric RuntimeCall.eraseFromParent(); 1747e8d8bef9SDimitry Andric 1748e8d8bef9SDimitry Andric // Add "wait" runtime call declaration: 1749e8d8bef9SDimitry Andric // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info) 1750e8d8bef9SDimitry Andric FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction( 1751e8d8bef9SDimitry Andric M, OMPRTL___tgt_target_data_begin_mapper_wait); 1752e8d8bef9SDimitry Andric 1753e8d8bef9SDimitry Andric Value *WaitParams[2] = { 1754e8d8bef9SDimitry Andric IssueCallsite->getArgOperand( 1755e8d8bef9SDimitry Andric OffloadArray::DeviceIDArgNum), // device_id. 1756e8d8bef9SDimitry Andric Handle // handle to wait on. 
1757e8d8bef9SDimitry Andric };
175804eeddc0SDimitry Andric CallInst *WaitCallsite = CallInst::Create(
1759*0fca6ea1SDimitry Andric WaitDecl, WaitParams, /*NameStr=*/"", WaitMovementPoint.getIterator());
176004eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(WaitDecl, WaitCallsite);
1761e8d8bef9SDimitry Andric 
1762e8d8bef9SDimitry Andric return true;
1763e8d8bef9SDimitry Andric }
1764e8d8bef9SDimitry Andric 
17655ffd83dbSDimitry Andric static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
17665ffd83dbSDimitry Andric bool GlobalOnly, bool &SingleChoice) {
17675ffd83dbSDimitry Andric if (CurrentIdent == NextIdent)
17685ffd83dbSDimitry Andric return CurrentIdent;
17695ffd83dbSDimitry Andric 
17705ffd83dbSDimitry Andric // TODO: Figure out how to actually combine multiple debug locations. For
17715ffd83dbSDimitry Andric // now we just keep an existing one if there is a single choice.
17725ffd83dbSDimitry Andric if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
17735ffd83dbSDimitry Andric SingleChoice = !CurrentIdent;
17745ffd83dbSDimitry Andric return NextIdent;
17755ffd83dbSDimitry Andric }
17765ffd83dbSDimitry Andric return nullptr;
17775ffd83dbSDimitry Andric }
17785ffd83dbSDimitry Andric 
17795ffd83dbSDimitry Andric /// Return a `struct ident_t*` value that represents the ones used in the
17805ffd83dbSDimitry Andric /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
17815ffd83dbSDimitry Andric /// return a local `struct ident_t*`. For now, if we cannot find a suitable
17825ffd83dbSDimitry Andric /// return value we create one from scratch. We also do not yet combine
17835ffd83dbSDimitry Andric /// information, e.g., the source locations, see combinedIdentStruct.
17845ffd83dbSDimitry Andric Value *
17855ffd83dbSDimitry Andric getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
17865ffd83dbSDimitry Andric Function &F, bool GlobalOnly) {
17875ffd83dbSDimitry Andric bool SingleChoice = true;
17885ffd83dbSDimitry Andric Value *Ident = nullptr;
17895ffd83dbSDimitry Andric auto CombineIdentStruct = [&](Use &U, Function &Caller) {
17905ffd83dbSDimitry Andric CallInst *CI = getCallIfRegularCall(U, &RFI);
17915ffd83dbSDimitry Andric if (!CI || &F != &Caller)
17925ffd83dbSDimitry Andric return false;
17935ffd83dbSDimitry Andric Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
17945ffd83dbSDimitry Andric /* GlobalOnly */ true, SingleChoice);
17955ffd83dbSDimitry Andric return false;
17965ffd83dbSDimitry Andric };
17975ffd83dbSDimitry Andric RFI.foreachUse(SCC, CombineIdentStruct);
17985ffd83dbSDimitry Andric 
17995ffd83dbSDimitry Andric if (!Ident || !SingleChoice) {
18005ffd83dbSDimitry Andric // The IRBuilder uses the insertion block to get to the module; this is
18015ffd83dbSDimitry Andric // unfortunate but we work around it for now.
18025ffd83dbSDimitry Andric if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
18035ffd83dbSDimitry Andric OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
18045ffd83dbSDimitry Andric &F.getEntryBlock(), F.getEntryBlock().begin()));
18055ffd83dbSDimitry Andric // Create a fallback location if none was found.
18065ffd83dbSDimitry Andric // TODO: Use the debug locations of the calls instead.
180704eeddc0SDimitry Andric uint32_t SrcLocStrSize; 180804eeddc0SDimitry Andric Constant *Loc = 180904eeddc0SDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); 181004eeddc0SDimitry Andric Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc, SrcLocStrSize); 18115ffd83dbSDimitry Andric } 18125ffd83dbSDimitry Andric return Ident; 18135ffd83dbSDimitry Andric } 18145ffd83dbSDimitry Andric 18155ffd83dbSDimitry Andric /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or 18165ffd83dbSDimitry Andric /// \p ReplVal if given. 18175ffd83dbSDimitry Andric bool deduplicateRuntimeCalls(Function &F, 18185ffd83dbSDimitry Andric OMPInformationCache::RuntimeFunctionInfo &RFI, 18195ffd83dbSDimitry Andric Value *ReplVal = nullptr) { 18205ffd83dbSDimitry Andric auto *UV = RFI.getUseVector(F); 18215ffd83dbSDimitry Andric if (!UV || UV->size() + (ReplVal != nullptr) < 2) 18225ffd83dbSDimitry Andric return false; 18235ffd83dbSDimitry Andric 18245ffd83dbSDimitry Andric LLVM_DEBUG( 18255ffd83dbSDimitry Andric dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name 18265ffd83dbSDimitry Andric << (ReplVal ? " with an existing value\n" : "\n") << "\n"); 18275ffd83dbSDimitry Andric 18285ffd83dbSDimitry Andric assert((!ReplVal || (isa<Argument>(ReplVal) && 18295ffd83dbSDimitry Andric cast<Argument>(ReplVal)->getParent() == &F)) && 18305ffd83dbSDimitry Andric "Unexpected replacement value!"); 18315ffd83dbSDimitry Andric 18325ffd83dbSDimitry Andric // TODO: Use dominance to find a good position instead. 18335ffd83dbSDimitry Andric auto CanBeMoved = [this](CallBase &CB) { 1834349cc55cSDimitry Andric unsigned NumArgs = CB.arg_size(); 18355ffd83dbSDimitry Andric if (NumArgs == 0) 18365ffd83dbSDimitry Andric return true; 18375ffd83dbSDimitry Andric if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr) 18385ffd83dbSDimitry Andric return false; 1839349cc55cSDimitry Andric for (unsigned U = 1; U < NumArgs; ++U) 1840349cc55cSDimitry Andric if (isa<Instruction>(CB.getArgOperand(U))) 18415ffd83dbSDimitry Andric return false; 18425ffd83dbSDimitry Andric return true; 18435ffd83dbSDimitry Andric }; 18445ffd83dbSDimitry Andric 18455ffd83dbSDimitry Andric if (!ReplVal) { 184606c3fb27SDimitry Andric auto *DT = 184706c3fb27SDimitry Andric OMPInfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(F); 184806c3fb27SDimitry Andric if (!DT) 184906c3fb27SDimitry Andric return false; 185006c3fb27SDimitry Andric Instruction *IP = nullptr; 185106c3fb27SDimitry Andric for (Use *U : *UV) { 18525ffd83dbSDimitry Andric if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { 185306c3fb27SDimitry Andric if (IP) 185406c3fb27SDimitry Andric IP = DT->findNearestCommonDominator(IP, CI); 185506c3fb27SDimitry Andric else 185606c3fb27SDimitry Andric IP = CI; 18575ffd83dbSDimitry Andric if (!CanBeMoved(*CI)) 18585ffd83dbSDimitry Andric continue; 185906c3fb27SDimitry Andric if (!ReplVal) 18605ffd83dbSDimitry Andric ReplVal = CI; 186106c3fb27SDimitry Andric } 18625ffd83dbSDimitry Andric } 18635ffd83dbSDimitry Andric if (!ReplVal) 18645ffd83dbSDimitry Andric return false; 186506c3fb27SDimitry Andric assert(IP && "Expected insertion point!"); 186606c3fb27SDimitry Andric cast<Instruction>(ReplVal)->moveBefore(IP); 18675ffd83dbSDimitry Andric } 18685ffd83dbSDimitry Andric 18695ffd83dbSDimitry Andric // If we use a call as a replacement value we need to make sure the ident is 18705ffd83dbSDimitry Andric // valid at the new location. 
For now we just pick a global one, either 18715ffd83dbSDimitry Andric // existing and used by one of the calls, or created from scratch. 18725ffd83dbSDimitry Andric if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) { 1873349cc55cSDimitry Andric if (!CI->arg_empty() && 18745ffd83dbSDimitry Andric CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) { 18755ffd83dbSDimitry Andric Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F, 18765ffd83dbSDimitry Andric /* GlobalOnly */ true); 18775ffd83dbSDimitry Andric CI->setArgOperand(0, Ident); 18785ffd83dbSDimitry Andric } 18795ffd83dbSDimitry Andric } 18805ffd83dbSDimitry Andric 18815ffd83dbSDimitry Andric bool Changed = false; 18825ffd83dbSDimitry Andric auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { 18835ffd83dbSDimitry Andric CallInst *CI = getCallIfRegularCall(U, &RFI); 18845ffd83dbSDimitry Andric if (!CI || CI == ReplVal || &F != &Caller) 18855ffd83dbSDimitry Andric return false; 18865ffd83dbSDimitry Andric assert(CI->getCaller() == &F && "Unexpected call!"); 18875ffd83dbSDimitry Andric 18885ffd83dbSDimitry Andric auto Remark = [&](OptimizationRemark OR) { 18895ffd83dbSDimitry Andric return OR << "OpenMP runtime call " 1890fe6060f1SDimitry Andric << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated."; 18915ffd83dbSDimitry Andric }; 1892fe6060f1SDimitry Andric if (CI->getDebugLoc()) 1893fe6060f1SDimitry Andric emitRemark<OptimizationRemark>(CI, "OMP170", Remark); 1894fe6060f1SDimitry Andric else 1895fe6060f1SDimitry Andric emitRemark<OptimizationRemark>(&F, "OMP170", Remark); 18965ffd83dbSDimitry Andric 18975ffd83dbSDimitry Andric CI->replaceAllUsesWith(ReplVal); 18985ffd83dbSDimitry Andric CI->eraseFromParent(); 18995ffd83dbSDimitry Andric ++NumOpenMPRuntimeCallsDeduplicated; 19005ffd83dbSDimitry Andric Changed = true; 19015ffd83dbSDimitry Andric return true; 19025ffd83dbSDimitry Andric }; 19035ffd83dbSDimitry Andric RFI.foreachUse(SCC, ReplaceAndDeleteCB); 19045ffd83dbSDimitry Andric 19055ffd83dbSDimitry Andric return Changed; 19065ffd83dbSDimitry Andric } 19075ffd83dbSDimitry Andric 19085ffd83dbSDimitry Andric /// Collect arguments that represent the global thread id in \p GTIdArgs. 19095ffd83dbSDimitry Andric void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> >IdArgs) { 19105ffd83dbSDimitry Andric // TODO: Below we basically perform a fixpoint iteration with a pessimistic 19115ffd83dbSDimitry Andric // initialization. We could define an AbstractAttribute instead and 19125ffd83dbSDimitry Andric // run the Attributor here once it can be run as an SCC pass. 19135ffd83dbSDimitry Andric 19145ffd83dbSDimitry Andric // Helper to check the argument \p ArgNo at all call sites of \p F for 19155ffd83dbSDimitry Andric // a GTId. 
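// In other words, for a function with local linkage the check accepts
// (illustrative IR) call sites where the inspected argument is always a
// known global thread id:
//   %tid = call i32 @__kmpc_global_thread_num(ptr @ident)
//   call void @helper(i32 %tid, ...)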
19165ffd83dbSDimitry Andric auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) { 19175ffd83dbSDimitry Andric if (!F.hasLocalLinkage()) 19185ffd83dbSDimitry Andric return false; 19195ffd83dbSDimitry Andric for (Use &U : F.uses()) { 19205ffd83dbSDimitry Andric if (CallInst *CI = getCallIfRegularCall(U)) { 19215ffd83dbSDimitry Andric Value *ArgOp = CI->getArgOperand(ArgNo); 19225ffd83dbSDimitry Andric if (CI == &RefCI || GTIdArgs.count(ArgOp) || 19235ffd83dbSDimitry Andric getCallIfRegularCall( 19245ffd83dbSDimitry Andric *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num])) 19255ffd83dbSDimitry Andric continue; 19265ffd83dbSDimitry Andric } 19275ffd83dbSDimitry Andric return false; 19285ffd83dbSDimitry Andric } 19295ffd83dbSDimitry Andric return true; 19305ffd83dbSDimitry Andric }; 19315ffd83dbSDimitry Andric 19325ffd83dbSDimitry Andric // Helper to identify uses of a GTId as GTId arguments. 19335ffd83dbSDimitry Andric auto AddUserArgs = [&](Value >Id) { 19345ffd83dbSDimitry Andric for (Use &U : GTId.uses()) 19355ffd83dbSDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(U.getUser())) 19365ffd83dbSDimitry Andric if (CI->isArgOperand(&U)) 19375ffd83dbSDimitry Andric if (Function *Callee = CI->getCalledFunction()) 19385ffd83dbSDimitry Andric if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI)) 19395ffd83dbSDimitry Andric GTIdArgs.insert(Callee->getArg(U.getOperandNo())); 19405ffd83dbSDimitry Andric }; 19415ffd83dbSDimitry Andric 19425ffd83dbSDimitry Andric // The argument users of __kmpc_global_thread_num calls are GTIds. 19435ffd83dbSDimitry Andric OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = 19445ffd83dbSDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]; 19455ffd83dbSDimitry Andric 19465ffd83dbSDimitry Andric GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) { 19475ffd83dbSDimitry Andric if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI)) 19485ffd83dbSDimitry Andric AddUserArgs(*CI); 19495ffd83dbSDimitry Andric return false; 19505ffd83dbSDimitry Andric }); 19515ffd83dbSDimitry Andric 19525ffd83dbSDimitry Andric // Transitively search for more arguments by looking at the users of the 19535ffd83dbSDimitry Andric // ones we know already. During the search the GTIdArgs vector is extended 19545ffd83dbSDimitry Andric // so we cannot cache the size nor can we use a range based for. 1955349cc55cSDimitry Andric for (unsigned U = 0; U < GTIdArgs.size(); ++U) 1956349cc55cSDimitry Andric AddUserArgs(*GTIdArgs[U]); 19575ffd83dbSDimitry Andric } 19585ffd83dbSDimitry Andric 19595ffd83dbSDimitry Andric /// Kernel (=GPU) optimizations and utility functions 19605ffd83dbSDimitry Andric /// 19615ffd83dbSDimitry Andric ///{{ 19625ffd83dbSDimitry Andric 19635ffd83dbSDimitry Andric /// Cache to remember the unique kernel for a function. 1964bdd1243dSDimitry Andric DenseMap<Function *, std::optional<Kernel>> UniqueKernelMap; 19655ffd83dbSDimitry Andric 19665ffd83dbSDimitry Andric /// Find the unique kernel that will execute \p F, if any. 19675ffd83dbSDimitry Andric Kernel getUniqueKernelFor(Function &F); 19685ffd83dbSDimitry Andric 19695ffd83dbSDimitry Andric /// Find the unique kernel that will execute \p I, if any. 
19705ffd83dbSDimitry Andric Kernel getUniqueKernelFor(Instruction &I) { 19715ffd83dbSDimitry Andric return getUniqueKernelFor(*I.getFunction()); 19725ffd83dbSDimitry Andric } 19735ffd83dbSDimitry Andric 19745ffd83dbSDimitry Andric /// Rewrite the device (=GPU) code state machine create in non-SPMD mode in 19755ffd83dbSDimitry Andric /// the cases we can avoid taking the address of a function. 19765ffd83dbSDimitry Andric bool rewriteDeviceCodeStateMachine(); 19775ffd83dbSDimitry Andric 19785ffd83dbSDimitry Andric /// 19795ffd83dbSDimitry Andric ///}} 19805ffd83dbSDimitry Andric 19815ffd83dbSDimitry Andric /// Emit a remark generically 19825ffd83dbSDimitry Andric /// 19835ffd83dbSDimitry Andric /// This template function can be used to generically emit a remark. The 19845ffd83dbSDimitry Andric /// RemarkKind should be one of the following: 19855ffd83dbSDimitry Andric /// - OptimizationRemark to indicate a successful optimization attempt 19865ffd83dbSDimitry Andric /// - OptimizationRemarkMissed to report a failed optimization attempt 19875ffd83dbSDimitry Andric /// - OptimizationRemarkAnalysis to provide additional information about an 19885ffd83dbSDimitry Andric /// optimization attempt 19895ffd83dbSDimitry Andric /// 19905ffd83dbSDimitry Andric /// The remark is built using a callback function provided by the caller that 19915ffd83dbSDimitry Andric /// takes a RemarkKind as input and returns a RemarkKind. 1992fe6060f1SDimitry Andric template <typename RemarkKind, typename RemarkCallBack> 1993fe6060f1SDimitry Andric void emitRemark(Instruction *I, StringRef RemarkName, 19945ffd83dbSDimitry Andric RemarkCallBack &&RemarkCB) const { 1995fe6060f1SDimitry Andric Function *F = I->getParent()->getParent(); 19965ffd83dbSDimitry Andric auto &ORE = OREGetter(F); 19975ffd83dbSDimitry Andric 19985f757f3fSDimitry Andric if (RemarkName.starts_with("OMP")) 19995ffd83dbSDimitry Andric ORE.emit([&]() { 2000fe6060f1SDimitry Andric return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)) 2001fe6060f1SDimitry Andric << " [" << RemarkName << "]"; 20025ffd83dbSDimitry Andric }); 2003fe6060f1SDimitry Andric else 2004fe6060f1SDimitry Andric ORE.emit( 2005fe6060f1SDimitry Andric [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)); }); 20065ffd83dbSDimitry Andric } 20075ffd83dbSDimitry Andric 2008fe6060f1SDimitry Andric /// Emit a remark on a function. 2009fe6060f1SDimitry Andric template <typename RemarkKind, typename RemarkCallBack> 2010fe6060f1SDimitry Andric void emitRemark(Function *F, StringRef RemarkName, 2011fe6060f1SDimitry Andric RemarkCallBack &&RemarkCB) const { 2012fe6060f1SDimitry Andric auto &ORE = OREGetter(F); 2013fe6060f1SDimitry Andric 20145f757f3fSDimitry Andric if (RemarkName.starts_with("OMP")) 2015fe6060f1SDimitry Andric ORE.emit([&]() { 2016fe6060f1SDimitry Andric return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)) 2017fe6060f1SDimitry Andric << " [" << RemarkName << "]"; 2018fe6060f1SDimitry Andric }); 2019fe6060f1SDimitry Andric else 2020fe6060f1SDimitry Andric ORE.emit( 2021fe6060f1SDimitry Andric [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)); }); 2022fe6060f1SDimitry Andric } 2023fe6060f1SDimitry Andric 20245ffd83dbSDimitry Andric /// The underlying module. 20255ffd83dbSDimitry Andric Module &M; 20265ffd83dbSDimitry Andric 20275ffd83dbSDimitry Andric /// The SCC we are operating on. 
20285ffd83dbSDimitry Andric SmallVectorImpl<Function *> &SCC; 20295ffd83dbSDimitry Andric 20305ffd83dbSDimitry Andric /// Callback to update the call graph, the first argument is a removed call, 20315ffd83dbSDimitry Andric /// the second an optional replacement call. 20325ffd83dbSDimitry Andric CallGraphUpdater &CGUpdater; 20335ffd83dbSDimitry Andric 20345ffd83dbSDimitry Andric /// Callback to get an OptimizationRemarkEmitter from a Function * 20355ffd83dbSDimitry Andric OptimizationRemarkGetter OREGetter; 20365ffd83dbSDimitry Andric 20375ffd83dbSDimitry Andric /// OpenMP-specific information cache. Also Used for Attributor runs. 20385ffd83dbSDimitry Andric OMPInformationCache &OMPInfoCache; 20395ffd83dbSDimitry Andric 20405ffd83dbSDimitry Andric /// Attributor instance. 20415ffd83dbSDimitry Andric Attributor &A; 20425ffd83dbSDimitry Andric 20435ffd83dbSDimitry Andric /// Helper function to run Attributor on SCC. 2044fe6060f1SDimitry Andric bool runAttributor(bool IsModulePass) { 20455ffd83dbSDimitry Andric if (SCC.empty()) 20465ffd83dbSDimitry Andric return false; 20475ffd83dbSDimitry Andric 2048fe6060f1SDimitry Andric registerAAs(IsModulePass); 20495ffd83dbSDimitry Andric 20505ffd83dbSDimitry Andric ChangeStatus Changed = A.run(); 20515ffd83dbSDimitry Andric 20525ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size() 20535ffd83dbSDimitry Andric << " functions, result: " << Changed << ".\n"); 20545ffd83dbSDimitry Andric 2055cb14a3feSDimitry Andric if (Changed == ChangeStatus::CHANGED) 2056cb14a3feSDimitry Andric OMPInfoCache.invalidateAnalyses(); 2057cb14a3feSDimitry Andric 20585ffd83dbSDimitry Andric return Changed == ChangeStatus::CHANGED; 20595ffd83dbSDimitry Andric } 20605ffd83dbSDimitry Andric 2061fe6060f1SDimitry Andric void registerFoldRuntimeCall(RuntimeFunction RF); 2062fe6060f1SDimitry Andric 20635ffd83dbSDimitry Andric /// Populate the Attributor with abstract attribute opportunities in the 2064bdd1243dSDimitry Andric /// functions. 2065fe6060f1SDimitry Andric void registerAAs(bool IsModulePass); 2066bdd1243dSDimitry Andric 2067bdd1243dSDimitry Andric public: 2068bdd1243dSDimitry Andric /// Callback to register AAs for live functions, including internal functions 2069bdd1243dSDimitry Andric /// marked live during the traversal. 2070bdd1243dSDimitry Andric static void registerAAsForFunction(Attributor &A, const Function &F); 20715ffd83dbSDimitry Andric }; 20725ffd83dbSDimitry Andric 20735ffd83dbSDimitry Andric Kernel OpenMPOpt::getUniqueKernelFor(Function &F) { 207406c3fb27SDimitry Andric if (OMPInfoCache.CGSCC && !OMPInfoCache.CGSCC->empty() && 207506c3fb27SDimitry Andric !OMPInfoCache.CGSCC->contains(&F)) 20765ffd83dbSDimitry Andric return nullptr; 20775ffd83dbSDimitry Andric 20785ffd83dbSDimitry Andric // Use a scope to keep the lifetime of the CachedKernel short. 20795ffd83dbSDimitry Andric { 2080bdd1243dSDimitry Andric std::optional<Kernel> &CachedKernel = UniqueKernelMap[&F]; 20815ffd83dbSDimitry Andric if (CachedKernel) 20825ffd83dbSDimitry Andric return *CachedKernel; 20835ffd83dbSDimitry Andric 20845ffd83dbSDimitry Andric // TODO: We should use an AA to create an (optimistic and callback 20855ffd83dbSDimitry Andric // call-aware) call graph. For now we stick to simple patterns that 20865ffd83dbSDimitry Andric // are less powerful, basically the worst fixpoint. 
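// The simple pattern: a kernel is trivially its own unique kernel, an
// externally visible non-kernel function may have unknown callers so we give
// up, and for internal functions we require all uses (inspected below) to
// resolve to a single kernel.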
20875f757f3fSDimitry Andric if (isOpenMPKernel(F)) { 20885ffd83dbSDimitry Andric CachedKernel = Kernel(&F); 20895ffd83dbSDimitry Andric return *CachedKernel; 20905ffd83dbSDimitry Andric } 20915ffd83dbSDimitry Andric 20925ffd83dbSDimitry Andric CachedKernel = nullptr; 2093e8d8bef9SDimitry Andric if (!F.hasLocalLinkage()) { 2094e8d8bef9SDimitry Andric 2095e8d8bef9SDimitry Andric // See https://openmp.llvm.org/remarks/OptimizationRemarks.html 2096fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemarkAnalysis ORA) { 2097fe6060f1SDimitry Andric return ORA << "Potentially unknown OpenMP target region caller."; 2098e8d8bef9SDimitry Andric }; 2099fe6060f1SDimitry Andric emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark); 2100e8d8bef9SDimitry Andric 21015ffd83dbSDimitry Andric return nullptr; 21025ffd83dbSDimitry Andric } 2103e8d8bef9SDimitry Andric } 21045ffd83dbSDimitry Andric 21055ffd83dbSDimitry Andric auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel { 21065ffd83dbSDimitry Andric if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) { 21075ffd83dbSDimitry Andric // Allow use in equality comparisons. 21085ffd83dbSDimitry Andric if (Cmp->isEquality()) 21095ffd83dbSDimitry Andric return getUniqueKernelFor(*Cmp); 21105ffd83dbSDimitry Andric return nullptr; 21115ffd83dbSDimitry Andric } 21125ffd83dbSDimitry Andric if (auto *CB = dyn_cast<CallBase>(U.getUser())) { 21135ffd83dbSDimitry Andric // Allow direct calls. 21145ffd83dbSDimitry Andric if (CB->isCallee(&U)) 21155ffd83dbSDimitry Andric return getUniqueKernelFor(*CB); 2116fe6060f1SDimitry Andric 2117fe6060f1SDimitry Andric OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI = 2118fe6060f1SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; 2119fe6060f1SDimitry Andric // Allow the use in __kmpc_parallel_51 calls. 2120fe6060f1SDimitry Andric if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI)) 21215ffd83dbSDimitry Andric return getUniqueKernelFor(*CB); 21225ffd83dbSDimitry Andric return nullptr; 21235ffd83dbSDimitry Andric } 21245ffd83dbSDimitry Andric // Disallow every other use. 21255ffd83dbSDimitry Andric return nullptr; 21265ffd83dbSDimitry Andric }; 21275ffd83dbSDimitry Andric 21285ffd83dbSDimitry Andric // TODO: In the future we want to track more than just a unique kernel. 21295ffd83dbSDimitry Andric SmallPtrSet<Kernel, 2> PotentialKernels; 2130e8d8bef9SDimitry Andric OMPInformationCache::foreachUse(F, [&](const Use &U) { 21315ffd83dbSDimitry Andric PotentialKernels.insert(GetUniqueKernelForUse(U)); 21325ffd83dbSDimitry Andric }); 21335ffd83dbSDimitry Andric 21345ffd83dbSDimitry Andric Kernel K = nullptr; 21355ffd83dbSDimitry Andric if (PotentialKernels.size() == 1) 21365ffd83dbSDimitry Andric K = *PotentialKernels.begin(); 21375ffd83dbSDimitry Andric 21385ffd83dbSDimitry Andric // Cache the result. 
21395ffd83dbSDimitry Andric UniqueKernelMap[&F] = K; 21405ffd83dbSDimitry Andric 21415ffd83dbSDimitry Andric return K; 21425ffd83dbSDimitry Andric } 21435ffd83dbSDimitry Andric 21445ffd83dbSDimitry Andric bool OpenMPOpt::rewriteDeviceCodeStateMachine() { 2145fe6060f1SDimitry Andric OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI = 2146fe6060f1SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; 21475ffd83dbSDimitry Andric 21485ffd83dbSDimitry Andric bool Changed = false; 2149fe6060f1SDimitry Andric if (!KernelParallelRFI) 21505ffd83dbSDimitry Andric return Changed; 21515ffd83dbSDimitry Andric 2152349cc55cSDimitry Andric // If we have disabled state machine changes, exit 2153349cc55cSDimitry Andric if (DisableOpenMPOptStateMachineRewrite) 2154349cc55cSDimitry Andric return Changed; 2155349cc55cSDimitry Andric 21565ffd83dbSDimitry Andric for (Function *F : SCC) { 21575ffd83dbSDimitry Andric 2158fe6060f1SDimitry Andric // Check if the function is a use in a __kmpc_parallel_51 call at 21595ffd83dbSDimitry Andric // all. 21605ffd83dbSDimitry Andric bool UnknownUse = false; 2161fe6060f1SDimitry Andric bool KernelParallelUse = false; 21625ffd83dbSDimitry Andric unsigned NumDirectCalls = 0; 21635ffd83dbSDimitry Andric 21645ffd83dbSDimitry Andric SmallVector<Use *, 2> ToBeReplacedStateMachineUses; 2165e8d8bef9SDimitry Andric OMPInformationCache::foreachUse(*F, [&](Use &U) { 21665ffd83dbSDimitry Andric if (auto *CB = dyn_cast<CallBase>(U.getUser())) 21675ffd83dbSDimitry Andric if (CB->isCallee(&U)) { 21685ffd83dbSDimitry Andric ++NumDirectCalls; 21695ffd83dbSDimitry Andric return; 21705ffd83dbSDimitry Andric } 21715ffd83dbSDimitry Andric 21725ffd83dbSDimitry Andric if (isa<ICmpInst>(U.getUser())) { 21735ffd83dbSDimitry Andric ToBeReplacedStateMachineUses.push_back(&U); 21745ffd83dbSDimitry Andric return; 21755ffd83dbSDimitry Andric } 2176fe6060f1SDimitry Andric 2177fe6060f1SDimitry Andric // Find wrapper functions that represent parallel kernels. 2178fe6060f1SDimitry Andric CallInst *CI = 2179fe6060f1SDimitry Andric OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI); 2180fe6060f1SDimitry Andric const unsigned int WrapperFunctionArgNo = 6; 2181fe6060f1SDimitry Andric if (!KernelParallelUse && CI && 2182fe6060f1SDimitry Andric CI->getArgOperandNo(&U) == WrapperFunctionArgNo) { 2183fe6060f1SDimitry Andric KernelParallelUse = true; 21845ffd83dbSDimitry Andric ToBeReplacedStateMachineUses.push_back(&U); 21855ffd83dbSDimitry Andric return; 21865ffd83dbSDimitry Andric } 21875ffd83dbSDimitry Andric UnknownUse = true; 21885ffd83dbSDimitry Andric }); 21895ffd83dbSDimitry Andric 2190fe6060f1SDimitry Andric // Do not emit a remark if we haven't seen a __kmpc_parallel_51 21915ffd83dbSDimitry Andric // use. 2192fe6060f1SDimitry Andric if (!KernelParallelUse) 21935ffd83dbSDimitry Andric continue; 21945ffd83dbSDimitry Andric 21955ffd83dbSDimitry Andric // If this ever hits, we should investigate. 21965ffd83dbSDimitry Andric // TODO: Checking the number of uses is not a necessary restriction and 21975ffd83dbSDimitry Andric // should be lifted. 21985ffd83dbSDimitry Andric if (UnknownUse || NumDirectCalls != 1 || 2199fe6060f1SDimitry Andric ToBeReplacedStateMachineUses.size() > 2) { 2200fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemarkAnalysis ORA) { 2201fe6060f1SDimitry Andric return ORA << "Parallel region is used in " 22025ffd83dbSDimitry Andric << (UnknownUse ? "unknown" : "unexpected") 2203fe6060f1SDimitry Andric << " ways. 
Will not attempt to rewrite the state machine.";
22045ffd83dbSDimitry Andric };
2205fe6060f1SDimitry Andric emitRemark<OptimizationRemarkAnalysis>(F, "OMP101", Remark);
22065ffd83dbSDimitry Andric continue;
22075ffd83dbSDimitry Andric }
22085ffd83dbSDimitry Andric
2209fe6060f1SDimitry Andric // Even if we have __kmpc_parallel_51 calls, we (for now) give
22105ffd83dbSDimitry Andric // up if the function is not called from a unique kernel.
22115ffd83dbSDimitry Andric Kernel K = getUniqueKernelFor(*F);
22125ffd83dbSDimitry Andric if (!K) {
2213fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemarkAnalysis ORA) {
2214fe6060f1SDimitry Andric return ORA << "Parallel region is not called from a unique kernel. "
2215fe6060f1SDimitry Andric "Will not attempt to rewrite the state machine.";
22165ffd83dbSDimitry Andric };
2217fe6060f1SDimitry Andric emitRemark<OptimizationRemarkAnalysis>(F, "OMP102", Remark);
22185ffd83dbSDimitry Andric continue;
22195ffd83dbSDimitry Andric }
22205ffd83dbSDimitry Andric
22215ffd83dbSDimitry Andric // We now know F is a parallel body function called only from the kernel K.
22225ffd83dbSDimitry Andric // We also identified the state machine uses in which we replace the
22235ffd83dbSDimitry Andric // function pointer by a new global symbol for identification purposes. This
22245ffd83dbSDimitry Andric // ensures only direct calls to the function are left.
22255ffd83dbSDimitry Andric
22265ffd83dbSDimitry Andric Module &M = *F->getParent();
22275ffd83dbSDimitry Andric Type *Int8Ty = Type::getInt8Ty(M.getContext());
22285ffd83dbSDimitry Andric
22295ffd83dbSDimitry Andric auto *ID = new GlobalVariable(
22305ffd83dbSDimitry Andric M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
22315ffd83dbSDimitry Andric UndefValue::get(Int8Ty), F->getName() + ".ID");
22325ffd83dbSDimitry Andric
22335ffd83dbSDimitry Andric for (Use *U : ToBeReplacedStateMachineUses)
22348c6f6c0cSDimitry Andric U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast(
22358c6f6c0cSDimitry Andric ID, U->get()->getType()));
22365ffd83dbSDimitry Andric
22375ffd83dbSDimitry Andric ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
22385ffd83dbSDimitry Andric
22395ffd83dbSDimitry Andric Changed = true;
22405ffd83dbSDimitry Andric }
22415ffd83dbSDimitry Andric
22425ffd83dbSDimitry Andric return Changed;
22435ffd83dbSDimitry Andric }
22445ffd83dbSDimitry Andric
22455ffd83dbSDimitry Andric /// Abstract Attribute for tracking ICV values.
22465ffd83dbSDimitry Andric struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
22475ffd83dbSDimitry Andric using Base = StateWrapper<BooleanState, AbstractAttribute>;
22485ffd83dbSDimitry Andric AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
22495ffd83dbSDimitry Andric
22505ffd83dbSDimitry Andric /// Returns true if value is assumed to be tracked.
22515ffd83dbSDimitry Andric bool isAssumedTracked() const { return getAssumed(); }
22525ffd83dbSDimitry Andric
22535ffd83dbSDimitry Andric /// Returns true if value is known to be tracked.
22545ffd83dbSDimitry Andric bool isKnownTracked() const { return getAssumed(); }
22555ffd83dbSDimitry Andric
22565ffd83dbSDimitry Andric /// Create an abstract attribute view for the position \p IRP.
22575ffd83dbSDimitry Andric static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);
22585ffd83dbSDimitry Andric
22595ffd83dbSDimitry Andric /// Return the value with which \p I can be replaced for specific \p ICV.
2260bdd1243dSDimitry Andric virtual std::optional<Value *> getReplacementValue(InternalControlVar ICV, 2261e8d8bef9SDimitry Andric const Instruction *I, 2262e8d8bef9SDimitry Andric Attributor &A) const { 2263bdd1243dSDimitry Andric return std::nullopt; 2264e8d8bef9SDimitry Andric } 2265e8d8bef9SDimitry Andric 2266e8d8bef9SDimitry Andric /// Return an assumed unique ICV value if a single candidate is found. If 2267bdd1243dSDimitry Andric /// there cannot be one, return a nullptr. If it is not clear yet, return 2268bdd1243dSDimitry Andric /// std::nullopt. 2269bdd1243dSDimitry Andric virtual std::optional<Value *> 2270e8d8bef9SDimitry Andric getUniqueReplacementValue(InternalControlVar ICV) const = 0; 2271e8d8bef9SDimitry Andric 2272e8d8bef9SDimitry Andric // Currently only nthreads is being tracked. 2273e8d8bef9SDimitry Andric // this array will only grow with time. 2274e8d8bef9SDimitry Andric InternalControlVar TrackableICVs[1] = {ICV_nthreads}; 22755ffd83dbSDimitry Andric 22765ffd83dbSDimitry Andric /// See AbstractAttribute::getName() 22775ffd83dbSDimitry Andric const std::string getName() const override { return "AAICVTracker"; } 22785ffd83dbSDimitry Andric 22795ffd83dbSDimitry Andric /// See AbstractAttribute::getIdAddr() 22805ffd83dbSDimitry Andric const char *getIdAddr() const override { return &ID; } 22815ffd83dbSDimitry Andric 22825ffd83dbSDimitry Andric /// This function should return true if the type of the \p AA is AAICVTracker 22835ffd83dbSDimitry Andric static bool classof(const AbstractAttribute *AA) { 22845ffd83dbSDimitry Andric return (AA->getIdAddr() == &ID); 22855ffd83dbSDimitry Andric } 22865ffd83dbSDimitry Andric 22875ffd83dbSDimitry Andric static const char ID; 22885ffd83dbSDimitry Andric }; 22895ffd83dbSDimitry Andric 22905ffd83dbSDimitry Andric struct AAICVTrackerFunction : public AAICVTracker { 22915ffd83dbSDimitry Andric AAICVTrackerFunction(const IRPosition &IRP, Attributor &A) 22925ffd83dbSDimitry Andric : AAICVTracker(IRP, A) {} 22935ffd83dbSDimitry Andric 22945ffd83dbSDimitry Andric // FIXME: come up with better string. 229506c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override { 229606c3fb27SDimitry Andric return "ICVTrackerFunction"; 229706c3fb27SDimitry Andric } 22985ffd83dbSDimitry Andric 22995ffd83dbSDimitry Andric // FIXME: come up with some stats. 23005ffd83dbSDimitry Andric void trackStatistics() const override {} 23015ffd83dbSDimitry Andric 2302e8d8bef9SDimitry Andric /// We don't manifest anything for this AA. 23035ffd83dbSDimitry Andric ChangeStatus manifest(Attributor &A) override { 2304e8d8bef9SDimitry Andric return ChangeStatus::UNCHANGED; 23055ffd83dbSDimitry Andric } 23065ffd83dbSDimitry Andric 23075ffd83dbSDimitry Andric // Map of ICV to their values at specific program point. 
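// An entry <I, V> means the ICV has value V directly after instruction I; a
// null value marks the ICV as unknown from that point on.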
2308e8d8bef9SDimitry Andric EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
23095ffd83dbSDimitry Andric InternalControlVar::ICV___last>
2310e8d8bef9SDimitry Andric ICVReplacementValuesMap;
23115ffd83dbSDimitry Andric
23125ffd83dbSDimitry Andric ChangeStatus updateImpl(Attributor &A) override {
23135ffd83dbSDimitry Andric ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
23145ffd83dbSDimitry Andric
23155ffd83dbSDimitry Andric Function *F = getAnchorScope();
23165ffd83dbSDimitry Andric
23175ffd83dbSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
23185ffd83dbSDimitry Andric
23195ffd83dbSDimitry Andric for (InternalControlVar ICV : TrackableICVs) {
23205ffd83dbSDimitry Andric auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
23215ffd83dbSDimitry Andric
2322e8d8bef9SDimitry Andric auto &ValuesMap = ICVReplacementValuesMap[ICV];
23235ffd83dbSDimitry Andric auto TrackValues = [&](Use &U, Function &) {
23245ffd83dbSDimitry Andric CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
23255ffd83dbSDimitry Andric if (!CI)
23265ffd83dbSDimitry Andric return false;
23275ffd83dbSDimitry Andric
23285ffd83dbSDimitry Andric // FIXME: handle setters with more than one argument.
23295ffd83dbSDimitry Andric /// Track new value.
2330e8d8bef9SDimitry Andric if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
23315ffd83dbSDimitry Andric HasChanged = ChangeStatus::CHANGED;
23325ffd83dbSDimitry Andric
23335ffd83dbSDimitry Andric return false;
23345ffd83dbSDimitry Andric };
23355ffd83dbSDimitry Andric
2336e8d8bef9SDimitry Andric auto CallCheck = [&](Instruction &I) {
2337bdd1243dSDimitry Andric std::optional<Value *> ReplVal = getValueForCall(A, I, ICV);
233881ad6265SDimitry Andric if (ReplVal && ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
2339e8d8bef9SDimitry Andric HasChanged = ChangeStatus::CHANGED;
2340e8d8bef9SDimitry Andric
2341e8d8bef9SDimitry Andric return true;
2342e8d8bef9SDimitry Andric };
2343e8d8bef9SDimitry Andric
2344e8d8bef9SDimitry Andric // Track all changes of an ICV.
23455ffd83dbSDimitry Andric SetterRFI.foreachUse(TrackValues, F);
2346e8d8bef9SDimitry Andric
2347fe6060f1SDimitry Andric bool UsedAssumedInformation = false;
2348e8d8bef9SDimitry Andric A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
2349fe6060f1SDimitry Andric UsedAssumedInformation,
2350e8d8bef9SDimitry Andric /* CheckBBLivenessOnly */ true);
2351e8d8bef9SDimitry Andric
2352e8d8bef9SDimitry Andric /// TODO: Figure out a way to avoid adding entry in
2353e8d8bef9SDimitry Andric /// ICVReplacementValuesMap
2354e8d8bef9SDimitry Andric Instruction *Entry = &F->getEntryBlock().front();
2355e8d8bef9SDimitry Andric if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
2356e8d8bef9SDimitry Andric ValuesMap.insert(std::make_pair(Entry, nullptr));
23575ffd83dbSDimitry Andric }
23585ffd83dbSDimitry Andric
23595ffd83dbSDimitry Andric return HasChanged;
23605ffd83dbSDimitry Andric }
23615ffd83dbSDimitry Andric
236204eeddc0SDimitry Andric /// Helper to check if \p I is a call and get the value for it if it is
2363e8d8bef9SDimitry Andric /// unique.
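/// The tri-state result: std::nullopt means the call cannot change the ICV
/// (e.g., a "no_openmp" callee or the ICV getter), nullptr means the ICV may
/// be changed to an unknown value (e.g., an indirect or unanalyzable call),
/// and a concrete value is returned for calls whose effect on the ICV is
/// known.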
2364bdd1243dSDimitry Andric std::optional<Value *> getValueForCall(Attributor &A, const Instruction &I, 2365e8d8bef9SDimitry Andric InternalControlVar &ICV) const { 23665ffd83dbSDimitry Andric 236704eeddc0SDimitry Andric const auto *CB = dyn_cast<CallBase>(&I); 2368e8d8bef9SDimitry Andric if (!CB || CB->hasFnAttr("no_openmp") || 2369e8d8bef9SDimitry Andric CB->hasFnAttr("no_openmp_routines")) 2370bdd1243dSDimitry Andric return std::nullopt; 2371e8d8bef9SDimitry Andric 23725ffd83dbSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 23735ffd83dbSDimitry Andric auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter]; 2374e8d8bef9SDimitry Andric auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; 2375e8d8bef9SDimitry Andric Function *CalledFunction = CB->getCalledFunction(); 23765ffd83dbSDimitry Andric 2377e8d8bef9SDimitry Andric // Indirect call, assume ICV changes. 2378e8d8bef9SDimitry Andric if (CalledFunction == nullptr) 2379e8d8bef9SDimitry Andric return nullptr; 2380e8d8bef9SDimitry Andric if (CalledFunction == GetterRFI.Declaration) 2381bdd1243dSDimitry Andric return std::nullopt; 2382e8d8bef9SDimitry Andric if (CalledFunction == SetterRFI.Declaration) { 238304eeddc0SDimitry Andric if (ICVReplacementValuesMap[ICV].count(&I)) 238404eeddc0SDimitry Andric return ICVReplacementValuesMap[ICV].lookup(&I); 2385e8d8bef9SDimitry Andric 2386e8d8bef9SDimitry Andric return nullptr; 2387e8d8bef9SDimitry Andric } 2388e8d8bef9SDimitry Andric 2389e8d8bef9SDimitry Andric // Since we don't know, assume it changes the ICV. 2390e8d8bef9SDimitry Andric if (CalledFunction->isDeclaration()) 2391e8d8bef9SDimitry Andric return nullptr; 2392e8d8bef9SDimitry Andric 239306c3fb27SDimitry Andric const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>( 2394fe6060f1SDimitry Andric *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED); 2395e8d8bef9SDimitry Andric 239606c3fb27SDimitry Andric if (ICVTrackingAA->isAssumedTracked()) { 239706c3fb27SDimitry Andric std::optional<Value *> URV = 239806c3fb27SDimitry Andric ICVTrackingAA->getUniqueReplacementValue(ICV); 239981ad6265SDimitry Andric if (!URV || (*URV && AA::isValidAtPosition(AA::ValueAndContext(**URV, I), 240081ad6265SDimitry Andric OMPInfoCache))) 240104eeddc0SDimitry Andric return URV; 240204eeddc0SDimitry Andric } 2403e8d8bef9SDimitry Andric 2404e8d8bef9SDimitry Andric // If we don't know, assume it changes. 2405e8d8bef9SDimitry Andric return nullptr; 2406e8d8bef9SDimitry Andric } 2407e8d8bef9SDimitry Andric 2408bdd1243dSDimitry Andric // We don't check unique value for a function, so return std::nullopt. 2409bdd1243dSDimitry Andric std::optional<Value *> 2410e8d8bef9SDimitry Andric getUniqueReplacementValue(InternalControlVar ICV) const override { 2411bdd1243dSDimitry Andric return std::nullopt; 2412e8d8bef9SDimitry Andric } 2413e8d8bef9SDimitry Andric 2414e8d8bef9SDimitry Andric /// Return the value with which \p I can be replaced for specific \p ICV. 
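/// The lookup walks backwards from \p I through its block and all
/// predecessor blocks, taking the closest known setter or ICV-changing call;
/// if different paths yield different values, nullptr (unknown) is returned.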
2415bdd1243dSDimitry Andric std::optional<Value *> getReplacementValue(InternalControlVar ICV, 2416e8d8bef9SDimitry Andric const Instruction *I, 2417e8d8bef9SDimitry Andric Attributor &A) const override { 2418e8d8bef9SDimitry Andric const auto &ValuesMap = ICVReplacementValuesMap[ICV]; 2419e8d8bef9SDimitry Andric if (ValuesMap.count(I)) 2420e8d8bef9SDimitry Andric return ValuesMap.lookup(I); 2421e8d8bef9SDimitry Andric 2422e8d8bef9SDimitry Andric SmallVector<const Instruction *, 16> Worklist; 2423e8d8bef9SDimitry Andric SmallPtrSet<const Instruction *, 16> Visited; 2424e8d8bef9SDimitry Andric Worklist.push_back(I); 2425e8d8bef9SDimitry Andric 2426bdd1243dSDimitry Andric std::optional<Value *> ReplVal; 2427e8d8bef9SDimitry Andric 2428e8d8bef9SDimitry Andric while (!Worklist.empty()) { 2429e8d8bef9SDimitry Andric const Instruction *CurrInst = Worklist.pop_back_val(); 2430e8d8bef9SDimitry Andric if (!Visited.insert(CurrInst).second) 24315ffd83dbSDimitry Andric continue; 24325ffd83dbSDimitry Andric 2433e8d8bef9SDimitry Andric const BasicBlock *CurrBB = CurrInst->getParent(); 2434e8d8bef9SDimitry Andric 2435e8d8bef9SDimitry Andric // Go up and look for all potential setters/calls that might change the 2436e8d8bef9SDimitry Andric // ICV. 2437e8d8bef9SDimitry Andric while ((CurrInst = CurrInst->getPrevNode())) { 2438e8d8bef9SDimitry Andric if (ValuesMap.count(CurrInst)) { 2439bdd1243dSDimitry Andric std::optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst); 2440e8d8bef9SDimitry Andric // Unknown value, track new. 244181ad6265SDimitry Andric if (!ReplVal) { 2442e8d8bef9SDimitry Andric ReplVal = NewReplVal; 2443e8d8bef9SDimitry Andric break; 2444e8d8bef9SDimitry Andric } 2445e8d8bef9SDimitry Andric 2446e8d8bef9SDimitry Andric // If we found a new value, we can't know the icv value anymore. 244781ad6265SDimitry Andric if (NewReplVal) 2448e8d8bef9SDimitry Andric if (ReplVal != NewReplVal) 24495ffd83dbSDimitry Andric return nullptr; 24505ffd83dbSDimitry Andric 2451e8d8bef9SDimitry Andric break; 24525ffd83dbSDimitry Andric } 24535ffd83dbSDimitry Andric 2454bdd1243dSDimitry Andric std::optional<Value *> NewReplVal = getValueForCall(A, *CurrInst, ICV); 245581ad6265SDimitry Andric if (!NewReplVal) 2456e8d8bef9SDimitry Andric continue; 2457e8d8bef9SDimitry Andric 2458e8d8bef9SDimitry Andric // Unknown value, track new. 245981ad6265SDimitry Andric if (!ReplVal) { 2460e8d8bef9SDimitry Andric ReplVal = NewReplVal; 2461e8d8bef9SDimitry Andric break; 24625ffd83dbSDimitry Andric } 24635ffd83dbSDimitry Andric 2464e8d8bef9SDimitry Andric // if (NewReplVal.hasValue()) 2465e8d8bef9SDimitry Andric // We found a new value, we can't know the icv value anymore. 2466e8d8bef9SDimitry Andric if (ReplVal != NewReplVal) 24675ffd83dbSDimitry Andric return nullptr; 24685ffd83dbSDimitry Andric } 2469e8d8bef9SDimitry Andric 2470e8d8bef9SDimitry Andric // If we are in the same BB and we have a value, we are done. 247181ad6265SDimitry Andric if (CurrBB == I->getParent() && ReplVal) 2472e8d8bef9SDimitry Andric return ReplVal; 2473e8d8bef9SDimitry Andric 2474e8d8bef9SDimitry Andric // Go through all predecessors and add terminators for analysis. 
2475e8d8bef9SDimitry Andric for (const BasicBlock *Pred : predecessors(CurrBB)) 2476e8d8bef9SDimitry Andric if (const Instruction *Terminator = Pred->getTerminator()) 2477e8d8bef9SDimitry Andric Worklist.push_back(Terminator); 2478e8d8bef9SDimitry Andric } 2479e8d8bef9SDimitry Andric 2480e8d8bef9SDimitry Andric return ReplVal; 2481e8d8bef9SDimitry Andric } 2482e8d8bef9SDimitry Andric }; 2483e8d8bef9SDimitry Andric 2484e8d8bef9SDimitry Andric struct AAICVTrackerFunctionReturned : AAICVTracker { 2485e8d8bef9SDimitry Andric AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A) 2486e8d8bef9SDimitry Andric : AAICVTracker(IRP, A) {} 2487e8d8bef9SDimitry Andric 2488e8d8bef9SDimitry Andric // FIXME: come up with better string. 248906c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override { 2490e8d8bef9SDimitry Andric return "ICVTrackerFunctionReturned"; 2491e8d8bef9SDimitry Andric } 2492e8d8bef9SDimitry Andric 2493e8d8bef9SDimitry Andric // FIXME: come up with some stats. 2494e8d8bef9SDimitry Andric void trackStatistics() const override {} 2495e8d8bef9SDimitry Andric 2496e8d8bef9SDimitry Andric /// We don't manifest anything for this AA. 2497e8d8bef9SDimitry Andric ChangeStatus manifest(Attributor &A) override { 2498e8d8bef9SDimitry Andric return ChangeStatus::UNCHANGED; 2499e8d8bef9SDimitry Andric } 2500e8d8bef9SDimitry Andric 2501e8d8bef9SDimitry Andric // Map of ICV to their values at specific program point. 2502bdd1243dSDimitry Andric EnumeratedArray<std::optional<Value *>, InternalControlVar, 2503e8d8bef9SDimitry Andric InternalControlVar::ICV___last> 2504e8d8bef9SDimitry Andric ICVReplacementValuesMap; 2505e8d8bef9SDimitry Andric 2506e8d8bef9SDimitry Andric /// Return the value with which \p I can be replaced for specific \p ICV. 2507bdd1243dSDimitry Andric std::optional<Value *> 2508e8d8bef9SDimitry Andric getUniqueReplacementValue(InternalControlVar ICV) const override { 2509e8d8bef9SDimitry Andric return ICVReplacementValuesMap[ICV]; 2510e8d8bef9SDimitry Andric } 2511e8d8bef9SDimitry Andric 2512e8d8bef9SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 2513e8d8bef9SDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED; 251406c3fb27SDimitry Andric const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>( 2515fe6060f1SDimitry Andric *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); 2516e8d8bef9SDimitry Andric 251706c3fb27SDimitry Andric if (!ICVTrackingAA->isAssumedTracked()) 2518e8d8bef9SDimitry Andric return indicatePessimisticFixpoint(); 2519e8d8bef9SDimitry Andric 2520e8d8bef9SDimitry Andric for (InternalControlVar ICV : TrackableICVs) { 2521bdd1243dSDimitry Andric std::optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV]; 2522bdd1243dSDimitry Andric std::optional<Value *> UniqueICVValue; 2523e8d8bef9SDimitry Andric 2524e8d8bef9SDimitry Andric auto CheckReturnInst = [&](Instruction &I) { 2525bdd1243dSDimitry Andric std::optional<Value *> NewReplVal = 252606c3fb27SDimitry Andric ICVTrackingAA->getReplacementValue(ICV, &I, A); 2527e8d8bef9SDimitry Andric 2528e8d8bef9SDimitry Andric // If we found a second ICV value there is no unique returned value. 
252981ad6265SDimitry Andric if (UniqueICVValue && UniqueICVValue != NewReplVal) 2530e8d8bef9SDimitry Andric return false; 2531e8d8bef9SDimitry Andric 2532e8d8bef9SDimitry Andric UniqueICVValue = NewReplVal; 2533e8d8bef9SDimitry Andric 2534e8d8bef9SDimitry Andric return true; 2535e8d8bef9SDimitry Andric }; 2536e8d8bef9SDimitry Andric 2537fe6060f1SDimitry Andric bool UsedAssumedInformation = false; 2538e8d8bef9SDimitry Andric if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret}, 2539fe6060f1SDimitry Andric UsedAssumedInformation, 2540e8d8bef9SDimitry Andric /* CheckBBLivenessOnly */ true)) 2541e8d8bef9SDimitry Andric UniqueICVValue = nullptr; 2542e8d8bef9SDimitry Andric 2543e8d8bef9SDimitry Andric if (UniqueICVValue == ReplVal) 2544e8d8bef9SDimitry Andric continue; 2545e8d8bef9SDimitry Andric 2546e8d8bef9SDimitry Andric ReplVal = UniqueICVValue; 2547e8d8bef9SDimitry Andric Changed = ChangeStatus::CHANGED; 2548e8d8bef9SDimitry Andric } 2549e8d8bef9SDimitry Andric 2550e8d8bef9SDimitry Andric return Changed; 2551e8d8bef9SDimitry Andric } 2552e8d8bef9SDimitry Andric }; 2553e8d8bef9SDimitry Andric 2554e8d8bef9SDimitry Andric struct AAICVTrackerCallSite : AAICVTracker { 2555e8d8bef9SDimitry Andric AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A) 2556e8d8bef9SDimitry Andric : AAICVTracker(IRP, A) {} 2557e8d8bef9SDimitry Andric 2558e8d8bef9SDimitry Andric void initialize(Attributor &A) override { 255906c3fb27SDimitry Andric assert(getAnchorScope() && "Expected anchor function"); 2560e8d8bef9SDimitry Andric 2561e8d8bef9SDimitry Andric // We only initialize this AA for getters, so we need to know which ICV it 2562e8d8bef9SDimitry Andric // gets. 2563e8d8bef9SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 2564e8d8bef9SDimitry Andric for (InternalControlVar ICV : TrackableICVs) { 2565e8d8bef9SDimitry Andric auto ICVInfo = OMPInfoCache.ICVs[ICV]; 2566e8d8bef9SDimitry Andric auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter]; 2567e8d8bef9SDimitry Andric if (Getter.Declaration == getAssociatedFunction()) { 2568e8d8bef9SDimitry Andric AssociatedICV = ICVInfo.Kind; 2569e8d8bef9SDimitry Andric return; 2570e8d8bef9SDimitry Andric } 2571e8d8bef9SDimitry Andric } 2572e8d8bef9SDimitry Andric 2573e8d8bef9SDimitry Andric /// Unknown ICV. 2574e8d8bef9SDimitry Andric indicatePessimisticFixpoint(); 2575e8d8bef9SDimitry Andric } 2576e8d8bef9SDimitry Andric 2577e8d8bef9SDimitry Andric ChangeStatus manifest(Attributor &A) override { 257881ad6265SDimitry Andric if (!ReplVal || !*ReplVal) 2579e8d8bef9SDimitry Andric return ChangeStatus::UNCHANGED; 2580e8d8bef9SDimitry Andric 258181ad6265SDimitry Andric A.changeAfterManifest(IRPosition::inst(*getCtxI()), **ReplVal); 2582e8d8bef9SDimitry Andric A.deleteAfterManifest(*getCtxI()); 2583e8d8bef9SDimitry Andric 2584e8d8bef9SDimitry Andric return ChangeStatus::CHANGED; 2585e8d8bef9SDimitry Andric } 2586e8d8bef9SDimitry Andric 2587e8d8bef9SDimitry Andric // FIXME: come up with better string. 258806c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override { 258906c3fb27SDimitry Andric return "ICVTrackerCallSite"; 259006c3fb27SDimitry Andric } 2591e8d8bef9SDimitry Andric 2592e8d8bef9SDimitry Andric // FIXME: come up with some stats. 
2593e8d8bef9SDimitry Andric void trackStatistics() const override {} 2594e8d8bef9SDimitry Andric 2595e8d8bef9SDimitry Andric InternalControlVar AssociatedICV; 2596bdd1243dSDimitry Andric std::optional<Value *> ReplVal; 2597e8d8bef9SDimitry Andric 2598e8d8bef9SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 259906c3fb27SDimitry Andric const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>( 2600fe6060f1SDimitry Andric *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); 2601e8d8bef9SDimitry Andric 2602e8d8bef9SDimitry Andric // We don't have any information, so we assume it changes the ICV. 260306c3fb27SDimitry Andric if (!ICVTrackingAA->isAssumedTracked()) 2604e8d8bef9SDimitry Andric return indicatePessimisticFixpoint(); 2605e8d8bef9SDimitry Andric 2606bdd1243dSDimitry Andric std::optional<Value *> NewReplVal = 260706c3fb27SDimitry Andric ICVTrackingAA->getReplacementValue(AssociatedICV, getCtxI(), A); 2608e8d8bef9SDimitry Andric 2609e8d8bef9SDimitry Andric if (ReplVal == NewReplVal) 2610e8d8bef9SDimitry Andric return ChangeStatus::UNCHANGED; 2611e8d8bef9SDimitry Andric 2612e8d8bef9SDimitry Andric ReplVal = NewReplVal; 2613e8d8bef9SDimitry Andric return ChangeStatus::CHANGED; 2614e8d8bef9SDimitry Andric } 2615e8d8bef9SDimitry Andric 2616e8d8bef9SDimitry Andric // Return the value with which associated value can be replaced for specific 2617e8d8bef9SDimitry Andric // \p ICV. 2618bdd1243dSDimitry Andric std::optional<Value *> 2619e8d8bef9SDimitry Andric getUniqueReplacementValue(InternalControlVar ICV) const override { 2620e8d8bef9SDimitry Andric return ReplVal; 2621e8d8bef9SDimitry Andric } 2622e8d8bef9SDimitry Andric }; 2623e8d8bef9SDimitry Andric 2624e8d8bef9SDimitry Andric struct AAICVTrackerCallSiteReturned : AAICVTracker { 2625e8d8bef9SDimitry Andric AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A) 2626e8d8bef9SDimitry Andric : AAICVTracker(IRP, A) {} 2627e8d8bef9SDimitry Andric 2628e8d8bef9SDimitry Andric // FIXME: come up with better string. 262906c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override { 2630e8d8bef9SDimitry Andric return "ICVTrackerCallSiteReturned"; 2631e8d8bef9SDimitry Andric } 2632e8d8bef9SDimitry Andric 2633e8d8bef9SDimitry Andric // FIXME: come up with some stats. 2634e8d8bef9SDimitry Andric void trackStatistics() const override {} 2635e8d8bef9SDimitry Andric 2636e8d8bef9SDimitry Andric /// We don't manifest anything for this AA. 2637e8d8bef9SDimitry Andric ChangeStatus manifest(Attributor &A) override { 2638e8d8bef9SDimitry Andric return ChangeStatus::UNCHANGED; 2639e8d8bef9SDimitry Andric } 2640e8d8bef9SDimitry Andric 2641e8d8bef9SDimitry Andric // Map of ICV to their values at specific program point. 2642bdd1243dSDimitry Andric EnumeratedArray<std::optional<Value *>, InternalControlVar, 2643e8d8bef9SDimitry Andric InternalControlVar::ICV___last> 2644e8d8bef9SDimitry Andric ICVReplacementValuesMap; 2645e8d8bef9SDimitry Andric 2646e8d8bef9SDimitry Andric /// Return the value with which associated value can be replaced for specific 2647e8d8bef9SDimitry Andric /// \p ICV. 
2648bdd1243dSDimitry Andric std::optional<Value *> 2649e8d8bef9SDimitry Andric getUniqueReplacementValue(InternalControlVar ICV) const override { 2650e8d8bef9SDimitry Andric return ICVReplacementValuesMap[ICV]; 2651e8d8bef9SDimitry Andric } 2652e8d8bef9SDimitry Andric 2653e8d8bef9SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 2654e8d8bef9SDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED; 265506c3fb27SDimitry Andric const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>( 2656fe6060f1SDimitry Andric *this, IRPosition::returned(*getAssociatedFunction()), 2657fe6060f1SDimitry Andric DepClassTy::REQUIRED); 2658e8d8bef9SDimitry Andric 2659e8d8bef9SDimitry Andric // We don't have any information, so we assume it changes the ICV. 266006c3fb27SDimitry Andric if (!ICVTrackingAA->isAssumedTracked()) 2661e8d8bef9SDimitry Andric return indicatePessimisticFixpoint(); 2662e8d8bef9SDimitry Andric 2663e8d8bef9SDimitry Andric for (InternalControlVar ICV : TrackableICVs) { 2664bdd1243dSDimitry Andric std::optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV]; 2665bdd1243dSDimitry Andric std::optional<Value *> NewReplVal = 266606c3fb27SDimitry Andric ICVTrackingAA->getUniqueReplacementValue(ICV); 2667e8d8bef9SDimitry Andric 2668e8d8bef9SDimitry Andric if (ReplVal == NewReplVal) 2669e8d8bef9SDimitry Andric continue; 2670e8d8bef9SDimitry Andric 2671e8d8bef9SDimitry Andric ReplVal = NewReplVal; 2672e8d8bef9SDimitry Andric Changed = ChangeStatus::CHANGED; 2673e8d8bef9SDimitry Andric } 2674e8d8bef9SDimitry Andric return Changed; 2675e8d8bef9SDimitry Andric } 26765ffd83dbSDimitry Andric }; 2677fe6060f1SDimitry Andric 26785f757f3fSDimitry Andric /// Determines if \p BB exits the function unconditionally itself or reaches a 26795f757f3fSDimitry Andric /// block that does through only unique successors. 
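/// That is, starting at \p BB every block must have a unique successor until
/// a block without successors (the function end) is reached. A hypothetical
/// sketch:
///   BB -> BB.exit                 (unique successors only)  -> true
///   BB -> { BB.then, BB.else }    (multiple successors)     -> false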
26805f757f3fSDimitry Andric static bool hasFunctionEndAsUniqueSuccessor(const BasicBlock *BB) { 26815f757f3fSDimitry Andric if (succ_empty(BB)) 26825f757f3fSDimitry Andric return true; 26835f757f3fSDimitry Andric const BasicBlock *const Successor = BB->getUniqueSuccessor(); 26845f757f3fSDimitry Andric if (!Successor) 26855f757f3fSDimitry Andric return false; 26865f757f3fSDimitry Andric return hasFunctionEndAsUniqueSuccessor(Successor); 26875f757f3fSDimitry Andric } 26885f757f3fSDimitry Andric 2689fe6060f1SDimitry Andric struct AAExecutionDomainFunction : public AAExecutionDomain { 2690fe6060f1SDimitry Andric AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A) 2691fe6060f1SDimitry Andric : AAExecutionDomain(IRP, A) {} 2692fe6060f1SDimitry Andric 269306c3fb27SDimitry Andric ~AAExecutionDomainFunction() { delete RPOT; } 2694bdd1243dSDimitry Andric 2695bdd1243dSDimitry Andric void initialize(Attributor &A) override { 269606c3fb27SDimitry Andric Function *F = getAnchorScope(); 269706c3fb27SDimitry Andric assert(F && "Expected anchor function"); 269806c3fb27SDimitry Andric RPOT = new ReversePostOrderTraversal<Function *>(F); 2699bdd1243dSDimitry Andric } 2700bdd1243dSDimitry Andric 270106c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override { 270206c3fb27SDimitry Andric unsigned TotalBlocks = 0, InitialThreadBlocks = 0, AlignedBlocks = 0; 2703bdd1243dSDimitry Andric for (auto &It : BEDMap) { 270406c3fb27SDimitry Andric if (!It.getFirst()) 270506c3fb27SDimitry Andric continue; 2706bdd1243dSDimitry Andric TotalBlocks++; 2707bdd1243dSDimitry Andric InitialThreadBlocks += It.getSecond().IsExecutedByInitialThreadOnly; 270806c3fb27SDimitry Andric AlignedBlocks += It.getSecond().IsReachedFromAlignedBarrierOnly && 270906c3fb27SDimitry Andric It.getSecond().IsReachingAlignedBarrierOnly; 2710bdd1243dSDimitry Andric } 2711bdd1243dSDimitry Andric return "[AAExecutionDomain] " + std::to_string(InitialThreadBlocks) + "/" + 271206c3fb27SDimitry Andric std::to_string(AlignedBlocks) + " of " + 271306c3fb27SDimitry Andric std::to_string(TotalBlocks) + 271406c3fb27SDimitry Andric " executed by initial thread / aligned"; 2715fe6060f1SDimitry Andric } 2716fe6060f1SDimitry Andric 2717fe6060f1SDimitry Andric /// See AbstractAttribute::trackStatistics(). 2718fe6060f1SDimitry Andric void trackStatistics() const override {} 2719fe6060f1SDimitry Andric 2720fe6060f1SDimitry Andric ChangeStatus manifest(Attributor &A) override { 2721fe6060f1SDimitry Andric LLVM_DEBUG({ 2722bdd1243dSDimitry Andric for (const BasicBlock &BB : *getAnchorScope()) { 2723bdd1243dSDimitry Andric if (!isExecutedByInitialThreadOnly(BB)) 2724bdd1243dSDimitry Andric continue; 2725fe6060f1SDimitry Andric dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " " 2726bdd1243dSDimitry Andric << BB.getName() << " is executed by a single thread.\n"; 2727bdd1243dSDimitry Andric } 2728fe6060f1SDimitry Andric }); 2729bdd1243dSDimitry Andric 2730bdd1243dSDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED; 2731bdd1243dSDimitry Andric 2732bdd1243dSDimitry Andric if (DisableOpenMPOptBarrierElimination) 2733bdd1243dSDimitry Andric return Changed; 2734bdd1243dSDimitry Andric 2735bdd1243dSDimitry Andric SmallPtrSet<CallBase *, 16> DeletedBarriers; 2736bdd1243dSDimitry Andric auto HandleAlignedBarrier = [&](CallBase *CB) { 273706c3fb27SDimitry Andric const ExecutionDomainTy &ED = CB ? 
CEDMap[{CB, PRE}] : BEDMap[nullptr];
2738bdd1243dSDimitry Andric if (!ED.IsReachedFromAlignedBarrierOnly ||
2739bdd1243dSDimitry Andric ED.EncounteredNonLocalSideEffect)
2740bdd1243dSDimitry Andric return;
27415f757f3fSDimitry Andric if (!ED.EncounteredAssumes.empty() && !A.isModulePass())
27425f757f3fSDimitry Andric return;
2743bdd1243dSDimitry Andric
27445f757f3fSDimitry Andric // We can remove this barrier, if it is one, or aligned barriers reaching
27455f757f3fSDimitry Andric // the kernel end (if CB is nullptr). Aligned barriers reaching the kernel
27465f757f3fSDimitry Andric // end should only be removed if the kernel end is their unique successor;
27475f757f3fSDimitry Andric // otherwise, they may have side-effects that aren't accounted for in the
27485f757f3fSDimitry Andric // kernel end in their other successors. If those barriers have other
27495f757f3fSDimitry Andric // barriers reaching them, those can be transitively removed as well as
27505f757f3fSDimitry Andric // long as the kernel end is also their unique successor.
2751bdd1243dSDimitry Andric if (CB) {
2752bdd1243dSDimitry Andric DeletedBarriers.insert(CB);
2753bdd1243dSDimitry Andric A.deleteAfterManifest(*CB);
2754bdd1243dSDimitry Andric ++NumBarriersEliminated;
2755bdd1243dSDimitry Andric Changed = ChangeStatus::CHANGED;
2756bdd1243dSDimitry Andric } else if (!ED.AlignedBarriers.empty()) {
2757bdd1243dSDimitry Andric Changed = ChangeStatus::CHANGED;
2758bdd1243dSDimitry Andric SmallVector<CallBase *> Worklist(ED.AlignedBarriers.begin(),
2759bdd1243dSDimitry Andric ED.AlignedBarriers.end());
2760bdd1243dSDimitry Andric SmallSetVector<CallBase *, 16> Visited;
2761bdd1243dSDimitry Andric while (!Worklist.empty()) {
2762bdd1243dSDimitry Andric CallBase *LastCB = Worklist.pop_back_val();
2763bdd1243dSDimitry Andric if (!Visited.insert(LastCB))
2764bdd1243dSDimitry Andric continue;
276506c3fb27SDimitry Andric if (LastCB->getFunction() != getAnchorScope())
276606c3fb27SDimitry Andric continue;
27675f757f3fSDimitry Andric if (!hasFunctionEndAsUniqueSuccessor(LastCB->getParent()))
27685f757f3fSDimitry Andric continue;
2769bdd1243dSDimitry Andric if (!DeletedBarriers.count(LastCB)) {
27705f757f3fSDimitry Andric ++NumBarriersEliminated;
2771bdd1243dSDimitry Andric A.deleteAfterManifest(*LastCB);
2772bdd1243dSDimitry Andric continue;
2773bdd1243dSDimitry Andric }
2774bdd1243dSDimitry Andric // The final aligned barrier (LastCB) reaching the kernel end was
2775bdd1243dSDimitry Andric // removed already. This means we can go one step further and remove
2776bdd1243dSDimitry Andric // the barriers encountered last before (LastCB).
277706c3fb27SDimitry Andric const ExecutionDomainTy &LastED = CEDMap[{LastCB, PRE}];
2778bdd1243dSDimitry Andric Worklist.append(LastED.AlignedBarriers.begin(),
2779bdd1243dSDimitry Andric LastED.AlignedBarriers.end());
2780bdd1243dSDimitry Andric }
2781fe6060f1SDimitry Andric }
2782fe6060f1SDimitry Andric
2783bdd1243dSDimitry Andric // If we actually eliminated a barrier we need to eliminate the associated
2784bdd1243dSDimitry Andric // llvm.assumes as well to avoid creating UB.
2785bdd1243dSDimitry Andric if (!ED.EncounteredAssumes.empty() && (CB || !ED.AlignedBarriers.empty())) 2786bdd1243dSDimitry Andric for (auto *AssumeCB : ED.EncounteredAssumes) 2787bdd1243dSDimitry Andric A.deleteAfterManifest(*AssumeCB); 2788fe6060f1SDimitry Andric }; 2789fe6060f1SDimitry Andric 2790bdd1243dSDimitry Andric for (auto *CB : AlignedBarriers) 2791bdd1243dSDimitry Andric HandleAlignedBarrier(CB); 2792fe6060f1SDimitry Andric 2793bdd1243dSDimitry Andric // Handle the "kernel end barrier" for kernels too. 27945f757f3fSDimitry Andric if (omp::isOpenMPKernel(*getAnchorScope())) 2795bdd1243dSDimitry Andric HandleAlignedBarrier(nullptr); 2796bdd1243dSDimitry Andric 2797bdd1243dSDimitry Andric return Changed; 2798bdd1243dSDimitry Andric } 2799bdd1243dSDimitry Andric 280006c3fb27SDimitry Andric bool isNoOpFence(const FenceInst &FI) const override { 280106c3fb27SDimitry Andric return getState().isValidState() && !NonNoOpFences.count(&FI); 280206c3fb27SDimitry Andric } 280306c3fb27SDimitry Andric 2804bdd1243dSDimitry Andric /// Merge barrier and assumption information from \p PredED into the successor 2805bdd1243dSDimitry Andric /// \p ED. 2806bdd1243dSDimitry Andric void 2807bdd1243dSDimitry Andric mergeInPredecessorBarriersAndAssumptions(Attributor &A, ExecutionDomainTy &ED, 2808bdd1243dSDimitry Andric const ExecutionDomainTy &PredED); 2809bdd1243dSDimitry Andric 2810bdd1243dSDimitry Andric /// Merge all information from \p PredED into the successor \p ED. If 2811bdd1243dSDimitry Andric /// \p InitialEdgeOnly is set, only the initial edge will enter the block 2812bdd1243dSDimitry Andric /// represented by \p ED from this predecessor. 281306c3fb27SDimitry Andric bool mergeInPredecessor(Attributor &A, ExecutionDomainTy &ED, 2814bdd1243dSDimitry Andric const ExecutionDomainTy &PredED, 2815bdd1243dSDimitry Andric bool InitialEdgeOnly = false); 2816bdd1243dSDimitry Andric 2817bdd1243dSDimitry Andric /// Accumulate information for the entry block in \p EntryBBED. 281806c3fb27SDimitry Andric bool handleCallees(Attributor &A, ExecutionDomainTy &EntryBBED); 2819bdd1243dSDimitry Andric 2820bdd1243dSDimitry Andric /// See AbstractAttribute::updateImpl. 2821bdd1243dSDimitry Andric ChangeStatus updateImpl(Attributor &A) override; 2822bdd1243dSDimitry Andric 2823bdd1243dSDimitry Andric /// Query interface, see AAExecutionDomain 2824bdd1243dSDimitry Andric ///{ 2825bdd1243dSDimitry Andric bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override { 2826bdd1243dSDimitry Andric if (!isValidState()) 2827bdd1243dSDimitry Andric return false; 282806c3fb27SDimitry Andric assert(BB.getParent() == getAnchorScope() && "Block is out of scope!"); 2829bdd1243dSDimitry Andric return BEDMap.lookup(&BB).IsExecutedByInitialThreadOnly; 2830bdd1243dSDimitry Andric } 2831bdd1243dSDimitry Andric 2832bdd1243dSDimitry Andric bool isExecutedInAlignedRegion(Attributor &A, 2833bdd1243dSDimitry Andric const Instruction &I) const override { 28341ac55f4cSDimitry Andric assert(I.getFunction() == getAnchorScope() && 28351ac55f4cSDimitry Andric "Instruction is out of scope!"); 28361ac55f4cSDimitry Andric if (!isValidState()) 2837bdd1243dSDimitry Andric return false; 2838bdd1243dSDimitry Andric 283906c3fb27SDimitry Andric bool ForwardIsOk = true; 2840bdd1243dSDimitry Andric const Instruction *CurI; 2841bdd1243dSDimitry Andric 2842bdd1243dSDimitry Andric // Check forward until a call or the block end is reached. 
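// Hitting another aligned barrier proves the region is aligned; otherwise
// the first call with recorded information must itself only reach aligned
// barriers, and if we fall off the block the block's summary must.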
2843bdd1243dSDimitry Andric CurI = &I;
2844bdd1243dSDimitry Andric do {
2845bdd1243dSDimitry Andric auto *CB = dyn_cast<CallBase>(CurI);
2846bdd1243dSDimitry Andric if (!CB)
2847bdd1243dSDimitry Andric continue;
284806c3fb27SDimitry Andric if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB)))
284906c3fb27SDimitry Andric return true;
285006c3fb27SDimitry Andric const auto &It = CEDMap.find({CB, PRE});
2851bdd1243dSDimitry Andric if (It == CEDMap.end())
2852bdd1243dSDimitry Andric continue;
28531ac55f4cSDimitry Andric if (!It->getSecond().IsReachingAlignedBarrierOnly)
285406c3fb27SDimitry Andric ForwardIsOk = false;
28551ac55f4cSDimitry Andric break;
2856bdd1243dSDimitry Andric } while ((CurI = CurI->getNextNonDebugInstruction()));
2857bdd1243dSDimitry Andric
28581ac55f4cSDimitry Andric if (!CurI && !BEDMap.lookup(I.getParent()).IsReachingAlignedBarrierOnly)
285906c3fb27SDimitry Andric ForwardIsOk = false;
2860bdd1243dSDimitry Andric
2861bdd1243dSDimitry Andric // Check backward until a call or the block beginning is reached.
2862bdd1243dSDimitry Andric CurI = &I;
2863bdd1243dSDimitry Andric do {
2864bdd1243dSDimitry Andric auto *CB = dyn_cast<CallBase>(CurI);
2865bdd1243dSDimitry Andric if (!CB)
2866bdd1243dSDimitry Andric continue;
286706c3fb27SDimitry Andric if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB)))
286806c3fb27SDimitry Andric return true;
286906c3fb27SDimitry Andric const auto &It = CEDMap.find({CB, POST});
2870bdd1243dSDimitry Andric if (It == CEDMap.end())
2871bdd1243dSDimitry Andric continue;
287206c3fb27SDimitry Andric if (It->getSecond().IsReachedFromAlignedBarrierOnly)
2873bdd1243dSDimitry Andric break;
2874bdd1243dSDimitry Andric return false;
2875bdd1243dSDimitry Andric } while ((CurI = CurI->getPrevNonDebugInstruction()));
2876bdd1243dSDimitry Andric
287706c3fb27SDimitry Andric // Delayed decision on the forward pass to allow aligned barrier detection
287806c3fb27SDimitry Andric // in the backwards traversal.
287906c3fb27SDimitry Andric if (!ForwardIsOk)
288006c3fb27SDimitry Andric return false;
288106c3fb27SDimitry Andric
288206c3fb27SDimitry Andric if (!CurI) {
288306c3fb27SDimitry Andric const BasicBlock *BB = I.getParent();
288406c3fb27SDimitry Andric if (BB == &BB->getParent()->getEntryBlock())
288506c3fb27SDimitry Andric return BEDMap.lookup(nullptr).IsReachedFromAlignedBarrierOnly;
288606c3fb27SDimitry Andric if (!llvm::all_of(predecessors(BB), [&](const BasicBlock *PredBB) {
2887bdd1243dSDimitry Andric return BEDMap.lookup(PredBB).IsReachedFromAlignedBarrierOnly;
2888bdd1243dSDimitry Andric })) {
2889bdd1243dSDimitry Andric return false;
2890bdd1243dSDimitry Andric }
289106c3fb27SDimitry Andric }
2892bdd1243dSDimitry Andric
2893bdd1243dSDimitry Andric // On neither traversal did we find anything but aligned barriers.
2894bdd1243dSDimitry Andric return true; 2895bdd1243dSDimitry Andric } 2896bdd1243dSDimitry Andric 2897bdd1243dSDimitry Andric ExecutionDomainTy getExecutionDomain(const BasicBlock &BB) const override { 2898bdd1243dSDimitry Andric assert(isValidState() && 2899bdd1243dSDimitry Andric "No request should be made against an invalid state!"); 2900bdd1243dSDimitry Andric return BEDMap.lookup(&BB); 2901bdd1243dSDimitry Andric } 290206c3fb27SDimitry Andric std::pair<ExecutionDomainTy, ExecutionDomainTy> 290306c3fb27SDimitry Andric getExecutionDomain(const CallBase &CB) const override { 2904bdd1243dSDimitry Andric assert(isValidState() && 2905bdd1243dSDimitry Andric "No request should be made against an invalid state!"); 290606c3fb27SDimitry Andric return {CEDMap.lookup({&CB, PRE}), CEDMap.lookup({&CB, POST})}; 2907bdd1243dSDimitry Andric } 2908bdd1243dSDimitry Andric ExecutionDomainTy getFunctionExecutionDomain() const override { 2909bdd1243dSDimitry Andric assert(isValidState() && 2910bdd1243dSDimitry Andric "No request should be made against an invalid state!"); 291106c3fb27SDimitry Andric return InterProceduralED; 2912bdd1243dSDimitry Andric } 2913bdd1243dSDimitry Andric ///} 2914fe6060f1SDimitry Andric 2915349cc55cSDimitry Andric // Check if the edge into the successor block contains a condition that only 2916349cc55cSDimitry Andric // lets the main thread execute it. 2917bdd1243dSDimitry Andric static bool isInitialThreadOnlyEdge(Attributor &A, BranchInst *Edge, 2918bdd1243dSDimitry Andric BasicBlock &SuccessorBB) { 2919fe6060f1SDimitry Andric if (!Edge || !Edge->isConditional()) 2920fe6060f1SDimitry Andric return false; 2921bdd1243dSDimitry Andric if (Edge->getSuccessor(0) != &SuccessorBB) 2922fe6060f1SDimitry Andric return false; 2923fe6060f1SDimitry Andric 2924fe6060f1SDimitry Andric auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition()); 2925fe6060f1SDimitry Andric if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality()) 2926fe6060f1SDimitry Andric return false; 2927fe6060f1SDimitry Andric 2928fe6060f1SDimitry Andric ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1)); 2929fe6060f1SDimitry Andric if (!C) 2930fe6060f1SDimitry Andric return false; 2931fe6060f1SDimitry Andric 2932fe6060f1SDimitry Andric // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!) 2933fe6060f1SDimitry Andric if (C->isAllOnesValue()) { 2934fe6060f1SDimitry Andric auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0)); 2935bdd1243dSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 2936bdd1243dSDimitry Andric auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; 2937fe6060f1SDimitry Andric CB = CB ? 
OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
2938fe6060f1SDimitry Andric if (!CB)
2939fe6060f1SDimitry Andric return false;
29405f757f3fSDimitry Andric ConstantStruct *KernelEnvC =
29415f757f3fSDimitry Andric KernelInfo::getKernelEnvironementFromKernelInitCB(CB);
29425f757f3fSDimitry Andric ConstantInt *ExecModeC =
29435f757f3fSDimitry Andric KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC);
29445f757f3fSDimitry Andric return ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC;
2945349cc55cSDimitry Andric }
2946349cc55cSDimitry Andric
2947349cc55cSDimitry Andric if (C->isZero()) {
2948349cc55cSDimitry Andric // Match: 0 == llvm.nvvm.read.ptx.sreg.tid.x()
2949349cc55cSDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
2950349cc55cSDimitry Andric if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
2951349cc55cSDimitry Andric return true;
2952349cc55cSDimitry Andric
2953349cc55cSDimitry Andric // Match: 0 == llvm.amdgcn.workitem.id.x()
2954349cc55cSDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
2955349cc55cSDimitry Andric if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
2956349cc55cSDimitry Andric return true;
2957fe6060f1SDimitry Andric }
2958fe6060f1SDimitry Andric
2959fe6060f1SDimitry Andric return false;
2960fe6060f1SDimitry Andric };
2961fe6060f1SDimitry Andric
296206c3fb27SDimitry Andric /// Mapping containing information about the function for other AAs.
296306c3fb27SDimitry Andric ExecutionDomainTy InterProceduralED;
296406c3fb27SDimitry Andric
296506c3fb27SDimitry Andric enum Direction { PRE = 0, POST = 1 };
2966bdd1243dSDimitry Andric /// Mapping containing information per block.
2967bdd1243dSDimitry Andric DenseMap<const BasicBlock *, ExecutionDomainTy> BEDMap;
296806c3fb27SDimitry Andric DenseMap<PointerIntPair<const CallBase *, 1, Direction>, ExecutionDomainTy>
296906c3fb27SDimitry Andric CEDMap;
2970bdd1243dSDimitry Andric SmallSetVector<CallBase *, 16> AlignedBarriers;
2971fe6060f1SDimitry Andric
2972bdd1243dSDimitry Andric ReversePostOrderTraversal<Function *> *RPOT = nullptr;
297306c3fb27SDimitry Andric
297406c3fb27SDimitry Andric /// Set \p R to \p V and report true if that changed \p R.
297506c3fb27SDimitry Andric static bool setAndRecord(bool &R, bool V) {
297606c3fb27SDimitry Andric bool Eq = (R == V);
297706c3fb27SDimitry Andric R = V;
297806c3fb27SDimitry Andric return !Eq;
297906c3fb27SDimitry Andric }
298006c3fb27SDimitry Andric
298106c3fb27SDimitry Andric /// Collection of fences known to be non-no-op. All fences not in this set
298206c3fb27SDimitry Andric /// can be assumed no-op.
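/// (Queried via isNoOpFence(); the distinction is only meaningful while the
/// AA is in a valid state.)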
298306c3fb27SDimitry Andric SmallPtrSet<const FenceInst *, 8> NonNoOpFences; 2984fe6060f1SDimitry Andric }; 2985fe6060f1SDimitry Andric 2986bdd1243dSDimitry Andric void AAExecutionDomainFunction::mergeInPredecessorBarriersAndAssumptions( 2987bdd1243dSDimitry Andric Attributor &A, ExecutionDomainTy &ED, const ExecutionDomainTy &PredED) { 2988bdd1243dSDimitry Andric for (auto *EA : PredED.EncounteredAssumes) 2989bdd1243dSDimitry Andric ED.addAssumeInst(A, *EA); 2990bdd1243dSDimitry Andric 2991bdd1243dSDimitry Andric for (auto *AB : PredED.AlignedBarriers) 2992bdd1243dSDimitry Andric ED.addAlignedBarrier(A, *AB); 2993fe6060f1SDimitry Andric } 2994fe6060f1SDimitry Andric 299506c3fb27SDimitry Andric bool AAExecutionDomainFunction::mergeInPredecessor( 2996bdd1243dSDimitry Andric Attributor &A, ExecutionDomainTy &ED, const ExecutionDomainTy &PredED, 2997bdd1243dSDimitry Andric bool InitialEdgeOnly) { 2998bdd1243dSDimitry Andric 299906c3fb27SDimitry Andric bool Changed = false; 300006c3fb27SDimitry Andric Changed |= 300106c3fb27SDimitry Andric setAndRecord(ED.IsExecutedByInitialThreadOnly, 300206c3fb27SDimitry Andric InitialEdgeOnly || (PredED.IsExecutedByInitialThreadOnly && 300306c3fb27SDimitry Andric ED.IsExecutedByInitialThreadOnly)); 300406c3fb27SDimitry Andric 300506c3fb27SDimitry Andric Changed |= setAndRecord(ED.IsReachedFromAlignedBarrierOnly, 300606c3fb27SDimitry Andric ED.IsReachedFromAlignedBarrierOnly && 300706c3fb27SDimitry Andric PredED.IsReachedFromAlignedBarrierOnly); 300806c3fb27SDimitry Andric Changed |= setAndRecord(ED.EncounteredNonLocalSideEffect, 300906c3fb27SDimitry Andric ED.EncounteredNonLocalSideEffect | 301006c3fb27SDimitry Andric PredED.EncounteredNonLocalSideEffect); 301106c3fb27SDimitry Andric // Do not track assumptions and barriers as part of Changed. 
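  // Summarizing the merge: IsExecutedByInitialThreadOnly and
  // IsReachedFromAlignedBarrierOnly are AND-ed across predecessors (the former
  // forced to true on an initial-thread-only edge), while
  // EncounteredNonLocalSideEffect is OR-ed; assumptions and aligned barriers
  // are only carried along while the state is still reached from aligned
  // barriers only (see below).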
3012bdd1243dSDimitry Andric   if (ED.IsReachedFromAlignedBarrierOnly)
3013bdd1243dSDimitry Andric     mergeInPredecessorBarriersAndAssumptions(A, ED, PredED);
3014bdd1243dSDimitry Andric   else
3015bdd1243dSDimitry Andric     ED.clearAssumeInstAndAlignedBarriers();
301606c3fb27SDimitry Andric   return Changed;
3017bdd1243dSDimitry Andric }
3018bdd1243dSDimitry Andric 
301906c3fb27SDimitry Andric bool AAExecutionDomainFunction::handleCallees(Attributor &A,
3020bdd1243dSDimitry Andric                                               ExecutionDomainTy &EntryBBED) {
302106c3fb27SDimitry Andric   SmallVector<std::pair<ExecutionDomainTy, ExecutionDomainTy>, 4> CallSiteEDs;
3022bdd1243dSDimitry Andric   auto PredForCallSite = [&](AbstractCallSite ACS) {
302306c3fb27SDimitry Andric     const auto *EDAA = A.getAAFor<AAExecutionDomain>(
3024bdd1243dSDimitry Andric         *this, IRPosition::function(*ACS.getInstruction()->getFunction()),
3025bdd1243dSDimitry Andric         DepClassTy::OPTIONAL);
302606c3fb27SDimitry Andric     if (!EDAA || !EDAA->getState().isValidState())
3027bdd1243dSDimitry Andric       return false;
302806c3fb27SDimitry Andric     CallSiteEDs.emplace_back(
302906c3fb27SDimitry Andric         EDAA->getExecutionDomain(*cast<CallBase>(ACS.getInstruction())));
3030bdd1243dSDimitry Andric     return true;
3031bdd1243dSDimitry Andric   };
3032bdd1243dSDimitry Andric 
303306c3fb27SDimitry Andric   ExecutionDomainTy ExitED;
3034bdd1243dSDimitry Andric   bool AllCallSitesKnown;
3035bdd1243dSDimitry Andric   if (A.checkForAllCallSites(PredForCallSite, *this,
3036bdd1243dSDimitry Andric                              /* RequiresAllCallSites */ true,
3037bdd1243dSDimitry Andric                              AllCallSitesKnown)) {
303806c3fb27SDimitry Andric     for (const auto &[CSInED, CSOutED] : CallSiteEDs) {
303906c3fb27SDimitry Andric       mergeInPredecessor(A, EntryBBED, CSInED);
304006c3fb27SDimitry Andric       ExitED.IsReachingAlignedBarrierOnly &=
304106c3fb27SDimitry Andric           CSOutED.IsReachingAlignedBarrierOnly;
304206c3fb27SDimitry Andric     }
3043bdd1243dSDimitry Andric 
3044bdd1243dSDimitry Andric   } else {
3045bdd1243dSDimitry Andric     // We could not find all predecessors, so this is either a kernel or a
3046bdd1243dSDimitry Andric     // function with external linkage (or with some other weird uses).
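    // For a kernel entry the incoming state is known: all threads enter the
    // kernel together (an implicit aligned barrier at __kmpc_target_init), no
    // non-local side effects have happened yet, and execution is not limited
    // to the initial thread. The exit is conservatively assumed not to reach
    // an aligned barrier only.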
30475f757f3fSDimitry Andric if (omp::isOpenMPKernel(*getAnchorScope())) { 3048bdd1243dSDimitry Andric EntryBBED.IsExecutedByInitialThreadOnly = false; 3049bdd1243dSDimitry Andric EntryBBED.IsReachedFromAlignedBarrierOnly = true; 3050bdd1243dSDimitry Andric EntryBBED.EncounteredNonLocalSideEffect = false; 30515f757f3fSDimitry Andric ExitED.IsReachingAlignedBarrierOnly = false; 3052bdd1243dSDimitry Andric } else { 3053bdd1243dSDimitry Andric EntryBBED.IsExecutedByInitialThreadOnly = false; 3054bdd1243dSDimitry Andric EntryBBED.IsReachedFromAlignedBarrierOnly = false; 3055bdd1243dSDimitry Andric EntryBBED.EncounteredNonLocalSideEffect = true; 305606c3fb27SDimitry Andric ExitED.IsReachingAlignedBarrierOnly = false; 3057bdd1243dSDimitry Andric } 3058bdd1243dSDimitry Andric } 3059bdd1243dSDimitry Andric 306006c3fb27SDimitry Andric bool Changed = false; 3061bdd1243dSDimitry Andric auto &FnED = BEDMap[nullptr]; 306206c3fb27SDimitry Andric Changed |= setAndRecord(FnED.IsReachedFromAlignedBarrierOnly, 306306c3fb27SDimitry Andric FnED.IsReachedFromAlignedBarrierOnly & 306406c3fb27SDimitry Andric EntryBBED.IsReachedFromAlignedBarrierOnly); 306506c3fb27SDimitry Andric Changed |= setAndRecord(FnED.IsReachingAlignedBarrierOnly, 306606c3fb27SDimitry Andric FnED.IsReachingAlignedBarrierOnly & 306706c3fb27SDimitry Andric ExitED.IsReachingAlignedBarrierOnly); 306806c3fb27SDimitry Andric Changed |= setAndRecord(FnED.IsExecutedByInitialThreadOnly, 306906c3fb27SDimitry Andric EntryBBED.IsExecutedByInitialThreadOnly); 307006c3fb27SDimitry Andric return Changed; 3071bdd1243dSDimitry Andric } 3072bdd1243dSDimitry Andric 3073bdd1243dSDimitry Andric ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { 3074bdd1243dSDimitry Andric 3075bdd1243dSDimitry Andric bool Changed = false; 3076bdd1243dSDimitry Andric 3077bdd1243dSDimitry Andric // Helper to deal with an aligned barrier encountered during the forward 3078bdd1243dSDimitry Andric // traversal. \p CB is the aligned barrier, \p ED is the execution domain when 3079bdd1243dSDimitry Andric // it was encountered. 308006c3fb27SDimitry Andric auto HandleAlignedBarrier = [&](CallBase &CB, ExecutionDomainTy &ED) { 308106c3fb27SDimitry Andric Changed |= AlignedBarriers.insert(&CB); 3082bdd1243dSDimitry Andric // First, update the barrier ED kept in the separate CEDMap. 308306c3fb27SDimitry Andric auto &CallInED = CEDMap[{&CB, PRE}]; 308406c3fb27SDimitry Andric Changed |= mergeInPredecessor(A, CallInED, ED); 308506c3fb27SDimitry Andric CallInED.IsReachingAlignedBarrierOnly = true; 3086bdd1243dSDimitry Andric // Next adjust the ED we use for the traversal. 3087bdd1243dSDimitry Andric ED.EncounteredNonLocalSideEffect = false; 3088bdd1243dSDimitry Andric ED.IsReachedFromAlignedBarrierOnly = true; 3089bdd1243dSDimitry Andric // Aligned barrier collection has to come last. 
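    // Rationale for the ordering below: once an aligned barrier is reached,
    // previously collected assumptions and barriers are superseded, so the
    // sets are cleared first and only the barrier just encountered is
    // re-added.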
3090bdd1243dSDimitry Andric     ED.clearAssumeInstAndAlignedBarriers();
309106c3fb27SDimitry Andric     ED.addAlignedBarrier(A, CB);
309206c3fb27SDimitry Andric     auto &CallOutED = CEDMap[{&CB, POST}];
309306c3fb27SDimitry Andric     Changed |= mergeInPredecessor(A, CallOutED, ED);
3094bdd1243dSDimitry Andric   };
3095bdd1243dSDimitry Andric 
309606c3fb27SDimitry Andric   auto *LivenessAA =
3097bdd1243dSDimitry Andric       A.getAAFor<AAIsDead>(*this, getIRPosition(), DepClassTy::OPTIONAL);
3098bdd1243dSDimitry Andric 
3099bdd1243dSDimitry Andric   Function *F = getAnchorScope();
3100bdd1243dSDimitry Andric   BasicBlock &EntryBB = F->getEntryBlock();
31015f757f3fSDimitry Andric   bool IsKernel = omp::isOpenMPKernel(*F);
3102bdd1243dSDimitry Andric 
3103bdd1243dSDimitry Andric   SmallVector<Instruction *> SyncInstWorklist;
3104bdd1243dSDimitry Andric   for (auto &RIt : *RPOT) {
3105bdd1243dSDimitry Andric     BasicBlock &BB = *RIt;
3106bdd1243dSDimitry Andric 
3107bdd1243dSDimitry Andric     bool IsEntryBB = &BB == &EntryBB;
3108bdd1243dSDimitry Andric     // TODO: We use local reasoning since we don't have a divergence analysis
3109bdd1243dSDimitry Andric     // running as well. We could basically allow uniform branches here.
3110bdd1243dSDimitry Andric     bool AlignedBarrierLastInBlock = IsEntryBB && IsKernel;
311106c3fb27SDimitry Andric     bool IsExplicitlyAligned = IsEntryBB && IsKernel;
3112bdd1243dSDimitry Andric     ExecutionDomainTy ED;
3113bdd1243dSDimitry Andric     // Propagate "incoming edges" into information about this block.
3114bdd1243dSDimitry Andric     if (IsEntryBB) {
311506c3fb27SDimitry Andric       Changed |= handleCallees(A, ED);
3116bdd1243dSDimitry Andric     } else {
3117bdd1243dSDimitry Andric       // For live non-entry blocks we only propagate
3118bdd1243dSDimitry Andric       // information via live edges.
311906c3fb27SDimitry Andric       if (LivenessAA && LivenessAA->isAssumedDead(&BB))
3120bdd1243dSDimitry Andric         continue;
3121bdd1243dSDimitry Andric 
3122bdd1243dSDimitry Andric       for (auto *PredBB : predecessors(&BB)) {
312306c3fb27SDimitry Andric         if (LivenessAA && LivenessAA->isEdgeDead(PredBB, &BB))
3124bdd1243dSDimitry Andric           continue;
3125bdd1243dSDimitry Andric         bool InitialEdgeOnly = isInitialThreadOnlyEdge(
3126bdd1243dSDimitry Andric             A, dyn_cast<BranchInst>(PredBB->getTerminator()), BB);
3127bdd1243dSDimitry Andric         mergeInPredecessor(A, ED, BEDMap[PredBB], InitialEdgeOnly);
3128bdd1243dSDimitry Andric       }
3129bdd1243dSDimitry Andric     }
3130bdd1243dSDimitry Andric 
3131bdd1243dSDimitry Andric     // Now we traverse the block, accumulate effects in ED and attach
3132bdd1243dSDimitry Andric     // information to calls.
3133bdd1243dSDimitry Andric     for (Instruction &I : BB) {
3134bdd1243dSDimitry Andric       bool UsedAssumedInformation;
313506c3fb27SDimitry Andric       if (A.isAssumedDead(I, *this, LivenessAA, UsedAssumedInformation,
3136bdd1243dSDimitry Andric                           /* CheckBBLivenessOnly */ false, DepClassTy::OPTIONAL,
3137bdd1243dSDimitry Andric                           /* CheckForDeadStore */ true))
3138bdd1243dSDimitry Andric         continue;
3139bdd1243dSDimitry Andric 
3140bdd1243dSDimitry Andric       // Assume calls and "assume-like" intrinsics (dbg, lifetime, ...) are
3141bdd1243dSDimitry Andric       // handled first; the former are collected, the latter are ignored.
3142bdd1243dSDimitry Andric       if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
3143bdd1243dSDimitry Andric         if (auto *AI = dyn_cast_or_null<AssumeInst>(II)) {
3144bdd1243dSDimitry Andric           ED.addAssumeInst(A, *AI);
3145bdd1243dSDimitry Andric           continue;
3146bdd1243dSDimitry Andric         }
3147bdd1243dSDimitry Andric         // TODO: Should we also collect and delete lifetime markers?
3148bdd1243dSDimitry Andric if (II->isAssumeLikeIntrinsic()) 3149bdd1243dSDimitry Andric continue; 3150bdd1243dSDimitry Andric } 3151bdd1243dSDimitry Andric 315206c3fb27SDimitry Andric if (auto *FI = dyn_cast<FenceInst>(&I)) { 315306c3fb27SDimitry Andric if (!ED.EncounteredNonLocalSideEffect) { 315406c3fb27SDimitry Andric // An aligned fence without non-local side-effects is a no-op. 315506c3fb27SDimitry Andric if (ED.IsReachedFromAlignedBarrierOnly) 315606c3fb27SDimitry Andric continue; 315706c3fb27SDimitry Andric // A non-aligned fence without non-local side-effects is a no-op 315806c3fb27SDimitry Andric // if the ordering only publishes non-local side-effects (or less). 315906c3fb27SDimitry Andric switch (FI->getOrdering()) { 316006c3fb27SDimitry Andric case AtomicOrdering::NotAtomic: 316106c3fb27SDimitry Andric continue; 316206c3fb27SDimitry Andric case AtomicOrdering::Unordered: 316306c3fb27SDimitry Andric continue; 316406c3fb27SDimitry Andric case AtomicOrdering::Monotonic: 316506c3fb27SDimitry Andric continue; 316606c3fb27SDimitry Andric case AtomicOrdering::Acquire: 316706c3fb27SDimitry Andric break; 316806c3fb27SDimitry Andric case AtomicOrdering::Release: 316906c3fb27SDimitry Andric continue; 317006c3fb27SDimitry Andric case AtomicOrdering::AcquireRelease: 317106c3fb27SDimitry Andric break; 317206c3fb27SDimitry Andric case AtomicOrdering::SequentiallyConsistent: 317306c3fb27SDimitry Andric break; 317406c3fb27SDimitry Andric }; 317506c3fb27SDimitry Andric } 317606c3fb27SDimitry Andric NonNoOpFences.insert(FI); 317706c3fb27SDimitry Andric } 317806c3fb27SDimitry Andric 3179bdd1243dSDimitry Andric auto *CB = dyn_cast<CallBase>(&I); 3180bdd1243dSDimitry Andric bool IsNoSync = AA::isNoSyncInst(A, I, *this); 3181bdd1243dSDimitry Andric bool IsAlignedBarrier = 3182bdd1243dSDimitry Andric !IsNoSync && CB && 3183bdd1243dSDimitry Andric AANoSync::isAlignedBarrier(*CB, AlignedBarrierLastInBlock); 3184bdd1243dSDimitry Andric 3185bdd1243dSDimitry Andric AlignedBarrierLastInBlock &= IsNoSync; 318606c3fb27SDimitry Andric IsExplicitlyAligned &= IsNoSync; 3187bdd1243dSDimitry Andric 3188bdd1243dSDimitry Andric // Next we check for calls. Aligned barriers are handled 3189bdd1243dSDimitry Andric // explicitly, everything else is kept for the backward traversal and will 3190bdd1243dSDimitry Andric // also affect our state. 3191bdd1243dSDimitry Andric if (CB) { 3192bdd1243dSDimitry Andric if (IsAlignedBarrier) { 319306c3fb27SDimitry Andric HandleAlignedBarrier(*CB, ED); 3194bdd1243dSDimitry Andric AlignedBarrierLastInBlock = true; 319506c3fb27SDimitry Andric IsExplicitlyAligned = true; 3196bdd1243dSDimitry Andric continue; 3197bdd1243dSDimitry Andric } 3198bdd1243dSDimitry Andric 3199bdd1243dSDimitry Andric // Check the pointer(s) of a memory intrinsic explicitly. 3200bdd1243dSDimitry Andric if (isa<MemIntrinsic>(&I)) { 3201bdd1243dSDimitry Andric if (!ED.EncounteredNonLocalSideEffect && 3202bdd1243dSDimitry Andric AA::isPotentiallyAffectedByBarrier(A, I, *this)) 3203bdd1243dSDimitry Andric ED.EncounteredNonLocalSideEffect = true; 3204bdd1243dSDimitry Andric if (!IsNoSync) { 3205bdd1243dSDimitry Andric ED.IsReachedFromAlignedBarrierOnly = false; 3206bdd1243dSDimitry Andric SyncInstWorklist.push_back(&I); 3207bdd1243dSDimitry Andric } 3208bdd1243dSDimitry Andric continue; 3209bdd1243dSDimitry Andric } 3210bdd1243dSDimitry Andric 3211bdd1243dSDimitry Andric // Record how we entered the call, then accumulate the effect of the 3212bdd1243dSDimitry Andric // call in ED for potential use by the callee. 
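      // CEDMap keys every call twice: {CB, PRE} holds the domain on entry to
      // the call, {CB, POST} the domain after it returns; this is the pair
      // that getExecutionDomain(const CallBase &) hands out, e.g.
      // (illustrative only, Guard is an invented name):
      //   auto [InED, OutED] = EDAA->getExecutionDomain(*CB);
      //   bool Guard = !InED.IsExecutedByInitialThreadOnly;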
321306c3fb27SDimitry Andric auto &CallInED = CEDMap[{CB, PRE}]; 321406c3fb27SDimitry Andric Changed |= mergeInPredecessor(A, CallInED, ED); 3215bdd1243dSDimitry Andric 3216bdd1243dSDimitry Andric // If we have a sync-definition we can check if it starts/ends in an 3217bdd1243dSDimitry Andric // aligned barrier. If we are unsure we assume any sync breaks 3218bdd1243dSDimitry Andric // alignment. 3219bdd1243dSDimitry Andric Function *Callee = CB->getCalledFunction(); 3220bdd1243dSDimitry Andric if (!IsNoSync && Callee && !Callee->isDeclaration()) { 322106c3fb27SDimitry Andric const auto *EDAA = A.getAAFor<AAExecutionDomain>( 3222bdd1243dSDimitry Andric *this, IRPosition::function(*Callee), DepClassTy::OPTIONAL); 322306c3fb27SDimitry Andric if (EDAA && EDAA->getState().isValidState()) { 322406c3fb27SDimitry Andric const auto &CalleeED = EDAA->getFunctionExecutionDomain(); 3225bdd1243dSDimitry Andric ED.IsReachedFromAlignedBarrierOnly = 3226bdd1243dSDimitry Andric CalleeED.IsReachedFromAlignedBarrierOnly; 3227bdd1243dSDimitry Andric AlignedBarrierLastInBlock = ED.IsReachedFromAlignedBarrierOnly; 3228bdd1243dSDimitry Andric if (IsNoSync || !CalleeED.IsReachedFromAlignedBarrierOnly) 3229bdd1243dSDimitry Andric ED.EncounteredNonLocalSideEffect |= 3230bdd1243dSDimitry Andric CalleeED.EncounteredNonLocalSideEffect; 3231bdd1243dSDimitry Andric else 3232bdd1243dSDimitry Andric ED.EncounteredNonLocalSideEffect = 3233bdd1243dSDimitry Andric CalleeED.EncounteredNonLocalSideEffect; 323406c3fb27SDimitry Andric if (!CalleeED.IsReachingAlignedBarrierOnly) { 323506c3fb27SDimitry Andric Changed |= 323606c3fb27SDimitry Andric setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false); 3237bdd1243dSDimitry Andric SyncInstWorklist.push_back(&I); 323806c3fb27SDimitry Andric } 3239bdd1243dSDimitry Andric if (CalleeED.IsReachedFromAlignedBarrierOnly) 3240bdd1243dSDimitry Andric mergeInPredecessorBarriersAndAssumptions(A, ED, CalleeED); 324106c3fb27SDimitry Andric auto &CallOutED = CEDMap[{CB, POST}]; 324206c3fb27SDimitry Andric Changed |= mergeInPredecessor(A, CallOutED, ED); 3243bdd1243dSDimitry Andric continue; 3244bdd1243dSDimitry Andric } 3245bdd1243dSDimitry Andric } 324606c3fb27SDimitry Andric if (!IsNoSync) { 324706c3fb27SDimitry Andric ED.IsReachedFromAlignedBarrierOnly = false; 324806c3fb27SDimitry Andric Changed |= setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false); 324906c3fb27SDimitry Andric SyncInstWorklist.push_back(&I); 325006c3fb27SDimitry Andric } 3251bdd1243dSDimitry Andric AlignedBarrierLastInBlock &= ED.IsReachedFromAlignedBarrierOnly; 3252bdd1243dSDimitry Andric ED.EncounteredNonLocalSideEffect |= !CB->doesNotAccessMemory(); 325306c3fb27SDimitry Andric auto &CallOutED = CEDMap[{CB, POST}]; 325406c3fb27SDimitry Andric Changed |= mergeInPredecessor(A, CallOutED, ED); 3255bdd1243dSDimitry Andric } 3256bdd1243dSDimitry Andric 3257bdd1243dSDimitry Andric if (!I.mayHaveSideEffects() && !I.mayReadFromMemory()) 3258bdd1243dSDimitry Andric continue; 3259bdd1243dSDimitry Andric 3260bdd1243dSDimitry Andric // If we have a callee we try to use fine-grained information to 3261bdd1243dSDimitry Andric // determine local side-effects. 
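      // AAMemoryLocation provides per-access information for the call site; an
      // access whose pointer provably refers to thread-private memory (e.g. a
      // caller-local alloca) cannot be affected by a barrier and is therefore
      // not counted as a non-local side effect.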
3262bdd1243dSDimitry Andric if (CB) { 326306c3fb27SDimitry Andric const auto *MemAA = A.getAAFor<AAMemoryLocation>( 3264bdd1243dSDimitry Andric *this, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL); 3265bdd1243dSDimitry Andric 3266bdd1243dSDimitry Andric auto AccessPred = [&](const Instruction *I, const Value *Ptr, 3267bdd1243dSDimitry Andric AAMemoryLocation::AccessKind, 3268bdd1243dSDimitry Andric AAMemoryLocation::MemoryLocationsKind) { 3269bdd1243dSDimitry Andric return !AA::isPotentiallyAffectedByBarrier(A, {Ptr}, *this, I); 3270bdd1243dSDimitry Andric }; 327106c3fb27SDimitry Andric if (MemAA && MemAA->getState().isValidState() && 327206c3fb27SDimitry Andric MemAA->checkForAllAccessesToMemoryKind( 3273bdd1243dSDimitry Andric AccessPred, AAMemoryLocation::ALL_LOCATIONS)) 3274bdd1243dSDimitry Andric continue; 3275bdd1243dSDimitry Andric } 3276bdd1243dSDimitry Andric 327706c3fb27SDimitry Andric auto &InfoCache = A.getInfoCache(); 327806c3fb27SDimitry Andric if (!I.mayHaveSideEffects() && InfoCache.isOnlyUsedByAssume(I)) 3279bdd1243dSDimitry Andric continue; 3280bdd1243dSDimitry Andric 3281bdd1243dSDimitry Andric if (auto *LI = dyn_cast<LoadInst>(&I)) 3282bdd1243dSDimitry Andric if (LI->hasMetadata(LLVMContext::MD_invariant_load)) 3283bdd1243dSDimitry Andric continue; 3284bdd1243dSDimitry Andric 3285bdd1243dSDimitry Andric if (!ED.EncounteredNonLocalSideEffect && 3286bdd1243dSDimitry Andric AA::isPotentiallyAffectedByBarrier(A, I, *this)) 3287bdd1243dSDimitry Andric ED.EncounteredNonLocalSideEffect = true; 3288bdd1243dSDimitry Andric } 3289bdd1243dSDimitry Andric 329006c3fb27SDimitry Andric bool IsEndAndNotReachingAlignedBarriersOnly = false; 3291bdd1243dSDimitry Andric if (!isa<UnreachableInst>(BB.getTerminator()) && 3292bdd1243dSDimitry Andric !BB.getTerminator()->getNumSuccessors()) { 3293bdd1243dSDimitry Andric 329406c3fb27SDimitry Andric Changed |= mergeInPredecessor(A, InterProceduralED, ED); 3295bdd1243dSDimitry Andric 329606c3fb27SDimitry Andric auto &FnED = BEDMap[nullptr]; 329706c3fb27SDimitry Andric if (IsKernel && !IsExplicitlyAligned) 329806c3fb27SDimitry Andric FnED.IsReachingAlignedBarrierOnly = false; 329906c3fb27SDimitry Andric Changed |= mergeInPredecessor(A, FnED, ED); 330006c3fb27SDimitry Andric 330106c3fb27SDimitry Andric if (!FnED.IsReachingAlignedBarrierOnly) { 330206c3fb27SDimitry Andric IsEndAndNotReachingAlignedBarriersOnly = true; 330306c3fb27SDimitry Andric SyncInstWorklist.push_back(BB.getTerminator()); 330406c3fb27SDimitry Andric auto &BBED = BEDMap[&BB]; 330506c3fb27SDimitry Andric Changed |= setAndRecord(BBED.IsReachingAlignedBarrierOnly, false); 330606c3fb27SDimitry Andric } 3307bdd1243dSDimitry Andric } 3308bdd1243dSDimitry Andric 3309bdd1243dSDimitry Andric ExecutionDomainTy &StoredED = BEDMap[&BB]; 331006c3fb27SDimitry Andric ED.IsReachingAlignedBarrierOnly = StoredED.IsReachingAlignedBarrierOnly & 331106c3fb27SDimitry Andric !IsEndAndNotReachingAlignedBarriersOnly; 3312bdd1243dSDimitry Andric 3313bdd1243dSDimitry Andric // Check if we computed anything different as part of the forward 3314bdd1243dSDimitry Andric // traversal. We do not take assumptions and aligned barriers into account 3315bdd1243dSDimitry Andric // as they do not influence the state we iterate. Backward traversal values 3316bdd1243dSDimitry Andric // are handled later on. 
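    // Reporting a change here is what drives the fixpoint iteration: the
    // Attributor keeps invoking updateImpl until the forward state stops
    // changing (backward effects are propagated by the worklist below).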
3317bdd1243dSDimitry Andric if (ED.IsExecutedByInitialThreadOnly != 3318bdd1243dSDimitry Andric StoredED.IsExecutedByInitialThreadOnly || 3319bdd1243dSDimitry Andric ED.IsReachedFromAlignedBarrierOnly != 3320bdd1243dSDimitry Andric StoredED.IsReachedFromAlignedBarrierOnly || 3321bdd1243dSDimitry Andric ED.EncounteredNonLocalSideEffect != 3322bdd1243dSDimitry Andric StoredED.EncounteredNonLocalSideEffect) 3323bdd1243dSDimitry Andric Changed = true; 3324bdd1243dSDimitry Andric 3325bdd1243dSDimitry Andric // Update the state with the new value. 3326bdd1243dSDimitry Andric StoredED = std::move(ED); 3327bdd1243dSDimitry Andric } 3328bdd1243dSDimitry Andric 3329bdd1243dSDimitry Andric // Propagate (non-aligned) sync instruction effects backwards until the 3330bdd1243dSDimitry Andric // entry is hit or an aligned barrier. 3331bdd1243dSDimitry Andric SmallSetVector<BasicBlock *, 16> Visited; 3332bdd1243dSDimitry Andric while (!SyncInstWorklist.empty()) { 3333bdd1243dSDimitry Andric Instruction *SyncInst = SyncInstWorklist.pop_back_val(); 3334bdd1243dSDimitry Andric Instruction *CurInst = SyncInst; 333506c3fb27SDimitry Andric bool HitAlignedBarrierOrKnownEnd = false; 3336bdd1243dSDimitry Andric while ((CurInst = CurInst->getPrevNode())) { 3337bdd1243dSDimitry Andric auto *CB = dyn_cast<CallBase>(CurInst); 3338bdd1243dSDimitry Andric if (!CB) 3339bdd1243dSDimitry Andric continue; 334006c3fb27SDimitry Andric auto &CallOutED = CEDMap[{CB, POST}]; 334106c3fb27SDimitry Andric Changed |= setAndRecord(CallOutED.IsReachingAlignedBarrierOnly, false); 334206c3fb27SDimitry Andric auto &CallInED = CEDMap[{CB, PRE}]; 334306c3fb27SDimitry Andric HitAlignedBarrierOrKnownEnd = 334406c3fb27SDimitry Andric AlignedBarriers.count(CB) || !CallInED.IsReachingAlignedBarrierOnly; 334506c3fb27SDimitry Andric if (HitAlignedBarrierOrKnownEnd) 3346bdd1243dSDimitry Andric break; 334706c3fb27SDimitry Andric Changed |= setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false); 3348bdd1243dSDimitry Andric } 334906c3fb27SDimitry Andric if (HitAlignedBarrierOrKnownEnd) 3350bdd1243dSDimitry Andric continue; 3351bdd1243dSDimitry Andric BasicBlock *SyncBB = SyncInst->getParent(); 3352bdd1243dSDimitry Andric for (auto *PredBB : predecessors(SyncBB)) { 335306c3fb27SDimitry Andric if (LivenessAA && LivenessAA->isEdgeDead(PredBB, SyncBB)) 3354bdd1243dSDimitry Andric continue; 3355bdd1243dSDimitry Andric if (!Visited.insert(PredBB)) 3356bdd1243dSDimitry Andric continue; 3357bdd1243dSDimitry Andric auto &PredED = BEDMap[PredBB]; 335806c3fb27SDimitry Andric if (setAndRecord(PredED.IsReachingAlignedBarrierOnly, false)) { 3359bdd1243dSDimitry Andric Changed = true; 336006c3fb27SDimitry Andric SyncInstWorklist.push_back(PredBB->getTerminator()); 336106c3fb27SDimitry Andric } 3362bdd1243dSDimitry Andric } 3363bdd1243dSDimitry Andric if (SyncBB != &EntryBB) 3364bdd1243dSDimitry Andric continue; 336506c3fb27SDimitry Andric Changed |= 336606c3fb27SDimitry Andric setAndRecord(InterProceduralED.IsReachingAlignedBarrierOnly, false); 3367bdd1243dSDimitry Andric } 3368bdd1243dSDimitry Andric 3369bdd1243dSDimitry Andric return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; 3370fe6060f1SDimitry Andric } 3371fe6060f1SDimitry Andric 3372fe6060f1SDimitry Andric /// Try to replace memory allocation calls called by a single thread with a 3373fe6060f1SDimitry Andric /// static buffer of shared memory. 
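/// Schematically (illustrative IR only; the buffer name is invented and the
/// shared address space is 3 on the GPU targets handled here):
///
///   %p = call ptr @__kmpc_alloc_shared(i64 16)
///   ...
///   call void @__kmpc_free_shared(ptr %p, i64 16)
///
/// becomes a use of
///
///   @p_shared = internal addrspace(3) global [16 x i8] poison
///
/// with the single matching __kmpc_free_shared call removed.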
3374fe6060f1SDimitry Andric struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> { 3375fe6060f1SDimitry Andric using Base = StateWrapper<BooleanState, AbstractAttribute>; 3376fe6060f1SDimitry Andric AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 3377fe6060f1SDimitry Andric 3378fe6060f1SDimitry Andric /// Create an abstract attribute view for the position \p IRP. 3379fe6060f1SDimitry Andric static AAHeapToShared &createForPosition(const IRPosition &IRP, 3380fe6060f1SDimitry Andric Attributor &A); 3381fe6060f1SDimitry Andric 3382fe6060f1SDimitry Andric /// Returns true if HeapToShared conversion is assumed to be possible. 3383fe6060f1SDimitry Andric virtual bool isAssumedHeapToShared(CallBase &CB) const = 0; 3384fe6060f1SDimitry Andric 3385fe6060f1SDimitry Andric /// Returns true if HeapToShared conversion is assumed and the CB is a 3386fe6060f1SDimitry Andric /// callsite to a free operation to be removed. 3387fe6060f1SDimitry Andric virtual bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const = 0; 3388fe6060f1SDimitry Andric 3389fe6060f1SDimitry Andric /// See AbstractAttribute::getName(). 3390fe6060f1SDimitry Andric const std::string getName() const override { return "AAHeapToShared"; } 3391fe6060f1SDimitry Andric 3392fe6060f1SDimitry Andric /// See AbstractAttribute::getIdAddr(). 3393fe6060f1SDimitry Andric const char *getIdAddr() const override { return &ID; } 3394fe6060f1SDimitry Andric 3395fe6060f1SDimitry Andric /// This function should return true if the type of the \p AA is 3396fe6060f1SDimitry Andric /// AAHeapToShared. 3397fe6060f1SDimitry Andric static bool classof(const AbstractAttribute *AA) { 3398fe6060f1SDimitry Andric return (AA->getIdAddr() == &ID); 3399fe6060f1SDimitry Andric } 3400fe6060f1SDimitry Andric 3401fe6060f1SDimitry Andric /// Unique ID (due to the unique address) 3402fe6060f1SDimitry Andric static const char ID; 3403fe6060f1SDimitry Andric }; 3404fe6060f1SDimitry Andric 3405fe6060f1SDimitry Andric struct AAHeapToSharedFunction : public AAHeapToShared { 3406fe6060f1SDimitry Andric AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A) 3407fe6060f1SDimitry Andric : AAHeapToShared(IRP, A) {} 3408fe6060f1SDimitry Andric 340906c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override { 3410fe6060f1SDimitry Andric return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) + 3411fe6060f1SDimitry Andric " malloc calls eligible."; 3412fe6060f1SDimitry Andric } 3413fe6060f1SDimitry Andric 3414fe6060f1SDimitry Andric /// See AbstractAttribute::trackStatistics(). 3415fe6060f1SDimitry Andric void trackStatistics() const override {} 3416fe6060f1SDimitry Andric 3417fe6060f1SDimitry Andric /// This functions finds free calls that will be removed by the 3418fe6060f1SDimitry Andric /// HeapToShared transformation. 3419fe6060f1SDimitry Andric void findPotentialRemovedFreeCalls(Attributor &A) { 3420fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 3421fe6060f1SDimitry Andric auto &FreeRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared]; 3422fe6060f1SDimitry Andric 3423fe6060f1SDimitry Andric PotentialRemovedFreeCalls.clear(); 3424fe6060f1SDimitry Andric // Update free call users of found malloc calls. 
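    // Only allocations with exactly one free user are recorded: for those the
    // single __kmpc_free_shared call can later be deleted together with the
    // allocation it frees.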
3425fe6060f1SDimitry Andric for (CallBase *CB : MallocCalls) { 3426fe6060f1SDimitry Andric SmallVector<CallBase *, 4> FreeCalls; 3427fe6060f1SDimitry Andric for (auto *U : CB->users()) { 3428fe6060f1SDimitry Andric CallBase *C = dyn_cast<CallBase>(U); 3429fe6060f1SDimitry Andric if (C && C->getCalledFunction() == FreeRFI.Declaration) 3430fe6060f1SDimitry Andric FreeCalls.push_back(C); 3431fe6060f1SDimitry Andric } 3432fe6060f1SDimitry Andric 3433fe6060f1SDimitry Andric if (FreeCalls.size() != 1) 3434fe6060f1SDimitry Andric continue; 3435fe6060f1SDimitry Andric 3436fe6060f1SDimitry Andric PotentialRemovedFreeCalls.insert(FreeCalls.front()); 3437fe6060f1SDimitry Andric } 3438fe6060f1SDimitry Andric } 3439fe6060f1SDimitry Andric 3440fe6060f1SDimitry Andric void initialize(Attributor &A) override { 344181ad6265SDimitry Andric if (DisableOpenMPOptDeglobalization) { 344281ad6265SDimitry Andric indicatePessimisticFixpoint(); 344381ad6265SDimitry Andric return; 344481ad6265SDimitry Andric } 344581ad6265SDimitry Andric 3446fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 3447fe6060f1SDimitry Andric auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; 3448bdd1243dSDimitry Andric if (!RFI.Declaration) 3449bdd1243dSDimitry Andric return; 3450fe6060f1SDimitry Andric 345181ad6265SDimitry Andric Attributor::SimplifictionCallbackTy SCB = 345281ad6265SDimitry Andric [](const IRPosition &, const AbstractAttribute *, 3453bdd1243dSDimitry Andric bool &) -> std::optional<Value *> { return nullptr; }; 3454bdd1243dSDimitry Andric 3455bdd1243dSDimitry Andric Function *F = getAnchorScope(); 3456fe6060f1SDimitry Andric for (User *U : RFI.Declaration->users()) 345781ad6265SDimitry Andric if (CallBase *CB = dyn_cast<CallBase>(U)) { 3458bdd1243dSDimitry Andric if (CB->getFunction() != F) 3459bdd1243dSDimitry Andric continue; 3460fe6060f1SDimitry Andric MallocCalls.insert(CB); 346181ad6265SDimitry Andric A.registerSimplificationCallback(IRPosition::callsite_returned(*CB), 346281ad6265SDimitry Andric SCB); 346381ad6265SDimitry Andric } 3464fe6060f1SDimitry Andric 3465fe6060f1SDimitry Andric findPotentialRemovedFreeCalls(A); 3466fe6060f1SDimitry Andric } 3467fe6060f1SDimitry Andric 3468fe6060f1SDimitry Andric bool isAssumedHeapToShared(CallBase &CB) const override { 3469fe6060f1SDimitry Andric return isValidState() && MallocCalls.count(&CB); 3470fe6060f1SDimitry Andric } 3471fe6060f1SDimitry Andric 3472fe6060f1SDimitry Andric bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const override { 3473fe6060f1SDimitry Andric return isValidState() && PotentialRemovedFreeCalls.count(&CB); 3474fe6060f1SDimitry Andric } 3475fe6060f1SDimitry Andric 3476fe6060f1SDimitry Andric ChangeStatus manifest(Attributor &A) override { 3477fe6060f1SDimitry Andric if (MallocCalls.empty()) 3478fe6060f1SDimitry Andric return ChangeStatus::UNCHANGED; 3479fe6060f1SDimitry Andric 3480fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 3481fe6060f1SDimitry Andric auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared]; 3482fe6060f1SDimitry Andric 3483fe6060f1SDimitry Andric Function *F = getAnchorScope(); 3484fe6060f1SDimitry Andric auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this, 3485fe6060f1SDimitry Andric DepClassTy::OPTIONAL); 3486fe6060f1SDimitry Andric 3487fe6060f1SDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED; 3488fe6060f1SDimitry Andric for (CallBase *CB : MallocCalls) { 3489fe6060f1SDimitry 
Andric // Skip replacing this if HeapToStack has already claimed it. 3490fe6060f1SDimitry Andric if (HS && HS->isAssumedHeapToStack(*CB)) 3491fe6060f1SDimitry Andric continue; 3492fe6060f1SDimitry Andric 3493fe6060f1SDimitry Andric // Find the unique free call to remove it. 3494fe6060f1SDimitry Andric SmallVector<CallBase *, 4> FreeCalls; 3495fe6060f1SDimitry Andric for (auto *U : CB->users()) { 3496fe6060f1SDimitry Andric CallBase *C = dyn_cast<CallBase>(U); 3497fe6060f1SDimitry Andric if (C && C->getCalledFunction() == FreeCall.Declaration) 3498fe6060f1SDimitry Andric FreeCalls.push_back(C); 3499fe6060f1SDimitry Andric } 3500fe6060f1SDimitry Andric if (FreeCalls.size() != 1) 3501fe6060f1SDimitry Andric continue; 3502fe6060f1SDimitry Andric 350304eeddc0SDimitry Andric auto *AllocSize = cast<ConstantInt>(CB->getArgOperand(0)); 3504fe6060f1SDimitry Andric 350581ad6265SDimitry Andric if (AllocSize->getZExtValue() + SharedMemoryUsed > SharedMemoryLimit) { 350681ad6265SDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Cannot replace call " << *CB 350781ad6265SDimitry Andric << " with shared memory." 350881ad6265SDimitry Andric << " Shared memory usage is limited to " 350981ad6265SDimitry Andric << SharedMemoryLimit << " bytes\n"); 351081ad6265SDimitry Andric continue; 351181ad6265SDimitry Andric } 351281ad6265SDimitry Andric 3513349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CB 3514349cc55cSDimitry Andric << " with " << AllocSize->getZExtValue() 3515fe6060f1SDimitry Andric << " bytes of shared memory\n"); 3516fe6060f1SDimitry Andric 3517fe6060f1SDimitry Andric // Create a new shared memory buffer of the same size as the allocation 3518fe6060f1SDimitry Andric // and replace all the uses of the original allocation with it. 3519fe6060f1SDimitry Andric Module *M = CB->getModule(); 3520fe6060f1SDimitry Andric Type *Int8Ty = Type::getInt8Ty(M->getContext()); 3521fe6060f1SDimitry Andric Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue()); 3522fe6060f1SDimitry Andric auto *SharedMem = new GlobalVariable( 3523fe6060f1SDimitry Andric *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage, 352406c3fb27SDimitry Andric PoisonValue::get(Int8ArrTy), CB->getName() + "_shared", nullptr, 3525fe6060f1SDimitry Andric GlobalValue::NotThreadLocal, 3526fe6060f1SDimitry Andric static_cast<unsigned>(AddressSpace::Shared)); 3527fe6060f1SDimitry Andric auto *NewBuffer = 3528fe6060f1SDimitry Andric ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo()); 3529fe6060f1SDimitry Andric 3530fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemark OR) { 3531fe6060f1SDimitry Andric return OR << "Replaced globalized variable with " 3532fe6060f1SDimitry Andric << ore::NV("SharedMemory", AllocSize->getZExtValue()) 353306c3fb27SDimitry Andric << (AllocSize->isOne() ? 
" byte " : " bytes ") 3534fe6060f1SDimitry Andric << "of shared memory."; 3535fe6060f1SDimitry Andric }; 3536fe6060f1SDimitry Andric A.emitRemark<OptimizationRemark>(CB, "OMP111", Remark); 3537fe6060f1SDimitry Andric 353804eeddc0SDimitry Andric MaybeAlign Alignment = CB->getRetAlign(); 353904eeddc0SDimitry Andric assert(Alignment && 354004eeddc0SDimitry Andric "HeapToShared on allocation without alignment attribute"); 354106c3fb27SDimitry Andric SharedMem->setAlignment(*Alignment); 3542fe6060f1SDimitry Andric 354381ad6265SDimitry Andric A.changeAfterManifest(IRPosition::callsite_returned(*CB), *NewBuffer); 3544fe6060f1SDimitry Andric A.deleteAfterManifest(*CB); 3545fe6060f1SDimitry Andric A.deleteAfterManifest(*FreeCalls.front()); 3546fe6060f1SDimitry Andric 354781ad6265SDimitry Andric SharedMemoryUsed += AllocSize->getZExtValue(); 354881ad6265SDimitry Andric NumBytesMovedToSharedMemory = SharedMemoryUsed; 3549fe6060f1SDimitry Andric Changed = ChangeStatus::CHANGED; 3550fe6060f1SDimitry Andric } 3551fe6060f1SDimitry Andric 3552fe6060f1SDimitry Andric return Changed; 3553fe6060f1SDimitry Andric } 3554fe6060f1SDimitry Andric 3555fe6060f1SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 3556bdd1243dSDimitry Andric if (MallocCalls.empty()) 3557bdd1243dSDimitry Andric return indicatePessimisticFixpoint(); 3558fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 3559fe6060f1SDimitry Andric auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; 3560bdd1243dSDimitry Andric if (!RFI.Declaration) 3561bdd1243dSDimitry Andric return ChangeStatus::UNCHANGED; 3562bdd1243dSDimitry Andric 3563fe6060f1SDimitry Andric Function *F = getAnchorScope(); 3564fe6060f1SDimitry Andric 3565fe6060f1SDimitry Andric auto NumMallocCalls = MallocCalls.size(); 3566fe6060f1SDimitry Andric 3567fe6060f1SDimitry Andric // Only consider malloc calls executed by a single thread with a constant. 3568fe6060f1SDimitry Andric for (User *U : RFI.Declaration->users()) { 3569bdd1243dSDimitry Andric if (CallBase *CB = dyn_cast<CallBase>(U)) { 3570bdd1243dSDimitry Andric if (CB->getCaller() != F) 3571bdd1243dSDimitry Andric continue; 3572bdd1243dSDimitry Andric if (!MallocCalls.count(CB)) 3573bdd1243dSDimitry Andric continue; 3574bdd1243dSDimitry Andric if (!isa<ConstantInt>(CB->getArgOperand(0))) { 3575bdd1243dSDimitry Andric MallocCalls.remove(CB); 3576bdd1243dSDimitry Andric continue; 3577bdd1243dSDimitry Andric } 357806c3fb27SDimitry Andric const auto *ED = A.getAAFor<AAExecutionDomain>( 3579fe6060f1SDimitry Andric *this, IRPosition::function(*F), DepClassTy::REQUIRED); 358006c3fb27SDimitry Andric if (!ED || !ED->isExecutedByInitialThreadOnly(*CB)) 358104eeddc0SDimitry Andric MallocCalls.remove(CB); 3582fe6060f1SDimitry Andric } 3583bdd1243dSDimitry Andric } 3584fe6060f1SDimitry Andric 3585fe6060f1SDimitry Andric findPotentialRemovedFreeCalls(A); 3586fe6060f1SDimitry Andric 3587fe6060f1SDimitry Andric if (NumMallocCalls != MallocCalls.size()) 3588fe6060f1SDimitry Andric return ChangeStatus::CHANGED; 3589fe6060f1SDimitry Andric 3590fe6060f1SDimitry Andric return ChangeStatus::UNCHANGED; 3591fe6060f1SDimitry Andric } 3592fe6060f1SDimitry Andric 3593fe6060f1SDimitry Andric /// Collection of all malloc calls in a function. 359404eeddc0SDimitry Andric SmallSetVector<CallBase *, 4> MallocCalls; 3595fe6060f1SDimitry Andric /// Collection of potentially removed free calls in a function. 
3596fe6060f1SDimitry Andric SmallPtrSet<CallBase *, 4> PotentialRemovedFreeCalls; 359781ad6265SDimitry Andric /// The total amount of shared memory that has been used for HeapToShared. 359881ad6265SDimitry Andric unsigned SharedMemoryUsed = 0; 3599fe6060f1SDimitry Andric }; 3600fe6060f1SDimitry Andric 3601fe6060f1SDimitry Andric struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> { 3602fe6060f1SDimitry Andric using Base = StateWrapper<KernelInfoState, AbstractAttribute>; 3603fe6060f1SDimitry Andric AAKernelInfo(const IRPosition &IRP, Attributor &A) : Base(IRP) {} 3604fe6060f1SDimitry Andric 36055f757f3fSDimitry Andric /// The callee value is tracked beyond a simple stripPointerCasts, so we allow 36065f757f3fSDimitry Andric /// unknown callees. 36075f757f3fSDimitry Andric static bool requiresCalleeForCallBase() { return false; } 36085f757f3fSDimitry Andric 3609fe6060f1SDimitry Andric /// Statistics are tracked as part of manifest for now. 3610fe6060f1SDimitry Andric void trackStatistics() const override {} 3611fe6060f1SDimitry Andric 3612fe6060f1SDimitry Andric /// See AbstractAttribute::getAsStr() 361306c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override { 3614fe6060f1SDimitry Andric if (!isValidState()) 3615fe6060f1SDimitry Andric return "<invalid>"; 3616fe6060f1SDimitry Andric return std::string(SPMDCompatibilityTracker.isAssumed() ? "SPMD" 3617fe6060f1SDimitry Andric : "generic") + 3618fe6060f1SDimitry Andric std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]" 3619fe6060f1SDimitry Andric : "") + 3620fe6060f1SDimitry Andric std::string(" #PRs: ") + 3621349cc55cSDimitry Andric (ReachedKnownParallelRegions.isValidState() 3622349cc55cSDimitry Andric ? std::to_string(ReachedKnownParallelRegions.size()) 3623349cc55cSDimitry Andric : "<invalid>") + 3624fe6060f1SDimitry Andric ", #Unknown PRs: " + 3625349cc55cSDimitry Andric (ReachedUnknownParallelRegions.isValidState() 3626349cc55cSDimitry Andric ? std::to_string(ReachedUnknownParallelRegions.size()) 3627349cc55cSDimitry Andric : "<invalid>") + 3628349cc55cSDimitry Andric ", #Reaching Kernels: " + 3629349cc55cSDimitry Andric (ReachingKernelEntries.isValidState() 3630349cc55cSDimitry Andric ? std::to_string(ReachingKernelEntries.size()) 3631bdd1243dSDimitry Andric : "<invalid>") + 3632bdd1243dSDimitry Andric ", #ParLevels: " + 3633bdd1243dSDimitry Andric (ParallelLevels.isValidState() 3634bdd1243dSDimitry Andric ? std::to_string(ParallelLevels.size()) 36355f757f3fSDimitry Andric : "<invalid>") + 36365f757f3fSDimitry Andric ", NestedPar: " + (NestedParallelism ? "yes" : "no"); 3637fe6060f1SDimitry Andric } 3638fe6060f1SDimitry Andric 3639fe6060f1SDimitry Andric /// Create an abstract attribute biew for the position \p IRP. 
3640fe6060f1SDimitry Andric static AAKernelInfo &createForPosition(const IRPosition &IRP, Attributor &A); 3641fe6060f1SDimitry Andric 3642fe6060f1SDimitry Andric /// See AbstractAttribute::getName() 3643fe6060f1SDimitry Andric const std::string getName() const override { return "AAKernelInfo"; } 3644fe6060f1SDimitry Andric 3645fe6060f1SDimitry Andric /// See AbstractAttribute::getIdAddr() 3646fe6060f1SDimitry Andric const char *getIdAddr() const override { return &ID; } 3647fe6060f1SDimitry Andric 3648fe6060f1SDimitry Andric /// This function should return true if the type of the \p AA is AAKernelInfo 3649fe6060f1SDimitry Andric static bool classof(const AbstractAttribute *AA) { 3650fe6060f1SDimitry Andric return (AA->getIdAddr() == &ID); 3651fe6060f1SDimitry Andric } 3652fe6060f1SDimitry Andric 3653fe6060f1SDimitry Andric static const char ID; 3654fe6060f1SDimitry Andric }; 3655fe6060f1SDimitry Andric 3656fe6060f1SDimitry Andric /// The function kernel info abstract attribute, basically, what can we say 3657fe6060f1SDimitry Andric /// about a function with regards to the KernelInfoState. 3658fe6060f1SDimitry Andric struct AAKernelInfoFunction : AAKernelInfo { 3659fe6060f1SDimitry Andric AAKernelInfoFunction(const IRPosition &IRP, Attributor &A) 3660fe6060f1SDimitry Andric : AAKernelInfo(IRP, A) {} 3661fe6060f1SDimitry Andric 3662349cc55cSDimitry Andric SmallPtrSet<Instruction *, 4> GuardedInstructions; 3663349cc55cSDimitry Andric 3664349cc55cSDimitry Andric SmallPtrSetImpl<Instruction *> &getGuardedInstructions() { 3665349cc55cSDimitry Andric return GuardedInstructions; 3666349cc55cSDimitry Andric } 3667349cc55cSDimitry Andric 36685f757f3fSDimitry Andric void setConfigurationOfKernelEnvironment(ConstantStruct *ConfigC) { 36695f757f3fSDimitry Andric Constant *NewKernelEnvC = ConstantFoldInsertValueInstruction( 36705f757f3fSDimitry Andric KernelEnvC, ConfigC, {KernelInfo::ConfigurationIdx}); 36715f757f3fSDimitry Andric assert(NewKernelEnvC && "Failed to create new kernel environment"); 36725f757f3fSDimitry Andric KernelEnvC = cast<ConstantStruct>(NewKernelEnvC); 36735f757f3fSDimitry Andric } 36745f757f3fSDimitry Andric 36755f757f3fSDimitry Andric #define KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MEMBER) \ 36765f757f3fSDimitry Andric void set##MEMBER##OfKernelEnvironment(ConstantInt *NewVal) { \ 36775f757f3fSDimitry Andric ConstantStruct *ConfigC = \ 36785f757f3fSDimitry Andric KernelInfo::getConfigurationFromKernelEnvironment(KernelEnvC); \ 36795f757f3fSDimitry Andric Constant *NewConfigC = ConstantFoldInsertValueInstruction( \ 36805f757f3fSDimitry Andric ConfigC, NewVal, {KernelInfo::MEMBER##Idx}); \ 36815f757f3fSDimitry Andric assert(NewConfigC && "Failed to create new configuration environment"); \ 36825f757f3fSDimitry Andric setConfigurationOfKernelEnvironment(cast<ConstantStruct>(NewConfigC)); \ 36835f757f3fSDimitry Andric } 36845f757f3fSDimitry Andric 36855f757f3fSDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(UseGenericStateMachine) 36865f757f3fSDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MayUseNestedParallelism) 36875f757f3fSDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(ExecMode) 36885f757f3fSDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MinThreads) 36895f757f3fSDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MaxThreads) 36905f757f3fSDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MinTeams) 36915f757f3fSDimitry Andric KERNEL_ENVIRONMENT_CONFIGURATION_SETTER(MaxTeams) 36925f757f3fSDimitry Andric 36935f757f3fSDimitry Andric #undef 
KERNEL_ENVIRONMENT_CONFIGURATION_SETTER
36945f757f3fSDimitry Andric 
3695fe6060f1SDimitry Andric   /// See AbstractAttribute::initialize(...).
3696fe6060f1SDimitry Andric   void initialize(Attributor &A) override {
3697fe6060f1SDimitry Andric     // This is a high-level transform that might change the constant arguments
3698fe6060f1SDimitry Andric     // of the init and deinit calls. We need to tell the Attributor about this
3699fe6060f1SDimitry Andric     // to avoid other parts using the current constant value for simplification.
3700fe6060f1SDimitry Andric     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3701fe6060f1SDimitry Andric 
3702fe6060f1SDimitry Andric     Function *Fn = getAnchorScope();
3703fe6060f1SDimitry Andric 
3704fe6060f1SDimitry Andric     OMPInformationCache::RuntimeFunctionInfo &InitRFI =
3705fe6060f1SDimitry Andric         OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
3706fe6060f1SDimitry Andric     OMPInformationCache::RuntimeFunctionInfo &DeinitRFI =
3707fe6060f1SDimitry Andric         OMPInfoCache.RFIs[OMPRTL___kmpc_target_deinit];
3708fe6060f1SDimitry Andric 
3709fe6060f1SDimitry Andric     // For kernels we perform more initialization work, first we find the init
3710fe6060f1SDimitry Andric     // and deinit calls.
3711fe6060f1SDimitry Andric     auto StoreCallBase = [](Use &U,
3712fe6060f1SDimitry Andric                             OMPInformationCache::RuntimeFunctionInfo &RFI,
3713fe6060f1SDimitry Andric                             CallBase *&Storage) {
3714fe6060f1SDimitry Andric       CallBase *CB = OpenMPOpt::getCallIfRegularCall(U, &RFI);
3715fe6060f1SDimitry Andric       assert(CB &&
3716fe6060f1SDimitry Andric              "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!");
3717fe6060f1SDimitry Andric       assert(!Storage &&
3718fe6060f1SDimitry Andric              "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!");
3719fe6060f1SDimitry Andric       Storage = CB;
3720fe6060f1SDimitry Andric       return false;
3721fe6060f1SDimitry Andric     };
3722fe6060f1SDimitry Andric     InitRFI.foreachUse(
3723fe6060f1SDimitry Andric         [&](Use &U, Function &) {
3724fe6060f1SDimitry Andric           StoreCallBase(U, InitRFI, KernelInitCB);
3725fe6060f1SDimitry Andric           return false;
3726fe6060f1SDimitry Andric         },
3727fe6060f1SDimitry Andric         Fn);
3728fe6060f1SDimitry Andric     DeinitRFI.foreachUse(
3729fe6060f1SDimitry Andric         [&](Use &U, Function &) {
3730fe6060f1SDimitry Andric           StoreCallBase(U, DeinitRFI, KernelDeinitCB);
3731fe6060f1SDimitry Andric           return false;
3732fe6060f1SDimitry Andric         },
3733fe6060f1SDimitry Andric         Fn);
3734fe6060f1SDimitry Andric 
3735349cc55cSDimitry Andric     // Ignore kernels without initializers such as global constructors.
373681ad6265SDimitry Andric     if (!KernelInitCB || !KernelDeinitCB)
3737349cc55cSDimitry Andric       return;
373881ad6265SDimitry Andric 
373981ad6265SDimitry Andric     // Add itself to the reaching kernel and set IsKernelEntry.
374081ad6265SDimitry Andric ReachingKernelEntries.insert(Fn); 374181ad6265SDimitry Andric IsKernelEntry = true; 3742fe6060f1SDimitry Andric 37435f757f3fSDimitry Andric KernelEnvC = 37445f757f3fSDimitry Andric KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB); 37455f757f3fSDimitry Andric GlobalVariable *KernelEnvGV = 37465f757f3fSDimitry Andric KernelInfo::getKernelEnvironementGVFromKernelInitCB(KernelInitCB); 3747fe6060f1SDimitry Andric 37485f757f3fSDimitry Andric Attributor::GlobalVariableSimplifictionCallbackTy 37495f757f3fSDimitry Andric KernelConfigurationSimplifyCB = 37505f757f3fSDimitry Andric [&](const GlobalVariable &GV, const AbstractAttribute *AA, 37515f757f3fSDimitry Andric bool &UsedAssumedInformation) -> std::optional<Constant *> { 37525f757f3fSDimitry Andric if (!isAtFixpoint()) { 37535f757f3fSDimitry Andric if (!AA) 3754349cc55cSDimitry Andric return nullptr; 3755fe6060f1SDimitry Andric UsedAssumedInformation = true; 37565f757f3fSDimitry Andric A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); 3757fe6060f1SDimitry Andric } 37585f757f3fSDimitry Andric return KernelEnvC; 3759fe6060f1SDimitry Andric }; 3760fe6060f1SDimitry Andric 37615f757f3fSDimitry Andric A.registerGlobalVariableSimplificationCallback( 37625f757f3fSDimitry Andric *KernelEnvGV, KernelConfigurationSimplifyCB); 3763fe6060f1SDimitry Andric 3764fe6060f1SDimitry Andric // Check if we know we are in SPMD-mode already. 37655f757f3fSDimitry Andric ConstantInt *ExecModeC = 37665f757f3fSDimitry Andric KernelInfo::getExecModeFromKernelEnvironment(KernelEnvC); 37675f757f3fSDimitry Andric ConstantInt *AssumedExecModeC = ConstantInt::get( 3768cb14a3feSDimitry Andric ExecModeC->getIntegerType(), 37695f757f3fSDimitry Andric ExecModeC->getSExtValue() | OMP_TGT_EXEC_MODE_GENERIC_SPMD); 37705f757f3fSDimitry Andric if (ExecModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD) 3771fe6060f1SDimitry Andric SPMDCompatibilityTracker.indicateOptimisticFixpoint(); 3772349cc55cSDimitry Andric else if (DisableOpenMPOptSPMDization) 37735f757f3fSDimitry Andric // This is a generic region but SPMDization is disabled so stop 37745f757f3fSDimitry Andric // tracking. 
3775349cc55cSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint(); 37765f757f3fSDimitry Andric else 37775f757f3fSDimitry Andric setExecModeOfKernelEnvironment(AssumedExecModeC); 37785f757f3fSDimitry Andric 37795f757f3fSDimitry Andric const Triple T(Fn->getParent()->getTargetTriple()); 37805f757f3fSDimitry Andric auto *Int32Ty = Type::getInt32Ty(Fn->getContext()); 37815f757f3fSDimitry Andric auto [MinThreads, MaxThreads] = 37825f757f3fSDimitry Andric OpenMPIRBuilder::readThreadBoundsForKernel(T, *Fn); 37835f757f3fSDimitry Andric if (MinThreads) 37845f757f3fSDimitry Andric setMinThreadsOfKernelEnvironment(ConstantInt::get(Int32Ty, MinThreads)); 37855f757f3fSDimitry Andric if (MaxThreads) 37865f757f3fSDimitry Andric setMaxThreadsOfKernelEnvironment(ConstantInt::get(Int32Ty, MaxThreads)); 37875f757f3fSDimitry Andric auto [MinTeams, MaxTeams] = 37885f757f3fSDimitry Andric OpenMPIRBuilder::readTeamBoundsForKernel(T, *Fn); 37895f757f3fSDimitry Andric if (MinTeams) 37905f757f3fSDimitry Andric setMinTeamsOfKernelEnvironment(ConstantInt::get(Int32Ty, MinTeams)); 37915f757f3fSDimitry Andric if (MaxTeams) 37925f757f3fSDimitry Andric setMaxTeamsOfKernelEnvironment(ConstantInt::get(Int32Ty, MaxTeams)); 37935f757f3fSDimitry Andric 37945f757f3fSDimitry Andric ConstantInt *MayUseNestedParallelismC = 37955f757f3fSDimitry Andric KernelInfo::getMayUseNestedParallelismFromKernelEnvironment(KernelEnvC); 37965f757f3fSDimitry Andric ConstantInt *AssumedMayUseNestedParallelismC = ConstantInt::get( 3797cb14a3feSDimitry Andric MayUseNestedParallelismC->getIntegerType(), NestedParallelism); 37985f757f3fSDimitry Andric setMayUseNestedParallelismOfKernelEnvironment( 37995f757f3fSDimitry Andric AssumedMayUseNestedParallelismC); 38005f757f3fSDimitry Andric 38015f757f3fSDimitry Andric if (!DisableOpenMPOptStateMachineRewrite) { 38025f757f3fSDimitry Andric ConstantInt *UseGenericStateMachineC = 38035f757f3fSDimitry Andric KernelInfo::getUseGenericStateMachineFromKernelEnvironment( 38045f757f3fSDimitry Andric KernelEnvC); 38055f757f3fSDimitry Andric ConstantInt *AssumedUseGenericStateMachineC = 3806cb14a3feSDimitry Andric ConstantInt::get(UseGenericStateMachineC->getIntegerType(), false); 38075f757f3fSDimitry Andric setUseGenericStateMachineOfKernelEnvironment( 38085f757f3fSDimitry Andric AssumedUseGenericStateMachineC); 38095f757f3fSDimitry Andric } 3810bdd1243dSDimitry Andric 3811bdd1243dSDimitry Andric // Register virtual uses of functions we might need to preserve. 3812bdd1243dSDimitry Andric auto RegisterVirtualUse = [&](RuntimeFunction RFKind, 3813bdd1243dSDimitry Andric Attributor::VirtualUseCallbackTy &CB) { 3814bdd1243dSDimitry Andric if (!OMPInfoCache.RFIs[RFKind].Declaration) 3815bdd1243dSDimitry Andric return; 3816bdd1243dSDimitry Andric A.registerVirtualUseCallback(*OMPInfoCache.RFIs[RFKind].Declaration, CB); 3817bdd1243dSDimitry Andric }; 3818bdd1243dSDimitry Andric 3819bdd1243dSDimitry Andric // Add a dependence to ensure updates if the state changes. 
3820bdd1243dSDimitry Andric auto AddDependence = [](Attributor &A, const AAKernelInfo *KI, 3821bdd1243dSDimitry Andric const AbstractAttribute *QueryingAA) { 3822bdd1243dSDimitry Andric if (QueryingAA) { 3823bdd1243dSDimitry Andric A.recordDependence(*KI, *QueryingAA, DepClassTy::OPTIONAL); 3824bdd1243dSDimitry Andric } 3825bdd1243dSDimitry Andric return true; 3826bdd1243dSDimitry Andric }; 3827bdd1243dSDimitry Andric 3828bdd1243dSDimitry Andric Attributor::VirtualUseCallbackTy CustomStateMachineUseCB = 3829bdd1243dSDimitry Andric [&](Attributor &A, const AbstractAttribute *QueryingAA) { 3830bdd1243dSDimitry Andric // Whenever we create a custom state machine we will insert calls to 3831bdd1243dSDimitry Andric // __kmpc_get_hardware_num_threads_in_block, 3832bdd1243dSDimitry Andric // __kmpc_get_warp_size, 3833bdd1243dSDimitry Andric // __kmpc_barrier_simple_generic, 3834bdd1243dSDimitry Andric // __kmpc_kernel_parallel, and 3835bdd1243dSDimitry Andric // __kmpc_kernel_end_parallel. 3836bdd1243dSDimitry Andric // Not needed if we are on track for SPMDzation. 3837bdd1243dSDimitry Andric if (SPMDCompatibilityTracker.isValidState()) 3838bdd1243dSDimitry Andric return AddDependence(A, this, QueryingAA); 3839bdd1243dSDimitry Andric // Not needed if we can't rewrite due to an invalid state. 3840bdd1243dSDimitry Andric if (!ReachedKnownParallelRegions.isValidState()) 3841bdd1243dSDimitry Andric return AddDependence(A, this, QueryingAA); 3842bdd1243dSDimitry Andric return false; 3843bdd1243dSDimitry Andric }; 3844bdd1243dSDimitry Andric 3845bdd1243dSDimitry Andric // Not needed if we are pre-runtime merge. 3846bdd1243dSDimitry Andric if (!KernelInitCB->getCalledFunction()->isDeclaration()) { 3847bdd1243dSDimitry Andric RegisterVirtualUse(OMPRTL___kmpc_get_hardware_num_threads_in_block, 3848bdd1243dSDimitry Andric CustomStateMachineUseCB); 3849bdd1243dSDimitry Andric RegisterVirtualUse(OMPRTL___kmpc_get_warp_size, CustomStateMachineUseCB); 3850bdd1243dSDimitry Andric RegisterVirtualUse(OMPRTL___kmpc_barrier_simple_generic, 3851bdd1243dSDimitry Andric CustomStateMachineUseCB); 3852bdd1243dSDimitry Andric RegisterVirtualUse(OMPRTL___kmpc_kernel_parallel, 3853bdd1243dSDimitry Andric CustomStateMachineUseCB); 3854bdd1243dSDimitry Andric RegisterVirtualUse(OMPRTL___kmpc_kernel_end_parallel, 3855bdd1243dSDimitry Andric CustomStateMachineUseCB); 3856bdd1243dSDimitry Andric } 3857bdd1243dSDimitry Andric 3858bdd1243dSDimitry Andric // If we do not perform SPMDzation we do not need the virtual uses below. 3859bdd1243dSDimitry Andric if (SPMDCompatibilityTracker.isAtFixpoint()) 3860bdd1243dSDimitry Andric return; 3861bdd1243dSDimitry Andric 3862bdd1243dSDimitry Andric Attributor::VirtualUseCallbackTy HWThreadIdUseCB = 3863bdd1243dSDimitry Andric [&](Attributor &A, const AbstractAttribute *QueryingAA) { 3864bdd1243dSDimitry Andric // Whenever we perform SPMDzation we will insert 3865bdd1243dSDimitry Andric // __kmpc_get_hardware_thread_id_in_block calls. 
3866bdd1243dSDimitry Andric if (!SPMDCompatibilityTracker.isValidState()) 3867bdd1243dSDimitry Andric return AddDependence(A, this, QueryingAA); 3868bdd1243dSDimitry Andric return false; 3869bdd1243dSDimitry Andric }; 3870bdd1243dSDimitry Andric RegisterVirtualUse(OMPRTL___kmpc_get_hardware_thread_id_in_block, 3871bdd1243dSDimitry Andric HWThreadIdUseCB); 3872bdd1243dSDimitry Andric 3873bdd1243dSDimitry Andric Attributor::VirtualUseCallbackTy SPMDBarrierUseCB = 3874bdd1243dSDimitry Andric [&](Attributor &A, const AbstractAttribute *QueryingAA) { 3875bdd1243dSDimitry Andric // Whenever we perform SPMDzation with guarding we will insert 3876bdd1243dSDimitry Andric // __kmpc_simple_barrier_spmd calls. If SPMDzation failed, there is 3877bdd1243dSDimitry Andric // nothing to guard, or there are no parallel regions, we don't need 3878bdd1243dSDimitry Andric // the calls. 3879bdd1243dSDimitry Andric if (!SPMDCompatibilityTracker.isValidState()) 3880bdd1243dSDimitry Andric return AddDependence(A, this, QueryingAA); 3881bdd1243dSDimitry Andric if (SPMDCompatibilityTracker.empty()) 3882bdd1243dSDimitry Andric return AddDependence(A, this, QueryingAA); 3883bdd1243dSDimitry Andric if (!mayContainParallelRegion()) 3884bdd1243dSDimitry Andric return AddDependence(A, this, QueryingAA); 3885bdd1243dSDimitry Andric return false; 3886bdd1243dSDimitry Andric }; 3887bdd1243dSDimitry Andric RegisterVirtualUse(OMPRTL___kmpc_barrier_simple_spmd, SPMDBarrierUseCB); 3888349cc55cSDimitry Andric } 3889349cc55cSDimitry Andric 3890349cc55cSDimitry Andric /// Sanitize the string \p S such that it is a suitable global symbol name. 3891349cc55cSDimitry Andric static std::string sanitizeForGlobalName(std::string S) { 3892349cc55cSDimitry Andric std::replace_if( 3893349cc55cSDimitry Andric S.begin(), S.end(), 3894349cc55cSDimitry Andric [](const char C) { 3895349cc55cSDimitry Andric return !((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') || 3896349cc55cSDimitry Andric (C >= '0' && C <= '9') || C == '_'); 3897349cc55cSDimitry Andric }, 3898349cc55cSDimitry Andric '.'); 3899349cc55cSDimitry Andric return S; 3900fe6060f1SDimitry Andric } 3901fe6060f1SDimitry Andric 3902fe6060f1SDimitry Andric /// Modify the IR based on the KernelInfoState as the fixpoint iteration is 3903fe6060f1SDimitry Andric /// finished now. 3904fe6060f1SDimitry Andric ChangeStatus manifest(Attributor &A) override { 3905fe6060f1SDimitry Andric // If we are not looking at a kernel with __kmpc_target_init and 3906fe6060f1SDimitry Andric // __kmpc_target_deinit call we cannot actually manifest the information. 3907fe6060f1SDimitry Andric if (!KernelInitCB || !KernelDeinitCB) 3908fe6060f1SDimitry Andric return ChangeStatus::UNCHANGED; 3909fe6060f1SDimitry Andric 3910349cc55cSDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED; 39115f757f3fSDimitry Andric 39125f757f3fSDimitry Andric bool HasBuiltStateMachine = true; 3913bdd1243dSDimitry Andric if (!changeToSPMDMode(A, Changed)) { 3914bdd1243dSDimitry Andric if (!KernelInitCB->getCalledFunction()->isDeclaration()) 39155f757f3fSDimitry Andric HasBuiltStateMachine = buildCustomStateMachine(A, Changed); 39165f757f3fSDimitry Andric else 39175f757f3fSDimitry Andric HasBuiltStateMachine = false; 39185f757f3fSDimitry Andric } 39195f757f3fSDimitry Andric 39205f757f3fSDimitry Andric // We need to reset KernelEnvC if specific rewriting is not done. 
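    // Concretely: UseGenericStateMachine was optimistically assumed "false" in
    // initialize() (when the rewrite is enabled), so if no custom state
    // machine was built we must restore the original value read from the
    // kernel environment in the IR.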
39215f757f3fSDimitry Andric ConstantStruct *ExistingKernelEnvC = 39225f757f3fSDimitry Andric KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB); 39235f757f3fSDimitry Andric ConstantInt *OldUseGenericStateMachineVal = 39245f757f3fSDimitry Andric KernelInfo::getUseGenericStateMachineFromKernelEnvironment( 39255f757f3fSDimitry Andric ExistingKernelEnvC); 39265f757f3fSDimitry Andric if (!HasBuiltStateMachine) 39275f757f3fSDimitry Andric setUseGenericStateMachineOfKernelEnvironment( 39285f757f3fSDimitry Andric OldUseGenericStateMachineVal); 39295f757f3fSDimitry Andric 39305f757f3fSDimitry Andric // At last, update the KernelEnvc 39315f757f3fSDimitry Andric GlobalVariable *KernelEnvGV = 39325f757f3fSDimitry Andric KernelInfo::getKernelEnvironementGVFromKernelInitCB(KernelInitCB); 39335f757f3fSDimitry Andric if (KernelEnvGV->getInitializer() != KernelEnvC) { 39345f757f3fSDimitry Andric KernelEnvGV->setInitializer(KernelEnvC); 39355f757f3fSDimitry Andric Changed = ChangeStatus::CHANGED; 3936bdd1243dSDimitry Andric } 3937fe6060f1SDimitry Andric 3938349cc55cSDimitry Andric return Changed; 3939fe6060f1SDimitry Andric } 3940fe6060f1SDimitry Andric 3941bdd1243dSDimitry Andric void insertInstructionGuardsHelper(Attributor &A) { 3942fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 3943fe6060f1SDimitry Andric 3944349cc55cSDimitry Andric auto CreateGuardedRegion = [&](Instruction *RegionStartI, 3945349cc55cSDimitry Andric Instruction *RegionEndI) { 3946349cc55cSDimitry Andric LoopInfo *LI = nullptr; 3947349cc55cSDimitry Andric DominatorTree *DT = nullptr; 3948349cc55cSDimitry Andric MemorySSAUpdater *MSU = nullptr; 3949349cc55cSDimitry Andric using InsertPointTy = OpenMPIRBuilder::InsertPointTy; 3950349cc55cSDimitry Andric 3951349cc55cSDimitry Andric BasicBlock *ParentBB = RegionStartI->getParent(); 3952349cc55cSDimitry Andric Function *Fn = ParentBB->getParent(); 3953349cc55cSDimitry Andric Module &M = *Fn->getParent(); 3954349cc55cSDimitry Andric 3955349cc55cSDimitry Andric // Create all the blocks and logic. 3956349cc55cSDimitry Andric // ParentBB: 3957349cc55cSDimitry Andric // goto RegionCheckTidBB 3958349cc55cSDimitry Andric // RegionCheckTidBB: 3959349cc55cSDimitry Andric // Tid = __kmpc_hardware_thread_id() 3960349cc55cSDimitry Andric // if (Tid != 0) 3961349cc55cSDimitry Andric // goto RegionBarrierBB 3962349cc55cSDimitry Andric // RegionStartBB: 3963349cc55cSDimitry Andric // <execute instructions guarded> 3964349cc55cSDimitry Andric // goto RegionEndBB 3965349cc55cSDimitry Andric // RegionEndBB: 3966349cc55cSDimitry Andric // <store escaping values to shared mem> 3967349cc55cSDimitry Andric // goto RegionBarrierBB 3968349cc55cSDimitry Andric // RegionBarrierBB: 3969349cc55cSDimitry Andric // __kmpc_simple_barrier_spmd() 3970349cc55cSDimitry Andric // // second barrier is omitted if lacking escaping values. 
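// The store is placed at the end of the guarded region (RegionEndBB), so the
// main thread publishes the value to shared memory before the first barrier;
// the load emitted in RegionBarrierBB then makes it visible to all threads.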
3971349cc55cSDimitry Andric // <load escaping values from shared mem> 3972349cc55cSDimitry Andric // __kmpc_simple_barrier_spmd() 3973349cc55cSDimitry Andric // goto RegionExitBB 3974349cc55cSDimitry Andric // RegionExitBB: 3975349cc55cSDimitry Andric // <execute rest of instructions> 3976349cc55cSDimitry Andric 3977349cc55cSDimitry Andric BasicBlock *RegionEndBB = SplitBlock(ParentBB, RegionEndI->getNextNode(), 3978349cc55cSDimitry Andric DT, LI, MSU, "region.guarded.end"); 3979349cc55cSDimitry Andric BasicBlock *RegionBarrierBB = 3980349cc55cSDimitry Andric SplitBlock(RegionEndBB, &*RegionEndBB->getFirstInsertionPt(), DT, LI, 3981349cc55cSDimitry Andric MSU, "region.barrier"); 3982349cc55cSDimitry Andric BasicBlock *RegionExitBB = 3983349cc55cSDimitry Andric SplitBlock(RegionBarrierBB, &*RegionBarrierBB->getFirstInsertionPt(), 3984349cc55cSDimitry Andric DT, LI, MSU, "region.exit"); 3985349cc55cSDimitry Andric BasicBlock *RegionStartBB = 3986349cc55cSDimitry Andric SplitBlock(ParentBB, RegionStartI, DT, LI, MSU, "region.guarded"); 3987349cc55cSDimitry Andric 3988349cc55cSDimitry Andric assert(ParentBB->getUniqueSuccessor() == RegionStartBB && 3989349cc55cSDimitry Andric "Expected a different CFG"); 3990349cc55cSDimitry Andric 3991349cc55cSDimitry Andric BasicBlock *RegionCheckTidBB = SplitBlock( 3992349cc55cSDimitry Andric ParentBB, ParentBB->getTerminator(), DT, LI, MSU, "region.check.tid"); 3993349cc55cSDimitry Andric 3994349cc55cSDimitry Andric // Register basic blocks with the Attributor. 3995349cc55cSDimitry Andric A.registerManifestAddedBasicBlock(*RegionEndBB); 3996349cc55cSDimitry Andric A.registerManifestAddedBasicBlock(*RegionBarrierBB); 3997349cc55cSDimitry Andric A.registerManifestAddedBasicBlock(*RegionExitBB); 3998349cc55cSDimitry Andric A.registerManifestAddedBasicBlock(*RegionStartBB); 3999349cc55cSDimitry Andric A.registerManifestAddedBasicBlock(*RegionCheckTidBB); 4000349cc55cSDimitry Andric 4001349cc55cSDimitry Andric bool HasBroadcastValues = false; 4002349cc55cSDimitry Andric // Find escaping outputs from the guarded region to outside users and 4003349cc55cSDimitry Andric // broadcast their values to them. 4004349cc55cSDimitry Andric for (Instruction &I : *RegionStartBB) { 40055f757f3fSDimitry Andric SmallVector<Use *, 4> OutsideUses; 40065f757f3fSDimitry Andric for (Use &U : I.uses()) { 40075f757f3fSDimitry Andric Instruction &UsrI = *cast<Instruction>(U.getUser()); 4008349cc55cSDimitry Andric if (UsrI.getParent() != RegionStartBB) 40095f757f3fSDimitry Andric OutsideUses.push_back(&U); 4010349cc55cSDimitry Andric } 4011349cc55cSDimitry Andric 40125f757f3fSDimitry Andric if (OutsideUses.empty()) 4013349cc55cSDimitry Andric continue; 4014349cc55cSDimitry Andric 4015349cc55cSDimitry Andric HasBroadcastValues = true; 4016349cc55cSDimitry Andric 4017349cc55cSDimitry Andric // Emit a global variable in shared memory to store the broadcasted 4018349cc55cSDimitry Andric // value. 4019349cc55cSDimitry Andric auto *SharedMem = new GlobalVariable( 4020349cc55cSDimitry Andric M, I.getType(), /* IsConstant */ false, 4021349cc55cSDimitry Andric GlobalValue::InternalLinkage, UndefValue::get(I.getType()), 4022349cc55cSDimitry Andric sanitizeForGlobalName( 4023349cc55cSDimitry Andric (I.getName() + ".guarded.output.alloc").str()), 4024349cc55cSDimitry Andric nullptr, GlobalValue::NotThreadLocal, 4025349cc55cSDimitry Andric static_cast<unsigned>(AddressSpace::Shared)); 4026349cc55cSDimitry Andric 4027349cc55cSDimitry Andric // Emit a store instruction to update the value. 
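// The barrier is emitted at the beginning of RegionBarrierBB, which both the
// guarded path and the non-main threads branch to, so every thread in the
// block participates in it.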
4028*0fca6ea1SDimitry Andric new StoreInst(&I, SharedMem, 4029*0fca6ea1SDimitry Andric RegionEndBB->getTerminator()->getIterator()); 4030349cc55cSDimitry Andric 4031*0fca6ea1SDimitry Andric LoadInst *LoadI = new LoadInst( 4032*0fca6ea1SDimitry Andric I.getType(), SharedMem, I.getName() + ".guarded.output.load", 4033*0fca6ea1SDimitry Andric RegionBarrierBB->getTerminator()->getIterator()); 4034349cc55cSDimitry Andric 4035349cc55cSDimitry Andric // Emit a load instruction and replace uses of the output value. 40365f757f3fSDimitry Andric for (Use *U : OutsideUses) 40375f757f3fSDimitry Andric A.changeUseAfterManifest(*U, *LoadI); 4038349cc55cSDimitry Andric } 4039349cc55cSDimitry Andric 4040349cc55cSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 4041349cc55cSDimitry Andric 4042349cc55cSDimitry Andric // Go to tid check BB in ParentBB. 4043349cc55cSDimitry Andric const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); 4044349cc55cSDimitry Andric ParentBB->getTerminator()->eraseFromParent(); 4045349cc55cSDimitry Andric OpenMPIRBuilder::LocationDescription Loc( 4046349cc55cSDimitry Andric InsertPointTy(ParentBB, ParentBB->end()), DL); 4047349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.updateToLocation(Loc); 404804eeddc0SDimitry Andric uint32_t SrcLocStrSize; 404904eeddc0SDimitry Andric auto *SrcLocStr = 405004eeddc0SDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc, SrcLocStrSize); 405104eeddc0SDimitry Andric Value *Ident = 405204eeddc0SDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize); 4053349cc55cSDimitry Andric BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL); 4054349cc55cSDimitry Andric 4055349cc55cSDimitry Andric // Add check for Tid in RegionCheckTidBB 4056349cc55cSDimitry Andric RegionCheckTidBB->getTerminator()->eraseFromParent(); 4057349cc55cSDimitry Andric OpenMPIRBuilder::LocationDescription LocRegionCheckTid( 4058349cc55cSDimitry Andric InsertPointTy(RegionCheckTidBB, RegionCheckTidBB->end()), DL); 4059349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.updateToLocation(LocRegionCheckTid); 4060349cc55cSDimitry Andric FunctionCallee HardwareTidFn = 4061349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( 4062349cc55cSDimitry Andric M, OMPRTL___kmpc_get_hardware_thread_id_in_block); 406304eeddc0SDimitry Andric CallInst *Tid = 4064349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {}); 406504eeddc0SDimitry Andric Tid->setDebugLoc(DL); 406604eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(HardwareTidFn, Tid); 4067349cc55cSDimitry Andric Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid); 4068349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.Builder 4069349cc55cSDimitry Andric .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB) 4070349cc55cSDimitry Andric ->setDebugLoc(DL); 4071349cc55cSDimitry Andric 4072349cc55cSDimitry Andric // First barrier for synchronization, ensures main thread has updated 4073349cc55cSDimitry Andric // values. 
4074349cc55cSDimitry Andric FunctionCallee BarrierFn = 4075349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( 4076349cc55cSDimitry Andric M, OMPRTL___kmpc_barrier_simple_spmd); 4077349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy( 4078349cc55cSDimitry Andric RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt())); 407904eeddc0SDimitry Andric CallInst *Barrier = 408004eeddc0SDimitry Andric OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid}); 408104eeddc0SDimitry Andric Barrier->setDebugLoc(DL); 408204eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(BarrierFn, Barrier); 4083349cc55cSDimitry Andric 4084349cc55cSDimitry Andric // Second barrier ensures workers have read broadcast values. 408504eeddc0SDimitry Andric if (HasBroadcastValues) { 4086*0fca6ea1SDimitry Andric CallInst *Barrier = 4087*0fca6ea1SDimitry Andric CallInst::Create(BarrierFn, {Ident, Tid}, "", 4088*0fca6ea1SDimitry Andric RegionBarrierBB->getTerminator()->getIterator()); 408904eeddc0SDimitry Andric Barrier->setDebugLoc(DL); 409004eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(BarrierFn, Barrier); 409104eeddc0SDimitry Andric } 4092349cc55cSDimitry Andric }; 4093349cc55cSDimitry Andric 4094349cc55cSDimitry Andric auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; 4095349cc55cSDimitry Andric SmallPtrSet<BasicBlock *, 8> Visited; 4096349cc55cSDimitry Andric for (Instruction *GuardedI : SPMDCompatibilityTracker) { 4097349cc55cSDimitry Andric BasicBlock *BB = GuardedI->getParent(); 4098349cc55cSDimitry Andric if (!Visited.insert(BB).second) 4099349cc55cSDimitry Andric continue; 4100349cc55cSDimitry Andric 4101349cc55cSDimitry Andric SmallVector<std::pair<Instruction *, Instruction *>> Reorders; 4102349cc55cSDimitry Andric Instruction *LastEffect = nullptr; 4103349cc55cSDimitry Andric BasicBlock::reverse_iterator IP = BB->rbegin(), IPEnd = BB->rend(); 4104349cc55cSDimitry Andric while (++IP != IPEnd) { 4105349cc55cSDimitry Andric if (!IP->mayHaveSideEffects() && !IP->mayReadFromMemory()) 4106349cc55cSDimitry Andric continue; 4107349cc55cSDimitry Andric Instruction *I = &*IP; 4108349cc55cSDimitry Andric if (OpenMPOpt::getCallIfRegularCall(*I, &AllocSharedRFI)) 4109349cc55cSDimitry Andric continue; 4110349cc55cSDimitry Andric if (!I->user_empty() || !SPMDCompatibilityTracker.contains(I)) { 4111349cc55cSDimitry Andric LastEffect = nullptr; 4112349cc55cSDimitry Andric continue; 4113349cc55cSDimitry Andric } 4114349cc55cSDimitry Andric if (LastEffect) 4115349cc55cSDimitry Andric Reorders.push_back({I, LastEffect}); 4116349cc55cSDimitry Andric LastEffect = &*IP; 4117349cc55cSDimitry Andric } 4118349cc55cSDimitry Andric for (auto &Reorder : Reorders) 4119349cc55cSDimitry Andric Reorder.first->moveBefore(Reorder.second); 4120349cc55cSDimitry Andric } 4121349cc55cSDimitry Andric 4122349cc55cSDimitry Andric SmallVector<std::pair<Instruction *, Instruction *>, 4> GuardedRegions; 4123349cc55cSDimitry Andric 4124349cc55cSDimitry Andric for (Instruction *GuardedI : SPMDCompatibilityTracker) { 4125349cc55cSDimitry Andric BasicBlock *BB = GuardedI->getParent(); 4126349cc55cSDimitry Andric auto *CalleeAA = A.lookupAAFor<AAKernelInfo>( 4127349cc55cSDimitry Andric IRPosition::function(*GuardedI->getFunction()), nullptr, 4128349cc55cSDimitry Andric DepClassTy::NONE); 4129349cc55cSDimitry Andric assert(CalleeAA != nullptr && "Expected Callee AAKernelInfo"); 4130349cc55cSDimitry Andric auto &CalleeAAFunction = 
*cast<AAKernelInfoFunction>(CalleeAA); 4131349cc55cSDimitry Andric // Continue if instruction is already guarded. 4132349cc55cSDimitry Andric if (CalleeAAFunction.getGuardedInstructions().contains(GuardedI)) 4133349cc55cSDimitry Andric continue; 4134349cc55cSDimitry Andric 4135349cc55cSDimitry Andric Instruction *GuardedRegionStart = nullptr, *GuardedRegionEnd = nullptr; 4136349cc55cSDimitry Andric for (Instruction &I : *BB) { 4137349cc55cSDimitry Andric // If instruction I needs to be guarded update the guarded region 4138349cc55cSDimitry Andric // bounds. 4139349cc55cSDimitry Andric if (SPMDCompatibilityTracker.contains(&I)) { 4140349cc55cSDimitry Andric CalleeAAFunction.getGuardedInstructions().insert(&I); 4141349cc55cSDimitry Andric if (GuardedRegionStart) 4142349cc55cSDimitry Andric GuardedRegionEnd = &I; 4143349cc55cSDimitry Andric else 4144349cc55cSDimitry Andric GuardedRegionStart = GuardedRegionEnd = &I; 4145349cc55cSDimitry Andric 4146349cc55cSDimitry Andric continue; 4147349cc55cSDimitry Andric } 4148349cc55cSDimitry Andric 4149349cc55cSDimitry Andric // Instruction I does not need guarding, store 4150349cc55cSDimitry Andric // any region found and reset bounds. 4151349cc55cSDimitry Andric if (GuardedRegionStart) { 4152349cc55cSDimitry Andric GuardedRegions.push_back( 4153349cc55cSDimitry Andric std::make_pair(GuardedRegionStart, GuardedRegionEnd)); 4154349cc55cSDimitry Andric GuardedRegionStart = nullptr; 4155349cc55cSDimitry Andric GuardedRegionEnd = nullptr; 4156349cc55cSDimitry Andric } 4157349cc55cSDimitry Andric } 4158349cc55cSDimitry Andric } 4159349cc55cSDimitry Andric 4160349cc55cSDimitry Andric for (auto &GR : GuardedRegions) 4161349cc55cSDimitry Andric CreateGuardedRegion(GR.first, GR.second); 4162bdd1243dSDimitry Andric } 4163bdd1243dSDimitry Andric 4164bdd1243dSDimitry Andric void forceSingleThreadPerWorkgroupHelper(Attributor &A) { 4165bdd1243dSDimitry Andric // Only allow 1 thread per workgroup to continue executing the user code. 4166bdd1243dSDimitry Andric // 4167bdd1243dSDimitry Andric // InitCB = __kmpc_target_init(...) 4168bdd1243dSDimitry Andric // ThreadIdInBlock = __kmpc_get_hardware_thread_id_in_block(); 4169bdd1243dSDimitry Andric // if (ThreadIdInBlock != 0) return; 4170bdd1243dSDimitry Andric // UserCode: 4171bdd1243dSDimitry Andric // // user code 4172bdd1243dSDimitry Andric // 4173bdd1243dSDimitry Andric auto &Ctx = getAnchorValue().getContext(); 4174bdd1243dSDimitry Andric Function *Kernel = getAssociatedFunction(); 4175bdd1243dSDimitry Andric assert(Kernel && "Expected an associated function!"); 4176bdd1243dSDimitry Andric 4177bdd1243dSDimitry Andric // Create block for user code to branch to from initial block. 
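// InitBB is split right after the __kmpc_target_init call; threads with a
// non-zero hardware thread id branch to a fresh exit block and return
// immediately, leaving only thread 0 to execute the user code.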
4178bdd1243dSDimitry Andric BasicBlock *InitBB = KernelInitCB->getParent();
4179bdd1243dSDimitry Andric BasicBlock *UserCodeBB = InitBB->splitBasicBlock(
4180bdd1243dSDimitry Andric KernelInitCB->getNextNode(), "main.thread.user_code");
4181bdd1243dSDimitry Andric BasicBlock *ReturnBB =
4182bdd1243dSDimitry Andric BasicBlock::Create(Ctx, "exit.threads", Kernel, UserCodeBB);
4183bdd1243dSDimitry Andric
4184bdd1243dSDimitry Andric // Register blocks with attributor:
4185bdd1243dSDimitry Andric A.registerManifestAddedBasicBlock(*InitBB);
4186bdd1243dSDimitry Andric A.registerManifestAddedBasicBlock(*UserCodeBB);
4187bdd1243dSDimitry Andric A.registerManifestAddedBasicBlock(*ReturnBB);
4188bdd1243dSDimitry Andric
4189bdd1243dSDimitry Andric // Debug location:
4190bdd1243dSDimitry Andric const DebugLoc &DLoc = KernelInitCB->getDebugLoc();
4191bdd1243dSDimitry Andric ReturnInst::Create(Ctx, ReturnBB)->setDebugLoc(DLoc);
4192bdd1243dSDimitry Andric InitBB->getTerminator()->eraseFromParent();
4193bdd1243dSDimitry Andric
4194bdd1243dSDimitry Andric // Prepare call to OMPRTL___kmpc_get_hardware_thread_id_in_block.
4195bdd1243dSDimitry Andric Module &M = *Kernel->getParent();
4196bdd1243dSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4197bdd1243dSDimitry Andric FunctionCallee ThreadIdInBlockFn =
4198bdd1243dSDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
4199bdd1243dSDimitry Andric M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
4200bdd1243dSDimitry Andric
4201bdd1243dSDimitry Andric // Get thread ID in block.
4202bdd1243dSDimitry Andric CallInst *ThreadIdInBlock =
4203bdd1243dSDimitry Andric CallInst::Create(ThreadIdInBlockFn, "thread_id.in.block", InitBB);
4204bdd1243dSDimitry Andric OMPInfoCache.setCallingConvention(ThreadIdInBlockFn, ThreadIdInBlock);
4205bdd1243dSDimitry Andric ThreadIdInBlock->setDebugLoc(DLoc);
4206bdd1243dSDimitry Andric
4207bdd1243dSDimitry Andric // Eliminate all threads in the block with ID not equal to 0:
4208bdd1243dSDimitry Andric Instruction *IsMainThread =
4209bdd1243dSDimitry Andric ICmpInst::Create(ICmpInst::ICmp, CmpInst::ICMP_NE, ThreadIdInBlock,
4210bdd1243dSDimitry Andric ConstantInt::get(ThreadIdInBlock->getType(), 0),
4211bdd1243dSDimitry Andric "thread.is_main", InitBB);
4212bdd1243dSDimitry Andric IsMainThread->setDebugLoc(DLoc);
4213bdd1243dSDimitry Andric BranchInst::Create(ReturnBB, UserCodeBB, IsMainThread, InitBB);
4214bdd1243dSDimitry Andric }
4215bdd1243dSDimitry Andric
4216bdd1243dSDimitry Andric bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
4217bdd1243dSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4218bdd1243dSDimitry Andric
42191ac55f4cSDimitry Andric // We cannot change to SPMD mode if the runtime functions aren't available.
42201ac55f4cSDimitry Andric if (!OMPInfoCache.runtimeFnsAvailable(
42211ac55f4cSDimitry Andric {OMPRTL___kmpc_get_hardware_thread_id_in_block,
42221ac55f4cSDimitry Andric OMPRTL___kmpc_barrier_simple_spmd}))
42231ac55f4cSDimitry Andric return false;
42241ac55f4cSDimitry Andric
4225bdd1243dSDimitry Andric if (!SPMDCompatibilityTracker.isAssumed()) {
4226bdd1243dSDimitry Andric for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
4227bdd1243dSDimitry Andric if (!NonCompatibleI)
4228bdd1243dSDimitry Andric continue;
4229bdd1243dSDimitry Andric
4230bdd1243dSDimitry Andric // Skip diagnostics on calls to known OpenMP runtime functions for now.
4231bdd1243dSDimitry Andric if (auto *CB = dyn_cast<CallBase>(NonCompatibleI)) 4232bdd1243dSDimitry Andric if (OMPInfoCache.RTLFunctions.contains(CB->getCalledFunction())) 4233bdd1243dSDimitry Andric continue; 4234bdd1243dSDimitry Andric 4235bdd1243dSDimitry Andric auto Remark = [&](OptimizationRemarkAnalysis ORA) { 4236bdd1243dSDimitry Andric ORA << "Value has potential side effects preventing SPMD-mode " 4237bdd1243dSDimitry Andric "execution"; 4238bdd1243dSDimitry Andric if (isa<CallBase>(NonCompatibleI)) { 4239*0fca6ea1SDimitry Andric ORA << ". Add `[[omp::assume(\"ompx_spmd_amenable\")]]` to " 4240bdd1243dSDimitry Andric "the called function to override"; 4241bdd1243dSDimitry Andric } 4242bdd1243dSDimitry Andric return ORA << "."; 4243bdd1243dSDimitry Andric }; 4244bdd1243dSDimitry Andric A.emitRemark<OptimizationRemarkAnalysis>(NonCompatibleI, "OMP121", 4245bdd1243dSDimitry Andric Remark); 4246bdd1243dSDimitry Andric 4247bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << TAG << "SPMD-incompatible side-effect: " 4248bdd1243dSDimitry Andric << *NonCompatibleI << "\n"); 4249bdd1243dSDimitry Andric } 4250bdd1243dSDimitry Andric 4251bdd1243dSDimitry Andric return false; 4252bdd1243dSDimitry Andric } 4253bdd1243dSDimitry Andric 4254bdd1243dSDimitry Andric // Get the actual kernel, could be the caller of the anchor scope if we have 4255bdd1243dSDimitry Andric // a debug wrapper. 4256bdd1243dSDimitry Andric Function *Kernel = getAnchorScope(); 4257bdd1243dSDimitry Andric if (Kernel->hasLocalLinkage()) { 4258bdd1243dSDimitry Andric assert(Kernel->hasOneUse() && "Unexpected use of debug kernel wrapper."); 4259bdd1243dSDimitry Andric auto *CB = cast<CallBase>(Kernel->user_back()); 4260bdd1243dSDimitry Andric Kernel = CB->getCaller(); 4261bdd1243dSDimitry Andric } 42625f757f3fSDimitry Andric assert(omp::isOpenMPKernel(*Kernel) && "Expected kernel function!"); 4263bdd1243dSDimitry Andric 4264bdd1243dSDimitry Andric // Check if the kernel is already in SPMD mode, if so, return success. 42655f757f3fSDimitry Andric ConstantStruct *ExistingKernelEnvC = 42665f757f3fSDimitry Andric KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB); 42675f757f3fSDimitry Andric auto *ExecModeC = 42685f757f3fSDimitry Andric KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC); 42695f757f3fSDimitry Andric const int8_t ExecModeVal = ExecModeC->getSExtValue(); 4270bdd1243dSDimitry Andric if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC) 4271bdd1243dSDimitry Andric return true; 4272bdd1243dSDimitry Andric 4273bdd1243dSDimitry Andric // We will now unconditionally modify the IR, indicate a change. 4274bdd1243dSDimitry Andric Changed = ChangeStatus::CHANGED; 4275bdd1243dSDimitry Andric 4276bdd1243dSDimitry Andric // Do not use instruction guards when no parallel is present inside 4277bdd1243dSDimitry Andric // the target region. 4278bdd1243dSDimitry Andric if (mayContainParallelRegion()) 4279bdd1243dSDimitry Andric insertInstructionGuardsHelper(A); 4280bdd1243dSDimitry Andric else 4281bdd1243dSDimitry Andric forceSingleThreadPerWorkgroupHelper(A); 4282349cc55cSDimitry Andric 4283349cc55cSDimitry Andric // Adjust the global exec mode flag that tells the runtime what mode this 4284349cc55cSDimitry Andric // kernel is executed in. 
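// Presumably OMP_TGT_EXEC_MODE_GENERIC_SPMD combines the generic and SPMD
// bits; OR-ing it into the (asserted generic) old value marks the kernel as
// generic code that may now be launched in SPMD fashion.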
4285349cc55cSDimitry Andric assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC && 4286349cc55cSDimitry Andric "Initially non-SPMD kernel has SPMD exec mode!"); 4287cb14a3feSDimitry Andric setExecModeOfKernelEnvironment( 4288cb14a3feSDimitry Andric ConstantInt::get(ExecModeC->getIntegerType(), 4289cb14a3feSDimitry Andric ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD)); 4290fe6060f1SDimitry Andric 4291fe6060f1SDimitry Andric ++NumOpenMPTargetRegionKernelsSPMD; 4292fe6060f1SDimitry Andric 4293fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemark OR) { 4294fe6060f1SDimitry Andric return OR << "Transformed generic-mode kernel to SPMD-mode."; 4295fe6060f1SDimitry Andric }; 4296fe6060f1SDimitry Andric A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP120", Remark); 4297fe6060f1SDimitry Andric return true; 4298fe6060f1SDimitry Andric }; 4299fe6060f1SDimitry Andric 43005f757f3fSDimitry Andric bool buildCustomStateMachine(Attributor &A, ChangeStatus &Changed) { 4301349cc55cSDimitry Andric // If we have disabled state machine rewrites, don't make a custom one 4302349cc55cSDimitry Andric if (DisableOpenMPOptStateMachineRewrite) 43035f757f3fSDimitry Andric return false; 4304fe6060f1SDimitry Andric 4305349cc55cSDimitry Andric // Don't rewrite the state machine if we are not in a valid state. 4306349cc55cSDimitry Andric if (!ReachedKnownParallelRegions.isValidState()) 43075f757f3fSDimitry Andric return false; 4308349cc55cSDimitry Andric 43091ac55f4cSDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 43101ac55f4cSDimitry Andric if (!OMPInfoCache.runtimeFnsAvailable( 43111ac55f4cSDimitry Andric {OMPRTL___kmpc_get_hardware_num_threads_in_block, 43121ac55f4cSDimitry Andric OMPRTL___kmpc_get_warp_size, OMPRTL___kmpc_barrier_simple_generic, 43131ac55f4cSDimitry Andric OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel})) 43145f757f3fSDimitry Andric return false; 43151ac55f4cSDimitry Andric 43165f757f3fSDimitry Andric ConstantStruct *ExistingKernelEnvC = 43175f757f3fSDimitry Andric KernelInfo::getKernelEnvironementFromKernelInitCB(KernelInitCB); 4318fe6060f1SDimitry Andric 4319fe6060f1SDimitry Andric // Check if the current configuration is non-SPMD and generic state machine. 4320fe6060f1SDimitry Andric // If we already have SPMD mode or a custom state machine we do not need to 4321fe6060f1SDimitry Andric // go any further. If it is anything but a constant something is weird and 4322fe6060f1SDimitry Andric // we give up. 43235f757f3fSDimitry Andric ConstantInt *UseStateMachineC = 43245f757f3fSDimitry Andric KernelInfo::getUseGenericStateMachineFromKernelEnvironment( 43255f757f3fSDimitry Andric ExistingKernelEnvC); 43265f757f3fSDimitry Andric ConstantInt *ModeC = 43275f757f3fSDimitry Andric KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC); 4328fe6060f1SDimitry Andric 4329fe6060f1SDimitry Andric // If we are stuck with generic mode, try to create a custom device (=GPU) 4330fe6060f1SDimitry Andric // state machine which is specialized for the parallel regions that are 4331fe6060f1SDimitry Andric // reachable by the kernel. 43325f757f3fSDimitry Andric if (UseStateMachineC->isZero() || 43335f757f3fSDimitry Andric (ModeC->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) 43345f757f3fSDimitry Andric return false; 43355f757f3fSDimitry Andric 43365f757f3fSDimitry Andric Changed = ChangeStatus::CHANGED; 4337fe6060f1SDimitry Andric 4338fe6060f1SDimitry Andric // If not SPMD mode, indicate we use a custom state machine now. 
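// Clearing this flag in the kernel environment should let __kmpc_target_init
// skip its built-in worker state machine; the specialized one constructed
// below takes over that role.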
43395f757f3fSDimitry Andric setUseGenericStateMachineOfKernelEnvironment( 4340cb14a3feSDimitry Andric ConstantInt::get(UseStateMachineC->getIntegerType(), false)); 4341fe6060f1SDimitry Andric 4342fe6060f1SDimitry Andric // If we don't actually need a state machine we are done here. This can 4343fe6060f1SDimitry Andric // happen if there simply are no parallel regions. In the resulting kernel 4344fe6060f1SDimitry Andric // all worker threads will simply exit right away, leaving the main thread 4345fe6060f1SDimitry Andric // to do the work alone. 4346349cc55cSDimitry Andric if (!mayContainParallelRegion()) { 4347fe6060f1SDimitry Andric ++NumOpenMPTargetRegionKernelsWithoutStateMachine; 4348fe6060f1SDimitry Andric 4349fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemark OR) { 4350fe6060f1SDimitry Andric return OR << "Removing unused state machine from generic-mode kernel."; 4351fe6060f1SDimitry Andric }; 4352fe6060f1SDimitry Andric A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP130", Remark); 4353fe6060f1SDimitry Andric 43545f757f3fSDimitry Andric return true; 4355fe6060f1SDimitry Andric } 4356fe6060f1SDimitry Andric 4357fe6060f1SDimitry Andric // Keep track in the statistics of our new shiny custom state machine. 4358fe6060f1SDimitry Andric if (ReachedUnknownParallelRegions.empty()) { 4359fe6060f1SDimitry Andric ++NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback; 4360fe6060f1SDimitry Andric 4361fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemark OR) { 4362fe6060f1SDimitry Andric return OR << "Rewriting generic-mode kernel with a customized state " 4363fe6060f1SDimitry Andric "machine."; 4364fe6060f1SDimitry Andric }; 4365fe6060f1SDimitry Andric A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP131", Remark); 4366fe6060f1SDimitry Andric } else { 4367fe6060f1SDimitry Andric ++NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback; 4368fe6060f1SDimitry Andric 4369fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemarkAnalysis OR) { 4370fe6060f1SDimitry Andric return OR << "Generic-mode kernel is executed with a customized state " 4371fe6060f1SDimitry Andric "machine that requires a fallback."; 4372fe6060f1SDimitry Andric }; 4373fe6060f1SDimitry Andric A.emitRemark<OptimizationRemarkAnalysis>(KernelInitCB, "OMP132", Remark); 4374fe6060f1SDimitry Andric 4375fe6060f1SDimitry Andric // Tell the user why we ended up with a fallback. 4376fe6060f1SDimitry Andric for (CallBase *UnknownParallelRegionCB : ReachedUnknownParallelRegions) { 4377fe6060f1SDimitry Andric if (!UnknownParallelRegionCB) 4378fe6060f1SDimitry Andric continue; 4379fe6060f1SDimitry Andric auto Remark = [&](OptimizationRemarkAnalysis ORA) { 4380fe6060f1SDimitry Andric return ORA << "Call may contain unknown parallel regions. Use " 4381*0fca6ea1SDimitry Andric << "`[[omp::assume(\"omp_no_parallelism\")]]` to " 4382fe6060f1SDimitry Andric "override."; 4383fe6060f1SDimitry Andric }; 4384fe6060f1SDimitry Andric A.emitRemark<OptimizationRemarkAnalysis>(UnknownParallelRegionCB, 4385fe6060f1SDimitry Andric "OMP133", Remark); 4386fe6060f1SDimitry Andric } 4387fe6060f1SDimitry Andric } 4388fe6060f1SDimitry Andric 4389fe6060f1SDimitry Andric // Create all the blocks: 4390fe6060f1SDimitry Andric // 4391fe6060f1SDimitry Andric // InitCB = __kmpc_target_init(...) 
4392349cc55cSDimitry Andric // BlockHwSize = 4393349cc55cSDimitry Andric // __kmpc_get_hardware_num_threads_in_block(); 4394349cc55cSDimitry Andric // WarpSize = __kmpc_get_warp_size(); 4395349cc55cSDimitry Andric // BlockSize = BlockHwSize - WarpSize; 4396fb03ea46SDimitry Andric // IsWorkerCheckBB: bool IsWorker = InitCB != -1; 4397fe6060f1SDimitry Andric // if (IsWorker) { 4398fb03ea46SDimitry Andric // if (InitCB >= BlockSize) return; 4399349cc55cSDimitry Andric // SMBeginBB: __kmpc_barrier_simple_generic(...); 4400fe6060f1SDimitry Andric // void *WorkFn; 4401fe6060f1SDimitry Andric // bool Active = __kmpc_kernel_parallel(&WorkFn); 4402fe6060f1SDimitry Andric // if (!WorkFn) return; 4403fe6060f1SDimitry Andric // SMIsActiveCheckBB: if (Active) { 4404fe6060f1SDimitry Andric // SMIfCascadeCurrentBB: if (WorkFn == <ParFn0>) 4405fe6060f1SDimitry Andric // ParFn0(...); 4406fe6060f1SDimitry Andric // SMIfCascadeCurrentBB: else if (WorkFn == <ParFn1>) 4407fe6060f1SDimitry Andric // ParFn1(...); 4408fe6060f1SDimitry Andric // ... 4409fe6060f1SDimitry Andric // SMIfCascadeCurrentBB: else 4410fe6060f1SDimitry Andric // ((WorkFnTy*)WorkFn)(...); 4411fe6060f1SDimitry Andric // SMEndParallelBB: __kmpc_kernel_end_parallel(...); 4412fe6060f1SDimitry Andric // } 4413349cc55cSDimitry Andric // SMDoneBB: __kmpc_barrier_simple_generic(...); 4414fe6060f1SDimitry Andric // goto SMBeginBB; 4415fe6060f1SDimitry Andric // } 4416fe6060f1SDimitry Andric // UserCodeEntryBB: // user code 4417fe6060f1SDimitry Andric // __kmpc_target_deinit(...) 4418fe6060f1SDimitry Andric // 44195f757f3fSDimitry Andric auto &Ctx = getAnchorValue().getContext(); 4420fe6060f1SDimitry Andric Function *Kernel = getAssociatedFunction(); 4421fe6060f1SDimitry Andric assert(Kernel && "Expected an associated function!"); 4422fe6060f1SDimitry Andric 4423fe6060f1SDimitry Andric BasicBlock *InitBB = KernelInitCB->getParent(); 4424fe6060f1SDimitry Andric BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock( 4425fe6060f1SDimitry Andric KernelInitCB->getNextNode(), "thread.user_code.check"); 4426349cc55cSDimitry Andric BasicBlock *IsWorkerCheckBB = 4427349cc55cSDimitry Andric BasicBlock::Create(Ctx, "is_worker_check", Kernel, UserCodeEntryBB); 4428fe6060f1SDimitry Andric BasicBlock *StateMachineBeginBB = BasicBlock::Create( 4429fe6060f1SDimitry Andric Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB); 4430fe6060f1SDimitry Andric BasicBlock *StateMachineFinishedBB = BasicBlock::Create( 4431fe6060f1SDimitry Andric Ctx, "worker_state_machine.finished", Kernel, UserCodeEntryBB); 4432fe6060f1SDimitry Andric BasicBlock *StateMachineIsActiveCheckBB = BasicBlock::Create( 4433fe6060f1SDimitry Andric Ctx, "worker_state_machine.is_active.check", Kernel, UserCodeEntryBB); 4434fe6060f1SDimitry Andric BasicBlock *StateMachineIfCascadeCurrentBB = 4435fe6060f1SDimitry Andric BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check", 4436fe6060f1SDimitry Andric Kernel, UserCodeEntryBB); 4437fe6060f1SDimitry Andric BasicBlock *StateMachineEndParallelBB = 4438fe6060f1SDimitry Andric BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.end", 4439fe6060f1SDimitry Andric Kernel, UserCodeEntryBB); 4440fe6060f1SDimitry Andric BasicBlock *StateMachineDoneBarrierBB = BasicBlock::Create( 4441fe6060f1SDimitry Andric Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB); 4442fe6060f1SDimitry Andric A.registerManifestAddedBasicBlock(*InitBB); 4443fe6060f1SDimitry Andric A.registerManifestAddedBasicBlock(*UserCodeEntryBB); 
4444349cc55cSDimitry Andric A.registerManifestAddedBasicBlock(*IsWorkerCheckBB); 4445fe6060f1SDimitry Andric A.registerManifestAddedBasicBlock(*StateMachineBeginBB); 4446fe6060f1SDimitry Andric A.registerManifestAddedBasicBlock(*StateMachineFinishedBB); 4447fe6060f1SDimitry Andric A.registerManifestAddedBasicBlock(*StateMachineIsActiveCheckBB); 4448fe6060f1SDimitry Andric A.registerManifestAddedBasicBlock(*StateMachineIfCascadeCurrentBB); 4449fe6060f1SDimitry Andric A.registerManifestAddedBasicBlock(*StateMachineEndParallelBB); 4450fe6060f1SDimitry Andric A.registerManifestAddedBasicBlock(*StateMachineDoneBarrierBB); 4451fe6060f1SDimitry Andric 4452fe6060f1SDimitry Andric const DebugLoc &DLoc = KernelInitCB->getDebugLoc(); 4453fe6060f1SDimitry Andric ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc); 4454fe6060f1SDimitry Andric InitBB->getTerminator()->eraseFromParent(); 4455349cc55cSDimitry Andric 4456fb03ea46SDimitry Andric Instruction *IsWorker = 4457fb03ea46SDimitry Andric ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB, 4458fb03ea46SDimitry Andric ConstantInt::get(KernelInitCB->getType(), -1), 4459fb03ea46SDimitry Andric "thread.is_worker", InitBB); 4460fb03ea46SDimitry Andric IsWorker->setDebugLoc(DLoc); 4461fb03ea46SDimitry Andric BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB); 4462fb03ea46SDimitry Andric 4463349cc55cSDimitry Andric Module &M = *Kernel->getParent(); 4464349cc55cSDimitry Andric FunctionCallee BlockHwSizeFn = 4465349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( 4466349cc55cSDimitry Andric M, OMPRTL___kmpc_get_hardware_num_threads_in_block); 4467349cc55cSDimitry Andric FunctionCallee WarpSizeFn = 4468349cc55cSDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( 4469349cc55cSDimitry Andric M, OMPRTL___kmpc_get_warp_size); 447004eeddc0SDimitry Andric CallInst *BlockHwSize = 4471fb03ea46SDimitry Andric CallInst::Create(BlockHwSizeFn, "block.hw_size", IsWorkerCheckBB); 447204eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize); 4473349cc55cSDimitry Andric BlockHwSize->setDebugLoc(DLoc); 4474fb03ea46SDimitry Andric CallInst *WarpSize = 4475fb03ea46SDimitry Andric CallInst::Create(WarpSizeFn, "warp.size", IsWorkerCheckBB); 447604eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize); 4477349cc55cSDimitry Andric WarpSize->setDebugLoc(DLoc); 4478fb03ea46SDimitry Andric Instruction *BlockSize = BinaryOperator::CreateSub( 4479fb03ea46SDimitry Andric BlockHwSize, WarpSize, "block.size", IsWorkerCheckBB); 4480349cc55cSDimitry Andric BlockSize->setDebugLoc(DLoc); 4481fb03ea46SDimitry Andric Instruction *IsMainOrWorker = ICmpInst::Create( 4482fb03ea46SDimitry Andric ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, BlockSize, 4483fb03ea46SDimitry Andric "thread.is_main_or_worker", IsWorkerCheckBB); 4484349cc55cSDimitry Andric IsMainOrWorker->setDebugLoc(DLoc); 4485fb03ea46SDimitry Andric BranchInst::Create(StateMachineBeginBB, StateMachineFinishedBB, 4486fb03ea46SDimitry Andric IsMainOrWorker, IsWorkerCheckBB); 44878c6f6c0cSDimitry Andric 4488fe6060f1SDimitry Andric // Create local storage for the work function pointer. 
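// The alloca is created in the kernel's entry block; if the target's alloca
// address space is not the generic one, it is addrspacecast'ed below before
// being handed to __kmpc_kernel_parallel.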
44898c6f6c0cSDimitry Andric const DataLayout &DL = M.getDataLayout(); 44905f757f3fSDimitry Andric Type *VoidPtrTy = PointerType::getUnqual(Ctx); 44918c6f6c0cSDimitry Andric Instruction *WorkFnAI = 44928c6f6c0cSDimitry Andric new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr, 4493*0fca6ea1SDimitry Andric "worker.work_fn.addr", Kernel->getEntryBlock().begin()); 4494fe6060f1SDimitry Andric WorkFnAI->setDebugLoc(DLoc); 4495fe6060f1SDimitry Andric 4496fe6060f1SDimitry Andric OMPInfoCache.OMPBuilder.updateToLocation( 4497fe6060f1SDimitry Andric OpenMPIRBuilder::LocationDescription( 4498fe6060f1SDimitry Andric IRBuilder<>::InsertPoint(StateMachineBeginBB, 4499fe6060f1SDimitry Andric StateMachineBeginBB->end()), 4500fe6060f1SDimitry Andric DLoc)); 4501fe6060f1SDimitry Andric 45025f757f3fSDimitry Andric Value *Ident = KernelInfo::getIdentFromKernelEnvironment(KernelEnvC); 4503fe6060f1SDimitry Andric Value *GTid = KernelInitCB; 4504fe6060f1SDimitry Andric 4505fe6060f1SDimitry Andric FunctionCallee BarrierFn = 4506fe6060f1SDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( 4507349cc55cSDimitry Andric M, OMPRTL___kmpc_barrier_simple_generic); 450804eeddc0SDimitry Andric CallInst *Barrier = 450904eeddc0SDimitry Andric CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB); 451004eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(BarrierFn, Barrier); 451104eeddc0SDimitry Andric Barrier->setDebugLoc(DLoc); 4512fe6060f1SDimitry Andric 45138c6f6c0cSDimitry Andric if (WorkFnAI->getType()->getPointerAddressSpace() != 45148c6f6c0cSDimitry Andric (unsigned int)AddressSpace::Generic) { 45158c6f6c0cSDimitry Andric WorkFnAI = new AddrSpaceCastInst( 451606c3fb27SDimitry Andric WorkFnAI, PointerType::get(Ctx, (unsigned int)AddressSpace::Generic), 45178c6f6c0cSDimitry Andric WorkFnAI->getName() + ".generic", StateMachineBeginBB); 45188c6f6c0cSDimitry Andric WorkFnAI->setDebugLoc(DLoc); 45198c6f6c0cSDimitry Andric } 45208c6f6c0cSDimitry Andric 4521fe6060f1SDimitry Andric FunctionCallee KernelParallelFn = 4522fe6060f1SDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( 4523fe6060f1SDimitry Andric M, OMPRTL___kmpc_kernel_parallel); 452404eeddc0SDimitry Andric CallInst *IsActiveWorker = CallInst::Create( 4525fe6060f1SDimitry Andric KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB); 452604eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(KernelParallelFn, IsActiveWorker); 4527fe6060f1SDimitry Andric IsActiveWorker->setDebugLoc(DLoc); 4528fe6060f1SDimitry Andric Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn", 4529fe6060f1SDimitry Andric StateMachineBeginBB); 4530fe6060f1SDimitry Andric WorkFn->setDebugLoc(DLoc); 4531fe6060f1SDimitry Andric 4532fe6060f1SDimitry Andric FunctionType *ParallelRegionFnTy = FunctionType::get( 4533fe6060f1SDimitry Andric Type::getVoidTy(Ctx), {Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx)}, 4534fe6060f1SDimitry Andric false); 4535fe6060f1SDimitry Andric 4536fe6060f1SDimitry Andric Instruction *IsDone = 4537fe6060f1SDimitry Andric ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn, 4538fe6060f1SDimitry Andric Constant::getNullValue(VoidPtrTy), "worker.is_done", 4539fe6060f1SDimitry Andric StateMachineBeginBB); 4540fe6060f1SDimitry Andric IsDone->setDebugLoc(DLoc); 4541fe6060f1SDimitry Andric BranchInst::Create(StateMachineFinishedBB, StateMachineIsActiveCheckBB, 4542fe6060f1SDimitry Andric IsDone, StateMachineBeginBB) 4543fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 
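// Active workers proceed to the if-cascade over the known parallel regions;
// inactive workers jump straight to the done barrier and wait for the next
// round.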
4544fe6060f1SDimitry Andric 4545fe6060f1SDimitry Andric BranchInst::Create(StateMachineIfCascadeCurrentBB, 4546fe6060f1SDimitry Andric StateMachineDoneBarrierBB, IsActiveWorker, 4547fe6060f1SDimitry Andric StateMachineIsActiveCheckBB) 4548fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4549fe6060f1SDimitry Andric 4550fe6060f1SDimitry Andric Value *ZeroArg = 4551fe6060f1SDimitry Andric Constant::getNullValue(ParallelRegionFnTy->getParamType(0)); 4552fe6060f1SDimitry Andric 45535f757f3fSDimitry Andric const unsigned int WrapperFunctionArgNo = 6; 45545f757f3fSDimitry Andric 4555fe6060f1SDimitry Andric // Now that we have most of the CFG skeleton it is time for the if-cascade 4556fe6060f1SDimitry Andric // that checks the function pointer we got from the runtime against the 4557fe6060f1SDimitry Andric // parallel regions we expect, if there are any. 4558349cc55cSDimitry Andric for (int I = 0, E = ReachedKnownParallelRegions.size(); I < E; ++I) { 45595f757f3fSDimitry Andric auto *CB = ReachedKnownParallelRegions[I]; 45605f757f3fSDimitry Andric auto *ParallelRegion = dyn_cast<Function>( 45615f757f3fSDimitry Andric CB->getArgOperand(WrapperFunctionArgNo)->stripPointerCasts()); 4562fe6060f1SDimitry Andric BasicBlock *PRExecuteBB = BasicBlock::Create( 4563fe6060f1SDimitry Andric Ctx, "worker_state_machine.parallel_region.execute", Kernel, 4564fe6060f1SDimitry Andric StateMachineEndParallelBB); 4565fe6060f1SDimitry Andric CallInst::Create(ParallelRegion, {ZeroArg, GTid}, "", PRExecuteBB) 4566fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4567fe6060f1SDimitry Andric BranchInst::Create(StateMachineEndParallelBB, PRExecuteBB) 4568fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4569fe6060f1SDimitry Andric 4570fe6060f1SDimitry Andric BasicBlock *PRNextBB = 4571fe6060f1SDimitry Andric BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check", 4572fe6060f1SDimitry Andric Kernel, StateMachineEndParallelBB); 45735f757f3fSDimitry Andric A.registerManifestAddedBasicBlock(*PRExecuteBB); 45745f757f3fSDimitry Andric A.registerManifestAddedBasicBlock(*PRNextBB); 4575fe6060f1SDimitry Andric 4576fe6060f1SDimitry Andric // Check if we need to compare the pointer at all or if we can just 4577fe6060f1SDimitry Andric // call the parallel region function. 4578fe6060f1SDimitry Andric Value *IsPR; 4579349cc55cSDimitry Andric if (I + 1 < E || !ReachedUnknownParallelRegions.empty()) { 4580fe6060f1SDimitry Andric Instruction *CmpI = ICmpInst::Create( 45815f757f3fSDimitry Andric ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn, ParallelRegion, 4582fe6060f1SDimitry Andric "worker.check_parallel_region", StateMachineIfCascadeCurrentBB); 4583fe6060f1SDimitry Andric CmpI->setDebugLoc(DLoc); 4584fe6060f1SDimitry Andric IsPR = CmpI; 4585fe6060f1SDimitry Andric } else { 4586fe6060f1SDimitry Andric IsPR = ConstantInt::getTrue(Ctx); 4587fe6060f1SDimitry Andric } 4588fe6060f1SDimitry Andric 4589fe6060f1SDimitry Andric BranchInst::Create(PRExecuteBB, PRNextBB, IsPR, 4590fe6060f1SDimitry Andric StateMachineIfCascadeCurrentBB) 4591fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4592fe6060f1SDimitry Andric StateMachineIfCascadeCurrentBB = PRNextBB; 4593fe6060f1SDimitry Andric } 4594fe6060f1SDimitry Andric 4595fe6060f1SDimitry Andric // At the end of the if-cascade we place the indirect function pointer call 4596fe6060f1SDimitry Andric // in case we might need it, that is if there can be parallel regions we 4597fe6060f1SDimitry Andric // have not handled in the if-cascade above. 
4598fe6060f1SDimitry Andric if (!ReachedUnknownParallelRegions.empty()) { 4599fe6060f1SDimitry Andric StateMachineIfCascadeCurrentBB->setName( 4600fe6060f1SDimitry Andric "worker_state_machine.parallel_region.fallback.execute"); 46015f757f3fSDimitry Andric CallInst::Create(ParallelRegionFnTy, WorkFn, {ZeroArg, GTid}, "", 4602fe6060f1SDimitry Andric StateMachineIfCascadeCurrentBB) 4603fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4604fe6060f1SDimitry Andric } 4605fe6060f1SDimitry Andric BranchInst::Create(StateMachineEndParallelBB, 4606fe6060f1SDimitry Andric StateMachineIfCascadeCurrentBB) 4607fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4608fe6060f1SDimitry Andric 460904eeddc0SDimitry Andric FunctionCallee EndParallelFn = 461004eeddc0SDimitry Andric OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( 461104eeddc0SDimitry Andric M, OMPRTL___kmpc_kernel_end_parallel); 461204eeddc0SDimitry Andric CallInst *EndParallel = 461304eeddc0SDimitry Andric CallInst::Create(EndParallelFn, {}, "", StateMachineEndParallelBB); 461404eeddc0SDimitry Andric OMPInfoCache.setCallingConvention(EndParallelFn, EndParallel); 461504eeddc0SDimitry Andric EndParallel->setDebugLoc(DLoc); 4616fe6060f1SDimitry Andric BranchInst::Create(StateMachineDoneBarrierBB, StateMachineEndParallelBB) 4617fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4618fe6060f1SDimitry Andric 4619fe6060f1SDimitry Andric CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineDoneBarrierBB) 4620fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4621fe6060f1SDimitry Andric BranchInst::Create(StateMachineBeginBB, StateMachineDoneBarrierBB) 4622fe6060f1SDimitry Andric ->setDebugLoc(DLoc); 4623fe6060f1SDimitry Andric 46245f757f3fSDimitry Andric return true; 4625fe6060f1SDimitry Andric } 4626fe6060f1SDimitry Andric 4627fe6060f1SDimitry Andric /// Fixpoint iteration update function. Will be called every time a dependence 4628fe6060f1SDimitry Andric /// changed its state (and in the beginning). 4629fe6060f1SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 4630fe6060f1SDimitry Andric KernelInfoState StateBefore = getState(); 4631fe6060f1SDimitry Andric 46325f757f3fSDimitry Andric // When we leave this function this RAII will make sure the member 46335f757f3fSDimitry Andric // KernelEnvC is updated properly depending on the state. That member is 46345f757f3fSDimitry Andric // used for simplification of values and needs to be up to date at all 46355f757f3fSDimitry Andric // times. 
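// On destruction, the RAII object below re-installs the original constants
// for any part of the kernel environment whose abstract state is no longer
// valid and refreshes the MayUseNestedParallelism flag.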
46365f757f3fSDimitry Andric struct UpdateKernelEnvCRAII { 46375f757f3fSDimitry Andric AAKernelInfoFunction &AA; 46385f757f3fSDimitry Andric 46395f757f3fSDimitry Andric UpdateKernelEnvCRAII(AAKernelInfoFunction &AA) : AA(AA) {} 46405f757f3fSDimitry Andric 46415f757f3fSDimitry Andric ~UpdateKernelEnvCRAII() { 46425f757f3fSDimitry Andric if (!AA.KernelEnvC) 46435f757f3fSDimitry Andric return; 46445f757f3fSDimitry Andric 46455f757f3fSDimitry Andric ConstantStruct *ExistingKernelEnvC = 46465f757f3fSDimitry Andric KernelInfo::getKernelEnvironementFromKernelInitCB(AA.KernelInitCB); 46475f757f3fSDimitry Andric 46485f757f3fSDimitry Andric if (!AA.isValidState()) { 46495f757f3fSDimitry Andric AA.KernelEnvC = ExistingKernelEnvC; 46505f757f3fSDimitry Andric return; 46515f757f3fSDimitry Andric } 46525f757f3fSDimitry Andric 46535f757f3fSDimitry Andric if (!AA.ReachedKnownParallelRegions.isValidState()) 46545f757f3fSDimitry Andric AA.setUseGenericStateMachineOfKernelEnvironment( 46555f757f3fSDimitry Andric KernelInfo::getUseGenericStateMachineFromKernelEnvironment( 46565f757f3fSDimitry Andric ExistingKernelEnvC)); 46575f757f3fSDimitry Andric 46585f757f3fSDimitry Andric if (!AA.SPMDCompatibilityTracker.isValidState()) 46595f757f3fSDimitry Andric AA.setExecModeOfKernelEnvironment( 46605f757f3fSDimitry Andric KernelInfo::getExecModeFromKernelEnvironment(ExistingKernelEnvC)); 46615f757f3fSDimitry Andric 46625f757f3fSDimitry Andric ConstantInt *MayUseNestedParallelismC = 46635f757f3fSDimitry Andric KernelInfo::getMayUseNestedParallelismFromKernelEnvironment( 46645f757f3fSDimitry Andric AA.KernelEnvC); 46655f757f3fSDimitry Andric ConstantInt *NewMayUseNestedParallelismC = ConstantInt::get( 4666cb14a3feSDimitry Andric MayUseNestedParallelismC->getIntegerType(), AA.NestedParallelism); 46675f757f3fSDimitry Andric AA.setMayUseNestedParallelismOfKernelEnvironment( 46685f757f3fSDimitry Andric NewMayUseNestedParallelismC); 46695f757f3fSDimitry Andric } 46705f757f3fSDimitry Andric } RAII(*this); 46715f757f3fSDimitry Andric 4672fe6060f1SDimitry Andric // Callback to check a read/write instruction. 4673fe6060f1SDimitry Andric auto CheckRWInst = [&](Instruction &I) { 4674fe6060f1SDimitry Andric // We handle calls later. 4675fe6060f1SDimitry Andric if (isa<CallBase>(I)) 4676fe6060f1SDimitry Andric return true; 4677fe6060f1SDimitry Andric // We only care about write effects. 4678fe6060f1SDimitry Andric if (!I.mayWriteToMemory()) 4679fe6060f1SDimitry Andric return true; 4680fe6060f1SDimitry Andric if (auto *SI = dyn_cast<StoreInst>(&I)) { 468106c3fb27SDimitry Andric const auto *UnderlyingObjsAA = A.getAAFor<AAUnderlyingObjects>( 4682bdd1243dSDimitry Andric *this, IRPosition::value(*SI->getPointerOperand()), 4683bdd1243dSDimitry Andric DepClassTy::OPTIONAL); 468406c3fb27SDimitry Andric auto *HS = A.getAAFor<AAHeapToStack>( 4685349cc55cSDimitry Andric *this, IRPosition::function(*I.getFunction()), 4686349cc55cSDimitry Andric DepClassTy::OPTIONAL); 468706c3fb27SDimitry Andric if (UnderlyingObjsAA && 468806c3fb27SDimitry Andric UnderlyingObjsAA->forallUnderlyingObjects([&](Value &Obj) { 4689bdd1243dSDimitry Andric if (AA::isAssumedThreadLocalObject(A, Obj, *this)) 4690349cc55cSDimitry Andric return true; 4691bdd1243dSDimitry Andric // Check for AAHeapToStack moved objects which must not be 4692bdd1243dSDimitry Andric // guarded. 
4693bdd1243dSDimitry Andric auto *CB = dyn_cast<CallBase>(&Obj);
469406c3fb27SDimitry Andric return CB && HS && HS->isAssumedHeapToStack(*CB);
4695bdd1243dSDimitry Andric }))
4696bdd1243dSDimitry Andric return true;
4697349cc55cSDimitry Andric }
4698349cc55cSDimitry Andric
4699349cc55cSDimitry Andric // Insert instruction that needs guarding.
4700fe6060f1SDimitry Andric SPMDCompatibilityTracker.insert(&I);
4701fe6060f1SDimitry Andric return true;
4702fe6060f1SDimitry Andric };
4703fe6060f1SDimitry Andric
4704fe6060f1SDimitry Andric bool UsedAssumedInformationInCheckRWInst = false;
4705fe6060f1SDimitry Andric if (!SPMDCompatibilityTracker.isAtFixpoint())
4706fe6060f1SDimitry Andric if (!A.checkForAllReadWriteInstructions(
4707fe6060f1SDimitry Andric CheckRWInst, *this, UsedAssumedInformationInCheckRWInst))
4708fe6060f1SDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4709fe6060f1SDimitry Andric
47104824e7fdSDimitry Andric bool UsedAssumedInformationFromReachingKernels = false;
4711fe6060f1SDimitry Andric if (!IsKernelEntry) {
4712fe6060f1SDimitry Andric updateParallelLevels(A);
4713349cc55cSDimitry Andric
47144824e7fdSDimitry Andric bool AllReachingKernelsKnown = true;
47154824e7fdSDimitry Andric updateReachingKernelEntries(A, AllReachingKernelsKnown);
47164824e7fdSDimitry Andric UsedAssumedInformationFromReachingKernels = !AllReachingKernelsKnown;
47174824e7fdSDimitry Andric
4718bdd1243dSDimitry Andric if (!SPMDCompatibilityTracker.empty()) {
4719349cc55cSDimitry Andric if (!ParallelLevels.isValidState())
4720349cc55cSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint();
47214824e7fdSDimitry Andric else if (!ReachingKernelEntries.isValidState())
47224824e7fdSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint();
4723bdd1243dSDimitry Andric else {
47244824e7fdSDimitry Andric // Check if all reaching kernels agree on the mode as we can otherwise
47254824e7fdSDimitry Andric // not guard instructions. We might not be sure about the mode so we
47264824e7fdSDimitry Andric // cannot fix the internal spmd-zation state either.
47274824e7fdSDimitry Andric int SPMD = 0, Generic = 0;
47284824e7fdSDimitry Andric for (auto *Kernel : ReachingKernelEntries) {
472906c3fb27SDimitry Andric auto *CBAA = A.getAAFor<AAKernelInfo>(
47304824e7fdSDimitry Andric *this, IRPosition::function(*Kernel), DepClassTy::OPTIONAL);
473106c3fb27SDimitry Andric if (CBAA && CBAA->SPMDCompatibilityTracker.isValidState() &&
473206c3fb27SDimitry Andric CBAA->SPMDCompatibilityTracker.isAssumed())
47334824e7fdSDimitry Andric ++SPMD;
47344824e7fdSDimitry Andric else
47354824e7fdSDimitry Andric ++Generic;
473606c3fb27SDimitry Andric if (!CBAA || !CBAA->SPMDCompatibilityTracker.isAtFixpoint())
47374824e7fdSDimitry Andric UsedAssumedInformationFromReachingKernels = true;
47384824e7fdSDimitry Andric }
47394824e7fdSDimitry Andric if (SPMD != 0 && Generic != 0)
47404824e7fdSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint();
47414824e7fdSDimitry Andric }
4742fe6060f1SDimitry Andric }
4743bdd1243dSDimitry Andric }
4744fe6060f1SDimitry Andric
4745fe6060f1SDimitry Andric // Callback to check a call instruction.
4746349cc55cSDimitry Andric bool AllParallelRegionStatesWereFixed = true; 4747fe6060f1SDimitry Andric bool AllSPMDStatesWereFixed = true; 4748fe6060f1SDimitry Andric auto CheckCallInst = [&](Instruction &I) { 4749fe6060f1SDimitry Andric auto &CB = cast<CallBase>(I); 475006c3fb27SDimitry Andric auto *CBAA = A.getAAFor<AAKernelInfo>( 4751fe6060f1SDimitry Andric *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL); 475206c3fb27SDimitry Andric if (!CBAA) 475306c3fb27SDimitry Andric return false; 475406c3fb27SDimitry Andric getState() ^= CBAA->getState(); 475506c3fb27SDimitry Andric AllSPMDStatesWereFixed &= CBAA->SPMDCompatibilityTracker.isAtFixpoint(); 4756349cc55cSDimitry Andric AllParallelRegionStatesWereFixed &= 475706c3fb27SDimitry Andric CBAA->ReachedKnownParallelRegions.isAtFixpoint(); 4758349cc55cSDimitry Andric AllParallelRegionStatesWereFixed &= 475906c3fb27SDimitry Andric CBAA->ReachedUnknownParallelRegions.isAtFixpoint(); 4760fe6060f1SDimitry Andric return true; 4761fe6060f1SDimitry Andric }; 4762fe6060f1SDimitry Andric 4763fe6060f1SDimitry Andric bool UsedAssumedInformationInCheckCallInst = false; 4764fe6060f1SDimitry Andric if (!A.checkForAllCallLikeInstructions( 4765349cc55cSDimitry Andric CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) { 4766349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << TAG 4767349cc55cSDimitry Andric << "Failed to visit all call-like instructions!\n";); 4768fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 4769349cc55cSDimitry Andric } 4770349cc55cSDimitry Andric 4771349cc55cSDimitry Andric // If we haven't used any assumed information for the reached parallel 4772349cc55cSDimitry Andric // region states we can fix it. 4773349cc55cSDimitry Andric if (!UsedAssumedInformationInCheckCallInst && 4774349cc55cSDimitry Andric AllParallelRegionStatesWereFixed) { 4775349cc55cSDimitry Andric ReachedKnownParallelRegions.indicateOptimisticFixpoint(); 4776349cc55cSDimitry Andric ReachedUnknownParallelRegions.indicateOptimisticFixpoint(); 4777349cc55cSDimitry Andric } 4778349cc55cSDimitry Andric 4779fe6060f1SDimitry Andric // If we haven't used any assumed information for the SPMD state we can fix 4780fe6060f1SDimitry Andric // it. 4781fe6060f1SDimitry Andric if (!UsedAssumedInformationInCheckRWInst && 47824824e7fdSDimitry Andric !UsedAssumedInformationInCheckCallInst && 47834824e7fdSDimitry Andric !UsedAssumedInformationFromReachingKernels && AllSPMDStatesWereFixed) 4784fe6060f1SDimitry Andric SPMDCompatibilityTracker.indicateOptimisticFixpoint(); 4785fe6060f1SDimitry Andric 4786fe6060f1SDimitry Andric return StateBefore == getState() ? ChangeStatus::UNCHANGED 4787fe6060f1SDimitry Andric : ChangeStatus::CHANGED; 4788fe6060f1SDimitry Andric } 4789fe6060f1SDimitry Andric 4790fe6060f1SDimitry Andric private: 4791fe6060f1SDimitry Andric /// Update info regarding reaching kernels. 
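/// Visits all call sites of the associated function and merges each caller's
/// ReachingKernelEntries set into this one; if the call sites cannot all be
/// identified, the set is fixed pessimistically.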
47924824e7fdSDimitry Andric void updateReachingKernelEntries(Attributor &A,
47934824e7fdSDimitry Andric bool &AllReachingKernelsKnown) {
4794fe6060f1SDimitry Andric auto PredCallSite = [&](AbstractCallSite ACS) {
4795fe6060f1SDimitry Andric Function *Caller = ACS.getInstruction()->getFunction();
4796fe6060f1SDimitry Andric
4797fe6060f1SDimitry Andric assert(Caller && "Caller is nullptr");
4798fe6060f1SDimitry Andric
479906c3fb27SDimitry Andric auto *CAA = A.getOrCreateAAFor<AAKernelInfo>(
4800fe6060f1SDimitry Andric IRPosition::function(*Caller), this, DepClassTy::REQUIRED);
480106c3fb27SDimitry Andric if (CAA && CAA->ReachingKernelEntries.isValidState()) {
480206c3fb27SDimitry Andric ReachingKernelEntries ^= CAA->ReachingKernelEntries;
4803fe6060f1SDimitry Andric return true;
4804fe6060f1SDimitry Andric }
4805fe6060f1SDimitry Andric
4806fe6060f1SDimitry Andric // We lost track of the caller of the associated function; any kernel
4807fe6060f1SDimitry Andric // could reach now.
4808fe6060f1SDimitry Andric ReachingKernelEntries.indicatePessimisticFixpoint();
4809fe6060f1SDimitry Andric
4810fe6060f1SDimitry Andric return true;
4811fe6060f1SDimitry Andric };
4812fe6060f1SDimitry Andric
4813fe6060f1SDimitry Andric if (!A.checkForAllCallSites(PredCallSite, *this,
4814fe6060f1SDimitry Andric true /* RequireAllCallSites */,
48154824e7fdSDimitry Andric AllReachingKernelsKnown))
4816fe6060f1SDimitry Andric ReachingKernelEntries.indicatePessimisticFixpoint();
4817fe6060f1SDimitry Andric }
4818fe6060f1SDimitry Andric
4819fe6060f1SDimitry Andric /// Update info regarding parallel levels.
4820fe6060f1SDimitry Andric void updateParallelLevels(Attributor &A) {
4821fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4822fe6060f1SDimitry Andric OMPInformationCache::RuntimeFunctionInfo &Parallel51RFI =
4823fe6060f1SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
4824fe6060f1SDimitry Andric
4825fe6060f1SDimitry Andric auto PredCallSite = [&](AbstractCallSite ACS) {
4826fe6060f1SDimitry Andric Function *Caller = ACS.getInstruction()->getFunction();
4827fe6060f1SDimitry Andric
4828fe6060f1SDimitry Andric assert(Caller && "Caller is nullptr");
4829fe6060f1SDimitry Andric
483006c3fb27SDimitry Andric auto *CAA =
4831fe6060f1SDimitry Andric A.getOrCreateAAFor<AAKernelInfo>(IRPosition::function(*Caller));
483206c3fb27SDimitry Andric if (CAA && CAA->ParallelLevels.isValidState()) {
4833fe6060f1SDimitry Andric // Any function that is called by `__kmpc_parallel_51` will not be
4834fe6060f1SDimitry Andric // folded as the parallel level in the function is updated. In order to
4835fe6060f1SDimitry Andric // get it right, all the analysis would depend on the implementation. That
4836fe6060f1SDimitry Andric // said, if the implementation changes in the future, the analysis
4837fe6060f1SDimitry Andric // could be wrong. As a consequence, we are just conservative here.
4838fe6060f1SDimitry Andric if (Caller == Parallel51RFI.Declaration) {
4839fe6060f1SDimitry Andric ParallelLevels.indicatePessimisticFixpoint();
4840fe6060f1SDimitry Andric return true;
4841fe6060f1SDimitry Andric }
4842fe6060f1SDimitry Andric
484306c3fb27SDimitry Andric ParallelLevels ^= CAA->ParallelLevels;
4844fe6060f1SDimitry Andric
4845fe6060f1SDimitry Andric return true;
4846fe6060f1SDimitry Andric }
4847fe6060f1SDimitry Andric
4848fe6060f1SDimitry Andric // We lost track of the caller of the associated function; any kernel
4849fe6060f1SDimitry Andric // could reach now.
4850fe6060f1SDimitry Andric ParallelLevels.indicatePessimisticFixpoint(); 4851fe6060f1SDimitry Andric 4852fe6060f1SDimitry Andric return true; 4853fe6060f1SDimitry Andric }; 4854fe6060f1SDimitry Andric 4855fe6060f1SDimitry Andric bool AllCallSitesKnown = true; 4856fe6060f1SDimitry Andric if (!A.checkForAllCallSites(PredCallSite, *this, 4857fe6060f1SDimitry Andric true /* RequireAllCallSites */, 4858fe6060f1SDimitry Andric AllCallSitesKnown)) 4859fe6060f1SDimitry Andric ParallelLevels.indicatePessimisticFixpoint(); 4860fe6060f1SDimitry Andric } 4861fe6060f1SDimitry Andric }; 4862fe6060f1SDimitry Andric 4863fe6060f1SDimitry Andric /// The call site kernel info abstract attribute, basically, what can we say 4864fe6060f1SDimitry Andric /// about a call site with regards to the KernelInfoState. For now this simply 4865fe6060f1SDimitry Andric /// forwards the information from the callee. 4866fe6060f1SDimitry Andric struct AAKernelInfoCallSite : AAKernelInfo { 4867fe6060f1SDimitry Andric AAKernelInfoCallSite(const IRPosition &IRP, Attributor &A) 4868fe6060f1SDimitry Andric : AAKernelInfo(IRP, A) {} 4869fe6060f1SDimitry Andric 4870fe6060f1SDimitry Andric /// See AbstractAttribute::initialize(...). 4871fe6060f1SDimitry Andric void initialize(Attributor &A) override { 4872fe6060f1SDimitry Andric AAKernelInfo::initialize(A); 4873fe6060f1SDimitry Andric 4874fe6060f1SDimitry Andric CallBase &CB = cast<CallBase>(getAssociatedValue()); 487506c3fb27SDimitry Andric auto *AssumptionAA = A.getAAFor<AAAssumptionInfo>( 4876349cc55cSDimitry Andric *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL); 4877fe6060f1SDimitry Andric 4878fe6060f1SDimitry Andric // Check for SPMD-mode assumptions. 487906c3fb27SDimitry Andric if (AssumptionAA && AssumptionAA->hasAssumption("ompx_spmd_amenable")) { 4880349cc55cSDimitry Andric indicateOptimisticFixpoint(); 48815f757f3fSDimitry Andric return; 4882349cc55cSDimitry Andric } 4883fe6060f1SDimitry Andric 4884fe6060f1SDimitry Andric // First weed out calls we do not care about, that is readonly/readnone 4885fe6060f1SDimitry Andric // calls, intrinsics, and "no_openmp" calls. Neither of these can reach a 4886fe6060f1SDimitry Andric // parallel region or anything else we are looking for. 4887fe6060f1SDimitry Andric if (!CB.mayWriteToMemory() || isa<IntrinsicInst>(CB)) { 4888fe6060f1SDimitry Andric indicateOptimisticFixpoint(); 4889fe6060f1SDimitry Andric return; 4890fe6060f1SDimitry Andric } 4891fe6060f1SDimitry Andric 4892fe6060f1SDimitry Andric // Next we check if we know the callee. If it is a known OpenMP function 4893fe6060f1SDimitry Andric // we will handle them explicitly in the switch below. If it is not, we 4894fe6060f1SDimitry Andric // will use an AAKernelInfo object on the callee to gather information and 4895fe6060f1SDimitry Andric // merge that into the current state. The latter happens in the updateImpl. 48965f757f3fSDimitry Andric auto CheckCallee = [&](Function *Callee, unsigned NumCallees) { 4897fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 4898fe6060f1SDimitry Andric const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee); 4899fe6060f1SDimitry Andric if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) { 4900fe6060f1SDimitry Andric // Unknown caller or declarations are not analyzable, we give up. 
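// The branch just below gives up on callees it cannot see into. As a rough
// illustration (the source-level spelling is an assumption, not taken from
// this file), a call like
//
//   extern void external_helper();   // hypothetical, body not visible
//   ...
//   external_helper();
//
// is recorded in ReachedUnknownParallelRegions and poisons SPMD
// compatibility, unless the call (or its callee) carries an "omp_no_openmp"
// or "omp_no_parallelism" assumption, e.g. attached via an OpenMP 'assumes'
// directive, in which case the state can stay clean.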
4901fe6060f1SDimitry Andric if (!Callee || !A.isFunctionIPOAmendable(*Callee)) { 4902fe6060f1SDimitry Andric 4903fe6060f1SDimitry Andric // Unknown callees might contain parallel regions, except if they have 4904fe6060f1SDimitry Andric // an appropriate assumption attached. 490506c3fb27SDimitry Andric if (!AssumptionAA || 490606c3fb27SDimitry Andric !(AssumptionAA->hasAssumption("omp_no_openmp") || 490706c3fb27SDimitry Andric AssumptionAA->hasAssumption("omp_no_parallelism"))) 4908fe6060f1SDimitry Andric ReachedUnknownParallelRegions.insert(&CB); 4909fe6060f1SDimitry Andric 4910fe6060f1SDimitry Andric // If SPMDCompatibilityTracker is not fixed, we need to give up on the 4911fe6060f1SDimitry Andric // idea we can run something unknown in SPMD-mode. 4912349cc55cSDimitry Andric if (!SPMDCompatibilityTracker.isAtFixpoint()) { 4913349cc55cSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint(); 4914fe6060f1SDimitry Andric SPMDCompatibilityTracker.insert(&CB); 4915349cc55cSDimitry Andric } 4916fe6060f1SDimitry Andric 49175f757f3fSDimitry Andric // We have updated the state for this unknown call properly, there 49185f757f3fSDimitry Andric // won't be any change so we indicate a fixpoint. 4919fe6060f1SDimitry Andric indicateOptimisticFixpoint(); 4920fe6060f1SDimitry Andric } 49215f757f3fSDimitry Andric // If the callee is known and can be used in IPO, we will update the 49225f757f3fSDimitry Andric // state based on the callee state in updateImpl. 49235f757f3fSDimitry Andric return; 49245f757f3fSDimitry Andric } 49255f757f3fSDimitry Andric if (NumCallees > 1) { 49265f757f3fSDimitry Andric indicatePessimisticFixpoint(); 4927fe6060f1SDimitry Andric return; 4928fe6060f1SDimitry Andric } 4929fe6060f1SDimitry Andric 4930fe6060f1SDimitry Andric RuntimeFunction RF = It->getSecond(); 4931fe6060f1SDimitry Andric switch (RF) { 4932fe6060f1SDimitry Andric // All the functions we know are compatible with SPMD mode. 
4933fe6060f1SDimitry Andric case OMPRTL___kmpc_is_spmd_exec_mode: 4934349cc55cSDimitry Andric case OMPRTL___kmpc_distribute_static_fini: 4935fe6060f1SDimitry Andric case OMPRTL___kmpc_for_static_fini: 4936fe6060f1SDimitry Andric case OMPRTL___kmpc_global_thread_num: 4937fe6060f1SDimitry Andric case OMPRTL___kmpc_get_hardware_num_threads_in_block: 4938fe6060f1SDimitry Andric case OMPRTL___kmpc_get_hardware_num_blocks: 4939fe6060f1SDimitry Andric case OMPRTL___kmpc_single: 4940fe6060f1SDimitry Andric case OMPRTL___kmpc_end_single: 4941fe6060f1SDimitry Andric case OMPRTL___kmpc_master: 4942fe6060f1SDimitry Andric case OMPRTL___kmpc_end_master: 4943fe6060f1SDimitry Andric case OMPRTL___kmpc_barrier: 49440eae32dcSDimitry Andric case OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2: 49450eae32dcSDimitry Andric case OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2: 49465f757f3fSDimitry Andric case OMPRTL___kmpc_error: 49475f757f3fSDimitry Andric case OMPRTL___kmpc_flush: 49485f757f3fSDimitry Andric case OMPRTL___kmpc_get_hardware_thread_id_in_block: 49495f757f3fSDimitry Andric case OMPRTL___kmpc_get_warp_size: 49505f757f3fSDimitry Andric case OMPRTL_omp_get_thread_num: 49515f757f3fSDimitry Andric case OMPRTL_omp_get_num_threads: 49525f757f3fSDimitry Andric case OMPRTL_omp_get_max_threads: 49535f757f3fSDimitry Andric case OMPRTL_omp_in_parallel: 49545f757f3fSDimitry Andric case OMPRTL_omp_get_dynamic: 49555f757f3fSDimitry Andric case OMPRTL_omp_get_cancellation: 49565f757f3fSDimitry Andric case OMPRTL_omp_get_nested: 49575f757f3fSDimitry Andric case OMPRTL_omp_get_schedule: 49585f757f3fSDimitry Andric case OMPRTL_omp_get_thread_limit: 49595f757f3fSDimitry Andric case OMPRTL_omp_get_supported_active_levels: 49605f757f3fSDimitry Andric case OMPRTL_omp_get_max_active_levels: 49615f757f3fSDimitry Andric case OMPRTL_omp_get_level: 49625f757f3fSDimitry Andric case OMPRTL_omp_get_ancestor_thread_num: 49635f757f3fSDimitry Andric case OMPRTL_omp_get_team_size: 49645f757f3fSDimitry Andric case OMPRTL_omp_get_active_level: 49655f757f3fSDimitry Andric case OMPRTL_omp_in_final: 49665f757f3fSDimitry Andric case OMPRTL_omp_get_proc_bind: 49675f757f3fSDimitry Andric case OMPRTL_omp_get_num_places: 49685f757f3fSDimitry Andric case OMPRTL_omp_get_num_procs: 49695f757f3fSDimitry Andric case OMPRTL_omp_get_place_proc_ids: 49705f757f3fSDimitry Andric case OMPRTL_omp_get_place_num: 49715f757f3fSDimitry Andric case OMPRTL_omp_get_partition_num_places: 49725f757f3fSDimitry Andric case OMPRTL_omp_get_partition_place_nums: 49735f757f3fSDimitry Andric case OMPRTL_omp_get_wtime: 4974fe6060f1SDimitry Andric break; 4975349cc55cSDimitry Andric case OMPRTL___kmpc_distribute_static_init_4: 4976349cc55cSDimitry Andric case OMPRTL___kmpc_distribute_static_init_4u: 4977349cc55cSDimitry Andric case OMPRTL___kmpc_distribute_static_init_8: 4978349cc55cSDimitry Andric case OMPRTL___kmpc_distribute_static_init_8u: 4979fe6060f1SDimitry Andric case OMPRTL___kmpc_for_static_init_4: 4980fe6060f1SDimitry Andric case OMPRTL___kmpc_for_static_init_4u: 4981fe6060f1SDimitry Andric case OMPRTL___kmpc_for_static_init_8: 4982fe6060f1SDimitry Andric case OMPRTL___kmpc_for_static_init_8u: { 4983fe6060f1SDimitry Andric // Check the schedule and allow static schedule in SPMD mode. 
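// Illustrative example (typical clang lowering assumed, not taken from this
// file): a statically scheduled worksharing loop such as
//
//   #pragma omp parallel for schedule(static)
//   for (int i = 0; i < N; ++i)
//     body(i);
//
// reaches this case with an UnorderedStatic schedule constant and therefore
// stays SPMD compatible. Any schedule the pass cannot prove to be one of the
// static kinds, including a non-constant schedule operand, falls through to
// the pessimistic default below.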
4984fe6060f1SDimitry Andric unsigned ScheduleArgOpNo = 2; 4985fe6060f1SDimitry Andric auto *ScheduleTypeCI = 4986fe6060f1SDimitry Andric dyn_cast<ConstantInt>(CB.getArgOperand(ScheduleArgOpNo)); 4987fe6060f1SDimitry Andric unsigned ScheduleTypeVal = 4988fe6060f1SDimitry Andric ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0; 4989fe6060f1SDimitry Andric switch (OMPScheduleType(ScheduleTypeVal)) { 499081ad6265SDimitry Andric case OMPScheduleType::UnorderedStatic: 499181ad6265SDimitry Andric case OMPScheduleType::UnorderedStaticChunked: 499281ad6265SDimitry Andric case OMPScheduleType::OrderedDistribute: 499381ad6265SDimitry Andric case OMPScheduleType::OrderedDistributeChunked: 4994fe6060f1SDimitry Andric break; 4995fe6060f1SDimitry Andric default: 4996349cc55cSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint(); 4997fe6060f1SDimitry Andric SPMDCompatibilityTracker.insert(&CB); 4998fe6060f1SDimitry Andric break; 4999fe6060f1SDimitry Andric }; 5000fe6060f1SDimitry Andric } break; 5001fe6060f1SDimitry Andric case OMPRTL___kmpc_target_init: 5002fe6060f1SDimitry Andric KernelInitCB = &CB; 5003fe6060f1SDimitry Andric break; 5004fe6060f1SDimitry Andric case OMPRTL___kmpc_target_deinit: 5005fe6060f1SDimitry Andric KernelDeinitCB = &CB; 5006fe6060f1SDimitry Andric break; 5007fe6060f1SDimitry Andric case OMPRTL___kmpc_parallel_51: 50085f757f3fSDimitry Andric if (!handleParallel51(A, CB)) 50095f757f3fSDimitry Andric indicatePessimisticFixpoint(); 50105f757f3fSDimitry Andric return; 5011fe6060f1SDimitry Andric case OMPRTL___kmpc_omp_task: 5012fe6060f1SDimitry Andric // We do not look into tasks right now, just give up. 50130eae32dcSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint(); 5014fe6060f1SDimitry Andric SPMDCompatibilityTracker.insert(&CB); 5015fe6060f1SDimitry Andric ReachedUnknownParallelRegions.insert(&CB); 5016fe6060f1SDimitry Andric break; 5017fe6060f1SDimitry Andric case OMPRTL___kmpc_alloc_shared: 5018fe6060f1SDimitry Andric case OMPRTL___kmpc_free_shared: 5019fe6060f1SDimitry Andric // Return without setting a fixpoint, to be resolved in updateImpl. 5020fe6060f1SDimitry Andric return; 5021fe6060f1SDimitry Andric default: 5022fe6060f1SDimitry Andric // Unknown OpenMP runtime calls cannot be executed in SPMD-mode, 5023349cc55cSDimitry Andric // generally. However, they do not hide parallel regions. 50240eae32dcSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint(); 5025fe6060f1SDimitry Andric SPMDCompatibilityTracker.insert(&CB); 5026fe6060f1SDimitry Andric break; 5027fe6060f1SDimitry Andric } 5028fe6060f1SDimitry Andric // All other OpenMP runtime calls will not reach parallel regions so they 50295f757f3fSDimitry Andric // can be safely ignored for now. Since it is a known OpenMP runtime call 50305f757f3fSDimitry Andric // we have now modeled all effects and there is no need for any update. 
5031fe6060f1SDimitry Andric indicateOptimisticFixpoint(); 50325f757f3fSDimitry Andric }; 50335f757f3fSDimitry Andric 50345f757f3fSDimitry Andric const auto *AACE = 50355f757f3fSDimitry Andric A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::OPTIONAL); 50365f757f3fSDimitry Andric if (!AACE || !AACE->getState().isValidState() || AACE->hasUnknownCallee()) { 50375f757f3fSDimitry Andric CheckCallee(getAssociatedFunction(), 1); 50385f757f3fSDimitry Andric return; 50395f757f3fSDimitry Andric } 50405f757f3fSDimitry Andric const auto &OptimisticEdges = AACE->getOptimisticEdges(); 50415f757f3fSDimitry Andric for (auto *Callee : OptimisticEdges) { 50425f757f3fSDimitry Andric CheckCallee(Callee, OptimisticEdges.size()); 50435f757f3fSDimitry Andric if (isAtFixpoint()) 50445f757f3fSDimitry Andric break; 50455f757f3fSDimitry Andric } 5046fe6060f1SDimitry Andric } 5047fe6060f1SDimitry Andric 5048fe6060f1SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 5049fe6060f1SDimitry Andric // TODO: Once we have call site specific value information we can provide 5050fe6060f1SDimitry Andric // call site specific liveness information and then it makes 5051fe6060f1SDimitry Andric // sense to specialize attributes for call sites arguments instead of 5052fe6060f1SDimitry Andric // redirecting requests to the callee argument. 5053fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 50545f757f3fSDimitry Andric KernelInfoState StateBefore = getState(); 50555f757f3fSDimitry Andric 50565f757f3fSDimitry Andric auto CheckCallee = [&](Function *F, int NumCallees) { 5057fe6060f1SDimitry Andric const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(F); 5058fe6060f1SDimitry Andric 50595f757f3fSDimitry Andric // If F is not a runtime function, propagate the AAKernelInfo of the 50605f757f3fSDimitry Andric // callee. 5061fe6060f1SDimitry Andric if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) { 5062fe6060f1SDimitry Andric const IRPosition &FnPos = IRPosition::function(*F); 50635f757f3fSDimitry Andric auto *FnAA = 50645f757f3fSDimitry Andric A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED); 506506c3fb27SDimitry Andric if (!FnAA) 506606c3fb27SDimitry Andric return indicatePessimisticFixpoint(); 506706c3fb27SDimitry Andric if (getState() == FnAA->getState()) 5068fe6060f1SDimitry Andric return ChangeStatus::UNCHANGED; 506906c3fb27SDimitry Andric getState() = FnAA->getState(); 5070fe6060f1SDimitry Andric return ChangeStatus::CHANGED; 5071fe6060f1SDimitry Andric } 50725f757f3fSDimitry Andric if (NumCallees > 1) 50735f757f3fSDimitry Andric return indicatePessimisticFixpoint(); 50745f757f3fSDimitry Andric 50755f757f3fSDimitry Andric CallBase &CB = cast<CallBase>(getAssociatedValue()); 50765f757f3fSDimitry Andric if (It->getSecond() == OMPRTL___kmpc_parallel_51) { 50775f757f3fSDimitry Andric if (!handleParallel51(A, CB)) 50785f757f3fSDimitry Andric return indicatePessimisticFixpoint(); 50795f757f3fSDimitry Andric return StateBefore == getState() ? ChangeStatus::UNCHANGED 50805f757f3fSDimitry Andric : ChangeStatus::CHANGED; 50815f757f3fSDimitry Andric } 5082fe6060f1SDimitry Andric 5083fe6060f1SDimitry Andric // F is a runtime function that allocates or frees memory, check 5084fe6060f1SDimitry Andric // AAHeapToStack and AAHeapToShared. 
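// Rough illustration (lowering details assumed, not taken from this file):
// a local that escapes into a parallel region is globalized by the device
// runtime, e.g.
//
//   #pragma omp target teams
//   {
//     int buf[4];          // may be emitted as __kmpc_alloc_shared(16)
//     #pragma omp parallel
//     use(buf);            // 'use' is a hypothetical helper
//   }
//
// If AAHeapToStack or AAHeapToShared can show the allocation is turned back
// into a stack slot or into static shared memory, the runtime call is
// assumed to vanish and is not inserted into SPMDCompatibilityTracker below.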
50855f757f3fSDimitry Andric assert( 50865f757f3fSDimitry Andric (It->getSecond() == OMPRTL___kmpc_alloc_shared || 5087fe6060f1SDimitry Andric It->getSecond() == OMPRTL___kmpc_free_shared) && 5088fe6060f1SDimitry Andric "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call"); 5089fe6060f1SDimitry Andric 509006c3fb27SDimitry Andric auto *HeapToStackAA = A.getAAFor<AAHeapToStack>( 5091fe6060f1SDimitry Andric *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL); 509206c3fb27SDimitry Andric auto *HeapToSharedAA = A.getAAFor<AAHeapToShared>( 5093fe6060f1SDimitry Andric *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL); 5094fe6060f1SDimitry Andric 5095fe6060f1SDimitry Andric RuntimeFunction RF = It->getSecond(); 5096fe6060f1SDimitry Andric 5097fe6060f1SDimitry Andric switch (RF) { 5098fe6060f1SDimitry Andric // If neither HeapToStack nor HeapToShared assume the call is removed, 5099fe6060f1SDimitry Andric // assume SPMD incompatibility. 5100fe6060f1SDimitry Andric case OMPRTL___kmpc_alloc_shared: 510106c3fb27SDimitry Andric if ((!HeapToStackAA || !HeapToStackAA->isAssumedHeapToStack(CB)) && 510206c3fb27SDimitry Andric (!HeapToSharedAA || !HeapToSharedAA->isAssumedHeapToShared(CB))) 5103fe6060f1SDimitry Andric SPMDCompatibilityTracker.insert(&CB); 5104fe6060f1SDimitry Andric break; 5105fe6060f1SDimitry Andric case OMPRTL___kmpc_free_shared: 510606c3fb27SDimitry Andric if ((!HeapToStackAA || 510706c3fb27SDimitry Andric !HeapToStackAA->isAssumedHeapToStackRemovedFree(CB)) && 510806c3fb27SDimitry Andric (!HeapToSharedAA || 510906c3fb27SDimitry Andric !HeapToSharedAA->isAssumedHeapToSharedRemovedFree(CB))) 5110fe6060f1SDimitry Andric SPMDCompatibilityTracker.insert(&CB); 5111fe6060f1SDimitry Andric break; 5112fe6060f1SDimitry Andric default: 51130eae32dcSDimitry Andric SPMDCompatibilityTracker.indicatePessimisticFixpoint(); 5114fe6060f1SDimitry Andric SPMDCompatibilityTracker.insert(&CB); 5115fe6060f1SDimitry Andric } 51165f757f3fSDimitry Andric return ChangeStatus::CHANGED; 51175f757f3fSDimitry Andric }; 51185f757f3fSDimitry Andric 51195f757f3fSDimitry Andric const auto *AACE = 51205f757f3fSDimitry Andric A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::OPTIONAL); 51215f757f3fSDimitry Andric if (!AACE || !AACE->getState().isValidState() || AACE->hasUnknownCallee()) { 51225f757f3fSDimitry Andric if (Function *F = getAssociatedFunction()) 51235f757f3fSDimitry Andric CheckCallee(F, /*NumCallees=*/1); 51245f757f3fSDimitry Andric } else { 51255f757f3fSDimitry Andric const auto &OptimisticEdges = AACE->getOptimisticEdges(); 51265f757f3fSDimitry Andric for (auto *Callee : OptimisticEdges) { 51275f757f3fSDimitry Andric CheckCallee(Callee, OptimisticEdges.size()); 51285f757f3fSDimitry Andric if (isAtFixpoint()) 51295f757f3fSDimitry Andric break; 51305f757f3fSDimitry Andric } 51315f757f3fSDimitry Andric } 5132fe6060f1SDimitry Andric 5133fe6060f1SDimitry Andric return StateBefore == getState() ? ChangeStatus::UNCHANGED 5134fe6060f1SDimitry Andric : ChangeStatus::CHANGED; 5135fe6060f1SDimitry Andric } 51365f757f3fSDimitry Andric 51375f757f3fSDimitry Andric /// Deal with a __kmpc_parallel_51 call (\p CB). Returns true if the call was 51385f757f3fSDimitry Andric /// handled, if a problem occurred, false is returned. 
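// For reference, assuming the usual device runtime signature (not restated
// in this file), the outlined region sits at a fixed operand position:
//
//   __kmpc_parallel_51(ident, gtid, if_expr, num_threads, proc_bind,
//                      /*5:*/ fn, /*6:*/ wrapper_fn, args, nargs);
//
// The helper below reads argument 5 (the outlined parallel region) while
// SPMD execution is still assumed, and argument 6 (the generic-mode wrapper)
// otherwise.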
51395f757f3fSDimitry Andric bool handleParallel51(Attributor &A, CallBase &CB) {
51405f757f3fSDimitry Andric const unsigned int NonWrapperFunctionArgNo = 5;
51415f757f3fSDimitry Andric const unsigned int WrapperFunctionArgNo = 6;
51425f757f3fSDimitry Andric auto ParallelRegionOpArgNo = SPMDCompatibilityTracker.isAssumed()
51435f757f3fSDimitry Andric ? NonWrapperFunctionArgNo
51445f757f3fSDimitry Andric : WrapperFunctionArgNo;
51455f757f3fSDimitry Andric
51465f757f3fSDimitry Andric auto *ParallelRegion = dyn_cast<Function>(
51475f757f3fSDimitry Andric CB.getArgOperand(ParallelRegionOpArgNo)->stripPointerCasts());
51485f757f3fSDimitry Andric if (!ParallelRegion)
51495f757f3fSDimitry Andric return false;
51505f757f3fSDimitry Andric
51515f757f3fSDimitry Andric ReachedKnownParallelRegions.insert(&CB);
51525f757f3fSDimitry Andric // Check for nested parallelism.
51535f757f3fSDimitry Andric auto *FnAA = A.getAAFor<AAKernelInfo>(
51545f757f3fSDimitry Andric *this, IRPosition::function(*ParallelRegion), DepClassTy::OPTIONAL);
51555f757f3fSDimitry Andric NestedParallelism |= !FnAA || !FnAA->getState().isValidState() ||
51565f757f3fSDimitry Andric !FnAA->ReachedKnownParallelRegions.empty() ||
51575f757f3fSDimitry Andric !FnAA->ReachedKnownParallelRegions.isValidState() ||
51585f757f3fSDimitry Andric !FnAA->ReachedUnknownParallelRegions.isValidState() ||
51595f757f3fSDimitry Andric !FnAA->ReachedUnknownParallelRegions.empty();
51605f757f3fSDimitry Andric return true;
51615f757f3fSDimitry Andric }
5162fe6060f1SDimitry Andric };
5163fe6060f1SDimitry Andric
5164fe6060f1SDimitry Andric struct AAFoldRuntimeCall
5165fe6060f1SDimitry Andric : public StateWrapper<BooleanState, AbstractAttribute> {
5166fe6060f1SDimitry Andric using Base = StateWrapper<BooleanState, AbstractAttribute>;
5167fe6060f1SDimitry Andric
5168fe6060f1SDimitry Andric AAFoldRuntimeCall(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
5169fe6060f1SDimitry Andric
5170fe6060f1SDimitry Andric /// Statistics are tracked as part of manifest for now.
5171fe6060f1SDimitry Andric void trackStatistics() const override {}
5172fe6060f1SDimitry Andric
5173fe6060f1SDimitry Andric /// Create an abstract attribute view for the position \p IRP.
5174fe6060f1SDimitry Andric static AAFoldRuntimeCall &createForPosition(const IRPosition &IRP, 5175fe6060f1SDimitry Andric Attributor &A); 5176fe6060f1SDimitry Andric 5177fe6060f1SDimitry Andric /// See AbstractAttribute::getName() 5178fe6060f1SDimitry Andric const std::string getName() const override { return "AAFoldRuntimeCall"; } 5179fe6060f1SDimitry Andric 5180fe6060f1SDimitry Andric /// See AbstractAttribute::getIdAddr() 5181fe6060f1SDimitry Andric const char *getIdAddr() const override { return &ID; } 5182fe6060f1SDimitry Andric 5183fe6060f1SDimitry Andric /// This function should return true if the type of the \p AA is 5184fe6060f1SDimitry Andric /// AAFoldRuntimeCall 5185fe6060f1SDimitry Andric static bool classof(const AbstractAttribute *AA) { 5186fe6060f1SDimitry Andric return (AA->getIdAddr() == &ID); 5187fe6060f1SDimitry Andric } 5188fe6060f1SDimitry Andric 5189fe6060f1SDimitry Andric static const char ID; 5190fe6060f1SDimitry Andric }; 5191fe6060f1SDimitry Andric 5192fe6060f1SDimitry Andric struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall { 5193fe6060f1SDimitry Andric AAFoldRuntimeCallCallSiteReturned(const IRPosition &IRP, Attributor &A) 5194fe6060f1SDimitry Andric : AAFoldRuntimeCall(IRP, A) {} 5195fe6060f1SDimitry Andric 5196fe6060f1SDimitry Andric /// See AbstractAttribute::getAsStr() 519706c3fb27SDimitry Andric const std::string getAsStr(Attributor *) const override { 5198fe6060f1SDimitry Andric if (!isValidState()) 5199fe6060f1SDimitry Andric return "<invalid>"; 5200fe6060f1SDimitry Andric 5201fe6060f1SDimitry Andric std::string Str("simplified value: "); 5202fe6060f1SDimitry Andric 520381ad6265SDimitry Andric if (!SimplifiedValue) 5204fe6060f1SDimitry Andric return Str + std::string("none"); 5205fe6060f1SDimitry Andric 5206bdd1243dSDimitry Andric if (!*SimplifiedValue) 5207fe6060f1SDimitry Andric return Str + std::string("nullptr"); 5208fe6060f1SDimitry Andric 5209bdd1243dSDimitry Andric if (ConstantInt *CI = dyn_cast<ConstantInt>(*SimplifiedValue)) 5210fe6060f1SDimitry Andric return Str + std::to_string(CI->getSExtValue()); 5211fe6060f1SDimitry Andric 5212fe6060f1SDimitry Andric return Str + std::string("unknown"); 5213fe6060f1SDimitry Andric } 5214fe6060f1SDimitry Andric 5215fe6060f1SDimitry Andric void initialize(Attributor &A) override { 5216349cc55cSDimitry Andric if (DisableOpenMPOptFolding) 5217349cc55cSDimitry Andric indicatePessimisticFixpoint(); 5218349cc55cSDimitry Andric 5219fe6060f1SDimitry Andric Function *Callee = getAssociatedFunction(); 5220fe6060f1SDimitry Andric 5221fe6060f1SDimitry Andric auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); 5222fe6060f1SDimitry Andric const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee); 5223fe6060f1SDimitry Andric assert(It != OMPInfoCache.RuntimeFunctionIDMap.end() && 5224fe6060f1SDimitry Andric "Expected a known OpenMP runtime function"); 5225fe6060f1SDimitry Andric 5226fe6060f1SDimitry Andric RFKind = It->getSecond(); 5227fe6060f1SDimitry Andric 5228fe6060f1SDimitry Andric CallBase &CB = cast<CallBase>(getAssociatedValue()); 5229fe6060f1SDimitry Andric A.registerSimplificationCallback( 5230fe6060f1SDimitry Andric IRPosition::callsite_returned(CB), 5231fe6060f1SDimitry Andric [&](const IRPosition &IRP, const AbstractAttribute *AA, 5232bdd1243dSDimitry Andric bool &UsedAssumedInformation) -> std::optional<Value *> { 523381ad6265SDimitry Andric assert((isValidState() || 5234bdd1243dSDimitry Andric (SimplifiedValue && *SimplifiedValue == nullptr)) && 5235fe6060f1SDimitry 
Andric "Unexpected invalid state!"); 5236fe6060f1SDimitry Andric 5237fe6060f1SDimitry Andric if (!isAtFixpoint()) { 5238fe6060f1SDimitry Andric UsedAssumedInformation = true; 5239fe6060f1SDimitry Andric if (AA) 5240fe6060f1SDimitry Andric A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); 5241fe6060f1SDimitry Andric } 5242fe6060f1SDimitry Andric return SimplifiedValue; 5243fe6060f1SDimitry Andric }); 5244fe6060f1SDimitry Andric } 5245fe6060f1SDimitry Andric 5246fe6060f1SDimitry Andric ChangeStatus updateImpl(Attributor &A) override { 5247fe6060f1SDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED; 5248fe6060f1SDimitry Andric switch (RFKind) { 5249fe6060f1SDimitry Andric case OMPRTL___kmpc_is_spmd_exec_mode: 5250fe6060f1SDimitry Andric Changed |= foldIsSPMDExecMode(A); 5251fe6060f1SDimitry Andric break; 5252fe6060f1SDimitry Andric case OMPRTL___kmpc_parallel_level: 5253fe6060f1SDimitry Andric Changed |= foldParallelLevel(A); 5254fe6060f1SDimitry Andric break; 5255fe6060f1SDimitry Andric case OMPRTL___kmpc_get_hardware_num_threads_in_block: 5256fe6060f1SDimitry Andric Changed = Changed | foldKernelFnAttribute(A, "omp_target_thread_limit"); 5257fe6060f1SDimitry Andric break; 5258fe6060f1SDimitry Andric case OMPRTL___kmpc_get_hardware_num_blocks: 5259fe6060f1SDimitry Andric Changed = Changed | foldKernelFnAttribute(A, "omp_target_num_teams"); 5260fe6060f1SDimitry Andric break; 5261fe6060f1SDimitry Andric default: 5262fe6060f1SDimitry Andric llvm_unreachable("Unhandled OpenMP runtime function!"); 5263fe6060f1SDimitry Andric } 5264fe6060f1SDimitry Andric 5265fe6060f1SDimitry Andric return Changed; 5266fe6060f1SDimitry Andric } 5267fe6060f1SDimitry Andric 5268fe6060f1SDimitry Andric ChangeStatus manifest(Attributor &A) override { 5269fe6060f1SDimitry Andric ChangeStatus Changed = ChangeStatus::UNCHANGED; 5270fe6060f1SDimitry Andric 527181ad6265SDimitry Andric if (SimplifiedValue && *SimplifiedValue) { 5272349cc55cSDimitry Andric Instruction &I = *getCtxI(); 527381ad6265SDimitry Andric A.changeAfterManifest(IRPosition::inst(I), **SimplifiedValue); 5274349cc55cSDimitry Andric A.deleteAfterManifest(I); 5275fe6060f1SDimitry Andric 5276349cc55cSDimitry Andric CallBase *CB = dyn_cast<CallBase>(&I); 5277349cc55cSDimitry Andric auto Remark = [&](OptimizationRemark OR) { 5278349cc55cSDimitry Andric if (auto *C = dyn_cast<ConstantInt>(*SimplifiedValue)) 5279349cc55cSDimitry Andric return OR << "Replacing OpenMP runtime call " 5280349cc55cSDimitry Andric << CB->getCalledFunction()->getName() << " with " 5281349cc55cSDimitry Andric << ore::NV("FoldedValue", C->getZExtValue()) << "."; 5282349cc55cSDimitry Andric return OR << "Replacing OpenMP runtime call " 5283349cc55cSDimitry Andric << CB->getCalledFunction()->getName() << "."; 5284349cc55cSDimitry Andric }; 5285349cc55cSDimitry Andric 5286349cc55cSDimitry Andric if (CB && EnableVerboseRemarks) 5287349cc55cSDimitry Andric A.emitRemark<OptimizationRemark>(CB, "OMP180", Remark); 5288349cc55cSDimitry Andric 5289349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Replacing runtime call: " << I << " with " 5290fe6060f1SDimitry Andric << **SimplifiedValue << "\n"); 5291fe6060f1SDimitry Andric 5292fe6060f1SDimitry Andric Changed = ChangeStatus::CHANGED; 5293fe6060f1SDimitry Andric } 5294fe6060f1SDimitry Andric 5295fe6060f1SDimitry Andric return Changed; 5296fe6060f1SDimitry Andric } 5297fe6060f1SDimitry Andric 5298fe6060f1SDimitry Andric ChangeStatus indicatePessimisticFixpoint() override { 5299fe6060f1SDimitry Andric SimplifiedValue = nullptr; 
5300fe6060f1SDimitry Andric return AAFoldRuntimeCall::indicatePessimisticFixpoint(); 5301fe6060f1SDimitry Andric } 5302fe6060f1SDimitry Andric 5303fe6060f1SDimitry Andric private: 5304fe6060f1SDimitry Andric /// Fold __kmpc_is_spmd_exec_mode into a constant if possible. 5305fe6060f1SDimitry Andric ChangeStatus foldIsSPMDExecMode(Attributor &A) { 5306bdd1243dSDimitry Andric std::optional<Value *> SimplifiedValueBefore = SimplifiedValue; 5307fe6060f1SDimitry Andric 5308fe6060f1SDimitry Andric unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0; 5309fe6060f1SDimitry Andric unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0; 531006c3fb27SDimitry Andric auto *CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( 5311fe6060f1SDimitry Andric *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); 5312fe6060f1SDimitry Andric 531306c3fb27SDimitry Andric if (!CallerKernelInfoAA || 531406c3fb27SDimitry Andric !CallerKernelInfoAA->ReachingKernelEntries.isValidState()) 5315fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 5316fe6060f1SDimitry Andric 531706c3fb27SDimitry Andric for (Kernel K : CallerKernelInfoAA->ReachingKernelEntries) { 531806c3fb27SDimitry Andric auto *AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K), 5319fe6060f1SDimitry Andric DepClassTy::REQUIRED); 5320fe6060f1SDimitry Andric 532106c3fb27SDimitry Andric if (!AA || !AA->isValidState()) { 5322fe6060f1SDimitry Andric SimplifiedValue = nullptr; 5323fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 5324fe6060f1SDimitry Andric } 5325fe6060f1SDimitry Andric 532606c3fb27SDimitry Andric if (AA->SPMDCompatibilityTracker.isAssumed()) { 532706c3fb27SDimitry Andric if (AA->SPMDCompatibilityTracker.isAtFixpoint()) 5328fe6060f1SDimitry Andric ++KnownSPMDCount; 5329fe6060f1SDimitry Andric else 5330fe6060f1SDimitry Andric ++AssumedSPMDCount; 5331fe6060f1SDimitry Andric } else { 533206c3fb27SDimitry Andric if (AA->SPMDCompatibilityTracker.isAtFixpoint()) 5333fe6060f1SDimitry Andric ++KnownNonSPMDCount; 5334fe6060f1SDimitry Andric else 5335fe6060f1SDimitry Andric ++AssumedNonSPMDCount; 5336fe6060f1SDimitry Andric } 5337fe6060f1SDimitry Andric } 5338fe6060f1SDimitry Andric 5339fe6060f1SDimitry Andric if ((AssumedSPMDCount + KnownSPMDCount) && 5340fe6060f1SDimitry Andric (AssumedNonSPMDCount + KnownNonSPMDCount)) 5341fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 5342fe6060f1SDimitry Andric 5343fe6060f1SDimitry Andric auto &Ctx = getAnchorValue().getContext(); 5344fe6060f1SDimitry Andric if (KnownSPMDCount || AssumedSPMDCount) { 5345fe6060f1SDimitry Andric assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && 5346fe6060f1SDimitry Andric "Expected only SPMD kernels!"); 5347fe6060f1SDimitry Andric // All reaching kernels are in SPMD mode. Update all function calls to 5348fe6060f1SDimitry Andric // __kmpc_is_spmd_exec_mode to 1. 5349fe6060f1SDimitry Andric SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true); 5350fe6060f1SDimitry Andric } else if (KnownNonSPMDCount || AssumedNonSPMDCount) { 5351fe6060f1SDimitry Andric assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 && 5352fe6060f1SDimitry Andric "Expected only non-SPMD kernels!"); 5353fe6060f1SDimitry Andric // All reaching kernels are in non-SPMD mode. Update all function 5354fe6060f1SDimitry Andric // calls to __kmpc_is_spmd_exec_mode to 0. 
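// Worked example (kernel names hypothetical): if a helper is reached only
// from kernels that are all known or assumed SPMD, a call such as
//
//   %mode = call i8 @__kmpc_is_spmd_exec_mode()   ; folds to i8 1
//
// simplifies to 1; if only non-SPMD kernels reach it, the assignment just
// below folds it to 0; a mix of both kinds was already rejected above
// through the pessimistic fixpoint.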
5355fe6060f1SDimitry Andric SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), false); 5356fe6060f1SDimitry Andric } else { 5357fe6060f1SDimitry Andric // We have empty reaching kernels, therefore we cannot tell if the 5358fe6060f1SDimitry Andric // associated call site can be folded. At this moment, SimplifiedValue 5359fe6060f1SDimitry Andric // must be none. 536081ad6265SDimitry Andric assert(!SimplifiedValue && "SimplifiedValue should be none"); 5361fe6060f1SDimitry Andric } 5362fe6060f1SDimitry Andric 5363fe6060f1SDimitry Andric return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED 5364fe6060f1SDimitry Andric : ChangeStatus::CHANGED; 5365fe6060f1SDimitry Andric } 5366fe6060f1SDimitry Andric 5367fe6060f1SDimitry Andric /// Fold __kmpc_parallel_level into a constant if possible. 5368fe6060f1SDimitry Andric ChangeStatus foldParallelLevel(Attributor &A) { 5369bdd1243dSDimitry Andric std::optional<Value *> SimplifiedValueBefore = SimplifiedValue; 5370fe6060f1SDimitry Andric 537106c3fb27SDimitry Andric auto *CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( 5372fe6060f1SDimitry Andric *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); 5373fe6060f1SDimitry Andric 537406c3fb27SDimitry Andric if (!CallerKernelInfoAA || 537506c3fb27SDimitry Andric !CallerKernelInfoAA->ParallelLevels.isValidState()) 5376fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 5377fe6060f1SDimitry Andric 537806c3fb27SDimitry Andric if (!CallerKernelInfoAA->ReachingKernelEntries.isValidState()) 5379fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 5380fe6060f1SDimitry Andric 538106c3fb27SDimitry Andric if (CallerKernelInfoAA->ReachingKernelEntries.empty()) { 538281ad6265SDimitry Andric assert(!SimplifiedValue && 5383fe6060f1SDimitry Andric "SimplifiedValue should keep none at this point"); 5384fe6060f1SDimitry Andric return ChangeStatus::UNCHANGED; 5385fe6060f1SDimitry Andric } 5386fe6060f1SDimitry Andric 5387fe6060f1SDimitry Andric unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0; 5388fe6060f1SDimitry Andric unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0; 538906c3fb27SDimitry Andric for (Kernel K : CallerKernelInfoAA->ReachingKernelEntries) { 539006c3fb27SDimitry Andric auto *AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K), 5391fe6060f1SDimitry Andric DepClassTy::REQUIRED); 539206c3fb27SDimitry Andric if (!AA || !AA->SPMDCompatibilityTracker.isValidState()) 5393fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 5394fe6060f1SDimitry Andric 539506c3fb27SDimitry Andric if (AA->SPMDCompatibilityTracker.isAssumed()) { 539606c3fb27SDimitry Andric if (AA->SPMDCompatibilityTracker.isAtFixpoint()) 5397fe6060f1SDimitry Andric ++KnownSPMDCount; 5398fe6060f1SDimitry Andric else 5399fe6060f1SDimitry Andric ++AssumedSPMDCount; 5400fe6060f1SDimitry Andric } else { 540106c3fb27SDimitry Andric if (AA->SPMDCompatibilityTracker.isAtFixpoint()) 5402fe6060f1SDimitry Andric ++KnownNonSPMDCount; 5403fe6060f1SDimitry Andric else 5404fe6060f1SDimitry Andric ++AssumedNonSPMDCount; 5405fe6060f1SDimitry Andric } 5406fe6060f1SDimitry Andric } 5407fe6060f1SDimitry Andric 5408fe6060f1SDimitry Andric if ((AssumedSPMDCount + KnownSPMDCount) && 5409fe6060f1SDimitry Andric (AssumedNonSPMDCount + KnownNonSPMDCount)) 5410fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 5411fe6060f1SDimitry Andric 5412fe6060f1SDimitry Andric auto &Ctx = getAnchorValue().getContext(); 5413fe6060f1SDimitry Andric // If the caller can only be reached by SPMD kernel 
entries, the parallel 5414fe6060f1SDimitry Andric // level is 1. Similarly, if the caller can only be reached by non-SPMD 5415fe6060f1SDimitry Andric // kernel entries, it is 0. 5416fe6060f1SDimitry Andric if (AssumedSPMDCount || KnownSPMDCount) { 5417fe6060f1SDimitry Andric assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && 5418fe6060f1SDimitry Andric "Expected only SPMD kernels!"); 5419fe6060f1SDimitry Andric SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1); 5420fe6060f1SDimitry Andric } else { 5421fe6060f1SDimitry Andric assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 && 5422fe6060f1SDimitry Andric "Expected only non-SPMD kernels!"); 5423fe6060f1SDimitry Andric SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 0); 5424fe6060f1SDimitry Andric } 5425fe6060f1SDimitry Andric return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED 5426fe6060f1SDimitry Andric : ChangeStatus::CHANGED; 5427fe6060f1SDimitry Andric } 5428fe6060f1SDimitry Andric 5429fe6060f1SDimitry Andric ChangeStatus foldKernelFnAttribute(Attributor &A, llvm::StringRef Attr) { 5430fe6060f1SDimitry Andric // Specialize only if all the calls agree with the attribute constant value 5431fe6060f1SDimitry Andric int32_t CurrentAttrValue = -1; 5432bdd1243dSDimitry Andric std::optional<Value *> SimplifiedValueBefore = SimplifiedValue; 5433fe6060f1SDimitry Andric 543406c3fb27SDimitry Andric auto *CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( 5435fe6060f1SDimitry Andric *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); 5436fe6060f1SDimitry Andric 543706c3fb27SDimitry Andric if (!CallerKernelInfoAA || 543806c3fb27SDimitry Andric !CallerKernelInfoAA->ReachingKernelEntries.isValidState()) 5439fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 5440fe6060f1SDimitry Andric 5441fe6060f1SDimitry Andric // Iterate over the kernels that reach this function 544206c3fb27SDimitry Andric for (Kernel K : CallerKernelInfoAA->ReachingKernelEntries) { 5443bdd1243dSDimitry Andric int32_t NextAttrVal = K->getFnAttributeAsParsedInteger(Attr, -1); 5444fe6060f1SDimitry Andric 5445fe6060f1SDimitry Andric if (NextAttrVal == -1 || 5446fe6060f1SDimitry Andric (CurrentAttrValue != -1 && CurrentAttrValue != NextAttrVal)) 5447fe6060f1SDimitry Andric return indicatePessimisticFixpoint(); 5448fe6060f1SDimitry Andric CurrentAttrValue = NextAttrVal; 5449fe6060f1SDimitry Andric } 5450fe6060f1SDimitry Andric 5451fe6060f1SDimitry Andric if (CurrentAttrValue != -1) { 5452fe6060f1SDimitry Andric auto &Ctx = getAnchorValue().getContext(); 5453fe6060f1SDimitry Andric SimplifiedValue = 5454fe6060f1SDimitry Andric ConstantInt::get(Type::getInt32Ty(Ctx), CurrentAttrValue); 5455fe6060f1SDimitry Andric } 5456fe6060f1SDimitry Andric return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED 5457fe6060f1SDimitry Andric : ChangeStatus::CHANGED; 5458fe6060f1SDimitry Andric } 5459fe6060f1SDimitry Andric 5460fe6060f1SDimitry Andric /// An optional value the associated value is assumed to fold to. That is, we 5461fe6060f1SDimitry Andric /// assume the associated value (which is a call) can be replaced by this 5462fe6060f1SDimitry Andric /// simplified value. 5463bdd1243dSDimitry Andric std::optional<Value *> SimplifiedValue; 5464fe6060f1SDimitry Andric 5465fe6060f1SDimitry Andric /// The runtime function kind of the callee of the associated call site. 
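// Illustrative sketch (the clause-to-attribute mapping is assumed here, not
// shown in this file): a kernel created from
//
//   #pragma omp target teams num_teams(8) thread_limit(128)
//
// is expected to carry "omp_target_num_teams"="8" and
// "omp_target_thread_limit"="128". When every reaching kernel agrees on the
// value, foldKernelFnAttribute above folds the matching
// __kmpc_get_hardware_num_blocks or __kmpc_get_hardware_num_threads_in_block
// call to that constant; one disagreeing or unannotated kernel forces the
// pessimistic fixpoint instead.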
5466fe6060f1SDimitry Andric RuntimeFunction RFKind; 5467fe6060f1SDimitry Andric }; 5468fe6060f1SDimitry Andric 54695ffd83dbSDimitry Andric } // namespace 54705ffd83dbSDimitry Andric 5471fe6060f1SDimitry Andric /// Register folding callsite 5472fe6060f1SDimitry Andric void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) { 5473fe6060f1SDimitry Andric auto &RFI = OMPInfoCache.RFIs[RF]; 5474fe6060f1SDimitry Andric RFI.foreachUse(SCC, [&](Use &U, Function &F) { 5475fe6060f1SDimitry Andric CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &RFI); 5476fe6060f1SDimitry Andric if (!CI) 5477fe6060f1SDimitry Andric return false; 5478fe6060f1SDimitry Andric A.getOrCreateAAFor<AAFoldRuntimeCall>( 5479fe6060f1SDimitry Andric IRPosition::callsite_returned(*CI), /* QueryingAA */ nullptr, 5480fe6060f1SDimitry Andric DepClassTy::NONE, /* ForceUpdate */ false, 5481fe6060f1SDimitry Andric /* UpdateAfterInit */ false); 5482fe6060f1SDimitry Andric return false; 5483fe6060f1SDimitry Andric }); 5484fe6060f1SDimitry Andric } 5485fe6060f1SDimitry Andric 5486fe6060f1SDimitry Andric void OpenMPOpt::registerAAs(bool IsModulePass) { 5487fe6060f1SDimitry Andric if (SCC.empty()) 5488fe6060f1SDimitry Andric return; 548981ad6265SDimitry Andric 5490fe6060f1SDimitry Andric if (IsModulePass) { 5491fe6060f1SDimitry Andric // Ensure we create the AAKernelInfo AAs first and without triggering an 5492fe6060f1SDimitry Andric // update. This will make sure we register all value simplification 5493fe6060f1SDimitry Andric // callbacks before any other AA has the chance to create an AAValueSimplify 5494fe6060f1SDimitry Andric // or similar. 549581ad6265SDimitry Andric auto CreateKernelInfoCB = [&](Use &, Function &Kernel) { 5496fe6060f1SDimitry Andric A.getOrCreateAAFor<AAKernelInfo>( 549781ad6265SDimitry Andric IRPosition::function(Kernel), /* QueryingAA */ nullptr, 5498fe6060f1SDimitry Andric DepClassTy::NONE, /* ForceUpdate */ false, 5499fe6060f1SDimitry Andric /* UpdateAfterInit */ false); 550081ad6265SDimitry Andric return false; 550181ad6265SDimitry Andric }; 550281ad6265SDimitry Andric OMPInformationCache::RuntimeFunctionInfo &InitRFI = 550381ad6265SDimitry Andric OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; 550481ad6265SDimitry Andric InitRFI.foreachUse(SCC, CreateKernelInfoCB); 5505fe6060f1SDimitry Andric 5506fe6060f1SDimitry Andric registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode); 5507fe6060f1SDimitry Andric registerFoldRuntimeCall(OMPRTL___kmpc_parallel_level); 5508fe6060f1SDimitry Andric registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_threads_in_block); 5509fe6060f1SDimitry Andric registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_blocks); 5510fe6060f1SDimitry Andric } 5511fe6060f1SDimitry Andric 5512fe6060f1SDimitry Andric // Create CallSite AA for all Getters. 
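// Sketch of what the getter AAs enable (whether a particular site folds
// depends on AAICVTracker, so this is illustrative only):
//
//   omp_set_num_threads(4);          // setter records the ICV value
//   ...
//   int n = omp_get_max_threads();   // may be simplified to 4
//
// Each getter call site gets an AAICVTracker so such queries can be answered
// from the tracked internal control variable state when nothing in between
// can change it.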
5513bdd1243dSDimitry Andric if (DeduceICVValues) { 5514fe6060f1SDimitry Andric for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) { 5515fe6060f1SDimitry Andric auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)]; 5516fe6060f1SDimitry Andric 5517fe6060f1SDimitry Andric auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter]; 5518fe6060f1SDimitry Andric 5519fe6060f1SDimitry Andric auto CreateAA = [&](Use &U, Function &Caller) { 5520fe6060f1SDimitry Andric CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI); 5521fe6060f1SDimitry Andric if (!CI) 5522fe6060f1SDimitry Andric return false; 5523fe6060f1SDimitry Andric 5524fe6060f1SDimitry Andric auto &CB = cast<CallBase>(*CI); 5525fe6060f1SDimitry Andric 5526fe6060f1SDimitry Andric IRPosition CBPos = IRPosition::callsite_function(CB); 5527fe6060f1SDimitry Andric A.getOrCreateAAFor<AAICVTracker>(CBPos); 5528fe6060f1SDimitry Andric return false; 5529fe6060f1SDimitry Andric }; 5530fe6060f1SDimitry Andric 5531fe6060f1SDimitry Andric GetterRFI.foreachUse(SCC, CreateAA); 5532fe6060f1SDimitry Andric } 5533bdd1243dSDimitry Andric } 5534fe6060f1SDimitry Andric 5535fe6060f1SDimitry Andric // Create an ExecutionDomain AA for every function and a HeapToStack AA for 5536fe6060f1SDimitry Andric // every function if there is a device kernel. 5537fe6060f1SDimitry Andric if (!isOpenMPDevice(M)) 5538fe6060f1SDimitry Andric return; 5539fe6060f1SDimitry Andric 5540fe6060f1SDimitry Andric for (auto *F : SCC) { 5541fe6060f1SDimitry Andric if (F->isDeclaration()) 5542fe6060f1SDimitry Andric continue; 5543fe6060f1SDimitry Andric 5544bdd1243dSDimitry Andric // We look at internal functions only on-demand but if any use is not a 5545bdd1243dSDimitry Andric // direct call or outside the current set of analyzed functions, we have 5546bdd1243dSDimitry Andric // to do it eagerly. 
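// Example of the eager case (hypothetical code, not from this module):
//
//   static void callback();          // internal linkage
//   ...
//   register_handler(&callback);     // address-taken use, not a direct call
//
// A use like this fails the all_of check below, so the function's AAs are
// registered eagerly instead of on demand.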
5547bdd1243dSDimitry Andric if (F->hasLocalLinkage()) { 5548bdd1243dSDimitry Andric if (llvm::all_of(F->uses(), [this](const Use &U) { 5549bdd1243dSDimitry Andric const auto *CB = dyn_cast<CallBase>(U.getUser()); 5550bdd1243dSDimitry Andric return CB && CB->isCallee(&U) && 5551bdd1243dSDimitry Andric A.isRunOn(const_cast<Function *>(CB->getCaller())); 5552bdd1243dSDimitry Andric })) 5553bdd1243dSDimitry Andric continue; 5554bdd1243dSDimitry Andric } 5555bdd1243dSDimitry Andric registerAAsForFunction(A, *F); 5556bdd1243dSDimitry Andric } 5557bdd1243dSDimitry Andric } 5558fe6060f1SDimitry Andric 5559bdd1243dSDimitry Andric void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) { 5560bdd1243dSDimitry Andric if (!DisableOpenMPOptDeglobalization) 5561bdd1243dSDimitry Andric A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F)); 5562bdd1243dSDimitry Andric A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(F)); 5563bdd1243dSDimitry Andric if (!DisableOpenMPOptDeglobalization) 5564bdd1243dSDimitry Andric A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(F)); 556506c3fb27SDimitry Andric if (F.hasFnAttribute(Attribute::Convergent)) 556606c3fb27SDimitry Andric A.getOrCreateAAFor<AANonConvergent>(IRPosition::function(F)); 5567bdd1243dSDimitry Andric 5568bdd1243dSDimitry Andric for (auto &I : instructions(F)) { 5569fe6060f1SDimitry Andric if (auto *LI = dyn_cast<LoadInst>(&I)) { 5570fe6060f1SDimitry Andric bool UsedAssumedInformation = false; 5571fe6060f1SDimitry Andric A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr, 5572fcaf7f86SDimitry Andric UsedAssumedInformation, AA::Interprocedural); 5573bdd1243dSDimitry Andric continue; 5574bdd1243dSDimitry Andric } 55755f757f3fSDimitry Andric if (auto *CI = dyn_cast<CallBase>(&I)) { 55765f757f3fSDimitry Andric if (CI->isIndirectCall()) 55775f757f3fSDimitry Andric A.getOrCreateAAFor<AAIndirectCallInfo>( 55785f757f3fSDimitry Andric IRPosition::callsite_function(*CI)); 55795f757f3fSDimitry Andric } 5580bdd1243dSDimitry Andric if (auto *SI = dyn_cast<StoreInst>(&I)) { 558104eeddc0SDimitry Andric A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI)); 5582bdd1243dSDimitry Andric continue; 5583bdd1243dSDimitry Andric } 558406c3fb27SDimitry Andric if (auto *FI = dyn_cast<FenceInst>(&I)) { 558506c3fb27SDimitry Andric A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*FI)); 558606c3fb27SDimitry Andric continue; 558706c3fb27SDimitry Andric } 5588bdd1243dSDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(&I)) { 5589bdd1243dSDimitry Andric if (II->getIntrinsicID() == Intrinsic::assume) { 5590bdd1243dSDimitry Andric A.getOrCreateAAFor<AAPotentialValues>( 5591bdd1243dSDimitry Andric IRPosition::value(*II->getArgOperand(0))); 5592bdd1243dSDimitry Andric continue; 5593fe6060f1SDimitry Andric } 5594fe6060f1SDimitry Andric } 5595fe6060f1SDimitry Andric } 5596fe6060f1SDimitry Andric } 5597fe6060f1SDimitry Andric 55985ffd83dbSDimitry Andric const char AAICVTracker::ID = 0; 5599fe6060f1SDimitry Andric const char AAKernelInfo::ID = 0; 5600fe6060f1SDimitry Andric const char AAExecutionDomain::ID = 0; 5601fe6060f1SDimitry Andric const char AAHeapToShared::ID = 0; 5602fe6060f1SDimitry Andric const char AAFoldRuntimeCall::ID = 0; 56035ffd83dbSDimitry Andric 56045ffd83dbSDimitry Andric AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP, 56055ffd83dbSDimitry Andric Attributor &A) { 56065ffd83dbSDimitry Andric AAICVTracker *AA = nullptr; 56075ffd83dbSDimitry Andric switch (IRP.getPositionKind()) { 56085ffd83dbSDimitry 
Andric case IRPosition::IRP_INVALID: 56095ffd83dbSDimitry Andric case IRPosition::IRP_FLOAT: 56105ffd83dbSDimitry Andric case IRPosition::IRP_ARGUMENT: 56115ffd83dbSDimitry Andric case IRPosition::IRP_CALL_SITE_ARGUMENT: 56125ffd83dbSDimitry Andric llvm_unreachable("ICVTracker can only be created for function position!"); 5613e8d8bef9SDimitry Andric case IRPosition::IRP_RETURNED: 5614e8d8bef9SDimitry Andric AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A); 5615e8d8bef9SDimitry Andric break; 5616e8d8bef9SDimitry Andric case IRPosition::IRP_CALL_SITE_RETURNED: 5617e8d8bef9SDimitry Andric AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A); 5618e8d8bef9SDimitry Andric break; 5619e8d8bef9SDimitry Andric case IRPosition::IRP_CALL_SITE: 5620e8d8bef9SDimitry Andric AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A); 5621e8d8bef9SDimitry Andric break; 56225ffd83dbSDimitry Andric case IRPosition::IRP_FUNCTION: 56235ffd83dbSDimitry Andric AA = new (A.Allocator) AAICVTrackerFunction(IRP, A); 56245ffd83dbSDimitry Andric break; 56255ffd83dbSDimitry Andric } 56265ffd83dbSDimitry Andric 56275ffd83dbSDimitry Andric return *AA; 56285ffd83dbSDimitry Andric } 56295ffd83dbSDimitry Andric 5630fe6060f1SDimitry Andric AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP, 5631fe6060f1SDimitry Andric Attributor &A) { 5632fe6060f1SDimitry Andric AAExecutionDomainFunction *AA = nullptr; 5633fe6060f1SDimitry Andric switch (IRP.getPositionKind()) { 5634fe6060f1SDimitry Andric case IRPosition::IRP_INVALID: 5635fe6060f1SDimitry Andric case IRPosition::IRP_FLOAT: 5636fe6060f1SDimitry Andric case IRPosition::IRP_ARGUMENT: 5637fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE_ARGUMENT: 5638fe6060f1SDimitry Andric case IRPosition::IRP_RETURNED: 5639fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE_RETURNED: 5640fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE: 5641fe6060f1SDimitry Andric llvm_unreachable( 5642fe6060f1SDimitry Andric "AAExecutionDomain can only be created for function position!"); 5643fe6060f1SDimitry Andric case IRPosition::IRP_FUNCTION: 5644fe6060f1SDimitry Andric AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A); 5645fe6060f1SDimitry Andric break; 5646fe6060f1SDimitry Andric } 5647fe6060f1SDimitry Andric 5648fe6060f1SDimitry Andric return *AA; 5649fe6060f1SDimitry Andric } 5650fe6060f1SDimitry Andric 5651fe6060f1SDimitry Andric AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP, 5652fe6060f1SDimitry Andric Attributor &A) { 5653fe6060f1SDimitry Andric AAHeapToSharedFunction *AA = nullptr; 5654fe6060f1SDimitry Andric switch (IRP.getPositionKind()) { 5655fe6060f1SDimitry Andric case IRPosition::IRP_INVALID: 5656fe6060f1SDimitry Andric case IRPosition::IRP_FLOAT: 5657fe6060f1SDimitry Andric case IRPosition::IRP_ARGUMENT: 5658fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE_ARGUMENT: 5659fe6060f1SDimitry Andric case IRPosition::IRP_RETURNED: 5660fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE_RETURNED: 5661fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE: 5662fe6060f1SDimitry Andric llvm_unreachable( 5663fe6060f1SDimitry Andric "AAHeapToShared can only be created for function position!"); 5664fe6060f1SDimitry Andric case IRPosition::IRP_FUNCTION: 5665fe6060f1SDimitry Andric AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A); 5666fe6060f1SDimitry Andric break; 5667fe6060f1SDimitry Andric } 5668fe6060f1SDimitry Andric 5669fe6060f1SDimitry Andric return *AA; 5670fe6060f1SDimitry Andric } 
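// Minimal usage sketch of the factory pattern above (caller code assumed,
// not part of this file): an attribute is requested at an IRPosition and
// the position kind selects the concrete implementation, e.g.
//
//   auto *AA = A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
//   // dispatches to AAHeapToShared::createForPosition, which allocates an
//   // AAHeapToSharedFunction in the Attributor's allocator.
//
// Requests at any non-function position hit the llvm_unreachable guard.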
5671fe6060f1SDimitry Andric 5672fe6060f1SDimitry Andric AAKernelInfo &AAKernelInfo::createForPosition(const IRPosition &IRP, 5673fe6060f1SDimitry Andric Attributor &A) { 5674fe6060f1SDimitry Andric AAKernelInfo *AA = nullptr; 5675fe6060f1SDimitry Andric switch (IRP.getPositionKind()) { 5676fe6060f1SDimitry Andric case IRPosition::IRP_INVALID: 5677fe6060f1SDimitry Andric case IRPosition::IRP_FLOAT: 5678fe6060f1SDimitry Andric case IRPosition::IRP_ARGUMENT: 5679fe6060f1SDimitry Andric case IRPosition::IRP_RETURNED: 5680fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE_RETURNED: 5681fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE_ARGUMENT: 5682fe6060f1SDimitry Andric llvm_unreachable("KernelInfo can only be created for function position!"); 5683fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE: 5684fe6060f1SDimitry Andric AA = new (A.Allocator) AAKernelInfoCallSite(IRP, A); 5685fe6060f1SDimitry Andric break; 5686fe6060f1SDimitry Andric case IRPosition::IRP_FUNCTION: 5687fe6060f1SDimitry Andric AA = new (A.Allocator) AAKernelInfoFunction(IRP, A); 5688fe6060f1SDimitry Andric break; 5689fe6060f1SDimitry Andric } 5690fe6060f1SDimitry Andric 5691fe6060f1SDimitry Andric return *AA; 5692fe6060f1SDimitry Andric } 5693fe6060f1SDimitry Andric 5694fe6060f1SDimitry Andric AAFoldRuntimeCall &AAFoldRuntimeCall::createForPosition(const IRPosition &IRP, 5695fe6060f1SDimitry Andric Attributor &A) { 5696fe6060f1SDimitry Andric AAFoldRuntimeCall *AA = nullptr; 5697fe6060f1SDimitry Andric switch (IRP.getPositionKind()) { 5698fe6060f1SDimitry Andric case IRPosition::IRP_INVALID: 5699fe6060f1SDimitry Andric case IRPosition::IRP_FLOAT: 5700fe6060f1SDimitry Andric case IRPosition::IRP_ARGUMENT: 5701fe6060f1SDimitry Andric case IRPosition::IRP_RETURNED: 5702fe6060f1SDimitry Andric case IRPosition::IRP_FUNCTION: 5703fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE: 5704fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE_ARGUMENT: 5705fe6060f1SDimitry Andric llvm_unreachable("KernelInfo can only be created for call site position!"); 5706fe6060f1SDimitry Andric case IRPosition::IRP_CALL_SITE_RETURNED: 5707fe6060f1SDimitry Andric AA = new (A.Allocator) AAFoldRuntimeCallCallSiteReturned(IRP, A); 5708fe6060f1SDimitry Andric break; 5709fe6060f1SDimitry Andric } 5710fe6060f1SDimitry Andric 5711fe6060f1SDimitry Andric return *AA; 5712fe6060f1SDimitry Andric } 5713fe6060f1SDimitry Andric 5714fe6060f1SDimitry Andric PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { 5715fe6060f1SDimitry Andric if (!containsOpenMP(M)) 5716fe6060f1SDimitry Andric return PreservedAnalyses::all(); 5717fe6060f1SDimitry Andric if (DisableOpenMPOptimizations) 57185ffd83dbSDimitry Andric return PreservedAnalyses::all(); 57195ffd83dbSDimitry Andric 5720fe6060f1SDimitry Andric FunctionAnalysisManager &FAM = 5721fe6060f1SDimitry Andric AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 5722fe6060f1SDimitry Andric KernelSet Kernels = getDeviceKernels(M); 5723fe6060f1SDimitry Andric 572481ad6265SDimitry Andric if (PrintModuleBeforeOptimizations) 572581ad6265SDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt Module Pass:\n" << M); 572681ad6265SDimitry Andric 5727fe6060f1SDimitry Andric auto IsCalled = [&](Function &F) { 5728fe6060f1SDimitry Andric if (Kernels.contains(&F)) 5729fe6060f1SDimitry Andric return true; 5730fe6060f1SDimitry Andric for (const User *U : F.users()) 5731fe6060f1SDimitry Andric if (!isa<BlockAddress>(U)) 5732fe6060f1SDimitry Andric return true; 
5733fe6060f1SDimitry Andric return false; 5734fe6060f1SDimitry Andric }; 5735fe6060f1SDimitry Andric 5736fe6060f1SDimitry Andric auto EmitRemark = [&](Function &F) { 5737fe6060f1SDimitry Andric auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 5738fe6060f1SDimitry Andric ORE.emit([&]() { 5739fe6060f1SDimitry Andric OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F); 5740fe6060f1SDimitry Andric return ORA << "Could not internalize function. " 57416e75b2fbSDimitry Andric << "Some optimizations may not be possible. [OMP140]"; 5742fe6060f1SDimitry Andric }); 5743fe6060f1SDimitry Andric }; 5744fe6060f1SDimitry Andric 574506c3fb27SDimitry Andric bool Changed = false; 574606c3fb27SDimitry Andric 5747fe6060f1SDimitry Andric // Create internal copies of each function if this is a kernel Module. This 5748fe6060f1SDimitry Andric // allows iterprocedural passes to see every call edge. 57496e75b2fbSDimitry Andric DenseMap<Function *, Function *> InternalizedMap; 57506e75b2fbSDimitry Andric if (isOpenMPDevice(M)) { 57516e75b2fbSDimitry Andric SmallPtrSet<Function *, 16> InternalizeFns; 5752fe6060f1SDimitry Andric for (Function &F : M) 5753fe6060f1SDimitry Andric if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) && 5754fe6060f1SDimitry Andric !DisableInternalization) { 57556e75b2fbSDimitry Andric if (Attributor::isInternalizable(F)) { 57566e75b2fbSDimitry Andric InternalizeFns.insert(&F); 5757fe6060f1SDimitry Andric } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) { 5758fe6060f1SDimitry Andric EmitRemark(F); 5759fe6060f1SDimitry Andric } 5760fe6060f1SDimitry Andric } 5761fe6060f1SDimitry Andric 576206c3fb27SDimitry Andric Changed |= 57636e75b2fbSDimitry Andric Attributor::internalizeFunctions(InternalizeFns, InternalizedMap); 57646e75b2fbSDimitry Andric } 57656e75b2fbSDimitry Andric 5766fe6060f1SDimitry Andric // Look at every function in the Module unless it was internalized. 5767bdd1243dSDimitry Andric SetVector<Function *> Functions; 5768fe6060f1SDimitry Andric SmallVector<Function *, 16> SCC; 5769fe6060f1SDimitry Andric for (Function &F : M) 5770bdd1243dSDimitry Andric if (!F.isDeclaration() && !InternalizedMap.lookup(&F)) { 5771fe6060f1SDimitry Andric SCC.push_back(&F); 5772bdd1243dSDimitry Andric Functions.insert(&F); 5773bdd1243dSDimitry Andric } 5774fe6060f1SDimitry Andric 5775fe6060f1SDimitry Andric if (SCC.empty()) 577606c3fb27SDimitry Andric return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 5777fe6060f1SDimitry Andric 5778fe6060f1SDimitry Andric AnalysisGetter AG(FAM); 5779fe6060f1SDimitry Andric 5780fe6060f1SDimitry Andric auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { 5781fe6060f1SDimitry Andric return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); 5782fe6060f1SDimitry Andric }; 5783fe6060f1SDimitry Andric 5784fe6060f1SDimitry Andric BumpPtrAllocator Allocator; 5785fe6060f1SDimitry Andric CallGraphUpdater CGUpdater; 5786fe6060f1SDimitry Andric 57871ac55f4cSDimitry Andric bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink || 57881ac55f4cSDimitry Andric LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink; 578906c3fb27SDimitry Andric OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, PostLink); 5790fe6060f1SDimitry Andric 5791349cc55cSDimitry Andric unsigned MaxFixpointIterations = 5792349cc55cSDimitry Andric (isOpenMPDevice(M)) ? 
SetFixpointIterations : 32; 579381ad6265SDimitry Andric 579481ad6265SDimitry Andric AttributorConfig AC(CGUpdater); 579581ad6265SDimitry Andric AC.DefaultInitializeLiveInternals = false; 5796bdd1243dSDimitry Andric AC.IsModulePass = true; 579781ad6265SDimitry Andric AC.RewriteSignatures = false; 579881ad6265SDimitry Andric AC.MaxFixpointIterations = MaxFixpointIterations; 579981ad6265SDimitry Andric AC.OREGetter = OREGetter; 580081ad6265SDimitry Andric AC.PassName = DEBUG_TYPE; 5801bdd1243dSDimitry Andric AC.InitializationCallback = OpenMPOpt::registerAAsForFunction; 580206c3fb27SDimitry Andric AC.IPOAmendableCB = [](const Function &F) { 580306c3fb27SDimitry Andric return F.hasFnAttribute("kernel"); 580406c3fb27SDimitry Andric }; 580581ad6265SDimitry Andric 580681ad6265SDimitry Andric Attributor A(Functions, InfoCache, AC); 5807fe6060f1SDimitry Andric 5808fe6060f1SDimitry Andric OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); 580906c3fb27SDimitry Andric Changed |= OMPOpt.run(true); 5810349cc55cSDimitry Andric 5811349cc55cSDimitry Andric // Optionally inline device functions for potentially better performance. 5812349cc55cSDimitry Andric if (AlwaysInlineDeviceFunctions && isOpenMPDevice(M)) 5813349cc55cSDimitry Andric for (Function &F : M) 5814349cc55cSDimitry Andric if (!F.isDeclaration() && !Kernels.contains(&F) && 5815349cc55cSDimitry Andric !F.hasFnAttribute(Attribute::NoInline)) 5816349cc55cSDimitry Andric F.addFnAttr(Attribute::AlwaysInline); 5817349cc55cSDimitry Andric 5818349cc55cSDimitry Andric if (PrintModuleAfterOptimizations) 5819349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n" << M); 5820349cc55cSDimitry Andric 5821fe6060f1SDimitry Andric if (Changed) 5822fe6060f1SDimitry Andric return PreservedAnalyses::none(); 5823fe6060f1SDimitry Andric 5824fe6060f1SDimitry Andric return PreservedAnalyses::all(); 5825fe6060f1SDimitry Andric } 5826fe6060f1SDimitry Andric 5827fe6060f1SDimitry Andric PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, 5828fe6060f1SDimitry Andric CGSCCAnalysisManager &AM, 5829fe6060f1SDimitry Andric LazyCallGraph &CG, 5830fe6060f1SDimitry Andric CGSCCUpdateResult &UR) { 5831fe6060f1SDimitry Andric if (!containsOpenMP(*C.begin()->getFunction().getParent())) 5832fe6060f1SDimitry Andric return PreservedAnalyses::all(); 58335ffd83dbSDimitry Andric if (DisableOpenMPOptimizations) 58345ffd83dbSDimitry Andric return PreservedAnalyses::all(); 58355ffd83dbSDimitry Andric 58365ffd83dbSDimitry Andric SmallVector<Function *, 16> SCC; 5837e8d8bef9SDimitry Andric // If there are kernels in the module, we have to run on all SCC's. 
5838e8d8bef9SDimitry Andric for (LazyCallGraph::Node &N : C) {
5839e8d8bef9SDimitry Andric Function *Fn = &N.getFunction();
5840e8d8bef9SDimitry Andric SCC.push_back(Fn);
5841e8d8bef9SDimitry Andric }
5842e8d8bef9SDimitry Andric 
5843fe6060f1SDimitry Andric if (SCC.empty())
58445ffd83dbSDimitry Andric return PreservedAnalyses::all();
58455ffd83dbSDimitry Andric 
5846fe6060f1SDimitry Andric Module &M = *C.begin()->getFunction().getParent();
5847fe6060f1SDimitry Andric 
584881ad6265SDimitry Andric if (PrintModuleBeforeOptimizations)
584981ad6265SDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt CGSCC Pass:\n" << M);
585081ad6265SDimitry Andric 
5851fe6060f1SDimitry Andric KernelSet Kernels = getDeviceKernels(M);
5852fe6060f1SDimitry Andric 
58535ffd83dbSDimitry Andric FunctionAnalysisManager &FAM =
58545ffd83dbSDimitry Andric AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
58555ffd83dbSDimitry Andric 
58565ffd83dbSDimitry Andric AnalysisGetter AG(FAM);
58575ffd83dbSDimitry Andric 
58585ffd83dbSDimitry Andric auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
58595ffd83dbSDimitry Andric return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
58605ffd83dbSDimitry Andric };
58615ffd83dbSDimitry Andric 
5862fe6060f1SDimitry Andric BumpPtrAllocator Allocator;
58635ffd83dbSDimitry Andric CallGraphUpdater CGUpdater;
58645ffd83dbSDimitry Andric CGUpdater.initialize(CG, C, AM, UR);
58655ffd83dbSDimitry Andric 
58661ac55f4cSDimitry Andric bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
58671ac55f4cSDimitry Andric LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
58685ffd83dbSDimitry Andric SetVector<Function *> Functions(SCC.begin(), SCC.end());
58695ffd83dbSDimitry Andric OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
587006c3fb27SDimitry Andric /*CGSCC*/ &Functions, PostLink);
58715ffd83dbSDimitry Andric 
5872349cc55cSDimitry Andric unsigned MaxFixpointIterations =
5873349cc55cSDimitry Andric (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
587481ad6265SDimitry Andric 
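// Configure the Attributor for the CGSCC pass: the same settings as the
// module pass above, except that IsModulePass is false and no IPO-amendable
// callback is installed.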
587581ad6265SDimitry Andric AttributorConfig AC(CGUpdater);
587681ad6265SDimitry Andric AC.DefaultInitializeLiveInternals = false;
587781ad6265SDimitry Andric AC.IsModulePass = false;
587881ad6265SDimitry Andric AC.RewriteSignatures = false;
587981ad6265SDimitry Andric AC.MaxFixpointIterations = MaxFixpointIterations;
588081ad6265SDimitry Andric AC.OREGetter = OREGetter;
588181ad6265SDimitry Andric AC.PassName = DEBUG_TYPE;
5882bdd1243dSDimitry Andric AC.InitializationCallback = OpenMPOpt::registerAAsForFunction;
588381ad6265SDimitry Andric 
588481ad6265SDimitry Andric Attributor A(Functions, InfoCache, AC);
58855ffd83dbSDimitry Andric 
58865ffd83dbSDimitry Andric OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
5887fe6060f1SDimitry Andric bool Changed = OMPOpt.run(false);
5888349cc55cSDimitry Andric 
5889349cc55cSDimitry Andric if (PrintModuleAfterOptimizations)
5890349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M);
5891349cc55cSDimitry Andric 
58925ffd83dbSDimitry Andric if (Changed)
58935ffd83dbSDimitry Andric return PreservedAnalyses::none();
58945ffd83dbSDimitry Andric 
58955ffd83dbSDimitry Andric return PreservedAnalyses::all();
58965ffd83dbSDimitry Andric }
58975ffd83dbSDimitry Andric 
58985f757f3fSDimitry Andric bool llvm::omp::isOpenMPKernel(Function &Fn) {
58995f757f3fSDimitry Andric return Fn.hasFnAttribute("kernel");
59005f757f3fSDimitry Andric }
590106c3fb27SDimitry Andric 
5902fe6060f1SDimitry Andric KernelSet llvm::omp::getDeviceKernels(Module &M) {
5903fe6060f1SDimitry Andric // TODO: Create a more cross-platform way of determining device kernels.
5904bdd1243dSDimitry Andric NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations");
5905fe6060f1SDimitry Andric KernelSet Kernels;
5906fe6060f1SDimitry Andric 
59075ffd83dbSDimitry Andric if (!MD)
5908fe6060f1SDimitry Andric return Kernels;
59095ffd83dbSDimitry Andric 
59105ffd83dbSDimitry Andric for (auto *Op : MD->operands()) {
59115ffd83dbSDimitry Andric if (Op->getNumOperands() < 2)
59125ffd83dbSDimitry Andric continue;
59135ffd83dbSDimitry Andric MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
59145ffd83dbSDimitry Andric if (!KindID || KindID->getString() != "kernel")
59155ffd83dbSDimitry Andric continue;
59165ffd83dbSDimitry Andric 
59175ffd83dbSDimitry Andric Function *KernelFn =
59185ffd83dbSDimitry Andric mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
59195ffd83dbSDimitry Andric if (!KernelFn)
59205ffd83dbSDimitry Andric continue;
59215ffd83dbSDimitry Andric 
59225f757f3fSDimitry Andric // We are only interested in OpenMP target regions. Others, such as kernels
59235f757f3fSDimitry Andric // generated by CUDA but linked together, are not interesting to this pass.
59245f757f3fSDimitry Andric if (isOpenMPKernel(*KernelFn)) {
59255ffd83dbSDimitry Andric ++NumOpenMPTargetRegionKernels;
59265ffd83dbSDimitry Andric Kernels.insert(KernelFn);
59275f757f3fSDimitry Andric } else
59285f757f3fSDimitry Andric ++NumNonOpenMPTargetRegionKernels;
59295ffd83dbSDimitry Andric }
5930fe6060f1SDimitry Andric 
5931fe6060f1SDimitry Andric return Kernels;
59325ffd83dbSDimitry Andric }
59335ffd83dbSDimitry Andric 
5934fe6060f1SDimitry Andric bool llvm::omp::containsOpenMP(Module &M) {
5935fe6060f1SDimitry Andric Metadata *MD = M.getModuleFlag("openmp");
5936fe6060f1SDimitry Andric if (!MD)
5937fe6060f1SDimitry Andric return false;
59385ffd83dbSDimitry Andric 
59395ffd83dbSDimitry Andric return true;
59405ffd83dbSDimitry Andric }
59415ffd83dbSDimitry Andric 
5942fe6060f1SDimitry Andric bool llvm::omp::isOpenMPDevice(Module &M) {
5943fe6060f1SDimitry Andric Metadata *MD = M.getModuleFlag("openmp-device");
5944fe6060f1SDimitry Andric if (!MD)
5945fe6060f1SDimitry Andric return false;
5946fe6060f1SDimitry Andric 
5947fe6060f1SDimitry Andric return true;
59485ffd83dbSDimitry Andric }
5949