//===----- HipStdPar.cpp - HIP C++ Standard Parallelism Support Passes ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file implements two passes that enable HIP C++ Standard Parallelism
// Support:
//
// 1. AcceleratorCodeSelection (required): Given that only algorithms are
//    accelerated, and that the accelerated implementation exists in the form of
//    a compute kernel, we assume that only the kernel, and all functions
//    reachable from it, constitute code that the user expects the accelerator
//    to execute. Thus, we identify the set of all functions reachable from
//    kernels, and then remove all unreachable ones. This last part is necessary
//    because it is possible for code that the user did not expect to execute on
//    an accelerator to contain constructs that cannot be handled by the target
//    BE, which cannot be provably demonstrated to be dead code in general, and
//    thus can lead to mis-compilation. The degenerate case of this is when a
//    Module contains no kernels (the parent TU had no algorithm invocations fit
//    for acceleration), which we handle by completely emptying said module.
//    **NOTE**: The above does not handle indirectly reachable functions i.e.
//              it is possible to obtain a case where the target of an indirect
//              call is otherwise unreachable and thus is removed; this
//              restriction is aligned with the current `-hipstdpar` limitations
//              and will be relaxed in the future.
//
// 2. AllocationInterposition (required only when on-demand paging is
//    unsupported): Some accelerators or operating systems might not support
//    transparent on-demand paging. Thus, they would only be able to access
//    memory that is allocated by an accelerator-aware mechanism. For such cases
//    the user can opt into enabling allocation / deallocation interposition,
//    whereby we replace calls to known allocation / deallocation functions with
//    calls to runtime implemented equivalents that forward the requests to
//    accelerator-aware interfaces. We also support freeing system allocated
//    memory that ends up in one of the runtime equivalents, since this can
//    happen if e.g. a library that was compiled without interposition returns
//    an allocation that can be validly passed to `free`.
40*5f757f3fSDimitry Andric //===----------------------------------------------------------------------===// 41*5f757f3fSDimitry Andric 42*5f757f3fSDimitry Andric #include "llvm/Transforms/HipStdPar/HipStdPar.h" 43*5f757f3fSDimitry Andric 44*5f757f3fSDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 45*5f757f3fSDimitry Andric #include "llvm/ADT/SmallVector.h" 46*5f757f3fSDimitry Andric #include "llvm/ADT/STLExtras.h" 47*5f757f3fSDimitry Andric #include "llvm/Analysis/CallGraph.h" 48*5f757f3fSDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 49*5f757f3fSDimitry Andric #include "llvm/IR/Constants.h" 50*5f757f3fSDimitry Andric #include "llvm/IR/DebugInfoMetadata.h" 51*5f757f3fSDimitry Andric #include "llvm/IR/Function.h" 52*5f757f3fSDimitry Andric #include "llvm/IR/Module.h" 53*5f757f3fSDimitry Andric #include "llvm/Transforms/Utils/ModuleUtils.h" 54*5f757f3fSDimitry Andric 55*5f757f3fSDimitry Andric #include <cassert> 56*5f757f3fSDimitry Andric #include <string> 57*5f757f3fSDimitry Andric #include <utility> 58*5f757f3fSDimitry Andric 59*5f757f3fSDimitry Andric using namespace llvm; 60*5f757f3fSDimitry Andric 61*5f757f3fSDimitry Andric template<typename T> 62*5f757f3fSDimitry Andric static inline void eraseFromModule(T &ToErase) { 63*5f757f3fSDimitry Andric ToErase.replaceAllUsesWith(PoisonValue::get(ToErase.getType())); 64*5f757f3fSDimitry Andric ToErase.eraseFromParent(); 65*5f757f3fSDimitry Andric } 66*5f757f3fSDimitry Andric 67*5f757f3fSDimitry Andric static inline bool checkIfSupported(GlobalVariable &G) { 68*5f757f3fSDimitry Andric if (!G.isThreadLocal()) 69*5f757f3fSDimitry Andric return true; 70*5f757f3fSDimitry Andric 71*5f757f3fSDimitry Andric G.dropDroppableUses(); 72*5f757f3fSDimitry Andric 73*5f757f3fSDimitry Andric if (!G.isConstantUsed()) 74*5f757f3fSDimitry Andric return true; 75*5f757f3fSDimitry Andric 76*5f757f3fSDimitry Andric std::string W; 77*5f757f3fSDimitry Andric raw_string_ostream OS(W); 78*5f757f3fSDimitry Andric 
79*5f757f3fSDimitry Andric OS << "Accelerator does not support the thread_local variable " 80*5f757f3fSDimitry Andric << G.getName(); 81*5f757f3fSDimitry Andric 82*5f757f3fSDimitry Andric Instruction *I = nullptr; 83*5f757f3fSDimitry Andric SmallVector<User *> Tmp(G.user_begin(), G.user_end()); 84*5f757f3fSDimitry Andric SmallPtrSet<User *, 5> Visited; 85*5f757f3fSDimitry Andric do { 86*5f757f3fSDimitry Andric auto U = std::move(Tmp.back()); 87*5f757f3fSDimitry Andric Tmp.pop_back(); 88*5f757f3fSDimitry Andric 89*5f757f3fSDimitry Andric if (Visited.contains(U)) 90*5f757f3fSDimitry Andric continue; 91*5f757f3fSDimitry Andric 92*5f757f3fSDimitry Andric if (isa<Instruction>(U)) 93*5f757f3fSDimitry Andric I = cast<Instruction>(U); 94*5f757f3fSDimitry Andric else 95*5f757f3fSDimitry Andric Tmp.insert(Tmp.end(), U->user_begin(), U->user_end()); 96*5f757f3fSDimitry Andric 97*5f757f3fSDimitry Andric Visited.insert(U); 98*5f757f3fSDimitry Andric } while (!I && !Tmp.empty()); 99*5f757f3fSDimitry Andric 100*5f757f3fSDimitry Andric assert(I && "thread_local global should have at least one non-constant use."); 101*5f757f3fSDimitry Andric 102*5f757f3fSDimitry Andric G.getContext().diagnose( 103*5f757f3fSDimitry Andric DiagnosticInfoUnsupported(*I->getParent()->getParent(), W, 104*5f757f3fSDimitry Andric I->getDebugLoc(), DS_Error)); 105*5f757f3fSDimitry Andric 106*5f757f3fSDimitry Andric return false; 107*5f757f3fSDimitry Andric } 108*5f757f3fSDimitry Andric 109*5f757f3fSDimitry Andric static inline void clearModule(Module &M) { // TODO: simplify. 
110*5f757f3fSDimitry Andric while (!M.functions().empty()) 111*5f757f3fSDimitry Andric eraseFromModule(*M.begin()); 112*5f757f3fSDimitry Andric while (!M.globals().empty()) 113*5f757f3fSDimitry Andric eraseFromModule(*M.globals().begin()); 114*5f757f3fSDimitry Andric while (!M.aliases().empty()) 115*5f757f3fSDimitry Andric eraseFromModule(*M.aliases().begin()); 116*5f757f3fSDimitry Andric while (!M.ifuncs().empty()) 117*5f757f3fSDimitry Andric eraseFromModule(*M.ifuncs().begin()); 118*5f757f3fSDimitry Andric } 119*5f757f3fSDimitry Andric 120*5f757f3fSDimitry Andric static inline void maybeHandleGlobals(Module &M) { 121*5f757f3fSDimitry Andric unsigned GlobAS = M.getDataLayout().getDefaultGlobalsAddressSpace(); 122*5f757f3fSDimitry Andric for (auto &&G : M.globals()) { // TODO: should we handle these in the FE? 123*5f757f3fSDimitry Andric if (!checkIfSupported(G)) 124*5f757f3fSDimitry Andric return clearModule(M); 125*5f757f3fSDimitry Andric 126*5f757f3fSDimitry Andric if (G.isThreadLocal()) 127*5f757f3fSDimitry Andric continue; 128*5f757f3fSDimitry Andric if (G.isConstant()) 129*5f757f3fSDimitry Andric continue; 130*5f757f3fSDimitry Andric if (G.getAddressSpace() != GlobAS) 131*5f757f3fSDimitry Andric continue; 132*5f757f3fSDimitry Andric if (G.getLinkage() != GlobalVariable::ExternalLinkage) 133*5f757f3fSDimitry Andric continue; 134*5f757f3fSDimitry Andric 135*5f757f3fSDimitry Andric G.setLinkage(GlobalVariable::ExternalWeakLinkage); 136*5f757f3fSDimitry Andric G.setExternallyInitialized(true); 137*5f757f3fSDimitry Andric } 138*5f757f3fSDimitry Andric } 139*5f757f3fSDimitry Andric 140*5f757f3fSDimitry Andric template<unsigned N> 141*5f757f3fSDimitry Andric static inline void removeUnreachableFunctions( 142*5f757f3fSDimitry Andric const SmallPtrSet<const Function *, N>& Reachable, Module &M) { 143*5f757f3fSDimitry Andric removeFromUsedLists(M, [&](Constant *C) { 144*5f757f3fSDimitry Andric if (auto F = dyn_cast<Function>(C)) 145*5f757f3fSDimitry Andric return 
!Reachable.contains(F); 146*5f757f3fSDimitry Andric 147*5f757f3fSDimitry Andric return false; 148*5f757f3fSDimitry Andric }); 149*5f757f3fSDimitry Andric 150*5f757f3fSDimitry Andric SmallVector<std::reference_wrapper<Function>> ToRemove; 151*5f757f3fSDimitry Andric copy_if(M, std::back_inserter(ToRemove), [&](auto &&F) { 152*5f757f3fSDimitry Andric return !F.isIntrinsic() && !Reachable.contains(&F); 153*5f757f3fSDimitry Andric }); 154*5f757f3fSDimitry Andric 155*5f757f3fSDimitry Andric for_each(ToRemove, eraseFromModule<Function>); 156*5f757f3fSDimitry Andric } 157*5f757f3fSDimitry Andric 158*5f757f3fSDimitry Andric static inline bool isAcceleratorExecutionRoot(const Function *F) { 159*5f757f3fSDimitry Andric if (!F) 160*5f757f3fSDimitry Andric return false; 161*5f757f3fSDimitry Andric 162*5f757f3fSDimitry Andric return F->getCallingConv() == CallingConv::AMDGPU_KERNEL; 163*5f757f3fSDimitry Andric } 164*5f757f3fSDimitry Andric 165*5f757f3fSDimitry Andric static inline bool checkIfSupported(const Function *F, const CallBase *CB) { 166*5f757f3fSDimitry Andric const auto Dx = F->getName().rfind("__hipstdpar_unsupported"); 167*5f757f3fSDimitry Andric 168*5f757f3fSDimitry Andric if (Dx == StringRef::npos) 169*5f757f3fSDimitry Andric return true; 170*5f757f3fSDimitry Andric 171*5f757f3fSDimitry Andric const auto N = F->getName().substr(0, Dx); 172*5f757f3fSDimitry Andric 173*5f757f3fSDimitry Andric std::string W; 174*5f757f3fSDimitry Andric raw_string_ostream OS(W); 175*5f757f3fSDimitry Andric 176*5f757f3fSDimitry Andric if (N == "__ASM") 177*5f757f3fSDimitry Andric OS << "Accelerator does not support the ASM block:\n" 178*5f757f3fSDimitry Andric << cast<ConstantDataArray>(CB->getArgOperand(0))->getAsCString(); 179*5f757f3fSDimitry Andric else 180*5f757f3fSDimitry Andric OS << "Accelerator does not support the " << N << " function."; 181*5f757f3fSDimitry Andric 182*5f757f3fSDimitry Andric auto Caller = CB->getParent()->getParent(); 183*5f757f3fSDimitry Andric 
184*5f757f3fSDimitry Andric Caller->getContext().diagnose( 185*5f757f3fSDimitry Andric DiagnosticInfoUnsupported(*Caller, W, CB->getDebugLoc(), DS_Error)); 186*5f757f3fSDimitry Andric 187*5f757f3fSDimitry Andric return false; 188*5f757f3fSDimitry Andric } 189*5f757f3fSDimitry Andric 190*5f757f3fSDimitry Andric PreservedAnalyses 191*5f757f3fSDimitry Andric HipStdParAcceleratorCodeSelectionPass::run(Module &M, 192*5f757f3fSDimitry Andric ModuleAnalysisManager &MAM) { 193*5f757f3fSDimitry Andric auto &CGA = MAM.getResult<CallGraphAnalysis>(M); 194*5f757f3fSDimitry Andric 195*5f757f3fSDimitry Andric SmallPtrSet<const Function *, 32> Reachable; 196*5f757f3fSDimitry Andric for (auto &&CGN : CGA) { 197*5f757f3fSDimitry Andric if (!isAcceleratorExecutionRoot(CGN.first)) 198*5f757f3fSDimitry Andric continue; 199*5f757f3fSDimitry Andric 200*5f757f3fSDimitry Andric Reachable.insert(CGN.first); 201*5f757f3fSDimitry Andric 202*5f757f3fSDimitry Andric SmallVector<const Function *> Tmp({CGN.first}); 203*5f757f3fSDimitry Andric do { 204*5f757f3fSDimitry Andric auto F = std::move(Tmp.back()); 205*5f757f3fSDimitry Andric Tmp.pop_back(); 206*5f757f3fSDimitry Andric 207*5f757f3fSDimitry Andric for (auto &&N : *CGA[F]) { 208*5f757f3fSDimitry Andric if (!N.second) 209*5f757f3fSDimitry Andric continue; 210*5f757f3fSDimitry Andric if (!N.second->getFunction()) 211*5f757f3fSDimitry Andric continue; 212*5f757f3fSDimitry Andric if (Reachable.contains(N.second->getFunction())) 213*5f757f3fSDimitry Andric continue; 214*5f757f3fSDimitry Andric 215*5f757f3fSDimitry Andric if (!checkIfSupported(N.second->getFunction(), 216*5f757f3fSDimitry Andric dyn_cast<CallBase>(*N.first))) 217*5f757f3fSDimitry Andric return PreservedAnalyses::none(); 218*5f757f3fSDimitry Andric 219*5f757f3fSDimitry Andric Reachable.insert(N.second->getFunction()); 220*5f757f3fSDimitry Andric Tmp.push_back(N.second->getFunction()); 221*5f757f3fSDimitry Andric } 222*5f757f3fSDimitry Andric } while (!std::empty(Tmp)); 
223*5f757f3fSDimitry Andric } 224*5f757f3fSDimitry Andric 225*5f757f3fSDimitry Andric if (std::empty(Reachable)) 226*5f757f3fSDimitry Andric clearModule(M); 227*5f757f3fSDimitry Andric else 228*5f757f3fSDimitry Andric removeUnreachableFunctions(Reachable, M); 229*5f757f3fSDimitry Andric 230*5f757f3fSDimitry Andric maybeHandleGlobals(M); 231*5f757f3fSDimitry Andric 232*5f757f3fSDimitry Andric return PreservedAnalyses::none(); 233*5f757f3fSDimitry Andric } 234*5f757f3fSDimitry Andric 235*5f757f3fSDimitry Andric static constexpr std::pair<StringLiteral, StringLiteral> ReplaceMap[]{ 236*5f757f3fSDimitry Andric {"aligned_alloc", "__hipstdpar_aligned_alloc"}, 237*5f757f3fSDimitry Andric {"calloc", "__hipstdpar_calloc"}, 238*5f757f3fSDimitry Andric {"free", "__hipstdpar_free"}, 239*5f757f3fSDimitry Andric {"malloc", "__hipstdpar_malloc"}, 240*5f757f3fSDimitry Andric {"memalign", "__hipstdpar_aligned_alloc"}, 241*5f757f3fSDimitry Andric {"posix_memalign", "__hipstdpar_posix_aligned_alloc"}, 242*5f757f3fSDimitry Andric {"realloc", "__hipstdpar_realloc"}, 243*5f757f3fSDimitry Andric {"reallocarray", "__hipstdpar_realloc_array"}, 244*5f757f3fSDimitry Andric {"_ZdaPv", "__hipstdpar_operator_delete"}, 245*5f757f3fSDimitry Andric {"_ZdaPvm", "__hipstdpar_operator_delete_sized"}, 246*5f757f3fSDimitry Andric {"_ZdaPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"}, 247*5f757f3fSDimitry Andric {"_ZdaPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"}, 248*5f757f3fSDimitry Andric {"_ZdlPv", "__hipstdpar_operator_delete"}, 249*5f757f3fSDimitry Andric {"_ZdlPvm", "__hipstdpar_operator_delete_sized"}, 250*5f757f3fSDimitry Andric {"_ZdlPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"}, 251*5f757f3fSDimitry Andric {"_ZdlPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"}, 252*5f757f3fSDimitry Andric {"_Znam", "__hipstdpar_operator_new"}, 253*5f757f3fSDimitry Andric {"_ZnamRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"}, 
254*5f757f3fSDimitry Andric {"_ZnamSt11align_val_t", "__hipstdpar_operator_new_aligned"}, 255*5f757f3fSDimitry Andric {"_ZnamSt11align_val_tRKSt9nothrow_t", 256*5f757f3fSDimitry Andric "__hipstdpar_operator_new_aligned_nothrow"}, 257*5f757f3fSDimitry Andric 258*5f757f3fSDimitry Andric {"_Znwm", "__hipstdpar_operator_new"}, 259*5f757f3fSDimitry Andric {"_ZnwmRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"}, 260*5f757f3fSDimitry Andric {"_ZnwmSt11align_val_t", "__hipstdpar_operator_new_aligned"}, 261*5f757f3fSDimitry Andric {"_ZnwmSt11align_val_tRKSt9nothrow_t", 262*5f757f3fSDimitry Andric "__hipstdpar_operator_new_aligned_nothrow"}, 263*5f757f3fSDimitry Andric {"__builtin_calloc", "__hipstdpar_calloc"}, 264*5f757f3fSDimitry Andric {"__builtin_free", "__hipstdpar_free"}, 265*5f757f3fSDimitry Andric {"__builtin_malloc", "__hipstdpar_malloc"}, 266*5f757f3fSDimitry Andric {"__builtin_operator_delete", "__hipstdpar_operator_delete"}, 267*5f757f3fSDimitry Andric {"__builtin_operator_new", "__hipstdpar_operator_new"}, 268*5f757f3fSDimitry Andric {"__builtin_realloc", "__hipstdpar_realloc"}, 269*5f757f3fSDimitry Andric {"__libc_calloc", "__hipstdpar_calloc"}, 270*5f757f3fSDimitry Andric {"__libc_free", "__hipstdpar_free"}, 271*5f757f3fSDimitry Andric {"__libc_malloc", "__hipstdpar_malloc"}, 272*5f757f3fSDimitry Andric {"__libc_memalign", "__hipstdpar_aligned_alloc"}, 273*5f757f3fSDimitry Andric {"__libc_realloc", "__hipstdpar_realloc"} 274*5f757f3fSDimitry Andric }; 275*5f757f3fSDimitry Andric 276*5f757f3fSDimitry Andric PreservedAnalyses 277*5f757f3fSDimitry Andric HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) { 278*5f757f3fSDimitry Andric SmallDenseMap<StringRef, StringRef> AllocReplacements(std::cbegin(ReplaceMap), 279*5f757f3fSDimitry Andric std::cend(ReplaceMap)); 280*5f757f3fSDimitry Andric 281*5f757f3fSDimitry Andric for (auto &&F : M) { 282*5f757f3fSDimitry Andric if (!F.hasName()) 283*5f757f3fSDimitry Andric continue; 
284*5f757f3fSDimitry Andric if (!AllocReplacements.contains(F.getName())) 285*5f757f3fSDimitry Andric continue; 286*5f757f3fSDimitry Andric 287*5f757f3fSDimitry Andric if (auto R = M.getFunction(AllocReplacements[F.getName()])) { 288*5f757f3fSDimitry Andric F.replaceAllUsesWith(R); 289*5f757f3fSDimitry Andric } else { 290*5f757f3fSDimitry Andric std::string W; 291*5f757f3fSDimitry Andric raw_string_ostream OS(W); 292*5f757f3fSDimitry Andric 293*5f757f3fSDimitry Andric OS << "cannot be interposed, missing: " << AllocReplacements[F.getName()] 294*5f757f3fSDimitry Andric << ". Tried to run the allocation interposition pass without the " 295*5f757f3fSDimitry Andric << "replacement functions available."; 296*5f757f3fSDimitry Andric 297*5f757f3fSDimitry Andric F.getContext().diagnose(DiagnosticInfoUnsupported(F, W, 298*5f757f3fSDimitry Andric F.getSubprogram(), 299*5f757f3fSDimitry Andric DS_Warning)); 300*5f757f3fSDimitry Andric } 301*5f757f3fSDimitry Andric } 302*5f757f3fSDimitry Andric 303*5f757f3fSDimitry Andric if (auto F = M.getFunction("__hipstdpar_hidden_free")) { 304*5f757f3fSDimitry Andric auto LibcFree = M.getOrInsertFunction("__libc_free", F->getFunctionType(), 305*5f757f3fSDimitry Andric F->getAttributes()); 306*5f757f3fSDimitry Andric F->replaceAllUsesWith(LibcFree.getCallee()); 307*5f757f3fSDimitry Andric 308*5f757f3fSDimitry Andric eraseFromModule(*F); 309*5f757f3fSDimitry Andric } 310*5f757f3fSDimitry Andric 311*5f757f3fSDimitry Andric return PreservedAnalyses::none(); 312*5f757f3fSDimitry Andric } 313