//===----- HipStdPar.cpp - HIP C++ Standard Parallelism Support Passes ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file implements two passes that enable HIP C++ Standard Parallelism
// Support:
//
// 1. AcceleratorCodeSelection (required): Given that only algorithms are
//    accelerated, and that the accelerated implementation exists in the form of
//    a compute kernel, we assume that only the kernel, and all functions
//    reachable from it, constitute code that the user expects the accelerator
//    to execute. Thus, we identify the set of all functions reachable from
//    kernels, and then remove all unreachable ones. This last part is necessary
//    because it is possible for code that the user did not expect to execute on
//    an accelerator to contain constructs that cannot be handled by the target
//    BE, which cannot be provably demonstrated to be dead code in general, and
//    thus can lead to mis-compilation. The degenerate case of this is when a
//    Module contains no kernels (the parent TU had no algorithm invocations fit
//    for acceleration), which we handle by completely emptying said module.
//    **NOTE**: The above does not handle indirectly reachable functions i.e.
//              it is possible to obtain a case where the target of an indirect
//              call is otherwise unreachable and thus is removed; this
//              restriction is aligned with the current `-hipstdpar` limitations
//              and will be relaxed in the future.
//
// 2. AllocationInterposition (required only when on-demand paging is
//    unsupported): Some accelerators or operating systems might not support
//    transparent on-demand paging. Thus, they would only be able to access
//    memory that is allocated by an accelerator-aware mechanism. For such cases
//    the user can opt into enabling allocation / deallocation interposition,
//    whereby we replace calls to known allocation / deallocation functions with
//    calls to runtime implemented equivalents that forward the requests to
//    accelerator-aware interfaces. We also support freeing system allocated
//    memory that ends up in one of the runtime equivalents, since this can
//    happen if e.g. a library that was compiled without interposition returns
//    an allocation that can be validly passed to `free`.
405f757f3fSDimitry Andric //===----------------------------------------------------------------------===// 415f757f3fSDimitry Andric 425f757f3fSDimitry Andric #include "llvm/Transforms/HipStdPar/HipStdPar.h" 435f757f3fSDimitry Andric 445f757f3fSDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 455f757f3fSDimitry Andric #include "llvm/ADT/SmallVector.h" 465f757f3fSDimitry Andric #include "llvm/ADT/STLExtras.h" 475f757f3fSDimitry Andric #include "llvm/Analysis/CallGraph.h" 485f757f3fSDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 495f757f3fSDimitry Andric #include "llvm/IR/Constants.h" 505f757f3fSDimitry Andric #include "llvm/IR/DebugInfoMetadata.h" 515f757f3fSDimitry Andric #include "llvm/IR/Function.h" 525f757f3fSDimitry Andric #include "llvm/IR/Module.h" 535f757f3fSDimitry Andric #include "llvm/Transforms/Utils/ModuleUtils.h" 545f757f3fSDimitry Andric 555f757f3fSDimitry Andric #include <cassert> 565f757f3fSDimitry Andric #include <string> 575f757f3fSDimitry Andric #include <utility> 585f757f3fSDimitry Andric 595f757f3fSDimitry Andric using namespace llvm; 605f757f3fSDimitry Andric 615f757f3fSDimitry Andric template<typename T> 625f757f3fSDimitry Andric static inline void eraseFromModule(T &ToErase) { 635f757f3fSDimitry Andric ToErase.replaceAllUsesWith(PoisonValue::get(ToErase.getType())); 645f757f3fSDimitry Andric ToErase.eraseFromParent(); 655f757f3fSDimitry Andric } 665f757f3fSDimitry Andric 675f757f3fSDimitry Andric static inline bool checkIfSupported(GlobalVariable &G) { 685f757f3fSDimitry Andric if (!G.isThreadLocal()) 695f757f3fSDimitry Andric return true; 705f757f3fSDimitry Andric 715f757f3fSDimitry Andric G.dropDroppableUses(); 725f757f3fSDimitry Andric 735f757f3fSDimitry Andric if (!G.isConstantUsed()) 745f757f3fSDimitry Andric return true; 755f757f3fSDimitry Andric 765f757f3fSDimitry Andric std::string W; 775f757f3fSDimitry Andric raw_string_ostream OS(W); 785f757f3fSDimitry Andric 795f757f3fSDimitry Andric OS << "Accelerator does 
not support the thread_local variable " 805f757f3fSDimitry Andric << G.getName(); 815f757f3fSDimitry Andric 825f757f3fSDimitry Andric Instruction *I = nullptr; 835f757f3fSDimitry Andric SmallVector<User *> Tmp(G.user_begin(), G.user_end()); 845f757f3fSDimitry Andric SmallPtrSet<User *, 5> Visited; 855f757f3fSDimitry Andric do { 865f757f3fSDimitry Andric auto U = std::move(Tmp.back()); 875f757f3fSDimitry Andric Tmp.pop_back(); 885f757f3fSDimitry Andric 895f757f3fSDimitry Andric if (Visited.contains(U)) 905f757f3fSDimitry Andric continue; 915f757f3fSDimitry Andric 925f757f3fSDimitry Andric if (isa<Instruction>(U)) 935f757f3fSDimitry Andric I = cast<Instruction>(U); 945f757f3fSDimitry Andric else 955f757f3fSDimitry Andric Tmp.insert(Tmp.end(), U->user_begin(), U->user_end()); 965f757f3fSDimitry Andric 975f757f3fSDimitry Andric Visited.insert(U); 985f757f3fSDimitry Andric } while (!I && !Tmp.empty()); 995f757f3fSDimitry Andric 1005f757f3fSDimitry Andric assert(I && "thread_local global should have at least one non-constant use."); 1015f757f3fSDimitry Andric 1025f757f3fSDimitry Andric G.getContext().diagnose( 1035f757f3fSDimitry Andric DiagnosticInfoUnsupported(*I->getParent()->getParent(), W, 1045f757f3fSDimitry Andric I->getDebugLoc(), DS_Error)); 1055f757f3fSDimitry Andric 1065f757f3fSDimitry Andric return false; 1075f757f3fSDimitry Andric } 1085f757f3fSDimitry Andric 1095f757f3fSDimitry Andric static inline void clearModule(Module &M) { // TODO: simplify. 
1105f757f3fSDimitry Andric while (!M.functions().empty()) 1115f757f3fSDimitry Andric eraseFromModule(*M.begin()); 1125f757f3fSDimitry Andric while (!M.globals().empty()) 1135f757f3fSDimitry Andric eraseFromModule(*M.globals().begin()); 1145f757f3fSDimitry Andric while (!M.aliases().empty()) 1155f757f3fSDimitry Andric eraseFromModule(*M.aliases().begin()); 1165f757f3fSDimitry Andric while (!M.ifuncs().empty()) 1175f757f3fSDimitry Andric eraseFromModule(*M.ifuncs().begin()); 1185f757f3fSDimitry Andric } 1195f757f3fSDimitry Andric 1205f757f3fSDimitry Andric static inline void maybeHandleGlobals(Module &M) { 1215f757f3fSDimitry Andric unsigned GlobAS = M.getDataLayout().getDefaultGlobalsAddressSpace(); 1225f757f3fSDimitry Andric for (auto &&G : M.globals()) { // TODO: should we handle these in the FE? 1235f757f3fSDimitry Andric if (!checkIfSupported(G)) 1245f757f3fSDimitry Andric return clearModule(M); 1255f757f3fSDimitry Andric 1265f757f3fSDimitry Andric if (G.isThreadLocal()) 1275f757f3fSDimitry Andric continue; 1285f757f3fSDimitry Andric if (G.isConstant()) 1295f757f3fSDimitry Andric continue; 1305f757f3fSDimitry Andric if (G.getAddressSpace() != GlobAS) 1315f757f3fSDimitry Andric continue; 1325f757f3fSDimitry Andric if (G.getLinkage() != GlobalVariable::ExternalLinkage) 1335f757f3fSDimitry Andric continue; 1345f757f3fSDimitry Andric 1355f757f3fSDimitry Andric G.setLinkage(GlobalVariable::ExternalWeakLinkage); 136*0fca6ea1SDimitry Andric G.setInitializer(nullptr); 1375f757f3fSDimitry Andric G.setExternallyInitialized(true); 1385f757f3fSDimitry Andric } 1395f757f3fSDimitry Andric } 1405f757f3fSDimitry Andric 1415f757f3fSDimitry Andric template<unsigned N> 1425f757f3fSDimitry Andric static inline void removeUnreachableFunctions( 1435f757f3fSDimitry Andric const SmallPtrSet<const Function *, N>& Reachable, Module &M) { 1445f757f3fSDimitry Andric removeFromUsedLists(M, [&](Constant *C) { 1455f757f3fSDimitry Andric if (auto F = dyn_cast<Function>(C)) 1465f757f3fSDimitry 
Andric return !Reachable.contains(F); 1475f757f3fSDimitry Andric 1485f757f3fSDimitry Andric return false; 1495f757f3fSDimitry Andric }); 1505f757f3fSDimitry Andric 1515f757f3fSDimitry Andric SmallVector<std::reference_wrapper<Function>> ToRemove; 1525f757f3fSDimitry Andric copy_if(M, std::back_inserter(ToRemove), [&](auto &&F) { 1535f757f3fSDimitry Andric return !F.isIntrinsic() && !Reachable.contains(&F); 1545f757f3fSDimitry Andric }); 1555f757f3fSDimitry Andric 1565f757f3fSDimitry Andric for_each(ToRemove, eraseFromModule<Function>); 1575f757f3fSDimitry Andric } 1585f757f3fSDimitry Andric 1595f757f3fSDimitry Andric static inline bool isAcceleratorExecutionRoot(const Function *F) { 1605f757f3fSDimitry Andric if (!F) 1615f757f3fSDimitry Andric return false; 1625f757f3fSDimitry Andric 1635f757f3fSDimitry Andric return F->getCallingConv() == CallingConv::AMDGPU_KERNEL; 1645f757f3fSDimitry Andric } 1655f757f3fSDimitry Andric 1665f757f3fSDimitry Andric static inline bool checkIfSupported(const Function *F, const CallBase *CB) { 1675f757f3fSDimitry Andric const auto Dx = F->getName().rfind("__hipstdpar_unsupported"); 1685f757f3fSDimitry Andric 1695f757f3fSDimitry Andric if (Dx == StringRef::npos) 1705f757f3fSDimitry Andric return true; 1715f757f3fSDimitry Andric 1725f757f3fSDimitry Andric const auto N = F->getName().substr(0, Dx); 1735f757f3fSDimitry Andric 1745f757f3fSDimitry Andric std::string W; 1755f757f3fSDimitry Andric raw_string_ostream OS(W); 1765f757f3fSDimitry Andric 1775f757f3fSDimitry Andric if (N == "__ASM") 1785f757f3fSDimitry Andric OS << "Accelerator does not support the ASM block:\n" 1795f757f3fSDimitry Andric << cast<ConstantDataArray>(CB->getArgOperand(0))->getAsCString(); 1805f757f3fSDimitry Andric else 1815f757f3fSDimitry Andric OS << "Accelerator does not support the " << N << " function."; 1825f757f3fSDimitry Andric 1835f757f3fSDimitry Andric auto Caller = CB->getParent()->getParent(); 1845f757f3fSDimitry Andric 1855f757f3fSDimitry Andric 
Caller->getContext().diagnose( 1865f757f3fSDimitry Andric DiagnosticInfoUnsupported(*Caller, W, CB->getDebugLoc(), DS_Error)); 1875f757f3fSDimitry Andric 1885f757f3fSDimitry Andric return false; 1895f757f3fSDimitry Andric } 1905f757f3fSDimitry Andric 1915f757f3fSDimitry Andric PreservedAnalyses 1925f757f3fSDimitry Andric HipStdParAcceleratorCodeSelectionPass::run(Module &M, 1935f757f3fSDimitry Andric ModuleAnalysisManager &MAM) { 1945f757f3fSDimitry Andric auto &CGA = MAM.getResult<CallGraphAnalysis>(M); 1955f757f3fSDimitry Andric 1965f757f3fSDimitry Andric SmallPtrSet<const Function *, 32> Reachable; 1975f757f3fSDimitry Andric for (auto &&CGN : CGA) { 1985f757f3fSDimitry Andric if (!isAcceleratorExecutionRoot(CGN.first)) 1995f757f3fSDimitry Andric continue; 2005f757f3fSDimitry Andric 2015f757f3fSDimitry Andric Reachable.insert(CGN.first); 2025f757f3fSDimitry Andric 2035f757f3fSDimitry Andric SmallVector<const Function *> Tmp({CGN.first}); 2045f757f3fSDimitry Andric do { 2055f757f3fSDimitry Andric auto F = std::move(Tmp.back()); 2065f757f3fSDimitry Andric Tmp.pop_back(); 2075f757f3fSDimitry Andric 2085f757f3fSDimitry Andric for (auto &&N : *CGA[F]) { 2095f757f3fSDimitry Andric if (!N.second) 2105f757f3fSDimitry Andric continue; 2115f757f3fSDimitry Andric if (!N.second->getFunction()) 2125f757f3fSDimitry Andric continue; 2135f757f3fSDimitry Andric if (Reachable.contains(N.second->getFunction())) 2145f757f3fSDimitry Andric continue; 2155f757f3fSDimitry Andric 2165f757f3fSDimitry Andric if (!checkIfSupported(N.second->getFunction(), 2175f757f3fSDimitry Andric dyn_cast<CallBase>(*N.first))) 2185f757f3fSDimitry Andric return PreservedAnalyses::none(); 2195f757f3fSDimitry Andric 2205f757f3fSDimitry Andric Reachable.insert(N.second->getFunction()); 2215f757f3fSDimitry Andric Tmp.push_back(N.second->getFunction()); 2225f757f3fSDimitry Andric } 2235f757f3fSDimitry Andric } while (!std::empty(Tmp)); 2245f757f3fSDimitry Andric } 2255f757f3fSDimitry Andric 2265f757f3fSDimitry 
Andric if (std::empty(Reachable)) 2275f757f3fSDimitry Andric clearModule(M); 2285f757f3fSDimitry Andric else 2295f757f3fSDimitry Andric removeUnreachableFunctions(Reachable, M); 2305f757f3fSDimitry Andric 2315f757f3fSDimitry Andric maybeHandleGlobals(M); 2325f757f3fSDimitry Andric 2335f757f3fSDimitry Andric return PreservedAnalyses::none(); 2345f757f3fSDimitry Andric } 2355f757f3fSDimitry Andric 2365f757f3fSDimitry Andric static constexpr std::pair<StringLiteral, StringLiteral> ReplaceMap[]{ 2375f757f3fSDimitry Andric {"aligned_alloc", "__hipstdpar_aligned_alloc"}, 2385f757f3fSDimitry Andric {"calloc", "__hipstdpar_calloc"}, 2395f757f3fSDimitry Andric {"free", "__hipstdpar_free"}, 2405f757f3fSDimitry Andric {"malloc", "__hipstdpar_malloc"}, 2415f757f3fSDimitry Andric {"memalign", "__hipstdpar_aligned_alloc"}, 2425f757f3fSDimitry Andric {"posix_memalign", "__hipstdpar_posix_aligned_alloc"}, 2435f757f3fSDimitry Andric {"realloc", "__hipstdpar_realloc"}, 2445f757f3fSDimitry Andric {"reallocarray", "__hipstdpar_realloc_array"}, 2455f757f3fSDimitry Andric {"_ZdaPv", "__hipstdpar_operator_delete"}, 2465f757f3fSDimitry Andric {"_ZdaPvm", "__hipstdpar_operator_delete_sized"}, 2475f757f3fSDimitry Andric {"_ZdaPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"}, 2485f757f3fSDimitry Andric {"_ZdaPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"}, 2495f757f3fSDimitry Andric {"_ZdlPv", "__hipstdpar_operator_delete"}, 2505f757f3fSDimitry Andric {"_ZdlPvm", "__hipstdpar_operator_delete_sized"}, 2515f757f3fSDimitry Andric {"_ZdlPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"}, 2525f757f3fSDimitry Andric {"_ZdlPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"}, 2535f757f3fSDimitry Andric {"_Znam", "__hipstdpar_operator_new"}, 2545f757f3fSDimitry Andric {"_ZnamRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"}, 2555f757f3fSDimitry Andric {"_ZnamSt11align_val_t", "__hipstdpar_operator_new_aligned"}, 2565f757f3fSDimitry Andric 
{"_ZnamSt11align_val_tRKSt9nothrow_t", 2575f757f3fSDimitry Andric "__hipstdpar_operator_new_aligned_nothrow"}, 2585f757f3fSDimitry Andric 2595f757f3fSDimitry Andric {"_Znwm", "__hipstdpar_operator_new"}, 2605f757f3fSDimitry Andric {"_ZnwmRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"}, 2615f757f3fSDimitry Andric {"_ZnwmSt11align_val_t", "__hipstdpar_operator_new_aligned"}, 2625f757f3fSDimitry Andric {"_ZnwmSt11align_val_tRKSt9nothrow_t", 2635f757f3fSDimitry Andric "__hipstdpar_operator_new_aligned_nothrow"}, 2645f757f3fSDimitry Andric {"__builtin_calloc", "__hipstdpar_calloc"}, 2655f757f3fSDimitry Andric {"__builtin_free", "__hipstdpar_free"}, 2665f757f3fSDimitry Andric {"__builtin_malloc", "__hipstdpar_malloc"}, 2675f757f3fSDimitry Andric {"__builtin_operator_delete", "__hipstdpar_operator_delete"}, 2685f757f3fSDimitry Andric {"__builtin_operator_new", "__hipstdpar_operator_new"}, 2695f757f3fSDimitry Andric {"__builtin_realloc", "__hipstdpar_realloc"}, 2705f757f3fSDimitry Andric {"__libc_calloc", "__hipstdpar_calloc"}, 2715f757f3fSDimitry Andric {"__libc_free", "__hipstdpar_free"}, 2725f757f3fSDimitry Andric {"__libc_malloc", "__hipstdpar_malloc"}, 2735f757f3fSDimitry Andric {"__libc_memalign", "__hipstdpar_aligned_alloc"}, 2745f757f3fSDimitry Andric {"__libc_realloc", "__hipstdpar_realloc"} 2755f757f3fSDimitry Andric }; 2765f757f3fSDimitry Andric 2775f757f3fSDimitry Andric PreservedAnalyses 2785f757f3fSDimitry Andric HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) { 2795f757f3fSDimitry Andric SmallDenseMap<StringRef, StringRef> AllocReplacements(std::cbegin(ReplaceMap), 2805f757f3fSDimitry Andric std::cend(ReplaceMap)); 2815f757f3fSDimitry Andric 2825f757f3fSDimitry Andric for (auto &&F : M) { 2835f757f3fSDimitry Andric if (!F.hasName()) 2845f757f3fSDimitry Andric continue; 2855f757f3fSDimitry Andric if (!AllocReplacements.contains(F.getName())) 2865f757f3fSDimitry Andric continue; 2875f757f3fSDimitry Andric 2885f757f3fSDimitry 
Andric if (auto R = M.getFunction(AllocReplacements[F.getName()])) { 2895f757f3fSDimitry Andric F.replaceAllUsesWith(R); 2905f757f3fSDimitry Andric } else { 2915f757f3fSDimitry Andric std::string W; 2925f757f3fSDimitry Andric raw_string_ostream OS(W); 2935f757f3fSDimitry Andric 2945f757f3fSDimitry Andric OS << "cannot be interposed, missing: " << AllocReplacements[F.getName()] 2955f757f3fSDimitry Andric << ". Tried to run the allocation interposition pass without the " 2965f757f3fSDimitry Andric << "replacement functions available."; 2975f757f3fSDimitry Andric 2985f757f3fSDimitry Andric F.getContext().diagnose(DiagnosticInfoUnsupported(F, W, 2995f757f3fSDimitry Andric F.getSubprogram(), 3005f757f3fSDimitry Andric DS_Warning)); 3015f757f3fSDimitry Andric } 3025f757f3fSDimitry Andric } 3035f757f3fSDimitry Andric 3045f757f3fSDimitry Andric if (auto F = M.getFunction("__hipstdpar_hidden_free")) { 3055f757f3fSDimitry Andric auto LibcFree = M.getOrInsertFunction("__libc_free", F->getFunctionType(), 3065f757f3fSDimitry Andric F->getAttributes()); 3075f757f3fSDimitry Andric F->replaceAllUsesWith(LibcFree.getCallee()); 3085f757f3fSDimitry Andric 3095f757f3fSDimitry Andric eraseFromModule(*F); 3105f757f3fSDimitry Andric } 3115f757f3fSDimitry Andric 3125f757f3fSDimitry Andric return PreservedAnalyses::none(); 3135f757f3fSDimitry Andric } 314