xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
15f757f3fSDimitry Andric //===----- HipStdPar.cpp - HIP C++ Standard Parallelism Support Passes ----===//
25f757f3fSDimitry Andric //
35f757f3fSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45f757f3fSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55f757f3fSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65f757f3fSDimitry Andric //
75f757f3fSDimitry Andric //===----------------------------------------------------------------------===//
85f757f3fSDimitry Andric // This file implements two passes that enable HIP C++ Standard Parallelism
95f757f3fSDimitry Andric // Support:
105f757f3fSDimitry Andric //
115f757f3fSDimitry Andric // 1. AcceleratorCodeSelection (required): Given that only algorithms are
125f757f3fSDimitry Andric //    accelerated, and that the accelerated implementation exists in the form of
135f757f3fSDimitry Andric //    a compute kernel, we assume that only the kernel, and all functions
145f757f3fSDimitry Andric //    reachable from it, constitute code that the user expects the accelerator
155f757f3fSDimitry Andric //    to execute. Thus, we identify the set of all functions reachable from
165f757f3fSDimitry Andric //    kernels, and then remove all unreachable ones. This last part is necessary
175f757f3fSDimitry Andric //    because it is possible for code that the user did not expect to execute on
185f757f3fSDimitry Andric //    an accelerator to contain constructs that cannot be handled by the target
195f757f3fSDimitry Andric //    BE, which cannot be provably demonstrated to be dead code in general, and
205f757f3fSDimitry Andric //    thus can lead to mis-compilation. The degenerate case of this is when a
215f757f3fSDimitry Andric //    Module contains no kernels (the parent TU had no algorithm invocations fit
225f757f3fSDimitry Andric //    for acceleration), which we handle by completely emptying said module.
235f757f3fSDimitry Andric //    **NOTE**: The above does not handle indirectly reachable functions i.e.
245f757f3fSDimitry Andric //              it is possible to obtain a case where the target of an indirect
255f757f3fSDimitry Andric //              call is otherwise unreachable and thus is removed; this
265f757f3fSDimitry Andric //              restriction is aligned with the current `-hipstdpar` limitations
275f757f3fSDimitry Andric //              and will be relaxed in the future.
285f757f3fSDimitry Andric //
295f757f3fSDimitry Andric // 2. AllocationInterposition (required only when on-demand paging is
305f757f3fSDimitry Andric //    unsupported): Some accelerators or operating systems might not support
315f757f3fSDimitry Andric //    transparent on-demand paging. Thus, they would only be able to access
325f757f3fSDimitry Andric //    memory that is allocated by an accelerator-aware mechanism. For such cases
335f757f3fSDimitry Andric //    the user can opt into enabling allocation / deallocation interposition,
345f757f3fSDimitry Andric //    whereby we replace calls to known allocation / deallocation functions with
355f757f3fSDimitry Andric //    calls to runtime implemented equivalents that forward the requests to
365f757f3fSDimitry Andric //    accelerator-aware interfaces. We also support freeing system allocated
375f757f3fSDimitry Andric //    memory that ends up in one of the runtime equivalents, since this can
385f757f3fSDimitry Andric //    happen if e.g. a library that was compiled without interposition returns
395f757f3fSDimitry Andric //    an allocation that can be validly passed to `free`.
405f757f3fSDimitry Andric //===----------------------------------------------------------------------===//
415f757f3fSDimitry Andric 
425f757f3fSDimitry Andric #include "llvm/Transforms/HipStdPar/HipStdPar.h"
435f757f3fSDimitry Andric 
445f757f3fSDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
455f757f3fSDimitry Andric #include "llvm/ADT/SmallVector.h"
465f757f3fSDimitry Andric #include "llvm/ADT/STLExtras.h"
475f757f3fSDimitry Andric #include "llvm/Analysis/CallGraph.h"
485f757f3fSDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
495f757f3fSDimitry Andric #include "llvm/IR/Constants.h"
505f757f3fSDimitry Andric #include "llvm/IR/DebugInfoMetadata.h"
515f757f3fSDimitry Andric #include "llvm/IR/Function.h"
525f757f3fSDimitry Andric #include "llvm/IR/Module.h"
535f757f3fSDimitry Andric #include "llvm/Transforms/Utils/ModuleUtils.h"
545f757f3fSDimitry Andric 
555f757f3fSDimitry Andric #include <cassert>
565f757f3fSDimitry Andric #include <string>
575f757f3fSDimitry Andric #include <utility>
585f757f3fSDimitry Andric 
595f757f3fSDimitry Andric using namespace llvm;
605f757f3fSDimitry Andric 
615f757f3fSDimitry Andric template<typename T>
625f757f3fSDimitry Andric static inline void eraseFromModule(T &ToErase) {
635f757f3fSDimitry Andric   ToErase.replaceAllUsesWith(PoisonValue::get(ToErase.getType()));
645f757f3fSDimitry Andric   ToErase.eraseFromParent();
655f757f3fSDimitry Andric }
665f757f3fSDimitry Andric 
675f757f3fSDimitry Andric static inline bool checkIfSupported(GlobalVariable &G) {
685f757f3fSDimitry Andric   if (!G.isThreadLocal())
695f757f3fSDimitry Andric     return true;
705f757f3fSDimitry Andric 
715f757f3fSDimitry Andric   G.dropDroppableUses();
725f757f3fSDimitry Andric 
735f757f3fSDimitry Andric   if (!G.isConstantUsed())
745f757f3fSDimitry Andric     return true;
755f757f3fSDimitry Andric 
765f757f3fSDimitry Andric   std::string W;
775f757f3fSDimitry Andric   raw_string_ostream OS(W);
785f757f3fSDimitry Andric 
795f757f3fSDimitry Andric   OS << "Accelerator does not support the thread_local variable "
805f757f3fSDimitry Andric     << G.getName();
815f757f3fSDimitry Andric 
825f757f3fSDimitry Andric   Instruction *I = nullptr;
835f757f3fSDimitry Andric   SmallVector<User *> Tmp(G.user_begin(), G.user_end());
845f757f3fSDimitry Andric   SmallPtrSet<User *, 5> Visited;
855f757f3fSDimitry Andric   do {
865f757f3fSDimitry Andric     auto U = std::move(Tmp.back());
875f757f3fSDimitry Andric     Tmp.pop_back();
885f757f3fSDimitry Andric 
895f757f3fSDimitry Andric     if (Visited.contains(U))
905f757f3fSDimitry Andric       continue;
915f757f3fSDimitry Andric 
925f757f3fSDimitry Andric     if (isa<Instruction>(U))
935f757f3fSDimitry Andric       I = cast<Instruction>(U);
945f757f3fSDimitry Andric     else
955f757f3fSDimitry Andric       Tmp.insert(Tmp.end(), U->user_begin(), U->user_end());
965f757f3fSDimitry Andric 
975f757f3fSDimitry Andric     Visited.insert(U);
985f757f3fSDimitry Andric   } while (!I && !Tmp.empty());
995f757f3fSDimitry Andric 
1005f757f3fSDimitry Andric   assert(I && "thread_local global should have at least one non-constant use.");
1015f757f3fSDimitry Andric 
1025f757f3fSDimitry Andric   G.getContext().diagnose(
1035f757f3fSDimitry Andric     DiagnosticInfoUnsupported(*I->getParent()->getParent(), W,
1045f757f3fSDimitry Andric                               I->getDebugLoc(), DS_Error));
1055f757f3fSDimitry Andric 
1065f757f3fSDimitry Andric   return false;
1075f757f3fSDimitry Andric }
1085f757f3fSDimitry Andric 
1095f757f3fSDimitry Andric static inline void clearModule(Module &M) { // TODO: simplify.
1105f757f3fSDimitry Andric   while (!M.functions().empty())
1115f757f3fSDimitry Andric     eraseFromModule(*M.begin());
1125f757f3fSDimitry Andric   while (!M.globals().empty())
1135f757f3fSDimitry Andric     eraseFromModule(*M.globals().begin());
1145f757f3fSDimitry Andric   while (!M.aliases().empty())
1155f757f3fSDimitry Andric     eraseFromModule(*M.aliases().begin());
1165f757f3fSDimitry Andric   while (!M.ifuncs().empty())
1175f757f3fSDimitry Andric     eraseFromModule(*M.ifuncs().begin());
1185f757f3fSDimitry Andric }
1195f757f3fSDimitry Andric 
1205f757f3fSDimitry Andric static inline void maybeHandleGlobals(Module &M) {
1215f757f3fSDimitry Andric   unsigned GlobAS = M.getDataLayout().getDefaultGlobalsAddressSpace();
1225f757f3fSDimitry Andric   for (auto &&G : M.globals()) { // TODO: should we handle these in the FE?
1235f757f3fSDimitry Andric     if (!checkIfSupported(G))
1245f757f3fSDimitry Andric       return clearModule(M);
1255f757f3fSDimitry Andric 
1265f757f3fSDimitry Andric     if (G.isThreadLocal())
1275f757f3fSDimitry Andric       continue;
1285f757f3fSDimitry Andric     if (G.isConstant())
1295f757f3fSDimitry Andric       continue;
1305f757f3fSDimitry Andric     if (G.getAddressSpace() != GlobAS)
1315f757f3fSDimitry Andric       continue;
1325f757f3fSDimitry Andric     if (G.getLinkage() != GlobalVariable::ExternalLinkage)
1335f757f3fSDimitry Andric       continue;
1345f757f3fSDimitry Andric 
1355f757f3fSDimitry Andric     G.setLinkage(GlobalVariable::ExternalWeakLinkage);
136*0fca6ea1SDimitry Andric     G.setInitializer(nullptr);
1375f757f3fSDimitry Andric     G.setExternallyInitialized(true);
1385f757f3fSDimitry Andric   }
1395f757f3fSDimitry Andric }
1405f757f3fSDimitry Andric 
1415f757f3fSDimitry Andric template<unsigned N>
1425f757f3fSDimitry Andric static inline void removeUnreachableFunctions(
1435f757f3fSDimitry Andric   const SmallPtrSet<const Function *, N>& Reachable, Module &M) {
1445f757f3fSDimitry Andric   removeFromUsedLists(M, [&](Constant *C) {
1455f757f3fSDimitry Andric     if (auto F = dyn_cast<Function>(C))
1465f757f3fSDimitry Andric       return !Reachable.contains(F);
1475f757f3fSDimitry Andric 
1485f757f3fSDimitry Andric     return false;
1495f757f3fSDimitry Andric   });
1505f757f3fSDimitry Andric 
1515f757f3fSDimitry Andric   SmallVector<std::reference_wrapper<Function>> ToRemove;
1525f757f3fSDimitry Andric   copy_if(M, std::back_inserter(ToRemove), [&](auto &&F) {
1535f757f3fSDimitry Andric     return !F.isIntrinsic() && !Reachable.contains(&F);
1545f757f3fSDimitry Andric   });
1555f757f3fSDimitry Andric 
1565f757f3fSDimitry Andric   for_each(ToRemove, eraseFromModule<Function>);
1575f757f3fSDimitry Andric }
1585f757f3fSDimitry Andric 
1595f757f3fSDimitry Andric static inline bool isAcceleratorExecutionRoot(const Function *F) {
1605f757f3fSDimitry Andric     if (!F)
1615f757f3fSDimitry Andric       return false;
1625f757f3fSDimitry Andric 
1635f757f3fSDimitry Andric     return F->getCallingConv() == CallingConv::AMDGPU_KERNEL;
1645f757f3fSDimitry Andric }
1655f757f3fSDimitry Andric 
1665f757f3fSDimitry Andric static inline bool checkIfSupported(const Function *F, const CallBase *CB) {
1675f757f3fSDimitry Andric   const auto Dx = F->getName().rfind("__hipstdpar_unsupported");
1685f757f3fSDimitry Andric 
1695f757f3fSDimitry Andric   if (Dx == StringRef::npos)
1705f757f3fSDimitry Andric     return true;
1715f757f3fSDimitry Andric 
1725f757f3fSDimitry Andric   const auto N = F->getName().substr(0, Dx);
1735f757f3fSDimitry Andric 
1745f757f3fSDimitry Andric   std::string W;
1755f757f3fSDimitry Andric   raw_string_ostream OS(W);
1765f757f3fSDimitry Andric 
1775f757f3fSDimitry Andric   if (N == "__ASM")
1785f757f3fSDimitry Andric     OS << "Accelerator does not support the ASM block:\n"
1795f757f3fSDimitry Andric       << cast<ConstantDataArray>(CB->getArgOperand(0))->getAsCString();
1805f757f3fSDimitry Andric   else
1815f757f3fSDimitry Andric     OS << "Accelerator does not support the " << N << " function.";
1825f757f3fSDimitry Andric 
1835f757f3fSDimitry Andric   auto Caller = CB->getParent()->getParent();
1845f757f3fSDimitry Andric 
1855f757f3fSDimitry Andric   Caller->getContext().diagnose(
1865f757f3fSDimitry Andric     DiagnosticInfoUnsupported(*Caller, W, CB->getDebugLoc(), DS_Error));
1875f757f3fSDimitry Andric 
1885f757f3fSDimitry Andric   return false;
1895f757f3fSDimitry Andric }
1905f757f3fSDimitry Andric 
1915f757f3fSDimitry Andric PreservedAnalyses
1925f757f3fSDimitry Andric   HipStdParAcceleratorCodeSelectionPass::run(Module &M,
1935f757f3fSDimitry Andric                                              ModuleAnalysisManager &MAM) {
1945f757f3fSDimitry Andric   auto &CGA = MAM.getResult<CallGraphAnalysis>(M);
1955f757f3fSDimitry Andric 
1965f757f3fSDimitry Andric   SmallPtrSet<const Function *, 32> Reachable;
1975f757f3fSDimitry Andric   for (auto &&CGN : CGA) {
1985f757f3fSDimitry Andric     if (!isAcceleratorExecutionRoot(CGN.first))
1995f757f3fSDimitry Andric       continue;
2005f757f3fSDimitry Andric 
2015f757f3fSDimitry Andric     Reachable.insert(CGN.first);
2025f757f3fSDimitry Andric 
2035f757f3fSDimitry Andric     SmallVector<const Function *> Tmp({CGN.first});
2045f757f3fSDimitry Andric     do {
2055f757f3fSDimitry Andric       auto F = std::move(Tmp.back());
2065f757f3fSDimitry Andric       Tmp.pop_back();
2075f757f3fSDimitry Andric 
2085f757f3fSDimitry Andric       for (auto &&N : *CGA[F]) {
2095f757f3fSDimitry Andric         if (!N.second)
2105f757f3fSDimitry Andric           continue;
2115f757f3fSDimitry Andric         if (!N.second->getFunction())
2125f757f3fSDimitry Andric           continue;
2135f757f3fSDimitry Andric         if (Reachable.contains(N.second->getFunction()))
2145f757f3fSDimitry Andric           continue;
2155f757f3fSDimitry Andric 
2165f757f3fSDimitry Andric         if (!checkIfSupported(N.second->getFunction(),
2175f757f3fSDimitry Andric                               dyn_cast<CallBase>(*N.first)))
2185f757f3fSDimitry Andric           return PreservedAnalyses::none();
2195f757f3fSDimitry Andric 
2205f757f3fSDimitry Andric         Reachable.insert(N.second->getFunction());
2215f757f3fSDimitry Andric         Tmp.push_back(N.second->getFunction());
2225f757f3fSDimitry Andric       }
2235f757f3fSDimitry Andric     } while (!std::empty(Tmp));
2245f757f3fSDimitry Andric   }
2255f757f3fSDimitry Andric 
2265f757f3fSDimitry Andric   if (std::empty(Reachable))
2275f757f3fSDimitry Andric     clearModule(M);
2285f757f3fSDimitry Andric   else
2295f757f3fSDimitry Andric     removeUnreachableFunctions(Reachable, M);
2305f757f3fSDimitry Andric 
2315f757f3fSDimitry Andric   maybeHandleGlobals(M);
2325f757f3fSDimitry Andric 
2335f757f3fSDimitry Andric   return PreservedAnalyses::none();
2345f757f3fSDimitry Andric }
2355f757f3fSDimitry Andric 
2365f757f3fSDimitry Andric static constexpr std::pair<StringLiteral, StringLiteral> ReplaceMap[]{
2375f757f3fSDimitry Andric   {"aligned_alloc",             "__hipstdpar_aligned_alloc"},
2385f757f3fSDimitry Andric   {"calloc",                    "__hipstdpar_calloc"},
2395f757f3fSDimitry Andric   {"free",                      "__hipstdpar_free"},
2405f757f3fSDimitry Andric   {"malloc",                    "__hipstdpar_malloc"},
2415f757f3fSDimitry Andric   {"memalign",                  "__hipstdpar_aligned_alloc"},
2425f757f3fSDimitry Andric   {"posix_memalign",            "__hipstdpar_posix_aligned_alloc"},
2435f757f3fSDimitry Andric   {"realloc",                   "__hipstdpar_realloc"},
2445f757f3fSDimitry Andric   {"reallocarray",              "__hipstdpar_realloc_array"},
2455f757f3fSDimitry Andric   {"_ZdaPv",                    "__hipstdpar_operator_delete"},
2465f757f3fSDimitry Andric   {"_ZdaPvm",                   "__hipstdpar_operator_delete_sized"},
2475f757f3fSDimitry Andric   {"_ZdaPvSt11align_val_t",     "__hipstdpar_operator_delete_aligned"},
2485f757f3fSDimitry Andric   {"_ZdaPvmSt11align_val_t",    "__hipstdpar_operator_delete_aligned_sized"},
2495f757f3fSDimitry Andric   {"_ZdlPv",                    "__hipstdpar_operator_delete"},
2505f757f3fSDimitry Andric   {"_ZdlPvm",                   "__hipstdpar_operator_delete_sized"},
2515f757f3fSDimitry Andric   {"_ZdlPvSt11align_val_t",     "__hipstdpar_operator_delete_aligned"},
2525f757f3fSDimitry Andric   {"_ZdlPvmSt11align_val_t",    "__hipstdpar_operator_delete_aligned_sized"},
2535f757f3fSDimitry Andric   {"_Znam",                     "__hipstdpar_operator_new"},
2545f757f3fSDimitry Andric   {"_ZnamRKSt9nothrow_t",       "__hipstdpar_operator_new_nothrow"},
2555f757f3fSDimitry Andric   {"_ZnamSt11align_val_t",      "__hipstdpar_operator_new_aligned"},
2565f757f3fSDimitry Andric   {"_ZnamSt11align_val_tRKSt9nothrow_t",
2575f757f3fSDimitry Andric                                 "__hipstdpar_operator_new_aligned_nothrow"},
2585f757f3fSDimitry Andric 
2595f757f3fSDimitry Andric   {"_Znwm",                     "__hipstdpar_operator_new"},
2605f757f3fSDimitry Andric   {"_ZnwmRKSt9nothrow_t",       "__hipstdpar_operator_new_nothrow"},
2615f757f3fSDimitry Andric   {"_ZnwmSt11align_val_t",      "__hipstdpar_operator_new_aligned"},
2625f757f3fSDimitry Andric   {"_ZnwmSt11align_val_tRKSt9nothrow_t",
2635f757f3fSDimitry Andric                                 "__hipstdpar_operator_new_aligned_nothrow"},
2645f757f3fSDimitry Andric   {"__builtin_calloc",          "__hipstdpar_calloc"},
2655f757f3fSDimitry Andric   {"__builtin_free",            "__hipstdpar_free"},
2665f757f3fSDimitry Andric   {"__builtin_malloc",          "__hipstdpar_malloc"},
2675f757f3fSDimitry Andric   {"__builtin_operator_delete", "__hipstdpar_operator_delete"},
2685f757f3fSDimitry Andric   {"__builtin_operator_new",    "__hipstdpar_operator_new"},
2695f757f3fSDimitry Andric   {"__builtin_realloc",         "__hipstdpar_realloc"},
2705f757f3fSDimitry Andric   {"__libc_calloc",             "__hipstdpar_calloc"},
2715f757f3fSDimitry Andric   {"__libc_free",               "__hipstdpar_free"},
2725f757f3fSDimitry Andric   {"__libc_malloc",             "__hipstdpar_malloc"},
2735f757f3fSDimitry Andric   {"__libc_memalign",           "__hipstdpar_aligned_alloc"},
2745f757f3fSDimitry Andric   {"__libc_realloc",            "__hipstdpar_realloc"}
2755f757f3fSDimitry Andric };
2765f757f3fSDimitry Andric 
2775f757f3fSDimitry Andric PreservedAnalyses
2785f757f3fSDimitry Andric HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) {
2795f757f3fSDimitry Andric   SmallDenseMap<StringRef, StringRef> AllocReplacements(std::cbegin(ReplaceMap),
2805f757f3fSDimitry Andric                                                         std::cend(ReplaceMap));
2815f757f3fSDimitry Andric 
2825f757f3fSDimitry Andric   for (auto &&F : M) {
2835f757f3fSDimitry Andric     if (!F.hasName())
2845f757f3fSDimitry Andric       continue;
2855f757f3fSDimitry Andric     if (!AllocReplacements.contains(F.getName()))
2865f757f3fSDimitry Andric       continue;
2875f757f3fSDimitry Andric 
2885f757f3fSDimitry Andric     if (auto R = M.getFunction(AllocReplacements[F.getName()])) {
2895f757f3fSDimitry Andric       F.replaceAllUsesWith(R);
2905f757f3fSDimitry Andric     } else {
2915f757f3fSDimitry Andric       std::string W;
2925f757f3fSDimitry Andric       raw_string_ostream OS(W);
2935f757f3fSDimitry Andric 
2945f757f3fSDimitry Andric       OS << "cannot be interposed, missing: " << AllocReplacements[F.getName()]
2955f757f3fSDimitry Andric         << ". Tried to run the allocation interposition pass without the "
2965f757f3fSDimitry Andric         << "replacement functions available.";
2975f757f3fSDimitry Andric 
2985f757f3fSDimitry Andric       F.getContext().diagnose(DiagnosticInfoUnsupported(F, W,
2995f757f3fSDimitry Andric                                                         F.getSubprogram(),
3005f757f3fSDimitry Andric                                                         DS_Warning));
3015f757f3fSDimitry Andric     }
3025f757f3fSDimitry Andric   }
3035f757f3fSDimitry Andric 
3045f757f3fSDimitry Andric   if (auto F = M.getFunction("__hipstdpar_hidden_free")) {
3055f757f3fSDimitry Andric     auto LibcFree = M.getOrInsertFunction("__libc_free", F->getFunctionType(),
3065f757f3fSDimitry Andric                                           F->getAttributes());
3075f757f3fSDimitry Andric     F->replaceAllUsesWith(LibcFree.getCallee());
3085f757f3fSDimitry Andric 
3095f757f3fSDimitry Andric     eraseFromModule(*F);
3105f757f3fSDimitry Andric   }
3115f757f3fSDimitry Andric 
3125f757f3fSDimitry Andric   return PreservedAnalyses::none();
3135f757f3fSDimitry Andric }
314