xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
1*5f757f3fSDimitry Andric //===----- HipStdPar.cpp - HIP C++ Standard Parallelism Support Passes ----===//
2*5f757f3fSDimitry Andric //
3*5f757f3fSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*5f757f3fSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*5f757f3fSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*5f757f3fSDimitry Andric //
7*5f757f3fSDimitry Andric //===----------------------------------------------------------------------===//
8*5f757f3fSDimitry Andric // This file implements two passes that enable HIP C++ Standard Parallelism
9*5f757f3fSDimitry Andric // Support:
10*5f757f3fSDimitry Andric //
11*5f757f3fSDimitry Andric // 1. AcceleratorCodeSelection (required): Given that only algorithms are
12*5f757f3fSDimitry Andric //    accelerated, and that the accelerated implementation exists in the form of
13*5f757f3fSDimitry Andric //    a compute kernel, we assume that only the kernel, and all functions
14*5f757f3fSDimitry Andric //    reachable from it, constitute code that the user expects the accelerator
15*5f757f3fSDimitry Andric //    to execute. Thus, we identify the set of all functions reachable from
16*5f757f3fSDimitry Andric //    kernels, and then remove all unreachable ones. This last part is necessary
17*5f757f3fSDimitry Andric //    because it is possible for code that the user did not expect to execute on
18*5f757f3fSDimitry Andric //    an accelerator to contain constructs that cannot be handled by the target
19*5f757f3fSDimitry Andric //    BE, which cannot be provably demonstrated to be dead code in general, and
20*5f757f3fSDimitry Andric //    thus can lead to mis-compilation. The degenerate case of this is when a
21*5f757f3fSDimitry Andric //    Module contains no kernels (the parent TU had no algorithm invocations fit
22*5f757f3fSDimitry Andric //    for acceleration), which we handle by completely emptying said module.
23*5f757f3fSDimitry Andric //    **NOTE**: The above does not handle indirectly reachable functions i.e.
24*5f757f3fSDimitry Andric //              it is possible to obtain a case where the target of an indirect
25*5f757f3fSDimitry Andric //              call is otherwise unreachable and thus is removed; this
26*5f757f3fSDimitry Andric //              restriction is aligned with the current `-hipstdpar` limitations
27*5f757f3fSDimitry Andric //              and will be relaxed in the future.
28*5f757f3fSDimitry Andric //
29*5f757f3fSDimitry Andric // 2. AllocationInterposition (required only when on-demand paging is
30*5f757f3fSDimitry Andric //    unsupported): Some accelerators or operating systems might not support
31*5f757f3fSDimitry Andric //    transparent on-demand paging. Thus, they would only be able to access
32*5f757f3fSDimitry Andric //    memory that is allocated by an accelerator-aware mechanism. For such cases
33*5f757f3fSDimitry Andric //    the user can opt into enabling allocation / deallocation interposition,
34*5f757f3fSDimitry Andric //    whereby we replace calls to known allocation / deallocation functions with
35*5f757f3fSDimitry Andric //    calls to runtime implemented equivalents that forward the requests to
36*5f757f3fSDimitry Andric //    accelerator-aware interfaces. We also support freeing system allocated
37*5f757f3fSDimitry Andric //    memory that ends up in one of the runtime equivalents, since this can
38*5f757f3fSDimitry Andric //    happen if e.g. a library that was compiled without interposition returns
39*5f757f3fSDimitry Andric //    an allocation that can be validly passed to `free`.
40*5f757f3fSDimitry Andric //===----------------------------------------------------------------------===//
41*5f757f3fSDimitry Andric 
42*5f757f3fSDimitry Andric #include "llvm/Transforms/HipStdPar/HipStdPar.h"
43*5f757f3fSDimitry Andric 
44*5f757f3fSDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
45*5f757f3fSDimitry Andric #include "llvm/ADT/SmallVector.h"
46*5f757f3fSDimitry Andric #include "llvm/ADT/STLExtras.h"
47*5f757f3fSDimitry Andric #include "llvm/Analysis/CallGraph.h"
48*5f757f3fSDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
49*5f757f3fSDimitry Andric #include "llvm/IR/Constants.h"
50*5f757f3fSDimitry Andric #include "llvm/IR/DebugInfoMetadata.h"
51*5f757f3fSDimitry Andric #include "llvm/IR/Function.h"
52*5f757f3fSDimitry Andric #include "llvm/IR/Module.h"
53*5f757f3fSDimitry Andric #include "llvm/Transforms/Utils/ModuleUtils.h"
54*5f757f3fSDimitry Andric 
55*5f757f3fSDimitry Andric #include <cassert>
56*5f757f3fSDimitry Andric #include <string>
57*5f757f3fSDimitry Andric #include <utility>
58*5f757f3fSDimitry Andric 
59*5f757f3fSDimitry Andric using namespace llvm;
60*5f757f3fSDimitry Andric 
61*5f757f3fSDimitry Andric template<typename T>
62*5f757f3fSDimitry Andric static inline void eraseFromModule(T &ToErase) {
63*5f757f3fSDimitry Andric   ToErase.replaceAllUsesWith(PoisonValue::get(ToErase.getType()));
64*5f757f3fSDimitry Andric   ToErase.eraseFromParent();
65*5f757f3fSDimitry Andric }
66*5f757f3fSDimitry Andric 
67*5f757f3fSDimitry Andric static inline bool checkIfSupported(GlobalVariable &G) {
68*5f757f3fSDimitry Andric   if (!G.isThreadLocal())
69*5f757f3fSDimitry Andric     return true;
70*5f757f3fSDimitry Andric 
71*5f757f3fSDimitry Andric   G.dropDroppableUses();
72*5f757f3fSDimitry Andric 
73*5f757f3fSDimitry Andric   if (!G.isConstantUsed())
74*5f757f3fSDimitry Andric     return true;
75*5f757f3fSDimitry Andric 
76*5f757f3fSDimitry Andric   std::string W;
77*5f757f3fSDimitry Andric   raw_string_ostream OS(W);
78*5f757f3fSDimitry Andric 
79*5f757f3fSDimitry Andric   OS << "Accelerator does not support the thread_local variable "
80*5f757f3fSDimitry Andric     << G.getName();
81*5f757f3fSDimitry Andric 
82*5f757f3fSDimitry Andric   Instruction *I = nullptr;
83*5f757f3fSDimitry Andric   SmallVector<User *> Tmp(G.user_begin(), G.user_end());
84*5f757f3fSDimitry Andric   SmallPtrSet<User *, 5> Visited;
85*5f757f3fSDimitry Andric   do {
86*5f757f3fSDimitry Andric     auto U = std::move(Tmp.back());
87*5f757f3fSDimitry Andric     Tmp.pop_back();
88*5f757f3fSDimitry Andric 
89*5f757f3fSDimitry Andric     if (Visited.contains(U))
90*5f757f3fSDimitry Andric       continue;
91*5f757f3fSDimitry Andric 
92*5f757f3fSDimitry Andric     if (isa<Instruction>(U))
93*5f757f3fSDimitry Andric       I = cast<Instruction>(U);
94*5f757f3fSDimitry Andric     else
95*5f757f3fSDimitry Andric       Tmp.insert(Tmp.end(), U->user_begin(), U->user_end());
96*5f757f3fSDimitry Andric 
97*5f757f3fSDimitry Andric     Visited.insert(U);
98*5f757f3fSDimitry Andric   } while (!I && !Tmp.empty());
99*5f757f3fSDimitry Andric 
100*5f757f3fSDimitry Andric   assert(I && "thread_local global should have at least one non-constant use.");
101*5f757f3fSDimitry Andric 
102*5f757f3fSDimitry Andric   G.getContext().diagnose(
103*5f757f3fSDimitry Andric     DiagnosticInfoUnsupported(*I->getParent()->getParent(), W,
104*5f757f3fSDimitry Andric                               I->getDebugLoc(), DS_Error));
105*5f757f3fSDimitry Andric 
106*5f757f3fSDimitry Andric   return false;
107*5f757f3fSDimitry Andric }
108*5f757f3fSDimitry Andric 
109*5f757f3fSDimitry Andric static inline void clearModule(Module &M) { // TODO: simplify.
110*5f757f3fSDimitry Andric   while (!M.functions().empty())
111*5f757f3fSDimitry Andric     eraseFromModule(*M.begin());
112*5f757f3fSDimitry Andric   while (!M.globals().empty())
113*5f757f3fSDimitry Andric     eraseFromModule(*M.globals().begin());
114*5f757f3fSDimitry Andric   while (!M.aliases().empty())
115*5f757f3fSDimitry Andric     eraseFromModule(*M.aliases().begin());
116*5f757f3fSDimitry Andric   while (!M.ifuncs().empty())
117*5f757f3fSDimitry Andric     eraseFromModule(*M.ifuncs().begin());
118*5f757f3fSDimitry Andric }
119*5f757f3fSDimitry Andric 
120*5f757f3fSDimitry Andric static inline void maybeHandleGlobals(Module &M) {
121*5f757f3fSDimitry Andric   unsigned GlobAS = M.getDataLayout().getDefaultGlobalsAddressSpace();
122*5f757f3fSDimitry Andric   for (auto &&G : M.globals()) { // TODO: should we handle these in the FE?
123*5f757f3fSDimitry Andric     if (!checkIfSupported(G))
124*5f757f3fSDimitry Andric       return clearModule(M);
125*5f757f3fSDimitry Andric 
126*5f757f3fSDimitry Andric     if (G.isThreadLocal())
127*5f757f3fSDimitry Andric       continue;
128*5f757f3fSDimitry Andric     if (G.isConstant())
129*5f757f3fSDimitry Andric       continue;
130*5f757f3fSDimitry Andric     if (G.getAddressSpace() != GlobAS)
131*5f757f3fSDimitry Andric       continue;
132*5f757f3fSDimitry Andric     if (G.getLinkage() != GlobalVariable::ExternalLinkage)
133*5f757f3fSDimitry Andric       continue;
134*5f757f3fSDimitry Andric 
135*5f757f3fSDimitry Andric     G.setLinkage(GlobalVariable::ExternalWeakLinkage);
136*5f757f3fSDimitry Andric     G.setExternallyInitialized(true);
137*5f757f3fSDimitry Andric   }
138*5f757f3fSDimitry Andric }
139*5f757f3fSDimitry Andric 
140*5f757f3fSDimitry Andric template<unsigned N>
141*5f757f3fSDimitry Andric static inline void removeUnreachableFunctions(
142*5f757f3fSDimitry Andric   const SmallPtrSet<const Function *, N>& Reachable, Module &M) {
143*5f757f3fSDimitry Andric   removeFromUsedLists(M, [&](Constant *C) {
144*5f757f3fSDimitry Andric     if (auto F = dyn_cast<Function>(C))
145*5f757f3fSDimitry Andric       return !Reachable.contains(F);
146*5f757f3fSDimitry Andric 
147*5f757f3fSDimitry Andric     return false;
148*5f757f3fSDimitry Andric   });
149*5f757f3fSDimitry Andric 
150*5f757f3fSDimitry Andric   SmallVector<std::reference_wrapper<Function>> ToRemove;
151*5f757f3fSDimitry Andric   copy_if(M, std::back_inserter(ToRemove), [&](auto &&F) {
152*5f757f3fSDimitry Andric     return !F.isIntrinsic() && !Reachable.contains(&F);
153*5f757f3fSDimitry Andric   });
154*5f757f3fSDimitry Andric 
155*5f757f3fSDimitry Andric   for_each(ToRemove, eraseFromModule<Function>);
156*5f757f3fSDimitry Andric }
157*5f757f3fSDimitry Andric 
158*5f757f3fSDimitry Andric static inline bool isAcceleratorExecutionRoot(const Function *F) {
159*5f757f3fSDimitry Andric     if (!F)
160*5f757f3fSDimitry Andric       return false;
161*5f757f3fSDimitry Andric 
162*5f757f3fSDimitry Andric     return F->getCallingConv() == CallingConv::AMDGPU_KERNEL;
163*5f757f3fSDimitry Andric }
164*5f757f3fSDimitry Andric 
165*5f757f3fSDimitry Andric static inline bool checkIfSupported(const Function *F, const CallBase *CB) {
166*5f757f3fSDimitry Andric   const auto Dx = F->getName().rfind("__hipstdpar_unsupported");
167*5f757f3fSDimitry Andric 
168*5f757f3fSDimitry Andric   if (Dx == StringRef::npos)
169*5f757f3fSDimitry Andric     return true;
170*5f757f3fSDimitry Andric 
171*5f757f3fSDimitry Andric   const auto N = F->getName().substr(0, Dx);
172*5f757f3fSDimitry Andric 
173*5f757f3fSDimitry Andric   std::string W;
174*5f757f3fSDimitry Andric   raw_string_ostream OS(W);
175*5f757f3fSDimitry Andric 
176*5f757f3fSDimitry Andric   if (N == "__ASM")
177*5f757f3fSDimitry Andric     OS << "Accelerator does not support the ASM block:\n"
178*5f757f3fSDimitry Andric       << cast<ConstantDataArray>(CB->getArgOperand(0))->getAsCString();
179*5f757f3fSDimitry Andric   else
180*5f757f3fSDimitry Andric     OS << "Accelerator does not support the " << N << " function.";
181*5f757f3fSDimitry Andric 
182*5f757f3fSDimitry Andric   auto Caller = CB->getParent()->getParent();
183*5f757f3fSDimitry Andric 
184*5f757f3fSDimitry Andric   Caller->getContext().diagnose(
185*5f757f3fSDimitry Andric     DiagnosticInfoUnsupported(*Caller, W, CB->getDebugLoc(), DS_Error));
186*5f757f3fSDimitry Andric 
187*5f757f3fSDimitry Andric   return false;
188*5f757f3fSDimitry Andric }
189*5f757f3fSDimitry Andric 
190*5f757f3fSDimitry Andric PreservedAnalyses
191*5f757f3fSDimitry Andric   HipStdParAcceleratorCodeSelectionPass::run(Module &M,
192*5f757f3fSDimitry Andric                                              ModuleAnalysisManager &MAM) {
193*5f757f3fSDimitry Andric   auto &CGA = MAM.getResult<CallGraphAnalysis>(M);
194*5f757f3fSDimitry Andric 
195*5f757f3fSDimitry Andric   SmallPtrSet<const Function *, 32> Reachable;
196*5f757f3fSDimitry Andric   for (auto &&CGN : CGA) {
197*5f757f3fSDimitry Andric     if (!isAcceleratorExecutionRoot(CGN.first))
198*5f757f3fSDimitry Andric       continue;
199*5f757f3fSDimitry Andric 
200*5f757f3fSDimitry Andric     Reachable.insert(CGN.first);
201*5f757f3fSDimitry Andric 
202*5f757f3fSDimitry Andric     SmallVector<const Function *> Tmp({CGN.first});
203*5f757f3fSDimitry Andric     do {
204*5f757f3fSDimitry Andric       auto F = std::move(Tmp.back());
205*5f757f3fSDimitry Andric       Tmp.pop_back();
206*5f757f3fSDimitry Andric 
207*5f757f3fSDimitry Andric       for (auto &&N : *CGA[F]) {
208*5f757f3fSDimitry Andric         if (!N.second)
209*5f757f3fSDimitry Andric           continue;
210*5f757f3fSDimitry Andric         if (!N.second->getFunction())
211*5f757f3fSDimitry Andric           continue;
212*5f757f3fSDimitry Andric         if (Reachable.contains(N.second->getFunction()))
213*5f757f3fSDimitry Andric           continue;
214*5f757f3fSDimitry Andric 
215*5f757f3fSDimitry Andric         if (!checkIfSupported(N.second->getFunction(),
216*5f757f3fSDimitry Andric                               dyn_cast<CallBase>(*N.first)))
217*5f757f3fSDimitry Andric           return PreservedAnalyses::none();
218*5f757f3fSDimitry Andric 
219*5f757f3fSDimitry Andric         Reachable.insert(N.second->getFunction());
220*5f757f3fSDimitry Andric         Tmp.push_back(N.second->getFunction());
221*5f757f3fSDimitry Andric       }
222*5f757f3fSDimitry Andric     } while (!std::empty(Tmp));
223*5f757f3fSDimitry Andric   }
224*5f757f3fSDimitry Andric 
225*5f757f3fSDimitry Andric   if (std::empty(Reachable))
226*5f757f3fSDimitry Andric     clearModule(M);
227*5f757f3fSDimitry Andric   else
228*5f757f3fSDimitry Andric     removeUnreachableFunctions(Reachable, M);
229*5f757f3fSDimitry Andric 
230*5f757f3fSDimitry Andric   maybeHandleGlobals(M);
231*5f757f3fSDimitry Andric 
232*5f757f3fSDimitry Andric   return PreservedAnalyses::none();
233*5f757f3fSDimitry Andric }
234*5f757f3fSDimitry Andric 
235*5f757f3fSDimitry Andric static constexpr std::pair<StringLiteral, StringLiteral> ReplaceMap[]{
236*5f757f3fSDimitry Andric   {"aligned_alloc",             "__hipstdpar_aligned_alloc"},
237*5f757f3fSDimitry Andric   {"calloc",                    "__hipstdpar_calloc"},
238*5f757f3fSDimitry Andric   {"free",                      "__hipstdpar_free"},
239*5f757f3fSDimitry Andric   {"malloc",                    "__hipstdpar_malloc"},
240*5f757f3fSDimitry Andric   {"memalign",                  "__hipstdpar_aligned_alloc"},
241*5f757f3fSDimitry Andric   {"posix_memalign",            "__hipstdpar_posix_aligned_alloc"},
242*5f757f3fSDimitry Andric   {"realloc",                   "__hipstdpar_realloc"},
243*5f757f3fSDimitry Andric   {"reallocarray",              "__hipstdpar_realloc_array"},
244*5f757f3fSDimitry Andric   {"_ZdaPv",                    "__hipstdpar_operator_delete"},
245*5f757f3fSDimitry Andric   {"_ZdaPvm",                   "__hipstdpar_operator_delete_sized"},
246*5f757f3fSDimitry Andric   {"_ZdaPvSt11align_val_t",     "__hipstdpar_operator_delete_aligned"},
247*5f757f3fSDimitry Andric   {"_ZdaPvmSt11align_val_t",    "__hipstdpar_operator_delete_aligned_sized"},
248*5f757f3fSDimitry Andric   {"_ZdlPv",                    "__hipstdpar_operator_delete"},
249*5f757f3fSDimitry Andric   {"_ZdlPvm",                   "__hipstdpar_operator_delete_sized"},
250*5f757f3fSDimitry Andric   {"_ZdlPvSt11align_val_t",     "__hipstdpar_operator_delete_aligned"},
251*5f757f3fSDimitry Andric   {"_ZdlPvmSt11align_val_t",    "__hipstdpar_operator_delete_aligned_sized"},
252*5f757f3fSDimitry Andric   {"_Znam",                     "__hipstdpar_operator_new"},
253*5f757f3fSDimitry Andric   {"_ZnamRKSt9nothrow_t",       "__hipstdpar_operator_new_nothrow"},
254*5f757f3fSDimitry Andric   {"_ZnamSt11align_val_t",      "__hipstdpar_operator_new_aligned"},
255*5f757f3fSDimitry Andric   {"_ZnamSt11align_val_tRKSt9nothrow_t",
256*5f757f3fSDimitry Andric                                 "__hipstdpar_operator_new_aligned_nothrow"},
257*5f757f3fSDimitry Andric 
258*5f757f3fSDimitry Andric   {"_Znwm",                     "__hipstdpar_operator_new"},
259*5f757f3fSDimitry Andric   {"_ZnwmRKSt9nothrow_t",       "__hipstdpar_operator_new_nothrow"},
260*5f757f3fSDimitry Andric   {"_ZnwmSt11align_val_t",      "__hipstdpar_operator_new_aligned"},
261*5f757f3fSDimitry Andric   {"_ZnwmSt11align_val_tRKSt9nothrow_t",
262*5f757f3fSDimitry Andric                                 "__hipstdpar_operator_new_aligned_nothrow"},
263*5f757f3fSDimitry Andric   {"__builtin_calloc",          "__hipstdpar_calloc"},
264*5f757f3fSDimitry Andric   {"__builtin_free",            "__hipstdpar_free"},
265*5f757f3fSDimitry Andric   {"__builtin_malloc",          "__hipstdpar_malloc"},
266*5f757f3fSDimitry Andric   {"__builtin_operator_delete", "__hipstdpar_operator_delete"},
267*5f757f3fSDimitry Andric   {"__builtin_operator_new",    "__hipstdpar_operator_new"},
268*5f757f3fSDimitry Andric   {"__builtin_realloc",         "__hipstdpar_realloc"},
269*5f757f3fSDimitry Andric   {"__libc_calloc",             "__hipstdpar_calloc"},
270*5f757f3fSDimitry Andric   {"__libc_free",               "__hipstdpar_free"},
271*5f757f3fSDimitry Andric   {"__libc_malloc",             "__hipstdpar_malloc"},
272*5f757f3fSDimitry Andric   {"__libc_memalign",           "__hipstdpar_aligned_alloc"},
273*5f757f3fSDimitry Andric   {"__libc_realloc",            "__hipstdpar_realloc"}
274*5f757f3fSDimitry Andric };
275*5f757f3fSDimitry Andric 
276*5f757f3fSDimitry Andric PreservedAnalyses
277*5f757f3fSDimitry Andric HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) {
278*5f757f3fSDimitry Andric   SmallDenseMap<StringRef, StringRef> AllocReplacements(std::cbegin(ReplaceMap),
279*5f757f3fSDimitry Andric                                                         std::cend(ReplaceMap));
280*5f757f3fSDimitry Andric 
281*5f757f3fSDimitry Andric   for (auto &&F : M) {
282*5f757f3fSDimitry Andric     if (!F.hasName())
283*5f757f3fSDimitry Andric       continue;
284*5f757f3fSDimitry Andric     if (!AllocReplacements.contains(F.getName()))
285*5f757f3fSDimitry Andric       continue;
286*5f757f3fSDimitry Andric 
287*5f757f3fSDimitry Andric     if (auto R = M.getFunction(AllocReplacements[F.getName()])) {
288*5f757f3fSDimitry Andric       F.replaceAllUsesWith(R);
289*5f757f3fSDimitry Andric     } else {
290*5f757f3fSDimitry Andric       std::string W;
291*5f757f3fSDimitry Andric       raw_string_ostream OS(W);
292*5f757f3fSDimitry Andric 
293*5f757f3fSDimitry Andric       OS << "cannot be interposed, missing: " << AllocReplacements[F.getName()]
294*5f757f3fSDimitry Andric         << ". Tried to run the allocation interposition pass without the "
295*5f757f3fSDimitry Andric         << "replacement functions available.";
296*5f757f3fSDimitry Andric 
297*5f757f3fSDimitry Andric       F.getContext().diagnose(DiagnosticInfoUnsupported(F, W,
298*5f757f3fSDimitry Andric                                                         F.getSubprogram(),
299*5f757f3fSDimitry Andric                                                         DS_Warning));
300*5f757f3fSDimitry Andric     }
301*5f757f3fSDimitry Andric   }
302*5f757f3fSDimitry Andric 
303*5f757f3fSDimitry Andric   if (auto F = M.getFunction("__hipstdpar_hidden_free")) {
304*5f757f3fSDimitry Andric     auto LibcFree = M.getOrInsertFunction("__libc_free", F->getFunctionType(),
305*5f757f3fSDimitry Andric                                           F->getAttributes());
306*5f757f3fSDimitry Andric     F->replaceAllUsesWith(LibcFree.getCallee());
307*5f757f3fSDimitry Andric 
308*5f757f3fSDimitry Andric     eraseFromModule(*F);
309*5f757f3fSDimitry Andric   }
310*5f757f3fSDimitry Andric 
311*5f757f3fSDimitry Andric   return PreservedAnalyses::none();
312*5f757f3fSDimitry Andric }
313