1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for a specific
/// ABI, yet will be correctly compiled if proper attributes are propagated
/// from the caller.
14 ///
/// The pass analyzes the call graph and propagates ABI target features
/// through it.
17 ///
18 /// It can run in two modes: as a function or module pass. A function pass
19 /// simply propagates attributes. A module pass clones functions if there are
20 /// callers with different ABI. If a function is cloned all call sites will
21 /// be updated to use a correct clone.
22 ///
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
26 //
27 //===----------------------------------------------------------------------===//
28
29 #include "AMDGPU.h"
30 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
31 #include "Utils/AMDGPUBaseInfo.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/CodeGen/TargetPassConfig.h"
34 #include "llvm/CodeGen/TargetSubtargetInfo.h"
35 #include "llvm/IR/InstrTypes.h"
36 #include "llvm/Target/TargetMachine.h"
37 #include "llvm/Transforms/Utils/Cloning.h"
38
39 #define DEBUG_TYPE "amdgpu-propagate-attributes"
40
41 using namespace llvm;
42
namespace llvm {
// Table of subtarget feature entries, defined in another translation unit
// (presumably TableGen-generated -- confirm). This file reads each entry's
// Key (feature name) and Value (feature bit index) when it builds a
// "target-features" string.
extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
}
46
47 namespace {
48
49 // Target features to propagate.
50 static constexpr const FeatureBitset TargetFeatures = {
51 AMDGPU::FeatureWavefrontSize16,
52 AMDGPU::FeatureWavefrontSize32,
53 AMDGPU::FeatureWavefrontSize64
54 };
55
// Names of the string function attributes this pass propagates.
// TODO: Support conservative min/max merging instead of cloning.
constexpr const char *AttributeNames[] = {"amdgpu-waves-per-eu"};

// Number of entries in AttributeNames.
constexpr unsigned NumAttr =
    static_cast<unsigned>(std::size(AttributeNames));
61
62 class AMDGPUPropagateAttributes {
63
64 class FnProperties {
65 private:
FnProperties(const FeatureBitset && FB)66 explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
67
68 public:
FnProperties(const TargetMachine & TM,const Function & F)69 explicit FnProperties(const TargetMachine &TM, const Function &F) {
70 Features = TM.getSubtargetImpl(F)->getFeatureBits();
71
72 for (unsigned I = 0; I < NumAttr; ++I)
73 if (F.hasFnAttribute(AttributeNames[I]))
74 Attributes[I] = F.getFnAttribute(AttributeNames[I]);
75 }
76
operator ==(const FnProperties & Other) const77 bool operator == (const FnProperties &Other) const {
78 if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
79 return false;
80 for (unsigned I = 0; I < NumAttr; ++I)
81 if (Attributes[I] != Other.Attributes[I])
82 return false;
83 return true;
84 }
85
adjustToCaller(const FnProperties & CallerProps) const86 FnProperties adjustToCaller(const FnProperties &CallerProps) const {
87 FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
88 for (unsigned I = 0; I < NumAttr; ++I)
89 New.Attributes[I] = CallerProps.Attributes[I];
90 return New;
91 }
92
93 FeatureBitset Features;
94 std::optional<Attribute> Attributes[NumAttr];
95 };
96
97 class Clone {
98 public:
Clone(const FnProperties & Props,Function * OrigF,Function * NewF)99 Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
100 Properties(Props), OrigF(OrigF), NewF(NewF) {}
101
102 FnProperties Properties;
103 Function *OrigF;
104 Function *NewF;
105 };
106
107 const TargetMachine *TM;
108
109 // Clone functions as needed or just set attributes.
110 bool AllowClone;
111
112 // Option propagation roots.
113 SmallSet<Function *, 32> Roots;
114
115 // Clones of functions with their attributes.
116 SmallVector<Clone, 32> Clones;
117
118 // Find a clone with required features.
119 Function *findFunction(const FnProperties &PropsNeeded,
120 Function *OrigF);
121
122 // Clone function \p F and set \p NewProps on the clone.
123 // Cole takes the name of original function.
124 Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
125
126 // Set new function's features in place.
127 void setFeatures(Function &F, const FeatureBitset &NewFeatures);
128
129 // Set new function's attributes in place.
130 void setAttributes(Function &F,
131 const ArrayRef<std::optional<Attribute>> NewAttrs);
132
133 std::string getFeatureString(const FeatureBitset &Features) const;
134
135 // Propagate attributes from Roots.
136 bool process();
137
138 public:
AMDGPUPropagateAttributes(const TargetMachine * TM,bool AllowClone)139 AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
140 TM(TM), AllowClone(AllowClone) {}
141
142 // Use F as a root and propagate its attributes.
143 bool process(Function &F);
144
145 // Propagate attributes starting from kernel functions.
146 bool process(Module &M);
147 };
148
149 // Allows to propagate attributes early, but no cloning is allowed as it must
150 // be a function pass to run before any optimizations.
151 // TODO: We shall only need a one instance of module pass, but that needs to be
152 // in the linker pipeline which is currently not possible.
153 class AMDGPUPropagateAttributesEarly : public FunctionPass {
154 const TargetMachine *TM;
155
156 public:
157 static char ID; // Pass identification
158
AMDGPUPropagateAttributesEarly(const TargetMachine * TM=nullptr)159 AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
160 FunctionPass(ID), TM(TM) {
161 initializeAMDGPUPropagateAttributesEarlyPass(
162 *PassRegistry::getPassRegistry());
163 }
164
165 bool runOnFunction(Function &F) override;
166 };
167
168 // Allows to propagate attributes with cloning but does that late in the
169 // pipeline.
170 class AMDGPUPropagateAttributesLate : public ModulePass {
171 const TargetMachine *TM;
172
173 public:
174 static char ID; // Pass identification
175
AMDGPUPropagateAttributesLate(const TargetMachine * TM=nullptr)176 AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
177 ModulePass(ID), TM(TM) {
178 initializeAMDGPUPropagateAttributesLatePass(
179 *PassRegistry::getPassRegistry());
180 }
181
182 bool runOnModule(Module &M) override;
183 };
184
185 } // end anonymous namespace.
186
// Definitions of the static pass-identification members declared in the two
// legacy pass classes.
char AMDGPUPropagateAttributesEarly::ID = 0;
char AMDGPUPropagateAttributesLate::ID = 0;

// Register both legacy passes under their command-line names and
// descriptions. NOTE(review): the two trailing `false` arguments are
// presumably the CFG-only and is-analysis flags of INITIALIZE_PASS -- confirm
// against the macro definition.
INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
                "amdgpu-propagate-attributes-early",
                "Early propagate attributes from kernels to functions",
                false, false)
INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
                "amdgpu-propagate-attributes-late",
                "Late propagate attributes from kernels to functions",
                false, false)
198
199 Function *
findFunction(const FnProperties & PropsNeeded,Function * OrigF)200 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
201 Function *OrigF) {
202 // TODO: search for clone's clones.
203 for (Clone &C : Clones)
204 if (C.OrigF == OrigF && PropsNeeded == C.Properties)
205 return C.NewF;
206
207 return nullptr;
208 }
209
process(Module & M)210 bool AMDGPUPropagateAttributes::process(Module &M) {
211 for (auto &F : M.functions())
212 if (AMDGPU::isKernel(F.getCallingConv()))
213 Roots.insert(&F);
214
215 return Roots.empty() ? false : process();
216 }
217
process(Function & F)218 bool AMDGPUPropagateAttributes::process(Function &F) {
219 Roots.insert(&F);
220 return process();
221 }
222
/// Propagate properties from the functions in Roots to their direct callees,
/// repeating until no new roots are discovered.
///
/// A callee that already agrees with a root caller becomes a root itself. A
/// callee that disagrees is either modified in place (when cloning is not
/// allowed) or replaced at that call site by a clone carrying the adjusted
/// properties. Roots and Clones are cleared before returning.
///
/// \returns true if a function's attributes were set or a call site was
/// redirected.
bool AMDGPUPropagateAttributes::process() {
  bool Changed = false;
  // Roots discovered during the current sweep; merged into Roots at the start
  // of the next one.
  SmallSet<Function *, 32> NewRoots;
  // Original functions that had at least one call site redirected to a clone.
  SmallSet<Function *, 32> Replaced;

  assert(!Roots.empty());
  // The module is taken from an arbitrary root.
  Module &M = *(*Roots.begin())->getParent();

  do {
    Roots.insert(NewRoots.begin(), NewRoots.end());
    NewRoots.clear();

    // Treat every defined function as a potential callee of a root.
    for (auto &F : M.functions()) {
      if (F.isDeclaration())
        continue;

      const FnProperties CalleeProps(*TM, F);
      // Call sites to redirect and their new callees. They are applied only
      // after the walk over F's users has finished.
      SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
      // Guards against handling the same call instruction more than once.
      SmallSet<CallBase *, 32> Visited;

      for (User *U : F.users()) {
        Instruction *I = dyn_cast<Instruction>(U);
        if (!I)
          continue;
        CallBase *CI = dyn_cast<CallBase>(I);
        // Only propagate attributes if F is the called function. Specifically,
        // do not propagate attributes if F is passed as an argument.
        // FIXME: handle bitcasted callee, e.g.
        // %retval = call i8* bitcast (i32* ()* @f to i8* ()*)()
        if (!CI || CI->getCalledOperand() != &F)
          continue;
        Function *Caller = CI->getCaller();
        if (!Caller || !Visited.insert(CI).second)
          continue;
        // Only calls made from a root (old or newly found) propagate.
        if (!Roots.count(Caller) && !NewRoots.count(Caller))
          continue;

        const FnProperties CallerProps(*TM, *Caller);

        // Callee already agrees with this caller: nothing to change, but F
        // now acts as a root for its own callees.
        if (CalleeProps == CallerProps) {
          if (!Roots.count(&F))
            NewRoots.insert(&F);
          continue;
        }

        // Reuse a clone of F made earlier for the same caller properties.
        Function *NewF = findFunction(CallerProps, &F);
        if (!NewF) {
          const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
          if (!AllowClone) {
            // This may set different features on different iterations if
            // there is a contradiction in callers' attributes. In this case
            // we rely on a second pass running on Module, which is allowed
            // to clone.
            setFeatures(F, NewProps.Features);
            setAttributes(F, NewProps.Attributes);
            NewRoots.insert(&F);
            Changed = true;
            // F was modified in place, so stop looking at its other callers
            // for this sweep.
            break;
          }

          NewF = cloneWithProperties(F, NewProps);
          // The clone is recorded under the caller's properties, which is the
          // key findFunction() is queried with.
          Clones.push_back(Clone(CallerProps, &F, NewF));
          NewRoots.insert(NewF);
        }

        ToReplace.push_back(std::pair(CI, NewF));
        Replaced.insert(&F);

        Changed = true;
      }

      // Redirect the collected call sites, last collected first.
      while (!ToReplace.empty()) {
        auto R = ToReplace.pop_back_val();
        R.first->setCalledFunction(R.second);
      }
    }
  } while (!NewRoots.empty());

  // Originals whose every use was redirected to clones are now dead.
  for (Function *F : Replaced) {
    if (F->use_empty())
      F->eraseFromParent();
  }

  Roots.clear();
  Clones.clear();

  return Changed;
}
311
312 Function *
cloneWithProperties(Function & F,const FnProperties & NewProps)313 AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
314 const FnProperties &NewProps) {
315 LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
316
317 ValueToValueMapTy dummy;
318 Function *NewF = CloneFunction(&F, dummy);
319 setFeatures(*NewF, NewProps.Features);
320 setAttributes(*NewF, NewProps.Attributes);
321 NewF->setVisibility(GlobalValue::DefaultVisibility);
322 NewF->setLinkage(GlobalValue::InternalLinkage);
323
324 // Swap names. If that is the only clone it will retain the name of now
325 // dead value. Preserve original name for externally visible functions.
326 if (F.hasName() && F.hasLocalLinkage()) {
327 std::string NewName = std::string(NewF->getName());
328 NewF->takeName(&F);
329 F.setName(NewName);
330 }
331
332 return NewF;
333 }
334
setFeatures(Function & F,const FeatureBitset & NewFeatures)335 void AMDGPUPropagateAttributes::setFeatures(Function &F,
336 const FeatureBitset &NewFeatures) {
337 std::string NewFeatureStr = getFeatureString(NewFeatures);
338
339 LLVM_DEBUG(dbgs() << "Set features "
340 << getFeatureString(NewFeatures & TargetFeatures)
341 << " on " << F.getName() << '\n');
342
343 F.removeFnAttr("target-features");
344 F.addFnAttr("target-features", NewFeatureStr);
345 }
346
setAttributes(Function & F,const ArrayRef<std::optional<Attribute>> NewAttrs)347 void AMDGPUPropagateAttributes::setAttributes(
348 Function &F, const ArrayRef<std::optional<Attribute>> NewAttrs) {
349 LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
350 for (unsigned I = 0; I < NumAttr; ++I) {
351 F.removeFnAttr(AttributeNames[I]);
352 if (NewAttrs[I]) {
353 LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
354 F.addFnAttr(*NewAttrs[I]);
355 }
356 }
357 }
358
359 std::string
getFeatureString(const FeatureBitset & Features) const360 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
361 {
362 std::string Ret;
363 for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
364 if (Features[KV.Value])
365 Ret += (StringRef("+") + KV.Key + ",").str();
366 else if (TargetFeatures[KV.Value])
367 Ret += (StringRef("-") + KV.Key + ",").str();
368 }
369 Ret.pop_back(); // Remove last comma.
370 return Ret;
371 }
372
runOnFunction(Function & F)373 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
374 if (!TM) {
375 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
376 if (!TPC)
377 return false;
378
379 TM = &TPC->getTM<TargetMachine>();
380 }
381
382 if (!AMDGPU::isKernel(F.getCallingConv()))
383 return false;
384
385 return AMDGPUPropagateAttributes(TM, false).process(F);
386 }
387
runOnModule(Module & M)388 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
389 if (!TM) {
390 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
391 if (!TPC)
392 return false;
393
394 TM = &TPC->getTM<TargetMachine>();
395 }
396
397 return AMDGPUPropagateAttributes(TM, true).process(M);
398 }
399
/// Create a new instance of the legacy early (function) propagation pass.
/// \p TM is forwarded to the pass constructor and may be null.
FunctionPass
*llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
  return new AMDGPUPropagateAttributesEarly(TM);
}
404
/// Create a new instance of the legacy late (module) propagation pass.
/// \p TM is forwarded to the pass constructor and may be null.
ModulePass
*llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
  return new AMDGPUPropagateAttributesLate(TM);
}
409
410 PreservedAnalyses
run(Function & F,FunctionAnalysisManager & AM)411 AMDGPUPropagateAttributesEarlyPass::run(Function &F,
412 FunctionAnalysisManager &AM) {
413 if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
414 return PreservedAnalyses::all();
415
416 return AMDGPUPropagateAttributes(&TM, false).process(F)
417 ? PreservedAnalyses::none()
418 : PreservedAnalyses::all();
419 }
420
421 PreservedAnalyses
run(Module & M,ModuleAnalysisManager & AM)422 AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) {
423 return AMDGPUPropagateAttributes(&TM, true).process(M)
424 ? PreservedAnalyses::none()
425 : PreservedAnalyses::all();
426 }
427