1 //===- ValueEnumerator.cpp - Number values and types for bitcode writer ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the ValueEnumerator class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "ValueEnumerator.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/Config/llvm-config.h"
16 #include "llvm/IR/Argument.h"
17 #include "llvm/IR/BasicBlock.h"
18 #include "llvm/IR/Constant.h"
19 #include "llvm/IR/DebugInfoMetadata.h"
20 #include "llvm/IR/DerivedTypes.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/GlobalAlias.h"
23 #include "llvm/IR/GlobalIFunc.h"
24 #include "llvm/IR/GlobalObject.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/GlobalVariable.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/Metadata.h"
30 #include "llvm/IR/Module.h"
31 #include "llvm/IR/Operator.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/IR/Use.h"
34 #include "llvm/IR/User.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/IR/ValueSymbolTable.h"
37 #include "llvm/Support/Casting.h"
38 #include "llvm/Support/Compiler.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/MathExtras.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include <algorithm>
43 #include <cstddef>
44 #include <iterator>
45 #include <tuple>
46
47 using namespace llvm;
48
49 namespace {
50
struct OrderMap {
  /// Map from a value to a pair of (1-based ID, already-predicted flag).
  /// The flag is set by predictValueUseListOrder() once the value's use-list
  /// shuffle has been computed.
  DenseMap<const Value *, std::pair<unsigned, bool>> IDs;
  /// Highest ID assigned to a global value; set once by orderModule() after
  /// all globals have been indexed.
  unsigned LastGlobalValueID = 0;

  OrderMap() = default;

  /// Global values are indexed first (see orderModule()), so an ID at or
  /// below the cutoff belongs to a GlobalValue.
  bool isGlobalValue(unsigned ID) const {
    return ID <= LastGlobalValueID;
  }

  unsigned size() const { return IDs.size(); }
  std::pair<unsigned, bool> &operator[](const Value *V) { return IDs[V]; }

  /// Returns {0, false} for values that have not been indexed yet, since
  /// real IDs start at 1.
  std::pair<unsigned, bool> lookup(const Value *V) const {
    return IDs.lookup(V);
  }

  /// Assign V the next available (1-based) ID.
  void index(const Value *V) {
    // Explicitly sequence get-size and insert-value operations to avoid UB.
    unsigned ID = IDs.size() + 1;
    IDs[V].first = ID;
  }
};
74
75 } // end anonymous namespace
76
orderValue(const Value * V,OrderMap & OM)77 static void orderValue(const Value *V, OrderMap &OM) {
78 if (OM.lookup(V).first)
79 return;
80
81 if (const Constant *C = dyn_cast<Constant>(V)) {
82 if (C->getNumOperands()) {
83 for (const Value *Op : C->operands())
84 if (!isa<BasicBlock>(Op) && !isa<GlobalValue>(Op))
85 orderValue(Op, OM);
86 if (auto *CE = dyn_cast<ConstantExpr>(C))
87 if (CE->getOpcode() == Instruction::ShuffleVector)
88 orderValue(CE->getShuffleMaskForBitcode(), OM);
89 }
90 }
91
92 // Note: we cannot cache this lookup above, since inserting into the map
93 // changes the map's size, and thus affects the other IDs.
94 OM.index(V);
95 }
96
orderModule(const Module & M)97 static OrderMap orderModule(const Module &M) {
98 // This needs to match the order used by ValueEnumerator::ValueEnumerator()
99 // and ValueEnumerator::incorporateFunction().
100 OrderMap OM;
101
102 // Initializers of GlobalValues are processed in
103 // BitcodeReader::ResolveGlobalAndAliasInits(). Match the order there rather
104 // than ValueEnumerator, and match the code in predictValueUseListOrderImpl()
105 // by giving IDs in reverse order.
106 //
107 // Since GlobalValues never reference each other directly (just through
108 // initializers), their relative IDs only matter for determining order of
109 // uses in their initializers.
110 for (const GlobalVariable &G : reverse(M.globals()))
111 orderValue(&G, OM);
112 for (const GlobalAlias &A : reverse(M.aliases()))
113 orderValue(&A, OM);
114 for (const GlobalIFunc &I : reverse(M.ifuncs()))
115 orderValue(&I, OM);
116 for (const Function &F : reverse(M))
117 orderValue(&F, OM);
118 OM.LastGlobalValueID = OM.size();
119
120 auto orderConstantValue = [&OM](const Value *V) {
121 if (isa<Constant>(V) || isa<InlineAsm>(V))
122 orderValue(V, OM);
123 };
124
125 for (const Function &F : M) {
126 if (F.isDeclaration())
127 continue;
128 // Here we need to match the union of ValueEnumerator::incorporateFunction()
129 // and WriteFunction(). Basic blocks are implicitly declared before
130 // anything else (by declaring their size).
131 for (const BasicBlock &BB : F)
132 orderValue(&BB, OM);
133
134 // Metadata used by instructions is decoded before the actual instructions,
135 // so visit any constants used by it beforehand.
136 for (const BasicBlock &BB : F)
137 for (const Instruction &I : BB)
138 for (const Value *V : I.operands()) {
139 if (const auto *MAV = dyn_cast<MetadataAsValue>(V)) {
140 if (const auto *VAM =
141 dyn_cast<ValueAsMetadata>(MAV->getMetadata())) {
142 orderConstantValue(VAM->getValue());
143 } else if (const auto *AL =
144 dyn_cast<DIArgList>(MAV->getMetadata())) {
145 for (const auto *VAM : AL->getArgs())
146 orderConstantValue(VAM->getValue());
147 }
148 }
149 }
150
151 for (const Argument &A : F.args())
152 orderValue(&A, OM);
153 for (const BasicBlock &BB : F)
154 for (const Instruction &I : BB) {
155 for (const Value *Op : I.operands())
156 orderConstantValue(Op);
157 if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
158 orderValue(SVI->getShuffleMaskForBitcode(), OM);
159 orderValue(&I, OM);
160 }
161 }
162 return OM;
163 }
164
/// Compute the shuffle needed so that, when the reader replays uses in its
/// own materialization order, V ends up with its current use-list order.
/// V's ID in OM is passed in; any non-trivial shuffle is pushed onto Stack.
static void predictValueUseListOrderImpl(const Value *V, const Function *F,
                                         unsigned ID, const OrderMap &OM,
                                         UseListOrderStack &Stack) {
  // Predict use-list order for this one.
  using Entry = std::pair<const Use *, unsigned>;
  SmallVector<Entry, 64> List;
  for (const Use &U : V->uses())
    // Check if this user will be serialized.
    if (OM.lookup(U.getUser()).first)
      List.push_back(std::make_pair(&U, List.size()));

  if (List.size() < 2)
    // We may have lost some users.
    return;

  bool IsGlobalValue = OM.isGlobalValue(ID);
  // Sort into the order the reader is expected to add the uses: users
  // indexed after V come first in descending ID order, then earlier users
  // in ascending order (see the example below). GlobalValue uses are not
  // reversed.
  llvm::sort(List, [&](const Entry &L, const Entry &R) {
    const Use *LU = L.first;
    const Use *RU = R.first;
    if (LU == RU)
      return false;

    auto LID = OM.lookup(LU->getUser()).first;
    auto RID = OM.lookup(RU->getUser()).first;

    // If ID is 4, then expect: 7 6 5 1 2 3.
    if (LID < RID) {
      if (RID <= ID)
        if (!IsGlobalValue) // GlobalValue uses don't get reversed.
          return true;
      return false;
    }
    if (RID < LID) {
      if (LID <= ID)
        if (!IsGlobalValue) // GlobalValue uses don't get reversed.
          return false;
      return true;
    }

    // LID and RID are equal, so we have different operands of the same user.
    // Assume operands are added in order for all instructions.
    if (LID <= ID)
      if (!IsGlobalValue) // GlobalValue uses don't get reversed.
        return LU->getOperandNo() < RU->getOperandNo();
    return LU->getOperandNo() > RU->getOperandNo();
  });

  if (llvm::is_sorted(List, llvm::less_second()))
    // Order is already correct.
    return;

  // Store the shuffle.
  Stack.emplace_back(V, F, List.size());
  assert(List.size() == Stack.back().Shuffle.size() && "Wrong size");
  for (size_t I = 0, E = List.size(); I != E; ++I)
    Stack.back().Shuffle[I] = List[I].second;
}
222
predictValueUseListOrder(const Value * V,const Function * F,OrderMap & OM,UseListOrderStack & Stack)223 static void predictValueUseListOrder(const Value *V, const Function *F,
224 OrderMap &OM, UseListOrderStack &Stack) {
225 auto &IDPair = OM[V];
226 assert(IDPair.first && "Unmapped value");
227 if (IDPair.second)
228 // Already predicted.
229 return;
230
231 // Do the actual prediction.
232 IDPair.second = true;
233 if (!V->use_empty() && std::next(V->use_begin()) != V->use_end())
234 predictValueUseListOrderImpl(V, F, IDPair.first, OM, Stack);
235
236 // Recursive descent into constants.
237 if (const Constant *C = dyn_cast<Constant>(V)) {
238 if (C->getNumOperands()) { // Visit GlobalValues.
239 for (const Value *Op : C->operands())
240 if (isa<Constant>(Op)) // Visit GlobalValues.
241 predictValueUseListOrder(Op, F, OM, Stack);
242 if (auto *CE = dyn_cast<ConstantExpr>(C))
243 if (CE->getOpcode() == Instruction::ShuffleVector)
244 predictValueUseListOrder(CE->getShuffleMaskForBitcode(), F, OM,
245 Stack);
246 }
247 }
248 }
249
/// Predict the use-list shuffles for every value in M that the writer will
/// serialize, returning them as a stack (function-local values first, in
/// reverse function order; module-level values last).
static UseListOrderStack predictUseListOrder(const Module &M) {
  OrderMap OM = orderModule(M);

  // Use-list orders need to be serialized after all the users have been added
  // to a value, or else the shuffles will be incomplete. Store them per
  // function in a stack.
  //
  // Aside from function order, the order of values doesn't matter much here.
  UseListOrderStack Stack;

  // We want to visit the functions backward now so we can list function-local
  // constants in the last Function they're used in. Module-level constants
  // have already been visited above.
  for (const Function &F : llvm::reverse(M)) {
    if (F.isDeclaration())
      continue;
    for (const BasicBlock &BB : F)
      predictValueUseListOrder(&BB, &F, OM, Stack);
    for (const Argument &A : F.args())
      predictValueUseListOrder(&A, &F, OM, Stack);
    for (const BasicBlock &BB : F)
      for (const Instruction &I : BB) {
        for (const Value *Op : I.operands()) {
          if (isa<Constant>(*Op) || isa<InlineAsm>(*Op)) // Visit GlobalValues.
            predictValueUseListOrder(Op, &F, OM, Stack);
          // Values wrapped in metadata (directly or inside a DIArgList) are
          // serialized via the metadata block, so visit them here too.
          if (const auto *MAV = dyn_cast<MetadataAsValue>(Op)) {
            if (const auto *VAM =
                    dyn_cast<ValueAsMetadata>(MAV->getMetadata())) {
              predictValueUseListOrder(VAM->getValue(), &F, OM, Stack);
            } else if (const auto *AL =
                           dyn_cast<DIArgList>(MAV->getMetadata())) {
              for (const auto *VAM : AL->getArgs())
                predictValueUseListOrder(VAM->getValue(), &F, OM, Stack);
            }
          }
        }
        if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
          predictValueUseListOrder(SVI->getShuffleMaskForBitcode(), &F, OM,
                                   Stack);
        predictValueUseListOrder(&I, &F, OM, Stack);
      }
  }

  // Visit globals last, since the module-level use-list block will be seen
  // before the function bodies are processed.
  for (const GlobalVariable &G : M.globals())
    predictValueUseListOrder(&G, nullptr, OM, Stack);
  for (const Function &F : M)
    predictValueUseListOrder(&F, nullptr, OM, Stack);
  for (const GlobalAlias &A : M.aliases())
    predictValueUseListOrder(&A, nullptr, OM, Stack);
  for (const GlobalIFunc &I : M.ifuncs())
    predictValueUseListOrder(&I, nullptr, OM, Stack);
  for (const GlobalVariable &G : M.globals())
    if (G.hasInitializer())
      predictValueUseListOrder(G.getInitializer(), nullptr, OM, Stack);
  for (const GlobalAlias &A : M.aliases())
    predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack);
  for (const GlobalIFunc &I : M.ifuncs())
    predictValueUseListOrder(I.getResolver(), nullptr, OM, Stack);
  // Optional function data (e.g. prologue/prefix operands).
  for (const Function &F : M) {
    for (const Use &U : F.operands())
      predictValueUseListOrder(U.get(), nullptr, OM, Stack);
  }

  return Stack;
}
317
isIntOrIntVectorValue(const std::pair<const Value *,unsigned> & V)318 static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
319 return V.first->getType()->isIntOrIntVectorTy();
320 }
321
/// Enumerate all values, types, metadata, and attribute lists in M in the
/// order the bitcode writer emits them. When ShouldPreserveUseListOrder is
/// set, use-list shuffles are predicted up front and stored in UseListOrders.
ValueEnumerator::ValueEnumerator(const Module &M,
                                 bool ShouldPreserveUseListOrder)
    : ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
  if (ShouldPreserveUseListOrder)
    UseListOrders = predictUseListOrder(M);

  // Global values come first so that their IDs form a contiguous low range.
  // Enumerate the global variables.
  for (const GlobalVariable &GV : M.globals()) {
    EnumerateValue(&GV);
    EnumerateType(GV.getValueType());
  }

  // Enumerate the functions.
  for (const Function & F : M) {
    EnumerateValue(&F);
    EnumerateType(F.getValueType());
    EnumerateAttributes(F.getAttributes());
  }

  // Enumerate the aliases.
  for (const GlobalAlias &GA : M.aliases()) {
    EnumerateValue(&GA);
    EnumerateType(GA.getValueType());
  }

  // Enumerate the ifuncs.
  for (const GlobalIFunc &GIF : M.ifuncs()) {
    EnumerateValue(&GIF);
    EnumerateType(GIF.getValueType());
  }

  // Remember what is the cutoff between globalvalue's and other constants.
  // Used later by OptimizeConstants() so globals are never reordered.
  unsigned FirstConstant = Values.size();

  // Enumerate the global variable initializers and attributes.
  for (const GlobalVariable &GV : M.globals()) {
    if (GV.hasInitializer())
      EnumerateValue(GV.getInitializer());
    if (GV.hasAttributes())
      EnumerateAttributes(GV.getAttributesAsList(AttributeList::FunctionIndex));
  }

  // Enumerate the aliasees.
  for (const GlobalAlias &GA : M.aliases())
    EnumerateValue(GA.getAliasee());

  // Enumerate the ifunc resolvers.
  for (const GlobalIFunc &GIF : M.ifuncs())
    EnumerateValue(GIF.getResolver());

  // Enumerate any optional Function data.
  for (const Function &F : M)
    for (const Use &U : F.operands())
      EnumerateValue(U.get());

  // Enumerate the metadata type.
  //
  // TODO: Move this to ValueEnumerator::EnumerateOperandType() once bitcode
  // only encodes the metadata type when it's used as a value.
  EnumerateType(Type::getMetadataTy(M.getContext()));

  // Insert constants and metadata that are named at module level into the slot
  // pool so that the module symbol table can refer to them...
  EnumerateValueSymbolTable(M.getValueSymbolTable());
  EnumerateNamedMetadata(M);

  // Scratch buffer reused for all getAllMetadata() queries below.
  SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
  for (const GlobalVariable &GV : M.globals()) {
    MDs.clear();
    GV.getAllMetadata(MDs);
    for (const auto &I : MDs)
      // FIXME: Pass GV to EnumerateMetadata and arrange for the bitcode writer
      // to write metadata to the global variable's own metadata block
      // (PR28134).
      EnumerateMetadata(nullptr, I.second);
  }

  // Enumerate types used by function bodies and argument lists.
  for (const Function &F : M) {
    for (const Argument &A : F.args())
      EnumerateType(A.getType());

    // Enumerate metadata attached to this function.
    MDs.clear();
    F.getAllMetadata(MDs);
    for (const auto &I : MDs)
      EnumerateMetadata(F.isDeclaration() ? nullptr : &F, I.second);

    for (const BasicBlock &BB : F)
      for (const Instruction &I : BB) {
        for (const Use &Op : I.operands()) {
          auto *MD = dyn_cast<MetadataAsValue>(&Op);
          if (!MD) {
            // Plain value operand: only its type is needed here.
            EnumerateOperandType(Op);
            continue;
          }

          // Local metadata is enumerated during function-incorporation, but
          // any ConstantAsMetadata arguments in a DIArgList should be examined
          // now.
          if (isa<LocalAsMetadata>(MD->getMetadata()))
            continue;
          if (auto *AL = dyn_cast<DIArgList>(MD->getMetadata())) {
            for (auto *VAM : AL->getArgs())
              if (isa<ConstantAsMetadata>(VAM))
                EnumerateMetadata(&F, VAM);
            continue;
          }

          EnumerateMetadata(&F, MD->getMetadata());
        }
        // Types carried by the instruction but not visible as operand types.
        if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
          EnumerateType(SVI->getShuffleMaskForBitcode()->getType());
        if (auto *GEP = dyn_cast<GetElementPtrInst>(&I))
          EnumerateType(GEP->getSourceElementType());
        if (auto *AI = dyn_cast<AllocaInst>(&I))
          EnumerateType(AI->getAllocatedType());
        EnumerateType(I.getType());
        if (const auto *Call = dyn_cast<CallBase>(&I)) {
          EnumerateAttributes(Call->getAttributes());
          EnumerateType(Call->getFunctionType());
        }

        // Enumerate metadata attached with this instruction.
        MDs.clear();
        I.getAllMetadataOtherThanDebugLoc(MDs);
        for (unsigned i = 0, e = MDs.size(); i != e; ++i)
          EnumerateMetadata(&F, MDs[i].second);

        // Don't enumerate the location directly -- it has a special record
        // type -- but enumerate its operands.
        if (DILocation *L = I.getDebugLoc())
          for (const Metadata *Op : L->operands())
            EnumerateMetadata(&F, Op);
      }
  }

  // Optimize constant ordering.
  OptimizeConstants(FirstConstant, Values.size());

  // Organize metadata ordering.
  organizeMetadata();
}
465
getInstructionID(const Instruction * Inst) const466 unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
467 InstructionMapType::const_iterator I = InstructionMap.find(Inst);
468 assert(I != InstructionMap.end() && "Instruction is not mapped!");
469 return I->second;
470 }
471
getComdatID(const Comdat * C) const472 unsigned ValueEnumerator::getComdatID(const Comdat *C) const {
473 unsigned ComdatID = Comdats.idFor(C);
474 assert(ComdatID && "Comdat not found!");
475 return ComdatID;
476 }
477
setInstructionID(const Instruction * I)478 void ValueEnumerator::setInstructionID(const Instruction *I) {
479 InstructionMap[I] = InstructionCount++;
480 }
481
getValueID(const Value * V) const482 unsigned ValueEnumerator::getValueID(const Value *V) const {
483 if (auto *MD = dyn_cast<MetadataAsValue>(V))
484 return getMetadataID(MD->getMetadata());
485
486 ValueMapType::const_iterator I = ValueMap.find(V);
487 assert(I != ValueMap.end() && "Value not in slotcalculator!");
488 return I->second-1;
489 }
490
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Debug helper: print both the value map and the metadata map to dbgs().
LLVM_DUMP_METHOD void ValueEnumerator::dump() const {
  print(dbgs(), ValueMap, "Default");
  dbgs() << '\n';
  print(dbgs(), MetadataMap, "MetaData");
  dbgs() << '\n';
}
#endif
499
print(raw_ostream & OS,const ValueMapType & Map,const char * Name) const500 void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
501 const char *Name) const {
502 OS << "Map Name: " << Name << "\n";
503 OS << "Size: " << Map.size() << "\n";
504 for (const auto &I : Map) {
505 const Value *V = I.first;
506 if (V->hasName())
507 OS << "Value: " << V->getName();
508 else
509 OS << "Value: [null]\n";
510 V->print(errs());
511 errs() << '\n';
512
513 OS << " Uses(" << V->getNumUses() << "):";
514 for (const Use &U : V->uses()) {
515 if (&U != &*V->use_begin())
516 OS << ",";
517 if(U->hasName())
518 OS << " " << U->getName();
519 else
520 OS << " [null]";
521
522 }
523 OS << "\n\n";
524 }
525 }
526
print(raw_ostream & OS,const MetadataMapType & Map,const char * Name) const527 void ValueEnumerator::print(raw_ostream &OS, const MetadataMapType &Map,
528 const char *Name) const {
529 OS << "Map Name: " << Name << "\n";
530 OS << "Size: " << Map.size() << "\n";
531 for (const auto &I : Map) {
532 const Metadata *MD = I.first;
533 OS << "Metadata: slot = " << I.second.ID << "\n";
534 OS << "Metadata: function = " << I.second.F << "\n";
535 MD->print(OS);
536 OS << "\n";
537 }
538 }
539
540 /// OptimizeConstants - Reorder constant pool for denser encoding.
OptimizeConstants(unsigned CstStart,unsigned CstEnd)541 void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
542 if (CstStart == CstEnd || CstStart+1 == CstEnd) return;
543
544 if (ShouldPreserveUseListOrder)
545 // Optimizing constants makes the use-list order difficult to predict.
546 // Disable it for now when trying to preserve the order.
547 return;
548
549 std::stable_sort(Values.begin() + CstStart, Values.begin() + CstEnd,
550 [this](const std::pair<const Value *, unsigned> &LHS,
551 const std::pair<const Value *, unsigned> &RHS) {
552 // Sort by plane.
553 if (LHS.first->getType() != RHS.first->getType())
554 return getTypeID(LHS.first->getType()) < getTypeID(RHS.first->getType());
555 // Then by frequency.
556 return LHS.second > RHS.second;
557 });
558
559 // Ensure that integer and vector of integer constants are at the start of the
560 // constant pool. This is important so that GEP structure indices come before
561 // gep constant exprs.
562 std::stable_partition(Values.begin() + CstStart, Values.begin() + CstEnd,
563 isIntOrIntVectorValue);
564
565 // Rebuild the modified portion of ValueMap.
566 for (; CstStart != CstEnd; ++CstStart)
567 ValueMap[Values[CstStart].first] = CstStart+1;
568 }
569
570 /// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
571 /// table into the values table.
EnumerateValueSymbolTable(const ValueSymbolTable & VST)572 void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
573 for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
574 VI != VE; ++VI)
575 EnumerateValue(VI->getValue());
576 }
577
578 /// Insert all of the values referenced by named metadata in the specified
579 /// module.
EnumerateNamedMetadata(const Module & M)580 void ValueEnumerator::EnumerateNamedMetadata(const Module &M) {
581 for (const auto &I : M.named_metadata())
582 EnumerateNamedMDNode(&I);
583 }
584
EnumerateNamedMDNode(const NamedMDNode * MD)585 void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
586 for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
587 EnumerateMetadata(nullptr, MD->getOperand(i));
588 }
589
getMetadataFunctionID(const Function * F) const590 unsigned ValueEnumerator::getMetadataFunctionID(const Function *F) const {
591 return F ? getValueID(F) + 1 : 0;
592 }
593
EnumerateMetadata(const Function * F,const Metadata * MD)594 void ValueEnumerator::EnumerateMetadata(const Function *F, const Metadata *MD) {
595 EnumerateMetadata(getMetadataFunctionID(F), MD);
596 }
597
EnumerateFunctionLocalMetadata(const Function & F,const LocalAsMetadata * Local)598 void ValueEnumerator::EnumerateFunctionLocalMetadata(
599 const Function &F, const LocalAsMetadata *Local) {
600 EnumerateFunctionLocalMetadata(getMetadataFunctionID(&F), Local);
601 }
602
EnumerateFunctionLocalListMetadata(const Function & F,const DIArgList * ArgList)603 void ValueEnumerator::EnumerateFunctionLocalListMetadata(
604 const Function &F, const DIArgList *ArgList) {
605 EnumerateFunctionLocalListMetadata(getMetadataFunctionID(&F), ArgList);
606 }
607
/// Clear the function tag on FirstMD and, transitively, on every already-
/// mapped operand reachable from it, demoting the metadata to module level.
void ValueEnumerator::dropFunctionFromMetadata(
    MetadataMapType::value_type &FirstMD) {
  SmallVector<const MDNode *, 64> Worklist;
  auto push = [&Worklist](MetadataMapType::value_type &MD) {
    auto &Entry = MD.second;

    // Nothing to do if this metadata isn't tagged.
    if (!Entry.F)
      return;

    // Drop the function tag.
    Entry.F = 0;

    // If this has an ID and is an MDNode, then its operands have entries as
    // well. We need to drop the function from them too.
    if (Entry.ID)
      if (auto *N = dyn_cast<MDNode>(MD.first))
        Worklist.push_back(N);
  };
  push(FirstMD);
  // Only operands already present in MetadataMap are touched; unmapped
  // operands have no tag to drop.
  while (!Worklist.empty())
    for (const Metadata *Op : Worklist.pop_back_val()->operands()) {
      if (!Op)
        continue;
      auto MD = MetadataMap.find(Op);
      if (MD != MetadataMap.end())
        push(*MD);
    }
}
637
/// Enumerate MD (tagged with function F) and its transitive operands,
/// assigning IDs in post-order via an explicit depth-first worklist.
void ValueEnumerator::EnumerateMetadata(unsigned F, const Metadata *MD) {
  // It's vital for reader efficiency that uniqued subgraphs are done in
  // post-order; it's expensive when their operands have forward references.
  // If a distinct node is referenced from a uniqued node, it'll be delayed
  // until the uniqued subgraph has been completely traversed.
  SmallVector<const MDNode *, 32> DelayedDistinctNodes;

  // Start by enumerating MD, and then work through its transitive operands in
  // post-order. This requires a depth-first search.
  SmallVector<std::pair<const MDNode *, MDNode::op_iterator>, 32> Worklist;
  if (const MDNode *N = enumerateMetadataImpl(F, MD))
    Worklist.push_back(std::make_pair(N, N->op_begin()));

  while (!Worklist.empty()) {
    const MDNode *N = Worklist.back().first;

    // Enumerate operands until we hit a new node. We need to traverse these
    // nodes' operands before visiting the rest of N's operands.
    // (enumerateMetadataImpl() returns non-null only for not-yet-seen
    // MDNodes, so find_if stops at the next unvisited node operand.)
    MDNode::op_iterator I = std::find_if(
        Worklist.back().second, N->op_end(),
        [&](const Metadata *MD) { return enumerateMetadataImpl(F, MD); });
    if (I != N->op_end()) {
      auto *Op = cast<MDNode>(*I);
      // Remember where to resume N's operand scan after Op is processed.
      Worklist.back().second = ++I;

      // Delay traversing Op if it's a distinct node and N is uniqued.
      if (Op->isDistinct() && !N->isDistinct())
        DelayedDistinctNodes.push_back(Op);
      else
        Worklist.push_back(std::make_pair(Op, Op->op_begin()));
      continue;
    }

    // All the operands have been visited. Now assign an ID.
    Worklist.pop_back();
    MDs.push_back(N);
    MetadataMap[N].ID = MDs.size();

    // Flush out any delayed distinct nodes; these are all the distinct nodes
    // that are leaves in last uniqued subgraph.
    if (Worklist.empty() || Worklist.back().first->isDistinct()) {
      for (const MDNode *N : DelayedDistinctNodes)
        Worklist.push_back(std::make_pair(N, N->op_begin()));
      DelayedDistinctNodes.clear();
    }
  }
}
685
enumerateMetadataImpl(unsigned F,const Metadata * MD)686 const MDNode *ValueEnumerator::enumerateMetadataImpl(unsigned F, const Metadata *MD) {
687 if (!MD)
688 return nullptr;
689
690 assert(
691 (isa<MDNode>(MD) || isa<MDString>(MD) || isa<ConstantAsMetadata>(MD)) &&
692 "Invalid metadata kind");
693
694 auto Insertion = MetadataMap.insert(std::make_pair(MD, MDIndex(F)));
695 MDIndex &Entry = Insertion.first->second;
696 if (!Insertion.second) {
697 // Already mapped. If F doesn't match the function tag, drop it.
698 if (Entry.hasDifferentFunction(F))
699 dropFunctionFromMetadata(*Insertion.first);
700 return nullptr;
701 }
702
703 // Don't assign IDs to metadata nodes.
704 if (auto *N = dyn_cast<MDNode>(MD))
705 return N;
706
707 // Save the metadata.
708 MDs.push_back(MD);
709 Entry.ID = MDs.size();
710
711 // Enumerate the constant, if any.
712 if (auto *C = dyn_cast<ConstantAsMetadata>(MD))
713 EnumerateValue(C->getValue());
714
715 return nullptr;
716 }
717
718 /// EnumerateFunctionLocalMetadata - Incorporate function-local metadata
719 /// information reachable from the metadata.
EnumerateFunctionLocalMetadata(unsigned F,const LocalAsMetadata * Local)720 void ValueEnumerator::EnumerateFunctionLocalMetadata(
721 unsigned F, const LocalAsMetadata *Local) {
722 assert(F && "Expected a function");
723
724 // Check to see if it's already in!
725 MDIndex &Index = MetadataMap[Local];
726 if (Index.ID) {
727 assert(Index.F == F && "Expected the same function");
728 return;
729 }
730
731 MDs.push_back(Local);
732 Index.F = F;
733 Index.ID = MDs.size();
734
735 EnumerateValue(Local->getValue());
736 }
737
738 /// EnumerateFunctionLocalListMetadata - Incorporate function-local metadata
739 /// information reachable from the metadata.
EnumerateFunctionLocalListMetadata(unsigned F,const DIArgList * ArgList)740 void ValueEnumerator::EnumerateFunctionLocalListMetadata(
741 unsigned F, const DIArgList *ArgList) {
742 assert(F && "Expected a function");
743
744 // Check to see if it's already in!
745 MDIndex &Index = MetadataMap[ArgList];
746 if (Index.ID) {
747 assert(Index.F == F && "Expected the same function");
748 return;
749 }
750
751 for (ValueAsMetadata *VAM : ArgList->getArgs()) {
752 if (isa<LocalAsMetadata>(VAM)) {
753 assert(MetadataMap.count(VAM) &&
754 "LocalAsMetadata should be enumerated before DIArgList");
755 assert(MetadataMap[VAM].F == F &&
756 "Expected LocalAsMetadata in the same function");
757 } else {
758 assert(isa<ConstantAsMetadata>(VAM) &&
759 "Expected LocalAsMetadata or ConstantAsMetadata");
760 assert(ValueMap.count(VAM->getValue()) &&
761 "Constant should be enumerated beforeDIArgList");
762 EnumerateMetadata(F, VAM);
763 }
764 }
765
766 MDs.push_back(ArgList);
767 Index.F = F;
768 Index.ID = MDs.size();
769 }
770
/// Sort key for organizeMetadata(): lower values are emitted earlier.
static unsigned getMetadataTypeOrder(const Metadata *MD) {
  // Strings are emitted in bulk and must come first.
  if (isa<MDString>(MD))
    return 0;

  // ConstantAsMetadata doesn't reference anything. We may as well shuffle it
  // to the front since we can detect it.
  auto *N = dyn_cast<MDNode>(MD);
  if (!N)
    return 1;

  // The reader handles forward references to distinct node operands quickly,
  // but is slow when uniqued operands are unresolved, so emit distinct nodes
  // before uniqued ones.
  return N->isDistinct() ? 2 : 3;
}
786
/// Reorder MDs for emission: module-level metadata (sorted by kind then
/// original ID) stays in MDs, while function-tagged metadata is moved into
/// FunctionMDs with its per-function range recorded in FunctionMDInfo.
void ValueEnumerator::organizeMetadata() {
  assert(MetadataMap.size() == MDs.size() &&
         "Metadata map and vector out of sync");

  if (MDs.empty())
    return;

  // Copy out the index information from MetadataMap in order to choose a new
  // order.
  SmallVector<MDIndex, 64> Order;
  Order.reserve(MetadataMap.size());
  for (const Metadata *MD : MDs)
    Order.push_back(MetadataMap.lookup(MD));

  // Partition:
  //   - by function, then
  //   - by isa<MDString>
  // and then sort by the original/current ID. Since the IDs are guaranteed to
  // be unique, the result of llvm::sort will be deterministic. There's no need
  // for std::stable_sort.
  llvm::sort(Order, [this](MDIndex LHS, MDIndex RHS) {
    return std::make_tuple(LHS.F, getMetadataTypeOrder(LHS.get(MDs)), LHS.ID) <
           std::make_tuple(RHS.F, getMetadataTypeOrder(RHS.get(MDs)), RHS.ID);
  });

  // Rebuild MDs, index the metadata ranges for each function in FunctionMDs,
  // and fix up MetadataMap.
  std::vector<const Metadata *> OldMDs;
  MDs.swap(OldMDs);
  MDs.reserve(OldMDs.size());
  // Module-level entries (F == 0) sorted first; re-number them 1-based.
  for (unsigned I = 0, E = Order.size(); I != E && !Order[I].F; ++I) {
    auto *MD = Order[I].get(OldMDs);
    MDs.push_back(MD);
    MetadataMap[MD].ID = I + 1;
    if (isa<MDString>(MD))
      ++NumMDStrings;
  }

  // Return early if there's nothing for the functions.
  if (MDs.size() == Order.size())
    return;

  // Build the function metadata ranges. Each function's entries are
  // contiguous in Order (it was the primary sort key); IDs continue from
  // the module-level count within each function's range.
  MDRange R;
  FunctionMDs.reserve(OldMDs.size());
  unsigned PrevF = 0;
  for (unsigned I = MDs.size(), E = Order.size(), ID = MDs.size(); I != E;
       ++I) {
    unsigned F = Order[I].F;
    if (!PrevF) {
      PrevF = F;
    } else if (PrevF != F) {
      // Close out the previous function's range and start a fresh one.
      R.Last = FunctionMDs.size();
      std::swap(R, FunctionMDInfo[PrevF]);
      R.First = FunctionMDs.size();

      ID = MDs.size();
      PrevF = F;
    }

    auto *MD = Order[I].get(OldMDs);
    FunctionMDs.push_back(MD);
    MetadataMap[MD].ID = ++ID;
    if (isa<MDString>(MD))
      ++R.NumStrings;
  }
  // Close out the final function's range.
  R.Last = FunctionMDs.size();
  FunctionMDInfo[PrevF] = R;
}
856
incorporateFunctionMetadata(const Function & F)857 void ValueEnumerator::incorporateFunctionMetadata(const Function &F) {
858 NumModuleMDs = MDs.size();
859
860 auto R = FunctionMDInfo.lookup(getValueID(&F) + 1);
861 NumMDStrings = R.NumStrings;
862 MDs.insert(MDs.end(), FunctionMDs.begin() + R.First,
863 FunctionMDs.begin() + R.Last);
864 }
865
/// Assign V a slot in Values/ValueMap (or bump its use count if it already
/// has one), enumerating its type and, for non-global constants, its
/// operands first so they get smaller IDs.
void ValueEnumerator::EnumerateValue(const Value *V) {
  assert(!V->getType()->isVoidTy() && "Can't insert void values!");
  assert(!isa<MetadataAsValue>(V) && "EnumerateValue doesn't handle Metadata!");

  // Check to see if it's already in!
  unsigned &ValueID = ValueMap[V];
  if (ValueID) {
    // Increment use count.
    Values[ValueID-1].second++;
    return;
  }

  // Comdats are collected as they are seen on global objects.
  if (auto *GO = dyn_cast<GlobalObject>(V))
    if (const Comdat *C = GO->getComdat())
      Comdats.insert(C);

  // Enumerate the type of this value.
  EnumerateType(V->getType());

  if (const Constant *C = dyn_cast<Constant>(V)) {
    if (isa<GlobalValue>(C)) {
      // Initializers for globals are handled explicitly elsewhere.
    } else if (C->getNumOperands()) {
      // If a constant has operands, enumerate them. This makes sure that if a
      // constant has uses (for example an array of const ints), that they are
      // inserted also.

      // We prefer to enumerate them with values before we enumerate the user
      // itself. This makes it more likely that we can avoid forward references
      // in the reader. We know that there can be no cycles in the constants
      // graph that don't go through a global variable.
      for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
           I != E; ++I)
        if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress.
          EnumerateValue(*I);
      if (auto *CE = dyn_cast<ConstantExpr>(C)) {
        // The shuffle mask and GEP source element type are serialized as
        // extra data for these constant expressions.
        if (CE->getOpcode() == Instruction::ShuffleVector)
          EnumerateValue(CE->getShuffleMaskForBitcode());
        if (auto *GEP = dyn_cast<GEPOperator>(CE))
          EnumerateType(GEP->getSourceElementType());
      }

      // Finally, add the value. Doing this could make the ValueID reference be
      // dangling, don't reuse it.
      Values.push_back(std::make_pair(V, 1U));
      ValueMap[V] = Values.size();
      return;
    }
  }

  // Add the value.  (ValueID is still a valid reference here: the recursive
  // path above returned early instead of reusing it.)
  Values.push_back(std::make_pair(V, 1U));
  ValueID = Values.size();
}
920
921
/// Assign a TypeID to \p Ty, enumerating all of its subtypes first so the
/// reader can build each type from already-seen components.
///
/// Named (non-literal) structs are temporarily marked with ~0U while their
/// contents are visited; the bitcode reader supports forward references to
/// such structs, which is what makes this cycle-breaking safe.
void ValueEnumerator::EnumerateType(Type *Ty) {
  unsigned *TypeID = &TypeMap[Ty];

  // We've already seen this type.
  if (*TypeID)
    return;

  // If it is a non-anonymous struct, mark the type as being visited so that we
  // don't recursively visit it. This is safe because we allow forward
  // references of these in the bitcode reader.
  if (StructType *STy = dyn_cast<StructType>(Ty))
    if (!STy->isLiteral())
      *TypeID = ~0U;

  // Enumerate all of the subtypes before we enumerate this type. This ensures
  // that the type will be enumerated in an order that can be directly built.
  for (Type *SubTy : Ty->subtypes())
    EnumerateType(SubTy);

  // Refresh the TypeID pointer in case the table rehashed.
  TypeID = &TypeMap[Ty];

  // Check to see if we got the pointer another way. This can happen when
  // enumerating recursive types that hit the base case deeper than they start.
  //
  // If this is actually a struct that we are treating as forward ref'able,
  // then emit the definition now that all of its contents are available.
  if (*TypeID && *TypeID != ~0U)
    return;

  // Add this type now that its contents are all happily enumerated.
  Types.push_back(Ty);

  *TypeID = Types.size();
}
957
958 // Enumerate the types for the specified value. If the value is a constant,
959 // walk through it, enumerating the types of the constant.
EnumerateOperandType(const Value * V)960 void ValueEnumerator::EnumerateOperandType(const Value *V) {
961 EnumerateType(V->getType());
962
963 assert(!isa<MetadataAsValue>(V) && "Unexpected metadata operand");
964
965 const Constant *C = dyn_cast<Constant>(V);
966 if (!C)
967 return;
968
969 // If this constant is already enumerated, ignore it, we know its type must
970 // be enumerated.
971 if (ValueMap.count(C))
972 return;
973
974 // This constant may have operands, make sure to enumerate the types in
975 // them.
976 for (const Value *Op : C->operands()) {
977 // Don't enumerate basic blocks here, this happens as operands to
978 // blockaddress.
979 if (isa<BasicBlock>(Op))
980 continue;
981
982 EnumerateOperandType(Op);
983 }
984 if (auto *CE = dyn_cast<ConstantExpr>(C)) {
985 if (CE->getOpcode() == Instruction::ShuffleVector)
986 EnumerateOperandType(CE->getShuffleMaskForBitcode());
987 if (CE->getOpcode() == Instruction::GetElementPtr)
988 EnumerateType(cast<GEPOperator>(CE)->getSourceElementType());
989 }
990 }
991
EnumerateAttributes(AttributeList PAL)992 void ValueEnumerator::EnumerateAttributes(AttributeList PAL) {
993 if (PAL.isEmpty()) return; // null is always 0.
994
995 // Do a lookup.
996 unsigned &Entry = AttributeListMap[PAL];
997 if (Entry == 0) {
998 // Never saw this before, add it.
999 AttributeLists.push_back(PAL);
1000 Entry = AttributeLists.size();
1001 }
1002
1003 // Do lookups for all attribute groups.
1004 for (unsigned i : PAL.indexes()) {
1005 AttributeSet AS = PAL.getAttributes(i);
1006 if (!AS.hasAttributes())
1007 continue;
1008 IndexAndAttrSet Pair = {i, AS};
1009 unsigned &Entry = AttributeGroupMap[Pair];
1010 if (Entry == 0) {
1011 AttributeGroups.push_back(Pair);
1012 Entry = AttributeGroups.size();
1013
1014 for (Attribute Attr : AS) {
1015 if (Attr.isTypeAttribute())
1016 EnumerateType(Attr.getValueAsType());
1017 }
1018 }
1019 }
1020 }
1021
/// Add the values and metadata of function \p F to the enumerator tables.
///
/// Entries are appended after the module-level ones in a fixed order the
/// writer depends on: function-local metadata, arguments, function-level
/// constants, basic blocks, instructions, then function-local metadata
/// values (LocalAsMetadata before DIArgList). purgeFunction() undoes all of
/// this using the watermarks recorded here.
void ValueEnumerator::incorporateFunction(const Function &F) {
  InstructionCount = 0;
  // Watermark: everything past this index is purged by purgeFunction().
  NumModuleValues = Values.size();

  // Add global metadata to the function block. This doesn't include
  // LocalAsMetadata.
  incorporateFunctionMetadata(F);

  // Adding function arguments to the value table.
  for (const auto &I : F.args()) {
    EnumerateValue(&I);
    // Pointee types of ABI-affecting attributes must be enumerated since
    // pointers no longer carry them.
    if (I.hasAttribute(Attribute::ByVal))
      EnumerateType(I.getParamByValType());
    else if (I.hasAttribute(Attribute::StructRet))
      EnumerateType(I.getParamStructRetType());
    else if (I.hasAttribute(Attribute::ByRef))
      EnumerateType(I.getParamByRefType());
  }
  FirstFuncConstantID = Values.size();

  // Add all function-level constants to the value table.
  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      for (const Use &OI : I.operands()) {
        // Globals were already numbered at module level; everything else
        // constant (including inline asm) belongs to this function's pool.
        if ((isa<Constant>(OI) && !isa<GlobalValue>(OI)) || isa<InlineAsm>(OI))
          EnumerateValue(OI);
      }
      // Shuffle masks are stored out-of-line in bitcode, not as operands.
      if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
        EnumerateValue(SVI->getShuffleMaskForBitcode());
    }
    BasicBlocks.push_back(&BB);
    ValueMap[&BB] = BasicBlocks.size();
  }

  // Optimize the constant layout.
  OptimizeConstants(FirstFuncConstantID, Values.size());

  // Add the function's parameter attributes so they are available for use in
  // the function's instruction.
  EnumerateAttributes(F.getAttributes());

  // Instructions are numbered after all constants; record the boundary.
  FirstInstID = Values.size();

  SmallVector<LocalAsMetadata *, 8> FnLocalMDVector;
  SmallVector<DIArgList *, 8> ArgListMDVector;
  // Add all of the instructions.
  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      for (const Use &OI : I.operands()) {
        if (auto *MD = dyn_cast<MetadataAsValue>(&OI)) {
          if (auto *Local = dyn_cast<LocalAsMetadata>(MD->getMetadata())) {
            // Enumerate metadata after the instructions they might refer to.
            FnLocalMDVector.push_back(Local);
          } else if (auto *ArgList = dyn_cast<DIArgList>(MD->getMetadata())) {
            ArgListMDVector.push_back(ArgList);
            for (ValueAsMetadata *VMD : ArgList->getArgs()) {
              if (auto *Local = dyn_cast<LocalAsMetadata>(VMD)) {
                // Enumerate metadata after the instructions they might refer
                // to.
                FnLocalMDVector.push_back(Local);
              }
            }
          }
        }
      }

      // Void-producing instructions get no value ID.
      if (!I.getType()->isVoidTy())
        EnumerateValue(&I);
    }
  }

  // Add all of the function-local metadata.
  for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i) {
    // At this point, every local values have been incorporated, we shouldn't
    // have a metadata operand that references a value that hasn't been seen.
    assert(ValueMap.count(FnLocalMDVector[i]->getValue()) &&
           "Missing value for metadata operand");
    EnumerateFunctionLocalMetadata(F, FnLocalMDVector[i]);
  }
  // DIArgList entries must come after function-local metadata, as it is not
  // possible to forward-reference them.
  for (const DIArgList *ArgList : ArgListMDVector)
    EnumerateFunctionLocalListMetadata(F, ArgList);
}
1106
purgeFunction()1107 void ValueEnumerator::purgeFunction() {
1108 /// Remove purged values from the ValueMap.
1109 for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
1110 ValueMap.erase(Values[i].first);
1111 for (unsigned i = NumModuleMDs, e = MDs.size(); i != e; ++i)
1112 MetadataMap.erase(MDs[i]);
1113 for (const BasicBlock *BB : BasicBlocks)
1114 ValueMap.erase(BB);
1115
1116 Values.resize(NumModuleValues);
1117 MDs.resize(NumModuleMDs);
1118 BasicBlocks.clear();
1119 NumMDStrings = 0;
1120 }
1121
IncorporateFunctionInfoGlobalBBIDs(const Function * F,DenseMap<const BasicBlock *,unsigned> & IDMap)1122 static void IncorporateFunctionInfoGlobalBBIDs(const Function *F,
1123 DenseMap<const BasicBlock*, unsigned> &IDMap) {
1124 unsigned Counter = 0;
1125 for (const BasicBlock &BB : *F)
1126 IDMap[&BB] = ++Counter;
1127 }
1128
1129 /// getGlobalBasicBlockID - This returns the function-specific ID for the
1130 /// specified basic block. This is relatively expensive information, so it
1131 /// should only be used by rare constructs such as address-of-label.
getGlobalBasicBlockID(const BasicBlock * BB) const1132 unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const {
1133 unsigned &Idx = GlobalBasicBlockIDs[BB];
1134 if (Idx != 0)
1135 return Idx-1;
1136
1137 IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);
1138 return getGlobalBasicBlockID(BB);
1139 }
1140
computeBitsRequiredForTypeIndicies() const1141 uint64_t ValueEnumerator::computeBitsRequiredForTypeIndicies() const {
1142 return Log2_32_Ceil(getTypes().size() + 1);
1143 }
1144