//===- ValueEnumerator.cpp - Number values and types for bitcode writer ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ValueEnumerator class.
//
//===----------------------------------------------------------------------===//

#include "ValueEnumerator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <tuple>

using namespace llvm;

namespace {

struct OrderMap {
  DenseMap<const Value *, std::pair<unsigned, bool>> IDs;
  unsigned LastGlobalConstantID = 0;
  unsigned LastGlobalValueID = 0;

  OrderMap() = default;

  bool isGlobalConstant(unsigned ID) const {
    return ID <= LastGlobalConstantID;
  }

  bool isGlobalValue(unsigned ID) const {
    return ID <= LastGlobalValueID && !isGlobalConstant(ID);
  }

  unsigned size() const { return IDs.size(); }
  std::pair<unsigned, bool> &operator[](const Value *V) { return IDs[V]; }

  std::pair<unsigned, bool> lookup(const Value *V) const {
    return IDs.lookup(V);
  }

  void index(const Value *V) {
    // Explicitly sequence get-size and insert-value operations to avoid UB.
    unsigned ID = IDs.size() + 1;
    IDs[V].first = ID;
  }
};
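
// For illustration: after orderModule() below has run, the ID space of an
// OrderMap is partitioned as
//
//   [1, LastGlobalConstantID]                  module-level constants
//   (LastGlobalConstantID, LastGlobalValueID]  global values
//   (LastGlobalValueID, ...)                   function-local values
//
// e.g. with LastGlobalConstantID == 3 and LastGlobalValueID == 7,
// isGlobalConstant(2) and isGlobalValue(5) both hold, while ID 9 is neither.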

} // end anonymous namespace

static void orderValue(const Value *V, OrderMap &OM) {
  if (OM.lookup(V).first)
    return;

  if (const Constant *C = dyn_cast<Constant>(V)) {
    if (C->getNumOperands() && !isa<GlobalValue>(C)) {
      for (const Value *Op : C->operands())
        if (!isa<BasicBlock>(Op) && !isa<GlobalValue>(Op))
          orderValue(Op, OM);
      if (auto *CE = dyn_cast<ConstantExpr>(C))
        if (CE->getOpcode() == Instruction::ShuffleVector)
          orderValue(CE->getShuffleMaskForBitcode(), OM);
    }
  }

  // Note: we cannot cache this lookup above, since inserting into the map
  // changes the map's size, and thus affects the other IDs.
  OM.index(V);
}

static OrderMap orderModule(const Module &M) {
  // This needs to match the order used by ValueEnumerator::ValueEnumerator()
  // and ValueEnumerator::incorporateFunction().
  OrderMap OM;

  // In the reader, initializers of GlobalValues are set *after* all the
  // globals have been read. Rather than awkwardly modeling this behaviour
  // directly in predictValueUseListOrderImpl(), just assign IDs to
  // initializers of GlobalValues before GlobalValues themselves to model this
  // implicitly.
  for (const GlobalVariable &G : M.globals())
    if (G.hasInitializer())
      if (!isa<GlobalValue>(G.getInitializer()))
        orderValue(G.getInitializer(), OM);
  for (const GlobalAlias &A : M.aliases())
    if (!isa<GlobalValue>(A.getAliasee()))
      orderValue(A.getAliasee(), OM);
  for (const GlobalIFunc &I : M.ifuncs())
    if (!isa<GlobalValue>(I.getResolver()))
      orderValue(I.getResolver(), OM);
  for (const Function &F : M) {
    for (const Use &U : F.operands())
      if (!isa<GlobalValue>(U.get()))
        orderValue(U.get(), OM);
  }

  // As constants used in metadata operands are emitted as module-level
  // constants, we must order them before other operands. Also, we must order
  // these before global values, as these will be read before setting the
  // global values' initializers. The latter matters for constants which have
  // uses of other constants that are used as initializers.
  auto orderConstantValue = [&OM](const Value *V) {
    if ((isa<Constant>(V) && !isa<GlobalValue>(V)) || isa<InlineAsm>(V))
      orderValue(V, OM);
  };
  for (const Function &F : M) {
    if (F.isDeclaration())
      continue;
    for (const BasicBlock &BB : F)
      for (const Instruction &I : BB)
        for (const Value *V : I.operands()) {
          if (const auto *MAV = dyn_cast<MetadataAsValue>(V)) {
            if (const auto *VAM =
                    dyn_cast<ValueAsMetadata>(MAV->getMetadata())) {
              orderConstantValue(VAM->getValue());
            } else if (const auto *AL =
                           dyn_cast<DIArgList>(MAV->getMetadata())) {
              for (const auto *VAM : AL->getArgs())
                orderConstantValue(VAM->getValue());
            }
          }
        }
  }
  OM.LastGlobalConstantID = OM.size();

  // Initializers of GlobalValues are processed in
  // BitcodeReader::ResolveGlobalAndAliasInits(). Match the order there rather
  // than ValueEnumerator, and match the code in predictValueUseListOrderImpl()
  // by giving IDs in reverse order.
  //
  // Since GlobalValues never reference each other directly (just through
  // initializers), their relative IDs only matter for determining order of
  // uses in their initializers.
  for (const Function &F : M)
    orderValue(&F, OM);
  for (const GlobalAlias &A : M.aliases())
    orderValue(&A, OM);
  for (const GlobalIFunc &I : M.ifuncs())
    orderValue(&I, OM);
  for (const GlobalVariable &G : M.globals())
    orderValue(&G, OM);
  OM.LastGlobalValueID = OM.size();

  for (const Function &F : M) {
    if (F.isDeclaration())
      continue;
    // Here we need to match the union of ValueEnumerator::incorporateFunction()
    // and WriteFunction(). Basic blocks are implicitly declared before
    // anything else (by declaring their size).
    for (const BasicBlock &BB : F)
      orderValue(&BB, OM);
    for (const Argument &A : F.args())
      orderValue(&A, OM);
    for (const BasicBlock &BB : F)
      for (const Instruction &I : BB) {
        for (const Value *Op : I.operands())
          if ((isa<Constant>(*Op) && !isa<GlobalValue>(*Op)) ||
              isa<InlineAsm>(*Op))
            orderValue(Op, OM);
        if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
          orderValue(SVI->getShuffleMaskForBitcode(), OM);
      }
    for (const BasicBlock &BB : F)
      for (const Instruction &I : BB)
        orderValue(&I, OM);
  }
  return OM;
}
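
// A minimal sketch of the resulting ordering, assuming a module with one
// global `@g = global i32 7` and one defined function `@f` whose body uses a
// local constant: orderModule() would hand out IDs roughly as
//
//   1: i32 7            (the initializer, a module-level constant)
//   2: @f   3: @g       (global values, after the initializers they use)
//   4...:   @f's blocks, arguments, local constants, then instructions
//
// The exact numbers depend on the module; only the relative grouping
// (constants, then globals, then per-function values) is fixed here.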

static void predictValueUseListOrderImpl(const Value *V, const Function *F,
                                         unsigned ID, const OrderMap &OM,
                                         UseListOrderStack &Stack) {
  // Predict use-list order for this one.
  using Entry = std::pair<const Use *, unsigned>;
  SmallVector<Entry, 64> List;
  for (const Use &U : V->uses())
    // Check if this user will be serialized.
    if (OM.lookup(U.getUser()).first)
      List.push_back(std::make_pair(&U, List.size()));

  if (List.size() < 2)
    // We may have lost some users.
    return;

  bool IsGlobalValue = OM.isGlobalValue(ID);
  llvm::sort(List, [&](const Entry &L, const Entry &R) {
    const Use *LU = L.first;
    const Use *RU = R.first;
    if (LU == RU)
      return false;

    auto LID = OM.lookup(LU->getUser()).first;
    auto RID = OM.lookup(RU->getUser()).first;

    // Global values are processed in reverse order.
    //
    // Moreover, initializers of GlobalValues are set *after* all the globals
    // have been read (despite having earlier IDs). Rather than awkwardly
    // modeling this behaviour here, orderModule() has assigned IDs to
    // initializers of GlobalValues before GlobalValues themselves.
    if (OM.isGlobalValue(LID) && OM.isGlobalValue(RID))
      return LID < RID;

    // If ID is 4, then expect: 7 6 5 1 2 3.
    if (LID < RID) {
      if (RID <= ID)
        if (!IsGlobalValue) // GlobalValue uses don't get reversed.
          return true;
      return false;
    }
    if (RID < LID) {
      if (LID <= ID)
        if (!IsGlobalValue) // GlobalValue uses don't get reversed.
          return false;
      return true;
    }

    // LID and RID are equal, so we have different operands of the same user.
    // Assume operands are added in order for all instructions.
    if (LID <= ID)
      if (!IsGlobalValue) // GlobalValue uses don't get reversed.
        return LU->getOperandNo() < RU->getOperandNo();
    return LU->getOperandNo() > RU->getOperandNo();
  });

  if (llvm::is_sorted(List, [](const Entry &L, const Entry &R) {
        return L.second < R.second;
      }))
    // Order is already correct.
    return;

  // Store the shuffle.
  Stack.emplace_back(V, F, List.size());
  assert(List.size() == Stack.back().Shuffle.size() && "Wrong size");
  for (size_t I = 0, E = List.size(); I != E; ++I)
    Stack.back().Shuffle[I] = List[I].second;
}
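
// Worked example for the comparator above (illustrative only): suppose V has
// ID 4 and its serialized users carry IDs {1, 2, 3, 5, 6, 7}. For
// non-GlobalValue IDs the lambda puts users with IDs above 4 first, in
// descending order, followed by users with IDs at or below 4 in ascending
// order, yielding 7 6 5 1 2 3 -- the order in which the reader is expected to
// append these uses. For GlobalValue uses no reversal is applied.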

static void predictValueUseListOrder(const Value *V, const Function *F,
                                     OrderMap &OM, UseListOrderStack &Stack) {
  auto &IDPair = OM[V];
  assert(IDPair.first && "Unmapped value");
  if (IDPair.second)
    // Already predicted.
    return;

  // Do the actual prediction.
  IDPair.second = true;
  if (!V->use_empty() && std::next(V->use_begin()) != V->use_end())
    predictValueUseListOrderImpl(V, F, IDPair.first, OM, Stack);

  // Recursive descent into constants.
  if (const Constant *C = dyn_cast<Constant>(V)) {
    if (C->getNumOperands()) { // Visit GlobalValues.
      for (const Value *Op : C->operands())
        if (isa<Constant>(Op)) // Visit GlobalValues.
          predictValueUseListOrder(Op, F, OM, Stack);
      if (auto *CE = dyn_cast<ConstantExpr>(C))
        if (CE->getOpcode() == Instruction::ShuffleVector)
          predictValueUseListOrder(CE->getShuffleMaskForBitcode(), F, OM,
                                   Stack);
    }
  }
}

static UseListOrderStack predictUseListOrder(const Module &M) {
  OrderMap OM = orderModule(M);

  // Use-list orders need to be serialized after all the users have been added
  // to a value, or else the shuffles will be incomplete. Store them per
  // function in a stack.
  //
  // Aside from function order, the order of values doesn't matter much here.
  UseListOrderStack Stack;

  // We want to visit the functions backward now so we can list function-local
  // constants in the last Function they're used in. Module-level constants
  // have already been visited above.
  for (auto I = M.rbegin(), E = M.rend(); I != E; ++I) {
    const Function &F = *I;
    if (F.isDeclaration())
      continue;
    for (const BasicBlock &BB : F)
      predictValueUseListOrder(&BB, &F, OM, Stack);
    for (const Argument &A : F.args())
      predictValueUseListOrder(&A, &F, OM, Stack);
    for (const BasicBlock &BB : F)
      for (const Instruction &I : BB) {
        for (const Value *Op : I.operands())
          if (isa<Constant>(*Op) || isa<InlineAsm>(*Op)) // Visit GlobalValues.
            predictValueUseListOrder(Op, &F, OM, Stack);
        if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
          predictValueUseListOrder(SVI->getShuffleMaskForBitcode(), &F, OM,
                                   Stack);
      }
    for (const BasicBlock &BB : F)
      for (const Instruction &I : BB)
        predictValueUseListOrder(&I, &F, OM, Stack);
  }

  // Visit globals last, since the module-level use-list block will be seen
  // before the function bodies are processed.
  for (const GlobalVariable &G : M.globals())
    predictValueUseListOrder(&G, nullptr, OM, Stack);
  for (const Function &F : M)
    predictValueUseListOrder(&F, nullptr, OM, Stack);
  for (const GlobalAlias &A : M.aliases())
    predictValueUseListOrder(&A, nullptr, OM, Stack);
  for (const GlobalIFunc &I : M.ifuncs())
    predictValueUseListOrder(&I, nullptr, OM, Stack);
  for (const GlobalVariable &G : M.globals())
    if (G.hasInitializer())
      predictValueUseListOrder(G.getInitializer(), nullptr, OM, Stack);
  for (const GlobalAlias &A : M.aliases())
    predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack);
  for (const GlobalIFunc &I : M.ifuncs())
    predictValueUseListOrder(I.getResolver(), nullptr, OM, Stack);
  for (const Function &F : M) {
    for (const Use &U : F.operands())
      predictValueUseListOrder(U.get(), nullptr, OM, Stack);
  }

  return Stack;
}

static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
  return V.first->getType()->isIntOrIntVectorTy();
}

ValueEnumerator::ValueEnumerator(const Module &M,
                                 bool ShouldPreserveUseListOrder)
    : ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
  if (ShouldPreserveUseListOrder)
    UseListOrders = predictUseListOrder(M);

  // Enumerate the global variables.
  for (const GlobalVariable &GV : M.globals())
    EnumerateValue(&GV);

  // Enumerate the functions.
  for (const Function &F : M) {
    EnumerateValue(&F);
    EnumerateAttributes(F.getAttributes());
  }

  // Enumerate the aliases.
  for (const GlobalAlias &GA : M.aliases())
    EnumerateValue(&GA);

  // Enumerate the ifuncs.
  for (const GlobalIFunc &GIF : M.ifuncs())
    EnumerateValue(&GIF);

  // Remember the cutoff between global values and other constants.
  unsigned FirstConstant = Values.size();

  // Enumerate the global variable initializers and attributes.
  for (const GlobalVariable &GV : M.globals()) {
    if (GV.hasInitializer())
      EnumerateValue(GV.getInitializer());
    if (GV.hasAttributes())
      EnumerateAttributes(GV.getAttributesAsList(AttributeList::FunctionIndex));
  }

  // Enumerate the aliasees.
  for (const GlobalAlias &GA : M.aliases())
    EnumerateValue(GA.getAliasee());

  // Enumerate the ifunc resolvers.
  for (const GlobalIFunc &GIF : M.ifuncs())
    EnumerateValue(GIF.getResolver());

  // Enumerate any optional Function data.
  for (const Function &F : M)
    for (const Use &U : F.operands())
      EnumerateValue(U.get());

  // Enumerate the metadata type.
  //
  // TODO: Move this to ValueEnumerator::EnumerateOperandType() once bitcode
  // only encodes the metadata type when it's used as a value.
  EnumerateType(Type::getMetadataTy(M.getContext()));

  // Insert constants and metadata that are named at module level into the slot
  // pool so that the module symbol table can refer to them...
  EnumerateValueSymbolTable(M.getValueSymbolTable());
  EnumerateNamedMetadata(M);

  SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
  for (const GlobalVariable &GV : M.globals()) {
    MDs.clear();
    GV.getAllMetadata(MDs);
    for (const auto &I : MDs)
      // FIXME: Pass GV to EnumerateMetadata and arrange for the bitcode writer
      // to write metadata to the global variable's own metadata block
      // (PR28134).
      EnumerateMetadata(nullptr, I.second);
  }

  // Enumerate types used by function bodies and argument lists.
  for (const Function &F : M) {
    for (const Argument &A : F.args())
      EnumerateType(A.getType());

    // Enumerate metadata attached to this function.
    MDs.clear();
    F.getAllMetadata(MDs);
    for (const auto &I : MDs)
      EnumerateMetadata(F.isDeclaration() ? nullptr : &F, I.second);

    for (const BasicBlock &BB : F)
      for (const Instruction &I : BB) {
        for (const Use &Op : I.operands()) {
          auto *MD = dyn_cast<MetadataAsValue>(&Op);
          if (!MD) {
            EnumerateOperandType(Op);
            continue;
          }

          // Local metadata is enumerated during function-incorporation, but
          // any ConstantAsMetadata arguments in a DIArgList should be examined
          // now.
          if (isa<LocalAsMetadata>(MD->getMetadata()))
            continue;
          if (auto *AL = dyn_cast<DIArgList>(MD->getMetadata())) {
            for (auto *VAM : AL->getArgs())
              if (isa<ConstantAsMetadata>(VAM))
                EnumerateMetadata(&F, VAM);
            continue;
          }

          EnumerateMetadata(&F, MD->getMetadata());
        }
        if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
          EnumerateType(SVI->getShuffleMaskForBitcode()->getType());
        EnumerateType(I.getType());
        if (const auto *Call = dyn_cast<CallBase>(&I))
          EnumerateAttributes(Call->getAttributes());

        // Enumerate metadata attached with this instruction.
        MDs.clear();
        I.getAllMetadataOtherThanDebugLoc(MDs);
        for (unsigned i = 0, e = MDs.size(); i != e; ++i)
          EnumerateMetadata(&F, MDs[i].second);

        // Don't enumerate the location directly -- it has a special record
        // type -- but enumerate its operands.
        if (DILocation *L = I.getDebugLoc())
          for (const Metadata *Op : L->operands())
            EnumerateMetadata(&F, Op);
      }
  }

  // Optimize constant ordering.
  OptimizeConstants(FirstConstant, Values.size());

  // Organize metadata ordering.
  organizeMetadata();
}
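
// For context, a rough usage sketch (not code from this file): the bitcode
// writer builds one enumerator per module and then walks its tables, along
// the lines of
//
//   ValueEnumerator VE(M, /*ShouldPreserveUseListOrder=*/true);
//   for (const Function &F : M) {
//     VE.incorporateFunction(F);
//     // ... emit the function block using VE.getValueID(), VE.getTypeID() ...
//     VE.purgeFunction();
//   }
//
// The actual driver lives in BitcodeWriter.cpp; the snippet only illustrates
// the intended incorporate/purge pairing.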

unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
  InstructionMapType::const_iterator I = InstructionMap.find(Inst);
  assert(I != InstructionMap.end() && "Instruction is not mapped!");
  return I->second;
}

unsigned ValueEnumerator::getComdatID(const Comdat *C) const {
  unsigned ComdatID = Comdats.idFor(C);
  assert(ComdatID && "Comdat not found!");
  return ComdatID;
}

void ValueEnumerator::setInstructionID(const Instruction *I) {
  InstructionMap[I] = InstructionCount++;
}

unsigned ValueEnumerator::getValueID(const Value *V) const {
  if (auto *MD = dyn_cast<MetadataAsValue>(V))
    return getMetadataID(MD->getMetadata());

  ValueMapType::const_iterator I = ValueMap.find(V);
  assert(I != ValueMap.end() && "Value not in slotcalculator!");
  return I->second-1;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ValueEnumerator::dump() const {
  print(dbgs(), ValueMap, "Default");
  dbgs() << '\n';
  print(dbgs(), MetadataMap, "MetaData");
  dbgs() << '\n';
}
#endif

void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
                            const char *Name) const {
  OS << "Map Name: " << Name << "\n";
  OS << "Size: " << Map.size() << "\n";
  for (ValueMapType::const_iterator I = Map.begin(),
         E = Map.end(); I != E; ++I) {
    const Value *V = I->first;
    if (V->hasName())
      OS << "Value: " << V->getName();
    else
      OS << "Value: [null]\n";
    V->print(errs());
    errs() << '\n';

    OS << " Uses(" << V->getNumUses() << "):";
    for (const Use &U : V->uses()) {
      if (&U != &*V->use_begin())
        OS << ",";
      if (U->hasName())
        OS << " " << U->getName();
      else
        OS << " [null]";
    }
    OS << "\n\n";
  }
}

void ValueEnumerator::print(raw_ostream &OS, const MetadataMapType &Map,
                            const char *Name) const {
  OS << "Map Name: " << Name << "\n";
  OS << "Size: " << Map.size() << "\n";
  for (auto I = Map.begin(), E = Map.end(); I != E; ++I) {
    const Metadata *MD = I->first;
    OS << "Metadata: slot = " << I->second.ID << "\n";
    OS << "Metadata: function = " << I->second.F << "\n";
    MD->print(OS);
    OS << "\n";
  }
}

/// OptimizeConstants - Reorder constant pool for denser encoding.
void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
  if (CstStart == CstEnd || CstStart+1 == CstEnd) return;

  if (ShouldPreserveUseListOrder)
    // Optimizing constants makes the use-list order difficult to predict.
    // Disable it for now when trying to preserve the order.
    return;

  std::stable_sort(Values.begin() + CstStart, Values.begin() + CstEnd,
                   [this](const std::pair<const Value *, unsigned> &LHS,
                          const std::pair<const Value *, unsigned> &RHS) {
                     // Sort by plane.
                     if (LHS.first->getType() != RHS.first->getType())
                       return getTypeID(LHS.first->getType()) <
                              getTypeID(RHS.first->getType());
                     // Then by frequency.
                     return LHS.second > RHS.second;
                   });

  // Ensure that integer and vector of integer constants are at the start of
  // the constant pool. This is important so that GEP structure indices come
  // before gep constant exprs.
  std::stable_partition(Values.begin() + CstStart, Values.begin() + CstEnd,
                        isIntOrIntVectorValue);

  // Rebuild the modified portion of ValueMap.
  for (; CstStart != CstEnd; ++CstStart)
    ValueMap[Values[CstStart].first] = CstStart+1;
}
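
// A small illustration of the effect (assuming use-list order preservation is
// off): given a constant range {float 1.0, i32 5 (used 3x), ptr-typed gep
// expr, i32 7 (used once)}, the sort groups values by type plane and, within
// a plane, by descending use count; the partition then moves integer and
// integer-vector constants to the front, e.g. {i32 5, i32 7, float 1.0,
// gep expr}, so GEP structure indices receive small IDs ahead of the GEP
// constant expressions that use them.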

/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
/// table into the values table.
void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
  for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
       VI != VE; ++VI)
    EnumerateValue(VI->getValue());
}

/// Insert all of the values referenced by named metadata in the specified
/// module.
void ValueEnumerator::EnumerateNamedMetadata(const Module &M) {
  for (const auto &I : M.named_metadata())
    EnumerateNamedMDNode(&I);
}

void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
  for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
    EnumerateMetadata(nullptr, MD->getOperand(i));
}

unsigned ValueEnumerator::getMetadataFunctionID(const Function *F) const {
  return F ? getValueID(F) + 1 : 0;
}

void ValueEnumerator::EnumerateMetadata(const Function *F, const Metadata *MD) {
  EnumerateMetadata(getMetadataFunctionID(F), MD);
}

void ValueEnumerator::EnumerateFunctionLocalMetadata(
    const Function &F, const LocalAsMetadata *Local) {
  EnumerateFunctionLocalMetadata(getMetadataFunctionID(&F), Local);
}

void ValueEnumerator::EnumerateFunctionLocalListMetadata(
    const Function &F, const DIArgList *ArgList) {
  EnumerateFunctionLocalListMetadata(getMetadataFunctionID(&F), ArgList);
}

void ValueEnumerator::dropFunctionFromMetadata(
    MetadataMapType::value_type &FirstMD) {
  SmallVector<const MDNode *, 64> Worklist;
  auto push = [&Worklist](MetadataMapType::value_type &MD) {
    auto &Entry = MD.second;

    // Nothing to do if this metadata isn't tagged.
    if (!Entry.F)
      return;

    // Drop the function tag.
    Entry.F = 0;

    // If this has an ID and is an MDNode, then its operands have entries as
    // well. We need to drop the function from them too.
    if (Entry.ID)
      if (auto *N = dyn_cast<MDNode>(MD.first))
        Worklist.push_back(N);
  };
  push(FirstMD);
  while (!Worklist.empty())
    for (const Metadata *Op : Worklist.pop_back_val()->operands()) {
      if (!Op)
        continue;
      auto MD = MetadataMap.find(Op);
      if (MD != MetadataMap.end())
        push(*MD);
    }
}

void ValueEnumerator::EnumerateMetadata(unsigned F, const Metadata *MD) {
  // It's vital for reader efficiency that uniqued subgraphs are done in
  // post-order; it's expensive when their operands have forward references.
  // If a distinct node is referenced from a uniqued node, it'll be delayed
  // until the uniqued subgraph has been completely traversed.
  SmallVector<const MDNode *, 32> DelayedDistinctNodes;

  // Start by enumerating MD, and then work through its transitive operands in
  // post-order. This requires a depth-first search.
  SmallVector<std::pair<const MDNode *, MDNode::op_iterator>, 32> Worklist;
  if (const MDNode *N = enumerateMetadataImpl(F, MD))
    Worklist.push_back(std::make_pair(N, N->op_begin()));

  while (!Worklist.empty()) {
    const MDNode *N = Worklist.back().first;

    // Enumerate operands until we hit a new node. We need to traverse these
    // nodes' operands before visiting the rest of N's operands.
    MDNode::op_iterator I = std::find_if(
        Worklist.back().second, N->op_end(),
        [&](const Metadata *MD) { return enumerateMetadataImpl(F, MD); });
    if (I != N->op_end()) {
      auto *Op = cast<MDNode>(*I);
      Worklist.back().second = ++I;

      // Delay traversing Op if it's a distinct node and N is uniqued.
      if (Op->isDistinct() && !N->isDistinct())
        DelayedDistinctNodes.push_back(Op);
      else
        Worklist.push_back(std::make_pair(Op, Op->op_begin()));
      continue;
    }

    // All the operands have been visited. Now assign an ID.
    Worklist.pop_back();
    MDs.push_back(N);
    MetadataMap[N].ID = MDs.size();

    // Flush out any delayed distinct nodes; these are all the distinct nodes
    // that are leaves in the last uniqued subgraph.
    if (Worklist.empty() || Worklist.back().first->isDistinct()) {
      for (const MDNode *N : DelayedDistinctNodes)
        Worklist.push_back(std::make_pair(N, N->op_begin()));
      DelayedDistinctNodes.clear();
    }
  }
}
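
// Sketch of the traversal, for illustration: with uniqued nodes !A -> !B -> !C
// (operands pointing right) and a distinct node !D used by !B, the walk
// assigns IDs to !C, then !B, then !A (post-order), while !D is parked in
// DelayedDistinctNodes and only traversed once the uniqued subgraph rooted at
// !A has been finished. Distinct nodes tolerate forward references cheaply in
// the reader, so delaying them is safe.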

const MDNode *ValueEnumerator::enumerateMetadataImpl(unsigned F,
                                                     const Metadata *MD) {
  if (!MD)
    return nullptr;

  assert(
      (isa<MDNode>(MD) || isa<MDString>(MD) || isa<ConstantAsMetadata>(MD)) &&
      "Invalid metadata kind");

  auto Insertion = MetadataMap.insert(std::make_pair(MD, MDIndex(F)));
  MDIndex &Entry = Insertion.first->second;
  if (!Insertion.second) {
    // Already mapped. If F doesn't match the function tag, drop it.
    if (Entry.hasDifferentFunction(F))
      dropFunctionFromMetadata(*Insertion.first);
    return nullptr;
  }

  // Don't assign IDs to metadata nodes.
  if (auto *N = dyn_cast<MDNode>(MD))
    return N;

  // Save the metadata.
  MDs.push_back(MD);
  Entry.ID = MDs.size();

  // Enumerate the constant, if any.
  if (auto *C = dyn_cast<ConstantAsMetadata>(MD))
    EnumerateValue(C->getValue());

  return nullptr;
}

/// EnumerateFunctionLocalMetadata - Incorporate function-local metadata
/// information reachable from the metadata.
void ValueEnumerator::EnumerateFunctionLocalMetadata(
    unsigned F, const LocalAsMetadata *Local) {
  assert(F && "Expected a function");

  // Check to see if it's already in!
  MDIndex &Index = MetadataMap[Local];
  if (Index.ID) {
    assert(Index.F == F && "Expected the same function");
    return;
  }

  MDs.push_back(Local);
  Index.F = F;
  Index.ID = MDs.size();

  EnumerateValue(Local->getValue());
}

/// EnumerateFunctionLocalListMetadata - Incorporate function-local metadata
/// information reachable from the metadata.
void ValueEnumerator::EnumerateFunctionLocalListMetadata(
    unsigned F, const DIArgList *ArgList) {
  assert(F && "Expected a function");

  // Check to see if it's already in!
  MDIndex &Index = MetadataMap[ArgList];
  if (Index.ID) {
    assert(Index.F == F && "Expected the same function");
    return;
  }

  for (ValueAsMetadata *VAM : ArgList->getArgs()) {
    if (isa<LocalAsMetadata>(VAM)) {
      assert(MetadataMap.count(VAM) &&
             "LocalAsMetadata should be enumerated before DIArgList");
      assert(MetadataMap[VAM].F == F &&
             "Expected LocalAsMetadata in the same function");
    } else {
      assert(isa<ConstantAsMetadata>(VAM) &&
             "Expected LocalAsMetadata or ConstantAsMetadata");
      assert(ValueMap.count(VAM->getValue()) &&
             "Constant should be enumerated before DIArgList");
      EnumerateMetadata(F, VAM);
    }
  }

  MDs.push_back(ArgList);
  Index.F = F;
  Index.ID = MDs.size();
}

static unsigned getMetadataTypeOrder(const Metadata *MD) {
  // Strings are emitted in bulk and must come first.
  if (isa<MDString>(MD))
    return 0;

  // ConstantAsMetadata doesn't reference anything. We may as well shuffle it
  // to the front since we can detect it.
  auto *N = dyn_cast<MDNode>(MD);
  if (!N)
    return 1;

  // The reader is fast at resolving forward references to distinct node
  // operands, but slow when uniqued operands are unresolved.
  return N->isDistinct() ? 2 : 3;
}
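
// Illustrative ordering under this key: for the metadata
//   !"producer" (MDString), i32 7 (ConstantAsMetadata),
//   distinct !{...}, and a uniqued !{...}
// the buckets are 0, 1, 2 and 3 respectively, so strings are emitted first in
// bulk, simple leaves next, then distinct nodes, and uniqued nodes last.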

void ValueEnumerator::organizeMetadata() {
  assert(MetadataMap.size() == MDs.size() &&
         "Metadata map and vector out of sync");

  if (MDs.empty())
    return;

  // Copy out the index information from MetadataMap in order to choose a new
  // order.
  SmallVector<MDIndex, 64> Order;
  Order.reserve(MetadataMap.size());
  for (const Metadata *MD : MDs)
    Order.push_back(MetadataMap.lookup(MD));

  // Partition:
  //   - by function, then
  //   - by isa<MDString>
  // and then sort by the original/current ID. Since the IDs are guaranteed to
  // be unique, the result of std::sort will be deterministic. There's no need
  // for std::stable_sort.
  llvm::sort(Order, [this](MDIndex LHS, MDIndex RHS) {
    return std::make_tuple(LHS.F, getMetadataTypeOrder(LHS.get(MDs)), LHS.ID) <
           std::make_tuple(RHS.F, getMetadataTypeOrder(RHS.get(MDs)), RHS.ID);
  });

  // Rebuild MDs, index the metadata ranges for each function in FunctionMDs,
  // and fix up MetadataMap.
  std::vector<const Metadata *> OldMDs;
  MDs.swap(OldMDs);
  MDs.reserve(OldMDs.size());
  for (unsigned I = 0, E = Order.size(); I != E && !Order[I].F; ++I) {
    auto *MD = Order[I].get(OldMDs);
    MDs.push_back(MD);
    MetadataMap[MD].ID = I + 1;
    if (isa<MDString>(MD))
      ++NumMDStrings;
  }

  // Return early if there's nothing for the functions.
  if (MDs.size() == Order.size())
    return;

  // Build the function metadata ranges.
  MDRange R;
  FunctionMDs.reserve(OldMDs.size());
  unsigned PrevF = 0;
  for (unsigned I = MDs.size(), E = Order.size(), ID = MDs.size(); I != E;
       ++I) {
    unsigned F = Order[I].F;
    if (!PrevF) {
      PrevF = F;
    } else if (PrevF != F) {
      R.Last = FunctionMDs.size();
      std::swap(R, FunctionMDInfo[PrevF]);
      R.First = FunctionMDs.size();

      ID = MDs.size();
      PrevF = F;
    }

    auto *MD = Order[I].get(OldMDs);
    FunctionMDs.push_back(MD);
    MetadataMap[MD].ID = ++ID;
    if (isa<MDString>(MD))
      ++R.NumStrings;
  }
  R.Last = FunctionMDs.size();
  FunctionMDInfo[PrevF] = R;
}
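
// After this pass, for illustration, MDs holds only module-level metadata
// (strings first, NumMDStrings of them), while FunctionMDs is one flat vector
// in which FunctionMDInfo -- keyed by the function's value ID plus one --
// records the [First, Last) slice and string count belonging to each
// function; incorporateFunctionMetadata() below simply appends that slice
// when a function is being written.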

void ValueEnumerator::incorporateFunctionMetadata(const Function &F) {
  NumModuleMDs = MDs.size();

  auto R = FunctionMDInfo.lookup(getValueID(&F) + 1);
  NumMDStrings = R.NumStrings;
  MDs.insert(MDs.end(), FunctionMDs.begin() + R.First,
             FunctionMDs.begin() + R.Last);
}

void ValueEnumerator::EnumerateValue(const Value *V) {
  assert(!V->getType()->isVoidTy() && "Can't insert void values!");
  assert(!isa<MetadataAsValue>(V) && "EnumerateValue doesn't handle Metadata!");

  // Check to see if it's already in!
  unsigned &ValueID = ValueMap[V];
  if (ValueID) {
    // Increment use count.
    Values[ValueID-1].second++;
    return;
  }

  if (auto *GO = dyn_cast<GlobalObject>(V))
    if (const Comdat *C = GO->getComdat())
      Comdats.insert(C);

  // Enumerate the type of this value.
  EnumerateType(V->getType());

  if (const Constant *C = dyn_cast<Constant>(V)) {
    if (isa<GlobalValue>(C)) {
      // Initializers for globals are handled explicitly elsewhere.
    } else if (C->getNumOperands()) {
      // If a constant has operands, enumerate them. This makes sure that if a
      // constant has uses (for example an array of const ints), that they are
      // inserted also.

      // We prefer to enumerate the operands before we enumerate the user
      // itself. This makes it more likely that we can avoid forward references
      // in the reader. We know that there can be no cycles in the constants
      // graph that don't go through a global variable.
      for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
           I != E; ++I)
        if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress.
          EnumerateValue(*I);
      if (auto *CE = dyn_cast<ConstantExpr>(C))
        if (CE->getOpcode() == Instruction::ShuffleVector)
          EnumerateValue(CE->getShuffleMaskForBitcode());

      // Finally, add the value. Doing this may have invalidated the ValueID
      // reference above, so don't reuse it.
      Values.push_back(std::make_pair(V, 1U));
      ValueMap[V] = Values.size();
      return;
    }
  }

  // Add the value.
  Values.push_back(std::make_pair(V, 1U));
  ValueID = Values.size();
}
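
// For example (illustrative), enumerating the constant
//   [2 x i32] [i32 1, i32 2]
// first enumerates i32 1 and i32 2 (and their type) and only then appends the
// array itself, so the reader sees the elements before the aggregate and
// needs no forward references for it.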

void ValueEnumerator::EnumerateType(Type *Ty) {
  unsigned *TypeID = &TypeMap[Ty];

  // We've already seen this type.
  if (*TypeID)
    return;

  // If it is a non-anonymous struct, mark the type as being visited so that we
  // don't recursively visit it. This is safe because we allow forward
  // references of these in the bitcode reader.
  if (StructType *STy = dyn_cast<StructType>(Ty))
    if (!STy->isLiteral())
      *TypeID = ~0U;

  // Enumerate all of the subtypes before we enumerate this type. This ensures
  // that the type will be enumerated in an order that can be directly built.
  for (Type *SubTy : Ty->subtypes())
    EnumerateType(SubTy);

  // Refresh the TypeID pointer in case the table rehashed.
  TypeID = &TypeMap[Ty];

  // Check to see if we got the pointer another way. This can happen when
  // enumerating recursive types that hit the base case deeper than they start.
  //
  // If this is actually a struct that we are treating as forward ref'able,
  // then emit the definition now that all of its contents are available.
  if (*TypeID && *TypeID != ~0U)
    return;

  // Add this type now that its contents are all happily enumerated.
  Types.push_back(Ty);

  *TypeID = Types.size();
}
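
// Example of the forward-reference handling above (illustrative): for
//   %node = type { i32, %node* }
// the named struct is first marked with the sentinel ~0U, its subtypes (i32
// and %node*) are enumerated -- the recursive visit of %node hits the
// sentinel and stops -- and only then is %node itself appended to Types and
// given its real ID.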

// Enumerate the types for the specified value. If the value is a constant,
// walk through it, enumerating the types of the constant.
void ValueEnumerator::EnumerateOperandType(const Value *V) {
  EnumerateType(V->getType());

  assert(!isa<MetadataAsValue>(V) && "Unexpected metadata operand");

  const Constant *C = dyn_cast<Constant>(V);
  if (!C)
    return;

  // If this constant is already enumerated, ignore it, we know its type must
  // be enumerated.
  if (ValueMap.count(C))
    return;

  // This constant may have operands, make sure to enumerate the types in
  // them.
  for (const Value *Op : C->operands()) {
    // Don't enumerate basic blocks here, this happens as operands to
    // blockaddress.
    if (isa<BasicBlock>(Op))
      continue;

    EnumerateOperandType(Op);
  }
  if (auto *CE = dyn_cast<ConstantExpr>(C))
    if (CE->getOpcode() == Instruction::ShuffleVector)
      EnumerateOperandType(CE->getShuffleMaskForBitcode());
}

void ValueEnumerator::EnumerateAttributes(AttributeList PAL) {
  if (PAL.isEmpty()) return; // null is always 0.

  // Do a lookup.
  unsigned &Entry = AttributeListMap[PAL];
  if (Entry == 0) {
    // Never saw this before, add it.
    AttributeLists.push_back(PAL);
    Entry = AttributeLists.size();
  }

  // Do lookups for all attribute groups.
  for (unsigned i = PAL.index_begin(), e = PAL.index_end(); i != e; ++i) {
    AttributeSet AS = PAL.getAttributes(i);
    if (!AS.hasAttributes())
      continue;
    IndexAndAttrSet Pair = {i, AS};
    unsigned &Entry = AttributeGroupMap[Pair];
    if (Entry == 0) {
      AttributeGroups.push_back(Pair);
      Entry = AttributeGroups.size();
    }
  }
}

void ValueEnumerator::incorporateFunction(const Function &F) {
  InstructionCount = 0;
  NumModuleValues = Values.size();

  // Add global metadata to the function block. This doesn't include
  // LocalAsMetadata.
  incorporateFunctionMetadata(F);

  // Adding function arguments to the value table.
  for (const auto &I : F.args()) {
    EnumerateValue(&I);
    if (I.hasAttribute(Attribute::ByVal))
      EnumerateType(I.getParamByValType());
    else if (I.hasAttribute(Attribute::StructRet))
      EnumerateType(I.getParamStructRetType());
    else if (I.hasAttribute(Attribute::ByRef))
      EnumerateType(I.getParamByRefType());
  }
  FirstFuncConstantID = Values.size();

  // Add all function-level constants to the value table.
  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      for (const Use &OI : I.operands()) {
        if ((isa<Constant>(OI) && !isa<GlobalValue>(OI)) || isa<InlineAsm>(OI))
          EnumerateValue(OI);
      }
      if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
        EnumerateValue(SVI->getShuffleMaskForBitcode());
    }
    BasicBlocks.push_back(&BB);
    ValueMap[&BB] = BasicBlocks.size();
  }

  // Optimize the constant layout.
  OptimizeConstants(FirstFuncConstantID, Values.size());

  // Add the function's parameter attributes so they are available for use in
  // the function's instructions.
  EnumerateAttributes(F.getAttributes());

  FirstInstID = Values.size();

  SmallVector<LocalAsMetadata *, 8> FnLocalMDVector;
  SmallVector<DIArgList *, 8> ArgListMDVector;
  // Add all of the instructions.
  for (const BasicBlock &BB : F) {
    for (const Instruction &I : BB) {
      for (const Use &OI : I.operands()) {
        if (auto *MD = dyn_cast<MetadataAsValue>(&OI)) {
          if (auto *Local = dyn_cast<LocalAsMetadata>(MD->getMetadata())) {
            // Enumerate metadata after the instructions they might refer to.
            FnLocalMDVector.push_back(Local);
          } else if (auto *ArgList = dyn_cast<DIArgList>(MD->getMetadata())) {
            ArgListMDVector.push_back(ArgList);
            for (ValueAsMetadata *VMD : ArgList->getArgs()) {
              if (auto *Local = dyn_cast<LocalAsMetadata>(VMD)) {
                // Enumerate metadata after the instructions they might refer
                // to.
                FnLocalMDVector.push_back(Local);
              }
            }
          }
        }
      }

      if (!I.getType()->isVoidTy())
        EnumerateValue(&I);
    }
  }

  // Add all of the function-local metadata.
  for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i) {
    // At this point, all local values have been incorporated; we shouldn't
    // have a metadata operand that references a value that hasn't been seen.
    assert(ValueMap.count(FnLocalMDVector[i]->getValue()) &&
           "Missing value for metadata operand");
    EnumerateFunctionLocalMetadata(F, FnLocalMDVector[i]);
  }
  // DIArgList entries must come after function-local metadata, as it is not
  // possible to forward-reference them.
  for (const DIArgList *ArgList : ArgListMDVector)
    EnumerateFunctionLocalListMetadata(F, ArgList);
}

void ValueEnumerator::purgeFunction() {
  // Remove purged values from the ValueMap.
  for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
    ValueMap.erase(Values[i].first);
  for (unsigned i = NumModuleMDs, e = MDs.size(); i != e; ++i)
    MetadataMap.erase(MDs[i]);
  for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
    ValueMap.erase(BasicBlocks[i]);

  Values.resize(NumModuleValues);
  MDs.resize(NumModuleMDs);
  BasicBlocks.clear();
  NumMDStrings = 0;
}

static void IncorporateFunctionInfoGlobalBBIDs(
    const Function *F, DenseMap<const BasicBlock *, unsigned> &IDMap) {
  unsigned Counter = 0;
  for (const BasicBlock &BB : *F)
    IDMap[&BB] = ++Counter;
}

/// getGlobalBasicBlockID - This returns the function-specific ID for the
/// specified basic block. This is relatively expensive information, so it
/// should only be used by rare constructs such as address-of-label.
unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const {
  unsigned &Idx = GlobalBasicBlockIDs[BB];
  if (Idx != 0)
    return Idx-1;

  IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);
  return getGlobalBasicBlockID(BB);
}

uint64_t ValueEnumerator::computeBitsRequiredForTypeIndicies() const {
  return Log2_32_Ceil(getTypes().size() + 1);
}
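
// For example, a module that has enumerated 10 types needs
// Log2_32_Ceil(10 + 1) == 4 bits to encode a type index, e.g. when the writer
// defines abbreviations for records that carry a type field.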