xref: /llvm-project/llvm/tools/verify-uselistorder/verify-uselistorder.cpp (revision acd7a688fcd26ce4d72cecbddeddef788482e17e)
//===- verify-uselistorder.cpp - Verify use-list order --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Verify that use-list order can be serialized correctly.  After reading the
10 // provided IR, this tool shuffles the use-lists and then writes and reads to a
11 // separate Module whose use-list orders are compared to the original.
12 //
13 // The shuffles are deterministic, but guarantee that use-lists will change.
14 // The algorithm per iteration is as follows:
15 //
16 //  1. Seed the random number generator.  The seed is different for each
17 //     shuffle.  Shuffle 0 uses default+0, shuffle 1 uses default+1, and so on.
18 //
19 //  2. Visit every Value in a deterministic order.
20 //
21 //  3. Assign a random number to each Use in the Value's use-list in order.
22 //
23 //  4. If the numbers are already in order, reassign numbers until they aren't.
24 //
25 //  5. Sort the use-list using Value::sortUseList(), which is a stable sort.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/DenseSet.h"
31 #include "llvm/AsmParser/Parser.h"
32 #include "llvm/Bitcode/BitcodeReader.h"
33 #include "llvm/Bitcode/BitcodeWriter.h"
34 #include "llvm/IR/LLVMContext.h"
35 #include "llvm/IR/Module.h"
36 #include "llvm/IR/UseListOrder.h"
37 #include "llvm/IR/Verifier.h"
38 #include "llvm/IRReader/IRReader.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/FileSystem.h"
43 #include "llvm/Support/FileUtilities.h"
44 #include "llvm/Support/InitLLVM.h"
45 #include "llvm/Support/MemoryBuffer.h"
46 #include "llvm/Support/SourceMgr.h"
47 #include "llvm/Support/SystemUtils.h"
48 #include "llvm/Support/raw_ostream.h"
49 #include <random>
50 #include <vector>
51 
52 using namespace llvm;
53 
54 #define DEBUG_TYPE "uselistorder"
55 
56 static cl::OptionCategory Cat("verify-uselistorder Options");
57 
58 static cl::opt<std::string> InputFilename(cl::Positional,
59                                           cl::desc("<input bitcode file>"),
60                                           cl::init("-"),
61                                           cl::value_desc("filename"));
62 
63 static cl::opt<bool> SaveTemps("save-temps", cl::desc("Save temp files"),
64                                cl::cat(Cat));
65 
66 static cl::opt<unsigned>
67     NumShuffles("num-shuffles",
68                 cl::desc("Number of times to shuffle and verify use-lists"),
69                 cl::init(1), cl::cat(Cat));
70 
71 extern cl::opt<cl::boolOrDefault> PreserveInputDbgFormat;
72 
73 namespace {
74 
75 struct TempFile {
76   std::string Filename;
77   FileRemover Remover;
78   bool init(const std::string &Ext);
79   bool writeBitcode(const Module &M) const;
80   bool writeAssembly(const Module &M) const;
81   std::unique_ptr<Module> readBitcode(LLVMContext &Context) const;
82   std::unique_ptr<Module> readAssembly(LLVMContext &Context) const;
83 };
84 
85 struct ValueMapping {
86   DenseMap<const Value *, unsigned> IDs;
87   std::vector<const Value *> Values;
88 
89   /// Construct a value mapping for module.
90   ///
91   /// Creates mapping from every value in \c M to an ID.  This mapping includes
92   /// un-referencable values.
93   ///
94   /// Every \a Value that gets serialized in some way should be represented
95   /// here.  The order needs to be deterministic, but it's unnecessary to match
96   /// the value-ids in the bitcode writer.
97   ///
98   /// All constants that are referenced by other values are included in the
99   /// mapping, but others -- which wouldn't be serialized -- are not.
100   ValueMapping(const Module &M);
101 
102   /// Map a value.
103   ///
104   /// Maps a value.  If it's a constant, maps all of its operands first.
105   void map(const Value *V);
lookup__anon3353bbfa0111::ValueMapping106   unsigned lookup(const Value *V) const { return IDs.lookup(V); }
107 };
108 
109 } // end namespace
110 
init(const std::string & Ext)111 bool TempFile::init(const std::string &Ext) {
112   SmallVector<char, 64> Vector;
113   LLVM_DEBUG(dbgs() << " - create-temp-file\n");
114   if (auto EC = sys::fs::createTemporaryFile("uselistorder", Ext, Vector)) {
115     errs() << "verify-uselistorder: error: " << EC.message() << "\n";
116     return true;
117   }
118   assert(!Vector.empty());
119 
120   Filename.assign(Vector.data(), Vector.data() + Vector.size());
121   Remover.setFile(Filename, !SaveTemps);
122   if (SaveTemps)
123     outs() << " - filename = " << Filename << "\n";
124   return false;
125 }
126 
writeBitcode(const Module & M) const127 bool TempFile::writeBitcode(const Module &M) const {
128   LLVM_DEBUG(dbgs() << " - write bitcode\n");
129   std::error_code EC;
130   raw_fd_ostream OS(Filename, EC, sys::fs::OF_None);
131   if (EC) {
132     errs() << "verify-uselistorder: error: " << EC.message() << "\n";
133     return true;
134   }
135 
136   WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true);
137   return false;
138 }
139 
writeAssembly(const Module & M) const140 bool TempFile::writeAssembly(const Module &M) const {
141   LLVM_DEBUG(dbgs() << " - write assembly\n");
142   std::error_code EC;
143   raw_fd_ostream OS(Filename, EC, sys::fs::OF_TextWithCRLF);
144   if (EC) {
145     errs() << "verify-uselistorder: error: " << EC.message() << "\n";
146     return true;
147   }
148 
149   M.print(OS, nullptr, /* ShouldPreserveUseListOrder */ true);
150   return false;
151 }
152 
readBitcode(LLVMContext & Context) const153 std::unique_ptr<Module> TempFile::readBitcode(LLVMContext &Context) const {
154   LLVM_DEBUG(dbgs() << " - read bitcode\n");
155   ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOr =
156       MemoryBuffer::getFile(Filename);
157   if (!BufferOr) {
158     errs() << "verify-uselistorder: error: " << BufferOr.getError().message()
159            << "\n";
160     return nullptr;
161   }
162 
163   MemoryBuffer *Buffer = BufferOr.get().get();
164   Expected<std::unique_ptr<Module>> ModuleOr =
165       parseBitcodeFile(Buffer->getMemBufferRef(), Context);
166   if (!ModuleOr) {
167     logAllUnhandledErrors(ModuleOr.takeError(), errs(),
168                           "verify-uselistorder: error: ");
169     return nullptr;
170   }
171 
172   return std::move(ModuleOr.get());
173 }
174 
readAssembly(LLVMContext & Context) const175 std::unique_ptr<Module> TempFile::readAssembly(LLVMContext &Context) const {
176   LLVM_DEBUG(dbgs() << " - read assembly\n");
177   SMDiagnostic Err;
178   std::unique_ptr<Module> M = parseAssemblyFile(Filename, Err, Context);
179   if (!M)
180     Err.print("verify-uselistorder", errs());
181   return M;
182 }
183 
ValueMapping(const Module & M)184 ValueMapping::ValueMapping(const Module &M) {
185   // Every value should be mapped, including things like void instructions and
186   // basic blocks that are kept out of the ValueEnumerator.
187   //
188   // The current mapping order makes it easier to debug the tables.  It happens
189   // to be similar to the ID mapping when writing ValueEnumerator, but they
190   // aren't (and needn't be) in sync.
191 
192   // Globals.
193   for (const GlobalVariable &G : M.globals())
194     map(&G);
195   for (const GlobalAlias &A : M.aliases())
196     map(&A);
197   for (const GlobalIFunc &IF : M.ifuncs())
198     map(&IF);
199   for (const Function &F : M)
200     map(&F);
201 
202   // Constants used by globals.
203   for (const GlobalVariable &G : M.globals())
204     if (G.hasInitializer())
205       map(G.getInitializer());
206   for (const GlobalAlias &A : M.aliases())
207     map(A.getAliasee());
208   for (const GlobalIFunc &IF : M.ifuncs())
209     map(IF.getResolver());
210   for (const Function &F : M)
211     for (Value *Op : F.operands())
212       map(Op);
213 
214   // Function bodies.
215   for (const Function &F : M) {
216     for (const Argument &A : F.args())
217       map(&A);
218     for (const BasicBlock &BB : F)
219       map(&BB);
220     for (const BasicBlock &BB : F)
221       for (const Instruction &I : BB)
222         map(&I);
223 
224     // Constants used by instructions.
225     for (const BasicBlock &BB : F) {
226       for (const Instruction &I : BB) {
227         for (const DbgVariableRecord &DVR :
228              filterDbgVars(I.getDbgRecordRange())) {
229           for (Value *Op : DVR.location_ops())
230             map(Op);
231           if (DVR.isDbgAssign())
232             map(DVR.getAddress());
233         }
234         for (const Value *Op : I.operands()) {
235           // Look through a metadata wrapper.
236           if (const auto *MAV = dyn_cast<MetadataAsValue>(Op))
237             if (const auto *VAM = dyn_cast<ValueAsMetadata>(MAV->getMetadata()))
238               Op = VAM->getValue();
239 
240           if ((isa<Constant>(Op) && !isa<GlobalValue>(*Op)) ||
241               isa<InlineAsm>(Op))
242             map(Op);
243         }
244       }
245     }
246   }
247 }
248 
map(const Value * V)249 void ValueMapping::map(const Value *V) {
250   if (IDs.lookup(V))
251     return;
252 
253   if (auto *C = dyn_cast<Constant>(V))
254     if (!isa<GlobalValue>(C))
255       for (const Value *Op : C->operands())
256         map(Op);
257 
258   Values.push_back(V);
259   IDs[V] = Values.size();
260 }
261 
262 #ifndef NDEBUG
dumpMapping(const ValueMapping & VM)263 static void dumpMapping(const ValueMapping &VM) {
264   dbgs() << "value-mapping (size = " << VM.Values.size() << "):\n";
265   for (unsigned I = 0, E = VM.Values.size(); I != E; ++I) {
266     dbgs() << " - id = " << I << ", value = ";
267     VM.Values[I]->dump();
268   }
269 }
270 
debugValue(const ValueMapping & M,unsigned I,StringRef Desc)271 static void debugValue(const ValueMapping &M, unsigned I, StringRef Desc) {
272   const Value *V = M.Values[I];
273   dbgs() << " - " << Desc << " value = ";
274   V->dump();
275   for (const Use &U : V->uses()) {
276     dbgs() << "   => use: op = " << U.getOperandNo()
277            << ", user-id = " << M.IDs.lookup(U.getUser()) << ", user = ";
278     U.getUser()->dump();
279   }
280 }
281 
debugUserMismatch(const ValueMapping & L,const ValueMapping & R,unsigned I)282 static void debugUserMismatch(const ValueMapping &L, const ValueMapping &R,
283                               unsigned I) {
284   dbgs() << " - fail: user mismatch: ID = " << I << "\n";
285   debugValue(L, I, "LHS");
286   debugValue(R, I, "RHS");
287 
288   dbgs() << "\nlhs-";
289   dumpMapping(L);
290   dbgs() << "\nrhs-";
291   dumpMapping(R);
292 }
293 
debugSizeMismatch(const ValueMapping & L,const ValueMapping & R)294 static void debugSizeMismatch(const ValueMapping &L, const ValueMapping &R) {
295   dbgs() << " - fail: map size: " << L.Values.size()
296          << " != " << R.Values.size() << "\n";
297   dbgs() << "\nlhs-";
298   dumpMapping(L);
299   dbgs() << "\nrhs-";
300   dumpMapping(R);
301 }
302 #endif
303 
matches(const ValueMapping & LM,const ValueMapping & RM)304 static bool matches(const ValueMapping &LM, const ValueMapping &RM) {
305   LLVM_DEBUG(dbgs() << "compare value maps\n");
306   if (LM.Values.size() != RM.Values.size()) {
307     LLVM_DEBUG(debugSizeMismatch(LM, RM));
308     return false;
309   }
310 
311   // This mapping doesn't include dangling constant users, since those don't
312   // get serialized.  However, checking if users are constant and calling
313   // isConstantUsed() on every one is very expensive.  Instead, just check if
314   // the user is mapped.
315   auto skipUnmappedUsers =
316       [&](Value::const_use_iterator &U, Value::const_use_iterator E,
317           const ValueMapping &M) {
318     while (U != E && !M.lookup(U->getUser()))
319       ++U;
320   };
321 
322   // Iterate through all values, and check that both mappings have the same
323   // users.
324   for (unsigned I = 0, E = LM.Values.size(); I != E; ++I) {
325     const Value *L = LM.Values[I];
326     const Value *R = RM.Values[I];
327     auto LU = L->use_begin(), LE = L->use_end();
328     auto RU = R->use_begin(), RE = R->use_end();
329     skipUnmappedUsers(LU, LE, LM);
330     skipUnmappedUsers(RU, RE, RM);
331 
332     while (LU != LE) {
333       if (RU == RE) {
334         LLVM_DEBUG(debugUserMismatch(LM, RM, I));
335         return false;
336       }
337       if (LM.lookup(LU->getUser()) != RM.lookup(RU->getUser())) {
338         LLVM_DEBUG(debugUserMismatch(LM, RM, I));
339         return false;
340       }
341       if (LU->getOperandNo() != RU->getOperandNo()) {
342         LLVM_DEBUG(debugUserMismatch(LM, RM, I));
343         return false;
344       }
345       skipUnmappedUsers(++LU, LE, LM);
346       skipUnmappedUsers(++RU, RE, RM);
347     }
348     if (RU != RE) {
349       LLVM_DEBUG(debugUserMismatch(LM, RM, I));
350       return false;
351     }
352   }
353 
354   return true;
355 }
356 
verifyAfterRoundTrip(const Module & M,std::unique_ptr<Module> OtherM)357 static void verifyAfterRoundTrip(const Module &M,
358                                  std::unique_ptr<Module> OtherM) {
359   if (!OtherM)
360     report_fatal_error("parsing failed");
361   if (verifyModule(*OtherM, &errs()))
362     report_fatal_error("verification failed");
363   if (!matches(ValueMapping(M), ValueMapping(*OtherM)))
364     report_fatal_error("use-list order changed");
365 }
366 
verifyBitcodeUseListOrder(const Module & M)367 static void verifyBitcodeUseListOrder(const Module &M) {
368   TempFile F;
369   if (F.init("bc"))
370     report_fatal_error("failed to initialize bitcode file");
371 
372   if (F.writeBitcode(M))
373     report_fatal_error("failed to write bitcode");
374 
375   LLVMContext Context;
376   verifyAfterRoundTrip(M, F.readBitcode(Context));
377 }
378 
verifyAssemblyUseListOrder(const Module & M)379 static void verifyAssemblyUseListOrder(const Module &M) {
380   TempFile F;
381   if (F.init("ll"))
382     report_fatal_error("failed to initialize assembly file");
383 
384   if (F.writeAssembly(M))
385     report_fatal_error("failed to write assembly");
386 
387   LLVMContext Context;
388   verifyAfterRoundTrip(M, F.readAssembly(Context));
389 }
390 
verifyUseListOrder(const Module & M)391 static void verifyUseListOrder(const Module &M) {
392   outs() << "verify bitcode\n";
393   verifyBitcodeUseListOrder(M);
394   outs() << "verify assembly\n";
395   verifyAssemblyUseListOrder(M);
396 }
397 
shuffleValueUseLists(Value * V,std::minstd_rand0 & Gen,DenseSet<Value * > & Seen)398 static void shuffleValueUseLists(Value *V, std::minstd_rand0 &Gen,
399                                  DenseSet<Value *> &Seen) {
400   if (!Seen.insert(V).second)
401     return;
402 
403   if (auto *C = dyn_cast<Constant>(V))
404     if (!isa<GlobalValue>(C))
405       for (Value *Op : C->operands())
406         shuffleValueUseLists(Op, Gen, Seen);
407 
408   if (V->use_empty() || std::next(V->use_begin()) == V->use_end())
409     // Nothing to shuffle for 0 or 1 users.
410     return;
411 
412   // Generate random numbers between 10 and 99, which will line up nicely in
413   // debug output.  We're not worried about collisions here.
414   LLVM_DEBUG(dbgs() << "V = "; V->dump());
415   std::uniform_int_distribution<short> Dist(10, 99);
416   SmallDenseMap<const Use *, short, 16> Order;
417   auto compareUses =
418       [&Order](const Use &L, const Use &R) { return Order[&L] < Order[&R]; };
419   do {
420     for (const Use &U : V->uses()) {
421       auto I = Dist(Gen);
422       Order[&U] = I;
423       LLVM_DEBUG(dbgs() << " - order: " << I << ", op = " << U.getOperandNo()
424                         << ", U = ";
425                  U.getUser()->dump());
426     }
427   } while (std::is_sorted(V->use_begin(), V->use_end(), compareUses));
428 
429   LLVM_DEBUG(dbgs() << " => shuffle\n");
430   V->sortUseList(compareUses);
431 
432   LLVM_DEBUG({
433     for (const Use &U : V->uses()) {
434       dbgs() << " - order: " << Order.lookup(&U)
435              << ", op = " << U.getOperandNo() << ", U = ";
436       U.getUser()->dump();
437     }
438   });
439 }
440 
reverseValueUseLists(Value * V,DenseSet<Value * > & Seen)441 static void reverseValueUseLists(Value *V, DenseSet<Value *> &Seen) {
442   if (!Seen.insert(V).second)
443     return;
444 
445   if (auto *C = dyn_cast<Constant>(V))
446     if (!isa<GlobalValue>(C))
447       for (Value *Op : C->operands())
448         reverseValueUseLists(Op, Seen);
449 
450   if (V->use_empty() || std::next(V->use_begin()) == V->use_end())
451     // Nothing to shuffle for 0 or 1 users.
452     return;
453 
454   LLVM_DEBUG({
455     dbgs() << "V = ";
456     V->dump();
457     for (const Use &U : V->uses()) {
458       dbgs() << " - order: op = " << U.getOperandNo() << ", U = ";
459       U.getUser()->dump();
460     }
461     dbgs() << " => reverse\n";
462   });
463 
464   V->reverseUseList();
465 
466   LLVM_DEBUG({
467     for (const Use &U : V->uses()) {
468       dbgs() << " - order: op = " << U.getOperandNo() << ", U = ";
469       U.getUser()->dump();
470     }
471   });
472 }
473 
474 template <class Changer>
changeUseLists(Module & M,Changer changeValueUseList)475 static void changeUseLists(Module &M, Changer changeValueUseList) {
476   // Visit every value that would be serialized to an IR file.
477   //
478   // Globals.
479   for (GlobalVariable &G : M.globals())
480     changeValueUseList(&G);
481   for (GlobalAlias &A : M.aliases())
482     changeValueUseList(&A);
483   for (GlobalIFunc &IF : M.ifuncs())
484     changeValueUseList(&IF);
485   for (Function &F : M)
486     changeValueUseList(&F);
487 
488   // Constants used by globals.
489   for (GlobalVariable &G : M.globals())
490     if (G.hasInitializer())
491       changeValueUseList(G.getInitializer());
492   for (GlobalAlias &A : M.aliases())
493     changeValueUseList(A.getAliasee());
494   for (GlobalIFunc &IF : M.ifuncs())
495     changeValueUseList(IF.getResolver());
496   for (Function &F : M)
497     for (Value *Op : F.operands())
498       changeValueUseList(Op);
499 
500   // Function bodies.
501   for (Function &F : M) {
502     for (Argument &A : F.args())
503       changeValueUseList(&A);
504     for (BasicBlock &BB : F)
505       changeValueUseList(&BB);
506     for (BasicBlock &BB : F)
507       for (Instruction &I : BB)
508         changeValueUseList(&I);
509 
510     // Constants used by instructions.
511     for (BasicBlock &BB : F)
512       for (Instruction &I : BB)
513         for (Value *Op : I.operands()) {
514           // Look through a metadata wrapper.
515           if (auto *MAV = dyn_cast<MetadataAsValue>(Op))
516             if (auto *VAM = dyn_cast<ValueAsMetadata>(MAV->getMetadata()))
517               Op = VAM->getValue();
518           if ((isa<Constant>(Op) && !isa<GlobalValue>(*Op)) ||
519               isa<InlineAsm>(Op))
520             changeValueUseList(Op);
521         }
522   }
523 
524   if (verifyModule(M, &errs()))
525     report_fatal_error("verification failed");
526 }
527 
shuffleUseLists(Module & M,unsigned SeedOffset)528 static void shuffleUseLists(Module &M, unsigned SeedOffset) {
529   std::minstd_rand0 Gen(std::minstd_rand0::default_seed + SeedOffset);
530   DenseSet<Value *> Seen;
531   changeUseLists(M, [&](Value *V) { shuffleValueUseLists(V, Gen, Seen); });
532   LLVM_DEBUG(dbgs() << "\n");
533 }
534 
reverseUseLists(Module & M)535 static void reverseUseLists(Module &M) {
536   DenseSet<Value *> Seen;
537   changeUseLists(M, [&](Value *V) { reverseValueUseLists(V, Seen); });
538   LLVM_DEBUG(dbgs() << "\n");
539 }
540 
main(int argc,char ** argv)541 int main(int argc, char **argv) {
542   PreserveInputDbgFormat = cl::boolOrDefault::BOU_TRUE;
543   InitLLVM X(argc, argv);
544 
545   // Enable debug stream buffering.
546   EnableDebugBuffering = true;
547 
548   cl::HideUnrelatedOptions(Cat);
549   cl::ParseCommandLineOptions(argc, argv,
550                               "llvm tool to verify use-list order\n");
551 
552   LLVMContext Context;
553   SMDiagnostic Err;
554 
555   // Load the input module...
556   std::unique_ptr<Module> M = parseIRFile(InputFilename, Err, Context);
557 
558   if (!M) {
559     Err.print(argv[0], errs());
560     return 1;
561   }
562   if (verifyModule(*M, &errs())) {
563     errs() << argv[0] << ": " << InputFilename
564            << ": error: input module is broken!\n";
565     return 1;
566   }
567 
568   // Verify the use lists now and after reversing them.
569   outs() << "*** verify-uselistorder ***\n";
570   verifyUseListOrder(*M);
571   outs() << "reverse\n";
572   reverseUseLists(*M);
573   verifyUseListOrder(*M);
574 
575   for (unsigned I = 0, E = NumShuffles; I != E; ++I) {
576     outs() << "\n";
577 
578     // Shuffle with a different (deterministic) seed each time.
579     outs() << "shuffle (" << I + 1 << " of " << E << ")\n";
580     shuffleUseLists(*M, I);
581 
582     // Verify again before and after reversing.
583     verifyUseListOrder(*M);
584     outs() << "reverse\n";
585     reverseUseLists(*M);
586     verifyUseListOrder(*M);
587   }
588 
589   return 0;
590 }
591