xref: /netbsd-src/external/apache2/llvm/dist/llvm/tools/verify-uselistorder/verify-uselistorder.cpp (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 //===- verify-uselistorder.cpp - Verify use-list order round-trips --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Verify that use-list order can be serialized correctly.  After reading the
10 // provided IR, this tool shuffles the use-lists and then writes and reads to a
11 // separate Module whose use-list orders are compared to the original.
12 //
13 // The shuffles are deterministic, but guarantee that use-lists will change.
14 // The algorithm per iteration is as follows:
15 //
16 //  1. Seed the random number generator.  The seed is different for each
17 //     shuffle.  Shuffle 0 uses default+0, shuffle 1 uses default+1, and so on.
18 //
19 //  2. Visit every Value in a deterministic order.
20 //
21 //  3. Assign a random number to each Use in the Value's use-list in order.
22 //
23 //  4. If the numbers are already in order, reassign numbers until they aren't.
24 //
25 //  5. Sort the use-list using Value::sortUseList(), which is a stable sort.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/DenseSet.h"
31 #include "llvm/AsmParser/Parser.h"
32 #include "llvm/Bitcode/BitcodeReader.h"
33 #include "llvm/Bitcode/BitcodeWriter.h"
34 #include "llvm/IR/LLVMContext.h"
35 #include "llvm/IR/Module.h"
36 #include "llvm/IR/UseListOrder.h"
37 #include "llvm/IR/Verifier.h"
38 #include "llvm/IRReader/IRReader.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/FileSystem.h"
43 #include "llvm/Support/FileUtilities.h"
44 #include "llvm/Support/InitLLVM.h"
45 #include "llvm/Support/MemoryBuffer.h"
46 #include "llvm/Support/SourceMgr.h"
47 #include "llvm/Support/SystemUtils.h"
48 #include "llvm/Support/raw_ostream.h"
49 #include <random>
50 #include <vector>
51 
52 using namespace llvm;
53 
54 #define DEBUG_TYPE "uselistorder"
55 
56 static cl::opt<std::string> InputFilename(cl::Positional,
57                                           cl::desc("<input bitcode file>"),
58                                           cl::init("-"),
59                                           cl::value_desc("filename"));
60 
61 static cl::opt<bool> SaveTemps("save-temps", cl::desc("Save temp files"),
62                                cl::init(false));
63 
64 static cl::opt<unsigned>
65     NumShuffles("num-shuffles",
66                 cl::desc("Number of times to shuffle and verify use-lists"),
67                 cl::init(1));
68 
69 namespace {
70 
71 struct TempFile {
72   std::string Filename;
73   FileRemover Remover;
74   bool init(const std::string &Ext);
75   bool writeBitcode(const Module &M) const;
76   bool writeAssembly(const Module &M) const;
77   std::unique_ptr<Module> readBitcode(LLVMContext &Context) const;
78   std::unique_ptr<Module> readAssembly(LLVMContext &Context) const;
79 };
80 
81 struct ValueMapping {
82   DenseMap<const Value *, unsigned> IDs;
83   std::vector<const Value *> Values;
84 
85   /// Construct a value mapping for module.
86   ///
87   /// Creates mapping from every value in \c M to an ID.  This mapping includes
88   /// un-referencable values.
89   ///
90   /// Every \a Value that gets serialized in some way should be represented
91   /// here.  The order needs to be deterministic, but it's unnecessary to match
92   /// the value-ids in the bitcode writer.
93   ///
94   /// All constants that are referenced by other values are included in the
95   /// mapping, but others -- which wouldn't be serialized -- are not.
96   ValueMapping(const Module &M);
97 
98   /// Map a value.
99   ///
100   /// Maps a value.  If it's a constant, maps all of its operands first.
101   void map(const Value *V);
lookup__anon112481120111::ValueMapping102   unsigned lookup(const Value *V) const { return IDs.lookup(V); }
103 };
104 
105 } // end namespace
106 
init(const std::string & Ext)107 bool TempFile::init(const std::string &Ext) {
108   SmallVector<char, 64> Vector;
109   LLVM_DEBUG(dbgs() << " - create-temp-file\n");
110   if (auto EC = sys::fs::createTemporaryFile("uselistorder", Ext, Vector)) {
111     errs() << "verify-uselistorder: error: " << EC.message() << "\n";
112     return true;
113   }
114   assert(!Vector.empty());
115 
116   Filename.assign(Vector.data(), Vector.data() + Vector.size());
117   Remover.setFile(Filename, !SaveTemps);
118   if (SaveTemps)
119     outs() << " - filename = " << Filename << "\n";
120   return false;
121 }
122 
writeBitcode(const Module & M) const123 bool TempFile::writeBitcode(const Module &M) const {
124   LLVM_DEBUG(dbgs() << " - write bitcode\n");
125   std::error_code EC;
126   raw_fd_ostream OS(Filename, EC, sys::fs::OF_None);
127   if (EC) {
128     errs() << "verify-uselistorder: error: " << EC.message() << "\n";
129     return true;
130   }
131 
132   WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true);
133   return false;
134 }
135 
writeAssembly(const Module & M) const136 bool TempFile::writeAssembly(const Module &M) const {
137   LLVM_DEBUG(dbgs() << " - write assembly\n");
138   std::error_code EC;
139   raw_fd_ostream OS(Filename, EC, sys::fs::OF_TextWithCRLF);
140   if (EC) {
141     errs() << "verify-uselistorder: error: " << EC.message() << "\n";
142     return true;
143   }
144 
145   M.print(OS, nullptr, /* ShouldPreserveUseListOrder */ true);
146   return false;
147 }
148 
readBitcode(LLVMContext & Context) const149 std::unique_ptr<Module> TempFile::readBitcode(LLVMContext &Context) const {
150   LLVM_DEBUG(dbgs() << " - read bitcode\n");
151   ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOr =
152       MemoryBuffer::getFile(Filename);
153   if (!BufferOr) {
154     errs() << "verify-uselistorder: error: " << BufferOr.getError().message()
155            << "\n";
156     return nullptr;
157   }
158 
159   MemoryBuffer *Buffer = BufferOr.get().get();
160   Expected<std::unique_ptr<Module>> ModuleOr =
161       parseBitcodeFile(Buffer->getMemBufferRef(), Context);
162   if (!ModuleOr) {
163     logAllUnhandledErrors(ModuleOr.takeError(), errs(),
164                           "verify-uselistorder: error: ");
165     return nullptr;
166   }
167   return std::move(ModuleOr.get());
168 }
169 
readAssembly(LLVMContext & Context) const170 std::unique_ptr<Module> TempFile::readAssembly(LLVMContext &Context) const {
171   LLVM_DEBUG(dbgs() << " - read assembly\n");
172   SMDiagnostic Err;
173   std::unique_ptr<Module> M = parseAssemblyFile(Filename, Err, Context);
174   if (!M.get())
175     Err.print("verify-uselistorder", errs());
176   return M;
177 }
178 
ValueMapping(const Module & M)179 ValueMapping::ValueMapping(const Module &M) {
180   // Every value should be mapped, including things like void instructions and
181   // basic blocks that are kept out of the ValueEnumerator.
182   //
183   // The current mapping order makes it easier to debug the tables.  It happens
184   // to be similar to the ID mapping when writing ValueEnumerator, but they
185   // aren't (and needn't be) in sync.
186 
187   // Globals.
188   for (const GlobalVariable &G : M.globals())
189     map(&G);
190   for (const GlobalAlias &A : M.aliases())
191     map(&A);
192   for (const GlobalIFunc &IF : M.ifuncs())
193     map(&IF);
194   for (const Function &F : M)
195     map(&F);
196 
197   // Constants used by globals.
198   for (const GlobalVariable &G : M.globals())
199     if (G.hasInitializer())
200       map(G.getInitializer());
201   for (const GlobalAlias &A : M.aliases())
202     map(A.getAliasee());
203   for (const GlobalIFunc &IF : M.ifuncs())
204     map(IF.getResolver());
205   for (const Function &F : M) {
206     if (F.hasPrefixData())
207       map(F.getPrefixData());
208     if (F.hasPrologueData())
209       map(F.getPrologueData());
210     if (F.hasPersonalityFn())
211       map(F.getPersonalityFn());
212   }
213 
214   // Function bodies.
215   for (const Function &F : M) {
216     for (const Argument &A : F.args())
217       map(&A);
218     for (const BasicBlock &BB : F)
219       map(&BB);
220     for (const BasicBlock &BB : F)
221       for (const Instruction &I : BB)
222         map(&I);
223 
224     // Constants used by instructions.
225     for (const BasicBlock &BB : F)
226       for (const Instruction &I : BB)
227         for (const Value *Op : I.operands()) {
228           // Look through a metadata wrapper.
229           if (const auto *MAV = dyn_cast<MetadataAsValue>(Op))
230             if (const auto *VAM = dyn_cast<ValueAsMetadata>(MAV->getMetadata()))
231               Op = VAM->getValue();
232 
233           if ((isa<Constant>(Op) && !isa<GlobalValue>(*Op)) ||
234               isa<InlineAsm>(Op))
235             map(Op);
236         }
237   }
238 }
239 
map(const Value * V)240 void ValueMapping::map(const Value *V) {
241   if (IDs.lookup(V))
242     return;
243 
244   if (auto *C = dyn_cast<Constant>(V))
245     if (!isa<GlobalValue>(C))
246       for (const Value *Op : C->operands())
247         map(Op);
248 
249   Values.push_back(V);
250   IDs[V] = Values.size();
251 }
252 
253 #ifndef NDEBUG
dumpMapping(const ValueMapping & VM)254 static void dumpMapping(const ValueMapping &VM) {
255   dbgs() << "value-mapping (size = " << VM.Values.size() << "):\n";
256   for (unsigned I = 0, E = VM.Values.size(); I != E; ++I) {
257     dbgs() << " - id = " << I << ", value = ";
258     VM.Values[I]->dump();
259   }
260 }
261 
debugValue(const ValueMapping & M,unsigned I,StringRef Desc)262 static void debugValue(const ValueMapping &M, unsigned I, StringRef Desc) {
263   const Value *V = M.Values[I];
264   dbgs() << " - " << Desc << " value = ";
265   V->dump();
266   for (const Use &U : V->uses()) {
267     dbgs() << "   => use: op = " << U.getOperandNo()
268            << ", user-id = " << M.IDs.lookup(U.getUser()) << ", user = ";
269     U.getUser()->dump();
270   }
271 }
272 
debugUserMismatch(const ValueMapping & L,const ValueMapping & R,unsigned I)273 static void debugUserMismatch(const ValueMapping &L, const ValueMapping &R,
274                               unsigned I) {
275   dbgs() << " - fail: user mismatch: ID = " << I << "\n";
276   debugValue(L, I, "LHS");
277   debugValue(R, I, "RHS");
278 
279   dbgs() << "\nlhs-";
280   dumpMapping(L);
281   dbgs() << "\nrhs-";
282   dumpMapping(R);
283 }
284 
debugSizeMismatch(const ValueMapping & L,const ValueMapping & R)285 static void debugSizeMismatch(const ValueMapping &L, const ValueMapping &R) {
286   dbgs() << " - fail: map size: " << L.Values.size()
287          << " != " << R.Values.size() << "\n";
288   dbgs() << "\nlhs-";
289   dumpMapping(L);
290   dbgs() << "\nrhs-";
291   dumpMapping(R);
292 }
293 #endif
294 
matches(const ValueMapping & LM,const ValueMapping & RM)295 static bool matches(const ValueMapping &LM, const ValueMapping &RM) {
296   LLVM_DEBUG(dbgs() << "compare value maps\n");
297   if (LM.Values.size() != RM.Values.size()) {
298     LLVM_DEBUG(debugSizeMismatch(LM, RM));
299     return false;
300   }
301 
302   // This mapping doesn't include dangling constant users, since those don't
303   // get serialized.  However, checking if users are constant and calling
304   // isConstantUsed() on every one is very expensive.  Instead, just check if
305   // the user is mapped.
306   auto skipUnmappedUsers =
307       [&](Value::const_use_iterator &U, Value::const_use_iterator E,
308           const ValueMapping &M) {
309     while (U != E && !M.lookup(U->getUser()))
310       ++U;
311   };
312 
313   // Iterate through all values, and check that both mappings have the same
314   // users.
315   for (unsigned I = 0, E = LM.Values.size(); I != E; ++I) {
316     const Value *L = LM.Values[I];
317     const Value *R = RM.Values[I];
318     auto LU = L->use_begin(), LE = L->use_end();
319     auto RU = R->use_begin(), RE = R->use_end();
320     skipUnmappedUsers(LU, LE, LM);
321     skipUnmappedUsers(RU, RE, RM);
322 
323     while (LU != LE) {
324       if (RU == RE) {
325         LLVM_DEBUG(debugUserMismatch(LM, RM, I));
326         return false;
327       }
328       if (LM.lookup(LU->getUser()) != RM.lookup(RU->getUser())) {
329         LLVM_DEBUG(debugUserMismatch(LM, RM, I));
330         return false;
331       }
332       if (LU->getOperandNo() != RU->getOperandNo()) {
333         LLVM_DEBUG(debugUserMismatch(LM, RM, I));
334         return false;
335       }
336       skipUnmappedUsers(++LU, LE, LM);
337       skipUnmappedUsers(++RU, RE, RM);
338     }
339     if (RU != RE) {
340       LLVM_DEBUG(debugUserMismatch(LM, RM, I));
341       return false;
342     }
343   }
344 
345   return true;
346 }
347 
verifyAfterRoundTrip(const Module & M,std::unique_ptr<Module> OtherM)348 static void verifyAfterRoundTrip(const Module &M,
349                                  std::unique_ptr<Module> OtherM) {
350   if (!OtherM)
351     report_fatal_error("parsing failed");
352   if (verifyModule(*OtherM, &errs()))
353     report_fatal_error("verification failed");
354   if (!matches(ValueMapping(M), ValueMapping(*OtherM)))
355     report_fatal_error("use-list order changed");
356 }
357 
verifyBitcodeUseListOrder(const Module & M)358 static void verifyBitcodeUseListOrder(const Module &M) {
359   TempFile F;
360   if (F.init("bc"))
361     report_fatal_error("failed to initialize bitcode file");
362 
363   if (F.writeBitcode(M))
364     report_fatal_error("failed to write bitcode");
365 
366   LLVMContext Context;
367   verifyAfterRoundTrip(M, F.readBitcode(Context));
368 }
369 
verifyAssemblyUseListOrder(const Module & M)370 static void verifyAssemblyUseListOrder(const Module &M) {
371   TempFile F;
372   if (F.init("ll"))
373     report_fatal_error("failed to initialize assembly file");
374 
375   if (F.writeAssembly(M))
376     report_fatal_error("failed to write assembly");
377 
378   LLVMContext Context;
379   verifyAfterRoundTrip(M, F.readAssembly(Context));
380 }
381 
verifyUseListOrder(const Module & M)382 static void verifyUseListOrder(const Module &M) {
383   outs() << "verify bitcode\n";
384   verifyBitcodeUseListOrder(M);
385   outs() << "verify assembly\n";
386   verifyAssemblyUseListOrder(M);
387 }
388 
shuffleValueUseLists(Value * V,std::minstd_rand0 & Gen,DenseSet<Value * > & Seen)389 static void shuffleValueUseLists(Value *V, std::minstd_rand0 &Gen,
390                                  DenseSet<Value *> &Seen) {
391   if (!Seen.insert(V).second)
392     return;
393 
394   if (auto *C = dyn_cast<Constant>(V))
395     if (!isa<GlobalValue>(C))
396       for (Value *Op : C->operands())
397         shuffleValueUseLists(Op, Gen, Seen);
398 
399   if (V->use_empty() || std::next(V->use_begin()) == V->use_end())
400     // Nothing to shuffle for 0 or 1 users.
401     return;
402 
403   // Generate random numbers between 10 and 99, which will line up nicely in
404   // debug output.  We're not worried about collisons here.
405   LLVM_DEBUG(dbgs() << "V = "; V->dump());
406   std::uniform_int_distribution<short> Dist(10, 99);
407   SmallDenseMap<const Use *, short, 16> Order;
408   auto compareUses =
409       [&Order](const Use &L, const Use &R) { return Order[&L] < Order[&R]; };
410   do {
411     for (const Use &U : V->uses()) {
412       auto I = Dist(Gen);
413       Order[&U] = I;
414       LLVM_DEBUG(dbgs() << " - order: " << I << ", op = " << U.getOperandNo()
415                         << ", U = ";
416                  U.getUser()->dump());
417     }
418   } while (std::is_sorted(V->use_begin(), V->use_end(), compareUses));
419 
420   LLVM_DEBUG(dbgs() << " => shuffle\n");
421   V->sortUseList(compareUses);
422 
423   LLVM_DEBUG({
424     for (const Use &U : V->uses()) {
425       dbgs() << " - order: " << Order.lookup(&U)
426              << ", op = " << U.getOperandNo() << ", U = ";
427       U.getUser()->dump();
428     }
429   });
430 }
431 
reverseValueUseLists(Value * V,DenseSet<Value * > & Seen)432 static void reverseValueUseLists(Value *V, DenseSet<Value *> &Seen) {
433   if (!Seen.insert(V).second)
434     return;
435 
436   if (auto *C = dyn_cast<Constant>(V))
437     if (!isa<GlobalValue>(C))
438       for (Value *Op : C->operands())
439         reverseValueUseLists(Op, Seen);
440 
441   if (V->use_empty() || std::next(V->use_begin()) == V->use_end())
442     // Nothing to shuffle for 0 or 1 users.
443     return;
444 
445   LLVM_DEBUG({
446     dbgs() << "V = ";
447     V->dump();
448     for (const Use &U : V->uses()) {
449       dbgs() << " - order: op = " << U.getOperandNo() << ", U = ";
450       U.getUser()->dump();
451     }
452     dbgs() << " => reverse\n";
453   });
454 
455   V->reverseUseList();
456 
457   LLVM_DEBUG({
458     for (const Use &U : V->uses()) {
459       dbgs() << " - order: op = " << U.getOperandNo() << ", U = ";
460       U.getUser()->dump();
461     }
462   });
463 }
464 
465 template <class Changer>
changeUseLists(Module & M,Changer changeValueUseList)466 static void changeUseLists(Module &M, Changer changeValueUseList) {
467   // Visit every value that would be serialized to an IR file.
468   //
469   // Globals.
470   for (GlobalVariable &G : M.globals())
471     changeValueUseList(&G);
472   for (GlobalAlias &A : M.aliases())
473     changeValueUseList(&A);
474   for (GlobalIFunc &IF : M.ifuncs())
475     changeValueUseList(&IF);
476   for (Function &F : M)
477     changeValueUseList(&F);
478 
479   // Constants used by globals.
480   for (GlobalVariable &G : M.globals())
481     if (G.hasInitializer())
482       changeValueUseList(G.getInitializer());
483   for (GlobalAlias &A : M.aliases())
484     changeValueUseList(A.getAliasee());
485   for (GlobalIFunc &IF : M.ifuncs())
486     changeValueUseList(IF.getResolver());
487   for (Function &F : M) {
488     if (F.hasPrefixData())
489       changeValueUseList(F.getPrefixData());
490     if (F.hasPrologueData())
491       changeValueUseList(F.getPrologueData());
492     if (F.hasPersonalityFn())
493       changeValueUseList(F.getPersonalityFn());
494   }
495 
496   // Function bodies.
497   for (Function &F : M) {
498     for (Argument &A : F.args())
499       changeValueUseList(&A);
500     for (BasicBlock &BB : F)
501       changeValueUseList(&BB);
502     for (BasicBlock &BB : F)
503       for (Instruction &I : BB)
504         changeValueUseList(&I);
505 
506     // Constants used by instructions.
507     for (BasicBlock &BB : F)
508       for (Instruction &I : BB)
509         for (Value *Op : I.operands()) {
510           // Look through a metadata wrapper.
511           if (auto *MAV = dyn_cast<MetadataAsValue>(Op))
512             if (auto *VAM = dyn_cast<ValueAsMetadata>(MAV->getMetadata()))
513               Op = VAM->getValue();
514           if ((isa<Constant>(Op) && !isa<GlobalValue>(*Op)) ||
515               isa<InlineAsm>(Op))
516             changeValueUseList(Op);
517         }
518   }
519 
520   if (verifyModule(M, &errs()))
521     report_fatal_error("verification failed");
522 }
523 
shuffleUseLists(Module & M,unsigned SeedOffset)524 static void shuffleUseLists(Module &M, unsigned SeedOffset) {
525   std::minstd_rand0 Gen(std::minstd_rand0::default_seed + SeedOffset);
526   DenseSet<Value *> Seen;
527   changeUseLists(M, [&](Value *V) { shuffleValueUseLists(V, Gen, Seen); });
528   LLVM_DEBUG(dbgs() << "\n");
529 }
530 
reverseUseLists(Module & M)531 static void reverseUseLists(Module &M) {
532   DenseSet<Value *> Seen;
533   changeUseLists(M, [&](Value *V) { reverseValueUseLists(V, Seen); });
534   LLVM_DEBUG(dbgs() << "\n");
535 }
536 
main(int argc,char ** argv)537 int main(int argc, char **argv) {
538   InitLLVM X(argc, argv);
539 
540   // Enable debug stream buffering.
541   EnableDebugBuffering = true;
542 
543   LLVMContext Context;
544 
545   cl::ParseCommandLineOptions(argc, argv,
546                               "llvm tool to verify use-list order\n");
547 
548   SMDiagnostic Err;
549 
550   // Load the input module...
551   std::unique_ptr<Module> M = parseIRFile(InputFilename, Err, Context);
552 
553   if (!M.get()) {
554     Err.print(argv[0], errs());
555     return 1;
556   }
557   if (verifyModule(*M, &errs())) {
558     errs() << argv[0] << ": " << InputFilename
559            << ": error: input module is broken!\n";
560     return 1;
561   }
562 
563   // Verify the use lists now and after reversing them.
564   outs() << "*** verify-uselistorder ***\n";
565   verifyUseListOrder(*M);
566   outs() << "reverse\n";
567   reverseUseLists(*M);
568   verifyUseListOrder(*M);
569 
570   for (unsigned I = 0, E = NumShuffles; I != E; ++I) {
571     outs() << "\n";
572 
573     // Shuffle with a different (deterministic) seed each time.
574     outs() << "shuffle (" << I + 1 << " of " << E << ")\n";
575     shuffleUseLists(*M, I);
576 
577     // Verify again before and after reversing.
578     verifyUseListOrder(*M);
579     outs() << "reverse\n";
580     reverseUseLists(*M);
581     verifyUseListOrder(*M);
582   }
583 
584   return 0;
585 }
586