xref: /freebsd-src/contrib/llvm-project/llvm/tools/llvm-sim/llvm-sim.cpp (revision fe6060f10f634930ff71b7c50291ddc610da2475)
1*fe6060f1SDimitry Andric //===-- llvm-sim.cpp - Find  similar sections of programs -------*- C++ -*-===//
2*fe6060f1SDimitry Andric //
3*fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*fe6060f1SDimitry Andric //
7*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8*fe6060f1SDimitry Andric //
9*fe6060f1SDimitry Andric // This program finds similar sections of a Module, and exports them as a JSON
10*fe6060f1SDimitry Andric // file.
11*fe6060f1SDimitry Andric //
12*fe6060f1SDimitry Andric // To find similarities contained across multiple modules, please use llvm-link
13*fe6060f1SDimitry Andric // first to merge the modules.
14*fe6060f1SDimitry Andric //
15*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
16*fe6060f1SDimitry Andric 
17*fe6060f1SDimitry Andric #include "llvm/Analysis/IRSimilarityIdentifier.h"
18*fe6060f1SDimitry Andric #include "llvm/IRReader/IRReader.h"
19*fe6060f1SDimitry Andric #include "llvm/Support/CommandLine.h"
20*fe6060f1SDimitry Andric #include "llvm/Support/FileSystem.h"
21*fe6060f1SDimitry Andric #include "llvm/Support/InitLLVM.h"
22*fe6060f1SDimitry Andric #include "llvm/Support/JSON.h"
23*fe6060f1SDimitry Andric #include "llvm/Support/SourceMgr.h"
24*fe6060f1SDimitry Andric #include "llvm/Support/ToolOutputFile.h"
25*fe6060f1SDimitry Andric 
26*fe6060f1SDimitry Andric using namespace llvm;
27*fe6060f1SDimitry Andric using namespace IRSimilarity;
28*fe6060f1SDimitry Andric 
29*fe6060f1SDimitry Andric static cl::opt<std::string> OutputFilename("o", cl::desc("Output Filename"),
30*fe6060f1SDimitry Andric                                            cl::init("-"),
31*fe6060f1SDimitry Andric                                            cl::value_desc("filename"));
32*fe6060f1SDimitry Andric 
33*fe6060f1SDimitry Andric static cl::opt<std::string> InputSourceFile(cl::Positional,
34*fe6060f1SDimitry Andric                                             cl::desc("<Source file>"),
35*fe6060f1SDimitry Andric                                             cl::init("-"),
36*fe6060f1SDimitry Andric                                             cl::value_desc("filename"));
37*fe6060f1SDimitry Andric 
38*fe6060f1SDimitry Andric /// Retrieve the unique number \p I was mapped to in parseBitcodeFile.
39*fe6060f1SDimitry Andric ///
40*fe6060f1SDimitry Andric /// \param I - The Instruction to find the instruction number for.
41*fe6060f1SDimitry Andric /// \param LLVMInstNum - The mapping of Instructions to their location in the
42*fe6060f1SDimitry Andric /// module represented by an unsigned integer.
43*fe6060f1SDimitry Andric /// \returns The instruction number for \p I if it exists.
44*fe6060f1SDimitry Andric Optional<unsigned>
45*fe6060f1SDimitry Andric getPositionInModule(const Instruction *I,
46*fe6060f1SDimitry Andric                     const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
47*fe6060f1SDimitry Andric   assert(I && "Instruction is nullptr!");
48*fe6060f1SDimitry Andric   DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(I);
49*fe6060f1SDimitry Andric   if (It == LLVMInstNum.end())
50*fe6060f1SDimitry Andric     return None;
51*fe6060f1SDimitry Andric   return It->second;
52*fe6060f1SDimitry Andric }
53*fe6060f1SDimitry Andric 
54*fe6060f1SDimitry Andric /// Exports the given SimilarityGroups to a JSON file at \p FilePath.
55*fe6060f1SDimitry Andric ///
56*fe6060f1SDimitry Andric /// \param FilePath - The path to the output location.
57*fe6060f1SDimitry Andric /// \param SimSections - The similarity groups to process.
58*fe6060f1SDimitry Andric /// \param LLVMInstNum - The mapping of Instructions to their location in the
59*fe6060f1SDimitry Andric /// module represented by an unsigned integer.
60*fe6060f1SDimitry Andric /// \returns A nonzero error code if there was a failure creating the file.
61*fe6060f1SDimitry Andric std::error_code
62*fe6060f1SDimitry Andric exportToFile(const StringRef FilePath,
63*fe6060f1SDimitry Andric              const SimilarityGroupList &SimSections,
64*fe6060f1SDimitry Andric              const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
65*fe6060f1SDimitry Andric   std::error_code EC;
66*fe6060f1SDimitry Andric   std::unique_ptr<ToolOutputFile> Out(
67*fe6060f1SDimitry Andric       new ToolOutputFile(FilePath, EC, sys::fs::OF_None));
68*fe6060f1SDimitry Andric   if (EC)
69*fe6060f1SDimitry Andric     return EC;
70*fe6060f1SDimitry Andric 
71*fe6060f1SDimitry Andric   json::OStream J(Out->os(), 1);
72*fe6060f1SDimitry Andric   J.objectBegin();
73*fe6060f1SDimitry Andric 
74*fe6060f1SDimitry Andric   unsigned SimOption = 1;
75*fe6060f1SDimitry Andric   // Process each list of SimilarityGroups organized by the Module.
76*fe6060f1SDimitry Andric   for (const SimilarityGroup &G : SimSections) {
77*fe6060f1SDimitry Andric     std::string SimOptionStr = std::to_string(SimOption);
78*fe6060f1SDimitry Andric     J.attributeBegin(SimOptionStr);
79*fe6060f1SDimitry Andric     J.arrayBegin();
80*fe6060f1SDimitry Andric     // For each file there is a list of the range where the similarity
81*fe6060f1SDimitry Andric     // exists.
82*fe6060f1SDimitry Andric     for (const IRSimilarityCandidate &C : G) {
83*fe6060f1SDimitry Andric       Optional<unsigned> Start =
84*fe6060f1SDimitry Andric           getPositionInModule((*C.front()).Inst, LLVMInstNum);
85*fe6060f1SDimitry Andric       Optional<unsigned> End =
86*fe6060f1SDimitry Andric           getPositionInModule((*C.back()).Inst, LLVMInstNum);
87*fe6060f1SDimitry Andric 
88*fe6060f1SDimitry Andric       assert(Start.hasValue() &&
89*fe6060f1SDimitry Andric              "Could not find instruction number for first instruction");
90*fe6060f1SDimitry Andric       assert(End.hasValue() &&
91*fe6060f1SDimitry Andric              "Could not find instruction number for last instruction");
92*fe6060f1SDimitry Andric 
93*fe6060f1SDimitry Andric       J.object([&] {
94*fe6060f1SDimitry Andric         J.attribute("start", Start.getValue());
95*fe6060f1SDimitry Andric         J.attribute("end", End.getValue());
96*fe6060f1SDimitry Andric       });
97*fe6060f1SDimitry Andric     }
98*fe6060f1SDimitry Andric     J.arrayEnd();
99*fe6060f1SDimitry Andric     J.attributeEnd();
100*fe6060f1SDimitry Andric     SimOption++;
101*fe6060f1SDimitry Andric   }
102*fe6060f1SDimitry Andric   J.objectEnd();
103*fe6060f1SDimitry Andric 
104*fe6060f1SDimitry Andric   Out->keep();
105*fe6060f1SDimitry Andric 
106*fe6060f1SDimitry Andric   return EC;
107*fe6060f1SDimitry Andric }
108*fe6060f1SDimitry Andric 
109*fe6060f1SDimitry Andric int main(int argc, const char *argv[]) {
110*fe6060f1SDimitry Andric   InitLLVM X(argc, argv);
111*fe6060f1SDimitry Andric 
112*fe6060f1SDimitry Andric   cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n");
113*fe6060f1SDimitry Andric 
114*fe6060f1SDimitry Andric   LLVMContext CurrContext;
115*fe6060f1SDimitry Andric   SMDiagnostic Err;
116*fe6060f1SDimitry Andric   std::unique_ptr<Module> ModuleToAnalyze =
117*fe6060f1SDimitry Andric       parseIRFile(InputSourceFile, Err, CurrContext);
118*fe6060f1SDimitry Andric 
119*fe6060f1SDimitry Andric   if (!ModuleToAnalyze) {
120*fe6060f1SDimitry Andric     Err.print(argv[0], errs());
121*fe6060f1SDimitry Andric     return 1;
122*fe6060f1SDimitry Andric   }
123*fe6060f1SDimitry Andric 
124*fe6060f1SDimitry Andric   // Mapping from an Instruction pointer to its occurrence in a sequential
125*fe6060f1SDimitry Andric   // list of all the Instructions in a Module.
126*fe6060f1SDimitry Andric   DenseMap<Instruction *, unsigned> LLVMInstNum;
127*fe6060f1SDimitry Andric 
128*fe6060f1SDimitry Andric   // We give each instruction a number, which gives us a start and end value
129*fe6060f1SDimitry Andric   // for the beginning and end of each IRSimilarityCandidate.
130*fe6060f1SDimitry Andric   unsigned InstructionNumber = 1;
131*fe6060f1SDimitry Andric   for (Function &F : *ModuleToAnalyze)
132*fe6060f1SDimitry Andric     for (BasicBlock &BB : F)
133*fe6060f1SDimitry Andric       for (Instruction &I : BB.instructionsWithoutDebug())
134*fe6060f1SDimitry Andric         LLVMInstNum[&I]= InstructionNumber++;
135*fe6060f1SDimitry Andric 
136*fe6060f1SDimitry Andric   // The similarity identifier we will use to find the similar sections.
137*fe6060f1SDimitry Andric   IRSimilarityIdentifier SimIdent;
138*fe6060f1SDimitry Andric   SimilarityGroupList SimilaritySections =
139*fe6060f1SDimitry Andric       SimIdent.findSimilarity(*ModuleToAnalyze);
140*fe6060f1SDimitry Andric 
141*fe6060f1SDimitry Andric   std::error_code E =
142*fe6060f1SDimitry Andric       exportToFile(OutputFilename, SimilaritySections, LLVMInstNum);
143*fe6060f1SDimitry Andric   if (E) {
144*fe6060f1SDimitry Andric     errs() << argv[0] << ": " << E.message() << '\n';
145*fe6060f1SDimitry Andric     return 2;
146*fe6060f1SDimitry Andric   }
147*fe6060f1SDimitry Andric 
148*fe6060f1SDimitry Andric   return 0;
149*fe6060f1SDimitry Andric }
150