xref: /freebsd-src/contrib/llvm-project/llvm/tools/llvm-sim/llvm-sim.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
1fe6060f1SDimitry Andric //===-- llvm-sim.cpp - Find  similar sections of programs -------*- C++ -*-===//
2fe6060f1SDimitry Andric //
3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fe6060f1SDimitry Andric //
7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8fe6060f1SDimitry Andric //
9fe6060f1SDimitry Andric // This program finds similar sections of a Module, and exports them as a JSON
10fe6060f1SDimitry Andric // file.
11fe6060f1SDimitry Andric //
12fe6060f1SDimitry Andric // To find similarities contained across multiple modules, please use llvm-link
13fe6060f1SDimitry Andric // first to merge the modules.
14fe6060f1SDimitry Andric //
15fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
16fe6060f1SDimitry Andric 
17fe6060f1SDimitry Andric #include "llvm/Analysis/IRSimilarityIdentifier.h"
18fe6060f1SDimitry Andric #include "llvm/IRReader/IRReader.h"
19fe6060f1SDimitry Andric #include "llvm/Support/CommandLine.h"
20fe6060f1SDimitry Andric #include "llvm/Support/FileSystem.h"
21fe6060f1SDimitry Andric #include "llvm/Support/InitLLVM.h"
22fe6060f1SDimitry Andric #include "llvm/Support/JSON.h"
23fe6060f1SDimitry Andric #include "llvm/Support/SourceMgr.h"
24fe6060f1SDimitry Andric #include "llvm/Support/ToolOutputFile.h"
25fe6060f1SDimitry Andric 
26fe6060f1SDimitry Andric using namespace llvm;
27fe6060f1SDimitry Andric using namespace IRSimilarity;
28fe6060f1SDimitry Andric 
29fe6060f1SDimitry Andric static cl::opt<std::string> OutputFilename("o", cl::desc("Output Filename"),
30fe6060f1SDimitry Andric                                            cl::init("-"),
31fe6060f1SDimitry Andric                                            cl::value_desc("filename"));
32fe6060f1SDimitry Andric 
33fe6060f1SDimitry Andric static cl::opt<std::string> InputSourceFile(cl::Positional,
34fe6060f1SDimitry Andric                                             cl::desc("<Source file>"),
35fe6060f1SDimitry Andric                                             cl::init("-"),
36fe6060f1SDimitry Andric                                             cl::value_desc("filename"));
37fe6060f1SDimitry Andric 
38fe6060f1SDimitry Andric /// Retrieve the unique number \p I was mapped to in parseBitcodeFile.
39fe6060f1SDimitry Andric ///
40fe6060f1SDimitry Andric /// \param I - The Instruction to find the instruction number for.
41fe6060f1SDimitry Andric /// \param LLVMInstNum - The mapping of Instructions to their location in the
42fe6060f1SDimitry Andric /// module represented by an unsigned integer.
43fe6060f1SDimitry Andric /// \returns The instruction number for \p I if it exists.
44*bdd1243dSDimitry Andric std::optional<unsigned>
getPositionInModule(const Instruction * I,const DenseMap<Instruction *,unsigned> & LLVMInstNum)45fe6060f1SDimitry Andric getPositionInModule(const Instruction *I,
46fe6060f1SDimitry Andric                     const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
47fe6060f1SDimitry Andric   assert(I && "Instruction is nullptr!");
48fe6060f1SDimitry Andric   DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(I);
49fe6060f1SDimitry Andric   if (It == LLVMInstNum.end())
50*bdd1243dSDimitry Andric     return std::nullopt;
51fe6060f1SDimitry Andric   return It->second;
52fe6060f1SDimitry Andric }
53fe6060f1SDimitry Andric 
54fe6060f1SDimitry Andric /// Exports the given SimilarityGroups to a JSON file at \p FilePath.
55fe6060f1SDimitry Andric ///
56fe6060f1SDimitry Andric /// \param FilePath - The path to the output location.
57fe6060f1SDimitry Andric /// \param SimSections - The similarity groups to process.
58fe6060f1SDimitry Andric /// \param LLVMInstNum - The mapping of Instructions to their location in the
59fe6060f1SDimitry Andric /// module represented by an unsigned integer.
60fe6060f1SDimitry Andric /// \returns A nonzero error code if there was a failure creating the file.
61fe6060f1SDimitry Andric std::error_code
exportToFile(const StringRef FilePath,const SimilarityGroupList & SimSections,const DenseMap<Instruction *,unsigned> & LLVMInstNum)62fe6060f1SDimitry Andric exportToFile(const StringRef FilePath,
63fe6060f1SDimitry Andric              const SimilarityGroupList &SimSections,
64fe6060f1SDimitry Andric              const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
65fe6060f1SDimitry Andric   std::error_code EC;
66fe6060f1SDimitry Andric   std::unique_ptr<ToolOutputFile> Out(
67fe6060f1SDimitry Andric       new ToolOutputFile(FilePath, EC, sys::fs::OF_None));
68fe6060f1SDimitry Andric   if (EC)
69fe6060f1SDimitry Andric     return EC;
70fe6060f1SDimitry Andric 
71fe6060f1SDimitry Andric   json::OStream J(Out->os(), 1);
72fe6060f1SDimitry Andric   J.objectBegin();
73fe6060f1SDimitry Andric 
74fe6060f1SDimitry Andric   unsigned SimOption = 1;
75fe6060f1SDimitry Andric   // Process each list of SimilarityGroups organized by the Module.
76fe6060f1SDimitry Andric   for (const SimilarityGroup &G : SimSections) {
77fe6060f1SDimitry Andric     std::string SimOptionStr = std::to_string(SimOption);
78fe6060f1SDimitry Andric     J.attributeBegin(SimOptionStr);
79fe6060f1SDimitry Andric     J.arrayBegin();
80fe6060f1SDimitry Andric     // For each file there is a list of the range where the similarity
81fe6060f1SDimitry Andric     // exists.
82fe6060f1SDimitry Andric     for (const IRSimilarityCandidate &C : G) {
83*bdd1243dSDimitry Andric       std::optional<unsigned> Start =
84fe6060f1SDimitry Andric           getPositionInModule((*C.front()).Inst, LLVMInstNum);
85*bdd1243dSDimitry Andric       std::optional<unsigned> End =
86fe6060f1SDimitry Andric           getPositionInModule((*C.back()).Inst, LLVMInstNum);
87fe6060f1SDimitry Andric 
8881ad6265SDimitry Andric       assert(Start &&
89fe6060f1SDimitry Andric              "Could not find instruction number for first instruction");
9081ad6265SDimitry Andric       assert(End && "Could not find instruction number for last instruction");
91fe6060f1SDimitry Andric 
92fe6060f1SDimitry Andric       J.object([&] {
93*bdd1243dSDimitry Andric         J.attribute("start", *Start);
94*bdd1243dSDimitry Andric         J.attribute("end", *End);
95fe6060f1SDimitry Andric       });
96fe6060f1SDimitry Andric     }
97fe6060f1SDimitry Andric     J.arrayEnd();
98fe6060f1SDimitry Andric     J.attributeEnd();
99fe6060f1SDimitry Andric     SimOption++;
100fe6060f1SDimitry Andric   }
101fe6060f1SDimitry Andric   J.objectEnd();
102fe6060f1SDimitry Andric 
103fe6060f1SDimitry Andric   Out->keep();
104fe6060f1SDimitry Andric 
105fe6060f1SDimitry Andric   return EC;
106fe6060f1SDimitry Andric }
107fe6060f1SDimitry Andric 
main(int argc,const char * argv[])108fe6060f1SDimitry Andric int main(int argc, const char *argv[]) {
109fe6060f1SDimitry Andric   InitLLVM X(argc, argv);
110fe6060f1SDimitry Andric 
111fe6060f1SDimitry Andric   cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n");
112fe6060f1SDimitry Andric 
113fe6060f1SDimitry Andric   LLVMContext CurrContext;
114fe6060f1SDimitry Andric   SMDiagnostic Err;
115fe6060f1SDimitry Andric   std::unique_ptr<Module> ModuleToAnalyze =
116fe6060f1SDimitry Andric       parseIRFile(InputSourceFile, Err, CurrContext);
117fe6060f1SDimitry Andric 
118fe6060f1SDimitry Andric   if (!ModuleToAnalyze) {
119fe6060f1SDimitry Andric     Err.print(argv[0], errs());
120fe6060f1SDimitry Andric     return 1;
121fe6060f1SDimitry Andric   }
122fe6060f1SDimitry Andric 
123fe6060f1SDimitry Andric   // Mapping from an Instruction pointer to its occurrence in a sequential
124fe6060f1SDimitry Andric   // list of all the Instructions in a Module.
125fe6060f1SDimitry Andric   DenseMap<Instruction *, unsigned> LLVMInstNum;
126fe6060f1SDimitry Andric 
127fe6060f1SDimitry Andric   // We give each instruction a number, which gives us a start and end value
128fe6060f1SDimitry Andric   // for the beginning and end of each IRSimilarityCandidate.
129fe6060f1SDimitry Andric   unsigned InstructionNumber = 1;
130fe6060f1SDimitry Andric   for (Function &F : *ModuleToAnalyze)
131fe6060f1SDimitry Andric     for (BasicBlock &BB : F)
132fe6060f1SDimitry Andric       for (Instruction &I : BB.instructionsWithoutDebug())
133fe6060f1SDimitry Andric         LLVMInstNum[&I]= InstructionNumber++;
134fe6060f1SDimitry Andric 
135fe6060f1SDimitry Andric   // The similarity identifier we will use to find the similar sections.
136fe6060f1SDimitry Andric   IRSimilarityIdentifier SimIdent;
137fe6060f1SDimitry Andric   SimilarityGroupList SimilaritySections =
138fe6060f1SDimitry Andric       SimIdent.findSimilarity(*ModuleToAnalyze);
139fe6060f1SDimitry Andric 
140fe6060f1SDimitry Andric   std::error_code E =
141fe6060f1SDimitry Andric       exportToFile(OutputFilename, SimilaritySections, LLVMInstNum);
142fe6060f1SDimitry Andric   if (E) {
143fe6060f1SDimitry Andric     errs() << argv[0] << ": " << E.message() << '\n';
144fe6060f1SDimitry Andric     return 2;
145fe6060f1SDimitry Andric   }
146fe6060f1SDimitry Andric 
147fe6060f1SDimitry Andric   return 0;
148fe6060f1SDimitry Andric }
149