xref: /llvm-project/llvm/tools/llvm-sim/llvm-sim.cpp (revision 67ba5c507af2264ff79e4948b976477929436e86)
//===-- llvm-sim.cpp - Find similar sections of programs --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This program finds similar sections of a Module, and exports them as a JSON
// file.
//
// To find similarities contained across multiple modules, please use llvm-link
// first to merge the modules.
//
//===----------------------------------------------------------------------===//
169e73f7c8SAndrew Litteken 
179e73f7c8SAndrew Litteken #include "llvm/Analysis/IRSimilarityIdentifier.h"
189e73f7c8SAndrew Litteken #include "llvm/IRReader/IRReader.h"
199e73f7c8SAndrew Litteken #include "llvm/Support/CommandLine.h"
209e73f7c8SAndrew Litteken #include "llvm/Support/FileSystem.h"
219e73f7c8SAndrew Litteken #include "llvm/Support/InitLLVM.h"
229e73f7c8SAndrew Litteken #include "llvm/Support/JSON.h"
239e73f7c8SAndrew Litteken #include "llvm/Support/SourceMgr.h"
249e73f7c8SAndrew Litteken #include "llvm/Support/ToolOutputFile.h"
259e73f7c8SAndrew Litteken 
269e73f7c8SAndrew Litteken using namespace llvm;
279e73f7c8SAndrew Litteken using namespace IRSimilarity;
289e73f7c8SAndrew Litteken 
299e73f7c8SAndrew Litteken static cl::opt<std::string> OutputFilename("o", cl::desc("Output Filename"),
309e73f7c8SAndrew Litteken                                            cl::init("-"),
319e73f7c8SAndrew Litteken                                            cl::value_desc("filename"));
329e73f7c8SAndrew Litteken 
339e73f7c8SAndrew Litteken static cl::opt<std::string> InputSourceFile(cl::Positional,
349e73f7c8SAndrew Litteken                                             cl::desc("<Source file>"),
359e73f7c8SAndrew Litteken                                             cl::init("-"),
369e73f7c8SAndrew Litteken                                             cl::value_desc("filename"));
379e73f7c8SAndrew Litteken 
389e73f7c8SAndrew Litteken /// Retrieve the unique number \p I was mapped to in parseBitcodeFile.
399e73f7c8SAndrew Litteken ///
409e73f7c8SAndrew Litteken /// \param I - The Instruction to find the instruction number for.
419e73f7c8SAndrew Litteken /// \param LLVMInstNum - The mapping of Instructions to their location in the
429e73f7c8SAndrew Litteken /// module represented by an unsigned integer.
439e73f7c8SAndrew Litteken /// \returns The instruction number for \p I if it exists.
44da2f5d0aSFangrui Song std::optional<unsigned>
getPositionInModule(const Instruction * I,const DenseMap<Instruction *,unsigned> & LLVMInstNum)459e73f7c8SAndrew Litteken getPositionInModule(const Instruction *I,
469e73f7c8SAndrew Litteken                     const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
479e73f7c8SAndrew Litteken   assert(I && "Instruction is nullptr!");
489e73f7c8SAndrew Litteken   DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(I);
499e73f7c8SAndrew Litteken   if (It == LLVMInstNum.end())
50b4482f7cSKazu Hirata     return std::nullopt;
519e73f7c8SAndrew Litteken   return It->second;
529e73f7c8SAndrew Litteken }
539e73f7c8SAndrew Litteken 
549e73f7c8SAndrew Litteken /// Exports the given SimilarityGroups to a JSON file at \p FilePath.
559e73f7c8SAndrew Litteken ///
569e73f7c8SAndrew Litteken /// \param FilePath - The path to the output location.
579e73f7c8SAndrew Litteken /// \param SimSections - The similarity groups to process.
589e73f7c8SAndrew Litteken /// \param LLVMInstNum - The mapping of Instructions to their location in the
599e73f7c8SAndrew Litteken /// module represented by an unsigned integer.
609e73f7c8SAndrew Litteken /// \returns A nonzero error code if there was a failure creating the file.
619e73f7c8SAndrew Litteken std::error_code
exportToFile(const StringRef FilePath,const SimilarityGroupList & SimSections,const DenseMap<Instruction *,unsigned> & LLVMInstNum)629e73f7c8SAndrew Litteken exportToFile(const StringRef FilePath,
639e73f7c8SAndrew Litteken              const SimilarityGroupList &SimSections,
649e73f7c8SAndrew Litteken              const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
659e73f7c8SAndrew Litteken   std::error_code EC;
669e73f7c8SAndrew Litteken   std::unique_ptr<ToolOutputFile> Out(
679e73f7c8SAndrew Litteken       new ToolOutputFile(FilePath, EC, sys::fs::OF_None));
689e73f7c8SAndrew Litteken   if (EC)
699e73f7c8SAndrew Litteken     return EC;
709e73f7c8SAndrew Litteken 
719e73f7c8SAndrew Litteken   json::OStream J(Out->os(), 1);
729e73f7c8SAndrew Litteken   J.objectBegin();
739e73f7c8SAndrew Litteken 
749e73f7c8SAndrew Litteken   unsigned SimOption = 1;
759e73f7c8SAndrew Litteken   // Process each list of SimilarityGroups organized by the Module.
769e73f7c8SAndrew Litteken   for (const SimilarityGroup &G : SimSections) {
779e73f7c8SAndrew Litteken     std::string SimOptionStr = std::to_string(SimOption);
789e73f7c8SAndrew Litteken     J.attributeBegin(SimOptionStr);
799e73f7c8SAndrew Litteken     J.arrayBegin();
809e73f7c8SAndrew Litteken     // For each file there is a list of the range where the similarity
819e73f7c8SAndrew Litteken     // exists.
829e73f7c8SAndrew Litteken     for (const IRSimilarityCandidate &C : G) {
83da2f5d0aSFangrui Song       std::optional<unsigned> Start =
849e73f7c8SAndrew Litteken           getPositionInModule((*C.front()).Inst, LLVMInstNum);
85da2f5d0aSFangrui Song       std::optional<unsigned> End =
869e73f7c8SAndrew Litteken           getPositionInModule((*C.back()).Inst, LLVMInstNum);
879e73f7c8SAndrew Litteken 
88a7938c74SKazu Hirata       assert(Start &&
899e73f7c8SAndrew Litteken              "Could not find instruction number for first instruction");
90a7938c74SKazu Hirata       assert(End && "Could not find instruction number for last instruction");
919e73f7c8SAndrew Litteken 
929e73f7c8SAndrew Litteken       J.object([&] {
93*67ba5c50SFangrui Song         J.attribute("start", *Start);
94*67ba5c50SFangrui Song         J.attribute("end", *End);
959e73f7c8SAndrew Litteken       });
969e73f7c8SAndrew Litteken     }
979e73f7c8SAndrew Litteken     J.arrayEnd();
989e73f7c8SAndrew Litteken     J.attributeEnd();
999e73f7c8SAndrew Litteken     SimOption++;
1009e73f7c8SAndrew Litteken   }
1019e73f7c8SAndrew Litteken   J.objectEnd();
1029e73f7c8SAndrew Litteken 
1039e73f7c8SAndrew Litteken   Out->keep();
1049e73f7c8SAndrew Litteken 
1059e73f7c8SAndrew Litteken   return EC;
1069e73f7c8SAndrew Litteken }
1079e73f7c8SAndrew Litteken 
main(int argc,const char * argv[])1089e73f7c8SAndrew Litteken int main(int argc, const char *argv[]) {
1099e73f7c8SAndrew Litteken   InitLLVM X(argc, argv);
1109e73f7c8SAndrew Litteken 
1119e73f7c8SAndrew Litteken   cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n");
1129e73f7c8SAndrew Litteken 
1139e73f7c8SAndrew Litteken   LLVMContext CurrContext;
1149e73f7c8SAndrew Litteken   SMDiagnostic Err;
1159e73f7c8SAndrew Litteken   std::unique_ptr<Module> ModuleToAnalyze =
1169e73f7c8SAndrew Litteken       parseIRFile(InputSourceFile, Err, CurrContext);
1179e73f7c8SAndrew Litteken 
1189e73f7c8SAndrew Litteken   if (!ModuleToAnalyze) {
1199e73f7c8SAndrew Litteken     Err.print(argv[0], errs());
1209e73f7c8SAndrew Litteken     return 1;
1219e73f7c8SAndrew Litteken   }
1229e73f7c8SAndrew Litteken 
1239e73f7c8SAndrew Litteken   // Mapping from an Instruction pointer to its occurrence in a sequential
1249e73f7c8SAndrew Litteken   // list of all the Instructions in a Module.
1259e73f7c8SAndrew Litteken   DenseMap<Instruction *, unsigned> LLVMInstNum;
1269e73f7c8SAndrew Litteken 
1279e73f7c8SAndrew Litteken   // We give each instruction a number, which gives us a start and end value
1289e73f7c8SAndrew Litteken   // for the beginning and end of each IRSimilarityCandidate.
1299e73f7c8SAndrew Litteken   unsigned InstructionNumber = 1;
1309e73f7c8SAndrew Litteken   for (Function &F : *ModuleToAnalyze)
1319e73f7c8SAndrew Litteken     for (BasicBlock &BB : F)
1329e73f7c8SAndrew Litteken       for (Instruction &I : BB.instructionsWithoutDebug())
1339e73f7c8SAndrew Litteken         LLVMInstNum[&I]= InstructionNumber++;
1349e73f7c8SAndrew Litteken 
1359e73f7c8SAndrew Litteken   // The similarity identifier we will use to find the similar sections.
1369e73f7c8SAndrew Litteken   IRSimilarityIdentifier SimIdent;
1379e73f7c8SAndrew Litteken   SimilarityGroupList SimilaritySections =
1389e73f7c8SAndrew Litteken       SimIdent.findSimilarity(*ModuleToAnalyze);
1399e73f7c8SAndrew Litteken 
1409e73f7c8SAndrew Litteken   std::error_code E =
1419e73f7c8SAndrew Litteken       exportToFile(OutputFilename, SimilaritySections, LLVMInstNum);
1429e73f7c8SAndrew Litteken   if (E) {
1439e73f7c8SAndrew Litteken     errs() << argv[0] << ": " << E.message() << '\n';
1449e73f7c8SAndrew Litteken     return 2;
1459e73f7c8SAndrew Litteken   }
1469e73f7c8SAndrew Litteken 
1479e73f7c8SAndrew Litteken   return 0;
1489e73f7c8SAndrew Litteken }
149