1fe6060f1SDimitry Andric //===-- llvm-sim.cpp - Find similar sections of programs -------*- C++ -*-===// 2fe6060f1SDimitry Andric // 3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6fe6060f1SDimitry Andric // 7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8fe6060f1SDimitry Andric // 9fe6060f1SDimitry Andric // This program finds similar sections of a Module, and exports them as a JSON 10fe6060f1SDimitry Andric // file. 11fe6060f1SDimitry Andric // 12fe6060f1SDimitry Andric // To find similarities contained across multiple modules, please use llvm-link 13fe6060f1SDimitry Andric // first to merge the modules. 14fe6060f1SDimitry Andric // 15fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 16fe6060f1SDimitry Andric 17fe6060f1SDimitry Andric #include "llvm/Analysis/IRSimilarityIdentifier.h" 18fe6060f1SDimitry Andric #include "llvm/IRReader/IRReader.h" 19fe6060f1SDimitry Andric #include "llvm/Support/CommandLine.h" 20fe6060f1SDimitry Andric #include "llvm/Support/FileSystem.h" 21fe6060f1SDimitry Andric #include "llvm/Support/InitLLVM.h" 22fe6060f1SDimitry Andric #include "llvm/Support/JSON.h" 23fe6060f1SDimitry Andric #include "llvm/Support/SourceMgr.h" 24fe6060f1SDimitry Andric #include "llvm/Support/ToolOutputFile.h" 25fe6060f1SDimitry Andric 26fe6060f1SDimitry Andric using namespace llvm; 27fe6060f1SDimitry Andric using namespace IRSimilarity; 28fe6060f1SDimitry Andric 29fe6060f1SDimitry Andric static cl::opt<std::string> OutputFilename("o", cl::desc("Output Filename"), 30fe6060f1SDimitry Andric cl::init("-"), 31fe6060f1SDimitry Andric cl::value_desc("filename")); 32fe6060f1SDimitry Andric 33fe6060f1SDimitry Andric static cl::opt<std::string> InputSourceFile(cl::Positional, 34fe6060f1SDimitry Andric cl::desc("<Source file>"), 35fe6060f1SDimitry Andric cl::init("-"), 36fe6060f1SDimitry Andric cl::value_desc("filename")); 37fe6060f1SDimitry Andric 38fe6060f1SDimitry Andric /// Retrieve the unique number \p I was mapped to in parseBitcodeFile. 39fe6060f1SDimitry Andric /// 40fe6060f1SDimitry Andric /// \param I - The Instruction to find the instruction number for. 41fe6060f1SDimitry Andric /// \param LLVMInstNum - The mapping of Instructions to their location in the 42fe6060f1SDimitry Andric /// module represented by an unsigned integer. 43fe6060f1SDimitry Andric /// \returns The instruction number for \p I if it exists. 44fe6060f1SDimitry Andric Optional<unsigned> 45fe6060f1SDimitry Andric getPositionInModule(const Instruction *I, 46fe6060f1SDimitry Andric const DenseMap<Instruction *, unsigned> &LLVMInstNum) { 47fe6060f1SDimitry Andric assert(I && "Instruction is nullptr!"); 48fe6060f1SDimitry Andric DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(I); 49fe6060f1SDimitry Andric if (It == LLVMInstNum.end()) 50fe6060f1SDimitry Andric return None; 51fe6060f1SDimitry Andric return It->second; 52fe6060f1SDimitry Andric } 53fe6060f1SDimitry Andric 54fe6060f1SDimitry Andric /// Exports the given SimilarityGroups to a JSON file at \p FilePath. 55fe6060f1SDimitry Andric /// 56fe6060f1SDimitry Andric /// \param FilePath - The path to the output location. 57fe6060f1SDimitry Andric /// \param SimSections - The similarity groups to process. 58fe6060f1SDimitry Andric /// \param LLVMInstNum - The mapping of Instructions to their location in the 59fe6060f1SDimitry Andric /// module represented by an unsigned integer. 60fe6060f1SDimitry Andric /// \returns A nonzero error code if there was a failure creating the file. 61fe6060f1SDimitry Andric std::error_code 62fe6060f1SDimitry Andric exportToFile(const StringRef FilePath, 63fe6060f1SDimitry Andric const SimilarityGroupList &SimSections, 64fe6060f1SDimitry Andric const DenseMap<Instruction *, unsigned> &LLVMInstNum) { 65fe6060f1SDimitry Andric std::error_code EC; 66fe6060f1SDimitry Andric std::unique_ptr<ToolOutputFile> Out( 67fe6060f1SDimitry Andric new ToolOutputFile(FilePath, EC, sys::fs::OF_None)); 68fe6060f1SDimitry Andric if (EC) 69fe6060f1SDimitry Andric return EC; 70fe6060f1SDimitry Andric 71fe6060f1SDimitry Andric json::OStream J(Out->os(), 1); 72fe6060f1SDimitry Andric J.objectBegin(); 73fe6060f1SDimitry Andric 74fe6060f1SDimitry Andric unsigned SimOption = 1; 75fe6060f1SDimitry Andric // Process each list of SimilarityGroups organized by the Module. 76fe6060f1SDimitry Andric for (const SimilarityGroup &G : SimSections) { 77fe6060f1SDimitry Andric std::string SimOptionStr = std::to_string(SimOption); 78fe6060f1SDimitry Andric J.attributeBegin(SimOptionStr); 79fe6060f1SDimitry Andric J.arrayBegin(); 80fe6060f1SDimitry Andric // For each file there is a list of the range where the similarity 81fe6060f1SDimitry Andric // exists. 82fe6060f1SDimitry Andric for (const IRSimilarityCandidate &C : G) { 83fe6060f1SDimitry Andric Optional<unsigned> Start = 84fe6060f1SDimitry Andric getPositionInModule((*C.front()).Inst, LLVMInstNum); 85fe6060f1SDimitry Andric Optional<unsigned> End = 86fe6060f1SDimitry Andric getPositionInModule((*C.back()).Inst, LLVMInstNum); 87fe6060f1SDimitry Andric 88*81ad6265SDimitry Andric assert(Start && 89fe6060f1SDimitry Andric "Could not find instruction number for first instruction"); 90*81ad6265SDimitry Andric assert(End && "Could not find instruction number for last instruction"); 91fe6060f1SDimitry Andric 92fe6060f1SDimitry Andric J.object([&] { 93fe6060f1SDimitry Andric J.attribute("start", Start.getValue()); 94fe6060f1SDimitry Andric J.attribute("end", End.getValue()); 95fe6060f1SDimitry Andric }); 96fe6060f1SDimitry Andric } 97fe6060f1SDimitry Andric J.arrayEnd(); 98fe6060f1SDimitry Andric J.attributeEnd(); 99fe6060f1SDimitry Andric SimOption++; 100fe6060f1SDimitry Andric } 101fe6060f1SDimitry Andric J.objectEnd(); 102fe6060f1SDimitry Andric 103fe6060f1SDimitry Andric Out->keep(); 104fe6060f1SDimitry Andric 105fe6060f1SDimitry Andric return EC; 106fe6060f1SDimitry Andric } 107fe6060f1SDimitry Andric 108fe6060f1SDimitry Andric int main(int argc, const char *argv[]) { 109fe6060f1SDimitry Andric InitLLVM X(argc, argv); 110fe6060f1SDimitry Andric 111fe6060f1SDimitry Andric cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n"); 112fe6060f1SDimitry Andric 113fe6060f1SDimitry Andric LLVMContext CurrContext; 114fe6060f1SDimitry Andric SMDiagnostic Err; 115fe6060f1SDimitry Andric std::unique_ptr<Module> ModuleToAnalyze = 116fe6060f1SDimitry Andric parseIRFile(InputSourceFile, Err, CurrContext); 117fe6060f1SDimitry Andric 118fe6060f1SDimitry Andric if (!ModuleToAnalyze) { 119fe6060f1SDimitry Andric Err.print(argv[0], errs()); 120fe6060f1SDimitry Andric return 1; 121fe6060f1SDimitry Andric } 122fe6060f1SDimitry Andric 123fe6060f1SDimitry Andric // Mapping from an Instruction pointer to its occurrence in a sequential 124fe6060f1SDimitry Andric // list of all the Instructions in a Module. 125fe6060f1SDimitry Andric DenseMap<Instruction *, unsigned> LLVMInstNum; 126fe6060f1SDimitry Andric 127fe6060f1SDimitry Andric // We give each instruction a number, which gives us a start and end value 128fe6060f1SDimitry Andric // for the beginning and end of each IRSimilarityCandidate. 129fe6060f1SDimitry Andric unsigned InstructionNumber = 1; 130fe6060f1SDimitry Andric for (Function &F : *ModuleToAnalyze) 131fe6060f1SDimitry Andric for (BasicBlock &BB : F) 132fe6060f1SDimitry Andric for (Instruction &I : BB.instructionsWithoutDebug()) 133fe6060f1SDimitry Andric LLVMInstNum[&I]= InstructionNumber++; 134fe6060f1SDimitry Andric 135fe6060f1SDimitry Andric // The similarity identifier we will use to find the similar sections. 136fe6060f1SDimitry Andric IRSimilarityIdentifier SimIdent; 137fe6060f1SDimitry Andric SimilarityGroupList SimilaritySections = 138fe6060f1SDimitry Andric SimIdent.findSimilarity(*ModuleToAnalyze); 139fe6060f1SDimitry Andric 140fe6060f1SDimitry Andric std::error_code E = 141fe6060f1SDimitry Andric exportToFile(OutputFilename, SimilaritySections, LLVMInstNum); 142fe6060f1SDimitry Andric if (E) { 143fe6060f1SDimitry Andric errs() << argv[0] << ": " << E.message() << '\n'; 144fe6060f1SDimitry Andric return 2; 145fe6060f1SDimitry Andric } 146fe6060f1SDimitry Andric 147fe6060f1SDimitry Andric return 0; 148fe6060f1SDimitry Andric } 149