1*fe6060f1SDimitry Andric //===-- llvm-sim.cpp - Find similar sections of programs -------*- C++ -*-===// 2*fe6060f1SDimitry Andric // 3*fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*fe6060f1SDimitry Andric // 7*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8*fe6060f1SDimitry Andric // 9*fe6060f1SDimitry Andric // This program finds similar sections of a Module, and exports them as a JSON 10*fe6060f1SDimitry Andric // file. 11*fe6060f1SDimitry Andric // 12*fe6060f1SDimitry Andric // To find similarities contained across multiple modules, please use llvm-link 13*fe6060f1SDimitry Andric // first to merge the modules. 14*fe6060f1SDimitry Andric // 15*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 16*fe6060f1SDimitry Andric 17*fe6060f1SDimitry Andric #include "llvm/Analysis/IRSimilarityIdentifier.h" 18*fe6060f1SDimitry Andric #include "llvm/IRReader/IRReader.h" 19*fe6060f1SDimitry Andric #include "llvm/Support/CommandLine.h" 20*fe6060f1SDimitry Andric #include "llvm/Support/FileSystem.h" 21*fe6060f1SDimitry Andric #include "llvm/Support/InitLLVM.h" 22*fe6060f1SDimitry Andric #include "llvm/Support/JSON.h" 23*fe6060f1SDimitry Andric #include "llvm/Support/SourceMgr.h" 24*fe6060f1SDimitry Andric #include "llvm/Support/ToolOutputFile.h" 25*fe6060f1SDimitry Andric 26*fe6060f1SDimitry Andric using namespace llvm; 27*fe6060f1SDimitry Andric using namespace IRSimilarity; 28*fe6060f1SDimitry Andric 29*fe6060f1SDimitry Andric static cl::opt<std::string> OutputFilename("o", cl::desc("Output Filename"), 30*fe6060f1SDimitry Andric cl::init("-"), 31*fe6060f1SDimitry Andric cl::value_desc("filename")); 32*fe6060f1SDimitry Andric 33*fe6060f1SDimitry Andric static cl::opt<std::string> InputSourceFile(cl::Positional, 34*fe6060f1SDimitry Andric cl::desc("<Source file>"), 35*fe6060f1SDimitry Andric cl::init("-"), 36*fe6060f1SDimitry Andric cl::value_desc("filename")); 37*fe6060f1SDimitry Andric 38*fe6060f1SDimitry Andric /// Retrieve the unique number \p I was mapped to in parseBitcodeFile. 39*fe6060f1SDimitry Andric /// 40*fe6060f1SDimitry Andric /// \param I - The Instruction to find the instruction number for. 41*fe6060f1SDimitry Andric /// \param LLVMInstNum - The mapping of Instructions to their location in the 42*fe6060f1SDimitry Andric /// module represented by an unsigned integer. 43*fe6060f1SDimitry Andric /// \returns The instruction number for \p I if it exists. 44*fe6060f1SDimitry Andric Optional<unsigned> 45*fe6060f1SDimitry Andric getPositionInModule(const Instruction *I, 46*fe6060f1SDimitry Andric const DenseMap<Instruction *, unsigned> &LLVMInstNum) { 47*fe6060f1SDimitry Andric assert(I && "Instruction is nullptr!"); 48*fe6060f1SDimitry Andric DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(I); 49*fe6060f1SDimitry Andric if (It == LLVMInstNum.end()) 50*fe6060f1SDimitry Andric return None; 51*fe6060f1SDimitry Andric return It->second; 52*fe6060f1SDimitry Andric } 53*fe6060f1SDimitry Andric 54*fe6060f1SDimitry Andric /// Exports the given SimilarityGroups to a JSON file at \p FilePath. 55*fe6060f1SDimitry Andric /// 56*fe6060f1SDimitry Andric /// \param FilePath - The path to the output location. 57*fe6060f1SDimitry Andric /// \param SimSections - The similarity groups to process. 58*fe6060f1SDimitry Andric /// \param LLVMInstNum - The mapping of Instructions to their location in the 59*fe6060f1SDimitry Andric /// module represented by an unsigned integer. 60*fe6060f1SDimitry Andric /// \returns A nonzero error code if there was a failure creating the file. 61*fe6060f1SDimitry Andric std::error_code 62*fe6060f1SDimitry Andric exportToFile(const StringRef FilePath, 63*fe6060f1SDimitry Andric const SimilarityGroupList &SimSections, 64*fe6060f1SDimitry Andric const DenseMap<Instruction *, unsigned> &LLVMInstNum) { 65*fe6060f1SDimitry Andric std::error_code EC; 66*fe6060f1SDimitry Andric std::unique_ptr<ToolOutputFile> Out( 67*fe6060f1SDimitry Andric new ToolOutputFile(FilePath, EC, sys::fs::OF_None)); 68*fe6060f1SDimitry Andric if (EC) 69*fe6060f1SDimitry Andric return EC; 70*fe6060f1SDimitry Andric 71*fe6060f1SDimitry Andric json::OStream J(Out->os(), 1); 72*fe6060f1SDimitry Andric J.objectBegin(); 73*fe6060f1SDimitry Andric 74*fe6060f1SDimitry Andric unsigned SimOption = 1; 75*fe6060f1SDimitry Andric // Process each list of SimilarityGroups organized by the Module. 76*fe6060f1SDimitry Andric for (const SimilarityGroup &G : SimSections) { 77*fe6060f1SDimitry Andric std::string SimOptionStr = std::to_string(SimOption); 78*fe6060f1SDimitry Andric J.attributeBegin(SimOptionStr); 79*fe6060f1SDimitry Andric J.arrayBegin(); 80*fe6060f1SDimitry Andric // For each file there is a list of the range where the similarity 81*fe6060f1SDimitry Andric // exists. 82*fe6060f1SDimitry Andric for (const IRSimilarityCandidate &C : G) { 83*fe6060f1SDimitry Andric Optional<unsigned> Start = 84*fe6060f1SDimitry Andric getPositionInModule((*C.front()).Inst, LLVMInstNum); 85*fe6060f1SDimitry Andric Optional<unsigned> End = 86*fe6060f1SDimitry Andric getPositionInModule((*C.back()).Inst, LLVMInstNum); 87*fe6060f1SDimitry Andric 88*fe6060f1SDimitry Andric assert(Start.hasValue() && 89*fe6060f1SDimitry Andric "Could not find instruction number for first instruction"); 90*fe6060f1SDimitry Andric assert(End.hasValue() && 91*fe6060f1SDimitry Andric "Could not find instruction number for last instruction"); 92*fe6060f1SDimitry Andric 93*fe6060f1SDimitry Andric J.object([&] { 94*fe6060f1SDimitry Andric J.attribute("start", Start.getValue()); 95*fe6060f1SDimitry Andric J.attribute("end", End.getValue()); 96*fe6060f1SDimitry Andric }); 97*fe6060f1SDimitry Andric } 98*fe6060f1SDimitry Andric J.arrayEnd(); 99*fe6060f1SDimitry Andric J.attributeEnd(); 100*fe6060f1SDimitry Andric SimOption++; 101*fe6060f1SDimitry Andric } 102*fe6060f1SDimitry Andric J.objectEnd(); 103*fe6060f1SDimitry Andric 104*fe6060f1SDimitry Andric Out->keep(); 105*fe6060f1SDimitry Andric 106*fe6060f1SDimitry Andric return EC; 107*fe6060f1SDimitry Andric } 108*fe6060f1SDimitry Andric 109*fe6060f1SDimitry Andric int main(int argc, const char *argv[]) { 110*fe6060f1SDimitry Andric InitLLVM X(argc, argv); 111*fe6060f1SDimitry Andric 112*fe6060f1SDimitry Andric cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n"); 113*fe6060f1SDimitry Andric 114*fe6060f1SDimitry Andric LLVMContext CurrContext; 115*fe6060f1SDimitry Andric SMDiagnostic Err; 116*fe6060f1SDimitry Andric std::unique_ptr<Module> ModuleToAnalyze = 117*fe6060f1SDimitry Andric parseIRFile(InputSourceFile, Err, CurrContext); 118*fe6060f1SDimitry Andric 119*fe6060f1SDimitry Andric if (!ModuleToAnalyze) { 120*fe6060f1SDimitry Andric Err.print(argv[0], errs()); 121*fe6060f1SDimitry Andric return 1; 122*fe6060f1SDimitry Andric } 123*fe6060f1SDimitry Andric 124*fe6060f1SDimitry Andric // Mapping from an Instruction pointer to its occurrence in a sequential 125*fe6060f1SDimitry Andric // list of all the Instructions in a Module. 126*fe6060f1SDimitry Andric DenseMap<Instruction *, unsigned> LLVMInstNum; 127*fe6060f1SDimitry Andric 128*fe6060f1SDimitry Andric // We give each instruction a number, which gives us a start and end value 129*fe6060f1SDimitry Andric // for the beginning and end of each IRSimilarityCandidate. 130*fe6060f1SDimitry Andric unsigned InstructionNumber = 1; 131*fe6060f1SDimitry Andric for (Function &F : *ModuleToAnalyze) 132*fe6060f1SDimitry Andric for (BasicBlock &BB : F) 133*fe6060f1SDimitry Andric for (Instruction &I : BB.instructionsWithoutDebug()) 134*fe6060f1SDimitry Andric LLVMInstNum[&I]= InstructionNumber++; 135*fe6060f1SDimitry Andric 136*fe6060f1SDimitry Andric // The similarity identifier we will use to find the similar sections. 137*fe6060f1SDimitry Andric IRSimilarityIdentifier SimIdent; 138*fe6060f1SDimitry Andric SimilarityGroupList SimilaritySections = 139*fe6060f1SDimitry Andric SimIdent.findSimilarity(*ModuleToAnalyze); 140*fe6060f1SDimitry Andric 141*fe6060f1SDimitry Andric std::error_code E = 142*fe6060f1SDimitry Andric exportToFile(OutputFilename, SimilaritySections, LLVMInstNum); 143*fe6060f1SDimitry Andric if (E) { 144*fe6060f1SDimitry Andric errs() << argv[0] << ": " << E.message() << '\n'; 145*fe6060f1SDimitry Andric return 2; 146*fe6060f1SDimitry Andric } 147*fe6060f1SDimitry Andric 148*fe6060f1SDimitry Andric return 0; 149*fe6060f1SDimitry Andric } 150