xref: /freebsd-src/contrib/llvm-project/clang/lib/StaticAnalyzer/Checkers/CloneChecker.cpp (revision 647cbc5de815c5651677bf8582797f716ec7b48d)
10b57cec5SDimitry Andric //===--- CloneChecker.cpp - Clone detection checker -------------*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric ///
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// CloneChecker is a checker that reports clones in the current translation
110b57cec5SDimitry Andric /// unit.
120b57cec5SDimitry Andric ///
130b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
160b57cec5SDimitry Andric #include "clang/Analysis/CloneDetection.h"
170b57cec5SDimitry Andric #include "clang/Basic/Diagnostic.h"
180b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
190b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/Checker.h"
200b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/CheckerManager.h"
210b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
220b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
230b57cec5SDimitry Andric 
240b57cec5SDimitry Andric using namespace clang;
250b57cec5SDimitry Andric using namespace ento;
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric namespace {
280b57cec5SDimitry Andric class CloneChecker
290b57cec5SDimitry Andric     : public Checker<check::ASTCodeBody, check::EndOfTranslationUnit> {
300b57cec5SDimitry Andric public:
310b57cec5SDimitry Andric   // Checker options.
320b57cec5SDimitry Andric   int MinComplexity;
3381ad6265SDimitry Andric   bool ReportNormalClones = false;
340b57cec5SDimitry Andric   StringRef IgnoredFilesPattern;
350b57cec5SDimitry Andric 
360b57cec5SDimitry Andric private:
370b57cec5SDimitry Andric   mutable CloneDetector Detector;
38*647cbc5dSDimitry Andric   const BugType BT_Exact{this, "Exact code clone", "Code clone"};
39*647cbc5dSDimitry Andric   const BugType BT_Suspicious{this, "Suspicious code clone", "Code clone"};
400b57cec5SDimitry Andric 
410b57cec5SDimitry Andric public:
420b57cec5SDimitry Andric   void checkASTCodeBody(const Decl *D, AnalysisManager &Mgr,
430b57cec5SDimitry Andric                         BugReporter &BR) const;
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric   void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
460b57cec5SDimitry Andric                                  AnalysisManager &Mgr, BugReporter &BR) const;
470b57cec5SDimitry Andric 
480b57cec5SDimitry Andric   /// Reports all clones to the user.
490b57cec5SDimitry Andric   void reportClones(BugReporter &BR, AnalysisManager &Mgr,
500b57cec5SDimitry Andric                     std::vector<CloneDetector::CloneGroup> &CloneGroups) const;
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric   /// Reports only suspicious clones to the user along with information
530b57cec5SDimitry Andric   /// that explain why they are suspicious.
540b57cec5SDimitry Andric   void reportSuspiciousClones(
550b57cec5SDimitry Andric       BugReporter &BR, AnalysisManager &Mgr,
560b57cec5SDimitry Andric       std::vector<CloneDetector::CloneGroup> &CloneGroups) const;
570b57cec5SDimitry Andric };
580b57cec5SDimitry Andric } // end anonymous namespace
590b57cec5SDimitry Andric 
checkASTCodeBody(const Decl * D,AnalysisManager & Mgr,BugReporter & BR) const600b57cec5SDimitry Andric void CloneChecker::checkASTCodeBody(const Decl *D, AnalysisManager &Mgr,
610b57cec5SDimitry Andric                                     BugReporter &BR) const {
620b57cec5SDimitry Andric   // Every statement that should be included in the search for clones needs to
630b57cec5SDimitry Andric   // be passed to the CloneDetector.
640b57cec5SDimitry Andric   Detector.analyzeCodeBody(D);
650b57cec5SDimitry Andric }
660b57cec5SDimitry Andric 
checkEndOfTranslationUnit(const TranslationUnitDecl * TU,AnalysisManager & Mgr,BugReporter & BR) const670b57cec5SDimitry Andric void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
680b57cec5SDimitry Andric                                              AnalysisManager &Mgr,
690b57cec5SDimitry Andric                                              BugReporter &BR) const {
700b57cec5SDimitry Andric   // At this point, every statement in the translation unit has been analyzed by
710b57cec5SDimitry Andric   // the CloneDetector. The only thing left to do is to report the found clones.
720b57cec5SDimitry Andric 
730b57cec5SDimitry Andric   // Let the CloneDetector create a list of clones from all the analyzed
740b57cec5SDimitry Andric   // statements. We don't filter for matching variable patterns at this point
750b57cec5SDimitry Andric   // because reportSuspiciousClones() wants to search them for errors.
760b57cec5SDimitry Andric   std::vector<CloneDetector::CloneGroup> AllCloneGroups;
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric   Detector.findClones(
790b57cec5SDimitry Andric       AllCloneGroups, FilenamePatternConstraint(IgnoredFilesPattern),
800b57cec5SDimitry Andric       RecursiveCloneTypeIIHashConstraint(), MinGroupSizeConstraint(2),
810b57cec5SDimitry Andric       MinComplexityConstraint(MinComplexity),
820b57cec5SDimitry Andric       RecursiveCloneTypeIIVerifyConstraint(), OnlyLargestCloneConstraint());
830b57cec5SDimitry Andric 
840b57cec5SDimitry Andric   reportSuspiciousClones(BR, Mgr, AllCloneGroups);
850b57cec5SDimitry Andric 
860b57cec5SDimitry Andric   // We are done for this translation unit unless we also need to report normal
870b57cec5SDimitry Andric   // clones.
880b57cec5SDimitry Andric   if (!ReportNormalClones)
890b57cec5SDimitry Andric     return;
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric   // Now that the suspicious clone detector has checked for pattern errors,
920b57cec5SDimitry Andric   // we also filter all clones who don't have matching patterns
930b57cec5SDimitry Andric   CloneDetector::constrainClones(AllCloneGroups,
940b57cec5SDimitry Andric                                  MatchingVariablePatternConstraint(),
950b57cec5SDimitry Andric                                  MinGroupSizeConstraint(2));
960b57cec5SDimitry Andric 
970b57cec5SDimitry Andric   reportClones(BR, Mgr, AllCloneGroups);
980b57cec5SDimitry Andric }
990b57cec5SDimitry Andric 
makeLocation(const StmtSequence & S,AnalysisManager & Mgr)1000b57cec5SDimitry Andric static PathDiagnosticLocation makeLocation(const StmtSequence &S,
1010b57cec5SDimitry Andric                                            AnalysisManager &Mgr) {
1020b57cec5SDimitry Andric   ASTContext &ACtx = Mgr.getASTContext();
1030b57cec5SDimitry Andric   return PathDiagnosticLocation::createBegin(
1040b57cec5SDimitry Andric       S.front(), ACtx.getSourceManager(),
1050b57cec5SDimitry Andric       Mgr.getAnalysisDeclContext(ACtx.getTranslationUnitDecl()));
1060b57cec5SDimitry Andric }
1070b57cec5SDimitry Andric 
reportClones(BugReporter & BR,AnalysisManager & Mgr,std::vector<CloneDetector::CloneGroup> & CloneGroups) const1080b57cec5SDimitry Andric void CloneChecker::reportClones(
1090b57cec5SDimitry Andric     BugReporter &BR, AnalysisManager &Mgr,
1100b57cec5SDimitry Andric     std::vector<CloneDetector::CloneGroup> &CloneGroups) const {
1110b57cec5SDimitry Andric   for (const CloneDetector::CloneGroup &Group : CloneGroups) {
1120b57cec5SDimitry Andric     // We group the clones by printing the first as a warning and all others
1130b57cec5SDimitry Andric     // as a note.
114a7dea167SDimitry Andric     auto R = std::make_unique<BasicBugReport>(
115*647cbc5dSDimitry Andric         BT_Exact, "Duplicate code detected", makeLocation(Group.front(), Mgr));
1160b57cec5SDimitry Andric     R->addRange(Group.front().getSourceRange());
1170b57cec5SDimitry Andric 
1180b57cec5SDimitry Andric     for (unsigned i = 1; i < Group.size(); ++i)
1190b57cec5SDimitry Andric       R->addNote("Similar code here", makeLocation(Group[i], Mgr),
1200b57cec5SDimitry Andric                  Group[i].getSourceRange());
1210b57cec5SDimitry Andric     BR.emitReport(std::move(R));
1220b57cec5SDimitry Andric   }
1230b57cec5SDimitry Andric }
1240b57cec5SDimitry Andric 
reportSuspiciousClones(BugReporter & BR,AnalysisManager & Mgr,std::vector<CloneDetector::CloneGroup> & CloneGroups) const1250b57cec5SDimitry Andric void CloneChecker::reportSuspiciousClones(
1260b57cec5SDimitry Andric     BugReporter &BR, AnalysisManager &Mgr,
1270b57cec5SDimitry Andric     std::vector<CloneDetector::CloneGroup> &CloneGroups) const {
1280b57cec5SDimitry Andric   std::vector<VariablePattern::SuspiciousClonePair> Pairs;
1290b57cec5SDimitry Andric 
1300b57cec5SDimitry Andric   for (const CloneDetector::CloneGroup &Group : CloneGroups) {
1310b57cec5SDimitry Andric     for (unsigned i = 0; i < Group.size(); ++i) {
1320b57cec5SDimitry Andric       VariablePattern PatternA(Group[i]);
1330b57cec5SDimitry Andric 
1340b57cec5SDimitry Andric       for (unsigned j = i + 1; j < Group.size(); ++j) {
1350b57cec5SDimitry Andric         VariablePattern PatternB(Group[j]);
1360b57cec5SDimitry Andric 
1370b57cec5SDimitry Andric         VariablePattern::SuspiciousClonePair ClonePair;
1380b57cec5SDimitry Andric         // For now, we only report clones which break the variable pattern just
1390b57cec5SDimitry Andric         // once because multiple differences in a pattern are an indicator that
1400b57cec5SDimitry Andric         // those differences are maybe intended (e.g. because it's actually a
1410b57cec5SDimitry Andric         // different algorithm).
1420b57cec5SDimitry Andric         // FIXME: In very big clones even multiple variables can be unintended,
1430b57cec5SDimitry Andric         // so replacing this number with a percentage could better handle such
1440b57cec5SDimitry Andric         // cases. On the other hand it could increase the false-positive rate
1450b57cec5SDimitry Andric         // for all clones if the percentage is too high.
1460b57cec5SDimitry Andric         if (PatternA.countPatternDifferences(PatternB, &ClonePair) == 1) {
1470b57cec5SDimitry Andric           Pairs.push_back(ClonePair);
1480b57cec5SDimitry Andric           break;
1490b57cec5SDimitry Andric         }
1500b57cec5SDimitry Andric       }
1510b57cec5SDimitry Andric     }
1520b57cec5SDimitry Andric   }
1530b57cec5SDimitry Andric 
1540b57cec5SDimitry Andric   ASTContext &ACtx = BR.getContext();
1550b57cec5SDimitry Andric   SourceManager &SM = ACtx.getSourceManager();
1560b57cec5SDimitry Andric   AnalysisDeclContext *ADC =
1570b57cec5SDimitry Andric       Mgr.getAnalysisDeclContext(ACtx.getTranslationUnitDecl());
1580b57cec5SDimitry Andric 
1590b57cec5SDimitry Andric   for (VariablePattern::SuspiciousClonePair &Pair : Pairs) {
1600b57cec5SDimitry Andric     // FIXME: We are ignoring the suggestions currently, because they are
1610b57cec5SDimitry Andric     // only 50% accurate (even if the second suggestion is unavailable),
1620b57cec5SDimitry Andric     // which may confuse the user.
1630b57cec5SDimitry Andric     // Think how to perform more accurate suggestions?
1640b57cec5SDimitry Andric 
165a7dea167SDimitry Andric     auto R = std::make_unique<BasicBugReport>(
166*647cbc5dSDimitry Andric         BT_Suspicious,
1670b57cec5SDimitry Andric         "Potential copy-paste error; did you really mean to use '" +
1680b57cec5SDimitry Andric             Pair.FirstCloneInfo.Variable->getNameAsString() + "' here?",
1690b57cec5SDimitry Andric         PathDiagnosticLocation::createBegin(Pair.FirstCloneInfo.Mention, SM,
1700b57cec5SDimitry Andric                                             ADC));
1710b57cec5SDimitry Andric     R->addRange(Pair.FirstCloneInfo.Mention->getSourceRange());
1720b57cec5SDimitry Andric 
1730b57cec5SDimitry Andric     R->addNote("Similar code using '" +
1740b57cec5SDimitry Andric                    Pair.SecondCloneInfo.Variable->getNameAsString() + "' here",
1750b57cec5SDimitry Andric                PathDiagnosticLocation::createBegin(Pair.SecondCloneInfo.Mention,
1760b57cec5SDimitry Andric                                                    SM, ADC),
1770b57cec5SDimitry Andric                Pair.SecondCloneInfo.Mention->getSourceRange());
1780b57cec5SDimitry Andric 
1790b57cec5SDimitry Andric     BR.emitReport(std::move(R));
1800b57cec5SDimitry Andric   }
1810b57cec5SDimitry Andric }
1820b57cec5SDimitry Andric 
1830b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
1840b57cec5SDimitry Andric // Register CloneChecker
1850b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
1860b57cec5SDimitry Andric 
registerCloneChecker(CheckerManager & Mgr)1870b57cec5SDimitry Andric void ento::registerCloneChecker(CheckerManager &Mgr) {
1880b57cec5SDimitry Andric   auto *Checker = Mgr.registerChecker<CloneChecker>();
1890b57cec5SDimitry Andric 
1900b57cec5SDimitry Andric   Checker->MinComplexity = Mgr.getAnalyzerOptions().getCheckerIntegerOption(
1910b57cec5SDimitry Andric       Checker, "MinimumCloneComplexity");
1920b57cec5SDimitry Andric 
1930b57cec5SDimitry Andric   if (Checker->MinComplexity < 0)
1940b57cec5SDimitry Andric     Mgr.reportInvalidCheckerOptionValue(
1950b57cec5SDimitry Andric         Checker, "MinimumCloneComplexity", "a non-negative value");
1960b57cec5SDimitry Andric 
1970b57cec5SDimitry Andric   Checker->ReportNormalClones = Mgr.getAnalyzerOptions().getCheckerBooleanOption(
1980b57cec5SDimitry Andric       Checker, "ReportNormalClones");
1990b57cec5SDimitry Andric 
2000b57cec5SDimitry Andric   Checker->IgnoredFilesPattern = Mgr.getAnalyzerOptions()
2010b57cec5SDimitry Andric     .getCheckerStringOption(Checker, "IgnoredFilesPattern");
2020b57cec5SDimitry Andric }
2030b57cec5SDimitry Andric 
shouldRegisterCloneChecker(const CheckerManager & mgr)2045ffd83dbSDimitry Andric bool ento::shouldRegisterCloneChecker(const CheckerManager &mgr) {
2050b57cec5SDimitry Andric   return true;
2060b57cec5SDimitry Andric }
207