//===--- CloneChecker.cpp - Clone detection checker -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// CloneChecker is a checker that reports clones in the current translation
/// unit.
///
//===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric
150b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
160b57cec5SDimitry Andric #include "clang/Analysis/CloneDetection.h"
170b57cec5SDimitry Andric #include "clang/Basic/Diagnostic.h"
180b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
190b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/Checker.h"
200b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/CheckerManager.h"
210b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
220b57cec5SDimitry Andric #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
230b57cec5SDimitry Andric
240b57cec5SDimitry Andric using namespace clang;
250b57cec5SDimitry Andric using namespace ento;
260b57cec5SDimitry Andric
270b57cec5SDimitry Andric namespace {
280b57cec5SDimitry Andric class CloneChecker
290b57cec5SDimitry Andric : public Checker<check::ASTCodeBody, check::EndOfTranslationUnit> {
300b57cec5SDimitry Andric public:
310b57cec5SDimitry Andric // Checker options.
320b57cec5SDimitry Andric int MinComplexity;
3381ad6265SDimitry Andric bool ReportNormalClones = false;
340b57cec5SDimitry Andric StringRef IgnoredFilesPattern;
350b57cec5SDimitry Andric
360b57cec5SDimitry Andric private:
370b57cec5SDimitry Andric mutable CloneDetector Detector;
38*647cbc5dSDimitry Andric const BugType BT_Exact{this, "Exact code clone", "Code clone"};
39*647cbc5dSDimitry Andric const BugType BT_Suspicious{this, "Suspicious code clone", "Code clone"};
400b57cec5SDimitry Andric
410b57cec5SDimitry Andric public:
420b57cec5SDimitry Andric void checkASTCodeBody(const Decl *D, AnalysisManager &Mgr,
430b57cec5SDimitry Andric BugReporter &BR) const;
440b57cec5SDimitry Andric
450b57cec5SDimitry Andric void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
460b57cec5SDimitry Andric AnalysisManager &Mgr, BugReporter &BR) const;
470b57cec5SDimitry Andric
480b57cec5SDimitry Andric /// Reports all clones to the user.
490b57cec5SDimitry Andric void reportClones(BugReporter &BR, AnalysisManager &Mgr,
500b57cec5SDimitry Andric std::vector<CloneDetector::CloneGroup> &CloneGroups) const;
510b57cec5SDimitry Andric
520b57cec5SDimitry Andric /// Reports only suspicious clones to the user along with information
530b57cec5SDimitry Andric /// that explain why they are suspicious.
540b57cec5SDimitry Andric void reportSuspiciousClones(
550b57cec5SDimitry Andric BugReporter &BR, AnalysisManager &Mgr,
560b57cec5SDimitry Andric std::vector<CloneDetector::CloneGroup> &CloneGroups) const;
570b57cec5SDimitry Andric };
580b57cec5SDimitry Andric } // end anonymous namespace
590b57cec5SDimitry Andric
checkASTCodeBody(const Decl * D,AnalysisManager & Mgr,BugReporter & BR) const600b57cec5SDimitry Andric void CloneChecker::checkASTCodeBody(const Decl *D, AnalysisManager &Mgr,
610b57cec5SDimitry Andric BugReporter &BR) const {
620b57cec5SDimitry Andric // Every statement that should be included in the search for clones needs to
630b57cec5SDimitry Andric // be passed to the CloneDetector.
640b57cec5SDimitry Andric Detector.analyzeCodeBody(D);
650b57cec5SDimitry Andric }
660b57cec5SDimitry Andric
checkEndOfTranslationUnit(const TranslationUnitDecl * TU,AnalysisManager & Mgr,BugReporter & BR) const670b57cec5SDimitry Andric void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
680b57cec5SDimitry Andric AnalysisManager &Mgr,
690b57cec5SDimitry Andric BugReporter &BR) const {
700b57cec5SDimitry Andric // At this point, every statement in the translation unit has been analyzed by
710b57cec5SDimitry Andric // the CloneDetector. The only thing left to do is to report the found clones.
720b57cec5SDimitry Andric
730b57cec5SDimitry Andric // Let the CloneDetector create a list of clones from all the analyzed
740b57cec5SDimitry Andric // statements. We don't filter for matching variable patterns at this point
750b57cec5SDimitry Andric // because reportSuspiciousClones() wants to search them for errors.
760b57cec5SDimitry Andric std::vector<CloneDetector::CloneGroup> AllCloneGroups;
770b57cec5SDimitry Andric
780b57cec5SDimitry Andric Detector.findClones(
790b57cec5SDimitry Andric AllCloneGroups, FilenamePatternConstraint(IgnoredFilesPattern),
800b57cec5SDimitry Andric RecursiveCloneTypeIIHashConstraint(), MinGroupSizeConstraint(2),
810b57cec5SDimitry Andric MinComplexityConstraint(MinComplexity),
820b57cec5SDimitry Andric RecursiveCloneTypeIIVerifyConstraint(), OnlyLargestCloneConstraint());
830b57cec5SDimitry Andric
840b57cec5SDimitry Andric reportSuspiciousClones(BR, Mgr, AllCloneGroups);
850b57cec5SDimitry Andric
860b57cec5SDimitry Andric // We are done for this translation unit unless we also need to report normal
870b57cec5SDimitry Andric // clones.
880b57cec5SDimitry Andric if (!ReportNormalClones)
890b57cec5SDimitry Andric return;
900b57cec5SDimitry Andric
910b57cec5SDimitry Andric // Now that the suspicious clone detector has checked for pattern errors,
920b57cec5SDimitry Andric // we also filter all clones who don't have matching patterns
930b57cec5SDimitry Andric CloneDetector::constrainClones(AllCloneGroups,
940b57cec5SDimitry Andric MatchingVariablePatternConstraint(),
950b57cec5SDimitry Andric MinGroupSizeConstraint(2));
960b57cec5SDimitry Andric
970b57cec5SDimitry Andric reportClones(BR, Mgr, AllCloneGroups);
980b57cec5SDimitry Andric }
990b57cec5SDimitry Andric
makeLocation(const StmtSequence & S,AnalysisManager & Mgr)1000b57cec5SDimitry Andric static PathDiagnosticLocation makeLocation(const StmtSequence &S,
1010b57cec5SDimitry Andric AnalysisManager &Mgr) {
1020b57cec5SDimitry Andric ASTContext &ACtx = Mgr.getASTContext();
1030b57cec5SDimitry Andric return PathDiagnosticLocation::createBegin(
1040b57cec5SDimitry Andric S.front(), ACtx.getSourceManager(),
1050b57cec5SDimitry Andric Mgr.getAnalysisDeclContext(ACtx.getTranslationUnitDecl()));
1060b57cec5SDimitry Andric }
1070b57cec5SDimitry Andric
reportClones(BugReporter & BR,AnalysisManager & Mgr,std::vector<CloneDetector::CloneGroup> & CloneGroups) const1080b57cec5SDimitry Andric void CloneChecker::reportClones(
1090b57cec5SDimitry Andric BugReporter &BR, AnalysisManager &Mgr,
1100b57cec5SDimitry Andric std::vector<CloneDetector::CloneGroup> &CloneGroups) const {
1110b57cec5SDimitry Andric for (const CloneDetector::CloneGroup &Group : CloneGroups) {
1120b57cec5SDimitry Andric // We group the clones by printing the first as a warning and all others
1130b57cec5SDimitry Andric // as a note.
114a7dea167SDimitry Andric auto R = std::make_unique<BasicBugReport>(
115*647cbc5dSDimitry Andric BT_Exact, "Duplicate code detected", makeLocation(Group.front(), Mgr));
1160b57cec5SDimitry Andric R->addRange(Group.front().getSourceRange());
1170b57cec5SDimitry Andric
1180b57cec5SDimitry Andric for (unsigned i = 1; i < Group.size(); ++i)
1190b57cec5SDimitry Andric R->addNote("Similar code here", makeLocation(Group[i], Mgr),
1200b57cec5SDimitry Andric Group[i].getSourceRange());
1210b57cec5SDimitry Andric BR.emitReport(std::move(R));
1220b57cec5SDimitry Andric }
1230b57cec5SDimitry Andric }
1240b57cec5SDimitry Andric
reportSuspiciousClones(BugReporter & BR,AnalysisManager & Mgr,std::vector<CloneDetector::CloneGroup> & CloneGroups) const1250b57cec5SDimitry Andric void CloneChecker::reportSuspiciousClones(
1260b57cec5SDimitry Andric BugReporter &BR, AnalysisManager &Mgr,
1270b57cec5SDimitry Andric std::vector<CloneDetector::CloneGroup> &CloneGroups) const {
1280b57cec5SDimitry Andric std::vector<VariablePattern::SuspiciousClonePair> Pairs;
1290b57cec5SDimitry Andric
1300b57cec5SDimitry Andric for (const CloneDetector::CloneGroup &Group : CloneGroups) {
1310b57cec5SDimitry Andric for (unsigned i = 0; i < Group.size(); ++i) {
1320b57cec5SDimitry Andric VariablePattern PatternA(Group[i]);
1330b57cec5SDimitry Andric
1340b57cec5SDimitry Andric for (unsigned j = i + 1; j < Group.size(); ++j) {
1350b57cec5SDimitry Andric VariablePattern PatternB(Group[j]);
1360b57cec5SDimitry Andric
1370b57cec5SDimitry Andric VariablePattern::SuspiciousClonePair ClonePair;
1380b57cec5SDimitry Andric // For now, we only report clones which break the variable pattern just
1390b57cec5SDimitry Andric // once because multiple differences in a pattern are an indicator that
1400b57cec5SDimitry Andric // those differences are maybe intended (e.g. because it's actually a
1410b57cec5SDimitry Andric // different algorithm).
1420b57cec5SDimitry Andric // FIXME: In very big clones even multiple variables can be unintended,
1430b57cec5SDimitry Andric // so replacing this number with a percentage could better handle such
1440b57cec5SDimitry Andric // cases. On the other hand it could increase the false-positive rate
1450b57cec5SDimitry Andric // for all clones if the percentage is too high.
1460b57cec5SDimitry Andric if (PatternA.countPatternDifferences(PatternB, &ClonePair) == 1) {
1470b57cec5SDimitry Andric Pairs.push_back(ClonePair);
1480b57cec5SDimitry Andric break;
1490b57cec5SDimitry Andric }
1500b57cec5SDimitry Andric }
1510b57cec5SDimitry Andric }
1520b57cec5SDimitry Andric }
1530b57cec5SDimitry Andric
1540b57cec5SDimitry Andric ASTContext &ACtx = BR.getContext();
1550b57cec5SDimitry Andric SourceManager &SM = ACtx.getSourceManager();
1560b57cec5SDimitry Andric AnalysisDeclContext *ADC =
1570b57cec5SDimitry Andric Mgr.getAnalysisDeclContext(ACtx.getTranslationUnitDecl());
1580b57cec5SDimitry Andric
1590b57cec5SDimitry Andric for (VariablePattern::SuspiciousClonePair &Pair : Pairs) {
1600b57cec5SDimitry Andric // FIXME: We are ignoring the suggestions currently, because they are
1610b57cec5SDimitry Andric // only 50% accurate (even if the second suggestion is unavailable),
1620b57cec5SDimitry Andric // which may confuse the user.
1630b57cec5SDimitry Andric // Think how to perform more accurate suggestions?
1640b57cec5SDimitry Andric
165a7dea167SDimitry Andric auto R = std::make_unique<BasicBugReport>(
166*647cbc5dSDimitry Andric BT_Suspicious,
1670b57cec5SDimitry Andric "Potential copy-paste error; did you really mean to use '" +
1680b57cec5SDimitry Andric Pair.FirstCloneInfo.Variable->getNameAsString() + "' here?",
1690b57cec5SDimitry Andric PathDiagnosticLocation::createBegin(Pair.FirstCloneInfo.Mention, SM,
1700b57cec5SDimitry Andric ADC));
1710b57cec5SDimitry Andric R->addRange(Pair.FirstCloneInfo.Mention->getSourceRange());
1720b57cec5SDimitry Andric
1730b57cec5SDimitry Andric R->addNote("Similar code using '" +
1740b57cec5SDimitry Andric Pair.SecondCloneInfo.Variable->getNameAsString() + "' here",
1750b57cec5SDimitry Andric PathDiagnosticLocation::createBegin(Pair.SecondCloneInfo.Mention,
1760b57cec5SDimitry Andric SM, ADC),
1770b57cec5SDimitry Andric Pair.SecondCloneInfo.Mention->getSourceRange());
1780b57cec5SDimitry Andric
1790b57cec5SDimitry Andric BR.emitReport(std::move(R));
1800b57cec5SDimitry Andric }
1810b57cec5SDimitry Andric }
1820b57cec5SDimitry Andric
//===----------------------------------------------------------------------===//
// Register CloneChecker
//===----------------------------------------------------------------------===//
1860b57cec5SDimitry Andric
registerCloneChecker(CheckerManager & Mgr)1870b57cec5SDimitry Andric void ento::registerCloneChecker(CheckerManager &Mgr) {
1880b57cec5SDimitry Andric auto *Checker = Mgr.registerChecker<CloneChecker>();
1890b57cec5SDimitry Andric
1900b57cec5SDimitry Andric Checker->MinComplexity = Mgr.getAnalyzerOptions().getCheckerIntegerOption(
1910b57cec5SDimitry Andric Checker, "MinimumCloneComplexity");
1920b57cec5SDimitry Andric
1930b57cec5SDimitry Andric if (Checker->MinComplexity < 0)
1940b57cec5SDimitry Andric Mgr.reportInvalidCheckerOptionValue(
1950b57cec5SDimitry Andric Checker, "MinimumCloneComplexity", "a non-negative value");
1960b57cec5SDimitry Andric
1970b57cec5SDimitry Andric Checker->ReportNormalClones = Mgr.getAnalyzerOptions().getCheckerBooleanOption(
1980b57cec5SDimitry Andric Checker, "ReportNormalClones");
1990b57cec5SDimitry Andric
2000b57cec5SDimitry Andric Checker->IgnoredFilesPattern = Mgr.getAnalyzerOptions()
2010b57cec5SDimitry Andric .getCheckerStringOption(Checker, "IgnoredFilesPattern");
2020b57cec5SDimitry Andric }
2030b57cec5SDimitry Andric
shouldRegisterCloneChecker(const CheckerManager & mgr)2045ffd83dbSDimitry Andric bool ento::shouldRegisterCloneChecker(const CheckerManager &mgr) {
2050b57cec5SDimitry Andric return true;
2060b57cec5SDimitry Andric }
207