1e5dd7070Spatrick //===--- CloneChecker.cpp - Clone detection checker -------------*- C++ -*-===//
2e5dd7070Spatrick //
3e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information.
5e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e5dd7070Spatrick //
7e5dd7070Spatrick //===----------------------------------------------------------------------===//
8e5dd7070Spatrick ///
9e5dd7070Spatrick /// \file
10e5dd7070Spatrick /// CloneChecker is a checker that reports clones in the current translation
11e5dd7070Spatrick /// unit.
12e5dd7070Spatrick ///
13e5dd7070Spatrick //===----------------------------------------------------------------------===//
14e5dd7070Spatrick
15e5dd7070Spatrick #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
16e5dd7070Spatrick #include "clang/Analysis/CloneDetection.h"
17e5dd7070Spatrick #include "clang/Basic/Diagnostic.h"
18e5dd7070Spatrick #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
19e5dd7070Spatrick #include "clang/StaticAnalyzer/Core/Checker.h"
20e5dd7070Spatrick #include "clang/StaticAnalyzer/Core/CheckerManager.h"
21e5dd7070Spatrick #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
22e5dd7070Spatrick #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
23e5dd7070Spatrick
24e5dd7070Spatrick using namespace clang;
25e5dd7070Spatrick using namespace ento;
26e5dd7070Spatrick
27e5dd7070Spatrick namespace {
28e5dd7070Spatrick class CloneChecker
29e5dd7070Spatrick : public Checker<check::ASTCodeBody, check::EndOfTranslationUnit> {
30e5dd7070Spatrick public:
31e5dd7070Spatrick // Checker options.
32e5dd7070Spatrick int MinComplexity;
33*12c85518Srobert bool ReportNormalClones = false;
34e5dd7070Spatrick StringRef IgnoredFilesPattern;
35e5dd7070Spatrick
36e5dd7070Spatrick private:
37e5dd7070Spatrick mutable CloneDetector Detector;
38e5dd7070Spatrick mutable std::unique_ptr<BugType> BT_Exact, BT_Suspicious;
39e5dd7070Spatrick
40e5dd7070Spatrick public:
41e5dd7070Spatrick void checkASTCodeBody(const Decl *D, AnalysisManager &Mgr,
42e5dd7070Spatrick BugReporter &BR) const;
43e5dd7070Spatrick
44e5dd7070Spatrick void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
45e5dd7070Spatrick AnalysisManager &Mgr, BugReporter &BR) const;
46e5dd7070Spatrick
47e5dd7070Spatrick /// Reports all clones to the user.
48e5dd7070Spatrick void reportClones(BugReporter &BR, AnalysisManager &Mgr,
49e5dd7070Spatrick std::vector<CloneDetector::CloneGroup> &CloneGroups) const;
50e5dd7070Spatrick
51e5dd7070Spatrick /// Reports only suspicious clones to the user along with information
52e5dd7070Spatrick /// that explain why they are suspicious.
53e5dd7070Spatrick void reportSuspiciousClones(
54e5dd7070Spatrick BugReporter &BR, AnalysisManager &Mgr,
55e5dd7070Spatrick std::vector<CloneDetector::CloneGroup> &CloneGroups) const;
56e5dd7070Spatrick };
57e5dd7070Spatrick } // end anonymous namespace
58e5dd7070Spatrick
checkASTCodeBody(const Decl * D,AnalysisManager & Mgr,BugReporter & BR) const59e5dd7070Spatrick void CloneChecker::checkASTCodeBody(const Decl *D, AnalysisManager &Mgr,
60e5dd7070Spatrick BugReporter &BR) const {
61e5dd7070Spatrick // Every statement that should be included in the search for clones needs to
62e5dd7070Spatrick // be passed to the CloneDetector.
63e5dd7070Spatrick Detector.analyzeCodeBody(D);
64e5dd7070Spatrick }
65e5dd7070Spatrick
checkEndOfTranslationUnit(const TranslationUnitDecl * TU,AnalysisManager & Mgr,BugReporter & BR) const66e5dd7070Spatrick void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
67e5dd7070Spatrick AnalysisManager &Mgr,
68e5dd7070Spatrick BugReporter &BR) const {
69e5dd7070Spatrick // At this point, every statement in the translation unit has been analyzed by
70e5dd7070Spatrick // the CloneDetector. The only thing left to do is to report the found clones.
71e5dd7070Spatrick
72e5dd7070Spatrick // Let the CloneDetector create a list of clones from all the analyzed
73e5dd7070Spatrick // statements. We don't filter for matching variable patterns at this point
74e5dd7070Spatrick // because reportSuspiciousClones() wants to search them for errors.
75e5dd7070Spatrick std::vector<CloneDetector::CloneGroup> AllCloneGroups;
76e5dd7070Spatrick
77e5dd7070Spatrick Detector.findClones(
78e5dd7070Spatrick AllCloneGroups, FilenamePatternConstraint(IgnoredFilesPattern),
79e5dd7070Spatrick RecursiveCloneTypeIIHashConstraint(), MinGroupSizeConstraint(2),
80e5dd7070Spatrick MinComplexityConstraint(MinComplexity),
81e5dd7070Spatrick RecursiveCloneTypeIIVerifyConstraint(), OnlyLargestCloneConstraint());
82e5dd7070Spatrick
83e5dd7070Spatrick reportSuspiciousClones(BR, Mgr, AllCloneGroups);
84e5dd7070Spatrick
85e5dd7070Spatrick // We are done for this translation unit unless we also need to report normal
86e5dd7070Spatrick // clones.
87e5dd7070Spatrick if (!ReportNormalClones)
88e5dd7070Spatrick return;
89e5dd7070Spatrick
90e5dd7070Spatrick // Now that the suspicious clone detector has checked for pattern errors,
91e5dd7070Spatrick // we also filter all clones who don't have matching patterns
92e5dd7070Spatrick CloneDetector::constrainClones(AllCloneGroups,
93e5dd7070Spatrick MatchingVariablePatternConstraint(),
94e5dd7070Spatrick MinGroupSizeConstraint(2));
95e5dd7070Spatrick
96e5dd7070Spatrick reportClones(BR, Mgr, AllCloneGroups);
97e5dd7070Spatrick }
98e5dd7070Spatrick
makeLocation(const StmtSequence & S,AnalysisManager & Mgr)99e5dd7070Spatrick static PathDiagnosticLocation makeLocation(const StmtSequence &S,
100e5dd7070Spatrick AnalysisManager &Mgr) {
101e5dd7070Spatrick ASTContext &ACtx = Mgr.getASTContext();
102e5dd7070Spatrick return PathDiagnosticLocation::createBegin(
103e5dd7070Spatrick S.front(), ACtx.getSourceManager(),
104e5dd7070Spatrick Mgr.getAnalysisDeclContext(ACtx.getTranslationUnitDecl()));
105e5dd7070Spatrick }
106e5dd7070Spatrick
reportClones(BugReporter & BR,AnalysisManager & Mgr,std::vector<CloneDetector::CloneGroup> & CloneGroups) const107e5dd7070Spatrick void CloneChecker::reportClones(
108e5dd7070Spatrick BugReporter &BR, AnalysisManager &Mgr,
109e5dd7070Spatrick std::vector<CloneDetector::CloneGroup> &CloneGroups) const {
110e5dd7070Spatrick
111e5dd7070Spatrick if (!BT_Exact)
112e5dd7070Spatrick BT_Exact.reset(new BugType(this, "Exact code clone", "Code clone"));
113e5dd7070Spatrick
114e5dd7070Spatrick for (const CloneDetector::CloneGroup &Group : CloneGroups) {
115e5dd7070Spatrick // We group the clones by printing the first as a warning and all others
116e5dd7070Spatrick // as a note.
117e5dd7070Spatrick auto R = std::make_unique<BasicBugReport>(
118e5dd7070Spatrick *BT_Exact, "Duplicate code detected", makeLocation(Group.front(), Mgr));
119e5dd7070Spatrick R->addRange(Group.front().getSourceRange());
120e5dd7070Spatrick
121e5dd7070Spatrick for (unsigned i = 1; i < Group.size(); ++i)
122e5dd7070Spatrick R->addNote("Similar code here", makeLocation(Group[i], Mgr),
123e5dd7070Spatrick Group[i].getSourceRange());
124e5dd7070Spatrick BR.emitReport(std::move(R));
125e5dd7070Spatrick }
126e5dd7070Spatrick }
127e5dd7070Spatrick
reportSuspiciousClones(BugReporter & BR,AnalysisManager & Mgr,std::vector<CloneDetector::CloneGroup> & CloneGroups) const128e5dd7070Spatrick void CloneChecker::reportSuspiciousClones(
129e5dd7070Spatrick BugReporter &BR, AnalysisManager &Mgr,
130e5dd7070Spatrick std::vector<CloneDetector::CloneGroup> &CloneGroups) const {
131e5dd7070Spatrick std::vector<VariablePattern::SuspiciousClonePair> Pairs;
132e5dd7070Spatrick
133e5dd7070Spatrick for (const CloneDetector::CloneGroup &Group : CloneGroups) {
134e5dd7070Spatrick for (unsigned i = 0; i < Group.size(); ++i) {
135e5dd7070Spatrick VariablePattern PatternA(Group[i]);
136e5dd7070Spatrick
137e5dd7070Spatrick for (unsigned j = i + 1; j < Group.size(); ++j) {
138e5dd7070Spatrick VariablePattern PatternB(Group[j]);
139e5dd7070Spatrick
140e5dd7070Spatrick VariablePattern::SuspiciousClonePair ClonePair;
141e5dd7070Spatrick // For now, we only report clones which break the variable pattern just
142e5dd7070Spatrick // once because multiple differences in a pattern are an indicator that
143e5dd7070Spatrick // those differences are maybe intended (e.g. because it's actually a
144e5dd7070Spatrick // different algorithm).
145e5dd7070Spatrick // FIXME: In very big clones even multiple variables can be unintended,
146e5dd7070Spatrick // so replacing this number with a percentage could better handle such
147e5dd7070Spatrick // cases. On the other hand it could increase the false-positive rate
148e5dd7070Spatrick // for all clones if the percentage is too high.
149e5dd7070Spatrick if (PatternA.countPatternDifferences(PatternB, &ClonePair) == 1) {
150e5dd7070Spatrick Pairs.push_back(ClonePair);
151e5dd7070Spatrick break;
152e5dd7070Spatrick }
153e5dd7070Spatrick }
154e5dd7070Spatrick }
155e5dd7070Spatrick }
156e5dd7070Spatrick
157e5dd7070Spatrick if (!BT_Suspicious)
158e5dd7070Spatrick BT_Suspicious.reset(
159e5dd7070Spatrick new BugType(this, "Suspicious code clone", "Code clone"));
160e5dd7070Spatrick
161e5dd7070Spatrick ASTContext &ACtx = BR.getContext();
162e5dd7070Spatrick SourceManager &SM = ACtx.getSourceManager();
163e5dd7070Spatrick AnalysisDeclContext *ADC =
164e5dd7070Spatrick Mgr.getAnalysisDeclContext(ACtx.getTranslationUnitDecl());
165e5dd7070Spatrick
166e5dd7070Spatrick for (VariablePattern::SuspiciousClonePair &Pair : Pairs) {
167e5dd7070Spatrick // FIXME: We are ignoring the suggestions currently, because they are
168e5dd7070Spatrick // only 50% accurate (even if the second suggestion is unavailable),
169e5dd7070Spatrick // which may confuse the user.
170e5dd7070Spatrick // Think how to perform more accurate suggestions?
171e5dd7070Spatrick
172e5dd7070Spatrick auto R = std::make_unique<BasicBugReport>(
173e5dd7070Spatrick *BT_Suspicious,
174e5dd7070Spatrick "Potential copy-paste error; did you really mean to use '" +
175e5dd7070Spatrick Pair.FirstCloneInfo.Variable->getNameAsString() + "' here?",
176e5dd7070Spatrick PathDiagnosticLocation::createBegin(Pair.FirstCloneInfo.Mention, SM,
177e5dd7070Spatrick ADC));
178e5dd7070Spatrick R->addRange(Pair.FirstCloneInfo.Mention->getSourceRange());
179e5dd7070Spatrick
180e5dd7070Spatrick R->addNote("Similar code using '" +
181e5dd7070Spatrick Pair.SecondCloneInfo.Variable->getNameAsString() + "' here",
182e5dd7070Spatrick PathDiagnosticLocation::createBegin(Pair.SecondCloneInfo.Mention,
183e5dd7070Spatrick SM, ADC),
184e5dd7070Spatrick Pair.SecondCloneInfo.Mention->getSourceRange());
185e5dd7070Spatrick
186e5dd7070Spatrick BR.emitReport(std::move(R));
187e5dd7070Spatrick }
188e5dd7070Spatrick }
189e5dd7070Spatrick
190e5dd7070Spatrick //===----------------------------------------------------------------------===//
191e5dd7070Spatrick // Register CloneChecker
192e5dd7070Spatrick //===----------------------------------------------------------------------===//
193e5dd7070Spatrick
registerCloneChecker(CheckerManager & Mgr)194e5dd7070Spatrick void ento::registerCloneChecker(CheckerManager &Mgr) {
195e5dd7070Spatrick auto *Checker = Mgr.registerChecker<CloneChecker>();
196e5dd7070Spatrick
197e5dd7070Spatrick Checker->MinComplexity = Mgr.getAnalyzerOptions().getCheckerIntegerOption(
198e5dd7070Spatrick Checker, "MinimumCloneComplexity");
199e5dd7070Spatrick
200e5dd7070Spatrick if (Checker->MinComplexity < 0)
201e5dd7070Spatrick Mgr.reportInvalidCheckerOptionValue(
202e5dd7070Spatrick Checker, "MinimumCloneComplexity", "a non-negative value");
203e5dd7070Spatrick
204e5dd7070Spatrick Checker->ReportNormalClones = Mgr.getAnalyzerOptions().getCheckerBooleanOption(
205e5dd7070Spatrick Checker, "ReportNormalClones");
206e5dd7070Spatrick
207e5dd7070Spatrick Checker->IgnoredFilesPattern = Mgr.getAnalyzerOptions()
208e5dd7070Spatrick .getCheckerStringOption(Checker, "IgnoredFilesPattern");
209e5dd7070Spatrick }
210e5dd7070Spatrick
shouldRegisterCloneChecker(const CheckerManager & mgr)211ec727ea7Spatrick bool ento::shouldRegisterCloneChecker(const CheckerManager &mgr) {
212e5dd7070Spatrick return true;
213e5dd7070Spatrick }
214