xref: /llvm-project/clang-tools-extra/clang-tidy/bugprone/SuspiciousMissingCommaCheck.cpp (revision 7d2ea6c422d3f5712b7253407005e1a465a76946)
1 //===--- SuspiciousMissingCommaCheck.cpp - clang-tidy----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "SuspiciousMissingCommaCheck.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 
13 using namespace clang::ast_matchers;
14 
15 namespace clang::tidy::bugprone {
16 
17 namespace {
18 
isConcatenatedLiteralsOnPurpose(ASTContext * Ctx,const StringLiteral * Lit)19 bool isConcatenatedLiteralsOnPurpose(ASTContext *Ctx,
20                                      const StringLiteral *Lit) {
21   // String literals surrounded by parentheses are assumed to be on purpose.
22   //    i.e.:  const char* Array[] = { ("a" "b" "c"), "d", [...] };
23 
24   TraversalKindScope RAII(*Ctx, TK_AsIs);
25   auto Parents = Ctx->getParents(*Lit);
26   if (Parents.size() == 1 && Parents[0].get<ParenExpr>() != nullptr)
27     return true;
28 
29   // Appropriately indented string literals are assumed to be on purpose.
30   // The following frequent indentation is accepted:
31   //     const char* Array[] = {
32   //       "first literal"
33   //           "indented literal"
34   //           "indented literal",
35   //       "second literal",
36   //       [...]
37   //     };
38   const SourceManager &SM = Ctx->getSourceManager();
39   bool IndentedCorrectly = true;
40   SourceLocation FirstToken = Lit->getStrTokenLoc(0);
41   FileID BaseFID = SM.getFileID(FirstToken);
42   unsigned int BaseIndent = SM.getSpellingColumnNumber(FirstToken);
43   unsigned int BaseLine = SM.getSpellingLineNumber(FirstToken);
44   for (unsigned int TokNum = 1; TokNum < Lit->getNumConcatenated(); ++TokNum) {
45     SourceLocation Token = Lit->getStrTokenLoc(TokNum);
46     FileID FID = SM.getFileID(Token);
47     unsigned int Indent = SM.getSpellingColumnNumber(Token);
48     unsigned int Line = SM.getSpellingLineNumber(Token);
49     if (FID != BaseFID || Line != BaseLine + TokNum || Indent <= BaseIndent) {
50       IndentedCorrectly = false;
51       break;
52     }
53   }
54   if (IndentedCorrectly)
55     return true;
56 
57   // There is no pattern recognized by the checker, assume it's not on purpose.
58   return false;
59 }
60 
/// Matches string literals built from more than one token but fewer than
/// MaxConcatenatedTokens tokens, unless the concatenation is recognized as
/// intentional by isConcatenatedLiteralsOnPurpose().
AST_MATCHER_P(StringLiteral, isConcatenatedLiteral, unsigned,
              MaxConcatenatedTokens) {
  const unsigned NumTokens = Node.getNumConcatenated();
  // Reject plain literals and literals with too many pieces before running
  // the source-layout analysis.
  if (NumTokens <= 1 || NumTokens >= MaxConcatenatedTokens)
    return false;
  return !isConcatenatedLiteralsOnPurpose(&Finder->getASTContext(), &Node);
}
67 
68 } // namespace
69 
SuspiciousMissingCommaCheck(StringRef Name,ClangTidyContext * Context)70 SuspiciousMissingCommaCheck::SuspiciousMissingCommaCheck(
71     StringRef Name, ClangTidyContext *Context)
72     : ClangTidyCheck(Name, Context),
73       SizeThreshold(Options.get("SizeThreshold", 5U)),
74       RatioThreshold(std::stod(Options.get("RatioThreshold", ".2").str())),
75       MaxConcatenatedTokens(Options.get("MaxConcatenatedTokens", 5U)) {}
76 
storeOptions(ClangTidyOptions::OptionMap & Opts)77 void SuspiciousMissingCommaCheck::storeOptions(
78     ClangTidyOptions::OptionMap &Opts) {
79   Options.store(Opts, "SizeThreshold", SizeThreshold);
80   Options.store(Opts, "RatioThreshold", std::to_string(RatioThreshold));
81   Options.store(Opts, "MaxConcatenatedTokens", MaxConcatenatedTokens);
82 }
83 
registerMatchers(MatchFinder * Finder)84 void SuspiciousMissingCommaCheck::registerMatchers(MatchFinder *Finder) {
85   const auto ConcatenatedStringLiteral =
86       stringLiteral(isConcatenatedLiteral(MaxConcatenatedTokens)).bind("str");
87 
88   const auto StringsInitializerList =
89       initListExpr(hasType(constantArrayType()),
90                    has(ignoringParenImpCasts(expr(ConcatenatedStringLiteral))));
91 
92   Finder->addMatcher(StringsInitializerList.bind("list"), this);
93 }
94 
check(const MatchFinder::MatchResult & Result)95 void SuspiciousMissingCommaCheck::check(
96     const MatchFinder::MatchResult &Result) {
97   const auto *InitializerList = Result.Nodes.getNodeAs<InitListExpr>("list");
98   const auto *ConcatenatedLiteral =
99       Result.Nodes.getNodeAs<StringLiteral>("str");
100   assert(InitializerList && ConcatenatedLiteral);
101 
102   // Skip small arrays as they often generate false-positive.
103   unsigned int Size = InitializerList->getNumInits();
104   if (Size < SizeThreshold)
105     return;
106 
107   // Count the number of occurrence of concatenated string literal.
108   unsigned int Count = 0;
109   for (unsigned int I = 0; I < Size; ++I) {
110     const Expr *Child = InitializerList->getInit(I)->IgnoreImpCasts();
111     if (const auto *Literal = dyn_cast<StringLiteral>(Child)) {
112       if (Literal->getNumConcatenated() > 1)
113         ++Count;
114     }
115   }
116 
117   // Warn only when concatenation is not common in this initializer list.
118   // The current threshold is set to less than 1/5 of the string literals.
119   if (double(Count) / Size > RatioThreshold)
120     return;
121 
122   diag(ConcatenatedLiteral->getBeginLoc(),
123        "suspicious string literal, probably missing a comma");
124 }
125 
126 } // namespace clang::tidy::bugprone
127