xref: /llvm-project/clang-tools-extra/clang-tidy/bugprone/StringIntegerAssignmentCheck.cpp (revision 7d2ea6c422d3f5712b7253407005e1a465a76946)
1 //===--- StringIntegerAssignmentCheck.cpp - clang-tidy---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "StringIntegerAssignmentCheck.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Lex/Lexer.h"
13 
14 using namespace clang::ast_matchers;
15 
16 namespace clang::tidy::bugprone {
17 
registerMatchers(MatchFinder * Finder)18 void StringIntegerAssignmentCheck::registerMatchers(MatchFinder *Finder) {
19   Finder->addMatcher(
20       cxxOperatorCallExpr(
21           hasAnyOverloadedOperatorName("=", "+="),
22           callee(cxxMethodDecl(ofClass(classTemplateSpecializationDecl(
23               hasName("::std::basic_string"),
24               hasTemplateArgument(0, refersToType(hasCanonicalType(
25                                          qualType().bind("type")))))))),
26           hasArgument(
27               1,
28               ignoringImpCasts(
29                   expr(hasType(isInteger()), unless(hasType(isAnyCharacter())),
30                        // Ignore calls to tolower/toupper (see PR27723).
31                        unless(callExpr(callee(functionDecl(
32                            hasAnyName("tolower", "std::tolower", "toupper",
33                                       "std::toupper"))))),
34                        // Do not warn if assigning e.g. `CodePoint` to
35                        // `basic_string<CodePoint>`
36                        unless(hasType(qualType(
37                            hasCanonicalType(equalsBoundNode("type"))))))
38                       .bind("expr"))),
39           unless(isInTemplateInstantiation())),
40       this);
41 }
42 
43 class CharExpressionDetector {
44 public:
CharExpressionDetector(QualType CharType,const ASTContext & Ctx)45   CharExpressionDetector(QualType CharType, const ASTContext &Ctx)
46       : CharType(CharType), Ctx(Ctx) {}
47 
isLikelyCharExpression(const Expr * E) const48   bool isLikelyCharExpression(const Expr *E) const {
49     if (isCharTyped(E))
50       return true;
51 
52     if (const auto *BinOp = dyn_cast<BinaryOperator>(E)) {
53       const auto *LHS = BinOp->getLHS()->IgnoreParenImpCasts();
54       const auto *RHS = BinOp->getRHS()->IgnoreParenImpCasts();
55       // Handle both directions, e.g. `'a' + (i % 26)` and `(i % 26) + 'a'`.
56       if (BinOp->isAdditiveOp() || BinOp->isBitwiseOp())
57         return handleBinaryOp(BinOp->getOpcode(), LHS, RHS) ||
58                handleBinaryOp(BinOp->getOpcode(), RHS, LHS);
59       // Except in the case of '%'.
60       if (BinOp->getOpcode() == BO_Rem)
61         return handleBinaryOp(BinOp->getOpcode(), LHS, RHS);
62       return false;
63     }
64 
65     // Ternary where at least one branch is a likely char expression, e.g.
66     //    i < 265 ? i : ' '
67     if (const auto *CondOp = dyn_cast<AbstractConditionalOperator>(E))
68       return isLikelyCharExpression(
69                  CondOp->getFalseExpr()->IgnoreParenImpCasts()) ||
70              isLikelyCharExpression(
71                  CondOp->getTrueExpr()->IgnoreParenImpCasts());
72     return false;
73   }
74 
75 private:
handleBinaryOp(clang::BinaryOperatorKind Opcode,const Expr * const LHS,const Expr * const RHS) const76   bool handleBinaryOp(clang::BinaryOperatorKind Opcode, const Expr *const LHS,
77                       const Expr *const RHS) const {
78     // <char_expr> <op> <char_expr> (c++ integer promotion rules make this an
79     // int), e.g.
80     //    'a' + c
81     if (isCharTyped(LHS) && isCharTyped(RHS))
82       return true;
83 
84     // <expr> & <char_valued_constant> or <expr> % <char_valued_constant>, e.g.
85     //    i & 0xff
86     if ((Opcode == BO_And || Opcode == BO_Rem) && isCharValuedConstant(RHS))
87       return true;
88 
89     // <char_expr> | <char_valued_constant>, e.g.
90     //    c | 0x80
91     if (Opcode == BO_Or && isCharTyped(LHS) && isCharValuedConstant(RHS))
92       return true;
93 
94     // <char_constant> + <likely_char_expr>, e.g.
95     //    'a' + (i % 26)
96     if (Opcode == BO_Add)
97       return isCharConstant(LHS) && isLikelyCharExpression(RHS);
98 
99     return false;
100   }
101 
102   // Returns true if `E` is an character constant.
isCharConstant(const Expr * E) const103   bool isCharConstant(const Expr *E) const {
104     return isCharTyped(E) && isCharValuedConstant(E);
105   };
106 
107   // Returns true if `E` is an integer constant which fits in `CharType`.
isCharValuedConstant(const Expr * E) const108   bool isCharValuedConstant(const Expr *E) const {
109     if (E->isInstantiationDependent())
110       return false;
111     Expr::EvalResult EvalResult;
112     if (!E->EvaluateAsInt(EvalResult, Ctx, Expr::SE_AllowSideEffects))
113       return false;
114     return EvalResult.Val.getInt().getActiveBits() <= Ctx.getTypeSize(CharType);
115   };
116 
117   // Returns true if `E` has the right character type.
isCharTyped(const Expr * E) const118   bool isCharTyped(const Expr *E) const {
119     return E->getType().getCanonicalType().getTypePtr() ==
120            CharType.getTypePtr();
121   };
122 
123   const QualType CharType;
124   const ASTContext &Ctx;
125 };
126 
check(const MatchFinder::MatchResult & Result)127 void StringIntegerAssignmentCheck::check(
128     const MatchFinder::MatchResult &Result) {
129   const auto *Argument = Result.Nodes.getNodeAs<Expr>("expr");
130   const auto CharType =
131       Result.Nodes.getNodeAs<QualType>("type")->getCanonicalType();
132   SourceLocation Loc = Argument->getBeginLoc();
133 
134   // Try to detect a few common expressions to reduce false positives.
135   if (CharExpressionDetector(CharType, *Result.Context)
136           .isLikelyCharExpression(Argument))
137     return;
138 
139   auto Diag =
140       diag(Loc, "an integer is interpreted as a character code when assigning "
141                 "it to a string; if this is intended, cast the integer to the "
142                 "appropriate character type; if you want a string "
143                 "representation, use the appropriate conversion facility");
144 
145   if (Loc.isMacroID())
146     return;
147 
148   bool IsWideCharType = CharType->isWideCharType();
149   if (!CharType->isCharType() && !IsWideCharType)
150     return;
151   bool IsOneDigit = false;
152   bool IsLiteral = false;
153   if (const auto *Literal = dyn_cast<IntegerLiteral>(Argument)) {
154     IsOneDigit = Literal->getValue().getLimitedValue() < 10;
155     IsLiteral = true;
156   }
157 
158   SourceLocation EndLoc = Lexer::getLocForEndOfToken(
159       Argument->getEndLoc(), 0, *Result.SourceManager, getLangOpts());
160   if (IsOneDigit) {
161     Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "L'" : "'")
162          << FixItHint::CreateInsertion(EndLoc, "'");
163     return;
164   }
165   if (IsLiteral) {
166     Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "L\"" : "\"")
167          << FixItHint::CreateInsertion(EndLoc, "\"");
168     return;
169   }
170 
171   if (getLangOpts().CPlusPlus11) {
172     Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "std::to_wstring("
173                                                            : "std::to_string(")
174          << FixItHint::CreateInsertion(EndLoc, ")");
175   }
176 }
177 
178 } // namespace clang::tidy::bugprone
179