xref: /llvm-project/clang-tools-extra/clang-tidy/bugprone/StringIntegerAssignmentCheck.cpp (revision 97572fa6e9daecd648873496fd11f7d1e25a55f0)
1 //===--- StringIntegerAssignmentCheck.cpp - clang-tidy---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "StringIntegerAssignmentCheck.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Lex/Lexer.h"
13 
14 using namespace clang::ast_matchers;
15 
16 namespace clang {
17 namespace tidy {
18 namespace bugprone {
19 
20 void StringIntegerAssignmentCheck::registerMatchers(MatchFinder *Finder) {
21   Finder->addMatcher(
22       cxxOperatorCallExpr(
23           hasAnyOverloadedOperatorName("=", "+="),
24           callee(cxxMethodDecl(ofClass(classTemplateSpecializationDecl(
25               hasName("::std::basic_string"),
26               hasTemplateArgument(0, refersToType(hasCanonicalType(
27                                          qualType().bind("type")))))))),
28           hasArgument(
29               1,
30               ignoringImpCasts(
31                   expr(hasType(isInteger()), unless(hasType(isAnyCharacter())),
32                        // Ignore calls to tolower/toupper (see PR27723).
33                        unless(callExpr(callee(functionDecl(
34                            hasAnyName("tolower", "std::tolower", "toupper",
35                                       "std::toupper"))))),
36                        // Do not warn if assigning e.g. `CodePoint` to
37                        // `basic_string<CodePoint>`
38                        unless(hasType(qualType(
39                            hasCanonicalType(equalsBoundNode("type"))))))
40                       .bind("expr"))),
41           unless(isInTemplateInstantiation())),
42       this);
43 }
44 
45 class CharExpressionDetector {
46 public:
47   CharExpressionDetector(QualType CharType, const ASTContext &Ctx)
48       : CharType(CharType), Ctx(Ctx) {}
49 
50   bool isLikelyCharExpression(const Expr *E) const {
51     if (isCharTyped(E))
52       return true;
53 
54     if (const auto *BinOp = dyn_cast<BinaryOperator>(E)) {
55       const auto *LHS = BinOp->getLHS()->IgnoreParenImpCasts();
56       const auto *RHS = BinOp->getRHS()->IgnoreParenImpCasts();
57       // Handle both directions, e.g. `'a' + (i % 26)` and `(i % 26) + 'a'`.
58       if (BinOp->isAdditiveOp() || BinOp->isBitwiseOp())
59         return handleBinaryOp(BinOp->getOpcode(), LHS, RHS) ||
60                handleBinaryOp(BinOp->getOpcode(), RHS, LHS);
61       // Except in the case of '%'.
62       if (BinOp->getOpcode() == BO_Rem)
63         return handleBinaryOp(BinOp->getOpcode(), LHS, RHS);
64       return false;
65     }
66 
67     // Ternary where at least one branch is a likely char expression, e.g.
68     //    i < 265 ? i : ' '
69     if (const auto *CondOp = dyn_cast<AbstractConditionalOperator>(E))
70       return isLikelyCharExpression(
71                  CondOp->getFalseExpr()->IgnoreParenImpCasts()) ||
72              isLikelyCharExpression(
73                  CondOp->getTrueExpr()->IgnoreParenImpCasts());
74     return false;
75   }
76 
77 private:
78   bool handleBinaryOp(clang::BinaryOperatorKind Opcode, const Expr *const LHS,
79                       const Expr *const RHS) const {
80     // <char_expr> <op> <char_expr> (c++ integer promotion rules make this an
81     // int), e.g.
82     //    'a' + c
83     if (isCharTyped(LHS) && isCharTyped(RHS))
84       return true;
85 
86     // <expr> & <char_valued_constant> or <expr> % <char_valued_constant>, e.g.
87     //    i & 0xff
88     if ((Opcode == BO_And || Opcode == BO_Rem) && isCharValuedConstant(RHS))
89       return true;
90 
91     // <char_expr> | <char_valued_constant>, e.g.
92     //    c | 0x80
93     if (Opcode == BO_Or && isCharTyped(LHS) && isCharValuedConstant(RHS))
94       return true;
95 
96     // <char_constant> + <likely_char_expr>, e.g.
97     //    'a' + (i % 26)
98     if (Opcode == BO_Add)
99       return isCharConstant(LHS) && isLikelyCharExpression(RHS);
100 
101     return false;
102   }
103 
104   // Returns true if `E` is an character constant.
105   bool isCharConstant(const Expr *E) const {
106     return isCharTyped(E) && isCharValuedConstant(E);
107   };
108 
109   // Returns true if `E` is an integer constant which fits in `CharType`.
110   bool isCharValuedConstant(const Expr *E) const {
111     if (E->isInstantiationDependent())
112       return false;
113     Expr::EvalResult EvalResult;
114     if (!E->EvaluateAsInt(EvalResult, Ctx, Expr::SE_AllowSideEffects))
115       return false;
116     return EvalResult.Val.getInt().getActiveBits() <= Ctx.getTypeSize(CharType);
117   };
118 
119   // Returns true if `E` has the right character type.
120   bool isCharTyped(const Expr *E) const {
121     return E->getType().getCanonicalType().getTypePtr() ==
122            CharType.getTypePtr();
123   };
124 
125   const QualType CharType;
126   const ASTContext &Ctx;
127 };
128 
129 void StringIntegerAssignmentCheck::check(
130     const MatchFinder::MatchResult &Result) {
131   const auto *Argument = Result.Nodes.getNodeAs<Expr>("expr");
132   const auto CharType =
133       Result.Nodes.getNodeAs<QualType>("type")->getCanonicalType();
134   SourceLocation Loc = Argument->getBeginLoc();
135 
136   // Try to detect a few common expressions to reduce false positives.
137   if (CharExpressionDetector(CharType, *Result.Context)
138           .isLikelyCharExpression(Argument))
139     return;
140 
141   auto Diag =
142       diag(Loc, "an integer is interpreted as a character code when assigning "
143                 "it to a string; if this is intended, cast the integer to the "
144                 "appropriate character type; if you want a string "
145                 "representation, use the appropriate conversion facility");
146 
147   if (Loc.isMacroID())
148     return;
149 
150   bool IsWideCharType = CharType->isWideCharType();
151   if (!CharType->isCharType() && !IsWideCharType)
152     return;
153   bool IsOneDigit = false;
154   bool IsLiteral = false;
155   if (const auto *Literal = dyn_cast<IntegerLiteral>(Argument)) {
156     IsOneDigit = Literal->getValue().getLimitedValue() < 10;
157     IsLiteral = true;
158   }
159 
160   SourceLocation EndLoc = Lexer::getLocForEndOfToken(
161       Argument->getEndLoc(), 0, *Result.SourceManager, getLangOpts());
162   if (IsOneDigit) {
163     Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "L'" : "'")
164          << FixItHint::CreateInsertion(EndLoc, "'");
165     return;
166   }
167   if (IsLiteral) {
168     Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "L\"" : "\"")
169          << FixItHint::CreateInsertion(EndLoc, "\"");
170     return;
171   }
172 
173   if (getLangOpts().CPlusPlus11) {
174     Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "std::to_wstring("
175                                                            : "std::to_string(")
176          << FixItHint::CreateInsertion(EndLoc, ")");
177   }
178 }
179 
180 } // namespace bugprone
181 } // namespace tidy
182 } // namespace clang
183