xref: /llvm-project/clang-tools-extra/clang-tidy/bugprone/StringIntegerAssignmentCheck.cpp (revision d8e78022c63b9fc9af6260eef667231c929e9cee)
1 //===--- StringIntegerAssignmentCheck.cpp - clang-tidy---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "StringIntegerAssignmentCheck.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Lex/Lexer.h"
13 
14 using namespace clang::ast_matchers;
15 
16 namespace clang {
17 namespace tidy {
18 namespace bugprone {
19 
20 void StringIntegerAssignmentCheck::registerMatchers(MatchFinder *Finder) {
21   if (!getLangOpts().CPlusPlus)
22     return;
23   Finder->addMatcher(
24       cxxOperatorCallExpr(
25           anyOf(hasOverloadedOperatorName("="),
26                 hasOverloadedOperatorName("+=")),
27           callee(cxxMethodDecl(ofClass(classTemplateSpecializationDecl(
28               hasName("::std::basic_string"),
29               hasTemplateArgument(0, refersToType(hasCanonicalType(
30                                          qualType().bind("type")))))))),
31           hasArgument(
32               1,
33               ignoringImpCasts(
34                   expr(hasType(isInteger()), unless(hasType(isAnyCharacter())),
35                        // Ignore calls to tolower/toupper (see PR27723).
36                        unless(callExpr(callee(functionDecl(
37                            hasAnyName("tolower", "std::tolower", "toupper",
38                                       "std::toupper"))))),
39                        // Do not warn if assigning e.g. `CodePoint` to
40                        // `basic_string<CodePoint>`
41                        unless(hasType(qualType(
42                            hasCanonicalType(equalsBoundNode("type"))))))
43                       .bind("expr"))),
44           unless(isInTemplateInstantiation())),
45       this);
46 }
47 
48 class CharExpressionDetector {
49 public:
50   CharExpressionDetector(QualType CharType, const ASTContext &Ctx)
51       : CharType(CharType), Ctx(Ctx) {}
52 
53   bool isLikelyCharExpression(const Expr *E) const {
54     if (isCharTyped(E))
55       return true;
56 
57     if (const auto *BinOp = dyn_cast<BinaryOperator>(E)) {
58       const auto *LHS = BinOp->getLHS()->IgnoreParenImpCasts();
59       const auto *RHS = BinOp->getRHS()->IgnoreParenImpCasts();
60       // Handle both directions, e.g. `'a' + (i % 26)` and `(i % 26) + 'a'`.
61       if (BinOp->isAdditiveOp() || BinOp->isBitwiseOp())
62         return handleBinaryOp(BinOp->getOpcode(), LHS, RHS) ||
63                handleBinaryOp(BinOp->getOpcode(), RHS, LHS);
64       // Except in the case of '%'.
65       if (BinOp->getOpcode() == BO_Rem)
66         return handleBinaryOp(BinOp->getOpcode(), LHS, RHS);
67       return false;
68     }
69 
70     // Ternary where at least one branch is a likely char expression, e.g.
71     //    i < 265 ? i : ' '
72     if (const auto *CondOp = dyn_cast<AbstractConditionalOperator>(E))
73       return isLikelyCharExpression(
74                  CondOp->getFalseExpr()->IgnoreParenImpCasts()) ||
75              isLikelyCharExpression(
76                  CondOp->getTrueExpr()->IgnoreParenImpCasts());
77     return false;
78   }
79 
80 private:
81   bool handleBinaryOp(clang::BinaryOperatorKind Opcode, const Expr *const LHS,
82                       const Expr *const RHS) const {
83     // <char_expr> <op> <char_expr> (c++ integer promotion rules make this an
84     // int), e.g.
85     //    'a' + c
86     if (isCharTyped(LHS) && isCharTyped(RHS))
87       return true;
88 
89     // <expr> & <char_valued_constant> or <expr> % <char_valued_constant>, e.g.
90     //    i & 0xff
91     if ((Opcode == BO_And || Opcode == BO_Rem) && isCharValuedConstant(RHS))
92       return true;
93 
94     // <char_expr> | <char_valued_constant>, e.g.
95     //    c | 0x80
96     if (Opcode == BO_Or && isCharTyped(LHS) && isCharValuedConstant(RHS))
97       return true;
98 
99     // <char_constant> + <likely_char_expr>, e.g.
100     //    'a' + (i % 26)
101     if (Opcode == BO_Add)
102       return isCharConstant(LHS) && isLikelyCharExpression(RHS);
103 
104     return false;
105   }
106 
107   // Returns true if `E` is an character constant.
108   bool isCharConstant(const Expr *E) const {
109     return isCharTyped(E) && isCharValuedConstant(E);
110   };
111 
112   // Returns true if `E` is an integer constant which fits in `CharType`.
113   bool isCharValuedConstant(const Expr *E) const {
114     if (E->isInstantiationDependent())
115       return false;
116     Expr::EvalResult EvalResult;
117     if (!E->EvaluateAsInt(EvalResult, Ctx, Expr::SE_AllowSideEffects))
118       return false;
119     return EvalResult.Val.getInt().getActiveBits() <= Ctx.getTypeSize(CharType);
120   };
121 
122   // Returns true if `E` has the right character type.
123   bool isCharTyped(const Expr *E) const {
124     return E->getType().getCanonicalType().getTypePtr() ==
125            CharType.getTypePtr();
126   };
127 
128   const QualType CharType;
129   const ASTContext &Ctx;
130 };
131 
132 void StringIntegerAssignmentCheck::check(
133     const MatchFinder::MatchResult &Result) {
134   const auto *Argument = Result.Nodes.getNodeAs<Expr>("expr");
135   const auto CharType =
136       Result.Nodes.getNodeAs<QualType>("type")->getCanonicalType();
137   SourceLocation Loc = Argument->getBeginLoc();
138 
139   // Try to detect a few common expressions to reduce false positives.
140   if (CharExpressionDetector(CharType, *Result.Context)
141           .isLikelyCharExpression(Argument))
142     return;
143 
144   auto Diag =
145       diag(Loc, "an integer is interpreted as a character code when assigning "
146                 "it to a string; if this is intended, cast the integer to the "
147                 "appropriate character type; if you want a string "
148                 "representation, use the appropriate conversion facility");
149 
150   if (Loc.isMacroID())
151     return;
152 
153   bool IsWideCharType = CharType->isWideCharType();
154   if (!CharType->isCharType() && !IsWideCharType)
155     return;
156   bool IsOneDigit = false;
157   bool IsLiteral = false;
158   if (const auto *Literal = dyn_cast<IntegerLiteral>(Argument)) {
159     IsOneDigit = Literal->getValue().getLimitedValue() < 10;
160     IsLiteral = true;
161   }
162 
163   SourceLocation EndLoc = Lexer::getLocForEndOfToken(
164       Argument->getEndLoc(), 0, *Result.SourceManager, getLangOpts());
165   if (IsOneDigit) {
166     Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "L'" : "'")
167          << FixItHint::CreateInsertion(EndLoc, "'");
168     return;
169   }
170   if (IsLiteral) {
171     Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "L\"" : "\"")
172          << FixItHint::CreateInsertion(EndLoc, "\"");
173     return;
174   }
175 
176   if (getLangOpts().CPlusPlus11) {
177     Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "std::to_wstring("
178                                                            : "std::to_string(")
179          << FixItHint::CreateInsertion(EndLoc, ")");
180   }
181 }
182 
183 } // namespace bugprone
184 } // namespace tidy
185 } // namespace clang
186