xref: /llvm-project/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp (revision 48d0ef1a07993139e1acf65910704255443103a5)
1 //===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "RawStringLiteralCheck.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Basic/LangOptions.h"
13 #include "clang/Basic/SourceManager.h"
14 #include "clang/Lex/Lexer.h"
15 #include "llvm/ADT/StringRef.h"
16 #include <optional>
17 
18 using namespace clang::ast_matchers;
19 
20 namespace clang::tidy::modernize {
21 
22 namespace {
23 
24 bool containsEscapes(StringRef HayStack, StringRef Escapes) {
25   size_t BackSlash = HayStack.find('\\');
26   if (BackSlash == StringRef::npos)
27     return false;
28 
29   while (BackSlash != StringRef::npos) {
30     if (!Escapes.contains(HayStack[BackSlash + 1]))
31       return false;
32     BackSlash = HayStack.find('\\', BackSlash + 2);
33   }
34 
35   return true;
36 }
37 
38 bool isRawStringLiteral(StringRef Text) {
39   // Already a raw string literal if R comes before ".
40   const size_t QuotePos = Text.find('"');
41   assert(QuotePos != StringRef::npos);
42   return (QuotePos > 0) && (Text[QuotePos - 1] == 'R');
43 }
44 
45 bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
46                                const StringLiteral *Literal,
47                                const CharsBitSet &DisallowedChars) {
48   // FIXME: Handle L"", u8"", u"" and U"" literals.
49   if (!Literal->isOrdinary())
50     return false;
51 
52   for (const unsigned char C : Literal->getBytes())
53     if (DisallowedChars.test(C))
54       return false;
55 
56   CharSourceRange CharRange = Lexer::makeFileCharRange(
57       CharSourceRange::getTokenRange(Literal->getSourceRange()),
58       *Result.SourceManager, Result.Context->getLangOpts());
59   StringRef Text = Lexer::getSourceText(CharRange, *Result.SourceManager,
60                                         Result.Context->getLangOpts());
61   if (Text.empty() || isRawStringLiteral(Text))
62     return false;
63 
64   return containsEscapes(Text, R"('\"?x01)");
65 }
66 
67 bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
68   return Bytes.find(Delimiter.empty()
69                         ? std::string(R"lit()")lit")
70                         : (")" + Delimiter + R"(")")) != StringRef::npos;
71 }
72 
73 } // namespace
74 
75 RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name,
76                                              ClangTidyContext *Context)
77     : ClangTidyCheck(Name, Context),
78       DelimiterStem(Options.get("DelimiterStem", "lit")),
79       ReplaceShorterLiterals(Options.get("ReplaceShorterLiterals", false)) {
80   // Non-printing characters are disallowed:
81   // \007 = \a bell
82   // \010 = \b backspace
83   // \011 = \t horizontal tab
84   // \012 = \n new line
85   // \013 = \v vertical tab
86   // \014 = \f form feed
87   // \015 = \r carriage return
88   // \177 = delete
89   for (const unsigned char C : StringRef("\000\001\002\003\004\005\006\a"
90                                          "\b\t\n\v\f\r\016\017"
91                                          "\020\021\022\023\024\025\026\027"
92                                          "\030\031\032\033\034\035\036\037"
93                                          "\177",
94                                          33))
95     DisallowedChars.set(C);
96 
97   // Non-ASCII are disallowed too.
98   for (unsigned int C = 0x80U; C <= 0xFFU; ++C)
99     DisallowedChars.set(static_cast<unsigned char>(C));
100 }
101 
102 void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
103   Options.store(Opts, "DelimiterStem", DelimiterStem);
104   Options.store(Opts, "ReplaceShorterLiterals", ReplaceShorterLiterals);
105 }
106 
107 void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) {
108   Finder->addMatcher(
109       stringLiteral(unless(hasParent(predefinedExpr()))).bind("lit"), this);
110 }
111 
112 static std::optional<StringRef>
113 createUserDefinedSuffix(const StringLiteral *Literal, const SourceManager &SM,
114                         const LangOptions &LangOpts) {
115   const CharSourceRange TokenRange =
116       CharSourceRange::getTokenRange(Literal->getSourceRange());
117   Token T;
118   if (Lexer::getRawToken(Literal->getBeginLoc(), T, SM, LangOpts))
119     return std::nullopt;
120   const CharSourceRange CharRange =
121       Lexer::makeFileCharRange(TokenRange, SM, LangOpts);
122   if (T.hasUDSuffix()) {
123     StringRef Text = Lexer::getSourceText(CharRange, SM, LangOpts);
124     const size_t UDSuffixPos = Text.find_last_of('"');
125     if (UDSuffixPos == StringRef::npos)
126       return std::nullopt;
127     return Text.slice(UDSuffixPos + 1, Text.size());
128   }
129   return std::nullopt;
130 }
131 
132 static std::string createRawStringLiteral(const StringLiteral *Literal,
133                                           const std::string &DelimiterStem,
134                                           const SourceManager &SM,
135                                           const LangOptions &LangOpts) {
136   const StringRef Bytes = Literal->getBytes();
137   std::string Delimiter;
138   for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) {
139     Delimiter = (I == 0) ? DelimiterStem : DelimiterStem + std::to_string(I);
140   }
141 
142   std::optional<StringRef> UserDefinedSuffix =
143       createUserDefinedSuffix(Literal, SM, LangOpts);
144 
145   if (Delimiter.empty())
146     return (R"(R"()" + Bytes + R"lit()")lit" + UserDefinedSuffix.value_or(""))
147         .str();
148 
149   return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")" +
150           UserDefinedSuffix.value_or(""))
151       .str();
152 }
153 
154 static bool compareStringLength(StringRef Replacement,
155                                 const StringLiteral *Literal,
156                                 const SourceManager &SM,
157                                 const LangOptions &LangOpts) {
158   return Replacement.size() <=
159          Lexer::MeasureTokenLength(Literal->getBeginLoc(), SM, LangOpts);
160 }
161 
162 void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) {
163   const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("lit");
164   if (Literal->getBeginLoc().isMacroID())
165     return;
166   const SourceManager &SM = *Result.SourceManager;
167   const LangOptions &LangOpts = getLangOpts();
168   if (containsEscapedCharacters(Result, Literal, DisallowedChars)) {
169     const std::string Replacement =
170         createRawStringLiteral(Literal, DelimiterStem, SM, LangOpts);
171     if (ReplaceShorterLiterals ||
172         compareStringLength(Replacement, Literal, SM, LangOpts)) {
173       diag(Literal->getBeginLoc(),
174            "escaped string literal can be written as a raw string literal")
175           << FixItHint::CreateReplacement(Literal->getSourceRange(),
176                                           Replacement);
177     }
178   }
179 }
180 
181 } // namespace clang::tidy::modernize
182