1 //===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "RawStringLiteralCheck.h" 10 #include "clang/AST/ASTContext.h" 11 #include "clang/ASTMatchers/ASTMatchFinder.h" 12 #include "clang/Basic/LangOptions.h" 13 #include "clang/Basic/SourceManager.h" 14 #include "clang/Lex/Lexer.h" 15 #include "llvm/ADT/StringRef.h" 16 #include <optional> 17 18 using namespace clang::ast_matchers; 19 20 namespace clang::tidy::modernize { 21 22 namespace { 23 24 bool containsEscapes(StringRef HayStack, StringRef Escapes) { 25 size_t BackSlash = HayStack.find('\\'); 26 if (BackSlash == StringRef::npos) 27 return false; 28 29 while (BackSlash != StringRef::npos) { 30 if (!Escapes.contains(HayStack[BackSlash + 1])) 31 return false; 32 BackSlash = HayStack.find('\\', BackSlash + 2); 33 } 34 35 return true; 36 } 37 38 bool isRawStringLiteral(StringRef Text) { 39 // Already a raw string literal if R comes before ". 40 const size_t QuotePos = Text.find('"'); 41 assert(QuotePos != StringRef::npos); 42 return (QuotePos > 0) && (Text[QuotePos - 1] == 'R'); 43 } 44 45 bool containsEscapedCharacters(const MatchFinder::MatchResult &Result, 46 const StringLiteral *Literal, 47 const CharsBitSet &DisallowedChars) { 48 // FIXME: Handle L"", u8"", u"" and U"" literals. 49 if (!Literal->isOrdinary()) 50 return false; 51 52 for (const unsigned char C : Literal->getBytes()) 53 if (DisallowedChars.test(C)) 54 return false; 55 56 CharSourceRange CharRange = Lexer::makeFileCharRange( 57 CharSourceRange::getTokenRange(Literal->getSourceRange()), 58 *Result.SourceManager, Result.Context->getLangOpts()); 59 StringRef Text = Lexer::getSourceText(CharRange, *Result.SourceManager, 60 Result.Context->getLangOpts()); 61 if (Text.empty() || isRawStringLiteral(Text)) 62 return false; 63 64 return containsEscapes(Text, R"('\"?x01)"); 65 } 66 67 bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) { 68 return Bytes.find(Delimiter.empty() 69 ? std::string(R"lit()")lit") 70 : (")" + Delimiter + R"(")")) != StringRef::npos; 71 } 72 73 } // namespace 74 75 RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name, 76 ClangTidyContext *Context) 77 : ClangTidyCheck(Name, Context), 78 DelimiterStem(Options.get("DelimiterStem", "lit")), 79 ReplaceShorterLiterals(Options.get("ReplaceShorterLiterals", false)) { 80 // Non-printing characters are disallowed: 81 // \007 = \a bell 82 // \010 = \b backspace 83 // \011 = \t horizontal tab 84 // \012 = \n new line 85 // \013 = \v vertical tab 86 // \014 = \f form feed 87 // \015 = \r carriage return 88 // \177 = delete 89 for (const unsigned char C : StringRef("\000\001\002\003\004\005\006\a" 90 "\b\t\n\v\f\r\016\017" 91 "\020\021\022\023\024\025\026\027" 92 "\030\031\032\033\034\035\036\037" 93 "\177", 94 33)) 95 DisallowedChars.set(C); 96 97 // Non-ASCII are disallowed too. 98 for (unsigned int C = 0x80U; C <= 0xFFU; ++C) 99 DisallowedChars.set(static_cast<unsigned char>(C)); 100 } 101 102 void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { 103 Options.store(Opts, "DelimiterStem", DelimiterStem); 104 Options.store(Opts, "ReplaceShorterLiterals", ReplaceShorterLiterals); 105 } 106 107 void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) { 108 Finder->addMatcher( 109 stringLiteral(unless(hasParent(predefinedExpr()))).bind("lit"), this); 110 } 111 112 static std::optional<StringRef> 113 createUserDefinedSuffix(const StringLiteral *Literal, const SourceManager &SM, 114 const LangOptions &LangOpts) { 115 const CharSourceRange TokenRange = 116 CharSourceRange::getTokenRange(Literal->getSourceRange()); 117 Token T; 118 if (Lexer::getRawToken(Literal->getBeginLoc(), T, SM, LangOpts)) 119 return std::nullopt; 120 const CharSourceRange CharRange = 121 Lexer::makeFileCharRange(TokenRange, SM, LangOpts); 122 if (T.hasUDSuffix()) { 123 StringRef Text = Lexer::getSourceText(CharRange, SM, LangOpts); 124 const size_t UDSuffixPos = Text.find_last_of('"'); 125 if (UDSuffixPos == StringRef::npos) 126 return std::nullopt; 127 return Text.slice(UDSuffixPos + 1, Text.size()); 128 } 129 return std::nullopt; 130 } 131 132 static std::string createRawStringLiteral(const StringLiteral *Literal, 133 const std::string &DelimiterStem, 134 const SourceManager &SM, 135 const LangOptions &LangOpts) { 136 const StringRef Bytes = Literal->getBytes(); 137 std::string Delimiter; 138 for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) { 139 Delimiter = (I == 0) ? DelimiterStem : DelimiterStem + std::to_string(I); 140 } 141 142 std::optional<StringRef> UserDefinedSuffix = 143 createUserDefinedSuffix(Literal, SM, LangOpts); 144 145 if (Delimiter.empty()) 146 return (R"(R"()" + Bytes + R"lit()")lit" + UserDefinedSuffix.value_or("")) 147 .str(); 148 149 return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")" + 150 UserDefinedSuffix.value_or("")) 151 .str(); 152 } 153 154 static bool compareStringLength(StringRef Replacement, 155 const StringLiteral *Literal, 156 const SourceManager &SM, 157 const LangOptions &LangOpts) { 158 return Replacement.size() <= 159 Lexer::MeasureTokenLength(Literal->getBeginLoc(), SM, LangOpts); 160 } 161 162 void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) { 163 const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("lit"); 164 if (Literal->getBeginLoc().isMacroID()) 165 return; 166 const SourceManager &SM = *Result.SourceManager; 167 const LangOptions &LangOpts = getLangOpts(); 168 if (containsEscapedCharacters(Result, Literal, DisallowedChars)) { 169 const std::string Replacement = 170 createRawStringLiteral(Literal, DelimiterStem, SM, LangOpts); 171 if (ReplaceShorterLiterals || 172 compareStringLength(Replacement, Literal, SM, LangOpts)) { 173 diag(Literal->getBeginLoc(), 174 "escaped string literal can be written as a raw string literal") 175 << FixItHint::CreateReplacement(Literal->getSourceRange(), 176 Replacement); 177 } 178 } 179 } 180 181 } // namespace clang::tidy::modernize 182