xref: /llvm-project/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp (revision a199fb1229987d0885a4367e3a439db336069156)
183f875dcSMike Crowe //===--- FormatStringConverter.cpp - clang-tidy----------------------------===//
283f875dcSMike Crowe //
383f875dcSMike Crowe // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
483f875dcSMike Crowe // See https://llvm.org/LICENSE.txt for license information.
583f875dcSMike Crowe // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
683f875dcSMike Crowe //
783f875dcSMike Crowe //===----------------------------------------------------------------------===//
883f875dcSMike Crowe ///
983f875dcSMike Crowe /// \file
1083f875dcSMike Crowe /// Implementation of the FormatStringConverter class which is used to convert
1183f875dcSMike Crowe /// printf format strings to C++ std::formatter format strings.
1283f875dcSMike Crowe ///
1383f875dcSMike Crowe //===----------------------------------------------------------------------===//
1483f875dcSMike Crowe 
1583f875dcSMike Crowe #include "FormatStringConverter.h"
1683f875dcSMike Crowe #include "../utils/FixItHintUtils.h"
1783f875dcSMike Crowe #include "clang/AST/Expr.h"
1883f875dcSMike Crowe #include "clang/ASTMatchers/ASTMatchFinder.h"
1983f875dcSMike Crowe #include "clang/Basic/LangOptions.h"
2083f875dcSMike Crowe #include "clang/Lex/Lexer.h"
21*a199fb12SMike Crowe #include "clang/Lex/Preprocessor.h"
2283f875dcSMike Crowe #include "clang/Tooling/FixIt.h"
2383f875dcSMike Crowe #include "llvm/ADT/StringExtras.h"
2483f875dcSMike Crowe #include "llvm/Support/Debug.h"
2583f875dcSMike Crowe 
2683f875dcSMike Crowe using namespace clang::ast_matchers;
2783f875dcSMike Crowe using namespace clang::analyze_printf;
2883f875dcSMike Crowe 
2983f875dcSMike Crowe namespace clang::tidy::utils {
3083f875dcSMike Crowe using clang::analyze_format_string::ConversionSpecifier;
3183f875dcSMike Crowe 
3283f875dcSMike Crowe /// Is the passed type the actual "char" type, whether that be signed or
3383f875dcSMike Crowe /// unsigned, rather than explicit signed char or unsigned char types.
3483f875dcSMike Crowe static bool isRealCharType(const clang::QualType &Ty) {
3583f875dcSMike Crowe   using namespace clang;
3683f875dcSMike Crowe   const Type *DesugaredType = Ty->getUnqualifiedDesugaredType();
3783f875dcSMike Crowe   if (const auto *BT = llvm::dyn_cast<BuiltinType>(DesugaredType))
3883f875dcSMike Crowe     return (BT->getKind() == BuiltinType::Char_U ||
3983f875dcSMike Crowe             BT->getKind() == BuiltinType::Char_S);
4083f875dcSMike Crowe   return false;
4183f875dcSMike Crowe }
4283f875dcSMike Crowe 
4383f875dcSMike Crowe /// If possible, return the text name of the signed type that corresponds to the
4483f875dcSMike Crowe /// passed integer type. If the passed type is already signed then its name is
4583f875dcSMike Crowe /// just returned. Only supports BuiltinTypes.
4683f875dcSMike Crowe static std::optional<std::string>
4783f875dcSMike Crowe getCorrespondingSignedTypeName(const clang::QualType &QT) {
4883f875dcSMike Crowe   using namespace clang;
4983f875dcSMike Crowe   const auto UQT = QT.getUnqualifiedType();
5083f875dcSMike Crowe   if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) {
5183f875dcSMike Crowe     switch (BT->getKind()) {
5283f875dcSMike Crowe     case BuiltinType::UChar:
5383f875dcSMike Crowe     case BuiltinType::Char_U:
5483f875dcSMike Crowe     case BuiltinType::SChar:
5583f875dcSMike Crowe     case BuiltinType::Char_S:
5683f875dcSMike Crowe       return "signed char";
5783f875dcSMike Crowe     case BuiltinType::UShort:
5883f875dcSMike Crowe     case BuiltinType::Short:
5983f875dcSMike Crowe       return "short";
6083f875dcSMike Crowe     case BuiltinType::UInt:
6183f875dcSMike Crowe     case BuiltinType::Int:
6283f875dcSMike Crowe       return "int";
6383f875dcSMike Crowe     case BuiltinType::ULong:
6483f875dcSMike Crowe     case BuiltinType::Long:
6583f875dcSMike Crowe       return "long";
6683f875dcSMike Crowe     case BuiltinType::ULongLong:
6783f875dcSMike Crowe     case BuiltinType::LongLong:
6883f875dcSMike Crowe       return "long long";
6983f875dcSMike Crowe     default:
7083f875dcSMike Crowe       llvm::dbgs() << "Unknown corresponding signed type for BuiltinType '"
7183f875dcSMike Crowe                    << QT.getAsString() << "'\n";
7283f875dcSMike Crowe       return std::nullopt;
7383f875dcSMike Crowe     }
7483f875dcSMike Crowe   }
7583f875dcSMike Crowe 
7683f875dcSMike Crowe   // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
7783f875dcSMike Crowe   // if the argument type does.
7883f875dcSMike Crowe   const std::string TypeName = UQT.getAsString();
7983f875dcSMike Crowe   StringRef SimplifiedTypeName{TypeName};
8083f875dcSMike Crowe   const bool InStd = SimplifiedTypeName.consume_front("std::");
8183f875dcSMike Crowe   const StringRef Prefix = InStd ? "std::" : "";
8283f875dcSMike Crowe 
8383f875dcSMike Crowe   if (SimplifiedTypeName.starts_with("uint") &&
8483f875dcSMike Crowe       SimplifiedTypeName.ends_with("_t"))
8583f875dcSMike Crowe     return (Twine(Prefix) + SimplifiedTypeName.drop_front()).str();
8683f875dcSMike Crowe 
8783f875dcSMike Crowe   if (SimplifiedTypeName == "size_t")
8883f875dcSMike Crowe     return (Twine(Prefix) + "ssize_t").str();
8983f875dcSMike Crowe 
9083f875dcSMike Crowe   llvm::dbgs() << "Unknown corresponding signed type for non-BuiltinType '"
9183f875dcSMike Crowe                << UQT.getAsString() << "'\n";
9283f875dcSMike Crowe   return std::nullopt;
9383f875dcSMike Crowe }
9483f875dcSMike Crowe 
9583f875dcSMike Crowe /// If possible, return the text name of the unsigned type that corresponds to
9683f875dcSMike Crowe /// the passed integer type. If the passed type is already unsigned then its
9783f875dcSMike Crowe /// name is just returned. Only supports BuiltinTypes.
9883f875dcSMike Crowe static std::optional<std::string>
9983f875dcSMike Crowe getCorrespondingUnsignedTypeName(const clang::QualType &QT) {
10083f875dcSMike Crowe   using namespace clang;
10183f875dcSMike Crowe   const auto UQT = QT.getUnqualifiedType();
10283f875dcSMike Crowe   if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) {
10383f875dcSMike Crowe     switch (BT->getKind()) {
10483f875dcSMike Crowe     case BuiltinType::SChar:
10583f875dcSMike Crowe     case BuiltinType::Char_S:
10683f875dcSMike Crowe     case BuiltinType::UChar:
10783f875dcSMike Crowe     case BuiltinType::Char_U:
10883f875dcSMike Crowe       return "unsigned char";
10983f875dcSMike Crowe     case BuiltinType::Short:
11083f875dcSMike Crowe     case BuiltinType::UShort:
11183f875dcSMike Crowe       return "unsigned short";
11283f875dcSMike Crowe     case BuiltinType::Int:
11383f875dcSMike Crowe     case BuiltinType::UInt:
11483f875dcSMike Crowe       return "unsigned int";
11583f875dcSMike Crowe     case BuiltinType::Long:
11683f875dcSMike Crowe     case BuiltinType::ULong:
11783f875dcSMike Crowe       return "unsigned long";
11883f875dcSMike Crowe     case BuiltinType::LongLong:
11983f875dcSMike Crowe     case BuiltinType::ULongLong:
12083f875dcSMike Crowe       return "unsigned long long";
12183f875dcSMike Crowe     default:
12283f875dcSMike Crowe       llvm::dbgs() << "Unknown corresponding unsigned type for BuiltinType '"
12383f875dcSMike Crowe                    << UQT.getAsString() << "'\n";
12483f875dcSMike Crowe       return std::nullopt;
12583f875dcSMike Crowe     }
12683f875dcSMike Crowe   }
12783f875dcSMike Crowe 
12883f875dcSMike Crowe   // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
12983f875dcSMike Crowe   // if the argument type does.
13083f875dcSMike Crowe   const std::string TypeName = UQT.getAsString();
13183f875dcSMike Crowe   StringRef SimplifiedTypeName{TypeName};
13283f875dcSMike Crowe   const bool InStd = SimplifiedTypeName.consume_front("std::");
13383f875dcSMike Crowe   const StringRef Prefix = InStd ? "std::" : "";
13483f875dcSMike Crowe 
13583f875dcSMike Crowe   if (SimplifiedTypeName.starts_with("int") &&
13683f875dcSMike Crowe       SimplifiedTypeName.ends_with("_t"))
13783f875dcSMike Crowe     return (Twine(Prefix) + "u" + SimplifiedTypeName).str();
13883f875dcSMike Crowe 
13983f875dcSMike Crowe   if (SimplifiedTypeName == "ssize_t")
14083f875dcSMike Crowe     return (Twine(Prefix) + "size_t").str();
14183f875dcSMike Crowe   if (SimplifiedTypeName == "ptrdiff_t")
14283f875dcSMike Crowe     return (Twine(Prefix) + "size_t").str();
14383f875dcSMike Crowe 
14483f875dcSMike Crowe   llvm::dbgs() << "Unknown corresponding unsigned type for non-BuiltinType '"
14583f875dcSMike Crowe                << UQT.getAsString() << "'\n";
14683f875dcSMike Crowe   return std::nullopt;
14783f875dcSMike Crowe }
14883f875dcSMike Crowe 
14983f875dcSMike Crowe static std::optional<std::string>
15083f875dcSMike Crowe castTypeForArgument(ConversionSpecifier::Kind ArgKind,
15183f875dcSMike Crowe                     const clang::QualType &QT) {
15283f875dcSMike Crowe   if (ArgKind == ConversionSpecifier::Kind::uArg)
15383f875dcSMike Crowe     return getCorrespondingUnsignedTypeName(QT);
15483f875dcSMike Crowe   return getCorrespondingSignedTypeName(QT);
15583f875dcSMike Crowe }
15683f875dcSMike Crowe 
15783f875dcSMike Crowe static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind,
15883f875dcSMike Crowe                                  const clang::QualType &ArgType) {
15983f875dcSMike Crowe   if (const auto *BT = llvm::dyn_cast<BuiltinType>(ArgType)) {
16083f875dcSMike Crowe     // Unadorned char never matches any expected signedness since it
16183f875dcSMike Crowe     // could be signed or unsigned.
16283f875dcSMike Crowe     const auto ArgTypeKind = BT->getKind();
16383f875dcSMike Crowe     if (ArgTypeKind == BuiltinType::Char_U ||
16483f875dcSMike Crowe         ArgTypeKind == BuiltinType::Char_S)
16583f875dcSMike Crowe       return false;
16683f875dcSMike Crowe   }
16783f875dcSMike Crowe 
16883f875dcSMike Crowe   if (ArgKind == ConversionSpecifier::Kind::uArg)
16983f875dcSMike Crowe     return ArgType->isUnsignedIntegerType();
17083f875dcSMike Crowe   return ArgType->isSignedIntegerType();
17183f875dcSMike Crowe }
17283f875dcSMike Crowe 
17383f875dcSMike Crowe namespace {
17483f875dcSMike Crowe AST_MATCHER(clang::QualType, isRealChar) {
17583f875dcSMike Crowe   return clang::tidy::utils::isRealCharType(Node);
17683f875dcSMike Crowe }
17783f875dcSMike Crowe } // namespace
17883f875dcSMike Crowe 
17983f875dcSMike Crowe static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode) {
18083f875dcSMike Crowe   /// For printf-style functions, the signedness of the type printed is
18183f875dcSMike Crowe   /// indicated by the corresponding type in the format string.
18283f875dcSMike Crowe   /// std::print will determine the signedness from the type of the
18383f875dcSMike Crowe   /// argument. This means that it is necessary to generate a cast in
18483f875dcSMike Crowe   /// StrictMode to ensure that the exact behaviour is maintained.
18583f875dcSMike Crowe   /// However, for templated functions like absl::PrintF and
18683f875dcSMike Crowe   /// fmt::printf, the signedness of the type printed is also taken from
18783f875dcSMike Crowe   /// the actual argument like std::print, so such casts are never
18883f875dcSMike Crowe   /// necessary. printf-style functions are variadic, whereas templated
18983f875dcSMike Crowe   /// ones aren't, so we can use that to distinguish between the two
19083f875dcSMike Crowe   /// cases.
19183f875dcSMike Crowe   if (StrictMode) {
19283f875dcSMike Crowe     const FunctionDecl *FuncDecl = Call->getDirectCallee();
19383f875dcSMike Crowe     assert(FuncDecl);
19483f875dcSMike Crowe     return FuncDecl->isVariadic();
19583f875dcSMike Crowe   }
19683f875dcSMike Crowe   return false;
19783f875dcSMike Crowe }
19883f875dcSMike Crowe 
199*a199fb12SMike Crowe FormatStringConverter::FormatStringConverter(
200*a199fb12SMike Crowe     ASTContext *ContextIn, const CallExpr *Call, unsigned FormatArgOffset,
201*a199fb12SMike Crowe     const Configuration ConfigIn, const LangOptions &LO, SourceManager &SM,
202*a199fb12SMike Crowe     Preprocessor &PP)
203af79372dSMike Crowe     : Context(ContextIn), Config(ConfigIn),
204af79372dSMike Crowe       CastMismatchedIntegerTypes(
205af79372dSMike Crowe           castMismatchedIntegerTypes(Call, ConfigIn.StrictMode)),
20683f875dcSMike Crowe       Args(Call->getArgs()), NumArgs(Call->getNumArgs()),
20783f875dcSMike Crowe       ArgsOffset(FormatArgOffset + 1), LangOpts(LO) {
20883f875dcSMike Crowe   assert(ArgsOffset <= NumArgs);
20983f875dcSMike Crowe   FormatExpr = llvm::dyn_cast<StringLiteral>(
21083f875dcSMike Crowe       Args[FormatArgOffset]->IgnoreImplicitAsWritten());
211*a199fb12SMike Crowe 
2120e62d5cfSMike Crowe   if (!FormatExpr || !FormatExpr->isOrdinary()) {
2130e62d5cfSMike Crowe     // Function must have a narrow string literal as its first argument.
2140e62d5cfSMike Crowe     conversionNotPossible("first argument is not a narrow string literal");
2150e62d5cfSMike Crowe     return;
2160e62d5cfSMike Crowe   }
217*a199fb12SMike Crowe 
218*a199fb12SMike Crowe   if (const std::optional<StringRef> MaybeMacroName =
219*a199fb12SMike Crowe           formatStringContainsUnreplaceableMacro(Call, FormatExpr, SM, PP);
220*a199fb12SMike Crowe       MaybeMacroName) {
221*a199fb12SMike Crowe     conversionNotPossible(
222*a199fb12SMike Crowe         ("format string contains unreplaceable macro '" + *MaybeMacroName + "'")
223*a199fb12SMike Crowe             .str());
224*a199fb12SMike Crowe     return;
225*a199fb12SMike Crowe   }
226*a199fb12SMike Crowe 
22783f875dcSMike Crowe   PrintfFormatString = FormatExpr->getString();
22883f875dcSMike Crowe 
22983f875dcSMike Crowe   // Assume that the output will be approximately the same size as the input,
23083f875dcSMike Crowe   // but perhaps with a few escapes expanded.
23183f875dcSMike Crowe   const size_t EstimatedGrowth = 8;
23283f875dcSMike Crowe   StandardFormatString.reserve(PrintfFormatString.size() + EstimatedGrowth);
23383f875dcSMike Crowe   StandardFormatString.push_back('\"');
23483f875dcSMike Crowe 
23583f875dcSMike Crowe   const bool IsFreeBsdkPrintf = false;
23683f875dcSMike Crowe 
23783f875dcSMike Crowe   using clang::analyze_format_string::ParsePrintfString;
23883f875dcSMike Crowe   ParsePrintfString(*this, PrintfFormatString.data(),
23983f875dcSMike Crowe                     PrintfFormatString.data() + PrintfFormatString.size(),
24083f875dcSMike Crowe                     LangOpts, Context->getTargetInfo(), IsFreeBsdkPrintf);
24183f875dcSMike Crowe   finalizeFormatText();
24283f875dcSMike Crowe }
24383f875dcSMike Crowe 
244*a199fb12SMike Crowe std::optional<StringRef>
245*a199fb12SMike Crowe FormatStringConverter::formatStringContainsUnreplaceableMacro(
246*a199fb12SMike Crowe     const CallExpr *Call, const StringLiteral *FormatExpr, SourceManager &SM,
247*a199fb12SMike Crowe     Preprocessor &PP) {
248*a199fb12SMike Crowe   // If a macro invocation surrounds the entire call then we don't want that to
249*a199fb12SMike Crowe   // inhibit conversion. The whole format string will appear to come from that
250*a199fb12SMike Crowe   // macro, as will the function call.
251*a199fb12SMike Crowe   std::optional<StringRef> MaybeSurroundingMacroName;
252*a199fb12SMike Crowe   if (SourceLocation BeginCallLoc = Call->getBeginLoc();
253*a199fb12SMike Crowe       BeginCallLoc.isMacroID())
254*a199fb12SMike Crowe     MaybeSurroundingMacroName =
255*a199fb12SMike Crowe         Lexer::getImmediateMacroName(BeginCallLoc, SM, PP.getLangOpts());
256*a199fb12SMike Crowe 
257*a199fb12SMike Crowe   for (auto I = FormatExpr->tokloc_begin(), E = FormatExpr->tokloc_end();
258*a199fb12SMike Crowe        I != E; ++I) {
259*a199fb12SMike Crowe     const SourceLocation &TokenLoc = *I;
260*a199fb12SMike Crowe     if (TokenLoc.isMacroID()) {
261*a199fb12SMike Crowe       const StringRef MacroName =
262*a199fb12SMike Crowe           Lexer::getImmediateMacroName(TokenLoc, SM, PP.getLangOpts());
263*a199fb12SMike Crowe 
264*a199fb12SMike Crowe       if (MaybeSurroundingMacroName != MacroName) {
265*a199fb12SMike Crowe         // glibc uses __PRI64_PREFIX and __PRIPTR_PREFIX to define the prefixes
266*a199fb12SMike Crowe         // for types that change size so we must look for multiple prefixes.
267*a199fb12SMike Crowe         if (!MacroName.starts_with("PRI") && !MacroName.starts_with("__PRI"))
268*a199fb12SMike Crowe           return MacroName;
269*a199fb12SMike Crowe 
270*a199fb12SMike Crowe         const SourceLocation TokenSpellingLoc = SM.getSpellingLoc(TokenLoc);
271*a199fb12SMike Crowe         const OptionalFileEntryRef MaybeFileEntry =
272*a199fb12SMike Crowe             SM.getFileEntryRefForID(SM.getFileID(TokenSpellingLoc));
273*a199fb12SMike Crowe         if (!MaybeFileEntry)
274*a199fb12SMike Crowe           return MacroName;
275*a199fb12SMike Crowe 
276*a199fb12SMike Crowe         HeaderSearch &HS = PP.getHeaderSearchInfo();
277*a199fb12SMike Crowe         // Check if the file is a system header
278*a199fb12SMike Crowe         if (!isSystem(HS.getFileDirFlavor(*MaybeFileEntry)) ||
279*a199fb12SMike Crowe             llvm::sys::path::filename(MaybeFileEntry->getName()) !=
280*a199fb12SMike Crowe                 "inttypes.h")
281*a199fb12SMike Crowe           return MacroName;
282*a199fb12SMike Crowe       }
283*a199fb12SMike Crowe     }
284*a199fb12SMike Crowe   }
285*a199fb12SMike Crowe   return std::nullopt;
286*a199fb12SMike Crowe }
287*a199fb12SMike Crowe 
28883f875dcSMike Crowe void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS,
28983f875dcSMike Crowe                                           std::string &FormatSpec) {
29083f875dcSMike Crowe   ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind();
29183f875dcSMike Crowe 
29283f875dcSMike Crowe   // We only care about alignment if a field width is specified
29383f875dcSMike Crowe   if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) {
29483f875dcSMike Crowe     if (ArgKind == ConversionSpecifier::sArg) {
29583f875dcSMike Crowe       // Strings are left-aligned by default with std::format, so we only
29683f875dcSMike Crowe       // need to emit an alignment if this one needs to be right aligned.
29783f875dcSMike Crowe       if (!FS.isLeftJustified())
29883f875dcSMike Crowe         FormatSpec.push_back('>');
29983f875dcSMike Crowe     } else {
30083f875dcSMike Crowe       // Numbers are right-aligned by default with std::format, so we only
30183f875dcSMike Crowe       // need to emit an alignment if this one needs to be left aligned.
30283f875dcSMike Crowe       if (FS.isLeftJustified())
30383f875dcSMike Crowe         FormatSpec.push_back('<');
30483f875dcSMike Crowe     }
30583f875dcSMike Crowe   }
30683f875dcSMike Crowe }
30783f875dcSMike Crowe 
30883f875dcSMike Crowe void FormatStringConverter::emitSign(const PrintfSpecifier &FS,
30983f875dcSMike Crowe                                      std::string &FormatSpec) {
31083f875dcSMike Crowe   const ConversionSpecifier Spec = FS.getConversionSpecifier();
31183f875dcSMike Crowe 
31283f875dcSMike Crowe   // Ignore on something that isn't numeric. For printf it's would be a
31383f875dcSMike Crowe   // compile-time warning but ignored at runtime, but for std::format it
31483f875dcSMike Crowe   // ought to be a compile-time error.
31583f875dcSMike Crowe   if (Spec.isAnyIntArg() || Spec.isDoubleArg()) {
31683f875dcSMike Crowe     // + is preferred to ' '
31783f875dcSMike Crowe     if (FS.hasPlusPrefix())
31883f875dcSMike Crowe       FormatSpec.push_back('+');
31983f875dcSMike Crowe     else if (FS.hasSpacePrefix())
32083f875dcSMike Crowe       FormatSpec.push_back(' ');
32183f875dcSMike Crowe   }
32283f875dcSMike Crowe }
32383f875dcSMike Crowe 
32483f875dcSMike Crowe void FormatStringConverter::emitAlternativeForm(const PrintfSpecifier &FS,
32583f875dcSMike Crowe                                                 std::string &FormatSpec) {
32683f875dcSMike Crowe   if (FS.hasAlternativeForm()) {
32783f875dcSMike Crowe     switch (FS.getConversionSpecifier().getKind()) {
32883f875dcSMike Crowe     case ConversionSpecifier::Kind::aArg:
32983f875dcSMike Crowe     case ConversionSpecifier::Kind::AArg:
33083f875dcSMike Crowe     case ConversionSpecifier::Kind::eArg:
33183f875dcSMike Crowe     case ConversionSpecifier::Kind::EArg:
33283f875dcSMike Crowe     case ConversionSpecifier::Kind::fArg:
33383f875dcSMike Crowe     case ConversionSpecifier::Kind::FArg:
33483f875dcSMike Crowe     case ConversionSpecifier::Kind::gArg:
33583f875dcSMike Crowe     case ConversionSpecifier::Kind::GArg:
33683f875dcSMike Crowe     case ConversionSpecifier::Kind::xArg:
33783f875dcSMike Crowe     case ConversionSpecifier::Kind::XArg:
33883f875dcSMike Crowe     case ConversionSpecifier::Kind::oArg:
33983f875dcSMike Crowe       FormatSpec.push_back('#');
34083f875dcSMike Crowe       break;
34183f875dcSMike Crowe     default:
34283f875dcSMike Crowe       // Alternative forms don't exist for other argument kinds
34383f875dcSMike Crowe       break;
34483f875dcSMike Crowe     }
34583f875dcSMike Crowe   }
34683f875dcSMike Crowe }
34783f875dcSMike Crowe 
34883f875dcSMike Crowe void FormatStringConverter::emitFieldWidth(const PrintfSpecifier &FS,
34983f875dcSMike Crowe                                            std::string &FormatSpec) {
35083f875dcSMike Crowe   {
35183f875dcSMike Crowe     const OptionalAmount FieldWidth = FS.getFieldWidth();
35283f875dcSMike Crowe     switch (FieldWidth.getHowSpecified()) {
35383f875dcSMike Crowe     case OptionalAmount::NotSpecified:
35483f875dcSMike Crowe       break;
35583f875dcSMike Crowe     case OptionalAmount::Constant:
35683f875dcSMike Crowe       FormatSpec.append(llvm::utostr(FieldWidth.getConstantAmount()));
35783f875dcSMike Crowe       break;
35883f875dcSMike Crowe     case OptionalAmount::Arg:
35983f875dcSMike Crowe       FormatSpec.push_back('{');
36083f875dcSMike Crowe       if (FieldWidth.usesPositionalArg()) {
36183f875dcSMike Crowe         // std::format argument identifiers are zero-based, whereas printf
36283f875dcSMike Crowe         // ones are one based.
36383f875dcSMike Crowe         assert(FieldWidth.getPositionalArgIndex() > 0U);
36483f875dcSMike Crowe         FormatSpec.append(llvm::utostr(FieldWidth.getPositionalArgIndex() - 1));
36583f875dcSMike Crowe       }
36683f875dcSMike Crowe       FormatSpec.push_back('}');
36783f875dcSMike Crowe       break;
36883f875dcSMike Crowe     case OptionalAmount::Invalid:
36983f875dcSMike Crowe       break;
37083f875dcSMike Crowe     }
37183f875dcSMike Crowe   }
37283f875dcSMike Crowe }
37383f875dcSMike Crowe 
37483f875dcSMike Crowe void FormatStringConverter::emitPrecision(const PrintfSpecifier &FS,
37583f875dcSMike Crowe                                           std::string &FormatSpec) {
37683f875dcSMike Crowe   const OptionalAmount FieldPrecision = FS.getPrecision();
37783f875dcSMike Crowe   switch (FieldPrecision.getHowSpecified()) {
37883f875dcSMike Crowe   case OptionalAmount::NotSpecified:
37983f875dcSMike Crowe     break;
38083f875dcSMike Crowe   case OptionalAmount::Constant:
38183f875dcSMike Crowe     FormatSpec.push_back('.');
38283f875dcSMike Crowe     FormatSpec.append(llvm::utostr(FieldPrecision.getConstantAmount()));
38383f875dcSMike Crowe     break;
38483f875dcSMike Crowe   case OptionalAmount::Arg:
38583f875dcSMike Crowe     FormatSpec.push_back('.');
38683f875dcSMike Crowe     FormatSpec.push_back('{');
38783f875dcSMike Crowe     if (FieldPrecision.usesPositionalArg()) {
38883f875dcSMike Crowe       // std::format argument identifiers are zero-based, whereas printf
38983f875dcSMike Crowe       // ones are one based.
39083f875dcSMike Crowe       assert(FieldPrecision.getPositionalArgIndex() > 0U);
39183f875dcSMike Crowe       FormatSpec.append(
39283f875dcSMike Crowe           llvm::utostr(FieldPrecision.getPositionalArgIndex() - 1));
39383f875dcSMike Crowe     }
39483f875dcSMike Crowe     FormatSpec.push_back('}');
39583f875dcSMike Crowe     break;
39683f875dcSMike Crowe   case OptionalAmount::Invalid:
39783f875dcSMike Crowe     break;
39883f875dcSMike Crowe   }
39983f875dcSMike Crowe }
40083f875dcSMike Crowe 
4012806cf4bSMike Crowe void FormatStringConverter::maybeRotateArguments(const PrintfSpecifier &FS) {
4022806cf4bSMike Crowe   unsigned ArgCount = 0;
4032806cf4bSMike Crowe   const OptionalAmount FieldWidth = FS.getFieldWidth();
4042806cf4bSMike Crowe   const OptionalAmount FieldPrecision = FS.getPrecision();
4052806cf4bSMike Crowe 
4062806cf4bSMike Crowe   if (FieldWidth.getHowSpecified() == OptionalAmount::Arg &&
4072806cf4bSMike Crowe       !FieldWidth.usesPositionalArg())
4082806cf4bSMike Crowe     ++ArgCount;
4092806cf4bSMike Crowe   if (FieldPrecision.getHowSpecified() == OptionalAmount::Arg &&
4102806cf4bSMike Crowe       !FieldPrecision.usesPositionalArg())
4112806cf4bSMike Crowe     ++ArgCount;
4122806cf4bSMike Crowe 
4132806cf4bSMike Crowe   if (ArgCount)
4142806cf4bSMike Crowe     ArgRotates.emplace_back(FS.getArgIndex() + ArgsOffset, ArgCount);
4152806cf4bSMike Crowe }
4162806cf4bSMike Crowe 
417c6207f6eSMike Crowe void FormatStringConverter::emitStringArgument(unsigned ArgIndex,
418c6207f6eSMike Crowe                                                const Expr *Arg) {
41983f875dcSMike Crowe   // If the argument is the result of a call to std::string::c_str() or
42083f875dcSMike Crowe   // data() with a return type of char then we can remove that call and
42183f875dcSMike Crowe   // pass the std::string directly. We don't want to do so if the return
42283f875dcSMike Crowe   // type is not a char pointer (though it's unlikely that such code would
42383f875dcSMike Crowe   // compile without warnings anyway.) See RedundantStringCStrCheck.
42483f875dcSMike Crowe 
42583f875dcSMike Crowe   if (!StringCStrCallExprMatcher) {
42683f875dcSMike Crowe     // Lazily create the matcher
42783f875dcSMike Crowe     const auto StringDecl = type(hasUnqualifiedDesugaredType(recordType(
42883f875dcSMike Crowe         hasDeclaration(cxxRecordDecl(hasName("::std::basic_string"))))));
42983f875dcSMike Crowe     const auto StringExpr = expr(
43083f875dcSMike Crowe         anyOf(hasType(StringDecl), hasType(qualType(pointsTo(StringDecl)))));
43183f875dcSMike Crowe 
43283f875dcSMike Crowe     StringCStrCallExprMatcher =
43383f875dcSMike Crowe         cxxMemberCallExpr(
43483f875dcSMike Crowe             on(StringExpr.bind("arg")), callee(memberExpr().bind("member")),
43583f875dcSMike Crowe             callee(cxxMethodDecl(hasAnyName("c_str", "data"),
43683f875dcSMike Crowe                                  returns(pointerType(pointee(isRealChar()))))))
43783f875dcSMike Crowe             .bind("call");
43883f875dcSMike Crowe   }
43983f875dcSMike Crowe 
44083f875dcSMike Crowe   auto CStrMatches = match(*StringCStrCallExprMatcher, *Arg, *Context);
44183f875dcSMike Crowe   if (CStrMatches.size() == 1)
44283f875dcSMike Crowe     ArgCStrRemovals.push_back(CStrMatches.front());
44383f875dcSMike Crowe   else if (Arg->getType()->isPointerType()) {
44483f875dcSMike Crowe     const QualType Pointee = Arg->getType()->getPointeeType();
44583f875dcSMike Crowe     // printf is happy to print signed char and unsigned char strings, but
44683f875dcSMike Crowe     // std::format only likes char strings.
44783f875dcSMike Crowe     if (Pointee->isCharType() && !isRealCharType(Pointee))
448c6207f6eSMike Crowe       ArgFixes.emplace_back(ArgIndex, "reinterpret_cast<const char *>(");
44983f875dcSMike Crowe   }
45083f875dcSMike Crowe }
45183f875dcSMike Crowe 
45283f875dcSMike Crowe bool FormatStringConverter::emitIntegerArgument(
45383f875dcSMike Crowe     ConversionSpecifier::Kind ArgKind, const Expr *Arg, unsigned ArgIndex,
45483f875dcSMike Crowe     std::string &FormatSpec) {
45583f875dcSMike Crowe   const clang::QualType &ArgType = Arg->getType();
45683f875dcSMike Crowe   if (ArgType->isBooleanType()) {
45783f875dcSMike Crowe     // std::format will print bool as either "true" or "false" by default,
45883f875dcSMike Crowe     // but printf prints them as "0" or "1". Be compatible with printf by
45983f875dcSMike Crowe     // requesting decimal output.
46083f875dcSMike Crowe     FormatSpec.push_back('d');
46183f875dcSMike Crowe   } else if (ArgType->isEnumeralType()) {
46283f875dcSMike Crowe     // std::format will try to find a specialization to print the enum
46383f875dcSMike Crowe     // (and probably fail), whereas printf would have just expected it to
46483f875dcSMike Crowe     // be passed as its underlying type. However, printf will have forced
46583f875dcSMike Crowe     // the signedness based on the format string, so we need to do the
46683f875dcSMike Crowe     // same.
46783f875dcSMike Crowe     if (const auto *ET = ArgType->getAs<EnumType>()) {
46883f875dcSMike Crowe       if (const std::optional<std::string> MaybeCastType =
46983f875dcSMike Crowe               castTypeForArgument(ArgKind, ET->getDecl()->getIntegerType()))
47083f875dcSMike Crowe         ArgFixes.emplace_back(
471c6207f6eSMike Crowe             ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
47283f875dcSMike Crowe       else
47383f875dcSMike Crowe         return conversionNotPossible(
47483f875dcSMike Crowe             (Twine("argument ") + Twine(ArgIndex) + " has unexpected enum type")
47583f875dcSMike Crowe                 .str());
47683f875dcSMike Crowe     }
47783f875dcSMike Crowe   } else if (CastMismatchedIntegerTypes &&
47883f875dcSMike Crowe              !isMatchingSignedness(ArgKind, ArgType)) {
47983f875dcSMike Crowe     // printf will happily print an unsigned type as signed if told to.
48083f875dcSMike Crowe     // Even -Wformat doesn't warn for this. std::format will format as
48183f875dcSMike Crowe     // unsigned unless we cast it.
48283f875dcSMike Crowe     if (const std::optional<std::string> MaybeCastType =
48383f875dcSMike Crowe             castTypeForArgument(ArgKind, ArgType))
48483f875dcSMike Crowe       ArgFixes.emplace_back(
485c6207f6eSMike Crowe           ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
48683f875dcSMike Crowe     else
48783f875dcSMike Crowe       return conversionNotPossible(
48883f875dcSMike Crowe           (Twine("argument ") + Twine(ArgIndex) + " cannot be cast to " +
48983f875dcSMike Crowe            Twine(ArgKind == ConversionSpecifier::Kind::uArg ? "unsigned"
49083f875dcSMike Crowe                                                             : "signed") +
49183f875dcSMike Crowe            " integer type to match format"
49283f875dcSMike Crowe            " specifier and StrictMode is enabled")
49383f875dcSMike Crowe               .str());
49483f875dcSMike Crowe   } else if (isRealCharType(ArgType) || !ArgType->isIntegerType()) {
49583f875dcSMike Crowe     // Only specify integer if the argument is of a different type
49683f875dcSMike Crowe     FormatSpec.push_back('d');
49783f875dcSMike Crowe   }
49883f875dcSMike Crowe   return true;
49983f875dcSMike Crowe }
50083f875dcSMike Crowe 
50183f875dcSMike Crowe /// Append the corresponding standard format string type fragment to FormatSpec,
50283f875dcSMike Crowe /// and store any argument fixes for later application.
50383f875dcSMike Crowe /// @returns true on success, false on failure
50483f875dcSMike Crowe bool FormatStringConverter::emitType(const PrintfSpecifier &FS, const Expr *Arg,
50583f875dcSMike Crowe                                      std::string &FormatSpec) {
50683f875dcSMike Crowe   ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind();
50783f875dcSMike Crowe   switch (ArgKind) {
50883f875dcSMike Crowe   case ConversionSpecifier::Kind::sArg:
509c6207f6eSMike Crowe     emitStringArgument(FS.getArgIndex() + ArgsOffset, Arg);
51083f875dcSMike Crowe     break;
51183f875dcSMike Crowe   case ConversionSpecifier::Kind::cArg:
51283f875dcSMike Crowe     // The type must be "c" to get a character unless the type is exactly
51383f875dcSMike Crowe     // char (whether that be signed or unsigned for the target.)
51483f875dcSMike Crowe     if (!isRealCharType(Arg->getType()))
51583f875dcSMike Crowe       FormatSpec.push_back('c');
51683f875dcSMike Crowe     break;
51783f875dcSMike Crowe   case ConversionSpecifier::Kind::dArg:
51883f875dcSMike Crowe   case ConversionSpecifier::Kind::iArg:
51983f875dcSMike Crowe   case ConversionSpecifier::Kind::uArg:
52083f875dcSMike Crowe     if (!emitIntegerArgument(ArgKind, Arg, FS.getArgIndex() + ArgsOffset,
52183f875dcSMike Crowe                              FormatSpec))
52283f875dcSMike Crowe       return false;
52383f875dcSMike Crowe     break;
52483f875dcSMike Crowe   case ConversionSpecifier::Kind::pArg: {
52583f875dcSMike Crowe     const clang::QualType &ArgType = Arg->getType();
52683f875dcSMike Crowe     // std::format knows how to format void pointers and nullptrs
52783f875dcSMike Crowe     if (!ArgType->isNullPtrType() && !ArgType->isVoidPointerType())
528c6207f6eSMike Crowe       ArgFixes.emplace_back(FS.getArgIndex() + ArgsOffset,
529c6207f6eSMike Crowe                             "static_cast<const void *>(");
53083f875dcSMike Crowe     break;
53183f875dcSMike Crowe   }
53283f875dcSMike Crowe   case ConversionSpecifier::Kind::xArg:
53383f875dcSMike Crowe     FormatSpec.push_back('x');
53483f875dcSMike Crowe     break;
53583f875dcSMike Crowe   case ConversionSpecifier::Kind::XArg:
53683f875dcSMike Crowe     FormatSpec.push_back('X');
53783f875dcSMike Crowe     break;
53883f875dcSMike Crowe   case ConversionSpecifier::Kind::oArg:
53983f875dcSMike Crowe     FormatSpec.push_back('o');
54083f875dcSMike Crowe     break;
54183f875dcSMike Crowe   case ConversionSpecifier::Kind::aArg:
54283f875dcSMike Crowe     FormatSpec.push_back('a');
54383f875dcSMike Crowe     break;
54483f875dcSMike Crowe   case ConversionSpecifier::Kind::AArg:
54583f875dcSMike Crowe     FormatSpec.push_back('A');
54683f875dcSMike Crowe     break;
54783f875dcSMike Crowe   case ConversionSpecifier::Kind::eArg:
54883f875dcSMike Crowe     FormatSpec.push_back('e');
54983f875dcSMike Crowe     break;
55083f875dcSMike Crowe   case ConversionSpecifier::Kind::EArg:
55183f875dcSMike Crowe     FormatSpec.push_back('E');
55283f875dcSMike Crowe     break;
55383f875dcSMike Crowe   case ConversionSpecifier::Kind::fArg:
55483f875dcSMike Crowe     FormatSpec.push_back('f');
55583f875dcSMike Crowe     break;
55683f875dcSMike Crowe   case ConversionSpecifier::Kind::FArg:
55783f875dcSMike Crowe     FormatSpec.push_back('F');
55883f875dcSMike Crowe     break;
55983f875dcSMike Crowe   case ConversionSpecifier::Kind::gArg:
56083f875dcSMike Crowe     FormatSpec.push_back('g');
56183f875dcSMike Crowe     break;
56283f875dcSMike Crowe   case ConversionSpecifier::Kind::GArg:
56383f875dcSMike Crowe     FormatSpec.push_back('G');
56483f875dcSMike Crowe     break;
56583f875dcSMike Crowe   default:
56683f875dcSMike Crowe     // Something we don't understand
56783f875dcSMike Crowe     return conversionNotPossible((Twine("argument ") +
56883f875dcSMike Crowe                                   Twine(FS.getArgIndex() + ArgsOffset) +
56983f875dcSMike Crowe                                   " has an unsupported format specifier")
57083f875dcSMike Crowe                                      .str());
57183f875dcSMike Crowe   }
57283f875dcSMike Crowe 
57383f875dcSMike Crowe   return true;
57483f875dcSMike Crowe }
57583f875dcSMike Crowe 
57683f875dcSMike Crowe /// Append the standard format string equivalent of the passed PrintfSpecifier
57783f875dcSMike Crowe /// to StandardFormatString and store any argument fixes for later application.
57883f875dcSMike Crowe /// @returns true on success, false on failure
57983f875dcSMike Crowe bool FormatStringConverter::convertArgument(const PrintfSpecifier &FS,
58083f875dcSMike Crowe                                             const Expr *Arg,
58183f875dcSMike Crowe                                             std::string &StandardFormatString) {
58283f875dcSMike Crowe   // The specifier must have an associated argument
58383f875dcSMike Crowe   assert(FS.consumesDataArgument());
58483f875dcSMike Crowe 
58583f875dcSMike Crowe   StandardFormatString.push_back('{');
58683f875dcSMike Crowe 
58783f875dcSMike Crowe   if (FS.usesPositionalArg()) {
58883f875dcSMike Crowe     // std::format argument identifiers are zero-based, whereas printf ones
58983f875dcSMike Crowe     // are one based.
59083f875dcSMike Crowe     assert(FS.getPositionalArgIndex() > 0U);
59183f875dcSMike Crowe     StandardFormatString.append(llvm::utostr(FS.getPositionalArgIndex() - 1));
59283f875dcSMike Crowe   }
59383f875dcSMike Crowe 
59483f875dcSMike Crowe   // std::format format argument parts to potentially emit:
59583f875dcSMike Crowe   // [[fill]align][sign]["#"]["0"][width]["."precision][type]
59683f875dcSMike Crowe   std::string FormatSpec;
59783f875dcSMike Crowe 
59883f875dcSMike Crowe   // printf doesn't support specifying the fill character - it's always a
59983f875dcSMike Crowe   // space, so we never need to generate one.
60083f875dcSMike Crowe 
60183f875dcSMike Crowe   emitAlignment(FS, FormatSpec);
60283f875dcSMike Crowe   emitSign(FS, FormatSpec);
60383f875dcSMike Crowe   emitAlternativeForm(FS, FormatSpec);
60483f875dcSMike Crowe 
60583f875dcSMike Crowe   if (FS.hasLeadingZeros())
60683f875dcSMike Crowe     FormatSpec.push_back('0');
60783f875dcSMike Crowe 
60883f875dcSMike Crowe   emitFieldWidth(FS, FormatSpec);
60983f875dcSMike Crowe   emitPrecision(FS, FormatSpec);
6102806cf4bSMike Crowe   maybeRotateArguments(FS);
61183f875dcSMike Crowe 
61283f875dcSMike Crowe   if (!emitType(FS, Arg, FormatSpec))
61383f875dcSMike Crowe     return false;
61483f875dcSMike Crowe 
61583f875dcSMike Crowe   if (!FormatSpec.empty()) {
61683f875dcSMike Crowe     StandardFormatString.push_back(':');
61783f875dcSMike Crowe     StandardFormatString.append(FormatSpec);
61883f875dcSMike Crowe   }
61983f875dcSMike Crowe 
62083f875dcSMike Crowe   StandardFormatString.push_back('}');
62183f875dcSMike Crowe   return true;
62283f875dcSMike Crowe }
62383f875dcSMike Crowe 
62483f875dcSMike Crowe /// Called for each format specifier by ParsePrintfString.
62583f875dcSMike Crowe bool FormatStringConverter::HandlePrintfSpecifier(const PrintfSpecifier &FS,
62683f875dcSMike Crowe                                                   const char *StartSpecifier,
62783f875dcSMike Crowe                                                   unsigned SpecifierLen,
62883f875dcSMike Crowe                                                   const TargetInfo &Target) {
62983f875dcSMike Crowe 
63083f875dcSMike Crowe   const size_t StartSpecifierPos = StartSpecifier - PrintfFormatString.data();
63183f875dcSMike Crowe   assert(StartSpecifierPos + SpecifierLen <= PrintfFormatString.size());
63283f875dcSMike Crowe 
63383f875dcSMike Crowe   // Everything before the specifier needs copying verbatim
63483f875dcSMike Crowe   assert(StartSpecifierPos >= PrintfFormatStringPos);
63583f875dcSMike Crowe 
63683f875dcSMike Crowe   appendFormatText(StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
63783f875dcSMike Crowe                              StartSpecifierPos - PrintfFormatStringPos));
63883f875dcSMike Crowe 
63983f875dcSMike Crowe   const ConversionSpecifier::Kind ArgKind =
64083f875dcSMike Crowe       FS.getConversionSpecifier().getKind();
64183f875dcSMike Crowe 
64283f875dcSMike Crowe   // Skip over specifier
64383f875dcSMike Crowe   PrintfFormatStringPos = StartSpecifierPos + SpecifierLen;
64483f875dcSMike Crowe   assert(PrintfFormatStringPos <= PrintfFormatString.size());
64583f875dcSMike Crowe 
64683f875dcSMike Crowe   FormatStringNeededRewriting = true;
64783f875dcSMike Crowe 
64883f875dcSMike Crowe   if (ArgKind == ConversionSpecifier::Kind::nArg) {
64983f875dcSMike Crowe     // std::print doesn't do the equivalent of %n
65083f875dcSMike Crowe     return conversionNotPossible("'%n' is not supported in format string");
65183f875dcSMike Crowe   }
65283f875dcSMike Crowe 
65383f875dcSMike Crowe   if (ArgKind == ConversionSpecifier::Kind::PrintErrno) {
65483f875dcSMike Crowe     // std::print doesn't support %m. In theory we could insert a
65583f875dcSMike Crowe     // strerror(errno) parameter (assuming that libc has a thread-safe
65683f875dcSMike Crowe     // implementation, which glibc does), but that would require keeping track
65783f875dcSMike Crowe     // of the input and output parameter indices for position arguments too.
65883f875dcSMike Crowe     return conversionNotPossible("'%m' is not supported in format string");
65983f875dcSMike Crowe   }
66083f875dcSMike Crowe 
66183f875dcSMike Crowe   if (ArgKind == ConversionSpecifier::PercentArg) {
66283f875dcSMike Crowe     StandardFormatString.push_back('%');
66383f875dcSMike Crowe     return true;
66483f875dcSMike Crowe   }
66583f875dcSMike Crowe 
66683f875dcSMike Crowe   const unsigned ArgIndex = FS.getArgIndex() + ArgsOffset;
66783f875dcSMike Crowe   if (ArgIndex >= NumArgs) {
66883f875dcSMike Crowe     // Argument index out of range. Give up.
66983f875dcSMike Crowe     return conversionNotPossible(
67083f875dcSMike Crowe         (Twine("argument index ") + Twine(ArgIndex) + " is out of range")
67183f875dcSMike Crowe             .str());
67283f875dcSMike Crowe   }
67383f875dcSMike Crowe 
67483f875dcSMike Crowe   return convertArgument(FS, Args[ArgIndex]->IgnoreImplicitAsWritten(),
67583f875dcSMike Crowe                          StandardFormatString);
67683f875dcSMike Crowe }
67783f875dcSMike Crowe 
67883f875dcSMike Crowe /// Called at the very end just before applying fixes to capture the last part
67983f875dcSMike Crowe /// of the format string.
68083f875dcSMike Crowe void FormatStringConverter::finalizeFormatText() {
68183f875dcSMike Crowe   appendFormatText(
68283f875dcSMike Crowe       StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
68383f875dcSMike Crowe                 PrintfFormatString.size() - PrintfFormatStringPos));
68483f875dcSMike Crowe   PrintfFormatStringPos = PrintfFormatString.size();
68583f875dcSMike Crowe 
6862ce765ebSMike Crowe   // It's clearer to convert printf("Hello\r\n"); to std::print("Hello\r\n")
6872ce765ebSMike Crowe   // than to std::println("Hello\r");
688af79372dSMike Crowe   // Use StringRef until C++20 std::string::ends_with() is available.
689af79372dSMike Crowe   const auto StandardFormatStringRef = StringRef(StandardFormatString);
690af79372dSMike Crowe   if (Config.AllowTrailingNewlineRemoval &&
691af79372dSMike Crowe       StandardFormatStringRef.ends_with("\\n") &&
692af79372dSMike Crowe       !StandardFormatStringRef.ends_with("\\\\n") &&
693af79372dSMike Crowe       !StandardFormatStringRef.ends_with("\\r\\n")) {
69483f875dcSMike Crowe     UsePrintNewlineFunction = true;
69583f875dcSMike Crowe     FormatStringNeededRewriting = true;
69683f875dcSMike Crowe     StandardFormatString.erase(StandardFormatString.end() - 2,
69783f875dcSMike Crowe                                StandardFormatString.end());
69883f875dcSMike Crowe   }
69983f875dcSMike Crowe 
70083f875dcSMike Crowe   StandardFormatString.push_back('\"');
70183f875dcSMike Crowe }
70283f875dcSMike Crowe 
70383f875dcSMike Crowe /// Append literal parts of the format text, reinstating escapes as required.
70483f875dcSMike Crowe void FormatStringConverter::appendFormatText(const StringRef Text) {
70583f875dcSMike Crowe   for (const char Ch : Text) {
70683f875dcSMike Crowe     if (Ch == '\a')
70783f875dcSMike Crowe       StandardFormatString += "\\a";
70883f875dcSMike Crowe     else if (Ch == '\b')
70983f875dcSMike Crowe       StandardFormatString += "\\b";
71083f875dcSMike Crowe     else if (Ch == '\f')
71183f875dcSMike Crowe       StandardFormatString += "\\f";
71283f875dcSMike Crowe     else if (Ch == '\n')
71383f875dcSMike Crowe       StandardFormatString += "\\n";
71483f875dcSMike Crowe     else if (Ch == '\r')
71583f875dcSMike Crowe       StandardFormatString += "\\r";
71683f875dcSMike Crowe     else if (Ch == '\t')
71783f875dcSMike Crowe       StandardFormatString += "\\t";
71883f875dcSMike Crowe     else if (Ch == '\v')
71983f875dcSMike Crowe       StandardFormatString += "\\v";
72083f875dcSMike Crowe     else if (Ch == '\"')
72183f875dcSMike Crowe       StandardFormatString += "\\\"";
72283f875dcSMike Crowe     else if (Ch == '\\')
72383f875dcSMike Crowe       StandardFormatString += "\\\\";
72483f875dcSMike Crowe     else if (Ch == '{') {
72583f875dcSMike Crowe       StandardFormatString += "{{";
72683f875dcSMike Crowe       FormatStringNeededRewriting = true;
72783f875dcSMike Crowe     } else if (Ch == '}') {
72883f875dcSMike Crowe       StandardFormatString += "}}";
72983f875dcSMike Crowe       FormatStringNeededRewriting = true;
73083f875dcSMike Crowe     } else if (Ch < 32) {
73183f875dcSMike Crowe       StandardFormatString += "\\x";
73283f875dcSMike Crowe       StandardFormatString += llvm::hexdigit(Ch >> 4, true);
73383f875dcSMike Crowe       StandardFormatString += llvm::hexdigit(Ch & 0xf, true);
73483f875dcSMike Crowe     } else
73583f875dcSMike Crowe       StandardFormatString += Ch;
73683f875dcSMike Crowe   }
73783f875dcSMike Crowe }
73883f875dcSMike Crowe 
739c6207f6eSMike Crowe static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch,
740c6207f6eSMike Crowe                                           ASTContext &Context) {
741c6207f6eSMike Crowe   const auto *Arg = CStrRemovalMatch.getNodeAs<Expr>("arg");
742c6207f6eSMike Crowe   const auto *Member = CStrRemovalMatch.getNodeAs<MemberExpr>("member");
743c6207f6eSMike Crowe   const bool Arrow = Member->isArrow();
744c6207f6eSMike Crowe   return Arrow ? utils::fixit::formatDereference(*Arg, Context)
745c6207f6eSMike Crowe                : tooling::fixit::getText(*Arg, Context).str();
746c6207f6eSMike Crowe }
747c6207f6eSMike Crowe 
74883f875dcSMike Crowe /// Called by the check when it is ready to apply the fixes.
74983f875dcSMike Crowe void FormatStringConverter::applyFixes(DiagnosticBuilder &Diag,
75083f875dcSMike Crowe                                        SourceManager &SM) {
75183f875dcSMike Crowe   if (FormatStringNeededRewriting) {
75283f875dcSMike Crowe     Diag << FixItHint::CreateReplacement(
75383f875dcSMike Crowe         CharSourceRange::getTokenRange(FormatExpr->getBeginLoc(),
75483f875dcSMike Crowe                                        FormatExpr->getEndLoc()),
75583f875dcSMike Crowe         StandardFormatString);
75683f875dcSMike Crowe   }
75783f875dcSMike Crowe 
7582806cf4bSMike Crowe   // ArgCount is one less than the number of arguments to be rotated.
7592806cf4bSMike Crowe   for (auto [ValueArgIndex, ArgCount] : ArgRotates) {
7602806cf4bSMike Crowe     assert(ValueArgIndex < NumArgs);
7612806cf4bSMike Crowe     assert(ValueArgIndex > ArgCount);
7622806cf4bSMike Crowe 
763c6207f6eSMike Crowe     // First move the value argument to the right place. But if there's a
764c6207f6eSMike Crowe     // pending c_str() removal then we must do that at the same time.
765c6207f6eSMike Crowe     if (const auto CStrRemovalMatch =
766c6207f6eSMike Crowe             std::find_if(ArgCStrRemovals.cbegin(), ArgCStrRemovals.cend(),
767c6207f6eSMike Crowe                          [ArgStartPos = Args[ValueArgIndex]->getBeginLoc()](
768c6207f6eSMike Crowe                              const BoundNodes &Match) {
769c6207f6eSMike Crowe                            // This c_str() removal corresponds to the argument
770c6207f6eSMike Crowe                            // being moved if they start at the same location.
771c6207f6eSMike Crowe                            const Expr *CStrArg = Match.getNodeAs<Expr>("arg");
772c6207f6eSMike Crowe                            return ArgStartPos == CStrArg->getBeginLoc();
773c6207f6eSMike Crowe                          });
774c6207f6eSMike Crowe         CStrRemovalMatch != ArgCStrRemovals.end()) {
775c6207f6eSMike Crowe       const std::string ArgText =
776c6207f6eSMike Crowe           withoutCStrReplacement(*CStrRemovalMatch, *Context);
777c6207f6eSMike Crowe       assert(!ArgText.empty());
778c6207f6eSMike Crowe 
779c6207f6eSMike Crowe       Diag << FixItHint::CreateReplacement(
780c6207f6eSMike Crowe           Args[ValueArgIndex - ArgCount]->getSourceRange(), ArgText);
781c6207f6eSMike Crowe 
782c6207f6eSMike Crowe       // That c_str() removal is now dealt with, so we don't need to do it again
783c6207f6eSMike Crowe       ArgCStrRemovals.erase(CStrRemovalMatch);
784c6207f6eSMike Crowe     } else
7852806cf4bSMike Crowe       Diag << tooling::fixit::createReplacement(*Args[ValueArgIndex - ArgCount],
7862806cf4bSMike Crowe                                                 *Args[ValueArgIndex], *Context);
7872806cf4bSMike Crowe 
7882806cf4bSMike Crowe     // Now shift down the field width and precision (if either are present) to
7892806cf4bSMike Crowe     // accommodate it.
7902806cf4bSMike Crowe     for (size_t Offset = 0; Offset < ArgCount; ++Offset)
7912806cf4bSMike Crowe       Diag << tooling::fixit::createReplacement(
7922806cf4bSMike Crowe           *Args[ValueArgIndex - Offset], *Args[ValueArgIndex - Offset - 1],
7932806cf4bSMike Crowe           *Context);
794c6207f6eSMike Crowe 
795c6207f6eSMike Crowe     // Now we need to modify the ArgFix index too so that we fix the right
796c6207f6eSMike Crowe     // argument. We don't need to care about the width and precision indices
797c6207f6eSMike Crowe     // since they never need fixing.
798c6207f6eSMike Crowe     for (auto &ArgFix : ArgFixes) {
799c6207f6eSMike Crowe       if (ArgFix.ArgIndex == ValueArgIndex)
800c6207f6eSMike Crowe         ArgFix.ArgIndex = ValueArgIndex - ArgCount;
801c6207f6eSMike Crowe     }
802c6207f6eSMike Crowe   }
803c6207f6eSMike Crowe 
804c6207f6eSMike Crowe   for (const auto &[ArgIndex, Replacement] : ArgFixes) {
805c6207f6eSMike Crowe     SourceLocation AfterOtherSide =
806c6207f6eSMike Crowe         Lexer::findNextToken(Args[ArgIndex]->getEndLoc(), SM, LangOpts)
807c6207f6eSMike Crowe             ->getLocation();
808c6207f6eSMike Crowe 
809c6207f6eSMike Crowe     Diag << FixItHint::CreateInsertion(Args[ArgIndex]->getBeginLoc(),
810c6207f6eSMike Crowe                                        Replacement, true)
811c6207f6eSMike Crowe          << FixItHint::CreateInsertion(AfterOtherSide, ")", true);
812c6207f6eSMike Crowe   }
813c6207f6eSMike Crowe 
814c6207f6eSMike Crowe   for (const auto &Match : ArgCStrRemovals) {
815c6207f6eSMike Crowe     const auto *Call = Match.getNodeAs<CallExpr>("call");
816c6207f6eSMike Crowe     const std::string ArgText = withoutCStrReplacement(Match, *Context);
817c6207f6eSMike Crowe     if (!ArgText.empty())
818c6207f6eSMike Crowe       Diag << FixItHint::CreateReplacement(Call->getSourceRange(), ArgText);
8192806cf4bSMike Crowe   }
82083f875dcSMike Crowe }
82183f875dcSMike Crowe } // namespace clang::tidy::utils
822