183f875dcSMike Crowe //===--- FormatStringConverter.cpp - clang-tidy----------------------------===// 283f875dcSMike Crowe // 383f875dcSMike Crowe // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 483f875dcSMike Crowe // See https://llvm.org/LICENSE.txt for license information. 583f875dcSMike Crowe // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 683f875dcSMike Crowe // 783f875dcSMike Crowe //===----------------------------------------------------------------------===// 883f875dcSMike Crowe /// 983f875dcSMike Crowe /// \file 1083f875dcSMike Crowe /// Implementation of the FormatStringConverter class which is used to convert 1183f875dcSMike Crowe /// printf format strings to C++ std::formatter format strings. 1283f875dcSMike Crowe /// 1383f875dcSMike Crowe //===----------------------------------------------------------------------===// 1483f875dcSMike Crowe 1583f875dcSMike Crowe #include "FormatStringConverter.h" 1683f875dcSMike Crowe #include "../utils/FixItHintUtils.h" 1783f875dcSMike Crowe #include "clang/AST/Expr.h" 1883f875dcSMike Crowe #include "clang/ASTMatchers/ASTMatchFinder.h" 1983f875dcSMike Crowe #include "clang/Basic/LangOptions.h" 2083f875dcSMike Crowe #include "clang/Lex/Lexer.h" 21*a199fb12SMike Crowe #include "clang/Lex/Preprocessor.h" 2283f875dcSMike Crowe #include "clang/Tooling/FixIt.h" 2383f875dcSMike Crowe #include "llvm/ADT/StringExtras.h" 2483f875dcSMike Crowe #include "llvm/Support/Debug.h" 2583f875dcSMike Crowe 2683f875dcSMike Crowe using namespace clang::ast_matchers; 2783f875dcSMike Crowe using namespace clang::analyze_printf; 2883f875dcSMike Crowe 2983f875dcSMike Crowe namespace clang::tidy::utils { 3083f875dcSMike Crowe using clang::analyze_format_string::ConversionSpecifier; 3183f875dcSMike Crowe 3283f875dcSMike Crowe /// Is the passed type the actual "char" type, whether that be signed or 3383f875dcSMike Crowe /// unsigned, rather than explicit signed char or unsigned char types. 3483f875dcSMike Crowe static bool isRealCharType(const clang::QualType &Ty) { 3583f875dcSMike Crowe using namespace clang; 3683f875dcSMike Crowe const Type *DesugaredType = Ty->getUnqualifiedDesugaredType(); 3783f875dcSMike Crowe if (const auto *BT = llvm::dyn_cast<BuiltinType>(DesugaredType)) 3883f875dcSMike Crowe return (BT->getKind() == BuiltinType::Char_U || 3983f875dcSMike Crowe BT->getKind() == BuiltinType::Char_S); 4083f875dcSMike Crowe return false; 4183f875dcSMike Crowe } 4283f875dcSMike Crowe 4383f875dcSMike Crowe /// If possible, return the text name of the signed type that corresponds to the 4483f875dcSMike Crowe /// passed integer type. If the passed type is already signed then its name is 4583f875dcSMike Crowe /// just returned. Only supports BuiltinTypes. 4683f875dcSMike Crowe static std::optional<std::string> 4783f875dcSMike Crowe getCorrespondingSignedTypeName(const clang::QualType &QT) { 4883f875dcSMike Crowe using namespace clang; 4983f875dcSMike Crowe const auto UQT = QT.getUnqualifiedType(); 5083f875dcSMike Crowe if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) { 5183f875dcSMike Crowe switch (BT->getKind()) { 5283f875dcSMike Crowe case BuiltinType::UChar: 5383f875dcSMike Crowe case BuiltinType::Char_U: 5483f875dcSMike Crowe case BuiltinType::SChar: 5583f875dcSMike Crowe case BuiltinType::Char_S: 5683f875dcSMike Crowe return "signed char"; 5783f875dcSMike Crowe case BuiltinType::UShort: 5883f875dcSMike Crowe case BuiltinType::Short: 5983f875dcSMike Crowe return "short"; 6083f875dcSMike Crowe case BuiltinType::UInt: 6183f875dcSMike Crowe case BuiltinType::Int: 6283f875dcSMike Crowe return "int"; 6383f875dcSMike Crowe case BuiltinType::ULong: 6483f875dcSMike Crowe case BuiltinType::Long: 6583f875dcSMike Crowe return "long"; 6683f875dcSMike Crowe case BuiltinType::ULongLong: 6783f875dcSMike Crowe case BuiltinType::LongLong: 6883f875dcSMike Crowe return "long long"; 6983f875dcSMike Crowe default: 7083f875dcSMike Crowe llvm::dbgs() << "Unknown corresponding signed type for BuiltinType '" 7183f875dcSMike Crowe << QT.getAsString() << "'\n"; 7283f875dcSMike Crowe return std::nullopt; 7383f875dcSMike Crowe } 7483f875dcSMike Crowe } 7583f875dcSMike Crowe 7683f875dcSMike Crowe // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only 7783f875dcSMike Crowe // if the argument type does. 7883f875dcSMike Crowe const std::string TypeName = UQT.getAsString(); 7983f875dcSMike Crowe StringRef SimplifiedTypeName{TypeName}; 8083f875dcSMike Crowe const bool InStd = SimplifiedTypeName.consume_front("std::"); 8183f875dcSMike Crowe const StringRef Prefix = InStd ? "std::" : ""; 8283f875dcSMike Crowe 8383f875dcSMike Crowe if (SimplifiedTypeName.starts_with("uint") && 8483f875dcSMike Crowe SimplifiedTypeName.ends_with("_t")) 8583f875dcSMike Crowe return (Twine(Prefix) + SimplifiedTypeName.drop_front()).str(); 8683f875dcSMike Crowe 8783f875dcSMike Crowe if (SimplifiedTypeName == "size_t") 8883f875dcSMike Crowe return (Twine(Prefix) + "ssize_t").str(); 8983f875dcSMike Crowe 9083f875dcSMike Crowe llvm::dbgs() << "Unknown corresponding signed type for non-BuiltinType '" 9183f875dcSMike Crowe << UQT.getAsString() << "'\n"; 9283f875dcSMike Crowe return std::nullopt; 9383f875dcSMike Crowe } 9483f875dcSMike Crowe 9583f875dcSMike Crowe /// If possible, return the text name of the unsigned type that corresponds to 9683f875dcSMike Crowe /// the passed integer type. If the passed type is already unsigned then its 9783f875dcSMike Crowe /// name is just returned. Only supports BuiltinTypes. 9883f875dcSMike Crowe static std::optional<std::string> 9983f875dcSMike Crowe getCorrespondingUnsignedTypeName(const clang::QualType &QT) { 10083f875dcSMike Crowe using namespace clang; 10183f875dcSMike Crowe const auto UQT = QT.getUnqualifiedType(); 10283f875dcSMike Crowe if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) { 10383f875dcSMike Crowe switch (BT->getKind()) { 10483f875dcSMike Crowe case BuiltinType::SChar: 10583f875dcSMike Crowe case BuiltinType::Char_S: 10683f875dcSMike Crowe case BuiltinType::UChar: 10783f875dcSMike Crowe case BuiltinType::Char_U: 10883f875dcSMike Crowe return "unsigned char"; 10983f875dcSMike Crowe case BuiltinType::Short: 11083f875dcSMike Crowe case BuiltinType::UShort: 11183f875dcSMike Crowe return "unsigned short"; 11283f875dcSMike Crowe case BuiltinType::Int: 11383f875dcSMike Crowe case BuiltinType::UInt: 11483f875dcSMike Crowe return "unsigned int"; 11583f875dcSMike Crowe case BuiltinType::Long: 11683f875dcSMike Crowe case BuiltinType::ULong: 11783f875dcSMike Crowe return "unsigned long"; 11883f875dcSMike Crowe case BuiltinType::LongLong: 11983f875dcSMike Crowe case BuiltinType::ULongLong: 12083f875dcSMike Crowe return "unsigned long long"; 12183f875dcSMike Crowe default: 12283f875dcSMike Crowe llvm::dbgs() << "Unknown corresponding unsigned type for BuiltinType '" 12383f875dcSMike Crowe << UQT.getAsString() << "'\n"; 12483f875dcSMike Crowe return std::nullopt; 12583f875dcSMike Crowe } 12683f875dcSMike Crowe } 12783f875dcSMike Crowe 12883f875dcSMike Crowe // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only 12983f875dcSMike Crowe // if the argument type does. 13083f875dcSMike Crowe const std::string TypeName = UQT.getAsString(); 13183f875dcSMike Crowe StringRef SimplifiedTypeName{TypeName}; 13283f875dcSMike Crowe const bool InStd = SimplifiedTypeName.consume_front("std::"); 13383f875dcSMike Crowe const StringRef Prefix = InStd ? "std::" : ""; 13483f875dcSMike Crowe 13583f875dcSMike Crowe if (SimplifiedTypeName.starts_with("int") && 13683f875dcSMike Crowe SimplifiedTypeName.ends_with("_t")) 13783f875dcSMike Crowe return (Twine(Prefix) + "u" + SimplifiedTypeName).str(); 13883f875dcSMike Crowe 13983f875dcSMike Crowe if (SimplifiedTypeName == "ssize_t") 14083f875dcSMike Crowe return (Twine(Prefix) + "size_t").str(); 14183f875dcSMike Crowe if (SimplifiedTypeName == "ptrdiff_t") 14283f875dcSMike Crowe return (Twine(Prefix) + "size_t").str(); 14383f875dcSMike Crowe 14483f875dcSMike Crowe llvm::dbgs() << "Unknown corresponding unsigned type for non-BuiltinType '" 14583f875dcSMike Crowe << UQT.getAsString() << "'\n"; 14683f875dcSMike Crowe return std::nullopt; 14783f875dcSMike Crowe } 14883f875dcSMike Crowe 14983f875dcSMike Crowe static std::optional<std::string> 15083f875dcSMike Crowe castTypeForArgument(ConversionSpecifier::Kind ArgKind, 15183f875dcSMike Crowe const clang::QualType &QT) { 15283f875dcSMike Crowe if (ArgKind == ConversionSpecifier::Kind::uArg) 15383f875dcSMike Crowe return getCorrespondingUnsignedTypeName(QT); 15483f875dcSMike Crowe return getCorrespondingSignedTypeName(QT); 15583f875dcSMike Crowe } 15683f875dcSMike Crowe 15783f875dcSMike Crowe static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind, 15883f875dcSMike Crowe const clang::QualType &ArgType) { 15983f875dcSMike Crowe if (const auto *BT = llvm::dyn_cast<BuiltinType>(ArgType)) { 16083f875dcSMike Crowe // Unadorned char never matches any expected signedness since it 16183f875dcSMike Crowe // could be signed or unsigned. 16283f875dcSMike Crowe const auto ArgTypeKind = BT->getKind(); 16383f875dcSMike Crowe if (ArgTypeKind == BuiltinType::Char_U || 16483f875dcSMike Crowe ArgTypeKind == BuiltinType::Char_S) 16583f875dcSMike Crowe return false; 16683f875dcSMike Crowe } 16783f875dcSMike Crowe 16883f875dcSMike Crowe if (ArgKind == ConversionSpecifier::Kind::uArg) 16983f875dcSMike Crowe return ArgType->isUnsignedIntegerType(); 17083f875dcSMike Crowe return ArgType->isSignedIntegerType(); 17183f875dcSMike Crowe } 17283f875dcSMike Crowe 17383f875dcSMike Crowe namespace { 17483f875dcSMike Crowe AST_MATCHER(clang::QualType, isRealChar) { 17583f875dcSMike Crowe return clang::tidy::utils::isRealCharType(Node); 17683f875dcSMike Crowe } 17783f875dcSMike Crowe } // namespace 17883f875dcSMike Crowe 17983f875dcSMike Crowe static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode) { 18083f875dcSMike Crowe /// For printf-style functions, the signedness of the type printed is 18183f875dcSMike Crowe /// indicated by the corresponding type in the format string. 18283f875dcSMike Crowe /// std::print will determine the signedness from the type of the 18383f875dcSMike Crowe /// argument. This means that it is necessary to generate a cast in 18483f875dcSMike Crowe /// StrictMode to ensure that the exact behaviour is maintained. 18583f875dcSMike Crowe /// However, for templated functions like absl::PrintF and 18683f875dcSMike Crowe /// fmt::printf, the signedness of the type printed is also taken from 18783f875dcSMike Crowe /// the actual argument like std::print, so such casts are never 18883f875dcSMike Crowe /// necessary. printf-style functions are variadic, whereas templated 18983f875dcSMike Crowe /// ones aren't, so we can use that to distinguish between the two 19083f875dcSMike Crowe /// cases. 19183f875dcSMike Crowe if (StrictMode) { 19283f875dcSMike Crowe const FunctionDecl *FuncDecl = Call->getDirectCallee(); 19383f875dcSMike Crowe assert(FuncDecl); 19483f875dcSMike Crowe return FuncDecl->isVariadic(); 19583f875dcSMike Crowe } 19683f875dcSMike Crowe return false; 19783f875dcSMike Crowe } 19883f875dcSMike Crowe 199*a199fb12SMike Crowe FormatStringConverter::FormatStringConverter( 200*a199fb12SMike Crowe ASTContext *ContextIn, const CallExpr *Call, unsigned FormatArgOffset, 201*a199fb12SMike Crowe const Configuration ConfigIn, const LangOptions &LO, SourceManager &SM, 202*a199fb12SMike Crowe Preprocessor &PP) 203af79372dSMike Crowe : Context(ContextIn), Config(ConfigIn), 204af79372dSMike Crowe CastMismatchedIntegerTypes( 205af79372dSMike Crowe castMismatchedIntegerTypes(Call, ConfigIn.StrictMode)), 20683f875dcSMike Crowe Args(Call->getArgs()), NumArgs(Call->getNumArgs()), 20783f875dcSMike Crowe ArgsOffset(FormatArgOffset + 1), LangOpts(LO) { 20883f875dcSMike Crowe assert(ArgsOffset <= NumArgs); 20983f875dcSMike Crowe FormatExpr = llvm::dyn_cast<StringLiteral>( 21083f875dcSMike Crowe Args[FormatArgOffset]->IgnoreImplicitAsWritten()); 211*a199fb12SMike Crowe 2120e62d5cfSMike Crowe if (!FormatExpr || !FormatExpr->isOrdinary()) { 2130e62d5cfSMike Crowe // Function must have a narrow string literal as its first argument. 2140e62d5cfSMike Crowe conversionNotPossible("first argument is not a narrow string literal"); 2150e62d5cfSMike Crowe return; 2160e62d5cfSMike Crowe } 217*a199fb12SMike Crowe 218*a199fb12SMike Crowe if (const std::optional<StringRef> MaybeMacroName = 219*a199fb12SMike Crowe formatStringContainsUnreplaceableMacro(Call, FormatExpr, SM, PP); 220*a199fb12SMike Crowe MaybeMacroName) { 221*a199fb12SMike Crowe conversionNotPossible( 222*a199fb12SMike Crowe ("format string contains unreplaceable macro '" + *MaybeMacroName + "'") 223*a199fb12SMike Crowe .str()); 224*a199fb12SMike Crowe return; 225*a199fb12SMike Crowe } 226*a199fb12SMike Crowe 22783f875dcSMike Crowe PrintfFormatString = FormatExpr->getString(); 22883f875dcSMike Crowe 22983f875dcSMike Crowe // Assume that the output will be approximately the same size as the input, 23083f875dcSMike Crowe // but perhaps with a few escapes expanded. 23183f875dcSMike Crowe const size_t EstimatedGrowth = 8; 23283f875dcSMike Crowe StandardFormatString.reserve(PrintfFormatString.size() + EstimatedGrowth); 23383f875dcSMike Crowe StandardFormatString.push_back('\"'); 23483f875dcSMike Crowe 23583f875dcSMike Crowe const bool IsFreeBsdkPrintf = false; 23683f875dcSMike Crowe 23783f875dcSMike Crowe using clang::analyze_format_string::ParsePrintfString; 23883f875dcSMike Crowe ParsePrintfString(*this, PrintfFormatString.data(), 23983f875dcSMike Crowe PrintfFormatString.data() + PrintfFormatString.size(), 24083f875dcSMike Crowe LangOpts, Context->getTargetInfo(), IsFreeBsdkPrintf); 24183f875dcSMike Crowe finalizeFormatText(); 24283f875dcSMike Crowe } 24383f875dcSMike Crowe 244*a199fb12SMike Crowe std::optional<StringRef> 245*a199fb12SMike Crowe FormatStringConverter::formatStringContainsUnreplaceableMacro( 246*a199fb12SMike Crowe const CallExpr *Call, const StringLiteral *FormatExpr, SourceManager &SM, 247*a199fb12SMike Crowe Preprocessor &PP) { 248*a199fb12SMike Crowe // If a macro invocation surrounds the entire call then we don't want that to 249*a199fb12SMike Crowe // inhibit conversion. The whole format string will appear to come from that 250*a199fb12SMike Crowe // macro, as will the function call. 251*a199fb12SMike Crowe std::optional<StringRef> MaybeSurroundingMacroName; 252*a199fb12SMike Crowe if (SourceLocation BeginCallLoc = Call->getBeginLoc(); 253*a199fb12SMike Crowe BeginCallLoc.isMacroID()) 254*a199fb12SMike Crowe MaybeSurroundingMacroName = 255*a199fb12SMike Crowe Lexer::getImmediateMacroName(BeginCallLoc, SM, PP.getLangOpts()); 256*a199fb12SMike Crowe 257*a199fb12SMike Crowe for (auto I = FormatExpr->tokloc_begin(), E = FormatExpr->tokloc_end(); 258*a199fb12SMike Crowe I != E; ++I) { 259*a199fb12SMike Crowe const SourceLocation &TokenLoc = *I; 260*a199fb12SMike Crowe if (TokenLoc.isMacroID()) { 261*a199fb12SMike Crowe const StringRef MacroName = 262*a199fb12SMike Crowe Lexer::getImmediateMacroName(TokenLoc, SM, PP.getLangOpts()); 263*a199fb12SMike Crowe 264*a199fb12SMike Crowe if (MaybeSurroundingMacroName != MacroName) { 265*a199fb12SMike Crowe // glibc uses __PRI64_PREFIX and __PRIPTR_PREFIX to define the prefixes 266*a199fb12SMike Crowe // for types that change size so we must look for multiple prefixes. 267*a199fb12SMike Crowe if (!MacroName.starts_with("PRI") && !MacroName.starts_with("__PRI")) 268*a199fb12SMike Crowe return MacroName; 269*a199fb12SMike Crowe 270*a199fb12SMike Crowe const SourceLocation TokenSpellingLoc = SM.getSpellingLoc(TokenLoc); 271*a199fb12SMike Crowe const OptionalFileEntryRef MaybeFileEntry = 272*a199fb12SMike Crowe SM.getFileEntryRefForID(SM.getFileID(TokenSpellingLoc)); 273*a199fb12SMike Crowe if (!MaybeFileEntry) 274*a199fb12SMike Crowe return MacroName; 275*a199fb12SMike Crowe 276*a199fb12SMike Crowe HeaderSearch &HS = PP.getHeaderSearchInfo(); 277*a199fb12SMike Crowe // Check if the file is a system header 278*a199fb12SMike Crowe if (!isSystem(HS.getFileDirFlavor(*MaybeFileEntry)) || 279*a199fb12SMike Crowe llvm::sys::path::filename(MaybeFileEntry->getName()) != 280*a199fb12SMike Crowe "inttypes.h") 281*a199fb12SMike Crowe return MacroName; 282*a199fb12SMike Crowe } 283*a199fb12SMike Crowe } 284*a199fb12SMike Crowe } 285*a199fb12SMike Crowe return std::nullopt; 286*a199fb12SMike Crowe } 287*a199fb12SMike Crowe 28883f875dcSMike Crowe void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS, 28983f875dcSMike Crowe std::string &FormatSpec) { 29083f875dcSMike Crowe ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind(); 29183f875dcSMike Crowe 29283f875dcSMike Crowe // We only care about alignment if a field width is specified 29383f875dcSMike Crowe if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) { 29483f875dcSMike Crowe if (ArgKind == ConversionSpecifier::sArg) { 29583f875dcSMike Crowe // Strings are left-aligned by default with std::format, so we only 29683f875dcSMike Crowe // need to emit an alignment if this one needs to be right aligned. 29783f875dcSMike Crowe if (!FS.isLeftJustified()) 29883f875dcSMike Crowe FormatSpec.push_back('>'); 29983f875dcSMike Crowe } else { 30083f875dcSMike Crowe // Numbers are right-aligned by default with std::format, so we only 30183f875dcSMike Crowe // need to emit an alignment if this one needs to be left aligned. 30283f875dcSMike Crowe if (FS.isLeftJustified()) 30383f875dcSMike Crowe FormatSpec.push_back('<'); 30483f875dcSMike Crowe } 30583f875dcSMike Crowe } 30683f875dcSMike Crowe } 30783f875dcSMike Crowe 30883f875dcSMike Crowe void FormatStringConverter::emitSign(const PrintfSpecifier &FS, 30983f875dcSMike Crowe std::string &FormatSpec) { 31083f875dcSMike Crowe const ConversionSpecifier Spec = FS.getConversionSpecifier(); 31183f875dcSMike Crowe 31283f875dcSMike Crowe // Ignore on something that isn't numeric. For printf it's would be a 31383f875dcSMike Crowe // compile-time warning but ignored at runtime, but for std::format it 31483f875dcSMike Crowe // ought to be a compile-time error. 31583f875dcSMike Crowe if (Spec.isAnyIntArg() || Spec.isDoubleArg()) { 31683f875dcSMike Crowe // + is preferred to ' ' 31783f875dcSMike Crowe if (FS.hasPlusPrefix()) 31883f875dcSMike Crowe FormatSpec.push_back('+'); 31983f875dcSMike Crowe else if (FS.hasSpacePrefix()) 32083f875dcSMike Crowe FormatSpec.push_back(' '); 32183f875dcSMike Crowe } 32283f875dcSMike Crowe } 32383f875dcSMike Crowe 32483f875dcSMike Crowe void FormatStringConverter::emitAlternativeForm(const PrintfSpecifier &FS, 32583f875dcSMike Crowe std::string &FormatSpec) { 32683f875dcSMike Crowe if (FS.hasAlternativeForm()) { 32783f875dcSMike Crowe switch (FS.getConversionSpecifier().getKind()) { 32883f875dcSMike Crowe case ConversionSpecifier::Kind::aArg: 32983f875dcSMike Crowe case ConversionSpecifier::Kind::AArg: 33083f875dcSMike Crowe case ConversionSpecifier::Kind::eArg: 33183f875dcSMike Crowe case ConversionSpecifier::Kind::EArg: 33283f875dcSMike Crowe case ConversionSpecifier::Kind::fArg: 33383f875dcSMike Crowe case ConversionSpecifier::Kind::FArg: 33483f875dcSMike Crowe case ConversionSpecifier::Kind::gArg: 33583f875dcSMike Crowe case ConversionSpecifier::Kind::GArg: 33683f875dcSMike Crowe case ConversionSpecifier::Kind::xArg: 33783f875dcSMike Crowe case ConversionSpecifier::Kind::XArg: 33883f875dcSMike Crowe case ConversionSpecifier::Kind::oArg: 33983f875dcSMike Crowe FormatSpec.push_back('#'); 34083f875dcSMike Crowe break; 34183f875dcSMike Crowe default: 34283f875dcSMike Crowe // Alternative forms don't exist for other argument kinds 34383f875dcSMike Crowe break; 34483f875dcSMike Crowe } 34583f875dcSMike Crowe } 34683f875dcSMike Crowe } 34783f875dcSMike Crowe 34883f875dcSMike Crowe void FormatStringConverter::emitFieldWidth(const PrintfSpecifier &FS, 34983f875dcSMike Crowe std::string &FormatSpec) { 35083f875dcSMike Crowe { 35183f875dcSMike Crowe const OptionalAmount FieldWidth = FS.getFieldWidth(); 35283f875dcSMike Crowe switch (FieldWidth.getHowSpecified()) { 35383f875dcSMike Crowe case OptionalAmount::NotSpecified: 35483f875dcSMike Crowe break; 35583f875dcSMike Crowe case OptionalAmount::Constant: 35683f875dcSMike Crowe FormatSpec.append(llvm::utostr(FieldWidth.getConstantAmount())); 35783f875dcSMike Crowe break; 35883f875dcSMike Crowe case OptionalAmount::Arg: 35983f875dcSMike Crowe FormatSpec.push_back('{'); 36083f875dcSMike Crowe if (FieldWidth.usesPositionalArg()) { 36183f875dcSMike Crowe // std::format argument identifiers are zero-based, whereas printf 36283f875dcSMike Crowe // ones are one based. 36383f875dcSMike Crowe assert(FieldWidth.getPositionalArgIndex() > 0U); 36483f875dcSMike Crowe FormatSpec.append(llvm::utostr(FieldWidth.getPositionalArgIndex() - 1)); 36583f875dcSMike Crowe } 36683f875dcSMike Crowe FormatSpec.push_back('}'); 36783f875dcSMike Crowe break; 36883f875dcSMike Crowe case OptionalAmount::Invalid: 36983f875dcSMike Crowe break; 37083f875dcSMike Crowe } 37183f875dcSMike Crowe } 37283f875dcSMike Crowe } 37383f875dcSMike Crowe 37483f875dcSMike Crowe void FormatStringConverter::emitPrecision(const PrintfSpecifier &FS, 37583f875dcSMike Crowe std::string &FormatSpec) { 37683f875dcSMike Crowe const OptionalAmount FieldPrecision = FS.getPrecision(); 37783f875dcSMike Crowe switch (FieldPrecision.getHowSpecified()) { 37883f875dcSMike Crowe case OptionalAmount::NotSpecified: 37983f875dcSMike Crowe break; 38083f875dcSMike Crowe case OptionalAmount::Constant: 38183f875dcSMike Crowe FormatSpec.push_back('.'); 38283f875dcSMike Crowe FormatSpec.append(llvm::utostr(FieldPrecision.getConstantAmount())); 38383f875dcSMike Crowe break; 38483f875dcSMike Crowe case OptionalAmount::Arg: 38583f875dcSMike Crowe FormatSpec.push_back('.'); 38683f875dcSMike Crowe FormatSpec.push_back('{'); 38783f875dcSMike Crowe if (FieldPrecision.usesPositionalArg()) { 38883f875dcSMike Crowe // std::format argument identifiers are zero-based, whereas printf 38983f875dcSMike Crowe // ones are one based. 39083f875dcSMike Crowe assert(FieldPrecision.getPositionalArgIndex() > 0U); 39183f875dcSMike Crowe FormatSpec.append( 39283f875dcSMike Crowe llvm::utostr(FieldPrecision.getPositionalArgIndex() - 1)); 39383f875dcSMike Crowe } 39483f875dcSMike Crowe FormatSpec.push_back('}'); 39583f875dcSMike Crowe break; 39683f875dcSMike Crowe case OptionalAmount::Invalid: 39783f875dcSMike Crowe break; 39883f875dcSMike Crowe } 39983f875dcSMike Crowe } 40083f875dcSMike Crowe 4012806cf4bSMike Crowe void FormatStringConverter::maybeRotateArguments(const PrintfSpecifier &FS) { 4022806cf4bSMike Crowe unsigned ArgCount = 0; 4032806cf4bSMike Crowe const OptionalAmount FieldWidth = FS.getFieldWidth(); 4042806cf4bSMike Crowe const OptionalAmount FieldPrecision = FS.getPrecision(); 4052806cf4bSMike Crowe 4062806cf4bSMike Crowe if (FieldWidth.getHowSpecified() == OptionalAmount::Arg && 4072806cf4bSMike Crowe !FieldWidth.usesPositionalArg()) 4082806cf4bSMike Crowe ++ArgCount; 4092806cf4bSMike Crowe if (FieldPrecision.getHowSpecified() == OptionalAmount::Arg && 4102806cf4bSMike Crowe !FieldPrecision.usesPositionalArg()) 4112806cf4bSMike Crowe ++ArgCount; 4122806cf4bSMike Crowe 4132806cf4bSMike Crowe if (ArgCount) 4142806cf4bSMike Crowe ArgRotates.emplace_back(FS.getArgIndex() + ArgsOffset, ArgCount); 4152806cf4bSMike Crowe } 4162806cf4bSMike Crowe 417c6207f6eSMike Crowe void FormatStringConverter::emitStringArgument(unsigned ArgIndex, 418c6207f6eSMike Crowe const Expr *Arg) { 41983f875dcSMike Crowe // If the argument is the result of a call to std::string::c_str() or 42083f875dcSMike Crowe // data() with a return type of char then we can remove that call and 42183f875dcSMike Crowe // pass the std::string directly. We don't want to do so if the return 42283f875dcSMike Crowe // type is not a char pointer (though it's unlikely that such code would 42383f875dcSMike Crowe // compile without warnings anyway.) See RedundantStringCStrCheck. 42483f875dcSMike Crowe 42583f875dcSMike Crowe if (!StringCStrCallExprMatcher) { 42683f875dcSMike Crowe // Lazily create the matcher 42783f875dcSMike Crowe const auto StringDecl = type(hasUnqualifiedDesugaredType(recordType( 42883f875dcSMike Crowe hasDeclaration(cxxRecordDecl(hasName("::std::basic_string")))))); 42983f875dcSMike Crowe const auto StringExpr = expr( 43083f875dcSMike Crowe anyOf(hasType(StringDecl), hasType(qualType(pointsTo(StringDecl))))); 43183f875dcSMike Crowe 43283f875dcSMike Crowe StringCStrCallExprMatcher = 43383f875dcSMike Crowe cxxMemberCallExpr( 43483f875dcSMike Crowe on(StringExpr.bind("arg")), callee(memberExpr().bind("member")), 43583f875dcSMike Crowe callee(cxxMethodDecl(hasAnyName("c_str", "data"), 43683f875dcSMike Crowe returns(pointerType(pointee(isRealChar())))))) 43783f875dcSMike Crowe .bind("call"); 43883f875dcSMike Crowe } 43983f875dcSMike Crowe 44083f875dcSMike Crowe auto CStrMatches = match(*StringCStrCallExprMatcher, *Arg, *Context); 44183f875dcSMike Crowe if (CStrMatches.size() == 1) 44283f875dcSMike Crowe ArgCStrRemovals.push_back(CStrMatches.front()); 44383f875dcSMike Crowe else if (Arg->getType()->isPointerType()) { 44483f875dcSMike Crowe const QualType Pointee = Arg->getType()->getPointeeType(); 44583f875dcSMike Crowe // printf is happy to print signed char and unsigned char strings, but 44683f875dcSMike Crowe // std::format only likes char strings. 44783f875dcSMike Crowe if (Pointee->isCharType() && !isRealCharType(Pointee)) 448c6207f6eSMike Crowe ArgFixes.emplace_back(ArgIndex, "reinterpret_cast<const char *>("); 44983f875dcSMike Crowe } 45083f875dcSMike Crowe } 45183f875dcSMike Crowe 45283f875dcSMike Crowe bool FormatStringConverter::emitIntegerArgument( 45383f875dcSMike Crowe ConversionSpecifier::Kind ArgKind, const Expr *Arg, unsigned ArgIndex, 45483f875dcSMike Crowe std::string &FormatSpec) { 45583f875dcSMike Crowe const clang::QualType &ArgType = Arg->getType(); 45683f875dcSMike Crowe if (ArgType->isBooleanType()) { 45783f875dcSMike Crowe // std::format will print bool as either "true" or "false" by default, 45883f875dcSMike Crowe // but printf prints them as "0" or "1". Be compatible with printf by 45983f875dcSMike Crowe // requesting decimal output. 46083f875dcSMike Crowe FormatSpec.push_back('d'); 46183f875dcSMike Crowe } else if (ArgType->isEnumeralType()) { 46283f875dcSMike Crowe // std::format will try to find a specialization to print the enum 46383f875dcSMike Crowe // (and probably fail), whereas printf would have just expected it to 46483f875dcSMike Crowe // be passed as its underlying type. However, printf will have forced 46583f875dcSMike Crowe // the signedness based on the format string, so we need to do the 46683f875dcSMike Crowe // same. 46783f875dcSMike Crowe if (const auto *ET = ArgType->getAs<EnumType>()) { 46883f875dcSMike Crowe if (const std::optional<std::string> MaybeCastType = 46983f875dcSMike Crowe castTypeForArgument(ArgKind, ET->getDecl()->getIntegerType())) 47083f875dcSMike Crowe ArgFixes.emplace_back( 471c6207f6eSMike Crowe ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str()); 47283f875dcSMike Crowe else 47383f875dcSMike Crowe return conversionNotPossible( 47483f875dcSMike Crowe (Twine("argument ") + Twine(ArgIndex) + " has unexpected enum type") 47583f875dcSMike Crowe .str()); 47683f875dcSMike Crowe } 47783f875dcSMike Crowe } else if (CastMismatchedIntegerTypes && 47883f875dcSMike Crowe !isMatchingSignedness(ArgKind, ArgType)) { 47983f875dcSMike Crowe // printf will happily print an unsigned type as signed if told to. 48083f875dcSMike Crowe // Even -Wformat doesn't warn for this. std::format will format as 48183f875dcSMike Crowe // unsigned unless we cast it. 48283f875dcSMike Crowe if (const std::optional<std::string> MaybeCastType = 48383f875dcSMike Crowe castTypeForArgument(ArgKind, ArgType)) 48483f875dcSMike Crowe ArgFixes.emplace_back( 485c6207f6eSMike Crowe ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str()); 48683f875dcSMike Crowe else 48783f875dcSMike Crowe return conversionNotPossible( 48883f875dcSMike Crowe (Twine("argument ") + Twine(ArgIndex) + " cannot be cast to " + 48983f875dcSMike Crowe Twine(ArgKind == ConversionSpecifier::Kind::uArg ? "unsigned" 49083f875dcSMike Crowe : "signed") + 49183f875dcSMike Crowe " integer type to match format" 49283f875dcSMike Crowe " specifier and StrictMode is enabled") 49383f875dcSMike Crowe .str()); 49483f875dcSMike Crowe } else if (isRealCharType(ArgType) || !ArgType->isIntegerType()) { 49583f875dcSMike Crowe // Only specify integer if the argument is of a different type 49683f875dcSMike Crowe FormatSpec.push_back('d'); 49783f875dcSMike Crowe } 49883f875dcSMike Crowe return true; 49983f875dcSMike Crowe } 50083f875dcSMike Crowe 50183f875dcSMike Crowe /// Append the corresponding standard format string type fragment to FormatSpec, 50283f875dcSMike Crowe /// and store any argument fixes for later application. 50383f875dcSMike Crowe /// @returns true on success, false on failure 50483f875dcSMike Crowe bool FormatStringConverter::emitType(const PrintfSpecifier &FS, const Expr *Arg, 50583f875dcSMike Crowe std::string &FormatSpec) { 50683f875dcSMike Crowe ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind(); 50783f875dcSMike Crowe switch (ArgKind) { 50883f875dcSMike Crowe case ConversionSpecifier::Kind::sArg: 509c6207f6eSMike Crowe emitStringArgument(FS.getArgIndex() + ArgsOffset, Arg); 51083f875dcSMike Crowe break; 51183f875dcSMike Crowe case ConversionSpecifier::Kind::cArg: 51283f875dcSMike Crowe // The type must be "c" to get a character unless the type is exactly 51383f875dcSMike Crowe // char (whether that be signed or unsigned for the target.) 51483f875dcSMike Crowe if (!isRealCharType(Arg->getType())) 51583f875dcSMike Crowe FormatSpec.push_back('c'); 51683f875dcSMike Crowe break; 51783f875dcSMike Crowe case ConversionSpecifier::Kind::dArg: 51883f875dcSMike Crowe case ConversionSpecifier::Kind::iArg: 51983f875dcSMike Crowe case ConversionSpecifier::Kind::uArg: 52083f875dcSMike Crowe if (!emitIntegerArgument(ArgKind, Arg, FS.getArgIndex() + ArgsOffset, 52183f875dcSMike Crowe FormatSpec)) 52283f875dcSMike Crowe return false; 52383f875dcSMike Crowe break; 52483f875dcSMike Crowe case ConversionSpecifier::Kind::pArg: { 52583f875dcSMike Crowe const clang::QualType &ArgType = Arg->getType(); 52683f875dcSMike Crowe // std::format knows how to format void pointers and nullptrs 52783f875dcSMike Crowe if (!ArgType->isNullPtrType() && !ArgType->isVoidPointerType()) 528c6207f6eSMike Crowe ArgFixes.emplace_back(FS.getArgIndex() + ArgsOffset, 529c6207f6eSMike Crowe "static_cast<const void *>("); 53083f875dcSMike Crowe break; 53183f875dcSMike Crowe } 53283f875dcSMike Crowe case ConversionSpecifier::Kind::xArg: 53383f875dcSMike Crowe FormatSpec.push_back('x'); 53483f875dcSMike Crowe break; 53583f875dcSMike Crowe case ConversionSpecifier::Kind::XArg: 53683f875dcSMike Crowe FormatSpec.push_back('X'); 53783f875dcSMike Crowe break; 53883f875dcSMike Crowe case ConversionSpecifier::Kind::oArg: 53983f875dcSMike Crowe FormatSpec.push_back('o'); 54083f875dcSMike Crowe break; 54183f875dcSMike Crowe case ConversionSpecifier::Kind::aArg: 54283f875dcSMike Crowe FormatSpec.push_back('a'); 54383f875dcSMike Crowe break; 54483f875dcSMike Crowe case ConversionSpecifier::Kind::AArg: 54583f875dcSMike Crowe FormatSpec.push_back('A'); 54683f875dcSMike Crowe break; 54783f875dcSMike Crowe case ConversionSpecifier::Kind::eArg: 54883f875dcSMike Crowe FormatSpec.push_back('e'); 54983f875dcSMike Crowe break; 55083f875dcSMike Crowe case ConversionSpecifier::Kind::EArg: 55183f875dcSMike Crowe FormatSpec.push_back('E'); 55283f875dcSMike Crowe break; 55383f875dcSMike Crowe case ConversionSpecifier::Kind::fArg: 55483f875dcSMike Crowe FormatSpec.push_back('f'); 55583f875dcSMike Crowe break; 55683f875dcSMike Crowe case ConversionSpecifier::Kind::FArg: 55783f875dcSMike Crowe FormatSpec.push_back('F'); 55883f875dcSMike Crowe break; 55983f875dcSMike Crowe case ConversionSpecifier::Kind::gArg: 56083f875dcSMike Crowe FormatSpec.push_back('g'); 56183f875dcSMike Crowe break; 56283f875dcSMike Crowe case ConversionSpecifier::Kind::GArg: 56383f875dcSMike Crowe FormatSpec.push_back('G'); 56483f875dcSMike Crowe break; 56583f875dcSMike Crowe default: 56683f875dcSMike Crowe // Something we don't understand 56783f875dcSMike Crowe return conversionNotPossible((Twine("argument ") + 56883f875dcSMike Crowe Twine(FS.getArgIndex() + ArgsOffset) + 56983f875dcSMike Crowe " has an unsupported format specifier") 57083f875dcSMike Crowe .str()); 57183f875dcSMike Crowe } 57283f875dcSMike Crowe 57383f875dcSMike Crowe return true; 57483f875dcSMike Crowe } 57583f875dcSMike Crowe 57683f875dcSMike Crowe /// Append the standard format string equivalent of the passed PrintfSpecifier 57783f875dcSMike Crowe /// to StandardFormatString and store any argument fixes for later application. 57883f875dcSMike Crowe /// @returns true on success, false on failure 57983f875dcSMike Crowe bool FormatStringConverter::convertArgument(const PrintfSpecifier &FS, 58083f875dcSMike Crowe const Expr *Arg, 58183f875dcSMike Crowe std::string &StandardFormatString) { 58283f875dcSMike Crowe // The specifier must have an associated argument 58383f875dcSMike Crowe assert(FS.consumesDataArgument()); 58483f875dcSMike Crowe 58583f875dcSMike Crowe StandardFormatString.push_back('{'); 58683f875dcSMike Crowe 58783f875dcSMike Crowe if (FS.usesPositionalArg()) { 58883f875dcSMike Crowe // std::format argument identifiers are zero-based, whereas printf ones 58983f875dcSMike Crowe // are one based. 59083f875dcSMike Crowe assert(FS.getPositionalArgIndex() > 0U); 59183f875dcSMike Crowe StandardFormatString.append(llvm::utostr(FS.getPositionalArgIndex() - 1)); 59283f875dcSMike Crowe } 59383f875dcSMike Crowe 59483f875dcSMike Crowe // std::format format argument parts to potentially emit: 59583f875dcSMike Crowe // [[fill]align][sign]["#"]["0"][width]["."precision][type] 59683f875dcSMike Crowe std::string FormatSpec; 59783f875dcSMike Crowe 59883f875dcSMike Crowe // printf doesn't support specifying the fill character - it's always a 59983f875dcSMike Crowe // space, so we never need to generate one. 60083f875dcSMike Crowe 60183f875dcSMike Crowe emitAlignment(FS, FormatSpec); 60283f875dcSMike Crowe emitSign(FS, FormatSpec); 60383f875dcSMike Crowe emitAlternativeForm(FS, FormatSpec); 60483f875dcSMike Crowe 60583f875dcSMike Crowe if (FS.hasLeadingZeros()) 60683f875dcSMike Crowe FormatSpec.push_back('0'); 60783f875dcSMike Crowe 60883f875dcSMike Crowe emitFieldWidth(FS, FormatSpec); 60983f875dcSMike Crowe emitPrecision(FS, FormatSpec); 6102806cf4bSMike Crowe maybeRotateArguments(FS); 61183f875dcSMike Crowe 61283f875dcSMike Crowe if (!emitType(FS, Arg, FormatSpec)) 61383f875dcSMike Crowe return false; 61483f875dcSMike Crowe 61583f875dcSMike Crowe if (!FormatSpec.empty()) { 61683f875dcSMike Crowe StandardFormatString.push_back(':'); 61783f875dcSMike Crowe StandardFormatString.append(FormatSpec); 61883f875dcSMike Crowe } 61983f875dcSMike Crowe 62083f875dcSMike Crowe StandardFormatString.push_back('}'); 62183f875dcSMike Crowe return true; 62283f875dcSMike Crowe } 62383f875dcSMike Crowe 62483f875dcSMike Crowe /// Called for each format specifier by ParsePrintfString. 62583f875dcSMike Crowe bool FormatStringConverter::HandlePrintfSpecifier(const PrintfSpecifier &FS, 62683f875dcSMike Crowe const char *StartSpecifier, 62783f875dcSMike Crowe unsigned SpecifierLen, 62883f875dcSMike Crowe const TargetInfo &Target) { 62983f875dcSMike Crowe 63083f875dcSMike Crowe const size_t StartSpecifierPos = StartSpecifier - PrintfFormatString.data(); 63183f875dcSMike Crowe assert(StartSpecifierPos + SpecifierLen <= PrintfFormatString.size()); 63283f875dcSMike Crowe 63383f875dcSMike Crowe // Everything before the specifier needs copying verbatim 63483f875dcSMike Crowe assert(StartSpecifierPos >= PrintfFormatStringPos); 63583f875dcSMike Crowe 63683f875dcSMike Crowe appendFormatText(StringRef(PrintfFormatString.begin() + PrintfFormatStringPos, 63783f875dcSMike Crowe StartSpecifierPos - PrintfFormatStringPos)); 63883f875dcSMike Crowe 63983f875dcSMike Crowe const ConversionSpecifier::Kind ArgKind = 64083f875dcSMike Crowe FS.getConversionSpecifier().getKind(); 64183f875dcSMike Crowe 64283f875dcSMike Crowe // Skip over specifier 64383f875dcSMike Crowe PrintfFormatStringPos = StartSpecifierPos + SpecifierLen; 64483f875dcSMike Crowe assert(PrintfFormatStringPos <= PrintfFormatString.size()); 64583f875dcSMike Crowe 64683f875dcSMike Crowe FormatStringNeededRewriting = true; 64783f875dcSMike Crowe 64883f875dcSMike Crowe if (ArgKind == ConversionSpecifier::Kind::nArg) { 64983f875dcSMike Crowe // std::print doesn't do the equivalent of %n 65083f875dcSMike Crowe return conversionNotPossible("'%n' is not supported in format string"); 65183f875dcSMike Crowe } 65283f875dcSMike Crowe 65383f875dcSMike Crowe if (ArgKind == ConversionSpecifier::Kind::PrintErrno) { 65483f875dcSMike Crowe // std::print doesn't support %m. In theory we could insert a 65583f875dcSMike Crowe // strerror(errno) parameter (assuming that libc has a thread-safe 65683f875dcSMike Crowe // implementation, which glibc does), but that would require keeping track 65783f875dcSMike Crowe // of the input and output parameter indices for position arguments too. 65883f875dcSMike Crowe return conversionNotPossible("'%m' is not supported in format string"); 65983f875dcSMike Crowe } 66083f875dcSMike Crowe 66183f875dcSMike Crowe if (ArgKind == ConversionSpecifier::PercentArg) { 66283f875dcSMike Crowe StandardFormatString.push_back('%'); 66383f875dcSMike Crowe return true; 66483f875dcSMike Crowe } 66583f875dcSMike Crowe 66683f875dcSMike Crowe const unsigned ArgIndex = FS.getArgIndex() + ArgsOffset; 66783f875dcSMike Crowe if (ArgIndex >= NumArgs) { 66883f875dcSMike Crowe // Argument index out of range. Give up. 66983f875dcSMike Crowe return conversionNotPossible( 67083f875dcSMike Crowe (Twine("argument index ") + Twine(ArgIndex) + " is out of range") 67183f875dcSMike Crowe .str()); 67283f875dcSMike Crowe } 67383f875dcSMike Crowe 67483f875dcSMike Crowe return convertArgument(FS, Args[ArgIndex]->IgnoreImplicitAsWritten(), 67583f875dcSMike Crowe StandardFormatString); 67683f875dcSMike Crowe } 67783f875dcSMike Crowe 67883f875dcSMike Crowe /// Called at the very end just before applying fixes to capture the last part 67983f875dcSMike Crowe /// of the format string. 68083f875dcSMike Crowe void FormatStringConverter::finalizeFormatText() { 68183f875dcSMike Crowe appendFormatText( 68283f875dcSMike Crowe StringRef(PrintfFormatString.begin() + PrintfFormatStringPos, 68383f875dcSMike Crowe PrintfFormatString.size() - PrintfFormatStringPos)); 68483f875dcSMike Crowe PrintfFormatStringPos = PrintfFormatString.size(); 68583f875dcSMike Crowe 6862ce765ebSMike Crowe // It's clearer to convert printf("Hello\r\n"); to std::print("Hello\r\n") 6872ce765ebSMike Crowe // than to std::println("Hello\r"); 688af79372dSMike Crowe // Use StringRef until C++20 std::string::ends_with() is available. 689af79372dSMike Crowe const auto StandardFormatStringRef = StringRef(StandardFormatString); 690af79372dSMike Crowe if (Config.AllowTrailingNewlineRemoval && 691af79372dSMike Crowe StandardFormatStringRef.ends_with("\\n") && 692af79372dSMike Crowe !StandardFormatStringRef.ends_with("\\\\n") && 693af79372dSMike Crowe !StandardFormatStringRef.ends_with("\\r\\n")) { 69483f875dcSMike Crowe UsePrintNewlineFunction = true; 69583f875dcSMike Crowe FormatStringNeededRewriting = true; 69683f875dcSMike Crowe StandardFormatString.erase(StandardFormatString.end() - 2, 69783f875dcSMike Crowe StandardFormatString.end()); 69883f875dcSMike Crowe } 69983f875dcSMike Crowe 70083f875dcSMike Crowe StandardFormatString.push_back('\"'); 70183f875dcSMike Crowe } 70283f875dcSMike Crowe 70383f875dcSMike Crowe /// Append literal parts of the format text, reinstating escapes as required. 70483f875dcSMike Crowe void FormatStringConverter::appendFormatText(const StringRef Text) { 70583f875dcSMike Crowe for (const char Ch : Text) { 70683f875dcSMike Crowe if (Ch == '\a') 70783f875dcSMike Crowe StandardFormatString += "\\a"; 70883f875dcSMike Crowe else if (Ch == '\b') 70983f875dcSMike Crowe StandardFormatString += "\\b"; 71083f875dcSMike Crowe else if (Ch == '\f') 71183f875dcSMike Crowe StandardFormatString += "\\f"; 71283f875dcSMike Crowe else if (Ch == '\n') 71383f875dcSMike Crowe StandardFormatString += "\\n"; 71483f875dcSMike Crowe else if (Ch == '\r') 71583f875dcSMike Crowe StandardFormatString += "\\r"; 71683f875dcSMike Crowe else if (Ch == '\t') 71783f875dcSMike Crowe StandardFormatString += "\\t"; 71883f875dcSMike Crowe else if (Ch == '\v') 71983f875dcSMike Crowe StandardFormatString += "\\v"; 72083f875dcSMike Crowe else if (Ch == '\"') 72183f875dcSMike Crowe StandardFormatString += "\\\""; 72283f875dcSMike Crowe else if (Ch == '\\') 72383f875dcSMike Crowe StandardFormatString += "\\\\"; 72483f875dcSMike Crowe else if (Ch == '{') { 72583f875dcSMike Crowe StandardFormatString += "{{"; 72683f875dcSMike Crowe FormatStringNeededRewriting = true; 72783f875dcSMike Crowe } else if (Ch == '}') { 72883f875dcSMike Crowe StandardFormatString += "}}"; 72983f875dcSMike Crowe FormatStringNeededRewriting = true; 73083f875dcSMike Crowe } else if (Ch < 32) { 73183f875dcSMike Crowe StandardFormatString += "\\x"; 73283f875dcSMike Crowe StandardFormatString += llvm::hexdigit(Ch >> 4, true); 73383f875dcSMike Crowe StandardFormatString += llvm::hexdigit(Ch & 0xf, true); 73483f875dcSMike Crowe } else 73583f875dcSMike Crowe StandardFormatString += Ch; 73683f875dcSMike Crowe } 73783f875dcSMike Crowe } 73883f875dcSMike Crowe 739c6207f6eSMike Crowe static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch, 740c6207f6eSMike Crowe ASTContext &Context) { 741c6207f6eSMike Crowe const auto *Arg = CStrRemovalMatch.getNodeAs<Expr>("arg"); 742c6207f6eSMike Crowe const auto *Member = CStrRemovalMatch.getNodeAs<MemberExpr>("member"); 743c6207f6eSMike Crowe const bool Arrow = Member->isArrow(); 744c6207f6eSMike Crowe return Arrow ? utils::fixit::formatDereference(*Arg, Context) 745c6207f6eSMike Crowe : tooling::fixit::getText(*Arg, Context).str(); 746c6207f6eSMike Crowe } 747c6207f6eSMike Crowe 74883f875dcSMike Crowe /// Called by the check when it is ready to apply the fixes. 74983f875dcSMike Crowe void FormatStringConverter::applyFixes(DiagnosticBuilder &Diag, 75083f875dcSMike Crowe SourceManager &SM) { 75183f875dcSMike Crowe if (FormatStringNeededRewriting) { 75283f875dcSMike Crowe Diag << FixItHint::CreateReplacement( 75383f875dcSMike Crowe CharSourceRange::getTokenRange(FormatExpr->getBeginLoc(), 75483f875dcSMike Crowe FormatExpr->getEndLoc()), 75583f875dcSMike Crowe StandardFormatString); 75683f875dcSMike Crowe } 75783f875dcSMike Crowe 7582806cf4bSMike Crowe // ArgCount is one less than the number of arguments to be rotated. 7592806cf4bSMike Crowe for (auto [ValueArgIndex, ArgCount] : ArgRotates) { 7602806cf4bSMike Crowe assert(ValueArgIndex < NumArgs); 7612806cf4bSMike Crowe assert(ValueArgIndex > ArgCount); 7622806cf4bSMike Crowe 763c6207f6eSMike Crowe // First move the value argument to the right place. But if there's a 764c6207f6eSMike Crowe // pending c_str() removal then we must do that at the same time. 765c6207f6eSMike Crowe if (const auto CStrRemovalMatch = 766c6207f6eSMike Crowe std::find_if(ArgCStrRemovals.cbegin(), ArgCStrRemovals.cend(), 767c6207f6eSMike Crowe [ArgStartPos = Args[ValueArgIndex]->getBeginLoc()]( 768c6207f6eSMike Crowe const BoundNodes &Match) { 769c6207f6eSMike Crowe // This c_str() removal corresponds to the argument 770c6207f6eSMike Crowe // being moved if they start at the same location. 771c6207f6eSMike Crowe const Expr *CStrArg = Match.getNodeAs<Expr>("arg"); 772c6207f6eSMike Crowe return ArgStartPos == CStrArg->getBeginLoc(); 773c6207f6eSMike Crowe }); 774c6207f6eSMike Crowe CStrRemovalMatch != ArgCStrRemovals.end()) { 775c6207f6eSMike Crowe const std::string ArgText = 776c6207f6eSMike Crowe withoutCStrReplacement(*CStrRemovalMatch, *Context); 777c6207f6eSMike Crowe assert(!ArgText.empty()); 778c6207f6eSMike Crowe 779c6207f6eSMike Crowe Diag << FixItHint::CreateReplacement( 780c6207f6eSMike Crowe Args[ValueArgIndex - ArgCount]->getSourceRange(), ArgText); 781c6207f6eSMike Crowe 782c6207f6eSMike Crowe // That c_str() removal is now dealt with, so we don't need to do it again 783c6207f6eSMike Crowe ArgCStrRemovals.erase(CStrRemovalMatch); 784c6207f6eSMike Crowe } else 7852806cf4bSMike Crowe Diag << tooling::fixit::createReplacement(*Args[ValueArgIndex - ArgCount], 7862806cf4bSMike Crowe *Args[ValueArgIndex], *Context); 7872806cf4bSMike Crowe 7882806cf4bSMike Crowe // Now shift down the field width and precision (if either are present) to 7892806cf4bSMike Crowe // accommodate it. 7902806cf4bSMike Crowe for (size_t Offset = 0; Offset < ArgCount; ++Offset) 7912806cf4bSMike Crowe Diag << tooling::fixit::createReplacement( 7922806cf4bSMike Crowe *Args[ValueArgIndex - Offset], *Args[ValueArgIndex - Offset - 1], 7932806cf4bSMike Crowe *Context); 794c6207f6eSMike Crowe 795c6207f6eSMike Crowe // Now we need to modify the ArgFix index too so that we fix the right 796c6207f6eSMike Crowe // argument. We don't need to care about the width and precision indices 797c6207f6eSMike Crowe // since they never need fixing. 798c6207f6eSMike Crowe for (auto &ArgFix : ArgFixes) { 799c6207f6eSMike Crowe if (ArgFix.ArgIndex == ValueArgIndex) 800c6207f6eSMike Crowe ArgFix.ArgIndex = ValueArgIndex - ArgCount; 801c6207f6eSMike Crowe } 802c6207f6eSMike Crowe } 803c6207f6eSMike Crowe 804c6207f6eSMike Crowe for (const auto &[ArgIndex, Replacement] : ArgFixes) { 805c6207f6eSMike Crowe SourceLocation AfterOtherSide = 806c6207f6eSMike Crowe Lexer::findNextToken(Args[ArgIndex]->getEndLoc(), SM, LangOpts) 807c6207f6eSMike Crowe ->getLocation(); 808c6207f6eSMike Crowe 809c6207f6eSMike Crowe Diag << FixItHint::CreateInsertion(Args[ArgIndex]->getBeginLoc(), 810c6207f6eSMike Crowe Replacement, true) 811c6207f6eSMike Crowe << FixItHint::CreateInsertion(AfterOtherSide, ")", true); 812c6207f6eSMike Crowe } 813c6207f6eSMike Crowe 814c6207f6eSMike Crowe for (const auto &Match : ArgCStrRemovals) { 815c6207f6eSMike Crowe const auto *Call = Match.getNodeAs<CallExpr>("call"); 816c6207f6eSMike Crowe const std::string ArgText = withoutCStrReplacement(Match, *Context); 817c6207f6eSMike Crowe if (!ArgText.empty()) 818c6207f6eSMike Crowe Diag << FixItHint::CreateReplacement(Call->getSourceRange(), ArgText); 8192806cf4bSMike Crowe } 82083f875dcSMike Crowe } 82183f875dcSMike Crowe } // namespace clang::tidy::utils 822