xref: /llvm-project/clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp (revision a199fb1229987d0885a4367e3a439db336069156)
1 //===--- FormatStringConverter.cpp - clang-tidy----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Implementation of the FormatStringConverter class which is used to convert
11 /// printf format strings to C++ std::formatter format strings.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "FormatStringConverter.h"
16 #include "../utils/FixItHintUtils.h"
17 #include "clang/AST/Expr.h"
18 #include "clang/ASTMatchers/ASTMatchFinder.h"
19 #include "clang/Basic/LangOptions.h"
20 #include "clang/Lex/Lexer.h"
21 #include "clang/Lex/Preprocessor.h"
22 #include "clang/Tooling/FixIt.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/Support/Debug.h"
25 
26 using namespace clang::ast_matchers;
27 using namespace clang::analyze_printf;
28 
29 namespace clang::tidy::utils {
30 using clang::analyze_format_string::ConversionSpecifier;
31 
32 /// Is the passed type the actual "char" type, whether that be signed or
33 /// unsigned, rather than explicit signed char or unsigned char types.
34 static bool isRealCharType(const clang::QualType &Ty) {
35   using namespace clang;
36   const Type *DesugaredType = Ty->getUnqualifiedDesugaredType();
37   if (const auto *BT = llvm::dyn_cast<BuiltinType>(DesugaredType))
38     return (BT->getKind() == BuiltinType::Char_U ||
39             BT->getKind() == BuiltinType::Char_S);
40   return false;
41 }
42 
43 /// If possible, return the text name of the signed type that corresponds to the
44 /// passed integer type. If the passed type is already signed then its name is
45 /// just returned. Only supports BuiltinTypes.
46 static std::optional<std::string>
47 getCorrespondingSignedTypeName(const clang::QualType &QT) {
48   using namespace clang;
49   const auto UQT = QT.getUnqualifiedType();
50   if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) {
51     switch (BT->getKind()) {
52     case BuiltinType::UChar:
53     case BuiltinType::Char_U:
54     case BuiltinType::SChar:
55     case BuiltinType::Char_S:
56       return "signed char";
57     case BuiltinType::UShort:
58     case BuiltinType::Short:
59       return "short";
60     case BuiltinType::UInt:
61     case BuiltinType::Int:
62       return "int";
63     case BuiltinType::ULong:
64     case BuiltinType::Long:
65       return "long";
66     case BuiltinType::ULongLong:
67     case BuiltinType::LongLong:
68       return "long long";
69     default:
70       llvm::dbgs() << "Unknown corresponding signed type for BuiltinType '"
71                    << QT.getAsString() << "'\n";
72       return std::nullopt;
73     }
74   }
75 
76   // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
77   // if the argument type does.
78   const std::string TypeName = UQT.getAsString();
79   StringRef SimplifiedTypeName{TypeName};
80   const bool InStd = SimplifiedTypeName.consume_front("std::");
81   const StringRef Prefix = InStd ? "std::" : "";
82 
83   if (SimplifiedTypeName.starts_with("uint") &&
84       SimplifiedTypeName.ends_with("_t"))
85     return (Twine(Prefix) + SimplifiedTypeName.drop_front()).str();
86 
87   if (SimplifiedTypeName == "size_t")
88     return (Twine(Prefix) + "ssize_t").str();
89 
90   llvm::dbgs() << "Unknown corresponding signed type for non-BuiltinType '"
91                << UQT.getAsString() << "'\n";
92   return std::nullopt;
93 }
94 
95 /// If possible, return the text name of the unsigned type that corresponds to
96 /// the passed integer type. If the passed type is already unsigned then its
97 /// name is just returned. Only supports BuiltinTypes.
98 static std::optional<std::string>
99 getCorrespondingUnsignedTypeName(const clang::QualType &QT) {
100   using namespace clang;
101   const auto UQT = QT.getUnqualifiedType();
102   if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) {
103     switch (BT->getKind()) {
104     case BuiltinType::SChar:
105     case BuiltinType::Char_S:
106     case BuiltinType::UChar:
107     case BuiltinType::Char_U:
108       return "unsigned char";
109     case BuiltinType::Short:
110     case BuiltinType::UShort:
111       return "unsigned short";
112     case BuiltinType::Int:
113     case BuiltinType::UInt:
114       return "unsigned int";
115     case BuiltinType::Long:
116     case BuiltinType::ULong:
117       return "unsigned long";
118     case BuiltinType::LongLong:
119     case BuiltinType::ULongLong:
120       return "unsigned long long";
121     default:
122       llvm::dbgs() << "Unknown corresponding unsigned type for BuiltinType '"
123                    << UQT.getAsString() << "'\n";
124       return std::nullopt;
125     }
126   }
127 
128   // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
129   // if the argument type does.
130   const std::string TypeName = UQT.getAsString();
131   StringRef SimplifiedTypeName{TypeName};
132   const bool InStd = SimplifiedTypeName.consume_front("std::");
133   const StringRef Prefix = InStd ? "std::" : "";
134 
135   if (SimplifiedTypeName.starts_with("int") &&
136       SimplifiedTypeName.ends_with("_t"))
137     return (Twine(Prefix) + "u" + SimplifiedTypeName).str();
138 
139   if (SimplifiedTypeName == "ssize_t")
140     return (Twine(Prefix) + "size_t").str();
141   if (SimplifiedTypeName == "ptrdiff_t")
142     return (Twine(Prefix) + "size_t").str();
143 
144   llvm::dbgs() << "Unknown corresponding unsigned type for non-BuiltinType '"
145                << UQT.getAsString() << "'\n";
146   return std::nullopt;
147 }
148 
149 static std::optional<std::string>
150 castTypeForArgument(ConversionSpecifier::Kind ArgKind,
151                     const clang::QualType &QT) {
152   if (ArgKind == ConversionSpecifier::Kind::uArg)
153     return getCorrespondingUnsignedTypeName(QT);
154   return getCorrespondingSignedTypeName(QT);
155 }
156 
157 static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind,
158                                  const clang::QualType &ArgType) {
159   if (const auto *BT = llvm::dyn_cast<BuiltinType>(ArgType)) {
160     // Unadorned char never matches any expected signedness since it
161     // could be signed or unsigned.
162     const auto ArgTypeKind = BT->getKind();
163     if (ArgTypeKind == BuiltinType::Char_U ||
164         ArgTypeKind == BuiltinType::Char_S)
165       return false;
166   }
167 
168   if (ArgKind == ConversionSpecifier::Kind::uArg)
169     return ArgType->isUnsignedIntegerType();
170   return ArgType->isSignedIntegerType();
171 }
172 
173 namespace {
174 AST_MATCHER(clang::QualType, isRealChar) {
175   return clang::tidy::utils::isRealCharType(Node);
176 }
177 } // namespace
178 
179 static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode) {
180   /// For printf-style functions, the signedness of the type printed is
181   /// indicated by the corresponding type in the format string.
182   /// std::print will determine the signedness from the type of the
183   /// argument. This means that it is necessary to generate a cast in
184   /// StrictMode to ensure that the exact behaviour is maintained.
185   /// However, for templated functions like absl::PrintF and
186   /// fmt::printf, the signedness of the type printed is also taken from
187   /// the actual argument like std::print, so such casts are never
188   /// necessary. printf-style functions are variadic, whereas templated
189   /// ones aren't, so we can use that to distinguish between the two
190   /// cases.
191   if (StrictMode) {
192     const FunctionDecl *FuncDecl = Call->getDirectCallee();
193     assert(FuncDecl);
194     return FuncDecl->isVariadic();
195   }
196   return false;
197 }
198 
199 FormatStringConverter::FormatStringConverter(
200     ASTContext *ContextIn, const CallExpr *Call, unsigned FormatArgOffset,
201     const Configuration ConfigIn, const LangOptions &LO, SourceManager &SM,
202     Preprocessor &PP)
203     : Context(ContextIn), Config(ConfigIn),
204       CastMismatchedIntegerTypes(
205           castMismatchedIntegerTypes(Call, ConfigIn.StrictMode)),
206       Args(Call->getArgs()), NumArgs(Call->getNumArgs()),
207       ArgsOffset(FormatArgOffset + 1), LangOpts(LO) {
208   assert(ArgsOffset <= NumArgs);
209   FormatExpr = llvm::dyn_cast<StringLiteral>(
210       Args[FormatArgOffset]->IgnoreImplicitAsWritten());
211 
212   if (!FormatExpr || !FormatExpr->isOrdinary()) {
213     // Function must have a narrow string literal as its first argument.
214     conversionNotPossible("first argument is not a narrow string literal");
215     return;
216   }
217 
218   if (const std::optional<StringRef> MaybeMacroName =
219           formatStringContainsUnreplaceableMacro(Call, FormatExpr, SM, PP);
220       MaybeMacroName) {
221     conversionNotPossible(
222         ("format string contains unreplaceable macro '" + *MaybeMacroName + "'")
223             .str());
224     return;
225   }
226 
227   PrintfFormatString = FormatExpr->getString();
228 
229   // Assume that the output will be approximately the same size as the input,
230   // but perhaps with a few escapes expanded.
231   const size_t EstimatedGrowth = 8;
232   StandardFormatString.reserve(PrintfFormatString.size() + EstimatedGrowth);
233   StandardFormatString.push_back('\"');
234 
235   const bool IsFreeBsdkPrintf = false;
236 
237   using clang::analyze_format_string::ParsePrintfString;
238   ParsePrintfString(*this, PrintfFormatString.data(),
239                     PrintfFormatString.data() + PrintfFormatString.size(),
240                     LangOpts, Context->getTargetInfo(), IsFreeBsdkPrintf);
241   finalizeFormatText();
242 }
243 
244 std::optional<StringRef>
245 FormatStringConverter::formatStringContainsUnreplaceableMacro(
246     const CallExpr *Call, const StringLiteral *FormatExpr, SourceManager &SM,
247     Preprocessor &PP) {
248   // If a macro invocation surrounds the entire call then we don't want that to
249   // inhibit conversion. The whole format string will appear to come from that
250   // macro, as will the function call.
251   std::optional<StringRef> MaybeSurroundingMacroName;
252   if (SourceLocation BeginCallLoc = Call->getBeginLoc();
253       BeginCallLoc.isMacroID())
254     MaybeSurroundingMacroName =
255         Lexer::getImmediateMacroName(BeginCallLoc, SM, PP.getLangOpts());
256 
257   for (auto I = FormatExpr->tokloc_begin(), E = FormatExpr->tokloc_end();
258        I != E; ++I) {
259     const SourceLocation &TokenLoc = *I;
260     if (TokenLoc.isMacroID()) {
261       const StringRef MacroName =
262           Lexer::getImmediateMacroName(TokenLoc, SM, PP.getLangOpts());
263 
264       if (MaybeSurroundingMacroName != MacroName) {
265         // glibc uses __PRI64_PREFIX and __PRIPTR_PREFIX to define the prefixes
266         // for types that change size so we must look for multiple prefixes.
267         if (!MacroName.starts_with("PRI") && !MacroName.starts_with("__PRI"))
268           return MacroName;
269 
270         const SourceLocation TokenSpellingLoc = SM.getSpellingLoc(TokenLoc);
271         const OptionalFileEntryRef MaybeFileEntry =
272             SM.getFileEntryRefForID(SM.getFileID(TokenSpellingLoc));
273         if (!MaybeFileEntry)
274           return MacroName;
275 
276         HeaderSearch &HS = PP.getHeaderSearchInfo();
277         // Check if the file is a system header
278         if (!isSystem(HS.getFileDirFlavor(*MaybeFileEntry)) ||
279             llvm::sys::path::filename(MaybeFileEntry->getName()) !=
280                 "inttypes.h")
281           return MacroName;
282       }
283     }
284   }
285   return std::nullopt;
286 }
287 
288 void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS,
289                                           std::string &FormatSpec) {
290   ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind();
291 
292   // We only care about alignment if a field width is specified
293   if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) {
294     if (ArgKind == ConversionSpecifier::sArg) {
295       // Strings are left-aligned by default with std::format, so we only
296       // need to emit an alignment if this one needs to be right aligned.
297       if (!FS.isLeftJustified())
298         FormatSpec.push_back('>');
299     } else {
300       // Numbers are right-aligned by default with std::format, so we only
301       // need to emit an alignment if this one needs to be left aligned.
302       if (FS.isLeftJustified())
303         FormatSpec.push_back('<');
304     }
305   }
306 }
307 
308 void FormatStringConverter::emitSign(const PrintfSpecifier &FS,
309                                      std::string &FormatSpec) {
310   const ConversionSpecifier Spec = FS.getConversionSpecifier();
311 
312   // Ignore on something that isn't numeric. For printf it's would be a
313   // compile-time warning but ignored at runtime, but for std::format it
314   // ought to be a compile-time error.
315   if (Spec.isAnyIntArg() || Spec.isDoubleArg()) {
316     // + is preferred to ' '
317     if (FS.hasPlusPrefix())
318       FormatSpec.push_back('+');
319     else if (FS.hasSpacePrefix())
320       FormatSpec.push_back(' ');
321   }
322 }
323 
324 void FormatStringConverter::emitAlternativeForm(const PrintfSpecifier &FS,
325                                                 std::string &FormatSpec) {
326   if (FS.hasAlternativeForm()) {
327     switch (FS.getConversionSpecifier().getKind()) {
328     case ConversionSpecifier::Kind::aArg:
329     case ConversionSpecifier::Kind::AArg:
330     case ConversionSpecifier::Kind::eArg:
331     case ConversionSpecifier::Kind::EArg:
332     case ConversionSpecifier::Kind::fArg:
333     case ConversionSpecifier::Kind::FArg:
334     case ConversionSpecifier::Kind::gArg:
335     case ConversionSpecifier::Kind::GArg:
336     case ConversionSpecifier::Kind::xArg:
337     case ConversionSpecifier::Kind::XArg:
338     case ConversionSpecifier::Kind::oArg:
339       FormatSpec.push_back('#');
340       break;
341     default:
342       // Alternative forms don't exist for other argument kinds
343       break;
344     }
345   }
346 }
347 
348 void FormatStringConverter::emitFieldWidth(const PrintfSpecifier &FS,
349                                            std::string &FormatSpec) {
350   {
351     const OptionalAmount FieldWidth = FS.getFieldWidth();
352     switch (FieldWidth.getHowSpecified()) {
353     case OptionalAmount::NotSpecified:
354       break;
355     case OptionalAmount::Constant:
356       FormatSpec.append(llvm::utostr(FieldWidth.getConstantAmount()));
357       break;
358     case OptionalAmount::Arg:
359       FormatSpec.push_back('{');
360       if (FieldWidth.usesPositionalArg()) {
361         // std::format argument identifiers are zero-based, whereas printf
362         // ones are one based.
363         assert(FieldWidth.getPositionalArgIndex() > 0U);
364         FormatSpec.append(llvm::utostr(FieldWidth.getPositionalArgIndex() - 1));
365       }
366       FormatSpec.push_back('}');
367       break;
368     case OptionalAmount::Invalid:
369       break;
370     }
371   }
372 }
373 
374 void FormatStringConverter::emitPrecision(const PrintfSpecifier &FS,
375                                           std::string &FormatSpec) {
376   const OptionalAmount FieldPrecision = FS.getPrecision();
377   switch (FieldPrecision.getHowSpecified()) {
378   case OptionalAmount::NotSpecified:
379     break;
380   case OptionalAmount::Constant:
381     FormatSpec.push_back('.');
382     FormatSpec.append(llvm::utostr(FieldPrecision.getConstantAmount()));
383     break;
384   case OptionalAmount::Arg:
385     FormatSpec.push_back('.');
386     FormatSpec.push_back('{');
387     if (FieldPrecision.usesPositionalArg()) {
388       // std::format argument identifiers are zero-based, whereas printf
389       // ones are one based.
390       assert(FieldPrecision.getPositionalArgIndex() > 0U);
391       FormatSpec.append(
392           llvm::utostr(FieldPrecision.getPositionalArgIndex() - 1));
393     }
394     FormatSpec.push_back('}');
395     break;
396   case OptionalAmount::Invalid:
397     break;
398   }
399 }
400 
401 void FormatStringConverter::maybeRotateArguments(const PrintfSpecifier &FS) {
402   unsigned ArgCount = 0;
403   const OptionalAmount FieldWidth = FS.getFieldWidth();
404   const OptionalAmount FieldPrecision = FS.getPrecision();
405 
406   if (FieldWidth.getHowSpecified() == OptionalAmount::Arg &&
407       !FieldWidth.usesPositionalArg())
408     ++ArgCount;
409   if (FieldPrecision.getHowSpecified() == OptionalAmount::Arg &&
410       !FieldPrecision.usesPositionalArg())
411     ++ArgCount;
412 
413   if (ArgCount)
414     ArgRotates.emplace_back(FS.getArgIndex() + ArgsOffset, ArgCount);
415 }
416 
417 void FormatStringConverter::emitStringArgument(unsigned ArgIndex,
418                                                const Expr *Arg) {
419   // If the argument is the result of a call to std::string::c_str() or
420   // data() with a return type of char then we can remove that call and
421   // pass the std::string directly. We don't want to do so if the return
422   // type is not a char pointer (though it's unlikely that such code would
423   // compile without warnings anyway.) See RedundantStringCStrCheck.
424 
425   if (!StringCStrCallExprMatcher) {
426     // Lazily create the matcher
427     const auto StringDecl = type(hasUnqualifiedDesugaredType(recordType(
428         hasDeclaration(cxxRecordDecl(hasName("::std::basic_string"))))));
429     const auto StringExpr = expr(
430         anyOf(hasType(StringDecl), hasType(qualType(pointsTo(StringDecl)))));
431 
432     StringCStrCallExprMatcher =
433         cxxMemberCallExpr(
434             on(StringExpr.bind("arg")), callee(memberExpr().bind("member")),
435             callee(cxxMethodDecl(hasAnyName("c_str", "data"),
436                                  returns(pointerType(pointee(isRealChar()))))))
437             .bind("call");
438   }
439 
440   auto CStrMatches = match(*StringCStrCallExprMatcher, *Arg, *Context);
441   if (CStrMatches.size() == 1)
442     ArgCStrRemovals.push_back(CStrMatches.front());
443   else if (Arg->getType()->isPointerType()) {
444     const QualType Pointee = Arg->getType()->getPointeeType();
445     // printf is happy to print signed char and unsigned char strings, but
446     // std::format only likes char strings.
447     if (Pointee->isCharType() && !isRealCharType(Pointee))
448       ArgFixes.emplace_back(ArgIndex, "reinterpret_cast<const char *>(");
449   }
450 }
451 
452 bool FormatStringConverter::emitIntegerArgument(
453     ConversionSpecifier::Kind ArgKind, const Expr *Arg, unsigned ArgIndex,
454     std::string &FormatSpec) {
455   const clang::QualType &ArgType = Arg->getType();
456   if (ArgType->isBooleanType()) {
457     // std::format will print bool as either "true" or "false" by default,
458     // but printf prints them as "0" or "1". Be compatible with printf by
459     // requesting decimal output.
460     FormatSpec.push_back('d');
461   } else if (ArgType->isEnumeralType()) {
462     // std::format will try to find a specialization to print the enum
463     // (and probably fail), whereas printf would have just expected it to
464     // be passed as its underlying type. However, printf will have forced
465     // the signedness based on the format string, so we need to do the
466     // same.
467     if (const auto *ET = ArgType->getAs<EnumType>()) {
468       if (const std::optional<std::string> MaybeCastType =
469               castTypeForArgument(ArgKind, ET->getDecl()->getIntegerType()))
470         ArgFixes.emplace_back(
471             ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
472       else
473         return conversionNotPossible(
474             (Twine("argument ") + Twine(ArgIndex) + " has unexpected enum type")
475                 .str());
476     }
477   } else if (CastMismatchedIntegerTypes &&
478              !isMatchingSignedness(ArgKind, ArgType)) {
479     // printf will happily print an unsigned type as signed if told to.
480     // Even -Wformat doesn't warn for this. std::format will format as
481     // unsigned unless we cast it.
482     if (const std::optional<std::string> MaybeCastType =
483             castTypeForArgument(ArgKind, ArgType))
484       ArgFixes.emplace_back(
485           ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str());
486     else
487       return conversionNotPossible(
488           (Twine("argument ") + Twine(ArgIndex) + " cannot be cast to " +
489            Twine(ArgKind == ConversionSpecifier::Kind::uArg ? "unsigned"
490                                                             : "signed") +
491            " integer type to match format"
492            " specifier and StrictMode is enabled")
493               .str());
494   } else if (isRealCharType(ArgType) || !ArgType->isIntegerType()) {
495     // Only specify integer if the argument is of a different type
496     FormatSpec.push_back('d');
497   }
498   return true;
499 }
500 
501 /// Append the corresponding standard format string type fragment to FormatSpec,
502 /// and store any argument fixes for later application.
503 /// @returns true on success, false on failure
504 bool FormatStringConverter::emitType(const PrintfSpecifier &FS, const Expr *Arg,
505                                      std::string &FormatSpec) {
506   ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind();
507   switch (ArgKind) {
508   case ConversionSpecifier::Kind::sArg:
509     emitStringArgument(FS.getArgIndex() + ArgsOffset, Arg);
510     break;
511   case ConversionSpecifier::Kind::cArg:
512     // The type must be "c" to get a character unless the type is exactly
513     // char (whether that be signed or unsigned for the target.)
514     if (!isRealCharType(Arg->getType()))
515       FormatSpec.push_back('c');
516     break;
517   case ConversionSpecifier::Kind::dArg:
518   case ConversionSpecifier::Kind::iArg:
519   case ConversionSpecifier::Kind::uArg:
520     if (!emitIntegerArgument(ArgKind, Arg, FS.getArgIndex() + ArgsOffset,
521                              FormatSpec))
522       return false;
523     break;
524   case ConversionSpecifier::Kind::pArg: {
525     const clang::QualType &ArgType = Arg->getType();
526     // std::format knows how to format void pointers and nullptrs
527     if (!ArgType->isNullPtrType() && !ArgType->isVoidPointerType())
528       ArgFixes.emplace_back(FS.getArgIndex() + ArgsOffset,
529                             "static_cast<const void *>(");
530     break;
531   }
532   case ConversionSpecifier::Kind::xArg:
533     FormatSpec.push_back('x');
534     break;
535   case ConversionSpecifier::Kind::XArg:
536     FormatSpec.push_back('X');
537     break;
538   case ConversionSpecifier::Kind::oArg:
539     FormatSpec.push_back('o');
540     break;
541   case ConversionSpecifier::Kind::aArg:
542     FormatSpec.push_back('a');
543     break;
544   case ConversionSpecifier::Kind::AArg:
545     FormatSpec.push_back('A');
546     break;
547   case ConversionSpecifier::Kind::eArg:
548     FormatSpec.push_back('e');
549     break;
550   case ConversionSpecifier::Kind::EArg:
551     FormatSpec.push_back('E');
552     break;
553   case ConversionSpecifier::Kind::fArg:
554     FormatSpec.push_back('f');
555     break;
556   case ConversionSpecifier::Kind::FArg:
557     FormatSpec.push_back('F');
558     break;
559   case ConversionSpecifier::Kind::gArg:
560     FormatSpec.push_back('g');
561     break;
562   case ConversionSpecifier::Kind::GArg:
563     FormatSpec.push_back('G');
564     break;
565   default:
566     // Something we don't understand
567     return conversionNotPossible((Twine("argument ") +
568                                   Twine(FS.getArgIndex() + ArgsOffset) +
569                                   " has an unsupported format specifier")
570                                      .str());
571   }
572 
573   return true;
574 }
575 
576 /// Append the standard format string equivalent of the passed PrintfSpecifier
577 /// to StandardFormatString and store any argument fixes for later application.
578 /// @returns true on success, false on failure
579 bool FormatStringConverter::convertArgument(const PrintfSpecifier &FS,
580                                             const Expr *Arg,
581                                             std::string &StandardFormatString) {
582   // The specifier must have an associated argument
583   assert(FS.consumesDataArgument());
584 
585   StandardFormatString.push_back('{');
586 
587   if (FS.usesPositionalArg()) {
588     // std::format argument identifiers are zero-based, whereas printf ones
589     // are one based.
590     assert(FS.getPositionalArgIndex() > 0U);
591     StandardFormatString.append(llvm::utostr(FS.getPositionalArgIndex() - 1));
592   }
593 
594   // std::format format argument parts to potentially emit:
595   // [[fill]align][sign]["#"]["0"][width]["."precision][type]
596   std::string FormatSpec;
597 
598   // printf doesn't support specifying the fill character - it's always a
599   // space, so we never need to generate one.
600 
601   emitAlignment(FS, FormatSpec);
602   emitSign(FS, FormatSpec);
603   emitAlternativeForm(FS, FormatSpec);
604 
605   if (FS.hasLeadingZeros())
606     FormatSpec.push_back('0');
607 
608   emitFieldWidth(FS, FormatSpec);
609   emitPrecision(FS, FormatSpec);
610   maybeRotateArguments(FS);
611 
612   if (!emitType(FS, Arg, FormatSpec))
613     return false;
614 
615   if (!FormatSpec.empty()) {
616     StandardFormatString.push_back(':');
617     StandardFormatString.append(FormatSpec);
618   }
619 
620   StandardFormatString.push_back('}');
621   return true;
622 }
623 
624 /// Called for each format specifier by ParsePrintfString.
625 bool FormatStringConverter::HandlePrintfSpecifier(const PrintfSpecifier &FS,
626                                                   const char *StartSpecifier,
627                                                   unsigned SpecifierLen,
628                                                   const TargetInfo &Target) {
629 
630   const size_t StartSpecifierPos = StartSpecifier - PrintfFormatString.data();
631   assert(StartSpecifierPos + SpecifierLen <= PrintfFormatString.size());
632 
633   // Everything before the specifier needs copying verbatim
634   assert(StartSpecifierPos >= PrintfFormatStringPos);
635 
636   appendFormatText(StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
637                              StartSpecifierPos - PrintfFormatStringPos));
638 
639   const ConversionSpecifier::Kind ArgKind =
640       FS.getConversionSpecifier().getKind();
641 
642   // Skip over specifier
643   PrintfFormatStringPos = StartSpecifierPos + SpecifierLen;
644   assert(PrintfFormatStringPos <= PrintfFormatString.size());
645 
646   FormatStringNeededRewriting = true;
647 
648   if (ArgKind == ConversionSpecifier::Kind::nArg) {
649     // std::print doesn't do the equivalent of %n
650     return conversionNotPossible("'%n' is not supported in format string");
651   }
652 
653   if (ArgKind == ConversionSpecifier::Kind::PrintErrno) {
654     // std::print doesn't support %m. In theory we could insert a
655     // strerror(errno) parameter (assuming that libc has a thread-safe
656     // implementation, which glibc does), but that would require keeping track
657     // of the input and output parameter indices for position arguments too.
658     return conversionNotPossible("'%m' is not supported in format string");
659   }
660 
661   if (ArgKind == ConversionSpecifier::PercentArg) {
662     StandardFormatString.push_back('%');
663     return true;
664   }
665 
666   const unsigned ArgIndex = FS.getArgIndex() + ArgsOffset;
667   if (ArgIndex >= NumArgs) {
668     // Argument index out of range. Give up.
669     return conversionNotPossible(
670         (Twine("argument index ") + Twine(ArgIndex) + " is out of range")
671             .str());
672   }
673 
674   return convertArgument(FS, Args[ArgIndex]->IgnoreImplicitAsWritten(),
675                          StandardFormatString);
676 }
677 
678 /// Called at the very end just before applying fixes to capture the last part
679 /// of the format string.
680 void FormatStringConverter::finalizeFormatText() {
681   appendFormatText(
682       StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
683                 PrintfFormatString.size() - PrintfFormatStringPos));
684   PrintfFormatStringPos = PrintfFormatString.size();
685 
686   // It's clearer to convert printf("Hello\r\n"); to std::print("Hello\r\n")
687   // than to std::println("Hello\r");
688   // Use StringRef until C++20 std::string::ends_with() is available.
689   const auto StandardFormatStringRef = StringRef(StandardFormatString);
690   if (Config.AllowTrailingNewlineRemoval &&
691       StandardFormatStringRef.ends_with("\\n") &&
692       !StandardFormatStringRef.ends_with("\\\\n") &&
693       !StandardFormatStringRef.ends_with("\\r\\n")) {
694     UsePrintNewlineFunction = true;
695     FormatStringNeededRewriting = true;
696     StandardFormatString.erase(StandardFormatString.end() - 2,
697                                StandardFormatString.end());
698   }
699 
700   StandardFormatString.push_back('\"');
701 }
702 
703 /// Append literal parts of the format text, reinstating escapes as required.
704 void FormatStringConverter::appendFormatText(const StringRef Text) {
705   for (const char Ch : Text) {
706     if (Ch == '\a')
707       StandardFormatString += "\\a";
708     else if (Ch == '\b')
709       StandardFormatString += "\\b";
710     else if (Ch == '\f')
711       StandardFormatString += "\\f";
712     else if (Ch == '\n')
713       StandardFormatString += "\\n";
714     else if (Ch == '\r')
715       StandardFormatString += "\\r";
716     else if (Ch == '\t')
717       StandardFormatString += "\\t";
718     else if (Ch == '\v')
719       StandardFormatString += "\\v";
720     else if (Ch == '\"')
721       StandardFormatString += "\\\"";
722     else if (Ch == '\\')
723       StandardFormatString += "\\\\";
724     else if (Ch == '{') {
725       StandardFormatString += "{{";
726       FormatStringNeededRewriting = true;
727     } else if (Ch == '}') {
728       StandardFormatString += "}}";
729       FormatStringNeededRewriting = true;
730     } else if (Ch < 32) {
731       StandardFormatString += "\\x";
732       StandardFormatString += llvm::hexdigit(Ch >> 4, true);
733       StandardFormatString += llvm::hexdigit(Ch & 0xf, true);
734     } else
735       StandardFormatString += Ch;
736   }
737 }
738 
739 static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch,
740                                           ASTContext &Context) {
741   const auto *Arg = CStrRemovalMatch.getNodeAs<Expr>("arg");
742   const auto *Member = CStrRemovalMatch.getNodeAs<MemberExpr>("member");
743   const bool Arrow = Member->isArrow();
744   return Arrow ? utils::fixit::formatDereference(*Arg, Context)
745                : tooling::fixit::getText(*Arg, Context).str();
746 }
747 
748 /// Called by the check when it is ready to apply the fixes.
749 void FormatStringConverter::applyFixes(DiagnosticBuilder &Diag,
750                                        SourceManager &SM) {
751   if (FormatStringNeededRewriting) {
752     Diag << FixItHint::CreateReplacement(
753         CharSourceRange::getTokenRange(FormatExpr->getBeginLoc(),
754                                        FormatExpr->getEndLoc()),
755         StandardFormatString);
756   }
757 
758   // ArgCount is one less than the number of arguments to be rotated.
759   for (auto [ValueArgIndex, ArgCount] : ArgRotates) {
760     assert(ValueArgIndex < NumArgs);
761     assert(ValueArgIndex > ArgCount);
762 
763     // First move the value argument to the right place. But if there's a
764     // pending c_str() removal then we must do that at the same time.
765     if (const auto CStrRemovalMatch =
766             std::find_if(ArgCStrRemovals.cbegin(), ArgCStrRemovals.cend(),
767                          [ArgStartPos = Args[ValueArgIndex]->getBeginLoc()](
768                              const BoundNodes &Match) {
769                            // This c_str() removal corresponds to the argument
770                            // being moved if they start at the same location.
771                            const Expr *CStrArg = Match.getNodeAs<Expr>("arg");
772                            return ArgStartPos == CStrArg->getBeginLoc();
773                          });
774         CStrRemovalMatch != ArgCStrRemovals.end()) {
775       const std::string ArgText =
776           withoutCStrReplacement(*CStrRemovalMatch, *Context);
777       assert(!ArgText.empty());
778 
779       Diag << FixItHint::CreateReplacement(
780           Args[ValueArgIndex - ArgCount]->getSourceRange(), ArgText);
781 
782       // That c_str() removal is now dealt with, so we don't need to do it again
783       ArgCStrRemovals.erase(CStrRemovalMatch);
784     } else
785       Diag << tooling::fixit::createReplacement(*Args[ValueArgIndex - ArgCount],
786                                                 *Args[ValueArgIndex], *Context);
787 
788     // Now shift down the field width and precision (if either are present) to
789     // accommodate it.
790     for (size_t Offset = 0; Offset < ArgCount; ++Offset)
791       Diag << tooling::fixit::createReplacement(
792           *Args[ValueArgIndex - Offset], *Args[ValueArgIndex - Offset - 1],
793           *Context);
794 
795     // Now we need to modify the ArgFix index too so that we fix the right
796     // argument. We don't need to care about the width and precision indices
797     // since they never need fixing.
798     for (auto &ArgFix : ArgFixes) {
799       if (ArgFix.ArgIndex == ValueArgIndex)
800         ArgFix.ArgIndex = ValueArgIndex - ArgCount;
801     }
802   }
803 
804   for (const auto &[ArgIndex, Replacement] : ArgFixes) {
805     SourceLocation AfterOtherSide =
806         Lexer::findNextToken(Args[ArgIndex]->getEndLoc(), SM, LangOpts)
807             ->getLocation();
808 
809     Diag << FixItHint::CreateInsertion(Args[ArgIndex]->getBeginLoc(),
810                                        Replacement, true)
811          << FixItHint::CreateInsertion(AfterOtherSide, ")", true);
812   }
813 
814   for (const auto &Match : ArgCStrRemovals) {
815     const auto *Call = Match.getNodeAs<CallExpr>("call");
816     const std::string ArgText = withoutCStrReplacement(Match, *Context);
817     if (!ArgText.empty())
818       Diag << FixItHint::CreateReplacement(Call->getSourceRange(), ArgText);
819   }
820 }
821 } // namespace clang::tidy::utils
822