1 //===--- FormatStringConverter.cpp - clang-tidy----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// Implementation of the FormatStringConverter class which is used to convert 11 /// printf format strings to C++ std::formatter format strings. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "FormatStringConverter.h" 16 #include "../utils/FixItHintUtils.h" 17 #include "clang/AST/Expr.h" 18 #include "clang/ASTMatchers/ASTMatchFinder.h" 19 #include "clang/Basic/LangOptions.h" 20 #include "clang/Lex/Lexer.h" 21 #include "clang/Lex/Preprocessor.h" 22 #include "clang/Tooling/FixIt.h" 23 #include "llvm/ADT/StringExtras.h" 24 #include "llvm/Support/Debug.h" 25 26 using namespace clang::ast_matchers; 27 using namespace clang::analyze_printf; 28 29 namespace clang::tidy::utils { 30 using clang::analyze_format_string::ConversionSpecifier; 31 32 /// Is the passed type the actual "char" type, whether that be signed or 33 /// unsigned, rather than explicit signed char or unsigned char types. 34 static bool isRealCharType(const clang::QualType &Ty) { 35 using namespace clang; 36 const Type *DesugaredType = Ty->getUnqualifiedDesugaredType(); 37 if (const auto *BT = llvm::dyn_cast<BuiltinType>(DesugaredType)) 38 return (BT->getKind() == BuiltinType::Char_U || 39 BT->getKind() == BuiltinType::Char_S); 40 return false; 41 } 42 43 /// If possible, return the text name of the signed type that corresponds to the 44 /// passed integer type. If the passed type is already signed then its name is 45 /// just returned. Only supports BuiltinTypes. 46 static std::optional<std::string> 47 getCorrespondingSignedTypeName(const clang::QualType &QT) { 48 using namespace clang; 49 const auto UQT = QT.getUnqualifiedType(); 50 if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) { 51 switch (BT->getKind()) { 52 case BuiltinType::UChar: 53 case BuiltinType::Char_U: 54 case BuiltinType::SChar: 55 case BuiltinType::Char_S: 56 return "signed char"; 57 case BuiltinType::UShort: 58 case BuiltinType::Short: 59 return "short"; 60 case BuiltinType::UInt: 61 case BuiltinType::Int: 62 return "int"; 63 case BuiltinType::ULong: 64 case BuiltinType::Long: 65 return "long"; 66 case BuiltinType::ULongLong: 67 case BuiltinType::LongLong: 68 return "long long"; 69 default: 70 llvm::dbgs() << "Unknown corresponding signed type for BuiltinType '" 71 << QT.getAsString() << "'\n"; 72 return std::nullopt; 73 } 74 } 75 76 // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only 77 // if the argument type does. 78 const std::string TypeName = UQT.getAsString(); 79 StringRef SimplifiedTypeName{TypeName}; 80 const bool InStd = SimplifiedTypeName.consume_front("std::"); 81 const StringRef Prefix = InStd ? "std::" : ""; 82 83 if (SimplifiedTypeName.starts_with("uint") && 84 SimplifiedTypeName.ends_with("_t")) 85 return (Twine(Prefix) + SimplifiedTypeName.drop_front()).str(); 86 87 if (SimplifiedTypeName == "size_t") 88 return (Twine(Prefix) + "ssize_t").str(); 89 90 llvm::dbgs() << "Unknown corresponding signed type for non-BuiltinType '" 91 << UQT.getAsString() << "'\n"; 92 return std::nullopt; 93 } 94 95 /// If possible, return the text name of the unsigned type that corresponds to 96 /// the passed integer type. If the passed type is already unsigned then its 97 /// name is just returned. Only supports BuiltinTypes. 98 static std::optional<std::string> 99 getCorrespondingUnsignedTypeName(const clang::QualType &QT) { 100 using namespace clang; 101 const auto UQT = QT.getUnqualifiedType(); 102 if (const auto *BT = llvm::dyn_cast<BuiltinType>(UQT)) { 103 switch (BT->getKind()) { 104 case BuiltinType::SChar: 105 case BuiltinType::Char_S: 106 case BuiltinType::UChar: 107 case BuiltinType::Char_U: 108 return "unsigned char"; 109 case BuiltinType::Short: 110 case BuiltinType::UShort: 111 return "unsigned short"; 112 case BuiltinType::Int: 113 case BuiltinType::UInt: 114 return "unsigned int"; 115 case BuiltinType::Long: 116 case BuiltinType::ULong: 117 return "unsigned long"; 118 case BuiltinType::LongLong: 119 case BuiltinType::ULongLong: 120 return "unsigned long long"; 121 default: 122 llvm::dbgs() << "Unknown corresponding unsigned type for BuiltinType '" 123 << UQT.getAsString() << "'\n"; 124 return std::nullopt; 125 } 126 } 127 128 // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only 129 // if the argument type does. 130 const std::string TypeName = UQT.getAsString(); 131 StringRef SimplifiedTypeName{TypeName}; 132 const bool InStd = SimplifiedTypeName.consume_front("std::"); 133 const StringRef Prefix = InStd ? "std::" : ""; 134 135 if (SimplifiedTypeName.starts_with("int") && 136 SimplifiedTypeName.ends_with("_t")) 137 return (Twine(Prefix) + "u" + SimplifiedTypeName).str(); 138 139 if (SimplifiedTypeName == "ssize_t") 140 return (Twine(Prefix) + "size_t").str(); 141 if (SimplifiedTypeName == "ptrdiff_t") 142 return (Twine(Prefix) + "size_t").str(); 143 144 llvm::dbgs() << "Unknown corresponding unsigned type for non-BuiltinType '" 145 << UQT.getAsString() << "'\n"; 146 return std::nullopt; 147 } 148 149 static std::optional<std::string> 150 castTypeForArgument(ConversionSpecifier::Kind ArgKind, 151 const clang::QualType &QT) { 152 if (ArgKind == ConversionSpecifier::Kind::uArg) 153 return getCorrespondingUnsignedTypeName(QT); 154 return getCorrespondingSignedTypeName(QT); 155 } 156 157 static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind, 158 const clang::QualType &ArgType) { 159 if (const auto *BT = llvm::dyn_cast<BuiltinType>(ArgType)) { 160 // Unadorned char never matches any expected signedness since it 161 // could be signed or unsigned. 162 const auto ArgTypeKind = BT->getKind(); 163 if (ArgTypeKind == BuiltinType::Char_U || 164 ArgTypeKind == BuiltinType::Char_S) 165 return false; 166 } 167 168 if (ArgKind == ConversionSpecifier::Kind::uArg) 169 return ArgType->isUnsignedIntegerType(); 170 return ArgType->isSignedIntegerType(); 171 } 172 173 namespace { 174 AST_MATCHER(clang::QualType, isRealChar) { 175 return clang::tidy::utils::isRealCharType(Node); 176 } 177 } // namespace 178 179 static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode) { 180 /// For printf-style functions, the signedness of the type printed is 181 /// indicated by the corresponding type in the format string. 182 /// std::print will determine the signedness from the type of the 183 /// argument. This means that it is necessary to generate a cast in 184 /// StrictMode to ensure that the exact behaviour is maintained. 185 /// However, for templated functions like absl::PrintF and 186 /// fmt::printf, the signedness of the type printed is also taken from 187 /// the actual argument like std::print, so such casts are never 188 /// necessary. printf-style functions are variadic, whereas templated 189 /// ones aren't, so we can use that to distinguish between the two 190 /// cases. 191 if (StrictMode) { 192 const FunctionDecl *FuncDecl = Call->getDirectCallee(); 193 assert(FuncDecl); 194 return FuncDecl->isVariadic(); 195 } 196 return false; 197 } 198 199 FormatStringConverter::FormatStringConverter( 200 ASTContext *ContextIn, const CallExpr *Call, unsigned FormatArgOffset, 201 const Configuration ConfigIn, const LangOptions &LO, SourceManager &SM, 202 Preprocessor &PP) 203 : Context(ContextIn), Config(ConfigIn), 204 CastMismatchedIntegerTypes( 205 castMismatchedIntegerTypes(Call, ConfigIn.StrictMode)), 206 Args(Call->getArgs()), NumArgs(Call->getNumArgs()), 207 ArgsOffset(FormatArgOffset + 1), LangOpts(LO) { 208 assert(ArgsOffset <= NumArgs); 209 FormatExpr = llvm::dyn_cast<StringLiteral>( 210 Args[FormatArgOffset]->IgnoreImplicitAsWritten()); 211 212 if (!FormatExpr || !FormatExpr->isOrdinary()) { 213 // Function must have a narrow string literal as its first argument. 214 conversionNotPossible("first argument is not a narrow string literal"); 215 return; 216 } 217 218 if (const std::optional<StringRef> MaybeMacroName = 219 formatStringContainsUnreplaceableMacro(Call, FormatExpr, SM, PP); 220 MaybeMacroName) { 221 conversionNotPossible( 222 ("format string contains unreplaceable macro '" + *MaybeMacroName + "'") 223 .str()); 224 return; 225 } 226 227 PrintfFormatString = FormatExpr->getString(); 228 229 // Assume that the output will be approximately the same size as the input, 230 // but perhaps with a few escapes expanded. 231 const size_t EstimatedGrowth = 8; 232 StandardFormatString.reserve(PrintfFormatString.size() + EstimatedGrowth); 233 StandardFormatString.push_back('\"'); 234 235 const bool IsFreeBsdkPrintf = false; 236 237 using clang::analyze_format_string::ParsePrintfString; 238 ParsePrintfString(*this, PrintfFormatString.data(), 239 PrintfFormatString.data() + PrintfFormatString.size(), 240 LangOpts, Context->getTargetInfo(), IsFreeBsdkPrintf); 241 finalizeFormatText(); 242 } 243 244 std::optional<StringRef> 245 FormatStringConverter::formatStringContainsUnreplaceableMacro( 246 const CallExpr *Call, const StringLiteral *FormatExpr, SourceManager &SM, 247 Preprocessor &PP) { 248 // If a macro invocation surrounds the entire call then we don't want that to 249 // inhibit conversion. The whole format string will appear to come from that 250 // macro, as will the function call. 251 std::optional<StringRef> MaybeSurroundingMacroName; 252 if (SourceLocation BeginCallLoc = Call->getBeginLoc(); 253 BeginCallLoc.isMacroID()) 254 MaybeSurroundingMacroName = 255 Lexer::getImmediateMacroName(BeginCallLoc, SM, PP.getLangOpts()); 256 257 for (auto I = FormatExpr->tokloc_begin(), E = FormatExpr->tokloc_end(); 258 I != E; ++I) { 259 const SourceLocation &TokenLoc = *I; 260 if (TokenLoc.isMacroID()) { 261 const StringRef MacroName = 262 Lexer::getImmediateMacroName(TokenLoc, SM, PP.getLangOpts()); 263 264 if (MaybeSurroundingMacroName != MacroName) { 265 // glibc uses __PRI64_PREFIX and __PRIPTR_PREFIX to define the prefixes 266 // for types that change size so we must look for multiple prefixes. 267 if (!MacroName.starts_with("PRI") && !MacroName.starts_with("__PRI")) 268 return MacroName; 269 270 const SourceLocation TokenSpellingLoc = SM.getSpellingLoc(TokenLoc); 271 const OptionalFileEntryRef MaybeFileEntry = 272 SM.getFileEntryRefForID(SM.getFileID(TokenSpellingLoc)); 273 if (!MaybeFileEntry) 274 return MacroName; 275 276 HeaderSearch &HS = PP.getHeaderSearchInfo(); 277 // Check if the file is a system header 278 if (!isSystem(HS.getFileDirFlavor(*MaybeFileEntry)) || 279 llvm::sys::path::filename(MaybeFileEntry->getName()) != 280 "inttypes.h") 281 return MacroName; 282 } 283 } 284 } 285 return std::nullopt; 286 } 287 288 void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS, 289 std::string &FormatSpec) { 290 ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind(); 291 292 // We only care about alignment if a field width is specified 293 if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) { 294 if (ArgKind == ConversionSpecifier::sArg) { 295 // Strings are left-aligned by default with std::format, so we only 296 // need to emit an alignment if this one needs to be right aligned. 297 if (!FS.isLeftJustified()) 298 FormatSpec.push_back('>'); 299 } else { 300 // Numbers are right-aligned by default with std::format, so we only 301 // need to emit an alignment if this one needs to be left aligned. 302 if (FS.isLeftJustified()) 303 FormatSpec.push_back('<'); 304 } 305 } 306 } 307 308 void FormatStringConverter::emitSign(const PrintfSpecifier &FS, 309 std::string &FormatSpec) { 310 const ConversionSpecifier Spec = FS.getConversionSpecifier(); 311 312 // Ignore on something that isn't numeric. For printf it's would be a 313 // compile-time warning but ignored at runtime, but for std::format it 314 // ought to be a compile-time error. 315 if (Spec.isAnyIntArg() || Spec.isDoubleArg()) { 316 // + is preferred to ' ' 317 if (FS.hasPlusPrefix()) 318 FormatSpec.push_back('+'); 319 else if (FS.hasSpacePrefix()) 320 FormatSpec.push_back(' '); 321 } 322 } 323 324 void FormatStringConverter::emitAlternativeForm(const PrintfSpecifier &FS, 325 std::string &FormatSpec) { 326 if (FS.hasAlternativeForm()) { 327 switch (FS.getConversionSpecifier().getKind()) { 328 case ConversionSpecifier::Kind::aArg: 329 case ConversionSpecifier::Kind::AArg: 330 case ConversionSpecifier::Kind::eArg: 331 case ConversionSpecifier::Kind::EArg: 332 case ConversionSpecifier::Kind::fArg: 333 case ConversionSpecifier::Kind::FArg: 334 case ConversionSpecifier::Kind::gArg: 335 case ConversionSpecifier::Kind::GArg: 336 case ConversionSpecifier::Kind::xArg: 337 case ConversionSpecifier::Kind::XArg: 338 case ConversionSpecifier::Kind::oArg: 339 FormatSpec.push_back('#'); 340 break; 341 default: 342 // Alternative forms don't exist for other argument kinds 343 break; 344 } 345 } 346 } 347 348 void FormatStringConverter::emitFieldWidth(const PrintfSpecifier &FS, 349 std::string &FormatSpec) { 350 { 351 const OptionalAmount FieldWidth = FS.getFieldWidth(); 352 switch (FieldWidth.getHowSpecified()) { 353 case OptionalAmount::NotSpecified: 354 break; 355 case OptionalAmount::Constant: 356 FormatSpec.append(llvm::utostr(FieldWidth.getConstantAmount())); 357 break; 358 case OptionalAmount::Arg: 359 FormatSpec.push_back('{'); 360 if (FieldWidth.usesPositionalArg()) { 361 // std::format argument identifiers are zero-based, whereas printf 362 // ones are one based. 363 assert(FieldWidth.getPositionalArgIndex() > 0U); 364 FormatSpec.append(llvm::utostr(FieldWidth.getPositionalArgIndex() - 1)); 365 } 366 FormatSpec.push_back('}'); 367 break; 368 case OptionalAmount::Invalid: 369 break; 370 } 371 } 372 } 373 374 void FormatStringConverter::emitPrecision(const PrintfSpecifier &FS, 375 std::string &FormatSpec) { 376 const OptionalAmount FieldPrecision = FS.getPrecision(); 377 switch (FieldPrecision.getHowSpecified()) { 378 case OptionalAmount::NotSpecified: 379 break; 380 case OptionalAmount::Constant: 381 FormatSpec.push_back('.'); 382 FormatSpec.append(llvm::utostr(FieldPrecision.getConstantAmount())); 383 break; 384 case OptionalAmount::Arg: 385 FormatSpec.push_back('.'); 386 FormatSpec.push_back('{'); 387 if (FieldPrecision.usesPositionalArg()) { 388 // std::format argument identifiers are zero-based, whereas printf 389 // ones are one based. 390 assert(FieldPrecision.getPositionalArgIndex() > 0U); 391 FormatSpec.append( 392 llvm::utostr(FieldPrecision.getPositionalArgIndex() - 1)); 393 } 394 FormatSpec.push_back('}'); 395 break; 396 case OptionalAmount::Invalid: 397 break; 398 } 399 } 400 401 void FormatStringConverter::maybeRotateArguments(const PrintfSpecifier &FS) { 402 unsigned ArgCount = 0; 403 const OptionalAmount FieldWidth = FS.getFieldWidth(); 404 const OptionalAmount FieldPrecision = FS.getPrecision(); 405 406 if (FieldWidth.getHowSpecified() == OptionalAmount::Arg && 407 !FieldWidth.usesPositionalArg()) 408 ++ArgCount; 409 if (FieldPrecision.getHowSpecified() == OptionalAmount::Arg && 410 !FieldPrecision.usesPositionalArg()) 411 ++ArgCount; 412 413 if (ArgCount) 414 ArgRotates.emplace_back(FS.getArgIndex() + ArgsOffset, ArgCount); 415 } 416 417 void FormatStringConverter::emitStringArgument(unsigned ArgIndex, 418 const Expr *Arg) { 419 // If the argument is the result of a call to std::string::c_str() or 420 // data() with a return type of char then we can remove that call and 421 // pass the std::string directly. We don't want to do so if the return 422 // type is not a char pointer (though it's unlikely that such code would 423 // compile without warnings anyway.) See RedundantStringCStrCheck. 424 425 if (!StringCStrCallExprMatcher) { 426 // Lazily create the matcher 427 const auto StringDecl = type(hasUnqualifiedDesugaredType(recordType( 428 hasDeclaration(cxxRecordDecl(hasName("::std::basic_string")))))); 429 const auto StringExpr = expr( 430 anyOf(hasType(StringDecl), hasType(qualType(pointsTo(StringDecl))))); 431 432 StringCStrCallExprMatcher = 433 cxxMemberCallExpr( 434 on(StringExpr.bind("arg")), callee(memberExpr().bind("member")), 435 callee(cxxMethodDecl(hasAnyName("c_str", "data"), 436 returns(pointerType(pointee(isRealChar())))))) 437 .bind("call"); 438 } 439 440 auto CStrMatches = match(*StringCStrCallExprMatcher, *Arg, *Context); 441 if (CStrMatches.size() == 1) 442 ArgCStrRemovals.push_back(CStrMatches.front()); 443 else if (Arg->getType()->isPointerType()) { 444 const QualType Pointee = Arg->getType()->getPointeeType(); 445 // printf is happy to print signed char and unsigned char strings, but 446 // std::format only likes char strings. 447 if (Pointee->isCharType() && !isRealCharType(Pointee)) 448 ArgFixes.emplace_back(ArgIndex, "reinterpret_cast<const char *>("); 449 } 450 } 451 452 bool FormatStringConverter::emitIntegerArgument( 453 ConversionSpecifier::Kind ArgKind, const Expr *Arg, unsigned ArgIndex, 454 std::string &FormatSpec) { 455 const clang::QualType &ArgType = Arg->getType(); 456 if (ArgType->isBooleanType()) { 457 // std::format will print bool as either "true" or "false" by default, 458 // but printf prints them as "0" or "1". Be compatible with printf by 459 // requesting decimal output. 460 FormatSpec.push_back('d'); 461 } else if (ArgType->isEnumeralType()) { 462 // std::format will try to find a specialization to print the enum 463 // (and probably fail), whereas printf would have just expected it to 464 // be passed as its underlying type. However, printf will have forced 465 // the signedness based on the format string, so we need to do the 466 // same. 467 if (const auto *ET = ArgType->getAs<EnumType>()) { 468 if (const std::optional<std::string> MaybeCastType = 469 castTypeForArgument(ArgKind, ET->getDecl()->getIntegerType())) 470 ArgFixes.emplace_back( 471 ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str()); 472 else 473 return conversionNotPossible( 474 (Twine("argument ") + Twine(ArgIndex) + " has unexpected enum type") 475 .str()); 476 } 477 } else if (CastMismatchedIntegerTypes && 478 !isMatchingSignedness(ArgKind, ArgType)) { 479 // printf will happily print an unsigned type as signed if told to. 480 // Even -Wformat doesn't warn for this. std::format will format as 481 // unsigned unless we cast it. 482 if (const std::optional<std::string> MaybeCastType = 483 castTypeForArgument(ArgKind, ArgType)) 484 ArgFixes.emplace_back( 485 ArgIndex, (Twine("static_cast<") + *MaybeCastType + ">(").str()); 486 else 487 return conversionNotPossible( 488 (Twine("argument ") + Twine(ArgIndex) + " cannot be cast to " + 489 Twine(ArgKind == ConversionSpecifier::Kind::uArg ? "unsigned" 490 : "signed") + 491 " integer type to match format" 492 " specifier and StrictMode is enabled") 493 .str()); 494 } else if (isRealCharType(ArgType) || !ArgType->isIntegerType()) { 495 // Only specify integer if the argument is of a different type 496 FormatSpec.push_back('d'); 497 } 498 return true; 499 } 500 501 /// Append the corresponding standard format string type fragment to FormatSpec, 502 /// and store any argument fixes for later application. 503 /// @returns true on success, false on failure 504 bool FormatStringConverter::emitType(const PrintfSpecifier &FS, const Expr *Arg, 505 std::string &FormatSpec) { 506 ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind(); 507 switch (ArgKind) { 508 case ConversionSpecifier::Kind::sArg: 509 emitStringArgument(FS.getArgIndex() + ArgsOffset, Arg); 510 break; 511 case ConversionSpecifier::Kind::cArg: 512 // The type must be "c" to get a character unless the type is exactly 513 // char (whether that be signed or unsigned for the target.) 514 if (!isRealCharType(Arg->getType())) 515 FormatSpec.push_back('c'); 516 break; 517 case ConversionSpecifier::Kind::dArg: 518 case ConversionSpecifier::Kind::iArg: 519 case ConversionSpecifier::Kind::uArg: 520 if (!emitIntegerArgument(ArgKind, Arg, FS.getArgIndex() + ArgsOffset, 521 FormatSpec)) 522 return false; 523 break; 524 case ConversionSpecifier::Kind::pArg: { 525 const clang::QualType &ArgType = Arg->getType(); 526 // std::format knows how to format void pointers and nullptrs 527 if (!ArgType->isNullPtrType() && !ArgType->isVoidPointerType()) 528 ArgFixes.emplace_back(FS.getArgIndex() + ArgsOffset, 529 "static_cast<const void *>("); 530 break; 531 } 532 case ConversionSpecifier::Kind::xArg: 533 FormatSpec.push_back('x'); 534 break; 535 case ConversionSpecifier::Kind::XArg: 536 FormatSpec.push_back('X'); 537 break; 538 case ConversionSpecifier::Kind::oArg: 539 FormatSpec.push_back('o'); 540 break; 541 case ConversionSpecifier::Kind::aArg: 542 FormatSpec.push_back('a'); 543 break; 544 case ConversionSpecifier::Kind::AArg: 545 FormatSpec.push_back('A'); 546 break; 547 case ConversionSpecifier::Kind::eArg: 548 FormatSpec.push_back('e'); 549 break; 550 case ConversionSpecifier::Kind::EArg: 551 FormatSpec.push_back('E'); 552 break; 553 case ConversionSpecifier::Kind::fArg: 554 FormatSpec.push_back('f'); 555 break; 556 case ConversionSpecifier::Kind::FArg: 557 FormatSpec.push_back('F'); 558 break; 559 case ConversionSpecifier::Kind::gArg: 560 FormatSpec.push_back('g'); 561 break; 562 case ConversionSpecifier::Kind::GArg: 563 FormatSpec.push_back('G'); 564 break; 565 default: 566 // Something we don't understand 567 return conversionNotPossible((Twine("argument ") + 568 Twine(FS.getArgIndex() + ArgsOffset) + 569 " has an unsupported format specifier") 570 .str()); 571 } 572 573 return true; 574 } 575 576 /// Append the standard format string equivalent of the passed PrintfSpecifier 577 /// to StandardFormatString and store any argument fixes for later application. 578 /// @returns true on success, false on failure 579 bool FormatStringConverter::convertArgument(const PrintfSpecifier &FS, 580 const Expr *Arg, 581 std::string &StandardFormatString) { 582 // The specifier must have an associated argument 583 assert(FS.consumesDataArgument()); 584 585 StandardFormatString.push_back('{'); 586 587 if (FS.usesPositionalArg()) { 588 // std::format argument identifiers are zero-based, whereas printf ones 589 // are one based. 590 assert(FS.getPositionalArgIndex() > 0U); 591 StandardFormatString.append(llvm::utostr(FS.getPositionalArgIndex() - 1)); 592 } 593 594 // std::format format argument parts to potentially emit: 595 // [[fill]align][sign]["#"]["0"][width]["."precision][type] 596 std::string FormatSpec; 597 598 // printf doesn't support specifying the fill character - it's always a 599 // space, so we never need to generate one. 600 601 emitAlignment(FS, FormatSpec); 602 emitSign(FS, FormatSpec); 603 emitAlternativeForm(FS, FormatSpec); 604 605 if (FS.hasLeadingZeros()) 606 FormatSpec.push_back('0'); 607 608 emitFieldWidth(FS, FormatSpec); 609 emitPrecision(FS, FormatSpec); 610 maybeRotateArguments(FS); 611 612 if (!emitType(FS, Arg, FormatSpec)) 613 return false; 614 615 if (!FormatSpec.empty()) { 616 StandardFormatString.push_back(':'); 617 StandardFormatString.append(FormatSpec); 618 } 619 620 StandardFormatString.push_back('}'); 621 return true; 622 } 623 624 /// Called for each format specifier by ParsePrintfString. 625 bool FormatStringConverter::HandlePrintfSpecifier(const PrintfSpecifier &FS, 626 const char *StartSpecifier, 627 unsigned SpecifierLen, 628 const TargetInfo &Target) { 629 630 const size_t StartSpecifierPos = StartSpecifier - PrintfFormatString.data(); 631 assert(StartSpecifierPos + SpecifierLen <= PrintfFormatString.size()); 632 633 // Everything before the specifier needs copying verbatim 634 assert(StartSpecifierPos >= PrintfFormatStringPos); 635 636 appendFormatText(StringRef(PrintfFormatString.begin() + PrintfFormatStringPos, 637 StartSpecifierPos - PrintfFormatStringPos)); 638 639 const ConversionSpecifier::Kind ArgKind = 640 FS.getConversionSpecifier().getKind(); 641 642 // Skip over specifier 643 PrintfFormatStringPos = StartSpecifierPos + SpecifierLen; 644 assert(PrintfFormatStringPos <= PrintfFormatString.size()); 645 646 FormatStringNeededRewriting = true; 647 648 if (ArgKind == ConversionSpecifier::Kind::nArg) { 649 // std::print doesn't do the equivalent of %n 650 return conversionNotPossible("'%n' is not supported in format string"); 651 } 652 653 if (ArgKind == ConversionSpecifier::Kind::PrintErrno) { 654 // std::print doesn't support %m. In theory we could insert a 655 // strerror(errno) parameter (assuming that libc has a thread-safe 656 // implementation, which glibc does), but that would require keeping track 657 // of the input and output parameter indices for position arguments too. 658 return conversionNotPossible("'%m' is not supported in format string"); 659 } 660 661 if (ArgKind == ConversionSpecifier::PercentArg) { 662 StandardFormatString.push_back('%'); 663 return true; 664 } 665 666 const unsigned ArgIndex = FS.getArgIndex() + ArgsOffset; 667 if (ArgIndex >= NumArgs) { 668 // Argument index out of range. Give up. 669 return conversionNotPossible( 670 (Twine("argument index ") + Twine(ArgIndex) + " is out of range") 671 .str()); 672 } 673 674 return convertArgument(FS, Args[ArgIndex]->IgnoreImplicitAsWritten(), 675 StandardFormatString); 676 } 677 678 /// Called at the very end just before applying fixes to capture the last part 679 /// of the format string. 680 void FormatStringConverter::finalizeFormatText() { 681 appendFormatText( 682 StringRef(PrintfFormatString.begin() + PrintfFormatStringPos, 683 PrintfFormatString.size() - PrintfFormatStringPos)); 684 PrintfFormatStringPos = PrintfFormatString.size(); 685 686 // It's clearer to convert printf("Hello\r\n"); to std::print("Hello\r\n") 687 // than to std::println("Hello\r"); 688 // Use StringRef until C++20 std::string::ends_with() is available. 689 const auto StandardFormatStringRef = StringRef(StandardFormatString); 690 if (Config.AllowTrailingNewlineRemoval && 691 StandardFormatStringRef.ends_with("\\n") && 692 !StandardFormatStringRef.ends_with("\\\\n") && 693 !StandardFormatStringRef.ends_with("\\r\\n")) { 694 UsePrintNewlineFunction = true; 695 FormatStringNeededRewriting = true; 696 StandardFormatString.erase(StandardFormatString.end() - 2, 697 StandardFormatString.end()); 698 } 699 700 StandardFormatString.push_back('\"'); 701 } 702 703 /// Append literal parts of the format text, reinstating escapes as required. 704 void FormatStringConverter::appendFormatText(const StringRef Text) { 705 for (const char Ch : Text) { 706 if (Ch == '\a') 707 StandardFormatString += "\\a"; 708 else if (Ch == '\b') 709 StandardFormatString += "\\b"; 710 else if (Ch == '\f') 711 StandardFormatString += "\\f"; 712 else if (Ch == '\n') 713 StandardFormatString += "\\n"; 714 else if (Ch == '\r') 715 StandardFormatString += "\\r"; 716 else if (Ch == '\t') 717 StandardFormatString += "\\t"; 718 else if (Ch == '\v') 719 StandardFormatString += "\\v"; 720 else if (Ch == '\"') 721 StandardFormatString += "\\\""; 722 else if (Ch == '\\') 723 StandardFormatString += "\\\\"; 724 else if (Ch == '{') { 725 StandardFormatString += "{{"; 726 FormatStringNeededRewriting = true; 727 } else if (Ch == '}') { 728 StandardFormatString += "}}"; 729 FormatStringNeededRewriting = true; 730 } else if (Ch < 32) { 731 StandardFormatString += "\\x"; 732 StandardFormatString += llvm::hexdigit(Ch >> 4, true); 733 StandardFormatString += llvm::hexdigit(Ch & 0xf, true); 734 } else 735 StandardFormatString += Ch; 736 } 737 } 738 739 static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch, 740 ASTContext &Context) { 741 const auto *Arg = CStrRemovalMatch.getNodeAs<Expr>("arg"); 742 const auto *Member = CStrRemovalMatch.getNodeAs<MemberExpr>("member"); 743 const bool Arrow = Member->isArrow(); 744 return Arrow ? utils::fixit::formatDereference(*Arg, Context) 745 : tooling::fixit::getText(*Arg, Context).str(); 746 } 747 748 /// Called by the check when it is ready to apply the fixes. 749 void FormatStringConverter::applyFixes(DiagnosticBuilder &Diag, 750 SourceManager &SM) { 751 if (FormatStringNeededRewriting) { 752 Diag << FixItHint::CreateReplacement( 753 CharSourceRange::getTokenRange(FormatExpr->getBeginLoc(), 754 FormatExpr->getEndLoc()), 755 StandardFormatString); 756 } 757 758 // ArgCount is one less than the number of arguments to be rotated. 759 for (auto [ValueArgIndex, ArgCount] : ArgRotates) { 760 assert(ValueArgIndex < NumArgs); 761 assert(ValueArgIndex > ArgCount); 762 763 // First move the value argument to the right place. But if there's a 764 // pending c_str() removal then we must do that at the same time. 765 if (const auto CStrRemovalMatch = 766 std::find_if(ArgCStrRemovals.cbegin(), ArgCStrRemovals.cend(), 767 [ArgStartPos = Args[ValueArgIndex]->getBeginLoc()]( 768 const BoundNodes &Match) { 769 // This c_str() removal corresponds to the argument 770 // being moved if they start at the same location. 771 const Expr *CStrArg = Match.getNodeAs<Expr>("arg"); 772 return ArgStartPos == CStrArg->getBeginLoc(); 773 }); 774 CStrRemovalMatch != ArgCStrRemovals.end()) { 775 const std::string ArgText = 776 withoutCStrReplacement(*CStrRemovalMatch, *Context); 777 assert(!ArgText.empty()); 778 779 Diag << FixItHint::CreateReplacement( 780 Args[ValueArgIndex - ArgCount]->getSourceRange(), ArgText); 781 782 // That c_str() removal is now dealt with, so we don't need to do it again 783 ArgCStrRemovals.erase(CStrRemovalMatch); 784 } else 785 Diag << tooling::fixit::createReplacement(*Args[ValueArgIndex - ArgCount], 786 *Args[ValueArgIndex], *Context); 787 788 // Now shift down the field width and precision (if either are present) to 789 // accommodate it. 790 for (size_t Offset = 0; Offset < ArgCount; ++Offset) 791 Diag << tooling::fixit::createReplacement( 792 *Args[ValueArgIndex - Offset], *Args[ValueArgIndex - Offset - 1], 793 *Context); 794 795 // Now we need to modify the ArgFix index too so that we fix the right 796 // argument. We don't need to care about the width and precision indices 797 // since they never need fixing. 798 for (auto &ArgFix : ArgFixes) { 799 if (ArgFix.ArgIndex == ValueArgIndex) 800 ArgFix.ArgIndex = ValueArgIndex - ArgCount; 801 } 802 } 803 804 for (const auto &[ArgIndex, Replacement] : ArgFixes) { 805 SourceLocation AfterOtherSide = 806 Lexer::findNextToken(Args[ArgIndex]->getEndLoc(), SM, LangOpts) 807 ->getLocation(); 808 809 Diag << FixItHint::CreateInsertion(Args[ArgIndex]->getBeginLoc(), 810 Replacement, true) 811 << FixItHint::CreateInsertion(AfterOtherSide, ")", true); 812 } 813 814 for (const auto &Match : ArgCStrRemovals) { 815 const auto *Call = Match.getNodeAs<CallExpr>("call"); 816 const std::string ArgText = withoutCStrReplacement(Match, *Context); 817 if (!ArgText.empty()) 818 Diag << FixItHint::CreateReplacement(Call->getSourceRange(), ArgText); 819 } 820 } 821 } // namespace clang::tidy::utils 822