10b57cec5SDimitry Andric //= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // Handling of format string in scanf and friends. The structure of format 100b57cec5SDimitry Andric // strings for fscanf() are described in C99 7.19.6.2. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "clang/AST/FormatString.h" 150b57cec5SDimitry Andric #include "FormatStringParsing.h" 160b57cec5SDimitry Andric #include "clang/Basic/TargetInfo.h" 170b57cec5SDimitry Andric 180b57cec5SDimitry Andric using clang::analyze_format_string::ArgType; 190b57cec5SDimitry Andric using clang::analyze_format_string::FormatStringHandler; 200b57cec5SDimitry Andric using clang::analyze_format_string::LengthModifier; 210b57cec5SDimitry Andric using clang::analyze_format_string::OptionalAmount; 220b57cec5SDimitry Andric using clang::analyze_format_string::ConversionSpecifier; 230b57cec5SDimitry Andric using clang::analyze_scanf::ScanfConversionSpecifier; 240b57cec5SDimitry Andric using clang::analyze_scanf::ScanfSpecifier; 250b57cec5SDimitry Andric using clang::UpdateOnReturn; 260b57cec5SDimitry Andric using namespace clang; 270b57cec5SDimitry Andric 280b57cec5SDimitry Andric typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> 290b57cec5SDimitry Andric ScanfSpecifierResult; 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric static bool ParseScanList(FormatStringHandler &H, 320b57cec5SDimitry Andric ScanfConversionSpecifier &CS, 330b57cec5SDimitry Andric const char *&Beg, const char *E) { 340b57cec5SDimitry Andric const char *I = Beg; 350b57cec5SDimitry Andric const char *start = I - 1; 360b57cec5SDimitry Andric UpdateOnReturn <const char*> UpdateBeg(Beg, I); 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric // No more characters? 390b57cec5SDimitry Andric if (I == E) { 400b57cec5SDimitry Andric H.HandleIncompleteScanList(start, I); 410b57cec5SDimitry Andric return true; 420b57cec5SDimitry Andric } 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric // Special case: ']' is the first character. 450b57cec5SDimitry Andric if (*I == ']') { 460b57cec5SDimitry Andric if (++I == E) { 470b57cec5SDimitry Andric H.HandleIncompleteScanList(start, I - 1); 480b57cec5SDimitry Andric return true; 490b57cec5SDimitry Andric } 500b57cec5SDimitry Andric } 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric // Special case: "^]" are the first characters. 530b57cec5SDimitry Andric if (I + 1 != E && I[0] == '^' && I[1] == ']') { 540b57cec5SDimitry Andric I += 2; 550b57cec5SDimitry Andric if (I == E) { 560b57cec5SDimitry Andric H.HandleIncompleteScanList(start, I - 1); 570b57cec5SDimitry Andric return true; 580b57cec5SDimitry Andric } 590b57cec5SDimitry Andric } 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric // Look for a ']' character which denotes the end of the scan list. 620b57cec5SDimitry Andric while (*I != ']') { 630b57cec5SDimitry Andric if (++I == E) { 640b57cec5SDimitry Andric H.HandleIncompleteScanList(start, I - 1); 650b57cec5SDimitry Andric return true; 660b57cec5SDimitry Andric } 670b57cec5SDimitry Andric } 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric CS.setEndScanList(I); 700b57cec5SDimitry Andric return false; 710b57cec5SDimitry Andric } 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric // FIXME: Much of this is copy-paste from ParsePrintfSpecifier. 740b57cec5SDimitry Andric // We can possibly refactor. 750b57cec5SDimitry Andric static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, 760b57cec5SDimitry Andric const char *&Beg, 770b57cec5SDimitry Andric const char *E, 780b57cec5SDimitry Andric unsigned &argIndex, 790b57cec5SDimitry Andric const LangOptions &LO, 800b57cec5SDimitry Andric const TargetInfo &Target) { 810b57cec5SDimitry Andric using namespace clang::analyze_format_string; 820b57cec5SDimitry Andric using namespace clang::analyze_scanf; 830b57cec5SDimitry Andric const char *I = Beg; 840b57cec5SDimitry Andric const char *Start = nullptr; 850b57cec5SDimitry Andric UpdateOnReturn <const char*> UpdateBeg(Beg, I); 860b57cec5SDimitry Andric 870b57cec5SDimitry Andric // Look for a '%' character that indicates the start of a format specifier. 880b57cec5SDimitry Andric for ( ; I != E ; ++I) { 890b57cec5SDimitry Andric char c = *I; 900b57cec5SDimitry Andric if (c == '\0') { 910b57cec5SDimitry Andric // Detect spurious null characters, which are likely errors. 920b57cec5SDimitry Andric H.HandleNullChar(I); 930b57cec5SDimitry Andric return true; 940b57cec5SDimitry Andric } 950b57cec5SDimitry Andric if (c == '%') { 960b57cec5SDimitry Andric Start = I++; // Record the start of the format specifier. 970b57cec5SDimitry Andric break; 980b57cec5SDimitry Andric } 990b57cec5SDimitry Andric } 1000b57cec5SDimitry Andric 1010b57cec5SDimitry Andric // No format specifier found? 1020b57cec5SDimitry Andric if (!Start) 1030b57cec5SDimitry Andric return false; 1040b57cec5SDimitry Andric 1050b57cec5SDimitry Andric if (I == E) { 1060b57cec5SDimitry Andric // No more characters left? 1070b57cec5SDimitry Andric H.HandleIncompleteSpecifier(Start, E - Start); 1080b57cec5SDimitry Andric return true; 1090b57cec5SDimitry Andric } 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric ScanfSpecifier FS; 1120b57cec5SDimitry Andric if (ParseArgPosition(H, FS, Start, I, E)) 1130b57cec5SDimitry Andric return true; 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric if (I == E) { 1160b57cec5SDimitry Andric // No more characters left? 1170b57cec5SDimitry Andric H.HandleIncompleteSpecifier(Start, E - Start); 1180b57cec5SDimitry Andric return true; 1190b57cec5SDimitry Andric } 1200b57cec5SDimitry Andric 1210b57cec5SDimitry Andric // Look for '*' flag if it is present. 1220b57cec5SDimitry Andric if (*I == '*') { 1230b57cec5SDimitry Andric FS.setSuppressAssignment(I); 1240b57cec5SDimitry Andric if (++I == E) { 1250b57cec5SDimitry Andric H.HandleIncompleteSpecifier(Start, E - Start); 1260b57cec5SDimitry Andric return true; 1270b57cec5SDimitry Andric } 1280b57cec5SDimitry Andric } 1290b57cec5SDimitry Andric 1300b57cec5SDimitry Andric // Look for the field width (if any). Unlike printf, this is either 1310b57cec5SDimitry Andric // a fixed integer or isn't present. 1320b57cec5SDimitry Andric const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); 1330b57cec5SDimitry Andric if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { 1340b57cec5SDimitry Andric assert(Amt.getHowSpecified() == OptionalAmount::Constant); 1350b57cec5SDimitry Andric FS.setFieldWidth(Amt); 1360b57cec5SDimitry Andric 1370b57cec5SDimitry Andric if (I == E) { 1380b57cec5SDimitry Andric // No more characters left? 1390b57cec5SDimitry Andric H.HandleIncompleteSpecifier(Start, E - Start); 1400b57cec5SDimitry Andric return true; 1410b57cec5SDimitry Andric } 1420b57cec5SDimitry Andric } 1430b57cec5SDimitry Andric 1440b57cec5SDimitry Andric // Look for the length modifier. 1450b57cec5SDimitry Andric if (ParseLengthModifier(FS, I, E, LO, /*IsScanf=*/true) && I == E) { 1460b57cec5SDimitry Andric // No more characters left? 1470b57cec5SDimitry Andric H.HandleIncompleteSpecifier(Start, E - Start); 1480b57cec5SDimitry Andric return true; 1490b57cec5SDimitry Andric } 1500b57cec5SDimitry Andric 1510b57cec5SDimitry Andric // Detect spurious null characters, which are likely errors. 1520b57cec5SDimitry Andric if (*I == '\0') { 1530b57cec5SDimitry Andric H.HandleNullChar(I); 1540b57cec5SDimitry Andric return true; 1550b57cec5SDimitry Andric } 1560b57cec5SDimitry Andric 1570b57cec5SDimitry Andric // Finally, look for the conversion specifier. 1580b57cec5SDimitry Andric const char *conversionPosition = I++; 1590b57cec5SDimitry Andric ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; 1600b57cec5SDimitry Andric switch (*conversionPosition) { 1610b57cec5SDimitry Andric default: 1620b57cec5SDimitry Andric break; 1630b57cec5SDimitry Andric case '%': k = ConversionSpecifier::PercentArg; break; 164bdd1243dSDimitry Andric case 'b': k = ConversionSpecifier::bArg; break; 1650b57cec5SDimitry Andric case 'A': k = ConversionSpecifier::AArg; break; 1660b57cec5SDimitry Andric case 'E': k = ConversionSpecifier::EArg; break; 1670b57cec5SDimitry Andric case 'F': k = ConversionSpecifier::FArg; break; 1680b57cec5SDimitry Andric case 'G': k = ConversionSpecifier::GArg; break; 1690b57cec5SDimitry Andric case 'X': k = ConversionSpecifier::XArg; break; 1700b57cec5SDimitry Andric case 'a': k = ConversionSpecifier::aArg; break; 1710b57cec5SDimitry Andric case 'd': k = ConversionSpecifier::dArg; break; 1720b57cec5SDimitry Andric case 'e': k = ConversionSpecifier::eArg; break; 1730b57cec5SDimitry Andric case 'f': k = ConversionSpecifier::fArg; break; 1740b57cec5SDimitry Andric case 'g': k = ConversionSpecifier::gArg; break; 1750b57cec5SDimitry Andric case 'i': k = ConversionSpecifier::iArg; break; 1760b57cec5SDimitry Andric case 'n': k = ConversionSpecifier::nArg; break; 1770b57cec5SDimitry Andric case 'c': k = ConversionSpecifier::cArg; break; 1780b57cec5SDimitry Andric case 'C': k = ConversionSpecifier::CArg; break; 1790b57cec5SDimitry Andric case 'S': k = ConversionSpecifier::SArg; break; 1800b57cec5SDimitry Andric case '[': k = ConversionSpecifier::ScanListArg; break; 1810b57cec5SDimitry Andric case 'u': k = ConversionSpecifier::uArg; break; 1820b57cec5SDimitry Andric case 'x': k = ConversionSpecifier::xArg; break; 1830b57cec5SDimitry Andric case 'o': k = ConversionSpecifier::oArg; break; 1840b57cec5SDimitry Andric case 's': k = ConversionSpecifier::sArg; break; 1850b57cec5SDimitry Andric case 'p': k = ConversionSpecifier::pArg; break; 1860b57cec5SDimitry Andric // Apple extensions 1870b57cec5SDimitry Andric // Apple-specific 1880b57cec5SDimitry Andric case 'D': 1890b57cec5SDimitry Andric if (Target.getTriple().isOSDarwin()) 1900b57cec5SDimitry Andric k = ConversionSpecifier::DArg; 1910b57cec5SDimitry Andric break; 1920b57cec5SDimitry Andric case 'O': 1930b57cec5SDimitry Andric if (Target.getTriple().isOSDarwin()) 1940b57cec5SDimitry Andric k = ConversionSpecifier::OArg; 1950b57cec5SDimitry Andric break; 1960b57cec5SDimitry Andric case 'U': 1970b57cec5SDimitry Andric if (Target.getTriple().isOSDarwin()) 1980b57cec5SDimitry Andric k = ConversionSpecifier::UArg; 1990b57cec5SDimitry Andric break; 2000b57cec5SDimitry Andric } 2010b57cec5SDimitry Andric ScanfConversionSpecifier CS(conversionPosition, k); 2020b57cec5SDimitry Andric if (k == ScanfConversionSpecifier::ScanListArg) { 2030b57cec5SDimitry Andric if (ParseScanList(H, CS, I, E)) 2040b57cec5SDimitry Andric return true; 2050b57cec5SDimitry Andric } 2060b57cec5SDimitry Andric FS.setConversionSpecifier(CS); 2070b57cec5SDimitry Andric if (CS.consumesDataArgument() && !FS.getSuppressAssignment() 2080b57cec5SDimitry Andric && !FS.usesPositionalArg()) 2090b57cec5SDimitry Andric FS.setArgIndex(argIndex++); 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric // FIXME: '%' and '*' doesn't make sense. Issue a warning. 2120b57cec5SDimitry Andric // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 2130b57cec5SDimitry Andric 2140b57cec5SDimitry Andric if (k == ScanfConversionSpecifier::InvalidSpecifier) { 2150b57cec5SDimitry Andric unsigned Len = I - Beg; 2160b57cec5SDimitry Andric if (ParseUTF8InvalidSpecifier(Beg, E, Len)) { 2170b57cec5SDimitry Andric CS.setEndScanList(Beg + Len); 2180b57cec5SDimitry Andric FS.setConversionSpecifier(CS); 2190b57cec5SDimitry Andric } 2200b57cec5SDimitry Andric // Assume the conversion takes one argument. 2210b57cec5SDimitry Andric return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len); 2220b57cec5SDimitry Andric } 2230b57cec5SDimitry Andric return ScanfSpecifierResult(Start, FS); 2240b57cec5SDimitry Andric } 2250b57cec5SDimitry Andric 2260b57cec5SDimitry Andric ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const { 2270b57cec5SDimitry Andric const ScanfConversionSpecifier &CS = getConversionSpecifier(); 2280b57cec5SDimitry Andric 2290b57cec5SDimitry Andric if (!CS.consumesDataArgument()) 2300b57cec5SDimitry Andric return ArgType::Invalid(); 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric switch(CS.getKind()) { 2330b57cec5SDimitry Andric // Signed int. 2340b57cec5SDimitry Andric case ConversionSpecifier::dArg: 2350b57cec5SDimitry Andric case ConversionSpecifier::DArg: 2360b57cec5SDimitry Andric case ConversionSpecifier::iArg: 2370b57cec5SDimitry Andric switch (LM.getKind()) { 2380b57cec5SDimitry Andric case LengthModifier::None: 2390b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.IntTy); 2400b57cec5SDimitry Andric case LengthModifier::AsChar: 2410b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::AnyCharTy); 2420b57cec5SDimitry Andric case LengthModifier::AsShort: 2430b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.ShortTy); 2440b57cec5SDimitry Andric case LengthModifier::AsLong: 2450b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongTy); 2460b57cec5SDimitry Andric case LengthModifier::AsLongLong: 2470b57cec5SDimitry Andric case LengthModifier::AsQuad: 2480b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongLongTy); 2490b57cec5SDimitry Andric case LengthModifier::AsInt64: 2500b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); 2510b57cec5SDimitry Andric case LengthModifier::AsIntMax: 2520b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 2530b57cec5SDimitry Andric case LengthModifier::AsSizeT: 2540b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t")); 2550b57cec5SDimitry Andric case LengthModifier::AsPtrDiff: 2560b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 2570b57cec5SDimitry Andric case LengthModifier::AsLongDouble: 2580b57cec5SDimitry Andric // GNU extension. 2590b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongLongTy); 2600b57cec5SDimitry Andric case LengthModifier::AsAllocate: 2610b57cec5SDimitry Andric case LengthModifier::AsMAllocate: 2620b57cec5SDimitry Andric case LengthModifier::AsInt32: 2630b57cec5SDimitry Andric case LengthModifier::AsInt3264: 2640b57cec5SDimitry Andric case LengthModifier::AsWide: 2650b57cec5SDimitry Andric case LengthModifier::AsShortLong: 2660b57cec5SDimitry Andric return ArgType::Invalid(); 2670b57cec5SDimitry Andric } 2680b57cec5SDimitry Andric llvm_unreachable("Unsupported LengthModifier Type"); 2690b57cec5SDimitry Andric 2700b57cec5SDimitry Andric // Unsigned int. 271bdd1243dSDimitry Andric case ConversionSpecifier::bArg: 2720b57cec5SDimitry Andric case ConversionSpecifier::oArg: 2730b57cec5SDimitry Andric case ConversionSpecifier::OArg: 2740b57cec5SDimitry Andric case ConversionSpecifier::uArg: 2750b57cec5SDimitry Andric case ConversionSpecifier::UArg: 2760b57cec5SDimitry Andric case ConversionSpecifier::xArg: 2770b57cec5SDimitry Andric case ConversionSpecifier::XArg: 2780b57cec5SDimitry Andric switch (LM.getKind()) { 2790b57cec5SDimitry Andric case LengthModifier::None: 2800b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedIntTy); 2810b57cec5SDimitry Andric case LengthModifier::AsChar: 2820b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedCharTy); 2830b57cec5SDimitry Andric case LengthModifier::AsShort: 2840b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedShortTy); 2850b57cec5SDimitry Andric case LengthModifier::AsLong: 2860b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedLongTy); 2870b57cec5SDimitry Andric case LengthModifier::AsLongLong: 2880b57cec5SDimitry Andric case LengthModifier::AsQuad: 2890b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 2900b57cec5SDimitry Andric case LengthModifier::AsInt64: 2910b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64")); 2920b57cec5SDimitry Andric case LengthModifier::AsIntMax: 2930b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t")); 2940b57cec5SDimitry Andric case LengthModifier::AsSizeT: 2950b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t")); 2960b57cec5SDimitry Andric case LengthModifier::AsPtrDiff: 2970b57cec5SDimitry Andric return ArgType::PtrTo( 2980b57cec5SDimitry Andric ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t")); 2990b57cec5SDimitry Andric case LengthModifier::AsLongDouble: 3000b57cec5SDimitry Andric // GNU extension. 3010b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 3020b57cec5SDimitry Andric case LengthModifier::AsAllocate: 3030b57cec5SDimitry Andric case LengthModifier::AsMAllocate: 3040b57cec5SDimitry Andric case LengthModifier::AsInt32: 3050b57cec5SDimitry Andric case LengthModifier::AsInt3264: 3060b57cec5SDimitry Andric case LengthModifier::AsWide: 3070b57cec5SDimitry Andric case LengthModifier::AsShortLong: 3080b57cec5SDimitry Andric return ArgType::Invalid(); 3090b57cec5SDimitry Andric } 3100b57cec5SDimitry Andric llvm_unreachable("Unsupported LengthModifier Type"); 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric // Float. 3130b57cec5SDimitry Andric case ConversionSpecifier::aArg: 3140b57cec5SDimitry Andric case ConversionSpecifier::AArg: 3150b57cec5SDimitry Andric case ConversionSpecifier::eArg: 3160b57cec5SDimitry Andric case ConversionSpecifier::EArg: 3170b57cec5SDimitry Andric case ConversionSpecifier::fArg: 3180b57cec5SDimitry Andric case ConversionSpecifier::FArg: 3190b57cec5SDimitry Andric case ConversionSpecifier::gArg: 3200b57cec5SDimitry Andric case ConversionSpecifier::GArg: 3210b57cec5SDimitry Andric switch (LM.getKind()) { 3220b57cec5SDimitry Andric case LengthModifier::None: 3230b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.FloatTy); 3240b57cec5SDimitry Andric case LengthModifier::AsLong: 3250b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.DoubleTy); 3260b57cec5SDimitry Andric case LengthModifier::AsLongDouble: 3270b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongDoubleTy); 3280b57cec5SDimitry Andric default: 3290b57cec5SDimitry Andric return ArgType::Invalid(); 3300b57cec5SDimitry Andric } 3310b57cec5SDimitry Andric 3320b57cec5SDimitry Andric // Char, string and scanlist. 3330b57cec5SDimitry Andric case ConversionSpecifier::cArg: 3340b57cec5SDimitry Andric case ConversionSpecifier::sArg: 3350b57cec5SDimitry Andric case ConversionSpecifier::ScanListArg: 3360b57cec5SDimitry Andric switch (LM.getKind()) { 3370b57cec5SDimitry Andric case LengthModifier::None: 3380b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::AnyCharTy); 3390b57cec5SDimitry Andric case LengthModifier::AsLong: 3400b57cec5SDimitry Andric case LengthModifier::AsWide: 3410b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 3420b57cec5SDimitry Andric case LengthModifier::AsAllocate: 3430b57cec5SDimitry Andric case LengthModifier::AsMAllocate: 3440b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::CStrTy); 3450b57cec5SDimitry Andric case LengthModifier::AsShort: 3460b57cec5SDimitry Andric if (Ctx.getTargetInfo().getTriple().isOSMSVCRT()) 3470b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::AnyCharTy); 348bdd1243dSDimitry Andric [[fallthrough]]; 3490b57cec5SDimitry Andric default: 3500b57cec5SDimitry Andric return ArgType::Invalid(); 3510b57cec5SDimitry Andric } 3520b57cec5SDimitry Andric case ConversionSpecifier::CArg: 3530b57cec5SDimitry Andric case ConversionSpecifier::SArg: 3540b57cec5SDimitry Andric // FIXME: Mac OS X specific? 3550b57cec5SDimitry Andric switch (LM.getKind()) { 3560b57cec5SDimitry Andric case LengthModifier::None: 3570b57cec5SDimitry Andric case LengthModifier::AsWide: 3580b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 3590b57cec5SDimitry Andric case LengthModifier::AsAllocate: 3600b57cec5SDimitry Andric case LengthModifier::AsMAllocate: 3610b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *")); 3620b57cec5SDimitry Andric case LengthModifier::AsShort: 3630b57cec5SDimitry Andric if (Ctx.getTargetInfo().getTriple().isOSMSVCRT()) 3640b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::AnyCharTy); 365bdd1243dSDimitry Andric [[fallthrough]]; 3660b57cec5SDimitry Andric default: 3670b57cec5SDimitry Andric return ArgType::Invalid(); 3680b57cec5SDimitry Andric } 3690b57cec5SDimitry Andric 3700b57cec5SDimitry Andric // Pointer. 3710b57cec5SDimitry Andric case ConversionSpecifier::pArg: 3720b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::CPointerTy); 3730b57cec5SDimitry Andric 3740b57cec5SDimitry Andric // Write-back. 3750b57cec5SDimitry Andric case ConversionSpecifier::nArg: 3760b57cec5SDimitry Andric switch (LM.getKind()) { 3770b57cec5SDimitry Andric case LengthModifier::None: 3780b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.IntTy); 3790b57cec5SDimitry Andric case LengthModifier::AsChar: 3800b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.SignedCharTy); 3810b57cec5SDimitry Andric case LengthModifier::AsShort: 3820b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.ShortTy); 3830b57cec5SDimitry Andric case LengthModifier::AsLong: 3840b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongTy); 3850b57cec5SDimitry Andric case LengthModifier::AsLongLong: 3860b57cec5SDimitry Andric case LengthModifier::AsQuad: 3870b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongLongTy); 3880b57cec5SDimitry Andric case LengthModifier::AsInt64: 3890b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); 3900b57cec5SDimitry Andric case LengthModifier::AsIntMax: 3910b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 3920b57cec5SDimitry Andric case LengthModifier::AsSizeT: 3930b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t")); 3940b57cec5SDimitry Andric case LengthModifier::AsPtrDiff: 3950b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 3960b57cec5SDimitry Andric case LengthModifier::AsLongDouble: 3970b57cec5SDimitry Andric return ArgType(); // FIXME: Is this a known extension? 3980b57cec5SDimitry Andric case LengthModifier::AsAllocate: 3990b57cec5SDimitry Andric case LengthModifier::AsMAllocate: 4000b57cec5SDimitry Andric case LengthModifier::AsInt32: 4010b57cec5SDimitry Andric case LengthModifier::AsInt3264: 4020b57cec5SDimitry Andric case LengthModifier::AsWide: 4030b57cec5SDimitry Andric case LengthModifier::AsShortLong: 4040b57cec5SDimitry Andric return ArgType::Invalid(); 4050b57cec5SDimitry Andric } 4060b57cec5SDimitry Andric 4070b57cec5SDimitry Andric default: 4080b57cec5SDimitry Andric break; 4090b57cec5SDimitry Andric } 4100b57cec5SDimitry Andric 4110b57cec5SDimitry Andric return ArgType(); 4120b57cec5SDimitry Andric } 4130b57cec5SDimitry Andric 4140b57cec5SDimitry Andric bool ScanfSpecifier::fixType(QualType QT, QualType RawQT, 4150b57cec5SDimitry Andric const LangOptions &LangOpt, 4160b57cec5SDimitry Andric ASTContext &Ctx) { 4170b57cec5SDimitry Andric 4180b57cec5SDimitry Andric // %n is different from other conversion specifiers; don't try to fix it. 4190b57cec5SDimitry Andric if (CS.getKind() == ConversionSpecifier::nArg) 4200b57cec5SDimitry Andric return false; 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric if (!QT->isPointerType()) 4230b57cec5SDimitry Andric return false; 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric QualType PT = QT->getPointeeType(); 4260b57cec5SDimitry Andric 4270b57cec5SDimitry Andric // If it's an enum, get its underlying type. 4280b57cec5SDimitry Andric if (const EnumType *ETy = PT->getAs<EnumType>()) { 4290b57cec5SDimitry Andric // Don't try to fix incomplete enums. 4300b57cec5SDimitry Andric if (!ETy->getDecl()->isComplete()) 4310b57cec5SDimitry Andric return false; 4320b57cec5SDimitry Andric PT = ETy->getDecl()->getIntegerType(); 4330b57cec5SDimitry Andric } 4340b57cec5SDimitry Andric 4350b57cec5SDimitry Andric const BuiltinType *BT = PT->getAs<BuiltinType>(); 4360b57cec5SDimitry Andric if (!BT) 4370b57cec5SDimitry Andric return false; 4380b57cec5SDimitry Andric 4390b57cec5SDimitry Andric // Pointer to a character. 4400b57cec5SDimitry Andric if (PT->isAnyCharacterType()) { 4410b57cec5SDimitry Andric CS.setKind(ConversionSpecifier::sArg); 4420b57cec5SDimitry Andric if (PT->isWideCharType()) 4430b57cec5SDimitry Andric LM.setKind(LengthModifier::AsWideChar); 4440b57cec5SDimitry Andric else 4450b57cec5SDimitry Andric LM.setKind(LengthModifier::None); 4460b57cec5SDimitry Andric 4470b57cec5SDimitry Andric // If we know the target array length, we can use it as a field width. 4480b57cec5SDimitry Andric if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) { 4495f757f3fSDimitry Andric if (CAT->getSizeModifier() == ArraySizeModifier::Normal) 4500b57cec5SDimitry Andric FieldWidth = OptionalAmount(OptionalAmount::Constant, 451*0fca6ea1SDimitry Andric CAT->getZExtSize() - 1, "", 0, false); 4520b57cec5SDimitry Andric } 4530b57cec5SDimitry Andric return true; 4540b57cec5SDimitry Andric } 4550b57cec5SDimitry Andric 4560b57cec5SDimitry Andric // Figure out the length modifier. 4570b57cec5SDimitry Andric switch (BT->getKind()) { 4580b57cec5SDimitry Andric // no modifier 4590b57cec5SDimitry Andric case BuiltinType::UInt: 4600b57cec5SDimitry Andric case BuiltinType::Int: 4610b57cec5SDimitry Andric case BuiltinType::Float: 4620b57cec5SDimitry Andric LM.setKind(LengthModifier::None); 4630b57cec5SDimitry Andric break; 4640b57cec5SDimitry Andric 4650b57cec5SDimitry Andric // hh 4660b57cec5SDimitry Andric case BuiltinType::Char_U: 4670b57cec5SDimitry Andric case BuiltinType::UChar: 4680b57cec5SDimitry Andric case BuiltinType::Char_S: 4690b57cec5SDimitry Andric case BuiltinType::SChar: 4700b57cec5SDimitry Andric LM.setKind(LengthModifier::AsChar); 4710b57cec5SDimitry Andric break; 4720b57cec5SDimitry Andric 4730b57cec5SDimitry Andric // h 4740b57cec5SDimitry Andric case BuiltinType::Short: 4750b57cec5SDimitry Andric case BuiltinType::UShort: 4760b57cec5SDimitry Andric LM.setKind(LengthModifier::AsShort); 4770b57cec5SDimitry Andric break; 4780b57cec5SDimitry Andric 4790b57cec5SDimitry Andric // l 4800b57cec5SDimitry Andric case BuiltinType::Long: 4810b57cec5SDimitry Andric case BuiltinType::ULong: 4820b57cec5SDimitry Andric case BuiltinType::Double: 4830b57cec5SDimitry Andric LM.setKind(LengthModifier::AsLong); 4840b57cec5SDimitry Andric break; 4850b57cec5SDimitry Andric 4860b57cec5SDimitry Andric // ll 4870b57cec5SDimitry Andric case BuiltinType::LongLong: 4880b57cec5SDimitry Andric case BuiltinType::ULongLong: 4890b57cec5SDimitry Andric LM.setKind(LengthModifier::AsLongLong); 4900b57cec5SDimitry Andric break; 4910b57cec5SDimitry Andric 4920b57cec5SDimitry Andric // L 4930b57cec5SDimitry Andric case BuiltinType::LongDouble: 4940b57cec5SDimitry Andric LM.setKind(LengthModifier::AsLongDouble); 4950b57cec5SDimitry Andric break; 4960b57cec5SDimitry Andric 4970b57cec5SDimitry Andric // Don't know. 4980b57cec5SDimitry Andric default: 4990b57cec5SDimitry Andric return false; 5000b57cec5SDimitry Andric } 5010b57cec5SDimitry Andric 5020b57cec5SDimitry Andric // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 503bdd1243dSDimitry Andric if (LangOpt.C99 || LangOpt.CPlusPlus11) 5040b57cec5SDimitry Andric namedTypeToLengthModifier(PT, LM); 5050b57cec5SDimitry Andric 5060b57cec5SDimitry Andric // If fixing the length modifier was enough, we are done. 5070b57cec5SDimitry Andric if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) { 5080b57cec5SDimitry Andric const analyze_scanf::ArgType &AT = getArgType(Ctx); 5090b57cec5SDimitry Andric if (AT.isValid() && AT.matchesType(Ctx, QT)) 5100b57cec5SDimitry Andric return true; 5110b57cec5SDimitry Andric } 5120b57cec5SDimitry Andric 5130b57cec5SDimitry Andric // Figure out the conversion specifier. 5140b57cec5SDimitry Andric if (PT->isRealFloatingType()) 5150b57cec5SDimitry Andric CS.setKind(ConversionSpecifier::fArg); 5160b57cec5SDimitry Andric else if (PT->isSignedIntegerType()) 5170b57cec5SDimitry Andric CS.setKind(ConversionSpecifier::dArg); 5180b57cec5SDimitry Andric else if (PT->isUnsignedIntegerType()) 5190b57cec5SDimitry Andric CS.setKind(ConversionSpecifier::uArg); 5200b57cec5SDimitry Andric else 5210b57cec5SDimitry Andric llvm_unreachable("Unexpected type"); 5220b57cec5SDimitry Andric 5230b57cec5SDimitry Andric return true; 5240b57cec5SDimitry Andric } 5250b57cec5SDimitry Andric 5260b57cec5SDimitry Andric void ScanfSpecifier::toString(raw_ostream &os) const { 5270b57cec5SDimitry Andric os << "%"; 5280b57cec5SDimitry Andric 5290b57cec5SDimitry Andric if (usesPositionalArg()) 5300b57cec5SDimitry Andric os << getPositionalArgIndex() << "$"; 5310b57cec5SDimitry Andric if (SuppressAssignment) 5320b57cec5SDimitry Andric os << "*"; 5330b57cec5SDimitry Andric 5340b57cec5SDimitry Andric FieldWidth.toString(os); 5350b57cec5SDimitry Andric os << LM.toString(); 5360b57cec5SDimitry Andric os << CS.toString(); 5370b57cec5SDimitry Andric } 5380b57cec5SDimitry Andric 5390b57cec5SDimitry Andric bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, 5400b57cec5SDimitry Andric const char *I, 5410b57cec5SDimitry Andric const char *E, 5420b57cec5SDimitry Andric const LangOptions &LO, 5430b57cec5SDimitry Andric const TargetInfo &Target) { 5440b57cec5SDimitry Andric 5450b57cec5SDimitry Andric unsigned argIndex = 0; 5460b57cec5SDimitry Andric 5470b57cec5SDimitry Andric // Keep looking for a format specifier until we have exhausted the string. 5480b57cec5SDimitry Andric while (I != E) { 5490b57cec5SDimitry Andric const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex, 5500b57cec5SDimitry Andric LO, Target); 5510b57cec5SDimitry Andric // Did a fail-stop error of any kind occur when parsing the specifier? 5520b57cec5SDimitry Andric // If so, don't do any more processing. 5530b57cec5SDimitry Andric if (FSR.shouldStop()) 5540b57cec5SDimitry Andric return true; 5550b57cec5SDimitry Andric // Did we exhaust the string or encounter an error that 5560b57cec5SDimitry Andric // we can recover from? 5570b57cec5SDimitry Andric if (!FSR.hasValue()) 5580b57cec5SDimitry Andric continue; 5590b57cec5SDimitry Andric // We have a format specifier. Pass it to the callback. 5600b57cec5SDimitry Andric if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), 5610b57cec5SDimitry Andric I - FSR.getStart())) { 5620b57cec5SDimitry Andric return true; 5630b57cec5SDimitry Andric } 5640b57cec5SDimitry Andric } 5650b57cec5SDimitry Andric assert(I == E && "Format string not exhausted"); 5660b57cec5SDimitry Andric return false; 5670b57cec5SDimitry Andric } 568