1*0b57cec5SDimitry Andric //= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric // Handling of format string in scanf and friends. The structure of format 10*0b57cec5SDimitry Andric // strings for fscanf() are described in C99 7.19.6.2. 11*0b57cec5SDimitry Andric // 12*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 13*0b57cec5SDimitry Andric 14*0b57cec5SDimitry Andric #include "clang/AST/FormatString.h" 15*0b57cec5SDimitry Andric #include "FormatStringParsing.h" 16*0b57cec5SDimitry Andric #include "clang/Basic/TargetInfo.h" 17*0b57cec5SDimitry Andric 18*0b57cec5SDimitry Andric using clang::analyze_format_string::ArgType; 19*0b57cec5SDimitry Andric using clang::analyze_format_string::FormatStringHandler; 20*0b57cec5SDimitry Andric using clang::analyze_format_string::LengthModifier; 21*0b57cec5SDimitry Andric using clang::analyze_format_string::OptionalAmount; 22*0b57cec5SDimitry Andric using clang::analyze_format_string::ConversionSpecifier; 23*0b57cec5SDimitry Andric using clang::analyze_scanf::ScanfConversionSpecifier; 24*0b57cec5SDimitry Andric using clang::analyze_scanf::ScanfSpecifier; 25*0b57cec5SDimitry Andric using clang::UpdateOnReturn; 26*0b57cec5SDimitry Andric using namespace clang; 27*0b57cec5SDimitry Andric 28*0b57cec5SDimitry Andric typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> 29*0b57cec5SDimitry Andric ScanfSpecifierResult; 30*0b57cec5SDimitry Andric 31*0b57cec5SDimitry Andric static bool ParseScanList(FormatStringHandler &H, 32*0b57cec5SDimitry Andric ScanfConversionSpecifier &CS, 33*0b57cec5SDimitry Andric const char *&Beg, const char *E) { 34*0b57cec5SDimitry Andric const char *I = Beg; 35*0b57cec5SDimitry Andric const char *start = I - 1; 36*0b57cec5SDimitry Andric UpdateOnReturn <const char*> UpdateBeg(Beg, I); 37*0b57cec5SDimitry Andric 38*0b57cec5SDimitry Andric // No more characters? 39*0b57cec5SDimitry Andric if (I == E) { 40*0b57cec5SDimitry Andric H.HandleIncompleteScanList(start, I); 41*0b57cec5SDimitry Andric return true; 42*0b57cec5SDimitry Andric } 43*0b57cec5SDimitry Andric 44*0b57cec5SDimitry Andric // Special case: ']' is the first character. 45*0b57cec5SDimitry Andric if (*I == ']') { 46*0b57cec5SDimitry Andric if (++I == E) { 47*0b57cec5SDimitry Andric H.HandleIncompleteScanList(start, I - 1); 48*0b57cec5SDimitry Andric return true; 49*0b57cec5SDimitry Andric } 50*0b57cec5SDimitry Andric } 51*0b57cec5SDimitry Andric 52*0b57cec5SDimitry Andric // Special case: "^]" are the first characters. 53*0b57cec5SDimitry Andric if (I + 1 != E && I[0] == '^' && I[1] == ']') { 54*0b57cec5SDimitry Andric I += 2; 55*0b57cec5SDimitry Andric if (I == E) { 56*0b57cec5SDimitry Andric H.HandleIncompleteScanList(start, I - 1); 57*0b57cec5SDimitry Andric return true; 58*0b57cec5SDimitry Andric } 59*0b57cec5SDimitry Andric } 60*0b57cec5SDimitry Andric 61*0b57cec5SDimitry Andric // Look for a ']' character which denotes the end of the scan list. 62*0b57cec5SDimitry Andric while (*I != ']') { 63*0b57cec5SDimitry Andric if (++I == E) { 64*0b57cec5SDimitry Andric H.HandleIncompleteScanList(start, I - 1); 65*0b57cec5SDimitry Andric return true; 66*0b57cec5SDimitry Andric } 67*0b57cec5SDimitry Andric } 68*0b57cec5SDimitry Andric 69*0b57cec5SDimitry Andric CS.setEndScanList(I); 70*0b57cec5SDimitry Andric return false; 71*0b57cec5SDimitry Andric } 72*0b57cec5SDimitry Andric 73*0b57cec5SDimitry Andric // FIXME: Much of this is copy-paste from ParsePrintfSpecifier. 74*0b57cec5SDimitry Andric // We can possibly refactor. 75*0b57cec5SDimitry Andric static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, 76*0b57cec5SDimitry Andric const char *&Beg, 77*0b57cec5SDimitry Andric const char *E, 78*0b57cec5SDimitry Andric unsigned &argIndex, 79*0b57cec5SDimitry Andric const LangOptions &LO, 80*0b57cec5SDimitry Andric const TargetInfo &Target) { 81*0b57cec5SDimitry Andric using namespace clang::analyze_format_string; 82*0b57cec5SDimitry Andric using namespace clang::analyze_scanf; 83*0b57cec5SDimitry Andric const char *I = Beg; 84*0b57cec5SDimitry Andric const char *Start = nullptr; 85*0b57cec5SDimitry Andric UpdateOnReturn <const char*> UpdateBeg(Beg, I); 86*0b57cec5SDimitry Andric 87*0b57cec5SDimitry Andric // Look for a '%' character that indicates the start of a format specifier. 88*0b57cec5SDimitry Andric for ( ; I != E ; ++I) { 89*0b57cec5SDimitry Andric char c = *I; 90*0b57cec5SDimitry Andric if (c == '\0') { 91*0b57cec5SDimitry Andric // Detect spurious null characters, which are likely errors. 92*0b57cec5SDimitry Andric H.HandleNullChar(I); 93*0b57cec5SDimitry Andric return true; 94*0b57cec5SDimitry Andric } 95*0b57cec5SDimitry Andric if (c == '%') { 96*0b57cec5SDimitry Andric Start = I++; // Record the start of the format specifier. 97*0b57cec5SDimitry Andric break; 98*0b57cec5SDimitry Andric } 99*0b57cec5SDimitry Andric } 100*0b57cec5SDimitry Andric 101*0b57cec5SDimitry Andric // No format specifier found? 102*0b57cec5SDimitry Andric if (!Start) 103*0b57cec5SDimitry Andric return false; 104*0b57cec5SDimitry Andric 105*0b57cec5SDimitry Andric if (I == E) { 106*0b57cec5SDimitry Andric // No more characters left? 107*0b57cec5SDimitry Andric H.HandleIncompleteSpecifier(Start, E - Start); 108*0b57cec5SDimitry Andric return true; 109*0b57cec5SDimitry Andric } 110*0b57cec5SDimitry Andric 111*0b57cec5SDimitry Andric ScanfSpecifier FS; 112*0b57cec5SDimitry Andric if (ParseArgPosition(H, FS, Start, I, E)) 113*0b57cec5SDimitry Andric return true; 114*0b57cec5SDimitry Andric 115*0b57cec5SDimitry Andric if (I == E) { 116*0b57cec5SDimitry Andric // No more characters left? 117*0b57cec5SDimitry Andric H.HandleIncompleteSpecifier(Start, E - Start); 118*0b57cec5SDimitry Andric return true; 119*0b57cec5SDimitry Andric } 120*0b57cec5SDimitry Andric 121*0b57cec5SDimitry Andric // Look for '*' flag if it is present. 122*0b57cec5SDimitry Andric if (*I == '*') { 123*0b57cec5SDimitry Andric FS.setSuppressAssignment(I); 124*0b57cec5SDimitry Andric if (++I == E) { 125*0b57cec5SDimitry Andric H.HandleIncompleteSpecifier(Start, E - Start); 126*0b57cec5SDimitry Andric return true; 127*0b57cec5SDimitry Andric } 128*0b57cec5SDimitry Andric } 129*0b57cec5SDimitry Andric 130*0b57cec5SDimitry Andric // Look for the field width (if any). Unlike printf, this is either 131*0b57cec5SDimitry Andric // a fixed integer or isn't present. 132*0b57cec5SDimitry Andric const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); 133*0b57cec5SDimitry Andric if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { 134*0b57cec5SDimitry Andric assert(Amt.getHowSpecified() == OptionalAmount::Constant); 135*0b57cec5SDimitry Andric FS.setFieldWidth(Amt); 136*0b57cec5SDimitry Andric 137*0b57cec5SDimitry Andric if (I == E) { 138*0b57cec5SDimitry Andric // No more characters left? 139*0b57cec5SDimitry Andric H.HandleIncompleteSpecifier(Start, E - Start); 140*0b57cec5SDimitry Andric return true; 141*0b57cec5SDimitry Andric } 142*0b57cec5SDimitry Andric } 143*0b57cec5SDimitry Andric 144*0b57cec5SDimitry Andric // Look for the length modifier. 145*0b57cec5SDimitry Andric if (ParseLengthModifier(FS, I, E, LO, /*IsScanf=*/true) && I == E) { 146*0b57cec5SDimitry Andric // No more characters left? 147*0b57cec5SDimitry Andric H.HandleIncompleteSpecifier(Start, E - Start); 148*0b57cec5SDimitry Andric return true; 149*0b57cec5SDimitry Andric } 150*0b57cec5SDimitry Andric 151*0b57cec5SDimitry Andric // Detect spurious null characters, which are likely errors. 152*0b57cec5SDimitry Andric if (*I == '\0') { 153*0b57cec5SDimitry Andric H.HandleNullChar(I); 154*0b57cec5SDimitry Andric return true; 155*0b57cec5SDimitry Andric } 156*0b57cec5SDimitry Andric 157*0b57cec5SDimitry Andric // Finally, look for the conversion specifier. 158*0b57cec5SDimitry Andric const char *conversionPosition = I++; 159*0b57cec5SDimitry Andric ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; 160*0b57cec5SDimitry Andric switch (*conversionPosition) { 161*0b57cec5SDimitry Andric default: 162*0b57cec5SDimitry Andric break; 163*0b57cec5SDimitry Andric case '%': k = ConversionSpecifier::PercentArg; break; 164*0b57cec5SDimitry Andric case 'A': k = ConversionSpecifier::AArg; break; 165*0b57cec5SDimitry Andric case 'E': k = ConversionSpecifier::EArg; break; 166*0b57cec5SDimitry Andric case 'F': k = ConversionSpecifier::FArg; break; 167*0b57cec5SDimitry Andric case 'G': k = ConversionSpecifier::GArg; break; 168*0b57cec5SDimitry Andric case 'X': k = ConversionSpecifier::XArg; break; 169*0b57cec5SDimitry Andric case 'a': k = ConversionSpecifier::aArg; break; 170*0b57cec5SDimitry Andric case 'd': k = ConversionSpecifier::dArg; break; 171*0b57cec5SDimitry Andric case 'e': k = ConversionSpecifier::eArg; break; 172*0b57cec5SDimitry Andric case 'f': k = ConversionSpecifier::fArg; break; 173*0b57cec5SDimitry Andric case 'g': k = ConversionSpecifier::gArg; break; 174*0b57cec5SDimitry Andric case 'i': k = ConversionSpecifier::iArg; break; 175*0b57cec5SDimitry Andric case 'n': k = ConversionSpecifier::nArg; break; 176*0b57cec5SDimitry Andric case 'c': k = ConversionSpecifier::cArg; break; 177*0b57cec5SDimitry Andric case 'C': k = ConversionSpecifier::CArg; break; 178*0b57cec5SDimitry Andric case 'S': k = ConversionSpecifier::SArg; break; 179*0b57cec5SDimitry Andric case '[': k = ConversionSpecifier::ScanListArg; break; 180*0b57cec5SDimitry Andric case 'u': k = ConversionSpecifier::uArg; break; 181*0b57cec5SDimitry Andric case 'x': k = ConversionSpecifier::xArg; break; 182*0b57cec5SDimitry Andric case 'o': k = ConversionSpecifier::oArg; break; 183*0b57cec5SDimitry Andric case 's': k = ConversionSpecifier::sArg; break; 184*0b57cec5SDimitry Andric case 'p': k = ConversionSpecifier::pArg; break; 185*0b57cec5SDimitry Andric // Apple extensions 186*0b57cec5SDimitry Andric // Apple-specific 187*0b57cec5SDimitry Andric case 'D': 188*0b57cec5SDimitry Andric if (Target.getTriple().isOSDarwin()) 189*0b57cec5SDimitry Andric k = ConversionSpecifier::DArg; 190*0b57cec5SDimitry Andric break; 191*0b57cec5SDimitry Andric case 'O': 192*0b57cec5SDimitry Andric if (Target.getTriple().isOSDarwin()) 193*0b57cec5SDimitry Andric k = ConversionSpecifier::OArg; 194*0b57cec5SDimitry Andric break; 195*0b57cec5SDimitry Andric case 'U': 196*0b57cec5SDimitry Andric if (Target.getTriple().isOSDarwin()) 197*0b57cec5SDimitry Andric k = ConversionSpecifier::UArg; 198*0b57cec5SDimitry Andric break; 199*0b57cec5SDimitry Andric } 200*0b57cec5SDimitry Andric ScanfConversionSpecifier CS(conversionPosition, k); 201*0b57cec5SDimitry Andric if (k == ScanfConversionSpecifier::ScanListArg) { 202*0b57cec5SDimitry Andric if (ParseScanList(H, CS, I, E)) 203*0b57cec5SDimitry Andric return true; 204*0b57cec5SDimitry Andric } 205*0b57cec5SDimitry Andric FS.setConversionSpecifier(CS); 206*0b57cec5SDimitry Andric if (CS.consumesDataArgument() && !FS.getSuppressAssignment() 207*0b57cec5SDimitry Andric && !FS.usesPositionalArg()) 208*0b57cec5SDimitry Andric FS.setArgIndex(argIndex++); 209*0b57cec5SDimitry Andric 210*0b57cec5SDimitry Andric // FIXME: '%' and '*' doesn't make sense. Issue a warning. 211*0b57cec5SDimitry Andric // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 212*0b57cec5SDimitry Andric 213*0b57cec5SDimitry Andric if (k == ScanfConversionSpecifier::InvalidSpecifier) { 214*0b57cec5SDimitry Andric unsigned Len = I - Beg; 215*0b57cec5SDimitry Andric if (ParseUTF8InvalidSpecifier(Beg, E, Len)) { 216*0b57cec5SDimitry Andric CS.setEndScanList(Beg + Len); 217*0b57cec5SDimitry Andric FS.setConversionSpecifier(CS); 218*0b57cec5SDimitry Andric } 219*0b57cec5SDimitry Andric // Assume the conversion takes one argument. 220*0b57cec5SDimitry Andric return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len); 221*0b57cec5SDimitry Andric } 222*0b57cec5SDimitry Andric return ScanfSpecifierResult(Start, FS); 223*0b57cec5SDimitry Andric } 224*0b57cec5SDimitry Andric 225*0b57cec5SDimitry Andric ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const { 226*0b57cec5SDimitry Andric const ScanfConversionSpecifier &CS = getConversionSpecifier(); 227*0b57cec5SDimitry Andric 228*0b57cec5SDimitry Andric if (!CS.consumesDataArgument()) 229*0b57cec5SDimitry Andric return ArgType::Invalid(); 230*0b57cec5SDimitry Andric 231*0b57cec5SDimitry Andric switch(CS.getKind()) { 232*0b57cec5SDimitry Andric // Signed int. 233*0b57cec5SDimitry Andric case ConversionSpecifier::dArg: 234*0b57cec5SDimitry Andric case ConversionSpecifier::DArg: 235*0b57cec5SDimitry Andric case ConversionSpecifier::iArg: 236*0b57cec5SDimitry Andric switch (LM.getKind()) { 237*0b57cec5SDimitry Andric case LengthModifier::None: 238*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.IntTy); 239*0b57cec5SDimitry Andric case LengthModifier::AsChar: 240*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::AnyCharTy); 241*0b57cec5SDimitry Andric case LengthModifier::AsShort: 242*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.ShortTy); 243*0b57cec5SDimitry Andric case LengthModifier::AsLong: 244*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongTy); 245*0b57cec5SDimitry Andric case LengthModifier::AsLongLong: 246*0b57cec5SDimitry Andric case LengthModifier::AsQuad: 247*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongLongTy); 248*0b57cec5SDimitry Andric case LengthModifier::AsInt64: 249*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); 250*0b57cec5SDimitry Andric case LengthModifier::AsIntMax: 251*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 252*0b57cec5SDimitry Andric case LengthModifier::AsSizeT: 253*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t")); 254*0b57cec5SDimitry Andric case LengthModifier::AsPtrDiff: 255*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 256*0b57cec5SDimitry Andric case LengthModifier::AsLongDouble: 257*0b57cec5SDimitry Andric // GNU extension. 258*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongLongTy); 259*0b57cec5SDimitry Andric case LengthModifier::AsAllocate: 260*0b57cec5SDimitry Andric case LengthModifier::AsMAllocate: 261*0b57cec5SDimitry Andric case LengthModifier::AsInt32: 262*0b57cec5SDimitry Andric case LengthModifier::AsInt3264: 263*0b57cec5SDimitry Andric case LengthModifier::AsWide: 264*0b57cec5SDimitry Andric case LengthModifier::AsShortLong: 265*0b57cec5SDimitry Andric return ArgType::Invalid(); 266*0b57cec5SDimitry Andric } 267*0b57cec5SDimitry Andric llvm_unreachable("Unsupported LengthModifier Type"); 268*0b57cec5SDimitry Andric 269*0b57cec5SDimitry Andric // Unsigned int. 270*0b57cec5SDimitry Andric case ConversionSpecifier::oArg: 271*0b57cec5SDimitry Andric case ConversionSpecifier::OArg: 272*0b57cec5SDimitry Andric case ConversionSpecifier::uArg: 273*0b57cec5SDimitry Andric case ConversionSpecifier::UArg: 274*0b57cec5SDimitry Andric case ConversionSpecifier::xArg: 275*0b57cec5SDimitry Andric case ConversionSpecifier::XArg: 276*0b57cec5SDimitry Andric switch (LM.getKind()) { 277*0b57cec5SDimitry Andric case LengthModifier::None: 278*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedIntTy); 279*0b57cec5SDimitry Andric case LengthModifier::AsChar: 280*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedCharTy); 281*0b57cec5SDimitry Andric case LengthModifier::AsShort: 282*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedShortTy); 283*0b57cec5SDimitry Andric case LengthModifier::AsLong: 284*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedLongTy); 285*0b57cec5SDimitry Andric case LengthModifier::AsLongLong: 286*0b57cec5SDimitry Andric case LengthModifier::AsQuad: 287*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 288*0b57cec5SDimitry Andric case LengthModifier::AsInt64: 289*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64")); 290*0b57cec5SDimitry Andric case LengthModifier::AsIntMax: 291*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t")); 292*0b57cec5SDimitry Andric case LengthModifier::AsSizeT: 293*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t")); 294*0b57cec5SDimitry Andric case LengthModifier::AsPtrDiff: 295*0b57cec5SDimitry Andric return ArgType::PtrTo( 296*0b57cec5SDimitry Andric ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t")); 297*0b57cec5SDimitry Andric case LengthModifier::AsLongDouble: 298*0b57cec5SDimitry Andric // GNU extension. 299*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 300*0b57cec5SDimitry Andric case LengthModifier::AsAllocate: 301*0b57cec5SDimitry Andric case LengthModifier::AsMAllocate: 302*0b57cec5SDimitry Andric case LengthModifier::AsInt32: 303*0b57cec5SDimitry Andric case LengthModifier::AsInt3264: 304*0b57cec5SDimitry Andric case LengthModifier::AsWide: 305*0b57cec5SDimitry Andric case LengthModifier::AsShortLong: 306*0b57cec5SDimitry Andric return ArgType::Invalid(); 307*0b57cec5SDimitry Andric } 308*0b57cec5SDimitry Andric llvm_unreachable("Unsupported LengthModifier Type"); 309*0b57cec5SDimitry Andric 310*0b57cec5SDimitry Andric // Float. 311*0b57cec5SDimitry Andric case ConversionSpecifier::aArg: 312*0b57cec5SDimitry Andric case ConversionSpecifier::AArg: 313*0b57cec5SDimitry Andric case ConversionSpecifier::eArg: 314*0b57cec5SDimitry Andric case ConversionSpecifier::EArg: 315*0b57cec5SDimitry Andric case ConversionSpecifier::fArg: 316*0b57cec5SDimitry Andric case ConversionSpecifier::FArg: 317*0b57cec5SDimitry Andric case ConversionSpecifier::gArg: 318*0b57cec5SDimitry Andric case ConversionSpecifier::GArg: 319*0b57cec5SDimitry Andric switch (LM.getKind()) { 320*0b57cec5SDimitry Andric case LengthModifier::None: 321*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.FloatTy); 322*0b57cec5SDimitry Andric case LengthModifier::AsLong: 323*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.DoubleTy); 324*0b57cec5SDimitry Andric case LengthModifier::AsLongDouble: 325*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongDoubleTy); 326*0b57cec5SDimitry Andric default: 327*0b57cec5SDimitry Andric return ArgType::Invalid(); 328*0b57cec5SDimitry Andric } 329*0b57cec5SDimitry Andric 330*0b57cec5SDimitry Andric // Char, string and scanlist. 331*0b57cec5SDimitry Andric case ConversionSpecifier::cArg: 332*0b57cec5SDimitry Andric case ConversionSpecifier::sArg: 333*0b57cec5SDimitry Andric case ConversionSpecifier::ScanListArg: 334*0b57cec5SDimitry Andric switch (LM.getKind()) { 335*0b57cec5SDimitry Andric case LengthModifier::None: 336*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::AnyCharTy); 337*0b57cec5SDimitry Andric case LengthModifier::AsLong: 338*0b57cec5SDimitry Andric case LengthModifier::AsWide: 339*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 340*0b57cec5SDimitry Andric case LengthModifier::AsAllocate: 341*0b57cec5SDimitry Andric case LengthModifier::AsMAllocate: 342*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::CStrTy); 343*0b57cec5SDimitry Andric case LengthModifier::AsShort: 344*0b57cec5SDimitry Andric if (Ctx.getTargetInfo().getTriple().isOSMSVCRT()) 345*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::AnyCharTy); 346*0b57cec5SDimitry Andric LLVM_FALLTHROUGH; 347*0b57cec5SDimitry Andric default: 348*0b57cec5SDimitry Andric return ArgType::Invalid(); 349*0b57cec5SDimitry Andric } 350*0b57cec5SDimitry Andric case ConversionSpecifier::CArg: 351*0b57cec5SDimitry Andric case ConversionSpecifier::SArg: 352*0b57cec5SDimitry Andric // FIXME: Mac OS X specific? 353*0b57cec5SDimitry Andric switch (LM.getKind()) { 354*0b57cec5SDimitry Andric case LengthModifier::None: 355*0b57cec5SDimitry Andric case LengthModifier::AsWide: 356*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 357*0b57cec5SDimitry Andric case LengthModifier::AsAllocate: 358*0b57cec5SDimitry Andric case LengthModifier::AsMAllocate: 359*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *")); 360*0b57cec5SDimitry Andric case LengthModifier::AsShort: 361*0b57cec5SDimitry Andric if (Ctx.getTargetInfo().getTriple().isOSMSVCRT()) 362*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::AnyCharTy); 363*0b57cec5SDimitry Andric LLVM_FALLTHROUGH; 364*0b57cec5SDimitry Andric default: 365*0b57cec5SDimitry Andric return ArgType::Invalid(); 366*0b57cec5SDimitry Andric } 367*0b57cec5SDimitry Andric 368*0b57cec5SDimitry Andric // Pointer. 369*0b57cec5SDimitry Andric case ConversionSpecifier::pArg: 370*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType::CPointerTy); 371*0b57cec5SDimitry Andric 372*0b57cec5SDimitry Andric // Write-back. 373*0b57cec5SDimitry Andric case ConversionSpecifier::nArg: 374*0b57cec5SDimitry Andric switch (LM.getKind()) { 375*0b57cec5SDimitry Andric case LengthModifier::None: 376*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.IntTy); 377*0b57cec5SDimitry Andric case LengthModifier::AsChar: 378*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.SignedCharTy); 379*0b57cec5SDimitry Andric case LengthModifier::AsShort: 380*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.ShortTy); 381*0b57cec5SDimitry Andric case LengthModifier::AsLong: 382*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongTy); 383*0b57cec5SDimitry Andric case LengthModifier::AsLongLong: 384*0b57cec5SDimitry Andric case LengthModifier::AsQuad: 385*0b57cec5SDimitry Andric return ArgType::PtrTo(Ctx.LongLongTy); 386*0b57cec5SDimitry Andric case LengthModifier::AsInt64: 387*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); 388*0b57cec5SDimitry Andric case LengthModifier::AsIntMax: 389*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 390*0b57cec5SDimitry Andric case LengthModifier::AsSizeT: 391*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t")); 392*0b57cec5SDimitry Andric case LengthModifier::AsPtrDiff: 393*0b57cec5SDimitry Andric return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 394*0b57cec5SDimitry Andric case LengthModifier::AsLongDouble: 395*0b57cec5SDimitry Andric return ArgType(); // FIXME: Is this a known extension? 396*0b57cec5SDimitry Andric case LengthModifier::AsAllocate: 397*0b57cec5SDimitry Andric case LengthModifier::AsMAllocate: 398*0b57cec5SDimitry Andric case LengthModifier::AsInt32: 399*0b57cec5SDimitry Andric case LengthModifier::AsInt3264: 400*0b57cec5SDimitry Andric case LengthModifier::AsWide: 401*0b57cec5SDimitry Andric case LengthModifier::AsShortLong: 402*0b57cec5SDimitry Andric return ArgType::Invalid(); 403*0b57cec5SDimitry Andric } 404*0b57cec5SDimitry Andric 405*0b57cec5SDimitry Andric default: 406*0b57cec5SDimitry Andric break; 407*0b57cec5SDimitry Andric } 408*0b57cec5SDimitry Andric 409*0b57cec5SDimitry Andric return ArgType(); 410*0b57cec5SDimitry Andric } 411*0b57cec5SDimitry Andric 412*0b57cec5SDimitry Andric bool ScanfSpecifier::fixType(QualType QT, QualType RawQT, 413*0b57cec5SDimitry Andric const LangOptions &LangOpt, 414*0b57cec5SDimitry Andric ASTContext &Ctx) { 415*0b57cec5SDimitry Andric 416*0b57cec5SDimitry Andric // %n is different from other conversion specifiers; don't try to fix it. 417*0b57cec5SDimitry Andric if (CS.getKind() == ConversionSpecifier::nArg) 418*0b57cec5SDimitry Andric return false; 419*0b57cec5SDimitry Andric 420*0b57cec5SDimitry Andric if (!QT->isPointerType()) 421*0b57cec5SDimitry Andric return false; 422*0b57cec5SDimitry Andric 423*0b57cec5SDimitry Andric QualType PT = QT->getPointeeType(); 424*0b57cec5SDimitry Andric 425*0b57cec5SDimitry Andric // If it's an enum, get its underlying type. 426*0b57cec5SDimitry Andric if (const EnumType *ETy = PT->getAs<EnumType>()) { 427*0b57cec5SDimitry Andric // Don't try to fix incomplete enums. 428*0b57cec5SDimitry Andric if (!ETy->getDecl()->isComplete()) 429*0b57cec5SDimitry Andric return false; 430*0b57cec5SDimitry Andric PT = ETy->getDecl()->getIntegerType(); 431*0b57cec5SDimitry Andric } 432*0b57cec5SDimitry Andric 433*0b57cec5SDimitry Andric const BuiltinType *BT = PT->getAs<BuiltinType>(); 434*0b57cec5SDimitry Andric if (!BT) 435*0b57cec5SDimitry Andric return false; 436*0b57cec5SDimitry Andric 437*0b57cec5SDimitry Andric // Pointer to a character. 438*0b57cec5SDimitry Andric if (PT->isAnyCharacterType()) { 439*0b57cec5SDimitry Andric CS.setKind(ConversionSpecifier::sArg); 440*0b57cec5SDimitry Andric if (PT->isWideCharType()) 441*0b57cec5SDimitry Andric LM.setKind(LengthModifier::AsWideChar); 442*0b57cec5SDimitry Andric else 443*0b57cec5SDimitry Andric LM.setKind(LengthModifier::None); 444*0b57cec5SDimitry Andric 445*0b57cec5SDimitry Andric // If we know the target array length, we can use it as a field width. 446*0b57cec5SDimitry Andric if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) { 447*0b57cec5SDimitry Andric if (CAT->getSizeModifier() == ArrayType::Normal) 448*0b57cec5SDimitry Andric FieldWidth = OptionalAmount(OptionalAmount::Constant, 449*0b57cec5SDimitry Andric CAT->getSize().getZExtValue() - 1, 450*0b57cec5SDimitry Andric "", 0, false); 451*0b57cec5SDimitry Andric 452*0b57cec5SDimitry Andric } 453*0b57cec5SDimitry Andric return true; 454*0b57cec5SDimitry Andric } 455*0b57cec5SDimitry Andric 456*0b57cec5SDimitry Andric // Figure out the length modifier. 457*0b57cec5SDimitry Andric switch (BT->getKind()) { 458*0b57cec5SDimitry Andric // no modifier 459*0b57cec5SDimitry Andric case BuiltinType::UInt: 460*0b57cec5SDimitry Andric case BuiltinType::Int: 461*0b57cec5SDimitry Andric case BuiltinType::Float: 462*0b57cec5SDimitry Andric LM.setKind(LengthModifier::None); 463*0b57cec5SDimitry Andric break; 464*0b57cec5SDimitry Andric 465*0b57cec5SDimitry Andric // hh 466*0b57cec5SDimitry Andric case BuiltinType::Char_U: 467*0b57cec5SDimitry Andric case BuiltinType::UChar: 468*0b57cec5SDimitry Andric case BuiltinType::Char_S: 469*0b57cec5SDimitry Andric case BuiltinType::SChar: 470*0b57cec5SDimitry Andric LM.setKind(LengthModifier::AsChar); 471*0b57cec5SDimitry Andric break; 472*0b57cec5SDimitry Andric 473*0b57cec5SDimitry Andric // h 474*0b57cec5SDimitry Andric case BuiltinType::Short: 475*0b57cec5SDimitry Andric case BuiltinType::UShort: 476*0b57cec5SDimitry Andric LM.setKind(LengthModifier::AsShort); 477*0b57cec5SDimitry Andric break; 478*0b57cec5SDimitry Andric 479*0b57cec5SDimitry Andric // l 480*0b57cec5SDimitry Andric case BuiltinType::Long: 481*0b57cec5SDimitry Andric case BuiltinType::ULong: 482*0b57cec5SDimitry Andric case BuiltinType::Double: 483*0b57cec5SDimitry Andric LM.setKind(LengthModifier::AsLong); 484*0b57cec5SDimitry Andric break; 485*0b57cec5SDimitry Andric 486*0b57cec5SDimitry Andric // ll 487*0b57cec5SDimitry Andric case BuiltinType::LongLong: 488*0b57cec5SDimitry Andric case BuiltinType::ULongLong: 489*0b57cec5SDimitry Andric LM.setKind(LengthModifier::AsLongLong); 490*0b57cec5SDimitry Andric break; 491*0b57cec5SDimitry Andric 492*0b57cec5SDimitry Andric // L 493*0b57cec5SDimitry Andric case BuiltinType::LongDouble: 494*0b57cec5SDimitry Andric LM.setKind(LengthModifier::AsLongDouble); 495*0b57cec5SDimitry Andric break; 496*0b57cec5SDimitry Andric 497*0b57cec5SDimitry Andric // Don't know. 498*0b57cec5SDimitry Andric default: 499*0b57cec5SDimitry Andric return false; 500*0b57cec5SDimitry Andric } 501*0b57cec5SDimitry Andric 502*0b57cec5SDimitry Andric // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 503*0b57cec5SDimitry Andric if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11)) 504*0b57cec5SDimitry Andric namedTypeToLengthModifier(PT, LM); 505*0b57cec5SDimitry Andric 506*0b57cec5SDimitry Andric // If fixing the length modifier was enough, we are done. 507*0b57cec5SDimitry Andric if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) { 508*0b57cec5SDimitry Andric const analyze_scanf::ArgType &AT = getArgType(Ctx); 509*0b57cec5SDimitry Andric if (AT.isValid() && AT.matchesType(Ctx, QT)) 510*0b57cec5SDimitry Andric return true; 511*0b57cec5SDimitry Andric } 512*0b57cec5SDimitry Andric 513*0b57cec5SDimitry Andric // Figure out the conversion specifier. 514*0b57cec5SDimitry Andric if (PT->isRealFloatingType()) 515*0b57cec5SDimitry Andric CS.setKind(ConversionSpecifier::fArg); 516*0b57cec5SDimitry Andric else if (PT->isSignedIntegerType()) 517*0b57cec5SDimitry Andric CS.setKind(ConversionSpecifier::dArg); 518*0b57cec5SDimitry Andric else if (PT->isUnsignedIntegerType()) 519*0b57cec5SDimitry Andric CS.setKind(ConversionSpecifier::uArg); 520*0b57cec5SDimitry Andric else 521*0b57cec5SDimitry Andric llvm_unreachable("Unexpected type"); 522*0b57cec5SDimitry Andric 523*0b57cec5SDimitry Andric return true; 524*0b57cec5SDimitry Andric } 525*0b57cec5SDimitry Andric 526*0b57cec5SDimitry Andric void ScanfSpecifier::toString(raw_ostream &os) const { 527*0b57cec5SDimitry Andric os << "%"; 528*0b57cec5SDimitry Andric 529*0b57cec5SDimitry Andric if (usesPositionalArg()) 530*0b57cec5SDimitry Andric os << getPositionalArgIndex() << "$"; 531*0b57cec5SDimitry Andric if (SuppressAssignment) 532*0b57cec5SDimitry Andric os << "*"; 533*0b57cec5SDimitry Andric 534*0b57cec5SDimitry Andric FieldWidth.toString(os); 535*0b57cec5SDimitry Andric os << LM.toString(); 536*0b57cec5SDimitry Andric os << CS.toString(); 537*0b57cec5SDimitry Andric } 538*0b57cec5SDimitry Andric 539*0b57cec5SDimitry Andric bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, 540*0b57cec5SDimitry Andric const char *I, 541*0b57cec5SDimitry Andric const char *E, 542*0b57cec5SDimitry Andric const LangOptions &LO, 543*0b57cec5SDimitry Andric const TargetInfo &Target) { 544*0b57cec5SDimitry Andric 545*0b57cec5SDimitry Andric unsigned argIndex = 0; 546*0b57cec5SDimitry Andric 547*0b57cec5SDimitry Andric // Keep looking for a format specifier until we have exhausted the string. 548*0b57cec5SDimitry Andric while (I != E) { 549*0b57cec5SDimitry Andric const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex, 550*0b57cec5SDimitry Andric LO, Target); 551*0b57cec5SDimitry Andric // Did a fail-stop error of any kind occur when parsing the specifier? 552*0b57cec5SDimitry Andric // If so, don't do any more processing. 553*0b57cec5SDimitry Andric if (FSR.shouldStop()) 554*0b57cec5SDimitry Andric return true; 555*0b57cec5SDimitry Andric // Did we exhaust the string or encounter an error that 556*0b57cec5SDimitry Andric // we can recover from? 557*0b57cec5SDimitry Andric if (!FSR.hasValue()) 558*0b57cec5SDimitry Andric continue; 559*0b57cec5SDimitry Andric // We have a format specifier. Pass it to the callback. 560*0b57cec5SDimitry Andric if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), 561*0b57cec5SDimitry Andric I - FSR.getStart())) { 562*0b57cec5SDimitry Andric return true; 563*0b57cec5SDimitry Andric } 564*0b57cec5SDimitry Andric } 565*0b57cec5SDimitry Andric assert(I == E && "Format string not exhausted"); 566*0b57cec5SDimitry Andric return false; 567*0b57cec5SDimitry Andric } 568