//= ScanfFormatString.cpp - Analysis of scanf format strings --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
//
//===----------------------------------------------------------------------===//
13
14 #include "clang/AST/FormatString.h"
15 #include "FormatStringParsing.h"
16 #include "clang/Basic/TargetInfo.h"
17
18 using clang::analyze_format_string::ArgType;
19 using clang::analyze_format_string::FormatStringHandler;
20 using clang::analyze_format_string::LengthModifier;
21 using clang::analyze_format_string::OptionalAmount;
22 using clang::analyze_format_string::ConversionSpecifier;
23 using clang::analyze_scanf::ScanfConversionSpecifier;
24 using clang::analyze_scanf::ScanfSpecifier;
25 using clang::UpdateOnReturn;
26 using namespace clang;
27
28 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
29 ScanfSpecifierResult;
30
ParseScanList(FormatStringHandler & H,ScanfConversionSpecifier & CS,const char * & Beg,const char * E)31 static bool ParseScanList(FormatStringHandler &H,
32 ScanfConversionSpecifier &CS,
33 const char *&Beg, const char *E) {
34 const char *I = Beg;
35 const char *start = I - 1;
36 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
37
38 // No more characters?
39 if (I == E) {
40 H.HandleIncompleteScanList(start, I);
41 return true;
42 }
43
44 // Special case: ']' is the first character.
45 if (*I == ']') {
46 if (++I == E) {
47 H.HandleIncompleteScanList(start, I - 1);
48 return true;
49 }
50 }
51
52 // Special case: "^]" are the first characters.
53 if (I + 1 != E && I[0] == '^' && I[1] == ']') {
54 I += 2;
55 if (I == E) {
56 H.HandleIncompleteScanList(start, I - 1);
57 return true;
58 }
59 }
60
61 // Look for a ']' character which denotes the end of the scan list.
62 while (*I != ']') {
63 if (++I == E) {
64 H.HandleIncompleteScanList(start, I - 1);
65 return true;
66 }
67 }
68
69 CS.setEndScanList(I);
70 return false;
71 }
72
73 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
74 // We can possibly refactor.
ParseScanfSpecifier(FormatStringHandler & H,const char * & Beg,const char * E,unsigned & argIndex,const LangOptions & LO,const TargetInfo & Target)75 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
76 const char *&Beg,
77 const char *E,
78 unsigned &argIndex,
79 const LangOptions &LO,
80 const TargetInfo &Target) {
81 using namespace clang::analyze_format_string;
82 using namespace clang::analyze_scanf;
83 const char *I = Beg;
84 const char *Start = nullptr;
85 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
86
87 // Look for a '%' character that indicates the start of a format specifier.
88 for ( ; I != E ; ++I) {
89 char c = *I;
90 if (c == '\0') {
91 // Detect spurious null characters, which are likely errors.
92 H.HandleNullChar(I);
93 return true;
94 }
95 if (c == '%') {
96 Start = I++; // Record the start of the format specifier.
97 break;
98 }
99 }
100
101 // No format specifier found?
102 if (!Start)
103 return false;
104
105 if (I == E) {
106 // No more characters left?
107 H.HandleIncompleteSpecifier(Start, E - Start);
108 return true;
109 }
110
111 ScanfSpecifier FS;
112 if (ParseArgPosition(H, FS, Start, I, E))
113 return true;
114
115 if (I == E) {
116 // No more characters left?
117 H.HandleIncompleteSpecifier(Start, E - Start);
118 return true;
119 }
120
121 // Look for '*' flag if it is present.
122 if (*I == '*') {
123 FS.setSuppressAssignment(I);
124 if (++I == E) {
125 H.HandleIncompleteSpecifier(Start, E - Start);
126 return true;
127 }
128 }
129
130 // Look for the field width (if any). Unlike printf, this is either
131 // a fixed integer or isn't present.
132 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
133 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
134 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
135 FS.setFieldWidth(Amt);
136
137 if (I == E) {
138 // No more characters left?
139 H.HandleIncompleteSpecifier(Start, E - Start);
140 return true;
141 }
142 }
143
144 // Look for the length modifier.
145 if (ParseLengthModifier(FS, I, E, LO, /*IsScanf=*/true) && I == E) {
146 // No more characters left?
147 H.HandleIncompleteSpecifier(Start, E - Start);
148 return true;
149 }
150
151 // Detect spurious null characters, which are likely errors.
152 if (*I == '\0') {
153 H.HandleNullChar(I);
154 return true;
155 }
156
157 // Finally, look for the conversion specifier.
158 const char *conversionPosition = I++;
159 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
160 switch (*conversionPosition) {
161 default:
162 break;
163 case '%': k = ConversionSpecifier::PercentArg; break;
164 case 'A': k = ConversionSpecifier::AArg; break;
165 case 'E': k = ConversionSpecifier::EArg; break;
166 case 'F': k = ConversionSpecifier::FArg; break;
167 case 'G': k = ConversionSpecifier::GArg; break;
168 case 'X': k = ConversionSpecifier::XArg; break;
169 case 'a': k = ConversionSpecifier::aArg; break;
170 case 'd': k = ConversionSpecifier::dArg; break;
171 case 'e': k = ConversionSpecifier::eArg; break;
172 case 'f': k = ConversionSpecifier::fArg; break;
173 case 'g': k = ConversionSpecifier::gArg; break;
174 case 'i': k = ConversionSpecifier::iArg; break;
175 case 'n': k = ConversionSpecifier::nArg; break;
176 case 'c': k = ConversionSpecifier::cArg; break;
177 case 'C': k = ConversionSpecifier::CArg; break;
178 case 'S': k = ConversionSpecifier::SArg; break;
179 case '[': k = ConversionSpecifier::ScanListArg; break;
180 case 'u': k = ConversionSpecifier::uArg; break;
181 case 'x': k = ConversionSpecifier::xArg; break;
182 case 'o': k = ConversionSpecifier::oArg; break;
183 case 's': k = ConversionSpecifier::sArg; break;
184 case 'p': k = ConversionSpecifier::pArg; break;
185 // Apple extensions
186 // Apple-specific
187 case 'D':
188 if (Target.getTriple().isOSDarwin())
189 k = ConversionSpecifier::DArg;
190 break;
191 case 'O':
192 if (Target.getTriple().isOSDarwin())
193 k = ConversionSpecifier::OArg;
194 break;
195 case 'U':
196 if (Target.getTriple().isOSDarwin())
197 k = ConversionSpecifier::UArg;
198 break;
199 }
200 ScanfConversionSpecifier CS(conversionPosition, k);
201 if (k == ScanfConversionSpecifier::ScanListArg) {
202 if (ParseScanList(H, CS, I, E))
203 return true;
204 }
205 FS.setConversionSpecifier(CS);
206 if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
207 && !FS.usesPositionalArg())
208 FS.setArgIndex(argIndex++);
209
210 // FIXME: '%' and '*' doesn't make sense. Issue a warning.
211 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
212
213 if (k == ScanfConversionSpecifier::InvalidSpecifier) {
214 unsigned Len = I - Beg;
215 if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
216 CS.setEndScanList(Beg + Len);
217 FS.setConversionSpecifier(CS);
218 }
219 // Assume the conversion takes one argument.
220 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
221 }
222 return ScanfSpecifierResult(Start, FS);
223 }
224
getArgType(ASTContext & Ctx) const225 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
226 const ScanfConversionSpecifier &CS = getConversionSpecifier();
227
228 if (!CS.consumesDataArgument())
229 return ArgType::Invalid();
230
231 switch(CS.getKind()) {
232 // Signed int.
233 case ConversionSpecifier::dArg:
234 case ConversionSpecifier::DArg:
235 case ConversionSpecifier::iArg:
236 switch (LM.getKind()) {
237 case LengthModifier::None:
238 return ArgType::PtrTo(Ctx.IntTy);
239 case LengthModifier::AsChar:
240 return ArgType::PtrTo(ArgType::AnyCharTy);
241 case LengthModifier::AsShort:
242 return ArgType::PtrTo(Ctx.ShortTy);
243 case LengthModifier::AsLong:
244 return ArgType::PtrTo(Ctx.LongTy);
245 case LengthModifier::AsLongLong:
246 case LengthModifier::AsQuad:
247 return ArgType::PtrTo(Ctx.LongLongTy);
248 case LengthModifier::AsInt64:
249 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
250 case LengthModifier::AsIntMax:
251 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
252 case LengthModifier::AsSizeT:
253 return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
254 case LengthModifier::AsPtrDiff:
255 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
256 case LengthModifier::AsLongDouble:
257 // GNU extension.
258 return ArgType::PtrTo(Ctx.LongLongTy);
259 case LengthModifier::AsAllocate:
260 case LengthModifier::AsMAllocate:
261 case LengthModifier::AsInt32:
262 case LengthModifier::AsInt3264:
263 case LengthModifier::AsWide:
264 case LengthModifier::AsShortLong:
265 return ArgType::Invalid();
266 }
267 llvm_unreachable("Unsupported LengthModifier Type");
268
269 // Unsigned int.
270 case ConversionSpecifier::oArg:
271 case ConversionSpecifier::OArg:
272 case ConversionSpecifier::uArg:
273 case ConversionSpecifier::UArg:
274 case ConversionSpecifier::xArg:
275 case ConversionSpecifier::XArg:
276 switch (LM.getKind()) {
277 case LengthModifier::None:
278 return ArgType::PtrTo(Ctx.UnsignedIntTy);
279 case LengthModifier::AsChar:
280 return ArgType::PtrTo(Ctx.UnsignedCharTy);
281 case LengthModifier::AsShort:
282 return ArgType::PtrTo(Ctx.UnsignedShortTy);
283 case LengthModifier::AsLong:
284 return ArgType::PtrTo(Ctx.UnsignedLongTy);
285 case LengthModifier::AsLongLong:
286 case LengthModifier::AsQuad:
287 return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
288 case LengthModifier::AsInt64:
289 return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
290 case LengthModifier::AsIntMax:
291 return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
292 case LengthModifier::AsSizeT:
293 return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
294 case LengthModifier::AsPtrDiff:
295 return ArgType::PtrTo(
296 ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
297 case LengthModifier::AsLongDouble:
298 // GNU extension.
299 return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
300 case LengthModifier::AsAllocate:
301 case LengthModifier::AsMAllocate:
302 case LengthModifier::AsInt32:
303 case LengthModifier::AsInt3264:
304 case LengthModifier::AsWide:
305 case LengthModifier::AsShortLong:
306 return ArgType::Invalid();
307 }
308 llvm_unreachable("Unsupported LengthModifier Type");
309
310 // Float.
311 case ConversionSpecifier::aArg:
312 case ConversionSpecifier::AArg:
313 case ConversionSpecifier::eArg:
314 case ConversionSpecifier::EArg:
315 case ConversionSpecifier::fArg:
316 case ConversionSpecifier::FArg:
317 case ConversionSpecifier::gArg:
318 case ConversionSpecifier::GArg:
319 switch (LM.getKind()) {
320 case LengthModifier::None:
321 return ArgType::PtrTo(Ctx.FloatTy);
322 case LengthModifier::AsLong:
323 return ArgType::PtrTo(Ctx.DoubleTy);
324 case LengthModifier::AsLongDouble:
325 return ArgType::PtrTo(Ctx.LongDoubleTy);
326 default:
327 return ArgType::Invalid();
328 }
329
330 // Char, string and scanlist.
331 case ConversionSpecifier::cArg:
332 case ConversionSpecifier::sArg:
333 case ConversionSpecifier::ScanListArg:
334 switch (LM.getKind()) {
335 case LengthModifier::None:
336 return ArgType::PtrTo(ArgType::AnyCharTy);
337 case LengthModifier::AsLong:
338 case LengthModifier::AsWide:
339 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
340 case LengthModifier::AsAllocate:
341 case LengthModifier::AsMAllocate:
342 return ArgType::PtrTo(ArgType::CStrTy);
343 case LengthModifier::AsShort:
344 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
345 return ArgType::PtrTo(ArgType::AnyCharTy);
346 LLVM_FALLTHROUGH;
347 default:
348 return ArgType::Invalid();
349 }
350 case ConversionSpecifier::CArg:
351 case ConversionSpecifier::SArg:
352 // FIXME: Mac OS X specific?
353 switch (LM.getKind()) {
354 case LengthModifier::None:
355 case LengthModifier::AsWide:
356 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
357 case LengthModifier::AsAllocate:
358 case LengthModifier::AsMAllocate:
359 return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
360 case LengthModifier::AsShort:
361 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
362 return ArgType::PtrTo(ArgType::AnyCharTy);
363 LLVM_FALLTHROUGH;
364 default:
365 return ArgType::Invalid();
366 }
367
368 // Pointer.
369 case ConversionSpecifier::pArg:
370 return ArgType::PtrTo(ArgType::CPointerTy);
371
372 // Write-back.
373 case ConversionSpecifier::nArg:
374 switch (LM.getKind()) {
375 case LengthModifier::None:
376 return ArgType::PtrTo(Ctx.IntTy);
377 case LengthModifier::AsChar:
378 return ArgType::PtrTo(Ctx.SignedCharTy);
379 case LengthModifier::AsShort:
380 return ArgType::PtrTo(Ctx.ShortTy);
381 case LengthModifier::AsLong:
382 return ArgType::PtrTo(Ctx.LongTy);
383 case LengthModifier::AsLongLong:
384 case LengthModifier::AsQuad:
385 return ArgType::PtrTo(Ctx.LongLongTy);
386 case LengthModifier::AsInt64:
387 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
388 case LengthModifier::AsIntMax:
389 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
390 case LengthModifier::AsSizeT:
391 return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
392 case LengthModifier::AsPtrDiff:
393 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
394 case LengthModifier::AsLongDouble:
395 return ArgType(); // FIXME: Is this a known extension?
396 case LengthModifier::AsAllocate:
397 case LengthModifier::AsMAllocate:
398 case LengthModifier::AsInt32:
399 case LengthModifier::AsInt3264:
400 case LengthModifier::AsWide:
401 case LengthModifier::AsShortLong:
402 return ArgType::Invalid();
403 }
404
405 default:
406 break;
407 }
408
409 return ArgType();
410 }
411
fixType(QualType QT,QualType RawQT,const LangOptions & LangOpt,ASTContext & Ctx)412 bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
413 const LangOptions &LangOpt,
414 ASTContext &Ctx) {
415
416 // %n is different from other conversion specifiers; don't try to fix it.
417 if (CS.getKind() == ConversionSpecifier::nArg)
418 return false;
419
420 if (!QT->isPointerType())
421 return false;
422
423 QualType PT = QT->getPointeeType();
424
425 // If it's an enum, get its underlying type.
426 if (const EnumType *ETy = PT->getAs<EnumType>()) {
427 // Don't try to fix incomplete enums.
428 if (!ETy->getDecl()->isComplete())
429 return false;
430 PT = ETy->getDecl()->getIntegerType();
431 }
432
433 const BuiltinType *BT = PT->getAs<BuiltinType>();
434 if (!BT)
435 return false;
436
437 // Pointer to a character.
438 if (PT->isAnyCharacterType()) {
439 CS.setKind(ConversionSpecifier::sArg);
440 if (PT->isWideCharType())
441 LM.setKind(LengthModifier::AsWideChar);
442 else
443 LM.setKind(LengthModifier::None);
444
445 // If we know the target array length, we can use it as a field width.
446 if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
447 if (CAT->getSizeModifier() == ArrayType::Normal)
448 FieldWidth = OptionalAmount(OptionalAmount::Constant,
449 CAT->getSize().getZExtValue() - 1,
450 "", 0, false);
451
452 }
453 return true;
454 }
455
456 // Figure out the length modifier.
457 switch (BT->getKind()) {
458 // no modifier
459 case BuiltinType::UInt:
460 case BuiltinType::Int:
461 case BuiltinType::Float:
462 LM.setKind(LengthModifier::None);
463 break;
464
465 // hh
466 case BuiltinType::Char_U:
467 case BuiltinType::UChar:
468 case BuiltinType::Char_S:
469 case BuiltinType::SChar:
470 LM.setKind(LengthModifier::AsChar);
471 break;
472
473 // h
474 case BuiltinType::Short:
475 case BuiltinType::UShort:
476 LM.setKind(LengthModifier::AsShort);
477 break;
478
479 // l
480 case BuiltinType::Long:
481 case BuiltinType::ULong:
482 case BuiltinType::Double:
483 LM.setKind(LengthModifier::AsLong);
484 break;
485
486 // ll
487 case BuiltinType::LongLong:
488 case BuiltinType::ULongLong:
489 LM.setKind(LengthModifier::AsLongLong);
490 break;
491
492 // L
493 case BuiltinType::LongDouble:
494 LM.setKind(LengthModifier::AsLongDouble);
495 break;
496
497 // Don't know.
498 default:
499 return false;
500 }
501
502 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
503 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
504 namedTypeToLengthModifier(PT, LM);
505
506 // If fixing the length modifier was enough, we are done.
507 if (hasValidLengthModifier(Ctx.getTargetInfo(), LangOpt)) {
508 const analyze_scanf::ArgType &AT = getArgType(Ctx);
509 if (AT.isValid() && AT.matchesType(Ctx, QT))
510 return true;
511 }
512
513 // Figure out the conversion specifier.
514 if (PT->isRealFloatingType())
515 CS.setKind(ConversionSpecifier::fArg);
516 else if (PT->isSignedIntegerType())
517 CS.setKind(ConversionSpecifier::dArg);
518 else if (PT->isUnsignedIntegerType())
519 CS.setKind(ConversionSpecifier::uArg);
520 else
521 llvm_unreachable("Unexpected type");
522
523 return true;
524 }
525
toString(raw_ostream & os) const526 void ScanfSpecifier::toString(raw_ostream &os) const {
527 os << "%";
528
529 if (usesPositionalArg())
530 os << getPositionalArgIndex() << "$";
531 if (SuppressAssignment)
532 os << "*";
533
534 FieldWidth.toString(os);
535 os << LM.toString();
536 os << CS.toString();
537 }
538
ParseScanfString(FormatStringHandler & H,const char * I,const char * E,const LangOptions & LO,const TargetInfo & Target)539 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
540 const char *I,
541 const char *E,
542 const LangOptions &LO,
543 const TargetInfo &Target) {
544
545 unsigned argIndex = 0;
546
547 // Keep looking for a format specifier until we have exhausted the string.
548 while (I != E) {
549 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
550 LO, Target);
551 // Did a fail-stop error of any kind occur when parsing the specifier?
552 // If so, don't do any more processing.
553 if (FSR.shouldStop())
554 return true;
555 // Did we exhaust the string or encounter an error that
556 // we can recover from?
557 if (!FSR.hasValue())
558 continue;
559 // We have a format specifier. Pass it to the callback.
560 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
561 I - FSR.getStart())) {
562 return true;
563 }
564 }
565 assert(I == E && "Format string not exhausted");
566 return false;
567 }
568