xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp (revision 536456a7e93d73b9ff4e92f3e51d1aa1c72628fe)
1 //=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker improves modeling of a few simple library functions.
10 // It does not generate warnings.
11 //
12 // This checker provides a specification format - `Summary' - and
13 // contains descriptions of some library functions in this format. Each
14 // specification contains a list of branches for splitting the program state
15 // upon call, and range constraints on argument and return-value symbols that
16 // are satisfied on each branch. This spec can be expanded to include more
17 // items, like external effects of the function.
18 //
19 // The main difference between this approach and the body farms technique is
20 // in more explicit control over how many branches are produced. For example,
21 // consider standard C function `ispunct(int x)', which returns a non-zero value
22 // iff `x' is a punctuation character, that is, when `x' is in range
23 //   ['!', '/'] U [':', '@'] U ['[', '\`'] U ['{', '~'].
24 // `Summary' provides only two branches for this function. However,
25 // any attempt to describe this range with if-statements in the body farm
26 // would result in many more branches. Because each branch needs to be analyzed
27 // independently, this significantly reduces performance. Additionally,
28 // once we consider a branch on which `x' is in range, say, ['!', '/'],
29 // we assume that such branch is an important separate path through the program,
30 // which may lead to false positives because considering this particular path
31 // was not consciously intended, and therefore it might have been unreachable.
32 //
33 // This checker uses eval::Call for modeling pure functions (functions without
34 // side effects), for which their `Summary' is a precise model. This avoids
35 // unnecessary invalidation passes. Conflicts with other checkers are unlikely
36 // because if the function has no other effects, other checkers would probably
37 // never want to improve upon the modeling done by this checker.
38 //
39 // Non-pure functions, for which only partial improvement over the default
40 // behavior is expected, are modeled via check::PostCall, non-intrusively.
41 //
42 // The following standard C functions are currently supported:
43 //
44 //   fgetc      getline   isdigit   isupper
45 //   fread      isalnum   isgraph   isxdigit
46 //   fwrite     isalpha   islower   read
47 //   getc       isascii   isprint   write
48 //   getchar    isblank   ispunct
49 //   getdelim   iscntrl   isspace
50 //
51 //===----------------------------------------------------------------------===//
52 
53 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
54 #include "clang/StaticAnalyzer/Core/Checker.h"
55 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
56 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
57 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
58 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
59 
60 using namespace clang;
61 using namespace clang::ento;
62 
63 namespace {
64 class StdLibraryFunctionsChecker : public Checker<check::PostCall, eval::Call> {
65   /// Below is a series of typedefs necessary to define function specs.
66   /// We avoid nesting types here because each additional qualifier
67   /// would need to be repeated in every function spec.
68   struct Summary;
69 
70   /// Specify how much the analyzer engine should entrust modeling this function
71   /// to us. If he doesn't, he performs additional invalidations.
72   enum InvalidationKind { NoEvalCall, EvalCallAsPure };
73 
74   /// A pair of ValueRangeKind and IntRangeVector would describe a range
75   /// imposed on a particular argument or return value symbol.
76   ///
77   /// Given a range, should the argument stay inside or outside this range?
78   /// The special `ComparesToArgument' value indicates that we should
79   /// impose a constraint that involves other argument or return value symbols.
80   enum ValueRangeKind { OutOfRange, WithinRange, ComparesToArgument };
81 
82   // The universal integral type to use in value range descriptions.
83   // Unsigned to make sure overflows are well-defined.
84   typedef uint64_t RangeInt;
85 
86   /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is
87   /// a non-negative integer, which less than 5 and not equal to 2. For
88   /// `ComparesToArgument', holds information about how exactly to compare to
89   /// the argument.
90   typedef std::vector<std::pair<RangeInt, RangeInt>> IntRangeVector;
91 
92   /// A reference to an argument or return value by its number.
93   /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but
94   /// obviously uint32_t should be enough for all practical purposes.
95   typedef uint32_t ArgNo;
96   static const ArgNo Ret = std::numeric_limits<ArgNo>::max();
97 
98   /// Incapsulates a single range on a single symbol within a branch.
99   class ValueRange {
100     ArgNo ArgN;          // Argument to which we apply the range.
101     ValueRangeKind Kind; // Kind of range definition.
102     IntRangeVector Args; // Polymorphic arguments.
103 
104   public:
105     ValueRange(ArgNo ArgN, ValueRangeKind Kind, const IntRangeVector &Args)
106         : ArgN(ArgN), Kind(Kind), Args(Args) {}
107 
108     ArgNo getArgNo() const { return ArgN; }
109     ValueRangeKind getKind() const { return Kind; }
110 
111     BinaryOperator::Opcode getOpcode() const {
112       assert(Kind == ComparesToArgument);
113       assert(Args.size() == 1);
114       BinaryOperator::Opcode Op =
115           static_cast<BinaryOperator::Opcode>(Args[0].first);
116       assert(BinaryOperator::isComparisonOp(Op) &&
117              "Only comparison ops are supported for ComparesToArgument");
118       return Op;
119     }
120 
121     ArgNo getOtherArgNo() const {
122       assert(Kind == ComparesToArgument);
123       assert(Args.size() == 1);
124       return static_cast<ArgNo>(Args[0].second);
125     }
126 
127     const IntRangeVector &getRanges() const {
128       assert(Kind != ComparesToArgument);
129       return Args;
130     }
131 
132     // We avoid creating a virtual apply() method because
133     // it makes initializer lists harder to write.
134   private:
135     ProgramStateRef applyAsOutOfRange(ProgramStateRef State,
136                                       const CallEvent &Call,
137                                       const Summary &Summary) const;
138     ProgramStateRef applyAsWithinRange(ProgramStateRef State,
139                                        const CallEvent &Call,
140                                        const Summary &Summary) const;
141     ProgramStateRef applyAsComparesToArgument(ProgramStateRef State,
142                                               const CallEvent &Call,
143                                               const Summary &Summary) const;
144 
145   public:
146     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
147                           const Summary &Summary) const {
148       switch (Kind) {
149       case OutOfRange:
150         return applyAsOutOfRange(State, Call, Summary);
151       case WithinRange:
152         return applyAsWithinRange(State, Call, Summary);
153       case ComparesToArgument:
154         return applyAsComparesToArgument(State, Call, Summary);
155       }
156       llvm_unreachable("Unknown ValueRange kind!");
157     }
158   };
159 
160   /// The complete list of ranges that defines a single branch.
161   typedef std::vector<ValueRange> ValueRangeSet;
162 
163   using ArgTypes = std::vector<QualType>;
164   using Ranges = std::vector<ValueRangeSet>;
165 
166   /// Includes information about function prototype (which is necessary to
167   /// ensure we're modeling the right function and casting values properly),
168   /// approach to invalidation, and a list of branches - essentially, a list
169   /// of list of ranges - essentially, a list of lists of lists of segments.
170   struct Summary {
171     const ArgTypes ArgTys;
172     const QualType RetTy;
173     const InvalidationKind InvalidationKd;
174     Ranges Cases;
175     ValueRangeSet ArgConstraints;
176 
177     Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd)
178         : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {}
179 
180     Summary &Case(ValueRangeSet VRS) {
181       Cases.push_back(VRS);
182       return *this;
183     }
184 
185   private:
186     static void assertTypeSuitableForSummary(QualType T) {
187       assert(!T->isVoidType() &&
188              "We should have had no significant void types in the spec");
189       assert(T.isCanonical() &&
190              "We should only have canonical types in the spec");
191       // FIXME: lift this assert (but not the ones above!)
192       assert(T->isIntegralOrEnumerationType() &&
193              "We only support integral ranges in the spec");
194     }
195 
196   public:
197     QualType getArgType(ArgNo ArgN) const {
198       QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN];
199       assertTypeSuitableForSummary(T);
200       return T;
201     }
202 
203     /// Try our best to figure out if the call expression is the call of
204     /// *the* library function to which this specification applies.
205     bool matchesCall(const CallExpr *CE) const;
206   };
207 
208   // The same function (as in, function identifier) may have different
209   // summaries assigned to it, with different argument and return value types.
210   // We call these "variants" of the function. This can be useful for handling
211   // C++ function overloads, and also it can be used when the same function
212   // may have different definitions on different platforms.
213   typedef std::vector<Summary> Summaries;
214 
215   // The map of all functions supported by the checker. It is initialized
216   // lazily, and it doesn't change after initialization.
217   mutable llvm::StringMap<Summaries> FunctionSummaryMap;
218 
219   // Auxiliary functions to support ArgNo within all structures
220   // in a unified manner.
221   static QualType getArgType(const Summary &Summary, ArgNo ArgN) {
222     return Summary.getArgType(ArgN);
223   }
224   static QualType getArgType(const CallEvent &Call, ArgNo ArgN) {
225     return ArgN == Ret ? Call.getResultType().getCanonicalType()
226                        : Call.getArgExpr(ArgN)->getType().getCanonicalType();
227   }
228   static QualType getArgType(const CallExpr *CE, ArgNo ArgN) {
229     return ArgN == Ret ? CE->getType().getCanonicalType()
230                        : CE->getArg(ArgN)->getType().getCanonicalType();
231   }
232   static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) {
233     return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN);
234   }
235 
236 public:
237   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
238   bool evalCall(const CallEvent &Call, CheckerContext &C) const;
239 
240 private:
241   Optional<Summary> findFunctionSummary(const FunctionDecl *FD,
242                                         const CallExpr *CE,
243                                         CheckerContext &C) const;
244 
245   void initFunctionSummaries(CheckerContext &C) const;
246 };
247 } // end of anonymous namespace
248 
249 ProgramStateRef StdLibraryFunctionsChecker::ValueRange::applyAsOutOfRange(
250     ProgramStateRef State, const CallEvent &Call,
251     const Summary &Summary) const {
252 
253   ProgramStateManager &Mgr = State->getStateManager();
254   SValBuilder &SVB = Mgr.getSValBuilder();
255   BasicValueFactory &BVF = SVB.getBasicValueFactory();
256   ConstraintManager &CM = Mgr.getConstraintManager();
257   QualType T = getArgType(Summary, getArgNo());
258   SVal V = getArgSVal(Call, getArgNo());
259 
260   if (auto N = V.getAs<NonLoc>()) {
261     const IntRangeVector &R = getRanges();
262     size_t E = R.size();
263     for (size_t I = 0; I != E; ++I) {
264       const llvm::APSInt &Min = BVF.getValue(R[I].first, T);
265       const llvm::APSInt &Max = BVF.getValue(R[I].second, T);
266       assert(Min <= Max);
267       State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
268       if (!State)
269         break;
270     }
271   }
272 
273   return State;
274 }
275 
276 ProgramStateRef StdLibraryFunctionsChecker::ValueRange::applyAsWithinRange(
277     ProgramStateRef State, const CallEvent &Call,
278     const Summary &Summary) const {
279 
280   ProgramStateManager &Mgr = State->getStateManager();
281   SValBuilder &SVB = Mgr.getSValBuilder();
282   BasicValueFactory &BVF = SVB.getBasicValueFactory();
283   ConstraintManager &CM = Mgr.getConstraintManager();
284   QualType T = getArgType(Summary, getArgNo());
285   SVal V = getArgSVal(Call, getArgNo());
286 
287   // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R".
288   // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary,
289   // and then cut away all holes in R one by one.
290   //
291   // E.g. consider a range list R as [A, B] and [C, D]
292   // -------+--------+------------------+------------+----------->
293   //        A        B                  C            D
294   // Then we assume that the value is not in [-inf, A - 1],
295   // then not in [D + 1, +inf], then not in [B + 1, C - 1]
296   if (auto N = V.getAs<NonLoc>()) {
297     const IntRangeVector &R = getRanges();
298     size_t E = R.size();
299 
300     const llvm::APSInt &MinusInf = BVF.getMinValue(T);
301     const llvm::APSInt &PlusInf = BVF.getMaxValue(T);
302 
303     const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T);
304     if (Left != PlusInf) {
305       assert(MinusInf <= Left);
306       State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false);
307       if (!State)
308         return nullptr;
309     }
310 
311     const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T);
312     if (Right != MinusInf) {
313       assert(Right <= PlusInf);
314       State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false);
315       if (!State)
316         return nullptr;
317     }
318 
319     for (size_t I = 1; I != E; ++I) {
320       const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T);
321       const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T);
322       if (Min <= Max) {
323         State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
324         if (!State)
325           return nullptr;
326       }
327     }
328   }
329 
330   return State;
331 }
332 
333 ProgramStateRef
334 StdLibraryFunctionsChecker::ValueRange::applyAsComparesToArgument(
335     ProgramStateRef State, const CallEvent &Call,
336     const Summary &Summary) const {
337 
338   ProgramStateManager &Mgr = State->getStateManager();
339   SValBuilder &SVB = Mgr.getSValBuilder();
340   QualType CondT = SVB.getConditionType();
341   QualType T = getArgType(Summary, getArgNo());
342   SVal V = getArgSVal(Call, getArgNo());
343 
344   BinaryOperator::Opcode Op = getOpcode();
345   ArgNo OtherArg = getOtherArgNo();
346   SVal OtherV = getArgSVal(Call, OtherArg);
347   QualType OtherT = getArgType(Call, OtherArg);
348   // Note: we avoid integral promotion for comparison.
349   OtherV = SVB.evalCast(OtherV, T, OtherT);
350   if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT)
351                        .getAs<DefinedOrUnknownSVal>())
352     State = State->assume(*CompV, true);
353   return State;
354 }
355 
356 void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call,
357                                                CheckerContext &C) const {
358   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
359   if (!FD)
360     return;
361 
362   const CallExpr *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
363   if (!CE)
364     return;
365 
366   Optional<Summary> FoundSummary = findFunctionSummary(FD, CE, C);
367   if (!FoundSummary)
368     return;
369 
370   // Now apply ranges.
371   const Summary &Summary = *FoundSummary;
372   ProgramStateRef State = C.getState();
373 
374   // Apply case/branch specifications.
375   for (const auto &VRS : Summary.Cases) {
376     ProgramStateRef NewState = State;
377     for (const auto &VR: VRS) {
378       NewState = VR.apply(NewState, Call, Summary);
379       if (!NewState)
380         break;
381     }
382 
383     if (NewState && NewState != State)
384       C.addTransition(NewState);
385   }
386 }
387 
388 bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call,
389                                           CheckerContext &C) const {
390   const auto *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
391   if (!FD)
392     return false;
393 
394   const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
395   if (!CE)
396     return false;
397 
398   Optional<Summary> FoundSummary = findFunctionSummary(FD, CE, C);
399   if (!FoundSummary)
400     return false;
401 
402   const Summary &Summary = *FoundSummary;
403   switch (Summary.InvalidationKd) {
404   case EvalCallAsPure: {
405     ProgramStateRef State = C.getState();
406     const LocationContext *LC = C.getLocationContext();
407     SVal V = C.getSValBuilder().conjureSymbolVal(
408         CE, LC, CE->getType().getCanonicalType(), C.blockCount());
409     State = State->BindExpr(CE, LC, V);
410     C.addTransition(State);
411     return true;
412   }
413   case NoEvalCall:
414     // Summary tells us to avoid performing eval::Call. The function is possibly
415     // evaluated by another checker, or evaluated conservatively.
416     return false;
417   }
418   llvm_unreachable("Unknown invalidation kind!");
419 }
420 
421 bool StdLibraryFunctionsChecker::Summary::matchesCall(
422     const CallExpr *CE) const {
423   // Check number of arguments:
424   if (CE->getNumArgs() != ArgTys.size())
425     return false;
426 
427   // Check return type if relevant:
428   if (!RetTy.isNull() && RetTy != CE->getType().getCanonicalType())
429     return false;
430 
431   // Check argument types when relevant:
432   for (size_t I = 0, E = ArgTys.size(); I != E; ++I) {
433     QualType FormalT = ArgTys[I];
434     // Null type marks irrelevant arguments.
435     if (FormalT.isNull())
436       continue;
437 
438     assertTypeSuitableForSummary(FormalT);
439 
440     QualType ActualT = StdLibraryFunctionsChecker::getArgType(CE, I);
441     assert(ActualT.isCanonical());
442     if (ActualT != FormalT)
443       return false;
444   }
445 
446   return true;
447 }
448 
449 Optional<StdLibraryFunctionsChecker::Summary>
450 StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD,
451                                                 const CallExpr *CE,
452                                                 CheckerContext &C) const {
453   // Note: we cannot always obtain FD from CE
454   // (eg. virtual call, or call by pointer).
455   assert(CE);
456 
457   if (!FD)
458     return None;
459 
460   initFunctionSummaries(C);
461 
462   IdentifierInfo *II = FD->getIdentifier();
463   if (!II)
464     return None;
465   StringRef Name = II->getName();
466   if (Name.empty() || !C.isCLibraryFunction(FD, Name))
467     return None;
468 
469   auto FSMI = FunctionSummaryMap.find(Name);
470   if (FSMI == FunctionSummaryMap.end())
471     return None;
472 
473   // Verify that function signature matches the spec in advance.
474   // Otherwise we might be modeling the wrong function.
475   // Strict checking is important because we will be conducting
476   // very integral-type-sensitive operations on arguments and
477   // return values.
478   const Summaries &SpecVariants = FSMI->second;
479   for (const Summary &Spec : SpecVariants)
480     if (Spec.matchesCall(CE))
481       return Spec;
482 
483   return None;
484 }
485 
486 void StdLibraryFunctionsChecker::initFunctionSummaries(
487     CheckerContext &C) const {
488   if (!FunctionSummaryMap.empty())
489     return;
490 
491   SValBuilder &SVB = C.getSValBuilder();
492   BasicValueFactory &BVF = SVB.getBasicValueFactory();
493   const ASTContext &ACtx = BVF.getContext();
494 
495   // These types are useful for writing specifications quickly,
496   // New specifications should probably introduce more types.
497   // Some types are hard to obtain from the AST, eg. "ssize_t".
498   // In such cases it should be possible to provide multiple variants
499   // of function summary for common cases (eg. ssize_t could be int or long
500   // or long long, so three summary variants would be enough).
501   // Of course, function variants are also useful for C++ overloads.
502   const QualType
503       Irrelevant; // A placeholder, whenever we do not care about the type.
504   const QualType IntTy = ACtx.IntTy;
505   const QualType LongTy = ACtx.LongTy;
506   const QualType LongLongTy = ACtx.LongLongTy;
507   const QualType SizeTy = ACtx.getSizeType();
508 
509   const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue();
510   const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue();
511   const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue();
512 
513   const RangeInt UCharMax =
514       BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue();
515 
516   // The platform dependent value of EOF.
517   // Try our best to parse this from the Preprocessor, otherwise fallback to -1.
518   const auto EOFv = [&C]() -> RangeInt {
519     if (const llvm::Optional<int> OptInt =
520             tryExpandAsInteger("EOF", C.getPreprocessor()))
521       return *OptInt;
522     return -1;
523   }();
524 
525   // We are finally ready to define specifications for all supported functions.
526   //
527   // The signature needs to have the correct number of arguments.
528   // However, we insert `Irrelevant' when the type is insignificant.
529   //
530   // Argument ranges should always cover all variants. If return value
531   // is completely unknown, omit it from the respective range set.
532   //
533   // All types in the spec need to be canonical.
534   //
535   // Every item in the list of range sets represents a particular
536   // execution path the analyzer would need to explore once
537   // the call is modeled - a new program state is constructed
538   // for every range set, and each range line in the range set
539   // corresponds to a specific constraint within this state.
540   //
541   // Upon comparing to another argument, the other argument is casted
542   // to the current argument's type. This avoids proper promotion but
543   // seems useful. For example, read() receives size_t argument,
544   // and its return value, which is of type ssize_t, cannot be greater
545   // than this argument. If we made a promotion, and the size argument
546   // is equal to, say, 10, then we'd impose a range of [0, 10] on the
547   // return value, however the correct range is [-1, 10].
548   //
549   // Please update the list of functions in the header after editing!
550   //
551 
552   // Below are helper functions to create the summaries.
553   auto ArgumentCondition = [](ArgNo ArgN, ValueRangeKind Kind,
554                               IntRangeVector Ranges) -> ValueRange {
555     ValueRange VR{ArgN, Kind, Ranges};
556     return VR;
557   };
558   auto ReturnValueCondition = [](ValueRangeKind Kind,
559                                  IntRangeVector Ranges) -> ValueRange {
560     ValueRange VR{Ret, Kind, Ranges};
561     return VR;
562   };
563   auto Range = [](RangeInt b, RangeInt e) {
564     return IntRangeVector{std::pair<RangeInt, RangeInt>{b, e}};
565   };
566   auto SingleValue = [](RangeInt v) {
567     return IntRangeVector{std::pair<RangeInt, RangeInt>{v, v}};
568   };
569   auto IsLessThan = [](ArgNo ArgN) { return IntRangeVector{{BO_LE, ArgN}}; };
570 
571   using RetType = QualType;
572 
573   // Templates for summaries that are reused by many functions.
574   auto Getc = [&]() {
575     return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall)
576         .Case(
577             {ReturnValueCondition(WithinRange, {{EOFv, EOFv}, {0, UCharMax}})});
578   };
579   auto Read = [&](RetType R, RangeInt Max) {
580     return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R},
581                    NoEvalCall)
582         .Case({ReturnValueCondition(ComparesToArgument, IsLessThan(2)),
583                ReturnValueCondition(WithinRange, Range(-1, Max))});
584   };
585   auto Fread = [&]() {
586     return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy, Irrelevant},
587                    RetType{SizeTy}, NoEvalCall)
588         .Case({
589             ReturnValueCondition(ComparesToArgument, IsLessThan(2)),
590         });
591   };
592   auto Getline = [&](RetType R, RangeInt Max) {
593     return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R},
594                    NoEvalCall)
595         .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})});
596   };
597 
598   FunctionSummaryMap = {
599       // The isascii() family of functions.
600       {
601           "isalnum",
602           Summaries{
603               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
604                   // Boils down to isupper() or islower() or isdigit().
605                   .Case(
606                       {ArgumentCondition(0U, WithinRange,
607                                          {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}),
608                        ReturnValueCondition(OutOfRange, SingleValue(0))})
609                   // The locale-specific range.
610                   // No post-condition. We are completely unaware of
611                   // locale-specific return values.
612                   .Case({ArgumentCondition(0U, WithinRange, {{128, UCharMax}})})
613                   .Case({ArgumentCondition(0U, OutOfRange,
614                                            {{'0', '9'},
615                                             {'A', 'Z'},
616                                             {'a', 'z'},
617                                             {128, UCharMax}}),
618                          ReturnValueCondition(WithinRange, SingleValue(0))})},
619       },
620       {
621           "isalpha",
622           Summaries{
623               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
624                   .Case({ArgumentCondition(0U, WithinRange,
625                                            {{'A', 'Z'}, {'a', 'z'}}),
626                          ReturnValueCondition(OutOfRange, SingleValue(0))})
627                   // The locale-specific range.
628                   .Case({ArgumentCondition(0U, WithinRange, {{128, UCharMax}})})
629                   .Case({ArgumentCondition(
630                              0U, OutOfRange,
631                              {{'A', 'Z'}, {'a', 'z'}, {128, UCharMax}}),
632                          ReturnValueCondition(WithinRange, SingleValue(0))})},
633       },
634       {
635           "isascii",
636           Summaries{
637               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
638                   .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
639                          ReturnValueCondition(OutOfRange, SingleValue(0))})
640                   .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)),
641                          ReturnValueCondition(WithinRange, SingleValue(0))})},
642       },
643       {
644           "isblank",
645           Summaries{
646               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
647                   .Case({ArgumentCondition(0U, WithinRange,
648                                            {{'\t', '\t'}, {' ', ' '}}),
649                          ReturnValueCondition(OutOfRange, SingleValue(0))})
650                   .Case({ArgumentCondition(0U, OutOfRange,
651                                            {{'\t', '\t'}, {' ', ' '}}),
652                          ReturnValueCondition(WithinRange, SingleValue(0))})},
653       },
654       {
655           "iscntrl",
656           Summaries{
657               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
658                   .Case({ArgumentCondition(0U, WithinRange,
659                                            {{0, 32}, {127, 127}}),
660                          ReturnValueCondition(OutOfRange, SingleValue(0))})
661                   .Case(
662                       {ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}),
663                        ReturnValueCondition(WithinRange, SingleValue(0))})},
664       },
665       {
666           "isdigit",
667           Summaries{
668               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
669                   .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')),
670                          ReturnValueCondition(OutOfRange, SingleValue(0))})
671                   .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')),
672                          ReturnValueCondition(WithinRange, SingleValue(0))})},
673       },
674       {
675           "isgraph",
676           Summaries{
677               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
678                   .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)),
679                          ReturnValueCondition(OutOfRange, SingleValue(0))})
680                   .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)),
681                          ReturnValueCondition(WithinRange, SingleValue(0))})},
682       },
683       {
684           "islower",
685           Summaries{
686               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
687                   // Is certainly lowercase.
688                   .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')),
689                          ReturnValueCondition(OutOfRange, SingleValue(0))})
690                   // Is ascii but not lowercase.
691                   .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
692                          ArgumentCondition(0U, OutOfRange, Range('a', 'z')),
693                          ReturnValueCondition(WithinRange, SingleValue(0))})
694                   // The locale-specific range.
695                   .Case({ArgumentCondition(0U, WithinRange, {{128, UCharMax}})})
696                   // Is not an unsigned char.
697                   .Case({ArgumentCondition(0U, OutOfRange, Range(0, UCharMax)),
698                          ReturnValueCondition(WithinRange, SingleValue(0))})},
699       },
700       {
701           "isprint",
702           Summaries{
703               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
704                   .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)),
705                          ReturnValueCondition(OutOfRange, SingleValue(0))})
706                   .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)),
707                          ReturnValueCondition(WithinRange, SingleValue(0))})},
708       },
709       {
710           "ispunct",
711           Summaries{
712               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
713                   .Case({ArgumentCondition(
714                              0U, WithinRange,
715                              {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
716                          ReturnValueCondition(OutOfRange, SingleValue(0))})
717                   .Case({ArgumentCondition(
718                              0U, OutOfRange,
719                              {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
720                          ReturnValueCondition(WithinRange, SingleValue(0))})},
721       },
722       {
723           "isspace",
724           Summaries{
725               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
726                   // Space, '\f', '\n', '\r', '\t', '\v'.
727                   .Case({ArgumentCondition(0U, WithinRange,
728                                            {{9, 13}, {' ', ' '}}),
729                          ReturnValueCondition(OutOfRange, SingleValue(0))})
730                   // The locale-specific range.
731                   .Case({ArgumentCondition(0U, WithinRange, {{128, UCharMax}})})
732                   .Case({ArgumentCondition(
733                              0U, OutOfRange,
734                              {{9, 13}, {' ', ' '}, {128, UCharMax}}),
735                          ReturnValueCondition(WithinRange, SingleValue(0))})},
736       },
737       {
738           "isupper",
739           Summaries{
740               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
741                   // Is certainly uppercase.
742                   .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')),
743                          ReturnValueCondition(OutOfRange, SingleValue(0))})
744                   // The locale-specific range.
745                   .Case({ArgumentCondition(0U, WithinRange, {{128, UCharMax}})})
746                   // Other.
747                   .Case({ArgumentCondition(0U, OutOfRange,
748                                            {{'A', 'Z'}, {128, UCharMax}}),
749                          ReturnValueCondition(WithinRange, SingleValue(0))})},
750       },
751       {
752           "isxdigit",
753           Summaries{
754               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
755                   .Case(
756                       {ArgumentCondition(0U, WithinRange,
757                                          {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
758                        ReturnValueCondition(OutOfRange, SingleValue(0))})
759                   .Case(
760                       {ArgumentCondition(0U, OutOfRange,
761                                          {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
762                        ReturnValueCondition(WithinRange, SingleValue(0))})},
763       },
764 
765       // The getc() family of functions that returns either a char or an EOF.
766       {"getc", Summaries{Getc()}},
767       {"fgetc", Summaries{Getc()}},
768       {"getchar",
769        Summaries{Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall)
770                      .Case({ReturnValueCondition(
771                          WithinRange, {{EOFv, EOFv}, {0, UCharMax}})})}},
772 
773       // read()-like functions that never return more than buffer size.
774       // We are not sure how ssize_t is defined on every platform, so we
775       // provide three variants that should cover common cases.
776       {"read", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax),
777                          Read(LongLongTy, LongLongMax)}},
778       {"write", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax),
779                           Read(LongLongTy, LongLongMax)}},
780       {"fread", Summaries{Fread()}},
781       {"fwrite", Summaries{Fread()}},
782       // getline()-like functions either fail or read at least the delimiter.
783       {"getline", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax),
784                             Getline(LongLongTy, LongLongMax)}},
785       {"getdelim", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax),
786                              Getline(LongLongTy, LongLongMax)}},
787   };
788 }
789 
790 void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) {
791   // If this checker grows large enough to support C++, Objective-C, or other
792   // standard libraries, we could use multiple register...Checker() functions,
793   // which would register various checkers with the help of the same Checker
794   // class, turning on different function summaries.
795   mgr.registerChecker<StdLibraryFunctionsChecker>();
796 }
797 
798 bool ento::shouldRegisterStdCLibraryFunctionsChecker(const LangOptions &LO) {
799   return true;
800 }
801