xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp (revision c6b8484e855bffb0a7da487cd715cef774a46fb1)
1 //=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker improves modeling of a few simple library functions.
10 // It does not generate warnings.
11 //
12 // This checker provides a specification format - `Summary' - and
13 // contains descriptions of some library functions in this format. Each
14 // specification contains a list of branches for splitting the program state
15 // upon call, and range constraints on argument and return-value symbols that
16 // are satisfied on each branch. This spec can be expanded to include more
17 // items, like external effects of the function.
18 //
19 // The main difference between this approach and the body farms technique is
20 // in more explicit control over how many branches are produced. For example,
21 // consider standard C function `ispunct(int x)', which returns a non-zero value
22 // iff `x' is a punctuation character, that is, when `x' is in range
23 //   ['!', '/']  U  [':', '@']  U  ['[', '\`']  U  ['{', '~'].
24 // `Summary' provides only two branches for this function. However,
25 // any attempt to describe this range with if-statements in the body farm
26 // would result in many more branches. Because each branch needs to be analyzed
27 // independently, this significantly reduces performance. Additionally,
28 // once we consider a branch on which `x' is in range, say, ['!', '/'],
29 // we assume that such branch is an important separate path through the program,
30 // which may lead to false positives because considering this particular path
31 // was not consciously intended, and therefore it might have been unreachable.
32 //
33 // This checker uses eval::Call for modeling pure functions (functions without
34 // side effects), for which their `Summary' is a precise model. This avoids
35 // unnecessary invalidation passes. Conflicts with other checkers are unlikely
36 // because if the function has no other effects, other checkers would probably
37 // never want to improve upon the modeling done by this checker.
38 //
39 // Non-pure functions, for which only partial improvement over the default
40 // behavior is expected, are modeled via check::PostCall, non-intrusively.
41 //
42 // The following standard C functions are currently supported:
43 //
44 //   fgetc      getline   isdigit   isupper
45 //   fread      isalnum   isgraph   isxdigit
46 //   fwrite     isalpha   islower   read
47 //   getc       isascii   isprint   write
48 //   getchar    isblank   ispunct
49 //   getdelim   iscntrl   isspace
50 //
51 //===----------------------------------------------------------------------===//
52 
53 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
54 #include "clang/StaticAnalyzer/Core/Checker.h"
55 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
56 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
57 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
58 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
59 
60 using namespace clang;
61 using namespace clang::ento;
62 
63 namespace {
64 class StdLibraryFunctionsChecker : public Checker<check::PostCall, eval::Call> {
65   /// Below is a series of typedefs necessary to define function specs.
66   /// We avoid nesting types here because each additional qualifier
67   /// would need to be repeated in every function spec.
68   struct Summary;
69 
70   /// Specify how much the analyzer engine should entrust modeling this function
71   /// to us. If he doesn't, he performs additional invalidations.
72   enum InvalidationKind { NoEvalCall, EvalCallAsPure };
73 
74   // The universal integral type to use in value range descriptions.
75   // Unsigned to make sure overflows are well-defined.
76   typedef uint64_t RangeInt;
77 
78   /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is
79   /// a non-negative integer, which less than 5 and not equal to 2. For
80   /// `ComparesToArgument', holds information about how exactly to compare to
81   /// the argument.
82   typedef std::vector<std::pair<RangeInt, RangeInt>> IntRangeVector;
83 
84   /// A reference to an argument or return value by its number.
85   /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but
86   /// obviously uint32_t should be enough for all practical purposes.
87   typedef uint32_t ArgNo;
88   static const ArgNo Ret;
89 
90   /// Polymorphic base class that represents a constraint on a given argument
91   /// (or return value) of a function. Derived classes implement different kind
92   /// of constraints, e.g range constraints or correlation between two
93   /// arguments.
94   class ValueConstraint {
95   public:
96     ValueConstraint(ArgNo ArgN) : ArgN(ArgN) {}
97     virtual ~ValueConstraint() {}
98     /// Apply the effects of the constraint on the given program state. If null
99     /// is returned then the constraint is not feasible.
100     virtual ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
101                                   const Summary &Summary) const = 0;
102     ArgNo getArgNo() const { return ArgN; }
103 
104   protected:
105     ArgNo ArgN; // Argument to which we apply the constraint.
106   };
107 
108   /// Given a range, should the argument stay inside or outside this range?
109   enum RangeKind { OutOfRange, WithinRange };
110 
111   /// Encapsulates a single range on a single symbol within a branch.
112   class RangeConstraint : public ValueConstraint {
113     RangeKind Kind;      // Kind of range definition.
114     IntRangeVector Args; // Polymorphic arguments.
115 
116   public:
117     RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Args)
118         : ValueConstraint(ArgN), Kind(Kind), Args(Args) {}
119 
120     const IntRangeVector &getRanges() const {
121       return Args;
122     }
123 
124   private:
125     ProgramStateRef applyAsOutOfRange(ProgramStateRef State,
126                                       const CallEvent &Call,
127                                       const Summary &Summary) const;
128     ProgramStateRef applyAsWithinRange(ProgramStateRef State,
129                                        const CallEvent &Call,
130                                        const Summary &Summary) const;
131   public:
132     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
133                           const Summary &Summary) const override {
134       switch (Kind) {
135       case OutOfRange:
136         return applyAsOutOfRange(State, Call, Summary);
137       case WithinRange:
138         return applyAsWithinRange(State, Call, Summary);
139       }
140       llvm_unreachable("Unknown range kind!");
141     }
142   };
143 
144   class ComparisonConstraint : public ValueConstraint {
145     BinaryOperator::Opcode Opcode;
146     ArgNo OtherArgN;
147 
148   public:
149     ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode,
150                          ArgNo OtherArgN)
151         : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {}
152     ArgNo getOtherArgNo() const { return OtherArgN; }
153     BinaryOperator::Opcode getOpcode() const { return Opcode; }
154     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
155                           const Summary &Summary) const override;
156   };
157 
158   // Pointer to the ValueConstraint. We need a copyable, polymorphic and
159   // default initialize able type (vector needs that). A raw pointer was good,
160   // however, we cannot default initialize that. unique_ptr makes the Summary
161   // class non-copyable, therefore not an option. Releasing the copyability
162   // requirement would render the initialization of the Summary map infeasible.
163   using ValueConstraintPtr = std::shared_ptr<ValueConstraint>;
164   /// The complete list of constraints that defines a single branch.
165   typedef std::vector<ValueConstraintPtr> ConstraintSet;
166 
167   using ArgTypes = std::vector<QualType>;
168   using Cases = std::vector<ConstraintSet>;
169 
170   /// Includes information about function prototype (which is necessary to
171   /// ensure we're modeling the right function and casting values properly),
172   /// approach to invalidation, and a list of branches - essentially, a list
173   /// of list of ranges - essentially, a list of lists of lists of segments.
174   struct Summary {
175     const ArgTypes ArgTys;
176     const QualType RetTy;
177     const InvalidationKind InvalidationKd;
178     Cases CaseConstraints;
179     ConstraintSet ArgConstraints;
180 
181     Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd)
182         : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {}
183 
184     Summary &Case(ConstraintSet&& CS) {
185       CaseConstraints.push_back(std::move(CS));
186       return *this;
187     }
188 
189   private:
190     static void assertTypeSuitableForSummary(QualType T) {
191       assert(!T->isVoidType() &&
192              "We should have had no significant void types in the spec");
193       assert(T.isCanonical() &&
194              "We should only have canonical types in the spec");
195       // FIXME: lift this assert (but not the ones above!)
196       assert(T->isIntegralOrEnumerationType() &&
197              "We only support integral ranges in the spec");
198     }
199 
200   public:
201     QualType getArgType(ArgNo ArgN) const {
202       QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN];
203       assertTypeSuitableForSummary(T);
204       return T;
205     }
206 
207     /// Try our best to figure out if the call expression is the call of
208     /// *the* library function to which this specification applies.
209     bool matchesCall(const CallExpr *CE) const;
210   };
211 
212   // The same function (as in, function identifier) may have different
213   // summaries assigned to it, with different argument and return value types.
214   // We call these "variants" of the function. This can be useful for handling
215   // C++ function overloads, and also it can be used when the same function
216   // may have different definitions on different platforms.
217   typedef std::vector<Summary> Summaries;
218 
219   // The map of all functions supported by the checker. It is initialized
220   // lazily, and it doesn't change after initialization.
221   mutable llvm::StringMap<Summaries> FunctionSummaryMap;
222 
223   // Auxiliary functions to support ArgNo within all structures
224   // in a unified manner.
225   static QualType getArgType(const Summary &Summary, ArgNo ArgN) {
226     return Summary.getArgType(ArgN);
227   }
228   static QualType getArgType(const CallEvent &Call, ArgNo ArgN) {
229     return ArgN == Ret ? Call.getResultType().getCanonicalType()
230                        : Call.getArgExpr(ArgN)->getType().getCanonicalType();
231   }
232   static QualType getArgType(const CallExpr *CE, ArgNo ArgN) {
233     return ArgN == Ret ? CE->getType().getCanonicalType()
234                        : CE->getArg(ArgN)->getType().getCanonicalType();
235   }
236   static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) {
237     return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN);
238   }
239 
240 public:
241   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
242   bool evalCall(const CallEvent &Call, CheckerContext &C) const;
243 
244 private:
245   Optional<Summary> findFunctionSummary(const FunctionDecl *FD,
246                                         const CallExpr *CE,
247                                         CheckerContext &C) const;
248 
249   void initFunctionSummaries(CheckerContext &C) const;
250 };
251 
252 const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret =
253     std::numeric_limits<ArgNo>::max();
254 
255 } // end of anonymous namespace
256 
257 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange(
258     ProgramStateRef State, const CallEvent &Call,
259     const Summary &Summary) const {
260 
261   ProgramStateManager &Mgr = State->getStateManager();
262   SValBuilder &SVB = Mgr.getSValBuilder();
263   BasicValueFactory &BVF = SVB.getBasicValueFactory();
264   ConstraintManager &CM = Mgr.getConstraintManager();
265   QualType T = getArgType(Summary, getArgNo());
266   SVal V = getArgSVal(Call, getArgNo());
267 
268   if (auto N = V.getAs<NonLoc>()) {
269     const IntRangeVector &R = getRanges();
270     size_t E = R.size();
271     for (size_t I = 0; I != E; ++I) {
272       const llvm::APSInt &Min = BVF.getValue(R[I].first, T);
273       const llvm::APSInt &Max = BVF.getValue(R[I].second, T);
274       assert(Min <= Max);
275       State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
276       if (!State)
277         break;
278     }
279   }
280 
281   return State;
282 }
283 
284 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsWithinRange(
285     ProgramStateRef State, const CallEvent &Call,
286     const Summary &Summary) const {
287 
288   ProgramStateManager &Mgr = State->getStateManager();
289   SValBuilder &SVB = Mgr.getSValBuilder();
290   BasicValueFactory &BVF = SVB.getBasicValueFactory();
291   ConstraintManager &CM = Mgr.getConstraintManager();
292   QualType T = getArgType(Summary, getArgNo());
293   SVal V = getArgSVal(Call, getArgNo());
294 
295   // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R".
296   // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary,
297   // and then cut away all holes in R one by one.
298   //
299   // E.g. consider a range list R as [A, B] and [C, D]
300   // -------+--------+------------------+------------+----------->
301   //        A        B                  C            D
302   // Then we assume that the value is not in [-inf, A - 1],
303   // then not in [D + 1, +inf], then not in [B + 1, C - 1]
304   if (auto N = V.getAs<NonLoc>()) {
305     const IntRangeVector &R = getRanges();
306     size_t E = R.size();
307 
308     const llvm::APSInt &MinusInf = BVF.getMinValue(T);
309     const llvm::APSInt &PlusInf = BVF.getMaxValue(T);
310 
311     const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T);
312     if (Left != PlusInf) {
313       assert(MinusInf <= Left);
314       State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false);
315       if (!State)
316         return nullptr;
317     }
318 
319     const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T);
320     if (Right != MinusInf) {
321       assert(Right <= PlusInf);
322       State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false);
323       if (!State)
324         return nullptr;
325     }
326 
327     for (size_t I = 1; I != E; ++I) {
328       const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T);
329       const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T);
330       if (Min <= Max) {
331         State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
332         if (!State)
333           return nullptr;
334       }
335     }
336   }
337 
338   return State;
339 }
340 
341 ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply(
342     ProgramStateRef State, const CallEvent &Call,
343     const Summary &Summary) const {
344 
345   ProgramStateManager &Mgr = State->getStateManager();
346   SValBuilder &SVB = Mgr.getSValBuilder();
347   QualType CondT = SVB.getConditionType();
348   QualType T = getArgType(Summary, getArgNo());
349   SVal V = getArgSVal(Call, getArgNo());
350 
351   BinaryOperator::Opcode Op = getOpcode();
352   ArgNo OtherArg = getOtherArgNo();
353   SVal OtherV = getArgSVal(Call, OtherArg);
354   QualType OtherT = getArgType(Call, OtherArg);
355   // Note: we avoid integral promotion for comparison.
356   OtherV = SVB.evalCast(OtherV, T, OtherT);
357   if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT)
358                        .getAs<DefinedOrUnknownSVal>())
359     State = State->assume(*CompV, true);
360   return State;
361 }
362 
363 void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call,
364                                                CheckerContext &C) const {
365   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
366   if (!FD)
367     return;
368 
369   const CallExpr *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
370   if (!CE)
371     return;
372 
373   Optional<Summary> FoundSummary = findFunctionSummary(FD, CE, C);
374   if (!FoundSummary)
375     return;
376 
377   // Now apply the constraints.
378   const Summary &Summary = *FoundSummary;
379   ProgramStateRef State = C.getState();
380 
381   // Apply case/branch specifications.
382   for (const auto &VRS : Summary.CaseConstraints) {
383     ProgramStateRef NewState = State;
384     for (const auto &VR: VRS) {
385       NewState = VR->apply(NewState, Call, Summary);
386       if (!NewState)
387         break;
388     }
389 
390     if (NewState && NewState != State)
391       C.addTransition(NewState);
392   }
393 }
394 
395 bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call,
396                                           CheckerContext &C) const {
397   const auto *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
398   if (!FD)
399     return false;
400 
401   const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
402   if (!CE)
403     return false;
404 
405   Optional<Summary> FoundSummary = findFunctionSummary(FD, CE, C);
406   if (!FoundSummary)
407     return false;
408 
409   const Summary &Summary = *FoundSummary;
410   switch (Summary.InvalidationKd) {
411   case EvalCallAsPure: {
412     ProgramStateRef State = C.getState();
413     const LocationContext *LC = C.getLocationContext();
414     SVal V = C.getSValBuilder().conjureSymbolVal(
415         CE, LC, CE->getType().getCanonicalType(), C.blockCount());
416     State = State->BindExpr(CE, LC, V);
417     C.addTransition(State);
418     return true;
419   }
420   case NoEvalCall:
421     // Summary tells us to avoid performing eval::Call. The function is possibly
422     // evaluated by another checker, or evaluated conservatively.
423     return false;
424   }
425   llvm_unreachable("Unknown invalidation kind!");
426 }
427 
428 bool StdLibraryFunctionsChecker::Summary::matchesCall(
429     const CallExpr *CE) const {
430   // Check number of arguments:
431   if (CE->getNumArgs() != ArgTys.size())
432     return false;
433 
434   // Check return type if relevant:
435   if (!RetTy.isNull() && RetTy != CE->getType().getCanonicalType())
436     return false;
437 
438   // Check argument types when relevant:
439   for (size_t I = 0, E = ArgTys.size(); I != E; ++I) {
440     QualType FormalT = ArgTys[I];
441     // Null type marks irrelevant arguments.
442     if (FormalT.isNull())
443       continue;
444 
445     assertTypeSuitableForSummary(FormalT);
446 
447     QualType ActualT = StdLibraryFunctionsChecker::getArgType(CE, I);
448     assert(ActualT.isCanonical());
449     if (ActualT != FormalT)
450       return false;
451   }
452 
453   return true;
454 }
455 
456 Optional<StdLibraryFunctionsChecker::Summary>
457 StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD,
458                                                 const CallExpr *CE,
459                                                 CheckerContext &C) const {
460   // Note: we cannot always obtain FD from CE
461   // (eg. virtual call, or call by pointer).
462   assert(CE);
463 
464   if (!FD)
465     return None;
466 
467   initFunctionSummaries(C);
468 
469   IdentifierInfo *II = FD->getIdentifier();
470   if (!II)
471     return None;
472   StringRef Name = II->getName();
473   if (Name.empty() || !C.isCLibraryFunction(FD, Name))
474     return None;
475 
476   auto FSMI = FunctionSummaryMap.find(Name);
477   if (FSMI == FunctionSummaryMap.end())
478     return None;
479 
480   // Verify that function signature matches the spec in advance.
481   // Otherwise we might be modeling the wrong function.
482   // Strict checking is important because we will be conducting
483   // very integral-type-sensitive operations on arguments and
484   // return values.
485   const Summaries &SpecVariants = FSMI->second;
486   for (const Summary &Spec : SpecVariants)
487     if (Spec.matchesCall(CE))
488       return Spec;
489 
490   return None;
491 }
492 
493 void StdLibraryFunctionsChecker::initFunctionSummaries(
494     CheckerContext &C) const {
495   if (!FunctionSummaryMap.empty())
496     return;
497 
498   SValBuilder &SVB = C.getSValBuilder();
499   BasicValueFactory &BVF = SVB.getBasicValueFactory();
500   const ASTContext &ACtx = BVF.getContext();
501 
502   // These types are useful for writing specifications quickly,
503   // New specifications should probably introduce more types.
504   // Some types are hard to obtain from the AST, eg. "ssize_t".
505   // In such cases it should be possible to provide multiple variants
506   // of function summary for common cases (eg. ssize_t could be int or long
507   // or long long, so three summary variants would be enough).
508   // Of course, function variants are also useful for C++ overloads.
509   const QualType
510       Irrelevant{}; // A placeholder, whenever we do not care about the type.
511   const QualType IntTy = ACtx.IntTy;
512   const QualType LongTy = ACtx.LongTy;
513   const QualType LongLongTy = ACtx.LongLongTy;
514   const QualType SizeTy = ACtx.getSizeType();
515 
516   const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue();
517   const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue();
518   const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue();
519 
520   // Set UCharRangeMax to min of int or uchar maximum value.
521   // The C standard states that the arguments of functions like isalpha must
522   // be representable as an unsigned char. Their type is 'int', so the max
523   // value of the argument should be min(UCharMax, IntMax). This just happen
524   // to be true for commonly used and well tested instruction set
525   // architectures, but not for others.
526   const RangeInt UCharRangeMax =
527       std::min(BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue(), IntMax);
528 
529   // The platform dependent value of EOF.
530   // Try our best to parse this from the Preprocessor, otherwise fallback to -1.
531   const auto EOFv = [&C]() -> RangeInt {
532     if (const llvm::Optional<int> OptInt =
533             tryExpandAsInteger("EOF", C.getPreprocessor()))
534       return *OptInt;
535     return -1;
536   }();
537 
538   // We are finally ready to define specifications for all supported functions.
539   //
540   // The signature needs to have the correct number of arguments.
541   // However, we insert `Irrelevant' when the type is insignificant.
542   //
543   // Argument ranges should always cover all variants. If return value
544   // is completely unknown, omit it from the respective range set.
545   //
546   // All types in the spec need to be canonical.
547   //
548   // Every item in the list of range sets represents a particular
549   // execution path the analyzer would need to explore once
550   // the call is modeled - a new program state is constructed
551   // for every range set, and each range line in the range set
552   // corresponds to a specific constraint within this state.
553   //
554   // Upon comparing to another argument, the other argument is casted
555   // to the current argument's type. This avoids proper promotion but
556   // seems useful. For example, read() receives size_t argument,
557   // and its return value, which is of type ssize_t, cannot be greater
558   // than this argument. If we made a promotion, and the size argument
559   // is equal to, say, 10, then we'd impose a range of [0, 10] on the
560   // return value, however the correct range is [-1, 10].
561   //
562   // Please update the list of functions in the header after editing!
563   //
564 
565   // Below are helpers functions to create the summaries.
566   auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind,
567                               IntRangeVector Ranges) {
568     return std::make_shared<RangeConstraint>(ArgN, Kind, Ranges);
569   };
570   struct {
571     auto operator()(RangeKind Kind, IntRangeVector Ranges) {
572       return std::make_shared<RangeConstraint>(Ret, Kind, Ranges);
573     }
574     auto operator()(BinaryOperator::Opcode Op, ArgNo OtherArgN) {
575       return std::make_shared<ComparisonConstraint>(Ret, Op, OtherArgN);
576     }
577   } ReturnValueCondition;
578   auto Range = [](RangeInt b, RangeInt e) {
579     return IntRangeVector{std::pair<RangeInt, RangeInt>{b, e}};
580   };
581   auto SingleValue = [](RangeInt v) {
582     return IntRangeVector{std::pair<RangeInt, RangeInt>{v, v}};
583   };
584   auto LessThanOrEq = BO_LE;
585 
586   using RetType = QualType;
587 
588   // Templates for summaries that are reused by many functions.
589   auto Getc = [&]() {
590     return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall)
591         .Case({ReturnValueCondition(WithinRange,
592                                     {{EOFv, EOFv}, {0, UCharRangeMax}})});
593   };
594   auto Read = [&](RetType R, RangeInt Max) {
595     return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R},
596                    NoEvalCall)
597         .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)),
598                ReturnValueCondition(WithinRange, Range(-1, Max))});
599   };
600   auto Fread = [&]() {
601     return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy, Irrelevant},
602                    RetType{SizeTy}, NoEvalCall)
603         .Case({
604             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
605         });
606   };
607   auto Getline = [&](RetType R, RangeInt Max) {
608     return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R},
609                    NoEvalCall)
610         .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})});
611   };
612 
613   FunctionSummaryMap = {
614       // The isascii() family of functions.
615       {
616           "isalnum",
617           Summaries{
618               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
619                   // Boils down to isupper() or islower() or isdigit().
620                   .Case(
621                       {ArgumentCondition(0U, WithinRange,
622                                          {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}),
623                        ReturnValueCondition(OutOfRange, SingleValue(0))})
624                   // The locale-specific range.
625                   // No post-condition. We are completely unaware of
626                   // locale-specific return values.
627                   .Case({ArgumentCondition(0U, WithinRange,
628                                            {{128, UCharRangeMax}})})
629                   .Case({ArgumentCondition(0U, OutOfRange,
630                                            {{'0', '9'},
631                                             {'A', 'Z'},
632                                             {'a', 'z'},
633                                             {128, UCharRangeMax}}),
634                          ReturnValueCondition(WithinRange, SingleValue(0))})},
635       },
636       {
637           "isalpha",
638           Summaries{
639               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
640                   .Case({ArgumentCondition(0U, WithinRange,
641                                            {{'A', 'Z'}, {'a', 'z'}}),
642                          ReturnValueCondition(OutOfRange, SingleValue(0))})
643                   // The locale-specific range.
644                   .Case({ArgumentCondition(0U, WithinRange,
645                                            {{128, UCharRangeMax}})})
646                   .Case({ArgumentCondition(
647                              0U, OutOfRange,
648                              {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}),
649                          ReturnValueCondition(WithinRange, SingleValue(0))})},
650       },
651       {
652           "isascii",
653           Summaries{
654               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
655                   .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
656                          ReturnValueCondition(OutOfRange, SingleValue(0))})
657                   .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)),
658                          ReturnValueCondition(WithinRange, SingleValue(0))})},
659       },
660       {
661           "isblank",
662           Summaries{
663               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
664                   .Case({ArgumentCondition(0U, WithinRange,
665                                            {{'\t', '\t'}, {' ', ' '}}),
666                          ReturnValueCondition(OutOfRange, SingleValue(0))})
667                   .Case({ArgumentCondition(0U, OutOfRange,
668                                            {{'\t', '\t'}, {' ', ' '}}),
669                          ReturnValueCondition(WithinRange, SingleValue(0))})},
670       },
671       {
672           "iscntrl",
673           Summaries{
674               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
675                   .Case({ArgumentCondition(0U, WithinRange,
676                                            {{0, 32}, {127, 127}}),
677                          ReturnValueCondition(OutOfRange, SingleValue(0))})
678                   .Case(
679                       {ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}),
680                        ReturnValueCondition(WithinRange, SingleValue(0))})},
681       },
682       {
683           "isdigit",
684           Summaries{
685               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
686                   .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')),
687                          ReturnValueCondition(OutOfRange, SingleValue(0))})
688                   .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')),
689                          ReturnValueCondition(WithinRange, SingleValue(0))})},
690       },
691       {
692           "isgraph",
693           Summaries{
694               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
695                   .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)),
696                          ReturnValueCondition(OutOfRange, SingleValue(0))})
697                   .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)),
698                          ReturnValueCondition(WithinRange, SingleValue(0))})},
699       },
700       {
701           "islower",
702           Summaries{
703               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
704                   // Is certainly lowercase.
705                   .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')),
706                          ReturnValueCondition(OutOfRange, SingleValue(0))})
707                   // Is ascii but not lowercase.
708                   .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
709                          ArgumentCondition(0U, OutOfRange, Range('a', 'z')),
710                          ReturnValueCondition(WithinRange, SingleValue(0))})
711                   // The locale-specific range.
712                   .Case({ArgumentCondition(0U, WithinRange,
713                                            {{128, UCharRangeMax}})})
714                   // Is not an unsigned char.
715                   .Case({ArgumentCondition(0U, OutOfRange,
716                                            Range(0, UCharRangeMax)),
717                          ReturnValueCondition(WithinRange, SingleValue(0))})},
718       },
719       {
720           "isprint",
721           Summaries{
722               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
723                   .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)),
724                          ReturnValueCondition(OutOfRange, SingleValue(0))})
725                   .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)),
726                          ReturnValueCondition(WithinRange, SingleValue(0))})},
727       },
728       {
729           "ispunct",
730           Summaries{
731               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
732                   .Case({ArgumentCondition(
733                              0U, WithinRange,
734                              {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
735                          ReturnValueCondition(OutOfRange, SingleValue(0))})
736                   .Case({ArgumentCondition(
737                              0U, OutOfRange,
738                              {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
739                          ReturnValueCondition(WithinRange, SingleValue(0))})},
740       },
741       {
742           "isspace",
743           Summaries{
744               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
745                   // Space, '\f', '\n', '\r', '\t', '\v'.
746                   .Case({ArgumentCondition(0U, WithinRange,
747                                            {{9, 13}, {' ', ' '}}),
748                          ReturnValueCondition(OutOfRange, SingleValue(0))})
749                   // The locale-specific range.
750                   .Case({ArgumentCondition(0U, WithinRange,
751                                            {{128, UCharRangeMax}})})
752                   .Case({ArgumentCondition(
753                              0U, OutOfRange,
754                              {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}),
755                          ReturnValueCondition(WithinRange, SingleValue(0))})},
756       },
757       {
758           "isupper",
759           Summaries{
760               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
761                   // Is certainly uppercase.
762                   .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')),
763                          ReturnValueCondition(OutOfRange, SingleValue(0))})
764                   // The locale-specific range.
765                   .Case({ArgumentCondition(0U, WithinRange,
766                                            {{128, UCharRangeMax}})})
767                   // Other.
768                   .Case({ArgumentCondition(0U, OutOfRange,
769                                            {{'A', 'Z'}, {128, UCharRangeMax}}),
770                          ReturnValueCondition(WithinRange, SingleValue(0))})},
771       },
772       {
773           "isxdigit",
774           Summaries{
775               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
776                   .Case(
777                       {ArgumentCondition(0U, WithinRange,
778                                          {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
779                        ReturnValueCondition(OutOfRange, SingleValue(0))})
780                   .Case(
781                       {ArgumentCondition(0U, OutOfRange,
782                                          {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
783                        ReturnValueCondition(WithinRange, SingleValue(0))})},
784       },
785 
786       // The getc() family of functions that returns either a char or an EOF.
787       {"getc", Summaries{Getc()}},
788       {"fgetc", Summaries{Getc()}},
789       {"getchar",
790        Summaries{Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall)
791                      .Case({ReturnValueCondition(
792                          WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})})}},
793 
794       // read()-like functions that never return more than buffer size.
795       // We are not sure how ssize_t is defined on every platform, so we
796       // provide three variants that should cover common cases.
797       {"read", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax),
798                          Read(LongLongTy, LongLongMax)}},
799       {"write", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax),
800                           Read(LongLongTy, LongLongMax)}},
801       {"fread", Summaries{Fread()}},
802       {"fwrite", Summaries{Fread()}},
803       // getline()-like functions either fail or read at least the delimiter.
804       {"getline", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax),
805                             Getline(LongLongTy, LongLongMax)}},
806       {"getdelim", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax),
807                              Getline(LongLongTy, LongLongMax)}},
808   };
809 }
810 
// Registration entry point for this checker: registers a
// StdLibraryFunctionsChecker instance with the supplied CheckerManager.
// The return value of registerChecker<>() is not used here.
811 void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) {
812   // If this checker grows large enough to support C++, Objective-C, or other
813   // standard libraries, we could use multiple register...Checker() functions,
814   // which would register various checkers with the help of the same Checker
815   // class, turning on different function summaries.
816   mgr.registerChecker<StdLibraryFunctionsChecker>();
817 }
818 
// Registration predicate paired with registerStdCLibraryFunctionsChecker().
// Always returns true: the LangOptions argument is accepted but not
// consulted, so the answer does not depend on the language options.
819 bool ento::shouldRegisterStdCLibraryFunctionsChecker(const LangOptions &LO) {
820   return true;
821 }
822