xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp (revision f59bb40e361c061adaa19e988f1f5769d3b8fac7)
1 //=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker improves modeling of a few simple library functions.
10 //
11 // This checker provides a specification format - `Summary' - and
12 // contains descriptions of some library functions in this format. Each
13 // specification contains a list of branches for splitting the program state
14 // upon call, and range constraints on argument and return-value symbols that
15 // are satisfied on each branch. This spec can be expanded to include more
16 // items, like external effects of the function.
17 //
18 // The main difference between this approach and the body farms technique is
19 // in more explicit control over how many branches are produced. For example,
20 // consider standard C function `ispunct(int x)', which returns a non-zero value
21 // iff `x' is a punctuation character, that is, when `x' is in range
22 //   ['!', '/']   [':', '@']  U  ['[', '\`']  U  ['{', '~'].
23 // `Summary' provides only two branches for this function. However,
24 // any attempt to describe this range with if-statements in the body farm
25 // would result in many more branches. Because each branch needs to be analyzed
26 // independently, this significantly reduces performance. Additionally,
27 // once we consider a branch on which `x' is in range, say, ['!', '/'],
28 // we assume that such branch is an important separate path through the program,
29 // which may lead to false positives because considering this particular path
30 // was not consciously intended, and therefore it might have been unreachable.
31 //
32 // This checker uses eval::Call for modeling pure functions (functions without
33 // side effets), for which their `Summary' is a precise model. This avoids
34 // unnecessary invalidation passes. Conflicts with other checkers are unlikely
35 // because if the function has no other effects, other checkers would probably
36 // never want to improve upon the modeling done by this checker.
37 //
38 // Non-pure functions, for which only partial improvement over the default
39 // behavior is expected, are modeled via check::PostCall, non-intrusively.
40 //
41 // The following standard C functions are currently supported:
42 //
43 //   fgetc      getline   isdigit   isupper
44 //   fread      isalnum   isgraph   isxdigit
45 //   fwrite     isalpha   islower   read
46 //   getc       isascii   isprint   write
47 //   getchar    isblank   ispunct
48 //   getdelim   iscntrl   isspace
49 //
50 //===----------------------------------------------------------------------===//
51 
52 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
53 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
54 #include "clang/StaticAnalyzer/Core/Checker.h"
55 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
56 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
57 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
58 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
59 
60 using namespace clang;
61 using namespace clang::ento;
62 
63 namespace {
64 class StdLibraryFunctionsChecker
65     : public Checker<check::PreCall, check::PostCall, eval::Call> {
66   /// Below is a series of typedefs necessary to define function specs.
67   /// We avoid nesting types here because each additional qualifier
68   /// would need to be repeated in every function spec.
69   struct Summary;
70 
71   /// Specify how much the analyzer engine should entrust modeling this function
72   /// to us. If he doesn't, he performs additional invalidations.
73   enum InvalidationKind { NoEvalCall, EvalCallAsPure };
74 
75   // The universal integral type to use in value range descriptions.
76   // Unsigned to make sure overflows are well-defined.
77   typedef uint64_t RangeInt;
78 
79   /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is
80   /// a non-negative integer, which less than 5 and not equal to 2. For
81   /// `ComparesToArgument', holds information about how exactly to compare to
82   /// the argument.
83   typedef std::vector<std::pair<RangeInt, RangeInt>> IntRangeVector;
84 
85   /// A reference to an argument or return value by its number.
86   /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but
87   /// obviously uint32_t should be enough for all practical purposes.
88   typedef uint32_t ArgNo;
89   static const ArgNo Ret;
90 
91   class ValueConstraint;
92 
93   // Pointer to the ValueConstraint. We need a copyable, polymorphic and
94   // default initialize able type (vector needs that). A raw pointer was good,
95   // however, we cannot default initialize that. unique_ptr makes the Summary
96   // class non-copyable, therefore not an option. Releasing the copyability
97   // requirement would render the initialization of the Summary map infeasible.
98   using ValueConstraintPtr = std::shared_ptr<ValueConstraint>;
99 
100   /// Polymorphic base class that represents a constraint on a given argument
101   /// (or return value) of a function. Derived classes implement different kind
102   /// of constraints, e.g range constraints or correlation between two
103   /// arguments.
104   class ValueConstraint {
105   public:
106     ValueConstraint(ArgNo ArgN) : ArgN(ArgN) {}
107     virtual ~ValueConstraint() {}
108     /// Apply the effects of the constraint on the given program state. If null
109     /// is returned then the constraint is not feasible.
110     virtual ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
111                                   const Summary &Summary) const = 0;
112     virtual ValueConstraintPtr negate() const {
113       llvm_unreachable("Not implemented");
114     };
115     ArgNo getArgNo() const { return ArgN; }
116 
117   protected:
118     ArgNo ArgN; // Argument to which we apply the constraint.
119   };
120 
121   /// Given a range, should the argument stay inside or outside this range?
122   enum RangeKind { OutOfRange, WithinRange };
123 
124   /// Encapsulates a single range on a single symbol within a branch.
125   class RangeConstraint : public ValueConstraint {
126     RangeKind Kind;      // Kind of range definition.
127     IntRangeVector Args; // Polymorphic arguments.
128 
129   public:
130     RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Args)
131         : ValueConstraint(ArgN), Kind(Kind), Args(Args) {}
132 
133     const IntRangeVector &getRanges() const {
134       return Args;
135     }
136 
137   private:
138     ProgramStateRef applyAsOutOfRange(ProgramStateRef State,
139                                       const CallEvent &Call,
140                                       const Summary &Summary) const;
141     ProgramStateRef applyAsWithinRange(ProgramStateRef State,
142                                        const CallEvent &Call,
143                                        const Summary &Summary) const;
144   public:
145     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
146                           const Summary &Summary) const override {
147       switch (Kind) {
148       case OutOfRange:
149         return applyAsOutOfRange(State, Call, Summary);
150       case WithinRange:
151         return applyAsWithinRange(State, Call, Summary);
152       }
153       llvm_unreachable("Unknown range kind!");
154     }
155 
156     ValueConstraintPtr negate() const override {
157       RangeConstraint Tmp(*this);
158       switch (Kind) {
159       case OutOfRange:
160         Tmp.Kind = WithinRange;
161         break;
162       case WithinRange:
163         Tmp.Kind = OutOfRange;
164         break;
165       }
166       return std::make_shared<RangeConstraint>(Tmp);
167     }
168   };
169 
170   class ComparisonConstraint : public ValueConstraint {
171     BinaryOperator::Opcode Opcode;
172     ArgNo OtherArgN;
173 
174   public:
175     ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode,
176                          ArgNo OtherArgN)
177         : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {}
178     ArgNo getOtherArgNo() const { return OtherArgN; }
179     BinaryOperator::Opcode getOpcode() const { return Opcode; }
180     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
181                           const Summary &Summary) const override;
182   };
183 
184   class NotNullConstraint : public ValueConstraint {
185     using ValueConstraint::ValueConstraint;
186     // This variable has a role when we negate the constraint.
187     bool CannotBeNull = true;
188 
189   public:
190     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
191                           const Summary &Summary) const override {
192       SVal V = getArgSVal(Call, getArgNo());
193       DefinedOrUnknownSVal L = V.castAs<DefinedOrUnknownSVal>();
194       if (!L.getAs<Loc>())
195         return State;
196 
197       return State->assume(L, CannotBeNull);
198     }
199 
200     ValueConstraintPtr negate() const override {
201       NotNullConstraint Tmp(*this);
202       Tmp.CannotBeNull = !this->CannotBeNull;
203       return std::make_shared<NotNullConstraint>(Tmp);
204     }
205   };
206 
207   /// The complete list of constraints that defines a single branch.
208   typedef std::vector<ValueConstraintPtr> ConstraintSet;
209 
210   using ArgTypes = std::vector<QualType>;
211   using Cases = std::vector<ConstraintSet>;
212 
213   /// Includes information about
214   ///   * function prototype (which is necessary to
215   ///     ensure we're modeling the right function and casting values properly),
216   ///   * approach to invalidation,
217   ///   * a list of branches - a list of list of ranges -
218   ///     A branch represents a path in the exploded graph of a function (which
219   ///     is a tree). So, a branch is a series of assumptions. In other words,
220   ///     branches represent split states and additional assumptions on top of
221   ///     the splitting assumption.
222   ///     For example, consider the branches in `isalpha(x)`
223   ///       Branch 1)
224   ///         x is in range ['A', 'Z'] or in ['a', 'z']
225   ///         then the return value is not 0. (I.e. out-of-range [0, 0])
226   ///       Branch 2)
227   ///         x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z']
228   ///         then the return value is 0.
229   ///   * a list of argument constraints, that must be true on every branch.
230   ///     If these constraints are not satisfied that means a fatal error
231   ///     usually resulting in undefined behaviour.
232   struct Summary {
233     const ArgTypes ArgTys;
234     const QualType RetTy;
235     const InvalidationKind InvalidationKd;
236     Cases CaseConstraints;
237     ConstraintSet ArgConstraints;
238 
239     Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd)
240         : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {}
241 
242     Summary &Case(ConstraintSet&& CS) {
243       CaseConstraints.push_back(std::move(CS));
244       return *this;
245     }
246     Summary &ArgConstraint(ValueConstraintPtr VC) {
247       ArgConstraints.push_back(VC);
248       return *this;
249     }
250 
251   private:
252     static void assertTypeSuitableForSummary(QualType T) {
253       assert(!T->isVoidType() &&
254              "We should have had no significant void types in the spec");
255       assert(T.isCanonical() &&
256              "We should only have canonical types in the spec");
257     }
258 
259   public:
260     QualType getArgType(ArgNo ArgN) const {
261       QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN];
262       assertTypeSuitableForSummary(T);
263       return T;
264     }
265 
266     /// Try our best to figure out if the call expression is the call of
267     /// *the* library function to which this specification applies.
268     bool matchesCall(const CallExpr *CE) const;
269   };
270 
271   // The same function (as in, function identifier) may have different
272   // summaries assigned to it, with different argument and return value types.
273   // We call these "variants" of the function. This can be useful for handling
274   // C++ function overloads, and also it can be used when the same function
275   // may have different definitions on different platforms.
276   typedef std::vector<Summary> Summaries;
277 
278   // The map of all functions supported by the checker. It is initialized
279   // lazily, and it doesn't change after initialization.
280   mutable llvm::StringMap<Summaries> FunctionSummaryMap;
281 
282   mutable std::unique_ptr<BugType> BT_InvalidArg;
283 
284   // Auxiliary functions to support ArgNo within all structures
285   // in a unified manner.
286   static QualType getArgType(const Summary &Summary, ArgNo ArgN) {
287     return Summary.getArgType(ArgN);
288   }
289   static QualType getArgType(const CallEvent &Call, ArgNo ArgN) {
290     return ArgN == Ret ? Call.getResultType().getCanonicalType()
291                        : Call.getArgExpr(ArgN)->getType().getCanonicalType();
292   }
293   static QualType getArgType(const CallExpr *CE, ArgNo ArgN) {
294     return ArgN == Ret ? CE->getType().getCanonicalType()
295                        : CE->getArg(ArgN)->getType().getCanonicalType();
296   }
297   static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) {
298     return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN);
299   }
300 
301 public:
302   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
303   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
304   bool evalCall(const CallEvent &Call, CheckerContext &C) const;
305 
306   enum CheckKind { CK_StdCLibraryFunctionArgsChecker, CK_NumCheckKinds };
307   DefaultBool ChecksEnabled[CK_NumCheckKinds];
308   CheckerNameRef CheckNames[CK_NumCheckKinds];
309 
310 private:
311   Optional<Summary> findFunctionSummary(const FunctionDecl *FD,
312                                         const CallExpr *CE,
313                                         CheckerContext &C) const;
314   Optional<Summary> findFunctionSummary(const CallEvent &Call,
315                                         CheckerContext &C) const;
316 
317   void initFunctionSummaries(CheckerContext &C) const;
318 
319   void reportBug(const CallEvent &Call, ExplodedNode *N,
320                  CheckerContext &C) const {
321     if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker])
322       return;
323     // TODO Add detailed diagnostic.
324     StringRef Msg = "Function argument constraint is not satisfied";
325     if (!BT_InvalidArg)
326       BT_InvalidArg = std::make_unique<BugType>(
327           CheckNames[CK_StdCLibraryFunctionArgsChecker],
328           "Unsatisfied argument constraints", categories::LogicError);
329     auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N);
330     bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R);
331     C.emitReport(std::move(R));
332   }
333 };
334 
335 const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret =
336     std::numeric_limits<ArgNo>::max();
337 
338 } // end of anonymous namespace
339 
340 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange(
341     ProgramStateRef State, const CallEvent &Call,
342     const Summary &Summary) const {
343 
344   ProgramStateManager &Mgr = State->getStateManager();
345   SValBuilder &SVB = Mgr.getSValBuilder();
346   BasicValueFactory &BVF = SVB.getBasicValueFactory();
347   ConstraintManager &CM = Mgr.getConstraintManager();
348   QualType T = getArgType(Summary, getArgNo());
349   SVal V = getArgSVal(Call, getArgNo());
350 
351   if (auto N = V.getAs<NonLoc>()) {
352     const IntRangeVector &R = getRanges();
353     size_t E = R.size();
354     for (size_t I = 0; I != E; ++I) {
355       const llvm::APSInt &Min = BVF.getValue(R[I].first, T);
356       const llvm::APSInt &Max = BVF.getValue(R[I].second, T);
357       assert(Min <= Max);
358       State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
359       if (!State)
360         break;
361     }
362   }
363 
364   return State;
365 }
366 
367 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsWithinRange(
368     ProgramStateRef State, const CallEvent &Call,
369     const Summary &Summary) const {
370 
371   ProgramStateManager &Mgr = State->getStateManager();
372   SValBuilder &SVB = Mgr.getSValBuilder();
373   BasicValueFactory &BVF = SVB.getBasicValueFactory();
374   ConstraintManager &CM = Mgr.getConstraintManager();
375   QualType T = getArgType(Summary, getArgNo());
376   SVal V = getArgSVal(Call, getArgNo());
377 
378   // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R".
379   // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary,
380   // and then cut away all holes in R one by one.
381   //
382   // E.g. consider a range list R as [A, B] and [C, D]
383   // -------+--------+------------------+------------+----------->
384   //        A        B                  C            D
385   // Then we assume that the value is not in [-inf, A - 1],
386   // then not in [D + 1, +inf], then not in [B + 1, C - 1]
387   if (auto N = V.getAs<NonLoc>()) {
388     const IntRangeVector &R = getRanges();
389     size_t E = R.size();
390 
391     const llvm::APSInt &MinusInf = BVF.getMinValue(T);
392     const llvm::APSInt &PlusInf = BVF.getMaxValue(T);
393 
394     const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T);
395     if (Left != PlusInf) {
396       assert(MinusInf <= Left);
397       State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false);
398       if (!State)
399         return nullptr;
400     }
401 
402     const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T);
403     if (Right != MinusInf) {
404       assert(Right <= PlusInf);
405       State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false);
406       if (!State)
407         return nullptr;
408     }
409 
410     for (size_t I = 1; I != E; ++I) {
411       const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T);
412       const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T);
413       if (Min <= Max) {
414         State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
415         if (!State)
416           return nullptr;
417       }
418     }
419   }
420 
421   return State;
422 }
423 
424 ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply(
425     ProgramStateRef State, const CallEvent &Call,
426     const Summary &Summary) const {
427 
428   ProgramStateManager &Mgr = State->getStateManager();
429   SValBuilder &SVB = Mgr.getSValBuilder();
430   QualType CondT = SVB.getConditionType();
431   QualType T = getArgType(Summary, getArgNo());
432   SVal V = getArgSVal(Call, getArgNo());
433 
434   BinaryOperator::Opcode Op = getOpcode();
435   ArgNo OtherArg = getOtherArgNo();
436   SVal OtherV = getArgSVal(Call, OtherArg);
437   QualType OtherT = getArgType(Call, OtherArg);
438   // Note: we avoid integral promotion for comparison.
439   OtherV = SVB.evalCast(OtherV, T, OtherT);
440   if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT)
441                        .getAs<DefinedOrUnknownSVal>())
442     State = State->assume(*CompV, true);
443   return State;
444 }
445 
446 void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call,
447                                               CheckerContext &C) const {
448   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
449   if (!FoundSummary)
450     return;
451 
452   const Summary &Summary = *FoundSummary;
453   ProgramStateRef State = C.getState();
454 
455   for (const ValueConstraintPtr& VC : Summary.ArgConstraints) {
456     ProgramStateRef SuccessSt = VC->apply(State, Call, Summary);
457     ProgramStateRef FailureSt = VC->negate()->apply(State, Call, Summary);
458     // The argument constraint is not satisfied.
459     if (FailureSt && !SuccessSt) {
460       if (ExplodedNode *N = C.generateErrorNode(State))
461         reportBug(Call, N, C);
462       break;
463     } else {
464       // Apply the constraint even if we cannot reason about the argument. This
465       // means both SuccessSt and FailureSt can be true. If we weren't applying
466       // the constraint that would mean that symbolic execution continues on a
467       // code whose behaviour is undefined.
468       assert(SuccessSt);
469       C.addTransition(SuccessSt);
470     }
471   }
472 }
473 
474 void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call,
475                                                CheckerContext &C) const {
476   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
477   if (!FoundSummary)
478     return;
479 
480   // Now apply the constraints.
481   const Summary &Summary = *FoundSummary;
482   ProgramStateRef State = C.getState();
483 
484   // Apply case/branch specifications.
485   for (const auto &VRS : Summary.CaseConstraints) {
486     ProgramStateRef NewState = State;
487     for (const auto &VR: VRS) {
488       NewState = VR->apply(NewState, Call, Summary);
489       if (!NewState)
490         break;
491     }
492 
493     if (NewState && NewState != State)
494       C.addTransition(NewState);
495   }
496 }
497 
498 bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call,
499                                           CheckerContext &C) const {
500   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
501   if (!FoundSummary)
502     return false;
503 
504   const Summary &Summary = *FoundSummary;
505   switch (Summary.InvalidationKd) {
506   case EvalCallAsPure: {
507     ProgramStateRef State = C.getState();
508     const LocationContext *LC = C.getLocationContext();
509     const auto *CE = cast_or_null<CallExpr>(Call.getOriginExpr());
510     SVal V = C.getSValBuilder().conjureSymbolVal(
511         CE, LC, CE->getType().getCanonicalType(), C.blockCount());
512     State = State->BindExpr(CE, LC, V);
513     C.addTransition(State);
514     return true;
515   }
516   case NoEvalCall:
517     // Summary tells us to avoid performing eval::Call. The function is possibly
518     // evaluated by another checker, or evaluated conservatively.
519     return false;
520   }
521   llvm_unreachable("Unknown invalidation kind!");
522 }
523 
524 bool StdLibraryFunctionsChecker::Summary::matchesCall(
525     const CallExpr *CE) const {
526   // Check number of arguments:
527   if (CE->getNumArgs() != ArgTys.size())
528     return false;
529 
530   // Check return type if relevant:
531   if (!RetTy.isNull() && RetTy != CE->getType().getCanonicalType())
532     return false;
533 
534   // Check argument types when relevant:
535   for (size_t I = 0, E = ArgTys.size(); I != E; ++I) {
536     QualType FormalT = ArgTys[I];
537     // Null type marks irrelevant arguments.
538     if (FormalT.isNull())
539       continue;
540 
541     assertTypeSuitableForSummary(FormalT);
542 
543     QualType ActualT = StdLibraryFunctionsChecker::getArgType(CE, I);
544     assert(ActualT.isCanonical());
545     if (ActualT != FormalT)
546       return false;
547   }
548 
549   return true;
550 }
551 
552 Optional<StdLibraryFunctionsChecker::Summary>
553 StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD,
554                                                 const CallExpr *CE,
555                                                 CheckerContext &C) const {
556   // Note: we cannot always obtain FD from CE
557   // (eg. virtual call, or call by pointer).
558   assert(CE);
559 
560   if (!FD)
561     return None;
562 
563   initFunctionSummaries(C);
564 
565   IdentifierInfo *II = FD->getIdentifier();
566   if (!II)
567     return None;
568   StringRef Name = II->getName();
569   if (Name.empty() || !C.isCLibraryFunction(FD, Name))
570     return None;
571 
572   auto FSMI = FunctionSummaryMap.find(Name);
573   if (FSMI == FunctionSummaryMap.end())
574     return None;
575 
576   // Verify that function signature matches the spec in advance.
577   // Otherwise we might be modeling the wrong function.
578   // Strict checking is important because we will be conducting
579   // very integral-type-sensitive operations on arguments and
580   // return values.
581   const Summaries &SpecVariants = FSMI->second;
582   for (const Summary &Spec : SpecVariants)
583     if (Spec.matchesCall(CE))
584       return Spec;
585 
586   return None;
587 }
588 
589 Optional<StdLibraryFunctionsChecker::Summary>
590 StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call,
591                                                 CheckerContext &C) const {
592   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
593   if (!FD)
594     return None;
595   const CallExpr *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
596   if (!CE)
597     return None;
598   return findFunctionSummary(FD, CE, C);
599 }
600 
601 void StdLibraryFunctionsChecker::initFunctionSummaries(
602     CheckerContext &C) const {
603   if (!FunctionSummaryMap.empty())
604     return;
605 
606   SValBuilder &SVB = C.getSValBuilder();
607   BasicValueFactory &BVF = SVB.getBasicValueFactory();
608   const ASTContext &ACtx = BVF.getContext();
609 
610   // These types are useful for writing specifications quickly,
611   // New specifications should probably introduce more types.
612   // Some types are hard to obtain from the AST, eg. "ssize_t".
613   // In such cases it should be possible to provide multiple variants
614   // of function summary for common cases (eg. ssize_t could be int or long
615   // or long long, so three summary variants would be enough).
616   // Of course, function variants are also useful for C++ overloads.
617   const QualType
618       Irrelevant{}; // A placeholder, whenever we do not care about the type.
619   const QualType IntTy = ACtx.IntTy;
620   const QualType LongTy = ACtx.LongTy;
621   const QualType LongLongTy = ACtx.LongLongTy;
622   const QualType SizeTy = ACtx.getSizeType();
623   const QualType VoidPtrTy = ACtx.VoidPtrTy; // void *T
624   const QualType ConstVoidPtrTy =
625       ACtx.getPointerType(ACtx.VoidTy.withConst()); // const void *T
626 
627   const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue();
628   const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue();
629   const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue();
630 
631   // Set UCharRangeMax to min of int or uchar maximum value.
632   // The C standard states that the arguments of functions like isalpha must
633   // be representable as an unsigned char. Their type is 'int', so the max
634   // value of the argument should be min(UCharMax, IntMax). This just happen
635   // to be true for commonly used and well tested instruction set
636   // architectures, but not for others.
637   const RangeInt UCharRangeMax =
638       std::min(BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue(), IntMax);
639 
640   // The platform dependent value of EOF.
641   // Try our best to parse this from the Preprocessor, otherwise fallback to -1.
642   const auto EOFv = [&C]() -> RangeInt {
643     if (const llvm::Optional<int> OptInt =
644             tryExpandAsInteger("EOF", C.getPreprocessor()))
645       return *OptInt;
646     return -1;
647   }();
648 
649   // We are finally ready to define specifications for all supported functions.
650   //
651   // The signature needs to have the correct number of arguments.
652   // However, we insert `Irrelevant' when the type is insignificant.
653   //
654   // Argument ranges should always cover all variants. If return value
655   // is completely unknown, omit it from the respective range set.
656   //
657   // All types in the spec need to be canonical.
658   //
659   // Every item in the list of range sets represents a particular
660   // execution path the analyzer would need to explore once
661   // the call is modeled - a new program state is constructed
662   // for every range set, and each range line in the range set
663   // corresponds to a specific constraint within this state.
664   //
665   // Upon comparing to another argument, the other argument is casted
666   // to the current argument's type. This avoids proper promotion but
667   // seems useful. For example, read() receives size_t argument,
668   // and its return value, which is of type ssize_t, cannot be greater
669   // than this argument. If we made a promotion, and the size argument
670   // is equal to, say, 10, then we'd impose a range of [0, 10] on the
671   // return value, however the correct range is [-1, 10].
672   //
673   // Please update the list of functions in the header after editing!
674   //
675 
676   // Below are helpers functions to create the summaries.
677   auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind,
678                               IntRangeVector Ranges) {
679     return std::make_shared<RangeConstraint>(ArgN, Kind, Ranges);
680   };
681   struct {
682     auto operator()(RangeKind Kind, IntRangeVector Ranges) {
683       return std::make_shared<RangeConstraint>(Ret, Kind, Ranges);
684     }
685     auto operator()(BinaryOperator::Opcode Op, ArgNo OtherArgN) {
686       return std::make_shared<ComparisonConstraint>(Ret, Op, OtherArgN);
687     }
688   } ReturnValueCondition;
689   auto Range = [](RangeInt b, RangeInt e) {
690     return IntRangeVector{std::pair<RangeInt, RangeInt>{b, e}};
691   };
692   auto SingleValue = [](RangeInt v) {
693     return IntRangeVector{std::pair<RangeInt, RangeInt>{v, v}};
694   };
695   auto LessThanOrEq = BO_LE;
696   auto NotNull = [&](ArgNo ArgN) {
697     return std::make_shared<NotNullConstraint>(ArgN);
698   };
699 
700   using RetType = QualType;
701   // Templates for summaries that are reused by many functions.
702   auto Getc = [&]() {
703     return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall)
704         .Case({ReturnValueCondition(WithinRange,
705                                     {{EOFv, EOFv}, {0, UCharRangeMax}})});
706   };
707   auto Read = [&](RetType R, RangeInt Max) {
708     return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R},
709                    NoEvalCall)
710         .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)),
711                ReturnValueCondition(WithinRange, Range(-1, Max))});
712   };
713   auto Fread = [&]() {
714     return Summary(ArgTypes{VoidPtrTy, Irrelevant, SizeTy, Irrelevant},
715                    RetType{SizeTy}, NoEvalCall)
716         .Case({
717             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
718         })
719         .ArgConstraint(NotNull(ArgNo(0)));
720   };
721   auto Fwrite = [&]() {
722     return Summary(ArgTypes{ConstVoidPtrTy, Irrelevant, SizeTy, Irrelevant},
723                    RetType{SizeTy}, NoEvalCall)
724         .Case({
725             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
726         })
727         .ArgConstraint(NotNull(ArgNo(0)));
728   };
729   auto Getline = [&](RetType R, RangeInt Max) {
730     return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R},
731                    NoEvalCall)
732         .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})});
733   };
734 
735   FunctionSummaryMap = {
736       // The isascii() family of functions.
737       // The behavior is undefined if the value of the argument is not
738       // representable as unsigned char or is not equal to EOF. See e.g. C99
739       // 7.4.1.2 The isalpha function (p: 181-182).
740       {
741           "isalnum",
742           Summaries{
743               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
744                   // Boils down to isupper() or islower() or isdigit().
745                   .Case(
746                       {ArgumentCondition(0U, WithinRange,
747                                          {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}),
748                        ReturnValueCondition(OutOfRange, SingleValue(0))})
749                   // The locale-specific range.
750                   // No post-condition. We are completely unaware of
751                   // locale-specific return values.
752                   .Case({ArgumentCondition(0U, WithinRange,
753                                            {{128, UCharRangeMax}})})
754                   .Case({ArgumentCondition(0U, OutOfRange,
755                                            {{'0', '9'},
756                                             {'A', 'Z'},
757                                             {'a', 'z'},
758                                             {128, UCharRangeMax}}),
759                          ReturnValueCondition(WithinRange, SingleValue(0))})
760                   .ArgConstraint(ArgumentCondition(
761                       0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}}))},
762       },
763       {
764           "isalpha",
765           Summaries{
766               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
767                   .Case({ArgumentCondition(0U, WithinRange,
768                                            {{'A', 'Z'}, {'a', 'z'}}),
769                          ReturnValueCondition(OutOfRange, SingleValue(0))})
770                   // The locale-specific range.
771                   .Case({ArgumentCondition(0U, WithinRange,
772                                            {{128, UCharRangeMax}})})
773                   .Case({ArgumentCondition(
774                              0U, OutOfRange,
775                              {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}),
776                          ReturnValueCondition(WithinRange, SingleValue(0))})},
777       },
778       {
779           "isascii",
780           Summaries{
781               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
782                   .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
783                          ReturnValueCondition(OutOfRange, SingleValue(0))})
784                   .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)),
785                          ReturnValueCondition(WithinRange, SingleValue(0))})},
786       },
787       {
788           "isblank",
789           Summaries{
790               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
791                   .Case({ArgumentCondition(0U, WithinRange,
792                                            {{'\t', '\t'}, {' ', ' '}}),
793                          ReturnValueCondition(OutOfRange, SingleValue(0))})
794                   .Case({ArgumentCondition(0U, OutOfRange,
795                                            {{'\t', '\t'}, {' ', ' '}}),
796                          ReturnValueCondition(WithinRange, SingleValue(0))})},
797       },
798       {
799           "iscntrl",
800           Summaries{
801               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
802                   .Case({ArgumentCondition(0U, WithinRange,
803                                            {{0, 32}, {127, 127}}),
804                          ReturnValueCondition(OutOfRange, SingleValue(0))})
805                   .Case(
806                       {ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}),
807                        ReturnValueCondition(WithinRange, SingleValue(0))})},
808       },
809       {
810           "isdigit",
811           Summaries{
812               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
813                   .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')),
814                          ReturnValueCondition(OutOfRange, SingleValue(0))})
815                   .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')),
816                          ReturnValueCondition(WithinRange, SingleValue(0))})},
817       },
818       {
819           "isgraph",
820           Summaries{
821               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
822                   .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)),
823                          ReturnValueCondition(OutOfRange, SingleValue(0))})
824                   .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)),
825                          ReturnValueCondition(WithinRange, SingleValue(0))})},
826       },
827       {
828           "islower",
829           Summaries{
830               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
831                   // Is certainly lowercase.
832                   .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')),
833                          ReturnValueCondition(OutOfRange, SingleValue(0))})
834                   // Is ascii but not lowercase.
835                   .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
836                          ArgumentCondition(0U, OutOfRange, Range('a', 'z')),
837                          ReturnValueCondition(WithinRange, SingleValue(0))})
838                   // The locale-specific range.
839                   .Case({ArgumentCondition(0U, WithinRange,
840                                            {{128, UCharRangeMax}})})
841                   // Is not an unsigned char.
842                   .Case({ArgumentCondition(0U, OutOfRange,
843                                            Range(0, UCharRangeMax)),
844                          ReturnValueCondition(WithinRange, SingleValue(0))})},
845       },
846       {
847           "isprint",
848           Summaries{
849               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
850                   .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)),
851                          ReturnValueCondition(OutOfRange, SingleValue(0))})
852                   .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)),
853                          ReturnValueCondition(WithinRange, SingleValue(0))})},
854       },
855       {
856           "ispunct",
857           Summaries{
858               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
859                   .Case({ArgumentCondition(
860                              0U, WithinRange,
861                              {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
862                          ReturnValueCondition(OutOfRange, SingleValue(0))})
863                   .Case({ArgumentCondition(
864                              0U, OutOfRange,
865                              {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
866                          ReturnValueCondition(WithinRange, SingleValue(0))})},
867       },
868       {
869           "isspace",
870           Summaries{
871               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
872                   // Space, '\f', '\n', '\r', '\t', '\v'.
873                   .Case({ArgumentCondition(0U, WithinRange,
874                                            {{9, 13}, {' ', ' '}}),
875                          ReturnValueCondition(OutOfRange, SingleValue(0))})
876                   // The locale-specific range.
877                   .Case({ArgumentCondition(0U, WithinRange,
878                                            {{128, UCharRangeMax}})})
879                   .Case({ArgumentCondition(
880                              0U, OutOfRange,
881                              {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}),
882                          ReturnValueCondition(WithinRange, SingleValue(0))})},
883       },
884       {
885           "isupper",
886           Summaries{
887               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
888                   // Is certainly uppercase.
889                   .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')),
890                          ReturnValueCondition(OutOfRange, SingleValue(0))})
891                   // The locale-specific range.
892                   .Case({ArgumentCondition(0U, WithinRange,
893                                            {{128, UCharRangeMax}})})
894                   // Other.
895                   .Case({ArgumentCondition(0U, OutOfRange,
896                                            {{'A', 'Z'}, {128, UCharRangeMax}}),
897                          ReturnValueCondition(WithinRange, SingleValue(0))})},
898       },
899       {
900           "isxdigit",
901           Summaries{
902               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
903                   .Case(
904                       {ArgumentCondition(0U, WithinRange,
905                                          {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
906                        ReturnValueCondition(OutOfRange, SingleValue(0))})
907                   .Case(
908                       {ArgumentCondition(0U, OutOfRange,
909                                          {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
910                        ReturnValueCondition(WithinRange, SingleValue(0))})},
911       },
912 
913       // The getc() family of functions that returns either a char or an EOF.
914       {"getc", Summaries{Getc()}},
915       {"fgetc", Summaries{Getc()}},
916       {"getchar",
917        Summaries{Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall)
918                      .Case({ReturnValueCondition(
919                          WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})})}},
920 
921       // read()-like functions that never return more than buffer size.
922       // We are not sure how ssize_t is defined on every platform, so we
923       // provide three variants that should cover common cases.
924       {"read", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax),
925                          Read(LongLongTy, LongLongMax)}},
926       {"write", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax),
927                           Read(LongLongTy, LongLongMax)}},
928       {"fread", Summaries{Fread()}},
929       {"fwrite", Summaries{Fwrite()}},
930       // getline()-like functions either fail or read at least the delimiter.
931       {"getline", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax),
932                             Getline(LongLongTy, LongLongMax)}},
933       {"getdelim", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax),
934                              Getline(LongLongTy, LongLongMax)}},
935   };
936 }
937 
938 void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) {
939   mgr.registerChecker<StdLibraryFunctionsChecker>();
940 }
941 
942 bool ento::shouldRegisterStdCLibraryFunctionsChecker(const LangOptions &LO) {
943   return true;
944 }
945 
946 #define REGISTER_CHECKER(name)                                                 \
947   void ento::register##name(CheckerManager &mgr) {                             \
948     StdLibraryFunctionsChecker *checker =                                      \
949         mgr.getChecker<StdLibraryFunctionsChecker>();                          \
950     checker->ChecksEnabled[StdLibraryFunctionsChecker::CK_##name] = true;      \
951     checker->CheckNames[StdLibraryFunctionsChecker::CK_##name] =               \
952         mgr.getCurrentCheckerName();                                           \
953   }                                                                            \
954                                                                                \
955   bool ento::shouldRegister##name(const LangOptions &LO) { return true; }
956 
957 REGISTER_CHECKER(StdCLibraryFunctionArgsChecker)
958