xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp (revision defd95ef45171252ee8491729d3f3c863bbfe530)
1 //=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker improves modeling of a few simple library functions.
10 //
11 // This checker provides a specification format - `Summary' - and
12 // contains descriptions of some library functions in this format. Each
13 // specification contains a list of branches for splitting the program state
14 // upon call, and range constraints on argument and return-value symbols that
15 // are satisfied on each branch. This spec can be expanded to include more
16 // items, like external effects of the function.
17 //
18 // The main difference between this approach and the body farms technique is
19 // in more explicit control over how many branches are produced. For example,
20 // consider standard C function `ispunct(int x)', which returns a non-zero value
21 // iff `x' is a punctuation character, that is, when `x' is in range
22 //   ['!', '/']   [':', '@']  U  ['[', '\`']  U  ['{', '~'].
23 // `Summary' provides only two branches for this function. However,
24 // any attempt to describe this range with if-statements in the body farm
25 // would result in many more branches. Because each branch needs to be analyzed
26 // independently, this significantly reduces performance. Additionally,
27 // once we consider a branch on which `x' is in range, say, ['!', '/'],
28 // we assume that such branch is an important separate path through the program,
29 // which may lead to false positives because considering this particular path
30 // was not consciously intended, and therefore it might have been unreachable.
31 //
32 // This checker uses eval::Call for modeling pure functions (functions without
33 // side effets), for which their `Summary' is a precise model. This avoids
34 // unnecessary invalidation passes. Conflicts with other checkers are unlikely
35 // because if the function has no other effects, other checkers would probably
36 // never want to improve upon the modeling done by this checker.
37 //
38 // Non-pure functions, for which only partial improvement over the default
39 // behavior is expected, are modeled via check::PostCall, non-intrusively.
40 //
41 // The following standard C functions are currently supported:
42 //
43 //   fgetc      getline   isdigit   isupper
44 //   fread      isalnum   isgraph   isxdigit
45 //   fwrite     isalpha   islower   read
46 //   getc       isascii   isprint   write
47 //   getchar    isblank   ispunct
48 //   getdelim   iscntrl   isspace
49 //
50 //===----------------------------------------------------------------------===//
51 
52 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
53 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
54 #include "clang/StaticAnalyzer/Core/Checker.h"
55 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
56 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
57 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
58 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
59 
60 using namespace clang;
61 using namespace clang::ento;
62 
63 namespace {
64 class StdLibraryFunctionsChecker
65     : public Checker<check::PreCall, check::PostCall, eval::Call> {
66   /// Below is a series of typedefs necessary to define function specs.
67   /// We avoid nesting types here because each additional qualifier
68   /// would need to be repeated in every function spec.
69   struct Summary;
70 
71   /// Specify how much the analyzer engine should entrust modeling this function
72   /// to us. If he doesn't, he performs additional invalidations.
73   enum InvalidationKind { NoEvalCall, EvalCallAsPure };
74 
75   // The universal integral type to use in value range descriptions.
76   // Unsigned to make sure overflows are well-defined.
77   typedef uint64_t RangeInt;
78 
79   /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is
80   /// a non-negative integer, which less than 5 and not equal to 2. For
81   /// `ComparesToArgument', holds information about how exactly to compare to
82   /// the argument.
83   typedef std::vector<std::pair<RangeInt, RangeInt>> IntRangeVector;
84 
85   /// A reference to an argument or return value by its number.
86   /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but
87   /// obviously uint32_t should be enough for all practical purposes.
88   typedef uint32_t ArgNo;
89   static const ArgNo Ret;
90 
91   class ValueConstraint;
92 
93   // Pointer to the ValueConstraint. We need a copyable, polymorphic and
94   // default initialize able type (vector needs that). A raw pointer was good,
95   // however, we cannot default initialize that. unique_ptr makes the Summary
96   // class non-copyable, therefore not an option. Releasing the copyability
97   // requirement would render the initialization of the Summary map infeasible.
98   using ValueConstraintPtr = std::shared_ptr<ValueConstraint>;
99 
100   /// Polymorphic base class that represents a constraint on a given argument
101   /// (or return value) of a function. Derived classes implement different kind
102   /// of constraints, e.g range constraints or correlation between two
103   /// arguments.
104   class ValueConstraint {
105   public:
106     ValueConstraint(ArgNo ArgN) : ArgN(ArgN) {}
107     virtual ~ValueConstraint() {}
108     /// Apply the effects of the constraint on the given program state. If null
109     /// is returned then the constraint is not feasible.
110     virtual ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
111                                   const Summary &Summary) const = 0;
112     virtual ValueConstraintPtr negate() const {
113       llvm_unreachable("Not implemented");
114     };
115     ArgNo getArgNo() const { return ArgN; }
116 
117   protected:
118     ArgNo ArgN; // Argument to which we apply the constraint.
119   };
120 
121   /// Given a range, should the argument stay inside or outside this range?
122   enum RangeKind { OutOfRange, WithinRange };
123 
124   /// Encapsulates a single range on a single symbol within a branch.
125   class RangeConstraint : public ValueConstraint {
126     RangeKind Kind;      // Kind of range definition.
127     IntRangeVector Args; // Polymorphic arguments.
128 
129   public:
130     RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Args)
131         : ValueConstraint(ArgN), Kind(Kind), Args(Args) {}
132 
133     const IntRangeVector &getRanges() const {
134       return Args;
135     }
136 
137   private:
138     ProgramStateRef applyAsOutOfRange(ProgramStateRef State,
139                                       const CallEvent &Call,
140                                       const Summary &Summary) const;
141     ProgramStateRef applyAsWithinRange(ProgramStateRef State,
142                                        const CallEvent &Call,
143                                        const Summary &Summary) const;
144   public:
145     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
146                           const Summary &Summary) const override {
147       switch (Kind) {
148       case OutOfRange:
149         return applyAsOutOfRange(State, Call, Summary);
150       case WithinRange:
151         return applyAsWithinRange(State, Call, Summary);
152       }
153       llvm_unreachable("Unknown range kind!");
154     }
155 
156     ValueConstraintPtr negate() const override {
157       RangeConstraint Tmp(*this);
158       switch (Kind) {
159       case OutOfRange:
160         Tmp.Kind = WithinRange;
161         break;
162       case WithinRange:
163         Tmp.Kind = OutOfRange;
164         break;
165       }
166       return std::make_shared<RangeConstraint>(Tmp);
167     }
168   };
169 
170   class ComparisonConstraint : public ValueConstraint {
171     BinaryOperator::Opcode Opcode;
172     ArgNo OtherArgN;
173 
174   public:
175     ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode,
176                          ArgNo OtherArgN)
177         : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {}
178     ArgNo getOtherArgNo() const { return OtherArgN; }
179     BinaryOperator::Opcode getOpcode() const { return Opcode; }
180     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
181                           const Summary &Summary) const override;
182   };
183 
184   class NotNullConstraint : public ValueConstraint {
185     using ValueConstraint::ValueConstraint;
186     // This variable has a role when we negate the constraint.
187     bool CannotBeNull = true;
188 
189   public:
190     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
191                           const Summary &Summary) const override {
192       SVal V = getArgSVal(Call, getArgNo());
193       if (V.isUndef())
194         return State;
195 
196       DefinedOrUnknownSVal L = V.castAs<DefinedOrUnknownSVal>();
197       if (!L.getAs<Loc>())
198         return State;
199 
200       return State->assume(L, CannotBeNull);
201     }
202 
203     ValueConstraintPtr negate() const override {
204       NotNullConstraint Tmp(*this);
205       Tmp.CannotBeNull = !this->CannotBeNull;
206       return std::make_shared<NotNullConstraint>(Tmp);
207     }
208   };
209 
210   /// The complete list of constraints that defines a single branch.
211   typedef std::vector<ValueConstraintPtr> ConstraintSet;
212 
213   using ArgTypes = std::vector<QualType>;
214   using Cases = std::vector<ConstraintSet>;
215 
216   /// Includes information about
217   ///   * function prototype (which is necessary to
218   ///     ensure we're modeling the right function and casting values properly),
219   ///   * approach to invalidation,
220   ///   * a list of branches - a list of list of ranges -
221   ///     A branch represents a path in the exploded graph of a function (which
222   ///     is a tree). So, a branch is a series of assumptions. In other words,
223   ///     branches represent split states and additional assumptions on top of
224   ///     the splitting assumption.
225   ///     For example, consider the branches in `isalpha(x)`
226   ///       Branch 1)
227   ///         x is in range ['A', 'Z'] or in ['a', 'z']
228   ///         then the return value is not 0. (I.e. out-of-range [0, 0])
229   ///       Branch 2)
230   ///         x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z']
231   ///         then the return value is 0.
232   ///   * a list of argument constraints, that must be true on every branch.
233   ///     If these constraints are not satisfied that means a fatal error
234   ///     usually resulting in undefined behaviour.
235   struct Summary {
236     const ArgTypes ArgTys;
237     const QualType RetTy;
238     const InvalidationKind InvalidationKd;
239     Cases CaseConstraints;
240     ConstraintSet ArgConstraints;
241 
242     Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd)
243         : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {}
244 
245     Summary &Case(ConstraintSet&& CS) {
246       CaseConstraints.push_back(std::move(CS));
247       return *this;
248     }
249     Summary &ArgConstraint(ValueConstraintPtr VC) {
250       ArgConstraints.push_back(VC);
251       return *this;
252     }
253 
254   private:
255     static void assertTypeSuitableForSummary(QualType T) {
256       assert(!T->isVoidType() &&
257              "We should have had no significant void types in the spec");
258       assert(T.isCanonical() &&
259              "We should only have canonical types in the spec");
260     }
261 
262   public:
263     QualType getArgType(ArgNo ArgN) const {
264       QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN];
265       assertTypeSuitableForSummary(T);
266       return T;
267     }
268 
269     /// Try our best to figure out if the call expression is the call of
270     /// *the* library function to which this specification applies.
271     bool matchesCall(const CallExpr *CE) const;
272   };
273 
274   // The same function (as in, function identifier) may have different
275   // summaries assigned to it, with different argument and return value types.
276   // We call these "variants" of the function. This can be useful for handling
277   // C++ function overloads, and also it can be used when the same function
278   // may have different definitions on different platforms.
279   typedef std::vector<Summary> Summaries;
280 
281   // The map of all functions supported by the checker. It is initialized
282   // lazily, and it doesn't change after initialization.
283   mutable llvm::StringMap<Summaries> FunctionSummaryMap;
284 
285   mutable std::unique_ptr<BugType> BT_InvalidArg;
286 
287   // Auxiliary functions to support ArgNo within all structures
288   // in a unified manner.
289   static QualType getArgType(const Summary &Summary, ArgNo ArgN) {
290     return Summary.getArgType(ArgN);
291   }
292   static QualType getArgType(const CallEvent &Call, ArgNo ArgN) {
293     return ArgN == Ret ? Call.getResultType().getCanonicalType()
294                        : Call.getArgExpr(ArgN)->getType().getCanonicalType();
295   }
296   static QualType getArgType(const CallExpr *CE, ArgNo ArgN) {
297     return ArgN == Ret ? CE->getType().getCanonicalType()
298                        : CE->getArg(ArgN)->getType().getCanonicalType();
299   }
300   static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) {
301     return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN);
302   }
303 
304 public:
305   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
306   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
307   bool evalCall(const CallEvent &Call, CheckerContext &C) const;
308 
309   enum CheckKind { CK_StdCLibraryFunctionArgsChecker, CK_NumCheckKinds };
310   DefaultBool ChecksEnabled[CK_NumCheckKinds];
311   CheckerNameRef CheckNames[CK_NumCheckKinds];
312 
313 private:
314   Optional<Summary> findFunctionSummary(const FunctionDecl *FD,
315                                         const CallExpr *CE,
316                                         CheckerContext &C) const;
317   Optional<Summary> findFunctionSummary(const CallEvent &Call,
318                                         CheckerContext &C) const;
319 
320   void initFunctionSummaries(CheckerContext &C) const;
321 
322   void reportBug(const CallEvent &Call, ExplodedNode *N,
323                  CheckerContext &C) const {
324     if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker])
325       return;
326     // TODO Add detailed diagnostic.
327     StringRef Msg = "Function argument constraint is not satisfied";
328     if (!BT_InvalidArg)
329       BT_InvalidArg = std::make_unique<BugType>(
330           CheckNames[CK_StdCLibraryFunctionArgsChecker],
331           "Unsatisfied argument constraints", categories::LogicError);
332     auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N);
333     bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R);
334     C.emitReport(std::move(R));
335   }
336 };
337 
338 const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret =
339     std::numeric_limits<ArgNo>::max();
340 
341 } // end of anonymous namespace
342 
343 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange(
344     ProgramStateRef State, const CallEvent &Call,
345     const Summary &Summary) const {
346 
347   ProgramStateManager &Mgr = State->getStateManager();
348   SValBuilder &SVB = Mgr.getSValBuilder();
349   BasicValueFactory &BVF = SVB.getBasicValueFactory();
350   ConstraintManager &CM = Mgr.getConstraintManager();
351   QualType T = getArgType(Summary, getArgNo());
352   SVal V = getArgSVal(Call, getArgNo());
353 
354   if (auto N = V.getAs<NonLoc>()) {
355     const IntRangeVector &R = getRanges();
356     size_t E = R.size();
357     for (size_t I = 0; I != E; ++I) {
358       const llvm::APSInt &Min = BVF.getValue(R[I].first, T);
359       const llvm::APSInt &Max = BVF.getValue(R[I].second, T);
360       assert(Min <= Max);
361       State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
362       if (!State)
363         break;
364     }
365   }
366 
367   return State;
368 }
369 
370 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsWithinRange(
371     ProgramStateRef State, const CallEvent &Call,
372     const Summary &Summary) const {
373 
374   ProgramStateManager &Mgr = State->getStateManager();
375   SValBuilder &SVB = Mgr.getSValBuilder();
376   BasicValueFactory &BVF = SVB.getBasicValueFactory();
377   ConstraintManager &CM = Mgr.getConstraintManager();
378   QualType T = getArgType(Summary, getArgNo());
379   SVal V = getArgSVal(Call, getArgNo());
380 
381   // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R".
382   // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary,
383   // and then cut away all holes in R one by one.
384   //
385   // E.g. consider a range list R as [A, B] and [C, D]
386   // -------+--------+------------------+------------+----------->
387   //        A        B                  C            D
388   // Then we assume that the value is not in [-inf, A - 1],
389   // then not in [D + 1, +inf], then not in [B + 1, C - 1]
390   if (auto N = V.getAs<NonLoc>()) {
391     const IntRangeVector &R = getRanges();
392     size_t E = R.size();
393 
394     const llvm::APSInt &MinusInf = BVF.getMinValue(T);
395     const llvm::APSInt &PlusInf = BVF.getMaxValue(T);
396 
397     const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T);
398     if (Left != PlusInf) {
399       assert(MinusInf <= Left);
400       State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false);
401       if (!State)
402         return nullptr;
403     }
404 
405     const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T);
406     if (Right != MinusInf) {
407       assert(Right <= PlusInf);
408       State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false);
409       if (!State)
410         return nullptr;
411     }
412 
413     for (size_t I = 1; I != E; ++I) {
414       const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T);
415       const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T);
416       if (Min <= Max) {
417         State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
418         if (!State)
419           return nullptr;
420       }
421     }
422   }
423 
424   return State;
425 }
426 
427 ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply(
428     ProgramStateRef State, const CallEvent &Call,
429     const Summary &Summary) const {
430 
431   ProgramStateManager &Mgr = State->getStateManager();
432   SValBuilder &SVB = Mgr.getSValBuilder();
433   QualType CondT = SVB.getConditionType();
434   QualType T = getArgType(Summary, getArgNo());
435   SVal V = getArgSVal(Call, getArgNo());
436 
437   BinaryOperator::Opcode Op = getOpcode();
438   ArgNo OtherArg = getOtherArgNo();
439   SVal OtherV = getArgSVal(Call, OtherArg);
440   QualType OtherT = getArgType(Call, OtherArg);
441   // Note: we avoid integral promotion for comparison.
442   OtherV = SVB.evalCast(OtherV, T, OtherT);
443   if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT)
444                        .getAs<DefinedOrUnknownSVal>())
445     State = State->assume(*CompV, true);
446   return State;
447 }
448 
449 void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call,
450                                               CheckerContext &C) const {
451   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
452   if (!FoundSummary)
453     return;
454 
455   const Summary &Summary = *FoundSummary;
456   ProgramStateRef State = C.getState();
457 
458   for (const ValueConstraintPtr& VC : Summary.ArgConstraints) {
459     ProgramStateRef SuccessSt = VC->apply(State, Call, Summary);
460     ProgramStateRef FailureSt = VC->negate()->apply(State, Call, Summary);
461     // The argument constraint is not satisfied.
462     if (FailureSt && !SuccessSt) {
463       if (ExplodedNode *N = C.generateErrorNode(State))
464         reportBug(Call, N, C);
465       break;
466     } else {
467       // Apply the constraint even if we cannot reason about the argument. This
468       // means both SuccessSt and FailureSt can be true. If we weren't applying
469       // the constraint that would mean that symbolic execution continues on a
470       // code whose behaviour is undefined.
471       assert(SuccessSt);
472       C.addTransition(SuccessSt);
473     }
474   }
475 }
476 
477 void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call,
478                                                CheckerContext &C) const {
479   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
480   if (!FoundSummary)
481     return;
482 
483   // Now apply the constraints.
484   const Summary &Summary = *FoundSummary;
485   ProgramStateRef State = C.getState();
486 
487   // Apply case/branch specifications.
488   for (const auto &VRS : Summary.CaseConstraints) {
489     ProgramStateRef NewState = State;
490     for (const auto &VR: VRS) {
491       NewState = VR->apply(NewState, Call, Summary);
492       if (!NewState)
493         break;
494     }
495 
496     if (NewState && NewState != State)
497       C.addTransition(NewState);
498   }
499 }
500 
501 bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call,
502                                           CheckerContext &C) const {
503   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
504   if (!FoundSummary)
505     return false;
506 
507   const Summary &Summary = *FoundSummary;
508   switch (Summary.InvalidationKd) {
509   case EvalCallAsPure: {
510     ProgramStateRef State = C.getState();
511     const LocationContext *LC = C.getLocationContext();
512     const auto *CE = cast_or_null<CallExpr>(Call.getOriginExpr());
513     SVal V = C.getSValBuilder().conjureSymbolVal(
514         CE, LC, CE->getType().getCanonicalType(), C.blockCount());
515     State = State->BindExpr(CE, LC, V);
516     C.addTransition(State);
517     return true;
518   }
519   case NoEvalCall:
520     // Summary tells us to avoid performing eval::Call. The function is possibly
521     // evaluated by another checker, or evaluated conservatively.
522     return false;
523   }
524   llvm_unreachable("Unknown invalidation kind!");
525 }
526 
527 bool StdLibraryFunctionsChecker::Summary::matchesCall(
528     const CallExpr *CE) const {
529   // Check number of arguments:
530   if (CE->getNumArgs() != ArgTys.size())
531     return false;
532 
533   // Check return type if relevant:
534   if (!RetTy.isNull() && RetTy != CE->getType().getCanonicalType())
535     return false;
536 
537   // Check argument types when relevant:
538   for (size_t I = 0, E = ArgTys.size(); I != E; ++I) {
539     QualType FormalT = ArgTys[I];
540     // Null type marks irrelevant arguments.
541     if (FormalT.isNull())
542       continue;
543 
544     assertTypeSuitableForSummary(FormalT);
545 
546     QualType ActualT = StdLibraryFunctionsChecker::getArgType(CE, I);
547     assert(ActualT.isCanonical());
548     if (ActualT != FormalT)
549       return false;
550   }
551 
552   return true;
553 }
554 
555 Optional<StdLibraryFunctionsChecker::Summary>
556 StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD,
557                                                 const CallExpr *CE,
558                                                 CheckerContext &C) const {
559   // Note: we cannot always obtain FD from CE
560   // (eg. virtual call, or call by pointer).
561   assert(CE);
562 
563   if (!FD)
564     return None;
565 
566   initFunctionSummaries(C);
567 
568   IdentifierInfo *II = FD->getIdentifier();
569   if (!II)
570     return None;
571   StringRef Name = II->getName();
572   if (Name.empty() || !C.isCLibraryFunction(FD, Name))
573     return None;
574 
575   auto FSMI = FunctionSummaryMap.find(Name);
576   if (FSMI == FunctionSummaryMap.end())
577     return None;
578 
579   // Verify that function signature matches the spec in advance.
580   // Otherwise we might be modeling the wrong function.
581   // Strict checking is important because we will be conducting
582   // very integral-type-sensitive operations on arguments and
583   // return values.
584   const Summaries &SpecVariants = FSMI->second;
585   for (const Summary &Spec : SpecVariants)
586     if (Spec.matchesCall(CE))
587       return Spec;
588 
589   return None;
590 }
591 
592 Optional<StdLibraryFunctionsChecker::Summary>
593 StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call,
594                                                 CheckerContext &C) const {
595   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
596   if (!FD)
597     return None;
598   const CallExpr *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
599   if (!CE)
600     return None;
601   return findFunctionSummary(FD, CE, C);
602 }
603 
604 void StdLibraryFunctionsChecker::initFunctionSummaries(
605     CheckerContext &C) const {
606   if (!FunctionSummaryMap.empty())
607     return;
608 
609   SValBuilder &SVB = C.getSValBuilder();
610   BasicValueFactory &BVF = SVB.getBasicValueFactory();
611   const ASTContext &ACtx = BVF.getContext();
612 
613   // These types are useful for writing specifications quickly,
614   // New specifications should probably introduce more types.
615   // Some types are hard to obtain from the AST, eg. "ssize_t".
616   // In such cases it should be possible to provide multiple variants
617   // of function summary for common cases (eg. ssize_t could be int or long
618   // or long long, so three summary variants would be enough).
619   // Of course, function variants are also useful for C++ overloads.
620   const QualType
621       Irrelevant{}; // A placeholder, whenever we do not care about the type.
622   const QualType IntTy = ACtx.IntTy;
623   const QualType LongTy = ACtx.LongTy;
624   const QualType LongLongTy = ACtx.LongLongTy;
625   const QualType SizeTy = ACtx.getSizeType();
626   const QualType VoidPtrTy = ACtx.VoidPtrTy; // void *T
627   const QualType ConstVoidPtrTy =
628       ACtx.getPointerType(ACtx.VoidTy.withConst()); // const void *T
629 
630   const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue();
631   const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue();
632   const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue();
633 
634   // Set UCharRangeMax to min of int or uchar maximum value.
635   // The C standard states that the arguments of functions like isalpha must
636   // be representable as an unsigned char. Their type is 'int', so the max
637   // value of the argument should be min(UCharMax, IntMax). This just happen
638   // to be true for commonly used and well tested instruction set
639   // architectures, but not for others.
640   const RangeInt UCharRangeMax =
641       std::min(BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue(), IntMax);
642 
643   // The platform dependent value of EOF.
644   // Try our best to parse this from the Preprocessor, otherwise fallback to -1.
645   const auto EOFv = [&C]() -> RangeInt {
646     if (const llvm::Optional<int> OptInt =
647             tryExpandAsInteger("EOF", C.getPreprocessor()))
648       return *OptInt;
649     return -1;
650   }();
651 
652   // We are finally ready to define specifications for all supported functions.
653   //
654   // The signature needs to have the correct number of arguments.
655   // However, we insert `Irrelevant' when the type is insignificant.
656   //
657   // Argument ranges should always cover all variants. If return value
658   // is completely unknown, omit it from the respective range set.
659   //
660   // All types in the spec need to be canonical.
661   //
662   // Every item in the list of range sets represents a particular
663   // execution path the analyzer would need to explore once
664   // the call is modeled - a new program state is constructed
665   // for every range set, and each range line in the range set
666   // corresponds to a specific constraint within this state.
667   //
668   // Upon comparing to another argument, the other argument is casted
669   // to the current argument's type. This avoids proper promotion but
670   // seems useful. For example, read() receives size_t argument,
671   // and its return value, which is of type ssize_t, cannot be greater
672   // than this argument. If we made a promotion, and the size argument
673   // is equal to, say, 10, then we'd impose a range of [0, 10] on the
674   // return value, however the correct range is [-1, 10].
675   //
676   // Please update the list of functions in the header after editing!
677   //
678 
679   // Below are helpers functions to create the summaries.
680   auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind,
681                               IntRangeVector Ranges) {
682     return std::make_shared<RangeConstraint>(ArgN, Kind, Ranges);
683   };
684   struct {
685     auto operator()(RangeKind Kind, IntRangeVector Ranges) {
686       return std::make_shared<RangeConstraint>(Ret, Kind, Ranges);
687     }
688     auto operator()(BinaryOperator::Opcode Op, ArgNo OtherArgN) {
689       return std::make_shared<ComparisonConstraint>(Ret, Op, OtherArgN);
690     }
691   } ReturnValueCondition;
692   auto Range = [](RangeInt b, RangeInt e) {
693     return IntRangeVector{std::pair<RangeInt, RangeInt>{b, e}};
694   };
695   auto SingleValue = [](RangeInt v) {
696     return IntRangeVector{std::pair<RangeInt, RangeInt>{v, v}};
697   };
698   auto LessThanOrEq = BO_LE;
699   auto NotNull = [&](ArgNo ArgN) {
700     return std::make_shared<NotNullConstraint>(ArgN);
701   };
702 
703   using RetType = QualType;
704   // Templates for summaries that are reused by many functions.
705   auto Getc = [&]() {
706     return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall)
707         .Case({ReturnValueCondition(WithinRange,
708                                     {{EOFv, EOFv}, {0, UCharRangeMax}})});
709   };
710   auto Read = [&](RetType R, RangeInt Max) {
711     return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R},
712                    NoEvalCall)
713         .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)),
714                ReturnValueCondition(WithinRange, Range(-1, Max))});
715   };
716   auto Fread = [&]() {
717     return Summary(ArgTypes{VoidPtrTy, Irrelevant, SizeTy, Irrelevant},
718                    RetType{SizeTy}, NoEvalCall)
719         .Case({
720             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
721         })
722         .ArgConstraint(NotNull(ArgNo(0)));
723   };
724   auto Fwrite = [&]() {
725     return Summary(ArgTypes{ConstVoidPtrTy, Irrelevant, SizeTy, Irrelevant},
726                    RetType{SizeTy}, NoEvalCall)
727         .Case({
728             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
729         })
730         .ArgConstraint(NotNull(ArgNo(0)));
731   };
732   auto Getline = [&](RetType R, RangeInt Max) {
733     return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R},
734                    NoEvalCall)
735         .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})});
736   };
737 
738   FunctionSummaryMap = {
739       // The isascii() family of functions.
740       // The behavior is undefined if the value of the argument is not
741       // representable as unsigned char or is not equal to EOF. See e.g. C99
742       // 7.4.1.2 The isalpha function (p: 181-182).
743       {
744           "isalnum",
745           Summaries{
746               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
747                   // Boils down to isupper() or islower() or isdigit().
748                   .Case(
749                       {ArgumentCondition(0U, WithinRange,
750                                          {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}),
751                        ReturnValueCondition(OutOfRange, SingleValue(0))})
752                   // The locale-specific range.
753                   // No post-condition. We are completely unaware of
754                   // locale-specific return values.
755                   .Case({ArgumentCondition(0U, WithinRange,
756                                            {{128, UCharRangeMax}})})
757                   .Case({ArgumentCondition(0U, OutOfRange,
758                                            {{'0', '9'},
759                                             {'A', 'Z'},
760                                             {'a', 'z'},
761                                             {128, UCharRangeMax}}),
762                          ReturnValueCondition(WithinRange, SingleValue(0))})
763                   .ArgConstraint(ArgumentCondition(
764                       0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}}))},
765       },
766       {
767           "isalpha",
768           Summaries{
769               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
770                   .Case({ArgumentCondition(0U, WithinRange,
771                                            {{'A', 'Z'}, {'a', 'z'}}),
772                          ReturnValueCondition(OutOfRange, SingleValue(0))})
773                   // The locale-specific range.
774                   .Case({ArgumentCondition(0U, WithinRange,
775                                            {{128, UCharRangeMax}})})
776                   .Case({ArgumentCondition(
777                              0U, OutOfRange,
778                              {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}),
779                          ReturnValueCondition(WithinRange, SingleValue(0))})},
780       },
781       {
782           "isascii",
783           Summaries{
784               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
785                   .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
786                          ReturnValueCondition(OutOfRange, SingleValue(0))})
787                   .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)),
788                          ReturnValueCondition(WithinRange, SingleValue(0))})},
789       },
790       {
791           "isblank",
792           Summaries{
793               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
794                   .Case({ArgumentCondition(0U, WithinRange,
795                                            {{'\t', '\t'}, {' ', ' '}}),
796                          ReturnValueCondition(OutOfRange, SingleValue(0))})
797                   .Case({ArgumentCondition(0U, OutOfRange,
798                                            {{'\t', '\t'}, {' ', ' '}}),
799                          ReturnValueCondition(WithinRange, SingleValue(0))})},
800       },
801       {
802           "iscntrl",
803           Summaries{
804               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
805                   .Case({ArgumentCondition(0U, WithinRange,
806                                            {{0, 32}, {127, 127}}),
807                          ReturnValueCondition(OutOfRange, SingleValue(0))})
808                   .Case(
809                       {ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}),
810                        ReturnValueCondition(WithinRange, SingleValue(0))})},
811       },
812       {
813           "isdigit",
814           Summaries{
815               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
816                   .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')),
817                          ReturnValueCondition(OutOfRange, SingleValue(0))})
818                   .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')),
819                          ReturnValueCondition(WithinRange, SingleValue(0))})},
820       },
821       {
822           "isgraph",
823           Summaries{
824               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
825                   .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)),
826                          ReturnValueCondition(OutOfRange, SingleValue(0))})
827                   .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)),
828                          ReturnValueCondition(WithinRange, SingleValue(0))})},
829       },
830       {
831           "islower",
832           Summaries{
833               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
834                   // Is certainly lowercase.
835                   .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')),
836                          ReturnValueCondition(OutOfRange, SingleValue(0))})
837                   // Is ascii but not lowercase.
838                   .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
839                          ArgumentCondition(0U, OutOfRange, Range('a', 'z')),
840                          ReturnValueCondition(WithinRange, SingleValue(0))})
841                   // The locale-specific range.
842                   .Case({ArgumentCondition(0U, WithinRange,
843                                            {{128, UCharRangeMax}})})
844                   // Is not an unsigned char.
845                   .Case({ArgumentCondition(0U, OutOfRange,
846                                            Range(0, UCharRangeMax)),
847                          ReturnValueCondition(WithinRange, SingleValue(0))})},
848       },
849       {
850           "isprint",
851           Summaries{
852               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
853                   .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)),
854                          ReturnValueCondition(OutOfRange, SingleValue(0))})
855                   .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)),
856                          ReturnValueCondition(WithinRange, SingleValue(0))})},
857       },
858       {
859           "ispunct",
860           Summaries{
861               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
862                   .Case({ArgumentCondition(
863                              0U, WithinRange,
864                              {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
865                          ReturnValueCondition(OutOfRange, SingleValue(0))})
866                   .Case({ArgumentCondition(
867                              0U, OutOfRange,
868                              {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
869                          ReturnValueCondition(WithinRange, SingleValue(0))})},
870       },
871       {
872           "isspace",
873           Summaries{
874               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
875                   // Space, '\f', '\n', '\r', '\t', '\v'.
876                   .Case({ArgumentCondition(0U, WithinRange,
877                                            {{9, 13}, {' ', ' '}}),
878                          ReturnValueCondition(OutOfRange, SingleValue(0))})
879                   // The locale-specific range.
880                   .Case({ArgumentCondition(0U, WithinRange,
881                                            {{128, UCharRangeMax}})})
882                   .Case({ArgumentCondition(
883                              0U, OutOfRange,
884                              {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}),
885                          ReturnValueCondition(WithinRange, SingleValue(0))})},
886       },
887       {
888           "isupper",
889           Summaries{
890               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
891                   // Is certainly uppercase.
892                   .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')),
893                          ReturnValueCondition(OutOfRange, SingleValue(0))})
894                   // The locale-specific range.
895                   .Case({ArgumentCondition(0U, WithinRange,
896                                            {{128, UCharRangeMax}})})
897                   // Other.
898                   .Case({ArgumentCondition(0U, OutOfRange,
899                                            {{'A', 'Z'}, {128, UCharRangeMax}}),
900                          ReturnValueCondition(WithinRange, SingleValue(0))})},
901       },
902       {
903           "isxdigit",
904           Summaries{
905               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
906                   .Case(
907                       {ArgumentCondition(0U, WithinRange,
908                                          {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
909                        ReturnValueCondition(OutOfRange, SingleValue(0))})
910                   .Case(
911                       {ArgumentCondition(0U, OutOfRange,
912                                          {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
913                        ReturnValueCondition(WithinRange, SingleValue(0))})},
914       },
915 
916       // The getc() family of functions that returns either a char or an EOF.
917       {"getc", Summaries{Getc()}},
918       {"fgetc", Summaries{Getc()}},
919       {"getchar",
920        Summaries{Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall)
921                      .Case({ReturnValueCondition(
922                          WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})})}},
923 
924       // read()-like functions that never return more than buffer size.
925       // We are not sure how ssize_t is defined on every platform, so we
926       // provide three variants that should cover common cases.
927       {"read", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax),
928                          Read(LongLongTy, LongLongMax)}},
929       {"write", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax),
930                           Read(LongLongTy, LongLongMax)}},
931       {"fread", Summaries{Fread()}},
932       {"fwrite", Summaries{Fwrite()}},
933       // getline()-like functions either fail or read at least the delimiter.
934       {"getline", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax),
935                             Getline(LongLongTy, LongLongMax)}},
936       {"getdelim", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax),
937                              Getline(LongLongTy, LongLongMax)}},
938   };
939 }
940 
941 void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) {
942   mgr.registerChecker<StdLibraryFunctionsChecker>();
943 }
944 
945 bool ento::shouldRegisterStdCLibraryFunctionsChecker(const CheckerManager &mgr) {
946   return true;
947 }
948 
949 #define REGISTER_CHECKER(name)                                                 \
950   void ento::register##name(CheckerManager &mgr) {                             \
951     StdLibraryFunctionsChecker *checker =                                      \
952         mgr.getChecker<StdLibraryFunctionsChecker>();                          \
953     checker->ChecksEnabled[StdLibraryFunctionsChecker::CK_##name] = true;      \
954     checker->CheckNames[StdLibraryFunctionsChecker::CK_##name] =               \
955         mgr.getCurrentCheckerName();                                           \
956   }                                                                            \
957                                                                                \
958   bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }
959 
960 REGISTER_CHECKER(StdCLibraryFunctionArgsChecker)
961