xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp (revision 62e747f6172942db0e9fd55a6a25f166f338459a)
1 //=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker improves modeling of a few simple library functions.
10 //
11 // This checker provides a specification format - `Summary' - and
12 // contains descriptions of some library functions in this format. Each
13 // specification contains a list of branches for splitting the program state
14 // upon call, and range constraints on argument and return-value symbols that
15 // are satisfied on each branch. This spec can be expanded to include more
16 // items, like external effects of the function.
17 //
18 // The main difference between this approach and the body farms technique is
19 // in more explicit control over how many branches are produced. For example,
20 // consider standard C function `ispunct(int x)', which returns a non-zero value
21 // iff `x' is a punctuation character, that is, when `x' is in range
22 //   ['!', '/']   [':', '@']  U  ['[', '\`']  U  ['{', '~'].
23 // `Summary' provides only two branches for this function. However,
24 // any attempt to describe this range with if-statements in the body farm
25 // would result in many more branches. Because each branch needs to be analyzed
26 // independently, this significantly reduces performance. Additionally,
27 // once we consider a branch on which `x' is in range, say, ['!', '/'],
28 // we assume that such branch is an important separate path through the program,
29 // which may lead to false positives because considering this particular path
30 // was not consciously intended, and therefore it might have been unreachable.
31 //
32 // This checker uses eval::Call for modeling pure functions (functions without
33 // side effets), for which their `Summary' is a precise model. This avoids
34 // unnecessary invalidation passes. Conflicts with other checkers are unlikely
35 // because if the function has no other effects, other checkers would probably
36 // never want to improve upon the modeling done by this checker.
37 //
38 // Non-pure functions, for which only partial improvement over the default
39 // behavior is expected, are modeled via check::PostCall, non-intrusively.
40 //
41 // The following standard C functions are currently supported:
42 //
43 //   fgetc      getline   isdigit   isupper
44 //   fread      isalnum   isgraph   isxdigit
45 //   fwrite     isalpha   islower   read
46 //   getc       isascii   isprint   write
47 //   getchar    isblank   ispunct
48 //   getdelim   iscntrl   isspace
49 //
50 //===----------------------------------------------------------------------===//
51 
52 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
53 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
54 #include "clang/StaticAnalyzer/Core/Checker.h"
55 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
56 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
57 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
58 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
59 
60 using namespace clang;
61 using namespace clang::ento;
62 
63 namespace {
64 class StdLibraryFunctionsChecker
65     : public Checker<check::PreCall, check::PostCall, eval::Call> {
66   /// Below is a series of typedefs necessary to define function specs.
67   /// We avoid nesting types here because each additional qualifier
68   /// would need to be repeated in every function spec.
69   struct Summary;
70 
71   /// Specify how much the analyzer engine should entrust modeling this function
72   /// to us. If he doesn't, he performs additional invalidations.
73   enum InvalidationKind { NoEvalCall, EvalCallAsPure };
74 
75   // The universal integral type to use in value range descriptions.
76   // Unsigned to make sure overflows are well-defined.
77   typedef uint64_t RangeInt;
78 
79   /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is
80   /// a non-negative integer, which less than 5 and not equal to 2. For
81   /// `ComparesToArgument', holds information about how exactly to compare to
82   /// the argument.
83   typedef std::vector<std::pair<RangeInt, RangeInt>> IntRangeVector;
84 
85   /// A reference to an argument or return value by its number.
86   /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but
87   /// obviously uint32_t should be enough for all practical purposes.
88   typedef uint32_t ArgNo;
89   static const ArgNo Ret;
90 
91   class ValueConstraint;
92 
93   // Pointer to the ValueConstraint. We need a copyable, polymorphic and
94   // default initialize able type (vector needs that). A raw pointer was good,
95   // however, we cannot default initialize that. unique_ptr makes the Summary
96   // class non-copyable, therefore not an option. Releasing the copyability
97   // requirement would render the initialization of the Summary map infeasible.
98   using ValueConstraintPtr = std::shared_ptr<ValueConstraint>;
99 
100   /// Polymorphic base class that represents a constraint on a given argument
101   /// (or return value) of a function. Derived classes implement different kind
102   /// of constraints, e.g range constraints or correlation between two
103   /// arguments.
104   class ValueConstraint {
105   public:
106     ValueConstraint(ArgNo ArgN) : ArgN(ArgN) {}
107     virtual ~ValueConstraint() {}
108     /// Apply the effects of the constraint on the given program state. If null
109     /// is returned then the constraint is not feasible.
110     virtual ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
111                                   const Summary &Summary) const = 0;
112     virtual ValueConstraintPtr negate() const {
113       llvm_unreachable("Not implemented");
114     };
115     ArgNo getArgNo() const { return ArgN; }
116 
117   protected:
118     ArgNo ArgN; // Argument to which we apply the constraint.
119   };
120 
121   /// Given a range, should the argument stay inside or outside this range?
122   enum RangeKind { OutOfRange, WithinRange };
123 
124   /// Encapsulates a single range on a single symbol within a branch.
125   class RangeConstraint : public ValueConstraint {
126     RangeKind Kind;      // Kind of range definition.
127     IntRangeVector Args; // Polymorphic arguments.
128 
129   public:
130     RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Args)
131         : ValueConstraint(ArgN), Kind(Kind), Args(Args) {}
132 
133     const IntRangeVector &getRanges() const {
134       return Args;
135     }
136 
137   private:
138     ProgramStateRef applyAsOutOfRange(ProgramStateRef State,
139                                       const CallEvent &Call,
140                                       const Summary &Summary) const;
141     ProgramStateRef applyAsWithinRange(ProgramStateRef State,
142                                        const CallEvent &Call,
143                                        const Summary &Summary) const;
144   public:
145     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
146                           const Summary &Summary) const override {
147       switch (Kind) {
148       case OutOfRange:
149         return applyAsOutOfRange(State, Call, Summary);
150       case WithinRange:
151         return applyAsWithinRange(State, Call, Summary);
152       }
153       llvm_unreachable("Unknown range kind!");
154     }
155 
156     ValueConstraintPtr negate() const override {
157       RangeConstraint Tmp(*this);
158       switch (Kind) {
159       case OutOfRange:
160         Tmp.Kind = WithinRange;
161         break;
162       case WithinRange:
163         Tmp.Kind = OutOfRange;
164         break;
165       }
166       return std::make_shared<RangeConstraint>(Tmp);
167     }
168   };
169 
170   class ComparisonConstraint : public ValueConstraint {
171     BinaryOperator::Opcode Opcode;
172     ArgNo OtherArgN;
173 
174   public:
175     ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode,
176                          ArgNo OtherArgN)
177         : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {}
178     ArgNo getOtherArgNo() const { return OtherArgN; }
179     BinaryOperator::Opcode getOpcode() const { return Opcode; }
180     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
181                           const Summary &Summary) const override;
182   };
183 
184   class NotNullConstraint : public ValueConstraint {
185     using ValueConstraint::ValueConstraint;
186     // This variable has a role when we negate the constraint.
187     bool CannotBeNull = true;
188 
189   public:
190     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
191                           const Summary &Summary) const override {
192       SVal V = getArgSVal(Call, getArgNo());
193       if (V.isUndef())
194         return State;
195 
196       DefinedOrUnknownSVal L = V.castAs<DefinedOrUnknownSVal>();
197       if (!L.getAs<Loc>())
198         return State;
199 
200       return State->assume(L, CannotBeNull);
201     }
202 
203     ValueConstraintPtr negate() const override {
204       NotNullConstraint Tmp(*this);
205       Tmp.CannotBeNull = !this->CannotBeNull;
206       return std::make_shared<NotNullConstraint>(Tmp);
207     }
208   };
209 
210   /// The complete list of constraints that defines a single branch.
211   typedef std::vector<ValueConstraintPtr> ConstraintSet;
212 
213   using ArgTypes = std::vector<QualType>;
214   using Cases = std::vector<ConstraintSet>;
215 
216   /// Includes information about
217   ///   * function prototype (which is necessary to
218   ///     ensure we're modeling the right function and casting values properly),
219   ///   * approach to invalidation,
220   ///   * a list of branches - a list of list of ranges -
221   ///     A branch represents a path in the exploded graph of a function (which
222   ///     is a tree). So, a branch is a series of assumptions. In other words,
223   ///     branches represent split states and additional assumptions on top of
224   ///     the splitting assumption.
225   ///     For example, consider the branches in `isalpha(x)`
226   ///       Branch 1)
227   ///         x is in range ['A', 'Z'] or in ['a', 'z']
228   ///         then the return value is not 0. (I.e. out-of-range [0, 0])
229   ///       Branch 2)
230   ///         x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z']
231   ///         then the return value is 0.
232   ///   * a list of argument constraints, that must be true on every branch.
233   ///     If these constraints are not satisfied that means a fatal error
234   ///     usually resulting in undefined behaviour.
235   struct Summary {
236     const ArgTypes ArgTys;
237     const QualType RetTy;
238     const InvalidationKind InvalidationKd;
239     Cases CaseConstraints;
240     ConstraintSet ArgConstraints;
241 
242     Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd)
243         : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {}
244 
245     Summary &Case(ConstraintSet&& CS) {
246       CaseConstraints.push_back(std::move(CS));
247       return *this;
248     }
249     Summary &ArgConstraint(ValueConstraintPtr VC) {
250       ArgConstraints.push_back(VC);
251       return *this;
252     }
253 
254   private:
255     static void assertTypeSuitableForSummary(QualType T) {
256       assert(!T->isVoidType() &&
257              "We should have had no significant void types in the spec");
258       assert(T.isCanonical() &&
259              "We should only have canonical types in the spec");
260     }
261 
262   public:
263     QualType getArgType(ArgNo ArgN) const {
264       QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN];
265       assertTypeSuitableForSummary(T);
266       return T;
267     }
268 
269     /// Try our best to figure out if the summary's signature matches
270     /// *the* library function to which this specification applies.
271     bool matchesSignature(const FunctionDecl *FD) const;
272   };
273 
274   // The map of all functions supported by the checker. It is initialized
275   // lazily, and it doesn't change after initialization.
276   using FunctionSummaryMapType = llvm::DenseMap<const FunctionDecl *, Summary>;
277   mutable FunctionSummaryMapType FunctionSummaryMap;
278 
279   mutable std::unique_ptr<BugType> BT_InvalidArg;
280 
281   // Auxiliary functions to support ArgNo within all structures
282   // in a unified manner.
283   static QualType getArgType(const Summary &Summary, ArgNo ArgN) {
284     return Summary.getArgType(ArgN);
285   }
286   static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) {
287     return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN);
288   }
289 
290 public:
291   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
292   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
293   bool evalCall(const CallEvent &Call, CheckerContext &C) const;
294 
295   enum CheckKind {
296     CK_StdCLibraryFunctionArgsChecker,
297     CK_StdCLibraryFunctionsTesterChecker,
298     CK_NumCheckKinds
299   };
300   DefaultBool ChecksEnabled[CK_NumCheckKinds];
301   CheckerNameRef CheckNames[CK_NumCheckKinds];
302 
303 private:
304   Optional<Summary> findFunctionSummary(const FunctionDecl *FD,
305                                         CheckerContext &C) const;
306   Optional<Summary> findFunctionSummary(const CallEvent &Call,
307                                         CheckerContext &C) const;
308 
309   void initFunctionSummaries(CheckerContext &C) const;
310 
311   void reportBug(const CallEvent &Call, ExplodedNode *N,
312                  CheckerContext &C) const {
313     if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker])
314       return;
315     // TODO Add detailed diagnostic.
316     StringRef Msg = "Function argument constraint is not satisfied";
317     if (!BT_InvalidArg)
318       BT_InvalidArg = std::make_unique<BugType>(
319           CheckNames[CK_StdCLibraryFunctionArgsChecker],
320           "Unsatisfied argument constraints", categories::LogicError);
321     auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N);
322     bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R);
323     C.emitReport(std::move(R));
324   }
325 };
326 
327 const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret =
328     std::numeric_limits<ArgNo>::max();
329 
330 } // end of anonymous namespace
331 
332 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange(
333     ProgramStateRef State, const CallEvent &Call,
334     const Summary &Summary) const {
335 
336   ProgramStateManager &Mgr = State->getStateManager();
337   SValBuilder &SVB = Mgr.getSValBuilder();
338   BasicValueFactory &BVF = SVB.getBasicValueFactory();
339   ConstraintManager &CM = Mgr.getConstraintManager();
340   QualType T = getArgType(Summary, getArgNo());
341   SVal V = getArgSVal(Call, getArgNo());
342 
343   if (auto N = V.getAs<NonLoc>()) {
344     const IntRangeVector &R = getRanges();
345     size_t E = R.size();
346     for (size_t I = 0; I != E; ++I) {
347       const llvm::APSInt &Min = BVF.getValue(R[I].first, T);
348       const llvm::APSInt &Max = BVF.getValue(R[I].second, T);
349       assert(Min <= Max);
350       State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
351       if (!State)
352         break;
353     }
354   }
355 
356   return State;
357 }
358 
359 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsWithinRange(
360     ProgramStateRef State, const CallEvent &Call,
361     const Summary &Summary) const {
362 
363   ProgramStateManager &Mgr = State->getStateManager();
364   SValBuilder &SVB = Mgr.getSValBuilder();
365   BasicValueFactory &BVF = SVB.getBasicValueFactory();
366   ConstraintManager &CM = Mgr.getConstraintManager();
367   QualType T = getArgType(Summary, getArgNo());
368   SVal V = getArgSVal(Call, getArgNo());
369 
370   // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R".
371   // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary,
372   // and then cut away all holes in R one by one.
373   //
374   // E.g. consider a range list R as [A, B] and [C, D]
375   // -------+--------+------------------+------------+----------->
376   //        A        B                  C            D
377   // Then we assume that the value is not in [-inf, A - 1],
378   // then not in [D + 1, +inf], then not in [B + 1, C - 1]
379   if (auto N = V.getAs<NonLoc>()) {
380     const IntRangeVector &R = getRanges();
381     size_t E = R.size();
382 
383     const llvm::APSInt &MinusInf = BVF.getMinValue(T);
384     const llvm::APSInt &PlusInf = BVF.getMaxValue(T);
385 
386     const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T);
387     if (Left != PlusInf) {
388       assert(MinusInf <= Left);
389       State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false);
390       if (!State)
391         return nullptr;
392     }
393 
394     const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T);
395     if (Right != MinusInf) {
396       assert(Right <= PlusInf);
397       State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false);
398       if (!State)
399         return nullptr;
400     }
401 
402     for (size_t I = 1; I != E; ++I) {
403       const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T);
404       const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T);
405       if (Min <= Max) {
406         State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
407         if (!State)
408           return nullptr;
409       }
410     }
411   }
412 
413   return State;
414 }
415 
416 ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply(
417     ProgramStateRef State, const CallEvent &Call,
418     const Summary &Summary) const {
419 
420   ProgramStateManager &Mgr = State->getStateManager();
421   SValBuilder &SVB = Mgr.getSValBuilder();
422   QualType CondT = SVB.getConditionType();
423   QualType T = getArgType(Summary, getArgNo());
424   SVal V = getArgSVal(Call, getArgNo());
425 
426   BinaryOperator::Opcode Op = getOpcode();
427   ArgNo OtherArg = getOtherArgNo();
428   SVal OtherV = getArgSVal(Call, OtherArg);
429   QualType OtherT = getArgType(Summary, OtherArg);
430   // Note: we avoid integral promotion for comparison.
431   OtherV = SVB.evalCast(OtherV, T, OtherT);
432   if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT)
433                        .getAs<DefinedOrUnknownSVal>())
434     State = State->assume(*CompV, true);
435   return State;
436 }
437 
438 void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call,
439                                               CheckerContext &C) const {
440   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
441   if (!FoundSummary)
442     return;
443 
444   const Summary &Summary = *FoundSummary;
445   ProgramStateRef State = C.getState();
446 
447   ProgramStateRef NewState = State;
448   for (const ValueConstraintPtr& VC : Summary.ArgConstraints) {
449     ProgramStateRef SuccessSt = VC->apply(NewState, Call, Summary);
450     ProgramStateRef FailureSt = VC->negate()->apply(NewState, Call, Summary);
451     // The argument constraint is not satisfied.
452     if (FailureSt && !SuccessSt) {
453       if (ExplodedNode *N = C.generateErrorNode(NewState))
454         reportBug(Call, N, C);
455       break;
456     } else {
457       // We will apply the constraint even if we cannot reason about the
458       // argument. This means both SuccessSt and FailureSt can be true. If we
459       // weren't applying the constraint that would mean that symbolic
460       // execution continues on a code whose behaviour is undefined.
461       assert(SuccessSt);
462       NewState = SuccessSt;
463     }
464   }
465   if (NewState && NewState != State)
466     C.addTransition(NewState);
467 }
468 
469 void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call,
470                                                CheckerContext &C) const {
471   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
472   if (!FoundSummary)
473     return;
474 
475   // Now apply the constraints.
476   const Summary &Summary = *FoundSummary;
477   ProgramStateRef State = C.getState();
478 
479   // Apply case/branch specifications.
480   for (const auto &VRS : Summary.CaseConstraints) {
481     ProgramStateRef NewState = State;
482     for (const auto &VR: VRS) {
483       NewState = VR->apply(NewState, Call, Summary);
484       if (!NewState)
485         break;
486     }
487 
488     if (NewState && NewState != State)
489       C.addTransition(NewState);
490   }
491 }
492 
493 bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call,
494                                           CheckerContext &C) const {
495   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
496   if (!FoundSummary)
497     return false;
498 
499   const Summary &Summary = *FoundSummary;
500   switch (Summary.InvalidationKd) {
501   case EvalCallAsPure: {
502     ProgramStateRef State = C.getState();
503     const LocationContext *LC = C.getLocationContext();
504     const auto *CE = cast_or_null<CallExpr>(Call.getOriginExpr());
505     SVal V = C.getSValBuilder().conjureSymbolVal(
506         CE, LC, CE->getType().getCanonicalType(), C.blockCount());
507     State = State->BindExpr(CE, LC, V);
508     C.addTransition(State);
509     return true;
510   }
511   case NoEvalCall:
512     // Summary tells us to avoid performing eval::Call. The function is possibly
513     // evaluated by another checker, or evaluated conservatively.
514     return false;
515   }
516   llvm_unreachable("Unknown invalidation kind!");
517 }
518 
519 bool StdLibraryFunctionsChecker::Summary::matchesSignature(
520     const FunctionDecl *FD) const {
521   // Check number of arguments:
522   if (FD->param_size() != ArgTys.size())
523     return false;
524 
525   // Check return type if relevant:
526   if (!RetTy.isNull() && RetTy != FD->getReturnType().getCanonicalType())
527     return false;
528 
529   // Check argument types when relevant:
530   for (size_t I = 0, E = ArgTys.size(); I != E; ++I) {
531     QualType FormalT = ArgTys[I];
532     // Null type marks irrelevant arguments.
533     if (FormalT.isNull())
534       continue;
535 
536     assertTypeSuitableForSummary(FormalT);
537 
538     QualType ActualT = FD->getParamDecl(I)->getType().getCanonicalType();
539     if (ActualT != FormalT)
540       return false;
541   }
542 
543   return true;
544 }
545 
546 Optional<StdLibraryFunctionsChecker::Summary>
547 StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD,
548                                                 CheckerContext &C) const {
549   if (!FD)
550     return None;
551 
552   initFunctionSummaries(C);
553 
554   auto FSMI = FunctionSummaryMap.find(FD->getCanonicalDecl());
555   if (FSMI == FunctionSummaryMap.end())
556     return None;
557   return FSMI->second;
558 }
559 
560 Optional<StdLibraryFunctionsChecker::Summary>
561 StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call,
562                                                 CheckerContext &C) const {
563   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
564   if (!FD)
565     return None;
566   return findFunctionSummary(FD, C);
567 }
568 
569 llvm::Optional<const FunctionDecl *>
570 lookupGlobalCFunction(StringRef Name, const ASTContext &ACtx) {
571   IdentifierInfo &II = ACtx.Idents.get(Name);
572   auto LookupRes = ACtx.getTranslationUnitDecl()->lookup(&II);
573   if (LookupRes.size() == 0)
574     return None;
575 
576   assert(LookupRes.size() == 1 && "In C, identifiers should be unique");
577   Decl *D = LookupRes.front()->getCanonicalDecl();
578   auto *FD = dyn_cast<FunctionDecl>(D);
579   if (!FD)
580     return None;
581   return FD->getCanonicalDecl();
582 }
583 
584 void StdLibraryFunctionsChecker::initFunctionSummaries(
585     CheckerContext &C) const {
586   if (!FunctionSummaryMap.empty())
587     return;
588 
589   SValBuilder &SVB = C.getSValBuilder();
590   BasicValueFactory &BVF = SVB.getBasicValueFactory();
591   const ASTContext &ACtx = BVF.getContext();
592 
593   // These types are useful for writing specifications quickly,
594   // New specifications should probably introduce more types.
595   // Some types are hard to obtain from the AST, eg. "ssize_t".
596   // In such cases it should be possible to provide multiple variants
597   // of function summary for common cases (eg. ssize_t could be int or long
598   // or long long, so three summary variants would be enough).
599   // Of course, function variants are also useful for C++ overloads.
600   const QualType
601       Irrelevant{}; // A placeholder, whenever we do not care about the type.
602   const QualType IntTy = ACtx.IntTy;
603   const QualType LongTy = ACtx.LongTy;
604   const QualType LongLongTy = ACtx.LongLongTy;
605   const QualType SizeTy = ACtx.getSizeType();
606   const QualType VoidPtrTy = ACtx.VoidPtrTy; // void *
607   const QualType VoidPtrRestrictTy =
608       ACtx.getRestrictType(VoidPtrTy); // void *restrict
609   const QualType ConstVoidPtrTy =
610       ACtx.getPointerType(ACtx.VoidTy.withConst()); // const void *
611   const QualType ConstCharPtrTy =
612       ACtx.getPointerType(ACtx.CharTy.withConst()); // const char *
613   const QualType ConstVoidPtrRestrictTy =
614       ACtx.getRestrictType(ConstVoidPtrTy); // const void *restrict
615 
616   const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue();
617   const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue();
618   const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue();
619 
620   // Set UCharRangeMax to min of int or uchar maximum value.
621   // The C standard states that the arguments of functions like isalpha must
622   // be representable as an unsigned char. Their type is 'int', so the max
623   // value of the argument should be min(UCharMax, IntMax). This just happen
624   // to be true for commonly used and well tested instruction set
625   // architectures, but not for others.
626   const RangeInt UCharRangeMax =
627       std::min(BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue(), IntMax);
628 
629   // The platform dependent value of EOF.
630   // Try our best to parse this from the Preprocessor, otherwise fallback to -1.
631   const auto EOFv = [&C]() -> RangeInt {
632     if (const llvm::Optional<int> OptInt =
633             tryExpandAsInteger("EOF", C.getPreprocessor()))
634       return *OptInt;
635     return -1;
636   }();
637 
638   // Auxiliary class to aid adding summaries to the summary map.
639   struct AddToFunctionSummaryMap {
640     const ASTContext &ACtx;
641     FunctionSummaryMapType &Map;
642     AddToFunctionSummaryMap(const ASTContext &ACtx, FunctionSummaryMapType &FSM)
643         : ACtx(ACtx), Map(FSM) {}
644     // Add a summary to a FunctionDecl found by lookup. The lookup is performed
645     // by the given Name, and in the global scope. The summary will be attached
646     // to the found FunctionDecl only if the signatures match.
647     void operator()(StringRef Name, const Summary &S) {
648       IdentifierInfo &II = ACtx.Idents.get(Name);
649       auto LookupRes = ACtx.getTranslationUnitDecl()->lookup(&II);
650       if (LookupRes.size() == 0)
651         return;
652       for (Decl *D : LookupRes) {
653         if (auto *FD = dyn_cast<FunctionDecl>(D)) {
654           if (S.matchesSignature(FD)) {
655             auto Res = Map.insert({FD->getCanonicalDecl(), S});
656             assert(Res.second && "Function already has a summary set!");
657             (void)Res;
658             return;
659           }
660         }
661       }
662     }
663     // Add several summaries for the given name.
664     void operator()(StringRef Name, const std::vector<Summary> &Summaries) {
665       for (const Summary &S : Summaries)
666         operator()(Name, S);
667     }
668   } addToFunctionSummaryMap(ACtx, FunctionSummaryMap);
669 
670   // We are finally ready to define specifications for all supported functions.
671   //
672   // The signature needs to have the correct number of arguments.
673   // However, we insert `Irrelevant' when the type is insignificant.
674   //
675   // Argument ranges should always cover all variants. If return value
676   // is completely unknown, omit it from the respective range set.
677   //
678   // All types in the spec need to be canonical.
679   //
680   // Every item in the list of range sets represents a particular
681   // execution path the analyzer would need to explore once
682   // the call is modeled - a new program state is constructed
683   // for every range set, and each range line in the range set
684   // corresponds to a specific constraint within this state.
685   //
686   // Upon comparing to another argument, the other argument is casted
687   // to the current argument's type. This avoids proper promotion but
688   // seems useful. For example, read() receives size_t argument,
689   // and its return value, which is of type ssize_t, cannot be greater
690   // than this argument. If we made a promotion, and the size argument
691   // is equal to, say, 10, then we'd impose a range of [0, 10] on the
692   // return value, however the correct range is [-1, 10].
693   //
694   // Please update the list of functions in the header after editing!
695 
696   // Below are helpers functions to create the summaries.
697   auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind,
698                               IntRangeVector Ranges) {
699     return std::make_shared<RangeConstraint>(ArgN, Kind, Ranges);
700   };
701   struct {
702     auto operator()(RangeKind Kind, IntRangeVector Ranges) {
703       return std::make_shared<RangeConstraint>(Ret, Kind, Ranges);
704     }
705     auto operator()(BinaryOperator::Opcode Op, ArgNo OtherArgN) {
706       return std::make_shared<ComparisonConstraint>(Ret, Op, OtherArgN);
707     }
708   } ReturnValueCondition;
709   auto Range = [](RangeInt b, RangeInt e) {
710     return IntRangeVector{std::pair<RangeInt, RangeInt>{b, e}};
711   };
712   auto SingleValue = [](RangeInt v) {
713     return IntRangeVector{std::pair<RangeInt, RangeInt>{v, v}};
714   };
715   auto LessThanOrEq = BO_LE;
716   auto NotNull = [&](ArgNo ArgN) {
717     return std::make_shared<NotNullConstraint>(ArgN);
718   };
719 
720   using RetType = QualType;
721   // Templates for summaries that are reused by many functions.
722   auto Getc = [&]() {
723     return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall)
724         .Case({ReturnValueCondition(WithinRange,
725                                     {{EOFv, EOFv}, {0, UCharRangeMax}})});
726   };
727   auto Read = [&](RetType R, RangeInt Max) {
728     return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R},
729                    NoEvalCall)
730         .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)),
731                ReturnValueCondition(WithinRange, Range(-1, Max))});
732   };
733   auto Fread = [&]() {
734     return Summary(ArgTypes{VoidPtrRestrictTy, Irrelevant, SizeTy, Irrelevant},
735                    RetType{SizeTy}, NoEvalCall)
736         .Case({
737             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
738         })
739         .ArgConstraint(NotNull(ArgNo(0)));
740   };
741   auto Fwrite = [&]() {
742     return Summary(
743                ArgTypes{ConstVoidPtrRestrictTy, Irrelevant, SizeTy, Irrelevant},
744                RetType{SizeTy}, NoEvalCall)
745         .Case({
746             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
747         })
748         .ArgConstraint(NotNull(ArgNo(0)));
749   };
750   auto Getline = [&](RetType R, RangeInt Max) {
751     return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R},
752                    NoEvalCall)
753         .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})});
754   };
755 
756   // The isascii() family of functions.
757   // The behavior is undefined if the value of the argument is not
758   // representable as unsigned char or is not equal to EOF. See e.g. C99
759   // 7.4.1.2 The isalpha function (p: 181-182).
760   addToFunctionSummaryMap(
761       "isalnum",
762       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
763           // Boils down to isupper() or islower() or isdigit().
764           .Case({ArgumentCondition(0U, WithinRange,
765                                    {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}),
766                  ReturnValueCondition(OutOfRange, SingleValue(0))})
767           // The locale-specific range.
768           // No post-condition. We are completely unaware of
769           // locale-specific return values.
770           .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
771           .Case(
772               {ArgumentCondition(
773                    0U, OutOfRange,
774                    {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}),
775                ReturnValueCondition(WithinRange, SingleValue(0))})
776           .ArgConstraint(ArgumentCondition(
777               0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})));
778   addToFunctionSummaryMap(
779       "isalpha",
780       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
781           .Case({ArgumentCondition(0U, WithinRange, {{'A', 'Z'}, {'a', 'z'}}),
782                  ReturnValueCondition(OutOfRange, SingleValue(0))})
783           // The locale-specific range.
784           .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
785           .Case({ArgumentCondition(
786                      0U, OutOfRange,
787                      {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}),
788                  ReturnValueCondition(WithinRange, SingleValue(0))}));
789   addToFunctionSummaryMap(
790       "isascii",
791       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
792           .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
793                  ReturnValueCondition(OutOfRange, SingleValue(0))})
794           .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)),
795                  ReturnValueCondition(WithinRange, SingleValue(0))}));
796   addToFunctionSummaryMap(
797       "isblank",
798       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
799           .Case({ArgumentCondition(0U, WithinRange, {{'\t', '\t'}, {' ', ' '}}),
800                  ReturnValueCondition(OutOfRange, SingleValue(0))})
801           .Case({ArgumentCondition(0U, OutOfRange, {{'\t', '\t'}, {' ', ' '}}),
802                  ReturnValueCondition(WithinRange, SingleValue(0))}));
803   addToFunctionSummaryMap(
804       "iscntrl",
805       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
806           .Case({ArgumentCondition(0U, WithinRange, {{0, 32}, {127, 127}}),
807                  ReturnValueCondition(OutOfRange, SingleValue(0))})
808           .Case({ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}),
809                  ReturnValueCondition(WithinRange, SingleValue(0))}));
810   addToFunctionSummaryMap(
811       "isdigit",
812       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
813           .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')),
814                  ReturnValueCondition(OutOfRange, SingleValue(0))})
815           .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')),
816                  ReturnValueCondition(WithinRange, SingleValue(0))}));
817   addToFunctionSummaryMap(
818       "isgraph",
819       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
820           .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)),
821                  ReturnValueCondition(OutOfRange, SingleValue(0))})
822           .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)),
823                  ReturnValueCondition(WithinRange, SingleValue(0))}));
824   addToFunctionSummaryMap(
825       "islower",
826       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
827           // Is certainly lowercase.
828           .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')),
829                  ReturnValueCondition(OutOfRange, SingleValue(0))})
830           // Is ascii but not lowercase.
831           .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
832                  ArgumentCondition(0U, OutOfRange, Range('a', 'z')),
833                  ReturnValueCondition(WithinRange, SingleValue(0))})
834           // The locale-specific range.
835           .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
836           // Is not an unsigned char.
837           .Case({ArgumentCondition(0U, OutOfRange, Range(0, UCharRangeMax)),
838                  ReturnValueCondition(WithinRange, SingleValue(0))}));
839   addToFunctionSummaryMap(
840       "isprint",
841       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
842           .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)),
843                  ReturnValueCondition(OutOfRange, SingleValue(0))})
844           .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)),
845                  ReturnValueCondition(WithinRange, SingleValue(0))}));
846   addToFunctionSummaryMap(
847       "ispunct",
848       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
849           .Case({ArgumentCondition(
850                      0U, WithinRange,
851                      {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
852                  ReturnValueCondition(OutOfRange, SingleValue(0))})
853           .Case({ArgumentCondition(
854                      0U, OutOfRange,
855                      {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
856                  ReturnValueCondition(WithinRange, SingleValue(0))}));
857   addToFunctionSummaryMap(
858       "isspace",
859       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
860           // Space, '\f', '\n', '\r', '\t', '\v'.
861           .Case({ArgumentCondition(0U, WithinRange, {{9, 13}, {' ', ' '}}),
862                  ReturnValueCondition(OutOfRange, SingleValue(0))})
863           // The locale-specific range.
864           .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
865           .Case({ArgumentCondition(0U, OutOfRange,
866                                    {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}),
867                  ReturnValueCondition(WithinRange, SingleValue(0))}));
868   addToFunctionSummaryMap(
869       "isupper",
870       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
871           // Is certainly uppercase.
872           .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')),
873                  ReturnValueCondition(OutOfRange, SingleValue(0))})
874           // The locale-specific range.
875           .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
876           // Other.
877           .Case({ArgumentCondition(0U, OutOfRange,
878                                    {{'A', 'Z'}, {128, UCharRangeMax}}),
879                  ReturnValueCondition(WithinRange, SingleValue(0))}));
880   addToFunctionSummaryMap(
881       "isxdigit",
882       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
883           .Case({ArgumentCondition(0U, WithinRange,
884                                    {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
885                  ReturnValueCondition(OutOfRange, SingleValue(0))})
886           .Case({ArgumentCondition(0U, OutOfRange,
887                                    {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
888                  ReturnValueCondition(WithinRange, SingleValue(0))}));
889 
890   // The getc() family of functions that returns either a char or an EOF.
891   addToFunctionSummaryMap("getc", Getc());
892   addToFunctionSummaryMap("fgetc", Getc());
893   addToFunctionSummaryMap(
894       "getchar", Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall)
895                      .Case({ReturnValueCondition(
896                          WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})}));
897 
898   // read()-like functions that never return more than buffer size.
899   // We are not sure how ssize_t is defined on every platform, so we
900   // provide three variants that should cover common cases.
901   addToFunctionSummaryMap("read", {Read(IntTy, IntMax), Read(LongTy, LongMax),
902                                    Read(LongLongTy, LongLongMax)});
903   addToFunctionSummaryMap("write", {Read(IntTy, IntMax), Read(LongTy, LongMax),
904                                     Read(LongLongTy, LongLongMax)});
905   addToFunctionSummaryMap("fread", Fread());
906   addToFunctionSummaryMap("fwrite", Fwrite());
907   // getline()-like functions either fail or read at least the delimiter.
908   addToFunctionSummaryMap("getline",
909                           {Getline(IntTy, IntMax), Getline(LongTy, LongMax),
910                            Getline(LongLongTy, LongLongMax)});
911   addToFunctionSummaryMap("getdelim",
912                           {Getline(IntTy, IntMax), Getline(LongTy, LongMax),
913                            Getline(LongLongTy, LongLongMax)});
914 
915   // Functions for testing.
916   if (ChecksEnabled[CK_StdCLibraryFunctionsTesterChecker]) {
917     addToFunctionSummaryMap(
918         "__two_constrained_args",
919         Summary(ArgTypes{IntTy, IntTy}, RetType{IntTy}, EvalCallAsPure)
920             .ArgConstraint(ArgumentCondition(0U, WithinRange, SingleValue(1)))
921             .ArgConstraint(ArgumentCondition(1U, WithinRange, SingleValue(1))));
922     addToFunctionSummaryMap(
923         "__arg_constrained_twice",
924         Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
925             .ArgConstraint(ArgumentCondition(0U, OutOfRange, SingleValue(1)))
926             .ArgConstraint(ArgumentCondition(0U, OutOfRange, SingleValue(2))));
927     addToFunctionSummaryMap(
928         "__defaultparam",
929         Summary(ArgTypes{Irrelevant, IntTy}, RetType{IntTy}, EvalCallAsPure)
930             .ArgConstraint(NotNull(ArgNo(0))));
931     addToFunctionSummaryMap("__variadic",
932                             Summary(ArgTypes{VoidPtrTy, ConstCharPtrTy},
933                                     RetType{IntTy}, EvalCallAsPure)
934                                 .ArgConstraint(NotNull(ArgNo(0)))
935                                 .ArgConstraint(NotNull(ArgNo(1))));
936   }
937 }
938 
939 void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) {
940   mgr.registerChecker<StdLibraryFunctionsChecker>();
941 }
942 
943 bool ento::shouldRegisterStdCLibraryFunctionsChecker(const CheckerManager &mgr) {
944   return true;
945 }
946 
947 #define REGISTER_CHECKER(name)                                                 \
948   void ento::register##name(CheckerManager &mgr) {                             \
949     StdLibraryFunctionsChecker *checker =                                      \
950         mgr.getChecker<StdLibraryFunctionsChecker>();                          \
951     checker->ChecksEnabled[StdLibraryFunctionsChecker::CK_##name] = true;      \
952     checker->CheckNames[StdLibraryFunctionsChecker::CK_##name] =               \
953         mgr.getCurrentCheckerName();                                           \
954   }                                                                            \
955                                                                                \
956   bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }
957 
958 REGISTER_CHECKER(StdCLibraryFunctionArgsChecker)
959 REGISTER_CHECKER(StdCLibraryFunctionsTesterChecker)
960