xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp (revision ededa65d559dac04ae50d3a33610875bb8f1a85e)
1 //=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker improves modeling of a few simple library functions.
10 //
11 // This checker provides a specification format - `Summary' - and
12 // contains descriptions of some library functions in this format. Each
13 // specification contains a list of branches for splitting the program state
14 // upon call, and range constraints on argument and return-value symbols that
15 // are satisfied on each branch. This spec can be expanded to include more
16 // items, like external effects of the function.
17 //
18 // The main difference between this approach and the body farms technique is
19 // in more explicit control over how many branches are produced. For example,
20 // consider standard C function `ispunct(int x)', which returns a non-zero value
21 // iff `x' is a punctuation character, that is, when `x' is in range
22 //   ['!', '/']   [':', '@']  U  ['[', '\`']  U  ['{', '~'].
23 // `Summary' provides only two branches for this function. However,
24 // any attempt to describe this range with if-statements in the body farm
25 // would result in many more branches. Because each branch needs to be analyzed
26 // independently, this significantly reduces performance. Additionally,
27 // once we consider a branch on which `x' is in range, say, ['!', '/'],
28 // we assume that such branch is an important separate path through the program,
29 // which may lead to false positives because considering this particular path
30 // was not consciously intended, and therefore it might have been unreachable.
31 //
32 // This checker uses eval::Call for modeling pure functions (functions without
33 // side effets), for which their `Summary' is a precise model. This avoids
34 // unnecessary invalidation passes. Conflicts with other checkers are unlikely
35 // because if the function has no other effects, other checkers would probably
36 // never want to improve upon the modeling done by this checker.
37 //
38 // Non-pure functions, for which only partial improvement over the default
39 // behavior is expected, are modeled via check::PostCall, non-intrusively.
40 //
41 // The following standard C functions are currently supported:
42 //
43 //   fgetc      getline   isdigit   isupper
44 //   fread      isalnum   isgraph   isxdigit
45 //   fwrite     isalpha   islower   read
46 //   getc       isascii   isprint   write
47 //   getchar    isblank   ispunct
48 //   getdelim   iscntrl   isspace
49 //
50 //===----------------------------------------------------------------------===//
51 
52 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
53 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
54 #include "clang/StaticAnalyzer/Core/Checker.h"
55 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
56 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
57 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
58 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
59 
60 using namespace clang;
61 using namespace clang::ento;
62 
63 namespace {
64 class StdLibraryFunctionsChecker
65     : public Checker<check::PreCall, check::PostCall, eval::Call> {
66   /// Below is a series of typedefs necessary to define function specs.
67   /// We avoid nesting types here because each additional qualifier
68   /// would need to be repeated in every function spec.
69   struct Summary;
70 
71   /// Specify how much the analyzer engine should entrust modeling this function
72   /// to us. If he doesn't, he performs additional invalidations.
73   enum InvalidationKind { NoEvalCall, EvalCallAsPure };
74 
75   // The universal integral type to use in value range descriptions.
76   // Unsigned to make sure overflows are well-defined.
77   typedef uint64_t RangeInt;
78 
79   /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is
80   /// a non-negative integer, which less than 5 and not equal to 2. For
81   /// `ComparesToArgument', holds information about how exactly to compare to
82   /// the argument.
83   typedef std::vector<std::pair<RangeInt, RangeInt>> IntRangeVector;
84 
85   /// A reference to an argument or return value by its number.
86   /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but
87   /// obviously uint32_t should be enough for all practical purposes.
88   typedef uint32_t ArgNo;
89   static const ArgNo Ret;
90 
91   class ValueConstraint;
92 
93   // Pointer to the ValueConstraint. We need a copyable, polymorphic and
94   // default initialize able type (vector needs that). A raw pointer was good,
95   // however, we cannot default initialize that. unique_ptr makes the Summary
96   // class non-copyable, therefore not an option. Releasing the copyability
97   // requirement would render the initialization of the Summary map infeasible.
98   using ValueConstraintPtr = std::shared_ptr<ValueConstraint>;
99 
100   /// Polymorphic base class that represents a constraint on a given argument
101   /// (or return value) of a function. Derived classes implement different kind
102   /// of constraints, e.g range constraints or correlation between two
103   /// arguments.
104   class ValueConstraint {
105   public:
106     ValueConstraint(ArgNo ArgN) : ArgN(ArgN) {}
107     virtual ~ValueConstraint() {}
108     /// Apply the effects of the constraint on the given program state. If null
109     /// is returned then the constraint is not feasible.
110     virtual ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
111                                   const Summary &Summary) const = 0;
112     virtual ValueConstraintPtr negate() const {
113       llvm_unreachable("Not implemented");
114     };
115     ArgNo getArgNo() const { return ArgN; }
116 
117   protected:
118     ArgNo ArgN; // Argument to which we apply the constraint.
119   };
120 
121   /// Given a range, should the argument stay inside or outside this range?
122   enum RangeKind { OutOfRange, WithinRange };
123 
124   /// Encapsulates a single range on a single symbol within a branch.
125   class RangeConstraint : public ValueConstraint {
126     RangeKind Kind;      // Kind of range definition.
127     IntRangeVector Args; // Polymorphic arguments.
128 
129   public:
130     RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Args)
131         : ValueConstraint(ArgN), Kind(Kind), Args(Args) {}
132 
133     const IntRangeVector &getRanges() const {
134       return Args;
135     }
136 
137   private:
138     ProgramStateRef applyAsOutOfRange(ProgramStateRef State,
139                                       const CallEvent &Call,
140                                       const Summary &Summary) const;
141     ProgramStateRef applyAsWithinRange(ProgramStateRef State,
142                                        const CallEvent &Call,
143                                        const Summary &Summary) const;
144   public:
145     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
146                           const Summary &Summary) const override {
147       switch (Kind) {
148       case OutOfRange:
149         return applyAsOutOfRange(State, Call, Summary);
150       case WithinRange:
151         return applyAsWithinRange(State, Call, Summary);
152       }
153       llvm_unreachable("Unknown range kind!");
154     }
155 
156     ValueConstraintPtr negate() const override {
157       RangeConstraint Tmp(*this);
158       switch (Kind) {
159       case OutOfRange:
160         Tmp.Kind = WithinRange;
161         break;
162       case WithinRange:
163         Tmp.Kind = OutOfRange;
164         break;
165       default:
166         llvm_unreachable("Unknown RangeConstraint kind!");
167       }
168       return std::make_shared<RangeConstraint>(Tmp);
169     }
170   };
171 
172   class ComparisonConstraint : public ValueConstraint {
173     BinaryOperator::Opcode Opcode;
174     ArgNo OtherArgN;
175 
176   public:
177     ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode,
178                          ArgNo OtherArgN)
179         : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {}
180     ArgNo getOtherArgNo() const { return OtherArgN; }
181     BinaryOperator::Opcode getOpcode() const { return Opcode; }
182     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
183                           const Summary &Summary) const override;
184   };
185 
186   class NotNullConstraint : public ValueConstraint {
187     using ValueConstraint::ValueConstraint;
188     // This variable has a role when we negate the constraint.
189     bool CannotBeNull = true;
190 
191   public:
192     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
193                           const Summary &Summary) const override {
194       SVal V = getArgSVal(Call, getArgNo());
195       DefinedOrUnknownSVal L = V.castAs<DefinedOrUnknownSVal>();
196       if (!L.getAs<Loc>())
197         return State;
198 
199       return State->assume(L, CannotBeNull);
200     }
201 
202     ValueConstraintPtr negate() const override {
203       NotNullConstraint Tmp(*this);
204       Tmp.CannotBeNull = !this->CannotBeNull;
205       return std::make_shared<NotNullConstraint>(Tmp);
206     }
207   };
208 
209   /// The complete list of constraints that defines a single branch.
210   typedef std::vector<ValueConstraintPtr> ConstraintSet;
211 
212   using ArgTypes = std::vector<QualType>;
213   using Cases = std::vector<ConstraintSet>;
214 
215   /// Includes information about
216   ///   * function prototype (which is necessary to
217   ///     ensure we're modeling the right function and casting values properly),
218   ///   * approach to invalidation,
219   ///   * a list of branches - a list of list of ranges -
220   ///     A branch represents a path in the exploded graph of a function (which
221   ///     is a tree). So, a branch is a series of assumptions. In other words,
222   ///     branches represent split states and additional assumptions on top of
223   ///     the splitting assumption.
224   ///     For example, consider the branches in `isalpha(x)`
225   ///       Branch 1)
226   ///         x is in range ['A', 'Z'] or in ['a', 'z']
227   ///         then the return value is not 0. (I.e. out-of-range [0, 0])
228   ///       Branch 2)
229   ///         x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z']
230   ///         then the return value is 0.
231   ///   * a list of argument constraints, that must be true on every branch.
232   ///     If these constraints are not satisfied that means a fatal error
233   ///     usually resulting in undefined behaviour.
234   struct Summary {
235     const ArgTypes ArgTys;
236     const QualType RetTy;
237     const InvalidationKind InvalidationKd;
238     Cases CaseConstraints;
239     ConstraintSet ArgConstraints;
240 
241     Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd)
242         : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {}
243 
244     Summary &Case(ConstraintSet&& CS) {
245       CaseConstraints.push_back(std::move(CS));
246       return *this;
247     }
248     Summary &ArgConstraint(ValueConstraintPtr VC) {
249       ArgConstraints.push_back(VC);
250       return *this;
251     }
252 
253   private:
254     static void assertTypeSuitableForSummary(QualType T) {
255       assert(!T->isVoidType() &&
256              "We should have had no significant void types in the spec");
257       assert(T.isCanonical() &&
258              "We should only have canonical types in the spec");
259     }
260 
261   public:
262     QualType getArgType(ArgNo ArgN) const {
263       QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN];
264       assertTypeSuitableForSummary(T);
265       return T;
266     }
267 
268     /// Try our best to figure out if the call expression is the call of
269     /// *the* library function to which this specification applies.
270     bool matchesCall(const CallExpr *CE) const;
271   };
272 
273   // The same function (as in, function identifier) may have different
274   // summaries assigned to it, with different argument and return value types.
275   // We call these "variants" of the function. This can be useful for handling
276   // C++ function overloads, and also it can be used when the same function
277   // may have different definitions on different platforms.
278   typedef std::vector<Summary> Summaries;
279 
280   // The map of all functions supported by the checker. It is initialized
281   // lazily, and it doesn't change after initialization.
282   mutable llvm::StringMap<Summaries> FunctionSummaryMap;
283 
284   mutable std::unique_ptr<BugType> BT_InvalidArg;
285 
286   // Auxiliary functions to support ArgNo within all structures
287   // in a unified manner.
288   static QualType getArgType(const Summary &Summary, ArgNo ArgN) {
289     return Summary.getArgType(ArgN);
290   }
291   static QualType getArgType(const CallEvent &Call, ArgNo ArgN) {
292     return ArgN == Ret ? Call.getResultType().getCanonicalType()
293                        : Call.getArgExpr(ArgN)->getType().getCanonicalType();
294   }
295   static QualType getArgType(const CallExpr *CE, ArgNo ArgN) {
296     return ArgN == Ret ? CE->getType().getCanonicalType()
297                        : CE->getArg(ArgN)->getType().getCanonicalType();
298   }
299   static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) {
300     return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN);
301   }
302 
303 public:
304   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
305   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
306   bool evalCall(const CallEvent &Call, CheckerContext &C) const;
307 
308   enum CheckKind { CK_StdCLibraryFunctionArgsChecker, CK_NumCheckKinds };
309   DefaultBool ChecksEnabled[CK_NumCheckKinds];
310   CheckerNameRef CheckNames[CK_NumCheckKinds];
311 
312 private:
313   Optional<Summary> findFunctionSummary(const FunctionDecl *FD,
314                                         const CallExpr *CE,
315                                         CheckerContext &C) const;
316   Optional<Summary> findFunctionSummary(const CallEvent &Call,
317                                         CheckerContext &C) const;
318 
319   void initFunctionSummaries(CheckerContext &C) const;
320 
321   void reportBug(const CallEvent &Call, ExplodedNode *N,
322                  CheckerContext &C) const {
323     if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker])
324       return;
325     // TODO Add detailed diagnostic.
326     StringRef Msg = "Function argument constraint is not satisfied";
327     if (!BT_InvalidArg)
328       BT_InvalidArg = std::make_unique<BugType>(
329           CheckNames[CK_StdCLibraryFunctionArgsChecker],
330           "Unsatisfied argument constraints", categories::LogicError);
331     auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N);
332     bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R);
333     C.emitReport(std::move(R));
334   }
335 };
336 
337 const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret =
338     std::numeric_limits<ArgNo>::max();
339 
340 } // end of anonymous namespace
341 
342 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange(
343     ProgramStateRef State, const CallEvent &Call,
344     const Summary &Summary) const {
345 
346   ProgramStateManager &Mgr = State->getStateManager();
347   SValBuilder &SVB = Mgr.getSValBuilder();
348   BasicValueFactory &BVF = SVB.getBasicValueFactory();
349   ConstraintManager &CM = Mgr.getConstraintManager();
350   QualType T = getArgType(Summary, getArgNo());
351   SVal V = getArgSVal(Call, getArgNo());
352 
353   if (auto N = V.getAs<NonLoc>()) {
354     const IntRangeVector &R = getRanges();
355     size_t E = R.size();
356     for (size_t I = 0; I != E; ++I) {
357       const llvm::APSInt &Min = BVF.getValue(R[I].first, T);
358       const llvm::APSInt &Max = BVF.getValue(R[I].second, T);
359       assert(Min <= Max);
360       State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
361       if (!State)
362         break;
363     }
364   }
365 
366   return State;
367 }
368 
369 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsWithinRange(
370     ProgramStateRef State, const CallEvent &Call,
371     const Summary &Summary) const {
372 
373   ProgramStateManager &Mgr = State->getStateManager();
374   SValBuilder &SVB = Mgr.getSValBuilder();
375   BasicValueFactory &BVF = SVB.getBasicValueFactory();
376   ConstraintManager &CM = Mgr.getConstraintManager();
377   QualType T = getArgType(Summary, getArgNo());
378   SVal V = getArgSVal(Call, getArgNo());
379 
380   // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R".
381   // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary,
382   // and then cut away all holes in R one by one.
383   //
384   // E.g. consider a range list R as [A, B] and [C, D]
385   // -------+--------+------------------+------------+----------->
386   //        A        B                  C            D
387   // Then we assume that the value is not in [-inf, A - 1],
388   // then not in [D + 1, +inf], then not in [B + 1, C - 1]
389   if (auto N = V.getAs<NonLoc>()) {
390     const IntRangeVector &R = getRanges();
391     size_t E = R.size();
392 
393     const llvm::APSInt &MinusInf = BVF.getMinValue(T);
394     const llvm::APSInt &PlusInf = BVF.getMaxValue(T);
395 
396     const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T);
397     if (Left != PlusInf) {
398       assert(MinusInf <= Left);
399       State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false);
400       if (!State)
401         return nullptr;
402     }
403 
404     const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T);
405     if (Right != MinusInf) {
406       assert(Right <= PlusInf);
407       State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false);
408       if (!State)
409         return nullptr;
410     }
411 
412     for (size_t I = 1; I != E; ++I) {
413       const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T);
414       const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T);
415       if (Min <= Max) {
416         State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
417         if (!State)
418           return nullptr;
419       }
420     }
421   }
422 
423   return State;
424 }
425 
426 ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply(
427     ProgramStateRef State, const CallEvent &Call,
428     const Summary &Summary) const {
429 
430   ProgramStateManager &Mgr = State->getStateManager();
431   SValBuilder &SVB = Mgr.getSValBuilder();
432   QualType CondT = SVB.getConditionType();
433   QualType T = getArgType(Summary, getArgNo());
434   SVal V = getArgSVal(Call, getArgNo());
435 
436   BinaryOperator::Opcode Op = getOpcode();
437   ArgNo OtherArg = getOtherArgNo();
438   SVal OtherV = getArgSVal(Call, OtherArg);
439   QualType OtherT = getArgType(Call, OtherArg);
440   // Note: we avoid integral promotion for comparison.
441   OtherV = SVB.evalCast(OtherV, T, OtherT);
442   if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT)
443                        .getAs<DefinedOrUnknownSVal>())
444     State = State->assume(*CompV, true);
445   return State;
446 }
447 
448 void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call,
449                                               CheckerContext &C) const {
450   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
451   if (!FoundSummary)
452     return;
453 
454   const Summary &Summary = *FoundSummary;
455   ProgramStateRef State = C.getState();
456 
457   for (const ValueConstraintPtr& VC : Summary.ArgConstraints) {
458     ProgramStateRef SuccessSt = VC->apply(State, Call, Summary);
459     ProgramStateRef FailureSt = VC->negate()->apply(State, Call, Summary);
460     // The argument constraint is not satisfied.
461     if (FailureSt && !SuccessSt) {
462       if (ExplodedNode *N = C.generateErrorNode(State))
463         reportBug(Call, N, C);
464       break;
465     } else {
466       // Apply the constraint even if we cannot reason about the argument. This
467       // means both SuccessSt and FailureSt can be true. If we weren't applying
468       // the constraint that would mean that symbolic execution continues on a
469       // code whose behaviour is undefined.
470       assert(SuccessSt);
471       C.addTransition(SuccessSt);
472     }
473   }
474 }
475 
476 void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call,
477                                                CheckerContext &C) const {
478   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
479   if (!FoundSummary)
480     return;
481 
482   // Now apply the constraints.
483   const Summary &Summary = *FoundSummary;
484   ProgramStateRef State = C.getState();
485 
486   // Apply case/branch specifications.
487   for (const auto &VRS : Summary.CaseConstraints) {
488     ProgramStateRef NewState = State;
489     for (const auto &VR: VRS) {
490       NewState = VR->apply(NewState, Call, Summary);
491       if (!NewState)
492         break;
493     }
494 
495     if (NewState && NewState != State)
496       C.addTransition(NewState);
497   }
498 }
499 
500 bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call,
501                                           CheckerContext &C) const {
502   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
503   if (!FoundSummary)
504     return false;
505 
506   const Summary &Summary = *FoundSummary;
507   switch (Summary.InvalidationKd) {
508   case EvalCallAsPure: {
509     ProgramStateRef State = C.getState();
510     const LocationContext *LC = C.getLocationContext();
511     const auto *CE = cast_or_null<CallExpr>(Call.getOriginExpr());
512     SVal V = C.getSValBuilder().conjureSymbolVal(
513         CE, LC, CE->getType().getCanonicalType(), C.blockCount());
514     State = State->BindExpr(CE, LC, V);
515     C.addTransition(State);
516     return true;
517   }
518   case NoEvalCall:
519     // Summary tells us to avoid performing eval::Call. The function is possibly
520     // evaluated by another checker, or evaluated conservatively.
521     return false;
522   }
523   llvm_unreachable("Unknown invalidation kind!");
524 }
525 
526 bool StdLibraryFunctionsChecker::Summary::matchesCall(
527     const CallExpr *CE) const {
528   // Check number of arguments:
529   if (CE->getNumArgs() != ArgTys.size())
530     return false;
531 
532   // Check return type if relevant:
533   if (!RetTy.isNull() && RetTy != CE->getType().getCanonicalType())
534     return false;
535 
536   // Check argument types when relevant:
537   for (size_t I = 0, E = ArgTys.size(); I != E; ++I) {
538     QualType FormalT = ArgTys[I];
539     // Null type marks irrelevant arguments.
540     if (FormalT.isNull())
541       continue;
542 
543     assertTypeSuitableForSummary(FormalT);
544 
545     QualType ActualT = StdLibraryFunctionsChecker::getArgType(CE, I);
546     assert(ActualT.isCanonical());
547     if (ActualT != FormalT)
548       return false;
549   }
550 
551   return true;
552 }
553 
554 Optional<StdLibraryFunctionsChecker::Summary>
555 StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD,
556                                                 const CallExpr *CE,
557                                                 CheckerContext &C) const {
558   // Note: we cannot always obtain FD from CE
559   // (eg. virtual call, or call by pointer).
560   assert(CE);
561 
562   if (!FD)
563     return None;
564 
565   initFunctionSummaries(C);
566 
567   IdentifierInfo *II = FD->getIdentifier();
568   if (!II)
569     return None;
570   StringRef Name = II->getName();
571   if (Name.empty() || !C.isCLibraryFunction(FD, Name))
572     return None;
573 
574   auto FSMI = FunctionSummaryMap.find(Name);
575   if (FSMI == FunctionSummaryMap.end())
576     return None;
577 
578   // Verify that function signature matches the spec in advance.
579   // Otherwise we might be modeling the wrong function.
580   // Strict checking is important because we will be conducting
581   // very integral-type-sensitive operations on arguments and
582   // return values.
583   const Summaries &SpecVariants = FSMI->second;
584   for (const Summary &Spec : SpecVariants)
585     if (Spec.matchesCall(CE))
586       return Spec;
587 
588   return None;
589 }
590 
591 Optional<StdLibraryFunctionsChecker::Summary>
592 StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call,
593                                                 CheckerContext &C) const {
594   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
595   if (!FD)
596     return None;
597   const CallExpr *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
598   if (!CE)
599     return None;
600   return findFunctionSummary(FD, CE, C);
601 }
602 
603 void StdLibraryFunctionsChecker::initFunctionSummaries(
604     CheckerContext &C) const {
605   if (!FunctionSummaryMap.empty())
606     return;
607 
608   SValBuilder &SVB = C.getSValBuilder();
609   BasicValueFactory &BVF = SVB.getBasicValueFactory();
610   const ASTContext &ACtx = BVF.getContext();
611 
612   // These types are useful for writing specifications quickly,
613   // New specifications should probably introduce more types.
614   // Some types are hard to obtain from the AST, eg. "ssize_t".
615   // In such cases it should be possible to provide multiple variants
616   // of function summary for common cases (eg. ssize_t could be int or long
617   // or long long, so three summary variants would be enough).
618   // Of course, function variants are also useful for C++ overloads.
619   const QualType
620       Irrelevant{}; // A placeholder, whenever we do not care about the type.
621   const QualType IntTy = ACtx.IntTy;
622   const QualType LongTy = ACtx.LongTy;
623   const QualType LongLongTy = ACtx.LongLongTy;
624   const QualType SizeTy = ACtx.getSizeType();
625   const QualType VoidPtrTy = ACtx.VoidPtrTy; // void *T
626   const QualType ConstVoidPtrTy =
627       ACtx.getPointerType(ACtx.VoidTy.withConst()); // const void *T
628 
629   const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue();
630   const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue();
631   const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue();
632 
633   // Set UCharRangeMax to min of int or uchar maximum value.
634   // The C standard states that the arguments of functions like isalpha must
635   // be representable as an unsigned char. Their type is 'int', so the max
636   // value of the argument should be min(UCharMax, IntMax). This just happen
637   // to be true for commonly used and well tested instruction set
638   // architectures, but not for others.
639   const RangeInt UCharRangeMax =
640       std::min(BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue(), IntMax);
641 
642   // The platform dependent value of EOF.
643   // Try our best to parse this from the Preprocessor, otherwise fallback to -1.
644   const auto EOFv = [&C]() -> RangeInt {
645     if (const llvm::Optional<int> OptInt =
646             tryExpandAsInteger("EOF", C.getPreprocessor()))
647       return *OptInt;
648     return -1;
649   }();
650 
651   // We are finally ready to define specifications for all supported functions.
652   //
653   // The signature needs to have the correct number of arguments.
654   // However, we insert `Irrelevant' when the type is insignificant.
655   //
656   // Argument ranges should always cover all variants. If return value
657   // is completely unknown, omit it from the respective range set.
658   //
659   // All types in the spec need to be canonical.
660   //
661   // Every item in the list of range sets represents a particular
662   // execution path the analyzer would need to explore once
663   // the call is modeled - a new program state is constructed
664   // for every range set, and each range line in the range set
665   // corresponds to a specific constraint within this state.
666   //
667   // Upon comparing to another argument, the other argument is casted
668   // to the current argument's type. This avoids proper promotion but
669   // seems useful. For example, read() receives size_t argument,
670   // and its return value, which is of type ssize_t, cannot be greater
671   // than this argument. If we made a promotion, and the size argument
672   // is equal to, say, 10, then we'd impose a range of [0, 10] on the
673   // return value, however the correct range is [-1, 10].
674   //
675   // Please update the list of functions in the header after editing!
676   //
677 
678   // Below are helpers functions to create the summaries.
679   auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind,
680                               IntRangeVector Ranges) {
681     return std::make_shared<RangeConstraint>(ArgN, Kind, Ranges);
682   };
683   struct {
684     auto operator()(RangeKind Kind, IntRangeVector Ranges) {
685       return std::make_shared<RangeConstraint>(Ret, Kind, Ranges);
686     }
687     auto operator()(BinaryOperator::Opcode Op, ArgNo OtherArgN) {
688       return std::make_shared<ComparisonConstraint>(Ret, Op, OtherArgN);
689     }
690   } ReturnValueCondition;
691   auto Range = [](RangeInt b, RangeInt e) {
692     return IntRangeVector{std::pair<RangeInt, RangeInt>{b, e}};
693   };
694   auto SingleValue = [](RangeInt v) {
695     return IntRangeVector{std::pair<RangeInt, RangeInt>{v, v}};
696   };
697   auto LessThanOrEq = BO_LE;
698   auto NotNull = [&](ArgNo ArgN) {
699     return std::make_shared<NotNullConstraint>(ArgN);
700   };
701 
702   using RetType = QualType;
703   // Templates for summaries that are reused by many functions.
704   auto Getc = [&]() {
705     return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall)
706         .Case({ReturnValueCondition(WithinRange,
707                                     {{EOFv, EOFv}, {0, UCharRangeMax}})});
708   };
709   auto Read = [&](RetType R, RangeInt Max) {
710     return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R},
711                    NoEvalCall)
712         .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)),
713                ReturnValueCondition(WithinRange, Range(-1, Max))});
714   };
715   auto Fread = [&]() {
716     return Summary(ArgTypes{VoidPtrTy, Irrelevant, SizeTy, Irrelevant},
717                    RetType{SizeTy}, NoEvalCall)
718         .Case({
719             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
720         })
721         .ArgConstraint(NotNull(ArgNo(0)));
722   };
723   auto Fwrite = [&]() {
724     return Summary(ArgTypes{ConstVoidPtrTy, Irrelevant, SizeTy, Irrelevant},
725                    RetType{SizeTy}, NoEvalCall)
726         .Case({
727             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
728         })
729         .ArgConstraint(NotNull(ArgNo(0)));
730   };
731   auto Getline = [&](RetType R, RangeInt Max) {
732     return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R},
733                    NoEvalCall)
734         .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})});
735   };
736 
737   FunctionSummaryMap = {
738       // The isascii() family of functions.
739       // The behavior is undefined if the value of the argument is not
740       // representable as unsigned char or is not equal to EOF. See e.g. C99
741       // 7.4.1.2 The isalpha function (p: 181-182).
742       {
743           "isalnum",
744           Summaries{
745               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
746                   // Boils down to isupper() or islower() or isdigit().
747                   .Case(
748                       {ArgumentCondition(0U, WithinRange,
749                                          {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}),
750                        ReturnValueCondition(OutOfRange, SingleValue(0))})
751                   // The locale-specific range.
752                   // No post-condition. We are completely unaware of
753                   // locale-specific return values.
754                   .Case({ArgumentCondition(0U, WithinRange,
755                                            {{128, UCharRangeMax}})})
756                   .Case({ArgumentCondition(0U, OutOfRange,
757                                            {{'0', '9'},
758                                             {'A', 'Z'},
759                                             {'a', 'z'},
760                                             {128, UCharRangeMax}}),
761                          ReturnValueCondition(WithinRange, SingleValue(0))})
762                   .ArgConstraint(ArgumentCondition(
763                       0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}}))},
764       },
765       {
766           "isalpha",
767           Summaries{
768               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
769                   .Case({ArgumentCondition(0U, WithinRange,
770                                            {{'A', 'Z'}, {'a', 'z'}}),
771                          ReturnValueCondition(OutOfRange, SingleValue(0))})
772                   // The locale-specific range.
773                   .Case({ArgumentCondition(0U, WithinRange,
774                                            {{128, UCharRangeMax}})})
775                   .Case({ArgumentCondition(
776                              0U, OutOfRange,
777                              {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}),
778                          ReturnValueCondition(WithinRange, SingleValue(0))})},
779       },
780       {
781           "isascii",
782           Summaries{
783               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
784                   .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
785                          ReturnValueCondition(OutOfRange, SingleValue(0))})
786                   .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)),
787                          ReturnValueCondition(WithinRange, SingleValue(0))})},
788       },
789       {
790           "isblank",
791           Summaries{
792               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
793                   .Case({ArgumentCondition(0U, WithinRange,
794                                            {{'\t', '\t'}, {' ', ' '}}),
795                          ReturnValueCondition(OutOfRange, SingleValue(0))})
796                   .Case({ArgumentCondition(0U, OutOfRange,
797                                            {{'\t', '\t'}, {' ', ' '}}),
798                          ReturnValueCondition(WithinRange, SingleValue(0))})},
799       },
800       {
801           "iscntrl",
802           Summaries{
803               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
804                   .Case({ArgumentCondition(0U, WithinRange,
805                                            {{0, 32}, {127, 127}}),
806                          ReturnValueCondition(OutOfRange, SingleValue(0))})
807                   .Case(
808                       {ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}),
809                        ReturnValueCondition(WithinRange, SingleValue(0))})},
810       },
811       {
812           "isdigit",
813           Summaries{
814               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
815                   .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')),
816                          ReturnValueCondition(OutOfRange, SingleValue(0))})
817                   .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')),
818                          ReturnValueCondition(WithinRange, SingleValue(0))})},
819       },
820       {
821           "isgraph",
822           Summaries{
823               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
824                   .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)),
825                          ReturnValueCondition(OutOfRange, SingleValue(0))})
826                   .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)),
827                          ReturnValueCondition(WithinRange, SingleValue(0))})},
828       },
829       {
830           "islower",
831           Summaries{
832               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
833                   // Is certainly lowercase.
834                   .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')),
835                          ReturnValueCondition(OutOfRange, SingleValue(0))})
836                   // Is ascii but not lowercase.
837                   .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
838                          ArgumentCondition(0U, OutOfRange, Range('a', 'z')),
839                          ReturnValueCondition(WithinRange, SingleValue(0))})
840                   // The locale-specific range.
841                   .Case({ArgumentCondition(0U, WithinRange,
842                                            {{128, UCharRangeMax}})})
843                   // Is not an unsigned char.
844                   .Case({ArgumentCondition(0U, OutOfRange,
845                                            Range(0, UCharRangeMax)),
846                          ReturnValueCondition(WithinRange, SingleValue(0))})},
847       },
848       {
849           "isprint",
850           Summaries{
851               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
852                   .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)),
853                          ReturnValueCondition(OutOfRange, SingleValue(0))})
854                   .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)),
855                          ReturnValueCondition(WithinRange, SingleValue(0))})},
856       },
857       {
858           "ispunct",
859           Summaries{
860               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
861                   .Case({ArgumentCondition(
862                              0U, WithinRange,
863                              {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
864                          ReturnValueCondition(OutOfRange, SingleValue(0))})
865                   .Case({ArgumentCondition(
866                              0U, OutOfRange,
867                              {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
868                          ReturnValueCondition(WithinRange, SingleValue(0))})},
869       },
870       {
871           "isspace",
872           Summaries{
873               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
874                   // Space, '\f', '\n', '\r', '\t', '\v'.
875                   .Case({ArgumentCondition(0U, WithinRange,
876                                            {{9, 13}, {' ', ' '}}),
877                          ReturnValueCondition(OutOfRange, SingleValue(0))})
878                   // The locale-specific range.
879                   .Case({ArgumentCondition(0U, WithinRange,
880                                            {{128, UCharRangeMax}})})
881                   .Case({ArgumentCondition(
882                              0U, OutOfRange,
883                              {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}),
884                          ReturnValueCondition(WithinRange, SingleValue(0))})},
885       },
886       {
887           "isupper",
888           Summaries{
889               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
890                   // Is certainly uppercase.
891                   .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')),
892                          ReturnValueCondition(OutOfRange, SingleValue(0))})
893                   // The locale-specific range.
894                   .Case({ArgumentCondition(0U, WithinRange,
895                                            {{128, UCharRangeMax}})})
896                   // Other.
897                   .Case({ArgumentCondition(0U, OutOfRange,
898                                            {{'A', 'Z'}, {128, UCharRangeMax}}),
899                          ReturnValueCondition(WithinRange, SingleValue(0))})},
900       },
901       {
902           "isxdigit",
903           Summaries{
904               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
905                   .Case(
906                       {ArgumentCondition(0U, WithinRange,
907                                          {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
908                        ReturnValueCondition(OutOfRange, SingleValue(0))})
909                   .Case(
910                       {ArgumentCondition(0U, OutOfRange,
911                                          {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
912                        ReturnValueCondition(WithinRange, SingleValue(0))})},
913       },
914 
915       // The getc() family of functions that returns either a char or an EOF.
916       {"getc", Summaries{Getc()}},
917       {"fgetc", Summaries{Getc()}},
918       {"getchar",
919        Summaries{Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall)
920                      .Case({ReturnValueCondition(
921                          WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})})}},
922 
923       // read()-like functions that never return more than buffer size.
924       // We are not sure how ssize_t is defined on every platform, so we
925       // provide three variants that should cover common cases.
926       {"read", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax),
927                          Read(LongLongTy, LongLongMax)}},
928       {"write", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax),
929                           Read(LongLongTy, LongLongMax)}},
930       {"fread", Summaries{Fread()}},
931       {"fwrite", Summaries{Fwrite()}},
932       // getline()-like functions either fail or read at least the delimiter.
933       {"getline", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax),
934                             Getline(LongLongTy, LongLongMax)}},
935       {"getdelim", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax),
936                              Getline(LongLongTy, LongLongMax)}},
937   };
938 }
939 
940 void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) {
941   mgr.registerChecker<StdLibraryFunctionsChecker>();
942 }
943 
944 bool ento::shouldRegisterStdCLibraryFunctionsChecker(const LangOptions &LO) {
945   return true;
946 }
947 
948 #define REGISTER_CHECKER(name)                                                 \
949   void ento::register##name(CheckerManager &mgr) {                             \
950     StdLibraryFunctionsChecker *checker =                                      \
951         mgr.getChecker<StdLibraryFunctionsChecker>();                          \
952     checker->ChecksEnabled[StdLibraryFunctionsChecker::CK_##name] = true;      \
953     checker->CheckNames[StdLibraryFunctionsChecker::CK_##name] =               \
954         mgr.getCurrentCheckerName();                                           \
955   }                                                                            \
956                                                                                \
957   bool ento::shouldRegister##name(const LangOptions &LO) { return true; }
958 
959 REGISTER_CHECKER(StdCLibraryFunctionArgsChecker)
960