xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp (revision ab1fad8a3a8b8e3264c34448205061add013b8d7)
1 //=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker improves modeling of a few simple library functions.
10 //
11 // This checker provides a specification format - `Summary' - and
12 // contains descriptions of some library functions in this format. Each
13 // specification contains a list of branches for splitting the program state
14 // upon call, and range constraints on argument and return-value symbols that
15 // are satisfied on each branch. This spec can be expanded to include more
16 // items, like external effects of the function.
17 //
18 // The main difference between this approach and the body farms technique is
19 // in more explicit control over how many branches are produced. For example,
20 // consider standard C function `ispunct(int x)', which returns a non-zero value
21 // iff `x' is a punctuation character, that is, when `x' is in range
22 //   ['!', '/']   [':', '@']  U  ['[', '\`']  U  ['{', '~'].
23 // `Summary' provides only two branches for this function. However,
24 // any attempt to describe this range with if-statements in the body farm
25 // would result in many more branches. Because each branch needs to be analyzed
26 // independently, this significantly reduces performance. Additionally,
27 // once we consider a branch on which `x' is in range, say, ['!', '/'],
28 // we assume that such branch is an important separate path through the program,
29 // which may lead to false positives because considering this particular path
30 // was not consciously intended, and therefore it might have been unreachable.
31 //
32 // This checker uses eval::Call for modeling pure functions (functions without
33 // side effets), for which their `Summary' is a precise model. This avoids
34 // unnecessary invalidation passes. Conflicts with other checkers are unlikely
35 // because if the function has no other effects, other checkers would probably
36 // never want to improve upon the modeling done by this checker.
37 //
38 // Non-pure functions, for which only partial improvement over the default
39 // behavior is expected, are modeled via check::PostCall, non-intrusively.
40 //
41 // The following standard C functions are currently supported:
42 //
43 //   fgetc      getline   isdigit   isupper
44 //   fread      isalnum   isgraph   isxdigit
45 //   fwrite     isalpha   islower   read
46 //   getc       isascii   isprint   write
47 //   getchar    isblank   ispunct
48 //   getdelim   iscntrl   isspace
49 //
50 //===----------------------------------------------------------------------===//
51 
52 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
53 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
54 #include "clang/StaticAnalyzer/Core/Checker.h"
55 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
56 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
57 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
58 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
59 
60 using namespace clang;
61 using namespace clang::ento;
62 
63 namespace {
64 class StdLibraryFunctionsChecker
65     : public Checker<check::PreCall, check::PostCall, eval::Call> {
66   /// Below is a series of typedefs necessary to define function specs.
67   /// We avoid nesting types here because each additional qualifier
68   /// would need to be repeated in every function spec.
69   struct Summary;
70 
71   /// Specify how much the analyzer engine should entrust modeling this function
72   /// to us. If he doesn't, he performs additional invalidations.
73   enum InvalidationKind { NoEvalCall, EvalCallAsPure };
74 
75   // The universal integral type to use in value range descriptions.
76   // Unsigned to make sure overflows are well-defined.
77   typedef uint64_t RangeInt;
78 
79   /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is
80   /// a non-negative integer, which less than 5 and not equal to 2. For
81   /// `ComparesToArgument', holds information about how exactly to compare to
82   /// the argument.
83   typedef std::vector<std::pair<RangeInt, RangeInt>> IntRangeVector;
84 
85   /// A reference to an argument or return value by its number.
86   /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but
87   /// obviously uint32_t should be enough for all practical purposes.
88   typedef uint32_t ArgNo;
89   static const ArgNo Ret;
90 
91   class ValueConstraint;
92 
93   // Pointer to the ValueConstraint. We need a copyable, polymorphic and
94   // default initialize able type (vector needs that). A raw pointer was good,
95   // however, we cannot default initialize that. unique_ptr makes the Summary
96   // class non-copyable, therefore not an option. Releasing the copyability
97   // requirement would render the initialization of the Summary map infeasible.
98   using ValueConstraintPtr = std::shared_ptr<ValueConstraint>;
99 
100   /// Polymorphic base class that represents a constraint on a given argument
101   /// (or return value) of a function. Derived classes implement different kind
102   /// of constraints, e.g range constraints or correlation between two
103   /// arguments.
104   class ValueConstraint {
105   public:
106     ValueConstraint(ArgNo ArgN) : ArgN(ArgN) {}
107     virtual ~ValueConstraint() {}
108     /// Apply the effects of the constraint on the given program state. If null
109     /// is returned then the constraint is not feasible.
110     virtual ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
111                                   const Summary &Summary) const = 0;
112     virtual ValueConstraintPtr negate() const {
113       llvm_unreachable("Not implemented");
114     };
115     ArgNo getArgNo() const { return ArgN; }
116 
117   protected:
118     ArgNo ArgN; // Argument to which we apply the constraint.
119   };
120 
121   /// Given a range, should the argument stay inside or outside this range?
122   enum RangeKind { OutOfRange, WithinRange };
123 
124   /// Encapsulates a single range on a single symbol within a branch.
125   class RangeConstraint : public ValueConstraint {
126     RangeKind Kind;      // Kind of range definition.
127     IntRangeVector Args; // Polymorphic arguments.
128 
129   public:
130     RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Args)
131         : ValueConstraint(ArgN), Kind(Kind), Args(Args) {}
132 
133     const IntRangeVector &getRanges() const {
134       return Args;
135     }
136 
137   private:
138     ProgramStateRef applyAsOutOfRange(ProgramStateRef State,
139                                       const CallEvent &Call,
140                                       const Summary &Summary) const;
141     ProgramStateRef applyAsWithinRange(ProgramStateRef State,
142                                        const CallEvent &Call,
143                                        const Summary &Summary) const;
144   public:
145     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
146                           const Summary &Summary) const override {
147       switch (Kind) {
148       case OutOfRange:
149         return applyAsOutOfRange(State, Call, Summary);
150       case WithinRange:
151         return applyAsWithinRange(State, Call, Summary);
152       }
153       llvm_unreachable("Unknown range kind!");
154     }
155 
156     ValueConstraintPtr negate() const override {
157       RangeConstraint Tmp(*this);
158       switch (Kind) {
159       case OutOfRange:
160         Tmp.Kind = WithinRange;
161         break;
162       case WithinRange:
163         Tmp.Kind = OutOfRange;
164         break;
165       }
166       return std::make_shared<RangeConstraint>(Tmp);
167     }
168   };
169 
170   class ComparisonConstraint : public ValueConstraint {
171     BinaryOperator::Opcode Opcode;
172     ArgNo OtherArgN;
173 
174   public:
175     ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode,
176                          ArgNo OtherArgN)
177         : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {}
178     ArgNo getOtherArgNo() const { return OtherArgN; }
179     BinaryOperator::Opcode getOpcode() const { return Opcode; }
180     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
181                           const Summary &Summary) const override;
182   };
183 
184   class NotNullConstraint : public ValueConstraint {
185     using ValueConstraint::ValueConstraint;
186     // This variable has a role when we negate the constraint.
187     bool CannotBeNull = true;
188 
189   public:
190     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
191                           const Summary &Summary) const override {
192       SVal V = getArgSVal(Call, getArgNo());
193       if (V.isUndef())
194         return State;
195 
196       DefinedOrUnknownSVal L = V.castAs<DefinedOrUnknownSVal>();
197       if (!L.getAs<Loc>())
198         return State;
199 
200       return State->assume(L, CannotBeNull);
201     }
202 
203     ValueConstraintPtr negate() const override {
204       NotNullConstraint Tmp(*this);
205       Tmp.CannotBeNull = !this->CannotBeNull;
206       return std::make_shared<NotNullConstraint>(Tmp);
207     }
208   };
209 
210   /// The complete list of constraints that defines a single branch.
211   typedef std::vector<ValueConstraintPtr> ConstraintSet;
212 
213   using ArgTypes = std::vector<QualType>;
214   using Cases = std::vector<ConstraintSet>;
215 
216   /// Includes information about
217   ///   * function prototype (which is necessary to
218   ///     ensure we're modeling the right function and casting values properly),
219   ///   * approach to invalidation,
220   ///   * a list of branches - a list of list of ranges -
221   ///     A branch represents a path in the exploded graph of a function (which
222   ///     is a tree). So, a branch is a series of assumptions. In other words,
223   ///     branches represent split states and additional assumptions on top of
224   ///     the splitting assumption.
225   ///     For example, consider the branches in `isalpha(x)`
226   ///       Branch 1)
227   ///         x is in range ['A', 'Z'] or in ['a', 'z']
228   ///         then the return value is not 0. (I.e. out-of-range [0, 0])
229   ///       Branch 2)
230   ///         x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z']
231   ///         then the return value is 0.
232   ///   * a list of argument constraints, that must be true on every branch.
233   ///     If these constraints are not satisfied that means a fatal error
234   ///     usually resulting in undefined behaviour.
235   struct Summary {
236     const ArgTypes ArgTys;
237     const QualType RetTy;
238     const InvalidationKind InvalidationKd;
239     Cases CaseConstraints;
240     ConstraintSet ArgConstraints;
241 
242     Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd)
243         : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {}
244 
245     Summary &Case(ConstraintSet&& CS) {
246       CaseConstraints.push_back(std::move(CS));
247       return *this;
248     }
249     Summary &ArgConstraint(ValueConstraintPtr VC) {
250       ArgConstraints.push_back(VC);
251       return *this;
252     }
253 
254   private:
255     static void assertTypeSuitableForSummary(QualType T) {
256       assert(!T->isVoidType() &&
257              "We should have had no significant void types in the spec");
258       assert(T.isCanonical() &&
259              "We should only have canonical types in the spec");
260     }
261 
262   public:
263     QualType getArgType(ArgNo ArgN) const {
264       QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN];
265       assertTypeSuitableForSummary(T);
266       return T;
267     }
268 
269     /// Try our best to figure out if the call expression is the call of
270     /// *the* library function to which this specification applies.
271     bool matchesCall(const CallExpr *CE) const;
272   };
273 
274   // The same function (as in, function identifier) may have different
275   // summaries assigned to it, with different argument and return value types.
276   // We call these "variants" of the function. This can be useful for handling
277   // C++ function overloads, and also it can be used when the same function
278   // may have different definitions on different platforms.
279   typedef std::vector<Summary> Summaries;
280 
281   // The map of all functions supported by the checker. It is initialized
282   // lazily, and it doesn't change after initialization.
283   mutable llvm::StringMap<Summaries> FunctionSummaryMap;
284 
285   mutable std::unique_ptr<BugType> BT_InvalidArg;
286 
287   // Auxiliary functions to support ArgNo within all structures
288   // in a unified manner.
289   static QualType getArgType(const Summary &Summary, ArgNo ArgN) {
290     return Summary.getArgType(ArgN);
291   }
292   static QualType getArgType(const CallEvent &Call, ArgNo ArgN) {
293     return ArgN == Ret ? Call.getResultType().getCanonicalType()
294                        : Call.getArgExpr(ArgN)->getType().getCanonicalType();
295   }
296   static QualType getArgType(const CallExpr *CE, ArgNo ArgN) {
297     return ArgN == Ret ? CE->getType().getCanonicalType()
298                        : CE->getArg(ArgN)->getType().getCanonicalType();
299   }
300   static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) {
301     return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN);
302   }
303 
304 public:
305   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
306   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
307   bool evalCall(const CallEvent &Call, CheckerContext &C) const;
308 
309   enum CheckKind {
310     CK_StdCLibraryFunctionArgsChecker,
311     CK_StdCLibraryFunctionsTesterChecker,
312     CK_NumCheckKinds
313   };
314   DefaultBool ChecksEnabled[CK_NumCheckKinds];
315   CheckerNameRef CheckNames[CK_NumCheckKinds];
316 
317 private:
318   Optional<Summary> findFunctionSummary(const FunctionDecl *FD,
319                                         const CallExpr *CE,
320                                         CheckerContext &C) const;
321   Optional<Summary> findFunctionSummary(const CallEvent &Call,
322                                         CheckerContext &C) const;
323 
324   void initFunctionSummaries(CheckerContext &C) const;
325 
326   void reportBug(const CallEvent &Call, ExplodedNode *N,
327                  CheckerContext &C) const {
328     if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker])
329       return;
330     // TODO Add detailed diagnostic.
331     StringRef Msg = "Function argument constraint is not satisfied";
332     if (!BT_InvalidArg)
333       BT_InvalidArg = std::make_unique<BugType>(
334           CheckNames[CK_StdCLibraryFunctionArgsChecker],
335           "Unsatisfied argument constraints", categories::LogicError);
336     auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N);
337     bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R);
338     C.emitReport(std::move(R));
339   }
340 };
341 
342 const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret =
343     std::numeric_limits<ArgNo>::max();
344 
345 } // end of anonymous namespace
346 
347 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange(
348     ProgramStateRef State, const CallEvent &Call,
349     const Summary &Summary) const {
350 
351   ProgramStateManager &Mgr = State->getStateManager();
352   SValBuilder &SVB = Mgr.getSValBuilder();
353   BasicValueFactory &BVF = SVB.getBasicValueFactory();
354   ConstraintManager &CM = Mgr.getConstraintManager();
355   QualType T = getArgType(Summary, getArgNo());
356   SVal V = getArgSVal(Call, getArgNo());
357 
358   if (auto N = V.getAs<NonLoc>()) {
359     const IntRangeVector &R = getRanges();
360     size_t E = R.size();
361     for (size_t I = 0; I != E; ++I) {
362       const llvm::APSInt &Min = BVF.getValue(R[I].first, T);
363       const llvm::APSInt &Max = BVF.getValue(R[I].second, T);
364       assert(Min <= Max);
365       State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
366       if (!State)
367         break;
368     }
369   }
370 
371   return State;
372 }
373 
374 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsWithinRange(
375     ProgramStateRef State, const CallEvent &Call,
376     const Summary &Summary) const {
377 
378   ProgramStateManager &Mgr = State->getStateManager();
379   SValBuilder &SVB = Mgr.getSValBuilder();
380   BasicValueFactory &BVF = SVB.getBasicValueFactory();
381   ConstraintManager &CM = Mgr.getConstraintManager();
382   QualType T = getArgType(Summary, getArgNo());
383   SVal V = getArgSVal(Call, getArgNo());
384 
385   // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R".
386   // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary,
387   // and then cut away all holes in R one by one.
388   //
389   // E.g. consider a range list R as [A, B] and [C, D]
390   // -------+--------+------------------+------------+----------->
391   //        A        B                  C            D
392   // Then we assume that the value is not in [-inf, A - 1],
393   // then not in [D + 1, +inf], then not in [B + 1, C - 1]
394   if (auto N = V.getAs<NonLoc>()) {
395     const IntRangeVector &R = getRanges();
396     size_t E = R.size();
397 
398     const llvm::APSInt &MinusInf = BVF.getMinValue(T);
399     const llvm::APSInt &PlusInf = BVF.getMaxValue(T);
400 
401     const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T);
402     if (Left != PlusInf) {
403       assert(MinusInf <= Left);
404       State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false);
405       if (!State)
406         return nullptr;
407     }
408 
409     const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T);
410     if (Right != MinusInf) {
411       assert(Right <= PlusInf);
412       State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false);
413       if (!State)
414         return nullptr;
415     }
416 
417     for (size_t I = 1; I != E; ++I) {
418       const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T);
419       const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T);
420       if (Min <= Max) {
421         State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
422         if (!State)
423           return nullptr;
424       }
425     }
426   }
427 
428   return State;
429 }
430 
431 ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply(
432     ProgramStateRef State, const CallEvent &Call,
433     const Summary &Summary) const {
434 
435   ProgramStateManager &Mgr = State->getStateManager();
436   SValBuilder &SVB = Mgr.getSValBuilder();
437   QualType CondT = SVB.getConditionType();
438   QualType T = getArgType(Summary, getArgNo());
439   SVal V = getArgSVal(Call, getArgNo());
440 
441   BinaryOperator::Opcode Op = getOpcode();
442   ArgNo OtherArg = getOtherArgNo();
443   SVal OtherV = getArgSVal(Call, OtherArg);
444   QualType OtherT = getArgType(Call, OtherArg);
445   // Note: we avoid integral promotion for comparison.
446   OtherV = SVB.evalCast(OtherV, T, OtherT);
447   if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT)
448                        .getAs<DefinedOrUnknownSVal>())
449     State = State->assume(*CompV, true);
450   return State;
451 }
452 
453 void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call,
454                                               CheckerContext &C) const {
455   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
456   if (!FoundSummary)
457     return;
458 
459   const Summary &Summary = *FoundSummary;
460   ProgramStateRef State = C.getState();
461 
462   ProgramStateRef NewState = State;
463   for (const ValueConstraintPtr& VC : Summary.ArgConstraints) {
464     ProgramStateRef SuccessSt = VC->apply(NewState, Call, Summary);
465     ProgramStateRef FailureSt = VC->negate()->apply(NewState, Call, Summary);
466     // The argument constraint is not satisfied.
467     if (FailureSt && !SuccessSt) {
468       if (ExplodedNode *N = C.generateErrorNode(NewState))
469         reportBug(Call, N, C);
470       break;
471     } else {
472       // We will apply the constraint even if we cannot reason about the
473       // argument. This means both SuccessSt and FailureSt can be true. If we
474       // weren't applying the constraint that would mean that symbolic
475       // execution continues on a code whose behaviour is undefined.
476       assert(SuccessSt);
477       NewState = SuccessSt;
478     }
479   }
480   if (NewState && NewState != State)
481     C.addTransition(NewState);
482 }
483 
484 void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call,
485                                                CheckerContext &C) const {
486   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
487   if (!FoundSummary)
488     return;
489 
490   // Now apply the constraints.
491   const Summary &Summary = *FoundSummary;
492   ProgramStateRef State = C.getState();
493 
494   // Apply case/branch specifications.
495   for (const auto &VRS : Summary.CaseConstraints) {
496     ProgramStateRef NewState = State;
497     for (const auto &VR: VRS) {
498       NewState = VR->apply(NewState, Call, Summary);
499       if (!NewState)
500         break;
501     }
502 
503     if (NewState && NewState != State)
504       C.addTransition(NewState);
505   }
506 }
507 
508 bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call,
509                                           CheckerContext &C) const {
510   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
511   if (!FoundSummary)
512     return false;
513 
514   const Summary &Summary = *FoundSummary;
515   switch (Summary.InvalidationKd) {
516   case EvalCallAsPure: {
517     ProgramStateRef State = C.getState();
518     const LocationContext *LC = C.getLocationContext();
519     const auto *CE = cast_or_null<CallExpr>(Call.getOriginExpr());
520     SVal V = C.getSValBuilder().conjureSymbolVal(
521         CE, LC, CE->getType().getCanonicalType(), C.blockCount());
522     State = State->BindExpr(CE, LC, V);
523     C.addTransition(State);
524     return true;
525   }
526   case NoEvalCall:
527     // Summary tells us to avoid performing eval::Call. The function is possibly
528     // evaluated by another checker, or evaluated conservatively.
529     return false;
530   }
531   llvm_unreachable("Unknown invalidation kind!");
532 }
533 
534 bool StdLibraryFunctionsChecker::Summary::matchesCall(
535     const CallExpr *CE) const {
536   // Check number of arguments:
537   if (CE->getNumArgs() != ArgTys.size())
538     return false;
539 
540   // Check return type if relevant:
541   if (!RetTy.isNull() && RetTy != CE->getType().getCanonicalType())
542     return false;
543 
544   // Check argument types when relevant:
545   for (size_t I = 0, E = ArgTys.size(); I != E; ++I) {
546     QualType FormalT = ArgTys[I];
547     // Null type marks irrelevant arguments.
548     if (FormalT.isNull())
549       continue;
550 
551     assertTypeSuitableForSummary(FormalT);
552 
553     QualType ActualT = StdLibraryFunctionsChecker::getArgType(CE, I);
554     assert(ActualT.isCanonical());
555     if (ActualT != FormalT)
556       return false;
557   }
558 
559   return true;
560 }
561 
562 Optional<StdLibraryFunctionsChecker::Summary>
563 StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD,
564                                                 const CallExpr *CE,
565                                                 CheckerContext &C) const {
566   // Note: we cannot always obtain FD from CE
567   // (eg. virtual call, or call by pointer).
568   assert(CE);
569 
570   if (!FD)
571     return None;
572 
573   initFunctionSummaries(C);
574 
575   IdentifierInfo *II = FD->getIdentifier();
576   if (!II)
577     return None;
578   StringRef Name = II->getName();
579   if (Name.empty() || !C.isCLibraryFunction(FD, Name))
580     return None;
581 
582   auto FSMI = FunctionSummaryMap.find(Name);
583   if (FSMI == FunctionSummaryMap.end())
584     return None;
585 
586   // Verify that function signature matches the spec in advance.
587   // Otherwise we might be modeling the wrong function.
588   // Strict checking is important because we will be conducting
589   // very integral-type-sensitive operations on arguments and
590   // return values.
591   const Summaries &SpecVariants = FSMI->second;
592   for (const Summary &Spec : SpecVariants)
593     if (Spec.matchesCall(CE))
594       return Spec;
595 
596   return None;
597 }
598 
599 Optional<StdLibraryFunctionsChecker::Summary>
600 StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call,
601                                                 CheckerContext &C) const {
602   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
603   if (!FD)
604     return None;
605   const CallExpr *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
606   if (!CE)
607     return None;
608   return findFunctionSummary(FD, CE, C);
609 }
610 
611 void StdLibraryFunctionsChecker::initFunctionSummaries(
612     CheckerContext &C) const {
613   if (!FunctionSummaryMap.empty())
614     return;
615 
616   SValBuilder &SVB = C.getSValBuilder();
617   BasicValueFactory &BVF = SVB.getBasicValueFactory();
618   const ASTContext &ACtx = BVF.getContext();
619 
620   // These types are useful for writing specifications quickly,
621   // New specifications should probably introduce more types.
622   // Some types are hard to obtain from the AST, eg. "ssize_t".
623   // In such cases it should be possible to provide multiple variants
624   // of function summary for common cases (eg. ssize_t could be int or long
625   // or long long, so three summary variants would be enough).
626   // Of course, function variants are also useful for C++ overloads.
627   const QualType
628       Irrelevant{}; // A placeholder, whenever we do not care about the type.
629   const QualType IntTy = ACtx.IntTy;
630   const QualType LongTy = ACtx.LongTy;
631   const QualType LongLongTy = ACtx.LongLongTy;
632   const QualType SizeTy = ACtx.getSizeType();
633   const QualType VoidPtrTy = ACtx.VoidPtrTy; // void *T
634   const QualType ConstVoidPtrTy =
635       ACtx.getPointerType(ACtx.VoidTy.withConst()); // const void *T
636 
637   const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue();
638   const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue();
639   const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue();
640 
641   // Set UCharRangeMax to min of int or uchar maximum value.
642   // The C standard states that the arguments of functions like isalpha must
643   // be representable as an unsigned char. Their type is 'int', so the max
644   // value of the argument should be min(UCharMax, IntMax). This just happen
645   // to be true for commonly used and well tested instruction set
646   // architectures, but not for others.
647   const RangeInt UCharRangeMax =
648       std::min(BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue(), IntMax);
649 
650   // The platform dependent value of EOF.
651   // Try our best to parse this from the Preprocessor, otherwise fallback to -1.
652   const auto EOFv = [&C]() -> RangeInt {
653     if (const llvm::Optional<int> OptInt =
654             tryExpandAsInteger("EOF", C.getPreprocessor()))
655       return *OptInt;
656     return -1;
657   }();
658 
659   // We are finally ready to define specifications for all supported functions.
660   //
661   // The signature needs to have the correct number of arguments.
662   // However, we insert `Irrelevant' when the type is insignificant.
663   //
664   // Argument ranges should always cover all variants. If return value
665   // is completely unknown, omit it from the respective range set.
666   //
667   // All types in the spec need to be canonical.
668   //
669   // Every item in the list of range sets represents a particular
670   // execution path the analyzer would need to explore once
671   // the call is modeled - a new program state is constructed
672   // for every range set, and each range line in the range set
673   // corresponds to a specific constraint within this state.
674   //
675   // Upon comparing to another argument, the other argument is casted
676   // to the current argument's type. This avoids proper promotion but
677   // seems useful. For example, read() receives size_t argument,
678   // and its return value, which is of type ssize_t, cannot be greater
679   // than this argument. If we made a promotion, and the size argument
680   // is equal to, say, 10, then we'd impose a range of [0, 10] on the
681   // return value, however the correct range is [-1, 10].
682   //
683   // Please update the list of functions in the header after editing!
684   //
685 
686   // Below are helpers functions to create the summaries.
687   auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind,
688                               IntRangeVector Ranges) {
689     return std::make_shared<RangeConstraint>(ArgN, Kind, Ranges);
690   };
691   struct {
692     auto operator()(RangeKind Kind, IntRangeVector Ranges) {
693       return std::make_shared<RangeConstraint>(Ret, Kind, Ranges);
694     }
695     auto operator()(BinaryOperator::Opcode Op, ArgNo OtherArgN) {
696       return std::make_shared<ComparisonConstraint>(Ret, Op, OtherArgN);
697     }
698   } ReturnValueCondition;
699   auto Range = [](RangeInt b, RangeInt e) {
700     return IntRangeVector{std::pair<RangeInt, RangeInt>{b, e}};
701   };
702   auto SingleValue = [](RangeInt v) {
703     return IntRangeVector{std::pair<RangeInt, RangeInt>{v, v}};
704   };
705   auto LessThanOrEq = BO_LE;
706   auto NotNull = [&](ArgNo ArgN) {
707     return std::make_shared<NotNullConstraint>(ArgN);
708   };
709 
710   using RetType = QualType;
711   // Templates for summaries that are reused by many functions.
712   auto Getc = [&]() {
713     return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall)
714         .Case({ReturnValueCondition(WithinRange,
715                                     {{EOFv, EOFv}, {0, UCharRangeMax}})});
716   };
717   auto Read = [&](RetType R, RangeInt Max) {
718     return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R},
719                    NoEvalCall)
720         .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)),
721                ReturnValueCondition(WithinRange, Range(-1, Max))});
722   };
723   auto Fread = [&]() {
724     return Summary(ArgTypes{VoidPtrTy, Irrelevant, SizeTy, Irrelevant},
725                    RetType{SizeTy}, NoEvalCall)
726         .Case({
727             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
728         })
729         .ArgConstraint(NotNull(ArgNo(0)));
730   };
731   auto Fwrite = [&]() {
732     return Summary(ArgTypes{ConstVoidPtrTy, Irrelevant, SizeTy, Irrelevant},
733                    RetType{SizeTy}, NoEvalCall)
734         .Case({
735             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
736         })
737         .ArgConstraint(NotNull(ArgNo(0)));
738   };
739   auto Getline = [&](RetType R, RangeInt Max) {
740     return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R},
741                    NoEvalCall)
742         .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})});
743   };
744 
745   FunctionSummaryMap = {
746       // The isascii() family of functions.
747       // The behavior is undefined if the value of the argument is not
748       // representable as unsigned char or is not equal to EOF. See e.g. C99
749       // 7.4.1.2 The isalpha function (p: 181-182).
750       {
751           "isalnum",
752           Summaries{
753               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
754                   // Boils down to isupper() or islower() or isdigit().
755                   .Case(
756                       {ArgumentCondition(0U, WithinRange,
757                                          {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}),
758                        ReturnValueCondition(OutOfRange, SingleValue(0))})
759                   // The locale-specific range.
760                   // No post-condition. We are completely unaware of
761                   // locale-specific return values.
762                   .Case({ArgumentCondition(0U, WithinRange,
763                                            {{128, UCharRangeMax}})})
764                   .Case({ArgumentCondition(0U, OutOfRange,
765                                            {{'0', '9'},
766                                             {'A', 'Z'},
767                                             {'a', 'z'},
768                                             {128, UCharRangeMax}}),
769                          ReturnValueCondition(WithinRange, SingleValue(0))})
770                   .ArgConstraint(ArgumentCondition(
771                       0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}}))},
772       },
773       {
774           "isalpha",
775           Summaries{
776               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
777                   .Case({ArgumentCondition(0U, WithinRange,
778                                            {{'A', 'Z'}, {'a', 'z'}}),
779                          ReturnValueCondition(OutOfRange, SingleValue(0))})
780                   // The locale-specific range.
781                   .Case({ArgumentCondition(0U, WithinRange,
782                                            {{128, UCharRangeMax}})})
783                   .Case({ArgumentCondition(
784                              0U, OutOfRange,
785                              {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}),
786                          ReturnValueCondition(WithinRange, SingleValue(0))})},
787       },
788       {
789           "isascii",
790           Summaries{
791               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
792                   .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
793                          ReturnValueCondition(OutOfRange, SingleValue(0))})
794                   .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)),
795                          ReturnValueCondition(WithinRange, SingleValue(0))})},
796       },
797       {
798           "isblank",
799           Summaries{
800               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
801                   .Case({ArgumentCondition(0U, WithinRange,
802                                            {{'\t', '\t'}, {' ', ' '}}),
803                          ReturnValueCondition(OutOfRange, SingleValue(0))})
804                   .Case({ArgumentCondition(0U, OutOfRange,
805                                            {{'\t', '\t'}, {' ', ' '}}),
806                          ReturnValueCondition(WithinRange, SingleValue(0))})},
807       },
808       {
809           "iscntrl",
810           Summaries{
811               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
812                   .Case({ArgumentCondition(0U, WithinRange,
813                                            {{0, 32}, {127, 127}}),
814                          ReturnValueCondition(OutOfRange, SingleValue(0))})
815                   .Case(
816                       {ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}),
817                        ReturnValueCondition(WithinRange, SingleValue(0))})},
818       },
819       {
820           "isdigit",
821           Summaries{
822               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
823                   .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')),
824                          ReturnValueCondition(OutOfRange, SingleValue(0))})
825                   .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')),
826                          ReturnValueCondition(WithinRange, SingleValue(0))})},
827       },
828       {
829           "isgraph",
830           Summaries{
831               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
832                   .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)),
833                          ReturnValueCondition(OutOfRange, SingleValue(0))})
834                   .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)),
835                          ReturnValueCondition(WithinRange, SingleValue(0))})},
836       },
837       {
838           "islower",
839           Summaries{
840               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
841                   // Is certainly lowercase.
842                   .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')),
843                          ReturnValueCondition(OutOfRange, SingleValue(0))})
844                   // Is ascii but not lowercase.
845                   .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
846                          ArgumentCondition(0U, OutOfRange, Range('a', 'z')),
847                          ReturnValueCondition(WithinRange, SingleValue(0))})
848                   // The locale-specific range.
849                   .Case({ArgumentCondition(0U, WithinRange,
850                                            {{128, UCharRangeMax}})})
851                   // Is not an unsigned char.
852                   .Case({ArgumentCondition(0U, OutOfRange,
853                                            Range(0, UCharRangeMax)),
854                          ReturnValueCondition(WithinRange, SingleValue(0))})},
855       },
856       {
857           "isprint",
858           Summaries{
859               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
860                   .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)),
861                          ReturnValueCondition(OutOfRange, SingleValue(0))})
862                   .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)),
863                          ReturnValueCondition(WithinRange, SingleValue(0))})},
864       },
865       {
866           "ispunct",
867           Summaries{
868               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
869                   .Case({ArgumentCondition(
870                              0U, WithinRange,
871                              {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
872                          ReturnValueCondition(OutOfRange, SingleValue(0))})
873                   .Case({ArgumentCondition(
874                              0U, OutOfRange,
875                              {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
876                          ReturnValueCondition(WithinRange, SingleValue(0))})},
877       },
878       {
879           "isspace",
880           Summaries{
881               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
882                   // Space, '\f', '\n', '\r', '\t', '\v'.
883                   .Case({ArgumentCondition(0U, WithinRange,
884                                            {{9, 13}, {' ', ' '}}),
885                          ReturnValueCondition(OutOfRange, SingleValue(0))})
886                   // The locale-specific range.
887                   .Case({ArgumentCondition(0U, WithinRange,
888                                            {{128, UCharRangeMax}})})
889                   .Case({ArgumentCondition(
890                              0U, OutOfRange,
891                              {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}),
892                          ReturnValueCondition(WithinRange, SingleValue(0))})},
893       },
894       {
895           "isupper",
896           Summaries{
897               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
898                   // Is certainly uppercase.
899                   .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')),
900                          ReturnValueCondition(OutOfRange, SingleValue(0))})
901                   // The locale-specific range.
902                   .Case({ArgumentCondition(0U, WithinRange,
903                                            {{128, UCharRangeMax}})})
904                   // Other.
905                   .Case({ArgumentCondition(0U, OutOfRange,
906                                            {{'A', 'Z'}, {128, UCharRangeMax}}),
907                          ReturnValueCondition(WithinRange, SingleValue(0))})},
908       },
909       {
910           "isxdigit",
911           Summaries{
912               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
913                   .Case(
914                       {ArgumentCondition(0U, WithinRange,
915                                          {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
916                        ReturnValueCondition(OutOfRange, SingleValue(0))})
917                   .Case(
918                       {ArgumentCondition(0U, OutOfRange,
919                                          {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
920                        ReturnValueCondition(WithinRange, SingleValue(0))})},
921       },
922 
923       // The getc() family of functions that returns either a char or an EOF.
924       {"getc", Summaries{Getc()}},
925       {"fgetc", Summaries{Getc()}},
926       {"getchar",
927        Summaries{Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall)
928                      .Case({ReturnValueCondition(
929                          WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})})}},
930 
931       // read()-like functions that never return more than buffer size.
932       // We are not sure how ssize_t is defined on every platform, so we
933       // provide three variants that should cover common cases.
934       {"read", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax),
935                          Read(LongLongTy, LongLongMax)}},
936       {"write", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax),
937                           Read(LongLongTy, LongLongMax)}},
938       {"fread", Summaries{Fread()}},
939       {"fwrite", Summaries{Fwrite()}},
940       // getline()-like functions either fail or read at least the delimiter.
941       {"getline", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax),
942                             Getline(LongLongTy, LongLongMax)}},
943       {"getdelim", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax),
944                              Getline(LongLongTy, LongLongMax)}},
945   };
946 
947   // Functions for testing.
948   if (ChecksEnabled[CK_StdCLibraryFunctionsTesterChecker]) {
949     llvm::StringMap<Summaries> TestFunctionSummaryMap = {
950         {"__two_constrained_args",
951          Summaries{
952              Summary(ArgTypes{IntTy, IntTy}, RetType{IntTy}, EvalCallAsPure)
953                  .ArgConstraint(
954                      ArgumentCondition(0U, WithinRange, SingleValue(1)))
955                  .ArgConstraint(
956                      ArgumentCondition(1U, WithinRange, SingleValue(1)))}},
957         {"__arg_constrained_twice",
958          Summaries{Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
959                        .ArgConstraint(
960                            ArgumentCondition(0U, OutOfRange, SingleValue(1)))
961                        .ArgConstraint(
962                            ArgumentCondition(0U, OutOfRange, SingleValue(2)))}},
963         {"__defaultparam", Summaries{Summary(ArgTypes{Irrelevant, IntTy},
964                                              RetType{IntTy}, EvalCallAsPure)
965                                          .ArgConstraint(NotNull(ArgNo(0)))}},
966     };
967     for (auto &E : TestFunctionSummaryMap) {
968       auto InsertRes =
969           FunctionSummaryMap.insert({std::string(E.getKey()), E.getValue()});
970       assert(InsertRes.second &&
971              "Test functions must not clash with modeled functions");
972       (void)InsertRes;
973     }
974   }
975 }
976 
977 void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) {
978   mgr.registerChecker<StdLibraryFunctionsChecker>();
979 }
980 
981 bool ento::shouldRegisterStdCLibraryFunctionsChecker(const CheckerManager &mgr) {
982   return true;
983 }
984 
985 #define REGISTER_CHECKER(name)                                                 \
986   void ento::register##name(CheckerManager &mgr) {                             \
987     StdLibraryFunctionsChecker *checker =                                      \
988         mgr.getChecker<StdLibraryFunctionsChecker>();                          \
989     checker->ChecksEnabled[StdLibraryFunctionsChecker::CK_##name] = true;      \
990     checker->CheckNames[StdLibraryFunctionsChecker::CK_##name] =               \
991         mgr.getCurrentCheckerName();                                           \
992   }                                                                            \
993                                                                                \
994   bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }
995 
996 REGISTER_CHECKER(StdCLibraryFunctionArgsChecker)
997 REGISTER_CHECKER(StdCLibraryFunctionsTesterChecker)
998