xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp (revision 350dadaa8ab3db34ff41d7291f43442c57719de3)
1 //=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker improves modeling of a few simple library functions.
10 //
11 // This checker provides a specification format - `Summary' - and
12 // contains descriptions of some library functions in this format. Each
13 // specification contains a list of branches for splitting the program state
14 // upon call, and range constraints on argument and return-value symbols that
15 // are satisfied on each branch. This spec can be expanded to include more
16 // items, like external effects of the function.
17 //
18 // The main difference between this approach and the body farms technique is
19 // in more explicit control over how many branches are produced. For example,
20 // consider standard C function `ispunct(int x)', which returns a non-zero value
21 // iff `x' is a punctuation character, that is, when `x' is in range
22 //   ['!', '/']   [':', '@']  U  ['[', '\`']  U  ['{', '~'].
23 // `Summary' provides only two branches for this function. However,
24 // any attempt to describe this range with if-statements in the body farm
25 // would result in many more branches. Because each branch needs to be analyzed
26 // independently, this significantly reduces performance. Additionally,
27 // once we consider a branch on which `x' is in range, say, ['!', '/'],
28 // we assume that such branch is an important separate path through the program,
29 // which may lead to false positives because considering this particular path
30 // was not consciously intended, and therefore it might have been unreachable.
31 //
32 // This checker uses eval::Call for modeling pure functions (functions without
33 // side effets), for which their `Summary' is a precise model. This avoids
34 // unnecessary invalidation passes. Conflicts with other checkers are unlikely
35 // because if the function has no other effects, other checkers would probably
36 // never want to improve upon the modeling done by this checker.
37 //
38 // Non-pure functions, for which only partial improvement over the default
39 // behavior is expected, are modeled via check::PostCall, non-intrusively.
40 //
41 // The following standard C functions are currently supported:
42 //
43 //   fgetc      getline   isdigit   isupper
44 //   fread      isalnum   isgraph   isxdigit
45 //   fwrite     isalpha   islower   read
46 //   getc       isascii   isprint   write
47 //   getchar    isblank   ispunct
48 //   getdelim   iscntrl   isspace
49 //
50 //===----------------------------------------------------------------------===//
51 
52 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
53 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
54 #include "clang/StaticAnalyzer/Core/Checker.h"
55 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
56 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
57 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
58 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
59 
60 using namespace clang;
61 using namespace clang::ento;
62 
63 namespace {
64 class StdLibraryFunctionsChecker
65     : public Checker<check::PreCall, check::PostCall, eval::Call> {
66   /// Below is a series of typedefs necessary to define function specs.
67   /// We avoid nesting types here because each additional qualifier
68   /// would need to be repeated in every function spec.
69   struct Summary;
70 
71   /// Specify how much the analyzer engine should entrust modeling this function
72   /// to us. If he doesn't, he performs additional invalidations.
73   enum InvalidationKind { NoEvalCall, EvalCallAsPure };
74 
75   // The universal integral type to use in value range descriptions.
76   // Unsigned to make sure overflows are well-defined.
77   typedef uint64_t RangeInt;
78 
79   /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is
80   /// a non-negative integer, which less than 5 and not equal to 2. For
81   /// `ComparesToArgument', holds information about how exactly to compare to
82   /// the argument.
83   typedef std::vector<std::pair<RangeInt, RangeInt>> IntRangeVector;
84 
85   /// A reference to an argument or return value by its number.
86   /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but
87   /// obviously uint32_t should be enough for all practical purposes.
88   typedef uint32_t ArgNo;
89   static const ArgNo Ret;
90 
91   class ValueConstraint;
92 
93   // Pointer to the ValueConstraint. We need a copyable, polymorphic and
94   // default initialize able type (vector needs that). A raw pointer was good,
95   // however, we cannot default initialize that. unique_ptr makes the Summary
96   // class non-copyable, therefore not an option. Releasing the copyability
97   // requirement would render the initialization of the Summary map infeasible.
98   using ValueConstraintPtr = std::shared_ptr<ValueConstraint>;
99 
100   /// Polymorphic base class that represents a constraint on a given argument
101   /// (or return value) of a function. Derived classes implement different kind
102   /// of constraints, e.g range constraints or correlation between two
103   /// arguments.
104   class ValueConstraint {
105   public:
106     ValueConstraint(ArgNo ArgN) : ArgN(ArgN) {}
107     virtual ~ValueConstraint() {}
108     /// Apply the effects of the constraint on the given program state. If null
109     /// is returned then the constraint is not feasible.
110     virtual ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
111                                   const Summary &Summary) const = 0;
112     virtual ValueConstraintPtr negate() const {
113       llvm_unreachable("Not implemented");
114     };
115     ArgNo getArgNo() const { return ArgN; }
116 
117   protected:
118     ArgNo ArgN; // Argument to which we apply the constraint.
119   };
120 
121   /// Given a range, should the argument stay inside or outside this range?
122   enum RangeKind { OutOfRange, WithinRange };
123 
124   /// Encapsulates a single range on a single symbol within a branch.
125   class RangeConstraint : public ValueConstraint {
126     RangeKind Kind;      // Kind of range definition.
127     IntRangeVector Args; // Polymorphic arguments.
128 
129   public:
130     RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Args)
131         : ValueConstraint(ArgN), Kind(Kind), Args(Args) {}
132 
133     const IntRangeVector &getRanges() const {
134       return Args;
135     }
136 
137   private:
138     ProgramStateRef applyAsOutOfRange(ProgramStateRef State,
139                                       const CallEvent &Call,
140                                       const Summary &Summary) const;
141     ProgramStateRef applyAsWithinRange(ProgramStateRef State,
142                                        const CallEvent &Call,
143                                        const Summary &Summary) const;
144   public:
145     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
146                           const Summary &Summary) const override {
147       switch (Kind) {
148       case OutOfRange:
149         return applyAsOutOfRange(State, Call, Summary);
150       case WithinRange:
151         return applyAsWithinRange(State, Call, Summary);
152       }
153       llvm_unreachable("Unknown range kind!");
154     }
155 
156     ValueConstraintPtr negate() const override {
157       RangeConstraint Tmp(*this);
158       switch (Kind) {
159       case OutOfRange:
160         Tmp.Kind = WithinRange;
161         break;
162       case WithinRange:
163         Tmp.Kind = OutOfRange;
164         break;
165       }
166       return std::make_shared<RangeConstraint>(Tmp);
167     }
168   };
169 
170   class ComparisonConstraint : public ValueConstraint {
171     BinaryOperator::Opcode Opcode;
172     ArgNo OtherArgN;
173 
174   public:
175     ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode,
176                          ArgNo OtherArgN)
177         : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {}
178     ArgNo getOtherArgNo() const { return OtherArgN; }
179     BinaryOperator::Opcode getOpcode() const { return Opcode; }
180     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
181                           const Summary &Summary) const override;
182   };
183 
184   class NotNullConstraint : public ValueConstraint {
185     using ValueConstraint::ValueConstraint;
186     // This variable has a role when we negate the constraint.
187     bool CannotBeNull = true;
188 
189   public:
190     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
191                           const Summary &Summary) const override {
192       SVal V = getArgSVal(Call, getArgNo());
193       if (V.isUndef())
194         return State;
195 
196       DefinedOrUnknownSVal L = V.castAs<DefinedOrUnknownSVal>();
197       if (!L.getAs<Loc>())
198         return State;
199 
200       return State->assume(L, CannotBeNull);
201     }
202 
203     ValueConstraintPtr negate() const override {
204       NotNullConstraint Tmp(*this);
205       Tmp.CannotBeNull = !this->CannotBeNull;
206       return std::make_shared<NotNullConstraint>(Tmp);
207     }
208   };
209 
210   /// The complete list of constraints that defines a single branch.
211   typedef std::vector<ValueConstraintPtr> ConstraintSet;
212 
213   using ArgTypes = std::vector<QualType>;
214   using Cases = std::vector<ConstraintSet>;
215 
216   /// Includes information about
217   ///   * function prototype (which is necessary to
218   ///     ensure we're modeling the right function and casting values properly),
219   ///   * approach to invalidation,
220   ///   * a list of branches - a list of list of ranges -
221   ///     A branch represents a path in the exploded graph of a function (which
222   ///     is a tree). So, a branch is a series of assumptions. In other words,
223   ///     branches represent split states and additional assumptions on top of
224   ///     the splitting assumption.
225   ///     For example, consider the branches in `isalpha(x)`
226   ///       Branch 1)
227   ///         x is in range ['A', 'Z'] or in ['a', 'z']
228   ///         then the return value is not 0. (I.e. out-of-range [0, 0])
229   ///       Branch 2)
230   ///         x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z']
231   ///         then the return value is 0.
232   ///   * a list of argument constraints, that must be true on every branch.
233   ///     If these constraints are not satisfied that means a fatal error
234   ///     usually resulting in undefined behaviour.
235   struct Summary {
236     const ArgTypes ArgTys;
237     const QualType RetTy;
238     const InvalidationKind InvalidationKd;
239     Cases CaseConstraints;
240     ConstraintSet ArgConstraints;
241 
242     Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd)
243         : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {}
244 
245     Summary &Case(ConstraintSet&& CS) {
246       CaseConstraints.push_back(std::move(CS));
247       return *this;
248     }
249     Summary &ArgConstraint(ValueConstraintPtr VC) {
250       ArgConstraints.push_back(VC);
251       return *this;
252     }
253 
254   private:
255     static void assertTypeSuitableForSummary(QualType T) {
256       assert(!T->isVoidType() &&
257              "We should have had no significant void types in the spec");
258       assert(T.isCanonical() &&
259              "We should only have canonical types in the spec");
260     }
261 
262   public:
263     QualType getArgType(ArgNo ArgN) const {
264       QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN];
265       assertTypeSuitableForSummary(T);
266       return T;
267     }
268 
269     /// Try our best to figure out if the summary's signature matches
270     /// *the* library function to which this specification applies.
271     bool matchesSignature(const FunctionDecl *FD) const;
272   };
273 
274   // The map of all functions supported by the checker. It is initialized
275   // lazily, and it doesn't change after initialization.
276   using FunctionSummaryMapType = llvm::DenseMap<const FunctionDecl *, Summary>;
277   mutable FunctionSummaryMapType FunctionSummaryMap;
278 
279   mutable std::unique_ptr<BugType> BT_InvalidArg;
280 
281   // Auxiliary functions to support ArgNo within all structures
282   // in a unified manner.
283   static QualType getArgType(const Summary &Summary, ArgNo ArgN) {
284     return Summary.getArgType(ArgN);
285   }
286   static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) {
287     return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN);
288   }
289 
290 public:
291   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
292   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
293   bool evalCall(const CallEvent &Call, CheckerContext &C) const;
294 
295   enum CheckKind {
296     CK_StdCLibraryFunctionArgsChecker,
297     CK_StdCLibraryFunctionsTesterChecker,
298     CK_NumCheckKinds
299   };
300   DefaultBool ChecksEnabled[CK_NumCheckKinds];
301   CheckerNameRef CheckNames[CK_NumCheckKinds];
302 
303   bool DisplayLoadedSummaries = false;
304 
305 private:
306   Optional<Summary> findFunctionSummary(const FunctionDecl *FD,
307                                         CheckerContext &C) const;
308   Optional<Summary> findFunctionSummary(const CallEvent &Call,
309                                         CheckerContext &C) const;
310 
311   void initFunctionSummaries(CheckerContext &C) const;
312 
313   void reportBug(const CallEvent &Call, ExplodedNode *N,
314                  CheckerContext &C) const {
315     if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker])
316       return;
317     // TODO Add detailed diagnostic.
318     StringRef Msg = "Function argument constraint is not satisfied";
319     if (!BT_InvalidArg)
320       BT_InvalidArg = std::make_unique<BugType>(
321           CheckNames[CK_StdCLibraryFunctionArgsChecker],
322           "Unsatisfied argument constraints", categories::LogicError);
323     auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N);
324     bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R);
325     C.emitReport(std::move(R));
326   }
327 };
328 
329 const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret =
330     std::numeric_limits<ArgNo>::max();
331 
332 } // end of anonymous namespace
333 
334 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange(
335     ProgramStateRef State, const CallEvent &Call,
336     const Summary &Summary) const {
337 
338   ProgramStateManager &Mgr = State->getStateManager();
339   SValBuilder &SVB = Mgr.getSValBuilder();
340   BasicValueFactory &BVF = SVB.getBasicValueFactory();
341   ConstraintManager &CM = Mgr.getConstraintManager();
342   QualType T = getArgType(Summary, getArgNo());
343   SVal V = getArgSVal(Call, getArgNo());
344 
345   if (auto N = V.getAs<NonLoc>()) {
346     const IntRangeVector &R = getRanges();
347     size_t E = R.size();
348     for (size_t I = 0; I != E; ++I) {
349       const llvm::APSInt &Min = BVF.getValue(R[I].first, T);
350       const llvm::APSInt &Max = BVF.getValue(R[I].second, T);
351       assert(Min <= Max);
352       State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
353       if (!State)
354         break;
355     }
356   }
357 
358   return State;
359 }
360 
361 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsWithinRange(
362     ProgramStateRef State, const CallEvent &Call,
363     const Summary &Summary) const {
364 
365   ProgramStateManager &Mgr = State->getStateManager();
366   SValBuilder &SVB = Mgr.getSValBuilder();
367   BasicValueFactory &BVF = SVB.getBasicValueFactory();
368   ConstraintManager &CM = Mgr.getConstraintManager();
369   QualType T = getArgType(Summary, getArgNo());
370   SVal V = getArgSVal(Call, getArgNo());
371 
372   // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R".
373   // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary,
374   // and then cut away all holes in R one by one.
375   //
376   // E.g. consider a range list R as [A, B] and [C, D]
377   // -------+--------+------------------+------------+----------->
378   //        A        B                  C            D
379   // Then we assume that the value is not in [-inf, A - 1],
380   // then not in [D + 1, +inf], then not in [B + 1, C - 1]
381   if (auto N = V.getAs<NonLoc>()) {
382     const IntRangeVector &R = getRanges();
383     size_t E = R.size();
384 
385     const llvm::APSInt &MinusInf = BVF.getMinValue(T);
386     const llvm::APSInt &PlusInf = BVF.getMaxValue(T);
387 
388     const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T);
389     if (Left != PlusInf) {
390       assert(MinusInf <= Left);
391       State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false);
392       if (!State)
393         return nullptr;
394     }
395 
396     const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T);
397     if (Right != MinusInf) {
398       assert(Right <= PlusInf);
399       State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false);
400       if (!State)
401         return nullptr;
402     }
403 
404     for (size_t I = 1; I != E; ++I) {
405       const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T);
406       const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T);
407       if (Min <= Max) {
408         State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
409         if (!State)
410           return nullptr;
411       }
412     }
413   }
414 
415   return State;
416 }
417 
418 ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply(
419     ProgramStateRef State, const CallEvent &Call,
420     const Summary &Summary) const {
421 
422   ProgramStateManager &Mgr = State->getStateManager();
423   SValBuilder &SVB = Mgr.getSValBuilder();
424   QualType CondT = SVB.getConditionType();
425   QualType T = getArgType(Summary, getArgNo());
426   SVal V = getArgSVal(Call, getArgNo());
427 
428   BinaryOperator::Opcode Op = getOpcode();
429   ArgNo OtherArg = getOtherArgNo();
430   SVal OtherV = getArgSVal(Call, OtherArg);
431   QualType OtherT = getArgType(Summary, OtherArg);
432   // Note: we avoid integral promotion for comparison.
433   OtherV = SVB.evalCast(OtherV, T, OtherT);
434   if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT)
435                        .getAs<DefinedOrUnknownSVal>())
436     State = State->assume(*CompV, true);
437   return State;
438 }
439 
440 void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call,
441                                               CheckerContext &C) const {
442   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
443   if (!FoundSummary)
444     return;
445 
446   const Summary &Summary = *FoundSummary;
447   ProgramStateRef State = C.getState();
448 
449   ProgramStateRef NewState = State;
450   for (const ValueConstraintPtr& VC : Summary.ArgConstraints) {
451     ProgramStateRef SuccessSt = VC->apply(NewState, Call, Summary);
452     ProgramStateRef FailureSt = VC->negate()->apply(NewState, Call, Summary);
453     // The argument constraint is not satisfied.
454     if (FailureSt && !SuccessSt) {
455       if (ExplodedNode *N = C.generateErrorNode(NewState))
456         reportBug(Call, N, C);
457       break;
458     } else {
459       // We will apply the constraint even if we cannot reason about the
460       // argument. This means both SuccessSt and FailureSt can be true. If we
461       // weren't applying the constraint that would mean that symbolic
462       // execution continues on a code whose behaviour is undefined.
463       assert(SuccessSt);
464       NewState = SuccessSt;
465     }
466   }
467   if (NewState && NewState != State)
468     C.addTransition(NewState);
469 }
470 
471 void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call,
472                                                CheckerContext &C) const {
473   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
474   if (!FoundSummary)
475     return;
476 
477   // Now apply the constraints.
478   const Summary &Summary = *FoundSummary;
479   ProgramStateRef State = C.getState();
480 
481   // Apply case/branch specifications.
482   for (const auto &VRS : Summary.CaseConstraints) {
483     ProgramStateRef NewState = State;
484     for (const auto &VR: VRS) {
485       NewState = VR->apply(NewState, Call, Summary);
486       if (!NewState)
487         break;
488     }
489 
490     if (NewState && NewState != State)
491       C.addTransition(NewState);
492   }
493 }
494 
495 bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call,
496                                           CheckerContext &C) const {
497   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
498   if (!FoundSummary)
499     return false;
500 
501   const Summary &Summary = *FoundSummary;
502   switch (Summary.InvalidationKd) {
503   case EvalCallAsPure: {
504     ProgramStateRef State = C.getState();
505     const LocationContext *LC = C.getLocationContext();
506     const auto *CE = cast_or_null<CallExpr>(Call.getOriginExpr());
507     SVal V = C.getSValBuilder().conjureSymbolVal(
508         CE, LC, CE->getType().getCanonicalType(), C.blockCount());
509     State = State->BindExpr(CE, LC, V);
510     C.addTransition(State);
511     return true;
512   }
513   case NoEvalCall:
514     // Summary tells us to avoid performing eval::Call. The function is possibly
515     // evaluated by another checker, or evaluated conservatively.
516     return false;
517   }
518   llvm_unreachable("Unknown invalidation kind!");
519 }
520 
521 bool StdLibraryFunctionsChecker::Summary::matchesSignature(
522     const FunctionDecl *FD) const {
523   // Check number of arguments:
524   if (FD->param_size() != ArgTys.size())
525     return false;
526 
527   // Check return type if relevant:
528   if (!RetTy.isNull() && RetTy != FD->getReturnType().getCanonicalType())
529     return false;
530 
531   // Check argument types when relevant:
532   for (size_t I = 0, E = ArgTys.size(); I != E; ++I) {
533     QualType FormalT = ArgTys[I];
534     // Null type marks irrelevant arguments.
535     if (FormalT.isNull())
536       continue;
537 
538     assertTypeSuitableForSummary(FormalT);
539 
540     QualType ActualT = FD->getParamDecl(I)->getType().getCanonicalType();
541     if (ActualT != FormalT)
542       return false;
543   }
544 
545   return true;
546 }
547 
548 Optional<StdLibraryFunctionsChecker::Summary>
549 StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD,
550                                                 CheckerContext &C) const {
551   if (!FD)
552     return None;
553 
554   initFunctionSummaries(C);
555 
556   auto FSMI = FunctionSummaryMap.find(FD->getCanonicalDecl());
557   if (FSMI == FunctionSummaryMap.end())
558     return None;
559   return FSMI->second;
560 }
561 
562 Optional<StdLibraryFunctionsChecker::Summary>
563 StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call,
564                                                 CheckerContext &C) const {
565   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
566   if (!FD)
567     return None;
568   return findFunctionSummary(FD, C);
569 }
570 
571 void StdLibraryFunctionsChecker::initFunctionSummaries(
572     CheckerContext &C) const {
573   if (!FunctionSummaryMap.empty())
574     return;
575 
576   SValBuilder &SVB = C.getSValBuilder();
577   BasicValueFactory &BVF = SVB.getBasicValueFactory();
578   const ASTContext &ACtx = BVF.getContext();
579 
580   // These types are useful for writing specifications quickly,
581   // New specifications should probably introduce more types.
582   // Some types are hard to obtain from the AST, eg. "ssize_t".
583   // In such cases it should be possible to provide multiple variants
584   // of function summary for common cases (eg. ssize_t could be int or long
585   // or long long, so three summary variants would be enough).
586   // Of course, function variants are also useful for C++ overloads.
587   const QualType
588       Irrelevant{}; // A placeholder, whenever we do not care about the type.
589   const QualType IntTy = ACtx.IntTy;
590   const QualType LongTy = ACtx.LongTy;
591   const QualType LongLongTy = ACtx.LongLongTy;
592   const QualType SizeTy = ACtx.getSizeType();
593   const QualType VoidPtrTy = ACtx.VoidPtrTy; // void *
594   const QualType VoidPtrRestrictTy =
595       ACtx.getRestrictType(VoidPtrTy); // void *restrict
596   const QualType ConstVoidPtrTy =
597       ACtx.getPointerType(ACtx.VoidTy.withConst()); // const void *
598   const QualType ConstCharPtrTy =
599       ACtx.getPointerType(ACtx.CharTy.withConst()); // const char *
600   const QualType ConstVoidPtrRestrictTy =
601       ACtx.getRestrictType(ConstVoidPtrTy); // const void *restrict
602 
603   const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue();
604   const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue();
605   const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue();
606 
607   // Set UCharRangeMax to min of int or uchar maximum value.
608   // The C standard states that the arguments of functions like isalpha must
609   // be representable as an unsigned char. Their type is 'int', so the max
610   // value of the argument should be min(UCharMax, IntMax). This just happen
611   // to be true for commonly used and well tested instruction set
612   // architectures, but not for others.
613   const RangeInt UCharRangeMax =
614       std::min(BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue(), IntMax);
615 
616   // The platform dependent value of EOF.
617   // Try our best to parse this from the Preprocessor, otherwise fallback to -1.
618   const auto EOFv = [&C]() -> RangeInt {
619     if (const llvm::Optional<int> OptInt =
620             tryExpandAsInteger("EOF", C.getPreprocessor()))
621       return *OptInt;
622     return -1;
623   }();
624 
625   // Auxiliary class to aid adding summaries to the summary map.
626   struct AddToFunctionSummaryMap {
627     const ASTContext &ACtx;
628     FunctionSummaryMapType &Map;
629     bool DisplayLoadedSummaries;
630     AddToFunctionSummaryMap(const ASTContext &ACtx, FunctionSummaryMapType &FSM,
631                             bool DisplayLoadedSummaries)
632         : ACtx(ACtx), Map(FSM), DisplayLoadedSummaries(DisplayLoadedSummaries) {
633     }
634 
635     // Add a summary to a FunctionDecl found by lookup. The lookup is performed
636     // by the given Name, and in the global scope. The summary will be attached
637     // to the found FunctionDecl only if the signatures match.
638     void operator()(StringRef Name, const Summary &S) {
639       IdentifierInfo &II = ACtx.Idents.get(Name);
640       auto LookupRes = ACtx.getTranslationUnitDecl()->lookup(&II);
641       if (LookupRes.size() == 0)
642         return;
643       for (Decl *D : LookupRes) {
644         if (auto *FD = dyn_cast<FunctionDecl>(D)) {
645           if (S.matchesSignature(FD)) {
646             auto Res = Map.insert({FD->getCanonicalDecl(), S});
647             assert(Res.second && "Function already has a summary set!");
648             (void)Res;
649             if (DisplayLoadedSummaries) {
650               llvm::errs() << "Loaded summary for: ";
651               FD->print(llvm::errs());
652               llvm::errs() << "\n";
653             }
654             return;
655           }
656         }
657       }
658     }
659     // Add several summaries for the given name.
660     void operator()(StringRef Name, const std::vector<Summary> &Summaries) {
661       for (const Summary &S : Summaries)
662         operator()(Name, S);
663     }
664   } addToFunctionSummaryMap(ACtx, FunctionSummaryMap, DisplayLoadedSummaries);
665 
666   // We are finally ready to define specifications for all supported functions.
667   //
668   // The signature needs to have the correct number of arguments.
669   // However, we insert `Irrelevant' when the type is insignificant.
670   //
671   // Argument ranges should always cover all variants. If return value
672   // is completely unknown, omit it from the respective range set.
673   //
674   // All types in the spec need to be canonical.
675   //
676   // Every item in the list of range sets represents a particular
677   // execution path the analyzer would need to explore once
678   // the call is modeled - a new program state is constructed
679   // for every range set, and each range line in the range set
680   // corresponds to a specific constraint within this state.
681   //
682   // Upon comparing to another argument, the other argument is casted
683   // to the current argument's type. This avoids proper promotion but
684   // seems useful. For example, read() receives size_t argument,
685   // and its return value, which is of type ssize_t, cannot be greater
686   // than this argument. If we made a promotion, and the size argument
687   // is equal to, say, 10, then we'd impose a range of [0, 10] on the
688   // return value, however the correct range is [-1, 10].
689   //
690   // Please update the list of functions in the header after editing!
691 
692   // Below are helpers functions to create the summaries.
693   auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind,
694                               IntRangeVector Ranges) {
695     return std::make_shared<RangeConstraint>(ArgN, Kind, Ranges);
696   };
697   struct {
698     auto operator()(RangeKind Kind, IntRangeVector Ranges) {
699       return std::make_shared<RangeConstraint>(Ret, Kind, Ranges);
700     }
701     auto operator()(BinaryOperator::Opcode Op, ArgNo OtherArgN) {
702       return std::make_shared<ComparisonConstraint>(Ret, Op, OtherArgN);
703     }
704   } ReturnValueCondition;
705   auto Range = [](RangeInt b, RangeInt e) {
706     return IntRangeVector{std::pair<RangeInt, RangeInt>{b, e}};
707   };
708   auto SingleValue = [](RangeInt v) {
709     return IntRangeVector{std::pair<RangeInt, RangeInt>{v, v}};
710   };
711   auto LessThanOrEq = BO_LE;
712   auto NotNull = [&](ArgNo ArgN) {
713     return std::make_shared<NotNullConstraint>(ArgN);
714   };
715 
716   using RetType = QualType;
717   // Templates for summaries that are reused by many functions.
718   auto Getc = [&]() {
719     return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall)
720         .Case({ReturnValueCondition(WithinRange,
721                                     {{EOFv, EOFv}, {0, UCharRangeMax}})});
722   };
723   auto Read = [&](RetType R, RangeInt Max) {
724     return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R},
725                    NoEvalCall)
726         .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)),
727                ReturnValueCondition(WithinRange, Range(-1, Max))});
728   };
729   auto Fread = [&]() {
730     return Summary(ArgTypes{VoidPtrRestrictTy, Irrelevant, SizeTy, Irrelevant},
731                    RetType{SizeTy}, NoEvalCall)
732         .Case({
733             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
734         })
735         .ArgConstraint(NotNull(ArgNo(0)));
736   };
737   auto Fwrite = [&]() {
738     return Summary(
739                ArgTypes{ConstVoidPtrRestrictTy, Irrelevant, SizeTy, Irrelevant},
740                RetType{SizeTy}, NoEvalCall)
741         .Case({
742             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
743         })
744         .ArgConstraint(NotNull(ArgNo(0)));
745   };
746   auto Getline = [&](RetType R, RangeInt Max) {
747     return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R},
748                    NoEvalCall)
749         .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})});
750   };
751 
752   // The isascii() family of functions.
753   // The behavior is undefined if the value of the argument is not
754   // representable as unsigned char or is not equal to EOF. See e.g. C99
755   // 7.4.1.2 The isalpha function (p: 181-182).
756   addToFunctionSummaryMap(
757       "isalnum",
758       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
759           // Boils down to isupper() or islower() or isdigit().
760           .Case({ArgumentCondition(0U, WithinRange,
761                                    {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}),
762                  ReturnValueCondition(OutOfRange, SingleValue(0))})
763           // The locale-specific range.
764           // No post-condition. We are completely unaware of
765           // locale-specific return values.
766           .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
767           .Case(
768               {ArgumentCondition(
769                    0U, OutOfRange,
770                    {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}),
771                ReturnValueCondition(WithinRange, SingleValue(0))})
772           .ArgConstraint(ArgumentCondition(
773               0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})));
774   addToFunctionSummaryMap(
775       "isalpha",
776       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
777           .Case({ArgumentCondition(0U, WithinRange, {{'A', 'Z'}, {'a', 'z'}}),
778                  ReturnValueCondition(OutOfRange, SingleValue(0))})
779           // The locale-specific range.
780           .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
781           .Case({ArgumentCondition(
782                      0U, OutOfRange,
783                      {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}),
784                  ReturnValueCondition(WithinRange, SingleValue(0))}));
785   addToFunctionSummaryMap(
786       "isascii",
787       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
788           .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
789                  ReturnValueCondition(OutOfRange, SingleValue(0))})
790           .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)),
791                  ReturnValueCondition(WithinRange, SingleValue(0))}));
792   addToFunctionSummaryMap(
793       "isblank",
794       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
795           .Case({ArgumentCondition(0U, WithinRange, {{'\t', '\t'}, {' ', ' '}}),
796                  ReturnValueCondition(OutOfRange, SingleValue(0))})
797           .Case({ArgumentCondition(0U, OutOfRange, {{'\t', '\t'}, {' ', ' '}}),
798                  ReturnValueCondition(WithinRange, SingleValue(0))}));
799   addToFunctionSummaryMap(
800       "iscntrl",
801       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
802           .Case({ArgumentCondition(0U, WithinRange, {{0, 32}, {127, 127}}),
803                  ReturnValueCondition(OutOfRange, SingleValue(0))})
804           .Case({ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}),
805                  ReturnValueCondition(WithinRange, SingleValue(0))}));
806   addToFunctionSummaryMap(
807       "isdigit",
808       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
809           .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')),
810                  ReturnValueCondition(OutOfRange, SingleValue(0))})
811           .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')),
812                  ReturnValueCondition(WithinRange, SingleValue(0))}));
813   addToFunctionSummaryMap(
814       "isgraph",
815       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
816           .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)),
817                  ReturnValueCondition(OutOfRange, SingleValue(0))})
818           .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)),
819                  ReturnValueCondition(WithinRange, SingleValue(0))}));
820   addToFunctionSummaryMap(
821       "islower",
822       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
823           // Is certainly lowercase.
824           .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')),
825                  ReturnValueCondition(OutOfRange, SingleValue(0))})
826           // Is ascii but not lowercase.
827           .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
828                  ArgumentCondition(0U, OutOfRange, Range('a', 'z')),
829                  ReturnValueCondition(WithinRange, SingleValue(0))})
830           // The locale-specific range.
831           .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
832           // Is not an unsigned char.
833           .Case({ArgumentCondition(0U, OutOfRange, Range(0, UCharRangeMax)),
834                  ReturnValueCondition(WithinRange, SingleValue(0))}));
835   addToFunctionSummaryMap(
836       "isprint",
837       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
838           .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)),
839                  ReturnValueCondition(OutOfRange, SingleValue(0))})
840           .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)),
841                  ReturnValueCondition(WithinRange, SingleValue(0))}));
842   addToFunctionSummaryMap(
843       "ispunct",
844       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
845           .Case({ArgumentCondition(
846                      0U, WithinRange,
847                      {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
848                  ReturnValueCondition(OutOfRange, SingleValue(0))})
849           .Case({ArgumentCondition(
850                      0U, OutOfRange,
851                      {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
852                  ReturnValueCondition(WithinRange, SingleValue(0))}));
853   addToFunctionSummaryMap(
854       "isspace",
855       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
856           // Space, '\f', '\n', '\r', '\t', '\v'.
857           .Case({ArgumentCondition(0U, WithinRange, {{9, 13}, {' ', ' '}}),
858                  ReturnValueCondition(OutOfRange, SingleValue(0))})
859           // The locale-specific range.
860           .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
861           .Case({ArgumentCondition(0U, OutOfRange,
862                                    {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}),
863                  ReturnValueCondition(WithinRange, SingleValue(0))}));
864   addToFunctionSummaryMap(
865       "isupper",
866       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
867           // Is certainly uppercase.
868           .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')),
869                  ReturnValueCondition(OutOfRange, SingleValue(0))})
870           // The locale-specific range.
871           .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
872           // Other.
873           .Case({ArgumentCondition(0U, OutOfRange,
874                                    {{'A', 'Z'}, {128, UCharRangeMax}}),
875                  ReturnValueCondition(WithinRange, SingleValue(0))}));
876   addToFunctionSummaryMap(
877       "isxdigit",
878       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
879           .Case({ArgumentCondition(0U, WithinRange,
880                                    {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
881                  ReturnValueCondition(OutOfRange, SingleValue(0))})
882           .Case({ArgumentCondition(0U, OutOfRange,
883                                    {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
884                  ReturnValueCondition(WithinRange, SingleValue(0))}));
885 
886   // The getc() family of functions that returns either a char or an EOF.
887   addToFunctionSummaryMap("getc", Getc());
888   addToFunctionSummaryMap("fgetc", Getc());
889   addToFunctionSummaryMap(
890       "getchar", Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall)
891                      .Case({ReturnValueCondition(
892                          WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})}));
893 
894   // read()-like functions that never return more than buffer size.
895   // We are not sure how ssize_t is defined on every platform, so we
896   // provide three variants that should cover common cases.
897   addToFunctionSummaryMap("read", {Read(IntTy, IntMax), Read(LongTy, LongMax),
898                                    Read(LongLongTy, LongLongMax)});
899   addToFunctionSummaryMap("write", {Read(IntTy, IntMax), Read(LongTy, LongMax),
900                                     Read(LongLongTy, LongLongMax)});
901   addToFunctionSummaryMap("fread", Fread());
902   addToFunctionSummaryMap("fwrite", Fwrite());
903   // getline()-like functions either fail or read at least the delimiter.
904   addToFunctionSummaryMap("getline",
905                           {Getline(IntTy, IntMax), Getline(LongTy, LongMax),
906                            Getline(LongLongTy, LongLongMax)});
907   addToFunctionSummaryMap("getdelim",
908                           {Getline(IntTy, IntMax), Getline(LongTy, LongMax),
909                            Getline(LongLongTy, LongLongMax)});
910 
911   // Functions for testing.
912   if (ChecksEnabled[CK_StdCLibraryFunctionsTesterChecker]) {
913     addToFunctionSummaryMap(
914         "__two_constrained_args",
915         Summary(ArgTypes{IntTy, IntTy}, RetType{IntTy}, EvalCallAsPure)
916             .ArgConstraint(ArgumentCondition(0U, WithinRange, SingleValue(1)))
917             .ArgConstraint(ArgumentCondition(1U, WithinRange, SingleValue(1))));
918     addToFunctionSummaryMap(
919         "__arg_constrained_twice",
920         Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
921             .ArgConstraint(ArgumentCondition(0U, OutOfRange, SingleValue(1)))
922             .ArgConstraint(ArgumentCondition(0U, OutOfRange, SingleValue(2))));
923     addToFunctionSummaryMap(
924         "__defaultparam",
925         Summary(ArgTypes{Irrelevant, IntTy}, RetType{IntTy}, EvalCallAsPure)
926             .ArgConstraint(NotNull(ArgNo(0))));
927     addToFunctionSummaryMap("__variadic",
928                             Summary(ArgTypes{VoidPtrTy, ConstCharPtrTy},
929                                     RetType{IntTy}, EvalCallAsPure)
930                                 .ArgConstraint(NotNull(ArgNo(0)))
931                                 .ArgConstraint(NotNull(ArgNo(1))));
932   }
933 }
934 
935 void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) {
936   auto *Checker = mgr.registerChecker<StdLibraryFunctionsChecker>();
937   Checker->DisplayLoadedSummaries =
938       mgr.getAnalyzerOptions().getCheckerBooleanOption(
939           Checker, "DisplayLoadedSummaries");
940 }
941 
942 bool ento::shouldRegisterStdCLibraryFunctionsChecker(const CheckerManager &mgr) {
943   return true;
944 }
945 
946 #define REGISTER_CHECKER(name)                                                 \
947   void ento::register##name(CheckerManager &mgr) {                             \
948     StdLibraryFunctionsChecker *checker =                                      \
949         mgr.getChecker<StdLibraryFunctionsChecker>();                          \
950     checker->ChecksEnabled[StdLibraryFunctionsChecker::CK_##name] = true;      \
951     checker->CheckNames[StdLibraryFunctionsChecker::CK_##name] =               \
952         mgr.getCurrentCheckerName();                                           \
953   }                                                                            \
954                                                                                \
955   bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }
956 
957 REGISTER_CHECKER(StdCLibraryFunctionArgsChecker)
958 REGISTER_CHECKER(StdCLibraryFunctionsTesterChecker)
959