xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp (revision 8f961399739f539cb0b3c9ac68ca1b62c2a17a80)
1 //=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker improves modeling of a few simple library functions.
10 //
11 // This checker provides a specification format - `Summary' - and
12 // contains descriptions of some library functions in this format. Each
13 // specification contains a list of branches for splitting the program state
14 // upon call, and range constraints on argument and return-value symbols that
15 // are satisfied on each branch. This spec can be expanded to include more
16 // items, like external effects of the function.
17 //
18 // The main difference between this approach and the body farms technique is
19 // in more explicit control over how many branches are produced. For example,
20 // consider standard C function `ispunct(int x)', which returns a non-zero value
21 // iff `x' is a punctuation character, that is, when `x' is in range
22 //   ['!', '/']   [':', '@']  U  ['[', '\`']  U  ['{', '~'].
23 // `Summary' provides only two branches for this function. However,
24 // any attempt to describe this range with if-statements in the body farm
25 // would result in many more branches. Because each branch needs to be analyzed
26 // independently, this significantly reduces performance. Additionally,
27 // once we consider a branch on which `x' is in range, say, ['!', '/'],
28 // we assume that such branch is an important separate path through the program,
29 // which may lead to false positives because considering this particular path
30 // was not consciously intended, and therefore it might have been unreachable.
31 //
32 // This checker uses eval::Call for modeling pure functions (functions without
33 // side effets), for which their `Summary' is a precise model. This avoids
34 // unnecessary invalidation passes. Conflicts with other checkers are unlikely
35 // because if the function has no other effects, other checkers would probably
36 // never want to improve upon the modeling done by this checker.
37 //
38 // Non-pure functions, for which only partial improvement over the default
39 // behavior is expected, are modeled via check::PostCall, non-intrusively.
40 //
41 // The following standard C functions are currently supported:
42 //
43 //   fgetc      getline   isdigit   isupper
44 //   fread      isalnum   isgraph   isxdigit
45 //   fwrite     isalpha   islower   read
46 //   getc       isascii   isprint   write
47 //   getchar    isblank   ispunct
48 //   getdelim   iscntrl   isspace
49 //
50 //===----------------------------------------------------------------------===//
51 
52 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
53 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
54 #include "clang/StaticAnalyzer/Core/Checker.h"
55 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
56 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
57 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
58 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
59 
60 using namespace clang;
61 using namespace clang::ento;
62 
63 namespace {
64 class StdLibraryFunctionsChecker
65     : public Checker<check::PreCall, check::PostCall, eval::Call> {
66   /// Below is a series of typedefs necessary to define function specs.
67   /// We avoid nesting types here because each additional qualifier
68   /// would need to be repeated in every function spec.
69   struct Summary;
70 
71   /// Specify how much the analyzer engine should entrust modeling this function
72   /// to us. If he doesn't, he performs additional invalidations.
73   enum InvalidationKind { NoEvalCall, EvalCallAsPure };
74 
75   // The universal integral type to use in value range descriptions.
76   // Unsigned to make sure overflows are well-defined.
77   typedef uint64_t RangeInt;
78 
79   /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is
80   /// a non-negative integer, which less than 5 and not equal to 2. For
81   /// `ComparesToArgument', holds information about how exactly to compare to
82   /// the argument.
83   typedef std::vector<std::pair<RangeInt, RangeInt>> IntRangeVector;
84 
85   /// A reference to an argument or return value by its number.
86   /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but
87   /// obviously uint32_t should be enough for all practical purposes.
88   typedef uint32_t ArgNo;
89   static const ArgNo Ret;
90 
91   class ValueConstraint;
92 
93   // Pointer to the ValueConstraint. We need a copyable, polymorphic and
94   // default initialize able type (vector needs that). A raw pointer was good,
95   // however, we cannot default initialize that. unique_ptr makes the Summary
96   // class non-copyable, therefore not an option. Releasing the copyability
97   // requirement would render the initialization of the Summary map infeasible.
98   using ValueConstraintPtr = std::shared_ptr<ValueConstraint>;
99 
100   /// Polymorphic base class that represents a constraint on a given argument
101   /// (or return value) of a function. Derived classes implement different kind
102   /// of constraints, e.g range constraints or correlation between two
103   /// arguments.
104   class ValueConstraint {
105   public:
106     ValueConstraint(ArgNo ArgN) : ArgN(ArgN) {}
107     virtual ~ValueConstraint() {}
108     /// Apply the effects of the constraint on the given program state. If null
109     /// is returned then the constraint is not feasible.
110     virtual ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
111                                   const Summary &Summary) const = 0;
112     virtual ValueConstraintPtr negate() const {
113       llvm_unreachable("Not implemented");
114     };
115     ArgNo getArgNo() const { return ArgN; }
116 
117   protected:
118     ArgNo ArgN; // Argument to which we apply the constraint.
119   };
120 
121   /// Given a range, should the argument stay inside or outside this range?
122   enum RangeKind { OutOfRange, WithinRange };
123 
124   /// Encapsulates a single range on a single symbol within a branch.
125   class RangeConstraint : public ValueConstraint {
126     RangeKind Kind;      // Kind of range definition.
127     IntRangeVector Args; // Polymorphic arguments.
128 
129   public:
130     RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Args)
131         : ValueConstraint(ArgN), Kind(Kind), Args(Args) {}
132 
133     const IntRangeVector &getRanges() const {
134       return Args;
135     }
136 
137   private:
138     ProgramStateRef applyAsOutOfRange(ProgramStateRef State,
139                                       const CallEvent &Call,
140                                       const Summary &Summary) const;
141     ProgramStateRef applyAsWithinRange(ProgramStateRef State,
142                                        const CallEvent &Call,
143                                        const Summary &Summary) const;
144   public:
145     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
146                           const Summary &Summary) const override {
147       switch (Kind) {
148       case OutOfRange:
149         return applyAsOutOfRange(State, Call, Summary);
150       case WithinRange:
151         return applyAsWithinRange(State, Call, Summary);
152       }
153       llvm_unreachable("Unknown range kind!");
154     }
155 
156     ValueConstraintPtr negate() const override {
157       RangeConstraint Tmp(*this);
158       switch (Kind) {
159       case OutOfRange:
160         Tmp.Kind = WithinRange;
161         break;
162       case WithinRange:
163         Tmp.Kind = OutOfRange;
164         break;
165       }
166       return std::make_shared<RangeConstraint>(Tmp);
167     }
168   };
169 
170   class ComparisonConstraint : public ValueConstraint {
171     BinaryOperator::Opcode Opcode;
172     ArgNo OtherArgN;
173 
174   public:
175     ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode,
176                          ArgNo OtherArgN)
177         : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {}
178     ArgNo getOtherArgNo() const { return OtherArgN; }
179     BinaryOperator::Opcode getOpcode() const { return Opcode; }
180     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
181                           const Summary &Summary) const override;
182   };
183 
184   class NotNullConstraint : public ValueConstraint {
185     using ValueConstraint::ValueConstraint;
186     // This variable has a role when we negate the constraint.
187     bool CannotBeNull = true;
188 
189   public:
190     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
191                           const Summary &Summary) const override {
192       SVal V = getArgSVal(Call, getArgNo());
193       if (V.isUndef())
194         return State;
195 
196       DefinedOrUnknownSVal L = V.castAs<DefinedOrUnknownSVal>();
197       if (!L.getAs<Loc>())
198         return State;
199 
200       return State->assume(L, CannotBeNull);
201     }
202 
203     ValueConstraintPtr negate() const override {
204       NotNullConstraint Tmp(*this);
205       Tmp.CannotBeNull = !this->CannotBeNull;
206       return std::make_shared<NotNullConstraint>(Tmp);
207     }
208   };
209 
210   /// The complete list of constraints that defines a single branch.
211   typedef std::vector<ValueConstraintPtr> ConstraintSet;
212 
213   using ArgTypes = std::vector<QualType>;
214   using Cases = std::vector<ConstraintSet>;
215 
216   /// Includes information about
217   ///   * function prototype (which is necessary to
218   ///     ensure we're modeling the right function and casting values properly),
219   ///   * approach to invalidation,
220   ///   * a list of branches - a list of list of ranges -
221   ///     A branch represents a path in the exploded graph of a function (which
222   ///     is a tree). So, a branch is a series of assumptions. In other words,
223   ///     branches represent split states and additional assumptions on top of
224   ///     the splitting assumption.
225   ///     For example, consider the branches in `isalpha(x)`
226   ///       Branch 1)
227   ///         x is in range ['A', 'Z'] or in ['a', 'z']
228   ///         then the return value is not 0. (I.e. out-of-range [0, 0])
229   ///       Branch 2)
230   ///         x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z']
231   ///         then the return value is 0.
232   ///   * a list of argument constraints, that must be true on every branch.
233   ///     If these constraints are not satisfied that means a fatal error
234   ///     usually resulting in undefined behaviour.
235   struct Summary {
236     const ArgTypes ArgTys;
237     const QualType RetTy;
238     const InvalidationKind InvalidationKd;
239     Cases CaseConstraints;
240     ConstraintSet ArgConstraints;
241 
242     Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd)
243         : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {}
244 
245     Summary &Case(ConstraintSet&& CS) {
246       CaseConstraints.push_back(std::move(CS));
247       return *this;
248     }
249     Summary &ArgConstraint(ValueConstraintPtr VC) {
250       ArgConstraints.push_back(VC);
251       return *this;
252     }
253 
254   private:
255     static void assertTypeSuitableForSummary(QualType T) {
256       assert(!T->isVoidType() &&
257              "We should have had no significant void types in the spec");
258       assert(T.isCanonical() &&
259              "We should only have canonical types in the spec");
260     }
261 
262   public:
263     QualType getArgType(ArgNo ArgN) const {
264       QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN];
265       assertTypeSuitableForSummary(T);
266       return T;
267     }
268 
269     /// Try our best to figure out if the call expression is the call of
270     /// *the* library function to which this specification applies.
271     bool matchesCall(const FunctionDecl *FD) const;
272   };
273 
274   // The same function (as in, function identifier) may have different
275   // summaries assigned to it, with different argument and return value types.
276   // We call these "variants" of the function. This can be useful for handling
277   // C++ function overloads, and also it can be used when the same function
278   // may have different definitions on different platforms.
279   typedef std::vector<Summary> Summaries;
280 
281   // The map of all functions supported by the checker. It is initialized
282   // lazily, and it doesn't change after initialization.
283   mutable llvm::StringMap<Summaries> FunctionSummaryMap;
284 
285   mutable std::unique_ptr<BugType> BT_InvalidArg;
286 
287   // Auxiliary functions to support ArgNo within all structures
288   // in a unified manner.
289   static QualType getArgType(const Summary &Summary, ArgNo ArgN) {
290     return Summary.getArgType(ArgN);
291   }
292   static QualType getArgType(const CallEvent &Call, ArgNo ArgN) {
293     return ArgN == Ret ? Call.getResultType().getCanonicalType()
294                        : Call.getArgExpr(ArgN)->getType().getCanonicalType();
295   }
296   static QualType getArgType(const CallExpr *CE, ArgNo ArgN) {
297     return ArgN == Ret ? CE->getType().getCanonicalType()
298                        : CE->getArg(ArgN)->getType().getCanonicalType();
299   }
300   static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) {
301     return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN);
302   }
303 
304 public:
305   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
306   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
307   bool evalCall(const CallEvent &Call, CheckerContext &C) const;
308 
309   enum CheckKind {
310     CK_StdCLibraryFunctionArgsChecker,
311     CK_StdCLibraryFunctionsTesterChecker,
312     CK_NumCheckKinds
313   };
314   DefaultBool ChecksEnabled[CK_NumCheckKinds];
315   CheckerNameRef CheckNames[CK_NumCheckKinds];
316 
317 private:
318   Optional<Summary> findFunctionSummary(const FunctionDecl *FD,
319                                         CheckerContext &C) const;
320   Optional<Summary> findFunctionSummary(const CallEvent &Call,
321                                         CheckerContext &C) const;
322 
323   void initFunctionSummaries(CheckerContext &C) const;
324 
325   void reportBug(const CallEvent &Call, ExplodedNode *N,
326                  CheckerContext &C) const {
327     if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker])
328       return;
329     // TODO Add detailed diagnostic.
330     StringRef Msg = "Function argument constraint is not satisfied";
331     if (!BT_InvalidArg)
332       BT_InvalidArg = std::make_unique<BugType>(
333           CheckNames[CK_StdCLibraryFunctionArgsChecker],
334           "Unsatisfied argument constraints", categories::LogicError);
335     auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N);
336     bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R);
337     C.emitReport(std::move(R));
338   }
339 };
340 
341 const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret =
342     std::numeric_limits<ArgNo>::max();
343 
344 } // end of anonymous namespace
345 
346 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange(
347     ProgramStateRef State, const CallEvent &Call,
348     const Summary &Summary) const {
349 
350   ProgramStateManager &Mgr = State->getStateManager();
351   SValBuilder &SVB = Mgr.getSValBuilder();
352   BasicValueFactory &BVF = SVB.getBasicValueFactory();
353   ConstraintManager &CM = Mgr.getConstraintManager();
354   QualType T = getArgType(Summary, getArgNo());
355   SVal V = getArgSVal(Call, getArgNo());
356 
357   if (auto N = V.getAs<NonLoc>()) {
358     const IntRangeVector &R = getRanges();
359     size_t E = R.size();
360     for (size_t I = 0; I != E; ++I) {
361       const llvm::APSInt &Min = BVF.getValue(R[I].first, T);
362       const llvm::APSInt &Max = BVF.getValue(R[I].second, T);
363       assert(Min <= Max);
364       State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
365       if (!State)
366         break;
367     }
368   }
369 
370   return State;
371 }
372 
373 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsWithinRange(
374     ProgramStateRef State, const CallEvent &Call,
375     const Summary &Summary) const {
376 
377   ProgramStateManager &Mgr = State->getStateManager();
378   SValBuilder &SVB = Mgr.getSValBuilder();
379   BasicValueFactory &BVF = SVB.getBasicValueFactory();
380   ConstraintManager &CM = Mgr.getConstraintManager();
381   QualType T = getArgType(Summary, getArgNo());
382   SVal V = getArgSVal(Call, getArgNo());
383 
384   // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R".
385   // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary,
386   // and then cut away all holes in R one by one.
387   //
388   // E.g. consider a range list R as [A, B] and [C, D]
389   // -------+--------+------------------+------------+----------->
390   //        A        B                  C            D
391   // Then we assume that the value is not in [-inf, A - 1],
392   // then not in [D + 1, +inf], then not in [B + 1, C - 1]
393   if (auto N = V.getAs<NonLoc>()) {
394     const IntRangeVector &R = getRanges();
395     size_t E = R.size();
396 
397     const llvm::APSInt &MinusInf = BVF.getMinValue(T);
398     const llvm::APSInt &PlusInf = BVF.getMaxValue(T);
399 
400     const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T);
401     if (Left != PlusInf) {
402       assert(MinusInf <= Left);
403       State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false);
404       if (!State)
405         return nullptr;
406     }
407 
408     const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T);
409     if (Right != MinusInf) {
410       assert(Right <= PlusInf);
411       State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false);
412       if (!State)
413         return nullptr;
414     }
415 
416     for (size_t I = 1; I != E; ++I) {
417       const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T);
418       const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T);
419       if (Min <= Max) {
420         State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
421         if (!State)
422           return nullptr;
423       }
424     }
425   }
426 
427   return State;
428 }
429 
430 ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply(
431     ProgramStateRef State, const CallEvent &Call,
432     const Summary &Summary) const {
433 
434   ProgramStateManager &Mgr = State->getStateManager();
435   SValBuilder &SVB = Mgr.getSValBuilder();
436   QualType CondT = SVB.getConditionType();
437   QualType T = getArgType(Summary, getArgNo());
438   SVal V = getArgSVal(Call, getArgNo());
439 
440   BinaryOperator::Opcode Op = getOpcode();
441   ArgNo OtherArg = getOtherArgNo();
442   SVal OtherV = getArgSVal(Call, OtherArg);
443   QualType OtherT = getArgType(Call, OtherArg);
444   // Note: we avoid integral promotion for comparison.
445   OtherV = SVB.evalCast(OtherV, T, OtherT);
446   if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT)
447                        .getAs<DefinedOrUnknownSVal>())
448     State = State->assume(*CompV, true);
449   return State;
450 }
451 
452 void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call,
453                                               CheckerContext &C) const {
454   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
455   if (!FoundSummary)
456     return;
457 
458   const Summary &Summary = *FoundSummary;
459   ProgramStateRef State = C.getState();
460 
461   ProgramStateRef NewState = State;
462   for (const ValueConstraintPtr& VC : Summary.ArgConstraints) {
463     ProgramStateRef SuccessSt = VC->apply(NewState, Call, Summary);
464     ProgramStateRef FailureSt = VC->negate()->apply(NewState, Call, Summary);
465     // The argument constraint is not satisfied.
466     if (FailureSt && !SuccessSt) {
467       if (ExplodedNode *N = C.generateErrorNode(NewState))
468         reportBug(Call, N, C);
469       break;
470     } else {
471       // We will apply the constraint even if we cannot reason about the
472       // argument. This means both SuccessSt and FailureSt can be true. If we
473       // weren't applying the constraint that would mean that symbolic
474       // execution continues on a code whose behaviour is undefined.
475       assert(SuccessSt);
476       NewState = SuccessSt;
477     }
478   }
479   if (NewState && NewState != State)
480     C.addTransition(NewState);
481 }
482 
483 void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call,
484                                                CheckerContext &C) const {
485   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
486   if (!FoundSummary)
487     return;
488 
489   // Now apply the constraints.
490   const Summary &Summary = *FoundSummary;
491   ProgramStateRef State = C.getState();
492 
493   // Apply case/branch specifications.
494   for (const auto &VRS : Summary.CaseConstraints) {
495     ProgramStateRef NewState = State;
496     for (const auto &VR: VRS) {
497       NewState = VR->apply(NewState, Call, Summary);
498       if (!NewState)
499         break;
500     }
501 
502     if (NewState && NewState != State)
503       C.addTransition(NewState);
504   }
505 }
506 
507 bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call,
508                                           CheckerContext &C) const {
509   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
510   if (!FoundSummary)
511     return false;
512 
513   const Summary &Summary = *FoundSummary;
514   switch (Summary.InvalidationKd) {
515   case EvalCallAsPure: {
516     ProgramStateRef State = C.getState();
517     const LocationContext *LC = C.getLocationContext();
518     const auto *CE = cast_or_null<CallExpr>(Call.getOriginExpr());
519     SVal V = C.getSValBuilder().conjureSymbolVal(
520         CE, LC, CE->getType().getCanonicalType(), C.blockCount());
521     State = State->BindExpr(CE, LC, V);
522     C.addTransition(State);
523     return true;
524   }
525   case NoEvalCall:
526     // Summary tells us to avoid performing eval::Call. The function is possibly
527     // evaluated by another checker, or evaluated conservatively.
528     return false;
529   }
530   llvm_unreachable("Unknown invalidation kind!");
531 }
532 
533 bool StdLibraryFunctionsChecker::Summary::matchesCall(
534     const FunctionDecl *FD) const {
535   // Check number of arguments:
536   if (FD->param_size() != ArgTys.size())
537     return false;
538 
539   // Check return type if relevant:
540   if (!RetTy.isNull() && RetTy != FD->getReturnType().getCanonicalType())
541     return false;
542 
543   // Check argument types when relevant:
544   for (size_t I = 0, E = ArgTys.size(); I != E; ++I) {
545     QualType FormalT = ArgTys[I];
546     // Null type marks irrelevant arguments.
547     if (FormalT.isNull())
548       continue;
549 
550     assertTypeSuitableForSummary(FormalT);
551 
552     QualType ActualT = FD->getParamDecl(I)->getType().getCanonicalType();
553     if (ActualT != FormalT)
554       return false;
555   }
556 
557   return true;
558 }
559 
560 Optional<StdLibraryFunctionsChecker::Summary>
561 StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD,
562                                                 CheckerContext &C) const {
563   if (!FD)
564     return None;
565 
566   initFunctionSummaries(C);
567 
568   IdentifierInfo *II = FD->getIdentifier();
569   if (!II)
570     return None;
571   StringRef Name = II->getName();
572   if (Name.empty() || !C.isCLibraryFunction(FD, Name))
573     return None;
574 
575   auto FSMI = FunctionSummaryMap.find(Name);
576   if (FSMI == FunctionSummaryMap.end())
577     return None;
578 
579   // Verify that function signature matches the spec in advance.
580   // Otherwise we might be modeling the wrong function.
581   // Strict checking is important because we will be conducting
582   // very integral-type-sensitive operations on arguments and
583   // return values.
584   const Summaries &SpecVariants = FSMI->second;
585   for (const Summary &Spec : SpecVariants)
586     if (Spec.matchesCall(FD))
587       return Spec;
588 
589   return None;
590 }
591 
592 Optional<StdLibraryFunctionsChecker::Summary>
593 StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call,
594                                                 CheckerContext &C) const {
595   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
596   if (!FD)
597     return None;
598   return findFunctionSummary(FD, C);
599 }
600 
601 void StdLibraryFunctionsChecker::initFunctionSummaries(
602     CheckerContext &C) const {
603   if (!FunctionSummaryMap.empty())
604     return;
605 
606   SValBuilder &SVB = C.getSValBuilder();
607   BasicValueFactory &BVF = SVB.getBasicValueFactory();
608   const ASTContext &ACtx = BVF.getContext();
609 
610   // These types are useful for writing specifications quickly,
611   // New specifications should probably introduce more types.
612   // Some types are hard to obtain from the AST, eg. "ssize_t".
613   // In such cases it should be possible to provide multiple variants
614   // of function summary for common cases (eg. ssize_t could be int or long
615   // or long long, so three summary variants would be enough).
616   // Of course, function variants are also useful for C++ overloads.
617   const QualType
618       Irrelevant{}; // A placeholder, whenever we do not care about the type.
619   const QualType IntTy = ACtx.IntTy;
620   const QualType LongTy = ACtx.LongTy;
621   const QualType LongLongTy = ACtx.LongLongTy;
622   const QualType SizeTy = ACtx.getSizeType();
623   const QualType VoidPtrTy = ACtx.VoidPtrTy; // void *
624   const QualType VoidPtrRestrictTy =
625       ACtx.getRestrictType(VoidPtrTy); // void *restrict
626   const QualType ConstVoidPtrTy =
627       ACtx.getPointerType(ACtx.VoidTy.withConst()); // const void *
628   const QualType ConstCharPtrTy =
629       ACtx.getPointerType(ACtx.CharTy.withConst()); // const char *
630   const QualType ConstVoidPtrRestrictTy =
631       ACtx.getRestrictType(ConstVoidPtrTy); // const void *restrict
632 
633   const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue();
634   const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue();
635   const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue();
636 
637   // Set UCharRangeMax to min of int or uchar maximum value.
638   // The C standard states that the arguments of functions like isalpha must
639   // be representable as an unsigned char. Their type is 'int', so the max
640   // value of the argument should be min(UCharMax, IntMax). This just happen
641   // to be true for commonly used and well tested instruction set
642   // architectures, but not for others.
643   const RangeInt UCharRangeMax =
644       std::min(BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue(), IntMax);
645 
646   // The platform dependent value of EOF.
647   // Try our best to parse this from the Preprocessor, otherwise fallback to -1.
648   const auto EOFv = [&C]() -> RangeInt {
649     if (const llvm::Optional<int> OptInt =
650             tryExpandAsInteger("EOF", C.getPreprocessor()))
651       return *OptInt;
652     return -1;
653   }();
654 
655   // We are finally ready to define specifications for all supported functions.
656   //
657   // The signature needs to have the correct number of arguments.
658   // However, we insert `Irrelevant' when the type is insignificant.
659   //
660   // Argument ranges should always cover all variants. If return value
661   // is completely unknown, omit it from the respective range set.
662   //
663   // All types in the spec need to be canonical.
664   //
665   // Every item in the list of range sets represents a particular
666   // execution path the analyzer would need to explore once
667   // the call is modeled - a new program state is constructed
668   // for every range set, and each range line in the range set
669   // corresponds to a specific constraint within this state.
670   //
671   // Upon comparing to another argument, the other argument is casted
672   // to the current argument's type. This avoids proper promotion but
673   // seems useful. For example, read() receives size_t argument,
674   // and its return value, which is of type ssize_t, cannot be greater
675   // than this argument. If we made a promotion, and the size argument
676   // is equal to, say, 10, then we'd impose a range of [0, 10] on the
677   // return value, however the correct range is [-1, 10].
678   //
679   // Please update the list of functions in the header after editing!
680   //
681 
682   // Below are helpers functions to create the summaries.
683   auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind,
684                               IntRangeVector Ranges) {
685     return std::make_shared<RangeConstraint>(ArgN, Kind, Ranges);
686   };
687   struct {
688     auto operator()(RangeKind Kind, IntRangeVector Ranges) {
689       return std::make_shared<RangeConstraint>(Ret, Kind, Ranges);
690     }
691     auto operator()(BinaryOperator::Opcode Op, ArgNo OtherArgN) {
692       return std::make_shared<ComparisonConstraint>(Ret, Op, OtherArgN);
693     }
694   } ReturnValueCondition;
695   auto Range = [](RangeInt b, RangeInt e) {
696     return IntRangeVector{std::pair<RangeInt, RangeInt>{b, e}};
697   };
698   auto SingleValue = [](RangeInt v) {
699     return IntRangeVector{std::pair<RangeInt, RangeInt>{v, v}};
700   };
701   auto LessThanOrEq = BO_LE;
702   auto NotNull = [&](ArgNo ArgN) {
703     return std::make_shared<NotNullConstraint>(ArgN);
704   };
705 
706   using RetType = QualType;
707   // Templates for summaries that are reused by many functions.
708   auto Getc = [&]() {
709     return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall)
710         .Case({ReturnValueCondition(WithinRange,
711                                     {{EOFv, EOFv}, {0, UCharRangeMax}})});
712   };
713   auto Read = [&](RetType R, RangeInt Max) {
714     return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R},
715                    NoEvalCall)
716         .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)),
717                ReturnValueCondition(WithinRange, Range(-1, Max))});
718   };
719   auto Fread = [&]() {
720     return Summary(ArgTypes{VoidPtrRestrictTy, Irrelevant, SizeTy, Irrelevant},
721                    RetType{SizeTy}, NoEvalCall)
722         .Case({
723             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
724         })
725         .ArgConstraint(NotNull(ArgNo(0)));
726   };
727   auto Fwrite = [&]() {
728     return Summary(
729                ArgTypes{ConstVoidPtrRestrictTy, Irrelevant, SizeTy, Irrelevant},
730                RetType{SizeTy}, NoEvalCall)
731         .Case({
732             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
733         })
734         .ArgConstraint(NotNull(ArgNo(0)));
735   };
736   auto Getline = [&](RetType R, RangeInt Max) {
737     return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R},
738                    NoEvalCall)
739         .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})});
740   };
741 
742   FunctionSummaryMap = {
743       // The isascii() family of functions.
744       // The behavior is undefined if the value of the argument is not
745       // representable as unsigned char or is not equal to EOF. See e.g. C99
746       // 7.4.1.2 The isalpha function (p: 181-182).
747       {
748           "isalnum",
749           Summaries{
750               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
751                   // Boils down to isupper() or islower() or isdigit().
752                   .Case(
753                       {ArgumentCondition(0U, WithinRange,
754                                          {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}),
755                        ReturnValueCondition(OutOfRange, SingleValue(0))})
756                   // The locale-specific range.
757                   // No post-condition. We are completely unaware of
758                   // locale-specific return values.
759                   .Case({ArgumentCondition(0U, WithinRange,
760                                            {{128, UCharRangeMax}})})
761                   .Case({ArgumentCondition(0U, OutOfRange,
762                                            {{'0', '9'},
763                                             {'A', 'Z'},
764                                             {'a', 'z'},
765                                             {128, UCharRangeMax}}),
766                          ReturnValueCondition(WithinRange, SingleValue(0))})
767                   .ArgConstraint(ArgumentCondition(
768                       0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}}))},
769       },
770       {
771           "isalpha",
772           Summaries{
773               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
774                   .Case({ArgumentCondition(0U, WithinRange,
775                                            {{'A', 'Z'}, {'a', 'z'}}),
776                          ReturnValueCondition(OutOfRange, SingleValue(0))})
777                   // The locale-specific range.
778                   .Case({ArgumentCondition(0U, WithinRange,
779                                            {{128, UCharRangeMax}})})
780                   .Case({ArgumentCondition(
781                              0U, OutOfRange,
782                              {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}),
783                          ReturnValueCondition(WithinRange, SingleValue(0))})},
784       },
785       {
786           "isascii",
787           Summaries{
788               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
789                   .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
790                          ReturnValueCondition(OutOfRange, SingleValue(0))})
791                   .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)),
792                          ReturnValueCondition(WithinRange, SingleValue(0))})},
793       },
794       {
795           "isblank",
796           Summaries{
797               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
798                   .Case({ArgumentCondition(0U, WithinRange,
799                                            {{'\t', '\t'}, {' ', ' '}}),
800                          ReturnValueCondition(OutOfRange, SingleValue(0))})
801                   .Case({ArgumentCondition(0U, OutOfRange,
802                                            {{'\t', '\t'}, {' ', ' '}}),
803                          ReturnValueCondition(WithinRange, SingleValue(0))})},
804       },
805       {
806           "iscntrl",
807           Summaries{
808               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
809                   .Case({ArgumentCondition(0U, WithinRange,
810                                            {{0, 32}, {127, 127}}),
811                          ReturnValueCondition(OutOfRange, SingleValue(0))})
812                   .Case(
813                       {ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}),
814                        ReturnValueCondition(WithinRange, SingleValue(0))})},
815       },
816       {
817           "isdigit",
818           Summaries{
819               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
820                   .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')),
821                          ReturnValueCondition(OutOfRange, SingleValue(0))})
822                   .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')),
823                          ReturnValueCondition(WithinRange, SingleValue(0))})},
824       },
825       {
826           "isgraph",
827           Summaries{
828               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
829                   .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)),
830                          ReturnValueCondition(OutOfRange, SingleValue(0))})
831                   .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)),
832                          ReturnValueCondition(WithinRange, SingleValue(0))})},
833       },
834       {
835           "islower",
836           Summaries{
837               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
838                   // Is certainly lowercase.
839                   .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')),
840                          ReturnValueCondition(OutOfRange, SingleValue(0))})
841                   // Is ascii but not lowercase.
842                   .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
843                          ArgumentCondition(0U, OutOfRange, Range('a', 'z')),
844                          ReturnValueCondition(WithinRange, SingleValue(0))})
845                   // The locale-specific range.
846                   .Case({ArgumentCondition(0U, WithinRange,
847                                            {{128, UCharRangeMax}})})
848                   // Is not an unsigned char.
849                   .Case({ArgumentCondition(0U, OutOfRange,
850                                            Range(0, UCharRangeMax)),
851                          ReturnValueCondition(WithinRange, SingleValue(0))})},
852       },
853       {
854           "isprint",
855           Summaries{
856               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
857                   .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)),
858                          ReturnValueCondition(OutOfRange, SingleValue(0))})
859                   .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)),
860                          ReturnValueCondition(WithinRange, SingleValue(0))})},
861       },
862       {
863           "ispunct",
864           Summaries{
865               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
866                   .Case({ArgumentCondition(
867                              0U, WithinRange,
868                              {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
869                          ReturnValueCondition(OutOfRange, SingleValue(0))})
870                   .Case({ArgumentCondition(
871                              0U, OutOfRange,
872                              {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
873                          ReturnValueCondition(WithinRange, SingleValue(0))})},
874       },
875       {
876           "isspace",
877           Summaries{
878               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
879                   // Space, '\f', '\n', '\r', '\t', '\v'.
880                   .Case({ArgumentCondition(0U, WithinRange,
881                                            {{9, 13}, {' ', ' '}}),
882                          ReturnValueCondition(OutOfRange, SingleValue(0))})
883                   // The locale-specific range.
884                   .Case({ArgumentCondition(0U, WithinRange,
885                                            {{128, UCharRangeMax}})})
886                   .Case({ArgumentCondition(
887                              0U, OutOfRange,
888                              {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}),
889                          ReturnValueCondition(WithinRange, SingleValue(0))})},
890       },
891       {
892           "isupper",
893           Summaries{
894               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
895                   // Is certainly uppercase.
896                   .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')),
897                          ReturnValueCondition(OutOfRange, SingleValue(0))})
898                   // The locale-specific range.
899                   .Case({ArgumentCondition(0U, WithinRange,
900                                            {{128, UCharRangeMax}})})
901                   // Other.
902                   .Case({ArgumentCondition(0U, OutOfRange,
903                                            {{'A', 'Z'}, {128, UCharRangeMax}}),
904                          ReturnValueCondition(WithinRange, SingleValue(0))})},
905       },
906       {
907           "isxdigit",
908           Summaries{
909               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
910                   .Case(
911                       {ArgumentCondition(0U, WithinRange,
912                                          {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
913                        ReturnValueCondition(OutOfRange, SingleValue(0))})
914                   .Case(
915                       {ArgumentCondition(0U, OutOfRange,
916                                          {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
917                        ReturnValueCondition(WithinRange, SingleValue(0))})},
918       },
919 
920       // The getc() family of functions that returns either a char or an EOF.
921       {"getc", Summaries{Getc()}},
922       {"fgetc", Summaries{Getc()}},
923       {"getchar",
924        Summaries{Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall)
925                      .Case({ReturnValueCondition(
926                          WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})})}},
927 
928       // read()-like functions that never return more than buffer size.
929       // We are not sure how ssize_t is defined on every platform, so we
930       // provide three variants that should cover common cases.
931       {"read", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax),
932                          Read(LongLongTy, LongLongMax)}},
933       {"write", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax),
934                           Read(LongLongTy, LongLongMax)}},
935       {"fread", Summaries{Fread()}},
936       {"fwrite", Summaries{Fwrite()}},
937       // getline()-like functions either fail or read at least the delimiter.
938       {"getline", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax),
939                             Getline(LongLongTy, LongLongMax)}},
940       {"getdelim", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax),
941                              Getline(LongLongTy, LongLongMax)}},
942   };
943 
944   // Functions for testing.
945   if (ChecksEnabled[CK_StdCLibraryFunctionsTesterChecker]) {
946     llvm::StringMap<Summaries> TestFunctionSummaryMap = {
947         {"__two_constrained_args",
948          Summaries{
949              Summary(ArgTypes{IntTy, IntTy}, RetType{IntTy}, EvalCallAsPure)
950                  .ArgConstraint(
951                      ArgumentCondition(0U, WithinRange, SingleValue(1)))
952                  .ArgConstraint(
953                      ArgumentCondition(1U, WithinRange, SingleValue(1)))}},
954         {"__arg_constrained_twice",
955          Summaries{Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
956                        .ArgConstraint(
957                            ArgumentCondition(0U, OutOfRange, SingleValue(1)))
958                        .ArgConstraint(
959                            ArgumentCondition(0U, OutOfRange, SingleValue(2)))}},
960         {"__defaultparam", Summaries{Summary(ArgTypes{Irrelevant, IntTy},
961                                              RetType{IntTy}, EvalCallAsPure)
962                                          .ArgConstraint(NotNull(ArgNo(0)))}},
963         {"__variadic", Summaries{Summary(ArgTypes{VoidPtrTy, ConstCharPtrTy},
964                                          RetType{IntTy}, EvalCallAsPure)
965                                      .ArgConstraint(NotNull(ArgNo(0)))
966                                      .ArgConstraint(NotNull(ArgNo(1)))}}};
967     for (auto &E : TestFunctionSummaryMap) {
968       auto InsertRes =
969           FunctionSummaryMap.insert({std::string(E.getKey()), E.getValue()});
970       assert(InsertRes.second &&
971              "Test functions must not clash with modeled functions");
972       (void)InsertRes;
973     }
974   }
975 }
976 
977 void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) {
978   mgr.registerChecker<StdLibraryFunctionsChecker>();
979 }
980 
981 bool ento::shouldRegisterStdCLibraryFunctionsChecker(const CheckerManager &mgr) {
982   return true;
983 }
984 
985 #define REGISTER_CHECKER(name)                                                 \
986   void ento::register##name(CheckerManager &mgr) {                             \
987     StdLibraryFunctionsChecker *checker =                                      \
988         mgr.getChecker<StdLibraryFunctionsChecker>();                          \
989     checker->ChecksEnabled[StdLibraryFunctionsChecker::CK_##name] = true;      \
990     checker->CheckNames[StdLibraryFunctionsChecker::CK_##name] =               \
991         mgr.getCurrentCheckerName();                                           \
992   }                                                                            \
993                                                                                \
994   bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }
995 
996 REGISTER_CHECKER(StdCLibraryFunctionArgsChecker)
997 REGISTER_CHECKER(StdCLibraryFunctionsTesterChecker)
998