xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp (revision 94061df6e5f24c2f25ad7c55af13dd17cccc5856)
1 //=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker improves modeling of a few simple library functions.
10 //
11 // This checker provides a specification format - `Summary' - and
12 // contains descriptions of some library functions in this format. Each
13 // specification contains a list of branches for splitting the program state
14 // upon call, and range constraints on argument and return-value symbols that
15 // are satisfied on each branch. This spec can be expanded to include more
16 // items, like external effects of the function.
17 //
18 // The main difference between this approach and the body farms technique is
19 // in more explicit control over how many branches are produced. For example,
20 // consider standard C function `ispunct(int x)', which returns a non-zero value
21 // iff `x' is a punctuation character, that is, when `x' is in range
22 //   ['!', '/']   [':', '@']  U  ['[', '\`']  U  ['{', '~'].
23 // `Summary' provides only two branches for this function. However,
24 // any attempt to describe this range with if-statements in the body farm
25 // would result in many more branches. Because each branch needs to be analyzed
26 // independently, this significantly reduces performance. Additionally,
27 // once we consider a branch on which `x' is in range, say, ['!', '/'],
28 // we assume that such branch is an important separate path through the program,
29 // which may lead to false positives because considering this particular path
30 // was not consciously intended, and therefore it might have been unreachable.
31 //
32 // This checker uses eval::Call for modeling pure functions (functions without
33 // side effets), for which their `Summary' is a precise model. This avoids
34 // unnecessary invalidation passes. Conflicts with other checkers are unlikely
35 // because if the function has no other effects, other checkers would probably
36 // never want to improve upon the modeling done by this checker.
37 //
38 // Non-pure functions, for which only partial improvement over the default
39 // behavior is expected, are modeled via check::PostCall, non-intrusively.
40 //
41 // The following standard C functions are currently supported:
42 //
43 //   fgetc      getline   isdigit   isupper
44 //   fread      isalnum   isgraph   isxdigit
45 //   fwrite     isalpha   islower   read
46 //   getc       isascii   isprint   write
47 //   getchar    isblank   ispunct
48 //   getdelim   iscntrl   isspace
49 //
50 //===----------------------------------------------------------------------===//
51 
52 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
53 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
54 #include "clang/StaticAnalyzer/Core/Checker.h"
55 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
56 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
57 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
58 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
59 
60 using namespace clang;
61 using namespace clang::ento;
62 
63 namespace {
64 class StdLibraryFunctionsChecker
65     : public Checker<check::PreCall, check::PostCall, eval::Call> {
66   /// Below is a series of typedefs necessary to define function specs.
67   /// We avoid nesting types here because each additional qualifier
68   /// would need to be repeated in every function spec.
69   struct Summary;
70 
71   /// Specify how much the analyzer engine should entrust modeling this function
72   /// to us. If he doesn't, he performs additional invalidations.
73   enum InvalidationKind { NoEvalCall, EvalCallAsPure };
74 
75   // The universal integral type to use in value range descriptions.
76   // Unsigned to make sure overflows are well-defined.
77   typedef uint64_t RangeInt;
78 
79   /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is
80   /// a non-negative integer, which less than 5 and not equal to 2. For
81   /// `ComparesToArgument', holds information about how exactly to compare to
82   /// the argument.
83   typedef std::vector<std::pair<RangeInt, RangeInt>> IntRangeVector;
84 
85   /// A reference to an argument or return value by its number.
86   /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but
87   /// obviously uint32_t should be enough for all practical purposes.
88   typedef uint32_t ArgNo;
89   static const ArgNo Ret;
90 
91   class ValueConstraint;
92 
93   // Pointer to the ValueConstraint. We need a copyable, polymorphic and
94   // default initialize able type (vector needs that). A raw pointer was good,
95   // however, we cannot default initialize that. unique_ptr makes the Summary
96   // class non-copyable, therefore not an option. Releasing the copyability
97   // requirement would render the initialization of the Summary map infeasible.
98   using ValueConstraintPtr = std::shared_ptr<ValueConstraint>;
99 
100   /// Polymorphic base class that represents a constraint on a given argument
101   /// (or return value) of a function. Derived classes implement different kind
102   /// of constraints, e.g range constraints or correlation between two
103   /// arguments.
104   class ValueConstraint {
105   public:
106     ValueConstraint(ArgNo ArgN) : ArgN(ArgN) {}
107     virtual ~ValueConstraint() {}
108     /// Apply the effects of the constraint on the given program state. If null
109     /// is returned then the constraint is not feasible.
110     virtual ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
111                                   const Summary &Summary) const = 0;
112     virtual ValueConstraintPtr negate() const {
113       llvm_unreachable("Not implemented");
114     };
115     ArgNo getArgNo() const { return ArgN; }
116 
117   protected:
118     ArgNo ArgN; // Argument to which we apply the constraint.
119   };
120 
121   /// Given a range, should the argument stay inside or outside this range?
122   enum RangeKind { OutOfRange, WithinRange };
123 
124   /// Encapsulates a single range on a single symbol within a branch.
125   class RangeConstraint : public ValueConstraint {
126     RangeKind Kind;      // Kind of range definition.
127     IntRangeVector Args; // Polymorphic arguments.
128 
129   public:
130     RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Args)
131         : ValueConstraint(ArgN), Kind(Kind), Args(Args) {}
132 
133     const IntRangeVector &getRanges() const {
134       return Args;
135     }
136 
137   private:
138     ProgramStateRef applyAsOutOfRange(ProgramStateRef State,
139                                       const CallEvent &Call,
140                                       const Summary &Summary) const;
141     ProgramStateRef applyAsWithinRange(ProgramStateRef State,
142                                        const CallEvent &Call,
143                                        const Summary &Summary) const;
144   public:
145     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
146                           const Summary &Summary) const override {
147       switch (Kind) {
148       case OutOfRange:
149         return applyAsOutOfRange(State, Call, Summary);
150       case WithinRange:
151         return applyAsWithinRange(State, Call, Summary);
152       }
153       llvm_unreachable("Unknown range kind!");
154     }
155 
156     ValueConstraintPtr negate() const override {
157       RangeConstraint Tmp(*this);
158       switch (Kind) {
159       case OutOfRange:
160         Tmp.Kind = WithinRange;
161         break;
162       case WithinRange:
163         Tmp.Kind = OutOfRange;
164         break;
165       default:
166         llvm_unreachable("Unknown RangeConstraint kind!");
167       }
168       return std::make_shared<RangeConstraint>(Tmp);
169     }
170   };
171 
172   class ComparisonConstraint : public ValueConstraint {
173     BinaryOperator::Opcode Opcode;
174     ArgNo OtherArgN;
175 
176   public:
177     ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode,
178                          ArgNo OtherArgN)
179         : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {}
180     ArgNo getOtherArgNo() const { return OtherArgN; }
181     BinaryOperator::Opcode getOpcode() const { return Opcode; }
182     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
183                           const Summary &Summary) const override;
184   };
185 
186   /// The complete list of constraints that defines a single branch.
187   typedef std::vector<ValueConstraintPtr> ConstraintSet;
188 
189   using ArgTypes = std::vector<QualType>;
190   using Cases = std::vector<ConstraintSet>;
191 
192   /// Includes information about
193   ///   * function prototype (which is necessary to
194   ///     ensure we're modeling the right function and casting values properly),
195   ///   * approach to invalidation,
196   ///   * a list of branches - a list of list of ranges -
197   ///     A branch represents a path in the exploded graph of a function (which
198   ///     is a tree). So, a branch is a series of assumptions. In other words,
199   ///     branches represent split states and additional assumptions on top of
200   ///     the splitting assumption.
201   ///     For example, consider the branches in `isalpha(x)`
202   ///       Branch 1)
203   ///         x is in range ['A', 'Z'] or in ['a', 'z']
204   ///         then the return value is not 0. (I.e. out-of-range [0, 0])
205   ///       Branch 2)
206   ///         x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z']
207   ///         then the return value is 0.
208   ///   * a list of argument constraints, that must be true on every branch.
209   ///     If these constraints are not satisfied that means a fatal error
210   ///     usually resulting in undefined behaviour.
211   struct Summary {
212     const ArgTypes ArgTys;
213     const QualType RetTy;
214     const InvalidationKind InvalidationKd;
215     Cases CaseConstraints;
216     ConstraintSet ArgConstraints;
217 
218     Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd)
219         : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {}
220 
221     Summary &Case(ConstraintSet&& CS) {
222       CaseConstraints.push_back(std::move(CS));
223       return *this;
224     }
225     Summary &ArgConstraint(ValueConstraintPtr VC) {
226       ArgConstraints.push_back(VC);
227       return *this;
228     }
229 
230   private:
231     static void assertTypeSuitableForSummary(QualType T) {
232       assert(!T->isVoidType() &&
233              "We should have had no significant void types in the spec");
234       assert(T.isCanonical() &&
235              "We should only have canonical types in the spec");
236       // FIXME: lift this assert (but not the ones above!)
237       assert(T->isIntegralOrEnumerationType() &&
238              "We only support integral ranges in the spec");
239     }
240 
241   public:
242     QualType getArgType(ArgNo ArgN) const {
243       QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN];
244       assertTypeSuitableForSummary(T);
245       return T;
246     }
247 
248     /// Try our best to figure out if the call expression is the call of
249     /// *the* library function to which this specification applies.
250     bool matchesCall(const CallExpr *CE) const;
251   };
252 
253   // The same function (as in, function identifier) may have different
254   // summaries assigned to it, with different argument and return value types.
255   // We call these "variants" of the function. This can be useful for handling
256   // C++ function overloads, and also it can be used when the same function
257   // may have different definitions on different platforms.
258   typedef std::vector<Summary> Summaries;
259 
260   // The map of all functions supported by the checker. It is initialized
261   // lazily, and it doesn't change after initialization.
262   mutable llvm::StringMap<Summaries> FunctionSummaryMap;
263 
264   mutable std::unique_ptr<BugType> BT_InvalidArg;
265 
266   // Auxiliary functions to support ArgNo within all structures
267   // in a unified manner.
268   static QualType getArgType(const Summary &Summary, ArgNo ArgN) {
269     return Summary.getArgType(ArgN);
270   }
271   static QualType getArgType(const CallEvent &Call, ArgNo ArgN) {
272     return ArgN == Ret ? Call.getResultType().getCanonicalType()
273                        : Call.getArgExpr(ArgN)->getType().getCanonicalType();
274   }
275   static QualType getArgType(const CallExpr *CE, ArgNo ArgN) {
276     return ArgN == Ret ? CE->getType().getCanonicalType()
277                        : CE->getArg(ArgN)->getType().getCanonicalType();
278   }
279   static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) {
280     return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN);
281   }
282 
283 public:
284   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
285   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
286   bool evalCall(const CallEvent &Call, CheckerContext &C) const;
287 
288   enum CheckKind { CK_StdCLibraryFunctionArgsChecker, CK_NumCheckKinds };
289   DefaultBool ChecksEnabled[CK_NumCheckKinds];
290   CheckerNameRef CheckNames[CK_NumCheckKinds];
291 
292 private:
293   Optional<Summary> findFunctionSummary(const FunctionDecl *FD,
294                                         const CallExpr *CE,
295                                         CheckerContext &C) const;
296   Optional<Summary> findFunctionSummary(const CallEvent &Call,
297                                         CheckerContext &C) const;
298 
299   void initFunctionSummaries(CheckerContext &C) const;
300 
301   void reportBug(const CallEvent &Call, ExplodedNode *N,
302                  CheckerContext &C) const {
303     if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker])
304       return;
305     // TODO Add detailed diagnostic.
306     StringRef Msg = "Function argument constraint is not satisfied";
307     if (!BT_InvalidArg)
308       BT_InvalidArg = std::make_unique<BugType>(
309           CheckNames[CK_StdCLibraryFunctionArgsChecker],
310           "Unsatisfied argument constraints", categories::LogicError);
311     auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N);
312     bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R);
313     C.emitReport(std::move(R));
314   }
315 };
316 
317 const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret =
318     std::numeric_limits<ArgNo>::max();
319 
320 } // end of anonymous namespace
321 
322 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange(
323     ProgramStateRef State, const CallEvent &Call,
324     const Summary &Summary) const {
325 
326   ProgramStateManager &Mgr = State->getStateManager();
327   SValBuilder &SVB = Mgr.getSValBuilder();
328   BasicValueFactory &BVF = SVB.getBasicValueFactory();
329   ConstraintManager &CM = Mgr.getConstraintManager();
330   QualType T = getArgType(Summary, getArgNo());
331   SVal V = getArgSVal(Call, getArgNo());
332 
333   if (auto N = V.getAs<NonLoc>()) {
334     const IntRangeVector &R = getRanges();
335     size_t E = R.size();
336     for (size_t I = 0; I != E; ++I) {
337       const llvm::APSInt &Min = BVF.getValue(R[I].first, T);
338       const llvm::APSInt &Max = BVF.getValue(R[I].second, T);
339       assert(Min <= Max);
340       State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
341       if (!State)
342         break;
343     }
344   }
345 
346   return State;
347 }
348 
349 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsWithinRange(
350     ProgramStateRef State, const CallEvent &Call,
351     const Summary &Summary) const {
352 
353   ProgramStateManager &Mgr = State->getStateManager();
354   SValBuilder &SVB = Mgr.getSValBuilder();
355   BasicValueFactory &BVF = SVB.getBasicValueFactory();
356   ConstraintManager &CM = Mgr.getConstraintManager();
357   QualType T = getArgType(Summary, getArgNo());
358   SVal V = getArgSVal(Call, getArgNo());
359 
360   // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R".
361   // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary,
362   // and then cut away all holes in R one by one.
363   //
364   // E.g. consider a range list R as [A, B] and [C, D]
365   // -------+--------+------------------+------------+----------->
366   //        A        B                  C            D
367   // Then we assume that the value is not in [-inf, A - 1],
368   // then not in [D + 1, +inf], then not in [B + 1, C - 1]
369   if (auto N = V.getAs<NonLoc>()) {
370     const IntRangeVector &R = getRanges();
371     size_t E = R.size();
372 
373     const llvm::APSInt &MinusInf = BVF.getMinValue(T);
374     const llvm::APSInt &PlusInf = BVF.getMaxValue(T);
375 
376     const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T);
377     if (Left != PlusInf) {
378       assert(MinusInf <= Left);
379       State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false);
380       if (!State)
381         return nullptr;
382     }
383 
384     const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T);
385     if (Right != MinusInf) {
386       assert(Right <= PlusInf);
387       State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false);
388       if (!State)
389         return nullptr;
390     }
391 
392     for (size_t I = 1; I != E; ++I) {
393       const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T);
394       const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T);
395       if (Min <= Max) {
396         State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
397         if (!State)
398           return nullptr;
399       }
400     }
401   }
402 
403   return State;
404 }
405 
406 ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply(
407     ProgramStateRef State, const CallEvent &Call,
408     const Summary &Summary) const {
409 
410   ProgramStateManager &Mgr = State->getStateManager();
411   SValBuilder &SVB = Mgr.getSValBuilder();
412   QualType CondT = SVB.getConditionType();
413   QualType T = getArgType(Summary, getArgNo());
414   SVal V = getArgSVal(Call, getArgNo());
415 
416   BinaryOperator::Opcode Op = getOpcode();
417   ArgNo OtherArg = getOtherArgNo();
418   SVal OtherV = getArgSVal(Call, OtherArg);
419   QualType OtherT = getArgType(Call, OtherArg);
420   // Note: we avoid integral promotion for comparison.
421   OtherV = SVB.evalCast(OtherV, T, OtherT);
422   if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT)
423                        .getAs<DefinedOrUnknownSVal>())
424     State = State->assume(*CompV, true);
425   return State;
426 }
427 
428 void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call,
429                                               CheckerContext &C) const {
430   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
431   if (!FoundSummary)
432     return;
433 
434   const Summary &Summary = *FoundSummary;
435   ProgramStateRef State = C.getState();
436 
437   for (const ValueConstraintPtr& VC : Summary.ArgConstraints) {
438     ProgramStateRef SuccessSt = VC->apply(State, Call, Summary);
439     ProgramStateRef FailureSt = VC->negate()->apply(State, Call, Summary);
440     // The argument constraint is not satisfied.
441     if (FailureSt && !SuccessSt) {
442       if (ExplodedNode *N = C.generateErrorNode(State))
443         reportBug(Call, N, C);
444       break;
445     } else {
446       // Apply the constraint even if we cannot reason about the argument. This
447       // means both SuccessSt and FailureSt can be true. If we weren't applying
448       // the constraint that would mean that symbolic execution continues on a
449       // code whose behaviour is undefined.
450       assert(SuccessSt);
451       C.addTransition(SuccessSt);
452     }
453   }
454 }
455 
456 void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call,
457                                                CheckerContext &C) const {
458   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
459   if (!FoundSummary)
460     return;
461 
462   // Now apply the constraints.
463   const Summary &Summary = *FoundSummary;
464   ProgramStateRef State = C.getState();
465 
466   // Apply case/branch specifications.
467   for (const auto &VRS : Summary.CaseConstraints) {
468     ProgramStateRef NewState = State;
469     for (const auto &VR: VRS) {
470       NewState = VR->apply(NewState, Call, Summary);
471       if (!NewState)
472         break;
473     }
474 
475     if (NewState && NewState != State)
476       C.addTransition(NewState);
477   }
478 }
479 
480 bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call,
481                                           CheckerContext &C) const {
482   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
483   if (!FoundSummary)
484     return false;
485 
486   const Summary &Summary = *FoundSummary;
487   switch (Summary.InvalidationKd) {
488   case EvalCallAsPure: {
489     ProgramStateRef State = C.getState();
490     const LocationContext *LC = C.getLocationContext();
491     const auto *CE = cast_or_null<CallExpr>(Call.getOriginExpr());
492     SVal V = C.getSValBuilder().conjureSymbolVal(
493         CE, LC, CE->getType().getCanonicalType(), C.blockCount());
494     State = State->BindExpr(CE, LC, V);
495     C.addTransition(State);
496     return true;
497   }
498   case NoEvalCall:
499     // Summary tells us to avoid performing eval::Call. The function is possibly
500     // evaluated by another checker, or evaluated conservatively.
501     return false;
502   }
503   llvm_unreachable("Unknown invalidation kind!");
504 }
505 
506 bool StdLibraryFunctionsChecker::Summary::matchesCall(
507     const CallExpr *CE) const {
508   // Check number of arguments:
509   if (CE->getNumArgs() != ArgTys.size())
510     return false;
511 
512   // Check return type if relevant:
513   if (!RetTy.isNull() && RetTy != CE->getType().getCanonicalType())
514     return false;
515 
516   // Check argument types when relevant:
517   for (size_t I = 0, E = ArgTys.size(); I != E; ++I) {
518     QualType FormalT = ArgTys[I];
519     // Null type marks irrelevant arguments.
520     if (FormalT.isNull())
521       continue;
522 
523     assertTypeSuitableForSummary(FormalT);
524 
525     QualType ActualT = StdLibraryFunctionsChecker::getArgType(CE, I);
526     assert(ActualT.isCanonical());
527     if (ActualT != FormalT)
528       return false;
529   }
530 
531   return true;
532 }
533 
534 Optional<StdLibraryFunctionsChecker::Summary>
535 StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD,
536                                                 const CallExpr *CE,
537                                                 CheckerContext &C) const {
538   // Note: we cannot always obtain FD from CE
539   // (eg. virtual call, or call by pointer).
540   assert(CE);
541 
542   if (!FD)
543     return None;
544 
545   initFunctionSummaries(C);
546 
547   IdentifierInfo *II = FD->getIdentifier();
548   if (!II)
549     return None;
550   StringRef Name = II->getName();
551   if (Name.empty() || !C.isCLibraryFunction(FD, Name))
552     return None;
553 
554   auto FSMI = FunctionSummaryMap.find(Name);
555   if (FSMI == FunctionSummaryMap.end())
556     return None;
557 
558   // Verify that function signature matches the spec in advance.
559   // Otherwise we might be modeling the wrong function.
560   // Strict checking is important because we will be conducting
561   // very integral-type-sensitive operations on arguments and
562   // return values.
563   const Summaries &SpecVariants = FSMI->second;
564   for (const Summary &Spec : SpecVariants)
565     if (Spec.matchesCall(CE))
566       return Spec;
567 
568   return None;
569 }
570 
571 Optional<StdLibraryFunctionsChecker::Summary>
572 StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call,
573                                                 CheckerContext &C) const {
574   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
575   if (!FD)
576     return None;
577   const CallExpr *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
578   if (!CE)
579     return None;
580   return findFunctionSummary(FD, CE, C);
581 }
582 
583 void StdLibraryFunctionsChecker::initFunctionSummaries(
584     CheckerContext &C) const {
585   if (!FunctionSummaryMap.empty())
586     return;
587 
588   SValBuilder &SVB = C.getSValBuilder();
589   BasicValueFactory &BVF = SVB.getBasicValueFactory();
590   const ASTContext &ACtx = BVF.getContext();
591 
592   // These types are useful for writing specifications quickly,
593   // New specifications should probably introduce more types.
594   // Some types are hard to obtain from the AST, eg. "ssize_t".
595   // In such cases it should be possible to provide multiple variants
596   // of function summary for common cases (eg. ssize_t could be int or long
597   // or long long, so three summary variants would be enough).
598   // Of course, function variants are also useful for C++ overloads.
599   const QualType
600       Irrelevant{}; // A placeholder, whenever we do not care about the type.
601   const QualType IntTy = ACtx.IntTy;
602   const QualType LongTy = ACtx.LongTy;
603   const QualType LongLongTy = ACtx.LongLongTy;
604   const QualType SizeTy = ACtx.getSizeType();
605 
606   const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue();
607   const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue();
608   const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue();
609 
610   // Set UCharRangeMax to min of int or uchar maximum value.
611   // The C standard states that the arguments of functions like isalpha must
612   // be representable as an unsigned char. Their type is 'int', so the max
613   // value of the argument should be min(UCharMax, IntMax). This just happen
614   // to be true for commonly used and well tested instruction set
615   // architectures, but not for others.
616   const RangeInt UCharRangeMax =
617       std::min(BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue(), IntMax);
618 
619   // The platform dependent value of EOF.
620   // Try our best to parse this from the Preprocessor, otherwise fallback to -1.
621   const auto EOFv = [&C]() -> RangeInt {
622     if (const llvm::Optional<int> OptInt =
623             tryExpandAsInteger("EOF", C.getPreprocessor()))
624       return *OptInt;
625     return -1;
626   }();
627 
628   // We are finally ready to define specifications for all supported functions.
629   //
630   // The signature needs to have the correct number of arguments.
631   // However, we insert `Irrelevant' when the type is insignificant.
632   //
633   // Argument ranges should always cover all variants. If return value
634   // is completely unknown, omit it from the respective range set.
635   //
636   // All types in the spec need to be canonical.
637   //
638   // Every item in the list of range sets represents a particular
639   // execution path the analyzer would need to explore once
640   // the call is modeled - a new program state is constructed
641   // for every range set, and each range line in the range set
642   // corresponds to a specific constraint within this state.
643   //
644   // Upon comparing to another argument, the other argument is casted
645   // to the current argument's type. This avoids proper promotion but
646   // seems useful. For example, read() receives size_t argument,
647   // and its return value, which is of type ssize_t, cannot be greater
648   // than this argument. If we made a promotion, and the size argument
649   // is equal to, say, 10, then we'd impose a range of [0, 10] on the
650   // return value, however the correct range is [-1, 10].
651   //
652   // Please update the list of functions in the header after editing!
653   //
654 
655   // Below are helpers functions to create the summaries.
656   auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind,
657                               IntRangeVector Ranges) {
658     return std::make_shared<RangeConstraint>(ArgN, Kind, Ranges);
659   };
660   struct {
661     auto operator()(RangeKind Kind, IntRangeVector Ranges) {
662       return std::make_shared<RangeConstraint>(Ret, Kind, Ranges);
663     }
664     auto operator()(BinaryOperator::Opcode Op, ArgNo OtherArgN) {
665       return std::make_shared<ComparisonConstraint>(Ret, Op, OtherArgN);
666     }
667   } ReturnValueCondition;
668   auto Range = [](RangeInt b, RangeInt e) {
669     return IntRangeVector{std::pair<RangeInt, RangeInt>{b, e}};
670   };
671   auto SingleValue = [](RangeInt v) {
672     return IntRangeVector{std::pair<RangeInt, RangeInt>{v, v}};
673   };
674   auto LessThanOrEq = BO_LE;
675 
676   using RetType = QualType;
677   // Templates for summaries that are reused by many functions.
678   auto Getc = [&]() {
679     return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall)
680         .Case({ReturnValueCondition(WithinRange,
681                                     {{EOFv, EOFv}, {0, UCharRangeMax}})});
682   };
683   auto Read = [&](RetType R, RangeInt Max) {
684     return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R},
685                    NoEvalCall)
686         .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)),
687                ReturnValueCondition(WithinRange, Range(-1, Max))});
688   };
689   auto Fread = [&]() {
690     return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy, Irrelevant},
691                    RetType{SizeTy}, NoEvalCall)
692         .Case({
693             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
694         });
695   };
696   auto Getline = [&](RetType R, RangeInt Max) {
697     return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R},
698                    NoEvalCall)
699         .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})});
700   };
701 
702   FunctionSummaryMap = {
703       // The isascii() family of functions.
704       // The behavior is undefined if the value of the argument is not
705       // representable as unsigned char or is not equal to EOF. See e.g. C99
706       // 7.4.1.2 The isalpha function (p: 181-182).
707       {
708           "isalnum",
709           Summaries{
710               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
711                   // Boils down to isupper() or islower() or isdigit().
712                   .Case(
713                       {ArgumentCondition(0U, WithinRange,
714                                          {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}),
715                        ReturnValueCondition(OutOfRange, SingleValue(0))})
716                   // The locale-specific range.
717                   // No post-condition. We are completely unaware of
718                   // locale-specific return values.
719                   .Case({ArgumentCondition(0U, WithinRange,
720                                            {{128, UCharRangeMax}})})
721                   .Case({ArgumentCondition(0U, OutOfRange,
722                                            {{'0', '9'},
723                                             {'A', 'Z'},
724                                             {'a', 'z'},
725                                             {128, UCharRangeMax}}),
726                          ReturnValueCondition(WithinRange, SingleValue(0))})
727                   .ArgConstraint(ArgumentCondition(
728                       0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}}))},
729       },
730       {
731           "isalpha",
732           Summaries{
733               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
734                   .Case({ArgumentCondition(0U, WithinRange,
735                                            {{'A', 'Z'}, {'a', 'z'}}),
736                          ReturnValueCondition(OutOfRange, SingleValue(0))})
737                   // The locale-specific range.
738                   .Case({ArgumentCondition(0U, WithinRange,
739                                            {{128, UCharRangeMax}})})
740                   .Case({ArgumentCondition(
741                              0U, OutOfRange,
742                              {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}),
743                          ReturnValueCondition(WithinRange, SingleValue(0))})},
744       },
745       {
746           "isascii",
747           Summaries{
748               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
749                   .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
750                          ReturnValueCondition(OutOfRange, SingleValue(0))})
751                   .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)),
752                          ReturnValueCondition(WithinRange, SingleValue(0))})},
753       },
754       {
755           "isblank",
756           Summaries{
757               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
758                   .Case({ArgumentCondition(0U, WithinRange,
759                                            {{'\t', '\t'}, {' ', ' '}}),
760                          ReturnValueCondition(OutOfRange, SingleValue(0))})
761                   .Case({ArgumentCondition(0U, OutOfRange,
762                                            {{'\t', '\t'}, {' ', ' '}}),
763                          ReturnValueCondition(WithinRange, SingleValue(0))})},
764       },
765       {
766           "iscntrl",
767           Summaries{
768               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
769                   .Case({ArgumentCondition(0U, WithinRange,
770                                            {{0, 32}, {127, 127}}),
771                          ReturnValueCondition(OutOfRange, SingleValue(0))})
772                   .Case(
773                       {ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}),
774                        ReturnValueCondition(WithinRange, SingleValue(0))})},
775       },
776       {
777           "isdigit",
778           Summaries{
779               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
780                   .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')),
781                          ReturnValueCondition(OutOfRange, SingleValue(0))})
782                   .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')),
783                          ReturnValueCondition(WithinRange, SingleValue(0))})},
784       },
785       {
786           "isgraph",
787           Summaries{
788               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
789                   .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)),
790                          ReturnValueCondition(OutOfRange, SingleValue(0))})
791                   .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)),
792                          ReturnValueCondition(WithinRange, SingleValue(0))})},
793       },
794       {
795           "islower",
796           Summaries{
797               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
798                   // Is certainly lowercase.
799                   .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')),
800                          ReturnValueCondition(OutOfRange, SingleValue(0))})
801                   // Is ascii but not lowercase.
802                   .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
803                          ArgumentCondition(0U, OutOfRange, Range('a', 'z')),
804                          ReturnValueCondition(WithinRange, SingleValue(0))})
805                   // The locale-specific range.
806                   .Case({ArgumentCondition(0U, WithinRange,
807                                            {{128, UCharRangeMax}})})
808                   // Is not an unsigned char.
809                   .Case({ArgumentCondition(0U, OutOfRange,
810                                            Range(0, UCharRangeMax)),
811                          ReturnValueCondition(WithinRange, SingleValue(0))})},
812       },
813       {
814           "isprint",
815           Summaries{
816               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
817                   .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)),
818                          ReturnValueCondition(OutOfRange, SingleValue(0))})
819                   .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)),
820                          ReturnValueCondition(WithinRange, SingleValue(0))})},
821       },
822       {
823           "ispunct",
824           Summaries{
825               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
826                   .Case({ArgumentCondition(
827                              0U, WithinRange,
828                              {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
829                          ReturnValueCondition(OutOfRange, SingleValue(0))})
830                   .Case({ArgumentCondition(
831                              0U, OutOfRange,
832                              {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
833                          ReturnValueCondition(WithinRange, SingleValue(0))})},
834       },
835       {
836           "isspace",
837           Summaries{
838               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
839                   // Space, '\f', '\n', '\r', '\t', '\v'.
840                   .Case({ArgumentCondition(0U, WithinRange,
841                                            {{9, 13}, {' ', ' '}}),
842                          ReturnValueCondition(OutOfRange, SingleValue(0))})
843                   // The locale-specific range.
844                   .Case({ArgumentCondition(0U, WithinRange,
845                                            {{128, UCharRangeMax}})})
846                   .Case({ArgumentCondition(
847                              0U, OutOfRange,
848                              {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}),
849                          ReturnValueCondition(WithinRange, SingleValue(0))})},
850       },
851       {
852           "isupper",
853           Summaries{
854               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
855                   // Is certainly uppercase.
856                   .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')),
857                          ReturnValueCondition(OutOfRange, SingleValue(0))})
858                   // The locale-specific range.
859                   .Case({ArgumentCondition(0U, WithinRange,
860                                            {{128, UCharRangeMax}})})
861                   // Other.
862                   .Case({ArgumentCondition(0U, OutOfRange,
863                                            {{'A', 'Z'}, {128, UCharRangeMax}}),
864                          ReturnValueCondition(WithinRange, SingleValue(0))})},
865       },
866       {
867           "isxdigit",
868           Summaries{
869               Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
870                   .Case(
871                       {ArgumentCondition(0U, WithinRange,
872                                          {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
873                        ReturnValueCondition(OutOfRange, SingleValue(0))})
874                   .Case(
875                       {ArgumentCondition(0U, OutOfRange,
876                                          {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
877                        ReturnValueCondition(WithinRange, SingleValue(0))})},
878       },
879 
880       // The getc() family of functions that returns either a char or an EOF.
881       {"getc", Summaries{Getc()}},
882       {"fgetc", Summaries{Getc()}},
883       {"getchar",
884        Summaries{Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall)
885                      .Case({ReturnValueCondition(
886                          WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})})}},
887 
888       // read()-like functions that never return more than buffer size.
889       // We are not sure how ssize_t is defined on every platform, so we
890       // provide three variants that should cover common cases.
891       {"read", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax),
892                          Read(LongLongTy, LongLongMax)}},
893       {"write", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax),
894                           Read(LongLongTy, LongLongMax)}},
895       {"fread", Summaries{Fread()}},
896       {"fwrite", Summaries{Fread()}},
897       // getline()-like functions either fail or read at least the delimiter.
898       {"getline", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax),
899                             Getline(LongLongTy, LongLongMax)}},
900       {"getdelim", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax),
901                              Getline(LongLongTy, LongLongMax)}},
902   };
903 }
904 
905 void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) {
906   mgr.registerChecker<StdLibraryFunctionsChecker>();
907 }
908 
909 bool ento::shouldRegisterStdCLibraryFunctionsChecker(const LangOptions &LO) {
910   return true;
911 }
912 
913 #define REGISTER_CHECKER(name)                                                 \
914   void ento::register##name(CheckerManager &mgr) {                             \
915     StdLibraryFunctionsChecker *checker =                                      \
916         mgr.getChecker<StdLibraryFunctionsChecker>();                          \
917     checker->ChecksEnabled[StdLibraryFunctionsChecker::CK_##name] = true;      \
918     checker->CheckNames[StdLibraryFunctionsChecker::CK_##name] =               \
919         mgr.getCurrentCheckerName();                                           \
920   }                                                                            \
921                                                                                \
922   bool ento::shouldRegister##name(const LangOptions &LO) { return true; }
923 
924 REGISTER_CHECKER(StdCLibraryFunctionArgsChecker)
925