xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp (revision bd03ef19beb8a3476d5cd9f744c5fba5ca287c51)
1 //=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker improves modeling of a few simple library functions.
10 //
11 // This checker provides a specification format - `Summary' - and
12 // contains descriptions of some library functions in this format. Each
13 // specification contains a list of branches for splitting the program state
14 // upon call, and range constraints on argument and return-value symbols that
15 // are satisfied on each branch. This spec can be expanded to include more
16 // items, like external effects of the function.
17 //
18 // The main difference between this approach and the body farms technique is
19 // in more explicit control over how many branches are produced. For example,
20 // consider standard C function `ispunct(int x)', which returns a non-zero value
21 // iff `x' is a punctuation character, that is, when `x' is in range
22 //   ['!', '/']   [':', '@']  U  ['[', '\`']  U  ['{', '~'].
23 // `Summary' provides only two branches for this function. However,
24 // any attempt to describe this range with if-statements in the body farm
25 // would result in many more branches. Because each branch needs to be analyzed
26 // independently, this significantly reduces performance. Additionally,
27 // once we consider a branch on which `x' is in range, say, ['!', '/'],
28 // we assume that such branch is an important separate path through the program,
29 // which may lead to false positives because considering this particular path
30 // was not consciously intended, and therefore it might have been unreachable.
31 //
32 // This checker uses eval::Call for modeling pure functions (functions without
33 // side effets), for which their `Summary' is a precise model. This avoids
34 // unnecessary invalidation passes. Conflicts with other checkers are unlikely
35 // because if the function has no other effects, other checkers would probably
36 // never want to improve upon the modeling done by this checker.
37 //
38 // Non-pure functions, for which only partial improvement over the default
39 // behavior is expected, are modeled via check::PostCall, non-intrusively.
40 //
41 // The following standard C functions are currently supported:
42 //
43 //   fgetc      getline   isdigit   isupper
44 //   fread      isalnum   isgraph   isxdigit
45 //   fwrite     isalpha   islower   read
46 //   getc       isascii   isprint   write
47 //   getchar    isblank   ispunct
48 //   getdelim   iscntrl   isspace
49 //
50 //===----------------------------------------------------------------------===//
51 
52 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
53 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
54 #include "clang/StaticAnalyzer/Core/Checker.h"
55 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
56 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
57 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
58 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h"
59 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h"
60 
61 using namespace clang;
62 using namespace clang::ento;
63 
64 namespace {
65 class StdLibraryFunctionsChecker
66     : public Checker<check::PreCall, check::PostCall, eval::Call> {
67   /// Below is a series of typedefs necessary to define function specs.
68   /// We avoid nesting types here because each additional qualifier
69   /// would need to be repeated in every function spec.
70   struct Summary;
71 
72   /// Specify how much the analyzer engine should entrust modeling this function
73   /// to us. If he doesn't, he performs additional invalidations.
74   enum InvalidationKind { NoEvalCall, EvalCallAsPure };
75 
76   // The universal integral type to use in value range descriptions.
77   // Unsigned to make sure overflows are well-defined.
78   typedef uint64_t RangeInt;
79 
80   /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is
81   /// a non-negative integer, which less than 5 and not equal to 2. For
82   /// `ComparesToArgument', holds information about how exactly to compare to
83   /// the argument.
84   typedef std::vector<std::pair<RangeInt, RangeInt>> IntRangeVector;
85 
86   /// A reference to an argument or return value by its number.
87   /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but
88   /// obviously uint32_t should be enough for all practical purposes.
89   typedef uint32_t ArgNo;
90   static const ArgNo Ret;
91 
92   class ValueConstraint;
93 
94   // Pointer to the ValueConstraint. We need a copyable, polymorphic and
95   // default initialize able type (vector needs that). A raw pointer was good,
96   // however, we cannot default initialize that. unique_ptr makes the Summary
97   // class non-copyable, therefore not an option. Releasing the copyability
98   // requirement would render the initialization of the Summary map infeasible.
99   using ValueConstraintPtr = std::shared_ptr<ValueConstraint>;
100 
101   /// Polymorphic base class that represents a constraint on a given argument
102   /// (or return value) of a function. Derived classes implement different kind
103   /// of constraints, e.g range constraints or correlation between two
104   /// arguments.
105   class ValueConstraint {
106   public:
107     ValueConstraint(ArgNo ArgN) : ArgN(ArgN) {}
108     virtual ~ValueConstraint() {}
109     /// Apply the effects of the constraint on the given program state. If null
110     /// is returned then the constraint is not feasible.
111     virtual ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
112                                   const Summary &Summary,
113                                   CheckerContext &C) const = 0;
114     virtual ValueConstraintPtr negate() const {
115       llvm_unreachable("Not implemented");
116     };
117     ArgNo getArgNo() const { return ArgN; }
118 
119   protected:
120     ArgNo ArgN; // Argument to which we apply the constraint.
121   };
122 
123   /// Given a range, should the argument stay inside or outside this range?
124   enum RangeKind { OutOfRange, WithinRange };
125 
126   /// Encapsulates a single range on a single symbol within a branch.
127   class RangeConstraint : public ValueConstraint {
128     RangeKind Kind;      // Kind of range definition.
129     IntRangeVector Args; // Polymorphic arguments.
130 
131   public:
132     RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Args)
133         : ValueConstraint(ArgN), Kind(Kind), Args(Args) {}
134 
135     const IntRangeVector &getRanges() const {
136       return Args;
137     }
138 
139   private:
140     ProgramStateRef applyAsOutOfRange(ProgramStateRef State,
141                                       const CallEvent &Call,
142                                       const Summary &Summary) const;
143     ProgramStateRef applyAsWithinRange(ProgramStateRef State,
144                                        const CallEvent &Call,
145                                        const Summary &Summary) const;
146   public:
147     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
148                           const Summary &Summary,
149                           CheckerContext &C) const override {
150       switch (Kind) {
151       case OutOfRange:
152         return applyAsOutOfRange(State, Call, Summary);
153       case WithinRange:
154         return applyAsWithinRange(State, Call, Summary);
155       }
156       llvm_unreachable("Unknown range kind!");
157     }
158 
159     ValueConstraintPtr negate() const override {
160       RangeConstraint Tmp(*this);
161       switch (Kind) {
162       case OutOfRange:
163         Tmp.Kind = WithinRange;
164         break;
165       case WithinRange:
166         Tmp.Kind = OutOfRange;
167         break;
168       }
169       return std::make_shared<RangeConstraint>(Tmp);
170     }
171   };
172 
173   class ComparisonConstraint : public ValueConstraint {
174     BinaryOperator::Opcode Opcode;
175     ArgNo OtherArgN;
176 
177   public:
178     ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode,
179                          ArgNo OtherArgN)
180         : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {}
181     ArgNo getOtherArgNo() const { return OtherArgN; }
182     BinaryOperator::Opcode getOpcode() const { return Opcode; }
183     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
184                           const Summary &Summary,
185                           CheckerContext &C) const override;
186   };
187 
188   class NotNullConstraint : public ValueConstraint {
189     using ValueConstraint::ValueConstraint;
190     // This variable has a role when we negate the constraint.
191     bool CannotBeNull = true;
192 
193   public:
194     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
195                           const Summary &Summary,
196                           CheckerContext &C) const override {
197       SVal V = getArgSVal(Call, getArgNo());
198       if (V.isUndef())
199         return State;
200 
201       DefinedOrUnknownSVal L = V.castAs<DefinedOrUnknownSVal>();
202       if (!L.getAs<Loc>())
203         return State;
204 
205       return State->assume(L, CannotBeNull);
206     }
207 
208     ValueConstraintPtr negate() const override {
209       NotNullConstraint Tmp(*this);
210       Tmp.CannotBeNull = !this->CannotBeNull;
211       return std::make_shared<NotNullConstraint>(Tmp);
212     }
213   };
214 
215   // Represents a buffer argument with an additional size argument.
216   // E.g. the first two arguments here:
217   //   ctime_s(char *buffer, rsize_t bufsz, const time_t *time);
218   class BufferSizeConstraint : public ValueConstraint {
219     // The argument which holds the size of the buffer.
220     ArgNo SizeArgN;
221     // The operator we use in apply. This is negated in negate().
222     BinaryOperator::Opcode Op = BO_LE;
223 
224   public:
225     BufferSizeConstraint(ArgNo Buffer, ArgNo BufSize)
226         : ValueConstraint(Buffer), SizeArgN(BufSize) {}
227 
228     ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call,
229                           const Summary &Summary,
230                           CheckerContext &C) const override {
231       // The buffer argument.
232       SVal BufV = getArgSVal(Call, getArgNo());
233       // The size argument.
234       SVal SizeV = getArgSVal(Call, SizeArgN);
235       // The dynamic size of the buffer argument, got from the analyzer engine.
236       SVal BufDynSize = getDynamicSizeWithOffset(State, BufV);
237 
238       SValBuilder &SvalBuilder = C.getSValBuilder();
239       SVal Feasible = SvalBuilder.evalBinOp(State, Op, SizeV, BufDynSize,
240                                             SvalBuilder.getContext().BoolTy);
241       if (auto F = Feasible.getAs<DefinedOrUnknownSVal>())
242         return State->assume(*F, true);
243 
244       // We can get here only if the size argument or the dynamic size is
245       // undefined. But the dynamic size should never be undefined, only
246       // unknown. So, here, the size of the argument is undefined, i.e. we
247       // cannot apply the constraint. Actually, other checkers like
248       // CallAndMessage should catch this situation earlier, because we call a
249       // function with an uninitialized argument.
250       llvm_unreachable("Size argument or the dynamic size is Undefined");
251     }
252 
253     ValueConstraintPtr negate() const override {
254       BufferSizeConstraint Tmp(*this);
255       Tmp.Op = BinaryOperator::negateComparisonOp(Op);
256       return std::make_shared<BufferSizeConstraint>(Tmp);
257     }
258   };
259 
260   /// The complete list of constraints that defines a single branch.
261   typedef std::vector<ValueConstraintPtr> ConstraintSet;
262 
263   using ArgTypes = std::vector<QualType>;
264   using Cases = std::vector<ConstraintSet>;
265 
266   /// Includes information about
267   ///   * function prototype (which is necessary to
268   ///     ensure we're modeling the right function and casting values properly),
269   ///   * approach to invalidation,
270   ///   * a list of branches - a list of list of ranges -
271   ///     A branch represents a path in the exploded graph of a function (which
272   ///     is a tree). So, a branch is a series of assumptions. In other words,
273   ///     branches represent split states and additional assumptions on top of
274   ///     the splitting assumption.
275   ///     For example, consider the branches in `isalpha(x)`
276   ///       Branch 1)
277   ///         x is in range ['A', 'Z'] or in ['a', 'z']
278   ///         then the return value is not 0. (I.e. out-of-range [0, 0])
279   ///       Branch 2)
280   ///         x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z']
281   ///         then the return value is 0.
282   ///   * a list of argument constraints, that must be true on every branch.
283   ///     If these constraints are not satisfied that means a fatal error
284   ///     usually resulting in undefined behaviour.
285   struct Summary {
286     const ArgTypes ArgTys;
287     const QualType RetTy;
288     const InvalidationKind InvalidationKd;
289     Cases CaseConstraints;
290     ConstraintSet ArgConstraints;
291 
292     Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd)
293         : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {}
294 
295     Summary &Case(ConstraintSet&& CS) {
296       CaseConstraints.push_back(std::move(CS));
297       return *this;
298     }
299     Summary &ArgConstraint(ValueConstraintPtr VC) {
300       ArgConstraints.push_back(VC);
301       return *this;
302     }
303 
304   private:
305     static void assertTypeSuitableForSummary(QualType T) {
306       assert(!T->isVoidType() &&
307              "We should have had no significant void types in the spec");
308       assert(T.isCanonical() &&
309              "We should only have canonical types in the spec");
310     }
311 
312   public:
313     QualType getArgType(ArgNo ArgN) const {
314       QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN];
315       assertTypeSuitableForSummary(T);
316       return T;
317     }
318 
319     /// Try our best to figure out if the summary's signature matches
320     /// *the* library function to which this specification applies.
321     bool matchesSignature(const FunctionDecl *FD) const;
322   };
323 
324   // The map of all functions supported by the checker. It is initialized
325   // lazily, and it doesn't change after initialization.
326   using FunctionSummaryMapType = llvm::DenseMap<const FunctionDecl *, Summary>;
327   mutable FunctionSummaryMapType FunctionSummaryMap;
328 
329   mutable std::unique_ptr<BugType> BT_InvalidArg;
330 
331   // Auxiliary functions to support ArgNo within all structures
332   // in a unified manner.
333   static QualType getArgType(const Summary &Summary, ArgNo ArgN) {
334     return Summary.getArgType(ArgN);
335   }
336   static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) {
337     return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN);
338   }
339 
340 public:
341   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
342   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
343   bool evalCall(const CallEvent &Call, CheckerContext &C) const;
344 
345   enum CheckKind {
346     CK_StdCLibraryFunctionArgsChecker,
347     CK_StdCLibraryFunctionsTesterChecker,
348     CK_NumCheckKinds
349   };
350   DefaultBool ChecksEnabled[CK_NumCheckKinds];
351   CheckerNameRef CheckNames[CK_NumCheckKinds];
352 
353   bool DisplayLoadedSummaries = false;
354 
355 private:
356   Optional<Summary> findFunctionSummary(const FunctionDecl *FD,
357                                         CheckerContext &C) const;
358   Optional<Summary> findFunctionSummary(const CallEvent &Call,
359                                         CheckerContext &C) const;
360 
361   void initFunctionSummaries(CheckerContext &C) const;
362 
363   void reportBug(const CallEvent &Call, ExplodedNode *N,
364                  CheckerContext &C) const {
365     if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker])
366       return;
367     // TODO Add detailed diagnostic.
368     StringRef Msg = "Function argument constraint is not satisfied";
369     if (!BT_InvalidArg)
370       BT_InvalidArg = std::make_unique<BugType>(
371           CheckNames[CK_StdCLibraryFunctionArgsChecker],
372           "Unsatisfied argument constraints", categories::LogicError);
373     auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N);
374     bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R);
375     C.emitReport(std::move(R));
376   }
377 };
378 
379 const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret =
380     std::numeric_limits<ArgNo>::max();
381 
382 } // end of anonymous namespace
383 
384 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange(
385     ProgramStateRef State, const CallEvent &Call,
386     const Summary &Summary) const {
387 
388   ProgramStateManager &Mgr = State->getStateManager();
389   SValBuilder &SVB = Mgr.getSValBuilder();
390   BasicValueFactory &BVF = SVB.getBasicValueFactory();
391   ConstraintManager &CM = Mgr.getConstraintManager();
392   QualType T = getArgType(Summary, getArgNo());
393   SVal V = getArgSVal(Call, getArgNo());
394 
395   if (auto N = V.getAs<NonLoc>()) {
396     const IntRangeVector &R = getRanges();
397     size_t E = R.size();
398     for (size_t I = 0; I != E; ++I) {
399       const llvm::APSInt &Min = BVF.getValue(R[I].first, T);
400       const llvm::APSInt &Max = BVF.getValue(R[I].second, T);
401       assert(Min <= Max);
402       State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
403       if (!State)
404         break;
405     }
406   }
407 
408   return State;
409 }
410 
411 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsWithinRange(
412     ProgramStateRef State, const CallEvent &Call,
413     const Summary &Summary) const {
414 
415   ProgramStateManager &Mgr = State->getStateManager();
416   SValBuilder &SVB = Mgr.getSValBuilder();
417   BasicValueFactory &BVF = SVB.getBasicValueFactory();
418   ConstraintManager &CM = Mgr.getConstraintManager();
419   QualType T = getArgType(Summary, getArgNo());
420   SVal V = getArgSVal(Call, getArgNo());
421 
422   // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R".
423   // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary,
424   // and then cut away all holes in R one by one.
425   //
426   // E.g. consider a range list R as [A, B] and [C, D]
427   // -------+--------+------------------+------------+----------->
428   //        A        B                  C            D
429   // Then we assume that the value is not in [-inf, A - 1],
430   // then not in [D + 1, +inf], then not in [B + 1, C - 1]
431   if (auto N = V.getAs<NonLoc>()) {
432     const IntRangeVector &R = getRanges();
433     size_t E = R.size();
434 
435     const llvm::APSInt &MinusInf = BVF.getMinValue(T);
436     const llvm::APSInt &PlusInf = BVF.getMaxValue(T);
437 
438     const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T);
439     if (Left != PlusInf) {
440       assert(MinusInf <= Left);
441       State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false);
442       if (!State)
443         return nullptr;
444     }
445 
446     const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T);
447     if (Right != MinusInf) {
448       assert(Right <= PlusInf);
449       State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false);
450       if (!State)
451         return nullptr;
452     }
453 
454     for (size_t I = 1; I != E; ++I) {
455       const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T);
456       const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T);
457       if (Min <= Max) {
458         State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
459         if (!State)
460           return nullptr;
461       }
462     }
463   }
464 
465   return State;
466 }
467 
468 ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply(
469     ProgramStateRef State, const CallEvent &Call, const Summary &Summary,
470     CheckerContext &C) const {
471 
472   ProgramStateManager &Mgr = State->getStateManager();
473   SValBuilder &SVB = Mgr.getSValBuilder();
474   QualType CondT = SVB.getConditionType();
475   QualType T = getArgType(Summary, getArgNo());
476   SVal V = getArgSVal(Call, getArgNo());
477 
478   BinaryOperator::Opcode Op = getOpcode();
479   ArgNo OtherArg = getOtherArgNo();
480   SVal OtherV = getArgSVal(Call, OtherArg);
481   QualType OtherT = getArgType(Summary, OtherArg);
482   // Note: we avoid integral promotion for comparison.
483   OtherV = SVB.evalCast(OtherV, T, OtherT);
484   if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT)
485                        .getAs<DefinedOrUnknownSVal>())
486     State = State->assume(*CompV, true);
487   return State;
488 }
489 
490 void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call,
491                                               CheckerContext &C) const {
492   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
493   if (!FoundSummary)
494     return;
495 
496   const Summary &Summary = *FoundSummary;
497   ProgramStateRef State = C.getState();
498 
499   ProgramStateRef NewState = State;
500   for (const ValueConstraintPtr& VC : Summary.ArgConstraints) {
501     ProgramStateRef SuccessSt = VC->apply(NewState, Call, Summary, C);
502     ProgramStateRef FailureSt = VC->negate()->apply(NewState, Call, Summary, C);
503     // The argument constraint is not satisfied.
504     if (FailureSt && !SuccessSt) {
505       if (ExplodedNode *N = C.generateErrorNode(NewState))
506         reportBug(Call, N, C);
507       break;
508     } else {
509       // We will apply the constraint even if we cannot reason about the
510       // argument. This means both SuccessSt and FailureSt can be true. If we
511       // weren't applying the constraint that would mean that symbolic
512       // execution continues on a code whose behaviour is undefined.
513       assert(SuccessSt);
514       NewState = SuccessSt;
515     }
516   }
517   if (NewState && NewState != State)
518     C.addTransition(NewState);
519 }
520 
521 void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call,
522                                                CheckerContext &C) const {
523   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
524   if (!FoundSummary)
525     return;
526 
527   // Now apply the constraints.
528   const Summary &Summary = *FoundSummary;
529   ProgramStateRef State = C.getState();
530 
531   // Apply case/branch specifications.
532   for (const auto &VRS : Summary.CaseConstraints) {
533     ProgramStateRef NewState = State;
534     for (const auto &VR: VRS) {
535       NewState = VR->apply(NewState, Call, Summary, C);
536       if (!NewState)
537         break;
538     }
539 
540     if (NewState && NewState != State)
541       C.addTransition(NewState);
542   }
543 }
544 
545 bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call,
546                                           CheckerContext &C) const {
547   Optional<Summary> FoundSummary = findFunctionSummary(Call, C);
548   if (!FoundSummary)
549     return false;
550 
551   const Summary &Summary = *FoundSummary;
552   switch (Summary.InvalidationKd) {
553   case EvalCallAsPure: {
554     ProgramStateRef State = C.getState();
555     const LocationContext *LC = C.getLocationContext();
556     const auto *CE = cast_or_null<CallExpr>(Call.getOriginExpr());
557     SVal V = C.getSValBuilder().conjureSymbolVal(
558         CE, LC, CE->getType().getCanonicalType(), C.blockCount());
559     State = State->BindExpr(CE, LC, V);
560     C.addTransition(State);
561     return true;
562   }
563   case NoEvalCall:
564     // Summary tells us to avoid performing eval::Call. The function is possibly
565     // evaluated by another checker, or evaluated conservatively.
566     return false;
567   }
568   llvm_unreachable("Unknown invalidation kind!");
569 }
570 
571 bool StdLibraryFunctionsChecker::Summary::matchesSignature(
572     const FunctionDecl *FD) const {
573   // Check number of arguments:
574   if (FD->param_size() != ArgTys.size())
575     return false;
576 
577   // Check return type if relevant:
578   if (!RetTy.isNull() && RetTy != FD->getReturnType().getCanonicalType())
579     return false;
580 
581   // Check argument types when relevant:
582   for (size_t I = 0, E = ArgTys.size(); I != E; ++I) {
583     QualType FormalT = ArgTys[I];
584     // Null type marks irrelevant arguments.
585     if (FormalT.isNull())
586       continue;
587 
588     assertTypeSuitableForSummary(FormalT);
589 
590     QualType ActualT = FD->getParamDecl(I)->getType().getCanonicalType();
591     if (ActualT != FormalT)
592       return false;
593   }
594 
595   return true;
596 }
597 
598 Optional<StdLibraryFunctionsChecker::Summary>
599 StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD,
600                                                 CheckerContext &C) const {
601   if (!FD)
602     return None;
603 
604   initFunctionSummaries(C);
605 
606   auto FSMI = FunctionSummaryMap.find(FD->getCanonicalDecl());
607   if (FSMI == FunctionSummaryMap.end())
608     return None;
609   return FSMI->second;
610 }
611 
612 Optional<StdLibraryFunctionsChecker::Summary>
613 StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call,
614                                                 CheckerContext &C) const {
615   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
616   if (!FD)
617     return None;
618   return findFunctionSummary(FD, C);
619 }
620 
621 void StdLibraryFunctionsChecker::initFunctionSummaries(
622     CheckerContext &C) const {
623   if (!FunctionSummaryMap.empty())
624     return;
625 
626   SValBuilder &SVB = C.getSValBuilder();
627   BasicValueFactory &BVF = SVB.getBasicValueFactory();
628   const ASTContext &ACtx = BVF.getContext();
629 
630   // These types are useful for writing specifications quickly,
631   // New specifications should probably introduce more types.
632   // Some types are hard to obtain from the AST, eg. "ssize_t".
633   // In such cases it should be possible to provide multiple variants
634   // of function summary for common cases (eg. ssize_t could be int or long
635   // or long long, so three summary variants would be enough).
636   // Of course, function variants are also useful for C++ overloads.
637   const QualType
638       Irrelevant{}; // A placeholder, whenever we do not care about the type.
639   const QualType IntTy = ACtx.IntTy;
640   const QualType LongTy = ACtx.LongTy;
641   const QualType LongLongTy = ACtx.LongLongTy;
642   const QualType SizeTy = ACtx.getSizeType();
643   const QualType VoidPtrTy = ACtx.VoidPtrTy; // void *
644   const QualType VoidPtrRestrictTy =
645       ACtx.getRestrictType(VoidPtrTy); // void *restrict
646   const QualType ConstVoidPtrTy =
647       ACtx.getPointerType(ACtx.VoidTy.withConst()); // const void *
648   const QualType ConstCharPtrTy =
649       ACtx.getPointerType(ACtx.CharTy.withConst()); // const char *
650   const QualType ConstVoidPtrRestrictTy =
651       ACtx.getRestrictType(ConstVoidPtrTy); // const void *restrict
652 
653   const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue();
654   const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue();
655   const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue();
656 
657   // Set UCharRangeMax to min of int or uchar maximum value.
658   // The C standard states that the arguments of functions like isalpha must
659   // be representable as an unsigned char. Their type is 'int', so the max
660   // value of the argument should be min(UCharMax, IntMax). This just happen
661   // to be true for commonly used and well tested instruction set
662   // architectures, but not for others.
663   const RangeInt UCharRangeMax =
664       std::min(BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue(), IntMax);
665 
666   // The platform dependent value of EOF.
667   // Try our best to parse this from the Preprocessor, otherwise fallback to -1.
668   const auto EOFv = [&C]() -> RangeInt {
669     if (const llvm::Optional<int> OptInt =
670             tryExpandAsInteger("EOF", C.getPreprocessor()))
671       return *OptInt;
672     return -1;
673   }();
674 
675   // Auxiliary class to aid adding summaries to the summary map.
676   struct AddToFunctionSummaryMap {
677     const ASTContext &ACtx;
678     FunctionSummaryMapType &Map;
679     bool DisplayLoadedSummaries;
680     AddToFunctionSummaryMap(const ASTContext &ACtx, FunctionSummaryMapType &FSM,
681                             bool DisplayLoadedSummaries)
682         : ACtx(ACtx), Map(FSM), DisplayLoadedSummaries(DisplayLoadedSummaries) {
683     }
684 
685     // Add a summary to a FunctionDecl found by lookup. The lookup is performed
686     // by the given Name, and in the global scope. The summary will be attached
687     // to the found FunctionDecl only if the signatures match.
688     void operator()(StringRef Name, const Summary &S) {
689       IdentifierInfo &II = ACtx.Idents.get(Name);
690       auto LookupRes = ACtx.getTranslationUnitDecl()->lookup(&II);
691       if (LookupRes.size() == 0)
692         return;
693       for (Decl *D : LookupRes) {
694         if (auto *FD = dyn_cast<FunctionDecl>(D)) {
695           if (S.matchesSignature(FD)) {
696             auto Res = Map.insert({FD->getCanonicalDecl(), S});
697             assert(Res.second && "Function already has a summary set!");
698             (void)Res;
699             if (DisplayLoadedSummaries) {
700               llvm::errs() << "Loaded summary for: ";
701               FD->print(llvm::errs());
702               llvm::errs() << "\n";
703             }
704             return;
705           }
706         }
707       }
708     }
709     // Add several summaries for the given name.
710     void operator()(StringRef Name, const std::vector<Summary> &Summaries) {
711       for (const Summary &S : Summaries)
712         operator()(Name, S);
713     }
714   } addToFunctionSummaryMap(ACtx, FunctionSummaryMap, DisplayLoadedSummaries);
715 
716   // We are finally ready to define specifications for all supported functions.
717   //
718   // The signature needs to have the correct number of arguments.
719   // However, we insert `Irrelevant' when the type is insignificant.
720   //
721   // Argument ranges should always cover all variants. If return value
722   // is completely unknown, omit it from the respective range set.
723   //
724   // All types in the spec need to be canonical.
725   //
726   // Every item in the list of range sets represents a particular
727   // execution path the analyzer would need to explore once
728   // the call is modeled - a new program state is constructed
729   // for every range set, and each range line in the range set
730   // corresponds to a specific constraint within this state.
731   //
732   // Upon comparing to another argument, the other argument is casted
733   // to the current argument's type. This avoids proper promotion but
734   // seems useful. For example, read() receives size_t argument,
735   // and its return value, which is of type ssize_t, cannot be greater
736   // than this argument. If we made a promotion, and the size argument
737   // is equal to, say, 10, then we'd impose a range of [0, 10] on the
738   // return value, however the correct range is [-1, 10].
739   //
740   // Please update the list of functions in the header after editing!
741 
742   // Below are helpers functions to create the summaries.
743   auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind,
744                               IntRangeVector Ranges) {
745     return std::make_shared<RangeConstraint>(ArgN, Kind, Ranges);
746   };
747   auto BufferSize = [](ArgNo BufArgN, ArgNo SizeArgN) {
748     return std::make_shared<BufferSizeConstraint>(BufArgN, SizeArgN);
749   };
750   struct {
751     auto operator()(RangeKind Kind, IntRangeVector Ranges) {
752       return std::make_shared<RangeConstraint>(Ret, Kind, Ranges);
753     }
754     auto operator()(BinaryOperator::Opcode Op, ArgNo OtherArgN) {
755       return std::make_shared<ComparisonConstraint>(Ret, Op, OtherArgN);
756     }
757   } ReturnValueCondition;
758   auto Range = [](RangeInt b, RangeInt e) {
759     return IntRangeVector{std::pair<RangeInt, RangeInt>{b, e}};
760   };
761   auto SingleValue = [](RangeInt v) {
762     return IntRangeVector{std::pair<RangeInt, RangeInt>{v, v}};
763   };
764   auto LessThanOrEq = BO_LE;
765   auto NotNull = [&](ArgNo ArgN) {
766     return std::make_shared<NotNullConstraint>(ArgN);
767   };
768 
769   using RetType = QualType;
770   // Templates for summaries that are reused by many functions.
771   auto Getc = [&]() {
772     return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall)
773         .Case({ReturnValueCondition(WithinRange,
774                                     {{EOFv, EOFv}, {0, UCharRangeMax}})});
775   };
776   auto Read = [&](RetType R, RangeInt Max) {
777     return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R},
778                    NoEvalCall)
779         .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)),
780                ReturnValueCondition(WithinRange, Range(-1, Max))});
781   };
782   auto Fread = [&]() {
783     return Summary(ArgTypes{VoidPtrRestrictTy, Irrelevant, SizeTy, Irrelevant},
784                    RetType{SizeTy}, NoEvalCall)
785         .Case({
786             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
787         })
788         .ArgConstraint(NotNull(ArgNo(0)));
789   };
790   auto Fwrite = [&]() {
791     return Summary(
792                ArgTypes{ConstVoidPtrRestrictTy, Irrelevant, SizeTy, Irrelevant},
793                RetType{SizeTy}, NoEvalCall)
794         .Case({
795             ReturnValueCondition(LessThanOrEq, ArgNo(2)),
796         })
797         .ArgConstraint(NotNull(ArgNo(0)));
798   };
799   auto Getline = [&](RetType R, RangeInt Max) {
800     return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R},
801                    NoEvalCall)
802         .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})});
803   };
804 
805   // The isascii() family of functions.
806   // The behavior is undefined if the value of the argument is not
807   // representable as unsigned char or is not equal to EOF. See e.g. C99
808   // 7.4.1.2 The isalpha function (p: 181-182).
809   addToFunctionSummaryMap(
810       "isalnum",
811       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
812           // Boils down to isupper() or islower() or isdigit().
813           .Case({ArgumentCondition(0U, WithinRange,
814                                    {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}),
815                  ReturnValueCondition(OutOfRange, SingleValue(0))})
816           // The locale-specific range.
817           // No post-condition. We are completely unaware of
818           // locale-specific return values.
819           .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
820           .Case(
821               {ArgumentCondition(
822                    0U, OutOfRange,
823                    {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}),
824                ReturnValueCondition(WithinRange, SingleValue(0))})
825           .ArgConstraint(ArgumentCondition(
826               0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})));
827   addToFunctionSummaryMap(
828       "isalpha",
829       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
830           .Case({ArgumentCondition(0U, WithinRange, {{'A', 'Z'}, {'a', 'z'}}),
831                  ReturnValueCondition(OutOfRange, SingleValue(0))})
832           // The locale-specific range.
833           .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
834           .Case({ArgumentCondition(
835                      0U, OutOfRange,
836                      {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}),
837                  ReturnValueCondition(WithinRange, SingleValue(0))}));
838   addToFunctionSummaryMap(
839       "isascii",
840       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
841           .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
842                  ReturnValueCondition(OutOfRange, SingleValue(0))})
843           .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)),
844                  ReturnValueCondition(WithinRange, SingleValue(0))}));
845   addToFunctionSummaryMap(
846       "isblank",
847       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
848           .Case({ArgumentCondition(0U, WithinRange, {{'\t', '\t'}, {' ', ' '}}),
849                  ReturnValueCondition(OutOfRange, SingleValue(0))})
850           .Case({ArgumentCondition(0U, OutOfRange, {{'\t', '\t'}, {' ', ' '}}),
851                  ReturnValueCondition(WithinRange, SingleValue(0))}));
852   addToFunctionSummaryMap(
853       "iscntrl",
854       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
855           .Case({ArgumentCondition(0U, WithinRange, {{0, 32}, {127, 127}}),
856                  ReturnValueCondition(OutOfRange, SingleValue(0))})
857           .Case({ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}),
858                  ReturnValueCondition(WithinRange, SingleValue(0))}));
859   addToFunctionSummaryMap(
860       "isdigit",
861       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
862           .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')),
863                  ReturnValueCondition(OutOfRange, SingleValue(0))})
864           .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')),
865                  ReturnValueCondition(WithinRange, SingleValue(0))}));
866   addToFunctionSummaryMap(
867       "isgraph",
868       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
869           .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)),
870                  ReturnValueCondition(OutOfRange, SingleValue(0))})
871           .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)),
872                  ReturnValueCondition(WithinRange, SingleValue(0))}));
873   addToFunctionSummaryMap(
874       "islower",
875       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
876           // Is certainly lowercase.
877           .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')),
878                  ReturnValueCondition(OutOfRange, SingleValue(0))})
879           // Is ascii but not lowercase.
880           .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)),
881                  ArgumentCondition(0U, OutOfRange, Range('a', 'z')),
882                  ReturnValueCondition(WithinRange, SingleValue(0))})
883           // The locale-specific range.
884           .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
885           // Is not an unsigned char.
886           .Case({ArgumentCondition(0U, OutOfRange, Range(0, UCharRangeMax)),
887                  ReturnValueCondition(WithinRange, SingleValue(0))}));
888   addToFunctionSummaryMap(
889       "isprint",
890       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
891           .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)),
892                  ReturnValueCondition(OutOfRange, SingleValue(0))})
893           .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)),
894                  ReturnValueCondition(WithinRange, SingleValue(0))}));
895   addToFunctionSummaryMap(
896       "ispunct",
897       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
898           .Case({ArgumentCondition(
899                      0U, WithinRange,
900                      {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
901                  ReturnValueCondition(OutOfRange, SingleValue(0))})
902           .Case({ArgumentCondition(
903                      0U, OutOfRange,
904                      {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}),
905                  ReturnValueCondition(WithinRange, SingleValue(0))}));
906   addToFunctionSummaryMap(
907       "isspace",
908       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
909           // Space, '\f', '\n', '\r', '\t', '\v'.
910           .Case({ArgumentCondition(0U, WithinRange, {{9, 13}, {' ', ' '}}),
911                  ReturnValueCondition(OutOfRange, SingleValue(0))})
912           // The locale-specific range.
913           .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
914           .Case({ArgumentCondition(0U, OutOfRange,
915                                    {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}),
916                  ReturnValueCondition(WithinRange, SingleValue(0))}));
917   addToFunctionSummaryMap(
918       "isupper",
919       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
920           // Is certainly uppercase.
921           .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')),
922                  ReturnValueCondition(OutOfRange, SingleValue(0))})
923           // The locale-specific range.
924           .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})})
925           // Other.
926           .Case({ArgumentCondition(0U, OutOfRange,
927                                    {{'A', 'Z'}, {128, UCharRangeMax}}),
928                  ReturnValueCondition(WithinRange, SingleValue(0))}));
929   addToFunctionSummaryMap(
930       "isxdigit",
931       Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
932           .Case({ArgumentCondition(0U, WithinRange,
933                                    {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
934                  ReturnValueCondition(OutOfRange, SingleValue(0))})
935           .Case({ArgumentCondition(0U, OutOfRange,
936                                    {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}),
937                  ReturnValueCondition(WithinRange, SingleValue(0))}));
938 
939   // The getc() family of functions that returns either a char or an EOF.
940   addToFunctionSummaryMap("getc", Getc());
941   addToFunctionSummaryMap("fgetc", Getc());
942   addToFunctionSummaryMap(
943       "getchar", Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall)
944                      .Case({ReturnValueCondition(
945                          WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})}));
946 
947   // read()-like functions that never return more than buffer size.
948   // We are not sure how ssize_t is defined on every platform, so we
949   // provide three variants that should cover common cases.
950   addToFunctionSummaryMap("read", {Read(IntTy, IntMax), Read(LongTy, LongMax),
951                                    Read(LongLongTy, LongLongMax)});
952   addToFunctionSummaryMap("write", {Read(IntTy, IntMax), Read(LongTy, LongMax),
953                                     Read(LongLongTy, LongLongMax)});
954   addToFunctionSummaryMap("fread", Fread());
955   addToFunctionSummaryMap("fwrite", Fwrite());
956   // getline()-like functions either fail or read at least the delimiter.
957   addToFunctionSummaryMap("getline",
958                           {Getline(IntTy, IntMax), Getline(LongTy, LongMax),
959                            Getline(LongLongTy, LongLongMax)});
960   addToFunctionSummaryMap("getdelim",
961                           {Getline(IntTy, IntMax), Getline(LongTy, LongMax),
962                            Getline(LongLongTy, LongLongMax)});
963 
964   // Functions for testing.
965   if (ChecksEnabled[CK_StdCLibraryFunctionsTesterChecker]) {
966     addToFunctionSummaryMap(
967         "__two_constrained_args",
968         Summary(ArgTypes{IntTy, IntTy}, RetType{IntTy}, EvalCallAsPure)
969             .ArgConstraint(ArgumentCondition(0U, WithinRange, SingleValue(1)))
970             .ArgConstraint(ArgumentCondition(1U, WithinRange, SingleValue(1))));
971     addToFunctionSummaryMap(
972         "__arg_constrained_twice",
973         Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure)
974             .ArgConstraint(ArgumentCondition(0U, OutOfRange, SingleValue(1)))
975             .ArgConstraint(ArgumentCondition(0U, OutOfRange, SingleValue(2))));
976     addToFunctionSummaryMap(
977         "__defaultparam",
978         Summary(ArgTypes{Irrelevant, IntTy}, RetType{IntTy}, EvalCallAsPure)
979             .ArgConstraint(NotNull(ArgNo(0))));
980     addToFunctionSummaryMap("__variadic",
981                             Summary(ArgTypes{VoidPtrTy, ConstCharPtrTy},
982                                     RetType{IntTy}, EvalCallAsPure)
983                                 .ArgConstraint(NotNull(ArgNo(0)))
984                                 .ArgConstraint(NotNull(ArgNo(1))));
985     addToFunctionSummaryMap(
986         "__buf_size_arg_constraint",
987         Summary(ArgTypes{ConstVoidPtrTy, SizeTy}, RetType{IntTy},
988                 EvalCallAsPure)
989             .ArgConstraint(
990                 BufferSize(/*Buffer=*/ArgNo(0), /*BufSize=*/ArgNo(1))));
991   }
992 }
993 
994 void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) {
995   auto *Checker = mgr.registerChecker<StdLibraryFunctionsChecker>();
996   Checker->DisplayLoadedSummaries =
997       mgr.getAnalyzerOptions().getCheckerBooleanOption(
998           Checker, "DisplayLoadedSummaries");
999 }
1000 
1001 bool ento::shouldRegisterStdCLibraryFunctionsChecker(const CheckerManager &mgr) {
1002   return true;
1003 }
1004 
1005 #define REGISTER_CHECKER(name)                                                 \
1006   void ento::register##name(CheckerManager &mgr) {                             \
1007     StdLibraryFunctionsChecker *checker =                                      \
1008         mgr.getChecker<StdLibraryFunctionsChecker>();                          \
1009     checker->ChecksEnabled[StdLibraryFunctionsChecker::CK_##name] = true;      \
1010     checker->CheckNames[StdLibraryFunctionsChecker::CK_##name] =               \
1011         mgr.getCurrentCheckerName();                                           \
1012   }                                                                            \
1013                                                                                \
1014   bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }
1015 
1016 REGISTER_CHECKER(StdCLibraryFunctionArgsChecker)
1017 REGISTER_CHECKER(StdCLibraryFunctionsTesterChecker)
1018