//=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This checker improves modeling of a few simple library functions. // // This checker provides a specification format - `Summary' - and // contains descriptions of some library functions in this format. Each // specification contains a list of branches for splitting the program state // upon call, and range constraints on argument and return-value symbols that // are satisfied on each branch. This spec can be expanded to include more // items, like external effects of the function. // // The main difference between this approach and the body farms technique is // in more explicit control over how many branches are produced. For example, // consider standard C function `ispunct(int x)', which returns a non-zero value // iff `x' is a punctuation character, that is, when `x' is in range // ['!', '/'] [':', '@'] U ['[', '\`'] U ['{', '~']. // `Summary' provides only two branches for this function. However, // any attempt to describe this range with if-statements in the body farm // would result in many more branches. Because each branch needs to be analyzed // independently, this significantly reduces performance. Additionally, // once we consider a branch on which `x' is in range, say, ['!', '/'], // we assume that such branch is an important separate path through the program, // which may lead to false positives because considering this particular path // was not consciously intended, and therefore it might have been unreachable. // // This checker uses eval::Call for modeling pure functions (functions without // side effets), for which their `Summary' is a precise model. This avoids // unnecessary invalidation passes. Conflicts with other checkers are unlikely // because if the function has no other effects, other checkers would probably // never want to improve upon the modeling done by this checker. // // Non-pure functions, for which only partial improvement over the default // behavior is expected, are modeled via check::PostCall, non-intrusively. // // The following standard C functions are currently supported: // // fgetc getline isdigit isupper // fread isalnum isgraph isxdigit // fwrite isalpha islower read // getc isascii isprint write // getchar isblank ispunct // getdelim iscntrl isspace // //===----------------------------------------------------------------------===// #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" using namespace clang; using namespace clang::ento; namespace { class StdLibraryFunctionsChecker : public Checker { /// Below is a series of typedefs necessary to define function specs. /// We avoid nesting types here because each additional qualifier /// would need to be repeated in every function spec. struct Summary; /// Specify how much the analyzer engine should entrust modeling this function /// to us. If he doesn't, he performs additional invalidations. enum InvalidationKind { NoEvalCall, EvalCallAsPure }; // The universal integral type to use in value range descriptions. // Unsigned to make sure overflows are well-defined. typedef uint64_t RangeInt; /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is /// a non-negative integer, which less than 5 and not equal to 2. For /// `ComparesToArgument', holds information about how exactly to compare to /// the argument. typedef std::vector> IntRangeVector; /// A reference to an argument or return value by its number. /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but /// obviously uint32_t should be enough for all practical purposes. typedef uint32_t ArgNo; static const ArgNo Ret; class ValueConstraint; // Pointer to the ValueConstraint. We need a copyable, polymorphic and // default initialize able type (vector needs that). A raw pointer was good, // however, we cannot default initialize that. unique_ptr makes the Summary // class non-copyable, therefore not an option. Releasing the copyability // requirement would render the initialization of the Summary map infeasible. using ValueConstraintPtr = std::shared_ptr; /// Polymorphic base class that represents a constraint on a given argument /// (or return value) of a function. Derived classes implement different kind /// of constraints, e.g range constraints or correlation between two /// arguments. class ValueConstraint { public: ValueConstraint(ArgNo ArgN) : ArgN(ArgN) {} virtual ~ValueConstraint() {} /// Apply the effects of the constraint on the given program state. If null /// is returned then the constraint is not feasible. virtual ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, const Summary &Summary) const = 0; virtual ValueConstraintPtr negate() const { llvm_unreachable("Not implemented"); }; ArgNo getArgNo() const { return ArgN; } protected: ArgNo ArgN; // Argument to which we apply the constraint. }; /// Given a range, should the argument stay inside or outside this range? enum RangeKind { OutOfRange, WithinRange }; /// Encapsulates a single range on a single symbol within a branch. class RangeConstraint : public ValueConstraint { RangeKind Kind; // Kind of range definition. IntRangeVector Args; // Polymorphic arguments. public: RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Args) : ValueConstraint(ArgN), Kind(Kind), Args(Args) {} const IntRangeVector &getRanges() const { return Args; } private: ProgramStateRef applyAsOutOfRange(ProgramStateRef State, const CallEvent &Call, const Summary &Summary) const; ProgramStateRef applyAsWithinRange(ProgramStateRef State, const CallEvent &Call, const Summary &Summary) const; public: ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, const Summary &Summary) const override { switch (Kind) { case OutOfRange: return applyAsOutOfRange(State, Call, Summary); case WithinRange: return applyAsWithinRange(State, Call, Summary); } llvm_unreachable("Unknown range kind!"); } ValueConstraintPtr negate() const override { RangeConstraint Tmp(*this); switch (Kind) { case OutOfRange: Tmp.Kind = WithinRange; break; case WithinRange: Tmp.Kind = OutOfRange; break; } return std::make_shared(Tmp); } }; class ComparisonConstraint : public ValueConstraint { BinaryOperator::Opcode Opcode; ArgNo OtherArgN; public: ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode, ArgNo OtherArgN) : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {} ArgNo getOtherArgNo() const { return OtherArgN; } BinaryOperator::Opcode getOpcode() const { return Opcode; } ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, const Summary &Summary) const override; }; class NotNullConstraint : public ValueConstraint { using ValueConstraint::ValueConstraint; // This variable has a role when we negate the constraint. bool CannotBeNull = true; public: ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, const Summary &Summary) const override { SVal V = getArgSVal(Call, getArgNo()); DefinedOrUnknownSVal L = V.castAs(); if (!L.getAs()) return State; return State->assume(L, CannotBeNull); } ValueConstraintPtr negate() const override { NotNullConstraint Tmp(*this); Tmp.CannotBeNull = !this->CannotBeNull; return std::make_shared(Tmp); } }; /// The complete list of constraints that defines a single branch. typedef std::vector ConstraintSet; using ArgTypes = std::vector; using Cases = std::vector; /// Includes information about /// * function prototype (which is necessary to /// ensure we're modeling the right function and casting values properly), /// * approach to invalidation, /// * a list of branches - a list of list of ranges - /// A branch represents a path in the exploded graph of a function (which /// is a tree). So, a branch is a series of assumptions. In other words, /// branches represent split states and additional assumptions on top of /// the splitting assumption. /// For example, consider the branches in `isalpha(x)` /// Branch 1) /// x is in range ['A', 'Z'] or in ['a', 'z'] /// then the return value is not 0. (I.e. out-of-range [0, 0]) /// Branch 2) /// x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z'] /// then the return value is 0. /// * a list of argument constraints, that must be true on every branch. /// If these constraints are not satisfied that means a fatal error /// usually resulting in undefined behaviour. struct Summary { const ArgTypes ArgTys; const QualType RetTy; const InvalidationKind InvalidationKd; Cases CaseConstraints; ConstraintSet ArgConstraints; Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd) : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {} Summary &Case(ConstraintSet&& CS) { CaseConstraints.push_back(std::move(CS)); return *this; } Summary &ArgConstraint(ValueConstraintPtr VC) { ArgConstraints.push_back(VC); return *this; } private: static void assertTypeSuitableForSummary(QualType T) { assert(!T->isVoidType() && "We should have had no significant void types in the spec"); assert(T.isCanonical() && "We should only have canonical types in the spec"); } public: QualType getArgType(ArgNo ArgN) const { QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN]; assertTypeSuitableForSummary(T); return T; } /// Try our best to figure out if the call expression is the call of /// *the* library function to which this specification applies. bool matchesCall(const CallExpr *CE) const; }; // The same function (as in, function identifier) may have different // summaries assigned to it, with different argument and return value types. // We call these "variants" of the function. This can be useful for handling // C++ function overloads, and also it can be used when the same function // may have different definitions on different platforms. typedef std::vector Summaries; // The map of all functions supported by the checker. It is initialized // lazily, and it doesn't change after initialization. mutable llvm::StringMap FunctionSummaryMap; mutable std::unique_ptr BT_InvalidArg; // Auxiliary functions to support ArgNo within all structures // in a unified manner. static QualType getArgType(const Summary &Summary, ArgNo ArgN) { return Summary.getArgType(ArgN); } static QualType getArgType(const CallEvent &Call, ArgNo ArgN) { return ArgN == Ret ? Call.getResultType().getCanonicalType() : Call.getArgExpr(ArgN)->getType().getCanonicalType(); } static QualType getArgType(const CallExpr *CE, ArgNo ArgN) { return ArgN == Ret ? CE->getType().getCanonicalType() : CE->getArg(ArgN)->getType().getCanonicalType(); } static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) { return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN); } public: void checkPreCall(const CallEvent &Call, CheckerContext &C) const; void checkPostCall(const CallEvent &Call, CheckerContext &C) const; bool evalCall(const CallEvent &Call, CheckerContext &C) const; enum CheckKind { CK_StdCLibraryFunctionArgsChecker, CK_NumCheckKinds }; DefaultBool ChecksEnabled[CK_NumCheckKinds]; CheckerNameRef CheckNames[CK_NumCheckKinds]; private: Optional findFunctionSummary(const FunctionDecl *FD, const CallExpr *CE, CheckerContext &C) const; Optional findFunctionSummary(const CallEvent &Call, CheckerContext &C) const; void initFunctionSummaries(CheckerContext &C) const; void reportBug(const CallEvent &Call, ExplodedNode *N, CheckerContext &C) const { if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker]) return; // TODO Add detailed diagnostic. StringRef Msg = "Function argument constraint is not satisfied"; if (!BT_InvalidArg) BT_InvalidArg = std::make_unique( CheckNames[CK_StdCLibraryFunctionArgsChecker], "Unsatisfied argument constraints", categories::LogicError); auto R = std::make_unique(*BT_InvalidArg, Msg, N); bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R); C.emitReport(std::move(R)); } }; const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret = std::numeric_limits::max(); } // end of anonymous namespace ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange( ProgramStateRef State, const CallEvent &Call, const Summary &Summary) const { ProgramStateManager &Mgr = State->getStateManager(); SValBuilder &SVB = Mgr.getSValBuilder(); BasicValueFactory &BVF = SVB.getBasicValueFactory(); ConstraintManager &CM = Mgr.getConstraintManager(); QualType T = getArgType(Summary, getArgNo()); SVal V = getArgSVal(Call, getArgNo()); if (auto N = V.getAs()) { const IntRangeVector &R = getRanges(); size_t E = R.size(); for (size_t I = 0; I != E; ++I) { const llvm::APSInt &Min = BVF.getValue(R[I].first, T); const llvm::APSInt &Max = BVF.getValue(R[I].second, T); assert(Min <= Max); State = CM.assumeInclusiveRange(State, *N, Min, Max, false); if (!State) break; } } return State; } ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsWithinRange( ProgramStateRef State, const CallEvent &Call, const Summary &Summary) const { ProgramStateManager &Mgr = State->getStateManager(); SValBuilder &SVB = Mgr.getSValBuilder(); BasicValueFactory &BVF = SVB.getBasicValueFactory(); ConstraintManager &CM = Mgr.getConstraintManager(); QualType T = getArgType(Summary, getArgNo()); SVal V = getArgSVal(Call, getArgNo()); // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R". // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary, // and then cut away all holes in R one by one. // // E.g. consider a range list R as [A, B] and [C, D] // -------+--------+------------------+------------+-----------> // A B C D // Then we assume that the value is not in [-inf, A - 1], // then not in [D + 1, +inf], then not in [B + 1, C - 1] if (auto N = V.getAs()) { const IntRangeVector &R = getRanges(); size_t E = R.size(); const llvm::APSInt &MinusInf = BVF.getMinValue(T); const llvm::APSInt &PlusInf = BVF.getMaxValue(T); const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T); if (Left != PlusInf) { assert(MinusInf <= Left); State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false); if (!State) return nullptr; } const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T); if (Right != MinusInf) { assert(Right <= PlusInf); State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false); if (!State) return nullptr; } for (size_t I = 1; I != E; ++I) { const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T); const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T); if (Min <= Max) { State = CM.assumeInclusiveRange(State, *N, Min, Max, false); if (!State) return nullptr; } } } return State; } ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply( ProgramStateRef State, const CallEvent &Call, const Summary &Summary) const { ProgramStateManager &Mgr = State->getStateManager(); SValBuilder &SVB = Mgr.getSValBuilder(); QualType CondT = SVB.getConditionType(); QualType T = getArgType(Summary, getArgNo()); SVal V = getArgSVal(Call, getArgNo()); BinaryOperator::Opcode Op = getOpcode(); ArgNo OtherArg = getOtherArgNo(); SVal OtherV = getArgSVal(Call, OtherArg); QualType OtherT = getArgType(Call, OtherArg); // Note: we avoid integral promotion for comparison. OtherV = SVB.evalCast(OtherV, T, OtherT); if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT) .getAs()) State = State->assume(*CompV, true); return State; } void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { Optional FoundSummary = findFunctionSummary(Call, C); if (!FoundSummary) return; const Summary &Summary = *FoundSummary; ProgramStateRef State = C.getState(); for (const ValueConstraintPtr& VC : Summary.ArgConstraints) { ProgramStateRef SuccessSt = VC->apply(State, Call, Summary); ProgramStateRef FailureSt = VC->negate()->apply(State, Call, Summary); // The argument constraint is not satisfied. if (FailureSt && !SuccessSt) { if (ExplodedNode *N = C.generateErrorNode(State)) reportBug(Call, N, C); break; } else { // Apply the constraint even if we cannot reason about the argument. This // means both SuccessSt and FailureSt can be true. If we weren't applying // the constraint that would mean that symbolic execution continues on a // code whose behaviour is undefined. assert(SuccessSt); C.addTransition(SuccessSt); } } } void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call, CheckerContext &C) const { Optional FoundSummary = findFunctionSummary(Call, C); if (!FoundSummary) return; // Now apply the constraints. const Summary &Summary = *FoundSummary; ProgramStateRef State = C.getState(); // Apply case/branch specifications. for (const auto &VRS : Summary.CaseConstraints) { ProgramStateRef NewState = State; for (const auto &VR: VRS) { NewState = VR->apply(NewState, Call, Summary); if (!NewState) break; } if (NewState && NewState != State) C.addTransition(NewState); } } bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call, CheckerContext &C) const { Optional FoundSummary = findFunctionSummary(Call, C); if (!FoundSummary) return false; const Summary &Summary = *FoundSummary; switch (Summary.InvalidationKd) { case EvalCallAsPure: { ProgramStateRef State = C.getState(); const LocationContext *LC = C.getLocationContext(); const auto *CE = cast_or_null(Call.getOriginExpr()); SVal V = C.getSValBuilder().conjureSymbolVal( CE, LC, CE->getType().getCanonicalType(), C.blockCount()); State = State->BindExpr(CE, LC, V); C.addTransition(State); return true; } case NoEvalCall: // Summary tells us to avoid performing eval::Call. The function is possibly // evaluated by another checker, or evaluated conservatively. return false; } llvm_unreachable("Unknown invalidation kind!"); } bool StdLibraryFunctionsChecker::Summary::matchesCall( const CallExpr *CE) const { // Check number of arguments: if (CE->getNumArgs() != ArgTys.size()) return false; // Check return type if relevant: if (!RetTy.isNull() && RetTy != CE->getType().getCanonicalType()) return false; // Check argument types when relevant: for (size_t I = 0, E = ArgTys.size(); I != E; ++I) { QualType FormalT = ArgTys[I]; // Null type marks irrelevant arguments. if (FormalT.isNull()) continue; assertTypeSuitableForSummary(FormalT); QualType ActualT = StdLibraryFunctionsChecker::getArgType(CE, I); assert(ActualT.isCanonical()); if (ActualT != FormalT) return false; } return true; } Optional StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD, const CallExpr *CE, CheckerContext &C) const { // Note: we cannot always obtain FD from CE // (eg. virtual call, or call by pointer). assert(CE); if (!FD) return None; initFunctionSummaries(C); IdentifierInfo *II = FD->getIdentifier(); if (!II) return None; StringRef Name = II->getName(); if (Name.empty() || !C.isCLibraryFunction(FD, Name)) return None; auto FSMI = FunctionSummaryMap.find(Name); if (FSMI == FunctionSummaryMap.end()) return None; // Verify that function signature matches the spec in advance. // Otherwise we might be modeling the wrong function. // Strict checking is important because we will be conducting // very integral-type-sensitive operations on arguments and // return values. const Summaries &SpecVariants = FSMI->second; for (const Summary &Spec : SpecVariants) if (Spec.matchesCall(CE)) return Spec; return None; } Optional StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call, CheckerContext &C) const { const FunctionDecl *FD = dyn_cast_or_null(Call.getDecl()); if (!FD) return None; const CallExpr *CE = dyn_cast_or_null(Call.getOriginExpr()); if (!CE) return None; return findFunctionSummary(FD, CE, C); } void StdLibraryFunctionsChecker::initFunctionSummaries( CheckerContext &C) const { if (!FunctionSummaryMap.empty()) return; SValBuilder &SVB = C.getSValBuilder(); BasicValueFactory &BVF = SVB.getBasicValueFactory(); const ASTContext &ACtx = BVF.getContext(); // These types are useful for writing specifications quickly, // New specifications should probably introduce more types. // Some types are hard to obtain from the AST, eg. "ssize_t". // In such cases it should be possible to provide multiple variants // of function summary for common cases (eg. ssize_t could be int or long // or long long, so three summary variants would be enough). // Of course, function variants are also useful for C++ overloads. const QualType Irrelevant{}; // A placeholder, whenever we do not care about the type. const QualType IntTy = ACtx.IntTy; const QualType LongTy = ACtx.LongTy; const QualType LongLongTy = ACtx.LongLongTy; const QualType SizeTy = ACtx.getSizeType(); const QualType VoidPtrTy = ACtx.VoidPtrTy; // void *T const QualType ConstVoidPtrTy = ACtx.getPointerType(ACtx.VoidTy.withConst()); // const void *T const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue(); const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue(); const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue(); // Set UCharRangeMax to min of int or uchar maximum value. // The C standard states that the arguments of functions like isalpha must // be representable as an unsigned char. Their type is 'int', so the max // value of the argument should be min(UCharMax, IntMax). This just happen // to be true for commonly used and well tested instruction set // architectures, but not for others. const RangeInt UCharRangeMax = std::min(BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue(), IntMax); // The platform dependent value of EOF. // Try our best to parse this from the Preprocessor, otherwise fallback to -1. const auto EOFv = [&C]() -> RangeInt { if (const llvm::Optional OptInt = tryExpandAsInteger("EOF", C.getPreprocessor())) return *OptInt; return -1; }(); // We are finally ready to define specifications for all supported functions. // // The signature needs to have the correct number of arguments. // However, we insert `Irrelevant' when the type is insignificant. // // Argument ranges should always cover all variants. If return value // is completely unknown, omit it from the respective range set. // // All types in the spec need to be canonical. // // Every item in the list of range sets represents a particular // execution path the analyzer would need to explore once // the call is modeled - a new program state is constructed // for every range set, and each range line in the range set // corresponds to a specific constraint within this state. // // Upon comparing to another argument, the other argument is casted // to the current argument's type. This avoids proper promotion but // seems useful. For example, read() receives size_t argument, // and its return value, which is of type ssize_t, cannot be greater // than this argument. If we made a promotion, and the size argument // is equal to, say, 10, then we'd impose a range of [0, 10] on the // return value, however the correct range is [-1, 10]. // // Please update the list of functions in the header after editing! // // Below are helpers functions to create the summaries. auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind, IntRangeVector Ranges) { return std::make_shared(ArgN, Kind, Ranges); }; struct { auto operator()(RangeKind Kind, IntRangeVector Ranges) { return std::make_shared(Ret, Kind, Ranges); } auto operator()(BinaryOperator::Opcode Op, ArgNo OtherArgN) { return std::make_shared(Ret, Op, OtherArgN); } } ReturnValueCondition; auto Range = [](RangeInt b, RangeInt e) { return IntRangeVector{std::pair{b, e}}; }; auto SingleValue = [](RangeInt v) { return IntRangeVector{std::pair{v, v}}; }; auto LessThanOrEq = BO_LE; auto NotNull = [&](ArgNo ArgN) { return std::make_shared(ArgN); }; using RetType = QualType; // Templates for summaries that are reused by many functions. auto Getc = [&]() { return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall) .Case({ReturnValueCondition(WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})}); }; auto Read = [&](RetType R, RangeInt Max) { return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R}, NoEvalCall) .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)), ReturnValueCondition(WithinRange, Range(-1, Max))}); }; auto Fread = [&]() { return Summary(ArgTypes{VoidPtrTy, Irrelevant, SizeTy, Irrelevant}, RetType{SizeTy}, NoEvalCall) .Case({ ReturnValueCondition(LessThanOrEq, ArgNo(2)), }) .ArgConstraint(NotNull(ArgNo(0))); }; auto Fwrite = [&]() { return Summary(ArgTypes{ConstVoidPtrTy, Irrelevant, SizeTy, Irrelevant}, RetType{SizeTy}, NoEvalCall) .Case({ ReturnValueCondition(LessThanOrEq, ArgNo(2)), }) .ArgConstraint(NotNull(ArgNo(0))); }; auto Getline = [&](RetType R, RangeInt Max) { return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R}, NoEvalCall) .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})}); }; FunctionSummaryMap = { // The isascii() family of functions. // The behavior is undefined if the value of the argument is not // representable as unsigned char or is not equal to EOF. See e.g. C99 // 7.4.1.2 The isalpha function (p: 181-182). { "isalnum", Summaries{ Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) // Boils down to isupper() or islower() or isdigit(). .Case( {ArgumentCondition(0U, WithinRange, {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}), ReturnValueCondition(OutOfRange, SingleValue(0))}) // The locale-specific range. // No post-condition. We are completely unaware of // locale-specific return values. .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) .Case({ArgumentCondition(0U, OutOfRange, {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}), ReturnValueCondition(WithinRange, SingleValue(0))}) .ArgConstraint(ArgumentCondition( 0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}}))}, }, { "isalpha", Summaries{ Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) .Case({ArgumentCondition(0U, WithinRange, {{'A', 'Z'}, {'a', 'z'}}), ReturnValueCondition(OutOfRange, SingleValue(0))}) // The locale-specific range. .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) .Case({ArgumentCondition( 0U, OutOfRange, {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}), ReturnValueCondition(WithinRange, SingleValue(0))})}, }, { "isascii", Summaries{ Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), ReturnValueCondition(OutOfRange, SingleValue(0))}) .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)), ReturnValueCondition(WithinRange, SingleValue(0))})}, }, { "isblank", Summaries{ Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) .Case({ArgumentCondition(0U, WithinRange, {{'\t', '\t'}, {' ', ' '}}), ReturnValueCondition(OutOfRange, SingleValue(0))}) .Case({ArgumentCondition(0U, OutOfRange, {{'\t', '\t'}, {' ', ' '}}), ReturnValueCondition(WithinRange, SingleValue(0))})}, }, { "iscntrl", Summaries{ Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) .Case({ArgumentCondition(0U, WithinRange, {{0, 32}, {127, 127}}), ReturnValueCondition(OutOfRange, SingleValue(0))}) .Case( {ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}), ReturnValueCondition(WithinRange, SingleValue(0))})}, }, { "isdigit", Summaries{ Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')), ReturnValueCondition(OutOfRange, SingleValue(0))}) .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')), ReturnValueCondition(WithinRange, SingleValue(0))})}, }, { "isgraph", Summaries{ Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)), ReturnValueCondition(OutOfRange, SingleValue(0))}) .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)), ReturnValueCondition(WithinRange, SingleValue(0))})}, }, { "islower", Summaries{ Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) // Is certainly lowercase. .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')), ReturnValueCondition(OutOfRange, SingleValue(0))}) // Is ascii but not lowercase. .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), ArgumentCondition(0U, OutOfRange, Range('a', 'z')), ReturnValueCondition(WithinRange, SingleValue(0))}) // The locale-specific range. .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) // Is not an unsigned char. .Case({ArgumentCondition(0U, OutOfRange, Range(0, UCharRangeMax)), ReturnValueCondition(WithinRange, SingleValue(0))})}, }, { "isprint", Summaries{ Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)), ReturnValueCondition(OutOfRange, SingleValue(0))}) .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)), ReturnValueCondition(WithinRange, SingleValue(0))})}, }, { "ispunct", Summaries{ Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) .Case({ArgumentCondition( 0U, WithinRange, {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), ReturnValueCondition(OutOfRange, SingleValue(0))}) .Case({ArgumentCondition( 0U, OutOfRange, {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), ReturnValueCondition(WithinRange, SingleValue(0))})}, }, { "isspace", Summaries{ Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) // Space, '\f', '\n', '\r', '\t', '\v'. .Case({ArgumentCondition(0U, WithinRange, {{9, 13}, {' ', ' '}}), ReturnValueCondition(OutOfRange, SingleValue(0))}) // The locale-specific range. .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) .Case({ArgumentCondition( 0U, OutOfRange, {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}), ReturnValueCondition(WithinRange, SingleValue(0))})}, }, { "isupper", Summaries{ Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) // Is certainly uppercase. .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')), ReturnValueCondition(OutOfRange, SingleValue(0))}) // The locale-specific range. .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) // Other. .Case({ArgumentCondition(0U, OutOfRange, {{'A', 'Z'}, {128, UCharRangeMax}}), ReturnValueCondition(WithinRange, SingleValue(0))})}, }, { "isxdigit", Summaries{ Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) .Case( {ArgumentCondition(0U, WithinRange, {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), ReturnValueCondition(OutOfRange, SingleValue(0))}) .Case( {ArgumentCondition(0U, OutOfRange, {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), ReturnValueCondition(WithinRange, SingleValue(0))})}, }, // The getc() family of functions that returns either a char or an EOF. {"getc", Summaries{Getc()}}, {"fgetc", Summaries{Getc()}}, {"getchar", Summaries{Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall) .Case({ReturnValueCondition( WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})})}}, // read()-like functions that never return more than buffer size. // We are not sure how ssize_t is defined on every platform, so we // provide three variants that should cover common cases. {"read", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax), Read(LongLongTy, LongLongMax)}}, {"write", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax), Read(LongLongTy, LongLongMax)}}, {"fread", Summaries{Fread()}}, {"fwrite", Summaries{Fwrite()}}, // getline()-like functions either fail or read at least the delimiter. {"getline", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax), Getline(LongLongTy, LongLongMax)}}, {"getdelim", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax), Getline(LongLongTy, LongLongMax)}}, }; } void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) { mgr.registerChecker(); } bool ento::shouldRegisterStdCLibraryFunctionsChecker(const LangOptions &LO) { return true; } #define REGISTER_CHECKER(name) \ void ento::register##name(CheckerManager &mgr) { \ StdLibraryFunctionsChecker *checker = \ mgr.getChecker(); \ checker->ChecksEnabled[StdLibraryFunctionsChecker::CK_##name] = true; \ checker->CheckNames[StdLibraryFunctionsChecker::CK_##name] = \ mgr.getCurrentCheckerName(); \ } \ \ bool ento::shouldRegister##name(const LangOptions &LO) { return true; } REGISTER_CHECKER(StdCLibraryFunctionArgsChecker)