1 //=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This checker improves modeling of a few simple library functions. 10 // 11 // This checker provides a specification format - `Summary' - and 12 // contains descriptions of some library functions in this format. Each 13 // specification contains a list of branches for splitting the program state 14 // upon call, and range constraints on argument and return-value symbols that 15 // are satisfied on each branch. This spec can be expanded to include more 16 // items, like external effects of the function. 17 // 18 // The main difference between this approach and the body farms technique is 19 // in more explicit control over how many branches are produced. For example, 20 // consider standard C function `ispunct(int x)', which returns a non-zero value 21 // iff `x' is a punctuation character, that is, when `x' is in range 22 // ['!', '/'] [':', '@'] U ['[', '\`'] U ['{', '~']. 23 // `Summary' provides only two branches for this function. However, 24 // any attempt to describe this range with if-statements in the body farm 25 // would result in many more branches. Because each branch needs to be analyzed 26 // independently, this significantly reduces performance. Additionally, 27 // once we consider a branch on which `x' is in range, say, ['!', '/'], 28 // we assume that such branch is an important separate path through the program, 29 // which may lead to false positives because considering this particular path 30 // was not consciously intended, and therefore it might have been unreachable. 31 // 32 // This checker uses eval::Call for modeling pure functions (functions without 33 // side effets), for which their `Summary' is a precise model. This avoids 34 // unnecessary invalidation passes. Conflicts with other checkers are unlikely 35 // because if the function has no other effects, other checkers would probably 36 // never want to improve upon the modeling done by this checker. 37 // 38 // Non-pure functions, for which only partial improvement over the default 39 // behavior is expected, are modeled via check::PostCall, non-intrusively. 40 // 41 // The following standard C functions are currently supported: 42 // 43 // fgetc getline isdigit isupper 44 // fread isalnum isgraph isxdigit 45 // fwrite isalpha islower read 46 // getc isascii isprint write 47 // getchar isblank ispunct 48 // getdelim iscntrl isspace 49 // 50 //===----------------------------------------------------------------------===// 51 52 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 53 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 54 #include "clang/StaticAnalyzer/Core/Checker.h" 55 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 56 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 57 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 58 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" 59 60 using namespace clang; 61 using namespace clang::ento; 62 63 namespace { 64 class StdLibraryFunctionsChecker 65 : public Checker<check::PreCall, check::PostCall, eval::Call> { 66 /// Below is a series of typedefs necessary to define function specs. 67 /// We avoid nesting types here because each additional qualifier 68 /// would need to be repeated in every function spec. 69 struct Summary; 70 71 /// Specify how much the analyzer engine should entrust modeling this function 72 /// to us. If he doesn't, he performs additional invalidations. 73 enum InvalidationKind { NoEvalCall, EvalCallAsPure }; 74 75 // The universal integral type to use in value range descriptions. 76 // Unsigned to make sure overflows are well-defined. 77 typedef uint64_t RangeInt; 78 79 /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is 80 /// a non-negative integer, which less than 5 and not equal to 2. For 81 /// `ComparesToArgument', holds information about how exactly to compare to 82 /// the argument. 83 typedef std::vector<std::pair<RangeInt, RangeInt>> IntRangeVector; 84 85 /// A reference to an argument or return value by its number. 86 /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but 87 /// obviously uint32_t should be enough for all practical purposes. 88 typedef uint32_t ArgNo; 89 static const ArgNo Ret; 90 91 class ValueConstraint; 92 93 // Pointer to the ValueConstraint. We need a copyable, polymorphic and 94 // default initialize able type (vector needs that). A raw pointer was good, 95 // however, we cannot default initialize that. unique_ptr makes the Summary 96 // class non-copyable, therefore not an option. Releasing the copyability 97 // requirement would render the initialization of the Summary map infeasible. 98 using ValueConstraintPtr = std::shared_ptr<ValueConstraint>; 99 100 /// Polymorphic base class that represents a constraint on a given argument 101 /// (or return value) of a function. Derived classes implement different kind 102 /// of constraints, e.g range constraints or correlation between two 103 /// arguments. 104 class ValueConstraint { 105 public: 106 ValueConstraint(ArgNo ArgN) : ArgN(ArgN) {} 107 virtual ~ValueConstraint() {} 108 /// Apply the effects of the constraint on the given program state. If null 109 /// is returned then the constraint is not feasible. 110 virtual ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 111 const Summary &Summary) const = 0; 112 virtual ValueConstraintPtr negate() const { 113 llvm_unreachable("Not implemented"); 114 }; 115 ArgNo getArgNo() const { return ArgN; } 116 117 protected: 118 ArgNo ArgN; // Argument to which we apply the constraint. 119 }; 120 121 /// Given a range, should the argument stay inside or outside this range? 122 enum RangeKind { OutOfRange, WithinRange }; 123 124 /// Encapsulates a single range on a single symbol within a branch. 125 class RangeConstraint : public ValueConstraint { 126 RangeKind Kind; // Kind of range definition. 127 IntRangeVector Args; // Polymorphic arguments. 128 129 public: 130 RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Args) 131 : ValueConstraint(ArgN), Kind(Kind), Args(Args) {} 132 133 const IntRangeVector &getRanges() const { 134 return Args; 135 } 136 137 private: 138 ProgramStateRef applyAsOutOfRange(ProgramStateRef State, 139 const CallEvent &Call, 140 const Summary &Summary) const; 141 ProgramStateRef applyAsWithinRange(ProgramStateRef State, 142 const CallEvent &Call, 143 const Summary &Summary) const; 144 public: 145 ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 146 const Summary &Summary) const override { 147 switch (Kind) { 148 case OutOfRange: 149 return applyAsOutOfRange(State, Call, Summary); 150 case WithinRange: 151 return applyAsWithinRange(State, Call, Summary); 152 } 153 llvm_unreachable("Unknown range kind!"); 154 } 155 156 ValueConstraintPtr negate() const override { 157 RangeConstraint Tmp(*this); 158 switch (Kind) { 159 case OutOfRange: 160 Tmp.Kind = WithinRange; 161 break; 162 case WithinRange: 163 Tmp.Kind = OutOfRange; 164 break; 165 } 166 return std::make_shared<RangeConstraint>(Tmp); 167 } 168 }; 169 170 class ComparisonConstraint : public ValueConstraint { 171 BinaryOperator::Opcode Opcode; 172 ArgNo OtherArgN; 173 174 public: 175 ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode, 176 ArgNo OtherArgN) 177 : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {} 178 ArgNo getOtherArgNo() const { return OtherArgN; } 179 BinaryOperator::Opcode getOpcode() const { return Opcode; } 180 ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 181 const Summary &Summary) const override; 182 }; 183 184 class NotNullConstraint : public ValueConstraint { 185 using ValueConstraint::ValueConstraint; 186 // This variable has a role when we negate the constraint. 187 bool CannotBeNull = true; 188 189 public: 190 ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 191 const Summary &Summary) const override { 192 SVal V = getArgSVal(Call, getArgNo()); 193 DefinedOrUnknownSVal L = V.castAs<DefinedOrUnknownSVal>(); 194 if (!L.getAs<Loc>()) 195 return State; 196 197 return State->assume(L, CannotBeNull); 198 } 199 200 ValueConstraintPtr negate() const override { 201 NotNullConstraint Tmp(*this); 202 Tmp.CannotBeNull = !this->CannotBeNull; 203 return std::make_shared<NotNullConstraint>(Tmp); 204 } 205 }; 206 207 /// The complete list of constraints that defines a single branch. 208 typedef std::vector<ValueConstraintPtr> ConstraintSet; 209 210 using ArgTypes = std::vector<QualType>; 211 using Cases = std::vector<ConstraintSet>; 212 213 /// Includes information about 214 /// * function prototype (which is necessary to 215 /// ensure we're modeling the right function and casting values properly), 216 /// * approach to invalidation, 217 /// * a list of branches - a list of list of ranges - 218 /// A branch represents a path in the exploded graph of a function (which 219 /// is a tree). So, a branch is a series of assumptions. In other words, 220 /// branches represent split states and additional assumptions on top of 221 /// the splitting assumption. 222 /// For example, consider the branches in `isalpha(x)` 223 /// Branch 1) 224 /// x is in range ['A', 'Z'] or in ['a', 'z'] 225 /// then the return value is not 0. (I.e. out-of-range [0, 0]) 226 /// Branch 2) 227 /// x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z'] 228 /// then the return value is 0. 229 /// * a list of argument constraints, that must be true on every branch. 230 /// If these constraints are not satisfied that means a fatal error 231 /// usually resulting in undefined behaviour. 232 struct Summary { 233 const ArgTypes ArgTys; 234 const QualType RetTy; 235 const InvalidationKind InvalidationKd; 236 Cases CaseConstraints; 237 ConstraintSet ArgConstraints; 238 239 Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd) 240 : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {} 241 242 Summary &Case(ConstraintSet&& CS) { 243 CaseConstraints.push_back(std::move(CS)); 244 return *this; 245 } 246 Summary &ArgConstraint(ValueConstraintPtr VC) { 247 ArgConstraints.push_back(VC); 248 return *this; 249 } 250 251 private: 252 static void assertTypeSuitableForSummary(QualType T) { 253 assert(!T->isVoidType() && 254 "We should have had no significant void types in the spec"); 255 assert(T.isCanonical() && 256 "We should only have canonical types in the spec"); 257 } 258 259 public: 260 QualType getArgType(ArgNo ArgN) const { 261 QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN]; 262 assertTypeSuitableForSummary(T); 263 return T; 264 } 265 266 /// Try our best to figure out if the call expression is the call of 267 /// *the* library function to which this specification applies. 268 bool matchesCall(const CallExpr *CE) const; 269 }; 270 271 // The same function (as in, function identifier) may have different 272 // summaries assigned to it, with different argument and return value types. 273 // We call these "variants" of the function. This can be useful for handling 274 // C++ function overloads, and also it can be used when the same function 275 // may have different definitions on different platforms. 276 typedef std::vector<Summary> Summaries; 277 278 // The map of all functions supported by the checker. It is initialized 279 // lazily, and it doesn't change after initialization. 280 mutable llvm::StringMap<Summaries> FunctionSummaryMap; 281 282 mutable std::unique_ptr<BugType> BT_InvalidArg; 283 284 // Auxiliary functions to support ArgNo within all structures 285 // in a unified manner. 286 static QualType getArgType(const Summary &Summary, ArgNo ArgN) { 287 return Summary.getArgType(ArgN); 288 } 289 static QualType getArgType(const CallEvent &Call, ArgNo ArgN) { 290 return ArgN == Ret ? Call.getResultType().getCanonicalType() 291 : Call.getArgExpr(ArgN)->getType().getCanonicalType(); 292 } 293 static QualType getArgType(const CallExpr *CE, ArgNo ArgN) { 294 return ArgN == Ret ? CE->getType().getCanonicalType() 295 : CE->getArg(ArgN)->getType().getCanonicalType(); 296 } 297 static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) { 298 return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN); 299 } 300 301 public: 302 void checkPreCall(const CallEvent &Call, CheckerContext &C) const; 303 void checkPostCall(const CallEvent &Call, CheckerContext &C) const; 304 bool evalCall(const CallEvent &Call, CheckerContext &C) const; 305 306 enum CheckKind { CK_StdCLibraryFunctionArgsChecker, CK_NumCheckKinds }; 307 DefaultBool ChecksEnabled[CK_NumCheckKinds]; 308 CheckerNameRef CheckNames[CK_NumCheckKinds]; 309 310 private: 311 Optional<Summary> findFunctionSummary(const FunctionDecl *FD, 312 const CallExpr *CE, 313 CheckerContext &C) const; 314 Optional<Summary> findFunctionSummary(const CallEvent &Call, 315 CheckerContext &C) const; 316 317 void initFunctionSummaries(CheckerContext &C) const; 318 319 void reportBug(const CallEvent &Call, ExplodedNode *N, 320 CheckerContext &C) const { 321 if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker]) 322 return; 323 // TODO Add detailed diagnostic. 324 StringRef Msg = "Function argument constraint is not satisfied"; 325 if (!BT_InvalidArg) 326 BT_InvalidArg = std::make_unique<BugType>( 327 CheckNames[CK_StdCLibraryFunctionArgsChecker], 328 "Unsatisfied argument constraints", categories::LogicError); 329 auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N); 330 bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R); 331 C.emitReport(std::move(R)); 332 } 333 }; 334 335 const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret = 336 std::numeric_limits<ArgNo>::max(); 337 338 } // end of anonymous namespace 339 340 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange( 341 ProgramStateRef State, const CallEvent &Call, 342 const Summary &Summary) const { 343 344 ProgramStateManager &Mgr = State->getStateManager(); 345 SValBuilder &SVB = Mgr.getSValBuilder(); 346 BasicValueFactory &BVF = SVB.getBasicValueFactory(); 347 ConstraintManager &CM = Mgr.getConstraintManager(); 348 QualType T = getArgType(Summary, getArgNo()); 349 SVal V = getArgSVal(Call, getArgNo()); 350 351 if (auto N = V.getAs<NonLoc>()) { 352 const IntRangeVector &R = getRanges(); 353 size_t E = R.size(); 354 for (size_t I = 0; I != E; ++I) { 355 const llvm::APSInt &Min = BVF.getValue(R[I].first, T); 356 const llvm::APSInt &Max = BVF.getValue(R[I].second, T); 357 assert(Min <= Max); 358 State = CM.assumeInclusiveRange(State, *N, Min, Max, false); 359 if (!State) 360 break; 361 } 362 } 363 364 return State; 365 } 366 367 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsWithinRange( 368 ProgramStateRef State, const CallEvent &Call, 369 const Summary &Summary) const { 370 371 ProgramStateManager &Mgr = State->getStateManager(); 372 SValBuilder &SVB = Mgr.getSValBuilder(); 373 BasicValueFactory &BVF = SVB.getBasicValueFactory(); 374 ConstraintManager &CM = Mgr.getConstraintManager(); 375 QualType T = getArgType(Summary, getArgNo()); 376 SVal V = getArgSVal(Call, getArgNo()); 377 378 // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R". 379 // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary, 380 // and then cut away all holes in R one by one. 381 // 382 // E.g. consider a range list R as [A, B] and [C, D] 383 // -------+--------+------------------+------------+-----------> 384 // A B C D 385 // Then we assume that the value is not in [-inf, A - 1], 386 // then not in [D + 1, +inf], then not in [B + 1, C - 1] 387 if (auto N = V.getAs<NonLoc>()) { 388 const IntRangeVector &R = getRanges(); 389 size_t E = R.size(); 390 391 const llvm::APSInt &MinusInf = BVF.getMinValue(T); 392 const llvm::APSInt &PlusInf = BVF.getMaxValue(T); 393 394 const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T); 395 if (Left != PlusInf) { 396 assert(MinusInf <= Left); 397 State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false); 398 if (!State) 399 return nullptr; 400 } 401 402 const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T); 403 if (Right != MinusInf) { 404 assert(Right <= PlusInf); 405 State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false); 406 if (!State) 407 return nullptr; 408 } 409 410 for (size_t I = 1; I != E; ++I) { 411 const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T); 412 const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T); 413 if (Min <= Max) { 414 State = CM.assumeInclusiveRange(State, *N, Min, Max, false); 415 if (!State) 416 return nullptr; 417 } 418 } 419 } 420 421 return State; 422 } 423 424 ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply( 425 ProgramStateRef State, const CallEvent &Call, 426 const Summary &Summary) const { 427 428 ProgramStateManager &Mgr = State->getStateManager(); 429 SValBuilder &SVB = Mgr.getSValBuilder(); 430 QualType CondT = SVB.getConditionType(); 431 QualType T = getArgType(Summary, getArgNo()); 432 SVal V = getArgSVal(Call, getArgNo()); 433 434 BinaryOperator::Opcode Op = getOpcode(); 435 ArgNo OtherArg = getOtherArgNo(); 436 SVal OtherV = getArgSVal(Call, OtherArg); 437 QualType OtherT = getArgType(Call, OtherArg); 438 // Note: we avoid integral promotion for comparison. 439 OtherV = SVB.evalCast(OtherV, T, OtherT); 440 if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT) 441 .getAs<DefinedOrUnknownSVal>()) 442 State = State->assume(*CompV, true); 443 return State; 444 } 445 446 void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call, 447 CheckerContext &C) const { 448 Optional<Summary> FoundSummary = findFunctionSummary(Call, C); 449 if (!FoundSummary) 450 return; 451 452 const Summary &Summary = *FoundSummary; 453 ProgramStateRef State = C.getState(); 454 455 for (const ValueConstraintPtr& VC : Summary.ArgConstraints) { 456 ProgramStateRef SuccessSt = VC->apply(State, Call, Summary); 457 ProgramStateRef FailureSt = VC->negate()->apply(State, Call, Summary); 458 // The argument constraint is not satisfied. 459 if (FailureSt && !SuccessSt) { 460 if (ExplodedNode *N = C.generateErrorNode(State)) 461 reportBug(Call, N, C); 462 break; 463 } else { 464 // Apply the constraint even if we cannot reason about the argument. This 465 // means both SuccessSt and FailureSt can be true. If we weren't applying 466 // the constraint that would mean that symbolic execution continues on a 467 // code whose behaviour is undefined. 468 assert(SuccessSt); 469 C.addTransition(SuccessSt); 470 } 471 } 472 } 473 474 void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call, 475 CheckerContext &C) const { 476 Optional<Summary> FoundSummary = findFunctionSummary(Call, C); 477 if (!FoundSummary) 478 return; 479 480 // Now apply the constraints. 481 const Summary &Summary = *FoundSummary; 482 ProgramStateRef State = C.getState(); 483 484 // Apply case/branch specifications. 485 for (const auto &VRS : Summary.CaseConstraints) { 486 ProgramStateRef NewState = State; 487 for (const auto &VR: VRS) { 488 NewState = VR->apply(NewState, Call, Summary); 489 if (!NewState) 490 break; 491 } 492 493 if (NewState && NewState != State) 494 C.addTransition(NewState); 495 } 496 } 497 498 bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call, 499 CheckerContext &C) const { 500 Optional<Summary> FoundSummary = findFunctionSummary(Call, C); 501 if (!FoundSummary) 502 return false; 503 504 const Summary &Summary = *FoundSummary; 505 switch (Summary.InvalidationKd) { 506 case EvalCallAsPure: { 507 ProgramStateRef State = C.getState(); 508 const LocationContext *LC = C.getLocationContext(); 509 const auto *CE = cast_or_null<CallExpr>(Call.getOriginExpr()); 510 SVal V = C.getSValBuilder().conjureSymbolVal( 511 CE, LC, CE->getType().getCanonicalType(), C.blockCount()); 512 State = State->BindExpr(CE, LC, V); 513 C.addTransition(State); 514 return true; 515 } 516 case NoEvalCall: 517 // Summary tells us to avoid performing eval::Call. The function is possibly 518 // evaluated by another checker, or evaluated conservatively. 519 return false; 520 } 521 llvm_unreachable("Unknown invalidation kind!"); 522 } 523 524 bool StdLibraryFunctionsChecker::Summary::matchesCall( 525 const CallExpr *CE) const { 526 // Check number of arguments: 527 if (CE->getNumArgs() != ArgTys.size()) 528 return false; 529 530 // Check return type if relevant: 531 if (!RetTy.isNull() && RetTy != CE->getType().getCanonicalType()) 532 return false; 533 534 // Check argument types when relevant: 535 for (size_t I = 0, E = ArgTys.size(); I != E; ++I) { 536 QualType FormalT = ArgTys[I]; 537 // Null type marks irrelevant arguments. 538 if (FormalT.isNull()) 539 continue; 540 541 assertTypeSuitableForSummary(FormalT); 542 543 QualType ActualT = StdLibraryFunctionsChecker::getArgType(CE, I); 544 assert(ActualT.isCanonical()); 545 if (ActualT != FormalT) 546 return false; 547 } 548 549 return true; 550 } 551 552 Optional<StdLibraryFunctionsChecker::Summary> 553 StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD, 554 const CallExpr *CE, 555 CheckerContext &C) const { 556 // Note: we cannot always obtain FD from CE 557 // (eg. virtual call, or call by pointer). 558 assert(CE); 559 560 if (!FD) 561 return None; 562 563 initFunctionSummaries(C); 564 565 IdentifierInfo *II = FD->getIdentifier(); 566 if (!II) 567 return None; 568 StringRef Name = II->getName(); 569 if (Name.empty() || !C.isCLibraryFunction(FD, Name)) 570 return None; 571 572 auto FSMI = FunctionSummaryMap.find(Name); 573 if (FSMI == FunctionSummaryMap.end()) 574 return None; 575 576 // Verify that function signature matches the spec in advance. 577 // Otherwise we might be modeling the wrong function. 578 // Strict checking is important because we will be conducting 579 // very integral-type-sensitive operations on arguments and 580 // return values. 581 const Summaries &SpecVariants = FSMI->second; 582 for (const Summary &Spec : SpecVariants) 583 if (Spec.matchesCall(CE)) 584 return Spec; 585 586 return None; 587 } 588 589 Optional<StdLibraryFunctionsChecker::Summary> 590 StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call, 591 CheckerContext &C) const { 592 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl()); 593 if (!FD) 594 return None; 595 const CallExpr *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr()); 596 if (!CE) 597 return None; 598 return findFunctionSummary(FD, CE, C); 599 } 600 601 void StdLibraryFunctionsChecker::initFunctionSummaries( 602 CheckerContext &C) const { 603 if (!FunctionSummaryMap.empty()) 604 return; 605 606 SValBuilder &SVB = C.getSValBuilder(); 607 BasicValueFactory &BVF = SVB.getBasicValueFactory(); 608 const ASTContext &ACtx = BVF.getContext(); 609 610 // These types are useful for writing specifications quickly, 611 // New specifications should probably introduce more types. 612 // Some types are hard to obtain from the AST, eg. "ssize_t". 613 // In such cases it should be possible to provide multiple variants 614 // of function summary for common cases (eg. ssize_t could be int or long 615 // or long long, so three summary variants would be enough). 616 // Of course, function variants are also useful for C++ overloads. 617 const QualType 618 Irrelevant{}; // A placeholder, whenever we do not care about the type. 619 const QualType IntTy = ACtx.IntTy; 620 const QualType LongTy = ACtx.LongTy; 621 const QualType LongLongTy = ACtx.LongLongTy; 622 const QualType SizeTy = ACtx.getSizeType(); 623 const QualType VoidPtrTy = ACtx.VoidPtrTy; // void *T 624 const QualType ConstVoidPtrTy = 625 ACtx.getPointerType(ACtx.VoidTy.withConst()); // const void *T 626 627 const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue(); 628 const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue(); 629 const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue(); 630 631 // Set UCharRangeMax to min of int or uchar maximum value. 632 // The C standard states that the arguments of functions like isalpha must 633 // be representable as an unsigned char. Their type is 'int', so the max 634 // value of the argument should be min(UCharMax, IntMax). This just happen 635 // to be true for commonly used and well tested instruction set 636 // architectures, but not for others. 637 const RangeInt UCharRangeMax = 638 std::min(BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue(), IntMax); 639 640 // The platform dependent value of EOF. 641 // Try our best to parse this from the Preprocessor, otherwise fallback to -1. 642 const auto EOFv = [&C]() -> RangeInt { 643 if (const llvm::Optional<int> OptInt = 644 tryExpandAsInteger("EOF", C.getPreprocessor())) 645 return *OptInt; 646 return -1; 647 }(); 648 649 // We are finally ready to define specifications for all supported functions. 650 // 651 // The signature needs to have the correct number of arguments. 652 // However, we insert `Irrelevant' when the type is insignificant. 653 // 654 // Argument ranges should always cover all variants. If return value 655 // is completely unknown, omit it from the respective range set. 656 // 657 // All types in the spec need to be canonical. 658 // 659 // Every item in the list of range sets represents a particular 660 // execution path the analyzer would need to explore once 661 // the call is modeled - a new program state is constructed 662 // for every range set, and each range line in the range set 663 // corresponds to a specific constraint within this state. 664 // 665 // Upon comparing to another argument, the other argument is casted 666 // to the current argument's type. This avoids proper promotion but 667 // seems useful. For example, read() receives size_t argument, 668 // and its return value, which is of type ssize_t, cannot be greater 669 // than this argument. If we made a promotion, and the size argument 670 // is equal to, say, 10, then we'd impose a range of [0, 10] on the 671 // return value, however the correct range is [-1, 10]. 672 // 673 // Please update the list of functions in the header after editing! 674 // 675 676 // Below are helpers functions to create the summaries. 677 auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind, 678 IntRangeVector Ranges) { 679 return std::make_shared<RangeConstraint>(ArgN, Kind, Ranges); 680 }; 681 struct { 682 auto operator()(RangeKind Kind, IntRangeVector Ranges) { 683 return std::make_shared<RangeConstraint>(Ret, Kind, Ranges); 684 } 685 auto operator()(BinaryOperator::Opcode Op, ArgNo OtherArgN) { 686 return std::make_shared<ComparisonConstraint>(Ret, Op, OtherArgN); 687 } 688 } ReturnValueCondition; 689 auto Range = [](RangeInt b, RangeInt e) { 690 return IntRangeVector{std::pair<RangeInt, RangeInt>{b, e}}; 691 }; 692 auto SingleValue = [](RangeInt v) { 693 return IntRangeVector{std::pair<RangeInt, RangeInt>{v, v}}; 694 }; 695 auto LessThanOrEq = BO_LE; 696 auto NotNull = [&](ArgNo ArgN) { 697 return std::make_shared<NotNullConstraint>(ArgN); 698 }; 699 700 using RetType = QualType; 701 // Templates for summaries that are reused by many functions. 702 auto Getc = [&]() { 703 return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall) 704 .Case({ReturnValueCondition(WithinRange, 705 {{EOFv, EOFv}, {0, UCharRangeMax}})}); 706 }; 707 auto Read = [&](RetType R, RangeInt Max) { 708 return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R}, 709 NoEvalCall) 710 .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)), 711 ReturnValueCondition(WithinRange, Range(-1, Max))}); 712 }; 713 auto Fread = [&]() { 714 return Summary(ArgTypes{VoidPtrTy, Irrelevant, SizeTy, Irrelevant}, 715 RetType{SizeTy}, NoEvalCall) 716 .Case({ 717 ReturnValueCondition(LessThanOrEq, ArgNo(2)), 718 }) 719 .ArgConstraint(NotNull(ArgNo(0))); 720 }; 721 auto Fwrite = [&]() { 722 return Summary(ArgTypes{ConstVoidPtrTy, Irrelevant, SizeTy, Irrelevant}, 723 RetType{SizeTy}, NoEvalCall) 724 .Case({ 725 ReturnValueCondition(LessThanOrEq, ArgNo(2)), 726 }) 727 .ArgConstraint(NotNull(ArgNo(0))); 728 }; 729 auto Getline = [&](RetType R, RangeInt Max) { 730 return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R}, 731 NoEvalCall) 732 .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})}); 733 }; 734 735 FunctionSummaryMap = { 736 // The isascii() family of functions. 737 // The behavior is undefined if the value of the argument is not 738 // representable as unsigned char or is not equal to EOF. See e.g. C99 739 // 7.4.1.2 The isalpha function (p: 181-182). 740 { 741 "isalnum", 742 Summaries{ 743 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 744 // Boils down to isupper() or islower() or isdigit(). 745 .Case( 746 {ArgumentCondition(0U, WithinRange, 747 {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}), 748 ReturnValueCondition(OutOfRange, SingleValue(0))}) 749 // The locale-specific range. 750 // No post-condition. We are completely unaware of 751 // locale-specific return values. 752 .Case({ArgumentCondition(0U, WithinRange, 753 {{128, UCharRangeMax}})}) 754 .Case({ArgumentCondition(0U, OutOfRange, 755 {{'0', '9'}, 756 {'A', 'Z'}, 757 {'a', 'z'}, 758 {128, UCharRangeMax}}), 759 ReturnValueCondition(WithinRange, SingleValue(0))}) 760 .ArgConstraint(ArgumentCondition( 761 0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}}))}, 762 }, 763 { 764 "isalpha", 765 Summaries{ 766 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 767 .Case({ArgumentCondition(0U, WithinRange, 768 {{'A', 'Z'}, {'a', 'z'}}), 769 ReturnValueCondition(OutOfRange, SingleValue(0))}) 770 // The locale-specific range. 771 .Case({ArgumentCondition(0U, WithinRange, 772 {{128, UCharRangeMax}})}) 773 .Case({ArgumentCondition( 774 0U, OutOfRange, 775 {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}), 776 ReturnValueCondition(WithinRange, SingleValue(0))})}, 777 }, 778 { 779 "isascii", 780 Summaries{ 781 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 782 .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), 783 ReturnValueCondition(OutOfRange, SingleValue(0))}) 784 .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)), 785 ReturnValueCondition(WithinRange, SingleValue(0))})}, 786 }, 787 { 788 "isblank", 789 Summaries{ 790 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 791 .Case({ArgumentCondition(0U, WithinRange, 792 {{'\t', '\t'}, {' ', ' '}}), 793 ReturnValueCondition(OutOfRange, SingleValue(0))}) 794 .Case({ArgumentCondition(0U, OutOfRange, 795 {{'\t', '\t'}, {' ', ' '}}), 796 ReturnValueCondition(WithinRange, SingleValue(0))})}, 797 }, 798 { 799 "iscntrl", 800 Summaries{ 801 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 802 .Case({ArgumentCondition(0U, WithinRange, 803 {{0, 32}, {127, 127}}), 804 ReturnValueCondition(OutOfRange, SingleValue(0))}) 805 .Case( 806 {ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}), 807 ReturnValueCondition(WithinRange, SingleValue(0))})}, 808 }, 809 { 810 "isdigit", 811 Summaries{ 812 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 813 .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')), 814 ReturnValueCondition(OutOfRange, SingleValue(0))}) 815 .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')), 816 ReturnValueCondition(WithinRange, SingleValue(0))})}, 817 }, 818 { 819 "isgraph", 820 Summaries{ 821 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 822 .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)), 823 ReturnValueCondition(OutOfRange, SingleValue(0))}) 824 .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)), 825 ReturnValueCondition(WithinRange, SingleValue(0))})}, 826 }, 827 { 828 "islower", 829 Summaries{ 830 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 831 // Is certainly lowercase. 832 .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')), 833 ReturnValueCondition(OutOfRange, SingleValue(0))}) 834 // Is ascii but not lowercase. 835 .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), 836 ArgumentCondition(0U, OutOfRange, Range('a', 'z')), 837 ReturnValueCondition(WithinRange, SingleValue(0))}) 838 // The locale-specific range. 839 .Case({ArgumentCondition(0U, WithinRange, 840 {{128, UCharRangeMax}})}) 841 // Is not an unsigned char. 842 .Case({ArgumentCondition(0U, OutOfRange, 843 Range(0, UCharRangeMax)), 844 ReturnValueCondition(WithinRange, SingleValue(0))})}, 845 }, 846 { 847 "isprint", 848 Summaries{ 849 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 850 .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)), 851 ReturnValueCondition(OutOfRange, SingleValue(0))}) 852 .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)), 853 ReturnValueCondition(WithinRange, SingleValue(0))})}, 854 }, 855 { 856 "ispunct", 857 Summaries{ 858 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 859 .Case({ArgumentCondition( 860 0U, WithinRange, 861 {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), 862 ReturnValueCondition(OutOfRange, SingleValue(0))}) 863 .Case({ArgumentCondition( 864 0U, OutOfRange, 865 {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), 866 ReturnValueCondition(WithinRange, SingleValue(0))})}, 867 }, 868 { 869 "isspace", 870 Summaries{ 871 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 872 // Space, '\f', '\n', '\r', '\t', '\v'. 873 .Case({ArgumentCondition(0U, WithinRange, 874 {{9, 13}, {' ', ' '}}), 875 ReturnValueCondition(OutOfRange, SingleValue(0))}) 876 // The locale-specific range. 877 .Case({ArgumentCondition(0U, WithinRange, 878 {{128, UCharRangeMax}})}) 879 .Case({ArgumentCondition( 880 0U, OutOfRange, 881 {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}), 882 ReturnValueCondition(WithinRange, SingleValue(0))})}, 883 }, 884 { 885 "isupper", 886 Summaries{ 887 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 888 // Is certainly uppercase. 889 .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')), 890 ReturnValueCondition(OutOfRange, SingleValue(0))}) 891 // The locale-specific range. 892 .Case({ArgumentCondition(0U, WithinRange, 893 {{128, UCharRangeMax}})}) 894 // Other. 895 .Case({ArgumentCondition(0U, OutOfRange, 896 {{'A', 'Z'}, {128, UCharRangeMax}}), 897 ReturnValueCondition(WithinRange, SingleValue(0))})}, 898 }, 899 { 900 "isxdigit", 901 Summaries{ 902 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 903 .Case( 904 {ArgumentCondition(0U, WithinRange, 905 {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), 906 ReturnValueCondition(OutOfRange, SingleValue(0))}) 907 .Case( 908 {ArgumentCondition(0U, OutOfRange, 909 {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), 910 ReturnValueCondition(WithinRange, SingleValue(0))})}, 911 }, 912 913 // The getc() family of functions that returns either a char or an EOF. 914 {"getc", Summaries{Getc()}}, 915 {"fgetc", Summaries{Getc()}}, 916 {"getchar", 917 Summaries{Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall) 918 .Case({ReturnValueCondition( 919 WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})})}}, 920 921 // read()-like functions that never return more than buffer size. 922 // We are not sure how ssize_t is defined on every platform, so we 923 // provide three variants that should cover common cases. 924 {"read", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax), 925 Read(LongLongTy, LongLongMax)}}, 926 {"write", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax), 927 Read(LongLongTy, LongLongMax)}}, 928 {"fread", Summaries{Fread()}}, 929 {"fwrite", Summaries{Fwrite()}}, 930 // getline()-like functions either fail or read at least the delimiter. 931 {"getline", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax), 932 Getline(LongLongTy, LongLongMax)}}, 933 {"getdelim", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax), 934 Getline(LongLongTy, LongLongMax)}}, 935 }; 936 } 937 938 void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) { 939 mgr.registerChecker<StdLibraryFunctionsChecker>(); 940 } 941 942 bool ento::shouldRegisterStdCLibraryFunctionsChecker(const LangOptions &LO) { 943 return true; 944 } 945 946 #define REGISTER_CHECKER(name) \ 947 void ento::register##name(CheckerManager &mgr) { \ 948 StdLibraryFunctionsChecker *checker = \ 949 mgr.getChecker<StdLibraryFunctionsChecker>(); \ 950 checker->ChecksEnabled[StdLibraryFunctionsChecker::CK_##name] = true; \ 951 checker->CheckNames[StdLibraryFunctionsChecker::CK_##name] = \ 952 mgr.getCurrentCheckerName(); \ 953 } \ 954 \ 955 bool ento::shouldRegister##name(const LangOptions &LO) { return true; } 956 957 REGISTER_CHECKER(StdCLibraryFunctionArgsChecker) 958