1 //=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This checker improves modeling of a few simple library functions. 10 // 11 // This checker provides a specification format - `Summary' - and 12 // contains descriptions of some library functions in this format. Each 13 // specification contains a list of branches for splitting the program state 14 // upon call, and range constraints on argument and return-value symbols that 15 // are satisfied on each branch. This spec can be expanded to include more 16 // items, like external effects of the function. 17 // 18 // The main difference between this approach and the body farms technique is 19 // in more explicit control over how many branches are produced. For example, 20 // consider standard C function `ispunct(int x)', which returns a non-zero value 21 // iff `x' is a punctuation character, that is, when `x' is in range 22 // ['!', '/'] [':', '@'] U ['[', '\`'] U ['{', '~']. 23 // `Summary' provides only two branches for this function. However, 24 // any attempt to describe this range with if-statements in the body farm 25 // would result in many more branches. Because each branch needs to be analyzed 26 // independently, this significantly reduces performance. Additionally, 27 // once we consider a branch on which `x' is in range, say, ['!', '/'], 28 // we assume that such branch is an important separate path through the program, 29 // which may lead to false positives because considering this particular path 30 // was not consciously intended, and therefore it might have been unreachable. 31 // 32 // This checker uses eval::Call for modeling pure functions (functions without 33 // side effets), for which their `Summary' is a precise model. This avoids 34 // unnecessary invalidation passes. Conflicts with other checkers are unlikely 35 // because if the function has no other effects, other checkers would probably 36 // never want to improve upon the modeling done by this checker. 37 // 38 // Non-pure functions, for which only partial improvement over the default 39 // behavior is expected, are modeled via check::PostCall, non-intrusively. 40 // 41 // The following standard C functions are currently supported: 42 // 43 // fgetc getline isdigit isupper 44 // fread isalnum isgraph isxdigit 45 // fwrite isalpha islower read 46 // getc isascii isprint write 47 // getchar isblank ispunct 48 // getdelim iscntrl isspace 49 // 50 //===----------------------------------------------------------------------===// 51 52 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 53 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 54 #include "clang/StaticAnalyzer/Core/Checker.h" 55 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 56 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 57 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 58 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" 59 60 using namespace clang; 61 using namespace clang::ento; 62 63 namespace { 64 class StdLibraryFunctionsChecker 65 : public Checker<check::PreCall, check::PostCall, eval::Call> { 66 /// Below is a series of typedefs necessary to define function specs. 67 /// We avoid nesting types here because each additional qualifier 68 /// would need to be repeated in every function spec. 69 struct Summary; 70 71 /// Specify how much the analyzer engine should entrust modeling this function 72 /// to us. If he doesn't, he performs additional invalidations. 73 enum InvalidationKind { NoEvalCall, EvalCallAsPure }; 74 75 // The universal integral type to use in value range descriptions. 76 // Unsigned to make sure overflows are well-defined. 77 typedef uint64_t RangeInt; 78 79 /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is 80 /// a non-negative integer, which less than 5 and not equal to 2. For 81 /// `ComparesToArgument', holds information about how exactly to compare to 82 /// the argument. 83 typedef std::vector<std::pair<RangeInt, RangeInt>> IntRangeVector; 84 85 /// A reference to an argument or return value by its number. 86 /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but 87 /// obviously uint32_t should be enough for all practical purposes. 88 typedef uint32_t ArgNo; 89 static const ArgNo Ret; 90 91 class ValueConstraint; 92 93 // Pointer to the ValueConstraint. We need a copyable, polymorphic and 94 // default initialize able type (vector needs that). A raw pointer was good, 95 // however, we cannot default initialize that. unique_ptr makes the Summary 96 // class non-copyable, therefore not an option. Releasing the copyability 97 // requirement would render the initialization of the Summary map infeasible. 98 using ValueConstraintPtr = std::shared_ptr<ValueConstraint>; 99 100 /// Polymorphic base class that represents a constraint on a given argument 101 /// (or return value) of a function. Derived classes implement different kind 102 /// of constraints, e.g range constraints or correlation between two 103 /// arguments. 104 class ValueConstraint { 105 public: 106 ValueConstraint(ArgNo ArgN) : ArgN(ArgN) {} 107 virtual ~ValueConstraint() {} 108 /// Apply the effects of the constraint on the given program state. If null 109 /// is returned then the constraint is not feasible. 110 virtual ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 111 const Summary &Summary) const = 0; 112 virtual ValueConstraintPtr negate() const { 113 llvm_unreachable("Not implemented"); 114 }; 115 ArgNo getArgNo() const { return ArgN; } 116 117 protected: 118 ArgNo ArgN; // Argument to which we apply the constraint. 119 }; 120 121 /// Given a range, should the argument stay inside or outside this range? 122 enum RangeKind { OutOfRange, WithinRange }; 123 124 /// Encapsulates a single range on a single symbol within a branch. 125 class RangeConstraint : public ValueConstraint { 126 RangeKind Kind; // Kind of range definition. 127 IntRangeVector Args; // Polymorphic arguments. 128 129 public: 130 RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Args) 131 : ValueConstraint(ArgN), Kind(Kind), Args(Args) {} 132 133 const IntRangeVector &getRanges() const { 134 return Args; 135 } 136 137 private: 138 ProgramStateRef applyAsOutOfRange(ProgramStateRef State, 139 const CallEvent &Call, 140 const Summary &Summary) const; 141 ProgramStateRef applyAsWithinRange(ProgramStateRef State, 142 const CallEvent &Call, 143 const Summary &Summary) const; 144 public: 145 ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 146 const Summary &Summary) const override { 147 switch (Kind) { 148 case OutOfRange: 149 return applyAsOutOfRange(State, Call, Summary); 150 case WithinRange: 151 return applyAsWithinRange(State, Call, Summary); 152 } 153 llvm_unreachable("Unknown range kind!"); 154 } 155 156 ValueConstraintPtr negate() const override { 157 RangeConstraint Tmp(*this); 158 switch (Kind) { 159 case OutOfRange: 160 Tmp.Kind = WithinRange; 161 break; 162 case WithinRange: 163 Tmp.Kind = OutOfRange; 164 break; 165 default: 166 llvm_unreachable("Unknown RangeConstraint kind!"); 167 } 168 return std::make_shared<RangeConstraint>(Tmp); 169 } 170 }; 171 172 class ComparisonConstraint : public ValueConstraint { 173 BinaryOperator::Opcode Opcode; 174 ArgNo OtherArgN; 175 176 public: 177 ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode, 178 ArgNo OtherArgN) 179 : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {} 180 ArgNo getOtherArgNo() const { return OtherArgN; } 181 BinaryOperator::Opcode getOpcode() const { return Opcode; } 182 ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 183 const Summary &Summary) const override; 184 }; 185 186 /// The complete list of constraints that defines a single branch. 187 typedef std::vector<ValueConstraintPtr> ConstraintSet; 188 189 using ArgTypes = std::vector<QualType>; 190 using Cases = std::vector<ConstraintSet>; 191 192 /// Includes information about 193 /// * function prototype (which is necessary to 194 /// ensure we're modeling the right function and casting values properly), 195 /// * approach to invalidation, 196 /// * a list of branches - a list of list of ranges - 197 /// A branch represents a path in the exploded graph of a function (which 198 /// is a tree). So, a branch is a series of assumptions. In other words, 199 /// branches represent split states and additional assumptions on top of 200 /// the splitting assumption. 201 /// For example, consider the branches in `isalpha(x)` 202 /// Branch 1) 203 /// x is in range ['A', 'Z'] or in ['a', 'z'] 204 /// then the return value is not 0. (I.e. out-of-range [0, 0]) 205 /// Branch 2) 206 /// x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z'] 207 /// then the return value is 0. 208 /// * a list of argument constraints, that must be true on every branch. 209 /// If these constraints are not satisfied that means a fatal error 210 /// usually resulting in undefined behaviour. 211 struct Summary { 212 const ArgTypes ArgTys; 213 const QualType RetTy; 214 const InvalidationKind InvalidationKd; 215 Cases CaseConstraints; 216 ConstraintSet ArgConstraints; 217 218 Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd) 219 : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {} 220 221 Summary &Case(ConstraintSet&& CS) { 222 CaseConstraints.push_back(std::move(CS)); 223 return *this; 224 } 225 Summary &ArgConstraint(ValueConstraintPtr VC) { 226 ArgConstraints.push_back(VC); 227 return *this; 228 } 229 230 private: 231 static void assertTypeSuitableForSummary(QualType T) { 232 assert(!T->isVoidType() && 233 "We should have had no significant void types in the spec"); 234 assert(T.isCanonical() && 235 "We should only have canonical types in the spec"); 236 // FIXME: lift this assert (but not the ones above!) 237 assert(T->isIntegralOrEnumerationType() && 238 "We only support integral ranges in the spec"); 239 } 240 241 public: 242 QualType getArgType(ArgNo ArgN) const { 243 QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN]; 244 assertTypeSuitableForSummary(T); 245 return T; 246 } 247 248 /// Try our best to figure out if the call expression is the call of 249 /// *the* library function to which this specification applies. 250 bool matchesCall(const CallExpr *CE) const; 251 }; 252 253 // The same function (as in, function identifier) may have different 254 // summaries assigned to it, with different argument and return value types. 255 // We call these "variants" of the function. This can be useful for handling 256 // C++ function overloads, and also it can be used when the same function 257 // may have different definitions on different platforms. 258 typedef std::vector<Summary> Summaries; 259 260 // The map of all functions supported by the checker. It is initialized 261 // lazily, and it doesn't change after initialization. 262 mutable llvm::StringMap<Summaries> FunctionSummaryMap; 263 264 mutable std::unique_ptr<BugType> BT_InvalidArg; 265 266 // Auxiliary functions to support ArgNo within all structures 267 // in a unified manner. 268 static QualType getArgType(const Summary &Summary, ArgNo ArgN) { 269 return Summary.getArgType(ArgN); 270 } 271 static QualType getArgType(const CallEvent &Call, ArgNo ArgN) { 272 return ArgN == Ret ? Call.getResultType().getCanonicalType() 273 : Call.getArgExpr(ArgN)->getType().getCanonicalType(); 274 } 275 static QualType getArgType(const CallExpr *CE, ArgNo ArgN) { 276 return ArgN == Ret ? CE->getType().getCanonicalType() 277 : CE->getArg(ArgN)->getType().getCanonicalType(); 278 } 279 static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) { 280 return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN); 281 } 282 283 public: 284 void checkPreCall(const CallEvent &Call, CheckerContext &C) const; 285 void checkPostCall(const CallEvent &Call, CheckerContext &C) const; 286 bool evalCall(const CallEvent &Call, CheckerContext &C) const; 287 288 enum CheckKind { CK_StdCLibraryFunctionArgsChecker, CK_NumCheckKinds }; 289 DefaultBool ChecksEnabled[CK_NumCheckKinds]; 290 CheckerNameRef CheckNames[CK_NumCheckKinds]; 291 292 private: 293 Optional<Summary> findFunctionSummary(const FunctionDecl *FD, 294 const CallExpr *CE, 295 CheckerContext &C) const; 296 Optional<Summary> findFunctionSummary(const CallEvent &Call, 297 CheckerContext &C) const; 298 299 void initFunctionSummaries(CheckerContext &C) const; 300 301 void reportBug(const CallEvent &Call, ExplodedNode *N, 302 CheckerContext &C) const { 303 if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker]) 304 return; 305 // TODO Add detailed diagnostic. 306 StringRef Msg = "Function argument constraint is not satisfied"; 307 if (!BT_InvalidArg) 308 BT_InvalidArg = std::make_unique<BugType>( 309 CheckNames[CK_StdCLibraryFunctionArgsChecker], 310 "Unsatisfied argument constraints", categories::LogicError); 311 auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N); 312 bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R); 313 C.emitReport(std::move(R)); 314 } 315 }; 316 317 const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret = 318 std::numeric_limits<ArgNo>::max(); 319 320 } // end of anonymous namespace 321 322 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange( 323 ProgramStateRef State, const CallEvent &Call, 324 const Summary &Summary) const { 325 326 ProgramStateManager &Mgr = State->getStateManager(); 327 SValBuilder &SVB = Mgr.getSValBuilder(); 328 BasicValueFactory &BVF = SVB.getBasicValueFactory(); 329 ConstraintManager &CM = Mgr.getConstraintManager(); 330 QualType T = getArgType(Summary, getArgNo()); 331 SVal V = getArgSVal(Call, getArgNo()); 332 333 if (auto N = V.getAs<NonLoc>()) { 334 const IntRangeVector &R = getRanges(); 335 size_t E = R.size(); 336 for (size_t I = 0; I != E; ++I) { 337 const llvm::APSInt &Min = BVF.getValue(R[I].first, T); 338 const llvm::APSInt &Max = BVF.getValue(R[I].second, T); 339 assert(Min <= Max); 340 State = CM.assumeInclusiveRange(State, *N, Min, Max, false); 341 if (!State) 342 break; 343 } 344 } 345 346 return State; 347 } 348 349 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsWithinRange( 350 ProgramStateRef State, const CallEvent &Call, 351 const Summary &Summary) const { 352 353 ProgramStateManager &Mgr = State->getStateManager(); 354 SValBuilder &SVB = Mgr.getSValBuilder(); 355 BasicValueFactory &BVF = SVB.getBasicValueFactory(); 356 ConstraintManager &CM = Mgr.getConstraintManager(); 357 QualType T = getArgType(Summary, getArgNo()); 358 SVal V = getArgSVal(Call, getArgNo()); 359 360 // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R". 361 // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary, 362 // and then cut away all holes in R one by one. 363 // 364 // E.g. consider a range list R as [A, B] and [C, D] 365 // -------+--------+------------------+------------+-----------> 366 // A B C D 367 // Then we assume that the value is not in [-inf, A - 1], 368 // then not in [D + 1, +inf], then not in [B + 1, C - 1] 369 if (auto N = V.getAs<NonLoc>()) { 370 const IntRangeVector &R = getRanges(); 371 size_t E = R.size(); 372 373 const llvm::APSInt &MinusInf = BVF.getMinValue(T); 374 const llvm::APSInt &PlusInf = BVF.getMaxValue(T); 375 376 const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T); 377 if (Left != PlusInf) { 378 assert(MinusInf <= Left); 379 State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false); 380 if (!State) 381 return nullptr; 382 } 383 384 const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T); 385 if (Right != MinusInf) { 386 assert(Right <= PlusInf); 387 State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false); 388 if (!State) 389 return nullptr; 390 } 391 392 for (size_t I = 1; I != E; ++I) { 393 const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T); 394 const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T); 395 if (Min <= Max) { 396 State = CM.assumeInclusiveRange(State, *N, Min, Max, false); 397 if (!State) 398 return nullptr; 399 } 400 } 401 } 402 403 return State; 404 } 405 406 ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply( 407 ProgramStateRef State, const CallEvent &Call, 408 const Summary &Summary) const { 409 410 ProgramStateManager &Mgr = State->getStateManager(); 411 SValBuilder &SVB = Mgr.getSValBuilder(); 412 QualType CondT = SVB.getConditionType(); 413 QualType T = getArgType(Summary, getArgNo()); 414 SVal V = getArgSVal(Call, getArgNo()); 415 416 BinaryOperator::Opcode Op = getOpcode(); 417 ArgNo OtherArg = getOtherArgNo(); 418 SVal OtherV = getArgSVal(Call, OtherArg); 419 QualType OtherT = getArgType(Call, OtherArg); 420 // Note: we avoid integral promotion for comparison. 421 OtherV = SVB.evalCast(OtherV, T, OtherT); 422 if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT) 423 .getAs<DefinedOrUnknownSVal>()) 424 State = State->assume(*CompV, true); 425 return State; 426 } 427 428 void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call, 429 CheckerContext &C) const { 430 Optional<Summary> FoundSummary = findFunctionSummary(Call, C); 431 if (!FoundSummary) 432 return; 433 434 const Summary &Summary = *FoundSummary; 435 ProgramStateRef State = C.getState(); 436 437 for (const ValueConstraintPtr& VC : Summary.ArgConstraints) { 438 ProgramStateRef SuccessSt = VC->apply(State, Call, Summary); 439 ProgramStateRef FailureSt = VC->negate()->apply(State, Call, Summary); 440 // The argument constraint is not satisfied. 441 if (FailureSt && !SuccessSt) { 442 if (ExplodedNode *N = C.generateErrorNode(State)) 443 reportBug(Call, N, C); 444 break; 445 } else { 446 // Apply the constraint even if we cannot reason about the argument. This 447 // means both SuccessSt and FailureSt can be true. If we weren't applying 448 // the constraint that would mean that symbolic execution continues on a 449 // code whose behaviour is undefined. 450 assert(SuccessSt); 451 C.addTransition(SuccessSt); 452 } 453 } 454 } 455 456 void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call, 457 CheckerContext &C) const { 458 Optional<Summary> FoundSummary = findFunctionSummary(Call, C); 459 if (!FoundSummary) 460 return; 461 462 // Now apply the constraints. 463 const Summary &Summary = *FoundSummary; 464 ProgramStateRef State = C.getState(); 465 466 // Apply case/branch specifications. 467 for (const auto &VRS : Summary.CaseConstraints) { 468 ProgramStateRef NewState = State; 469 for (const auto &VR: VRS) { 470 NewState = VR->apply(NewState, Call, Summary); 471 if (!NewState) 472 break; 473 } 474 475 if (NewState && NewState != State) 476 C.addTransition(NewState); 477 } 478 } 479 480 bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call, 481 CheckerContext &C) const { 482 Optional<Summary> FoundSummary = findFunctionSummary(Call, C); 483 if (!FoundSummary) 484 return false; 485 486 const Summary &Summary = *FoundSummary; 487 switch (Summary.InvalidationKd) { 488 case EvalCallAsPure: { 489 ProgramStateRef State = C.getState(); 490 const LocationContext *LC = C.getLocationContext(); 491 const auto *CE = cast_or_null<CallExpr>(Call.getOriginExpr()); 492 SVal V = C.getSValBuilder().conjureSymbolVal( 493 CE, LC, CE->getType().getCanonicalType(), C.blockCount()); 494 State = State->BindExpr(CE, LC, V); 495 C.addTransition(State); 496 return true; 497 } 498 case NoEvalCall: 499 // Summary tells us to avoid performing eval::Call. The function is possibly 500 // evaluated by another checker, or evaluated conservatively. 501 return false; 502 } 503 llvm_unreachable("Unknown invalidation kind!"); 504 } 505 506 bool StdLibraryFunctionsChecker::Summary::matchesCall( 507 const CallExpr *CE) const { 508 // Check number of arguments: 509 if (CE->getNumArgs() != ArgTys.size()) 510 return false; 511 512 // Check return type if relevant: 513 if (!RetTy.isNull() && RetTy != CE->getType().getCanonicalType()) 514 return false; 515 516 // Check argument types when relevant: 517 for (size_t I = 0, E = ArgTys.size(); I != E; ++I) { 518 QualType FormalT = ArgTys[I]; 519 // Null type marks irrelevant arguments. 520 if (FormalT.isNull()) 521 continue; 522 523 assertTypeSuitableForSummary(FormalT); 524 525 QualType ActualT = StdLibraryFunctionsChecker::getArgType(CE, I); 526 assert(ActualT.isCanonical()); 527 if (ActualT != FormalT) 528 return false; 529 } 530 531 return true; 532 } 533 534 Optional<StdLibraryFunctionsChecker::Summary> 535 StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD, 536 const CallExpr *CE, 537 CheckerContext &C) const { 538 // Note: we cannot always obtain FD from CE 539 // (eg. virtual call, or call by pointer). 540 assert(CE); 541 542 if (!FD) 543 return None; 544 545 initFunctionSummaries(C); 546 547 IdentifierInfo *II = FD->getIdentifier(); 548 if (!II) 549 return None; 550 StringRef Name = II->getName(); 551 if (Name.empty() || !C.isCLibraryFunction(FD, Name)) 552 return None; 553 554 auto FSMI = FunctionSummaryMap.find(Name); 555 if (FSMI == FunctionSummaryMap.end()) 556 return None; 557 558 // Verify that function signature matches the spec in advance. 559 // Otherwise we might be modeling the wrong function. 560 // Strict checking is important because we will be conducting 561 // very integral-type-sensitive operations on arguments and 562 // return values. 563 const Summaries &SpecVariants = FSMI->second; 564 for (const Summary &Spec : SpecVariants) 565 if (Spec.matchesCall(CE)) 566 return Spec; 567 568 return None; 569 } 570 571 Optional<StdLibraryFunctionsChecker::Summary> 572 StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call, 573 CheckerContext &C) const { 574 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl()); 575 if (!FD) 576 return None; 577 const CallExpr *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr()); 578 if (!CE) 579 return None; 580 return findFunctionSummary(FD, CE, C); 581 } 582 583 void StdLibraryFunctionsChecker::initFunctionSummaries( 584 CheckerContext &C) const { 585 if (!FunctionSummaryMap.empty()) 586 return; 587 588 SValBuilder &SVB = C.getSValBuilder(); 589 BasicValueFactory &BVF = SVB.getBasicValueFactory(); 590 const ASTContext &ACtx = BVF.getContext(); 591 592 // These types are useful for writing specifications quickly, 593 // New specifications should probably introduce more types. 594 // Some types are hard to obtain from the AST, eg. "ssize_t". 595 // In such cases it should be possible to provide multiple variants 596 // of function summary for common cases (eg. ssize_t could be int or long 597 // or long long, so three summary variants would be enough). 598 // Of course, function variants are also useful for C++ overloads. 599 const QualType 600 Irrelevant{}; // A placeholder, whenever we do not care about the type. 601 const QualType IntTy = ACtx.IntTy; 602 const QualType LongTy = ACtx.LongTy; 603 const QualType LongLongTy = ACtx.LongLongTy; 604 const QualType SizeTy = ACtx.getSizeType(); 605 606 const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue(); 607 const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue(); 608 const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue(); 609 610 // Set UCharRangeMax to min of int or uchar maximum value. 611 // The C standard states that the arguments of functions like isalpha must 612 // be representable as an unsigned char. Their type is 'int', so the max 613 // value of the argument should be min(UCharMax, IntMax). This just happen 614 // to be true for commonly used and well tested instruction set 615 // architectures, but not for others. 616 const RangeInt UCharRangeMax = 617 std::min(BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue(), IntMax); 618 619 // The platform dependent value of EOF. 620 // Try our best to parse this from the Preprocessor, otherwise fallback to -1. 621 const auto EOFv = [&C]() -> RangeInt { 622 if (const llvm::Optional<int> OptInt = 623 tryExpandAsInteger("EOF", C.getPreprocessor())) 624 return *OptInt; 625 return -1; 626 }(); 627 628 // We are finally ready to define specifications for all supported functions. 629 // 630 // The signature needs to have the correct number of arguments. 631 // However, we insert `Irrelevant' when the type is insignificant. 632 // 633 // Argument ranges should always cover all variants. If return value 634 // is completely unknown, omit it from the respective range set. 635 // 636 // All types in the spec need to be canonical. 637 // 638 // Every item in the list of range sets represents a particular 639 // execution path the analyzer would need to explore once 640 // the call is modeled - a new program state is constructed 641 // for every range set, and each range line in the range set 642 // corresponds to a specific constraint within this state. 643 // 644 // Upon comparing to another argument, the other argument is casted 645 // to the current argument's type. This avoids proper promotion but 646 // seems useful. For example, read() receives size_t argument, 647 // and its return value, which is of type ssize_t, cannot be greater 648 // than this argument. If we made a promotion, and the size argument 649 // is equal to, say, 10, then we'd impose a range of [0, 10] on the 650 // return value, however the correct range is [-1, 10]. 651 // 652 // Please update the list of functions in the header after editing! 653 // 654 655 // Below are helpers functions to create the summaries. 656 auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind, 657 IntRangeVector Ranges) { 658 return std::make_shared<RangeConstraint>(ArgN, Kind, Ranges); 659 }; 660 struct { 661 auto operator()(RangeKind Kind, IntRangeVector Ranges) { 662 return std::make_shared<RangeConstraint>(Ret, Kind, Ranges); 663 } 664 auto operator()(BinaryOperator::Opcode Op, ArgNo OtherArgN) { 665 return std::make_shared<ComparisonConstraint>(Ret, Op, OtherArgN); 666 } 667 } ReturnValueCondition; 668 auto Range = [](RangeInt b, RangeInt e) { 669 return IntRangeVector{std::pair<RangeInt, RangeInt>{b, e}}; 670 }; 671 auto SingleValue = [](RangeInt v) { 672 return IntRangeVector{std::pair<RangeInt, RangeInt>{v, v}}; 673 }; 674 auto LessThanOrEq = BO_LE; 675 676 using RetType = QualType; 677 // Templates for summaries that are reused by many functions. 678 auto Getc = [&]() { 679 return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall) 680 .Case({ReturnValueCondition(WithinRange, 681 {{EOFv, EOFv}, {0, UCharRangeMax}})}); 682 }; 683 auto Read = [&](RetType R, RangeInt Max) { 684 return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R}, 685 NoEvalCall) 686 .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)), 687 ReturnValueCondition(WithinRange, Range(-1, Max))}); 688 }; 689 auto Fread = [&]() { 690 return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy, Irrelevant}, 691 RetType{SizeTy}, NoEvalCall) 692 .Case({ 693 ReturnValueCondition(LessThanOrEq, ArgNo(2)), 694 }); 695 }; 696 auto Getline = [&](RetType R, RangeInt Max) { 697 return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R}, 698 NoEvalCall) 699 .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})}); 700 }; 701 702 FunctionSummaryMap = { 703 // The isascii() family of functions. 704 // The behavior is undefined if the value of the argument is not 705 // representable as unsigned char or is not equal to EOF. See e.g. C99 706 // 7.4.1.2 The isalpha function (p: 181-182). 707 { 708 "isalnum", 709 Summaries{ 710 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 711 // Boils down to isupper() or islower() or isdigit(). 712 .Case( 713 {ArgumentCondition(0U, WithinRange, 714 {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}), 715 ReturnValueCondition(OutOfRange, SingleValue(0))}) 716 // The locale-specific range. 717 // No post-condition. We are completely unaware of 718 // locale-specific return values. 719 .Case({ArgumentCondition(0U, WithinRange, 720 {{128, UCharRangeMax}})}) 721 .Case({ArgumentCondition(0U, OutOfRange, 722 {{'0', '9'}, 723 {'A', 'Z'}, 724 {'a', 'z'}, 725 {128, UCharRangeMax}}), 726 ReturnValueCondition(WithinRange, SingleValue(0))}) 727 .ArgConstraint(ArgumentCondition( 728 0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}}))}, 729 }, 730 { 731 "isalpha", 732 Summaries{ 733 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 734 .Case({ArgumentCondition(0U, WithinRange, 735 {{'A', 'Z'}, {'a', 'z'}}), 736 ReturnValueCondition(OutOfRange, SingleValue(0))}) 737 // The locale-specific range. 738 .Case({ArgumentCondition(0U, WithinRange, 739 {{128, UCharRangeMax}})}) 740 .Case({ArgumentCondition( 741 0U, OutOfRange, 742 {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}), 743 ReturnValueCondition(WithinRange, SingleValue(0))})}, 744 }, 745 { 746 "isascii", 747 Summaries{ 748 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 749 .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), 750 ReturnValueCondition(OutOfRange, SingleValue(0))}) 751 .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)), 752 ReturnValueCondition(WithinRange, SingleValue(0))})}, 753 }, 754 { 755 "isblank", 756 Summaries{ 757 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 758 .Case({ArgumentCondition(0U, WithinRange, 759 {{'\t', '\t'}, {' ', ' '}}), 760 ReturnValueCondition(OutOfRange, SingleValue(0))}) 761 .Case({ArgumentCondition(0U, OutOfRange, 762 {{'\t', '\t'}, {' ', ' '}}), 763 ReturnValueCondition(WithinRange, SingleValue(0))})}, 764 }, 765 { 766 "iscntrl", 767 Summaries{ 768 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 769 .Case({ArgumentCondition(0U, WithinRange, 770 {{0, 32}, {127, 127}}), 771 ReturnValueCondition(OutOfRange, SingleValue(0))}) 772 .Case( 773 {ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}), 774 ReturnValueCondition(WithinRange, SingleValue(0))})}, 775 }, 776 { 777 "isdigit", 778 Summaries{ 779 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 780 .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')), 781 ReturnValueCondition(OutOfRange, SingleValue(0))}) 782 .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')), 783 ReturnValueCondition(WithinRange, SingleValue(0))})}, 784 }, 785 { 786 "isgraph", 787 Summaries{ 788 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 789 .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)), 790 ReturnValueCondition(OutOfRange, SingleValue(0))}) 791 .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)), 792 ReturnValueCondition(WithinRange, SingleValue(0))})}, 793 }, 794 { 795 "islower", 796 Summaries{ 797 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 798 // Is certainly lowercase. 799 .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')), 800 ReturnValueCondition(OutOfRange, SingleValue(0))}) 801 // Is ascii but not lowercase. 802 .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), 803 ArgumentCondition(0U, OutOfRange, Range('a', 'z')), 804 ReturnValueCondition(WithinRange, SingleValue(0))}) 805 // The locale-specific range. 806 .Case({ArgumentCondition(0U, WithinRange, 807 {{128, UCharRangeMax}})}) 808 // Is not an unsigned char. 809 .Case({ArgumentCondition(0U, OutOfRange, 810 Range(0, UCharRangeMax)), 811 ReturnValueCondition(WithinRange, SingleValue(0))})}, 812 }, 813 { 814 "isprint", 815 Summaries{ 816 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 817 .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)), 818 ReturnValueCondition(OutOfRange, SingleValue(0))}) 819 .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)), 820 ReturnValueCondition(WithinRange, SingleValue(0))})}, 821 }, 822 { 823 "ispunct", 824 Summaries{ 825 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 826 .Case({ArgumentCondition( 827 0U, WithinRange, 828 {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), 829 ReturnValueCondition(OutOfRange, SingleValue(0))}) 830 .Case({ArgumentCondition( 831 0U, OutOfRange, 832 {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), 833 ReturnValueCondition(WithinRange, SingleValue(0))})}, 834 }, 835 { 836 "isspace", 837 Summaries{ 838 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 839 // Space, '\f', '\n', '\r', '\t', '\v'. 840 .Case({ArgumentCondition(0U, WithinRange, 841 {{9, 13}, {' ', ' '}}), 842 ReturnValueCondition(OutOfRange, SingleValue(0))}) 843 // The locale-specific range. 844 .Case({ArgumentCondition(0U, WithinRange, 845 {{128, UCharRangeMax}})}) 846 .Case({ArgumentCondition( 847 0U, OutOfRange, 848 {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}), 849 ReturnValueCondition(WithinRange, SingleValue(0))})}, 850 }, 851 { 852 "isupper", 853 Summaries{ 854 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 855 // Is certainly uppercase. 856 .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')), 857 ReturnValueCondition(OutOfRange, SingleValue(0))}) 858 // The locale-specific range. 859 .Case({ArgumentCondition(0U, WithinRange, 860 {{128, UCharRangeMax}})}) 861 // Other. 862 .Case({ArgumentCondition(0U, OutOfRange, 863 {{'A', 'Z'}, {128, UCharRangeMax}}), 864 ReturnValueCondition(WithinRange, SingleValue(0))})}, 865 }, 866 { 867 "isxdigit", 868 Summaries{ 869 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 870 .Case( 871 {ArgumentCondition(0U, WithinRange, 872 {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), 873 ReturnValueCondition(OutOfRange, SingleValue(0))}) 874 .Case( 875 {ArgumentCondition(0U, OutOfRange, 876 {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), 877 ReturnValueCondition(WithinRange, SingleValue(0))})}, 878 }, 879 880 // The getc() family of functions that returns either a char or an EOF. 881 {"getc", Summaries{Getc()}}, 882 {"fgetc", Summaries{Getc()}}, 883 {"getchar", 884 Summaries{Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall) 885 .Case({ReturnValueCondition( 886 WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})})}}, 887 888 // read()-like functions that never return more than buffer size. 889 // We are not sure how ssize_t is defined on every platform, so we 890 // provide three variants that should cover common cases. 891 {"read", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax), 892 Read(LongLongTy, LongLongMax)}}, 893 {"write", Summaries{Read(IntTy, IntMax), Read(LongTy, LongMax), 894 Read(LongLongTy, LongLongMax)}}, 895 {"fread", Summaries{Fread()}}, 896 {"fwrite", Summaries{Fread()}}, 897 // getline()-like functions either fail or read at least the delimiter. 898 {"getline", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax), 899 Getline(LongLongTy, LongLongMax)}}, 900 {"getdelim", Summaries{Getline(IntTy, IntMax), Getline(LongTy, LongMax), 901 Getline(LongLongTy, LongLongMax)}}, 902 }; 903 } 904 905 void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) { 906 mgr.registerChecker<StdLibraryFunctionsChecker>(); 907 } 908 909 bool ento::shouldRegisterStdCLibraryFunctionsChecker(const LangOptions &LO) { 910 return true; 911 } 912 913 #define REGISTER_CHECKER(name) \ 914 void ento::register##name(CheckerManager &mgr) { \ 915 StdLibraryFunctionsChecker *checker = \ 916 mgr.getChecker<StdLibraryFunctionsChecker>(); \ 917 checker->ChecksEnabled[StdLibraryFunctionsChecker::CK_##name] = true; \ 918 checker->CheckNames[StdLibraryFunctionsChecker::CK_##name] = \ 919 mgr.getCurrentCheckerName(); \ 920 } \ 921 \ 922 bool ento::shouldRegister##name(const LangOptions &LO) { return true; } 923 924 REGISTER_CHECKER(StdCLibraryFunctionArgsChecker) 925