1 //=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This checker improves modeling of a few simple library functions. 10 // 11 // This checker provides a specification format - `Summary' - and 12 // contains descriptions of some library functions in this format. Each 13 // specification contains a list of branches for splitting the program state 14 // upon call, and range constraints on argument and return-value symbols that 15 // are satisfied on each branch. This spec can be expanded to include more 16 // items, like external effects of the function. 17 // 18 // The main difference between this approach and the body farms technique is 19 // in more explicit control over how many branches are produced. For example, 20 // consider standard C function `ispunct(int x)', which returns a non-zero value 21 // iff `x' is a punctuation character, that is, when `x' is in range 22 // ['!', '/'] [':', '@'] U ['[', '\`'] U ['{', '~']. 23 // `Summary' provides only two branches for this function. However, 24 // any attempt to describe this range with if-statements in the body farm 25 // would result in many more branches. Because each branch needs to be analyzed 26 // independently, this significantly reduces performance. Additionally, 27 // once we consider a branch on which `x' is in range, say, ['!', '/'], 28 // we assume that such branch is an important separate path through the program, 29 // which may lead to false positives because considering this particular path 30 // was not consciously intended, and therefore it might have been unreachable. 31 // 32 // This checker uses eval::Call for modeling pure functions (functions without 33 // side effets), for which their `Summary' is a precise model. This avoids 34 // unnecessary invalidation passes. Conflicts with other checkers are unlikely 35 // because if the function has no other effects, other checkers would probably 36 // never want to improve upon the modeling done by this checker. 37 // 38 // Non-pure functions, for which only partial improvement over the default 39 // behavior is expected, are modeled via check::PostCall, non-intrusively. 40 // 41 // The following standard C functions are currently supported: 42 // 43 // fgetc getline isdigit isupper 44 // fread isalnum isgraph isxdigit 45 // fwrite isalpha islower read 46 // getc isascii isprint write 47 // getchar isblank ispunct 48 // getdelim iscntrl isspace 49 // 50 //===----------------------------------------------------------------------===// 51 52 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 53 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 54 #include "clang/StaticAnalyzer/Core/Checker.h" 55 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 56 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 57 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 58 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" 59 60 using namespace clang; 61 using namespace clang::ento; 62 63 namespace { 64 class StdLibraryFunctionsChecker 65 : public Checker<check::PreCall, check::PostCall, eval::Call> { 66 /// Below is a series of typedefs necessary to define function specs. 67 /// We avoid nesting types here because each additional qualifier 68 /// would need to be repeated in every function spec. 69 struct Summary; 70 71 /// Specify how much the analyzer engine should entrust modeling this function 72 /// to us. If he doesn't, he performs additional invalidations. 73 enum InvalidationKind { NoEvalCall, EvalCallAsPure }; 74 75 // The universal integral type to use in value range descriptions. 76 // Unsigned to make sure overflows are well-defined. 77 typedef uint64_t RangeInt; 78 79 /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is 80 /// a non-negative integer, which less than 5 and not equal to 2. For 81 /// `ComparesToArgument', holds information about how exactly to compare to 82 /// the argument. 83 typedef std::vector<std::pair<RangeInt, RangeInt>> IntRangeVector; 84 85 /// A reference to an argument or return value by its number. 86 /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but 87 /// obviously uint32_t should be enough for all practical purposes. 88 typedef uint32_t ArgNo; 89 static const ArgNo Ret; 90 91 class ValueConstraint; 92 93 // Pointer to the ValueConstraint. We need a copyable, polymorphic and 94 // default initialize able type (vector needs that). A raw pointer was good, 95 // however, we cannot default initialize that. unique_ptr makes the Summary 96 // class non-copyable, therefore not an option. Releasing the copyability 97 // requirement would render the initialization of the Summary map infeasible. 98 using ValueConstraintPtr = std::shared_ptr<ValueConstraint>; 99 100 /// Polymorphic base class that represents a constraint on a given argument 101 /// (or return value) of a function. Derived classes implement different kind 102 /// of constraints, e.g range constraints or correlation between two 103 /// arguments. 104 class ValueConstraint { 105 public: 106 ValueConstraint(ArgNo ArgN) : ArgN(ArgN) {} 107 virtual ~ValueConstraint() {} 108 /// Apply the effects of the constraint on the given program state. If null 109 /// is returned then the constraint is not feasible. 110 virtual ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 111 const Summary &Summary) const = 0; 112 virtual ValueConstraintPtr negate() const { 113 llvm_unreachable("Not implemented"); 114 }; 115 ArgNo getArgNo() const { return ArgN; } 116 117 protected: 118 ArgNo ArgN; // Argument to which we apply the constraint. 119 }; 120 121 /// Given a range, should the argument stay inside or outside this range? 122 enum RangeKind { OutOfRange, WithinRange }; 123 124 /// Encapsulates a single range on a single symbol within a branch. 125 class RangeConstraint : public ValueConstraint { 126 RangeKind Kind; // Kind of range definition. 127 IntRangeVector Args; // Polymorphic arguments. 128 129 public: 130 RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Args) 131 : ValueConstraint(ArgN), Kind(Kind), Args(Args) {} 132 133 const IntRangeVector &getRanges() const { 134 return Args; 135 } 136 137 private: 138 ProgramStateRef applyAsOutOfRange(ProgramStateRef State, 139 const CallEvent &Call, 140 const Summary &Summary) const; 141 ProgramStateRef applyAsWithinRange(ProgramStateRef State, 142 const CallEvent &Call, 143 const Summary &Summary) const; 144 public: 145 ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 146 const Summary &Summary) const override { 147 switch (Kind) { 148 case OutOfRange: 149 return applyAsOutOfRange(State, Call, Summary); 150 case WithinRange: 151 return applyAsWithinRange(State, Call, Summary); 152 } 153 llvm_unreachable("Unknown range kind!"); 154 } 155 156 ValueConstraintPtr negate() const override { 157 RangeConstraint Tmp(*this); 158 switch (Kind) { 159 case OutOfRange: 160 Tmp.Kind = WithinRange; 161 break; 162 case WithinRange: 163 Tmp.Kind = OutOfRange; 164 break; 165 } 166 return std::make_shared<RangeConstraint>(Tmp); 167 } 168 }; 169 170 class ComparisonConstraint : public ValueConstraint { 171 BinaryOperator::Opcode Opcode; 172 ArgNo OtherArgN; 173 174 public: 175 ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode, 176 ArgNo OtherArgN) 177 : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {} 178 ArgNo getOtherArgNo() const { return OtherArgN; } 179 BinaryOperator::Opcode getOpcode() const { return Opcode; } 180 ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 181 const Summary &Summary) const override; 182 }; 183 184 class NotNullConstraint : public ValueConstraint { 185 using ValueConstraint::ValueConstraint; 186 // This variable has a role when we negate the constraint. 187 bool CannotBeNull = true; 188 189 public: 190 ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 191 const Summary &Summary) const override { 192 SVal V = getArgSVal(Call, getArgNo()); 193 if (V.isUndef()) 194 return State; 195 196 DefinedOrUnknownSVal L = V.castAs<DefinedOrUnknownSVal>(); 197 if (!L.getAs<Loc>()) 198 return State; 199 200 return State->assume(L, CannotBeNull); 201 } 202 203 ValueConstraintPtr negate() const override { 204 NotNullConstraint Tmp(*this); 205 Tmp.CannotBeNull = !this->CannotBeNull; 206 return std::make_shared<NotNullConstraint>(Tmp); 207 } 208 }; 209 210 /// The complete list of constraints that defines a single branch. 211 typedef std::vector<ValueConstraintPtr> ConstraintSet; 212 213 using ArgTypes = std::vector<QualType>; 214 using Cases = std::vector<ConstraintSet>; 215 216 /// Includes information about 217 /// * function prototype (which is necessary to 218 /// ensure we're modeling the right function and casting values properly), 219 /// * approach to invalidation, 220 /// * a list of branches - a list of list of ranges - 221 /// A branch represents a path in the exploded graph of a function (which 222 /// is a tree). So, a branch is a series of assumptions. In other words, 223 /// branches represent split states and additional assumptions on top of 224 /// the splitting assumption. 225 /// For example, consider the branches in `isalpha(x)` 226 /// Branch 1) 227 /// x is in range ['A', 'Z'] or in ['a', 'z'] 228 /// then the return value is not 0. (I.e. out-of-range [0, 0]) 229 /// Branch 2) 230 /// x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z'] 231 /// then the return value is 0. 232 /// * a list of argument constraints, that must be true on every branch. 233 /// If these constraints are not satisfied that means a fatal error 234 /// usually resulting in undefined behaviour. 235 struct Summary { 236 const ArgTypes ArgTys; 237 const QualType RetTy; 238 const InvalidationKind InvalidationKd; 239 Cases CaseConstraints; 240 ConstraintSet ArgConstraints; 241 242 Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd) 243 : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {} 244 245 Summary &Case(ConstraintSet&& CS) { 246 CaseConstraints.push_back(std::move(CS)); 247 return *this; 248 } 249 Summary &ArgConstraint(ValueConstraintPtr VC) { 250 ArgConstraints.push_back(VC); 251 return *this; 252 } 253 254 private: 255 static void assertTypeSuitableForSummary(QualType T) { 256 assert(!T->isVoidType() && 257 "We should have had no significant void types in the spec"); 258 assert(T.isCanonical() && 259 "We should only have canonical types in the spec"); 260 } 261 262 public: 263 QualType getArgType(ArgNo ArgN) const { 264 QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN]; 265 assertTypeSuitableForSummary(T); 266 return T; 267 } 268 269 /// Try our best to figure out if the summary's signature matches 270 /// *the* library function to which this specification applies. 271 bool matchesSignature(const FunctionDecl *FD) const; 272 }; 273 274 // The map of all functions supported by the checker. It is initialized 275 // lazily, and it doesn't change after initialization. 276 using FunctionSummaryMapType = llvm::DenseMap<const FunctionDecl *, Summary>; 277 mutable FunctionSummaryMapType FunctionSummaryMap; 278 279 mutable std::unique_ptr<BugType> BT_InvalidArg; 280 281 // Auxiliary functions to support ArgNo within all structures 282 // in a unified manner. 283 static QualType getArgType(const Summary &Summary, ArgNo ArgN) { 284 return Summary.getArgType(ArgN); 285 } 286 static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) { 287 return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN); 288 } 289 290 public: 291 void checkPreCall(const CallEvent &Call, CheckerContext &C) const; 292 void checkPostCall(const CallEvent &Call, CheckerContext &C) const; 293 bool evalCall(const CallEvent &Call, CheckerContext &C) const; 294 295 enum CheckKind { 296 CK_StdCLibraryFunctionArgsChecker, 297 CK_StdCLibraryFunctionsTesterChecker, 298 CK_NumCheckKinds 299 }; 300 DefaultBool ChecksEnabled[CK_NumCheckKinds]; 301 CheckerNameRef CheckNames[CK_NumCheckKinds]; 302 303 bool DisplayLoadedSummaries = false; 304 305 private: 306 Optional<Summary> findFunctionSummary(const FunctionDecl *FD, 307 CheckerContext &C) const; 308 Optional<Summary> findFunctionSummary(const CallEvent &Call, 309 CheckerContext &C) const; 310 311 void initFunctionSummaries(CheckerContext &C) const; 312 313 void reportBug(const CallEvent &Call, ExplodedNode *N, 314 CheckerContext &C) const { 315 if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker]) 316 return; 317 // TODO Add detailed diagnostic. 318 StringRef Msg = "Function argument constraint is not satisfied"; 319 if (!BT_InvalidArg) 320 BT_InvalidArg = std::make_unique<BugType>( 321 CheckNames[CK_StdCLibraryFunctionArgsChecker], 322 "Unsatisfied argument constraints", categories::LogicError); 323 auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N); 324 bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R); 325 C.emitReport(std::move(R)); 326 } 327 }; 328 329 const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret = 330 std::numeric_limits<ArgNo>::max(); 331 332 } // end of anonymous namespace 333 334 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange( 335 ProgramStateRef State, const CallEvent &Call, 336 const Summary &Summary) const { 337 338 ProgramStateManager &Mgr = State->getStateManager(); 339 SValBuilder &SVB = Mgr.getSValBuilder(); 340 BasicValueFactory &BVF = SVB.getBasicValueFactory(); 341 ConstraintManager &CM = Mgr.getConstraintManager(); 342 QualType T = getArgType(Summary, getArgNo()); 343 SVal V = getArgSVal(Call, getArgNo()); 344 345 if (auto N = V.getAs<NonLoc>()) { 346 const IntRangeVector &R = getRanges(); 347 size_t E = R.size(); 348 for (size_t I = 0; I != E; ++I) { 349 const llvm::APSInt &Min = BVF.getValue(R[I].first, T); 350 const llvm::APSInt &Max = BVF.getValue(R[I].second, T); 351 assert(Min <= Max); 352 State = CM.assumeInclusiveRange(State, *N, Min, Max, false); 353 if (!State) 354 break; 355 } 356 } 357 358 return State; 359 } 360 361 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsWithinRange( 362 ProgramStateRef State, const CallEvent &Call, 363 const Summary &Summary) const { 364 365 ProgramStateManager &Mgr = State->getStateManager(); 366 SValBuilder &SVB = Mgr.getSValBuilder(); 367 BasicValueFactory &BVF = SVB.getBasicValueFactory(); 368 ConstraintManager &CM = Mgr.getConstraintManager(); 369 QualType T = getArgType(Summary, getArgNo()); 370 SVal V = getArgSVal(Call, getArgNo()); 371 372 // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R". 373 // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary, 374 // and then cut away all holes in R one by one. 375 // 376 // E.g. consider a range list R as [A, B] and [C, D] 377 // -------+--------+------------------+------------+-----------> 378 // A B C D 379 // Then we assume that the value is not in [-inf, A - 1], 380 // then not in [D + 1, +inf], then not in [B + 1, C - 1] 381 if (auto N = V.getAs<NonLoc>()) { 382 const IntRangeVector &R = getRanges(); 383 size_t E = R.size(); 384 385 const llvm::APSInt &MinusInf = BVF.getMinValue(T); 386 const llvm::APSInt &PlusInf = BVF.getMaxValue(T); 387 388 const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T); 389 if (Left != PlusInf) { 390 assert(MinusInf <= Left); 391 State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false); 392 if (!State) 393 return nullptr; 394 } 395 396 const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T); 397 if (Right != MinusInf) { 398 assert(Right <= PlusInf); 399 State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false); 400 if (!State) 401 return nullptr; 402 } 403 404 for (size_t I = 1; I != E; ++I) { 405 const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T); 406 const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T); 407 if (Min <= Max) { 408 State = CM.assumeInclusiveRange(State, *N, Min, Max, false); 409 if (!State) 410 return nullptr; 411 } 412 } 413 } 414 415 return State; 416 } 417 418 ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply( 419 ProgramStateRef State, const CallEvent &Call, 420 const Summary &Summary) const { 421 422 ProgramStateManager &Mgr = State->getStateManager(); 423 SValBuilder &SVB = Mgr.getSValBuilder(); 424 QualType CondT = SVB.getConditionType(); 425 QualType T = getArgType(Summary, getArgNo()); 426 SVal V = getArgSVal(Call, getArgNo()); 427 428 BinaryOperator::Opcode Op = getOpcode(); 429 ArgNo OtherArg = getOtherArgNo(); 430 SVal OtherV = getArgSVal(Call, OtherArg); 431 QualType OtherT = getArgType(Summary, OtherArg); 432 // Note: we avoid integral promotion for comparison. 433 OtherV = SVB.evalCast(OtherV, T, OtherT); 434 if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT) 435 .getAs<DefinedOrUnknownSVal>()) 436 State = State->assume(*CompV, true); 437 return State; 438 } 439 440 void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call, 441 CheckerContext &C) const { 442 Optional<Summary> FoundSummary = findFunctionSummary(Call, C); 443 if (!FoundSummary) 444 return; 445 446 const Summary &Summary = *FoundSummary; 447 ProgramStateRef State = C.getState(); 448 449 ProgramStateRef NewState = State; 450 for (const ValueConstraintPtr& VC : Summary.ArgConstraints) { 451 ProgramStateRef SuccessSt = VC->apply(NewState, Call, Summary); 452 ProgramStateRef FailureSt = VC->negate()->apply(NewState, Call, Summary); 453 // The argument constraint is not satisfied. 454 if (FailureSt && !SuccessSt) { 455 if (ExplodedNode *N = C.generateErrorNode(NewState)) 456 reportBug(Call, N, C); 457 break; 458 } else { 459 // We will apply the constraint even if we cannot reason about the 460 // argument. This means both SuccessSt and FailureSt can be true. If we 461 // weren't applying the constraint that would mean that symbolic 462 // execution continues on a code whose behaviour is undefined. 463 assert(SuccessSt); 464 NewState = SuccessSt; 465 } 466 } 467 if (NewState && NewState != State) 468 C.addTransition(NewState); 469 } 470 471 void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call, 472 CheckerContext &C) const { 473 Optional<Summary> FoundSummary = findFunctionSummary(Call, C); 474 if (!FoundSummary) 475 return; 476 477 // Now apply the constraints. 478 const Summary &Summary = *FoundSummary; 479 ProgramStateRef State = C.getState(); 480 481 // Apply case/branch specifications. 482 for (const auto &VRS : Summary.CaseConstraints) { 483 ProgramStateRef NewState = State; 484 for (const auto &VR: VRS) { 485 NewState = VR->apply(NewState, Call, Summary); 486 if (!NewState) 487 break; 488 } 489 490 if (NewState && NewState != State) 491 C.addTransition(NewState); 492 } 493 } 494 495 bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call, 496 CheckerContext &C) const { 497 Optional<Summary> FoundSummary = findFunctionSummary(Call, C); 498 if (!FoundSummary) 499 return false; 500 501 const Summary &Summary = *FoundSummary; 502 switch (Summary.InvalidationKd) { 503 case EvalCallAsPure: { 504 ProgramStateRef State = C.getState(); 505 const LocationContext *LC = C.getLocationContext(); 506 const auto *CE = cast_or_null<CallExpr>(Call.getOriginExpr()); 507 SVal V = C.getSValBuilder().conjureSymbolVal( 508 CE, LC, CE->getType().getCanonicalType(), C.blockCount()); 509 State = State->BindExpr(CE, LC, V); 510 C.addTransition(State); 511 return true; 512 } 513 case NoEvalCall: 514 // Summary tells us to avoid performing eval::Call. The function is possibly 515 // evaluated by another checker, or evaluated conservatively. 516 return false; 517 } 518 llvm_unreachable("Unknown invalidation kind!"); 519 } 520 521 bool StdLibraryFunctionsChecker::Summary::matchesSignature( 522 const FunctionDecl *FD) const { 523 // Check number of arguments: 524 if (FD->param_size() != ArgTys.size()) 525 return false; 526 527 // Check return type if relevant: 528 if (!RetTy.isNull() && RetTy != FD->getReturnType().getCanonicalType()) 529 return false; 530 531 // Check argument types when relevant: 532 for (size_t I = 0, E = ArgTys.size(); I != E; ++I) { 533 QualType FormalT = ArgTys[I]; 534 // Null type marks irrelevant arguments. 535 if (FormalT.isNull()) 536 continue; 537 538 assertTypeSuitableForSummary(FormalT); 539 540 QualType ActualT = FD->getParamDecl(I)->getType().getCanonicalType(); 541 if (ActualT != FormalT) 542 return false; 543 } 544 545 return true; 546 } 547 548 Optional<StdLibraryFunctionsChecker::Summary> 549 StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD, 550 CheckerContext &C) const { 551 if (!FD) 552 return None; 553 554 initFunctionSummaries(C); 555 556 auto FSMI = FunctionSummaryMap.find(FD->getCanonicalDecl()); 557 if (FSMI == FunctionSummaryMap.end()) 558 return None; 559 return FSMI->second; 560 } 561 562 Optional<StdLibraryFunctionsChecker::Summary> 563 StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call, 564 CheckerContext &C) const { 565 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl()); 566 if (!FD) 567 return None; 568 return findFunctionSummary(FD, C); 569 } 570 571 llvm::Optional<const FunctionDecl *> 572 lookupGlobalCFunction(StringRef Name, const ASTContext &ACtx) { 573 IdentifierInfo &II = ACtx.Idents.get(Name); 574 auto LookupRes = ACtx.getTranslationUnitDecl()->lookup(&II); 575 if (LookupRes.size() == 0) 576 return None; 577 578 assert(LookupRes.size() == 1 && "In C, identifiers should be unique"); 579 Decl *D = LookupRes.front()->getCanonicalDecl(); 580 auto *FD = dyn_cast<FunctionDecl>(D); 581 if (!FD) 582 return None; 583 return FD->getCanonicalDecl(); 584 } 585 586 void StdLibraryFunctionsChecker::initFunctionSummaries( 587 CheckerContext &C) const { 588 if (!FunctionSummaryMap.empty()) 589 return; 590 591 SValBuilder &SVB = C.getSValBuilder(); 592 BasicValueFactory &BVF = SVB.getBasicValueFactory(); 593 const ASTContext &ACtx = BVF.getContext(); 594 595 // These types are useful for writing specifications quickly, 596 // New specifications should probably introduce more types. 597 // Some types are hard to obtain from the AST, eg. "ssize_t". 598 // In such cases it should be possible to provide multiple variants 599 // of function summary for common cases (eg. ssize_t could be int or long 600 // or long long, so three summary variants would be enough). 601 // Of course, function variants are also useful for C++ overloads. 602 const QualType 603 Irrelevant{}; // A placeholder, whenever we do not care about the type. 604 const QualType IntTy = ACtx.IntTy; 605 const QualType LongTy = ACtx.LongTy; 606 const QualType LongLongTy = ACtx.LongLongTy; 607 const QualType SizeTy = ACtx.getSizeType(); 608 const QualType VoidPtrTy = ACtx.VoidPtrTy; // void * 609 const QualType VoidPtrRestrictTy = 610 ACtx.getRestrictType(VoidPtrTy); // void *restrict 611 const QualType ConstVoidPtrTy = 612 ACtx.getPointerType(ACtx.VoidTy.withConst()); // const void * 613 const QualType ConstCharPtrTy = 614 ACtx.getPointerType(ACtx.CharTy.withConst()); // const char * 615 const QualType ConstVoidPtrRestrictTy = 616 ACtx.getRestrictType(ConstVoidPtrTy); // const void *restrict 617 618 const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue(); 619 const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue(); 620 const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue(); 621 622 // Set UCharRangeMax to min of int or uchar maximum value. 623 // The C standard states that the arguments of functions like isalpha must 624 // be representable as an unsigned char. Their type is 'int', so the max 625 // value of the argument should be min(UCharMax, IntMax). This just happen 626 // to be true for commonly used and well tested instruction set 627 // architectures, but not for others. 628 const RangeInt UCharRangeMax = 629 std::min(BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue(), IntMax); 630 631 // The platform dependent value of EOF. 632 // Try our best to parse this from the Preprocessor, otherwise fallback to -1. 633 const auto EOFv = [&C]() -> RangeInt { 634 if (const llvm::Optional<int> OptInt = 635 tryExpandAsInteger("EOF", C.getPreprocessor())) 636 return *OptInt; 637 return -1; 638 }(); 639 640 // Auxiliary class to aid adding summaries to the summary map. 641 struct AddToFunctionSummaryMap { 642 const ASTContext &ACtx; 643 FunctionSummaryMapType ⤅ 644 bool DisplayLoadedSummaries; 645 AddToFunctionSummaryMap(const ASTContext &ACtx, FunctionSummaryMapType &FSM, 646 bool DisplayLoadedSummaries) 647 : ACtx(ACtx), Map(FSM), DisplayLoadedSummaries(DisplayLoadedSummaries) { 648 } 649 650 // Add a summary to a FunctionDecl found by lookup. The lookup is performed 651 // by the given Name, and in the global scope. The summary will be attached 652 // to the found FunctionDecl only if the signatures match. 653 void operator()(StringRef Name, const Summary &S) { 654 IdentifierInfo &II = ACtx.Idents.get(Name); 655 auto LookupRes = ACtx.getTranslationUnitDecl()->lookup(&II); 656 if (LookupRes.size() == 0) 657 return; 658 for (Decl *D : LookupRes) { 659 if (auto *FD = dyn_cast<FunctionDecl>(D)) { 660 if (S.matchesSignature(FD)) { 661 auto Res = Map.insert({FD->getCanonicalDecl(), S}); 662 assert(Res.second && "Function already has a summary set!"); 663 (void)Res; 664 if (DisplayLoadedSummaries) { 665 llvm::errs() << "Loaded summary for: "; 666 FD->print(llvm::errs()); 667 llvm::errs() << "\n"; 668 } 669 return; 670 } 671 } 672 } 673 } 674 // Add several summaries for the given name. 675 void operator()(StringRef Name, const std::vector<Summary> &Summaries) { 676 for (const Summary &S : Summaries) 677 operator()(Name, S); 678 } 679 } addToFunctionSummaryMap(ACtx, FunctionSummaryMap, DisplayLoadedSummaries); 680 681 // We are finally ready to define specifications for all supported functions. 682 // 683 // The signature needs to have the correct number of arguments. 684 // However, we insert `Irrelevant' when the type is insignificant. 685 // 686 // Argument ranges should always cover all variants. If return value 687 // is completely unknown, omit it from the respective range set. 688 // 689 // All types in the spec need to be canonical. 690 // 691 // Every item in the list of range sets represents a particular 692 // execution path the analyzer would need to explore once 693 // the call is modeled - a new program state is constructed 694 // for every range set, and each range line in the range set 695 // corresponds to a specific constraint within this state. 696 // 697 // Upon comparing to another argument, the other argument is casted 698 // to the current argument's type. This avoids proper promotion but 699 // seems useful. For example, read() receives size_t argument, 700 // and its return value, which is of type ssize_t, cannot be greater 701 // than this argument. If we made a promotion, and the size argument 702 // is equal to, say, 10, then we'd impose a range of [0, 10] on the 703 // return value, however the correct range is [-1, 10]. 704 // 705 // Please update the list of functions in the header after editing! 706 707 // Below are helpers functions to create the summaries. 708 auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind, 709 IntRangeVector Ranges) { 710 return std::make_shared<RangeConstraint>(ArgN, Kind, Ranges); 711 }; 712 struct { 713 auto operator()(RangeKind Kind, IntRangeVector Ranges) { 714 return std::make_shared<RangeConstraint>(Ret, Kind, Ranges); 715 } 716 auto operator()(BinaryOperator::Opcode Op, ArgNo OtherArgN) { 717 return std::make_shared<ComparisonConstraint>(Ret, Op, OtherArgN); 718 } 719 } ReturnValueCondition; 720 auto Range = [](RangeInt b, RangeInt e) { 721 return IntRangeVector{std::pair<RangeInt, RangeInt>{b, e}}; 722 }; 723 auto SingleValue = [](RangeInt v) { 724 return IntRangeVector{std::pair<RangeInt, RangeInt>{v, v}}; 725 }; 726 auto LessThanOrEq = BO_LE; 727 auto NotNull = [&](ArgNo ArgN) { 728 return std::make_shared<NotNullConstraint>(ArgN); 729 }; 730 731 using RetType = QualType; 732 // Templates for summaries that are reused by many functions. 733 auto Getc = [&]() { 734 return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall) 735 .Case({ReturnValueCondition(WithinRange, 736 {{EOFv, EOFv}, {0, UCharRangeMax}})}); 737 }; 738 auto Read = [&](RetType R, RangeInt Max) { 739 return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R}, 740 NoEvalCall) 741 .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)), 742 ReturnValueCondition(WithinRange, Range(-1, Max))}); 743 }; 744 auto Fread = [&]() { 745 return Summary(ArgTypes{VoidPtrRestrictTy, Irrelevant, SizeTy, Irrelevant}, 746 RetType{SizeTy}, NoEvalCall) 747 .Case({ 748 ReturnValueCondition(LessThanOrEq, ArgNo(2)), 749 }) 750 .ArgConstraint(NotNull(ArgNo(0))); 751 }; 752 auto Fwrite = [&]() { 753 return Summary( 754 ArgTypes{ConstVoidPtrRestrictTy, Irrelevant, SizeTy, Irrelevant}, 755 RetType{SizeTy}, NoEvalCall) 756 .Case({ 757 ReturnValueCondition(LessThanOrEq, ArgNo(2)), 758 }) 759 .ArgConstraint(NotNull(ArgNo(0))); 760 }; 761 auto Getline = [&](RetType R, RangeInt Max) { 762 return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R}, 763 NoEvalCall) 764 .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})}); 765 }; 766 767 // The isascii() family of functions. 768 // The behavior is undefined if the value of the argument is not 769 // representable as unsigned char or is not equal to EOF. See e.g. C99 770 // 7.4.1.2 The isalpha function (p: 181-182). 771 addToFunctionSummaryMap( 772 "isalnum", 773 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 774 // Boils down to isupper() or islower() or isdigit(). 775 .Case({ArgumentCondition(0U, WithinRange, 776 {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}), 777 ReturnValueCondition(OutOfRange, SingleValue(0))}) 778 // The locale-specific range. 779 // No post-condition. We are completely unaware of 780 // locale-specific return values. 781 .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) 782 .Case( 783 {ArgumentCondition( 784 0U, OutOfRange, 785 {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}), 786 ReturnValueCondition(WithinRange, SingleValue(0))}) 787 .ArgConstraint(ArgumentCondition( 788 0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}}))); 789 addToFunctionSummaryMap( 790 "isalpha", 791 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 792 .Case({ArgumentCondition(0U, WithinRange, {{'A', 'Z'}, {'a', 'z'}}), 793 ReturnValueCondition(OutOfRange, SingleValue(0))}) 794 // The locale-specific range. 795 .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) 796 .Case({ArgumentCondition( 797 0U, OutOfRange, 798 {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}), 799 ReturnValueCondition(WithinRange, SingleValue(0))})); 800 addToFunctionSummaryMap( 801 "isascii", 802 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 803 .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), 804 ReturnValueCondition(OutOfRange, SingleValue(0))}) 805 .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)), 806 ReturnValueCondition(WithinRange, SingleValue(0))})); 807 addToFunctionSummaryMap( 808 "isblank", 809 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 810 .Case({ArgumentCondition(0U, WithinRange, {{'\t', '\t'}, {' ', ' '}}), 811 ReturnValueCondition(OutOfRange, SingleValue(0))}) 812 .Case({ArgumentCondition(0U, OutOfRange, {{'\t', '\t'}, {' ', ' '}}), 813 ReturnValueCondition(WithinRange, SingleValue(0))})); 814 addToFunctionSummaryMap( 815 "iscntrl", 816 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 817 .Case({ArgumentCondition(0U, WithinRange, {{0, 32}, {127, 127}}), 818 ReturnValueCondition(OutOfRange, SingleValue(0))}) 819 .Case({ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}), 820 ReturnValueCondition(WithinRange, SingleValue(0))})); 821 addToFunctionSummaryMap( 822 "isdigit", 823 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 824 .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')), 825 ReturnValueCondition(OutOfRange, SingleValue(0))}) 826 .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')), 827 ReturnValueCondition(WithinRange, SingleValue(0))})); 828 addToFunctionSummaryMap( 829 "isgraph", 830 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 831 .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)), 832 ReturnValueCondition(OutOfRange, SingleValue(0))}) 833 .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)), 834 ReturnValueCondition(WithinRange, SingleValue(0))})); 835 addToFunctionSummaryMap( 836 "islower", 837 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 838 // Is certainly lowercase. 839 .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')), 840 ReturnValueCondition(OutOfRange, SingleValue(0))}) 841 // Is ascii but not lowercase. 842 .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), 843 ArgumentCondition(0U, OutOfRange, Range('a', 'z')), 844 ReturnValueCondition(WithinRange, SingleValue(0))}) 845 // The locale-specific range. 846 .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) 847 // Is not an unsigned char. 848 .Case({ArgumentCondition(0U, OutOfRange, Range(0, UCharRangeMax)), 849 ReturnValueCondition(WithinRange, SingleValue(0))})); 850 addToFunctionSummaryMap( 851 "isprint", 852 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 853 .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)), 854 ReturnValueCondition(OutOfRange, SingleValue(0))}) 855 .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)), 856 ReturnValueCondition(WithinRange, SingleValue(0))})); 857 addToFunctionSummaryMap( 858 "ispunct", 859 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 860 .Case({ArgumentCondition( 861 0U, WithinRange, 862 {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), 863 ReturnValueCondition(OutOfRange, SingleValue(0))}) 864 .Case({ArgumentCondition( 865 0U, OutOfRange, 866 {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), 867 ReturnValueCondition(WithinRange, SingleValue(0))})); 868 addToFunctionSummaryMap( 869 "isspace", 870 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 871 // Space, '\f', '\n', '\r', '\t', '\v'. 872 .Case({ArgumentCondition(0U, WithinRange, {{9, 13}, {' ', ' '}}), 873 ReturnValueCondition(OutOfRange, SingleValue(0))}) 874 // The locale-specific range. 875 .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) 876 .Case({ArgumentCondition(0U, OutOfRange, 877 {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}), 878 ReturnValueCondition(WithinRange, SingleValue(0))})); 879 addToFunctionSummaryMap( 880 "isupper", 881 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 882 // Is certainly uppercase. 883 .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')), 884 ReturnValueCondition(OutOfRange, SingleValue(0))}) 885 // The locale-specific range. 886 .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) 887 // Other. 888 .Case({ArgumentCondition(0U, OutOfRange, 889 {{'A', 'Z'}, {128, UCharRangeMax}}), 890 ReturnValueCondition(WithinRange, SingleValue(0))})); 891 addToFunctionSummaryMap( 892 "isxdigit", 893 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 894 .Case({ArgumentCondition(0U, WithinRange, 895 {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), 896 ReturnValueCondition(OutOfRange, SingleValue(0))}) 897 .Case({ArgumentCondition(0U, OutOfRange, 898 {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), 899 ReturnValueCondition(WithinRange, SingleValue(0))})); 900 901 // The getc() family of functions that returns either a char or an EOF. 902 addToFunctionSummaryMap("getc", Getc()); 903 addToFunctionSummaryMap("fgetc", Getc()); 904 addToFunctionSummaryMap( 905 "getchar", Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall) 906 .Case({ReturnValueCondition( 907 WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})})); 908 909 // read()-like functions that never return more than buffer size. 910 // We are not sure how ssize_t is defined on every platform, so we 911 // provide three variants that should cover common cases. 912 addToFunctionSummaryMap("read", {Read(IntTy, IntMax), Read(LongTy, LongMax), 913 Read(LongLongTy, LongLongMax)}); 914 addToFunctionSummaryMap("write", {Read(IntTy, IntMax), Read(LongTy, LongMax), 915 Read(LongLongTy, LongLongMax)}); 916 addToFunctionSummaryMap("fread", Fread()); 917 addToFunctionSummaryMap("fwrite", Fwrite()); 918 // getline()-like functions either fail or read at least the delimiter. 919 addToFunctionSummaryMap("getline", 920 {Getline(IntTy, IntMax), Getline(LongTy, LongMax), 921 Getline(LongLongTy, LongLongMax)}); 922 addToFunctionSummaryMap("getdelim", 923 {Getline(IntTy, IntMax), Getline(LongTy, LongMax), 924 Getline(LongLongTy, LongLongMax)}); 925 926 // Functions for testing. 927 if (ChecksEnabled[CK_StdCLibraryFunctionsTesterChecker]) { 928 addToFunctionSummaryMap( 929 "__two_constrained_args", 930 Summary(ArgTypes{IntTy, IntTy}, RetType{IntTy}, EvalCallAsPure) 931 .ArgConstraint(ArgumentCondition(0U, WithinRange, SingleValue(1))) 932 .ArgConstraint(ArgumentCondition(1U, WithinRange, SingleValue(1)))); 933 addToFunctionSummaryMap( 934 "__arg_constrained_twice", 935 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 936 .ArgConstraint(ArgumentCondition(0U, OutOfRange, SingleValue(1))) 937 .ArgConstraint(ArgumentCondition(0U, OutOfRange, SingleValue(2)))); 938 addToFunctionSummaryMap( 939 "__defaultparam", 940 Summary(ArgTypes{Irrelevant, IntTy}, RetType{IntTy}, EvalCallAsPure) 941 .ArgConstraint(NotNull(ArgNo(0)))); 942 addToFunctionSummaryMap("__variadic", 943 Summary(ArgTypes{VoidPtrTy, ConstCharPtrTy}, 944 RetType{IntTy}, EvalCallAsPure) 945 .ArgConstraint(NotNull(ArgNo(0))) 946 .ArgConstraint(NotNull(ArgNo(1)))); 947 } 948 } 949 950 void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) { 951 auto *Checker = mgr.registerChecker<StdLibraryFunctionsChecker>(); 952 Checker->DisplayLoadedSummaries = 953 mgr.getAnalyzerOptions().getCheckerBooleanOption( 954 Checker, "DisplayLoadedSummaries"); 955 } 956 957 bool ento::shouldRegisterStdCLibraryFunctionsChecker(const CheckerManager &mgr) { 958 return true; 959 } 960 961 #define REGISTER_CHECKER(name) \ 962 void ento::register##name(CheckerManager &mgr) { \ 963 StdLibraryFunctionsChecker *checker = \ 964 mgr.getChecker<StdLibraryFunctionsChecker>(); \ 965 checker->ChecksEnabled[StdLibraryFunctionsChecker::CK_##name] = true; \ 966 checker->CheckNames[StdLibraryFunctionsChecker::CK_##name] = \ 967 mgr.getCurrentCheckerName(); \ 968 } \ 969 \ 970 bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; } 971 972 REGISTER_CHECKER(StdCLibraryFunctionArgsChecker) 973 REGISTER_CHECKER(StdCLibraryFunctionsTesterChecker) 974