1 //=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This checker improves modeling of a few simple library functions. 10 // 11 // This checker provides a specification format - `Summary' - and 12 // contains descriptions of some library functions in this format. Each 13 // specification contains a list of branches for splitting the program state 14 // upon call, and range constraints on argument and return-value symbols that 15 // are satisfied on each branch. This spec can be expanded to include more 16 // items, like external effects of the function. 17 // 18 // The main difference between this approach and the body farms technique is 19 // in more explicit control over how many branches are produced. For example, 20 // consider standard C function `ispunct(int x)', which returns a non-zero value 21 // iff `x' is a punctuation character, that is, when `x' is in range 22 // ['!', '/'] [':', '@'] U ['[', '\`'] U ['{', '~']. 23 // `Summary' provides only two branches for this function. However, 24 // any attempt to describe this range with if-statements in the body farm 25 // would result in many more branches. Because each branch needs to be analyzed 26 // independently, this significantly reduces performance. Additionally, 27 // once we consider a branch on which `x' is in range, say, ['!', '/'], 28 // we assume that such branch is an important separate path through the program, 29 // which may lead to false positives because considering this particular path 30 // was not consciously intended, and therefore it might have been unreachable. 31 // 32 // This checker uses eval::Call for modeling pure functions (functions without 33 // side effets), for which their `Summary' is a precise model. This avoids 34 // unnecessary invalidation passes. Conflicts with other checkers are unlikely 35 // because if the function has no other effects, other checkers would probably 36 // never want to improve upon the modeling done by this checker. 37 // 38 // Non-pure functions, for which only partial improvement over the default 39 // behavior is expected, are modeled via check::PostCall, non-intrusively. 40 // 41 // The following standard C functions are currently supported: 42 // 43 // fgetc getline isdigit isupper 44 // fread isalnum isgraph isxdigit 45 // fwrite isalpha islower read 46 // getc isascii isprint write 47 // getchar isblank ispunct 48 // getdelim iscntrl isspace 49 // 50 //===----------------------------------------------------------------------===// 51 52 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 53 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 54 #include "clang/StaticAnalyzer/Core/Checker.h" 55 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 56 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 57 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 58 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" 59 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" 60 61 using namespace clang; 62 using namespace clang::ento; 63 64 namespace { 65 class StdLibraryFunctionsChecker 66 : public Checker<check::PreCall, check::PostCall, eval::Call> { 67 /// Below is a series of typedefs necessary to define function specs. 68 /// We avoid nesting types here because each additional qualifier 69 /// would need to be repeated in every function spec. 70 struct Summary; 71 72 /// Specify how much the analyzer engine should entrust modeling this function 73 /// to us. If he doesn't, he performs additional invalidations. 74 enum InvalidationKind { NoEvalCall, EvalCallAsPure }; 75 76 // The universal integral type to use in value range descriptions. 77 // Unsigned to make sure overflows are well-defined. 78 typedef uint64_t RangeInt; 79 80 /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is 81 /// a non-negative integer, which less than 5 and not equal to 2. For 82 /// `ComparesToArgument', holds information about how exactly to compare to 83 /// the argument. 84 typedef std::vector<std::pair<RangeInt, RangeInt>> IntRangeVector; 85 86 /// A reference to an argument or return value by its number. 87 /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but 88 /// obviously uint32_t should be enough for all practical purposes. 89 typedef uint32_t ArgNo; 90 static const ArgNo Ret; 91 92 class ValueConstraint; 93 94 // Pointer to the ValueConstraint. We need a copyable, polymorphic and 95 // default initialize able type (vector needs that). A raw pointer was good, 96 // however, we cannot default initialize that. unique_ptr makes the Summary 97 // class non-copyable, therefore not an option. Releasing the copyability 98 // requirement would render the initialization of the Summary map infeasible. 99 using ValueConstraintPtr = std::shared_ptr<ValueConstraint>; 100 101 /// Polymorphic base class that represents a constraint on a given argument 102 /// (or return value) of a function. Derived classes implement different kind 103 /// of constraints, e.g range constraints or correlation between two 104 /// arguments. 105 class ValueConstraint { 106 public: 107 ValueConstraint(ArgNo ArgN) : ArgN(ArgN) {} 108 virtual ~ValueConstraint() {} 109 /// Apply the effects of the constraint on the given program state. If null 110 /// is returned then the constraint is not feasible. 111 virtual ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 112 const Summary &Summary, 113 CheckerContext &C) const = 0; 114 virtual ValueConstraintPtr negate() const { 115 llvm_unreachable("Not implemented"); 116 }; 117 ArgNo getArgNo() const { return ArgN; } 118 119 protected: 120 ArgNo ArgN; // Argument to which we apply the constraint. 121 }; 122 123 /// Given a range, should the argument stay inside or outside this range? 124 enum RangeKind { OutOfRange, WithinRange }; 125 126 /// Encapsulates a single range on a single symbol within a branch. 127 class RangeConstraint : public ValueConstraint { 128 RangeKind Kind; // Kind of range definition. 129 IntRangeVector Args; // Polymorphic arguments. 130 131 public: 132 RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Args) 133 : ValueConstraint(ArgN), Kind(Kind), Args(Args) {} 134 135 const IntRangeVector &getRanges() const { 136 return Args; 137 } 138 139 private: 140 ProgramStateRef applyAsOutOfRange(ProgramStateRef State, 141 const CallEvent &Call, 142 const Summary &Summary) const; 143 ProgramStateRef applyAsWithinRange(ProgramStateRef State, 144 const CallEvent &Call, 145 const Summary &Summary) const; 146 public: 147 ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 148 const Summary &Summary, 149 CheckerContext &C) const override { 150 switch (Kind) { 151 case OutOfRange: 152 return applyAsOutOfRange(State, Call, Summary); 153 case WithinRange: 154 return applyAsWithinRange(State, Call, Summary); 155 } 156 llvm_unreachable("Unknown range kind!"); 157 } 158 159 ValueConstraintPtr negate() const override { 160 RangeConstraint Tmp(*this); 161 switch (Kind) { 162 case OutOfRange: 163 Tmp.Kind = WithinRange; 164 break; 165 case WithinRange: 166 Tmp.Kind = OutOfRange; 167 break; 168 } 169 return std::make_shared<RangeConstraint>(Tmp); 170 } 171 }; 172 173 class ComparisonConstraint : public ValueConstraint { 174 BinaryOperator::Opcode Opcode; 175 ArgNo OtherArgN; 176 177 public: 178 ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode, 179 ArgNo OtherArgN) 180 : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {} 181 ArgNo getOtherArgNo() const { return OtherArgN; } 182 BinaryOperator::Opcode getOpcode() const { return Opcode; } 183 ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 184 const Summary &Summary, 185 CheckerContext &C) const override; 186 }; 187 188 class NotNullConstraint : public ValueConstraint { 189 using ValueConstraint::ValueConstraint; 190 // This variable has a role when we negate the constraint. 191 bool CannotBeNull = true; 192 193 public: 194 ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 195 const Summary &Summary, 196 CheckerContext &C) const override { 197 SVal V = getArgSVal(Call, getArgNo()); 198 if (V.isUndef()) 199 return State; 200 201 DefinedOrUnknownSVal L = V.castAs<DefinedOrUnknownSVal>(); 202 if (!L.getAs<Loc>()) 203 return State; 204 205 return State->assume(L, CannotBeNull); 206 } 207 208 ValueConstraintPtr negate() const override { 209 NotNullConstraint Tmp(*this); 210 Tmp.CannotBeNull = !this->CannotBeNull; 211 return std::make_shared<NotNullConstraint>(Tmp); 212 } 213 }; 214 215 // Represents a buffer argument with an additional size argument. 216 // E.g. the first two arguments here: 217 // ctime_s(char *buffer, rsize_t bufsz, const time_t *time); 218 // Another example: 219 // size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); 220 // // Here, ptr is the buffer, and its minimum size is `size * nmemb`. 221 class BufferSizeConstraint : public ValueConstraint { 222 // The argument which holds the size of the buffer. 223 ArgNo SizeArgN; 224 // The argument which is a multiplier to size. This is set in case of 225 // `fread` like functions where the size is computed as a multiplication of 226 // two arguments. 227 llvm::Optional<ArgNo> SizeMultiplierArgN; 228 // The operator we use in apply. This is negated in negate(). 229 BinaryOperator::Opcode Op = BO_LE; 230 231 public: 232 BufferSizeConstraint(ArgNo Buffer, ArgNo BufSize) 233 : ValueConstraint(Buffer), SizeArgN(BufSize) {} 234 235 BufferSizeConstraint(ArgNo Buffer, ArgNo BufSize, ArgNo BufSizeMultiplier) 236 : ValueConstraint(Buffer), SizeArgN(BufSize), 237 SizeMultiplierArgN(BufSizeMultiplier) {} 238 239 ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 240 const Summary &Summary, 241 CheckerContext &C) const override { 242 SValBuilder &SvalBuilder = C.getSValBuilder(); 243 // The buffer argument. 244 SVal BufV = getArgSVal(Call, getArgNo()); 245 // The size argument. 246 SVal SizeV = getArgSVal(Call, SizeArgN); 247 // Multiply with another argument if given. 248 if (SizeMultiplierArgN) { 249 SVal SizeMulV = getArgSVal(Call, *SizeMultiplierArgN); 250 SizeV = SvalBuilder.evalBinOp(State, BO_Mul, SizeV, SizeMulV, 251 Summary.getArgType(SizeArgN)); 252 } 253 // The dynamic size of the buffer argument, got from the analyzer engine. 254 SVal BufDynSize = getDynamicSizeWithOffset(State, BufV); 255 256 SVal Feasible = SvalBuilder.evalBinOp(State, Op, SizeV, BufDynSize, 257 SvalBuilder.getContext().BoolTy); 258 if (auto F = Feasible.getAs<DefinedOrUnknownSVal>()) 259 return State->assume(*F, true); 260 261 // We can get here only if the size argument or the dynamic size is 262 // undefined. But the dynamic size should never be undefined, only 263 // unknown. So, here, the size of the argument is undefined, i.e. we 264 // cannot apply the constraint. Actually, other checkers like 265 // CallAndMessage should catch this situation earlier, because we call a 266 // function with an uninitialized argument. 267 llvm_unreachable("Size argument or the dynamic size is Undefined"); 268 } 269 270 ValueConstraintPtr negate() const override { 271 BufferSizeConstraint Tmp(*this); 272 Tmp.Op = BinaryOperator::negateComparisonOp(Op); 273 return std::make_shared<BufferSizeConstraint>(Tmp); 274 } 275 }; 276 277 /// The complete list of constraints that defines a single branch. 278 typedef std::vector<ValueConstraintPtr> ConstraintSet; 279 280 using ArgTypes = std::vector<QualType>; 281 using Cases = std::vector<ConstraintSet>; 282 283 /// Includes information about 284 /// * function prototype (which is necessary to 285 /// ensure we're modeling the right function and casting values properly), 286 /// * approach to invalidation, 287 /// * a list of branches - a list of list of ranges - 288 /// A branch represents a path in the exploded graph of a function (which 289 /// is a tree). So, a branch is a series of assumptions. In other words, 290 /// branches represent split states and additional assumptions on top of 291 /// the splitting assumption. 292 /// For example, consider the branches in `isalpha(x)` 293 /// Branch 1) 294 /// x is in range ['A', 'Z'] or in ['a', 'z'] 295 /// then the return value is not 0. (I.e. out-of-range [0, 0]) 296 /// Branch 2) 297 /// x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z'] 298 /// then the return value is 0. 299 /// * a list of argument constraints, that must be true on every branch. 300 /// If these constraints are not satisfied that means a fatal error 301 /// usually resulting in undefined behaviour. 302 struct Summary { 303 const ArgTypes ArgTys; 304 const QualType RetTy; 305 const InvalidationKind InvalidationKd; 306 Cases CaseConstraints; 307 ConstraintSet ArgConstraints; 308 309 Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd) 310 : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {} 311 312 Summary &Case(ConstraintSet&& CS) { 313 CaseConstraints.push_back(std::move(CS)); 314 return *this; 315 } 316 Summary &ArgConstraint(ValueConstraintPtr VC) { 317 ArgConstraints.push_back(VC); 318 return *this; 319 } 320 321 private: 322 static void assertTypeSuitableForSummary(QualType T) { 323 assert(!T->isVoidType() && 324 "We should have had no significant void types in the spec"); 325 assert(T.isCanonical() && 326 "We should only have canonical types in the spec"); 327 } 328 329 public: 330 QualType getArgType(ArgNo ArgN) const { 331 QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN]; 332 assertTypeSuitableForSummary(T); 333 return T; 334 } 335 336 /// Try our best to figure out if the summary's signature matches 337 /// *the* library function to which this specification applies. 338 bool matchesSignature(const FunctionDecl *FD) const; 339 }; 340 341 // The map of all functions supported by the checker. It is initialized 342 // lazily, and it doesn't change after initialization. 343 using FunctionSummaryMapType = llvm::DenseMap<const FunctionDecl *, Summary>; 344 mutable FunctionSummaryMapType FunctionSummaryMap; 345 346 mutable std::unique_ptr<BugType> BT_InvalidArg; 347 348 // Auxiliary functions to support ArgNo within all structures 349 // in a unified manner. 350 static QualType getArgType(const Summary &Summary, ArgNo ArgN) { 351 return Summary.getArgType(ArgN); 352 } 353 static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) { 354 return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN); 355 } 356 357 public: 358 void checkPreCall(const CallEvent &Call, CheckerContext &C) const; 359 void checkPostCall(const CallEvent &Call, CheckerContext &C) const; 360 bool evalCall(const CallEvent &Call, CheckerContext &C) const; 361 362 enum CheckKind { 363 CK_StdCLibraryFunctionArgsChecker, 364 CK_StdCLibraryFunctionsTesterChecker, 365 CK_NumCheckKinds 366 }; 367 DefaultBool ChecksEnabled[CK_NumCheckKinds]; 368 CheckerNameRef CheckNames[CK_NumCheckKinds]; 369 370 bool DisplayLoadedSummaries = false; 371 372 private: 373 Optional<Summary> findFunctionSummary(const FunctionDecl *FD, 374 CheckerContext &C) const; 375 Optional<Summary> findFunctionSummary(const CallEvent &Call, 376 CheckerContext &C) const; 377 378 void initFunctionSummaries(CheckerContext &C) const; 379 380 void reportBug(const CallEvent &Call, ExplodedNode *N, 381 CheckerContext &C) const { 382 if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker]) 383 return; 384 // TODO Add detailed diagnostic. 385 StringRef Msg = "Function argument constraint is not satisfied"; 386 if (!BT_InvalidArg) 387 BT_InvalidArg = std::make_unique<BugType>( 388 CheckNames[CK_StdCLibraryFunctionArgsChecker], 389 "Unsatisfied argument constraints", categories::LogicError); 390 auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N); 391 bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R); 392 C.emitReport(std::move(R)); 393 } 394 }; 395 396 const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret = 397 std::numeric_limits<ArgNo>::max(); 398 399 } // end of anonymous namespace 400 401 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange( 402 ProgramStateRef State, const CallEvent &Call, 403 const Summary &Summary) const { 404 405 ProgramStateManager &Mgr = State->getStateManager(); 406 SValBuilder &SVB = Mgr.getSValBuilder(); 407 BasicValueFactory &BVF = SVB.getBasicValueFactory(); 408 ConstraintManager &CM = Mgr.getConstraintManager(); 409 QualType T = getArgType(Summary, getArgNo()); 410 SVal V = getArgSVal(Call, getArgNo()); 411 412 if (auto N = V.getAs<NonLoc>()) { 413 const IntRangeVector &R = getRanges(); 414 size_t E = R.size(); 415 for (size_t I = 0; I != E; ++I) { 416 const llvm::APSInt &Min = BVF.getValue(R[I].first, T); 417 const llvm::APSInt &Max = BVF.getValue(R[I].second, T); 418 assert(Min <= Max); 419 State = CM.assumeInclusiveRange(State, *N, Min, Max, false); 420 if (!State) 421 break; 422 } 423 } 424 425 return State; 426 } 427 428 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsWithinRange( 429 ProgramStateRef State, const CallEvent &Call, 430 const Summary &Summary) const { 431 432 ProgramStateManager &Mgr = State->getStateManager(); 433 SValBuilder &SVB = Mgr.getSValBuilder(); 434 BasicValueFactory &BVF = SVB.getBasicValueFactory(); 435 ConstraintManager &CM = Mgr.getConstraintManager(); 436 QualType T = getArgType(Summary, getArgNo()); 437 SVal V = getArgSVal(Call, getArgNo()); 438 439 // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R". 440 // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary, 441 // and then cut away all holes in R one by one. 442 // 443 // E.g. consider a range list R as [A, B] and [C, D] 444 // -------+--------+------------------+------------+-----------> 445 // A B C D 446 // Then we assume that the value is not in [-inf, A - 1], 447 // then not in [D + 1, +inf], then not in [B + 1, C - 1] 448 if (auto N = V.getAs<NonLoc>()) { 449 const IntRangeVector &R = getRanges(); 450 size_t E = R.size(); 451 452 const llvm::APSInt &MinusInf = BVF.getMinValue(T); 453 const llvm::APSInt &PlusInf = BVF.getMaxValue(T); 454 455 const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T); 456 if (Left != PlusInf) { 457 assert(MinusInf <= Left); 458 State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false); 459 if (!State) 460 return nullptr; 461 } 462 463 const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T); 464 if (Right != MinusInf) { 465 assert(Right <= PlusInf); 466 State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false); 467 if (!State) 468 return nullptr; 469 } 470 471 for (size_t I = 1; I != E; ++I) { 472 const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T); 473 const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T); 474 if (Min <= Max) { 475 State = CM.assumeInclusiveRange(State, *N, Min, Max, false); 476 if (!State) 477 return nullptr; 478 } 479 } 480 } 481 482 return State; 483 } 484 485 ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply( 486 ProgramStateRef State, const CallEvent &Call, const Summary &Summary, 487 CheckerContext &C) const { 488 489 ProgramStateManager &Mgr = State->getStateManager(); 490 SValBuilder &SVB = Mgr.getSValBuilder(); 491 QualType CondT = SVB.getConditionType(); 492 QualType T = getArgType(Summary, getArgNo()); 493 SVal V = getArgSVal(Call, getArgNo()); 494 495 BinaryOperator::Opcode Op = getOpcode(); 496 ArgNo OtherArg = getOtherArgNo(); 497 SVal OtherV = getArgSVal(Call, OtherArg); 498 QualType OtherT = getArgType(Summary, OtherArg); 499 // Note: we avoid integral promotion for comparison. 500 OtherV = SVB.evalCast(OtherV, T, OtherT); 501 if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT) 502 .getAs<DefinedOrUnknownSVal>()) 503 State = State->assume(*CompV, true); 504 return State; 505 } 506 507 void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call, 508 CheckerContext &C) const { 509 Optional<Summary> FoundSummary = findFunctionSummary(Call, C); 510 if (!FoundSummary) 511 return; 512 513 const Summary &Summary = *FoundSummary; 514 ProgramStateRef State = C.getState(); 515 516 ProgramStateRef NewState = State; 517 for (const ValueConstraintPtr& VC : Summary.ArgConstraints) { 518 ProgramStateRef SuccessSt = VC->apply(NewState, Call, Summary, C); 519 ProgramStateRef FailureSt = VC->negate()->apply(NewState, Call, Summary, C); 520 // The argument constraint is not satisfied. 521 if (FailureSt && !SuccessSt) { 522 if (ExplodedNode *N = C.generateErrorNode(NewState)) 523 reportBug(Call, N, C); 524 break; 525 } else { 526 // We will apply the constraint even if we cannot reason about the 527 // argument. This means both SuccessSt and FailureSt can be true. If we 528 // weren't applying the constraint that would mean that symbolic 529 // execution continues on a code whose behaviour is undefined. 530 assert(SuccessSt); 531 NewState = SuccessSt; 532 } 533 } 534 if (NewState && NewState != State) 535 C.addTransition(NewState); 536 } 537 538 void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call, 539 CheckerContext &C) const { 540 Optional<Summary> FoundSummary = findFunctionSummary(Call, C); 541 if (!FoundSummary) 542 return; 543 544 // Now apply the constraints. 545 const Summary &Summary = *FoundSummary; 546 ProgramStateRef State = C.getState(); 547 548 // Apply case/branch specifications. 549 for (const auto &VRS : Summary.CaseConstraints) { 550 ProgramStateRef NewState = State; 551 for (const auto &VR: VRS) { 552 NewState = VR->apply(NewState, Call, Summary, C); 553 if (!NewState) 554 break; 555 } 556 557 if (NewState && NewState != State) 558 C.addTransition(NewState); 559 } 560 } 561 562 bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call, 563 CheckerContext &C) const { 564 Optional<Summary> FoundSummary = findFunctionSummary(Call, C); 565 if (!FoundSummary) 566 return false; 567 568 const Summary &Summary = *FoundSummary; 569 switch (Summary.InvalidationKd) { 570 case EvalCallAsPure: { 571 ProgramStateRef State = C.getState(); 572 const LocationContext *LC = C.getLocationContext(); 573 const auto *CE = cast_or_null<CallExpr>(Call.getOriginExpr()); 574 SVal V = C.getSValBuilder().conjureSymbolVal( 575 CE, LC, CE->getType().getCanonicalType(), C.blockCount()); 576 State = State->BindExpr(CE, LC, V); 577 C.addTransition(State); 578 return true; 579 } 580 case NoEvalCall: 581 // Summary tells us to avoid performing eval::Call. The function is possibly 582 // evaluated by another checker, or evaluated conservatively. 583 return false; 584 } 585 llvm_unreachable("Unknown invalidation kind!"); 586 } 587 588 bool StdLibraryFunctionsChecker::Summary::matchesSignature( 589 const FunctionDecl *FD) const { 590 // Check number of arguments: 591 if (FD->param_size() != ArgTys.size()) 592 return false; 593 594 // Check return type if relevant: 595 if (!RetTy.isNull() && RetTy != FD->getReturnType().getCanonicalType()) 596 return false; 597 598 // Check argument types when relevant: 599 for (size_t I = 0, E = ArgTys.size(); I != E; ++I) { 600 QualType FormalT = ArgTys[I]; 601 // Null type marks irrelevant arguments. 602 if (FormalT.isNull()) 603 continue; 604 605 assertTypeSuitableForSummary(FormalT); 606 607 QualType ActualT = FD->getParamDecl(I)->getType().getCanonicalType(); 608 if (ActualT != FormalT) 609 return false; 610 } 611 612 return true; 613 } 614 615 Optional<StdLibraryFunctionsChecker::Summary> 616 StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD, 617 CheckerContext &C) const { 618 if (!FD) 619 return None; 620 621 initFunctionSummaries(C); 622 623 auto FSMI = FunctionSummaryMap.find(FD->getCanonicalDecl()); 624 if (FSMI == FunctionSummaryMap.end()) 625 return None; 626 return FSMI->second; 627 } 628 629 Optional<StdLibraryFunctionsChecker::Summary> 630 StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call, 631 CheckerContext &C) const { 632 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl()); 633 if (!FD) 634 return None; 635 return findFunctionSummary(FD, C); 636 } 637 638 void StdLibraryFunctionsChecker::initFunctionSummaries( 639 CheckerContext &C) const { 640 if (!FunctionSummaryMap.empty()) 641 return; 642 643 SValBuilder &SVB = C.getSValBuilder(); 644 BasicValueFactory &BVF = SVB.getBasicValueFactory(); 645 const ASTContext &ACtx = BVF.getContext(); 646 647 // These types are useful for writing specifications quickly, 648 // New specifications should probably introduce more types. 649 // Some types are hard to obtain from the AST, eg. "ssize_t". 650 // In such cases it should be possible to provide multiple variants 651 // of function summary for common cases (eg. ssize_t could be int or long 652 // or long long, so three summary variants would be enough). 653 // Of course, function variants are also useful for C++ overloads. 654 const QualType 655 Irrelevant{}; // A placeholder, whenever we do not care about the type. 656 const QualType IntTy = ACtx.IntTy; 657 const QualType LongTy = ACtx.LongTy; 658 const QualType LongLongTy = ACtx.LongLongTy; 659 const QualType SizeTy = ACtx.getSizeType(); 660 const QualType VoidPtrTy = ACtx.VoidPtrTy; // void * 661 const QualType VoidPtrRestrictTy = 662 ACtx.getRestrictType(VoidPtrTy); // void *restrict 663 const QualType ConstVoidPtrTy = 664 ACtx.getPointerType(ACtx.VoidTy.withConst()); // const void * 665 const QualType ConstCharPtrTy = 666 ACtx.getPointerType(ACtx.CharTy.withConst()); // const char * 667 const QualType ConstVoidPtrRestrictTy = 668 ACtx.getRestrictType(ConstVoidPtrTy); // const void *restrict 669 670 const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue(); 671 const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue(); 672 const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue(); 673 674 // Set UCharRangeMax to min of int or uchar maximum value. 675 // The C standard states that the arguments of functions like isalpha must 676 // be representable as an unsigned char. Their type is 'int', so the max 677 // value of the argument should be min(UCharMax, IntMax). This just happen 678 // to be true for commonly used and well tested instruction set 679 // architectures, but not for others. 680 const RangeInt UCharRangeMax = 681 std::min(BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue(), IntMax); 682 683 // The platform dependent value of EOF. 684 // Try our best to parse this from the Preprocessor, otherwise fallback to -1. 685 const auto EOFv = [&C]() -> RangeInt { 686 if (const llvm::Optional<int> OptInt = 687 tryExpandAsInteger("EOF", C.getPreprocessor())) 688 return *OptInt; 689 return -1; 690 }(); 691 692 // Auxiliary class to aid adding summaries to the summary map. 693 struct AddToFunctionSummaryMap { 694 const ASTContext &ACtx; 695 FunctionSummaryMapType ⤅ 696 bool DisplayLoadedSummaries; 697 AddToFunctionSummaryMap(const ASTContext &ACtx, FunctionSummaryMapType &FSM, 698 bool DisplayLoadedSummaries) 699 : ACtx(ACtx), Map(FSM), DisplayLoadedSummaries(DisplayLoadedSummaries) { 700 } 701 702 // Add a summary to a FunctionDecl found by lookup. The lookup is performed 703 // by the given Name, and in the global scope. The summary will be attached 704 // to the found FunctionDecl only if the signatures match. 705 void operator()(StringRef Name, const Summary &S) { 706 IdentifierInfo &II = ACtx.Idents.get(Name); 707 auto LookupRes = ACtx.getTranslationUnitDecl()->lookup(&II); 708 if (LookupRes.size() == 0) 709 return; 710 for (Decl *D : LookupRes) { 711 if (auto *FD = dyn_cast<FunctionDecl>(D)) { 712 if (S.matchesSignature(FD)) { 713 auto Res = Map.insert({FD->getCanonicalDecl(), S}); 714 assert(Res.second && "Function already has a summary set!"); 715 (void)Res; 716 if (DisplayLoadedSummaries) { 717 llvm::errs() << "Loaded summary for: "; 718 FD->print(llvm::errs()); 719 llvm::errs() << "\n"; 720 } 721 return; 722 } 723 } 724 } 725 } 726 // Add several summaries for the given name. 727 void operator()(StringRef Name, const std::vector<Summary> &Summaries) { 728 for (const Summary &S : Summaries) 729 operator()(Name, S); 730 } 731 } addToFunctionSummaryMap(ACtx, FunctionSummaryMap, DisplayLoadedSummaries); 732 733 // We are finally ready to define specifications for all supported functions. 734 // 735 // The signature needs to have the correct number of arguments. 736 // However, we insert `Irrelevant' when the type is insignificant. 737 // 738 // Argument ranges should always cover all variants. If return value 739 // is completely unknown, omit it from the respective range set. 740 // 741 // All types in the spec need to be canonical. 742 // 743 // Every item in the list of range sets represents a particular 744 // execution path the analyzer would need to explore once 745 // the call is modeled - a new program state is constructed 746 // for every range set, and each range line in the range set 747 // corresponds to a specific constraint within this state. 748 // 749 // Upon comparing to another argument, the other argument is casted 750 // to the current argument's type. This avoids proper promotion but 751 // seems useful. For example, read() receives size_t argument, 752 // and its return value, which is of type ssize_t, cannot be greater 753 // than this argument. If we made a promotion, and the size argument 754 // is equal to, say, 10, then we'd impose a range of [0, 10] on the 755 // return value, however the correct range is [-1, 10]. 756 // 757 // Please update the list of functions in the header after editing! 758 759 // Below are helpers functions to create the summaries. 760 auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind, 761 IntRangeVector Ranges) { 762 return std::make_shared<RangeConstraint>(ArgN, Kind, Ranges); 763 }; 764 auto BufferSize = [](auto... Args) { 765 return std::make_shared<BufferSizeConstraint>(Args...); 766 }; 767 struct { 768 auto operator()(RangeKind Kind, IntRangeVector Ranges) { 769 return std::make_shared<RangeConstraint>(Ret, Kind, Ranges); 770 } 771 auto operator()(BinaryOperator::Opcode Op, ArgNo OtherArgN) { 772 return std::make_shared<ComparisonConstraint>(Ret, Op, OtherArgN); 773 } 774 } ReturnValueCondition; 775 auto Range = [](RangeInt b, RangeInt e) { 776 return IntRangeVector{std::pair<RangeInt, RangeInt>{b, e}}; 777 }; 778 auto SingleValue = [](RangeInt v) { 779 return IntRangeVector{std::pair<RangeInt, RangeInt>{v, v}}; 780 }; 781 auto LessThanOrEq = BO_LE; 782 auto NotNull = [&](ArgNo ArgN) { 783 return std::make_shared<NotNullConstraint>(ArgN); 784 }; 785 786 using RetType = QualType; 787 // Templates for summaries that are reused by many functions. 788 auto Getc = [&]() { 789 return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall) 790 .Case({ReturnValueCondition(WithinRange, 791 {{EOFv, EOFv}, {0, UCharRangeMax}})}); 792 }; 793 auto Read = [&](RetType R, RangeInt Max) { 794 return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R}, 795 NoEvalCall) 796 .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)), 797 ReturnValueCondition(WithinRange, Range(-1, Max))}); 798 }; 799 auto Fread = [&]() { 800 return Summary(ArgTypes{VoidPtrRestrictTy, Irrelevant, SizeTy, Irrelevant}, 801 RetType{SizeTy}, NoEvalCall) 802 .Case({ 803 ReturnValueCondition(LessThanOrEq, ArgNo(2)), 804 }) 805 .ArgConstraint(NotNull(ArgNo(0))); 806 }; 807 auto Fwrite = [&]() { 808 return Summary( 809 ArgTypes{ConstVoidPtrRestrictTy, Irrelevant, SizeTy, Irrelevant}, 810 RetType{SizeTy}, NoEvalCall) 811 .Case({ 812 ReturnValueCondition(LessThanOrEq, ArgNo(2)), 813 }) 814 .ArgConstraint(NotNull(ArgNo(0))); 815 }; 816 auto Getline = [&](RetType R, RangeInt Max) { 817 return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R}, 818 NoEvalCall) 819 .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})}); 820 }; 821 822 // The isascii() family of functions. 823 // The behavior is undefined if the value of the argument is not 824 // representable as unsigned char or is not equal to EOF. See e.g. C99 825 // 7.4.1.2 The isalpha function (p: 181-182). 826 addToFunctionSummaryMap( 827 "isalnum", 828 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 829 // Boils down to isupper() or islower() or isdigit(). 830 .Case({ArgumentCondition(0U, WithinRange, 831 {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}), 832 ReturnValueCondition(OutOfRange, SingleValue(0))}) 833 // The locale-specific range. 834 // No post-condition. We are completely unaware of 835 // locale-specific return values. 836 .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) 837 .Case( 838 {ArgumentCondition( 839 0U, OutOfRange, 840 {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}), 841 ReturnValueCondition(WithinRange, SingleValue(0))}) 842 .ArgConstraint(ArgumentCondition( 843 0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}}))); 844 addToFunctionSummaryMap( 845 "isalpha", 846 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 847 .Case({ArgumentCondition(0U, WithinRange, {{'A', 'Z'}, {'a', 'z'}}), 848 ReturnValueCondition(OutOfRange, SingleValue(0))}) 849 // The locale-specific range. 850 .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) 851 .Case({ArgumentCondition( 852 0U, OutOfRange, 853 {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}), 854 ReturnValueCondition(WithinRange, SingleValue(0))})); 855 addToFunctionSummaryMap( 856 "isascii", 857 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 858 .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), 859 ReturnValueCondition(OutOfRange, SingleValue(0))}) 860 .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)), 861 ReturnValueCondition(WithinRange, SingleValue(0))})); 862 addToFunctionSummaryMap( 863 "isblank", 864 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 865 .Case({ArgumentCondition(0U, WithinRange, {{'\t', '\t'}, {' ', ' '}}), 866 ReturnValueCondition(OutOfRange, SingleValue(0))}) 867 .Case({ArgumentCondition(0U, OutOfRange, {{'\t', '\t'}, {' ', ' '}}), 868 ReturnValueCondition(WithinRange, SingleValue(0))})); 869 addToFunctionSummaryMap( 870 "iscntrl", 871 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 872 .Case({ArgumentCondition(0U, WithinRange, {{0, 32}, {127, 127}}), 873 ReturnValueCondition(OutOfRange, SingleValue(0))}) 874 .Case({ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}), 875 ReturnValueCondition(WithinRange, SingleValue(0))})); 876 addToFunctionSummaryMap( 877 "isdigit", 878 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 879 .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')), 880 ReturnValueCondition(OutOfRange, SingleValue(0))}) 881 .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')), 882 ReturnValueCondition(WithinRange, SingleValue(0))})); 883 addToFunctionSummaryMap( 884 "isgraph", 885 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 886 .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)), 887 ReturnValueCondition(OutOfRange, SingleValue(0))}) 888 .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)), 889 ReturnValueCondition(WithinRange, SingleValue(0))})); 890 addToFunctionSummaryMap( 891 "islower", 892 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 893 // Is certainly lowercase. 894 .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')), 895 ReturnValueCondition(OutOfRange, SingleValue(0))}) 896 // Is ascii but not lowercase. 897 .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), 898 ArgumentCondition(0U, OutOfRange, Range('a', 'z')), 899 ReturnValueCondition(WithinRange, SingleValue(0))}) 900 // The locale-specific range. 901 .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) 902 // Is not an unsigned char. 903 .Case({ArgumentCondition(0U, OutOfRange, Range(0, UCharRangeMax)), 904 ReturnValueCondition(WithinRange, SingleValue(0))})); 905 addToFunctionSummaryMap( 906 "isprint", 907 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 908 .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)), 909 ReturnValueCondition(OutOfRange, SingleValue(0))}) 910 .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)), 911 ReturnValueCondition(WithinRange, SingleValue(0))})); 912 addToFunctionSummaryMap( 913 "ispunct", 914 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 915 .Case({ArgumentCondition( 916 0U, WithinRange, 917 {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), 918 ReturnValueCondition(OutOfRange, SingleValue(0))}) 919 .Case({ArgumentCondition( 920 0U, OutOfRange, 921 {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), 922 ReturnValueCondition(WithinRange, SingleValue(0))})); 923 addToFunctionSummaryMap( 924 "isspace", 925 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 926 // Space, '\f', '\n', '\r', '\t', '\v'. 927 .Case({ArgumentCondition(0U, WithinRange, {{9, 13}, {' ', ' '}}), 928 ReturnValueCondition(OutOfRange, SingleValue(0))}) 929 // The locale-specific range. 930 .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) 931 .Case({ArgumentCondition(0U, OutOfRange, 932 {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}), 933 ReturnValueCondition(WithinRange, SingleValue(0))})); 934 addToFunctionSummaryMap( 935 "isupper", 936 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 937 // Is certainly uppercase. 938 .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')), 939 ReturnValueCondition(OutOfRange, SingleValue(0))}) 940 // The locale-specific range. 941 .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) 942 // Other. 943 .Case({ArgumentCondition(0U, OutOfRange, 944 {{'A', 'Z'}, {128, UCharRangeMax}}), 945 ReturnValueCondition(WithinRange, SingleValue(0))})); 946 addToFunctionSummaryMap( 947 "isxdigit", 948 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 949 .Case({ArgumentCondition(0U, WithinRange, 950 {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), 951 ReturnValueCondition(OutOfRange, SingleValue(0))}) 952 .Case({ArgumentCondition(0U, OutOfRange, 953 {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), 954 ReturnValueCondition(WithinRange, SingleValue(0))})); 955 956 // The getc() family of functions that returns either a char or an EOF. 957 addToFunctionSummaryMap("getc", Getc()); 958 addToFunctionSummaryMap("fgetc", Getc()); 959 addToFunctionSummaryMap( 960 "getchar", Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall) 961 .Case({ReturnValueCondition( 962 WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})})); 963 964 // read()-like functions that never return more than buffer size. 965 // We are not sure how ssize_t is defined on every platform, so we 966 // provide three variants that should cover common cases. 967 addToFunctionSummaryMap("read", {Read(IntTy, IntMax), Read(LongTy, LongMax), 968 Read(LongLongTy, LongLongMax)}); 969 addToFunctionSummaryMap("write", {Read(IntTy, IntMax), Read(LongTy, LongMax), 970 Read(LongLongTy, LongLongMax)}); 971 addToFunctionSummaryMap("fread", Fread()); 972 addToFunctionSummaryMap("fwrite", Fwrite()); 973 // getline()-like functions either fail or read at least the delimiter. 974 addToFunctionSummaryMap("getline", 975 {Getline(IntTy, IntMax), Getline(LongTy, LongMax), 976 Getline(LongLongTy, LongLongMax)}); 977 addToFunctionSummaryMap("getdelim", 978 {Getline(IntTy, IntMax), Getline(LongTy, LongMax), 979 Getline(LongLongTy, LongLongMax)}); 980 981 // Functions for testing. 982 if (ChecksEnabled[CK_StdCLibraryFunctionsTesterChecker]) { 983 addToFunctionSummaryMap( 984 "__two_constrained_args", 985 Summary(ArgTypes{IntTy, IntTy}, RetType{IntTy}, EvalCallAsPure) 986 .ArgConstraint(ArgumentCondition(0U, WithinRange, SingleValue(1))) 987 .ArgConstraint(ArgumentCondition(1U, WithinRange, SingleValue(1)))); 988 addToFunctionSummaryMap( 989 "__arg_constrained_twice", 990 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 991 .ArgConstraint(ArgumentCondition(0U, OutOfRange, SingleValue(1))) 992 .ArgConstraint(ArgumentCondition(0U, OutOfRange, SingleValue(2)))); 993 addToFunctionSummaryMap( 994 "__defaultparam", 995 Summary(ArgTypes{Irrelevant, IntTy}, RetType{IntTy}, EvalCallAsPure) 996 .ArgConstraint(NotNull(ArgNo(0)))); 997 addToFunctionSummaryMap("__variadic", 998 Summary(ArgTypes{VoidPtrTy, ConstCharPtrTy}, 999 RetType{IntTy}, EvalCallAsPure) 1000 .ArgConstraint(NotNull(ArgNo(0))) 1001 .ArgConstraint(NotNull(ArgNo(1)))); 1002 addToFunctionSummaryMap( 1003 "__buf_size_arg_constraint", 1004 Summary(ArgTypes{ConstVoidPtrTy, SizeTy}, RetType{IntTy}, 1005 EvalCallAsPure) 1006 .ArgConstraint( 1007 BufferSize(/*Buffer=*/ArgNo(0), /*BufSize=*/ArgNo(1)))); 1008 addToFunctionSummaryMap( 1009 "__buf_size_arg_constraint_mul", 1010 Summary(ArgTypes{ConstVoidPtrTy, SizeTy, SizeTy}, RetType{IntTy}, 1011 EvalCallAsPure) 1012 .ArgConstraint(BufferSize(/*Buffer=*/ArgNo(0), /*BufSize=*/ArgNo(1), 1013 /*BufSizeMultiplier=*/ArgNo(2)))); 1014 } 1015 } 1016 1017 void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) { 1018 auto *Checker = mgr.registerChecker<StdLibraryFunctionsChecker>(); 1019 Checker->DisplayLoadedSummaries = 1020 mgr.getAnalyzerOptions().getCheckerBooleanOption( 1021 Checker, "DisplayLoadedSummaries"); 1022 } 1023 1024 bool ento::shouldRegisterStdCLibraryFunctionsChecker(const CheckerManager &mgr) { 1025 return true; 1026 } 1027 1028 #define REGISTER_CHECKER(name) \ 1029 void ento::register##name(CheckerManager &mgr) { \ 1030 StdLibraryFunctionsChecker *checker = \ 1031 mgr.getChecker<StdLibraryFunctionsChecker>(); \ 1032 checker->ChecksEnabled[StdLibraryFunctionsChecker::CK_##name] = true; \ 1033 checker->CheckNames[StdLibraryFunctionsChecker::CK_##name] = \ 1034 mgr.getCurrentCheckerName(); \ 1035 } \ 1036 \ 1037 bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; } 1038 1039 REGISTER_CHECKER(StdCLibraryFunctionArgsChecker) 1040 REGISTER_CHECKER(StdCLibraryFunctionsTesterChecker) 1041