1 //=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This checker improves modeling of a few simple library functions. 10 // 11 // This checker provides a specification format - `Summary' - and 12 // contains descriptions of some library functions in this format. Each 13 // specification contains a list of branches for splitting the program state 14 // upon call, and range constraints on argument and return-value symbols that 15 // are satisfied on each branch. This spec can be expanded to include more 16 // items, like external effects of the function. 17 // 18 // The main difference between this approach and the body farms technique is 19 // in more explicit control over how many branches are produced. For example, 20 // consider standard C function `ispunct(int x)', which returns a non-zero value 21 // iff `x' is a punctuation character, that is, when `x' is in range 22 // ['!', '/'] [':', '@'] U ['[', '\`'] U ['{', '~']. 23 // `Summary' provides only two branches for this function. However, 24 // any attempt to describe this range with if-statements in the body farm 25 // would result in many more branches. Because each branch needs to be analyzed 26 // independently, this significantly reduces performance. Additionally, 27 // once we consider a branch on which `x' is in range, say, ['!', '/'], 28 // we assume that such branch is an important separate path through the program, 29 // which may lead to false positives because considering this particular path 30 // was not consciously intended, and therefore it might have been unreachable. 31 // 32 // This checker uses eval::Call for modeling pure functions (functions without 33 // side effets), for which their `Summary' is a precise model. This avoids 34 // unnecessary invalidation passes. Conflicts with other checkers are unlikely 35 // because if the function has no other effects, other checkers would probably 36 // never want to improve upon the modeling done by this checker. 37 // 38 // Non-pure functions, for which only partial improvement over the default 39 // behavior is expected, are modeled via check::PostCall, non-intrusively. 40 // 41 // The following standard C functions are currently supported: 42 // 43 // fgetc getline isdigit isupper 44 // fread isalnum isgraph isxdigit 45 // fwrite isalpha islower read 46 // getc isascii isprint write 47 // getchar isblank ispunct 48 // getdelim iscntrl isspace 49 // 50 //===----------------------------------------------------------------------===// 51 52 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 53 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 54 #include "clang/StaticAnalyzer/Core/Checker.h" 55 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 56 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 57 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 58 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" 59 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicSize.h" 60 61 using namespace clang; 62 using namespace clang::ento; 63 64 namespace { 65 class StdLibraryFunctionsChecker 66 : public Checker<check::PreCall, check::PostCall, eval::Call> { 67 /// Below is a series of typedefs necessary to define function specs. 68 /// We avoid nesting types here because each additional qualifier 69 /// would need to be repeated in every function spec. 70 struct Summary; 71 72 /// Specify how much the analyzer engine should entrust modeling this function 73 /// to us. If he doesn't, he performs additional invalidations. 74 enum InvalidationKind { NoEvalCall, EvalCallAsPure }; 75 76 // The universal integral type to use in value range descriptions. 77 // Unsigned to make sure overflows are well-defined. 78 typedef uint64_t RangeInt; 79 80 /// Normally, describes a single range constraint, eg. {{0, 1}, {3, 4}} is 81 /// a non-negative integer, which less than 5 and not equal to 2. For 82 /// `ComparesToArgument', holds information about how exactly to compare to 83 /// the argument. 84 typedef std::vector<std::pair<RangeInt, RangeInt>> IntRangeVector; 85 86 /// A reference to an argument or return value by its number. 87 /// ArgNo in CallExpr and CallEvent is defined as Unsigned, but 88 /// obviously uint32_t should be enough for all practical purposes. 89 typedef uint32_t ArgNo; 90 static const ArgNo Ret; 91 92 class ValueConstraint; 93 94 // Pointer to the ValueConstraint. We need a copyable, polymorphic and 95 // default initialize able type (vector needs that). A raw pointer was good, 96 // however, we cannot default initialize that. unique_ptr makes the Summary 97 // class non-copyable, therefore not an option. Releasing the copyability 98 // requirement would render the initialization of the Summary map infeasible. 99 using ValueConstraintPtr = std::shared_ptr<ValueConstraint>; 100 101 /// Polymorphic base class that represents a constraint on a given argument 102 /// (or return value) of a function. Derived classes implement different kind 103 /// of constraints, e.g range constraints or correlation between two 104 /// arguments. 105 class ValueConstraint { 106 public: 107 ValueConstraint(ArgNo ArgN) : ArgN(ArgN) {} 108 virtual ~ValueConstraint() {} 109 /// Apply the effects of the constraint on the given program state. If null 110 /// is returned then the constraint is not feasible. 111 virtual ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 112 const Summary &Summary, 113 CheckerContext &C) const = 0; 114 virtual ValueConstraintPtr negate() const { 115 llvm_unreachable("Not implemented"); 116 }; 117 ArgNo getArgNo() const { return ArgN; } 118 119 protected: 120 ArgNo ArgN; // Argument to which we apply the constraint. 121 }; 122 123 /// Given a range, should the argument stay inside or outside this range? 124 enum RangeKind { OutOfRange, WithinRange }; 125 126 /// Encapsulates a single range on a single symbol within a branch. 127 class RangeConstraint : public ValueConstraint { 128 RangeKind Kind; // Kind of range definition. 129 IntRangeVector Args; // Polymorphic arguments. 130 131 public: 132 RangeConstraint(ArgNo ArgN, RangeKind Kind, const IntRangeVector &Args) 133 : ValueConstraint(ArgN), Kind(Kind), Args(Args) {} 134 135 const IntRangeVector &getRanges() const { 136 return Args; 137 } 138 139 private: 140 ProgramStateRef applyAsOutOfRange(ProgramStateRef State, 141 const CallEvent &Call, 142 const Summary &Summary) const; 143 ProgramStateRef applyAsWithinRange(ProgramStateRef State, 144 const CallEvent &Call, 145 const Summary &Summary) const; 146 public: 147 ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 148 const Summary &Summary, 149 CheckerContext &C) const override { 150 switch (Kind) { 151 case OutOfRange: 152 return applyAsOutOfRange(State, Call, Summary); 153 case WithinRange: 154 return applyAsWithinRange(State, Call, Summary); 155 } 156 llvm_unreachable("Unknown range kind!"); 157 } 158 159 ValueConstraintPtr negate() const override { 160 RangeConstraint Tmp(*this); 161 switch (Kind) { 162 case OutOfRange: 163 Tmp.Kind = WithinRange; 164 break; 165 case WithinRange: 166 Tmp.Kind = OutOfRange; 167 break; 168 } 169 return std::make_shared<RangeConstraint>(Tmp); 170 } 171 }; 172 173 class ComparisonConstraint : public ValueConstraint { 174 BinaryOperator::Opcode Opcode; 175 ArgNo OtherArgN; 176 177 public: 178 ComparisonConstraint(ArgNo ArgN, BinaryOperator::Opcode Opcode, 179 ArgNo OtherArgN) 180 : ValueConstraint(ArgN), Opcode(Opcode), OtherArgN(OtherArgN) {} 181 ArgNo getOtherArgNo() const { return OtherArgN; } 182 BinaryOperator::Opcode getOpcode() const { return Opcode; } 183 ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 184 const Summary &Summary, 185 CheckerContext &C) const override; 186 }; 187 188 class NotNullConstraint : public ValueConstraint { 189 using ValueConstraint::ValueConstraint; 190 // This variable has a role when we negate the constraint. 191 bool CannotBeNull = true; 192 193 public: 194 ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 195 const Summary &Summary, 196 CheckerContext &C) const override { 197 SVal V = getArgSVal(Call, getArgNo()); 198 if (V.isUndef()) 199 return State; 200 201 DefinedOrUnknownSVal L = V.castAs<DefinedOrUnknownSVal>(); 202 if (!L.getAs<Loc>()) 203 return State; 204 205 return State->assume(L, CannotBeNull); 206 } 207 208 ValueConstraintPtr negate() const override { 209 NotNullConstraint Tmp(*this); 210 Tmp.CannotBeNull = !this->CannotBeNull; 211 return std::make_shared<NotNullConstraint>(Tmp); 212 } 213 }; 214 215 // Represents a buffer argument with an additional size argument. 216 // E.g. the first two arguments here: 217 // ctime_s(char *buffer, rsize_t bufsz, const time_t *time); 218 class BufferSizeConstraint : public ValueConstraint { 219 // The argument which holds the size of the buffer. 220 ArgNo SizeArgN; 221 // The operator we use in apply. This is negated in negate(). 222 BinaryOperator::Opcode Op = BO_LE; 223 224 public: 225 BufferSizeConstraint(ArgNo Buffer, ArgNo BufSize) 226 : ValueConstraint(Buffer), SizeArgN(BufSize) {} 227 228 ProgramStateRef apply(ProgramStateRef State, const CallEvent &Call, 229 const Summary &Summary, 230 CheckerContext &C) const override { 231 // The buffer argument. 232 SVal BufV = getArgSVal(Call, getArgNo()); 233 // The size argument. 234 SVal SizeV = getArgSVal(Call, SizeArgN); 235 // The dynamic size of the buffer argument, got from the analyzer engine. 236 SVal BufDynSize = getDynamicSizeWithOffset(State, BufV); 237 238 SValBuilder &SvalBuilder = C.getSValBuilder(); 239 SVal Feasible = SvalBuilder.evalBinOp(State, Op, SizeV, BufDynSize, 240 SvalBuilder.getContext().BoolTy); 241 if (auto F = Feasible.getAs<DefinedOrUnknownSVal>()) 242 return State->assume(*F, true); 243 244 // We can get here only if the size argument or the dynamic size is 245 // undefined. But the dynamic size should never be undefined, only 246 // unknown. So, here, the size of the argument is undefined, i.e. we 247 // cannot apply the constraint. Actually, other checkers like 248 // CallAndMessage should catch this situation earlier, because we call a 249 // function with an uninitialized argument. 250 llvm_unreachable("Size argument or the dynamic size is Undefined"); 251 } 252 253 ValueConstraintPtr negate() const override { 254 BufferSizeConstraint Tmp(*this); 255 Tmp.Op = BinaryOperator::negateComparisonOp(Op); 256 return std::make_shared<BufferSizeConstraint>(Tmp); 257 } 258 }; 259 260 /// The complete list of constraints that defines a single branch. 261 typedef std::vector<ValueConstraintPtr> ConstraintSet; 262 263 using ArgTypes = std::vector<QualType>; 264 using Cases = std::vector<ConstraintSet>; 265 266 /// Includes information about 267 /// * function prototype (which is necessary to 268 /// ensure we're modeling the right function and casting values properly), 269 /// * approach to invalidation, 270 /// * a list of branches - a list of list of ranges - 271 /// A branch represents a path in the exploded graph of a function (which 272 /// is a tree). So, a branch is a series of assumptions. In other words, 273 /// branches represent split states and additional assumptions on top of 274 /// the splitting assumption. 275 /// For example, consider the branches in `isalpha(x)` 276 /// Branch 1) 277 /// x is in range ['A', 'Z'] or in ['a', 'z'] 278 /// then the return value is not 0. (I.e. out-of-range [0, 0]) 279 /// Branch 2) 280 /// x is out-of-range ['A', 'Z'] and out-of-range ['a', 'z'] 281 /// then the return value is 0. 282 /// * a list of argument constraints, that must be true on every branch. 283 /// If these constraints are not satisfied that means a fatal error 284 /// usually resulting in undefined behaviour. 285 struct Summary { 286 const ArgTypes ArgTys; 287 const QualType RetTy; 288 const InvalidationKind InvalidationKd; 289 Cases CaseConstraints; 290 ConstraintSet ArgConstraints; 291 292 Summary(ArgTypes ArgTys, QualType RetTy, InvalidationKind InvalidationKd) 293 : ArgTys(ArgTys), RetTy(RetTy), InvalidationKd(InvalidationKd) {} 294 295 Summary &Case(ConstraintSet&& CS) { 296 CaseConstraints.push_back(std::move(CS)); 297 return *this; 298 } 299 Summary &ArgConstraint(ValueConstraintPtr VC) { 300 ArgConstraints.push_back(VC); 301 return *this; 302 } 303 304 private: 305 static void assertTypeSuitableForSummary(QualType T) { 306 assert(!T->isVoidType() && 307 "We should have had no significant void types in the spec"); 308 assert(T.isCanonical() && 309 "We should only have canonical types in the spec"); 310 } 311 312 public: 313 QualType getArgType(ArgNo ArgN) const { 314 QualType T = (ArgN == Ret) ? RetTy : ArgTys[ArgN]; 315 assertTypeSuitableForSummary(T); 316 return T; 317 } 318 319 /// Try our best to figure out if the summary's signature matches 320 /// *the* library function to which this specification applies. 321 bool matchesSignature(const FunctionDecl *FD) const; 322 }; 323 324 // The map of all functions supported by the checker. It is initialized 325 // lazily, and it doesn't change after initialization. 326 using FunctionSummaryMapType = llvm::DenseMap<const FunctionDecl *, Summary>; 327 mutable FunctionSummaryMapType FunctionSummaryMap; 328 329 mutable std::unique_ptr<BugType> BT_InvalidArg; 330 331 // Auxiliary functions to support ArgNo within all structures 332 // in a unified manner. 333 static QualType getArgType(const Summary &Summary, ArgNo ArgN) { 334 return Summary.getArgType(ArgN); 335 } 336 static SVal getArgSVal(const CallEvent &Call, ArgNo ArgN) { 337 return ArgN == Ret ? Call.getReturnValue() : Call.getArgSVal(ArgN); 338 } 339 340 public: 341 void checkPreCall(const CallEvent &Call, CheckerContext &C) const; 342 void checkPostCall(const CallEvent &Call, CheckerContext &C) const; 343 bool evalCall(const CallEvent &Call, CheckerContext &C) const; 344 345 enum CheckKind { 346 CK_StdCLibraryFunctionArgsChecker, 347 CK_StdCLibraryFunctionsTesterChecker, 348 CK_NumCheckKinds 349 }; 350 DefaultBool ChecksEnabled[CK_NumCheckKinds]; 351 CheckerNameRef CheckNames[CK_NumCheckKinds]; 352 353 bool DisplayLoadedSummaries = false; 354 355 private: 356 Optional<Summary> findFunctionSummary(const FunctionDecl *FD, 357 CheckerContext &C) const; 358 Optional<Summary> findFunctionSummary(const CallEvent &Call, 359 CheckerContext &C) const; 360 361 void initFunctionSummaries(CheckerContext &C) const; 362 363 void reportBug(const CallEvent &Call, ExplodedNode *N, 364 CheckerContext &C) const { 365 if (!ChecksEnabled[CK_StdCLibraryFunctionArgsChecker]) 366 return; 367 // TODO Add detailed diagnostic. 368 StringRef Msg = "Function argument constraint is not satisfied"; 369 if (!BT_InvalidArg) 370 BT_InvalidArg = std::make_unique<BugType>( 371 CheckNames[CK_StdCLibraryFunctionArgsChecker], 372 "Unsatisfied argument constraints", categories::LogicError); 373 auto R = std::make_unique<PathSensitiveBugReport>(*BT_InvalidArg, Msg, N); 374 bugreporter::trackExpressionValue(N, Call.getArgExpr(0), *R); 375 C.emitReport(std::move(R)); 376 } 377 }; 378 379 const StdLibraryFunctionsChecker::ArgNo StdLibraryFunctionsChecker::Ret = 380 std::numeric_limits<ArgNo>::max(); 381 382 } // end of anonymous namespace 383 384 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsOutOfRange( 385 ProgramStateRef State, const CallEvent &Call, 386 const Summary &Summary) const { 387 388 ProgramStateManager &Mgr = State->getStateManager(); 389 SValBuilder &SVB = Mgr.getSValBuilder(); 390 BasicValueFactory &BVF = SVB.getBasicValueFactory(); 391 ConstraintManager &CM = Mgr.getConstraintManager(); 392 QualType T = getArgType(Summary, getArgNo()); 393 SVal V = getArgSVal(Call, getArgNo()); 394 395 if (auto N = V.getAs<NonLoc>()) { 396 const IntRangeVector &R = getRanges(); 397 size_t E = R.size(); 398 for (size_t I = 0; I != E; ++I) { 399 const llvm::APSInt &Min = BVF.getValue(R[I].first, T); 400 const llvm::APSInt &Max = BVF.getValue(R[I].second, T); 401 assert(Min <= Max); 402 State = CM.assumeInclusiveRange(State, *N, Min, Max, false); 403 if (!State) 404 break; 405 } 406 } 407 408 return State; 409 } 410 411 ProgramStateRef StdLibraryFunctionsChecker::RangeConstraint::applyAsWithinRange( 412 ProgramStateRef State, const CallEvent &Call, 413 const Summary &Summary) const { 414 415 ProgramStateManager &Mgr = State->getStateManager(); 416 SValBuilder &SVB = Mgr.getSValBuilder(); 417 BasicValueFactory &BVF = SVB.getBasicValueFactory(); 418 ConstraintManager &CM = Mgr.getConstraintManager(); 419 QualType T = getArgType(Summary, getArgNo()); 420 SVal V = getArgSVal(Call, getArgNo()); 421 422 // "WithinRange R" is treated as "outside [T_MIN, T_MAX] \ R". 423 // We cut off [T_MIN, min(R) - 1] and [max(R) + 1, T_MAX] if necessary, 424 // and then cut away all holes in R one by one. 425 // 426 // E.g. consider a range list R as [A, B] and [C, D] 427 // -------+--------+------------------+------------+-----------> 428 // A B C D 429 // Then we assume that the value is not in [-inf, A - 1], 430 // then not in [D + 1, +inf], then not in [B + 1, C - 1] 431 if (auto N = V.getAs<NonLoc>()) { 432 const IntRangeVector &R = getRanges(); 433 size_t E = R.size(); 434 435 const llvm::APSInt &MinusInf = BVF.getMinValue(T); 436 const llvm::APSInt &PlusInf = BVF.getMaxValue(T); 437 438 const llvm::APSInt &Left = BVF.getValue(R[0].first - 1ULL, T); 439 if (Left != PlusInf) { 440 assert(MinusInf <= Left); 441 State = CM.assumeInclusiveRange(State, *N, MinusInf, Left, false); 442 if (!State) 443 return nullptr; 444 } 445 446 const llvm::APSInt &Right = BVF.getValue(R[E - 1].second + 1ULL, T); 447 if (Right != MinusInf) { 448 assert(Right <= PlusInf); 449 State = CM.assumeInclusiveRange(State, *N, Right, PlusInf, false); 450 if (!State) 451 return nullptr; 452 } 453 454 for (size_t I = 1; I != E; ++I) { 455 const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T); 456 const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T); 457 if (Min <= Max) { 458 State = CM.assumeInclusiveRange(State, *N, Min, Max, false); 459 if (!State) 460 return nullptr; 461 } 462 } 463 } 464 465 return State; 466 } 467 468 ProgramStateRef StdLibraryFunctionsChecker::ComparisonConstraint::apply( 469 ProgramStateRef State, const CallEvent &Call, const Summary &Summary, 470 CheckerContext &C) const { 471 472 ProgramStateManager &Mgr = State->getStateManager(); 473 SValBuilder &SVB = Mgr.getSValBuilder(); 474 QualType CondT = SVB.getConditionType(); 475 QualType T = getArgType(Summary, getArgNo()); 476 SVal V = getArgSVal(Call, getArgNo()); 477 478 BinaryOperator::Opcode Op = getOpcode(); 479 ArgNo OtherArg = getOtherArgNo(); 480 SVal OtherV = getArgSVal(Call, OtherArg); 481 QualType OtherT = getArgType(Summary, OtherArg); 482 // Note: we avoid integral promotion for comparison. 483 OtherV = SVB.evalCast(OtherV, T, OtherT); 484 if (auto CompV = SVB.evalBinOp(State, Op, V, OtherV, CondT) 485 .getAs<DefinedOrUnknownSVal>()) 486 State = State->assume(*CompV, true); 487 return State; 488 } 489 490 void StdLibraryFunctionsChecker::checkPreCall(const CallEvent &Call, 491 CheckerContext &C) const { 492 Optional<Summary> FoundSummary = findFunctionSummary(Call, C); 493 if (!FoundSummary) 494 return; 495 496 const Summary &Summary = *FoundSummary; 497 ProgramStateRef State = C.getState(); 498 499 ProgramStateRef NewState = State; 500 for (const ValueConstraintPtr& VC : Summary.ArgConstraints) { 501 ProgramStateRef SuccessSt = VC->apply(NewState, Call, Summary, C); 502 ProgramStateRef FailureSt = VC->negate()->apply(NewState, Call, Summary, C); 503 // The argument constraint is not satisfied. 504 if (FailureSt && !SuccessSt) { 505 if (ExplodedNode *N = C.generateErrorNode(NewState)) 506 reportBug(Call, N, C); 507 break; 508 } else { 509 // We will apply the constraint even if we cannot reason about the 510 // argument. This means both SuccessSt and FailureSt can be true. If we 511 // weren't applying the constraint that would mean that symbolic 512 // execution continues on a code whose behaviour is undefined. 513 assert(SuccessSt); 514 NewState = SuccessSt; 515 } 516 } 517 if (NewState && NewState != State) 518 C.addTransition(NewState); 519 } 520 521 void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call, 522 CheckerContext &C) const { 523 Optional<Summary> FoundSummary = findFunctionSummary(Call, C); 524 if (!FoundSummary) 525 return; 526 527 // Now apply the constraints. 528 const Summary &Summary = *FoundSummary; 529 ProgramStateRef State = C.getState(); 530 531 // Apply case/branch specifications. 532 for (const auto &VRS : Summary.CaseConstraints) { 533 ProgramStateRef NewState = State; 534 for (const auto &VR: VRS) { 535 NewState = VR->apply(NewState, Call, Summary, C); 536 if (!NewState) 537 break; 538 } 539 540 if (NewState && NewState != State) 541 C.addTransition(NewState); 542 } 543 } 544 545 bool StdLibraryFunctionsChecker::evalCall(const CallEvent &Call, 546 CheckerContext &C) const { 547 Optional<Summary> FoundSummary = findFunctionSummary(Call, C); 548 if (!FoundSummary) 549 return false; 550 551 const Summary &Summary = *FoundSummary; 552 switch (Summary.InvalidationKd) { 553 case EvalCallAsPure: { 554 ProgramStateRef State = C.getState(); 555 const LocationContext *LC = C.getLocationContext(); 556 const auto *CE = cast_or_null<CallExpr>(Call.getOriginExpr()); 557 SVal V = C.getSValBuilder().conjureSymbolVal( 558 CE, LC, CE->getType().getCanonicalType(), C.blockCount()); 559 State = State->BindExpr(CE, LC, V); 560 C.addTransition(State); 561 return true; 562 } 563 case NoEvalCall: 564 // Summary tells us to avoid performing eval::Call. The function is possibly 565 // evaluated by another checker, or evaluated conservatively. 566 return false; 567 } 568 llvm_unreachable("Unknown invalidation kind!"); 569 } 570 571 bool StdLibraryFunctionsChecker::Summary::matchesSignature( 572 const FunctionDecl *FD) const { 573 // Check number of arguments: 574 if (FD->param_size() != ArgTys.size()) 575 return false; 576 577 // Check return type if relevant: 578 if (!RetTy.isNull() && RetTy != FD->getReturnType().getCanonicalType()) 579 return false; 580 581 // Check argument types when relevant: 582 for (size_t I = 0, E = ArgTys.size(); I != E; ++I) { 583 QualType FormalT = ArgTys[I]; 584 // Null type marks irrelevant arguments. 585 if (FormalT.isNull()) 586 continue; 587 588 assertTypeSuitableForSummary(FormalT); 589 590 QualType ActualT = FD->getParamDecl(I)->getType().getCanonicalType(); 591 if (ActualT != FormalT) 592 return false; 593 } 594 595 return true; 596 } 597 598 Optional<StdLibraryFunctionsChecker::Summary> 599 StdLibraryFunctionsChecker::findFunctionSummary(const FunctionDecl *FD, 600 CheckerContext &C) const { 601 if (!FD) 602 return None; 603 604 initFunctionSummaries(C); 605 606 auto FSMI = FunctionSummaryMap.find(FD->getCanonicalDecl()); 607 if (FSMI == FunctionSummaryMap.end()) 608 return None; 609 return FSMI->second; 610 } 611 612 Optional<StdLibraryFunctionsChecker::Summary> 613 StdLibraryFunctionsChecker::findFunctionSummary(const CallEvent &Call, 614 CheckerContext &C) const { 615 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl()); 616 if (!FD) 617 return None; 618 return findFunctionSummary(FD, C); 619 } 620 621 void StdLibraryFunctionsChecker::initFunctionSummaries( 622 CheckerContext &C) const { 623 if (!FunctionSummaryMap.empty()) 624 return; 625 626 SValBuilder &SVB = C.getSValBuilder(); 627 BasicValueFactory &BVF = SVB.getBasicValueFactory(); 628 const ASTContext &ACtx = BVF.getContext(); 629 630 // These types are useful for writing specifications quickly, 631 // New specifications should probably introduce more types. 632 // Some types are hard to obtain from the AST, eg. "ssize_t". 633 // In such cases it should be possible to provide multiple variants 634 // of function summary for common cases (eg. ssize_t could be int or long 635 // or long long, so three summary variants would be enough). 636 // Of course, function variants are also useful for C++ overloads. 637 const QualType 638 Irrelevant{}; // A placeholder, whenever we do not care about the type. 639 const QualType IntTy = ACtx.IntTy; 640 const QualType LongTy = ACtx.LongTy; 641 const QualType LongLongTy = ACtx.LongLongTy; 642 const QualType SizeTy = ACtx.getSizeType(); 643 const QualType VoidPtrTy = ACtx.VoidPtrTy; // void * 644 const QualType VoidPtrRestrictTy = 645 ACtx.getRestrictType(VoidPtrTy); // void *restrict 646 const QualType ConstVoidPtrTy = 647 ACtx.getPointerType(ACtx.VoidTy.withConst()); // const void * 648 const QualType ConstCharPtrTy = 649 ACtx.getPointerType(ACtx.CharTy.withConst()); // const char * 650 const QualType ConstVoidPtrRestrictTy = 651 ACtx.getRestrictType(ConstVoidPtrTy); // const void *restrict 652 653 const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue(); 654 const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue(); 655 const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue(); 656 657 // Set UCharRangeMax to min of int or uchar maximum value. 658 // The C standard states that the arguments of functions like isalpha must 659 // be representable as an unsigned char. Their type is 'int', so the max 660 // value of the argument should be min(UCharMax, IntMax). This just happen 661 // to be true for commonly used and well tested instruction set 662 // architectures, but not for others. 663 const RangeInt UCharRangeMax = 664 std::min(BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue(), IntMax); 665 666 // The platform dependent value of EOF. 667 // Try our best to parse this from the Preprocessor, otherwise fallback to -1. 668 const auto EOFv = [&C]() -> RangeInt { 669 if (const llvm::Optional<int> OptInt = 670 tryExpandAsInteger("EOF", C.getPreprocessor())) 671 return *OptInt; 672 return -1; 673 }(); 674 675 // Auxiliary class to aid adding summaries to the summary map. 676 struct AddToFunctionSummaryMap { 677 const ASTContext &ACtx; 678 FunctionSummaryMapType ⤅ 679 bool DisplayLoadedSummaries; 680 AddToFunctionSummaryMap(const ASTContext &ACtx, FunctionSummaryMapType &FSM, 681 bool DisplayLoadedSummaries) 682 : ACtx(ACtx), Map(FSM), DisplayLoadedSummaries(DisplayLoadedSummaries) { 683 } 684 685 // Add a summary to a FunctionDecl found by lookup. The lookup is performed 686 // by the given Name, and in the global scope. The summary will be attached 687 // to the found FunctionDecl only if the signatures match. 688 void operator()(StringRef Name, const Summary &S) { 689 IdentifierInfo &II = ACtx.Idents.get(Name); 690 auto LookupRes = ACtx.getTranslationUnitDecl()->lookup(&II); 691 if (LookupRes.size() == 0) 692 return; 693 for (Decl *D : LookupRes) { 694 if (auto *FD = dyn_cast<FunctionDecl>(D)) { 695 if (S.matchesSignature(FD)) { 696 auto Res = Map.insert({FD->getCanonicalDecl(), S}); 697 assert(Res.second && "Function already has a summary set!"); 698 (void)Res; 699 if (DisplayLoadedSummaries) { 700 llvm::errs() << "Loaded summary for: "; 701 FD->print(llvm::errs()); 702 llvm::errs() << "\n"; 703 } 704 return; 705 } 706 } 707 } 708 } 709 // Add several summaries for the given name. 710 void operator()(StringRef Name, const std::vector<Summary> &Summaries) { 711 for (const Summary &S : Summaries) 712 operator()(Name, S); 713 } 714 } addToFunctionSummaryMap(ACtx, FunctionSummaryMap, DisplayLoadedSummaries); 715 716 // We are finally ready to define specifications for all supported functions. 717 // 718 // The signature needs to have the correct number of arguments. 719 // However, we insert `Irrelevant' when the type is insignificant. 720 // 721 // Argument ranges should always cover all variants. If return value 722 // is completely unknown, omit it from the respective range set. 723 // 724 // All types in the spec need to be canonical. 725 // 726 // Every item in the list of range sets represents a particular 727 // execution path the analyzer would need to explore once 728 // the call is modeled - a new program state is constructed 729 // for every range set, and each range line in the range set 730 // corresponds to a specific constraint within this state. 731 // 732 // Upon comparing to another argument, the other argument is casted 733 // to the current argument's type. This avoids proper promotion but 734 // seems useful. For example, read() receives size_t argument, 735 // and its return value, which is of type ssize_t, cannot be greater 736 // than this argument. If we made a promotion, and the size argument 737 // is equal to, say, 10, then we'd impose a range of [0, 10] on the 738 // return value, however the correct range is [-1, 10]. 739 // 740 // Please update the list of functions in the header after editing! 741 742 // Below are helpers functions to create the summaries. 743 auto ArgumentCondition = [](ArgNo ArgN, RangeKind Kind, 744 IntRangeVector Ranges) { 745 return std::make_shared<RangeConstraint>(ArgN, Kind, Ranges); 746 }; 747 auto BufferSize = [](ArgNo BufArgN, ArgNo SizeArgN) { 748 return std::make_shared<BufferSizeConstraint>(BufArgN, SizeArgN); 749 }; 750 struct { 751 auto operator()(RangeKind Kind, IntRangeVector Ranges) { 752 return std::make_shared<RangeConstraint>(Ret, Kind, Ranges); 753 } 754 auto operator()(BinaryOperator::Opcode Op, ArgNo OtherArgN) { 755 return std::make_shared<ComparisonConstraint>(Ret, Op, OtherArgN); 756 } 757 } ReturnValueCondition; 758 auto Range = [](RangeInt b, RangeInt e) { 759 return IntRangeVector{std::pair<RangeInt, RangeInt>{b, e}}; 760 }; 761 auto SingleValue = [](RangeInt v) { 762 return IntRangeVector{std::pair<RangeInt, RangeInt>{v, v}}; 763 }; 764 auto LessThanOrEq = BO_LE; 765 auto NotNull = [&](ArgNo ArgN) { 766 return std::make_shared<NotNullConstraint>(ArgN); 767 }; 768 769 using RetType = QualType; 770 // Templates for summaries that are reused by many functions. 771 auto Getc = [&]() { 772 return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall) 773 .Case({ReturnValueCondition(WithinRange, 774 {{EOFv, EOFv}, {0, UCharRangeMax}})}); 775 }; 776 auto Read = [&](RetType R, RangeInt Max) { 777 return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R}, 778 NoEvalCall) 779 .Case({ReturnValueCondition(LessThanOrEq, ArgNo(2)), 780 ReturnValueCondition(WithinRange, Range(-1, Max))}); 781 }; 782 auto Fread = [&]() { 783 return Summary(ArgTypes{VoidPtrRestrictTy, Irrelevant, SizeTy, Irrelevant}, 784 RetType{SizeTy}, NoEvalCall) 785 .Case({ 786 ReturnValueCondition(LessThanOrEq, ArgNo(2)), 787 }) 788 .ArgConstraint(NotNull(ArgNo(0))); 789 }; 790 auto Fwrite = [&]() { 791 return Summary( 792 ArgTypes{ConstVoidPtrRestrictTy, Irrelevant, SizeTy, Irrelevant}, 793 RetType{SizeTy}, NoEvalCall) 794 .Case({ 795 ReturnValueCondition(LessThanOrEq, ArgNo(2)), 796 }) 797 .ArgConstraint(NotNull(ArgNo(0))); 798 }; 799 auto Getline = [&](RetType R, RangeInt Max) { 800 return Summary(ArgTypes{Irrelevant, Irrelevant, Irrelevant}, RetType{R}, 801 NoEvalCall) 802 .Case({ReturnValueCondition(WithinRange, {{-1, -1}, {1, Max}})}); 803 }; 804 805 // The isascii() family of functions. 806 // The behavior is undefined if the value of the argument is not 807 // representable as unsigned char or is not equal to EOF. See e.g. C99 808 // 7.4.1.2 The isalpha function (p: 181-182). 809 addToFunctionSummaryMap( 810 "isalnum", 811 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 812 // Boils down to isupper() or islower() or isdigit(). 813 .Case({ArgumentCondition(0U, WithinRange, 814 {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}}), 815 ReturnValueCondition(OutOfRange, SingleValue(0))}) 816 // The locale-specific range. 817 // No post-condition. We are completely unaware of 818 // locale-specific return values. 819 .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) 820 .Case( 821 {ArgumentCondition( 822 0U, OutOfRange, 823 {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}), 824 ReturnValueCondition(WithinRange, SingleValue(0))}) 825 .ArgConstraint(ArgumentCondition( 826 0U, WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}}))); 827 addToFunctionSummaryMap( 828 "isalpha", 829 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 830 .Case({ArgumentCondition(0U, WithinRange, {{'A', 'Z'}, {'a', 'z'}}), 831 ReturnValueCondition(OutOfRange, SingleValue(0))}) 832 // The locale-specific range. 833 .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) 834 .Case({ArgumentCondition( 835 0U, OutOfRange, 836 {{'A', 'Z'}, {'a', 'z'}, {128, UCharRangeMax}}), 837 ReturnValueCondition(WithinRange, SingleValue(0))})); 838 addToFunctionSummaryMap( 839 "isascii", 840 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 841 .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), 842 ReturnValueCondition(OutOfRange, SingleValue(0))}) 843 .Case({ArgumentCondition(0U, OutOfRange, Range(0, 127)), 844 ReturnValueCondition(WithinRange, SingleValue(0))})); 845 addToFunctionSummaryMap( 846 "isblank", 847 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 848 .Case({ArgumentCondition(0U, WithinRange, {{'\t', '\t'}, {' ', ' '}}), 849 ReturnValueCondition(OutOfRange, SingleValue(0))}) 850 .Case({ArgumentCondition(0U, OutOfRange, {{'\t', '\t'}, {' ', ' '}}), 851 ReturnValueCondition(WithinRange, SingleValue(0))})); 852 addToFunctionSummaryMap( 853 "iscntrl", 854 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 855 .Case({ArgumentCondition(0U, WithinRange, {{0, 32}, {127, 127}}), 856 ReturnValueCondition(OutOfRange, SingleValue(0))}) 857 .Case({ArgumentCondition(0U, OutOfRange, {{0, 32}, {127, 127}}), 858 ReturnValueCondition(WithinRange, SingleValue(0))})); 859 addToFunctionSummaryMap( 860 "isdigit", 861 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 862 .Case({ArgumentCondition(0U, WithinRange, Range('0', '9')), 863 ReturnValueCondition(OutOfRange, SingleValue(0))}) 864 .Case({ArgumentCondition(0U, OutOfRange, Range('0', '9')), 865 ReturnValueCondition(WithinRange, SingleValue(0))})); 866 addToFunctionSummaryMap( 867 "isgraph", 868 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 869 .Case({ArgumentCondition(0U, WithinRange, Range(33, 126)), 870 ReturnValueCondition(OutOfRange, SingleValue(0))}) 871 .Case({ArgumentCondition(0U, OutOfRange, Range(33, 126)), 872 ReturnValueCondition(WithinRange, SingleValue(0))})); 873 addToFunctionSummaryMap( 874 "islower", 875 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 876 // Is certainly lowercase. 877 .Case({ArgumentCondition(0U, WithinRange, Range('a', 'z')), 878 ReturnValueCondition(OutOfRange, SingleValue(0))}) 879 // Is ascii but not lowercase. 880 .Case({ArgumentCondition(0U, WithinRange, Range(0, 127)), 881 ArgumentCondition(0U, OutOfRange, Range('a', 'z')), 882 ReturnValueCondition(WithinRange, SingleValue(0))}) 883 // The locale-specific range. 884 .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) 885 // Is not an unsigned char. 886 .Case({ArgumentCondition(0U, OutOfRange, Range(0, UCharRangeMax)), 887 ReturnValueCondition(WithinRange, SingleValue(0))})); 888 addToFunctionSummaryMap( 889 "isprint", 890 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 891 .Case({ArgumentCondition(0U, WithinRange, Range(32, 126)), 892 ReturnValueCondition(OutOfRange, SingleValue(0))}) 893 .Case({ArgumentCondition(0U, OutOfRange, Range(32, 126)), 894 ReturnValueCondition(WithinRange, SingleValue(0))})); 895 addToFunctionSummaryMap( 896 "ispunct", 897 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 898 .Case({ArgumentCondition( 899 0U, WithinRange, 900 {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), 901 ReturnValueCondition(OutOfRange, SingleValue(0))}) 902 .Case({ArgumentCondition( 903 0U, OutOfRange, 904 {{'!', '/'}, {':', '@'}, {'[', '`'}, {'{', '~'}}), 905 ReturnValueCondition(WithinRange, SingleValue(0))})); 906 addToFunctionSummaryMap( 907 "isspace", 908 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 909 // Space, '\f', '\n', '\r', '\t', '\v'. 910 .Case({ArgumentCondition(0U, WithinRange, {{9, 13}, {' ', ' '}}), 911 ReturnValueCondition(OutOfRange, SingleValue(0))}) 912 // The locale-specific range. 913 .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) 914 .Case({ArgumentCondition(0U, OutOfRange, 915 {{9, 13}, {' ', ' '}, {128, UCharRangeMax}}), 916 ReturnValueCondition(WithinRange, SingleValue(0))})); 917 addToFunctionSummaryMap( 918 "isupper", 919 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 920 // Is certainly uppercase. 921 .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')), 922 ReturnValueCondition(OutOfRange, SingleValue(0))}) 923 // The locale-specific range. 924 .Case({ArgumentCondition(0U, WithinRange, {{128, UCharRangeMax}})}) 925 // Other. 926 .Case({ArgumentCondition(0U, OutOfRange, 927 {{'A', 'Z'}, {128, UCharRangeMax}}), 928 ReturnValueCondition(WithinRange, SingleValue(0))})); 929 addToFunctionSummaryMap( 930 "isxdigit", 931 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 932 .Case({ArgumentCondition(0U, WithinRange, 933 {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), 934 ReturnValueCondition(OutOfRange, SingleValue(0))}) 935 .Case({ArgumentCondition(0U, OutOfRange, 936 {{'0', '9'}, {'A', 'F'}, {'a', 'f'}}), 937 ReturnValueCondition(WithinRange, SingleValue(0))})); 938 939 // The getc() family of functions that returns either a char or an EOF. 940 addToFunctionSummaryMap("getc", Getc()); 941 addToFunctionSummaryMap("fgetc", Getc()); 942 addToFunctionSummaryMap( 943 "getchar", Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall) 944 .Case({ReturnValueCondition( 945 WithinRange, {{EOFv, EOFv}, {0, UCharRangeMax}})})); 946 947 // read()-like functions that never return more than buffer size. 948 // We are not sure how ssize_t is defined on every platform, so we 949 // provide three variants that should cover common cases. 950 addToFunctionSummaryMap("read", {Read(IntTy, IntMax), Read(LongTy, LongMax), 951 Read(LongLongTy, LongLongMax)}); 952 addToFunctionSummaryMap("write", {Read(IntTy, IntMax), Read(LongTy, LongMax), 953 Read(LongLongTy, LongLongMax)}); 954 addToFunctionSummaryMap("fread", Fread()); 955 addToFunctionSummaryMap("fwrite", Fwrite()); 956 // getline()-like functions either fail or read at least the delimiter. 957 addToFunctionSummaryMap("getline", 958 {Getline(IntTy, IntMax), Getline(LongTy, LongMax), 959 Getline(LongLongTy, LongLongMax)}); 960 addToFunctionSummaryMap("getdelim", 961 {Getline(IntTy, IntMax), Getline(LongTy, LongMax), 962 Getline(LongLongTy, LongLongMax)}); 963 964 // Functions for testing. 965 if (ChecksEnabled[CK_StdCLibraryFunctionsTesterChecker]) { 966 addToFunctionSummaryMap( 967 "__two_constrained_args", 968 Summary(ArgTypes{IntTy, IntTy}, RetType{IntTy}, EvalCallAsPure) 969 .ArgConstraint(ArgumentCondition(0U, WithinRange, SingleValue(1))) 970 .ArgConstraint(ArgumentCondition(1U, WithinRange, SingleValue(1)))); 971 addToFunctionSummaryMap( 972 "__arg_constrained_twice", 973 Summary(ArgTypes{IntTy}, RetType{IntTy}, EvalCallAsPure) 974 .ArgConstraint(ArgumentCondition(0U, OutOfRange, SingleValue(1))) 975 .ArgConstraint(ArgumentCondition(0U, OutOfRange, SingleValue(2)))); 976 addToFunctionSummaryMap( 977 "__defaultparam", 978 Summary(ArgTypes{Irrelevant, IntTy}, RetType{IntTy}, EvalCallAsPure) 979 .ArgConstraint(NotNull(ArgNo(0)))); 980 addToFunctionSummaryMap("__variadic", 981 Summary(ArgTypes{VoidPtrTy, ConstCharPtrTy}, 982 RetType{IntTy}, EvalCallAsPure) 983 .ArgConstraint(NotNull(ArgNo(0))) 984 .ArgConstraint(NotNull(ArgNo(1)))); 985 addToFunctionSummaryMap( 986 "__buf_size_arg_constraint", 987 Summary(ArgTypes{ConstVoidPtrTy, SizeTy}, RetType{IntTy}, 988 EvalCallAsPure) 989 .ArgConstraint( 990 BufferSize(/*Buffer=*/ArgNo(0), /*BufSize=*/ArgNo(1)))); 991 } 992 } 993 994 void ento::registerStdCLibraryFunctionsChecker(CheckerManager &mgr) { 995 auto *Checker = mgr.registerChecker<StdLibraryFunctionsChecker>(); 996 Checker->DisplayLoadedSummaries = 997 mgr.getAnalyzerOptions().getCheckerBooleanOption( 998 Checker, "DisplayLoadedSummaries"); 999 } 1000 1001 bool ento::shouldRegisterStdCLibraryFunctionsChecker(const CheckerManager &mgr) { 1002 return true; 1003 } 1004 1005 #define REGISTER_CHECKER(name) \ 1006 void ento::register##name(CheckerManager &mgr) { \ 1007 StdLibraryFunctionsChecker *checker = \ 1008 mgr.getChecker<StdLibraryFunctionsChecker>(); \ 1009 checker->ChecksEnabled[StdLibraryFunctionsChecker::CK_##name] = true; \ 1010 checker->CheckNames[StdLibraryFunctionsChecker::CK_##name] = \ 1011 mgr.getCurrentCheckerName(); \ 1012 } \ 1013 \ 1014 bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; } 1015 1016 REGISTER_CHECKER(StdCLibraryFunctionArgsChecker) 1017 REGISTER_CHECKER(StdCLibraryFunctionsTesterChecker) 1018