1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This defines CStringChecker, which is an assortment of checks on calls 11 // to functions in <string.h>. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ClangSACheckers.h" 16 #include "InterCheckerAPI.h" 17 #include "clang/Basic/CharInfo.h" 18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 19 #include "clang/StaticAnalyzer/Core/Checker.h" 20 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 22 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringSwitch.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 using namespace clang; 29 using namespace ento; 30 31 namespace { 32 class CStringChecker : public Checker< eval::Call, 33 check::PreStmt<DeclStmt>, 34 check::LiveSymbols, 35 check::DeadSymbols, 36 check::RegionChanges 37 > { 38 mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap, 39 BT_NotCString, BT_AdditionOverflow; 40 41 mutable const char *CurrentFunctionDescription; 42 43 public: 44 /// The filter is used to filter out the diagnostics which are not enabled by 45 /// the user. 
46 struct CStringChecksFilter { 47 DefaultBool CheckCStringNullArg; 48 DefaultBool CheckCStringOutOfBounds; 49 DefaultBool CheckCStringBufferOverlap; 50 DefaultBool CheckCStringNotNullTerm; 51 52 CheckName CheckNameCStringNullArg; 53 CheckName CheckNameCStringOutOfBounds; 54 CheckName CheckNameCStringBufferOverlap; 55 CheckName CheckNameCStringNotNullTerm; 56 }; 57 58 CStringChecksFilter Filter; 59 60 static void *getTag() { static int tag; return &tag; } 61 62 bool evalCall(const CallExpr *CE, CheckerContext &C) const; 63 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; 64 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const; 65 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; 66 bool wantsRegionChangeUpdate(ProgramStateRef state) const; 67 68 ProgramStateRef 69 checkRegionChanges(ProgramStateRef state, 70 const InvalidatedSymbols *, 71 ArrayRef<const MemRegion *> ExplicitRegions, 72 ArrayRef<const MemRegion *> Regions, 73 const CallEvent *Call) const; 74 75 typedef void (CStringChecker::*FnCheck)(CheckerContext &, 76 const CallExpr *) const; 77 78 void evalMemcpy(CheckerContext &C, const CallExpr *CE) const; 79 void evalMempcpy(CheckerContext &C, const CallExpr *CE) const; 80 void evalMemmove(CheckerContext &C, const CallExpr *CE) const; 81 void evalBcopy(CheckerContext &C, const CallExpr *CE) const; 82 void evalCopyCommon(CheckerContext &C, const CallExpr *CE, 83 ProgramStateRef state, 84 const Expr *Size, 85 const Expr *Source, 86 const Expr *Dest, 87 bool Restricted = false, 88 bool IsMempcpy = false) const; 89 90 void evalMemcmp(CheckerContext &C, const CallExpr *CE) const; 91 92 void evalstrLength(CheckerContext &C, const CallExpr *CE) const; 93 void evalstrnLength(CheckerContext &C, const CallExpr *CE) const; 94 void evalstrLengthCommon(CheckerContext &C, 95 const CallExpr *CE, 96 bool IsStrnlen = false) const; 97 98 void evalStrcpy(CheckerContext &C, const CallExpr *CE) const; 99 void 
evalStrncpy(CheckerContext &C, const CallExpr *CE) const; 100 void evalStpcpy(CheckerContext &C, const CallExpr *CE) const; 101 void evalStrcpyCommon(CheckerContext &C, 102 const CallExpr *CE, 103 bool returnEnd, 104 bool isBounded, 105 bool isAppending) const; 106 107 void evalStrcat(CheckerContext &C, const CallExpr *CE) const; 108 void evalStrncat(CheckerContext &C, const CallExpr *CE) const; 109 110 void evalStrcmp(CheckerContext &C, const CallExpr *CE) const; 111 void evalStrncmp(CheckerContext &C, const CallExpr *CE) const; 112 void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const; 113 void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const; 114 void evalStrcmpCommon(CheckerContext &C, 115 const CallExpr *CE, 116 bool isBounded = false, 117 bool ignoreCase = false) const; 118 119 void evalStrsep(CheckerContext &C, const CallExpr *CE) const; 120 121 // Utility methods 122 std::pair<ProgramStateRef , ProgramStateRef > 123 static assumeZero(CheckerContext &C, 124 ProgramStateRef state, SVal V, QualType Ty); 125 126 static ProgramStateRef setCStringLength(ProgramStateRef state, 127 const MemRegion *MR, 128 SVal strLength); 129 static SVal getCStringLengthForRegion(CheckerContext &C, 130 ProgramStateRef &state, 131 const Expr *Ex, 132 const MemRegion *MR, 133 bool hypothetical); 134 SVal getCStringLength(CheckerContext &C, 135 ProgramStateRef &state, 136 const Expr *Ex, 137 SVal Buf, 138 bool hypothetical = false) const; 139 140 const StringLiteral *getCStringLiteral(CheckerContext &C, 141 ProgramStateRef &state, 142 const Expr *expr, 143 SVal val) const; 144 145 static ProgramStateRef InvalidateBuffer(CheckerContext &C, 146 ProgramStateRef state, 147 const Expr *Ex, SVal V, 148 bool IsSourceBuffer, 149 const Expr *Size); 150 151 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 152 const MemRegion *MR); 153 154 // Re-usable checks 155 ProgramStateRef checkNonNull(CheckerContext &C, 156 ProgramStateRef state, 157 const Expr *S, 158 
SVal l) const; 159 ProgramStateRef CheckLocation(CheckerContext &C, 160 ProgramStateRef state, 161 const Expr *S, 162 SVal l, 163 const char *message = nullptr) const; 164 ProgramStateRef CheckBufferAccess(CheckerContext &C, 165 ProgramStateRef state, 166 const Expr *Size, 167 const Expr *FirstBuf, 168 const Expr *SecondBuf, 169 const char *firstMessage = nullptr, 170 const char *secondMessage = nullptr, 171 bool WarnAboutSize = false) const; 172 173 ProgramStateRef CheckBufferAccess(CheckerContext &C, 174 ProgramStateRef state, 175 const Expr *Size, 176 const Expr *Buf, 177 const char *message = nullptr, 178 bool WarnAboutSize = false) const { 179 // This is a convenience override. 180 return CheckBufferAccess(C, state, Size, Buf, nullptr, message, nullptr, 181 WarnAboutSize); 182 } 183 ProgramStateRef CheckOverlap(CheckerContext &C, 184 ProgramStateRef state, 185 const Expr *Size, 186 const Expr *First, 187 const Expr *Second) const; 188 void emitOverlapBug(CheckerContext &C, 189 ProgramStateRef state, 190 const Stmt *First, 191 const Stmt *Second) const; 192 193 ProgramStateRef checkAdditionOverflow(CheckerContext &C, 194 ProgramStateRef state, 195 NonLoc left, 196 NonLoc right) const; 197 198 // Return true if destination buffer of copy function is in bound. 199 // Expects SVal of Size to be positive and unsigned. 200 // Expects SVal of FirstBuf to be a FieldRegion. 201 static bool IsFirstBufInBound(CheckerContext &C, 202 ProgramStateRef state, 203 const Expr *FirstBuf, 204 const Expr *Size); 205 }; 206 207 } //end anonymous namespace 208 209 REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal) 210 211 //===----------------------------------------------------------------------===// 212 // Individual checks and utility methods. 
213 //===----------------------------------------------------------------------===// 214 215 std::pair<ProgramStateRef , ProgramStateRef > 216 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V, 217 QualType Ty) { 218 Optional<DefinedSVal> val = V.getAs<DefinedSVal>(); 219 if (!val) 220 return std::pair<ProgramStateRef , ProgramStateRef >(state, state); 221 222 SValBuilder &svalBuilder = C.getSValBuilder(); 223 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 224 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 225 } 226 227 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C, 228 ProgramStateRef state, 229 const Expr *S, SVal l) const { 230 // If a previous check has failed, propagate the failure. 231 if (!state) 232 return nullptr; 233 234 ProgramStateRef stateNull, stateNonNull; 235 std::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType()); 236 237 if (stateNull && !stateNonNull) { 238 if (!Filter.CheckCStringNullArg) 239 return nullptr; 240 241 ExplodedNode *N = C.generateSink(stateNull); 242 if (!N) 243 return nullptr; 244 245 if (!BT_Null) 246 BT_Null.reset(new BuiltinBug( 247 Filter.CheckNameCStringNullArg, categories::UnixAPI, 248 "Null pointer argument in call to byte string function")); 249 250 SmallString<80> buf; 251 llvm::raw_svector_ostream os(buf); 252 assert(CurrentFunctionDescription); 253 os << "Null pointer argument in call to " << CurrentFunctionDescription; 254 255 // Generate a report for this bug. 256 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get()); 257 auto report = llvm::make_unique<BugReport>(*BT, os.str(), N); 258 259 report->addRange(S->getSourceRange()); 260 bugreporter::trackNullOrUndefValue(N, S, *report); 261 C.emitReport(std::move(report)); 262 return nullptr; 263 } 264 265 // From here on, assume that the value is non-null. 266 assert(stateNonNull); 267 return stateNonNull; 268 } 269 270 // FIXME: This was originally copied from ArrayBoundChecker.cpp. 
Refactor? 271 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, 272 ProgramStateRef state, 273 const Expr *S, SVal l, 274 const char *warningMsg) const { 275 // If a previous check has failed, propagate the failure. 276 if (!state) 277 return nullptr; 278 279 // Check for out of bound array element access. 280 const MemRegion *R = l.getAsRegion(); 281 if (!R) 282 return state; 283 284 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 285 if (!ER) 286 return state; 287 288 assert(ER->getValueType() == C.getASTContext().CharTy && 289 "CheckLocation should only be called with char* ElementRegions"); 290 291 // Get the size of the array. 292 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 293 SValBuilder &svalBuilder = C.getSValBuilder(); 294 SVal Extent = 295 svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); 296 DefinedOrUnknownSVal Size = Extent.castAs<DefinedOrUnknownSVal>(); 297 298 // Get the index of the accessed element. 299 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>(); 300 301 ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true); 302 ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false); 303 if (StOutBound && !StInBound) { 304 ExplodedNode *N = C.generateSink(StOutBound); 305 if (!N) 306 return nullptr; 307 308 if (!BT_Bounds) { 309 BT_Bounds.reset(new BuiltinBug( 310 Filter.CheckNameCStringOutOfBounds, "Out-of-bound array access", 311 "Byte string function accesses out-of-bound array element")); 312 } 313 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get()); 314 315 // Generate a report for this bug. 
316 std::unique_ptr<BugReport> report; 317 if (warningMsg) { 318 report = llvm::make_unique<BugReport>(*BT, warningMsg, N); 319 } else { 320 assert(CurrentFunctionDescription); 321 assert(CurrentFunctionDescription[0] != '\0'); 322 323 SmallString<80> buf; 324 llvm::raw_svector_ostream os(buf); 325 os << toUppercase(CurrentFunctionDescription[0]) 326 << &CurrentFunctionDescription[1] 327 << " accesses out-of-bound array element"; 328 report = llvm::make_unique<BugReport>(*BT, os.str(), N); 329 } 330 331 // FIXME: It would be nice to eventually make this diagnostic more clear, 332 // e.g., by referencing the original declaration or by saying *why* this 333 // reference is outside the range. 334 335 report->addRange(S->getSourceRange()); 336 C.emitReport(std::move(report)); 337 return nullptr; 338 } 339 340 // Array bound check succeeded. From this point forward the array bound 341 // should always succeed. 342 return StInBound; 343 } 344 345 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C, 346 ProgramStateRef state, 347 const Expr *Size, 348 const Expr *FirstBuf, 349 const Expr *SecondBuf, 350 const char *firstMessage, 351 const char *secondMessage, 352 bool WarnAboutSize) const { 353 // If a previous check has failed, propagate the failure. 354 if (!state) 355 return nullptr; 356 357 SValBuilder &svalBuilder = C.getSValBuilder(); 358 ASTContext &Ctx = svalBuilder.getContext(); 359 const LocationContext *LCtx = C.getLocationContext(); 360 361 QualType sizeTy = Size->getType(); 362 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 363 364 // Check that the first buffer is non-null. 365 SVal BufVal = state->getSVal(FirstBuf, LCtx); 366 state = checkNonNull(C, state, FirstBuf, BufVal); 367 if (!state) 368 return nullptr; 369 370 // If out-of-bounds checking is turned off, skip the rest. 371 if (!Filter.CheckCStringOutOfBounds) 372 return state; 373 374 // Get the access length and make sure it is known. 
375 // FIXME: This assumes the caller has already checked that the access length 376 // is positive. And that it's unsigned. 377 SVal LengthVal = state->getSVal(Size, LCtx); 378 Optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 379 if (!Length) 380 return state; 381 382 // Compute the offset of the last element to be accessed: size-1. 383 NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>(); 384 NonLoc LastOffset = svalBuilder 385 .evalBinOpNN(state, BO_Sub, *Length, One, sizeTy).castAs<NonLoc>(); 386 387 // Check that the first buffer is sufficiently long. 388 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); 389 if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) { 390 const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf); 391 392 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 393 LastOffset, PtrTy); 394 state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage); 395 396 // If the buffer isn't large enough, abort. 397 if (!state) 398 return nullptr; 399 } 400 401 // If there's a second buffer, check it as well. 402 if (SecondBuf) { 403 BufVal = state->getSVal(SecondBuf, LCtx); 404 state = checkNonNull(C, state, SecondBuf, BufVal); 405 if (!state) 406 return nullptr; 407 408 BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType()); 409 if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) { 410 const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf); 411 412 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 413 LastOffset, PtrTy); 414 state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage); 415 } 416 } 417 418 // Large enough or not, return this state! 
419 return state; 420 } 421 422 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, 423 ProgramStateRef state, 424 const Expr *Size, 425 const Expr *First, 426 const Expr *Second) const { 427 if (!Filter.CheckCStringBufferOverlap) 428 return state; 429 430 // Do a simple check for overlap: if the two arguments are from the same 431 // buffer, see if the end of the first is greater than the start of the second 432 // or vice versa. 433 434 // If a previous check has failed, propagate the failure. 435 if (!state) 436 return nullptr; 437 438 ProgramStateRef stateTrue, stateFalse; 439 440 // Get the buffer values and make sure they're known locations. 441 const LocationContext *LCtx = C.getLocationContext(); 442 SVal firstVal = state->getSVal(First, LCtx); 443 SVal secondVal = state->getSVal(Second, LCtx); 444 445 Optional<Loc> firstLoc = firstVal.getAs<Loc>(); 446 if (!firstLoc) 447 return state; 448 449 Optional<Loc> secondLoc = secondVal.getAs<Loc>(); 450 if (!secondLoc) 451 return state; 452 453 // Are the two values the same? 454 SValBuilder &svalBuilder = C.getSValBuilder(); 455 std::tie(stateTrue, stateFalse) = 456 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 457 458 if (stateTrue && !stateFalse) { 459 // If the values are known to be equal, that's automatically an overlap. 460 emitOverlapBug(C, stateTrue, First, Second); 461 return nullptr; 462 } 463 464 // assume the two expressions are not equal. 465 assert(stateFalse); 466 state = stateFalse; 467 468 // Which value comes first? 
469 QualType cmpTy = svalBuilder.getConditionType(); 470 SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT, 471 *firstLoc, *secondLoc, cmpTy); 472 Optional<DefinedOrUnknownSVal> reverseTest = 473 reverse.getAs<DefinedOrUnknownSVal>(); 474 if (!reverseTest) 475 return state; 476 477 std::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 478 if (stateTrue) { 479 if (stateFalse) { 480 // If we don't know which one comes first, we can't perform this test. 481 return state; 482 } else { 483 // Switch the values so that firstVal is before secondVal. 484 std::swap(firstLoc, secondLoc); 485 486 // Switch the Exprs as well, so that they still correspond. 487 std::swap(First, Second); 488 } 489 } 490 491 // Get the length, and make sure it too is known. 492 SVal LengthVal = state->getSVal(Size, LCtx); 493 Optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 494 if (!Length) 495 return state; 496 497 // Convert the first buffer's start address to char*. 498 // Bail out if the cast fails. 499 ASTContext &Ctx = svalBuilder.getContext(); 500 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); 501 SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, 502 First->getType()); 503 Optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>(); 504 if (!FirstStartLoc) 505 return state; 506 507 // Compute the end of the first buffer. Bail out if THAT fails. 508 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, 509 *FirstStartLoc, *Length, CharPtrTy); 510 Optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>(); 511 if (!FirstEndLoc) 512 return state; 513 514 // Is the end of the first buffer past the start of the second buffer? 515 SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT, 516 *FirstEndLoc, *secondLoc, cmpTy); 517 Optional<DefinedOrUnknownSVal> OverlapTest = 518 Overlap.getAs<DefinedOrUnknownSVal>(); 519 if (!OverlapTest) 520 return state; 521 522 std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 523 524 if (stateTrue && !stateFalse) { 525 // Overlap! 
526 emitOverlapBug(C, stateTrue, First, Second); 527 return nullptr; 528 } 529 530 // assume the two expressions don't overlap. 531 assert(stateFalse); 532 return stateFalse; 533 } 534 535 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state, 536 const Stmt *First, const Stmt *Second) const { 537 ExplodedNode *N = C.generateSink(state); 538 if (!N) 539 return; 540 541 if (!BT_Overlap) 542 BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap, 543 categories::UnixAPI, "Improper arguments")); 544 545 // Generate a report for this bug. 546 auto report = llvm::make_unique<BugReport>( 547 *BT_Overlap, "Arguments must not be overlapping buffers", N); 548 report->addRange(First->getSourceRange()); 549 report->addRange(Second->getSourceRange()); 550 551 C.emitReport(std::move(report)); 552 } 553 554 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C, 555 ProgramStateRef state, 556 NonLoc left, 557 NonLoc right) const { 558 // If out-of-bounds checking is turned off, skip the rest. 559 if (!Filter.CheckCStringOutOfBounds) 560 return state; 561 562 // If a previous check has failed, propagate the failure. 563 if (!state) 564 return nullptr; 565 566 SValBuilder &svalBuilder = C.getSValBuilder(); 567 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 568 569 QualType sizeTy = svalBuilder.getContext().getSizeType(); 570 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 571 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt); 572 573 SVal maxMinusRight; 574 if (right.getAs<nonloc::ConcreteInt>()) { 575 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right, 576 sizeTy); 577 } else { 578 // Try switching the operands. (The order of these two assignments is 579 // important!) 
580 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left, 581 sizeTy); 582 left = right; 583 } 584 585 if (Optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) { 586 QualType cmpTy = svalBuilder.getConditionType(); 587 // If left > max - right, we have an overflow. 588 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left, 589 *maxMinusRightNL, cmpTy); 590 591 ProgramStateRef stateOverflow, stateOkay; 592 std::tie(stateOverflow, stateOkay) = 593 state->assume(willOverflow.castAs<DefinedOrUnknownSVal>()); 594 595 if (stateOverflow && !stateOkay) { 596 // We have an overflow. Emit a bug report. 597 ExplodedNode *N = C.generateSink(stateOverflow); 598 if (!N) 599 return nullptr; 600 601 if (!BT_AdditionOverflow) 602 BT_AdditionOverflow.reset( 603 new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API", 604 "Sum of expressions causes overflow")); 605 606 // This isn't a great error message, but this should never occur in real 607 // code anyway -- you'd have to create a buffer longer than a size_t can 608 // represent, which is sort of a contradiction. 609 const char *warning = 610 "This expression will create a string whose length is too big to " 611 "be represented as a size_t"; 612 613 // Generate a report for this bug. 614 C.emitReport( 615 llvm::make_unique<BugReport>(*BT_AdditionOverflow, warning, N)); 616 617 return nullptr; 618 } 619 620 // From now on, assume an overflow didn't occur. 621 assert(stateOkay); 622 state = stateOkay; 623 } 624 625 return state; 626 } 627 628 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state, 629 const MemRegion *MR, 630 SVal strLength) { 631 assert(!strLength.isUndef() && "Attempt to set an undefined string length"); 632 633 MR = MR->StripCasts(); 634 635 switch (MR->getKind()) { 636 case MemRegion::StringRegionKind: 637 // FIXME: This can happen if we strcpy() into a string region. This is 638 // undefined [C99 6.4.5p6], but we should still warn about it. 
639 return state; 640 641 case MemRegion::SymbolicRegionKind: 642 case MemRegion::AllocaRegionKind: 643 case MemRegion::VarRegionKind: 644 case MemRegion::FieldRegionKind: 645 case MemRegion::ObjCIvarRegionKind: 646 // These are the types we can currently track string lengths for. 647 break; 648 649 case MemRegion::ElementRegionKind: 650 // FIXME: Handle element regions by upper-bounding the parent region's 651 // string length. 652 return state; 653 654 default: 655 // Other regions (mostly non-data) can't have a reliable C string length. 656 // For now, just ignore the change. 657 // FIXME: These are rare but not impossible. We should output some kind of 658 // warning for things like strcpy((char[]){'a', 0}, "b"); 659 return state; 660 } 661 662 if (strLength.isUnknown()) 663 return state->remove<CStringLength>(MR); 664 665 return state->set<CStringLength>(MR, strLength); 666 } 667 668 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, 669 ProgramStateRef &state, 670 const Expr *Ex, 671 const MemRegion *MR, 672 bool hypothetical) { 673 if (!hypothetical) { 674 // If there's a recorded length, go ahead and return it. 675 const SVal *Recorded = state->get<CStringLength>(MR); 676 if (Recorded) 677 return *Recorded; 678 } 679 680 // Otherwise, get a new symbol and update the state. 
681 SValBuilder &svalBuilder = C.getSValBuilder(); 682 QualType sizeTy = svalBuilder.getContext().getSizeType(); 683 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), 684 MR, Ex, sizeTy, 685 C.blockCount()); 686 687 if (!hypothetical) { 688 if (Optional<NonLoc> strLn = strLength.getAs<NonLoc>()) { 689 // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4 690 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 691 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 692 llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4); 693 const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt, 694 fourInt); 695 NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt); 696 SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, 697 maxLength, sizeTy); 698 state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true); 699 } 700 state = state->set<CStringLength>(MR, strLength); 701 } 702 703 return strLength; 704 } 705 706 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, 707 const Expr *Ex, SVal Buf, 708 bool hypothetical) const { 709 const MemRegion *MR = Buf.getAsRegion(); 710 if (!MR) { 711 // If we can't get a region, see if it's something we /know/ isn't a 712 // C string. In the context of locations, the only time we can issue such 713 // a warning is for labels. 
714 if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) { 715 if (!Filter.CheckCStringNotNullTerm) 716 return UndefinedVal(); 717 718 if (ExplodedNode *N = C.addTransition(state)) { 719 if (!BT_NotCString) 720 BT_NotCString.reset(new BuiltinBug( 721 Filter.CheckNameCStringNotNullTerm, categories::UnixAPI, 722 "Argument is not a null-terminated string.")); 723 724 SmallString<120> buf; 725 llvm::raw_svector_ostream os(buf); 726 assert(CurrentFunctionDescription); 727 os << "Argument to " << CurrentFunctionDescription 728 << " is the address of the label '" << Label->getLabel()->getName() 729 << "', which is not a null-terminated string"; 730 731 // Generate a report for this bug. 732 auto report = llvm::make_unique<BugReport>(*BT_NotCString, os.str(), N); 733 734 report->addRange(Ex->getSourceRange()); 735 C.emitReport(std::move(report)); 736 } 737 return UndefinedVal(); 738 739 } 740 741 // If it's not a region and not a label, give up. 742 return UnknownVal(); 743 } 744 745 // If we have a region, strip casts from it and see if we can figure out 746 // its length. For anything we can't figure out, just return UnknownVal. 747 MR = MR->StripCasts(); 748 749 switch (MR->getKind()) { 750 case MemRegion::StringRegionKind: { 751 // Modifying the contents of string regions is undefined [C99 6.4.5p6], 752 // so we can assume that the byte length is the correct C string length. 
753 SValBuilder &svalBuilder = C.getSValBuilder(); 754 QualType sizeTy = svalBuilder.getContext().getSizeType(); 755 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); 756 return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); 757 } 758 case MemRegion::SymbolicRegionKind: 759 case MemRegion::AllocaRegionKind: 760 case MemRegion::VarRegionKind: 761 case MemRegion::FieldRegionKind: 762 case MemRegion::ObjCIvarRegionKind: 763 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical); 764 case MemRegion::CompoundLiteralRegionKind: 765 // FIXME: Can we track this? Is it necessary? 766 return UnknownVal(); 767 case MemRegion::ElementRegionKind: 768 // FIXME: How can we handle this? It's not good enough to subtract the 769 // offset from the base string length; consider "123\x00567" and &a[5]. 770 return UnknownVal(); 771 default: 772 // Other regions (mostly non-data) can't have a reliable C string length. 773 // In this case, an error is emitted and UndefinedVal is returned. 774 // The caller should always be prepared to handle this case. 775 if (!Filter.CheckCStringNotNullTerm) 776 return UndefinedVal(); 777 778 if (ExplodedNode *N = C.addTransition(state)) { 779 if (!BT_NotCString) 780 BT_NotCString.reset(new BuiltinBug( 781 Filter.CheckNameCStringNotNullTerm, categories::UnixAPI, 782 "Argument is not a null-terminated string.")); 783 784 SmallString<120> buf; 785 llvm::raw_svector_ostream os(buf); 786 787 assert(CurrentFunctionDescription); 788 os << "Argument to " << CurrentFunctionDescription << " is "; 789 790 if (SummarizeRegion(os, C.getASTContext(), MR)) 791 os << ", which is not a null-terminated string"; 792 else 793 os << "not a null-terminated string"; 794 795 // Generate a report for this bug. 
796 auto report = llvm::make_unique<BugReport>(*BT_NotCString, os.str(), N); 797 798 report->addRange(Ex->getSourceRange()); 799 C.emitReport(std::move(report)); 800 } 801 802 return UndefinedVal(); 803 } 804 } 805 806 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, 807 ProgramStateRef &state, const Expr *expr, SVal val) const { 808 809 // Get the memory region pointed to by the val. 810 const MemRegion *bufRegion = val.getAsRegion(); 811 if (!bufRegion) 812 return nullptr; 813 814 // Strip casts off the memory region. 815 bufRegion = bufRegion->StripCasts(); 816 817 // Cast the memory region to a string region. 818 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion); 819 if (!strRegion) 820 return nullptr; 821 822 // Return the actual string in the string region. 823 return strRegion->getStringLiteral(); 824 } 825 826 bool CStringChecker::IsFirstBufInBound(CheckerContext &C, 827 ProgramStateRef state, 828 const Expr *FirstBuf, 829 const Expr *Size) { 830 831 // Originally copied from CheckBufferAccess and CheckLocation. 832 SValBuilder &svalBuilder = C.getSValBuilder(); 833 ASTContext &Ctx = svalBuilder.getContext(); 834 const LocationContext *LCtx = C.getLocationContext(); 835 836 QualType sizeTy = Size->getType(); 837 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 838 SVal BufVal = state->getSVal(FirstBuf, LCtx); 839 840 SVal LengthVal = state->getSVal(Size, LCtx); 841 // Cast is safe as the size argument to copy functions are of integral type. 842 NonLoc Length = LengthVal.castAs<NonLoc>(); 843 844 // Compute the offset of the last element to be accessed: size-1. 845 NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>(); 846 NonLoc LastOffset = 847 svalBuilder.evalBinOpNN(state, BO_Sub, Length, One, sizeTy) 848 .castAs<NonLoc>(); 849 850 // Check that the first buffer is sufficiently long. 
851 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); 852 // Cast is safe as caller checks BufVal is a MemRegionVal. 853 Loc BufLoc = BufStart.castAs<Loc>(); 854 855 SVal BufEnd = 856 svalBuilder.evalBinOpLN(state, BO_Add, BufLoc, LastOffset, PtrTy); 857 858 // Check for out of bound array element access. 859 const MemRegion *R = BufEnd.getAsRegion(); 860 // BufStart is a MemRegionVal so BufEnd should be one too. 861 assert(R && "BufEnd should be a MemRegion"); 862 863 // Cast is safe as BufVal's region is a FieldRegion. 864 const ElementRegion *ER = cast<ElementRegion>(R); 865 866 assert(ER->getValueType() == C.getASTContext().CharTy && 867 "IsFirstBufInBound should only be called with char* ElementRegions"); 868 869 // Get the size of the array. 870 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 871 SVal Extent = 872 svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); 873 DefinedOrUnknownSVal ExtentSize = Extent.castAs<DefinedOrUnknownSVal>(); 874 875 // Get the index of the accessed element. 876 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>(); 877 878 ProgramStateRef StInBound = state->assumeInBound(Idx, ExtentSize, true); 879 880 return static_cast<bool>(StInBound); 881 } 882 883 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C, 884 ProgramStateRef state, 885 const Expr *E, SVal V, 886 bool IsSourceBuffer, 887 const Expr *Size) { 888 Optional<Loc> L = V.getAs<Loc>(); 889 if (!L) 890 return state; 891 892 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 893 // some assumptions about the value that CFRefCount can't. Even so, it should 894 // probably be refactored. 895 if (Optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) { 896 const MemRegion *R = MR->getRegion()->StripCasts(); 897 898 // Are we dealing with an ElementRegion? If so, we should be invalidating 899 // the super-region. 
900 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 901 R = ER->getSuperRegion(); 902 // FIXME: What about layers of ElementRegions? 903 } 904 905 // Invalidate this region. 906 const LocationContext *LCtx = C.getPredecessor()->getLocationContext(); 907 908 bool CausesPointerEscape = false; 909 RegionAndSymbolInvalidationTraits ITraits; 910 // Invalidate and escape only indirect regions accessible through the source 911 // buffer. 912 if (IsSourceBuffer) { 913 ITraits.setTrait(R, 914 RegionAndSymbolInvalidationTraits::TK_PreserveContents); 915 ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape); 916 CausesPointerEscape = true; 917 } else { 918 const MemRegion::Kind& K = R->getKind(); 919 if (K == MemRegion::FieldRegionKind) 920 if (Size && IsFirstBufInBound(C, state, E, Size)) { 921 // If destination buffer is a field region and access is in bound, 922 // do not invalidate its super region. 923 ITraits.setTrait( 924 R, 925 RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); 926 } 927 } 928 929 return state->invalidateRegions(R, E, C.blockCount(), LCtx, 930 CausesPointerEscape, nullptr, nullptr, 931 &ITraits); 932 } 933 934 // If we have a non-region value by chance, just remove the binding. 935 // FIXME: is this necessary or correct? This handles the non-Region 936 // cases. Is it ever valid to store to these? 
937 return state->killBinding(*L); 938 } 939 940 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 941 const MemRegion *MR) { 942 const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR); 943 944 switch (MR->getKind()) { 945 case MemRegion::FunctionTextRegionKind: { 946 const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl(); 947 if (FD) 948 os << "the address of the function '" << *FD << '\''; 949 else 950 os << "the address of a function"; 951 return true; 952 } 953 case MemRegion::BlockTextRegionKind: 954 os << "block text"; 955 return true; 956 case MemRegion::BlockDataRegionKind: 957 os << "a block"; 958 return true; 959 case MemRegion::CXXThisRegionKind: 960 case MemRegion::CXXTempObjectRegionKind: 961 os << "a C++ temp object of type " << TVR->getValueType().getAsString(); 962 return true; 963 case MemRegion::VarRegionKind: 964 os << "a variable of type" << TVR->getValueType().getAsString(); 965 return true; 966 case MemRegion::FieldRegionKind: 967 os << "a field of type " << TVR->getValueType().getAsString(); 968 return true; 969 case MemRegion::ObjCIvarRegionKind: 970 os << "an instance variable of type " << TVR->getValueType().getAsString(); 971 return true; 972 default: 973 return false; 974 } 975 } 976 977 //===----------------------------------------------------------------------===// 978 // evaluation of individual function calls. 979 //===----------------------------------------------------------------------===// 980 981 void CStringChecker::evalCopyCommon(CheckerContext &C, 982 const CallExpr *CE, 983 ProgramStateRef state, 984 const Expr *Size, const Expr *Dest, 985 const Expr *Source, bool Restricted, 986 bool IsMempcpy) const { 987 CurrentFunctionDescription = "memory copy function"; 988 989 // See if the size argument is zero. 
990 const LocationContext *LCtx = C.getLocationContext(); 991 SVal sizeVal = state->getSVal(Size, LCtx); 992 QualType sizeTy = Size->getType(); 993 994 ProgramStateRef stateZeroSize, stateNonZeroSize; 995 std::tie(stateZeroSize, stateNonZeroSize) = 996 assumeZero(C, state, sizeVal, sizeTy); 997 998 // Get the value of the Dest. 999 SVal destVal = state->getSVal(Dest, LCtx); 1000 1001 // If the size is zero, there won't be any actual memory access, so 1002 // just bind the return value to the destination buffer and return. 1003 if (stateZeroSize && !stateNonZeroSize) { 1004 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal); 1005 C.addTransition(stateZeroSize); 1006 return; 1007 } 1008 1009 // If the size can be nonzero, we have to check the other arguments. 1010 if (stateNonZeroSize) { 1011 state = stateNonZeroSize; 1012 1013 // Ensure the destination is not null. If it is NULL there will be a 1014 // NULL pointer dereference. 1015 state = checkNonNull(C, state, Dest, destVal); 1016 if (!state) 1017 return; 1018 1019 // Get the value of the Src. 1020 SVal srcVal = state->getSVal(Source, LCtx); 1021 1022 // Ensure the source is not null. If it is NULL there will be a 1023 // NULL pointer dereference. 1024 state = checkNonNull(C, state, Source, srcVal); 1025 if (!state) 1026 return; 1027 1028 // Ensure the accesses are valid and that the buffers do not overlap. 1029 const char * const writeWarning = 1030 "Memory copy function overflows destination buffer"; 1031 state = CheckBufferAccess(C, state, Size, Dest, Source, 1032 writeWarning, /* sourceWarning = */ nullptr); 1033 if (Restricted) 1034 state = CheckOverlap(C, state, Size, Dest, Source); 1035 1036 if (!state) 1037 return; 1038 1039 // If this is mempcpy, get the byte after the last byte copied and 1040 // bind the expr. 1041 if (IsMempcpy) { 1042 loc::MemRegionVal destRegVal = destVal.castAs<loc::MemRegionVal>(); 1043 1044 // Get the length to copy. 
1045 if (Optional<NonLoc> lenValNonLoc = sizeVal.getAs<NonLoc>()) { 1046 // Get the byte after the last byte copied. 1047 SValBuilder &SvalBuilder = C.getSValBuilder(); 1048 ASTContext &Ctx = SvalBuilder.getContext(); 1049 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); 1050 loc::MemRegionVal DestRegCharVal = SvalBuilder.evalCast(destRegVal, 1051 CharPtrTy, Dest->getType()).castAs<loc::MemRegionVal>(); 1052 SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add, 1053 DestRegCharVal, 1054 *lenValNonLoc, 1055 Dest->getType()); 1056 1057 // The byte after the last byte copied is the return value. 1058 state = state->BindExpr(CE, LCtx, lastElement); 1059 } else { 1060 // If we don't know how much we copied, we can at least 1061 // conjure a return value for later. 1062 SVal result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, 1063 C.blockCount()); 1064 state = state->BindExpr(CE, LCtx, result); 1065 } 1066 1067 } else { 1068 // All other copies return the destination buffer. 1069 // (Well, bcopy() has a void return type, but this won't hurt.) 1070 state = state->BindExpr(CE, LCtx, destVal); 1071 } 1072 1073 // Invalidate the destination (regular invalidation without pointer-escaping 1074 // the address of the top-level region). 1075 // FIXME: Even if we can't perfectly model the copy, we should see if we 1076 // can use LazyCompoundVals to copy the source values into the destination. 1077 // This would probably remove any existing bindings past the end of the 1078 // copied region, but that's still an improvement over blank invalidation. 1079 state = InvalidateBuffer(C, state, Dest, C.getSVal(Dest), 1080 /*IsSourceBuffer*/false, Size); 1081 1082 // Invalidate the source (const-invalidation without const-pointer-escaping 1083 // the address of the top-level region). 
1084 state = InvalidateBuffer(C, state, Source, C.getSVal(Source), 1085 /*IsSourceBuffer*/true, nullptr); 1086 1087 C.addTransition(state); 1088 } 1089 } 1090 1091 1092 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const { 1093 if (CE->getNumArgs() < 3) 1094 return; 1095 1096 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 1097 // The return value is the address of the destination buffer. 1098 const Expr *Dest = CE->getArg(0); 1099 ProgramStateRef state = C.getState(); 1100 1101 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true); 1102 } 1103 1104 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const { 1105 if (CE->getNumArgs() < 3) 1106 return; 1107 1108 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n); 1109 // The return value is a pointer to the byte following the last written byte. 1110 const Expr *Dest = CE->getArg(0); 1111 ProgramStateRef state = C.getState(); 1112 1113 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true); 1114 } 1115 1116 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const { 1117 if (CE->getNumArgs() < 3) 1118 return; 1119 1120 // void *memmove(void *dst, const void *src, size_t n); 1121 // The return value is the address of the destination buffer. 
1122 const Expr *Dest = CE->getArg(0); 1123 ProgramStateRef state = C.getState(); 1124 1125 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1)); 1126 } 1127 1128 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const { 1129 if (CE->getNumArgs() < 3) 1130 return; 1131 1132 // void bcopy(const void *src, void *dst, size_t n); 1133 evalCopyCommon(C, CE, C.getState(), 1134 CE->getArg(2), CE->getArg(1), CE->getArg(0)); 1135 } 1136 1137 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const { 1138 if (CE->getNumArgs() < 3) 1139 return; 1140 1141 // int memcmp(const void *s1, const void *s2, size_t n); 1142 CurrentFunctionDescription = "memory comparison function"; 1143 1144 const Expr *Left = CE->getArg(0); 1145 const Expr *Right = CE->getArg(1); 1146 const Expr *Size = CE->getArg(2); 1147 1148 ProgramStateRef state = C.getState(); 1149 SValBuilder &svalBuilder = C.getSValBuilder(); 1150 1151 // See if the size argument is zero. 1152 const LocationContext *LCtx = C.getLocationContext(); 1153 SVal sizeVal = state->getSVal(Size, LCtx); 1154 QualType sizeTy = Size->getType(); 1155 1156 ProgramStateRef stateZeroSize, stateNonZeroSize; 1157 std::tie(stateZeroSize, stateNonZeroSize) = 1158 assumeZero(C, state, sizeVal, sizeTy); 1159 1160 // If the size can be zero, the result will be 0 in that case, and we don't 1161 // have to check either of the buffers. 1162 if (stateZeroSize) { 1163 state = stateZeroSize; 1164 state = state->BindExpr(CE, LCtx, 1165 svalBuilder.makeZeroVal(CE->getType())); 1166 C.addTransition(state); 1167 } 1168 1169 // If the size can be nonzero, we have to check the other arguments. 1170 if (stateNonZeroSize) { 1171 state = stateNonZeroSize; 1172 // If we know the two buffers are the same, we know the result is 0. 1173 // First, get the two buffers' addresses. Another checker will have already 1174 // made sure they're not undefined. 
1175 DefinedOrUnknownSVal LV = 1176 state->getSVal(Left, LCtx).castAs<DefinedOrUnknownSVal>(); 1177 DefinedOrUnknownSVal RV = 1178 state->getSVal(Right, LCtx).castAs<DefinedOrUnknownSVal>(); 1179 1180 // See if they are the same. 1181 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 1182 ProgramStateRef StSameBuf, StNotSameBuf; 1183 std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 1184 1185 // If the two arguments might be the same buffer, we know the result is 0, 1186 // and we only need to check one size. 1187 if (StSameBuf) { 1188 state = StSameBuf; 1189 state = CheckBufferAccess(C, state, Size, Left); 1190 if (state) { 1191 state = StSameBuf->BindExpr(CE, LCtx, 1192 svalBuilder.makeZeroVal(CE->getType())); 1193 C.addTransition(state); 1194 } 1195 } 1196 1197 // If the two arguments might be different buffers, we have to check the 1198 // size of both of them. 1199 if (StNotSameBuf) { 1200 state = StNotSameBuf; 1201 state = CheckBufferAccess(C, state, Size, Left, Right); 1202 if (state) { 1203 // The return value is the comparison result, which we don't know. 
SVal CmpV = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
                                                 C.blockCount());
        state = state->BindExpr(CE, LCtx, CmpV);
        C.addTransition(state);
      }
    }
  }
}

/// Model strlen(s). Does nothing when the call has no arguments.
void CStringChecker::evalstrLength(CheckerContext &C,
                                   const CallExpr *CE) const {
  if (CE->getNumArgs() < 1)
    return;

  // size_t strlen(const char *s);
  evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
}

/// Model strnlen(s, maxlen). Does nothing when the call has fewer than two
/// arguments.
void CStringChecker::evalstrnLength(CheckerContext &C,
                                    const CallExpr *CE) const {
  if (CE->getNumArgs() < 2)
    return;

  // size_t strnlen(const char *s, size_t maxlen);
  evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
}

/// Shared model for strlen() and strnlen().
///
/// Argument 0 is the string. When \p IsStrnlen is true, argument 1 is read as
/// the maximum length: a path on which it is zero gets a zero result, and
/// otherwise the result is bounded by both the string length and that limit.
/// No transition is added when the string argument fails the non-null check
/// or when its length is undefined.
void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
                                         bool IsStrnlen) const {
  CurrentFunctionDescription = "string length function";
  ProgramStateRef state = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  if (IsStrnlen) {
    const Expr *maxlenExpr = CE->getArg(1);
    SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);

    ProgramStateRef stateZeroSize, stateNonZeroSize;
    std::tie(stateZeroSize, stateNonZeroSize) =
      assumeZero(C, state, maxlenVal, maxlenExpr->getType());

    // If the size can be zero, the result will be 0 in that case, and we don't
    // have to check the string itself.
    if (stateZeroSize) {
      SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
      stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
      C.addTransition(stateZeroSize);
    }

    // If the size is GUARANTEED to be zero, we're done!
    if (!stateNonZeroSize)
      return;

    // Otherwise, record the assumption that the size is nonzero.
    state = stateNonZeroSize;
  }

  // Check that the string argument is non-null.
  const Expr *Arg = CE->getArg(0);
  SVal ArgVal = state->getSVal(Arg, LCtx);

  state = checkNonNull(C, state, Arg, ArgVal);

  if (!state)
    return;

  SVal strLength = getCStringLength(C, state, Arg, ArgVal);

  // If the argument isn't a valid C string, there's no valid state to
  // transition to.
  if (strLength.isUndef())
    return;

  // Stays unknown until one of the branches below picks or conjures a value.
  DefinedOrUnknownSVal result = UnknownVal();

  // If the check is for strnlen() then bind the return value to no more than
  // the maxlen value.
  if (IsStrnlen) {
    QualType cmpTy = C.getSValBuilder().getConditionType();

    // It's a little unfortunate to be getting this again,
    // but it's not that expensive...
    const Expr *maxlenExpr = CE->getArg(1);
    SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);

    Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
    Optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();

    if (strLengthNL && maxlenValNL) {
      ProgramStateRef stateStringTooLong, stateStringNotTooLong;

      // Check if the strLength is greater than the maxlen.
      std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
          C.getSValBuilder()
              .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
              .castAs<DefinedOrUnknownSVal>());

      // Only a definite answer (exactly one feasible side) fixes the result.
      if (stateStringTooLong && !stateStringNotTooLong) {
        // If the string is longer than maxlen, return maxlen.
        result = *maxlenValNL;
      } else if (stateStringNotTooLong && !stateStringTooLong) {
        // If the string is shorter than maxlen, return its length.
        result = *strLengthNL;
      }
    }

    if (result.isUnknown()) {
      // If we don't have enough information for a comparison, there's
      // no guarantee the full string length will actually be returned.
      // All we know is the return value is the min of the string length
      // and the limit. This is better than nothing.
      result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
                                                   C.blockCount());
      NonLoc resultNL = result.castAs<NonLoc>();

      // result <= strlen(s), when the string length is a NonLoc.
      if (strLengthNL) {
        state = state->assume(C.getSValBuilder().evalBinOpNN(
                                  state, BO_LE, resultNL, *strLengthNL, cmpTy)
                                  .castAs<DefinedOrUnknownSVal>(), true);
      }

      // result <= maxlen, when the limit is a NonLoc.
      if (maxlenValNL) {
        state = state->assume(C.getSValBuilder().evalBinOpNN(
                                  state, BO_LE, resultNL, *maxlenValNL, cmpTy)
                                  .castAs<DefinedOrUnknownSVal>(), true);
      }
    }

  } else {
    // This is a plain strlen(), not strnlen().
    result = strLength.castAs<DefinedOrUnknownSVal>();

    // If we don't know the length of the string, conjure a return
    // value, so it can be used in constraints, at least.
    if (result.isUnknown()) {
      result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
                                                   C.blockCount());
    }
  }

  // Bind the return value.
  assert(!result.isUnknown() && "Should have conjured a value by now");
  state = state->BindExpr(CE, LCtx, result);
  C.addTransition(state);
}

/// Model strcpy(dst, src): unbounded, non-appending, result is dst.
void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
  if (CE->getNumArgs() < 2)
    return;

  // char *strcpy(char *restrict dst, const char *restrict src);
  evalStrcpyCommon(C, CE,
                   /* returnEnd = */ false,
                   /* isBounded = */ false,
                   /* isAppending = */ false);
}

/// Model strncpy(dst, src, n): bounded, non-appending, result is dst.
void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
  if (CE->getNumArgs() < 3)
    return;

  // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
  evalStrcpyCommon(C, CE,
                   /* returnEnd = */ false,
                   /* isBounded = */ true,
                   /* isAppending = */ false);
}

/// Model stpcpy(dst, src): unbounded, non-appending, result is the end of the
/// copy (returnEnd).
void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
  if (CE->getNumArgs() < 2)
    return;

  // char *stpcpy(char *restrict dst, const char *restrict src);
evalStrcpyCommon(C, CE,
                   /* returnEnd = */ true,
                   /* isBounded = */ false,
                   /* isAppending = */ false);
}

/// Model strcat(s1, s2): unbounded, appending, result is s1.
void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
  if (CE->getNumArgs() < 2)
    return;

  //char *strcat(char *restrict s1, const char *restrict s2);
  evalStrcpyCommon(C, CE,
                   /* returnEnd = */ false,
                   /* isBounded = */ false,
                   /* isAppending = */ true);
}

/// Model strncat(s1, s2, n): bounded, appending, result is s1.
void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
  if (CE->getNumArgs() < 3)
    return;

  //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
  evalStrcpyCommon(C, CE,
                   /* returnEnd = */ false,
                   /* isBounded = */ true,
                   /* isAppending = */ true);
}

/// Shared model for the strcpy/strcat family.
///
/// Argument 0 is the destination and argument 1 the source; both must pass
/// the non-null check and the source must have a defined C string length,
/// otherwise no transition is added.
///
/// \param returnEnd   If true the call evaluates to the location just past the
///                    copied characters (stpcpy style) instead of the
///                    destination; a value is conjured when that location
///                    cannot be computed.
/// \param isBounded   If true, argument 2 is read as the maximum number of
///                    characters to copy.
/// \param isAppending If true the source is appended to the existing
///                    destination string, so the final length also includes
///                    the destination's current length.
void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
                                      bool returnEnd, bool isBounded,
                                      bool isAppending) const {
  CurrentFunctionDescription = "string copy function";
  ProgramStateRef state = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the destination is non-null.
  const Expr *Dst = CE->getArg(0);
  SVal DstVal = state->getSVal(Dst, LCtx);

  state = checkNonNull(C, state, Dst, DstVal);
  if (!state)
    return;

  // Check that the source is non-null.
  const Expr *srcExpr = CE->getArg(1);
  SVal srcVal = state->getSVal(srcExpr, LCtx);
  state = checkNonNull(C, state, srcExpr, srcVal);
  if (!state)
    return;

  // Get the string length of the source.
  SVal strLength = getCStringLength(C, state, srcExpr, srcVal);

  // If the source isn't a valid C string, give up.
  if (strLength.isUndef())
    return;

  SValBuilder &svalBuilder = C.getSValBuilder();
  QualType cmpTy = svalBuilder.getConditionType();
  QualType sizeTy = svalBuilder.getContext().getSizeType();

  // These two values allow checking two kinds of errors:
  // - actual overflows caused by a source that doesn't fit in the destination
  // - potential overflows caused by a bound that could exceed the destination
  SVal amountCopied = UnknownVal();
  SVal maxLastElementIndex = UnknownVal();
  // Non-null only once a bound-based index has been computed below; its
  // presence later selects the bound check over the actual-copy check.
  const char *boundWarning = nullptr;

  // If the function is strncpy, strncat, etc... it is bounded.
  if (isBounded) {
    // Get the max number of characters to copy.
    const Expr *lenExpr = CE->getArg(2);
    SVal lenVal = state->getSVal(lenExpr, LCtx);

    // Protect against misdeclared strncpy().
    lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());

    Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
    Optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();

    // If we know both values, we might be able to figure out how much
    // we're copying.
    if (strLengthNL && lenValNL) {
      ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;

      // Check if the max number to copy is less than the length of the src.
      // If the bound is equal to the source length, strncpy won't null-
      // terminate the result!
      std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
          svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
              .castAs<DefinedOrUnknownSVal>());

      if (stateSourceTooLong && !stateSourceNotTooLong) {
        // Max number to copy is less than the length of the src, so the actual
        // strLength copied is the max number arg.
        state = stateSourceTooLong;
        amountCopied = lenVal;

      } else if (!stateSourceTooLong && stateSourceNotTooLong) {
        // The source buffer entirely fits in the bound.
        state = stateSourceNotTooLong;
        amountCopied = strLength;
      }
    }

    // We still want to know if the bound is known to be too large.
    if (lenValNL) {
      if (isAppending) {
        // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)

        // Get the string length of the destination. If the destination is
        // memory that can't have a string length, we shouldn't be copying
        // into it anyway.
        SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
        if (dstStrLength.isUndef())
          return;

        if (Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>()) {
          maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
                                                        *lenValNL,
                                                        *dstStrLengthNL,
                                                        sizeTy);
          boundWarning = "Size argument is greater than the free space in the "
                         "destination buffer";
        }

      } else {
        // For strncpy, this is just checking that lenVal <= sizeof(dst)
        // (Yes, strncpy and strncat differ in how they treat termination.
        // strncat ALWAYS terminates, but strncpy doesn't.)

        // We need a special case for when the copy size is zero, in which
        // case strncpy will do no work at all. Our bounds check uses n-1
        // as the last element accessed, so n == 0 is problematic.
        ProgramStateRef StateZeroSize, StateNonZeroSize;
        std::tie(StateZeroSize, StateNonZeroSize) =
          assumeZero(C, state, *lenValNL, sizeTy);

        // If the size is known to be zero, we're done.
        if (StateZeroSize && !StateNonZeroSize) {
          StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
          C.addTransition(StateZeroSize);
          return;
        }

        // Otherwise, go ahead and figure out the last element we'll touch.
        // We don't record the non-zero assumption here because we can't
        // be sure. We won't warn on a possible zero.
        NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
        maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
                                                      one, sizeTy);
        boundWarning = "Size argument is greater than the length of the "
                       "destination buffer";
      }
    }

    // If we couldn't pin down the copy length, at least bound it.
    // FIXME: We should actually run this code path for append as well, but
    // right now it creates problems with constraints (since we can end up
    // trying to pass constraints from symbol to symbol).
    if (amountCopied.isUnknown() && !isAppending) {
      // Try to get a "hypothetical" string length symbol, which we can later
      // set as a real value if that turns out to be the case.
      amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
      assert(!amountCopied.isUndef());

      if (Optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>()) {
        if (lenValNL) {
          // amountCopied <= lenVal
          SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
                                                             *amountCopiedNL,
                                                             *lenValNL,
                                                             cmpTy);
          state = state->assume(
              copiedLessThanBound.castAs<DefinedOrUnknownSVal>(), true);
          if (!state)
            return;
        }

        if (strLengthNL) {
          // amountCopied <= strlen(source)
          SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
                                                           *amountCopiedNL,
                                                           *strLengthNL,
                                                           cmpTy);
          state = state->assume(
              copiedLessThanSrc.castAs<DefinedOrUnknownSVal>(), true);
          if (!state)
            return;
        }
      }
    }

  } else {
    // The function isn't bounded. The amount copied should match the length
    // of the source buffer.
    amountCopied = strLength;
  }

  assert(state);

  // This represents the number of characters copied into the destination
  // buffer. (It may not actually be the strlen if the destination buffer
  // is not terminated.)
  SVal finalStrLength = UnknownVal();

  // If this is an appending function (strcat, strncat...) then set the
  // string length to strlen(src) + strlen(dst) since the buffer will
  // ultimately contain both.
  if (isAppending) {
    // Get the string length of the destination. If the destination is memory
    // that can't have a string length, we shouldn't be copying into it anyway.
    SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
    if (dstStrLength.isUndef())
      return;

    Optional<NonLoc> srcStrLengthNL = amountCopied.getAs<NonLoc>();
    Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();

    // If we know both string lengths, we might know the final string length.
    if (srcStrLengthNL && dstStrLengthNL) {
      // Make sure the two lengths together don't overflow a size_t.
      state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
      if (!state)
        return;

      finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
                                               *dstStrLengthNL, sizeTy);
    }

    // If we couldn't get a single value for the final string length,
    // we can at least bound it by the individual lengths.
    if (finalStrLength.isUnknown()) {
      // Try to get a "hypothetical" string length symbol, which we can later
      // set as a real value if that turns out to be the case.
      finalStrLength = getCStringLength(C, state, CE, DstVal, true);
      assert(!finalStrLength.isUndef());

      if (Optional<NonLoc> finalStrLengthNL = finalStrLength.getAs<NonLoc>()) {
        if (srcStrLengthNL) {
          // finalStrLength >= srcStrLength
          SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
                                                        *finalStrLengthNL,
                                                        *srcStrLengthNL,
                                                        cmpTy);
          state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
                                true);
          if (!state)
            return;
        }

        if (dstStrLengthNL) {
          // finalStrLength >= dstStrLength
          SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
                                                      *finalStrLengthNL,
                                                      *dstStrLengthNL,
                                                      cmpTy);
          state =
              state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
          if (!state)
            return;
        }
      }
    }

  } else {
    // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
    // the final string length will match the input string length.
    finalStrLength = amountCopied;
  }

  // The final result of the function will either be a pointer past the last
  // copied element, or a pointer to the start of the destination buffer.
  SVal Result = (returnEnd ? UnknownVal() : DstVal);

  assert(state);

  // If the destination is a MemRegion, try to check for a buffer overflow and
  // record the new string length.
  if (Optional<loc::MemRegionVal> dstRegVal =
          DstVal.getAs<loc::MemRegionVal>()) {
    QualType ptrTy = Dst->getType();

    // If we have an exact value on a bounded copy, use that to check for
    // overflows, rather than our estimate about how much is actually copied.
    if (boundWarning) {
      if (Optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
        SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
                                                      *maxLastNL, ptrTy);
        state = CheckLocation(C, state, CE->getArg(2), maxLastElement,
                              boundWarning);
        if (!state)
          return;
      }
    }

    // Then, if the final length is known...
    if (Optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
      SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
                                                 *knownStrLength, ptrTy);

      // ...and we haven't checked the bound, we'll check the actual copy.
      if (!boundWarning) {
        const char * const warningMsg =
          "String copy function overflows destination buffer";
        state = CheckLocation(C, state, Dst, lastElement, warningMsg);
        if (!state)
          return;
      }

      // If this is a stpcpy-style copy, the last element is the return value.
      if (returnEnd)
        Result = lastElement;
    }

    // Invalidate the destination (regular invalidation without pointer-escaping
    // the address of the top-level region). This must happen before we set the
    // C string length because invalidation will clear the length.
    // FIXME: Even if we can't perfectly model the copy, we should see if we
    // can use LazyCompoundVals to copy the source values into the destination.
    // This would probably remove any existing bindings past the end of the
    // string, but that's still an improvement over blank invalidation.
    state = InvalidateBuffer(C, state, Dst, *dstRegVal,
                             /*IsSourceBuffer*/false, nullptr);

    // Invalidate the source (const-invalidation without const-pointer-escaping
    // the address of the top-level region).
    state = InvalidateBuffer(C, state, srcExpr, srcVal, /*IsSourceBuffer*/true,
                             nullptr);

    // Set the C string length of the destination, if we know it.
    if (isBounded && !isAppending) {
      // strncpy is annoying in that it doesn't guarantee to null-terminate
      // the result string. If the original string didn't fit entirely inside
      // the bound (including the null-terminator), we don't know how long the
      // result is.
      if (amountCopied != strLength)
        finalStrLength = UnknownVal();
    }
    state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
  }

  assert(state);

  // If this is a stpcpy-style copy, but we were unable to check for a buffer
  // overflow, we still need a result. Conjure a return value.
  if (returnEnd && Result.isUnknown()) {
    Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
  }

  // Set the return value.
  state = state->BindExpr(CE, LCtx, Result);
  C.addTransition(state);
}

/// Model strcmp(s1, s2): unbounded, case-sensitive.
void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
  if (CE->getNumArgs() < 2)
    return;

  //int strcmp(const char *s1, const char *s2);
  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
}

/// Model strncmp(s1, s2, n): bounded, case-sensitive.
void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
  if (CE->getNumArgs() < 3)
    return;

  //int strncmp(const char *s1, const char *s2, size_t n);
  evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
}

/// Model strcasecmp(s1, s2): unbounded, case-insensitive.
void CStringChecker::evalStrcasecmp(CheckerContext &C,
                                    const CallExpr *CE) const {
  if (CE->getNumArgs() < 2)
    return;

  //int strcasecmp(const char *s1, const char *s2);
  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
}

/// Model strncasecmp(s1, s2, n): bounded, case-insensitive.
void CStringChecker::evalStrncasecmp(CheckerContext &C,
                                     const CallExpr *CE) const {
  if (CE->getNumArgs() < 3)
    return;

  //int strncasecmp(const char *s1, const char *s2, size_t n);
  evalStrcmpCommon(C, CE, /* isBounded = */ true,
/* ignoreCase = */ true);
}

/// Shared model for strcmp, strncmp, strcasecmp and strncasecmp.
///
/// Both string arguments must pass the non-null check and have a defined
/// C string length, otherwise no transition is added. A path on which both
/// arguments are the same location gets a zero result. On the remaining path
/// the result is computed only when both arguments resolve to string literals
/// (and, for bounded calls, the length has a known value); otherwise a
/// symbolic result is conjured.
///
/// \param isBounded  If true, argument 2 is read as the maximum number of
///                   characters to compare.
/// \param ignoreCase If true the literal comparison uses
///                   StringRef::compare_lower instead of StringRef::compare.
void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
                                      bool isBounded, bool ignoreCase) const {
  CurrentFunctionDescription = "string comparison function";
  ProgramStateRef state = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the first string is non-null
  const Expr *s1 = CE->getArg(0);
  SVal s1Val = state->getSVal(s1, LCtx);
  state = checkNonNull(C, state, s1, s1Val);
  if (!state)
    return;

  // Check that the second string is non-null.
  const Expr *s2 = CE->getArg(1);
  SVal s2Val = state->getSVal(s2, LCtx);
  state = checkNonNull(C, state, s2, s2Val);
  if (!state)
    return;

  // Get the string length of the first string or give up.
  SVal s1Length = getCStringLength(C, state, s1, s1Val);
  if (s1Length.isUndef())
    return;

  // Get the string length of the second string or give up.
  SVal s2Length = getCStringLength(C, state, s2, s2Val);
  if (s2Length.isUndef())
    return;

  // If we know the two buffers are the same, we know the result is 0.
  // First, get the two buffers' addresses. Another checker will have already
  // made sure they're not undefined.
  DefinedOrUnknownSVal LV = s1Val.castAs<DefinedOrUnknownSVal>();
  DefinedOrUnknownSVal RV = s2Val.castAs<DefinedOrUnknownSVal>();

  // See if they are the same.
  SValBuilder &svalBuilder = C.getSValBuilder();
  DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
  ProgramStateRef StSameBuf, StNotSameBuf;
  std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);

  // If the two arguments might be the same buffer, we know the result is 0,
  // and we only need to check one size.
  if (StSameBuf) {
    StSameBuf = StSameBuf->BindExpr(CE, LCtx,
                                    svalBuilder.makeZeroVal(CE->getType()));
    C.addTransition(StSameBuf);

    // If the two arguments are GUARANTEED to be the same, we're done!
    if (!StNotSameBuf)
      return;
  }

  assert(StNotSameBuf);
  state = StNotSameBuf;

  // At this point we can go about comparing the two buffers.
  // For now, we only do this if they're both known string literals.

  // Attempt to extract string literals from both expressions.
  const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
  const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
  bool canComputeResult = false;

  if (s1StrLiteral && s2StrLiteral) {
    StringRef s1StrRef = s1StrLiteral->getString();
    StringRef s2StrRef = s2StrLiteral->getString();

    if (isBounded) {
      // Get the max number of characters to compare.
      const Expr *lenExpr = CE->getArg(2);
      SVal lenVal = state->getSVal(lenExpr, LCtx);

      // If the length is known, we can get the right substrings.
      if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
        // Create substrings of each to compare the prefix.
        s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
        s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
        canComputeResult = true;
      }
    } else {
      // This is a normal, unbounded strcmp.
      canComputeResult = true;
    }

    if (canComputeResult) {
      // Real strcmp stops at null characters.
      size_t s1Term = s1StrRef.find('\0');
      if (s1Term != StringRef::npos)
        s1StrRef = s1StrRef.substr(0, s1Term);

      size_t s2Term = s2StrRef.find('\0');
      if (s2Term != StringRef::npos)
        s2StrRef = s2StrRef.substr(0, s2Term);

      // Use StringRef's comparison methods to compute the actual result.
      int result;

      if (ignoreCase) {
        // Compare string 1 to string 2 the same way strcasecmp() does.
        result = s1StrRef.compare_lower(s2StrRef);
      } else {
        // Compare string 1 to string 2 the same way strcmp() does.
        result = s1StrRef.compare(s2StrRef);
      }

      // Build the SVal of the comparison and bind the return value.
      SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
      state = state->BindExpr(CE, LCtx, resultVal);
    }
  }

  if (!canComputeResult) {
    // Conjure a symbolic value. It's the best we can do.
    SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
                                                  C.blockCount());
    state = state->BindExpr(CE, LCtx, resultVal);
  }

  // Record this as a possible path.
  C.addTransition(state);
}

// NOTE(review): evalStrsep continues past the end of this span; only its
// opening statements appear here.
void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
  //char *strsep(char **stringp, const char *delim);
  if (CE->getNumArgs() < 2)
    return;

  // Sanity: does the search string parameter match the return type?
  const Expr *SearchStrPtr = CE->getArg(0);
  QualType CharPtrTy = SearchStrPtr->getType()->getPointeeType();
  if (CharPtrTy.isNull() ||
      CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
    return;

  CurrentFunctionDescription = "strsep()";
  ProgramStateRef State = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the search string pointer is non-null (though it may point to
  // a null string).
  SVal SearchStrVal = State->getSVal(SearchStrPtr, LCtx);
  State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
  if (!State)
    return;

  // Check that the delimiter string is non-null.
1914 const Expr *DelimStr = CE->getArg(1); 1915 SVal DelimStrVal = State->getSVal(DelimStr, LCtx); 1916 State = checkNonNull(C, State, DelimStr, DelimStrVal); 1917 if (!State) 1918 return; 1919 1920 SValBuilder &SVB = C.getSValBuilder(); 1921 SVal Result; 1922 if (Optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) { 1923 // Get the current value of the search string pointer, as a char*. 1924 Result = State->getSVal(*SearchStrLoc, CharPtrTy); 1925 1926 // Invalidate the search string, representing the change of one delimiter 1927 // character to NUL. 1928 State = InvalidateBuffer(C, State, SearchStrPtr, Result, 1929 /*IsSourceBuffer*/false, nullptr); 1930 1931 // Overwrite the search string pointer. The new value is either an address 1932 // further along in the same string, or NULL if there are no more tokens. 1933 State = State->bindLoc(*SearchStrLoc, 1934 SVB.conjureSymbolVal(getTag(), CE, LCtx, CharPtrTy, 1935 C.blockCount())); 1936 } else { 1937 assert(SearchStrVal.isUnknown()); 1938 // Conjure a symbolic value. It's the best we can do. 1939 Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); 1940 } 1941 1942 // Set the return value, and finish. 1943 State = State->BindExpr(CE, LCtx, Result); 1944 C.addTransition(State); 1945 } 1946 1947 1948 //===----------------------------------------------------------------------===// 1949 // The driver method, and other Checker callbacks. 1950 //===----------------------------------------------------------------------===// 1951 1952 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { 1953 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 1954 1955 if (!FDecl) 1956 return false; 1957 1958 // FIXME: Poorly-factored string switches are slow. 
1959 FnCheck evalFunction = nullptr; 1960 if (C.isCLibraryFunction(FDecl, "memcpy")) 1961 evalFunction = &CStringChecker::evalMemcpy; 1962 else if (C.isCLibraryFunction(FDecl, "mempcpy")) 1963 evalFunction = &CStringChecker::evalMempcpy; 1964 else if (C.isCLibraryFunction(FDecl, "memcmp")) 1965 evalFunction = &CStringChecker::evalMemcmp; 1966 else if (C.isCLibraryFunction(FDecl, "memmove")) 1967 evalFunction = &CStringChecker::evalMemmove; 1968 else if (C.isCLibraryFunction(FDecl, "strcpy")) 1969 evalFunction = &CStringChecker::evalStrcpy; 1970 else if (C.isCLibraryFunction(FDecl, "strncpy")) 1971 evalFunction = &CStringChecker::evalStrncpy; 1972 else if (C.isCLibraryFunction(FDecl, "stpcpy")) 1973 evalFunction = &CStringChecker::evalStpcpy; 1974 else if (C.isCLibraryFunction(FDecl, "strcat")) 1975 evalFunction = &CStringChecker::evalStrcat; 1976 else if (C.isCLibraryFunction(FDecl, "strncat")) 1977 evalFunction = &CStringChecker::evalStrncat; 1978 else if (C.isCLibraryFunction(FDecl, "strlen")) 1979 evalFunction = &CStringChecker::evalstrLength; 1980 else if (C.isCLibraryFunction(FDecl, "strnlen")) 1981 evalFunction = &CStringChecker::evalstrnLength; 1982 else if (C.isCLibraryFunction(FDecl, "strcmp")) 1983 evalFunction = &CStringChecker::evalStrcmp; 1984 else if (C.isCLibraryFunction(FDecl, "strncmp")) 1985 evalFunction = &CStringChecker::evalStrncmp; 1986 else if (C.isCLibraryFunction(FDecl, "strcasecmp")) 1987 evalFunction = &CStringChecker::evalStrcasecmp; 1988 else if (C.isCLibraryFunction(FDecl, "strncasecmp")) 1989 evalFunction = &CStringChecker::evalStrncasecmp; 1990 else if (C.isCLibraryFunction(FDecl, "strsep")) 1991 evalFunction = &CStringChecker::evalStrsep; 1992 else if (C.isCLibraryFunction(FDecl, "bcopy")) 1993 evalFunction = &CStringChecker::evalBcopy; 1994 else if (C.isCLibraryFunction(FDecl, "bcmp")) 1995 evalFunction = &CStringChecker::evalMemcmp; 1996 1997 // If the callee isn't a string function, let another checker handle it. 
1998 if (!evalFunction) 1999 return false; 2000 2001 // Check and evaluate the call. 2002 (this->*evalFunction)(C, CE); 2003 2004 // If the evaluate call resulted in no change, chain to the next eval call 2005 // handler. 2006 // Note, the custom CString evaluation calls assume that basic safety 2007 // properties are held. However, if the user chooses to turn off some of these 2008 // checks, we ignore the issues and leave the call evaluation to a generic 2009 // handler. 2010 if (!C.isDifferent()) 2011 return false; 2012 2013 return true; 2014 } 2015 2016 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { 2017 // Record string length for char a[] = "abc"; 2018 ProgramStateRef state = C.getState(); 2019 2020 for (const auto *I : DS->decls()) { 2021 const VarDecl *D = dyn_cast<VarDecl>(I); 2022 if (!D) 2023 continue; 2024 2025 // FIXME: Handle array fields of structs. 2026 if (!D->getType()->isArrayType()) 2027 continue; 2028 2029 const Expr *Init = D->getInit(); 2030 if (!Init) 2031 continue; 2032 if (!isa<StringLiteral>(Init)) 2033 continue; 2034 2035 Loc VarLoc = state->getLValue(D, C.getLocationContext()); 2036 const MemRegion *MR = VarLoc.getAsRegion(); 2037 if (!MR) 2038 continue; 2039 2040 SVal StrVal = state->getSVal(Init, C.getLocationContext()); 2041 assert(StrVal.isValid() && "Initializer string is unknown or undefined"); 2042 DefinedOrUnknownSVal strLength = 2043 getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>(); 2044 2045 state = state->set<CStringLength>(MR, strLength); 2046 } 2047 2048 C.addTransition(state); 2049 } 2050 2051 bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const { 2052 CStringLengthTy Entries = state->get<CStringLength>(); 2053 return !Entries.isEmpty(); 2054 } 2055 2056 ProgramStateRef 2057 CStringChecker::checkRegionChanges(ProgramStateRef state, 2058 const InvalidatedSymbols *, 2059 ArrayRef<const MemRegion *> ExplicitRegions, 2060 ArrayRef<const MemRegion *> 
Regions, 2061 const CallEvent *Call) const { 2062 CStringLengthTy Entries = state->get<CStringLength>(); 2063 if (Entries.isEmpty()) 2064 return state; 2065 2066 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; 2067 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; 2068 2069 // First build sets for the changed regions and their super-regions. 2070 for (ArrayRef<const MemRegion *>::iterator 2071 I = Regions.begin(), E = Regions.end(); I != E; ++I) { 2072 const MemRegion *MR = *I; 2073 Invalidated.insert(MR); 2074 2075 SuperRegions.insert(MR); 2076 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { 2077 MR = SR->getSuperRegion(); 2078 SuperRegions.insert(MR); 2079 } 2080 } 2081 2082 CStringLengthTy::Factory &F = state->get_context<CStringLength>(); 2083 2084 // Then loop over the entries in the current state. 2085 for (CStringLengthTy::iterator I = Entries.begin(), 2086 E = Entries.end(); I != E; ++I) { 2087 const MemRegion *MR = I.getKey(); 2088 2089 // Is this entry for a super-region of a changed region? 2090 if (SuperRegions.count(MR)) { 2091 Entries = F.remove(Entries, MR); 2092 continue; 2093 } 2094 2095 // Is this entry for a sub-region of a changed region? 2096 const MemRegion *Super = MR; 2097 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { 2098 Super = SR->getSuperRegion(); 2099 if (Invalidated.count(Super)) { 2100 Entries = F.remove(Entries, MR); 2101 break; 2102 } 2103 } 2104 } 2105 2106 return state->set<CStringLength>(Entries); 2107 } 2108 2109 void CStringChecker::checkLiveSymbols(ProgramStateRef state, 2110 SymbolReaper &SR) const { 2111 // Mark all symbols in our string length map as valid. 
2112 CStringLengthTy Entries = state->get<CStringLength>(); 2113 2114 for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); 2115 I != E; ++I) { 2116 SVal Len = I.getData(); 2117 2118 for (SymExpr::symbol_iterator si = Len.symbol_begin(), 2119 se = Len.symbol_end(); si != se; ++si) 2120 SR.markInUse(*si); 2121 } 2122 } 2123 2124 void CStringChecker::checkDeadSymbols(SymbolReaper &SR, 2125 CheckerContext &C) const { 2126 if (!SR.hasDeadSymbols()) 2127 return; 2128 2129 ProgramStateRef state = C.getState(); 2130 CStringLengthTy Entries = state->get<CStringLength>(); 2131 if (Entries.isEmpty()) 2132 return; 2133 2134 CStringLengthTy::Factory &F = state->get_context<CStringLength>(); 2135 for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); 2136 I != E; ++I) { 2137 SVal Len = I.getData(); 2138 if (SymbolRef Sym = Len.getAsSymbol()) { 2139 if (SR.isDead(Sym)) 2140 Entries = F.remove(Entries, I.getKey()); 2141 } 2142 } 2143 2144 state = state->set<CStringLength>(Entries); 2145 C.addTransition(state); 2146 } 2147 2148 #define REGISTER_CHECKER(name) \ 2149 void ento::register##name(CheckerManager &mgr) { \ 2150 CStringChecker *checker = mgr.registerChecker<CStringChecker>(); \ 2151 checker->Filter.Check##name = true; \ 2152 checker->Filter.CheckName##name = mgr.getCurrentCheckName(); \ 2153 } 2154 2155 REGISTER_CHECKER(CStringNullArg) 2156 REGISTER_CHECKER(CStringOutOfBounds) 2157 REGISTER_CHECKER(CStringBufferOverlap) 2158 REGISTER_CHECKER(CStringNotNullTerm) 2159 2160 void ento::registerCStringCheckerBasic(CheckerManager &Mgr) { 2161 registerCStringNullArg(Mgr); 2162 } 2163