1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This defines CStringChecker, which is an assortment of checks on calls 11 // to functions in <string.h>. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ClangSACheckers.h" 16 #include "InterCheckerAPI.h" 17 #include "clang/Basic/CharInfo.h" 18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 19 #include "clang/StaticAnalyzer/Core/Checker.h" 20 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 22 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringSwitch.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 using namespace clang; 29 using namespace ento; 30 31 namespace { 32 class CStringChecker : public Checker< eval::Call, 33 check::PreStmt<DeclStmt>, 34 check::LiveSymbols, 35 check::DeadSymbols, 36 check::RegionChanges 37 > { 38 mutable OwningPtr<BugType> BT_Null, 39 BT_Bounds, 40 BT_Overlap, 41 BT_NotCString, 42 BT_AdditionOverflow; 43 44 mutable const char *CurrentFunctionDescription; 45 46 public: 47 /// The filter is used to filter out the diagnostics which are not enabled by 48 /// the user. 
49 struct CStringChecksFilter { 50 DefaultBool CheckCStringNullArg; 51 DefaultBool CheckCStringOutOfBounds; 52 DefaultBool CheckCStringBufferOverlap; 53 DefaultBool CheckCStringNotNullTerm; 54 55 CheckName CheckNameCStringNullArg; 56 CheckName CheckNameCStringOutOfBounds; 57 CheckName CheckNameCStringBufferOverlap; 58 CheckName CheckNameCStringNotNullTerm; 59 }; 60 61 CStringChecksFilter Filter; 62 63 static void *getTag() { static int tag; return &tag; } 64 65 bool evalCall(const CallExpr *CE, CheckerContext &C) const; 66 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; 67 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const; 68 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; 69 bool wantsRegionChangeUpdate(ProgramStateRef state) const; 70 71 ProgramStateRef 72 checkRegionChanges(ProgramStateRef state, 73 const InvalidatedSymbols *, 74 ArrayRef<const MemRegion *> ExplicitRegions, 75 ArrayRef<const MemRegion *> Regions, 76 const CallEvent *Call) const; 77 78 typedef void (CStringChecker::*FnCheck)(CheckerContext &, 79 const CallExpr *) const; 80 81 void evalMemcpy(CheckerContext &C, const CallExpr *CE) const; 82 void evalMempcpy(CheckerContext &C, const CallExpr *CE) const; 83 void evalMemmove(CheckerContext &C, const CallExpr *CE) const; 84 void evalBcopy(CheckerContext &C, const CallExpr *CE) const; 85 void evalCopyCommon(CheckerContext &C, const CallExpr *CE, 86 ProgramStateRef state, 87 const Expr *Size, 88 const Expr *Source, 89 const Expr *Dest, 90 bool Restricted = false, 91 bool IsMempcpy = false) const; 92 93 void evalMemcmp(CheckerContext &C, const CallExpr *CE) const; 94 95 void evalstrLength(CheckerContext &C, const CallExpr *CE) const; 96 void evalstrnLength(CheckerContext &C, const CallExpr *CE) const; 97 void evalstrLengthCommon(CheckerContext &C, 98 const CallExpr *CE, 99 bool IsStrnlen = false) const; 100 101 void evalStrcpy(CheckerContext &C, const CallExpr *CE) const; 102 void 
evalStrncpy(CheckerContext &C, const CallExpr *CE) const; 103 void evalStpcpy(CheckerContext &C, const CallExpr *CE) const; 104 void evalStrcpyCommon(CheckerContext &C, 105 const CallExpr *CE, 106 bool returnEnd, 107 bool isBounded, 108 bool isAppending) const; 109 110 void evalStrcat(CheckerContext &C, const CallExpr *CE) const; 111 void evalStrncat(CheckerContext &C, const CallExpr *CE) const; 112 113 void evalStrcmp(CheckerContext &C, const CallExpr *CE) const; 114 void evalStrncmp(CheckerContext &C, const CallExpr *CE) const; 115 void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const; 116 void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const; 117 void evalStrcmpCommon(CheckerContext &C, 118 const CallExpr *CE, 119 bool isBounded = false, 120 bool ignoreCase = false) const; 121 122 void evalStrsep(CheckerContext &C, const CallExpr *CE) const; 123 124 // Utility methods 125 std::pair<ProgramStateRef , ProgramStateRef > 126 static assumeZero(CheckerContext &C, 127 ProgramStateRef state, SVal V, QualType Ty); 128 129 static ProgramStateRef setCStringLength(ProgramStateRef state, 130 const MemRegion *MR, 131 SVal strLength); 132 static SVal getCStringLengthForRegion(CheckerContext &C, 133 ProgramStateRef &state, 134 const Expr *Ex, 135 const MemRegion *MR, 136 bool hypothetical); 137 SVal getCStringLength(CheckerContext &C, 138 ProgramStateRef &state, 139 const Expr *Ex, 140 SVal Buf, 141 bool hypothetical = false) const; 142 143 const StringLiteral *getCStringLiteral(CheckerContext &C, 144 ProgramStateRef &state, 145 const Expr *expr, 146 SVal val) const; 147 148 static ProgramStateRef InvalidateBuffer(CheckerContext &C, 149 ProgramStateRef state, 150 const Expr *Ex, SVal V, 151 bool IsSourceBuffer); 152 153 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 154 const MemRegion *MR); 155 156 // Re-usable checks 157 ProgramStateRef checkNonNull(CheckerContext &C, 158 ProgramStateRef state, 159 const Expr *S, 160 SVal l) const; 161 
ProgramStateRef CheckLocation(CheckerContext &C, 162 ProgramStateRef state, 163 const Expr *S, 164 SVal l, 165 const char *message = NULL) const; 166 ProgramStateRef CheckBufferAccess(CheckerContext &C, 167 ProgramStateRef state, 168 const Expr *Size, 169 const Expr *FirstBuf, 170 const Expr *SecondBuf, 171 const char *firstMessage = NULL, 172 const char *secondMessage = NULL, 173 bool WarnAboutSize = false) const; 174 175 ProgramStateRef CheckBufferAccess(CheckerContext &C, 176 ProgramStateRef state, 177 const Expr *Size, 178 const Expr *Buf, 179 const char *message = NULL, 180 bool WarnAboutSize = false) const { 181 // This is a convenience override. 182 return CheckBufferAccess(C, state, Size, Buf, NULL, message, NULL, 183 WarnAboutSize); 184 } 185 ProgramStateRef CheckOverlap(CheckerContext &C, 186 ProgramStateRef state, 187 const Expr *Size, 188 const Expr *First, 189 const Expr *Second) const; 190 void emitOverlapBug(CheckerContext &C, 191 ProgramStateRef state, 192 const Stmt *First, 193 const Stmt *Second) const; 194 195 ProgramStateRef checkAdditionOverflow(CheckerContext &C, 196 ProgramStateRef state, 197 NonLoc left, 198 NonLoc right) const; 199 }; 200 201 } //end anonymous namespace 202 203 REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal) 204 205 //===----------------------------------------------------------------------===// 206 // Individual checks and utility methods. 
207 //===----------------------------------------------------------------------===// 208 209 std::pair<ProgramStateRef , ProgramStateRef > 210 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V, 211 QualType Ty) { 212 Optional<DefinedSVal> val = V.getAs<DefinedSVal>(); 213 if (!val) 214 return std::pair<ProgramStateRef , ProgramStateRef >(state, state); 215 216 SValBuilder &svalBuilder = C.getSValBuilder(); 217 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 218 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 219 } 220 221 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C, 222 ProgramStateRef state, 223 const Expr *S, SVal l) const { 224 // If a previous check has failed, propagate the failure. 225 if (!state) 226 return NULL; 227 228 ProgramStateRef stateNull, stateNonNull; 229 llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType()); 230 231 if (stateNull && !stateNonNull) { 232 if (!Filter.CheckCStringNullArg) 233 return NULL; 234 235 ExplodedNode *N = C.generateSink(stateNull); 236 if (!N) 237 return NULL; 238 239 if (!BT_Null) 240 BT_Null.reset(new BuiltinBug( 241 Filter.CheckNameCStringNullArg, categories::UnixAPI, 242 "Null pointer argument in call to byte string function")); 243 244 SmallString<80> buf; 245 llvm::raw_svector_ostream os(buf); 246 assert(CurrentFunctionDescription); 247 os << "Null pointer argument in call to " << CurrentFunctionDescription; 248 249 // Generate a report for this bug. 250 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get()); 251 BugReport *report = new BugReport(*BT, os.str(), N); 252 253 report->addRange(S->getSourceRange()); 254 bugreporter::trackNullOrUndefValue(N, S, *report); 255 C.emitReport(report); 256 return NULL; 257 } 258 259 // From here on, assume that the value is non-null. 260 assert(stateNonNull); 261 return stateNonNull; 262 } 263 264 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? 
265 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, 266 ProgramStateRef state, 267 const Expr *S, SVal l, 268 const char *warningMsg) const { 269 // If a previous check has failed, propagate the failure. 270 if (!state) 271 return NULL; 272 273 // Check for out of bound array element access. 274 const MemRegion *R = l.getAsRegion(); 275 if (!R) 276 return state; 277 278 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 279 if (!ER) 280 return state; 281 282 assert(ER->getValueType() == C.getASTContext().CharTy && 283 "CheckLocation should only be called with char* ElementRegions"); 284 285 // Get the size of the array. 286 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 287 SValBuilder &svalBuilder = C.getSValBuilder(); 288 SVal Extent = 289 svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); 290 DefinedOrUnknownSVal Size = Extent.castAs<DefinedOrUnknownSVal>(); 291 292 // Get the index of the accessed element. 293 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>(); 294 295 ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true); 296 ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false); 297 if (StOutBound && !StInBound) { 298 ExplodedNode *N = C.generateSink(StOutBound); 299 if (!N) 300 return NULL; 301 302 if (!BT_Bounds) { 303 BT_Bounds.reset(new BuiltinBug( 304 Filter.CheckNameCStringOutOfBounds, "Out-of-bound array access", 305 "Byte string function accesses out-of-bound array element")); 306 } 307 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get()); 308 309 // Generate a report for this bug. 
310 BugReport *report; 311 if (warningMsg) { 312 report = new BugReport(*BT, warningMsg, N); 313 } else { 314 assert(CurrentFunctionDescription); 315 assert(CurrentFunctionDescription[0] != '\0'); 316 317 SmallString<80> buf; 318 llvm::raw_svector_ostream os(buf); 319 os << toUppercase(CurrentFunctionDescription[0]) 320 << &CurrentFunctionDescription[1] 321 << " accesses out-of-bound array element"; 322 report = new BugReport(*BT, os.str(), N); 323 } 324 325 // FIXME: It would be nice to eventually make this diagnostic more clear, 326 // e.g., by referencing the original declaration or by saying *why* this 327 // reference is outside the range. 328 329 report->addRange(S->getSourceRange()); 330 C.emitReport(report); 331 return NULL; 332 } 333 334 // Array bound check succeeded. From this point forward the array bound 335 // should always succeed. 336 return StInBound; 337 } 338 339 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C, 340 ProgramStateRef state, 341 const Expr *Size, 342 const Expr *FirstBuf, 343 const Expr *SecondBuf, 344 const char *firstMessage, 345 const char *secondMessage, 346 bool WarnAboutSize) const { 347 // If a previous check has failed, propagate the failure. 348 if (!state) 349 return NULL; 350 351 SValBuilder &svalBuilder = C.getSValBuilder(); 352 ASTContext &Ctx = svalBuilder.getContext(); 353 const LocationContext *LCtx = C.getLocationContext(); 354 355 QualType sizeTy = Size->getType(); 356 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 357 358 // Check that the first buffer is non-null. 359 SVal BufVal = state->getSVal(FirstBuf, LCtx); 360 state = checkNonNull(C, state, FirstBuf, BufVal); 361 if (!state) 362 return NULL; 363 364 // If out-of-bounds checking is turned off, skip the rest. 365 if (!Filter.CheckCStringOutOfBounds) 366 return state; 367 368 // Get the access length and make sure it is known. 369 // FIXME: This assumes the caller has already checked that the access length 370 // is positive. 
And that it's unsigned. 371 SVal LengthVal = state->getSVal(Size, LCtx); 372 Optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 373 if (!Length) 374 return state; 375 376 // Compute the offset of the last element to be accessed: size-1. 377 NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>(); 378 NonLoc LastOffset = svalBuilder 379 .evalBinOpNN(state, BO_Sub, *Length, One, sizeTy).castAs<NonLoc>(); 380 381 // Check that the first buffer is sufficiently long. 382 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); 383 if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) { 384 const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf); 385 386 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 387 LastOffset, PtrTy); 388 state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage); 389 390 // If the buffer isn't large enough, abort. 391 if (!state) 392 return NULL; 393 } 394 395 // If there's a second buffer, check it as well. 396 if (SecondBuf) { 397 BufVal = state->getSVal(SecondBuf, LCtx); 398 state = checkNonNull(C, state, SecondBuf, BufVal); 399 if (!state) 400 return NULL; 401 402 BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType()); 403 if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) { 404 const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf); 405 406 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 407 LastOffset, PtrTy); 408 state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage); 409 } 410 } 411 412 // Large enough or not, return this state! 
413 return state; 414 } 415 416 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, 417 ProgramStateRef state, 418 const Expr *Size, 419 const Expr *First, 420 const Expr *Second) const { 421 if (!Filter.CheckCStringBufferOverlap) 422 return state; 423 424 // Do a simple check for overlap: if the two arguments are from the same 425 // buffer, see if the end of the first is greater than the start of the second 426 // or vice versa. 427 428 // If a previous check has failed, propagate the failure. 429 if (!state) 430 return NULL; 431 432 ProgramStateRef stateTrue, stateFalse; 433 434 // Get the buffer values and make sure they're known locations. 435 const LocationContext *LCtx = C.getLocationContext(); 436 SVal firstVal = state->getSVal(First, LCtx); 437 SVal secondVal = state->getSVal(Second, LCtx); 438 439 Optional<Loc> firstLoc = firstVal.getAs<Loc>(); 440 if (!firstLoc) 441 return state; 442 443 Optional<Loc> secondLoc = secondVal.getAs<Loc>(); 444 if (!secondLoc) 445 return state; 446 447 // Are the two values the same? 448 SValBuilder &svalBuilder = C.getSValBuilder(); 449 llvm::tie(stateTrue, stateFalse) = 450 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 451 452 if (stateTrue && !stateFalse) { 453 // If the values are known to be equal, that's automatically an overlap. 454 emitOverlapBug(C, stateTrue, First, Second); 455 return NULL; 456 } 457 458 // assume the two expressions are not equal. 459 assert(stateFalse); 460 state = stateFalse; 461 462 // Which value comes first? 
463 QualType cmpTy = svalBuilder.getConditionType(); 464 SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT, 465 *firstLoc, *secondLoc, cmpTy); 466 Optional<DefinedOrUnknownSVal> reverseTest = 467 reverse.getAs<DefinedOrUnknownSVal>(); 468 if (!reverseTest) 469 return state; 470 471 llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 472 if (stateTrue) { 473 if (stateFalse) { 474 // If we don't know which one comes first, we can't perform this test. 475 return state; 476 } else { 477 // Switch the values so that firstVal is before secondVal. 478 std::swap(firstLoc, secondLoc); 479 480 // Switch the Exprs as well, so that they still correspond. 481 std::swap(First, Second); 482 } 483 } 484 485 // Get the length, and make sure it too is known. 486 SVal LengthVal = state->getSVal(Size, LCtx); 487 Optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 488 if (!Length) 489 return state; 490 491 // Convert the first buffer's start address to char*. 492 // Bail out if the cast fails. 493 ASTContext &Ctx = svalBuilder.getContext(); 494 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); 495 SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, 496 First->getType()); 497 Optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>(); 498 if (!FirstStartLoc) 499 return state; 500 501 // Compute the end of the first buffer. Bail out if THAT fails. 502 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, 503 *FirstStartLoc, *Length, CharPtrTy); 504 Optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>(); 505 if (!FirstEndLoc) 506 return state; 507 508 // Is the end of the first buffer past the start of the second buffer? 509 SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT, 510 *FirstEndLoc, *secondLoc, cmpTy); 511 Optional<DefinedOrUnknownSVal> OverlapTest = 512 Overlap.getAs<DefinedOrUnknownSVal>(); 513 if (!OverlapTest) 514 return state; 515 516 llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 517 518 if (stateTrue && !stateFalse) { 519 // Overlap! 
520 emitOverlapBug(C, stateTrue, First, Second); 521 return NULL; 522 } 523 524 // assume the two expressions don't overlap. 525 assert(stateFalse); 526 return stateFalse; 527 } 528 529 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state, 530 const Stmt *First, const Stmt *Second) const { 531 ExplodedNode *N = C.generateSink(state); 532 if (!N) 533 return; 534 535 if (!BT_Overlap) 536 BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap, 537 categories::UnixAPI, "Improper arguments")); 538 539 // Generate a report for this bug. 540 BugReport *report = 541 new BugReport(*BT_Overlap, 542 "Arguments must not be overlapping buffers", N); 543 report->addRange(First->getSourceRange()); 544 report->addRange(Second->getSourceRange()); 545 546 C.emitReport(report); 547 } 548 549 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C, 550 ProgramStateRef state, 551 NonLoc left, 552 NonLoc right) const { 553 // If out-of-bounds checking is turned off, skip the rest. 554 if (!Filter.CheckCStringOutOfBounds) 555 return state; 556 557 // If a previous check has failed, propagate the failure. 558 if (!state) 559 return NULL; 560 561 SValBuilder &svalBuilder = C.getSValBuilder(); 562 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 563 564 QualType sizeTy = svalBuilder.getContext().getSizeType(); 565 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 566 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt); 567 568 SVal maxMinusRight; 569 if (right.getAs<nonloc::ConcreteInt>()) { 570 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right, 571 sizeTy); 572 } else { 573 // Try switching the operands. (The order of these two assignments is 574 // important!) 
575 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left, 576 sizeTy); 577 left = right; 578 } 579 580 if (Optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) { 581 QualType cmpTy = svalBuilder.getConditionType(); 582 // If left > max - right, we have an overflow. 583 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left, 584 *maxMinusRightNL, cmpTy); 585 586 ProgramStateRef stateOverflow, stateOkay; 587 llvm::tie(stateOverflow, stateOkay) = 588 state->assume(willOverflow.castAs<DefinedOrUnknownSVal>()); 589 590 if (stateOverflow && !stateOkay) { 591 // We have an overflow. Emit a bug report. 592 ExplodedNode *N = C.generateSink(stateOverflow); 593 if (!N) 594 return NULL; 595 596 if (!BT_AdditionOverflow) 597 BT_AdditionOverflow.reset( 598 new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API", 599 "Sum of expressions causes overflow")); 600 601 // This isn't a great error message, but this should never occur in real 602 // code anyway -- you'd have to create a buffer longer than a size_t can 603 // represent, which is sort of a contradiction. 604 const char *warning = 605 "This expression will create a string whose length is too big to " 606 "be represented as a size_t"; 607 608 // Generate a report for this bug. 609 BugReport *report = new BugReport(*BT_AdditionOverflow, warning, N); 610 C.emitReport(report); 611 612 return NULL; 613 } 614 615 // From now on, assume an overflow didn't occur. 616 assert(stateOkay); 617 state = stateOkay; 618 } 619 620 return state; 621 } 622 623 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state, 624 const MemRegion *MR, 625 SVal strLength) { 626 assert(!strLength.isUndef() && "Attempt to set an undefined string length"); 627 628 MR = MR->StripCasts(); 629 630 switch (MR->getKind()) { 631 case MemRegion::StringRegionKind: 632 // FIXME: This can happen if we strcpy() into a string region. This is 633 // undefined [C99 6.4.5p6], but we should still warn about it. 
634 return state; 635 636 case MemRegion::SymbolicRegionKind: 637 case MemRegion::AllocaRegionKind: 638 case MemRegion::VarRegionKind: 639 case MemRegion::FieldRegionKind: 640 case MemRegion::ObjCIvarRegionKind: 641 // These are the types we can currently track string lengths for. 642 break; 643 644 case MemRegion::ElementRegionKind: 645 // FIXME: Handle element regions by upper-bounding the parent region's 646 // string length. 647 return state; 648 649 default: 650 // Other regions (mostly non-data) can't have a reliable C string length. 651 // For now, just ignore the change. 652 // FIXME: These are rare but not impossible. We should output some kind of 653 // warning for things like strcpy((char[]){'a', 0}, "b"); 654 return state; 655 } 656 657 if (strLength.isUnknown()) 658 return state->remove<CStringLength>(MR); 659 660 return state->set<CStringLength>(MR, strLength); 661 } 662 663 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, 664 ProgramStateRef &state, 665 const Expr *Ex, 666 const MemRegion *MR, 667 bool hypothetical) { 668 if (!hypothetical) { 669 // If there's a recorded length, go ahead and return it. 670 const SVal *Recorded = state->get<CStringLength>(MR); 671 if (Recorded) 672 return *Recorded; 673 } 674 675 // Otherwise, get a new symbol and update the state. 
676 SValBuilder &svalBuilder = C.getSValBuilder(); 677 QualType sizeTy = svalBuilder.getContext().getSizeType(); 678 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), 679 MR, Ex, sizeTy, 680 C.blockCount()); 681 682 if (!hypothetical) { 683 if (Optional<NonLoc> strLn = strLength.getAs<NonLoc>()) { 684 // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4 685 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 686 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 687 llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4); 688 const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt, 689 fourInt); 690 NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt); 691 SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, 692 maxLength, sizeTy); 693 state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true); 694 } 695 state = state->set<CStringLength>(MR, strLength); 696 } 697 698 return strLength; 699 } 700 701 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, 702 const Expr *Ex, SVal Buf, 703 bool hypothetical) const { 704 const MemRegion *MR = Buf.getAsRegion(); 705 if (!MR) { 706 // If we can't get a region, see if it's something we /know/ isn't a 707 // C string. In the context of locations, the only time we can issue such 708 // a warning is for labels. 
709 if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) { 710 if (!Filter.CheckCStringNotNullTerm) 711 return UndefinedVal(); 712 713 if (ExplodedNode *N = C.addTransition(state)) { 714 if (!BT_NotCString) 715 BT_NotCString.reset(new BuiltinBug( 716 Filter.CheckNameCStringNotNullTerm, categories::UnixAPI, 717 "Argument is not a null-terminated string.")); 718 719 SmallString<120> buf; 720 llvm::raw_svector_ostream os(buf); 721 assert(CurrentFunctionDescription); 722 os << "Argument to " << CurrentFunctionDescription 723 << " is the address of the label '" << Label->getLabel()->getName() 724 << "', which is not a null-terminated string"; 725 726 // Generate a report for this bug. 727 BugReport *report = new BugReport(*BT_NotCString, os.str(), N); 728 729 report->addRange(Ex->getSourceRange()); 730 C.emitReport(report); 731 } 732 return UndefinedVal(); 733 734 } 735 736 // If it's not a region and not a label, give up. 737 return UnknownVal(); 738 } 739 740 // If we have a region, strip casts from it and see if we can figure out 741 // its length. For anything we can't figure out, just return UnknownVal. 742 MR = MR->StripCasts(); 743 744 switch (MR->getKind()) { 745 case MemRegion::StringRegionKind: { 746 // Modifying the contents of string regions is undefined [C99 6.4.5p6], 747 // so we can assume that the byte length is the correct C string length. 748 SValBuilder &svalBuilder = C.getSValBuilder(); 749 QualType sizeTy = svalBuilder.getContext().getSizeType(); 750 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); 751 return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); 752 } 753 case MemRegion::SymbolicRegionKind: 754 case MemRegion::AllocaRegionKind: 755 case MemRegion::VarRegionKind: 756 case MemRegion::FieldRegionKind: 757 case MemRegion::ObjCIvarRegionKind: 758 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical); 759 case MemRegion::CompoundLiteralRegionKind: 760 // FIXME: Can we track this? 
Is it necessary? 761 return UnknownVal(); 762 case MemRegion::ElementRegionKind: 763 // FIXME: How can we handle this? It's not good enough to subtract the 764 // offset from the base string length; consider "123\x00567" and &a[5]. 765 return UnknownVal(); 766 default: 767 // Other regions (mostly non-data) can't have a reliable C string length. 768 // In this case, an error is emitted and UndefinedVal is returned. 769 // The caller should always be prepared to handle this case. 770 if (!Filter.CheckCStringNotNullTerm) 771 return UndefinedVal(); 772 773 if (ExplodedNode *N = C.addTransition(state)) { 774 if (!BT_NotCString) 775 BT_NotCString.reset(new BuiltinBug( 776 Filter.CheckNameCStringNotNullTerm, categories::UnixAPI, 777 "Argument is not a null-terminated string.")); 778 779 SmallString<120> buf; 780 llvm::raw_svector_ostream os(buf); 781 782 assert(CurrentFunctionDescription); 783 os << "Argument to " << CurrentFunctionDescription << " is "; 784 785 if (SummarizeRegion(os, C.getASTContext(), MR)) 786 os << ", which is not a null-terminated string"; 787 else 788 os << "not a null-terminated string"; 789 790 // Generate a report for this bug. 791 BugReport *report = new BugReport(*BT_NotCString, 792 os.str(), N); 793 794 report->addRange(Ex->getSourceRange()); 795 C.emitReport(report); 796 } 797 798 return UndefinedVal(); 799 } 800 } 801 802 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, 803 ProgramStateRef &state, const Expr *expr, SVal val) const { 804 805 // Get the memory region pointed to by the val. 806 const MemRegion *bufRegion = val.getAsRegion(); 807 if (!bufRegion) 808 return NULL; 809 810 // Strip casts off the memory region. 811 bufRegion = bufRegion->StripCasts(); 812 813 // Cast the memory region to a string region. 814 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion); 815 if (!strRegion) 816 return NULL; 817 818 // Return the actual string in the string region. 
819 return strRegion->getStringLiteral(); 820 } 821 822 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C, 823 ProgramStateRef state, 824 const Expr *E, SVal V, 825 bool IsSourceBuffer) { 826 Optional<Loc> L = V.getAs<Loc>(); 827 if (!L) 828 return state; 829 830 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 831 // some assumptions about the value that CFRefCount can't. Even so, it should 832 // probably be refactored. 833 if (Optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) { 834 const MemRegion *R = MR->getRegion()->StripCasts(); 835 836 // Are we dealing with an ElementRegion? If so, we should be invalidating 837 // the super-region. 838 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 839 R = ER->getSuperRegion(); 840 // FIXME: What about layers of ElementRegions? 841 } 842 843 // Invalidate this region. 844 const LocationContext *LCtx = C.getPredecessor()->getLocationContext(); 845 846 bool CausesPointerEscape = false; 847 RegionAndSymbolInvalidationTraits ITraits; 848 // Invalidate and escape only indirect regions accessible through the source 849 // buffer. 850 if (IsSourceBuffer) { 851 ITraits.setTrait(R, 852 RegionAndSymbolInvalidationTraits::TK_PreserveContents); 853 ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape); 854 CausesPointerEscape = true; 855 } 856 857 return state->invalidateRegions(R, E, C.blockCount(), LCtx, 858 CausesPointerEscape, 0, 0, &ITraits); 859 } 860 861 // If we have a non-region value by chance, just remove the binding. 862 // FIXME: is this necessary or correct? This handles the non-Region 863 // cases. Is it ever valid to store to these? 
864 return state->killBinding(*L); 865 } 866 867 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 868 const MemRegion *MR) { 869 const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR); 870 871 switch (MR->getKind()) { 872 case MemRegion::FunctionTextRegionKind: { 873 const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl(); 874 if (FD) 875 os << "the address of the function '" << *FD << '\''; 876 else 877 os << "the address of a function"; 878 return true; 879 } 880 case MemRegion::BlockTextRegionKind: 881 os << "block text"; 882 return true; 883 case MemRegion::BlockDataRegionKind: 884 os << "a block"; 885 return true; 886 case MemRegion::CXXThisRegionKind: 887 case MemRegion::CXXTempObjectRegionKind: 888 os << "a C++ temp object of type " << TVR->getValueType().getAsString(); 889 return true; 890 case MemRegion::VarRegionKind: 891 os << "a variable of type" << TVR->getValueType().getAsString(); 892 return true; 893 case MemRegion::FieldRegionKind: 894 os << "a field of type " << TVR->getValueType().getAsString(); 895 return true; 896 case MemRegion::ObjCIvarRegionKind: 897 os << "an instance variable of type " << TVR->getValueType().getAsString(); 898 return true; 899 default: 900 return false; 901 } 902 } 903 904 //===----------------------------------------------------------------------===// 905 // evaluation of individual function calls. 906 //===----------------------------------------------------------------------===// 907 908 void CStringChecker::evalCopyCommon(CheckerContext &C, 909 const CallExpr *CE, 910 ProgramStateRef state, 911 const Expr *Size, const Expr *Dest, 912 const Expr *Source, bool Restricted, 913 bool IsMempcpy) const { 914 CurrentFunctionDescription = "memory copy function"; 915 916 // See if the size argument is zero. 
917 const LocationContext *LCtx = C.getLocationContext(); 918 SVal sizeVal = state->getSVal(Size, LCtx); 919 QualType sizeTy = Size->getType(); 920 921 ProgramStateRef stateZeroSize, stateNonZeroSize; 922 llvm::tie(stateZeroSize, stateNonZeroSize) = 923 assumeZero(C, state, sizeVal, sizeTy); 924 925 // Get the value of the Dest. 926 SVal destVal = state->getSVal(Dest, LCtx); 927 928 // If the size is zero, there won't be any actual memory access, so 929 // just bind the return value to the destination buffer and return. 930 if (stateZeroSize && !stateNonZeroSize) { 931 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal); 932 C.addTransition(stateZeroSize); 933 return; 934 } 935 936 // If the size can be nonzero, we have to check the other arguments. 937 if (stateNonZeroSize) { 938 state = stateNonZeroSize; 939 940 // Ensure the destination is not null. If it is NULL there will be a 941 // NULL pointer dereference. 942 state = checkNonNull(C, state, Dest, destVal); 943 if (!state) 944 return; 945 946 // Get the value of the Src. 947 SVal srcVal = state->getSVal(Source, LCtx); 948 949 // Ensure the source is not null. If it is NULL there will be a 950 // NULL pointer dereference. 951 state = checkNonNull(C, state, Source, srcVal); 952 if (!state) 953 return; 954 955 // Ensure the accesses are valid and that the buffers do not overlap. 956 const char * const writeWarning = 957 "Memory copy function overflows destination buffer"; 958 state = CheckBufferAccess(C, state, Size, Dest, Source, 959 writeWarning, /* sourceWarning = */ NULL); 960 if (Restricted) 961 state = CheckOverlap(C, state, Size, Dest, Source); 962 963 if (!state) 964 return; 965 966 // If this is mempcpy, get the byte after the last byte copied and 967 // bind the expr. 968 if (IsMempcpy) { 969 loc::MemRegionVal destRegVal = destVal.castAs<loc::MemRegionVal>(); 970 971 // Get the length to copy. 
972 if (Optional<NonLoc> lenValNonLoc = sizeVal.getAs<NonLoc>()) { 973 // Get the byte after the last byte copied. 974 SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add, 975 destRegVal, 976 *lenValNonLoc, 977 Dest->getType()); 978 979 // The byte after the last byte copied is the return value. 980 state = state->BindExpr(CE, LCtx, lastElement); 981 } else { 982 // If we don't know how much we copied, we can at least 983 // conjure a return value for later. 984 SVal result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, 985 C.blockCount()); 986 state = state->BindExpr(CE, LCtx, result); 987 } 988 989 } else { 990 // All other copies return the destination buffer. 991 // (Well, bcopy() has a void return type, but this won't hurt.) 992 state = state->BindExpr(CE, LCtx, destVal); 993 } 994 995 // Invalidate the destination (regular invalidation without pointer-escaping 996 // the address of the top-level region). 997 // FIXME: Even if we can't perfectly model the copy, we should see if we 998 // can use LazyCompoundVals to copy the source values into the destination. 999 // This would probably remove any existing bindings past the end of the 1000 // copied region, but that's still an improvement over blank invalidation. 1001 state = InvalidateBuffer(C, state, Dest, C.getSVal(Dest), 1002 /*IsSourceBuffer*/false); 1003 1004 // Invalidate the source (const-invalidation without const-pointer-escaping 1005 // the address of the top-level region). 1006 state = InvalidateBuffer(C, state, Source, C.getSVal(Source), 1007 /*IsSourceBuffer*/true); 1008 1009 C.addTransition(state); 1010 } 1011 } 1012 1013 1014 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const { 1015 if (CE->getNumArgs() < 3) 1016 return; 1017 1018 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 1019 // The return value is the address of the destination buffer. 
1020 const Expr *Dest = CE->getArg(0); 1021 ProgramStateRef state = C.getState(); 1022 1023 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true); 1024 } 1025 1026 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const { 1027 if (CE->getNumArgs() < 3) 1028 return; 1029 1030 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n); 1031 // The return value is a pointer to the byte following the last written byte. 1032 const Expr *Dest = CE->getArg(0); 1033 ProgramStateRef state = C.getState(); 1034 1035 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true); 1036 } 1037 1038 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const { 1039 if (CE->getNumArgs() < 3) 1040 return; 1041 1042 // void *memmove(void *dst, const void *src, size_t n); 1043 // The return value is the address of the destination buffer. 1044 const Expr *Dest = CE->getArg(0); 1045 ProgramStateRef state = C.getState(); 1046 1047 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1)); 1048 } 1049 1050 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const { 1051 if (CE->getNumArgs() < 3) 1052 return; 1053 1054 // void bcopy(const void *src, void *dst, size_t n); 1055 evalCopyCommon(C, CE, C.getState(), 1056 CE->getArg(2), CE->getArg(1), CE->getArg(0)); 1057 } 1058 1059 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const { 1060 if (CE->getNumArgs() < 3) 1061 return; 1062 1063 // int memcmp(const void *s1, const void *s2, size_t n); 1064 CurrentFunctionDescription = "memory comparison function"; 1065 1066 const Expr *Left = CE->getArg(0); 1067 const Expr *Right = CE->getArg(1); 1068 const Expr *Size = CE->getArg(2); 1069 1070 ProgramStateRef state = C.getState(); 1071 SValBuilder &svalBuilder = C.getSValBuilder(); 1072 1073 // See if the size argument is zero. 
1074 const LocationContext *LCtx = C.getLocationContext(); 1075 SVal sizeVal = state->getSVal(Size, LCtx); 1076 QualType sizeTy = Size->getType(); 1077 1078 ProgramStateRef stateZeroSize, stateNonZeroSize; 1079 llvm::tie(stateZeroSize, stateNonZeroSize) = 1080 assumeZero(C, state, sizeVal, sizeTy); 1081 1082 // If the size can be zero, the result will be 0 in that case, and we don't 1083 // have to check either of the buffers. 1084 if (stateZeroSize) { 1085 state = stateZeroSize; 1086 state = state->BindExpr(CE, LCtx, 1087 svalBuilder.makeZeroVal(CE->getType())); 1088 C.addTransition(state); 1089 } 1090 1091 // If the size can be nonzero, we have to check the other arguments. 1092 if (stateNonZeroSize) { 1093 state = stateNonZeroSize; 1094 // If we know the two buffers are the same, we know the result is 0. 1095 // First, get the two buffers' addresses. Another checker will have already 1096 // made sure they're not undefined. 1097 DefinedOrUnknownSVal LV = 1098 state->getSVal(Left, LCtx).castAs<DefinedOrUnknownSVal>(); 1099 DefinedOrUnknownSVal RV = 1100 state->getSVal(Right, LCtx).castAs<DefinedOrUnknownSVal>(); 1101 1102 // See if they are the same. 1103 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 1104 ProgramStateRef StSameBuf, StNotSameBuf; 1105 llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 1106 1107 // If the two arguments might be the same buffer, we know the result is 0, 1108 // and we only need to check one size. 1109 if (StSameBuf) { 1110 state = StSameBuf; 1111 state = CheckBufferAccess(C, state, Size, Left); 1112 if (state) { 1113 state = StSameBuf->BindExpr(CE, LCtx, 1114 svalBuilder.makeZeroVal(CE->getType())); 1115 C.addTransition(state); 1116 } 1117 } 1118 1119 // If the two arguments might be different buffers, we have to check the 1120 // size of both of them. 
1121 if (StNotSameBuf) { 1122 state = StNotSameBuf; 1123 state = CheckBufferAccess(C, state, Size, Left, Right); 1124 if (state) { 1125 // The return value is the comparison result, which we don't know. 1126 SVal CmpV = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount()); 1127 state = state->BindExpr(CE, LCtx, CmpV); 1128 C.addTransition(state); 1129 } 1130 } 1131 } 1132 } 1133 1134 void CStringChecker::evalstrLength(CheckerContext &C, 1135 const CallExpr *CE) const { 1136 if (CE->getNumArgs() < 1) 1137 return; 1138 1139 // size_t strlen(const char *s); 1140 evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); 1141 } 1142 1143 void CStringChecker::evalstrnLength(CheckerContext &C, 1144 const CallExpr *CE) const { 1145 if (CE->getNumArgs() < 2) 1146 return; 1147 1148 // size_t strnlen(const char *s, size_t maxlen); 1149 evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); 1150 } 1151 1152 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 1153 bool IsStrnlen) const { 1154 CurrentFunctionDescription = "string length function"; 1155 ProgramStateRef state = C.getState(); 1156 const LocationContext *LCtx = C.getLocationContext(); 1157 1158 if (IsStrnlen) { 1159 const Expr *maxlenExpr = CE->getArg(1); 1160 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); 1161 1162 ProgramStateRef stateZeroSize, stateNonZeroSize; 1163 llvm::tie(stateZeroSize, stateNonZeroSize) = 1164 assumeZero(C, state, maxlenVal, maxlenExpr->getType()); 1165 1166 // If the size can be zero, the result will be 0 in that case, and we don't 1167 // have to check the string itself. 1168 if (stateZeroSize) { 1169 SVal zero = C.getSValBuilder().makeZeroVal(CE->getType()); 1170 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero); 1171 C.addTransition(stateZeroSize); 1172 } 1173 1174 // If the size is GUARANTEED to be zero, we're done! 1175 if (!stateNonZeroSize) 1176 return; 1177 1178 // Otherwise, record the assumption that the size is nonzero. 
state = stateNonZeroSize;
  }

  // Check that the string argument is non-null.
  const Expr *Arg = CE->getArg(0);
  SVal ArgVal = state->getSVal(Arg, LCtx);

  state = checkNonNull(C, state, Arg, ArgVal);

  if (!state)
    return;

  SVal strLength = getCStringLength(C, state, Arg, ArgVal);

  // If the argument isn't a valid C string, there's no valid state to
  // transition to.
  if (strLength.isUndef())
    return;

  // Stays unknown until one of the branches below picks or conjures a value.
  DefinedOrUnknownSVal result = UnknownVal();

  // If the check is for strnlen() then bind the return value to no more than
  // the maxlen value.
  if (IsStrnlen) {
    QualType cmpTy = C.getSValBuilder().getConditionType();

    // It's a little unfortunate to be getting this again,
    // but it's not that expensive...
    const Expr *maxlenExpr = CE->getArg(1);
    SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);

    Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
    Optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();

    if (strLengthNL && maxlenValNL) {
      ProgramStateRef stateStringTooLong, stateStringNotTooLong;

      // Check if the strLength is greater than the maxlen.
      llvm::tie(stateStringTooLong, stateStringNotTooLong) =
          state->assume(C.getSValBuilder().evalBinOpNN(
                            state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
                            .castAs<DefinedOrUnknownSVal>());

      // Only a definite answer selects a result; if both outcomes are
      // feasible, result stays unknown and is conjured below. The split
      // states themselves are not carried forward.
      if (stateStringTooLong && !stateStringNotTooLong) {
        // If the string is longer than maxlen, return maxlen.
        result = *maxlenValNL;
      } else if (stateStringNotTooLong && !stateStringTooLong) {
        // If the string is shorter than maxlen, return its length.
        result = *strLengthNL;
      }
    }

    if (result.isUnknown()) {
      // If we don't have enough information for a comparison, there's
      // no guarantee the full string length will actually be returned.
      // All we know is the return value is the min of the string length
      // and the limit. This is better than nothing.
      result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount());
      NonLoc resultNL = result.castAs<NonLoc>();

      // NOTE(review): the states returned by assume() below are used without
      // a null check; presumably a constraint on a fresh symbol is always
      // feasible -- confirm.
      if (strLengthNL) {
        state = state->assume(C.getSValBuilder().evalBinOpNN(
                                  state, BO_LE, resultNL, *strLengthNL, cmpTy)
                                  .castAs<DefinedOrUnknownSVal>(), true);
      }

      if (maxlenValNL) {
        state = state->assume(C.getSValBuilder().evalBinOpNN(
                                  state, BO_LE, resultNL, *maxlenValNL, cmpTy)
                                  .castAs<DefinedOrUnknownSVal>(), true);
      }
    }

  } else {
    // This is a plain strlen(), not strnlen().
    result = strLength.castAs<DefinedOrUnknownSVal>();

    // If we don't know the length of the string, conjure a return
    // value, so it can be used in constraints, at least.
    if (result.isUnknown()) {
      result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount());
    }
  }

  // Bind the return value.
  assert(!result.isUnknown() && "Should have conjured a value by now");
  state = state->BindExpr(CE, LCtx, result);
  C.addTransition(state);
}

/// Models strcpy(): unbounded, non-appending copy whose value is the
/// destination. Calls with fewer than two arguments are left unmodelled.
void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
  if (CE->getNumArgs() < 2)
    return;

  // char *strcpy(char *restrict dst, const char *restrict src);
  evalStrcpyCommon(C, CE,
                   /* returnEnd = */ false,
                   /* isBounded = */ false,
                   /* isAppending = */ false);
}

/// Models strncpy(): bounded, non-appending copy. Calls with fewer than three
/// arguments are left unmodelled.
void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
  if (CE->getNumArgs() < 3)
    return;

  // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
  evalStrcpyCommon(C, CE,
                   /* returnEnd = */ false,
                   /* isBounded = */ true,
                   /* isAppending = */ false);
}

/// Models stpcpy(): like strcpy but with returnEnd set. Calls with fewer than
/// two arguments are left unmodelled.
void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
  if (CE->getNumArgs() < 2)
    return;

  // char *stpcpy(char *restrict dst, const char *restrict src);
  evalStrcpyCommon(C, CE,
                   /* returnEnd = */ true,
                   /* isBounded = */ false,
                   /* isAppending = */ false);
}

/// Models strcat(): unbounded, appending copy. Calls with fewer than two
/// arguments are left unmodelled.
void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
  if (CE->getNumArgs() < 2)
    return;

  //char *strcat(char *restrict s1, const char *restrict s2);
  evalStrcpyCommon(C, CE,
                   /* returnEnd = */ false,
                   /* isBounded = */ false,
                   /* isAppending = */ true);
}

/// Models strncat(): bounded, appending copy. Calls with fewer than three
/// arguments are left unmodelled.
void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
  if (CE->getNumArgs() < 3)
    return;

  //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
  evalStrcpyCommon(C, CE,
                   /* returnEnd = */ false,
                   /* isBounded = */ true,
                   /* isAppending = */ true);
}

/// Shared model for the string copy/concatenate functions (called by
/// evalStrcpy, evalStrncpy, evalStpcpy, evalStrcat and evalStrncat).
///
/// \param C           Checker context used for values and transitions.
/// \param CE          The call being modelled; argument 0 is the destination,
///                    argument 1 the source and, when \p isBounded, argument 2
///                    the maximum number of characters.
/// \param returnEnd   When true the call evaluates to a pointer derived from
///                    the final length (or a conjured symbol) rather than to
///                    the destination pointer.
/// \param isBounded   When true argument 2 limits the amount copied.
/// \param isAppending When true the destination's existing string length is
///                    taken into account.
void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
                                      bool returnEnd, bool isBounded,
                                      bool isAppending) const
{
  CurrentFunctionDescription = "string copy function";
  ProgramStateRef state = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the destination is non-null.
  const Expr *Dst = CE->getArg(0);
  SVal DstVal = state->getSVal(Dst, LCtx);

  state = checkNonNull(C, state, Dst, DstVal);
  if (!state)
    return;

  // Check that the source is non-null.
  const Expr *srcExpr = CE->getArg(1);
  SVal srcVal = state->getSVal(srcExpr, LCtx);
  state = checkNonNull(C, state, srcExpr, srcVal);
  if (!state)
    return;

  // Get the string length of the source.
  SVal strLength = getCStringLength(C, state, srcExpr, srcVal);

  // If the source isn't a valid C string, give up.
  if (strLength.isUndef())
    return;

  SValBuilder &svalBuilder = C.getSValBuilder();
  QualType cmpTy = svalBuilder.getConditionType();
  QualType sizeTy = svalBuilder.getContext().getSizeType();

  // These two values allow checking two kinds of errors:
  // - actual overflows caused by a source that doesn't fit in the destination
  // - potential overflows caused by a bound that could exceed the destination
  SVal amountCopied = UnknownVal();
  SVal maxLastElementIndex = UnknownVal();
  // Non-null only once a bound-based index has been computed below; later used
  // to choose between the bound check and the actual-copy check.
  const char *boundWarning = NULL;

  // If the function is strncpy, strncat, etc... it is bounded.
  if (isBounded) {
    // Get the max number of characters to copy.
    const Expr *lenExpr = CE->getArg(2);
    SVal lenVal = state->getSVal(lenExpr, LCtx);

    // Protect against misdeclared strncpy().
    lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());

    Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
    Optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();

    // If we know both values, we might be able to figure out how much
    // we're copying.
    if (strLengthNL && lenValNL) {
      ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;

      // Check if the max number to copy is less than the length of the src.
      // If the bound is equal to the source length, strncpy won't null-
      // terminate the result!
      llvm::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
          svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
              .castAs<DefinedOrUnknownSVal>());

      if (stateSourceTooLong && !stateSourceNotTooLong) {
        // Max number to copy is less than the length of the src, so the actual
        // strLength copied is the max number arg.
        state = stateSourceTooLong;
        amountCopied = lenVal;

      } else if (!stateSourceTooLong && stateSourceNotTooLong) {
        // The source buffer entirely fits in the bound.
        state = stateSourceNotTooLong;
        amountCopied = strLength;
      }
    }

    // We still want to know if the bound is known to be too large.
    if (lenValNL) {
      if (isAppending) {
        // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)

        // Get the string length of the destination. If the destination is
        // memory that can't have a string length, we shouldn't be copying
        // into it anyway.
        SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
        if (dstStrLength.isUndef())
          return;

        if (Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>()) {
          maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
                                                        *lenValNL,
                                                        *dstStrLengthNL,
                                                        sizeTy);
          boundWarning = "Size argument is greater than the free space in the "
                         "destination buffer";
        }

      } else {
        // For strncpy, this is just checking that lenVal <= sizeof(dst)
        // (Yes, strncpy and strncat differ in how they treat termination.
        // strncat ALWAYS terminates, but strncpy doesn't.)

        // We need a special case for when the copy size is zero, in which
        // case strncpy will do no work at all. Our bounds check uses n-1
        // as the last element accessed, so n == 0 is problematic.
        ProgramStateRef StateZeroSize, StateNonZeroSize;
        llvm::tie(StateZeroSize, StateNonZeroSize) =
          assumeZero(C, state, *lenValNL, sizeTy);

        // If the size is known to be zero, we're done.
        if (StateZeroSize && !StateNonZeroSize) {
          StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
          C.addTransition(StateZeroSize);
          return;
        }

        // Otherwise, go ahead and figure out the last element we'll touch.
        // We don't record the non-zero assumption here because we can't
        // be sure. We won't warn on a possible zero.
        NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
        maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
                                                      one, sizeTy);
        boundWarning = "Size argument is greater than the length of the "
                       "destination buffer";
      }
    }

    // If we couldn't pin down the copy length, at least bound it.
    // FIXME: We should actually run this code path for append as well, but
    // right now it creates problems with constraints (since we can end up
    // trying to pass constraints from symbol to symbol).
    if (amountCopied.isUnknown() && !isAppending) {
      // Try to get a "hypothetical" string length symbol, which we can later
      // set as a real value if that turns out to be the case.
      amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
      assert(!amountCopied.isUndef());

      if (Optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>()) {
        if (lenValNL) {
          // amountCopied <= lenVal
          SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
                                                             *amountCopiedNL,
                                                             *lenValNL,
                                                             cmpTy);
          state = state->assume(
              copiedLessThanBound.castAs<DefinedOrUnknownSVal>(), true);
          if (!state)
            return;
        }

        if (strLengthNL) {
          // amountCopied <= strlen(source)
          SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
                                                           *amountCopiedNL,
                                                           *strLengthNL,
                                                           cmpTy);
          state = state->assume(
              copiedLessThanSrc.castAs<DefinedOrUnknownSVal>(), true);
          if (!state)
            return;
        }
      }
    }

  } else {
    // The function isn't bounded. The amount copied should match the length
    // of the source buffer.
    amountCopied = strLength;
  }

  assert(state);

  // This represents the number of characters copied into the destination
  // buffer. (It may not actually be the strlen if the destination buffer
  // is not terminated.)
  SVal finalStrLength = UnknownVal();

  // If this is an appending function (strcat, strncat...) then set the
  // string length to strlen(src) + strlen(dst) since the buffer will
  // ultimately contain both.
  if (isAppending) {
    // Get the string length of the destination. If the destination is memory
    // that can't have a string length, we shouldn't be copying into it anyway.
    SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
    if (dstStrLength.isUndef())
      return;

    // Despite its name, srcStrLengthNL holds the amount copied, which for a
    // bounded call may be the bound rather than strlen(src).
    Optional<NonLoc> srcStrLengthNL = amountCopied.getAs<NonLoc>();
    Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();

    // If we know both string lengths, we might know the final string length.
    if (srcStrLengthNL && dstStrLengthNL) {
      // Make sure the two lengths together don't overflow a size_t.
      state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
      if (!state)
        return;

      finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
                                               *dstStrLengthNL, sizeTy);
    }

    // If we couldn't get a single value for the final string length,
    // we can at least bound it by the individual lengths.
    if (finalStrLength.isUnknown()) {
      // Try to get a "hypothetical" string length symbol, which we can later
      // set as a real value if that turns out to be the case.
      finalStrLength = getCStringLength(C, state, CE, DstVal, true);
      assert(!finalStrLength.isUndef());

      if (Optional<NonLoc> finalStrLengthNL = finalStrLength.getAs<NonLoc>()) {
        if (srcStrLengthNL) {
          // finalStrLength >= srcStrLength
          SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
                                                        *finalStrLengthNL,
                                                        *srcStrLengthNL,
                                                        cmpTy);
          state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
                                true);
          if (!state)
            return;
        }

        if (dstStrLengthNL) {
          // finalStrLength >= dstStrLength
          SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
                                                      *finalStrLengthNL,
                                                      *dstStrLengthNL,
                                                      cmpTy);
          state =
              state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
          if (!state)
            return;
        }
      }
    }

  } else {
    // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
    // the final string length will match the input string length.
    finalStrLength = amountCopied;
  }

  // The final result of the function will either be a pointer past the last
  // copied element, or a pointer to the start of the destination buffer.
  SVal Result = (returnEnd ? UnknownVal() : DstVal);

  assert(state);

  // If the destination is a MemRegion, try to check for a buffer overflow and
  // record the new string length.
  if (Optional<loc::MemRegionVal> dstRegVal =
          DstVal.getAs<loc::MemRegionVal>()) {
    QualType ptrTy = Dst->getType();

    // If we have an exact value on a bounded copy, use that to check for
    // overflows, rather than our estimate about how much is actually copied.
    if (boundWarning) {
      if (Optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
        SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
                                                      *maxLastNL, ptrTy);
        state = CheckLocation(C, state, CE->getArg(2), maxLastElement,
                              boundWarning);
        if (!state)
          return;
      }
    }

    // Then, if the final length is known...
    if (Optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
      SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
                                                 *knownStrLength, ptrTy);

      // ...and we haven't checked the bound, we'll check the actual copy.
      if (!boundWarning) {
        const char * const warningMsg =
          "String copy function overflows destination buffer";
        state = CheckLocation(C, state, Dst, lastElement, warningMsg);
        if (!state)
          return;
      }

      // If this is a stpcpy-style copy, the last element is the return value.
      if (returnEnd)
        Result = lastElement;
    }

    // Invalidate the destination (regular invalidation without pointer-escaping
    // the address of the top-level region). This must happen before we set the
    // C string length because invalidation will clear the length.
    // FIXME: Even if we can't perfectly model the copy, we should see if we
    // can use LazyCompoundVals to copy the source values into the destination.
    // This would probably remove any existing bindings past the end of the
    // string, but that's still an improvement over blank invalidation.
    state = InvalidateBuffer(C, state, Dst, *dstRegVal,
                             /*IsSourceBuffer*/false);

    // Invalidate the source (const-invalidation without const-pointer-escaping
    // the address of the top-level region).
    state = InvalidateBuffer(C, state, srcExpr, srcVal, /*IsSourceBuffer*/true);

    // Set the C string length of the destination, if we know it.
    if (isBounded && !isAppending) {
      // strncpy is annoying in that it doesn't guarantee to null-terminate
      // the result string. If the original string didn't fit entirely inside
      // the bound (including the null-terminator), we don't know how long the
      // result is.
      if (amountCopied != strLength)
        finalStrLength = UnknownVal();
    }
    state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
  }

  assert(state);

  // If this is a stpcpy-style copy, but we were unable to check for a buffer
  // overflow, we still need a result. Conjure a return value.
  if (returnEnd && Result.isUnknown()) {
    Result = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
  }

  // Set the return value.
  state = state->BindExpr(CE, LCtx, Result);
  C.addTransition(state);
}

/// Models strcmp(): unbounded, case-sensitive comparison. Calls with fewer
/// than two arguments are left unmodelled.
void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
  if (CE->getNumArgs() < 2)
    return;

  //int strcmp(const char *s1, const char *s2);
  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
}

/// Models strncmp(): bounded, case-sensitive comparison. Calls with fewer
/// than three arguments are left unmodelled.
void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
  if (CE->getNumArgs() < 3)
    return;

  //int strncmp(const char *s1, const char *s2, size_t n);
  evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
}

/// Models strcasecmp(): unbounded, case-insensitive comparison. Calls with
/// fewer than two arguments are left unmodelled.
void CStringChecker::evalStrcasecmp(CheckerContext &C,
                                    const CallExpr *CE) const {
  if (CE->getNumArgs() < 2)
    return;

  //int strcasecmp(const char *s1, const char *s2);
  evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
}

/// Models strncasecmp(): bounded, case-insensitive comparison. Calls with
/// fewer than three arguments are left unmodelled.
void CStringChecker::evalStrncasecmp(CheckerContext &C,
                                     const CallExpr *CE) const {
  if (CE->getNumArgs() < 3)
    return;

  //int strncasecmp(const char *s1, const char *s2, size_t n);
  evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
}

/// Shared model for the string comparison functions (called by evalStrcmp,
/// evalStrncmp, evalStrcasecmp and evalStrncasecmp).
///
/// Both arguments are checked for null and must have a defined C string
/// length, otherwise no transition is added. If the two arguments may be the
/// same buffer, a transition returning zero is added. On the path where they
/// differ, the result is computed exactly when both are string literals (and,
/// for bounded calls, the bound is a known constant); otherwise a symbol is
/// conjured.
///
/// \param C          Checker context used for values and transitions.
/// \param CE         The call being modelled.
/// \param isBounded  True when argument 2 limits the number of characters
///                   compared.
/// \param ignoreCase True to compare case-insensitively.
void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
                                      bool isBounded, bool ignoreCase) const {
  CurrentFunctionDescription = "string comparison function";
  ProgramStateRef state = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the first string is non-null
  const Expr *s1 = CE->getArg(0);
  SVal s1Val = state->getSVal(s1, LCtx);
  state = checkNonNull(C, state, s1, s1Val);
  if (!state)
    return;

  // Check that the second string is non-null.
const Expr *s2 = CE->getArg(1);
  SVal s2Val = state->getSVal(s2, LCtx);
  state = checkNonNull(C, state, s2, s2Val);
  if (!state)
    return;

  // Get the string length of the first string or give up.
  SVal s1Length = getCStringLength(C, state, s1, s1Val);
  if (s1Length.isUndef())
    return;

  // Get the string length of the second string or give up.
  SVal s2Length = getCStringLength(C, state, s2, s2Val);
  if (s2Length.isUndef())
    return;

  // If we know the two buffers are the same, we know the result is 0.
  // First, get the two buffers' addresses. Another checker will have already
  // made sure they're not undefined.
  DefinedOrUnknownSVal LV = s1Val.castAs<DefinedOrUnknownSVal>();
  DefinedOrUnknownSVal RV = s2Val.castAs<DefinedOrUnknownSVal>();

  // See if they are the same.
  SValBuilder &svalBuilder = C.getSValBuilder();
  DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
  ProgramStateRef StSameBuf, StNotSameBuf;
  llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);

  // If the two arguments might be the same buffer, we know the result is 0,
  // and we only need to check one size.
  if (StSameBuf) {
    StSameBuf = StSameBuf->BindExpr(CE, LCtx,
                                    svalBuilder.makeZeroVal(CE->getType()));
    C.addTransition(StSameBuf);

    // If the two arguments are GUARANTEED to be the same, we're done!
    if (!StNotSameBuf)
      return;
  }

  assert(StNotSameBuf);
  state = StNotSameBuf;

  // At this point we can go about comparing the two buffers.
  // For now, we only do this if they're both known string literals.

  // Attempt to extract string literals from both expressions.
  const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
  const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
  // Set only when both literals are available and, for a bounded call, the
  // bound is a known constant.
  bool canComputeResult = false;

  if (s1StrLiteral && s2StrLiteral) {
    StringRef s1StrRef = s1StrLiteral->getString();
    StringRef s2StrRef = s2StrLiteral->getString();

    if (isBounded) {
      // Get the max number of characters to compare.
      const Expr *lenExpr = CE->getArg(2);
      SVal lenVal = state->getSVal(lenExpr, LCtx);

      // If the length is known, we can get the right substrings.
      if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
        // Create substrings of each to compare the prefix.
        s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
        s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
        canComputeResult = true;
      }
    } else {
      // This is a normal, unbounded strcmp.
      canComputeResult = true;
    }

    if (canComputeResult) {
      // Real strcmp stops at null characters.
      size_t s1Term = s1StrRef.find('\0');
      if (s1Term != StringRef::npos)
        s1StrRef = s1StrRef.substr(0, s1Term);

      size_t s2Term = s2StrRef.find('\0');
      if (s2Term != StringRef::npos)
        s2StrRef = s2StrRef.substr(0, s2Term);

      // Use StringRef's comparison methods to compute the actual result.
      int result;

      if (ignoreCase) {
        // Compare string 1 to string 2 the same way strcasecmp() does.
        result = s1StrRef.compare_lower(s2StrRef);
      } else {
        // Compare string 1 to string 2 the same way strcmp() does.
        result = s1StrRef.compare(s2StrRef);
      }

      // Build the SVal of the comparison and bind the return value.
      SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
      state = state->BindExpr(CE, LCtx, resultVal);
    }
  }

  if (!canComputeResult) {
    // Conjure a symbolic value. It's the best we can do.
    SVal resultVal = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
    state = state->BindExpr(CE, LCtx, resultVal);
  }

  // Record this as a possible path.
  C.addTransition(state);
}

/// Models strsep().
///
/// The call is left unmodelled if it has fewer than two arguments or if the
/// pointee type of argument 0 does not match the call's result type. Both
/// arguments are checked for null. When argument 0 evaluates to a location,
/// the call evaluates to the pointer currently stored there, the buffer that
/// pointer refers to is invalidated, and a fresh conjured pointer is stored
/// back through argument 0. Otherwise the call evaluates to a conjured
/// symbol.
void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
  //char *strsep(char **stringp, const char *delim);
  if (CE->getNumArgs() < 2)
    return;

  // Sanity: does the search string parameter match the return type?
  const Expr *SearchStrPtr = CE->getArg(0);
  QualType CharPtrTy = SearchStrPtr->getType()->getPointeeType();
  if (CharPtrTy.isNull() ||
      CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
    return;

  CurrentFunctionDescription = "strsep()";
  ProgramStateRef State = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the search string pointer is non-null (though it may point to
  // a null string).
  SVal SearchStrVal = State->getSVal(SearchStrPtr, LCtx);
  State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
  if (!State)
    return;

  // Check that the delimiter string is non-null.
  const Expr *DelimStr = CE->getArg(1);
  SVal DelimStrVal = State->getSVal(DelimStr, LCtx);
  State = checkNonNull(C, State, DelimStr, DelimStrVal);
  if (!State)
    return;

  SValBuilder &SVB = C.getSValBuilder();
  SVal Result;
  if (Optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
    // Get the current value of the search string pointer, as a char*.
    Result = State->getSVal(*SearchStrLoc, CharPtrTy);

    // Invalidate the search string, representing the change of one delimiter
    // character to NUL.
    State = InvalidateBuffer(C, State, SearchStrPtr, Result,
                             /*IsSourceBuffer*/false);

    // Overwrite the search string pointer. The new value is either an address
    // further along in the same string, or NULL if there are no more tokens.
    State = State->bindLoc(*SearchStrLoc,
                           SVB.conjureSymbolVal(getTag(), CE, LCtx, CharPtrTy,
                                                C.blockCount()));
  } else {
    assert(SearchStrVal.isUnknown());
    // Conjure a symbolic value. It's the best we can do.
    Result = SVB.conjureSymbolVal(0, CE, LCtx, C.blockCount());
  }

  // Set the return value, and finish.
  State = State->BindExpr(CE, LCtx, Result);
  C.addTransition(State);
}


//===----------------------------------------------------------------------===//
// The driver method, and other Checker callbacks.
//===----------------------------------------------------------------------===//

bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
  const FunctionDecl *FDecl = C.getCalleeDecl(CE);

  if (!FDecl)
    return false;

  // FIXME: Poorly-factored string switches are slow.
1876 FnCheck evalFunction = 0; 1877 if (C.isCLibraryFunction(FDecl, "memcpy")) 1878 evalFunction = &CStringChecker::evalMemcpy; 1879 else if (C.isCLibraryFunction(FDecl, "mempcpy")) 1880 evalFunction = &CStringChecker::evalMempcpy; 1881 else if (C.isCLibraryFunction(FDecl, "memcmp")) 1882 evalFunction = &CStringChecker::evalMemcmp; 1883 else if (C.isCLibraryFunction(FDecl, "memmove")) 1884 evalFunction = &CStringChecker::evalMemmove; 1885 else if (C.isCLibraryFunction(FDecl, "strcpy")) 1886 evalFunction = &CStringChecker::evalStrcpy; 1887 else if (C.isCLibraryFunction(FDecl, "strncpy")) 1888 evalFunction = &CStringChecker::evalStrncpy; 1889 else if (C.isCLibraryFunction(FDecl, "stpcpy")) 1890 evalFunction = &CStringChecker::evalStpcpy; 1891 else if (C.isCLibraryFunction(FDecl, "strcat")) 1892 evalFunction = &CStringChecker::evalStrcat; 1893 else if (C.isCLibraryFunction(FDecl, "strncat")) 1894 evalFunction = &CStringChecker::evalStrncat; 1895 else if (C.isCLibraryFunction(FDecl, "strlen")) 1896 evalFunction = &CStringChecker::evalstrLength; 1897 else if (C.isCLibraryFunction(FDecl, "strnlen")) 1898 evalFunction = &CStringChecker::evalstrnLength; 1899 else if (C.isCLibraryFunction(FDecl, "strcmp")) 1900 evalFunction = &CStringChecker::evalStrcmp; 1901 else if (C.isCLibraryFunction(FDecl, "strncmp")) 1902 evalFunction = &CStringChecker::evalStrncmp; 1903 else if (C.isCLibraryFunction(FDecl, "strcasecmp")) 1904 evalFunction = &CStringChecker::evalStrcasecmp; 1905 else if (C.isCLibraryFunction(FDecl, "strncasecmp")) 1906 evalFunction = &CStringChecker::evalStrncasecmp; 1907 else if (C.isCLibraryFunction(FDecl, "strsep")) 1908 evalFunction = &CStringChecker::evalStrsep; 1909 else if (C.isCLibraryFunction(FDecl, "bcopy")) 1910 evalFunction = &CStringChecker::evalBcopy; 1911 else if (C.isCLibraryFunction(FDecl, "bcmp")) 1912 evalFunction = &CStringChecker::evalMemcmp; 1913 1914 // If the callee isn't a string function, let another checker handle it. 
1915 if (!evalFunction) 1916 return false; 1917 1918 // Make sure each function sets its own description. 1919 // (But don't bother in a release build.) 1920 assert(!(CurrentFunctionDescription = NULL)); 1921 1922 // Check and evaluate the call. 1923 (this->*evalFunction)(C, CE); 1924 1925 // If the evaluate call resulted in no change, chain to the next eval call 1926 // handler. 1927 // Note, the custom CString evaluation calls assume that basic safety 1928 // properties are held. However, if the user chooses to turn off some of these 1929 // checks, we ignore the issues and leave the call evaluation to a generic 1930 // handler. 1931 if (!C.isDifferent()) 1932 return false; 1933 1934 return true; 1935 } 1936 1937 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { 1938 // Record string length for char a[] = "abc"; 1939 ProgramStateRef state = C.getState(); 1940 1941 for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end(); 1942 I != E; ++I) { 1943 const VarDecl *D = dyn_cast<VarDecl>(*I); 1944 if (!D) 1945 continue; 1946 1947 // FIXME: Handle array fields of structs. 
1948 if (!D->getType()->isArrayType()) 1949 continue; 1950 1951 const Expr *Init = D->getInit(); 1952 if (!Init) 1953 continue; 1954 if (!isa<StringLiteral>(Init)) 1955 continue; 1956 1957 Loc VarLoc = state->getLValue(D, C.getLocationContext()); 1958 const MemRegion *MR = VarLoc.getAsRegion(); 1959 if (!MR) 1960 continue; 1961 1962 SVal StrVal = state->getSVal(Init, C.getLocationContext()); 1963 assert(StrVal.isValid() && "Initializer string is unknown or undefined"); 1964 DefinedOrUnknownSVal strLength = 1965 getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>(); 1966 1967 state = state->set<CStringLength>(MR, strLength); 1968 } 1969 1970 C.addTransition(state); 1971 } 1972 1973 bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const { 1974 CStringLengthTy Entries = state->get<CStringLength>(); 1975 return !Entries.isEmpty(); 1976 } 1977 1978 ProgramStateRef 1979 CStringChecker::checkRegionChanges(ProgramStateRef state, 1980 const InvalidatedSymbols *, 1981 ArrayRef<const MemRegion *> ExplicitRegions, 1982 ArrayRef<const MemRegion *> Regions, 1983 const CallEvent *Call) const { 1984 CStringLengthTy Entries = state->get<CStringLength>(); 1985 if (Entries.isEmpty()) 1986 return state; 1987 1988 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; 1989 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; 1990 1991 // First build sets for the changed regions and their super-regions. 1992 for (ArrayRef<const MemRegion *>::iterator 1993 I = Regions.begin(), E = Regions.end(); I != E; ++I) { 1994 const MemRegion *MR = *I; 1995 Invalidated.insert(MR); 1996 1997 SuperRegions.insert(MR); 1998 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { 1999 MR = SR->getSuperRegion(); 2000 SuperRegions.insert(MR); 2001 } 2002 } 2003 2004 CStringLengthTy::Factory &F = state->get_context<CStringLength>(); 2005 2006 // Then loop over the entries in the current state. 
2007 for (CStringLengthTy::iterator I = Entries.begin(), 2008 E = Entries.end(); I != E; ++I) { 2009 const MemRegion *MR = I.getKey(); 2010 2011 // Is this entry for a super-region of a changed region? 2012 if (SuperRegions.count(MR)) { 2013 Entries = F.remove(Entries, MR); 2014 continue; 2015 } 2016 2017 // Is this entry for a sub-region of a changed region? 2018 const MemRegion *Super = MR; 2019 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { 2020 Super = SR->getSuperRegion(); 2021 if (Invalidated.count(Super)) { 2022 Entries = F.remove(Entries, MR); 2023 break; 2024 } 2025 } 2026 } 2027 2028 return state->set<CStringLength>(Entries); 2029 } 2030 2031 void CStringChecker::checkLiveSymbols(ProgramStateRef state, 2032 SymbolReaper &SR) const { 2033 // Mark all symbols in our string length map as valid. 2034 CStringLengthTy Entries = state->get<CStringLength>(); 2035 2036 for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); 2037 I != E; ++I) { 2038 SVal Len = I.getData(); 2039 2040 for (SymExpr::symbol_iterator si = Len.symbol_begin(), 2041 se = Len.symbol_end(); si != se; ++si) 2042 SR.markInUse(*si); 2043 } 2044 } 2045 2046 void CStringChecker::checkDeadSymbols(SymbolReaper &SR, 2047 CheckerContext &C) const { 2048 if (!SR.hasDeadSymbols()) 2049 return; 2050 2051 ProgramStateRef state = C.getState(); 2052 CStringLengthTy Entries = state->get<CStringLength>(); 2053 if (Entries.isEmpty()) 2054 return; 2055 2056 CStringLengthTy::Factory &F = state->get_context<CStringLength>(); 2057 for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); 2058 I != E; ++I) { 2059 SVal Len = I.getData(); 2060 if (SymbolRef Sym = Len.getAsSymbol()) { 2061 if (SR.isDead(Sym)) 2062 Entries = F.remove(Entries, I.getKey()); 2063 } 2064 } 2065 2066 state = state->set<CStringLength>(Entries); 2067 C.addTransition(state); 2068 } 2069 2070 #define REGISTER_CHECKER(name) \ 2071 void ento::register##name(CheckerManager &mgr) { \ 2072 CStringChecker 
*checker = mgr.registerChecker<CStringChecker>(); \ 2073 checker->Filter.Check##name = true; \ 2074 checker->Filter.CheckName##name = mgr.getCurrentCheckName(); \ 2075 } 2076 2077 REGISTER_CHECKER(CStringNullArg) 2078 REGISTER_CHECKER(CStringOutOfBounds) 2079 REGISTER_CHECKER(CStringBufferOverlap) 2080 REGISTER_CHECKER(CStringNotNullTerm) 2081 2082 void ento::registerCStringCheckerBasic(CheckerManager &Mgr) { 2083 registerCStringNullArg(Mgr); 2084 } 2085