1 //= CStringChecker.h - Checks calls to C string functions ----------*- C++ -*-// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This defines CStringChecker, which is an assortment of checks on calls 11 // to functions in <string.h>. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ClangSACheckers.h" 16 #include "clang/StaticAnalyzer/Core/Checker.h" 17 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 18 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 19 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 20 #include "clang/StaticAnalyzer/Core/PathSensitive/GRStateTrait.h" 21 #include "llvm/ADT/StringSwitch.h" 22 23 using namespace clang; 24 using namespace ento; 25 26 namespace { 27 class CStringChecker : public Checker< eval::Call, 28 check::PreStmt<DeclStmt>, 29 check::LiveSymbols, 30 check::DeadSymbols, 31 check::RegionChanges 32 > { 33 mutable llvm::OwningPtr<BugType> BT_Null, BT_Bounds, 34 BT_Overlap, BT_NotCString, 35 BT_AdditionOverflow; 36 mutable const char *CurrentFunctionDescription; 37 38 public: 39 static void *getTag() { static int tag; return &tag; } 40 41 bool evalCall(const CallExpr *CE, CheckerContext &C) const; 42 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; 43 void checkLiveSymbols(const GRState *state, SymbolReaper &SR) const; 44 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; 45 bool wantsRegionChangeUpdate(const GRState *state) const; 46 47 const GRState *checkRegionChanges(const GRState *state, 48 const StoreManager::InvalidatedSymbols *, 49 const MemRegion * const *Begin, 50 const MemRegion * const *End) const; 51 52 typedef void (CStringChecker::*FnCheck)(CheckerContext &, 53 const CallExpr *) const; 54 55 void evalMemcpy(CheckerContext &C, const CallExpr *CE) const; 56 void evalMempcpy(CheckerContext &C, const CallExpr *CE) const; 57 void evalMemmove(CheckerContext &C, const CallExpr *CE) const; 58 void evalBcopy(CheckerContext &C, const CallExpr *CE) const; 59 void evalCopyCommon(CheckerContext &C, const CallExpr *CE, 60 const GRState *state, 61 const Expr *Size, const Expr *Source, const Expr *Dest, 62 bool Restricted = false, 63 bool IsMempcpy = false) const; 64 65 void evalMemcmp(CheckerContext &C, const CallExpr *CE) const; 66 67 void evalstrLength(CheckerContext &C, const CallExpr *CE) const; 68 void evalstrnLength(CheckerContext &C, const CallExpr *CE) const; 69 void evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 70 bool IsStrnlen = false) const; 71 72 void evalStrcpy(CheckerContext &C, const CallExpr *CE) const; 73 void evalStrncpy(CheckerContext &C, const CallExpr *CE) const; 74 void evalStpcpy(CheckerContext &C, const CallExpr *CE) const; 75 void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool returnEnd, 76 bool isBounded, bool isAppending) const; 77 78 void evalStrcat(CheckerContext &C, const CallExpr *CE) const; 79 void evalStrncat(CheckerContext &C, const CallExpr *CE) const; 80 81 void evalStrcmp(CheckerContext &C, const CallExpr *CE) const; 82 void evalStrncmp(CheckerContext &C, const CallExpr *CE) const; 83 void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const; 84 void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const; 85 void evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, 86 bool isBounded = false, bool ignoreCase = false) const; 87 88 // Utility methods 89 std::pair<const GRState*, const GRState*> 90 static assumeZero(CheckerContext &C, 91 const GRState *state, SVal V, QualType Ty); 92 93 static const GRState *setCStringLength(const GRState *state, 94 const MemRegion *MR, SVal strLength); 95 static SVal getCStringLengthForRegion(CheckerContext &C, 96 const GRState *&state, 97 const Expr *Ex, const MemRegion *MR, 98 bool hypothetical); 99 SVal getCStringLength(CheckerContext &C, const GRState *&state, 100 const Expr *Ex, SVal Buf, 101 bool hypothetical = false) const; 102 103 const StringLiteral *getCStringLiteral(CheckerContext &C, 104 const GRState *&state, 105 const Expr *expr, 106 SVal val) const; 107 108 static const GRState *InvalidateBuffer(CheckerContext &C, 109 const GRState *state, 110 const Expr *Ex, SVal V); 111 112 static bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, 113 const MemRegion *MR); 114 115 // Re-usable checks 116 const GRState *checkNonNull(CheckerContext &C, const GRState *state, 117 const Expr *S, SVal l) const; 118 const GRState *CheckLocation(CheckerContext &C, const GRState *state, 119 const Expr *S, SVal l, 120 const char *message = NULL) const; 121 const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state, 122 const Expr *Size, 123 const Expr *FirstBuf, 124 const Expr *SecondBuf, 125 const char *firstMessage = NULL, 126 const char *secondMessage = NULL) const; 127 const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state, 128 const Expr *Size, const Expr *Buf, 129 const char *message = NULL) const { 130 // This is a convenience override. 131 return CheckBufferAccess(C, state, Size, Buf, NULL, message, NULL); 132 } 133 const GRState *CheckOverlap(CheckerContext &C, const GRState *state, 134 const Expr *Size, const Expr *First, 135 const Expr *Second) const; 136 void emitOverlapBug(CheckerContext &C, const GRState *state, 137 const Stmt *First, const Stmt *Second) const; 138 const GRState *checkAdditionOverflow(CheckerContext &C, const GRState *state, 139 NonLoc left, NonLoc right) const; 140 }; 141 142 class CStringLength { 143 public: 144 typedef llvm::ImmutableMap<const MemRegion *, SVal> EntryMap; 145 }; 146 } //end anonymous namespace 147 148 namespace clang { 149 namespace ento { 150 template <> 151 struct GRStateTrait<CStringLength> 152 : public GRStatePartialTrait<CStringLength::EntryMap> { 153 static void *GDMIndex() { return CStringChecker::getTag(); } 154 }; 155 } 156 } 157 158 //===----------------------------------------------------------------------===// 159 // Individual checks and utility methods. 160 //===----------------------------------------------------------------------===// 161 162 std::pair<const GRState*, const GRState*> 163 CStringChecker::assumeZero(CheckerContext &C, const GRState *state, SVal V, 164 QualType Ty) { 165 DefinedSVal *val = dyn_cast<DefinedSVal>(&V); 166 if (!val) 167 return std::pair<const GRState*, const GRState *>(state, state); 168 169 SValBuilder &svalBuilder = C.getSValBuilder(); 170 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 171 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 172 } 173 174 const GRState *CStringChecker::checkNonNull(CheckerContext &C, 175 const GRState *state, 176 const Expr *S, SVal l) const { 177 // If a previous check has failed, propagate the failure. 178 if (!state) 179 return NULL; 180 181 const GRState *stateNull, *stateNonNull; 182 llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType()); 183 184 if (stateNull && !stateNonNull) { 185 ExplodedNode *N = C.generateSink(stateNull); 186 if (!N) 187 return NULL; 188 189 if (!BT_Null) 190 BT_Null.reset(new BuiltinBug("API", 191 "Null pointer argument in call to byte string function")); 192 193 llvm::SmallString<80> buf; 194 llvm::raw_svector_ostream os(buf); 195 assert(CurrentFunctionDescription); 196 os << "Null pointer argument in call to " << CurrentFunctionDescription; 197 198 // Generate a report for this bug. 199 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get()); 200 EnhancedBugReport *report = new EnhancedBugReport(*BT, os.str(), N); 201 202 report->addRange(S->getSourceRange()); 203 report->addVisitorCreator(bugreporter::registerTrackNullOrUndefValue, S); 204 C.EmitReport(report); 205 return NULL; 206 } 207 208 // From here on, assume that the value is non-null. 209 assert(stateNonNull); 210 return stateNonNull; 211 } 212 213 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? 214 const GRState *CStringChecker::CheckLocation(CheckerContext &C, 215 const GRState *state, 216 const Expr *S, SVal l, 217 const char *warningMsg) const { 218 // If a previous check has failed, propagate the failure. 219 if (!state) 220 return NULL; 221 222 // Check for out of bound array element access. 223 const MemRegion *R = l.getAsRegion(); 224 if (!R) 225 return state; 226 227 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 228 if (!ER) 229 return state; 230 231 assert(ER->getValueType() == C.getASTContext().CharTy && 232 "CheckLocation should only be called with char* ElementRegions"); 233 234 // Get the size of the array. 235 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 236 SValBuilder &svalBuilder = C.getSValBuilder(); 237 SVal Extent = 238 svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); 239 DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent); 240 241 // Get the index of the accessed element. 242 DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex()); 243 244 const GRState *StInBound = state->assumeInBound(Idx, Size, true); 245 const GRState *StOutBound = state->assumeInBound(Idx, Size, false); 246 if (StOutBound && !StInBound) { 247 ExplodedNode *N = C.generateSink(StOutBound); 248 if (!N) 249 return NULL; 250 251 if (!BT_Bounds) { 252 BT_Bounds.reset(new BuiltinBug("Out-of-bound array access", 253 "Byte string function accesses out-of-bound array element")); 254 } 255 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get()); 256 257 // Generate a report for this bug. 258 RangedBugReport *report; 259 if (warningMsg) { 260 report = new RangedBugReport(*BT, warningMsg, N); 261 } else { 262 assert(CurrentFunctionDescription); 263 assert(CurrentFunctionDescription[0] != '\0'); 264 265 llvm::SmallString<80> buf; 266 llvm::raw_svector_ostream os(buf); 267 os << (char)toupper(CurrentFunctionDescription[0]) 268 << &CurrentFunctionDescription[1] 269 << " accesses out-of-bound array element"; 270 report = new RangedBugReport(*BT, os.str(), N); 271 } 272 273 // FIXME: It would be nice to eventually make this diagnostic more clear, 274 // e.g., by referencing the original declaration or by saying *why* this 275 // reference is outside the range. 276 277 report->addRange(S->getSourceRange()); 278 C.EmitReport(report); 279 return NULL; 280 } 281 282 // Array bound check succeeded. From this point forward the array bound 283 // should always succeed. 284 return StInBound; 285 } 286 287 const GRState *CStringChecker::CheckBufferAccess(CheckerContext &C, 288 const GRState *state, 289 const Expr *Size, 290 const Expr *FirstBuf, 291 const Expr *SecondBuf, 292 const char *firstMessage, 293 const char *secondMessage) const { 294 // If a previous check has failed, propagate the failure. 295 if (!state) 296 return NULL; 297 298 SValBuilder &svalBuilder = C.getSValBuilder(); 299 ASTContext &Ctx = svalBuilder.getContext(); 300 301 QualType sizeTy = Size->getType(); 302 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 303 304 // Check that the first buffer is non-null. 305 SVal BufVal = state->getSVal(FirstBuf); 306 state = checkNonNull(C, state, FirstBuf, BufVal); 307 if (!state) 308 return NULL; 309 310 // Get the access length and make sure it is known. 311 // FIXME: This assumes the caller has already checked that the access length 312 // is positive. And that it's unsigned. 313 SVal LengthVal = state->getSVal(Size); 314 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 315 if (!Length) 316 return state; 317 318 // Compute the offset of the last element to be accessed: size-1. 319 NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy)); 320 NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub, 321 *Length, One, sizeTy)); 322 323 // Check that the first buffer is sufficiently long. 324 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); 325 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 326 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 327 LastOffset, PtrTy); 328 state = CheckLocation(C, state, FirstBuf, BufEnd, firstMessage); 329 330 // If the buffer isn't large enough, abort. 331 if (!state) 332 return NULL; 333 } 334 335 // If there's a second buffer, check it as well. 336 if (SecondBuf) { 337 BufVal = state->getSVal(SecondBuf); 338 state = checkNonNull(C, state, SecondBuf, BufVal); 339 if (!state) 340 return NULL; 341 342 BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType()); 343 if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) { 344 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 345 LastOffset, PtrTy); 346 state = CheckLocation(C, state, SecondBuf, BufEnd, secondMessage); 347 } 348 } 349 350 // Large enough or not, return this state! 351 return state; 352 } 353 354 const GRState *CStringChecker::CheckOverlap(CheckerContext &C, 355 const GRState *state, 356 const Expr *Size, 357 const Expr *First, 358 const Expr *Second) const { 359 // Do a simple check for overlap: if the two arguments are from the same 360 // buffer, see if the end of the first is greater than the start of the second 361 // or vice versa. 362 363 // If a previous check has failed, propagate the failure. 364 if (!state) 365 return NULL; 366 367 const GRState *stateTrue, *stateFalse; 368 369 // Get the buffer values and make sure they're known locations. 370 SVal firstVal = state->getSVal(First); 371 SVal secondVal = state->getSVal(Second); 372 373 Loc *firstLoc = dyn_cast<Loc>(&firstVal); 374 if (!firstLoc) 375 return state; 376 377 Loc *secondLoc = dyn_cast<Loc>(&secondVal); 378 if (!secondLoc) 379 return state; 380 381 // Are the two values the same? 382 SValBuilder &svalBuilder = C.getSValBuilder(); 383 llvm::tie(stateTrue, stateFalse) = 384 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 385 386 if (stateTrue && !stateFalse) { 387 // If the values are known to be equal, that's automatically an overlap. 388 emitOverlapBug(C, stateTrue, First, Second); 389 return NULL; 390 } 391 392 // assume the two expressions are not equal. 393 assert(stateFalse); 394 state = stateFalse; 395 396 // Which value comes first? 397 QualType cmpTy = svalBuilder.getConditionType(); 398 SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT, 399 *firstLoc, *secondLoc, cmpTy); 400 DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse); 401 if (!reverseTest) 402 return state; 403 404 llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 405 if (stateTrue) { 406 if (stateFalse) { 407 // If we don't know which one comes first, we can't perform this test. 408 return state; 409 } else { 410 // Switch the values so that firstVal is before secondVal. 411 Loc *tmpLoc = firstLoc; 412 firstLoc = secondLoc; 413 secondLoc = tmpLoc; 414 415 // Switch the Exprs as well, so that they still correspond. 416 const Expr *tmpExpr = First; 417 First = Second; 418 Second = tmpExpr; 419 } 420 } 421 422 // Get the length, and make sure it too is known. 423 SVal LengthVal = state->getSVal(Size); 424 NonLoc *Length = dyn_cast<NonLoc>(&LengthVal); 425 if (!Length) 426 return state; 427 428 // Convert the first buffer's start address to char*. 429 // Bail out if the cast fails. 430 ASTContext &Ctx = svalBuilder.getContext(); 431 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); 432 SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, 433 First->getType()); 434 Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart); 435 if (!FirstStartLoc) 436 return state; 437 438 // Compute the end of the first buffer. Bail out if THAT fails. 439 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, 440 *FirstStartLoc, *Length, CharPtrTy); 441 Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd); 442 if (!FirstEndLoc) 443 return state; 444 445 // Is the end of the first buffer past the start of the second buffer? 446 SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT, 447 *FirstEndLoc, *secondLoc, cmpTy); 448 DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap); 449 if (!OverlapTest) 450 return state; 451 452 llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 453 454 if (stateTrue && !stateFalse) { 455 // Overlap! 456 emitOverlapBug(C, stateTrue, First, Second); 457 return NULL; 458 } 459 460 // assume the two expressions don't overlap. 461 assert(stateFalse); 462 return stateFalse; 463 } 464 465 void CStringChecker::emitOverlapBug(CheckerContext &C, const GRState *state, 466 const Stmt *First, const Stmt *Second) const { 467 ExplodedNode *N = C.generateSink(state); 468 if (!N) 469 return; 470 471 if (!BT_Overlap) 472 BT_Overlap.reset(new BugType("Unix API", "Improper arguments")); 473 474 // Generate a report for this bug. 475 RangedBugReport *report = 476 new RangedBugReport(*BT_Overlap, 477 "Arguments must not be overlapping buffers", N); 478 report->addRange(First->getSourceRange()); 479 report->addRange(Second->getSourceRange()); 480 481 C.EmitReport(report); 482 } 483 484 const GRState *CStringChecker::checkAdditionOverflow(CheckerContext &C, 485 const GRState *state, 486 NonLoc left, 487 NonLoc right) const { 488 // If a previous check has failed, propagate the failure. 489 if (!state) 490 return NULL; 491 492 SValBuilder &svalBuilder = C.getSValBuilder(); 493 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 494 495 QualType sizeTy = svalBuilder.getContext().getSizeType(); 496 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 497 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt); 498 499 SVal maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right, 500 sizeTy); 501 502 if (maxMinusRight.isUnknownOrUndef()) { 503 // Try switching the operands. (The order of these two assignments is 504 // important!) 505 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left, 506 sizeTy); 507 left = right; 508 } 509 510 if (NonLoc *maxMinusRightNL = dyn_cast<NonLoc>(&maxMinusRight)) { 511 QualType cmpTy = svalBuilder.getConditionType(); 512 // If left > max - right, we have an overflow. 513 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left, 514 *maxMinusRightNL, cmpTy); 515 516 const GRState *stateOverflow, *stateOkay; 517 llvm::tie(stateOverflow, stateOkay) = 518 state->assume(cast<DefinedOrUnknownSVal>(willOverflow)); 519 520 if (stateOverflow && !stateOkay) { 521 // We have an overflow. Emit a bug report. 522 ExplodedNode *N = C.generateSink(stateOverflow); 523 if (!N) 524 return NULL; 525 526 if (!BT_AdditionOverflow) 527 BT_AdditionOverflow.reset(new BuiltinBug("API", 528 "Sum of expressions causes overflow")); 529 530 llvm::SmallString<120> buf; 531 llvm::raw_svector_ostream os(buf); 532 // This isn't a great error message, but this should never occur in real 533 // code anyway -- you'd have to create a buffer longer than a size_t can 534 // represent, which is sort of a contradiction. 535 os << "This expression will create a string whose length is too big to " 536 << "be represented as a size_t"; 537 538 // Generate a report for this bug. 539 BugReport *report = new BugReport(*BT_AdditionOverflow, os.str(), N); 540 C.EmitReport(report); 541 542 return NULL; 543 } 544 545 // From now on, assume an overflow didn't occur. 546 assert(stateOkay); 547 state = stateOkay; 548 } 549 550 return state; 551 } 552 553 const GRState *CStringChecker::setCStringLength(const GRState *state, 554 const MemRegion *MR, 555 SVal strLength) { 556 assert(!strLength.isUndef() && "Attempt to set an undefined string length"); 557 558 MR = MR->StripCasts(); 559 560 switch (MR->getKind()) { 561 case MemRegion::StringRegionKind: 562 // FIXME: This can happen if we strcpy() into a string region. This is 563 // undefined [C99 6.4.5p6], but we should still warn about it. 564 return state; 565 566 case MemRegion::SymbolicRegionKind: 567 case MemRegion::AllocaRegionKind: 568 case MemRegion::VarRegionKind: 569 case MemRegion::FieldRegionKind: 570 case MemRegion::ObjCIvarRegionKind: 571 // These are the types we can currently track string lengths for. 572 break; 573 574 case MemRegion::ElementRegionKind: 575 // FIXME: Handle element regions by upper-bounding the parent region's 576 // string length. 577 return state; 578 579 default: 580 // Other regions (mostly non-data) can't have a reliable C string length. 581 // For now, just ignore the change. 582 // FIXME: These are rare but not impossible. We should output some kind of 583 // warning for things like strcpy((char[]){'a', 0}, "b"); 584 return state; 585 } 586 587 if (strLength.isUnknown()) 588 return state->remove<CStringLength>(MR); 589 590 return state->set<CStringLength>(MR, strLength); 591 } 592 593 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, 594 const GRState *&state, 595 const Expr *Ex, 596 const MemRegion *MR, 597 bool hypothetical) { 598 if (!hypothetical) { 599 // If there's a recorded length, go ahead and return it. 600 const SVal *Recorded = state->get<CStringLength>(MR); 601 if (Recorded) 602 return *Recorded; 603 } 604 605 // Otherwise, get a new symbol and update the state. 606 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 607 SValBuilder &svalBuilder = C.getSValBuilder(); 608 QualType sizeTy = svalBuilder.getContext().getSizeType(); 609 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), 610 MR, Ex, sizeTy, Count); 611 612 if (!hypothetical) 613 state = state->set<CStringLength>(MR, strLength); 614 615 return strLength; 616 } 617 618 SVal CStringChecker::getCStringLength(CheckerContext &C, const GRState *&state, 619 const Expr *Ex, SVal Buf, 620 bool hypothetical) const { 621 const MemRegion *MR = Buf.getAsRegion(); 622 if (!MR) { 623 // If we can't get a region, see if it's something we /know/ isn't a 624 // C string. In the context of locations, the only time we can issue such 625 // a warning is for labels. 626 if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) { 627 if (ExplodedNode *N = C.generateNode(state)) { 628 if (!BT_NotCString) 629 BT_NotCString.reset(new BuiltinBug("API", 630 "Argument is not a null-terminated string.")); 631 632 llvm::SmallString<120> buf; 633 llvm::raw_svector_ostream os(buf); 634 assert(CurrentFunctionDescription); 635 os << "Argument to " << CurrentFunctionDescription 636 << " is the address of the label '" << Label->getLabel()->getName() 637 << "', which is not a null-terminated string"; 638 639 // Generate a report for this bug. 640 EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString, 641 os.str(), N); 642 643 report->addRange(Ex->getSourceRange()); 644 C.EmitReport(report); 645 } 646 647 return UndefinedVal(); 648 } 649 650 // If it's not a region and not a label, give up. 651 return UnknownVal(); 652 } 653 654 // If we have a region, strip casts from it and see if we can figure out 655 // its length. For anything we can't figure out, just return UnknownVal. 656 MR = MR->StripCasts(); 657 658 switch (MR->getKind()) { 659 case MemRegion::StringRegionKind: { 660 // Modifying the contents of string regions is undefined [C99 6.4.5p6], 661 // so we can assume that the byte length is the correct C string length. 662 SValBuilder &svalBuilder = C.getSValBuilder(); 663 QualType sizeTy = svalBuilder.getContext().getSizeType(); 664 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); 665 return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); 666 } 667 case MemRegion::SymbolicRegionKind: 668 case MemRegion::AllocaRegionKind: 669 case MemRegion::VarRegionKind: 670 case MemRegion::FieldRegionKind: 671 case MemRegion::ObjCIvarRegionKind: 672 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical); 673 case MemRegion::CompoundLiteralRegionKind: 674 // FIXME: Can we track this? Is it necessary? 675 return UnknownVal(); 676 case MemRegion::ElementRegionKind: 677 // FIXME: How can we handle this? It's not good enough to subtract the 678 // offset from the base string length; consider "123\x00567" and &a[5]. 679 return UnknownVal(); 680 default: 681 // Other regions (mostly non-data) can't have a reliable C string length. 682 // In this case, an error is emitted and UndefinedVal is returned. 683 // The caller should always be prepared to handle this case. 684 if (ExplodedNode *N = C.generateNode(state)) { 685 if (!BT_NotCString) 686 BT_NotCString.reset(new BuiltinBug("API", 687 "Argument is not a null-terminated string.")); 688 689 llvm::SmallString<120> buf; 690 llvm::raw_svector_ostream os(buf); 691 692 assert(CurrentFunctionDescription); 693 os << "Argument to " << CurrentFunctionDescription << " is "; 694 695 if (SummarizeRegion(os, C.getASTContext(), MR)) 696 os << ", which is not a null-terminated string"; 697 else 698 os << "not a null-terminated string"; 699 700 // Generate a report for this bug. 701 EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString, 702 os.str(), N); 703 704 report->addRange(Ex->getSourceRange()); 705 C.EmitReport(report); 706 } 707 708 return UndefinedVal(); 709 } 710 } 711 712 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, 713 const GRState *&state, const Expr *expr, SVal val) const { 714 715 // Get the memory region pointed to by the val. 716 const MemRegion *bufRegion = val.getAsRegion(); 717 if (!bufRegion) 718 return NULL; 719 720 // Strip casts off the memory region. 721 bufRegion = bufRegion->StripCasts(); 722 723 // Cast the memory region to a string region. 724 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion); 725 if (!strRegion) 726 return NULL; 727 728 // Return the actual string in the string region. 729 return strRegion->getStringLiteral(); 730 } 731 732 const GRState *CStringChecker::InvalidateBuffer(CheckerContext &C, 733 const GRState *state, 734 const Expr *E, SVal V) { 735 Loc *L = dyn_cast<Loc>(&V); 736 if (!L) 737 return state; 738 739 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 740 // some assumptions about the value that CFRefCount can't. Even so, it should 741 // probably be refactored. 742 if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) { 743 const MemRegion *R = MR->getRegion()->StripCasts(); 744 745 // Are we dealing with an ElementRegion? If so, we should be invalidating 746 // the super-region. 747 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 748 R = ER->getSuperRegion(); 749 // FIXME: What about layers of ElementRegions? 750 } 751 752 // Invalidate this region. 753 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 754 return state->invalidateRegion(R, E, Count, NULL); 755 } 756 757 // If we have a non-region value by chance, just remove the binding. 758 // FIXME: is this necessary or correct? This handles the non-Region 759 // cases. Is it ever valid to store to these? 760 return state->unbindLoc(*L); 761 } 762 763 bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx, 764 const MemRegion *MR) { 765 const TypedRegion *TR = dyn_cast<TypedRegion>(MR); 766 if (!TR) 767 return false; 768 769 switch (TR->getKind()) { 770 case MemRegion::FunctionTextRegionKind: { 771 const FunctionDecl *FD = cast<FunctionTextRegion>(TR)->getDecl(); 772 if (FD) 773 os << "the address of the function '" << FD << "'"; 774 else 775 os << "the address of a function"; 776 return true; 777 } 778 case MemRegion::BlockTextRegionKind: 779 os << "block text"; 780 return true; 781 case MemRegion::BlockDataRegionKind: 782 os << "a block"; 783 return true; 784 case MemRegion::CXXThisRegionKind: 785 case MemRegion::CXXTempObjectRegionKind: 786 os << "a C++ temp object of type " << TR->getValueType().getAsString(); 787 return true; 788 case MemRegion::VarRegionKind: 789 os << "a variable of type" << TR->getValueType().getAsString(); 790 return true; 791 case MemRegion::FieldRegionKind: 792 os << "a field of type " << TR->getValueType().getAsString(); 793 return true; 794 case MemRegion::ObjCIvarRegionKind: 795 os << "an instance variable of type " << TR->getValueType().getAsString(); 796 return true; 797 default: 798 return false; 799 } 800 } 801 802 //===----------------------------------------------------------------------===// 803 // evaluation of individual function calls. 804 //===----------------------------------------------------------------------===// 805 806 void CStringChecker::evalCopyCommon(CheckerContext &C, 807 const CallExpr *CE, 808 const GRState *state, 809 const Expr *Size, const Expr *Dest, 810 const Expr *Source, bool Restricted, 811 bool IsMempcpy) const { 812 CurrentFunctionDescription = "memory copy function"; 813 814 // See if the size argument is zero. 815 SVal sizeVal = state->getSVal(Size); 816 QualType sizeTy = Size->getType(); 817 818 const GRState *stateZeroSize, *stateNonZeroSize; 819 llvm::tie(stateZeroSize, stateNonZeroSize) = 820 assumeZero(C, state, sizeVal, sizeTy); 821 822 // Get the value of the Dest. 823 SVal destVal = state->getSVal(Dest); 824 825 // If the size is zero, there won't be any actual memory access, so 826 // just bind the return value to the destination buffer and return. 827 if (stateZeroSize) { 828 stateZeroSize = stateZeroSize->BindExpr(CE, destVal); 829 C.addTransition(stateZeroSize); 830 } 831 832 // If the size can be nonzero, we have to check the other arguments. 833 if (stateNonZeroSize) { 834 state = stateNonZeroSize; 835 836 // Ensure the destination is not null. If it is NULL there will be a 837 // NULL pointer dereference. 838 state = checkNonNull(C, state, Dest, destVal); 839 if (!state) 840 return; 841 842 // Get the value of the Src. 843 SVal srcVal = state->getSVal(Source); 844 845 // Ensure the source is not null. If it is NULL there will be a 846 // NULL pointer dereference. 847 state = checkNonNull(C, state, Source, srcVal); 848 if (!state) 849 return; 850 851 // Ensure the accesses are valid and that the buffers do not overlap. 852 const char * const writeWarning = 853 "Memory copy function overflows destination buffer"; 854 state = CheckBufferAccess(C, state, Size, Dest, Source, 855 writeWarning, /* sourceWarning = */ NULL); 856 if (Restricted) 857 state = CheckOverlap(C, state, Size, Dest, Source); 858 859 if (!state) 860 return; 861 862 // If this is mempcpy, get the byte after the last byte copied and 863 // bind the expr. 864 if (IsMempcpy) { 865 loc::MemRegionVal *destRegVal = dyn_cast<loc::MemRegionVal>(&destVal); 866 assert(destRegVal && "Destination should be a known MemRegionVal here"); 867 868 // Get the length to copy. 869 NonLoc *lenValNonLoc = dyn_cast<NonLoc>(&sizeVal); 870 871 if (lenValNonLoc) { 872 // Get the byte after the last byte copied. 873 SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add, 874 *destRegVal, 875 *lenValNonLoc, 876 Dest->getType()); 877 878 // The byte after the last byte copied is the return value. 879 state = state->BindExpr(CE, lastElement); 880 } else { 881 // If we don't know how much we copied, we can at least 882 // conjure a return value for later. 883 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 884 SVal result = 885 C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count); 886 state = state->BindExpr(CE, result); 887 } 888 889 } else { 890 // All other copies return the destination buffer. 891 // (Well, bcopy() has a void return type, but this won't hurt.) 892 state = state->BindExpr(CE, destVal); 893 } 894 895 // Invalidate the destination. 896 // FIXME: Even if we can't perfectly model the copy, we should see if we 897 // can use LazyCompoundVals to copy the source values into the destination. 898 // This would probably remove any existing bindings past the end of the 899 // copied region, but that's still an improvement over blank invalidation. 900 state = InvalidateBuffer(C, state, Dest, state->getSVal(Dest)); 901 C.addTransition(state); 902 } 903 } 904 905 906 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const { 907 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 908 // The return value is the address of the destination buffer. 909 const Expr *Dest = CE->getArg(0); 910 const GRState *state = C.getState(); 911 912 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true); 913 } 914 915 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const { 916 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n); 917 // The return value is a pointer to the byte following the last written byte. 918 const Expr *Dest = CE->getArg(0); 919 const GRState *state = C.getState(); 920 921 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true); 922 } 923 924 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const { 925 // void *memmove(void *dst, const void *src, size_t n); 926 // The return value is the address of the destination buffer. 927 const Expr *Dest = CE->getArg(0); 928 const GRState *state = C.getState(); 929 930 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1)); 931 } 932 933 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const { 934 // void bcopy(const void *src, void *dst, size_t n); 935 evalCopyCommon(C, CE, C.getState(), 936 CE->getArg(2), CE->getArg(1), CE->getArg(0)); 937 } 938 939 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const { 940 // int memcmp(const void *s1, const void *s2, size_t n); 941 CurrentFunctionDescription = "memory comparison function"; 942 943 const Expr *Left = CE->getArg(0); 944 const Expr *Right = CE->getArg(1); 945 const Expr *Size = CE->getArg(2); 946 947 const GRState *state = C.getState(); 948 SValBuilder &svalBuilder = C.getSValBuilder(); 949 950 // See if the size argument is zero. 951 SVal sizeVal = state->getSVal(Size); 952 QualType sizeTy = Size->getType(); 953 954 const GRState *stateZeroSize, *stateNonZeroSize; 955 llvm::tie(stateZeroSize, stateNonZeroSize) = 956 assumeZero(C, state, sizeVal, sizeTy); 957 958 // If the size can be zero, the result will be 0 in that case, and we don't 959 // have to check either of the buffers. 960 if (stateZeroSize) { 961 state = stateZeroSize; 962 state = state->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); 963 C.addTransition(state); 964 } 965 966 // If the size can be nonzero, we have to check the other arguments. 967 if (stateNonZeroSize) { 968 state = stateNonZeroSize; 969 // If we know the two buffers are the same, we know the result is 0. 970 // First, get the two buffers' addresses. Another checker will have already 971 // made sure they're not undefined. 972 DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(state->getSVal(Left)); 973 DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(state->getSVal(Right)); 974 975 // See if they are the same. 976 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 977 const GRState *StSameBuf, *StNotSameBuf; 978 llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 979 980 // If the two arguments might be the same buffer, we know the result is 0, 981 // and we only need to check one size. 982 if (StSameBuf) { 983 state = StSameBuf; 984 state = CheckBufferAccess(C, state, Size, Left); 985 if (state) { 986 state = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); 987 C.addTransition(state); 988 } 989 } 990 991 // If the two arguments might be different buffers, we have to check the 992 // size of both of them. 993 if (StNotSameBuf) { 994 state = StNotSameBuf; 995 state = CheckBufferAccess(C, state, Size, Left, Right); 996 if (state) { 997 // The return value is the comparison result, which we don't know. 998 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 999 SVal CmpV = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); 1000 state = state->BindExpr(CE, CmpV); 1001 C.addTransition(state); 1002 } 1003 } 1004 } 1005 } 1006 1007 void CStringChecker::evalstrLength(CheckerContext &C, 1008 const CallExpr *CE) const { 1009 // size_t strlen(const char *s); 1010 evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); 1011 } 1012 1013 void CStringChecker::evalstrnLength(CheckerContext &C, 1014 const CallExpr *CE) const { 1015 // size_t strnlen(const char *s, size_t maxlen); 1016 evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); 1017 } 1018 1019 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 1020 bool IsStrnlen) const { 1021 CurrentFunctionDescription = "string length function"; 1022 const GRState *state = C.getState(); 1023 1024 if (IsStrnlen) { 1025 const Expr *maxlenExpr = CE->getArg(1); 1026 SVal maxlenVal = state->getSVal(maxlenExpr); 1027 1028 const GRState *stateZeroSize, *stateNonZeroSize; 1029 llvm::tie(stateZeroSize, stateNonZeroSize) = 1030 assumeZero(C, state, maxlenVal, maxlenExpr->getType()); 1031 1032 // If the size can be zero, the result will be 0 in that case, and we don't 1033 // have to check the string itself. 1034 if (stateZeroSize) { 1035 SVal zero = C.getSValBuilder().makeZeroVal(CE->getType()); 1036 stateZeroSize = stateZeroSize->BindExpr(CE, zero); 1037 C.addTransition(stateZeroSize); 1038 } 1039 1040 // If the size is GUARANTEED to be zero, we're done! 1041 if (!stateNonZeroSize) 1042 return; 1043 1044 // Otherwise, record the assumption that the size is nonzero. 1045 state = stateNonZeroSize; 1046 } 1047 1048 // Check that the string argument is non-null. 1049 const Expr *Arg = CE->getArg(0); 1050 SVal ArgVal = state->getSVal(Arg); 1051 1052 state = checkNonNull(C, state, Arg, ArgVal); 1053 1054 if (!state) 1055 return; 1056 1057 SVal strLength = getCStringLength(C, state, Arg, ArgVal); 1058 1059 // If the argument isn't a valid C string, there's no valid state to 1060 // transition to. 1061 if (strLength.isUndef()) 1062 return; 1063 1064 DefinedOrUnknownSVal result = UnknownVal(); 1065 1066 // If the check is for strnlen() then bind the return value to no more than 1067 // the maxlen value. 1068 if (IsStrnlen) { 1069 QualType cmpTy = C.getSValBuilder().getConditionType(); 1070 1071 // It's a little unfortunate to be getting this again, 1072 // but it's not that expensive... 1073 const Expr *maxlenExpr = CE->getArg(1); 1074 SVal maxlenVal = state->getSVal(maxlenExpr); 1075 1076 NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); 1077 NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal); 1078 1079 if (strLengthNL && maxlenValNL) { 1080 const GRState *stateStringTooLong, *stateStringNotTooLong; 1081 1082 // Check if the strLength is greater than the maxlen. 1083 llvm::tie(stateStringTooLong, stateStringNotTooLong) = 1084 state->assume(cast<DefinedOrUnknownSVal> 1085 (C.getSValBuilder().evalBinOpNN(state, BO_GT, 1086 *strLengthNL, 1087 *maxlenValNL, 1088 cmpTy))); 1089 1090 if (stateStringTooLong && !stateStringNotTooLong) { 1091 // If the string is longer than maxlen, return maxlen. 1092 result = *maxlenValNL; 1093 } else if (stateStringNotTooLong && !stateStringTooLong) { 1094 // If the string is shorter than maxlen, return its length. 1095 result = *strLengthNL; 1096 } 1097 } 1098 1099 if (result.isUnknown()) { 1100 // If we don't have enough information for a comparison, there's 1101 // no guarantee the full string length will actually be returned. 1102 // All we know is the return value is the min of the string length 1103 // and the limit. This is better than nothing. 1104 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 1105 result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count); 1106 NonLoc *resultNL = cast<NonLoc>(&result); 1107 1108 if (strLengthNL) { 1109 state = state->assume(cast<DefinedOrUnknownSVal> 1110 (C.getSValBuilder().evalBinOpNN(state, BO_LE, 1111 *resultNL, 1112 *strLengthNL, 1113 cmpTy)), true); 1114 } 1115 1116 if (maxlenValNL) { 1117 state = state->assume(cast<DefinedOrUnknownSVal> 1118 (C.getSValBuilder().evalBinOpNN(state, BO_LE, 1119 *resultNL, 1120 *maxlenValNL, 1121 cmpTy)), true); 1122 } 1123 } 1124 1125 } else { 1126 // This is a plain strlen(), not strnlen(). 1127 result = cast<DefinedOrUnknownSVal>(strLength); 1128 1129 // If we don't know the length of the string, conjure a return 1130 // value, so it can be used in constraints, at least. 1131 if (result.isUnknown()) { 1132 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 1133 result = C.getSValBuilder().getConjuredSymbolVal(NULL, CE, Count); 1134 } 1135 } 1136 1137 // Bind the return value. 1138 assert(!result.isUnknown() && "Should have conjured a value by now"); 1139 state = state->BindExpr(CE, result); 1140 C.addTransition(state); 1141 } 1142 1143 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const { 1144 // char *strcpy(char *restrict dst, const char *restrict src); 1145 evalStrcpyCommon(C, CE, 1146 /* returnEnd = */ false, 1147 /* isBounded = */ false, 1148 /* isAppending = */ false); 1149 } 1150 1151 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const { 1152 // char *strncpy(char *restrict dst, const char *restrict src, size_t n); 1153 evalStrcpyCommon(C, CE, 1154 /* returnEnd = */ false, 1155 /* isBounded = */ true, 1156 /* isAppending = */ false); 1157 } 1158 1159 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const { 1160 // char *stpcpy(char *restrict dst, const char *restrict src); 1161 evalStrcpyCommon(C, CE, 1162 /* returnEnd = */ true, 1163 /* isBounded = */ false, 1164 /* isAppending = */ false); 1165 } 1166 1167 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const { 1168 //char *strcat(char *restrict s1, const char *restrict s2); 1169 evalStrcpyCommon(C, CE, 1170 /* returnEnd = */ false, 1171 /* isBounded = */ false, 1172 /* isAppending = */ true); 1173 } 1174 1175 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const { 1176 //char *strncat(char *restrict s1, const char *restrict s2, size_t n); 1177 evalStrcpyCommon(C, CE, 1178 /* returnEnd = */ false, 1179 /* isBounded = */ true, 1180 /* isAppending = */ true); 1181 } 1182 1183 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, 1184 bool returnEnd, bool isBounded, 1185 bool isAppending) const { 1186 CurrentFunctionDescription = "string copy function"; 1187 const GRState *state = C.getState(); 1188 1189 // Check that the destination is non-null. 1190 const Expr *Dst = CE->getArg(0); 1191 SVal DstVal = state->getSVal(Dst); 1192 1193 state = checkNonNull(C, state, Dst, DstVal); 1194 if (!state) 1195 return; 1196 1197 // Check that the source is non-null. 1198 const Expr *srcExpr = CE->getArg(1); 1199 SVal srcVal = state->getSVal(srcExpr); 1200 state = checkNonNull(C, state, srcExpr, srcVal); 1201 if (!state) 1202 return; 1203 1204 // Get the string length of the source. 1205 SVal strLength = getCStringLength(C, state, srcExpr, srcVal); 1206 1207 // If the source isn't a valid C string, give up. 1208 if (strLength.isUndef()) 1209 return; 1210 1211 SValBuilder &svalBuilder = C.getSValBuilder(); 1212 QualType cmpTy = svalBuilder.getConditionType(); 1213 1214 SVal amountCopied = UnknownVal(); 1215 1216 // If the function is strncpy, strncat, etc... it is bounded. 1217 if (isBounded) { 1218 // Get the max number of characters to copy. 1219 const Expr *lenExpr = CE->getArg(2); 1220 SVal lenVal = state->getSVal(lenExpr); 1221 1222 // Protect against misdeclared strncpy(). 1223 lenVal = svalBuilder.evalCast(lenVal, 1224 svalBuilder.getContext().getSizeType(), 1225 lenExpr->getType()); 1226 1227 NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength); 1228 NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal); 1229 1230 // If we know both values, we might be able to figure out how much 1231 // we're copying. 1232 if (strLengthNL && lenValNL) { 1233 const GRState *stateSourceTooLong, *stateSourceNotTooLong; 1234 1235 // Check if the max number to copy is less than the length of the src. 1236 llvm::tie(stateSourceTooLong, stateSourceNotTooLong) = 1237 state->assume(cast<DefinedOrUnknownSVal> 1238 (svalBuilder.evalBinOpNN(state, BO_GT, *strLengthNL, 1239 *lenValNL, cmpTy))); 1240 1241 if (stateSourceTooLong && !stateSourceNotTooLong) { 1242 // Max number to copy is less than the length of the src, so the actual 1243 // strLength copied is the max number arg. 1244 state = stateSourceTooLong; 1245 amountCopied = lenVal; 1246 1247 } else if (!stateSourceTooLong && stateSourceNotTooLong) { 1248 // The source buffer entirely fits in the bound. 1249 state = stateSourceNotTooLong; 1250 amountCopied = strLength; 1251 } 1252 } 1253 1254 // If we couldn't pin down the copy length, at least bound it. 1255 if (amountCopied.isUnknown()) { 1256 // Try to get a "hypothetical" string length symbol, which we can later 1257 // set as a real value if that turns out to be the case. 1258 amountCopied = getCStringLength(C, state, lenExpr, srcVal, true); 1259 assert(!amountCopied.isUndef()); 1260 1261 if (NonLoc *amountCopiedNL = dyn_cast<NonLoc>(&amountCopied)) { 1262 if (lenValNL) { 1263 // amountCopied <= lenVal 1264 SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE, 1265 *amountCopiedNL, 1266 *lenValNL, 1267 cmpTy); 1268 state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanBound), 1269 true); 1270 if (!state) 1271 return; 1272 } 1273 1274 if (strLengthNL) { 1275 // amountCopied <= strlen(source) 1276 SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE, 1277 *amountCopiedNL, 1278 *strLengthNL, 1279 cmpTy); 1280 state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanSrc), 1281 true); 1282 if (!state) 1283 return; 1284 } 1285 } 1286 } 1287 1288 } else { 1289 // The function isn't bounded. The amount copied should match the length 1290 // of the source buffer. 1291 amountCopied = strLength; 1292 } 1293 1294 assert(state); 1295 1296 // This represents the number of characters copied into the destination 1297 // buffer. (It may not actually be the strlen if the destination buffer 1298 // is not terminated.) 1299 SVal finalStrLength = UnknownVal(); 1300 1301 // If this is an appending function (strcat, strncat...) then set the 1302 // string length to strlen(src) + strlen(dst) since the buffer will 1303 // ultimately contain both. 1304 if (isAppending) { 1305 // Get the string length of the destination. If the destination is memory 1306 // that can't have a string length, we shouldn't be copying into it anyway. 1307 SVal dstStrLength = getCStringLength(C, state, Dst, DstVal); 1308 if (dstStrLength.isUndef()) 1309 return; 1310 1311 QualType sizeTy = svalBuilder.getContext().getSizeType(); 1312 1313 NonLoc *srcStrLengthNL = dyn_cast<NonLoc>(&amountCopied); 1314 NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength); 1315 1316 // If we know both string lengths, we might know the final string length. 1317 if (srcStrLengthNL && dstStrLengthNL) { 1318 // Make sure the two lengths together don't overflow a size_t. 1319 state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL); 1320 if (!state) 1321 return; 1322 1323 finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL, 1324 *dstStrLengthNL, sizeTy); 1325 } 1326 1327 // If we couldn't get a single value for the final string length, 1328 // we can at least bound it by the individual lengths. 1329 if (finalStrLength.isUnknown()) { 1330 // Try to get a "hypothetical" string length symbol, which we can later 1331 // set as a real value if that turns out to be the case. 1332 finalStrLength = getCStringLength(C, state, CE, DstVal, true); 1333 assert(!finalStrLength.isUndef()); 1334 1335 if (NonLoc *finalStrLengthNL = dyn_cast<NonLoc>(&finalStrLength)) { 1336 if (srcStrLengthNL) { 1337 // finalStrLength >= srcStrLength 1338 SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE, 1339 *finalStrLengthNL, 1340 *srcStrLengthNL, 1341 cmpTy); 1342 state = state->assume(cast<DefinedOrUnknownSVal>(sourceInResult), 1343 true); 1344 if (!state) 1345 return; 1346 } 1347 1348 if (dstStrLengthNL) { 1349 // finalStrLength >= dstStrLength 1350 SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE, 1351 *finalStrLengthNL, 1352 *dstStrLengthNL, 1353 cmpTy); 1354 state = state->assume(cast<DefinedOrUnknownSVal>(destInResult), 1355 true); 1356 if (!state) 1357 return; 1358 } 1359 } 1360 } 1361 1362 } else { 1363 // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and 1364 // the final string length will match the input string length. 1365 finalStrLength = amountCopied; 1366 } 1367 1368 // The final result of the function will either be a pointer past the last 1369 // copied element, or a pointer to the start of the destination buffer. 1370 SVal Result = (returnEnd ? UnknownVal() : DstVal); 1371 1372 assert(state); 1373 1374 // If the destination is a MemRegion, try to check for a buffer overflow and 1375 // record the new string length. 1376 if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) { 1377 // If the final length is known, we can check for an overflow. 1378 if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&finalStrLength)) { 1379 SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, 1380 *knownStrLength, 1381 Dst->getType()); 1382 1383 const char * const warningMsg = 1384 "String copy function overflows destination buffer"; 1385 state = CheckLocation(C, state, Dst, lastElement, warningMsg); 1386 if (!state) 1387 return; 1388 1389 // If this is a stpcpy-style copy, the last element is the return value. 1390 if (returnEnd) 1391 Result = lastElement; 1392 } 1393 1394 // Invalidate the destination. This must happen before we set the C string 1395 // length because invalidation will clear the length. 1396 // FIXME: Even if we can't perfectly model the copy, we should see if we 1397 // can use LazyCompoundVals to copy the source values into the destination. 1398 // This would probably remove any existing bindings past the end of the 1399 // string, but that's still an improvement over blank invalidation. 1400 state = InvalidateBuffer(C, state, Dst, *dstRegVal); 1401 1402 // Set the C string length of the destination. 1403 state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength); 1404 } 1405 1406 assert(state); 1407 1408 // If this is a stpcpy-style copy, but we were unable to check for a buffer 1409 // overflow, we still need a result. Conjure a return value. 1410 if (returnEnd && Result.isUnknown()) { 1411 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 1412 Result = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); 1413 } 1414 1415 // Set the return value. 1416 state = state->BindExpr(CE, Result); 1417 C.addTransition(state); 1418 } 1419 1420 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const { 1421 //int strcmp(const char *s1, const char *s2); 1422 evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false); 1423 } 1424 1425 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const { 1426 //int strncmp(const char *s1, const char *s2, size_t n); 1427 evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false); 1428 } 1429 1430 void CStringChecker::evalStrcasecmp(CheckerContext &C, 1431 const CallExpr *CE) const { 1432 //int strcasecmp(const char *s1, const char *s2); 1433 evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true); 1434 } 1435 1436 void CStringChecker::evalStrncasecmp(CheckerContext &C, 1437 const CallExpr *CE) const { 1438 //int strncasecmp(const char *s1, const char *s2, size_t n); 1439 evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true); 1440 } 1441 1442 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, 1443 bool isBounded, bool ignoreCase) const { 1444 CurrentFunctionDescription = "string comparison function"; 1445 const GRState *state = C.getState(); 1446 1447 // Check that the first string is non-null 1448 const Expr *s1 = CE->getArg(0); 1449 SVal s1Val = state->getSVal(s1); 1450 state = checkNonNull(C, state, s1, s1Val); 1451 if (!state) 1452 return; 1453 1454 // Check that the second string is non-null. 1455 const Expr *s2 = CE->getArg(1); 1456 SVal s2Val = state->getSVal(s2); 1457 state = checkNonNull(C, state, s2, s2Val); 1458 if (!state) 1459 return; 1460 1461 // Get the string length of the first string or give up. 1462 SVal s1Length = getCStringLength(C, state, s1, s1Val); 1463 if (s1Length.isUndef()) 1464 return; 1465 1466 // Get the string length of the second string or give up. 1467 SVal s2Length = getCStringLength(C, state, s2, s2Val); 1468 if (s2Length.isUndef()) 1469 return; 1470 1471 // If we know the two buffers are the same, we know the result is 0. 1472 // First, get the two buffers' addresses. Another checker will have already 1473 // made sure they're not undefined. 1474 DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(s1Val); 1475 DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(s2Val); 1476 1477 // See if they are the same. 1478 SValBuilder &svalBuilder = C.getSValBuilder(); 1479 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 1480 const GRState *StSameBuf, *StNotSameBuf; 1481 llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 1482 1483 // If the two arguments might be the same buffer, we know the result is 0, 1484 // and we only need to check one size. 1485 if (StSameBuf) { 1486 StSameBuf = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType())); 1487 C.addTransition(StSameBuf); 1488 1489 // If the two arguments are GUARANTEED to be the same, we're done! 1490 if (!StNotSameBuf) 1491 return; 1492 } 1493 1494 assert(StNotSameBuf); 1495 state = StNotSameBuf; 1496 1497 // At this point we can go about comparing the two buffers. 1498 // For now, we only do this if they're both known string literals. 1499 1500 // Attempt to extract string literals from both expressions. 1501 const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val); 1502 const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val); 1503 bool canComputeResult = false; 1504 1505 if (s1StrLiteral && s2StrLiteral) { 1506 llvm::StringRef s1StrRef = s1StrLiteral->getString(); 1507 llvm::StringRef s2StrRef = s2StrLiteral->getString(); 1508 1509 if (isBounded) { 1510 // Get the max number of characters to compare. 1511 const Expr *lenExpr = CE->getArg(2); 1512 SVal lenVal = state->getSVal(lenExpr); 1513 1514 // If the length is known, we can get the right substrings. 1515 if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) { 1516 // Create substrings of each to compare the prefix. 1517 s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue()); 1518 s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue()); 1519 canComputeResult = true; 1520 } 1521 } else { 1522 // This is a normal, unbounded strcmp. 1523 canComputeResult = true; 1524 } 1525 1526 if (canComputeResult) { 1527 // Real strcmp stops at null characters. 1528 size_t s1Term = s1StrRef.find('\0'); 1529 if (s1Term != llvm::StringRef::npos) 1530 s1StrRef = s1StrRef.substr(0, s1Term); 1531 1532 size_t s2Term = s2StrRef.find('\0'); 1533 if (s2Term != llvm::StringRef::npos) 1534 s2StrRef = s2StrRef.substr(0, s2Term); 1535 1536 // Use StringRef's comparison methods to compute the actual result. 1537 int result; 1538 1539 if (ignoreCase) { 1540 // Compare string 1 to string 2 the same way strcasecmp() does. 1541 result = s1StrRef.compare_lower(s2StrRef); 1542 } else { 1543 // Compare string 1 to string 2 the same way strcmp() does. 1544 result = s1StrRef.compare(s2StrRef); 1545 } 1546 1547 // Build the SVal of the comparison and bind the return value. 1548 SVal resultVal = svalBuilder.makeIntVal(result, CE->getType()); 1549 state = state->BindExpr(CE, resultVal); 1550 } 1551 } 1552 1553 if (!canComputeResult) { 1554 // Conjure a symbolic value. It's the best we can do. 1555 unsigned Count = C.getNodeBuilder().getCurrentBlockCount(); 1556 SVal resultVal = svalBuilder.getConjuredSymbolVal(NULL, CE, Count); 1557 state = state->BindExpr(CE, resultVal); 1558 } 1559 1560 // Record this as a possible path. 1561 C.addTransition(state); 1562 } 1563 1564 //===----------------------------------------------------------------------===// 1565 // The driver method, and other Checker callbacks. 1566 //===----------------------------------------------------------------------===// 1567 1568 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { 1569 // Get the callee. All the functions we care about are C functions 1570 // with simple identifiers. 1571 const GRState *state = C.getState(); 1572 const Expr *Callee = CE->getCallee(); 1573 const FunctionDecl *FD = state->getSVal(Callee).getAsFunctionDecl(); 1574 1575 if (!FD) 1576 return false; 1577 1578 // Get the name of the callee. If it's a builtin, strip off the prefix. 1579 IdentifierInfo *II = FD->getIdentifier(); 1580 if (!II) // if no identifier, not a simple C function 1581 return false; 1582 llvm::StringRef Name = II->getName(); 1583 if (Name.startswith("__builtin_")) 1584 Name = Name.substr(10); 1585 1586 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 1587 .Cases("memcpy", "__memcpy_chk", &CStringChecker::evalMemcpy) 1588 .Cases("mempcpy", "__mempcpy_chk", &CStringChecker::evalMempcpy) 1589 .Cases("memcmp", "bcmp", &CStringChecker::evalMemcmp) 1590 .Cases("memmove", "__memmove_chk", &CStringChecker::evalMemmove) 1591 .Cases("strcpy", "__strcpy_chk", &CStringChecker::evalStrcpy) 1592 //.Cases("strncpy", "__strncpy_chk", &CStringChecker::evalStrncpy) 1593 .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::evalStpcpy) 1594 .Cases("strcat", "__strcat_chk", &CStringChecker::evalStrcat) 1595 .Cases("strncat", "__strncat_chk", &CStringChecker::evalStrncat) 1596 .Case("strlen", &CStringChecker::evalstrLength) 1597 .Case("strnlen", &CStringChecker::evalstrnLength) 1598 .Case("strcmp", &CStringChecker::evalStrcmp) 1599 .Case("strncmp", &CStringChecker::evalStrncmp) 1600 .Case("strcasecmp", &CStringChecker::evalStrcasecmp) 1601 .Case("strncasecmp", &CStringChecker::evalStrncasecmp) 1602 .Case("bcopy", &CStringChecker::evalBcopy) 1603 .Default(NULL); 1604 1605 // If the callee isn't a string function, let another checker handle it. 1606 if (!evalFunction) 1607 return false; 1608 1609 // Make sure each function sets its own description. 1610 // (But don't bother in a release build.) 1611 assert(!(CurrentFunctionDescription = NULL)); 1612 1613 // Check and evaluate the call. 1614 (this->*evalFunction)(C, CE); 1615 return true; 1616 } 1617 1618 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { 1619 // Record string length for char a[] = "abc"; 1620 const GRState *state = C.getState(); 1621 1622 for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end(); 1623 I != E; ++I) { 1624 const VarDecl *D = dyn_cast<VarDecl>(*I); 1625 if (!D) 1626 continue; 1627 1628 // FIXME: Handle array fields of structs. 1629 if (!D->getType()->isArrayType()) 1630 continue; 1631 1632 const Expr *Init = D->getInit(); 1633 if (!Init) 1634 continue; 1635 if (!isa<StringLiteral>(Init)) 1636 continue; 1637 1638 Loc VarLoc = state->getLValue(D, C.getPredecessor()->getLocationContext()); 1639 const MemRegion *MR = VarLoc.getAsRegion(); 1640 if (!MR) 1641 continue; 1642 1643 SVal StrVal = state->getSVal(Init); 1644 assert(StrVal.isValid() && "Initializer string is unknown or undefined"); 1645 DefinedOrUnknownSVal strLength 1646 = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal)); 1647 1648 state = state->set<CStringLength>(MR, strLength); 1649 } 1650 1651 C.addTransition(state); 1652 } 1653 1654 bool CStringChecker::wantsRegionChangeUpdate(const GRState *state) const { 1655 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1656 return !Entries.isEmpty(); 1657 } 1658 1659 const GRState * 1660 CStringChecker::checkRegionChanges(const GRState *state, 1661 const StoreManager::InvalidatedSymbols *, 1662 const MemRegion * const *Begin, 1663 const MemRegion * const *End) const { 1664 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1665 if (Entries.isEmpty()) 1666 return state; 1667 1668 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; 1669 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; 1670 1671 // First build sets for the changed regions and their super-regions. 1672 for ( ; Begin != End; ++Begin) { 1673 const MemRegion *MR = *Begin; 1674 Invalidated.insert(MR); 1675 1676 SuperRegions.insert(MR); 1677 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { 1678 MR = SR->getSuperRegion(); 1679 SuperRegions.insert(MR); 1680 } 1681 } 1682 1683 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 1684 1685 // Then loop over the entries in the current state. 1686 for (CStringLength::EntryMap::iterator I = Entries.begin(), 1687 E = Entries.end(); I != E; ++I) { 1688 const MemRegion *MR = I.getKey(); 1689 1690 // Is this entry for a super-region of a changed region? 1691 if (SuperRegions.count(MR)) { 1692 Entries = F.remove(Entries, MR); 1693 continue; 1694 } 1695 1696 // Is this entry for a sub-region of a changed region? 1697 const MemRegion *Super = MR; 1698 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { 1699 Super = SR->getSuperRegion(); 1700 if (Invalidated.count(Super)) { 1701 Entries = F.remove(Entries, MR); 1702 break; 1703 } 1704 } 1705 } 1706 1707 return state->set<CStringLength>(Entries); 1708 } 1709 1710 void CStringChecker::checkLiveSymbols(const GRState *state, 1711 SymbolReaper &SR) const { 1712 // Mark all symbols in our string length map as valid. 1713 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1714 1715 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 1716 I != E; ++I) { 1717 SVal Len = I.getData(); 1718 1719 for (SVal::symbol_iterator si = Len.symbol_begin(), se = Len.symbol_end(); 1720 si != se; ++si) 1721 SR.markInUse(*si); 1722 } 1723 } 1724 1725 void CStringChecker::checkDeadSymbols(SymbolReaper &SR, 1726 CheckerContext &C) const { 1727 if (!SR.hasDeadSymbols()) 1728 return; 1729 1730 const GRState *state = C.getState(); 1731 CStringLength::EntryMap Entries = state->get<CStringLength>(); 1732 if (Entries.isEmpty()) 1733 return; 1734 1735 CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>(); 1736 for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end(); 1737 I != E; ++I) { 1738 SVal Len = I.getData(); 1739 if (SymbolRef Sym = Len.getAsSymbol()) { 1740 if (SR.isDead(Sym)) 1741 Entries = F.remove(Entries, I.getKey()); 1742 } 1743 } 1744 1745 state = state->set<CStringLength>(Entries); 1746 C.generateNode(state); 1747 } 1748 1749 void ento::registerCStringChecker(CheckerManager &mgr) { 1750 mgr.registerChecker<CStringChecker>(); 1751 } 1752