1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This defines CStringChecker, which is an assortment of checks on calls 10 // to functions in <string.h>. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "InterCheckerAPI.h" 15 #include "clang/Basic/Builtins.h" 16 #include "clang/Basic/CharInfo.h" 17 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 19 #include "clang/StaticAnalyzer/Core/Checker.h" 20 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" 22 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 23 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 24 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" 25 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 26 #include "llvm/ADT/STLExtras.h" 27 #include "llvm/ADT/SmallString.h" 28 #include "llvm/ADT/StringExtras.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <functional> 31 #include <optional> 32 33 using namespace clang; 34 using namespace ento; 35 using namespace std::placeholders; 36 37 namespace { 38 struct AnyArgExpr { 39 // FIXME: Remove constructor in C++17 to turn it into an aggregate. 40 AnyArgExpr(const Expr *Expression, unsigned ArgumentIndex) 41 : Expression{Expression}, ArgumentIndex{ArgumentIndex} {} 42 const Expr *Expression; 43 unsigned ArgumentIndex; 44 }; 45 46 struct SourceArgExpr : AnyArgExpr { 47 using AnyArgExpr::AnyArgExpr; // FIXME: Remove using in C++17. 
48 }; 49 50 struct DestinationArgExpr : AnyArgExpr { 51 using AnyArgExpr::AnyArgExpr; // FIXME: Same. 52 }; 53 54 struct SizeArgExpr : AnyArgExpr { 55 using AnyArgExpr::AnyArgExpr; // FIXME: Same. 56 }; 57 58 using ErrorMessage = SmallString<128>; 59 enum class AccessKind { write, read }; 60 61 static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription, 62 AccessKind Access) { 63 ErrorMessage Message; 64 llvm::raw_svector_ostream Os(Message); 65 66 // Function classification like: Memory copy function 67 Os << toUppercase(FunctionDescription.front()) 68 << &FunctionDescription.data()[1]; 69 70 if (Access == AccessKind::write) { 71 Os << " overflows the destination buffer"; 72 } else { // read access 73 Os << " accesses out-of-bound array element"; 74 } 75 76 return Message; 77 } 78 79 enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 }; 80 81 enum class CharKind { Regular = 0, Wide }; 82 constexpr CharKind CK_Regular = CharKind::Regular; 83 constexpr CharKind CK_Wide = CharKind::Wide; 84 85 static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) { 86 return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy 87 : Ctx.WideCharTy); 88 } 89 90 class CStringChecker : public Checker< eval::Call, 91 check::PreStmt<DeclStmt>, 92 check::LiveSymbols, 93 check::DeadSymbols, 94 check::RegionChanges 95 > { 96 mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap, 97 BT_NotCString, BT_AdditionOverflow, BT_UninitRead; 98 99 mutable const char *CurrentFunctionDescription; 100 101 public: 102 /// The filter is used to filter out the diagnostics which are not enabled by 103 /// the user. 
104 struct CStringChecksFilter { 105 bool CheckCStringNullArg = false; 106 bool CheckCStringOutOfBounds = false; 107 bool CheckCStringBufferOverlap = false; 108 bool CheckCStringNotNullTerm = false; 109 bool CheckCStringUninitializedRead = false; 110 111 CheckerNameRef CheckNameCStringNullArg; 112 CheckerNameRef CheckNameCStringOutOfBounds; 113 CheckerNameRef CheckNameCStringBufferOverlap; 114 CheckerNameRef CheckNameCStringNotNullTerm; 115 CheckerNameRef CheckNameCStringUninitializedRead; 116 }; 117 118 CStringChecksFilter Filter; 119 120 static void *getTag() { static int tag; return &tag; } 121 122 bool evalCall(const CallEvent &Call, CheckerContext &C) const; 123 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; 124 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const; 125 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; 126 127 ProgramStateRef 128 checkRegionChanges(ProgramStateRef state, 129 const InvalidatedSymbols *, 130 ArrayRef<const MemRegion *> ExplicitRegions, 131 ArrayRef<const MemRegion *> Regions, 132 const LocationContext *LCtx, 133 const CallEvent *Call) const; 134 135 using FnCheck = std::function<void(const CStringChecker *, CheckerContext &, 136 const CallExpr *)>; 137 138 CallDescriptionMap<FnCheck> Callbacks = { 139 {{CDF_MaybeBuiltin, {"memcpy"}, 3}, 140 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)}, 141 {{CDF_MaybeBuiltin, {"wmemcpy"}, 3}, 142 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)}, 143 {{CDF_MaybeBuiltin, {"mempcpy"}, 3}, 144 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)}, 145 {{CDF_None, {"wmempcpy"}, 3}, 146 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)}, 147 {{CDF_MaybeBuiltin, {"memcmp"}, 3}, 148 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)}, 149 {{CDF_MaybeBuiltin, {"wmemcmp"}, 3}, 150 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)}, 151 {{CDF_MaybeBuiltin, {"memmove"}, 3}, 152 
std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)}, 153 {{CDF_MaybeBuiltin, {"wmemmove"}, 3}, 154 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)}, 155 {{CDF_MaybeBuiltin, {"memset"}, 3}, &CStringChecker::evalMemset}, 156 {{CDF_MaybeBuiltin, {"explicit_memset"}, 3}, &CStringChecker::evalMemset}, 157 {{CDF_MaybeBuiltin, {"strcpy"}, 2}, &CStringChecker::evalStrcpy}, 158 {{CDF_MaybeBuiltin, {"strncpy"}, 3}, &CStringChecker::evalStrncpy}, 159 {{CDF_MaybeBuiltin, {"stpcpy"}, 2}, &CStringChecker::evalStpcpy}, 160 {{CDF_MaybeBuiltin, {"strlcpy"}, 3}, &CStringChecker::evalStrlcpy}, 161 {{CDF_MaybeBuiltin, {"strcat"}, 2}, &CStringChecker::evalStrcat}, 162 {{CDF_MaybeBuiltin, {"strncat"}, 3}, &CStringChecker::evalStrncat}, 163 {{CDF_MaybeBuiltin, {"strlcat"}, 3}, &CStringChecker::evalStrlcat}, 164 {{CDF_MaybeBuiltin, {"strlen"}, 1}, &CStringChecker::evalstrLength}, 165 {{CDF_MaybeBuiltin, {"wcslen"}, 1}, &CStringChecker::evalstrLength}, 166 {{CDF_MaybeBuiltin, {"strnlen"}, 2}, &CStringChecker::evalstrnLength}, 167 {{CDF_MaybeBuiltin, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength}, 168 {{CDF_MaybeBuiltin, {"strcmp"}, 2}, &CStringChecker::evalStrcmp}, 169 {{CDF_MaybeBuiltin, {"strncmp"}, 3}, &CStringChecker::evalStrncmp}, 170 {{CDF_MaybeBuiltin, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp}, 171 {{CDF_MaybeBuiltin, {"strncasecmp"}, 3}, 172 &CStringChecker::evalStrncasecmp}, 173 {{CDF_MaybeBuiltin, {"strsep"}, 2}, &CStringChecker::evalStrsep}, 174 {{CDF_MaybeBuiltin, {"bcopy"}, 3}, &CStringChecker::evalBcopy}, 175 {{CDF_MaybeBuiltin, {"bcmp"}, 3}, 176 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)}, 177 {{CDF_MaybeBuiltin, {"bzero"}, 2}, &CStringChecker::evalBzero}, 178 {{CDF_MaybeBuiltin, {"explicit_bzero"}, 2}, &CStringChecker::evalBzero}, 179 {{CDF_MaybeBuiltin, {"sprintf"}, 2}, &CStringChecker::evalSprintf}, 180 {{CDF_MaybeBuiltin, {"snprintf"}, 2}, &CStringChecker::evalSnprintf}, 181 }; 182 183 // These require a bit of 
special handling. 184 CallDescription StdCopy{{"std", "copy"}, 3}, 185 StdCopyBackward{{"std", "copy_backward"}, 3}; 186 187 FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const; 188 void evalMemcpy(CheckerContext &C, const CallExpr *CE, CharKind CK) const; 189 void evalMempcpy(CheckerContext &C, const CallExpr *CE, CharKind CK) const; 190 void evalMemmove(CheckerContext &C, const CallExpr *CE, CharKind CK) const; 191 void evalBcopy(CheckerContext &C, const CallExpr *CE) const; 192 void evalCopyCommon(CheckerContext &C, const CallExpr *CE, 193 ProgramStateRef state, SizeArgExpr Size, 194 DestinationArgExpr Dest, SourceArgExpr Source, 195 bool Restricted, bool IsMempcpy, CharKind CK) const; 196 197 void evalMemcmp(CheckerContext &C, const CallExpr *CE, CharKind CK) const; 198 199 void evalstrLength(CheckerContext &C, const CallExpr *CE) const; 200 void evalstrnLength(CheckerContext &C, const CallExpr *CE) const; 201 void evalstrLengthCommon(CheckerContext &C, 202 const CallExpr *CE, 203 bool IsStrnlen = false) const; 204 205 void evalStrcpy(CheckerContext &C, const CallExpr *CE) const; 206 void evalStrncpy(CheckerContext &C, const CallExpr *CE) const; 207 void evalStpcpy(CheckerContext &C, const CallExpr *CE) const; 208 void evalStrlcpy(CheckerContext &C, const CallExpr *CE) const; 209 void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool ReturnEnd, 210 bool IsBounded, ConcatFnKind appendK, 211 bool returnPtr = true) const; 212 213 void evalStrcat(CheckerContext &C, const CallExpr *CE) const; 214 void evalStrncat(CheckerContext &C, const CallExpr *CE) const; 215 void evalStrlcat(CheckerContext &C, const CallExpr *CE) const; 216 217 void evalStrcmp(CheckerContext &C, const CallExpr *CE) const; 218 void evalStrncmp(CheckerContext &C, const CallExpr *CE) const; 219 void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const; 220 void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const; 221 void evalStrcmpCommon(CheckerContext 
&C, 222 const CallExpr *CE, 223 bool IsBounded = false, 224 bool IgnoreCase = false) const; 225 226 void evalStrsep(CheckerContext &C, const CallExpr *CE) const; 227 228 void evalStdCopy(CheckerContext &C, const CallExpr *CE) const; 229 void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const; 230 void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const; 231 void evalMemset(CheckerContext &C, const CallExpr *CE) const; 232 void evalBzero(CheckerContext &C, const CallExpr *CE) const; 233 234 void evalSprintf(CheckerContext &C, const CallExpr *CE) const; 235 void evalSnprintf(CheckerContext &C, const CallExpr *CE) const; 236 void evalSprintfCommon(CheckerContext &C, const CallExpr *CE, bool IsBounded, 237 bool IsBuiltin) const; 238 239 // Utility methods 240 std::pair<ProgramStateRef , ProgramStateRef > 241 static assumeZero(CheckerContext &C, 242 ProgramStateRef state, SVal V, QualType Ty); 243 244 static ProgramStateRef setCStringLength(ProgramStateRef state, 245 const MemRegion *MR, 246 SVal strLength); 247 static SVal getCStringLengthForRegion(CheckerContext &C, 248 ProgramStateRef &state, 249 const Expr *Ex, 250 const MemRegion *MR, 251 bool hypothetical); 252 SVal getCStringLength(CheckerContext &C, 253 ProgramStateRef &state, 254 const Expr *Ex, 255 SVal Buf, 256 bool hypothetical = false) const; 257 258 const StringLiteral *getCStringLiteral(CheckerContext &C, 259 ProgramStateRef &state, 260 const Expr *expr, 261 SVal val) const; 262 263 static ProgramStateRef InvalidateBuffer(CheckerContext &C, 264 ProgramStateRef state, 265 const Expr *Ex, SVal V, 266 bool IsSourceBuffer, 267 const Expr *Size); 268 269 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 270 const MemRegion *MR); 271 272 static bool memsetAux(const Expr *DstBuffer, SVal CharE, 273 const Expr *Size, CheckerContext &C, 274 ProgramStateRef &State); 275 276 // Re-usable checks 277 ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State, 278 
AnyArgExpr Arg, SVal l) const; 279 ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state, 280 AnyArgExpr Buffer, SVal Element, 281 AccessKind Access, 282 CharKind CK = CharKind::Regular) const; 283 ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State, 284 AnyArgExpr Buffer, SizeArgExpr Size, 285 AccessKind Access, 286 CharKind CK = CharKind::Regular) const; 287 ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state, 288 SizeArgExpr Size, AnyArgExpr First, 289 AnyArgExpr Second, 290 CharKind CK = CharKind::Regular) const; 291 void emitOverlapBug(CheckerContext &C, 292 ProgramStateRef state, 293 const Stmt *First, 294 const Stmt *Second) const; 295 296 void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S, 297 StringRef WarningMsg) const; 298 void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State, 299 const Stmt *S, StringRef WarningMsg) const; 300 void emitNotCStringBug(CheckerContext &C, ProgramStateRef State, 301 const Stmt *S, StringRef WarningMsg) const; 302 void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const; 303 void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State, 304 const Expr *E) const; 305 ProgramStateRef checkAdditionOverflow(CheckerContext &C, 306 ProgramStateRef state, 307 NonLoc left, 308 NonLoc right) const; 309 310 // Return true if the destination buffer of the copy function may be in bound. 311 // Expects SVal of Size to be positive and unsigned. 312 // Expects SVal of FirstBuf to be a FieldRegion. 313 static bool IsFirstBufInBound(CheckerContext &C, 314 ProgramStateRef state, 315 const Expr *FirstBuf, 316 const Expr *Size); 317 }; 318 319 } //end anonymous namespace 320 321 REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal) 322 323 //===----------------------------------------------------------------------===// 324 // Individual checks and utility methods. 
325 //===----------------------------------------------------------------------===// 326 327 std::pair<ProgramStateRef , ProgramStateRef > 328 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V, 329 QualType Ty) { 330 std::optional<DefinedSVal> val = V.getAs<DefinedSVal>(); 331 if (!val) 332 return std::pair<ProgramStateRef , ProgramStateRef >(state, state); 333 334 SValBuilder &svalBuilder = C.getSValBuilder(); 335 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 336 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 337 } 338 339 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C, 340 ProgramStateRef State, 341 AnyArgExpr Arg, SVal l) const { 342 // If a previous check has failed, propagate the failure. 343 if (!State) 344 return nullptr; 345 346 ProgramStateRef stateNull, stateNonNull; 347 std::tie(stateNull, stateNonNull) = 348 assumeZero(C, State, l, Arg.Expression->getType()); 349 350 if (stateNull && !stateNonNull) { 351 if (Filter.CheckCStringNullArg) { 352 SmallString<80> buf; 353 llvm::raw_svector_ostream OS(buf); 354 assert(CurrentFunctionDescription); 355 OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1) 356 << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to " 357 << CurrentFunctionDescription; 358 359 emitNullArgBug(C, stateNull, Arg.Expression, OS.str()); 360 } 361 return nullptr; 362 } 363 364 // From here on, assume that the value is non-null. 365 assert(stateNonNull); 366 return stateNonNull; 367 } 368 369 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? 370 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, 371 ProgramStateRef state, 372 AnyArgExpr Buffer, SVal Element, 373 AccessKind Access, 374 CharKind CK) const { 375 376 // If a previous check has failed, propagate the failure. 377 if (!state) 378 return nullptr; 379 380 // Check for out of bound array element access. 
381 const MemRegion *R = Element.getAsRegion(); 382 if (!R) 383 return state; 384 385 const auto *ER = dyn_cast<ElementRegion>(R); 386 if (!ER) 387 return state; 388 389 SValBuilder &svalBuilder = C.getSValBuilder(); 390 ASTContext &Ctx = svalBuilder.getContext(); 391 392 // Get the index of the accessed element. 393 NonLoc Idx = ER->getIndex(); 394 395 if (CK == CharKind::Regular) { 396 if (ER->getValueType() != Ctx.CharTy) 397 return state; 398 } else { 399 if (ER->getValueType() != Ctx.WideCharTy) 400 return state; 401 402 QualType SizeTy = Ctx.getSizeType(); 403 NonLoc WideSize = 404 svalBuilder 405 .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(), 406 SizeTy) 407 .castAs<NonLoc>(); 408 SVal Offset = svalBuilder.evalBinOpNN(state, BO_Mul, Idx, WideSize, SizeTy); 409 if (Offset.isUnknown()) 410 return state; 411 Idx = Offset.castAs<NonLoc>(); 412 } 413 414 // Get the size of the array. 415 const auto *superReg = cast<SubRegion>(ER->getSuperRegion()); 416 DefinedOrUnknownSVal Size = 417 getDynamicExtent(state, superReg, C.getSValBuilder()); 418 419 ProgramStateRef StInBound, StOutBound; 420 std::tie(StInBound, StOutBound) = state->assumeInBoundDual(Idx, Size); 421 if (StOutBound && !StInBound) { 422 // These checks are either enabled by the CString out-of-bounds checker 423 // explicitly or implicitly by the Malloc checker. 424 // In the latter case we only do modeling but do not emit warning. 425 if (!Filter.CheckCStringOutOfBounds) 426 return nullptr; 427 428 // Emit a bug report. 429 ErrorMessage Message = 430 createOutOfBoundErrorMsg(CurrentFunctionDescription, Access); 431 emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message); 432 return nullptr; 433 } 434 435 // Ensure that we wouldn't read uninitialized value. 
436 if (Access == AccessKind::read) { 437 if (Filter.CheckCStringUninitializedRead && 438 StInBound->getSVal(ER).isUndef()) { 439 emitUninitializedReadBug(C, StInBound, Buffer.Expression); 440 return nullptr; 441 } 442 } 443 444 // Array bound check succeeded. From this point forward the array bound 445 // should always succeed. 446 return StInBound; 447 } 448 449 ProgramStateRef 450 CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State, 451 AnyArgExpr Buffer, SizeArgExpr Size, 452 AccessKind Access, CharKind CK) const { 453 // If a previous check has failed, propagate the failure. 454 if (!State) 455 return nullptr; 456 457 SValBuilder &svalBuilder = C.getSValBuilder(); 458 ASTContext &Ctx = svalBuilder.getContext(); 459 460 QualType SizeTy = Size.Expression->getType(); 461 QualType PtrTy = getCharPtrType(Ctx, CK); 462 463 // Check that the first buffer is non-null. 464 SVal BufVal = C.getSVal(Buffer.Expression); 465 State = checkNonNull(C, State, Buffer, BufVal); 466 if (!State) 467 return nullptr; 468 469 // If out-of-bounds checking is turned off, skip the rest. 470 if (!Filter.CheckCStringOutOfBounds) 471 return State; 472 473 // Get the access length and make sure it is known. 474 // FIXME: This assumes the caller has already checked that the access length 475 // is positive. And that it's unsigned. 476 SVal LengthVal = C.getSVal(Size.Expression); 477 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 478 if (!Length) 479 return State; 480 481 // Compute the offset of the last element to be accessed: size-1. 482 NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>(); 483 SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy); 484 if (Offset.isUnknown()) 485 return nullptr; 486 NonLoc LastOffset = Offset.castAs<NonLoc>(); 487 488 // Check that the first buffer is sufficiently long. 
489 SVal BufStart = 490 svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType()); 491 if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) { 492 493 SVal BufEnd = 494 svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy); 495 State = CheckLocation(C, State, Buffer, BufEnd, Access, CK); 496 497 // If the buffer isn't large enough, abort. 498 if (!State) 499 return nullptr; 500 } 501 502 // Large enough or not, return this state! 503 return State; 504 } 505 506 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, 507 ProgramStateRef state, 508 SizeArgExpr Size, AnyArgExpr First, 509 AnyArgExpr Second, 510 CharKind CK) const { 511 if (!Filter.CheckCStringBufferOverlap) 512 return state; 513 514 // Do a simple check for overlap: if the two arguments are from the same 515 // buffer, see if the end of the first is greater than the start of the second 516 // or vice versa. 517 518 // If a previous check has failed, propagate the failure. 519 if (!state) 520 return nullptr; 521 522 ProgramStateRef stateTrue, stateFalse; 523 524 // Assume different address spaces cannot overlap. 525 if (First.Expression->getType()->getPointeeType().getAddressSpace() != 526 Second.Expression->getType()->getPointeeType().getAddressSpace()) 527 return state; 528 529 // Get the buffer values and make sure they're known locations. 530 const LocationContext *LCtx = C.getLocationContext(); 531 SVal firstVal = state->getSVal(First.Expression, LCtx); 532 SVal secondVal = state->getSVal(Second.Expression, LCtx); 533 534 std::optional<Loc> firstLoc = firstVal.getAs<Loc>(); 535 if (!firstLoc) 536 return state; 537 538 std::optional<Loc> secondLoc = secondVal.getAs<Loc>(); 539 if (!secondLoc) 540 return state; 541 542 // Are the two values the same? 
543 SValBuilder &svalBuilder = C.getSValBuilder(); 544 std::tie(stateTrue, stateFalse) = 545 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 546 547 if (stateTrue && !stateFalse) { 548 // If the values are known to be equal, that's automatically an overlap. 549 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression); 550 return nullptr; 551 } 552 553 // assume the two expressions are not equal. 554 assert(stateFalse); 555 state = stateFalse; 556 557 // Which value comes first? 558 QualType cmpTy = svalBuilder.getConditionType(); 559 SVal reverse = 560 svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy); 561 std::optional<DefinedOrUnknownSVal> reverseTest = 562 reverse.getAs<DefinedOrUnknownSVal>(); 563 if (!reverseTest) 564 return state; 565 566 std::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 567 if (stateTrue) { 568 if (stateFalse) { 569 // If we don't know which one comes first, we can't perform this test. 570 return state; 571 } else { 572 // Switch the values so that firstVal is before secondVal. 573 std::swap(firstLoc, secondLoc); 574 575 // Switch the Exprs as well, so that they still correspond. 576 std::swap(First, Second); 577 } 578 } 579 580 // Get the length, and make sure it too is known. 581 SVal LengthVal = state->getSVal(Size.Expression, LCtx); 582 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 583 if (!Length) 584 return state; 585 586 // Convert the first buffer's start address to char*. 587 // Bail out if the cast fails. 588 ASTContext &Ctx = svalBuilder.getContext(); 589 QualType CharPtrTy = getCharPtrType(Ctx, CK); 590 SVal FirstStart = 591 svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType()); 592 std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>(); 593 if (!FirstStartLoc) 594 return state; 595 596 // Compute the end of the first buffer. Bail out if THAT fails. 
597 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc, 598 *Length, CharPtrTy); 599 std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>(); 600 if (!FirstEndLoc) 601 return state; 602 603 // Is the end of the first buffer past the start of the second buffer? 604 SVal Overlap = 605 svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy); 606 std::optional<DefinedOrUnknownSVal> OverlapTest = 607 Overlap.getAs<DefinedOrUnknownSVal>(); 608 if (!OverlapTest) 609 return state; 610 611 std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 612 613 if (stateTrue && !stateFalse) { 614 // Overlap! 615 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression); 616 return nullptr; 617 } 618 619 // assume the two expressions don't overlap. 620 assert(stateFalse); 621 return stateFalse; 622 } 623 624 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state, 625 const Stmt *First, const Stmt *Second) const { 626 ExplodedNode *N = C.generateErrorNode(state); 627 if (!N) 628 return; 629 630 if (!BT_Overlap) 631 BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap, 632 categories::UnixAPI, "Improper arguments")); 633 634 // Generate a report for this bug. 
635 auto report = std::make_unique<PathSensitiveBugReport>( 636 *BT_Overlap, "Arguments must not be overlapping buffers", N); 637 report->addRange(First->getSourceRange()); 638 report->addRange(Second->getSourceRange()); 639 640 C.emitReport(std::move(report)); 641 } 642 643 void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State, 644 const Stmt *S, StringRef WarningMsg) const { 645 if (ExplodedNode *N = C.generateErrorNode(State)) { 646 if (!BT_Null) 647 BT_Null.reset(new BuiltinBug( 648 Filter.CheckNameCStringNullArg, categories::UnixAPI, 649 "Null pointer argument in call to byte string function")); 650 651 BuiltinBug *BT = static_cast<BuiltinBug *>(BT_Null.get()); 652 auto Report = std::make_unique<PathSensitiveBugReport>(*BT, WarningMsg, N); 653 Report->addRange(S->getSourceRange()); 654 if (const auto *Ex = dyn_cast<Expr>(S)) 655 bugreporter::trackExpressionValue(N, Ex, *Report); 656 C.emitReport(std::move(Report)); 657 } 658 } 659 660 void CStringChecker::emitUninitializedReadBug(CheckerContext &C, 661 ProgramStateRef State, 662 const Expr *E) const { 663 if (ExplodedNode *N = C.generateErrorNode(State)) { 664 const char *Msg = 665 "Bytes string function accesses uninitialized/garbage values"; 666 if (!BT_UninitRead) 667 BT_UninitRead.reset( 668 new BuiltinBug(Filter.CheckNameCStringUninitializedRead, 669 "Accessing unitialized/garbage values", Msg)); 670 671 BuiltinBug *BT = static_cast<BuiltinBug *>(BT_UninitRead.get()); 672 673 auto Report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); 674 Report->addRange(E->getSourceRange()); 675 bugreporter::trackExpressionValue(N, E, *Report); 676 C.emitReport(std::move(Report)); 677 } 678 } 679 680 void CStringChecker::emitOutOfBoundsBug(CheckerContext &C, 681 ProgramStateRef State, const Stmt *S, 682 StringRef WarningMsg) const { 683 if (ExplodedNode *N = C.generateErrorNode(State)) { 684 if (!BT_Bounds) 685 BT_Bounds.reset(new BuiltinBug( 686 Filter.CheckCStringOutOfBounds ? 
Filter.CheckNameCStringOutOfBounds 687 : Filter.CheckNameCStringNullArg, 688 "Out-of-bound array access", 689 "Byte string function accesses out-of-bound array element")); 690 691 BuiltinBug *BT = static_cast<BuiltinBug *>(BT_Bounds.get()); 692 693 // FIXME: It would be nice to eventually make this diagnostic more clear, 694 // e.g., by referencing the original declaration or by saying *why* this 695 // reference is outside the range. 696 auto Report = std::make_unique<PathSensitiveBugReport>(*BT, WarningMsg, N); 697 Report->addRange(S->getSourceRange()); 698 C.emitReport(std::move(Report)); 699 } 700 } 701 702 void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State, 703 const Stmt *S, 704 StringRef WarningMsg) const { 705 if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) { 706 if (!BT_NotCString) 707 BT_NotCString.reset(new BuiltinBug( 708 Filter.CheckNameCStringNotNullTerm, categories::UnixAPI, 709 "Argument is not a null-terminated string.")); 710 711 auto Report = 712 std::make_unique<PathSensitiveBugReport>(*BT_NotCString, WarningMsg, N); 713 714 Report->addRange(S->getSourceRange()); 715 C.emitReport(std::move(Report)); 716 } 717 } 718 719 void CStringChecker::emitAdditionOverflowBug(CheckerContext &C, 720 ProgramStateRef State) const { 721 if (ExplodedNode *N = C.generateErrorNode(State)) { 722 if (!BT_AdditionOverflow) 723 BT_AdditionOverflow.reset( 724 new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API", 725 "Sum of expressions causes overflow.")); 726 727 // This isn't a great error message, but this should never occur in real 728 // code anyway -- you'd have to create a buffer longer than a size_t can 729 // represent, which is sort of a contradiction. 
730 const char *WarningMsg = 731 "This expression will create a string whose length is too big to " 732 "be represented as a size_t"; 733 734 auto Report = std::make_unique<PathSensitiveBugReport>(*BT_AdditionOverflow, 735 WarningMsg, N); 736 C.emitReport(std::move(Report)); 737 } 738 } 739 740 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C, 741 ProgramStateRef state, 742 NonLoc left, 743 NonLoc right) const { 744 // If out-of-bounds checking is turned off, skip the rest. 745 if (!Filter.CheckCStringOutOfBounds) 746 return state; 747 748 // If a previous check has failed, propagate the failure. 749 if (!state) 750 return nullptr; 751 752 SValBuilder &svalBuilder = C.getSValBuilder(); 753 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 754 755 QualType sizeTy = svalBuilder.getContext().getSizeType(); 756 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 757 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt); 758 759 SVal maxMinusRight; 760 if (isa<nonloc::ConcreteInt>(right)) { 761 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right, 762 sizeTy); 763 } else { 764 // Try switching the operands. (The order of these two assignments is 765 // important!) 766 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left, 767 sizeTy); 768 left = right; 769 } 770 771 if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) { 772 QualType cmpTy = svalBuilder.getConditionType(); 773 // If left > max - right, we have an overflow. 774 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left, 775 *maxMinusRightNL, cmpTy); 776 777 ProgramStateRef stateOverflow, stateOkay; 778 std::tie(stateOverflow, stateOkay) = 779 state->assume(willOverflow.castAs<DefinedOrUnknownSVal>()); 780 781 if (stateOverflow && !stateOkay) { 782 // We have an overflow. Emit a bug report. 
783 emitAdditionOverflowBug(C, stateOverflow); 784 return nullptr; 785 } 786 787 // From now on, assume an overflow didn't occur. 788 assert(stateOkay); 789 state = stateOkay; 790 } 791 792 return state; 793 } 794 795 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state, 796 const MemRegion *MR, 797 SVal strLength) { 798 assert(!strLength.isUndef() && "Attempt to set an undefined string length"); 799 800 MR = MR->StripCasts(); 801 802 switch (MR->getKind()) { 803 case MemRegion::StringRegionKind: 804 // FIXME: This can happen if we strcpy() into a string region. This is 805 // undefined [C99 6.4.5p6], but we should still warn about it. 806 return state; 807 808 case MemRegion::SymbolicRegionKind: 809 case MemRegion::AllocaRegionKind: 810 case MemRegion::NonParamVarRegionKind: 811 case MemRegion::ParamVarRegionKind: 812 case MemRegion::FieldRegionKind: 813 case MemRegion::ObjCIvarRegionKind: 814 // These are the types we can currently track string lengths for. 815 break; 816 817 case MemRegion::ElementRegionKind: 818 // FIXME: Handle element regions by upper-bounding the parent region's 819 // string length. 820 return state; 821 822 default: 823 // Other regions (mostly non-data) can't have a reliable C string length. 824 // For now, just ignore the change. 825 // FIXME: These are rare but not impossible. We should output some kind of 826 // warning for things like strcpy((char[]){'a', 0}, "b"); 827 return state; 828 } 829 830 if (strLength.isUnknown()) 831 return state->remove<CStringLength>(MR); 832 833 return state->set<CStringLength>(MR, strLength); 834 } 835 836 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, 837 ProgramStateRef &state, 838 const Expr *Ex, 839 const MemRegion *MR, 840 bool hypothetical) { 841 if (!hypothetical) { 842 // If there's a recorded length, go ahead and return it. 
843 const SVal *Recorded = state->get<CStringLength>(MR); 844 if (Recorded) 845 return *Recorded; 846 } 847 848 // Otherwise, get a new symbol and update the state. 849 SValBuilder &svalBuilder = C.getSValBuilder(); 850 QualType sizeTy = svalBuilder.getContext().getSizeType(); 851 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), 852 MR, Ex, sizeTy, 853 C.getLocationContext(), 854 C.blockCount()); 855 856 if (!hypothetical) { 857 if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) { 858 // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4 859 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 860 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 861 llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4); 862 const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt, 863 fourInt); 864 NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt); 865 SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, 866 maxLength, sizeTy); 867 state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true); 868 } 869 state = state->set<CStringLength>(MR, strLength); 870 } 871 872 return strLength; 873 } 874 875 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, 876 const Expr *Ex, SVal Buf, 877 bool hypothetical) const { 878 const MemRegion *MR = Buf.getAsRegion(); 879 if (!MR) { 880 // If we can't get a region, see if it's something we /know/ isn't a 881 // C string. In the context of locations, the only time we can issue such 882 // a warning is for labels. 
883 if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) { 884 if (Filter.CheckCStringNotNullTerm) { 885 SmallString<120> buf; 886 llvm::raw_svector_ostream os(buf); 887 assert(CurrentFunctionDescription); 888 os << "Argument to " << CurrentFunctionDescription 889 << " is the address of the label '" << Label->getLabel()->getName() 890 << "', which is not a null-terminated string"; 891 892 emitNotCStringBug(C, state, Ex, os.str()); 893 } 894 return UndefinedVal(); 895 } 896 897 // If it's not a region and not a label, give up. 898 return UnknownVal(); 899 } 900 901 // If we have a region, strip casts from it and see if we can figure out 902 // its length. For anything we can't figure out, just return UnknownVal. 903 MR = MR->StripCasts(); 904 905 switch (MR->getKind()) { 906 case MemRegion::StringRegionKind: { 907 // Modifying the contents of string regions is undefined [C99 6.4.5p6], 908 // so we can assume that the byte length is the correct C string length. 909 SValBuilder &svalBuilder = C.getSValBuilder(); 910 QualType sizeTy = svalBuilder.getContext().getSizeType(); 911 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); 912 return svalBuilder.makeIntVal(strLit->getLength(), sizeTy); 913 } 914 case MemRegion::SymbolicRegionKind: 915 case MemRegion::AllocaRegionKind: 916 case MemRegion::NonParamVarRegionKind: 917 case MemRegion::ParamVarRegionKind: 918 case MemRegion::FieldRegionKind: 919 case MemRegion::ObjCIvarRegionKind: 920 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical); 921 case MemRegion::CompoundLiteralRegionKind: 922 // FIXME: Can we track this? Is it necessary? 923 return UnknownVal(); 924 case MemRegion::ElementRegionKind: 925 // FIXME: How can we handle this? It's not good enough to subtract the 926 // offset from the base string length; consider "123\x00567" and &a[5]. 927 return UnknownVal(); 928 default: 929 // Other regions (mostly non-data) can't have a reliable C string length. 
930 // In this case, an error is emitted and UndefinedVal is returned. 931 // The caller should always be prepared to handle this case. 932 if (Filter.CheckCStringNotNullTerm) { 933 SmallString<120> buf; 934 llvm::raw_svector_ostream os(buf); 935 936 assert(CurrentFunctionDescription); 937 os << "Argument to " << CurrentFunctionDescription << " is "; 938 939 if (SummarizeRegion(os, C.getASTContext(), MR)) 940 os << ", which is not a null-terminated string"; 941 else 942 os << "not a null-terminated string"; 943 944 emitNotCStringBug(C, state, Ex, os.str()); 945 } 946 return UndefinedVal(); 947 } 948 } 949 950 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, 951 ProgramStateRef &state, const Expr *expr, SVal val) const { 952 953 // Get the memory region pointed to by the val. 954 const MemRegion *bufRegion = val.getAsRegion(); 955 if (!bufRegion) 956 return nullptr; 957 958 // Strip casts off the memory region. 959 bufRegion = bufRegion->StripCasts(); 960 961 // Cast the memory region to a string region. 962 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion); 963 if (!strRegion) 964 return nullptr; 965 966 // Return the actual string in the string region. 967 return strRegion->getStringLiteral(); 968 } 969 970 bool CStringChecker::IsFirstBufInBound(CheckerContext &C, 971 ProgramStateRef state, 972 const Expr *FirstBuf, 973 const Expr *Size) { 974 // If we do not know that the buffer is long enough we return 'true'. 975 // Otherwise the parent region of this field region would also get 976 // invalidated, which would lead to warnings based on an unknown state. 977 978 // Originally copied from CheckBufferAccess and CheckLocation. 
979 SValBuilder &svalBuilder = C.getSValBuilder(); 980 ASTContext &Ctx = svalBuilder.getContext(); 981 const LocationContext *LCtx = C.getLocationContext(); 982 983 QualType sizeTy = Size->getType(); 984 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 985 SVal BufVal = state->getSVal(FirstBuf, LCtx); 986 987 SVal LengthVal = state->getSVal(Size, LCtx); 988 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 989 if (!Length) 990 return true; // cf top comment. 991 992 // Compute the offset of the last element to be accessed: size-1. 993 NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>(); 994 SVal Offset = svalBuilder.evalBinOpNN(state, BO_Sub, *Length, One, sizeTy); 995 if (Offset.isUnknown()) 996 return true; // cf top comment 997 NonLoc LastOffset = Offset.castAs<NonLoc>(); 998 999 // Check that the first buffer is sufficiently long. 1000 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); 1001 std::optional<Loc> BufLoc = BufStart.getAs<Loc>(); 1002 if (!BufLoc) 1003 return true; // cf top comment. 1004 1005 SVal BufEnd = 1006 svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, LastOffset, PtrTy); 1007 1008 // Check for out of bound array element access. 1009 const MemRegion *R = BufEnd.getAsRegion(); 1010 if (!R) 1011 return true; // cf top comment. 1012 1013 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 1014 if (!ER) 1015 return true; // cf top comment. 1016 1017 // FIXME: Does this crash when a non-standard definition 1018 // of a library function is encountered? 1019 assert(ER->getValueType() == C.getASTContext().CharTy && 1020 "IsFirstBufInBound should only be called with char* ElementRegions"); 1021 1022 // Get the size of the array. 1023 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 1024 DefinedOrUnknownSVal SizeDV = getDynamicExtent(state, superReg, svalBuilder); 1025 1026 // Get the index of the accessed element. 
1027 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>(); 1028 1029 ProgramStateRef StInBound = state->assumeInBound(Idx, SizeDV, true); 1030 1031 return static_cast<bool>(StInBound); 1032 } 1033 1034 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C, 1035 ProgramStateRef state, 1036 const Expr *E, SVal V, 1037 bool IsSourceBuffer, 1038 const Expr *Size) { 1039 std::optional<Loc> L = V.getAs<Loc>(); 1040 if (!L) 1041 return state; 1042 1043 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 1044 // some assumptions about the value that CFRefCount can't. Even so, it should 1045 // probably be refactored. 1046 if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) { 1047 const MemRegion *R = MR->getRegion()->StripCasts(); 1048 1049 // Are we dealing with an ElementRegion? If so, we should be invalidating 1050 // the super-region. 1051 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 1052 R = ER->getSuperRegion(); 1053 // FIXME: What about layers of ElementRegions? 1054 } 1055 1056 // Invalidate this region. 1057 const LocationContext *LCtx = C.getPredecessor()->getLocationContext(); 1058 1059 bool CausesPointerEscape = false; 1060 RegionAndSymbolInvalidationTraits ITraits; 1061 // Invalidate and escape only indirect regions accessible through the source 1062 // buffer. 1063 if (IsSourceBuffer) { 1064 ITraits.setTrait(R->getBaseRegion(), 1065 RegionAndSymbolInvalidationTraits::TK_PreserveContents); 1066 ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape); 1067 CausesPointerEscape = true; 1068 } else { 1069 const MemRegion::Kind& K = R->getKind(); 1070 if (K == MemRegion::FieldRegionKind) 1071 if (Size && IsFirstBufInBound(C, state, E, Size)) { 1072 // If destination buffer is a field region and access is in bound, 1073 // do not invalidate its super region. 
1074 ITraits.setTrait( 1075 R, 1076 RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); 1077 } 1078 } 1079 1080 return state->invalidateRegions(R, E, C.blockCount(), LCtx, 1081 CausesPointerEscape, nullptr, nullptr, 1082 &ITraits); 1083 } 1084 1085 // If we have a non-region value by chance, just remove the binding. 1086 // FIXME: is this necessary or correct? This handles the non-Region 1087 // cases. Is it ever valid to store to these? 1088 return state->killBinding(*L); 1089 } 1090 1091 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 1092 const MemRegion *MR) { 1093 switch (MR->getKind()) { 1094 case MemRegion::FunctionCodeRegionKind: { 1095 if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl()) 1096 os << "the address of the function '" << *FD << '\''; 1097 else 1098 os << "the address of a function"; 1099 return true; 1100 } 1101 case MemRegion::BlockCodeRegionKind: 1102 os << "block text"; 1103 return true; 1104 case MemRegion::BlockDataRegionKind: 1105 os << "a block"; 1106 return true; 1107 case MemRegion::CXXThisRegionKind: 1108 case MemRegion::CXXTempObjectRegionKind: 1109 os << "a C++ temp object of type " 1110 << cast<TypedValueRegion>(MR)->getValueType(); 1111 return true; 1112 case MemRegion::NonParamVarRegionKind: 1113 os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType(); 1114 return true; 1115 case MemRegion::ParamVarRegionKind: 1116 os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType(); 1117 return true; 1118 case MemRegion::FieldRegionKind: 1119 os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType(); 1120 return true; 1121 case MemRegion::ObjCIvarRegionKind: 1122 os << "an instance variable of type " 1123 << cast<TypedValueRegion>(MR)->getValueType(); 1124 return true; 1125 default: 1126 return false; 1127 } 1128 } 1129 1130 bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal, 1131 const Expr *Size, CheckerContext &C, 1132 
ProgramStateRef &State) { 1133 SVal MemVal = C.getSVal(DstBuffer); 1134 SVal SizeVal = C.getSVal(Size); 1135 const MemRegion *MR = MemVal.getAsRegion(); 1136 if (!MR) 1137 return false; 1138 1139 // We're about to model memset by producing a "default binding" in the Store. 1140 // Our current implementation - RegionStore - doesn't support default bindings 1141 // that don't cover the whole base region. So we should first get the offset 1142 // and the base region to figure out whether the offset of buffer is 0. 1143 RegionOffset Offset = MR->getAsOffset(); 1144 const MemRegion *BR = Offset.getRegion(); 1145 1146 std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>(); 1147 if (!SizeNL) 1148 return false; 1149 1150 SValBuilder &svalBuilder = C.getSValBuilder(); 1151 ASTContext &Ctx = C.getASTContext(); 1152 1153 // void *memset(void *dest, int ch, size_t count); 1154 // For now we can only handle the case of offset is 0 and concrete char value. 1155 if (Offset.isValid() && !Offset.hasSymbolicOffset() && 1156 Offset.getOffset() == 0) { 1157 // Get the base region's size. 1158 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder); 1159 1160 ProgramStateRef StateWholeReg, StateNotWholeReg; 1161 std::tie(StateWholeReg, StateNotWholeReg) = 1162 State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL)); 1163 1164 // With the semantic of 'memset()', we should convert the CharVal to 1165 // unsigned char. 
1166 CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy); 1167 1168 ProgramStateRef StateNullChar, StateNonNullChar; 1169 std::tie(StateNullChar, StateNonNullChar) = 1170 assumeZero(C, State, CharVal, Ctx.UnsignedCharTy); 1171 1172 if (StateWholeReg && !StateNotWholeReg && StateNullChar && 1173 !StateNonNullChar) { 1174 // If the 'memset()' acts on the whole region of destination buffer and 1175 // the value of the second argument of 'memset()' is zero, bind the second 1176 // argument's value to the destination buffer with 'default binding'. 1177 // FIXME: Since there is no perfect way to bind the non-zero character, we 1178 // can only deal with zero value here. In the future, we need to deal with 1179 // the binding of non-zero value in the case of whole region. 1180 State = State->bindDefaultZero(svalBuilder.makeLoc(BR), 1181 C.getLocationContext()); 1182 } else { 1183 // If the destination buffer's extent is not equal to the value of 1184 // third argument, just invalidate buffer. 1185 State = InvalidateBuffer(C, State, DstBuffer, MemVal, 1186 /*IsSourceBuffer*/ false, Size); 1187 } 1188 1189 if (StateNullChar && !StateNonNullChar) { 1190 // If the value of the second argument of 'memset()' is zero, set the 1191 // string length of destination buffer to 0 directly. 1192 State = setCStringLength(State, MR, 1193 svalBuilder.makeZeroVal(Ctx.getSizeType())); 1194 } else if (!StateNullChar && StateNonNullChar) { 1195 SVal NewStrLen = svalBuilder.getMetadataSymbolVal( 1196 CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(), 1197 C.getLocationContext(), C.blockCount()); 1198 1199 // If the value of second argument is not zero, then the string length 1200 // is at least the size argument. 
1201 SVal NewStrLenGESize = svalBuilder.evalBinOp( 1202 State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType()); 1203 1204 State = setCStringLength( 1205 State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true), 1206 MR, NewStrLen); 1207 } 1208 } else { 1209 // If the offset is not zero and char value is not concrete, we can do 1210 // nothing but invalidate the buffer. 1211 State = InvalidateBuffer(C, State, DstBuffer, MemVal, 1212 /*IsSourceBuffer*/ false, Size); 1213 } 1214 return true; 1215 } 1216 1217 //===----------------------------------------------------------------------===// 1218 // evaluation of individual function calls. 1219 //===----------------------------------------------------------------------===// 1220 1221 void CStringChecker::evalCopyCommon(CheckerContext &C, const CallExpr *CE, 1222 ProgramStateRef state, SizeArgExpr Size, 1223 DestinationArgExpr Dest, 1224 SourceArgExpr Source, bool Restricted, 1225 bool IsMempcpy, CharKind CK) const { 1226 CurrentFunctionDescription = "memory copy function"; 1227 1228 // See if the size argument is zero. 1229 const LocationContext *LCtx = C.getLocationContext(); 1230 SVal sizeVal = state->getSVal(Size.Expression, LCtx); 1231 QualType sizeTy = Size.Expression->getType(); 1232 1233 ProgramStateRef stateZeroSize, stateNonZeroSize; 1234 std::tie(stateZeroSize, stateNonZeroSize) = 1235 assumeZero(C, state, sizeVal, sizeTy); 1236 1237 // Get the value of the Dest. 1238 SVal destVal = state->getSVal(Dest.Expression, LCtx); 1239 1240 // If the size is zero, there won't be any actual memory access, so 1241 // just bind the return value to the destination buffer and return. 1242 if (stateZeroSize && !stateNonZeroSize) { 1243 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal); 1244 C.addTransition(stateZeroSize); 1245 return; 1246 } 1247 1248 // If the size can be nonzero, we have to check the other arguments. 
1249 if (stateNonZeroSize) { 1250 state = stateNonZeroSize; 1251 1252 // Ensure the destination is not null. If it is NULL there will be a 1253 // NULL pointer dereference. 1254 state = checkNonNull(C, state, Dest, destVal); 1255 if (!state) 1256 return; 1257 1258 // Get the value of the Src. 1259 SVal srcVal = state->getSVal(Source.Expression, LCtx); 1260 1261 // Ensure the source is not null. If it is NULL there will be a 1262 // NULL pointer dereference. 1263 state = checkNonNull(C, state, Source, srcVal); 1264 if (!state) 1265 return; 1266 1267 // Ensure the accesses are valid and that the buffers do not overlap. 1268 state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK); 1269 state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK); 1270 1271 if (Restricted) 1272 state = CheckOverlap(C, state, Size, Dest, Source, CK); 1273 1274 if (!state) 1275 return; 1276 1277 // If this is mempcpy, get the byte after the last byte copied and 1278 // bind the expr. 1279 if (IsMempcpy) { 1280 // Get the byte after the last byte copied. 1281 SValBuilder &SvalBuilder = C.getSValBuilder(); 1282 ASTContext &Ctx = SvalBuilder.getContext(); 1283 QualType CharPtrTy = getCharPtrType(Ctx, CK); 1284 SVal DestRegCharVal = 1285 SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType()); 1286 SVal lastElement = C.getSValBuilder().evalBinOp( 1287 state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType()); 1288 // If we don't know how much we copied, we can at least 1289 // conjure a return value for later. 1290 if (lastElement.isUnknown()) 1291 lastElement = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, 1292 C.blockCount()); 1293 1294 // The byte after the last byte copied is the return value. 1295 state = state->BindExpr(CE, LCtx, lastElement); 1296 } else { 1297 // All other copies return the destination buffer. 1298 // (Well, bcopy() has a void return type, but this won't hurt.) 
1299 state = state->BindExpr(CE, LCtx, destVal); 1300 } 1301 1302 // Invalidate the destination (regular invalidation without pointer-escaping 1303 // the address of the top-level region). 1304 // FIXME: Even if we can't perfectly model the copy, we should see if we 1305 // can use LazyCompoundVals to copy the source values into the destination. 1306 // This would probably remove any existing bindings past the end of the 1307 // copied region, but that's still an improvement over blank invalidation. 1308 state = 1309 InvalidateBuffer(C, state, Dest.Expression, C.getSVal(Dest.Expression), 1310 /*IsSourceBuffer*/ false, Size.Expression); 1311 1312 // Invalidate the source (const-invalidation without const-pointer-escaping 1313 // the address of the top-level region). 1314 state = InvalidateBuffer(C, state, Source.Expression, 1315 C.getSVal(Source.Expression), 1316 /*IsSourceBuffer*/ true, nullptr); 1317 1318 C.addTransition(state); 1319 } 1320 } 1321 1322 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE, 1323 CharKind CK) const { 1324 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 1325 // The return value is the address of the destination buffer. 1326 DestinationArgExpr Dest = {CE->getArg(0), 0}; 1327 SourceArgExpr Src = {CE->getArg(1), 1}; 1328 SizeArgExpr Size = {CE->getArg(2), 2}; 1329 1330 ProgramStateRef State = C.getState(); 1331 1332 constexpr bool IsRestricted = true; 1333 constexpr bool IsMempcpy = false; 1334 evalCopyCommon(C, CE, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK); 1335 } 1336 1337 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE, 1338 CharKind CK) const { 1339 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n); 1340 // The return value is a pointer to the byte following the last written byte. 
1341 DestinationArgExpr Dest = {CE->getArg(0), 0}; 1342 SourceArgExpr Src = {CE->getArg(1), 1}; 1343 SizeArgExpr Size = {CE->getArg(2), 2}; 1344 1345 constexpr bool IsRestricted = true; 1346 constexpr bool IsMempcpy = true; 1347 evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy, 1348 CK); 1349 } 1350 1351 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE, 1352 CharKind CK) const { 1353 // void *memmove(void *dst, const void *src, size_t n); 1354 // The return value is the address of the destination buffer. 1355 DestinationArgExpr Dest = {CE->getArg(0), 0}; 1356 SourceArgExpr Src = {CE->getArg(1), 1}; 1357 SizeArgExpr Size = {CE->getArg(2), 2}; 1358 1359 constexpr bool IsRestricted = false; 1360 constexpr bool IsMempcpy = false; 1361 evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy, 1362 CK); 1363 } 1364 1365 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const { 1366 // void bcopy(const void *src, void *dst, size_t n); 1367 SourceArgExpr Src(CE->getArg(0), 0); 1368 DestinationArgExpr Dest = {CE->getArg(1), 1}; 1369 SizeArgExpr Size = {CE->getArg(2), 2}; 1370 1371 constexpr bool IsRestricted = false; 1372 constexpr bool IsMempcpy = false; 1373 evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy, 1374 CharKind::Regular); 1375 } 1376 1377 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE, 1378 CharKind CK) const { 1379 // int memcmp(const void *s1, const void *s2, size_t n); 1380 CurrentFunctionDescription = "memory comparison function"; 1381 1382 AnyArgExpr Left = {CE->getArg(0), 0}; 1383 AnyArgExpr Right = {CE->getArg(1), 1}; 1384 SizeArgExpr Size = {CE->getArg(2), 2}; 1385 1386 ProgramStateRef State = C.getState(); 1387 SValBuilder &Builder = C.getSValBuilder(); 1388 const LocationContext *LCtx = C.getLocationContext(); 1389 1390 // See if the size argument is zero. 
1391 SVal sizeVal = State->getSVal(Size.Expression, LCtx); 1392 QualType sizeTy = Size.Expression->getType(); 1393 1394 ProgramStateRef stateZeroSize, stateNonZeroSize; 1395 std::tie(stateZeroSize, stateNonZeroSize) = 1396 assumeZero(C, State, sizeVal, sizeTy); 1397 1398 // If the size can be zero, the result will be 0 in that case, and we don't 1399 // have to check either of the buffers. 1400 if (stateZeroSize) { 1401 State = stateZeroSize; 1402 State = State->BindExpr(CE, LCtx, Builder.makeZeroVal(CE->getType())); 1403 C.addTransition(State); 1404 } 1405 1406 // If the size can be nonzero, we have to check the other arguments. 1407 if (stateNonZeroSize) { 1408 State = stateNonZeroSize; 1409 // If we know the two buffers are the same, we know the result is 0. 1410 // First, get the two buffers' addresses. Another checker will have already 1411 // made sure they're not undefined. 1412 DefinedOrUnknownSVal LV = 1413 State->getSVal(Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>(); 1414 DefinedOrUnknownSVal RV = 1415 State->getSVal(Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>(); 1416 1417 // See if they are the same. 1418 ProgramStateRef SameBuffer, NotSameBuffer; 1419 std::tie(SameBuffer, NotSameBuffer) = 1420 State->assume(Builder.evalEQ(State, LV, RV)); 1421 1422 // If the two arguments are the same buffer, we know the result is 0, 1423 // and we only need to check one size. 1424 if (SameBuffer && !NotSameBuffer) { 1425 State = SameBuffer; 1426 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read); 1427 if (State) { 1428 State = 1429 SameBuffer->BindExpr(CE, LCtx, Builder.makeZeroVal(CE->getType())); 1430 C.addTransition(State); 1431 } 1432 return; 1433 } 1434 1435 // If the two arguments might be different buffers, we have to check 1436 // the size of both of them. 
1437 assert(NotSameBuffer); 1438 State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK); 1439 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK); 1440 if (State) { 1441 // The return value is the comparison result, which we don't know. 1442 SVal CmpV = Builder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); 1443 State = State->BindExpr(CE, LCtx, CmpV); 1444 C.addTransition(State); 1445 } 1446 } 1447 } 1448 1449 void CStringChecker::evalstrLength(CheckerContext &C, 1450 const CallExpr *CE) const { 1451 // size_t strlen(const char *s); 1452 evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); 1453 } 1454 1455 void CStringChecker::evalstrnLength(CheckerContext &C, 1456 const CallExpr *CE) const { 1457 // size_t strnlen(const char *s, size_t maxlen); 1458 evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); 1459 } 1460 1461 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 1462 bool IsStrnlen) const { 1463 CurrentFunctionDescription = "string length function"; 1464 ProgramStateRef state = C.getState(); 1465 const LocationContext *LCtx = C.getLocationContext(); 1466 1467 if (IsStrnlen) { 1468 const Expr *maxlenExpr = CE->getArg(1); 1469 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); 1470 1471 ProgramStateRef stateZeroSize, stateNonZeroSize; 1472 std::tie(stateZeroSize, stateNonZeroSize) = 1473 assumeZero(C, state, maxlenVal, maxlenExpr->getType()); 1474 1475 // If the size can be zero, the result will be 0 in that case, and we don't 1476 // have to check the string itself. 1477 if (stateZeroSize) { 1478 SVal zero = C.getSValBuilder().makeZeroVal(CE->getType()); 1479 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero); 1480 C.addTransition(stateZeroSize); 1481 } 1482 1483 // If the size is GUARANTEED to be zero, we're done! 1484 if (!stateNonZeroSize) 1485 return; 1486 1487 // Otherwise, record the assumption that the size is nonzero. 
1488 state = stateNonZeroSize; 1489 } 1490 1491 // Check that the string argument is non-null. 1492 AnyArgExpr Arg = {CE->getArg(0), 0}; 1493 SVal ArgVal = state->getSVal(Arg.Expression, LCtx); 1494 state = checkNonNull(C, state, Arg, ArgVal); 1495 1496 if (!state) 1497 return; 1498 1499 SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal); 1500 1501 // If the argument isn't a valid C string, there's no valid state to 1502 // transition to. 1503 if (strLength.isUndef()) 1504 return; 1505 1506 DefinedOrUnknownSVal result = UnknownVal(); 1507 1508 // If the check is for strnlen() then bind the return value to no more than 1509 // the maxlen value. 1510 if (IsStrnlen) { 1511 QualType cmpTy = C.getSValBuilder().getConditionType(); 1512 1513 // It's a little unfortunate to be getting this again, 1514 // but it's not that expensive... 1515 const Expr *maxlenExpr = CE->getArg(1); 1516 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); 1517 1518 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>(); 1519 std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>(); 1520 1521 if (strLengthNL && maxlenValNL) { 1522 ProgramStateRef stateStringTooLong, stateStringNotTooLong; 1523 1524 // Check if the strLength is greater than the maxlen. 1525 std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume( 1526 C.getSValBuilder() 1527 .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy) 1528 .castAs<DefinedOrUnknownSVal>()); 1529 1530 if (stateStringTooLong && !stateStringNotTooLong) { 1531 // If the string is longer than maxlen, return maxlen. 1532 result = *maxlenValNL; 1533 } else if (stateStringNotTooLong && !stateStringTooLong) { 1534 // If the string is shorter than maxlen, return its length. 1535 result = *strLengthNL; 1536 } 1537 } 1538 1539 if (result.isUnknown()) { 1540 // If we don't have enough information for a comparison, there's 1541 // no guarantee the full string length will actually be returned. 
1542 // All we know is the return value is the min of the string length 1543 // and the limit. This is better than nothing. 1544 result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, 1545 C.blockCount()); 1546 NonLoc resultNL = result.castAs<NonLoc>(); 1547 1548 if (strLengthNL) { 1549 state = state->assume(C.getSValBuilder().evalBinOpNN( 1550 state, BO_LE, resultNL, *strLengthNL, cmpTy) 1551 .castAs<DefinedOrUnknownSVal>(), true); 1552 } 1553 1554 if (maxlenValNL) { 1555 state = state->assume(C.getSValBuilder().evalBinOpNN( 1556 state, BO_LE, resultNL, *maxlenValNL, cmpTy) 1557 .castAs<DefinedOrUnknownSVal>(), true); 1558 } 1559 } 1560 1561 } else { 1562 // This is a plain strlen(), not strnlen(). 1563 result = strLength.castAs<DefinedOrUnknownSVal>(); 1564 1565 // If we don't know the length of the string, conjure a return 1566 // value, so it can be used in constraints, at least. 1567 if (result.isUnknown()) { 1568 result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, 1569 C.blockCount()); 1570 } 1571 } 1572 1573 // Bind the return value. 
1574 assert(!result.isUnknown() && "Should have conjured a value by now"); 1575 state = state->BindExpr(CE, LCtx, result); 1576 C.addTransition(state); 1577 } 1578 1579 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const { 1580 // char *strcpy(char *restrict dst, const char *restrict src); 1581 evalStrcpyCommon(C, CE, 1582 /* ReturnEnd = */ false, 1583 /* IsBounded = */ false, 1584 /* appendK = */ ConcatFnKind::none); 1585 } 1586 1587 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const { 1588 // char *strncpy(char *restrict dst, const char *restrict src, size_t n); 1589 evalStrcpyCommon(C, CE, 1590 /* ReturnEnd = */ false, 1591 /* IsBounded = */ true, 1592 /* appendK = */ ConcatFnKind::none); 1593 } 1594 1595 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const { 1596 // char *stpcpy(char *restrict dst, const char *restrict src); 1597 evalStrcpyCommon(C, CE, 1598 /* ReturnEnd = */ true, 1599 /* IsBounded = */ false, 1600 /* appendK = */ ConcatFnKind::none); 1601 } 1602 1603 void CStringChecker::evalStrlcpy(CheckerContext &C, const CallExpr *CE) const { 1604 // size_t strlcpy(char *dest, const char *src, size_t size); 1605 evalStrcpyCommon(C, CE, 1606 /* ReturnEnd = */ true, 1607 /* IsBounded = */ true, 1608 /* appendK = */ ConcatFnKind::none, 1609 /* returnPtr = */ false); 1610 } 1611 1612 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const { 1613 // char *strcat(char *restrict s1, const char *restrict s2); 1614 evalStrcpyCommon(C, CE, 1615 /* ReturnEnd = */ false, 1616 /* IsBounded = */ false, 1617 /* appendK = */ ConcatFnKind::strcat); 1618 } 1619 1620 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const { 1621 // char *strncat(char *restrict s1, const char *restrict s2, size_t n); 1622 evalStrcpyCommon(C, CE, 1623 /* ReturnEnd = */ false, 1624 /* IsBounded = */ true, 1625 /* appendK = */ ConcatFnKind::strcat); 1626 } 1627 1628 void 
CStringChecker::evalStrlcat(CheckerContext &C, const CallExpr *CE) const {
  // size_t strlcat(char *dst, const char *src, size_t size);
  // It will append at most size - strlen(dst) - 1 bytes,
  // NULL-terminating the result.
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ true,
                   /* appendK = */ ConcatFnKind::strlcat,
                   /* returnPtr = */ false);
}

// Shared model for the string copy / concatenation family of calls.
//
//   ReturnEnd - only consulted when returnPtr is true: if set, the call's
//               value is a pointer to the end of the resulting string
//               (stpcpy-style) instead of the destination pointer.
//   IsBounded - the call has a size argument at index 2 (strncpy, strncat,
//               strlcpy, strlcat, ...).
//   appendK   - ConcatFnKind::none for a copy, otherwise which flavor of
//               concatenation is being modeled.
//   returnPtr - the call's value is a pointer; when false the call's value
//               is a length (strlcpy / strlcat).
//
// The function checks both pointer arguments for null, checks the buffers
// for overlap, checks the destination for overflow where enough is known,
// invalidates the destination and source buffers, records the new C string
// length of the destination and binds the call's return value. It returns
// without adding a transition whenever one of the helper checks yields a
// null state.
void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
                                      bool ReturnEnd, bool IsBounded,
                                      ConcatFnKind appendK,
                                      bool returnPtr) const {
  // Pick the wording used by diagnostics emitted from the helper checks.
  if (appendK == ConcatFnKind::none)
    CurrentFunctionDescription = "string copy function";
  else
    CurrentFunctionDescription = "string concatenation function";

  ProgramStateRef state = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the destination is non-null.
  DestinationArgExpr Dst = {CE->getArg(0), 0};
  SVal DstVal = state->getSVal(Dst.Expression, LCtx);
  state = checkNonNull(C, state, Dst, DstVal);
  if (!state)
    return;

  // Check that the source is non-null.
  SourceArgExpr srcExpr = {CE->getArg(1), 1};
  SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
  state = checkNonNull(C, state, srcExpr, srcVal);
  if (!state)
    return;

  // Get the string length of the source.
  SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
  std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();

  // Get the string length of the destination buffer.
  SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
  std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();

  // If the source isn't a valid C string, give up.
  if (strLength.isUndef())
    return;

  SValBuilder &svalBuilder = C.getSValBuilder();
  QualType cmpTy = svalBuilder.getConditionType();
  QualType sizeTy = svalBuilder.getContext().getSizeType();

  // These two values allow checking two kinds of errors:
  // - actual overflows caused by a source that doesn't fit in the destination
  // - potential overflows caused by a bound that could exceed the destination
  SVal amountCopied = UnknownVal();
  SVal maxLastElementIndex = UnknownVal();
  // Non-null once a bound-based check has been set up below; it then
  // suppresses the "actual copy" location check further down.
  const char *boundWarning = nullptr;

  // FIXME: Why do we choose the srcExpr if the access has no size?
  //  Note that the 3rd argument of the call would be the size parameter.
  SizeArgExpr SrcExprAsSizeDummy = {srcExpr.Expression, srcExpr.ArgumentIndex};
  state = CheckOverlap(
      C, state,
      (IsBounded ? SizeArgExpr{CE->getArg(2), 2} : SrcExprAsSizeDummy), Dst,
      srcExpr);

  if (!state)
    return;

  // If the function is strncpy, strncat, etc... it is bounded.
  if (IsBounded) {
    // Get the max number of characters to copy.
    SizeArgExpr lenExpr = {CE->getArg(2), 2};
    SVal lenVal = state->getSVal(lenExpr.Expression, LCtx);

    // Protect against misdeclared strncpy().
    lenVal =
        svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());

    std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();

    // If we know both values, we might be able to figure out how much
    // we're copying.
    if (strLengthNL && lenValNL) {
      switch (appendK) {
      case ConcatFnKind::none:
      case ConcatFnKind::strcat: {
        ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
        // Check if the max number to copy is less than the length of the src.
        // If the bound is equal to the source length, strncpy won't null-
        // terminate the result!
        std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
            svalBuilder
                .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
                .castAs<DefinedOrUnknownSVal>());

        if (stateSourceTooLong && !stateSourceNotTooLong) {
          // Max number to copy is less than the length of the src, so the
          // actual strLength copied is the max number arg.
          state = stateSourceTooLong;
          amountCopied = lenVal;

        } else if (!stateSourceTooLong && stateSourceNotTooLong) {
          // The source buffer entirely fits in the bound.
          state = stateSourceNotTooLong;
          amountCopied = strLength;
        }
        // If both outcomes are feasible, amountCopied stays unknown.
        break;
      }
      case ConcatFnKind::strlcat:
        if (!dstStrLengthNL)
          return;

        // amountCopied = min (size - dstLen - 1 , srcLen)
        SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
                                                 *dstStrLengthNL, sizeTy);
        if (!isa<NonLoc>(freeSpace))
          return;
        freeSpace =
            svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
                                  svalBuilder.makeIntVal(1, sizeTy), sizeTy);
        std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();

        // While unlikely, it is possible that the subtraction is
        // too complex to compute, let's check whether it succeeded.
        if (!freeSpaceNL)
          return;
        SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
            state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);

        ProgramStateRef TrueState, FalseState;
        std::tie(TrueState, FalseState) =
            state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());

        // srcStrLength <= size - dstStrLength -1
        if (TrueState && !FalseState) {
          amountCopied = strLength;
        }

        // srcStrLength > size - dstStrLength -1
        if (!TrueState && FalseState) {
          amountCopied = freeSpace;
        }

        // Both outcomes are feasible: the amount copied is not known.
        if (TrueState && FalseState)
          amountCopied = UnknownVal();
        break;
      }
    }
    // We still want to know if the bound is known to be too large.
    if (lenValNL) {
      switch (appendK) {
      case ConcatFnKind::strcat:
        // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)

        // Get the string length of the destination. If the destination is
        // memory that can't have a string length, we shouldn't be copying
        // into it anyway.
        if (dstStrLength.isUndef())
          return;

        if (dstStrLengthNL) {
          maxLastElementIndex = svalBuilder.evalBinOpNN(
              state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);

          boundWarning = "Size argument is greater than the free space in the "
                         "destination buffer";
        }
        break;
      case ConcatFnKind::none:
      case ConcatFnKind::strlcat:
        // For strncpy and strlcat, this is just checking
        //  that lenVal <= sizeof(dst).
        // (Yes, strncpy and strncat differ in how they treat termination.
        // strncat ALWAYS terminates, but strncpy doesn't.)

        // We need a special case for when the copy size is zero, in which
        // case strncpy will do no work at all. Our bounds check uses n-1
        // as the last element accessed, so n == 0 is problematic.
        ProgramStateRef StateZeroSize, StateNonZeroSize;
        std::tie(StateZeroSize, StateNonZeroSize) =
            assumeZero(C, state, *lenValNL, sizeTy);

        // If the size is known to be zero, we're done.
        if (StateZeroSize && !StateNonZeroSize) {
          if (returnPtr) {
            StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
          } else {
            if (appendK == ConcatFnKind::none) {
              // strlcpy returns strlen(src)
              StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, strLength);
            } else {
              // strlcat returns strlen(src) + strlen(dst)
              SVal retSize = svalBuilder.evalBinOp(
                  state, BO_Add, strLength, dstStrLength, sizeTy);
              StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, retSize);
            }
          }
          C.addTransition(StateZeroSize);
          return;
        }

        // Otherwise, go ahead and figure out the last element we'll touch.
        // We don't record the non-zero assumption here because we can't
        // be sure. We won't warn on a possible zero.
        NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
        maxLastElementIndex =
            svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
        boundWarning = "Size argument is greater than the length of the "
                       "destination buffer";
        break;
      }
    }
  } else {
    // The function isn't bounded. The amount copied should match the length
    // of the source buffer.
    amountCopied = strLength;
  }

  assert(state);

  // This represents the number of characters copied into the destination
  // buffer. (It may not actually be the strlen if the destination buffer
  // is not terminated.)
  SVal finalStrLength = UnknownVal();
  // Return value used for the length-returning functions (strlcpy/strlcat).
  SVal strlRetVal = UnknownVal();

  if (appendK == ConcatFnKind::none && !returnPtr) {
    // strlcpy returns strlen(src)
    strlRetVal = strLength;
  }

  // If this is an appending function (strcat, strncat...) then set the
  // string length to strlen(src) + strlen(dst) since the buffer will
  // ultimately contain both.
  if (appendK != ConcatFnKind::none) {
    // Get the string length of the destination. If the destination is memory
    // that can't have a string length, we shouldn't be copying into it anyway.
    if (dstStrLength.isUndef())
      return;

    // strlcat returns strlen(src) + strlen(dst) when both are known.
    if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
      strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
                                           *dstStrLengthNL, sizeTy);
    }

    std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();

    // If we know both string lengths, we might know the final string length.
    if (amountCopiedNL && dstStrLengthNL) {
      // Make sure the two lengths together don't overflow a size_t.
      state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
      if (!state)
        return;

      finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
                                               *dstStrLengthNL, sizeTy);
    }

    // If we couldn't get a single value for the final string length,
    // we can at least bound it by the individual lengths.
    if (finalStrLength.isUnknown()) {
      // Try to get a "hypothetical" string length symbol, which we can later
      // set as a real value if that turns out to be the case.
      finalStrLength = getCStringLength(C, state, CE, DstVal, true);
      assert(!finalStrLength.isUndef());

      if (std::optional<NonLoc> finalStrLengthNL =
              finalStrLength.getAs<NonLoc>()) {
        // NOTE(review): this branch is nested under
        // 'appendK != ConcatFnKind::none', so the condition below cannot
        // hold here.
        if (amountCopiedNL && appendK == ConcatFnKind::none) {
          // we overwrite dst string with the src
          // finalStrLength >= srcStrLength
          SVal sourceInResult = svalBuilder.evalBinOpNN(
              state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
          state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
                                true);
          if (!state)
            return;
        }

        if (dstStrLengthNL && appendK != ConcatFnKind::none) {
          // we extend the dst string with the src
          // finalStrLength >= dstStrLength
          SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
                                                      *finalStrLengthNL,
                                                      *dstStrLengthNL,
                                                      cmpTy);
          state =
              state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
          if (!state)
            return;
        }
      }
    }

  } else {
    // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
    // the final string length will match the input string length.
    finalStrLength = amountCopied;
  }

  SVal Result;

  if (returnPtr) {
    // The final result of the function will either be a pointer past the last
    // copied element, or a pointer to the start of the destination buffer.
    // For the ReturnEnd case the value is filled in later, once (and if) the
    // end of the resulting string is known.
    Result = (ReturnEnd ? UnknownVal() : DstVal);
  } else {
    if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
      //strlcpy, strlcat
      Result = strlRetVal;
    else
      Result = finalStrLength;
  }

  assert(state);

  // If the destination is a MemRegion, try to check for a buffer overflow and
  // record the new string length.
  if (std::optional<loc::MemRegionVal> dstRegVal =
          DstVal.getAs<loc::MemRegionVal>()) {
    QualType ptrTy = Dst.Expression->getType();

    // If we have an exact value on a bounded copy, use that to check for
    // overflows, rather than our estimate about how much is actually copied.
    if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
      SVal maxLastElement =
          svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);

      state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
      if (!state)
        return;
    }

    // Then, if the final length is known...
    if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
      SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
                                                 *knownStrLength, ptrTy);

      // ...and we haven't checked the bound, we'll check the actual copy.
      if (!boundWarning) {
        state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
        if (!state)
          return;
      }

      // If this is a stpcpy-style copy, the last element is the return value.
      if (returnPtr && ReturnEnd)
        Result = lastElement;
    }

    // Invalidate the destination (regular invalidation without pointer-escaping
    // the address of the top-level region). This must happen before we set the
    // C string length because invalidation will clear the length.
    // FIXME: Even if we can't perfectly model the copy, we should see if we
    // can use LazyCompoundVals to copy the source values into the destination.
    // This would probably remove any existing bindings past the end of the
    // string, but that's still an improvement over blank invalidation.
    state = InvalidateBuffer(C, state, Dst.Expression, *dstRegVal,
                             /*IsSourceBuffer*/ false, nullptr);

    // Invalidate the source (const-invalidation without const-pointer-escaping
    // the address of the top-level region).
    state = InvalidateBuffer(C, state, srcExpr.Expression, srcVal,
                             /*IsSourceBuffer*/ true, nullptr);

    // Set the C string length of the destination, if we know it.
    if (IsBounded && (appendK == ConcatFnKind::none)) {
      // strncpy is annoying in that it doesn't guarantee to null-terminate
      // the result string. If the original string didn't fit entirely inside
      // the bound (including the null-terminator), we don't know how long the
      // result is.
      if (amountCopied != strLength)
        finalStrLength = UnknownVal();
    }
    state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
  }

  assert(state);

  if (returnPtr) {
    // If this is a stpcpy-style copy, but we were unable to check for a buffer
    // overflow, we still need a result. Conjure a return value.
    if (ReturnEnd && Result.isUnknown()) {
      Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
    }
  }
  // Set the return value.
  state = state->BindExpr(CE, LCtx, Result);
  C.addTransition(state);
}

// Models strcmp: unbounded, case-sensitive comparison.
void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
  //int strcmp(const char *s1, const char *s2);
  evalStrcmpCommon(C, CE, /* IsBounded = */ false, /* IgnoreCase = */ false);
}

// Models strncmp: bounded, case-sensitive comparison.
void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
  //int strncmp(const char *s1, const char *s2, size_t n);
  evalStrcmpCommon(C, CE, /* IsBounded = */ true, /* IgnoreCase = */ false);
}

// Models strcasecmp: unbounded, case-insensitive comparison.
void CStringChecker::evalStrcasecmp(CheckerContext &C,
    const CallExpr *CE) const {
  //int strcasecmp(const char *s1, const char *s2);
  evalStrcmpCommon(C, CE, /* IsBounded = */ false, /* IgnoreCase = */ true);
}

// Models strncasecmp: bounded, case-insensitive comparison.
void CStringChecker::evalStrncasecmp(CheckerContext &C,
    const CallExpr *CE) const {
  //int strncasecmp(const char *s1, const char *s2, size_t n);
  evalStrcmpCommon(C, CE, /* IsBounded = */ true, /* IgnoreCase = */ true);
}

// Shared model for the string comparison functions.
//
//   IsBounded  - the call has a length argument at index 2 that limits how
//                many characters are compared.
//   IgnoreCase - compare case-insensitively.
//
// Both arguments are checked for null and must have a defined C string
// length. If the two pointers may be equal, a transition binding the result
// to zero is added. On the path where they differ, the result is a conjured
// symbol; when both arguments are string literals (and, for bounded calls,
// the length is a known constant) the result is either the constant 0 or the
// symbol constrained to be positive / negative.
void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
                                      bool IsBounded, bool IgnoreCase) const {
  CurrentFunctionDescription = "string comparison function";
  ProgramStateRef state = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the first string is non-null
  AnyArgExpr Left = {CE->getArg(0), 0};
  SVal LeftVal = state->getSVal(Left.Expression, LCtx);
  state = checkNonNull(C, state, Left, LeftVal);
  if (!state)
    return;

  // Check that the second string is non-null.
  AnyArgExpr Right = {CE->getArg(1), 1};
  SVal RightVal = state->getSVal(Right.Expression, LCtx);
  state = checkNonNull(C, state, Right, RightVal);
  if (!state)
    return;

  // Get the string length of the first string or give up.
  SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
  if (LeftLength.isUndef())
    return;

  // Get the string length of the second string or give up.
  SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
  if (RightLength.isUndef())
    return;

  // If we know the two buffers are the same, we know the result is 0.
  // First, get the two buffers' addresses. Another checker will have already
  // made sure they're not undefined.
  DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
  DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();

  // See if they are the same.
  SValBuilder &svalBuilder = C.getSValBuilder();
  DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
  ProgramStateRef StSameBuf, StNotSameBuf;
  std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);

  // If the two arguments might be the same buffer, we know the result is 0,
  // and we only need to check one size.
  if (StSameBuf) {
    StSameBuf = StSameBuf->BindExpr(CE, LCtx,
                                    svalBuilder.makeZeroVal(CE->getType()));
    C.addTransition(StSameBuf);

    // If the two arguments are GUARANTEED to be the same, we're done!
    if (!StNotSameBuf)
      return;
  }

  assert(StNotSameBuf);
  state = StNotSameBuf;

  // At this point we can go about comparing the two buffers.
  // For now, we only do this if they're both known string literals.

  // Attempt to extract string literals from both expressions.
  const StringLiteral *LeftStrLiteral =
      getCStringLiteral(C, state, Left.Expression, LeftVal);
  const StringLiteral *RightStrLiteral =
      getCStringLiteral(C, state, Right.Expression, RightVal);
  bool canComputeResult = false;
  // Default result: a fresh symbol; refined below when the comparison can be
  // computed.
  SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
                                                C.blockCount());

  if (LeftStrLiteral && RightStrLiteral) {
    StringRef LeftStrRef = LeftStrLiteral->getString();
    StringRef RightStrRef = RightStrLiteral->getString();

    if (IsBounded) {
      // Get the max number of characters to compare.
      const Expr *lenExpr = CE->getArg(2);
      SVal lenVal = state->getSVal(lenExpr, LCtx);

      // If the length is known, we can get the right substrings.
      if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
        // Create substrings of each to compare the prefix.
        LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());
        RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());
        canComputeResult = true;
      }
    } else {
      // This is a normal, unbounded strcmp.
      canComputeResult = true;
    }

    if (canComputeResult) {
      // Real strcmp stops at null characters.
      size_t s1Term = LeftStrRef.find('\0');
      if (s1Term != StringRef::npos)
        LeftStrRef = LeftStrRef.substr(0, s1Term);

      size_t s2Term = RightStrRef.find('\0');
      if (s2Term != StringRef::npos)
        RightStrRef = RightStrRef.substr(0, s2Term);

      // Use StringRef's comparison methods to compute the actual result.
      int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef)
                                  : LeftStrRef.compare(RightStrRef);

      // The strcmp function returns an integer greater than, equal to, or less
      // than zero, [c11, p7.24.4.2].
      if (compareRes == 0) {
        resultVal = svalBuilder.makeIntVal(compareRes, CE->getType());
      }
      else {
        DefinedSVal zeroVal = svalBuilder.makeIntVal(0, CE->getType());
        // Constrain strcmp's result range based on the result of StringRef's
        // comparison methods.
        BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
        SVal compareWithZero =
          svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
              svalBuilder.getConditionType());
        DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
        state = state->assume(compareWithZeroVal, true);
      }
    }
  }

  state = state->BindExpr(CE, LCtx, resultVal);

  // Record this as a possible path.
  C.addTransition(state);
}

// Models strsep: checks both arguments for null, invalidates the string that
// *stringp currently points to, overwrites *stringp with a conjured value
// and returns the previous value of *stringp. Calls whose first argument's
// pointee type does not match the call's return type are not modeled.
void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
  // char *strsep(char **stringp, const char *delim);
  // Verify whether the search string parameter matches the return type.
  SourceArgExpr SearchStrPtr = {CE->getArg(0), 0};

  QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
  if (CharPtrTy.isNull() ||
      CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
    return;

  CurrentFunctionDescription = "strsep()";
  ProgramStateRef State = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the search string pointer is non-null (though it may point to
  // a null string).
  SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
  State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
  if (!State)
    return;

  // Check that the delimiter string is non-null.
  AnyArgExpr DelimStr = {CE->getArg(1), 1};
  SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
  State = checkNonNull(C, State, DelimStr, DelimStrVal);
  if (!State)
    return;

  SValBuilder &SVB = C.getSValBuilder();
  SVal Result;
  if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
    // Get the current value of the search string pointer, as a char*.
    Result = State->getSVal(*SearchStrLoc, CharPtrTy);

    // Invalidate the search string, representing the change of one delimiter
    // character to NUL.
    State = InvalidateBuffer(C, State, SearchStrPtr.Expression, Result,
                             /*IsSourceBuffer*/ false, nullptr);

    // Overwrite the search string pointer. The new value is either an address
    // further along in the same string, or NULL if there are no more tokens.
    State = State->bindLoc(*SearchStrLoc,
                           SVB.conjureSymbolVal(getTag(),
                                                CE,
                                                LCtx,
                                                CharPtrTy,
                                                C.blockCount()),
                           LCtx);
  } else {
    assert(SearchStrVal.isUnknown());
    // Conjure a symbolic value. It's the best we can do.
    Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
  }

  // Set the return value, and finish.
  State = State->BindExpr(CE, LCtx, Result);
  C.addTransition(State);
}

// These should probably be moved into a C++ standard library checker.
// Models std::copy; see evalStdCopyCommon.
void CStringChecker::evalStdCopy(CheckerContext &C, const CallExpr *CE) const {
  evalStdCopyCommon(C, CE);
}

// Models std::copy_backward; see evalStdCopyCommon.
void CStringChecker::evalStdCopyBackward(CheckerContext &C,
    const CallExpr *CE) const {
  evalStdCopyCommon(C, CE);
}

// Shared model for std::copy and std::copy_backward. Only calls whose third
// argument has pointer type are modeled: the destination buffer is
// invalidated and the call's value is bound to a conjured symbol.
void CStringChecker::evalStdCopyCommon(CheckerContext &C,
    const CallExpr *CE) const {
  if (!CE->getArg(2)->getType()->isPointerType())
    return;

  ProgramStateRef State = C.getState();

  const LocationContext *LCtx = C.getLocationContext();

  // template <class _InputIterator, class _OutputIterator>
  // _OutputIterator
  // copy(_InputIterator __first, _InputIterator __last,
  //        _OutputIterator __result)

  // Invalidate the destination buffer
  const Expr *Dst = CE->getArg(2);
  SVal DstVal = State->getSVal(Dst, LCtx);
  State = InvalidateBuffer(C, State, Dst, DstVal, /*IsSource=*/false,
      /*Size=*/nullptr);

  SValBuilder &SVB = C.getSValBuilder();

  SVal ResultVal = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
  State = State->BindExpr(CE, LCtx, ResultVal);

  C.addTransition(State);
}

// Models memset. A size known to be zero short-circuits to returning the
// buffer pointer. Otherwise the buffer is checked for null and for an
// out-of-bounds write, memsetAux updates the buffer contents, and the call's
// value is bound to the buffer pointer.
void CStringChecker::evalMemset(CheckerContext &C, const CallExpr *CE) const {
  // void *memset(void *s, int c, size_t n);
  CurrentFunctionDescription = "memory set function";

  DestinationArgExpr Buffer = {CE->getArg(0), 0};
  AnyArgExpr CharE = {CE->getArg(1), 1};
  SizeArgExpr Size = {CE->getArg(2), 2};

  ProgramStateRef State = C.getState();

  // See if the size argument is zero.
  const LocationContext *LCtx = C.getLocationContext();
  SVal SizeVal = C.getSVal(Size.Expression);
  QualType SizeTy = Size.Expression->getType();

  ProgramStateRef ZeroSize, NonZeroSize;
  std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy);

  // Get the value of the memory area.
  SVal BufferPtrVal = C.getSVal(Buffer.Expression);

  // If the size is zero, there won't be any actual memory access, so
  // just bind the return value to the buffer and return.
  if (ZeroSize && !NonZeroSize) {
    ZeroSize = ZeroSize->BindExpr(CE, LCtx, BufferPtrVal);
    C.addTransition(ZeroSize);
    return;
  }

  // Ensure the memory area is not null.
  // If it is NULL there will be a NULL pointer dereference.
  State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal);
  if (!State)
    return;

  State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
  if (!State)
    return;

  // According to the values of the arguments, bind the value of the second
  // argument to the destination buffer and set string length, or just
  // invalidate the destination buffer.
  if (!memsetAux(Buffer.Expression, C.getSVal(CharE.Expression),
                 Size.Expression, C, State))
    return;

  State = State->BindExpr(CE, LCtx, BufferPtrVal);
  C.addTransition(State);
}

// Models bzero along the same lines as evalMemset, with a zero value of type
// int as the fill value and without binding a return value.
void CStringChecker::evalBzero(CheckerContext &C, const CallExpr *CE) const {
  CurrentFunctionDescription = "memory clearance function";

  DestinationArgExpr Buffer = {CE->getArg(0), 0};
  SizeArgExpr Size = {CE->getArg(1), 1};
  SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy);

  ProgramStateRef State = C.getState();

  // See if the size argument is zero.
  SVal SizeVal = C.getSVal(Size.Expression);
  QualType SizeTy = Size.Expression->getType();

  ProgramStateRef StateZeroSize, StateNonZeroSize;
  std::tie(StateZeroSize, StateNonZeroSize) =
    assumeZero(C, State, SizeVal, SizeTy);

  // If the size is zero, there won't be any actual memory access,
  // In this case we just return.
  if (StateZeroSize && !StateNonZeroSize) {
    C.addTransition(StateZeroSize);
    return;
  }

  // Get the value of the memory area.
  SVal MemVal = C.getSVal(Buffer.Expression);

  // Ensure the memory area is not null.
  // If it is NULL there will be a NULL pointer dereference.
  State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal);
  if (!State)
    return;

  State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
  if (!State)
    return;

  if (!memsetAux(Buffer.Expression, Zero, Size.Expression, C, State))
    return;

  C.addTransition(State);
}

// Models sprintf (and __builtin___sprintf_chk) via evalSprintfCommon.
void CStringChecker::evalSprintf(CheckerContext &C, const CallExpr *CE) const {
  CurrentFunctionDescription = "'sprintf'";
  bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___sprintf_chk;
  evalSprintfCommon(C, CE, /* IsBounded */ false, IsBI);
}

// Models snprintf (and __builtin___snprintf_chk) via evalSprintfCommon.
void CStringChecker::evalSnprintf(CheckerContext &C, const CallExpr *CE) const {
  CurrentFunctionDescription = "'snprintf'";
  bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___snprintf_chk;
  evalSprintfCommon(C, CE, /* IsBounded */ true, IsBI);
}

// Shared model for sprintf / snprintf: every variadic argument that is a
// pointer to a character type is checked for overlap with the destination
// buffer (argument 0). For bounded calls the size argument is argument 1.
// Note: IsBuiltin is not read anywhere in this body.
void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallExpr *CE,
                                       bool IsBounded, bool IsBuiltin) const {
  ProgramStateRef State = C.getState();
  DestinationArgExpr Dest = {CE->getArg(0), 0};

  // The variadic arguments are those past the callee's declared parameters.
  const auto NumParams = CE->getCalleeDecl()->getAsFunction()->getNumParams();
  assert(CE->getNumArgs() >= NumParams);

  const auto AllArguments =
      llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs());
  const auto VariadicArguments = drop_begin(enumerate(AllArguments), NumParams);

  for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) {
    // We consider only string buffers
    if (const QualType type = ArgExpr->getType();
        !type->isAnyPointerType() ||
        !type->getPointeeType()->isAnyCharacterType())
      continue;
    SourceArgExpr Source = {ArgExpr, unsigned(ArgIdx)};

    // Ensure the buffers do not overlap.
    SizeArgExpr SrcExprAsSizeDummy = {Source.Expression, Source.ArgumentIndex};
    State = CheckOverlap(
        C, State,
        (IsBounded ? SizeArgExpr{CE->getArg(1), 1} : SrcExprAsSizeDummy), Dest,
        Source);
    if (!State)
      return;
  }

  C.addTransition(State);
}

//===----------------------------------------------------------------------===//
// The driver method, and other Checker callbacks.
//===----------------------------------------------------------------------===//

// Returns the member function that models the given call, or nullptr if the
// call is not one this checker evaluates. std::copy / std::copy_backward are
// matched first; every other candidate must have only integral, enumeration
// or pointer typed arguments before the Callbacks table is consulted.
CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call,
                                                     CheckerContext &C) const {
  const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
  if (!CE)
    return nullptr;

  const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
  if (!FD)
    return nullptr;

  if (StdCopy.matches(Call))
    return &CStringChecker::evalStdCopy;
  if (StdCopyBackward.matches(Call))
    return &CStringChecker::evalStdCopyBackward;

  // Pro-actively check that argument types are safe to do arithmetic upon.
  // We do not want to crash if someone accidentally passes a structure
  // into, say, a C++ overload of any of these functions. We could not check
  // that for std::copy because they may have arguments of other types.
  for (auto I : CE->arguments()) {
    QualType T = I->getType();
    if (!T->isIntegralOrEnumerationType() && !T->isPointerType())
      return nullptr;
  }

  const FnCheck *Callback = Callbacks.lookup(Call);
  if (Callback)
    return *Callback;

  return nullptr;
}

// eval::Call callback: dispatches to the modeling function chosen by
// identifyCall. Returns false when the call is not recognized or when the
// modeling function did not change the checker context.
bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
  FnCheck Callback = identifyCall(Call, C);

  // If the callee isn't a string function, let another checker handle it.
  if (!Callback)
    return false;

  // Check and evaluate the call.
  const auto *CE = cast<CallExpr>(Call.getOriginExpr());
  Callback(this, C, CE);

  // If the evaluate call resulted in no change, chain to the next eval call
  // handler.
  // Note, the custom CString evaluation calls assume that basic safety
  // properties are held. However, if the user chooses to turn off some of these
  // checks, we ignore the issues and leave the call evaluation to a generic
  // handler.
  return C.isDifferent();
}

// check::PreStmt<DeclStmt> callback: for each array variable initialized
// directly from a string literal, records the literal's C string length for
// the variable's region.
void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
  // Record string length for char a[] = "abc";
  ProgramStateRef state = C.getState();

  for (const auto *I : DS->decls()) {
    const VarDecl *D = dyn_cast<VarDecl>(I);
    if (!D)
      continue;

    // FIXME: Handle array fields of structs.
    if (!D->getType()->isArrayType())
      continue;

    const Expr *Init = D->getInit();
    if (!Init)
      continue;
    if (!isa<StringLiteral>(Init))
      continue;

    Loc VarLoc = state->getLValue(D, C.getLocationContext());
    const MemRegion *MR = VarLoc.getAsRegion();
    if (!MR)
      continue;

    SVal StrVal = C.getSVal(Init);
    assert(StrVal.isValid() && "Initializer string is unknown or undefined");
    DefinedOrUnknownSVal strLength =
      getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();

    state = state->set<CStringLength>(MR, strLength);
  }

  C.addTransition(state);
}

// check::RegionChanges callback: drops every recorded C string length whose
// region is a changed region, a super-region of a changed region, or a
// sub-region of a changed region. Only the Regions list is examined;
// ExplicitRegions, LCtx and Call are not read.
ProgramStateRef
CStringChecker::checkRegionChanges(ProgramStateRef state,
    const InvalidatedSymbols *,
    ArrayRef<const MemRegion *> ExplicitRegions,
    ArrayRef<const MemRegion *> Regions,
    const LocationContext *LCtx,
    const CallEvent *Call) const {
  CStringLengthTy Entries = state->get<CStringLength>();
  if (Entries.isEmpty())
    return state;

  llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
  llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;

  // First build sets for the changed regions and their super-regions.
  for (ArrayRef<const MemRegion *>::iterator
       I = Regions.begin(), E = Regions.end(); I != E; ++I) {
    const MemRegion *MR = *I;
    Invalidated.insert(MR);

    // SuperRegions holds the changed region itself plus all of its ancestors.
    SuperRegions.insert(MR);
    while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
      MR = SR->getSuperRegion();
      SuperRegions.insert(MR);
    }
  }

  CStringLengthTy::Factory &F = state->get_context<CStringLength>();

  // Then loop over the entries in the current state.
  for (CStringLengthTy::iterator I = Entries.begin(),
       E = Entries.end(); I != E; ++I) {
    const MemRegion *MR = I.getKey();

    // Is this entry for a super-region of a changed region?
    if (SuperRegions.count(MR)) {
      Entries = F.remove(Entries, MR);
      continue;
    }

    // Is this entry for a sub-region of a changed region?
    const MemRegion *Super = MR;
    while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
      Super = SR->getSuperRegion();
      if (Invalidated.count(Super)) {
        Entries = F.remove(Entries, MR);
        break;
      }
    }
  }

  return state->set<CStringLength>(Entries);
}

// check::LiveSymbols callback: marks every symbol appearing in a recorded
// string length as in use.
void CStringChecker::checkLiveSymbols(ProgramStateRef state,
    SymbolReaper &SR) const {
  // Mark all symbols in our string length map as valid.
  CStringLengthTy Entries = state->get<CStringLength>();

  for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
       I != E; ++I) {
    SVal Len = I.getData();

    for (SymExpr::symbol_iterator si = Len.symbol_begin(),
        se = Len.symbol_end(); si != se; ++si)
      SR.markInUse(*si);
  }
}

// check::DeadSymbols callback: removes string length entries whose length is
// a symbol that the reaper reports as dead.
void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
    CheckerContext &C) const {
  ProgramStateRef state = C.getState();
  CStringLengthTy Entries = state->get<CStringLength>();
  if (Entries.isEmpty())
    return;

  CStringLengthTy::Factory &F = state->get_context<CStringLength>();
  for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
      I != E; ++I) {
    SVal Len = I.getData();
    // Only lengths for which getAsSymbol() yields a symbol are considered.
    if (SymbolRef Sym = Len.getAsSymbol()) {
      if (SR.isDead(Sym))
        Entries = F.remove(Entries, I.getKey());
    }
  }

  state = state->set<CStringLength>(Entries);
  C.addTransition(state);
}

// Registers the CStringChecker instance itself with the checker manager.
void ento::registerCStringModeling(CheckerManager &Mgr) {
  Mgr.registerChecker<CStringChecker>();
}

bool ento::shouldRegisterCStringModeling(const CheckerManager &mgr) {
  return true;
}

// Defines ento::register<name>, which looks up the CStringChecker instance
// and sets the Filter flag and checker name for that check, together with
// ento::shouldRegister<name>, which always returns true.
#define REGISTER_CHECKER(name)                                                 \
  void ento::register##name(CheckerManager &mgr) {                             \
    CStringChecker *checker = mgr.getChecker<CStringChecker>();                \
    checker->Filter.Check##name = true;                                        \
    checker->Filter.CheckName##name = mgr.getCurrentCheckerName();             \
  }                                                                            \
                                                                               \
  bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }

REGISTER_CHECKER(CStringNullArg)
REGISTER_CHECKER(CStringOutOfBounds)
REGISTER_CHECKER(CStringBufferOverlap)
REGISTER_CHECKER(CStringNotNullTerm)
REGISTER_CHECKER(CStringUninitializedRead)