1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This defines CStringChecker, which is an assortment of checks on calls 10 // to functions in <string.h>. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "InterCheckerAPI.h" 15 #include "clang/Basic/Builtins.h" 16 #include "clang/Basic/CharInfo.h" 17 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 19 #include "clang/StaticAnalyzer/Core/Checker.h" 20 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" 22 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 23 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 24 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" 25 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 26 #include "llvm/ADT/STLExtras.h" 27 #include "llvm/ADT/SmallString.h" 28 #include "llvm/ADT/StringExtras.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <functional> 31 #include <optional> 32 33 using namespace clang; 34 using namespace ento; 35 using namespace std::placeholders; 36 37 namespace { 38 struct AnyArgExpr { 39 // FIXME: Remove constructor in C++17 to turn it into an aggregate. 40 AnyArgExpr(const Expr *Expression, unsigned ArgumentIndex) 41 : Expression{Expression}, ArgumentIndex{ArgumentIndex} {} 42 const Expr *Expression; 43 unsigned ArgumentIndex; 44 }; 45 46 struct SourceArgExpr : AnyArgExpr { 47 using AnyArgExpr::AnyArgExpr; // FIXME: Remove using in C++17. 
48 }; 49 50 struct DestinationArgExpr : AnyArgExpr { 51 using AnyArgExpr::AnyArgExpr; // FIXME: Same. 52 }; 53 54 struct SizeArgExpr : AnyArgExpr { 55 using AnyArgExpr::AnyArgExpr; // FIXME: Same. 56 }; 57 58 using ErrorMessage = SmallString<128>; 59 enum class AccessKind { write, read }; 60 61 static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription, 62 AccessKind Access) { 63 ErrorMessage Message; 64 llvm::raw_svector_ostream Os(Message); 65 66 // Function classification like: Memory copy function 67 Os << toUppercase(FunctionDescription.front()) 68 << &FunctionDescription.data()[1]; 69 70 if (Access == AccessKind::write) { 71 Os << " overflows the destination buffer"; 72 } else { // read access 73 Os << " accesses out-of-bound array element"; 74 } 75 76 return Message; 77 } 78 79 enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 }; 80 81 enum class CharKind { Regular = 0, Wide }; 82 constexpr CharKind CK_Regular = CharKind::Regular; 83 constexpr CharKind CK_Wide = CharKind::Wide; 84 85 static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) { 86 return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy 87 : Ctx.WideCharTy); 88 } 89 90 class CStringChecker : public Checker< eval::Call, 91 check::PreStmt<DeclStmt>, 92 check::LiveSymbols, 93 check::DeadSymbols, 94 check::RegionChanges 95 > { 96 mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap, 97 BT_NotCString, BT_AdditionOverflow, BT_UninitRead; 98 99 mutable const char *CurrentFunctionDescription; 100 101 public: 102 /// The filter is used to filter out the diagnostics which are not enabled by 103 /// the user. 
104 struct CStringChecksFilter { 105 bool CheckCStringNullArg = false; 106 bool CheckCStringOutOfBounds = false; 107 bool CheckCStringBufferOverlap = false; 108 bool CheckCStringNotNullTerm = false; 109 bool CheckCStringUninitializedRead = false; 110 111 CheckerNameRef CheckNameCStringNullArg; 112 CheckerNameRef CheckNameCStringOutOfBounds; 113 CheckerNameRef CheckNameCStringBufferOverlap; 114 CheckerNameRef CheckNameCStringNotNullTerm; 115 CheckerNameRef CheckNameCStringUninitializedRead; 116 }; 117 118 CStringChecksFilter Filter; 119 120 static void *getTag() { static int tag; return &tag; } 121 122 bool evalCall(const CallEvent &Call, CheckerContext &C) const; 123 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; 124 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const; 125 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; 126 127 ProgramStateRef 128 checkRegionChanges(ProgramStateRef state, 129 const InvalidatedSymbols *, 130 ArrayRef<const MemRegion *> ExplicitRegions, 131 ArrayRef<const MemRegion *> Regions, 132 const LocationContext *LCtx, 133 const CallEvent *Call) const; 134 135 using FnCheck = std::function<void(const CStringChecker *, CheckerContext &, 136 const CallExpr *)>; 137 138 CallDescriptionMap<FnCheck> Callbacks = { 139 {{CDF_MaybeBuiltin, {"memcpy"}, 3}, 140 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)}, 141 {{CDF_MaybeBuiltin, {"wmemcpy"}, 3}, 142 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)}, 143 {{CDF_MaybeBuiltin, {"mempcpy"}, 3}, 144 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)}, 145 {{CDF_None, {"wmempcpy"}, 3}, 146 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)}, 147 {{CDF_MaybeBuiltin, {"memcmp"}, 3}, 148 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)}, 149 {{CDF_MaybeBuiltin, {"wmemcmp"}, 3}, 150 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)}, 151 {{CDF_MaybeBuiltin, {"memmove"}, 3}, 152 
std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)}, 153 {{CDF_MaybeBuiltin, {"wmemmove"}, 3}, 154 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)}, 155 {{CDF_MaybeBuiltin, {"memset"}, 3}, &CStringChecker::evalMemset}, 156 {{CDF_MaybeBuiltin, {"explicit_memset"}, 3}, &CStringChecker::evalMemset}, 157 {{CDF_MaybeBuiltin, {"strcpy"}, 2}, &CStringChecker::evalStrcpy}, 158 {{CDF_MaybeBuiltin, {"strncpy"}, 3}, &CStringChecker::evalStrncpy}, 159 {{CDF_MaybeBuiltin, {"stpcpy"}, 2}, &CStringChecker::evalStpcpy}, 160 {{CDF_MaybeBuiltin, {"strlcpy"}, 3}, &CStringChecker::evalStrlcpy}, 161 {{CDF_MaybeBuiltin, {"strcat"}, 2}, &CStringChecker::evalStrcat}, 162 {{CDF_MaybeBuiltin, {"strncat"}, 3}, &CStringChecker::evalStrncat}, 163 {{CDF_MaybeBuiltin, {"strlcat"}, 3}, &CStringChecker::evalStrlcat}, 164 {{CDF_MaybeBuiltin, {"strlen"}, 1}, &CStringChecker::evalstrLength}, 165 {{CDF_MaybeBuiltin, {"wcslen"}, 1}, &CStringChecker::evalstrLength}, 166 {{CDF_MaybeBuiltin, {"strnlen"}, 2}, &CStringChecker::evalstrnLength}, 167 {{CDF_MaybeBuiltin, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength}, 168 {{CDF_MaybeBuiltin, {"strcmp"}, 2}, &CStringChecker::evalStrcmp}, 169 {{CDF_MaybeBuiltin, {"strncmp"}, 3}, &CStringChecker::evalStrncmp}, 170 {{CDF_MaybeBuiltin, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp}, 171 {{CDF_MaybeBuiltin, {"strncasecmp"}, 3}, 172 &CStringChecker::evalStrncasecmp}, 173 {{CDF_MaybeBuiltin, {"strsep"}, 2}, &CStringChecker::evalStrsep}, 174 {{CDF_MaybeBuiltin, {"bcopy"}, 3}, &CStringChecker::evalBcopy}, 175 {{CDF_MaybeBuiltin, {"bcmp"}, 3}, 176 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)}, 177 {{CDF_MaybeBuiltin, {"bzero"}, 2}, &CStringChecker::evalBzero}, 178 {{CDF_MaybeBuiltin, {"explicit_bzero"}, 2}, &CStringChecker::evalBzero}, 179 {{CDF_MaybeBuiltin, {"sprintf"}, 2}, &CStringChecker::evalSprintf}, 180 {{CDF_MaybeBuiltin, {"snprintf"}, 2}, &CStringChecker::evalSnprintf}, 181 }; 182 183 // These require a bit of 
special handling. 184 CallDescription StdCopy{{"std", "copy"}, 3}, 185 StdCopyBackward{{"std", "copy_backward"}, 3}; 186 187 FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const; 188 void evalMemcpy(CheckerContext &C, const CallExpr *CE, CharKind CK) const; 189 void evalMempcpy(CheckerContext &C, const CallExpr *CE, CharKind CK) const; 190 void evalMemmove(CheckerContext &C, const CallExpr *CE, CharKind CK) const; 191 void evalBcopy(CheckerContext &C, const CallExpr *CE) const; 192 void evalCopyCommon(CheckerContext &C, const CallExpr *CE, 193 ProgramStateRef state, SizeArgExpr Size, 194 DestinationArgExpr Dest, SourceArgExpr Source, 195 bool Restricted, bool IsMempcpy, CharKind CK) const; 196 197 void evalMemcmp(CheckerContext &C, const CallExpr *CE, CharKind CK) const; 198 199 void evalstrLength(CheckerContext &C, const CallExpr *CE) const; 200 void evalstrnLength(CheckerContext &C, const CallExpr *CE) const; 201 void evalstrLengthCommon(CheckerContext &C, 202 const CallExpr *CE, 203 bool IsStrnlen = false) const; 204 205 void evalStrcpy(CheckerContext &C, const CallExpr *CE) const; 206 void evalStrncpy(CheckerContext &C, const CallExpr *CE) const; 207 void evalStpcpy(CheckerContext &C, const CallExpr *CE) const; 208 void evalStrlcpy(CheckerContext &C, const CallExpr *CE) const; 209 void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool ReturnEnd, 210 bool IsBounded, ConcatFnKind appendK, 211 bool returnPtr = true) const; 212 213 void evalStrcat(CheckerContext &C, const CallExpr *CE) const; 214 void evalStrncat(CheckerContext &C, const CallExpr *CE) const; 215 void evalStrlcat(CheckerContext &C, const CallExpr *CE) const; 216 217 void evalStrcmp(CheckerContext &C, const CallExpr *CE) const; 218 void evalStrncmp(CheckerContext &C, const CallExpr *CE) const; 219 void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const; 220 void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const; 221 void evalStrcmpCommon(CheckerContext 
&C, 222 const CallExpr *CE, 223 bool IsBounded = false, 224 bool IgnoreCase = false) const; 225 226 void evalStrsep(CheckerContext &C, const CallExpr *CE) const; 227 228 void evalStdCopy(CheckerContext &C, const CallExpr *CE) const; 229 void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const; 230 void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const; 231 void evalMemset(CheckerContext &C, const CallExpr *CE) const; 232 void evalBzero(CheckerContext &C, const CallExpr *CE) const; 233 234 void evalSprintf(CheckerContext &C, const CallExpr *CE) const; 235 void evalSnprintf(CheckerContext &C, const CallExpr *CE) const; 236 void evalSprintfCommon(CheckerContext &C, const CallExpr *CE, bool IsBounded, 237 bool IsBuiltin) const; 238 239 // Utility methods 240 std::pair<ProgramStateRef , ProgramStateRef > 241 static assumeZero(CheckerContext &C, 242 ProgramStateRef state, SVal V, QualType Ty); 243 244 static ProgramStateRef setCStringLength(ProgramStateRef state, 245 const MemRegion *MR, 246 SVal strLength); 247 static SVal getCStringLengthForRegion(CheckerContext &C, 248 ProgramStateRef &state, 249 const Expr *Ex, 250 const MemRegion *MR, 251 bool hypothetical); 252 SVal getCStringLength(CheckerContext &C, 253 ProgramStateRef &state, 254 const Expr *Ex, 255 SVal Buf, 256 bool hypothetical = false) const; 257 258 const StringLiteral *getCStringLiteral(CheckerContext &C, 259 ProgramStateRef &state, 260 const Expr *expr, 261 SVal val) const; 262 263 /// Invalidate the destination buffer determined by characters copied. 264 static ProgramStateRef 265 invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S, 266 const Expr *BufE, SVal BufV, SVal SizeV, 267 QualType SizeTy); 268 269 /// Operation never overflows, do not invalidate the super region. 
270 static ProgramStateRef invalidateDestinationBufferNeverOverflows( 271 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV); 272 273 /// We do not know whether the operation can overflow (e.g. size is unknown), 274 /// invalidate the super region and escape related pointers. 275 static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion( 276 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV); 277 278 /// Invalidate the source buffer for escaping pointers. 279 static ProgramStateRef invalidateSourceBuffer(CheckerContext &C, 280 ProgramStateRef S, 281 const Expr *BufE, SVal BufV); 282 283 /// @param InvalidationTraitOperations Determine how to invlidate the 284 /// MemRegion by setting the invalidation traits. Return true to cause pointer 285 /// escape, or false otherwise. 286 static ProgramStateRef invalidateBufferAux( 287 CheckerContext &C, ProgramStateRef State, const Expr *Ex, SVal V, 288 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &, 289 const MemRegion *)> 290 InvalidationTraitOperations); 291 292 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 293 const MemRegion *MR); 294 295 static bool memsetAux(const Expr *DstBuffer, SVal CharE, 296 const Expr *Size, CheckerContext &C, 297 ProgramStateRef &State); 298 299 // Re-usable checks 300 ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State, 301 AnyArgExpr Arg, SVal l) const; 302 ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state, 303 AnyArgExpr Buffer, SVal Element, 304 AccessKind Access, 305 CharKind CK = CharKind::Regular) const; 306 ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State, 307 AnyArgExpr Buffer, SizeArgExpr Size, 308 AccessKind Access, 309 CharKind CK = CharKind::Regular) const; 310 ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state, 311 SizeArgExpr Size, AnyArgExpr First, 312 AnyArgExpr Second, 313 CharKind CK = CharKind::Regular) const; 314 void 
emitOverlapBug(CheckerContext &C, 315 ProgramStateRef state, 316 const Stmt *First, 317 const Stmt *Second) const; 318 319 void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S, 320 StringRef WarningMsg) const; 321 void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State, 322 const Stmt *S, StringRef WarningMsg) const; 323 void emitNotCStringBug(CheckerContext &C, ProgramStateRef State, 324 const Stmt *S, StringRef WarningMsg) const; 325 void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const; 326 void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State, 327 const Expr *E) const; 328 ProgramStateRef checkAdditionOverflow(CheckerContext &C, 329 ProgramStateRef state, 330 NonLoc left, 331 NonLoc right) const; 332 333 // Return true if the destination buffer of the copy function may be in bound. 334 // Expects SVal of Size to be positive and unsigned. 335 // Expects SVal of FirstBuf to be a FieldRegion. 336 static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State, 337 SVal BufVal, QualType BufTy, SVal LengthVal, 338 QualType LengthTy); 339 }; 340 341 } //end anonymous namespace 342 343 REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal) 344 345 //===----------------------------------------------------------------------===// 346 // Individual checks and utility methods. 
347 //===----------------------------------------------------------------------===// 348 349 std::pair<ProgramStateRef , ProgramStateRef > 350 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V, 351 QualType Ty) { 352 std::optional<DefinedSVal> val = V.getAs<DefinedSVal>(); 353 if (!val) 354 return std::pair<ProgramStateRef , ProgramStateRef >(state, state); 355 356 SValBuilder &svalBuilder = C.getSValBuilder(); 357 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 358 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 359 } 360 361 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C, 362 ProgramStateRef State, 363 AnyArgExpr Arg, SVal l) const { 364 // If a previous check has failed, propagate the failure. 365 if (!State) 366 return nullptr; 367 368 ProgramStateRef stateNull, stateNonNull; 369 std::tie(stateNull, stateNonNull) = 370 assumeZero(C, State, l, Arg.Expression->getType()); 371 372 if (stateNull && !stateNonNull) { 373 if (Filter.CheckCStringNullArg) { 374 SmallString<80> buf; 375 llvm::raw_svector_ostream OS(buf); 376 assert(CurrentFunctionDescription); 377 OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1) 378 << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to " 379 << CurrentFunctionDescription; 380 381 emitNullArgBug(C, stateNull, Arg.Expression, OS.str()); 382 } 383 return nullptr; 384 } 385 386 // From here on, assume that the value is non-null. 387 assert(stateNonNull); 388 return stateNonNull; 389 } 390 391 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? 392 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, 393 ProgramStateRef state, 394 AnyArgExpr Buffer, SVal Element, 395 AccessKind Access, 396 CharKind CK) const { 397 398 // If a previous check has failed, propagate the failure. 399 if (!state) 400 return nullptr; 401 402 // Check for out of bound array element access. 
403 const MemRegion *R = Element.getAsRegion(); 404 if (!R) 405 return state; 406 407 const auto *ER = dyn_cast<ElementRegion>(R); 408 if (!ER) 409 return state; 410 411 SValBuilder &svalBuilder = C.getSValBuilder(); 412 ASTContext &Ctx = svalBuilder.getContext(); 413 414 // Get the index of the accessed element. 415 NonLoc Idx = ER->getIndex(); 416 417 if (CK == CharKind::Regular) { 418 if (ER->getValueType() != Ctx.CharTy) 419 return state; 420 } else { 421 if (ER->getValueType() != Ctx.WideCharTy) 422 return state; 423 424 QualType SizeTy = Ctx.getSizeType(); 425 NonLoc WideSize = 426 svalBuilder 427 .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(), 428 SizeTy) 429 .castAs<NonLoc>(); 430 SVal Offset = svalBuilder.evalBinOpNN(state, BO_Mul, Idx, WideSize, SizeTy); 431 if (Offset.isUnknown()) 432 return state; 433 Idx = Offset.castAs<NonLoc>(); 434 } 435 436 // Get the size of the array. 437 const auto *superReg = cast<SubRegion>(ER->getSuperRegion()); 438 DefinedOrUnknownSVal Size = 439 getDynamicExtent(state, superReg, C.getSValBuilder()); 440 441 ProgramStateRef StInBound, StOutBound; 442 std::tie(StInBound, StOutBound) = state->assumeInBoundDual(Idx, Size); 443 if (StOutBound && !StInBound) { 444 // These checks are either enabled by the CString out-of-bounds checker 445 // explicitly or implicitly by the Malloc checker. 446 // In the latter case we only do modeling but do not emit warning. 447 if (!Filter.CheckCStringOutOfBounds) 448 return nullptr; 449 450 // Emit a bug report. 451 ErrorMessage Message = 452 createOutOfBoundErrorMsg(CurrentFunctionDescription, Access); 453 emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message); 454 return nullptr; 455 } 456 457 // Ensure that we wouldn't read uninitialized value. 
458 if (Access == AccessKind::read) { 459 if (Filter.CheckCStringUninitializedRead && 460 StInBound->getSVal(ER).isUndef()) { 461 emitUninitializedReadBug(C, StInBound, Buffer.Expression); 462 return nullptr; 463 } 464 } 465 466 // Array bound check succeeded. From this point forward the array bound 467 // should always succeed. 468 return StInBound; 469 } 470 471 ProgramStateRef 472 CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State, 473 AnyArgExpr Buffer, SizeArgExpr Size, 474 AccessKind Access, CharKind CK) const { 475 // If a previous check has failed, propagate the failure. 476 if (!State) 477 return nullptr; 478 479 SValBuilder &svalBuilder = C.getSValBuilder(); 480 ASTContext &Ctx = svalBuilder.getContext(); 481 482 QualType SizeTy = Size.Expression->getType(); 483 QualType PtrTy = getCharPtrType(Ctx, CK); 484 485 // Check that the first buffer is non-null. 486 SVal BufVal = C.getSVal(Buffer.Expression); 487 State = checkNonNull(C, State, Buffer, BufVal); 488 if (!State) 489 return nullptr; 490 491 // If out-of-bounds checking is turned off, skip the rest. 492 if (!Filter.CheckCStringOutOfBounds) 493 return State; 494 495 // Get the access length and make sure it is known. 496 // FIXME: This assumes the caller has already checked that the access length 497 // is positive. And that it's unsigned. 498 SVal LengthVal = C.getSVal(Size.Expression); 499 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 500 if (!Length) 501 return State; 502 503 // Compute the offset of the last element to be accessed: size-1. 504 NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>(); 505 SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy); 506 if (Offset.isUnknown()) 507 return nullptr; 508 NonLoc LastOffset = Offset.castAs<NonLoc>(); 509 510 // Check that the first buffer is sufficiently long. 
511 SVal BufStart = 512 svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType()); 513 if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) { 514 515 SVal BufEnd = 516 svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy); 517 State = CheckLocation(C, State, Buffer, BufEnd, Access, CK); 518 519 // If the buffer isn't large enough, abort. 520 if (!State) 521 return nullptr; 522 } 523 524 // Large enough or not, return this state! 525 return State; 526 } 527 528 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, 529 ProgramStateRef state, 530 SizeArgExpr Size, AnyArgExpr First, 531 AnyArgExpr Second, 532 CharKind CK) const { 533 if (!Filter.CheckCStringBufferOverlap) 534 return state; 535 536 // Do a simple check for overlap: if the two arguments are from the same 537 // buffer, see if the end of the first is greater than the start of the second 538 // or vice versa. 539 540 // If a previous check has failed, propagate the failure. 541 if (!state) 542 return nullptr; 543 544 ProgramStateRef stateTrue, stateFalse; 545 546 // Assume different address spaces cannot overlap. 547 if (First.Expression->getType()->getPointeeType().getAddressSpace() != 548 Second.Expression->getType()->getPointeeType().getAddressSpace()) 549 return state; 550 551 // Get the buffer values and make sure they're known locations. 552 const LocationContext *LCtx = C.getLocationContext(); 553 SVal firstVal = state->getSVal(First.Expression, LCtx); 554 SVal secondVal = state->getSVal(Second.Expression, LCtx); 555 556 std::optional<Loc> firstLoc = firstVal.getAs<Loc>(); 557 if (!firstLoc) 558 return state; 559 560 std::optional<Loc> secondLoc = secondVal.getAs<Loc>(); 561 if (!secondLoc) 562 return state; 563 564 // Are the two values the same? 
565 SValBuilder &svalBuilder = C.getSValBuilder(); 566 std::tie(stateTrue, stateFalse) = 567 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 568 569 if (stateTrue && !stateFalse) { 570 // If the values are known to be equal, that's automatically an overlap. 571 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression); 572 return nullptr; 573 } 574 575 // assume the two expressions are not equal. 576 assert(stateFalse); 577 state = stateFalse; 578 579 // Which value comes first? 580 QualType cmpTy = svalBuilder.getConditionType(); 581 SVal reverse = 582 svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy); 583 std::optional<DefinedOrUnknownSVal> reverseTest = 584 reverse.getAs<DefinedOrUnknownSVal>(); 585 if (!reverseTest) 586 return state; 587 588 std::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 589 if (stateTrue) { 590 if (stateFalse) { 591 // If we don't know which one comes first, we can't perform this test. 592 return state; 593 } else { 594 // Switch the values so that firstVal is before secondVal. 595 std::swap(firstLoc, secondLoc); 596 597 // Switch the Exprs as well, so that they still correspond. 598 std::swap(First, Second); 599 } 600 } 601 602 // Get the length, and make sure it too is known. 603 SVal LengthVal = state->getSVal(Size.Expression, LCtx); 604 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 605 if (!Length) 606 return state; 607 608 // Convert the first buffer's start address to char*. 609 // Bail out if the cast fails. 610 ASTContext &Ctx = svalBuilder.getContext(); 611 QualType CharPtrTy = getCharPtrType(Ctx, CK); 612 SVal FirstStart = 613 svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType()); 614 std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>(); 615 if (!FirstStartLoc) 616 return state; 617 618 // Compute the end of the first buffer. Bail out if THAT fails. 
619 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc, 620 *Length, CharPtrTy); 621 std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>(); 622 if (!FirstEndLoc) 623 return state; 624 625 // Is the end of the first buffer past the start of the second buffer? 626 SVal Overlap = 627 svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy); 628 std::optional<DefinedOrUnknownSVal> OverlapTest = 629 Overlap.getAs<DefinedOrUnknownSVal>(); 630 if (!OverlapTest) 631 return state; 632 633 std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 634 635 if (stateTrue && !stateFalse) { 636 // Overlap! 637 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression); 638 return nullptr; 639 } 640 641 // assume the two expressions don't overlap. 642 assert(stateFalse); 643 return stateFalse; 644 } 645 646 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state, 647 const Stmt *First, const Stmt *Second) const { 648 ExplodedNode *N = C.generateErrorNode(state); 649 if (!N) 650 return; 651 652 if (!BT_Overlap) 653 BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap, 654 categories::UnixAPI, "Improper arguments")); 655 656 // Generate a report for this bug. 
657 auto report = std::make_unique<PathSensitiveBugReport>( 658 *BT_Overlap, "Arguments must not be overlapping buffers", N); 659 report->addRange(First->getSourceRange()); 660 report->addRange(Second->getSourceRange()); 661 662 C.emitReport(std::move(report)); 663 } 664 665 void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State, 666 const Stmt *S, StringRef WarningMsg) const { 667 if (ExplodedNode *N = C.generateErrorNode(State)) { 668 if (!BT_Null) 669 BT_Null.reset(new BuiltinBug( 670 Filter.CheckNameCStringNullArg, categories::UnixAPI, 671 "Null pointer argument in call to byte string function")); 672 673 BuiltinBug *BT = static_cast<BuiltinBug *>(BT_Null.get()); 674 auto Report = std::make_unique<PathSensitiveBugReport>(*BT, WarningMsg, N); 675 Report->addRange(S->getSourceRange()); 676 if (const auto *Ex = dyn_cast<Expr>(S)) 677 bugreporter::trackExpressionValue(N, Ex, *Report); 678 C.emitReport(std::move(Report)); 679 } 680 } 681 682 void CStringChecker::emitUninitializedReadBug(CheckerContext &C, 683 ProgramStateRef State, 684 const Expr *E) const { 685 if (ExplodedNode *N = C.generateErrorNode(State)) { 686 const char *Msg = 687 "Bytes string function accesses uninitialized/garbage values"; 688 if (!BT_UninitRead) 689 BT_UninitRead.reset( 690 new BuiltinBug(Filter.CheckNameCStringUninitializedRead, 691 "Accessing unitialized/garbage values", Msg)); 692 693 BuiltinBug *BT = static_cast<BuiltinBug *>(BT_UninitRead.get()); 694 695 auto Report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); 696 Report->addRange(E->getSourceRange()); 697 bugreporter::trackExpressionValue(N, E, *Report); 698 C.emitReport(std::move(Report)); 699 } 700 } 701 702 void CStringChecker::emitOutOfBoundsBug(CheckerContext &C, 703 ProgramStateRef State, const Stmt *S, 704 StringRef WarningMsg) const { 705 if (ExplodedNode *N = C.generateErrorNode(State)) { 706 if (!BT_Bounds) 707 BT_Bounds.reset(new BuiltinBug( 708 Filter.CheckCStringOutOfBounds ? 
Filter.CheckNameCStringOutOfBounds 709 : Filter.CheckNameCStringNullArg, 710 "Out-of-bound array access", 711 "Byte string function accesses out-of-bound array element")); 712 713 BuiltinBug *BT = static_cast<BuiltinBug *>(BT_Bounds.get()); 714 715 // FIXME: It would be nice to eventually make this diagnostic more clear, 716 // e.g., by referencing the original declaration or by saying *why* this 717 // reference is outside the range. 718 auto Report = std::make_unique<PathSensitiveBugReport>(*BT, WarningMsg, N); 719 Report->addRange(S->getSourceRange()); 720 C.emitReport(std::move(Report)); 721 } 722 } 723 724 void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State, 725 const Stmt *S, 726 StringRef WarningMsg) const { 727 if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) { 728 if (!BT_NotCString) 729 BT_NotCString.reset(new BuiltinBug( 730 Filter.CheckNameCStringNotNullTerm, categories::UnixAPI, 731 "Argument is not a null-terminated string.")); 732 733 auto Report = 734 std::make_unique<PathSensitiveBugReport>(*BT_NotCString, WarningMsg, N); 735 736 Report->addRange(S->getSourceRange()); 737 C.emitReport(std::move(Report)); 738 } 739 } 740 741 void CStringChecker::emitAdditionOverflowBug(CheckerContext &C, 742 ProgramStateRef State) const { 743 if (ExplodedNode *N = C.generateErrorNode(State)) { 744 if (!BT_AdditionOverflow) 745 BT_AdditionOverflow.reset( 746 new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API", 747 "Sum of expressions causes overflow.")); 748 749 // This isn't a great error message, but this should never occur in real 750 // code anyway -- you'd have to create a buffer longer than a size_t can 751 // represent, which is sort of a contradiction. 
const char *WarningMsg =
        "This expression will create a string whose length is too big to "
        "be represented as a size_t";

    auto Report = std::make_unique<PathSensitiveBugReport>(*BT_AdditionOverflow,
                                                           WarningMsg, N);
    C.emitReport(std::move(Report));
  }
}

/// Checks whether the size_t addition `left + right` can overflow.
///
/// Returns \p state unchanged when the out-of-bounds filter is disabled,
/// nullptr when \p state is already null or when the overflow is certain
/// (a bug report is emitted in that case), and otherwise the state in which
/// the addition is assumed not to overflow.
ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
                                                     ProgramStateRef state,
                                                     NonLoc left,
                                                     NonLoc right) const {
  // If out-of-bounds checking is turned off, skip the rest.
  if (!Filter.CheckCStringOutOfBounds)
    return state;

  // If a previous check has failed, propagate the failure.
  if (!state)
    return nullptr;

  SValBuilder &svalBuilder = C.getSValBuilder();
  BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();

  // The largest value representable in size_t.
  QualType sizeTy = svalBuilder.getContext().getSizeType();
  const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
  NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);

  // Rewrite "left + right > max" as "left > max - right" so the check itself
  // is not an overflowing addition. The subtrahend is 'right' only when it is
  // a concrete integer; otherwise the operands are swapped.
  SVal maxMinusRight;
  if (isa<nonloc::ConcreteInt>(right)) {
    maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
                                            sizeTy);
  } else {
    // Try switching the operands. (The order of these two assignments is
    // important!)
    maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
                                            sizeTy);
    left = right;
  }

  // If the subtraction did not produce a NonLoc, no constraint is added and
  // the incoming state is returned as is.
  if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
    QualType cmpTy = svalBuilder.getConditionType();
    // If left > max - right, we have an overflow.
    SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
                                                *maxMinusRightNL, cmpTy);

    ProgramStateRef stateOverflow, stateOkay;
    std::tie(stateOverflow, stateOkay) =
      state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());

    // Only report when the non-overflowing state is infeasible.
    if (stateOverflow && !stateOkay) {
      // We have an overflow. Emit a bug report.
      emitAdditionOverflowBug(C, stateOverflow);
      return nullptr;
    }

    // From now on, assume an overflow didn't occur.
    assert(stateOkay);
    state = stateOkay;
  }

  return state;
}

/// Records \p strLength as the C string length of \p MR (after stripping
/// casts) in the CStringLength map.
///
/// Only the region kinds listed in the switch below are tracked; for string,
/// element and all other region kinds \p state is returned unchanged. An
/// unknown length removes any existing entry instead of storing it.
/// \p strLength must not be undefined.
ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
                                                const MemRegion *MR,
                                                SVal strLength) {
  assert(!strLength.isUndef() && "Attempt to set an undefined string length");

  MR = MR->StripCasts();

  switch (MR->getKind()) {
  case MemRegion::StringRegionKind:
    // FIXME: This can happen if we strcpy() into a string region. This is
    // undefined [C99 6.4.5p6], but we should still warn about it.
    return state;

  case MemRegion::SymbolicRegionKind:
  case MemRegion::AllocaRegionKind:
  case MemRegion::NonParamVarRegionKind:
  case MemRegion::ParamVarRegionKind:
  case MemRegion::FieldRegionKind:
  case MemRegion::ObjCIvarRegionKind:
    // These are the types we can currently track string lengths for.
    break;

  case MemRegion::ElementRegionKind:
    // FIXME: Handle element regions by upper-bounding the parent region's
    // string length.
    return state;

  default:
    // Other regions (mostly non-data) can't have a reliable C string length.
    // For now, just ignore the change.
    // FIXME: These are rare but not impossible. We should output some kind of
    // warning for things like strcpy((char[]){'a', 0}, "b");
    return state;
  }

  // An unknown length carries no information, so drop the entry entirely.
  if (strLength.isUnknown())
    return state->remove<CStringLength>(MR);

  return state->set<CStringLength>(MR, strLength);
}

/// Returns the C string length associated with \p MR.
///
/// When \p hypothetical is false, a previously recorded length is returned if
/// there is one; otherwise a metadata symbol is created, constrained to be at
/// most SIZE_MAX/4, and recorded in \p state (which is updated in place).
/// When \p hypothetical is true, a metadata symbol is returned without
/// consulting or modifying the recorded lengths.
SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
                                               ProgramStateRef &state,
                                               const Expr *Ex,
                                               const MemRegion *MR,
                                               bool hypothetical) {
  if (!hypothetical) {
    // If there's a recorded length, go ahead and return it.
    const SVal *Recorded = state->get<CStringLength>(MR);
    if (Recorded)
      return *Recorded;
  }

  // Otherwise, get a new symbol and update the state.
  SValBuilder &svalBuilder = C.getSValBuilder();
  QualType sizeTy = svalBuilder.getContext().getSizeType();
  SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
                                                    MR, Ex, sizeTy,
                                                    C.getLocationContext(),
                                                    C.blockCount());

  if (!hypothetical) {
    if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
      // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
      BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
      const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
      llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
      const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
                                                        fourInt);
      NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
      // NOTE(review): the comparison is evaluated with sizeTy as its result
      // type rather than the condition type used elsewhere in this file.
      SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn,
                                                maxLength, sizeTy);
      state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
    }
    state = state->set<CStringLength>(MR, strLength);
  }

  return strLength;
}

/// Computes the C string length of the buffer value \p Buf.
///
/// Returns UndefinedVal when \p Buf is known not to be a C string (the
/// address of a label, or a region kind that cannot hold string data); a
/// not-a-C-string report is emitted in those cases if the corresponding
/// filter is enabled. Returns UnknownVal when the length cannot be
/// determined. \p state may be updated when a new length symbol is recorded.
SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
                                      const Expr *Ex, SVal Buf,
                                      bool hypothetical) const {
  const MemRegion *MR = Buf.getAsRegion();
  if (!MR) {
    // If we can't get a region, see if it's something we /know/ isn't a
    // C string. In the context of locations, the only time we can issue such
    // a warning is for labels.
    if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
      if (Filter.CheckCStringNotNullTerm) {
        SmallString<120> buf;
        llvm::raw_svector_ostream os(buf);
        assert(CurrentFunctionDescription);
        os << "Argument to " << CurrentFunctionDescription
           << " is the address of the label '" << Label->getLabel()->getName()
           << "', which is not a null-terminated string";

        emitNotCStringBug(C, state, Ex, os.str());
      }
      return UndefinedVal();
    }

    // If it's not a region and not a label, give up.
    return UnknownVal();
  }

  // If we have a region, strip casts from it and see if we can figure out
  // its length. For anything we can't figure out, just return UnknownVal.
  MR = MR->StripCasts();

  switch (MR->getKind()) {
  case MemRegion::StringRegionKind: {
    // Modifying the contents of string regions is undefined [C99 6.4.5p6],
    // so we can assume that the byte length is the correct C string length.
    SValBuilder &svalBuilder = C.getSValBuilder();
    QualType sizeTy = svalBuilder.getContext().getSizeType();
    const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
    return svalBuilder.makeIntVal(strLit->getLength(), sizeTy);
  }
  case MemRegion::SymbolicRegionKind:
  case MemRegion::AllocaRegionKind:
  case MemRegion::NonParamVarRegionKind:
  case MemRegion::ParamVarRegionKind:
  case MemRegion::FieldRegionKind:
  case MemRegion::ObjCIvarRegionKind:
    // The same region kinds that setCStringLength() tracks.
    return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
  case MemRegion::CompoundLiteralRegionKind:
    // FIXME: Can we track this? Is it necessary?
    return UnknownVal();
  case MemRegion::ElementRegionKind:
    // FIXME: How can we handle this? It's not good enough to subtract the
    // offset from the base string length; consider "123\x00567" and &a[5].
    return UnknownVal();
  default:
    // Other regions (mostly non-data) can't have a reliable C string length.
    // In this case, an error is emitted and UndefinedVal is returned.
    // The caller should always be prepared to handle this case.
    if (Filter.CheckCStringNotNullTerm) {
      SmallString<120> buf;
      llvm::raw_svector_ostream os(buf);

      assert(CurrentFunctionDescription);
      os << "Argument to " << CurrentFunctionDescription << " is ";

      // Fall back to a generic message when the region cannot be summarized.
      if (SummarizeRegion(os, C.getASTContext(), MR))
        os << ", which is not a null-terminated string";
      else
        os << "not a null-terminated string";

      emitNotCStringBug(C, state, Ex, os.str());
    }
    return UndefinedVal();
  }
}

/// Returns the string literal that \p val points to, or nullptr if \p val is
/// not a region or its cast-stripped region is not a StringRegion.
const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
  ProgramStateRef &state, const Expr *expr, SVal val) const {

  // Get the memory region pointed to by the val.
  const MemRegion *bufRegion = val.getAsRegion();
  if (!bufRegion)
    return nullptr;

  // Strip casts off the memory region.
  bufRegion = bufRegion->StripCasts();

  // Cast the memory region to a string region.
  const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
  if (!strRegion)
    return nullptr;

  // Return the actual string in the string region.
  return strRegion->getStringLiteral();
}

/// Reports whether accessing \p LengthVal bytes starting at \p BufVal may
/// stay within the extent of the buffer's super region.
///
/// Returns false when \p LengthVal is unknown or when the last accessed
/// element cannot be assumed to be in bound; returns true when the access may
/// be in bound and in every other case where nothing can be computed.
bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
                                       SVal BufVal, QualType BufTy,
                                       SVal LengthVal, QualType LengthTy) {
  // Except for an unknown length (handled first, and answered with 'false'),
  // whenever we cannot determine whether the buffer is long enough we return
  // 'true'. Otherwise the parent region of this field region would also get
  // invalidated, which would lead to warnings based on an unknown state.

  if (LengthVal.isUnknown())
    return false;

  // Originally copied from CheckBufferAccess and CheckLocation.
  SValBuilder &SB = C.getSValBuilder();
  ASTContext &Ctx = C.getASTContext();

  QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);

  std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
  if (!Length)
    return true; // cf top comment.

  // Compute the offset of the last element to be accessed: size-1.
  NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>();
  SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy);
  if (Offset.isUnknown())
    return true; // cf top comment
  NonLoc LastOffset = Offset.castAs<NonLoc>();

  // Check that the first buffer is sufficiently long.
  SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy);
  std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
  if (!BufLoc)
    return true; // cf top comment.

  SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);

  // Check for out of bound array element access.
  const MemRegion *R = BufEnd.getAsRegion();
  if (!R)
    return true; // cf top comment.

  const ElementRegion *ER = dyn_cast<ElementRegion>(R);
  if (!ER)
    return true; // cf top comment.

  // FIXME: Does this crash when a non-standard definition
  // of a library function is encountered?
  assert(ER->getValueType() == C.getASTContext().CharTy &&
         "isFirstBufInBound should only be called with char* ElementRegions");

  // Get the size of the array.
  const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
  DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB);

  // Get the index of the accessed element.
  DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();

  // The access may be in bound iff the "index is in bound" assumption is
  // feasible.
  ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true);

  return static_cast<bool>(StInBound);
}

/// Invalidates the destination buffer \p BufV of a write of \p SizeV bytes.
///
/// If the invalidated region is a field and isFirstBufInBound() holds for the
/// access, the field's super region is excluded from invalidation. The
/// invalidation is never treated as a pointer escape.
ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
    CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV,
    SVal SizeV, QualType SizeTy) {
  auto InvalidationTraitOperations =
      [&C, S, BufTy = BufE->getType(), BufV, SizeV,
       SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
        // If destination buffer is a field region and access is in bound, do
        // not invalidate its super region.
        if (MemRegion::FieldRegionKind == R->getKind() &&
            isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) {
          ITraits.setTrait(
              R,
              RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
        }
        // invalidateBufferAux() forwards this as "causes pointer escape".
        return false;
      };

  return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
}

/// Invalidates the destination buffer \p BufV without setting any
/// invalidation traits; the invalidation is treated as a pointer escape
/// exactly when the invalidated region is a field region.
ProgramStateRef
CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion(
    CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
  auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &,
                                        const MemRegion *R) {
    return isa<FieldRegion>(R);
  };

  return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
}

/// Invalidates the destination buffer \p BufV for a write that is taken not
/// to overflow: a field region's super region is always excluded from
/// invalidation, and the invalidation is never treated as a pointer escape.
ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows(
    CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
  auto InvalidationTraitOperations =
      [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
        if (MemRegion::FieldRegionKind == R->getKind())
          ITraits.setTrait(
              R,
              RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
        return false;
      };

  return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
}

ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C, 1099 ProgramStateRef S, 1100 const Expr *BufE, 1101 SVal BufV) { 1102 auto InvalidationTraitOperations = 1103 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) { 1104 ITraits.setTrait( 1105 R->getBaseRegion(), 1106 RegionAndSymbolInvalidationTraits::TK_PreserveContents); 1107 ITraits.setTrait(R, 1108 RegionAndSymbolInvalidationTraits::TK_SuppressEscape); 1109 return true; 1110 }; 1111 1112 return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations); 1113 } 1114 1115 ProgramStateRef CStringChecker::invalidateBufferAux( 1116 CheckerContext &C, ProgramStateRef State, const Expr *E, SVal V, 1117 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &, 1118 const MemRegion *)> 1119 InvalidationTraitOperations) { 1120 std::optional<Loc> L = V.getAs<Loc>(); 1121 if (!L) 1122 return State; 1123 1124 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 1125 // some assumptions about the value that CFRefCount can't. Even so, it should 1126 // probably be refactored. 1127 if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) { 1128 const MemRegion *R = MR->getRegion()->StripCasts(); 1129 1130 // Are we dealing with an ElementRegion? If so, we should be invalidating 1131 // the super-region. 1132 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 1133 R = ER->getSuperRegion(); 1134 // FIXME: What about layers of ElementRegions? 1135 } 1136 1137 // Invalidate this region. 1138 const LocationContext *LCtx = C.getPredecessor()->getLocationContext(); 1139 RegionAndSymbolInvalidationTraits ITraits; 1140 bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R); 1141 1142 return State->invalidateRegions(R, E, C.blockCount(), LCtx, 1143 CausesPointerEscape, nullptr, nullptr, 1144 &ITraits); 1145 } 1146 1147 // If we have a non-region value by chance, just remove the binding. 
1148 // FIXME: is this necessary or correct? This handles the non-Region 1149 // cases. Is it ever valid to store to these? 1150 return State->killBinding(*L); 1151 } 1152 1153 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 1154 const MemRegion *MR) { 1155 switch (MR->getKind()) { 1156 case MemRegion::FunctionCodeRegionKind: { 1157 if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl()) 1158 os << "the address of the function '" << *FD << '\''; 1159 else 1160 os << "the address of a function"; 1161 return true; 1162 } 1163 case MemRegion::BlockCodeRegionKind: 1164 os << "block text"; 1165 return true; 1166 case MemRegion::BlockDataRegionKind: 1167 os << "a block"; 1168 return true; 1169 case MemRegion::CXXThisRegionKind: 1170 case MemRegion::CXXTempObjectRegionKind: 1171 os << "a C++ temp object of type " 1172 << cast<TypedValueRegion>(MR)->getValueType(); 1173 return true; 1174 case MemRegion::NonParamVarRegionKind: 1175 os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType(); 1176 return true; 1177 case MemRegion::ParamVarRegionKind: 1178 os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType(); 1179 return true; 1180 case MemRegion::FieldRegionKind: 1181 os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType(); 1182 return true; 1183 case MemRegion::ObjCIvarRegionKind: 1184 os << "an instance variable of type " 1185 << cast<TypedValueRegion>(MR)->getValueType(); 1186 return true; 1187 default: 1188 return false; 1189 } 1190 } 1191 1192 bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal, 1193 const Expr *Size, CheckerContext &C, 1194 ProgramStateRef &State) { 1195 SVal MemVal = C.getSVal(DstBuffer); 1196 SVal SizeVal = C.getSVal(Size); 1197 const MemRegion *MR = MemVal.getAsRegion(); 1198 if (!MR) 1199 return false; 1200 1201 // We're about to model memset by producing a "default binding" in the Store. 
1202 // Our current implementation - RegionStore - doesn't support default bindings 1203 // that don't cover the whole base region. So we should first get the offset 1204 // and the base region to figure out whether the offset of buffer is 0. 1205 RegionOffset Offset = MR->getAsOffset(); 1206 const MemRegion *BR = Offset.getRegion(); 1207 1208 std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>(); 1209 if (!SizeNL) 1210 return false; 1211 1212 SValBuilder &svalBuilder = C.getSValBuilder(); 1213 ASTContext &Ctx = C.getASTContext(); 1214 1215 // void *memset(void *dest, int ch, size_t count); 1216 // For now we can only handle the case of offset is 0 and concrete char value. 1217 if (Offset.isValid() && !Offset.hasSymbolicOffset() && 1218 Offset.getOffset() == 0) { 1219 // Get the base region's size. 1220 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder); 1221 1222 ProgramStateRef StateWholeReg, StateNotWholeReg; 1223 std::tie(StateWholeReg, StateNotWholeReg) = 1224 State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL)); 1225 1226 // With the semantic of 'memset()', we should convert the CharVal to 1227 // unsigned char. 1228 CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy); 1229 1230 ProgramStateRef StateNullChar, StateNonNullChar; 1231 std::tie(StateNullChar, StateNonNullChar) = 1232 assumeZero(C, State, CharVal, Ctx.UnsignedCharTy); 1233 1234 if (StateWholeReg && !StateNotWholeReg && StateNullChar && 1235 !StateNonNullChar) { 1236 // If the 'memset()' acts on the whole region of destination buffer and 1237 // the value of the second argument of 'memset()' is zero, bind the second 1238 // argument's value to the destination buffer with 'default binding'. 1239 // FIXME: Since there is no perfect way to bind the non-zero character, we 1240 // can only deal with zero value here. In the future, we need to deal with 1241 // the binding of non-zero value in the case of whole region. 
1242 State = State->bindDefaultZero(svalBuilder.makeLoc(BR), 1243 C.getLocationContext()); 1244 } else { 1245 // If the destination buffer's extent is not equal to the value of 1246 // third argument, just invalidate buffer. 1247 State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal, 1248 SizeVal, Size->getType()); 1249 } 1250 1251 if (StateNullChar && !StateNonNullChar) { 1252 // If the value of the second argument of 'memset()' is zero, set the 1253 // string length of destination buffer to 0 directly. 1254 State = setCStringLength(State, MR, 1255 svalBuilder.makeZeroVal(Ctx.getSizeType())); 1256 } else if (!StateNullChar && StateNonNullChar) { 1257 SVal NewStrLen = svalBuilder.getMetadataSymbolVal( 1258 CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(), 1259 C.getLocationContext(), C.blockCount()); 1260 1261 // If the value of second argument is not zero, then the string length 1262 // is at least the size argument. 1263 SVal NewStrLenGESize = svalBuilder.evalBinOp( 1264 State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType()); 1265 1266 State = setCStringLength( 1267 State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true), 1268 MR, NewStrLen); 1269 } 1270 } else { 1271 // If the offset is not zero and char value is not concrete, we can do 1272 // nothing but invalidate the buffer. 1273 State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal, 1274 SizeVal, Size->getType()); 1275 } 1276 return true; 1277 } 1278 1279 //===----------------------------------------------------------------------===// 1280 // evaluation of individual function calls. 
1281 //===----------------------------------------------------------------------===// 1282 1283 void CStringChecker::evalCopyCommon(CheckerContext &C, const CallExpr *CE, 1284 ProgramStateRef state, SizeArgExpr Size, 1285 DestinationArgExpr Dest, 1286 SourceArgExpr Source, bool Restricted, 1287 bool IsMempcpy, CharKind CK) const { 1288 CurrentFunctionDescription = "memory copy function"; 1289 1290 // See if the size argument is zero. 1291 const LocationContext *LCtx = C.getLocationContext(); 1292 SVal sizeVal = state->getSVal(Size.Expression, LCtx); 1293 QualType sizeTy = Size.Expression->getType(); 1294 1295 ProgramStateRef stateZeroSize, stateNonZeroSize; 1296 std::tie(stateZeroSize, stateNonZeroSize) = 1297 assumeZero(C, state, sizeVal, sizeTy); 1298 1299 // Get the value of the Dest. 1300 SVal destVal = state->getSVal(Dest.Expression, LCtx); 1301 1302 // If the size is zero, there won't be any actual memory access, so 1303 // just bind the return value to the destination buffer and return. 1304 if (stateZeroSize && !stateNonZeroSize) { 1305 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal); 1306 C.addTransition(stateZeroSize); 1307 return; 1308 } 1309 1310 // If the size can be nonzero, we have to check the other arguments. 1311 if (stateNonZeroSize) { 1312 state = stateNonZeroSize; 1313 1314 // Ensure the destination is not null. If it is NULL there will be a 1315 // NULL pointer dereference. 1316 state = checkNonNull(C, state, Dest, destVal); 1317 if (!state) 1318 return; 1319 1320 // Get the value of the Src. 1321 SVal srcVal = state->getSVal(Source.Expression, LCtx); 1322 1323 // Ensure the source is not null. If it is NULL there will be a 1324 // NULL pointer dereference. 1325 state = checkNonNull(C, state, Source, srcVal); 1326 if (!state) 1327 return; 1328 1329 // Ensure the accesses are valid and that the buffers do not overlap. 
1330 state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK); 1331 state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK); 1332 1333 if (Restricted) 1334 state = CheckOverlap(C, state, Size, Dest, Source, CK); 1335 1336 if (!state) 1337 return; 1338 1339 // If this is mempcpy, get the byte after the last byte copied and 1340 // bind the expr. 1341 if (IsMempcpy) { 1342 // Get the byte after the last byte copied. 1343 SValBuilder &SvalBuilder = C.getSValBuilder(); 1344 ASTContext &Ctx = SvalBuilder.getContext(); 1345 QualType CharPtrTy = getCharPtrType(Ctx, CK); 1346 SVal DestRegCharVal = 1347 SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType()); 1348 SVal lastElement = C.getSValBuilder().evalBinOp( 1349 state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType()); 1350 // If we don't know how much we copied, we can at least 1351 // conjure a return value for later. 1352 if (lastElement.isUnknown()) 1353 lastElement = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, 1354 C.blockCount()); 1355 1356 // The byte after the last byte copied is the return value. 1357 state = state->BindExpr(CE, LCtx, lastElement); 1358 } else { 1359 // All other copies return the destination buffer. 1360 // (Well, bcopy() has a void return type, but this won't hurt.) 1361 state = state->BindExpr(CE, LCtx, destVal); 1362 } 1363 1364 // Invalidate the destination (regular invalidation without pointer-escaping 1365 // the address of the top-level region). 1366 // FIXME: Even if we can't perfectly model the copy, we should see if we 1367 // can use LazyCompoundVals to copy the source values into the destination. 1368 // This would probably remove any existing bindings past the end of the 1369 // copied region, but that's still an improvement over blank invalidation. 
1370 state = invalidateDestinationBufferBySize( 1371 C, state, Dest.Expression, C.getSVal(Dest.Expression), sizeVal, 1372 Size.Expression->getType()); 1373 1374 // Invalidate the source (const-invalidation without const-pointer-escaping 1375 // the address of the top-level region). 1376 state = invalidateSourceBuffer(C, state, Source.Expression, 1377 C.getSVal(Source.Expression)); 1378 1379 C.addTransition(state); 1380 } 1381 } 1382 1383 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE, 1384 CharKind CK) const { 1385 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 1386 // The return value is the address of the destination buffer. 1387 DestinationArgExpr Dest = {CE->getArg(0), 0}; 1388 SourceArgExpr Src = {CE->getArg(1), 1}; 1389 SizeArgExpr Size = {CE->getArg(2), 2}; 1390 1391 ProgramStateRef State = C.getState(); 1392 1393 constexpr bool IsRestricted = true; 1394 constexpr bool IsMempcpy = false; 1395 evalCopyCommon(C, CE, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK); 1396 } 1397 1398 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE, 1399 CharKind CK) const { 1400 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n); 1401 // The return value is a pointer to the byte following the last written byte. 1402 DestinationArgExpr Dest = {CE->getArg(0), 0}; 1403 SourceArgExpr Src = {CE->getArg(1), 1}; 1404 SizeArgExpr Size = {CE->getArg(2), 2}; 1405 1406 constexpr bool IsRestricted = true; 1407 constexpr bool IsMempcpy = true; 1408 evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy, 1409 CK); 1410 } 1411 1412 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE, 1413 CharKind CK) const { 1414 // void *memmove(void *dst, const void *src, size_t n); 1415 // The return value is the address of the destination buffer. 
1416 DestinationArgExpr Dest = {CE->getArg(0), 0}; 1417 SourceArgExpr Src = {CE->getArg(1), 1}; 1418 SizeArgExpr Size = {CE->getArg(2), 2}; 1419 1420 constexpr bool IsRestricted = false; 1421 constexpr bool IsMempcpy = false; 1422 evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy, 1423 CK); 1424 } 1425 1426 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const { 1427 // void bcopy(const void *src, void *dst, size_t n); 1428 SourceArgExpr Src(CE->getArg(0), 0); 1429 DestinationArgExpr Dest = {CE->getArg(1), 1}; 1430 SizeArgExpr Size = {CE->getArg(2), 2}; 1431 1432 constexpr bool IsRestricted = false; 1433 constexpr bool IsMempcpy = false; 1434 evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy, 1435 CharKind::Regular); 1436 } 1437 1438 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE, 1439 CharKind CK) const { 1440 // int memcmp(const void *s1, const void *s2, size_t n); 1441 CurrentFunctionDescription = "memory comparison function"; 1442 1443 AnyArgExpr Left = {CE->getArg(0), 0}; 1444 AnyArgExpr Right = {CE->getArg(1), 1}; 1445 SizeArgExpr Size = {CE->getArg(2), 2}; 1446 1447 ProgramStateRef State = C.getState(); 1448 SValBuilder &Builder = C.getSValBuilder(); 1449 const LocationContext *LCtx = C.getLocationContext(); 1450 1451 // See if the size argument is zero. 1452 SVal sizeVal = State->getSVal(Size.Expression, LCtx); 1453 QualType sizeTy = Size.Expression->getType(); 1454 1455 ProgramStateRef stateZeroSize, stateNonZeroSize; 1456 std::tie(stateZeroSize, stateNonZeroSize) = 1457 assumeZero(C, State, sizeVal, sizeTy); 1458 1459 // If the size can be zero, the result will be 0 in that case, and we don't 1460 // have to check either of the buffers. 
1461 if (stateZeroSize) { 1462 State = stateZeroSize; 1463 State = State->BindExpr(CE, LCtx, Builder.makeZeroVal(CE->getType())); 1464 C.addTransition(State); 1465 } 1466 1467 // If the size can be nonzero, we have to check the other arguments. 1468 if (stateNonZeroSize) { 1469 State = stateNonZeroSize; 1470 // If we know the two buffers are the same, we know the result is 0. 1471 // First, get the two buffers' addresses. Another checker will have already 1472 // made sure they're not undefined. 1473 DefinedOrUnknownSVal LV = 1474 State->getSVal(Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>(); 1475 DefinedOrUnknownSVal RV = 1476 State->getSVal(Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>(); 1477 1478 // See if they are the same. 1479 ProgramStateRef SameBuffer, NotSameBuffer; 1480 std::tie(SameBuffer, NotSameBuffer) = 1481 State->assume(Builder.evalEQ(State, LV, RV)); 1482 1483 // If the two arguments are the same buffer, we know the result is 0, 1484 // and we only need to check one size. 1485 if (SameBuffer && !NotSameBuffer) { 1486 State = SameBuffer; 1487 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read); 1488 if (State) { 1489 State = 1490 SameBuffer->BindExpr(CE, LCtx, Builder.makeZeroVal(CE->getType())); 1491 C.addTransition(State); 1492 } 1493 return; 1494 } 1495 1496 // If the two arguments might be different buffers, we have to check 1497 // the size of both of them. 1498 assert(NotSameBuffer); 1499 State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK); 1500 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK); 1501 if (State) { 1502 // The return value is the comparison result, which we don't know. 
1503 SVal CmpV = Builder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); 1504 State = State->BindExpr(CE, LCtx, CmpV); 1505 C.addTransition(State); 1506 } 1507 } 1508 } 1509 1510 void CStringChecker::evalstrLength(CheckerContext &C, 1511 const CallExpr *CE) const { 1512 // size_t strlen(const char *s); 1513 evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); 1514 } 1515 1516 void CStringChecker::evalstrnLength(CheckerContext &C, 1517 const CallExpr *CE) const { 1518 // size_t strnlen(const char *s, size_t maxlen); 1519 evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); 1520 } 1521 1522 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 1523 bool IsStrnlen) const { 1524 CurrentFunctionDescription = "string length function"; 1525 ProgramStateRef state = C.getState(); 1526 const LocationContext *LCtx = C.getLocationContext(); 1527 1528 if (IsStrnlen) { 1529 const Expr *maxlenExpr = CE->getArg(1); 1530 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); 1531 1532 ProgramStateRef stateZeroSize, stateNonZeroSize; 1533 std::tie(stateZeroSize, stateNonZeroSize) = 1534 assumeZero(C, state, maxlenVal, maxlenExpr->getType()); 1535 1536 // If the size can be zero, the result will be 0 in that case, and we don't 1537 // have to check the string itself. 1538 if (stateZeroSize) { 1539 SVal zero = C.getSValBuilder().makeZeroVal(CE->getType()); 1540 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero); 1541 C.addTransition(stateZeroSize); 1542 } 1543 1544 // If the size is GUARANTEED to be zero, we're done! 1545 if (!stateNonZeroSize) 1546 return; 1547 1548 // Otherwise, record the assumption that the size is nonzero. 1549 state = stateNonZeroSize; 1550 } 1551 1552 // Check that the string argument is non-null. 
1553 AnyArgExpr Arg = {CE->getArg(0), 0}; 1554 SVal ArgVal = state->getSVal(Arg.Expression, LCtx); 1555 state = checkNonNull(C, state, Arg, ArgVal); 1556 1557 if (!state) 1558 return; 1559 1560 SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal); 1561 1562 // If the argument isn't a valid C string, there's no valid state to 1563 // transition to. 1564 if (strLength.isUndef()) 1565 return; 1566 1567 DefinedOrUnknownSVal result = UnknownVal(); 1568 1569 // If the check is for strnlen() then bind the return value to no more than 1570 // the maxlen value. 1571 if (IsStrnlen) { 1572 QualType cmpTy = C.getSValBuilder().getConditionType(); 1573 1574 // It's a little unfortunate to be getting this again, 1575 // but it's not that expensive... 1576 const Expr *maxlenExpr = CE->getArg(1); 1577 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); 1578 1579 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>(); 1580 std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>(); 1581 1582 if (strLengthNL && maxlenValNL) { 1583 ProgramStateRef stateStringTooLong, stateStringNotTooLong; 1584 1585 // Check if the strLength is greater than the maxlen. 1586 std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume( 1587 C.getSValBuilder() 1588 .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy) 1589 .castAs<DefinedOrUnknownSVal>()); 1590 1591 if (stateStringTooLong && !stateStringNotTooLong) { 1592 // If the string is longer than maxlen, return maxlen. 1593 result = *maxlenValNL; 1594 } else if (stateStringNotTooLong && !stateStringTooLong) { 1595 // If the string is shorter than maxlen, return its length. 1596 result = *strLengthNL; 1597 } 1598 } 1599 1600 if (result.isUnknown()) { 1601 // If we don't have enough information for a comparison, there's 1602 // no guarantee the full string length will actually be returned. 1603 // All we know is the return value is the min of the string length 1604 // and the limit. 
// This is better than nothing.
      result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
                                                   C.blockCount());
      NonLoc resultNL = result.castAs<NonLoc>();

      if (strLengthNL) {
        state = state->assume(C.getSValBuilder().evalBinOpNN(
                                  state, BO_LE, resultNL, *strLengthNL, cmpTy)
                                  .castAs<DefinedOrUnknownSVal>(), true);
      }

      if (maxlenValNL) {
        state = state->assume(C.getSValBuilder().evalBinOpNN(
                                  state, BO_LE, resultNL, *maxlenValNL, cmpTy)
                                  .castAs<DefinedOrUnknownSVal>(), true);
      }
    }

  } else {
    // This is a plain strlen(), not strnlen().
    result = strLength.castAs<DefinedOrUnknownSVal>();

    // If we don't know the length of the string, conjure a return
    // value, so it can be used in constraints, at least.
    if (result.isUnknown()) {
      result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
                                                   C.blockCount());
    }
  }

  // Bind the return value.
  assert(!result.isUnknown() && "Should have conjured a value by now");
  state = state->BindExpr(CE, LCtx, result);
  C.addTransition(state);
}

/// Models strcpy(): an unbounded, non-appending copy. The trailing returnPtr
/// argument is omitted here, so its default (declared outside this chunk,
/// presumably `true` -- TODO confirm) applies.
void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
  // char *strcpy(char *restrict dst, const char *restrict src);
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ false,
                   /* appendK = */ ConcatFnKind::none);
}

/// Models strncpy(): a non-appending copy bounded by the third argument.
void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
  // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ true,
                   /* appendK = */ ConcatFnKind::none);
}

/// Models stpcpy(): like strcpy(), but ReturnEnd is set so the common code
/// tries to return dst + final length instead of dst.
void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
  // char *stpcpy(char *restrict dst, const char *restrict src);
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ true,
                   /* IsBounded = */ false,
                   /* appendK = */ ConcatFnKind::none);
}

/// Models strlcpy(): a bounded copy whose call expression is bound to a size
/// value rather than a pointer (returnPtr == false). ReturnEnd is only
/// consulted by the common code when returnPtr is true, so its value here has
/// no effect.
void CStringChecker::evalStrlcpy(CheckerContext &C, const CallExpr *CE) const {
  // size_t strlcpy(char *dest, const char *src, size_t size);
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ true,
                   /* IsBounded = */ true,
                   /* appendK = */ ConcatFnKind::none,
                   /* returnPtr = */ false);
}

/// Models strcat(): an unbounded append.
void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
  // char *strcat(char *restrict s1, const char *restrict s2);
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ false,
                   /* appendK = */ ConcatFnKind::strcat);
}

/// Models strncat(): an append bounded by the third argument. It shares
/// ConcatFnKind::strcat with strcat(); only IsBounded differs.
void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
  // char *strncat(char *restrict s1, const char *restrict s2, size_t n);
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ true,
                   /* appendK = */ ConcatFnKind::strcat);
}

/// Models strlcat(): a bounded append whose call expression is bound to a
/// size value rather than a pointer (returnPtr == false).
void CStringChecker::evalStrlcat(CheckerContext &C, const CallExpr *CE) const {
  // size_t strlcat(char *dst, const char *src, size_t size);
  // It will append at most size - strlen(dst) - 1 bytes,
  // NULL-terminating the result.
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ true,
                   /* appendK = */ ConcatFnKind::strlcat,
                   /* returnPtr = */ false);
}

/// Shared model for the strcpy/strcat family.
///
/// \param C          Checker context; supplies the state and receives the
///                   final transition.
/// \param CE         The call. Argument 0 is the destination, argument 1 the
///                   source, and (when IsBounded) argument 2 the size bound.
/// \param ReturnEnd  Only consulted when returnPtr is true: if set, the result
///                   is dst + final length (conjured if that is unknown);
///                   otherwise the result is the destination pointer.
/// \param IsBounded  Whether argument 2 limits the copy.
/// \param appendK    none for copy-over functions, strcat / strlcat for
///                   appending functions.
/// \param returnPtr  If false, the call is bound to a size value (strlRetVal
///                   for the none and strlcat kinds) instead of a pointer.
///
/// The function returns without adding a transition whenever one of the
/// checks (null argument, overlap, bounds, addition overflow) yields a null
/// state, or when a required string length is undefined.
void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
                                      bool ReturnEnd, bool IsBounded,
                                      ConcatFnKind appendK,
                                      bool returnPtr) const {
  // Select the description used by diagnostics emitted from the checks below.
  if (appendK == ConcatFnKind::none)
    CurrentFunctionDescription = "string copy function";
  else
    CurrentFunctionDescription = "string concatenation function";

  ProgramStateRef state = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the destination is non-null.
  DestinationArgExpr Dst = {CE->getArg(0), 0};
  SVal DstVal = state->getSVal(Dst.Expression, LCtx);
  state = checkNonNull(C, state, Dst, DstVal);
  if (!state)
    return;

  // Check that the source is non-null.
  SourceArgExpr srcExpr = {CE->getArg(1), 1};
  SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
  state = checkNonNull(C, state, srcExpr, srcVal);
  if (!state)
    return;

  // Get the string length of the source.
  SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
  std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();

  // Get the string length of the destination buffer.
  // Note that this is computed for every caller, including the non-appending
  // ones; it is only rejected as undefined further down, on the append paths.
  SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
  std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();

  // If the source isn't a valid C string, give up.
  if (strLength.isUndef())
    return;

  SValBuilder &svalBuilder = C.getSValBuilder();
  QualType cmpTy = svalBuilder.getConditionType();
  QualType sizeTy = svalBuilder.getContext().getSizeType();

  // These two values allow checking two kinds of errors:
  // - actual overflows caused by a source that doesn't fit in the destination
  // - potential overflows caused by a bound that could exceed the destination
  // boundWarning doubles as a flag: once it is set, the bound-based check
  // replaces the check on the actually copied length below.
  SVal amountCopied = UnknownVal();
  SVal maxLastElementIndex = UnknownVal();
  const char *boundWarning = nullptr;

  // FIXME: Why do we choose the srcExpr if the access has no size?
  //  Note that the 3rd argument of the call would be the size parameter.
  SizeArgExpr SrcExprAsSizeDummy = {srcExpr.Expression, srcExpr.ArgumentIndex};
  state = CheckOverlap(
      C, state,
      (IsBounded ? SizeArgExpr{CE->getArg(2), 2} : SrcExprAsSizeDummy), Dst,
      srcExpr);

  if (!state)
    return;

  // If the function is strncpy, strncat, etc... it is bounded.
  if (IsBounded) {
    // Get the max number of characters to copy.
    SizeArgExpr lenExpr = {CE->getArg(2), 2};
    SVal lenVal = state->getSVal(lenExpr.Expression, LCtx);

    // Protect against misdeclared strncpy().
    lenVal =
        svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());

    std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();

    // If we know both values, we might be able to figure out how much
    // we're copying.
    if (strLengthNL && lenValNL) {
      switch (appendK) {
      case ConcatFnKind::none:
      case ConcatFnKind::strcat: {
        ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
        // Check if the max number to copy is less than the length of the src.
        // If the bound is equal to the source length, strncpy won't null-
        // terminate the result!
        std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
            svalBuilder
                .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
                .castAs<DefinedOrUnknownSVal>());

        if (stateSourceTooLong && !stateSourceNotTooLong) {
          // Max number to copy is less than the length of the src, so the
          // actual strLength copied is the max number arg.
          state = stateSourceTooLong;
          amountCopied = lenVal;

        } else if (!stateSourceTooLong && stateSourceNotTooLong) {
          // The source buffer entirely fits in the bound.
          state = stateSourceNotTooLong;
          amountCopied = strLength;
        }
        // If both outcomes are feasible, neither branch runs: the state is
        // left unsplit and amountCopied stays unknown.
        break;
      }
      case ConcatFnKind::strlcat:
        if (!dstStrLengthNL)
          return;

        // amountCopied = min (size - dstLen - 1 , srcLen)
        // NOTE(review): both subtractions are evaluated in size_t; whether a
        // bound <= dstLen is meant to wrap here is not visible from this
        // function -- confirm.
        SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
                                                 *dstStrLengthNL, sizeTy);
        if (!isa<NonLoc>(freeSpace))
          return;
        freeSpace =
            svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
                                  svalBuilder.makeIntVal(1, sizeTy), sizeTy);
        std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();

        // While unlikely, it is possible that the subtraction is
        // too complex to compute, let's check whether it succeeded.
        if (!freeSpaceNL)
          return;
        SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
            state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);

        // Unlike the strncpy/strncat case above, the outcome is only used to
        // pick amountCopied; `state` itself is not narrowed.
        ProgramStateRef TrueState, FalseState;
        std::tie(TrueState, FalseState) =
            state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());

        // srcStrLength <= size - dstStrLength -1
        if (TrueState && !FalseState) {
          amountCopied = strLength;
        }

        // srcStrLength > size - dstStrLength -1
        if (!TrueState && FalseState) {
          amountCopied = freeSpace;
        }

        if (TrueState && FalseState)
          amountCopied = UnknownVal();
        break;
      }
    }
    // We still want to know if the bound is known to be too large.
    if (lenValNL) {
      switch (appendK) {
      case ConcatFnKind::strcat:
        // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)

        // Get the string length of the destination. If the destination is
        // memory that can't have a string length, we shouldn't be copying
        // into it anyway.
        if (dstStrLength.isUndef())
          return;

        if (dstStrLengthNL) {
          maxLastElementIndex = svalBuilder.evalBinOpNN(
              state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);

          boundWarning = "Size argument is greater than the free space in the "
                         "destination buffer";
        }
        break;
      case ConcatFnKind::none:
      case ConcatFnKind::strlcat:
        // For strncpy and strlcat, this is just checking
        //  that lenVal <= sizeof(dst).
        // (Yes, strncpy and strncat differ in how they treat termination.
        // strncat ALWAYS terminates, but strncpy doesn't.)

        // We need a special case for when the copy size is zero, in which
        // case strncpy will do no work at all. Our bounds check uses n-1
        // as the last element accessed, so n == 0 is problematic.
        ProgramStateRef StateZeroSize, StateNonZeroSize;
        std::tie(StateZeroSize, StateNonZeroSize) =
            assumeZero(C, state, *lenValNL, sizeTy);

        // If the size is known to be zero, we're done.
        if (StateZeroSize && !StateNonZeroSize) {
          if (returnPtr) {
            StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
          } else {
            if (appendK == ConcatFnKind::none) {
              // strlcpy returns strlen(src)
              StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, strLength);
            } else {
              // strlcat returns strlen(src) + strlen(dst)
              SVal retSize = svalBuilder.evalBinOp(
                  state, BO_Add, strLength, dstStrLength, sizeTy);
              StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, retSize);
            }
          }
          C.addTransition(StateZeroSize);
          return;
        }

        // Otherwise, go ahead and figure out the last element we'll touch.
        // We don't record the non-zero assumption here because we can't
        // be sure. We won't warn on a possible zero.
        NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
        maxLastElementIndex =
            svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
        boundWarning = "Size argument is greater than the length of the "
                       "destination buffer";
        break;
      }
    }
  } else {
    // The function isn't bounded. The amount copied should match the length
    // of the source buffer.
    amountCopied = strLength;
  }

  assert(state);

  // This represents the number of characters copied into the destination
  // buffer. (It may not actually be the strlen if the destination buffer
  // is not terminated.)
  SVal finalStrLength = UnknownVal();
  // Value bound to the call when returnPtr is false (strlcpy / strlcat).
  SVal strlRetVal = UnknownVal();

  if (appendK == ConcatFnKind::none && !returnPtr) {
    // strlcpy returns strlen(src), not the number of bytes actually copied.
    strlRetVal = strLength;
  }

  // If this is an appending function (strcat, strncat...) then set the
  // string length to strlen(src) + strlen(dst) since the buffer will
  // ultimately contain both.
  if (appendK != ConcatFnKind::none) {
    // Get the string length of the destination. If the destination is memory
    // that can't have a string length, we shouldn't be copying into it anyway.
    if (dstStrLength.isUndef())
      return;

    // strlcat returns strlen(src) + strlen(dst); this can only be expressed
    // when both lengths are NonLoc values.
    if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
      strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
                                           *dstStrLengthNL, sizeTy);
    }

    std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();

    // If we know both string lengths, we might know the final string length.
    if (amountCopiedNL && dstStrLengthNL) {
      // Make sure the two lengths together don't overflow a size_t.
      state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
      if (!state)
        return;

      finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
                                               *dstStrLengthNL, sizeTy);
    }

    // If we couldn't get a single value for the final string length,
    // we can at least bound it by the individual lengths.
    if (finalStrLength.isUnknown()) {
      // Try to get a "hypothetical" string length symbol, which we can later
      // set as a real value if that turns out to be the case.
      finalStrLength = getCStringLength(C, state, CE, DstVal, true);
      assert(!finalStrLength.isUndef());

      if (std::optional<NonLoc> finalStrLengthNL =
              finalStrLength.getAs<NonLoc>()) {
        // NOTE(review): this branch sits inside `appendK != none`, so the
        // `appendK == ConcatFnKind::none` condition can never hold here.
        if (amountCopiedNL && appendK == ConcatFnKind::none) {
          // we overwrite dst string with the src
          // finalStrLength >= srcStrLength
          SVal sourceInResult = svalBuilder.evalBinOpNN(
              state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
          state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
                                true);
          if (!state)
            return;
        }

        if (dstStrLengthNL && appendK != ConcatFnKind::none) {
          // we extend the dst string with the src
          // finalStrLength >= dstStrLength
          SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
                                                      *finalStrLengthNL,
                                                      *dstStrLengthNL,
                                                      cmpTy);
          state =
              state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
          if (!state)
            return;
        }
      }
    }

  } else {
    // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
    // the final string length will match the input string length.
    finalStrLength = amountCopied;
  }

  SVal Result;

  if (returnPtr) {
    // The final result of the function will either be a pointer past the last
    // copied element, or a pointer to the start of the destination buffer.
    // For ReturnEnd the value is filled in later, once the final length has
    // been resolved (or conjured if it cannot be).
    Result = (ReturnEnd ? UnknownVal() : DstVal);
  } else {
    if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
      //strlcpy, strlcat
      Result = strlRetVal;
    else
      Result = finalStrLength;
  }

  assert(state);

  // If the destination is a MemRegion, try to check for a buffer overflow and
  // record the new string length.
  if (std::optional<loc::MemRegionVal> dstRegVal =
          DstVal.getAs<loc::MemRegionVal>()) {
    QualType ptrTy = Dst.Expression->getType();

    // If we have an exact value on a bounded copy, use that to check for
    // overflows, rather than our estimate about how much is actually copied.
    if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
      SVal maxLastElement =
          svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);

      state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
      if (!state)
        return;
    }

    // Then, if the final length is known...
    if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
      SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
                                                 *knownStrLength, ptrTy);

      // ...and we haven't checked the bound, we'll check the actual copy.
      if (!boundWarning) {
        state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
        if (!state)
          return;
      }

      // If this is a stpcpy-style copy, the last element is the return value.
      if (returnPtr && ReturnEnd)
        Result = lastElement;
    }

    // Invalidate the destination (regular invalidation without pointer-escaping
    // the address of the top-level region). This must happen before we set the
    // C string length because invalidation will clear the length.
    // FIXME: Even if we can't perfectly model the copy, we should see if we
    // can use LazyCompoundVals to copy the source values into the destination.
    // This would probably remove any existing bindings past the end of the
    // string, but that's still an improvement over blank invalidation.
    state = invalidateDestinationBufferBySize(C, state, Dst.Expression,
                                              *dstRegVal, amountCopied,
                                              C.getASTContext().getSizeType());

    // Invalidate the source (const-invalidation without const-pointer-escaping
    // the address of the top-level region).
    state = invalidateSourceBuffer(C, state, srcExpr.Expression, srcVal);

    // Set the C string length of the destination, if we know it.
    if (IsBounded && (appendK == ConcatFnKind::none)) {
      // strncpy is annoying in that it doesn't guarantee to null-terminate
      // the result string. If the original string didn't fit entirely inside
      // the bound (including the null-terminator), we don't know how long the
      // result is.
      if (amountCopied != strLength)
        finalStrLength = UnknownVal();
    }
    state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
  }

  assert(state);

  if (returnPtr) {
    // If this is a stpcpy-style copy, but we were unable to check for a buffer
    // overflow, we still need a result. Conjure a return value.
    if (ReturnEnd && Result.isUnknown()) {
      Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
    }
  }
  // Set the return value.
  state = state->BindExpr(CE, LCtx, Result);
  C.addTransition(state);
}

/// Models strcmp(): unbounded, case-sensitive comparison.
void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
  //int strcmp(const char *s1, const char *s2);
  evalStrcmpCommon(C, CE, /* IsBounded = */ false, /* IgnoreCase = */ false);
}

/// Models strncmp(): case-sensitive comparison bounded by the third argument.
void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
  //int strncmp(const char *s1, const char *s2, size_t n);
  evalStrcmpCommon(C, CE, /* IsBounded = */ true, /* IgnoreCase = */ false);
}

/// Models strcasecmp(): unbounded, case-insensitive comparison.
void CStringChecker::evalStrcasecmp(CheckerContext &C,
                                    const CallExpr *CE) const {
  //int strcasecmp(const char *s1, const char *s2);
  evalStrcmpCommon(C, CE, /* IsBounded = */ false, /* IgnoreCase = */ true);
}

/// Models strncasecmp(): case-insensitive comparison bounded by the third
/// argument.
void CStringChecker::evalStrncasecmp(CheckerContext &C,
                                     const CallExpr *CE) const {
  //int strncasecmp(const char *s1, const char *s2, size_t n);
  evalStrcmpCommon(C, CE, /* IsBounded = */ true, /* IgnoreCase = */ true);
}

/// Shared model for the strcmp family.
///
/// \param C           Checker context; supplies the state and receives the
///                    transitions.
/// \param CE          The call. Arguments 0 and 1 are the two strings; when
///                    IsBounded, argument 2 is the maximum length to compare.
/// \param IsBounded   Whether argument 2 limits the comparison.
/// \param IgnoreCase  Selects StringRef::compare_insensitive over
///                    StringRef::compare for literal operands.
///
/// Up to two transitions are added: one binding 0 when the two pointers may
/// be equal, and one for the "different buffers" case. In the latter the
/// result is a conjured symbol that is replaced by 0, or constrained to be
/// > 0 or < 0, only when both operands are string literals (and, for bounded
/// calls, the length is a known constant).
void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
                                      bool IsBounded, bool IgnoreCase) const {
  CurrentFunctionDescription = "string comparison function";
  ProgramStateRef state = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the first string is non-null
  AnyArgExpr Left = {CE->getArg(0), 0};
  SVal LeftVal = state->getSVal(Left.Expression, LCtx);
  state = checkNonNull(C, state, Left, LeftVal);
  if (!state)
    return;

  // Check that the second string is non-null.
  AnyArgExpr Right = {CE->getArg(1), 1};
  SVal RightVal = state->getSVal(Right.Expression, LCtx);
  state = checkNonNull(C, state, Right, RightVal);
  if (!state)
    return;

  // Get the string length of the first string or give up.
  SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
  if (LeftLength.isUndef())
    return;

  // Get the string length of the second string or give up.
  SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
  if (RightLength.isUndef())
    return;

  // If we know the two buffers are the same, we know the result is 0.
  // First, get the two buffers' addresses. Another checker will have already
  // made sure they're not undefined.
  DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
  DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();

  // See if they are the same.
  SValBuilder &svalBuilder = C.getSValBuilder();
  DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
  ProgramStateRef StSameBuf, StNotSameBuf;
  std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);

  // If the two arguments might be the same buffer, we know the result is 0,
  // and we only need to check one size.
  if (StSameBuf) {
    StSameBuf = StSameBuf->BindExpr(CE, LCtx,
                                    svalBuilder.makeZeroVal(CE->getType()));
    C.addTransition(StSameBuf);

    // If the two arguments are GUARANTEED to be the same, we're done!
    if (!StNotSameBuf)
      return;
  }

  assert(StNotSameBuf);
  state = StNotSameBuf;

  // At this point we can go about comparing the two buffers.
  // For now, we only do this if they're both known string literals.

  // Attempt to extract string literals from both expressions.
  const StringLiteral *LeftStrLiteral =
      getCStringLiteral(C, state, Left.Expression, LeftVal);
  const StringLiteral *RightStrLiteral =
      getCStringLiteral(C, state, Right.Expression, RightVal);
  bool canComputeResult = false;
  // Default result: a fresh symbol, refined below when possible.
  SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
                                                C.blockCount());

  if (LeftStrLiteral && RightStrLiteral) {
    StringRef LeftStrRef = LeftStrLiteral->getString();
    StringRef RightStrRef = RightStrLiteral->getString();

    if (IsBounded) {
      // Get the max number of characters to compare.
      const Expr *lenExpr = CE->getArg(2);
      SVal lenVal = state->getSVal(lenExpr, LCtx);

      // If the length is known, we can get the right substrings.
      if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
        // Create substrings of each to compare the prefix.
        LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());
        RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());
        canComputeResult = true;
      }
    } else {
      // This is a normal, unbounded strcmp.
      canComputeResult = true;
    }

    if (canComputeResult) {
      // Real strcmp stops at null characters.
      size_t s1Term = LeftStrRef.find('\0');
      if (s1Term != StringRef::npos)
        LeftStrRef = LeftStrRef.substr(0, s1Term);

      size_t s2Term = RightStrRef.find('\0');
      if (s2Term != StringRef::npos)
        RightStrRef = RightStrRef.substr(0, s2Term);

      // Use StringRef's comparison methods to compute the actual result.
      int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef)
                                  : LeftStrRef.compare(RightStrRef);

      // The strcmp function returns an integer greater than, equal to, or less
      // than zero, [c11, p7.24.4.2].
      if (compareRes == 0) {
        resultVal = svalBuilder.makeIntVal(compareRes, CE->getType());
      }
      else {
        DefinedSVal zeroVal = svalBuilder.makeIntVal(0, CE->getType());
        // Constrain strcmp's result range based on the result of StringRef's
        // comparison methods.
        // Only the sign is modelled; the conjured symbol stays the result.
        BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
        SVal compareWithZero =
          svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
              svalBuilder.getConditionType());
        DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
        state = state->assume(compareWithZeroVal, true);
      }
    }
  }

  state = state->BindExpr(CE, LCtx, resultVal);

  // Record this as a possible path.
  C.addTransition(state);
}

/// Models strsep(). The call is only modelled when the pointee type of the
/// first argument matches the call's return type (ignoring qualifiers).
/// Both arguments are checked for null; then the string currently pointed to
/// by *stringp is invalidated, *stringp is overwritten with a conjured
/// pointer, and the call is bound to the old value of *stringp (or to a
/// conjured value if the first argument is not a location).
void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
  // char *strsep(char **stringp, const char *delim);
  // Verify whether the search string parameter matches the return type.
  SourceArgExpr SearchStrPtr = {CE->getArg(0), 0};

  QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
  if (CharPtrTy.isNull() ||
      CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
    return;

  CurrentFunctionDescription = "strsep()";
  ProgramStateRef State = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the search string pointer is non-null (though it may point to
  // a null string).
  SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
  State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
  if (!State)
    return;

  // Check that the delimiter string is non-null.
  AnyArgExpr DelimStr = {CE->getArg(1), 1};
  SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
  State = checkNonNull(C, State, DelimStr, DelimStrVal);
  if (!State)
    return;

  SValBuilder &SVB = C.getSValBuilder();
  SVal Result;
  if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
    // Get the current value of the search string pointer, as a char*.
    Result = State->getSVal(*SearchStrLoc, CharPtrTy);

    // Invalidate the search string, representing the change of one delimiter
    // character to NUL.
    // As the replacement never overflows, do not invalidate its super region.
    State = invalidateDestinationBufferNeverOverflows(
        C, State, SearchStrPtr.Expression, Result);

    // Overwrite the search string pointer. The new value is either an address
    // further along in the same string, or NULL if there are no more tokens.
    State = State->bindLoc(*SearchStrLoc,
                           SVB.conjureSymbolVal(getTag(),
                                                CE,
                                                LCtx,
                                                CharPtrTy,
                                                C.blockCount()),
                           LCtx);
  } else {
    assert(SearchStrVal.isUnknown());
    // Conjure a symbolic value. It's the best we can do.
    Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
  }

  // Set the return value, and finish.
  State = State->BindExpr(CE, LCtx, Result);
  C.addTransition(State);
}

// These should probably be moved into a C++ standard library checker.
2295 void CStringChecker::evalStdCopy(CheckerContext &C, const CallExpr *CE) const { 2296 evalStdCopyCommon(C, CE); 2297 } 2298 2299 void CStringChecker::evalStdCopyBackward(CheckerContext &C, 2300 const CallExpr *CE) const { 2301 evalStdCopyCommon(C, CE); 2302 } 2303 2304 void CStringChecker::evalStdCopyCommon(CheckerContext &C, 2305 const CallExpr *CE) const { 2306 if (!CE->getArg(2)->getType()->isPointerType()) 2307 return; 2308 2309 ProgramStateRef State = C.getState(); 2310 2311 const LocationContext *LCtx = C.getLocationContext(); 2312 2313 // template <class _InputIterator, class _OutputIterator> 2314 // _OutputIterator 2315 // copy(_InputIterator __first, _InputIterator __last, 2316 // _OutputIterator __result) 2317 2318 // Invalidate the destination buffer 2319 const Expr *Dst = CE->getArg(2); 2320 SVal DstVal = State->getSVal(Dst, LCtx); 2321 // FIXME: As we do not know how many items are copied, we also invalidate the 2322 // super region containing the target location. 2323 State = 2324 invalidateDestinationBufferAlwaysEscapeSuperRegion(C, State, Dst, DstVal); 2325 2326 SValBuilder &SVB = C.getSValBuilder(); 2327 2328 SVal ResultVal = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); 2329 State = State->BindExpr(CE, LCtx, ResultVal); 2330 2331 C.addTransition(State); 2332 } 2333 2334 void CStringChecker::evalMemset(CheckerContext &C, const CallExpr *CE) const { 2335 // void *memset(void *s, int c, size_t n); 2336 CurrentFunctionDescription = "memory set function"; 2337 2338 DestinationArgExpr Buffer = {CE->getArg(0), 0}; 2339 AnyArgExpr CharE = {CE->getArg(1), 1}; 2340 SizeArgExpr Size = {CE->getArg(2), 2}; 2341 2342 ProgramStateRef State = C.getState(); 2343 2344 // See if the size argument is zero. 
2345 const LocationContext *LCtx = C.getLocationContext(); 2346 SVal SizeVal = C.getSVal(Size.Expression); 2347 QualType SizeTy = Size.Expression->getType(); 2348 2349 ProgramStateRef ZeroSize, NonZeroSize; 2350 std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy); 2351 2352 // Get the value of the memory area. 2353 SVal BufferPtrVal = C.getSVal(Buffer.Expression); 2354 2355 // If the size is zero, there won't be any actual memory access, so 2356 // just bind the return value to the buffer and return. 2357 if (ZeroSize && !NonZeroSize) { 2358 ZeroSize = ZeroSize->BindExpr(CE, LCtx, BufferPtrVal); 2359 C.addTransition(ZeroSize); 2360 return; 2361 } 2362 2363 // Ensure the memory area is not null. 2364 // If it is NULL there will be a NULL pointer dereference. 2365 State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal); 2366 if (!State) 2367 return; 2368 2369 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write); 2370 if (!State) 2371 return; 2372 2373 // According to the values of the arguments, bind the value of the second 2374 // argument to the destination buffer and set string length, or just 2375 // invalidate the destination buffer. 2376 if (!memsetAux(Buffer.Expression, C.getSVal(CharE.Expression), 2377 Size.Expression, C, State)) 2378 return; 2379 2380 State = State->BindExpr(CE, LCtx, BufferPtrVal); 2381 C.addTransition(State); 2382 } 2383 2384 void CStringChecker::evalBzero(CheckerContext &C, const CallExpr *CE) const { 2385 CurrentFunctionDescription = "memory clearance function"; 2386 2387 DestinationArgExpr Buffer = {CE->getArg(0), 0}; 2388 SizeArgExpr Size = {CE->getArg(1), 1}; 2389 SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy); 2390 2391 ProgramStateRef State = C.getState(); 2392 2393 // See if the size argument is zero. 
2394 SVal SizeVal = C.getSVal(Size.Expression); 2395 QualType SizeTy = Size.Expression->getType(); 2396 2397 ProgramStateRef StateZeroSize, StateNonZeroSize; 2398 std::tie(StateZeroSize, StateNonZeroSize) = 2399 assumeZero(C, State, SizeVal, SizeTy); 2400 2401 // If the size is zero, there won't be any actual memory access, 2402 // In this case we just return. 2403 if (StateZeroSize && !StateNonZeroSize) { 2404 C.addTransition(StateZeroSize); 2405 return; 2406 } 2407 2408 // Get the value of the memory area. 2409 SVal MemVal = C.getSVal(Buffer.Expression); 2410 2411 // Ensure the memory area is not null. 2412 // If it is NULL there will be a NULL pointer dereference. 2413 State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal); 2414 if (!State) 2415 return; 2416 2417 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write); 2418 if (!State) 2419 return; 2420 2421 if (!memsetAux(Buffer.Expression, Zero, Size.Expression, C, State)) 2422 return; 2423 2424 C.addTransition(State); 2425 } 2426 2427 void CStringChecker::evalSprintf(CheckerContext &C, const CallExpr *CE) const { 2428 CurrentFunctionDescription = "'sprintf'"; 2429 bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___sprintf_chk; 2430 evalSprintfCommon(C, CE, /* IsBounded */ false, IsBI); 2431 } 2432 2433 void CStringChecker::evalSnprintf(CheckerContext &C, const CallExpr *CE) const { 2434 CurrentFunctionDescription = "'snprintf'"; 2435 bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___snprintf_chk; 2436 evalSprintfCommon(C, CE, /* IsBounded */ true, IsBI); 2437 } 2438 2439 void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallExpr *CE, 2440 bool IsBounded, bool IsBuiltin) const { 2441 ProgramStateRef State = C.getState(); 2442 DestinationArgExpr Dest = {CE->getArg(0), 0}; 2443 2444 const auto NumParams = CE->getCalleeDecl()->getAsFunction()->getNumParams(); 2445 assert(CE->getNumArgs() >= NumParams); 2446 2447 const auto AllArguments = 2448 
llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs()); 2449 const auto VariadicArguments = drop_begin(enumerate(AllArguments), NumParams); 2450 2451 for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) { 2452 // We consider only string buffers 2453 if (const QualType type = ArgExpr->getType(); 2454 !type->isAnyPointerType() || 2455 !type->getPointeeType()->isAnyCharacterType()) 2456 continue; 2457 SourceArgExpr Source = {ArgExpr, unsigned(ArgIdx)}; 2458 2459 // Ensure the buffers do not overlap. 2460 SizeArgExpr SrcExprAsSizeDummy = {Source.Expression, Source.ArgumentIndex}; 2461 State = CheckOverlap( 2462 C, State, 2463 (IsBounded ? SizeArgExpr{CE->getArg(1), 1} : SrcExprAsSizeDummy), Dest, 2464 Source); 2465 if (!State) 2466 return; 2467 } 2468 2469 C.addTransition(State); 2470 } 2471 2472 //===----------------------------------------------------------------------===// 2473 // The driver method, and other Checker callbacks. 2474 //===----------------------------------------------------------------------===// 2475 2476 CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call, 2477 CheckerContext &C) const { 2478 const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr()); 2479 if (!CE) 2480 return nullptr; 2481 2482 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl()); 2483 if (!FD) 2484 return nullptr; 2485 2486 if (StdCopy.matches(Call)) 2487 return &CStringChecker::evalStdCopy; 2488 if (StdCopyBackward.matches(Call)) 2489 return &CStringChecker::evalStdCopyBackward; 2490 2491 // Pro-actively check that argument types are safe to do arithmetic upon. 2492 // We do not want to crash if someone accidentally passes a structure 2493 // into, say, a C++ overload of any of these functions. We could not check 2494 // that for std::copy because they may have arguments of other types. 
2495 for (auto I : CE->arguments()) { 2496 QualType T = I->getType(); 2497 if (!T->isIntegralOrEnumerationType() && !T->isPointerType()) 2498 return nullptr; 2499 } 2500 2501 const FnCheck *Callback = Callbacks.lookup(Call); 2502 if (Callback) 2503 return *Callback; 2504 2505 return nullptr; 2506 } 2507 2508 bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const { 2509 FnCheck Callback = identifyCall(Call, C); 2510 2511 // If the callee isn't a string function, let another checker handle it. 2512 if (!Callback) 2513 return false; 2514 2515 // Check and evaluate the call. 2516 const auto *CE = cast<CallExpr>(Call.getOriginExpr()); 2517 Callback(this, C, CE); 2518 2519 // If the evaluate call resulted in no change, chain to the next eval call 2520 // handler. 2521 // Note, the custom CString evaluation calls assume that basic safety 2522 // properties are held. However, if the user chooses to turn off some of these 2523 // checks, we ignore the issues and leave the call evaluation to a generic 2524 // handler. 2525 return C.isDifferent(); 2526 } 2527 2528 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { 2529 // Record string length for char a[] = "abc"; 2530 ProgramStateRef state = C.getState(); 2531 2532 for (const auto *I : DS->decls()) { 2533 const VarDecl *D = dyn_cast<VarDecl>(I); 2534 if (!D) 2535 continue; 2536 2537 // FIXME: Handle array fields of structs. 
2538 if (!D->getType()->isArrayType()) 2539 continue; 2540 2541 const Expr *Init = D->getInit(); 2542 if (!Init) 2543 continue; 2544 if (!isa<StringLiteral>(Init)) 2545 continue; 2546 2547 Loc VarLoc = state->getLValue(D, C.getLocationContext()); 2548 const MemRegion *MR = VarLoc.getAsRegion(); 2549 if (!MR) 2550 continue; 2551 2552 SVal StrVal = C.getSVal(Init); 2553 assert(StrVal.isValid() && "Initializer string is unknown or undefined"); 2554 DefinedOrUnknownSVal strLength = 2555 getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>(); 2556 2557 state = state->set<CStringLength>(MR, strLength); 2558 } 2559 2560 C.addTransition(state); 2561 } 2562 2563 ProgramStateRef 2564 CStringChecker::checkRegionChanges(ProgramStateRef state, 2565 const InvalidatedSymbols *, 2566 ArrayRef<const MemRegion *> ExplicitRegions, 2567 ArrayRef<const MemRegion *> Regions, 2568 const LocationContext *LCtx, 2569 const CallEvent *Call) const { 2570 CStringLengthTy Entries = state->get<CStringLength>(); 2571 if (Entries.isEmpty()) 2572 return state; 2573 2574 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; 2575 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; 2576 2577 // First build sets for the changed regions and their super-regions. 2578 for (ArrayRef<const MemRegion *>::iterator 2579 I = Regions.begin(), E = Regions.end(); I != E; ++I) { 2580 const MemRegion *MR = *I; 2581 Invalidated.insert(MR); 2582 2583 SuperRegions.insert(MR); 2584 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { 2585 MR = SR->getSuperRegion(); 2586 SuperRegions.insert(MR); 2587 } 2588 } 2589 2590 CStringLengthTy::Factory &F = state->get_context<CStringLength>(); 2591 2592 // Then loop over the entries in the current state. 2593 for (CStringLengthTy::iterator I = Entries.begin(), 2594 E = Entries.end(); I != E; ++I) { 2595 const MemRegion *MR = I.getKey(); 2596 2597 // Is this entry for a super-region of a changed region? 
2598 if (SuperRegions.count(MR)) { 2599 Entries = F.remove(Entries, MR); 2600 continue; 2601 } 2602 2603 // Is this entry for a sub-region of a changed region? 2604 const MemRegion *Super = MR; 2605 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { 2606 Super = SR->getSuperRegion(); 2607 if (Invalidated.count(Super)) { 2608 Entries = F.remove(Entries, MR); 2609 break; 2610 } 2611 } 2612 } 2613 2614 return state->set<CStringLength>(Entries); 2615 } 2616 2617 void CStringChecker::checkLiveSymbols(ProgramStateRef state, 2618 SymbolReaper &SR) const { 2619 // Mark all symbols in our string length map as valid. 2620 CStringLengthTy Entries = state->get<CStringLength>(); 2621 2622 for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); 2623 I != E; ++I) { 2624 SVal Len = I.getData(); 2625 2626 for (SymExpr::symbol_iterator si = Len.symbol_begin(), 2627 se = Len.symbol_end(); si != se; ++si) 2628 SR.markInUse(*si); 2629 } 2630 } 2631 2632 void CStringChecker::checkDeadSymbols(SymbolReaper &SR, 2633 CheckerContext &C) const { 2634 ProgramStateRef state = C.getState(); 2635 CStringLengthTy Entries = state->get<CStringLength>(); 2636 if (Entries.isEmpty()) 2637 return; 2638 2639 CStringLengthTy::Factory &F = state->get_context<CStringLength>(); 2640 for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); 2641 I != E; ++I) { 2642 SVal Len = I.getData(); 2643 if (SymbolRef Sym = Len.getAsSymbol()) { 2644 if (SR.isDead(Sym)) 2645 Entries = F.remove(Entries, I.getKey()); 2646 } 2647 } 2648 2649 state = state->set<CStringLength>(Entries); 2650 C.addTransition(state); 2651 } 2652 2653 void ento::registerCStringModeling(CheckerManager &Mgr) { 2654 Mgr.registerChecker<CStringChecker>(); 2655 } 2656 2657 bool ento::shouldRegisterCStringModeling(const CheckerManager &mgr) { 2658 return true; 2659 } 2660 2661 #define REGISTER_CHECKER(name) \ 2662 void ento::register##name(CheckerManager &mgr) { \ 2663 CStringChecker *checker = 
mgr.getChecker<CStringChecker>(); \ 2664 checker->Filter.Check##name = true; \ 2665 checker->Filter.CheckName##name = mgr.getCurrentCheckerName(); \ 2666 } \ 2667 \ 2668 bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; } 2669 2670 REGISTER_CHECKER(CStringNullArg) 2671 REGISTER_CHECKER(CStringOutOfBounds) 2672 REGISTER_CHECKER(CStringBufferOverlap) 2673 REGISTER_CHECKER(CStringNotNullTerm) 2674 REGISTER_CHECKER(CStringUninitializedRead) 2675