1 //== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines ArrayBoundCheckerV2, which is a path-sensitive check 10 // which looks for an out-of-bound array element access. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/AST/CharUnits.h" 15 #include "clang/AST/ParentMapContext.h" 16 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 17 #include "clang/StaticAnalyzer/Checkers/Taint.h" 18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 19 #include "clang/StaticAnalyzer/Core/Checker.h" 20 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" 22 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 23 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" 24 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/Support/FormatVariadic.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <optional> 29 30 using namespace clang; 31 using namespace ento; 32 using namespace taint; 33 using llvm::formatv; 34 35 namespace { 36 /// If `E` is a "clean" array subscript expression, return the type of the 37 /// accessed element. If the base of the subscript expression is modified by 38 /// pointer arithmetic (and not the beginning of a "full" memory region), this 39 /// always returns nullopt because that's the right (or the least bad) thing to 40 /// do for the diagnostic output that's relying on this. 41 static std::optional<QualType> determineElementType(const Expr *E, 42 const CheckerContext &C) { 43 const auto *ASE = dyn_cast<ArraySubscriptExpr>(E); 44 if (!ASE) 45 return std::nullopt; 46 47 const MemRegion *SubscriptBaseReg = C.getSVal(ASE->getBase()).getAsRegion(); 48 if (!SubscriptBaseReg) 49 return std::nullopt; 50 51 // The base of the subscript expression is affected by pointer arithmetics, 52 // so we want to report byte offsets instead of indices. 53 if (isa<ElementRegion>(SubscriptBaseReg->StripCasts())) 54 return std::nullopt; 55 56 return ASE->getType(); 57 } 58 59 static std::optional<int64_t> 60 determineElementSize(const std::optional<QualType> T, const CheckerContext &C) { 61 if (!T) 62 return std::nullopt; 63 return C.getASTContext().getTypeSizeInChars(*T).getQuantity(); 64 } 65 66 class StateUpdateReporter { 67 const SubRegion *Reg; 68 const NonLoc ByteOffsetVal; 69 const std::optional<QualType> ElementType; 70 const std::optional<int64_t> ElementSize; 71 bool AssumedNonNegative = false; 72 std::optional<NonLoc> AssumedUpperBound = std::nullopt; 73 74 public: 75 StateUpdateReporter(const SubRegion *R, NonLoc ByteOffsVal, const Expr *E, 76 CheckerContext &C) 77 : Reg(R), ByteOffsetVal(ByteOffsVal), 78 ElementType(determineElementType(E, C)), 79 ElementSize(determineElementSize(ElementType, C)) {} 80 81 void recordNonNegativeAssumption() { AssumedNonNegative = true; } 82 void recordUpperBoundAssumption(NonLoc UpperBoundVal) { 83 AssumedUpperBound = UpperBoundVal; 84 } 85 86 const NoteTag *createNoteTag(CheckerContext &C) const; 87 88 private: 89 std::string getMessage(PathSensitiveBugReport &BR) const; 90 91 /// Return true if information about the value of `Sym` can put constraints 92 /// on some symbol which is interesting within the bug report `BR`. 93 /// In particular, this returns true when `Sym` is interesting within `BR`; 94 /// but it also returns true if `Sym` is an expression that contains integer 95 /// constants and a single symbolic operand which is interesting (in `BR`). 96 /// We need to use this instead of plain `BR.isInteresting()` because if we 97 /// are analyzing code like 98 /// int array[10]; 99 /// int f(int arg) { 100 /// return array[arg] && array[arg + 10]; 101 /// } 102 /// then the byte offsets are `arg * 4` and `(arg + 10) * 4`, which are not 103 /// sub-expressions of each other (but `getSimplifiedOffsets` is smart enough 104 /// to detect this out of bounds access). 105 static bool providesInformationAboutInteresting(SymbolRef Sym, 106 PathSensitiveBugReport &BR); 107 static bool providesInformationAboutInteresting(SVal SV, 108 PathSensitiveBugReport &BR) { 109 return providesInformationAboutInteresting(SV.getAsSymbol(), BR); 110 } 111 }; 112 113 struct Messages { 114 std::string Short, Full; 115 }; 116 117 // NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt` 118 // instead of `PreStmt` because the current implementation passes the whole 119 // expression to `CheckerContext::getSVal()` which only works after the 120 // symbolic evaluation of the expression. (To turn them into `PreStmt` 121 // callbacks, we'd need to duplicate the logic that evaluates these 122 // expressions.) The `MemberExpr` callback would work as `PreStmt` but it's 123 // defined as `PostStmt` for the sake of consistency with the other callbacks. 124 class ArrayBoundCheckerV2 : public Checker<check::PostStmt<ArraySubscriptExpr>, 125 check::PostStmt<UnaryOperator>, 126 check::PostStmt<MemberExpr>> { 127 BugType BT{this, "Out-of-bound access"}; 128 BugType TaintBT{this, "Out-of-bound access", categories::TaintedData}; 129 130 void performCheck(const Expr *E, CheckerContext &C) const; 131 132 void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, Messages Msgs, 133 NonLoc Offset, std::optional<NonLoc> Extent, 134 bool IsTaintBug = false) const; 135 136 static void markPartsInteresting(PathSensitiveBugReport &BR, 137 ProgramStateRef ErrorState, NonLoc Val, 138 bool MarkTaint); 139 140 static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC); 141 142 static bool isIdiomaticPastTheEndPtr(const Expr *E, ProgramStateRef State, 143 NonLoc Offset, NonLoc Limit, 144 CheckerContext &C); 145 static bool isInAddressOf(const Stmt *S, ASTContext &AC); 146 147 public: 148 void checkPostStmt(const ArraySubscriptExpr *E, CheckerContext &C) const { 149 performCheck(E, C); 150 } 151 void checkPostStmt(const UnaryOperator *E, CheckerContext &C) const { 152 if (E->getOpcode() == UO_Deref) 153 performCheck(E, C); 154 } 155 void checkPostStmt(const MemberExpr *E, CheckerContext &C) const { 156 if (E->isArrow()) 157 performCheck(E->getBase(), C); 158 } 159 }; 160 161 } // anonymous namespace 162 163 /// For a given Location that can be represented as a symbolic expression 164 /// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block 165 /// Arr and the distance of Location from the beginning of Arr (expressed in a 166 /// NonLoc that specifies the number of CharUnits). Returns nullopt when these 167 /// cannot be determined. 168 static std::optional<std::pair<const SubRegion *, NonLoc>> 169 computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) { 170 QualType T = SVB.getArrayIndexType(); 171 auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) { 172 // We will use this utility to add and multiply values. 173 return SVB.evalBinOpNN(State, Op, L, R, T).getAs<NonLoc>(); 174 }; 175 176 const SubRegion *OwnerRegion = nullptr; 177 std::optional<NonLoc> Offset = SVB.makeZeroArrayIndex(); 178 179 const ElementRegion *CurRegion = 180 dyn_cast_or_null<ElementRegion>(Location.getAsRegion()); 181 182 while (CurRegion) { 183 const auto Index = CurRegion->getIndex().getAs<NonLoc>(); 184 if (!Index) 185 return std::nullopt; 186 187 QualType ElemType = CurRegion->getElementType(); 188 189 // FIXME: The following early return was presumably added to safeguard the 190 // getTypeSizeInChars() call (which doesn't accept an incomplete type), but 191 // it seems that `ElemType` cannot be incomplete at this point. 192 if (ElemType->isIncompleteType()) 193 return std::nullopt; 194 195 // Calculate Delta = Index * sizeof(ElemType). 196 NonLoc Size = SVB.makeArrayIndex( 197 SVB.getContext().getTypeSizeInChars(ElemType).getQuantity()); 198 auto Delta = EvalBinOp(BO_Mul, *Index, Size); 199 if (!Delta) 200 return std::nullopt; 201 202 // Perform Offset += Delta. 203 Offset = EvalBinOp(BO_Add, *Offset, *Delta); 204 if (!Offset) 205 return std::nullopt; 206 207 OwnerRegion = CurRegion->getSuperRegion()->getAs<SubRegion>(); 208 // When this is just another ElementRegion layer, we need to continue the 209 // offset calculations: 210 CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion); 211 } 212 213 if (OwnerRegion) 214 return std::make_pair(OwnerRegion, *Offset); 215 216 return std::nullopt; 217 } 218 219 // NOTE: This function is the "heart" of this checker. It simplifies 220 // inequalities with transformations that are valid (and very elementary) in 221 // pure mathematics, but become invalid if we use them in C++ number model 222 // where the calculations may overflow. 223 // Due to the overflow issues I think it's impossible (or at least not 224 // practical) to integrate this kind of simplification into the resolution of 225 // arbitrary inequalities (i.e. the code of `evalBinOp`); but this function 226 // produces valid results when the calculations are handling memory offsets 227 // and every value is well below SIZE_MAX. 228 // TODO: This algorithm should be moved to a central location where it's 229 // available for other checkers that need to compare memory offsets. 230 // NOTE: the simplification preserves the order of the two operands in a 231 // mathematical sense, but it may change the result produced by a C++ 232 // comparison operator (and the automatic type conversions). 233 // For example, consider a comparison "X+1 < 0", where the LHS is stored as a 234 // size_t and the RHS is stored in an int. (As size_t is unsigned, this 235 // comparison is false for all values of "X".) However, the simplification may 236 // turn it into "X < -1", which is still always false in a mathematical sense, 237 // but can produce a true result when evaluated by `evalBinOp` (which follows 238 // the rules of C++ and casts -1 to SIZE_MAX). 239 static std::pair<NonLoc, nonloc::ConcreteInt> 240 getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, 241 SValBuilder &svalBuilder) { 242 std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>(); 243 if (SymVal && SymVal->isExpression()) { 244 if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) { 245 llvm::APSInt constant = 246 APSIntType(extent.getValue()).convert(SIE->getRHS()); 247 switch (SIE->getOpcode()) { 248 case BO_Mul: 249 // The constant should never be 0 here, becasue multiplication by zero 250 // is simplified by the engine. 251 if ((extent.getValue() % constant) != 0) 252 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); 253 else 254 return getSimplifiedOffsets( 255 nonloc::SymbolVal(SIE->getLHS()), 256 svalBuilder.makeIntVal(extent.getValue() / constant), 257 svalBuilder); 258 case BO_Add: 259 return getSimplifiedOffsets( 260 nonloc::SymbolVal(SIE->getLHS()), 261 svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder); 262 default: 263 break; 264 } 265 } 266 } 267 268 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); 269 } 270 271 // Evaluate the comparison Value < Threshold with the help of the custom 272 // simplification algorithm defined for this checker. Return a pair of states, 273 // where the first one corresponds to "value below threshold" and the second 274 // corresponds to "value at or above threshold". Returns {nullptr, nullptr} in 275 // the case when the evaluation fails. 276 // If the optional argument CheckEquality is true, then use BO_EQ instead of 277 // the default BO_LT after consistently applying the same simplification steps. 278 static std::pair<ProgramStateRef, ProgramStateRef> 279 compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, 280 SValBuilder &SVB, bool CheckEquality = false) { 281 if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) { 282 std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB); 283 } 284 if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) { 285 QualType T = Value.getType(SVB.getContext()); 286 if (T->isUnsignedIntegerType() && ConcreteThreshold->getValue().isNegative()) { 287 // In this case we reduced the bound check to a comparison of the form 288 // (symbol or value with unsigned type) < (negative number) 289 // which is always false. We are handling these cases separately because 290 // evalBinOpNN can perform a signed->unsigned conversion that turns the 291 // negative number into a huge positive value and leads to wildly 292 // inaccurate conclusions. 293 return {nullptr, State}; 294 } 295 } 296 const BinaryOperatorKind OpKind = CheckEquality ? BO_EQ : BO_LT; 297 auto BelowThreshold = 298 SVB.evalBinOpNN(State, OpKind, Value, Threshold, SVB.getConditionType()) 299 .getAs<NonLoc>(); 300 301 if (BelowThreshold) 302 return State->assume(*BelowThreshold); 303 304 return {nullptr, nullptr}; 305 } 306 307 static std::string getRegionName(const SubRegion *Region) { 308 if (std::string RegName = Region->getDescriptiveName(); !RegName.empty()) 309 return RegName; 310 311 // Field regions only have descriptive names when their parent has a 312 // descriptive name; so we provide a fallback representation for them: 313 if (const auto *FR = Region->getAs<FieldRegion>()) { 314 if (StringRef Name = FR->getDecl()->getName(); !Name.empty()) 315 return formatv("the field '{0}'", Name); 316 return "the unnamed field"; 317 } 318 319 if (isa<AllocaRegion>(Region)) 320 return "the memory returned by 'alloca'"; 321 322 if (isa<SymbolicRegion>(Region) && 323 isa<HeapSpaceRegion>(Region->getMemorySpace())) 324 return "the heap area"; 325 326 if (isa<StringRegion>(Region)) 327 return "the string literal"; 328 329 return "the region"; 330 } 331 332 static std::optional<int64_t> getConcreteValue(NonLoc SV) { 333 if (auto ConcreteVal = SV.getAs<nonloc::ConcreteInt>()) { 334 return ConcreteVal->getValue().tryExtValue(); 335 } 336 return std::nullopt; 337 } 338 339 static std::optional<int64_t> getConcreteValue(std::optional<NonLoc> SV) { 340 return SV ? getConcreteValue(*SV) : std::nullopt; 341 } 342 343 static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) { 344 std::string RegName = getRegionName(Region); 345 SmallString<128> Buf; 346 llvm::raw_svector_ostream Out(Buf); 347 Out << "Access of " << RegName << " at negative byte offset"; 348 if (auto ConcreteIdx = Offset.getAs<nonloc::ConcreteInt>()) 349 Out << ' ' << ConcreteIdx->getValue(); 350 return {formatv("Out of bound access to memory preceding {0}", RegName), 351 std::string(Buf)}; 352 } 353 354 /// Try to divide `Val1` and `Val2` (in place) by `Divisor` and return true if 355 /// it can be performed (`Divisor` is nonzero and there is no remainder). The 356 /// values `Val1` and `Val2` may be nullopt and in that case the corresponding 357 /// division is considered to be successful. 358 static bool tryDividePair(std::optional<int64_t> &Val1, 359 std::optional<int64_t> &Val2, int64_t Divisor) { 360 if (!Divisor) 361 return false; 362 const bool Val1HasRemainder = Val1 && *Val1 % Divisor; 363 const bool Val2HasRemainder = Val2 && *Val2 % Divisor; 364 if (!Val1HasRemainder && !Val2HasRemainder) { 365 if (Val1) 366 *Val1 /= Divisor; 367 if (Val2) 368 *Val2 /= Divisor; 369 return true; 370 } 371 return false; 372 } 373 374 static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region, 375 NonLoc Offset, NonLoc Extent, SVal Location) { 376 std::string RegName = getRegionName(Region); 377 const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>(); 378 assert(EReg && "this checker only handles element access"); 379 QualType ElemType = EReg->getElementType(); 380 381 std::optional<int64_t> OffsetN = getConcreteValue(Offset); 382 std::optional<int64_t> ExtentN = getConcreteValue(Extent); 383 384 int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity(); 385 386 bool UseByteOffsets = !tryDividePair(OffsetN, ExtentN, ElemSize); 387 388 SmallString<256> Buf; 389 llvm::raw_svector_ostream Out(Buf); 390 Out << "Access of "; 391 if (!ExtentN && !UseByteOffsets) 392 Out << "'" << ElemType.getAsString() << "' element in "; 393 Out << RegName << " at "; 394 if (OffsetN) { 395 Out << (UseByteOffsets ? "byte offset " : "index ") << *OffsetN; 396 } else { 397 Out << "an overflowing " << (UseByteOffsets ? "byte offset" : "index"); 398 } 399 if (ExtentN) { 400 Out << ", while it holds only "; 401 if (*ExtentN != 1) 402 Out << *ExtentN; 403 else 404 Out << "a single"; 405 if (UseByteOffsets) 406 Out << " byte"; 407 else 408 Out << " '" << ElemType.getAsString() << "' element"; 409 410 if (*ExtentN > 1) 411 Out << "s"; 412 } 413 414 return { 415 formatv("Out of bound access to memory after the end of {0}", RegName), 416 std::string(Buf)}; 417 } 418 419 static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName) { 420 std::string RegName = getRegionName(Region); 421 return {formatv("Potential out of bound access to {0} with tainted {1}", 422 RegName, OffsetName), 423 formatv("Access of {0} with a tainted {1} that may be too large", 424 RegName, OffsetName)}; 425 } 426 427 const NoteTag *StateUpdateReporter::createNoteTag(CheckerContext &C) const { 428 // Don't create a note tag if we didn't assume anything: 429 if (!AssumedNonNegative && !AssumedUpperBound) 430 return nullptr; 431 432 return C.getNoteTag([*this](PathSensitiveBugReport &BR) -> std::string { 433 return getMessage(BR); 434 }); 435 } 436 437 std::string StateUpdateReporter::getMessage(PathSensitiveBugReport &BR) const { 438 bool ShouldReportNonNegative = AssumedNonNegative; 439 if (!providesInformationAboutInteresting(ByteOffsetVal, BR)) { 440 if (AssumedUpperBound && 441 providesInformationAboutInteresting(*AssumedUpperBound, BR)) { 442 // Even if the byte offset isn't interesting (e.g. it's a constant value), 443 // the assumption can still be interesting if it provides information 444 // about an interesting symbolic upper bound. 445 ShouldReportNonNegative = false; 446 } else { 447 // We don't have anything interesting, don't report the assumption. 448 return ""; 449 } 450 } 451 452 std::optional<int64_t> OffsetN = getConcreteValue(ByteOffsetVal); 453 std::optional<int64_t> ExtentN = getConcreteValue(AssumedUpperBound); 454 455 const bool UseIndex = 456 ElementSize && tryDividePair(OffsetN, ExtentN, *ElementSize); 457 458 SmallString<256> Buf; 459 llvm::raw_svector_ostream Out(Buf); 460 Out << "Assuming "; 461 if (UseIndex) { 462 Out << "index "; 463 if (OffsetN) 464 Out << "'" << OffsetN << "' "; 465 } else if (AssumedUpperBound) { 466 Out << "byte offset "; 467 if (OffsetN) 468 Out << "'" << OffsetN << "' "; 469 } else { 470 Out << "offset "; 471 } 472 473 Out << "is"; 474 if (ShouldReportNonNegative) { 475 Out << " non-negative"; 476 } 477 if (AssumedUpperBound) { 478 if (ShouldReportNonNegative) 479 Out << " and"; 480 Out << " less than "; 481 if (ExtentN) 482 Out << *ExtentN << ", "; 483 if (UseIndex && ElementType) 484 Out << "the number of '" << ElementType->getAsString() 485 << "' elements in "; 486 else 487 Out << "the extent of "; 488 Out << getRegionName(Reg); 489 } 490 return std::string(Out.str()); 491 } 492 493 bool StateUpdateReporter::providesInformationAboutInteresting( 494 SymbolRef Sym, PathSensitiveBugReport &BR) { 495 if (!Sym) 496 return false; 497 for (SymbolRef PartSym : Sym->symbols()) { 498 // The interestingess mark may appear on any layer as we're stripping off 499 // the SymIntExpr, UnarySymExpr etc. layers... 500 if (BR.isInteresting(PartSym)) 501 return true; 502 // ...but if both sides of the expression are symbolic, then there is no 503 // practical algorithm to produce separate constraints for the two 504 // operands (from the single combined result). 505 if (isa<SymSymExpr>(PartSym)) 506 return false; 507 } 508 return false; 509 } 510 511 void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const { 512 const SVal Location = C.getSVal(E); 513 514 // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as 515 // #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX) 516 // and incomplete analysis of these leads to false positives. As even 517 // accurate reports would be confusing for the users, just disable reports 518 // from these macros: 519 if (isFromCtypeMacro(E, C.getASTContext())) 520 return; 521 522 ProgramStateRef State = C.getState(); 523 SValBuilder &SVB = C.getSValBuilder(); 524 525 const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset = 526 computeOffset(State, SVB, Location); 527 528 if (!RawOffset) 529 return; 530 531 auto [Reg, ByteOffset] = *RawOffset; 532 533 // The state updates will be reported as a single note tag, which will be 534 // composed by this helper class. 535 StateUpdateReporter SUR(Reg, ByteOffset, E, C); 536 537 // CHECK LOWER BOUND 538 const MemSpaceRegion *Space = Reg->getMemorySpace(); 539 if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) { 540 // A symbolic region in unknown space represents an unknown pointer that 541 // may point into the middle of an array, so we don't look for underflows. 542 // Both conditions are significant because we want to check underflows in 543 // symbolic regions on the heap (which may be introduced by checkers like 544 // MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and 545 // non-symbolic regions (e.g. a field subregion of a symbolic region) in 546 // unknown space. 547 auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold( 548 State, ByteOffset, SVB.makeZeroArrayIndex(), SVB); 549 550 if (PrecedesLowerBound) { 551 // The offset may be invalid (negative)... 552 if (!WithinLowerBound) { 553 // ...and it cannot be valid (>= 0), so report an error. 554 Messages Msgs = getPrecedesMsgs(Reg, ByteOffset); 555 reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt); 556 return; 557 } 558 // ...but it can be valid as well, so the checker will (optimistically) 559 // assume that it's valid and mention this in the note tag. 560 SUR.recordNonNegativeAssumption(); 561 } 562 563 // Actually update the state. The "if" only fails in the extremely unlikely 564 // case when compareValueToThreshold returns {nullptr, nullptr} becasue 565 // evalBinOpNN fails to evaluate the less-than operator. 566 if (WithinLowerBound) 567 State = WithinLowerBound; 568 } 569 570 // CHECK UPPER BOUND 571 DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB); 572 if (auto KnownSize = Size.getAs<NonLoc>()) { 573 auto [WithinUpperBound, ExceedsUpperBound] = 574 compareValueToThreshold(State, ByteOffset, *KnownSize, SVB); 575 576 if (ExceedsUpperBound) { 577 // The offset may be invalid (>= Size)... 578 if (!WithinUpperBound) { 579 // ...and it cannot be within bounds, so report an error, unless we can 580 // definitely determine that this is an idiomatic `&array[size]` 581 // expression that calculates the past-the-end pointer. 582 if (isIdiomaticPastTheEndPtr(E, ExceedsUpperBound, ByteOffset, 583 *KnownSize, C)) { 584 C.addTransition(ExceedsUpperBound, SUR.createNoteTag(C)); 585 return; 586 } 587 588 Messages Msgs = getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, 589 *KnownSize, Location); 590 reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize); 591 return; 592 } 593 // ...and it can be valid as well... 594 if (isTainted(State, ByteOffset)) { 595 // ...but it's tainted, so report an error. 596 597 // Diagnostic detail: saying "tainted offset" is always correct, but 598 // the common case is that 'idx' is tainted in 'arr[idx]' and then it's 599 // nicer to say "tainted index". 600 const char *OffsetName = "offset"; 601 if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) 602 if (isTainted(State, ASE->getIdx(), C.getLocationContext())) 603 OffsetName = "index"; 604 605 Messages Msgs = getTaintMsgs(Reg, OffsetName); 606 reportOOB(C, ExceedsUpperBound, Msgs, ByteOffset, KnownSize, 607 /*IsTaintBug=*/true); 608 return; 609 } 610 // ...and it isn't tainted, so the checker will (optimistically) assume 611 // that the offset is in bounds and mention this in the note tag. 612 SUR.recordUpperBoundAssumption(*KnownSize); 613 } 614 615 // Actually update the state. The "if" only fails in the extremely unlikely 616 // case when compareValueToThreshold returns {nullptr, nullptr} becasue 617 // evalBinOpNN fails to evaluate the less-than operator. 618 if (WithinUpperBound) 619 State = WithinUpperBound; 620 } 621 622 // Add a transition, reporting the state updates that we accumulated. 623 C.addTransition(State, SUR.createNoteTag(C)); 624 } 625 626 void ArrayBoundCheckerV2::markPartsInteresting(PathSensitiveBugReport &BR, 627 ProgramStateRef ErrorState, 628 NonLoc Val, bool MarkTaint) { 629 if (SymbolRef Sym = Val.getAsSymbol()) { 630 // If the offset is a symbolic value, iterate over its "parts" with 631 // `SymExpr::symbols()` and mark each of them as interesting. 632 // For example, if the offset is `x*4 + y` then we put interestingness onto 633 // the SymSymExpr `x*4 + y`, the SymIntExpr `x*4` and the two data symbols 634 // `x` and `y`. 635 for (SymbolRef PartSym : Sym->symbols()) 636 BR.markInteresting(PartSym); 637 } 638 639 if (MarkTaint) { 640 // If the issue that we're reporting depends on the taintedness of the 641 // offset, then put interestingness onto symbols that could be the origin 642 // of the taint. Note that this may find symbols that did not appear in 643 // `Sym->symbols()` (because they're only loosely connected to `Val`). 644 for (SymbolRef Sym : getTaintedSymbols(ErrorState, Val)) 645 BR.markInteresting(Sym); 646 } 647 } 648 649 void ArrayBoundCheckerV2::reportOOB(CheckerContext &C, 650 ProgramStateRef ErrorState, Messages Msgs, 651 NonLoc Offset, std::optional<NonLoc> Extent, 652 bool IsTaintBug /*=false*/) const { 653 654 ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState); 655 if (!ErrorNode) 656 return; 657 658 auto BR = std::make_unique<PathSensitiveBugReport>( 659 IsTaintBug ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode); 660 661 // FIXME: ideally we would just call trackExpressionValue() and that would 662 // "do the right thing": mark the relevant symbols as interesting, track the 663 // control dependencies and statements storing the relevant values and add 664 // helpful diagnostic pieces. However, right now trackExpressionValue() is 665 // a heap of unreliable heuristics, so it would cause several issues: 666 // - Interestingness is not applied consistently, e.g. if `array[x+10]` 667 // causes an overflow, then `x` is not marked as interesting. 668 // - We get irrelevant diagnostic pieces, e.g. in the code 669 // `int *p = (int*)malloc(2*sizeof(int)); p[3] = 0;` 670 // it places a "Storing uninitialized value" note on the `malloc` call 671 // (which is technically true, but irrelevant). 672 // If trackExpressionValue() becomes reliable, it should be applied instead 673 // of this custom markPartsInteresting(). 674 markPartsInteresting(*BR, ErrorState, Offset, IsTaintBug); 675 if (Extent) 676 markPartsInteresting(*BR, ErrorState, *Extent, IsTaintBug); 677 678 C.emitReport(std::move(BR)); 679 } 680 681 bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) { 682 SourceLocation Loc = S->getBeginLoc(); 683 if (!Loc.isMacroID()) 684 return false; 685 686 StringRef MacroName = Lexer::getImmediateMacroName( 687 Loc, ACtx.getSourceManager(), ACtx.getLangOpts()); 688 689 if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's') 690 return false; 691 692 return ((MacroName == "isalnum") || (MacroName == "isalpha") || 693 (MacroName == "isblank") || (MacroName == "isdigit") || 694 (MacroName == "isgraph") || (MacroName == "islower") || 695 (MacroName == "isnctrl") || (MacroName == "isprint") || 696 (MacroName == "ispunct") || (MacroName == "isspace") || 697 (MacroName == "isupper") || (MacroName == "isxdigit")); 698 } 699 700 bool ArrayBoundCheckerV2::isInAddressOf(const Stmt *S, ASTContext &ACtx) { 701 ParentMapContext &ParentCtx = ACtx.getParentMapContext(); 702 do { 703 const DynTypedNodeList Parents = ParentCtx.getParents(*S); 704 if (Parents.empty()) 705 return false; 706 S = Parents[0].get<Stmt>(); 707 } while (isa_and_nonnull<ParenExpr, ImplicitCastExpr>(S)); 708 const auto *UnaryOp = dyn_cast_or_null<UnaryOperator>(S); 709 return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf; 710 } 711 712 bool ArrayBoundCheckerV2::isIdiomaticPastTheEndPtr(const Expr *E, 713 ProgramStateRef State, 714 NonLoc Offset, NonLoc Limit, 715 CheckerContext &C) { 716 if (isa<ArraySubscriptExpr>(E) && isInAddressOf(E, C.getASTContext())) { 717 auto [EqualsToThreshold, NotEqualToThreshold] = compareValueToThreshold( 718 State, Offset, Limit, C.getSValBuilder(), /*CheckEquality=*/true); 719 return EqualsToThreshold && !NotEqualToThreshold; 720 } 721 return false; 722 } 723 724 void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) { 725 mgr.registerChecker<ArrayBoundCheckerV2>(); 726 } 727 728 bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) { 729 return true; 730 } 731