//== RegionStore.cpp - Field-sensitive store model --------------*- C++ -*--==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a basic region store model. In this model we have field
// sensitivity, but we assume nothing about the heap shape, so recursive data
// structures are largely ignored; effectively, this is a 1-limited analysis.
// Parameter pointers are assumed not to alias. Pointee objects of parameters
// are created lazily.
//
//===----------------------------------------------------------------------===//

#include "clang/AST/Attr.h"
#include "clang/AST/CharUnits.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Analysis/Analyses/LiveVariables.h"
#include "clang/Analysis/AnalysisDeclContext.h"
#include "clang/Basic/JsonSupport.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "llvm/ADT/ImmutableMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
#include <utility>

using namespace clang;
using namespace ento;

//===----------------------------------------------------------------------===//
// Representation of binding keys.
//===----------------------------------------------------------------------===//

namespace {
class BindingKey {
public:
  enum Kind { Default = 0x0, Direct = 0x1 };
private:
  enum { Symbolic = 0x2 };

  llvm::PointerIntPair<const MemRegion *, 2> P;
  uint64_t Data;

  /// Create a key for a binding to region \p r, which has a symbolic offset
  /// from region \p Base.
  explicit BindingKey(const SubRegion *r, const SubRegion *Base, Kind k)
      : P(r, k | Symbolic), Data(reinterpret_cast<uintptr_t>(Base)) {
    assert(r && Base && "Must have known regions.");
    assert(getConcreteOffsetRegion() == Base && "Failed to store base region");
  }

  /// Create a key for a binding at \p offset from base region \p r.
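  /// Concrete offsets are measured in bits from the start of the base region;
  /// for example, the key for field `y` in `struct { int x; int y; } s;`
  /// would typically carry offset 32 on a target where `int` is 32 bits wide.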
62 explicit BindingKey(const MemRegion *r, uint64_t offset, Kind k) 63 : P(r, k), Data(offset) { 64 assert(r && "Must have known regions."); 65 assert(getOffset() == offset && "Failed to store offset"); 66 assert((r == r->getBaseRegion() || 67 isa<ObjCIvarRegion, CXXDerivedObjectRegion>(r)) && 68 "Not a base"); 69 } 70 71 public: 72 bool isDirect() const { return P.getInt() & Direct; } 73 bool isDefault() const { return !isDirect(); } 74 bool hasSymbolicOffset() const { return P.getInt() & Symbolic; } 75 76 const MemRegion *getRegion() const { return P.getPointer(); } 77 uint64_t getOffset() const { 78 assert(!hasSymbolicOffset()); 79 return Data; 80 } 81 82 const SubRegion *getConcreteOffsetRegion() const { 83 assert(hasSymbolicOffset()); 84 return reinterpret_cast<const SubRegion *>(static_cast<uintptr_t>(Data)); 85 } 86 87 const MemRegion *getBaseRegion() const { 88 if (hasSymbolicOffset()) 89 return getConcreteOffsetRegion()->getBaseRegion(); 90 return getRegion()->getBaseRegion(); 91 } 92 93 void Profile(llvm::FoldingSetNodeID& ID) const { 94 ID.AddPointer(P.getOpaqueValue()); 95 ID.AddInteger(Data); 96 } 97 98 static BindingKey Make(const MemRegion *R, Kind k); 99 100 bool operator<(const BindingKey &X) const { 101 if (P.getOpaqueValue() < X.P.getOpaqueValue()) 102 return true; 103 if (P.getOpaqueValue() > X.P.getOpaqueValue()) 104 return false; 105 return Data < X.Data; 106 } 107 108 bool operator==(const BindingKey &X) const { 109 return P.getOpaqueValue() == X.P.getOpaqueValue() && 110 Data == X.Data; 111 } 112 113 LLVM_DUMP_METHOD void dump() const; 114 }; 115 } // end anonymous namespace 116 117 BindingKey BindingKey::Make(const MemRegion *R, Kind k) { 118 const RegionOffset &RO = R->getAsOffset(); 119 if (RO.hasSymbolicOffset()) 120 return BindingKey(cast<SubRegion>(R), cast<SubRegion>(RO.getRegion()), k); 121 122 return BindingKey(RO.getRegion(), RO.getOffset(), k); 123 } 124 125 namespace llvm { 126 static inline raw_ostream &operator<<(raw_ostream &Out, BindingKey K) { 127 Out << "\"kind\": \"" << (K.isDirect() ? "Direct" : "Default") 128 << "\", \"offset\": "; 129 130 if (!K.hasSymbolicOffset()) 131 Out << K.getOffset(); 132 else 133 Out << "null"; 134 135 return Out; 136 } 137 138 } // namespace llvm 139 140 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 141 void BindingKey::dump() const { llvm::errs() << *this; } 142 #endif 143 144 //===----------------------------------------------------------------------===// 145 // Actual Store type. 146 //===----------------------------------------------------------------------===// 147 148 typedef llvm::ImmutableMap<BindingKey, SVal> ClusterBindings; 149 typedef llvm::ImmutableMapRef<BindingKey, SVal> ClusterBindingsRef; 150 typedef std::pair<BindingKey, SVal> BindingPair; 151 152 typedef llvm::ImmutableMap<const MemRegion *, ClusterBindings> 153 RegionBindings; 154 155 namespace { 156 class RegionBindingsRef : public llvm::ImmutableMapRef<const MemRegion *, 157 ClusterBindings> { 158 ClusterBindings::Factory *CBFactory; 159 160 // This flag indicates whether the current bindings are within the analysis 161 // that has started from main(). It affects how we perform loads from 162 // global variables that have initializers: if we have observed the 163 // program execution from the start and we know that these variables 164 // have not been overwritten yet, we can be sure that their initializers 165 // are still relevant. 
This flag never gets changed when the bindings are 166 // updated, so it could potentially be moved into RegionStoreManager 167 // (as if it's the same bindings but a different loading procedure) 168 // however that would have made the manager needlessly stateful. 169 bool IsMainAnalysis; 170 171 public: 172 typedef llvm::ImmutableMapRef<const MemRegion *, ClusterBindings> 173 ParentTy; 174 175 RegionBindingsRef(ClusterBindings::Factory &CBFactory, 176 const RegionBindings::TreeTy *T, 177 RegionBindings::TreeTy::Factory *F, 178 bool IsMainAnalysis) 179 : llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>(T, F), 180 CBFactory(&CBFactory), IsMainAnalysis(IsMainAnalysis) {} 181 182 RegionBindingsRef(const ParentTy &P, 183 ClusterBindings::Factory &CBFactory, 184 bool IsMainAnalysis) 185 : llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>(P), 186 CBFactory(&CBFactory), IsMainAnalysis(IsMainAnalysis) {} 187 188 RegionBindingsRef add(key_type_ref K, data_type_ref D) const { 189 return RegionBindingsRef(static_cast<const ParentTy *>(this)->add(K, D), 190 *CBFactory, IsMainAnalysis); 191 } 192 193 RegionBindingsRef remove(key_type_ref K) const { 194 return RegionBindingsRef(static_cast<const ParentTy *>(this)->remove(K), 195 *CBFactory, IsMainAnalysis); 196 } 197 198 RegionBindingsRef addBinding(BindingKey K, SVal V) const; 199 200 RegionBindingsRef addBinding(const MemRegion *R, 201 BindingKey::Kind k, SVal V) const; 202 203 const SVal *lookup(BindingKey K) const; 204 const SVal *lookup(const MemRegion *R, BindingKey::Kind k) const; 205 using llvm::ImmutableMapRef<const MemRegion *, ClusterBindings>::lookup; 206 207 RegionBindingsRef removeBinding(BindingKey K); 208 209 RegionBindingsRef removeBinding(const MemRegion *R, 210 BindingKey::Kind k); 211 212 RegionBindingsRef removeBinding(const MemRegion *R) { 213 return removeBinding(R, BindingKey::Direct). 214 removeBinding(R, BindingKey::Default); 215 } 216 217 std::optional<SVal> getDirectBinding(const MemRegion *R) const; 218 219 /// getDefaultBinding - Returns an SVal* representing an optional default 220 /// binding associated with a region and its subregions. 221 std::optional<SVal> getDefaultBinding(const MemRegion *R) const; 222 223 /// Return the internal tree as a Store. 
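  /// The IsMainAnalysis flag is packed into the low bit of the returned
  /// opaque pointer; RegionStoreManager::getRegionBindings() performs the
  /// inverse unpacking when a Store is converted back into bindings.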
224 Store asStore() const { 225 llvm::PointerIntPair<Store, 1, bool> Ptr = { 226 asImmutableMap().getRootWithoutRetain(), IsMainAnalysis}; 227 return reinterpret_cast<Store>(Ptr.getOpaqueValue()); 228 } 229 230 bool isMainAnalysis() const { 231 return IsMainAnalysis; 232 } 233 234 void printJson(raw_ostream &Out, const char *NL = "\n", 235 unsigned int Space = 0, bool IsDot = false) const { 236 using namespace llvm; 237 DenseMap<const MemRegion *, std::string> StringifyCache; 238 auto ToString = [&StringifyCache](const MemRegion *R) { 239 auto [Place, Inserted] = StringifyCache.try_emplace(R); 240 if (!Inserted) 241 return Place->second; 242 std::string Res; 243 raw_string_ostream OS(Res); 244 OS << R; 245 Place->second = Res; 246 return Res; 247 }; 248 249 using Cluster = 250 std::pair<const MemRegion *, ImmutableMap<BindingKey, SVal>>; 251 using Binding = std::pair<BindingKey, SVal>; 252 253 const auto MemSpaceBeforeRegionName = [&ToString](const Cluster *L, 254 const Cluster *R) { 255 if (isa<MemSpaceRegion>(L->first) && !isa<MemSpaceRegion>(R->first)) 256 return true; 257 if (!isa<MemSpaceRegion>(L->first) && isa<MemSpaceRegion>(R->first)) 258 return false; 259 return ToString(L->first) < ToString(R->first); 260 }; 261 262 const auto SymbolicBeforeOffset = [&ToString](const BindingKey &L, 263 const BindingKey &R) { 264 if (L.hasSymbolicOffset() && !R.hasSymbolicOffset()) 265 return true; 266 if (!L.hasSymbolicOffset() && R.hasSymbolicOffset()) 267 return false; 268 if (L.hasSymbolicOffset() && R.hasSymbolicOffset()) 269 return ToString(L.getRegion()) < ToString(R.getRegion()); 270 return L.getOffset() < R.getOffset(); 271 }; 272 273 const auto DefaultBindingBeforeDirectBindings = 274 [&SymbolicBeforeOffset](const Binding *LPtr, const Binding *RPtr) { 275 const BindingKey &L = LPtr->first; 276 const BindingKey &R = RPtr->first; 277 if (L.isDefault() && !R.isDefault()) 278 return true; 279 if (!L.isDefault() && R.isDefault()) 280 return false; 281 assert(L.isDefault() == R.isDefault()); 282 return SymbolicBeforeOffset(L, R); 283 }; 284 285 const auto AddrOf = [](const auto &Item) { return &Item; }; 286 287 std::vector<const Cluster *> SortedClusters; 288 SortedClusters.reserve(std::distance(begin(), end())); 289 append_range(SortedClusters, map_range(*this, AddrOf)); 290 llvm::sort(SortedClusters, MemSpaceBeforeRegionName); 291 292 for (auto [Idx, C] : llvm::enumerate(SortedClusters)) { 293 const auto &[BaseRegion, Bindings] = *C; 294 Indent(Out, Space, IsDot) 295 << "{ \"cluster\": \"" << BaseRegion << "\", \"pointer\": \"" 296 << (const void *)BaseRegion << "\", \"items\": [" << NL; 297 298 std::vector<const Binding *> SortedBindings; 299 SortedBindings.reserve(std::distance(Bindings.begin(), Bindings.end())); 300 append_range(SortedBindings, map_range(Bindings, AddrOf)); 301 llvm::sort(SortedBindings, DefaultBindingBeforeDirectBindings); 302 303 ++Space; 304 for (auto [Idx, B] : llvm::enumerate(SortedBindings)) { 305 const auto &[Key, Value] = *B; 306 Indent(Out, Space, IsDot) << "{ " << Key << ", \"value\": "; 307 Value.printJson(Out, /*AddQuotes=*/true); 308 Out << " }"; 309 if (Idx != SortedBindings.size() - 1) 310 Out << ','; 311 Out << NL; 312 } 313 --Space; 314 Indent(Out, Space, IsDot) << "]}"; 315 if (Idx != SortedClusters.size() - 1) 316 Out << ','; 317 Out << NL; 318 } 319 } 320 321 LLVM_DUMP_METHOD void dump() const { printJson(llvm::errs()); } 322 }; 323 } // end anonymous namespace 324 325 typedef const RegionBindingsRef& RegionBindingsConstRef; 326 327 
std::optional<SVal>
RegionBindingsRef::getDirectBinding(const MemRegion *R) const {
  const SVal *V = lookup(R, BindingKey::Direct);
  return V ? std::optional<SVal>(*V) : std::nullopt;
}

std::optional<SVal>
RegionBindingsRef::getDefaultBinding(const MemRegion *R) const {
  const SVal *V = lookup(R, BindingKey::Default);
  return V ? std::optional<SVal>(*V) : std::nullopt;
}

RegionBindingsRef RegionBindingsRef::addBinding(BindingKey K, SVal V) const {
  const MemRegion *Base = K.getBaseRegion();

  const ClusterBindings *ExistingCluster = lookup(Base);
  ClusterBindings Cluster =
      (ExistingCluster ? *ExistingCluster : CBFactory->getEmptyMap());

  ClusterBindings NewCluster = CBFactory->add(Cluster, K, V);
  return add(Base, NewCluster);
}

RegionBindingsRef RegionBindingsRef::addBinding(const MemRegion *R,
                                                BindingKey::Kind k,
                                                SVal V) const {
  return addBinding(BindingKey::Make(R, k), V);
}

const SVal *RegionBindingsRef::lookup(BindingKey K) const {
  const ClusterBindings *Cluster = lookup(K.getBaseRegion());
  if (!Cluster)
    return nullptr;
  return Cluster->lookup(K);
}

const SVal *RegionBindingsRef::lookup(const MemRegion *R,
                                      BindingKey::Kind k) const {
  return lookup(BindingKey::Make(R, k));
}

RegionBindingsRef RegionBindingsRef::removeBinding(BindingKey K) {
  const MemRegion *Base = K.getBaseRegion();
  const ClusterBindings *Cluster = lookup(Base);
  if (!Cluster)
    return *this;

  ClusterBindings NewCluster = CBFactory->remove(*Cluster, K);
  if (NewCluster.isEmpty())
    return remove(Base);
  return add(Base, NewCluster);
}

RegionBindingsRef RegionBindingsRef::removeBinding(const MemRegion *R,
                                                   BindingKey::Kind k) {
  return removeBinding(BindingKey::Make(R, k));
}

//===----------------------------------------------------------------------===//
// Main RegionStore logic.
//===----------------------------------------------------------------------===//

namespace {
class InvalidateRegionsWorker;

class RegionStoreManager : public StoreManager {
public:
  RegionBindings::Factory RBFactory;
  mutable ClusterBindings::Factory CBFactory;

  typedef std::vector<SVal> SValListTy;
private:
  typedef llvm::DenseMap<const LazyCompoundValData *,
                         SValListTy> LazyBindingsMapTy;
  LazyBindingsMapTy LazyBindingsMap;

  /// The largest number of fields a struct can have and still be
  /// considered "small".
  ///
  /// This is currently used to decide whether or not it is worth "forcing" a
  /// LazyCompoundVal on bind.
  ///
  /// This is controlled by the 'region-store-small-struct-limit' option.
  /// To disable all small-struct-dependent behavior, set the option to "0".
  unsigned SmallStructLimit;

  /// The largest number of elements an array can have and still be
  /// considered "small".
  ///
  /// This is currently used to decide whether or not it is worth "forcing" a
  /// LazyCompoundVal on bind.
  ///
  /// This is controlled by the 'region-store-small-array-limit' option.
  /// To disable all small-array-dependent behavior, set the option to "0".
  unsigned SmallArrayLimit;

  /// A helper used to populate the work list with the given set of
  /// regions.
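  /// Values that are LazyCompoundVals are expanded into their "interesting"
  /// sub-values (see getInterestingValues()) before being added.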
426 void populateWorkList(InvalidateRegionsWorker &W, 427 ArrayRef<SVal> Values, 428 InvalidatedRegions *TopLevelRegions); 429 430 public: 431 RegionStoreManager(ProgramStateManager &mgr) 432 : StoreManager(mgr), RBFactory(mgr.getAllocator()), 433 CBFactory(mgr.getAllocator()), SmallStructLimit(0), SmallArrayLimit(0) { 434 ExprEngine &Eng = StateMgr.getOwningEngine(); 435 AnalyzerOptions &Options = Eng.getAnalysisManager().options; 436 SmallStructLimit = Options.RegionStoreSmallStructLimit; 437 SmallArrayLimit = Options.RegionStoreSmallArrayLimit; 438 } 439 440 /// setImplicitDefaultValue - Set the default binding for the provided 441 /// MemRegion to the value implicitly defined for compound literals when 442 /// the value is not specified. 443 RegionBindingsRef setImplicitDefaultValue(RegionBindingsConstRef B, 444 const MemRegion *R, QualType T); 445 446 /// ArrayToPointer - Emulates the "decay" of an array to a pointer 447 /// type. 'Array' represents the lvalue of the array being decayed 448 /// to a pointer, and the returned SVal represents the decayed 449 /// version of that lvalue (i.e., a pointer to the first element of 450 /// the array). This is called by ExprEngine when evaluating 451 /// casts from arrays to pointers. 452 SVal ArrayToPointer(Loc Array, QualType ElementTy) override; 453 454 /// Creates the Store that correctly represents memory contents before 455 /// the beginning of the analysis of the given top-level stack frame. 456 StoreRef getInitialStore(const LocationContext *InitLoc) override { 457 bool IsMainAnalysis = false; 458 if (const auto *FD = dyn_cast<FunctionDecl>(InitLoc->getDecl())) 459 IsMainAnalysis = FD->isMain() && !Ctx.getLangOpts().CPlusPlus; 460 return StoreRef(RegionBindingsRef( 461 RegionBindingsRef::ParentTy(RBFactory.getEmptyMap(), RBFactory), 462 CBFactory, IsMainAnalysis).asStore(), *this); 463 } 464 465 //===-------------------------------------------------------------------===// 466 // Binding values to regions. 467 //===-------------------------------------------------------------------===// 468 RegionBindingsRef invalidateGlobalRegion(MemRegion::Kind K, const Stmt *S, 469 unsigned Count, 470 const LocationContext *LCtx, 471 RegionBindingsRef B, 472 InvalidatedRegions *Invalidated); 473 474 StoreRef invalidateRegions(Store store, ArrayRef<SVal> Values, const Stmt *S, 475 unsigned Count, const LocationContext *LCtx, 476 const CallEvent *Call, InvalidatedSymbols &IS, 477 RegionAndSymbolInvalidationTraits &ITraits, 478 InvalidatedRegions *Invalidated, 479 InvalidatedRegions *InvalidatedTopLevel) override; 480 481 bool scanReachableSymbols(Store S, const MemRegion *R, 482 ScanReachableSymbols &Callbacks) override; 483 484 RegionBindingsRef removeSubRegionBindings(RegionBindingsConstRef B, 485 const SubRegion *R); 486 std::optional<SVal> 487 getConstantValFromConstArrayInitializer(RegionBindingsConstRef B, 488 const ElementRegion *R); 489 std::optional<SVal> 490 getSValFromInitListExpr(const InitListExpr *ILE, 491 const SmallVector<uint64_t, 2> &ConcreteOffsets, 492 QualType ElemT); 493 SVal getSValFromStringLiteral(const StringLiteral *SL, uint64_t Offset, 494 QualType ElemT); 495 496 public: // Part of public interface to class. 497 498 StoreRef Bind(Store store, Loc LV, SVal V) override { 499 return StoreRef(bind(getRegionBindings(store), LV, V).asStore(), *this); 500 } 501 502 RegionBindingsRef bind(RegionBindingsConstRef B, Loc LV, SVal V); 503 504 // BindDefaultInitial is only used to initialize a region with 505 // a default value. 
  StoreRef BindDefaultInitial(Store store, const MemRegion *R,
                              SVal V) override {
    RegionBindingsRef B = getRegionBindings(store);
    // Use other APIs when you have to wipe the region that was initialized
    // earlier.
    assert(!(B.getDefaultBinding(R) || B.getDirectBinding(R)) &&
           "Double initialization!");
    B = B.addBinding(BindingKey::Make(R, BindingKey::Default), V);
    return StoreRef(B.asImmutableMap().getRootWithoutRetain(), *this);
  }

  // BindDefaultZero is used for zeroing constructors that may accidentally
  // overwrite existing bindings.
  StoreRef BindDefaultZero(Store store, const MemRegion *R) override {
    // FIXME: The offsets of empty bases can be tricky because of
    // the so-called "empty base class optimization".
    // If a base class has been optimized out
    // we should not try to create a binding, otherwise we should.
    // Unfortunately, at the moment ASTRecordLayout doesn't expose
    // the actual sizes of the empty bases
    // and trying to infer them from offsets/alignments
    // seems to be error-prone and non-trivial because of the trailing padding.
    // As a temporary mitigation we don't create bindings for empty bases.
    if (const auto *BR = dyn_cast<CXXBaseObjectRegion>(R))
      if (BR->getDecl()->isEmpty())
        return StoreRef(store, *this);

    RegionBindingsRef B = getRegionBindings(store);
    SVal V = svalBuilder.makeZeroVal(Ctx.CharTy);
    B = removeSubRegionBindings(B, cast<SubRegion>(R));
    B = B.addBinding(BindingKey::Make(R, BindingKey::Default), V);
    return StoreRef(B.asImmutableMap().getRootWithoutRetain(), *this);
  }

  /// Attempt to extract the fields of \p LCV and bind them to the struct region
  /// \p R.
  ///
  /// This path is used when it seems advantageous to "force" loading the values
  /// within a LazyCompoundVal to bind memberwise to the struct region, rather
  /// than using a Default binding at the base of the entire region. This is a
  /// heuristic attempting to avoid building long chains of LazyCompoundVals.
  ///
  /// \returns The updated store bindings, or \c std::nullopt if binding
  /// non-lazily would be too expensive.
  std::optional<RegionBindingsRef>
  tryBindSmallStruct(RegionBindingsConstRef B, const TypedValueRegion *R,
                     const RecordDecl *RD, nonloc::LazyCompoundVal LCV);

  /// BindStruct - Bind a compound value to a structure.
  RegionBindingsRef bindStruct(RegionBindingsConstRef B,
                               const TypedValueRegion* R, SVal V);

  /// BindVector - Bind a compound value to a vector.
  RegionBindingsRef bindVector(RegionBindingsConstRef B,
                               const TypedValueRegion* R, SVal V);

  std::optional<RegionBindingsRef>
  tryBindSmallArray(RegionBindingsConstRef B, const TypedValueRegion *R,
                    const ArrayType *AT, nonloc::LazyCompoundVal LCV);

  RegionBindingsRef bindArray(RegionBindingsConstRef B,
                              const TypedValueRegion* R,
                              SVal V);

  /// Clears out all bindings in the given region and assigns a new value
  /// as a Default binding.
  RegionBindingsRef bindAggregate(RegionBindingsConstRef B,
                                  const TypedRegion *R,
                                  SVal DefaultVal);

  /// Create a new store with the specified binding removed.
  /// \param ST the original store, that is the basis for the new store.
  /// \param L the location whose binding should be removed.
  StoreRef killBinding(Store ST, Loc L) override;

  void incrementReferenceCount(Store store) override {
    getRegionBindings(store).manualRetain();
  }

  /// If the StoreManager supports it, decrement the reference count of
  /// the specified Store object. If the reference count hits 0, the memory
  /// associated with the object is recycled.
  void decrementReferenceCount(Store store) override {
    getRegionBindings(store).manualRelease();
  }

  bool includedInBindings(Store store, const MemRegion *region) const override;

  /// Return the value bound to the specified location in a given state.
  ///
  /// The high level logic for this method is this:
  /// getBinding (L)
  ///   if L has binding
  ///     return L's binding
  ///   else if L is in killset
  ///     return unknown
  ///   else
  ///     if L is on stack or heap
  ///       return undefined
  ///     else
  ///       return symbolic
  SVal getBinding(Store S, Loc L, QualType T) override {
    return getBinding(getRegionBindings(S), L, T);
  }

  std::optional<SVal> getUniqueDefaultBinding(RegionBindingsConstRef B,
                                              const TypedValueRegion *R) const;
  std::optional<SVal>
  getUniqueDefaultBinding(nonloc::LazyCompoundVal LCV) const;

  std::optional<SVal> getDefaultBinding(Store S, const MemRegion *R) override {
    RegionBindingsRef B = getRegionBindings(S);
    // Default bindings are always applied over a base region so look up the
    // base region's default binding, otherwise the lookup will fail when R
    // is at an offset from R->getBaseRegion().
    return B.getDefaultBinding(R->getBaseRegion());
  }

  SVal getBinding(RegionBindingsConstRef B, Loc L, QualType T = QualType());

  SVal getBindingForElement(RegionBindingsConstRef B, const ElementRegion *R);

  SVal getBindingForField(RegionBindingsConstRef B, const FieldRegion *R);

  SVal getBindingForObjCIvar(RegionBindingsConstRef B, const ObjCIvarRegion *R);

  SVal getBindingForVar(RegionBindingsConstRef B, const VarRegion *R);

  SVal getBindingForLazySymbol(const TypedValueRegion *R);

  SVal getBindingForFieldOrElementCommon(RegionBindingsConstRef B,
                                         const TypedValueRegion *R,
                                         QualType Ty);

  SVal getLazyBinding(const SubRegion *LazyBindingRegion,
                      RegionBindingsRef LazyBinding);

  /// Get bindings for the values in a struct and return a CompoundVal, used
  /// when doing struct copy:
  /// struct s x, y;
  /// x = y;
  /// y's value is retrieved by this method.
  SVal getBindingForStruct(RegionBindingsConstRef B, const TypedValueRegion *R);
  SVal getBindingForArray(RegionBindingsConstRef B, const TypedValueRegion *R);
  NonLoc createLazyBinding(RegionBindingsConstRef B, const TypedValueRegion *R);

  /// Used to lazily generate derived symbols for bindings that are defined
  /// implicitly by default bindings in a super region.
  ///
  /// Note that callers may need to specially handle LazyCompoundVals, which
  /// are returned as is in case the caller needs to treat them differently.
  std::optional<SVal>
  getBindingForDerivedDefaultValue(RegionBindingsConstRef B,
                                   const MemRegion *superR,
                                   const TypedValueRegion *R, QualType Ty);

  /// Get the state and region whose binding this region \p R corresponds to.
  ///
  /// If there is no lazy binding for \p R, the returned value will have a null
  /// \c second. Note that a null pointer can represent a valid Store.
666 std::pair<Store, const SubRegion *> 667 findLazyBinding(RegionBindingsConstRef B, const SubRegion *R, 668 const SubRegion *originalRegion); 669 670 /// Returns the cached set of interesting SVals contained within a lazy 671 /// binding. 672 /// 673 /// The precise value of "interesting" is determined for the purposes of 674 /// RegionStore's internal analysis. It must always contain all regions and 675 /// symbols, but may omit constants and other kinds of SVal. 676 /// 677 /// In contrast to compound values, LazyCompoundVals are also added 678 /// to the 'interesting values' list in addition to the child interesting 679 /// values. 680 const SValListTy &getInterestingValues(nonloc::LazyCompoundVal LCV); 681 682 //===------------------------------------------------------------------===// 683 // State pruning. 684 //===------------------------------------------------------------------===// 685 686 /// removeDeadBindings - Scans the RegionStore of 'state' for dead values. 687 /// It returns a new Store with these values removed. 688 StoreRef removeDeadBindings(Store store, const StackFrameContext *LCtx, 689 SymbolReaper& SymReaper) override; 690 691 //===------------------------------------------------------------------===// 692 // Utility methods. 693 //===------------------------------------------------------------------===// 694 695 RegionBindingsRef getRegionBindings(Store store) const { 696 llvm::PointerIntPair<Store, 1, bool> Ptr; 697 Ptr.setFromOpaqueValue(const_cast<void *>(store)); 698 return RegionBindingsRef( 699 CBFactory, 700 static_cast<const RegionBindings::TreeTy *>(Ptr.getPointer()), 701 RBFactory.getTreeFactory(), 702 Ptr.getInt()); 703 } 704 705 void printJson(raw_ostream &Out, Store S, const char *NL = "\n", 706 unsigned int Space = 0, bool IsDot = false) const override; 707 708 void iterBindings(Store store, BindingsHandler& f) override { 709 RegionBindingsRef B = getRegionBindings(store); 710 for (const auto &[Region, Cluster] : B) { 711 for (const auto &[Key, Value] : Cluster) { 712 if (!Key.isDirect()) 713 continue; 714 if (const SubRegion *R = dyn_cast<SubRegion>(Key.getRegion())) { 715 // FIXME: Possibly incorporate the offset? 716 if (!f.HandleBinding(*this, store, R, Value)) 717 return; 718 } 719 } 720 } 721 } 722 }; 723 724 } // end anonymous namespace 725 726 //===----------------------------------------------------------------------===// 727 // RegionStore creation. 728 //===----------------------------------------------------------------------===// 729 730 std::unique_ptr<StoreManager> 731 ento::CreateRegionStoreManager(ProgramStateManager &StMgr) { 732 return std::make_unique<RegionStoreManager>(StMgr); 733 } 734 735 //===----------------------------------------------------------------------===// 736 // Region Cluster analysis. 737 //===----------------------------------------------------------------------===// 738 739 namespace { 740 /// Used to determine which global regions are automatically included in the 741 /// initial worklist of a ClusterAnalysis. 742 enum GlobalsFilterKind { 743 /// Don't include any global regions. 744 GFK_None, 745 /// Only include system globals. 746 GFK_SystemOnly, 747 /// Include all global regions. 
748 GFK_All 749 }; 750 751 template <typename DERIVED> 752 class ClusterAnalysis { 753 protected: 754 typedef llvm::DenseMap<const MemRegion *, const ClusterBindings *> ClusterMap; 755 typedef const MemRegion * WorkListElement; 756 typedef SmallVector<WorkListElement, 10> WorkList; 757 758 llvm::SmallPtrSet<const ClusterBindings *, 16> Visited; 759 760 WorkList WL; 761 762 RegionStoreManager &RM; 763 ASTContext &Ctx; 764 SValBuilder &svalBuilder; 765 766 RegionBindingsRef B; 767 768 769 protected: 770 const ClusterBindings *getCluster(const MemRegion *R) { 771 return B.lookup(R); 772 } 773 774 /// Returns true if all clusters in the given memspace should be initially 775 /// included in the cluster analysis. Subclasses may provide their 776 /// own implementation. 777 bool includeEntireMemorySpace(const MemRegion *Base) { 778 return false; 779 } 780 781 public: 782 ClusterAnalysis(RegionStoreManager &rm, ProgramStateManager &StateMgr, 783 RegionBindingsRef b) 784 : RM(rm), Ctx(StateMgr.getContext()), 785 svalBuilder(StateMgr.getSValBuilder()), B(std::move(b)) {} 786 787 RegionBindingsRef getRegionBindings() const { return B; } 788 789 bool isVisited(const MemRegion *R) { 790 return Visited.count(getCluster(R)); 791 } 792 793 void GenerateClusters() { 794 // Scan the entire set of bindings and record the region clusters. 795 for (RegionBindingsRef::iterator RI = B.begin(), RE = B.end(); 796 RI != RE; ++RI){ 797 const MemRegion *Base = RI.getKey(); 798 799 const ClusterBindings &Cluster = RI.getData(); 800 assert(!Cluster.isEmpty() && "Empty clusters should be removed"); 801 static_cast<DERIVED*>(this)->VisitAddedToCluster(Base, Cluster); 802 803 // If the base's memspace should be entirely invalidated, add the cluster 804 // to the workspace up front. 805 if (static_cast<DERIVED*>(this)->includeEntireMemorySpace(Base)) 806 AddToWorkList(WorkListElement(Base), &Cluster); 807 } 808 } 809 810 bool AddToWorkList(WorkListElement E, const ClusterBindings *C) { 811 if (C && !Visited.insert(C).second) 812 return false; 813 WL.push_back(E); 814 return true; 815 } 816 817 bool AddToWorkList(const MemRegion *R) { 818 return static_cast<DERIVED*>(this)->AddToWorkList(R); 819 } 820 821 void RunWorkList() { 822 while (!WL.empty()) { 823 WorkListElement E = WL.pop_back_val(); 824 const MemRegion *BaseR = E; 825 826 static_cast<DERIVED*>(this)->VisitCluster(BaseR, getCluster(BaseR)); 827 } 828 } 829 830 void VisitAddedToCluster(const MemRegion *baseR, const ClusterBindings &C) {} 831 void VisitCluster(const MemRegion *baseR, const ClusterBindings *C) {} 832 833 void VisitCluster(const MemRegion *BaseR, const ClusterBindings *C, 834 bool Flag) { 835 static_cast<DERIVED*>(this)->VisitCluster(BaseR, C); 836 } 837 }; 838 } 839 840 //===----------------------------------------------------------------------===// 841 // Binding invalidation. 
842 //===----------------------------------------------------------------------===// 843 844 bool RegionStoreManager::scanReachableSymbols(Store S, const MemRegion *R, 845 ScanReachableSymbols &Callbacks) { 846 assert(R == R->getBaseRegion() && "Should only be called for base regions"); 847 RegionBindingsRef B = getRegionBindings(S); 848 const ClusterBindings *Cluster = B.lookup(R); 849 850 if (!Cluster) 851 return true; 852 853 for (ClusterBindings::iterator RI = Cluster->begin(), RE = Cluster->end(); 854 RI != RE; ++RI) { 855 if (!Callbacks.scan(RI.getData())) 856 return false; 857 } 858 859 return true; 860 } 861 862 static inline bool isUnionField(const FieldRegion *FR) { 863 return FR->getDecl()->getParent()->isUnion(); 864 } 865 866 typedef SmallVector<const FieldDecl *, 8> FieldVector; 867 868 static void getSymbolicOffsetFields(BindingKey K, FieldVector &Fields) { 869 assert(K.hasSymbolicOffset() && "Not implemented for concrete offset keys"); 870 871 const MemRegion *Base = K.getConcreteOffsetRegion(); 872 const MemRegion *R = K.getRegion(); 873 874 while (R != Base) { 875 if (const FieldRegion *FR = dyn_cast<FieldRegion>(R)) 876 if (!isUnionField(FR)) 877 Fields.push_back(FR->getDecl()); 878 879 R = cast<SubRegion>(R)->getSuperRegion(); 880 } 881 } 882 883 static bool isCompatibleWithFields(BindingKey K, const FieldVector &Fields) { 884 assert(K.hasSymbolicOffset() && "Not implemented for concrete offset keys"); 885 886 if (Fields.empty()) 887 return true; 888 889 FieldVector FieldsInBindingKey; 890 getSymbolicOffsetFields(K, FieldsInBindingKey); 891 892 ptrdiff_t Delta = FieldsInBindingKey.size() - Fields.size(); 893 if (Delta >= 0) 894 return std::equal(FieldsInBindingKey.begin() + Delta, 895 FieldsInBindingKey.end(), 896 Fields.begin()); 897 else 898 return std::equal(FieldsInBindingKey.begin(), FieldsInBindingKey.end(), 899 Fields.begin() - Delta); 900 } 901 902 /// Collects all bindings in \p Cluster that may refer to bindings within 903 /// \p Top. 904 /// 905 /// Each binding is a pair whose \c first is the key (a BindingKey) and whose 906 /// \c second is the value (an SVal). 907 /// 908 /// The \p IncludeAllDefaultBindings parameter specifies whether to include 909 /// default bindings that may extend beyond \p Top itself, e.g. if \p Top is 910 /// an aggregate within a larger aggregate with a default binding. 911 static void 912 collectSubRegionBindings(SmallVectorImpl<BindingPair> &Bindings, 913 SValBuilder &SVB, const ClusterBindings &Cluster, 914 const SubRegion *Top, BindingKey TopKey, 915 bool IncludeAllDefaultBindings) { 916 FieldVector FieldsInSymbolicSubregions; 917 if (TopKey.hasSymbolicOffset()) { 918 getSymbolicOffsetFields(TopKey, FieldsInSymbolicSubregions); 919 Top = TopKey.getConcreteOffsetRegion(); 920 TopKey = BindingKey::Make(Top, BindingKey::Default); 921 } 922 923 // Find the length (in bits) of the region being invalidated. 924 uint64_t Length = UINT64_MAX; 925 SVal Extent = Top->getMemRegionManager().getStaticSize(Top, SVB); 926 if (std::optional<nonloc::ConcreteInt> ExtentCI = 927 Extent.getAs<nonloc::ConcreteInt>()) { 928 const llvm::APSInt &ExtentInt = ExtentCI->getValue(); 929 assert(ExtentInt.isNonNegative() || ExtentInt.isUnsigned()); 930 // Extents are in bytes but region offsets are in bits. Be careful! 
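    // For example, a 4-byte extent becomes a 32-bit length when CharWidth is 8.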
    Length = ExtentInt.getLimitedValue() * SVB.getContext().getCharWidth();
  } else if (const FieldRegion *FR = dyn_cast<FieldRegion>(Top)) {
    if (FR->getDecl()->isBitField())
      Length = FR->getDecl()->getBitWidthValue();
  }

  for (const auto &StoreEntry : Cluster) {
    BindingKey NextKey = StoreEntry.first;
    if (NextKey.getRegion() == TopKey.getRegion()) {
      // FIXME: This doesn't catch the case where we're really invalidating a
      // region with a symbolic offset. Example:
      //   R: points[i].y
      //   Next: points[0].x

      if (NextKey.getOffset() > TopKey.getOffset() &&
          NextKey.getOffset() - TopKey.getOffset() < Length) {
        // Case 1: The next binding is inside the region we're invalidating.
        // Include it.
        Bindings.push_back(StoreEntry);

      } else if (NextKey.getOffset() == TopKey.getOffset()) {
        // Case 2: The next binding is at the same offset as the region we're
        // invalidating. In this case, we need to leave default bindings alone,
        // since they may be providing a default value for regions beyond what
        // we're invalidating.
        // FIXME: This is probably incorrect; consider invalidating an outer
        // struct whose first field is bound to a LazyCompoundVal.
        if (IncludeAllDefaultBindings || NextKey.isDirect())
          Bindings.push_back(StoreEntry);
      }

    } else if (NextKey.hasSymbolicOffset()) {
      const MemRegion *Base = NextKey.getConcreteOffsetRegion();
      if (Top->isSubRegionOf(Base) && Top != Base) {
        // Case 3: The next key is symbolic and we just changed something within
        // its concrete region. We don't know if the binding is still valid, so
        // we'll be conservative and include it.
        if (IncludeAllDefaultBindings || NextKey.isDirect())
          if (isCompatibleWithFields(NextKey, FieldsInSymbolicSubregions))
            Bindings.push_back(StoreEntry);
      } else if (const SubRegion *BaseSR = dyn_cast<SubRegion>(Base)) {
        // Case 4: The next key is symbolic, but we changed a known
        // super-region. In this case the binding is certainly included.
        if (BaseSR->isSubRegionOf(Top))
          if (isCompatibleWithFields(NextKey, FieldsInSymbolicSubregions))
            Bindings.push_back(StoreEntry);
      }
    }
  }
}

static void
collectSubRegionBindings(SmallVectorImpl<BindingPair> &Bindings,
                         SValBuilder &SVB, const ClusterBindings &Cluster,
                         const SubRegion *Top, bool IncludeAllDefaultBindings) {
  collectSubRegionBindings(Bindings, SVB, Cluster, Top,
                           BindingKey::Make(Top, BindingKey::Default),
                           IncludeAllDefaultBindings);
}

RegionBindingsRef
RegionStoreManager::removeSubRegionBindings(RegionBindingsConstRef B,
                                            const SubRegion *Top) {
  BindingKey TopKey = BindingKey::Make(Top, BindingKey::Default);
  const MemRegion *ClusterHead = TopKey.getBaseRegion();

  if (Top == ClusterHead) {
    // We can remove an entire cluster's bindings all in one go.
    return B.remove(Top);
  }

  const ClusterBindings *Cluster = B.lookup(ClusterHead);
  if (!Cluster) {
    // If we're invalidating a region with a symbolic offset, we need to make
    // sure we don't treat the base region as uninitialized anymore.
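    // (The same adjustment is made below for the case where the cluster does
    // contain other bindings.)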
    if (TopKey.hasSymbolicOffset()) {
      const SubRegion *Concrete = TopKey.getConcreteOffsetRegion();
      return B.addBinding(Concrete, BindingKey::Default, UnknownVal());
    }
    return B;
  }

  SmallVector<BindingPair, 32> Bindings;
  collectSubRegionBindings(Bindings, svalBuilder, *Cluster, Top, TopKey,
                           /*IncludeAllDefaultBindings=*/false);

  ClusterBindingsRef Result(*Cluster, CBFactory);
  for (BindingKey Key : llvm::make_first_range(Bindings))
    Result = Result.remove(Key);

  // If we're invalidating a region with a symbolic offset, we need to make sure
  // we don't treat the base region as uninitialized anymore.
  // FIXME: This isn't very precise; see the example in
  // collectSubRegionBindings.
  if (TopKey.hasSymbolicOffset()) {
    const SubRegion *Concrete = TopKey.getConcreteOffsetRegion();
    Result = Result.add(BindingKey::Make(Concrete, BindingKey::Default),
                        UnknownVal());
  }

  if (Result.isEmpty())
    return B.remove(ClusterHead);
  return B.add(ClusterHead, Result.asImmutableMap());
}

namespace {
class InvalidateRegionsWorker : public ClusterAnalysis<InvalidateRegionsWorker>
{
  const Stmt *S;
  unsigned Count;
  const LocationContext *LCtx;
  InvalidatedSymbols &IS;
  RegionAndSymbolInvalidationTraits &ITraits;
  StoreManager::InvalidatedRegions *Regions;
  GlobalsFilterKind GlobalsFilter;
public:
  InvalidateRegionsWorker(RegionStoreManager &rm, ProgramStateManager &stateMgr,
                          RegionBindingsRef b, const Stmt *S, unsigned count,
                          const LocationContext *lctx, InvalidatedSymbols &is,
                          RegionAndSymbolInvalidationTraits &ITraitsIn,
                          StoreManager::InvalidatedRegions *r,
                          GlobalsFilterKind GFK)
      : ClusterAnalysis<InvalidateRegionsWorker>(rm, stateMgr, b), S(S),
        Count(count), LCtx(lctx), IS(is), ITraits(ITraitsIn), Regions(r),
        GlobalsFilter(GFK) {}

  void VisitCluster(const MemRegion *baseR, const ClusterBindings *C);
  void VisitBinding(SVal V);

  using ClusterAnalysis::AddToWorkList;

  bool AddToWorkList(const MemRegion *R);

  /// Returns true if all clusters in the memory space for \p Base should be
  /// invalidated.
  bool includeEntireMemorySpace(const MemRegion *Base);

  /// Returns true if the memory space of the given region is one of the global
  /// regions specially included at the start of invalidation.
  bool isInitiallyIncludedGlobalRegion(const MemRegion *R);
};
}

bool InvalidateRegionsWorker::AddToWorkList(const MemRegion *R) {
  bool doNotInvalidateSuperRegion = ITraits.hasTrait(
      R, RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
  const MemRegion *BaseR = doNotInvalidateSuperRegion ? R : R->getBaseRegion();
  return AddToWorkList(WorkListElement(BaseR), getCluster(BaseR));
}

void InvalidateRegionsWorker::VisitBinding(SVal V) {
  // A symbol? Mark it touched by the invalidation.
  if (SymbolRef Sym = V.getAsSymbol())
    IS.insert(Sym);

  if (const MemRegion *R = V.getAsRegion()) {
    AddToWorkList(R);
    return;
  }

  // Is it a LazyCompoundVal? All references get invalidated as well.
  if (std::optional<nonloc::LazyCompoundVal> LCS =
          V.getAs<nonloc::LazyCompoundVal>()) {

    // `getInterestingValues()` returns SVals contained within LazyCompoundVals,
    // so there is no need to visit them.
1097 for (SVal V : RM.getInterestingValues(*LCS)) 1098 if (!isa<nonloc::LazyCompoundVal>(V)) 1099 VisitBinding(V); 1100 1101 return; 1102 } 1103 } 1104 1105 void InvalidateRegionsWorker::VisitCluster(const MemRegion *baseR, 1106 const ClusterBindings *C) { 1107 1108 bool PreserveRegionsContents = 1109 ITraits.hasTrait(baseR, 1110 RegionAndSymbolInvalidationTraits::TK_PreserveContents); 1111 1112 if (C) { 1113 for (SVal Val : llvm::make_second_range(*C)) 1114 VisitBinding(Val); 1115 1116 // Invalidate regions contents. 1117 if (!PreserveRegionsContents) 1118 B = B.remove(baseR); 1119 } 1120 1121 if (const auto *TO = dyn_cast<TypedValueRegion>(baseR)) { 1122 if (const auto *RD = TO->getValueType()->getAsCXXRecordDecl()) { 1123 1124 // Lambdas can affect all static local variables without explicitly 1125 // capturing those. 1126 // We invalidate all static locals referenced inside the lambda body. 1127 if (RD->isLambda() && RD->getLambdaCallOperator()->getBody()) { 1128 using namespace ast_matchers; 1129 1130 const char *DeclBind = "DeclBind"; 1131 StatementMatcher RefToStatic = stmt(hasDescendant(declRefExpr( 1132 to(varDecl(hasStaticStorageDuration()).bind(DeclBind))))); 1133 auto Matches = 1134 match(RefToStatic, *RD->getLambdaCallOperator()->getBody(), 1135 RD->getASTContext()); 1136 1137 for (BoundNodes &Match : Matches) { 1138 auto *VD = Match.getNodeAs<VarDecl>(DeclBind); 1139 const VarRegion *ToInvalidate = 1140 RM.getRegionManager().getVarRegion(VD, LCtx); 1141 AddToWorkList(ToInvalidate); 1142 } 1143 } 1144 } 1145 } 1146 1147 // BlockDataRegion? If so, invalidate captured variables that are passed 1148 // by reference. 1149 if (const BlockDataRegion *BR = dyn_cast<BlockDataRegion>(baseR)) { 1150 for (auto Var : BR->referenced_vars()) { 1151 const VarRegion *VR = Var.getCapturedRegion(); 1152 const VarDecl *VD = VR->getDecl(); 1153 if (VD->hasAttr<BlocksAttr>() || !VD->hasLocalStorage()) { 1154 AddToWorkList(VR); 1155 } 1156 else if (Loc::isLocType(VR->getValueType())) { 1157 // Map the current bindings to a Store to retrieve the value 1158 // of the binding. If that binding itself is a region, we should 1159 // invalidate that region. This is because a block may capture 1160 // a pointer value, but the thing pointed by that pointer may 1161 // get invalidated. 1162 SVal V = RM.getBinding(B, loc::MemRegionVal(VR)); 1163 if (std::optional<Loc> L = V.getAs<Loc>()) { 1164 if (const MemRegion *LR = L->getAsRegion()) 1165 AddToWorkList(LR); 1166 } 1167 } 1168 } 1169 return; 1170 } 1171 1172 // Symbolic region? 1173 if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(baseR)) 1174 IS.insert(SR->getSymbol()); 1175 1176 // Nothing else should be done in the case when we preserve regions context. 1177 if (PreserveRegionsContents) 1178 return; 1179 1180 // Otherwise, we have a normal data region. Record that we touched the region. 1181 if (Regions) 1182 Regions->push_back(baseR); 1183 1184 if (isa<AllocaRegion, SymbolicRegion>(baseR)) { 1185 // Invalidate the region by setting its default value to 1186 // conjured symbol. The type of the symbol is irrelevant. 
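    // Because this is a Default binding at the base region, later loads from
    // any offset inside the region yield symbols derived from this conjured
    // symbol (see getBindingForDerivedDefaultValue).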
    DefinedOrUnknownSVal V =
        svalBuilder.conjureSymbolVal(baseR, S, LCtx, Ctx.IntTy, Count);
    B = B.addBinding(baseR, BindingKey::Default, V);
    return;
  }

  if (!baseR->isBoundable())
    return;

  const TypedValueRegion *TR = cast<TypedValueRegion>(baseR);
  QualType T = TR->getValueType();

  if (isInitiallyIncludedGlobalRegion(baseR)) {
    // If the region is a global and we are invalidating all globals,
    // erasing the entry is good enough. This causes all globals to be lazily
    // symbolicated from the same base symbol.
    return;
  }

  if (T->isRecordType()) {
    // Invalidate the region by setting its default value to
    // conjured symbol. The type of the symbol is irrelevant.
    DefinedOrUnknownSVal V =
        svalBuilder.conjureSymbolVal(baseR, S, LCtx, Ctx.IntTy, Count);
    B = B.addBinding(baseR, BindingKey::Default, V);
    return;
  }

  if (const ArrayType *AT = Ctx.getAsArrayType(T)) {
    bool doNotInvalidateSuperRegion = ITraits.hasTrait(
        baseR,
        RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);

    if (doNotInvalidateSuperRegion) {
      // We are not doing blank invalidation of the whole array region so we
      // have to manually invalidate each element.
      std::optional<uint64_t> NumElements;

      // Compute lower and upper offsets for region within array.
      if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT))
        NumElements = CAT->getZExtSize();
      if (!NumElements) // We are not dealing with a constant size array
        goto conjure_default;
      QualType ElementTy = AT->getElementType();
      uint64_t ElemSize = Ctx.getTypeSize(ElementTy);
      const RegionOffset &RO = baseR->getAsOffset();
      const MemRegion *SuperR = baseR->getBaseRegion();
      if (RO.hasSymbolicOffset()) {
        // If base region has a symbolic offset,
        // we revert to invalidating the super region.
        if (SuperR)
          AddToWorkList(SuperR);
        goto conjure_default;
      }

      uint64_t LowerOffset = RO.getOffset();
      uint64_t UpperOffset = LowerOffset + *NumElements * ElemSize;
      bool UpperOverflow = UpperOffset < LowerOffset;

      // Invalidate regions which are within array boundaries,
      // or have a symbolic offset.
      if (!SuperR)
        goto conjure_default;

      const ClusterBindings *C = B.lookup(SuperR);
      if (!C)
        goto conjure_default;

      for (const auto &[BK, V] : *C) {
        std::optional<uint64_t> ROffset =
            BK.hasSymbolicOffset() ? std::optional<uint64_t>() : BK.getOffset();

        // Check offset is not symbolic and within array's boundaries.
        // Handles arrays of 0 elements and of 0-sized elements as well.
        if (!ROffset ||
            ((*ROffset >= LowerOffset && *ROffset < UpperOffset) ||
             (UpperOverflow &&
              (*ROffset >= LowerOffset || *ROffset < UpperOffset)) ||
             (LowerOffset == UpperOffset && *ROffset == LowerOffset))) {
          B = B.removeBinding(BK);
          // Bound symbolic regions need to be invalidated for dead symbol
          // detection.
          const MemRegion *R = V.getAsRegion();
          if (isa_and_nonnull<SymbolicRegion>(R))
            VisitBinding(V);
        }
      }
    }
  conjure_default:
    // Set the default value of the array to conjured symbol.
1277 DefinedOrUnknownSVal V = svalBuilder.conjureSymbolVal( 1278 baseR, S, LCtx, AT->getElementType(), Count); 1279 B = B.addBinding(baseR, BindingKey::Default, V); 1280 return; 1281 } 1282 1283 DefinedOrUnknownSVal V = 1284 svalBuilder.conjureSymbolVal(baseR, S, LCtx, T, Count); 1285 assert(SymbolManager::canSymbolicate(T) || V.isUnknown()); 1286 B = B.addBinding(baseR, BindingKey::Direct, V); 1287 } 1288 1289 bool InvalidateRegionsWorker::isInitiallyIncludedGlobalRegion( 1290 const MemRegion *R) { 1291 switch (GlobalsFilter) { 1292 case GFK_None: 1293 return false; 1294 case GFK_SystemOnly: 1295 return isa<GlobalSystemSpaceRegion>(R->getMemorySpace()); 1296 case GFK_All: 1297 return isa<NonStaticGlobalSpaceRegion>(R->getMemorySpace()); 1298 } 1299 1300 llvm_unreachable("unknown globals filter"); 1301 } 1302 1303 bool InvalidateRegionsWorker::includeEntireMemorySpace(const MemRegion *Base) { 1304 if (isInitiallyIncludedGlobalRegion(Base)) 1305 return true; 1306 1307 const MemSpaceRegion *MemSpace = Base->getMemorySpace(); 1308 return ITraits.hasTrait(MemSpace, 1309 RegionAndSymbolInvalidationTraits::TK_EntireMemSpace); 1310 } 1311 1312 RegionBindingsRef RegionStoreManager::invalidateGlobalRegion( 1313 MemRegion::Kind K, const Stmt *S, unsigned Count, 1314 const LocationContext *LCtx, RegionBindingsRef B, 1315 InvalidatedRegions *Invalidated) { 1316 // Bind the globals memory space to a new symbol that we will use to derive 1317 // the bindings for all globals. 1318 const GlobalsSpaceRegion *GS = MRMgr.getGlobalsRegion(K); 1319 SVal V = 1320 svalBuilder.conjureSymbolVal(/* symbolTag = */ (const void *)GS, S, LCtx, 1321 /* type does not matter */ Ctx.IntTy, Count); 1322 1323 B = B.removeBinding(GS) 1324 .addBinding(BindingKey::Make(GS, BindingKey::Default), V); 1325 1326 // Even if there are no bindings in the global scope, we still need to 1327 // record that we touched it. 1328 if (Invalidated) 1329 Invalidated->push_back(GS); 1330 1331 return B; 1332 } 1333 1334 void RegionStoreManager::populateWorkList(InvalidateRegionsWorker &W, 1335 ArrayRef<SVal> Values, 1336 InvalidatedRegions *TopLevelRegions) { 1337 for (SVal V : Values) { 1338 if (auto LCS = V.getAs<nonloc::LazyCompoundVal>()) { 1339 for (SVal S : getInterestingValues(*LCS)) 1340 if (const MemRegion *R = S.getAsRegion()) 1341 W.AddToWorkList(R); 1342 1343 continue; 1344 } 1345 1346 if (const MemRegion *R = V.getAsRegion()) { 1347 if (TopLevelRegions) 1348 TopLevelRegions->push_back(R); 1349 W.AddToWorkList(R); 1350 continue; 1351 } 1352 } 1353 } 1354 1355 StoreRef RegionStoreManager::invalidateRegions( 1356 Store store, ArrayRef<SVal> Values, const Stmt *S, unsigned Count, 1357 const LocationContext *LCtx, const CallEvent *Call, InvalidatedSymbols &IS, 1358 RegionAndSymbolInvalidationTraits &ITraits, 1359 InvalidatedRegions *TopLevelRegions, InvalidatedRegions *Invalidated) { 1360 GlobalsFilterKind GlobalsFilter; 1361 if (Call) { 1362 if (Call->isInSystemHeader()) 1363 GlobalsFilter = GFK_SystemOnly; 1364 else 1365 GlobalsFilter = GFK_All; 1366 } else { 1367 GlobalsFilter = GFK_None; 1368 } 1369 1370 RegionBindingsRef B = getRegionBindings(store); 1371 InvalidateRegionsWorker W(*this, StateMgr, B, S, Count, LCtx, IS, ITraits, 1372 Invalidated, GlobalsFilter); 1373 1374 // Scan the bindings and generate the clusters. 1375 W.GenerateClusters(); 1376 1377 // Add the regions to the worklist. 1378 populateWorkList(W, Values, TopLevelRegions); 1379 1380 W.RunWorkList(); 1381 1382 // Return the new bindings. 
1383 B = W.getRegionBindings(); 1384 1385 // For calls, determine which global regions should be invalidated and 1386 // invalidate them. (Note that function-static and immutable globals are never 1387 // invalidated by this.) 1388 // TODO: This could possibly be more precise with modules. 1389 switch (GlobalsFilter) { 1390 case GFK_All: 1391 B = invalidateGlobalRegion(MemRegion::GlobalInternalSpaceRegionKind, S, 1392 Count, LCtx, B, Invalidated); 1393 [[fallthrough]]; 1394 case GFK_SystemOnly: 1395 B = invalidateGlobalRegion(MemRegion::GlobalSystemSpaceRegionKind, S, Count, 1396 LCtx, B, Invalidated); 1397 [[fallthrough]]; 1398 case GFK_None: 1399 break; 1400 } 1401 1402 return StoreRef(B.asStore(), *this); 1403 } 1404 1405 //===----------------------------------------------------------------------===// 1406 // Location and region casting. 1407 //===----------------------------------------------------------------------===// 1408 1409 /// ArrayToPointer - Emulates the "decay" of an array to a pointer 1410 /// type. 'Array' represents the lvalue of the array being decayed 1411 /// to a pointer, and the returned SVal represents the decayed 1412 /// version of that lvalue (i.e., a pointer to the first element of 1413 /// the array). This is called by ExprEngine when evaluating casts 1414 /// from arrays to pointers. 1415 SVal RegionStoreManager::ArrayToPointer(Loc Array, QualType T) { 1416 if (isa<loc::ConcreteInt>(Array)) 1417 return Array; 1418 1419 if (!isa<loc::MemRegionVal>(Array)) 1420 return UnknownVal(); 1421 1422 const SubRegion *R = 1423 cast<SubRegion>(Array.castAs<loc::MemRegionVal>().getRegion()); 1424 NonLoc ZeroIdx = svalBuilder.makeZeroArrayIndex(); 1425 return loc::MemRegionVal(MRMgr.getElementRegion(T, ZeroIdx, R, Ctx)); 1426 } 1427 1428 //===----------------------------------------------------------------------===// 1429 // Loading values from regions. 1430 //===----------------------------------------------------------------------===// 1431 1432 SVal RegionStoreManager::getBinding(RegionBindingsConstRef B, Loc L, QualType T) { 1433 assert(!isa<UnknownVal>(L) && "location unknown"); 1434 assert(!isa<UndefinedVal>(L) && "location undefined"); 1435 1436 // For access to concrete addresses, return UnknownVal. Checks 1437 // for null dereferences (and similar errors) are done by checkers, not 1438 // the Store. 1439 // FIXME: We can consider lazily symbolicating such memory, but we really 1440 // should defer this when we can reason easily about symbolicating arrays 1441 // of bytes. 1442 if (L.getAs<loc::ConcreteInt>()) { 1443 return UnknownVal(); 1444 } 1445 if (!L.getAs<loc::MemRegionVal>()) { 1446 return UnknownVal(); 1447 } 1448 1449 const MemRegion *MR = L.castAs<loc::MemRegionVal>().getRegion(); 1450 1451 if (isa<BlockDataRegion>(MR)) { 1452 return UnknownVal(); 1453 } 1454 1455 // Auto-detect the binding type. 
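  // For example, a load from a plain VarRegion uses the variable's declared
  // type, while a load through an untyped symbolic pointer falls back to the
  // symbol's static pointee type.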
  if (T.isNull()) {
    if (const auto *TVR = dyn_cast<TypedValueRegion>(MR))
      T = TVR->getValueType();
    else if (const auto *TR = dyn_cast<TypedRegion>(MR))
      T = TR->getLocationType()->getPointeeType();
    else if (const auto *SR = dyn_cast<SymbolicRegion>(MR))
      T = SR->getPointeeStaticType();
  }
  assert(!T.isNull() && "Unable to auto-detect binding type!");
  assert(!T->isVoidType() && "Attempting to dereference a void pointer!");

  if (!isa<TypedValueRegion>(MR))
    MR = GetElementZeroRegion(cast<SubRegion>(MR), T);

  // FIXME: Perhaps this method should just take a 'const MemRegion*' argument
  // instead of 'Loc', and have the other Loc cases handled at a higher level.
  const TypedValueRegion *R = cast<TypedValueRegion>(MR);
  QualType RTy = R->getValueType();

  // FIXME: we do not yet model the parts of a complex type, so treat the
  // whole thing as "unknown".
  if (RTy->isAnyComplexType())
    return UnknownVal();

  // FIXME: We should eventually handle funny addressing. e.g.:
  //
  //  int x = ...;
  //  int *p = &x;
  //  char *q = (char*) p;
  //  char c = *q;  // returns the first byte of 'x'.
  //
  // Such funny addressing will occur due to layering of regions.
  if (RTy->isStructureOrClassType())
    return getBindingForStruct(B, R);

  // FIXME: Handle unions.
  if (RTy->isUnionType())
    return createLazyBinding(B, R);

  if (RTy->isArrayType()) {
    if (RTy->isConstantArrayType())
      return getBindingForArray(B, R);
    else
      return UnknownVal();
  }

  // FIXME: handle Vector types.
  if (RTy->isVectorType())
    return UnknownVal();

  if (const FieldRegion* FR = dyn_cast<FieldRegion>(R))
    return svalBuilder.evalCast(getBindingForField(B, FR), T, QualType{});

  if (const ElementRegion* ER = dyn_cast<ElementRegion>(R)) {
    // FIXME: Here we actually perform an implicit conversion from the loaded
    // value to the element type. Eventually we want to compose these values
    // more intelligently. For example, an 'element' can encompass multiple
    // bound regions (e.g., several bound bytes), or could be a subset of
    // a larger value.
    return svalBuilder.evalCast(getBindingForElement(B, ER), T, QualType{});
  }

  if (const ObjCIvarRegion *IVR = dyn_cast<ObjCIvarRegion>(R)) {
    // FIXME: Here we actually perform an implicit conversion from the loaded
    // value to the ivar type. What we should model is stores to ivars
    // that blow past the extent of the ivar. If the address of the ivar is
    // reinterpreted, it is possible we stored a different value that could
    // fit within the ivar. Either we need to cast these when storing them
    // or reinterpret them lazily (as we do here).
    return svalBuilder.evalCast(getBindingForObjCIvar(B, IVR), T, QualType{});
  }

  if (const VarRegion *VR = dyn_cast<VarRegion>(R)) {
    // FIXME: Here we actually perform an implicit conversion from the loaded
    // value to the variable type. What we should model is stores to variables
    // that blow past the extent of the variable. If the address of the
    // variable is reinterpreted, it is possible we stored a different value
    // that could fit within the variable. Either we need to cast these when
    // storing them or reinterpret them lazily (as we do here).
1535 return svalBuilder.evalCast(getBindingForVar(B, VR), T, QualType{}); 1536 } 1537 1538 const SVal *V = B.lookup(R, BindingKey::Direct); 1539 1540 // Check if the region has a binding. 1541 if (V) 1542 return *V; 1543 1544 // The location does not have a bound value. This means that it has 1545 // the value it had upon its creation and/or entry to the analyzed 1546 // function/method. These are either symbolic values or 'undefined'. 1547 if (R->hasStackNonParametersStorage()) { 1548 // All stack variables are considered to have undefined values 1549 // upon creation. All heap allocated blocks are considered to 1550 // have undefined values as well unless they are explicitly bound 1551 // to specific values. 1552 return UndefinedVal(); 1553 } 1554 1555 // All other values are symbolic. 1556 return svalBuilder.getRegionValueSymbolVal(R); 1557 } 1558 1559 static QualType getUnderlyingType(const SubRegion *R) { 1560 QualType RegionTy; 1561 if (const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(R)) 1562 RegionTy = TVR->getValueType(); 1563 1564 if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(R)) 1565 RegionTy = SR->getSymbol()->getType(); 1566 1567 return RegionTy; 1568 } 1569 1570 /// Checks to see if store \p B has a lazy binding for region \p R. 1571 /// 1572 /// If \p AllowSubregionBindings is \c false, a lazy binding will be rejected 1573 /// if there are additional bindings within \p R. 1574 /// 1575 /// Note that unlike RegionStoreManager::findLazyBinding, this will not search 1576 /// for lazy bindings for super-regions of \p R. 1577 static std::optional<nonloc::LazyCompoundVal> 1578 getExistingLazyBinding(SValBuilder &SVB, RegionBindingsConstRef B, 1579 const SubRegion *R, bool AllowSubregionBindings) { 1580 std::optional<SVal> V = B.getDefaultBinding(R); 1581 if (!V) 1582 return std::nullopt; 1583 1584 std::optional<nonloc::LazyCompoundVal> LCV = 1585 V->getAs<nonloc::LazyCompoundVal>(); 1586 if (!LCV) 1587 return std::nullopt; 1588 1589 // If the LCV is for a subregion, the types might not match, and we shouldn't 1590 // reuse the binding. 1591 QualType RegionTy = getUnderlyingType(R); 1592 if (!RegionTy.isNull() && 1593 !RegionTy->isVoidPointerType()) { 1594 QualType SourceRegionTy = LCV->getRegion()->getValueType(); 1595 if (!SVB.getContext().hasSameUnqualifiedType(RegionTy, SourceRegionTy)) 1596 return std::nullopt; 1597 } 1598 1599 if (!AllowSubregionBindings) { 1600 // If there are any other bindings within this region, we shouldn't reuse 1601 // the top-level binding. 
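// For example (illustrative): if a whole struct was lazily copied and one of
// its fields was overwritten afterwards, the stale whole-struct
// LazyCompoundVal no longer describes the current contents and must not be
// reused.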
1602 SmallVector<BindingPair, 16> Bindings; 1603 collectSubRegionBindings(Bindings, SVB, *B.lookup(R->getBaseRegion()), R, 1604 /*IncludeAllDefaultBindings=*/true); 1605 if (Bindings.size() > 1) 1606 return std::nullopt; 1607 } 1608 1609 return *LCV; 1610 } 1611 1612 std::pair<Store, const SubRegion *> 1613 RegionStoreManager::findLazyBinding(RegionBindingsConstRef B, 1614 const SubRegion *R, 1615 const SubRegion *originalRegion) { 1616 if (originalRegion != R) { 1617 if (std::optional<nonloc::LazyCompoundVal> V = 1618 getExistingLazyBinding(svalBuilder, B, R, true)) 1619 return std::make_pair(V->getStore(), V->getRegion()); 1620 } 1621 1622 typedef std::pair<Store, const SubRegion *> StoreRegionPair; 1623 StoreRegionPair Result = StoreRegionPair(); 1624 1625 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 1626 Result = findLazyBinding(B, cast<SubRegion>(ER->getSuperRegion()), 1627 originalRegion); 1628 1629 if (Result.second) 1630 Result.second = MRMgr.getElementRegionWithSuper(ER, Result.second); 1631 1632 } else if (const FieldRegion *FR = dyn_cast<FieldRegion>(R)) { 1633 Result = findLazyBinding(B, cast<SubRegion>(FR->getSuperRegion()), 1634 originalRegion); 1635 1636 if (Result.second) 1637 Result.second = MRMgr.getFieldRegionWithSuper(FR, Result.second); 1638 1639 } else if (const CXXBaseObjectRegion *BaseReg = 1640 dyn_cast<CXXBaseObjectRegion>(R)) { 1641 // C++ base object region is another kind of region that we should blast 1642 // through to look for lazy compound value. It is like a field region. 1643 Result = findLazyBinding(B, cast<SubRegion>(BaseReg->getSuperRegion()), 1644 originalRegion); 1645 1646 if (Result.second) 1647 Result.second = MRMgr.getCXXBaseObjectRegionWithSuper(BaseReg, 1648 Result.second); 1649 } 1650 1651 return Result; 1652 } 1653 1654 /// This is a helper function for `getConstantValFromConstArrayInitializer`. 1655 /// 1656 /// Return an array of extents of the declared array type. 1657 /// 1658 /// E.g. for `int x[1][2][3];` returns { 1, 2, 3 }. 1659 static SmallVector<uint64_t, 2> 1660 getConstantArrayExtents(const ConstantArrayType *CAT) { 1661 assert(CAT && "ConstantArrayType should not be null"); 1662 CAT = cast<ConstantArrayType>(CAT->getCanonicalTypeInternal()); 1663 SmallVector<uint64_t, 2> Extents; 1664 do { 1665 Extents.push_back(CAT->getZExtSize()); 1666 } while ((CAT = dyn_cast<ConstantArrayType>(CAT->getElementType()))); 1667 return Extents; 1668 } 1669 1670 /// This is a helper function for `getConstantValFromConstArrayInitializer`. 1671 /// 1672 /// Return an array of offsets from nested ElementRegions and a root base 1673 /// region. The array is never empty and a base region is never null. 1674 /// 1675 /// E.g. for `Element{Element{Element{VarRegion},1},2},3}` returns { 3, 2, 1 }. 1676 /// This represents an access through indirection: `arr[1][2][3];` 1677 /// 1678 /// \param ER The given (possibly nested) ElementRegion. 1679 /// 1680 /// \note The result array is in the reverse order of indirection expression: 1681 /// arr[1][2][3] -> { 3, 2, 1 }. This helps to provide complexity O(n), where n 1682 /// is a number of indirections. It may not affect performance in real-life 1683 /// code, though. 
1684 static std::pair<SmallVector<SVal, 2>, const MemRegion *>
1685 getElementRegionOffsetsWithBase(const ElementRegion *ER) {
1686   assert(ER && "ElementRegion should not be null");
1687   const MemRegion *Base;
1688   SmallVector<SVal, 2> SValOffsets;
1689   do {
1690     SValOffsets.push_back(ER->getIndex());
1691     Base = ER->getSuperRegion();
1692     ER = dyn_cast<ElementRegion>(Base);
1693   } while (ER);
1694   return {SValOffsets, Base};
1695 }
1696
1697 /// This is a helper function for `getConstantValFromConstArrayInitializer`.
1698 ///
1699 /// Convert an array of offsets from `SVal` to `uint64_t`, checking them
1700 /// against the respective array extents.
1701 /// \param SrcOffsets [in] The array of offsets of type `SVal` in reversed
1702 ///   order (typically received from `getElementRegionOffsetsWithBase`).
1703 /// \param ArrayExtents [in] The array of extents.
1704 /// \param DstOffsets [out] The array of offsets of type `uint64_t`.
1705 /// \returns:
1706 ///  - `std::nullopt` for a successful conversion.
1707 ///  - `UndefinedVal` or `UnknownVal` otherwise. It's expected that this SVal
1708 ///    will be returned as a suitable value of the access operation, i.e.
1709 ///    propagated to the caller as the result of the read.
1710 ///
1711 /// \example:
1712 ///   const int arr[10][20][30] = {}; // ArrayExtents { 10, 20, 30 }
1713 ///   int x1 = arr[4][5][6];  // SrcOffsets { NonLoc(6), NonLoc(5), NonLoc(4) }
1714 ///                           // DstOffsets { 4, 5, 6 }
1715 ///                           // returns std::nullopt
1716 ///   int x2 = arr[42][5][-6]; // returns UndefinedVal
1717 ///   int x3 = arr[4][5][x2];  // returns UnknownVal
1718 static std::optional<SVal>
1719 convertOffsetsFromSvalToUnsigneds(const SmallVector<SVal, 2> &SrcOffsets,
1720                                   const SmallVector<uint64_t, 2> ArrayExtents,
1721                                   SmallVector<uint64_t, 2> &DstOffsets) {
1722   // Check offsets for being out of bounds.
1723   // C++20 [expr.add] 7.6.6.4 (excerpt):
1724   //   If P points to an array element i of an array object x with n
1725   //   elements, where i < 0 or i > n, the behavior is undefined.
1726   //   Dereferencing is not allowed on the "one past the last
1727   //   element", when i == n.
1728   // Example:
1729   //   const int arr[3][2] = {{1, 2}, {3, 4}};
1730   //   arr[0][0];  // 1
1731   //   arr[0][1];  // 2
1732   //   arr[0][2];  // UB
1733   //   arr[1][0];  // 3
1734   //   arr[1][1];  // 4
1735   //   arr[1][-1]; // UB
1736   //   arr[2][0];  // 0
1737   //   arr[2][1];  // 0
1738   //   arr[-2][0]; // UB
1739   DstOffsets.resize(SrcOffsets.size());
1740   auto ExtentIt = ArrayExtents.begin();
1741   auto OffsetIt = DstOffsets.begin();
1742   // Reverse `SrcOffsets` to make it consistent with `ArrayExtents`.
1743   for (SVal V : llvm::reverse(SrcOffsets)) {
1744     if (auto CI = V.getAs<nonloc::ConcreteInt>()) {
1745       // When the offset is outside the array's bounds, the result is UB.
1746       const llvm::APSInt &Offset = CI->getValue();
1747       if (Offset.isNegative() || Offset.uge(*(ExtentIt++)))
1748         return UndefinedVal();
1749       // Store the index; the reversed iteration restores the natural order.
1750       *(OffsetIt++) = Offset.getZExtValue();
1751       continue;
1752     }
1753     // A symbolic index is present; return an unknown value.
1754     // FIXME: We also need to take ElementRegions with symbolic indexes into
1755     // account.
1756     return UnknownVal();
1757   }
1758   return std::nullopt;
1759 }
1760
1761 std::optional<SVal> RegionStoreManager::getConstantValFromConstArrayInitializer(
1762     RegionBindingsConstRef B, const ElementRegion *R) {
1763   assert(R && "ElementRegion should not be null");
1764
1765   // Treat an n-dimensional array.
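// For example (illustrative):
//   const int arr[2][3] = {{1, 2, 3}, {4, 5, 6}};
//   int x = arr[1][2];
// Here R is Element{Element{arr,1},2}: we recover the base VarRegion 'arr'
// and the reversed offsets {2, 1}, check them against the extents {2, 3},
// and read the value 6 from the initializer list.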
1766   SmallVector<SVal, 2> SValOffsets;
1767   const MemRegion *Base;
1768   std::tie(SValOffsets, Base) = getElementRegionOffsetsWithBase(R);
1769   const VarRegion *VR = dyn_cast<VarRegion>(Base);
1770   if (!VR)
1771     return std::nullopt;
1772
1773   assert(!SValOffsets.empty() && "getElementRegionOffsetsWithBase guarantees "
1774                                  "the offsets vector is not empty.");
1775
1776   // Check if the containing array has an initialized value that we can trust.
1777   // We can trust a const value or a value of a global initializer in main().
1778   const VarDecl *VD = VR->getDecl();
1779   if (!VD->getType().isConstQualified() &&
1780       !R->getElementType().isConstQualified() &&
1781       (!B.isMainAnalysis() || !VD->hasGlobalStorage()))
1782     return std::nullopt;
1783
1784   // The array's declaration should have a `ConstantArrayType`, because only
1785   // that type carries the array extent. The type we see here, however, may be
1786   // an `IncompleteArrayType`. To get to the `ConstantArrayType`, we should
1787   // find the declaration in the redeclaration chain that has the
1788   // initialization expression.
1789   // NOTE: `getAnyInitializer` has an out-parameter that returns the
1790   // redeclaration from which the initializer is obtained; we replace the
1791   // current `VD` with that one. If the function returns null, `VD` is left
1792   // unchanged.
1793   const Expr *Init = VD->getAnyInitializer(VD);
1794   // NOTE: If `Init` is non-null, the replaced `VD` is guaranteed to be
1795   // non-null too, so checking `Init` alone is sufficient.
1796   if (!Init)
1797     return std::nullopt;
1798
1799   // The array's declaration should have a ConstantArrayType, because only that
1800   // type contains the array extent.
1801   const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(VD->getType());
1802   if (!CAT)
1803     return std::nullopt;
1804
1805   // Get array extents.
1806   SmallVector<uint64_t, 2> Extents = getConstantArrayExtents(CAT);
1807
1808   // The number of offsets should equal the number of extents; otherwise
1809   // some wrong type punning has occurred. For instance:
1810   //   int arr[1][2][3];
1811   //   auto ptr = (int(*)[42])arr;
1812   //   auto x = ptr[4][2]; // UB
1813   // FIXME: Should return UndefinedVal.
1814   if (SValOffsets.size() != Extents.size())
1815     return std::nullopt;
1816
1817   SmallVector<uint64_t, 2> ConcreteOffsets;
1818   if (std::optional<SVal> V = convertOffsetsFromSvalToUnsigneds(
1819           SValOffsets, Extents, ConcreteOffsets))
1820     return *V;
1821
1822   // Handle InitListExpr.
1823   // Example:
1824   //   const char arr[4][2] = { { 1, 2 }, { 3 }, 4, 5 };
1825   if (const auto *ILE = dyn_cast<InitListExpr>(Init))
1826     return getSValFromInitListExpr(ILE, ConcreteOffsets, R->getElementType());
1827
1828   // Handle StringLiteral.
1829   // Example:
1830   //   const char arr[] = "abc";
1831   if (const auto *SL = dyn_cast<StringLiteral>(Init))
1832     return getSValFromStringLiteral(SL, ConcreteOffsets.front(),
1833                                     R->getElementType());
1834
1835   // FIXME: Handle CompoundLiteralExpr.
1836
1837   return std::nullopt;
1838 }
1839
1840 /// Returns an SVal, if possible, for the specified position of an
1841 /// initialization list.
1842 ///
1843 /// \param ILE The given initialization list.
1844 /// \param Offsets The array of unsigned offsets. E.g. for the expression
1845 ///   `int x = arr[1][2][3];` the array should be { 1, 2, 3 }.
1846 /// \param ElemT The type of the result SVal expression.
1847 /// \return Optional SVal for the particular position in the initialization
1848 ///   list. E.g. for the list `{{1, 2}, {3, 4}, {5, 6}, {}}` offsets:
1849 ///   - {1, 1} returns SVal{4}, because it's the second position in the second
1850 ///     sublist;
1851 ///   - {3, 0} returns SVal{0}, because there's no explicit value at this
1852 ///     position in the sublist.
1853 ///
1854 /// NOTE: In order to get a valid SVal, the caller shall guarantee valid
1855 /// offsets for the given initialization list. Otherwise the SVal may
1856 /// spuriously be 0 or trigger an assertion.
1857 std::optional<SVal> RegionStoreManager::getSValFromInitListExpr(
1858     const InitListExpr *ILE, const SmallVector<uint64_t, 2> &Offsets,
1859     QualType ElemT) {
1860   assert(ILE && "InitListExpr should not be null");
1861
1862   for (uint64_t Offset : Offsets) {
1863     // C++20 [dcl.init.string] 9.4.2.1:
1864     //   An array of ordinary character type [...] can be initialized by [...]
1865     //   an appropriately-typed string-literal enclosed in braces.
1866     // Example:
1867     //   const char arr[] = { "abc" };
1868     if (ILE->isStringLiteralInit())
1869       if (const auto *SL = dyn_cast<StringLiteral>(ILE->getInit(0)))
1870         return getSValFromStringLiteral(SL, Offset, ElemT);
1871
1872     // C++20 [dcl.init.aggr] (excerpt):
1873     //   the i-th array element is value-initialized for each k < i ≤ n,
1874     //   where k is an expression-list size and n is an array extent.
1875     if (Offset >= ILE->getNumInits())
1876       return svalBuilder.makeZeroVal(ElemT);
1877
1878     const Expr *E = ILE->getInit(Offset);
1879     const auto *IL = dyn_cast<InitListExpr>(E);
1880     if (!IL)
1881       // Return a constant value, if one is present.
1882       // FIXME: Support other SVals.
1883       return svalBuilder.getConstantVal(E);
1884
1885     // Go to the nested initializer list.
1886     ILE = IL;
1887   }
1888
1889   assert(ILE);
1890
1891   // FIXME: Unhandled InitListExpr sub-expression, possibly constructing an
1892   // enum?
1893   return std::nullopt;
1894 }
1895
1896 /// Returns an SVal, if possible, for the specified position in a string
1897 /// literal.
1898 ///
1899 /// \param SL The given string literal.
1900 /// \param Offset The unsigned offset. E.g. for the expression
1901 ///   `char x = str[42];` the offset should be 42.
1902 ///   E.g. for the string "abc" offset:
1903 ///   - 1 returns SVal{'b'}, because it's the second position in the string.
1904 ///   - 42 returns SVal{0}, because there's no explicit value at this
1905 ///     position in the string.
1906 /// \param ElemT The type of the result SVal expression.
1907 ///
1908 /// NOTE: We return `0` for every offset >= the literal length for array
1909 /// declarations, like:
1910 ///   const char str[42] = "123"; // Literal length is 4.
1911 ///   char c = str[41];           // Offset is 41.
1912 /// FIXME: Nevertheless, we can't do the same for pointer declarations, like:
1913 ///   const char * const str = "123"; // Literal length is 4.
1914 ///   char c = str[41];               // Offset is 41. Returns `0`, but Undef
1915 ///                                   // expected.
1916 /// It should be properly handled before reaching this point.
1917 /// The main problem is that we can't distinguish between these declarations,
1918 /// because for an array we can get the Decl from the VarRegion, while for a
1919 /// pointer the region is a StringRegion, which doesn't carry a Decl.
1920 /// A possible solution could be to pass the array extent along with the offset.
1921 SVal RegionStoreManager::getSValFromStringLiteral(const StringLiteral *SL, 1922 uint64_t Offset, 1923 QualType ElemT) { 1924 assert(SL && "StringLiteral should not be null"); 1925 // C++20 [dcl.init.string] 9.4.2.3: 1926 // If there are fewer initializers than there are array elements, each 1927 // element not explicitly initialized shall be zero-initialized [dcl.init]. 1928 uint32_t Code = (Offset >= SL->getLength()) ? 0 : SL->getCodeUnit(Offset); 1929 return svalBuilder.makeIntVal(Code, ElemT); 1930 } 1931 1932 static std::optional<SVal> getDerivedSymbolForBinding( 1933 RegionBindingsConstRef B, const TypedValueRegion *BaseRegion, 1934 const TypedValueRegion *SubReg, const ASTContext &Ctx, SValBuilder &SVB) { 1935 assert(BaseRegion); 1936 QualType BaseTy = BaseRegion->getValueType(); 1937 QualType Ty = SubReg->getValueType(); 1938 if (BaseTy->isScalarType() && Ty->isScalarType()) { 1939 if (Ctx.getTypeSizeInChars(BaseTy) >= Ctx.getTypeSizeInChars(Ty)) { 1940 if (const std::optional<SVal> &ParentValue = 1941 B.getDirectBinding(BaseRegion)) { 1942 if (SymbolRef ParentValueAsSym = ParentValue->getAsSymbol()) 1943 return SVB.getDerivedRegionValueSymbolVal(ParentValueAsSym, SubReg); 1944 1945 if (ParentValue->isUndef()) 1946 return UndefinedVal(); 1947 1948 // Other cases: give up. We are indexing into a larger object 1949 // that has some value, but we don't know how to handle that yet. 1950 return UnknownVal(); 1951 } 1952 } 1953 } 1954 return std::nullopt; 1955 } 1956 1957 SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B, 1958 const ElementRegion* R) { 1959 // Check if the region has a binding. 1960 if (const std::optional<SVal> &V = B.getDirectBinding(R)) 1961 return *V; 1962 1963 const MemRegion* superR = R->getSuperRegion(); 1964 1965 // Check if the region is an element region of a string literal. 1966 if (const StringRegion *StrR = dyn_cast<StringRegion>(superR)) { 1967 // FIXME: Handle loads from strings where the literal is treated as 1968 // an integer, e.g., *((unsigned int*)"hello"). Such loads are UB according 1969 // to C++20 7.2.1.11 [basic.lval]. 1970 QualType T = Ctx.getAsArrayType(StrR->getValueType())->getElementType(); 1971 if (!Ctx.hasSameUnqualifiedType(T, R->getElementType())) 1972 return UnknownVal(); 1973 if (const auto CI = R->getIndex().getAs<nonloc::ConcreteInt>()) { 1974 const llvm::APSInt &Idx = CI->getValue(); 1975 if (Idx < 0) 1976 return UndefinedVal(); 1977 const StringLiteral *SL = StrR->getStringLiteral(); 1978 return getSValFromStringLiteral(SL, Idx.getZExtValue(), T); 1979 } 1980 } else if (isa<ElementRegion, VarRegion>(superR)) { 1981 if (std::optional<SVal> V = getConstantValFromConstArrayInitializer(B, R)) 1982 return *V; 1983 } 1984 1985 // Check for loads from a code text region. For such loads, just give up. 1986 if (isa<CodeTextRegion>(superR)) 1987 return UnknownVal(); 1988 1989 // Handle the case where we are indexing into a larger scalar object. 1990 // For example, this handles: 1991 // int x = ... 1992 // char *y = &x; 1993 // return *y; 1994 // FIXME: This is a hack, and doesn't do anything really intelligent yet. 1995 const RegionRawOffset &O = R->getAsArrayOffset(); 1996 1997 // If we cannot reason about the offset, return an unknown value. 
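// Otherwise, if the enclosing object is a scalar with a known symbolic value,
// getDerivedSymbolForBinding() below derives a new symbol for this sub-object
// (e.g. reading the first byte of 'x' above yields a symbol derived from the
// value bound to 'x').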
1998   if (!O.getRegion())
1999     return UnknownVal();
2000
2001   if (const TypedValueRegion *baseR = dyn_cast<TypedValueRegion>(O.getRegion()))
2002     if (auto V = getDerivedSymbolForBinding(B, baseR, R, Ctx, svalBuilder))
2003       return *V;
2004
2005   return getBindingForFieldOrElementCommon(B, R, R->getElementType());
2006 }
2007
2008 SVal RegionStoreManager::getBindingForField(RegionBindingsConstRef B,
2009                                             const FieldRegion* R) {
2010
2011   // Check if the region has a binding.
2012   if (const std::optional<SVal> &V = B.getDirectBinding(R))
2013     return *V;
2014
2015   // If the containing record was initialized, try to get its constant value.
2016   const FieldDecl *FD = R->getDecl();
2017   QualType Ty = FD->getType();
2018   const MemRegion* superR = R->getSuperRegion();
2019   if (const auto *VR = dyn_cast<VarRegion>(superR)) {
2020     const VarDecl *VD = VR->getDecl();
2021     QualType RecordVarTy = VD->getType();
2022     unsigned Index = FD->getFieldIndex();
2023     // Either the record variable or the field has an initializer that we can
2024     // trust. We trust initializers of constants and, additionally, respect
2025     // initializers of globals when analyzing main().
2026     if (RecordVarTy.isConstQualified() || Ty.isConstQualified() ||
2027         (B.isMainAnalysis() && VD->hasGlobalStorage()))
2028       if (const Expr *Init = VD->getAnyInitializer())
2029         if (const auto *InitList = dyn_cast<InitListExpr>(Init)) {
2030           if (Index < InitList->getNumInits()) {
2031             if (const Expr *FieldInit = InitList->getInit(Index))
2032               if (std::optional<SVal> V = svalBuilder.getConstantVal(FieldInit))
2033                 return *V;
2034           } else {
2035             return svalBuilder.makeZeroVal(Ty);
2036           }
2037         }
2038   }
2039
2040   // Handle the case where we are accessing into a larger scalar object.
2041   // For example, this handles:
2042   //   struct header {
2043   //     unsigned a : 1;
2044   //     unsigned b : 1;
2045   //   };
2046   //   struct parse_t {
2047   //     unsigned bits0 : 1;
2048   //     unsigned bits2 : 2; // <-- header
2049   //     unsigned bits4 : 4;
2050   //   };
2051   //   int parse(parse_t *p) {
2052   //     unsigned copy = p->bits2;
2053   //     header *bits = (header *)&copy;
2054   //     return bits->b; <-- here
2055   //   }
2056   if (const auto *Base = dyn_cast<TypedValueRegion>(R->getBaseRegion()))
2057     if (auto V = getDerivedSymbolForBinding(B, Base, R, Ctx, svalBuilder))
2058       return *V;
2059
2060   return getBindingForFieldOrElementCommon(B, R, Ty);
2061 }
2062
2063 std::optional<SVal> RegionStoreManager::getBindingForDerivedDefaultValue(
2064     RegionBindingsConstRef B, const MemRegion *superR,
2065     const TypedValueRegion *R, QualType Ty) {
2066
2067   if (const std::optional<SVal> &D = B.getDefaultBinding(superR)) {
2068     SVal val = *D;
2069     if (SymbolRef parentSym = val.getAsSymbol())
2070       return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R);
2071
2072     if (val.isZeroConstant())
2073       return svalBuilder.makeZeroVal(Ty);
2074
2075     if (val.isUnknownOrUndef())
2076       return val;
2077
2078     // Lazy bindings are usually handled through getExistingLazyBinding().
2079     // We should unify these two code paths at some point.
2080 if (isa<nonloc::LazyCompoundVal, nonloc::CompoundVal>(val)) 2081 return val; 2082 2083 llvm_unreachable("Unknown default value"); 2084 } 2085 2086 return std::nullopt; 2087 } 2088 2089 SVal RegionStoreManager::getLazyBinding(const SubRegion *LazyBindingRegion, 2090 RegionBindingsRef LazyBinding) { 2091 SVal Result; 2092 if (const ElementRegion *ER = dyn_cast<ElementRegion>(LazyBindingRegion)) 2093 Result = getBindingForElement(LazyBinding, ER); 2094 else 2095 Result = getBindingForField(LazyBinding, 2096 cast<FieldRegion>(LazyBindingRegion)); 2097 2098 // FIXME: This is a hack to deal with RegionStore's inability to distinguish a 2099 // default value for /part/ of an aggregate from a default value for the 2100 // /entire/ aggregate. The most common case of this is when struct Outer 2101 // has as its first member a struct Inner, which is copied in from a stack 2102 // variable. In this case, even if the Outer's default value is symbolic, 0, 2103 // or unknown, it gets overridden by the Inner's default value of undefined. 2104 // 2105 // This is a general problem -- if the Inner is zero-initialized, the Outer 2106 // will now look zero-initialized. The proper way to solve this is with a 2107 // new version of RegionStore that tracks the extent of a binding as well 2108 // as the offset. 2109 // 2110 // This hack only takes care of the undefined case because that can very 2111 // quickly result in a warning. 2112 if (Result.isUndef()) 2113 Result = UnknownVal(); 2114 2115 return Result; 2116 } 2117 2118 SVal 2119 RegionStoreManager::getBindingForFieldOrElementCommon(RegionBindingsConstRef B, 2120 const TypedValueRegion *R, 2121 QualType Ty) { 2122 2123 // At this point we have already checked in either getBindingForElement or 2124 // getBindingForField if 'R' has a direct binding. 2125 2126 // Lazy binding? 2127 Store lazyBindingStore = nullptr; 2128 const SubRegion *lazyBindingRegion = nullptr; 2129 std::tie(lazyBindingStore, lazyBindingRegion) = findLazyBinding(B, R, R); 2130 if (lazyBindingRegion) 2131 return getLazyBinding(lazyBindingRegion, 2132 getRegionBindings(lazyBindingStore)); 2133 2134 // Record whether or not we see a symbolic index. That can completely 2135 // be out of scope of our lookup. 2136 bool hasSymbolicIndex = false; 2137 2138 // FIXME: This is a hack to deal with RegionStore's inability to distinguish a 2139 // default value for /part/ of an aggregate from a default value for the 2140 // /entire/ aggregate. The most common case of this is when struct Outer 2141 // has as its first member a struct Inner, which is copied in from a stack 2142 // variable. In this case, even if the Outer's default value is symbolic, 0, 2143 // or unknown, it gets overridden by the Inner's default value of undefined. 2144 // 2145 // This is a general problem -- if the Inner is zero-initialized, the Outer 2146 // will now look zero-initialized. The proper way to solve this is with a 2147 // new version of RegionStore that tracks the extent of a binding as well 2148 // as the offset. 2149 // 2150 // This hack only takes care of the undefined case because that can very 2151 // quickly result in a warning. 
2152 bool hasPartialLazyBinding = false; 2153 2154 const SubRegion *SR = R; 2155 while (SR) { 2156 const MemRegion *Base = SR->getSuperRegion(); 2157 if (std::optional<SVal> D = 2158 getBindingForDerivedDefaultValue(B, Base, R, Ty)) { 2159 if (D->getAs<nonloc::LazyCompoundVal>()) { 2160 hasPartialLazyBinding = true; 2161 break; 2162 } 2163 2164 return *D; 2165 } 2166 2167 if (const ElementRegion *ER = dyn_cast<ElementRegion>(Base)) { 2168 NonLoc index = ER->getIndex(); 2169 if (!index.isConstant()) 2170 hasSymbolicIndex = true; 2171 } 2172 2173 // If our super region is a field or element itself, walk up the region 2174 // hierarchy to see if there is a default value installed in an ancestor. 2175 SR = dyn_cast<SubRegion>(Base); 2176 } 2177 2178 if (R->hasStackNonParametersStorage()) { 2179 if (isa<ElementRegion>(R)) { 2180 // Currently we don't reason specially about Clang-style vectors. Check 2181 // if superR is a vector and if so return Unknown. 2182 if (const TypedValueRegion *typedSuperR = 2183 dyn_cast<TypedValueRegion>(R->getSuperRegion())) { 2184 if (typedSuperR->getValueType()->isVectorType()) 2185 return UnknownVal(); 2186 } 2187 } 2188 2189 // FIXME: We also need to take ElementRegions with symbolic indexes into 2190 // account. This case handles both directly accessing an ElementRegion 2191 // with a symbolic offset, but also fields within an element with 2192 // a symbolic offset. 2193 if (hasSymbolicIndex) 2194 return UnknownVal(); 2195 2196 // Additionally allow introspection of a block's internal layout. 2197 // Try to get direct binding if all other attempts failed thus far. 2198 // Else, return UndefinedVal() 2199 if (!hasPartialLazyBinding && !isa<BlockDataRegion>(R->getBaseRegion())) { 2200 if (const std::optional<SVal> &V = B.getDefaultBinding(R)) 2201 return *V; 2202 return UndefinedVal(); 2203 } 2204 } 2205 2206 // All other values are symbolic. 2207 return svalBuilder.getRegionValueSymbolVal(R); 2208 } 2209 2210 SVal RegionStoreManager::getBindingForObjCIvar(RegionBindingsConstRef B, 2211 const ObjCIvarRegion* R) { 2212 // Check if the region has a binding. 2213 if (const std::optional<SVal> &V = B.getDirectBinding(R)) 2214 return *V; 2215 2216 const MemRegion *superR = R->getSuperRegion(); 2217 2218 // Check if the super region has a default binding. 2219 if (const std::optional<SVal> &V = B.getDefaultBinding(superR)) { 2220 if (SymbolRef parentSym = V->getAsSymbol()) 2221 return svalBuilder.getDerivedRegionValueSymbolVal(parentSym, R); 2222 2223 // Other cases: give up. 2224 return UnknownVal(); 2225 } 2226 2227 return getBindingForLazySymbol(R); 2228 } 2229 2230 SVal RegionStoreManager::getBindingForVar(RegionBindingsConstRef B, 2231 const VarRegion *R) { 2232 2233 // Check if the region has a binding. 2234 if (std::optional<SVal> V = B.getDirectBinding(R)) 2235 return *V; 2236 2237 if (std::optional<SVal> V = B.getDefaultBinding(R)) 2238 return *V; 2239 2240 // Lazily derive a value for the VarRegion. 2241 const VarDecl *VD = R->getDecl(); 2242 const MemSpaceRegion *MS = R->getMemorySpace(); 2243 2244 // Arguments are always symbolic. 2245 if (isa<StackArgumentsSpaceRegion>(MS)) 2246 return svalBuilder.getRegionValueSymbolVal(R); 2247 2248 // Is 'VD' declared constant? If so, retrieve the constant value. 
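// e.g. for `const int N = 42;` a read of 'N' that has no binding in the store
// is answered directly from the initializer and evaluates to 42.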
2249 if (VD->getType().isConstQualified()) { 2250 if (const Expr *Init = VD->getAnyInitializer()) { 2251 if (std::optional<SVal> V = svalBuilder.getConstantVal(Init)) 2252 return *V; 2253 2254 // If the variable is const qualified and has an initializer but 2255 // we couldn't evaluate initializer to a value, treat the value as 2256 // unknown. 2257 return UnknownVal(); 2258 } 2259 } 2260 2261 // This must come after the check for constants because closure-captured 2262 // constant variables may appear in UnknownSpaceRegion. 2263 if (isa<UnknownSpaceRegion>(MS)) 2264 return svalBuilder.getRegionValueSymbolVal(R); 2265 2266 if (isa<GlobalsSpaceRegion>(MS)) { 2267 QualType T = VD->getType(); 2268 2269 // If we're in main(), then global initializers have not become stale yet. 2270 if (B.isMainAnalysis()) 2271 if (const Expr *Init = VD->getAnyInitializer()) 2272 if (std::optional<SVal> V = svalBuilder.getConstantVal(Init)) 2273 return *V; 2274 2275 // Function-scoped static variables are default-initialized to 0; if they 2276 // have an initializer, it would have been processed by now. 2277 // FIXME: This is only true when we're starting analysis from main(). 2278 // We're losing a lot of coverage here. 2279 if (isa<StaticGlobalSpaceRegion>(MS)) 2280 return svalBuilder.makeZeroVal(T); 2281 2282 if (std::optional<SVal> V = getBindingForDerivedDefaultValue(B, MS, R, T)) { 2283 assert(!V->getAs<nonloc::LazyCompoundVal>()); 2284 return *V; 2285 } 2286 2287 return svalBuilder.getRegionValueSymbolVal(R); 2288 } 2289 2290 return UndefinedVal(); 2291 } 2292 2293 SVal RegionStoreManager::getBindingForLazySymbol(const TypedValueRegion *R) { 2294 // All other values are symbolic. 2295 return svalBuilder.getRegionValueSymbolVal(R); 2296 } 2297 2298 const RegionStoreManager::SValListTy & 2299 RegionStoreManager::getInterestingValues(nonloc::LazyCompoundVal LCV) { 2300 // First, check the cache. 2301 LazyBindingsMapTy::iterator I = LazyBindingsMap.find(LCV.getCVData()); 2302 if (I != LazyBindingsMap.end()) 2303 return I->second; 2304 2305 // If we don't have a list of values cached, start constructing it. 2306 SValListTy List; 2307 2308 const SubRegion *LazyR = LCV.getRegion(); 2309 RegionBindingsRef B = getRegionBindings(LCV.getStore()); 2310 2311 // If this region had /no/ bindings at the time, there are no interesting 2312 // values to return. 
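// (The empty list is still cached below, so repeated queries for the same
// LazyCompoundVal stay cheap.)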
2313 const ClusterBindings *Cluster = B.lookup(LazyR->getBaseRegion()); 2314 if (!Cluster) 2315 return (LazyBindingsMap[LCV.getCVData()] = std::move(List)); 2316 2317 SmallVector<BindingPair, 32> Bindings; 2318 collectSubRegionBindings(Bindings, svalBuilder, *Cluster, LazyR, 2319 /*IncludeAllDefaultBindings=*/true); 2320 for (SVal V : llvm::make_second_range(Bindings)) { 2321 if (V.isUnknownOrUndef() || V.isConstant()) 2322 continue; 2323 2324 if (auto InnerLCV = V.getAs<nonloc::LazyCompoundVal>()) { 2325 const SValListTy &InnerList = getInterestingValues(*InnerLCV); 2326 List.insert(List.end(), InnerList.begin(), InnerList.end()); 2327 } 2328 2329 List.push_back(V); 2330 } 2331 2332 return (LazyBindingsMap[LCV.getCVData()] = std::move(List)); 2333 } 2334 2335 NonLoc RegionStoreManager::createLazyBinding(RegionBindingsConstRef B, 2336 const TypedValueRegion *R) { 2337 if (std::optional<nonloc::LazyCompoundVal> V = 2338 getExistingLazyBinding(svalBuilder, B, R, false)) 2339 return *V; 2340 2341 return svalBuilder.makeLazyCompoundVal(StoreRef(B.asStore(), *this), R); 2342 } 2343 2344 SVal RegionStoreManager::getBindingForStruct(RegionBindingsConstRef B, 2345 const TypedValueRegion *R) { 2346 const RecordDecl *RD = R->getValueType()->castAs<RecordType>()->getDecl(); 2347 if (!RD->getDefinition()) 2348 return UnknownVal(); 2349 2350 // We also create a LCV for copying empty structs because then the store 2351 // behavior doesn't depend on the struct layout. 2352 // This way even an empty struct can carry taint, no matter if creduce drops 2353 // the last field member or not. 2354 2355 // Try to avoid creating a LCV if it would anyways just refer to a single 2356 // default binding. 2357 if (std::optional<SVal> Val = getUniqueDefaultBinding(B, R)) 2358 return *Val; 2359 return createLazyBinding(B, R); 2360 } 2361 2362 SVal RegionStoreManager::getBindingForArray(RegionBindingsConstRef B, 2363 const TypedValueRegion *R) { 2364 assert(Ctx.getAsConstantArrayType(R->getValueType()) && 2365 "Only constant array types can have compound bindings."); 2366 2367 return createLazyBinding(B, R); 2368 } 2369 2370 bool RegionStoreManager::includedInBindings(Store store, 2371 const MemRegion *region) const { 2372 RegionBindingsRef B = getRegionBindings(store); 2373 region = region->getBaseRegion(); 2374 2375 // Quick path: if the base is the head of a cluster, the region is live. 2376 if (B.lookup(region)) 2377 return true; 2378 2379 // Slow path: if the region is the VALUE of any binding, it is live. 2380 for (RegionBindingsRef::iterator RI = B.begin(), RE = B.end(); RI != RE; ++RI) { 2381 const ClusterBindings &Cluster = RI.getData(); 2382 for (ClusterBindings::iterator CI = Cluster.begin(), CE = Cluster.end(); 2383 CI != CE; ++CI) { 2384 SVal D = CI.getData(); 2385 if (const MemRegion *R = D.getAsRegion()) 2386 if (R->getBaseRegion() == region) 2387 return true; 2388 } 2389 } 2390 2391 return false; 2392 } 2393 2394 //===----------------------------------------------------------------------===// 2395 // Binding values to regions. 
2396 //===----------------------------------------------------------------------===// 2397 2398 StoreRef RegionStoreManager::killBinding(Store ST, Loc L) { 2399 if (std::optional<loc::MemRegionVal> LV = L.getAs<loc::MemRegionVal>()) 2400 if (const MemRegion* R = LV->getRegion()) 2401 return StoreRef(getRegionBindings(ST).removeBinding(R) 2402 .asImmutableMap() 2403 .getRootWithoutRetain(), 2404 *this); 2405 2406 return StoreRef(ST, *this); 2407 } 2408 2409 RegionBindingsRef 2410 RegionStoreManager::bind(RegionBindingsConstRef B, Loc L, SVal V) { 2411 // We only care about region locations. 2412 auto MemRegVal = L.getAs<loc::MemRegionVal>(); 2413 if (!MemRegVal) 2414 return B; 2415 2416 const MemRegion *R = MemRegVal->getRegion(); 2417 2418 // Check if the region is a struct region. 2419 if (const TypedValueRegion* TR = dyn_cast<TypedValueRegion>(R)) { 2420 QualType Ty = TR->getValueType(); 2421 if (Ty->isArrayType()) 2422 return bindArray(B, TR, V); 2423 if (Ty->isStructureOrClassType()) 2424 return bindStruct(B, TR, V); 2425 if (Ty->isVectorType()) 2426 return bindVector(B, TR, V); 2427 if (Ty->isUnionType()) 2428 return bindAggregate(B, TR, V); 2429 } 2430 2431 // Binding directly to a symbolic region should be treated as binding 2432 // to element 0. 2433 if (const auto *SymReg = dyn_cast<SymbolicRegion>(R)) { 2434 QualType Ty = SymReg->getPointeeStaticType(); 2435 if (Ty->isVoidType()) 2436 Ty = StateMgr.getContext().CharTy; 2437 R = GetElementZeroRegion(SymReg, Ty); 2438 } 2439 2440 assert((!isa<CXXThisRegion>(R) || !B.lookup(R)) && 2441 "'this' pointer is not an l-value and is not assignable"); 2442 2443 // Clear out bindings that may overlap with this binding. 2444 RegionBindingsRef NewB = removeSubRegionBindings(B, cast<SubRegion>(R)); 2445 2446 // LazyCompoundVals should be always bound as 'default' bindings. 2447 auto KeyKind = isa<nonloc::LazyCompoundVal>(V) ? BindingKey::Default 2448 : BindingKey::Direct; 2449 return NewB.addBinding(BindingKey::Make(R, KeyKind), V); 2450 } 2451 2452 RegionBindingsRef 2453 RegionStoreManager::setImplicitDefaultValue(RegionBindingsConstRef B, 2454 const MemRegion *R, 2455 QualType T) { 2456 SVal V; 2457 2458 if (Loc::isLocType(T)) 2459 V = svalBuilder.makeNullWithType(T); 2460 else if (T->isIntegralOrEnumerationType()) 2461 V = svalBuilder.makeZeroVal(T); 2462 else if (T->isStructureOrClassType() || T->isArrayType()) { 2463 // Set the default value to a zero constant when it is a structure 2464 // or array. The type doesn't really matter. 2465 V = svalBuilder.makeZeroVal(Ctx.IntTy); 2466 } 2467 else { 2468 // We can't represent values of this type, but we still need to set a value 2469 // to record that the region has been initialized. 2470 // If this assertion ever fires, a new case should be added above -- we 2471 // should know how to default-initialize any value we can symbolicate. 2472 assert(!SymbolManager::canSymbolicate(T) && "This type is representable"); 2473 V = UnknownVal(); 2474 } 2475 2476 return B.addBinding(R, BindingKey::Default, V); 2477 } 2478 2479 std::optional<RegionBindingsRef> RegionStoreManager::tryBindSmallArray( 2480 RegionBindingsConstRef B, const TypedValueRegion *R, const ArrayType *AT, 2481 nonloc::LazyCompoundVal LCV) { 2482 2483 auto CAT = dyn_cast<ConstantArrayType>(AT); 2484 2485 // If we don't know the size, create a lazyCompoundVal instead. 
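// For example (illustrative): copying a small array of scalars such as
// `int a[3]` binds each element individually below, whereas arrays larger
// than SmallArrayLimit (or with non-scalar elements) make this helper bail
// out and the caller binds a single LazyCompoundVal instead.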
2486 if (!CAT) 2487 return std::nullopt; 2488 2489 QualType Ty = CAT->getElementType(); 2490 if (!(Ty->isScalarType() || Ty->isReferenceType())) 2491 return std::nullopt; 2492 2493 // If the array is too big, create a LCV instead. 2494 uint64_t ArrSize = CAT->getLimitedSize(); 2495 if (ArrSize > SmallArrayLimit) 2496 return std::nullopt; 2497 2498 RegionBindingsRef NewB = B; 2499 2500 for (uint64_t i = 0; i < ArrSize; ++i) { 2501 auto Idx = svalBuilder.makeArrayIndex(i); 2502 const ElementRegion *SrcER = 2503 MRMgr.getElementRegion(Ty, Idx, LCV.getRegion(), Ctx); 2504 SVal V = getBindingForElement(getRegionBindings(LCV.getStore()), SrcER); 2505 2506 const ElementRegion *DstER = MRMgr.getElementRegion(Ty, Idx, R, Ctx); 2507 NewB = bind(NewB, loc::MemRegionVal(DstER), V); 2508 } 2509 2510 return NewB; 2511 } 2512 2513 RegionBindingsRef 2514 RegionStoreManager::bindArray(RegionBindingsConstRef B, 2515 const TypedValueRegion* R, 2516 SVal Init) { 2517 2518 const ArrayType *AT =cast<ArrayType>(Ctx.getCanonicalType(R->getValueType())); 2519 QualType ElementTy = AT->getElementType(); 2520 std::optional<uint64_t> Size; 2521 2522 if (const ConstantArrayType* CAT = dyn_cast<ConstantArrayType>(AT)) 2523 Size = CAT->getZExtSize(); 2524 2525 // Check if the init expr is a literal. If so, bind the rvalue instead. 2526 // FIXME: It's not responsibility of the Store to transform this lvalue 2527 // to rvalue. ExprEngine or maybe even CFG should do this before binding. 2528 if (std::optional<loc::MemRegionVal> MRV = Init.getAs<loc::MemRegionVal>()) { 2529 SVal V = getBinding(B.asStore(), *MRV, R->getValueType()); 2530 return bindAggregate(B, R, V); 2531 } 2532 2533 // Handle lazy compound values. 2534 if (std::optional<nonloc::LazyCompoundVal> LCV = 2535 Init.getAs<nonloc::LazyCompoundVal>()) { 2536 if (std::optional<RegionBindingsRef> NewB = 2537 tryBindSmallArray(B, R, AT, *LCV)) 2538 return *NewB; 2539 2540 return bindAggregate(B, R, Init); 2541 } 2542 2543 if (Init.isUnknown()) 2544 return bindAggregate(B, R, UnknownVal()); 2545 2546 // Remaining case: explicit compound values. 2547 const nonloc::CompoundVal& CV = Init.castAs<nonloc::CompoundVal>(); 2548 nonloc::CompoundVal::iterator VI = CV.begin(), VE = CV.end(); 2549 uint64_t i = 0; 2550 2551 RegionBindingsRef NewB(B); 2552 2553 for (; Size ? i < *Size : true; ++i, ++VI) { 2554 // The init list might be shorter than the array length. 2555 if (VI == VE) 2556 break; 2557 2558 NonLoc Idx = svalBuilder.makeArrayIndex(i); 2559 const ElementRegion *ER = MRMgr.getElementRegion(ElementTy, Idx, R, Ctx); 2560 2561 if (ElementTy->isStructureOrClassType()) 2562 NewB = bindStruct(NewB, ER, *VI); 2563 else if (ElementTy->isArrayType()) 2564 NewB = bindArray(NewB, ER, *VI); 2565 else 2566 NewB = bind(NewB, loc::MemRegionVal(ER), *VI); 2567 } 2568 2569 // If the init list is shorter than the array length (or the array has 2570 // variable length), set the array default value. Values that are already set 2571 // are not overwritten. 2572 if (!Size || i < *Size) 2573 NewB = setImplicitDefaultValue(NewB, R, ElementTy); 2574 2575 return NewB; 2576 } 2577 2578 RegionBindingsRef RegionStoreManager::bindVector(RegionBindingsConstRef B, 2579 const TypedValueRegion* R, 2580 SVal V) { 2581 QualType T = R->getValueType(); 2582 const VectorType *VT = T->castAs<VectorType>(); // Use castAs for typedefs. 2583 2584 // Handle lazy compound values and symbolic values. 
2585 if (isa<nonloc::LazyCompoundVal, nonloc::SymbolVal>(V)) 2586 return bindAggregate(B, R, V); 2587 2588 // We may get non-CompoundVal accidentally due to imprecise cast logic or 2589 // that we are binding symbolic struct value. Kill the field values, and if 2590 // the value is symbolic go and bind it as a "default" binding. 2591 if (!isa<nonloc::CompoundVal>(V)) { 2592 return bindAggregate(B, R, UnknownVal()); 2593 } 2594 2595 QualType ElemType = VT->getElementType(); 2596 nonloc::CompoundVal CV = V.castAs<nonloc::CompoundVal>(); 2597 nonloc::CompoundVal::iterator VI = CV.begin(), VE = CV.end(); 2598 unsigned index = 0, numElements = VT->getNumElements(); 2599 RegionBindingsRef NewB(B); 2600 2601 for ( ; index != numElements ; ++index) { 2602 if (VI == VE) 2603 break; 2604 2605 NonLoc Idx = svalBuilder.makeArrayIndex(index); 2606 const ElementRegion *ER = MRMgr.getElementRegion(ElemType, Idx, R, Ctx); 2607 2608 if (ElemType->isArrayType()) 2609 NewB = bindArray(NewB, ER, *VI); 2610 else if (ElemType->isStructureOrClassType()) 2611 NewB = bindStruct(NewB, ER, *VI); 2612 else 2613 NewB = bind(NewB, loc::MemRegionVal(ER), *VI); 2614 } 2615 return NewB; 2616 } 2617 2618 std::optional<SVal> 2619 RegionStoreManager::getUniqueDefaultBinding(RegionBindingsConstRef B, 2620 const TypedValueRegion *R) const { 2621 if (R != R->getBaseRegion()) 2622 return std::nullopt; 2623 2624 const auto *Cluster = B.lookup(R); 2625 if (!Cluster || !llvm::hasSingleElement(*Cluster)) 2626 return std::nullopt; 2627 2628 const auto [Key, Value] = *Cluster->begin(); 2629 return Key.isDirect() ? std::optional<SVal>{} : Value; 2630 } 2631 2632 std::optional<SVal> 2633 RegionStoreManager::getUniqueDefaultBinding(nonloc::LazyCompoundVal LCV) const { 2634 RegionBindingsConstRef B = getRegionBindings(LCV.getStore()); 2635 return getUniqueDefaultBinding(B, LCV.getRegion()); 2636 } 2637 2638 std::optional<RegionBindingsRef> RegionStoreManager::tryBindSmallStruct( 2639 RegionBindingsConstRef B, const TypedValueRegion *R, const RecordDecl *RD, 2640 nonloc::LazyCompoundVal LCV) { 2641 // If we try to copy a Conjured value representing the value of the whole 2642 // struct, don't try to element-wise copy each field. 2643 // That would unnecessarily bind Derived symbols slicing off the subregion for 2644 // the field from the whole Conjured symbol. 2645 // 2646 // struct Window { int width; int height; }; 2647 // Window getWindow(); <-- opaque fn. 2648 // Window w = getWindow(); <-- conjures a new Window. 2649 // Window w2 = w; <-- trivial copy "w", calling "tryBindSmallStruct" 2650 // 2651 // We should not end up with a new Store for "w2" like this: 2652 // Direct [ 0..31]: Derived{Conj{}, w.width} 2653 // Direct [32..63]: Derived{Conj{}, w.height} 2654 // Instead, we should just bind that Conjured value instead. 2655 if (std::optional<SVal> Val = getUniqueDefaultBinding(LCV)) { 2656 return B.addBinding(BindingKey::Make(R, BindingKey::Default), Val.value()); 2657 } 2658 2659 FieldVector Fields; 2660 2661 if (const CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(RD)) 2662 if (Class->getNumBases() != 0 || Class->getNumVBases() != 0) 2663 return std::nullopt; 2664 2665 for (const auto *FD : RD->fields()) { 2666 if (FD->isUnnamedBitField()) 2667 continue; 2668 2669 // If there are too many fields, or if any of the fields are aggregates, 2670 // just use the LCV as a default binding. 
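// For example (illustrative):
//   struct Point { int x, y; };     // few scalar fields: copied field by field
//   struct Buf   { int data[64]; }; // aggregate field: fall back to the LCV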
2671 if (Fields.size() == SmallStructLimit) 2672 return std::nullopt; 2673 2674 QualType Ty = FD->getType(); 2675 2676 // Zero length arrays are basically no-ops, so we also ignore them here. 2677 if (Ty->isConstantArrayType() && 2678 Ctx.getConstantArrayElementCount(Ctx.getAsConstantArrayType(Ty)) == 0) 2679 continue; 2680 2681 if (!(Ty->isScalarType() || Ty->isReferenceType())) 2682 return std::nullopt; 2683 2684 Fields.push_back(FD); 2685 } 2686 2687 RegionBindingsRef NewB = B; 2688 2689 for (const FieldDecl *Field : Fields) { 2690 const FieldRegion *SourceFR = MRMgr.getFieldRegion(Field, LCV.getRegion()); 2691 SVal V = getBindingForField(getRegionBindings(LCV.getStore()), SourceFR); 2692 2693 const FieldRegion *DestFR = MRMgr.getFieldRegion(Field, R); 2694 NewB = bind(NewB, loc::MemRegionVal(DestFR), V); 2695 } 2696 2697 return NewB; 2698 } 2699 2700 RegionBindingsRef RegionStoreManager::bindStruct(RegionBindingsConstRef B, 2701 const TypedValueRegion *R, 2702 SVal V) { 2703 QualType T = R->getValueType(); 2704 assert(T->isStructureOrClassType()); 2705 2706 const RecordType* RT = T->castAs<RecordType>(); 2707 const RecordDecl *RD = RT->getDecl(); 2708 2709 if (!RD->isCompleteDefinition()) 2710 return B; 2711 2712 // Handle lazy compound values and symbolic values. 2713 if (std::optional<nonloc::LazyCompoundVal> LCV = 2714 V.getAs<nonloc::LazyCompoundVal>()) { 2715 if (std::optional<RegionBindingsRef> NewB = 2716 tryBindSmallStruct(B, R, RD, *LCV)) 2717 return *NewB; 2718 return bindAggregate(B, R, V); 2719 } 2720 if (isa<nonloc::SymbolVal>(V)) 2721 return bindAggregate(B, R, V); 2722 2723 // We may get non-CompoundVal accidentally due to imprecise cast logic or 2724 // that we are binding symbolic struct value. Kill the field values, and if 2725 // the value is symbolic go and bind it as a "default" binding. 2726 if (V.isUnknown() || !isa<nonloc::CompoundVal>(V)) 2727 return bindAggregate(B, R, UnknownVal()); 2728 2729 // The raw CompoundVal is essentially a symbolic InitListExpr: an (immutable) 2730 // list of other values. It appears pretty much only when there's an actual 2731 // initializer list expression in the program, and the analyzer tries to 2732 // unwrap it as soon as possible. 2733 // This code is where such unwrap happens: when the compound value is put into 2734 // the object that it was supposed to initialize (it's an *initializer* list, 2735 // after all), instead of binding the whole value to the whole object, we bind 2736 // sub-values to sub-objects. Sub-values may themselves be compound values, 2737 // and in this case the procedure becomes recursive. 2738 // FIXME: The annoying part about compound values is that they don't carry 2739 // any sort of information about which value corresponds to which sub-object. 2740 // It's simply a list of values in the middle of nowhere; we expect to match 2741 // them to sub-objects, essentially, "by index": first value binds to 2742 // the first field, second value binds to the second field, etc. 2743 // It would have been much safer to organize non-lazy compound values as 2744 // a mapping from fields/bases to values. 2745 const nonloc::CompoundVal& CV = V.castAs<nonloc::CompoundVal>(); 2746 nonloc::CompoundVal::iterator VI = CV.begin(), VE = CV.end(); 2747 2748 RegionBindingsRef NewB(B); 2749 2750 // In C++17 aggregates may have base classes, handle those as well. 2751 // They appear before fields in the initializer list / compound value. 
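// For example (illustrative):
//   struct A { int a; };
//   struct B : A { int b; };
//   B b = {{1}, 2}; // the base subobject 'A' is initialized first,
//                   // then the field 'b'.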
2752 if (const auto *CRD = dyn_cast<CXXRecordDecl>(RD)) { 2753 // If the object was constructed with a constructor, its value is a 2754 // LazyCompoundVal. If it's a raw CompoundVal, it means that we're 2755 // performing aggregate initialization. The only exception from this 2756 // rule is sending an Objective-C++ message that returns a C++ object 2757 // to a nil receiver; in this case the semantics is to return a 2758 // zero-initialized object even if it's a C++ object that doesn't have 2759 // this sort of constructor; the CompoundVal is empty in this case. 2760 assert((CRD->isAggregate() || (Ctx.getLangOpts().ObjC && VI == VE)) && 2761 "Non-aggregates are constructed with a constructor!"); 2762 2763 for (const auto &B : CRD->bases()) { 2764 // (Multiple inheritance is fine though.) 2765 assert(!B.isVirtual() && "Aggregates cannot have virtual base classes!"); 2766 2767 if (VI == VE) 2768 break; 2769 2770 QualType BTy = B.getType(); 2771 assert(BTy->isStructureOrClassType() && "Base classes must be classes!"); 2772 2773 const CXXRecordDecl *BRD = BTy->getAsCXXRecordDecl(); 2774 assert(BRD && "Base classes must be C++ classes!"); 2775 2776 const CXXBaseObjectRegion *BR = 2777 MRMgr.getCXXBaseObjectRegion(BRD, R, /*IsVirtual=*/false); 2778 2779 NewB = bindStruct(NewB, BR, *VI); 2780 2781 ++VI; 2782 } 2783 } 2784 2785 RecordDecl::field_iterator FI, FE; 2786 2787 for (FI = RD->field_begin(), FE = RD->field_end(); FI != FE; ++FI) { 2788 2789 if (VI == VE) 2790 break; 2791 2792 // Skip any unnamed bitfields to stay in sync with the initializers. 2793 if (FI->isUnnamedBitField()) 2794 continue; 2795 2796 QualType FTy = FI->getType(); 2797 const FieldRegion* FR = MRMgr.getFieldRegion(*FI, R); 2798 2799 if (FTy->isArrayType()) 2800 NewB = bindArray(NewB, FR, *VI); 2801 else if (FTy->isStructureOrClassType()) 2802 NewB = bindStruct(NewB, FR, *VI); 2803 else 2804 NewB = bind(NewB, loc::MemRegionVal(FR), *VI); 2805 ++VI; 2806 } 2807 2808 // There may be fewer values in the initialize list than the fields of struct. 2809 if (FI != FE) { 2810 NewB = NewB.addBinding(R, BindingKey::Default, 2811 svalBuilder.makeIntVal(0, false)); 2812 } 2813 2814 return NewB; 2815 } 2816 2817 RegionBindingsRef 2818 RegionStoreManager::bindAggregate(RegionBindingsConstRef B, 2819 const TypedRegion *R, 2820 SVal Val) { 2821 // Remove the old bindings, using 'R' as the root of all regions 2822 // we will invalidate. Then add the new binding. 2823 return removeSubRegionBindings(B, R).addBinding(R, BindingKey::Default, Val); 2824 } 2825 2826 //===----------------------------------------------------------------------===// 2827 // State pruning. 2828 //===----------------------------------------------------------------------===// 2829 2830 namespace { 2831 class RemoveDeadBindingsWorker 2832 : public ClusterAnalysis<RemoveDeadBindingsWorker> { 2833 SmallVector<const SymbolicRegion *, 12> Postponed; 2834 SymbolReaper &SymReaper; 2835 const StackFrameContext *CurrentLCtx; 2836 2837 public: 2838 RemoveDeadBindingsWorker(RegionStoreManager &rm, 2839 ProgramStateManager &stateMgr, 2840 RegionBindingsRef b, SymbolReaper &symReaper, 2841 const StackFrameContext *LCtx) 2842 : ClusterAnalysis<RemoveDeadBindingsWorker>(rm, stateMgr, b), 2843 SymReaper(symReaper), CurrentLCtx(LCtx) {} 2844 2845 // Called by ClusterAnalysis. 
2846 void VisitAddedToCluster(const MemRegion *baseR, const ClusterBindings &C); 2847 void VisitCluster(const MemRegion *baseR, const ClusterBindings *C); 2848 using ClusterAnalysis<RemoveDeadBindingsWorker>::VisitCluster; 2849 2850 using ClusterAnalysis::AddToWorkList; 2851 2852 bool AddToWorkList(const MemRegion *R); 2853 2854 bool UpdatePostponed(); 2855 void VisitBinding(SVal V); 2856 }; 2857 } 2858 2859 bool RemoveDeadBindingsWorker::AddToWorkList(const MemRegion *R) { 2860 const MemRegion *BaseR = R->getBaseRegion(); 2861 return AddToWorkList(WorkListElement(BaseR), getCluster(BaseR)); 2862 } 2863 2864 void RemoveDeadBindingsWorker::VisitAddedToCluster(const MemRegion *baseR, 2865 const ClusterBindings &C) { 2866 2867 if (const VarRegion *VR = dyn_cast<VarRegion>(baseR)) { 2868 if (SymReaper.isLive(VR)) 2869 AddToWorkList(baseR, &C); 2870 2871 return; 2872 } 2873 2874 if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(baseR)) { 2875 if (SymReaper.isLive(SR->getSymbol())) 2876 AddToWorkList(SR, &C); 2877 else 2878 Postponed.push_back(SR); 2879 2880 return; 2881 } 2882 2883 if (isa<NonStaticGlobalSpaceRegion>(baseR)) { 2884 AddToWorkList(baseR, &C); 2885 return; 2886 } 2887 2888 // CXXThisRegion in the current or parent location context is live. 2889 if (const CXXThisRegion *TR = dyn_cast<CXXThisRegion>(baseR)) { 2890 const auto *StackReg = 2891 cast<StackArgumentsSpaceRegion>(TR->getSuperRegion()); 2892 const StackFrameContext *RegCtx = StackReg->getStackFrame(); 2893 if (CurrentLCtx && 2894 (RegCtx == CurrentLCtx || RegCtx->isParentOf(CurrentLCtx))) 2895 AddToWorkList(TR, &C); 2896 } 2897 } 2898 2899 void RemoveDeadBindingsWorker::VisitCluster(const MemRegion *baseR, 2900 const ClusterBindings *C) { 2901 if (!C) 2902 return; 2903 2904 // Mark the symbol for any SymbolicRegion with live bindings as live itself. 2905 // This means we should continue to track that symbol. 2906 if (const SymbolicRegion *SymR = dyn_cast<SymbolicRegion>(baseR)) 2907 SymReaper.markLive(SymR->getSymbol()); 2908 2909 for (const auto &[Key, Val] : *C) { 2910 // Element index of a binding key is live. 2911 SymReaper.markElementIndicesLive(Key.getRegion()); 2912 2913 VisitBinding(Val); 2914 } 2915 } 2916 2917 void RemoveDeadBindingsWorker::VisitBinding(SVal V) { 2918 // Is it a LazyCompoundVal? All referenced regions are live as well. 2919 // The LazyCompoundVal itself is not live but should be readable. 2920 if (auto LCS = V.getAs<nonloc::LazyCompoundVal>()) { 2921 SymReaper.markLazilyCopied(LCS->getRegion()); 2922 2923 for (SVal V : RM.getInterestingValues(*LCS)) { 2924 if (auto DepLCS = V.getAs<nonloc::LazyCompoundVal>()) 2925 SymReaper.markLazilyCopied(DepLCS->getRegion()); 2926 else 2927 VisitBinding(V); 2928 } 2929 2930 return; 2931 } 2932 2933 // If V is a region, then add it to the worklist. 2934 if (const MemRegion *R = V.getAsRegion()) { 2935 AddToWorkList(R); 2936 SymReaper.markLive(R); 2937 2938 // All regions captured by a block are also live. 2939 if (const BlockDataRegion *BR = dyn_cast<BlockDataRegion>(R)) { 2940 for (auto Var : BR->referenced_vars()) 2941 AddToWorkList(Var.getCapturedRegion()); 2942 } 2943 } 2944 2945 2946 // Update the set of live symbols. 2947 for (SymbolRef Sym : V.symbols()) 2948 SymReaper.markLive(Sym); 2949 } 2950 2951 bool RemoveDeadBindingsWorker::UpdatePostponed() { 2952 // See if any postponed SymbolicRegions are actually live now, after 2953 // having done a scan. 
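// A region may have been postponed because its symbol looked dead when its
// cluster was first visited; a binding visited later in the scan can mark
// that symbol live again, in which case the region's cluster is revisited.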
2954 bool Changed = false; 2955 2956 for (const SymbolicRegion *SR : Postponed) { 2957 if (SymReaper.isLive(SR->getSymbol())) { 2958 Changed |= AddToWorkList(SR); 2959 SR = nullptr; 2960 } 2961 } 2962 2963 return Changed; 2964 } 2965 2966 StoreRef RegionStoreManager::removeDeadBindings(Store store, 2967 const StackFrameContext *LCtx, 2968 SymbolReaper& SymReaper) { 2969 RegionBindingsRef B = getRegionBindings(store); 2970 RemoveDeadBindingsWorker W(*this, StateMgr, B, SymReaper, LCtx); 2971 W.GenerateClusters(); 2972 2973 // Enqueue the region roots onto the worklist. 2974 for (const MemRegion *Reg : SymReaper.regions()) { 2975 W.AddToWorkList(Reg); 2976 } 2977 2978 do W.RunWorkList(); while (W.UpdatePostponed()); 2979 2980 // We have now scanned the store, marking reachable regions and symbols 2981 // as live. We now remove all the regions that are dead from the store 2982 // as well as update DSymbols with the set symbols that are now dead. 2983 for (const MemRegion *Base : llvm::make_first_range(B)) { 2984 // If the cluster has been visited, we know the region has been marked. 2985 // Otherwise, remove the dead entry. 2986 if (!W.isVisited(Base)) 2987 B = B.remove(Base); 2988 } 2989 2990 return StoreRef(B.asStore(), *this); 2991 } 2992 2993 //===----------------------------------------------------------------------===// 2994 // Utility methods. 2995 //===----------------------------------------------------------------------===// 2996 2997 void RegionStoreManager::printJson(raw_ostream &Out, Store S, const char *NL, 2998 unsigned int Space, bool IsDot) const { 2999 RegionBindingsRef Bindings = getRegionBindings(S); 3000 3001 Indent(Out, Space, IsDot) << "\"store\": "; 3002 3003 if (Bindings.isEmpty()) { 3004 Out << "null," << NL; 3005 return; 3006 } 3007 3008 Out << "{ \"pointer\": \"" << Bindings.asStore() << "\", \"items\": [" << NL; 3009 Bindings.printJson(Out, NL, Space + 1, IsDot); 3010 Indent(Out, Space, IsDot) << "]}," << NL; 3011 } 3012