1 //===- SVals.h - Abstract Values for Static Analysis ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines SVal, Loc, and NonLoc, classes that represent 10 // abstract r-values for use with path-sensitive value tracking. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H 15 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H 16 17 #include "clang/AST/Expr.h" 18 #include "clang/AST/Type.h" 19 #include "clang/Basic/LLVM.h" 20 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntPtr.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" 22 #include "llvm/ADT/APSInt.h" 23 #include "llvm/ADT/FoldingSet.h" 24 #include "llvm/ADT/ImmutableList.h" 25 #include "llvm/ADT/PointerUnion.h" 26 #include "llvm/ADT/STLForwardCompat.h" 27 #include "llvm/ADT/iterator_range.h" 28 #include "llvm/Support/Casting.h" 29 #include <cassert> 30 #include <cstdint> 31 #include <optional> 32 #include <utility> 33 34 //==------------------------------------------------------------------------==// 35 // Base SVal types. 36 //==------------------------------------------------------------------------==// 37 38 namespace clang { 39 40 class CXXBaseSpecifier; 41 class FunctionDecl; 42 class LabelDecl; 43 44 namespace ento { 45 46 class CompoundValData; 47 class LazyCompoundValData; 48 class MemRegion; 49 class PointerToMemberData; 50 class SValBuilder; 51 class TypedValueRegion; 52 53 /// SVal - This represents a symbolic expression, which can be either 54 /// an L-value or an R-value. 55 /// 56 class SVal { 57 public: 58 enum SValKind : unsigned char { 59 #define BASIC_SVAL(Id, Parent) Id##Kind, 60 #define LOC_SVAL(Id, Parent) Loc##Id##Kind, 61 #define NONLOC_SVAL(Id, Parent) NonLoc##Id##Kind, 62 #define SVAL_RANGE(Id, First, Last) \ 63 BEGIN_##Id = Id##First##Kind, END_##Id = Id##Last##Kind, 64 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def" 65 }; 66 67 protected: 68 const void *Data = nullptr; 69 SValKind Kind = UndefinedValKind; 70 71 explicit SVal(SValKind Kind, const void *Data = nullptr) 72 : Data(Data), Kind(Kind) {} 73 74 template <typename T> const T *castDataAs() const { 75 return static_cast<const T *>(Data); 76 } 77 78 public: 79 explicit SVal() = default; 80 81 /// Convert to the specified SVal type, asserting that this SVal is of 82 /// the desired type. 83 template <typename T> T castAs() const { return llvm::cast<T>(*this); } 84 85 /// Convert to the specified SVal type, returning std::nullopt if this SVal is 86 /// not of the desired type. 87 template <typename T> std::optional<T> getAs() const { 88 return llvm::dyn_cast<T>(*this); 89 } 90 91 SValKind getKind() const { return Kind; } 92 93 StringRef getKindStr() const; 94 95 // This method is required for using SVal in a FoldingSetNode. It 96 // extracts a unique signature for this SVal object. 97 void Profile(llvm::FoldingSetNodeID &ID) const { 98 ID.AddPointer(Data); 99 ID.AddInteger(llvm::to_underlying(getKind())); 100 } 101 102 bool operator==(SVal R) const { return Kind == R.Kind && Data == R.Data; } 103 bool operator!=(SVal R) const { return !(*this == R); } 104 105 bool isUnknown() const { return getKind() == UnknownValKind; } 106 107 bool isUndef() const { return getKind() == UndefinedValKind; } 108 109 bool isUnknownOrUndef() const { return isUnknown() || isUndef(); } 110 111 bool isValid() const { return !isUnknownOrUndef(); } 112 113 bool isConstant() const; 114 115 bool isConstant(int I) const; 116 117 bool isZeroConstant() const; 118 119 /// getAsFunctionDecl - If this SVal is a MemRegionVal and wraps a 120 /// CodeTextRegion wrapping a FunctionDecl, return that FunctionDecl. 121 /// Otherwise return 0. 122 const FunctionDecl *getAsFunctionDecl() const; 123 124 /// If this SVal is a location and wraps a symbol, return that 125 /// SymbolRef. Otherwise return 0. 126 /// 127 /// Casts are ignored during lookup. 128 /// \param IncludeBaseRegions The boolean that controls whether the search 129 /// should continue to the base regions if the region is not symbolic. 130 SymbolRef getAsLocSymbol(bool IncludeBaseRegions = false) const; 131 132 /// Get the symbol in the SVal or its base region. 133 SymbolRef getLocSymbolInBase() const; 134 135 /// If this SVal wraps a symbol return that SymbolRef. 136 /// Otherwise, return 0. 137 /// 138 /// Casts are ignored during lookup. 139 /// \param IncludeBaseRegions The boolean that controls whether the search 140 /// should continue to the base regions if the region is not symbolic. 141 SymbolRef getAsSymbol(bool IncludeBaseRegions = false) const; 142 143 /// If this SVal is loc::ConcreteInt or nonloc::ConcreteInt, 144 /// return a pointer to APSInt which is held in it. 145 /// Otherwise, return nullptr. 146 const llvm::APSInt *getAsInteger() const; 147 148 const MemRegion *getAsRegion() const; 149 150 /// printJson - Pretty-prints in JSON format. 151 void printJson(raw_ostream &Out, bool AddQuotes) const; 152 153 void dumpToStream(raw_ostream &OS) const; 154 void dump() const; 155 156 llvm::iterator_range<SymExpr::symbol_iterator> symbols() const { 157 if (const SymExpr *SE = getAsSymbol(/*IncludeBaseRegions=*/true)) 158 return SE->symbols(); 159 SymExpr::symbol_iterator end{}; 160 return llvm::make_range(end, end); 161 } 162 163 /// Try to get a reasonable type for the given value. 164 /// 165 /// \returns The best approximation of the value type or Null. 166 /// In theory, all symbolic values should be typed, but this function 167 /// is still a WIP and might have a few blind spots. 168 /// 169 /// \note This function should not be used when the user has access to the 170 /// bound expression AST node as well, since AST always has exact types. 171 /// 172 /// \note Loc values are interpreted as pointer rvalues for the purposes of 173 /// this method. 174 QualType getType(const ASTContext &) const; 175 }; 176 177 inline raw_ostream &operator<<(raw_ostream &os, clang::ento::SVal V) { 178 V.dumpToStream(os); 179 return os; 180 } 181 182 namespace nonloc { 183 /// Sub-kinds for NonLoc values. 184 #define NONLOC_SVAL(Id, Parent) \ 185 inline constexpr auto Id##Kind = SVal::SValKind::NonLoc##Id##Kind; 186 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def" 187 } // namespace nonloc 188 189 namespace loc { 190 /// Sub-kinds for Loc values. 191 #define LOC_SVAL(Id, Parent) \ 192 inline constexpr auto Id##Kind = SVal::SValKind::Loc##Id##Kind; 193 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def" 194 } // namespace loc 195 196 class UndefinedVal : public SVal { 197 public: 198 UndefinedVal() : SVal(UndefinedValKind) {} 199 static bool classof(SVal V) { return V.getKind() == UndefinedValKind; } 200 }; 201 202 class DefinedOrUnknownSVal : public SVal { 203 public: 204 // We want calling these methods to be a compiler error since they are 205 // tautologically false. 206 bool isUndef() const = delete; 207 bool isValid() const = delete; 208 209 static bool classof(SVal V) { return !V.isUndef(); } 210 211 protected: 212 explicit DefinedOrUnknownSVal(SValKind Kind, const void *Data = nullptr) 213 : SVal(Kind, Data) {} 214 }; 215 216 class UnknownVal : public DefinedOrUnknownSVal { 217 public: 218 explicit UnknownVal() : DefinedOrUnknownSVal(UnknownValKind) {} 219 220 static bool classof(SVal V) { return V.getKind() == UnknownValKind; } 221 }; 222 223 class DefinedSVal : public DefinedOrUnknownSVal { 224 public: 225 // We want calling these methods to be a compiler error since they are 226 // tautologically true/false. 227 bool isUnknown() const = delete; 228 bool isUnknownOrUndef() const = delete; 229 bool isValid() const = delete; 230 231 static bool classof(SVal V) { return !V.isUnknownOrUndef(); } 232 233 protected: 234 explicit DefinedSVal(SValKind Kind, const void *Data) 235 : DefinedOrUnknownSVal(Kind, Data) {} 236 }; 237 238 class NonLoc : public DefinedSVal { 239 protected: 240 NonLoc(SValKind Kind, const void *Data) : DefinedSVal(Kind, Data) {} 241 242 public: 243 void dumpToStream(raw_ostream &Out) const; 244 245 static bool isCompoundType(QualType T) { 246 return T->isArrayType() || T->isRecordType() || 247 T->isAnyComplexType() || T->isVectorType(); 248 } 249 250 static bool classof(SVal V) { 251 return BEGIN_NonLoc <= V.getKind() && V.getKind() <= END_NonLoc; 252 } 253 }; 254 255 class Loc : public DefinedSVal { 256 protected: 257 Loc(SValKind Kind, const void *Data) : DefinedSVal(Kind, Data) {} 258 259 public: 260 void dumpToStream(raw_ostream &Out) const; 261 262 static bool isLocType(QualType T) { 263 return T->isAnyPointerType() || T->isBlockPointerType() || 264 T->isReferenceType() || T->isNullPtrType(); 265 } 266 267 static bool classof(SVal V) { 268 return BEGIN_Loc <= V.getKind() && V.getKind() <= END_Loc; 269 } 270 }; 271 272 //==------------------------------------------------------------------------==// 273 // Subclasses of NonLoc. 274 //==------------------------------------------------------------------------==// 275 276 namespace nonloc { 277 278 /// Represents symbolic expression that isn't a location. 279 class SymbolVal : public NonLoc { 280 public: 281 SymbolVal() = delete; 282 explicit SymbolVal(SymbolRef Sym) : NonLoc(SymbolValKind, Sym) { 283 assert(Sym); 284 assert(!Loc::isLocType(Sym->getType())); 285 } 286 287 LLVM_ATTRIBUTE_RETURNS_NONNULL 288 SymbolRef getSymbol() const { 289 return (const SymExpr *) Data; 290 } 291 292 bool isExpression() const { 293 return !isa<SymbolData>(getSymbol()); 294 } 295 296 static bool classof(SVal V) { return V.getKind() == SymbolValKind; } 297 }; 298 299 /// Value representing integer constant. 300 class ConcreteInt : public NonLoc { 301 public: 302 explicit ConcreteInt(APSIntPtr V) : NonLoc(ConcreteIntKind, V.get()) {} 303 304 APSIntPtr getValue() const { 305 // This is safe because in the ctor we take a safe APSIntPtr. 306 return APSIntPtr::unsafeConstructor(castDataAs<llvm::APSInt>()); 307 } 308 309 static bool classof(SVal V) { return V.getKind() == ConcreteIntKind; } 310 }; 311 312 class LocAsInteger : public NonLoc { 313 friend class ento::SValBuilder; 314 315 explicit LocAsInteger(const std::pair<SVal, uintptr_t> &data) 316 : NonLoc(LocAsIntegerKind, &data) { 317 // We do not need to represent loc::ConcreteInt as LocAsInteger, 318 // as it'd collapse into a nonloc::ConcreteInt instead. 319 [[maybe_unused]] SValKind K = data.first.getKind(); 320 assert(K == loc::MemRegionValKind || K == loc::GotoLabelKind); 321 } 322 323 public: 324 Loc getLoc() const { 325 return castDataAs<std::pair<SVal, uintptr_t>>()->first.castAs<Loc>(); 326 } 327 328 unsigned getNumBits() const { 329 return castDataAs<std::pair<SVal, uintptr_t>>()->second; 330 } 331 332 static bool classof(SVal V) { return V.getKind() == LocAsIntegerKind; } 333 }; 334 335 /// The simplest example of a concrete compound value is nonloc::CompoundVal, 336 /// which represents a concrete r-value of an initializer-list or a string. 337 /// Internally, it contains an llvm::ImmutableList of SVal's stored inside the 338 /// literal. 339 class CompoundVal : public NonLoc { 340 friend class ento::SValBuilder; 341 342 explicit CompoundVal(const CompoundValData *D) : NonLoc(CompoundValKind, D) { 343 assert(D); 344 } 345 346 public: 347 LLVM_ATTRIBUTE_RETURNS_NONNULL 348 const CompoundValData* getValue() const { 349 return castDataAs<CompoundValData>(); 350 } 351 352 using iterator = llvm::ImmutableList<SVal>::iterator; 353 iterator begin() const; 354 iterator end() const; 355 356 static bool classof(SVal V) { return V.getKind() == CompoundValKind; } 357 }; 358 359 /// While nonloc::CompoundVal covers a few simple use cases, 360 /// nonloc::LazyCompoundVal is a more performant and flexible way to represent 361 /// an rvalue of record type, so it shows up much more frequently during 362 /// analysis. This value is an r-value that represents a snapshot of any 363 /// structure "as a whole" at a given moment during the analysis. Such value is 364 /// already quite far from being referred to as "concrete", as many fields 365 /// inside it would be unknown or symbolic. nonloc::LazyCompoundVal operates by 366 /// storing two things: 367 /// * a reference to the TypedValueRegion being snapshotted (yes, it is always 368 /// typed), and also 369 /// * a reference to the whole Store object, obtained from the ProgramState in 370 /// which the nonloc::LazyCompoundVal was created. 371 /// 372 /// Note that the old ProgramState and its Store is kept alive during the 373 /// analysis because these are immutable functional data structures and each new 374 /// Store value is represented as "earlier Store" + "additional binding". 375 /// 376 /// Essentially, nonloc::LazyCompoundVal is a performance optimization for the 377 /// analyzer. Because Store is immutable, creating a nonloc::LazyCompoundVal is 378 /// a very cheap operation. Note that the Store contains all region bindings in 379 /// the program state, not only related to the region. Later, if necessary, such 380 /// value can be unpacked -- eg. when it is assigned to another variable. 381 /// 382 /// If you ever need to inspect the contents of the LazyCompoundVal, you can use 383 /// StoreManager::iterBindings(). It'll iterate through all values in the Store, 384 /// but you're only interested in the ones that belong to 385 /// LazyCompoundVal::getRegion(); other bindings are immaterial. 386 /// 387 /// NOTE: LazyCompoundVal::getRegion() itself is also immaterial (see the actual 388 /// method docs for details). 389 class LazyCompoundVal : public NonLoc { 390 friend class ento::SValBuilder; 391 392 explicit LazyCompoundVal(const LazyCompoundValData *D) 393 : NonLoc(LazyCompoundValKind, D) { 394 assert(D); 395 } 396 397 public: 398 LLVM_ATTRIBUTE_RETURNS_NONNULL 399 const LazyCompoundValData *getCVData() const { 400 return castDataAs<LazyCompoundValData>(); 401 } 402 403 /// It might return null. 404 const void *getStore() const; 405 406 /// This function itself is immaterial. It is only an implementation detail. 407 /// LazyCompoundVal represents only the rvalue, the data (known or unknown) 408 /// that *was* stored in that region *at some point in the past*. The region 409 /// should not be used for any purpose other than figuring out what part of 410 /// the frozen Store you're interested in. The value does not represent the 411 /// *current* value of that region. Sometimes it may, but this should not be 412 /// relied upon. Instead, if you want to figure out what region it represents, 413 /// you typically need to see where you got it from in the first place. The 414 /// region is absolutely not analogous to the C++ "this" pointer. It is also 415 /// not a valid way to "materialize" the prvalue into a glvalue in C++, 416 /// because the region represents the *old* storage (sometimes very old), not 417 /// the *future* storage. 418 LLVM_ATTRIBUTE_RETURNS_NONNULL 419 const TypedValueRegion *getRegion() const; 420 421 static bool classof(SVal V) { return V.getKind() == LazyCompoundValKind; } 422 }; 423 424 /// Value representing pointer-to-member. 425 /// 426 /// This value is qualified as NonLoc because neither loading nor storing 427 /// operations are applied to it. Instead, the analyzer uses the L-value coming 428 /// from pointer-to-member applied to an object. 429 /// This SVal is represented by a NamedDecl which can be a member function 430 /// pointer or a member data pointer and an optional list of CXXBaseSpecifiers. 431 /// This list is required to accumulate the pointer-to-member cast history to 432 /// figure out the correct subobject field. In particular, implicit casts grow 433 /// this list and explicit casts like static_cast shrink this list. 434 class PointerToMember : public NonLoc { 435 friend class ento::SValBuilder; 436 437 public: 438 using PTMDataType = 439 llvm::PointerUnion<const NamedDecl *, const PointerToMemberData *>; 440 441 const PTMDataType getPTMData() const { 442 return PTMDataType::getFromOpaqueValue(const_cast<void *>(Data)); 443 } 444 445 bool isNullMemberPointer() const; 446 447 const NamedDecl *getDecl() const; 448 449 template<typename AdjustedDecl> 450 const AdjustedDecl *getDeclAs() const { 451 return dyn_cast_or_null<AdjustedDecl>(getDecl()); 452 } 453 454 using iterator = llvm::ImmutableList<const CXXBaseSpecifier *>::iterator; 455 456 iterator begin() const; 457 iterator end() const; 458 459 static bool classof(SVal V) { return V.getKind() == PointerToMemberKind; } 460 461 private: 462 explicit PointerToMember(const PTMDataType D) 463 : NonLoc(PointerToMemberKind, D.getOpaqueValue()) {} 464 }; 465 466 } // namespace nonloc 467 468 //==------------------------------------------------------------------------==// 469 // Subclasses of Loc. 470 //==------------------------------------------------------------------------==// 471 472 namespace loc { 473 474 class GotoLabel : public Loc { 475 public: 476 explicit GotoLabel(const LabelDecl *Label) : Loc(GotoLabelKind, Label) { 477 assert(Label); 478 } 479 480 const LabelDecl *getLabel() const { return castDataAs<LabelDecl>(); } 481 482 static bool classof(SVal V) { return V.getKind() == GotoLabelKind; } 483 }; 484 485 class MemRegionVal : public Loc { 486 public: 487 explicit MemRegionVal(const MemRegion *r) : Loc(MemRegionValKind, r) { 488 assert(r); 489 } 490 491 /// Get the underlining region. 492 LLVM_ATTRIBUTE_RETURNS_NONNULL 493 const MemRegion *getRegion() const { return castDataAs<MemRegion>(); } 494 495 /// Get the underlining region and strip casts. 496 LLVM_ATTRIBUTE_RETURNS_NONNULL 497 const MemRegion* stripCasts(bool StripBaseCasts = true) const; 498 499 template <typename REGION> 500 const REGION* getRegionAs() const { 501 return dyn_cast<REGION>(getRegion()); 502 } 503 504 bool operator==(const MemRegionVal &R) const { 505 return getRegion() == R.getRegion(); 506 } 507 508 bool operator!=(const MemRegionVal &R) const { 509 return getRegion() != R.getRegion(); 510 } 511 512 static bool classof(SVal V) { return V.getKind() == MemRegionValKind; } 513 }; 514 515 class ConcreteInt : public Loc { 516 public: 517 explicit ConcreteInt(APSIntPtr V) : Loc(ConcreteIntKind, V.get()) {} 518 519 APSIntPtr getValue() const { 520 // This is safe because in the ctor we take a safe APSIntPtr. 521 return APSIntPtr::unsafeConstructor(castDataAs<llvm::APSInt>()); 522 } 523 524 static bool classof(SVal V) { return V.getKind() == ConcreteIntKind; } 525 }; 526 527 } // namespace loc 528 } // namespace ento 529 } // namespace clang 530 531 namespace llvm { 532 template <typename To, typename From> 533 struct CastInfo< 534 To, From, 535 std::enable_if_t<std::is_base_of<::clang::ento::SVal, From>::value>> 536 : public CastIsPossible<To, ::clang::ento::SVal> { 537 using Self = CastInfo< 538 To, From, 539 std::enable_if_t<std::is_base_of<::clang::ento::SVal, From>::value>>; 540 static bool isPossible(const From &V) { 541 return To::classof(*static_cast<const ::clang::ento::SVal *>(&V)); 542 } 543 static std::optional<To> castFailed() { return std::optional<To>{}; } 544 static To doCast(const From &f) { 545 return *static_cast<const To *>(cast<::clang::ento::SVal>(&f)); 546 } 547 static std::optional<To> doCastIfPossible(const From &f) { 548 if (!Self::isPossible(f)) 549 return Self::castFailed(); 550 return doCast(f); 551 } 552 }; 553 } // namespace llvm 554 555 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H 556