xref: /llvm-project/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h (revision 13e20bcb98e57831d46162b9ba42a78d85e8283d)
1 //===- SVals.h - Abstract Values for Static Analysis ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines SVal, Loc, and NonLoc, classes that represent
10 //  abstract r-values for use with path-sensitive value tracking.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H
15 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H
16 
17 #include "clang/AST/Expr.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Basic/LLVM.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntPtr.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
22 #include "llvm/ADT/APSInt.h"
23 #include "llvm/ADT/FoldingSet.h"
24 #include "llvm/ADT/ImmutableList.h"
25 #include "llvm/ADT/PointerUnion.h"
26 #include "llvm/ADT/STLForwardCompat.h"
27 #include "llvm/ADT/iterator_range.h"
28 #include "llvm/Support/Casting.h"
29 #include <cassert>
30 #include <cstdint>
31 #include <optional>
32 #include <utility>
33 
34 //==------------------------------------------------------------------------==//
35 //  Base SVal types.
36 //==------------------------------------------------------------------------==//
37 
38 namespace clang {
39 
40 class CXXBaseSpecifier;
41 class FunctionDecl;
42 class LabelDecl;
43 
44 namespace ento {
45 
46 class CompoundValData;
47 class LazyCompoundValData;
48 class MemRegion;
49 class PointerToMemberData;
50 class SValBuilder;
51 class TypedValueRegion;
52 
53 /// SVal - This represents a symbolic expression, which can be either
54 ///  an L-value or an R-value.
55 ///
56 class SVal {
57 public:
58   enum SValKind : unsigned char {
59 #define BASIC_SVAL(Id, Parent) Id##Kind,
60 #define LOC_SVAL(Id, Parent) Loc##Id##Kind,
61 #define NONLOC_SVAL(Id, Parent) NonLoc##Id##Kind,
62 #define SVAL_RANGE(Id, First, Last)                                            \
63   BEGIN_##Id = Id##First##Kind, END_##Id = Id##Last##Kind,
64 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
65   };
66 
67 protected:
68   const void *Data = nullptr;
69   SValKind Kind = UndefinedValKind;
70 
71   explicit SVal(SValKind Kind, const void *Data = nullptr)
72       : Data(Data), Kind(Kind) {}
73 
74   template <typename T> const T *castDataAs() const {
75     return static_cast<const T *>(Data);
76   }
77 
78 public:
79   explicit SVal() = default;
80 
81   /// Convert to the specified SVal type, asserting that this SVal is of
82   /// the desired type.
83   template <typename T> T castAs() const { return llvm::cast<T>(*this); }
84 
85   /// Convert to the specified SVal type, returning std::nullopt if this SVal is
86   /// not of the desired type.
87   template <typename T> std::optional<T> getAs() const {
88     return llvm::dyn_cast<T>(*this);
89   }
90 
91   SValKind getKind() const { return Kind; }
92 
93   StringRef getKindStr() const;
94 
95   // This method is required for using SVal in a FoldingSetNode.  It
96   // extracts a unique signature for this SVal object.
97   void Profile(llvm::FoldingSetNodeID &ID) const {
98     ID.AddPointer(Data);
99     ID.AddInteger(llvm::to_underlying(getKind()));
100   }
101 
102   bool operator==(SVal R) const { return Kind == R.Kind && Data == R.Data; }
103   bool operator!=(SVal R) const { return !(*this == R); }
104 
105   bool isUnknown() const { return getKind() == UnknownValKind; }
106 
107   bool isUndef() const { return getKind() == UndefinedValKind; }
108 
109   bool isUnknownOrUndef() const { return isUnknown() || isUndef(); }
110 
111   bool isValid() const { return !isUnknownOrUndef(); }
112 
113   bool isConstant() const;
114 
115   bool isConstant(int I) const;
116 
117   bool isZeroConstant() const;
118 
119   /// getAsFunctionDecl - If this SVal is a MemRegionVal and wraps a
120   /// CodeTextRegion wrapping a FunctionDecl, return that FunctionDecl.
121   /// Otherwise return 0.
122   const FunctionDecl *getAsFunctionDecl() const;
123 
124   /// If this SVal is a location and wraps a symbol, return that
125   ///  SymbolRef. Otherwise return 0.
126   ///
127   /// Casts are ignored during lookup.
128   /// \param IncludeBaseRegions The boolean that controls whether the search
129   /// should continue to the base regions if the region is not symbolic.
130   SymbolRef getAsLocSymbol(bool IncludeBaseRegions = false) const;
131 
132   /// Get the symbol in the SVal or its base region.
133   SymbolRef getLocSymbolInBase() const;
134 
135   /// If this SVal wraps a symbol return that SymbolRef.
136   /// Otherwise, return 0.
137   ///
138   /// Casts are ignored during lookup.
139   /// \param IncludeBaseRegions The boolean that controls whether the search
140   /// should continue to the base regions if the region is not symbolic.
141   SymbolRef getAsSymbol(bool IncludeBaseRegions = false) const;
142 
143   /// If this SVal is loc::ConcreteInt or nonloc::ConcreteInt,
144   /// return a pointer to APSInt which is held in it.
145   /// Otherwise, return nullptr.
146   const llvm::APSInt *getAsInteger() const;
147 
148   const MemRegion *getAsRegion() const;
149 
150   /// printJson - Pretty-prints in JSON format.
151   void printJson(raw_ostream &Out, bool AddQuotes) const;
152 
153   void dumpToStream(raw_ostream &OS) const;
154   void dump() const;
155 
156   llvm::iterator_range<SymExpr::symbol_iterator> symbols() const {
157     if (const SymExpr *SE = getAsSymbol(/*IncludeBaseRegions=*/true))
158       return SE->symbols();
159     SymExpr::symbol_iterator end{};
160     return llvm::make_range(end, end);
161   }
162 
163   /// Try to get a reasonable type for the given value.
164   ///
165   /// \returns The best approximation of the value type or Null.
166   /// In theory, all symbolic values should be typed, but this function
167   /// is still a WIP and might have a few blind spots.
168   ///
169   /// \note This function should not be used when the user has access to the
170   /// bound expression AST node as well, since AST always has exact types.
171   ///
172   /// \note Loc values are interpreted as pointer rvalues for the purposes of
173   /// this method.
174   QualType getType(const ASTContext &) const;
175 };
176 
177 inline raw_ostream &operator<<(raw_ostream &os, clang::ento::SVal V) {
178   V.dumpToStream(os);
179   return os;
180 }
181 
182 namespace nonloc {
183 /// Sub-kinds for NonLoc values.
184 #define NONLOC_SVAL(Id, Parent)                                                \
185   inline constexpr auto Id##Kind = SVal::SValKind::NonLoc##Id##Kind;
186 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
187 } // namespace nonloc
188 
189 namespace loc {
190 /// Sub-kinds for Loc values.
191 #define LOC_SVAL(Id, Parent)                                                   \
192   inline constexpr auto Id##Kind = SVal::SValKind::Loc##Id##Kind;
193 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
194 } // namespace loc
195 
196 class UndefinedVal : public SVal {
197 public:
198   UndefinedVal() : SVal(UndefinedValKind) {}
199   static bool classof(SVal V) { return V.getKind() == UndefinedValKind; }
200 };
201 
202 class DefinedOrUnknownSVal : public SVal {
203 public:
204   // We want calling these methods to be a compiler error since they are
205   // tautologically false.
206   bool isUndef() const = delete;
207   bool isValid() const = delete;
208 
209   static bool classof(SVal V) { return !V.isUndef(); }
210 
211 protected:
212   explicit DefinedOrUnknownSVal(SValKind Kind, const void *Data = nullptr)
213       : SVal(Kind, Data) {}
214 };
215 
216 class UnknownVal : public DefinedOrUnknownSVal {
217 public:
218   explicit UnknownVal() : DefinedOrUnknownSVal(UnknownValKind) {}
219 
220   static bool classof(SVal V) { return V.getKind() == UnknownValKind; }
221 };
222 
223 class DefinedSVal : public DefinedOrUnknownSVal {
224 public:
225   // We want calling these methods to be a compiler error since they are
226   // tautologically true/false.
227   bool isUnknown() const = delete;
228   bool isUnknownOrUndef() const = delete;
229   bool isValid() const = delete;
230 
231   static bool classof(SVal V) { return !V.isUnknownOrUndef(); }
232 
233 protected:
234   explicit DefinedSVal(SValKind Kind, const void *Data)
235       : DefinedOrUnknownSVal(Kind, Data) {}
236 };
237 
238 class NonLoc : public DefinedSVal {
239 protected:
240   NonLoc(SValKind Kind, const void *Data) : DefinedSVal(Kind, Data) {}
241 
242 public:
243   void dumpToStream(raw_ostream &Out) const;
244 
245   static bool isCompoundType(QualType T) {
246     return T->isArrayType() || T->isRecordType() ||
247            T->isAnyComplexType() || T->isVectorType();
248   }
249 
250   static bool classof(SVal V) {
251     return BEGIN_NonLoc <= V.getKind() && V.getKind() <= END_NonLoc;
252   }
253 };
254 
255 class Loc : public DefinedSVal {
256 protected:
257   Loc(SValKind Kind, const void *Data) : DefinedSVal(Kind, Data) {}
258 
259 public:
260   void dumpToStream(raw_ostream &Out) const;
261 
262   static bool isLocType(QualType T) {
263     return T->isAnyPointerType() || T->isBlockPointerType() ||
264            T->isReferenceType() || T->isNullPtrType();
265   }
266 
267   static bool classof(SVal V) {
268     return BEGIN_Loc <= V.getKind() && V.getKind() <= END_Loc;
269   }
270 };
271 
272 //==------------------------------------------------------------------------==//
273 //  Subclasses of NonLoc.
274 //==------------------------------------------------------------------------==//
275 
276 namespace nonloc {
277 
278 /// Represents symbolic expression that isn't a location.
279 class SymbolVal : public NonLoc {
280 public:
281   SymbolVal() = delete;
282   explicit SymbolVal(SymbolRef Sym) : NonLoc(SymbolValKind, Sym) {
283     assert(Sym);
284     assert(!Loc::isLocType(Sym->getType()));
285   }
286 
287   LLVM_ATTRIBUTE_RETURNS_NONNULL
288   SymbolRef getSymbol() const {
289     return (const SymExpr *) Data;
290   }
291 
292   bool isExpression() const {
293     return !isa<SymbolData>(getSymbol());
294   }
295 
296   static bool classof(SVal V) { return V.getKind() == SymbolValKind; }
297 };
298 
299 /// Value representing integer constant.
300 class ConcreteInt : public NonLoc {
301 public:
302   explicit ConcreteInt(APSIntPtr V) : NonLoc(ConcreteIntKind, V.get()) {}
303 
304   APSIntPtr getValue() const {
305     // This is safe because in the ctor we take a safe APSIntPtr.
306     return APSIntPtr::unsafeConstructor(castDataAs<llvm::APSInt>());
307   }
308 
309   static bool classof(SVal V) { return V.getKind() == ConcreteIntKind; }
310 };
311 
312 class LocAsInteger : public NonLoc {
313   friend class ento::SValBuilder;
314 
315   explicit LocAsInteger(const std::pair<SVal, uintptr_t> &data)
316       : NonLoc(LocAsIntegerKind, &data) {
317     // We do not need to represent loc::ConcreteInt as LocAsInteger,
318     // as it'd collapse into a nonloc::ConcreteInt instead.
319     [[maybe_unused]] SValKind K = data.first.getKind();
320     assert(K == loc::MemRegionValKind || K == loc::GotoLabelKind);
321   }
322 
323 public:
324   Loc getLoc() const {
325     return castDataAs<std::pair<SVal, uintptr_t>>()->first.castAs<Loc>();
326   }
327 
328   unsigned getNumBits() const {
329     return castDataAs<std::pair<SVal, uintptr_t>>()->second;
330   }
331 
332   static bool classof(SVal V) { return V.getKind() == LocAsIntegerKind; }
333 };
334 
335 /// The simplest example of a concrete compound value is nonloc::CompoundVal,
336 /// which represents a concrete r-value of an initializer-list or a string.
337 /// Internally, it contains an llvm::ImmutableList of SVal's stored inside the
338 /// literal.
339 class CompoundVal : public NonLoc {
340   friend class ento::SValBuilder;
341 
342   explicit CompoundVal(const CompoundValData *D) : NonLoc(CompoundValKind, D) {
343     assert(D);
344   }
345 
346 public:
347   LLVM_ATTRIBUTE_RETURNS_NONNULL
348   const CompoundValData* getValue() const {
349     return castDataAs<CompoundValData>();
350   }
351 
352   using iterator = llvm::ImmutableList<SVal>::iterator;
353   iterator begin() const;
354   iterator end() const;
355 
356   static bool classof(SVal V) { return V.getKind() == CompoundValKind; }
357 };
358 
359 /// While nonloc::CompoundVal covers a few simple use cases,
360 /// nonloc::LazyCompoundVal is a more performant and flexible way to represent
361 /// an rvalue of record type, so it shows up much more frequently during
362 /// analysis. This value is an r-value that represents a snapshot of any
363 /// structure "as a whole" at a given moment during the analysis. Such value is
364 /// already quite far from being referred to as "concrete", as many fields
365 /// inside it would be unknown or symbolic. nonloc::LazyCompoundVal operates by
366 /// storing two things:
367 ///   * a reference to the TypedValueRegion being snapshotted (yes, it is always
368 ///     typed), and also
369 ///   * a reference to the whole Store object, obtained from the ProgramState in
370 ///     which the nonloc::LazyCompoundVal was created.
371 ///
372 /// Note that the old ProgramState and its Store is kept alive during the
373 /// analysis because these are immutable functional data structures and each new
374 /// Store value is represented as "earlier Store" + "additional binding".
375 ///
376 /// Essentially, nonloc::LazyCompoundVal is a performance optimization for the
377 /// analyzer. Because Store is immutable, creating a nonloc::LazyCompoundVal is
378 /// a very cheap operation. Note that the Store contains all region bindings in
379 /// the program state, not only related to the region. Later, if necessary, such
380 /// value can be unpacked -- eg. when it is assigned to another variable.
381 ///
382 /// If you ever need to inspect the contents of the LazyCompoundVal, you can use
383 /// StoreManager::iterBindings(). It'll iterate through all values in the Store,
384 /// but you're only interested in the ones that belong to
385 /// LazyCompoundVal::getRegion(); other bindings are immaterial.
386 ///
387 /// NOTE: LazyCompoundVal::getRegion() itself is also immaterial (see the actual
388 /// method docs for details).
389 class LazyCompoundVal : public NonLoc {
390   friend class ento::SValBuilder;
391 
392   explicit LazyCompoundVal(const LazyCompoundValData *D)
393       : NonLoc(LazyCompoundValKind, D) {
394     assert(D);
395   }
396 
397 public:
398   LLVM_ATTRIBUTE_RETURNS_NONNULL
399   const LazyCompoundValData *getCVData() const {
400     return castDataAs<LazyCompoundValData>();
401   }
402 
403   /// It might return null.
404   const void *getStore() const;
405 
406   /// This function itself is immaterial. It is only an implementation detail.
407   /// LazyCompoundVal represents only the rvalue, the data (known or unknown)
408   /// that *was* stored in that region *at some point in the past*. The region
409   /// should not be used for any purpose other than figuring out what part of
410   /// the frozen Store you're interested in. The value does not represent the
411   /// *current* value of that region. Sometimes it may, but this should not be
412   /// relied upon. Instead, if you want to figure out what region it represents,
413   /// you typically need to see where you got it from in the first place. The
414   /// region is absolutely not analogous to the C++ "this" pointer. It is also
415   /// not a valid way to "materialize" the prvalue into a glvalue in C++,
416   /// because the region represents the *old* storage (sometimes very old), not
417   /// the *future* storage.
418   LLVM_ATTRIBUTE_RETURNS_NONNULL
419   const TypedValueRegion *getRegion() const;
420 
421   static bool classof(SVal V) { return V.getKind() == LazyCompoundValKind; }
422 };
423 
424 /// Value representing pointer-to-member.
425 ///
426 /// This value is qualified as NonLoc because neither loading nor storing
427 /// operations are applied to it. Instead, the analyzer uses the L-value coming
428 /// from pointer-to-member applied to an object.
429 /// This SVal is represented by a NamedDecl which can be a member function
430 /// pointer or a member data pointer and an optional list of CXXBaseSpecifiers.
431 /// This list is required to accumulate the pointer-to-member cast history to
432 /// figure out the correct subobject field. In particular, implicit casts grow
433 /// this list and explicit casts like static_cast shrink this list.
434 class PointerToMember : public NonLoc {
435   friend class ento::SValBuilder;
436 
437 public:
438   using PTMDataType =
439       llvm::PointerUnion<const NamedDecl *, const PointerToMemberData *>;
440 
441   const PTMDataType getPTMData() const {
442     return PTMDataType::getFromOpaqueValue(const_cast<void *>(Data));
443   }
444 
445   bool isNullMemberPointer() const;
446 
447   const NamedDecl *getDecl() const;
448 
449   template<typename AdjustedDecl>
450   const AdjustedDecl *getDeclAs() const {
451     return dyn_cast_or_null<AdjustedDecl>(getDecl());
452   }
453 
454   using iterator = llvm::ImmutableList<const CXXBaseSpecifier *>::iterator;
455 
456   iterator begin() const;
457   iterator end() const;
458 
459   static bool classof(SVal V) { return V.getKind() == PointerToMemberKind; }
460 
461 private:
462   explicit PointerToMember(const PTMDataType D)
463       : NonLoc(PointerToMemberKind, D.getOpaqueValue()) {}
464 };
465 
466 } // namespace nonloc
467 
468 //==------------------------------------------------------------------------==//
469 //  Subclasses of Loc.
470 //==------------------------------------------------------------------------==//
471 
472 namespace loc {
473 
474 class GotoLabel : public Loc {
475 public:
476   explicit GotoLabel(const LabelDecl *Label) : Loc(GotoLabelKind, Label) {
477     assert(Label);
478   }
479 
480   const LabelDecl *getLabel() const { return castDataAs<LabelDecl>(); }
481 
482   static bool classof(SVal V) { return V.getKind() == GotoLabelKind; }
483 };
484 
485 class MemRegionVal : public Loc {
486 public:
487   explicit MemRegionVal(const MemRegion *r) : Loc(MemRegionValKind, r) {
488     assert(r);
489   }
490 
491   /// Get the underlining region.
492   LLVM_ATTRIBUTE_RETURNS_NONNULL
493   const MemRegion *getRegion() const { return castDataAs<MemRegion>(); }
494 
495   /// Get the underlining region and strip casts.
496   LLVM_ATTRIBUTE_RETURNS_NONNULL
497   const MemRegion* stripCasts(bool StripBaseCasts = true) const;
498 
499   template <typename REGION>
500   const REGION* getRegionAs() const {
501     return dyn_cast<REGION>(getRegion());
502   }
503 
504   bool operator==(const MemRegionVal &R) const {
505     return getRegion() == R.getRegion();
506   }
507 
508   bool operator!=(const MemRegionVal &R) const {
509     return getRegion() != R.getRegion();
510   }
511 
512   static bool classof(SVal V) { return V.getKind() == MemRegionValKind; }
513 };
514 
515 class ConcreteInt : public Loc {
516 public:
517   explicit ConcreteInt(APSIntPtr V) : Loc(ConcreteIntKind, V.get()) {}
518 
519   APSIntPtr getValue() const {
520     // This is safe because in the ctor we take a safe APSIntPtr.
521     return APSIntPtr::unsafeConstructor(castDataAs<llvm::APSInt>());
522   }
523 
524   static bool classof(SVal V) { return V.getKind() == ConcreteIntKind; }
525 };
526 
527 } // namespace loc
528 } // namespace ento
529 } // namespace clang
530 
531 namespace llvm {
532 template <typename To, typename From>
533 struct CastInfo<
534     To, From,
535     std::enable_if_t<std::is_base_of<::clang::ento::SVal, From>::value>>
536     : public CastIsPossible<To, ::clang::ento::SVal> {
537   using Self = CastInfo<
538       To, From,
539       std::enable_if_t<std::is_base_of<::clang::ento::SVal, From>::value>>;
540   static bool isPossible(const From &V) {
541     return To::classof(*static_cast<const ::clang::ento::SVal *>(&V));
542   }
543   static std::optional<To> castFailed() { return std::optional<To>{}; }
544   static To doCast(const From &f) {
545     return *static_cast<const To *>(cast<::clang::ento::SVal>(&f));
546   }
547   static std::optional<To> doCastIfPossible(const From &f) {
548     if (!Self::isPossible(f))
549       return Self::castFailed();
550     return doCast(f);
551   }
552 };
553 } // namespace llvm
554 
555 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H
556