1 //== RangedConstraintManager.h ----------------------------------*- C++ -*--==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  Ranged constraint manager, built on SimpleConstraintManager.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_CLANG_LIB_STATICANALYZER_CORE_RANGEDCONSTRAINTMANAGER_H
14 #define LLVM_CLANG_LIB_STATICANALYZER_CORE_RANGEDCONSTRAINTMANAGER_H
15 
16 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
17 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
18 #include "clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h"
19 #include "llvm/ADT/APSInt.h"
20 #include "llvm/Support/Allocator.h"
21 
22 namespace clang {
23 
24 namespace ento {
25 
26 /// A Range represents the closed range [from, to].  The caller must
27 /// guarantee that from <= to.  Note that Range is immutable, so as not
28 /// to subvert RangeSet's immutability.
29 class Range {
30 public:
Range(const llvm::APSInt & From,const llvm::APSInt & To)31   Range(const llvm::APSInt &From, const llvm::APSInt &To) : Impl(&From, &To) {
32     assert(From <= To);
33   }
34 
Range(const llvm::APSInt & Point)35   Range(const llvm::APSInt &Point) : Range(Point, Point) {}
36 
Includes(const llvm::APSInt & Point)37   bool Includes(const llvm::APSInt &Point) const {
38     return From() <= Point && Point <= To();
39   }
From()40   const llvm::APSInt &From() const { return *Impl.first; }
To()41   const llvm::APSInt &To() const { return *Impl.second; }
getConcreteValue()42   const llvm::APSInt *getConcreteValue() const {
43     return &From() == &To() ? &From() : nullptr;
44   }
45 
Profile(llvm::FoldingSetNodeID & ID)46   void Profile(llvm::FoldingSetNodeID &ID) const {
47     ID.AddPointer(&From());
48     ID.AddPointer(&To());
49   }
50   void dump(raw_ostream &OS) const;
51 
52   // In order to keep non-overlapping ranges sorted, we can compare only From
53   // points.
54   bool operator<(const Range &RHS) const { return From() < RHS.From(); }
55 
56   bool operator==(const Range &RHS) const { return Impl == RHS.Impl; }
57   bool operator!=(const Range &RHS) const { return !operator==(RHS); }
58 
59 private:
60   std::pair<const llvm::APSInt *, const llvm::APSInt *> Impl;
61 };
62 
63 /// @class RangeSet is a persistent set of non-overlapping ranges.
64 ///
65 /// New RangeSet objects can be ONLY produced by RangeSet::Factory object, which
66 /// also supports the most common operations performed on range sets.
67 ///
68 /// Empty set corresponds to an overly constrained symbol meaning that there
69 /// are no possible values for that symbol.
70 class RangeSet {
71 public:
72   class Factory;
73 
74 private:
75   // We use llvm::SmallVector as the underlying container for the following
76   // reasons:
77   //
78   //   * Range sets are usually very simple, 1 or 2 ranges.
79   //     That's why llvm::ImmutableSet is not perfect.
80   //
81   //   * Ranges in sets are NOT overlapping, so it is natural to keep them
82   //     sorted for efficient operations and queries.  For this reason,
83   //     llvm::SmallSet doesn't fit the requirements, it is not sorted when it
84   //     is a vector.
85   //
86   //   * Range set operations usually a bit harder than add/remove a range.
87   //     Complex operations might do many of those for just one range set.
88   //     Formerly it used to be llvm::ImmutableSet, which is inefficient for our
89   //     purposes as we want to make these operations BOTH immutable AND
90   //     efficient.
91   //
92   //   * Iteration over ranges is widespread and a more cache-friendly
93   //     structure is preferred.
94   using ImplType = llvm::SmallVector<Range, 4>;
95 
96   struct ContainerType : public ImplType, public llvm::FoldingSetNode {
ProfileContainerType97     void Profile(llvm::FoldingSetNodeID &ID) const {
98       for (const Range &It : *this) {
99         It.Profile(ID);
100       }
101     }
102   };
103   // This is a non-owning pointer to an actual container.
104   // The memory is fully managed by the factory and is alive as long as the
105   // factory itself is alive.
106   // It is a pointer as opposed to a reference, so we can easily reassign
107   // RangeSet objects.
108   using UnderlyingType = const ContainerType *;
109   UnderlyingType Impl;
110 
111 public:
112   using const_iterator = ImplType::const_iterator;
113 
begin()114   const_iterator begin() const { return Impl->begin(); }
end()115   const_iterator end() const { return Impl->end(); }
size()116   size_t size() const { return Impl->size(); }
117 
isEmpty()118   bool isEmpty() const { return Impl->empty(); }
119 
120   class Factory {
121   public:
Factory(BasicValueFactory & BV)122     Factory(BasicValueFactory &BV) : ValueFactory(BV) {}
123 
124     /// Create a new set with all ranges from both LHS and RHS.
125     /// Possible intersections are not checked here.
126     ///
127     /// Complexity: O(N + M)
128     ///             where N = size(LHS), M = size(RHS)
129     RangeSet add(RangeSet LHS, RangeSet RHS);
130     /// Create a new set with all ranges from the original set plus the new one.
131     /// Possible intersections are not checked here.
132     ///
133     /// Complexity: O(N)
134     ///             where N = size(Original)
135     RangeSet add(RangeSet Original, Range Element);
136     /// Create a new set with all ranges from the original set plus the point.
137     /// Possible intersections are not checked here.
138     ///
139     /// Complexity: O(N)
140     ///             where N = size(Original)
141     RangeSet add(RangeSet Original, const llvm::APSInt &Point);
142 
getEmptySet()143     RangeSet getEmptySet() { return &EmptySet; }
144 
145     /// Create a new set with just one range.
146     /// @{
147     RangeSet getRangeSet(Range Origin);
getRangeSet(const llvm::APSInt & From,const llvm::APSInt & To)148     RangeSet getRangeSet(const llvm::APSInt &From, const llvm::APSInt &To) {
149       return getRangeSet(Range(From, To));
150     }
getRangeSet(const llvm::APSInt & Origin)151     RangeSet getRangeSet(const llvm::APSInt &Origin) {
152       return getRangeSet(Origin, Origin);
153     }
154     /// @}
155 
156     /// Intersect the given range sets.
157     ///
158     /// Complexity: O(N + M)
159     ///             where N = size(LHS), M = size(RHS)
160     RangeSet intersect(RangeSet LHS, RangeSet RHS);
161     /// Intersect the given set with the closed range [Lower, Upper].
162     ///
163     /// Unlike the Range type, this range uses modular arithmetic, corresponding
164     /// to the common treatment of C integer overflow. Thus, if the Lower bound
165     /// is greater than the Upper bound, the range is taken to wrap around. This
166     /// is equivalent to taking the intersection with the two ranges [Min,
167     /// Upper] and [Lower, Max], or, alternatively, /removing/ all integers
168     /// between Upper and Lower.
169     ///
170     /// Complexity: O(N)
171     ///             where N = size(What)
172     RangeSet intersect(RangeSet What, llvm::APSInt Lower, llvm::APSInt Upper);
173     /// Intersect the given range with the given point.
174     ///
175     /// The result can be either an empty set or a set containing the given
176     /// point depending on whether the point is in the range set.
177     ///
178     /// Complexity: O(logN)
179     ///             where N = size(What)
180     RangeSet intersect(RangeSet What, llvm::APSInt Point);
181 
182     /// Delete the given point from the range set.
183     ///
184     /// Complexity: O(N)
185     ///             where N = size(From)
186     RangeSet deletePoint(RangeSet From, const llvm::APSInt &Point);
187     /// Negate the given range set.
188     ///
189     /// Turn all [A, B] ranges to [-B, -A], when "-" is a C-like unary minus
190     /// operation under the values of the type.
191     ///
192     /// We also handle MIN because applying unary minus to MIN does not change
193     /// it.
194     /// Example 1:
195     /// char x = -128;        // -128 is a MIN value in a range of 'char'
196     /// char y = -x;          // y: -128
197     ///
198     /// Example 2:
199     /// unsigned char x = 0;  // 0 is a MIN value in a range of 'unsigned char'
200     /// unsigned char y = -x; // y: 0
201     ///
202     /// And it makes us to separate the range
203     /// like [MIN, N] to [MIN, MIN] U [-N, MAX].
204     /// For instance, whole range is {-128..127} and subrange is [-128,-126],
205     /// thus [-128,-127,-126,...] negates to [-128,...,126,127].
206     ///
207     /// Negate restores disrupted ranges on bounds,
208     /// e.g. [MIN, B] => [MIN, MIN] U [-B, MAX] => [MIN, B].
209     ///
210     /// Negate is a self-inverse function, i.e. negate(negate(R)) == R.
211     ///
212     /// Complexity: O(N)
213     ///             where N = size(What)
214     RangeSet negate(RangeSet What);
215 
216   private:
217     /// Return a persistent version of the given container.
218     RangeSet makePersistent(ContainerType &&From);
219     /// Construct a new persistent version of the given container.
220     ContainerType *construct(ContainerType &&From);
221 
222     RangeSet intersect(const ContainerType &LHS, const ContainerType &RHS);
223 
224     // Many operations include producing new APSInt values and that's why
225     // we need this factory.
226     BasicValueFactory &ValueFactory;
227     // Allocator for all the created containers.
228     // Containers might own their own memory and that's why it is specific
229     // for the type, so it calls container destructors upon deletion.
230     llvm::SpecificBumpPtrAllocator<ContainerType> Arena;
231     // Usually we deal with the same ranges and range sets over and over.
232     // Here we track all created containers and try not to repeat ourselves.
233     llvm::FoldingSet<ContainerType> Cache;
234     static ContainerType EmptySet;
235   };
236 
237   RangeSet(const RangeSet &) = default;
238   RangeSet &operator=(const RangeSet &) = default;
239   RangeSet(RangeSet &&) = default;
240   RangeSet &operator=(RangeSet &&) = default;
241   ~RangeSet() = default;
242 
243   /// Construct a new RangeSet representing '{ [From, To] }'.
RangeSet(Factory & F,const llvm::APSInt & From,const llvm::APSInt & To)244   RangeSet(Factory &F, const llvm::APSInt &From, const llvm::APSInt &To)
245       : RangeSet(F.getRangeSet(From, To)) {}
246 
247   /// Construct a new RangeSet representing the given point as a range.
RangeSet(Factory & F,const llvm::APSInt & Point)248   RangeSet(Factory &F, const llvm::APSInt &Point)
249       : RangeSet(F.getRangeSet(Point)) {}
250 
Profile(llvm::FoldingSetNodeID & ID,const RangeSet & RS)251   static void Profile(llvm::FoldingSetNodeID &ID, const RangeSet &RS) {
252     ID.AddPointer(RS.Impl);
253   }
254 
255   /// Profile - Generates a hash profile of this RangeSet for use
256   ///  by FoldingSet.
Profile(llvm::FoldingSetNodeID & ID)257   void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, *this); }
258 
259   /// getConcreteValue - If a symbol is contrained to equal a specific integer
260   ///  constant then this method returns that value.  Otherwise, it returns
261   ///  NULL.
getConcreteValue()262   const llvm::APSInt *getConcreteValue() const {
263     return Impl->size() == 1 ? begin()->getConcreteValue() : nullptr;
264   }
265 
266   /// Get the minimal value covered by the ranges in the set.
267   ///
268   /// Complexity: O(1)
269   const llvm::APSInt &getMinValue() const;
270   /// Get the maximal value covered by the ranges in the set.
271   ///
272   /// Complexity: O(1)
273   const llvm::APSInt &getMaxValue() const;
274 
275   /// Test whether the given point is contained by any of the ranges.
276   ///
277   /// Complexity: O(logN)
278   ///             where N = size(this)
contains(llvm::APSInt Point)279   bool contains(llvm::APSInt Point) const { return containsImpl(Point); }
280 
281   void dump(raw_ostream &OS) const;
282 
283   bool operator==(const RangeSet &Other) const { return *Impl == *Other.Impl; }
284   bool operator!=(const RangeSet &Other) const { return !(*this == Other); }
285 
286 private:
RangeSet(ContainerType * RawContainer)287   /* implicit */ RangeSet(ContainerType *RawContainer) : Impl(RawContainer) {}
RangeSet(UnderlyingType Ptr)288   /* implicit */ RangeSet(UnderlyingType Ptr) : Impl(Ptr) {}
289 
290   /// Pin given points to the type represented by the current range set.
291   ///
292   /// This makes parameter points to be in-out parameters.
293   /// In order to maintain consistent types across all of the ranges in the set
294   /// and to keep all the operations to compare ONLY points of the same type, we
295   /// need to pin every point before any operation.
296   ///
297   /// @Returns true if the given points can be converted to the target type
298   ///          without changing the values (i.e. trivially) and false otherwise.
299   /// @{
300   bool pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const;
301   bool pin(llvm::APSInt &Point) const;
302   /// @}
303 
304   // This version of this function modifies its arguments (pins it).
305   bool containsImpl(llvm::APSInt &Point) const;
306 
307   friend class Factory;
308 };
309 
310 using ConstraintMap = llvm::ImmutableMap<SymbolRef, RangeSet>;
311 ConstraintMap getConstraintMap(ProgramStateRef State);
312 
313 class RangedConstraintManager : public SimpleConstraintManager {
314 public:
RangedConstraintManager(ExprEngine * EE,SValBuilder & SB)315   RangedConstraintManager(ExprEngine *EE, SValBuilder &SB)
316       : SimpleConstraintManager(EE, SB) {}
317 
318   ~RangedConstraintManager() override;
319 
320   //===------------------------------------------------------------------===//
321   // Implementation for interface from SimpleConstraintManager.
322   //===------------------------------------------------------------------===//
323 
324   ProgramStateRef assumeSym(ProgramStateRef State, SymbolRef Sym,
325                             bool Assumption) override;
326 
327   ProgramStateRef assumeSymInclusiveRange(ProgramStateRef State, SymbolRef Sym,
328                                           const llvm::APSInt &From,
329                                           const llvm::APSInt &To,
330                                           bool InRange) override;
331 
332   ProgramStateRef assumeSymUnsupported(ProgramStateRef State, SymbolRef Sym,
333                                        bool Assumption) override;
334 
335 protected:
336   /// Assume a constraint between a symbolic expression and a concrete integer.
337   virtual ProgramStateRef assumeSymRel(ProgramStateRef State, SymbolRef Sym,
338                                        BinaryOperator::Opcode op,
339                                        const llvm::APSInt &Int);
340 
341   //===------------------------------------------------------------------===//
342   // Interface that subclasses must implement.
343   //===------------------------------------------------------------------===//
344 
345   // Each of these is of the form "$Sym+Adj <> V", where "<>" is the comparison
346   // operation for the method being invoked.
347 
348   virtual ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym,
349                                       const llvm::APSInt &V,
350                                       const llvm::APSInt &Adjustment) = 0;
351 
352   virtual ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym,
353                                       const llvm::APSInt &V,
354                                       const llvm::APSInt &Adjustment) = 0;
355 
356   virtual ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym,
357                                       const llvm::APSInt &V,
358                                       const llvm::APSInt &Adjustment) = 0;
359 
360   virtual ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym,
361                                       const llvm::APSInt &V,
362                                       const llvm::APSInt &Adjustment) = 0;
363 
364   virtual ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym,
365                                       const llvm::APSInt &V,
366                                       const llvm::APSInt &Adjustment) = 0;
367 
368   virtual ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym,
369                                       const llvm::APSInt &V,
370                                       const llvm::APSInt &Adjustment) = 0;
371 
372   virtual ProgramStateRef assumeSymWithinInclusiveRange(
373       ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
374       const llvm::APSInt &To, const llvm::APSInt &Adjustment) = 0;
375 
376   virtual ProgramStateRef assumeSymOutsideInclusiveRange(
377       ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
378       const llvm::APSInt &To, const llvm::APSInt &Adjustment) = 0;
379 
380   //===------------------------------------------------------------------===//
381   // Internal implementation.
382   //===------------------------------------------------------------------===//
383 private:
384   static void computeAdjustment(SymbolRef &Sym, llvm::APSInt &Adjustment);
385 };
386 
387 } // namespace ento
388 } // namespace clang
389 
390 REGISTER_FACTORY_WITH_PROGRAMSTATE(ConstraintMap)
391 
392 #endif
393