xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp (revision de2547329b41ad6ea4ea876d12731bde5a6b64c5)
1 //== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines ArrayBoundCheckerV2, which is a path-sensitive check
10 // which looks for an out-of-bound array element access.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/AST/CharUnits.h"
15 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
16 #include "clang/StaticAnalyzer/Checkers/Taint.h"
17 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
18 #include "clang/StaticAnalyzer/Core/Checker.h"
19 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <optional>
27 
28 using namespace clang;
29 using namespace ento;
30 using namespace taint;
31 
32 namespace {
33 class ArrayBoundCheckerV2 :
34     public Checker<check::Location> {
35   mutable std::unique_ptr<BuiltinBug> BT;
36   mutable std::unique_ptr<BugType> TaintBT;
37 
38   enum OOB_Kind { OOB_Precedes, OOB_Excedes };
39 
40   void reportOOB(CheckerContext &C, ProgramStateRef errorState,
41                  OOB_Kind kind) const;
42   void reportTaintOOB(CheckerContext &C, ProgramStateRef errorState,
43                       SVal TaintedSVal) const;
44 
45 public:
46   void checkLocation(SVal l, bool isLoad, const Stmt*S,
47                      CheckerContext &C) const;
48 };
49 
50 // FIXME: Eventually replace RegionRawOffset with this class.
51 class RegionRawOffsetV2 {
52 private:
53   const SubRegion *baseRegion;
54   SVal byteOffset;
55 
56   RegionRawOffsetV2()
57     : baseRegion(nullptr), byteOffset(UnknownVal()) {}
58 
59 public:
60   RegionRawOffsetV2(const SubRegion *base, NonLoc offset)
61       : baseRegion(base), byteOffset(offset) { assert(base); }
62 
63   NonLoc getByteOffset() const { return byteOffset.castAs<NonLoc>(); }
64   const SubRegion *getRegion() const { return baseRegion; }
65 
66   static RegionRawOffsetV2 computeOffset(ProgramStateRef state,
67                                          SValBuilder &svalBuilder,
68                                          SVal location);
69 
70   void dump() const;
71   void dumpToStream(raw_ostream &os) const;
72 };
73 }
74 
75 // TODO: once the constraint manager is smart enough to handle non simplified
76 // symbolic expressions remove this function. Note that this can not be used in
77 // the constraint manager as is, since this does not handle overflows. It is
78 // safe to assume, however, that memory offsets will not overflow.
79 // NOTE: callers of this function need to be aware of the effects of overflows
80 // and signed<->unsigned conversions!
81 static std::pair<NonLoc, nonloc::ConcreteInt>
82 getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
83                      SValBuilder &svalBuilder) {
84   std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>();
85   if (SymVal && SymVal->isExpression()) {
86     if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
87       llvm::APSInt constant =
88           APSIntType(extent.getValue()).convert(SIE->getRHS());
89       switch (SIE->getOpcode()) {
90       case BO_Mul:
91         // The constant should never be 0 here, since it the result of scaling
92         // based on the size of a type which is never 0.
93         if ((extent.getValue() % constant) != 0)
94           return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
95         else
96           return getSimplifiedOffsets(
97               nonloc::SymbolVal(SIE->getLHS()),
98               svalBuilder.makeIntVal(extent.getValue() / constant),
99               svalBuilder);
100       case BO_Add:
101         return getSimplifiedOffsets(
102             nonloc::SymbolVal(SIE->getLHS()),
103             svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder);
104       default:
105         break;
106       }
107     }
108   }
109 
110   return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
111 }
112 
113 // Evaluate the comparison Value < Threshold with the help of the custom
114 // simplification algorithm defined for this checker. Return a pair of states,
115 // where the first one corresponds to "value below threshold" and the second
116 // corresponds to "value at or above threshold". Returns {nullptr, nullptr} in
117 // the case when the evaluation fails.
118 static std::pair<ProgramStateRef, ProgramStateRef>
119 compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
120                         SValBuilder &SVB) {
121   if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
122     std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB);
123   }
124   if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
125     QualType T = Value.getType(SVB.getContext());
126     if (T->isUnsignedIntegerType() && ConcreteThreshold->getValue().isNegative()) {
127       // In this case we reduced the bound check to a comparison of the form
128       //   (symbol or value with unsigned type) < (negative number)
129       // which is always false. We are handling these cases separately because
130       // evalBinOpNN can perform a signed->unsigned conversion that turns the
131       // negative number into a huge positive value and leads to wildly
132       // inaccurate conclusions.
133       return {nullptr, State};
134     }
135   }
136   auto BelowThreshold =
137       SVB.evalBinOpNN(State, BO_LT, Value, Threshold, SVB.getConditionType()).getAs<NonLoc>();
138 
139   if (BelowThreshold)
140     return State->assume(*BelowThreshold);
141 
142   return {nullptr, nullptr};
143 }
144 
145 void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
146                                         const Stmt* LoadS,
147                                         CheckerContext &checkerContext) const {
148 
149   // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
150   // some new logic here that reasons directly about memory region extents.
151   // Once that logic is more mature, we can bring it back to assumeInBound()
152   // for all clients to use.
153   //
154   // The algorithm we are using here for bounds checking is to see if the
155   // memory access is within the extent of the base region.  Since we
156   // have some flexibility in defining the base region, we can achieve
157   // various levels of conservatism in our buffer overflow checking.
158   ProgramStateRef state = checkerContext.getState();
159 
160   SValBuilder &svalBuilder = checkerContext.getSValBuilder();
161   const RegionRawOffsetV2 &rawOffset =
162     RegionRawOffsetV2::computeOffset(state, svalBuilder, location);
163 
164   if (!rawOffset.getRegion())
165     return;
166 
167   NonLoc ByteOffset = rawOffset.getByteOffset();
168 
169   // CHECK LOWER BOUND
170   const MemSpaceRegion *SR = rawOffset.getRegion()->getMemorySpace();
171   if (!llvm::isa<UnknownSpaceRegion>(SR)) {
172     // A pointer to UnknownSpaceRegion may point to the middle of
173     // an allocated region.
174 
175     auto [state_precedesLowerBound, state_withinLowerBound] =
176         compareValueToThreshold(state, ByteOffset,
177                                 svalBuilder.makeZeroArrayIndex(), svalBuilder);
178 
179     if (state_precedesLowerBound && !state_withinLowerBound) {
180       // We know that the index definitely precedes the lower bound.
181       reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
182       return;
183     }
184 
185     if (state_withinLowerBound)
186       state = state_withinLowerBound;
187   }
188 
189   // CHECK UPPER BOUND
190   DefinedOrUnknownSVal Size =
191       getDynamicExtent(state, rawOffset.getRegion(), svalBuilder);
192   if (auto KnownSize = Size.getAs<NonLoc>()) {
193     auto [state_withinUpperBound, state_exceedsUpperBound] =
194         compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder);
195 
196     if (state_exceedsUpperBound) {
197       if (!state_withinUpperBound) {
198         // We know that the index definitely exceeds the upper bound.
199         reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
200         return;
201       }
202       if (isTainted(state, ByteOffset)) {
203         // Both cases are possible, but the index is tainted, so report.
204         reportTaintOOB(checkerContext, state_exceedsUpperBound, ByteOffset);
205         return;
206       }
207     }
208 
209     if (state_withinUpperBound)
210       state = state_withinUpperBound;
211   }
212 
213   checkerContext.addTransition(state);
214 }
215 
216 void ArrayBoundCheckerV2::reportTaintOOB(CheckerContext &checkerContext,
217                                          ProgramStateRef errorState,
218                                          SVal TaintedSVal) const {
219   ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
220   if (!errorNode)
221     return;
222 
223   if (!TaintBT)
224     TaintBT.reset(
225         new BugType(this, "Out-of-bound access", categories::TaintedData));
226 
227   SmallString<256> buf;
228   llvm::raw_svector_ostream os(buf);
229   os << "Out of bound memory access (index is tainted)";
230   auto BR =
231       std::make_unique<PathSensitiveBugReport>(*TaintBT, os.str(), errorNode);
232 
233   // Track back the propagation of taintedness.
234   for (SymbolRef Sym : getTaintedSymbols(errorState, TaintedSVal)) {
235     BR->markInteresting(Sym);
236   }
237 
238   checkerContext.emitReport(std::move(BR));
239 }
240 
241 void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext,
242                                     ProgramStateRef errorState,
243                                     OOB_Kind kind) const {
244 
245   ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
246   if (!errorNode)
247     return;
248 
249   if (!BT)
250     BT.reset(new BuiltinBug(this, "Out-of-bound access"));
251 
252   // FIXME: This diagnostics are preliminary.  We should get far better
253   // diagnostics for explaining buffer overruns.
254 
255   SmallString<256> buf;
256   llvm::raw_svector_ostream os(buf);
257   os << "Out of bound memory access ";
258   switch (kind) {
259   case OOB_Precedes:
260     os << "(accessed memory precedes memory block)";
261     break;
262   case OOB_Excedes:
263     os << "(access exceeds upper limit of memory block)";
264     break;
265   }
266   auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode);
267   checkerContext.emitReport(std::move(BR));
268 }
269 
270 #ifndef NDEBUG
271 LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const {
272   dumpToStream(llvm::errs());
273 }
274 
275 void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const {
276   os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}';
277 }
278 #endif
279 
280 // Lazily computes a value to be used by 'computeOffset'.  If 'val'
281 // is unknown or undefined, we lazily substitute '0'.  Otherwise,
282 // return 'val'.
283 static inline SVal getValue(SVal val, SValBuilder &svalBuilder) {
284   return val.isUndef() ? svalBuilder.makeZeroArrayIndex() : val;
285 }
286 
287 // Scale a base value by a scaling factor, and return the scaled
288 // value as an SVal.  Used by 'computeOffset'.
289 static inline SVal scaleValue(ProgramStateRef state,
290                               NonLoc baseVal, CharUnits scaling,
291                               SValBuilder &sb) {
292   return sb.evalBinOpNN(state, BO_Mul, baseVal,
293                         sb.makeArrayIndex(scaling.getQuantity()),
294                         sb.getArrayIndexType());
295 }
296 
297 // Add an SVal to another, treating unknown and undefined values as
298 // summing to UnknownVal.  Used by 'computeOffset'.
299 static SVal addValue(ProgramStateRef state, SVal x, SVal y,
300                      SValBuilder &svalBuilder) {
301   // We treat UnknownVals and UndefinedVals the same here because we
302   // only care about computing offsets.
303   if (x.isUnknownOrUndef() || y.isUnknownOrUndef())
304     return UnknownVal();
305 
306   return svalBuilder.evalBinOpNN(state, BO_Add, x.castAs<NonLoc>(),
307                                  y.castAs<NonLoc>(),
308                                  svalBuilder.getArrayIndexType());
309 }
310 
311 /// Compute a raw byte offset from a base region.  Used for array bounds
312 /// checking.
313 RegionRawOffsetV2 RegionRawOffsetV2::computeOffset(ProgramStateRef state,
314                                                    SValBuilder &svalBuilder,
315                                                    SVal location)
316 {
317   const MemRegion *region = location.getAsRegion();
318   SVal offset = UndefinedVal();
319 
320   while (region) {
321     switch (region->getKind()) {
322       default: {
323         if (const SubRegion *subReg = dyn_cast<SubRegion>(region)) {
324           if (auto Offset = getValue(offset, svalBuilder).getAs<NonLoc>())
325             return RegionRawOffsetV2(subReg, *Offset);
326         }
327         return RegionRawOffsetV2();
328       }
329       case MemRegion::ElementRegionKind: {
330         const ElementRegion *elemReg = cast<ElementRegion>(region);
331         SVal index = elemReg->getIndex();
332         if (!isa<NonLoc>(index))
333           return RegionRawOffsetV2();
334         QualType elemType = elemReg->getElementType();
335         // If the element is an incomplete type, go no further.
336         ASTContext &astContext = svalBuilder.getContext();
337         if (elemType->isIncompleteType())
338           return RegionRawOffsetV2();
339 
340         // Update the offset.
341         offset = addValue(state,
342                           getValue(offset, svalBuilder),
343                           scaleValue(state,
344                           index.castAs<NonLoc>(),
345                           astContext.getTypeSizeInChars(elemType),
346                           svalBuilder),
347                           svalBuilder);
348 
349         if (offset.isUnknownOrUndef())
350           return RegionRawOffsetV2();
351 
352         region = elemReg->getSuperRegion();
353         continue;
354       }
355     }
356   }
357   return RegionRawOffsetV2();
358 }
359 
360 void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
361   mgr.registerChecker<ArrayBoundCheckerV2>();
362 }
363 
364 bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) {
365   return true;
366 }
367