xref: /llvm-project/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp (revision aceb34c7046d315d615feaa94a5941db13299a1b)
1 //== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines ArrayBoundCheckerV2, which is a path-sensitive check
10 // which looks for an out-of-bound array element access.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/AST/CharUnits.h"
15 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
16 #include "clang/StaticAnalyzer/Checkers/Taint.h"
17 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
18 #include "clang/StaticAnalyzer/Core/Checker.h"
19 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <optional>
27 
28 using namespace clang;
29 using namespace ento;
30 using namespace taint;
31 
32 namespace {
33 class ArrayBoundCheckerV2 :
34     public Checker<check::Location> {
35   BugType BT{this, "Out-of-bound access"};
36   BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};
37 
38   enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
39 
40   void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind,
41                  SVal TaintedSVal = UnknownVal()) const;
42 
43   static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);
44 
45 public:
46   void checkLocation(SVal l, bool isLoad, const Stmt *S,
47                      CheckerContext &C) const;
48 };
49 } // anonymous namespace
50 
51 /// For a given Location that can be represented as a symbolic expression
52 /// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block
53 /// Arr and the distance of Location from the beginning of Arr (expressed in a
54 /// NonLoc that specifies the number of CharUnits). Returns nullopt when these
55 /// cannot be determined.
56 std::optional<std::pair<const SubRegion *, NonLoc>>
57 computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
58   QualType T = SVB.getArrayIndexType();
59   auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) {
60     // We will use this utility to add and multiply values.
61     return SVB.evalBinOpNN(State, Op, L, R, T).getAs<NonLoc>();
62   };
63 
64   const SubRegion *OwnerRegion = nullptr;
65   std::optional<NonLoc> Offset = SVB.makeZeroArrayIndex();
66 
67   const ElementRegion *CurRegion =
68       dyn_cast_or_null<ElementRegion>(Location.getAsRegion());
69 
70   while (CurRegion) {
71     const auto Index = CurRegion->getIndex().getAs<NonLoc>();
72     if (!Index)
73       return std::nullopt;
74 
75     QualType ElemType = CurRegion->getElementType();
76 
77     // FIXME: The following early return was presumably added to safeguard the
78     // getTypeSizeInChars() call (which doesn't accept an incomplete type), but
79     // it seems that `ElemType` cannot be incomplete at this point.
80     if (ElemType->isIncompleteType())
81       return std::nullopt;
82 
83     // Calculate Delta = Index * sizeof(ElemType).
84     NonLoc Size = SVB.makeArrayIndex(
85         SVB.getContext().getTypeSizeInChars(ElemType).getQuantity());
86     auto Delta = EvalBinOp(BO_Mul, *Index, Size);
87     if (!Delta)
88       return std::nullopt;
89 
90     // Perform Offset += Delta.
91     Offset = EvalBinOp(BO_Add, *Offset, *Delta);
92     if (!Offset)
93       return std::nullopt;
94 
95     OwnerRegion = CurRegion->getSuperRegion()->getAs<SubRegion>();
96     // When this is just another ElementRegion layer, we need to continue the
97     // offset calculations:
98     CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion);
99   }
100 
101   if (OwnerRegion)
102     return std::make_pair(OwnerRegion, *Offset);
103 
104   return std::nullopt;
105 }
106 
107 // TODO: once the constraint manager is smart enough to handle non simplified
108 // symbolic expressions remove this function. Note that this can not be used in
109 // the constraint manager as is, since this does not handle overflows. It is
110 // safe to assume, however, that memory offsets will not overflow.
111 // NOTE: callers of this function need to be aware of the effects of overflows
112 // and signed<->unsigned conversions!
113 static std::pair<NonLoc, nonloc::ConcreteInt>
114 getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
115                      SValBuilder &svalBuilder) {
116   std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>();
117   if (SymVal && SymVal->isExpression()) {
118     if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
119       llvm::APSInt constant =
120           APSIntType(extent.getValue()).convert(SIE->getRHS());
121       switch (SIE->getOpcode()) {
122       case BO_Mul:
123         // The constant should never be 0 here, becasue multiplication by zero
124         // is simplified by the engine.
125         if ((extent.getValue() % constant) != 0)
126           return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
127         else
128           return getSimplifiedOffsets(
129               nonloc::SymbolVal(SIE->getLHS()),
130               svalBuilder.makeIntVal(extent.getValue() / constant),
131               svalBuilder);
132       case BO_Add:
133         return getSimplifiedOffsets(
134             nonloc::SymbolVal(SIE->getLHS()),
135             svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder);
136       default:
137         break;
138       }
139     }
140   }
141 
142   return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
143 }
144 
145 // Evaluate the comparison Value < Threshold with the help of the custom
146 // simplification algorithm defined for this checker. Return a pair of states,
147 // where the first one corresponds to "value below threshold" and the second
148 // corresponds to "value at or above threshold". Returns {nullptr, nullptr} in
149 // the case when the evaluation fails.
150 static std::pair<ProgramStateRef, ProgramStateRef>
151 compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
152                         SValBuilder &SVB) {
153   if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
154     std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB);
155   }
156   if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) {
157     QualType T = Value.getType(SVB.getContext());
158     if (T->isUnsignedIntegerType() && ConcreteThreshold->getValue().isNegative()) {
159       // In this case we reduced the bound check to a comparison of the form
160       //   (symbol or value with unsigned type) < (negative number)
161       // which is always false. We are handling these cases separately because
162       // evalBinOpNN can perform a signed->unsigned conversion that turns the
163       // negative number into a huge positive value and leads to wildly
164       // inaccurate conclusions.
165       return {nullptr, State};
166     }
167   }
168   auto BelowThreshold =
169       SVB.evalBinOpNN(State, BO_LT, Value, Threshold, SVB.getConditionType()).getAs<NonLoc>();
170 
171   if (BelowThreshold)
172     return State->assume(*BelowThreshold);
173 
174   return {nullptr, nullptr};
175 }
176 
177 void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
178                                         const Stmt* LoadS,
179                                         CheckerContext &checkerContext) const {
180 
181   // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
182   // some new logic here that reasons directly about memory region extents.
183   // Once that logic is more mature, we can bring it back to assumeInBound()
184   // for all clients to use.
185   //
186   // The algorithm we are using here for bounds checking is to see if the
187   // memory access is within the extent of the base region.  Since we
188   // have some flexibility in defining the base region, we can achieve
189   // various levels of conservatism in our buffer overflow checking.
190 
191   // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as
192   //   #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX)
193   // and incomplete analysis of these leads to false positives. As even
194   // accurate reports would be confusing for the users, just disable reports
195   // from these macros:
196   if (isFromCtypeMacro(LoadS, checkerContext.getASTContext()))
197     return;
198 
199   ProgramStateRef state = checkerContext.getState();
200   SValBuilder &svalBuilder = checkerContext.getSValBuilder();
201 
202   const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
203       computeOffset(state, svalBuilder, location);
204 
205   if (!RawOffset)
206     return;
207 
208   auto [Reg, ByteOffset] = *RawOffset;
209 
210   // CHECK LOWER BOUND
211   const MemSpaceRegion *Space = Reg->getMemorySpace();
212   if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) {
213     // A symbolic region in unknown space represents an unknown pointer that
214     // may point into the middle of an array, so we don't look for underflows.
215     // Both conditions are significant because we want to check underflows in
216     // symbolic regions on the heap (which may be introduced by checkers like
217     // MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and
218     // non-symbolic regions (e.g. a field subregion of a symbolic region) in
219     // unknown space.
220     auto [state_precedesLowerBound, state_withinLowerBound] =
221         compareValueToThreshold(state, ByteOffset,
222                                 svalBuilder.makeZeroArrayIndex(), svalBuilder);
223 
224     if (state_precedesLowerBound && !state_withinLowerBound) {
225       // We know that the index definitely precedes the lower bound.
226       reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
227       return;
228     }
229 
230     if (state_withinLowerBound)
231       state = state_withinLowerBound;
232   }
233 
234   // CHECK UPPER BOUND
235   DefinedOrUnknownSVal Size = getDynamicExtent(state, Reg, svalBuilder);
236   if (auto KnownSize = Size.getAs<NonLoc>()) {
237     auto [state_withinUpperBound, state_exceedsUpperBound] =
238         compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder);
239 
240     if (state_exceedsUpperBound) {
241       if (!state_withinUpperBound) {
242         // We know that the index definitely exceeds the upper bound.
243         reportOOB(checkerContext, state_exceedsUpperBound, OOB_Exceeds);
244         return;
245       }
246       if (isTainted(state, ByteOffset)) {
247         // Both cases are possible, but the index is tainted, so report.
248         reportOOB(checkerContext, state_exceedsUpperBound, OOB_Taint,
249                   ByteOffset);
250         return;
251       }
252     }
253 
254     if (state_withinUpperBound)
255       state = state_withinUpperBound;
256   }
257 
258   checkerContext.addTransition(state);
259 }
260 
261 void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
262                                     ProgramStateRef ErrorState, OOB_Kind Kind,
263                                     SVal TaintedSVal) const {
264 
265   ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
266   if (!ErrorNode)
267     return;
268 
269   // FIXME: These diagnostics are preliminary, and they'll be replaced soon by
270   // a follow-up commit.
271 
272   SmallString<128> Buf;
273   llvm::raw_svector_ostream Out(Buf);
274   Out << "Out of bound memory access ";
275 
276   switch (Kind) {
277   case OOB_Precedes:
278     Out << "(accessed memory precedes memory block)";
279     break;
280   case OOB_Exceeds:
281     Out << "(access exceeds upper limit of memory block)";
282     break;
283   case OOB_Taint:
284     Out << "(index is tainted)";
285     break;
286   }
287   auto BR = std::make_unique<PathSensitiveBugReport>(
288       Kind == OOB_Taint ? TaintBT : BT, Out.str(), ErrorNode);
289   // Track back the propagation of taintedness, or do nothing if TaintedSVal is
290   // the default UnknownVal().
291   for (SymbolRef Sym : getTaintedSymbols(ErrorState, TaintedSVal)) {
292     BR->markInteresting(Sym);
293   }
294   C.emitReport(std::move(BR));
295 }
296 
297 bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) {
298   SourceLocation Loc = S->getBeginLoc();
299   if (!Loc.isMacroID())
300     return false;
301 
302   StringRef MacroName = Lexer::getImmediateMacroName(
303       Loc, ACtx.getSourceManager(), ACtx.getLangOpts());
304 
305   if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's')
306     return false;
307 
308   return ((MacroName == "isalnum") || (MacroName == "isalpha") ||
309           (MacroName == "isblank") || (MacroName == "isdigit") ||
310           (MacroName == "isgraph") || (MacroName == "islower") ||
311           (MacroName == "isnctrl") || (MacroName == "isprint") ||
312           (MacroName == "ispunct") || (MacroName == "isspace") ||
313           (MacroName == "isupper") || (MacroName == "isxdigit"));
314 }
315 
316 void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
317   mgr.registerChecker<ArrayBoundCheckerV2>();
318 }
319 
320 bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) {
321   return true;
322 }
323