1 //== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines ArrayBoundCheckerV2, which is a path-sensitive check 10 // which looks for an out-of-bound array element access. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/AST/CharUnits.h" 15 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 16 #include "clang/StaticAnalyzer/Checkers/Taint.h" 17 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 18 #include "clang/StaticAnalyzer/Core/Checker.h" 19 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 20 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 22 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" 23 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <optional> 27 28 using namespace clang; 29 using namespace ento; 30 using namespace taint; 31 32 namespace { 33 class ArrayBoundCheckerV2 : 34 public Checker<check::Location> { 35 BugType BT{this, "Out-of-bound access"}; 36 BugType TaintBT{this, "Out-of-bound access", categories::TaintedData}; 37 38 enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint }; 39 40 void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind, 41 SVal TaintedSVal = UnknownVal()) const; 42 43 static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC); 44 45 public: 46 void checkLocation(SVal l, bool isLoad, const Stmt *S, 47 CheckerContext &C) const; 48 }; 49 } // anonymous namespace 50 51 /// For a given Location that can be represented as a symbolic expression 52 /// Arr[Idx] (or perhaps Arr[Idx1][Idx2] etc.), return the parent memory block 53 /// Arr and the distance of Location from the beginning of Arr (expressed in a 54 /// NonLoc that specifies the number of CharUnits). Returns nullopt when these 55 /// cannot be determined. 56 std::optional<std::pair<const SubRegion *, NonLoc>> 57 computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) { 58 QualType T = SVB.getArrayIndexType(); 59 auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) { 60 // We will use this utility to add and multiply values. 61 return SVB.evalBinOpNN(State, Op, L, R, T).getAs<NonLoc>(); 62 }; 63 64 const SubRegion *OwnerRegion = nullptr; 65 std::optional<NonLoc> Offset = SVB.makeZeroArrayIndex(); 66 67 const ElementRegion *CurRegion = 68 dyn_cast_or_null<ElementRegion>(Location.getAsRegion()); 69 70 while (CurRegion) { 71 const auto Index = CurRegion->getIndex().getAs<NonLoc>(); 72 if (!Index) 73 return std::nullopt; 74 75 QualType ElemType = CurRegion->getElementType(); 76 77 // FIXME: The following early return was presumably added to safeguard the 78 // getTypeSizeInChars() call (which doesn't accept an incomplete type), but 79 // it seems that `ElemType` cannot be incomplete at this point. 80 if (ElemType->isIncompleteType()) 81 return std::nullopt; 82 83 // Calculate Delta = Index * sizeof(ElemType). 84 NonLoc Size = SVB.makeArrayIndex( 85 SVB.getContext().getTypeSizeInChars(ElemType).getQuantity()); 86 auto Delta = EvalBinOp(BO_Mul, *Index, Size); 87 if (!Delta) 88 return std::nullopt; 89 90 // Perform Offset += Delta. 91 Offset = EvalBinOp(BO_Add, *Offset, *Delta); 92 if (!Offset) 93 return std::nullopt; 94 95 OwnerRegion = CurRegion->getSuperRegion()->getAs<SubRegion>(); 96 // When this is just another ElementRegion layer, we need to continue the 97 // offset calculations: 98 CurRegion = dyn_cast_or_null<ElementRegion>(OwnerRegion); 99 } 100 101 if (OwnerRegion) 102 return std::make_pair(OwnerRegion, *Offset); 103 104 return std::nullopt; 105 } 106 107 // TODO: once the constraint manager is smart enough to handle non simplified 108 // symbolic expressions remove this function. Note that this can not be used in 109 // the constraint manager as is, since this does not handle overflows. It is 110 // safe to assume, however, that memory offsets will not overflow. 111 // NOTE: callers of this function need to be aware of the effects of overflows 112 // and signed<->unsigned conversions! 113 static std::pair<NonLoc, nonloc::ConcreteInt> 114 getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, 115 SValBuilder &svalBuilder) { 116 std::optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>(); 117 if (SymVal && SymVal->isExpression()) { 118 if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) { 119 llvm::APSInt constant = 120 APSIntType(extent.getValue()).convert(SIE->getRHS()); 121 switch (SIE->getOpcode()) { 122 case BO_Mul: 123 // The constant should never be 0 here, becasue multiplication by zero 124 // is simplified by the engine. 125 if ((extent.getValue() % constant) != 0) 126 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); 127 else 128 return getSimplifiedOffsets( 129 nonloc::SymbolVal(SIE->getLHS()), 130 svalBuilder.makeIntVal(extent.getValue() / constant), 131 svalBuilder); 132 case BO_Add: 133 return getSimplifiedOffsets( 134 nonloc::SymbolVal(SIE->getLHS()), 135 svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder); 136 default: 137 break; 138 } 139 } 140 } 141 142 return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); 143 } 144 145 // Evaluate the comparison Value < Threshold with the help of the custom 146 // simplification algorithm defined for this checker. Return a pair of states, 147 // where the first one corresponds to "value below threshold" and the second 148 // corresponds to "value at or above threshold". Returns {nullptr, nullptr} in 149 // the case when the evaluation fails. 150 static std::pair<ProgramStateRef, ProgramStateRef> 151 compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, 152 SValBuilder &SVB) { 153 if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) { 154 std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB); 155 } 156 if (auto ConcreteThreshold = Threshold.getAs<nonloc::ConcreteInt>()) { 157 QualType T = Value.getType(SVB.getContext()); 158 if (T->isUnsignedIntegerType() && ConcreteThreshold->getValue().isNegative()) { 159 // In this case we reduced the bound check to a comparison of the form 160 // (symbol or value with unsigned type) < (negative number) 161 // which is always false. We are handling these cases separately because 162 // evalBinOpNN can perform a signed->unsigned conversion that turns the 163 // negative number into a huge positive value and leads to wildly 164 // inaccurate conclusions. 165 return {nullptr, State}; 166 } 167 } 168 auto BelowThreshold = 169 SVB.evalBinOpNN(State, BO_LT, Value, Threshold, SVB.getConditionType()).getAs<NonLoc>(); 170 171 if (BelowThreshold) 172 return State->assume(*BelowThreshold); 173 174 return {nullptr, nullptr}; 175 } 176 177 void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, 178 const Stmt* LoadS, 179 CheckerContext &checkerContext) const { 180 181 // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping 182 // some new logic here that reasons directly about memory region extents. 183 // Once that logic is more mature, we can bring it back to assumeInBound() 184 // for all clients to use. 185 // 186 // The algorithm we are using here for bounds checking is to see if the 187 // memory access is within the extent of the base region. Since we 188 // have some flexibility in defining the base region, we can achieve 189 // various levels of conservatism in our buffer overflow checking. 190 191 // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as 192 // #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX) 193 // and incomplete analysis of these leads to false positives. As even 194 // accurate reports would be confusing for the users, just disable reports 195 // from these macros: 196 if (isFromCtypeMacro(LoadS, checkerContext.getASTContext())) 197 return; 198 199 ProgramStateRef state = checkerContext.getState(); 200 SValBuilder &svalBuilder = checkerContext.getSValBuilder(); 201 202 const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset = 203 computeOffset(state, svalBuilder, location); 204 205 if (!RawOffset) 206 return; 207 208 auto [Reg, ByteOffset] = *RawOffset; 209 210 // CHECK LOWER BOUND 211 const MemSpaceRegion *Space = Reg->getMemorySpace(); 212 if (!(isa<SymbolicRegion>(Reg) && isa<UnknownSpaceRegion>(Space))) { 213 // A symbolic region in unknown space represents an unknown pointer that 214 // may point into the middle of an array, so we don't look for underflows. 215 // Both conditions are significant because we want to check underflows in 216 // symbolic regions on the heap (which may be introduced by checkers like 217 // MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and 218 // non-symbolic regions (e.g. a field subregion of a symbolic region) in 219 // unknown space. 220 auto [state_precedesLowerBound, state_withinLowerBound] = 221 compareValueToThreshold(state, ByteOffset, 222 svalBuilder.makeZeroArrayIndex(), svalBuilder); 223 224 if (state_precedesLowerBound && !state_withinLowerBound) { 225 // We know that the index definitely precedes the lower bound. 226 reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes); 227 return; 228 } 229 230 if (state_withinLowerBound) 231 state = state_withinLowerBound; 232 } 233 234 // CHECK UPPER BOUND 235 DefinedOrUnknownSVal Size = getDynamicExtent(state, Reg, svalBuilder); 236 if (auto KnownSize = Size.getAs<NonLoc>()) { 237 auto [state_withinUpperBound, state_exceedsUpperBound] = 238 compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder); 239 240 if (state_exceedsUpperBound) { 241 if (!state_withinUpperBound) { 242 // We know that the index definitely exceeds the upper bound. 243 reportOOB(checkerContext, state_exceedsUpperBound, OOB_Exceeds); 244 return; 245 } 246 if (isTainted(state, ByteOffset)) { 247 // Both cases are possible, but the index is tainted, so report. 248 reportOOB(checkerContext, state_exceedsUpperBound, OOB_Taint, 249 ByteOffset); 250 return; 251 } 252 } 253 254 if (state_withinUpperBound) 255 state = state_withinUpperBound; 256 } 257 258 checkerContext.addTransition(state); 259 } 260 261 void ArrayBoundCheckerV2::reportOOB(CheckerContext &C, 262 ProgramStateRef ErrorState, OOB_Kind Kind, 263 SVal TaintedSVal) const { 264 265 ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState); 266 if (!ErrorNode) 267 return; 268 269 // FIXME: These diagnostics are preliminary, and they'll be replaced soon by 270 // a follow-up commit. 271 272 SmallString<128> Buf; 273 llvm::raw_svector_ostream Out(Buf); 274 Out << "Out of bound memory access "; 275 276 switch (Kind) { 277 case OOB_Precedes: 278 Out << "(accessed memory precedes memory block)"; 279 break; 280 case OOB_Exceeds: 281 Out << "(access exceeds upper limit of memory block)"; 282 break; 283 case OOB_Taint: 284 Out << "(index is tainted)"; 285 break; 286 } 287 auto BR = std::make_unique<PathSensitiveBugReport>( 288 Kind == OOB_Taint ? TaintBT : BT, Out.str(), ErrorNode); 289 // Track back the propagation of taintedness, or do nothing if TaintedSVal is 290 // the default UnknownVal(). 291 for (SymbolRef Sym : getTaintedSymbols(ErrorState, TaintedSVal)) { 292 BR->markInteresting(Sym); 293 } 294 C.emitReport(std::move(BR)); 295 } 296 297 bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) { 298 SourceLocation Loc = S->getBeginLoc(); 299 if (!Loc.isMacroID()) 300 return false; 301 302 StringRef MacroName = Lexer::getImmediateMacroName( 303 Loc, ACtx.getSourceManager(), ACtx.getLangOpts()); 304 305 if (MacroName.size() < 7 || MacroName[0] != 'i' || MacroName[1] != 's') 306 return false; 307 308 return ((MacroName == "isalnum") || (MacroName == "isalpha") || 309 (MacroName == "isblank") || (MacroName == "isdigit") || 310 (MacroName == "isgraph") || (MacroName == "islower") || 311 (MacroName == "isnctrl") || (MacroName == "isprint") || 312 (MacroName == "ispunct") || (MacroName == "isspace") || 313 (MacroName == "isupper") || (MacroName == "isxdigit")); 314 } 315 316 void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) { 317 mgr.registerChecker<ArrayBoundCheckerV2>(); 318 } 319 320 bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) { 321 return true; 322 } 323