1*e5dd7070Spatrick //== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==// 2*e5dd7070Spatrick // 3*e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information. 5*e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*e5dd7070Spatrick // 7*e5dd7070Spatrick //===----------------------------------------------------------------------===// 8*e5dd7070Spatrick // 9*e5dd7070Spatrick // This file defines ArrayBoundCheckerV2, which is a path-sensitive check 10*e5dd7070Spatrick // which looks for an out-of-bound array element access. 11*e5dd7070Spatrick // 12*e5dd7070Spatrick //===----------------------------------------------------------------------===// 13*e5dd7070Spatrick 14*e5dd7070Spatrick #include "Taint.h" 15*e5dd7070Spatrick #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 16*e5dd7070Spatrick #include "clang/AST/CharUnits.h" 17*e5dd7070Spatrick #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 18*e5dd7070Spatrick #include "clang/StaticAnalyzer/Core/Checker.h" 19*e5dd7070Spatrick #include "clang/StaticAnalyzer/Core/CheckerManager.h" 20*e5dd7070Spatrick #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" 21*e5dd7070Spatrick #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 22*e5dd7070Spatrick #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" 23*e5dd7070Spatrick #include "llvm/ADT/SmallString.h" 24*e5dd7070Spatrick #include "llvm/Support/raw_ostream.h" 25*e5dd7070Spatrick 26*e5dd7070Spatrick using namespace clang; 27*e5dd7070Spatrick using namespace ento; 28*e5dd7070Spatrick using namespace taint; 29*e5dd7070Spatrick 30*e5dd7070Spatrick namespace { 31*e5dd7070Spatrick class ArrayBoundCheckerV2 : 32*e5dd7070Spatrick public Checker<check::Location> { 33*e5dd7070Spatrick mutable std::unique_ptr<BuiltinBug> BT; 34*e5dd7070Spatrick 35*e5dd7070Spatrick enum OOB_Kind { OOB_Precedes, OOB_Excedes, OOB_Tainted }; 36*e5dd7070Spatrick 37*e5dd7070Spatrick void reportOOB(CheckerContext &C, ProgramStateRef errorState, OOB_Kind kind, 38*e5dd7070Spatrick std::unique_ptr<BugReporterVisitor> Visitor = nullptr) const; 39*e5dd7070Spatrick 40*e5dd7070Spatrick public: 41*e5dd7070Spatrick void checkLocation(SVal l, bool isLoad, const Stmt*S, 42*e5dd7070Spatrick CheckerContext &C) const; 43*e5dd7070Spatrick }; 44*e5dd7070Spatrick 45*e5dd7070Spatrick // FIXME: Eventually replace RegionRawOffset with this class. 46*e5dd7070Spatrick class RegionRawOffsetV2 { 47*e5dd7070Spatrick private: 48*e5dd7070Spatrick const SubRegion *baseRegion; 49*e5dd7070Spatrick SVal byteOffset; 50*e5dd7070Spatrick 51*e5dd7070Spatrick RegionRawOffsetV2() 52*e5dd7070Spatrick : baseRegion(nullptr), byteOffset(UnknownVal()) {} 53*e5dd7070Spatrick 54*e5dd7070Spatrick public: 55*e5dd7070Spatrick RegionRawOffsetV2(const SubRegion* base, SVal offset) 56*e5dd7070Spatrick : baseRegion(base), byteOffset(offset) {} 57*e5dd7070Spatrick 58*e5dd7070Spatrick NonLoc getByteOffset() const { return byteOffset.castAs<NonLoc>(); } 59*e5dd7070Spatrick const SubRegion *getRegion() const { return baseRegion; } 60*e5dd7070Spatrick 61*e5dd7070Spatrick static RegionRawOffsetV2 computeOffset(ProgramStateRef state, 62*e5dd7070Spatrick SValBuilder &svalBuilder, 63*e5dd7070Spatrick SVal location); 64*e5dd7070Spatrick 65*e5dd7070Spatrick void dump() const; 66*e5dd7070Spatrick void dumpToStream(raw_ostream &os) const; 67*e5dd7070Spatrick }; 68*e5dd7070Spatrick } 69*e5dd7070Spatrick 70*e5dd7070Spatrick static SVal computeExtentBegin(SValBuilder &svalBuilder, 71*e5dd7070Spatrick const MemRegion *region) { 72*e5dd7070Spatrick const MemSpaceRegion *SR = region->getMemorySpace(); 73*e5dd7070Spatrick if (SR->getKind() == MemRegion::UnknownSpaceRegionKind) 74*e5dd7070Spatrick return UnknownVal(); 75*e5dd7070Spatrick else 76*e5dd7070Spatrick return svalBuilder.makeZeroArrayIndex(); 77*e5dd7070Spatrick } 78*e5dd7070Spatrick 79*e5dd7070Spatrick // TODO: once the constraint manager is smart enough to handle non simplified 80*e5dd7070Spatrick // symbolic expressions remove this function. Note that this can not be used in 81*e5dd7070Spatrick // the constraint manager as is, since this does not handle overflows. It is 82*e5dd7070Spatrick // safe to assume, however, that memory offsets will not overflow. 83*e5dd7070Spatrick static std::pair<NonLoc, nonloc::ConcreteInt> 84*e5dd7070Spatrick getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, 85*e5dd7070Spatrick SValBuilder &svalBuilder) { 86*e5dd7070Spatrick Optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>(); 87*e5dd7070Spatrick if (SymVal && SymVal->isExpression()) { 88*e5dd7070Spatrick if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) { 89*e5dd7070Spatrick llvm::APSInt constant = 90*e5dd7070Spatrick APSIntType(extent.getValue()).convert(SIE->getRHS()); 91*e5dd7070Spatrick switch (SIE->getOpcode()) { 92*e5dd7070Spatrick case BO_Mul: 93*e5dd7070Spatrick // The constant should never be 0 here, since it the result of scaling 94*e5dd7070Spatrick // based on the size of a type which is never 0. 95*e5dd7070Spatrick if ((extent.getValue() % constant) != 0) 96*e5dd7070Spatrick return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); 97*e5dd7070Spatrick else 98*e5dd7070Spatrick return getSimplifiedOffsets( 99*e5dd7070Spatrick nonloc::SymbolVal(SIE->getLHS()), 100*e5dd7070Spatrick svalBuilder.makeIntVal(extent.getValue() / constant), 101*e5dd7070Spatrick svalBuilder); 102*e5dd7070Spatrick case BO_Add: 103*e5dd7070Spatrick return getSimplifiedOffsets( 104*e5dd7070Spatrick nonloc::SymbolVal(SIE->getLHS()), 105*e5dd7070Spatrick svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder); 106*e5dd7070Spatrick default: 107*e5dd7070Spatrick break; 108*e5dd7070Spatrick } 109*e5dd7070Spatrick } 110*e5dd7070Spatrick } 111*e5dd7070Spatrick 112*e5dd7070Spatrick return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent); 113*e5dd7070Spatrick } 114*e5dd7070Spatrick 115*e5dd7070Spatrick void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad, 116*e5dd7070Spatrick const Stmt* LoadS, 117*e5dd7070Spatrick CheckerContext &checkerContext) const { 118*e5dd7070Spatrick 119*e5dd7070Spatrick // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping 120*e5dd7070Spatrick // some new logic here that reasons directly about memory region extents. 121*e5dd7070Spatrick // Once that logic is more mature, we can bring it back to assumeInBound() 122*e5dd7070Spatrick // for all clients to use. 123*e5dd7070Spatrick // 124*e5dd7070Spatrick // The algorithm we are using here for bounds checking is to see if the 125*e5dd7070Spatrick // memory access is within the extent of the base region. Since we 126*e5dd7070Spatrick // have some flexibility in defining the base region, we can achieve 127*e5dd7070Spatrick // various levels of conservatism in our buffer overflow checking. 128*e5dd7070Spatrick ProgramStateRef state = checkerContext.getState(); 129*e5dd7070Spatrick 130*e5dd7070Spatrick SValBuilder &svalBuilder = checkerContext.getSValBuilder(); 131*e5dd7070Spatrick const RegionRawOffsetV2 &rawOffset = 132*e5dd7070Spatrick RegionRawOffsetV2::computeOffset(state, svalBuilder, location); 133*e5dd7070Spatrick 134*e5dd7070Spatrick if (!rawOffset.getRegion()) 135*e5dd7070Spatrick return; 136*e5dd7070Spatrick 137*e5dd7070Spatrick NonLoc rawOffsetVal = rawOffset.getByteOffset(); 138*e5dd7070Spatrick 139*e5dd7070Spatrick // CHECK LOWER BOUND: Is byteOffset < extent begin? 140*e5dd7070Spatrick // If so, we are doing a load/store 141*e5dd7070Spatrick // before the first valid offset in the memory region. 142*e5dd7070Spatrick 143*e5dd7070Spatrick SVal extentBegin = computeExtentBegin(svalBuilder, rawOffset.getRegion()); 144*e5dd7070Spatrick 145*e5dd7070Spatrick if (Optional<NonLoc> NV = extentBegin.getAs<NonLoc>()) { 146*e5dd7070Spatrick if (NV->getAs<nonloc::ConcreteInt>()) { 147*e5dd7070Spatrick std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets = 148*e5dd7070Spatrick getSimplifiedOffsets(rawOffset.getByteOffset(), 149*e5dd7070Spatrick NV->castAs<nonloc::ConcreteInt>(), 150*e5dd7070Spatrick svalBuilder); 151*e5dd7070Spatrick rawOffsetVal = simplifiedOffsets.first; 152*e5dd7070Spatrick *NV = simplifiedOffsets.second; 153*e5dd7070Spatrick } 154*e5dd7070Spatrick 155*e5dd7070Spatrick SVal lowerBound = svalBuilder.evalBinOpNN(state, BO_LT, rawOffsetVal, *NV, 156*e5dd7070Spatrick svalBuilder.getConditionType()); 157*e5dd7070Spatrick 158*e5dd7070Spatrick Optional<NonLoc> lowerBoundToCheck = lowerBound.getAs<NonLoc>(); 159*e5dd7070Spatrick if (!lowerBoundToCheck) 160*e5dd7070Spatrick return; 161*e5dd7070Spatrick 162*e5dd7070Spatrick ProgramStateRef state_precedesLowerBound, state_withinLowerBound; 163*e5dd7070Spatrick std::tie(state_precedesLowerBound, state_withinLowerBound) = 164*e5dd7070Spatrick state->assume(*lowerBoundToCheck); 165*e5dd7070Spatrick 166*e5dd7070Spatrick // Are we constrained enough to definitely precede the lower bound? 167*e5dd7070Spatrick if (state_precedesLowerBound && !state_withinLowerBound) { 168*e5dd7070Spatrick reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes); 169*e5dd7070Spatrick return; 170*e5dd7070Spatrick } 171*e5dd7070Spatrick 172*e5dd7070Spatrick // Otherwise, assume the constraint of the lower bound. 173*e5dd7070Spatrick assert(state_withinLowerBound); 174*e5dd7070Spatrick state = state_withinLowerBound; 175*e5dd7070Spatrick } 176*e5dd7070Spatrick 177*e5dd7070Spatrick do { 178*e5dd7070Spatrick // CHECK UPPER BOUND: Is byteOffset >= extent(baseRegion)? If so, 179*e5dd7070Spatrick // we are doing a load/store after the last valid offset. 180*e5dd7070Spatrick DefinedOrUnknownSVal extentVal = 181*e5dd7070Spatrick rawOffset.getRegion()->getExtent(svalBuilder); 182*e5dd7070Spatrick if (!extentVal.getAs<NonLoc>()) 183*e5dd7070Spatrick break; 184*e5dd7070Spatrick 185*e5dd7070Spatrick if (extentVal.getAs<nonloc::ConcreteInt>()) { 186*e5dd7070Spatrick std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets = 187*e5dd7070Spatrick getSimplifiedOffsets(rawOffset.getByteOffset(), 188*e5dd7070Spatrick extentVal.castAs<nonloc::ConcreteInt>(), 189*e5dd7070Spatrick svalBuilder); 190*e5dd7070Spatrick rawOffsetVal = simplifiedOffsets.first; 191*e5dd7070Spatrick extentVal = simplifiedOffsets.second; 192*e5dd7070Spatrick } 193*e5dd7070Spatrick 194*e5dd7070Spatrick SVal upperbound = svalBuilder.evalBinOpNN(state, BO_GE, rawOffsetVal, 195*e5dd7070Spatrick extentVal.castAs<NonLoc>(), 196*e5dd7070Spatrick svalBuilder.getConditionType()); 197*e5dd7070Spatrick 198*e5dd7070Spatrick Optional<NonLoc> upperboundToCheck = upperbound.getAs<NonLoc>(); 199*e5dd7070Spatrick if (!upperboundToCheck) 200*e5dd7070Spatrick break; 201*e5dd7070Spatrick 202*e5dd7070Spatrick ProgramStateRef state_exceedsUpperBound, state_withinUpperBound; 203*e5dd7070Spatrick std::tie(state_exceedsUpperBound, state_withinUpperBound) = 204*e5dd7070Spatrick state->assume(*upperboundToCheck); 205*e5dd7070Spatrick 206*e5dd7070Spatrick // If we are under constrained and the index variables are tainted, report. 207*e5dd7070Spatrick if (state_exceedsUpperBound && state_withinUpperBound) { 208*e5dd7070Spatrick SVal ByteOffset = rawOffset.getByteOffset(); 209*e5dd7070Spatrick if (isTainted(state, ByteOffset)) { 210*e5dd7070Spatrick reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted, 211*e5dd7070Spatrick std::make_unique<TaintBugVisitor>(ByteOffset)); 212*e5dd7070Spatrick return; 213*e5dd7070Spatrick } 214*e5dd7070Spatrick } else if (state_exceedsUpperBound) { 215*e5dd7070Spatrick // If we are constrained enough to definitely exceed the upper bound, 216*e5dd7070Spatrick // report. 217*e5dd7070Spatrick assert(!state_withinUpperBound); 218*e5dd7070Spatrick reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes); 219*e5dd7070Spatrick return; 220*e5dd7070Spatrick } 221*e5dd7070Spatrick 222*e5dd7070Spatrick assert(state_withinUpperBound); 223*e5dd7070Spatrick state = state_withinUpperBound; 224*e5dd7070Spatrick } 225*e5dd7070Spatrick while (false); 226*e5dd7070Spatrick 227*e5dd7070Spatrick checkerContext.addTransition(state); 228*e5dd7070Spatrick } 229*e5dd7070Spatrick 230*e5dd7070Spatrick void ArrayBoundCheckerV2::reportOOB( 231*e5dd7070Spatrick CheckerContext &checkerContext, ProgramStateRef errorState, OOB_Kind kind, 232*e5dd7070Spatrick std::unique_ptr<BugReporterVisitor> Visitor) const { 233*e5dd7070Spatrick 234*e5dd7070Spatrick ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState); 235*e5dd7070Spatrick if (!errorNode) 236*e5dd7070Spatrick return; 237*e5dd7070Spatrick 238*e5dd7070Spatrick if (!BT) 239*e5dd7070Spatrick BT.reset(new BuiltinBug(this, "Out-of-bound access")); 240*e5dd7070Spatrick 241*e5dd7070Spatrick // FIXME: This diagnostics are preliminary. We should get far better 242*e5dd7070Spatrick // diagnostics for explaining buffer overruns. 243*e5dd7070Spatrick 244*e5dd7070Spatrick SmallString<256> buf; 245*e5dd7070Spatrick llvm::raw_svector_ostream os(buf); 246*e5dd7070Spatrick os << "Out of bound memory access "; 247*e5dd7070Spatrick switch (kind) { 248*e5dd7070Spatrick case OOB_Precedes: 249*e5dd7070Spatrick os << "(accessed memory precedes memory block)"; 250*e5dd7070Spatrick break; 251*e5dd7070Spatrick case OOB_Excedes: 252*e5dd7070Spatrick os << "(access exceeds upper limit of memory block)"; 253*e5dd7070Spatrick break; 254*e5dd7070Spatrick case OOB_Tainted: 255*e5dd7070Spatrick os << "(index is tainted)"; 256*e5dd7070Spatrick break; 257*e5dd7070Spatrick } 258*e5dd7070Spatrick 259*e5dd7070Spatrick auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode); 260*e5dd7070Spatrick BR->addVisitor(std::move(Visitor)); 261*e5dd7070Spatrick checkerContext.emitReport(std::move(BR)); 262*e5dd7070Spatrick } 263*e5dd7070Spatrick 264*e5dd7070Spatrick #ifndef NDEBUG 265*e5dd7070Spatrick LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const { 266*e5dd7070Spatrick dumpToStream(llvm::errs()); 267*e5dd7070Spatrick } 268*e5dd7070Spatrick 269*e5dd7070Spatrick void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const { 270*e5dd7070Spatrick os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}'; 271*e5dd7070Spatrick } 272*e5dd7070Spatrick #endif 273*e5dd7070Spatrick 274*e5dd7070Spatrick // Lazily computes a value to be used by 'computeOffset'. If 'val' 275*e5dd7070Spatrick // is unknown or undefined, we lazily substitute '0'. Otherwise, 276*e5dd7070Spatrick // return 'val'. 277*e5dd7070Spatrick static inline SVal getValue(SVal val, SValBuilder &svalBuilder) { 278*e5dd7070Spatrick return val.getAs<UndefinedVal>() ? svalBuilder.makeArrayIndex(0) : val; 279*e5dd7070Spatrick } 280*e5dd7070Spatrick 281*e5dd7070Spatrick // Scale a base value by a scaling factor, and return the scaled 282*e5dd7070Spatrick // value as an SVal. Used by 'computeOffset'. 283*e5dd7070Spatrick static inline SVal scaleValue(ProgramStateRef state, 284*e5dd7070Spatrick NonLoc baseVal, CharUnits scaling, 285*e5dd7070Spatrick SValBuilder &sb) { 286*e5dd7070Spatrick return sb.evalBinOpNN(state, BO_Mul, baseVal, 287*e5dd7070Spatrick sb.makeArrayIndex(scaling.getQuantity()), 288*e5dd7070Spatrick sb.getArrayIndexType()); 289*e5dd7070Spatrick } 290*e5dd7070Spatrick 291*e5dd7070Spatrick // Add an SVal to another, treating unknown and undefined values as 292*e5dd7070Spatrick // summing to UnknownVal. Used by 'computeOffset'. 293*e5dd7070Spatrick static SVal addValue(ProgramStateRef state, SVal x, SVal y, 294*e5dd7070Spatrick SValBuilder &svalBuilder) { 295*e5dd7070Spatrick // We treat UnknownVals and UndefinedVals the same here because we 296*e5dd7070Spatrick // only care about computing offsets. 297*e5dd7070Spatrick if (x.isUnknownOrUndef() || y.isUnknownOrUndef()) 298*e5dd7070Spatrick return UnknownVal(); 299*e5dd7070Spatrick 300*e5dd7070Spatrick return svalBuilder.evalBinOpNN(state, BO_Add, x.castAs<NonLoc>(), 301*e5dd7070Spatrick y.castAs<NonLoc>(), 302*e5dd7070Spatrick svalBuilder.getArrayIndexType()); 303*e5dd7070Spatrick } 304*e5dd7070Spatrick 305*e5dd7070Spatrick /// Compute a raw byte offset from a base region. Used for array bounds 306*e5dd7070Spatrick /// checking. 307*e5dd7070Spatrick RegionRawOffsetV2 RegionRawOffsetV2::computeOffset(ProgramStateRef state, 308*e5dd7070Spatrick SValBuilder &svalBuilder, 309*e5dd7070Spatrick SVal location) 310*e5dd7070Spatrick { 311*e5dd7070Spatrick const MemRegion *region = location.getAsRegion(); 312*e5dd7070Spatrick SVal offset = UndefinedVal(); 313*e5dd7070Spatrick 314*e5dd7070Spatrick while (region) { 315*e5dd7070Spatrick switch (region->getKind()) { 316*e5dd7070Spatrick default: { 317*e5dd7070Spatrick if (const SubRegion *subReg = dyn_cast<SubRegion>(region)) { 318*e5dd7070Spatrick offset = getValue(offset, svalBuilder); 319*e5dd7070Spatrick if (!offset.isUnknownOrUndef()) 320*e5dd7070Spatrick return RegionRawOffsetV2(subReg, offset); 321*e5dd7070Spatrick } 322*e5dd7070Spatrick return RegionRawOffsetV2(); 323*e5dd7070Spatrick } 324*e5dd7070Spatrick case MemRegion::ElementRegionKind: { 325*e5dd7070Spatrick const ElementRegion *elemReg = cast<ElementRegion>(region); 326*e5dd7070Spatrick SVal index = elemReg->getIndex(); 327*e5dd7070Spatrick if (!index.getAs<NonLoc>()) 328*e5dd7070Spatrick return RegionRawOffsetV2(); 329*e5dd7070Spatrick QualType elemType = elemReg->getElementType(); 330*e5dd7070Spatrick // If the element is an incomplete type, go no further. 331*e5dd7070Spatrick ASTContext &astContext = svalBuilder.getContext(); 332*e5dd7070Spatrick if (elemType->isIncompleteType()) 333*e5dd7070Spatrick return RegionRawOffsetV2(); 334*e5dd7070Spatrick 335*e5dd7070Spatrick // Update the offset. 336*e5dd7070Spatrick offset = addValue(state, 337*e5dd7070Spatrick getValue(offset, svalBuilder), 338*e5dd7070Spatrick scaleValue(state, 339*e5dd7070Spatrick index.castAs<NonLoc>(), 340*e5dd7070Spatrick astContext.getTypeSizeInChars(elemType), 341*e5dd7070Spatrick svalBuilder), 342*e5dd7070Spatrick svalBuilder); 343*e5dd7070Spatrick 344*e5dd7070Spatrick if (offset.isUnknownOrUndef()) 345*e5dd7070Spatrick return RegionRawOffsetV2(); 346*e5dd7070Spatrick 347*e5dd7070Spatrick region = elemReg->getSuperRegion(); 348*e5dd7070Spatrick continue; 349*e5dd7070Spatrick } 350*e5dd7070Spatrick } 351*e5dd7070Spatrick } 352*e5dd7070Spatrick return RegionRawOffsetV2(); 353*e5dd7070Spatrick } 354*e5dd7070Spatrick 355*e5dd7070Spatrick void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) { 356*e5dd7070Spatrick mgr.registerChecker<ArrayBoundCheckerV2>(); 357*e5dd7070Spatrick } 358*e5dd7070Spatrick 359*e5dd7070Spatrick bool ento::shouldRegisterArrayBoundCheckerV2(const LangOptions &LO) { 360*e5dd7070Spatrick return true; 361*e5dd7070Spatrick } 362