17330f729Sjoerg //== ArrayBoundCheckerV2.cpp ------------------------------------*- C++ -*--==//
27330f729Sjoerg //
37330f729Sjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47330f729Sjoerg // See https://llvm.org/LICENSE.txt for license information.
57330f729Sjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67330f729Sjoerg //
77330f729Sjoerg //===----------------------------------------------------------------------===//
87330f729Sjoerg //
97330f729Sjoerg // This file defines ArrayBoundCheckerV2, which is a path-sensitive check
107330f729Sjoerg // which looks for an out-of-bound array element access.
117330f729Sjoerg //
127330f729Sjoerg //===----------------------------------------------------------------------===//
137330f729Sjoerg
147330f729Sjoerg #include "Taint.h"
157330f729Sjoerg #include "clang/AST/CharUnits.h"
16*e038c9c4Sjoerg #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
177330f729Sjoerg #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
187330f729Sjoerg #include "clang/StaticAnalyzer/Core/Checker.h"
197330f729Sjoerg #include "clang/StaticAnalyzer/Core/CheckerManager.h"
207330f729Sjoerg #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
217330f729Sjoerg #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
22*e038c9c4Sjoerg #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
237330f729Sjoerg #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
247330f729Sjoerg #include "llvm/ADT/SmallString.h"
257330f729Sjoerg #include "llvm/Support/raw_ostream.h"
267330f729Sjoerg
277330f729Sjoerg using namespace clang;
287330f729Sjoerg using namespace ento;
297330f729Sjoerg using namespace taint;
307330f729Sjoerg
317330f729Sjoerg namespace {
327330f729Sjoerg class ArrayBoundCheckerV2 :
337330f729Sjoerg public Checker<check::Location> {
347330f729Sjoerg mutable std::unique_ptr<BuiltinBug> BT;
357330f729Sjoerg
367330f729Sjoerg enum OOB_Kind { OOB_Precedes, OOB_Excedes, OOB_Tainted };
377330f729Sjoerg
387330f729Sjoerg void reportOOB(CheckerContext &C, ProgramStateRef errorState, OOB_Kind kind,
397330f729Sjoerg std::unique_ptr<BugReporterVisitor> Visitor = nullptr) const;
407330f729Sjoerg
417330f729Sjoerg public:
427330f729Sjoerg void checkLocation(SVal l, bool isLoad, const Stmt*S,
437330f729Sjoerg CheckerContext &C) const;
447330f729Sjoerg };
457330f729Sjoerg
467330f729Sjoerg // FIXME: Eventually replace RegionRawOffset with this class.
477330f729Sjoerg class RegionRawOffsetV2 {
487330f729Sjoerg private:
497330f729Sjoerg const SubRegion *baseRegion;
507330f729Sjoerg SVal byteOffset;
517330f729Sjoerg
RegionRawOffsetV2()527330f729Sjoerg RegionRawOffsetV2()
537330f729Sjoerg : baseRegion(nullptr), byteOffset(UnknownVal()) {}
547330f729Sjoerg
557330f729Sjoerg public:
RegionRawOffsetV2(const SubRegion * base,SVal offset)567330f729Sjoerg RegionRawOffsetV2(const SubRegion* base, SVal offset)
577330f729Sjoerg : baseRegion(base), byteOffset(offset) {}
587330f729Sjoerg
getByteOffset() const597330f729Sjoerg NonLoc getByteOffset() const { return byteOffset.castAs<NonLoc>(); }
getRegion() const607330f729Sjoerg const SubRegion *getRegion() const { return baseRegion; }
617330f729Sjoerg
627330f729Sjoerg static RegionRawOffsetV2 computeOffset(ProgramStateRef state,
637330f729Sjoerg SValBuilder &svalBuilder,
647330f729Sjoerg SVal location);
657330f729Sjoerg
667330f729Sjoerg void dump() const;
677330f729Sjoerg void dumpToStream(raw_ostream &os) const;
687330f729Sjoerg };
697330f729Sjoerg }
707330f729Sjoerg
computeExtentBegin(SValBuilder & svalBuilder,const MemRegion * region)717330f729Sjoerg static SVal computeExtentBegin(SValBuilder &svalBuilder,
727330f729Sjoerg const MemRegion *region) {
737330f729Sjoerg const MemSpaceRegion *SR = region->getMemorySpace();
747330f729Sjoerg if (SR->getKind() == MemRegion::UnknownSpaceRegionKind)
757330f729Sjoerg return UnknownVal();
767330f729Sjoerg else
777330f729Sjoerg return svalBuilder.makeZeroArrayIndex();
787330f729Sjoerg }
797330f729Sjoerg
807330f729Sjoerg // TODO: once the constraint manager is smart enough to handle non simplified
817330f729Sjoerg // symbolic expressions remove this function. Note that this can not be used in
827330f729Sjoerg // the constraint manager as is, since this does not handle overflows. It is
837330f729Sjoerg // safe to assume, however, that memory offsets will not overflow.
847330f729Sjoerg static std::pair<NonLoc, nonloc::ConcreteInt>
getSimplifiedOffsets(NonLoc offset,nonloc::ConcreteInt extent,SValBuilder & svalBuilder)857330f729Sjoerg getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent,
867330f729Sjoerg SValBuilder &svalBuilder) {
877330f729Sjoerg Optional<nonloc::SymbolVal> SymVal = offset.getAs<nonloc::SymbolVal>();
887330f729Sjoerg if (SymVal && SymVal->isExpression()) {
897330f729Sjoerg if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SymVal->getSymbol())) {
907330f729Sjoerg llvm::APSInt constant =
917330f729Sjoerg APSIntType(extent.getValue()).convert(SIE->getRHS());
927330f729Sjoerg switch (SIE->getOpcode()) {
937330f729Sjoerg case BO_Mul:
947330f729Sjoerg // The constant should never be 0 here, since it the result of scaling
957330f729Sjoerg // based on the size of a type which is never 0.
967330f729Sjoerg if ((extent.getValue() % constant) != 0)
977330f729Sjoerg return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
987330f729Sjoerg else
997330f729Sjoerg return getSimplifiedOffsets(
1007330f729Sjoerg nonloc::SymbolVal(SIE->getLHS()),
1017330f729Sjoerg svalBuilder.makeIntVal(extent.getValue() / constant),
1027330f729Sjoerg svalBuilder);
1037330f729Sjoerg case BO_Add:
1047330f729Sjoerg return getSimplifiedOffsets(
1057330f729Sjoerg nonloc::SymbolVal(SIE->getLHS()),
1067330f729Sjoerg svalBuilder.makeIntVal(extent.getValue() - constant), svalBuilder);
1077330f729Sjoerg default:
1087330f729Sjoerg break;
1097330f729Sjoerg }
1107330f729Sjoerg }
1117330f729Sjoerg }
1127330f729Sjoerg
1137330f729Sjoerg return std::pair<NonLoc, nonloc::ConcreteInt>(offset, extent);
1147330f729Sjoerg }
1157330f729Sjoerg
checkLocation(SVal location,bool isLoad,const Stmt * LoadS,CheckerContext & checkerContext) const1167330f729Sjoerg void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
1177330f729Sjoerg const Stmt* LoadS,
1187330f729Sjoerg CheckerContext &checkerContext) const {
1197330f729Sjoerg
1207330f729Sjoerg // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
1217330f729Sjoerg // some new logic here that reasons directly about memory region extents.
1227330f729Sjoerg // Once that logic is more mature, we can bring it back to assumeInBound()
1237330f729Sjoerg // for all clients to use.
1247330f729Sjoerg //
1257330f729Sjoerg // The algorithm we are using here for bounds checking is to see if the
1267330f729Sjoerg // memory access is within the extent of the base region. Since we
1277330f729Sjoerg // have some flexibility in defining the base region, we can achieve
1287330f729Sjoerg // various levels of conservatism in our buffer overflow checking.
1297330f729Sjoerg ProgramStateRef state = checkerContext.getState();
1307330f729Sjoerg
1317330f729Sjoerg SValBuilder &svalBuilder = checkerContext.getSValBuilder();
1327330f729Sjoerg const RegionRawOffsetV2 &rawOffset =
1337330f729Sjoerg RegionRawOffsetV2::computeOffset(state, svalBuilder, location);
1347330f729Sjoerg
1357330f729Sjoerg if (!rawOffset.getRegion())
1367330f729Sjoerg return;
1377330f729Sjoerg
1387330f729Sjoerg NonLoc rawOffsetVal = rawOffset.getByteOffset();
1397330f729Sjoerg
1407330f729Sjoerg // CHECK LOWER BOUND: Is byteOffset < extent begin?
1417330f729Sjoerg // If so, we are doing a load/store
1427330f729Sjoerg // before the first valid offset in the memory region.
1437330f729Sjoerg
1447330f729Sjoerg SVal extentBegin = computeExtentBegin(svalBuilder, rawOffset.getRegion());
1457330f729Sjoerg
1467330f729Sjoerg if (Optional<NonLoc> NV = extentBegin.getAs<NonLoc>()) {
1477330f729Sjoerg if (NV->getAs<nonloc::ConcreteInt>()) {
1487330f729Sjoerg std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets =
1497330f729Sjoerg getSimplifiedOffsets(rawOffset.getByteOffset(),
1507330f729Sjoerg NV->castAs<nonloc::ConcreteInt>(),
1517330f729Sjoerg svalBuilder);
1527330f729Sjoerg rawOffsetVal = simplifiedOffsets.first;
1537330f729Sjoerg *NV = simplifiedOffsets.second;
1547330f729Sjoerg }
1557330f729Sjoerg
1567330f729Sjoerg SVal lowerBound = svalBuilder.evalBinOpNN(state, BO_LT, rawOffsetVal, *NV,
1577330f729Sjoerg svalBuilder.getConditionType());
1587330f729Sjoerg
1597330f729Sjoerg Optional<NonLoc> lowerBoundToCheck = lowerBound.getAs<NonLoc>();
1607330f729Sjoerg if (!lowerBoundToCheck)
1617330f729Sjoerg return;
1627330f729Sjoerg
1637330f729Sjoerg ProgramStateRef state_precedesLowerBound, state_withinLowerBound;
1647330f729Sjoerg std::tie(state_precedesLowerBound, state_withinLowerBound) =
1657330f729Sjoerg state->assume(*lowerBoundToCheck);
1667330f729Sjoerg
1677330f729Sjoerg // Are we constrained enough to definitely precede the lower bound?
1687330f729Sjoerg if (state_precedesLowerBound && !state_withinLowerBound) {
1697330f729Sjoerg reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
1707330f729Sjoerg return;
1717330f729Sjoerg }
1727330f729Sjoerg
1737330f729Sjoerg // Otherwise, assume the constraint of the lower bound.
1747330f729Sjoerg assert(state_withinLowerBound);
1757330f729Sjoerg state = state_withinLowerBound;
1767330f729Sjoerg }
1777330f729Sjoerg
1787330f729Sjoerg do {
179*e038c9c4Sjoerg // CHECK UPPER BOUND: Is byteOffset >= size(baseRegion)? If so,
1807330f729Sjoerg // we are doing a load/store after the last valid offset.
181*e038c9c4Sjoerg const MemRegion *MR = rawOffset.getRegion();
182*e038c9c4Sjoerg DefinedOrUnknownSVal Size = getDynamicExtent(state, MR, svalBuilder);
183*e038c9c4Sjoerg if (!Size.getAs<NonLoc>())
1847330f729Sjoerg break;
1857330f729Sjoerg
186*e038c9c4Sjoerg if (Size.getAs<nonloc::ConcreteInt>()) {
1877330f729Sjoerg std::pair<NonLoc, nonloc::ConcreteInt> simplifiedOffsets =
1887330f729Sjoerg getSimplifiedOffsets(rawOffset.getByteOffset(),
189*e038c9c4Sjoerg Size.castAs<nonloc::ConcreteInt>(), svalBuilder);
1907330f729Sjoerg rawOffsetVal = simplifiedOffsets.first;
191*e038c9c4Sjoerg Size = simplifiedOffsets.second;
1927330f729Sjoerg }
1937330f729Sjoerg
1947330f729Sjoerg SVal upperbound = svalBuilder.evalBinOpNN(state, BO_GE, rawOffsetVal,
195*e038c9c4Sjoerg Size.castAs<NonLoc>(),
1967330f729Sjoerg svalBuilder.getConditionType());
1977330f729Sjoerg
1987330f729Sjoerg Optional<NonLoc> upperboundToCheck = upperbound.getAs<NonLoc>();
1997330f729Sjoerg if (!upperboundToCheck)
2007330f729Sjoerg break;
2017330f729Sjoerg
2027330f729Sjoerg ProgramStateRef state_exceedsUpperBound, state_withinUpperBound;
2037330f729Sjoerg std::tie(state_exceedsUpperBound, state_withinUpperBound) =
2047330f729Sjoerg state->assume(*upperboundToCheck);
2057330f729Sjoerg
2067330f729Sjoerg // If we are under constrained and the index variables are tainted, report.
2077330f729Sjoerg if (state_exceedsUpperBound && state_withinUpperBound) {
2087330f729Sjoerg SVal ByteOffset = rawOffset.getByteOffset();
2097330f729Sjoerg if (isTainted(state, ByteOffset)) {
2107330f729Sjoerg reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted,
2117330f729Sjoerg std::make_unique<TaintBugVisitor>(ByteOffset));
2127330f729Sjoerg return;
2137330f729Sjoerg }
2147330f729Sjoerg } else if (state_exceedsUpperBound) {
2157330f729Sjoerg // If we are constrained enough to definitely exceed the upper bound,
2167330f729Sjoerg // report.
2177330f729Sjoerg assert(!state_withinUpperBound);
2187330f729Sjoerg reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
2197330f729Sjoerg return;
2207330f729Sjoerg }
2217330f729Sjoerg
2227330f729Sjoerg assert(state_withinUpperBound);
2237330f729Sjoerg state = state_withinUpperBound;
2247330f729Sjoerg }
2257330f729Sjoerg while (false);
2267330f729Sjoerg
2277330f729Sjoerg checkerContext.addTransition(state);
2287330f729Sjoerg }
2297330f729Sjoerg
reportOOB(CheckerContext & checkerContext,ProgramStateRef errorState,OOB_Kind kind,std::unique_ptr<BugReporterVisitor> Visitor) const2307330f729Sjoerg void ArrayBoundCheckerV2::reportOOB(
2317330f729Sjoerg CheckerContext &checkerContext, ProgramStateRef errorState, OOB_Kind kind,
2327330f729Sjoerg std::unique_ptr<BugReporterVisitor> Visitor) const {
2337330f729Sjoerg
2347330f729Sjoerg ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
2357330f729Sjoerg if (!errorNode)
2367330f729Sjoerg return;
2377330f729Sjoerg
2387330f729Sjoerg if (!BT)
2397330f729Sjoerg BT.reset(new BuiltinBug(this, "Out-of-bound access"));
2407330f729Sjoerg
2417330f729Sjoerg // FIXME: This diagnostics are preliminary. We should get far better
2427330f729Sjoerg // diagnostics for explaining buffer overruns.
2437330f729Sjoerg
2447330f729Sjoerg SmallString<256> buf;
2457330f729Sjoerg llvm::raw_svector_ostream os(buf);
2467330f729Sjoerg os << "Out of bound memory access ";
2477330f729Sjoerg switch (kind) {
2487330f729Sjoerg case OOB_Precedes:
2497330f729Sjoerg os << "(accessed memory precedes memory block)";
2507330f729Sjoerg break;
2517330f729Sjoerg case OOB_Excedes:
2527330f729Sjoerg os << "(access exceeds upper limit of memory block)";
2537330f729Sjoerg break;
2547330f729Sjoerg case OOB_Tainted:
2557330f729Sjoerg os << "(index is tainted)";
2567330f729Sjoerg break;
2577330f729Sjoerg }
2587330f729Sjoerg
2597330f729Sjoerg auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode);
2607330f729Sjoerg BR->addVisitor(std::move(Visitor));
2617330f729Sjoerg checkerContext.emitReport(std::move(BR));
2627330f729Sjoerg }
2637330f729Sjoerg
2647330f729Sjoerg #ifndef NDEBUG
dump() const2657330f729Sjoerg LLVM_DUMP_METHOD void RegionRawOffsetV2::dump() const {
2667330f729Sjoerg dumpToStream(llvm::errs());
2677330f729Sjoerg }
2687330f729Sjoerg
dumpToStream(raw_ostream & os) const2697330f729Sjoerg void RegionRawOffsetV2::dumpToStream(raw_ostream &os) const {
2707330f729Sjoerg os << "raw_offset_v2{" << getRegion() << ',' << getByteOffset() << '}';
2717330f729Sjoerg }
2727330f729Sjoerg #endif
2737330f729Sjoerg
2747330f729Sjoerg // Lazily computes a value to be used by 'computeOffset'. If 'val'
2757330f729Sjoerg // is unknown or undefined, we lazily substitute '0'. Otherwise,
2767330f729Sjoerg // return 'val'.
getValue(SVal val,SValBuilder & svalBuilder)2777330f729Sjoerg static inline SVal getValue(SVal val, SValBuilder &svalBuilder) {
2787330f729Sjoerg return val.getAs<UndefinedVal>() ? svalBuilder.makeArrayIndex(0) : val;
2797330f729Sjoerg }
2807330f729Sjoerg
2817330f729Sjoerg // Scale a base value by a scaling factor, and return the scaled
2827330f729Sjoerg // value as an SVal. Used by 'computeOffset'.
scaleValue(ProgramStateRef state,NonLoc baseVal,CharUnits scaling,SValBuilder & sb)2837330f729Sjoerg static inline SVal scaleValue(ProgramStateRef state,
2847330f729Sjoerg NonLoc baseVal, CharUnits scaling,
2857330f729Sjoerg SValBuilder &sb) {
2867330f729Sjoerg return sb.evalBinOpNN(state, BO_Mul, baseVal,
2877330f729Sjoerg sb.makeArrayIndex(scaling.getQuantity()),
2887330f729Sjoerg sb.getArrayIndexType());
2897330f729Sjoerg }
2907330f729Sjoerg
2917330f729Sjoerg // Add an SVal to another, treating unknown and undefined values as
2927330f729Sjoerg // summing to UnknownVal. Used by 'computeOffset'.
addValue(ProgramStateRef state,SVal x,SVal y,SValBuilder & svalBuilder)2937330f729Sjoerg static SVal addValue(ProgramStateRef state, SVal x, SVal y,
2947330f729Sjoerg SValBuilder &svalBuilder) {
2957330f729Sjoerg // We treat UnknownVals and UndefinedVals the same here because we
2967330f729Sjoerg // only care about computing offsets.
2977330f729Sjoerg if (x.isUnknownOrUndef() || y.isUnknownOrUndef())
2987330f729Sjoerg return UnknownVal();
2997330f729Sjoerg
3007330f729Sjoerg return svalBuilder.evalBinOpNN(state, BO_Add, x.castAs<NonLoc>(),
3017330f729Sjoerg y.castAs<NonLoc>(),
3027330f729Sjoerg svalBuilder.getArrayIndexType());
3037330f729Sjoerg }
3047330f729Sjoerg
3057330f729Sjoerg /// Compute a raw byte offset from a base region. Used for array bounds
3067330f729Sjoerg /// checking.
computeOffset(ProgramStateRef state,SValBuilder & svalBuilder,SVal location)3077330f729Sjoerg RegionRawOffsetV2 RegionRawOffsetV2::computeOffset(ProgramStateRef state,
3087330f729Sjoerg SValBuilder &svalBuilder,
3097330f729Sjoerg SVal location)
3107330f729Sjoerg {
3117330f729Sjoerg const MemRegion *region = location.getAsRegion();
3127330f729Sjoerg SVal offset = UndefinedVal();
3137330f729Sjoerg
3147330f729Sjoerg while (region) {
3157330f729Sjoerg switch (region->getKind()) {
3167330f729Sjoerg default: {
3177330f729Sjoerg if (const SubRegion *subReg = dyn_cast<SubRegion>(region)) {
3187330f729Sjoerg offset = getValue(offset, svalBuilder);
3197330f729Sjoerg if (!offset.isUnknownOrUndef())
3207330f729Sjoerg return RegionRawOffsetV2(subReg, offset);
3217330f729Sjoerg }
3227330f729Sjoerg return RegionRawOffsetV2();
3237330f729Sjoerg }
3247330f729Sjoerg case MemRegion::ElementRegionKind: {
3257330f729Sjoerg const ElementRegion *elemReg = cast<ElementRegion>(region);
3267330f729Sjoerg SVal index = elemReg->getIndex();
3277330f729Sjoerg if (!index.getAs<NonLoc>())
3287330f729Sjoerg return RegionRawOffsetV2();
3297330f729Sjoerg QualType elemType = elemReg->getElementType();
3307330f729Sjoerg // If the element is an incomplete type, go no further.
3317330f729Sjoerg ASTContext &astContext = svalBuilder.getContext();
3327330f729Sjoerg if (elemType->isIncompleteType())
3337330f729Sjoerg return RegionRawOffsetV2();
3347330f729Sjoerg
3357330f729Sjoerg // Update the offset.
3367330f729Sjoerg offset = addValue(state,
3377330f729Sjoerg getValue(offset, svalBuilder),
3387330f729Sjoerg scaleValue(state,
3397330f729Sjoerg index.castAs<NonLoc>(),
3407330f729Sjoerg astContext.getTypeSizeInChars(elemType),
3417330f729Sjoerg svalBuilder),
3427330f729Sjoerg svalBuilder);
3437330f729Sjoerg
3447330f729Sjoerg if (offset.isUnknownOrUndef())
3457330f729Sjoerg return RegionRawOffsetV2();
3467330f729Sjoerg
3477330f729Sjoerg region = elemReg->getSuperRegion();
3487330f729Sjoerg continue;
3497330f729Sjoerg }
3507330f729Sjoerg }
3517330f729Sjoerg }
3527330f729Sjoerg return RegionRawOffsetV2();
3537330f729Sjoerg }
3547330f729Sjoerg
registerArrayBoundCheckerV2(CheckerManager & mgr)3557330f729Sjoerg void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) {
3567330f729Sjoerg mgr.registerChecker<ArrayBoundCheckerV2>();
3577330f729Sjoerg }
3587330f729Sjoerg
shouldRegisterArrayBoundCheckerV2(const CheckerManager & mgr)359*e038c9c4Sjoerg bool ento::shouldRegisterArrayBoundCheckerV2(const CheckerManager &mgr) {
3607330f729Sjoerg return true;
3617330f729Sjoerg }
362