17330f729Sjoerg //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
27330f729Sjoerg //
37330f729Sjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47330f729Sjoerg // See https://llvm.org/LICENSE.txt for license information.
57330f729Sjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67330f729Sjoerg //
77330f729Sjoerg //===----------------------------------------------------------------------===//
87330f729Sjoerg //
97330f729Sjoerg // Defines basic, non-domain-specific mechanisms for tracking tainted values.
107330f729Sjoerg //
117330f729Sjoerg //===----------------------------------------------------------------------===//
127330f729Sjoerg
137330f729Sjoerg #include "Taint.h"
147330f729Sjoerg #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
157330f729Sjoerg #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
167330f729Sjoerg
177330f729Sjoerg using namespace clang;
187330f729Sjoerg using namespace ento;
197330f729Sjoerg using namespace taint;
207330f729Sjoerg
217330f729Sjoerg // Fully tainted symbols.
REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap,SymbolRef,TaintTagType)227330f729Sjoerg REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType)
237330f729Sjoerg
247330f729Sjoerg // Partially tainted symbols.
257330f729Sjoerg REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *,
267330f729Sjoerg TaintTagType)
277330f729Sjoerg REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
287330f729Sjoerg
297330f729Sjoerg void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
307330f729Sjoerg const char *Sep) {
317330f729Sjoerg TaintMapTy TM = State->get<TaintMap>();
327330f729Sjoerg
337330f729Sjoerg if (!TM.isEmpty())
347330f729Sjoerg Out << "Tainted symbols:" << NL;
357330f729Sjoerg
367330f729Sjoerg for (const auto &I : TM)
377330f729Sjoerg Out << I.first << " : " << I.second << NL;
387330f729Sjoerg }
397330f729Sjoerg
dumpTaint(ProgramStateRef State)40*e038c9c4Sjoerg void dumpTaint(ProgramStateRef State) { printTaint(State, llvm::errs()); }
417330f729Sjoerg
addTaint(ProgramStateRef State,const Stmt * S,const LocationContext * LCtx,TaintTagType Kind)427330f729Sjoerg ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S,
437330f729Sjoerg const LocationContext *LCtx,
447330f729Sjoerg TaintTagType Kind) {
457330f729Sjoerg return addTaint(State, State->getSVal(S, LCtx), Kind);
467330f729Sjoerg }
477330f729Sjoerg
addTaint(ProgramStateRef State,SVal V,TaintTagType Kind)487330f729Sjoerg ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V,
497330f729Sjoerg TaintTagType Kind) {
507330f729Sjoerg SymbolRef Sym = V.getAsSymbol();
517330f729Sjoerg if (Sym)
527330f729Sjoerg return addTaint(State, Sym, Kind);
537330f729Sjoerg
547330f729Sjoerg // If the SVal represents a structure, try to mass-taint all values within the
557330f729Sjoerg // structure. For now it only works efficiently on lazy compound values that
567330f729Sjoerg // were conjured during a conservative evaluation of a function - either as
577330f729Sjoerg // return values of functions that return structures or arrays by value, or as
587330f729Sjoerg // values of structures or arrays passed into the function by reference,
597330f729Sjoerg // directly or through pointer aliasing. Such lazy compound values are
607330f729Sjoerg // characterized by having exactly one binding in their captured store within
617330f729Sjoerg // their parent region, which is a conjured symbol default-bound to the base
627330f729Sjoerg // region of the parent region.
637330f729Sjoerg if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
647330f729Sjoerg if (Optional<SVal> binding =
65*e038c9c4Sjoerg State->getStateManager().getStoreManager().getDefaultBinding(
66*e038c9c4Sjoerg *LCV)) {
677330f729Sjoerg if (SymbolRef Sym = binding->getAsSymbol())
687330f729Sjoerg return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
697330f729Sjoerg }
707330f729Sjoerg }
717330f729Sjoerg
727330f729Sjoerg const MemRegion *R = V.getAsRegion();
737330f729Sjoerg return addTaint(State, R, Kind);
747330f729Sjoerg }
757330f729Sjoerg
addTaint(ProgramStateRef State,const MemRegion * R,TaintTagType Kind)767330f729Sjoerg ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R,
777330f729Sjoerg TaintTagType Kind) {
787330f729Sjoerg if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
797330f729Sjoerg return addTaint(State, SR->getSymbol(), Kind);
807330f729Sjoerg return State;
817330f729Sjoerg }
827330f729Sjoerg
addTaint(ProgramStateRef State,SymbolRef Sym,TaintTagType Kind)837330f729Sjoerg ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym,
847330f729Sjoerg TaintTagType Kind) {
857330f729Sjoerg // If this is a symbol cast, remove the cast before adding the taint. Taint
867330f729Sjoerg // is cast agnostic.
877330f729Sjoerg while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
887330f729Sjoerg Sym = SC->getOperand();
897330f729Sjoerg
907330f729Sjoerg ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind);
917330f729Sjoerg assert(NewState);
927330f729Sjoerg return NewState;
937330f729Sjoerg }
947330f729Sjoerg
removeTaint(ProgramStateRef State,SVal V)95*e038c9c4Sjoerg ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) {
96*e038c9c4Sjoerg SymbolRef Sym = V.getAsSymbol();
97*e038c9c4Sjoerg if (Sym)
98*e038c9c4Sjoerg return removeTaint(State, Sym);
99*e038c9c4Sjoerg
100*e038c9c4Sjoerg const MemRegion *R = V.getAsRegion();
101*e038c9c4Sjoerg return removeTaint(State, R);
102*e038c9c4Sjoerg }
103*e038c9c4Sjoerg
removeTaint(ProgramStateRef State,const MemRegion * R)104*e038c9c4Sjoerg ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) {
105*e038c9c4Sjoerg if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
106*e038c9c4Sjoerg return removeTaint(State, SR->getSymbol());
107*e038c9c4Sjoerg return State;
108*e038c9c4Sjoerg }
109*e038c9c4Sjoerg
removeTaint(ProgramStateRef State,SymbolRef Sym)110*e038c9c4Sjoerg ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) {
111*e038c9c4Sjoerg // If this is a symbol cast, remove the cast before adding the taint. Taint
112*e038c9c4Sjoerg // is cast agnostic.
113*e038c9c4Sjoerg while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
114*e038c9c4Sjoerg Sym = SC->getOperand();
115*e038c9c4Sjoerg
116*e038c9c4Sjoerg ProgramStateRef NewState = State->remove<TaintMap>(Sym);
117*e038c9c4Sjoerg assert(NewState);
118*e038c9c4Sjoerg return NewState;
119*e038c9c4Sjoerg }
120*e038c9c4Sjoerg
addPartialTaint(ProgramStateRef State,SymbolRef ParentSym,const SubRegion * SubRegion,TaintTagType Kind)1217330f729Sjoerg ProgramStateRef taint::addPartialTaint(ProgramStateRef State,
1227330f729Sjoerg SymbolRef ParentSym,
1237330f729Sjoerg const SubRegion *SubRegion,
1247330f729Sjoerg TaintTagType Kind) {
1257330f729Sjoerg // Ignore partial taint if the entire parent symbol is already tainted.
1267330f729Sjoerg if (const TaintTagType *T = State->get<TaintMap>(ParentSym))
1277330f729Sjoerg if (*T == Kind)
1287330f729Sjoerg return State;
1297330f729Sjoerg
1307330f729Sjoerg // Partial taint applies if only a portion of the symbol is tainted.
1317330f729Sjoerg if (SubRegion == SubRegion->getBaseRegion())
1327330f729Sjoerg return addTaint(State, ParentSym, Kind);
1337330f729Sjoerg
1347330f729Sjoerg const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym);
1357330f729Sjoerg TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>();
1367330f729Sjoerg TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap();
1377330f729Sjoerg
1387330f729Sjoerg Regs = F.add(Regs, SubRegion, Kind);
1397330f729Sjoerg ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs);
1407330f729Sjoerg assert(NewState);
1417330f729Sjoerg return NewState;
1427330f729Sjoerg }
1437330f729Sjoerg
isTainted(ProgramStateRef State,const Stmt * S,const LocationContext * LCtx,TaintTagType Kind)1447330f729Sjoerg bool taint::isTainted(ProgramStateRef State, const Stmt *S,
1457330f729Sjoerg const LocationContext *LCtx, TaintTagType Kind) {
1467330f729Sjoerg SVal val = State->getSVal(S, LCtx);
1477330f729Sjoerg return isTainted(State, val, Kind);
1487330f729Sjoerg }
1497330f729Sjoerg
isTainted(ProgramStateRef State,SVal V,TaintTagType Kind)1507330f729Sjoerg bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
151*e038c9c4Sjoerg if (SymbolRef Sym = V.getAsSymbol())
1527330f729Sjoerg return isTainted(State, Sym, Kind);
1537330f729Sjoerg if (const MemRegion *Reg = V.getAsRegion())
1547330f729Sjoerg return isTainted(State, Reg, Kind);
1557330f729Sjoerg return false;
1567330f729Sjoerg }
1577330f729Sjoerg
isTainted(ProgramStateRef State,const MemRegion * Reg,TaintTagType K)1587330f729Sjoerg bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
1597330f729Sjoerg TaintTagType K) {
1607330f729Sjoerg if (!Reg)
1617330f729Sjoerg return false;
1627330f729Sjoerg
1637330f729Sjoerg // Element region (array element) is tainted if either the base or the offset
1647330f729Sjoerg // are tainted.
1657330f729Sjoerg if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg))
1667330f729Sjoerg return isTainted(State, ER->getSuperRegion(), K) ||
1677330f729Sjoerg isTainted(State, ER->getIndex(), K);
1687330f729Sjoerg
1697330f729Sjoerg if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg))
1707330f729Sjoerg return isTainted(State, SR->getSymbol(), K);
1717330f729Sjoerg
1727330f729Sjoerg if (const SubRegion *ER = dyn_cast<SubRegion>(Reg))
1737330f729Sjoerg return isTainted(State, ER->getSuperRegion(), K);
1747330f729Sjoerg
1757330f729Sjoerg return false;
1767330f729Sjoerg }
1777330f729Sjoerg
isTainted(ProgramStateRef State,SymbolRef Sym,TaintTagType Kind)1787330f729Sjoerg bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
1797330f729Sjoerg if (!Sym)
1807330f729Sjoerg return false;
1817330f729Sjoerg
1827330f729Sjoerg // Traverse all the symbols this symbol depends on to see if any are tainted.
1837330f729Sjoerg for (SymExpr::symbol_iterator SI = Sym->symbol_begin(),
184*e038c9c4Sjoerg SE = Sym->symbol_end();
185*e038c9c4Sjoerg SI != SE; ++SI) {
1867330f729Sjoerg if (!isa<SymbolData>(*SI))
1877330f729Sjoerg continue;
1887330f729Sjoerg
1897330f729Sjoerg if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) {
1907330f729Sjoerg if (*Tag == Kind)
1917330f729Sjoerg return true;
1927330f729Sjoerg }
1937330f729Sjoerg
1947330f729Sjoerg if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) {
1957330f729Sjoerg // If this is a SymbolDerived with a tainted parent, it's also tainted.
1967330f729Sjoerg if (isTainted(State, SD->getParentSymbol(), Kind))
1977330f729Sjoerg return true;
1987330f729Sjoerg
1997330f729Sjoerg // If this is a SymbolDerived with the same parent symbol as another
2007330f729Sjoerg // tainted SymbolDerived and a region that's a sub-region of that tainted
2017330f729Sjoerg // symbol, it's also tainted.
2027330f729Sjoerg if (const TaintedSubRegions *Regs =
2037330f729Sjoerg State->get<DerivedSymTaint>(SD->getParentSymbol())) {
2047330f729Sjoerg const TypedValueRegion *R = SD->getRegion();
2057330f729Sjoerg for (auto I : *Regs) {
2067330f729Sjoerg // FIXME: The logic to identify tainted regions could be more
2077330f729Sjoerg // complete. For example, this would not currently identify
2087330f729Sjoerg // overlapping fields in a union as tainted. To identify this we can
2097330f729Sjoerg // check for overlapping/nested byte offsets.
2107330f729Sjoerg if (Kind == I.second && R->isSubRegionOf(I.first))
2117330f729Sjoerg return true;
2127330f729Sjoerg }
2137330f729Sjoerg }
2147330f729Sjoerg }
2157330f729Sjoerg
2167330f729Sjoerg // If memory region is tainted, data is also tainted.
2177330f729Sjoerg if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
2187330f729Sjoerg if (isTainted(State, SRV->getRegion(), Kind))
2197330f729Sjoerg return true;
2207330f729Sjoerg }
2217330f729Sjoerg
2227330f729Sjoerg // If this is a SymbolCast from a tainted value, it's also tainted.
2237330f729Sjoerg if (const auto *SC = dyn_cast<SymbolCast>(*SI)) {
2247330f729Sjoerg if (isTainted(State, SC->getOperand(), Kind))
2257330f729Sjoerg return true;
2267330f729Sjoerg }
2277330f729Sjoerg }
2287330f729Sjoerg
2297330f729Sjoerg return false;
2307330f729Sjoerg }
2317330f729Sjoerg
VisitNode(const ExplodedNode * N,BugReporterContext & BRC,PathSensitiveBugReport & BR)2327330f729Sjoerg PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N,
2337330f729Sjoerg BugReporterContext &BRC,
2347330f729Sjoerg PathSensitiveBugReport &BR) {
2357330f729Sjoerg
2367330f729Sjoerg // Find the ExplodedNode where the taint was first introduced
2377330f729Sjoerg if (!isTainted(N->getState(), V) ||
2387330f729Sjoerg isTainted(N->getFirstPred()->getState(), V))
2397330f729Sjoerg return nullptr;
2407330f729Sjoerg
2417330f729Sjoerg const Stmt *S = N->getStmtForDiagnostics();
2427330f729Sjoerg if (!S)
2437330f729Sjoerg return nullptr;
2447330f729Sjoerg
2457330f729Sjoerg const LocationContext *NCtx = N->getLocationContext();
2467330f729Sjoerg PathDiagnosticLocation L =
2477330f729Sjoerg PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx);
2487330f729Sjoerg if (!L.isValid() || !L.asLocation().isValid())
2497330f729Sjoerg return nullptr;
2507330f729Sjoerg
2517330f729Sjoerg return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here");
2527330f729Sjoerg }
253