xref: /netbsd-src/external/apache2/llvm/dist/clang/lib/StaticAnalyzer/Checkers/Taint.cpp (revision e038c9c4676b0f19b1b7dd08a940c6ed64a6d5ae)
17330f729Sjoerg //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-//
27330f729Sjoerg //
37330f729Sjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47330f729Sjoerg // See https://llvm.org/LICENSE.txt for license information.
57330f729Sjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67330f729Sjoerg //
77330f729Sjoerg //===----------------------------------------------------------------------===//
87330f729Sjoerg //
97330f729Sjoerg // Defines basic, non-domain-specific mechanisms for tracking tainted values.
107330f729Sjoerg //
117330f729Sjoerg //===----------------------------------------------------------------------===//
127330f729Sjoerg 
137330f729Sjoerg #include "Taint.h"
147330f729Sjoerg #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
157330f729Sjoerg #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
167330f729Sjoerg 
177330f729Sjoerg using namespace clang;
187330f729Sjoerg using namespace ento;
197330f729Sjoerg using namespace taint;
207330f729Sjoerg 
217330f729Sjoerg // Fully tainted symbols.
REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap,SymbolRef,TaintTagType)227330f729Sjoerg REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType)
237330f729Sjoerg 
247330f729Sjoerg // Partially tainted symbols.
257330f729Sjoerg REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *,
267330f729Sjoerg                                        TaintTagType)
277330f729Sjoerg REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
287330f729Sjoerg 
297330f729Sjoerg void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
307330f729Sjoerg                        const char *Sep) {
317330f729Sjoerg   TaintMapTy TM = State->get<TaintMap>();
327330f729Sjoerg 
337330f729Sjoerg   if (!TM.isEmpty())
347330f729Sjoerg     Out << "Tainted symbols:" << NL;
357330f729Sjoerg 
367330f729Sjoerg   for (const auto &I : TM)
377330f729Sjoerg     Out << I.first << " : " << I.second << NL;
387330f729Sjoerg }
397330f729Sjoerg 
dumpTaint(ProgramStateRef State)40*e038c9c4Sjoerg void dumpTaint(ProgramStateRef State) { printTaint(State, llvm::errs()); }
417330f729Sjoerg 
addTaint(ProgramStateRef State,const Stmt * S,const LocationContext * LCtx,TaintTagType Kind)427330f729Sjoerg ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S,
437330f729Sjoerg                                 const LocationContext *LCtx,
447330f729Sjoerg                                 TaintTagType Kind) {
457330f729Sjoerg   return addTaint(State, State->getSVal(S, LCtx), Kind);
467330f729Sjoerg }
477330f729Sjoerg 
addTaint(ProgramStateRef State,SVal V,TaintTagType Kind)487330f729Sjoerg ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V,
497330f729Sjoerg                                 TaintTagType Kind) {
507330f729Sjoerg   SymbolRef Sym = V.getAsSymbol();
517330f729Sjoerg   if (Sym)
527330f729Sjoerg     return addTaint(State, Sym, Kind);
537330f729Sjoerg 
547330f729Sjoerg   // If the SVal represents a structure, try to mass-taint all values within the
557330f729Sjoerg   // structure. For now it only works efficiently on lazy compound values that
567330f729Sjoerg   // were conjured during a conservative evaluation of a function - either as
577330f729Sjoerg   // return values of functions that return structures or arrays by value, or as
587330f729Sjoerg   // values of structures or arrays passed into the function by reference,
597330f729Sjoerg   // directly or through pointer aliasing. Such lazy compound values are
607330f729Sjoerg   // characterized by having exactly one binding in their captured store within
617330f729Sjoerg   // their parent region, which is a conjured symbol default-bound to the base
627330f729Sjoerg   // region of the parent region.
637330f729Sjoerg   if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
647330f729Sjoerg     if (Optional<SVal> binding =
65*e038c9c4Sjoerg             State->getStateManager().getStoreManager().getDefaultBinding(
66*e038c9c4Sjoerg                 *LCV)) {
677330f729Sjoerg       if (SymbolRef Sym = binding->getAsSymbol())
687330f729Sjoerg         return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
697330f729Sjoerg     }
707330f729Sjoerg   }
717330f729Sjoerg 
727330f729Sjoerg   const MemRegion *R = V.getAsRegion();
737330f729Sjoerg   return addTaint(State, R, Kind);
747330f729Sjoerg }
757330f729Sjoerg 
addTaint(ProgramStateRef State,const MemRegion * R,TaintTagType Kind)767330f729Sjoerg ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R,
777330f729Sjoerg                                 TaintTagType Kind) {
787330f729Sjoerg   if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
797330f729Sjoerg     return addTaint(State, SR->getSymbol(), Kind);
807330f729Sjoerg   return State;
817330f729Sjoerg }
827330f729Sjoerg 
addTaint(ProgramStateRef State,SymbolRef Sym,TaintTagType Kind)837330f729Sjoerg ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym,
847330f729Sjoerg                                 TaintTagType Kind) {
857330f729Sjoerg   // If this is a symbol cast, remove the cast before adding the taint. Taint
867330f729Sjoerg   // is cast agnostic.
877330f729Sjoerg   while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
887330f729Sjoerg     Sym = SC->getOperand();
897330f729Sjoerg 
907330f729Sjoerg   ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind);
917330f729Sjoerg   assert(NewState);
927330f729Sjoerg   return NewState;
937330f729Sjoerg }
947330f729Sjoerg 
removeTaint(ProgramStateRef State,SVal V)95*e038c9c4Sjoerg ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) {
96*e038c9c4Sjoerg   SymbolRef Sym = V.getAsSymbol();
97*e038c9c4Sjoerg   if (Sym)
98*e038c9c4Sjoerg     return removeTaint(State, Sym);
99*e038c9c4Sjoerg 
100*e038c9c4Sjoerg   const MemRegion *R = V.getAsRegion();
101*e038c9c4Sjoerg   return removeTaint(State, R);
102*e038c9c4Sjoerg }
103*e038c9c4Sjoerg 
removeTaint(ProgramStateRef State,const MemRegion * R)104*e038c9c4Sjoerg ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) {
105*e038c9c4Sjoerg   if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
106*e038c9c4Sjoerg     return removeTaint(State, SR->getSymbol());
107*e038c9c4Sjoerg   return State;
108*e038c9c4Sjoerg }
109*e038c9c4Sjoerg 
removeTaint(ProgramStateRef State,SymbolRef Sym)110*e038c9c4Sjoerg ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) {
111*e038c9c4Sjoerg   // If this is a symbol cast, remove the cast before adding the taint. Taint
112*e038c9c4Sjoerg   // is cast agnostic.
113*e038c9c4Sjoerg   while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
114*e038c9c4Sjoerg     Sym = SC->getOperand();
115*e038c9c4Sjoerg 
116*e038c9c4Sjoerg   ProgramStateRef NewState = State->remove<TaintMap>(Sym);
117*e038c9c4Sjoerg   assert(NewState);
118*e038c9c4Sjoerg   return NewState;
119*e038c9c4Sjoerg }
120*e038c9c4Sjoerg 
addPartialTaint(ProgramStateRef State,SymbolRef ParentSym,const SubRegion * SubRegion,TaintTagType Kind)1217330f729Sjoerg ProgramStateRef taint::addPartialTaint(ProgramStateRef State,
1227330f729Sjoerg                                        SymbolRef ParentSym,
1237330f729Sjoerg                                        const SubRegion *SubRegion,
1247330f729Sjoerg                                        TaintTagType Kind) {
1257330f729Sjoerg   // Ignore partial taint if the entire parent symbol is already tainted.
1267330f729Sjoerg   if (const TaintTagType *T = State->get<TaintMap>(ParentSym))
1277330f729Sjoerg     if (*T == Kind)
1287330f729Sjoerg       return State;
1297330f729Sjoerg 
1307330f729Sjoerg   // Partial taint applies if only a portion of the symbol is tainted.
1317330f729Sjoerg   if (SubRegion == SubRegion->getBaseRegion())
1327330f729Sjoerg     return addTaint(State, ParentSym, Kind);
1337330f729Sjoerg 
1347330f729Sjoerg   const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym);
1357330f729Sjoerg   TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>();
1367330f729Sjoerg   TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap();
1377330f729Sjoerg 
1387330f729Sjoerg   Regs = F.add(Regs, SubRegion, Kind);
1397330f729Sjoerg   ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs);
1407330f729Sjoerg   assert(NewState);
1417330f729Sjoerg   return NewState;
1427330f729Sjoerg }
1437330f729Sjoerg 
isTainted(ProgramStateRef State,const Stmt * S,const LocationContext * LCtx,TaintTagType Kind)1447330f729Sjoerg bool taint::isTainted(ProgramStateRef State, const Stmt *S,
1457330f729Sjoerg                       const LocationContext *LCtx, TaintTagType Kind) {
1467330f729Sjoerg   SVal val = State->getSVal(S, LCtx);
1477330f729Sjoerg   return isTainted(State, val, Kind);
1487330f729Sjoerg }
1497330f729Sjoerg 
isTainted(ProgramStateRef State,SVal V,TaintTagType Kind)1507330f729Sjoerg bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
151*e038c9c4Sjoerg   if (SymbolRef Sym = V.getAsSymbol())
1527330f729Sjoerg     return isTainted(State, Sym, Kind);
1537330f729Sjoerg   if (const MemRegion *Reg = V.getAsRegion())
1547330f729Sjoerg     return isTainted(State, Reg, Kind);
1557330f729Sjoerg   return false;
1567330f729Sjoerg }
1577330f729Sjoerg 
isTainted(ProgramStateRef State,const MemRegion * Reg,TaintTagType K)1587330f729Sjoerg bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
1597330f729Sjoerg                       TaintTagType K) {
1607330f729Sjoerg   if (!Reg)
1617330f729Sjoerg     return false;
1627330f729Sjoerg 
1637330f729Sjoerg   // Element region (array element) is tainted if either the base or the offset
1647330f729Sjoerg   // are tainted.
1657330f729Sjoerg   if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg))
1667330f729Sjoerg     return isTainted(State, ER->getSuperRegion(), K) ||
1677330f729Sjoerg            isTainted(State, ER->getIndex(), K);
1687330f729Sjoerg 
1697330f729Sjoerg   if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg))
1707330f729Sjoerg     return isTainted(State, SR->getSymbol(), K);
1717330f729Sjoerg 
1727330f729Sjoerg   if (const SubRegion *ER = dyn_cast<SubRegion>(Reg))
1737330f729Sjoerg     return isTainted(State, ER->getSuperRegion(), K);
1747330f729Sjoerg 
1757330f729Sjoerg   return false;
1767330f729Sjoerg }
1777330f729Sjoerg 
isTainted(ProgramStateRef State,SymbolRef Sym,TaintTagType Kind)1787330f729Sjoerg bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
1797330f729Sjoerg   if (!Sym)
1807330f729Sjoerg     return false;
1817330f729Sjoerg 
1827330f729Sjoerg   // Traverse all the symbols this symbol depends on to see if any are tainted.
1837330f729Sjoerg   for (SymExpr::symbol_iterator SI = Sym->symbol_begin(),
184*e038c9c4Sjoerg                                 SE = Sym->symbol_end();
185*e038c9c4Sjoerg        SI != SE; ++SI) {
1867330f729Sjoerg     if (!isa<SymbolData>(*SI))
1877330f729Sjoerg       continue;
1887330f729Sjoerg 
1897330f729Sjoerg     if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) {
1907330f729Sjoerg       if (*Tag == Kind)
1917330f729Sjoerg         return true;
1927330f729Sjoerg     }
1937330f729Sjoerg 
1947330f729Sjoerg     if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) {
1957330f729Sjoerg       // If this is a SymbolDerived with a tainted parent, it's also tainted.
1967330f729Sjoerg       if (isTainted(State, SD->getParentSymbol(), Kind))
1977330f729Sjoerg         return true;
1987330f729Sjoerg 
1997330f729Sjoerg       // If this is a SymbolDerived with the same parent symbol as another
2007330f729Sjoerg       // tainted SymbolDerived and a region that's a sub-region of that tainted
2017330f729Sjoerg       // symbol, it's also tainted.
2027330f729Sjoerg       if (const TaintedSubRegions *Regs =
2037330f729Sjoerg               State->get<DerivedSymTaint>(SD->getParentSymbol())) {
2047330f729Sjoerg         const TypedValueRegion *R = SD->getRegion();
2057330f729Sjoerg         for (auto I : *Regs) {
2067330f729Sjoerg           // FIXME: The logic to identify tainted regions could be more
2077330f729Sjoerg           // complete. For example, this would not currently identify
2087330f729Sjoerg           // overlapping fields in a union as tainted. To identify this we can
2097330f729Sjoerg           // check for overlapping/nested byte offsets.
2107330f729Sjoerg           if (Kind == I.second && R->isSubRegionOf(I.first))
2117330f729Sjoerg             return true;
2127330f729Sjoerg         }
2137330f729Sjoerg       }
2147330f729Sjoerg     }
2157330f729Sjoerg 
2167330f729Sjoerg     // If memory region is tainted, data is also tainted.
2177330f729Sjoerg     if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
2187330f729Sjoerg       if (isTainted(State, SRV->getRegion(), Kind))
2197330f729Sjoerg         return true;
2207330f729Sjoerg     }
2217330f729Sjoerg 
2227330f729Sjoerg     // If this is a SymbolCast from a tainted value, it's also tainted.
2237330f729Sjoerg     if (const auto *SC = dyn_cast<SymbolCast>(*SI)) {
2247330f729Sjoerg       if (isTainted(State, SC->getOperand(), Kind))
2257330f729Sjoerg         return true;
2267330f729Sjoerg     }
2277330f729Sjoerg   }
2287330f729Sjoerg 
2297330f729Sjoerg   return false;
2307330f729Sjoerg }
2317330f729Sjoerg 
VisitNode(const ExplodedNode * N,BugReporterContext & BRC,PathSensitiveBugReport & BR)2327330f729Sjoerg PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N,
2337330f729Sjoerg                                                   BugReporterContext &BRC,
2347330f729Sjoerg                                                   PathSensitiveBugReport &BR) {
2357330f729Sjoerg 
2367330f729Sjoerg   // Find the ExplodedNode where the taint was first introduced
2377330f729Sjoerg   if (!isTainted(N->getState(), V) ||
2387330f729Sjoerg       isTainted(N->getFirstPred()->getState(), V))
2397330f729Sjoerg     return nullptr;
2407330f729Sjoerg 
2417330f729Sjoerg   const Stmt *S = N->getStmtForDiagnostics();
2427330f729Sjoerg   if (!S)
2437330f729Sjoerg     return nullptr;
2447330f729Sjoerg 
2457330f729Sjoerg   const LocationContext *NCtx = N->getLocationContext();
2467330f729Sjoerg   PathDiagnosticLocation L =
2477330f729Sjoerg       PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx);
2487330f729Sjoerg   if (!L.isValid() || !L.asLocation().isValid())
2497330f729Sjoerg     return nullptr;
2507330f729Sjoerg 
2517330f729Sjoerg   return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here");
2527330f729Sjoerg }
253