//== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This checker defines the attack surface for generic taint propagation.
//
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are under constrained.
//
//===----------------------------------------------------------------------===//

#include "Taint.h"
#include "Yaml.h"
#include "clang/AST/Attr.h"
#include "clang/Basic/Builtins.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "llvm/Support/YAMLTraits.h"

#include <algorithm>
#include <limits>
#include <memory>
33e5dd7070Spatrick #include <unordered_map> 34e5dd7070Spatrick #include <utility> 35e5dd7070Spatrick 36e5dd7070Spatrick using namespace clang; 37e5dd7070Spatrick using namespace ento; 38e5dd7070Spatrick using namespace taint; 39e5dd7070Spatrick 40e5dd7070Spatrick namespace { 41*ec727ea7Spatrick class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> { 42e5dd7070Spatrick public: 43e5dd7070Spatrick static void *getTag() { 44e5dd7070Spatrick static int Tag; 45e5dd7070Spatrick return &Tag; 46e5dd7070Spatrick } 47e5dd7070Spatrick 48*ec727ea7Spatrick void checkPreCall(const CallEvent &Call, CheckerContext &C) const; 49*ec727ea7Spatrick void checkPostCall(const CallEvent &Call, CheckerContext &C) const; 50e5dd7070Spatrick 51e5dd7070Spatrick void printState(raw_ostream &Out, ProgramStateRef State, const char *NL, 52e5dd7070Spatrick const char *Sep) const override; 53e5dd7070Spatrick 54e5dd7070Spatrick using ArgVector = SmallVector<unsigned, 2>; 55e5dd7070Spatrick using SignedArgVector = SmallVector<int, 2>; 56e5dd7070Spatrick 57e5dd7070Spatrick enum class VariadicType { None, Src, Dst }; 58e5dd7070Spatrick 59e5dd7070Spatrick /// Used to parse the configuration file. 
60e5dd7070Spatrick struct TaintConfiguration { 61e5dd7070Spatrick using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>; 62e5dd7070Spatrick 63e5dd7070Spatrick struct Propagation { 64e5dd7070Spatrick std::string Name; 65e5dd7070Spatrick std::string Scope; 66e5dd7070Spatrick ArgVector SrcArgs; 67e5dd7070Spatrick SignedArgVector DstArgs; 68e5dd7070Spatrick VariadicType VarType; 69e5dd7070Spatrick unsigned VarIndex; 70e5dd7070Spatrick }; 71e5dd7070Spatrick 72e5dd7070Spatrick std::vector<Propagation> Propagations; 73e5dd7070Spatrick std::vector<NameScopeArgs> Filters; 74e5dd7070Spatrick std::vector<NameScopeArgs> Sinks; 75e5dd7070Spatrick 76e5dd7070Spatrick TaintConfiguration() = default; 77e5dd7070Spatrick TaintConfiguration(const TaintConfiguration &) = default; 78e5dd7070Spatrick TaintConfiguration(TaintConfiguration &&) = default; 79e5dd7070Spatrick TaintConfiguration &operator=(const TaintConfiguration &) = default; 80e5dd7070Spatrick TaintConfiguration &operator=(TaintConfiguration &&) = default; 81e5dd7070Spatrick }; 82e5dd7070Spatrick 83e5dd7070Spatrick /// Convert SignedArgVector to ArgVector. 84e5dd7070Spatrick ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option, 85*ec727ea7Spatrick const SignedArgVector &Args); 86e5dd7070Spatrick 87e5dd7070Spatrick /// Parse the config. 88e5dd7070Spatrick void parseConfiguration(CheckerManager &Mgr, const std::string &Option, 89e5dd7070Spatrick TaintConfiguration &&Config); 90e5dd7070Spatrick 91e5dd7070Spatrick static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()}; 92e5dd7070Spatrick /// Denotes the return vale. 
93e5dd7070Spatrick static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() - 94e5dd7070Spatrick 1}; 95e5dd7070Spatrick 96e5dd7070Spatrick private: 97e5dd7070Spatrick mutable std::unique_ptr<BugType> BT; 98e5dd7070Spatrick void initBugType() const { 99e5dd7070Spatrick if (!BT) 100*ec727ea7Spatrick BT = std::make_unique<BugType>(this, "Use of Untrusted Data", 101*ec727ea7Spatrick "Untrusted Data"); 102e5dd7070Spatrick } 103e5dd7070Spatrick 104e5dd7070Spatrick struct FunctionData { 105e5dd7070Spatrick FunctionData() = delete; 106e5dd7070Spatrick FunctionData(const FunctionData &) = default; 107e5dd7070Spatrick FunctionData(FunctionData &&) = default; 108e5dd7070Spatrick FunctionData &operator=(const FunctionData &) = delete; 109e5dd7070Spatrick FunctionData &operator=(FunctionData &&) = delete; 110e5dd7070Spatrick 111*ec727ea7Spatrick static Optional<FunctionData> create(const CallEvent &Call, 112e5dd7070Spatrick const CheckerContext &C) { 113*ec727ea7Spatrick if (!Call.getDecl()) 114*ec727ea7Spatrick return None; 115*ec727ea7Spatrick 116*ec727ea7Spatrick const FunctionDecl *FDecl = Call.getDecl()->getAsFunction(); 117e5dd7070Spatrick if (!FDecl || (FDecl->getKind() != Decl::Function && 118e5dd7070Spatrick FDecl->getKind() != Decl::CXXMethod)) 119e5dd7070Spatrick return None; 120e5dd7070Spatrick 121e5dd7070Spatrick StringRef Name = C.getCalleeName(FDecl); 122e5dd7070Spatrick std::string FullName = FDecl->getQualifiedNameAsString(); 123e5dd7070Spatrick if (Name.empty() || FullName.empty()) 124e5dd7070Spatrick return None; 125e5dd7070Spatrick 126e5dd7070Spatrick return FunctionData{FDecl, Name, FullName}; 127e5dd7070Spatrick } 128e5dd7070Spatrick 129e5dd7070Spatrick bool isInScope(StringRef Scope) const { 130e5dd7070Spatrick return StringRef(FullName).startswith(Scope); 131e5dd7070Spatrick } 132e5dd7070Spatrick 133e5dd7070Spatrick const FunctionDecl *const FDecl; 134e5dd7070Spatrick const StringRef Name; 135e5dd7070Spatrick const std::string 
FullName; 136e5dd7070Spatrick }; 137e5dd7070Spatrick 138e5dd7070Spatrick /// Catch taint related bugs. Check if tainted data is passed to a 139e5dd7070Spatrick /// system call etc. Returns true on matching. 140*ec727ea7Spatrick bool checkPre(const CallEvent &Call, const FunctionData &FData, 141e5dd7070Spatrick CheckerContext &C) const; 142e5dd7070Spatrick 143e5dd7070Spatrick /// Add taint sources on a pre-visit. Returns true on matching. 144*ec727ea7Spatrick bool addSourcesPre(const CallEvent &Call, const FunctionData &FData, 145e5dd7070Spatrick CheckerContext &C) const; 146e5dd7070Spatrick 147e5dd7070Spatrick /// Mark filter's arguments not tainted on a pre-visit. Returns true on 148e5dd7070Spatrick /// matching. 149*ec727ea7Spatrick bool addFiltersPre(const CallEvent &Call, const FunctionData &FData, 150e5dd7070Spatrick CheckerContext &C) const; 151e5dd7070Spatrick 152e5dd7070Spatrick /// Propagate taint generated at pre-visit. Returns true on matching. 153*ec727ea7Spatrick static bool propagateFromPre(const CallEvent &Call, CheckerContext &C); 154e5dd7070Spatrick 155e5dd7070Spatrick /// Check if the region the expression evaluates to is the standard input, 156e5dd7070Spatrick /// and thus, is tainted. 157e5dd7070Spatrick static bool isStdin(const Expr *E, CheckerContext &C); 158e5dd7070Spatrick 159e5dd7070Spatrick /// Given a pointer argument, return the value it points to. 160*ec727ea7Spatrick static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg); 161e5dd7070Spatrick 162e5dd7070Spatrick /// Check for CWE-134: Uncontrolled Format String. 
163e5dd7070Spatrick static constexpr llvm::StringLiteral MsgUncontrolledFormatString = 164e5dd7070Spatrick "Untrusted data is used as a format string " 165e5dd7070Spatrick "(CWE-134: Uncontrolled Format String)"; 166*ec727ea7Spatrick bool checkUncontrolledFormatString(const CallEvent &Call, 167e5dd7070Spatrick CheckerContext &C) const; 168e5dd7070Spatrick 169e5dd7070Spatrick /// Check for: 170e5dd7070Spatrick /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 171e5dd7070Spatrick /// CWE-78, "Failure to Sanitize Data into an OS Command" 172e5dd7070Spatrick static constexpr llvm::StringLiteral MsgSanitizeSystemArgs = 173e5dd7070Spatrick "Untrusted data is passed to a system call " 174e5dd7070Spatrick "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 175*ec727ea7Spatrick bool checkSystemCall(const CallEvent &Call, StringRef Name, 176e5dd7070Spatrick CheckerContext &C) const; 177e5dd7070Spatrick 178e5dd7070Spatrick /// Check if tainted data is used as a buffer size ins strn.. functions, 179e5dd7070Spatrick /// and allocators. 180e5dd7070Spatrick static constexpr llvm::StringLiteral MsgTaintedBufferSize = 181e5dd7070Spatrick "Untrusted data is used to specify the buffer size " 182e5dd7070Spatrick "(CERT/STR31-C. Guarantee that storage for strings has sufficient space " 183e5dd7070Spatrick "for character data and the null terminator)"; 184*ec727ea7Spatrick bool checkTaintedBufferSize(const CallEvent &Call, CheckerContext &C) const; 185e5dd7070Spatrick 186e5dd7070Spatrick /// Check if tainted data is used as a custom sink's parameter. 187e5dd7070Spatrick static constexpr llvm::StringLiteral MsgCustomSink = 188e5dd7070Spatrick "Untrusted data is passed to a user-defined sink"; 189*ec727ea7Spatrick bool checkCustomSinks(const CallEvent &Call, const FunctionData &FData, 190e5dd7070Spatrick CheckerContext &C) const; 191e5dd7070Spatrick 192e5dd7070Spatrick /// Generate a report if the expression is tainted or points to tainted data. 
193e5dd7070Spatrick bool generateReportIfTainted(const Expr *E, StringRef Msg, 194e5dd7070Spatrick CheckerContext &C) const; 195e5dd7070Spatrick 196e5dd7070Spatrick struct TaintPropagationRule; 197e5dd7070Spatrick template <typename T> 198e5dd7070Spatrick using ConfigDataMap = 199e5dd7070Spatrick std::unordered_multimap<std::string, std::pair<std::string, T>>; 200e5dd7070Spatrick using NameRuleMap = ConfigDataMap<TaintPropagationRule>; 201e5dd7070Spatrick using NameArgMap = ConfigDataMap<ArgVector>; 202e5dd7070Spatrick 203e5dd7070Spatrick /// Find a function with the given name and scope. Returns the first match 204e5dd7070Spatrick /// or the end of the map. 205e5dd7070Spatrick template <typename T> 206e5dd7070Spatrick static auto findFunctionInConfig(const ConfigDataMap<T> &Map, 207e5dd7070Spatrick const FunctionData &FData); 208e5dd7070Spatrick 209e5dd7070Spatrick /// A struct used to specify taint propagation rules for a function. 210e5dd7070Spatrick /// 211e5dd7070Spatrick /// If any of the possible taint source arguments is tainted, all of the 212e5dd7070Spatrick /// destination arguments should also be tainted. Use InvalidArgIndex in the 213e5dd7070Spatrick /// src list to specify that all of the arguments can introduce taint. Use 214e5dd7070Spatrick /// InvalidArgIndex in the dst arguments to signify that all the non-const 215e5dd7070Spatrick /// pointer and reference arguments might be tainted on return. If 216e5dd7070Spatrick /// ReturnValueIndex is added to the dst list, the return value will be 217e5dd7070Spatrick /// tainted. 218e5dd7070Spatrick struct TaintPropagationRule { 219*ec727ea7Spatrick using PropagationFuncType = bool (*)(bool IsTainted, const CallEvent &Call, 220e5dd7070Spatrick CheckerContext &C); 221e5dd7070Spatrick 222e5dd7070Spatrick /// List of arguments which can be taint sources and should be checked. 223e5dd7070Spatrick ArgVector SrcArgs; 224e5dd7070Spatrick /// List of arguments which should be tainted on function return. 
225e5dd7070Spatrick ArgVector DstArgs; 226e5dd7070Spatrick /// Index for the first variadic parameter if exist. 227e5dd7070Spatrick unsigned VariadicIndex; 228e5dd7070Spatrick /// Show when a function has variadic parameters. If it has, it marks all 229e5dd7070Spatrick /// of them as source or destination. 230e5dd7070Spatrick VariadicType VarType; 231e5dd7070Spatrick /// Special function for tainted source determination. If defined, it can 232e5dd7070Spatrick /// override the default behavior. 233e5dd7070Spatrick PropagationFuncType PropagationFunc; 234e5dd7070Spatrick 235e5dd7070Spatrick TaintPropagationRule() 236e5dd7070Spatrick : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None), 237e5dd7070Spatrick PropagationFunc(nullptr) {} 238e5dd7070Spatrick 239e5dd7070Spatrick TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst, 240e5dd7070Spatrick VariadicType Var = VariadicType::None, 241e5dd7070Spatrick unsigned VarIndex = InvalidArgIndex, 242e5dd7070Spatrick PropagationFuncType Func = nullptr) 243e5dd7070Spatrick : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)), 244e5dd7070Spatrick VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {} 245e5dd7070Spatrick 246e5dd7070Spatrick /// Get the propagation rule for a given function. 
247e5dd7070Spatrick static TaintPropagationRule 248e5dd7070Spatrick getTaintPropagationRule(const NameRuleMap &CustomPropagations, 249e5dd7070Spatrick const FunctionData &FData, CheckerContext &C); 250e5dd7070Spatrick 251e5dd7070Spatrick void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 252e5dd7070Spatrick void addDstArg(unsigned A) { DstArgs.push_back(A); } 253e5dd7070Spatrick 254e5dd7070Spatrick bool isNull() const { 255e5dd7070Spatrick return SrcArgs.empty() && DstArgs.empty() && 256e5dd7070Spatrick VariadicType::None == VarType; 257e5dd7070Spatrick } 258e5dd7070Spatrick 259e5dd7070Spatrick bool isDestinationArgument(unsigned ArgNum) const { 260e5dd7070Spatrick return (llvm::find(DstArgs, ArgNum) != DstArgs.end()); 261e5dd7070Spatrick } 262e5dd7070Spatrick 263*ec727ea7Spatrick static bool isTaintedOrPointsToTainted(const Expr *E, 264*ec727ea7Spatrick const ProgramStateRef &State, 265e5dd7070Spatrick CheckerContext &C) { 266e5dd7070Spatrick if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C)) 267e5dd7070Spatrick return true; 268e5dd7070Spatrick 269e5dd7070Spatrick if (!E->getType().getTypePtr()->isPointerType()) 270e5dd7070Spatrick return false; 271e5dd7070Spatrick 272*ec727ea7Spatrick Optional<SVal> V = getPointeeOf(C, E); 273e5dd7070Spatrick return (V && isTainted(State, *V)); 274e5dd7070Spatrick } 275e5dd7070Spatrick 276e5dd7070Spatrick /// Pre-process a function which propagates taint according to the 277e5dd7070Spatrick /// taint rule. 278*ec727ea7Spatrick ProgramStateRef process(const CallEvent &Call, CheckerContext &C) const; 279e5dd7070Spatrick 280e5dd7070Spatrick // Functions for custom taintedness propagation. 281*ec727ea7Spatrick static bool postSocket(bool IsTainted, const CallEvent &Call, 282e5dd7070Spatrick CheckerContext &C); 283e5dd7070Spatrick }; 284e5dd7070Spatrick 285e5dd7070Spatrick /// Defines a map between the propagation function's name, scope 286e5dd7070Spatrick /// and TaintPropagationRule. 
287e5dd7070Spatrick NameRuleMap CustomPropagations; 288e5dd7070Spatrick 289e5dd7070Spatrick /// Defines a map between the filter function's name, scope and filtering 290e5dd7070Spatrick /// args. 291e5dd7070Spatrick NameArgMap CustomFilters; 292e5dd7070Spatrick 293e5dd7070Spatrick /// Defines a map between the sink function's name, scope and sinking args. 294e5dd7070Spatrick NameArgMap CustomSinks; 295e5dd7070Spatrick }; 296e5dd7070Spatrick 297e5dd7070Spatrick const unsigned GenericTaintChecker::ReturnValueIndex; 298e5dd7070Spatrick const unsigned GenericTaintChecker::InvalidArgIndex; 299e5dd7070Spatrick 300e5dd7070Spatrick // FIXME: these lines can be removed in C++17 301e5dd7070Spatrick constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString; 302e5dd7070Spatrick constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs; 303e5dd7070Spatrick constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize; 304e5dd7070Spatrick constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink; 305e5dd7070Spatrick } // end of anonymous namespace 306e5dd7070Spatrick 307e5dd7070Spatrick using TaintConfig = GenericTaintChecker::TaintConfiguration; 308e5dd7070Spatrick 309e5dd7070Spatrick LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation) 310e5dd7070Spatrick LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs) 311e5dd7070Spatrick 312e5dd7070Spatrick namespace llvm { 313e5dd7070Spatrick namespace yaml { 314e5dd7070Spatrick template <> struct MappingTraits<TaintConfig> { 315e5dd7070Spatrick static void mapping(IO &IO, TaintConfig &Config) { 316e5dd7070Spatrick IO.mapOptional("Propagations", Config.Propagations); 317e5dd7070Spatrick IO.mapOptional("Filters", Config.Filters); 318e5dd7070Spatrick IO.mapOptional("Sinks", Config.Sinks); 319e5dd7070Spatrick } 320e5dd7070Spatrick }; 321e5dd7070Spatrick 322e5dd7070Spatrick template <> struct MappingTraits<TaintConfig::Propagation> { 323e5dd7070Spatrick static void mapping(IO &IO, 
TaintConfig::Propagation &Propagation) { 324e5dd7070Spatrick IO.mapRequired("Name", Propagation.Name); 325e5dd7070Spatrick IO.mapOptional("Scope", Propagation.Scope); 326e5dd7070Spatrick IO.mapOptional("SrcArgs", Propagation.SrcArgs); 327e5dd7070Spatrick IO.mapOptional("DstArgs", Propagation.DstArgs); 328e5dd7070Spatrick IO.mapOptional("VariadicType", Propagation.VarType, 329e5dd7070Spatrick GenericTaintChecker::VariadicType::None); 330e5dd7070Spatrick IO.mapOptional("VariadicIndex", Propagation.VarIndex, 331e5dd7070Spatrick GenericTaintChecker::InvalidArgIndex); 332e5dd7070Spatrick } 333e5dd7070Spatrick }; 334e5dd7070Spatrick 335e5dd7070Spatrick template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> { 336e5dd7070Spatrick static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) { 337e5dd7070Spatrick IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None); 338e5dd7070Spatrick IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src); 339e5dd7070Spatrick IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst); 340e5dd7070Spatrick } 341e5dd7070Spatrick }; 342e5dd7070Spatrick 343e5dd7070Spatrick template <> struct MappingTraits<TaintConfig::NameScopeArgs> { 344e5dd7070Spatrick static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) { 345e5dd7070Spatrick IO.mapRequired("Name", std::get<0>(NSA)); 346e5dd7070Spatrick IO.mapOptional("Scope", std::get<1>(NSA)); 347e5dd7070Spatrick IO.mapRequired("Args", std::get<2>(NSA)); 348e5dd7070Spatrick } 349e5dd7070Spatrick }; 350e5dd7070Spatrick } // namespace yaml 351e5dd7070Spatrick } // namespace llvm 352e5dd7070Spatrick 353e5dd7070Spatrick /// A set which is used to pass information from call pre-visit instruction 354e5dd7070Spatrick /// to the call post-visit. 
The values are unsigned integers, which are either 355e5dd7070Spatrick /// ReturnValueIndex, or indexes of the pointer/reference argument, which 356e5dd7070Spatrick /// points to data, which should be tainted on return. 357e5dd7070Spatrick REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 358e5dd7070Spatrick 359*ec727ea7Spatrick GenericTaintChecker::ArgVector 360*ec727ea7Spatrick GenericTaintChecker::convertToArgVector(CheckerManager &Mgr, 361*ec727ea7Spatrick const std::string &Option, 362*ec727ea7Spatrick const SignedArgVector &Args) { 363e5dd7070Spatrick ArgVector Result; 364e5dd7070Spatrick for (int Arg : Args) { 365e5dd7070Spatrick if (Arg == -1) 366e5dd7070Spatrick Result.push_back(ReturnValueIndex); 367e5dd7070Spatrick else if (Arg < -1) { 368e5dd7070Spatrick Result.push_back(InvalidArgIndex); 369e5dd7070Spatrick Mgr.reportInvalidCheckerOptionValue( 370e5dd7070Spatrick this, Option, 371e5dd7070Spatrick "an argument number for propagation rules greater or equal to -1"); 372e5dd7070Spatrick } else 373e5dd7070Spatrick Result.push_back(static_cast<unsigned>(Arg)); 374e5dd7070Spatrick } 375e5dd7070Spatrick return Result; 376e5dd7070Spatrick } 377e5dd7070Spatrick 378e5dd7070Spatrick void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr, 379e5dd7070Spatrick const std::string &Option, 380e5dd7070Spatrick TaintConfiguration &&Config) { 381e5dd7070Spatrick for (auto &P : Config.Propagations) { 382e5dd7070Spatrick GenericTaintChecker::CustomPropagations.emplace( 383e5dd7070Spatrick P.Name, 384e5dd7070Spatrick std::make_pair(P.Scope, TaintPropagationRule{ 385e5dd7070Spatrick std::move(P.SrcArgs), 386e5dd7070Spatrick convertToArgVector(Mgr, Option, P.DstArgs), 387e5dd7070Spatrick P.VarType, P.VarIndex})); 388e5dd7070Spatrick } 389e5dd7070Spatrick 390e5dd7070Spatrick for (auto &F : Config.Filters) { 391e5dd7070Spatrick GenericTaintChecker::CustomFilters.emplace( 392e5dd7070Spatrick std::get<0>(F), 393e5dd7070Spatrick 
std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F)))); 394e5dd7070Spatrick } 395e5dd7070Spatrick 396e5dd7070Spatrick for (auto &S : Config.Sinks) { 397e5dd7070Spatrick GenericTaintChecker::CustomSinks.emplace( 398e5dd7070Spatrick std::get<0>(S), 399e5dd7070Spatrick std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S)))); 400e5dd7070Spatrick } 401e5dd7070Spatrick } 402e5dd7070Spatrick 403e5dd7070Spatrick template <typename T> 404e5dd7070Spatrick auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map, 405e5dd7070Spatrick const FunctionData &FData) { 406*ec727ea7Spatrick auto Range = Map.equal_range(std::string(FData.Name)); 407e5dd7070Spatrick auto It = 408e5dd7070Spatrick std::find_if(Range.first, Range.second, [&FData](const auto &Entry) { 409e5dd7070Spatrick const auto &Value = Entry.second; 410e5dd7070Spatrick StringRef Scope = Value.first; 411e5dd7070Spatrick return Scope.empty() || FData.isInScope(Scope); 412e5dd7070Spatrick }); 413e5dd7070Spatrick return It != Range.second ? It : Map.end(); 414e5dd7070Spatrick } 415e5dd7070Spatrick 416e5dd7070Spatrick GenericTaintChecker::TaintPropagationRule 417e5dd7070Spatrick GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 418e5dd7070Spatrick const NameRuleMap &CustomPropagations, const FunctionData &FData, 419e5dd7070Spatrick CheckerContext &C) { 420e5dd7070Spatrick // TODO: Currently, we might lose precision here: we always mark a return 421e5dd7070Spatrick // value as tainted even if it's just a pointer, pointing to tainted data. 422e5dd7070Spatrick 423e5dd7070Spatrick // Check for exact name match for functions without builtin substitutes. 424e5dd7070Spatrick // Use qualified name, because these are C functions without namespace. 
425e5dd7070Spatrick TaintPropagationRule Rule = 426e5dd7070Spatrick llvm::StringSwitch<TaintPropagationRule>(FData.FullName) 427e5dd7070Spatrick // Source functions 428e5dd7070Spatrick // TODO: Add support for vfscanf & family. 429*ec727ea7Spatrick .Case("fdopen", {{}, {ReturnValueIndex}}) 430*ec727ea7Spatrick .Case("fopen", {{}, {ReturnValueIndex}}) 431*ec727ea7Spatrick .Case("freopen", {{}, {ReturnValueIndex}}) 432*ec727ea7Spatrick .Case("getch", {{}, {ReturnValueIndex}}) 433*ec727ea7Spatrick .Case("getchar", {{}, {ReturnValueIndex}}) 434*ec727ea7Spatrick .Case("getchar_unlocked", {{}, {ReturnValueIndex}}) 435*ec727ea7Spatrick .Case("getenv", {{}, {ReturnValueIndex}}) 436*ec727ea7Spatrick .Case("gets", {{}, {0, ReturnValueIndex}}) 437*ec727ea7Spatrick .Case("scanf", {{}, {}, VariadicType::Dst, 1}) 438*ec727ea7Spatrick .Case("socket", {{}, 439*ec727ea7Spatrick {ReturnValueIndex}, 440*ec727ea7Spatrick VariadicType::None, 441e5dd7070Spatrick InvalidArgIndex, 442*ec727ea7Spatrick &TaintPropagationRule::postSocket}) 443*ec727ea7Spatrick .Case("wgetch", {{}, {ReturnValueIndex}}) 444e5dd7070Spatrick // Propagating functions 445*ec727ea7Spatrick .Case("atoi", {{0}, {ReturnValueIndex}}) 446*ec727ea7Spatrick .Case("atol", {{0}, {ReturnValueIndex}}) 447*ec727ea7Spatrick .Case("atoll", {{0}, {ReturnValueIndex}}) 448*ec727ea7Spatrick .Case("fgetc", {{0}, {ReturnValueIndex}}) 449*ec727ea7Spatrick .Case("fgetln", {{0}, {ReturnValueIndex}}) 450*ec727ea7Spatrick .Case("fgets", {{2}, {0, ReturnValueIndex}}) 451*ec727ea7Spatrick .Case("fscanf", {{0}, {}, VariadicType::Dst, 2}) 452*ec727ea7Spatrick .Case("sscanf", {{0}, {}, VariadicType::Dst, 2}) 453*ec727ea7Spatrick .Case("getc", {{0}, {ReturnValueIndex}}) 454*ec727ea7Spatrick .Case("getc_unlocked", {{0}, {ReturnValueIndex}}) 455*ec727ea7Spatrick .Case("getdelim", {{3}, {0}}) 456*ec727ea7Spatrick .Case("getline", {{2}, {0}}) 457*ec727ea7Spatrick .Case("getw", {{0}, {ReturnValueIndex}}) 458*ec727ea7Spatrick .Case("pread", {{0, 1, 2, 
3}, {1, ReturnValueIndex}}) 459*ec727ea7Spatrick .Case("read", {{0, 2}, {1, ReturnValueIndex}}) 460*ec727ea7Spatrick .Case("strchr", {{0}, {ReturnValueIndex}}) 461*ec727ea7Spatrick .Case("strrchr", {{0}, {ReturnValueIndex}}) 462*ec727ea7Spatrick .Case("tolower", {{0}, {ReturnValueIndex}}) 463*ec727ea7Spatrick .Case("toupper", {{0}, {ReturnValueIndex}}) 464*ec727ea7Spatrick .Default({}); 465e5dd7070Spatrick 466e5dd7070Spatrick if (!Rule.isNull()) 467e5dd7070Spatrick return Rule; 468*ec727ea7Spatrick assert(FData.FDecl); 469e5dd7070Spatrick 470e5dd7070Spatrick // Check if it's one of the memory setting/copying functions. 471e5dd7070Spatrick // This check is specialized but faster then calling isCLibraryFunction. 472e5dd7070Spatrick const FunctionDecl *FDecl = FData.FDecl; 473e5dd7070Spatrick unsigned BId = 0; 474*ec727ea7Spatrick if ((BId = FDecl->getMemoryFunctionKind())) { 475e5dd7070Spatrick switch (BId) { 476e5dd7070Spatrick case Builtin::BImemcpy: 477e5dd7070Spatrick case Builtin::BImemmove: 478e5dd7070Spatrick case Builtin::BIstrncpy: 479e5dd7070Spatrick case Builtin::BIstrncat: 480*ec727ea7Spatrick return {{1, 2}, {0, ReturnValueIndex}}; 481e5dd7070Spatrick case Builtin::BIstrlcpy: 482e5dd7070Spatrick case Builtin::BIstrlcat: 483*ec727ea7Spatrick return {{1, 2}, {0}}; 484e5dd7070Spatrick case Builtin::BIstrndup: 485*ec727ea7Spatrick return {{0, 1}, {ReturnValueIndex}}; 486e5dd7070Spatrick 487e5dd7070Spatrick default: 488e5dd7070Spatrick break; 489*ec727ea7Spatrick } 490*ec727ea7Spatrick } 491e5dd7070Spatrick 492e5dd7070Spatrick // Process all other functions which could be defined as builtins. 493e5dd7070Spatrick if (Rule.isNull()) { 494*ec727ea7Spatrick const auto OneOf = [FDecl](const auto &... 
Name) { 495*ec727ea7Spatrick // FIXME: use fold expression in C++17 496*ec727ea7Spatrick using unused = int[]; 497*ec727ea7Spatrick bool ret = false; 498*ec727ea7Spatrick static_cast<void>(unused{ 499*ec727ea7Spatrick 0, (ret |= CheckerContext::isCLibraryFunction(FDecl, Name), 0)...}); 500*ec727ea7Spatrick return ret; 501*ec727ea7Spatrick }; 502*ec727ea7Spatrick if (OneOf("snprintf")) 503*ec727ea7Spatrick return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3}; 504*ec727ea7Spatrick if (OneOf("sprintf")) 505*ec727ea7Spatrick return {{}, {0, ReturnValueIndex}, VariadicType::Src, 2}; 506*ec727ea7Spatrick if (OneOf("strcpy", "stpcpy", "strcat")) 507*ec727ea7Spatrick return {{1}, {0, ReturnValueIndex}}; 508*ec727ea7Spatrick if (OneOf("bcopy")) 509*ec727ea7Spatrick return {{0, 2}, {1}}; 510*ec727ea7Spatrick if (OneOf("strdup", "strdupa", "wcsdup")) 511*ec727ea7Spatrick return {{0}, {ReturnValueIndex}}; 512e5dd7070Spatrick } 513e5dd7070Spatrick 514*ec727ea7Spatrick // Skipping the following functions, since they might be used for cleansing or 515*ec727ea7Spatrick // smart memory copy: 516e5dd7070Spatrick // - memccpy - copying until hitting a special character. 517e5dd7070Spatrick 518e5dd7070Spatrick auto It = findFunctionInConfig(CustomPropagations, FData); 519*ec727ea7Spatrick if (It != CustomPropagations.end()) 520*ec727ea7Spatrick return It->second.second; 521*ec727ea7Spatrick return {}; 522e5dd7070Spatrick } 523e5dd7070Spatrick 524*ec727ea7Spatrick void GenericTaintChecker::checkPreCall(const CallEvent &Call, 525e5dd7070Spatrick CheckerContext &C) const { 526*ec727ea7Spatrick Optional<FunctionData> FData = FunctionData::create(Call, C); 527e5dd7070Spatrick if (!FData) 528e5dd7070Spatrick return; 529e5dd7070Spatrick 530e5dd7070Spatrick // Check for taintedness related errors first: system call, uncontrolled 531e5dd7070Spatrick // format string, tainted buffer size. 
532*ec727ea7Spatrick if (checkPre(Call, *FData, C)) 533e5dd7070Spatrick return; 534e5dd7070Spatrick 535e5dd7070Spatrick // Marks the function's arguments and/or return value tainted if it present in 536e5dd7070Spatrick // the list. 537*ec727ea7Spatrick if (addSourcesPre(Call, *FData, C)) 538e5dd7070Spatrick return; 539e5dd7070Spatrick 540*ec727ea7Spatrick addFiltersPre(Call, *FData, C); 541e5dd7070Spatrick } 542e5dd7070Spatrick 543*ec727ea7Spatrick void GenericTaintChecker::checkPostCall(const CallEvent &Call, 544e5dd7070Spatrick CheckerContext &C) const { 545e5dd7070Spatrick // Set the marked values as tainted. The return value only accessible from 546e5dd7070Spatrick // checkPostStmt. 547*ec727ea7Spatrick propagateFromPre(Call, C); 548e5dd7070Spatrick } 549e5dd7070Spatrick 550e5dd7070Spatrick void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, 551e5dd7070Spatrick const char *NL, const char *Sep) const { 552e5dd7070Spatrick printTaint(State, Out, NL, Sep); 553e5dd7070Spatrick } 554e5dd7070Spatrick 555*ec727ea7Spatrick bool GenericTaintChecker::addSourcesPre(const CallEvent &Call, 556e5dd7070Spatrick const FunctionData &FData, 557e5dd7070Spatrick CheckerContext &C) const { 558e5dd7070Spatrick // First, try generating a propagation rule for this function. 
559e5dd7070Spatrick TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule( 560e5dd7070Spatrick this->CustomPropagations, FData, C); 561e5dd7070Spatrick if (!Rule.isNull()) { 562*ec727ea7Spatrick ProgramStateRef State = Rule.process(Call, C); 563e5dd7070Spatrick if (State) { 564e5dd7070Spatrick C.addTransition(State); 565e5dd7070Spatrick return true; 566e5dd7070Spatrick } 567e5dd7070Spatrick } 568e5dd7070Spatrick return false; 569e5dd7070Spatrick } 570e5dd7070Spatrick 571*ec727ea7Spatrick bool GenericTaintChecker::addFiltersPre(const CallEvent &Call, 572e5dd7070Spatrick const FunctionData &FData, 573e5dd7070Spatrick CheckerContext &C) const { 574e5dd7070Spatrick auto It = findFunctionInConfig(CustomFilters, FData); 575e5dd7070Spatrick if (It == CustomFilters.end()) 576e5dd7070Spatrick return false; 577e5dd7070Spatrick 578e5dd7070Spatrick ProgramStateRef State = C.getState(); 579e5dd7070Spatrick const auto &Value = It->second; 580e5dd7070Spatrick const ArgVector &Args = Value.second; 581e5dd7070Spatrick for (unsigned ArgNum : Args) { 582*ec727ea7Spatrick if (ArgNum >= Call.getNumArgs()) 583e5dd7070Spatrick continue; 584e5dd7070Spatrick 585*ec727ea7Spatrick const Expr *Arg = Call.getArgExpr(ArgNum); 586*ec727ea7Spatrick Optional<SVal> V = getPointeeOf(C, Arg); 587e5dd7070Spatrick if (V) 588e5dd7070Spatrick State = removeTaint(State, *V); 589e5dd7070Spatrick } 590e5dd7070Spatrick 591e5dd7070Spatrick if (State != C.getState()) { 592e5dd7070Spatrick C.addTransition(State); 593e5dd7070Spatrick return true; 594e5dd7070Spatrick } 595e5dd7070Spatrick return false; 596e5dd7070Spatrick } 597e5dd7070Spatrick 598*ec727ea7Spatrick bool GenericTaintChecker::propagateFromPre(const CallEvent &Call, 599*ec727ea7Spatrick CheckerContext &C) { 600e5dd7070Spatrick ProgramStateRef State = C.getState(); 601e5dd7070Spatrick 602e5dd7070Spatrick // Depending on what was tainted at pre-visit, we determined a set of 603e5dd7070Spatrick // arguments which should be tainted 
after the function returns. These are 604e5dd7070Spatrick // stored in the state as TaintArgsOnPostVisit set. 605e5dd7070Spatrick TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 606e5dd7070Spatrick if (TaintArgs.isEmpty()) 607e5dd7070Spatrick return false; 608e5dd7070Spatrick 609e5dd7070Spatrick for (unsigned ArgNum : TaintArgs) { 610e5dd7070Spatrick // Special handling for the tainted return value. 611e5dd7070Spatrick if (ArgNum == ReturnValueIndex) { 612*ec727ea7Spatrick State = addTaint(State, Call.getReturnValue()); 613e5dd7070Spatrick continue; 614e5dd7070Spatrick } 615e5dd7070Spatrick 616e5dd7070Spatrick // The arguments are pointer arguments. The data they are pointing at is 617e5dd7070Spatrick // tainted after the call. 618*ec727ea7Spatrick if (Call.getNumArgs() < (ArgNum + 1)) 619e5dd7070Spatrick return false; 620*ec727ea7Spatrick const Expr *Arg = Call.getArgExpr(ArgNum); 621*ec727ea7Spatrick Optional<SVal> V = getPointeeOf(C, Arg); 622e5dd7070Spatrick if (V) 623e5dd7070Spatrick State = addTaint(State, *V); 624e5dd7070Spatrick } 625e5dd7070Spatrick 626e5dd7070Spatrick // Clear up the taint info from the state. 
627e5dd7070Spatrick State = State->remove<TaintArgsOnPostVisit>(); 628e5dd7070Spatrick 629e5dd7070Spatrick if (State != C.getState()) { 630e5dd7070Spatrick C.addTransition(State); 631e5dd7070Spatrick return true; 632e5dd7070Spatrick } 633e5dd7070Spatrick return false; 634e5dd7070Spatrick } 635e5dd7070Spatrick 636*ec727ea7Spatrick bool GenericTaintChecker::checkPre(const CallEvent &Call, 637e5dd7070Spatrick const FunctionData &FData, 638e5dd7070Spatrick CheckerContext &C) const { 639*ec727ea7Spatrick if (checkUncontrolledFormatString(Call, C)) 640e5dd7070Spatrick return true; 641e5dd7070Spatrick 642*ec727ea7Spatrick if (checkSystemCall(Call, FData.Name, C)) 643e5dd7070Spatrick return true; 644e5dd7070Spatrick 645*ec727ea7Spatrick if (checkTaintedBufferSize(Call, C)) 646e5dd7070Spatrick return true; 647e5dd7070Spatrick 648*ec727ea7Spatrick return checkCustomSinks(Call, FData, C); 649e5dd7070Spatrick } 650e5dd7070Spatrick 651*ec727ea7Spatrick Optional<SVal> GenericTaintChecker::getPointeeOf(CheckerContext &C, 652e5dd7070Spatrick const Expr *Arg) { 653e5dd7070Spatrick ProgramStateRef State = C.getState(); 654e5dd7070Spatrick SVal AddrVal = C.getSVal(Arg->IgnoreParens()); 655e5dd7070Spatrick if (AddrVal.isUnknownOrUndef()) 656e5dd7070Spatrick return None; 657e5dd7070Spatrick 658e5dd7070Spatrick Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 659e5dd7070Spatrick if (!AddrLoc) 660e5dd7070Spatrick return None; 661e5dd7070Spatrick 662e5dd7070Spatrick QualType ArgTy = Arg->getType().getCanonicalType(); 663e5dd7070Spatrick if (!ArgTy->isPointerType()) 664e5dd7070Spatrick return State->getSVal(*AddrLoc); 665e5dd7070Spatrick 666e5dd7070Spatrick QualType ValTy = ArgTy->getPointeeType(); 667e5dd7070Spatrick 668e5dd7070Spatrick // Do not dereference void pointers. Treat them as byte pointers instead. 669e5dd7070Spatrick // FIXME: we might want to consider more than just the first byte. 
670e5dd7070Spatrick if (ValTy->isVoidType()) 671e5dd7070Spatrick ValTy = C.getASTContext().CharTy; 672e5dd7070Spatrick 673e5dd7070Spatrick return State->getSVal(*AddrLoc, ValTy); 674e5dd7070Spatrick } 675e5dd7070Spatrick 676e5dd7070Spatrick ProgramStateRef 677*ec727ea7Spatrick GenericTaintChecker::TaintPropagationRule::process(const CallEvent &Call, 678e5dd7070Spatrick CheckerContext &C) const { 679e5dd7070Spatrick ProgramStateRef State = C.getState(); 680e5dd7070Spatrick 681e5dd7070Spatrick // Check for taint in arguments. 682e5dd7070Spatrick bool IsTainted = true; 683e5dd7070Spatrick for (unsigned ArgNum : SrcArgs) { 684*ec727ea7Spatrick if (ArgNum >= Call.getNumArgs()) 685e5dd7070Spatrick continue; 686e5dd7070Spatrick 687*ec727ea7Spatrick if ((IsTainted = 688*ec727ea7Spatrick isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C))) 689e5dd7070Spatrick break; 690e5dd7070Spatrick } 691e5dd7070Spatrick 692e5dd7070Spatrick // Check for taint in variadic arguments. 693e5dd7070Spatrick if (!IsTainted && VariadicType::Src == VarType) { 694e5dd7070Spatrick // Check if any of the arguments is tainted 695*ec727ea7Spatrick for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) { 696*ec727ea7Spatrick if ((IsTainted = 697*ec727ea7Spatrick isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C))) 698e5dd7070Spatrick break; 699e5dd7070Spatrick } 700e5dd7070Spatrick } 701e5dd7070Spatrick 702e5dd7070Spatrick if (PropagationFunc) 703*ec727ea7Spatrick IsTainted = PropagationFunc(IsTainted, Call, C); 704e5dd7070Spatrick 705e5dd7070Spatrick if (!IsTainted) 706e5dd7070Spatrick return State; 707e5dd7070Spatrick 708e5dd7070Spatrick // Mark the arguments which should be tainted after the function returns. 709e5dd7070Spatrick for (unsigned ArgNum : DstArgs) { 710e5dd7070Spatrick // Should mark the return value? 
711e5dd7070Spatrick if (ArgNum == ReturnValueIndex) { 712e5dd7070Spatrick State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 713e5dd7070Spatrick continue; 714e5dd7070Spatrick } 715e5dd7070Spatrick 716*ec727ea7Spatrick if (ArgNum >= Call.getNumArgs()) 717e5dd7070Spatrick continue; 718e5dd7070Spatrick 719e5dd7070Spatrick // Mark the given argument. 720e5dd7070Spatrick State = State->add<TaintArgsOnPostVisit>(ArgNum); 721e5dd7070Spatrick } 722e5dd7070Spatrick 723e5dd7070Spatrick // Mark all variadic arguments tainted if present. 724e5dd7070Spatrick if (VariadicType::Dst == VarType) { 725e5dd7070Spatrick // For all pointer and references that were passed in: 726e5dd7070Spatrick // If they are not pointing to const data, mark data as tainted. 727e5dd7070Spatrick // TODO: So far we are just going one level down; ideally we'd need to 728e5dd7070Spatrick // recurse here. 729*ec727ea7Spatrick for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) { 730*ec727ea7Spatrick const Expr *Arg = Call.getArgExpr(i); 731e5dd7070Spatrick // Process pointer argument. 732e5dd7070Spatrick const Type *ArgTy = Arg->getType().getTypePtr(); 733e5dd7070Spatrick QualType PType = ArgTy->getPointeeType(); 734e5dd7070Spatrick if ((!PType.isNull() && !PType.isConstQualified()) || 735*ec727ea7Spatrick (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) { 736e5dd7070Spatrick State = State->add<TaintArgsOnPostVisit>(i); 737e5dd7070Spatrick } 738e5dd7070Spatrick } 739*ec727ea7Spatrick } 740e5dd7070Spatrick 741e5dd7070Spatrick return State; 742e5dd7070Spatrick } 743e5dd7070Spatrick 744e5dd7070Spatrick // If argument 0(protocol domain) is network, the return value should get taint. 
745*ec727ea7Spatrick bool GenericTaintChecker::TaintPropagationRule::postSocket( 746*ec727ea7Spatrick bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) { 747*ec727ea7Spatrick SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc(); 748e5dd7070Spatrick StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 749e5dd7070Spatrick // White list the internal communication protocols. 750e5dd7070Spatrick if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 751e5dd7070Spatrick DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 752e5dd7070Spatrick return false; 753e5dd7070Spatrick return true; 754e5dd7070Spatrick } 755e5dd7070Spatrick 756e5dd7070Spatrick bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 757e5dd7070Spatrick ProgramStateRef State = C.getState(); 758e5dd7070Spatrick SVal Val = C.getSVal(E); 759e5dd7070Spatrick 760e5dd7070Spatrick // stdin is a pointer, so it would be a region. 761e5dd7070Spatrick const MemRegion *MemReg = Val.getAsRegion(); 762e5dd7070Spatrick 763e5dd7070Spatrick // The region should be symbolic, we do not know it's value. 764*ec727ea7Spatrick const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 765e5dd7070Spatrick if (!SymReg) 766e5dd7070Spatrick return false; 767e5dd7070Spatrick 768e5dd7070Spatrick // Get it's symbol and find the declaration region it's pointing to. 769*ec727ea7Spatrick const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 770e5dd7070Spatrick if (!Sm) 771e5dd7070Spatrick return false; 772*ec727ea7Spatrick const auto *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 773e5dd7070Spatrick if (!DeclReg) 774e5dd7070Spatrick return false; 775e5dd7070Spatrick 776e5dd7070Spatrick // This region corresponds to a declaration, find out if it's a global/extern 777e5dd7070Spatrick // variable named stdin with the proper type. 
778e5dd7070Spatrick if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 779e5dd7070Spatrick D = D->getCanonicalDecl(); 780e5dd7070Spatrick if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) { 781e5dd7070Spatrick const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr()); 782e5dd7070Spatrick if (PtrTy && PtrTy->getPointeeType().getCanonicalType() == 783e5dd7070Spatrick C.getASTContext().getFILEType().getCanonicalType()) 784e5dd7070Spatrick return true; 785e5dd7070Spatrick } 786e5dd7070Spatrick } 787e5dd7070Spatrick return false; 788e5dd7070Spatrick } 789e5dd7070Spatrick 790*ec727ea7Spatrick static bool getPrintfFormatArgumentNum(const CallEvent &Call, 791e5dd7070Spatrick const CheckerContext &C, 792e5dd7070Spatrick unsigned &ArgNum) { 793e5dd7070Spatrick // Find if the function contains a format string argument. 794e5dd7070Spatrick // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 795e5dd7070Spatrick // vsnprintf, syslog, custom annotated functions. 796*ec727ea7Spatrick const FunctionDecl *FDecl = Call.getDecl()->getAsFunction(); 797e5dd7070Spatrick if (!FDecl) 798e5dd7070Spatrick return false; 799e5dd7070Spatrick for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 800e5dd7070Spatrick ArgNum = Format->getFormatIdx() - 1; 801*ec727ea7Spatrick if ((Format->getType()->getName() == "printf") && 802*ec727ea7Spatrick Call.getNumArgs() > ArgNum) 803e5dd7070Spatrick return true; 804e5dd7070Spatrick } 805e5dd7070Spatrick 806e5dd7070Spatrick // Or if a function is named setproctitle (this is a heuristic). 
807*ec727ea7Spatrick if (C.getCalleeName(FDecl).find("setproctitle") != StringRef::npos) { 808e5dd7070Spatrick ArgNum = 0; 809e5dd7070Spatrick return true; 810e5dd7070Spatrick } 811e5dd7070Spatrick 812e5dd7070Spatrick return false; 813e5dd7070Spatrick } 814e5dd7070Spatrick 815e5dd7070Spatrick bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, 816e5dd7070Spatrick CheckerContext &C) const { 817e5dd7070Spatrick assert(E); 818e5dd7070Spatrick 819e5dd7070Spatrick // Check for taint. 820e5dd7070Spatrick ProgramStateRef State = C.getState(); 821*ec727ea7Spatrick Optional<SVal> PointedToSVal = getPointeeOf(C, E); 822e5dd7070Spatrick SVal TaintedSVal; 823e5dd7070Spatrick if (PointedToSVal && isTainted(State, *PointedToSVal)) 824e5dd7070Spatrick TaintedSVal = *PointedToSVal; 825e5dd7070Spatrick else if (isTainted(State, E, C.getLocationContext())) 826e5dd7070Spatrick TaintedSVal = C.getSVal(E); 827e5dd7070Spatrick else 828e5dd7070Spatrick return false; 829e5dd7070Spatrick 830e5dd7070Spatrick // Generate diagnostic. 831e5dd7070Spatrick if (ExplodedNode *N = C.generateNonFatalErrorNode()) { 832e5dd7070Spatrick initBugType(); 833e5dd7070Spatrick auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N); 834e5dd7070Spatrick report->addRange(E->getSourceRange()); 835e5dd7070Spatrick report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal)); 836e5dd7070Spatrick C.emitReport(std::move(report)); 837e5dd7070Spatrick return true; 838e5dd7070Spatrick } 839e5dd7070Spatrick return false; 840e5dd7070Spatrick } 841e5dd7070Spatrick 842e5dd7070Spatrick bool GenericTaintChecker::checkUncontrolledFormatString( 843*ec727ea7Spatrick const CallEvent &Call, CheckerContext &C) const { 844e5dd7070Spatrick // Check if the function contains a format string argument. 
845e5dd7070Spatrick unsigned ArgNum = 0; 846*ec727ea7Spatrick if (!getPrintfFormatArgumentNum(Call, C, ArgNum)) 847e5dd7070Spatrick return false; 848e5dd7070Spatrick 849e5dd7070Spatrick // If either the format string content or the pointer itself are tainted, 850e5dd7070Spatrick // warn. 851*ec727ea7Spatrick return generateReportIfTainted(Call.getArgExpr(ArgNum), 852e5dd7070Spatrick MsgUncontrolledFormatString, C); 853e5dd7070Spatrick } 854e5dd7070Spatrick 855*ec727ea7Spatrick bool GenericTaintChecker::checkSystemCall(const CallEvent &Call, StringRef Name, 856e5dd7070Spatrick CheckerContext &C) const { 857e5dd7070Spatrick // TODO: It might make sense to run this check on demand. In some cases, 858e5dd7070Spatrick // we should check if the environment has been cleansed here. We also might 859e5dd7070Spatrick // need to know if the user was reset before these calls(seteuid). 860e5dd7070Spatrick unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 861e5dd7070Spatrick .Case("system", 0) 862e5dd7070Spatrick .Case("popen", 0) 863e5dd7070Spatrick .Case("execl", 0) 864e5dd7070Spatrick .Case("execle", 0) 865e5dd7070Spatrick .Case("execlp", 0) 866e5dd7070Spatrick .Case("execv", 0) 867e5dd7070Spatrick .Case("execvp", 0) 868e5dd7070Spatrick .Case("execvP", 0) 869e5dd7070Spatrick .Case("execve", 0) 870e5dd7070Spatrick .Case("dlopen", 0) 871e5dd7070Spatrick .Default(InvalidArgIndex); 872e5dd7070Spatrick 873*ec727ea7Spatrick if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1)) 874e5dd7070Spatrick return false; 875e5dd7070Spatrick 876*ec727ea7Spatrick return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs, 877*ec727ea7Spatrick C); 878e5dd7070Spatrick } 879e5dd7070Spatrick 880e5dd7070Spatrick // TODO: Should this check be a part of the CString checker? 881e5dd7070Spatrick // If yes, should taint be a global setting? 
882*ec727ea7Spatrick bool GenericTaintChecker::checkTaintedBufferSize(const CallEvent &Call, 883e5dd7070Spatrick CheckerContext &C) const { 884*ec727ea7Spatrick const auto *FDecl = Call.getDecl()->getAsFunction(); 885e5dd7070Spatrick // If the function has a buffer size argument, set ArgNum. 886e5dd7070Spatrick unsigned ArgNum = InvalidArgIndex; 887e5dd7070Spatrick unsigned BId = 0; 888*ec727ea7Spatrick if ((BId = FDecl->getMemoryFunctionKind())) { 889e5dd7070Spatrick switch (BId) { 890e5dd7070Spatrick case Builtin::BImemcpy: 891e5dd7070Spatrick case Builtin::BImemmove: 892e5dd7070Spatrick case Builtin::BIstrncpy: 893e5dd7070Spatrick ArgNum = 2; 894e5dd7070Spatrick break; 895e5dd7070Spatrick case Builtin::BIstrndup: 896e5dd7070Spatrick ArgNum = 1; 897e5dd7070Spatrick break; 898e5dd7070Spatrick default: 899e5dd7070Spatrick break; 900*ec727ea7Spatrick } 901*ec727ea7Spatrick } 902e5dd7070Spatrick 903e5dd7070Spatrick if (ArgNum == InvalidArgIndex) { 904*ec727ea7Spatrick using CCtx = CheckerContext; 905*ec727ea7Spatrick if (CCtx::isCLibraryFunction(FDecl, "malloc") || 906*ec727ea7Spatrick CCtx::isCLibraryFunction(FDecl, "calloc") || 907*ec727ea7Spatrick CCtx::isCLibraryFunction(FDecl, "alloca")) 908e5dd7070Spatrick ArgNum = 0; 909*ec727ea7Spatrick else if (CCtx::isCLibraryFunction(FDecl, "memccpy")) 910e5dd7070Spatrick ArgNum = 3; 911*ec727ea7Spatrick else if (CCtx::isCLibraryFunction(FDecl, "realloc")) 912e5dd7070Spatrick ArgNum = 1; 913*ec727ea7Spatrick else if (CCtx::isCLibraryFunction(FDecl, "bcopy")) 914e5dd7070Spatrick ArgNum = 2; 915e5dd7070Spatrick } 916e5dd7070Spatrick 917*ec727ea7Spatrick return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum && 918*ec727ea7Spatrick generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize, 919*ec727ea7Spatrick C); 920e5dd7070Spatrick } 921e5dd7070Spatrick 922*ec727ea7Spatrick bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call, 923e5dd7070Spatrick const FunctionData &FData, 924e5dd7070Spatrick 
CheckerContext &C) const { 925e5dd7070Spatrick auto It = findFunctionInConfig(CustomSinks, FData); 926e5dd7070Spatrick if (It == CustomSinks.end()) 927e5dd7070Spatrick return false; 928e5dd7070Spatrick 929e5dd7070Spatrick const auto &Value = It->second; 930e5dd7070Spatrick const GenericTaintChecker::ArgVector &Args = Value.second; 931e5dd7070Spatrick for (unsigned ArgNum : Args) { 932*ec727ea7Spatrick if (ArgNum >= Call.getNumArgs()) 933e5dd7070Spatrick continue; 934e5dd7070Spatrick 935*ec727ea7Spatrick if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C)) 936e5dd7070Spatrick return true; 937e5dd7070Spatrick } 938e5dd7070Spatrick 939e5dd7070Spatrick return false; 940e5dd7070Spatrick } 941e5dd7070Spatrick 942e5dd7070Spatrick void ento::registerGenericTaintChecker(CheckerManager &Mgr) { 943e5dd7070Spatrick auto *Checker = Mgr.registerChecker<GenericTaintChecker>(); 944e5dd7070Spatrick std::string Option{"Config"}; 945e5dd7070Spatrick StringRef ConfigFile = 946e5dd7070Spatrick Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option); 947e5dd7070Spatrick llvm::Optional<TaintConfig> Config = 948e5dd7070Spatrick getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile); 949e5dd7070Spatrick if (Config) 950e5dd7070Spatrick Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue())); 951e5dd7070Spatrick } 952e5dd7070Spatrick 953*ec727ea7Spatrick bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) { 954e5dd7070Spatrick return true; 955e5dd7070Spatrick } 956