xref: /openbsd-src/gnu/llvm/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (revision ec727ea710c91afd8ce4f788c5aaa8482b7b69b2)
1e5dd7070Spatrick //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2e5dd7070Spatrick //
3e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information.
5e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e5dd7070Spatrick //
7e5dd7070Spatrick //===----------------------------------------------------------------------===//
8e5dd7070Spatrick //
9e5dd7070Spatrick // This checker defines the attack surface for generic taint propagation.
10e5dd7070Spatrick //
11e5dd7070Spatrick // The taint information produced by it might be useful to other checkers. For
12e5dd7070Spatrick // example, checkers should report errors which involve tainted data more
13e5dd7070Spatrick // aggressively, even if the involved symbols are under constrained.
14e5dd7070Spatrick //
15e5dd7070Spatrick //===----------------------------------------------------------------------===//
16e5dd7070Spatrick 
17e5dd7070Spatrick #include "Taint.h"
18e5dd7070Spatrick #include "Yaml.h"
19e5dd7070Spatrick #include "clang/AST/Attr.h"
20e5dd7070Spatrick #include "clang/Basic/Builtins.h"
21e5dd7070Spatrick #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
22e5dd7070Spatrick #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23e5dd7070Spatrick #include "clang/StaticAnalyzer/Core/Checker.h"
24e5dd7070Spatrick #include "clang/StaticAnalyzer/Core/CheckerManager.h"
25*ec727ea7Spatrick #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
26e5dd7070Spatrick #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
27e5dd7070Spatrick #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
28e5dd7070Spatrick #include "llvm/Support/YAMLTraits.h"
29*ec727ea7Spatrick 
30e5dd7070Spatrick #include <algorithm>
31e5dd7070Spatrick #include <limits>
32*ec727ea7Spatrick #include <memory>
33e5dd7070Spatrick #include <unordered_map>
34e5dd7070Spatrick #include <utility>
35e5dd7070Spatrick 
36e5dd7070Spatrick using namespace clang;
37e5dd7070Spatrick using namespace ento;
38e5dd7070Spatrick using namespace taint;
39e5dd7070Spatrick 
40e5dd7070Spatrick namespace {
41*ec727ea7Spatrick class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
42e5dd7070Spatrick public:
43e5dd7070Spatrick   static void *getTag() {
44e5dd7070Spatrick     static int Tag;
45e5dd7070Spatrick     return &Tag;
46e5dd7070Spatrick   }
47e5dd7070Spatrick 
48*ec727ea7Spatrick   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
49*ec727ea7Spatrick   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
50e5dd7070Spatrick 
51e5dd7070Spatrick   void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
52e5dd7070Spatrick                   const char *Sep) const override;
53e5dd7070Spatrick 
54e5dd7070Spatrick   using ArgVector = SmallVector<unsigned, 2>;
55e5dd7070Spatrick   using SignedArgVector = SmallVector<int, 2>;
56e5dd7070Spatrick 
57e5dd7070Spatrick   enum class VariadicType { None, Src, Dst };
58e5dd7070Spatrick 
59e5dd7070Spatrick   /// Used to parse the configuration file.
60e5dd7070Spatrick   struct TaintConfiguration {
61e5dd7070Spatrick     using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;
62e5dd7070Spatrick 
63e5dd7070Spatrick     struct Propagation {
64e5dd7070Spatrick       std::string Name;
65e5dd7070Spatrick       std::string Scope;
66e5dd7070Spatrick       ArgVector SrcArgs;
67e5dd7070Spatrick       SignedArgVector DstArgs;
68e5dd7070Spatrick       VariadicType VarType;
69e5dd7070Spatrick       unsigned VarIndex;
70e5dd7070Spatrick     };
71e5dd7070Spatrick 
72e5dd7070Spatrick     std::vector<Propagation> Propagations;
73e5dd7070Spatrick     std::vector<NameScopeArgs> Filters;
74e5dd7070Spatrick     std::vector<NameScopeArgs> Sinks;
75e5dd7070Spatrick 
76e5dd7070Spatrick     TaintConfiguration() = default;
77e5dd7070Spatrick     TaintConfiguration(const TaintConfiguration &) = default;
78e5dd7070Spatrick     TaintConfiguration(TaintConfiguration &&) = default;
79e5dd7070Spatrick     TaintConfiguration &operator=(const TaintConfiguration &) = default;
80e5dd7070Spatrick     TaintConfiguration &operator=(TaintConfiguration &&) = default;
81e5dd7070Spatrick   };
82e5dd7070Spatrick 
83e5dd7070Spatrick   /// Convert SignedArgVector to ArgVector.
84e5dd7070Spatrick   ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
85*ec727ea7Spatrick                                const SignedArgVector &Args);
86e5dd7070Spatrick 
87e5dd7070Spatrick   /// Parse the config.
88e5dd7070Spatrick   void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
89e5dd7070Spatrick                           TaintConfiguration &&Config);
90e5dd7070Spatrick 
91e5dd7070Spatrick   static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
92e5dd7070Spatrick   /// Denotes the return vale.
93e5dd7070Spatrick   static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
94e5dd7070Spatrick                                          1};
95e5dd7070Spatrick 
96e5dd7070Spatrick private:
97e5dd7070Spatrick   mutable std::unique_ptr<BugType> BT;
98e5dd7070Spatrick   void initBugType() const {
99e5dd7070Spatrick     if (!BT)
100*ec727ea7Spatrick       BT = std::make_unique<BugType>(this, "Use of Untrusted Data",
101*ec727ea7Spatrick                                      "Untrusted Data");
102e5dd7070Spatrick   }
103e5dd7070Spatrick 
104e5dd7070Spatrick   struct FunctionData {
105e5dd7070Spatrick     FunctionData() = delete;
106e5dd7070Spatrick     FunctionData(const FunctionData &) = default;
107e5dd7070Spatrick     FunctionData(FunctionData &&) = default;
108e5dd7070Spatrick     FunctionData &operator=(const FunctionData &) = delete;
109e5dd7070Spatrick     FunctionData &operator=(FunctionData &&) = delete;
110e5dd7070Spatrick 
111*ec727ea7Spatrick     static Optional<FunctionData> create(const CallEvent &Call,
112e5dd7070Spatrick                                          const CheckerContext &C) {
113*ec727ea7Spatrick       if (!Call.getDecl())
114*ec727ea7Spatrick         return None;
115*ec727ea7Spatrick 
116*ec727ea7Spatrick       const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
117e5dd7070Spatrick       if (!FDecl || (FDecl->getKind() != Decl::Function &&
118e5dd7070Spatrick                      FDecl->getKind() != Decl::CXXMethod))
119e5dd7070Spatrick         return None;
120e5dd7070Spatrick 
121e5dd7070Spatrick       StringRef Name = C.getCalleeName(FDecl);
122e5dd7070Spatrick       std::string FullName = FDecl->getQualifiedNameAsString();
123e5dd7070Spatrick       if (Name.empty() || FullName.empty())
124e5dd7070Spatrick         return None;
125e5dd7070Spatrick 
126e5dd7070Spatrick       return FunctionData{FDecl, Name, FullName};
127e5dd7070Spatrick     }
128e5dd7070Spatrick 
129e5dd7070Spatrick     bool isInScope(StringRef Scope) const {
130e5dd7070Spatrick       return StringRef(FullName).startswith(Scope);
131e5dd7070Spatrick     }
132e5dd7070Spatrick 
133e5dd7070Spatrick     const FunctionDecl *const FDecl;
134e5dd7070Spatrick     const StringRef Name;
135e5dd7070Spatrick     const std::string FullName;
136e5dd7070Spatrick   };
137e5dd7070Spatrick 
138e5dd7070Spatrick   /// Catch taint related bugs. Check if tainted data is passed to a
139e5dd7070Spatrick   /// system call etc. Returns true on matching.
140*ec727ea7Spatrick   bool checkPre(const CallEvent &Call, const FunctionData &FData,
141e5dd7070Spatrick                 CheckerContext &C) const;
142e5dd7070Spatrick 
143e5dd7070Spatrick   /// Add taint sources on a pre-visit. Returns true on matching.
144*ec727ea7Spatrick   bool addSourcesPre(const CallEvent &Call, const FunctionData &FData,
145e5dd7070Spatrick                      CheckerContext &C) const;
146e5dd7070Spatrick 
147e5dd7070Spatrick   /// Mark filter's arguments not tainted on a pre-visit. Returns true on
148e5dd7070Spatrick   /// matching.
149*ec727ea7Spatrick   bool addFiltersPre(const CallEvent &Call, const FunctionData &FData,
150e5dd7070Spatrick                      CheckerContext &C) const;
151e5dd7070Spatrick 
152e5dd7070Spatrick   /// Propagate taint generated at pre-visit. Returns true on matching.
153*ec727ea7Spatrick   static bool propagateFromPre(const CallEvent &Call, CheckerContext &C);
154e5dd7070Spatrick 
155e5dd7070Spatrick   /// Check if the region the expression evaluates to is the standard input,
156e5dd7070Spatrick   /// and thus, is tainted.
157e5dd7070Spatrick   static bool isStdin(const Expr *E, CheckerContext &C);
158e5dd7070Spatrick 
159e5dd7070Spatrick   /// Given a pointer argument, return the value it points to.
160*ec727ea7Spatrick   static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg);
161e5dd7070Spatrick 
162e5dd7070Spatrick   /// Check for CWE-134: Uncontrolled Format String.
163e5dd7070Spatrick   static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
164e5dd7070Spatrick       "Untrusted data is used as a format string "
165e5dd7070Spatrick       "(CWE-134: Uncontrolled Format String)";
166*ec727ea7Spatrick   bool checkUncontrolledFormatString(const CallEvent &Call,
167e5dd7070Spatrick                                      CheckerContext &C) const;
168e5dd7070Spatrick 
169e5dd7070Spatrick   /// Check for:
170e5dd7070Spatrick   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
171e5dd7070Spatrick   /// CWE-78, "Failure to Sanitize Data into an OS Command"
172e5dd7070Spatrick   static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
173e5dd7070Spatrick       "Untrusted data is passed to a system call "
174e5dd7070Spatrick       "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
175*ec727ea7Spatrick   bool checkSystemCall(const CallEvent &Call, StringRef Name,
176e5dd7070Spatrick                        CheckerContext &C) const;
177e5dd7070Spatrick 
178e5dd7070Spatrick   /// Check if tainted data is used as a buffer size ins strn.. functions,
179e5dd7070Spatrick   /// and allocators.
180e5dd7070Spatrick   static constexpr llvm::StringLiteral MsgTaintedBufferSize =
181e5dd7070Spatrick       "Untrusted data is used to specify the buffer size "
182e5dd7070Spatrick       "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
183e5dd7070Spatrick       "for character data and the null terminator)";
184*ec727ea7Spatrick   bool checkTaintedBufferSize(const CallEvent &Call, CheckerContext &C) const;
185e5dd7070Spatrick 
186e5dd7070Spatrick   /// Check if tainted data is used as a custom sink's parameter.
187e5dd7070Spatrick   static constexpr llvm::StringLiteral MsgCustomSink =
188e5dd7070Spatrick       "Untrusted data is passed to a user-defined sink";
189*ec727ea7Spatrick   bool checkCustomSinks(const CallEvent &Call, const FunctionData &FData,
190e5dd7070Spatrick                         CheckerContext &C) const;
191e5dd7070Spatrick 
192e5dd7070Spatrick   /// Generate a report if the expression is tainted or points to tainted data.
193e5dd7070Spatrick   bool generateReportIfTainted(const Expr *E, StringRef Msg,
194e5dd7070Spatrick                                CheckerContext &C) const;
195e5dd7070Spatrick 
196e5dd7070Spatrick   struct TaintPropagationRule;
197e5dd7070Spatrick   template <typename T>
198e5dd7070Spatrick   using ConfigDataMap =
199e5dd7070Spatrick       std::unordered_multimap<std::string, std::pair<std::string, T>>;
200e5dd7070Spatrick   using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
201e5dd7070Spatrick   using NameArgMap = ConfigDataMap<ArgVector>;
202e5dd7070Spatrick 
203e5dd7070Spatrick   /// Find a function with the given name and scope. Returns the first match
204e5dd7070Spatrick   /// or the end of the map.
205e5dd7070Spatrick   template <typename T>
206e5dd7070Spatrick   static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
207e5dd7070Spatrick                                    const FunctionData &FData);
208e5dd7070Spatrick 
209e5dd7070Spatrick   /// A struct used to specify taint propagation rules for a function.
210e5dd7070Spatrick   ///
211e5dd7070Spatrick   /// If any of the possible taint source arguments is tainted, all of the
212e5dd7070Spatrick   /// destination arguments should also be tainted. Use InvalidArgIndex in the
213e5dd7070Spatrick   /// src list to specify that all of the arguments can introduce taint. Use
214e5dd7070Spatrick   /// InvalidArgIndex in the dst arguments to signify that all the non-const
215e5dd7070Spatrick   /// pointer and reference arguments might be tainted on return. If
216e5dd7070Spatrick   /// ReturnValueIndex is added to the dst list, the return value will be
217e5dd7070Spatrick   /// tainted.
218e5dd7070Spatrick   struct TaintPropagationRule {
219*ec727ea7Spatrick     using PropagationFuncType = bool (*)(bool IsTainted, const CallEvent &Call,
220e5dd7070Spatrick                                          CheckerContext &C);
221e5dd7070Spatrick 
222e5dd7070Spatrick     /// List of arguments which can be taint sources and should be checked.
223e5dd7070Spatrick     ArgVector SrcArgs;
224e5dd7070Spatrick     /// List of arguments which should be tainted on function return.
225e5dd7070Spatrick     ArgVector DstArgs;
226e5dd7070Spatrick     /// Index for the first variadic parameter if exist.
227e5dd7070Spatrick     unsigned VariadicIndex;
228e5dd7070Spatrick     /// Show when a function has variadic parameters. If it has, it marks all
229e5dd7070Spatrick     /// of them as source or destination.
230e5dd7070Spatrick     VariadicType VarType;
231e5dd7070Spatrick     /// Special function for tainted source determination. If defined, it can
232e5dd7070Spatrick     /// override the default behavior.
233e5dd7070Spatrick     PropagationFuncType PropagationFunc;
234e5dd7070Spatrick 
235e5dd7070Spatrick     TaintPropagationRule()
236e5dd7070Spatrick         : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
237e5dd7070Spatrick           PropagationFunc(nullptr) {}
238e5dd7070Spatrick 
239e5dd7070Spatrick     TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
240e5dd7070Spatrick                          VariadicType Var = VariadicType::None,
241e5dd7070Spatrick                          unsigned VarIndex = InvalidArgIndex,
242e5dd7070Spatrick                          PropagationFuncType Func = nullptr)
243e5dd7070Spatrick         : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
244e5dd7070Spatrick           VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
245e5dd7070Spatrick 
246e5dd7070Spatrick     /// Get the propagation rule for a given function.
247e5dd7070Spatrick     static TaintPropagationRule
248e5dd7070Spatrick     getTaintPropagationRule(const NameRuleMap &CustomPropagations,
249e5dd7070Spatrick                             const FunctionData &FData, CheckerContext &C);
250e5dd7070Spatrick 
251e5dd7070Spatrick     void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
252e5dd7070Spatrick     void addDstArg(unsigned A) { DstArgs.push_back(A); }
253e5dd7070Spatrick 
254e5dd7070Spatrick     bool isNull() const {
255e5dd7070Spatrick       return SrcArgs.empty() && DstArgs.empty() &&
256e5dd7070Spatrick              VariadicType::None == VarType;
257e5dd7070Spatrick     }
258e5dd7070Spatrick 
259e5dd7070Spatrick     bool isDestinationArgument(unsigned ArgNum) const {
260e5dd7070Spatrick       return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
261e5dd7070Spatrick     }
262e5dd7070Spatrick 
263*ec727ea7Spatrick     static bool isTaintedOrPointsToTainted(const Expr *E,
264*ec727ea7Spatrick                                            const ProgramStateRef &State,
265e5dd7070Spatrick                                            CheckerContext &C) {
266e5dd7070Spatrick       if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
267e5dd7070Spatrick         return true;
268e5dd7070Spatrick 
269e5dd7070Spatrick       if (!E->getType().getTypePtr()->isPointerType())
270e5dd7070Spatrick         return false;
271e5dd7070Spatrick 
272*ec727ea7Spatrick       Optional<SVal> V = getPointeeOf(C, E);
273e5dd7070Spatrick       return (V && isTainted(State, *V));
274e5dd7070Spatrick     }
275e5dd7070Spatrick 
276e5dd7070Spatrick     /// Pre-process a function which propagates taint according to the
277e5dd7070Spatrick     /// taint rule.
278*ec727ea7Spatrick     ProgramStateRef process(const CallEvent &Call, CheckerContext &C) const;
279e5dd7070Spatrick 
280e5dd7070Spatrick     // Functions for custom taintedness propagation.
281*ec727ea7Spatrick     static bool postSocket(bool IsTainted, const CallEvent &Call,
282e5dd7070Spatrick                            CheckerContext &C);
283e5dd7070Spatrick   };
284e5dd7070Spatrick 
285e5dd7070Spatrick   /// Defines a map between the propagation function's name, scope
286e5dd7070Spatrick   /// and TaintPropagationRule.
287e5dd7070Spatrick   NameRuleMap CustomPropagations;
288e5dd7070Spatrick 
289e5dd7070Spatrick   /// Defines a map between the filter function's name, scope and filtering
290e5dd7070Spatrick   /// args.
291e5dd7070Spatrick   NameArgMap CustomFilters;
292e5dd7070Spatrick 
293e5dd7070Spatrick   /// Defines a map between the sink function's name, scope and sinking args.
294e5dd7070Spatrick   NameArgMap CustomSinks;
295e5dd7070Spatrick };
296e5dd7070Spatrick 
297e5dd7070Spatrick const unsigned GenericTaintChecker::ReturnValueIndex;
298e5dd7070Spatrick const unsigned GenericTaintChecker::InvalidArgIndex;
299e5dd7070Spatrick 
300e5dd7070Spatrick // FIXME: these lines can be removed in C++17
301e5dd7070Spatrick constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
302e5dd7070Spatrick constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
303e5dd7070Spatrick constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
304e5dd7070Spatrick constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
305e5dd7070Spatrick } // end of anonymous namespace
306e5dd7070Spatrick 
307e5dd7070Spatrick using TaintConfig = GenericTaintChecker::TaintConfiguration;
308e5dd7070Spatrick 
309e5dd7070Spatrick LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
310e5dd7070Spatrick LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)
311e5dd7070Spatrick 
312e5dd7070Spatrick namespace llvm {
313e5dd7070Spatrick namespace yaml {
314e5dd7070Spatrick template <> struct MappingTraits<TaintConfig> {
315e5dd7070Spatrick   static void mapping(IO &IO, TaintConfig &Config) {
316e5dd7070Spatrick     IO.mapOptional("Propagations", Config.Propagations);
317e5dd7070Spatrick     IO.mapOptional("Filters", Config.Filters);
318e5dd7070Spatrick     IO.mapOptional("Sinks", Config.Sinks);
319e5dd7070Spatrick   }
320e5dd7070Spatrick };
321e5dd7070Spatrick 
322e5dd7070Spatrick template <> struct MappingTraits<TaintConfig::Propagation> {
323e5dd7070Spatrick   static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
324e5dd7070Spatrick     IO.mapRequired("Name", Propagation.Name);
325e5dd7070Spatrick     IO.mapOptional("Scope", Propagation.Scope);
326e5dd7070Spatrick     IO.mapOptional("SrcArgs", Propagation.SrcArgs);
327e5dd7070Spatrick     IO.mapOptional("DstArgs", Propagation.DstArgs);
328e5dd7070Spatrick     IO.mapOptional("VariadicType", Propagation.VarType,
329e5dd7070Spatrick                    GenericTaintChecker::VariadicType::None);
330e5dd7070Spatrick     IO.mapOptional("VariadicIndex", Propagation.VarIndex,
331e5dd7070Spatrick                    GenericTaintChecker::InvalidArgIndex);
332e5dd7070Spatrick   }
333e5dd7070Spatrick };
334e5dd7070Spatrick 
335e5dd7070Spatrick template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
336e5dd7070Spatrick   static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
337e5dd7070Spatrick     IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
338e5dd7070Spatrick     IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
339e5dd7070Spatrick     IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
340e5dd7070Spatrick   }
341e5dd7070Spatrick };
342e5dd7070Spatrick 
343e5dd7070Spatrick template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
344e5dd7070Spatrick   static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
345e5dd7070Spatrick     IO.mapRequired("Name", std::get<0>(NSA));
346e5dd7070Spatrick     IO.mapOptional("Scope", std::get<1>(NSA));
347e5dd7070Spatrick     IO.mapRequired("Args", std::get<2>(NSA));
348e5dd7070Spatrick   }
349e5dd7070Spatrick };
350e5dd7070Spatrick } // namespace yaml
351e5dd7070Spatrick } // namespace llvm
352e5dd7070Spatrick 
353e5dd7070Spatrick /// A set which is used to pass information from call pre-visit instruction
354e5dd7070Spatrick /// to the call post-visit. The values are unsigned integers, which are either
355e5dd7070Spatrick /// ReturnValueIndex, or indexes of the pointer/reference argument, which
356e5dd7070Spatrick /// points to data, which should be tainted on return.
357e5dd7070Spatrick REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
358e5dd7070Spatrick 
359*ec727ea7Spatrick GenericTaintChecker::ArgVector
360*ec727ea7Spatrick GenericTaintChecker::convertToArgVector(CheckerManager &Mgr,
361*ec727ea7Spatrick                                         const std::string &Option,
362*ec727ea7Spatrick                                         const SignedArgVector &Args) {
363e5dd7070Spatrick   ArgVector Result;
364e5dd7070Spatrick   for (int Arg : Args) {
365e5dd7070Spatrick     if (Arg == -1)
366e5dd7070Spatrick       Result.push_back(ReturnValueIndex);
367e5dd7070Spatrick     else if (Arg < -1) {
368e5dd7070Spatrick       Result.push_back(InvalidArgIndex);
369e5dd7070Spatrick       Mgr.reportInvalidCheckerOptionValue(
370e5dd7070Spatrick           this, Option,
371e5dd7070Spatrick           "an argument number for propagation rules greater or equal to -1");
372e5dd7070Spatrick     } else
373e5dd7070Spatrick       Result.push_back(static_cast<unsigned>(Arg));
374e5dd7070Spatrick   }
375e5dd7070Spatrick   return Result;
376e5dd7070Spatrick }
377e5dd7070Spatrick 
378e5dd7070Spatrick void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
379e5dd7070Spatrick                                              const std::string &Option,
380e5dd7070Spatrick                                              TaintConfiguration &&Config) {
381e5dd7070Spatrick   for (auto &P : Config.Propagations) {
382e5dd7070Spatrick     GenericTaintChecker::CustomPropagations.emplace(
383e5dd7070Spatrick         P.Name,
384e5dd7070Spatrick         std::make_pair(P.Scope, TaintPropagationRule{
385e5dd7070Spatrick                                     std::move(P.SrcArgs),
386e5dd7070Spatrick                                     convertToArgVector(Mgr, Option, P.DstArgs),
387e5dd7070Spatrick                                     P.VarType, P.VarIndex}));
388e5dd7070Spatrick   }
389e5dd7070Spatrick 
390e5dd7070Spatrick   for (auto &F : Config.Filters) {
391e5dd7070Spatrick     GenericTaintChecker::CustomFilters.emplace(
392e5dd7070Spatrick         std::get<0>(F),
393e5dd7070Spatrick         std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
394e5dd7070Spatrick   }
395e5dd7070Spatrick 
396e5dd7070Spatrick   for (auto &S : Config.Sinks) {
397e5dd7070Spatrick     GenericTaintChecker::CustomSinks.emplace(
398e5dd7070Spatrick         std::get<0>(S),
399e5dd7070Spatrick         std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
400e5dd7070Spatrick   }
401e5dd7070Spatrick }
402e5dd7070Spatrick 
403e5dd7070Spatrick template <typename T>
404e5dd7070Spatrick auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
405e5dd7070Spatrick                                                const FunctionData &FData) {
406*ec727ea7Spatrick   auto Range = Map.equal_range(std::string(FData.Name));
407e5dd7070Spatrick   auto It =
408e5dd7070Spatrick       std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
409e5dd7070Spatrick         const auto &Value = Entry.second;
410e5dd7070Spatrick         StringRef Scope = Value.first;
411e5dd7070Spatrick         return Scope.empty() || FData.isInScope(Scope);
412e5dd7070Spatrick       });
413e5dd7070Spatrick   return It != Range.second ? It : Map.end();
414e5dd7070Spatrick }
415e5dd7070Spatrick 
416e5dd7070Spatrick GenericTaintChecker::TaintPropagationRule
417e5dd7070Spatrick GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
418e5dd7070Spatrick     const NameRuleMap &CustomPropagations, const FunctionData &FData,
419e5dd7070Spatrick     CheckerContext &C) {
420e5dd7070Spatrick   // TODO: Currently, we might lose precision here: we always mark a return
421e5dd7070Spatrick   // value as tainted even if it's just a pointer, pointing to tainted data.
422e5dd7070Spatrick 
423e5dd7070Spatrick   // Check for exact name match for functions without builtin substitutes.
424e5dd7070Spatrick   // Use qualified name, because these are C functions without namespace.
425e5dd7070Spatrick   TaintPropagationRule Rule =
426e5dd7070Spatrick       llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
427e5dd7070Spatrick           // Source functions
428e5dd7070Spatrick           // TODO: Add support for vfscanf & family.
429*ec727ea7Spatrick           .Case("fdopen", {{}, {ReturnValueIndex}})
430*ec727ea7Spatrick           .Case("fopen", {{}, {ReturnValueIndex}})
431*ec727ea7Spatrick           .Case("freopen", {{}, {ReturnValueIndex}})
432*ec727ea7Spatrick           .Case("getch", {{}, {ReturnValueIndex}})
433*ec727ea7Spatrick           .Case("getchar", {{}, {ReturnValueIndex}})
434*ec727ea7Spatrick           .Case("getchar_unlocked", {{}, {ReturnValueIndex}})
435*ec727ea7Spatrick           .Case("getenv", {{}, {ReturnValueIndex}})
436*ec727ea7Spatrick           .Case("gets", {{}, {0, ReturnValueIndex}})
437*ec727ea7Spatrick           .Case("scanf", {{}, {}, VariadicType::Dst, 1})
438*ec727ea7Spatrick           .Case("socket", {{},
439*ec727ea7Spatrick                            {ReturnValueIndex},
440*ec727ea7Spatrick                            VariadicType::None,
441e5dd7070Spatrick                            InvalidArgIndex,
442*ec727ea7Spatrick                            &TaintPropagationRule::postSocket})
443*ec727ea7Spatrick           .Case("wgetch", {{}, {ReturnValueIndex}})
444e5dd7070Spatrick           // Propagating functions
445*ec727ea7Spatrick           .Case("atoi", {{0}, {ReturnValueIndex}})
446*ec727ea7Spatrick           .Case("atol", {{0}, {ReturnValueIndex}})
447*ec727ea7Spatrick           .Case("atoll", {{0}, {ReturnValueIndex}})
448*ec727ea7Spatrick           .Case("fgetc", {{0}, {ReturnValueIndex}})
449*ec727ea7Spatrick           .Case("fgetln", {{0}, {ReturnValueIndex}})
450*ec727ea7Spatrick           .Case("fgets", {{2}, {0, ReturnValueIndex}})
451*ec727ea7Spatrick           .Case("fscanf", {{0}, {}, VariadicType::Dst, 2})
452*ec727ea7Spatrick           .Case("sscanf", {{0}, {}, VariadicType::Dst, 2})
453*ec727ea7Spatrick           .Case("getc", {{0}, {ReturnValueIndex}})
454*ec727ea7Spatrick           .Case("getc_unlocked", {{0}, {ReturnValueIndex}})
455*ec727ea7Spatrick           .Case("getdelim", {{3}, {0}})
456*ec727ea7Spatrick           .Case("getline", {{2}, {0}})
457*ec727ea7Spatrick           .Case("getw", {{0}, {ReturnValueIndex}})
458*ec727ea7Spatrick           .Case("pread", {{0, 1, 2, 3}, {1, ReturnValueIndex}})
459*ec727ea7Spatrick           .Case("read", {{0, 2}, {1, ReturnValueIndex}})
460*ec727ea7Spatrick           .Case("strchr", {{0}, {ReturnValueIndex}})
461*ec727ea7Spatrick           .Case("strrchr", {{0}, {ReturnValueIndex}})
462*ec727ea7Spatrick           .Case("tolower", {{0}, {ReturnValueIndex}})
463*ec727ea7Spatrick           .Case("toupper", {{0}, {ReturnValueIndex}})
464*ec727ea7Spatrick           .Default({});
465e5dd7070Spatrick 
466e5dd7070Spatrick   if (!Rule.isNull())
467e5dd7070Spatrick     return Rule;
468*ec727ea7Spatrick   assert(FData.FDecl);
469e5dd7070Spatrick 
470e5dd7070Spatrick   // Check if it's one of the memory setting/copying functions.
471e5dd7070Spatrick   // This check is specialized but faster then calling isCLibraryFunction.
472e5dd7070Spatrick   const FunctionDecl *FDecl = FData.FDecl;
473e5dd7070Spatrick   unsigned BId = 0;
474*ec727ea7Spatrick   if ((BId = FDecl->getMemoryFunctionKind())) {
475e5dd7070Spatrick     switch (BId) {
476e5dd7070Spatrick     case Builtin::BImemcpy:
477e5dd7070Spatrick     case Builtin::BImemmove:
478e5dd7070Spatrick     case Builtin::BIstrncpy:
479e5dd7070Spatrick     case Builtin::BIstrncat:
480*ec727ea7Spatrick       return {{1, 2}, {0, ReturnValueIndex}};
481e5dd7070Spatrick     case Builtin::BIstrlcpy:
482e5dd7070Spatrick     case Builtin::BIstrlcat:
483*ec727ea7Spatrick       return {{1, 2}, {0}};
484e5dd7070Spatrick     case Builtin::BIstrndup:
485*ec727ea7Spatrick       return {{0, 1}, {ReturnValueIndex}};
486e5dd7070Spatrick 
487e5dd7070Spatrick     default:
488e5dd7070Spatrick       break;
489*ec727ea7Spatrick     }
490*ec727ea7Spatrick   }
491e5dd7070Spatrick 
492e5dd7070Spatrick   // Process all other functions which could be defined as builtins.
493e5dd7070Spatrick   if (Rule.isNull()) {
494*ec727ea7Spatrick     const auto OneOf = [FDecl](const auto &... Name) {
495*ec727ea7Spatrick       // FIXME: use fold expression in C++17
496*ec727ea7Spatrick       using unused = int[];
497*ec727ea7Spatrick       bool ret = false;
498*ec727ea7Spatrick       static_cast<void>(unused{
499*ec727ea7Spatrick           0, (ret |= CheckerContext::isCLibraryFunction(FDecl, Name), 0)...});
500*ec727ea7Spatrick       return ret;
501*ec727ea7Spatrick     };
502*ec727ea7Spatrick     if (OneOf("snprintf"))
503*ec727ea7Spatrick       return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3};
504*ec727ea7Spatrick     if (OneOf("sprintf"))
505*ec727ea7Spatrick       return {{}, {0, ReturnValueIndex}, VariadicType::Src, 2};
506*ec727ea7Spatrick     if (OneOf("strcpy", "stpcpy", "strcat"))
507*ec727ea7Spatrick       return {{1}, {0, ReturnValueIndex}};
508*ec727ea7Spatrick     if (OneOf("bcopy"))
509*ec727ea7Spatrick       return {{0, 2}, {1}};
510*ec727ea7Spatrick     if (OneOf("strdup", "strdupa", "wcsdup"))
511*ec727ea7Spatrick       return {{0}, {ReturnValueIndex}};
512e5dd7070Spatrick   }
513e5dd7070Spatrick 
514*ec727ea7Spatrick   // Skipping the following functions, since they might be used for cleansing or
515*ec727ea7Spatrick   // smart memory copy:
516e5dd7070Spatrick   // - memccpy - copying until hitting a special character.
517e5dd7070Spatrick 
518e5dd7070Spatrick   auto It = findFunctionInConfig(CustomPropagations, FData);
519*ec727ea7Spatrick   if (It != CustomPropagations.end())
520*ec727ea7Spatrick     return It->second.second;
521*ec727ea7Spatrick   return {};
522e5dd7070Spatrick }
523e5dd7070Spatrick 
524*ec727ea7Spatrick void GenericTaintChecker::checkPreCall(const CallEvent &Call,
525e5dd7070Spatrick                                        CheckerContext &C) const {
526*ec727ea7Spatrick   Optional<FunctionData> FData = FunctionData::create(Call, C);
527e5dd7070Spatrick   if (!FData)
528e5dd7070Spatrick     return;
529e5dd7070Spatrick 
530e5dd7070Spatrick   // Check for taintedness related errors first: system call, uncontrolled
531e5dd7070Spatrick   // format string, tainted buffer size.
532*ec727ea7Spatrick   if (checkPre(Call, *FData, C))
533e5dd7070Spatrick     return;
534e5dd7070Spatrick 
535e5dd7070Spatrick   // Marks the function's arguments and/or return value tainted if it present in
536e5dd7070Spatrick   // the list.
537*ec727ea7Spatrick   if (addSourcesPre(Call, *FData, C))
538e5dd7070Spatrick     return;
539e5dd7070Spatrick 
540*ec727ea7Spatrick   addFiltersPre(Call, *FData, C);
541e5dd7070Spatrick }
542e5dd7070Spatrick 
543*ec727ea7Spatrick void GenericTaintChecker::checkPostCall(const CallEvent &Call,
544e5dd7070Spatrick                                         CheckerContext &C) const {
545e5dd7070Spatrick   // Set the marked values as tainted. The return value only accessible from
546e5dd7070Spatrick   // checkPostStmt.
547*ec727ea7Spatrick   propagateFromPre(Call, C);
548e5dd7070Spatrick }
549e5dd7070Spatrick 
550e5dd7070Spatrick void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
551e5dd7070Spatrick                                      const char *NL, const char *Sep) const {
552e5dd7070Spatrick   printTaint(State, Out, NL, Sep);
553e5dd7070Spatrick }
554e5dd7070Spatrick 
555*ec727ea7Spatrick bool GenericTaintChecker::addSourcesPre(const CallEvent &Call,
556e5dd7070Spatrick                                         const FunctionData &FData,
557e5dd7070Spatrick                                         CheckerContext &C) const {
558e5dd7070Spatrick   // First, try generating a propagation rule for this function.
559e5dd7070Spatrick   TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
560e5dd7070Spatrick       this->CustomPropagations, FData, C);
561e5dd7070Spatrick   if (!Rule.isNull()) {
562*ec727ea7Spatrick     ProgramStateRef State = Rule.process(Call, C);
563e5dd7070Spatrick     if (State) {
564e5dd7070Spatrick       C.addTransition(State);
565e5dd7070Spatrick       return true;
566e5dd7070Spatrick     }
567e5dd7070Spatrick   }
568e5dd7070Spatrick   return false;
569e5dd7070Spatrick }
570e5dd7070Spatrick 
571*ec727ea7Spatrick bool GenericTaintChecker::addFiltersPre(const CallEvent &Call,
572e5dd7070Spatrick                                         const FunctionData &FData,
573e5dd7070Spatrick                                         CheckerContext &C) const {
574e5dd7070Spatrick   auto It = findFunctionInConfig(CustomFilters, FData);
575e5dd7070Spatrick   if (It == CustomFilters.end())
576e5dd7070Spatrick     return false;
577e5dd7070Spatrick 
578e5dd7070Spatrick   ProgramStateRef State = C.getState();
579e5dd7070Spatrick   const auto &Value = It->second;
580e5dd7070Spatrick   const ArgVector &Args = Value.second;
581e5dd7070Spatrick   for (unsigned ArgNum : Args) {
582*ec727ea7Spatrick     if (ArgNum >= Call.getNumArgs())
583e5dd7070Spatrick       continue;
584e5dd7070Spatrick 
585*ec727ea7Spatrick     const Expr *Arg = Call.getArgExpr(ArgNum);
586*ec727ea7Spatrick     Optional<SVal> V = getPointeeOf(C, Arg);
587e5dd7070Spatrick     if (V)
588e5dd7070Spatrick       State = removeTaint(State, *V);
589e5dd7070Spatrick   }
590e5dd7070Spatrick 
591e5dd7070Spatrick   if (State != C.getState()) {
592e5dd7070Spatrick     C.addTransition(State);
593e5dd7070Spatrick     return true;
594e5dd7070Spatrick   }
595e5dd7070Spatrick   return false;
596e5dd7070Spatrick }
597e5dd7070Spatrick 
598*ec727ea7Spatrick bool GenericTaintChecker::propagateFromPre(const CallEvent &Call,
599*ec727ea7Spatrick                                            CheckerContext &C) {
600e5dd7070Spatrick   ProgramStateRef State = C.getState();
601e5dd7070Spatrick 
602e5dd7070Spatrick   // Depending on what was tainted at pre-visit, we determined a set of
603e5dd7070Spatrick   // arguments which should be tainted after the function returns. These are
604e5dd7070Spatrick   // stored in the state as TaintArgsOnPostVisit set.
605e5dd7070Spatrick   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
606e5dd7070Spatrick   if (TaintArgs.isEmpty())
607e5dd7070Spatrick     return false;
608e5dd7070Spatrick 
609e5dd7070Spatrick   for (unsigned ArgNum : TaintArgs) {
610e5dd7070Spatrick     // Special handling for the tainted return value.
611e5dd7070Spatrick     if (ArgNum == ReturnValueIndex) {
612*ec727ea7Spatrick       State = addTaint(State, Call.getReturnValue());
613e5dd7070Spatrick       continue;
614e5dd7070Spatrick     }
615e5dd7070Spatrick 
616e5dd7070Spatrick     // The arguments are pointer arguments. The data they are pointing at is
617e5dd7070Spatrick     // tainted after the call.
618*ec727ea7Spatrick     if (Call.getNumArgs() < (ArgNum + 1))
619e5dd7070Spatrick       return false;
620*ec727ea7Spatrick     const Expr *Arg = Call.getArgExpr(ArgNum);
621*ec727ea7Spatrick     Optional<SVal> V = getPointeeOf(C, Arg);
622e5dd7070Spatrick     if (V)
623e5dd7070Spatrick       State = addTaint(State, *V);
624e5dd7070Spatrick   }
625e5dd7070Spatrick 
626e5dd7070Spatrick   // Clear up the taint info from the state.
627e5dd7070Spatrick   State = State->remove<TaintArgsOnPostVisit>();
628e5dd7070Spatrick 
629e5dd7070Spatrick   if (State != C.getState()) {
630e5dd7070Spatrick     C.addTransition(State);
631e5dd7070Spatrick     return true;
632e5dd7070Spatrick   }
633e5dd7070Spatrick   return false;
634e5dd7070Spatrick }
635e5dd7070Spatrick 
636*ec727ea7Spatrick bool GenericTaintChecker::checkPre(const CallEvent &Call,
637e5dd7070Spatrick                                    const FunctionData &FData,
638e5dd7070Spatrick                                    CheckerContext &C) const {
639*ec727ea7Spatrick   if (checkUncontrolledFormatString(Call, C))
640e5dd7070Spatrick     return true;
641e5dd7070Spatrick 
642*ec727ea7Spatrick   if (checkSystemCall(Call, FData.Name, C))
643e5dd7070Spatrick     return true;
644e5dd7070Spatrick 
645*ec727ea7Spatrick   if (checkTaintedBufferSize(Call, C))
646e5dd7070Spatrick     return true;
647e5dd7070Spatrick 
648*ec727ea7Spatrick   return checkCustomSinks(Call, FData, C);
649e5dd7070Spatrick }
650e5dd7070Spatrick 
651*ec727ea7Spatrick Optional<SVal> GenericTaintChecker::getPointeeOf(CheckerContext &C,
652e5dd7070Spatrick                                                  const Expr *Arg) {
653e5dd7070Spatrick   ProgramStateRef State = C.getState();
654e5dd7070Spatrick   SVal AddrVal = C.getSVal(Arg->IgnoreParens());
655e5dd7070Spatrick   if (AddrVal.isUnknownOrUndef())
656e5dd7070Spatrick     return None;
657e5dd7070Spatrick 
658e5dd7070Spatrick   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
659e5dd7070Spatrick   if (!AddrLoc)
660e5dd7070Spatrick     return None;
661e5dd7070Spatrick 
662e5dd7070Spatrick   QualType ArgTy = Arg->getType().getCanonicalType();
663e5dd7070Spatrick   if (!ArgTy->isPointerType())
664e5dd7070Spatrick     return State->getSVal(*AddrLoc);
665e5dd7070Spatrick 
666e5dd7070Spatrick   QualType ValTy = ArgTy->getPointeeType();
667e5dd7070Spatrick 
668e5dd7070Spatrick   // Do not dereference void pointers. Treat them as byte pointers instead.
669e5dd7070Spatrick   // FIXME: we might want to consider more than just the first byte.
670e5dd7070Spatrick   if (ValTy->isVoidType())
671e5dd7070Spatrick     ValTy = C.getASTContext().CharTy;
672e5dd7070Spatrick 
673e5dd7070Spatrick   return State->getSVal(*AddrLoc, ValTy);
674e5dd7070Spatrick }
675e5dd7070Spatrick 
676e5dd7070Spatrick ProgramStateRef
677*ec727ea7Spatrick GenericTaintChecker::TaintPropagationRule::process(const CallEvent &Call,
678e5dd7070Spatrick                                                    CheckerContext &C) const {
679e5dd7070Spatrick   ProgramStateRef State = C.getState();
680e5dd7070Spatrick 
681e5dd7070Spatrick   // Check for taint in arguments.
682e5dd7070Spatrick   bool IsTainted = true;
683e5dd7070Spatrick   for (unsigned ArgNum : SrcArgs) {
684*ec727ea7Spatrick     if (ArgNum >= Call.getNumArgs())
685e5dd7070Spatrick       continue;
686e5dd7070Spatrick 
687*ec727ea7Spatrick     if ((IsTainted =
688*ec727ea7Spatrick              isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C)))
689e5dd7070Spatrick       break;
690e5dd7070Spatrick   }
691e5dd7070Spatrick 
692e5dd7070Spatrick   // Check for taint in variadic arguments.
693e5dd7070Spatrick   if (!IsTainted && VariadicType::Src == VarType) {
694e5dd7070Spatrick     // Check if any of the arguments is tainted
695*ec727ea7Spatrick     for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
696*ec727ea7Spatrick       if ((IsTainted =
697*ec727ea7Spatrick                isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C)))
698e5dd7070Spatrick         break;
699e5dd7070Spatrick     }
700e5dd7070Spatrick   }
701e5dd7070Spatrick 
702e5dd7070Spatrick   if (PropagationFunc)
703*ec727ea7Spatrick     IsTainted = PropagationFunc(IsTainted, Call, C);
704e5dd7070Spatrick 
705e5dd7070Spatrick   if (!IsTainted)
706e5dd7070Spatrick     return State;
707e5dd7070Spatrick 
708e5dd7070Spatrick   // Mark the arguments which should be tainted after the function returns.
709e5dd7070Spatrick   for (unsigned ArgNum : DstArgs) {
710e5dd7070Spatrick     // Should mark the return value?
711e5dd7070Spatrick     if (ArgNum == ReturnValueIndex) {
712e5dd7070Spatrick       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
713e5dd7070Spatrick       continue;
714e5dd7070Spatrick     }
715e5dd7070Spatrick 
716*ec727ea7Spatrick     if (ArgNum >= Call.getNumArgs())
717e5dd7070Spatrick       continue;
718e5dd7070Spatrick 
719e5dd7070Spatrick     // Mark the given argument.
720e5dd7070Spatrick     State = State->add<TaintArgsOnPostVisit>(ArgNum);
721e5dd7070Spatrick   }
722e5dd7070Spatrick 
723e5dd7070Spatrick   // Mark all variadic arguments tainted if present.
724e5dd7070Spatrick   if (VariadicType::Dst == VarType) {
725e5dd7070Spatrick     // For all pointer and references that were passed in:
726e5dd7070Spatrick     //   If they are not pointing to const data, mark data as tainted.
727e5dd7070Spatrick     //   TODO: So far we are just going one level down; ideally we'd need to
728e5dd7070Spatrick     //         recurse here.
729*ec727ea7Spatrick     for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
730*ec727ea7Spatrick       const Expr *Arg = Call.getArgExpr(i);
731e5dd7070Spatrick       // Process pointer argument.
732e5dd7070Spatrick       const Type *ArgTy = Arg->getType().getTypePtr();
733e5dd7070Spatrick       QualType PType = ArgTy->getPointeeType();
734e5dd7070Spatrick       if ((!PType.isNull() && !PType.isConstQualified()) ||
735*ec727ea7Spatrick           (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) {
736e5dd7070Spatrick         State = State->add<TaintArgsOnPostVisit>(i);
737e5dd7070Spatrick       }
738e5dd7070Spatrick     }
739*ec727ea7Spatrick   }
740e5dd7070Spatrick 
741e5dd7070Spatrick   return State;
742e5dd7070Spatrick }
743e5dd7070Spatrick 
744e5dd7070Spatrick // If argument 0(protocol domain) is network, the return value should get taint.
745*ec727ea7Spatrick bool GenericTaintChecker::TaintPropagationRule::postSocket(
746*ec727ea7Spatrick     bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) {
747*ec727ea7Spatrick   SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
748e5dd7070Spatrick   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
749e5dd7070Spatrick   // White list the internal communication protocols.
750e5dd7070Spatrick   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
751e5dd7070Spatrick       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
752e5dd7070Spatrick     return false;
753e5dd7070Spatrick   return true;
754e5dd7070Spatrick }
755e5dd7070Spatrick 
756e5dd7070Spatrick bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
757e5dd7070Spatrick   ProgramStateRef State = C.getState();
758e5dd7070Spatrick   SVal Val = C.getSVal(E);
759e5dd7070Spatrick 
760e5dd7070Spatrick   // stdin is a pointer, so it would be a region.
761e5dd7070Spatrick   const MemRegion *MemReg = Val.getAsRegion();
762e5dd7070Spatrick 
763e5dd7070Spatrick   // The region should be symbolic, we do not know it's value.
764*ec727ea7Spatrick   const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
765e5dd7070Spatrick   if (!SymReg)
766e5dd7070Spatrick     return false;
767e5dd7070Spatrick 
768e5dd7070Spatrick   // Get it's symbol and find the declaration region it's pointing to.
769*ec727ea7Spatrick   const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
770e5dd7070Spatrick   if (!Sm)
771e5dd7070Spatrick     return false;
772*ec727ea7Spatrick   const auto *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
773e5dd7070Spatrick   if (!DeclReg)
774e5dd7070Spatrick     return false;
775e5dd7070Spatrick 
776e5dd7070Spatrick   // This region corresponds to a declaration, find out if it's a global/extern
777e5dd7070Spatrick   // variable named stdin with the proper type.
778e5dd7070Spatrick   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
779e5dd7070Spatrick     D = D->getCanonicalDecl();
780e5dd7070Spatrick     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
781e5dd7070Spatrick       const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
782e5dd7070Spatrick       if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
783e5dd7070Spatrick                        C.getASTContext().getFILEType().getCanonicalType())
784e5dd7070Spatrick         return true;
785e5dd7070Spatrick     }
786e5dd7070Spatrick   }
787e5dd7070Spatrick   return false;
788e5dd7070Spatrick }
789e5dd7070Spatrick 
790*ec727ea7Spatrick static bool getPrintfFormatArgumentNum(const CallEvent &Call,
791e5dd7070Spatrick                                        const CheckerContext &C,
792e5dd7070Spatrick                                        unsigned &ArgNum) {
793e5dd7070Spatrick   // Find if the function contains a format string argument.
794e5dd7070Spatrick   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
795e5dd7070Spatrick   // vsnprintf, syslog, custom annotated functions.
796*ec727ea7Spatrick   const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
797e5dd7070Spatrick   if (!FDecl)
798e5dd7070Spatrick     return false;
799e5dd7070Spatrick   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
800e5dd7070Spatrick     ArgNum = Format->getFormatIdx() - 1;
801*ec727ea7Spatrick     if ((Format->getType()->getName() == "printf") &&
802*ec727ea7Spatrick         Call.getNumArgs() > ArgNum)
803e5dd7070Spatrick       return true;
804e5dd7070Spatrick   }
805e5dd7070Spatrick 
806e5dd7070Spatrick   // Or if a function is named setproctitle (this is a heuristic).
807*ec727ea7Spatrick   if (C.getCalleeName(FDecl).find("setproctitle") != StringRef::npos) {
808e5dd7070Spatrick     ArgNum = 0;
809e5dd7070Spatrick     return true;
810e5dd7070Spatrick   }
811e5dd7070Spatrick 
812e5dd7070Spatrick   return false;
813e5dd7070Spatrick }
814e5dd7070Spatrick 
815e5dd7070Spatrick bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
816e5dd7070Spatrick                                                   CheckerContext &C) const {
817e5dd7070Spatrick   assert(E);
818e5dd7070Spatrick 
819e5dd7070Spatrick   // Check for taint.
820e5dd7070Spatrick   ProgramStateRef State = C.getState();
821*ec727ea7Spatrick   Optional<SVal> PointedToSVal = getPointeeOf(C, E);
822e5dd7070Spatrick   SVal TaintedSVal;
823e5dd7070Spatrick   if (PointedToSVal && isTainted(State, *PointedToSVal))
824e5dd7070Spatrick     TaintedSVal = *PointedToSVal;
825e5dd7070Spatrick   else if (isTainted(State, E, C.getLocationContext()))
826e5dd7070Spatrick     TaintedSVal = C.getSVal(E);
827e5dd7070Spatrick   else
828e5dd7070Spatrick     return false;
829e5dd7070Spatrick 
830e5dd7070Spatrick   // Generate diagnostic.
831e5dd7070Spatrick   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
832e5dd7070Spatrick     initBugType();
833e5dd7070Spatrick     auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
834e5dd7070Spatrick     report->addRange(E->getSourceRange());
835e5dd7070Spatrick     report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
836e5dd7070Spatrick     C.emitReport(std::move(report));
837e5dd7070Spatrick     return true;
838e5dd7070Spatrick   }
839e5dd7070Spatrick   return false;
840e5dd7070Spatrick }
841e5dd7070Spatrick 
842e5dd7070Spatrick bool GenericTaintChecker::checkUncontrolledFormatString(
843*ec727ea7Spatrick     const CallEvent &Call, CheckerContext &C) const {
844e5dd7070Spatrick   // Check if the function contains a format string argument.
845e5dd7070Spatrick   unsigned ArgNum = 0;
846*ec727ea7Spatrick   if (!getPrintfFormatArgumentNum(Call, C, ArgNum))
847e5dd7070Spatrick     return false;
848e5dd7070Spatrick 
849e5dd7070Spatrick   // If either the format string content or the pointer itself are tainted,
850e5dd7070Spatrick   // warn.
851*ec727ea7Spatrick   return generateReportIfTainted(Call.getArgExpr(ArgNum),
852e5dd7070Spatrick                                  MsgUncontrolledFormatString, C);
853e5dd7070Spatrick }
854e5dd7070Spatrick 
855*ec727ea7Spatrick bool GenericTaintChecker::checkSystemCall(const CallEvent &Call, StringRef Name,
856e5dd7070Spatrick                                           CheckerContext &C) const {
857e5dd7070Spatrick   // TODO: It might make sense to run this check on demand. In some cases,
858e5dd7070Spatrick   // we should check if the environment has been cleansed here. We also might
859e5dd7070Spatrick   // need to know if the user was reset before these calls(seteuid).
860e5dd7070Spatrick   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
861e5dd7070Spatrick                         .Case("system", 0)
862e5dd7070Spatrick                         .Case("popen", 0)
863e5dd7070Spatrick                         .Case("execl", 0)
864e5dd7070Spatrick                         .Case("execle", 0)
865e5dd7070Spatrick                         .Case("execlp", 0)
866e5dd7070Spatrick                         .Case("execv", 0)
867e5dd7070Spatrick                         .Case("execvp", 0)
868e5dd7070Spatrick                         .Case("execvP", 0)
869e5dd7070Spatrick                         .Case("execve", 0)
870e5dd7070Spatrick                         .Case("dlopen", 0)
871e5dd7070Spatrick                         .Default(InvalidArgIndex);
872e5dd7070Spatrick 
873*ec727ea7Spatrick   if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1))
874e5dd7070Spatrick     return false;
875e5dd7070Spatrick 
876*ec727ea7Spatrick   return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs,
877*ec727ea7Spatrick                                  C);
878e5dd7070Spatrick }
879e5dd7070Spatrick 
880e5dd7070Spatrick // TODO: Should this check be a part of the CString checker?
881e5dd7070Spatrick // If yes, should taint be a global setting?
882*ec727ea7Spatrick bool GenericTaintChecker::checkTaintedBufferSize(const CallEvent &Call,
883e5dd7070Spatrick                                                  CheckerContext &C) const {
884*ec727ea7Spatrick   const auto *FDecl = Call.getDecl()->getAsFunction();
885e5dd7070Spatrick   // If the function has a buffer size argument, set ArgNum.
886e5dd7070Spatrick   unsigned ArgNum = InvalidArgIndex;
887e5dd7070Spatrick   unsigned BId = 0;
888*ec727ea7Spatrick   if ((BId = FDecl->getMemoryFunctionKind())) {
889e5dd7070Spatrick     switch (BId) {
890e5dd7070Spatrick     case Builtin::BImemcpy:
891e5dd7070Spatrick     case Builtin::BImemmove:
892e5dd7070Spatrick     case Builtin::BIstrncpy:
893e5dd7070Spatrick       ArgNum = 2;
894e5dd7070Spatrick       break;
895e5dd7070Spatrick     case Builtin::BIstrndup:
896e5dd7070Spatrick       ArgNum = 1;
897e5dd7070Spatrick       break;
898e5dd7070Spatrick     default:
899e5dd7070Spatrick       break;
900*ec727ea7Spatrick     }
901*ec727ea7Spatrick   }
902e5dd7070Spatrick 
903e5dd7070Spatrick   if (ArgNum == InvalidArgIndex) {
904*ec727ea7Spatrick     using CCtx = CheckerContext;
905*ec727ea7Spatrick     if (CCtx::isCLibraryFunction(FDecl, "malloc") ||
906*ec727ea7Spatrick         CCtx::isCLibraryFunction(FDecl, "calloc") ||
907*ec727ea7Spatrick         CCtx::isCLibraryFunction(FDecl, "alloca"))
908e5dd7070Spatrick       ArgNum = 0;
909*ec727ea7Spatrick     else if (CCtx::isCLibraryFunction(FDecl, "memccpy"))
910e5dd7070Spatrick       ArgNum = 3;
911*ec727ea7Spatrick     else if (CCtx::isCLibraryFunction(FDecl, "realloc"))
912e5dd7070Spatrick       ArgNum = 1;
913*ec727ea7Spatrick     else if (CCtx::isCLibraryFunction(FDecl, "bcopy"))
914e5dd7070Spatrick       ArgNum = 2;
915e5dd7070Spatrick   }
916e5dd7070Spatrick 
917*ec727ea7Spatrick   return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum &&
918*ec727ea7Spatrick          generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize,
919*ec727ea7Spatrick                                  C);
920e5dd7070Spatrick }
921e5dd7070Spatrick 
922*ec727ea7Spatrick bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call,
923e5dd7070Spatrick                                            const FunctionData &FData,
924e5dd7070Spatrick                                            CheckerContext &C) const {
925e5dd7070Spatrick   auto It = findFunctionInConfig(CustomSinks, FData);
926e5dd7070Spatrick   if (It == CustomSinks.end())
927e5dd7070Spatrick     return false;
928e5dd7070Spatrick 
929e5dd7070Spatrick   const auto &Value = It->second;
930e5dd7070Spatrick   const GenericTaintChecker::ArgVector &Args = Value.second;
931e5dd7070Spatrick   for (unsigned ArgNum : Args) {
932*ec727ea7Spatrick     if (ArgNum >= Call.getNumArgs())
933e5dd7070Spatrick       continue;
934e5dd7070Spatrick 
935*ec727ea7Spatrick     if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C))
936e5dd7070Spatrick       return true;
937e5dd7070Spatrick   }
938e5dd7070Spatrick 
939e5dd7070Spatrick   return false;
940e5dd7070Spatrick }
941e5dd7070Spatrick 
942e5dd7070Spatrick void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
943e5dd7070Spatrick   auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
944e5dd7070Spatrick   std::string Option{"Config"};
945e5dd7070Spatrick   StringRef ConfigFile =
946e5dd7070Spatrick       Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
947e5dd7070Spatrick   llvm::Optional<TaintConfig> Config =
948e5dd7070Spatrick       getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
949e5dd7070Spatrick   if (Config)
950e5dd7070Spatrick     Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
951e5dd7070Spatrick }
952e5dd7070Spatrick 
953*ec727ea7Spatrick bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) {
954e5dd7070Spatrick   return true;
955e5dd7070Spatrick }
956