xref: /llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp (revision aef05a12329cf83c0a6fe46b464ab6ac08ee3439)
1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines a DataflowAnalysisContext class that owns objects that
10 //  encompass the state of a program and stores context that is used during
11 //  dataflow analysis.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/Analysis/FlowSensitive/DebugSupport.h"
18 #include "clang/Analysis/FlowSensitive/Formula.h"
19 #include "clang/Analysis/FlowSensitive/Logger.h"
20 #include "clang/Analysis/FlowSensitive/Value.h"
21 #include "llvm/ADT/SetOperations.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/Path.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cassert>
29 #include <memory>
30 #include <string>
31 #include <utility>
32 #include <vector>
33 
34 static llvm::cl::opt<std::string> DataflowLog(
35     "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
36     llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
37                    "log to stderr. With an arg, writes HTML logs under the "
38                    "specified directory (one per analyzed function)."));
39 
40 namespace clang {
41 namespace dataflow {
42 
43 FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) {
44   // During context-sensitive analysis, a struct may be allocated in one
45   // function, but its field accessed in a function lower in the stack than
46   // the allocation. Since we only collect fields used in the function where
47   // the allocation occurs, we can't apply that filter when performing
48   // context-sensitive analysis. But, this only applies to storage locations,
49   // since field access it not allowed to fail. In contrast, field *values*
50   // don't need this allowance, since the API allows for uninitialized fields.
51   if (Opts.ContextSensitiveOpts)
52     return getObjectFields(Type);
53 
54   return llvm::set_intersection(getObjectFields(Type), ModeledFields);
55 }
56 
57 void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) {
58   ModeledFields.set_union(Fields);
59 }
60 
61 StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) {
62   if (!Type.isNull() && Type->isRecordType()) {
63     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
64     for (const FieldDecl *Field : getModeledFields(Type))
65       if (Field->getType()->isReferenceType())
66         FieldLocs.insert({Field, nullptr});
67       else
68         FieldLocs.insert({Field, &createStorageLocation(
69                                      Field->getType().getNonReferenceType())});
70     return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs));
71   }
72   return arena().create<ScalarStorageLocation>(Type);
73 }
74 
75 StorageLocation &
76 DataflowAnalysisContext::getStableStorageLocation(const VarDecl &D) {
77   if (auto *Loc = DeclToLoc.lookup(&D))
78     return *Loc;
79   auto &Loc = createStorageLocation(D.getType().getNonReferenceType());
80   DeclToLoc[&D] = &Loc;
81   return Loc;
82 }
83 
84 StorageLocation &
85 DataflowAnalysisContext::getStableStorageLocation(const Expr &E) {
86   const Expr &CanonE = ignoreCFGOmittedNodes(E);
87 
88   if (auto *Loc = ExprToLoc.lookup(&CanonE))
89     return *Loc;
90   auto &Loc = createStorageLocation(CanonE.getType());
91   ExprToLoc[&CanonE] = &Loc;
92   return Loc;
93 }
94 
95 PointerValue &
96 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
97   auto CanonicalPointeeType =
98       PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType();
99   auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr);
100   if (Res.second) {
101     auto &PointeeLoc = createStorageLocation(CanonicalPointeeType);
102     Res.first->second = &arena().create<PointerValue>(PointeeLoc);
103   }
104   return *Res.first->second;
105 }
106 
107 void DataflowAnalysisContext::addFlowConditionConstraint(
108     Atom Token, const Formula &Constraint) {
109   auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint);
110   if (!Res.second) {
111     Res.first->second =
112         &arena().makeAnd(*Res.first->second, Constraint);
113   }
114 }
115 
116 Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) {
117   Atom ForkToken = arena().makeFlowConditionToken();
118   FlowConditionDeps[ForkToken].insert(Token);
119   addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token));
120   return ForkToken;
121 }
122 
123 Atom
124 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken,
125                                             Atom SecondToken) {
126   Atom Token = arena().makeFlowConditionToken();
127   FlowConditionDeps[Token].insert(FirstToken);
128   FlowConditionDeps[Token].insert(SecondToken);
129   addFlowConditionConstraint(Token,
130                              arena().makeOr(arena().makeAtomRef(FirstToken),
131                                             arena().makeAtomRef(SecondToken)));
132   return Token;
133 }
134 
135 Solver::Result DataflowAnalysisContext::querySolver(
136     llvm::SetVector<const Formula *> Constraints) {
137   Constraints.insert(&arena().makeLiteral(true));
138   Constraints.insert(&arena().makeNot(arena().makeLiteral(false)));
139   return S->solve(Constraints.getArrayRef());
140 }
141 
142 bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
143                                                    const Formula &Val) {
144   // Returns true if and only if truth assignment of the flow condition implies
145   // that `Val` is also true. We prove whether or not this property holds by
146   // reducing the problem to satisfiability checking. In other words, we attempt
147   // to show that assuming `Val` is false makes the constraints induced by the
148   // flow condition unsatisfiable.
149   llvm::SetVector<const Formula *> Constraints;
150   Constraints.insert(&arena().makeAtomRef(Token));
151   Constraints.insert(&arena().makeNot(Val));
152   llvm::DenseSet<Atom> VisitedTokens;
153   addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens);
154   return isUnsatisfiable(std::move(Constraints));
155 }
156 
157 bool DataflowAnalysisContext::flowConditionIsTautology(Atom Token) {
158   // Returns true if and only if we cannot prove that the flow condition can
159   // ever be false.
160   llvm::SetVector<const Formula *> Constraints;
161   Constraints.insert(&arena().makeNot(arena().makeAtomRef(Token)));
162   llvm::DenseSet<Atom> VisitedTokens;
163   addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens);
164   return isUnsatisfiable(std::move(Constraints));
165 }
166 
167 bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1,
168                                                  const Formula &Val2) {
169   llvm::SetVector<const Formula *> Constraints;
170   Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2)));
171   return isUnsatisfiable(std::move(Constraints));
172 }
173 
174 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
175     Atom Token, llvm::SetVector<const Formula *> &Constraints,
176     llvm::DenseSet<Atom> &VisitedTokens) {
177   auto Res = VisitedTokens.insert(Token);
178   if (!Res.second)
179     return;
180 
181   auto ConstraintsIt = FlowConditionConstraints.find(Token);
182   if (ConstraintsIt == FlowConditionConstraints.end()) {
183     Constraints.insert(&arena().makeAtomRef(Token));
184   } else {
185     // Bind flow condition token via `iff` to its set of constraints:
186     // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
187     Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token),
188                                            *ConstraintsIt->second));
189   }
190 
191   auto DepsIt = FlowConditionDeps.find(Token);
192   if (DepsIt != FlowConditionDeps.end()) {
193     for (Atom DepToken : DepsIt->second) {
194       addTransitiveFlowConditionConstraints(DepToken, Constraints,
195                                             VisitedTokens);
196     }
197   }
198 }
199 
200 void DataflowAnalysisContext::dumpFlowCondition(Atom Token,
201                                                 llvm::raw_ostream &OS) {
202   llvm::SetVector<const Formula *> Constraints;
203   Constraints.insert(&arena().makeAtomRef(Token));
204   llvm::DenseSet<Atom> VisitedTokens;
205   addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens);
206 
207   // TODO: have formulas know about true/false directly instead
208   Atom True = arena().makeLiteral(true).getAtom();
209   Atom False = arena().makeLiteral(false).getAtom();
210   Formula::AtomNames Names = {{False, "false"}, {True, "true"}};
211 
212   for (const auto *Constraint : Constraints) {
213     Constraint->print(OS, &Names);
214     OS << "\n";
215   }
216 }
217 
218 const ControlFlowContext *
219 DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) {
220   // Canonicalize the key:
221   F = F->getDefinition();
222   if (F == nullptr)
223     return nullptr;
224   auto It = FunctionContexts.find(F);
225   if (It != FunctionContexts.end())
226     return &It->second;
227 
228   if (F->hasBody()) {
229     auto CFCtx = ControlFlowContext::build(*F);
230     // FIXME: Handle errors.
231     assert(CFCtx);
232     auto Result = FunctionContexts.insert({F, std::move(*CFCtx)});
233     return &Result.first->second;
234   }
235 
236   return nullptr;
237 }
238 
239 static std::unique_ptr<Logger> makeLoggerFromCommandLine() {
240   if (DataflowLog.empty())
241     return Logger::textual(llvm::errs());
242 
243   llvm::StringRef Dir = DataflowLog;
244   if (auto EC = llvm::sys::fs::create_directories(Dir))
245     llvm::errs() << "Failed to create log dir: " << EC.message() << "\n";
246   // All analysis runs within a process will log to the same directory.
247   // Share a counter so they don't all overwrite each other's 0.html.
248   // (Don't share a logger, it's not threadsafe).
249   static std::atomic<unsigned> Counter = {0};
250   auto StreamFactory =
251       [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> {
252     llvm::SmallString<256> File(Dir);
253     llvm::sys::path::append(File,
254                             std::to_string(Counter.fetch_add(1)) + ".html");
255     std::error_code EC;
256     auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC);
257     if (EC) {
258       llvm::errs() << "Failed to create log " << File << ": " << EC.message()
259                    << "\n";
260       return std::make_unique<llvm::raw_null_ostream>();
261     }
262     return OS;
263   };
264   return Logger::html(std::move(StreamFactory));
265 }
266 
267 DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S,
268                                                  Options Opts)
269     : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) {
270   assert(this->S != nullptr);
271   // If the -dataflow-log command-line flag was set, synthesize a logger.
272   // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
273   // based tools.
274   if (Opts.Log == nullptr) {
275     if (DataflowLog.getNumOccurrences() > 0) {
276       LogOwner = makeLoggerFromCommandLine();
277       this->Opts.Log = LogOwner.get();
278       // FIXME: if the flag is given a value, write an HTML log to a file.
279     } else {
280       this->Opts.Log = &Logger::null();
281     }
282   }
283 }
284 
285 DataflowAnalysisContext::~DataflowAnalysisContext() = default;
286 
287 } // namespace dataflow
288 } // namespace clang
289 
290 using namespace clang;
291 
292 const Expr &clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) {
293   const Expr *Current = &E;
294   if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) {
295     Current = EWC->getSubExpr();
296     assert(Current != nullptr);
297   }
298   Current = Current->IgnoreParens();
299   assert(Current != nullptr);
300   return *Current;
301 }
302 
303 const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) {
304   if (auto *E = dyn_cast<Expr>(&S))
305     return ignoreCFGOmittedNodes(*E);
306   return S;
307 }
308 
309 // FIXME: Does not precisely handle non-virtual diamond inheritance. A single
310 // field decl will be modeled for all instances of the inherited field.
311 static void getFieldsFromClassHierarchy(QualType Type,
312                                         clang::dataflow::FieldSet &Fields) {
313   if (Type->isIncompleteType() || Type->isDependentType() ||
314       !Type->isRecordType())
315     return;
316 
317   for (const FieldDecl *Field : Type->getAsRecordDecl()->fields())
318     Fields.insert(Field);
319   if (auto *CXXRecord = Type->getAsCXXRecordDecl())
320     for (const CXXBaseSpecifier &Base : CXXRecord->bases())
321       getFieldsFromClassHierarchy(Base.getType(), Fields);
322 }
323 
324 /// Gets the set of all fields in the type.
325 clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) {
326   FieldSet Fields;
327   getFieldsFromClassHierarchy(Type, Fields);
328   return Fields;
329 }
330