xref: /llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp (revision fc9821a877d4e1443603d67539e4369b3ec72890)
1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines a DataflowAnalysisContext class that owns objects that
10 //  encompass the state of a program and stores context that is used during
11 //  dataflow analysis.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/Analysis/FlowSensitive/DebugSupport.h"
18 #include "clang/Analysis/FlowSensitive/Formula.h"
19 #include "clang/Analysis/FlowSensitive/Logger.h"
20 #include "clang/Analysis/FlowSensitive/Value.h"
21 #include "llvm/ADT/SetOperations.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/Path.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cassert>
29 #include <memory>
30 #include <string>
31 #include <utility>
32 #include <vector>
33 
34 static llvm::cl::opt<std::string> DataflowLog(
35     "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
36     llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
37                    "log to stderr. With an arg, writes HTML logs under the "
38                    "specified directory (one per analyzed function)."));
39 
40 namespace clang {
41 namespace dataflow {
42 
43 void DataflowAnalysisContext::addModeledFields(
44     const llvm::DenseSet<const FieldDecl *> &Fields) {
45   llvm::set_union(ModeledFields, Fields);
46 }
47 
48 llvm::DenseSet<const FieldDecl *>
49 DataflowAnalysisContext::getReferencedFields(QualType Type) {
50   llvm::DenseSet<const FieldDecl *> Fields = getObjectFields(Type);
51   llvm::set_intersect(Fields, ModeledFields);
52   return Fields;
53 }
54 
55 StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) {
56   if (!Type.isNull() && Type->isRecordType()) {
57     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
58     // During context-sensitive analysis, a struct may be allocated in one
59     // function, but its field accessed in a function lower in the stack than
60     // the allocation. Since we only collect fields used in the function where
61     // the allocation occurs, we can't apply that filter when performing
62     // context-sensitive analysis. But, this only applies to storage locations,
63     // since field access it not allowed to fail. In contrast, field *values*
64     // don't need this allowance, since the API allows for uninitialized fields.
65     auto Fields = Opts.ContextSensitiveOpts ? getObjectFields(Type)
66                                             : getReferencedFields(Type);
67     for (const FieldDecl *Field : Fields)
68       FieldLocs.insert({Field, &createStorageLocation(Field->getType())});
69     return arena().create<AggregateStorageLocation>(Type, std::move(FieldLocs));
70   }
71   return arena().create<ScalarStorageLocation>(Type);
72 }
73 
74 StorageLocation &
75 DataflowAnalysisContext::getStableStorageLocation(const VarDecl &D) {
76   if (auto *Loc = getStorageLocation(D))
77     return *Loc;
78   auto &Loc = createStorageLocation(D.getType());
79   setStorageLocation(D, Loc);
80   return Loc;
81 }
82 
83 StorageLocation &
84 DataflowAnalysisContext::getStableStorageLocation(const Expr &E) {
85   if (auto *Loc = getStorageLocation(E))
86     return *Loc;
87   auto &Loc = createStorageLocation(E.getType());
88   setStorageLocation(E, Loc);
89   return Loc;
90 }
91 
92 PointerValue &
93 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
94   auto CanonicalPointeeType =
95       PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType();
96   auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr);
97   if (Res.second) {
98     auto &PointeeLoc = createStorageLocation(CanonicalPointeeType);
99     Res.first->second = &arena().create<PointerValue>(PointeeLoc);
100   }
101   return *Res.first->second;
102 }
103 
104 void DataflowAnalysisContext::addFlowConditionConstraint(
105     Atom Token, const Formula &Constraint) {
106   auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint);
107   if (!Res.second) {
108     Res.first->second =
109         &arena().makeAnd(*Res.first->second, Constraint);
110   }
111 }
112 
113 Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) {
114   Atom ForkToken = arena().makeFlowConditionToken();
115   FlowConditionDeps[ForkToken].insert(Token);
116   addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token));
117   return ForkToken;
118 }
119 
120 Atom
121 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken,
122                                             Atom SecondToken) {
123   Atom Token = arena().makeFlowConditionToken();
124   FlowConditionDeps[Token].insert(FirstToken);
125   FlowConditionDeps[Token].insert(SecondToken);
126   addFlowConditionConstraint(Token,
127                              arena().makeOr(arena().makeAtomRef(FirstToken),
128                                             arena().makeAtomRef(SecondToken)));
129   return Token;
130 }
131 
132 Solver::Result DataflowAnalysisContext::querySolver(
133     llvm::SetVector<const Formula *> Constraints) {
134   Constraints.insert(&arena().makeLiteral(true));
135   Constraints.insert(&arena().makeNot(arena().makeLiteral(false)));
136   return S->solve(Constraints.getArrayRef());
137 }
138 
139 bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
140                                                    const Formula &Val) {
141   // Returns true if and only if truth assignment of the flow condition implies
142   // that `Val` is also true. We prove whether or not this property holds by
143   // reducing the problem to satisfiability checking. In other words, we attempt
144   // to show that assuming `Val` is false makes the constraints induced by the
145   // flow condition unsatisfiable.
146   llvm::SetVector<const Formula *> Constraints;
147   Constraints.insert(&arena().makeAtomRef(Token));
148   Constraints.insert(&arena().makeNot(Val));
149   llvm::DenseSet<Atom> VisitedTokens;
150   addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens);
151   return isUnsatisfiable(std::move(Constraints));
152 }
153 
154 bool DataflowAnalysisContext::flowConditionIsTautology(Atom Token) {
155   // Returns true if and only if we cannot prove that the flow condition can
156   // ever be false.
157   llvm::SetVector<const Formula *> Constraints;
158   Constraints.insert(&arena().makeNot(arena().makeAtomRef(Token)));
159   llvm::DenseSet<Atom> VisitedTokens;
160   addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens);
161   return isUnsatisfiable(std::move(Constraints));
162 }
163 
164 bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1,
165                                                  const Formula &Val2) {
166   llvm::SetVector<const Formula *> Constraints;
167   Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2)));
168   return isUnsatisfiable(std::move(Constraints));
169 }
170 
171 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
172     Atom Token, llvm::SetVector<const Formula *> &Constraints,
173     llvm::DenseSet<Atom> &VisitedTokens) {
174   auto Res = VisitedTokens.insert(Token);
175   if (!Res.second)
176     return;
177 
178   auto ConstraintsIt = FlowConditionConstraints.find(Token);
179   if (ConstraintsIt == FlowConditionConstraints.end()) {
180     Constraints.insert(&arena().makeAtomRef(Token));
181   } else {
182     // Bind flow condition token via `iff` to its set of constraints:
183     // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
184     Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token),
185                                            *ConstraintsIt->second));
186   }
187 
188   auto DepsIt = FlowConditionDeps.find(Token);
189   if (DepsIt != FlowConditionDeps.end()) {
190     for (Atom DepToken : DepsIt->second) {
191       addTransitiveFlowConditionConstraints(DepToken, Constraints,
192                                             VisitedTokens);
193     }
194   }
195 }
196 
197 void DataflowAnalysisContext::dumpFlowCondition(Atom Token,
198                                                 llvm::raw_ostream &OS) {
199   llvm::SetVector<const Formula *> Constraints;
200   Constraints.insert(&arena().makeAtomRef(Token));
201   llvm::DenseSet<Atom> VisitedTokens;
202   addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens);
203 
204   // TODO: have formulas know about true/false directly instead
205   Atom True = arena().makeLiteral(true).getAtom();
206   Atom False = arena().makeLiteral(false).getAtom();
207   Formula::AtomNames Names = {{False, "false"}, {True, "true"}};
208 
209   for (const auto *Constraint : Constraints) {
210     Constraint->print(OS, &Names);
211     OS << "\n";
212   }
213 }
214 
215 const ControlFlowContext *
216 DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) {
217   // Canonicalize the key:
218   F = F->getDefinition();
219   if (F == nullptr)
220     return nullptr;
221   auto It = FunctionContexts.find(F);
222   if (It != FunctionContexts.end())
223     return &It->second;
224 
225   if (F->hasBody()) {
226     auto CFCtx = ControlFlowContext::build(*F);
227     // FIXME: Handle errors.
228     assert(CFCtx);
229     auto Result = FunctionContexts.insert({F, std::move(*CFCtx)});
230     return &Result.first->second;
231   }
232 
233   return nullptr;
234 }
235 
236 static std::unique_ptr<Logger> makeLoggerFromCommandLine() {
237   if (DataflowLog.empty())
238     return Logger::textual(llvm::errs());
239 
240   llvm::StringRef Dir = DataflowLog;
241   if (auto EC = llvm::sys::fs::create_directories(Dir))
242     llvm::errs() << "Failed to create log dir: " << EC.message() << "\n";
243   // All analysis runs within a process will log to the same directory.
244   // Share a counter so they don't all overwrite each other's 0.html.
245   // (Don't share a logger, it's not threadsafe).
246   static std::atomic<unsigned> Counter = {0};
247   auto StreamFactory =
248       [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> {
249     llvm::SmallString<256> File(Dir);
250     llvm::sys::path::append(File,
251                             std::to_string(Counter.fetch_add(1)) + ".html");
252     std::error_code EC;
253     auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC);
254     if (EC) {
255       llvm::errs() << "Failed to create log " << File << ": " << EC.message()
256                    << "\n";
257       return std::make_unique<llvm::raw_null_ostream>();
258     }
259     return OS;
260   };
261   return Logger::html(std::move(StreamFactory));
262 }
263 
264 DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S,
265                                                  Options Opts)
266     : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) {
267   assert(this->S != nullptr);
268   // If the -dataflow-log command-line flag was set, synthesize a logger.
269   // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
270   // based tools.
271   if (Opts.Log == nullptr) {
272     if (DataflowLog.getNumOccurrences() > 0) {
273       LogOwner = makeLoggerFromCommandLine();
274       this->Opts.Log = LogOwner.get();
275       // FIXME: if the flag is given a value, write an HTML log to a file.
276     } else {
277       this->Opts.Log = &Logger::null();
278     }
279   }
280 }
281 
282 DataflowAnalysisContext::~DataflowAnalysisContext() = default;
283 
284 } // namespace dataflow
285 } // namespace clang
286 
287 using namespace clang;
288 
289 const Expr &clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) {
290   const Expr *Current = &E;
291   if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) {
292     Current = EWC->getSubExpr();
293     assert(Current != nullptr);
294   }
295   Current = Current->IgnoreParens();
296   assert(Current != nullptr);
297   return *Current;
298 }
299 
300 const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) {
301   if (auto *E = dyn_cast<Expr>(&S))
302     return ignoreCFGOmittedNodes(*E);
303   return S;
304 }
305 
306 // FIXME: Does not precisely handle non-virtual diamond inheritance. A single
307 // field decl will be modeled for all instances of the inherited field.
308 static void
309 getFieldsFromClassHierarchy(QualType Type,
310                             llvm::DenseSet<const FieldDecl *> &Fields) {
311   if (Type->isIncompleteType() || Type->isDependentType() ||
312       !Type->isRecordType())
313     return;
314 
315   for (const FieldDecl *Field : Type->getAsRecordDecl()->fields())
316     Fields.insert(Field);
317   if (auto *CXXRecord = Type->getAsCXXRecordDecl())
318     for (const CXXBaseSpecifier &Base : CXXRecord->bases())
319       getFieldsFromClassHierarchy(Base.getType(), Fields);
320 }
321 
322 /// Gets the set of all fields in the type.
323 llvm::DenseSet<const FieldDecl *>
324 clang::dataflow::getObjectFields(QualType Type) {
325   llvm::DenseSet<const FieldDecl *> Fields;
326   getFieldsFromClassHierarchy(Type, Fields);
327   return Fields;
328 }
329