xref: /llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp (revision a737a33eb995cf704f8ae9653811ca503d77ed65)
1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines a DataflowAnalysisContext class that owns objects that
10 //  encompass the state of a program and stores context that is used during
11 //  dataflow analysis.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/Analysis/FlowSensitive/DebugSupport.h"
18 #include "clang/Analysis/FlowSensitive/Formula.h"
19 #include "clang/Analysis/FlowSensitive/Logger.h"
20 #include "clang/Analysis/FlowSensitive/SimplifyConstraints.h"
21 #include "clang/Analysis/FlowSensitive/Value.h"
22 #include "llvm/ADT/SetOperations.h"
23 #include "llvm/ADT/SetVector.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/Path.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 #include <memory>
31 #include <string>
32 #include <utility>
33 #include <vector>
34 
35 static llvm::cl::opt<std::string> DataflowLog(
36     "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
37     llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
38                    "log to stderr. With an arg, writes HTML logs under the "
39                    "specified directory (one per analyzed function)."));
40 
41 namespace clang {
42 namespace dataflow {
43 
44 FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) {
45   // During context-sensitive analysis, a struct may be allocated in one
46   // function, but its field accessed in a function lower in the stack than
47   // the allocation. Since we only collect fields used in the function where
48   // the allocation occurs, we can't apply that filter when performing
49   // context-sensitive analysis. But, this only applies to storage locations,
50   // since field access it not allowed to fail. In contrast, field *values*
51   // don't need this allowance, since the API allows for uninitialized fields.
52   if (Opts.ContextSensitiveOpts)
53     return getObjectFields(Type);
54 
55   return llvm::set_intersection(getObjectFields(Type), ModeledFields);
56 }
57 
58 void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) {
59   ModeledFields.set_union(Fields);
60 }
61 
62 StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) {
63   if (!Type.isNull() && Type->isRecordType()) {
64     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
65     for (const FieldDecl *Field : getModeledFields(Type))
66       if (Field->getType()->isReferenceType())
67         FieldLocs.insert({Field, nullptr});
68       else
69         FieldLocs.insert({Field, &createStorageLocation(
70                                      Field->getType().getNonReferenceType())});
71     return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs));
72   }
73   return arena().create<ScalarStorageLocation>(Type);
74 }
75 
76 StorageLocation &
77 DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) {
78   if (auto *Loc = DeclToLoc.lookup(&D))
79     return *Loc;
80   auto &Loc = createStorageLocation(D.getType().getNonReferenceType());
81   DeclToLoc[&D] = &Loc;
82   return Loc;
83 }
84 
85 StorageLocation &
86 DataflowAnalysisContext::getStableStorageLocation(const Expr &E) {
87   const Expr &CanonE = ignoreCFGOmittedNodes(E);
88 
89   if (auto *Loc = ExprToLoc.lookup(&CanonE))
90     return *Loc;
91   auto &Loc = createStorageLocation(CanonE.getType());
92   ExprToLoc[&CanonE] = &Loc;
93   return Loc;
94 }
95 
96 PointerValue &
97 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
98   auto CanonicalPointeeType =
99       PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType();
100   auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr);
101   if (Res.second) {
102     auto &PointeeLoc = createStorageLocation(CanonicalPointeeType);
103     Res.first->second = &arena().create<PointerValue>(PointeeLoc);
104   }
105   return *Res.first->second;
106 }
107 
108 void DataflowAnalysisContext::addInvariant(const Formula &Constraint) {
109   if (Invariant == nullptr)
110     Invariant = &Constraint;
111   else
112     Invariant = &arena().makeAnd(*Invariant, Constraint);
113 }
114 
115 void DataflowAnalysisContext::addFlowConditionConstraint(
116     Atom Token, const Formula &Constraint) {
117   auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint);
118   if (!Res.second) {
119     Res.first->second =
120         &arena().makeAnd(*Res.first->second, Constraint);
121   }
122 }
123 
124 Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) {
125   Atom ForkToken = arena().makeFlowConditionToken();
126   FlowConditionDeps[ForkToken].insert(Token);
127   addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token));
128   return ForkToken;
129 }
130 
131 Atom
132 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken,
133                                             Atom SecondToken) {
134   Atom Token = arena().makeFlowConditionToken();
135   FlowConditionDeps[Token].insert(FirstToken);
136   FlowConditionDeps[Token].insert(SecondToken);
137   addFlowConditionConstraint(Token,
138                              arena().makeOr(arena().makeAtomRef(FirstToken),
139                                             arena().makeAtomRef(SecondToken)));
140   return Token;
141 }
142 
143 Solver::Result DataflowAnalysisContext::querySolver(
144     llvm::SetVector<const Formula *> Constraints) {
145   return S->solve(Constraints.getArrayRef());
146 }
147 
148 bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
149                                                    const Formula &F) {
150   // Returns true if and only if truth assignment of the flow condition implies
151   // that `F` is also true. We prove whether or not this property holds by
152   // reducing the problem to satisfiability checking. In other words, we attempt
153   // to show that assuming `F` is false makes the constraints induced by the
154   // flow condition unsatisfiable.
155   llvm::SetVector<const Formula *> Constraints;
156   Constraints.insert(&arena().makeAtomRef(Token));
157   Constraints.insert(&arena().makeNot(F));
158   addTransitiveFlowConditionConstraints(Token, Constraints);
159   return isUnsatisfiable(std::move(Constraints));
160 }
161 
162 bool DataflowAnalysisContext::flowConditionAllows(Atom Token,
163                                                   const Formula &F) {
164   llvm::SetVector<const Formula *> Constraints;
165   Constraints.insert(&arena().makeAtomRef(Token));
166   Constraints.insert(&F);
167   addTransitiveFlowConditionConstraints(Token, Constraints);
168   return isSatisfiable(std::move(Constraints));
169 }
170 
171 bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1,
172                                                  const Formula &Val2) {
173   llvm::SetVector<const Formula *> Constraints;
174   Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2)));
175   return isUnsatisfiable(std::move(Constraints));
176 }
177 
178 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
179     Atom Token, llvm::SetVector<const Formula *> &Constraints) {
180   llvm::DenseSet<Atom> AddedTokens;
181   std::vector<Atom> Remaining = {Token};
182 
183   if (Invariant)
184     Constraints.insert(Invariant);
185   // Define all the flow conditions that might be referenced in constraints.
186   while (!Remaining.empty()) {
187     auto Token = Remaining.back();
188     Remaining.pop_back();
189     if (!AddedTokens.insert(Token).second)
190       continue;
191 
192     auto ConstraintsIt = FlowConditionConstraints.find(Token);
193     if (ConstraintsIt == FlowConditionConstraints.end()) {
194       Constraints.insert(&arena().makeAtomRef(Token));
195     } else {
196       // Bind flow condition token via `iff` to its set of constraints:
197       // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
198       Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token),
199                                              *ConstraintsIt->second));
200     }
201 
202     if (auto DepsIt = FlowConditionDeps.find(Token);
203         DepsIt != FlowConditionDeps.end())
204       for (Atom A : DepsIt->second)
205         Remaining.push_back(A);
206   }
207 }
208 
209 static void printAtomList(const llvm::SmallVector<Atom> &Atoms,
210                           llvm::raw_ostream &OS) {
211   OS << "(";
212   for (size_t i = 0; i < Atoms.size(); ++i) {
213     OS << Atoms[i];
214     if (i + 1 < Atoms.size())
215       OS << ", ";
216   }
217   OS << ")\n";
218 }
219 
220 void DataflowAnalysisContext::dumpFlowCondition(Atom Token,
221                                                 llvm::raw_ostream &OS) {
222   llvm::SetVector<const Formula *> Constraints;
223   Constraints.insert(&arena().makeAtomRef(Token));
224   addTransitiveFlowConditionConstraints(Token, Constraints);
225 
226   OS << "Flow condition token: " << Token << "\n";
227   SimplifyConstraintsInfo Info;
228   llvm::SetVector<const Formula *> OriginalConstraints = Constraints;
229   simplifyConstraints(Constraints, arena(), &Info);
230   if (!Constraints.empty()) {
231     OS << "Constraints:\n";
232     for (const auto *Constraint : Constraints) {
233       Constraint->print(OS);
234       OS << "\n";
235     }
236   }
237   if (!Info.TrueAtoms.empty()) {
238     OS << "True atoms: ";
239     printAtomList(Info.TrueAtoms, OS);
240   }
241   if (!Info.FalseAtoms.empty()) {
242     OS << "False atoms: ";
243     printAtomList(Info.FalseAtoms, OS);
244   }
245   if (!Info.EquivalentAtoms.empty()) {
246     OS << "Equivalent atoms:\n";
247     for (const llvm::SmallVector<Atom> &Class : Info.EquivalentAtoms)
248       printAtomList(Class, OS);
249   }
250 
251   OS << "\nFlow condition constraints before simplification:\n";
252   for (const auto *Constraint : OriginalConstraints) {
253     Constraint->print(OS);
254     OS << "\n";
255   }
256 }
257 
258 const ControlFlowContext *
259 DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) {
260   // Canonicalize the key:
261   F = F->getDefinition();
262   if (F == nullptr)
263     return nullptr;
264   auto It = FunctionContexts.find(F);
265   if (It != FunctionContexts.end())
266     return &It->second;
267 
268   if (F->hasBody()) {
269     auto CFCtx = ControlFlowContext::build(*F);
270     // FIXME: Handle errors.
271     assert(CFCtx);
272     auto Result = FunctionContexts.insert({F, std::move(*CFCtx)});
273     return &Result.first->second;
274   }
275 
276   return nullptr;
277 }
278 
279 static std::unique_ptr<Logger> makeLoggerFromCommandLine() {
280   if (DataflowLog.empty())
281     return Logger::textual(llvm::errs());
282 
283   llvm::StringRef Dir = DataflowLog;
284   if (auto EC = llvm::sys::fs::create_directories(Dir))
285     llvm::errs() << "Failed to create log dir: " << EC.message() << "\n";
286   // All analysis runs within a process will log to the same directory.
287   // Share a counter so they don't all overwrite each other's 0.html.
288   // (Don't share a logger, it's not threadsafe).
289   static std::atomic<unsigned> Counter = {0};
290   auto StreamFactory =
291       [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> {
292     llvm::SmallString<256> File(Dir);
293     llvm::sys::path::append(File,
294                             std::to_string(Counter.fetch_add(1)) + ".html");
295     std::error_code EC;
296     auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC);
297     if (EC) {
298       llvm::errs() << "Failed to create log " << File << ": " << EC.message()
299                    << "\n";
300       return std::make_unique<llvm::raw_null_ostream>();
301     }
302     return OS;
303   };
304   return Logger::html(std::move(StreamFactory));
305 }
306 
307 DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S,
308                                                  Options Opts)
309     : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) {
310   assert(this->S != nullptr);
311   // If the -dataflow-log command-line flag was set, synthesize a logger.
312   // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
313   // based tools.
314   if (Opts.Log == nullptr) {
315     if (DataflowLog.getNumOccurrences() > 0) {
316       LogOwner = makeLoggerFromCommandLine();
317       this->Opts.Log = LogOwner.get();
318       // FIXME: if the flag is given a value, write an HTML log to a file.
319     } else {
320       this->Opts.Log = &Logger::null();
321     }
322   }
323 }
324 
325 DataflowAnalysisContext::~DataflowAnalysisContext() = default;
326 
327 } // namespace dataflow
328 } // namespace clang
329 
330 using namespace clang;
331 
332 const Expr &clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) {
333   const Expr *Current = &E;
334   if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) {
335     Current = EWC->getSubExpr();
336     assert(Current != nullptr);
337   }
338   Current = Current->IgnoreParens();
339   assert(Current != nullptr);
340   return *Current;
341 }
342 
343 const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) {
344   if (auto *E = dyn_cast<Expr>(&S))
345     return ignoreCFGOmittedNodes(*E);
346   return S;
347 }
348 
349 // FIXME: Does not precisely handle non-virtual diamond inheritance. A single
350 // field decl will be modeled for all instances of the inherited field.
351 static void getFieldsFromClassHierarchy(QualType Type,
352                                         clang::dataflow::FieldSet &Fields) {
353   if (Type->isIncompleteType() || Type->isDependentType() ||
354       !Type->isRecordType())
355     return;
356 
357   for (const FieldDecl *Field : Type->getAsRecordDecl()->fields())
358     Fields.insert(Field);
359   if (auto *CXXRecord = Type->getAsCXXRecordDecl())
360     for (const CXXBaseSpecifier &Base : CXXRecord->bases())
361       getFieldsFromClassHierarchy(Base.getType(), Fields);
362 }
363 
364 /// Gets the set of all fields in the type.
365 clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) {
366   FieldSet Fields;
367   getFieldsFromClassHierarchy(Type, Fields);
368   return Fields;
369 }
370