xref: /freebsd-src/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines a DataflowAnalysisContext class that owns objects that
10 //  encompass the state of a program and stores context that is used during
11 //  dataflow analysis.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/Analysis/FlowSensitive/DebugSupport.h"
18 #include "clang/Analysis/FlowSensitive/Formula.h"
19 #include "clang/Analysis/FlowSensitive/Logger.h"
20 #include "clang/Analysis/FlowSensitive/SimplifyConstraints.h"
21 #include "clang/Analysis/FlowSensitive/Value.h"
22 #include "llvm/ADT/SetOperations.h"
23 #include "llvm/ADT/SetVector.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/Path.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 #include <memory>
31 #include <string>
32 #include <utility>
33 #include <vector>
34 
35 static llvm::cl::opt<std::string> DataflowLog(
36     "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
37     llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
38                    "log to stderr. With an arg, writes HTML logs under the "
39                    "specified directory (one per analyzed function)."));
40 
41 namespace clang {
42 namespace dataflow {
43 
44 FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) {
45   // During context-sensitive analysis, a struct may be allocated in one
46   // function, but its field accessed in a function lower in the stack than
47   // the allocation. Since we only collect fields used in the function where
48   // the allocation occurs, we can't apply that filter when performing
49   // context-sensitive analysis. But, this only applies to storage locations,
50   // since field access it not allowed to fail. In contrast, field *values*
51   // don't need this allowance, since the API allows for uninitialized fields.
52   if (Opts.ContextSensitiveOpts)
53     return getObjectFields(Type);
54 
55   return llvm::set_intersection(getObjectFields(Type), ModeledFields);
56 }
57 
58 void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) {
59   ModeledFields.set_union(Fields);
60 }
61 
62 StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) {
63   if (!Type.isNull() && Type->isRecordType()) {
64     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
65     for (const FieldDecl *Field : getModeledFields(Type))
66       if (Field->getType()->isReferenceType())
67         FieldLocs.insert({Field, nullptr});
68       else
69         FieldLocs.insert({Field, &createStorageLocation(
70                                      Field->getType().getNonReferenceType())});
71 
72     RecordStorageLocation::SyntheticFieldMap SyntheticFields;
73     for (const auto &Entry : getSyntheticFields(Type))
74       SyntheticFields.insert(
75           {Entry.getKey(),
76            &createStorageLocation(Entry.getValue().getNonReferenceType())});
77 
78     return createRecordStorageLocation(Type, std::move(FieldLocs),
79                                        std::move(SyntheticFields));
80   }
81   return arena().create<ScalarStorageLocation>(Type);
82 }
83 
84 // Returns the keys for a given `StringMap`.
85 // Can't use `StringSet` as the return type as it doesn't support `operator==`.
86 template <typename T>
87 static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) {
88   return llvm::DenseSet<llvm::StringRef>(Map.keys().begin(), Map.keys().end());
89 }
90 
91 RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation(
92     QualType Type, RecordStorageLocation::FieldToLoc FieldLocs,
93     RecordStorageLocation::SyntheticFieldMap SyntheticFields) {
94   assert(Type->isRecordType());
95   assert(containsSameFields(getModeledFields(Type), FieldLocs));
96   assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields));
97 
98   RecordStorageLocationCreated = true;
99   return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs),
100                                                std::move(SyntheticFields));
101 }
102 
103 StorageLocation &
104 DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) {
105   if (auto *Loc = DeclToLoc.lookup(&D))
106     return *Loc;
107   auto &Loc = createStorageLocation(D.getType().getNonReferenceType());
108   DeclToLoc[&D] = &Loc;
109   return Loc;
110 }
111 
112 StorageLocation &
113 DataflowAnalysisContext::getStableStorageLocation(const Expr &E) {
114   const Expr &CanonE = ignoreCFGOmittedNodes(E);
115 
116   if (auto *Loc = ExprToLoc.lookup(&CanonE))
117     return *Loc;
118   auto &Loc = createStorageLocation(CanonE.getType());
119   ExprToLoc[&CanonE] = &Loc;
120   return Loc;
121 }
122 
123 PointerValue &
124 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
125   auto CanonicalPointeeType =
126       PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType();
127   auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr);
128   if (Res.second) {
129     auto &PointeeLoc = createStorageLocation(CanonicalPointeeType);
130     Res.first->second = &arena().create<PointerValue>(PointeeLoc);
131   }
132   return *Res.first->second;
133 }
134 
135 void DataflowAnalysisContext::addInvariant(const Formula &Constraint) {
136   if (Invariant == nullptr)
137     Invariant = &Constraint;
138   else
139     Invariant = &arena().makeAnd(*Invariant, Constraint);
140 }
141 
142 void DataflowAnalysisContext::addFlowConditionConstraint(
143     Atom Token, const Formula &Constraint) {
144   auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint);
145   if (!Res.second) {
146     Res.first->second =
147         &arena().makeAnd(*Res.first->second, Constraint);
148   }
149 }
150 
151 Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) {
152   Atom ForkToken = arena().makeFlowConditionToken();
153   FlowConditionDeps[ForkToken].insert(Token);
154   addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token));
155   return ForkToken;
156 }
157 
158 Atom
159 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken,
160                                             Atom SecondToken) {
161   Atom Token = arena().makeFlowConditionToken();
162   FlowConditionDeps[Token].insert(FirstToken);
163   FlowConditionDeps[Token].insert(SecondToken);
164   addFlowConditionConstraint(Token,
165                              arena().makeOr(arena().makeAtomRef(FirstToken),
166                                             arena().makeAtomRef(SecondToken)));
167   return Token;
168 }
169 
170 Solver::Result DataflowAnalysisContext::querySolver(
171     llvm::SetVector<const Formula *> Constraints) {
172   return S->solve(Constraints.getArrayRef());
173 }
174 
175 bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
176                                                    const Formula &F) {
177   // Returns true if and only if truth assignment of the flow condition implies
178   // that `F` is also true. We prove whether or not this property holds by
179   // reducing the problem to satisfiability checking. In other words, we attempt
180   // to show that assuming `F` is false makes the constraints induced by the
181   // flow condition unsatisfiable.
182   llvm::SetVector<const Formula *> Constraints;
183   Constraints.insert(&arena().makeAtomRef(Token));
184   Constraints.insert(&arena().makeNot(F));
185   addTransitiveFlowConditionConstraints(Token, Constraints);
186   return isUnsatisfiable(std::move(Constraints));
187 }
188 
189 bool DataflowAnalysisContext::flowConditionAllows(Atom Token,
190                                                   const Formula &F) {
191   llvm::SetVector<const Formula *> Constraints;
192   Constraints.insert(&arena().makeAtomRef(Token));
193   Constraints.insert(&F);
194   addTransitiveFlowConditionConstraints(Token, Constraints);
195   return isSatisfiable(std::move(Constraints));
196 }
197 
198 bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1,
199                                                  const Formula &Val2) {
200   llvm::SetVector<const Formula *> Constraints;
201   Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2)));
202   return isUnsatisfiable(std::move(Constraints));
203 }
204 
205 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
206     Atom Token, llvm::SetVector<const Formula *> &Constraints) {
207   llvm::DenseSet<Atom> AddedTokens;
208   std::vector<Atom> Remaining = {Token};
209 
210   if (Invariant)
211     Constraints.insert(Invariant);
212   // Define all the flow conditions that might be referenced in constraints.
213   while (!Remaining.empty()) {
214     auto Token = Remaining.back();
215     Remaining.pop_back();
216     if (!AddedTokens.insert(Token).second)
217       continue;
218 
219     auto ConstraintsIt = FlowConditionConstraints.find(Token);
220     if (ConstraintsIt == FlowConditionConstraints.end()) {
221       Constraints.insert(&arena().makeAtomRef(Token));
222     } else {
223       // Bind flow condition token via `iff` to its set of constraints:
224       // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
225       Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token),
226                                              *ConstraintsIt->second));
227     }
228 
229     if (auto DepsIt = FlowConditionDeps.find(Token);
230         DepsIt != FlowConditionDeps.end())
231       for (Atom A : DepsIt->second)
232         Remaining.push_back(A);
233   }
234 }
235 
236 static void printAtomList(const llvm::SmallVector<Atom> &Atoms,
237                           llvm::raw_ostream &OS) {
238   OS << "(";
239   for (size_t i = 0; i < Atoms.size(); ++i) {
240     OS << Atoms[i];
241     if (i + 1 < Atoms.size())
242       OS << ", ";
243   }
244   OS << ")\n";
245 }
246 
247 void DataflowAnalysisContext::dumpFlowCondition(Atom Token,
248                                                 llvm::raw_ostream &OS) {
249   llvm::SetVector<const Formula *> Constraints;
250   Constraints.insert(&arena().makeAtomRef(Token));
251   addTransitiveFlowConditionConstraints(Token, Constraints);
252 
253   OS << "Flow condition token: " << Token << "\n";
254   SimplifyConstraintsInfo Info;
255   llvm::SetVector<const Formula *> OriginalConstraints = Constraints;
256   simplifyConstraints(Constraints, arena(), &Info);
257   if (!Constraints.empty()) {
258     OS << "Constraints:\n";
259     for (const auto *Constraint : Constraints) {
260       Constraint->print(OS);
261       OS << "\n";
262     }
263   }
264   if (!Info.TrueAtoms.empty()) {
265     OS << "True atoms: ";
266     printAtomList(Info.TrueAtoms, OS);
267   }
268   if (!Info.FalseAtoms.empty()) {
269     OS << "False atoms: ";
270     printAtomList(Info.FalseAtoms, OS);
271   }
272   if (!Info.EquivalentAtoms.empty()) {
273     OS << "Equivalent atoms:\n";
274     for (const llvm::SmallVector<Atom> &Class : Info.EquivalentAtoms)
275       printAtomList(Class, OS);
276   }
277 
278   OS << "\nFlow condition constraints before simplification:\n";
279   for (const auto *Constraint : OriginalConstraints) {
280     Constraint->print(OS);
281     OS << "\n";
282   }
283 }
284 
285 const ControlFlowContext *
286 DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) {
287   // Canonicalize the key:
288   F = F->getDefinition();
289   if (F == nullptr)
290     return nullptr;
291   auto It = FunctionContexts.find(F);
292   if (It != FunctionContexts.end())
293     return &It->second;
294 
295   if (F->hasBody()) {
296     auto CFCtx = ControlFlowContext::build(*F);
297     // FIXME: Handle errors.
298     assert(CFCtx);
299     auto Result = FunctionContexts.insert({F, std::move(*CFCtx)});
300     return &Result.first->second;
301   }
302 
303   return nullptr;
304 }
305 
306 static std::unique_ptr<Logger> makeLoggerFromCommandLine() {
307   if (DataflowLog.empty())
308     return Logger::textual(llvm::errs());
309 
310   llvm::StringRef Dir = DataflowLog;
311   if (auto EC = llvm::sys::fs::create_directories(Dir))
312     llvm::errs() << "Failed to create log dir: " << EC.message() << "\n";
313   // All analysis runs within a process will log to the same directory.
314   // Share a counter so they don't all overwrite each other's 0.html.
315   // (Don't share a logger, it's not threadsafe).
316   static std::atomic<unsigned> Counter = {0};
317   auto StreamFactory =
318       [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> {
319     llvm::SmallString<256> File(Dir);
320     llvm::sys::path::append(File,
321                             std::to_string(Counter.fetch_add(1)) + ".html");
322     std::error_code EC;
323     auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC);
324     if (EC) {
325       llvm::errs() << "Failed to create log " << File << ": " << EC.message()
326                    << "\n";
327       return std::make_unique<llvm::raw_null_ostream>();
328     }
329     return OS;
330   };
331   return Logger::html(std::move(StreamFactory));
332 }
333 
334 DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S,
335                                                  Options Opts)
336     : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) {
337   assert(this->S != nullptr);
338   // If the -dataflow-log command-line flag was set, synthesize a logger.
339   // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
340   // based tools.
341   if (Opts.Log == nullptr) {
342     if (DataflowLog.getNumOccurrences() > 0) {
343       LogOwner = makeLoggerFromCommandLine();
344       this->Opts.Log = LogOwner.get();
345       // FIXME: if the flag is given a value, write an HTML log to a file.
346     } else {
347       this->Opts.Log = &Logger::null();
348     }
349   }
350 }
351 
352 DataflowAnalysisContext::~DataflowAnalysisContext() = default;
353 
354 } // namespace dataflow
355 } // namespace clang
356 
357 using namespace clang;
358 
359 const Expr &clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) {
360   const Expr *Current = &E;
361   if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) {
362     Current = EWC->getSubExpr();
363     assert(Current != nullptr);
364   }
365   Current = Current->IgnoreParens();
366   assert(Current != nullptr);
367   return *Current;
368 }
369 
370 const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) {
371   if (auto *E = dyn_cast<Expr>(&S))
372     return ignoreCFGOmittedNodes(*E);
373   return S;
374 }
375 
376 // FIXME: Does not precisely handle non-virtual diamond inheritance. A single
377 // field decl will be modeled for all instances of the inherited field.
378 static void getFieldsFromClassHierarchy(QualType Type,
379                                         clang::dataflow::FieldSet &Fields) {
380   if (Type->isIncompleteType() || Type->isDependentType() ||
381       !Type->isRecordType())
382     return;
383 
384   for (const FieldDecl *Field : Type->getAsRecordDecl()->fields())
385     Fields.insert(Field);
386   if (auto *CXXRecord = Type->getAsCXXRecordDecl())
387     for (const CXXBaseSpecifier &Base : CXXRecord->bases())
388       getFieldsFromClassHierarchy(Base.getType(), Fields);
389 }
390 
391 /// Gets the set of all fields in the type.
392 clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) {
393   FieldSet Fields;
394   getFieldsFromClassHierarchy(Type, Fields);
395   return Fields;
396 }
397 
398 bool clang::dataflow::containsSameFields(
399     const clang::dataflow::FieldSet &Fields,
400     const clang::dataflow::RecordStorageLocation::FieldToLoc &FieldLocs) {
401   if (Fields.size() != FieldLocs.size())
402     return false;
403   for ([[maybe_unused]] auto [Field, Loc] : FieldLocs)
404     if (!Fields.contains(cast_or_null<FieldDecl>(Field)))
405       return false;
406   return true;
407 }
408